diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,3412 +3,8690 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 2414, + "global_step": 6181, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.00041425020712510354, - "grad_norm": 24.199668764830136, - "learning_rate": 4.132231404958678e-08, - "loss": 1.3991, + "epoch": 0.00016178611875101117, + "grad_norm": 2.989531072795082, + "learning_rate": 1.6155088852988694e-08, + "loss": 1.6714, "step": 1 }, { - "epoch": 0.002071251035625518, - "grad_norm": 23.414571187298456, - "learning_rate": 2.066115702479339e-07, - "loss": 1.4112, + "epoch": 0.0008089305937550558, + "grad_norm": 3.0740133028448042, + "learning_rate": 8.077544426494346e-08, + "loss": 1.6455, "step": 5 }, { - "epoch": 0.004142502071251036, - "grad_norm": 14.988102359744419, - "learning_rate": 4.132231404958678e-07, - "loss": 1.3662, + "epoch": 0.0016178611875101116, + "grad_norm": 3.05751861374171, + "learning_rate": 1.6155088852988693e-07, + "loss": 1.6578, "step": 10 }, { - "epoch": 0.006213753106876553, - "grad_norm": 8.653198188938534, - "learning_rate": 6.198347107438018e-07, - "loss": 1.2641, + "epoch": 0.0024267917812651673, + "grad_norm": 3.05989958650721, + "learning_rate": 2.4232633279483037e-07, + "loss": 1.6633, "step": 15 }, { - "epoch": 0.008285004142502071, - "grad_norm": 10.616310133905728, - "learning_rate": 8.264462809917356e-07, - "loss": 1.1368, + "epoch": 0.003235722375020223, + "grad_norm": 3.024001026912918, + "learning_rate": 3.2310177705977386e-07, + "loss": 1.6386, "step": 20 }, { - "epoch": 0.010356255178127589, - "grad_norm": 4.668322782816374, - "learning_rate": 1.0330578512396695e-06, - "loss": 1.0379, + "epoch": 0.004044652968775279, + "grad_norm": 3.1122902778444006, + "learning_rate": 4.038772213247173e-07, + "loss": 1.6687, "step": 25 }, { - "epoch": 0.012427506213753107, - "grad_norm": 3.5182741608551535, - "learning_rate": 1.2396694214876035e-06, - "loss": 0.9937, + "epoch": 0.004853583562530335, + "grad_norm": 2.8890112766655296, + "learning_rate": 4.846526655896607e-07, + "loss": 1.6414, "step": 30 }, { - "epoch": 0.014498757249378625, - "grad_norm": 3.3411660569699504, - "learning_rate": 1.4462809917355372e-06, - "loss": 0.9572, + "epoch": 0.0056625141562853904, + "grad_norm": 2.6523925300683246, + "learning_rate": 5.654281098546043e-07, + "loss": 1.59, "step": 35 }, { - "epoch": 0.016570008285004142, - "grad_norm": 3.037596563067607, - "learning_rate": 1.6528925619834712e-06, - "loss": 0.9289, + "epoch": 0.006471444750040446, + "grad_norm": 2.603827366657395, + "learning_rate": 6.462035541195477e-07, + "loss": 1.6336, "step": 40 }, { - "epoch": 0.018641259320629662, - "grad_norm": 3.2528069002854374, - "learning_rate": 1.859504132231405e-06, - "loss": 0.9214, + "epoch": 0.007280375343795502, + "grad_norm": 2.577297564219146, + "learning_rate": 7.269789983844912e-07, + "loss": 1.6402, "step": 45 }, { - "epoch": 0.020712510356255178, - "grad_norm": 3.2107014690984803, - "learning_rate": 2.066115702479339e-06, - "loss": 0.9086, + "epoch": 0.008089305937550558, + "grad_norm": 2.439515573088333, + "learning_rate": 8.077544426494346e-07, + "loss": 1.6193, "step": 50 }, { - "epoch": 0.022783761391880698, - "grad_norm": 2.9859094433454554, - "learning_rate": 2.2727272727272728e-06, - "loss": 0.8991, + "epoch": 0.008898236531305614, + "grad_norm": 2.4333132929087764, + "learning_rate": 8.885298869143781e-07, + "loss": 1.6122, "step": 55 }, { - "epoch": 0.024855012427506214, - "grad_norm": 3.0590584148615627, - "learning_rate": 2.479338842975207e-06, - "loss": 0.8916, + "epoch": 0.00970716712506067, + "grad_norm": 2.1113677966611464, + "learning_rate": 9.693053311793215e-07, + "loss": 1.6185, "step": 60 }, { - "epoch": 0.026926263463131733, - "grad_norm": 2.9914820848818446, - "learning_rate": 2.6859504132231405e-06, - "loss": 0.8785, + "epoch": 0.010516097718815726, + "grad_norm": 1.8886265008348362, + "learning_rate": 1.0500807754442651e-06, + "loss": 1.5725, "step": 65 }, { - "epoch": 0.02899751449875725, - "grad_norm": 2.9775144313765445, - "learning_rate": 2.8925619834710743e-06, - "loss": 0.8897, + "epoch": 0.011325028312570781, + "grad_norm": 1.8864578193982546, + "learning_rate": 1.1308562197092086e-06, + "loss": 1.5734, "step": 70 }, { - "epoch": 0.03106876553438277, - "grad_norm": 3.128569537433572, - "learning_rate": 3.0991735537190086e-06, - "loss": 0.8834, + "epoch": 0.012133958906325838, + "grad_norm": 1.6120301708706668, + "learning_rate": 1.211631663974152e-06, + "loss": 1.544, "step": 75 }, { - "epoch": 0.033140016570008285, - "grad_norm": 3.1281090024503757, - "learning_rate": 3.3057851239669424e-06, - "loss": 0.8651, + "epoch": 0.012942889500080892, + "grad_norm": 1.5440235686664168, + "learning_rate": 1.2924071082390954e-06, + "loss": 1.5381, "step": 80 }, { - "epoch": 0.035211267605633804, - "grad_norm": 3.0548507580726625, - "learning_rate": 3.5123966942148763e-06, - "loss": 0.8737, + "epoch": 0.013751820093835949, + "grad_norm": 1.3513181383313533, + "learning_rate": 1.3731825525040387e-06, + "loss": 1.5246, "step": 85 }, { - "epoch": 0.037282518641259324, - "grad_norm": 3.088551853774246, - "learning_rate": 3.71900826446281e-06, - "loss": 0.8623, + "epoch": 0.014560750687591004, + "grad_norm": 1.4327471253077833, + "learning_rate": 1.4539579967689823e-06, + "loss": 1.4991, "step": 90 }, { - "epoch": 0.03935376967688484, - "grad_norm": 3.117570527250618, - "learning_rate": 3.925619834710744e-06, - "loss": 0.838, + "epoch": 0.01536968128134606, + "grad_norm": 1.2301698886041794, + "learning_rate": 1.5347334410339258e-06, + "loss": 1.4618, "step": 95 }, { - "epoch": 0.041425020712510356, - "grad_norm": 3.248544738727649, - "learning_rate": 4.132231404958678e-06, - "loss": 0.8675, + "epoch": 0.016178611875101116, + "grad_norm": 1.162576921517532, + "learning_rate": 1.6155088852988692e-06, + "loss": 1.4728, "step": 100 }, { - "epoch": 0.043496271748135876, - "grad_norm": 3.096171447698008, - "learning_rate": 4.338842975206612e-06, - "loss": 0.8394, + "epoch": 0.01698754246885617, + "grad_norm": 1.0168699965353747, + "learning_rate": 1.6962843295638126e-06, + "loss": 1.46, "step": 105 }, { - "epoch": 0.045567522783761395, - "grad_norm": 3.233248587377124, - "learning_rate": 4.5454545454545455e-06, - "loss": 0.857, + "epoch": 0.01779647306261123, + "grad_norm": 1.0395803267996115, + "learning_rate": 1.7770597738287563e-06, + "loss": 1.4594, "step": 110 }, { - "epoch": 0.04763877381938691, - "grad_norm": 3.6001636024321293, - "learning_rate": 4.75206611570248e-06, - "loss": 0.8321, + "epoch": 0.018605403656366284, + "grad_norm": 0.9917353590665066, + "learning_rate": 1.8578352180936995e-06, + "loss": 1.4397, "step": 115 }, { - "epoch": 0.04971002485501243, - "grad_norm": 2.9956895468112084, - "learning_rate": 4.958677685950414e-06, - "loss": 0.833, + "epoch": 0.01941433425012134, + "grad_norm": 0.9523144102295756, + "learning_rate": 1.938610662358643e-06, + "loss": 1.4234, "step": 120 }, { - "epoch": 0.05178127589063795, - "grad_norm": 3.2710428266169953, - "learning_rate": 5.165289256198347e-06, - "loss": 0.847, + "epoch": 0.020223264843876397, + "grad_norm": 0.9258023739630329, + "learning_rate": 2.0193861066235864e-06, + "loss": 1.4226, "step": 125 }, { - "epoch": 0.053852526926263466, - "grad_norm": 3.249727950554207, - "learning_rate": 5.371900826446281e-06, - "loss": 0.8491, + "epoch": 0.021032195437631452, + "grad_norm": 0.9483758496421177, + "learning_rate": 2.1001615508885302e-06, + "loss": 1.3762, "step": 130 }, { - "epoch": 0.05592377796188898, - "grad_norm": 3.638097729325915, - "learning_rate": 5.578512396694216e-06, - "loss": 0.8218, + "epoch": 0.021841126031386507, + "grad_norm": 0.9530895080844857, + "learning_rate": 2.1809369951534733e-06, + "loss": 1.354, "step": 135 }, { - "epoch": 0.0579950289975145, - "grad_norm": 3.136515122305162, - "learning_rate": 5.785123966942149e-06, - "loss": 0.8367, + "epoch": 0.022650056625141562, + "grad_norm": 0.9891655010686481, + "learning_rate": 2.261712439418417e-06, + "loss": 1.3418, "step": 140 }, { - "epoch": 0.06006628003314002, - "grad_norm": 2.992975297054502, - "learning_rate": 5.991735537190083e-06, - "loss": 0.8331, + "epoch": 0.02345898721889662, + "grad_norm": 1.0770341521239468, + "learning_rate": 2.34248788368336e-06, + "loss": 1.3239, "step": 145 }, { - "epoch": 0.06213753106876554, - "grad_norm": 3.1773736798479653, - "learning_rate": 6.198347107438017e-06, - "loss": 0.8159, + "epoch": 0.024267917812651675, + "grad_norm": 1.1321453887674335, + "learning_rate": 2.423263327948304e-06, + "loss": 1.2818, "step": 150 }, { - "epoch": 0.06420878210439106, - "grad_norm": 3.2512013569633, - "learning_rate": 6.404958677685951e-06, - "loss": 0.8261, + "epoch": 0.02507684840640673, + "grad_norm": 1.116259450785514, + "learning_rate": 2.5040387722132474e-06, + "loss": 1.2499, "step": 155 }, { - "epoch": 0.06628003314001657, - "grad_norm": 3.0823463887606852, - "learning_rate": 6.611570247933885e-06, - "loss": 0.8376, + "epoch": 0.025885779000161785, + "grad_norm": 1.1299351548572238, + "learning_rate": 2.584814216478191e-06, + "loss": 1.2403, "step": 160 }, { - "epoch": 0.06835128417564208, - "grad_norm": 3.1208323742016897, - "learning_rate": 6.818181818181818e-06, - "loss": 0.8275, + "epoch": 0.026694709593916843, + "grad_norm": 1.1335363316132652, + "learning_rate": 2.6655896607431343e-06, + "loss": 1.214, "step": 165 }, { - "epoch": 0.07042253521126761, - "grad_norm": 3.0745780302802332, - "learning_rate": 7.0247933884297525e-06, - "loss": 0.8249, + "epoch": 0.027503640187671898, + "grad_norm": 1.1757124749289842, + "learning_rate": 2.7463651050080773e-06, + "loss": 1.163, "step": 170 }, { - "epoch": 0.07249378624689312, - "grad_norm": 3.119480709134969, - "learning_rate": 7.231404958677687e-06, - "loss": 0.8042, + "epoch": 0.028312570781426953, + "grad_norm": 1.2651810945082007, + "learning_rate": 2.827140549273021e-06, + "loss": 1.1479, "step": 175 }, { - "epoch": 0.07456503728251865, - "grad_norm": 3.3248978246402663, - "learning_rate": 7.43801652892562e-06, - "loss": 0.8284, + "epoch": 0.029121501375182008, + "grad_norm": 1.2624388452227249, + "learning_rate": 2.9079159935379646e-06, + "loss": 1.0948, "step": 180 }, { - "epoch": 0.07663628831814416, - "grad_norm": 3.101088437755532, - "learning_rate": 7.644628099173555e-06, - "loss": 0.8158, + "epoch": 0.029930431968937066, + "grad_norm": 1.2111655416186609, + "learning_rate": 2.988691437802908e-06, + "loss": 1.0487, "step": 185 }, { - "epoch": 0.07870753935376967, - "grad_norm": 3.3761252638628707, - "learning_rate": 7.851239669421489e-06, - "loss": 0.8082, + "epoch": 0.03073936256269212, + "grad_norm": 0.8837452655216318, + "learning_rate": 3.0694668820678515e-06, + "loss": 1.0465, "step": 190 }, { - "epoch": 0.0807787903893952, - "grad_norm": 3.211138010347251, - "learning_rate": 8.057851239669421e-06, - "loss": 0.8066, + "epoch": 0.03154829315644718, + "grad_norm": 0.8221498455856253, + "learning_rate": 3.1502423263327954e-06, + "loss": 1.0297, "step": 195 }, { - "epoch": 0.08285004142502071, - "grad_norm": 3.390864723290171, - "learning_rate": 8.264462809917356e-06, - "loss": 0.8071, + "epoch": 0.03235722375020223, + "grad_norm": 0.8439527908802734, + "learning_rate": 3.2310177705977384e-06, + "loss": 1.0015, "step": 200 }, { - "epoch": 0.08492129246064622, - "grad_norm": 2.9884296341537775, - "learning_rate": 8.47107438016529e-06, - "loss": 0.8173, + "epoch": 0.03316615434395729, + "grad_norm": 0.7410910924751422, + "learning_rate": 3.311793214862682e-06, + "loss": 1.0173, "step": 205 }, { - "epoch": 0.08699254349627175, - "grad_norm": 2.9929265463289774, - "learning_rate": 8.677685950413224e-06, - "loss": 0.8139, + "epoch": 0.03397508493771234, + "grad_norm": 0.6701077230967789, + "learning_rate": 3.3925686591276253e-06, + "loss": 0.999, "step": 210 }, { - "epoch": 0.08906379453189726, - "grad_norm": 3.178365772310986, - "learning_rate": 8.884297520661158e-06, - "loss": 0.8077, + "epoch": 0.0347840155314674, + "grad_norm": 0.6355481671837402, + "learning_rate": 3.473344103392569e-06, + "loss": 0.9725, "step": 215 }, { - "epoch": 0.09113504556752279, - "grad_norm": 3.0990381736081556, - "learning_rate": 9.090909090909091e-06, - "loss": 0.7962, + "epoch": 0.03559294612522246, + "grad_norm": 0.5875107208972431, + "learning_rate": 3.5541195476575126e-06, + "loss": 0.9867, "step": 220 }, { - "epoch": 0.0932062966031483, - "grad_norm": 3.3389255178168322, - "learning_rate": 9.297520661157025e-06, - "loss": 0.7985, + "epoch": 0.03640187671897751, + "grad_norm": 0.5994183367628626, + "learning_rate": 3.6348949919224556e-06, + "loss": 0.9943, "step": 225 }, { - "epoch": 0.09527754763877382, - "grad_norm": 3.3752236468584456, - "learning_rate": 9.50413223140496e-06, - "loss": 0.796, + "epoch": 0.03721080731273257, + "grad_norm": 0.6182303752035966, + "learning_rate": 3.715670436187399e-06, + "loss": 0.9985, "step": 230 }, { - "epoch": 0.09734879867439934, - "grad_norm": 3.0456999521377877, - "learning_rate": 9.710743801652894e-06, - "loss": 0.7983, + "epoch": 0.038019737906487626, + "grad_norm": 0.5701097398554537, + "learning_rate": 3.796445880452343e-06, + "loss": 0.9736, "step": 235 }, { - "epoch": 0.09942004971002485, - "grad_norm": 3.2120044737399187, - "learning_rate": 9.917355371900828e-06, - "loss": 0.8055, + "epoch": 0.03882866850024268, + "grad_norm": 0.5584494985009502, + "learning_rate": 3.877221324717286e-06, + "loss": 0.9875, "step": 240 }, { - "epoch": 0.10149130074565037, - "grad_norm": 3.132342089203479, - "learning_rate": 9.999952928077044e-06, - "loss": 0.8021, + "epoch": 0.039637599093997736, + "grad_norm": 0.5782425116149067, + "learning_rate": 3.95799676898223e-06, + "loss": 0.9701, "step": 245 }, { - "epoch": 0.1035625517812759, - "grad_norm": 2.9894402985924766, - "learning_rate": 9.999665269535307e-06, - "loss": 0.8013, + "epoch": 0.040446529687752794, + "grad_norm": 0.6299578118181779, + "learning_rate": 4.038772213247173e-06, + "loss": 0.9671, "step": 250 }, { - "epoch": 0.1056338028169014, - "grad_norm": 3.046903074957559, - "learning_rate": 9.99911611854702e-06, - "loss": 0.7979, + "epoch": 0.041255460281507846, + "grad_norm": 0.5812417163133361, + "learning_rate": 4.119547657512117e-06, + "loss": 0.9579, "step": 255 }, { - "epoch": 0.10770505385252693, - "grad_norm": 2.8665185067493626, - "learning_rate": 9.998305503833872e-06, - "loss": 0.8073, + "epoch": 0.042064390875262904, + "grad_norm": 0.5781954014931824, + "learning_rate": 4.2003231017770605e-06, + "loss": 0.9685, "step": 260 }, { - "epoch": 0.10977630488815245, - "grad_norm": 3.2409413642928278, - "learning_rate": 9.997233467792626e-06, - "loss": 0.7976, + "epoch": 0.042873321469017955, + "grad_norm": 0.608052455969015, + "learning_rate": 4.2810985460420035e-06, + "loss": 0.9701, "step": 265 }, { - "epoch": 0.11184755592377796, - "grad_norm": 2.944077687828054, - "learning_rate": 9.995900066492902e-06, - "loss": 0.8016, + "epoch": 0.043682252062773014, + "grad_norm": 0.5966466875482974, + "learning_rate": 4.3618739903069465e-06, + "loss": 0.9702, "step": 270 }, { - "epoch": 0.11391880695940348, - "grad_norm": 2.9912116980156145, - "learning_rate": 9.994305369674242e-06, - "loss": 0.7853, + "epoch": 0.04449118265652807, + "grad_norm": 0.5596371503375434, + "learning_rate": 4.44264943457189e-06, + "loss": 0.9563, "step": 275 }, { - "epoch": 0.115990057995029, - "grad_norm": 2.892932100424063, - "learning_rate": 9.992449460742464e-06, - "loss": 0.8046, + "epoch": 0.045300113250283124, + "grad_norm": 0.5994461579702871, + "learning_rate": 4.523424878836834e-06, + "loss": 0.9578, "step": 280 }, { - "epoch": 0.11806130903065451, - "grad_norm": 3.055501054823764, - "learning_rate": 9.9903324367653e-06, - "loss": 0.7794, + "epoch": 0.04610904384403818, + "grad_norm": 0.5595808732955236, + "learning_rate": 4.604200323101777e-06, + "loss": 0.9607, "step": 285 }, { - "epoch": 0.12013256006628004, - "grad_norm": 2.7659846238569052, - "learning_rate": 9.98795440846732e-06, - "loss": 0.7814, + "epoch": 0.04691797443779324, + "grad_norm": 0.669537499667742, + "learning_rate": 4.68497576736672e-06, + "loss": 0.9476, "step": 290 }, { - "epoch": 0.12220381110190555, - "grad_norm": 3.0121040906813126, - "learning_rate": 9.985315500224135e-06, - "loss": 0.7809, + "epoch": 0.04772690503154829, + "grad_norm": 0.5511679026608909, + "learning_rate": 4.765751211631664e-06, + "loss": 0.9464, "step": 295 }, { - "epoch": 0.12427506213753108, - "grad_norm": 3.01982583757692, - "learning_rate": 9.982415850055902e-06, - "loss": 0.781, + "epoch": 0.04853583562530335, + "grad_norm": 0.5939912843219857, + "learning_rate": 4.846526655896608e-06, + "loss": 0.9429, "step": 300 }, { - "epoch": 0.1263463131731566, - "grad_norm": 2.716394804747616, - "learning_rate": 9.979255609620095e-06, - "loss": 0.7734, + "epoch": 0.0493447662190584, + "grad_norm": 0.5798565127301802, + "learning_rate": 4.927302100161551e-06, + "loss": 0.9485, "step": 305 }, { - "epoch": 0.12841756420878211, - "grad_norm": 2.690802383393494, - "learning_rate": 9.975834944203581e-06, - "loss": 0.7667, + "epoch": 0.05015369681281346, + "grad_norm": 0.5756338877596261, + "learning_rate": 5.008077544426495e-06, + "loss": 0.9572, "step": 310 }, { - "epoch": 0.1304888152444076, - "grad_norm": 2.9488089043416124, - "learning_rate": 9.972154032713973e-06, - "loss": 0.7805, + "epoch": 0.05096262740656852, + "grad_norm": 0.5842007801259245, + "learning_rate": 5.088852988691439e-06, + "loss": 0.9219, "step": 315 }, { - "epoch": 0.13256006628003314, - "grad_norm": 2.719266979190075, - "learning_rate": 9.968213067670265e-06, - "loss": 0.7763, + "epoch": 0.05177155800032357, + "grad_norm": 0.5909234064830663, + "learning_rate": 5.169628432956382e-06, + "loss": 0.9463, "step": 320 }, { - "epoch": 0.13463131731565867, - "grad_norm": 2.812329679333236, - "learning_rate": 9.964012255192776e-06, - "loss": 0.7533, + "epoch": 0.05258048859407863, + "grad_norm": 0.5964209125322439, + "learning_rate": 5.250403877221325e-06, + "loss": 0.9137, "step": 325 }, { - "epoch": 0.13670256835128416, - "grad_norm": 2.939179450726379, - "learning_rate": 9.959551814992364e-06, - "loss": 0.7538, + "epoch": 0.05338941918783369, + "grad_norm": 0.6196287292433933, + "learning_rate": 5.331179321486269e-06, + "loss": 0.9275, "step": 330 }, { - "epoch": 0.1387738193869097, - "grad_norm": 3.0343927565408566, - "learning_rate": 9.954831980358928e-06, - "loss": 0.7761, + "epoch": 0.05419834978158874, + "grad_norm": 0.6885711787979655, + "learning_rate": 5.411954765751212e-06, + "loss": 0.9308, "step": 335 }, { - "epoch": 0.14084507042253522, - "grad_norm": 3.051369274408743, - "learning_rate": 9.949852998149217e-06, - "loss": 0.7623, + "epoch": 0.055007280375343796, + "grad_norm": 0.5882034901608182, + "learning_rate": 5.492730210016155e-06, + "loss": 0.9226, "step": 340 }, { - "epoch": 0.14291632145816072, - "grad_norm": 2.8926023239726666, - "learning_rate": 9.944615128773911e-06, - "loss": 0.7449, + "epoch": 0.055816210969098855, + "grad_norm": 0.6167809439808443, + "learning_rate": 5.573505654281099e-06, + "loss": 0.9329, "step": 345 }, { - "epoch": 0.14498757249378624, - "grad_norm": 3.0274876780182725, - "learning_rate": 9.939118646184007e-06, - "loss": 0.7564, + "epoch": 0.056625141562853906, + "grad_norm": 0.6473624549151995, + "learning_rate": 5.654281098546042e-06, + "loss": 0.9342, "step": 350 }, { - "epoch": 0.14705882352941177, - "grad_norm": 3.030086237395326, - "learning_rate": 9.933363837856485e-06, - "loss": 0.7636, + "epoch": 0.057434072156608965, + "grad_norm": 0.6268324107561812, + "learning_rate": 5.735056542810986e-06, + "loss": 0.9444, "step": 355 }, { - "epoch": 0.1491300745650373, - "grad_norm": 2.7216718925144443, - "learning_rate": 9.927351004779275e-06, - "loss": 0.7523, + "epoch": 0.058243002750364016, + "grad_norm": 0.6081881922413113, + "learning_rate": 5.815831987075929e-06, + "loss": 0.9266, "step": 360 }, { - "epoch": 0.1512013256006628, - "grad_norm": 2.8963011471510423, - "learning_rate": 9.921080461435522e-06, - "loss": 0.747, + "epoch": 0.059051933344119074, + "grad_norm": 0.623515938924003, + "learning_rate": 5.896607431340873e-06, + "loss": 0.927, "step": 365 }, { - "epoch": 0.15327257663628832, - "grad_norm": 2.908273407418608, - "learning_rate": 9.914552535787122e-06, - "loss": 0.7325, + "epoch": 0.05986086393787413, + "grad_norm": 0.595326688964164, + "learning_rate": 5.977382875605816e-06, + "loss": 0.941, "step": 370 }, { - "epoch": 0.15534382767191385, - "grad_norm": 2.8676437077981722, - "learning_rate": 9.90776756925758e-06, - "loss": 0.7349, + "epoch": 0.060669794531629184, + "grad_norm": 0.7035346939039838, + "learning_rate": 6.058158319870759e-06, + "loss": 0.9243, "step": 375 }, { - "epoch": 0.15741507870753935, - "grad_norm": 2.8556939586506687, - "learning_rate": 9.900725916714157e-06, - "loss": 0.7524, + "epoch": 0.06147872512538424, + "grad_norm": 0.6486917875962197, + "learning_rate": 6.138933764135703e-06, + "loss": 0.945, "step": 380 }, { - "epoch": 0.15948632974316487, - "grad_norm": 2.87186116366176, - "learning_rate": 9.893427946449297e-06, - "loss": 0.7214, + "epoch": 0.0622876557191393, + "grad_norm": 0.6272725751694639, + "learning_rate": 6.219709208400647e-06, + "loss": 0.9333, "step": 385 }, { - "epoch": 0.1615575807787904, - "grad_norm": 2.8757200745024485, - "learning_rate": 9.885874040161373e-06, - "loss": 0.7326, + "epoch": 0.06309658631289436, + "grad_norm": 0.6847897267332435, + "learning_rate": 6.300484652665591e-06, + "loss": 0.9201, "step": 390 }, { - "epoch": 0.1636288318144159, - "grad_norm": 3.215961320189587, - "learning_rate": 9.878064592934723e-06, - "loss": 0.7227, + "epoch": 0.0639055169066494, + "grad_norm": 0.6063331771589294, + "learning_rate": 6.381260096930534e-06, + "loss": 0.9301, "step": 395 }, { - "epoch": 0.16570008285004142, - "grad_norm": 2.9059437980483693, - "learning_rate": 9.87000001321898e-06, - "loss": 0.7509, + "epoch": 0.06471444750040446, + "grad_norm": 0.6535104175430165, + "learning_rate": 6.462035541195477e-06, + "loss": 0.9268, "step": 400 }, { - "epoch": 0.16777133388566695, - "grad_norm": 2.685626883478994, - "learning_rate": 9.86168072280772e-06, - "loss": 0.7223, + "epoch": 0.06552337809415952, + "grad_norm": 0.6298942805679355, + "learning_rate": 6.542810985460421e-06, + "loss": 0.9126, "step": 405 }, { - "epoch": 0.16984258492129245, - "grad_norm": 3.033273475512452, - "learning_rate": 9.853107156816393e-06, - "loss": 0.7385, + "epoch": 0.06633230868791458, + "grad_norm": 0.687088041257841, + "learning_rate": 6.623586429725364e-06, + "loss": 0.8997, "step": 410 }, { - "epoch": 0.17191383595691798, - "grad_norm": 2.7784947677053955, - "learning_rate": 9.844279763659566e-06, - "loss": 0.7237, + "epoch": 0.06714123928166964, + "grad_norm": 0.6809022568244187, + "learning_rate": 6.7043618739903075e-06, + "loss": 0.9221, "step": 415 }, { - "epoch": 0.1739850869925435, - "grad_norm": 2.9117670580445543, - "learning_rate": 9.835199005027477e-06, - "loss": 0.7161, + "epoch": 0.06795016987542468, + "grad_norm": 0.6595953972527364, + "learning_rate": 6.7851373182552505e-06, + "loss": 0.9351, "step": 420 }, { - "epoch": 0.176056338028169, - "grad_norm": 2.8843325957862764, - "learning_rate": 9.825865355861878e-06, - "loss": 0.7256, + "epoch": 0.06875910046917974, + "grad_norm": 0.6526975586915988, + "learning_rate": 6.865912762520195e-06, + "loss": 0.9046, "step": 425 }, { - "epoch": 0.17812758906379453, - "grad_norm": 2.856399843872642, - "learning_rate": 9.816279304331202e-06, - "loss": 0.7142, + "epoch": 0.0695680310629348, + "grad_norm": 0.6747286601047916, + "learning_rate": 6.946688206785138e-06, + "loss": 0.9384, "step": 430 }, { - "epoch": 0.18019884009942005, - "grad_norm": 2.9556721265181163, - "learning_rate": 9.806441351805025e-06, - "loss": 0.7306, + "epoch": 0.07037696165668986, + "grad_norm": 0.705527948534626, + "learning_rate": 7.027463651050081e-06, + "loss": 0.9171, "step": 435 }, { - "epoch": 0.18227009113504558, - "grad_norm": 2.9810648069102172, - "learning_rate": 9.79635201282785e-06, - "loss": 0.705, + "epoch": 0.07118589225044492, + "grad_norm": 0.6636402242054834, + "learning_rate": 7.108239095315025e-06, + "loss": 0.9311, "step": 440 }, { - "epoch": 0.18434134217067108, - "grad_norm": 2.650650945302262, - "learning_rate": 9.786011815092193e-06, - "loss": 0.7026, + "epoch": 0.07199482284419997, + "grad_norm": 0.6340851719954015, + "learning_rate": 7.189014539579968e-06, + "loss": 0.9194, "step": 445 }, { - "epoch": 0.1864125932062966, - "grad_norm": 2.7614731795144727, - "learning_rate": 9.775421299410977e-06, - "loss": 0.6993, + "epoch": 0.07280375343795502, + "grad_norm": 0.7041525654790047, + "learning_rate": 7.269789983844911e-06, + "loss": 0.906, "step": 450 }, { - "epoch": 0.18848384424192213, - "grad_norm": 2.8508808311574487, - "learning_rate": 9.764581019689255e-06, - "loss": 0.707, + "epoch": 0.07361268403171008, + "grad_norm": 1.0272938347497549, + "learning_rate": 7.350565428109855e-06, + "loss": 0.9156, "step": 455 }, { - "epoch": 0.19055509527754763, - "grad_norm": 3.1150961713118646, - "learning_rate": 9.753491542895237e-06, - "loss": 0.7186, + "epoch": 0.07442161462546514, + "grad_norm": 0.7023049681523049, + "learning_rate": 7.431340872374798e-06, + "loss": 0.8947, "step": 460 }, { - "epoch": 0.19262634631317316, - "grad_norm": 3.0323106493041325, - "learning_rate": 9.742153449030639e-06, - "loss": 0.6893, + "epoch": 0.0752305452192202, + "grad_norm": 0.6957484415499231, + "learning_rate": 7.512116316639743e-06, + "loss": 0.9088, "step": 465 }, { - "epoch": 0.19469759734879868, - "grad_norm": 3.0499193466413654, - "learning_rate": 9.730567331100333e-06, - "loss": 0.7106, + "epoch": 0.07603947581297525, + "grad_norm": 0.6699201382717078, + "learning_rate": 7.592891760904686e-06, + "loss": 0.9086, "step": 470 }, { - "epoch": 0.19676884838442418, - "grad_norm": 2.8549221082922, - "learning_rate": 9.71873379508136e-06, - "loss": 0.6801, + "epoch": 0.0768484064067303, + "grad_norm": 0.7101796332013423, + "learning_rate": 7.673667205169629e-06, + "loss": 0.9037, "step": 475 }, { - "epoch": 0.1988400994200497, - "grad_norm": 2.8978651489251703, - "learning_rate": 9.706653459891207e-06, - "loss": 0.6906, + "epoch": 0.07765733700048535, + "grad_norm": 0.7497394327378185, + "learning_rate": 7.754442649434572e-06, + "loss": 0.929, "step": 480 }, { - "epoch": 0.20091135045567524, - "grad_norm": 2.7131289500779237, - "learning_rate": 9.694326957355452e-06, - "loss": 0.6793, + "epoch": 0.07846626759424041, + "grad_norm": 0.7422612177167345, + "learning_rate": 7.835218093699516e-06, + "loss": 0.8977, "step": 485 }, { - "epoch": 0.20298260149130073, - "grad_norm": 2.7450048929619033, - "learning_rate": 9.681754932174719e-06, - "loss": 0.6987, + "epoch": 0.07927519818799547, + "grad_norm": 0.7103824932867557, + "learning_rate": 7.91599353796446e-06, + "loss": 0.9151, "step": 490 }, { - "epoch": 0.20505385252692626, - "grad_norm": 2.752895531078258, - "learning_rate": 9.668938041890952e-06, - "loss": 0.693, + "epoch": 0.08008412878175053, + "grad_norm": 0.7605669414288259, + "learning_rate": 7.996768982229403e-06, + "loss": 0.9234, "step": 495 }, { - "epoch": 0.2071251035625518, - "grad_norm": 2.808137119286013, - "learning_rate": 9.655876956853025e-06, - "loss": 0.681, + "epoch": 0.08089305937550559, + "grad_norm": 0.7765227746326537, + "learning_rate": 8.077544426494346e-06, + "loss": 0.9317, "step": 500 }, { - "epoch": 0.2091963545981773, - "grad_norm": 2.724476444787799, - "learning_rate": 9.64257236018169e-06, - "loss": 0.6716, + "epoch": 0.08170198996926063, + "grad_norm": 0.7114224103403521, + "learning_rate": 8.15831987075929e-06, + "loss": 0.8977, "step": 505 }, { - "epoch": 0.2112676056338028, - "grad_norm": 2.748867424602178, - "learning_rate": 9.629024947733836e-06, - "loss": 0.6944, + "epoch": 0.08251092056301569, + "grad_norm": 0.709653797696482, + "learning_rate": 8.239095315024233e-06, + "loss": 0.9048, "step": 510 }, { - "epoch": 0.21333885666942834, - "grad_norm": 2.9490226550436676, - "learning_rate": 9.615235428066106e-06, - "loss": 0.6844, + "epoch": 0.08331985115677075, + "grad_norm": 0.6694669312530299, + "learning_rate": 8.319870759289176e-06, + "loss": 0.9079, "step": 515 }, { - "epoch": 0.21541010770505387, - "grad_norm": 2.6625828983237634, - "learning_rate": 9.601204522397826e-06, - "loss": 0.6876, + "epoch": 0.08412878175052581, + "grad_norm": 0.6918229992578886, + "learning_rate": 8.400646203554121e-06, + "loss": 0.9161, "step": 520 }, { - "epoch": 0.21748135874067936, - "grad_norm": 2.8580713323817672, - "learning_rate": 9.586932964573298e-06, - "loss": 0.6675, + "epoch": 0.08493771234428087, + "grad_norm": 0.826690733304416, + "learning_rate": 8.481421647819064e-06, + "loss": 0.9179, "step": 525 }, { - "epoch": 0.2195526097763049, - "grad_norm": 2.65085890829761, - "learning_rate": 9.572421501023403e-06, - "loss": 0.6871, + "epoch": 0.08574664293803591, + "grad_norm": 0.8502072042848492, + "learning_rate": 8.562197092084007e-06, + "loss": 0.8922, "step": 530 }, { - "epoch": 0.22162386081193042, - "grad_norm": 2.8073591225727066, - "learning_rate": 9.557670890726576e-06, - "loss": 0.6806, + "epoch": 0.08655557353179097, + "grad_norm": 0.7888391565118393, + "learning_rate": 8.64297253634895e-06, + "loss": 0.9087, "step": 535 }, { - "epoch": 0.22369511184755592, - "grad_norm": 2.890518621456693, - "learning_rate": 9.5426819051691e-06, - "loss": 0.6842, + "epoch": 0.08736450412554603, + "grad_norm": 0.7612155866818295, + "learning_rate": 8.723747980613893e-06, + "loss": 0.9067, "step": 540 }, { - "epoch": 0.22576636288318144, - "grad_norm": 3.037365740542274, - "learning_rate": 9.527455328304756e-06, - "loss": 0.6706, + "epoch": 0.08817343471930109, + "grad_norm": 0.7340496045333647, + "learning_rate": 8.804523424878838e-06, + "loss": 0.9122, "step": 545 }, { - "epoch": 0.22783761391880697, - "grad_norm": 3.007466890807781, - "learning_rate": 9.511991956513828e-06, - "loss": 0.6768, + "epoch": 0.08898236531305614, + "grad_norm": 0.6807951345118415, + "learning_rate": 8.88529886914378e-06, + "loss": 0.9209, "step": 550 }, { - "epoch": 0.22990886495443247, - "grad_norm": 2.991724717996849, - "learning_rate": 9.496292598561445e-06, - "loss": 0.6399, + "epoch": 0.08979129590681119, + "grad_norm": 0.7182636447883889, + "learning_rate": 8.966074313408725e-06, + "loss": 0.9036, "step": 555 }, { - "epoch": 0.231980115990058, - "grad_norm": 2.6643291004806144, - "learning_rate": 9.480358075555278e-06, - "loss": 0.6745, + "epoch": 0.09060022650056625, + "grad_norm": 0.6906380745424776, + "learning_rate": 9.046849757673668e-06, + "loss": 0.8775, "step": 560 }, { - "epoch": 0.23405136702568352, - "grad_norm": 2.7494300595409076, - "learning_rate": 9.464189220902603e-06, - "loss": 0.6659, + "epoch": 0.0914091570943213, + "grad_norm": 0.7205121865769839, + "learning_rate": 9.127625201938612e-06, + "loss": 0.8799, "step": 565 }, { - "epoch": 0.23612261806130902, - "grad_norm": 2.778050795482066, - "learning_rate": 9.447786880266706e-06, - "loss": 0.6682, + "epoch": 0.09221808768807636, + "grad_norm": 0.699683431919587, + "learning_rate": 9.208400646203555e-06, + "loss": 0.8776, "step": 570 }, { - "epoch": 0.23819386909693455, - "grad_norm": 2.8808954872434107, - "learning_rate": 9.431151911522656e-06, - "loss": 0.6603, + "epoch": 0.09302701828183142, + "grad_norm": 0.7092973559328101, + "learning_rate": 9.289176090468498e-06, + "loss": 0.8976, "step": 575 }, { - "epoch": 0.24026512013256007, - "grad_norm": 2.7631310193416008, - "learning_rate": 9.414285184712432e-06, - "loss": 0.6479, + "epoch": 0.09383594887558648, + "grad_norm": 0.7013577925584726, + "learning_rate": 9.36995153473344e-06, + "loss": 0.8915, "step": 580 }, { - "epoch": 0.24233637116818557, - "grad_norm": 2.7504091304269505, - "learning_rate": 9.397187581999424e-06, - "loss": 0.656, + "epoch": 0.09464487946934153, + "grad_norm": 0.7360966395662988, + "learning_rate": 9.450726978998385e-06, + "loss": 0.9049, "step": 585 }, { - "epoch": 0.2444076222038111, - "grad_norm": 2.7704909990609345, - "learning_rate": 9.37985999762229e-06, - "loss": 0.6494, + "epoch": 0.09545381006309658, + "grad_norm": 0.7793159637124253, + "learning_rate": 9.531502423263328e-06, + "loss": 0.8924, "step": 590 }, { - "epoch": 0.24647887323943662, - "grad_norm": 2.7880341771333086, - "learning_rate": 9.362303337848188e-06, - "loss": 0.6498, + "epoch": 0.09626274065685164, + "grad_norm": 0.7373798826049844, + "learning_rate": 9.612277867528273e-06, + "loss": 0.9125, "step": 595 }, { - "epoch": 0.24855012427506215, - "grad_norm": 2.7372277502845064, - "learning_rate": 9.344518520925377e-06, - "loss": 0.635, + "epoch": 0.0970716712506067, + "grad_norm": 0.7155304623776628, + "learning_rate": 9.693053311793216e-06, + "loss": 0.8922, "step": 600 }, { - "epoch": 0.2506213753106877, - "grad_norm": 2.700220180234652, - "learning_rate": 9.326506477035179e-06, - "loss": 0.6205, + "epoch": 0.09788060184436176, + "grad_norm": 0.722457498504299, + "learning_rate": 9.773828756058159e-06, + "loss": 0.9133, "step": 605 }, { - "epoch": 0.2526926263463132, - "grad_norm": 2.9446333047323714, - "learning_rate": 9.308268148243355e-06, - "loss": 0.6377, + "epoch": 0.0986895324381168, + "grad_norm": 0.7006683969526154, + "learning_rate": 9.854604200323102e-06, + "loss": 0.8794, "step": 610 }, { - "epoch": 0.2547638773819387, - "grad_norm": 2.851475449493436, - "learning_rate": 9.289804488450805e-06, - "loss": 0.6395, + "epoch": 0.09949846303187186, + "grad_norm": 0.7854584772109673, + "learning_rate": 9.935379644588045e-06, + "loss": 0.9029, "step": 615 }, { - "epoch": 0.25683512841756423, - "grad_norm": 2.868033711564291, - "learning_rate": 9.271116463343692e-06, - "loss": 0.6421, + "epoch": 0.10030739362562692, + "grad_norm": 0.7970045944993353, + "learning_rate": 9.999999202413539e-06, + "loss": 0.9009, "step": 620 }, { - "epoch": 0.25890637945318973, - "grad_norm": 2.6514436144580307, - "learning_rate": 9.25220505034293e-06, - "loss": 0.6372, + "epoch": 0.10111632421938198, + "grad_norm": 0.8001952905335415, + "learning_rate": 9.999971286914108e-06, + "loss": 0.8689, "step": 625 }, { - "epoch": 0.2609776304888152, - "grad_norm": 2.9338066764643598, - "learning_rate": 9.23307123855307e-06, - "loss": 0.636, + "epoch": 0.10192525481313704, + "grad_norm": 0.7947330109524953, + "learning_rate": 9.999903492346063e-06, + "loss": 0.8836, "step": 630 }, { - "epoch": 0.2630488815244408, - "grad_norm": 3.023298105516956, - "learning_rate": 9.213716028710558e-06, - "loss": 0.6103, + "epoch": 0.1027341854068921, + "grad_norm": 0.6517743774036119, + "learning_rate": 9.999795819250126e-06, + "loss": 0.8937, "step": 635 }, { - "epoch": 0.2651201325600663, - "grad_norm": 2.871190920425405, - "learning_rate": 9.194140433131397e-06, - "loss": 0.6159, + "epoch": 0.10354311600064714, + "grad_norm": 0.7153316955972873, + "learning_rate": 9.99964826848508e-06, + "loss": 0.8952, "step": 640 }, { - "epoch": 0.2671913835956918, - "grad_norm": 2.7631786024911187, - "learning_rate": 9.174345475658208e-06, - "loss": 0.6228, + "epoch": 0.1043520465944022, + "grad_norm": 0.7470689793235693, + "learning_rate": 9.99946084122777e-06, + "loss": 0.905, "step": 645 }, { - "epoch": 0.26926263463131733, - "grad_norm": 2.998454912972155, - "learning_rate": 9.154332191606671e-06, - "loss": 0.6335, + "epoch": 0.10516097718815726, + "grad_norm": 0.8088466120200852, + "learning_rate": 9.99923353897309e-06, + "loss": 0.8982, "step": 650 }, { - "epoch": 0.27133388566694283, - "grad_norm": 2.803386942098811, - "learning_rate": 9.134101627711384e-06, - "loss": 0.6119, + "epoch": 0.10596990778191231, + "grad_norm": 0.6912247019886972, + "learning_rate": 9.998966363533972e-06, + "loss": 0.8895, "step": 655 }, { - "epoch": 0.27340513670256833, - "grad_norm": 2.6219329149002277, - "learning_rate": 9.113654842071114e-06, - "loss": 0.6171, + "epoch": 0.10677883837566737, + "grad_norm": 0.7942586835594388, + "learning_rate": 9.998659317041367e-06, + "loss": 0.8913, "step": 660 }, { - "epoch": 0.2754763877381939, - "grad_norm": 2.61515633927928, - "learning_rate": 9.092992904093451e-06, - "loss": 0.6165, + "epoch": 0.10758776896942242, + "grad_norm": 0.7841096349195548, + "learning_rate": 9.998312401944237e-06, + "loss": 0.8774, "step": 665 }, { - "epoch": 0.2775476387738194, - "grad_norm": 2.8142275849395473, - "learning_rate": 9.072116894438885e-06, - "loss": 0.6091, + "epoch": 0.10839669956317748, + "grad_norm": 0.7485526011092998, + "learning_rate": 9.997925621009527e-06, + "loss": 0.8955, "step": 670 }, { - "epoch": 0.2796188898094449, - "grad_norm": 2.8036261300008856, - "learning_rate": 9.051027904964279e-06, - "loss": 0.6266, + "epoch": 0.10920563015693253, + "grad_norm": 0.7271239921076256, + "learning_rate": 9.997498977322146e-06, + "loss": 0.8871, "step": 675 }, { - "epoch": 0.28169014084507044, - "grad_norm": 2.5728545238650122, - "learning_rate": 9.029727038665765e-06, - "loss": 0.625, + "epoch": 0.11001456075068759, + "grad_norm": 0.7148944860864371, + "learning_rate": 9.997032474284949e-06, + "loss": 0.9008, "step": 680 }, { - "epoch": 0.28376139188069593, - "grad_norm": 2.911382813259161, - "learning_rate": 9.008215409621053e-06, - "loss": 0.6023, + "epoch": 0.11082349134444265, + "grad_norm": 0.7486275146950349, + "learning_rate": 9.996526115618694e-06, + "loss": 0.8786, "step": 685 }, { - "epoch": 0.28583264291632143, - "grad_norm": 2.8794319732821956, - "learning_rate": 8.986494142931168e-06, - "loss": 0.6113, + "epoch": 0.11163242193819771, + "grad_norm": 0.7878464787195636, + "learning_rate": 9.995979905362028e-06, + "loss": 0.8805, "step": 690 }, { - "epoch": 0.287903893951947, - "grad_norm": 2.6994578026382956, - "learning_rate": 8.964564374661597e-06, - "loss": 0.6083, + "epoch": 0.11244135253195275, + "grad_norm": 0.7844738669203999, + "learning_rate": 9.995393847871446e-06, + "loss": 0.8768, "step": 695 }, { - "epoch": 0.2899751449875725, - "grad_norm": 2.901823826406794, - "learning_rate": 8.94242725178288e-06, - "loss": 0.616, + "epoch": 0.11325028312570781, + "grad_norm": 0.7008587939833357, + "learning_rate": 9.994767947821261e-06, + "loss": 0.8848, "step": 700 }, { - "epoch": 0.292046396023198, - "grad_norm": 2.653196617958314, - "learning_rate": 8.920083932110608e-06, - "loss": 0.6196, + "epoch": 0.11405921371946287, + "grad_norm": 0.7176705233353148, + "learning_rate": 9.994102210203567e-06, + "loss": 0.9161, "step": 705 }, { - "epoch": 0.29411764705882354, - "grad_norm": 2.6749585402475264, - "learning_rate": 8.89753558424488e-06, - "loss": 0.597, + "epoch": 0.11486814431321793, + "grad_norm": 0.7984767869907693, + "learning_rate": 9.993396640328191e-06, + "loss": 0.8984, "step": 710 }, { - "epoch": 0.29618889809444904, - "grad_norm": 2.7369834176016723, - "learning_rate": 8.874783387509181e-06, - "loss": 0.6012, + "epoch": 0.11567707490697299, + "grad_norm": 0.7660329081563221, + "learning_rate": 9.992651243822658e-06, + "loss": 0.8918, "step": 715 }, { - "epoch": 0.2982601491300746, - "grad_norm": 2.845681780523314, - "learning_rate": 8.851828531888692e-06, - "loss": 0.6223, + "epoch": 0.11648600550072803, + "grad_norm": 0.7692169317757526, + "learning_rate": 9.991866026632146e-06, + "loss": 0.8845, "step": 720 }, { - "epoch": 0.3003314001657001, - "grad_norm": 2.7849231768312883, - "learning_rate": 8.828672217968055e-06, - "loss": 0.6192, + "epoch": 0.11729493609448309, + "grad_norm": 0.7728932225344195, + "learning_rate": 9.991040995019441e-06, + "loss": 0.8944, "step": 725 }, { - "epoch": 0.3024026512013256, - "grad_norm": 2.7476355382279385, - "learning_rate": 8.805315656868587e-06, - "loss": 0.6053, + "epoch": 0.11810386668823815, + "grad_norm": 0.7481133899054797, + "learning_rate": 9.990176155564874e-06, + "loss": 0.8782, "step": 730 }, { - "epoch": 0.30447390223695114, - "grad_norm": 2.686604793998387, - "learning_rate": 8.781760070184933e-06, - "loss": 0.5934, + "epoch": 0.11891279728199321, + "grad_norm": 0.7221002980302099, + "learning_rate": 9.989271515166287e-06, + "loss": 0.8762, "step": 735 }, { - "epoch": 0.30654515327257664, - "grad_norm": 2.997703713470155, - "learning_rate": 8.75800668992117e-06, - "loss": 0.5956, + "epoch": 0.11972172787574827, + "grad_norm": 0.7251326764742602, + "learning_rate": 9.988327081038962e-06, + "loss": 0.865, "step": 740 }, { - "epoch": 0.30861640430820214, - "grad_norm": 2.621004060213254, - "learning_rate": 8.734056758426367e-06, - "loss": 0.5974, + "epoch": 0.12053065846950331, + "grad_norm": 0.70263798156615, + "learning_rate": 9.987342860715575e-06, + "loss": 0.8885, "step": 745 }, { - "epoch": 0.3106876553438277, - "grad_norm": 2.6549364932267303, - "learning_rate": 8.709911528329623e-06, - "loss": 0.5948, + "epoch": 0.12133958906325837, + "grad_norm": 0.7527476269587685, + "learning_rate": 9.986318862046129e-06, + "loss": 0.9032, "step": 750 }, { - "epoch": 0.3127589063794532, - "grad_norm": 2.7965881278503435, - "learning_rate": 8.685572262474538e-06, - "loss": 0.5967, + "epoch": 0.12214851965701343, + "grad_norm": 0.7398355773627006, + "learning_rate": 9.98525509319789e-06, + "loss": 0.8788, "step": 755 }, { - "epoch": 0.3148301574150787, - "grad_norm": 2.5906530552932185, - "learning_rate": 8.661040233853166e-06, - "loss": 0.5824, + "epoch": 0.12295745025076849, + "grad_norm": 0.7459365223897065, + "learning_rate": 9.984151562655333e-06, + "loss": 0.8754, "step": 760 }, { - "epoch": 0.31690140845070425, - "grad_norm": 2.96203173026875, - "learning_rate": 8.636316725539445e-06, - "loss": 0.5833, + "epoch": 0.12376638084452354, + "grad_norm": 0.7806602396452277, + "learning_rate": 9.983008279220062e-06, + "loss": 0.8716, "step": 765 }, { - "epoch": 0.31897265948632975, - "grad_norm": 2.6456839069829967, - "learning_rate": 8.611403030622074e-06, - "loss": 0.5913, + "epoch": 0.1245753114382786, + "grad_norm": 0.7479182177455358, + "learning_rate": 9.981825252010743e-06, + "loss": 0.8849, "step": 770 }, { - "epoch": 0.32104391052195524, - "grad_norm": 2.552077771616609, - "learning_rate": 8.586300452136895e-06, - "loss": 0.5604, + "epoch": 0.12538424203203366, + "grad_norm": 0.7644325690144114, + "learning_rate": 9.980602490463037e-06, + "loss": 0.8742, "step": 775 }, { - "epoch": 0.3231151615575808, - "grad_norm": 2.681514794026766, - "learning_rate": 8.561010302998734e-06, - "loss": 0.5878, + "epoch": 0.12619317262578872, + "grad_norm": 0.8269622902407507, + "learning_rate": 9.979340004329516e-06, + "loss": 0.8747, "step": 780 }, { - "epoch": 0.3251864125932063, - "grad_norm": 2.810277058352099, - "learning_rate": 8.535533905932739e-06, - "loss": 0.5904, + "epoch": 0.12700210321954375, + "grad_norm": 0.709710236101346, + "learning_rate": 9.978037803679595e-06, + "loss": 0.8929, "step": 785 }, { - "epoch": 0.3272576636288318, - "grad_norm": 2.823366500468815, - "learning_rate": 8.509872593405189e-06, - "loss": 0.5785, + "epoch": 0.1278110338132988, + "grad_norm": 0.7487057568451865, + "learning_rate": 9.97669589889944e-06, + "loss": 0.8944, "step": 790 }, { - "epoch": 0.32932891466445735, - "grad_norm": 2.537302794266039, - "learning_rate": 8.484027707553818e-06, - "loss": 0.5749, + "epoch": 0.12861996440705387, + "grad_norm": 0.8332024529186928, + "learning_rate": 9.975314300691898e-06, + "loss": 0.8803, "step": 795 }, { - "epoch": 0.33140016570008285, - "grad_norm": 2.679914960811432, - "learning_rate": 8.458000600117604e-06, - "loss": 0.5758, + "epoch": 0.12942889500080892, + "grad_norm": 0.7617229913207306, + "learning_rate": 9.973893020076402e-06, + "loss": 0.8764, "step": 800 }, { - "epoch": 0.33347141673570835, - "grad_norm": 2.714722412588362, - "learning_rate": 8.43179263236608e-06, - "loss": 0.5703, + "epoch": 0.13023782559456398, + "grad_norm": 0.7990666084729844, + "learning_rate": 9.972432068388885e-06, + "loss": 0.8841, "step": 805 }, { - "epoch": 0.3355426677713339, - "grad_norm": 2.8825027456443593, - "learning_rate": 8.40540517502813e-06, - "loss": 0.5741, + "epoch": 0.13104675618831904, + "grad_norm": 0.8144767375507436, + "learning_rate": 9.970931457281693e-06, + "loss": 0.8751, "step": 810 }, { - "epoch": 0.3376139188069594, - "grad_norm": 2.69779454090861, - "learning_rate": 8.378839608220304e-06, - "loss": 0.5758, + "epoch": 0.1318556867820741, + "grad_norm": 0.8471484705899229, + "learning_rate": 9.96939119872349e-06, + "loss": 0.878, "step": 815 }, { - "epoch": 0.3396851698425849, - "grad_norm": 2.591195744363521, - "learning_rate": 8.35209732137463e-06, - "loss": 0.5703, + "epoch": 0.13266461737582916, + "grad_norm": 0.7419857259389397, + "learning_rate": 9.96781130499916e-06, + "loss": 0.8593, "step": 820 }, { - "epoch": 0.34175642087821045, - "grad_norm": 2.5928484463176056, - "learning_rate": 8.32517971316595e-06, - "loss": 0.5789, + "epoch": 0.13347354796958422, + "grad_norm": 0.6966615337982099, + "learning_rate": 9.966191788709716e-06, + "loss": 0.8754, "step": 825 }, { - "epoch": 0.34382767191383595, - "grad_norm": 2.8392376730929962, - "learning_rate": 8.298088191438753e-06, - "loss": 0.5573, + "epoch": 0.13428247856333927, + "grad_norm": 0.7552286075445729, + "learning_rate": 9.96453266277219e-06, + "loss": 0.8729, "step": 830 }, { - "epoch": 0.34589892294946145, - "grad_norm": 2.717204690264741, - "learning_rate": 8.270824173133563e-06, - "loss": 0.5553, + "epoch": 0.13509140915709433, + "grad_norm": 0.8299494943538954, + "learning_rate": 9.96283394041954e-06, + "loss": 0.881, "step": 835 }, { - "epoch": 0.347970173985087, - "grad_norm": 2.6426626328945124, - "learning_rate": 8.243389084212808e-06, - "loss": 0.5503, + "epoch": 0.13590033975084936, + "grad_norm": 0.734889508057368, + "learning_rate": 9.961095635200536e-06, + "loss": 0.8642, "step": 840 }, { - "epoch": 0.3500414250207125, - "grad_norm": 2.6361988946032806, - "learning_rate": 8.215784359586257e-06, - "loss": 0.5498, + "epoch": 0.13670927034460442, + "grad_norm": 0.7083874584173164, + "learning_rate": 9.959317760979654e-06, + "loss": 0.87, "step": 845 }, { - "epoch": 0.352112676056338, - "grad_norm": 2.452756091881905, - "learning_rate": 8.188011443035962e-06, - "loss": 0.538, + "epoch": 0.13751820093835948, + "grad_norm": 0.6795303692054026, + "learning_rate": 9.957500331936971e-06, + "loss": 0.8916, "step": 850 }, { - "epoch": 0.35418392709196356, - "grad_norm": 2.740977671213533, - "learning_rate": 8.160071787140742e-06, - "loss": 0.5452, + "epoch": 0.13832713153211454, + "grad_norm": 0.7711107263091647, + "learning_rate": 9.955643362568048e-06, + "loss": 0.8867, "step": 855 }, { - "epoch": 0.35625517812758906, - "grad_norm": 2.650236162364244, - "learning_rate": 8.131966853200226e-06, - "loss": 0.5485, + "epoch": 0.1391360621258696, + "grad_norm": 0.7822464459121171, + "learning_rate": 9.953746867683807e-06, + "loss": 0.875, "step": 860 }, { - "epoch": 0.35832642916321455, - "grad_norm": 2.701846208365303, - "learning_rate": 8.103698111158405e-06, - "loss": 0.5495, + "epoch": 0.13994499271962466, + "grad_norm": 0.8269771768105255, + "learning_rate": 9.951810862410426e-06, + "loss": 0.8762, "step": 865 }, { - "epoch": 0.3603976801988401, - "grad_norm": 2.7179474541169104, - "learning_rate": 8.075267039526764e-06, - "loss": 0.548, + "epoch": 0.14075392331337971, + "grad_norm": 0.766471476016186, + "learning_rate": 9.949835362189215e-06, + "loss": 0.8635, "step": 870 }, { - "epoch": 0.3624689312344656, - "grad_norm": 2.833260463644198, - "learning_rate": 8.046675125306948e-06, - "loss": 0.5632, + "epoch": 0.14156285390713477, + "grad_norm": 0.8196823476460824, + "learning_rate": 9.947820382776482e-06, + "loss": 0.8933, "step": 875 }, { - "epoch": 0.36454018227009116, - "grad_norm": 2.7066133568619724, - "learning_rate": 8.017923863912989e-06, - "loss": 0.5475, + "epoch": 0.14237178450088983, + "grad_norm": 0.7502232673514019, + "learning_rate": 9.945765940243422e-06, + "loss": 0.9162, "step": 880 }, { - "epoch": 0.36661143330571666, - "grad_norm": 2.554105810407388, - "learning_rate": 7.989014759093095e-06, - "loss": 0.5455, + "epoch": 0.1431807150946449, + "grad_norm": 0.8243799834314466, + "learning_rate": 9.943672050975979e-06, + "loss": 0.8973, "step": 885 }, { - "epoch": 0.36868268434134216, - "grad_norm": 2.530083794168148, - "learning_rate": 7.959949322850994e-06, - "loss": 0.5397, + "epoch": 0.14398964568839995, + "grad_norm": 0.7327053926662375, + "learning_rate": 9.94153873167472e-06, + "loss": 0.8754, "step": 890 }, { - "epoch": 0.3707539353769677, - "grad_norm": 2.6016133268460937, - "learning_rate": 7.930729075366867e-06, - "loss": 0.542, + "epoch": 0.14479857628215498, + "grad_norm": 0.7710933741203212, + "learning_rate": 9.9393659993547e-06, + "loss": 0.8989, "step": 895 }, { - "epoch": 0.3728251864125932, - "grad_norm": 2.5754320630229754, - "learning_rate": 7.901355544917827e-06, - "loss": 0.5362, + "epoch": 0.14560750687591004, + "grad_norm": 0.6969476376928585, + "learning_rate": 9.937153871345326e-06, + "loss": 0.8851, "step": 900 }, { - "epoch": 0.3748964374482187, - "grad_norm": 2.576938951435503, - "learning_rate": 7.87183026779799e-06, - "loss": 0.5414, + "epoch": 0.1464164374696651, + "grad_norm": 0.7537226761385668, + "learning_rate": 9.934902365290222e-06, + "loss": 0.8678, "step": 905 }, { - "epoch": 0.37696768848384427, - "grad_norm": 2.6785725784997796, - "learning_rate": 7.842154788238124e-06, - "loss": 0.5512, + "epoch": 0.14722536806342015, + "grad_norm": 0.7880392328433623, + "learning_rate": 9.932611499147082e-06, + "loss": 0.8785, "step": 910 }, { - "epoch": 0.37903893951946976, - "grad_norm": 2.6795394480092414, - "learning_rate": 7.812330658324884e-06, - "loss": 0.5375, + "epoch": 0.1480342986571752, + "grad_norm": 0.7979672909632645, + "learning_rate": 9.930281291187534e-06, + "loss": 0.9031, "step": 915 }, { - "epoch": 0.38111019055509526, - "grad_norm": 2.672382602041246, - "learning_rate": 7.782359437919644e-06, - "loss": 0.5457, + "epoch": 0.14884322925093027, + "grad_norm": 0.769227717875029, + "learning_rate": 9.927911759996989e-06, + "loss": 0.8752, "step": 920 }, { - "epoch": 0.3831814415907208, - "grad_norm": 2.884729781500387, - "learning_rate": 7.75224269457689e-06, - "loss": 0.5239, + "epoch": 0.14965215984468533, + "grad_norm": 0.7501410474884525, + "learning_rate": 9.925502924474495e-06, + "loss": 0.8818, "step": 925 }, { - "epoch": 0.3852526926263463, - "grad_norm": 2.56507336027056, - "learning_rate": 7.721982003462255e-06, - "loss": 0.5405, + "epoch": 0.1504610904384404, + "grad_norm": 0.7900709039002747, + "learning_rate": 9.923054803832585e-06, + "loss": 0.8549, "step": 930 }, { - "epoch": 0.3873239436619718, - "grad_norm": 2.5473419393915, - "learning_rate": 7.691578947270122e-06, - "loss": 0.5438, + "epoch": 0.15127002103219545, + "grad_norm": 0.727528278106019, + "learning_rate": 9.920567417597127e-06, + "loss": 0.869, "step": 935 }, { - "epoch": 0.38939519469759737, - "grad_norm": 2.783657224970263, - "learning_rate": 7.661035116140856e-06, - "loss": 0.5198, + "epoch": 0.1520789516259505, + "grad_norm": 0.7478261446374327, + "learning_rate": 9.918040785607163e-06, + "loss": 0.8803, "step": 940 }, { - "epoch": 0.39146644573322287, - "grad_norm": 2.6274430429540927, - "learning_rate": 7.63035210757763e-06, - "loss": 0.5535, + "epoch": 0.15288788221970556, + "grad_norm": 0.7798141297679039, + "learning_rate": 9.915474928014754e-06, + "loss": 0.8601, "step": 945 }, { - "epoch": 0.39353769676884837, - "grad_norm": 2.7442552412406958, - "learning_rate": 7.599531526362873e-06, - "loss": 0.5304, + "epoch": 0.1536968128134606, + "grad_norm": 0.743300400063168, + "learning_rate": 9.912869865284821e-06, + "loss": 0.873, "step": 950 }, { - "epoch": 0.3956089478044739, - "grad_norm": 2.7785345881187786, - "learning_rate": 7.568574984474335e-06, - "loss": 0.5225, + "epoch": 0.15450574340721565, + "grad_norm": 0.7855652729779465, + "learning_rate": 9.91022561819498e-06, + "loss": 0.8556, "step": 955 }, { - "epoch": 0.3976801988400994, - "grad_norm": 2.720728749217881, - "learning_rate": 7.537484101000787e-06, - "loss": 0.5217, + "epoch": 0.1553146740009707, + "grad_norm": 0.7512935005312259, + "learning_rate": 9.90754220783537e-06, + "loss": 0.8761, "step": 960 }, { - "epoch": 0.3997514498757249, - "grad_norm": 2.637160358823368, - "learning_rate": 7.506260502057325e-06, - "loss": 0.5297, + "epoch": 0.15612360459472577, + "grad_norm": 0.7480633701848225, + "learning_rate": 9.9048196556085e-06, + "loss": 0.884, "step": 965 }, { - "epoch": 0.40182270091135047, - "grad_norm": 2.6335954311355216, - "learning_rate": 7.474905820700334e-06, - "loss": 0.5291, + "epoch": 0.15693253518848083, + "grad_norm": 0.7649617436679113, + "learning_rate": 9.902057983229059e-06, + "loss": 0.8558, "step": 970 }, { - "epoch": 0.40389395194697597, - "grad_norm": 2.656261545637842, - "learning_rate": 7.443421696842066e-06, - "loss": 0.5076, + "epoch": 0.15774146578223588, + "grad_norm": 0.7423506733665634, + "learning_rate": 9.89925721272376e-06, + "loss": 0.8773, "step": 975 }, { - "epoch": 0.40596520298260147, - "grad_norm": 2.6508585415601362, - "learning_rate": 7.411809777164873e-06, - "loss": 0.5183, + "epoch": 0.15855039637599094, + "grad_norm": 0.7931165102529991, + "learning_rate": 9.89641736643116e-06, + "loss": 0.8846, "step": 980 }, { - "epoch": 0.408036454018227, - "grad_norm": 2.665237164150542, - "learning_rate": 7.380071715035089e-06, - "loss": 0.5241, + "epoch": 0.159359326969746, + "grad_norm": 0.7550133131247714, + "learning_rate": 9.893538467001466e-06, + "loss": 0.8727, "step": 985 }, { - "epoch": 0.4101077050538525, - "grad_norm": 2.5770691509075565, - "learning_rate": 7.3482091704165405e-06, - "loss": 0.5164, + "epoch": 0.16016825756350106, + "grad_norm": 0.773118150891364, + "learning_rate": 9.89062053739638e-06, + "loss": 0.8808, "step": 990 }, { - "epoch": 0.412178956089478, - "grad_norm": 2.5586140708653584, - "learning_rate": 7.316223809783745e-06, - "loss": 0.4982, + "epoch": 0.16097718815725612, + "grad_norm": 0.75632598887103, + "learning_rate": 9.887663600888897e-06, + "loss": 0.8713, "step": 995 }, { - "epoch": 0.4142502071251036, - "grad_norm": 2.6304400469333697, - "learning_rate": 7.284117306034733e-06, - "loss": 0.514, + "epoch": 0.16178611875101118, + "grad_norm": 0.8316783703999336, + "learning_rate": 9.88466768106313e-06, + "loss": 0.8571, "step": 1000 }, { - "epoch": 0.4163214581607291, - "grad_norm": 2.5369318681193667, - "learning_rate": 7.2518913384035685e-06, - "loss": 0.5119, + "epoch": 0.1625950493447662, + "grad_norm": 0.7454307910719469, + "learning_rate": 9.881632801814112e-06, + "loss": 0.8602, "step": 1005 }, { - "epoch": 0.4183927091963546, - "grad_norm": 2.7061992404592683, - "learning_rate": 7.219547592372512e-06, - "loss": 0.5175, + "epoch": 0.16340397993852127, + "grad_norm": 0.7564730753355293, + "learning_rate": 9.878558987347613e-06, + "loss": 0.8694, "step": 1010 }, { - "epoch": 0.4204639602319801, - "grad_norm": 2.420325437701678, - "learning_rate": 7.187087759583869e-06, - "loss": 0.5063, + "epoch": 0.16421291053227632, + "grad_norm": 0.7924034403437432, + "learning_rate": 9.875446262179948e-06, + "loss": 0.8695, "step": 1015 }, { - "epoch": 0.4225352112676056, - "grad_norm": 2.7148650041029514, - "learning_rate": 7.15451353775151e-06, - "loss": 0.5237, + "epoch": 0.16502184112603138, + "grad_norm": 0.809312814940399, + "learning_rate": 9.872294651137773e-06, + "loss": 0.8639, "step": 1020 }, { - "epoch": 0.4246064623032311, - "grad_norm": 2.654478692235913, - "learning_rate": 7.121826630572084e-06, - "loss": 0.4966, + "epoch": 0.16583077171978644, + "grad_norm": 0.8496318363889708, + "learning_rate": 9.869104179357898e-06, + "loss": 0.8656, "step": 1025 }, { - "epoch": 0.4266777133388567, - "grad_norm": 2.6379021813183354, - "learning_rate": 7.089028747635908e-06, - "loss": 0.4938, + "epoch": 0.1666397023135415, + "grad_norm": 0.8463627183621499, + "learning_rate": 9.865874872287076e-06, + "loss": 0.8721, "step": 1030 }, { - "epoch": 0.4287489643744822, - "grad_norm": 2.713654107850754, - "learning_rate": 7.056121604337554e-06, - "loss": 0.5074, + "epoch": 0.16744863290729656, + "grad_norm": 0.7538421574954219, + "learning_rate": 9.862606755681805e-06, + "loss": 0.887, "step": 1035 }, { - "epoch": 0.43082021541010773, - "grad_norm": 2.540085709590772, - "learning_rate": 7.023106921786118e-06, - "loss": 0.4995, + "epoch": 0.16825756350105162, + "grad_norm": 0.8015977734669262, + "learning_rate": 9.859299855608127e-06, + "loss": 0.8652, "step": 1040 }, { - "epoch": 0.43289146644573323, - "grad_norm": 2.5985604745407374, - "learning_rate": 6.9899864267152275e-06, - "loss": 0.5318, + "epoch": 0.16906649409480667, + "grad_norm": 0.7608756372365878, + "learning_rate": 9.855954198441411e-06, + "loss": 0.8882, "step": 1045 }, { - "epoch": 0.43496271748135873, - "grad_norm": 2.4411457867844417, - "learning_rate": 6.956761851392706e-06, - "loss": 0.4858, + "epoch": 0.16987542468856173, + "grad_norm": 0.8188556394828449, + "learning_rate": 9.852569810866148e-06, + "loss": 0.8694, "step": 1050 }, { - "epoch": 0.4370339685169843, - "grad_norm": 2.4714643955598072, - "learning_rate": 6.9234349335299835e-06, - "loss": 0.5171, + "epoch": 0.1706843552823168, + "grad_norm": 0.805253431195849, + "learning_rate": 9.849146719875737e-06, + "loss": 0.8754, "step": 1055 }, { - "epoch": 0.4391052195526098, - "grad_norm": 2.4510153141350868, - "learning_rate": 6.890007416191209e-06, - "loss": 0.4875, + "epoch": 0.17149328587607182, + "grad_norm": 0.7590085927711259, + "learning_rate": 9.845684952772274e-06, + "loss": 0.8741, "step": 1060 }, { - "epoch": 0.4411764705882353, - "grad_norm": 2.502867427304289, - "learning_rate": 6.8564810477020835e-06, - "loss": 0.4982, + "epoch": 0.17230221646982688, + "grad_norm": 0.7493004472784411, + "learning_rate": 9.842184537166326e-06, + "loss": 0.8784, "step": 1065 }, { - "epoch": 0.44324772162386084, - "grad_norm": 2.5224619340497183, - "learning_rate": 6.822857581558423e-06, - "loss": 0.4971, + "epoch": 0.17311114706358194, + "grad_norm": 0.8005839214503973, + "learning_rate": 9.838645500976716e-06, + "loss": 0.8668, "step": 1070 }, { - "epoch": 0.44531897265948633, - "grad_norm": 2.622904137029192, - "learning_rate": 6.789138776334441e-06, - "loss": 0.5063, + "epoch": 0.173920077657337, + "grad_norm": 0.7331632983706363, + "learning_rate": 9.835067872430297e-06, + "loss": 0.8577, "step": 1075 }, { - "epoch": 0.44739022369511183, - "grad_norm": 2.4481597241493174, - "learning_rate": 6.7553263955907755e-06, - "loss": 0.4841, + "epoch": 0.17472900825109206, + "grad_norm": 0.8212489518002695, + "learning_rate": 9.831451680061735e-06, + "loss": 0.8503, "step": 1080 }, { - "epoch": 0.4494614747307374, - "grad_norm": 2.505241922300399, - "learning_rate": 6.721422207782249e-06, - "loss": 0.486, + "epoch": 0.1755379388448471, + "grad_norm": 0.7423733165945756, + "learning_rate": 9.82779695271327e-06, + "loss": 0.849, "step": 1085 }, { - "epoch": 0.4515327257663629, - "grad_norm": 2.51038092005137, - "learning_rate": 6.687427986165379e-06, - "loss": 0.4866, + "epoch": 0.17634686943860217, + "grad_norm": 0.8436334149956825, + "learning_rate": 9.824103719534497e-06, + "loss": 0.8739, "step": 1090 }, { - "epoch": 0.4536039768019884, - "grad_norm": 2.618228992641671, - "learning_rate": 6.653345508705629e-06, - "loss": 0.4889, + "epoch": 0.17715580003235723, + "grad_norm": 0.8371560523056507, + "learning_rate": 9.820372009982122e-06, + "loss": 0.8885, "step": 1095 }, { - "epoch": 0.45567522783761394, - "grad_norm": 2.6041268512577873, - "learning_rate": 6.6191765579844205e-06, - "loss": 0.4957, + "epoch": 0.1779647306261123, + "grad_norm": 0.8549576764605578, + "learning_rate": 9.816601853819739e-06, + "loss": 0.8905, "step": 1100 }, { - "epoch": 0.45774647887323944, - "grad_norm": 2.5703382132992414, - "learning_rate": 6.584922921105894e-06, - "loss": 0.4805, + "epoch": 0.17877366121986735, + "grad_norm": 0.8302136713634986, + "learning_rate": 9.81279328111758e-06, + "loss": 0.863, "step": 1105 }, { - "epoch": 0.45981772990886494, - "grad_norm": 2.4810077168179894, - "learning_rate": 6.550586389603451e-06, - "loss": 0.4843, + "epoch": 0.17958259181362238, + "grad_norm": 0.7692548326236497, + "learning_rate": 9.80894632225229e-06, + "loss": 0.8942, "step": 1110 }, { - "epoch": 0.4618889809444905, - "grad_norm": 2.7146694842288492, - "learning_rate": 6.5161687593460395e-06, - "loss": 0.4805, + "epoch": 0.18039152240737744, + "grad_norm": 0.8190077972884791, + "learning_rate": 9.805061007906668e-06, + "loss": 0.8721, "step": 1115 }, { - "epoch": 0.463960231980116, - "grad_norm": 2.661840488355652, - "learning_rate": 6.481671830444243e-06, - "loss": 0.4766, + "epoch": 0.1812004530011325, + "grad_norm": 0.8021866541591391, + "learning_rate": 9.801137369069441e-06, + "loss": 0.8587, "step": 1120 }, { - "epoch": 0.4660314830157415, - "grad_norm": 2.542982024246636, - "learning_rate": 6.447097407156114e-06, - "loss": 0.4714, + "epoch": 0.18200938359488755, + "grad_norm": 0.7295302337882422, + "learning_rate": 9.797175437034997e-06, + "loss": 0.8809, "step": 1125 }, { - "epoch": 0.46810273405136704, - "grad_norm": 2.545352453639695, - "learning_rate": 6.412447297792818e-06, - "loss": 0.4627, + "epoch": 0.1828183141886426, + "grad_norm": 0.7303087771888767, + "learning_rate": 9.79317524340315e-06, + "loss": 0.864, "step": 1130 }, { - "epoch": 0.47017398508699254, - "grad_norm": 2.601762512884624, - "learning_rate": 6.377723314624057e-06, - "loss": 0.4617, + "epoch": 0.18362724478239767, + "grad_norm": 0.7668374049535479, + "learning_rate": 9.789136820078884e-06, + "loss": 0.8768, "step": 1135 }, { - "epoch": 0.47224523612261804, - "grad_norm": 2.5271935902650746, - "learning_rate": 6.3429272737832726e-06, - "loss": 0.482, + "epoch": 0.18443617537615273, + "grad_norm": 0.7779749351860815, + "learning_rate": 9.785060199272096e-06, + "loss": 0.8871, "step": 1140 }, { - "epoch": 0.4743164871582436, - "grad_norm": 2.8257956277427265, - "learning_rate": 6.308060995172673e-06, - "loss": 0.4875, + "epoch": 0.1852451059699078, + "grad_norm": 0.7605751732056839, + "learning_rate": 9.780945413497339e-06, + "loss": 0.8756, "step": 1145 }, { - "epoch": 0.4763877381938691, - "grad_norm": 2.6203637102255843, - "learning_rate": 6.273126302368037e-06, - "loss": 0.4725, + "epoch": 0.18605403656366284, + "grad_norm": 0.7608509607177488, + "learning_rate": 9.776792495573567e-06, + "loss": 0.8651, "step": 1150 }, { - "epoch": 0.4784589892294946, - "grad_norm": 2.4615255657443855, - "learning_rate": 6.238125022523343e-06, - "loss": 0.462, + "epoch": 0.1868629671574179, + "grad_norm": 0.7649020527995475, + "learning_rate": 9.772601478623871e-06, + "loss": 0.8522, "step": 1155 }, { - "epoch": 0.48053024026512015, - "grad_norm": 2.399337323008823, - "learning_rate": 6.203058986275207e-06, - "loss": 0.4706, + "epoch": 0.18767189775117296, + "grad_norm": 0.8284338499023652, + "learning_rate": 9.768372396075213e-06, + "loss": 0.8986, "step": 1160 }, { - "epoch": 0.48260149130074564, - "grad_norm": 2.568546498877998, - "learning_rate": 6.1679300276471285e-06, - "loss": 0.46, + "epoch": 0.188480828344928, + "grad_norm": 0.7588241076036676, + "learning_rate": 9.764105281658161e-06, + "loss": 0.8727, "step": 1165 }, { - "epoch": 0.48467274233637114, - "grad_norm": 2.5774193338530886, - "learning_rate": 6.132739983953579e-06, - "loss": 0.4639, + "epoch": 0.18928975893868305, + "grad_norm": 0.7815552643338736, + "learning_rate": 9.759800169406621e-06, + "loss": 0.8639, "step": 1170 }, { - "epoch": 0.4867439933719967, - "grad_norm": 2.433169814085788, - "learning_rate": 6.097490695703896e-06, - "loss": 0.4706, + "epoch": 0.1900986895324381, + "grad_norm": 0.7935036774949533, + "learning_rate": 9.755457093657562e-06, + "loss": 0.8651, "step": 1175 }, { - "epoch": 0.4888152444076222, - "grad_norm": 2.5672509798739975, - "learning_rate": 6.062184006506027e-06, - "loss": 0.469, + "epoch": 0.19090762012619317, + "grad_norm": 0.7411492264667076, + "learning_rate": 9.751076089050747e-06, + "loss": 0.8675, "step": 1180 }, { - "epoch": 0.4908864954432477, - "grad_norm": 2.566412335613826, - "learning_rate": 6.026821762970102e-06, - "loss": 0.4667, + "epoch": 0.19171655071994823, + "grad_norm": 0.7597654203339239, + "learning_rate": 9.746657190528454e-06, + "loss": 0.8421, "step": 1185 }, { - "epoch": 0.49295774647887325, - "grad_norm": 2.4468040851684885, - "learning_rate": 5.991405814611855e-06, - "loss": 0.4547, + "epoch": 0.19252548131370328, + "grad_norm": 0.7871189163253436, + "learning_rate": 9.742200433335196e-06, + "loss": 0.877, "step": 1190 }, { - "epoch": 0.49502899751449875, - "grad_norm": 2.490658198568155, - "learning_rate": 5.955938013755888e-06, - "loss": 0.4628, + "epoch": 0.19333441190745834, + "grad_norm": 0.7773214820602937, + "learning_rate": 9.737705853017442e-06, + "loss": 0.8721, "step": 1195 }, { - "epoch": 0.4971002485501243, - "grad_norm": 2.463293970038353, - "learning_rate": 5.920420215438794e-06, - "loss": 0.4804, + "epoch": 0.1941433425012134, + "grad_norm": 0.8230817780724599, + "learning_rate": 9.733173485423333e-06, + "loss": 0.8714, "step": 1200 }, { - "epoch": 0.4991714995857498, - "grad_norm": 2.6047443491478393, - "learning_rate": 5.8848542773121285e-06, - "loss": 0.468, + "epoch": 0.19495227309496846, + "grad_norm": 0.7681885249706483, + "learning_rate": 9.7286033667024e-06, + "loss": 0.8594, "step": 1205 }, { - "epoch": 0.5012427506213754, - "grad_norm": 2.4826419630150407, - "learning_rate": 5.849242059545259e-06, - "loss": 0.4565, + "epoch": 0.19576120368872352, + "grad_norm": 0.7640895548306019, + "learning_rate": 9.723995533305262e-06, + "loss": 0.8621, "step": 1210 }, { - "epoch": 0.5033140016570008, - "grad_norm": 2.757538020740051, - "learning_rate": 5.81358542472807e-06, - "loss": 0.4598, + "epoch": 0.19657013428247858, + "grad_norm": 0.876302015000407, + "learning_rate": 9.719350021983356e-06, + "loss": 0.873, "step": 1215 }, { - "epoch": 0.5053852526926264, - "grad_norm": 2.522678699637508, - "learning_rate": 5.777886237773542e-06, - "loss": 0.4451, + "epoch": 0.1973790648762336, + "grad_norm": 0.7687883497172476, + "learning_rate": 9.714666869788622e-06, + "loss": 0.8837, "step": 1220 }, { - "epoch": 0.5074565037282519, - "grad_norm": 2.697025952581437, - "learning_rate": 5.742146365820223e-06, - "loss": 0.4606, + "epoch": 0.19818799546998866, + "grad_norm": 0.7918479859070253, + "learning_rate": 9.709946114073231e-06, + "loss": 0.8742, "step": 1225 }, { - "epoch": 0.5095277547638773, - "grad_norm": 2.5255536162735095, - "learning_rate": 5.706367678134562e-06, - "loss": 0.4587, + "epoch": 0.19899692606374372, + "grad_norm": 0.7704108440790387, + "learning_rate": 9.705187792489263e-06, + "loss": 0.8831, "step": 1230 }, { - "epoch": 0.5115990057995029, - "grad_norm": 2.5706493150820866, - "learning_rate": 5.670552046013151e-06, - "loss": 0.4626, + "epoch": 0.19980585665749878, + "grad_norm": 0.7958664141923848, + "learning_rate": 9.700391942988422e-06, + "loss": 0.8815, "step": 1235 }, { - "epoch": 0.5136702568351285, - "grad_norm": 2.478635300584232, - "learning_rate": 5.634701342684852e-06, - "loss": 0.4495, + "epoch": 0.20061478725125384, + "grad_norm": 0.7299908461383041, + "learning_rate": 9.695558603821735e-06, + "loss": 0.8705, "step": 1240 }, { - "epoch": 0.5157415078707539, - "grad_norm": 2.413081651555011, - "learning_rate": 5.598817443212813e-06, - "loss": 0.4545, + "epoch": 0.2014237178450089, + "grad_norm": 0.7921888129305676, + "learning_rate": 9.69068781353923e-06, + "loss": 0.8651, "step": 1245 }, { - "epoch": 0.5178127589063795, - "grad_norm": 2.4514043208682614, - "learning_rate": 5.562902224396416e-06, - "loss": 0.4487, + "epoch": 0.20223264843876396, + "grad_norm": 0.783261666055412, + "learning_rate": 9.68577961098965e-06, + "loss": 0.8657, "step": 1250 }, { - "epoch": 0.519884009942005, - "grad_norm": 2.8553325471084894, - "learning_rate": 5.526957564673098e-06, - "loss": 0.4491, + "epoch": 0.20304157903251902, + "grad_norm": 0.7640970071314145, + "learning_rate": 9.680834035320127e-06, + "loss": 0.8613, "step": 1255 }, { - "epoch": 0.5219552609776305, - "grad_norm": 2.3499605624954043, - "learning_rate": 5.49098534402012e-06, - "loss": 0.455, + "epoch": 0.20385050962627407, + "grad_norm": 0.7480591135260592, + "learning_rate": 9.675851125975879e-06, + "loss": 0.8522, "step": 1260 }, { - "epoch": 0.524026512013256, - "grad_norm": 2.511645835881994, - "learning_rate": 5.454987443856235e-06, - "loss": 0.4405, + "epoch": 0.20465944022002913, + "grad_norm": 0.764427552884641, + "learning_rate": 9.67083092269989e-06, + "loss": 0.8846, "step": 1265 }, { - "epoch": 0.5260977630488816, - "grad_norm": 2.5573246473567663, - "learning_rate": 5.418965746943281e-06, - "loss": 0.4573, + "epoch": 0.2054683708137842, + "grad_norm": 0.7562918652667725, + "learning_rate": 9.665773465532597e-06, + "loss": 0.8538, "step": 1270 }, { - "epoch": 0.528169014084507, - "grad_norm": 2.6069496932706446, - "learning_rate": 5.3829221372877175e-06, - "loss": 0.4385, + "epoch": 0.20627730140753922, + "grad_norm": 0.7308192790835369, + "learning_rate": 9.660678794811569e-06, + "loss": 0.8758, "step": 1275 }, { - "epoch": 0.5302402651201326, - "grad_norm": 2.3669759764626286, - "learning_rate": 5.34685850004208e-06, - "loss": 0.4474, + "epoch": 0.20708623200129428, + "grad_norm": 0.7327237540533151, + "learning_rate": 9.65554695117118e-06, + "loss": 0.8556, "step": 1280 }, { - "epoch": 0.5323115161557581, - "grad_norm": 2.5262162943745428, - "learning_rate": 5.310776721406392e-06, - "loss": 0.4465, + "epoch": 0.20789516259504934, + "grad_norm": 0.8111511896192851, + "learning_rate": 9.650377975542298e-06, + "loss": 0.8845, "step": 1285 }, { - "epoch": 0.5343827671913836, - "grad_norm": 2.367388706229585, - "learning_rate": 5.2746786885295034e-06, - "loss": 0.4477, + "epoch": 0.2087040931888044, + "grad_norm": 0.7725083000676402, + "learning_rate": 9.645171909151944e-06, + "loss": 0.8713, "step": 1290 }, { - "epoch": 0.5364540182270091, - "grad_norm": 2.4424670349956834, - "learning_rate": 5.238566289410396e-06, - "loss": 0.4347, + "epoch": 0.20951302378255945, + "grad_norm": 0.8087330967911632, + "learning_rate": 9.639928793522976e-06, + "loss": 0.8807, "step": 1295 }, { - "epoch": 0.5385252692626347, - "grad_norm": 2.374597209278358, - "learning_rate": 5.2024414127994325e-06, - "loss": 0.4414, + "epoch": 0.2103219543763145, + "grad_norm": 0.7987317654845778, + "learning_rate": 9.634648670473743e-06, + "loss": 0.8887, "step": 1300 }, { - "epoch": 0.5405965202982601, - "grad_norm": 2.363317895955179, - "learning_rate": 5.166305948099574e-06, - "loss": 0.4211, + "epoch": 0.21113088497006957, + "grad_norm": 0.7222991798791983, + "learning_rate": 9.629331582117766e-06, + "loss": 0.8617, "step": 1305 }, { - "epoch": 0.5426677713338857, - "grad_norm": 2.445116140490052, - "learning_rate": 5.13016178526756e-06, - "loss": 0.4341, + "epoch": 0.21193981556382463, + "grad_norm": 0.8205396256241525, + "learning_rate": 9.623977570863398e-06, + "loss": 0.8512, "step": 1310 }, { - "epoch": 0.5447390223695112, - "grad_norm": 2.451842751824045, - "learning_rate": 5.094010814715062e-06, - "loss": 0.4323, + "epoch": 0.2127487461575797, + "grad_norm": 0.7704920928191922, + "learning_rate": 9.618586679413477e-06, + "loss": 0.8545, "step": 1315 }, { - "epoch": 0.5468102734051367, - "grad_norm": 2.4108281284288116, - "learning_rate": 5.057854927209804e-06, - "loss": 0.4363, + "epoch": 0.21355767675133475, + "grad_norm": 0.8121022691153029, + "learning_rate": 9.613158950764996e-06, + "loss": 0.8647, "step": 1320 }, { - "epoch": 0.5488815244407622, - "grad_norm": 2.47478290810414, - "learning_rate": 5.0216960137766805e-06, - "loss": 0.429, + "epoch": 0.2143666073450898, + "grad_norm": 0.8068844595908223, + "learning_rate": 9.60769442820876e-06, + "loss": 0.8511, "step": 1325 }, { - "epoch": 0.5509527754763878, - "grad_norm": 2.462991177532584, - "learning_rate": 4.985535965598843e-06, - "loss": 0.4504, + "epoch": 0.21517553793884484, + "grad_norm": 0.779446090355673, + "learning_rate": 9.602193155329029e-06, + "loss": 0.8623, "step": 1330 }, { - "epoch": 0.5530240265120132, - "grad_norm": 2.437965270822768, - "learning_rate": 4.949376673918802e-06, - "loss": 0.432, + "epoch": 0.2159844685325999, + "grad_norm": 0.7763229071537453, + "learning_rate": 9.596655176003185e-06, + "loss": 0.8662, "step": 1335 }, { - "epoch": 0.5550952775476388, - "grad_norm": 2.588594999778493, - "learning_rate": 4.913220029939491e-06, - "loss": 0.4335, + "epoch": 0.21679339912635495, + "grad_norm": 0.7877601933457381, + "learning_rate": 9.591080534401371e-06, + "loss": 0.87, "step": 1340 }, { - "epoch": 0.5571665285832643, - "grad_norm": 2.624887614176281, - "learning_rate": 4.877067924725368e-06, - "loss": 0.4268, + "epoch": 0.21760232972011, + "grad_norm": 0.7986846021359395, + "learning_rate": 9.585469274986148e-06, + "loss": 0.8716, "step": 1345 }, { - "epoch": 0.5592377796188898, - "grad_norm": 2.59768554910079, - "learning_rate": 4.840922249103506e-06, - "loss": 0.4304, + "epoch": 0.21841126031386507, + "grad_norm": 0.7790084651864484, + "learning_rate": 9.579821442512131e-06, + "loss": 0.8595, "step": 1350 }, { - "epoch": 0.5613090306545153, - "grad_norm": 2.3988418599389063, - "learning_rate": 4.804784893564697e-06, - "loss": 0.4411, + "epoch": 0.21922019090762013, + "grad_norm": 0.7565290946235815, + "learning_rate": 9.574137082025639e-06, + "loss": 0.878, "step": 1355 }, { - "epoch": 0.5633802816901409, - "grad_norm": 2.580765462139435, - "learning_rate": 4.7686577481645745e-06, - "loss": 0.4082, + "epoch": 0.22002912150137519, + "grad_norm": 0.7534079459249721, + "learning_rate": 9.568416238864335e-06, + "loss": 0.8542, "step": 1360 }, { - "epoch": 0.5654515327257663, - "grad_norm": 2.4467281357535855, - "learning_rate": 4.732542702424759e-06, - "loss": 0.4382, + "epoch": 0.22083805209513024, + "grad_norm": 0.7934001624755948, + "learning_rate": 9.562658958656856e-06, + "loss": 0.8662, "step": 1365 }, { - "epoch": 0.5675227837613919, - "grad_norm": 2.6370177774043073, - "learning_rate": 4.696441645234042e-06, - "loss": 0.4271, + "epoch": 0.2216469826888853, + "grad_norm": 0.7851224446308791, + "learning_rate": 9.556865287322464e-06, + "loss": 0.8708, "step": 1370 }, { - "epoch": 0.5695940347970174, - "grad_norm": 2.475111070321159, - "learning_rate": 4.660356464749578e-06, - "loss": 0.4185, + "epoch": 0.22245591328264036, + "grad_norm": 0.7292260657124646, + "learning_rate": 9.551035271070665e-06, + "loss": 0.8496, "step": 1375 }, { - "epoch": 0.5716652858326429, - "grad_norm": 2.6994305643163776, - "learning_rate": 4.624289048298147e-06, - "loss": 0.4259, + "epoch": 0.22326484387639542, + "grad_norm": 0.8178256842698678, + "learning_rate": 9.54516895640085e-06, + "loss": 0.8615, "step": 1380 }, { - "epoch": 0.5737365368682684, - "grad_norm": 2.4520066328265977, - "learning_rate": 4.588241282277428e-06, - "loss": 0.4179, + "epoch": 0.22407377447015045, + "grad_norm": 0.7761082648853441, + "learning_rate": 9.539266390101922e-06, + "loss": 0.8596, "step": 1385 }, { - "epoch": 0.575807787903894, - "grad_norm": 2.3795955584858386, - "learning_rate": 4.55221505205734e-06, - "loss": 0.4287, + "epoch": 0.2248827050639055, + "grad_norm": 0.7510699850597663, + "learning_rate": 9.533327619251921e-06, + "loss": 0.8591, "step": 1390 }, { - "epoch": 0.5778790389395194, - "grad_norm": 2.381541096732435, - "learning_rate": 4.516212241881448e-06, - "loss": 0.4157, + "epoch": 0.22569163565766057, + "grad_norm": 0.7901677982076935, + "learning_rate": 9.527352691217649e-06, + "loss": 0.856, "step": 1395 }, { - "epoch": 0.579950289975145, - "grad_norm": 2.566306679289895, - "learning_rate": 4.480234734768393e-06, - "loss": 0.4213, + "epoch": 0.22650056625141562, + "grad_norm": 0.8121283027645855, + "learning_rate": 9.52134165365429e-06, + "loss": 0.8686, "step": 1400 }, { - "epoch": 0.5820215410107705, - "grad_norm": 2.396097079278926, - "learning_rate": 4.444284412413418e-06, - "loss": 0.4172, + "epoch": 0.22730949684517068, + "grad_norm": 0.8027876769163085, + "learning_rate": 9.515294554505039e-06, + "loss": 0.8609, "step": 1405 }, { - "epoch": 0.584092792046396, - "grad_norm": 2.3514162631646274, - "learning_rate": 4.408363155089952e-06, - "loss": 0.4211, + "epoch": 0.22811842743892574, + "grad_norm": 0.7677214802906873, + "learning_rate": 9.509211442000705e-06, + "loss": 0.8842, "step": 1410 }, { - "epoch": 0.5861640430820215, - "grad_norm": 2.3172982824966906, - "learning_rate": 4.3724728415512585e-06, - "loss": 0.4193, + "epoch": 0.2289273580326808, + "grad_norm": 0.7935787903248027, + "learning_rate": 9.503092364659343e-06, + "loss": 0.8571, "step": 1415 }, { - "epoch": 0.5882352941176471, - "grad_norm": 2.472751185964712, - "learning_rate": 4.3366153489321855e-06, - "loss": 0.4264, + "epoch": 0.22973628862643586, + "grad_norm": 0.7885818327639111, + "learning_rate": 9.496937371285852e-06, + "loss": 0.8368, "step": 1420 }, { - "epoch": 0.5903065451532725, - "grad_norm": 2.4154833816633365, - "learning_rate": 4.30079255265098e-06, - "loss": 0.4048, + "epoch": 0.23054521922019092, + "grad_norm": 0.7664542560047006, + "learning_rate": 9.490746510971595e-06, + "loss": 0.8586, "step": 1425 }, { - "epoch": 0.5923777961888981, - "grad_norm": 2.5211717793789803, - "learning_rate": 4.265006326311199e-06, - "loss": 0.4089, + "epoch": 0.23135414981394598, + "grad_norm": 0.7915598638517407, + "learning_rate": 9.484519833094006e-06, + "loss": 0.8594, "step": 1430 }, { - "epoch": 0.5944490472245236, - "grad_norm": 2.3701199367093504, - "learning_rate": 4.229258541603723e-06, - "loss": 0.4156, + "epoch": 0.232163080407701, + "grad_norm": 0.8224568006509917, + "learning_rate": 9.478257387316189e-06, + "loss": 0.8604, "step": 1435 }, { - "epoch": 0.5965202982601492, - "grad_norm": 2.3994950566503994, - "learning_rate": 4.1935510682088545e-06, - "loss": 0.4065, + "epoch": 0.23297201100145606, + "grad_norm": 0.7646329363431662, + "learning_rate": 9.471959223586535e-06, + "loss": 0.8667, "step": 1440 }, { - "epoch": 0.5985915492957746, - "grad_norm": 2.4968713927832273, - "learning_rate": 4.157885773698535e-06, - "loss": 0.4057, + "epoch": 0.23378094159521112, + "grad_norm": 0.8054133554377607, + "learning_rate": 9.465625392138314e-06, + "loss": 0.8491, "step": 1445 }, { - "epoch": 0.6006628003314002, - "grad_norm": 2.416064472998044, - "learning_rate": 4.122264523438668e-06, - "loss": 0.3931, + "epoch": 0.23458987218896618, + "grad_norm": 0.7999914607438723, + "learning_rate": 9.459255943489271e-06, + "loss": 0.8787, "step": 1450 }, { - "epoch": 0.6027340513670257, - "grad_norm": 2.2816034379976355, - "learning_rate": 4.086689180491554e-06, - "loss": 0.4099, + "epoch": 0.23539880278272124, + "grad_norm": 0.7770791430956507, + "learning_rate": 9.45285092844124e-06, + "loss": 0.8526, "step": 1455 }, { - "epoch": 0.6048053024026512, - "grad_norm": 2.40826781123055, - "learning_rate": 4.051161605518453e-06, - "loss": 0.4149, + "epoch": 0.2362077333764763, + "grad_norm": 0.817105925992962, + "learning_rate": 9.446410398079716e-06, + "loss": 0.8605, "step": 1460 }, { - "epoch": 0.6068765534382767, - "grad_norm": 2.538687087204467, - "learning_rate": 4.015683656682255e-06, - "loss": 0.4175, + "epoch": 0.23701666397023136, + "grad_norm": 0.7238122855218315, + "learning_rate": 9.439934403773468e-06, + "loss": 0.8727, "step": 1465 }, { - "epoch": 0.6089478044739023, - "grad_norm": 2.430104423449275, - "learning_rate": 3.980257189550316e-06, - "loss": 0.4053, + "epoch": 0.23782559456398641, + "grad_norm": 0.7489559356823628, + "learning_rate": 9.433422997174113e-06, + "loss": 0.8672, "step": 1470 }, { - "epoch": 0.6110190555095277, - "grad_norm": 2.3721045844407143, - "learning_rate": 3.94488405699739e-06, - "loss": 0.4031, + "epoch": 0.23863452515774147, + "grad_norm": 0.8073337490099621, + "learning_rate": 9.42687623021572e-06, + "loss": 0.8631, "step": 1475 }, { - "epoch": 0.6130903065451533, - "grad_norm": 2.338789854463612, - "learning_rate": 3.909566109108727e-06, - "loss": 0.3972, + "epoch": 0.23944345575149653, + "grad_norm": 0.7504637131946496, + "learning_rate": 9.42029415511438e-06, + "loss": 0.85, "step": 1480 }, { - "epoch": 0.6151615575807788, - "grad_norm": 2.2787527608868943, - "learning_rate": 3.874305193083313e-06, - "loss": 0.411, + "epoch": 0.2402523863452516, + "grad_norm": 0.7888833671784519, + "learning_rate": 9.4136768243678e-06, + "loss": 0.8602, "step": 1485 }, { - "epoch": 0.6172328086164043, - "grad_norm": 2.454450620709443, - "learning_rate": 3.839103153137247e-06, - "loss": 0.4008, + "epoch": 0.24106131693900662, + "grad_norm": 0.8101599644677917, + "learning_rate": 9.40702429075488e-06, + "loss": 0.8535, "step": 1490 }, { - "epoch": 0.6193040596520298, - "grad_norm": 2.4225686330793823, - "learning_rate": 3.803961830407297e-06, - "loss": 0.3974, + "epoch": 0.24187024753276168, + "grad_norm": 0.7871350703464342, + "learning_rate": 9.400336607335294e-06, + "loss": 0.8486, "step": 1495 }, { - "epoch": 0.6213753106876554, - "grad_norm": 2.564739658366535, - "learning_rate": 3.768883062854598e-06, - "loss": 0.4152, + "epoch": 0.24267917812651674, + "grad_norm": 0.7878053882806549, + "learning_rate": 9.393613827449064e-06, + "loss": 0.8537, "step": 1500 }, { - "epoch": 0.6234465617232808, - "grad_norm": 2.4055933426253837, - "learning_rate": 3.7338686851685267e-06, - "loss": 0.4021, + "epoch": 0.2434881087202718, + "grad_norm": 0.7716899022882726, + "learning_rate": 9.38685600471614e-06, + "loss": 0.8751, "step": 1505 }, { - "epoch": 0.6255178127589064, - "grad_norm": 2.3297777756780986, - "learning_rate": 3.6989205286707398e-06, - "loss": 0.4087, + "epoch": 0.24429703931402685, + "grad_norm": 0.7804644122014117, + "learning_rate": 9.380063193035968e-06, + "loss": 0.8496, "step": 1510 }, { - "epoch": 0.6275890637945319, - "grad_norm": 2.276174828544462, - "learning_rate": 3.664040421219393e-06, - "loss": 0.4, + "epoch": 0.2451059699077819, + "grad_norm": 0.8146517496395704, + "learning_rate": 9.373235446587055e-06, + "loss": 0.8678, "step": 1515 }, { - "epoch": 0.6296603148301574, - "grad_norm": 2.558523087497906, - "learning_rate": 3.6292301871135425e-06, - "loss": 0.3919, + "epoch": 0.24591490050153697, + "grad_norm": 0.7819896556165521, + "learning_rate": 9.366372819826553e-06, + "loss": 0.8572, "step": 1520 }, { - "epoch": 0.6317315658657829, - "grad_norm": 2.599732843239929, - "learning_rate": 3.59449164699773e-06, - "loss": 0.4016, + "epoch": 0.24672383109529203, + "grad_norm": 0.7667264574062115, + "learning_rate": 9.359475367489805e-06, + "loss": 0.8627, "step": 1525 }, { - "epoch": 0.6338028169014085, - "grad_norm": 2.4726991401898952, - "learning_rate": 3.55982661776676e-06, - "loss": 0.3986, + "epoch": 0.2475327616890471, + "grad_norm": 0.8236534571118577, + "learning_rate": 9.352543144589923e-06, + "loss": 0.8668, "step": 1530 }, { - "epoch": 0.6358740679370339, - "grad_norm": 2.321690978258901, - "learning_rate": 3.5252369124706697e-06, - "loss": 0.3978, + "epoch": 0.24834169228280215, + "grad_norm": 0.7670049125342286, + "learning_rate": 9.345576206417345e-06, + "loss": 0.8418, "step": 1535 }, { - "epoch": 0.6379453189726595, - "grad_norm": 2.6754380236168664, - "learning_rate": 3.4907243402199013e-06, - "loss": 0.4113, + "epoch": 0.2491506228765572, + "grad_norm": 0.7827102030650855, + "learning_rate": 9.338574608539389e-06, + "loss": 0.8439, "step": 1540 }, { - "epoch": 0.640016570008285, - "grad_norm": 2.4052468560081475, - "learning_rate": 3.4562907060906908e-06, - "loss": 0.3813, + "epoch": 0.24995955347031223, + "grad_norm": 0.7839079136756119, + "learning_rate": 9.331538406799815e-06, + "loss": 0.8549, "step": 1545 }, { - "epoch": 0.6420878210439105, - "grad_norm": 2.4570458861707696, - "learning_rate": 3.4219378110306523e-06, - "loss": 0.392, + "epoch": 0.2507684840640673, + "grad_norm": 0.8178409940371506, + "learning_rate": 9.324467657318384e-06, + "loss": 0.8534, "step": 1550 }, { - "epoch": 0.644159072079536, - "grad_norm": 2.3101859891504866, - "learning_rate": 3.3876674517645815e-06, - "loss": 0.3996, + "epoch": 0.25157741465782235, + "grad_norm": 0.8099902787097547, + "learning_rate": 9.317362416490396e-06, + "loss": 0.8589, "step": 1555 }, { - "epoch": 0.6462303231151616, - "grad_norm": 2.4539409882344483, - "learning_rate": 3.353481420700495e-06, - "loss": 0.3979, + "epoch": 0.25238634525157744, + "grad_norm": 0.8397966939088385, + "learning_rate": 9.310222740986258e-06, + "loss": 0.8695, "step": 1560 }, { - "epoch": 0.648301574150787, - "grad_norm": 2.2847760045122087, - "learning_rate": 3.319381505835868e-06, - "loss": 0.3749, + "epoch": 0.25319527584533247, + "grad_norm": 0.8012721851788485, + "learning_rate": 9.303048687751016e-06, + "loss": 0.8647, "step": 1565 }, { - "epoch": 0.6503728251864126, - "grad_norm": 2.2918085600506717, - "learning_rate": 3.285369490664133e-06, - "loss": 0.3895, + "epoch": 0.2540042064390875, + "grad_norm": 0.7848654081297167, + "learning_rate": 9.29584031400391e-06, + "loss": 0.848, "step": 1570 }, { - "epoch": 0.6524440762220381, - "grad_norm": 2.4126038265192538, - "learning_rate": 3.251447154081394e-06, - "loss": 0.3885, + "epoch": 0.2548131370328426, + "grad_norm": 0.7276468748203139, + "learning_rate": 9.288597677237918e-06, + "loss": 0.8451, "step": 1575 }, { - "epoch": 0.6545153272576636, - "grad_norm": 2.4831233203755603, - "learning_rate": 3.2176162702933816e-06, - "loss": 0.413, + "epoch": 0.2556220676265976, + "grad_norm": 0.8006865536918176, + "learning_rate": 9.281320835219294e-06, + "loss": 0.847, "step": 1580 }, { - "epoch": 0.6565865782932891, - "grad_norm": 2.385955156565656, - "learning_rate": 3.183878608722669e-06, - "loss": 0.3838, + "epoch": 0.2564309982203527, + "grad_norm": 0.7628111941438427, + "learning_rate": 9.274009845987106e-06, + "loss": 0.8712, "step": 1585 }, { - "epoch": 0.6586578293289147, - "grad_norm": 2.5426984744026124, - "learning_rate": 3.150235933916115e-06, - "loss": 0.3755, + "epoch": 0.25723992881410773, + "grad_norm": 0.7881647592364686, + "learning_rate": 9.26666476785278e-06, + "loss": 0.8678, "step": 1590 }, { - "epoch": 0.6607290803645401, - "grad_norm": 2.388904551962294, - "learning_rate": 3.1166900054525873e-06, - "loss": 0.3868, + "epoch": 0.2580488594078628, + "grad_norm": 0.8116538712282235, + "learning_rate": 9.259285659399624e-06, + "loss": 0.8535, "step": 1595 }, { - "epoch": 0.6628003314001657, - "grad_norm": 2.5869530818499147, - "learning_rate": 3.0832425778509235e-06, - "loss": 0.3821, + "epoch": 0.25885779000161785, + "grad_norm": 0.7813279263856877, + "learning_rate": 9.251872579482373e-06, + "loss": 0.845, "step": 1600 }, { - "epoch": 0.6648715824357913, - "grad_norm": 2.47920960360778, - "learning_rate": 3.049895400478174e-06, - "loss": 0.3935, + "epoch": 0.25966672059537294, + "grad_norm": 0.7940356068886795, + "learning_rate": 9.244425587226708e-06, + "loss": 0.8492, "step": 1605 }, { - "epoch": 0.6669428334714167, - "grad_norm": 2.4320164826856234, - "learning_rate": 3.0166502174581012e-06, - "loss": 0.3732, + "epoch": 0.26047565118912797, + "grad_norm": 0.8041883146042144, + "learning_rate": 9.236944742028797e-06, + "loss": 0.869, "step": 1610 }, { - "epoch": 0.6690140845070423, - "grad_norm": 2.3447158283526783, - "learning_rate": 2.983508767579956e-06, - "loss": 0.3805, + "epoch": 0.26128458178288305, + "grad_norm": 0.8411011003169626, + "learning_rate": 9.229430103554808e-06, + "loss": 0.8702, "step": 1615 }, { - "epoch": 0.6710853355426678, - "grad_norm": 2.336702612441604, - "learning_rate": 2.950472784207544e-06, - "loss": 0.3814, + "epoch": 0.2620935123766381, + "grad_norm": 0.7994752148577935, + "learning_rate": 9.221881731740442e-06, + "loss": 0.846, "step": 1620 }, { - "epoch": 0.6731565865782932, - "grad_norm": 2.403683822019676, - "learning_rate": 2.917543995188562e-06, - "loss": 0.3799, + "epoch": 0.2629024429703931, + "grad_norm": 0.7904404440251053, + "learning_rate": 9.214299686790453e-06, + "loss": 0.8479, "step": 1625 }, { - "epoch": 0.6752278376139188, - "grad_norm": 2.32245444127231, - "learning_rate": 2.8847241227642255e-06, - "loss": 0.3878, + "epoch": 0.2637113735641482, + "grad_norm": 0.8285353436413048, + "learning_rate": 9.206684029178166e-06, + "loss": 0.867, "step": 1630 }, { - "epoch": 0.6772990886495444, - "grad_norm": 2.2336601101273894, - "learning_rate": 2.852014883479198e-06, - "loss": 0.3668, + "epoch": 0.26452030415790323, + "grad_norm": 0.8167896861662431, + "learning_rate": 9.199034819644997e-06, + "loss": 0.845, "step": 1635 }, { - "epoch": 0.6793703396851698, - "grad_norm": 2.4572013362431666, - "learning_rate": 2.819417988091814e-06, - "loss": 0.3789, + "epoch": 0.2653292347516583, + "grad_norm": 0.7625190041745681, + "learning_rate": 9.191352119199965e-06, + "loss": 0.8715, "step": 1640 }, { - "epoch": 0.6814415907207954, - "grad_norm": 2.374979599510779, - "learning_rate": 2.786935141484586e-06, - "loss": 0.3776, + "epoch": 0.26613816534541335, + "grad_norm": 0.766576459090851, + "learning_rate": 9.183635989119211e-06, + "loss": 0.8554, "step": 1645 }, { - "epoch": 0.6835128417564209, - "grad_norm": 2.387695271521185, - "learning_rate": 2.754568042575061e-06, - "loss": 0.3785, + "epoch": 0.26694709593916843, + "grad_norm": 0.7598503790298559, + "learning_rate": 9.175886490945505e-06, + "loss": 0.8657, "step": 1650 }, { - "epoch": 0.6855840927920464, - "grad_norm": 2.4380470816357573, - "learning_rate": 2.7223183842269442e-06, - "loss": 0.3762, + "epoch": 0.26775602653292346, + "grad_norm": 0.7436415672250105, + "learning_rate": 9.168103686487755e-06, + "loss": 0.8618, "step": 1655 }, { - "epoch": 0.6876553438276719, - "grad_norm": 2.2802764216409996, - "learning_rate": 2.6901878531615677e-06, - "loss": 0.377, + "epoch": 0.26856495712667855, + "grad_norm": 0.8147678677174188, + "learning_rate": 9.160287637820514e-06, + "loss": 0.8591, "step": 1660 }, { - "epoch": 0.6897265948632975, - "grad_norm": 2.3338582614060748, - "learning_rate": 2.658178129869672e-06, - "loss": 0.382, + "epoch": 0.2693738877204336, + "grad_norm": 0.7963714833005615, + "learning_rate": 9.152438407283493e-06, + "loss": 0.8651, "step": 1665 }, { - "epoch": 0.6917978458989229, - "grad_norm": 2.4939738702851932, - "learning_rate": 2.6262908885235046e-06, - "loss": 0.3789, + "epoch": 0.27018281831418867, + "grad_norm": 0.7937419589693009, + "learning_rate": 9.144556057481048e-06, + "loss": 0.8373, "step": 1670 }, { - "epoch": 0.6938690969345485, - "grad_norm": 2.409154689853176, - "learning_rate": 2.594527796889265e-06, - "loss": 0.3859, + "epoch": 0.2709917489079437, + "grad_norm": 0.8239141053085791, + "learning_rate": 9.136640651281694e-06, + "loss": 0.8712, "step": 1675 }, { - "epoch": 0.695940347970174, - "grad_norm": 2.4232651472750724, - "learning_rate": 2.5628905162398797e-06, - "loss": 0.3759, + "epoch": 0.2718006795016987, + "grad_norm": 0.7797626292094625, + "learning_rate": 9.128692251817602e-06, + "loss": 0.8629, "step": 1680 }, { - "epoch": 0.6980115990057995, - "grad_norm": 2.438606094251203, - "learning_rate": 2.531380701268108e-06, - "loss": 0.3707, + "epoch": 0.2726096100954538, + "grad_norm": 0.7697095287067577, + "learning_rate": 9.120710922484089e-06, + "loss": 0.8307, "step": 1685 }, { - "epoch": 0.700082850041425, - "grad_norm": 2.2068267980004164, - "learning_rate": 2.5000000000000015e-06, - "loss": 0.3635, + "epoch": 0.27341854068920884, + "grad_norm": 0.7710196952612974, + "learning_rate": 9.112696726939112e-06, + "loss": 0.8513, "step": 1690 }, { - "epoch": 0.7021541010770506, - "grad_norm": 2.4849851469693487, - "learning_rate": 2.4687500537087027e-06, - "loss": 0.3765, + "epoch": 0.27422747128296393, + "grad_norm": 0.7800639021416464, + "learning_rate": 9.104649729102774e-06, + "loss": 0.8582, "step": 1695 }, { - "epoch": 0.704225352112676, - "grad_norm": 2.3676710626755604, - "learning_rate": 2.4376324968286154e-06, - "loss": 0.3706, + "epoch": 0.27503640187671896, + "grad_norm": 0.8044706731480418, + "learning_rate": 9.096569993156797e-06, + "loss": 0.8406, "step": 1700 }, { - "epoch": 0.7062966031483016, - "grad_norm": 2.4665247552413194, - "learning_rate": 2.40664895686991e-06, - "loss": 0.359, + "epoch": 0.27584533247047405, + "grad_norm": 0.7699086496292621, + "learning_rate": 9.088457583544022e-06, + "loss": 0.8562, "step": 1705 }, { - "epoch": 0.7083678541839271, - "grad_norm": 2.4228989801005882, - "learning_rate": 2.375801054333409e-06, - "loss": 0.3574, + "epoch": 0.2766542630642291, + "grad_norm": 0.8340399404633732, + "learning_rate": 9.080312564967884e-06, + "loss": 0.8694, "step": 1710 }, { - "epoch": 0.7104391052195526, - "grad_norm": 2.447240340812193, - "learning_rate": 2.345090402625822e-06, - "loss": 0.3629, + "epoch": 0.27746319365798416, + "grad_norm": 0.7919025521073914, + "learning_rate": 9.072135002391912e-06, + "loss": 0.8769, "step": 1715 }, { - "epoch": 0.7125103562551781, - "grad_norm": 2.494665611756937, - "learning_rate": 2.3145186079753685e-06, - "loss": 0.3607, + "epoch": 0.2782721242517392, + "grad_norm": 0.7754055380467696, + "learning_rate": 9.063924961039195e-06, + "loss": 0.8852, "step": 1720 }, { - "epoch": 0.7145816072908037, - "grad_norm": 2.3466738040218433, - "learning_rate": 2.2840872693477694e-06, - "loss": 0.3582, + "epoch": 0.2790810548454943, + "grad_norm": 0.76639361552101, + "learning_rate": 9.055682506391866e-06, + "loss": 0.8655, "step": 1725 }, { - "epoch": 0.7166528583264291, - "grad_norm": 2.4579034785541274, - "learning_rate": 2.253797978362617e-06, - "loss": 0.37, + "epoch": 0.2798899854392493, + "grad_norm": 0.7860193334912077, + "learning_rate": 9.04740770419059e-06, + "loss": 0.8572, "step": 1730 }, { - "epoch": 0.7187241093620547, - "grad_norm": 2.294277539263346, - "learning_rate": 2.2236523192101264e-06, - "loss": 0.371, + "epoch": 0.28069891603300434, + "grad_norm": 0.7656637491035695, + "learning_rate": 9.039100620434025e-06, + "loss": 0.8634, "step": 1735 }, { - "epoch": 0.7207953603976802, - "grad_norm": 2.3165105524366676, - "learning_rate": 2.193651868568285e-06, - "loss": 0.3533, + "epoch": 0.28150784662675943, + "grad_norm": 0.8287569040947624, + "learning_rate": 9.030761321378303e-06, + "loss": 0.8539, "step": 1740 }, { - "epoch": 0.7228666114333057, - "grad_norm": 2.3412049707939766, - "learning_rate": 2.16379819552038e-06, - "loss": 0.3598, + "epoch": 0.28231677722051446, + "grad_norm": 0.778379469664662, + "learning_rate": 9.022389873536505e-06, + "loss": 0.8543, "step": 1745 }, { - "epoch": 0.7249378624689312, - "grad_norm": 2.37970184238566, - "learning_rate": 2.1340928614729445e-06, - "loss": 0.3553, + "epoch": 0.28312570781426954, + "grad_norm": 0.8341873388515572, + "learning_rate": 9.01398634367812e-06, + "loss": 0.8378, "step": 1750 }, { - "epoch": 0.7270091135045568, - "grad_norm": 2.5101331147915733, - "learning_rate": 2.1045374200740863e-06, - "loss": 0.3589, + "epoch": 0.2839346384080246, + "grad_norm": 0.807630286757475, + "learning_rate": 9.005550798828521e-06, + "loss": 0.8661, "step": 1755 }, { - "epoch": 0.7290803645401823, - "grad_norm": 2.381929011616353, - "learning_rate": 2.075133417132223e-06, - "loss": 0.3544, + "epoch": 0.28474356900177966, + "grad_norm": 0.7492013925485693, + "learning_rate": 8.997083306268434e-06, + "loss": 0.8646, "step": 1760 }, { - "epoch": 0.7311516155758078, - "grad_norm": 2.64277485599053, - "learning_rate": 2.045882390535248e-06, - "loss": 0.354, + "epoch": 0.2855524995955347, + "grad_norm": 0.820329567734956, + "learning_rate": 8.988583933533384e-06, + "loss": 0.866, "step": 1765 }, { - "epoch": 0.7332228666114333, - "grad_norm": 2.2983110375410756, - "learning_rate": 2.016785870170079e-06, - "loss": 0.359, + "epoch": 0.2863614301892898, + "grad_norm": 0.8427359735718537, + "learning_rate": 8.980052748413177e-06, + "loss": 0.8393, "step": 1770 }, { - "epoch": 0.7352941176470589, - "grad_norm": 2.338818197474944, - "learning_rate": 1.987845377842656e-06, - "loss": 0.3515, + "epoch": 0.2871703607830448, + "grad_norm": 0.7315290649122431, + "learning_rate": 8.971489818951347e-06, + "loss": 0.8561, "step": 1775 }, { - "epoch": 0.7373653686826843, - "grad_norm": 2.412156205295859, - "learning_rate": 1.9590624271983406e-06, - "loss": 0.3707, + "epoch": 0.2879792913767999, + "grad_norm": 0.7968151029522487, + "learning_rate": 8.962895213444618e-06, + "loss": 0.8449, "step": 1780 }, { - "epoch": 0.7394366197183099, - "grad_norm": 2.3221544897617394, - "learning_rate": 1.9304385236427505e-06, - "loss": 0.3522, + "epoch": 0.2887882219705549, + "grad_norm": 0.8497262991663913, + "learning_rate": 8.954269000442353e-06, + "loss": 0.8614, "step": 1785 }, { - "epoch": 0.7415078707539354, - "grad_norm": 2.4316222609963427, - "learning_rate": 1.9019751642630252e-06, - "loss": 0.3574, + "epoch": 0.28959715256430996, + "grad_norm": 0.8884396786756781, + "learning_rate": 8.945611248746015e-06, + "loss": 0.873, "step": 1790 }, { - "epoch": 0.7435791217895609, - "grad_norm": 2.2641479623423635, - "learning_rate": 1.8736738377495196e-06, - "loss": 0.3592, + "epoch": 0.29040608315806504, + "grad_norm": 0.7822894663944296, + "learning_rate": 8.936922027408618e-06, + "loss": 0.8504, "step": 1795 }, { - "epoch": 0.7456503728251864, - "grad_norm": 2.3420647549229265, - "learning_rate": 1.8455360243179537e-06, - "loss": 0.3542, + "epoch": 0.2912150137518201, + "grad_norm": 0.7512013193436944, + "learning_rate": 8.928201405734172e-06, + "loss": 0.8638, "step": 1800 }, { - "epoch": 0.747721623860812, - "grad_norm": 2.456638840774933, - "learning_rate": 1.8175631956319823e-06, - "loss": 0.3542, + "epoch": 0.29202394434557516, + "grad_norm": 0.808524098300231, + "learning_rate": 8.919449453277124e-06, + "loss": 0.8555, "step": 1805 }, { - "epoch": 0.7497928748964374, - "grad_norm": 2.315911960148082, - "learning_rate": 1.7897568147262323e-06, - "loss": 0.3651, + "epoch": 0.2928328749393302, + "grad_norm": 0.8205569397216151, + "learning_rate": 8.910666239841824e-06, + "loss": 0.8393, "step": 1810 }, { - "epoch": 0.751864125932063, - "grad_norm": 2.430456263244821, - "learning_rate": 1.7621183359297817e-06, - "loss": 0.3513, + "epoch": 0.2936418055330853, + "grad_norm": 0.7685972280574218, + "learning_rate": 8.901851835481947e-06, + "loss": 0.8616, "step": 1815 }, { - "epoch": 0.7539353769676885, - "grad_norm": 2.2820850020652137, - "learning_rate": 1.7346492047900897e-06, - "loss": 0.344, + "epoch": 0.2944507361268403, + "grad_norm": 0.7888992044706635, + "learning_rate": 8.893006310499941e-06, + "loss": 0.827, "step": 1820 }, { - "epoch": 0.756006628003314, - "grad_norm": 2.4006568265757755, - "learning_rate": 1.7073508579973996e-06, - "loss": 0.3526, + "epoch": 0.2952596667205954, + "grad_norm": 0.8377015460290415, + "learning_rate": 8.884129735446471e-06, + "loss": 0.8699, "step": 1825 }, { - "epoch": 0.7580778790389395, - "grad_norm": 2.2901041487912774, - "learning_rate": 1.6802247233095914e-06, - "loss": 0.349, + "epoch": 0.2960685973143504, + "grad_norm": 0.8237212938659825, + "learning_rate": 8.875222181119859e-06, + "loss": 0.8503, "step": 1830 }, { - "epoch": 0.7601491300745651, - "grad_norm": 2.4009693108682204, - "learning_rate": 1.6532722194775108e-06, - "loss": 0.3537, + "epoch": 0.2968775279081055, + "grad_norm": 0.7665032383816565, + "learning_rate": 8.866283718565498e-06, + "loss": 0.8589, "step": 1835 }, { - "epoch": 0.7622203811101905, - "grad_norm": 2.343710211021654, - "learning_rate": 1.626494756170765e-06, - "loss": 0.3536, + "epoch": 0.29768645850186054, + "grad_norm": 0.8141188029046369, + "learning_rate": 8.857314419075316e-06, + "loss": 0.8571, "step": 1840 }, { - "epoch": 0.7642916321458161, - "grad_norm": 2.3742949969171363, - "learning_rate": 1.5998937339039889e-06, - "loss": 0.3542, + "epoch": 0.29849538909561557, + "grad_norm": 0.7761079464199027, + "learning_rate": 8.848314354187184e-06, + "loss": 0.8463, "step": 1845 }, { - "epoch": 0.7663628831814416, - "grad_norm": 2.3672793937996093, - "learning_rate": 1.5734705439636017e-06, - "loss": 0.3428, + "epoch": 0.29930431968937066, + "grad_norm": 0.8097442274081034, + "learning_rate": 8.839283595684355e-06, + "loss": 0.8581, "step": 1850 }, { - "epoch": 0.7684341342170671, - "grad_norm": 2.2889634008759803, - "learning_rate": 1.5472265683350397e-06, - "loss": 0.3535, + "epoch": 0.3001132502831257, + "grad_norm": 0.8095252549308872, + "learning_rate": 8.83022221559489e-06, + "loss": 0.8557, "step": 1855 }, { - "epoch": 0.7705053852526926, - "grad_norm": 2.2816526862864164, - "learning_rate": 1.5211631796304721e-06, - "loss": 0.3452, + "epoch": 0.3009221808768808, + "grad_norm": 0.7980409130367505, + "learning_rate": 8.821130286191086e-06, + "loss": 0.8671, "step": 1860 }, { - "epoch": 0.7725766362883182, - "grad_norm": 2.256494769623704, - "learning_rate": 1.495281741017016e-06, - "loss": 0.3569, + "epoch": 0.3017311114706358, + "grad_norm": 0.8173815138889032, + "learning_rate": 8.81200787998889e-06, + "loss": 0.8594, "step": 1865 }, { - "epoch": 0.7746478873239436, - "grad_norm": 2.2248161459647333, - "learning_rate": 1.46958360614543e-06, - "loss": 0.3439, + "epoch": 0.3025400420643909, + "grad_norm": 0.8047823405503607, + "learning_rate": 8.802855069747338e-06, + "loss": 0.8596, "step": 1870 }, { - "epoch": 0.7767191383595692, - "grad_norm": 2.3902740970389065, - "learning_rate": 1.4440701190793278e-06, - "loss": 0.3301, + "epoch": 0.3033489726581459, + "grad_norm": 0.804911943505487, + "learning_rate": 8.793671928467953e-06, + "loss": 0.8397, "step": 1875 }, { - "epoch": 0.7787903893951947, - "grad_norm": 2.439067358960243, - "learning_rate": 1.4187426142248723e-06, - "loss": 0.3462, + "epoch": 0.304157903251901, + "grad_norm": 0.7613233673132125, + "learning_rate": 8.784458529394185e-06, + "loss": 0.8407, "step": 1880 }, { - "epoch": 0.7808616404308202, - "grad_norm": 2.3392746632984585, - "learning_rate": 1.3936024162609897e-06, - "loss": 0.3408, + "epoch": 0.30496683384565604, + "grad_norm": 0.7397028565696792, + "learning_rate": 8.775214946010806e-06, + "loss": 0.8476, "step": 1885 }, { - "epoch": 0.7829328914664457, - "grad_norm": 2.377498700024158, - "learning_rate": 1.3686508400700787e-06, - "loss": 0.3549, + "epoch": 0.3057757644394111, + "grad_norm": 0.7728205441604445, + "learning_rate": 8.765941252043341e-06, + "loss": 0.8384, "step": 1890 }, { - "epoch": 0.7850041425020713, - "grad_norm": 2.3496620916951767, - "learning_rate": 1.3438891906692447e-06, - "loss": 0.3472, + "epoch": 0.30658469503316615, + "grad_norm": 0.8229918467819616, + "learning_rate": 8.756637521457473e-06, + "loss": 0.8488, "step": 1895 }, { - "epoch": 0.7870753935376967, - "grad_norm": 2.289494721325453, - "learning_rate": 1.3193187631420462e-06, - "loss": 0.3355, + "epoch": 0.3073936256269212, + "grad_norm": 0.7848703050564307, + "learning_rate": 8.747303828458446e-06, + "loss": 0.8488, "step": 1900 }, { - "epoch": 0.7891466445733223, - "grad_norm": 2.2947285370456396, - "learning_rate": 1.2949408425707566e-06, - "loss": 0.3394, + "epoch": 0.30820255622067627, + "grad_norm": 0.8185735473058204, + "learning_rate": 8.737940247490487e-06, + "loss": 0.8447, "step": 1905 }, { - "epoch": 0.7912178956089478, - "grad_norm": 2.3617970794893126, - "learning_rate": 1.2707567039691505e-06, - "loss": 0.3497, + "epoch": 0.3090114868144313, + "grad_norm": 0.8175009758279596, + "learning_rate": 8.728546853236202e-06, + "loss": 0.8468, "step": 1910 }, { - "epoch": 0.7932891466445733, - "grad_norm": 2.321811184391228, - "learning_rate": 1.2467676122158224e-06, - "loss": 0.3412, + "epoch": 0.3098204174081864, + "grad_norm": 0.8234001884738343, + "learning_rate": 8.71912372061598e-06, + "loss": 0.8579, "step": 1915 }, { - "epoch": 0.7953603976801988, - "grad_norm": 2.3211063248548967, - "learning_rate": 1.222974821988024e-06, - "loss": 0.3436, + "epoch": 0.3106293480019414, + "grad_norm": 0.8135018815005348, + "learning_rate": 8.70967092478741e-06, + "loss": 0.8333, "step": 1920 }, { - "epoch": 0.7974316487158244, - "grad_norm": 2.367048239376449, - "learning_rate": 1.1993795776960498e-06, - "loss": 0.3387, + "epoch": 0.3114382785956965, + "grad_norm": 0.8316517603780106, + "learning_rate": 8.700188541144658e-06, + "loss": 0.8152, "step": 1925 }, { - "epoch": 0.7995028997514498, - "grad_norm": 2.415017409117694, - "learning_rate": 1.1759831134181504e-06, - "loss": 0.3477, + "epoch": 0.31224720918945154, + "grad_norm": 0.8542272626940559, + "learning_rate": 8.690676645317886e-06, + "loss": 0.8302, "step": 1930 }, { - "epoch": 0.8015741507870754, - "grad_norm": 2.2878955632108737, - "learning_rate": 1.1527866528359805e-06, - "loss": 0.3424, + "epoch": 0.3130561397832066, + "grad_norm": 0.7850582360070165, + "learning_rate": 8.68113531317264e-06, + "loss": 0.8494, "step": 1935 }, { - "epoch": 0.8036454018227009, - "grad_norm": 2.4168712983822407, - "learning_rate": 1.1297914091706086e-06, - "loss": 0.3395, + "epoch": 0.31386507037696165, + "grad_norm": 0.8046763932380032, + "learning_rate": 8.671564620809243e-06, + "loss": 0.8512, "step": 1940 }, { - "epoch": 0.8057166528583264, - "grad_norm": 2.332403241899359, - "learning_rate": 1.1069985851190524e-06, - "loss": 0.3413, + "epoch": 0.31467400097071674, + "grad_norm": 0.8472122957190853, + "learning_rate": 8.661964644562194e-06, + "loss": 0.8481, "step": 1945 }, { - "epoch": 0.8077879038939519, - "grad_norm": 2.245314869032349, - "learning_rate": 1.0844093727913868e-06, - "loss": 0.3302, + "epoch": 0.31548293156447177, + "grad_norm": 0.8098454336630103, + "learning_rate": 8.652335460999554e-06, + "loss": 0.8386, "step": 1950 }, { - "epoch": 0.8098591549295775, - "grad_norm": 2.3766452170038814, - "learning_rate": 1.062024953648384e-06, - "loss": 0.3363, + "epoch": 0.3162918621582268, + "grad_norm": 0.81377246582292, + "learning_rate": 8.64267714692234e-06, + "loss": 0.8109, "step": 1955 }, { - "epoch": 0.8119304059652029, - "grad_norm": 2.3725445423461076, - "learning_rate": 1.039846498439727e-06, - "loss": 0.3312, + "epoch": 0.3171007927519819, + "grad_norm": 0.7941773089013205, + "learning_rate": 8.632989779363907e-06, + "loss": 0.8223, "step": 1960 }, { - "epoch": 0.8140016570008285, - "grad_norm": 2.400378472961016, - "learning_rate": 1.0178751671427755e-06, - "loss": 0.3406, + "epoch": 0.3179097233457369, + "grad_norm": 0.7988879204951932, + "learning_rate": 8.623273435589338e-06, + "loss": 0.8536, "step": 1965 }, { - "epoch": 0.816072908036454, - "grad_norm": 2.600161757489673, - "learning_rate": 9.961121089018933e-07, - "loss": 0.3286, + "epoch": 0.318718653939492, + "grad_norm": 0.7645549102866793, + "learning_rate": 8.613528193094826e-06, + "loss": 0.841, "step": 1970 }, { - "epoch": 0.8181441590720795, - "grad_norm": 2.4280753174812597, - "learning_rate": 9.745584619683524e-07, - "loss": 0.3439, + "epoch": 0.31952758453324703, + "grad_norm": 0.7971671512555748, + "learning_rate": 8.603754129607055e-06, + "loss": 0.8418, "step": 1975 }, { - "epoch": 0.820215410107705, - "grad_norm": 2.260545698321657, - "learning_rate": 9.532153536407923e-07, - "loss": 0.3317, + "epoch": 0.3203365151270021, + "grad_norm": 0.7570319344787492, + "learning_rate": 8.593951323082586e-06, + "loss": 0.8583, "step": 1980 }, { - "epoch": 0.8222866611433306, - "grad_norm": 2.4227916089483204, - "learning_rate": 9.320839002062682e-07, - "loss": 0.3312, + "epoch": 0.32114544572075715, + "grad_norm": 0.8210274942358864, + "learning_rate": 8.584119851707224e-06, + "loss": 0.8493, "step": 1985 }, { - "epoch": 0.824357912178956, - "grad_norm": 2.8477868846172067, - "learning_rate": 9.111652068818621e-07, - "loss": 0.3334, + "epoch": 0.32195437631451224, + "grad_norm": 0.7921398145158576, + "learning_rate": 8.574259793895404e-06, + "loss": 0.8373, "step": 1990 }, { - "epoch": 0.8264291632145816, - "grad_norm": 2.370728174821394, - "learning_rate": 8.904603677568785e-07, - "loss": 0.3278, + "epoch": 0.32276330690826727, + "grad_norm": 0.8093679320396422, + "learning_rate": 8.564371228289563e-06, + "loss": 0.8353, "step": 1995 }, { - "epoch": 0.8285004142502072, - "grad_norm": 2.3115016796994183, - "learning_rate": 8.699704657356195e-07, - "loss": 0.3358, + "epoch": 0.32357223750202235, + "grad_norm": 0.7347855423064408, + "learning_rate": 8.554454233759508e-06, + "loss": 0.856, "step": 2000 }, { - "epoch": 0.8305716652858326, - "grad_norm": 2.253238724272844, - "learning_rate": 8.496965724807516e-07, - "loss": 0.3348, + "epoch": 0.3243811680957774, + "grad_norm": 0.7460223907838642, + "learning_rate": 8.544508889401799e-06, + "loss": 0.8437, "step": 2005 }, { - "epoch": 0.8326429163214581, - "grad_norm": 2.3696595779036573, - "learning_rate": 8.296397483572515e-07, - "loss": 0.3322, + "epoch": 0.3251900986895324, + "grad_norm": 0.7539924040446289, + "learning_rate": 8.534535274539103e-06, + "loss": 0.8448, "step": 2010 }, { - "epoch": 0.8347141673570837, - "grad_norm": 2.3994710782454143, - "learning_rate": 8.098010423769503e-07, - "loss": 0.3159, + "epoch": 0.3259990292832875, + "grad_norm": 0.8431518796240063, + "learning_rate": 8.524533468719569e-06, + "loss": 0.8545, "step": 2015 }, { - "epoch": 0.8367854183927091, - "grad_norm": 2.1347268704516638, - "learning_rate": 7.901814921436624e-07, - "loss": 0.3318, + "epoch": 0.32680795987704253, + "grad_norm": 0.8116074595962663, + "learning_rate": 8.5145035517162e-06, + "loss": 0.8262, "step": 2020 }, { - "epoch": 0.8388566694283347, - "grad_norm": 2.2628490853732397, - "learning_rate": 7.70782123798921e-07, - "loss": 0.3318, + "epoch": 0.3276168904707976, + "grad_norm": 0.7709832113170016, + "learning_rate": 8.504445603526202e-06, + "loss": 0.8718, "step": 2025 }, { - "epoch": 0.8409279204639603, - "grad_norm": 2.2582184248706683, - "learning_rate": 7.516039519683105e-07, - "loss": 0.3234, + "epoch": 0.32842582106455265, + "grad_norm": 0.8030729878881923, + "learning_rate": 8.494359704370357e-06, + "loss": 0.8588, "step": 2030 }, { - "epoch": 0.8429991714995857, - "grad_norm": 2.476131958227405, - "learning_rate": 7.326479797083963e-07, - "loss": 0.3278, + "epoch": 0.32923475165830773, + "grad_norm": 0.8146354616260442, + "learning_rate": 8.484245934692379e-06, + "loss": 0.8448, "step": 2035 }, { - "epoch": 0.8450704225352113, - "grad_norm": 2.3728222901700677, - "learning_rate": 7.139151984542636e-07, - "loss": 0.3306, + "epoch": 0.33004368225206276, + "grad_norm": 0.7977438419631868, + "learning_rate": 8.474104375158277e-06, + "loss": 0.8472, "step": 2040 }, { - "epoch": 0.8471416735708368, - "grad_norm": 2.3665157862844994, - "learning_rate": 6.954065879676653e-07, - "loss": 0.3309, + "epoch": 0.33085261284581785, + "grad_norm": 0.7577130929037066, + "learning_rate": 8.463935106655705e-06, + "loss": 0.849, "step": 2045 }, { - "epoch": 0.8492129246064622, - "grad_norm": 2.5008625602985304, - "learning_rate": 6.771231162857722e-07, - "loss": 0.3362, + "epoch": 0.3316615434395729, + "grad_norm": 0.7702289198050427, + "learning_rate": 8.453738210293316e-06, + "loss": 0.8646, "step": 2050 }, { - "epoch": 0.8512841756420878, - "grad_norm": 2.2712774856509688, - "learning_rate": 6.590657396705525e-07, - "loss": 0.3215, + "epoch": 0.33247047403332797, + "grad_norm": 0.8007959864809794, + "learning_rate": 8.443513767400126e-06, + "loss": 0.8488, "step": 2055 }, { - "epoch": 0.8533554266777134, - "grad_norm": 2.535681331577295, - "learning_rate": 6.412354025587509e-07, - "loss": 0.3229, + "epoch": 0.333279404627083, + "grad_norm": 0.8288190327751878, + "learning_rate": 8.433261859524856e-06, + "loss": 0.8473, "step": 2060 }, { - "epoch": 0.8554266777133389, - "grad_norm": 2.547194182489716, - "learning_rate": 6.236330375124921e-07, - "loss": 0.3461, + "epoch": 0.33408833522083803, + "grad_norm": 0.7899197293508395, + "learning_rate": 8.422982568435283e-06, + "loss": 0.8503, "step": 2065 }, { - "epoch": 0.8574979287489644, - "grad_norm": 2.371373910066343, - "learning_rate": 6.062595651705111e-07, - "loss": 0.3389, + "epoch": 0.3348972658145931, + "grad_norm": 0.7977808601108305, + "learning_rate": 8.412675976117585e-06, + "loss": 0.8711, "step": 2070 }, { - "epoch": 0.8595691797845899, - "grad_norm": 2.439136346587766, - "learning_rate": 5.891158941999959e-07, - "loss": 0.3299, + "epoch": 0.33570619640834815, + "grad_norm": 0.8550202055185513, + "learning_rate": 8.4023421647757e-06, + "loss": 0.8296, "step": 2075 }, { - "epoch": 0.8616404308202155, - "grad_norm": 2.466656635870232, - "learning_rate": 5.722029212490666e-07, - "loss": 0.3298, + "epoch": 0.33651512700210323, + "grad_norm": 0.8370007865096064, + "learning_rate": 8.391981216830651e-06, + "loss": 0.8789, "step": 2080 }, { - "epoch": 0.8637116818558409, - "grad_norm": 2.288892577992347, - "learning_rate": 5.555215308998779e-07, - "loss": 0.3223, + "epoch": 0.33732405759585826, + "grad_norm": 0.7648678747284082, + "learning_rate": 8.381593214919905e-06, + "loss": 0.8615, "step": 2085 }, { - "epoch": 0.8657829328914665, - "grad_norm": 2.401858716049078, - "learning_rate": 5.390725956223531e-07, - "loss": 0.3218, + "epoch": 0.33813298818961335, + "grad_norm": 0.8338949429924307, + "learning_rate": 8.371178241896708e-06, + "loss": 0.8546, "step": 2090 }, { - "epoch": 0.867854183927092, - "grad_norm": 2.33005782306982, - "learning_rate": 5.22856975728554e-07, - "loss": 0.3216, + "epoch": 0.3389419187833684, + "grad_norm": 0.7923771910011903, + "learning_rate": 8.36073638082942e-06, + "loss": 0.8421, "step": 2095 }, { - "epoch": 0.8699254349627175, - "grad_norm": 2.4143761257261303, - "learning_rate": 5.068755193276798e-07, - "loss": 0.3307, + "epoch": 0.33975084937712347, + "grad_norm": 0.7717061524027085, + "learning_rate": 8.350267715000857e-06, + "loss": 0.8459, "step": 2100 }, { - "epoch": 0.871996685998343, - "grad_norm": 2.352096096095919, - "learning_rate": 4.911290622817161e-07, - "loss": 0.3204, + "epoch": 0.3405597799708785, + "grad_norm": 0.7706870416392109, + "learning_rate": 8.339772327907627e-06, + "loss": 0.839, "step": 2105 }, { - "epoch": 0.8740679370339686, - "grad_norm": 2.3702490110719303, - "learning_rate": 4.756184281617121e-07, - "loss": 0.3245, + "epoch": 0.3413687105646336, + "grad_norm": 0.8629826885755151, + "learning_rate": 8.329250303259466e-06, + "loss": 0.847, "step": 2110 }, { - "epoch": 0.876139188069594, - "grad_norm": 2.2456862499766306, - "learning_rate": 4.6034442820471037e-07, - "loss": 0.3182, + "epoch": 0.3421776411583886, + "grad_norm": 0.7813813713851825, + "learning_rate": 8.318701724978564e-06, + "loss": 0.8627, "step": 2115 }, { - "epoch": 0.8782104391052196, - "grad_norm": 2.2534896280263816, - "learning_rate": 4.4530786127131575e-07, - "loss": 0.3312, + "epoch": 0.34298657175214364, + "grad_norm": 0.7760292657058412, + "learning_rate": 8.308126677198896e-06, + "loss": 0.843, "step": 2120 }, { - "epoch": 0.8802816901408451, - "grad_norm": 2.309108572623016, - "learning_rate": 4.305095138039106e-07, - "loss": 0.326, + "epoch": 0.34379550234589873, + "grad_norm": 0.8222906943010696, + "learning_rate": 8.29752524426556e-06, + "loss": 0.8362, "step": 2125 }, { - "epoch": 0.8823529411764706, - "grad_norm": 2.5387736296808563, - "learning_rate": 4.159501597855287e-07, - "loss": 0.3327, + "epoch": 0.34460443293965376, + "grad_norm": 0.8292261329180289, + "learning_rate": 8.286897510734098e-06, + "loss": 0.8548, "step": 2130 }, { - "epoch": 0.8844241922120961, - "grad_norm": 2.4068093993598962, - "learning_rate": 4.0163056069936757e-07, - "loss": 0.3265, + "epoch": 0.34541336353340885, + "grad_norm": 0.7772581002720523, + "learning_rate": 8.276243561369815e-06, + "loss": 0.8475, "step": 2135 }, { - "epoch": 0.8864954432477217, - "grad_norm": 2.410912519062069, - "learning_rate": 3.8755146548896784e-07, - "loss": 0.3286, + "epoch": 0.3462222941271639, + "grad_norm": 0.8663068319361349, + "learning_rate": 8.265563481147118e-06, + "loss": 0.8656, "step": 2140 }, { - "epoch": 0.8885666942833471, - "grad_norm": 2.3441979713103476, - "learning_rate": 3.737136105190337e-07, - "loss": 0.3215, + "epoch": 0.34703122472091896, + "grad_norm": 0.759426780159734, + "learning_rate": 8.254857355248823e-06, + "loss": 0.8272, "step": 2145 }, { - "epoch": 0.8906379453189727, - "grad_norm": 2.4124574145028843, - "learning_rate": 3.6011771953693044e-07, - "loss": 0.3222, + "epoch": 0.347840155314674, + "grad_norm": 0.8045042800571909, + "learning_rate": 8.244125269065492e-06, + "loss": 0.8284, "step": 2150 }, { - "epoch": 0.8927091963545982, - "grad_norm": 2.3579671682789467, - "learning_rate": 3.4676450363481937e-07, - "loss": 0.3227, + "epoch": 0.3486490859084291, + "grad_norm": 0.7696869214740462, + "learning_rate": 8.233367308194735e-06, + "loss": 0.8441, "step": 2155 }, { - "epoch": 0.8947804473902237, - "grad_norm": 2.3464323541089214, - "learning_rate": 3.336546612124758e-07, - "loss": 0.3197, + "epoch": 0.3494580165021841, + "grad_norm": 0.8076486642638429, + "learning_rate": 8.222583558440531e-06, + "loss": 0.845, "step": 2160 }, { - "epoch": 0.8968516984258492, - "grad_norm": 2.4445949626198242, - "learning_rate": 3.20788877940757e-07, - "loss": 0.3164, + "epoch": 0.35026694709593914, + "grad_norm": 0.7966361567902642, + "learning_rate": 8.21177410581256e-06, + "loss": 0.8512, "step": 2165 }, { - "epoch": 0.8989229494614748, - "grad_norm": 2.668457673876882, - "learning_rate": 3.081678267257404e-07, - "loss": 0.3233, + "epoch": 0.3510758776896942, + "grad_norm": 0.8192704344463675, + "learning_rate": 8.200939036525495e-06, + "loss": 0.8475, "step": 2170 }, { - "epoch": 0.9009942004971002, - "grad_norm": 2.2794632035182865, - "learning_rate": 2.9579216767352815e-07, - "loss": 0.314, + "epoch": 0.35188480828344926, + "grad_norm": 0.7866644986853055, + "learning_rate": 8.190078436998326e-06, + "loss": 0.8481, "step": 2175 }, { - "epoch": 0.9030654515327258, - "grad_norm": 2.511600762449477, - "learning_rate": 2.836625480557265e-07, - "loss": 0.3195, + "epoch": 0.35269373887720434, + "grad_norm": 0.7784262615486034, + "learning_rate": 8.179192393853667e-06, + "loss": 0.8482, "step": 2180 }, { - "epoch": 0.9051367025683513, - "grad_norm": 2.290095274854777, - "learning_rate": 2.7177960227558863e-07, - "loss": 0.323, + "epoch": 0.3535026694709594, + "grad_norm": 0.7987375076221339, + "learning_rate": 8.168280993917078e-06, + "loss": 0.8315, "step": 2185 }, { - "epoch": 0.9072079536039768, - "grad_norm": 2.490832241300732, - "learning_rate": 2.601439518348331e-07, - "loss": 0.3229, + "epoch": 0.35431160006471446, + "grad_norm": 0.7513918277520256, + "learning_rate": 8.15734432421634e-06, + "loss": 0.8547, "step": 2190 }, { - "epoch": 0.9092792046396023, - "grad_norm": 2.3369810868473833, - "learning_rate": 2.487562053011422e-07, - "loss": 0.3298, + "epoch": 0.3551205306584695, + "grad_norm": 0.8667321468753193, + "learning_rate": 8.146382471980803e-06, + "loss": 0.8583, "step": 2195 }, { - "epoch": 0.9113504556752279, - "grad_norm": 2.511875787240971, - "learning_rate": 2.376169582763288e-07, - "loss": 0.3273, + "epoch": 0.3559294612522246, + "grad_norm": 0.7568485546003565, + "learning_rate": 8.135395524640659e-06, + "loss": 0.8342, "step": 2200 }, { - "epoch": 0.9134217067108533, - "grad_norm": 2.4194421741312078, - "learning_rate": 2.2672679336518789e-07, - "loss": 0.3187, + "epoch": 0.3567383918459796, + "grad_norm": 0.8356531732827182, + "learning_rate": 8.124383569826253e-06, + "loss": 0.8642, "step": 2205 }, { - "epoch": 0.9154929577464789, - "grad_norm": 2.4729733542961516, - "learning_rate": 2.1608628014502364e-07, - "loss": 0.3209, + "epoch": 0.3575473224397347, + "grad_norm": 0.8080998046421258, + "learning_rate": 8.113346695367393e-06, + "loss": 0.8619, "step": 2210 }, { - "epoch": 0.9175642087821044, - "grad_norm": 2.379186052198249, - "learning_rate": 2.0569597513586004e-07, - "loss": 0.3229, + "epoch": 0.3583562530334897, + "grad_norm": 0.7192475502436593, + "learning_rate": 8.102284989292639e-06, + "loss": 0.858, "step": 2215 }, { - "epoch": 0.9196354598177299, - "grad_norm": 2.3319074686966674, - "learning_rate": 1.955564217713335e-07, - "loss": 0.3181, + "epoch": 0.35916518362724476, + "grad_norm": 0.8273826603550964, + "learning_rate": 8.091198539828601e-06, + "loss": 0.8433, "step": 2220 }, { - "epoch": 0.9217067108533554, - "grad_norm": 2.6517343047945334, - "learning_rate": 1.8566815037026897e-07, - "loss": 0.323, + "epoch": 0.35997411422099984, + "grad_norm": 0.8607381682601364, + "learning_rate": 8.080087435399248e-06, + "loss": 0.8382, "step": 2225 }, { - "epoch": 0.923777961888981, - "grad_norm": 2.232882122217696, - "learning_rate": 1.7603167810894662e-07, - "loss": 0.3196, + "epoch": 0.36078304481475487, + "grad_norm": 0.8001621243743261, + "learning_rate": 8.068951764625186e-06, + "loss": 0.8406, "step": 2230 }, { - "epoch": 0.9258492129246064, - "grad_norm": 2.3988532707568186, - "learning_rate": 1.6664750899404892e-07, - "loss": 0.3183, + "epoch": 0.36159197540850996, + "grad_norm": 0.7615698239071352, + "learning_rate": 8.057791616322958e-06, + "loss": 0.8316, "step": 2235 }, { - "epoch": 0.927920463960232, - "grad_norm": 2.3744099648372403, - "learning_rate": 1.5751613383630128e-07, - "loss": 0.3181, + "epoch": 0.362400906002265, + "grad_norm": 0.8191316557232389, + "learning_rate": 8.046607079504345e-06, + "loss": 0.8585, "step": 2240 }, { - "epoch": 0.9299917149958575, - "grad_norm": 2.419787212184274, - "learning_rate": 1.4863803022480362e-07, - "loss": 0.3244, + "epoch": 0.3632098365960201, + "grad_norm": 0.8329244880945085, + "learning_rate": 8.035398243375636e-06, + "loss": 0.8237, "step": 2245 }, { - "epoch": 0.932062966031483, - "grad_norm": 2.3333109862483856, - "learning_rate": 1.4001366250204762e-07, - "loss": 0.325, + "epoch": 0.3640187671897751, + "grad_norm": 0.8151507773643624, + "learning_rate": 8.024165197336934e-06, + "loss": 0.875, "step": 2250 }, { - "epoch": 0.9341342170671085, - "grad_norm": 2.5299591194957505, - "learning_rate": 1.3164348173963392e-07, - "loss": 0.3251, + "epoch": 0.3648276977835302, + "grad_norm": 0.8450488670993342, + "learning_rate": 8.012908030981442e-06, + "loss": 0.8336, "step": 2255 }, { - "epoch": 0.9362054681027341, - "grad_norm": 2.415328780928961, - "learning_rate": 1.235279257146804e-07, - "loss": 0.3317, + "epoch": 0.3656366283772852, + "grad_norm": 0.7623122756894671, + "learning_rate": 8.00162683409473e-06, + "loss": 0.8535, "step": 2260 }, { - "epoch": 0.9382767191383595, - "grad_norm": 2.39855625868432, - "learning_rate": 1.1566741888692168e-07, - "loss": 0.3106, + "epoch": 0.3664455589710403, + "grad_norm": 0.7904848999057988, + "learning_rate": 7.990321696654044e-06, + "loss": 0.8566, "step": 2265 }, { - "epoch": 0.9403479701739851, - "grad_norm": 2.51301146564224, - "learning_rate": 1.080623723765134e-07, - "loss": 0.3207, + "epoch": 0.36725448956479534, + "grad_norm": 0.7803510833606502, + "learning_rate": 7.978992708827571e-06, + "loss": 0.8544, "step": 2270 }, { - "epoch": 0.9424192212096106, - "grad_norm": 2.5451397411628425, - "learning_rate": 1.0071318394252849e-07, - "loss": 0.321, + "epoch": 0.36806342015855037, + "grad_norm": 0.8005678354543746, + "learning_rate": 7.967639960973727e-06, + "loss": 0.8461, "step": 2275 }, { - "epoch": 0.9444904722452361, - "grad_norm": 2.3238922238777753, - "learning_rate": 9.362023796215036e-08, - "loss": 0.3115, + "epoch": 0.36887235075230546, + "grad_norm": 0.8007519176589866, + "learning_rate": 7.956263543640432e-06, + "loss": 0.8409, "step": 2280 }, { - "epoch": 0.9465617232808616, - "grad_norm": 2.3960161103125595, - "learning_rate": 8.678390541057512e-08, - "loss": 0.3334, + "epoch": 0.3696812813460605, + "grad_norm": 0.8474393359723089, + "learning_rate": 7.944863547564396e-06, + "loss": 0.8585, "step": 2285 }, { - "epoch": 0.9486329743164872, - "grad_norm": 2.455291465574181, - "learning_rate": 8.020454384160437e-08, - "loss": 0.321, + "epoch": 0.3704902119398156, + "grad_norm": 0.8486471733965024, + "learning_rate": 7.933440063670383e-06, + "loss": 0.8399, "step": 2290 }, { - "epoch": 0.9507042253521126, - "grad_norm": 2.313389085031741, - "learning_rate": 7.388249736894615e-08, - "loss": 0.3155, + "epoch": 0.3712991425335706, + "grad_norm": 0.7861689750278411, + "learning_rate": 7.921993183070497e-06, + "loss": 0.8496, "step": 2295 }, { - "epoch": 0.9527754763877382, - "grad_norm": 2.4859967083644507, - "learning_rate": 6.78180966482156e-08, - "loss": 0.3189, + "epoch": 0.3721080731273257, + "grad_norm": 0.8805931639748436, + "learning_rate": 7.910522997063451e-06, + "loss": 0.8351, "step": 2300 }, { - "epoch": 0.9548467274233637, - "grad_norm": 2.5150728286365878, - "learning_rate": 6.201165885964311e-08, - "loss": 0.3196, + "epoch": 0.3729170037210807, + "grad_norm": 0.7946263432456658, + "learning_rate": 7.899029597133836e-06, + "loss": 0.8444, "step": 2305 }, { - "epoch": 0.9569179784589892, - "grad_norm": 2.5646449395309134, - "learning_rate": 5.646348769148491e-08, - "loss": 0.32, + "epoch": 0.3737259343148358, + "grad_norm": 0.8337819610743522, + "learning_rate": 7.887513074951397e-06, + "loss": 0.8491, "step": 2310 }, { - "epoch": 0.9589892294946147, - "grad_norm": 2.4648895794572145, - "learning_rate": 5.117387332413737e-08, - "loss": 0.3142, + "epoch": 0.37453486490859084, + "grad_norm": 0.8157784141608686, + "learning_rate": 7.875973522370294e-06, + "loss": 0.835, "step": 2315 }, { - "epoch": 0.9610604805302403, - "grad_norm": 2.4075943066752292, - "learning_rate": 4.6143092414961396e-08, - "loss": 0.3275, + "epoch": 0.3753437955023459, + "grad_norm": 0.8127539829554938, + "learning_rate": 7.864411031428379e-06, + "loss": 0.8425, "step": 2320 }, { - "epoch": 0.9631317315658657, - "grad_norm": 2.5717189064452604, - "learning_rate": 4.1371408083815126e-08, - "loss": 0.3326, + "epoch": 0.37615272609610095, + "grad_norm": 0.7999538507762206, + "learning_rate": 7.852825694346455e-06, + "loss": 0.8364, "step": 2325 }, { - "epoch": 0.9652029826014913, - "grad_norm": 2.2405081152048982, - "learning_rate": 3.685906989928656e-08, - "loss": 0.3198, + "epoch": 0.376961656689856, + "grad_norm": 0.8590002826350661, + "learning_rate": 7.841217603527545e-06, + "loss": 0.8415, "step": 2330 }, { - "epoch": 0.9672742336371168, - "grad_norm": 2.493323605860437, - "learning_rate": 3.2606313865646276e-08, - "loss": 0.3158, + "epoch": 0.37777058728361107, + "grad_norm": 0.8424318909298105, + "learning_rate": 7.82958685155615e-06, + "loss": 0.8559, "step": 2335 }, { - "epoch": 0.9693454846727423, - "grad_norm": 2.3221489971336307, - "learning_rate": 2.861336241050061e-08, - "loss": 0.3192, + "epoch": 0.3785795178773661, + "grad_norm": 0.7727695207961051, + "learning_rate": 7.817933531197513e-06, + "loss": 0.8479, "step": 2340 }, { - "epoch": 0.9714167357083678, - "grad_norm": 2.224686624765676, - "learning_rate": 2.488042437315985e-08, - "loss": 0.3144, + "epoch": 0.3793884484711212, + "grad_norm": 0.8080185911172371, + "learning_rate": 7.806257735396879e-06, + "loss": 0.832, "step": 2345 }, { - "epoch": 0.9734879867439934, - "grad_norm": 2.517150826241084, - "learning_rate": 2.1407694993714755e-08, - "loss": 0.3201, + "epoch": 0.3801973790648762, + "grad_norm": 0.8144308118806884, + "learning_rate": 7.794559557278757e-06, + "loss": 0.8449, "step": 2350 }, { - "epoch": 0.9755592377796188, - "grad_norm": 2.4950963422344126, - "learning_rate": 1.8195355902824174e-08, - "loss": 0.3094, + "epoch": 0.3810063096586313, + "grad_norm": 0.8257947013812411, + "learning_rate": 7.782839090146172e-06, + "loss": 0.8579, "step": 2355 }, { - "epoch": 0.9776304888152444, - "grad_norm": 2.3719000652949376, - "learning_rate": 1.5243575112218744e-08, - "loss": 0.3069, + "epoch": 0.38181524025238633, + "grad_norm": 0.7945614016073755, + "learning_rate": 7.771096427479925e-06, + "loss": 0.8298, "step": 2360 }, { - "epoch": 0.97970173985087, - "grad_norm": 2.532095428796904, - "learning_rate": 1.2552507005909042e-08, - "loss": 0.317, + "epoch": 0.3826241708461414, + "grad_norm": 0.8410802459537005, + "learning_rate": 7.759331662937841e-06, + "loss": 0.8491, "step": 2365 }, { - "epoch": 0.9817729908864954, - "grad_norm": 2.508218831541988, - "learning_rate": 1.0122292332114814e-08, - "loss": 0.3214, + "epoch": 0.38343310143989645, + "grad_norm": 0.8308755908758182, + "learning_rate": 7.747544890354031e-06, + "loss": 0.8573, "step": 2370 }, { - "epoch": 0.9838442419221209, - "grad_norm": 2.3230365500226156, - "learning_rate": 7.953058195900864e-09, - "loss": 0.3214, + "epoch": 0.38424203203365154, + "grad_norm": 0.8040651764511801, + "learning_rate": 7.735736203738137e-06, + "loss": 0.8344, "step": 2375 }, { - "epoch": 0.9859154929577465, - "grad_norm": 2.3842329300121565, - "learning_rate": 6.044918052531268e-09, - "loss": 0.3267, + "epoch": 0.38505096262740657, + "grad_norm": 0.8125819462471874, + "learning_rate": 7.723905697274586e-06, + "loss": 0.8424, "step": 2380 }, { - "epoch": 0.987986743993372, - "grad_norm": 2.450379118891722, - "learning_rate": 4.397971701533554e-09, - "loss": 0.3231, + "epoch": 0.3858598932211616, + "grad_norm": 0.7539076215491992, + "learning_rate": 7.71205346532183e-06, + "loss": 0.8495, "step": 2385 }, { - "epoch": 0.9900579950289975, - "grad_norm": 2.3274963425198423, - "learning_rate": 3.0123052814812203e-09, - "loss": 0.3284, + "epoch": 0.3866688238149167, + "grad_norm": 0.8155202307231507, + "learning_rate": 7.700179602411615e-06, + "loss": 0.8342, "step": 2390 }, { - "epoch": 0.992129246064623, - "grad_norm": 2.4027725490076075, - "learning_rate": 1.887991265486222e-09, - "loss": 0.309, + "epoch": 0.3874777544086717, + "grad_norm": 0.8220478382780729, + "learning_rate": 7.688284203248197e-06, + "loss": 0.846, "step": 2395 }, { - "epoch": 0.9942004971002486, - "grad_norm": 2.5888360240441015, - "learning_rate": 1.025088457409229e-09, - "loss": 0.3177, + "epoch": 0.3882866850024268, + "grad_norm": 0.8673932844304086, + "learning_rate": 7.67636736270761e-06, + "loss": 0.875, "step": 2400 }, { - "epoch": 0.996271748135874, - "grad_norm": 2.5018345143420917, - "learning_rate": 4.2364198878597216e-10, - "loss": 0.3297, + "epoch": 0.38909561559618183, + "grad_norm": 0.803133188698084, + "learning_rate": 7.664429175836903e-06, + "loss": 0.8672, "step": 2405 }, { - "epoch": 0.9983429991714996, - "grad_norm": 2.22608195027372, - "learning_rate": 8.368331646302353e-11, - "loss": 0.3161, + "epoch": 0.3899045461899369, + "grad_norm": 0.7796141487305905, + "learning_rate": 7.652469737853372e-06, + "loss": 0.8542, "step": 2410 }, + { + "epoch": 0.39071347678369195, + "grad_norm": 0.7721547412514925, + "learning_rate": 7.64048914414382e-06, + "loss": 0.8274, + "step": 2415 + }, + { + "epoch": 0.39152240737744703, + "grad_norm": 0.8076354344928516, + "learning_rate": 7.628487490263779e-06, + "loss": 0.8315, + "step": 2420 + }, + { + "epoch": 0.39233133797120207, + "grad_norm": 0.8310201097209512, + "learning_rate": 7.616464871936748e-06, + "loss": 0.8478, + "step": 2425 + }, + { + "epoch": 0.39314026856495715, + "grad_norm": 0.8088889309070267, + "learning_rate": 7.60442138505345e-06, + "loss": 0.8434, + "step": 2430 + }, + { + "epoch": 0.3939491991587122, + "grad_norm": 0.8845883583499743, + "learning_rate": 7.5923571256710396e-06, + "loss": 0.827, + "step": 2435 + }, + { + "epoch": 0.3947581297524672, + "grad_norm": 0.8075412429174051, + "learning_rate": 7.580272190012357e-06, + "loss": 0.8395, + "step": 2440 + }, + { + "epoch": 0.3955670603462223, + "grad_norm": 0.781477252019071, + "learning_rate": 7.5681666744651505e-06, + "loss": 0.8157, + "step": 2445 + }, + { + "epoch": 0.39637599093997733, + "grad_norm": 0.7999629302259256, + "learning_rate": 7.556040675581311e-06, + "loss": 0.8288, + "step": 2450 + }, + { + "epoch": 0.3971849215337324, + "grad_norm": 0.8292218777187262, + "learning_rate": 7.5438942900761035e-06, + "loss": 0.8417, + "step": 2455 + }, + { + "epoch": 0.39799385212748745, + "grad_norm": 0.7796119585780503, + "learning_rate": 7.531727614827392e-06, + "loss": 0.8512, + "step": 2460 + }, + { + "epoch": 0.39880278272124253, + "grad_norm": 1.0387110538612172, + "learning_rate": 7.519540746874869e-06, + "loss": 0.8261, + "step": 2465 + }, + { + "epoch": 0.39961171331499756, + "grad_norm": 0.8332505684625972, + "learning_rate": 7.507333783419282e-06, + "loss": 0.8643, + "step": 2470 + }, + { + "epoch": 0.40042064390875265, + "grad_norm": 0.80731246004178, + "learning_rate": 7.495106821821656e-06, + "loss": 0.8542, + "step": 2475 + }, + { + "epoch": 0.4012295745025077, + "grad_norm": 0.8894304174472868, + "learning_rate": 7.482859959602518e-06, + "loss": 0.8403, + "step": 2480 + }, + { + "epoch": 0.40203850509626277, + "grad_norm": 0.8754692672700708, + "learning_rate": 7.470593294441124e-06, + "loss": 0.8516, + "step": 2485 + }, + { + "epoch": 0.4028474356900178, + "grad_norm": 0.8789987496906461, + "learning_rate": 7.4583069241746696e-06, + "loss": 0.834, + "step": 2490 + }, + { + "epoch": 0.4036563662837728, + "grad_norm": 0.841200352411981, + "learning_rate": 7.446000946797519e-06, + "loss": 0.8241, + "step": 2495 + }, + { + "epoch": 0.4044652968775279, + "grad_norm": 0.8282889658691387, + "learning_rate": 7.433675460460419e-06, + "loss": 0.845, + "step": 2500 + }, + { + "epoch": 0.40527422747128294, + "grad_norm": 0.8171176352536702, + "learning_rate": 7.421330563469717e-06, + "loss": 0.8361, + "step": 2505 + }, + { + "epoch": 0.40608315806503803, + "grad_norm": 0.7424149733511415, + "learning_rate": 7.408966354286575e-06, + "loss": 0.8394, + "step": 2510 + }, + { + "epoch": 0.40689208865879306, + "grad_norm": 0.772435215520305, + "learning_rate": 7.396582931526194e-06, + "loss": 0.8364, + "step": 2515 + }, + { + "epoch": 0.40770101925254815, + "grad_norm": 0.8577362984373131, + "learning_rate": 7.38418039395701e-06, + "loss": 0.8522, + "step": 2520 + }, + { + "epoch": 0.4085099498463032, + "grad_norm": 0.8324922368152244, + "learning_rate": 7.371758840499921e-06, + "loss": 0.837, + "step": 2525 + }, + { + "epoch": 0.40931888044005826, + "grad_norm": 0.8167673718582185, + "learning_rate": 7.359318370227494e-06, + "loss": 0.8551, + "step": 2530 + }, + { + "epoch": 0.4101278110338133, + "grad_norm": 0.8183481719062728, + "learning_rate": 7.346859082363172e-06, + "loss": 0.8653, + "step": 2535 + }, + { + "epoch": 0.4109367416275684, + "grad_norm": 0.8526948175700703, + "learning_rate": 7.334381076280483e-06, + "loss": 0.8321, + "step": 2540 + }, + { + "epoch": 0.4117456722213234, + "grad_norm": 0.8683127108850153, + "learning_rate": 7.321884451502252e-06, + "loss": 0.8391, + "step": 2545 + }, + { + "epoch": 0.41255460281507844, + "grad_norm": 0.8156307062993907, + "learning_rate": 7.309369307699802e-06, + "loss": 0.8281, + "step": 2550 + }, + { + "epoch": 0.41336353340883353, + "grad_norm": 0.831536181417552, + "learning_rate": 7.296835744692163e-06, + "loss": 0.8417, + "step": 2555 + }, + { + "epoch": 0.41417246400258856, + "grad_norm": 0.840572967544839, + "learning_rate": 7.2842838624452725e-06, + "loss": 0.8172, + "step": 2560 + }, + { + "epoch": 0.41498139459634364, + "grad_norm": 0.8063608871323279, + "learning_rate": 7.271713761071181e-06, + "loss": 0.8533, + "step": 2565 + }, + { + "epoch": 0.4157903251900987, + "grad_norm": 0.9598879655118182, + "learning_rate": 7.259125540827248e-06, + "loss": 0.8323, + "step": 2570 + }, + { + "epoch": 0.41659925578385376, + "grad_norm": 0.77131131850428, + "learning_rate": 7.246519302115355e-06, + "loss": 0.8202, + "step": 2575 + }, + { + "epoch": 0.4174081863776088, + "grad_norm": 0.7740177253879231, + "learning_rate": 7.233895145481086e-06, + "loss": 0.8169, + "step": 2580 + }, + { + "epoch": 0.4182171169713639, + "grad_norm": 0.7993458055375153, + "learning_rate": 7.221253171612944e-06, + "loss": 0.8281, + "step": 2585 + }, + { + "epoch": 0.4190260475651189, + "grad_norm": 0.8017047616887952, + "learning_rate": 7.208593481341536e-06, + "loss": 0.8551, + "step": 2590 + }, + { + "epoch": 0.419834978158874, + "grad_norm": 0.7947124171074315, + "learning_rate": 7.195916175638773e-06, + "loss": 0.8347, + "step": 2595 + }, + { + "epoch": 0.420643908752629, + "grad_norm": 0.8878564841823726, + "learning_rate": 7.183221355617065e-06, + "loss": 0.8377, + "step": 2600 + }, + { + "epoch": 0.42145283934638406, + "grad_norm": 0.8125554403726852, + "learning_rate": 7.170509122528511e-06, + "loss": 0.8322, + "step": 2605 + }, + { + "epoch": 0.42226176994013914, + "grad_norm": 0.8652065540252103, + "learning_rate": 7.157779577764099e-06, + "loss": 0.8367, + "step": 2610 + }, + { + "epoch": 0.4230707005338942, + "grad_norm": 0.8610622568512487, + "learning_rate": 7.145032822852889e-06, + "loss": 0.8378, + "step": 2615 + }, + { + "epoch": 0.42387963112764926, + "grad_norm": 0.7876508067233663, + "learning_rate": 7.132268959461209e-06, + "loss": 0.8375, + "step": 2620 + }, + { + "epoch": 0.4246885617214043, + "grad_norm": 0.8282333937739974, + "learning_rate": 7.119488089391836e-06, + "loss": 0.8406, + "step": 2625 + }, + { + "epoch": 0.4254974923151594, + "grad_norm": 0.8305925610445289, + "learning_rate": 7.106690314583199e-06, + "loss": 0.8414, + "step": 2630 + }, + { + "epoch": 0.4263064229089144, + "grad_norm": 0.789770771549953, + "learning_rate": 7.0938757371085485e-06, + "loss": 0.853, + "step": 2635 + }, + { + "epoch": 0.4271153535026695, + "grad_norm": 0.8456657346625689, + "learning_rate": 7.081044459175159e-06, + "loss": 0.8308, + "step": 2640 + }, + { + "epoch": 0.4279242840964245, + "grad_norm": 0.8352288349586666, + "learning_rate": 7.068196583123495e-06, + "loss": 0.8269, + "step": 2645 + }, + { + "epoch": 0.4287332146901796, + "grad_norm": 0.8448828847445977, + "learning_rate": 7.055332211426417e-06, + "loss": 0.8388, + "step": 2650 + }, + { + "epoch": 0.42954214528393464, + "grad_norm": 0.858853034822238, + "learning_rate": 7.042451446688342e-06, + "loss": 0.8298, + "step": 2655 + }, + { + "epoch": 0.43035107587768967, + "grad_norm": 0.8391759096265385, + "learning_rate": 7.029554391644441e-06, + "loss": 0.8371, + "step": 2660 + }, + { + "epoch": 0.43116000647144476, + "grad_norm": 0.7973420420214934, + "learning_rate": 7.016641149159816e-06, + "loss": 0.8377, + "step": 2665 + }, + { + "epoch": 0.4319689370651998, + "grad_norm": 0.8010191599865539, + "learning_rate": 7.00371182222867e-06, + "loss": 0.8483, + "step": 2670 + }, + { + "epoch": 0.4327778676589549, + "grad_norm": 0.8013088566432139, + "learning_rate": 6.9907665139735035e-06, + "loss": 0.8366, + "step": 2675 + }, + { + "epoch": 0.4335867982527099, + "grad_norm": 0.8481738940133525, + "learning_rate": 6.977805327644275e-06, + "loss": 0.8311, + "step": 2680 + }, + { + "epoch": 0.434395728846465, + "grad_norm": 0.7925446967652235, + "learning_rate": 6.964828366617583e-06, + "loss": 0.8466, + "step": 2685 + }, + { + "epoch": 0.43520465944022, + "grad_norm": 0.810554276504509, + "learning_rate": 6.95183573439585e-06, + "loss": 0.8468, + "step": 2690 + }, + { + "epoch": 0.4360135900339751, + "grad_norm": 0.8018391904385581, + "learning_rate": 6.938827534606484e-06, + "loss": 0.8356, + "step": 2695 + }, + { + "epoch": 0.43682252062773014, + "grad_norm": 0.8273266309236444, + "learning_rate": 6.925803871001058e-06, + "loss": 0.8358, + "step": 2700 + }, + { + "epoch": 0.4376314512214852, + "grad_norm": 0.867800658645478, + "learning_rate": 6.912764847454485e-06, + "loss": 0.8332, + "step": 2705 + }, + { + "epoch": 0.43844038181524025, + "grad_norm": 0.8366321913701985, + "learning_rate": 6.899710567964184e-06, + "loss": 0.8289, + "step": 2710 + }, + { + "epoch": 0.4392493124089953, + "grad_norm": 0.851318956301739, + "learning_rate": 6.8866411366492546e-06, + "loss": 0.8252, + "step": 2715 + }, + { + "epoch": 0.44005824300275037, + "grad_norm": 0.8661629567364111, + "learning_rate": 6.873556657749646e-06, + "loss": 0.8433, + "step": 2720 + }, + { + "epoch": 0.4408671735965054, + "grad_norm": 0.8890212561668989, + "learning_rate": 6.860457235625322e-06, + "loss": 0.8327, + "step": 2725 + }, + { + "epoch": 0.4416761041902605, + "grad_norm": 0.8479920834756562, + "learning_rate": 6.847342974755435e-06, + "loss": 0.8408, + "step": 2730 + }, + { + "epoch": 0.4424850347840155, + "grad_norm": 0.8040960156855093, + "learning_rate": 6.834213979737488e-06, + "loss": 0.8667, + "step": 2735 + }, + { + "epoch": 0.4432939653777706, + "grad_norm": 0.8449566845357666, + "learning_rate": 6.821070355286498e-06, + "loss": 0.8318, + "step": 2740 + }, + { + "epoch": 0.44410289597152564, + "grad_norm": 0.8008355458582818, + "learning_rate": 6.807912206234168e-06, + "loss": 0.8233, + "step": 2745 + }, + { + "epoch": 0.4449118265652807, + "grad_norm": 0.8051699329730306, + "learning_rate": 6.794739637528051e-06, + "loss": 0.8386, + "step": 2750 + }, + { + "epoch": 0.44572075715903575, + "grad_norm": 0.8602857948468763, + "learning_rate": 6.7815527542307e-06, + "loss": 0.8399, + "step": 2755 + }, + { + "epoch": 0.44652968775279084, + "grad_norm": 0.8187853844017233, + "learning_rate": 6.768351661518845e-06, + "loss": 0.8176, + "step": 2760 + }, + { + "epoch": 0.44733861834654587, + "grad_norm": 0.8053208482891812, + "learning_rate": 6.755136464682546e-06, + "loss": 0.8475, + "step": 2765 + }, + { + "epoch": 0.4481475489403009, + "grad_norm": 0.8750589437015153, + "learning_rate": 6.741907269124358e-06, + "loss": 0.8279, + "step": 2770 + }, + { + "epoch": 0.448956479534056, + "grad_norm": 0.8815876030660124, + "learning_rate": 6.728664180358487e-06, + "loss": 0.8438, + "step": 2775 + }, + { + "epoch": 0.449765410127811, + "grad_norm": 0.8318847722342249, + "learning_rate": 6.715407304009948e-06, + "loss": 0.852, + "step": 2780 + }, + { + "epoch": 0.4505743407215661, + "grad_norm": 0.8913600813483957, + "learning_rate": 6.702136745813721e-06, + "loss": 0.8435, + "step": 2785 + }, + { + "epoch": 0.45138327131532113, + "grad_norm": 0.8207757002268261, + "learning_rate": 6.688852611613921e-06, + "loss": 0.8432, + "step": 2790 + }, + { + "epoch": 0.4521922019090762, + "grad_norm": 0.8964348508265096, + "learning_rate": 6.675555007362931e-06, + "loss": 0.8492, + "step": 2795 + }, + { + "epoch": 0.45300113250283125, + "grad_norm": 0.88701161837332, + "learning_rate": 6.662244039120575e-06, + "loss": 0.8488, + "step": 2800 + }, + { + "epoch": 0.45381006309658634, + "grad_norm": 0.7897608660219696, + "learning_rate": 6.648919813053266e-06, + "loss": 0.8436, + "step": 2805 + }, + { + "epoch": 0.45461899369034137, + "grad_norm": 0.8277360620764583, + "learning_rate": 6.635582435433161e-06, + "loss": 0.8473, + "step": 2810 + }, + { + "epoch": 0.4554279242840964, + "grad_norm": 0.8476898435569198, + "learning_rate": 6.6222320126373105e-06, + "loss": 0.8248, + "step": 2815 + }, + { + "epoch": 0.4562368548778515, + "grad_norm": 0.8213309435302791, + "learning_rate": 6.6088686511468106e-06, + "loss": 0.8208, + "step": 2820 + }, + { + "epoch": 0.4570457854716065, + "grad_norm": 0.8314089865069589, + "learning_rate": 6.595492457545953e-06, + "loss": 0.8654, + "step": 2825 + }, + { + "epoch": 0.4578547160653616, + "grad_norm": 0.8098173131527258, + "learning_rate": 6.582103538521383e-06, + "loss": 0.817, + "step": 2830 + }, + { + "epoch": 0.45866364665911663, + "grad_norm": 0.8530608603749926, + "learning_rate": 6.568702000861234e-06, + "loss": 0.858, + "step": 2835 + }, + { + "epoch": 0.4594725772528717, + "grad_norm": 0.884209021072577, + "learning_rate": 6.5552879514542915e-06, + "loss": 0.8314, + "step": 2840 + }, + { + "epoch": 0.46028150784662675, + "grad_norm": 0.8215391384460337, + "learning_rate": 6.541861497289126e-06, + "loss": 0.8284, + "step": 2845 + }, + { + "epoch": 0.46109043844038183, + "grad_norm": 0.7972506723621342, + "learning_rate": 6.528422745453251e-06, + "loss": 0.8439, + "step": 2850 + }, + { + "epoch": 0.46189936903413686, + "grad_norm": 0.8602949746737556, + "learning_rate": 6.514971803132264e-06, + "loss": 0.8366, + "step": 2855 + }, + { + "epoch": 0.46270829962789195, + "grad_norm": 0.8506988739569268, + "learning_rate": 6.50150877760899e-06, + "loss": 0.86, + "step": 2860 + }, + { + "epoch": 0.463517230221647, + "grad_norm": 0.7981906008336666, + "learning_rate": 6.488033776262632e-06, + "loss": 0.8531, + "step": 2865 + }, + { + "epoch": 0.464326160815402, + "grad_norm": 0.7798400777450988, + "learning_rate": 6.474546906567905e-06, + "loss": 0.8457, + "step": 2870 + }, + { + "epoch": 0.4651350914091571, + "grad_norm": 0.8391735146467554, + "learning_rate": 6.46104827609419e-06, + "loss": 0.8408, + "step": 2875 + }, + { + "epoch": 0.46594402200291213, + "grad_norm": 0.8052808401629455, + "learning_rate": 6.447537992504663e-06, + "loss": 0.8486, + "step": 2880 + }, + { + "epoch": 0.4667529525966672, + "grad_norm": 0.7988941689518605, + "learning_rate": 6.434016163555452e-06, + "loss": 0.846, + "step": 2885 + }, + { + "epoch": 0.46756188319042224, + "grad_norm": 0.8165894688556871, + "learning_rate": 6.420482897094764e-06, + "loss": 0.8319, + "step": 2890 + }, + { + "epoch": 0.46837081378417733, + "grad_norm": 0.8121894000178076, + "learning_rate": 6.406938301062031e-06, + "loss": 0.8388, + "step": 2895 + }, + { + "epoch": 0.46917974437793236, + "grad_norm": 0.835869232540653, + "learning_rate": 6.393382483487049e-06, + "loss": 0.8228, + "step": 2900 + }, + { + "epoch": 0.46998867497168745, + "grad_norm": 0.912311843916525, + "learning_rate": 6.379815552489112e-06, + "loss": 0.819, + "step": 2905 + }, + { + "epoch": 0.4707976055654425, + "grad_norm": 0.8202079981924546, + "learning_rate": 6.366237616276161e-06, + "loss": 0.825, + "step": 2910 + }, + { + "epoch": 0.47160653615919756, + "grad_norm": 0.82471875114036, + "learning_rate": 6.3526487831439045e-06, + "loss": 0.8238, + "step": 2915 + }, + { + "epoch": 0.4724154667529526, + "grad_norm": 0.9037615912491918, + "learning_rate": 6.339049161474965e-06, + "loss": 0.8202, + "step": 2920 + }, + { + "epoch": 0.4732243973467076, + "grad_norm": 0.8152831104583218, + "learning_rate": 6.3254388597380165e-06, + "loss": 0.8448, + "step": 2925 + }, + { + "epoch": 0.4740333279404627, + "grad_norm": 0.8118468395706068, + "learning_rate": 6.311817986486917e-06, + "loss": 0.8342, + "step": 2930 + }, + { + "epoch": 0.47484225853421774, + "grad_norm": 0.8075212743777721, + "learning_rate": 6.298186650359832e-06, + "loss": 0.8274, + "step": 2935 + }, + { + "epoch": 0.47565118912797283, + "grad_norm": 0.859392711702304, + "learning_rate": 6.284544960078387e-06, + "loss": 0.8401, + "step": 2940 + }, + { + "epoch": 0.47646011972172786, + "grad_norm": 0.8858782454028935, + "learning_rate": 6.270893024446788e-06, + "loss": 0.8624, + "step": 2945 + }, + { + "epoch": 0.47726905031548295, + "grad_norm": 0.8427992758295506, + "learning_rate": 6.257230952350954e-06, + "loss": 0.839, + "step": 2950 + }, + { + "epoch": 0.478077980909238, + "grad_norm": 0.8680923960328171, + "learning_rate": 6.243558852757654e-06, + "loss": 0.8577, + "step": 2955 + }, + { + "epoch": 0.47888691150299306, + "grad_norm": 0.8682921557831497, + "learning_rate": 6.229876834713633e-06, + "loss": 0.839, + "step": 2960 + }, + { + "epoch": 0.4796958420967481, + "grad_norm": 0.8040852911882872, + "learning_rate": 6.216185007344745e-06, + "loss": 0.8501, + "step": 2965 + }, + { + "epoch": 0.4805047726905032, + "grad_norm": 0.7631057139150006, + "learning_rate": 6.202483479855083e-06, + "loss": 0.8268, + "step": 2970 + }, + { + "epoch": 0.4813137032842582, + "grad_norm": 0.813256135509401, + "learning_rate": 6.188772361526104e-06, + "loss": 0.8196, + "step": 2975 + }, + { + "epoch": 0.48212263387801324, + "grad_norm": 0.8297884897498811, + "learning_rate": 6.175051761715762e-06, + "loss": 0.848, + "step": 2980 + }, + { + "epoch": 0.4829315644717683, + "grad_norm": 0.8334678385544815, + "learning_rate": 6.161321789857635e-06, + "loss": 0.8367, + "step": 2985 + }, + { + "epoch": 0.48374049506552336, + "grad_norm": 0.8985811450220941, + "learning_rate": 6.147582555460048e-06, + "loss": 0.843, + "step": 2990 + }, + { + "epoch": 0.48454942565927844, + "grad_norm": 0.8362819535768706, + "learning_rate": 6.133834168105206e-06, + "loss": 0.8385, + "step": 2995 + }, + { + "epoch": 0.4853583562530335, + "grad_norm": 0.8317479151299757, + "learning_rate": 6.120076737448314e-06, + "loss": 0.8331, + "step": 3000 + }, + { + "epoch": 0.48616728684678856, + "grad_norm": 0.816034350431961, + "learning_rate": 6.106310373216706e-06, + "loss": 0.8389, + "step": 3005 + }, + { + "epoch": 0.4869762174405436, + "grad_norm": 0.8435438416835686, + "learning_rate": 6.092535185208973e-06, + "loss": 0.8144, + "step": 3010 + }, + { + "epoch": 0.4877851480342987, + "grad_norm": 0.8650473536522931, + "learning_rate": 6.078751283294075e-06, + "loss": 0.8415, + "step": 3015 + }, + { + "epoch": 0.4885940786280537, + "grad_norm": 0.8631976632792485, + "learning_rate": 6.0649587774104775e-06, + "loss": 0.8384, + "step": 3020 + }, + { + "epoch": 0.4894030092218088, + "grad_norm": 0.8051930942211001, + "learning_rate": 6.0511577775652744e-06, + "loss": 0.8307, + "step": 3025 + }, + { + "epoch": 0.4902119398155638, + "grad_norm": 0.9000033790636423, + "learning_rate": 6.037348393833298e-06, + "loss": 0.8288, + "step": 3030 + }, + { + "epoch": 0.49102087040931885, + "grad_norm": 0.8370590038208652, + "learning_rate": 6.0235307363562524e-06, + "loss": 0.8331, + "step": 3035 + }, + { + "epoch": 0.49182980100307394, + "grad_norm": 0.8387976240360758, + "learning_rate": 6.009704915341835e-06, + "loss": 0.8429, + "step": 3040 + }, + { + "epoch": 0.49263873159682897, + "grad_norm": 0.849367045833896, + "learning_rate": 5.9958710410628515e-06, + "loss": 0.8245, + "step": 3045 + }, + { + "epoch": 0.49344766219058406, + "grad_norm": 0.817264343403589, + "learning_rate": 5.9820292238563404e-06, + "loss": 0.8279, + "step": 3050 + }, + { + "epoch": 0.4942565927843391, + "grad_norm": 0.8397242273647842, + "learning_rate": 5.96817957412269e-06, + "loss": 0.8211, + "step": 3055 + }, + { + "epoch": 0.4950655233780942, + "grad_norm": 0.8292950717170571, + "learning_rate": 5.954322202324759e-06, + "loss": 0.8447, + "step": 3060 + }, + { + "epoch": 0.4958744539718492, + "grad_norm": 0.8153225989185483, + "learning_rate": 5.940457218987003e-06, + "loss": 0.849, + "step": 3065 + }, + { + "epoch": 0.4966833845656043, + "grad_norm": 0.8817065793496368, + "learning_rate": 5.926584734694579e-06, + "loss": 0.831, + "step": 3070 + }, + { + "epoch": 0.4974923151593593, + "grad_norm": 0.8223135029609022, + "learning_rate": 5.912704860092473e-06, + "loss": 0.8442, + "step": 3075 + }, + { + "epoch": 0.4983012457531144, + "grad_norm": 0.8254057255135826, + "learning_rate": 5.898817705884615e-06, + "loss": 0.8231, + "step": 3080 + }, + { + "epoch": 0.49911017634686944, + "grad_norm": 0.8516771176045018, + "learning_rate": 5.8849233828329964e-06, + "loss": 0.8325, + "step": 3085 + }, + { + "epoch": 0.49991910694062447, + "grad_norm": 0.8770116756575128, + "learning_rate": 5.871022001756786e-06, + "loss": 0.8353, + "step": 3090 + }, + { + "epoch": 0.5007280375343796, + "grad_norm": 0.8416392905846662, + "learning_rate": 5.857113673531446e-06, + "loss": 0.8411, + "step": 3095 + }, + { + "epoch": 0.5015369681281346, + "grad_norm": 0.8727681516631783, + "learning_rate": 5.843198509087848e-06, + "loss": 0.8327, + "step": 3100 + }, + { + "epoch": 0.5023458987218896, + "grad_norm": 0.8122836804739928, + "learning_rate": 5.829276619411392e-06, + "loss": 0.8156, + "step": 3105 + }, + { + "epoch": 0.5031548293156447, + "grad_norm": 0.8419116014056326, + "learning_rate": 5.815348115541112e-06, + "loss": 0.8406, + "step": 3110 + }, + { + "epoch": 0.5039637599093998, + "grad_norm": 0.8496791968992736, + "learning_rate": 5.801413108568798e-06, + "loss": 0.8428, + "step": 3115 + }, + { + "epoch": 0.5047726905031549, + "grad_norm": 0.8370866951694025, + "learning_rate": 5.787471709638109e-06, + "loss": 0.8397, + "step": 3120 + }, + { + "epoch": 0.5055816210969099, + "grad_norm": 0.8807740954559549, + "learning_rate": 5.7735240299436825e-06, + "loss": 0.8281, + "step": 3125 + }, + { + "epoch": 0.5063905516906649, + "grad_norm": 0.880842454890424, + "learning_rate": 5.759570180730255e-06, + "loss": 0.8261, + "step": 3130 + }, + { + "epoch": 0.50719948228442, + "grad_norm": 0.8501925674960217, + "learning_rate": 5.745610273291766e-06, + "loss": 0.8182, + "step": 3135 + }, + { + "epoch": 0.508008412878175, + "grad_norm": 0.7885937426308233, + "learning_rate": 5.731644418970478e-06, + "loss": 0.8006, + "step": 3140 + }, + { + "epoch": 0.5088173434719301, + "grad_norm": 0.8453255098699243, + "learning_rate": 5.717672729156082e-06, + "loss": 0.8408, + "step": 3145 + }, + { + "epoch": 0.5096262740656852, + "grad_norm": 0.9027259053341657, + "learning_rate": 5.703695315284814e-06, + "loss": 0.8415, + "step": 3150 + }, + { + "epoch": 0.5104352046594403, + "grad_norm": 0.8760720225211055, + "learning_rate": 5.689712288838561e-06, + "loss": 0.8459, + "step": 3155 + }, + { + "epoch": 0.5112441352531952, + "grad_norm": 0.8371540278282537, + "learning_rate": 5.675723761343981e-06, + "loss": 0.8217, + "step": 3160 + }, + { + "epoch": 0.5120530658469503, + "grad_norm": 0.8832530229547365, + "learning_rate": 5.661729844371602e-06, + "loss": 0.8321, + "step": 3165 + }, + { + "epoch": 0.5128619964407054, + "grad_norm": 0.8466920163343052, + "learning_rate": 5.64773064953494e-06, + "loss": 0.8443, + "step": 3170 + }, + { + "epoch": 0.5136709270344605, + "grad_norm": 0.8386270391117758, + "learning_rate": 5.633726288489609e-06, + "loss": 0.8215, + "step": 3175 + }, + { + "epoch": 0.5144798576282155, + "grad_norm": 0.8968915867410968, + "learning_rate": 5.619716872932422e-06, + "loss": 0.8369, + "step": 3180 + }, + { + "epoch": 0.5152887882219706, + "grad_norm": 0.8572970506555265, + "learning_rate": 5.6057025146005125e-06, + "loss": 0.8312, + "step": 3185 + }, + { + "epoch": 0.5160977188157256, + "grad_norm": 0.856451059153177, + "learning_rate": 5.591683325270434e-06, + "loss": 0.828, + "step": 3190 + }, + { + "epoch": 0.5169066494094806, + "grad_norm": 0.8777484476404416, + "learning_rate": 5.577659416757267e-06, + "loss": 0.8408, + "step": 3195 + }, + { + "epoch": 0.5177155800032357, + "grad_norm": 0.8571851342757004, + "learning_rate": 5.5636309009137404e-06, + "loss": 0.8337, + "step": 3200 + }, + { + "epoch": 0.5185245105969908, + "grad_norm": 0.829598576934011, + "learning_rate": 5.549597889629325e-06, + "loss": 0.8507, + "step": 3205 + }, + { + "epoch": 0.5193334411907459, + "grad_norm": 0.8763914543424554, + "learning_rate": 5.535560494829345e-06, + "loss": 0.8363, + "step": 3210 + }, + { + "epoch": 0.5201423717845008, + "grad_norm": 0.8614187326018883, + "learning_rate": 5.521518828474092e-06, + "loss": 0.8254, + "step": 3215 + }, + { + "epoch": 0.5209513023782559, + "grad_norm": 0.8803171537853229, + "learning_rate": 5.507473002557922e-06, + "loss": 0.822, + "step": 3220 + }, + { + "epoch": 0.521760232972011, + "grad_norm": 0.8826978372002413, + "learning_rate": 5.493423129108373e-06, + "loss": 0.8249, + "step": 3225 + }, + { + "epoch": 0.5225691635657661, + "grad_norm": 0.8712751190362638, + "learning_rate": 5.47936932018526e-06, + "loss": 0.8423, + "step": 3230 + }, + { + "epoch": 0.5233780941595211, + "grad_norm": 0.8238326567914839, + "learning_rate": 5.465311687879785e-06, + "loss": 0.824, + "step": 3235 + }, + { + "epoch": 0.5241870247532762, + "grad_norm": 0.8899415075708137, + "learning_rate": 5.4512503443136555e-06, + "loss": 0.8663, + "step": 3240 + }, + { + "epoch": 0.5249959553470313, + "grad_norm": 0.9621216340368667, + "learning_rate": 5.437185401638168e-06, + "loss": 0.8414, + "step": 3245 + }, + { + "epoch": 0.5258048859407862, + "grad_norm": 0.8156708167135952, + "learning_rate": 5.423116972033332e-06, + "loss": 0.8249, + "step": 3250 + }, + { + "epoch": 0.5266138165345413, + "grad_norm": 0.8179857114204797, + "learning_rate": 5.409045167706962e-06, + "loss": 0.8421, + "step": 3255 + }, + { + "epoch": 0.5274227471282964, + "grad_norm": 0.9127466183923784, + "learning_rate": 5.394970100893797e-06, + "loss": 0.8359, + "step": 3260 + }, + { + "epoch": 0.5282316777220515, + "grad_norm": 0.8430058361090182, + "learning_rate": 5.380891883854591e-06, + "loss": 0.8152, + "step": 3265 + }, + { + "epoch": 0.5290406083158065, + "grad_norm": 0.9184787098267886, + "learning_rate": 5.366810628875226e-06, + "loss": 0.8253, + "step": 3270 + }, + { + "epoch": 0.5298495389095615, + "grad_norm": 0.8924101947820497, + "learning_rate": 5.352726448265808e-06, + "loss": 0.8386, + "step": 3275 + }, + { + "epoch": 0.5306584695033166, + "grad_norm": 0.7686705583327001, + "learning_rate": 5.338639454359792e-06, + "loss": 0.8305, + "step": 3280 + }, + { + "epoch": 0.5314674000970717, + "grad_norm": 0.8264073173393284, + "learning_rate": 5.324549759513058e-06, + "loss": 0.8279, + "step": 3285 + }, + { + "epoch": 0.5322763306908267, + "grad_norm": 0.7898714553275273, + "learning_rate": 5.310457476103033e-06, + "loss": 0.832, + "step": 3290 + }, + { + "epoch": 0.5330852612845818, + "grad_norm": 0.915049917158178, + "learning_rate": 5.2963627165277884e-06, + "loss": 0.8423, + "step": 3295 + }, + { + "epoch": 0.5338941918783369, + "grad_norm": 0.8390857749633213, + "learning_rate": 5.28226559320515e-06, + "loss": 0.7937, + "step": 3300 + }, + { + "epoch": 0.5347031224720918, + "grad_norm": 0.8766888011605986, + "learning_rate": 5.268166218571792e-06, + "loss": 0.8409, + "step": 3305 + }, + { + "epoch": 0.5355120530658469, + "grad_norm": 0.9055264757342382, + "learning_rate": 5.254064705082345e-06, + "loss": 0.8374, + "step": 3310 + }, + { + "epoch": 0.536320983659602, + "grad_norm": 0.9082065836332707, + "learning_rate": 5.239961165208499e-06, + "loss": 0.8424, + "step": 3315 + }, + { + "epoch": 0.5371299142533571, + "grad_norm": 0.8630555734437255, + "learning_rate": 5.2258557114381085e-06, + "loss": 0.8492, + "step": 3320 + }, + { + "epoch": 0.5379388448471121, + "grad_norm": 0.8387927972292225, + "learning_rate": 5.211748456274291e-06, + "loss": 0.8162, + "step": 3325 + }, + { + "epoch": 0.5387477754408672, + "grad_norm": 0.8838319893113061, + "learning_rate": 5.197639512234532e-06, + "loss": 0.8302, + "step": 3330 + }, + { + "epoch": 0.5395567060346222, + "grad_norm": 0.8810506540467933, + "learning_rate": 5.183528991849784e-06, + "loss": 0.8246, + "step": 3335 + }, + { + "epoch": 0.5403656366283773, + "grad_norm": 0.9032568956242902, + "learning_rate": 5.16941700766358e-06, + "loss": 0.8302, + "step": 3340 + }, + { + "epoch": 0.5411745672221323, + "grad_norm": 0.9878346022756306, + "learning_rate": 5.155303672231123e-06, + "loss": 0.8429, + "step": 3345 + }, + { + "epoch": 0.5419834978158874, + "grad_norm": 0.7990363456989189, + "learning_rate": 5.141189098118392e-06, + "loss": 0.8295, + "step": 3350 + }, + { + "epoch": 0.5427924284096425, + "grad_norm": 0.849668666846281, + "learning_rate": 5.127073397901248e-06, + "loss": 0.8447, + "step": 3355 + }, + { + "epoch": 0.5436013590033975, + "grad_norm": 0.8870097311869496, + "learning_rate": 5.112956684164532e-06, + "loss": 0.8291, + "step": 3360 + }, + { + "epoch": 0.5444102895971525, + "grad_norm": 0.8786319797950302, + "learning_rate": 5.09883906950117e-06, + "loss": 0.8325, + "step": 3365 + }, + { + "epoch": 0.5452192201909076, + "grad_norm": 0.8559879223353866, + "learning_rate": 5.084720666511276e-06, + "loss": 0.8343, + "step": 3370 + }, + { + "epoch": 0.5460281507846627, + "grad_norm": 0.8393771435379772, + "learning_rate": 5.070601587801246e-06, + "loss": 0.8264, + "step": 3375 + }, + { + "epoch": 0.5468370813784177, + "grad_norm": 0.8756680886902389, + "learning_rate": 5.056481945982871e-06, + "loss": 0.8363, + "step": 3380 + }, + { + "epoch": 0.5476460119721728, + "grad_norm": 0.8643664631148287, + "learning_rate": 5.042361853672429e-06, + "loss": 0.8508, + "step": 3385 + }, + { + "epoch": 0.5484549425659279, + "grad_norm": 0.8396247416018382, + "learning_rate": 5.0282414234897905e-06, + "loss": 0.8313, + "step": 3390 + }, + { + "epoch": 0.549263873159683, + "grad_norm": 0.8799962735756717, + "learning_rate": 5.014120768057526e-06, + "loss": 0.8394, + "step": 3395 + }, + { + "epoch": 0.5500728037534379, + "grad_norm": 0.8585807475699035, + "learning_rate": 5e-06, + "loss": 0.8259, + "step": 3400 + }, + { + "epoch": 0.550881734347193, + "grad_norm": 0.8967809635243733, + "learning_rate": 4.985879231942475e-06, + "loss": 0.8232, + "step": 3405 + }, + { + "epoch": 0.5516906649409481, + "grad_norm": 0.8853595174049823, + "learning_rate": 4.97175857651021e-06, + "loss": 0.8196, + "step": 3410 + }, + { + "epoch": 0.5524995955347031, + "grad_norm": 0.839604935690722, + "learning_rate": 4.957638146327575e-06, + "loss": 0.8361, + "step": 3415 + }, + { + "epoch": 0.5533085261284582, + "grad_norm": 0.8391467792592279, + "learning_rate": 4.943518054017131e-06, + "loss": 0.8272, + "step": 3420 + }, + { + "epoch": 0.5541174567222132, + "grad_norm": 0.8844616497057926, + "learning_rate": 4.929398412198756e-06, + "loss": 0.8233, + "step": 3425 + }, + { + "epoch": 0.5549263873159683, + "grad_norm": 0.8052750334117075, + "learning_rate": 4.915279333488726e-06, + "loss": 0.8479, + "step": 3430 + }, + { + "epoch": 0.5557353179097233, + "grad_norm": 0.8647851317448605, + "learning_rate": 4.90116093049883e-06, + "loss": 0.819, + "step": 3435 + }, + { + "epoch": 0.5565442485034784, + "grad_norm": 0.8453777869125878, + "learning_rate": 4.887043315835469e-06, + "loss": 0.8536, + "step": 3440 + }, + { + "epoch": 0.5573531790972335, + "grad_norm": 0.8705052285115737, + "learning_rate": 4.872926602098756e-06, + "loss": 0.8277, + "step": 3445 + }, + { + "epoch": 0.5581621096909886, + "grad_norm": 0.861071586334625, + "learning_rate": 4.85881090188161e-06, + "loss": 0.8081, + "step": 3450 + }, + { + "epoch": 0.5589710402847435, + "grad_norm": 0.8558895543087007, + "learning_rate": 4.844696327768878e-06, + "loss": 0.8086, + "step": 3455 + }, + { + "epoch": 0.5597799708784986, + "grad_norm": 0.9566303499060257, + "learning_rate": 4.83058299233642e-06, + "loss": 0.8388, + "step": 3460 + }, + { + "epoch": 0.5605889014722537, + "grad_norm": 0.8480580281776552, + "learning_rate": 4.8164710081502165e-06, + "loss": 0.8464, + "step": 3465 + }, + { + "epoch": 0.5613978320660087, + "grad_norm": 0.8577968798548672, + "learning_rate": 4.802360487765471e-06, + "loss": 0.8331, + "step": 3470 + }, + { + "epoch": 0.5622067626597638, + "grad_norm": 0.8683642459249713, + "learning_rate": 4.788251543725711e-06, + "loss": 0.8379, + "step": 3475 + }, + { + "epoch": 0.5630156932535189, + "grad_norm": 0.8893404896273497, + "learning_rate": 4.774144288561893e-06, + "loss": 0.8138, + "step": 3480 + }, + { + "epoch": 0.5638246238472739, + "grad_norm": 0.8886059419047985, + "learning_rate": 4.7600388347915025e-06, + "loss": 0.8306, + "step": 3485 + }, + { + "epoch": 0.5646335544410289, + "grad_norm": 0.8630514971290992, + "learning_rate": 4.745935294917658e-06, + "loss": 0.8348, + "step": 3490 + }, + { + "epoch": 0.565442485034784, + "grad_norm": 0.8662086181368763, + "learning_rate": 4.731833781428208e-06, + "loss": 0.8272, + "step": 3495 + }, + { + "epoch": 0.5662514156285391, + "grad_norm": 0.8982952976447754, + "learning_rate": 4.7177344067948526e-06, + "loss": 0.8525, + "step": 3500 + }, + { + "epoch": 0.5670603462222942, + "grad_norm": 0.8422016831549223, + "learning_rate": 4.703637283472213e-06, + "loss": 0.8501, + "step": 3505 + }, + { + "epoch": 0.5678692768160492, + "grad_norm": 0.9548881231159024, + "learning_rate": 4.689542523896969e-06, + "loss": 0.8328, + "step": 3510 + }, + { + "epoch": 0.5686782074098042, + "grad_norm": 0.9166786357525355, + "learning_rate": 4.6754502404869434e-06, + "loss": 0.8205, + "step": 3515 + }, + { + "epoch": 0.5694871380035593, + "grad_norm": 0.9061529794904933, + "learning_rate": 4.661360545640209e-06, + "loss": 0.8352, + "step": 3520 + }, + { + "epoch": 0.5702960685973143, + "grad_norm": 0.8412734635963403, + "learning_rate": 4.647273551734193e-06, + "loss": 0.8348, + "step": 3525 + }, + { + "epoch": 0.5711049991910694, + "grad_norm": 0.8664917924780835, + "learning_rate": 4.633189371124778e-06, + "loss": 0.8274, + "step": 3530 + }, + { + "epoch": 0.5719139297848245, + "grad_norm": 0.8421186404143896, + "learning_rate": 4.619108116145411e-06, + "loss": 0.8354, + "step": 3535 + }, + { + "epoch": 0.5727228603785796, + "grad_norm": 0.9394380176490387, + "learning_rate": 4.6050298991062045e-06, + "loss": 0.8269, + "step": 3540 + }, + { + "epoch": 0.5735317909723345, + "grad_norm": 0.8660091216092733, + "learning_rate": 4.590954832293039e-06, + "loss": 0.8431, + "step": 3545 + }, + { + "epoch": 0.5743407215660896, + "grad_norm": 0.8453775529626361, + "learning_rate": 4.57688302796667e-06, + "loss": 0.8189, + "step": 3550 + }, + { + "epoch": 0.5751496521598447, + "grad_norm": 0.9293223006762544, + "learning_rate": 4.562814598361834e-06, + "loss": 0.8272, + "step": 3555 + }, + { + "epoch": 0.5759585827535998, + "grad_norm": 0.8682075221715769, + "learning_rate": 4.548749655686346e-06, + "loss": 0.8345, + "step": 3560 + }, + { + "epoch": 0.5767675133473548, + "grad_norm": 0.8962654121926869, + "learning_rate": 4.534688312120216e-06, + "loss": 0.8328, + "step": 3565 + }, + { + "epoch": 0.5775764439411099, + "grad_norm": 0.8465460016413964, + "learning_rate": 4.520630679814742e-06, + "loss": 0.8203, + "step": 3570 + }, + { + "epoch": 0.5783853745348649, + "grad_norm": 0.8542030722056939, + "learning_rate": 4.506576870891628e-06, + "loss": 0.8356, + "step": 3575 + }, + { + "epoch": 0.5791943051286199, + "grad_norm": 0.8664072736373561, + "learning_rate": 4.49252699744208e-06, + "loss": 0.8282, + "step": 3580 + }, + { + "epoch": 0.580003235722375, + "grad_norm": 0.8908304692018135, + "learning_rate": 4.47848117152591e-06, + "loss": 0.8486, + "step": 3585 + }, + { + "epoch": 0.5808121663161301, + "grad_norm": 0.9604353292765517, + "learning_rate": 4.464439505170656e-06, + "loss": 0.828, + "step": 3590 + }, + { + "epoch": 0.5816210969098852, + "grad_norm": 0.8847638351175713, + "learning_rate": 4.450402110370677e-06, + "loss": 0.8282, + "step": 3595 + }, + { + "epoch": 0.5824300275036401, + "grad_norm": 0.8448125761174995, + "learning_rate": 4.43636909908626e-06, + "loss": 0.8381, + "step": 3600 + }, + { + "epoch": 0.5832389580973952, + "grad_norm": 0.899925581945171, + "learning_rate": 4.422340583242733e-06, + "loss": 0.8316, + "step": 3605 + }, + { + "epoch": 0.5840478886911503, + "grad_norm": 0.8270612926284294, + "learning_rate": 4.408316674729569e-06, + "loss": 0.8369, + "step": 3610 + }, + { + "epoch": 0.5848568192849054, + "grad_norm": 0.8514310722916417, + "learning_rate": 4.394297485399488e-06, + "loss": 0.8123, + "step": 3615 + }, + { + "epoch": 0.5856657498786604, + "grad_norm": 0.847607926161957, + "learning_rate": 4.3802831270675785e-06, + "loss": 0.8125, + "step": 3620 + }, + { + "epoch": 0.5864746804724155, + "grad_norm": 0.8743645770925027, + "learning_rate": 4.3662737115103925e-06, + "loss": 0.8175, + "step": 3625 + }, + { + "epoch": 0.5872836110661706, + "grad_norm": 0.8872414789266562, + "learning_rate": 4.35226935046506e-06, + "loss": 0.8128, + "step": 3630 + }, + { + "epoch": 0.5880925416599255, + "grad_norm": 0.8748952049996549, + "learning_rate": 4.338270155628401e-06, + "loss": 0.8323, + "step": 3635 + }, + { + "epoch": 0.5889014722536806, + "grad_norm": 0.9131605811984947, + "learning_rate": 4.324276238656023e-06, + "loss": 0.8266, + "step": 3640 + }, + { + "epoch": 0.5897104028474357, + "grad_norm": 0.9378108739015335, + "learning_rate": 4.31028771116144e-06, + "loss": 0.8394, + "step": 3645 + }, + { + "epoch": 0.5905193334411908, + "grad_norm": 0.9270250400148877, + "learning_rate": 4.2963046847151875e-06, + "loss": 0.8415, + "step": 3650 + }, + { + "epoch": 0.5913282640349458, + "grad_norm": 0.8427644597969671, + "learning_rate": 4.282327270843919e-06, + "loss": 0.8206, + "step": 3655 + }, + { + "epoch": 0.5921371946287008, + "grad_norm": 0.9019710217551041, + "learning_rate": 4.268355581029523e-06, + "loss": 0.8414, + "step": 3660 + }, + { + "epoch": 0.5929461252224559, + "grad_norm": 0.9361559273358337, + "learning_rate": 4.254389726708234e-06, + "loss": 0.8388, + "step": 3665 + }, + { + "epoch": 0.593755055816211, + "grad_norm": 0.8479002824514841, + "learning_rate": 4.240429819269746e-06, + "loss": 0.8151, + "step": 3670 + }, + { + "epoch": 0.594563986409966, + "grad_norm": 0.8837530926010035, + "learning_rate": 4.226475970056318e-06, + "loss": 0.8212, + "step": 3675 + }, + { + "epoch": 0.5953729170037211, + "grad_norm": 0.8660636262138016, + "learning_rate": 4.212528290361893e-06, + "loss": 0.8571, + "step": 3680 + }, + { + "epoch": 0.5961818475974762, + "grad_norm": 0.9246667975571174, + "learning_rate": 4.198586891431203e-06, + "loss": 0.8397, + "step": 3685 + }, + { + "epoch": 0.5969907781912311, + "grad_norm": 0.90386970265181, + "learning_rate": 4.1846518844588906e-06, + "loss": 0.8365, + "step": 3690 + }, + { + "epoch": 0.5977997087849862, + "grad_norm": 0.9258635632218947, + "learning_rate": 4.170723380588609e-06, + "loss": 0.8403, + "step": 3695 + }, + { + "epoch": 0.5986086393787413, + "grad_norm": 0.8741091788263792, + "learning_rate": 4.156801490912153e-06, + "loss": 0.822, + "step": 3700 + }, + { + "epoch": 0.5994175699724964, + "grad_norm": 0.8552618808724114, + "learning_rate": 4.142886326468556e-06, + "loss": 0.8414, + "step": 3705 + }, + { + "epoch": 0.6002265005662514, + "grad_norm": 0.8577590928960753, + "learning_rate": 4.128977998243215e-06, + "loss": 0.8234, + "step": 3710 + }, + { + "epoch": 0.6010354311600065, + "grad_norm": 0.9640946415628675, + "learning_rate": 4.115076617167004e-06, + "loss": 0.8344, + "step": 3715 + }, + { + "epoch": 0.6018443617537615, + "grad_norm": 0.8840143210557655, + "learning_rate": 4.101182294115388e-06, + "loss": 0.8211, + "step": 3720 + }, + { + "epoch": 0.6026532923475166, + "grad_norm": 0.8574994142653399, + "learning_rate": 4.087295139907528e-06, + "loss": 0.8166, + "step": 3725 + }, + { + "epoch": 0.6034622229412716, + "grad_norm": 0.8964092793127811, + "learning_rate": 4.073415265305422e-06, + "loss": 0.8171, + "step": 3730 + }, + { + "epoch": 0.6042711535350267, + "grad_norm": 0.963355011643523, + "learning_rate": 4.059542781012998e-06, + "loss": 0.8223, + "step": 3735 + }, + { + "epoch": 0.6050800841287818, + "grad_norm": 0.8678396011067857, + "learning_rate": 4.045677797675242e-06, + "loss": 0.8203, + "step": 3740 + }, + { + "epoch": 0.6058890147225368, + "grad_norm": 0.9121282453763787, + "learning_rate": 4.031820425877313e-06, + "loss": 0.8219, + "step": 3745 + }, + { + "epoch": 0.6066979453162918, + "grad_norm": 0.9216469820283191, + "learning_rate": 4.017970776143662e-06, + "loss": 0.8352, + "step": 3750 + }, + { + "epoch": 0.6075068759100469, + "grad_norm": 0.908885616786373, + "learning_rate": 4.004128958937149e-06, + "loss": 0.8241, + "step": 3755 + }, + { + "epoch": 0.608315806503802, + "grad_norm": 0.8942143952561401, + "learning_rate": 3.990295084658166e-06, + "loss": 0.8361, + "step": 3760 + }, + { + "epoch": 0.609124737097557, + "grad_norm": 0.8730532396217757, + "learning_rate": 3.976469263643748e-06, + "loss": 0.8384, + "step": 3765 + }, + { + "epoch": 0.6099336676913121, + "grad_norm": 0.9177096179325701, + "learning_rate": 3.962651606166703e-06, + "loss": 0.8184, + "step": 3770 + }, + { + "epoch": 0.6107425982850672, + "grad_norm": 0.9852181301325248, + "learning_rate": 3.948842222434728e-06, + "loss": 0.817, + "step": 3775 + }, + { + "epoch": 0.6115515288788222, + "grad_norm": 0.8956044101647403, + "learning_rate": 3.935041222589524e-06, + "loss": 0.818, + "step": 3780 + }, + { + "epoch": 0.6123604594725772, + "grad_norm": 0.9733581516182005, + "learning_rate": 3.9212487167059265e-06, + "loss": 0.8223, + "step": 3785 + }, + { + "epoch": 0.6131693900663323, + "grad_norm": 0.8650513303090718, + "learning_rate": 3.907464814791029e-06, + "loss": 0.8033, + "step": 3790 + }, + { + "epoch": 0.6139783206600874, + "grad_norm": 0.99672664240277, + "learning_rate": 3.893689626783294e-06, + "loss": 0.836, + "step": 3795 + }, + { + "epoch": 0.6147872512538424, + "grad_norm": 0.908345822113972, + "learning_rate": 3.8799232625516884e-06, + "loss": 0.8478, + "step": 3800 + }, + { + "epoch": 0.6155961818475975, + "grad_norm": 0.8836902180965445, + "learning_rate": 3.866165831894796e-06, + "loss": 0.8235, + "step": 3805 + }, + { + "epoch": 0.6164051124413525, + "grad_norm": 0.9264471672860939, + "learning_rate": 3.852417444539953e-06, + "loss": 0.8247, + "step": 3810 + }, + { + "epoch": 0.6172140430351076, + "grad_norm": 0.894173445093799, + "learning_rate": 3.8386782101423665e-06, + "loss": 0.8235, + "step": 3815 + }, + { + "epoch": 0.6180229736288626, + "grad_norm": 0.9438338724077253, + "learning_rate": 3.824948238284238e-06, + "loss": 0.8339, + "step": 3820 + }, + { + "epoch": 0.6188319042226177, + "grad_norm": 0.9012049433189601, + "learning_rate": 3.8112276384738973e-06, + "loss": 0.8217, + "step": 3825 + }, + { + "epoch": 0.6196408348163728, + "grad_norm": 0.9631072787593842, + "learning_rate": 3.797516520144919e-06, + "loss": 0.821, + "step": 3830 + }, + { + "epoch": 0.6204497654101279, + "grad_norm": 0.8562741768165036, + "learning_rate": 3.7838149926552565e-06, + "loss": 0.8254, + "step": 3835 + }, + { + "epoch": 0.6212586960038828, + "grad_norm": 0.9007576913836781, + "learning_rate": 3.770123165286369e-06, + "loss": 0.8187, + "step": 3840 + }, + { + "epoch": 0.6220676265976379, + "grad_norm": 1.0271531293622427, + "learning_rate": 3.7564411472423467e-06, + "loss": 0.8358, + "step": 3845 + }, + { + "epoch": 0.622876557191393, + "grad_norm": 0.8914948323397264, + "learning_rate": 3.7427690476490462e-06, + "loss": 0.8275, + "step": 3850 + }, + { + "epoch": 0.623685487785148, + "grad_norm": 0.9328213480538451, + "learning_rate": 3.7291069755532146e-06, + "loss": 0.8167, + "step": 3855 + }, + { + "epoch": 0.6244944183789031, + "grad_norm": 0.8892937385561739, + "learning_rate": 3.7154550399216137e-06, + "loss": 0.8347, + "step": 3860 + }, + { + "epoch": 0.6253033489726582, + "grad_norm": 0.8798858697832553, + "learning_rate": 3.7018133496401688e-06, + "loss": 0.8383, + "step": 3865 + }, + { + "epoch": 0.6261122795664132, + "grad_norm": 0.98439076252917, + "learning_rate": 3.688182013513085e-06, + "loss": 0.8274, + "step": 3870 + }, + { + "epoch": 0.6269212101601682, + "grad_norm": 0.8370789210688374, + "learning_rate": 3.674561140261983e-06, + "loss": 0.8178, + "step": 3875 + }, + { + "epoch": 0.6277301407539233, + "grad_norm": 0.9153894187914345, + "learning_rate": 3.660950838525036e-06, + "loss": 0.8258, + "step": 3880 + }, + { + "epoch": 0.6285390713476784, + "grad_norm": 0.8922726078616626, + "learning_rate": 3.647351216856099e-06, + "loss": 0.8581, + "step": 3885 + }, + { + "epoch": 0.6293480019414335, + "grad_norm": 0.9058364873912355, + "learning_rate": 3.633762383723841e-06, + "loss": 0.827, + "step": 3890 + }, + { + "epoch": 0.6301569325351885, + "grad_norm": 0.8703352651889255, + "learning_rate": 3.6201844475108884e-06, + "loss": 0.8491, + "step": 3895 + }, + { + "epoch": 0.6309658631289435, + "grad_norm": 0.892101387058938, + "learning_rate": 3.606617516512953e-06, + "loss": 0.8175, + "step": 3900 + }, + { + "epoch": 0.6317747937226986, + "grad_norm": 0.9365570155133393, + "learning_rate": 3.5930616989379697e-06, + "loss": 0.8388, + "step": 3905 + }, + { + "epoch": 0.6325837243164536, + "grad_norm": 0.8773819774765554, + "learning_rate": 3.5795171029052383e-06, + "loss": 0.8294, + "step": 3910 + }, + { + "epoch": 0.6333926549102087, + "grad_norm": 0.8714533378375239, + "learning_rate": 3.5659838364445505e-06, + "loss": 0.8346, + "step": 3915 + }, + { + "epoch": 0.6342015855039638, + "grad_norm": 0.9825642646199596, + "learning_rate": 3.552462007495338e-06, + "loss": 0.8376, + "step": 3920 + }, + { + "epoch": 0.6350105160977189, + "grad_norm": 0.899943511678241, + "learning_rate": 3.5389517239058126e-06, + "loss": 0.8251, + "step": 3925 + }, + { + "epoch": 0.6358194466914738, + "grad_norm": 0.9817959591645147, + "learning_rate": 3.5254530934320956e-06, + "loss": 0.8332, + "step": 3930 + }, + { + "epoch": 0.6366283772852289, + "grad_norm": 0.939309664119045, + "learning_rate": 3.5119662237373684e-06, + "loss": 0.8172, + "step": 3935 + }, + { + "epoch": 0.637437307878984, + "grad_norm": 0.9545145278463266, + "learning_rate": 3.4984912223910105e-06, + "loss": 0.8137, + "step": 3940 + }, + { + "epoch": 0.6382462384727391, + "grad_norm": 0.9284835788915782, + "learning_rate": 3.485028196867738e-06, + "loss": 0.825, + "step": 3945 + }, + { + "epoch": 0.6390551690664941, + "grad_norm": 1.0025398605998348, + "learning_rate": 3.4715772545467507e-06, + "loss": 0.8359, + "step": 3950 + }, + { + "epoch": 0.6398640996602492, + "grad_norm": 0.9250686960694907, + "learning_rate": 3.458138502710876e-06, + "loss": 0.8445, + "step": 3955 + }, + { + "epoch": 0.6406730302540042, + "grad_norm": 0.885581537464598, + "learning_rate": 3.444712048545711e-06, + "loss": 0.8241, + "step": 3960 + }, + { + "epoch": 0.6414819608477592, + "grad_norm": 0.9162743922784746, + "learning_rate": 3.431297999138768e-06, + "loss": 0.8178, + "step": 3965 + }, + { + "epoch": 0.6422908914415143, + "grad_norm": 0.9997360512776706, + "learning_rate": 3.417896461478619e-06, + "loss": 0.849, + "step": 3970 + }, + { + "epoch": 0.6430998220352694, + "grad_norm": 0.9230165688594162, + "learning_rate": 3.4045075424540484e-06, + "loss": 0.8048, + "step": 3975 + }, + { + "epoch": 0.6439087526290245, + "grad_norm": 0.8899064142475545, + "learning_rate": 3.3911313488531907e-06, + "loss": 0.8286, + "step": 3980 + }, + { + "epoch": 0.6447176832227794, + "grad_norm": 0.8669101247237674, + "learning_rate": 3.37776798736269e-06, + "loss": 0.8119, + "step": 3985 + }, + { + "epoch": 0.6455266138165345, + "grad_norm": 0.9167759750434173, + "learning_rate": 3.364417564566839e-06, + "loss": 0.8472, + "step": 3990 + }, + { + "epoch": 0.6463355444102896, + "grad_norm": 0.9521570771011725, + "learning_rate": 3.351080186946736e-06, + "loss": 0.8251, + "step": 3995 + }, + { + "epoch": 0.6471444750040447, + "grad_norm": 0.8382337519442962, + "learning_rate": 3.3377559608794273e-06, + "loss": 0.8453, + "step": 4000 + }, + { + "epoch": 0.6479534055977997, + "grad_norm": 0.9439756459989987, + "learning_rate": 3.3244449926370713e-06, + "loss": 0.8359, + "step": 4005 + }, + { + "epoch": 0.6487623361915548, + "grad_norm": 0.9306298379750102, + "learning_rate": 3.3111473883860813e-06, + "loss": 0.8196, + "step": 4010 + }, + { + "epoch": 0.6495712667853099, + "grad_norm": 0.8706086854228088, + "learning_rate": 3.2978632541862788e-06, + "loss": 0.8252, + "step": 4015 + }, + { + "epoch": 0.6503801973790648, + "grad_norm": 0.918543174205597, + "learning_rate": 3.2845926959900555e-06, + "loss": 0.8194, + "step": 4020 + }, + { + "epoch": 0.6511891279728199, + "grad_norm": 0.9355133643042608, + "learning_rate": 3.2713358196415147e-06, + "loss": 0.8279, + "step": 4025 + }, + { + "epoch": 0.651998058566575, + "grad_norm": 0.8875680347636398, + "learning_rate": 3.2580927308756426e-06, + "loss": 0.8336, + "step": 4030 + }, + { + "epoch": 0.6528069891603301, + "grad_norm": 0.974049905246242, + "learning_rate": 3.2448635353174553e-06, + "loss": 0.8334, + "step": 4035 + }, + { + "epoch": 0.6536159197540851, + "grad_norm": 0.8679703990844894, + "learning_rate": 3.231648338481157e-06, + "loss": 0.8355, + "step": 4040 + }, + { + "epoch": 0.6544248503478401, + "grad_norm": 0.9299757764874559, + "learning_rate": 3.2184472457693005e-06, + "loss": 0.8401, + "step": 4045 + }, + { + "epoch": 0.6552337809415952, + "grad_norm": 0.8615524033230236, + "learning_rate": 3.2052603624719516e-06, + "loss": 0.8217, + "step": 4050 + }, + { + "epoch": 0.6560427115353503, + "grad_norm": 0.9060239627418345, + "learning_rate": 3.1920877937658325e-06, + "loss": 0.8173, + "step": 4055 + }, + { + "epoch": 0.6568516421291053, + "grad_norm": 0.9398327715146758, + "learning_rate": 3.178929644713504e-06, + "loss": 0.8412, + "step": 4060 + }, + { + "epoch": 0.6576605727228604, + "grad_norm": 0.8393737863171705, + "learning_rate": 3.1657860202625145e-06, + "loss": 0.8368, + "step": 4065 + }, + { + "epoch": 0.6584695033166155, + "grad_norm": 0.8802074487722802, + "learning_rate": 3.1526570252445665e-06, + "loss": 0.8203, + "step": 4070 + }, + { + "epoch": 0.6592784339103704, + "grad_norm": 0.9133104861467479, + "learning_rate": 3.1395427643746802e-06, + "loss": 0.8168, + "step": 4075 + }, + { + "epoch": 0.6600873645041255, + "grad_norm": 0.9090731300453533, + "learning_rate": 3.1264433422503564e-06, + "loss": 0.8466, + "step": 4080 + }, + { + "epoch": 0.6608962950978806, + "grad_norm": 0.9229902190782812, + "learning_rate": 3.113358863350747e-06, + "loss": 0.8334, + "step": 4085 + }, + { + "epoch": 0.6617052256916357, + "grad_norm": 0.8999096831432444, + "learning_rate": 3.100289432035818e-06, + "loss": 0.8145, + "step": 4090 + }, + { + "epoch": 0.6625141562853907, + "grad_norm": 0.9226844151049607, + "learning_rate": 3.087235152545517e-06, + "loss": 0.8245, + "step": 4095 + }, + { + "epoch": 0.6633230868791458, + "grad_norm": 0.9923339907314755, + "learning_rate": 3.074196128998943e-06, + "loss": 0.8067, + "step": 4100 + }, + { + "epoch": 0.6641320174729008, + "grad_norm": 0.8793470002054651, + "learning_rate": 3.0611724653935184e-06, + "loss": 0.8154, + "step": 4105 + }, + { + "epoch": 0.6649409480666559, + "grad_norm": 0.9952922559372246, + "learning_rate": 3.048164265604152e-06, + "loss": 0.8339, + "step": 4110 + }, + { + "epoch": 0.6657498786604109, + "grad_norm": 0.8934191696631508, + "learning_rate": 3.035171633382419e-06, + "loss": 0.8126, + "step": 4115 + }, + { + "epoch": 0.666558809254166, + "grad_norm": 0.9074584444838903, + "learning_rate": 3.0221946723557274e-06, + "loss": 0.814, + "step": 4120 + }, + { + "epoch": 0.6673677398479211, + "grad_norm": 0.8823615491806162, + "learning_rate": 3.009233486026497e-06, + "loss": 0.8231, + "step": 4125 + }, + { + "epoch": 0.6681766704416761, + "grad_norm": 0.8985312723090128, + "learning_rate": 2.9962881777713326e-06, + "loss": 0.8279, + "step": 4130 + }, + { + "epoch": 0.6689856010354311, + "grad_norm": 0.8759610512792171, + "learning_rate": 2.983358850840187e-06, + "loss": 0.8503, + "step": 4135 + }, + { + "epoch": 0.6697945316291862, + "grad_norm": 0.875080183012667, + "learning_rate": 2.97044560835556e-06, + "loss": 0.8361, + "step": 4140 + }, + { + "epoch": 0.6706034622229413, + "grad_norm": 0.9343434705098017, + "learning_rate": 2.9575485533116597e-06, + "loss": 0.8422, + "step": 4145 + }, + { + "epoch": 0.6714123928166963, + "grad_norm": 0.9509754443485635, + "learning_rate": 2.944667788573584e-06, + "loss": 0.824, + "step": 4150 + }, + { + "epoch": 0.6722213234104514, + "grad_norm": 0.8941436073858621, + "learning_rate": 2.9318034168765048e-06, + "loss": 0.8329, + "step": 4155 + }, + { + "epoch": 0.6730302540042065, + "grad_norm": 0.9216572906600096, + "learning_rate": 2.9189555408248436e-06, + "loss": 0.838, + "step": 4160 + }, + { + "epoch": 0.6738391845979615, + "grad_norm": 0.9049144748685041, + "learning_rate": 2.906124262891451e-06, + "loss": 0.8392, + "step": 4165 + }, + { + "epoch": 0.6746481151917165, + "grad_norm": 0.9088265471558502, + "learning_rate": 2.893309685416802e-06, + "loss": 0.8219, + "step": 4170 + }, + { + "epoch": 0.6754570457854716, + "grad_norm": 0.958093818584568, + "learning_rate": 2.880511910608164e-06, + "loss": 0.8361, + "step": 4175 + }, + { + "epoch": 0.6762659763792267, + "grad_norm": 0.9461864165794798, + "learning_rate": 2.8677310405387926e-06, + "loss": 0.8145, + "step": 4180 + }, + { + "epoch": 0.6770749069729817, + "grad_norm": 0.8862588950970776, + "learning_rate": 2.854967177147113e-06, + "loss": 0.8101, + "step": 4185 + }, + { + "epoch": 0.6778838375667368, + "grad_norm": 0.95284924784784, + "learning_rate": 2.8422204222359027e-06, + "loss": 0.8355, + "step": 4190 + }, + { + "epoch": 0.6786927681604918, + "grad_norm": 0.9473709868364139, + "learning_rate": 2.829490877471491e-06, + "loss": 0.8213, + "step": 4195 + }, + { + "epoch": 0.6795016987542469, + "grad_norm": 0.8870378301848979, + "learning_rate": 2.816778644382938e-06, + "loss": 0.8143, + "step": 4200 + }, + { + "epoch": 0.6803106293480019, + "grad_norm": 0.9018325718849391, + "learning_rate": 2.804083824361229e-06, + "loss": 0.8265, + "step": 4205 + }, + { + "epoch": 0.681119559941757, + "grad_norm": 0.8727955391410293, + "learning_rate": 2.7914065186584637e-06, + "loss": 0.8254, + "step": 4210 + }, + { + "epoch": 0.6819284905355121, + "grad_norm": 1.0739665165244483, + "learning_rate": 2.778746828387058e-06, + "loss": 0.82, + "step": 4215 + }, + { + "epoch": 0.6827374211292672, + "grad_norm": 0.9350732250295917, + "learning_rate": 2.766104854518915e-06, + "loss": 0.8391, + "step": 4220 + }, + { + "epoch": 0.6835463517230221, + "grad_norm": 1.0087459108347565, + "learning_rate": 2.753480697884647e-06, + "loss": 0.8191, + "step": 4225 + }, + { + "epoch": 0.6843552823167772, + "grad_norm": 0.9139563110633456, + "learning_rate": 2.740874459172752e-06, + "loss": 0.8148, + "step": 4230 + }, + { + "epoch": 0.6851642129105323, + "grad_norm": 0.8530060707424758, + "learning_rate": 2.728286238928821e-06, + "loss": 0.8053, + "step": 4235 + }, + { + "epoch": 0.6859731435042873, + "grad_norm": 0.8777686426009732, + "learning_rate": 2.7157161375547304e-06, + "loss": 0.8114, + "step": 4240 + }, + { + "epoch": 0.6867820740980424, + "grad_norm": 0.897912961167507, + "learning_rate": 2.7031642553078374e-06, + "loss": 0.8418, + "step": 4245 + }, + { + "epoch": 0.6875910046917975, + "grad_norm": 0.9279693202641751, + "learning_rate": 2.690630692300199e-06, + "loss": 0.8404, + "step": 4250 + }, + { + "epoch": 0.6883999352855525, + "grad_norm": 0.9248453334693622, + "learning_rate": 2.6781155484977495e-06, + "loss": 0.8494, + "step": 4255 + }, + { + "epoch": 0.6892088658793075, + "grad_norm": 0.9396606484969379, + "learning_rate": 2.6656189237195186e-06, + "loss": 0.8226, + "step": 4260 + }, + { + "epoch": 0.6900177964730626, + "grad_norm": 0.8634815816211927, + "learning_rate": 2.6531409176368296e-06, + "loss": 0.8265, + "step": 4265 + }, + { + "epoch": 0.6908267270668177, + "grad_norm": 0.9135686768769312, + "learning_rate": 2.6406816297725086e-06, + "loss": 0.8248, + "step": 4270 + }, + { + "epoch": 0.6916356576605728, + "grad_norm": 0.92902484349033, + "learning_rate": 2.628241159500081e-06, + "loss": 0.8173, + "step": 4275 + }, + { + "epoch": 0.6924445882543278, + "grad_norm": 0.8687123389790895, + "learning_rate": 2.6158196060429926e-06, + "loss": 0.8398, + "step": 4280 + }, + { + "epoch": 0.6932535188480828, + "grad_norm": 0.9217730078223995, + "learning_rate": 2.6034170684738065e-06, + "loss": 0.8309, + "step": 4285 + }, + { + "epoch": 0.6940624494418379, + "grad_norm": 0.9769331954136037, + "learning_rate": 2.591033645713424e-06, + "loss": 0.8223, + "step": 4290 + }, + { + "epoch": 0.6948713800355929, + "grad_norm": 0.908044715178041, + "learning_rate": 2.5786694365302855e-06, + "loss": 0.8281, + "step": 4295 + }, + { + "epoch": 0.695680310629348, + "grad_norm": 0.9111114311128146, + "learning_rate": 2.566324539539583e-06, + "loss": 0.8266, + "step": 4300 + }, + { + "epoch": 0.6964892412231031, + "grad_norm": 0.9303322655983225, + "learning_rate": 2.5539990532024827e-06, + "loss": 0.8111, + "step": 4305 + }, + { + "epoch": 0.6972981718168582, + "grad_norm": 0.8418692144813529, + "learning_rate": 2.5416930758253317e-06, + "loss": 0.8256, + "step": 4310 + }, + { + "epoch": 0.6981071024106131, + "grad_norm": 0.9992022955905648, + "learning_rate": 2.5294067055588765e-06, + "loss": 0.8032, + "step": 4315 + }, + { + "epoch": 0.6989160330043682, + "grad_norm": 0.8813775901696349, + "learning_rate": 2.517140040397482e-06, + "loss": 0.8168, + "step": 4320 + }, + { + "epoch": 0.6997249635981233, + "grad_norm": 0.9034982645101618, + "learning_rate": 2.5048931781783457e-06, + "loss": 0.8363, + "step": 4325 + }, + { + "epoch": 0.7005338941918783, + "grad_norm": 0.9291076912208419, + "learning_rate": 2.492666216580719e-06, + "loss": 0.8309, + "step": 4330 + }, + { + "epoch": 0.7013428247856334, + "grad_norm": 0.9136441839215993, + "learning_rate": 2.480459253125132e-06, + "loss": 0.8446, + "step": 4335 + }, + { + "epoch": 0.7021517553793885, + "grad_norm": 0.8945013641006528, + "learning_rate": 2.468272385172609e-06, + "loss": 0.8061, + "step": 4340 + }, + { + "epoch": 0.7029606859731435, + "grad_norm": 0.9843334357090001, + "learning_rate": 2.4561057099238973e-06, + "loss": 0.8245, + "step": 4345 + }, + { + "epoch": 0.7037696165668985, + "grad_norm": 0.9061201723841746, + "learning_rate": 2.4439593244186914e-06, + "loss": 0.8156, + "step": 4350 + }, + { + "epoch": 0.7045785471606536, + "grad_norm": 0.9595180789878346, + "learning_rate": 2.4318333255348524e-06, + "loss": 0.8362, + "step": 4355 + }, + { + "epoch": 0.7053874777544087, + "grad_norm": 0.8905904032697742, + "learning_rate": 2.4197278099876458e-06, + "loss": 0.8013, + "step": 4360 + }, + { + "epoch": 0.7061964083481638, + "grad_norm": 0.9061698973868614, + "learning_rate": 2.407642874328961e-06, + "loss": 0.805, + "step": 4365 + }, + { + "epoch": 0.7070053389419187, + "grad_norm": 0.9192998017239877, + "learning_rate": 2.3955786149465505e-06, + "loss": 0.8445, + "step": 4370 + }, + { + "epoch": 0.7078142695356738, + "grad_norm": 1.0236876517448235, + "learning_rate": 2.3835351280632514e-06, + "loss": 0.8484, + "step": 4375 + }, + { + "epoch": 0.7086232001294289, + "grad_norm": 0.9092749965225744, + "learning_rate": 2.3715125097362246e-06, + "loss": 0.8134, + "step": 4380 + }, + { + "epoch": 0.7094321307231839, + "grad_norm": 0.8913146454343571, + "learning_rate": 2.3595108558561814e-06, + "loss": 0.8231, + "step": 4385 + }, + { + "epoch": 0.710241061316939, + "grad_norm": 0.9163485544067049, + "learning_rate": 2.347530262146629e-06, + "loss": 0.8227, + "step": 4390 + }, + { + "epoch": 0.7110499919106941, + "grad_norm": 0.9692923072785117, + "learning_rate": 2.3355708241630998e-06, + "loss": 0.8114, + "step": 4395 + }, + { + "epoch": 0.7118589225044492, + "grad_norm": 0.9952307777966306, + "learning_rate": 2.3236326372923913e-06, + "loss": 0.8265, + "step": 4400 + }, + { + "epoch": 0.7126678530982041, + "grad_norm": 0.8908142322793587, + "learning_rate": 2.3117157967518052e-06, + "loss": 0.8306, + "step": 4405 + }, + { + "epoch": 0.7134767836919592, + "grad_norm": 0.9192836496709301, + "learning_rate": 2.299820397588387e-06, + "loss": 0.8104, + "step": 4410 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 0.90827967789888, + "learning_rate": 2.2879465346781703e-06, + "loss": 0.8106, + "step": 4415 + }, + { + "epoch": 0.7150946448794694, + "grad_norm": 0.9019495653500154, + "learning_rate": 2.2760943027254168e-06, + "loss": 0.8162, + "step": 4420 + }, + { + "epoch": 0.7159035754732244, + "grad_norm": 0.9225860245555908, + "learning_rate": 2.264263796261864e-06, + "loss": 0.8385, + "step": 4425 + }, + { + "epoch": 0.7167125060669794, + "grad_norm": 0.9905684037464217, + "learning_rate": 2.2524551096459703e-06, + "loss": 0.836, + "step": 4430 + }, + { + "epoch": 0.7175214366607345, + "grad_norm": 0.9642215008382582, + "learning_rate": 2.240668337062162e-06, + "loss": 0.8382, + "step": 4435 + }, + { + "epoch": 0.7183303672544895, + "grad_norm": 1.02786094199622, + "learning_rate": 2.228903572520076e-06, + "loss": 0.8106, + "step": 4440 + }, + { + "epoch": 0.7191392978482446, + "grad_norm": 0.885661361219246, + "learning_rate": 2.2171609098538275e-06, + "loss": 0.8363, + "step": 4445 + }, + { + "epoch": 0.7199482284419997, + "grad_norm": 0.9432094404967053, + "learning_rate": 2.2054404427212427e-06, + "loss": 0.8295, + "step": 4450 + }, + { + "epoch": 0.7207571590357548, + "grad_norm": 0.8867439139171059, + "learning_rate": 2.1937422646031216e-06, + "loss": 0.826, + "step": 4455 + }, + { + "epoch": 0.7215660896295097, + "grad_norm": 0.8799592291488351, + "learning_rate": 2.1820664688024885e-06, + "loss": 0.808, + "step": 4460 + }, + { + "epoch": 0.7223750202232648, + "grad_norm": 0.9187111254961942, + "learning_rate": 2.1704131484438523e-06, + "loss": 0.8284, + "step": 4465 + }, + { + "epoch": 0.7231839508170199, + "grad_norm": 0.8585062008980405, + "learning_rate": 2.1587823964724564e-06, + "loss": 0.8333, + "step": 4470 + }, + { + "epoch": 0.723992881410775, + "grad_norm": 0.8979409403030056, + "learning_rate": 2.1471743056535455e-06, + "loss": 0.8401, + "step": 4475 + }, + { + "epoch": 0.72480181200453, + "grad_norm": 0.9181507434036461, + "learning_rate": 2.1355889685716225e-06, + "loss": 0.8209, + "step": 4480 + }, + { + "epoch": 0.7256107425982851, + "grad_norm": 0.942590732475379, + "learning_rate": 2.124026477629706e-06, + "loss": 0.8066, + "step": 4485 + }, + { + "epoch": 0.7264196731920401, + "grad_norm": 0.9429750289429594, + "learning_rate": 2.1124869250486053e-06, + "loss": 0.8224, + "step": 4490 + }, + { + "epoch": 0.7272286037857951, + "grad_norm": 0.9542611725247429, + "learning_rate": 2.1009704028661643e-06, + "loss": 0.8384, + "step": 4495 + }, + { + "epoch": 0.7280375343795502, + "grad_norm": 0.9040683126789256, + "learning_rate": 2.08947700293655e-06, + "loss": 0.8418, + "step": 4500 + }, + { + "epoch": 0.7288464649733053, + "grad_norm": 0.9481483200578114, + "learning_rate": 2.078006816929503e-06, + "loss": 0.8144, + "step": 4505 + }, + { + "epoch": 0.7296553955670604, + "grad_norm": 0.8940640126195414, + "learning_rate": 2.066559936329618e-06, + "loss": 0.8059, + "step": 4510 + }, + { + "epoch": 0.7304643261608154, + "grad_norm": 0.9261592631490639, + "learning_rate": 2.0551364524356054e-06, + "loss": 0.8261, + "step": 4515 + }, + { + "epoch": 0.7312732567545704, + "grad_norm": 0.896600436452953, + "learning_rate": 2.0437364563595686e-06, + "loss": 0.8437, + "step": 4520 + }, + { + "epoch": 0.7320821873483255, + "grad_norm": 0.8919258636639074, + "learning_rate": 2.0323600390262743e-06, + "loss": 0.8226, + "step": 4525 + }, + { + "epoch": 0.7328911179420806, + "grad_norm": 0.9826507277779656, + "learning_rate": 2.0210072911724293e-06, + "loss": 0.8215, + "step": 4530 + }, + { + "epoch": 0.7337000485358356, + "grad_norm": 0.9182603432302395, + "learning_rate": 2.0096783033459564e-06, + "loss": 0.804, + "step": 4535 + }, + { + "epoch": 0.7345089791295907, + "grad_norm": 0.904398523841377, + "learning_rate": 1.99837316590527e-06, + "loss": 0.8385, + "step": 4540 + }, + { + "epoch": 0.7353179097233458, + "grad_norm": 0.8989331803375635, + "learning_rate": 1.987091969018561e-06, + "loss": 0.8225, + "step": 4545 + }, + { + "epoch": 0.7361268403171007, + "grad_norm": 0.8975780407900149, + "learning_rate": 1.9758348026630663e-06, + "loss": 0.8388, + "step": 4550 + }, + { + "epoch": 0.7369357709108558, + "grad_norm": 0.9121042226321563, + "learning_rate": 1.964601756624366e-06, + "loss": 0.8215, + "step": 4555 + }, + { + "epoch": 0.7377447015046109, + "grad_norm": 0.9214704840321436, + "learning_rate": 1.9533929204956553e-06, + "loss": 0.8142, + "step": 4560 + }, + { + "epoch": 0.738553632098366, + "grad_norm": 0.9189755165680765, + "learning_rate": 1.9422083836770405e-06, + "loss": 0.8307, + "step": 4565 + }, + { + "epoch": 0.739362562692121, + "grad_norm": 0.8679073250223568, + "learning_rate": 1.9310482353748146e-06, + "loss": 0.8118, + "step": 4570 + }, + { + "epoch": 0.7401714932858761, + "grad_norm": 0.9016222202190433, + "learning_rate": 1.9199125646007533e-06, + "loss": 0.8043, + "step": 4575 + }, + { + "epoch": 0.7409804238796311, + "grad_norm": 0.9105339155753437, + "learning_rate": 1.9088014601713993e-06, + "loss": 0.8376, + "step": 4580 + }, + { + "epoch": 0.7417893544733862, + "grad_norm": 1.005707657930975, + "learning_rate": 1.8977150107073632e-06, + "loss": 0.8219, + "step": 4585 + }, + { + "epoch": 0.7425982850671412, + "grad_norm": 0.8230187430989078, + "learning_rate": 1.8866533046326086e-06, + "loss": 0.839, + "step": 4590 + }, + { + "epoch": 0.7434072156608963, + "grad_norm": 0.9054157125777134, + "learning_rate": 1.8756164301737478e-06, + "loss": 0.8215, + "step": 4595 + }, + { + "epoch": 0.7442161462546514, + "grad_norm": 0.9386133499303263, + "learning_rate": 1.8646044753593429e-06, + "loss": 0.8139, + "step": 4600 + }, + { + "epoch": 0.7450250768484064, + "grad_norm": 0.8974275889331189, + "learning_rate": 1.8536175280191971e-06, + "loss": 0.8269, + "step": 4605 + }, + { + "epoch": 0.7458340074421614, + "grad_norm": 0.9291872406905795, + "learning_rate": 1.8426556757836594e-06, + "loss": 0.8104, + "step": 4610 + }, + { + "epoch": 0.7466429380359165, + "grad_norm": 0.921360125713896, + "learning_rate": 1.8317190060829242e-06, + "loss": 0.8078, + "step": 4615 + }, + { + "epoch": 0.7474518686296716, + "grad_norm": 0.9154624659332359, + "learning_rate": 1.820807606146332e-06, + "loss": 0.8228, + "step": 4620 + }, + { + "epoch": 0.7482607992234266, + "grad_norm": 0.9910039258360209, + "learning_rate": 1.8099215630016759e-06, + "loss": 0.8162, + "step": 4625 + }, + { + "epoch": 0.7490697298171817, + "grad_norm": 0.9527574985037581, + "learning_rate": 1.7990609634745087e-06, + "loss": 0.8206, + "step": 4630 + }, + { + "epoch": 0.7498786604109368, + "grad_norm": 0.8617520277776372, + "learning_rate": 1.788225894187443e-06, + "loss": 0.82, + "step": 4635 + }, + { + "epoch": 0.7506875910046918, + "grad_norm": 1.0275065944999713, + "learning_rate": 1.7774164415594692e-06, + "loss": 0.8474, + "step": 4640 + }, + { + "epoch": 0.7514965215984468, + "grad_norm": 1.0046608971251627, + "learning_rate": 1.7666326918052667e-06, + "loss": 0.8144, + "step": 4645 + }, + { + "epoch": 0.7523054521922019, + "grad_norm": 0.9234744279179873, + "learning_rate": 1.7558747309345075e-06, + "loss": 0.8258, + "step": 4650 + }, + { + "epoch": 0.753114382785957, + "grad_norm": 0.987941691115384, + "learning_rate": 1.7451426447511771e-06, + "loss": 0.8161, + "step": 4655 + }, + { + "epoch": 0.753923313379712, + "grad_norm": 1.0005046357018823, + "learning_rate": 1.7344365188528844e-06, + "loss": 0.817, + "step": 4660 + }, + { + "epoch": 0.754732243973467, + "grad_norm": 0.9561730998580897, + "learning_rate": 1.7237564386301869e-06, + "loss": 0.8088, + "step": 4665 + }, + { + "epoch": 0.7555411745672221, + "grad_norm": 0.9447102620547789, + "learning_rate": 1.7131024892659048e-06, + "loss": 0.8118, + "step": 4670 + }, + { + "epoch": 0.7563501051609772, + "grad_norm": 0.9539633697057311, + "learning_rate": 1.7024747557344411e-06, + "loss": 0.844, + "step": 4675 + }, + { + "epoch": 0.7571590357547322, + "grad_norm": 1.0424885395489862, + "learning_rate": 1.6918733228011058e-06, + "loss": 0.8315, + "step": 4680 + }, + { + "epoch": 0.7579679663484873, + "grad_norm": 0.9458202947476442, + "learning_rate": 1.6812982750214385e-06, + "loss": 0.8304, + "step": 4685 + }, + { + "epoch": 0.7587768969422424, + "grad_norm": 0.918182742402279, + "learning_rate": 1.6707496967405347e-06, + "loss": 0.8115, + "step": 4690 + }, + { + "epoch": 0.7595858275359975, + "grad_norm": 0.9291030646393732, + "learning_rate": 1.6602276720923732e-06, + "loss": 0.8163, + "step": 4695 + }, + { + "epoch": 0.7603947581297524, + "grad_norm": 0.9588497352744614, + "learning_rate": 1.6497322849991443e-06, + "loss": 0.8266, + "step": 4700 + }, + { + "epoch": 0.7612036887235075, + "grad_norm": 0.9290169660288218, + "learning_rate": 1.6392636191705818e-06, + "loss": 0.8231, + "step": 4705 + }, + { + "epoch": 0.7620126193172626, + "grad_norm": 0.9469265731424403, + "learning_rate": 1.6288217581032945e-06, + "loss": 0.8283, + "step": 4710 + }, + { + "epoch": 0.7628215499110176, + "grad_norm": 0.9739835381552143, + "learning_rate": 1.618406785080095e-06, + "loss": 0.8358, + "step": 4715 + }, + { + "epoch": 0.7636304805047727, + "grad_norm": 0.9140268328463288, + "learning_rate": 1.60801878316935e-06, + "loss": 0.8173, + "step": 4720 + }, + { + "epoch": 0.7644394110985278, + "grad_norm": 0.9519079424417303, + "learning_rate": 1.5976578352243015e-06, + "loss": 0.8013, + "step": 4725 + }, + { + "epoch": 0.7652483416922828, + "grad_norm": 1.0216658413405784, + "learning_rate": 1.587324023882415e-06, + "loss": 0.827, + "step": 4730 + }, + { + "epoch": 0.7660572722860378, + "grad_norm": 0.981342150260489, + "learning_rate": 1.5770174315647185e-06, + "loss": 0.8172, + "step": 4735 + }, + { + "epoch": 0.7668662028797929, + "grad_norm": 0.9841540010061296, + "learning_rate": 1.566738140475146e-06, + "loss": 0.8298, + "step": 4740 + }, + { + "epoch": 0.767675133473548, + "grad_norm": 0.9647924689522289, + "learning_rate": 1.5564862325998754e-06, + "loss": 0.8349, + "step": 4745 + }, + { + "epoch": 0.7684840640673031, + "grad_norm": 0.9071693118539406, + "learning_rate": 1.5462617897066863e-06, + "loss": 0.8266, + "step": 4750 + }, + { + "epoch": 0.769292994661058, + "grad_norm": 0.9672024055908947, + "learning_rate": 1.536064893344298e-06, + "loss": 0.8062, + "step": 4755 + }, + { + "epoch": 0.7701019252548131, + "grad_norm": 0.9479852003456001, + "learning_rate": 1.5258956248417228e-06, + "loss": 0.8171, + "step": 4760 + }, + { + "epoch": 0.7709108558485682, + "grad_norm": 1.0847747231670404, + "learning_rate": 1.515754065307622e-06, + "loss": 0.8343, + "step": 4765 + }, + { + "epoch": 0.7717197864423232, + "grad_norm": 0.8930428867327512, + "learning_rate": 1.505640295629645e-06, + "loss": 0.8166, + "step": 4770 + }, + { + "epoch": 0.7725287170360783, + "grad_norm": 0.9689746816489254, + "learning_rate": 1.4955543964738e-06, + "loss": 0.8357, + "step": 4775 + }, + { + "epoch": 0.7733376476298334, + "grad_norm": 0.9559994524411303, + "learning_rate": 1.4854964482838014e-06, + "loss": 0.8497, + "step": 4780 + }, + { + "epoch": 0.7741465782235885, + "grad_norm": 0.9337905415707888, + "learning_rate": 1.4754665312804311e-06, + "loss": 0.8312, + "step": 4785 + }, + { + "epoch": 0.7749555088173434, + "grad_norm": 0.9273467544337363, + "learning_rate": 1.4654647254608988e-06, + "loss": 0.8234, + "step": 4790 + }, + { + "epoch": 0.7757644394110985, + "grad_norm": 0.9792032991670162, + "learning_rate": 1.4554911105982022e-06, + "loss": 0.8468, + "step": 4795 + }, + { + "epoch": 0.7765733700048536, + "grad_norm": 0.9045240623424784, + "learning_rate": 1.445545766240492e-06, + "loss": 0.8315, + "step": 4800 + }, + { + "epoch": 0.7773823005986087, + "grad_norm": 0.9547782673298132, + "learning_rate": 1.4356287717104384e-06, + "loss": 0.8259, + "step": 4805 + }, + { + "epoch": 0.7781912311923637, + "grad_norm": 1.0002987188424177, + "learning_rate": 1.4257402061045966e-06, + "loss": 0.8242, + "step": 4810 + }, + { + "epoch": 0.7790001617861187, + "grad_norm": 0.9750841170635594, + "learning_rate": 1.4158801482927764e-06, + "loss": 0.8253, + "step": 4815 + }, + { + "epoch": 0.7798090923798738, + "grad_norm": 1.0204763580485683, + "learning_rate": 1.4060486769174158e-06, + "loss": 0.8124, + "step": 4820 + }, + { + "epoch": 0.7806180229736288, + "grad_norm": 0.9140777892397043, + "learning_rate": 1.396245870392946e-06, + "loss": 0.8209, + "step": 4825 + }, + { + "epoch": 0.7814269535673839, + "grad_norm": 0.9326381455666519, + "learning_rate": 1.3864718069051765e-06, + "loss": 0.8085, + "step": 4830 + }, + { + "epoch": 0.782235884161139, + "grad_norm": 0.8927418792043738, + "learning_rate": 1.376726564410663e-06, + "loss": 0.8281, + "step": 4835 + }, + { + "epoch": 0.7830448147548941, + "grad_norm": 0.9878798053606094, + "learning_rate": 1.3670102206360936e-06, + "loss": 0.8393, + "step": 4840 + }, + { + "epoch": 0.783853745348649, + "grad_norm": 0.9345336173306545, + "learning_rate": 1.3573228530776605e-06, + "loss": 0.8044, + "step": 4845 + }, + { + "epoch": 0.7846626759424041, + "grad_norm": 0.9721628031873526, + "learning_rate": 1.3476645390004473e-06, + "loss": 0.8386, + "step": 4850 + }, + { + "epoch": 0.7854716065361592, + "grad_norm": 0.915209628149697, + "learning_rate": 1.3380353554378074e-06, + "loss": 0.8357, + "step": 4855 + }, + { + "epoch": 0.7862805371299143, + "grad_norm": 0.9820722069486887, + "learning_rate": 1.3284353791907584e-06, + "loss": 0.8333, + "step": 4860 + }, + { + "epoch": 0.7870894677236693, + "grad_norm": 0.9732881075592806, + "learning_rate": 1.3188646868273615e-06, + "loss": 0.8239, + "step": 4865 + }, + { + "epoch": 0.7878983983174244, + "grad_norm": 0.8961967498766817, + "learning_rate": 1.3093233546821143e-06, + "loss": 0.824, + "step": 4870 + }, + { + "epoch": 0.7887073289111795, + "grad_norm": 0.9052548621214841, + "learning_rate": 1.2998114588553429e-06, + "loss": 0.8267, + "step": 4875 + }, + { + "epoch": 0.7895162595049344, + "grad_norm": 1.0580362890932773, + "learning_rate": 1.2903290752125914e-06, + "loss": 0.8218, + "step": 4880 + }, + { + "epoch": 0.7903251900986895, + "grad_norm": 0.853800216222736, + "learning_rate": 1.28087627938402e-06, + "loss": 0.8166, + "step": 4885 + }, + { + "epoch": 0.7911341206924446, + "grad_norm": 0.9652580037959974, + "learning_rate": 1.2714531467637998e-06, + "loss": 0.8238, + "step": 4890 + }, + { + "epoch": 0.7919430512861997, + "grad_norm": 0.8842437766967849, + "learning_rate": 1.2620597525095135e-06, + "loss": 0.817, + "step": 4895 + }, + { + "epoch": 0.7927519818799547, + "grad_norm": 0.9258752585803977, + "learning_rate": 1.2526961715415542e-06, + "loss": 0.8279, + "step": 4900 + }, + { + "epoch": 0.7935609124737097, + "grad_norm": 0.8928563496158387, + "learning_rate": 1.2433624785425291e-06, + "loss": 0.8162, + "step": 4905 + }, + { + "epoch": 0.7943698430674648, + "grad_norm": 0.982000276972604, + "learning_rate": 1.2340587479566597e-06, + "loss": 0.8356, + "step": 4910 + }, + { + "epoch": 0.7951787736612199, + "grad_norm": 0.9282259377376256, + "learning_rate": 1.2247850539891947e-06, + "loss": 0.8253, + "step": 4915 + }, + { + "epoch": 0.7959877042549749, + "grad_norm": 0.9525465075992243, + "learning_rate": 1.2155414706058172e-06, + "loss": 0.8169, + "step": 4920 + }, + { + "epoch": 0.79679663484873, + "grad_norm": 0.9231085182420589, + "learning_rate": 1.206328071532048e-06, + "loss": 0.8138, + "step": 4925 + }, + { + "epoch": 0.7976055654424851, + "grad_norm": 0.9253319119591827, + "learning_rate": 1.197144930252665e-06, + "loss": 0.8225, + "step": 4930 + }, + { + "epoch": 0.79841449603624, + "grad_norm": 0.9002303313112969, + "learning_rate": 1.187992120011111e-06, + "loss": 0.8291, + "step": 4935 + }, + { + "epoch": 0.7992234266299951, + "grad_norm": 0.9376615616843144, + "learning_rate": 1.178869713808916e-06, + "loss": 0.8341, + "step": 4940 + }, + { + "epoch": 0.8000323572237502, + "grad_norm": 0.9698576047423542, + "learning_rate": 1.1697777844051105e-06, + "loss": 0.8147, + "step": 4945 + }, + { + "epoch": 0.8008412878175053, + "grad_norm": 0.9273363034177441, + "learning_rate": 1.1607164043156455e-06, + "loss": 0.8152, + "step": 4950 + }, + { + "epoch": 0.8016502184112603, + "grad_norm": 1.0253181360844876, + "learning_rate": 1.1516856458128167e-06, + "loss": 0.8084, + "step": 4955 + }, + { + "epoch": 0.8024591490050154, + "grad_norm": 0.9400581889744581, + "learning_rate": 1.1426855809246846e-06, + "loss": 0.8232, + "step": 4960 + }, + { + "epoch": 0.8032680795987704, + "grad_norm": 0.9017439278181727, + "learning_rate": 1.133716281434502e-06, + "loss": 0.8212, + "step": 4965 + }, + { + "epoch": 0.8040770101925255, + "grad_norm": 0.9129823712372367, + "learning_rate": 1.1247778188801428e-06, + "loss": 0.8214, + "step": 4970 + }, + { + "epoch": 0.8048859407862805, + "grad_norm": 0.9288433560645074, + "learning_rate": 1.1158702645535285e-06, + "loss": 0.8474, + "step": 4975 + }, + { + "epoch": 0.8056948713800356, + "grad_norm": 0.9211800178241987, + "learning_rate": 1.1069936895000605e-06, + "loss": 0.8087, + "step": 4980 + }, + { + "epoch": 0.8065038019737907, + "grad_norm": 1.0542799666763112, + "learning_rate": 1.0981481645180563e-06, + "loss": 0.842, + "step": 4985 + }, + { + "epoch": 0.8073127325675457, + "grad_norm": 0.9078222972795535, + "learning_rate": 1.0893337601581766e-06, + "loss": 0.8171, + "step": 4990 + }, + { + "epoch": 0.8081216631613007, + "grad_norm": 0.8965670623934641, + "learning_rate": 1.0805505467228762e-06, + "loss": 0.8106, + "step": 4995 + }, + { + "epoch": 0.8089305937550558, + "grad_norm": 0.9618251738798234, + "learning_rate": 1.0717985942658299e-06, + "loss": 0.8146, + "step": 5000 + }, + { + "epoch": 0.8097395243488109, + "grad_norm": 0.950502760029193, + "learning_rate": 1.063077972591382e-06, + "loss": 0.8215, + "step": 5005 + }, + { + "epoch": 0.8105484549425659, + "grad_norm": 0.9493168241113547, + "learning_rate": 1.0543887512539851e-06, + "loss": 0.8194, + "step": 5010 + }, + { + "epoch": 0.811357385536321, + "grad_norm": 0.8823063979936068, + "learning_rate": 1.0457309995576498e-06, + "loss": 0.8081, + "step": 5015 + }, + { + "epoch": 0.8121663161300761, + "grad_norm": 0.9596843704171958, + "learning_rate": 1.0371047865553847e-06, + "loss": 0.7973, + "step": 5020 + }, + { + "epoch": 0.8129752467238311, + "grad_norm": 0.9284122972047167, + "learning_rate": 1.0285101810486535e-06, + "loss": 0.8468, + "step": 5025 + }, + { + "epoch": 0.8137841773175861, + "grad_norm": 0.922908519391859, + "learning_rate": 1.0199472515868235e-06, + "loss": 0.8602, + "step": 5030 + }, + { + "epoch": 0.8145931079113412, + "grad_norm": 0.9150347591949571, + "learning_rate": 1.0114160664666156e-06, + "loss": 0.8285, + "step": 5035 + }, + { + "epoch": 0.8154020385050963, + "grad_norm": 0.961458868733065, + "learning_rate": 1.0029166937315682e-06, + "loss": 0.8261, + "step": 5040 + }, + { + "epoch": 0.8162109690988513, + "grad_norm": 0.9795182791817205, + "learning_rate": 9.94449201171479e-07, + "loss": 0.8077, + "step": 5045 + }, + { + "epoch": 0.8170198996926064, + "grad_norm": 0.9712437822816266, + "learning_rate": 9.86013656321882e-07, + "loss": 0.8136, + "step": 5050 + }, + { + "epoch": 0.8178288302863614, + "grad_norm": 0.9243414819653508, + "learning_rate": 9.77610126463497e-07, + "loss": 0.8116, + "step": 5055 + }, + { + "epoch": 0.8186377608801165, + "grad_norm": 0.8953653000547861, + "learning_rate": 9.69238678621698e-07, + "loss": 0.8052, + "step": 5060 + }, + { + "epoch": 0.8194466914738715, + "grad_norm": 0.9571083425955552, + "learning_rate": 9.608993795659766e-07, + "loss": 0.8361, + "step": 5065 + }, + { + "epoch": 0.8202556220676266, + "grad_norm": 0.8812425656788687, + "learning_rate": 9.525922958094113e-07, + "loss": 0.8091, + "step": 5070 + }, + { + "epoch": 0.8210645526613817, + "grad_norm": 0.9769325986983122, + "learning_rate": 9.443174936081345e-07, + "loss": 0.8384, + "step": 5075 + }, + { + "epoch": 0.8218734832551368, + "grad_norm": 0.9429765500848418, + "learning_rate": 9.360750389608069e-07, + "loss": 0.823, + "step": 5080 + }, + { + "epoch": 0.8226824138488917, + "grad_norm": 0.921228115104811, + "learning_rate": 9.278649976080889e-07, + "loss": 0.8331, + "step": 5085 + }, + { + "epoch": 0.8234913444426468, + "grad_norm": 0.9819735566785222, + "learning_rate": 9.196874350321161e-07, + "loss": 0.829, + "step": 5090 + }, + { + "epoch": 0.8243002750364019, + "grad_norm": 0.8804316419490247, + "learning_rate": 9.11542416455981e-07, + "loss": 0.8253, + "step": 5095 + }, + { + "epoch": 0.8251092056301569, + "grad_norm": 0.9309408261176141, + "learning_rate": 9.034300068432045e-07, + "loss": 0.8294, + "step": 5100 + }, + { + "epoch": 0.825918136223912, + "grad_norm": 0.8366500179121683, + "learning_rate": 8.953502708972279e-07, + "loss": 0.8309, + "step": 5105 + }, + { + "epoch": 0.8267270668176671, + "grad_norm": 0.9984582201604612, + "learning_rate": 8.873032730608883e-07, + "loss": 0.8288, + "step": 5110 + }, + { + "epoch": 0.8275359974114221, + "grad_norm": 1.0257992816612576, + "learning_rate": 8.792890775159124e-07, + "loss": 0.832, + "step": 5115 + }, + { + "epoch": 0.8283449280051771, + "grad_norm": 0.9357127400993555, + "learning_rate": 8.713077481823978e-07, + "loss": 0.8377, + "step": 5120 + }, + { + "epoch": 0.8291538585989322, + "grad_norm": 1.042951179385166, + "learning_rate": 8.633593487183067e-07, + "loss": 0.8171, + "step": 5125 + }, + { + "epoch": 0.8299627891926873, + "grad_norm": 1.0012604552671969, + "learning_rate": 8.554439425189537e-07, + "loss": 0.8017, + "step": 5130 + }, + { + "epoch": 0.8307717197864424, + "grad_norm": 0.9393560978003819, + "learning_rate": 8.475615927165093e-07, + "loss": 0.8208, + "step": 5135 + }, + { + "epoch": 0.8315806503801974, + "grad_norm": 0.9829342370330283, + "learning_rate": 8.397123621794867e-07, + "loss": 0.8213, + "step": 5140 + }, + { + "epoch": 0.8323895809739524, + "grad_norm": 0.993397225397711, + "learning_rate": 8.318963135122471e-07, + "loss": 0.8386, + "step": 5145 + }, + { + "epoch": 0.8331985115677075, + "grad_norm": 0.9203262711299934, + "learning_rate": 8.241135090544966e-07, + "loss": 0.8135, + "step": 5150 + }, + { + "epoch": 0.8340074421614625, + "grad_norm": 0.9284177490644239, + "learning_rate": 8.163640108807897e-07, + "loss": 0.834, + "step": 5155 + }, + { + "epoch": 0.8348163727552176, + "grad_norm": 0.9517537835456206, + "learning_rate": 8.086478808000359e-07, + "loss": 0.7949, + "step": 5160 + }, + { + "epoch": 0.8356253033489727, + "grad_norm": 0.9372135921171859, + "learning_rate": 8.009651803550045e-07, + "loss": 0.8289, + "step": 5165 + }, + { + "epoch": 0.8364342339427278, + "grad_norm": 1.000548748366353, + "learning_rate": 7.933159708218347e-07, + "loss": 0.8268, + "step": 5170 + }, + { + "epoch": 0.8372431645364827, + "grad_norm": 0.9612535821852152, + "learning_rate": 7.857003132095481e-07, + "loss": 0.8182, + "step": 5175 + }, + { + "epoch": 0.8380520951302378, + "grad_norm": 0.9551451813093564, + "learning_rate": 7.7811826825956e-07, + "loss": 0.8399, + "step": 5180 + }, + { + "epoch": 0.8388610257239929, + "grad_norm": 0.9803361111190585, + "learning_rate": 7.705698964451941e-07, + "loss": 0.8345, + "step": 5185 + }, + { + "epoch": 0.839669956317748, + "grad_norm": 0.9615430440473038, + "learning_rate": 7.630552579712041e-07, + "loss": 0.8482, + "step": 5190 + }, + { + "epoch": 0.840478886911503, + "grad_norm": 0.8717103769599843, + "learning_rate": 7.555744127732922e-07, + "loss": 0.8133, + "step": 5195 + }, + { + "epoch": 0.841287817505258, + "grad_norm": 0.9559551946507219, + "learning_rate": 7.481274205176286e-07, + "loss": 0.8396, + "step": 5200 + }, + { + "epoch": 0.8420967480990131, + "grad_norm": 0.9386431006557889, + "learning_rate": 7.407143406003781e-07, + "loss": 0.8166, + "step": 5205 + }, + { + "epoch": 0.8429056786927681, + "grad_norm": 0.9453424188819014, + "learning_rate": 7.33335232147222e-07, + "loss": 0.8318, + "step": 5210 + }, + { + "epoch": 0.8437146092865232, + "grad_norm": 0.9335912363260552, + "learning_rate": 7.25990154012895e-07, + "loss": 0.8043, + "step": 5215 + }, + { + "epoch": 0.8445235398802783, + "grad_norm": 0.9242358250100629, + "learning_rate": 7.186791647807078e-07, + "loss": 0.8216, + "step": 5220 + }, + { + "epoch": 0.8453324704740334, + "grad_norm": 0.8917056191120556, + "learning_rate": 7.114023227620831e-07, + "loss": 0.8263, + "step": 5225 + }, + { + "epoch": 0.8461414010677883, + "grad_norm": 0.8756042557078504, + "learning_rate": 7.04159685996092e-07, + "loss": 0.8234, + "step": 5230 + }, + { + "epoch": 0.8469503316615434, + "grad_norm": 0.9669924510507611, + "learning_rate": 6.969513122489862e-07, + "loss": 0.8317, + "step": 5235 + }, + { + "epoch": 0.8477592622552985, + "grad_norm": 0.9690987783521561, + "learning_rate": 6.897772590137436e-07, + "loss": 0.8196, + "step": 5240 + }, + { + "epoch": 0.8485681928490536, + "grad_norm": 1.032895557175751, + "learning_rate": 6.826375835096038e-07, + "loss": 0.8017, + "step": 5245 + }, + { + "epoch": 0.8493771234428086, + "grad_norm": 1.0187566680457716, + "learning_rate": 6.755323426816169e-07, + "loss": 0.8354, + "step": 5250 + }, + { + "epoch": 0.8501860540365637, + "grad_norm": 0.8816437739999422, + "learning_rate": 6.684615932001847e-07, + "loss": 0.8581, + "step": 5255 + }, + { + "epoch": 0.8509949846303188, + "grad_norm": 0.9088385041537731, + "learning_rate": 6.614253914606134e-07, + "loss": 0.8036, + "step": 5260 + }, + { + "epoch": 0.8518039152240737, + "grad_norm": 0.9703072936941138, + "learning_rate": 6.544237935826558e-07, + "loss": 0.8277, + "step": 5265 + }, + { + "epoch": 0.8526128458178288, + "grad_norm": 0.9508642384505709, + "learning_rate": 6.474568554100768e-07, + "loss": 0.8243, + "step": 5270 + }, + { + "epoch": 0.8534217764115839, + "grad_norm": 0.9433685567993545, + "learning_rate": 6.405246325101955e-07, + "loss": 0.8056, + "step": 5275 + }, + { + "epoch": 0.854230707005339, + "grad_norm": 0.8978485349230313, + "learning_rate": 6.336271801734479e-07, + "loss": 0.8016, + "step": 5280 + }, + { + "epoch": 0.855039637599094, + "grad_norm": 0.9591854547149082, + "learning_rate": 6.267645534129446e-07, + "loss": 0.8184, + "step": 5285 + }, + { + "epoch": 0.855848568192849, + "grad_norm": 0.9368695693533301, + "learning_rate": 6.199368069640343e-07, + "loss": 0.8345, + "step": 5290 + }, + { + "epoch": 0.8566574987866041, + "grad_norm": 0.9363387262284574, + "learning_rate": 6.131439952838608e-07, + "loss": 0.8143, + "step": 5295 + }, + { + "epoch": 0.8574664293803592, + "grad_norm": 0.9449827101149645, + "learning_rate": 6.063861725509374e-07, + "loss": 0.801, + "step": 5300 + }, + { + "epoch": 0.8582753599741142, + "grad_norm": 0.9911552846071767, + "learning_rate": 5.996633926647083e-07, + "loss": 0.8248, + "step": 5305 + }, + { + "epoch": 0.8590842905678693, + "grad_norm": 0.9088842745097252, + "learning_rate": 5.92975709245121e-07, + "loss": 0.8047, + "step": 5310 + }, + { + "epoch": 0.8598932211616244, + "grad_norm": 0.8908028970825979, + "learning_rate": 5.863231756322019e-07, + "loss": 0.8242, + "step": 5315 + }, + { + "epoch": 0.8607021517553793, + "grad_norm": 0.9019135577959921, + "learning_rate": 5.797058448856213e-07, + "loss": 0.8182, + "step": 5320 + }, + { + "epoch": 0.8615110823491344, + "grad_norm": 0.9938100484918152, + "learning_rate": 5.731237697842811e-07, + "loss": 0.8381, + "step": 5325 + }, + { + "epoch": 0.8623200129428895, + "grad_norm": 0.9522671534271417, + "learning_rate": 5.665770028258876e-07, + "loss": 0.8103, + "step": 5330 + }, + { + "epoch": 0.8631289435366446, + "grad_norm": 1.0241489828374508, + "learning_rate": 5.600655962265345e-07, + "loss": 0.8253, + "step": 5335 + }, + { + "epoch": 0.8639378741303996, + "grad_norm": 0.8339409337033903, + "learning_rate": 5.535896019202853e-07, + "loss": 0.8091, + "step": 5340 + }, + { + "epoch": 0.8647468047241547, + "grad_norm": 0.953851217822305, + "learning_rate": 5.471490715587618e-07, + "loss": 0.8277, + "step": 5345 + }, + { + "epoch": 0.8655557353179097, + "grad_norm": 0.9253602828275306, + "learning_rate": 5.407440565107291e-07, + "loss": 0.8352, + "step": 5350 + }, + { + "epoch": 0.8663646659116648, + "grad_norm": 0.8979717645863395, + "learning_rate": 5.343746078616879e-07, + "loss": 0.8346, + "step": 5355 + }, + { + "epoch": 0.8671735965054198, + "grad_norm": 0.9052656857430195, + "learning_rate": 5.280407764134648e-07, + "loss": 0.8098, + "step": 5360 + }, + { + "epoch": 0.8679825270991749, + "grad_norm": 0.9265736364407307, + "learning_rate": 5.21742612683811e-07, + "loss": 0.8134, + "step": 5365 + }, + { + "epoch": 0.86879145769293, + "grad_norm": 0.9905238729956866, + "learning_rate": 5.154801669059961e-07, + "loss": 0.8057, + "step": 5370 + }, + { + "epoch": 0.869600388286685, + "grad_norm": 0.9346637770934627, + "learning_rate": 5.092534890284057e-07, + "loss": 0.8319, + "step": 5375 + }, + { + "epoch": 0.87040931888044, + "grad_norm": 0.9036166328056819, + "learning_rate": 5.030626287141488e-07, + "loss": 0.8242, + "step": 5380 + }, + { + "epoch": 0.8712182494741951, + "grad_norm": 0.957351416925992, + "learning_rate": 4.969076353406571e-07, + "loss": 0.81, + "step": 5385 + }, + { + "epoch": 0.8720271800679502, + "grad_norm": 0.9998030765172857, + "learning_rate": 4.907885579992943e-07, + "loss": 0.7931, + "step": 5390 + }, + { + "epoch": 0.8728361106617052, + "grad_norm": 0.9015423928862624, + "learning_rate": 4.847054454949617e-07, + "loss": 0.8321, + "step": 5395 + }, + { + "epoch": 0.8736450412554603, + "grad_norm": 0.8951302299296955, + "learning_rate": 4.786583463457106e-07, + "loss": 0.8175, + "step": 5400 + }, + { + "epoch": 0.8744539718492154, + "grad_norm": 0.987288936023476, + "learning_rate": 4.726473087823524e-07, + "loss": 0.8149, + "step": 5405 + }, + { + "epoch": 0.8752629024429704, + "grad_norm": 0.9607690476849372, + "learning_rate": 4.666723807480794e-07, + "loss": 0.8395, + "step": 5410 + }, + { + "epoch": 0.8760718330367254, + "grad_norm": 0.9412173085678744, + "learning_rate": 4.6073360989807805e-07, + "loss": 0.8131, + "step": 5415 + }, + { + "epoch": 0.8768807636304805, + "grad_norm": 0.9953808945790064, + "learning_rate": 4.5483104359915095e-07, + "loss": 0.8337, + "step": 5420 + }, + { + "epoch": 0.8776896942242356, + "grad_norm": 0.9224438419258796, + "learning_rate": 4.4896472892933693e-07, + "loss": 0.827, + "step": 5425 + }, + { + "epoch": 0.8784986248179906, + "grad_norm": 0.9067448515242419, + "learning_rate": 4.431347126775382e-07, + "loss": 0.8225, + "step": 5430 + }, + { + "epoch": 0.8793075554117457, + "grad_norm": 0.9160793517967267, + "learning_rate": 4.3734104134314505e-07, + "loss": 0.8093, + "step": 5435 + }, + { + "epoch": 0.8801164860055007, + "grad_norm": 0.8957867534440033, + "learning_rate": 4.3158376113566656e-07, + "loss": 0.8255, + "step": 5440 + }, + { + "epoch": 0.8809254165992558, + "grad_norm": 0.9414164683136745, + "learning_rate": 4.258629179743612e-07, + "loss": 0.8307, + "step": 5445 + }, + { + "epoch": 0.8817343471930108, + "grad_norm": 1.0412360371956955, + "learning_rate": 4.2017855748786975e-07, + "loss": 0.8167, + "step": 5450 + }, + { + "epoch": 0.8825432777867659, + "grad_norm": 0.9168749499249985, + "learning_rate": 4.1453072501385415e-07, + "loss": 0.807, + "step": 5455 + }, + { + "epoch": 0.883352208380521, + "grad_norm": 0.885047475917418, + "learning_rate": 4.089194655986306e-07, + "loss": 0.831, + "step": 5460 + }, + { + "epoch": 0.8841611389742761, + "grad_norm": 0.9248545364082547, + "learning_rate": 4.0334482399681684e-07, + "loss": 0.836, + "step": 5465 + }, + { + "epoch": 0.884970069568031, + "grad_norm": 0.9565631081942835, + "learning_rate": 3.978068446709721e-07, + "loss": 0.8206, + "step": 5470 + }, + { + "epoch": 0.8857790001617861, + "grad_norm": 0.9313667515936709, + "learning_rate": 3.923055717912411e-07, + "loss": 0.8315, + "step": 5475 + }, + { + "epoch": 0.8865879307555412, + "grad_norm": 0.9153735916152161, + "learning_rate": 3.868410492350044e-07, + "loss": 0.8315, + "step": 5480 + }, + { + "epoch": 0.8873968613492962, + "grad_norm": 0.8996467266796176, + "learning_rate": 3.8141332058652447e-07, + "loss": 0.8039, + "step": 5485 + }, + { + "epoch": 0.8882057919430513, + "grad_norm": 0.8739192973106119, + "learning_rate": 3.7602242913660325e-07, + "loss": 0.8196, + "step": 5490 + }, + { + "epoch": 0.8890147225368064, + "grad_norm": 1.0836849051236812, + "learning_rate": 3.7066841788223394e-07, + "loss": 0.8414, + "step": 5495 + }, + { + "epoch": 0.8898236531305614, + "grad_norm": 0.8886207081976889, + "learning_rate": 3.6535132952625784e-07, + "loss": 0.8041, + "step": 5500 + }, + { + "epoch": 0.8906325837243164, + "grad_norm": 0.8954848811354755, + "learning_rate": 3.6007120647702566e-07, + "loss": 0.836, + "step": 5505 + }, + { + "epoch": 0.8914415143180715, + "grad_norm": 0.9763361834402516, + "learning_rate": 3.548280908480556e-07, + "loss": 0.8193, + "step": 5510 + }, + { + "epoch": 0.8922504449118266, + "grad_norm": 0.9676090733901674, + "learning_rate": 3.496220244577025e-07, + "loss": 0.8247, + "step": 5515 + }, + { + "epoch": 0.8930593755055817, + "grad_norm": 0.9583454675377773, + "learning_rate": 3.4445304882882056e-07, + "loss": 0.8232, + "step": 5520 + }, + { + "epoch": 0.8938683060993367, + "grad_norm": 0.9149362551378993, + "learning_rate": 3.3932120518843315e-07, + "loss": 0.8378, + "step": 5525 + }, + { + "epoch": 0.8946772366930917, + "grad_norm": 0.9981131323135035, + "learning_rate": 3.342265344674034e-07, + "loss": 0.8364, + "step": 5530 + }, + { + "epoch": 0.8954861672868468, + "grad_norm": 0.9602463248675637, + "learning_rate": 3.291690773001116e-07, + "loss": 0.8148, + "step": 5535 + }, + { + "epoch": 0.8962950978806018, + "grad_norm": 0.9061694557646229, + "learning_rate": 3.241488740241222e-07, + "loss": 0.8135, + "step": 5540 + }, + { + "epoch": 0.8971040284743569, + "grad_norm": 0.9504624920131578, + "learning_rate": 3.1916596467987395e-07, + "loss": 0.8, + "step": 5545 + }, + { + "epoch": 0.897912959068112, + "grad_norm": 1.0083410313503633, + "learning_rate": 3.142203890103512e-07, + "loss": 0.8194, + "step": 5550 + }, + { + "epoch": 0.8987218896618671, + "grad_norm": 0.981599869718165, + "learning_rate": 3.093121864607707e-07, + "loss": 0.816, + "step": 5555 + }, + { + "epoch": 0.899530820255622, + "grad_norm": 0.927185199167883, + "learning_rate": 3.0444139617826605e-07, + "loss": 0.8133, + "step": 5560 + }, + { + "epoch": 0.9003397508493771, + "grad_norm": 1.0064121664416639, + "learning_rate": 2.996080570115778e-07, + "loss": 0.8262, + "step": 5565 + }, + { + "epoch": 0.9011486814431322, + "grad_norm": 0.9554574033151684, + "learning_rate": 2.9481220751073793e-07, + "loss": 0.8247, + "step": 5570 + }, + { + "epoch": 0.9019576120368872, + "grad_norm": 0.9588661920794765, + "learning_rate": 2.9005388592676987e-07, + "loss": 0.8273, + "step": 5575 + }, + { + "epoch": 0.9027665426306423, + "grad_norm": 0.885495966716363, + "learning_rate": 2.8533313021137765e-07, + "loss": 0.8326, + "step": 5580 + }, + { + "epoch": 0.9035754732243974, + "grad_norm": 0.9273868198730849, + "learning_rate": 2.806499780166455e-07, + "loss": 0.8073, + "step": 5585 + }, + { + "epoch": 0.9043844038181524, + "grad_norm": 1.0414377339942757, + "learning_rate": 2.760044666947387e-07, + "loss": 0.8324, + "step": 5590 + }, + { + "epoch": 0.9051933344119074, + "grad_norm": 0.9699497983834742, + "learning_rate": 2.7139663329760203e-07, + "loss": 0.8222, + "step": 5595 + }, + { + "epoch": 0.9060022650056625, + "grad_norm": 0.9269981190661996, + "learning_rate": 2.668265145766669e-07, + "loss": 0.8298, + "step": 5600 + }, + { + "epoch": 0.9068111955994176, + "grad_norm": 1.0012454133607382, + "learning_rate": 2.6229414698255907e-07, + "loss": 0.8293, + "step": 5605 + }, + { + "epoch": 0.9076201261931727, + "grad_norm": 0.9294225278070521, + "learning_rate": 2.577995666648053e-07, + "loss": 0.8228, + "step": 5610 + }, + { + "epoch": 0.9084290567869276, + "grad_norm": 0.940534897284778, + "learning_rate": 2.533428094715473e-07, + "loss": 0.8232, + "step": 5615 + }, + { + "epoch": 0.9092379873806827, + "grad_norm": 0.9310916767297108, + "learning_rate": 2.489239109492536e-07, + "loss": 0.8196, + "step": 5620 + }, + { + "epoch": 0.9100469179744378, + "grad_norm": 1.012544196651258, + "learning_rate": 2.4454290634243927e-07, + "loss": 0.8092, + "step": 5625 + }, + { + "epoch": 0.9108558485681928, + "grad_norm": 0.9941950459630348, + "learning_rate": 2.4019983059338005e-07, + "loss": 0.8219, + "step": 5630 + }, + { + "epoch": 0.9116647791619479, + "grad_norm": 1.0349621117968182, + "learning_rate": 2.3589471834183975e-07, + "loss": 0.7977, + "step": 5635 + }, + { + "epoch": 0.912473709755703, + "grad_norm": 0.9021187223816023, + "learning_rate": 2.3162760392478777e-07, + "loss": 0.8256, + "step": 5640 + }, + { + "epoch": 0.913282640349458, + "grad_norm": 0.9107025655941831, + "learning_rate": 2.2739852137612984e-07, + "loss": 0.8279, + "step": 5645 + }, + { + "epoch": 0.914091570943213, + "grad_norm": 0.9459797428393495, + "learning_rate": 2.2320750442643423e-07, + "loss": 0.8077, + "step": 5650 + }, + { + "epoch": 0.9149005015369681, + "grad_norm": 1.1288128324454283, + "learning_rate": 2.1905458650266276e-07, + "loss": 0.8407, + "step": 5655 + }, + { + "epoch": 0.9157094321307232, + "grad_norm": 0.9221316094387947, + "learning_rate": 2.149398007279052e-07, + "loss": 0.8209, + "step": 5660 + }, + { + "epoch": 0.9165183627244783, + "grad_norm": 0.9047592079692445, + "learning_rate": 2.108631799211158e-07, + "loss": 0.8037, + "step": 5665 + }, + { + "epoch": 0.9173272933182333, + "grad_norm": 0.9186500058678413, + "learning_rate": 2.0682475659684953e-07, + "loss": 0.836, + "step": 5670 + }, + { + "epoch": 0.9181362239119883, + "grad_norm": 0.8965926605186532, + "learning_rate": 2.0282456296500385e-07, + "loss": 0.8303, + "step": 5675 + }, + { + "epoch": 0.9189451545057434, + "grad_norm": 0.9119449898107598, + "learning_rate": 1.988626309305597e-07, + "loss": 0.838, + "step": 5680 + }, + { + "epoch": 0.9197540850994984, + "grad_norm": 0.9512693378379136, + "learning_rate": 1.9493899209333146e-07, + "loss": 0.8027, + "step": 5685 + }, + { + "epoch": 0.9205630156932535, + "grad_norm": 0.9690819188637002, + "learning_rate": 1.9105367774771122e-07, + "loss": 0.8125, + "step": 5690 + }, + { + "epoch": 0.9213719462870086, + "grad_norm": 0.9571795973045409, + "learning_rate": 1.8720671888242058e-07, + "loss": 0.8271, + "step": 5695 + }, + { + "epoch": 0.9221808768807637, + "grad_norm": 0.909548062163855, + "learning_rate": 1.8339814618026252e-07, + "loss": 0.833, + "step": 5700 + }, + { + "epoch": 0.9229898074745186, + "grad_norm": 0.9572976405315803, + "learning_rate": 1.7962799001787823e-07, + "loss": 0.8103, + "step": 5705 + }, + { + "epoch": 0.9237987380682737, + "grad_norm": 1.0064204020940664, + "learning_rate": 1.7589628046550343e-07, + "loss": 0.8369, + "step": 5710 + }, + { + "epoch": 0.9246076686620288, + "grad_norm": 0.9734981320914123, + "learning_rate": 1.7220304728672977e-07, + "loss": 0.8187, + "step": 5715 + }, + { + "epoch": 0.9254165992557839, + "grad_norm": 0.9779299096983186, + "learning_rate": 1.6854831993826591e-07, + "loss": 0.8212, + "step": 5720 + }, + { + "epoch": 0.9262255298495389, + "grad_norm": 0.8990160141351417, + "learning_rate": 1.6493212756970356e-07, + "loss": 0.8194, + "step": 5725 + }, + { + "epoch": 0.927034460443294, + "grad_norm": 0.9133770658973546, + "learning_rate": 1.6135449902328627e-07, + "loss": 0.8377, + "step": 5730 + }, + { + "epoch": 0.927843391037049, + "grad_norm": 0.8935752099105897, + "learning_rate": 1.578154628336753e-07, + "loss": 0.8247, + "step": 5735 + }, + { + "epoch": 0.928652321630804, + "grad_norm": 0.9599763188366742, + "learning_rate": 1.5431504722772605e-07, + "loss": 0.8259, + "step": 5740 + }, + { + "epoch": 0.9294612522245591, + "grad_norm": 0.9543468652422167, + "learning_rate": 1.5085328012426293e-07, + "loss": 0.8239, + "step": 5745 + }, + { + "epoch": 0.9302701828183142, + "grad_norm": 0.9093126800081541, + "learning_rate": 1.4743018913385308e-07, + "loss": 0.8313, + "step": 5750 + }, + { + "epoch": 0.9310791134120693, + "grad_norm": 1.002356581515829, + "learning_rate": 1.4404580155859106e-07, + "loss": 0.8288, + "step": 5755 + }, + { + "epoch": 0.9318880440058243, + "grad_norm": 0.900910009096251, + "learning_rate": 1.407001443918743e-07, + "loss": 0.824, + "step": 5760 + }, + { + "epoch": 0.9326969745995793, + "grad_norm": 0.9316638128288214, + "learning_rate": 1.373932443181958e-07, + "loss": 0.8376, + "step": 5765 + }, + { + "epoch": 0.9335059051933344, + "grad_norm": 0.8859091785962655, + "learning_rate": 1.3412512771292574e-07, + "loss": 0.8262, + "step": 5770 + }, + { + "epoch": 0.9343148357870895, + "grad_norm": 0.9517796063815803, + "learning_rate": 1.3089582064210293e-07, + "loss": 0.8332, + "step": 5775 + }, + { + "epoch": 0.9351237663808445, + "grad_norm": 0.956425135920899, + "learning_rate": 1.2770534886222709e-07, + "loss": 0.8441, + "step": 5780 + }, + { + "epoch": 0.9359326969745996, + "grad_norm": 0.9436260486622394, + "learning_rate": 1.2455373782005343e-07, + "loss": 0.8246, + "step": 5785 + }, + { + "epoch": 0.9367416275683547, + "grad_norm": 0.8907054197780977, + "learning_rate": 1.2144101265238795e-07, + "loss": 0.8226, + "step": 5790 + }, + { + "epoch": 0.9375505581621096, + "grad_norm": 0.9599764091397304, + "learning_rate": 1.1836719818588971e-07, + "loss": 0.8197, + "step": 5795 + }, + { + "epoch": 0.9383594887558647, + "grad_norm": 0.89616146112279, + "learning_rate": 1.1533231893687158e-07, + "loss": 0.8292, + "step": 5800 + }, + { + "epoch": 0.9391684193496198, + "grad_norm": 0.9254286869197753, + "learning_rate": 1.1233639911110317e-07, + "loss": 0.8106, + "step": 5805 + }, + { + "epoch": 0.9399773499433749, + "grad_norm": 0.9336133063869916, + "learning_rate": 1.0937946260362154e-07, + "loss": 0.8174, + "step": 5810 + }, + { + "epoch": 0.9407862805371299, + "grad_norm": 0.9193663783506368, + "learning_rate": 1.0646153299853523e-07, + "loss": 0.8219, + "step": 5815 + }, + { + "epoch": 0.941595211130885, + "grad_norm": 0.9922561636241934, + "learning_rate": 1.0358263356884223e-07, + "loss": 0.8192, + "step": 5820 + }, + { + "epoch": 0.94240414172464, + "grad_norm": 0.9269516184874731, + "learning_rate": 1.0074278727623955e-07, + "loss": 0.8342, + "step": 5825 + }, + { + "epoch": 0.9432130723183951, + "grad_norm": 0.9567075615786765, + "learning_rate": 9.794201677094162e-08, + "loss": 0.8456, + "step": 5830 + }, + { + "epoch": 0.9440220029121501, + "grad_norm": 0.900361140775962, + "learning_rate": 9.51803443915017e-08, + "loss": 0.8071, + "step": 5835 + }, + { + "epoch": 0.9448309335059052, + "grad_norm": 0.8928244797252599, + "learning_rate": 9.245779216463024e-08, + "loss": 0.8184, + "step": 5840 + }, + { + "epoch": 0.9456398640996603, + "grad_norm": 1.0034007579574744, + "learning_rate": 8.977438180502118e-08, + "loss": 0.8356, + "step": 5845 + }, + { + "epoch": 0.9464487946934153, + "grad_norm": 0.9678539855117049, + "learning_rate": 8.713013471517873e-08, + "loss": 0.8346, + "step": 5850 + }, + { + "epoch": 0.9472577252871703, + "grad_norm": 0.9529843175929286, + "learning_rate": 8.452507198524584e-08, + "loss": 0.8101, + "step": 5855 + }, + { + "epoch": 0.9480666558809254, + "grad_norm": 0.9262321483199513, + "learning_rate": 8.195921439283716e-08, + "loss": 0.8242, + "step": 5860 + }, + { + "epoch": 0.9488755864746805, + "grad_norm": 0.885777314821936, + "learning_rate": 7.943258240287355e-08, + "loss": 0.8201, + "step": 5865 + }, + { + "epoch": 0.9496845170684355, + "grad_norm": 0.9553802642628383, + "learning_rate": 7.694519616741503e-08, + "loss": 0.8331, + "step": 5870 + }, + { + "epoch": 0.9504934476621906, + "grad_norm": 0.9768522628826408, + "learning_rate": 7.449707552550533e-08, + "loss": 0.8147, + "step": 5875 + }, + { + "epoch": 0.9513023782559457, + "grad_norm": 0.9068927130823696, + "learning_rate": 7.208824000301151e-08, + "loss": 0.8302, + "step": 5880 + }, + { + "epoch": 0.9521113088497007, + "grad_norm": 0.9575224681944164, + "learning_rate": 6.971870881246678e-08, + "loss": 0.8145, + "step": 5885 + }, + { + "epoch": 0.9529202394434557, + "grad_norm": 0.8832000510088256, + "learning_rate": 6.738850085291904e-08, + "loss": 0.8177, + "step": 5890 + }, + { + "epoch": 0.9537291700372108, + "grad_norm": 0.9566260863289568, + "learning_rate": 6.509763470977925e-08, + "loss": 0.8321, + "step": 5895 + }, + { + "epoch": 0.9545381006309659, + "grad_norm": 0.9104128242370191, + "learning_rate": 6.284612865467499e-08, + "loss": 0.8271, + "step": 5900 + }, + { + "epoch": 0.9553470312247209, + "grad_norm": 0.9365563187928431, + "learning_rate": 6.063400064530155e-08, + "loss": 0.8251, + "step": 5905 + }, + { + "epoch": 0.956155961818476, + "grad_norm": 0.8777597042529512, + "learning_rate": 5.8461268325281096e-08, + "loss": 0.8021, + "step": 5910 + }, + { + "epoch": 0.956964892412231, + "grad_norm": 0.9899981204211554, + "learning_rate": 5.632794902402205e-08, + "loss": 0.8099, + "step": 5915 + }, + { + "epoch": 0.9577738230059861, + "grad_norm": 0.900474026342178, + "learning_rate": 5.423405975657936e-08, + "loss": 0.8367, + "step": 5920 + }, + { + "epoch": 0.9585827535997411, + "grad_norm": 1.0047500482028124, + "learning_rate": 5.217961722351894e-08, + "loss": 0.8343, + "step": 5925 + }, + { + "epoch": 0.9593916841934962, + "grad_norm": 0.9477198924309056, + "learning_rate": 5.016463781078618e-08, + "loss": 0.8317, + "step": 5930 + }, + { + "epoch": 0.9602006147872513, + "grad_norm": 0.8807307401976437, + "learning_rate": 4.818913758957378e-08, + "loss": 0.8239, + "step": 5935 + }, + { + "epoch": 0.9610095453810064, + "grad_norm": 0.9428322183465226, + "learning_rate": 4.6253132316194103e-08, + "loss": 0.8296, + "step": 5940 + }, + { + "epoch": 0.9618184759747613, + "grad_norm": 0.9762495130698056, + "learning_rate": 4.4356637431953734e-08, + "loss": 0.8035, + "step": 5945 + }, + { + "epoch": 0.9626274065685164, + "grad_norm": 0.9782271222260458, + "learning_rate": 4.2499668063029075e-08, + "loss": 0.8079, + "step": 5950 + }, + { + "epoch": 0.9634363371622715, + "grad_norm": 1.0490239258099892, + "learning_rate": 4.068223902034651e-08, + "loss": 0.827, + "step": 5955 + }, + { + "epoch": 0.9642452677560265, + "grad_norm": 0.9547918261422726, + "learning_rate": 3.89043647994658e-08, + "loss": 0.8309, + "step": 5960 + }, + { + "epoch": 0.9650541983497816, + "grad_norm": 0.9195603493515522, + "learning_rate": 3.716605958046071e-08, + "loss": 0.8324, + "step": 5965 + }, + { + "epoch": 0.9658631289435367, + "grad_norm": 0.9215008996007901, + "learning_rate": 3.546733722781026e-08, + "loss": 0.8202, + "step": 5970 + }, + { + "epoch": 0.9666720595372917, + "grad_norm": 0.9117492446000369, + "learning_rate": 3.3808211290284886e-08, + "loss": 0.8283, + "step": 5975 + }, + { + "epoch": 0.9674809901310467, + "grad_norm": 0.9238030724612377, + "learning_rate": 3.218869500084099e-08, + "loss": 0.8203, + "step": 5980 + }, + { + "epoch": 0.9682899207248018, + "grad_norm": 0.9683332126948087, + "learning_rate": 3.0608801276511556e-08, + "loss": 0.8063, + "step": 5985 + }, + { + "epoch": 0.9690988513185569, + "grad_norm": 0.9578244707163264, + "learning_rate": 2.9068542718307947e-08, + "loss": 0.8178, + "step": 5990 + }, + { + "epoch": 0.969907781912312, + "grad_norm": 1.0002466766621863, + "learning_rate": 2.7567931611116037e-08, + "loss": 0.8298, + "step": 5995 + }, + { + "epoch": 0.970716712506067, + "grad_norm": 0.9033323404105812, + "learning_rate": 2.6106979923599117e-08, + "loss": 0.8314, + "step": 6000 + }, + { + "epoch": 0.971525643099822, + "grad_norm": 0.9981727314544666, + "learning_rate": 2.4685699308102385e-08, + "loss": 0.8245, + "step": 6005 + }, + { + "epoch": 0.9723345736935771, + "grad_norm": 0.90324273915133, + "learning_rate": 2.330410110056025e-08, + "loss": 0.8128, + "step": 6010 + }, + { + "epoch": 0.9731435042873321, + "grad_norm": 0.9184000593665834, + "learning_rate": 2.1962196320406416e-08, + "loss": 0.8234, + "step": 6015 + }, + { + "epoch": 0.9739524348810872, + "grad_norm": 1.0208286644340638, + "learning_rate": 2.065999567048449e-08, + "loss": 0.8313, + "step": 6020 + }, + { + "epoch": 0.9747613654748423, + "grad_norm": 0.8848130385791, + "learning_rate": 1.9397509536964177e-08, + "loss": 0.8045, + "step": 6025 + }, + { + "epoch": 0.9755702960685974, + "grad_norm": 0.8868943198262542, + "learning_rate": 1.8174747989258002e-08, + "loss": 0.8181, + "step": 6030 + }, + { + "epoch": 0.9763792266623523, + "grad_norm": 0.9860697437983315, + "learning_rate": 1.6991720779939157e-08, + "loss": 0.8269, + "step": 6035 + }, + { + "epoch": 0.9771881572561074, + "grad_norm": 0.9169269167928082, + "learning_rate": 1.5848437344667124e-08, + "loss": 0.8155, + "step": 6040 + }, + { + "epoch": 0.9779970878498625, + "grad_norm": 0.8899129024501534, + "learning_rate": 1.4744906802110493e-08, + "loss": 0.814, + "step": 6045 + }, + { + "epoch": 0.9788060184436176, + "grad_norm": 0.9676626630215127, + "learning_rate": 1.3681137953872604e-08, + "loss": 0.8248, + "step": 6050 + }, + { + "epoch": 0.9796149490373726, + "grad_norm": 1.008805163098565, + "learning_rate": 1.2657139284425468e-08, + "loss": 0.8442, + "step": 6055 + }, + { + "epoch": 0.9804238796311276, + "grad_norm": 0.9275436235635999, + "learning_rate": 1.167291896103817e-08, + "loss": 0.8255, + "step": 6060 + }, + { + "epoch": 0.9812328102248827, + "grad_norm": 0.9859360057982008, + "learning_rate": 1.0728484833713581e-08, + "loss": 0.8231, + "step": 6065 + }, + { + "epoch": 0.9820417408186377, + "grad_norm": 0.9537378986780001, + "learning_rate": 9.823844435126184e-09, + "loss": 0.8204, + "step": 6070 + }, + { + "epoch": 0.9828506714123928, + "grad_norm": 0.9029585570220431, + "learning_rate": 8.959004980559905e-09, + "loss": 0.8351, + "step": 6075 + }, + { + "epoch": 0.9836596020061479, + "grad_norm": 0.9441776141647337, + "learning_rate": 8.133973367853708e-09, + "loss": 0.8401, + "step": 6080 + }, + { + "epoch": 0.984468532599903, + "grad_norm": 0.9139424255052578, + "learning_rate": 7.348756177343319e-09, + "loss": 0.8228, + "step": 6085 + }, + { + "epoch": 0.9852774631936579, + "grad_norm": 0.9582142736178327, + "learning_rate": 6.603359671810694e-09, + "loss": 0.8239, + "step": 6090 + }, + { + "epoch": 0.986086393787413, + "grad_norm": 0.9470852983866349, + "learning_rate": 5.8977897964335174e-09, + "loss": 0.8164, + "step": 6095 + }, + { + "epoch": 0.9868953243811681, + "grad_norm": 0.9016603805684872, + "learning_rate": 5.232052178738567e-09, + "loss": 0.8232, + "step": 6100 + }, + { + "epoch": 0.9877042549749232, + "grad_norm": 0.9549402446433066, + "learning_rate": 4.606152128555086e-09, + "loss": 0.8088, + "step": 6105 + }, + { + "epoch": 0.9885131855686782, + "grad_norm": 0.9529061460613366, + "learning_rate": 4.020094637973704e-09, + "loss": 0.8162, + "step": 6110 + }, + { + "epoch": 0.9893221161624333, + "grad_norm": 0.9500963750803435, + "learning_rate": 3.4738843813075795e-09, + "loss": 0.813, + "step": 6115 + }, + { + "epoch": 0.9901310467561883, + "grad_norm": 0.9329204750410419, + "learning_rate": 2.967525715052433e-09, + "loss": 0.839, + "step": 6120 + }, + { + "epoch": 0.9909399773499433, + "grad_norm": 0.9456290946732857, + "learning_rate": 2.5010226778537927e-09, + "loss": 0.8231, + "step": 6125 + }, + { + "epoch": 0.9917489079436984, + "grad_norm": 0.8612020082608306, + "learning_rate": 2.074378990474246e-09, + "loss": 0.8139, + "step": 6130 + }, + { + "epoch": 0.9925578385374535, + "grad_norm": 0.9267109066168538, + "learning_rate": 1.687598055764017e-09, + "loss": 0.8351, + "step": 6135 + }, + { + "epoch": 0.9933667691312086, + "grad_norm": 0.9928733972042116, + "learning_rate": 1.3406829586337656e-09, + "loss": 0.8033, + "step": 6140 + }, + { + "epoch": 0.9941756997249636, + "grad_norm": 0.882709413718118, + "learning_rate": 1.0336364660290532e-09, + "loss": 0.8098, + "step": 6145 + }, + { + "epoch": 0.9949846303187186, + "grad_norm": 0.9193633255003315, + "learning_rate": 7.664610269103589e-10, + "loss": 0.8285, + "step": 6150 + }, + { + "epoch": 0.9957935609124737, + "grad_norm": 0.9524125245282298, + "learning_rate": 5.391587722303194e-10, + "loss": 0.8087, + "step": 6155 + }, + { + "epoch": 0.9966024915062288, + "grad_norm": 0.9577558462345785, + "learning_rate": 3.5173151492096104e-10, + "loss": 0.8309, + "step": 6160 + }, + { + "epoch": 0.9974114220999838, + "grad_norm": 0.892739545774551, + "learning_rate": 2.0418074987538229e-10, + "loss": 0.8144, + "step": 6165 + }, + { + "epoch": 0.9982203526937389, + "grad_norm": 0.973028280918144, + "learning_rate": 9.650765393720563e-11, + "loss": 0.8147, + "step": 6170 + }, + { + "epoch": 0.999029283287494, + "grad_norm": 0.9043858725591138, + "learning_rate": 2.8713085892806415e-11, + "loss": 0.8312, + "step": 6175 + }, + { + "epoch": 0.9998382138812489, + "grad_norm": 0.9568219074455289, + "learning_rate": 7.975864613207762e-13, + "loss": 0.8409, + "step": 6180 + }, { "epoch": 1.0, - "eval_loss": 0.28806865215301514, - "eval_runtime": 1.191, - "eval_samples_per_second": 2.519, - "eval_steps_per_second": 0.84, - "step": 2414 + "eval_loss": 0.8599082231521606, + "eval_runtime": 3.0154, + "eval_samples_per_second": 3.316, + "eval_steps_per_second": 0.995, + "step": 6181 }, { "epoch": 1.0, - "step": 2414, - "total_flos": 252721244405760.0, - "train_loss": 0.5156875643339054, - "train_runtime": 24515.4963, - "train_samples_per_second": 1.575, - "train_steps_per_second": 0.098 + "step": 6181, + "total_flos": 2571089024581632.0, + "train_loss": 0.8638364601301117, + "train_runtime": 28571.7374, + "train_samples_per_second": 3.461, + "train_steps_per_second": 0.216 } ], "logging_steps": 5, - "max_steps": 2414, + "max_steps": 6181, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, @@ -3424,7 +8702,7 @@ "attributes": {} } }, - "total_flos": 252721244405760.0, + "total_flos": 2571089024581632.0, "train_batch_size": 4, "trial_name": null, "trial_params": null