{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999452264884702, "eval_steps": 500, "global_step": 9128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010954702305964835, "grad_norm": 11.6875, "learning_rate": 2.190580503833516e-08, "loss": 1.6695, "step": 1 }, { "epoch": 0.0005477351152982418, "grad_norm": 10.5, "learning_rate": 1.095290251916758e-07, "loss": 1.7344, "step": 5 }, { "epoch": 0.0010954702305964836, "grad_norm": 13.1875, "learning_rate": 2.190580503833516e-07, "loss": 1.6283, "step": 10 }, { "epoch": 0.0016432053458947254, "grad_norm": 8.5625, "learning_rate": 3.285870755750274e-07, "loss": 1.6588, "step": 15 }, { "epoch": 0.0021909404611929672, "grad_norm": 9.4375, "learning_rate": 4.381161007667032e-07, "loss": 1.6831, "step": 20 }, { "epoch": 0.002738675576491209, "grad_norm": 8.1875, "learning_rate": 5.47645125958379e-07, "loss": 1.6779, "step": 25 }, { "epoch": 0.003286410691789451, "grad_norm": 11.25, "learning_rate": 6.571741511500548e-07, "loss": 1.7032, "step": 30 }, { "epoch": 0.0038341458070876924, "grad_norm": 9.125, "learning_rate": 7.667031763417306e-07, "loss": 1.6961, "step": 35 }, { "epoch": 0.0043818809223859344, "grad_norm": 9.5625, "learning_rate": 8.762322015334064e-07, "loss": 1.6005, "step": 40 }, { "epoch": 0.004929616037684176, "grad_norm": 11.4375, "learning_rate": 9.857612267250823e-07, "loss": 1.6734, "step": 45 }, { "epoch": 0.005477351152982418, "grad_norm": 8.5625, "learning_rate": 1.095290251916758e-06, "loss": 1.6523, "step": 50 }, { "epoch": 0.00602508626828066, "grad_norm": 8.0625, "learning_rate": 1.2048192771084338e-06, "loss": 1.6274, "step": 55 }, { "epoch": 0.006572821383578902, "grad_norm": 9.25, "learning_rate": 1.3143483023001096e-06, "loss": 1.6391, "step": 60 }, { "epoch": 0.007120556498877143, "grad_norm": 7.09375, "learning_rate": 1.4238773274917855e-06, "loss": 1.6723, "step": 65 }, { "epoch": 0.007668291614175385, "grad_norm": 8.75, "learning_rate": 1.5334063526834611e-06, "loss": 1.616, "step": 70 }, { "epoch": 0.008216026729473627, "grad_norm": 9.5, "learning_rate": 1.642935377875137e-06, "loss": 1.6565, "step": 75 }, { "epoch": 0.008763761844771869, "grad_norm": 8.875, "learning_rate": 1.7524644030668128e-06, "loss": 1.5556, "step": 80 }, { "epoch": 0.009311496960070111, "grad_norm": 7.4375, "learning_rate": 1.8619934282584886e-06, "loss": 1.5898, "step": 85 }, { "epoch": 0.009859232075368351, "grad_norm": 8.75, "learning_rate": 1.9715224534501647e-06, "loss": 1.5372, "step": 90 }, { "epoch": 0.010406967190666593, "grad_norm": 7.15625, "learning_rate": 2.0810514786418403e-06, "loss": 1.5206, "step": 95 }, { "epoch": 0.010954702305964835, "grad_norm": 5.65625, "learning_rate": 2.190580503833516e-06, "loss": 1.5475, "step": 100 }, { "epoch": 0.011502437421263077, "grad_norm": 7.03125, "learning_rate": 2.300109529025192e-06, "loss": 1.5011, "step": 105 }, { "epoch": 0.01205017253656132, "grad_norm": 4.1875, "learning_rate": 2.4096385542168676e-06, "loss": 1.4805, "step": 110 }, { "epoch": 0.012597907651859561, "grad_norm": 4.5625, "learning_rate": 2.5191675794085437e-06, "loss": 1.4625, "step": 115 }, { "epoch": 0.013145642767157803, "grad_norm": 4.25, "learning_rate": 2.6286966046002193e-06, "loss": 1.4329, "step": 120 }, { "epoch": 0.013693377882456044, "grad_norm": 3.140625, "learning_rate": 2.7382256297918953e-06, "loss": 1.4614, "step": 125 }, { "epoch": 0.014241112997754286, "grad_norm": 2.890625, "learning_rate": 2.847754654983571e-06, "loss": 1.43, "step": 130 }, { "epoch": 0.014788848113052528, "grad_norm": 2.96875, "learning_rate": 2.957283680175247e-06, "loss": 1.4254, "step": 135 }, { "epoch": 0.01533658322835077, "grad_norm": 2.375, "learning_rate": 3.0668127053669222e-06, "loss": 1.3846, "step": 140 }, { "epoch": 0.01588431834364901, "grad_norm": 2.109375, "learning_rate": 3.1763417305585983e-06, "loss": 1.3968, "step": 145 }, { "epoch": 0.016432053458947254, "grad_norm": 2.015625, "learning_rate": 3.285870755750274e-06, "loss": 1.3592, "step": 150 }, { "epoch": 0.016979788574245494, "grad_norm": 3.03125, "learning_rate": 3.39539978094195e-06, "loss": 1.3269, "step": 155 }, { "epoch": 0.017527523689543738, "grad_norm": 1.8359375, "learning_rate": 3.5049288061336256e-06, "loss": 1.3384, "step": 160 }, { "epoch": 0.018075258804841978, "grad_norm": 1.734375, "learning_rate": 3.6144578313253016e-06, "loss": 1.3149, "step": 165 }, { "epoch": 0.018622993920140222, "grad_norm": 1.5703125, "learning_rate": 3.7239868565169773e-06, "loss": 1.331, "step": 170 }, { "epoch": 0.019170729035438462, "grad_norm": 1.78125, "learning_rate": 3.833515881708653e-06, "loss": 1.3452, "step": 175 }, { "epoch": 0.019718464150736702, "grad_norm": 2.078125, "learning_rate": 3.943044906900329e-06, "loss": 1.2923, "step": 180 }, { "epoch": 0.020266199266034946, "grad_norm": 1.734375, "learning_rate": 4.0525739320920046e-06, "loss": 1.351, "step": 185 }, { "epoch": 0.020813934381333186, "grad_norm": 1.5078125, "learning_rate": 4.162102957283681e-06, "loss": 1.2837, "step": 190 }, { "epoch": 0.02136166949663143, "grad_norm": 1.5625, "learning_rate": 4.271631982475356e-06, "loss": 1.2891, "step": 195 }, { "epoch": 0.02190940461192967, "grad_norm": 1.5078125, "learning_rate": 4.381161007667032e-06, "loss": 1.3041, "step": 200 }, { "epoch": 0.022457139727227914, "grad_norm": 1.625, "learning_rate": 4.490690032858708e-06, "loss": 1.2525, "step": 205 }, { "epoch": 0.023004874842526155, "grad_norm": 1.609375, "learning_rate": 4.600219058050384e-06, "loss": 1.3449, "step": 210 }, { "epoch": 0.023552609957824395, "grad_norm": 1.453125, "learning_rate": 4.709748083242059e-06, "loss": 1.2768, "step": 215 }, { "epoch": 0.02410034507312264, "grad_norm": 1.5078125, "learning_rate": 4.819277108433735e-06, "loss": 1.28, "step": 220 }, { "epoch": 0.02464808018842088, "grad_norm": 1.46875, "learning_rate": 4.928806133625411e-06, "loss": 1.2782, "step": 225 }, { "epoch": 0.025195815303719123, "grad_norm": 1.6953125, "learning_rate": 5.038335158817087e-06, "loss": 1.2688, "step": 230 }, { "epoch": 0.025743550419017363, "grad_norm": 1.421875, "learning_rate": 5.1478641840087625e-06, "loss": 1.2547, "step": 235 }, { "epoch": 0.026291285534315607, "grad_norm": 1.5703125, "learning_rate": 5.257393209200439e-06, "loss": 1.2791, "step": 240 }, { "epoch": 0.026839020649613847, "grad_norm": 1.5546875, "learning_rate": 5.366922234392115e-06, "loss": 1.2681, "step": 245 }, { "epoch": 0.027386755764912087, "grad_norm": 1.671875, "learning_rate": 5.476451259583791e-06, "loss": 1.2936, "step": 250 }, { "epoch": 0.02793449088021033, "grad_norm": 1.5234375, "learning_rate": 5.585980284775466e-06, "loss": 1.2555, "step": 255 }, { "epoch": 0.02848222599550857, "grad_norm": 1.59375, "learning_rate": 5.695509309967142e-06, "loss": 1.2712, "step": 260 }, { "epoch": 0.029029961110806815, "grad_norm": 1.59375, "learning_rate": 5.805038335158818e-06, "loss": 1.2957, "step": 265 }, { "epoch": 0.029577696226105055, "grad_norm": 1.5546875, "learning_rate": 5.914567360350494e-06, "loss": 1.2894, "step": 270 }, { "epoch": 0.030125431341403296, "grad_norm": 1.578125, "learning_rate": 6.02409638554217e-06, "loss": 1.2609, "step": 275 }, { "epoch": 0.03067316645670154, "grad_norm": 1.609375, "learning_rate": 6.1336254107338444e-06, "loss": 1.2343, "step": 280 }, { "epoch": 0.03122090157199978, "grad_norm": 1.703125, "learning_rate": 6.2431544359255205e-06, "loss": 1.2609, "step": 285 }, { "epoch": 0.03176863668729802, "grad_norm": 1.4140625, "learning_rate": 6.3526834611171965e-06, "loss": 1.228, "step": 290 }, { "epoch": 0.032316371802596264, "grad_norm": 1.5078125, "learning_rate": 6.462212486308872e-06, "loss": 1.2499, "step": 295 }, { "epoch": 0.03286410691789451, "grad_norm": 1.5546875, "learning_rate": 6.571741511500548e-06, "loss": 1.2826, "step": 300 }, { "epoch": 0.03341184203319275, "grad_norm": 1.5234375, "learning_rate": 6.681270536692224e-06, "loss": 1.2581, "step": 305 }, { "epoch": 0.03395957714849099, "grad_norm": 1.7734375, "learning_rate": 6.7907995618839e-06, "loss": 1.2385, "step": 310 }, { "epoch": 0.03450731226378923, "grad_norm": 1.7734375, "learning_rate": 6.900328587075575e-06, "loss": 1.2487, "step": 315 }, { "epoch": 0.035055047379087476, "grad_norm": 1.7109375, "learning_rate": 7.009857612267251e-06, "loss": 1.2359, "step": 320 }, { "epoch": 0.03560278249438571, "grad_norm": 1.734375, "learning_rate": 7.119386637458927e-06, "loss": 1.2917, "step": 325 }, { "epoch": 0.036150517609683956, "grad_norm": 1.6484375, "learning_rate": 7.228915662650603e-06, "loss": 1.2511, "step": 330 }, { "epoch": 0.0366982527249822, "grad_norm": 1.5234375, "learning_rate": 7.3384446878422785e-06, "loss": 1.2733, "step": 335 }, { "epoch": 0.037245987840280444, "grad_norm": 1.421875, "learning_rate": 7.4479737130339545e-06, "loss": 1.25, "step": 340 }, { "epoch": 0.03779372295557868, "grad_norm": 1.453125, "learning_rate": 7.5575027382256306e-06, "loss": 1.2793, "step": 345 }, { "epoch": 0.038341458070876924, "grad_norm": 1.515625, "learning_rate": 7.667031763417307e-06, "loss": 1.2786, "step": 350 }, { "epoch": 0.03888919318617517, "grad_norm": 1.5, "learning_rate": 7.776560788608982e-06, "loss": 1.2847, "step": 355 }, { "epoch": 0.039436928301473405, "grad_norm": 1.4453125, "learning_rate": 7.886089813800659e-06, "loss": 1.3118, "step": 360 }, { "epoch": 0.03998466341677165, "grad_norm": 1.4921875, "learning_rate": 7.995618838992334e-06, "loss": 1.2672, "step": 365 }, { "epoch": 0.04053239853206989, "grad_norm": 1.484375, "learning_rate": 8.105147864184009e-06, "loss": 1.2752, "step": 370 }, { "epoch": 0.041080133647368136, "grad_norm": 1.515625, "learning_rate": 8.214676889375686e-06, "loss": 1.2129, "step": 375 }, { "epoch": 0.04162786876266637, "grad_norm": 1.46875, "learning_rate": 8.324205914567361e-06, "loss": 1.2755, "step": 380 }, { "epoch": 0.04217560387796462, "grad_norm": 1.4140625, "learning_rate": 8.433734939759038e-06, "loss": 1.1983, "step": 385 }, { "epoch": 0.04272333899326286, "grad_norm": 2.3125, "learning_rate": 8.543263964950712e-06, "loss": 1.2275, "step": 390 }, { "epoch": 0.0432710741085611, "grad_norm": 1.390625, "learning_rate": 8.652792990142389e-06, "loss": 1.1843, "step": 395 }, { "epoch": 0.04381880922385934, "grad_norm": 1.59375, "learning_rate": 8.762322015334064e-06, "loss": 1.2471, "step": 400 }, { "epoch": 0.044366544339157585, "grad_norm": 1.5, "learning_rate": 8.871851040525739e-06, "loss": 1.2367, "step": 405 }, { "epoch": 0.04491427945445583, "grad_norm": 1.53125, "learning_rate": 8.981380065717416e-06, "loss": 1.2504, "step": 410 }, { "epoch": 0.045462014569754065, "grad_norm": 1.53125, "learning_rate": 9.090909090909091e-06, "loss": 1.3088, "step": 415 }, { "epoch": 0.04600974968505231, "grad_norm": 1.4453125, "learning_rate": 9.200438116100768e-06, "loss": 1.2398, "step": 420 }, { "epoch": 0.04655748480035055, "grad_norm": 1.484375, "learning_rate": 9.309967141292443e-06, "loss": 1.2922, "step": 425 }, { "epoch": 0.04710521991564879, "grad_norm": 1.5234375, "learning_rate": 9.419496166484118e-06, "loss": 1.2491, "step": 430 }, { "epoch": 0.04765295503094703, "grad_norm": 1.5078125, "learning_rate": 9.529025191675795e-06, "loss": 1.1853, "step": 435 }, { "epoch": 0.04820069014624528, "grad_norm": 1.59375, "learning_rate": 9.63855421686747e-06, "loss": 1.2793, "step": 440 }, { "epoch": 0.04874842526154352, "grad_norm": 1.4765625, "learning_rate": 9.748083242059146e-06, "loss": 1.2385, "step": 445 }, { "epoch": 0.04929616037684176, "grad_norm": 1.4921875, "learning_rate": 9.857612267250823e-06, "loss": 1.2048, "step": 450 }, { "epoch": 0.04984389549214, "grad_norm": 1.4375, "learning_rate": 9.967141292442498e-06, "loss": 1.208, "step": 455 }, { "epoch": 0.050391630607438245, "grad_norm": 1.40625, "learning_rate": 1.0076670317634175e-05, "loss": 1.2326, "step": 460 }, { "epoch": 0.05093936572273648, "grad_norm": 1.390625, "learning_rate": 1.0186199342825848e-05, "loss": 1.195, "step": 465 }, { "epoch": 0.051487100838034726, "grad_norm": 1.3515625, "learning_rate": 1.0295728368017525e-05, "loss": 1.1941, "step": 470 }, { "epoch": 0.05203483595333297, "grad_norm": 1.4453125, "learning_rate": 1.04052573932092e-05, "loss": 1.2478, "step": 475 }, { "epoch": 0.05258257106863121, "grad_norm": 1.515625, "learning_rate": 1.0514786418400877e-05, "loss": 1.2514, "step": 480 }, { "epoch": 0.05313030618392945, "grad_norm": 1.5859375, "learning_rate": 1.0624315443592552e-05, "loss": 1.2535, "step": 485 }, { "epoch": 0.053678041299227694, "grad_norm": 1.40625, "learning_rate": 1.073384446878423e-05, "loss": 1.232, "step": 490 }, { "epoch": 0.05422577641452594, "grad_norm": 1.6796875, "learning_rate": 1.0843373493975904e-05, "loss": 1.3235, "step": 495 }, { "epoch": 0.054773511529824175, "grad_norm": 1.484375, "learning_rate": 1.0952902519167581e-05, "loss": 1.2356, "step": 500 }, { "epoch": 0.05532124664512242, "grad_norm": 1.4609375, "learning_rate": 1.1062431544359255e-05, "loss": 1.1983, "step": 505 }, { "epoch": 0.05586898176042066, "grad_norm": 1.46875, "learning_rate": 1.1171960569550932e-05, "loss": 1.2143, "step": 510 }, { "epoch": 0.0564167168757189, "grad_norm": 1.515625, "learning_rate": 1.1281489594742607e-05, "loss": 1.2818, "step": 515 }, { "epoch": 0.05696445199101714, "grad_norm": 1.5078125, "learning_rate": 1.1391018619934284e-05, "loss": 1.1754, "step": 520 }, { "epoch": 0.057512187106315386, "grad_norm": 1.3828125, "learning_rate": 1.1500547645125959e-05, "loss": 1.2255, "step": 525 }, { "epoch": 0.05805992222161363, "grad_norm": 1.4609375, "learning_rate": 1.1610076670317636e-05, "loss": 1.162, "step": 530 }, { "epoch": 0.05860765733691187, "grad_norm": 1.65625, "learning_rate": 1.1719605695509311e-05, "loss": 1.2283, "step": 535 }, { "epoch": 0.05915539245221011, "grad_norm": 1.5, "learning_rate": 1.1829134720700988e-05, "loss": 1.2758, "step": 540 }, { "epoch": 0.059703127567508354, "grad_norm": 1.4453125, "learning_rate": 1.1938663745892662e-05, "loss": 1.2252, "step": 545 }, { "epoch": 0.06025086268280659, "grad_norm": 1.421875, "learning_rate": 1.204819277108434e-05, "loss": 1.2171, "step": 550 }, { "epoch": 0.060798597798104835, "grad_norm": 1.5078125, "learning_rate": 1.2157721796276014e-05, "loss": 1.2346, "step": 555 }, { "epoch": 0.06134633291340308, "grad_norm": 1.34375, "learning_rate": 1.2267250821467689e-05, "loss": 1.2295, "step": 560 }, { "epoch": 0.06189406802870132, "grad_norm": 1.4609375, "learning_rate": 1.2376779846659366e-05, "loss": 1.2068, "step": 565 }, { "epoch": 0.06244180314399956, "grad_norm": 1.4140625, "learning_rate": 1.2486308871851041e-05, "loss": 1.1862, "step": 570 }, { "epoch": 0.0629895382592978, "grad_norm": 1.390625, "learning_rate": 1.2595837897042718e-05, "loss": 1.219, "step": 575 }, { "epoch": 0.06353727337459604, "grad_norm": 1.3984375, "learning_rate": 1.2705366922234393e-05, "loss": 1.2089, "step": 580 }, { "epoch": 0.06408500848989429, "grad_norm": 1.4765625, "learning_rate": 1.281489594742607e-05, "loss": 1.2738, "step": 585 }, { "epoch": 0.06463274360519253, "grad_norm": 1.421875, "learning_rate": 1.2924424972617743e-05, "loss": 1.2507, "step": 590 }, { "epoch": 0.06518047872049076, "grad_norm": 1.421875, "learning_rate": 1.303395399780942e-05, "loss": 1.2554, "step": 595 }, { "epoch": 0.06572821383578901, "grad_norm": 1.4453125, "learning_rate": 1.3143483023001096e-05, "loss": 1.2269, "step": 600 }, { "epoch": 0.06627594895108725, "grad_norm": 1.3359375, "learning_rate": 1.3253012048192772e-05, "loss": 1.2278, "step": 605 }, { "epoch": 0.0668236840663855, "grad_norm": 1.4453125, "learning_rate": 1.3362541073384448e-05, "loss": 1.197, "step": 610 }, { "epoch": 0.06737141918168374, "grad_norm": 1.484375, "learning_rate": 1.3472070098576125e-05, "loss": 1.2462, "step": 615 }, { "epoch": 0.06791915429698198, "grad_norm": 1.4453125, "learning_rate": 1.35815991237678e-05, "loss": 1.1692, "step": 620 }, { "epoch": 0.06846688941228023, "grad_norm": 1.3828125, "learning_rate": 1.3691128148959477e-05, "loss": 1.2331, "step": 625 }, { "epoch": 0.06901462452757846, "grad_norm": 1.375, "learning_rate": 1.380065717415115e-05, "loss": 1.2679, "step": 630 }, { "epoch": 0.0695623596428767, "grad_norm": 1.4140625, "learning_rate": 1.3910186199342827e-05, "loss": 1.212, "step": 635 }, { "epoch": 0.07011009475817495, "grad_norm": 1.375, "learning_rate": 1.4019715224534502e-05, "loss": 1.2085, "step": 640 }, { "epoch": 0.07065782987347319, "grad_norm": 1.3828125, "learning_rate": 1.412924424972618e-05, "loss": 1.2214, "step": 645 }, { "epoch": 0.07120556498877142, "grad_norm": 1.375, "learning_rate": 1.4238773274917854e-05, "loss": 1.2937, "step": 650 }, { "epoch": 0.07175330010406968, "grad_norm": 1.4375, "learning_rate": 1.4348302300109531e-05, "loss": 1.2745, "step": 655 }, { "epoch": 0.07230103521936791, "grad_norm": 1.359375, "learning_rate": 1.4457831325301207e-05, "loss": 1.1891, "step": 660 }, { "epoch": 0.07284877033466615, "grad_norm": 1.3203125, "learning_rate": 1.4567360350492883e-05, "loss": 1.2059, "step": 665 }, { "epoch": 0.0733965054499644, "grad_norm": 1.359375, "learning_rate": 1.4676889375684557e-05, "loss": 1.2321, "step": 670 }, { "epoch": 0.07394424056526264, "grad_norm": 1.375, "learning_rate": 1.4786418400876232e-05, "loss": 1.2003, "step": 675 }, { "epoch": 0.07449197568056089, "grad_norm": 1.40625, "learning_rate": 1.4895947426067909e-05, "loss": 1.2113, "step": 680 }, { "epoch": 0.07503971079585912, "grad_norm": 1.34375, "learning_rate": 1.5005476451259584e-05, "loss": 1.2612, "step": 685 }, { "epoch": 0.07558744591115736, "grad_norm": 1.328125, "learning_rate": 1.5115005476451261e-05, "loss": 1.2022, "step": 690 }, { "epoch": 0.07613518102645561, "grad_norm": 1.421875, "learning_rate": 1.5224534501642936e-05, "loss": 1.1903, "step": 695 }, { "epoch": 0.07668291614175385, "grad_norm": 1.3828125, "learning_rate": 1.5334063526834613e-05, "loss": 1.1864, "step": 700 }, { "epoch": 0.07723065125705209, "grad_norm": 1.390625, "learning_rate": 1.5443592552026287e-05, "loss": 1.1988, "step": 705 }, { "epoch": 0.07777838637235034, "grad_norm": 1.34375, "learning_rate": 1.5553121577217964e-05, "loss": 1.2798, "step": 710 }, { "epoch": 0.07832612148764857, "grad_norm": 1.359375, "learning_rate": 1.566265060240964e-05, "loss": 1.2141, "step": 715 }, { "epoch": 0.07887385660294681, "grad_norm": 1.46875, "learning_rate": 1.5772179627601317e-05, "loss": 1.265, "step": 720 }, { "epoch": 0.07942159171824506, "grad_norm": 1.3515625, "learning_rate": 1.588170865279299e-05, "loss": 1.2063, "step": 725 }, { "epoch": 0.0799693268335433, "grad_norm": 1.3828125, "learning_rate": 1.5991237677984668e-05, "loss": 1.192, "step": 730 }, { "epoch": 0.08051706194884153, "grad_norm": 1.3828125, "learning_rate": 1.610076670317634e-05, "loss": 1.2135, "step": 735 }, { "epoch": 0.08106479706413978, "grad_norm": 1.3203125, "learning_rate": 1.6210295728368018e-05, "loss": 1.1986, "step": 740 }, { "epoch": 0.08161253217943802, "grad_norm": 1.25, "learning_rate": 1.6319824753559695e-05, "loss": 1.2055, "step": 745 }, { "epoch": 0.08216026729473627, "grad_norm": 1.3359375, "learning_rate": 1.6429353778751372e-05, "loss": 1.2332, "step": 750 }, { "epoch": 0.08270800241003451, "grad_norm": 1.375, "learning_rate": 1.6538882803943046e-05, "loss": 1.2425, "step": 755 }, { "epoch": 0.08325573752533275, "grad_norm": 1.3046875, "learning_rate": 1.6648411829134722e-05, "loss": 1.2201, "step": 760 }, { "epoch": 0.083803472640631, "grad_norm": 1.3515625, "learning_rate": 1.67579408543264e-05, "loss": 1.2385, "step": 765 }, { "epoch": 0.08435120775592923, "grad_norm": 1.3125, "learning_rate": 1.6867469879518076e-05, "loss": 1.1299, "step": 770 }, { "epoch": 0.08489894287122747, "grad_norm": 1.3515625, "learning_rate": 1.697699890470975e-05, "loss": 1.212, "step": 775 }, { "epoch": 0.08544667798652572, "grad_norm": 1.3515625, "learning_rate": 1.7086527929901423e-05, "loss": 1.2242, "step": 780 }, { "epoch": 0.08599441310182396, "grad_norm": 1.46875, "learning_rate": 1.71960569550931e-05, "loss": 1.2346, "step": 785 }, { "epoch": 0.0865421482171222, "grad_norm": 1.328125, "learning_rate": 1.7305585980284777e-05, "loss": 1.2316, "step": 790 }, { "epoch": 0.08708988333242045, "grad_norm": 1.3359375, "learning_rate": 1.7415115005476454e-05, "loss": 1.1931, "step": 795 }, { "epoch": 0.08763761844771868, "grad_norm": 1.328125, "learning_rate": 1.7524644030668127e-05, "loss": 1.2037, "step": 800 }, { "epoch": 0.08818535356301692, "grad_norm": 1.375, "learning_rate": 1.7634173055859804e-05, "loss": 1.2317, "step": 805 }, { "epoch": 0.08873308867831517, "grad_norm": 1.3046875, "learning_rate": 1.7743702081051478e-05, "loss": 1.2425, "step": 810 }, { "epoch": 0.0892808237936134, "grad_norm": 1.328125, "learning_rate": 1.7853231106243155e-05, "loss": 1.2056, "step": 815 }, { "epoch": 0.08982855890891166, "grad_norm": 1.3046875, "learning_rate": 1.796276013143483e-05, "loss": 1.1954, "step": 820 }, { "epoch": 0.0903762940242099, "grad_norm": 1.3203125, "learning_rate": 1.807228915662651e-05, "loss": 1.2034, "step": 825 }, { "epoch": 0.09092402913950813, "grad_norm": 1.3828125, "learning_rate": 1.8181818181818182e-05, "loss": 1.1815, "step": 830 }, { "epoch": 0.09147176425480638, "grad_norm": 1.328125, "learning_rate": 1.829134720700986e-05, "loss": 1.2272, "step": 835 }, { "epoch": 0.09201949937010462, "grad_norm": 1.2890625, "learning_rate": 1.8400876232201536e-05, "loss": 1.2151, "step": 840 }, { "epoch": 0.09256723448540286, "grad_norm": 1.296875, "learning_rate": 1.8510405257393213e-05, "loss": 1.2056, "step": 845 }, { "epoch": 0.0931149696007011, "grad_norm": 1.34375, "learning_rate": 1.8619934282584886e-05, "loss": 1.2219, "step": 850 }, { "epoch": 0.09366270471599934, "grad_norm": 1.640625, "learning_rate": 1.8729463307776563e-05, "loss": 1.1675, "step": 855 }, { "epoch": 0.09421043983129758, "grad_norm": 1.2890625, "learning_rate": 1.8838992332968237e-05, "loss": 1.2177, "step": 860 }, { "epoch": 0.09475817494659583, "grad_norm": 1.3671875, "learning_rate": 1.8948521358159914e-05, "loss": 1.2518, "step": 865 }, { "epoch": 0.09530591006189407, "grad_norm": 1.265625, "learning_rate": 1.905805038335159e-05, "loss": 1.2463, "step": 870 }, { "epoch": 0.0958536451771923, "grad_norm": 1.2109375, "learning_rate": 1.9167579408543267e-05, "loss": 1.1985, "step": 875 }, { "epoch": 0.09640138029249055, "grad_norm": 1.3125, "learning_rate": 1.927710843373494e-05, "loss": 1.2317, "step": 880 }, { "epoch": 0.09694911540778879, "grad_norm": 1.3046875, "learning_rate": 1.9386637458926618e-05, "loss": 1.2383, "step": 885 }, { "epoch": 0.09749685052308704, "grad_norm": 1.2421875, "learning_rate": 1.949616648411829e-05, "loss": 1.1178, "step": 890 }, { "epoch": 0.09804458563838528, "grad_norm": 1.5390625, "learning_rate": 1.9605695509309968e-05, "loss": 1.2037, "step": 895 }, { "epoch": 0.09859232075368352, "grad_norm": 1.296875, "learning_rate": 1.9715224534501645e-05, "loss": 1.1711, "step": 900 }, { "epoch": 0.09914005586898177, "grad_norm": 1.25, "learning_rate": 1.982475355969332e-05, "loss": 1.223, "step": 905 }, { "epoch": 0.09968779098428, "grad_norm": 1.28125, "learning_rate": 1.9934282584884995e-05, "loss": 1.2393, "step": 910 }, { "epoch": 0.10023552609957824, "grad_norm": 1.296875, "learning_rate": 1.9999997075076013e-05, "loss": 1.1597, "step": 915 }, { "epoch": 0.10078326121487649, "grad_norm": 1.2734375, "learning_rate": 1.999996416970079e-05, "loss": 1.2222, "step": 920 }, { "epoch": 0.10133099633017473, "grad_norm": 1.28125, "learning_rate": 1.9999894702916073e-05, "loss": 1.1993, "step": 925 }, { "epoch": 0.10187873144547296, "grad_norm": 1.328125, "learning_rate": 1.9999788674975834e-05, "loss": 1.2389, "step": 930 }, { "epoch": 0.10242646656077121, "grad_norm": 1.3515625, "learning_rate": 1.9999646086267734e-05, "loss": 1.1934, "step": 935 }, { "epoch": 0.10297420167606945, "grad_norm": 1.2421875, "learning_rate": 1.9999466937313098e-05, "loss": 1.2261, "step": 940 }, { "epoch": 0.10352193679136769, "grad_norm": 1.2734375, "learning_rate": 1.9999251228766922e-05, "loss": 1.1829, "step": 945 }, { "epoch": 0.10406967190666594, "grad_norm": 1.265625, "learning_rate": 1.999899896141787e-05, "loss": 1.2095, "step": 950 }, { "epoch": 0.10461740702196418, "grad_norm": 1.265625, "learning_rate": 1.9998710136188267e-05, "loss": 1.1984, "step": 955 }, { "epoch": 0.10516514213726243, "grad_norm": 1.265625, "learning_rate": 1.999838475413411e-05, "loss": 1.1844, "step": 960 }, { "epoch": 0.10571287725256066, "grad_norm": 1.2734375, "learning_rate": 1.9998022816445037e-05, "loss": 1.2157, "step": 965 }, { "epoch": 0.1062606123678589, "grad_norm": 1.2421875, "learning_rate": 1.999762432444435e-05, "loss": 1.1304, "step": 970 }, { "epoch": 0.10680834748315715, "grad_norm": 1.2578125, "learning_rate": 1.9997189279589003e-05, "loss": 1.163, "step": 975 }, { "epoch": 0.10735608259845539, "grad_norm": 1.2890625, "learning_rate": 1.9996717683469582e-05, "loss": 1.1851, "step": 980 }, { "epoch": 0.10790381771375362, "grad_norm": 1.2890625, "learning_rate": 1.9996209537810317e-05, "loss": 1.211, "step": 985 }, { "epoch": 0.10845155282905188, "grad_norm": 1.390625, "learning_rate": 1.9995664844469064e-05, "loss": 1.1801, "step": 990 }, { "epoch": 0.10899928794435011, "grad_norm": 1.2890625, "learning_rate": 1.9995083605437312e-05, "loss": 1.1569, "step": 995 }, { "epoch": 0.10954702305964835, "grad_norm": 1.2421875, "learning_rate": 1.9994465822840152e-05, "loss": 1.2466, "step": 1000 }, { "epoch": 0.1100947581749466, "grad_norm": 1.34375, "learning_rate": 1.99938114989363e-05, "loss": 1.1697, "step": 1005 }, { "epoch": 0.11064249329024484, "grad_norm": 1.2890625, "learning_rate": 1.9993120636118055e-05, "loss": 1.2242, "step": 1010 }, { "epoch": 0.11119022840554307, "grad_norm": 1.28125, "learning_rate": 1.999239323691133e-05, "loss": 1.1978, "step": 1015 }, { "epoch": 0.11173796352084132, "grad_norm": 1.2734375, "learning_rate": 1.99916293039756e-05, "loss": 1.2025, "step": 1020 }, { "epoch": 0.11228569863613956, "grad_norm": 1.296875, "learning_rate": 1.999082884010393e-05, "loss": 1.2639, "step": 1025 }, { "epoch": 0.1128334337514378, "grad_norm": 1.2734375, "learning_rate": 1.998999184822293e-05, "loss": 1.1972, "step": 1030 }, { "epoch": 0.11338116886673605, "grad_norm": 1.2734375, "learning_rate": 1.9989118331392775e-05, "loss": 1.2215, "step": 1035 }, { "epoch": 0.11392890398203429, "grad_norm": 1.296875, "learning_rate": 1.998820829280718e-05, "loss": 1.2178, "step": 1040 }, { "epoch": 0.11447663909733254, "grad_norm": 1.2421875, "learning_rate": 1.998726173579338e-05, "loss": 1.1748, "step": 1045 }, { "epoch": 0.11502437421263077, "grad_norm": 1.28125, "learning_rate": 1.9986278663812137e-05, "loss": 1.212, "step": 1050 }, { "epoch": 0.11557210932792901, "grad_norm": 1.1796875, "learning_rate": 1.998525908045771e-05, "loss": 1.1583, "step": 1055 }, { "epoch": 0.11611984444322726, "grad_norm": 1.296875, "learning_rate": 1.998420298945786e-05, "loss": 1.1714, "step": 1060 }, { "epoch": 0.1166675795585255, "grad_norm": 1.2421875, "learning_rate": 1.998311039467382e-05, "loss": 1.2036, "step": 1065 }, { "epoch": 0.11721531467382373, "grad_norm": 1.2734375, "learning_rate": 1.9981981300100267e-05, "loss": 1.1606, "step": 1070 }, { "epoch": 0.11776304978912198, "grad_norm": 1.265625, "learning_rate": 1.9980815709865365e-05, "loss": 1.1495, "step": 1075 }, { "epoch": 0.11831078490442022, "grad_norm": 1.2578125, "learning_rate": 1.9979613628230683e-05, "loss": 1.1872, "step": 1080 }, { "epoch": 0.11885852001971846, "grad_norm": 1.3046875, "learning_rate": 1.9978375059591214e-05, "loss": 1.2167, "step": 1085 }, { "epoch": 0.11940625513501671, "grad_norm": 1.296875, "learning_rate": 1.997710000847536e-05, "loss": 1.1988, "step": 1090 }, { "epoch": 0.11995399025031495, "grad_norm": 1.2578125, "learning_rate": 1.9975788479544908e-05, "loss": 1.1961, "step": 1095 }, { "epoch": 0.12050172536561318, "grad_norm": 1.3203125, "learning_rate": 1.9974440477595e-05, "loss": 1.2294, "step": 1100 }, { "epoch": 0.12104946048091143, "grad_norm": 1.2109375, "learning_rate": 1.9973056007554145e-05, "loss": 1.1643, "step": 1105 }, { "epoch": 0.12159719559620967, "grad_norm": 1.3515625, "learning_rate": 1.9971635074484174e-05, "loss": 1.2341, "step": 1110 }, { "epoch": 0.12214493071150792, "grad_norm": 1.25, "learning_rate": 1.9970177683580245e-05, "loss": 1.2422, "step": 1115 }, { "epoch": 0.12269266582680616, "grad_norm": 1.2578125, "learning_rate": 1.99686838401708e-05, "loss": 1.1809, "step": 1120 }, { "epoch": 0.1232404009421044, "grad_norm": 1.265625, "learning_rate": 1.996715354971755e-05, "loss": 1.2402, "step": 1125 }, { "epoch": 0.12378813605740265, "grad_norm": 1.25, "learning_rate": 1.9965586817815494e-05, "loss": 1.1718, "step": 1130 }, { "epoch": 0.12433587117270088, "grad_norm": 1.234375, "learning_rate": 1.996398365019283e-05, "loss": 1.1678, "step": 1135 }, { "epoch": 0.12488360628799912, "grad_norm": 1.3046875, "learning_rate": 1.996234405271099e-05, "loss": 1.2031, "step": 1140 }, { "epoch": 0.12543134140329737, "grad_norm": 1.25, "learning_rate": 1.9960668031364593e-05, "loss": 1.2042, "step": 1145 }, { "epoch": 0.1259790765185956, "grad_norm": 1.25, "learning_rate": 1.9958955592281436e-05, "loss": 1.2292, "step": 1150 }, { "epoch": 0.12652681163389384, "grad_norm": 1.375, "learning_rate": 1.9957206741722455e-05, "loss": 1.2361, "step": 1155 }, { "epoch": 0.12707454674919208, "grad_norm": 1.2421875, "learning_rate": 1.9955421486081718e-05, "loss": 1.1373, "step": 1160 }, { "epoch": 0.12762228186449034, "grad_norm": 1.328125, "learning_rate": 1.9953599831886398e-05, "loss": 1.1941, "step": 1165 }, { "epoch": 0.12817001697978858, "grad_norm": 1.28125, "learning_rate": 1.9951741785796737e-05, "loss": 1.1622, "step": 1170 }, { "epoch": 0.12871775209508682, "grad_norm": 1.2578125, "learning_rate": 1.9949847354606046e-05, "loss": 1.1535, "step": 1175 }, { "epoch": 0.12926548721038505, "grad_norm": 1.28125, "learning_rate": 1.994791654524065e-05, "loss": 1.1981, "step": 1180 }, { "epoch": 0.1298132223256833, "grad_norm": 1.21875, "learning_rate": 1.9945949364759887e-05, "loss": 1.1849, "step": 1185 }, { "epoch": 0.13036095744098153, "grad_norm": 1.234375, "learning_rate": 1.9943945820356075e-05, "loss": 1.1899, "step": 1190 }, { "epoch": 0.1309086925562798, "grad_norm": 1.28125, "learning_rate": 1.994190591935448e-05, "loss": 1.2066, "step": 1195 }, { "epoch": 0.13145642767157803, "grad_norm": 1.3046875, "learning_rate": 1.9939829669213296e-05, "loss": 1.1682, "step": 1200 }, { "epoch": 0.13200416278687627, "grad_norm": 1.25, "learning_rate": 1.9937717077523607e-05, "loss": 1.1963, "step": 1205 }, { "epoch": 0.1325518979021745, "grad_norm": 1.234375, "learning_rate": 1.9935568152009378e-05, "loss": 1.177, "step": 1210 }, { "epoch": 0.13309963301747274, "grad_norm": 1.34375, "learning_rate": 1.9933382900527413e-05, "loss": 1.1594, "step": 1215 }, { "epoch": 0.133647368132771, "grad_norm": 1.3125, "learning_rate": 1.9931161331067327e-05, "loss": 1.183, "step": 1220 }, { "epoch": 0.13419510324806924, "grad_norm": 1.21875, "learning_rate": 1.9928903451751517e-05, "loss": 1.21, "step": 1225 }, { "epoch": 0.13474283836336748, "grad_norm": 1.3515625, "learning_rate": 1.992660927083514e-05, "loss": 1.2326, "step": 1230 }, { "epoch": 0.13529057347866572, "grad_norm": 1.265625, "learning_rate": 1.992427879670608e-05, "loss": 1.1703, "step": 1235 }, { "epoch": 0.13583830859396395, "grad_norm": 1.2265625, "learning_rate": 1.99219120378849e-05, "loss": 1.2061, "step": 1240 }, { "epoch": 0.1363860437092622, "grad_norm": 1.2421875, "learning_rate": 1.991950900302484e-05, "loss": 1.2041, "step": 1245 }, { "epoch": 0.13693377882456045, "grad_norm": 1.265625, "learning_rate": 1.9917069700911766e-05, "loss": 1.1703, "step": 1250 }, { "epoch": 0.1374815139398587, "grad_norm": 1.2578125, "learning_rate": 1.991459414046414e-05, "loss": 1.1598, "step": 1255 }, { "epoch": 0.13802924905515693, "grad_norm": 1.234375, "learning_rate": 1.9912082330732992e-05, "loss": 1.1528, "step": 1260 }, { "epoch": 0.13857698417045516, "grad_norm": 1.2109375, "learning_rate": 1.990953428090189e-05, "loss": 1.1381, "step": 1265 }, { "epoch": 0.1391247192857534, "grad_norm": 1.203125, "learning_rate": 1.9906950000286894e-05, "loss": 1.1351, "step": 1270 }, { "epoch": 0.13967245440105164, "grad_norm": 1.328125, "learning_rate": 1.990432949833653e-05, "loss": 1.1824, "step": 1275 }, { "epoch": 0.1402201895163499, "grad_norm": 1.2890625, "learning_rate": 1.990167278463176e-05, "loss": 1.1525, "step": 1280 }, { "epoch": 0.14076792463164814, "grad_norm": 1.2421875, "learning_rate": 1.9898979868885933e-05, "loss": 1.1933, "step": 1285 }, { "epoch": 0.14131565974694638, "grad_norm": 1.2109375, "learning_rate": 1.989625076094477e-05, "loss": 1.1801, "step": 1290 }, { "epoch": 0.1418633948622446, "grad_norm": 1.25, "learning_rate": 1.9893485470786307e-05, "loss": 1.2175, "step": 1295 }, { "epoch": 0.14241112997754285, "grad_norm": 1.2578125, "learning_rate": 1.9890684008520872e-05, "loss": 1.1855, "step": 1300 }, { "epoch": 0.14295886509284111, "grad_norm": 1.2578125, "learning_rate": 1.9887846384391048e-05, "loss": 1.1998, "step": 1305 }, { "epoch": 0.14350660020813935, "grad_norm": 1.234375, "learning_rate": 1.9884972608771612e-05, "loss": 1.1501, "step": 1310 }, { "epoch": 0.1440543353234376, "grad_norm": 1.2109375, "learning_rate": 1.9882062692169544e-05, "loss": 1.2048, "step": 1315 }, { "epoch": 0.14460207043873582, "grad_norm": 1.25, "learning_rate": 1.987911664522394e-05, "loss": 1.1473, "step": 1320 }, { "epoch": 0.14514980555403406, "grad_norm": 1.3203125, "learning_rate": 1.9876134478706004e-05, "loss": 1.1571, "step": 1325 }, { "epoch": 0.1456975406693323, "grad_norm": 1.2890625, "learning_rate": 1.9873116203518997e-05, "loss": 1.2003, "step": 1330 }, { "epoch": 0.14624527578463056, "grad_norm": 1.2578125, "learning_rate": 1.9870061830698196e-05, "loss": 1.1797, "step": 1335 }, { "epoch": 0.1467930108999288, "grad_norm": 1.2109375, "learning_rate": 1.9866971371410858e-05, "loss": 1.1441, "step": 1340 }, { "epoch": 0.14734074601522704, "grad_norm": 1.3046875, "learning_rate": 1.9863844836956177e-05, "loss": 1.1912, "step": 1345 }, { "epoch": 0.14788848113052527, "grad_norm": 1.203125, "learning_rate": 1.986068223876525e-05, "loss": 1.171, "step": 1350 }, { "epoch": 0.1484362162458235, "grad_norm": 1.296875, "learning_rate": 1.9857483588401023e-05, "loss": 1.1628, "step": 1355 }, { "epoch": 0.14898395136112177, "grad_norm": 1.234375, "learning_rate": 1.9854248897558247e-05, "loss": 1.1522, "step": 1360 }, { "epoch": 0.14953168647642, "grad_norm": 1.2109375, "learning_rate": 1.985097817806346e-05, "loss": 1.1907, "step": 1365 }, { "epoch": 0.15007942159171825, "grad_norm": 1.28125, "learning_rate": 1.9847671441874907e-05, "loss": 1.1937, "step": 1370 }, { "epoch": 0.15062715670701649, "grad_norm": 1.28125, "learning_rate": 1.9844328701082532e-05, "loss": 1.1997, "step": 1375 }, { "epoch": 0.15117489182231472, "grad_norm": 1.2265625, "learning_rate": 1.9840949967907906e-05, "loss": 1.1507, "step": 1380 }, { "epoch": 0.15172262693761296, "grad_norm": 1.2265625, "learning_rate": 1.98375352547042e-05, "loss": 1.1952, "step": 1385 }, { "epoch": 0.15227036205291122, "grad_norm": 1.203125, "learning_rate": 1.983408457395613e-05, "loss": 1.1519, "step": 1390 }, { "epoch": 0.15281809716820946, "grad_norm": 1.390625, "learning_rate": 1.9830597938279915e-05, "loss": 1.2213, "step": 1395 }, { "epoch": 0.1533658322835077, "grad_norm": 1.2421875, "learning_rate": 1.9827075360423236e-05, "loss": 1.168, "step": 1400 }, { "epoch": 0.15391356739880593, "grad_norm": 1.265625, "learning_rate": 1.982351685326518e-05, "loss": 1.1586, "step": 1405 }, { "epoch": 0.15446130251410417, "grad_norm": 1.25, "learning_rate": 1.9819922429816193e-05, "loss": 1.1292, "step": 1410 }, { "epoch": 0.1550090376294024, "grad_norm": 1.28125, "learning_rate": 1.981629210321805e-05, "loss": 1.2092, "step": 1415 }, { "epoch": 0.15555677274470067, "grad_norm": 1.2421875, "learning_rate": 1.9812625886743775e-05, "loss": 1.1997, "step": 1420 }, { "epoch": 0.1561045078599989, "grad_norm": 1.2890625, "learning_rate": 1.980892379379762e-05, "loss": 1.1885, "step": 1425 }, { "epoch": 0.15665224297529715, "grad_norm": 1.203125, "learning_rate": 1.9805185837915014e-05, "loss": 1.1733, "step": 1430 }, { "epoch": 0.15719997809059538, "grad_norm": 1.2734375, "learning_rate": 1.9801412032762495e-05, "loss": 1.1742, "step": 1435 }, { "epoch": 0.15774771320589362, "grad_norm": 1.21875, "learning_rate": 1.9797602392137678e-05, "loss": 1.1946, "step": 1440 }, { "epoch": 0.15829544832119188, "grad_norm": 1.25, "learning_rate": 1.9793756929969195e-05, "loss": 1.1446, "step": 1445 }, { "epoch": 0.15884318343649012, "grad_norm": 1.2421875, "learning_rate": 1.978987566031665e-05, "loss": 1.2024, "step": 1450 }, { "epoch": 0.15939091855178836, "grad_norm": 1.2421875, "learning_rate": 1.9785958597370557e-05, "loss": 1.194, "step": 1455 }, { "epoch": 0.1599386536670866, "grad_norm": 1.2578125, "learning_rate": 1.9782005755452306e-05, "loss": 1.1704, "step": 1460 }, { "epoch": 0.16048638878238483, "grad_norm": 1.21875, "learning_rate": 1.9778017149014098e-05, "loss": 1.2247, "step": 1465 }, { "epoch": 0.16103412389768307, "grad_norm": 1.2109375, "learning_rate": 1.977399279263889e-05, "loss": 1.1891, "step": 1470 }, { "epoch": 0.16158185901298133, "grad_norm": 1.4140625, "learning_rate": 1.9769932701040343e-05, "loss": 1.1585, "step": 1475 }, { "epoch": 0.16212959412827957, "grad_norm": 1.203125, "learning_rate": 1.9765836889062788e-05, "loss": 1.1668, "step": 1480 }, { "epoch": 0.1626773292435778, "grad_norm": 1.2578125, "learning_rate": 1.9761705371681138e-05, "loss": 1.2052, "step": 1485 }, { "epoch": 0.16322506435887604, "grad_norm": 1.4296875, "learning_rate": 1.975753816400086e-05, "loss": 1.1191, "step": 1490 }, { "epoch": 0.16377279947417428, "grad_norm": 1.328125, "learning_rate": 1.975333528125791e-05, "loss": 1.1966, "step": 1495 }, { "epoch": 0.16432053458947254, "grad_norm": 1.203125, "learning_rate": 1.9749096738818663e-05, "loss": 1.1594, "step": 1500 }, { "epoch": 0.16486826970477078, "grad_norm": 1.3359375, "learning_rate": 1.9744822552179895e-05, "loss": 1.2079, "step": 1505 }, { "epoch": 0.16541600482006902, "grad_norm": 1.3046875, "learning_rate": 1.9740512736968688e-05, "loss": 1.1655, "step": 1510 }, { "epoch": 0.16596373993536725, "grad_norm": 1.21875, "learning_rate": 1.9736167308942385e-05, "loss": 1.1866, "step": 1515 }, { "epoch": 0.1665114750506655, "grad_norm": 1.1953125, "learning_rate": 1.973178628398855e-05, "loss": 1.1686, "step": 1520 }, { "epoch": 0.16705921016596373, "grad_norm": 1.2265625, "learning_rate": 1.9727369678124876e-05, "loss": 1.1725, "step": 1525 }, { "epoch": 0.167606945281262, "grad_norm": 1.296875, "learning_rate": 1.9722917507499154e-05, "loss": 1.2279, "step": 1530 }, { "epoch": 0.16815468039656023, "grad_norm": 1.2421875, "learning_rate": 1.9718429788389214e-05, "loss": 1.2022, "step": 1535 }, { "epoch": 0.16870241551185847, "grad_norm": 1.2109375, "learning_rate": 1.971390653720284e-05, "loss": 1.2493, "step": 1540 }, { "epoch": 0.1692501506271567, "grad_norm": 1.2890625, "learning_rate": 1.9709347770477743e-05, "loss": 1.1685, "step": 1545 }, { "epoch": 0.16979788574245494, "grad_norm": 1.2578125, "learning_rate": 1.9704753504881476e-05, "loss": 1.2464, "step": 1550 }, { "epoch": 0.17034562085775318, "grad_norm": 1.1875, "learning_rate": 1.9700123757211372e-05, "loss": 1.1843, "step": 1555 }, { "epoch": 0.17089335597305144, "grad_norm": 1.28125, "learning_rate": 1.969545854439451e-05, "loss": 1.1613, "step": 1560 }, { "epoch": 0.17144109108834968, "grad_norm": 1.2421875, "learning_rate": 1.9690757883487626e-05, "loss": 1.174, "step": 1565 }, { "epoch": 0.17198882620364792, "grad_norm": 1.28125, "learning_rate": 1.9686021791677055e-05, "loss": 1.2428, "step": 1570 }, { "epoch": 0.17253656131894615, "grad_norm": 1.1796875, "learning_rate": 1.9681250286278685e-05, "loss": 1.1463, "step": 1575 }, { "epoch": 0.1730842964342444, "grad_norm": 1.265625, "learning_rate": 1.9676443384737873e-05, "loss": 1.2007, "step": 1580 }, { "epoch": 0.17363203154954265, "grad_norm": 1.234375, "learning_rate": 1.9671601104629388e-05, "loss": 1.1912, "step": 1585 }, { "epoch": 0.1741797666648409, "grad_norm": 1.1953125, "learning_rate": 1.9666723463657357e-05, "loss": 1.2221, "step": 1590 }, { "epoch": 0.17472750178013913, "grad_norm": 1.3203125, "learning_rate": 1.9661810479655184e-05, "loss": 1.2012, "step": 1595 }, { "epoch": 0.17527523689543736, "grad_norm": 1.1953125, "learning_rate": 1.9656862170585494e-05, "loss": 1.1746, "step": 1600 }, { "epoch": 0.1758229720107356, "grad_norm": 1.203125, "learning_rate": 1.965187855454007e-05, "loss": 1.1739, "step": 1605 }, { "epoch": 0.17637070712603384, "grad_norm": 1.265625, "learning_rate": 1.964685964973978e-05, "loss": 1.175, "step": 1610 }, { "epoch": 0.1769184422413321, "grad_norm": 1.21875, "learning_rate": 1.9641805474534514e-05, "loss": 1.1344, "step": 1615 }, { "epoch": 0.17746617735663034, "grad_norm": 1.2265625, "learning_rate": 1.963671604740311e-05, "loss": 1.1242, "step": 1620 }, { "epoch": 0.17801391247192858, "grad_norm": 1.3359375, "learning_rate": 1.9631591386953303e-05, "loss": 1.1899, "step": 1625 }, { "epoch": 0.1785616475872268, "grad_norm": 1.25, "learning_rate": 1.9626431511921638e-05, "loss": 1.2024, "step": 1630 }, { "epoch": 0.17910938270252505, "grad_norm": 1.3125, "learning_rate": 1.9621236441173414e-05, "loss": 1.1714, "step": 1635 }, { "epoch": 0.17965711781782331, "grad_norm": 1.2421875, "learning_rate": 1.9616006193702608e-05, "loss": 1.1386, "step": 1640 }, { "epoch": 0.18020485293312155, "grad_norm": 1.2265625, "learning_rate": 1.9610740788631816e-05, "loss": 1.1612, "step": 1645 }, { "epoch": 0.1807525880484198, "grad_norm": 1.2109375, "learning_rate": 1.9605440245212165e-05, "loss": 1.1568, "step": 1650 }, { "epoch": 0.18130032316371802, "grad_norm": 1.1484375, "learning_rate": 1.960010458282326e-05, "loss": 1.145, "step": 1655 }, { "epoch": 0.18184805827901626, "grad_norm": 1.2578125, "learning_rate": 1.9594733820973105e-05, "loss": 1.1805, "step": 1660 }, { "epoch": 0.1823957933943145, "grad_norm": 1.1875, "learning_rate": 1.958932797929803e-05, "loss": 1.1542, "step": 1665 }, { "epoch": 0.18294352850961276, "grad_norm": 1.21875, "learning_rate": 1.958388707756263e-05, "loss": 1.1288, "step": 1670 }, { "epoch": 0.183491263624911, "grad_norm": 1.171875, "learning_rate": 1.957841113565967e-05, "loss": 1.2251, "step": 1675 }, { "epoch": 0.18403899874020924, "grad_norm": 1.1953125, "learning_rate": 1.957290017361005e-05, "loss": 1.166, "step": 1680 }, { "epoch": 0.18458673385550747, "grad_norm": 1.2265625, "learning_rate": 1.9567354211562693e-05, "loss": 1.179, "step": 1685 }, { "epoch": 0.1851344689708057, "grad_norm": 1.21875, "learning_rate": 1.9561773269794484e-05, "loss": 1.1975, "step": 1690 }, { "epoch": 0.18568220408610395, "grad_norm": 1.265625, "learning_rate": 1.955615736871021e-05, "loss": 1.188, "step": 1695 }, { "epoch": 0.1862299392014022, "grad_norm": 1.265625, "learning_rate": 1.955050652884247e-05, "loss": 1.2116, "step": 1700 }, { "epoch": 0.18677767431670045, "grad_norm": 1.2578125, "learning_rate": 1.9544820770851608e-05, "loss": 1.1793, "step": 1705 }, { "epoch": 0.18732540943199869, "grad_norm": 1.234375, "learning_rate": 1.9539100115525625e-05, "loss": 1.1777, "step": 1710 }, { "epoch": 0.18787314454729692, "grad_norm": 1.203125, "learning_rate": 1.9533344583780124e-05, "loss": 1.1605, "step": 1715 }, { "epoch": 0.18842087966259516, "grad_norm": 1.2890625, "learning_rate": 1.952755419665821e-05, "loss": 1.2486, "step": 1720 }, { "epoch": 0.18896861477789342, "grad_norm": 1.2578125, "learning_rate": 1.9521728975330436e-05, "loss": 1.216, "step": 1725 }, { "epoch": 0.18951634989319166, "grad_norm": 1.203125, "learning_rate": 1.951586894109471e-05, "loss": 1.1769, "step": 1730 }, { "epoch": 0.1900640850084899, "grad_norm": 1.2265625, "learning_rate": 1.9509974115376208e-05, "loss": 1.1962, "step": 1735 }, { "epoch": 0.19061182012378813, "grad_norm": 1.234375, "learning_rate": 1.9504044519727333e-05, "loss": 1.1603, "step": 1740 }, { "epoch": 0.19115955523908637, "grad_norm": 1.25, "learning_rate": 1.9498080175827598e-05, "loss": 1.1867, "step": 1745 }, { "epoch": 0.1917072903543846, "grad_norm": 1.2578125, "learning_rate": 1.949208110548356e-05, "loss": 1.1531, "step": 1750 }, { "epoch": 0.19225502546968287, "grad_norm": 1.2265625, "learning_rate": 1.9486047330628745e-05, "loss": 1.1891, "step": 1755 }, { "epoch": 0.1928027605849811, "grad_norm": 1.1796875, "learning_rate": 1.9479978873323565e-05, "loss": 1.1366, "step": 1760 }, { "epoch": 0.19335049570027935, "grad_norm": 1.3046875, "learning_rate": 1.9473875755755235e-05, "loss": 1.1604, "step": 1765 }, { "epoch": 0.19389823081557758, "grad_norm": 1.28125, "learning_rate": 1.9467738000237685e-05, "loss": 1.1663, "step": 1770 }, { "epoch": 0.19444596593087582, "grad_norm": 1.2734375, "learning_rate": 1.946156562921151e-05, "loss": 1.1747, "step": 1775 }, { "epoch": 0.19499370104617408, "grad_norm": 1.2578125, "learning_rate": 1.9455358665243836e-05, "loss": 1.1973, "step": 1780 }, { "epoch": 0.19554143616147232, "grad_norm": 1.203125, "learning_rate": 1.9449117131028282e-05, "loss": 1.1809, "step": 1785 }, { "epoch": 0.19608917127677056, "grad_norm": 1.203125, "learning_rate": 1.9442841049384865e-05, "loss": 1.2348, "step": 1790 }, { "epoch": 0.1966369063920688, "grad_norm": 1.25, "learning_rate": 1.94365304432599e-05, "loss": 1.1703, "step": 1795 }, { "epoch": 0.19718464150736703, "grad_norm": 1.2265625, "learning_rate": 1.9430185335725942e-05, "loss": 1.1491, "step": 1800 }, { "epoch": 0.19773237662266527, "grad_norm": 1.1796875, "learning_rate": 1.9423805749981673e-05, "loss": 1.143, "step": 1805 }, { "epoch": 0.19828011173796353, "grad_norm": 1.2578125, "learning_rate": 1.9417391709351845e-05, "loss": 1.1962, "step": 1810 }, { "epoch": 0.19882784685326177, "grad_norm": 1.2890625, "learning_rate": 1.9410943237287178e-05, "loss": 1.2093, "step": 1815 }, { "epoch": 0.19937558196856, "grad_norm": 1.203125, "learning_rate": 1.9404460357364282e-05, "loss": 1.165, "step": 1820 }, { "epoch": 0.19992331708385824, "grad_norm": 1.2265625, "learning_rate": 1.939794309328556e-05, "loss": 1.1203, "step": 1825 }, { "epoch": 0.20047105219915648, "grad_norm": 1.2578125, "learning_rate": 1.939139146887914e-05, "loss": 1.1476, "step": 1830 }, { "epoch": 0.20101878731445472, "grad_norm": 1.25, "learning_rate": 1.9384805508098763e-05, "loss": 1.1903, "step": 1835 }, { "epoch": 0.20156652242975298, "grad_norm": 1.2265625, "learning_rate": 1.937818523502372e-05, "loss": 1.21, "step": 1840 }, { "epoch": 0.20211425754505122, "grad_norm": 1.2890625, "learning_rate": 1.9371530673858753e-05, "loss": 1.1682, "step": 1845 }, { "epoch": 0.20266199266034945, "grad_norm": 1.265625, "learning_rate": 1.936484184893395e-05, "loss": 1.1534, "step": 1850 }, { "epoch": 0.2032097277756477, "grad_norm": 1.15625, "learning_rate": 1.93581187847047e-05, "loss": 1.136, "step": 1855 }, { "epoch": 0.20375746289094593, "grad_norm": 1.328125, "learning_rate": 1.9351361505751554e-05, "loss": 1.1976, "step": 1860 }, { "epoch": 0.2043051980062442, "grad_norm": 1.21875, "learning_rate": 1.9344570036780165e-05, "loss": 1.1293, "step": 1865 }, { "epoch": 0.20485293312154243, "grad_norm": 1.2265625, "learning_rate": 1.9337744402621194e-05, "loss": 1.1543, "step": 1870 }, { "epoch": 0.20540066823684067, "grad_norm": 1.2421875, "learning_rate": 1.9330884628230203e-05, "loss": 1.2295, "step": 1875 }, { "epoch": 0.2059484033521389, "grad_norm": 1.359375, "learning_rate": 1.932399073868759e-05, "loss": 1.1531, "step": 1880 }, { "epoch": 0.20649613846743714, "grad_norm": 1.2578125, "learning_rate": 1.9317062759198478e-05, "loss": 1.1634, "step": 1885 }, { "epoch": 0.20704387358273538, "grad_norm": 1.3046875, "learning_rate": 1.9310100715092624e-05, "loss": 1.1359, "step": 1890 }, { "epoch": 0.20759160869803364, "grad_norm": 1.2265625, "learning_rate": 1.9303104631824338e-05, "loss": 1.1439, "step": 1895 }, { "epoch": 0.20813934381333188, "grad_norm": 1.2578125, "learning_rate": 1.929607453497237e-05, "loss": 1.16, "step": 1900 }, { "epoch": 0.20868707892863012, "grad_norm": 1.234375, "learning_rate": 1.9289010450239843e-05, "loss": 1.1588, "step": 1905 }, { "epoch": 0.20923481404392835, "grad_norm": 1.21875, "learning_rate": 1.928191240345414e-05, "loss": 1.1687, "step": 1910 }, { "epoch": 0.2097825491592266, "grad_norm": 1.25, "learning_rate": 1.9274780420566813e-05, "loss": 1.1872, "step": 1915 }, { "epoch": 0.21033028427452485, "grad_norm": 1.1953125, "learning_rate": 1.926761452765349e-05, "loss": 1.1762, "step": 1920 }, { "epoch": 0.2108780193898231, "grad_norm": 1.2265625, "learning_rate": 1.926041475091377e-05, "loss": 1.1131, "step": 1925 }, { "epoch": 0.21142575450512133, "grad_norm": 1.1953125, "learning_rate": 1.925318111667116e-05, "loss": 1.1835, "step": 1930 }, { "epoch": 0.21197348962041956, "grad_norm": 1.21875, "learning_rate": 1.9245913651372935e-05, "loss": 1.2117, "step": 1935 }, { "epoch": 0.2125212247357178, "grad_norm": 1.2265625, "learning_rate": 1.923861238159007e-05, "loss": 1.1396, "step": 1940 }, { "epoch": 0.21306895985101604, "grad_norm": 1.2421875, "learning_rate": 1.9231277334017126e-05, "loss": 1.1932, "step": 1945 }, { "epoch": 0.2136166949663143, "grad_norm": 1.21875, "learning_rate": 1.9223908535472172e-05, "loss": 1.1839, "step": 1950 }, { "epoch": 0.21416443008161254, "grad_norm": 1.2890625, "learning_rate": 1.921650601289667e-05, "loss": 1.1628, "step": 1955 }, { "epoch": 0.21471216519691078, "grad_norm": 1.2109375, "learning_rate": 1.9209069793355382e-05, "loss": 1.1491, "step": 1960 }, { "epoch": 0.215259900312209, "grad_norm": 1.2421875, "learning_rate": 1.9201599904036276e-05, "loss": 1.186, "step": 1965 }, { "epoch": 0.21580763542750725, "grad_norm": 1.2734375, "learning_rate": 1.919409637225041e-05, "loss": 1.1556, "step": 1970 }, { "epoch": 0.2163553705428055, "grad_norm": 1.1953125, "learning_rate": 1.9186559225431857e-05, "loss": 1.1598, "step": 1975 }, { "epoch": 0.21690310565810375, "grad_norm": 1.1953125, "learning_rate": 1.917898849113759e-05, "loss": 1.2076, "step": 1980 }, { "epoch": 0.217450840773402, "grad_norm": 1.234375, "learning_rate": 1.9171384197047376e-05, "loss": 1.1219, "step": 1985 }, { "epoch": 0.21799857588870022, "grad_norm": 1.2109375, "learning_rate": 1.9163746370963687e-05, "loss": 1.1347, "step": 1990 }, { "epoch": 0.21854631100399846, "grad_norm": 1.234375, "learning_rate": 1.9156075040811585e-05, "loss": 1.1717, "step": 1995 }, { "epoch": 0.2190940461192967, "grad_norm": 1.2265625, "learning_rate": 1.9148370234638646e-05, "loss": 1.1705, "step": 2000 }, { "epoch": 0.21964178123459496, "grad_norm": 1.2265625, "learning_rate": 1.914063198061482e-05, "loss": 1.1666, "step": 2005 }, { "epoch": 0.2201895163498932, "grad_norm": 1.25, "learning_rate": 1.9132860307032362e-05, "loss": 1.1711, "step": 2010 }, { "epoch": 0.22073725146519144, "grad_norm": 1.21875, "learning_rate": 1.9125055242305702e-05, "loss": 1.1176, "step": 2015 }, { "epoch": 0.22128498658048967, "grad_norm": 1.2578125, "learning_rate": 1.9117216814971365e-05, "loss": 1.1716, "step": 2020 }, { "epoch": 0.2218327216957879, "grad_norm": 1.2265625, "learning_rate": 1.9109345053687845e-05, "loss": 1.1684, "step": 2025 }, { "epoch": 0.22238045681108615, "grad_norm": 1.3125, "learning_rate": 1.9101439987235514e-05, "loss": 1.1766, "step": 2030 }, { "epoch": 0.2229281919263844, "grad_norm": 1.1953125, "learning_rate": 1.909350164451651e-05, "loss": 1.21, "step": 2035 }, { "epoch": 0.22347592704168265, "grad_norm": 1.28125, "learning_rate": 1.9085530054554642e-05, "loss": 1.2172, "step": 2040 }, { "epoch": 0.22402366215698089, "grad_norm": 1.21875, "learning_rate": 1.907752524649527e-05, "loss": 1.1885, "step": 2045 }, { "epoch": 0.22457139727227912, "grad_norm": 1.25, "learning_rate": 1.9069487249605202e-05, "loss": 1.1708, "step": 2050 }, { "epoch": 0.22511913238757736, "grad_norm": 1.1953125, "learning_rate": 1.90614160932726e-05, "loss": 1.1645, "step": 2055 }, { "epoch": 0.2256668675028756, "grad_norm": 1.25, "learning_rate": 1.9053311807006845e-05, "loss": 1.1632, "step": 2060 }, { "epoch": 0.22621460261817386, "grad_norm": 1.21875, "learning_rate": 1.9045174420438465e-05, "loss": 1.1619, "step": 2065 }, { "epoch": 0.2267623377334721, "grad_norm": 1.25, "learning_rate": 1.9037003963318993e-05, "loss": 1.1658, "step": 2070 }, { "epoch": 0.22731007284877033, "grad_norm": 1.34375, "learning_rate": 1.9028800465520878e-05, "loss": 1.1941, "step": 2075 }, { "epoch": 0.22785780796406857, "grad_norm": 1.203125, "learning_rate": 1.9020563957037378e-05, "loss": 1.1463, "step": 2080 }, { "epoch": 0.2284055430793668, "grad_norm": 1.2421875, "learning_rate": 1.901229446798243e-05, "loss": 1.16, "step": 2085 }, { "epoch": 0.22895327819466507, "grad_norm": 1.234375, "learning_rate": 1.9003992028590568e-05, "loss": 1.1567, "step": 2090 }, { "epoch": 0.2295010133099633, "grad_norm": 1.265625, "learning_rate": 1.8995656669216786e-05, "loss": 1.2039, "step": 2095 }, { "epoch": 0.23004874842526155, "grad_norm": 1.28125, "learning_rate": 1.898728842033644e-05, "loss": 1.1733, "step": 2100 }, { "epoch": 0.23059648354055978, "grad_norm": 1.2421875, "learning_rate": 1.8978887312545135e-05, "loss": 1.1702, "step": 2105 }, { "epoch": 0.23114421865585802, "grad_norm": 1.2109375, "learning_rate": 1.897045337655862e-05, "loss": 1.2218, "step": 2110 }, { "epoch": 0.23169195377115626, "grad_norm": 1.2265625, "learning_rate": 1.8961986643212664e-05, "loss": 1.2084, "step": 2115 }, { "epoch": 0.23223968888645452, "grad_norm": 1.2109375, "learning_rate": 1.895348714346294e-05, "loss": 1.1865, "step": 2120 }, { "epoch": 0.23278742400175276, "grad_norm": 1.265625, "learning_rate": 1.894495490838494e-05, "loss": 1.2198, "step": 2125 }, { "epoch": 0.233335159117051, "grad_norm": 1.1953125, "learning_rate": 1.8936389969173825e-05, "loss": 1.134, "step": 2130 }, { "epoch": 0.23388289423234923, "grad_norm": 1.2421875, "learning_rate": 1.892779235714433e-05, "loss": 1.1399, "step": 2135 }, { "epoch": 0.23443062934764747, "grad_norm": 1.21875, "learning_rate": 1.8919162103730656e-05, "loss": 1.1123, "step": 2140 }, { "epoch": 0.23497836446294573, "grad_norm": 1.25, "learning_rate": 1.891049924048633e-05, "loss": 1.1695, "step": 2145 }, { "epoch": 0.23552609957824397, "grad_norm": 1.2578125, "learning_rate": 1.8901803799084117e-05, "loss": 1.1359, "step": 2150 }, { "epoch": 0.2360738346935422, "grad_norm": 1.3046875, "learning_rate": 1.8893075811315894e-05, "loss": 1.2111, "step": 2155 }, { "epoch": 0.23662156980884044, "grad_norm": 1.2265625, "learning_rate": 1.888431530909253e-05, "loss": 1.1088, "step": 2160 }, { "epoch": 0.23716930492413868, "grad_norm": 1.2734375, "learning_rate": 1.8875522324443762e-05, "loss": 1.1838, "step": 2165 }, { "epoch": 0.23771704003943692, "grad_norm": 1.2109375, "learning_rate": 1.8866696889518107e-05, "loss": 1.1323, "step": 2170 }, { "epoch": 0.23826477515473518, "grad_norm": 1.2578125, "learning_rate": 1.8857839036582707e-05, "loss": 1.1296, "step": 2175 }, { "epoch": 0.23881251027003342, "grad_norm": 1.1953125, "learning_rate": 1.8848948798023238e-05, "loss": 1.1617, "step": 2180 }, { "epoch": 0.23936024538533165, "grad_norm": 1.203125, "learning_rate": 1.8840026206343786e-05, "loss": 1.1627, "step": 2185 }, { "epoch": 0.2399079805006299, "grad_norm": 1.2265625, "learning_rate": 1.8831071294166717e-05, "loss": 1.1173, "step": 2190 }, { "epoch": 0.24045571561592813, "grad_norm": 1.28125, "learning_rate": 1.882208409423257e-05, "loss": 1.212, "step": 2195 }, { "epoch": 0.24100345073122637, "grad_norm": 1.28125, "learning_rate": 1.8813064639399933e-05, "loss": 1.1543, "step": 2200 }, { "epoch": 0.24155118584652463, "grad_norm": 1.2578125, "learning_rate": 1.8804012962645322e-05, "loss": 1.1847, "step": 2205 }, { "epoch": 0.24209892096182287, "grad_norm": 1.2109375, "learning_rate": 1.8794929097063062e-05, "loss": 1.2248, "step": 2210 }, { "epoch": 0.2426466560771211, "grad_norm": 1.234375, "learning_rate": 1.8785813075865164e-05, "loss": 1.2115, "step": 2215 }, { "epoch": 0.24319439119241934, "grad_norm": 1.2265625, "learning_rate": 1.8776664932381208e-05, "loss": 1.1039, "step": 2220 }, { "epoch": 0.24374212630771758, "grad_norm": 1.203125, "learning_rate": 1.8767484700058212e-05, "loss": 1.1608, "step": 2225 }, { "epoch": 0.24428986142301584, "grad_norm": 1.2578125, "learning_rate": 1.875827241246052e-05, "loss": 1.1998, "step": 2230 }, { "epoch": 0.24483759653831408, "grad_norm": 1.296875, "learning_rate": 1.874902810326968e-05, "loss": 1.2347, "step": 2235 }, { "epoch": 0.24538533165361232, "grad_norm": 1.3203125, "learning_rate": 1.873975180628431e-05, "loss": 1.149, "step": 2240 }, { "epoch": 0.24593306676891055, "grad_norm": 1.15625, "learning_rate": 1.873044355541997e-05, "loss": 1.1524, "step": 2245 }, { "epoch": 0.2464808018842088, "grad_norm": 1.21875, "learning_rate": 1.872110338470907e-05, "loss": 1.1628, "step": 2250 }, { "epoch": 0.24702853699950703, "grad_norm": 1.1796875, "learning_rate": 1.8711731328300715e-05, "loss": 1.201, "step": 2255 }, { "epoch": 0.2475762721148053, "grad_norm": 1.2421875, "learning_rate": 1.8702327420460575e-05, "loss": 1.1418, "step": 2260 }, { "epoch": 0.24812400723010353, "grad_norm": 1.171875, "learning_rate": 1.8692891695570795e-05, "loss": 1.1357, "step": 2265 }, { "epoch": 0.24867174234540176, "grad_norm": 1.34375, "learning_rate": 1.8683424188129836e-05, "loss": 1.1994, "step": 2270 }, { "epoch": 0.2492194774607, "grad_norm": 1.21875, "learning_rate": 1.8673924932752366e-05, "loss": 1.1592, "step": 2275 }, { "epoch": 0.24976721257599824, "grad_norm": 1.2578125, "learning_rate": 1.8664393964169125e-05, "loss": 1.1777, "step": 2280 }, { "epoch": 0.2503149476912965, "grad_norm": 1.2734375, "learning_rate": 1.86548313172268e-05, "loss": 1.1569, "step": 2285 }, { "epoch": 0.25086268280659474, "grad_norm": 1.1796875, "learning_rate": 1.8645237026887896e-05, "loss": 1.162, "step": 2290 }, { "epoch": 0.25141041792189295, "grad_norm": 1.2265625, "learning_rate": 1.8635611128230632e-05, "loss": 1.1158, "step": 2295 }, { "epoch": 0.2519581530371912, "grad_norm": 1.2109375, "learning_rate": 1.8625953656448764e-05, "loss": 1.1439, "step": 2300 }, { "epoch": 0.2525058881524895, "grad_norm": 1.1953125, "learning_rate": 1.86162646468515e-05, "loss": 1.1504, "step": 2305 }, { "epoch": 0.2530536232677877, "grad_norm": 1.1875, "learning_rate": 1.8606544134863355e-05, "loss": 1.1322, "step": 2310 }, { "epoch": 0.25360135838308595, "grad_norm": 1.203125, "learning_rate": 1.8596792156024014e-05, "loss": 1.2144, "step": 2315 }, { "epoch": 0.25414909349838416, "grad_norm": 1.1875, "learning_rate": 1.858700874598821e-05, "loss": 1.1363, "step": 2320 }, { "epoch": 0.2546968286136824, "grad_norm": 1.1953125, "learning_rate": 1.8577193940525608e-05, "loss": 1.1982, "step": 2325 }, { "epoch": 0.2552445637289807, "grad_norm": 1.25, "learning_rate": 1.8567347775520642e-05, "loss": 1.1562, "step": 2330 }, { "epoch": 0.2557922988442789, "grad_norm": 1.25, "learning_rate": 1.8557470286972405e-05, "loss": 1.1552, "step": 2335 }, { "epoch": 0.25634003395957716, "grad_norm": 1.1484375, "learning_rate": 1.8547561510994525e-05, "loss": 1.1051, "step": 2340 }, { "epoch": 0.25688776907487537, "grad_norm": 1.15625, "learning_rate": 1.853762148381501e-05, "loss": 1.1806, "step": 2345 }, { "epoch": 0.25743550419017364, "grad_norm": 1.28125, "learning_rate": 1.8527650241776128e-05, "loss": 1.1662, "step": 2350 }, { "epoch": 0.2579832393054719, "grad_norm": 1.2265625, "learning_rate": 1.8517647821334278e-05, "loss": 1.2243, "step": 2355 }, { "epoch": 0.2585309744207701, "grad_norm": 1.2890625, "learning_rate": 1.8507614259059847e-05, "loss": 1.1888, "step": 2360 }, { "epoch": 0.2590787095360684, "grad_norm": 1.234375, "learning_rate": 1.849754959163709e-05, "loss": 1.1542, "step": 2365 }, { "epoch": 0.2596264446513666, "grad_norm": 1.203125, "learning_rate": 1.848745385586398e-05, "loss": 1.1973, "step": 2370 }, { "epoch": 0.26017417976666485, "grad_norm": 1.1953125, "learning_rate": 1.8477327088652078e-05, "loss": 1.1869, "step": 2375 }, { "epoch": 0.26072191488196306, "grad_norm": 1.1953125, "learning_rate": 1.846716932702641e-05, "loss": 1.1914, "step": 2380 }, { "epoch": 0.2612696499972613, "grad_norm": 1.2109375, "learning_rate": 1.8456980608125317e-05, "loss": 1.1582, "step": 2385 }, { "epoch": 0.2618173851125596, "grad_norm": 1.2109375, "learning_rate": 1.8446760969200318e-05, "loss": 1.1917, "step": 2390 }, { "epoch": 0.2623651202278578, "grad_norm": 1.2109375, "learning_rate": 1.8436510447615997e-05, "loss": 1.1456, "step": 2395 }, { "epoch": 0.26291285534315606, "grad_norm": 1.21875, "learning_rate": 1.8426229080849833e-05, "loss": 1.1333, "step": 2400 }, { "epoch": 0.26346059045845427, "grad_norm": 1.234375, "learning_rate": 1.8415916906492093e-05, "loss": 1.1286, "step": 2405 }, { "epoch": 0.26400832557375253, "grad_norm": 1.2265625, "learning_rate": 1.8405573962245666e-05, "loss": 1.1542, "step": 2410 }, { "epoch": 0.2645560606890508, "grad_norm": 1.2109375, "learning_rate": 1.839520028592596e-05, "loss": 1.1858, "step": 2415 }, { "epoch": 0.265103795804349, "grad_norm": 1.2109375, "learning_rate": 1.838479591546072e-05, "loss": 1.1588, "step": 2420 }, { "epoch": 0.26565153091964727, "grad_norm": 1.21875, "learning_rate": 1.8374360888889943e-05, "loss": 1.174, "step": 2425 }, { "epoch": 0.2661992660349455, "grad_norm": 1.234375, "learning_rate": 1.8363895244365687e-05, "loss": 1.2234, "step": 2430 }, { "epoch": 0.26674700115024375, "grad_norm": 1.1796875, "learning_rate": 1.8353399020151954e-05, "loss": 1.1582, "step": 2435 }, { "epoch": 0.267294736265542, "grad_norm": 1.2421875, "learning_rate": 1.8342872254624565e-05, "loss": 1.1537, "step": 2440 }, { "epoch": 0.2678424713808402, "grad_norm": 1.1953125, "learning_rate": 1.8332314986270994e-05, "loss": 1.154, "step": 2445 }, { "epoch": 0.2683902064961385, "grad_norm": 1.15625, "learning_rate": 1.832172725369024e-05, "loss": 1.1304, "step": 2450 }, { "epoch": 0.2689379416114367, "grad_norm": 1.234375, "learning_rate": 1.831110909559269e-05, "loss": 1.1849, "step": 2455 }, { "epoch": 0.26948567672673496, "grad_norm": 1.203125, "learning_rate": 1.830046055079995e-05, "loss": 1.1697, "step": 2460 }, { "epoch": 0.27003341184203317, "grad_norm": 1.15625, "learning_rate": 1.8289781658244757e-05, "loss": 1.1658, "step": 2465 }, { "epoch": 0.27058114695733143, "grad_norm": 1.1953125, "learning_rate": 1.827907245697078e-05, "loss": 1.1697, "step": 2470 }, { "epoch": 0.2711288820726297, "grad_norm": 1.1875, "learning_rate": 1.826833298613251e-05, "loss": 1.1042, "step": 2475 }, { "epoch": 0.2716766171879279, "grad_norm": 1.265625, "learning_rate": 1.825756328499511e-05, "loss": 1.2129, "step": 2480 }, { "epoch": 0.27222435230322617, "grad_norm": 1.453125, "learning_rate": 1.8246763392934256e-05, "loss": 1.2177, "step": 2485 }, { "epoch": 0.2727720874185244, "grad_norm": 1.2265625, "learning_rate": 1.8235933349436025e-05, "loss": 1.134, "step": 2490 }, { "epoch": 0.27331982253382264, "grad_norm": 1.234375, "learning_rate": 1.8225073194096728e-05, "loss": 1.1721, "step": 2495 }, { "epoch": 0.2738675576491209, "grad_norm": 1.25, "learning_rate": 1.8214182966622758e-05, "loss": 1.2035, "step": 2500 }, { "epoch": 0.2744152927644191, "grad_norm": 1.234375, "learning_rate": 1.8203262706830467e-05, "loss": 1.1489, "step": 2505 }, { "epoch": 0.2749630278797174, "grad_norm": 1.21875, "learning_rate": 1.8192312454646007e-05, "loss": 1.1163, "step": 2510 }, { "epoch": 0.2755107629950156, "grad_norm": 1.2578125, "learning_rate": 1.818133225010519e-05, "loss": 1.1847, "step": 2515 }, { "epoch": 0.27605849811031385, "grad_norm": 1.2265625, "learning_rate": 1.8170322133353328e-05, "loss": 1.2177, "step": 2520 }, { "epoch": 0.2766062332256121, "grad_norm": 1.21875, "learning_rate": 1.815928214464511e-05, "loss": 1.1929, "step": 2525 }, { "epoch": 0.27715396834091033, "grad_norm": 1.2109375, "learning_rate": 1.814821232434444e-05, "loss": 1.1931, "step": 2530 }, { "epoch": 0.2777017034562086, "grad_norm": 1.234375, "learning_rate": 1.8137112712924273e-05, "loss": 1.1286, "step": 2535 }, { "epoch": 0.2782494385715068, "grad_norm": 1.2421875, "learning_rate": 1.812598335096651e-05, "loss": 1.1489, "step": 2540 }, { "epoch": 0.27879717368680507, "grad_norm": 1.1875, "learning_rate": 1.8114824279161806e-05, "loss": 1.1457, "step": 2545 }, { "epoch": 0.2793449088021033, "grad_norm": 1.2578125, "learning_rate": 1.810363553830945e-05, "loss": 1.1604, "step": 2550 }, { "epoch": 0.27989264391740154, "grad_norm": 1.2578125, "learning_rate": 1.8092417169317203e-05, "loss": 1.2167, "step": 2555 }, { "epoch": 0.2804403790326998, "grad_norm": 1.234375, "learning_rate": 1.8081169213201145e-05, "loss": 1.1854, "step": 2560 }, { "epoch": 0.280988114147998, "grad_norm": 1.1953125, "learning_rate": 1.806989171108554e-05, "loss": 1.1755, "step": 2565 }, { "epoch": 0.2815358492632963, "grad_norm": 1.2421875, "learning_rate": 1.8058584704202674e-05, "loss": 1.1673, "step": 2570 }, { "epoch": 0.2820835843785945, "grad_norm": 1.28125, "learning_rate": 1.8047248233892706e-05, "loss": 1.2167, "step": 2575 }, { "epoch": 0.28263131949389275, "grad_norm": 1.203125, "learning_rate": 1.8035882341603518e-05, "loss": 1.1441, "step": 2580 }, { "epoch": 0.283179054609191, "grad_norm": 1.2265625, "learning_rate": 1.8024487068890556e-05, "loss": 1.1872, "step": 2585 }, { "epoch": 0.2837267897244892, "grad_norm": 1.265625, "learning_rate": 1.8013062457416702e-05, "loss": 1.1503, "step": 2590 }, { "epoch": 0.2842745248397875, "grad_norm": 1.21875, "learning_rate": 1.800160854895209e-05, "loss": 1.1635, "step": 2595 }, { "epoch": 0.2848222599550857, "grad_norm": 1.171875, "learning_rate": 1.7990125385373978e-05, "loss": 1.1482, "step": 2600 }, { "epoch": 0.28536999507038396, "grad_norm": 1.2890625, "learning_rate": 1.7978613008666577e-05, "loss": 1.1801, "step": 2605 }, { "epoch": 0.28591773018568223, "grad_norm": 1.21875, "learning_rate": 1.796707146092091e-05, "loss": 1.1781, "step": 2610 }, { "epoch": 0.28646546530098044, "grad_norm": 1.2109375, "learning_rate": 1.7955500784334655e-05, "loss": 1.1593, "step": 2615 }, { "epoch": 0.2870132004162787, "grad_norm": 1.203125, "learning_rate": 1.794390102121199e-05, "loss": 1.1746, "step": 2620 }, { "epoch": 0.2875609355315769, "grad_norm": 1.203125, "learning_rate": 1.7932272213963434e-05, "loss": 1.1758, "step": 2625 }, { "epoch": 0.2881086706468752, "grad_norm": 1.2578125, "learning_rate": 1.7920614405105695e-05, "loss": 1.2296, "step": 2630 }, { "epoch": 0.28865640576217344, "grad_norm": 1.1875, "learning_rate": 1.7908927637261522e-05, "loss": 1.1928, "step": 2635 }, { "epoch": 0.28920414087747165, "grad_norm": 1.3203125, "learning_rate": 1.789721195315954e-05, "loss": 1.1237, "step": 2640 }, { "epoch": 0.2897518759927699, "grad_norm": 1.2109375, "learning_rate": 1.7885467395634087e-05, "loss": 1.1734, "step": 2645 }, { "epoch": 0.2902996111080681, "grad_norm": 1.2421875, "learning_rate": 1.7873694007625084e-05, "loss": 1.1656, "step": 2650 }, { "epoch": 0.2908473462233664, "grad_norm": 1.234375, "learning_rate": 1.786189183217784e-05, "loss": 1.1558, "step": 2655 }, { "epoch": 0.2913950813386646, "grad_norm": 1.1875, "learning_rate": 1.785006091244294e-05, "loss": 1.1529, "step": 2660 }, { "epoch": 0.29194281645396286, "grad_norm": 1.1875, "learning_rate": 1.7838201291676037e-05, "loss": 1.1561, "step": 2665 }, { "epoch": 0.2924905515692611, "grad_norm": 1.21875, "learning_rate": 1.7826313013237744e-05, "loss": 1.1463, "step": 2670 }, { "epoch": 0.29303828668455933, "grad_norm": 1.2109375, "learning_rate": 1.7814396120593428e-05, "loss": 1.1611, "step": 2675 }, { "epoch": 0.2935860217998576, "grad_norm": 1.2109375, "learning_rate": 1.7802450657313086e-05, "loss": 1.1687, "step": 2680 }, { "epoch": 0.2941337569151558, "grad_norm": 1.1953125, "learning_rate": 1.7790476667071175e-05, "loss": 1.1818, "step": 2685 }, { "epoch": 0.2946814920304541, "grad_norm": 1.2109375, "learning_rate": 1.7778474193646448e-05, "loss": 1.1589, "step": 2690 }, { "epoch": 0.29522922714575234, "grad_norm": 1.171875, "learning_rate": 1.776644328092179e-05, "loss": 1.1448, "step": 2695 }, { "epoch": 0.29577696226105055, "grad_norm": 1.25, "learning_rate": 1.7754383972884084e-05, "loss": 1.1354, "step": 2700 }, { "epoch": 0.2963246973763488, "grad_norm": 1.1796875, "learning_rate": 1.7742296313624005e-05, "loss": 1.1309, "step": 2705 }, { "epoch": 0.296872432491647, "grad_norm": 1.1875, "learning_rate": 1.773018034733591e-05, "loss": 1.1921, "step": 2710 }, { "epoch": 0.2974201676069453, "grad_norm": 1.2265625, "learning_rate": 1.771803611831762e-05, "loss": 1.1484, "step": 2715 }, { "epoch": 0.29796790272224355, "grad_norm": 1.1953125, "learning_rate": 1.7705863670970327e-05, "loss": 1.2024, "step": 2720 }, { "epoch": 0.29851563783754176, "grad_norm": 1.234375, "learning_rate": 1.7693663049798363e-05, "loss": 1.1696, "step": 2725 }, { "epoch": 0.29906337295284, "grad_norm": 1.21875, "learning_rate": 1.7681434299409077e-05, "loss": 1.1191, "step": 2730 }, { "epoch": 0.29961110806813823, "grad_norm": 1.2265625, "learning_rate": 1.766917746451267e-05, "loss": 1.1662, "step": 2735 }, { "epoch": 0.3001588431834365, "grad_norm": 1.203125, "learning_rate": 1.7656892589922017e-05, "loss": 1.1653, "step": 2740 }, { "epoch": 0.3007065782987347, "grad_norm": 1.2578125, "learning_rate": 1.7644579720552513e-05, "loss": 1.1145, "step": 2745 }, { "epoch": 0.30125431341403297, "grad_norm": 1.21875, "learning_rate": 1.76322389014219e-05, "loss": 1.1201, "step": 2750 }, { "epoch": 0.30180204852933123, "grad_norm": 1.1953125, "learning_rate": 1.761987017765012e-05, "loss": 1.1349, "step": 2755 }, { "epoch": 0.30234978364462944, "grad_norm": 1.2421875, "learning_rate": 1.7607473594459127e-05, "loss": 1.1937, "step": 2760 }, { "epoch": 0.3028975187599277, "grad_norm": 1.28125, "learning_rate": 1.7595049197172744e-05, "loss": 1.1489, "step": 2765 }, { "epoch": 0.3034452538752259, "grad_norm": 1.265625, "learning_rate": 1.7582597031216476e-05, "loss": 1.1559, "step": 2770 }, { "epoch": 0.3039929889905242, "grad_norm": 1.203125, "learning_rate": 1.7570117142117366e-05, "loss": 1.1413, "step": 2775 }, { "epoch": 0.30454072410582245, "grad_norm": 1.2109375, "learning_rate": 1.7557609575503808e-05, "loss": 1.1424, "step": 2780 }, { "epoch": 0.30508845922112066, "grad_norm": 1.2265625, "learning_rate": 1.754507437710539e-05, "loss": 1.1737, "step": 2785 }, { "epoch": 0.3056361943364189, "grad_norm": 1.1953125, "learning_rate": 1.7532511592752734e-05, "loss": 1.1835, "step": 2790 }, { "epoch": 0.30618392945171713, "grad_norm": 1.2421875, "learning_rate": 1.751992126837731e-05, "loss": 1.2163, "step": 2795 }, { "epoch": 0.3067316645670154, "grad_norm": 1.2109375, "learning_rate": 1.7507303450011287e-05, "loss": 1.1411, "step": 2800 }, { "epoch": 0.30727939968231366, "grad_norm": 1.2109375, "learning_rate": 1.7494658183787344e-05, "loss": 1.2053, "step": 2805 }, { "epoch": 0.30782713479761187, "grad_norm": 1.21875, "learning_rate": 1.7481985515938538e-05, "loss": 1.1699, "step": 2810 }, { "epoch": 0.30837486991291013, "grad_norm": 1.296875, "learning_rate": 1.746928549279808e-05, "loss": 1.1804, "step": 2815 }, { "epoch": 0.30892260502820834, "grad_norm": 1.25, "learning_rate": 1.745655816079922e-05, "loss": 1.1437, "step": 2820 }, { "epoch": 0.3094703401435066, "grad_norm": 1.2421875, "learning_rate": 1.744380356647504e-05, "loss": 1.13, "step": 2825 }, { "epoch": 0.3100180752588048, "grad_norm": 1.265625, "learning_rate": 1.743102175645831e-05, "loss": 1.1917, "step": 2830 }, { "epoch": 0.3105658103741031, "grad_norm": 1.234375, "learning_rate": 1.741821277748128e-05, "loss": 1.1613, "step": 2835 }, { "epoch": 0.31111354548940134, "grad_norm": 1.2421875, "learning_rate": 1.7405376676375567e-05, "loss": 1.1408, "step": 2840 }, { "epoch": 0.31166128060469955, "grad_norm": 1.296875, "learning_rate": 1.7392513500071927e-05, "loss": 1.1696, "step": 2845 }, { "epoch": 0.3122090157199978, "grad_norm": 1.1640625, "learning_rate": 1.737962329560011e-05, "loss": 1.1046, "step": 2850 }, { "epoch": 0.312756750835296, "grad_norm": 1.296875, "learning_rate": 1.7366706110088697e-05, "loss": 1.1929, "step": 2855 }, { "epoch": 0.3133044859505943, "grad_norm": 1.2109375, "learning_rate": 1.73537619907649e-05, "loss": 1.1473, "step": 2860 }, { "epoch": 0.31385222106589256, "grad_norm": 1.171875, "learning_rate": 1.7340790984954425e-05, "loss": 1.098, "step": 2865 }, { "epoch": 0.31439995618119077, "grad_norm": 1.234375, "learning_rate": 1.7327793140081256e-05, "loss": 1.1105, "step": 2870 }, { "epoch": 0.31494769129648903, "grad_norm": 1.1796875, "learning_rate": 1.731476850366752e-05, "loss": 1.186, "step": 2875 }, { "epoch": 0.31549542641178724, "grad_norm": 1.328125, "learning_rate": 1.7301717123333297e-05, "loss": 1.2037, "step": 2880 }, { "epoch": 0.3160431615270855, "grad_norm": 1.25, "learning_rate": 1.7288639046796442e-05, "loss": 1.2007, "step": 2885 }, { "epoch": 0.31659089664238377, "grad_norm": 1.1875, "learning_rate": 1.7275534321872415e-05, "loss": 1.1487, "step": 2890 }, { "epoch": 0.317138631757682, "grad_norm": 1.1875, "learning_rate": 1.726240299647411e-05, "loss": 1.1364, "step": 2895 }, { "epoch": 0.31768636687298024, "grad_norm": 1.2265625, "learning_rate": 1.7249245118611684e-05, "loss": 1.1387, "step": 2900 }, { "epoch": 0.31823410198827845, "grad_norm": 1.234375, "learning_rate": 1.723606073639235e-05, "loss": 1.1701, "step": 2905 }, { "epoch": 0.3187818371035767, "grad_norm": 1.2109375, "learning_rate": 1.7222849898020256e-05, "loss": 1.1947, "step": 2910 }, { "epoch": 0.319329572218875, "grad_norm": 1.234375, "learning_rate": 1.720961265179625e-05, "loss": 1.1591, "step": 2915 }, { "epoch": 0.3198773073341732, "grad_norm": 1.265625, "learning_rate": 1.7196349046117753e-05, "loss": 1.1816, "step": 2920 }, { "epoch": 0.32042504244947145, "grad_norm": 1.2265625, "learning_rate": 1.7183059129478543e-05, "loss": 1.1463, "step": 2925 }, { "epoch": 0.32097277756476966, "grad_norm": 1.265625, "learning_rate": 1.7169742950468607e-05, "loss": 1.0909, "step": 2930 }, { "epoch": 0.3215205126800679, "grad_norm": 1.1953125, "learning_rate": 1.7156400557773944e-05, "loss": 1.1657, "step": 2935 }, { "epoch": 0.32206824779536614, "grad_norm": 1.171875, "learning_rate": 1.71430320001764e-05, "loss": 1.1314, "step": 2940 }, { "epoch": 0.3226159829106644, "grad_norm": 1.2421875, "learning_rate": 1.712963732655348e-05, "loss": 1.1076, "step": 2945 }, { "epoch": 0.32316371802596267, "grad_norm": 1.1875, "learning_rate": 1.711621658587817e-05, "loss": 1.1591, "step": 2950 }, { "epoch": 0.3237114531412609, "grad_norm": 1.2890625, "learning_rate": 1.7102769827218773e-05, "loss": 1.1353, "step": 2955 }, { "epoch": 0.32425918825655914, "grad_norm": 1.234375, "learning_rate": 1.7089297099738703e-05, "loss": 1.186, "step": 2960 }, { "epoch": 0.32480692337185735, "grad_norm": 1.2421875, "learning_rate": 1.7075798452696333e-05, "loss": 1.1698, "step": 2965 }, { "epoch": 0.3253546584871556, "grad_norm": 1.2421875, "learning_rate": 1.706227393544479e-05, "loss": 1.1474, "step": 2970 }, { "epoch": 0.3259023936024539, "grad_norm": 1.2265625, "learning_rate": 1.7048723597431804e-05, "loss": 1.1462, "step": 2975 }, { "epoch": 0.3264501287177521, "grad_norm": 1.25, "learning_rate": 1.703514748819948e-05, "loss": 1.1458, "step": 2980 }, { "epoch": 0.32699786383305035, "grad_norm": 1.3203125, "learning_rate": 1.702154565738418e-05, "loss": 1.1661, "step": 2985 }, { "epoch": 0.32754559894834856, "grad_norm": 1.15625, "learning_rate": 1.7007918154716286e-05, "loss": 1.1273, "step": 2990 }, { "epoch": 0.3280933340636468, "grad_norm": 1.1796875, "learning_rate": 1.6994265030020056e-05, "loss": 1.1597, "step": 2995 }, { "epoch": 0.3286410691789451, "grad_norm": 1.2109375, "learning_rate": 1.69805863332134e-05, "loss": 1.1346, "step": 3000 }, { "epoch": 0.3291888042942433, "grad_norm": 1.28125, "learning_rate": 1.6966882114307756e-05, "loss": 1.2173, "step": 3005 }, { "epoch": 0.32973653940954156, "grad_norm": 1.25, "learning_rate": 1.695315242340785e-05, "loss": 1.1515, "step": 3010 }, { "epoch": 0.33028427452483977, "grad_norm": 1.1953125, "learning_rate": 1.6939397310711557e-05, "loss": 1.1896, "step": 3015 }, { "epoch": 0.33083200964013804, "grad_norm": 1.171875, "learning_rate": 1.6925616826509678e-05, "loss": 1.16, "step": 3020 }, { "epoch": 0.33137974475543625, "grad_norm": 1.2421875, "learning_rate": 1.6911811021185795e-05, "loss": 1.1782, "step": 3025 }, { "epoch": 0.3319274798707345, "grad_norm": 1.2109375, "learning_rate": 1.689797994521605e-05, "loss": 1.1129, "step": 3030 }, { "epoch": 0.3324752149860328, "grad_norm": 1.2265625, "learning_rate": 1.688412364916899e-05, "loss": 1.1878, "step": 3035 }, { "epoch": 0.333022950101331, "grad_norm": 1.2421875, "learning_rate": 1.6870242183705374e-05, "loss": 1.1491, "step": 3040 }, { "epoch": 0.33357068521662925, "grad_norm": 1.203125, "learning_rate": 1.6856335599577973e-05, "loss": 1.2087, "step": 3045 }, { "epoch": 0.33411842033192746, "grad_norm": 1.25, "learning_rate": 1.68424039476314e-05, "loss": 1.1259, "step": 3050 }, { "epoch": 0.3346661554472257, "grad_norm": 1.2734375, "learning_rate": 1.6828447278801923e-05, "loss": 1.1633, "step": 3055 }, { "epoch": 0.335213890562524, "grad_norm": 1.2265625, "learning_rate": 1.6814465644117274e-05, "loss": 1.1689, "step": 3060 }, { "epoch": 0.3357616256778222, "grad_norm": 1.1484375, "learning_rate": 1.6800459094696458e-05, "loss": 1.2037, "step": 3065 }, { "epoch": 0.33630936079312046, "grad_norm": 1.3046875, "learning_rate": 1.6786427681749585e-05, "loss": 1.166, "step": 3070 }, { "epoch": 0.33685709590841867, "grad_norm": 1.1875, "learning_rate": 1.677237145657766e-05, "loss": 1.1427, "step": 3075 }, { "epoch": 0.33740483102371693, "grad_norm": 1.375, "learning_rate": 1.6758290470572404e-05, "loss": 1.1598, "step": 3080 }, { "epoch": 0.3379525661390152, "grad_norm": 1.21875, "learning_rate": 1.674418477521607e-05, "loss": 1.1667, "step": 3085 }, { "epoch": 0.3385003012543134, "grad_norm": 1.3203125, "learning_rate": 1.673005442208126e-05, "loss": 1.1389, "step": 3090 }, { "epoch": 0.33904803636961167, "grad_norm": 1.265625, "learning_rate": 1.6715899462830716e-05, "loss": 1.1899, "step": 3095 }, { "epoch": 0.3395957714849099, "grad_norm": 1.2109375, "learning_rate": 1.6701719949217154e-05, "loss": 1.1621, "step": 3100 }, { "epoch": 0.34014350660020815, "grad_norm": 1.203125, "learning_rate": 1.6687515933083055e-05, "loss": 1.1612, "step": 3105 }, { "epoch": 0.34069124171550635, "grad_norm": 1.21875, "learning_rate": 1.66732874663605e-05, "loss": 1.1507, "step": 3110 }, { "epoch": 0.3412389768308046, "grad_norm": 1.21875, "learning_rate": 1.665903460107094e-05, "loss": 1.1442, "step": 3115 }, { "epoch": 0.3417867119461029, "grad_norm": 1.2109375, "learning_rate": 1.664475738932506e-05, "loss": 1.1422, "step": 3120 }, { "epoch": 0.3423344470614011, "grad_norm": 1.21875, "learning_rate": 1.6630455883322535e-05, "loss": 1.1343, "step": 3125 }, { "epoch": 0.34288218217669936, "grad_norm": 1.2265625, "learning_rate": 1.6616130135351884e-05, "loss": 1.1564, "step": 3130 }, { "epoch": 0.34342991729199757, "grad_norm": 1.234375, "learning_rate": 1.660178019779024e-05, "loss": 1.1465, "step": 3135 }, { "epoch": 0.34397765240729583, "grad_norm": 1.203125, "learning_rate": 1.6587406123103194e-05, "loss": 1.1779, "step": 3140 }, { "epoch": 0.3445253875225941, "grad_norm": 1.2265625, "learning_rate": 1.657300796384457e-05, "loss": 1.1625, "step": 3145 }, { "epoch": 0.3450731226378923, "grad_norm": 1.203125, "learning_rate": 1.6558585772656255e-05, "loss": 1.1463, "step": 3150 }, { "epoch": 0.34562085775319057, "grad_norm": 1.1875, "learning_rate": 1.6544139602268008e-05, "loss": 1.1371, "step": 3155 }, { "epoch": 0.3461685928684888, "grad_norm": 1.2265625, "learning_rate": 1.652966950549725e-05, "loss": 1.179, "step": 3160 }, { "epoch": 0.34671632798378704, "grad_norm": 1.1953125, "learning_rate": 1.6515175535248883e-05, "loss": 1.1811, "step": 3165 }, { "epoch": 0.3472640630990853, "grad_norm": 1.2109375, "learning_rate": 1.6500657744515098e-05, "loss": 1.1595, "step": 3170 }, { "epoch": 0.3478117982143835, "grad_norm": 1.25, "learning_rate": 1.6486116186375175e-05, "loss": 1.1437, "step": 3175 }, { "epoch": 0.3483595333296818, "grad_norm": 1.2421875, "learning_rate": 1.6471550913995286e-05, "loss": 1.1407, "step": 3180 }, { "epoch": 0.34890726844498, "grad_norm": 1.2421875, "learning_rate": 1.6456961980628317e-05, "loss": 1.1064, "step": 3185 }, { "epoch": 0.34945500356027825, "grad_norm": 1.1875, "learning_rate": 1.6442349439613648e-05, "loss": 1.1789, "step": 3190 }, { "epoch": 0.35000273867557646, "grad_norm": 1.265625, "learning_rate": 1.6427713344376987e-05, "loss": 1.138, "step": 3195 }, { "epoch": 0.35055047379087473, "grad_norm": 1.1640625, "learning_rate": 1.6413053748430145e-05, "loss": 1.1574, "step": 3200 }, { "epoch": 0.351098208906173, "grad_norm": 1.2890625, "learning_rate": 1.639837070537087e-05, "loss": 1.1448, "step": 3205 }, { "epoch": 0.3516459440214712, "grad_norm": 1.2421875, "learning_rate": 1.6383664268882632e-05, "loss": 1.151, "step": 3210 }, { "epoch": 0.35219367913676947, "grad_norm": 1.2421875, "learning_rate": 1.636893449273442e-05, "loss": 1.1196, "step": 3215 }, { "epoch": 0.3527414142520677, "grad_norm": 1.1640625, "learning_rate": 1.635418143078057e-05, "loss": 1.1732, "step": 3220 }, { "epoch": 0.35328914936736594, "grad_norm": 1.25, "learning_rate": 1.6339405136960544e-05, "loss": 1.1382, "step": 3225 }, { "epoch": 0.3538368844826642, "grad_norm": 1.2734375, "learning_rate": 1.6324605665298755e-05, "loss": 1.1572, "step": 3230 }, { "epoch": 0.3543846195979624, "grad_norm": 1.234375, "learning_rate": 1.630978306990435e-05, "loss": 1.1503, "step": 3235 }, { "epoch": 0.3549323547132607, "grad_norm": 1.203125, "learning_rate": 1.6294937404971016e-05, "loss": 1.1678, "step": 3240 }, { "epoch": 0.3554800898285589, "grad_norm": 1.203125, "learning_rate": 1.6280068724776795e-05, "loss": 1.1897, "step": 3245 }, { "epoch": 0.35602782494385715, "grad_norm": 1.21875, "learning_rate": 1.6265177083683875e-05, "loss": 1.1742, "step": 3250 }, { "epoch": 0.3565755600591554, "grad_norm": 1.265625, "learning_rate": 1.6250262536138383e-05, "loss": 1.1802, "step": 3255 }, { "epoch": 0.3571232951744536, "grad_norm": 1.1953125, "learning_rate": 1.6235325136670208e-05, "loss": 1.1628, "step": 3260 }, { "epoch": 0.3576710302897519, "grad_norm": 1.2421875, "learning_rate": 1.6220364939892783e-05, "loss": 1.1598, "step": 3265 }, { "epoch": 0.3582187654050501, "grad_norm": 1.2578125, "learning_rate": 1.6205382000502887e-05, "loss": 1.1226, "step": 3270 }, { "epoch": 0.35876650052034836, "grad_norm": 1.2265625, "learning_rate": 1.619037637328046e-05, "loss": 1.1599, "step": 3275 }, { "epoch": 0.35931423563564663, "grad_norm": 1.21875, "learning_rate": 1.617534811308839e-05, "loss": 1.1609, "step": 3280 }, { "epoch": 0.35986197075094484, "grad_norm": 1.2109375, "learning_rate": 1.61602972748723e-05, "loss": 1.1611, "step": 3285 }, { "epoch": 0.3604097058662431, "grad_norm": 1.203125, "learning_rate": 1.6145223913660378e-05, "loss": 1.1171, "step": 3290 }, { "epoch": 0.3609574409815413, "grad_norm": 1.21875, "learning_rate": 1.613012808456316e-05, "loss": 1.1123, "step": 3295 }, { "epoch": 0.3615051760968396, "grad_norm": 1.2890625, "learning_rate": 1.6115009842773322e-05, "loss": 1.1953, "step": 3300 }, { "epoch": 0.3620529112121378, "grad_norm": 1.2265625, "learning_rate": 1.609986924356548e-05, "loss": 1.0976, "step": 3305 }, { "epoch": 0.36260064632743605, "grad_norm": 1.15625, "learning_rate": 1.6084706342295994e-05, "loss": 1.1681, "step": 3310 }, { "epoch": 0.3631483814427343, "grad_norm": 1.234375, "learning_rate": 1.6069521194402776e-05, "loss": 1.1529, "step": 3315 }, { "epoch": 0.3636961165580325, "grad_norm": 1.2109375, "learning_rate": 1.605431385540506e-05, "loss": 1.1988, "step": 3320 }, { "epoch": 0.3642438516733308, "grad_norm": 1.265625, "learning_rate": 1.6039084380903222e-05, "loss": 1.1394, "step": 3325 }, { "epoch": 0.364791586788629, "grad_norm": 1.2109375, "learning_rate": 1.602383282657857e-05, "loss": 1.1255, "step": 3330 }, { "epoch": 0.36533932190392726, "grad_norm": 1.1875, "learning_rate": 1.6008559248193128e-05, "loss": 1.1545, "step": 3335 }, { "epoch": 0.3658870570192255, "grad_norm": 1.25, "learning_rate": 1.599326370158946e-05, "loss": 1.1661, "step": 3340 }, { "epoch": 0.36643479213452373, "grad_norm": 1.2421875, "learning_rate": 1.597794624269043e-05, "loss": 1.1714, "step": 3345 }, { "epoch": 0.366982527249822, "grad_norm": 1.1953125, "learning_rate": 1.5962606927499043e-05, "loss": 1.1919, "step": 3350 }, { "epoch": 0.3675302623651202, "grad_norm": 1.2109375, "learning_rate": 1.5947245812098186e-05, "loss": 1.1691, "step": 3355 }, { "epoch": 0.3680779974804185, "grad_norm": 1.1796875, "learning_rate": 1.5931862952650466e-05, "loss": 1.1454, "step": 3360 }, { "epoch": 0.36862573259571674, "grad_norm": 1.296875, "learning_rate": 1.591645840539799e-05, "loss": 1.1235, "step": 3365 }, { "epoch": 0.36917346771101495, "grad_norm": 1.125, "learning_rate": 1.5901032226662154e-05, "loss": 1.1144, "step": 3370 }, { "epoch": 0.3697212028263132, "grad_norm": 1.21875, "learning_rate": 1.588558447284344e-05, "loss": 1.0976, "step": 3375 }, { "epoch": 0.3702689379416114, "grad_norm": 1.2265625, "learning_rate": 1.5870115200421223e-05, "loss": 1.1143, "step": 3380 }, { "epoch": 0.3708166730569097, "grad_norm": 1.1953125, "learning_rate": 1.5854624465953537e-05, "loss": 1.1874, "step": 3385 }, { "epoch": 0.3713644081722079, "grad_norm": 1.203125, "learning_rate": 1.5839112326076905e-05, "loss": 1.1331, "step": 3390 }, { "epoch": 0.37191214328750616, "grad_norm": 1.25, "learning_rate": 1.582357883750609e-05, "loss": 1.2034, "step": 3395 }, { "epoch": 0.3724598784028044, "grad_norm": 1.203125, "learning_rate": 1.5808024057033927e-05, "loss": 1.1387, "step": 3400 }, { "epoch": 0.37300761351810263, "grad_norm": 1.21875, "learning_rate": 1.579244804153108e-05, "loss": 1.1715, "step": 3405 }, { "epoch": 0.3735553486334009, "grad_norm": 1.21875, "learning_rate": 1.5776850847945867e-05, "loss": 1.1801, "step": 3410 }, { "epoch": 0.3741030837486991, "grad_norm": 1.2578125, "learning_rate": 1.5761232533304034e-05, "loss": 1.092, "step": 3415 }, { "epoch": 0.37465081886399737, "grad_norm": 1.2421875, "learning_rate": 1.5745593154708543e-05, "loss": 1.1536, "step": 3420 }, { "epoch": 0.37519855397929563, "grad_norm": 1.1796875, "learning_rate": 1.5729932769339366e-05, "loss": 1.1283, "step": 3425 }, { "epoch": 0.37574628909459384, "grad_norm": 1.203125, "learning_rate": 1.571425143445329e-05, "loss": 1.1645, "step": 3430 }, { "epoch": 0.3762940242098921, "grad_norm": 1.2578125, "learning_rate": 1.5698549207383687e-05, "loss": 1.1203, "step": 3435 }, { "epoch": 0.3768417593251903, "grad_norm": 1.234375, "learning_rate": 1.5682826145540324e-05, "loss": 1.1555, "step": 3440 }, { "epoch": 0.3773894944404886, "grad_norm": 1.3046875, "learning_rate": 1.566708230640913e-05, "loss": 1.1614, "step": 3445 }, { "epoch": 0.37793722955578685, "grad_norm": 1.203125, "learning_rate": 1.5651317747552014e-05, "loss": 1.1987, "step": 3450 }, { "epoch": 0.37848496467108506, "grad_norm": 1.28125, "learning_rate": 1.5635532526606625e-05, "loss": 1.1825, "step": 3455 }, { "epoch": 0.3790326997863833, "grad_norm": 1.25, "learning_rate": 1.5619726701286167e-05, "loss": 1.1229, "step": 3460 }, { "epoch": 0.37958043490168153, "grad_norm": 1.21875, "learning_rate": 1.5603900329379168e-05, "loss": 1.1509, "step": 3465 }, { "epoch": 0.3801281700169798, "grad_norm": 1.203125, "learning_rate": 1.5588053468749285e-05, "loss": 1.185, "step": 3470 }, { "epoch": 0.380675905132278, "grad_norm": 1.2265625, "learning_rate": 1.5572186177335084e-05, "loss": 1.1458, "step": 3475 }, { "epoch": 0.38122364024757627, "grad_norm": 1.3359375, "learning_rate": 1.555629851314982e-05, "loss": 1.1651, "step": 3480 }, { "epoch": 0.38177137536287453, "grad_norm": 1.234375, "learning_rate": 1.5540390534281245e-05, "loss": 1.1313, "step": 3485 }, { "epoch": 0.38231911047817274, "grad_norm": 1.1875, "learning_rate": 1.5524462298891377e-05, "loss": 1.2005, "step": 3490 }, { "epoch": 0.382866845593471, "grad_norm": 1.1875, "learning_rate": 1.5508513865216306e-05, "loss": 1.1227, "step": 3495 }, { "epoch": 0.3834145807087692, "grad_norm": 1.2421875, "learning_rate": 1.5492545291565953e-05, "loss": 1.1439, "step": 3500 }, { "epoch": 0.3839623158240675, "grad_norm": 1.21875, "learning_rate": 1.5476556636323893e-05, "loss": 1.1816, "step": 3505 }, { "epoch": 0.38451005093936574, "grad_norm": 1.234375, "learning_rate": 1.5460547957947105e-05, "loss": 1.124, "step": 3510 }, { "epoch": 0.38505778605466395, "grad_norm": 1.2890625, "learning_rate": 1.5444519314965782e-05, "loss": 1.1489, "step": 3515 }, { "epoch": 0.3856055211699622, "grad_norm": 1.2734375, "learning_rate": 1.542847076598312e-05, "loss": 1.1407, "step": 3520 }, { "epoch": 0.3861532562852604, "grad_norm": 1.1796875, "learning_rate": 1.5412402369675082e-05, "loss": 1.2599, "step": 3525 }, { "epoch": 0.3867009914005587, "grad_norm": 1.234375, "learning_rate": 1.5396314184790194e-05, "loss": 1.1772, "step": 3530 }, { "epoch": 0.38724872651585696, "grad_norm": 1.1953125, "learning_rate": 1.5380206270149353e-05, "loss": 1.1525, "step": 3535 }, { "epoch": 0.38779646163115516, "grad_norm": 1.1640625, "learning_rate": 1.536407868464556e-05, "loss": 1.1601, "step": 3540 }, { "epoch": 0.38834419674645343, "grad_norm": 1.265625, "learning_rate": 1.534793148724376e-05, "loss": 1.1402, "step": 3545 }, { "epoch": 0.38889193186175164, "grad_norm": 1.2109375, "learning_rate": 1.5331764736980602e-05, "loss": 1.1789, "step": 3550 }, { "epoch": 0.3894396669770499, "grad_norm": 1.2578125, "learning_rate": 1.5315578492964203e-05, "loss": 1.1397, "step": 3555 }, { "epoch": 0.38998740209234817, "grad_norm": 1.234375, "learning_rate": 1.5299372814373967e-05, "loss": 1.183, "step": 3560 }, { "epoch": 0.3905351372076464, "grad_norm": 1.1875, "learning_rate": 1.5283147760460354e-05, "loss": 1.1203, "step": 3565 }, { "epoch": 0.39108287232294464, "grad_norm": 1.1328125, "learning_rate": 1.5266903390544662e-05, "loss": 1.0783, "step": 3570 }, { "epoch": 0.39163060743824285, "grad_norm": 1.234375, "learning_rate": 1.5250639764018807e-05, "loss": 1.1258, "step": 3575 }, { "epoch": 0.3921783425535411, "grad_norm": 1.2109375, "learning_rate": 1.5234356940345115e-05, "loss": 1.158, "step": 3580 }, { "epoch": 0.3927260776688393, "grad_norm": 1.203125, "learning_rate": 1.5218054979056093e-05, "loss": 1.1331, "step": 3585 }, { "epoch": 0.3932738127841376, "grad_norm": 1.21875, "learning_rate": 1.5201733939754228e-05, "loss": 1.2043, "step": 3590 }, { "epoch": 0.39382154789943585, "grad_norm": 1.265625, "learning_rate": 1.5185393882111756e-05, "loss": 1.1403, "step": 3595 }, { "epoch": 0.39436928301473406, "grad_norm": 1.21875, "learning_rate": 1.5169034865870438e-05, "loss": 1.1392, "step": 3600 }, { "epoch": 0.3949170181300323, "grad_norm": 1.2578125, "learning_rate": 1.5152656950841361e-05, "loss": 1.1757, "step": 3605 }, { "epoch": 0.39546475324533054, "grad_norm": 1.203125, "learning_rate": 1.5136260196904704e-05, "loss": 1.1485, "step": 3610 }, { "epoch": 0.3960124883606288, "grad_norm": 1.21875, "learning_rate": 1.5119844664009526e-05, "loss": 1.1333, "step": 3615 }, { "epoch": 0.39656022347592707, "grad_norm": 1.28125, "learning_rate": 1.510341041217355e-05, "loss": 1.1758, "step": 3620 }, { "epoch": 0.3971079585912253, "grad_norm": 1.1875, "learning_rate": 1.508695750148292e-05, "loss": 1.1336, "step": 3625 }, { "epoch": 0.39765569370652354, "grad_norm": 1.2421875, "learning_rate": 1.5070485992092023e-05, "loss": 1.1332, "step": 3630 }, { "epoch": 0.39820342882182175, "grad_norm": 1.25, "learning_rate": 1.505399594422323e-05, "loss": 1.2085, "step": 3635 }, { "epoch": 0.39875116393712, "grad_norm": 1.21875, "learning_rate": 1.5037487418166696e-05, "loss": 1.1608, "step": 3640 }, { "epoch": 0.3992988990524183, "grad_norm": 1.2890625, "learning_rate": 1.5020960474280137e-05, "loss": 1.1443, "step": 3645 }, { "epoch": 0.3998466341677165, "grad_norm": 1.2109375, "learning_rate": 1.5004415172988606e-05, "loss": 1.1886, "step": 3650 }, { "epoch": 0.40039436928301475, "grad_norm": 1.3125, "learning_rate": 1.4987851574784271e-05, "loss": 1.1719, "step": 3655 }, { "epoch": 0.40094210439831296, "grad_norm": 1.1875, "learning_rate": 1.4971269740226203e-05, "loss": 1.1735, "step": 3660 }, { "epoch": 0.4014898395136112, "grad_norm": 1.1953125, "learning_rate": 1.4954669729940137e-05, "loss": 1.141, "step": 3665 }, { "epoch": 0.40203757462890943, "grad_norm": 1.2265625, "learning_rate": 1.4938051604618275e-05, "loss": 1.1463, "step": 3670 }, { "epoch": 0.4025853097442077, "grad_norm": 1.1875, "learning_rate": 1.4921415425019039e-05, "loss": 1.1986, "step": 3675 }, { "epoch": 0.40313304485950596, "grad_norm": 1.203125, "learning_rate": 1.4904761251966864e-05, "loss": 1.15, "step": 3680 }, { "epoch": 0.40368077997480417, "grad_norm": 1.21875, "learning_rate": 1.4888089146351971e-05, "loss": 1.1433, "step": 3685 }, { "epoch": 0.40422851509010244, "grad_norm": 1.265625, "learning_rate": 1.4871399169130157e-05, "loss": 1.1749, "step": 3690 }, { "epoch": 0.40477625020540065, "grad_norm": 1.2265625, "learning_rate": 1.4854691381322538e-05, "loss": 1.1315, "step": 3695 }, { "epoch": 0.4053239853206989, "grad_norm": 1.21875, "learning_rate": 1.4837965844015359e-05, "loss": 1.1184, "step": 3700 }, { "epoch": 0.4058717204359972, "grad_norm": 1.203125, "learning_rate": 1.4821222618359769e-05, "loss": 1.1398, "step": 3705 }, { "epoch": 0.4064194555512954, "grad_norm": 1.21875, "learning_rate": 1.480446176557158e-05, "loss": 1.1341, "step": 3710 }, { "epoch": 0.40696719066659365, "grad_norm": 1.234375, "learning_rate": 1.4787683346931047e-05, "loss": 1.1129, "step": 3715 }, { "epoch": 0.40751492578189186, "grad_norm": 1.2109375, "learning_rate": 1.4770887423782654e-05, "loss": 1.1532, "step": 3720 }, { "epoch": 0.4080626608971901, "grad_norm": 1.171875, "learning_rate": 1.4754074057534885e-05, "loss": 1.0848, "step": 3725 }, { "epoch": 0.4086103960124884, "grad_norm": 1.2421875, "learning_rate": 1.4737243309659998e-05, "loss": 1.1446, "step": 3730 }, { "epoch": 0.4091581311277866, "grad_norm": 1.2265625, "learning_rate": 1.4720395241693796e-05, "loss": 1.1819, "step": 3735 }, { "epoch": 0.40970586624308486, "grad_norm": 1.3046875, "learning_rate": 1.4703529915235417e-05, "loss": 1.1483, "step": 3740 }, { "epoch": 0.41025360135838307, "grad_norm": 1.234375, "learning_rate": 1.468664739194709e-05, "loss": 1.1191, "step": 3745 }, { "epoch": 0.41080133647368133, "grad_norm": 1.25, "learning_rate": 1.4669747733553917e-05, "loss": 1.1227, "step": 3750 }, { "epoch": 0.41134907158897954, "grad_norm": 1.203125, "learning_rate": 1.4652831001843656e-05, "loss": 1.1884, "step": 3755 }, { "epoch": 0.4118968067042778, "grad_norm": 1.2421875, "learning_rate": 1.4635897258666484e-05, "loss": 1.1812, "step": 3760 }, { "epoch": 0.41244454181957607, "grad_norm": 1.21875, "learning_rate": 1.4618946565934775e-05, "loss": 1.1986, "step": 3765 }, { "epoch": 0.4129922769348743, "grad_norm": 1.21875, "learning_rate": 1.4601978985622874e-05, "loss": 1.2179, "step": 3770 }, { "epoch": 0.41354001205017255, "grad_norm": 1.1640625, "learning_rate": 1.4584994579766865e-05, "loss": 1.1142, "step": 3775 }, { "epoch": 0.41408774716547075, "grad_norm": 1.21875, "learning_rate": 1.4567993410464354e-05, "loss": 1.1743, "step": 3780 }, { "epoch": 0.414635482280769, "grad_norm": 1.2265625, "learning_rate": 1.4550975539874233e-05, "loss": 1.2144, "step": 3785 }, { "epoch": 0.4151832173960673, "grad_norm": 1.3125, "learning_rate": 1.4533941030216466e-05, "loss": 1.1351, "step": 3790 }, { "epoch": 0.4157309525113655, "grad_norm": 1.21875, "learning_rate": 1.4516889943771835e-05, "loss": 1.1299, "step": 3795 }, { "epoch": 0.41627868762666376, "grad_norm": 1.203125, "learning_rate": 1.4499822342881744e-05, "loss": 1.1562, "step": 3800 }, { "epoch": 0.41682642274196197, "grad_norm": 1.1953125, "learning_rate": 1.4482738289947968e-05, "loss": 1.1797, "step": 3805 }, { "epoch": 0.41737415785726023, "grad_norm": 1.203125, "learning_rate": 1.4465637847432444e-05, "loss": 1.1477, "step": 3810 }, { "epoch": 0.4179218929725585, "grad_norm": 1.1875, "learning_rate": 1.4448521077857013e-05, "loss": 1.1662, "step": 3815 }, { "epoch": 0.4184696280878567, "grad_norm": 1.203125, "learning_rate": 1.4431388043803227e-05, "loss": 1.1298, "step": 3820 }, { "epoch": 0.41901736320315497, "grad_norm": 1.234375, "learning_rate": 1.44142388079121e-05, "loss": 1.1517, "step": 3825 }, { "epoch": 0.4195650983184532, "grad_norm": 1.25, "learning_rate": 1.439707343288388e-05, "loss": 1.1491, "step": 3830 }, { "epoch": 0.42011283343375144, "grad_norm": 1.1953125, "learning_rate": 1.4379891981477816e-05, "loss": 1.1864, "step": 3835 }, { "epoch": 0.4206605685490497, "grad_norm": 1.2109375, "learning_rate": 1.4362694516511946e-05, "loss": 1.1392, "step": 3840 }, { "epoch": 0.4212083036643479, "grad_norm": 1.390625, "learning_rate": 1.434548110086285e-05, "loss": 1.1222, "step": 3845 }, { "epoch": 0.4217560387796462, "grad_norm": 1.328125, "learning_rate": 1.4328251797465434e-05, "loss": 1.1979, "step": 3850 }, { "epoch": 0.4223037738949444, "grad_norm": 1.2421875, "learning_rate": 1.4311006669312672e-05, "loss": 1.2049, "step": 3855 }, { "epoch": 0.42285150901024265, "grad_norm": 1.234375, "learning_rate": 1.4293745779455418e-05, "loss": 1.1786, "step": 3860 }, { "epoch": 0.42339924412554086, "grad_norm": 1.2265625, "learning_rate": 1.4276469191002147e-05, "loss": 1.1306, "step": 3865 }, { "epoch": 0.42394697924083913, "grad_norm": 1.25, "learning_rate": 1.4259176967118719e-05, "loss": 1.1525, "step": 3870 }, { "epoch": 0.4244947143561374, "grad_norm": 1.265625, "learning_rate": 1.4241869171028178e-05, "loss": 1.153, "step": 3875 }, { "epoch": 0.4250424494714356, "grad_norm": 1.1953125, "learning_rate": 1.4224545866010484e-05, "loss": 1.1271, "step": 3880 }, { "epoch": 0.42559018458673387, "grad_norm": 1.2109375, "learning_rate": 1.4207207115402316e-05, "loss": 1.1419, "step": 3885 }, { "epoch": 0.4261379197020321, "grad_norm": 1.171875, "learning_rate": 1.4189852982596813e-05, "loss": 1.1326, "step": 3890 }, { "epoch": 0.42668565481733034, "grad_norm": 1.203125, "learning_rate": 1.4172483531043358e-05, "loss": 1.1815, "step": 3895 }, { "epoch": 0.4272333899326286, "grad_norm": 1.1796875, "learning_rate": 1.4155098824247341e-05, "loss": 1.1368, "step": 3900 }, { "epoch": 0.4277811250479268, "grad_norm": 1.234375, "learning_rate": 1.4137698925769931e-05, "loss": 1.218, "step": 3905 }, { "epoch": 0.4283288601632251, "grad_norm": 1.25, "learning_rate": 1.4120283899227839e-05, "loss": 1.1762, "step": 3910 }, { "epoch": 0.4288765952785233, "grad_norm": 1.2109375, "learning_rate": 1.4102853808293077e-05, "loss": 1.15, "step": 3915 }, { "epoch": 0.42942433039382155, "grad_norm": 1.203125, "learning_rate": 1.408540871669275e-05, "loss": 1.1668, "step": 3920 }, { "epoch": 0.4299720655091198, "grad_norm": 1.1796875, "learning_rate": 1.4067948688208799e-05, "loss": 1.1276, "step": 3925 }, { "epoch": 0.430519800624418, "grad_norm": 1.203125, "learning_rate": 1.4050473786677774e-05, "loss": 1.189, "step": 3930 }, { "epoch": 0.4310675357397163, "grad_norm": 1.234375, "learning_rate": 1.403298407599061e-05, "loss": 1.1595, "step": 3935 }, { "epoch": 0.4316152708550145, "grad_norm": 1.2578125, "learning_rate": 1.4015479620092383e-05, "loss": 1.1478, "step": 3940 }, { "epoch": 0.43216300597031276, "grad_norm": 1.1640625, "learning_rate": 1.3997960482982082e-05, "loss": 1.1392, "step": 3945 }, { "epoch": 0.432710741085611, "grad_norm": 1.265625, "learning_rate": 1.3980426728712369e-05, "loss": 1.1706, "step": 3950 }, { "epoch": 0.43325847620090924, "grad_norm": 1.2421875, "learning_rate": 1.396287842138935e-05, "loss": 1.1913, "step": 3955 }, { "epoch": 0.4338062113162075, "grad_norm": 1.328125, "learning_rate": 1.394531562517234e-05, "loss": 1.1721, "step": 3960 }, { "epoch": 0.4343539464315057, "grad_norm": 1.3984375, "learning_rate": 1.3927738404273634e-05, "loss": 1.2269, "step": 3965 }, { "epoch": 0.434901681546804, "grad_norm": 1.2109375, "learning_rate": 1.391014682295825e-05, "loss": 1.1459, "step": 3970 }, { "epoch": 0.4354494166621022, "grad_norm": 1.25, "learning_rate": 1.3892540945543722e-05, "loss": 1.117, "step": 3975 }, { "epoch": 0.43599715177740045, "grad_norm": 1.2265625, "learning_rate": 1.3874920836399854e-05, "loss": 1.1366, "step": 3980 }, { "epoch": 0.4365448868926987, "grad_norm": 1.2578125, "learning_rate": 1.3857286559948476e-05, "loss": 1.178, "step": 3985 }, { "epoch": 0.4370926220079969, "grad_norm": 1.1796875, "learning_rate": 1.383963818066322e-05, "loss": 1.0893, "step": 3990 }, { "epoch": 0.4376403571232952, "grad_norm": 1.2734375, "learning_rate": 1.3821975763069279e-05, "loss": 1.1656, "step": 3995 }, { "epoch": 0.4381880922385934, "grad_norm": 1.2109375, "learning_rate": 1.3804299371743174e-05, "loss": 1.1316, "step": 4000 }, { "epoch": 0.43873582735389166, "grad_norm": 1.203125, "learning_rate": 1.3786609071312511e-05, "loss": 1.1865, "step": 4005 }, { "epoch": 0.4392835624691899, "grad_norm": 1.203125, "learning_rate": 1.376890492645576e-05, "loss": 1.1552, "step": 4010 }, { "epoch": 0.43983129758448813, "grad_norm": 1.21875, "learning_rate": 1.3751187001901995e-05, "loss": 1.1758, "step": 4015 }, { "epoch": 0.4403790326997864, "grad_norm": 1.203125, "learning_rate": 1.3733455362430684e-05, "loss": 1.1494, "step": 4020 }, { "epoch": 0.4409267678150846, "grad_norm": 1.21875, "learning_rate": 1.3715710072871426e-05, "loss": 1.1465, "step": 4025 }, { "epoch": 0.4414745029303829, "grad_norm": 1.2265625, "learning_rate": 1.369795119810374e-05, "loss": 1.1678, "step": 4030 }, { "epoch": 0.4420222380456811, "grad_norm": 1.2421875, "learning_rate": 1.3680178803056802e-05, "loss": 1.1544, "step": 4035 }, { "epoch": 0.44256997316097935, "grad_norm": 1.2421875, "learning_rate": 1.366239295270923e-05, "loss": 1.1633, "step": 4040 }, { "epoch": 0.4431177082762776, "grad_norm": 1.1875, "learning_rate": 1.3644593712088829e-05, "loss": 1.1644, "step": 4045 }, { "epoch": 0.4436654433915758, "grad_norm": 1.2265625, "learning_rate": 1.3626781146272369e-05, "loss": 1.1366, "step": 4050 }, { "epoch": 0.4442131785068741, "grad_norm": 1.2265625, "learning_rate": 1.3608955320385333e-05, "loss": 1.1761, "step": 4055 }, { "epoch": 0.4447609136221723, "grad_norm": 1.203125, "learning_rate": 1.3591116299601684e-05, "loss": 1.1194, "step": 4060 }, { "epoch": 0.44530864873747056, "grad_norm": 1.1953125, "learning_rate": 1.3573264149143636e-05, "loss": 1.1779, "step": 4065 }, { "epoch": 0.4458563838527688, "grad_norm": 1.140625, "learning_rate": 1.3555398934281397e-05, "loss": 1.1487, "step": 4070 }, { "epoch": 0.44640411896806703, "grad_norm": 1.1953125, "learning_rate": 1.3537520720332943e-05, "loss": 1.1687, "step": 4075 }, { "epoch": 0.4469518540833653, "grad_norm": 1.2421875, "learning_rate": 1.351962957266378e-05, "loss": 1.1544, "step": 4080 }, { "epoch": 0.4474995891986635, "grad_norm": 1.234375, "learning_rate": 1.3501725556686702e-05, "loss": 1.1358, "step": 4085 }, { "epoch": 0.44804732431396177, "grad_norm": 1.265625, "learning_rate": 1.3483808737861547e-05, "loss": 1.1723, "step": 4090 }, { "epoch": 0.44859505942926003, "grad_norm": 1.1875, "learning_rate": 1.3465879181694966e-05, "loss": 1.1463, "step": 4095 }, { "epoch": 0.44914279454455824, "grad_norm": 1.390625, "learning_rate": 1.344793695374018e-05, "loss": 1.15, "step": 4100 }, { "epoch": 0.4496905296598565, "grad_norm": 1.1953125, "learning_rate": 1.3429982119596737e-05, "loss": 1.2038, "step": 4105 }, { "epoch": 0.4502382647751547, "grad_norm": 1.234375, "learning_rate": 1.341201474491027e-05, "loss": 1.1767, "step": 4110 }, { "epoch": 0.450785999890453, "grad_norm": 1.2421875, "learning_rate": 1.3394034895372281e-05, "loss": 1.173, "step": 4115 }, { "epoch": 0.4513337350057512, "grad_norm": 1.234375, "learning_rate": 1.337604263671986e-05, "loss": 1.183, "step": 4120 }, { "epoch": 0.45188147012104946, "grad_norm": 1.21875, "learning_rate": 1.3358038034735485e-05, "loss": 1.1281, "step": 4125 }, { "epoch": 0.4524292052363477, "grad_norm": 1.1640625, "learning_rate": 1.3340021155246745e-05, "loss": 1.187, "step": 4130 }, { "epoch": 0.45297694035164593, "grad_norm": 1.25, "learning_rate": 1.3321992064126132e-05, "loss": 1.1543, "step": 4135 }, { "epoch": 0.4535246754669442, "grad_norm": 1.2421875, "learning_rate": 1.3303950827290781e-05, "loss": 1.191, "step": 4140 }, { "epoch": 0.4540724105822424, "grad_norm": 1.2109375, "learning_rate": 1.328589751070223e-05, "loss": 1.2142, "step": 4145 }, { "epoch": 0.45462014569754067, "grad_norm": 1.171875, "learning_rate": 1.3267832180366189e-05, "loss": 1.1444, "step": 4150 }, { "epoch": 0.45516788081283893, "grad_norm": 1.1953125, "learning_rate": 1.3249754902332285e-05, "loss": 1.1635, "step": 4155 }, { "epoch": 0.45571561592813714, "grad_norm": 1.2421875, "learning_rate": 1.3231665742693838e-05, "loss": 1.156, "step": 4160 }, { "epoch": 0.4562633510434354, "grad_norm": 1.2890625, "learning_rate": 1.3213564767587594e-05, "loss": 1.2229, "step": 4165 }, { "epoch": 0.4568110861587336, "grad_norm": 1.2265625, "learning_rate": 1.3195452043193511e-05, "loss": 1.1456, "step": 4170 }, { "epoch": 0.4573588212740319, "grad_norm": 1.25, "learning_rate": 1.3177327635734497e-05, "loss": 1.1082, "step": 4175 }, { "epoch": 0.45790655638933014, "grad_norm": 1.2421875, "learning_rate": 1.3159191611476183e-05, "loss": 1.1428, "step": 4180 }, { "epoch": 0.45845429150462835, "grad_norm": 1.234375, "learning_rate": 1.3141044036726663e-05, "loss": 1.1755, "step": 4185 }, { "epoch": 0.4590020266199266, "grad_norm": 1.2421875, "learning_rate": 1.3122884977836265e-05, "loss": 1.1714, "step": 4190 }, { "epoch": 0.4595497617352248, "grad_norm": 1.3671875, "learning_rate": 1.3104714501197308e-05, "loss": 1.1976, "step": 4195 }, { "epoch": 0.4600974968505231, "grad_norm": 1.1796875, "learning_rate": 1.3086532673243855e-05, "loss": 1.1892, "step": 4200 }, { "epoch": 0.46064523196582136, "grad_norm": 1.1875, "learning_rate": 1.306833956045147e-05, "loss": 1.1116, "step": 4205 }, { "epoch": 0.46119296708111956, "grad_norm": 1.296875, "learning_rate": 1.3050135229336974e-05, "loss": 1.1555, "step": 4210 }, { "epoch": 0.46174070219641783, "grad_norm": 1.3046875, "learning_rate": 1.3031919746458202e-05, "loss": 1.1401, "step": 4215 }, { "epoch": 0.46228843731171604, "grad_norm": 1.21875, "learning_rate": 1.3013693178413773e-05, "loss": 1.144, "step": 4220 }, { "epoch": 0.4628361724270143, "grad_norm": 1.2109375, "learning_rate": 1.2995455591842825e-05, "loss": 1.1727, "step": 4225 }, { "epoch": 0.4633839075423125, "grad_norm": 1.28125, "learning_rate": 1.2977207053424781e-05, "loss": 1.2165, "step": 4230 }, { "epoch": 0.4639316426576108, "grad_norm": 1.2109375, "learning_rate": 1.2958947629879113e-05, "loss": 1.1501, "step": 4235 }, { "epoch": 0.46447937777290904, "grad_norm": 1.21875, "learning_rate": 1.2940677387965083e-05, "loss": 1.244, "step": 4240 }, { "epoch": 0.46502711288820725, "grad_norm": 1.1796875, "learning_rate": 1.2922396394481513e-05, "loss": 1.1096, "step": 4245 }, { "epoch": 0.4655748480035055, "grad_norm": 1.265625, "learning_rate": 1.2904104716266529e-05, "loss": 1.145, "step": 4250 }, { "epoch": 0.4661225831188037, "grad_norm": 1.1796875, "learning_rate": 1.2885802420197324e-05, "loss": 1.1838, "step": 4255 }, { "epoch": 0.466670318234102, "grad_norm": 1.296875, "learning_rate": 1.2867489573189916e-05, "loss": 1.1849, "step": 4260 }, { "epoch": 0.46721805334940025, "grad_norm": 1.2265625, "learning_rate": 1.2849166242198887e-05, "loss": 1.1366, "step": 4265 }, { "epoch": 0.46776578846469846, "grad_norm": 1.2578125, "learning_rate": 1.2830832494217167e-05, "loss": 1.1374, "step": 4270 }, { "epoch": 0.4683135235799967, "grad_norm": 1.2578125, "learning_rate": 1.2812488396275757e-05, "loss": 1.1513, "step": 4275 }, { "epoch": 0.46886125869529494, "grad_norm": 1.2421875, "learning_rate": 1.2794134015443508e-05, "loss": 1.1682, "step": 4280 }, { "epoch": 0.4694089938105932, "grad_norm": 1.3125, "learning_rate": 1.2775769418826858e-05, "loss": 1.1919, "step": 4285 }, { "epoch": 0.46995672892589146, "grad_norm": 1.1328125, "learning_rate": 1.2757394673569608e-05, "loss": 1.1142, "step": 4290 }, { "epoch": 0.4705044640411897, "grad_norm": 1.203125, "learning_rate": 1.2739009846852655e-05, "loss": 1.1921, "step": 4295 }, { "epoch": 0.47105219915648794, "grad_norm": 1.1953125, "learning_rate": 1.2720615005893758e-05, "loss": 1.154, "step": 4300 }, { "epoch": 0.47159993427178615, "grad_norm": 1.2421875, "learning_rate": 1.2702210217947289e-05, "loss": 1.1875, "step": 4305 }, { "epoch": 0.4721476693870844, "grad_norm": 1.203125, "learning_rate": 1.2683795550303985e-05, "loss": 1.1543, "step": 4310 }, { "epoch": 0.4726954045023826, "grad_norm": 1.1875, "learning_rate": 1.266537107029071e-05, "loss": 1.1906, "step": 4315 }, { "epoch": 0.4732431396176809, "grad_norm": 1.203125, "learning_rate": 1.2646936845270208e-05, "loss": 1.0545, "step": 4320 }, { "epoch": 0.47379087473297915, "grad_norm": 1.21875, "learning_rate": 1.2628492942640835e-05, "loss": 1.1769, "step": 4325 }, { "epoch": 0.47433860984827736, "grad_norm": 1.2265625, "learning_rate": 1.2610039429836345e-05, "loss": 1.1138, "step": 4330 }, { "epoch": 0.4748863449635756, "grad_norm": 1.203125, "learning_rate": 1.2591576374325622e-05, "loss": 1.1499, "step": 4335 }, { "epoch": 0.47543408007887383, "grad_norm": 1.2421875, "learning_rate": 1.2573103843612447e-05, "loss": 1.1579, "step": 4340 }, { "epoch": 0.4759818151941721, "grad_norm": 1.203125, "learning_rate": 1.2554621905235226e-05, "loss": 1.1361, "step": 4345 }, { "epoch": 0.47652955030947036, "grad_norm": 1.2265625, "learning_rate": 1.2536130626766783e-05, "loss": 1.2137, "step": 4350 }, { "epoch": 0.47707728542476857, "grad_norm": 1.234375, "learning_rate": 1.2517630075814078e-05, "loss": 1.1736, "step": 4355 }, { "epoch": 0.47762502054006684, "grad_norm": 1.234375, "learning_rate": 1.2499120320017977e-05, "loss": 1.1649, "step": 4360 }, { "epoch": 0.47817275565536504, "grad_norm": 1.296875, "learning_rate": 1.2480601427052994e-05, "loss": 1.1655, "step": 4365 }, { "epoch": 0.4787204907706633, "grad_norm": 1.265625, "learning_rate": 1.2462073464627058e-05, "loss": 1.1312, "step": 4370 }, { "epoch": 0.4792682258859616, "grad_norm": 1.1953125, "learning_rate": 1.2443536500481252e-05, "loss": 1.1196, "step": 4375 }, { "epoch": 0.4798159610012598, "grad_norm": 1.21875, "learning_rate": 1.2424990602389578e-05, "loss": 1.1134, "step": 4380 }, { "epoch": 0.48036369611655805, "grad_norm": 1.25, "learning_rate": 1.2406435838158686e-05, "loss": 1.2028, "step": 4385 }, { "epoch": 0.48091143123185626, "grad_norm": 1.171875, "learning_rate": 1.2387872275627659e-05, "loss": 1.0878, "step": 4390 }, { "epoch": 0.4814591663471545, "grad_norm": 1.2578125, "learning_rate": 1.2369299982667744e-05, "loss": 1.1652, "step": 4395 }, { "epoch": 0.48200690146245273, "grad_norm": 1.21875, "learning_rate": 1.23507190271821e-05, "loss": 1.1133, "step": 4400 }, { "epoch": 0.482554636577751, "grad_norm": 1.375, "learning_rate": 1.2332129477105562e-05, "loss": 1.1953, "step": 4405 }, { "epoch": 0.48310237169304926, "grad_norm": 1.21875, "learning_rate": 1.2313531400404397e-05, "loss": 1.1242, "step": 4410 }, { "epoch": 0.48365010680834747, "grad_norm": 1.21875, "learning_rate": 1.2294924865076029e-05, "loss": 1.1628, "step": 4415 }, { "epoch": 0.48419784192364573, "grad_norm": 1.1796875, "learning_rate": 1.227630993914882e-05, "loss": 1.1063, "step": 4420 }, { "epoch": 0.48474557703894394, "grad_norm": 1.2265625, "learning_rate": 1.2257686690681812e-05, "loss": 1.1294, "step": 4425 }, { "epoch": 0.4852933121542422, "grad_norm": 1.25, "learning_rate": 1.2239055187764463e-05, "loss": 1.1627, "step": 4430 }, { "epoch": 0.48584104726954047, "grad_norm": 1.2578125, "learning_rate": 1.222041549851642e-05, "loss": 1.1864, "step": 4435 }, { "epoch": 0.4863887823848387, "grad_norm": 1.2578125, "learning_rate": 1.2201767691087265e-05, "loss": 1.113, "step": 4440 }, { "epoch": 0.48693651750013695, "grad_norm": 1.2265625, "learning_rate": 1.2183111833656244e-05, "loss": 1.1931, "step": 4445 }, { "epoch": 0.48748425261543515, "grad_norm": 1.21875, "learning_rate": 1.2164447994432054e-05, "loss": 1.1258, "step": 4450 }, { "epoch": 0.4880319877307334, "grad_norm": 1.2265625, "learning_rate": 1.214577624165256e-05, "loss": 1.1602, "step": 4455 }, { "epoch": 0.4885797228460317, "grad_norm": 1.21875, "learning_rate": 1.2127096643584573e-05, "loss": 1.1441, "step": 4460 }, { "epoch": 0.4891274579613299, "grad_norm": 1.2421875, "learning_rate": 1.2108409268523578e-05, "loss": 1.1582, "step": 4465 }, { "epoch": 0.48967519307662816, "grad_norm": 1.25, "learning_rate": 1.2089714184793492e-05, "loss": 1.1229, "step": 4470 }, { "epoch": 0.49022292819192637, "grad_norm": 1.2109375, "learning_rate": 1.207101146074643e-05, "loss": 1.1002, "step": 4475 }, { "epoch": 0.49077066330722463, "grad_norm": 1.3203125, "learning_rate": 1.205230116476243e-05, "loss": 1.1865, "step": 4480 }, { "epoch": 0.4913183984225229, "grad_norm": 1.3046875, "learning_rate": 1.203358336524921e-05, "loss": 1.2106, "step": 4485 }, { "epoch": 0.4918661335378211, "grad_norm": 1.2421875, "learning_rate": 1.2014858130641936e-05, "loss": 1.1417, "step": 4490 }, { "epoch": 0.49241386865311937, "grad_norm": 1.1953125, "learning_rate": 1.1996125529402946e-05, "loss": 1.1356, "step": 4495 }, { "epoch": 0.4929616037684176, "grad_norm": 1.25, "learning_rate": 1.1977385630021523e-05, "loss": 1.1409, "step": 4500 }, { "epoch": 0.49350933888371584, "grad_norm": 1.2421875, "learning_rate": 1.1958638501013613e-05, "loss": 1.129, "step": 4505 }, { "epoch": 0.49405707399901405, "grad_norm": 1.265625, "learning_rate": 1.1939884210921618e-05, "loss": 1.1589, "step": 4510 }, { "epoch": 0.4946048091143123, "grad_norm": 1.2578125, "learning_rate": 1.1921122828314109e-05, "loss": 1.0934, "step": 4515 }, { "epoch": 0.4951525442296106, "grad_norm": 1.1875, "learning_rate": 1.1902354421785591e-05, "loss": 1.1568, "step": 4520 }, { "epoch": 0.4957002793449088, "grad_norm": 1.328125, "learning_rate": 1.188357905995625e-05, "loss": 1.151, "step": 4525 }, { "epoch": 0.49624801446020705, "grad_norm": 1.265625, "learning_rate": 1.18647968114717e-05, "loss": 1.1709, "step": 4530 }, { "epoch": 0.49679574957550526, "grad_norm": 1.296875, "learning_rate": 1.1846007745002734e-05, "loss": 1.1514, "step": 4535 }, { "epoch": 0.49734348469080353, "grad_norm": 1.2265625, "learning_rate": 1.1827211929245075e-05, "loss": 1.1487, "step": 4540 }, { "epoch": 0.4978912198061018, "grad_norm": 1.1953125, "learning_rate": 1.1808409432919124e-05, "loss": 1.1874, "step": 4545 }, { "epoch": 0.4984389549214, "grad_norm": 1.234375, "learning_rate": 1.1789600324769696e-05, "loss": 1.1591, "step": 4550 }, { "epoch": 0.49898669003669827, "grad_norm": 1.1875, "learning_rate": 1.1770784673565796e-05, "loss": 1.1598, "step": 4555 }, { "epoch": 0.4995344251519965, "grad_norm": 1.1953125, "learning_rate": 1.1751962548100339e-05, "loss": 1.1619, "step": 4560 }, { "epoch": 0.5000821602672947, "grad_norm": 1.21875, "learning_rate": 1.1733134017189918e-05, "loss": 1.1255, "step": 4565 }, { "epoch": 0.500629895382593, "grad_norm": 1.2421875, "learning_rate": 1.1714299149674538e-05, "loss": 1.136, "step": 4570 }, { "epoch": 0.5011776304978912, "grad_norm": 1.2265625, "learning_rate": 1.1695458014417382e-05, "loss": 1.1483, "step": 4575 }, { "epoch": 0.5017253656131895, "grad_norm": 1.28125, "learning_rate": 1.1676610680304539e-05, "loss": 1.1182, "step": 4580 }, { "epoch": 0.5022731007284877, "grad_norm": 1.25, "learning_rate": 1.1657757216244767e-05, "loss": 1.118, "step": 4585 }, { "epoch": 0.5028208358437859, "grad_norm": 1.203125, "learning_rate": 1.163889769116923e-05, "loss": 1.1345, "step": 4590 }, { "epoch": 0.5033685709590842, "grad_norm": 1.1640625, "learning_rate": 1.162003217403127e-05, "loss": 1.1254, "step": 4595 }, { "epoch": 0.5039163060743824, "grad_norm": 1.234375, "learning_rate": 1.1601160733806113e-05, "loss": 1.125, "step": 4600 }, { "epoch": 0.5044640411896807, "grad_norm": 1.3359375, "learning_rate": 1.1582283439490652e-05, "loss": 1.2014, "step": 4605 }, { "epoch": 0.505011776304979, "grad_norm": 1.1953125, "learning_rate": 1.1563400360103188e-05, "loss": 1.1424, "step": 4610 }, { "epoch": 0.5055595114202771, "grad_norm": 1.2109375, "learning_rate": 1.1544511564683165e-05, "loss": 1.1401, "step": 4615 }, { "epoch": 0.5061072465355754, "grad_norm": 1.28125, "learning_rate": 1.152561712229093e-05, "loss": 1.1839, "step": 4620 }, { "epoch": 0.5066549816508736, "grad_norm": 1.2578125, "learning_rate": 1.1506717102007474e-05, "loss": 1.1592, "step": 4625 }, { "epoch": 0.5072027167661719, "grad_norm": 1.203125, "learning_rate": 1.1487811572934184e-05, "loss": 1.1217, "step": 4630 }, { "epoch": 0.5077504518814702, "grad_norm": 1.234375, "learning_rate": 1.1468900604192585e-05, "loss": 1.1532, "step": 4635 }, { "epoch": 0.5082981869967683, "grad_norm": 1.234375, "learning_rate": 1.1449984264924094e-05, "loss": 1.1942, "step": 4640 }, { "epoch": 0.5088459221120666, "grad_norm": 1.25, "learning_rate": 1.143106262428976e-05, "loss": 1.0977, "step": 4645 }, { "epoch": 0.5093936572273648, "grad_norm": 1.21875, "learning_rate": 1.1412135751470017e-05, "loss": 1.1754, "step": 4650 }, { "epoch": 0.5099413923426631, "grad_norm": 1.1796875, "learning_rate": 1.1393203715664427e-05, "loss": 1.131, "step": 4655 }, { "epoch": 0.5104891274579614, "grad_norm": 1.1953125, "learning_rate": 1.137426658609143e-05, "loss": 1.1093, "step": 4660 }, { "epoch": 0.5110368625732595, "grad_norm": 1.2109375, "learning_rate": 1.1355324431988086e-05, "loss": 1.1413, "step": 4665 }, { "epoch": 0.5115845976885578, "grad_norm": 1.1875, "learning_rate": 1.1336377322609832e-05, "loss": 1.1897, "step": 4670 }, { "epoch": 0.5121323328038561, "grad_norm": 1.2890625, "learning_rate": 1.131742532723022e-05, "loss": 1.1527, "step": 4675 }, { "epoch": 0.5126800679191543, "grad_norm": 1.25, "learning_rate": 1.1298468515140662e-05, "loss": 1.1567, "step": 4680 }, { "epoch": 0.5132278030344526, "grad_norm": 1.234375, "learning_rate": 1.1279506955650182e-05, "loss": 1.1145, "step": 4685 }, { "epoch": 0.5137755381497507, "grad_norm": 1.2109375, "learning_rate": 1.1260540718085162e-05, "loss": 1.1819, "step": 4690 }, { "epoch": 0.514323273265049, "grad_norm": 1.25, "learning_rate": 1.1241569871789096e-05, "loss": 1.1306, "step": 4695 }, { "epoch": 0.5148710083803473, "grad_norm": 1.1953125, "learning_rate": 1.1222594486122312e-05, "loss": 1.1663, "step": 4700 }, { "epoch": 0.5154187434956455, "grad_norm": 1.203125, "learning_rate": 1.1203614630461746e-05, "loss": 1.1199, "step": 4705 }, { "epoch": 0.5159664786109438, "grad_norm": 1.2109375, "learning_rate": 1.118463037420067e-05, "loss": 1.1542, "step": 4710 }, { "epoch": 0.516514213726242, "grad_norm": 1.28125, "learning_rate": 1.116564178674846e-05, "loss": 1.1904, "step": 4715 }, { "epoch": 0.5170619488415402, "grad_norm": 1.265625, "learning_rate": 1.1146648937530309e-05, "loss": 1.1787, "step": 4720 }, { "epoch": 0.5176096839568385, "grad_norm": 1.265625, "learning_rate": 1.1127651895986999e-05, "loss": 1.163, "step": 4725 }, { "epoch": 0.5181574190721367, "grad_norm": 1.171875, "learning_rate": 1.1108650731574644e-05, "loss": 1.0855, "step": 4730 }, { "epoch": 0.5187051541874349, "grad_norm": 1.171875, "learning_rate": 1.1089645513764429e-05, "loss": 1.1962, "step": 4735 }, { "epoch": 0.5192528893027332, "grad_norm": 1.2578125, "learning_rate": 1.1070636312042352e-05, "loss": 1.2023, "step": 4740 }, { "epoch": 0.5198006244180314, "grad_norm": 1.265625, "learning_rate": 1.1051623195908987e-05, "loss": 1.1834, "step": 4745 }, { "epoch": 0.5203483595333297, "grad_norm": 1.21875, "learning_rate": 1.1032606234879217e-05, "loss": 1.172, "step": 4750 }, { "epoch": 0.520896094648628, "grad_norm": 1.2109375, "learning_rate": 1.1013585498481983e-05, "loss": 1.1487, "step": 4755 }, { "epoch": 0.5214438297639261, "grad_norm": 1.2578125, "learning_rate": 1.0994561056260016e-05, "loss": 1.2174, "step": 4760 }, { "epoch": 0.5219915648792244, "grad_norm": 1.28125, "learning_rate": 1.0975532977769619e-05, "loss": 1.1955, "step": 4765 }, { "epoch": 0.5225392999945226, "grad_norm": 1.21875, "learning_rate": 1.0956501332580375e-05, "loss": 1.0925, "step": 4770 }, { "epoch": 0.5230870351098209, "grad_norm": 1.1875, "learning_rate": 1.093746619027491e-05, "loss": 1.181, "step": 4775 }, { "epoch": 0.5236347702251192, "grad_norm": 1.2265625, "learning_rate": 1.0918427620448635e-05, "loss": 1.1976, "step": 4780 }, { "epoch": 0.5241825053404173, "grad_norm": 1.2109375, "learning_rate": 1.0899385692709499e-05, "loss": 1.1783, "step": 4785 }, { "epoch": 0.5247302404557156, "grad_norm": 1.2109375, "learning_rate": 1.0880340476677718e-05, "loss": 1.0751, "step": 4790 }, { "epoch": 0.5252779755710139, "grad_norm": 1.21875, "learning_rate": 1.0861292041985538e-05, "loss": 1.1422, "step": 4795 }, { "epoch": 0.5258257106863121, "grad_norm": 1.3203125, "learning_rate": 1.084224045827697e-05, "loss": 1.1123, "step": 4800 }, { "epoch": 0.5263734458016104, "grad_norm": 1.234375, "learning_rate": 1.082318579520754e-05, "loss": 1.1201, "step": 4805 }, { "epoch": 0.5269211809169085, "grad_norm": 1.2265625, "learning_rate": 1.080412812244403e-05, "loss": 1.1319, "step": 4810 }, { "epoch": 0.5274689160322068, "grad_norm": 1.2734375, "learning_rate": 1.0785067509664231e-05, "loss": 1.1475, "step": 4815 }, { "epoch": 0.5280166511475051, "grad_norm": 1.2578125, "learning_rate": 1.0766004026556676e-05, "loss": 1.1697, "step": 4820 }, { "epoch": 0.5285643862628033, "grad_norm": 1.21875, "learning_rate": 1.0746937742820397e-05, "loss": 1.1328, "step": 4825 }, { "epoch": 0.5291121213781016, "grad_norm": 1.2109375, "learning_rate": 1.072786872816466e-05, "loss": 1.1438, "step": 4830 }, { "epoch": 0.5296598564933997, "grad_norm": 1.2265625, "learning_rate": 1.070879705230873e-05, "loss": 1.1648, "step": 4835 }, { "epoch": 0.530207591608698, "grad_norm": 1.1640625, "learning_rate": 1.0689722784981586e-05, "loss": 1.113, "step": 4840 }, { "epoch": 0.5307553267239963, "grad_norm": 1.2265625, "learning_rate": 1.0670645995921687e-05, "loss": 1.1427, "step": 4845 }, { "epoch": 0.5313030618392945, "grad_norm": 1.234375, "learning_rate": 1.0651566754876715e-05, "loss": 1.1771, "step": 4850 }, { "epoch": 0.5318507969545928, "grad_norm": 1.2109375, "learning_rate": 1.0632485131603313e-05, "loss": 1.1629, "step": 4855 }, { "epoch": 0.532398532069891, "grad_norm": 1.203125, "learning_rate": 1.0613401195866835e-05, "loss": 1.2022, "step": 4860 }, { "epoch": 0.5329462671851892, "grad_norm": 1.28125, "learning_rate": 1.059431501744109e-05, "loss": 1.1548, "step": 4865 }, { "epoch": 0.5334940023004875, "grad_norm": 1.25, "learning_rate": 1.0575226666108086e-05, "loss": 1.1427, "step": 4870 }, { "epoch": 0.5340417374157858, "grad_norm": 1.1875, "learning_rate": 1.0556136211657784e-05, "loss": 1.1265, "step": 4875 }, { "epoch": 0.534589472531084, "grad_norm": 1.203125, "learning_rate": 1.0537043723887811e-05, "loss": 1.1737, "step": 4880 }, { "epoch": 0.5351372076463822, "grad_norm": 1.1875, "learning_rate": 1.0517949272603257e-05, "loss": 1.081, "step": 4885 }, { "epoch": 0.5356849427616804, "grad_norm": 1.2734375, "learning_rate": 1.049885292761637e-05, "loss": 1.1054, "step": 4890 }, { "epoch": 0.5362326778769787, "grad_norm": 1.28125, "learning_rate": 1.0479754758746332e-05, "loss": 1.1456, "step": 4895 }, { "epoch": 0.536780412992277, "grad_norm": 1.2265625, "learning_rate": 1.0460654835818989e-05, "loss": 1.1402, "step": 4900 }, { "epoch": 0.5373281481075752, "grad_norm": 1.1953125, "learning_rate": 1.0441553228666603e-05, "loss": 1.1627, "step": 4905 }, { "epoch": 0.5378758832228734, "grad_norm": 1.2734375, "learning_rate": 1.0422450007127591e-05, "loss": 1.1311, "step": 4910 }, { "epoch": 0.5384236183381717, "grad_norm": 1.234375, "learning_rate": 1.0403345241046277e-05, "loss": 1.215, "step": 4915 }, { "epoch": 0.5389713534534699, "grad_norm": 1.1796875, "learning_rate": 1.0384239000272624e-05, "loss": 1.1007, "step": 4920 }, { "epoch": 0.5395190885687682, "grad_norm": 1.1953125, "learning_rate": 1.0365131354661995e-05, "loss": 1.0988, "step": 4925 }, { "epoch": 0.5400668236840663, "grad_norm": 1.203125, "learning_rate": 1.0346022374074885e-05, "loss": 1.1654, "step": 4930 }, { "epoch": 0.5406145587993646, "grad_norm": 1.1953125, "learning_rate": 1.032691212837667e-05, "loss": 1.1612, "step": 4935 }, { "epoch": 0.5411622939146629, "grad_norm": 1.3046875, "learning_rate": 1.0307800687437352e-05, "loss": 1.1963, "step": 4940 }, { "epoch": 0.5417100290299611, "grad_norm": 1.234375, "learning_rate": 1.0288688121131308e-05, "loss": 1.1222, "step": 4945 }, { "epoch": 0.5422577641452594, "grad_norm": 1.2734375, "learning_rate": 1.0269574499337016e-05, "loss": 1.1935, "step": 4950 }, { "epoch": 0.5428054992605575, "grad_norm": 1.234375, "learning_rate": 1.0250459891936831e-05, "loss": 1.1486, "step": 4955 }, { "epoch": 0.5433532343758558, "grad_norm": 1.1953125, "learning_rate": 1.0231344368816694e-05, "loss": 1.0891, "step": 4960 }, { "epoch": 0.5439009694911541, "grad_norm": 1.2421875, "learning_rate": 1.0212227999865905e-05, "loss": 1.1315, "step": 4965 }, { "epoch": 0.5444487046064523, "grad_norm": 1.1875, "learning_rate": 1.0193110854976859e-05, "loss": 1.1676, "step": 4970 }, { "epoch": 0.5449964397217506, "grad_norm": 1.25, "learning_rate": 1.0173993004044777e-05, "loss": 1.1696, "step": 4975 }, { "epoch": 0.5455441748370488, "grad_norm": 1.1953125, "learning_rate": 1.0154874516967466e-05, "loss": 1.1742, "step": 4980 }, { "epoch": 0.546091909952347, "grad_norm": 1.328125, "learning_rate": 1.0135755463645065e-05, "loss": 1.1852, "step": 4985 }, { "epoch": 0.5466396450676453, "grad_norm": 1.4296875, "learning_rate": 1.0116635913979778e-05, "loss": 1.164, "step": 4990 }, { "epoch": 0.5471873801829436, "grad_norm": 1.2734375, "learning_rate": 1.0097515937875619e-05, "loss": 1.1373, "step": 4995 }, { "epoch": 0.5477351152982418, "grad_norm": 1.203125, "learning_rate": 1.0078395605238168e-05, "loss": 1.1487, "step": 5000 }, { "epoch": 0.54828285041354, "grad_norm": 1.21875, "learning_rate": 1.0059274985974305e-05, "loss": 1.1163, "step": 5005 }, { "epoch": 0.5488305855288382, "grad_norm": 1.2265625, "learning_rate": 1.004015414999197e-05, "loss": 1.1109, "step": 5010 }, { "epoch": 0.5493783206441365, "grad_norm": 1.171875, "learning_rate": 1.002103316719987e-05, "loss": 1.1546, "step": 5015 }, { "epoch": 0.5499260557594348, "grad_norm": 1.3125, "learning_rate": 1.0001912107507273e-05, "loss": 1.1904, "step": 5020 }, { "epoch": 0.550473790874733, "grad_norm": 1.2421875, "learning_rate": 9.982791040823714e-06, "loss": 1.1877, "step": 5025 }, { "epoch": 0.5510215259900312, "grad_norm": 1.1875, "learning_rate": 9.963670037058764e-06, "loss": 1.1568, "step": 5030 }, { "epoch": 0.5515692611053294, "grad_norm": 1.234375, "learning_rate": 9.944549166121753e-06, "loss": 1.146, "step": 5035 }, { "epoch": 0.5521169962206277, "grad_norm": 1.1875, "learning_rate": 9.925428497921533e-06, "loss": 1.1435, "step": 5040 }, { "epoch": 0.552664731335926, "grad_norm": 1.1953125, "learning_rate": 9.906308102366216e-06, "loss": 1.1093, "step": 5045 }, { "epoch": 0.5532124664512242, "grad_norm": 1.25, "learning_rate": 9.887188049362906e-06, "loss": 1.1546, "step": 5050 }, { "epoch": 0.5537602015665224, "grad_norm": 1.1796875, "learning_rate": 9.86806840881747e-06, "loss": 1.1049, "step": 5055 }, { "epoch": 0.5543079366818207, "grad_norm": 1.2734375, "learning_rate": 9.848949250634253e-06, "loss": 1.1575, "step": 5060 }, { "epoch": 0.5548556717971189, "grad_norm": 1.21875, "learning_rate": 9.82983064471584e-06, "loss": 1.0936, "step": 5065 }, { "epoch": 0.5554034069124172, "grad_norm": 1.2265625, "learning_rate": 9.810712660962813e-06, "loss": 1.1147, "step": 5070 }, { "epoch": 0.5559511420277155, "grad_norm": 1.21875, "learning_rate": 9.791595369273454e-06, "loss": 1.1929, "step": 5075 }, { "epoch": 0.5564988771430136, "grad_norm": 1.25, "learning_rate": 9.772478839543526e-06, "loss": 1.1763, "step": 5080 }, { "epoch": 0.5570466122583119, "grad_norm": 1.2265625, "learning_rate": 9.753363141666017e-06, "loss": 1.1068, "step": 5085 }, { "epoch": 0.5575943473736101, "grad_norm": 1.2578125, "learning_rate": 9.734248345530854e-06, "loss": 1.1285, "step": 5090 }, { "epoch": 0.5581420824889084, "grad_norm": 1.2578125, "learning_rate": 9.715134521024675e-06, "loss": 1.1655, "step": 5095 }, { "epoch": 0.5586898176042066, "grad_norm": 1.296875, "learning_rate": 9.696021738030575e-06, "loss": 1.1562, "step": 5100 }, { "epoch": 0.5592375527195048, "grad_norm": 1.2265625, "learning_rate": 9.676910066427825e-06, "loss": 1.2125, "step": 5105 }, { "epoch": 0.5597852878348031, "grad_norm": 1.2421875, "learning_rate": 9.657799576091646e-06, "loss": 1.1052, "step": 5110 }, { "epoch": 0.5603330229501013, "grad_norm": 1.2265625, "learning_rate": 9.638690336892936e-06, "loss": 1.1403, "step": 5115 }, { "epoch": 0.5608807580653996, "grad_norm": 1.2265625, "learning_rate": 9.619582418698009e-06, "loss": 1.115, "step": 5120 }, { "epoch": 0.5614284931806978, "grad_norm": 1.234375, "learning_rate": 9.600475891368365e-06, "loss": 1.1525, "step": 5125 }, { "epoch": 0.561976228295996, "grad_norm": 1.25, "learning_rate": 9.58137082476041e-06, "loss": 1.1344, "step": 5130 }, { "epoch": 0.5625239634112943, "grad_norm": 1.28125, "learning_rate": 9.562267288725205e-06, "loss": 1.1938, "step": 5135 }, { "epoch": 0.5630716985265926, "grad_norm": 1.2421875, "learning_rate": 9.543165353108232e-06, "loss": 1.1521, "step": 5140 }, { "epoch": 0.5636194336418908, "grad_norm": 1.2265625, "learning_rate": 9.524065087749097e-06, "loss": 1.1257, "step": 5145 }, { "epoch": 0.564167168757189, "grad_norm": 1.265625, "learning_rate": 9.50496656248132e-06, "loss": 1.1702, "step": 5150 }, { "epoch": 0.5647149038724872, "grad_norm": 1.2578125, "learning_rate": 9.485869847132055e-06, "loss": 1.141, "step": 5155 }, { "epoch": 0.5652626389877855, "grad_norm": 1.2421875, "learning_rate": 9.466775011521825e-06, "loss": 1.1146, "step": 5160 }, { "epoch": 0.5658103741030838, "grad_norm": 1.1953125, "learning_rate": 9.447682125464299e-06, "loss": 1.1636, "step": 5165 }, { "epoch": 0.566358109218382, "grad_norm": 1.203125, "learning_rate": 9.42859125876601e-06, "loss": 1.1135, "step": 5170 }, { "epoch": 0.5669058443336802, "grad_norm": 1.2890625, "learning_rate": 9.409502481226098e-06, "loss": 1.1315, "step": 5175 }, { "epoch": 0.5674535794489785, "grad_norm": 1.203125, "learning_rate": 9.39041586263608e-06, "loss": 1.1671, "step": 5180 }, { "epoch": 0.5680013145642767, "grad_norm": 1.15625, "learning_rate": 9.37133147277958e-06, "loss": 1.1187, "step": 5185 }, { "epoch": 0.568549049679575, "grad_norm": 1.21875, "learning_rate": 9.35224938143206e-06, "loss": 1.1596, "step": 5190 }, { "epoch": 0.5690967847948732, "grad_norm": 1.21875, "learning_rate": 9.333169658360588e-06, "loss": 1.1323, "step": 5195 }, { "epoch": 0.5696445199101714, "grad_norm": 1.2109375, "learning_rate": 9.31409237332357e-06, "loss": 1.1726, "step": 5200 }, { "epoch": 0.5701922550254697, "grad_norm": 1.25, "learning_rate": 9.2950175960705e-06, "loss": 1.1918, "step": 5205 }, { "epoch": 0.5707399901407679, "grad_norm": 1.296875, "learning_rate": 9.275945396341704e-06, "loss": 1.1976, "step": 5210 }, { "epoch": 0.5712877252560662, "grad_norm": 1.265625, "learning_rate": 9.256875843868072e-06, "loss": 1.1623, "step": 5215 }, { "epoch": 0.5718354603713645, "grad_norm": 1.21875, "learning_rate": 9.237809008370836e-06, "loss": 1.1411, "step": 5220 }, { "epoch": 0.5723831954866626, "grad_norm": 1.203125, "learning_rate": 9.218744959561285e-06, "loss": 1.1414, "step": 5225 }, { "epoch": 0.5729309306019609, "grad_norm": 1.2265625, "learning_rate": 9.199683767140511e-06, "loss": 1.124, "step": 5230 }, { "epoch": 0.5734786657172591, "grad_norm": 1.25, "learning_rate": 9.18062550079917e-06, "loss": 1.1288, "step": 5235 }, { "epoch": 0.5740264008325574, "grad_norm": 1.25, "learning_rate": 9.161570230217228e-06, "loss": 1.179, "step": 5240 }, { "epoch": 0.5745741359478557, "grad_norm": 1.2265625, "learning_rate": 9.14251802506368e-06, "loss": 1.1691, "step": 5245 }, { "epoch": 0.5751218710631538, "grad_norm": 1.2265625, "learning_rate": 9.12346895499633e-06, "loss": 1.1064, "step": 5250 }, { "epoch": 0.5756696061784521, "grad_norm": 1.21875, "learning_rate": 9.104423089661512e-06, "loss": 1.154, "step": 5255 }, { "epoch": 0.5762173412937504, "grad_norm": 1.2734375, "learning_rate": 9.085380498693837e-06, "loss": 1.1888, "step": 5260 }, { "epoch": 0.5767650764090486, "grad_norm": 1.1953125, "learning_rate": 9.066341251715958e-06, "loss": 1.1586, "step": 5265 }, { "epoch": 0.5773128115243469, "grad_norm": 1.265625, "learning_rate": 9.047305418338295e-06, "loss": 1.1831, "step": 5270 }, { "epoch": 0.577860546639645, "grad_norm": 1.203125, "learning_rate": 9.028273068158782e-06, "loss": 1.2184, "step": 5275 }, { "epoch": 0.5784082817549433, "grad_norm": 1.265625, "learning_rate": 9.009244270762625e-06, "loss": 1.1374, "step": 5280 }, { "epoch": 0.5789560168702416, "grad_norm": 1.2265625, "learning_rate": 8.990219095722044e-06, "loss": 1.1599, "step": 5285 }, { "epoch": 0.5795037519855398, "grad_norm": 1.46875, "learning_rate": 8.971197612596006e-06, "loss": 1.0858, "step": 5290 }, { "epoch": 0.580051487100838, "grad_norm": 1.3125, "learning_rate": 8.952179890929982e-06, "loss": 1.1553, "step": 5295 }, { "epoch": 0.5805992222161362, "grad_norm": 1.21875, "learning_rate": 8.933166000255692e-06, "loss": 1.1676, "step": 5300 }, { "epoch": 0.5811469573314345, "grad_norm": 1.4921875, "learning_rate": 8.914156010090852e-06, "loss": 1.1262, "step": 5305 }, { "epoch": 0.5816946924467328, "grad_norm": 1.2421875, "learning_rate": 8.895149989938915e-06, "loss": 1.1667, "step": 5310 }, { "epoch": 0.582242427562031, "grad_norm": 1.2421875, "learning_rate": 8.876148009288813e-06, "loss": 1.1792, "step": 5315 }, { "epoch": 0.5827901626773292, "grad_norm": 1.265625, "learning_rate": 8.857150137614718e-06, "loss": 1.1894, "step": 5320 }, { "epoch": 0.5833378977926275, "grad_norm": 1.1875, "learning_rate": 8.83815644437578e-06, "loss": 1.148, "step": 5325 }, { "epoch": 0.5838856329079257, "grad_norm": 1.2265625, "learning_rate": 8.819166999015863e-06, "loss": 1.1339, "step": 5330 }, { "epoch": 0.584433368023224, "grad_norm": 1.1875, "learning_rate": 8.8001818709633e-06, "loss": 1.0615, "step": 5335 }, { "epoch": 0.5849811031385223, "grad_norm": 1.1875, "learning_rate": 8.781201129630655e-06, "loss": 1.1195, "step": 5340 }, { "epoch": 0.5855288382538204, "grad_norm": 1.1484375, "learning_rate": 8.762224844414433e-06, "loss": 1.1996, "step": 5345 }, { "epoch": 0.5860765733691187, "grad_norm": 1.203125, "learning_rate": 8.743253084694858e-06, "loss": 1.1773, "step": 5350 }, { "epoch": 0.5866243084844169, "grad_norm": 1.21875, "learning_rate": 8.724285919835611e-06, "loss": 1.1409, "step": 5355 }, { "epoch": 0.5871720435997152, "grad_norm": 1.1640625, "learning_rate": 8.70532341918356e-06, "loss": 1.1911, "step": 5360 }, { "epoch": 0.5877197787150135, "grad_norm": 1.2734375, "learning_rate": 8.686365652068536e-06, "loss": 1.1553, "step": 5365 }, { "epoch": 0.5882675138303116, "grad_norm": 1.1953125, "learning_rate": 8.667412687803054e-06, "loss": 1.1326, "step": 5370 }, { "epoch": 0.5888152489456099, "grad_norm": 1.21875, "learning_rate": 8.648464595682068e-06, "loss": 1.1896, "step": 5375 }, { "epoch": 0.5893629840609081, "grad_norm": 1.1953125, "learning_rate": 8.629521444982726e-06, "loss": 1.1159, "step": 5380 }, { "epoch": 0.5899107191762064, "grad_norm": 1.265625, "learning_rate": 8.610583304964105e-06, "loss": 1.1625, "step": 5385 }, { "epoch": 0.5904584542915047, "grad_norm": 1.234375, "learning_rate": 8.591650244866957e-06, "loss": 1.1655, "step": 5390 }, { "epoch": 0.5910061894068028, "grad_norm": 1.1875, "learning_rate": 8.572722333913473e-06, "loss": 1.1404, "step": 5395 }, { "epoch": 0.5915539245221011, "grad_norm": 1.203125, "learning_rate": 8.553799641307003e-06, "loss": 1.1366, "step": 5400 }, { "epoch": 0.5921016596373994, "grad_norm": 1.25, "learning_rate": 8.534882236231837e-06, "loss": 1.1413, "step": 5405 }, { "epoch": 0.5926493947526976, "grad_norm": 1.2421875, "learning_rate": 8.515970187852916e-06, "loss": 1.1671, "step": 5410 }, { "epoch": 0.5931971298679959, "grad_norm": 1.2265625, "learning_rate": 8.4970635653156e-06, "loss": 1.1465, "step": 5415 }, { "epoch": 0.593744864983294, "grad_norm": 1.2109375, "learning_rate": 8.478162437745418e-06, "loss": 1.1198, "step": 5420 }, { "epoch": 0.5942926000985923, "grad_norm": 1.25, "learning_rate": 8.459266874247805e-06, "loss": 1.1324, "step": 5425 }, { "epoch": 0.5948403352138906, "grad_norm": 1.15625, "learning_rate": 8.440376943907847e-06, "loss": 1.1416, "step": 5430 }, { "epoch": 0.5953880703291888, "grad_norm": 1.2421875, "learning_rate": 8.42149271579004e-06, "loss": 1.1581, "step": 5435 }, { "epoch": 0.5959358054444871, "grad_norm": 1.2265625, "learning_rate": 8.402614258938038e-06, "loss": 1.1468, "step": 5440 }, { "epoch": 0.5964835405597853, "grad_norm": 1.25, "learning_rate": 8.38374164237438e-06, "loss": 1.1138, "step": 5445 }, { "epoch": 0.5970312756750835, "grad_norm": 1.21875, "learning_rate": 8.36487493510026e-06, "loss": 1.1697, "step": 5450 }, { "epoch": 0.5975790107903818, "grad_norm": 1.2421875, "learning_rate": 8.346014206095272e-06, "loss": 1.1659, "step": 5455 }, { "epoch": 0.59812674590568, "grad_norm": 1.2265625, "learning_rate": 8.32715952431714e-06, "loss": 1.163, "step": 5460 }, { "epoch": 0.5986744810209782, "grad_norm": 1.28125, "learning_rate": 8.308310958701492e-06, "loss": 1.1482, "step": 5465 }, { "epoch": 0.5992222161362765, "grad_norm": 1.21875, "learning_rate": 8.289468578161581e-06, "loss": 1.1804, "step": 5470 }, { "epoch": 0.5997699512515747, "grad_norm": 1.2734375, "learning_rate": 8.270632451588053e-06, "loss": 1.143, "step": 5475 }, { "epoch": 0.600317686366873, "grad_norm": 1.1796875, "learning_rate": 8.251802647848696e-06, "loss": 1.1491, "step": 5480 }, { "epoch": 0.6008654214821713, "grad_norm": 1.28125, "learning_rate": 8.232979235788167e-06, "loss": 1.2016, "step": 5485 }, { "epoch": 0.6014131565974694, "grad_norm": 1.203125, "learning_rate": 8.214162284227758e-06, "loss": 1.1132, "step": 5490 }, { "epoch": 0.6019608917127677, "grad_norm": 1.28125, "learning_rate": 8.195351861965151e-06, "loss": 1.1075, "step": 5495 }, { "epoch": 0.6025086268280659, "grad_norm": 1.25, "learning_rate": 8.176548037774137e-06, "loss": 1.152, "step": 5500 }, { "epoch": 0.6030563619433642, "grad_norm": 1.234375, "learning_rate": 8.157750880404402e-06, "loss": 1.1574, "step": 5505 }, { "epoch": 0.6036040970586625, "grad_norm": 1.21875, "learning_rate": 8.13896045858125e-06, "loss": 1.1635, "step": 5510 }, { "epoch": 0.6041518321739606, "grad_norm": 1.203125, "learning_rate": 8.12017684100535e-06, "loss": 1.1093, "step": 5515 }, { "epoch": 0.6046995672892589, "grad_norm": 1.2421875, "learning_rate": 8.101400096352508e-06, "loss": 1.1926, "step": 5520 }, { "epoch": 0.6052473024045572, "grad_norm": 1.2578125, "learning_rate": 8.082630293273394e-06, "loss": 1.1289, "step": 5525 }, { "epoch": 0.6057950375198554, "grad_norm": 1.2578125, "learning_rate": 8.063867500393296e-06, "loss": 1.1551, "step": 5530 }, { "epoch": 0.6063427726351537, "grad_norm": 1.2421875, "learning_rate": 8.045111786311878e-06, "loss": 1.1592, "step": 5535 }, { "epoch": 0.6068905077504518, "grad_norm": 1.25, "learning_rate": 8.026363219602921e-06, "loss": 1.2031, "step": 5540 }, { "epoch": 0.6074382428657501, "grad_norm": 1.3046875, "learning_rate": 8.007621868814073e-06, "loss": 1.2304, "step": 5545 }, { "epoch": 0.6079859779810484, "grad_norm": 1.203125, "learning_rate": 7.9888878024666e-06, "loss": 1.1297, "step": 5550 }, { "epoch": 0.6085337130963466, "grad_norm": 1.25, "learning_rate": 7.970161089055127e-06, "loss": 1.1341, "step": 5555 }, { "epoch": 0.6090814482116449, "grad_norm": 1.171875, "learning_rate": 7.951441797047412e-06, "loss": 1.0772, "step": 5560 }, { "epoch": 0.609629183326943, "grad_norm": 1.2421875, "learning_rate": 7.93272999488407e-06, "loss": 1.141, "step": 5565 }, { "epoch": 0.6101769184422413, "grad_norm": 1.1953125, "learning_rate": 7.914025750978324e-06, "loss": 1.1046, "step": 5570 }, { "epoch": 0.6107246535575396, "grad_norm": 1.2578125, "learning_rate": 7.895329133715779e-06, "loss": 1.1575, "step": 5575 }, { "epoch": 0.6112723886728378, "grad_norm": 1.265625, "learning_rate": 7.876640211454148e-06, "loss": 1.1169, "step": 5580 }, { "epoch": 0.6118201237881361, "grad_norm": 1.28125, "learning_rate": 7.857959052523005e-06, "loss": 1.157, "step": 5585 }, { "epoch": 0.6123678589034343, "grad_norm": 1.2421875, "learning_rate": 7.839285725223545e-06, "loss": 1.1819, "step": 5590 }, { "epoch": 0.6129155940187325, "grad_norm": 1.1875, "learning_rate": 7.820620297828337e-06, "loss": 1.16, "step": 5595 }, { "epoch": 0.6134633291340308, "grad_norm": 1.1953125, "learning_rate": 7.801962838581051e-06, "loss": 1.1279, "step": 5600 }, { "epoch": 0.614011064249329, "grad_norm": 1.21875, "learning_rate": 7.783313415696231e-06, "loss": 1.1946, "step": 5605 }, { "epoch": 0.6145587993646273, "grad_norm": 1.2734375, "learning_rate": 7.76467209735905e-06, "loss": 1.1686, "step": 5610 }, { "epoch": 0.6151065344799255, "grad_norm": 1.21875, "learning_rate": 7.74603895172503e-06, "loss": 1.206, "step": 5615 }, { "epoch": 0.6156542695952237, "grad_norm": 1.28125, "learning_rate": 7.727414046919825e-06, "loss": 1.1534, "step": 5620 }, { "epoch": 0.616202004710522, "grad_norm": 1.2421875, "learning_rate": 7.70879745103896e-06, "loss": 1.1479, "step": 5625 }, { "epoch": 0.6167497398258203, "grad_norm": 1.203125, "learning_rate": 7.690189232147566e-06, "loss": 1.2199, "step": 5630 }, { "epoch": 0.6172974749411185, "grad_norm": 1.2265625, "learning_rate": 7.671589458280172e-06, "loss": 1.1312, "step": 5635 }, { "epoch": 0.6178452100564167, "grad_norm": 1.234375, "learning_rate": 7.65299819744041e-06, "loss": 1.1763, "step": 5640 }, { "epoch": 0.618392945171715, "grad_norm": 1.2265625, "learning_rate": 7.634415517600789e-06, "loss": 1.1124, "step": 5645 }, { "epoch": 0.6189406802870132, "grad_norm": 1.2421875, "learning_rate": 7.61584148670246e-06, "loss": 1.1387, "step": 5650 }, { "epoch": 0.6194884154023115, "grad_norm": 1.171875, "learning_rate": 7.59727617265493e-06, "loss": 1.1751, "step": 5655 }, { "epoch": 0.6200361505176096, "grad_norm": 1.2734375, "learning_rate": 7.578719643335854e-06, "loss": 1.1762, "step": 5660 }, { "epoch": 0.6205838856329079, "grad_norm": 1.2890625, "learning_rate": 7.560171966590762e-06, "loss": 1.1825, "step": 5665 }, { "epoch": 0.6211316207482062, "grad_norm": 1.2578125, "learning_rate": 7.541633210232812e-06, "loss": 1.161, "step": 5670 }, { "epoch": 0.6216793558635044, "grad_norm": 1.1875, "learning_rate": 7.523103442042556e-06, "loss": 1.097, "step": 5675 }, { "epoch": 0.6222270909788027, "grad_norm": 1.234375, "learning_rate": 7.504582729767687e-06, "loss": 1.1358, "step": 5680 }, { "epoch": 0.6227748260941008, "grad_norm": 1.28125, "learning_rate": 7.486071141122774e-06, "loss": 1.2019, "step": 5685 }, { "epoch": 0.6233225612093991, "grad_norm": 1.21875, "learning_rate": 7.4675687437890375e-06, "loss": 1.1657, "step": 5690 }, { "epoch": 0.6238702963246974, "grad_norm": 1.1953125, "learning_rate": 7.4490756054141e-06, "loss": 1.1705, "step": 5695 }, { "epoch": 0.6244180314399956, "grad_norm": 1.2109375, "learning_rate": 7.430591793611715e-06, "loss": 1.1294, "step": 5700 }, { "epoch": 0.6249657665552939, "grad_norm": 1.234375, "learning_rate": 7.412117375961554e-06, "loss": 1.1325, "step": 5705 }, { "epoch": 0.625513501670592, "grad_norm": 1.203125, "learning_rate": 7.393652420008923e-06, "loss": 1.1276, "step": 5710 }, { "epoch": 0.6260612367858903, "grad_norm": 1.1875, "learning_rate": 7.375196993264555e-06, "loss": 1.1718, "step": 5715 }, { "epoch": 0.6266089719011886, "grad_norm": 1.203125, "learning_rate": 7.356751163204332e-06, "loss": 1.11, "step": 5720 }, { "epoch": 0.6271567070164868, "grad_norm": 1.265625, "learning_rate": 7.33831499726905e-06, "loss": 1.1356, "step": 5725 }, { "epoch": 0.6277044421317851, "grad_norm": 1.1796875, "learning_rate": 7.319888562864165e-06, "loss": 1.1446, "step": 5730 }, { "epoch": 0.6282521772470833, "grad_norm": 1.1953125, "learning_rate": 7.301471927359571e-06, "loss": 1.1267, "step": 5735 }, { "epoch": 0.6287999123623815, "grad_norm": 1.3359375, "learning_rate": 7.283065158089315e-06, "loss": 1.1637, "step": 5740 }, { "epoch": 0.6293476474776798, "grad_norm": 1.3046875, "learning_rate": 7.264668322351385e-06, "loss": 1.1372, "step": 5745 }, { "epoch": 0.6298953825929781, "grad_norm": 1.203125, "learning_rate": 7.24628148740745e-06, "loss": 1.1375, "step": 5750 }, { "epoch": 0.6304431177082763, "grad_norm": 1.265625, "learning_rate": 7.227904720482601e-06, "loss": 1.117, "step": 5755 }, { "epoch": 0.6309908528235745, "grad_norm": 1.203125, "learning_rate": 7.20953808876514e-06, "loss": 1.1562, "step": 5760 }, { "epoch": 0.6315385879388727, "grad_norm": 1.234375, "learning_rate": 7.191181659406297e-06, "loss": 1.12, "step": 5765 }, { "epoch": 0.632086323054171, "grad_norm": 1.265625, "learning_rate": 7.172835499520002e-06, "loss": 1.1717, "step": 5770 }, { "epoch": 0.6326340581694693, "grad_norm": 1.203125, "learning_rate": 7.154499676182648e-06, "loss": 1.1618, "step": 5775 }, { "epoch": 0.6331817932847675, "grad_norm": 1.265625, "learning_rate": 7.136174256432828e-06, "loss": 1.1518, "step": 5780 }, { "epoch": 0.6337295284000657, "grad_norm": 1.3046875, "learning_rate": 7.117859307271095e-06, "loss": 1.1344, "step": 5785 }, { "epoch": 0.634277263515364, "grad_norm": 1.2265625, "learning_rate": 7.099554895659734e-06, "loss": 1.1029, "step": 5790 }, { "epoch": 0.6348249986306622, "grad_norm": 1.2578125, "learning_rate": 7.081261088522482e-06, "loss": 1.1324, "step": 5795 }, { "epoch": 0.6353727337459605, "grad_norm": 1.2265625, "learning_rate": 7.062977952744326e-06, "loss": 1.0928, "step": 5800 }, { "epoch": 0.6359204688612587, "grad_norm": 1.234375, "learning_rate": 7.044705555171223e-06, "loss": 1.1352, "step": 5805 }, { "epoch": 0.6364682039765569, "grad_norm": 1.1953125, "learning_rate": 7.02644396260987e-06, "loss": 1.1719, "step": 5810 }, { "epoch": 0.6370159390918552, "grad_norm": 1.2265625, "learning_rate": 7.008193241827468e-06, "loss": 1.1325, "step": 5815 }, { "epoch": 0.6375636742071534, "grad_norm": 1.203125, "learning_rate": 6.989953459551463e-06, "loss": 1.1598, "step": 5820 }, { "epoch": 0.6381114093224517, "grad_norm": 1.28125, "learning_rate": 6.971724682469303e-06, "loss": 1.1432, "step": 5825 }, { "epoch": 0.63865914443775, "grad_norm": 1.1953125, "learning_rate": 6.953506977228211e-06, "loss": 1.1366, "step": 5830 }, { "epoch": 0.6392068795530481, "grad_norm": 1.1953125, "learning_rate": 6.935300410434927e-06, "loss": 1.1742, "step": 5835 }, { "epoch": 0.6397546146683464, "grad_norm": 1.28125, "learning_rate": 6.917105048655454e-06, "loss": 1.2324, "step": 5840 }, { "epoch": 0.6403023497836446, "grad_norm": 1.2421875, "learning_rate": 6.898920958414843e-06, "loss": 1.1062, "step": 5845 }, { "epoch": 0.6408500848989429, "grad_norm": 1.2421875, "learning_rate": 6.880748206196934e-06, "loss": 1.1941, "step": 5850 }, { "epoch": 0.6413978200142411, "grad_norm": 1.171875, "learning_rate": 6.862586858444102e-06, "loss": 1.1457, "step": 5855 }, { "epoch": 0.6419455551295393, "grad_norm": 1.203125, "learning_rate": 6.84443698155703e-06, "loss": 1.1622, "step": 5860 }, { "epoch": 0.6424932902448376, "grad_norm": 1.2421875, "learning_rate": 6.826298641894473e-06, "loss": 1.1634, "step": 5865 }, { "epoch": 0.6430410253601359, "grad_norm": 1.234375, "learning_rate": 6.808171905772986e-06, "loss": 1.1232, "step": 5870 }, { "epoch": 0.6435887604754341, "grad_norm": 1.1953125, "learning_rate": 6.790056839466715e-06, "loss": 1.164, "step": 5875 }, { "epoch": 0.6441364955907323, "grad_norm": 1.21875, "learning_rate": 6.7719535092071296e-06, "loss": 1.1156, "step": 5880 }, { "epoch": 0.6446842307060305, "grad_norm": 1.25, "learning_rate": 6.753861981182793e-06, "loss": 1.1748, "step": 5885 }, { "epoch": 0.6452319658213288, "grad_norm": 1.25, "learning_rate": 6.735782321539125e-06, "loss": 1.105, "step": 5890 }, { "epoch": 0.6457797009366271, "grad_norm": 1.2734375, "learning_rate": 6.717714596378138e-06, "loss": 1.1865, "step": 5895 }, { "epoch": 0.6463274360519253, "grad_norm": 1.140625, "learning_rate": 6.699658871758223e-06, "loss": 1.1225, "step": 5900 }, { "epoch": 0.6468751711672235, "grad_norm": 1.296875, "learning_rate": 6.681615213693895e-06, "loss": 1.0707, "step": 5905 }, { "epoch": 0.6474229062825217, "grad_norm": 1.2109375, "learning_rate": 6.6635836881555374e-06, "loss": 1.1432, "step": 5910 }, { "epoch": 0.64797064139782, "grad_norm": 1.21875, "learning_rate": 6.645564361069193e-06, "loss": 1.1497, "step": 5915 }, { "epoch": 0.6485183765131183, "grad_norm": 1.203125, "learning_rate": 6.627557298316296e-06, "loss": 1.1894, "step": 5920 }, { "epoch": 0.6490661116284165, "grad_norm": 1.2890625, "learning_rate": 6.609562565733438e-06, "loss": 1.1338, "step": 5925 }, { "epoch": 0.6496138467437147, "grad_norm": 1.2109375, "learning_rate": 6.591580229112135e-06, "loss": 1.0914, "step": 5930 }, { "epoch": 0.650161581859013, "grad_norm": 1.1875, "learning_rate": 6.573610354198587e-06, "loss": 1.1414, "step": 5935 }, { "epoch": 0.6507093169743112, "grad_norm": 1.265625, "learning_rate": 6.555653006693417e-06, "loss": 1.1661, "step": 5940 }, { "epoch": 0.6512570520896095, "grad_norm": 1.1875, "learning_rate": 6.537708252251454e-06, "loss": 1.2106, "step": 5945 }, { "epoch": 0.6518047872049078, "grad_norm": 1.2421875, "learning_rate": 6.519776156481492e-06, "loss": 1.127, "step": 5950 }, { "epoch": 0.6523525223202059, "grad_norm": 1.1953125, "learning_rate": 6.501856784946027e-06, "loss": 1.1524, "step": 5955 }, { "epoch": 0.6529002574355042, "grad_norm": 1.25, "learning_rate": 6.483950203161047e-06, "loss": 1.1249, "step": 5960 }, { "epoch": 0.6534479925508024, "grad_norm": 1.2265625, "learning_rate": 6.466056476595768e-06, "loss": 1.1287, "step": 5965 }, { "epoch": 0.6539957276661007, "grad_norm": 1.21875, "learning_rate": 6.448175670672413e-06, "loss": 1.1386, "step": 5970 }, { "epoch": 0.654543462781399, "grad_norm": 1.2421875, "learning_rate": 6.43030785076597e-06, "loss": 1.21, "step": 5975 }, { "epoch": 0.6550911978966971, "grad_norm": 1.6484375, "learning_rate": 6.412453082203933e-06, "loss": 1.1321, "step": 5980 }, { "epoch": 0.6556389330119954, "grad_norm": 1.25, "learning_rate": 6.394611430266087e-06, "loss": 1.1684, "step": 5985 }, { "epoch": 0.6561866681272936, "grad_norm": 1.234375, "learning_rate": 6.376782960184266e-06, "loss": 1.1898, "step": 5990 }, { "epoch": 0.6567344032425919, "grad_norm": 1.2421875, "learning_rate": 6.358967737142096e-06, "loss": 1.1738, "step": 5995 }, { "epoch": 0.6572821383578902, "grad_norm": 1.28125, "learning_rate": 6.34116582627478e-06, "loss": 1.1621, "step": 6000 }, { "epoch": 0.6578298734731883, "grad_norm": 1.2265625, "learning_rate": 6.323377292668848e-06, "loss": 1.1685, "step": 6005 }, { "epoch": 0.6583776085884866, "grad_norm": 1.2578125, "learning_rate": 6.305602201361915e-06, "loss": 1.1973, "step": 6010 }, { "epoch": 0.6589253437037849, "grad_norm": 1.1875, "learning_rate": 6.2878406173424576e-06, "loss": 1.0964, "step": 6015 }, { "epoch": 0.6594730788190831, "grad_norm": 1.1875, "learning_rate": 6.270092605549564e-06, "loss": 1.1605, "step": 6020 }, { "epoch": 0.6600208139343813, "grad_norm": 1.2265625, "learning_rate": 6.25235823087269e-06, "loss": 1.1705, "step": 6025 }, { "epoch": 0.6605685490496795, "grad_norm": 1.234375, "learning_rate": 6.2346375581514525e-06, "loss": 1.1656, "step": 6030 }, { "epoch": 0.6611162841649778, "grad_norm": 1.2421875, "learning_rate": 6.216930652175358e-06, "loss": 1.1893, "step": 6035 }, { "epoch": 0.6616640192802761, "grad_norm": 1.21875, "learning_rate": 6.199237577683577e-06, "loss": 1.1163, "step": 6040 }, { "epoch": 0.6622117543955743, "grad_norm": 1.1875, "learning_rate": 6.181558399364722e-06, "loss": 1.1637, "step": 6045 }, { "epoch": 0.6627594895108725, "grad_norm": 1.3359375, "learning_rate": 6.163893181856586e-06, "loss": 1.2075, "step": 6050 }, { "epoch": 0.6633072246261708, "grad_norm": 1.2109375, "learning_rate": 6.146241989745932e-06, "loss": 1.1517, "step": 6055 }, { "epoch": 0.663854959741469, "grad_norm": 1.2109375, "learning_rate": 6.128604887568237e-06, "loss": 1.113, "step": 6060 }, { "epoch": 0.6644026948567673, "grad_norm": 1.203125, "learning_rate": 6.1109819398074564e-06, "loss": 1.1239, "step": 6065 }, { "epoch": 0.6649504299720655, "grad_norm": 1.2734375, "learning_rate": 6.09337321089581e-06, "loss": 1.1289, "step": 6070 }, { "epoch": 0.6654981650873637, "grad_norm": 1.203125, "learning_rate": 6.0757787652135195e-06, "loss": 1.1207, "step": 6075 }, { "epoch": 0.666045900202662, "grad_norm": 1.2109375, "learning_rate": 6.05819866708859e-06, "loss": 1.1825, "step": 6080 }, { "epoch": 0.6665936353179602, "grad_norm": 1.2109375, "learning_rate": 6.040632980796566e-06, "loss": 1.1496, "step": 6085 }, { "epoch": 0.6671413704332585, "grad_norm": 1.1796875, "learning_rate": 6.023081770560307e-06, "loss": 1.1455, "step": 6090 }, { "epoch": 0.6676891055485568, "grad_norm": 1.25, "learning_rate": 6.005545100549739e-06, "loss": 1.1437, "step": 6095 }, { "epoch": 0.6682368406638549, "grad_norm": 1.2109375, "learning_rate": 5.9880230348816245e-06, "loss": 1.1507, "step": 6100 }, { "epoch": 0.6687845757791532, "grad_norm": 1.1875, "learning_rate": 5.970515637619346e-06, "loss": 1.096, "step": 6105 }, { "epoch": 0.6693323108944514, "grad_norm": 1.1796875, "learning_rate": 5.953022972772633e-06, "loss": 1.1749, "step": 6110 }, { "epoch": 0.6698800460097497, "grad_norm": 1.203125, "learning_rate": 5.935545104297373e-06, "loss": 1.1504, "step": 6115 }, { "epoch": 0.670427781125048, "grad_norm": 1.2265625, "learning_rate": 5.918082096095339e-06, "loss": 1.1512, "step": 6120 }, { "epoch": 0.6709755162403461, "grad_norm": 1.203125, "learning_rate": 5.900634012013977e-06, "loss": 1.1532, "step": 6125 }, { "epoch": 0.6715232513556444, "grad_norm": 1.2109375, "learning_rate": 5.883200915846181e-06, "loss": 1.0785, "step": 6130 }, { "epoch": 0.6720709864709427, "grad_norm": 1.15625, "learning_rate": 5.865782871330026e-06, "loss": 1.1757, "step": 6135 }, { "epoch": 0.6726187215862409, "grad_norm": 1.2734375, "learning_rate": 5.848379942148568e-06, "loss": 1.2096, "step": 6140 }, { "epoch": 0.6731664567015392, "grad_norm": 1.203125, "learning_rate": 5.830992191929603e-06, "loss": 1.1297, "step": 6145 }, { "epoch": 0.6737141918168373, "grad_norm": 1.1875, "learning_rate": 5.813619684245413e-06, "loss": 1.1174, "step": 6150 }, { "epoch": 0.6742619269321356, "grad_norm": 1.25, "learning_rate": 5.7962624826125725e-06, "loss": 1.1527, "step": 6155 }, { "epoch": 0.6748096620474339, "grad_norm": 1.28125, "learning_rate": 5.7789206504916815e-06, "loss": 1.1952, "step": 6160 }, { "epoch": 0.6753573971627321, "grad_norm": 1.2265625, "learning_rate": 5.761594251287149e-06, "loss": 1.1212, "step": 6165 }, { "epoch": 0.6759051322780304, "grad_norm": 1.1796875, "learning_rate": 5.74428334834696e-06, "loss": 1.142, "step": 6170 }, { "epoch": 0.6764528673933285, "grad_norm": 1.2109375, "learning_rate": 5.726988004962443e-06, "loss": 1.137, "step": 6175 }, { "epoch": 0.6770006025086268, "grad_norm": 1.265625, "learning_rate": 5.709708284368038e-06, "loss": 1.1304, "step": 6180 }, { "epoch": 0.6775483376239251, "grad_norm": 1.171875, "learning_rate": 5.692444249741064e-06, "loss": 1.101, "step": 6185 }, { "epoch": 0.6780960727392233, "grad_norm": 1.2109375, "learning_rate": 5.675195964201492e-06, "loss": 1.1385, "step": 6190 }, { "epoch": 0.6786438078545216, "grad_norm": 1.2578125, "learning_rate": 5.65796349081171e-06, "loss": 1.1179, "step": 6195 }, { "epoch": 0.6791915429698198, "grad_norm": 1.2109375, "learning_rate": 5.640746892576296e-06, "loss": 1.1782, "step": 6200 }, { "epoch": 0.679739278085118, "grad_norm": 1.234375, "learning_rate": 5.623546232441786e-06, "loss": 1.1495, "step": 6205 }, { "epoch": 0.6802870132004163, "grad_norm": 1.2109375, "learning_rate": 5.6063615732964425e-06, "loss": 1.1382, "step": 6210 }, { "epoch": 0.6808347483157146, "grad_norm": 1.234375, "learning_rate": 5.589192977970028e-06, "loss": 1.0981, "step": 6215 }, { "epoch": 0.6813824834310127, "grad_norm": 1.2109375, "learning_rate": 5.57204050923357e-06, "loss": 1.1724, "step": 6220 }, { "epoch": 0.681930218546311, "grad_norm": 1.2265625, "learning_rate": 5.5549042297991366e-06, "loss": 1.1736, "step": 6225 }, { "epoch": 0.6824779536616092, "grad_norm": 1.25, "learning_rate": 5.537784202319607e-06, "loss": 1.1174, "step": 6230 }, { "epoch": 0.6830256887769075, "grad_norm": 1.2421875, "learning_rate": 5.520680489388437e-06, "loss": 1.1457, "step": 6235 }, { "epoch": 0.6835734238922058, "grad_norm": 1.234375, "learning_rate": 5.503593153539437e-06, "loss": 1.1336, "step": 6240 }, { "epoch": 0.6841211590075039, "grad_norm": 1.2890625, "learning_rate": 5.486522257246538e-06, "loss": 1.2187, "step": 6245 }, { "epoch": 0.6846688941228022, "grad_norm": 1.2890625, "learning_rate": 5.4694678629235696e-06, "loss": 1.1642, "step": 6250 }, { "epoch": 0.6852166292381004, "grad_norm": 1.234375, "learning_rate": 5.452430032924017e-06, "loss": 1.1529, "step": 6255 }, { "epoch": 0.6857643643533987, "grad_norm": 1.21875, "learning_rate": 5.4354088295408265e-06, "loss": 1.1982, "step": 6260 }, { "epoch": 0.686312099468697, "grad_norm": 1.2890625, "learning_rate": 5.418404315006125e-06, "loss": 1.0957, "step": 6265 }, { "epoch": 0.6868598345839951, "grad_norm": 1.2421875, "learning_rate": 5.401416551491039e-06, "loss": 1.1719, "step": 6270 }, { "epoch": 0.6874075696992934, "grad_norm": 1.1953125, "learning_rate": 5.384445601105462e-06, "loss": 1.0921, "step": 6275 }, { "epoch": 0.6879553048145917, "grad_norm": 1.25, "learning_rate": 5.367491525897787e-06, "loss": 1.1153, "step": 6280 }, { "epoch": 0.6885030399298899, "grad_norm": 1.2578125, "learning_rate": 5.3505543878547315e-06, "loss": 1.1524, "step": 6285 }, { "epoch": 0.6890507750451882, "grad_norm": 1.2421875, "learning_rate": 5.33363424890108e-06, "loss": 1.1685, "step": 6290 }, { "epoch": 0.6895985101604863, "grad_norm": 1.2421875, "learning_rate": 5.316731170899467e-06, "loss": 1.1593, "step": 6295 }, { "epoch": 0.6901462452757846, "grad_norm": 1.328125, "learning_rate": 5.299845215650157e-06, "loss": 1.1513, "step": 6300 }, { "epoch": 0.6906939803910829, "grad_norm": 1.21875, "learning_rate": 5.282976444890793e-06, "loss": 1.1787, "step": 6305 }, { "epoch": 0.6912417155063811, "grad_norm": 1.2734375, "learning_rate": 5.2661249202962e-06, "loss": 1.125, "step": 6310 }, { "epoch": 0.6917894506216794, "grad_norm": 1.2265625, "learning_rate": 5.249290703478158e-06, "loss": 1.1662, "step": 6315 }, { "epoch": 0.6923371857369776, "grad_norm": 1.2109375, "learning_rate": 5.232473855985149e-06, "loss": 1.1612, "step": 6320 }, { "epoch": 0.6928849208522758, "grad_norm": 1.234375, "learning_rate": 5.215674439302155e-06, "loss": 1.1552, "step": 6325 }, { "epoch": 0.6934326559675741, "grad_norm": 1.234375, "learning_rate": 5.198892514850444e-06, "loss": 1.1097, "step": 6330 }, { "epoch": 0.6939803910828723, "grad_norm": 1.203125, "learning_rate": 5.182128143987302e-06, "loss": 1.1337, "step": 6335 }, { "epoch": 0.6945281261981706, "grad_norm": 1.1953125, "learning_rate": 5.165381388005861e-06, "loss": 1.148, "step": 6340 }, { "epoch": 0.6950758613134688, "grad_norm": 1.34375, "learning_rate": 5.148652308134844e-06, "loss": 1.2329, "step": 6345 }, { "epoch": 0.695623596428767, "grad_norm": 1.328125, "learning_rate": 5.131940965538329e-06, "loss": 1.1538, "step": 6350 }, { "epoch": 0.6961713315440653, "grad_norm": 1.234375, "learning_rate": 5.115247421315572e-06, "loss": 1.2042, "step": 6355 }, { "epoch": 0.6967190666593636, "grad_norm": 1.4296875, "learning_rate": 5.098571736500744e-06, "loss": 1.1985, "step": 6360 }, { "epoch": 0.6972668017746618, "grad_norm": 1.1953125, "learning_rate": 5.081913972062704e-06, "loss": 1.1183, "step": 6365 }, { "epoch": 0.69781453688996, "grad_norm": 1.1875, "learning_rate": 5.0652741889048205e-06, "loss": 1.179, "step": 6370 }, { "epoch": 0.6983622720052582, "grad_norm": 1.21875, "learning_rate": 5.048652447864689e-06, "loss": 1.1346, "step": 6375 }, { "epoch": 0.6989100071205565, "grad_norm": 1.1875, "learning_rate": 5.032048809713967e-06, "loss": 1.1022, "step": 6380 }, { "epoch": 0.6994577422358548, "grad_norm": 1.21875, "learning_rate": 5.015463335158114e-06, "loss": 1.179, "step": 6385 }, { "epoch": 0.7000054773511529, "grad_norm": 1.265625, "learning_rate": 4.998896084836167e-06, "loss": 1.1692, "step": 6390 }, { "epoch": 0.7005532124664512, "grad_norm": 1.2265625, "learning_rate": 4.982347119320557e-06, "loss": 1.1716, "step": 6395 }, { "epoch": 0.7011009475817495, "grad_norm": 1.203125, "learning_rate": 4.965816499116849e-06, "loss": 1.1171, "step": 6400 }, { "epoch": 0.7016486826970477, "grad_norm": 1.2734375, "learning_rate": 4.949304284663535e-06, "loss": 1.1577, "step": 6405 }, { "epoch": 0.702196417812346, "grad_norm": 1.1640625, "learning_rate": 4.932810536331817e-06, "loss": 1.1562, "step": 6410 }, { "epoch": 0.7027441529276441, "grad_norm": 1.171875, "learning_rate": 4.916335314425376e-06, "loss": 1.1456, "step": 6415 }, { "epoch": 0.7032918880429424, "grad_norm": 1.1875, "learning_rate": 4.899878679180167e-06, "loss": 1.1799, "step": 6420 }, { "epoch": 0.7038396231582407, "grad_norm": 1.25, "learning_rate": 4.8834406907641784e-06, "loss": 1.1339, "step": 6425 }, { "epoch": 0.7043873582735389, "grad_norm": 1.1796875, "learning_rate": 4.8670214092772316e-06, "loss": 1.1551, "step": 6430 }, { "epoch": 0.7049350933888372, "grad_norm": 1.234375, "learning_rate": 4.850620894750746e-06, "loss": 1.1504, "step": 6435 }, { "epoch": 0.7054828285041354, "grad_norm": 1.234375, "learning_rate": 4.834239207147532e-06, "loss": 1.2009, "step": 6440 }, { "epoch": 0.7060305636194336, "grad_norm": 1.2578125, "learning_rate": 4.817876406361561e-06, "loss": 1.1192, "step": 6445 }, { "epoch": 0.7065782987347319, "grad_norm": 1.2265625, "learning_rate": 4.801532552217756e-06, "loss": 1.1502, "step": 6450 }, { "epoch": 0.7071260338500301, "grad_norm": 1.1953125, "learning_rate": 4.785207704471763e-06, "loss": 1.142, "step": 6455 }, { "epoch": 0.7076737689653284, "grad_norm": 1.203125, "learning_rate": 4.76890192280974e-06, "loss": 1.1281, "step": 6460 }, { "epoch": 0.7082215040806266, "grad_norm": 1.203125, "learning_rate": 4.7526152668481385e-06, "loss": 1.1053, "step": 6465 }, { "epoch": 0.7087692391959248, "grad_norm": 1.2109375, "learning_rate": 4.736347796133481e-06, "loss": 1.163, "step": 6470 }, { "epoch": 0.7093169743112231, "grad_norm": 1.2421875, "learning_rate": 4.7200995701421455e-06, "loss": 1.2034, "step": 6475 }, { "epoch": 0.7098647094265214, "grad_norm": 1.234375, "learning_rate": 4.703870648280151e-06, "loss": 1.1658, "step": 6480 }, { "epoch": 0.7104124445418196, "grad_norm": 1.2109375, "learning_rate": 4.687661089882934e-06, "loss": 1.1676, "step": 6485 }, { "epoch": 0.7109601796571178, "grad_norm": 1.203125, "learning_rate": 4.671470954215139e-06, "loss": 1.1919, "step": 6490 }, { "epoch": 0.711507914772416, "grad_norm": 1.2109375, "learning_rate": 4.655300300470395e-06, "loss": 1.1288, "step": 6495 }, { "epoch": 0.7120556498877143, "grad_norm": 1.21875, "learning_rate": 4.639149187771102e-06, "loss": 1.1863, "step": 6500 }, { "epoch": 0.7126033850030126, "grad_norm": 1.2578125, "learning_rate": 4.6230176751682185e-06, "loss": 1.1942, "step": 6505 }, { "epoch": 0.7131511201183108, "grad_norm": 1.2890625, "learning_rate": 4.606905821641036e-06, "loss": 1.1683, "step": 6510 }, { "epoch": 0.713698855233609, "grad_norm": 1.2421875, "learning_rate": 4.590813686096981e-06, "loss": 1.0678, "step": 6515 }, { "epoch": 0.7142465903489073, "grad_norm": 1.203125, "learning_rate": 4.5747413273713715e-06, "loss": 1.1841, "step": 6520 }, { "epoch": 0.7147943254642055, "grad_norm": 1.2578125, "learning_rate": 4.558688804227229e-06, "loss": 1.1208, "step": 6525 }, { "epoch": 0.7153420605795038, "grad_norm": 1.296875, "learning_rate": 4.542656175355054e-06, "loss": 1.1631, "step": 6530 }, { "epoch": 0.715889795694802, "grad_norm": 1.234375, "learning_rate": 4.526643499372604e-06, "loss": 1.1604, "step": 6535 }, { "epoch": 0.7164375308101002, "grad_norm": 1.328125, "learning_rate": 4.510650834824692e-06, "loss": 1.1719, "step": 6540 }, { "epoch": 0.7169852659253985, "grad_norm": 1.2421875, "learning_rate": 4.49467824018296e-06, "loss": 1.1601, "step": 6545 }, { "epoch": 0.7175330010406967, "grad_norm": 1.2734375, "learning_rate": 4.4787257738456745e-06, "loss": 1.1557, "step": 6550 }, { "epoch": 0.718080736155995, "grad_norm": 1.1796875, "learning_rate": 4.4627934941375185e-06, "loss": 1.102, "step": 6555 }, { "epoch": 0.7186284712712933, "grad_norm": 1.21875, "learning_rate": 4.446881459309351e-06, "loss": 1.0873, "step": 6560 }, { "epoch": 0.7191762063865914, "grad_norm": 1.2421875, "learning_rate": 4.430989727538023e-06, "loss": 1.1641, "step": 6565 }, { "epoch": 0.7197239415018897, "grad_norm": 1.234375, "learning_rate": 4.415118356926163e-06, "loss": 1.1565, "step": 6570 }, { "epoch": 0.7202716766171879, "grad_norm": 1.2265625, "learning_rate": 4.399267405501938e-06, "loss": 1.174, "step": 6575 }, { "epoch": 0.7208194117324862, "grad_norm": 1.2734375, "learning_rate": 4.383436931218867e-06, "loss": 1.1657, "step": 6580 }, { "epoch": 0.7213671468477844, "grad_norm": 1.265625, "learning_rate": 4.367626991955615e-06, "loss": 1.1566, "step": 6585 }, { "epoch": 0.7219148819630826, "grad_norm": 1.2578125, "learning_rate": 4.351837645515739e-06, "loss": 1.1382, "step": 6590 }, { "epoch": 0.7224626170783809, "grad_norm": 1.234375, "learning_rate": 4.336068949627534e-06, "loss": 1.1729, "step": 6595 }, { "epoch": 0.7230103521936792, "grad_norm": 1.3046875, "learning_rate": 4.320320961943781e-06, "loss": 1.1313, "step": 6600 }, { "epoch": 0.7235580873089774, "grad_norm": 1.203125, "learning_rate": 4.304593740041539e-06, "loss": 1.1615, "step": 6605 }, { "epoch": 0.7241058224242756, "grad_norm": 1.21875, "learning_rate": 4.2888873414219675e-06, "loss": 1.1193, "step": 6610 }, { "epoch": 0.7246535575395738, "grad_norm": 1.234375, "learning_rate": 4.273201823510066e-06, "loss": 1.1548, "step": 6615 }, { "epoch": 0.7252012926548721, "grad_norm": 1.25, "learning_rate": 4.2575372436545125e-06, "loss": 1.1503, "step": 6620 }, { "epoch": 0.7257490277701704, "grad_norm": 1.2109375, "learning_rate": 4.241893659127428e-06, "loss": 1.0939, "step": 6625 }, { "epoch": 0.7262967628854686, "grad_norm": 1.21875, "learning_rate": 4.226271127124153e-06, "loss": 1.1613, "step": 6630 }, { "epoch": 0.7268444980007668, "grad_norm": 1.21875, "learning_rate": 4.210669704763084e-06, "loss": 1.1476, "step": 6635 }, { "epoch": 0.727392233116065, "grad_norm": 1.34375, "learning_rate": 4.195089449085424e-06, "loss": 1.1295, "step": 6640 }, { "epoch": 0.7279399682313633, "grad_norm": 1.1953125, "learning_rate": 4.1795304170549784e-06, "loss": 1.1707, "step": 6645 }, { "epoch": 0.7284877033466616, "grad_norm": 1.2265625, "learning_rate": 4.163992665557975e-06, "loss": 1.1444, "step": 6650 }, { "epoch": 0.7290354384619598, "grad_norm": 1.265625, "learning_rate": 4.148476251402821e-06, "loss": 1.2146, "step": 6655 }, { "epoch": 0.729583173577258, "grad_norm": 1.1953125, "learning_rate": 4.1329812313199195e-06, "loss": 1.1645, "step": 6660 }, { "epoch": 0.7301309086925563, "grad_norm": 1.2265625, "learning_rate": 4.117507661961451e-06, "loss": 1.1362, "step": 6665 }, { "epoch": 0.7306786438078545, "grad_norm": 1.2578125, "learning_rate": 4.102055599901168e-06, "loss": 1.1424, "step": 6670 }, { "epoch": 0.7312263789231528, "grad_norm": 1.25, "learning_rate": 4.08662510163419e-06, "loss": 1.1322, "step": 6675 }, { "epoch": 0.731774114038451, "grad_norm": 1.203125, "learning_rate": 4.071216223576795e-06, "loss": 1.1155, "step": 6680 }, { "epoch": 0.7323218491537492, "grad_norm": 1.21875, "learning_rate": 4.055829022066216e-06, "loss": 1.126, "step": 6685 }, { "epoch": 0.7328695842690475, "grad_norm": 1.234375, "learning_rate": 4.040463553360431e-06, "loss": 1.1659, "step": 6690 }, { "epoch": 0.7334173193843457, "grad_norm": 1.2109375, "learning_rate": 4.025119873637962e-06, "loss": 1.1248, "step": 6695 }, { "epoch": 0.733965054499644, "grad_norm": 1.234375, "learning_rate": 4.009798038997664e-06, "loss": 1.1073, "step": 6700 }, { "epoch": 0.7345127896149423, "grad_norm": 1.1796875, "learning_rate": 3.9944981054585276e-06, "loss": 1.1886, "step": 6705 }, { "epoch": 0.7350605247302404, "grad_norm": 1.234375, "learning_rate": 3.979220128959463e-06, "loss": 1.1453, "step": 6710 }, { "epoch": 0.7356082598455387, "grad_norm": 1.1640625, "learning_rate": 3.96396416535911e-06, "loss": 1.1076, "step": 6715 }, { "epoch": 0.736155994960837, "grad_norm": 1.2109375, "learning_rate": 3.948730270435618e-06, "loss": 1.1978, "step": 6720 }, { "epoch": 0.7367037300761352, "grad_norm": 1.296875, "learning_rate": 3.933518499886455e-06, "loss": 1.1705, "step": 6725 }, { "epoch": 0.7372514651914335, "grad_norm": 1.203125, "learning_rate": 3.918328909328198e-06, "loss": 1.1433, "step": 6730 }, { "epoch": 0.7377992003067316, "grad_norm": 1.21875, "learning_rate": 3.9031615542963305e-06, "loss": 1.1205, "step": 6735 }, { "epoch": 0.7383469354220299, "grad_norm": 1.34375, "learning_rate": 3.8880164902450375e-06, "loss": 1.2021, "step": 6740 }, { "epoch": 0.7388946705373282, "grad_norm": 1.2109375, "learning_rate": 3.8728937725470084e-06, "loss": 1.1087, "step": 6745 }, { "epoch": 0.7394424056526264, "grad_norm": 1.203125, "learning_rate": 3.857793456493226e-06, "loss": 1.1444, "step": 6750 }, { "epoch": 0.7399901407679247, "grad_norm": 1.21875, "learning_rate": 3.842715597292773e-06, "loss": 1.1848, "step": 6755 }, { "epoch": 0.7405378758832228, "grad_norm": 1.2734375, "learning_rate": 3.8276602500726265e-06, "loss": 1.1686, "step": 6760 }, { "epoch": 0.7410856109985211, "grad_norm": 1.1953125, "learning_rate": 3.8126274698774513e-06, "loss": 1.1225, "step": 6765 }, { "epoch": 0.7416333461138194, "grad_norm": 1.2265625, "learning_rate": 3.797617311669417e-06, "loss": 1.2015, "step": 6770 }, { "epoch": 0.7421810812291176, "grad_norm": 1.2734375, "learning_rate": 3.7826298303279652e-06, "loss": 1.1643, "step": 6775 }, { "epoch": 0.7427288163444158, "grad_norm": 1.234375, "learning_rate": 3.7676650806496386e-06, "loss": 1.1988, "step": 6780 }, { "epoch": 0.743276551459714, "grad_norm": 1.2265625, "learning_rate": 3.7527231173478684e-06, "loss": 1.1904, "step": 6785 }, { "epoch": 0.7438242865750123, "grad_norm": 1.1796875, "learning_rate": 3.7378039950527733e-06, "loss": 1.1832, "step": 6790 }, { "epoch": 0.7443720216903106, "grad_norm": 1.2265625, "learning_rate": 3.722907768310965e-06, "loss": 1.1459, "step": 6795 }, { "epoch": 0.7449197568056088, "grad_norm": 1.296875, "learning_rate": 3.7080344915853406e-06, "loss": 1.1343, "step": 6800 }, { "epoch": 0.745467491920907, "grad_norm": 1.3359375, "learning_rate": 3.6931842192548884e-06, "loss": 1.161, "step": 6805 }, { "epoch": 0.7460152270362053, "grad_norm": 1.2109375, "learning_rate": 3.6783570056145025e-06, "loss": 1.089, "step": 6810 }, { "epoch": 0.7465629621515035, "grad_norm": 1.2109375, "learning_rate": 3.6635529048747498e-06, "loss": 1.1435, "step": 6815 }, { "epoch": 0.7471106972668018, "grad_norm": 1.203125, "learning_rate": 3.648771971161702e-06, "loss": 1.1859, "step": 6820 }, { "epoch": 0.7476584323821001, "grad_norm": 1.2265625, "learning_rate": 3.6340142585167415e-06, "loss": 1.1555, "step": 6825 }, { "epoch": 0.7482061674973982, "grad_norm": 1.171875, "learning_rate": 3.6192798208963255e-06, "loss": 1.1323, "step": 6830 }, { "epoch": 0.7487539026126965, "grad_norm": 1.2109375, "learning_rate": 3.6045687121718365e-06, "loss": 1.1747, "step": 6835 }, { "epoch": 0.7493016377279947, "grad_norm": 1.2421875, "learning_rate": 3.589880986129356e-06, "loss": 1.1286, "step": 6840 }, { "epoch": 0.749849372843293, "grad_norm": 1.2734375, "learning_rate": 3.5752166964694613e-06, "loss": 1.1288, "step": 6845 }, { "epoch": 0.7503971079585913, "grad_norm": 1.21875, "learning_rate": 3.5605758968070658e-06, "loss": 1.2077, "step": 6850 }, { "epoch": 0.7509448430738894, "grad_norm": 1.140625, "learning_rate": 3.5459586406711875e-06, "loss": 1.0527, "step": 6855 }, { "epoch": 0.7514925781891877, "grad_norm": 1.234375, "learning_rate": 3.531364981504758e-06, "loss": 1.1637, "step": 6860 }, { "epoch": 0.752040313304486, "grad_norm": 1.3125, "learning_rate": 3.5167949726644545e-06, "loss": 1.1552, "step": 6865 }, { "epoch": 0.7525880484197842, "grad_norm": 1.203125, "learning_rate": 3.5022486674204626e-06, "loss": 1.1109, "step": 6870 }, { "epoch": 0.7531357835350825, "grad_norm": 1.234375, "learning_rate": 3.4877261189563237e-06, "loss": 1.1444, "step": 6875 }, { "epoch": 0.7536835186503806, "grad_norm": 1.265625, "learning_rate": 3.4732273803687133e-06, "loss": 1.1577, "step": 6880 }, { "epoch": 0.7542312537656789, "grad_norm": 1.234375, "learning_rate": 3.4587525046672433e-06, "loss": 1.1359, "step": 6885 }, { "epoch": 0.7547789888809772, "grad_norm": 1.25, "learning_rate": 3.4443015447742977e-06, "loss": 1.1344, "step": 6890 }, { "epoch": 0.7553267239962754, "grad_norm": 1.1953125, "learning_rate": 3.4298745535248135e-06, "loss": 1.1513, "step": 6895 }, { "epoch": 0.7558744591115737, "grad_norm": 1.2109375, "learning_rate": 3.4154715836660814e-06, "loss": 1.1569, "step": 6900 }, { "epoch": 0.7564221942268718, "grad_norm": 1.2265625, "learning_rate": 3.40109268785759e-06, "loss": 1.1421, "step": 6905 }, { "epoch": 0.7569699293421701, "grad_norm": 1.2578125, "learning_rate": 3.3867379186707927e-06, "loss": 1.1312, "step": 6910 }, { "epoch": 0.7575176644574684, "grad_norm": 1.28125, "learning_rate": 3.372407328588938e-06, "loss": 1.1694, "step": 6915 }, { "epoch": 0.7580653995727666, "grad_norm": 1.2578125, "learning_rate": 3.35810097000687e-06, "loss": 1.1709, "step": 6920 }, { "epoch": 0.7586131346880649, "grad_norm": 1.203125, "learning_rate": 3.3438188952308403e-06, "loss": 1.15, "step": 6925 }, { "epoch": 0.7591608698033631, "grad_norm": 1.2265625, "learning_rate": 3.3295611564783136e-06, "loss": 1.0951, "step": 6930 }, { "epoch": 0.7597086049186613, "grad_norm": 1.2578125, "learning_rate": 3.315327805877784e-06, "loss": 1.2345, "step": 6935 }, { "epoch": 0.7602563400339596, "grad_norm": 1.2890625, "learning_rate": 3.3011188954685626e-06, "loss": 1.1564, "step": 6940 }, { "epoch": 0.7608040751492579, "grad_norm": 1.265625, "learning_rate": 3.286934477200624e-06, "loss": 1.1598, "step": 6945 }, { "epoch": 0.761351810264556, "grad_norm": 1.2109375, "learning_rate": 3.2727746029343856e-06, "loss": 1.1203, "step": 6950 }, { "epoch": 0.7618995453798543, "grad_norm": 1.2421875, "learning_rate": 3.258639324440527e-06, "loss": 1.1721, "step": 6955 }, { "epoch": 0.7624472804951525, "grad_norm": 1.234375, "learning_rate": 3.2445286933998044e-06, "loss": 1.1528, "step": 6960 }, { "epoch": 0.7629950156104508, "grad_norm": 1.21875, "learning_rate": 3.23044276140286e-06, "loss": 1.124, "step": 6965 }, { "epoch": 0.7635427507257491, "grad_norm": 1.234375, "learning_rate": 3.2163815799500288e-06, "loss": 1.1527, "step": 6970 }, { "epoch": 0.7640904858410472, "grad_norm": 1.2109375, "learning_rate": 3.202345200451158e-06, "loss": 1.103, "step": 6975 }, { "epoch": 0.7646382209563455, "grad_norm": 1.234375, "learning_rate": 3.1883336742254124e-06, "loss": 1.16, "step": 6980 }, { "epoch": 0.7651859560716437, "grad_norm": 1.1953125, "learning_rate": 3.174347052501091e-06, "loss": 1.1707, "step": 6985 }, { "epoch": 0.765733691186942, "grad_norm": 1.25, "learning_rate": 3.160385386415438e-06, "loss": 1.1882, "step": 6990 }, { "epoch": 0.7662814263022403, "grad_norm": 1.2578125, "learning_rate": 3.146448727014454e-06, "loss": 1.1877, "step": 6995 }, { "epoch": 0.7668291614175384, "grad_norm": 1.2109375, "learning_rate": 3.1325371252527127e-06, "loss": 1.1277, "step": 7000 }, { "epoch": 0.7673768965328367, "grad_norm": 1.2890625, "learning_rate": 3.1186506319931754e-06, "loss": 1.1748, "step": 7005 }, { "epoch": 0.767924631648135, "grad_norm": 1.2578125, "learning_rate": 3.1047892980069984e-06, "loss": 1.2315, "step": 7010 }, { "epoch": 0.7684723667634332, "grad_norm": 1.328125, "learning_rate": 3.0909531739733544e-06, "loss": 1.2003, "step": 7015 }, { "epoch": 0.7690201018787315, "grad_norm": 1.2265625, "learning_rate": 3.0771423104792454e-06, "loss": 1.1535, "step": 7020 }, { "epoch": 0.7695678369940296, "grad_norm": 1.2578125, "learning_rate": 3.0633567580193147e-06, "loss": 1.1765, "step": 7025 }, { "epoch": 0.7701155721093279, "grad_norm": 1.234375, "learning_rate": 3.049596566995666e-06, "loss": 1.1403, "step": 7030 }, { "epoch": 0.7706633072246262, "grad_norm": 1.25, "learning_rate": 3.0358617877176787e-06, "loss": 1.1534, "step": 7035 }, { "epoch": 0.7712110423399244, "grad_norm": 1.203125, "learning_rate": 3.0221524704018212e-06, "loss": 1.1534, "step": 7040 }, { "epoch": 0.7717587774552227, "grad_norm": 1.25, "learning_rate": 3.0084686651714647e-06, "loss": 1.1507, "step": 7045 }, { "epoch": 0.7723065125705209, "grad_norm": 1.1640625, "learning_rate": 2.99481042205672e-06, "loss": 1.1906, "step": 7050 }, { "epoch": 0.7728542476858191, "grad_norm": 1.2421875, "learning_rate": 2.9811777909942185e-06, "loss": 1.1704, "step": 7055 }, { "epoch": 0.7734019828011174, "grad_norm": 1.328125, "learning_rate": 2.9675708218269596e-06, "loss": 1.1865, "step": 7060 }, { "epoch": 0.7739497179164156, "grad_norm": 1.25, "learning_rate": 2.9539895643041283e-06, "loss": 1.1695, "step": 7065 }, { "epoch": 0.7744974530317139, "grad_norm": 1.25, "learning_rate": 2.940434068080883e-06, "loss": 1.1652, "step": 7070 }, { "epoch": 0.7750451881470121, "grad_norm": 1.234375, "learning_rate": 2.9269043827182084e-06, "loss": 1.1423, "step": 7075 }, { "epoch": 0.7755929232623103, "grad_norm": 1.265625, "learning_rate": 2.9134005576827273e-06, "loss": 1.1913, "step": 7080 }, { "epoch": 0.7761406583776086, "grad_norm": 1.25, "learning_rate": 2.899922642346491e-06, "loss": 1.1596, "step": 7085 }, { "epoch": 0.7766883934929069, "grad_norm": 1.21875, "learning_rate": 2.8864706859868443e-06, "loss": 1.1581, "step": 7090 }, { "epoch": 0.7772361286082051, "grad_norm": 1.265625, "learning_rate": 2.8730447377862116e-06, "loss": 1.1263, "step": 7095 }, { "epoch": 0.7777838637235033, "grad_norm": 1.3125, "learning_rate": 2.859644846831918e-06, "loss": 1.1653, "step": 7100 }, { "epoch": 0.7783315988388015, "grad_norm": 1.21875, "learning_rate": 2.8462710621160427e-06, "loss": 1.1609, "step": 7105 }, { "epoch": 0.7788793339540998, "grad_norm": 1.265625, "learning_rate": 2.8329234325351917e-06, "loss": 1.0972, "step": 7110 }, { "epoch": 0.7794270690693981, "grad_norm": 1.234375, "learning_rate": 2.8196020068903574e-06, "loss": 1.1194, "step": 7115 }, { "epoch": 0.7799748041846963, "grad_norm": 1.21875, "learning_rate": 2.8063068338867315e-06, "loss": 1.17, "step": 7120 }, { "epoch": 0.7805225392999945, "grad_norm": 1.203125, "learning_rate": 2.7930379621335035e-06, "loss": 1.1294, "step": 7125 }, { "epoch": 0.7810702744152928, "grad_norm": 1.1875, "learning_rate": 2.779795440143721e-06, "loss": 1.1411, "step": 7130 }, { "epoch": 0.781618009530591, "grad_norm": 1.2890625, "learning_rate": 2.766579316334085e-06, "loss": 1.2037, "step": 7135 }, { "epoch": 0.7821657446458893, "grad_norm": 1.234375, "learning_rate": 2.7533896390247706e-06, "loss": 1.1298, "step": 7140 }, { "epoch": 0.7827134797611874, "grad_norm": 1.171875, "learning_rate": 2.7402264564392788e-06, "loss": 1.1657, "step": 7145 }, { "epoch": 0.7832612148764857, "grad_norm": 1.25, "learning_rate": 2.7270898167042325e-06, "loss": 1.1313, "step": 7150 }, { "epoch": 0.783808949991784, "grad_norm": 1.2109375, "learning_rate": 2.7139797678492032e-06, "loss": 1.1042, "step": 7155 }, { "epoch": 0.7843566851070822, "grad_norm": 1.203125, "learning_rate": 2.700896357806555e-06, "loss": 1.2117, "step": 7160 }, { "epoch": 0.7849044202223805, "grad_norm": 1.2578125, "learning_rate": 2.6878396344112513e-06, "loss": 1.1121, "step": 7165 }, { "epoch": 0.7854521553376786, "grad_norm": 1.234375, "learning_rate": 2.6748096454006812e-06, "loss": 1.1624, "step": 7170 }, { "epoch": 0.7859998904529769, "grad_norm": 1.203125, "learning_rate": 2.6618064384144925e-06, "loss": 1.0864, "step": 7175 }, { "epoch": 0.7865476255682752, "grad_norm": 1.2109375, "learning_rate": 2.648830060994414e-06, "loss": 1.1666, "step": 7180 }, { "epoch": 0.7870953606835734, "grad_norm": 1.2109375, "learning_rate": 2.63588056058408e-06, "loss": 1.0627, "step": 7185 }, { "epoch": 0.7876430957988717, "grad_norm": 1.2265625, "learning_rate": 2.622957984528861e-06, "loss": 1.1774, "step": 7190 }, { "epoch": 0.7881908309141699, "grad_norm": 1.1953125, "learning_rate": 2.6100623800756787e-06, "loss": 1.117, "step": 7195 }, { "epoch": 0.7887385660294681, "grad_norm": 1.2578125, "learning_rate": 2.5971937943728563e-06, "loss": 1.1719, "step": 7200 }, { "epoch": 0.7892863011447664, "grad_norm": 1.1875, "learning_rate": 2.5843522744699246e-06, "loss": 1.1124, "step": 7205 }, { "epoch": 0.7898340362600647, "grad_norm": 1.1640625, "learning_rate": 2.5715378673174575e-06, "loss": 1.1658, "step": 7210 }, { "epoch": 0.7903817713753629, "grad_norm": 1.2578125, "learning_rate": 2.558750619766902e-06, "loss": 1.1308, "step": 7215 }, { "epoch": 0.7909295064906611, "grad_norm": 1.1953125, "learning_rate": 2.545990578570404e-06, "loss": 1.1377, "step": 7220 }, { "epoch": 0.7914772416059593, "grad_norm": 1.203125, "learning_rate": 2.533257790380642e-06, "loss": 1.1657, "step": 7225 }, { "epoch": 0.7920249767212576, "grad_norm": 1.1953125, "learning_rate": 2.5205523017506496e-06, "loss": 1.1398, "step": 7230 }, { "epoch": 0.7925727118365559, "grad_norm": 1.3203125, "learning_rate": 2.5078741591336496e-06, "loss": 1.193, "step": 7235 }, { "epoch": 0.7931204469518541, "grad_norm": 1.21875, "learning_rate": 2.495223408882886e-06, "loss": 1.168, "step": 7240 }, { "epoch": 0.7936681820671523, "grad_norm": 1.265625, "learning_rate": 2.4826000972514498e-06, "loss": 1.1467, "step": 7245 }, { "epoch": 0.7942159171824505, "grad_norm": 1.2109375, "learning_rate": 2.4700042703921132e-06, "loss": 1.154, "step": 7250 }, { "epoch": 0.7947636522977488, "grad_norm": 1.234375, "learning_rate": 2.4574359743571606e-06, "loss": 1.1555, "step": 7255 }, { "epoch": 0.7953113874130471, "grad_norm": 1.28125, "learning_rate": 2.4448952550982142e-06, "loss": 1.1517, "step": 7260 }, { "epoch": 0.7958591225283453, "grad_norm": 1.2734375, "learning_rate": 2.4323821584660846e-06, "loss": 1.1558, "step": 7265 }, { "epoch": 0.7964068576436435, "grad_norm": 1.21875, "learning_rate": 2.4198967302105712e-06, "loss": 1.106, "step": 7270 }, { "epoch": 0.7969545927589418, "grad_norm": 1.265625, "learning_rate": 2.4074390159803273e-06, "loss": 1.17, "step": 7275 }, { "epoch": 0.79750232787424, "grad_norm": 1.203125, "learning_rate": 2.395009061322675e-06, "loss": 1.1446, "step": 7280 }, { "epoch": 0.7980500629895383, "grad_norm": 1.28125, "learning_rate": 2.3826069116834426e-06, "loss": 1.1757, "step": 7285 }, { "epoch": 0.7985977981048366, "grad_norm": 1.171875, "learning_rate": 2.370232612406801e-06, "loss": 1.1398, "step": 7290 }, { "epoch": 0.7991455332201347, "grad_norm": 1.3203125, "learning_rate": 2.3578862087350927e-06, "loss": 1.1763, "step": 7295 }, { "epoch": 0.799693268335433, "grad_norm": 1.234375, "learning_rate": 2.3455677458086702e-06, "loss": 1.1386, "step": 7300 }, { "epoch": 0.8002410034507312, "grad_norm": 1.203125, "learning_rate": 2.3332772686657402e-06, "loss": 1.1302, "step": 7305 }, { "epoch": 0.8007887385660295, "grad_norm": 1.1875, "learning_rate": 2.3210148222421715e-06, "loss": 1.1224, "step": 7310 }, { "epoch": 0.8013364736813277, "grad_norm": 1.21875, "learning_rate": 2.3087804513713563e-06, "loss": 1.1453, "step": 7315 }, { "epoch": 0.8018842087966259, "grad_norm": 1.265625, "learning_rate": 2.29657420078405e-06, "loss": 1.1204, "step": 7320 }, { "epoch": 0.8024319439119242, "grad_norm": 1.3359375, "learning_rate": 2.284396115108174e-06, "loss": 1.1726, "step": 7325 }, { "epoch": 0.8029796790272224, "grad_norm": 1.21875, "learning_rate": 2.272246238868687e-06, "loss": 1.1466, "step": 7330 }, { "epoch": 0.8035274141425207, "grad_norm": 1.21875, "learning_rate": 2.2601246164874168e-06, "loss": 1.1598, "step": 7335 }, { "epoch": 0.8040751492578189, "grad_norm": 1.2578125, "learning_rate": 2.2480312922828717e-06, "loss": 1.1559, "step": 7340 }, { "epoch": 0.8046228843731171, "grad_norm": 1.2734375, "learning_rate": 2.2359663104701133e-06, "loss": 1.1813, "step": 7345 }, { "epoch": 0.8051706194884154, "grad_norm": 1.234375, "learning_rate": 2.2239297151605774e-06, "loss": 1.1431, "step": 7350 }, { "epoch": 0.8057183546037137, "grad_norm": 1.2109375, "learning_rate": 2.2119215503618995e-06, "loss": 1.1598, "step": 7355 }, { "epoch": 0.8062660897190119, "grad_norm": 1.1796875, "learning_rate": 2.1999418599777935e-06, "loss": 1.1539, "step": 7360 }, { "epoch": 0.8068138248343101, "grad_norm": 1.1796875, "learning_rate": 2.1879906878078427e-06, "loss": 1.1623, "step": 7365 }, { "epoch": 0.8073615599496083, "grad_norm": 1.1875, "learning_rate": 2.1760680775473742e-06, "loss": 1.108, "step": 7370 }, { "epoch": 0.8079092950649066, "grad_norm": 1.21875, "learning_rate": 2.1641740727872963e-06, "loss": 1.1486, "step": 7375 }, { "epoch": 0.8084570301802049, "grad_norm": 1.2265625, "learning_rate": 2.152308717013911e-06, "loss": 1.1697, "step": 7380 }, { "epoch": 0.8090047652955031, "grad_norm": 1.2109375, "learning_rate": 2.1404720536087954e-06, "loss": 1.1243, "step": 7385 }, { "epoch": 0.8095525004108013, "grad_norm": 1.2109375, "learning_rate": 2.128664125848615e-06, "loss": 1.1442, "step": 7390 }, { "epoch": 0.8101002355260996, "grad_norm": 1.21875, "learning_rate": 2.116884976904965e-06, "loss": 1.0855, "step": 7395 }, { "epoch": 0.8106479706413978, "grad_norm": 1.25, "learning_rate": 2.105134649844235e-06, "loss": 1.1801, "step": 7400 }, { "epoch": 0.8111957057566961, "grad_norm": 1.1796875, "learning_rate": 2.093413187627431e-06, "loss": 1.0925, "step": 7405 }, { "epoch": 0.8117434408719943, "grad_norm": 1.2578125, "learning_rate": 2.081720633110017e-06, "loss": 1.1548, "step": 7410 }, { "epoch": 0.8122911759872925, "grad_norm": 1.2109375, "learning_rate": 2.070057029041783e-06, "loss": 1.1049, "step": 7415 }, { "epoch": 0.8128389111025908, "grad_norm": 1.2734375, "learning_rate": 2.0584224180666567e-06, "loss": 1.184, "step": 7420 }, { "epoch": 0.813386646217889, "grad_norm": 1.2109375, "learning_rate": 2.046816842722571e-06, "loss": 1.1299, "step": 7425 }, { "epoch": 0.8139343813331873, "grad_norm": 1.1796875, "learning_rate": 2.0352403454412984e-06, "loss": 1.1397, "step": 7430 }, { "epoch": 0.8144821164484856, "grad_norm": 1.1953125, "learning_rate": 2.0236929685482875e-06, "loss": 1.1364, "step": 7435 }, { "epoch": 0.8150298515637837, "grad_norm": 1.171875, "learning_rate": 2.012174754262537e-06, "loss": 1.1209, "step": 7440 }, { "epoch": 0.815577586679082, "grad_norm": 1.2421875, "learning_rate": 2.000685744696409e-06, "loss": 1.1272, "step": 7445 }, { "epoch": 0.8161253217943802, "grad_norm": 1.28125, "learning_rate": 1.9892259818554915e-06, "loss": 1.2119, "step": 7450 }, { "epoch": 0.8166730569096785, "grad_norm": 1.25, "learning_rate": 1.9777955076384446e-06, "loss": 1.1568, "step": 7455 }, { "epoch": 0.8172207920249768, "grad_norm": 1.2109375, "learning_rate": 1.966394363836842e-06, "loss": 1.1831, "step": 7460 }, { "epoch": 0.8177685271402749, "grad_norm": 1.2578125, "learning_rate": 1.955022592135022e-06, "loss": 1.1637, "step": 7465 }, { "epoch": 0.8183162622555732, "grad_norm": 1.25, "learning_rate": 1.9436802341099337e-06, "loss": 1.1363, "step": 7470 }, { "epoch": 0.8188639973708715, "grad_norm": 1.234375, "learning_rate": 1.9323673312309856e-06, "loss": 1.1293, "step": 7475 }, { "epoch": 0.8194117324861697, "grad_norm": 1.21875, "learning_rate": 1.9210839248598924e-06, "loss": 1.1667, "step": 7480 }, { "epoch": 0.819959467601468, "grad_norm": 1.4140625, "learning_rate": 1.9098300562505266e-06, "loss": 1.2437, "step": 7485 }, { "epoch": 0.8205072027167661, "grad_norm": 1.1875, "learning_rate": 1.8986057665487656e-06, "loss": 1.1136, "step": 7490 }, { "epoch": 0.8210549378320644, "grad_norm": 1.2890625, "learning_rate": 1.887411096792342e-06, "loss": 1.1329, "step": 7495 }, { "epoch": 0.8216026729473627, "grad_norm": 1.2109375, "learning_rate": 1.8762460879106925e-06, "loss": 1.1745, "step": 7500 }, { "epoch": 0.8221504080626609, "grad_norm": 1.2421875, "learning_rate": 1.8651107807248091e-06, "loss": 1.1553, "step": 7505 }, { "epoch": 0.8226981431779591, "grad_norm": 1.25, "learning_rate": 1.8540052159470912e-06, "loss": 1.1032, "step": 7510 }, { "epoch": 0.8232458782932573, "grad_norm": 1.28125, "learning_rate": 1.8429294341811933e-06, "loss": 1.1256, "step": 7515 }, { "epoch": 0.8237936134085556, "grad_norm": 1.28125, "learning_rate": 1.8318834759218817e-06, "loss": 1.1521, "step": 7520 }, { "epoch": 0.8243413485238539, "grad_norm": 1.21875, "learning_rate": 1.8208673815548794e-06, "loss": 1.1444, "step": 7525 }, { "epoch": 0.8248890836391521, "grad_norm": 1.1953125, "learning_rate": 1.8098811913567248e-06, "loss": 1.0999, "step": 7530 }, { "epoch": 0.8254368187544503, "grad_norm": 1.2109375, "learning_rate": 1.7989249454946234e-06, "loss": 1.1726, "step": 7535 }, { "epoch": 0.8259845538697486, "grad_norm": 1.3125, "learning_rate": 1.7879986840262953e-06, "loss": 1.1836, "step": 7540 }, { "epoch": 0.8265322889850468, "grad_norm": 1.3046875, "learning_rate": 1.7771024468998377e-06, "loss": 1.1058, "step": 7545 }, { "epoch": 0.8270800241003451, "grad_norm": 1.2734375, "learning_rate": 1.766236273953571e-06, "loss": 1.1559, "step": 7550 }, { "epoch": 0.8276277592156434, "grad_norm": 1.25, "learning_rate": 1.7554002049158947e-06, "loss": 1.1613, "step": 7555 }, { "epoch": 0.8281754943309415, "grad_norm": 1.2265625, "learning_rate": 1.7445942794051552e-06, "loss": 1.1669, "step": 7560 }, { "epoch": 0.8287232294462398, "grad_norm": 1.2109375, "learning_rate": 1.7338185369294725e-06, "loss": 1.1707, "step": 7565 }, { "epoch": 0.829270964561538, "grad_norm": 1.2421875, "learning_rate": 1.7230730168866216e-06, "loss": 1.1434, "step": 7570 }, { "epoch": 0.8298186996768363, "grad_norm": 1.15625, "learning_rate": 1.7123577585638861e-06, "loss": 1.1345, "step": 7575 }, { "epoch": 0.8303664347921346, "grad_norm": 1.2578125, "learning_rate": 1.7016728011378936e-06, "loss": 1.1141, "step": 7580 }, { "epoch": 0.8309141699074327, "grad_norm": 1.3125, "learning_rate": 1.6910181836744955e-06, "loss": 1.1606, "step": 7585 }, { "epoch": 0.831461905022731, "grad_norm": 1.2890625, "learning_rate": 1.68039394512862e-06, "loss": 1.1388, "step": 7590 }, { "epoch": 0.8320096401380292, "grad_norm": 1.2421875, "learning_rate": 1.669800124344112e-06, "loss": 1.1396, "step": 7595 }, { "epoch": 0.8325573752533275, "grad_norm": 1.1796875, "learning_rate": 1.6592367600536209e-06, "loss": 1.1241, "step": 7600 }, { "epoch": 0.8331051103686258, "grad_norm": 1.2578125, "learning_rate": 1.648703890878427e-06, "loss": 1.1524, "step": 7605 }, { "epoch": 0.8336528454839239, "grad_norm": 1.3203125, "learning_rate": 1.6382015553283215e-06, "loss": 1.1413, "step": 7610 }, { "epoch": 0.8342005805992222, "grad_norm": 1.25, "learning_rate": 1.6277297918014711e-06, "loss": 1.1485, "step": 7615 }, { "epoch": 0.8347483157145205, "grad_norm": 1.2109375, "learning_rate": 1.6172886385842457e-06, "loss": 1.1215, "step": 7620 }, { "epoch": 0.8352960508298187, "grad_norm": 1.2421875, "learning_rate": 1.6068781338511131e-06, "loss": 1.1049, "step": 7625 }, { "epoch": 0.835843785945117, "grad_norm": 1.265625, "learning_rate": 1.5964983156644876e-06, "loss": 1.1723, "step": 7630 }, { "epoch": 0.8363915210604151, "grad_norm": 1.1796875, "learning_rate": 1.5861492219745733e-06, "loss": 1.1166, "step": 7635 }, { "epoch": 0.8369392561757134, "grad_norm": 1.25, "learning_rate": 1.575830890619261e-06, "loss": 1.1591, "step": 7640 }, { "epoch": 0.8374869912910117, "grad_norm": 1.234375, "learning_rate": 1.5655433593239566e-06, "loss": 1.1433, "step": 7645 }, { "epoch": 0.8380347264063099, "grad_norm": 1.2734375, "learning_rate": 1.5552866657014542e-06, "loss": 1.1351, "step": 7650 }, { "epoch": 0.8385824615216082, "grad_norm": 1.203125, "learning_rate": 1.5450608472518115e-06, "loss": 1.1423, "step": 7655 }, { "epoch": 0.8391301966369064, "grad_norm": 1.1796875, "learning_rate": 1.5348659413621946e-06, "loss": 1.1316, "step": 7660 }, { "epoch": 0.8396779317522046, "grad_norm": 1.203125, "learning_rate": 1.5247019853067501e-06, "loss": 1.1513, "step": 7665 }, { "epoch": 0.8402256668675029, "grad_norm": 1.1953125, "learning_rate": 1.5145690162464666e-06, "loss": 1.1162, "step": 7670 }, { "epoch": 0.8407734019828011, "grad_norm": 1.1875, "learning_rate": 1.5044670712290399e-06, "loss": 1.1247, "step": 7675 }, { "epoch": 0.8413211370980994, "grad_norm": 1.2421875, "learning_rate": 1.4943961871887368e-06, "loss": 1.1506, "step": 7680 }, { "epoch": 0.8418688722133976, "grad_norm": 1.25, "learning_rate": 1.484356400946264e-06, "loss": 1.1769, "step": 7685 }, { "epoch": 0.8424166073286958, "grad_norm": 1.3125, "learning_rate": 1.47434774920862e-06, "loss": 1.1816, "step": 7690 }, { "epoch": 0.8429643424439941, "grad_norm": 1.203125, "learning_rate": 1.4643702685689832e-06, "loss": 1.1576, "step": 7695 }, { "epoch": 0.8435120775592924, "grad_norm": 1.171875, "learning_rate": 1.4544239955065576e-06, "loss": 1.109, "step": 7700 }, { "epoch": 0.8440598126745905, "grad_norm": 1.2578125, "learning_rate": 1.444508966386451e-06, "loss": 1.2185, "step": 7705 }, { "epoch": 0.8446075477898888, "grad_norm": 1.2421875, "learning_rate": 1.4346252174595354e-06, "loss": 1.1538, "step": 7710 }, { "epoch": 0.845155282905187, "grad_norm": 1.265625, "learning_rate": 1.4247727848623205e-06, "loss": 1.1246, "step": 7715 }, { "epoch": 0.8457030180204853, "grad_norm": 1.265625, "learning_rate": 1.4149517046168182e-06, "loss": 1.1427, "step": 7720 }, { "epoch": 0.8462507531357836, "grad_norm": 1.265625, "learning_rate": 1.40516201263041e-06, "loss": 1.1379, "step": 7725 }, { "epoch": 0.8467984882510817, "grad_norm": 1.21875, "learning_rate": 1.395403744695717e-06, "loss": 1.119, "step": 7730 }, { "epoch": 0.84734622336638, "grad_norm": 1.171875, "learning_rate": 1.385676936490472e-06, "loss": 1.15, "step": 7735 }, { "epoch": 0.8478939584816783, "grad_norm": 1.2421875, "learning_rate": 1.3759816235773838e-06, "loss": 1.1581, "step": 7740 }, { "epoch": 0.8484416935969765, "grad_norm": 1.203125, "learning_rate": 1.366317841404009e-06, "loss": 1.1532, "step": 7745 }, { "epoch": 0.8489894287122748, "grad_norm": 1.21875, "learning_rate": 1.356685625302625e-06, "loss": 1.1954, "step": 7750 }, { "epoch": 0.8495371638275729, "grad_norm": 1.2265625, "learning_rate": 1.3470850104900967e-06, "loss": 1.1654, "step": 7755 }, { "epoch": 0.8500848989428712, "grad_norm": 1.3203125, "learning_rate": 1.3375160320677517e-06, "loss": 1.2019, "step": 7760 }, { "epoch": 0.8506326340581695, "grad_norm": 1.265625, "learning_rate": 1.3279787250212483e-06, "loss": 1.132, "step": 7765 }, { "epoch": 0.8511803691734677, "grad_norm": 1.2109375, "learning_rate": 1.3184731242204508e-06, "loss": 1.2017, "step": 7770 }, { "epoch": 0.851728104288766, "grad_norm": 1.2109375, "learning_rate": 1.3089992644193005e-06, "loss": 1.1279, "step": 7775 }, { "epoch": 0.8522758394040642, "grad_norm": 1.2734375, "learning_rate": 1.2995571802556872e-06, "loss": 1.1969, "step": 7780 }, { "epoch": 0.8528235745193624, "grad_norm": 1.21875, "learning_rate": 1.2901469062513262e-06, "loss": 1.1247, "step": 7785 }, { "epoch": 0.8533713096346607, "grad_norm": 1.1875, "learning_rate": 1.2807684768116292e-06, "loss": 1.1916, "step": 7790 }, { "epoch": 0.8539190447499589, "grad_norm": 1.265625, "learning_rate": 1.2714219262255777e-06, "loss": 1.1319, "step": 7795 }, { "epoch": 0.8544667798652572, "grad_norm": 1.21875, "learning_rate": 1.2621072886656005e-06, "loss": 1.1725, "step": 7800 }, { "epoch": 0.8550145149805554, "grad_norm": 1.2265625, "learning_rate": 1.2528245981874488e-06, "loss": 1.124, "step": 7805 }, { "epoch": 0.8555622500958536, "grad_norm": 1.25, "learning_rate": 1.2435738887300653e-06, "loss": 1.1971, "step": 7810 }, { "epoch": 0.8561099852111519, "grad_norm": 1.234375, "learning_rate": 1.2343551941154763e-06, "loss": 1.1879, "step": 7815 }, { "epoch": 0.8566577203264502, "grad_norm": 1.1875, "learning_rate": 1.225168548048643e-06, "loss": 1.1339, "step": 7820 }, { "epoch": 0.8572054554417484, "grad_norm": 1.2890625, "learning_rate": 1.2160139841173579e-06, "loss": 1.195, "step": 7825 }, { "epoch": 0.8577531905570466, "grad_norm": 1.203125, "learning_rate": 1.206891535792124e-06, "loss": 1.1395, "step": 7830 }, { "epoch": 0.8583009256723448, "grad_norm": 1.203125, "learning_rate": 1.1978012364260116e-06, "loss": 1.1273, "step": 7835 }, { "epoch": 0.8588486607876431, "grad_norm": 1.265625, "learning_rate": 1.1887431192545573e-06, "loss": 1.1573, "step": 7840 }, { "epoch": 0.8593963959029414, "grad_norm": 1.2578125, "learning_rate": 1.1797172173956328e-06, "loss": 1.1382, "step": 7845 }, { "epoch": 0.8599441310182396, "grad_norm": 1.28125, "learning_rate": 1.170723563849323e-06, "loss": 1.1812, "step": 7850 }, { "epoch": 0.8604918661335378, "grad_norm": 1.1953125, "learning_rate": 1.1617621914978184e-06, "loss": 1.1322, "step": 7855 }, { "epoch": 0.861039601248836, "grad_norm": 1.2109375, "learning_rate": 1.1528331331052688e-06, "loss": 1.2111, "step": 7860 }, { "epoch": 0.8615873363641343, "grad_norm": 1.2265625, "learning_rate": 1.1439364213176895e-06, "loss": 1.114, "step": 7865 }, { "epoch": 0.8621350714794326, "grad_norm": 1.2421875, "learning_rate": 1.135072088662833e-06, "loss": 1.1716, "step": 7870 }, { "epoch": 0.8626828065947307, "grad_norm": 1.1953125, "learning_rate": 1.126240167550059e-06, "loss": 1.1152, "step": 7875 }, { "epoch": 0.863230541710029, "grad_norm": 1.203125, "learning_rate": 1.1174406902702362e-06, "loss": 1.1209, "step": 7880 }, { "epoch": 0.8637782768253273, "grad_norm": 1.25, "learning_rate": 1.1086736889956107e-06, "loss": 1.1278, "step": 7885 }, { "epoch": 0.8643260119406255, "grad_norm": 1.2109375, "learning_rate": 1.0999391957796824e-06, "loss": 1.1796, "step": 7890 }, { "epoch": 0.8648737470559238, "grad_norm": 1.1796875, "learning_rate": 1.0912372425571095e-06, "loss": 1.1365, "step": 7895 }, { "epoch": 0.865421482171222, "grad_norm": 1.375, "learning_rate": 1.0825678611435753e-06, "loss": 1.1597, "step": 7900 }, { "epoch": 0.8659692172865202, "grad_norm": 1.234375, "learning_rate": 1.0739310832356664e-06, "loss": 1.1346, "step": 7905 }, { "epoch": 0.8665169524018185, "grad_norm": 1.234375, "learning_rate": 1.0653269404107802e-06, "loss": 1.124, "step": 7910 }, { "epoch": 0.8670646875171167, "grad_norm": 1.2890625, "learning_rate": 1.056755464126985e-06, "loss": 1.1831, "step": 7915 }, { "epoch": 0.867612422632415, "grad_norm": 1.3046875, "learning_rate": 1.0482166857229204e-06, "loss": 1.1719, "step": 7920 }, { "epoch": 0.8681601577477132, "grad_norm": 1.21875, "learning_rate": 1.0397106364176768e-06, "loss": 1.1345, "step": 7925 }, { "epoch": 0.8687078928630114, "grad_norm": 1.28125, "learning_rate": 1.0312373473106741e-06, "loss": 1.1847, "step": 7930 }, { "epoch": 0.8692556279783097, "grad_norm": 1.2265625, "learning_rate": 1.0227968493815698e-06, "loss": 1.1735, "step": 7935 }, { "epoch": 0.869803363093608, "grad_norm": 1.25, "learning_rate": 1.0143891734901235e-06, "loss": 1.1974, "step": 7940 }, { "epoch": 0.8703510982089062, "grad_norm": 1.21875, "learning_rate": 1.0060143503760877e-06, "loss": 1.1209, "step": 7945 }, { "epoch": 0.8708988333242044, "grad_norm": 1.234375, "learning_rate": 9.976724106591128e-07, "loss": 1.1643, "step": 7950 }, { "epoch": 0.8714465684395026, "grad_norm": 1.234375, "learning_rate": 9.89363384838613e-07, "loss": 1.1561, "step": 7955 }, { "epoch": 0.8719943035548009, "grad_norm": 1.203125, "learning_rate": 9.810873032936653e-07, "loss": 1.154, "step": 7960 }, { "epoch": 0.8725420386700992, "grad_norm": 1.328125, "learning_rate": 9.728441962829006e-07, "loss": 1.1731, "step": 7965 }, { "epoch": 0.8730897737853974, "grad_norm": 1.2109375, "learning_rate": 9.646340939443877e-07, "loss": 1.1772, "step": 7970 }, { "epoch": 0.8736375089006956, "grad_norm": 1.234375, "learning_rate": 9.564570262955252e-07, "loss": 1.1721, "step": 7975 }, { "epoch": 0.8741852440159938, "grad_norm": 1.2109375, "learning_rate": 9.483130232329307e-07, "loss": 1.1366, "step": 7980 }, { "epoch": 0.8747329791312921, "grad_norm": 1.1953125, "learning_rate": 9.40202114532337e-07, "loss": 1.1504, "step": 7985 }, { "epoch": 0.8752807142465904, "grad_norm": 1.234375, "learning_rate": 9.321243298484728e-07, "loss": 1.1317, "step": 7990 }, { "epoch": 0.8758284493618886, "grad_norm": 1.234375, "learning_rate": 9.240796987149658e-07, "loss": 1.1397, "step": 7995 }, { "epoch": 0.8763761844771868, "grad_norm": 1.203125, "learning_rate": 9.160682505442242e-07, "loss": 1.15, "step": 8000 }, { "epoch": 0.8769239195924851, "grad_norm": 1.28125, "learning_rate": 9.080900146273386e-07, "loss": 1.1657, "step": 8005 }, { "epoch": 0.8774716547077833, "grad_norm": 1.28125, "learning_rate": 9.001450201339679e-07, "loss": 1.1981, "step": 8010 }, { "epoch": 0.8780193898230816, "grad_norm": 1.25, "learning_rate": 8.92233296112236e-07, "loss": 1.1738, "step": 8015 }, { "epoch": 0.8785671249383799, "grad_norm": 1.3828125, "learning_rate": 8.843548714886252e-07, "loss": 1.0631, "step": 8020 }, { "epoch": 0.879114860053678, "grad_norm": 1.2265625, "learning_rate": 8.765097750678675e-07, "loss": 1.1533, "step": 8025 }, { "epoch": 0.8796625951689763, "grad_norm": 1.21875, "learning_rate": 8.686980355328467e-07, "loss": 1.1551, "step": 8030 }, { "epoch": 0.8802103302842745, "grad_norm": 1.2734375, "learning_rate": 8.609196814444843e-07, "loss": 1.1485, "step": 8035 }, { "epoch": 0.8807580653995728, "grad_norm": 1.2421875, "learning_rate": 8.531747412416424e-07, "loss": 1.14, "step": 8040 }, { "epoch": 0.8813058005148711, "grad_norm": 1.21875, "learning_rate": 8.454632432410137e-07, "loss": 1.1257, "step": 8045 }, { "epoch": 0.8818535356301692, "grad_norm": 1.21875, "learning_rate": 8.377852156370236e-07, "loss": 1.1692, "step": 8050 }, { "epoch": 0.8824012707454675, "grad_norm": 1.296875, "learning_rate": 8.301406865017247e-07, "loss": 1.1838, "step": 8055 }, { "epoch": 0.8829490058607657, "grad_norm": 1.234375, "learning_rate": 8.225296837846919e-07, "loss": 1.1881, "step": 8060 }, { "epoch": 0.883496740976064, "grad_norm": 1.2265625, "learning_rate": 8.149522353129224e-07, "loss": 1.1945, "step": 8065 }, { "epoch": 0.8840444760913622, "grad_norm": 1.265625, "learning_rate": 8.074083687907408e-07, "loss": 1.1544, "step": 8070 }, { "epoch": 0.8845922112066604, "grad_norm": 1.34375, "learning_rate": 7.998981117996796e-07, "loss": 1.1729, "step": 8075 }, { "epoch": 0.8851399463219587, "grad_norm": 1.3203125, "learning_rate": 7.924214917983952e-07, "loss": 1.1486, "step": 8080 }, { "epoch": 0.885687681437257, "grad_norm": 1.2109375, "learning_rate": 7.84978536122567e-07, "loss": 1.2001, "step": 8085 }, { "epoch": 0.8862354165525552, "grad_norm": 1.234375, "learning_rate": 7.775692719847816e-07, "loss": 1.1905, "step": 8090 }, { "epoch": 0.8867831516678534, "grad_norm": 1.1953125, "learning_rate": 7.701937264744564e-07, "loss": 1.1848, "step": 8095 }, { "epoch": 0.8873308867831516, "grad_norm": 1.2421875, "learning_rate": 7.628519265577162e-07, "loss": 1.1559, "step": 8100 }, { "epoch": 0.8878786218984499, "grad_norm": 1.3125, "learning_rate": 7.555438990773134e-07, "loss": 1.1799, "step": 8105 }, { "epoch": 0.8884263570137482, "grad_norm": 1.234375, "learning_rate": 7.482696707525272e-07, "loss": 1.2088, "step": 8110 }, { "epoch": 0.8889740921290464, "grad_norm": 1.1875, "learning_rate": 7.41029268179052e-07, "loss": 1.1298, "step": 8115 }, { "epoch": 0.8895218272443446, "grad_norm": 1.296875, "learning_rate": 7.338227178289148e-07, "loss": 1.1782, "step": 8120 }, { "epoch": 0.8900695623596429, "grad_norm": 1.2578125, "learning_rate": 7.266500460503778e-07, "loss": 1.2125, "step": 8125 }, { "epoch": 0.8906172974749411, "grad_norm": 1.21875, "learning_rate": 7.195112790678293e-07, "loss": 1.1491, "step": 8130 }, { "epoch": 0.8911650325902394, "grad_norm": 1.21875, "learning_rate": 7.124064429817057e-07, "loss": 1.1438, "step": 8135 }, { "epoch": 0.8917127677055376, "grad_norm": 1.2734375, "learning_rate": 7.053355637683801e-07, "loss": 1.1473, "step": 8140 }, { "epoch": 0.8922605028208358, "grad_norm": 1.203125, "learning_rate": 6.98298667280074e-07, "loss": 1.2112, "step": 8145 }, { "epoch": 0.8928082379361341, "grad_norm": 1.1953125, "learning_rate": 6.912957792447683e-07, "loss": 1.1886, "step": 8150 }, { "epoch": 0.8933559730514323, "grad_norm": 1.2734375, "learning_rate": 6.843269252661011e-07, "loss": 1.1485, "step": 8155 }, { "epoch": 0.8939037081667306, "grad_norm": 1.2265625, "learning_rate": 6.773921308232701e-07, "loss": 1.1473, "step": 8160 }, { "epoch": 0.8944514432820289, "grad_norm": 1.2578125, "learning_rate": 6.70491421270959e-07, "loss": 1.1345, "step": 8165 }, { "epoch": 0.894999178397327, "grad_norm": 1.2265625, "learning_rate": 6.636248218392216e-07, "loss": 1.1708, "step": 8170 }, { "epoch": 0.8955469135126253, "grad_norm": 1.265625, "learning_rate": 6.567923576334034e-07, "loss": 1.1519, "step": 8175 }, { "epoch": 0.8960946486279235, "grad_norm": 1.21875, "learning_rate": 6.499940536340488e-07, "loss": 1.1515, "step": 8180 }, { "epoch": 0.8966423837432218, "grad_norm": 1.3125, "learning_rate": 6.432299346967996e-07, "loss": 1.1465, "step": 8185 }, { "epoch": 0.8971901188585201, "grad_norm": 1.28125, "learning_rate": 6.365000255523202e-07, "loss": 1.1477, "step": 8190 }, { "epoch": 0.8977378539738182, "grad_norm": 1.21875, "learning_rate": 6.298043508061946e-07, "loss": 1.163, "step": 8195 }, { "epoch": 0.8982855890891165, "grad_norm": 1.203125, "learning_rate": 6.231429349388396e-07, "loss": 1.1559, "step": 8200 }, { "epoch": 0.8988333242044148, "grad_norm": 1.1875, "learning_rate": 6.165158023054196e-07, "loss": 1.1445, "step": 8205 }, { "epoch": 0.899381059319713, "grad_norm": 1.25, "learning_rate": 6.099229771357517e-07, "loss": 1.1405, "step": 8210 }, { "epoch": 0.8999287944350113, "grad_norm": 1.234375, "learning_rate": 6.0336448353422e-07, "loss": 1.1287, "step": 8215 }, { "epoch": 0.9004765295503094, "grad_norm": 1.234375, "learning_rate": 5.968403454796889e-07, "loss": 1.1489, "step": 8220 }, { "epoch": 0.9010242646656077, "grad_norm": 1.2421875, "learning_rate": 5.9035058682541e-07, "loss": 1.1511, "step": 8225 }, { "epoch": 0.901571999780906, "grad_norm": 1.2421875, "learning_rate": 5.838952312989432e-07, "loss": 1.1326, "step": 8230 }, { "epoch": 0.9021197348962042, "grad_norm": 1.265625, "learning_rate": 5.774743025020602e-07, "loss": 1.1529, "step": 8235 }, { "epoch": 0.9026674700115024, "grad_norm": 1.234375, "learning_rate": 5.710878239106677e-07, "loss": 1.1237, "step": 8240 }, { "epoch": 0.9032152051268006, "grad_norm": 1.2265625, "learning_rate": 5.647358188747143e-07, "loss": 1.1941, "step": 8245 }, { "epoch": 0.9037629402420989, "grad_norm": 1.28125, "learning_rate": 5.584183106181085e-07, "loss": 1.1752, "step": 8250 }, { "epoch": 0.9043106753573972, "grad_norm": 1.1796875, "learning_rate": 5.521353222386361e-07, "loss": 1.1237, "step": 8255 }, { "epoch": 0.9048584104726954, "grad_norm": 1.1953125, "learning_rate": 5.458868767078673e-07, "loss": 1.0955, "step": 8260 }, { "epoch": 0.9054061455879936, "grad_norm": 1.171875, "learning_rate": 5.396729968710835e-07, "loss": 1.1359, "step": 8265 }, { "epoch": 0.9059538807032919, "grad_norm": 1.25, "learning_rate": 5.334937054471867e-07, "loss": 1.1522, "step": 8270 }, { "epoch": 0.9065016158185901, "grad_norm": 1.2578125, "learning_rate": 5.273490250286173e-07, "loss": 1.1596, "step": 8275 }, { "epoch": 0.9070493509338884, "grad_norm": 1.2421875, "learning_rate": 5.212389780812733e-07, "loss": 1.156, "step": 8280 }, { "epoch": 0.9075970860491867, "grad_norm": 1.2734375, "learning_rate": 5.151635869444293e-07, "loss": 1.1344, "step": 8285 }, { "epoch": 0.9081448211644848, "grad_norm": 1.2421875, "learning_rate": 5.091228738306497e-07, "loss": 1.1873, "step": 8290 }, { "epoch": 0.9086925562797831, "grad_norm": 1.375, "learning_rate": 5.03116860825712e-07, "loss": 1.1077, "step": 8295 }, { "epoch": 0.9092402913950813, "grad_norm": 1.2890625, "learning_rate": 4.971455698885263e-07, "loss": 1.1709, "step": 8300 }, { "epoch": 0.9097880265103796, "grad_norm": 1.1953125, "learning_rate": 4.912090228510502e-07, "loss": 1.1968, "step": 8305 }, { "epoch": 0.9103357616256779, "grad_norm": 1.203125, "learning_rate": 4.85307241418218e-07, "loss": 1.1573, "step": 8310 }, { "epoch": 0.910883496740976, "grad_norm": 1.21875, "learning_rate": 4.794402471678483e-07, "loss": 1.2055, "step": 8315 }, { "epoch": 0.9114312318562743, "grad_norm": 1.21875, "learning_rate": 4.7360806155057557e-07, "loss": 1.1618, "step": 8320 }, { "epoch": 0.9119789669715725, "grad_norm": 1.1953125, "learning_rate": 4.6781070588977187e-07, "loss": 1.1254, "step": 8325 }, { "epoch": 0.9125267020868708, "grad_norm": 1.203125, "learning_rate": 4.620482013814609e-07, "loss": 1.121, "step": 8330 }, { "epoch": 0.9130744372021691, "grad_norm": 1.25, "learning_rate": 4.5632056909424517e-07, "loss": 1.1182, "step": 8335 }, { "epoch": 0.9136221723174672, "grad_norm": 1.2265625, "learning_rate": 4.506278299692335e-07, "loss": 1.167, "step": 8340 }, { "epoch": 0.9141699074327655, "grad_norm": 1.296875, "learning_rate": 4.449700048199546e-07, "loss": 1.1577, "step": 8345 }, { "epoch": 0.9147176425480638, "grad_norm": 1.2109375, "learning_rate": 4.393471143322925e-07, "loss": 1.1233, "step": 8350 }, { "epoch": 0.915265377663362, "grad_norm": 1.2265625, "learning_rate": 4.337591790643969e-07, "loss": 1.1454, "step": 8355 }, { "epoch": 0.9158131127786603, "grad_norm": 1.1953125, "learning_rate": 4.2820621944662077e-07, "loss": 1.1687, "step": 8360 }, { "epoch": 0.9163608478939584, "grad_norm": 1.2109375, "learning_rate": 4.226882557814438e-07, "loss": 1.1294, "step": 8365 }, { "epoch": 0.9169085830092567, "grad_norm": 1.2265625, "learning_rate": 4.172053082433858e-07, "loss": 1.1536, "step": 8370 }, { "epoch": 0.917456318124555, "grad_norm": 1.234375, "learning_rate": 4.117573968789501e-07, "loss": 1.1602, "step": 8375 }, { "epoch": 0.9180040532398532, "grad_norm": 1.21875, "learning_rate": 4.063445416065415e-07, "loss": 1.1011, "step": 8380 }, { "epoch": 0.9185517883551515, "grad_norm": 1.2578125, "learning_rate": 4.009667622163882e-07, "loss": 1.2093, "step": 8385 }, { "epoch": 0.9190995234704497, "grad_norm": 1.28125, "learning_rate": 3.9562407837048566e-07, "loss": 1.1483, "step": 8390 }, { "epoch": 0.9196472585857479, "grad_norm": 1.1953125, "learning_rate": 3.9031650960250635e-07, "loss": 1.1597, "step": 8395 }, { "epoch": 0.9201949937010462, "grad_norm": 1.1484375, "learning_rate": 3.850440753177376e-07, "loss": 1.089, "step": 8400 }, { "epoch": 0.9207427288163444, "grad_norm": 1.2578125, "learning_rate": 3.79806794793014e-07, "loss": 1.1409, "step": 8405 }, { "epoch": 0.9212904639316427, "grad_norm": 1.2578125, "learning_rate": 3.7460468717663955e-07, "loss": 1.1505, "step": 8410 }, { "epoch": 0.9218381990469409, "grad_norm": 1.234375, "learning_rate": 3.6943777148831907e-07, "loss": 1.1877, "step": 8415 }, { "epoch": 0.9223859341622391, "grad_norm": 1.2109375, "learning_rate": 3.6430606661909673e-07, "loss": 1.1677, "step": 8420 }, { "epoch": 0.9229336692775374, "grad_norm": 1.234375, "learning_rate": 3.5920959133126987e-07, "loss": 1.1571, "step": 8425 }, { "epoch": 0.9234814043928357, "grad_norm": 1.2890625, "learning_rate": 3.541483642583421e-07, "loss": 1.1979, "step": 8430 }, { "epoch": 0.9240291395081338, "grad_norm": 1.234375, "learning_rate": 3.491224039049379e-07, "loss": 1.1641, "step": 8435 }, { "epoch": 0.9245768746234321, "grad_norm": 1.21875, "learning_rate": 3.441317286467416e-07, "loss": 1.1128, "step": 8440 }, { "epoch": 0.9251246097387303, "grad_norm": 1.2109375, "learning_rate": 3.3917635673043183e-07, "loss": 1.0991, "step": 8445 }, { "epoch": 0.9256723448540286, "grad_norm": 1.2109375, "learning_rate": 3.3425630627361263e-07, "loss": 1.2116, "step": 8450 }, { "epoch": 0.9262200799693269, "grad_norm": 1.296875, "learning_rate": 3.293715952647425e-07, "loss": 1.1897, "step": 8455 }, { "epoch": 0.926767815084625, "grad_norm": 1.234375, "learning_rate": 3.245222415630822e-07, "loss": 1.1805, "step": 8460 }, { "epoch": 0.9273155501999233, "grad_norm": 1.2265625, "learning_rate": 3.197082628986126e-07, "loss": 1.1074, "step": 8465 }, { "epoch": 0.9278632853152216, "grad_norm": 1.1796875, "learning_rate": 3.149296768719834e-07, "loss": 1.0734, "step": 8470 }, { "epoch": 0.9284110204305198, "grad_norm": 1.2109375, "learning_rate": 3.101865009544391e-07, "loss": 1.1775, "step": 8475 }, { "epoch": 0.9289587555458181, "grad_norm": 1.2109375, "learning_rate": 3.054787524877645e-07, "loss": 1.1536, "step": 8480 }, { "epoch": 0.9295064906611162, "grad_norm": 1.2265625, "learning_rate": 3.0080644868420996e-07, "loss": 1.1351, "step": 8485 }, { "epoch": 0.9300542257764145, "grad_norm": 1.2265625, "learning_rate": 2.9616960662643967e-07, "loss": 1.222, "step": 8490 }, { "epoch": 0.9306019608917128, "grad_norm": 1.2421875, "learning_rate": 2.915682432674627e-07, "loss": 1.1448, "step": 8495 }, { "epoch": 0.931149696007011, "grad_norm": 1.1953125, "learning_rate": 2.8700237543057173e-07, "loss": 1.1303, "step": 8500 }, { "epoch": 0.9316974311223093, "grad_norm": 1.25, "learning_rate": 2.824720198092834e-07, "loss": 1.1381, "step": 8505 }, { "epoch": 0.9322451662376074, "grad_norm": 1.2109375, "learning_rate": 2.7797719296727476e-07, "loss": 1.1853, "step": 8510 }, { "epoch": 0.9327929013529057, "grad_norm": 1.203125, "learning_rate": 2.7351791133832685e-07, "loss": 1.1622, "step": 8515 }, { "epoch": 0.933340636468204, "grad_norm": 1.2265625, "learning_rate": 2.69094191226259e-07, "loss": 1.1323, "step": 8520 }, { "epoch": 0.9338883715835022, "grad_norm": 1.2265625, "learning_rate": 2.647060488048736e-07, "loss": 1.1172, "step": 8525 }, { "epoch": 0.9344361066988005, "grad_norm": 1.2109375, "learning_rate": 2.603535001178947e-07, "loss": 1.1416, "step": 8530 }, { "epoch": 0.9349838418140987, "grad_norm": 1.234375, "learning_rate": 2.5603656107891285e-07, "loss": 1.224, "step": 8535 }, { "epoch": 0.9355315769293969, "grad_norm": 1.21875, "learning_rate": 2.517552474713203e-07, "loss": 1.1628, "step": 8540 }, { "epoch": 0.9360793120446952, "grad_norm": 1.1875, "learning_rate": 2.4750957494826033e-07, "loss": 1.1395, "step": 8545 }, { "epoch": 0.9366270471599935, "grad_norm": 1.2109375, "learning_rate": 2.4329955903256376e-07, "loss": 1.169, "step": 8550 }, { "epoch": 0.9371747822752917, "grad_norm": 1.2421875, "learning_rate": 2.391252151167001e-07, "loss": 1.1216, "step": 8555 }, { "epoch": 0.9377225173905899, "grad_norm": 1.21875, "learning_rate": 2.349865584627109e-07, "loss": 1.1221, "step": 8560 }, { "epoch": 0.9382702525058881, "grad_norm": 1.2109375, "learning_rate": 2.308836042021656e-07, "loss": 1.1322, "step": 8565 }, { "epoch": 0.9388179876211864, "grad_norm": 1.2109375, "learning_rate": 2.2681636733609457e-07, "loss": 1.1842, "step": 8570 }, { "epoch": 0.9393657227364847, "grad_norm": 1.265625, "learning_rate": 2.2278486273494272e-07, "loss": 1.1955, "step": 8575 }, { "epoch": 0.9399134578517829, "grad_norm": 1.21875, "learning_rate": 2.1878910513851381e-07, "loss": 1.2065, "step": 8580 }, { "epoch": 0.9404611929670811, "grad_norm": 1.25, "learning_rate": 2.148291091559107e-07, "loss": 1.141, "step": 8585 }, { "epoch": 0.9410089280823793, "grad_norm": 1.234375, "learning_rate": 2.1090488926548968e-07, "loss": 1.145, "step": 8590 }, { "epoch": 0.9415566631976776, "grad_norm": 1.2109375, "learning_rate": 2.070164598148039e-07, "loss": 1.1382, "step": 8595 }, { "epoch": 0.9421043983129759, "grad_norm": 1.2265625, "learning_rate": 2.0316383502054782e-07, "loss": 1.1571, "step": 8600 }, { "epoch": 0.9426521334282741, "grad_norm": 1.2265625, "learning_rate": 1.993470289685162e-07, "loss": 1.1382, "step": 8605 }, { "epoch": 0.9431998685435723, "grad_norm": 1.15625, "learning_rate": 1.9556605561353525e-07, "loss": 1.146, "step": 8610 }, { "epoch": 0.9437476036588706, "grad_norm": 1.25, "learning_rate": 1.9182092877942705e-07, "loss": 1.0946, "step": 8615 }, { "epoch": 0.9442953387741688, "grad_norm": 1.203125, "learning_rate": 1.8811166215895405e-07, "loss": 1.0913, "step": 8620 }, { "epoch": 0.9448430738894671, "grad_norm": 1.234375, "learning_rate": 1.8443826931376474e-07, "loss": 1.0888, "step": 8625 }, { "epoch": 0.9453908090047652, "grad_norm": 1.2421875, "learning_rate": 1.8080076367434918e-07, "loss": 1.1671, "step": 8630 }, { "epoch": 0.9459385441200635, "grad_norm": 1.2109375, "learning_rate": 1.7719915853999014e-07, "loss": 1.1341, "step": 8635 }, { "epoch": 0.9464862792353618, "grad_norm": 1.25, "learning_rate": 1.7363346707870877e-07, "loss": 1.1377, "step": 8640 }, { "epoch": 0.94703401435066, "grad_norm": 1.1953125, "learning_rate": 1.701037023272234e-07, "loss": 1.1857, "step": 8645 }, { "epoch": 0.9475817494659583, "grad_norm": 1.2265625, "learning_rate": 1.666098771908986e-07, "loss": 1.1515, "step": 8650 }, { "epoch": 0.9481294845812565, "grad_norm": 1.2265625, "learning_rate": 1.6315200444369406e-07, "loss": 1.1915, "step": 8655 }, { "epoch": 0.9486772196965547, "grad_norm": 1.2421875, "learning_rate": 1.59730096728129e-07, "loss": 1.1233, "step": 8660 }, { "epoch": 0.949224954811853, "grad_norm": 1.2265625, "learning_rate": 1.5634416655522343e-07, "loss": 1.1587, "step": 8665 }, { "epoch": 0.9497726899271512, "grad_norm": 1.1953125, "learning_rate": 1.5299422630445816e-07, "loss": 1.1867, "step": 8670 }, { "epoch": 0.9503204250424495, "grad_norm": 1.2265625, "learning_rate": 1.4968028822373471e-07, "loss": 1.2192, "step": 8675 }, { "epoch": 0.9508681601577477, "grad_norm": 1.234375, "learning_rate": 1.4640236442931665e-07, "loss": 1.2009, "step": 8680 }, { "epoch": 0.9514158952730459, "grad_norm": 1.203125, "learning_rate": 1.4316046690580178e-07, "loss": 1.1951, "step": 8685 }, { "epoch": 0.9519636303883442, "grad_norm": 1.234375, "learning_rate": 1.399546075060665e-07, "loss": 1.1862, "step": 8690 }, { "epoch": 0.9525113655036425, "grad_norm": 1.265625, "learning_rate": 1.367847979512238e-07, "loss": 1.1432, "step": 8695 }, { "epoch": 0.9530591006189407, "grad_norm": 1.2265625, "learning_rate": 1.3365104983058873e-07, "loss": 1.1178, "step": 8700 }, { "epoch": 0.9536068357342389, "grad_norm": 1.328125, "learning_rate": 1.3055337460162632e-07, "loss": 1.1738, "step": 8705 }, { "epoch": 0.9541545708495371, "grad_norm": 1.2109375, "learning_rate": 1.2749178358991477e-07, "loss": 1.16, "step": 8710 }, { "epoch": 0.9547023059648354, "grad_norm": 1.21875, "learning_rate": 1.244662879891012e-07, "loss": 1.1101, "step": 8715 }, { "epoch": 0.9552500410801337, "grad_norm": 1.2578125, "learning_rate": 1.21476898860865e-07, "loss": 1.2003, "step": 8720 }, { "epoch": 0.9557977761954319, "grad_norm": 1.2265625, "learning_rate": 1.185236271348722e-07, "loss": 1.2025, "step": 8725 }, { "epoch": 0.9563455113107301, "grad_norm": 1.21875, "learning_rate": 1.1560648360874005e-07, "loss": 1.1886, "step": 8730 }, { "epoch": 0.9568932464260284, "grad_norm": 1.3046875, "learning_rate": 1.1272547894799369e-07, "loss": 1.1359, "step": 8735 }, { "epoch": 0.9574409815413266, "grad_norm": 1.2421875, "learning_rate": 1.0988062368603059e-07, "loss": 1.2033, "step": 8740 }, { "epoch": 0.9579887166566249, "grad_norm": 1.234375, "learning_rate": 1.0707192822408063e-07, "loss": 1.2268, "step": 8745 }, { "epoch": 0.9585364517719231, "grad_norm": 1.234375, "learning_rate": 1.0429940283116613e-07, "loss": 1.1409, "step": 8750 }, { "epoch": 0.9590841868872213, "grad_norm": 1.21875, "learning_rate": 1.0156305764406627e-07, "loss": 1.1499, "step": 8755 }, { "epoch": 0.9596319220025196, "grad_norm": 1.265625, "learning_rate": 9.886290266728271e-08, "loss": 1.1508, "step": 8760 }, { "epoch": 0.9601796571178178, "grad_norm": 1.2265625, "learning_rate": 9.619894777299632e-08, "loss": 1.168, "step": 8765 }, { "epoch": 0.9607273922331161, "grad_norm": 1.15625, "learning_rate": 9.357120270103715e-08, "loss": 1.1012, "step": 8770 }, { "epoch": 0.9612751273484144, "grad_norm": 1.265625, "learning_rate": 9.097967705884558e-08, "loss": 1.1533, "step": 8775 }, { "epoch": 0.9618228624637125, "grad_norm": 1.1875, "learning_rate": 8.842438032143908e-08, "loss": 1.1602, "step": 8780 }, { "epoch": 0.9623705975790108, "grad_norm": 1.2578125, "learning_rate": 8.590532183137656e-08, "loss": 1.1479, "step": 8785 }, { "epoch": 0.962918332694309, "grad_norm": 1.1953125, "learning_rate": 8.3422510798723e-08, "loss": 1.1496, "step": 8790 }, { "epoch": 0.9634660678096073, "grad_norm": 1.2734375, "learning_rate": 8.097595630101818e-08, "loss": 1.146, "step": 8795 }, { "epoch": 0.9640138029249055, "grad_norm": 1.1640625, "learning_rate": 7.856566728324244e-08, "loss": 1.0557, "step": 8800 }, { "epoch": 0.9645615380402037, "grad_norm": 1.25, "learning_rate": 7.619165255778327e-08, "loss": 1.1975, "step": 8805 }, { "epoch": 0.965109273155502, "grad_norm": 1.234375, "learning_rate": 7.385392080440535e-08, "loss": 1.1624, "step": 8810 }, { "epoch": 0.9656570082708003, "grad_norm": 1.1953125, "learning_rate": 7.155248057021502e-08, "loss": 1.1225, "step": 8815 }, { "epoch": 0.9662047433860985, "grad_norm": 1.3046875, "learning_rate": 6.928734026963258e-08, "loss": 1.1341, "step": 8820 }, { "epoch": 0.9667524785013967, "grad_norm": 1.25, "learning_rate": 6.705850818436111e-08, "loss": 1.1975, "step": 8825 }, { "epoch": 0.9673002136166949, "grad_norm": 1.2421875, "learning_rate": 6.486599246335212e-08, "loss": 1.1769, "step": 8830 }, { "epoch": 0.9678479487319932, "grad_norm": 1.21875, "learning_rate": 6.270980112278113e-08, "loss": 1.2048, "step": 8835 }, { "epoch": 0.9683956838472915, "grad_norm": 1.2578125, "learning_rate": 6.058994204601765e-08, "loss": 1.0985, "step": 8840 }, { "epoch": 0.9689434189625897, "grad_norm": 1.21875, "learning_rate": 5.850642298359188e-08, "loss": 1.173, "step": 8845 }, { "epoch": 0.9694911540778879, "grad_norm": 1.2109375, "learning_rate": 5.6459251553169226e-08, "loss": 1.104, "step": 8850 }, { "epoch": 0.9700388891931861, "grad_norm": 1.2265625, "learning_rate": 5.444843523952581e-08, "loss": 1.158, "step": 8855 }, { "epoch": 0.9705866243084844, "grad_norm": 1.2265625, "learning_rate": 5.2473981394515205e-08, "loss": 1.1602, "step": 8860 }, { "epoch": 0.9711343594237827, "grad_norm": 1.171875, "learning_rate": 5.0535897237044e-08, "loss": 1.0855, "step": 8865 }, { "epoch": 0.9716820945390809, "grad_norm": 1.328125, "learning_rate": 4.863418985304735e-08, "loss": 1.157, "step": 8870 }, { "epoch": 0.9722298296543791, "grad_norm": 1.2734375, "learning_rate": 4.6768866195460175e-08, "loss": 1.2024, "step": 8875 }, { "epoch": 0.9727775647696774, "grad_norm": 1.2265625, "learning_rate": 4.4939933084192646e-08, "loss": 1.1865, "step": 8880 }, { "epoch": 0.9733252998849756, "grad_norm": 1.203125, "learning_rate": 4.3147397206106945e-08, "loss": 1.1473, "step": 8885 }, { "epoch": 0.9738730350002739, "grad_norm": 1.1953125, "learning_rate": 4.1391265114990584e-08, "loss": 1.1184, "step": 8890 }, { "epoch": 0.9744207701155722, "grad_norm": 1.3046875, "learning_rate": 3.967154323153089e-08, "loss": 1.1739, "step": 8895 }, { "epoch": 0.9749685052308703, "grad_norm": 1.21875, "learning_rate": 3.79882378432983e-08, "loss": 1.1479, "step": 8900 }, { "epoch": 0.9755162403461686, "grad_norm": 1.2109375, "learning_rate": 3.634135510471537e-08, "loss": 1.0821, "step": 8905 }, { "epoch": 0.9760639754614668, "grad_norm": 1.25, "learning_rate": 3.473090103703891e-08, "loss": 1.1344, "step": 8910 }, { "epoch": 0.9766117105767651, "grad_norm": 1.2109375, "learning_rate": 3.315688152833896e-08, "loss": 1.1173, "step": 8915 }, { "epoch": 0.9771594456920634, "grad_norm": 1.21875, "learning_rate": 3.161930233347099e-08, "loss": 1.1412, "step": 8920 }, { "epoch": 0.9777071808073615, "grad_norm": 1.2265625, "learning_rate": 3.0118169074061507e-08, "loss": 1.1319, "step": 8925 }, { "epoch": 0.9782549159226598, "grad_norm": 1.2265625, "learning_rate": 2.8653487238488044e-08, "loss": 1.1994, "step": 8930 }, { "epoch": 0.978802651037958, "grad_norm": 1.2578125, "learning_rate": 2.7225262181849177e-08, "loss": 1.1638, "step": 8935 }, { "epoch": 0.9793503861532563, "grad_norm": 1.3203125, "learning_rate": 2.5833499125957896e-08, "loss": 1.1377, "step": 8940 }, { "epoch": 0.9798981212685546, "grad_norm": 1.2890625, "learning_rate": 2.447820315931382e-08, "loss": 1.125, "step": 8945 }, { "epoch": 0.9804458563838527, "grad_norm": 1.328125, "learning_rate": 2.3159379237087666e-08, "loss": 1.1463, "step": 8950 }, { "epoch": 0.980993591499151, "grad_norm": 1.2890625, "learning_rate": 2.1877032181102376e-08, "loss": 1.1219, "step": 8955 }, { "epoch": 0.9815413266144493, "grad_norm": 1.2578125, "learning_rate": 2.063116667981757e-08, "loss": 1.1693, "step": 8960 }, { "epoch": 0.9820890617297475, "grad_norm": 1.1484375, "learning_rate": 1.942178728830957e-08, "loss": 1.1101, "step": 8965 }, { "epoch": 0.9826367968450458, "grad_norm": 1.2734375, "learning_rate": 1.8248898428253615e-08, "loss": 1.1785, "step": 8970 }, { "epoch": 0.9831845319603439, "grad_norm": 1.234375, "learning_rate": 1.71125043879139e-08, "loss": 1.1803, "step": 8975 }, { "epoch": 0.9837322670756422, "grad_norm": 1.2578125, "learning_rate": 1.601260932212245e-08, "loss": 1.172, "step": 8980 }, { "epoch": 0.9842800021909405, "grad_norm": 1.2421875, "learning_rate": 1.4949217252262505e-08, "loss": 1.1578, "step": 8985 }, { "epoch": 0.9848277373062387, "grad_norm": 1.2578125, "learning_rate": 1.3922332066262923e-08, "loss": 1.1933, "step": 8990 }, { "epoch": 0.9853754724215369, "grad_norm": 1.203125, "learning_rate": 1.2931957518570459e-08, "loss": 1.2329, "step": 8995 }, { "epoch": 0.9859232075368352, "grad_norm": 1.25, "learning_rate": 1.1978097230149755e-08, "loss": 1.2313, "step": 9000 }, { "epoch": 0.9864709426521334, "grad_norm": 1.21875, "learning_rate": 1.1060754688460018e-08, "loss": 1.1615, "step": 9005 }, { "epoch": 0.9870186777674317, "grad_norm": 1.1953125, "learning_rate": 1.017993324744615e-08, "loss": 1.1141, "step": 9010 }, { "epoch": 0.98756641288273, "grad_norm": 1.25, "learning_rate": 9.335636127528746e-09, "loss": 1.182, "step": 9015 }, { "epoch": 0.9881141479980281, "grad_norm": 1.1875, "learning_rate": 8.527866415586338e-09, "loss": 1.129, "step": 9020 }, { "epoch": 0.9886618831133264, "grad_norm": 1.1640625, "learning_rate": 7.75662706495095e-09, "loss": 1.1161, "step": 9025 }, { "epoch": 0.9892096182286246, "grad_norm": 1.21875, "learning_rate": 7.021920895391443e-09, "loss": 1.1489, "step": 9030 }, { "epoch": 0.9897573533439229, "grad_norm": 1.265625, "learning_rate": 6.323750593106859e-09, "loss": 1.1763, "step": 9035 }, { "epoch": 0.9903050884592212, "grad_norm": 1.1953125, "learning_rate": 5.6621187107153145e-09, "loss": 1.1108, "step": 9040 }, { "epoch": 0.9908528235745193, "grad_norm": 1.234375, "learning_rate": 5.037027667246231e-09, "loss": 1.1569, "step": 9045 }, { "epoch": 0.9914005586898176, "grad_norm": 1.25, "learning_rate": 4.44847974812701e-09, "loss": 1.1746, "step": 9050 }, { "epoch": 0.9919482938051158, "grad_norm": 1.1875, "learning_rate": 3.896477105179708e-09, "loss": 1.1355, "step": 9055 }, { "epoch": 0.9924960289204141, "grad_norm": 1.1953125, "learning_rate": 3.381021756612146e-09, "loss": 1.1379, "step": 9060 }, { "epoch": 0.9930437640357124, "grad_norm": 1.390625, "learning_rate": 2.9021155870079255e-09, "loss": 1.1436, "step": 9065 }, { "epoch": 0.9935914991510105, "grad_norm": 1.25, "learning_rate": 2.459760347320872e-09, "loss": 1.181, "step": 9070 }, { "epoch": 0.9941392342663088, "grad_norm": 1.1796875, "learning_rate": 2.053957654871708e-09, "loss": 1.1355, "step": 9075 }, { "epoch": 0.9946869693816071, "grad_norm": 1.1953125, "learning_rate": 1.6847089933358373e-09, "loss": 1.1293, "step": 9080 }, { "epoch": 0.9952347044969053, "grad_norm": 1.2265625, "learning_rate": 1.3520157127444589e-09, "loss": 1.1847, "step": 9085 }, { "epoch": 0.9957824396122036, "grad_norm": 1.234375, "learning_rate": 1.0558790294745713e-09, "loss": 1.1692, "step": 9090 }, { "epoch": 0.9963301747275017, "grad_norm": 1.28125, "learning_rate": 7.96300026248975e-10, "loss": 1.1458, "step": 9095 }, { "epoch": 0.9968779098428, "grad_norm": 1.25, "learning_rate": 5.732796521262796e-10, "loss": 1.1276, "step": 9100 }, { "epoch": 0.9974256449580983, "grad_norm": 1.21875, "learning_rate": 3.8681872250534437e-10, "loss": 1.1752, "step": 9105 }, { "epoch": 0.9979733800733965, "grad_norm": 1.203125, "learning_rate": 2.369179191141768e-10, "loss": 1.1254, "step": 9110 }, { "epoch": 0.9985211151886948, "grad_norm": 1.2734375, "learning_rate": 1.235777900154833e-10, "loss": 1.1485, "step": 9115 }, { "epoch": 0.999068850303993, "grad_norm": 1.25, "learning_rate": 4.679874959556685e-11, "loss": 1.1661, "step": 9120 }, { "epoch": 0.9996165854192912, "grad_norm": 1.28125, "learning_rate": 6.5810785732089985e-12, "loss": 1.1323, "step": 9125 }, { "epoch": 0.9999452264884702, "eval_loss": 1.1566358804702759, "eval_runtime": 1068.2466, "eval_samples_per_second": 15.133, "eval_steps_per_second": 1.892, "step": 9128 }, { "epoch": 0.9999452264884702, "step": 9128, "total_flos": 1.8565750537228124e+18, "train_loss": 1.1714502769954156, "train_runtime": 42415.0484, "train_samples_per_second": 3.443, "train_steps_per_second": 0.215 } ], "logging_steps": 5, "max_steps": 9128, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8565750537228124e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }