{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999713113578335, "eval_steps": 500, "global_step": 17428, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.7377284333132515e-05, "grad_norm": 0.427734375, "learning_rate": 1.1474469305794606e-07, "loss": 1.3587, "step": 1 }, { "epoch": 0.00028688642166566254, "grad_norm": 0.57421875, "learning_rate": 5.737234652897304e-07, "loss": 1.0991, "step": 5 }, { "epoch": 0.0005737728433313251, "grad_norm": 0.349609375, "learning_rate": 1.1474469305794607e-06, "loss": 1.1268, "step": 10 }, { "epoch": 0.0008606592649969877, "grad_norm": 0.416015625, "learning_rate": 1.721170395869191e-06, "loss": 1.1759, "step": 15 }, { "epoch": 0.0011475456866626502, "grad_norm": 0.337890625, "learning_rate": 2.2948938611589215e-06, "loss": 1.1986, "step": 20 }, { "epoch": 0.001434432108328313, "grad_norm": 0.359375, "learning_rate": 2.868617326448652e-06, "loss": 1.1267, "step": 25 }, { "epoch": 0.0017213185299939755, "grad_norm": 0.314453125, "learning_rate": 3.442340791738382e-06, "loss": 1.0619, "step": 30 }, { "epoch": 0.002008204951659638, "grad_norm": 0.33203125, "learning_rate": 4.016064257028113e-06, "loss": 1.1175, "step": 35 }, { "epoch": 0.0022950913733253003, "grad_norm": 0.33203125, "learning_rate": 4.589787722317843e-06, "loss": 1.1421, "step": 40 }, { "epoch": 0.002581977794990963, "grad_norm": 0.32421875, "learning_rate": 5.163511187607573e-06, "loss": 1.1013, "step": 45 }, { "epoch": 0.002868864216656626, "grad_norm": 0.337890625, "learning_rate": 5.737234652897304e-06, "loss": 1.138, "step": 50 }, { "epoch": 0.0031557506383222884, "grad_norm": 0.330078125, "learning_rate": 6.310958118187034e-06, "loss": 1.128, "step": 55 }, { "epoch": 0.003442637059987951, "grad_norm": 0.31640625, "learning_rate": 6.884681583476764e-06, "loss": 1.1071, "step": 60 }, { "epoch": 0.0037295234816536135, "grad_norm": 0.3203125, "learning_rate": 7.4584050487664955e-06, "loss": 1.1443, "step": 65 }, { "epoch": 0.004016409903319276, "grad_norm": 0.328125, "learning_rate": 8.032128514056226e-06, "loss": 1.1746, "step": 70 }, { "epoch": 0.004303296324984938, "grad_norm": 0.2890625, "learning_rate": 8.605851979345956e-06, "loss": 1.0972, "step": 75 }, { "epoch": 0.004590182746650601, "grad_norm": 0.310546875, "learning_rate": 9.179575444635686e-06, "loss": 1.1355, "step": 80 }, { "epoch": 0.004877069168316263, "grad_norm": 0.28125, "learning_rate": 9.753298909925416e-06, "loss": 1.0907, "step": 85 }, { "epoch": 0.005163955589981926, "grad_norm": 0.30078125, "learning_rate": 1.0327022375215146e-05, "loss": 1.0387, "step": 90 }, { "epoch": 0.005450842011647588, "grad_norm": 0.294921875, "learning_rate": 1.0900745840504876e-05, "loss": 1.1107, "step": 95 }, { "epoch": 0.005737728433313252, "grad_norm": 0.30078125, "learning_rate": 1.1474469305794608e-05, "loss": 1.071, "step": 100 }, { "epoch": 0.006024614854978914, "grad_norm": 0.388671875, "learning_rate": 1.2048192771084338e-05, "loss": 1.1581, "step": 105 }, { "epoch": 0.006311501276644577, "grad_norm": 0.40234375, "learning_rate": 1.2621916236374069e-05, "loss": 1.0058, "step": 110 }, { "epoch": 0.006598387698310239, "grad_norm": 0.25390625, "learning_rate": 1.3195639701663797e-05, "loss": 0.9731, "step": 115 }, { "epoch": 0.006885274119975902, "grad_norm": 0.291015625, "learning_rate": 1.3769363166953527e-05, "loss": 0.9738, "step": 120 }, { "epoch": 0.007172160541641564, "grad_norm": 0.2734375, "learning_rate": 1.434308663224326e-05, "loss": 1.0439, "step": 125 }, { "epoch": 0.007459046963307227, "grad_norm": 0.30078125, "learning_rate": 1.4916810097532991e-05, "loss": 1.0217, "step": 130 }, { "epoch": 0.0077459333849728895, "grad_norm": 0.236328125, "learning_rate": 1.549053356282272e-05, "loss": 0.9904, "step": 135 }, { "epoch": 0.008032819806638551, "grad_norm": 0.24609375, "learning_rate": 1.606425702811245e-05, "loss": 0.9875, "step": 140 }, { "epoch": 0.008319706228304214, "grad_norm": 0.25, "learning_rate": 1.663798049340218e-05, "loss": 0.9877, "step": 145 }, { "epoch": 0.008606592649969876, "grad_norm": 0.2890625, "learning_rate": 1.721170395869191e-05, "loss": 0.9866, "step": 150 }, { "epoch": 0.008893479071635539, "grad_norm": 0.259765625, "learning_rate": 1.7785427423981642e-05, "loss": 0.9856, "step": 155 }, { "epoch": 0.009180365493301201, "grad_norm": 0.349609375, "learning_rate": 1.8359150889271372e-05, "loss": 1.0018, "step": 160 }, { "epoch": 0.009467251914966864, "grad_norm": 0.28125, "learning_rate": 1.8932874354561102e-05, "loss": 1.025, "step": 165 }, { "epoch": 0.009754138336632526, "grad_norm": 0.28125, "learning_rate": 1.9506597819850832e-05, "loss": 1.015, "step": 170 }, { "epoch": 0.010041024758298189, "grad_norm": 0.27734375, "learning_rate": 2.0080321285140562e-05, "loss": 0.967, "step": 175 }, { "epoch": 0.010327911179963851, "grad_norm": 0.2451171875, "learning_rate": 2.0654044750430293e-05, "loss": 1.0353, "step": 180 }, { "epoch": 0.010614797601629514, "grad_norm": 0.2578125, "learning_rate": 2.1227768215720023e-05, "loss": 1.0401, "step": 185 }, { "epoch": 0.010901684023295177, "grad_norm": 0.26953125, "learning_rate": 2.1801491681009753e-05, "loss": 1.0499, "step": 190 }, { "epoch": 0.01118857044496084, "grad_norm": 0.265625, "learning_rate": 2.2375215146299486e-05, "loss": 0.923, "step": 195 }, { "epoch": 0.011475456866626503, "grad_norm": 0.267578125, "learning_rate": 2.2948938611589217e-05, "loss": 1.0782, "step": 200 }, { "epoch": 0.011762343288292166, "grad_norm": 0.318359375, "learning_rate": 2.3522662076878947e-05, "loss": 1.0928, "step": 205 }, { "epoch": 0.012049229709957828, "grad_norm": 0.283203125, "learning_rate": 2.4096385542168677e-05, "loss": 1.0674, "step": 210 }, { "epoch": 0.012336116131623491, "grad_norm": 0.26953125, "learning_rate": 2.4670109007458407e-05, "loss": 1.0199, "step": 215 }, { "epoch": 0.012623002553289154, "grad_norm": 0.294921875, "learning_rate": 2.5243832472748137e-05, "loss": 0.9734, "step": 220 }, { "epoch": 0.012909888974954816, "grad_norm": 0.271484375, "learning_rate": 2.5817555938037867e-05, "loss": 1.0254, "step": 225 }, { "epoch": 0.013196775396620479, "grad_norm": 0.3046875, "learning_rate": 2.6391279403327594e-05, "loss": 1.1087, "step": 230 }, { "epoch": 0.013483661818286141, "grad_norm": 0.279296875, "learning_rate": 2.6965002868617328e-05, "loss": 1.0058, "step": 235 }, { "epoch": 0.013770548239951804, "grad_norm": 0.296875, "learning_rate": 2.7538726333907055e-05, "loss": 1.0453, "step": 240 }, { "epoch": 0.014057434661617466, "grad_norm": 0.2890625, "learning_rate": 2.8112449799196788e-05, "loss": 1.0546, "step": 245 }, { "epoch": 0.014344321083283129, "grad_norm": 0.318359375, "learning_rate": 2.868617326448652e-05, "loss": 1.0178, "step": 250 }, { "epoch": 0.014631207504948791, "grad_norm": 0.279296875, "learning_rate": 2.925989672977625e-05, "loss": 0.9928, "step": 255 }, { "epoch": 0.014918093926614454, "grad_norm": 0.359375, "learning_rate": 2.9833620195065982e-05, "loss": 0.966, "step": 260 }, { "epoch": 0.015204980348280116, "grad_norm": 0.27734375, "learning_rate": 3.040734366035571e-05, "loss": 1.034, "step": 265 }, { "epoch": 0.015491866769945779, "grad_norm": 0.279296875, "learning_rate": 3.098106712564544e-05, "loss": 0.946, "step": 270 }, { "epoch": 0.01577875319161144, "grad_norm": 0.28125, "learning_rate": 3.155479059093517e-05, "loss": 0.9599, "step": 275 }, { "epoch": 0.016065639613277102, "grad_norm": 0.314453125, "learning_rate": 3.21285140562249e-05, "loss": 0.9783, "step": 280 }, { "epoch": 0.016352526034942767, "grad_norm": 0.29296875, "learning_rate": 3.2702237521514636e-05, "loss": 1.0202, "step": 285 }, { "epoch": 0.016639412456608427, "grad_norm": 0.30859375, "learning_rate": 3.327596098680436e-05, "loss": 1.0517, "step": 290 }, { "epoch": 0.01692629887827409, "grad_norm": 0.298828125, "learning_rate": 3.3849684452094096e-05, "loss": 1.0645, "step": 295 }, { "epoch": 0.017213185299939752, "grad_norm": 0.298828125, "learning_rate": 3.442340791738382e-05, "loss": 0.963, "step": 300 }, { "epoch": 0.017500071721605417, "grad_norm": 0.3046875, "learning_rate": 3.499713138267356e-05, "loss": 0.9991, "step": 305 }, { "epoch": 0.017786958143271078, "grad_norm": 0.2890625, "learning_rate": 3.5570854847963284e-05, "loss": 0.9191, "step": 310 }, { "epoch": 0.018073844564936742, "grad_norm": 0.310546875, "learning_rate": 3.614457831325301e-05, "loss": 1.0954, "step": 315 }, { "epoch": 0.018360730986602403, "grad_norm": 0.310546875, "learning_rate": 3.6718301778542744e-05, "loss": 0.9925, "step": 320 }, { "epoch": 0.018647617408268067, "grad_norm": 0.3046875, "learning_rate": 3.729202524383247e-05, "loss": 1.0165, "step": 325 }, { "epoch": 0.018934503829933728, "grad_norm": 0.318359375, "learning_rate": 3.7865748709122204e-05, "loss": 1.0476, "step": 330 }, { "epoch": 0.019221390251599392, "grad_norm": 0.306640625, "learning_rate": 3.843947217441193e-05, "loss": 1.0352, "step": 335 }, { "epoch": 0.019508276673265053, "grad_norm": 0.298828125, "learning_rate": 3.9013195639701665e-05, "loss": 1.0157, "step": 340 }, { "epoch": 0.019795163094930717, "grad_norm": 0.279296875, "learning_rate": 3.958691910499139e-05, "loss": 1.0117, "step": 345 }, { "epoch": 0.020082049516596378, "grad_norm": 0.3046875, "learning_rate": 4.0160642570281125e-05, "loss": 1.0196, "step": 350 }, { "epoch": 0.020368935938262042, "grad_norm": 0.298828125, "learning_rate": 4.073436603557086e-05, "loss": 1.0499, "step": 355 }, { "epoch": 0.020655822359927703, "grad_norm": 0.306640625, "learning_rate": 4.1308089500860585e-05, "loss": 1.0211, "step": 360 }, { "epoch": 0.020942708781593367, "grad_norm": 0.283203125, "learning_rate": 4.188181296615032e-05, "loss": 1.0123, "step": 365 }, { "epoch": 0.021229595203259028, "grad_norm": 0.296875, "learning_rate": 4.2455536431440046e-05, "loss": 0.9195, "step": 370 }, { "epoch": 0.021516481624924692, "grad_norm": 0.2890625, "learning_rate": 4.302925989672978e-05, "loss": 0.9714, "step": 375 }, { "epoch": 0.021803368046590353, "grad_norm": 0.291015625, "learning_rate": 4.3602983362019506e-05, "loss": 0.9929, "step": 380 }, { "epoch": 0.022090254468256017, "grad_norm": 0.298828125, "learning_rate": 4.417670682730924e-05, "loss": 0.9873, "step": 385 }, { "epoch": 0.02237714088992168, "grad_norm": 0.3125, "learning_rate": 4.475043029259897e-05, "loss": 0.978, "step": 390 }, { "epoch": 0.022664027311587343, "grad_norm": 0.279296875, "learning_rate": 4.53241537578887e-05, "loss": 1.0114, "step": 395 }, { "epoch": 0.022950913733253007, "grad_norm": 0.275390625, "learning_rate": 4.589787722317843e-05, "loss": 0.9551, "step": 400 }, { "epoch": 0.023237800154918668, "grad_norm": 0.28125, "learning_rate": 4.647160068846816e-05, "loss": 0.9802, "step": 405 }, { "epoch": 0.023524686576584332, "grad_norm": 0.3203125, "learning_rate": 4.7045324153757894e-05, "loss": 0.9705, "step": 410 }, { "epoch": 0.023811572998249993, "grad_norm": 0.28125, "learning_rate": 4.761904761904762e-05, "loss": 1.0406, "step": 415 }, { "epoch": 0.024098459419915657, "grad_norm": 0.298828125, "learning_rate": 4.8192771084337354e-05, "loss": 0.9633, "step": 420 }, { "epoch": 0.024385345841581318, "grad_norm": 0.28515625, "learning_rate": 4.876649454962709e-05, "loss": 1.0005, "step": 425 }, { "epoch": 0.024672232263246982, "grad_norm": 0.3125, "learning_rate": 4.9340218014916814e-05, "loss": 1.0285, "step": 430 }, { "epoch": 0.024959118684912643, "grad_norm": 0.279296875, "learning_rate": 4.991394148020654e-05, "loss": 1.0557, "step": 435 }, { "epoch": 0.025246005106578307, "grad_norm": 0.255859375, "learning_rate": 5.0487664945496275e-05, "loss": 0.9442, "step": 440 }, { "epoch": 0.025532891528243968, "grad_norm": 0.26171875, "learning_rate": 5.1061388410786e-05, "loss": 1.0158, "step": 445 }, { "epoch": 0.025819777949909632, "grad_norm": 0.275390625, "learning_rate": 5.1635111876075735e-05, "loss": 0.9706, "step": 450 }, { "epoch": 0.026106664371575293, "grad_norm": 0.28125, "learning_rate": 5.220883534136547e-05, "loss": 0.9862, "step": 455 }, { "epoch": 0.026393550793240957, "grad_norm": 0.29296875, "learning_rate": 5.278255880665519e-05, "loss": 1.0526, "step": 460 }, { "epoch": 0.026680437214906618, "grad_norm": 0.2734375, "learning_rate": 5.335628227194492e-05, "loss": 0.9403, "step": 465 }, { "epoch": 0.026967323636572282, "grad_norm": 0.2734375, "learning_rate": 5.3930005737234656e-05, "loss": 1.0659, "step": 470 }, { "epoch": 0.027254210058237943, "grad_norm": 0.294921875, "learning_rate": 5.450372920252439e-05, "loss": 0.9986, "step": 475 }, { "epoch": 0.027541096479903607, "grad_norm": 0.275390625, "learning_rate": 5.507745266781411e-05, "loss": 0.9649, "step": 480 }, { "epoch": 0.027827982901569268, "grad_norm": 0.30859375, "learning_rate": 5.565117613310384e-05, "loss": 1.0281, "step": 485 }, { "epoch": 0.028114869323234933, "grad_norm": 0.298828125, "learning_rate": 5.6224899598393576e-05, "loss": 0.9301, "step": 490 }, { "epoch": 0.028401755744900593, "grad_norm": 0.326171875, "learning_rate": 5.679862306368331e-05, "loss": 0.9553, "step": 495 }, { "epoch": 0.028688642166566258, "grad_norm": 0.291015625, "learning_rate": 5.737234652897304e-05, "loss": 1.0002, "step": 500 }, { "epoch": 0.02897552858823192, "grad_norm": 0.3046875, "learning_rate": 5.794606999426276e-05, "loss": 0.9644, "step": 505 }, { "epoch": 0.029262415009897583, "grad_norm": 0.26953125, "learning_rate": 5.85197934595525e-05, "loss": 0.9903, "step": 510 }, { "epoch": 0.029549301431563244, "grad_norm": 0.28515625, "learning_rate": 5.909351692484223e-05, "loss": 1.098, "step": 515 }, { "epoch": 0.029836187853228908, "grad_norm": 0.31640625, "learning_rate": 5.9667240390131964e-05, "loss": 1.063, "step": 520 }, { "epoch": 0.03012307427489457, "grad_norm": 0.275390625, "learning_rate": 6.02409638554217e-05, "loss": 1.0095, "step": 525 }, { "epoch": 0.030409960696560233, "grad_norm": 0.291015625, "learning_rate": 6.081468732071142e-05, "loss": 0.9601, "step": 530 }, { "epoch": 0.030696847118225894, "grad_norm": 0.263671875, "learning_rate": 6.138841078600115e-05, "loss": 0.9653, "step": 535 }, { "epoch": 0.030983733539891558, "grad_norm": 0.263671875, "learning_rate": 6.196213425129088e-05, "loss": 1.0009, "step": 540 }, { "epoch": 0.03127061996155722, "grad_norm": 0.267578125, "learning_rate": 6.253585771658062e-05, "loss": 0.9451, "step": 545 }, { "epoch": 0.03155750638322288, "grad_norm": 0.294921875, "learning_rate": 6.310958118187034e-05, "loss": 1.0467, "step": 550 }, { "epoch": 0.031844392804888544, "grad_norm": 0.265625, "learning_rate": 6.368330464716007e-05, "loss": 0.9509, "step": 555 }, { "epoch": 0.032131279226554205, "grad_norm": 0.283203125, "learning_rate": 6.42570281124498e-05, "loss": 1.004, "step": 560 }, { "epoch": 0.03241816564821987, "grad_norm": 0.28515625, "learning_rate": 6.483075157773954e-05, "loss": 0.9925, "step": 565 }, { "epoch": 0.03270505206988553, "grad_norm": 0.2578125, "learning_rate": 6.540447504302927e-05, "loss": 1.0296, "step": 570 }, { "epoch": 0.032991938491551194, "grad_norm": 0.2734375, "learning_rate": 6.597819850831899e-05, "loss": 1.0156, "step": 575 }, { "epoch": 0.033278824913216855, "grad_norm": 0.283203125, "learning_rate": 6.655192197360873e-05, "loss": 0.9381, "step": 580 }, { "epoch": 0.03356571133488252, "grad_norm": 0.255859375, "learning_rate": 6.712564543889846e-05, "loss": 1.0225, "step": 585 }, { "epoch": 0.03385259775654818, "grad_norm": 0.275390625, "learning_rate": 6.769936890418819e-05, "loss": 1.0165, "step": 590 }, { "epoch": 0.034139484178213844, "grad_norm": 0.2734375, "learning_rate": 6.827309236947793e-05, "loss": 1.0473, "step": 595 }, { "epoch": 0.034426370599879505, "grad_norm": 0.267578125, "learning_rate": 6.884681583476765e-05, "loss": 1.0628, "step": 600 }, { "epoch": 0.03471325702154517, "grad_norm": 0.2890625, "learning_rate": 6.942053930005738e-05, "loss": 0.9474, "step": 605 }, { "epoch": 0.035000143443210834, "grad_norm": 0.271484375, "learning_rate": 6.999426276534711e-05, "loss": 0.9761, "step": 610 }, { "epoch": 0.035287029864876494, "grad_norm": 0.25, "learning_rate": 7.056798623063683e-05, "loss": 0.9428, "step": 615 }, { "epoch": 0.035573916286542155, "grad_norm": 0.28125, "learning_rate": 7.114170969592657e-05, "loss": 1.0622, "step": 620 }, { "epoch": 0.03586080270820782, "grad_norm": 0.25390625, "learning_rate": 7.17154331612163e-05, "loss": 1.0392, "step": 625 }, { "epoch": 0.036147689129873484, "grad_norm": 0.271484375, "learning_rate": 7.228915662650602e-05, "loss": 0.9727, "step": 630 }, { "epoch": 0.036434575551539145, "grad_norm": 0.263671875, "learning_rate": 7.286288009179575e-05, "loss": 0.9595, "step": 635 }, { "epoch": 0.036721461973204805, "grad_norm": 0.271484375, "learning_rate": 7.343660355708549e-05, "loss": 0.9842, "step": 640 }, { "epoch": 0.03700834839487047, "grad_norm": 0.265625, "learning_rate": 7.401032702237521e-05, "loss": 0.9766, "step": 645 }, { "epoch": 0.037295234816536134, "grad_norm": 0.259765625, "learning_rate": 7.458405048766494e-05, "loss": 0.9183, "step": 650 }, { "epoch": 0.037582121238201795, "grad_norm": 0.2578125, "learning_rate": 7.515777395295467e-05, "loss": 1.0354, "step": 655 }, { "epoch": 0.037869007659867455, "grad_norm": 0.263671875, "learning_rate": 7.573149741824441e-05, "loss": 0.9666, "step": 660 }, { "epoch": 0.03815589408153312, "grad_norm": 0.26953125, "learning_rate": 7.630522088353414e-05, "loss": 0.9866, "step": 665 }, { "epoch": 0.038442780503198784, "grad_norm": 0.26171875, "learning_rate": 7.687894434882386e-05, "loss": 0.8749, "step": 670 }, { "epoch": 0.038729666924864445, "grad_norm": 0.265625, "learning_rate": 7.74526678141136e-05, "loss": 1.0652, "step": 675 }, { "epoch": 0.039016553346530106, "grad_norm": 0.25390625, "learning_rate": 7.802639127940333e-05, "loss": 1.0624, "step": 680 }, { "epoch": 0.03930343976819577, "grad_norm": 0.26171875, "learning_rate": 7.860011474469306e-05, "loss": 0.9674, "step": 685 }, { "epoch": 0.039590326189861434, "grad_norm": 0.275390625, "learning_rate": 7.917383820998278e-05, "loss": 0.9625, "step": 690 }, { "epoch": 0.039877212611527095, "grad_norm": 0.259765625, "learning_rate": 7.974756167527252e-05, "loss": 0.9582, "step": 695 }, { "epoch": 0.040164099033192756, "grad_norm": 0.267578125, "learning_rate": 8.032128514056225e-05, "loss": 1.0462, "step": 700 }, { "epoch": 0.040450985454858424, "grad_norm": 0.2470703125, "learning_rate": 8.089500860585198e-05, "loss": 0.9695, "step": 705 }, { "epoch": 0.040737871876524084, "grad_norm": 0.25390625, "learning_rate": 8.146873207114172e-05, "loss": 0.9289, "step": 710 }, { "epoch": 0.041024758298189745, "grad_norm": 0.26171875, "learning_rate": 8.204245553643144e-05, "loss": 1.0257, "step": 715 }, { "epoch": 0.041311644719855406, "grad_norm": 0.25, "learning_rate": 8.261617900172117e-05, "loss": 0.9624, "step": 720 }, { "epoch": 0.041598531141521074, "grad_norm": 0.25, "learning_rate": 8.31899024670109e-05, "loss": 0.9824, "step": 725 }, { "epoch": 0.041885417563186735, "grad_norm": 0.236328125, "learning_rate": 8.376362593230064e-05, "loss": 1.0376, "step": 730 }, { "epoch": 0.042172303984852395, "grad_norm": 0.2890625, "learning_rate": 8.433734939759037e-05, "loss": 1.0186, "step": 735 }, { "epoch": 0.042459190406518056, "grad_norm": 0.2412109375, "learning_rate": 8.491107286288009e-05, "loss": 0.9532, "step": 740 }, { "epoch": 0.042746076828183724, "grad_norm": 0.2470703125, "learning_rate": 8.548479632816982e-05, "loss": 1.0196, "step": 745 }, { "epoch": 0.043032963249849385, "grad_norm": 0.2421875, "learning_rate": 8.605851979345956e-05, "loss": 0.9727, "step": 750 }, { "epoch": 0.043319849671515046, "grad_norm": 0.248046875, "learning_rate": 8.663224325874929e-05, "loss": 0.9535, "step": 755 }, { "epoch": 0.043606736093180706, "grad_norm": 0.2451171875, "learning_rate": 8.720596672403901e-05, "loss": 0.9902, "step": 760 }, { "epoch": 0.043893622514846374, "grad_norm": 0.25, "learning_rate": 8.777969018932875e-05, "loss": 0.9378, "step": 765 }, { "epoch": 0.044180508936512035, "grad_norm": 0.251953125, "learning_rate": 8.835341365461848e-05, "loss": 1.0389, "step": 770 }, { "epoch": 0.044467395358177696, "grad_norm": 0.236328125, "learning_rate": 8.892713711990821e-05, "loss": 0.9353, "step": 775 }, { "epoch": 0.04475428177984336, "grad_norm": 0.255859375, "learning_rate": 8.950086058519795e-05, "loss": 1.0, "step": 780 }, { "epoch": 0.045041168201509024, "grad_norm": 0.24609375, "learning_rate": 9.007458405048767e-05, "loss": 0.9976, "step": 785 }, { "epoch": 0.045328054623174685, "grad_norm": 0.24609375, "learning_rate": 9.06483075157774e-05, "loss": 0.9691, "step": 790 }, { "epoch": 0.045614941044840346, "grad_norm": 0.248046875, "learning_rate": 9.122203098106713e-05, "loss": 1.0199, "step": 795 }, { "epoch": 0.045901827466506014, "grad_norm": 0.236328125, "learning_rate": 9.179575444635687e-05, "loss": 0.9223, "step": 800 }, { "epoch": 0.046188713888171674, "grad_norm": 0.25, "learning_rate": 9.23694779116466e-05, "loss": 0.948, "step": 805 }, { "epoch": 0.046475600309837335, "grad_norm": 0.2431640625, "learning_rate": 9.294320137693632e-05, "loss": 0.9714, "step": 810 }, { "epoch": 0.046762486731502996, "grad_norm": 0.2490234375, "learning_rate": 9.351692484222605e-05, "loss": 0.9428, "step": 815 }, { "epoch": 0.047049373153168664, "grad_norm": 0.2392578125, "learning_rate": 9.409064830751579e-05, "loss": 0.9848, "step": 820 }, { "epoch": 0.047336259574834325, "grad_norm": 0.2294921875, "learning_rate": 9.466437177280552e-05, "loss": 1.0338, "step": 825 }, { "epoch": 0.047623145996499985, "grad_norm": 0.248046875, "learning_rate": 9.523809523809524e-05, "loss": 1.0259, "step": 830 }, { "epoch": 0.047910032418165646, "grad_norm": 0.2470703125, "learning_rate": 9.581181870338497e-05, "loss": 0.9849, "step": 835 }, { "epoch": 0.048196918839831314, "grad_norm": 0.248046875, "learning_rate": 9.638554216867471e-05, "loss": 0.9531, "step": 840 }, { "epoch": 0.048483805261496975, "grad_norm": 0.244140625, "learning_rate": 9.695926563396444e-05, "loss": 0.9447, "step": 845 }, { "epoch": 0.048770691683162636, "grad_norm": 0.255859375, "learning_rate": 9.753298909925417e-05, "loss": 1.0124, "step": 850 }, { "epoch": 0.049057578104828296, "grad_norm": 0.25, "learning_rate": 9.81067125645439e-05, "loss": 0.9955, "step": 855 }, { "epoch": 0.049344464526493964, "grad_norm": 0.265625, "learning_rate": 9.868043602983363e-05, "loss": 1.029, "step": 860 }, { "epoch": 0.049631350948159625, "grad_norm": 0.2333984375, "learning_rate": 9.925415949512336e-05, "loss": 0.9831, "step": 865 }, { "epoch": 0.049918237369825286, "grad_norm": 0.255859375, "learning_rate": 9.982788296041308e-05, "loss": 0.9836, "step": 870 }, { "epoch": 0.050205123791490947, "grad_norm": 0.2412109375, "learning_rate": 0.00010040160642570282, "loss": 0.9967, "step": 875 }, { "epoch": 0.050492010213156614, "grad_norm": 0.251953125, "learning_rate": 0.00010097532989099255, "loss": 1.0441, "step": 880 }, { "epoch": 0.050778896634822275, "grad_norm": 0.236328125, "learning_rate": 0.00010154905335628228, "loss": 1.0311, "step": 885 }, { "epoch": 0.051065783056487936, "grad_norm": 0.240234375, "learning_rate": 0.000102122776821572, "loss": 1.0095, "step": 890 }, { "epoch": 0.0513526694781536, "grad_norm": 0.236328125, "learning_rate": 0.00010269650028686174, "loss": 0.9604, "step": 895 }, { "epoch": 0.051639555899819264, "grad_norm": 0.2451171875, "learning_rate": 0.00010327022375215147, "loss": 1.0467, "step": 900 }, { "epoch": 0.051926442321484925, "grad_norm": 0.244140625, "learning_rate": 0.00010384394721744119, "loss": 0.9439, "step": 905 }, { "epoch": 0.052213328743150586, "grad_norm": 0.234375, "learning_rate": 0.00010441767068273094, "loss": 1.09, "step": 910 }, { "epoch": 0.05250021516481625, "grad_norm": 0.232421875, "learning_rate": 0.00010499139414802066, "loss": 0.9533, "step": 915 }, { "epoch": 0.052787101586481915, "grad_norm": 0.24609375, "learning_rate": 0.00010556511761331038, "loss": 1.0325, "step": 920 }, { "epoch": 0.053073988008147575, "grad_norm": 0.2373046875, "learning_rate": 0.00010613884107860012, "loss": 0.9894, "step": 925 }, { "epoch": 0.053360874429813236, "grad_norm": 0.232421875, "learning_rate": 0.00010671256454388984, "loss": 0.9567, "step": 930 }, { "epoch": 0.0536477608514789, "grad_norm": 0.259765625, "learning_rate": 0.00010728628800917956, "loss": 1.019, "step": 935 }, { "epoch": 0.053934647273144565, "grad_norm": 0.234375, "learning_rate": 0.00010786001147446931, "loss": 0.9696, "step": 940 }, { "epoch": 0.054221533694810226, "grad_norm": 0.23828125, "learning_rate": 0.00010843373493975903, "loss": 0.9668, "step": 945 }, { "epoch": 0.054508420116475886, "grad_norm": 0.2216796875, "learning_rate": 0.00010900745840504878, "loss": 0.9161, "step": 950 }, { "epoch": 0.05479530653814155, "grad_norm": 0.2392578125, "learning_rate": 0.0001095811818703385, "loss": 0.9865, "step": 955 }, { "epoch": 0.055082192959807215, "grad_norm": 0.25390625, "learning_rate": 0.00011015490533562822, "loss": 0.9617, "step": 960 }, { "epoch": 0.055369079381472876, "grad_norm": 0.236328125, "learning_rate": 0.00011072862880091797, "loss": 0.947, "step": 965 }, { "epoch": 0.055655965803138537, "grad_norm": 0.2451171875, "learning_rate": 0.00011130235226620769, "loss": 0.9656, "step": 970 }, { "epoch": 0.0559428522248042, "grad_norm": 0.25390625, "learning_rate": 0.00011187607573149743, "loss": 0.9295, "step": 975 }, { "epoch": 0.056229738646469865, "grad_norm": 0.232421875, "learning_rate": 0.00011244979919678715, "loss": 0.9726, "step": 980 }, { "epoch": 0.056516625068135526, "grad_norm": 0.2333984375, "learning_rate": 0.00011302352266207687, "loss": 0.9681, "step": 985 }, { "epoch": 0.05680351148980119, "grad_norm": 0.2373046875, "learning_rate": 0.00011359724612736662, "loss": 0.9163, "step": 990 }, { "epoch": 0.05709039791146685, "grad_norm": 0.23828125, "learning_rate": 0.00011417096959265634, "loss": 0.9309, "step": 995 }, { "epoch": 0.057377284333132515, "grad_norm": 0.25, "learning_rate": 0.00011474469305794609, "loss": 1.0241, "step": 1000 }, { "epoch": 0.057664170754798176, "grad_norm": 0.236328125, "learning_rate": 0.0001153184165232358, "loss": 0.9737, "step": 1005 }, { "epoch": 0.05795105717646384, "grad_norm": 0.2421875, "learning_rate": 0.00011589213998852553, "loss": 1.0049, "step": 1010 }, { "epoch": 0.0582379435981295, "grad_norm": 0.2490234375, "learning_rate": 0.00011646586345381527, "loss": 1.03, "step": 1015 }, { "epoch": 0.058524830019795165, "grad_norm": 0.25, "learning_rate": 0.000117039586919105, "loss": 0.9817, "step": 1020 }, { "epoch": 0.058811716441460826, "grad_norm": 0.2333984375, "learning_rate": 0.00011761331038439474, "loss": 0.9642, "step": 1025 }, { "epoch": 0.05909860286312649, "grad_norm": 0.2275390625, "learning_rate": 0.00011818703384968446, "loss": 0.9757, "step": 1030 }, { "epoch": 0.05938548928479215, "grad_norm": 0.232421875, "learning_rate": 0.00011876075731497418, "loss": 0.9674, "step": 1035 }, { "epoch": 0.059672375706457816, "grad_norm": 0.2138671875, "learning_rate": 0.00011933448078026393, "loss": 0.9607, "step": 1040 }, { "epoch": 0.059959262128123476, "grad_norm": 0.2265625, "learning_rate": 0.00011990820424555365, "loss": 1.0848, "step": 1045 }, { "epoch": 0.06024614854978914, "grad_norm": 0.2294921875, "learning_rate": 0.0001204819277108434, "loss": 1.0064, "step": 1050 }, { "epoch": 0.0605330349714548, "grad_norm": 0.26171875, "learning_rate": 0.00012105565117613311, "loss": 0.9672, "step": 1055 }, { "epoch": 0.060819921393120466, "grad_norm": 0.2353515625, "learning_rate": 0.00012162937464142283, "loss": 0.9457, "step": 1060 }, { "epoch": 0.06110680781478613, "grad_norm": 0.2412109375, "learning_rate": 0.00012220309810671257, "loss": 1.0216, "step": 1065 }, { "epoch": 0.06139369423645179, "grad_norm": 0.23828125, "learning_rate": 0.0001227768215720023, "loss": 0.9021, "step": 1070 }, { "epoch": 0.06168058065811745, "grad_norm": 0.236328125, "learning_rate": 0.000123350545037292, "loss": 0.9294, "step": 1075 }, { "epoch": 0.061967467079783116, "grad_norm": 0.234375, "learning_rate": 0.00012392426850258177, "loss": 0.9828, "step": 1080 }, { "epoch": 0.06225435350144878, "grad_norm": 0.2314453125, "learning_rate": 0.00012449799196787148, "loss": 0.9699, "step": 1085 }, { "epoch": 0.06254123992311444, "grad_norm": 0.2236328125, "learning_rate": 0.00012507171543316124, "loss": 0.9242, "step": 1090 }, { "epoch": 0.0628281263447801, "grad_norm": 0.326171875, "learning_rate": 0.00012564543889845094, "loss": 0.9758, "step": 1095 }, { "epoch": 0.06311501276644577, "grad_norm": 0.240234375, "learning_rate": 0.00012621916236374068, "loss": 0.9818, "step": 1100 }, { "epoch": 0.06340189918811143, "grad_norm": 0.244140625, "learning_rate": 0.0001267928858290304, "loss": 0.9422, "step": 1105 }, { "epoch": 0.06368878560977709, "grad_norm": 0.23046875, "learning_rate": 0.00012736660929432014, "loss": 0.9781, "step": 1110 }, { "epoch": 0.06397567203144275, "grad_norm": 0.2236328125, "learning_rate": 0.00012794033275960988, "loss": 0.9644, "step": 1115 }, { "epoch": 0.06426255845310841, "grad_norm": 0.2255859375, "learning_rate": 0.0001285140562248996, "loss": 0.9428, "step": 1120 }, { "epoch": 0.06454944487477407, "grad_norm": 0.22265625, "learning_rate": 0.00012908777969018932, "loss": 0.9871, "step": 1125 }, { "epoch": 0.06483633129643974, "grad_norm": 0.2373046875, "learning_rate": 0.00012966150315547908, "loss": 0.9815, "step": 1130 }, { "epoch": 0.0651232177181054, "grad_norm": 0.236328125, "learning_rate": 0.00013023522662076878, "loss": 1.0514, "step": 1135 }, { "epoch": 0.06541010413977107, "grad_norm": 0.22265625, "learning_rate": 0.00013080895008605854, "loss": 0.9455, "step": 1140 }, { "epoch": 0.06569699056143673, "grad_norm": 0.2333984375, "learning_rate": 0.00013138267355134825, "loss": 1.0108, "step": 1145 }, { "epoch": 0.06598387698310239, "grad_norm": 0.2353515625, "learning_rate": 0.00013195639701663798, "loss": 1.0141, "step": 1150 }, { "epoch": 0.06627076340476805, "grad_norm": 0.228515625, "learning_rate": 0.00013253012048192772, "loss": 0.9909, "step": 1155 }, { "epoch": 0.06655764982643371, "grad_norm": 0.2373046875, "learning_rate": 0.00013310384394721745, "loss": 1.0097, "step": 1160 }, { "epoch": 0.06684453624809938, "grad_norm": 0.228515625, "learning_rate": 0.00013367756741250719, "loss": 0.9862, "step": 1165 }, { "epoch": 0.06713142266976505, "grad_norm": 0.228515625, "learning_rate": 0.00013425129087779692, "loss": 0.9606, "step": 1170 }, { "epoch": 0.0674183090914307, "grad_norm": 0.2333984375, "learning_rate": 0.00013482501434308663, "loss": 0.9708, "step": 1175 }, { "epoch": 0.06770519551309637, "grad_norm": 0.2314453125, "learning_rate": 0.00013539873780837639, "loss": 0.9315, "step": 1180 }, { "epoch": 0.06799208193476203, "grad_norm": 0.23046875, "learning_rate": 0.0001359724612736661, "loss": 0.9613, "step": 1185 }, { "epoch": 0.06827896835642769, "grad_norm": 0.2470703125, "learning_rate": 0.00013654618473895585, "loss": 0.9335, "step": 1190 }, { "epoch": 0.06856585477809335, "grad_norm": 0.2421875, "learning_rate": 0.00013711990820424556, "loss": 1.0134, "step": 1195 }, { "epoch": 0.06885274119975901, "grad_norm": 0.234375, "learning_rate": 0.0001376936316695353, "loss": 0.9595, "step": 1200 }, { "epoch": 0.06913962762142468, "grad_norm": 0.23046875, "learning_rate": 0.00013826735513482503, "loss": 0.9511, "step": 1205 }, { "epoch": 0.06942651404309035, "grad_norm": 0.2431640625, "learning_rate": 0.00013884107860011476, "loss": 0.9239, "step": 1210 }, { "epoch": 0.069713400464756, "grad_norm": 0.2373046875, "learning_rate": 0.00013941480206540447, "loss": 0.9585, "step": 1215 }, { "epoch": 0.07000028688642167, "grad_norm": 0.2158203125, "learning_rate": 0.00013998852553069423, "loss": 0.9738, "step": 1220 }, { "epoch": 0.07028717330808733, "grad_norm": 0.2216796875, "learning_rate": 0.00014056224899598393, "loss": 0.9214, "step": 1225 }, { "epoch": 0.07057405972975299, "grad_norm": 0.23828125, "learning_rate": 0.00014113597246127367, "loss": 0.9649, "step": 1230 }, { "epoch": 0.07086094615141865, "grad_norm": 0.2275390625, "learning_rate": 0.0001417096959265634, "loss": 0.9354, "step": 1235 }, { "epoch": 0.07114783257308431, "grad_norm": 0.224609375, "learning_rate": 0.00014228341939185313, "loss": 0.9577, "step": 1240 }, { "epoch": 0.07143471899474998, "grad_norm": 0.2373046875, "learning_rate": 0.00014285714285714287, "loss": 0.9403, "step": 1245 }, { "epoch": 0.07172160541641565, "grad_norm": 0.2255859375, "learning_rate": 0.0001434308663224326, "loss": 1.0236, "step": 1250 }, { "epoch": 0.0720084918380813, "grad_norm": 0.240234375, "learning_rate": 0.00014400458978772233, "loss": 0.9937, "step": 1255 }, { "epoch": 0.07229537825974697, "grad_norm": 0.2265625, "learning_rate": 0.00014457831325301204, "loss": 0.9787, "step": 1260 }, { "epoch": 0.07258226468141263, "grad_norm": 0.23046875, "learning_rate": 0.00014515203671830177, "loss": 1.0276, "step": 1265 }, { "epoch": 0.07286915110307829, "grad_norm": 0.2255859375, "learning_rate": 0.0001457257601835915, "loss": 0.9475, "step": 1270 }, { "epoch": 0.07315603752474395, "grad_norm": 0.234375, "learning_rate": 0.00014629948364888124, "loss": 0.9663, "step": 1275 }, { "epoch": 0.07344292394640961, "grad_norm": 0.279296875, "learning_rate": 0.00014687320711417098, "loss": 1.032, "step": 1280 }, { "epoch": 0.07372981036807529, "grad_norm": 0.2412109375, "learning_rate": 0.0001474469305794607, "loss": 1.009, "step": 1285 }, { "epoch": 0.07401669678974095, "grad_norm": 0.2353515625, "learning_rate": 0.00014802065404475042, "loss": 0.9405, "step": 1290 }, { "epoch": 0.0743035832114066, "grad_norm": 0.251953125, "learning_rate": 0.00014859437751004018, "loss": 1.0318, "step": 1295 }, { "epoch": 0.07459046963307227, "grad_norm": 0.2412109375, "learning_rate": 0.00014916810097532988, "loss": 0.9533, "step": 1300 }, { "epoch": 0.07487735605473793, "grad_norm": 0.244140625, "learning_rate": 0.00014974182444061964, "loss": 0.9656, "step": 1305 }, { "epoch": 0.07516424247640359, "grad_norm": 0.2314453125, "learning_rate": 0.00015031554790590935, "loss": 0.9271, "step": 1310 }, { "epoch": 0.07545112889806925, "grad_norm": 0.22265625, "learning_rate": 0.00015088927137119908, "loss": 0.9356, "step": 1315 }, { "epoch": 0.07573801531973491, "grad_norm": 0.2392578125, "learning_rate": 0.00015146299483648882, "loss": 0.942, "step": 1320 }, { "epoch": 0.07602490174140059, "grad_norm": 0.255859375, "learning_rate": 0.00015203671830177855, "loss": 0.9858, "step": 1325 }, { "epoch": 0.07631178816306625, "grad_norm": 0.234375, "learning_rate": 0.00015261044176706828, "loss": 0.9873, "step": 1330 }, { "epoch": 0.07659867458473191, "grad_norm": 0.25390625, "learning_rate": 0.00015318416523235802, "loss": 0.9403, "step": 1335 }, { "epoch": 0.07688556100639757, "grad_norm": 0.236328125, "learning_rate": 0.00015375788869764772, "loss": 0.9795, "step": 1340 }, { "epoch": 0.07717244742806323, "grad_norm": 0.25390625, "learning_rate": 0.00015433161216293748, "loss": 1.0268, "step": 1345 }, { "epoch": 0.07745933384972889, "grad_norm": 0.216796875, "learning_rate": 0.0001549053356282272, "loss": 0.9544, "step": 1350 }, { "epoch": 0.07774622027139455, "grad_norm": 0.234375, "learning_rate": 0.00015547905909351695, "loss": 0.9845, "step": 1355 }, { "epoch": 0.07803310669306021, "grad_norm": 0.2353515625, "learning_rate": 0.00015605278255880666, "loss": 0.9393, "step": 1360 }, { "epoch": 0.07831999311472589, "grad_norm": 0.2333984375, "learning_rate": 0.0001566265060240964, "loss": 1.0092, "step": 1365 }, { "epoch": 0.07860687953639155, "grad_norm": 0.2255859375, "learning_rate": 0.00015720022948938613, "loss": 0.9362, "step": 1370 }, { "epoch": 0.07889376595805721, "grad_norm": 0.2138671875, "learning_rate": 0.00015777395295467586, "loss": 0.9654, "step": 1375 }, { "epoch": 0.07918065237972287, "grad_norm": 0.267578125, "learning_rate": 0.00015834767641996557, "loss": 1.0074, "step": 1380 }, { "epoch": 0.07946753880138853, "grad_norm": 0.2490234375, "learning_rate": 0.00015892139988525533, "loss": 0.9504, "step": 1385 }, { "epoch": 0.07975442522305419, "grad_norm": 0.244140625, "learning_rate": 0.00015949512335054503, "loss": 0.9538, "step": 1390 }, { "epoch": 0.08004131164471985, "grad_norm": 0.22265625, "learning_rate": 0.0001600688468158348, "loss": 0.9657, "step": 1395 }, { "epoch": 0.08032819806638551, "grad_norm": 0.2294921875, "learning_rate": 0.0001606425702811245, "loss": 0.9322, "step": 1400 }, { "epoch": 0.08061508448805119, "grad_norm": 0.2412109375, "learning_rate": 0.00016121629374641423, "loss": 1.0705, "step": 1405 }, { "epoch": 0.08090197090971685, "grad_norm": 0.2392578125, "learning_rate": 0.00016179001721170397, "loss": 1.0436, "step": 1410 }, { "epoch": 0.08118885733138251, "grad_norm": 0.2392578125, "learning_rate": 0.0001623637406769937, "loss": 0.9882, "step": 1415 }, { "epoch": 0.08147574375304817, "grad_norm": 0.2294921875, "learning_rate": 0.00016293746414228343, "loss": 0.9993, "step": 1420 }, { "epoch": 0.08176263017471383, "grad_norm": 0.2412109375, "learning_rate": 0.00016351118760757317, "loss": 0.9542, "step": 1425 }, { "epoch": 0.08204951659637949, "grad_norm": 0.23046875, "learning_rate": 0.00016408491107286287, "loss": 1.0421, "step": 1430 }, { "epoch": 0.08233640301804515, "grad_norm": 0.259765625, "learning_rate": 0.00016465863453815263, "loss": 0.935, "step": 1435 }, { "epoch": 0.08262328943971081, "grad_norm": 0.265625, "learning_rate": 0.00016523235800344234, "loss": 1.0408, "step": 1440 }, { "epoch": 0.08291017586137649, "grad_norm": 0.24609375, "learning_rate": 0.0001658060814687321, "loss": 0.9487, "step": 1445 }, { "epoch": 0.08319706228304215, "grad_norm": 0.21484375, "learning_rate": 0.0001663798049340218, "loss": 0.9971, "step": 1450 }, { "epoch": 0.08348394870470781, "grad_norm": 0.2353515625, "learning_rate": 0.00016695352839931154, "loss": 0.9903, "step": 1455 }, { "epoch": 0.08377083512637347, "grad_norm": 0.26171875, "learning_rate": 0.00016752725186460127, "loss": 0.9052, "step": 1460 }, { "epoch": 0.08405772154803913, "grad_norm": 0.240234375, "learning_rate": 0.000168100975329891, "loss": 0.9976, "step": 1465 }, { "epoch": 0.08434460796970479, "grad_norm": 0.263671875, "learning_rate": 0.00016867469879518074, "loss": 1.0326, "step": 1470 }, { "epoch": 0.08463149439137045, "grad_norm": 0.265625, "learning_rate": 0.00016924842226047048, "loss": 1.0865, "step": 1475 }, { "epoch": 0.08491838081303611, "grad_norm": 0.24609375, "learning_rate": 0.00016982214572576018, "loss": 1.0016, "step": 1480 }, { "epoch": 0.08520526723470179, "grad_norm": 0.23046875, "learning_rate": 0.00017039586919104992, "loss": 0.9242, "step": 1485 }, { "epoch": 0.08549215365636745, "grad_norm": 0.216796875, "learning_rate": 0.00017096959265633965, "loss": 0.9615, "step": 1490 }, { "epoch": 0.08577904007803311, "grad_norm": 0.236328125, "learning_rate": 0.00017154331612162938, "loss": 0.9043, "step": 1495 }, { "epoch": 0.08606592649969877, "grad_norm": 0.2314453125, "learning_rate": 0.00017211703958691912, "loss": 1.0101, "step": 1500 }, { "epoch": 0.08635281292136443, "grad_norm": 0.2333984375, "learning_rate": 0.00017269076305220885, "loss": 1.0161, "step": 1505 }, { "epoch": 0.08663969934303009, "grad_norm": 0.25390625, "learning_rate": 0.00017326448651749858, "loss": 1.0498, "step": 1510 }, { "epoch": 0.08692658576469575, "grad_norm": 0.240234375, "learning_rate": 0.0001738382099827883, "loss": 0.989, "step": 1515 }, { "epoch": 0.08721347218636141, "grad_norm": 0.251953125, "learning_rate": 0.00017441193344807802, "loss": 1.001, "step": 1520 }, { "epoch": 0.08750035860802709, "grad_norm": 0.25390625, "learning_rate": 0.00017498565691336776, "loss": 1.0149, "step": 1525 }, { "epoch": 0.08778724502969275, "grad_norm": 0.2333984375, "learning_rate": 0.0001755593803786575, "loss": 0.9524, "step": 1530 }, { "epoch": 0.08807413145135841, "grad_norm": 0.2490234375, "learning_rate": 0.00017613310384394722, "loss": 0.9016, "step": 1535 }, { "epoch": 0.08836101787302407, "grad_norm": 0.2412109375, "learning_rate": 0.00017670682730923696, "loss": 0.9577, "step": 1540 }, { "epoch": 0.08864790429468973, "grad_norm": 0.234375, "learning_rate": 0.00017728055077452666, "loss": 1.0364, "step": 1545 }, { "epoch": 0.08893479071635539, "grad_norm": 0.25, "learning_rate": 0.00017785427423981642, "loss": 1.0005, "step": 1550 }, { "epoch": 0.08922167713802105, "grad_norm": 0.2578125, "learning_rate": 0.00017842799770510613, "loss": 0.9478, "step": 1555 }, { "epoch": 0.08950856355968673, "grad_norm": 0.291015625, "learning_rate": 0.0001790017211703959, "loss": 0.9278, "step": 1560 }, { "epoch": 0.08979544998135239, "grad_norm": 0.2412109375, "learning_rate": 0.0001795754446356856, "loss": 0.9875, "step": 1565 }, { "epoch": 0.09008233640301805, "grad_norm": 0.2451171875, "learning_rate": 0.00018014916810097533, "loss": 0.964, "step": 1570 }, { "epoch": 0.09036922282468371, "grad_norm": 0.2421875, "learning_rate": 0.00018072289156626507, "loss": 0.9111, "step": 1575 }, { "epoch": 0.09065610924634937, "grad_norm": 0.244140625, "learning_rate": 0.0001812966150315548, "loss": 0.9186, "step": 1580 }, { "epoch": 0.09094299566801503, "grad_norm": 0.2294921875, "learning_rate": 0.00018187033849684453, "loss": 0.949, "step": 1585 }, { "epoch": 0.09122988208968069, "grad_norm": 0.259765625, "learning_rate": 0.00018244406196213427, "loss": 0.963, "step": 1590 }, { "epoch": 0.09151676851134635, "grad_norm": 0.244140625, "learning_rate": 0.00018301778542742397, "loss": 0.9592, "step": 1595 }, { "epoch": 0.09180365493301203, "grad_norm": 0.2451171875, "learning_rate": 0.00018359150889271373, "loss": 1.0226, "step": 1600 }, { "epoch": 0.09209054135467769, "grad_norm": 0.240234375, "learning_rate": 0.00018416523235800344, "loss": 0.9923, "step": 1605 }, { "epoch": 0.09237742777634335, "grad_norm": 0.251953125, "learning_rate": 0.0001847389558232932, "loss": 1.0008, "step": 1610 }, { "epoch": 0.09266431419800901, "grad_norm": 0.25390625, "learning_rate": 0.0001853126792885829, "loss": 1.0218, "step": 1615 }, { "epoch": 0.09295120061967467, "grad_norm": 0.255859375, "learning_rate": 0.00018588640275387264, "loss": 0.9643, "step": 1620 }, { "epoch": 0.09323808704134033, "grad_norm": 0.2451171875, "learning_rate": 0.00018646012621916237, "loss": 1.0033, "step": 1625 }, { "epoch": 0.09352497346300599, "grad_norm": 0.2255859375, "learning_rate": 0.0001870338496844521, "loss": 1.0036, "step": 1630 }, { "epoch": 0.09381185988467165, "grad_norm": 0.224609375, "learning_rate": 0.00018760757314974184, "loss": 0.93, "step": 1635 }, { "epoch": 0.09409874630633733, "grad_norm": 0.248046875, "learning_rate": 0.00018818129661503157, "loss": 0.9535, "step": 1640 }, { "epoch": 0.09438563272800299, "grad_norm": 0.263671875, "learning_rate": 0.00018875502008032128, "loss": 1.0239, "step": 1645 }, { "epoch": 0.09467251914966865, "grad_norm": 0.234375, "learning_rate": 0.00018932874354561104, "loss": 0.9047, "step": 1650 }, { "epoch": 0.09495940557133431, "grad_norm": 0.2333984375, "learning_rate": 0.00018990246701090075, "loss": 1.0041, "step": 1655 }, { "epoch": 0.09524629199299997, "grad_norm": 0.2470703125, "learning_rate": 0.00019047619047619048, "loss": 0.9327, "step": 1660 }, { "epoch": 0.09553317841466563, "grad_norm": 0.23828125, "learning_rate": 0.00019104991394148021, "loss": 0.9596, "step": 1665 }, { "epoch": 0.09582006483633129, "grad_norm": 0.2421875, "learning_rate": 0.00019162363740676995, "loss": 0.9712, "step": 1670 }, { "epoch": 0.09610695125799695, "grad_norm": 0.25, "learning_rate": 0.00019219736087205968, "loss": 0.9606, "step": 1675 }, { "epoch": 0.09639383767966263, "grad_norm": 0.25390625, "learning_rate": 0.00019277108433734942, "loss": 1.0021, "step": 1680 }, { "epoch": 0.09668072410132829, "grad_norm": 0.244140625, "learning_rate": 0.00019334480780263912, "loss": 0.9667, "step": 1685 }, { "epoch": 0.09696761052299395, "grad_norm": 0.2470703125, "learning_rate": 0.00019391853126792888, "loss": 0.9807, "step": 1690 }, { "epoch": 0.09725449694465961, "grad_norm": 0.2373046875, "learning_rate": 0.0001944922547332186, "loss": 1.0133, "step": 1695 }, { "epoch": 0.09754138336632527, "grad_norm": 0.240234375, "learning_rate": 0.00019506597819850835, "loss": 0.9765, "step": 1700 }, { "epoch": 0.09782826978799093, "grad_norm": 0.2421875, "learning_rate": 0.00019563970166379806, "loss": 0.9554, "step": 1705 }, { "epoch": 0.09811515620965659, "grad_norm": 0.25, "learning_rate": 0.0001962134251290878, "loss": 0.9526, "step": 1710 }, { "epoch": 0.09840204263132225, "grad_norm": 0.2578125, "learning_rate": 0.00019678714859437752, "loss": 0.9998, "step": 1715 }, { "epoch": 0.09868892905298793, "grad_norm": 0.255859375, "learning_rate": 0.00019736087205966726, "loss": 1.035, "step": 1720 }, { "epoch": 0.09897581547465359, "grad_norm": 0.265625, "learning_rate": 0.000197934595524957, "loss": 1.0188, "step": 1725 }, { "epoch": 0.09926270189631925, "grad_norm": 0.265625, "learning_rate": 0.00019850831899024672, "loss": 0.9442, "step": 1730 }, { "epoch": 0.09954958831798491, "grad_norm": 0.2470703125, "learning_rate": 0.00019908204245553643, "loss": 0.9696, "step": 1735 }, { "epoch": 0.09983647473965057, "grad_norm": 0.2431640625, "learning_rate": 0.00019965576592082616, "loss": 0.9646, "step": 1740 }, { "epoch": 0.10012336116131623, "grad_norm": 0.251953125, "learning_rate": 0.00019999999197655853, "loss": 0.9887, "step": 1745 }, { "epoch": 0.10041024758298189, "grad_norm": 0.251953125, "learning_rate": 0.0001999999017128567, "loss": 0.9727, "step": 1750 }, { "epoch": 0.10069713400464755, "grad_norm": 0.248046875, "learning_rate": 0.00019999971115624204, "loss": 0.9446, "step": 1755 }, { "epoch": 0.10098402042631323, "grad_norm": 1.7578125, "learning_rate": 0.00019999942030690567, "loss": 1.0255, "step": 1760 }, { "epoch": 0.10127090684797889, "grad_norm": 0.291015625, "learning_rate": 0.0001999990291651393, "loss": 0.9991, "step": 1765 }, { "epoch": 0.10155779326964455, "grad_norm": 0.2578125, "learning_rate": 0.00019999853773133513, "loss": 0.9955, "step": 1770 }, { "epoch": 0.10184467969131021, "grad_norm": 0.85546875, "learning_rate": 0.00019999794600598616, "loss": 1.0538, "step": 1775 }, { "epoch": 0.10213156611297587, "grad_norm": 0.3984375, "learning_rate": 0.00019999725398968577, "loss": 0.9374, "step": 1780 }, { "epoch": 0.10241845253464153, "grad_norm": 0.322265625, "learning_rate": 0.000199996461683128, "loss": 0.9778, "step": 1785 }, { "epoch": 0.1027053389563072, "grad_norm": 1.46875, "learning_rate": 0.00019999556908710753, "loss": 0.9664, "step": 1790 }, { "epoch": 0.10299222537797285, "grad_norm": 0.44921875, "learning_rate": 0.00019999457620251953, "loss": 0.8715, "step": 1795 }, { "epoch": 0.10327911179963853, "grad_norm": 0.26171875, "learning_rate": 0.0001999934830303598, "loss": 0.9342, "step": 1800 }, { "epoch": 0.10356599822130419, "grad_norm": 9.625, "learning_rate": 0.00019999228957172477, "loss": 1.0524, "step": 1805 }, { "epoch": 0.10385288464296985, "grad_norm": 0.7890625, "learning_rate": 0.0001999909958278113, "loss": 0.9956, "step": 1810 }, { "epoch": 0.10413977106463551, "grad_norm": 1.046875, "learning_rate": 0.000199989601799917, "loss": 0.9373, "step": 1815 }, { "epoch": 0.10442665748630117, "grad_norm": 0.48828125, "learning_rate": 0.00019998810748943994, "loss": 1.0376, "step": 1820 }, { "epoch": 0.10471354390796683, "grad_norm": 0.337890625, "learning_rate": 0.00019998651289787885, "loss": 0.9916, "step": 1825 }, { "epoch": 0.1050004303296325, "grad_norm": 0.2734375, "learning_rate": 0.00019998481802683293, "loss": 1.001, "step": 1830 }, { "epoch": 0.10528731675129815, "grad_norm": 0.6484375, "learning_rate": 0.00019998302287800208, "loss": 1.0004, "step": 1835 }, { "epoch": 0.10557420317296383, "grad_norm": 0.5703125, "learning_rate": 0.00019998112745318669, "loss": 1.0004, "step": 1840 }, { "epoch": 0.10586108959462949, "grad_norm": 0.294921875, "learning_rate": 0.00019997913175428771, "loss": 0.9654, "step": 1845 }, { "epoch": 0.10614797601629515, "grad_norm": 0.306640625, "learning_rate": 0.00019997703578330674, "loss": 1.0472, "step": 1850 }, { "epoch": 0.10643486243796081, "grad_norm": 0.2490234375, "learning_rate": 0.00019997483954234583, "loss": 0.9833, "step": 1855 }, { "epoch": 0.10672174885962647, "grad_norm": 0.31640625, "learning_rate": 0.00019997254303360772, "loss": 0.9066, "step": 1860 }, { "epoch": 0.10700863528129213, "grad_norm": 1.75, "learning_rate": 0.0001999701462593956, "loss": 0.9881, "step": 1865 }, { "epoch": 0.1072955217029578, "grad_norm": 0.34375, "learning_rate": 0.00019996764922211328, "loss": 0.9131, "step": 1870 }, { "epoch": 0.10758240812462345, "grad_norm": 0.28515625, "learning_rate": 0.0001999650519242651, "loss": 0.9866, "step": 1875 }, { "epoch": 0.10786929454628913, "grad_norm": 0.26953125, "learning_rate": 0.00019996235436845603, "loss": 0.9792, "step": 1880 }, { "epoch": 0.10815618096795479, "grad_norm": 0.275390625, "learning_rate": 0.00019995955655739147, "loss": 0.9163, "step": 1885 }, { "epoch": 0.10844306738962045, "grad_norm": 0.306640625, "learning_rate": 0.00019995665849387742, "loss": 0.9521, "step": 1890 }, { "epoch": 0.10872995381128611, "grad_norm": 0.236328125, "learning_rate": 0.0001999536601808205, "loss": 0.9537, "step": 1895 }, { "epoch": 0.10901684023295177, "grad_norm": 0.2392578125, "learning_rate": 0.00019995056162122775, "loss": 1.0421, "step": 1900 }, { "epoch": 0.10930372665461743, "grad_norm": 0.259765625, "learning_rate": 0.0001999473628182068, "loss": 1.0089, "step": 1905 }, { "epoch": 0.1095906130762831, "grad_norm": 0.255859375, "learning_rate": 0.00019994406377496588, "loss": 0.916, "step": 1910 }, { "epoch": 0.10987749949794876, "grad_norm": 0.279296875, "learning_rate": 0.00019994066449481368, "loss": 0.9396, "step": 1915 }, { "epoch": 0.11016438591961443, "grad_norm": 0.263671875, "learning_rate": 0.0001999371649811594, "loss": 0.8991, "step": 1920 }, { "epoch": 0.11045127234128009, "grad_norm": 0.26171875, "learning_rate": 0.00019993356523751287, "loss": 1.0073, "step": 1925 }, { "epoch": 0.11073815876294575, "grad_norm": 0.3125, "learning_rate": 0.00019992986526748432, "loss": 0.9074, "step": 1930 }, { "epoch": 0.11102504518461141, "grad_norm": 0.2578125, "learning_rate": 0.00019992606507478463, "loss": 1.0015, "step": 1935 }, { "epoch": 0.11131193160627707, "grad_norm": 0.2734375, "learning_rate": 0.0001999221646632251, "loss": 0.928, "step": 1940 }, { "epoch": 0.11159881802794273, "grad_norm": 0.279296875, "learning_rate": 0.0001999181640367175, "loss": 1.0132, "step": 1945 }, { "epoch": 0.1118857044496084, "grad_norm": 0.25, "learning_rate": 0.00019991406319927428, "loss": 0.9459, "step": 1950 }, { "epoch": 0.11217259087127407, "grad_norm": 0.26953125, "learning_rate": 0.00019990986215500826, "loss": 0.9181, "step": 1955 }, { "epoch": 0.11245947729293973, "grad_norm": 0.259765625, "learning_rate": 0.00019990556090813277, "loss": 0.9807, "step": 1960 }, { "epoch": 0.11274636371460539, "grad_norm": 0.275390625, "learning_rate": 0.00019990115946296168, "loss": 0.9801, "step": 1965 }, { "epoch": 0.11303325013627105, "grad_norm": 0.2578125, "learning_rate": 0.00019989665782390933, "loss": 0.9654, "step": 1970 }, { "epoch": 0.11332013655793671, "grad_norm": 0.2490234375, "learning_rate": 0.00019989205599549057, "loss": 0.9789, "step": 1975 }, { "epoch": 0.11360702297960237, "grad_norm": 0.2578125, "learning_rate": 0.00019988735398232066, "loss": 0.9993, "step": 1980 }, { "epoch": 0.11389390940126803, "grad_norm": 0.248046875, "learning_rate": 0.00019988255178911543, "loss": 0.9957, "step": 1985 }, { "epoch": 0.1141807958229337, "grad_norm": 0.25390625, "learning_rate": 0.00019987764942069113, "loss": 0.962, "step": 1990 }, { "epoch": 0.11446768224459937, "grad_norm": 0.32421875, "learning_rate": 0.00019987264688196453, "loss": 1.0021, "step": 1995 }, { "epoch": 0.11475456866626503, "grad_norm": 0.25, "learning_rate": 0.00019986754417795278, "loss": 0.9128, "step": 2000 }, { "epoch": 0.11504145508793069, "grad_norm": 0.2734375, "learning_rate": 0.00019986234131377353, "loss": 0.9282, "step": 2005 }, { "epoch": 0.11532834150959635, "grad_norm": 0.263671875, "learning_rate": 0.00019985703829464492, "loss": 1.0265, "step": 2010 }, { "epoch": 0.11561522793126201, "grad_norm": 0.267578125, "learning_rate": 0.0001998516351258855, "loss": 1.0036, "step": 2015 }, { "epoch": 0.11590211435292767, "grad_norm": 0.2392578125, "learning_rate": 0.00019984613181291426, "loss": 0.931, "step": 2020 }, { "epoch": 0.11618900077459333, "grad_norm": 0.267578125, "learning_rate": 0.00019984052836125065, "loss": 0.9383, "step": 2025 }, { "epoch": 0.116475887196259, "grad_norm": 0.2431640625, "learning_rate": 0.00019983482477651455, "loss": 0.8962, "step": 2030 }, { "epoch": 0.11676277361792467, "grad_norm": 0.26953125, "learning_rate": 0.00019982902106442622, "loss": 0.9796, "step": 2035 }, { "epoch": 0.11704966003959033, "grad_norm": 0.263671875, "learning_rate": 0.00019982311723080643, "loss": 0.9679, "step": 2040 }, { "epoch": 0.11733654646125599, "grad_norm": 0.24609375, "learning_rate": 0.00019981711328157626, "loss": 0.8819, "step": 2045 }, { "epoch": 0.11762343288292165, "grad_norm": 0.26171875, "learning_rate": 0.00019981100922275728, "loss": 0.9892, "step": 2050 }, { "epoch": 0.11791031930458731, "grad_norm": 0.25, "learning_rate": 0.00019980480506047143, "loss": 1.053, "step": 2055 }, { "epoch": 0.11819720572625297, "grad_norm": 0.2470703125, "learning_rate": 0.00019979850080094108, "loss": 0.9714, "step": 2060 }, { "epoch": 0.11848409214791863, "grad_norm": 0.26171875, "learning_rate": 0.00019979209645048889, "loss": 0.9407, "step": 2065 }, { "epoch": 0.1187709785695843, "grad_norm": 0.2578125, "learning_rate": 0.00019978559201553803, "loss": 0.9335, "step": 2070 }, { "epoch": 0.11905786499124997, "grad_norm": 0.263671875, "learning_rate": 0.000199778987502612, "loss": 0.9652, "step": 2075 }, { "epoch": 0.11934475141291563, "grad_norm": 0.2734375, "learning_rate": 0.00019977228291833462, "loss": 1.0275, "step": 2080 }, { "epoch": 0.11963163783458129, "grad_norm": 0.251953125, "learning_rate": 0.00019976547826943019, "loss": 0.9112, "step": 2085 }, { "epoch": 0.11991852425624695, "grad_norm": 0.27734375, "learning_rate": 0.00019975857356272322, "loss": 0.9186, "step": 2090 }, { "epoch": 0.12020541067791261, "grad_norm": 0.2578125, "learning_rate": 0.00019975156880513867, "loss": 0.916, "step": 2095 }, { "epoch": 0.12049229709957827, "grad_norm": 0.26171875, "learning_rate": 0.00019974446400370185, "loss": 0.9274, "step": 2100 }, { "epoch": 0.12077918352124394, "grad_norm": 0.26171875, "learning_rate": 0.00019973725916553838, "loss": 0.9107, "step": 2105 }, { "epoch": 0.1210660699429096, "grad_norm": 0.328125, "learning_rate": 0.00019972995429787415, "loss": 0.9817, "step": 2110 }, { "epoch": 0.12135295636457527, "grad_norm": 0.259765625, "learning_rate": 0.00019972254940803553, "loss": 0.9286, "step": 2115 }, { "epoch": 0.12163984278624093, "grad_norm": 0.267578125, "learning_rate": 0.00019971504450344902, "loss": 1.0019, "step": 2120 }, { "epoch": 0.12192672920790659, "grad_norm": 0.2470703125, "learning_rate": 0.00019970743959164155, "loss": 0.9622, "step": 2125 }, { "epoch": 0.12221361562957225, "grad_norm": 0.271484375, "learning_rate": 0.00019969973468024032, "loss": 0.9713, "step": 2130 }, { "epoch": 0.12250050205123791, "grad_norm": 0.27734375, "learning_rate": 0.0001996919297769728, "loss": 0.9778, "step": 2135 }, { "epoch": 0.12278738847290357, "grad_norm": 0.25390625, "learning_rate": 0.00019968402488966677, "loss": 0.9051, "step": 2140 }, { "epoch": 0.12307427489456924, "grad_norm": 0.24609375, "learning_rate": 0.00019967602002625027, "loss": 0.914, "step": 2145 }, { "epoch": 0.1233611613162349, "grad_norm": 0.271484375, "learning_rate": 0.00019966791519475169, "loss": 1.0036, "step": 2150 }, { "epoch": 0.12364804773790057, "grad_norm": 0.26171875, "learning_rate": 0.00019965971040329948, "loss": 0.9597, "step": 2155 }, { "epoch": 0.12393493415956623, "grad_norm": 0.24609375, "learning_rate": 0.00019965140566012253, "loss": 0.9044, "step": 2160 }, { "epoch": 0.12422182058123189, "grad_norm": 0.291015625, "learning_rate": 0.00019964300097355, "loss": 0.961, "step": 2165 }, { "epoch": 0.12450870700289755, "grad_norm": 0.265625, "learning_rate": 0.0001996344963520111, "loss": 0.9994, "step": 2170 }, { "epoch": 0.12479559342456321, "grad_norm": 0.244140625, "learning_rate": 0.00019962589180403542, "loss": 0.9996, "step": 2175 }, { "epoch": 0.1250824798462289, "grad_norm": 0.27734375, "learning_rate": 0.00019961718733825268, "loss": 0.9635, "step": 2180 }, { "epoch": 0.12536936626789455, "grad_norm": 0.248046875, "learning_rate": 0.00019960838296339287, "loss": 0.9133, "step": 2185 }, { "epoch": 0.1256562526895602, "grad_norm": 0.263671875, "learning_rate": 0.00019959947868828618, "loss": 1.0063, "step": 2190 }, { "epoch": 0.12594313911122587, "grad_norm": 0.25390625, "learning_rate": 0.00019959047452186294, "loss": 1.0152, "step": 2195 }, { "epoch": 0.12623002553289153, "grad_norm": 0.2490234375, "learning_rate": 0.00019958137047315375, "loss": 0.9364, "step": 2200 }, { "epoch": 0.1265169119545572, "grad_norm": 0.255859375, "learning_rate": 0.0001995721665512893, "loss": 0.9866, "step": 2205 }, { "epoch": 0.12680379837622285, "grad_norm": 0.2470703125, "learning_rate": 0.00019956286276550048, "loss": 0.9507, "step": 2210 }, { "epoch": 0.12709068479788851, "grad_norm": 0.2734375, "learning_rate": 0.00019955345912511837, "loss": 0.9791, "step": 2215 }, { "epoch": 0.12737757121955418, "grad_norm": 0.2431640625, "learning_rate": 0.0001995439556395741, "loss": 0.8985, "step": 2220 }, { "epoch": 0.12766445764121984, "grad_norm": 0.2431640625, "learning_rate": 0.00019953435231839906, "loss": 1.0073, "step": 2225 }, { "epoch": 0.1279513440628855, "grad_norm": 0.2392578125, "learning_rate": 0.0001995246491712247, "loss": 0.9045, "step": 2230 }, { "epoch": 0.12823823048455116, "grad_norm": 0.251953125, "learning_rate": 0.00019951484620778258, "loss": 0.9958, "step": 2235 }, { "epoch": 0.12852511690621682, "grad_norm": 0.24609375, "learning_rate": 0.00019950494343790438, "loss": 0.956, "step": 2240 }, { "epoch": 0.12881200332788248, "grad_norm": 0.27734375, "learning_rate": 0.00019949494087152193, "loss": 0.9022, "step": 2245 }, { "epoch": 0.12909888974954814, "grad_norm": 0.244140625, "learning_rate": 0.00019948483851866703, "loss": 1.0313, "step": 2250 }, { "epoch": 0.12938577617121383, "grad_norm": 0.248046875, "learning_rate": 0.0001994746363894717, "loss": 0.9478, "step": 2255 }, { "epoch": 0.1296726625928795, "grad_norm": 0.25, "learning_rate": 0.000199464334494168, "loss": 0.8836, "step": 2260 }, { "epoch": 0.12995954901454515, "grad_norm": 0.248046875, "learning_rate": 0.00019945393284308792, "loss": 1.0769, "step": 2265 }, { "epoch": 0.1302464354362108, "grad_norm": 0.259765625, "learning_rate": 0.0001994434314466636, "loss": 0.9285, "step": 2270 }, { "epoch": 0.13053332185787647, "grad_norm": 0.25390625, "learning_rate": 0.00019943283031542726, "loss": 0.938, "step": 2275 }, { "epoch": 0.13082020827954213, "grad_norm": 0.27734375, "learning_rate": 0.00019942212946001103, "loss": 1.035, "step": 2280 }, { "epoch": 0.1311070947012078, "grad_norm": 0.251953125, "learning_rate": 0.00019941132889114718, "loss": 0.9627, "step": 2285 }, { "epoch": 0.13139398112287345, "grad_norm": 0.255859375, "learning_rate": 0.0001994004286196679, "loss": 0.9754, "step": 2290 }, { "epoch": 0.13168086754453912, "grad_norm": 0.2421875, "learning_rate": 0.0001993894286565054, "loss": 0.889, "step": 2295 }, { "epoch": 0.13196775396620478, "grad_norm": 0.26953125, "learning_rate": 0.00019937832901269187, "loss": 1.0065, "step": 2300 }, { "epoch": 0.13225464038787044, "grad_norm": 0.26953125, "learning_rate": 0.00019936712969935948, "loss": 0.9882, "step": 2305 }, { "epoch": 0.1325415268095361, "grad_norm": 0.265625, "learning_rate": 0.00019935583072774039, "loss": 1.0268, "step": 2310 }, { "epoch": 0.13282841323120176, "grad_norm": 0.26171875, "learning_rate": 0.0001993444321091666, "loss": 1.0224, "step": 2315 }, { "epoch": 0.13311529965286742, "grad_norm": 0.248046875, "learning_rate": 0.0001993329338550702, "loss": 0.9343, "step": 2320 }, { "epoch": 0.13340218607453308, "grad_norm": 0.275390625, "learning_rate": 0.00019932133597698313, "loss": 0.9403, "step": 2325 }, { "epoch": 0.13368907249619877, "grad_norm": 0.2412109375, "learning_rate": 0.0001993096384865372, "loss": 0.9411, "step": 2330 }, { "epoch": 0.13397595891786443, "grad_norm": 0.2578125, "learning_rate": 0.00019929784139546426, "loss": 0.9706, "step": 2335 }, { "epoch": 0.1342628453395301, "grad_norm": 0.28125, "learning_rate": 0.00019928594471559587, "loss": 1.043, "step": 2340 }, { "epoch": 0.13454973176119575, "grad_norm": 0.283203125, "learning_rate": 0.00019927394845886361, "loss": 1.0125, "step": 2345 }, { "epoch": 0.1348366181828614, "grad_norm": 0.259765625, "learning_rate": 0.00019926185263729896, "loss": 0.9614, "step": 2350 }, { "epoch": 0.13512350460452707, "grad_norm": 0.2490234375, "learning_rate": 0.00019924965726303304, "loss": 0.9054, "step": 2355 }, { "epoch": 0.13541039102619273, "grad_norm": 0.259765625, "learning_rate": 0.0001992373623482971, "loss": 0.9111, "step": 2360 }, { "epoch": 0.1356972774478584, "grad_norm": 0.244140625, "learning_rate": 0.00019922496790542195, "loss": 0.9587, "step": 2365 }, { "epoch": 0.13598416386952406, "grad_norm": 0.27734375, "learning_rate": 0.00019921247394683844, "loss": 0.946, "step": 2370 }, { "epoch": 0.13627105029118972, "grad_norm": 0.2578125, "learning_rate": 0.00019919988048507713, "loss": 1.0321, "step": 2375 }, { "epoch": 0.13655793671285538, "grad_norm": 0.294921875, "learning_rate": 0.00019918718753276834, "loss": 1.0035, "step": 2380 }, { "epoch": 0.13684482313452104, "grad_norm": 0.255859375, "learning_rate": 0.00019917439510264227, "loss": 0.9164, "step": 2385 }, { "epoch": 0.1371317095561867, "grad_norm": 0.271484375, "learning_rate": 0.0001991615032075288, "loss": 0.988, "step": 2390 }, { "epoch": 0.13741859597785236, "grad_norm": 0.25390625, "learning_rate": 0.00019914851186035755, "loss": 0.9927, "step": 2395 }, { "epoch": 0.13770548239951802, "grad_norm": 0.271484375, "learning_rate": 0.00019913542107415801, "loss": 0.9288, "step": 2400 }, { "epoch": 0.13799236882118368, "grad_norm": 0.265625, "learning_rate": 0.00019912223086205932, "loss": 0.9556, "step": 2405 }, { "epoch": 0.13827925524284937, "grad_norm": 0.26171875, "learning_rate": 0.00019910894123729032, "loss": 0.9792, "step": 2410 }, { "epoch": 0.13856614166451503, "grad_norm": 0.259765625, "learning_rate": 0.00019909555221317955, "loss": 1.024, "step": 2415 }, { "epoch": 0.1388530280861807, "grad_norm": 0.240234375, "learning_rate": 0.00019908206380315528, "loss": 0.9548, "step": 2420 }, { "epoch": 0.13913991450784635, "grad_norm": 0.2431640625, "learning_rate": 0.00019906847602074547, "loss": 0.8983, "step": 2425 }, { "epoch": 0.139426800929512, "grad_norm": 0.267578125, "learning_rate": 0.00019905478887957773, "loss": 0.9681, "step": 2430 }, { "epoch": 0.13971368735117767, "grad_norm": 0.2373046875, "learning_rate": 0.00019904100239337923, "loss": 0.9776, "step": 2435 }, { "epoch": 0.14000057377284333, "grad_norm": 0.255859375, "learning_rate": 0.00019902711657597693, "loss": 1.0531, "step": 2440 }, { "epoch": 0.140287460194509, "grad_norm": 0.25390625, "learning_rate": 0.00019901313144129727, "loss": 0.9636, "step": 2445 }, { "epoch": 0.14057434661617466, "grad_norm": 0.251953125, "learning_rate": 0.00019899904700336642, "loss": 1.0377, "step": 2450 }, { "epoch": 0.14086123303784032, "grad_norm": 0.25, "learning_rate": 0.00019898486327631006, "loss": 1.0089, "step": 2455 }, { "epoch": 0.14114811945950598, "grad_norm": 0.26953125, "learning_rate": 0.00019897058027435347, "loss": 1.0078, "step": 2460 }, { "epoch": 0.14143500588117164, "grad_norm": 0.251953125, "learning_rate": 0.00019895619801182148, "loss": 1.0086, "step": 2465 }, { "epoch": 0.1417218923028373, "grad_norm": 0.275390625, "learning_rate": 0.00019894171650313856, "loss": 0.9727, "step": 2470 }, { "epoch": 0.14200877872450296, "grad_norm": 0.2734375, "learning_rate": 0.00019892713576282863, "loss": 1.0225, "step": 2475 }, { "epoch": 0.14229566514616862, "grad_norm": 0.251953125, "learning_rate": 0.00019891245580551509, "loss": 0.9702, "step": 2480 }, { "epoch": 0.14258255156783428, "grad_norm": 0.248046875, "learning_rate": 0.000198897676645921, "loss": 0.9831, "step": 2485 }, { "epoch": 0.14286943798949997, "grad_norm": 0.251953125, "learning_rate": 0.00019888279829886877, "loss": 1.0063, "step": 2490 }, { "epoch": 0.14315632441116563, "grad_norm": 0.259765625, "learning_rate": 0.00019886782077928038, "loss": 0.9133, "step": 2495 }, { "epoch": 0.1434432108328313, "grad_norm": 0.2578125, "learning_rate": 0.0001988527441021772, "loss": 1.0048, "step": 2500 }, { "epoch": 0.14373009725449695, "grad_norm": 0.259765625, "learning_rate": 0.0001988375682826801, "loss": 0.926, "step": 2505 }, { "epoch": 0.1440169836761626, "grad_norm": 0.267578125, "learning_rate": 0.00019882229333600932, "loss": 0.9906, "step": 2510 }, { "epoch": 0.14430387009782827, "grad_norm": 0.259765625, "learning_rate": 0.00019880691927748467, "loss": 1.0493, "step": 2515 }, { "epoch": 0.14459075651949393, "grad_norm": 0.296875, "learning_rate": 0.00019879144612252515, "loss": 0.9432, "step": 2520 }, { "epoch": 0.1448776429411596, "grad_norm": 0.25, "learning_rate": 0.00019877587388664934, "loss": 0.9294, "step": 2525 }, { "epoch": 0.14516452936282526, "grad_norm": 0.28125, "learning_rate": 0.000198760202585475, "loss": 0.9654, "step": 2530 }, { "epoch": 0.14545141578449092, "grad_norm": 0.263671875, "learning_rate": 0.00019874443223471945, "loss": 0.9756, "step": 2535 }, { "epoch": 0.14573830220615658, "grad_norm": 0.255859375, "learning_rate": 0.0001987285628501992, "loss": 1.0042, "step": 2540 }, { "epoch": 0.14602518862782224, "grad_norm": 0.283203125, "learning_rate": 0.00019871259444783016, "loss": 0.9868, "step": 2545 }, { "epoch": 0.1463120750494879, "grad_norm": 0.25390625, "learning_rate": 0.0001986965270436275, "loss": 0.9804, "step": 2550 }, { "epoch": 0.14659896147115356, "grad_norm": 0.25390625, "learning_rate": 0.00019868036065370567, "loss": 0.9763, "step": 2555 }, { "epoch": 0.14688584789281922, "grad_norm": 0.251953125, "learning_rate": 0.00019866409529427852, "loss": 0.9969, "step": 2560 }, { "epoch": 0.14717273431448488, "grad_norm": 0.275390625, "learning_rate": 0.00019864773098165898, "loss": 1.0363, "step": 2565 }, { "epoch": 0.14745962073615057, "grad_norm": 0.251953125, "learning_rate": 0.00019863126773225935, "loss": 0.9608, "step": 2570 }, { "epoch": 0.14774650715781623, "grad_norm": 0.27734375, "learning_rate": 0.00019861470556259113, "loss": 0.9833, "step": 2575 }, { "epoch": 0.1480333935794819, "grad_norm": 0.259765625, "learning_rate": 0.00019859804448926503, "loss": 1.0343, "step": 2580 }, { "epoch": 0.14832028000114755, "grad_norm": 0.2734375, "learning_rate": 0.0001985812845289909, "loss": 1.0014, "step": 2585 }, { "epoch": 0.1486071664228132, "grad_norm": 0.259765625, "learning_rate": 0.0001985644256985778, "loss": 0.9407, "step": 2590 }, { "epoch": 0.14889405284447887, "grad_norm": 0.26171875, "learning_rate": 0.000198547468014934, "loss": 0.9214, "step": 2595 }, { "epoch": 0.14918093926614454, "grad_norm": 0.265625, "learning_rate": 0.00019853041149506687, "loss": 0.9564, "step": 2600 }, { "epoch": 0.1494678256878102, "grad_norm": 0.267578125, "learning_rate": 0.0001985132561560829, "loss": 1.0476, "step": 2605 }, { "epoch": 0.14975471210947586, "grad_norm": 0.267578125, "learning_rate": 0.00019849600201518768, "loss": 1.0092, "step": 2610 }, { "epoch": 0.15004159853114152, "grad_norm": 0.28125, "learning_rate": 0.00019847864908968592, "loss": 1.0505, "step": 2615 }, { "epoch": 0.15032848495280718, "grad_norm": 0.271484375, "learning_rate": 0.0001984611973969814, "loss": 1.005, "step": 2620 }, { "epoch": 0.15061537137447284, "grad_norm": 0.263671875, "learning_rate": 0.0001984436469545769, "loss": 0.9405, "step": 2625 }, { "epoch": 0.1509022577961385, "grad_norm": 0.2431640625, "learning_rate": 0.00019842599778007437, "loss": 0.9519, "step": 2630 }, { "epoch": 0.15118914421780416, "grad_norm": 0.2431640625, "learning_rate": 0.00019840824989117464, "loss": 0.9599, "step": 2635 }, { "epoch": 0.15147603063946982, "grad_norm": 0.306640625, "learning_rate": 0.0001983904033056776, "loss": 1.0082, "step": 2640 }, { "epoch": 0.15176291706113548, "grad_norm": 0.265625, "learning_rate": 0.00019837245804148218, "loss": 0.9187, "step": 2645 }, { "epoch": 0.15204980348280117, "grad_norm": 0.25, "learning_rate": 0.00019835441411658616, "loss": 1.0235, "step": 2650 }, { "epoch": 0.15233668990446683, "grad_norm": 0.2890625, "learning_rate": 0.00019833627154908635, "loss": 1.0333, "step": 2655 }, { "epoch": 0.1526235763261325, "grad_norm": 0.24609375, "learning_rate": 0.00019831803035717853, "loss": 0.993, "step": 2660 }, { "epoch": 0.15291046274779815, "grad_norm": 0.251953125, "learning_rate": 0.00019829969055915728, "loss": 0.9833, "step": 2665 }, { "epoch": 0.15319734916946381, "grad_norm": 0.267578125, "learning_rate": 0.00019828125217341618, "loss": 0.9627, "step": 2670 }, { "epoch": 0.15348423559112948, "grad_norm": 0.28515625, "learning_rate": 0.0001982627152184476, "loss": 0.9026, "step": 2675 }, { "epoch": 0.15377112201279514, "grad_norm": 0.267578125, "learning_rate": 0.00019824407971284286, "loss": 0.922, "step": 2680 }, { "epoch": 0.1540580084344608, "grad_norm": 0.248046875, "learning_rate": 0.000198225345675292, "loss": 0.9968, "step": 2685 }, { "epoch": 0.15434489485612646, "grad_norm": 0.2578125, "learning_rate": 0.00019820651312458403, "loss": 0.9276, "step": 2690 }, { "epoch": 0.15463178127779212, "grad_norm": 0.255859375, "learning_rate": 0.00019818758207960663, "loss": 0.9642, "step": 2695 }, { "epoch": 0.15491866769945778, "grad_norm": 0.265625, "learning_rate": 0.00019816855255934635, "loss": 1.0146, "step": 2700 }, { "epoch": 0.15520555412112344, "grad_norm": 0.259765625, "learning_rate": 0.0001981494245828884, "loss": 1.0142, "step": 2705 }, { "epoch": 0.1554924405427891, "grad_norm": 0.275390625, "learning_rate": 0.00019813019816941689, "loss": 1.0381, "step": 2710 }, { "epoch": 0.15577932696445476, "grad_norm": 0.2470703125, "learning_rate": 0.00019811087333821453, "loss": 0.9641, "step": 2715 }, { "epoch": 0.15606621338612042, "grad_norm": 0.275390625, "learning_rate": 0.00019809145010866276, "loss": 0.9545, "step": 2720 }, { "epoch": 0.1563530998077861, "grad_norm": 0.265625, "learning_rate": 0.00019807192850024175, "loss": 0.9559, "step": 2725 }, { "epoch": 0.15663998622945177, "grad_norm": 0.2392578125, "learning_rate": 0.00019805230853253033, "loss": 0.9167, "step": 2730 }, { "epoch": 0.15692687265111743, "grad_norm": 0.26953125, "learning_rate": 0.00019803259022520587, "loss": 0.9965, "step": 2735 }, { "epoch": 0.1572137590727831, "grad_norm": 0.2421875, "learning_rate": 0.0001980127735980445, "loss": 0.9369, "step": 2740 }, { "epoch": 0.15750064549444875, "grad_norm": 0.267578125, "learning_rate": 0.00019799285867092096, "loss": 0.9674, "step": 2745 }, { "epoch": 0.15778753191611442, "grad_norm": 0.306640625, "learning_rate": 0.00019797284546380848, "loss": 0.9511, "step": 2750 }, { "epoch": 0.15807441833778008, "grad_norm": 0.2421875, "learning_rate": 0.00019795273399677893, "loss": 1.0382, "step": 2755 }, { "epoch": 0.15836130475944574, "grad_norm": 0.2578125, "learning_rate": 0.00019793252429000266, "loss": 0.9608, "step": 2760 }, { "epoch": 0.1586481911811114, "grad_norm": 0.2578125, "learning_rate": 0.00019791221636374865, "loss": 0.9373, "step": 2765 }, { "epoch": 0.15893507760277706, "grad_norm": 0.255859375, "learning_rate": 0.0001978918102383843, "loss": 0.9594, "step": 2770 }, { "epoch": 0.15922196402444272, "grad_norm": 0.275390625, "learning_rate": 0.00019787130593437553, "loss": 1.0186, "step": 2775 }, { "epoch": 0.15950885044610838, "grad_norm": 0.2578125, "learning_rate": 0.00019785070347228673, "loss": 0.9562, "step": 2780 }, { "epoch": 0.15979573686777404, "grad_norm": 0.259765625, "learning_rate": 0.00019783000287278078, "loss": 1.0695, "step": 2785 }, { "epoch": 0.1600826232894397, "grad_norm": 0.255859375, "learning_rate": 0.00019780920415661882, "loss": 1.0072, "step": 2790 }, { "epoch": 0.16036950971110536, "grad_norm": 0.2734375, "learning_rate": 0.0001977883073446606, "loss": 0.9735, "step": 2795 }, { "epoch": 0.16065639613277102, "grad_norm": 0.2490234375, "learning_rate": 0.00019776731245786414, "loss": 1.0351, "step": 2800 }, { "epoch": 0.1609432825544367, "grad_norm": 0.2490234375, "learning_rate": 0.00019774621951728583, "loss": 0.991, "step": 2805 }, { "epoch": 0.16123016897610237, "grad_norm": 0.255859375, "learning_rate": 0.00019772502854408042, "loss": 0.9546, "step": 2810 }, { "epoch": 0.16151705539776803, "grad_norm": 0.267578125, "learning_rate": 0.000197703739559501, "loss": 0.9768, "step": 2815 }, { "epoch": 0.1618039418194337, "grad_norm": 0.28515625, "learning_rate": 0.00019768235258489888, "loss": 0.9904, "step": 2820 }, { "epoch": 0.16209082824109936, "grad_norm": 0.279296875, "learning_rate": 0.00019766086764172377, "loss": 0.9743, "step": 2825 }, { "epoch": 0.16237771466276502, "grad_norm": 0.271484375, "learning_rate": 0.00019763928475152352, "loss": 1.0114, "step": 2830 }, { "epoch": 0.16266460108443068, "grad_norm": 0.2470703125, "learning_rate": 0.00019761760393594425, "loss": 0.9851, "step": 2835 }, { "epoch": 0.16295148750609634, "grad_norm": 0.279296875, "learning_rate": 0.00019759582521673035, "loss": 0.9588, "step": 2840 }, { "epoch": 0.163238373927762, "grad_norm": 0.2734375, "learning_rate": 0.00019757394861572432, "loss": 0.93, "step": 2845 }, { "epoch": 0.16352526034942766, "grad_norm": 0.2734375, "learning_rate": 0.00019755197415486685, "loss": 1.0172, "step": 2850 }, { "epoch": 0.16381214677109332, "grad_norm": 0.251953125, "learning_rate": 0.00019752990185619682, "loss": 0.9563, "step": 2855 }, { "epoch": 0.16409903319275898, "grad_norm": 0.259765625, "learning_rate": 0.00019750773174185123, "loss": 0.9662, "step": 2860 }, { "epoch": 0.16438591961442464, "grad_norm": 0.279296875, "learning_rate": 0.00019748546383406508, "loss": 0.9817, "step": 2865 }, { "epoch": 0.1646728060360903, "grad_norm": 0.26953125, "learning_rate": 0.00019746309815517153, "loss": 0.9238, "step": 2870 }, { "epoch": 0.16495969245775596, "grad_norm": 0.255859375, "learning_rate": 0.0001974406347276019, "loss": 0.9957, "step": 2875 }, { "epoch": 0.16524657887942162, "grad_norm": 0.25390625, "learning_rate": 0.0001974180735738853, "loss": 0.9593, "step": 2880 }, { "epoch": 0.1655334653010873, "grad_norm": 0.26171875, "learning_rate": 0.00019739541471664907, "loss": 0.9827, "step": 2885 }, { "epoch": 0.16582035172275297, "grad_norm": 0.255859375, "learning_rate": 0.00019737265817861845, "loss": 0.9892, "step": 2890 }, { "epoch": 0.16610723814441863, "grad_norm": 0.28125, "learning_rate": 0.00019734980398261666, "loss": 1.06, "step": 2895 }, { "epoch": 0.1663941245660843, "grad_norm": 0.255859375, "learning_rate": 0.00019732685215156483, "loss": 0.9331, "step": 2900 }, { "epoch": 0.16668101098774996, "grad_norm": 0.287109375, "learning_rate": 0.00019730380270848209, "loss": 0.8761, "step": 2905 }, { "epoch": 0.16696789740941562, "grad_norm": 0.296875, "learning_rate": 0.00019728065567648536, "loss": 0.9744, "step": 2910 }, { "epoch": 0.16725478383108128, "grad_norm": 0.25390625, "learning_rate": 0.00019725741107878958, "loss": 0.9697, "step": 2915 }, { "epoch": 0.16754167025274694, "grad_norm": 0.275390625, "learning_rate": 0.00019723406893870738, "loss": 0.9559, "step": 2920 }, { "epoch": 0.1678285566744126, "grad_norm": 0.263671875, "learning_rate": 0.0001972106292796493, "loss": 0.955, "step": 2925 }, { "epoch": 0.16811544309607826, "grad_norm": 0.267578125, "learning_rate": 0.00019718709212512373, "loss": 0.9623, "step": 2930 }, { "epoch": 0.16840232951774392, "grad_norm": 0.248046875, "learning_rate": 0.00019716345749873674, "loss": 1.0043, "step": 2935 }, { "epoch": 0.16868921593940958, "grad_norm": 0.275390625, "learning_rate": 0.00019713972542419227, "loss": 0.94, "step": 2940 }, { "epoch": 0.16897610236107524, "grad_norm": 0.275390625, "learning_rate": 0.00019711589592529187, "loss": 1.0174, "step": 2945 }, { "epoch": 0.1692629887827409, "grad_norm": 0.296875, "learning_rate": 0.0001970919690259349, "loss": 0.9797, "step": 2950 }, { "epoch": 0.16954987520440656, "grad_norm": 0.279296875, "learning_rate": 0.00019706794475011835, "loss": 1.0466, "step": 2955 }, { "epoch": 0.16983676162607222, "grad_norm": 0.267578125, "learning_rate": 0.00019704382312193687, "loss": 0.9893, "step": 2960 }, { "epoch": 0.1701236480477379, "grad_norm": 0.2578125, "learning_rate": 0.00019701960416558282, "loss": 0.9826, "step": 2965 }, { "epoch": 0.17041053446940357, "grad_norm": 0.255859375, "learning_rate": 0.0001969952879053461, "loss": 0.9424, "step": 2970 }, { "epoch": 0.17069742089106923, "grad_norm": 0.279296875, "learning_rate": 0.00019697087436561418, "loss": 0.9355, "step": 2975 }, { "epoch": 0.1709843073127349, "grad_norm": 0.26953125, "learning_rate": 0.0001969463635708722, "loss": 0.9706, "step": 2980 }, { "epoch": 0.17127119373440056, "grad_norm": 0.263671875, "learning_rate": 0.0001969217555457027, "loss": 0.9898, "step": 2985 }, { "epoch": 0.17155808015606622, "grad_norm": 0.275390625, "learning_rate": 0.00019689705031478586, "loss": 1.0372, "step": 2990 }, { "epoch": 0.17184496657773188, "grad_norm": 0.2734375, "learning_rate": 0.00019687224790289933, "loss": 0.9821, "step": 2995 }, { "epoch": 0.17213185299939754, "grad_norm": 0.263671875, "learning_rate": 0.00019684734833491811, "loss": 0.9086, "step": 3000 }, { "epoch": 0.1724187394210632, "grad_norm": 0.255859375, "learning_rate": 0.0001968223516358148, "loss": 0.9692, "step": 3005 }, { "epoch": 0.17270562584272886, "grad_norm": 0.251953125, "learning_rate": 0.0001967972578306593, "loss": 1.0095, "step": 3010 }, { "epoch": 0.17299251226439452, "grad_norm": 0.2421875, "learning_rate": 0.00019677206694461896, "loss": 0.9775, "step": 3015 }, { "epoch": 0.17327939868606018, "grad_norm": 0.267578125, "learning_rate": 0.0001967467790029585, "loss": 1.04, "step": 3020 }, { "epoch": 0.17356628510772584, "grad_norm": 0.25390625, "learning_rate": 0.0001967213940310399, "loss": 1.0475, "step": 3025 }, { "epoch": 0.1738531715293915, "grad_norm": 0.267578125, "learning_rate": 0.00019669591205432254, "loss": 0.9686, "step": 3030 }, { "epoch": 0.17414005795105716, "grad_norm": 0.240234375, "learning_rate": 0.0001966703330983631, "loss": 0.9238, "step": 3035 }, { "epoch": 0.17442694437272283, "grad_norm": 0.310546875, "learning_rate": 0.00019664465718881543, "loss": 1.004, "step": 3040 }, { "epoch": 0.1747138307943885, "grad_norm": 0.2578125, "learning_rate": 0.00019661888435143073, "loss": 0.9546, "step": 3045 }, { "epoch": 0.17500071721605417, "grad_norm": 0.251953125, "learning_rate": 0.00019659301461205728, "loss": 0.9713, "step": 3050 }, { "epoch": 0.17528760363771984, "grad_norm": 0.265625, "learning_rate": 0.0001965670479966407, "loss": 0.9718, "step": 3055 }, { "epoch": 0.1755744900593855, "grad_norm": 0.28515625, "learning_rate": 0.0001965409845312236, "loss": 0.976, "step": 3060 }, { "epoch": 0.17586137648105116, "grad_norm": 0.267578125, "learning_rate": 0.0001965148242419459, "loss": 0.9294, "step": 3065 }, { "epoch": 0.17614826290271682, "grad_norm": 0.263671875, "learning_rate": 0.0001964885671550445, "loss": 0.9526, "step": 3070 }, { "epoch": 0.17643514932438248, "grad_norm": 0.263671875, "learning_rate": 0.00019646221329685344, "loss": 0.9588, "step": 3075 }, { "epoch": 0.17672203574604814, "grad_norm": 0.28125, "learning_rate": 0.0001964357626938038, "loss": 0.9259, "step": 3080 }, { "epoch": 0.1770089221677138, "grad_norm": 0.24609375, "learning_rate": 0.00019640921537242365, "loss": 0.9901, "step": 3085 }, { "epoch": 0.17729580858937946, "grad_norm": 0.265625, "learning_rate": 0.00019638257135933814, "loss": 0.9384, "step": 3090 }, { "epoch": 0.17758269501104512, "grad_norm": 0.26171875, "learning_rate": 0.00019635583068126935, "loss": 1.0097, "step": 3095 }, { "epoch": 0.17786958143271078, "grad_norm": 0.291015625, "learning_rate": 0.0001963289933650363, "loss": 0.9876, "step": 3100 }, { "epoch": 0.17815646785437644, "grad_norm": 0.294921875, "learning_rate": 0.0001963020594375549, "loss": 0.9243, "step": 3105 }, { "epoch": 0.1784433542760421, "grad_norm": 0.3046875, "learning_rate": 0.00019627502892583806, "loss": 0.9606, "step": 3110 }, { "epoch": 0.17873024069770777, "grad_norm": 0.275390625, "learning_rate": 0.00019624790185699548, "loss": 0.9612, "step": 3115 }, { "epoch": 0.17901712711937345, "grad_norm": 0.291015625, "learning_rate": 0.0001962206782582337, "loss": 0.9649, "step": 3120 }, { "epoch": 0.17930401354103911, "grad_norm": 0.248046875, "learning_rate": 0.0001961933581568561, "loss": 0.9804, "step": 3125 }, { "epoch": 0.17959089996270478, "grad_norm": 0.259765625, "learning_rate": 0.00019616594158026283, "loss": 0.8969, "step": 3130 }, { "epoch": 0.17987778638437044, "grad_norm": 0.25390625, "learning_rate": 0.0001961384285559508, "loss": 0.9219, "step": 3135 }, { "epoch": 0.1801646728060361, "grad_norm": 0.271484375, "learning_rate": 0.0001961108191115136, "loss": 0.9543, "step": 3140 }, { "epoch": 0.18045155922770176, "grad_norm": 0.283203125, "learning_rate": 0.00019608311327464167, "loss": 0.9951, "step": 3145 }, { "epoch": 0.18073844564936742, "grad_norm": 0.44921875, "learning_rate": 0.00019605531107312195, "loss": 0.9926, "step": 3150 }, { "epoch": 0.18102533207103308, "grad_norm": 0.2734375, "learning_rate": 0.00019602741253483817, "loss": 0.9014, "step": 3155 }, { "epoch": 0.18131221849269874, "grad_norm": 0.275390625, "learning_rate": 0.00019599941768777055, "loss": 0.9355, "step": 3160 }, { "epoch": 0.1815991049143644, "grad_norm": 0.26953125, "learning_rate": 0.000195971326559996, "loss": 0.9649, "step": 3165 }, { "epoch": 0.18188599133603006, "grad_norm": 0.279296875, "learning_rate": 0.00019594313917968795, "loss": 0.8945, "step": 3170 }, { "epoch": 0.18217287775769572, "grad_norm": 0.423828125, "learning_rate": 0.00019591485557511636, "loss": 1.0124, "step": 3175 }, { "epoch": 0.18245976417936138, "grad_norm": 0.375, "learning_rate": 0.00019588647577464775, "loss": 0.9298, "step": 3180 }, { "epoch": 0.18274665060102704, "grad_norm": 0.328125, "learning_rate": 0.000195857999806745, "loss": 0.9752, "step": 3185 }, { "epoch": 0.1830335370226927, "grad_norm": 0.27734375, "learning_rate": 0.0001958294276999676, "loss": 0.9948, "step": 3190 }, { "epoch": 0.18332042344435837, "grad_norm": 0.337890625, "learning_rate": 0.00019580075948297135, "loss": 0.9586, "step": 3195 }, { "epoch": 0.18360730986602405, "grad_norm": 0.4140625, "learning_rate": 0.00019577199518450847, "loss": 1.0328, "step": 3200 }, { "epoch": 0.18389419628768972, "grad_norm": 0.298828125, "learning_rate": 0.00019574313483342748, "loss": 0.9108, "step": 3205 }, { "epoch": 0.18418108270935538, "grad_norm": 0.275390625, "learning_rate": 0.00019571417845867337, "loss": 0.9787, "step": 3210 }, { "epoch": 0.18446796913102104, "grad_norm": 0.2578125, "learning_rate": 0.00019568512608928736, "loss": 1.0482, "step": 3215 }, { "epoch": 0.1847548555526867, "grad_norm": 0.27734375, "learning_rate": 0.00019565597775440688, "loss": 0.9919, "step": 3220 }, { "epoch": 0.18504174197435236, "grad_norm": 0.279296875, "learning_rate": 0.00019562673348326573, "loss": 0.9537, "step": 3225 }, { "epoch": 0.18532862839601802, "grad_norm": 0.27734375, "learning_rate": 0.00019559739330519388, "loss": 0.9659, "step": 3230 }, { "epoch": 0.18561551481768368, "grad_norm": 0.27734375, "learning_rate": 0.00019556795724961742, "loss": 0.982, "step": 3235 }, { "epoch": 0.18590240123934934, "grad_norm": 0.26953125, "learning_rate": 0.00019553842534605868, "loss": 0.9582, "step": 3240 }, { "epoch": 0.186189287661015, "grad_norm": 0.2373046875, "learning_rate": 0.00019550879762413615, "loss": 0.9089, "step": 3245 }, { "epoch": 0.18647617408268066, "grad_norm": 0.2890625, "learning_rate": 0.00019547907411356427, "loss": 0.929, "step": 3250 }, { "epoch": 0.18676306050434632, "grad_norm": 0.265625, "learning_rate": 0.00019544925484415372, "loss": 0.9444, "step": 3255 }, { "epoch": 0.18704994692601198, "grad_norm": 0.267578125, "learning_rate": 0.0001954193398458111, "loss": 1.0105, "step": 3260 }, { "epoch": 0.18733683334767764, "grad_norm": 0.267578125, "learning_rate": 0.0001953893291485391, "loss": 0.9351, "step": 3265 }, { "epoch": 0.1876237197693433, "grad_norm": 0.267578125, "learning_rate": 0.00019535922278243634, "loss": 0.996, "step": 3270 }, { "epoch": 0.18791060619100897, "grad_norm": 0.27734375, "learning_rate": 0.00019532902077769735, "loss": 0.9279, "step": 3275 }, { "epoch": 0.18819749261267466, "grad_norm": 0.291015625, "learning_rate": 0.00019529872316461272, "loss": 1.0018, "step": 3280 }, { "epoch": 0.18848437903434032, "grad_norm": 0.26171875, "learning_rate": 0.00019526832997356875, "loss": 1.0276, "step": 3285 }, { "epoch": 0.18877126545600598, "grad_norm": 0.25, "learning_rate": 0.00019523784123504775, "loss": 0.9787, "step": 3290 }, { "epoch": 0.18905815187767164, "grad_norm": 0.26171875, "learning_rate": 0.00019520725697962777, "loss": 0.9135, "step": 3295 }, { "epoch": 0.1893450382993373, "grad_norm": 0.2578125, "learning_rate": 0.00019517657723798268, "loss": 0.9571, "step": 3300 }, { "epoch": 0.18963192472100296, "grad_norm": 0.259765625, "learning_rate": 0.00019514580204088212, "loss": 0.9377, "step": 3305 }, { "epoch": 0.18991881114266862, "grad_norm": 0.2734375, "learning_rate": 0.00019511493141919145, "loss": 0.9443, "step": 3310 }, { "epoch": 0.19020569756433428, "grad_norm": 0.3125, "learning_rate": 0.00019508396540387178, "loss": 1.0005, "step": 3315 }, { "epoch": 0.19049258398599994, "grad_norm": 0.283203125, "learning_rate": 0.0001950529040259798, "loss": 0.9703, "step": 3320 }, { "epoch": 0.1907794704076656, "grad_norm": 0.26171875, "learning_rate": 0.00019502174731666797, "loss": 0.9411, "step": 3325 }, { "epoch": 0.19106635682933126, "grad_norm": 0.265625, "learning_rate": 0.00019499049530718424, "loss": 0.8605, "step": 3330 }, { "epoch": 0.19135324325099692, "grad_norm": 0.267578125, "learning_rate": 0.00019495914802887226, "loss": 0.9798, "step": 3335 }, { "epoch": 0.19164012967266258, "grad_norm": 0.25390625, "learning_rate": 0.00019492770551317106, "loss": 0.9485, "step": 3340 }, { "epoch": 0.19192701609432825, "grad_norm": 0.2451171875, "learning_rate": 0.00019489616779161533, "loss": 1.0127, "step": 3345 }, { "epoch": 0.1922139025159939, "grad_norm": 0.255859375, "learning_rate": 0.00019486453489583525, "loss": 0.9629, "step": 3350 }, { "epoch": 0.19250078893765957, "grad_norm": 0.255859375, "learning_rate": 0.0001948328068575563, "loss": 0.9596, "step": 3355 }, { "epoch": 0.19278767535932526, "grad_norm": 0.404296875, "learning_rate": 0.0001948009837085996, "loss": 1.0033, "step": 3360 }, { "epoch": 0.19307456178099092, "grad_norm": 0.267578125, "learning_rate": 0.00019476906548088148, "loss": 0.9517, "step": 3365 }, { "epoch": 0.19336144820265658, "grad_norm": 0.287109375, "learning_rate": 0.00019473705220641367, "loss": 1.0111, "step": 3370 }, { "epoch": 0.19364833462432224, "grad_norm": 0.2578125, "learning_rate": 0.0001947049439173033, "loss": 0.9941, "step": 3375 }, { "epoch": 0.1939352210459879, "grad_norm": 0.2734375, "learning_rate": 0.00019467274064575275, "loss": 0.9648, "step": 3380 }, { "epoch": 0.19422210746765356, "grad_norm": 0.27734375, "learning_rate": 0.0001946404424240596, "loss": 0.9559, "step": 3385 }, { "epoch": 0.19450899388931922, "grad_norm": 0.275390625, "learning_rate": 0.0001946080492846167, "loss": 1.0018, "step": 3390 }, { "epoch": 0.19479588031098488, "grad_norm": 0.275390625, "learning_rate": 0.00019457556125991216, "loss": 1.0015, "step": 3395 }, { "epoch": 0.19508276673265054, "grad_norm": 0.259765625, "learning_rate": 0.00019454297838252918, "loss": 0.9179, "step": 3400 }, { "epoch": 0.1953696531543162, "grad_norm": 0.25390625, "learning_rate": 0.00019451030068514609, "loss": 0.9449, "step": 3405 }, { "epoch": 0.19565653957598186, "grad_norm": 0.263671875, "learning_rate": 0.00019447752820053634, "loss": 0.9519, "step": 3410 }, { "epoch": 0.19594342599764752, "grad_norm": 0.275390625, "learning_rate": 0.00019444466096156846, "loss": 0.976, "step": 3415 }, { "epoch": 0.19623031241931319, "grad_norm": 0.28125, "learning_rate": 0.00019441169900120598, "loss": 0.9366, "step": 3420 }, { "epoch": 0.19651719884097885, "grad_norm": 0.291015625, "learning_rate": 0.00019437864235250744, "loss": 0.9344, "step": 3425 }, { "epoch": 0.1968040852626445, "grad_norm": 0.255859375, "learning_rate": 0.00019434549104862639, "loss": 1.026, "step": 3430 }, { "epoch": 0.19709097168431017, "grad_norm": 0.263671875, "learning_rate": 0.0001943122451228112, "loss": 0.9705, "step": 3435 }, { "epoch": 0.19737785810597586, "grad_norm": 0.24609375, "learning_rate": 0.00019427890460840526, "loss": 1.0427, "step": 3440 }, { "epoch": 0.19766474452764152, "grad_norm": 0.26953125, "learning_rate": 0.00019424546953884675, "loss": 0.9639, "step": 3445 }, { "epoch": 0.19795163094930718, "grad_norm": 0.263671875, "learning_rate": 0.00019421193994766873, "loss": 0.9777, "step": 3450 }, { "epoch": 0.19823851737097284, "grad_norm": 0.255859375, "learning_rate": 0.000194178315868499, "loss": 1.01, "step": 3455 }, { "epoch": 0.1985254037926385, "grad_norm": 0.251953125, "learning_rate": 0.00019414459733506023, "loss": 0.9636, "step": 3460 }, { "epoch": 0.19881229021430416, "grad_norm": 0.26171875, "learning_rate": 0.00019411078438116969, "loss": 0.958, "step": 3465 }, { "epoch": 0.19909917663596982, "grad_norm": 0.26953125, "learning_rate": 0.00019407687704073943, "loss": 0.9141, "step": 3470 }, { "epoch": 0.19938606305763548, "grad_norm": 0.26953125, "learning_rate": 0.00019404287534777615, "loss": 0.974, "step": 3475 }, { "epoch": 0.19967294947930114, "grad_norm": 0.26171875, "learning_rate": 0.00019400877933638114, "loss": 0.9726, "step": 3480 }, { "epoch": 0.1999598359009668, "grad_norm": 0.287109375, "learning_rate": 0.00019397458904075036, "loss": 1.0179, "step": 3485 }, { "epoch": 0.20024672232263246, "grad_norm": 0.28515625, "learning_rate": 0.00019394030449517428, "loss": 0.9029, "step": 3490 }, { "epoch": 0.20053360874429813, "grad_norm": 0.2734375, "learning_rate": 0.00019390592573403787, "loss": 1.0355, "step": 3495 }, { "epoch": 0.20082049516596379, "grad_norm": 0.279296875, "learning_rate": 0.0001938714527918207, "loss": 0.95, "step": 3500 }, { "epoch": 0.20110738158762945, "grad_norm": 0.259765625, "learning_rate": 0.0001938368857030966, "loss": 1.0558, "step": 3505 }, { "epoch": 0.2013942680092951, "grad_norm": 0.2578125, "learning_rate": 0.00019380222450253405, "loss": 1.0354, "step": 3510 }, { "epoch": 0.2016811544309608, "grad_norm": 0.26953125, "learning_rate": 0.00019376746922489577, "loss": 1.0018, "step": 3515 }, { "epoch": 0.20196804085262646, "grad_norm": 0.279296875, "learning_rate": 0.00019373261990503888, "loss": 0.9424, "step": 3520 }, { "epoch": 0.20225492727429212, "grad_norm": 0.294921875, "learning_rate": 0.00019369767657791479, "loss": 0.9784, "step": 3525 }, { "epoch": 0.20254181369595778, "grad_norm": 0.2578125, "learning_rate": 0.00019366263927856928, "loss": 0.9943, "step": 3530 }, { "epoch": 0.20282870011762344, "grad_norm": 0.25390625, "learning_rate": 0.00019362750804214222, "loss": 0.9192, "step": 3535 }, { "epoch": 0.2031155865392891, "grad_norm": 0.26171875, "learning_rate": 0.0001935922829038679, "loss": 0.9007, "step": 3540 }, { "epoch": 0.20340247296095476, "grad_norm": 0.255859375, "learning_rate": 0.00019355696389907455, "loss": 0.9956, "step": 3545 }, { "epoch": 0.20368935938262042, "grad_norm": 0.2890625, "learning_rate": 0.00019352155106318471, "loss": 0.9217, "step": 3550 }, { "epoch": 0.20397624580428608, "grad_norm": 0.265625, "learning_rate": 0.00019348604443171502, "loss": 0.9377, "step": 3555 }, { "epoch": 0.20426313222595174, "grad_norm": 0.279296875, "learning_rate": 0.00019345044404027613, "loss": 1.05, "step": 3560 }, { "epoch": 0.2045500186476174, "grad_norm": 0.26953125, "learning_rate": 0.0001934147499245727, "loss": 0.9402, "step": 3565 }, { "epoch": 0.20483690506928307, "grad_norm": 0.298828125, "learning_rate": 0.0001933789621204035, "loss": 0.9638, "step": 3570 }, { "epoch": 0.20512379149094873, "grad_norm": 0.259765625, "learning_rate": 0.00019334308066366114, "loss": 0.9854, "step": 3575 }, { "epoch": 0.2054106779126144, "grad_norm": 0.28515625, "learning_rate": 0.00019330710559033225, "loss": 0.9534, "step": 3580 }, { "epoch": 0.20569756433428005, "grad_norm": 0.267578125, "learning_rate": 0.0001932710369364973, "loss": 1.0041, "step": 3585 }, { "epoch": 0.2059844507559457, "grad_norm": 0.271484375, "learning_rate": 0.00019323487473833062, "loss": 0.9217, "step": 3590 }, { "epoch": 0.2062713371776114, "grad_norm": 0.265625, "learning_rate": 0.0001931986190321004, "loss": 0.9715, "step": 3595 }, { "epoch": 0.20655822359927706, "grad_norm": 0.265625, "learning_rate": 0.00019316226985416853, "loss": 0.9646, "step": 3600 }, { "epoch": 0.20684511002094272, "grad_norm": 0.28125, "learning_rate": 0.00019312582724099076, "loss": 0.9726, "step": 3605 }, { "epoch": 0.20713199644260838, "grad_norm": 0.263671875, "learning_rate": 0.00019308929122911642, "loss": 0.8723, "step": 3610 }, { "epoch": 0.20741888286427404, "grad_norm": 0.279296875, "learning_rate": 0.0001930526618551886, "loss": 1.0109, "step": 3615 }, { "epoch": 0.2077057692859397, "grad_norm": 0.2578125, "learning_rate": 0.00019301593915594403, "loss": 0.9474, "step": 3620 }, { "epoch": 0.20799265570760536, "grad_norm": 0.275390625, "learning_rate": 0.00019297912316821298, "loss": 0.9802, "step": 3625 }, { "epoch": 0.20827954212927102, "grad_norm": 0.265625, "learning_rate": 0.00019294221392891932, "loss": 0.9825, "step": 3630 }, { "epoch": 0.20856642855093668, "grad_norm": 0.271484375, "learning_rate": 0.00019290521147508042, "loss": 0.9742, "step": 3635 }, { "epoch": 0.20885331497260234, "grad_norm": 0.275390625, "learning_rate": 0.0001928681158438072, "loss": 1.0083, "step": 3640 }, { "epoch": 0.209140201394268, "grad_norm": 0.27734375, "learning_rate": 0.00019283092707230392, "loss": 1.0363, "step": 3645 }, { "epoch": 0.20942708781593367, "grad_norm": 0.279296875, "learning_rate": 0.0001927936451978684, "loss": 1.0133, "step": 3650 }, { "epoch": 0.20971397423759933, "grad_norm": 0.271484375, "learning_rate": 0.0001927562702578917, "loss": 0.9541, "step": 3655 }, { "epoch": 0.210000860659265, "grad_norm": 0.265625, "learning_rate": 0.00019271880228985828, "loss": 0.9512, "step": 3660 }, { "epoch": 0.21028774708093065, "grad_norm": 0.27734375, "learning_rate": 0.00019268124133134588, "loss": 0.9769, "step": 3665 }, { "epoch": 0.2105746335025963, "grad_norm": 0.251953125, "learning_rate": 0.00019264358742002556, "loss": 0.9107, "step": 3670 }, { "epoch": 0.210861519924262, "grad_norm": 0.271484375, "learning_rate": 0.00019260584059366153, "loss": 1.0649, "step": 3675 }, { "epoch": 0.21114840634592766, "grad_norm": 0.275390625, "learning_rate": 0.00019256800089011123, "loss": 1.0599, "step": 3680 }, { "epoch": 0.21143529276759332, "grad_norm": 0.251953125, "learning_rate": 0.0001925300683473252, "loss": 0.9294, "step": 3685 }, { "epoch": 0.21172217918925898, "grad_norm": 0.27734375, "learning_rate": 0.0001924920430033472, "loss": 0.9389, "step": 3690 }, { "epoch": 0.21200906561092464, "grad_norm": 0.26953125, "learning_rate": 0.00019245392489631392, "loss": 0.9595, "step": 3695 }, { "epoch": 0.2122959520325903, "grad_norm": 0.27734375, "learning_rate": 0.00019241571406445525, "loss": 0.9509, "step": 3700 }, { "epoch": 0.21258283845425596, "grad_norm": 0.26953125, "learning_rate": 0.00019237741054609387, "loss": 0.9953, "step": 3705 }, { "epoch": 0.21286972487592162, "grad_norm": 0.259765625, "learning_rate": 0.00019233901437964562, "loss": 0.9504, "step": 3710 }, { "epoch": 0.21315661129758728, "grad_norm": 0.25, "learning_rate": 0.00019230052560361913, "loss": 0.9972, "step": 3715 }, { "epoch": 0.21344349771925294, "grad_norm": 0.30859375, "learning_rate": 0.00019226194425661598, "loss": 0.9205, "step": 3720 }, { "epoch": 0.2137303841409186, "grad_norm": 0.287109375, "learning_rate": 0.00019222327037733052, "loss": 1.011, "step": 3725 }, { "epoch": 0.21401727056258427, "grad_norm": 0.255859375, "learning_rate": 0.00019218450400454998, "loss": 0.9244, "step": 3730 }, { "epoch": 0.21430415698424993, "grad_norm": 0.296875, "learning_rate": 0.00019214564517715433, "loss": 0.9666, "step": 3735 }, { "epoch": 0.2145910434059156, "grad_norm": 0.265625, "learning_rate": 0.00019210669393411624, "loss": 1.0037, "step": 3740 }, { "epoch": 0.21487792982758125, "grad_norm": 0.2734375, "learning_rate": 0.00019206765031450112, "loss": 0.9621, "step": 3745 }, { "epoch": 0.2151648162492469, "grad_norm": 0.33203125, "learning_rate": 0.00019202851435746695, "loss": 0.9576, "step": 3750 }, { "epoch": 0.2154517026709126, "grad_norm": 0.259765625, "learning_rate": 0.00019198928610226435, "loss": 0.9496, "step": 3755 }, { "epoch": 0.21573858909257826, "grad_norm": 0.283203125, "learning_rate": 0.00019194996558823655, "loss": 1.0261, "step": 3760 }, { "epoch": 0.21602547551424392, "grad_norm": 0.271484375, "learning_rate": 0.00019191055285481927, "loss": 1.0164, "step": 3765 }, { "epoch": 0.21631236193590958, "grad_norm": 0.265625, "learning_rate": 0.00019187104794154074, "loss": 0.9903, "step": 3770 }, { "epoch": 0.21659924835757524, "grad_norm": 0.265625, "learning_rate": 0.00019183145088802158, "loss": 0.9582, "step": 3775 }, { "epoch": 0.2168861347792409, "grad_norm": 0.26953125, "learning_rate": 0.00019179176173397494, "loss": 1.0008, "step": 3780 }, { "epoch": 0.21717302120090656, "grad_norm": 0.25390625, "learning_rate": 0.0001917519805192062, "loss": 0.9579, "step": 3785 }, { "epoch": 0.21745990762257222, "grad_norm": 0.279296875, "learning_rate": 0.00019171210728361317, "loss": 1.0133, "step": 3790 }, { "epoch": 0.21774679404423788, "grad_norm": 0.2490234375, "learning_rate": 0.00019167214206718594, "loss": 0.9843, "step": 3795 }, { "epoch": 0.21803368046590355, "grad_norm": 0.283203125, "learning_rate": 0.0001916320849100068, "loss": 0.9847, "step": 3800 }, { "epoch": 0.2183205668875692, "grad_norm": 0.279296875, "learning_rate": 0.00019159193585225026, "loss": 0.976, "step": 3805 }, { "epoch": 0.21860745330923487, "grad_norm": 0.27734375, "learning_rate": 0.00019155169493418304, "loss": 0.9585, "step": 3810 }, { "epoch": 0.21889433973090053, "grad_norm": 0.259765625, "learning_rate": 0.000191511362196164, "loss": 0.9281, "step": 3815 }, { "epoch": 0.2191812261525662, "grad_norm": 0.251953125, "learning_rate": 0.00019147093767864402, "loss": 0.9886, "step": 3820 }, { "epoch": 0.21946811257423185, "grad_norm": 0.267578125, "learning_rate": 0.00019143042142216607, "loss": 0.9828, "step": 3825 }, { "epoch": 0.2197549989958975, "grad_norm": 0.26953125, "learning_rate": 0.00019138981346736514, "loss": 0.8972, "step": 3830 }, { "epoch": 0.2200418854175632, "grad_norm": 0.26953125, "learning_rate": 0.00019134911385496815, "loss": 0.9413, "step": 3835 }, { "epoch": 0.22032877183922886, "grad_norm": 0.271484375, "learning_rate": 0.00019130832262579398, "loss": 0.8982, "step": 3840 }, { "epoch": 0.22061565826089452, "grad_norm": 0.28125, "learning_rate": 0.00019126743982075337, "loss": 0.9527, "step": 3845 }, { "epoch": 0.22090254468256018, "grad_norm": 0.26171875, "learning_rate": 0.00019122646548084892, "loss": 0.9773, "step": 3850 }, { "epoch": 0.22118943110422584, "grad_norm": 0.28125, "learning_rate": 0.00019118539964717505, "loss": 1.0212, "step": 3855 }, { "epoch": 0.2214763175258915, "grad_norm": 0.279296875, "learning_rate": 0.0001911442423609179, "loss": 0.9419, "step": 3860 }, { "epoch": 0.22176320394755716, "grad_norm": 0.296875, "learning_rate": 0.00019110299366335536, "loss": 0.9641, "step": 3865 }, { "epoch": 0.22205009036922282, "grad_norm": 0.25390625, "learning_rate": 0.00019106165359585698, "loss": 0.9093, "step": 3870 }, { "epoch": 0.22233697679088849, "grad_norm": 0.28515625, "learning_rate": 0.00019102022219988398, "loss": 0.9817, "step": 3875 }, { "epoch": 0.22262386321255415, "grad_norm": 0.255859375, "learning_rate": 0.00019097869951698913, "loss": 1.0365, "step": 3880 }, { "epoch": 0.2229107496342198, "grad_norm": 0.283203125, "learning_rate": 0.0001909370855888168, "loss": 0.9048, "step": 3885 }, { "epoch": 0.22319763605588547, "grad_norm": 0.271484375, "learning_rate": 0.00019089538045710284, "loss": 0.9749, "step": 3890 }, { "epoch": 0.22348452247755113, "grad_norm": 0.259765625, "learning_rate": 0.00019085358416367457, "loss": 0.9713, "step": 3895 }, { "epoch": 0.2237714088992168, "grad_norm": 0.275390625, "learning_rate": 0.0001908116967504508, "loss": 0.9302, "step": 3900 }, { "epoch": 0.22405829532088245, "grad_norm": 0.26953125, "learning_rate": 0.00019076971825944164, "loss": 0.9653, "step": 3905 }, { "epoch": 0.22434518174254814, "grad_norm": 0.2890625, "learning_rate": 0.00019072764873274856, "loss": 0.9798, "step": 3910 }, { "epoch": 0.2246320681642138, "grad_norm": 0.2890625, "learning_rate": 0.0001906854882125644, "loss": 0.9581, "step": 3915 }, { "epoch": 0.22491895458587946, "grad_norm": 0.28515625, "learning_rate": 0.00019064323674117318, "loss": 1.0062, "step": 3920 }, { "epoch": 0.22520584100754512, "grad_norm": 0.267578125, "learning_rate": 0.0001906008943609502, "loss": 0.8883, "step": 3925 }, { "epoch": 0.22549272742921078, "grad_norm": 0.296875, "learning_rate": 0.0001905584611143619, "loss": 0.9824, "step": 3930 }, { "epoch": 0.22577961385087644, "grad_norm": 0.26953125, "learning_rate": 0.00019051593704396587, "loss": 0.9451, "step": 3935 }, { "epoch": 0.2260665002725421, "grad_norm": 0.28515625, "learning_rate": 0.00019047332219241078, "loss": 0.9738, "step": 3940 }, { "epoch": 0.22635338669420776, "grad_norm": 0.29296875, "learning_rate": 0.00019043061660243632, "loss": 0.9701, "step": 3945 }, { "epoch": 0.22664027311587343, "grad_norm": 0.28125, "learning_rate": 0.00019038782031687325, "loss": 1.0223, "step": 3950 }, { "epoch": 0.22692715953753909, "grad_norm": 0.271484375, "learning_rate": 0.0001903449333786432, "loss": 0.9811, "step": 3955 }, { "epoch": 0.22721404595920475, "grad_norm": 0.263671875, "learning_rate": 0.00019030195583075881, "loss": 0.9706, "step": 3960 }, { "epoch": 0.2275009323808704, "grad_norm": 0.263671875, "learning_rate": 0.00019025888771632355, "loss": 0.9887, "step": 3965 }, { "epoch": 0.22778781880253607, "grad_norm": 0.271484375, "learning_rate": 0.00019021572907853177, "loss": 0.957, "step": 3970 }, { "epoch": 0.22807470522420173, "grad_norm": 0.259765625, "learning_rate": 0.00019017247996066852, "loss": 0.8995, "step": 3975 }, { "epoch": 0.2283615916458674, "grad_norm": 0.26953125, "learning_rate": 0.00019012914040610963, "loss": 1.0435, "step": 3980 }, { "epoch": 0.22864847806753305, "grad_norm": 0.26953125, "learning_rate": 0.00019008571045832167, "loss": 0.9228, "step": 3985 }, { "epoch": 0.22893536448919874, "grad_norm": 0.265625, "learning_rate": 0.00019004219016086188, "loss": 0.9704, "step": 3990 }, { "epoch": 0.2292222509108644, "grad_norm": 0.267578125, "learning_rate": 0.00018999857955737798, "loss": 0.9137, "step": 3995 }, { "epoch": 0.22950913733253006, "grad_norm": 0.314453125, "learning_rate": 0.00018995487869160845, "loss": 0.9703, "step": 4000 }, { "epoch": 0.22979602375419572, "grad_norm": 0.259765625, "learning_rate": 0.00018991108760738214, "loss": 0.8956, "step": 4005 }, { "epoch": 0.23008291017586138, "grad_norm": 0.26171875, "learning_rate": 0.00018986720634861848, "loss": 0.928, "step": 4010 }, { "epoch": 0.23036979659752704, "grad_norm": 0.275390625, "learning_rate": 0.00018982323495932732, "loss": 0.984, "step": 4015 }, { "epoch": 0.2306566830191927, "grad_norm": 0.26171875, "learning_rate": 0.00018977917348360888, "loss": 0.9616, "step": 4020 }, { "epoch": 0.23094356944085837, "grad_norm": 0.2890625, "learning_rate": 0.0001897350219656537, "loss": 0.9801, "step": 4025 }, { "epoch": 0.23123045586252403, "grad_norm": 0.27734375, "learning_rate": 0.0001896907804497427, "loss": 0.9083, "step": 4030 }, { "epoch": 0.2315173422841897, "grad_norm": 0.259765625, "learning_rate": 0.00018964644898024707, "loss": 1.0108, "step": 4035 }, { "epoch": 0.23180422870585535, "grad_norm": 0.2578125, "learning_rate": 0.0001896020276016281, "loss": 0.9954, "step": 4040 }, { "epoch": 0.232091115127521, "grad_norm": 0.271484375, "learning_rate": 0.00018955751635843737, "loss": 0.9582, "step": 4045 }, { "epoch": 0.23237800154918667, "grad_norm": 0.275390625, "learning_rate": 0.0001895129152953165, "loss": 0.9049, "step": 4050 }, { "epoch": 0.23266488797085233, "grad_norm": 0.2734375, "learning_rate": 0.00018946822445699735, "loss": 0.9286, "step": 4055 }, { "epoch": 0.232951774392518, "grad_norm": 0.267578125, "learning_rate": 0.00018942344388830158, "loss": 0.9409, "step": 4060 }, { "epoch": 0.23323866081418365, "grad_norm": 0.263671875, "learning_rate": 0.00018937857363414106, "loss": 0.9408, "step": 4065 }, { "epoch": 0.23352554723584934, "grad_norm": 0.28125, "learning_rate": 0.00018933361373951746, "loss": 0.9906, "step": 4070 }, { "epoch": 0.233812433657515, "grad_norm": 0.27734375, "learning_rate": 0.00018928856424952245, "loss": 0.9047, "step": 4075 }, { "epoch": 0.23409932007918066, "grad_norm": 0.26171875, "learning_rate": 0.0001892434252093375, "loss": 1.0379, "step": 4080 }, { "epoch": 0.23438620650084632, "grad_norm": 0.26171875, "learning_rate": 0.00018919819666423396, "loss": 0.9477, "step": 4085 }, { "epoch": 0.23467309292251198, "grad_norm": 0.28515625, "learning_rate": 0.00018915287865957277, "loss": 1.0386, "step": 4090 }, { "epoch": 0.23495997934417764, "grad_norm": 0.263671875, "learning_rate": 0.0001891074712408049, "loss": 0.994, "step": 4095 }, { "epoch": 0.2352468657658433, "grad_norm": 0.296875, "learning_rate": 0.00018906197445347068, "loss": 0.9821, "step": 4100 }, { "epoch": 0.23553375218750897, "grad_norm": 0.275390625, "learning_rate": 0.0001890163883432003, "loss": 0.9415, "step": 4105 }, { "epoch": 0.23582063860917463, "grad_norm": 0.251953125, "learning_rate": 0.00018897071295571335, "loss": 0.9565, "step": 4110 }, { "epoch": 0.2361075250308403, "grad_norm": 0.26171875, "learning_rate": 0.00018892494833681913, "loss": 0.9232, "step": 4115 }, { "epoch": 0.23639441145250595, "grad_norm": 0.2734375, "learning_rate": 0.00018887909453241632, "loss": 0.9647, "step": 4120 }, { "epoch": 0.2366812978741716, "grad_norm": 0.267578125, "learning_rate": 0.0001888331515884931, "loss": 0.9913, "step": 4125 }, { "epoch": 0.23696818429583727, "grad_norm": 0.27734375, "learning_rate": 0.000188787119551127, "loss": 1.0266, "step": 4130 }, { "epoch": 0.23725507071750293, "grad_norm": 0.275390625, "learning_rate": 0.00018874099846648496, "loss": 0.9511, "step": 4135 }, { "epoch": 0.2375419571391686, "grad_norm": 0.267578125, "learning_rate": 0.0001886947883808232, "loss": 0.9892, "step": 4140 }, { "epoch": 0.23782884356083425, "grad_norm": 0.30859375, "learning_rate": 0.0001886484893404872, "loss": 0.9116, "step": 4145 }, { "epoch": 0.23811572998249994, "grad_norm": 0.26171875, "learning_rate": 0.0001886021013919117, "loss": 0.9508, "step": 4150 }, { "epoch": 0.2384026164041656, "grad_norm": 0.28515625, "learning_rate": 0.00018855562458162055, "loss": 0.979, "step": 4155 }, { "epoch": 0.23868950282583126, "grad_norm": 0.263671875, "learning_rate": 0.0001885090589562267, "loss": 0.9924, "step": 4160 }, { "epoch": 0.23897638924749692, "grad_norm": 0.29296875, "learning_rate": 0.00018846240456243225, "loss": 0.9721, "step": 4165 }, { "epoch": 0.23926327566916258, "grad_norm": 0.2734375, "learning_rate": 0.00018841566144702833, "loss": 0.9516, "step": 4170 }, { "epoch": 0.23955016209082824, "grad_norm": 0.2431640625, "learning_rate": 0.00018836882965689493, "loss": 0.9214, "step": 4175 }, { "epoch": 0.2398370485124939, "grad_norm": 0.24609375, "learning_rate": 0.00018832190923900112, "loss": 0.9424, "step": 4180 }, { "epoch": 0.24012393493415957, "grad_norm": 0.279296875, "learning_rate": 0.00018827490024040484, "loss": 1.0017, "step": 4185 }, { "epoch": 0.24041082135582523, "grad_norm": 0.271484375, "learning_rate": 0.00018822780270825277, "loss": 0.9544, "step": 4190 }, { "epoch": 0.2406977077774909, "grad_norm": 0.265625, "learning_rate": 0.00018818061668978046, "loss": 1.0314, "step": 4195 }, { "epoch": 0.24098459419915655, "grad_norm": 0.306640625, "learning_rate": 0.0001881333422323122, "loss": 1.0052, "step": 4200 }, { "epoch": 0.2412714806208222, "grad_norm": 0.283203125, "learning_rate": 0.00018808597938326093, "loss": 0.918, "step": 4205 }, { "epoch": 0.24155836704248787, "grad_norm": 0.265625, "learning_rate": 0.00018803852819012832, "loss": 0.9216, "step": 4210 }, { "epoch": 0.24184525346415353, "grad_norm": 0.271484375, "learning_rate": 0.0001879909887005046, "loss": 0.9629, "step": 4215 }, { "epoch": 0.2421321398858192, "grad_norm": 0.26171875, "learning_rate": 0.00018794336096206852, "loss": 0.9937, "step": 4220 }, { "epoch": 0.24241902630748488, "grad_norm": 0.259765625, "learning_rate": 0.00018789564502258741, "loss": 0.938, "step": 4225 }, { "epoch": 0.24270591272915054, "grad_norm": 0.27734375, "learning_rate": 0.000187847840929917, "loss": 1.051, "step": 4230 }, { "epoch": 0.2429927991508162, "grad_norm": 0.263671875, "learning_rate": 0.00018779994873200146, "loss": 0.8912, "step": 4235 }, { "epoch": 0.24327968557248186, "grad_norm": 0.2734375, "learning_rate": 0.00018775196847687332, "loss": 0.9789, "step": 4240 }, { "epoch": 0.24356657199414752, "grad_norm": 0.2890625, "learning_rate": 0.0001877039002126534, "loss": 0.9762, "step": 4245 }, { "epoch": 0.24385345841581318, "grad_norm": 0.28125, "learning_rate": 0.00018765574398755085, "loss": 1.0363, "step": 4250 }, { "epoch": 0.24414034483747885, "grad_norm": 0.2578125, "learning_rate": 0.00018760749984986298, "loss": 0.9278, "step": 4255 }, { "epoch": 0.2444272312591445, "grad_norm": 0.380859375, "learning_rate": 0.0001875591678479753, "loss": 0.9162, "step": 4260 }, { "epoch": 0.24471411768081017, "grad_norm": 0.27734375, "learning_rate": 0.00018751074803036142, "loss": 0.9574, "step": 4265 }, { "epoch": 0.24500100410247583, "grad_norm": 0.2734375, "learning_rate": 0.00018746224044558302, "loss": 0.9378, "step": 4270 }, { "epoch": 0.2452878905241415, "grad_norm": 0.28515625, "learning_rate": 0.0001874136451422898, "loss": 0.9997, "step": 4275 }, { "epoch": 0.24557477694580715, "grad_norm": 0.279296875, "learning_rate": 0.0001873649621692195, "loss": 1.0322, "step": 4280 }, { "epoch": 0.2458616633674728, "grad_norm": 0.26953125, "learning_rate": 0.00018731619157519774, "loss": 0.9235, "step": 4285 }, { "epoch": 0.24614854978913847, "grad_norm": 0.2734375, "learning_rate": 0.00018726733340913797, "loss": 0.9065, "step": 4290 }, { "epoch": 0.24643543621080413, "grad_norm": 0.271484375, "learning_rate": 0.00018721838772004157, "loss": 0.9641, "step": 4295 }, { "epoch": 0.2467223226324698, "grad_norm": 0.271484375, "learning_rate": 0.0001871693545569976, "loss": 0.9768, "step": 4300 }, { "epoch": 0.24700920905413548, "grad_norm": 0.26953125, "learning_rate": 0.00018712023396918293, "loss": 0.9004, "step": 4305 }, { "epoch": 0.24729609547580114, "grad_norm": 0.302734375, "learning_rate": 0.0001870710260058621, "loss": 0.9212, "step": 4310 }, { "epoch": 0.2475829818974668, "grad_norm": 0.275390625, "learning_rate": 0.00018702173071638716, "loss": 0.8944, "step": 4315 }, { "epoch": 0.24786986831913246, "grad_norm": 0.28515625, "learning_rate": 0.00018697234815019792, "loss": 1.0377, "step": 4320 }, { "epoch": 0.24815675474079812, "grad_norm": 0.29296875, "learning_rate": 0.00018692287835682164, "loss": 1.028, "step": 4325 }, { "epoch": 0.24844364116246379, "grad_norm": 0.28125, "learning_rate": 0.00018687332138587302, "loss": 0.9682, "step": 4330 }, { "epoch": 0.24873052758412945, "grad_norm": 0.265625, "learning_rate": 0.0001868236772870543, "loss": 0.9884, "step": 4335 }, { "epoch": 0.2490174140057951, "grad_norm": 0.283203125, "learning_rate": 0.00018677394611015498, "loss": 0.9923, "step": 4340 }, { "epoch": 0.24930430042746077, "grad_norm": 0.28515625, "learning_rate": 0.000186724127905052, "loss": 0.9725, "step": 4345 }, { "epoch": 0.24959118684912643, "grad_norm": 0.2734375, "learning_rate": 0.00018667422272170955, "loss": 0.9531, "step": 4350 }, { "epoch": 0.2498780732707921, "grad_norm": 0.2890625, "learning_rate": 0.00018662423061017896, "loss": 1.0103, "step": 4355 }, { "epoch": 0.2501649596924578, "grad_norm": 0.27734375, "learning_rate": 0.00018657415162059892, "loss": 1.0019, "step": 4360 }, { "epoch": 0.2504518461141234, "grad_norm": 0.27734375, "learning_rate": 0.0001865239858031951, "loss": 1.0196, "step": 4365 }, { "epoch": 0.2507387325357891, "grad_norm": 0.275390625, "learning_rate": 0.00018647373320828035, "loss": 0.974, "step": 4370 }, { "epoch": 0.25102561895745473, "grad_norm": 0.2578125, "learning_rate": 0.00018642339388625444, "loss": 0.9776, "step": 4375 }, { "epoch": 0.2513125053791204, "grad_norm": 0.283203125, "learning_rate": 0.0001863729678876043, "loss": 0.9649, "step": 4380 }, { "epoch": 0.25159939180078605, "grad_norm": 0.2890625, "learning_rate": 0.00018632245526290352, "loss": 1.0278, "step": 4385 }, { "epoch": 0.25188627822245174, "grad_norm": 0.259765625, "learning_rate": 0.0001862718560628129, "loss": 0.9772, "step": 4390 }, { "epoch": 0.2521731646441174, "grad_norm": 0.271484375, "learning_rate": 0.00018622117033807985, "loss": 0.9653, "step": 4395 }, { "epoch": 0.25246005106578306, "grad_norm": 0.28515625, "learning_rate": 0.00018617039813953855, "loss": 0.8763, "step": 4400 }, { "epoch": 0.2527469374874487, "grad_norm": 0.26953125, "learning_rate": 0.00018611953951811004, "loss": 1.0199, "step": 4405 }, { "epoch": 0.2530338239091144, "grad_norm": 0.26171875, "learning_rate": 0.00018606859452480193, "loss": 0.9409, "step": 4410 }, { "epoch": 0.25332071033078, "grad_norm": 0.27734375, "learning_rate": 0.00018601756321070845, "loss": 0.9849, "step": 4415 }, { "epoch": 0.2536075967524457, "grad_norm": 0.279296875, "learning_rate": 0.0001859664456270105, "loss": 0.9847, "step": 4420 }, { "epoch": 0.2538944831741114, "grad_norm": 0.255859375, "learning_rate": 0.00018591524182497547, "loss": 0.868, "step": 4425 }, { "epoch": 0.25418136959577703, "grad_norm": 0.28515625, "learning_rate": 0.0001858639518559571, "loss": 1.0035, "step": 4430 }, { "epoch": 0.2544682560174427, "grad_norm": 0.26171875, "learning_rate": 0.00018581257577139572, "loss": 0.9441, "step": 4435 }, { "epoch": 0.25475514243910835, "grad_norm": 0.27734375, "learning_rate": 0.00018576111362281794, "loss": 1.0191, "step": 4440 }, { "epoch": 0.25504202886077404, "grad_norm": 0.259765625, "learning_rate": 0.00018570956546183666, "loss": 1.0085, "step": 4445 }, { "epoch": 0.2553289152824397, "grad_norm": 0.294921875, "learning_rate": 0.00018565793134015115, "loss": 0.9635, "step": 4450 }, { "epoch": 0.25561580170410536, "grad_norm": 0.259765625, "learning_rate": 0.00018560621130954674, "loss": 1.0266, "step": 4455 }, { "epoch": 0.255902688125771, "grad_norm": 0.244140625, "learning_rate": 0.00018555440542189508, "loss": 0.946, "step": 4460 }, { "epoch": 0.2561895745474367, "grad_norm": 0.271484375, "learning_rate": 0.00018550251372915382, "loss": 1.0188, "step": 4465 }, { "epoch": 0.2564764609691023, "grad_norm": 0.251953125, "learning_rate": 0.00018545053628336668, "loss": 0.9303, "step": 4470 }, { "epoch": 0.256763347390768, "grad_norm": 0.2578125, "learning_rate": 0.00018539847313666345, "loss": 0.9652, "step": 4475 }, { "epoch": 0.25705023381243364, "grad_norm": 0.271484375, "learning_rate": 0.00018534632434125982, "loss": 0.91, "step": 4480 }, { "epoch": 0.2573371202340993, "grad_norm": 0.2890625, "learning_rate": 0.00018529408994945738, "loss": 1.0394, "step": 4485 }, { "epoch": 0.25762400665576496, "grad_norm": 0.275390625, "learning_rate": 0.0001852417700136436, "loss": 0.9498, "step": 4490 }, { "epoch": 0.25791089307743065, "grad_norm": 0.265625, "learning_rate": 0.00018518936458629165, "loss": 0.9552, "step": 4495 }, { "epoch": 0.2581977794990963, "grad_norm": 0.25390625, "learning_rate": 0.00018513687371996058, "loss": 0.9728, "step": 4500 }, { "epoch": 0.25848466592076197, "grad_norm": 0.25390625, "learning_rate": 0.000185084297467295, "loss": 0.9045, "step": 4505 }, { "epoch": 0.25877155234242766, "grad_norm": 0.28125, "learning_rate": 0.0001850316358810253, "loss": 0.9942, "step": 4510 }, { "epoch": 0.2590584387640933, "grad_norm": 0.298828125, "learning_rate": 0.0001849788890139673, "loss": 0.9677, "step": 4515 }, { "epoch": 0.259345325185759, "grad_norm": 0.251953125, "learning_rate": 0.00018492605691902242, "loss": 0.9934, "step": 4520 }, { "epoch": 0.2596322116074246, "grad_norm": 0.265625, "learning_rate": 0.00018487313964917761, "loss": 0.9579, "step": 4525 }, { "epoch": 0.2599190980290903, "grad_norm": 0.27734375, "learning_rate": 0.00018482013725750512, "loss": 0.9803, "step": 4530 }, { "epoch": 0.26020598445075593, "grad_norm": 0.26953125, "learning_rate": 0.00018476704979716275, "loss": 1.0439, "step": 4535 }, { "epoch": 0.2604928708724216, "grad_norm": 0.265625, "learning_rate": 0.00018471387732139344, "loss": 0.9879, "step": 4540 }, { "epoch": 0.26077975729408726, "grad_norm": 0.263671875, "learning_rate": 0.00018466061988352546, "loss": 1.0047, "step": 4545 }, { "epoch": 0.26106664371575294, "grad_norm": 0.275390625, "learning_rate": 0.00018460727753697234, "loss": 0.9634, "step": 4550 }, { "epoch": 0.2613535301374186, "grad_norm": 0.287109375, "learning_rate": 0.00018455385033523268, "loss": 0.9289, "step": 4555 }, { "epoch": 0.26164041655908427, "grad_norm": 0.2734375, "learning_rate": 0.00018450033833189027, "loss": 0.9254, "step": 4560 }, { "epoch": 0.2619273029807499, "grad_norm": 0.26953125, "learning_rate": 0.0001844467415806139, "loss": 0.9683, "step": 4565 }, { "epoch": 0.2622141894024156, "grad_norm": 0.263671875, "learning_rate": 0.00018439306013515733, "loss": 0.973, "step": 4570 }, { "epoch": 0.2625010758240812, "grad_norm": 0.28125, "learning_rate": 0.00018433929404935935, "loss": 0.9929, "step": 4575 }, { "epoch": 0.2627879622457469, "grad_norm": 0.263671875, "learning_rate": 0.00018428544337714358, "loss": 1.0327, "step": 4580 }, { "epoch": 0.2630748486674126, "grad_norm": 0.27734375, "learning_rate": 0.00018423150817251845, "loss": 0.9909, "step": 4585 }, { "epoch": 0.26336173508907823, "grad_norm": 0.2890625, "learning_rate": 0.0001841774884895772, "loss": 0.9784, "step": 4590 }, { "epoch": 0.2636486215107439, "grad_norm": 0.27734375, "learning_rate": 0.00018412338438249782, "loss": 1.0034, "step": 4595 }, { "epoch": 0.26393550793240955, "grad_norm": 0.2421875, "learning_rate": 0.00018406919590554296, "loss": 0.9409, "step": 4600 }, { "epoch": 0.26422239435407524, "grad_norm": 0.265625, "learning_rate": 0.00018401492311305985, "loss": 0.9357, "step": 4605 }, { "epoch": 0.2645092807757409, "grad_norm": 0.275390625, "learning_rate": 0.00018396056605948032, "loss": 0.9606, "step": 4610 }, { "epoch": 0.26479616719740656, "grad_norm": 0.26953125, "learning_rate": 0.00018390612479932066, "loss": 1.01, "step": 4615 }, { "epoch": 0.2650830536190722, "grad_norm": 0.31640625, "learning_rate": 0.00018385159938718172, "loss": 1.0116, "step": 4620 }, { "epoch": 0.2653699400407379, "grad_norm": 0.259765625, "learning_rate": 0.00018379698987774858, "loss": 0.9924, "step": 4625 }, { "epoch": 0.2656568264624035, "grad_norm": 0.2734375, "learning_rate": 0.00018374229632579087, "loss": 0.9351, "step": 4630 }, { "epoch": 0.2659437128840692, "grad_norm": 0.29296875, "learning_rate": 0.00018368751878616234, "loss": 0.9667, "step": 4635 }, { "epoch": 0.26623059930573484, "grad_norm": 0.265625, "learning_rate": 0.00018363265731380102, "loss": 0.9186, "step": 4640 }, { "epoch": 0.2665174857274005, "grad_norm": 0.3046875, "learning_rate": 0.00018357771196372916, "loss": 0.9553, "step": 4645 }, { "epoch": 0.26680437214906616, "grad_norm": 0.2578125, "learning_rate": 0.00018352268279105314, "loss": 0.9259, "step": 4650 }, { "epoch": 0.26709125857073185, "grad_norm": 0.275390625, "learning_rate": 0.0001834675698509633, "loss": 0.9481, "step": 4655 }, { "epoch": 0.26737814499239754, "grad_norm": 0.263671875, "learning_rate": 0.0001834123731987341, "loss": 0.9682, "step": 4660 }, { "epoch": 0.26766503141406317, "grad_norm": 0.283203125, "learning_rate": 0.00018335709288972395, "loss": 0.9577, "step": 4665 }, { "epoch": 0.26795191783572886, "grad_norm": 0.28125, "learning_rate": 0.00018330172897937513, "loss": 0.92, "step": 4670 }, { "epoch": 0.2682388042573945, "grad_norm": 0.2734375, "learning_rate": 0.00018324628152321373, "loss": 0.9268, "step": 4675 }, { "epoch": 0.2685256906790602, "grad_norm": 0.283203125, "learning_rate": 0.00018319075057684968, "loss": 1.0078, "step": 4680 }, { "epoch": 0.2688125771007258, "grad_norm": 0.255859375, "learning_rate": 0.0001831351361959767, "loss": 0.9208, "step": 4685 }, { "epoch": 0.2690994635223915, "grad_norm": 0.287109375, "learning_rate": 0.0001830794384363721, "loss": 0.9469, "step": 4690 }, { "epoch": 0.26938634994405714, "grad_norm": 0.27734375, "learning_rate": 0.00018302365735389678, "loss": 1.0203, "step": 4695 }, { "epoch": 0.2696732363657228, "grad_norm": 0.28125, "learning_rate": 0.00018296779300449535, "loss": 0.9883, "step": 4700 }, { "epoch": 0.26996012278738846, "grad_norm": 0.265625, "learning_rate": 0.00018291184544419578, "loss": 0.8768, "step": 4705 }, { "epoch": 0.27024700920905415, "grad_norm": 0.275390625, "learning_rate": 0.00018285581472910964, "loss": 1.0429, "step": 4710 }, { "epoch": 0.2705338956307198, "grad_norm": 0.2578125, "learning_rate": 0.00018279970091543174, "loss": 1.0038, "step": 4715 }, { "epoch": 0.27082078205238547, "grad_norm": 0.2734375, "learning_rate": 0.0001827435040594404, "loss": 0.9479, "step": 4720 }, { "epoch": 0.2711076684740511, "grad_norm": 0.275390625, "learning_rate": 0.00018268722421749703, "loss": 1.0259, "step": 4725 }, { "epoch": 0.2713945548957168, "grad_norm": 0.265625, "learning_rate": 0.0001826308614460465, "loss": 0.9755, "step": 4730 }, { "epoch": 0.2716814413173824, "grad_norm": 0.302734375, "learning_rate": 0.00018257441580161663, "loss": 0.9102, "step": 4735 }, { "epoch": 0.2719683277390481, "grad_norm": 0.267578125, "learning_rate": 0.00018251788734081849, "loss": 0.9596, "step": 4740 }, { "epoch": 0.2722552141607138, "grad_norm": 0.265625, "learning_rate": 0.0001824612761203462, "loss": 0.9722, "step": 4745 }, { "epoch": 0.27254210058237943, "grad_norm": 0.2578125, "learning_rate": 0.00018240458219697685, "loss": 1.0371, "step": 4750 }, { "epoch": 0.2728289870040451, "grad_norm": 0.310546875, "learning_rate": 0.00018234780562757045, "loss": 0.9661, "step": 4755 }, { "epoch": 0.27311587342571075, "grad_norm": 0.298828125, "learning_rate": 0.00018229094646906997, "loss": 1.0047, "step": 4760 }, { "epoch": 0.27340275984737644, "grad_norm": 0.2890625, "learning_rate": 0.00018223400477850117, "loss": 0.9734, "step": 4765 }, { "epoch": 0.2736896462690421, "grad_norm": 0.283203125, "learning_rate": 0.00018217698061297254, "loss": 0.9224, "step": 4770 }, { "epoch": 0.27397653269070776, "grad_norm": 0.283203125, "learning_rate": 0.00018211987402967536, "loss": 0.9819, "step": 4775 }, { "epoch": 0.2742634191123734, "grad_norm": 0.265625, "learning_rate": 0.0001820626850858836, "loss": 0.9491, "step": 4780 }, { "epoch": 0.2745503055340391, "grad_norm": 0.267578125, "learning_rate": 0.00018200541383895367, "loss": 0.94, "step": 4785 }, { "epoch": 0.2748371919557047, "grad_norm": 0.279296875, "learning_rate": 0.0001819480603463247, "loss": 0.9467, "step": 4790 }, { "epoch": 0.2751240783773704, "grad_norm": 0.28515625, "learning_rate": 0.00018189062466551824, "loss": 1.0405, "step": 4795 }, { "epoch": 0.27541096479903604, "grad_norm": 0.265625, "learning_rate": 0.0001818331068541382, "loss": 0.95, "step": 4800 }, { "epoch": 0.27569785122070173, "grad_norm": 0.2734375, "learning_rate": 0.00018177550696987096, "loss": 0.9674, "step": 4805 }, { "epoch": 0.27598473764236736, "grad_norm": 0.271484375, "learning_rate": 0.0001817178250704852, "loss": 0.9199, "step": 4810 }, { "epoch": 0.27627162406403305, "grad_norm": 0.2490234375, "learning_rate": 0.00018166006121383185, "loss": 0.9234, "step": 4815 }, { "epoch": 0.27655851048569874, "grad_norm": 0.294921875, "learning_rate": 0.00018160221545784392, "loss": 0.9653, "step": 4820 }, { "epoch": 0.27684539690736437, "grad_norm": 0.28125, "learning_rate": 0.00018154428786053677, "loss": 1.1679, "step": 4825 }, { "epoch": 0.27713228332903006, "grad_norm": 0.2734375, "learning_rate": 0.00018148627848000768, "loss": 0.9453, "step": 4830 }, { "epoch": 0.2774191697506957, "grad_norm": 0.2734375, "learning_rate": 0.00018142818737443603, "loss": 0.9289, "step": 4835 }, { "epoch": 0.2777060561723614, "grad_norm": 0.26171875, "learning_rate": 0.00018137001460208309, "loss": 1.0173, "step": 4840 }, { "epoch": 0.277992942594027, "grad_norm": 0.27734375, "learning_rate": 0.00018131176022129214, "loss": 0.9406, "step": 4845 }, { "epoch": 0.2782798290156927, "grad_norm": 0.26953125, "learning_rate": 0.00018125342429048825, "loss": 0.9349, "step": 4850 }, { "epoch": 0.27856671543735834, "grad_norm": 0.26953125, "learning_rate": 0.00018119500686817824, "loss": 0.9799, "step": 4855 }, { "epoch": 0.278853601859024, "grad_norm": 0.267578125, "learning_rate": 0.00018113650801295073, "loss": 0.9382, "step": 4860 }, { "epoch": 0.27914048828068966, "grad_norm": 0.26171875, "learning_rate": 0.000181077927783476, "loss": 0.9629, "step": 4865 }, { "epoch": 0.27942737470235535, "grad_norm": 0.26953125, "learning_rate": 0.00018101926623850586, "loss": 0.9367, "step": 4870 }, { "epoch": 0.279714261124021, "grad_norm": 0.390625, "learning_rate": 0.00018096052343687382, "loss": 0.9748, "step": 4875 }, { "epoch": 0.28000114754568667, "grad_norm": 0.275390625, "learning_rate": 0.00018090169943749476, "loss": 0.9586, "step": 4880 }, { "epoch": 0.2802880339673523, "grad_norm": 0.3046875, "learning_rate": 0.00018084279429936504, "loss": 1.0856, "step": 4885 }, { "epoch": 0.280574920389018, "grad_norm": 0.267578125, "learning_rate": 0.00018078380808156245, "loss": 0.9844, "step": 4890 }, { "epoch": 0.2808618068106836, "grad_norm": 0.326171875, "learning_rate": 0.00018072474084324593, "loss": 0.9757, "step": 4895 }, { "epoch": 0.2811486932323493, "grad_norm": 0.2578125, "learning_rate": 0.00018066559264365593, "loss": 1.0071, "step": 4900 }, { "epoch": 0.281435579654015, "grad_norm": 0.294921875, "learning_rate": 0.00018060636354211385, "loss": 0.9564, "step": 4905 }, { "epoch": 0.28172246607568063, "grad_norm": 0.26953125, "learning_rate": 0.0001805470535980224, "loss": 0.964, "step": 4910 }, { "epoch": 0.2820093524973463, "grad_norm": 0.255859375, "learning_rate": 0.0001804876628708653, "loss": 0.9423, "step": 4915 }, { "epoch": 0.28229623891901195, "grad_norm": 0.291015625, "learning_rate": 0.00018042819142020727, "loss": 1.0061, "step": 4920 }, { "epoch": 0.28258312534067764, "grad_norm": 0.2734375, "learning_rate": 0.00018036863930569408, "loss": 0.9712, "step": 4925 }, { "epoch": 0.2828700117623433, "grad_norm": 0.287109375, "learning_rate": 0.00018030900658705227, "loss": 0.9819, "step": 4930 }, { "epoch": 0.28315689818400896, "grad_norm": 0.251953125, "learning_rate": 0.00018024929332408933, "loss": 0.9329, "step": 4935 }, { "epoch": 0.2834437846056746, "grad_norm": 0.26953125, "learning_rate": 0.00018018949957669347, "loss": 0.9663, "step": 4940 }, { "epoch": 0.2837306710273403, "grad_norm": 0.28515625, "learning_rate": 0.00018012962540483364, "loss": 0.9632, "step": 4945 }, { "epoch": 0.2840175574490059, "grad_norm": 0.279296875, "learning_rate": 0.00018006967086855948, "loss": 0.9709, "step": 4950 }, { "epoch": 0.2843044438706716, "grad_norm": 0.26171875, "learning_rate": 0.00018000963602800117, "loss": 1.0216, "step": 4955 }, { "epoch": 0.28459133029233724, "grad_norm": 0.263671875, "learning_rate": 0.00017994952094336946, "loss": 0.9454, "step": 4960 }, { "epoch": 0.28487821671400293, "grad_norm": 0.28125, "learning_rate": 0.0001798893256749556, "loss": 0.9857, "step": 4965 }, { "epoch": 0.28516510313566856, "grad_norm": 0.25390625, "learning_rate": 0.0001798290502831312, "loss": 0.9288, "step": 4970 }, { "epoch": 0.28545198955733425, "grad_norm": 0.263671875, "learning_rate": 0.0001797686948283483, "loss": 0.9603, "step": 4975 }, { "epoch": 0.28573887597899994, "grad_norm": 0.2578125, "learning_rate": 0.00017970825937113923, "loss": 0.9366, "step": 4980 }, { "epoch": 0.2860257624006656, "grad_norm": 0.25, "learning_rate": 0.00017964774397211643, "loss": 0.9745, "step": 4985 }, { "epoch": 0.28631264882233126, "grad_norm": 0.26953125, "learning_rate": 0.00017958714869197273, "loss": 0.9843, "step": 4990 }, { "epoch": 0.2865995352439969, "grad_norm": 0.26953125, "learning_rate": 0.00017952647359148087, "loss": 0.9778, "step": 4995 }, { "epoch": 0.2868864216656626, "grad_norm": 0.271484375, "learning_rate": 0.00017946571873149377, "loss": 0.9616, "step": 5000 }, { "epoch": 0.2871733080873282, "grad_norm": 0.2734375, "learning_rate": 0.00017940488417294437, "loss": 1.0831, "step": 5005 }, { "epoch": 0.2874601945089939, "grad_norm": 0.275390625, "learning_rate": 0.00017934396997684537, "loss": 0.9116, "step": 5010 }, { "epoch": 0.28774708093065954, "grad_norm": 0.263671875, "learning_rate": 0.00017928297620428953, "loss": 0.9857, "step": 5015 }, { "epoch": 0.2880339673523252, "grad_norm": 0.275390625, "learning_rate": 0.00017922190291644934, "loss": 0.9836, "step": 5020 }, { "epoch": 0.28832085377399086, "grad_norm": 0.255859375, "learning_rate": 0.00017916075017457698, "loss": 0.9527, "step": 5025 }, { "epoch": 0.28860774019565655, "grad_norm": 0.251953125, "learning_rate": 0.00017909951804000445, "loss": 0.9726, "step": 5030 }, { "epoch": 0.2888946266173222, "grad_norm": 0.267578125, "learning_rate": 0.0001790382065741432, "loss": 0.993, "step": 5035 }, { "epoch": 0.28918151303898787, "grad_norm": 0.265625, "learning_rate": 0.00017897681583848449, "loss": 0.9989, "step": 5040 }, { "epoch": 0.2894683994606535, "grad_norm": 0.27734375, "learning_rate": 0.00017891534589459883, "loss": 0.9973, "step": 5045 }, { "epoch": 0.2897552858823192, "grad_norm": 0.265625, "learning_rate": 0.00017885379680413627, "loss": 0.9504, "step": 5050 }, { "epoch": 0.2900421723039849, "grad_norm": 0.283203125, "learning_rate": 0.0001787921686288263, "loss": 0.9392, "step": 5055 }, { "epoch": 0.2903290587256505, "grad_norm": 0.294921875, "learning_rate": 0.00017873046143047767, "loss": 0.9245, "step": 5060 }, { "epoch": 0.2906159451473162, "grad_norm": 0.255859375, "learning_rate": 0.00017866867527097837, "loss": 0.9887, "step": 5065 }, { "epoch": 0.29090283156898183, "grad_norm": 0.2734375, "learning_rate": 0.0001786068102122956, "loss": 0.9696, "step": 5070 }, { "epoch": 0.2911897179906475, "grad_norm": 0.27734375, "learning_rate": 0.00017854486631647569, "loss": 0.9479, "step": 5075 }, { "epoch": 0.29147660441231316, "grad_norm": 0.26953125, "learning_rate": 0.00017848284364564406, "loss": 0.939, "step": 5080 }, { "epoch": 0.29176349083397884, "grad_norm": 0.275390625, "learning_rate": 0.00017842074226200505, "loss": 1.0141, "step": 5085 }, { "epoch": 0.2920503772556445, "grad_norm": 0.25390625, "learning_rate": 0.0001783585622278421, "loss": 0.8981, "step": 5090 }, { "epoch": 0.29233726367731017, "grad_norm": 0.28125, "learning_rate": 0.00017829630360551737, "loss": 0.9683, "step": 5095 }, { "epoch": 0.2926241500989758, "grad_norm": 0.275390625, "learning_rate": 0.0001782339664574719, "loss": 0.9518, "step": 5100 }, { "epoch": 0.2929110365206415, "grad_norm": 0.298828125, "learning_rate": 0.00017817155084622562, "loss": 0.9627, "step": 5105 }, { "epoch": 0.2931979229423071, "grad_norm": 0.2734375, "learning_rate": 0.00017810905683437683, "loss": 0.9485, "step": 5110 }, { "epoch": 0.2934848093639728, "grad_norm": 0.294921875, "learning_rate": 0.0001780464844846028, "loss": 1.04, "step": 5115 }, { "epoch": 0.29377169578563844, "grad_norm": 0.259765625, "learning_rate": 0.00017798383385965918, "loss": 0.9799, "step": 5120 }, { "epoch": 0.29405858220730413, "grad_norm": 0.291015625, "learning_rate": 0.00017792110502238016, "loss": 0.9765, "step": 5125 }, { "epoch": 0.29434546862896976, "grad_norm": 0.25390625, "learning_rate": 0.0001778582980356784, "loss": 0.9816, "step": 5130 }, { "epoch": 0.29463235505063545, "grad_norm": 0.28125, "learning_rate": 0.00017779541296254487, "loss": 1.0155, "step": 5135 }, { "epoch": 0.29491924147230114, "grad_norm": 0.271484375, "learning_rate": 0.00017773244986604895, "loss": 0.9594, "step": 5140 }, { "epoch": 0.2952061278939668, "grad_norm": 0.298828125, "learning_rate": 0.00017766940880933825, "loss": 0.9479, "step": 5145 }, { "epoch": 0.29549301431563246, "grad_norm": 0.26171875, "learning_rate": 0.00017760628985563845, "loss": 1.0054, "step": 5150 }, { "epoch": 0.2957799007372981, "grad_norm": 0.2578125, "learning_rate": 0.00017754309306825357, "loss": 0.8605, "step": 5155 }, { "epoch": 0.2960667871589638, "grad_norm": 0.275390625, "learning_rate": 0.00017747981851056548, "loss": 0.9471, "step": 5160 }, { "epoch": 0.2963536735806294, "grad_norm": 0.26953125, "learning_rate": 0.00017741646624603417, "loss": 0.9771, "step": 5165 }, { "epoch": 0.2966405600022951, "grad_norm": 0.279296875, "learning_rate": 0.00017735303633819753, "loss": 1.0142, "step": 5170 }, { "epoch": 0.29692744642396074, "grad_norm": 0.267578125, "learning_rate": 0.00017728952885067133, "loss": 0.9662, "step": 5175 }, { "epoch": 0.2972143328456264, "grad_norm": 0.271484375, "learning_rate": 0.00017722594384714916, "loss": 0.9515, "step": 5180 }, { "epoch": 0.29750121926729206, "grad_norm": 0.2470703125, "learning_rate": 0.00017716228139140228, "loss": 0.969, "step": 5185 }, { "epoch": 0.29778810568895775, "grad_norm": 0.2578125, "learning_rate": 0.00017709854154727975, "loss": 1.003, "step": 5190 }, { "epoch": 0.2980749921106234, "grad_norm": 0.322265625, "learning_rate": 0.00017703472437870813, "loss": 1.0214, "step": 5195 }, { "epoch": 0.29836187853228907, "grad_norm": 0.279296875, "learning_rate": 0.00017697082994969158, "loss": 1.018, "step": 5200 }, { "epoch": 0.2986487649539547, "grad_norm": 0.28125, "learning_rate": 0.0001769068583243118, "loss": 0.9132, "step": 5205 }, { "epoch": 0.2989356513756204, "grad_norm": 0.267578125, "learning_rate": 0.0001768428095667278, "loss": 0.905, "step": 5210 }, { "epoch": 0.2992225377972861, "grad_norm": 0.283203125, "learning_rate": 0.00017677868374117606, "loss": 0.9723, "step": 5215 }, { "epoch": 0.2995094242189517, "grad_norm": 0.267578125, "learning_rate": 0.00017671448091197026, "loss": 1.0017, "step": 5220 }, { "epoch": 0.2997963106406174, "grad_norm": 0.28515625, "learning_rate": 0.00017665020114350136, "loss": 0.9476, "step": 5225 }, { "epoch": 0.30008319706228304, "grad_norm": 0.26171875, "learning_rate": 0.00017658584450023747, "loss": 1.0022, "step": 5230 }, { "epoch": 0.3003700834839487, "grad_norm": 0.30078125, "learning_rate": 0.0001765214110467238, "loss": 0.954, "step": 5235 }, { "epoch": 0.30065696990561436, "grad_norm": 0.265625, "learning_rate": 0.00017645690084758267, "loss": 0.9476, "step": 5240 }, { "epoch": 0.30094385632728005, "grad_norm": 0.28515625, "learning_rate": 0.00017639231396751322, "loss": 0.9857, "step": 5245 }, { "epoch": 0.3012307427489457, "grad_norm": 0.26171875, "learning_rate": 0.00017632765047129157, "loss": 0.9455, "step": 5250 }, { "epoch": 0.30151762917061137, "grad_norm": 0.275390625, "learning_rate": 0.00017626291042377077, "loss": 0.9524, "step": 5255 }, { "epoch": 0.301804515592277, "grad_norm": 0.283203125, "learning_rate": 0.00017619809388988049, "loss": 0.9726, "step": 5260 }, { "epoch": 0.3020914020139427, "grad_norm": 0.26171875, "learning_rate": 0.00017613320093462723, "loss": 0.9399, "step": 5265 }, { "epoch": 0.3023782884356083, "grad_norm": 0.25390625, "learning_rate": 0.00017606823162309406, "loss": 0.9723, "step": 5270 }, { "epoch": 0.302665174857274, "grad_norm": 0.2451171875, "learning_rate": 0.00017600318602044066, "loss": 1.014, "step": 5275 }, { "epoch": 0.30295206127893964, "grad_norm": 0.271484375, "learning_rate": 0.00017593806419190325, "loss": 0.9293, "step": 5280 }, { "epoch": 0.30323894770060533, "grad_norm": 0.2734375, "learning_rate": 0.00017587286620279443, "loss": 0.9419, "step": 5285 }, { "epoch": 0.30352583412227097, "grad_norm": 0.279296875, "learning_rate": 0.00017580759211850323, "loss": 0.921, "step": 5290 }, { "epoch": 0.30381272054393665, "grad_norm": 0.265625, "learning_rate": 0.00017574224200449506, "loss": 0.9794, "step": 5295 }, { "epoch": 0.30409960696560234, "grad_norm": 0.2734375, "learning_rate": 0.00017567681592631145, "loss": 0.9639, "step": 5300 }, { "epoch": 0.304386493387268, "grad_norm": 0.27734375, "learning_rate": 0.00017561131394957022, "loss": 0.9969, "step": 5305 }, { "epoch": 0.30467337980893366, "grad_norm": 0.2734375, "learning_rate": 0.00017554573613996524, "loss": 0.9556, "step": 5310 }, { "epoch": 0.3049602662305993, "grad_norm": 0.32421875, "learning_rate": 0.00017548008256326655, "loss": 1.0006, "step": 5315 }, { "epoch": 0.305247152652265, "grad_norm": 0.279296875, "learning_rate": 0.00017541435328531996, "loss": 0.9816, "step": 5320 }, { "epoch": 0.3055340390739306, "grad_norm": 0.28515625, "learning_rate": 0.00017534854837204745, "loss": 0.9844, "step": 5325 }, { "epoch": 0.3058209254955963, "grad_norm": 0.259765625, "learning_rate": 0.00017528266788944676, "loss": 0.9498, "step": 5330 }, { "epoch": 0.30610781191726194, "grad_norm": 0.265625, "learning_rate": 0.00017521671190359132, "loss": 0.9057, "step": 5335 }, { "epoch": 0.30639469833892763, "grad_norm": 0.259765625, "learning_rate": 0.00017515068048063048, "loss": 0.9337, "step": 5340 }, { "epoch": 0.30668158476059326, "grad_norm": 0.296875, "learning_rate": 0.00017508457368678904, "loss": 0.9693, "step": 5345 }, { "epoch": 0.30696847118225895, "grad_norm": 0.271484375, "learning_rate": 0.00017501839158836756, "loss": 0.9641, "step": 5350 }, { "epoch": 0.3072553576039246, "grad_norm": 0.26171875, "learning_rate": 0.00017495213425174205, "loss": 0.9165, "step": 5355 }, { "epoch": 0.3075422440255903, "grad_norm": 0.296875, "learning_rate": 0.000174885801743364, "loss": 0.9796, "step": 5360 }, { "epoch": 0.3078291304472559, "grad_norm": 0.26953125, "learning_rate": 0.00017481939412976024, "loss": 0.9207, "step": 5365 }, { "epoch": 0.3081160168689216, "grad_norm": 0.279296875, "learning_rate": 0.00017475291147753299, "loss": 0.9668, "step": 5370 }, { "epoch": 0.3084029032905873, "grad_norm": 0.25390625, "learning_rate": 0.0001746863538533597, "loss": 0.9248, "step": 5375 }, { "epoch": 0.3086897897122529, "grad_norm": 0.27734375, "learning_rate": 0.000174619721323993, "loss": 0.9274, "step": 5380 }, { "epoch": 0.3089766761339186, "grad_norm": 0.259765625, "learning_rate": 0.0001745530139562607, "loss": 0.9341, "step": 5385 }, { "epoch": 0.30926356255558424, "grad_norm": 0.263671875, "learning_rate": 0.0001744862318170656, "loss": 0.9427, "step": 5390 }, { "epoch": 0.3095504489772499, "grad_norm": 0.28125, "learning_rate": 0.00017441937497338552, "loss": 0.9867, "step": 5395 }, { "epoch": 0.30983733539891556, "grad_norm": 0.265625, "learning_rate": 0.0001743524434922732, "loss": 0.9701, "step": 5400 }, { "epoch": 0.31012422182058125, "grad_norm": 0.26953125, "learning_rate": 0.00017428543744085623, "loss": 1.0206, "step": 5405 }, { "epoch": 0.3104111082422469, "grad_norm": 0.28125, "learning_rate": 0.00017421835688633704, "loss": 0.9761, "step": 5410 }, { "epoch": 0.31069799466391257, "grad_norm": 0.271484375, "learning_rate": 0.0001741512018959927, "loss": 1.0416, "step": 5415 }, { "epoch": 0.3109848810855782, "grad_norm": 0.28125, "learning_rate": 0.00017408397253717496, "loss": 1.0221, "step": 5420 }, { "epoch": 0.3112717675072439, "grad_norm": 0.271484375, "learning_rate": 0.0001740166688773102, "loss": 0.9932, "step": 5425 }, { "epoch": 0.3115586539289095, "grad_norm": 0.267578125, "learning_rate": 0.00017394929098389929, "loss": 0.9938, "step": 5430 }, { "epoch": 0.3118455403505752, "grad_norm": 0.267578125, "learning_rate": 0.00017388183892451755, "loss": 0.9856, "step": 5435 }, { "epoch": 0.31213242677224085, "grad_norm": 0.296875, "learning_rate": 0.00017381431276681464, "loss": 0.9411, "step": 5440 }, { "epoch": 0.31241931319390653, "grad_norm": 0.271484375, "learning_rate": 0.0001737467125785146, "loss": 0.9931, "step": 5445 }, { "epoch": 0.3127061996155722, "grad_norm": 0.298828125, "learning_rate": 0.0001736790384274157, "loss": 1.0169, "step": 5450 }, { "epoch": 0.31299308603723786, "grad_norm": 0.2734375, "learning_rate": 0.00017361129038139038, "loss": 1.0065, "step": 5455 }, { "epoch": 0.31327997245890354, "grad_norm": 0.26953125, "learning_rate": 0.0001735434685083852, "loss": 0.9399, "step": 5460 }, { "epoch": 0.3135668588805692, "grad_norm": 0.279296875, "learning_rate": 0.00017347557287642076, "loss": 1.0187, "step": 5465 }, { "epoch": 0.31385374530223487, "grad_norm": 0.26171875, "learning_rate": 0.00017340760355359161, "loss": 0.9296, "step": 5470 }, { "epoch": 0.3141406317239005, "grad_norm": 0.279296875, "learning_rate": 0.0001733395606080663, "loss": 0.9404, "step": 5475 }, { "epoch": 0.3144275181455662, "grad_norm": 0.28125, "learning_rate": 0.00017327144410808707, "loss": 0.9883, "step": 5480 }, { "epoch": 0.3147144045672318, "grad_norm": 0.265625, "learning_rate": 0.00017320325412197, "loss": 0.9034, "step": 5485 }, { "epoch": 0.3150012909888975, "grad_norm": 0.267578125, "learning_rate": 0.00017313499071810497, "loss": 0.9432, "step": 5490 }, { "epoch": 0.31528817741056314, "grad_norm": 0.2734375, "learning_rate": 0.00017306665396495534, "loss": 1.0101, "step": 5495 }, { "epoch": 0.31557506383222883, "grad_norm": 0.2890625, "learning_rate": 0.0001729982439310581, "loss": 0.9325, "step": 5500 }, { "epoch": 0.31586195025389446, "grad_norm": 0.263671875, "learning_rate": 0.00017292976068502376, "loss": 0.9796, "step": 5505 }, { "epoch": 0.31614883667556015, "grad_norm": 0.28515625, "learning_rate": 0.0001728612042955362, "loss": 1.029, "step": 5510 }, { "epoch": 0.3164357230972258, "grad_norm": 0.24609375, "learning_rate": 0.00017279257483135272, "loss": 0.9203, "step": 5515 }, { "epoch": 0.3167226095188915, "grad_norm": 0.2890625, "learning_rate": 0.00017272387236130383, "loss": 0.9876, "step": 5520 }, { "epoch": 0.3170094959405571, "grad_norm": 0.259765625, "learning_rate": 0.00017265509695429335, "loss": 0.9264, "step": 5525 }, { "epoch": 0.3172963823622228, "grad_norm": 0.251953125, "learning_rate": 0.00017258624867929817, "loss": 0.972, "step": 5530 }, { "epoch": 0.3175832687838885, "grad_norm": 0.27734375, "learning_rate": 0.00017251732760536833, "loss": 0.9633, "step": 5535 }, { "epoch": 0.3178701552055541, "grad_norm": 0.259765625, "learning_rate": 0.00017244833380162687, "loss": 0.9094, "step": 5540 }, { "epoch": 0.3181570416272198, "grad_norm": 0.28125, "learning_rate": 0.0001723792673372697, "loss": 0.9311, "step": 5545 }, { "epoch": 0.31844392804888544, "grad_norm": 0.287109375, "learning_rate": 0.00017231012828156566, "loss": 0.9904, "step": 5550 }, { "epoch": 0.3187308144705511, "grad_norm": 0.275390625, "learning_rate": 0.00017224091670385642, "loss": 0.8683, "step": 5555 }, { "epoch": 0.31901770089221676, "grad_norm": 0.259765625, "learning_rate": 0.00017217163267355638, "loss": 0.9152, "step": 5560 }, { "epoch": 0.31930458731388245, "grad_norm": 0.275390625, "learning_rate": 0.00017210227626015252, "loss": 0.9702, "step": 5565 }, { "epoch": 0.3195914737355481, "grad_norm": 0.26953125, "learning_rate": 0.00017203284753320447, "loss": 0.9488, "step": 5570 }, { "epoch": 0.31987836015721377, "grad_norm": 0.279296875, "learning_rate": 0.00017196334656234446, "loss": 0.9592, "step": 5575 }, { "epoch": 0.3201652465788794, "grad_norm": 0.298828125, "learning_rate": 0.00017189377341727708, "loss": 0.9108, "step": 5580 }, { "epoch": 0.3204521330005451, "grad_norm": 0.279296875, "learning_rate": 0.00017182412816777931, "loss": 0.9298, "step": 5585 }, { "epoch": 0.3207390194222107, "grad_norm": 0.267578125, "learning_rate": 0.00017175441088370045, "loss": 0.9129, "step": 5590 }, { "epoch": 0.3210259058438764, "grad_norm": 0.2734375, "learning_rate": 0.00017168462163496214, "loss": 0.9493, "step": 5595 }, { "epoch": 0.32131279226554205, "grad_norm": 0.255859375, "learning_rate": 0.00017161476049155807, "loss": 0.9256, "step": 5600 }, { "epoch": 0.32159967868720774, "grad_norm": 0.26171875, "learning_rate": 0.00017154482752355406, "loss": 1.0146, "step": 5605 }, { "epoch": 0.3218865651088734, "grad_norm": 0.2578125, "learning_rate": 0.00017147482280108802, "loss": 0.9382, "step": 5610 }, { "epoch": 0.32217345153053906, "grad_norm": 0.259765625, "learning_rate": 0.00017140474639436981, "loss": 1.0042, "step": 5615 }, { "epoch": 0.32246033795220475, "grad_norm": 0.267578125, "learning_rate": 0.0001713345983736811, "loss": 0.9268, "step": 5620 }, { "epoch": 0.3227472243738704, "grad_norm": 0.291015625, "learning_rate": 0.00017126437880937557, "loss": 0.9968, "step": 5625 }, { "epoch": 0.32303411079553607, "grad_norm": 0.279296875, "learning_rate": 0.00017119408777187842, "loss": 0.9655, "step": 5630 }, { "epoch": 0.3233209972172017, "grad_norm": 0.263671875, "learning_rate": 0.00017112372533168672, "loss": 0.9814, "step": 5635 }, { "epoch": 0.3236078836388674, "grad_norm": 0.2734375, "learning_rate": 0.00017105329155936905, "loss": 0.9817, "step": 5640 }, { "epoch": 0.323894770060533, "grad_norm": 0.251953125, "learning_rate": 0.0001709827865255656, "loss": 0.9616, "step": 5645 }, { "epoch": 0.3241816564821987, "grad_norm": 0.25, "learning_rate": 0.0001709122103009879, "loss": 1.0141, "step": 5650 }, { "epoch": 0.32446854290386434, "grad_norm": 0.271484375, "learning_rate": 0.00017084156295641906, "loss": 1.0161, "step": 5655 }, { "epoch": 0.32475542932553003, "grad_norm": 0.265625, "learning_rate": 0.0001707708445627134, "loss": 0.9275, "step": 5660 }, { "epoch": 0.32504231574719566, "grad_norm": 0.2734375, "learning_rate": 0.00017070005519079652, "loss": 0.927, "step": 5665 }, { "epoch": 0.32532920216886135, "grad_norm": 0.283203125, "learning_rate": 0.00017062919491166523, "loss": 0.9895, "step": 5670 }, { "epoch": 0.325616088590527, "grad_norm": 0.279296875, "learning_rate": 0.00017055826379638742, "loss": 0.9869, "step": 5675 }, { "epoch": 0.3259029750121927, "grad_norm": 0.31640625, "learning_rate": 0.00017048726191610202, "loss": 0.9745, "step": 5680 }, { "epoch": 0.3261898614338583, "grad_norm": 0.275390625, "learning_rate": 0.00017041618934201904, "loss": 0.9473, "step": 5685 }, { "epoch": 0.326476747855524, "grad_norm": 0.265625, "learning_rate": 0.0001703450461454192, "loss": 1.0089, "step": 5690 }, { "epoch": 0.3267636342771897, "grad_norm": 0.2578125, "learning_rate": 0.00017027383239765422, "loss": 0.9712, "step": 5695 }, { "epoch": 0.3270505206988553, "grad_norm": 0.26171875, "learning_rate": 0.0001702025481701465, "loss": 0.9297, "step": 5700 }, { "epoch": 0.327337407120521, "grad_norm": 0.271484375, "learning_rate": 0.00017013119353438913, "loss": 0.9875, "step": 5705 }, { "epoch": 0.32762429354218664, "grad_norm": 0.259765625, "learning_rate": 0.00017005976856194582, "loss": 1.0192, "step": 5710 }, { "epoch": 0.32791117996385233, "grad_norm": 0.26953125, "learning_rate": 0.00016998827332445084, "loss": 0.927, "step": 5715 }, { "epoch": 0.32819806638551796, "grad_norm": 0.259765625, "learning_rate": 0.0001699167078936089, "loss": 0.8732, "step": 5720 }, { "epoch": 0.32848495280718365, "grad_norm": 0.2578125, "learning_rate": 0.0001698450723411951, "loss": 0.8891, "step": 5725 }, { "epoch": 0.3287718392288493, "grad_norm": 0.30078125, "learning_rate": 0.00016977336673905497, "loss": 0.9319, "step": 5730 }, { "epoch": 0.32905872565051497, "grad_norm": 0.3359375, "learning_rate": 0.00016970159115910417, "loss": 0.9684, "step": 5735 }, { "epoch": 0.3293456120721806, "grad_norm": 0.275390625, "learning_rate": 0.00016962974567332858, "loss": 0.9291, "step": 5740 }, { "epoch": 0.3296324984938463, "grad_norm": 0.283203125, "learning_rate": 0.00016955783035378424, "loss": 0.929, "step": 5745 }, { "epoch": 0.3299193849155119, "grad_norm": 0.279296875, "learning_rate": 0.00016948584527259715, "loss": 0.9864, "step": 5750 }, { "epoch": 0.3302062713371776, "grad_norm": 0.271484375, "learning_rate": 0.0001694137905019633, "loss": 0.9395, "step": 5755 }, { "epoch": 0.33049315775884325, "grad_norm": 0.259765625, "learning_rate": 0.00016934166611414867, "loss": 0.9835, "step": 5760 }, { "epoch": 0.33078004418050894, "grad_norm": 0.28515625, "learning_rate": 0.0001692694721814889, "loss": 0.971, "step": 5765 }, { "epoch": 0.3310669306021746, "grad_norm": 0.259765625, "learning_rate": 0.0001691972087763895, "loss": 0.9408, "step": 5770 }, { "epoch": 0.33135381702384026, "grad_norm": 0.267578125, "learning_rate": 0.0001691248759713256, "loss": 1.007, "step": 5775 }, { "epoch": 0.33164070344550595, "grad_norm": 0.25, "learning_rate": 0.00016905247383884196, "loss": 0.9664, "step": 5780 }, { "epoch": 0.3319275898671716, "grad_norm": 0.294921875, "learning_rate": 0.00016898000245155282, "loss": 0.9454, "step": 5785 }, { "epoch": 0.33221447628883727, "grad_norm": 0.287109375, "learning_rate": 0.000168907461882142, "loss": 0.9359, "step": 5790 }, { "epoch": 0.3325013627105029, "grad_norm": 0.296875, "learning_rate": 0.00016883485220336257, "loss": 0.9221, "step": 5795 }, { "epoch": 0.3327882491321686, "grad_norm": 0.27734375, "learning_rate": 0.00016876217348803693, "loss": 0.8946, "step": 5800 }, { "epoch": 0.3330751355538342, "grad_norm": 0.28125, "learning_rate": 0.00016868942580905677, "loss": 0.9294, "step": 5805 }, { "epoch": 0.3333620219754999, "grad_norm": 0.2890625, "learning_rate": 0.000168616609239383, "loss": 0.9598, "step": 5810 }, { "epoch": 0.33364890839716554, "grad_norm": 0.267578125, "learning_rate": 0.0001685437238520455, "loss": 1.0093, "step": 5815 }, { "epoch": 0.33393579481883123, "grad_norm": 0.265625, "learning_rate": 0.00016847076972014316, "loss": 1.0157, "step": 5820 }, { "epoch": 0.33422268124049687, "grad_norm": 0.283203125, "learning_rate": 0.00016839774691684395, "loss": 0.981, "step": 5825 }, { "epoch": 0.33450956766216255, "grad_norm": 0.25390625, "learning_rate": 0.00016832465551538465, "loss": 0.9531, "step": 5830 }, { "epoch": 0.3347964540838282, "grad_norm": 0.25390625, "learning_rate": 0.00016825149558907074, "loss": 0.9698, "step": 5835 }, { "epoch": 0.3350833405054939, "grad_norm": 0.259765625, "learning_rate": 0.0001681782672112766, "loss": 0.9621, "step": 5840 }, { "epoch": 0.33537022692715956, "grad_norm": 0.283203125, "learning_rate": 0.00016810497045544515, "loss": 1.0009, "step": 5845 }, { "epoch": 0.3356571133488252, "grad_norm": 0.265625, "learning_rate": 0.0001680316053950879, "loss": 0.9552, "step": 5850 }, { "epoch": 0.3359439997704909, "grad_norm": 0.263671875, "learning_rate": 0.00016795817210378487, "loss": 0.9678, "step": 5855 }, { "epoch": 0.3362308861921565, "grad_norm": 0.271484375, "learning_rate": 0.0001678846706551846, "loss": 0.9359, "step": 5860 }, { "epoch": 0.3365177726138222, "grad_norm": 0.255859375, "learning_rate": 0.00016781110112300377, "loss": 0.921, "step": 5865 }, { "epoch": 0.33680465903548784, "grad_norm": 0.259765625, "learning_rate": 0.0001677374635810276, "loss": 1.0211, "step": 5870 }, { "epoch": 0.33709154545715353, "grad_norm": 0.259765625, "learning_rate": 0.00016766375810310934, "loss": 0.9986, "step": 5875 }, { "epoch": 0.33737843187881916, "grad_norm": 0.259765625, "learning_rate": 0.00016758998476317042, "loss": 0.8914, "step": 5880 }, { "epoch": 0.33766531830048485, "grad_norm": 0.306640625, "learning_rate": 0.0001675161436352004, "loss": 0.9716, "step": 5885 }, { "epoch": 0.3379522047221505, "grad_norm": 0.26171875, "learning_rate": 0.0001674422347932567, "loss": 0.9695, "step": 5890 }, { "epoch": 0.3382390911438162, "grad_norm": 0.2734375, "learning_rate": 0.00016736825831146482, "loss": 0.9317, "step": 5895 }, { "epoch": 0.3385259775654818, "grad_norm": 0.26953125, "learning_rate": 0.00016729421426401787, "loss": 0.898, "step": 5900 }, { "epoch": 0.3388128639871475, "grad_norm": 0.279296875, "learning_rate": 0.000167220102725177, "loss": 0.9603, "step": 5905 }, { "epoch": 0.3390997504088131, "grad_norm": 0.275390625, "learning_rate": 0.0001671459237692708, "loss": 0.8899, "step": 5910 }, { "epoch": 0.3393866368304788, "grad_norm": 0.263671875, "learning_rate": 0.00016707167747069562, "loss": 1.0311, "step": 5915 }, { "epoch": 0.33967352325214445, "grad_norm": 0.25390625, "learning_rate": 0.0001669973639039153, "loss": 0.9362, "step": 5920 }, { "epoch": 0.33996040967381014, "grad_norm": 0.26953125, "learning_rate": 0.0001669229831434611, "loss": 1.0252, "step": 5925 }, { "epoch": 0.3402472960954758, "grad_norm": 0.275390625, "learning_rate": 0.00016684853526393185, "loss": 1.0254, "step": 5930 }, { "epoch": 0.34053418251714146, "grad_norm": 0.275390625, "learning_rate": 0.00016677402033999346, "loss": 0.9544, "step": 5935 }, { "epoch": 0.34082106893880715, "grad_norm": 0.263671875, "learning_rate": 0.00016669943844637924, "loss": 0.9675, "step": 5940 }, { "epoch": 0.3411079553604728, "grad_norm": 0.287109375, "learning_rate": 0.00016662478965788962, "loss": 1.0488, "step": 5945 }, { "epoch": 0.34139484178213847, "grad_norm": 0.294921875, "learning_rate": 0.00016655007404939212, "loss": 0.8801, "step": 5950 }, { "epoch": 0.3416817282038041, "grad_norm": 0.447265625, "learning_rate": 0.00016647529169582122, "loss": 0.9542, "step": 5955 }, { "epoch": 0.3419686146254698, "grad_norm": 0.255859375, "learning_rate": 0.00016640044267217846, "loss": 0.9236, "step": 5960 }, { "epoch": 0.3422555010471354, "grad_norm": 0.275390625, "learning_rate": 0.00016632552705353213, "loss": 1.0163, "step": 5965 }, { "epoch": 0.3425423874688011, "grad_norm": 0.275390625, "learning_rate": 0.00016625054491501738, "loss": 0.9787, "step": 5970 }, { "epoch": 0.34282927389046675, "grad_norm": 0.279296875, "learning_rate": 0.00016617549633183608, "loss": 1.0467, "step": 5975 }, { "epoch": 0.34311616031213243, "grad_norm": 0.263671875, "learning_rate": 0.00016610038137925668, "loss": 0.9085, "step": 5980 }, { "epoch": 0.34340304673379807, "grad_norm": 0.279296875, "learning_rate": 0.00016602520013261424, "loss": 0.9452, "step": 5985 }, { "epoch": 0.34368993315546376, "grad_norm": 0.2578125, "learning_rate": 0.0001659499526673103, "loss": 0.9731, "step": 5990 }, { "epoch": 0.3439768195771294, "grad_norm": 0.251953125, "learning_rate": 0.0001658746390588128, "loss": 1.0542, "step": 5995 }, { "epoch": 0.3442637059987951, "grad_norm": 0.28125, "learning_rate": 0.00016579925938265606, "loss": 0.9834, "step": 6000 }, { "epoch": 0.34455059242046077, "grad_norm": 0.28125, "learning_rate": 0.00016572381371444058, "loss": 0.9785, "step": 6005 }, { "epoch": 0.3448374788421264, "grad_norm": 0.2734375, "learning_rate": 0.0001656483021298331, "loss": 1.0366, "step": 6010 }, { "epoch": 0.3451243652637921, "grad_norm": 0.263671875, "learning_rate": 0.00016557272470456646, "loss": 0.9481, "step": 6015 }, { "epoch": 0.3454112516854577, "grad_norm": 0.294921875, "learning_rate": 0.00016549708151443956, "loss": 0.8768, "step": 6020 }, { "epoch": 0.3456981381071234, "grad_norm": 0.28515625, "learning_rate": 0.00016542137263531723, "loss": 0.9321, "step": 6025 }, { "epoch": 0.34598502452878904, "grad_norm": 0.27734375, "learning_rate": 0.00016534559814313017, "loss": 0.9894, "step": 6030 }, { "epoch": 0.34627191095045473, "grad_norm": 0.3046875, "learning_rate": 0.00016526975811387493, "loss": 1.0148, "step": 6035 }, { "epoch": 0.34655879737212036, "grad_norm": 0.279296875, "learning_rate": 0.00016519385262361372, "loss": 1.0106, "step": 6040 }, { "epoch": 0.34684568379378605, "grad_norm": 0.28515625, "learning_rate": 0.00016511788174847444, "loss": 0.9643, "step": 6045 }, { "epoch": 0.3471325702154517, "grad_norm": 0.263671875, "learning_rate": 0.0001650418455646506, "loss": 0.9491, "step": 6050 }, { "epoch": 0.3474194566371174, "grad_norm": 0.296875, "learning_rate": 0.00016496574414840117, "loss": 0.9549, "step": 6055 }, { "epoch": 0.347706343058783, "grad_norm": 0.267578125, "learning_rate": 0.00016488957757605056, "loss": 0.9358, "step": 6060 }, { "epoch": 0.3479932294804487, "grad_norm": 0.296875, "learning_rate": 0.0001648133459239885, "loss": 1.0463, "step": 6065 }, { "epoch": 0.34828011590211433, "grad_norm": 0.2734375, "learning_rate": 0.00016473704926867, "loss": 0.9985, "step": 6070 }, { "epoch": 0.34856700232378, "grad_norm": 0.265625, "learning_rate": 0.0001646606876866153, "loss": 0.8788, "step": 6075 }, { "epoch": 0.34885388874544565, "grad_norm": 0.294921875, "learning_rate": 0.00016458426125440974, "loss": 0.9631, "step": 6080 }, { "epoch": 0.34914077516711134, "grad_norm": 0.26953125, "learning_rate": 0.0001645077700487036, "loss": 1.0102, "step": 6085 }, { "epoch": 0.349427661588777, "grad_norm": 0.265625, "learning_rate": 0.00016443121414621236, "loss": 0.9978, "step": 6090 }, { "epoch": 0.34971454801044266, "grad_norm": 0.259765625, "learning_rate": 0.00016435459362371612, "loss": 1.0069, "step": 6095 }, { "epoch": 0.35000143443210835, "grad_norm": 0.275390625, "learning_rate": 0.00016427790855805995, "loss": 0.9336, "step": 6100 }, { "epoch": 0.350288320853774, "grad_norm": 0.2578125, "learning_rate": 0.00016420115902615365, "loss": 0.927, "step": 6105 }, { "epoch": 0.35057520727543967, "grad_norm": 0.294921875, "learning_rate": 0.00016412434510497157, "loss": 1.0225, "step": 6110 }, { "epoch": 0.3508620936971053, "grad_norm": 0.26171875, "learning_rate": 0.00016404746687155277, "loss": 1.0265, "step": 6115 }, { "epoch": 0.351148980118771, "grad_norm": 0.263671875, "learning_rate": 0.00016397052440300067, "loss": 0.9201, "step": 6120 }, { "epoch": 0.3514358665404366, "grad_norm": 0.271484375, "learning_rate": 0.00016389351777648325, "loss": 0.9222, "step": 6125 }, { "epoch": 0.3517227529621023, "grad_norm": 0.263671875, "learning_rate": 0.00016381644706923277, "loss": 0.9239, "step": 6130 }, { "epoch": 0.35200963938376795, "grad_norm": 0.26171875, "learning_rate": 0.00016373931235854573, "loss": 0.9759, "step": 6135 }, { "epoch": 0.35229652580543364, "grad_norm": 0.263671875, "learning_rate": 0.00016366211372178285, "loss": 0.9752, "step": 6140 }, { "epoch": 0.35258341222709927, "grad_norm": 0.265625, "learning_rate": 0.00016358485123636903, "loss": 0.9587, "step": 6145 }, { "epoch": 0.35287029864876496, "grad_norm": 0.2578125, "learning_rate": 0.00016350752497979308, "loss": 0.929, "step": 6150 }, { "epoch": 0.3531571850704306, "grad_norm": 0.248046875, "learning_rate": 0.00016343013502960786, "loss": 0.9936, "step": 6155 }, { "epoch": 0.3534440714920963, "grad_norm": 0.287109375, "learning_rate": 0.00016335268146343008, "loss": 0.9504, "step": 6160 }, { "epoch": 0.35373095791376197, "grad_norm": 0.2578125, "learning_rate": 0.00016327516435894025, "loss": 0.9697, "step": 6165 }, { "epoch": 0.3540178443354276, "grad_norm": 0.2734375, "learning_rate": 0.0001631975837938826, "loss": 1.0284, "step": 6170 }, { "epoch": 0.3543047307570933, "grad_norm": 0.283203125, "learning_rate": 0.00016311993984606505, "loss": 0.9574, "step": 6175 }, { "epoch": 0.3545916171787589, "grad_norm": 0.30078125, "learning_rate": 0.00016304223259335898, "loss": 1.0392, "step": 6180 }, { "epoch": 0.3548785036004246, "grad_norm": 0.2890625, "learning_rate": 0.00016296446211369942, "loss": 0.9209, "step": 6185 }, { "epoch": 0.35516539002209024, "grad_norm": 0.259765625, "learning_rate": 0.00016288662848508467, "loss": 0.9735, "step": 6190 }, { "epoch": 0.35545227644375593, "grad_norm": 0.267578125, "learning_rate": 0.00016280873178557643, "loss": 0.9182, "step": 6195 }, { "epoch": 0.35573916286542157, "grad_norm": 0.25390625, "learning_rate": 0.00016273077209329968, "loss": 0.8669, "step": 6200 }, { "epoch": 0.35602604928708725, "grad_norm": 0.27734375, "learning_rate": 0.0001626527494864425, "loss": 0.987, "step": 6205 }, { "epoch": 0.3563129357087529, "grad_norm": 0.294921875, "learning_rate": 0.0001625746640432561, "loss": 0.9733, "step": 6210 }, { "epoch": 0.3565998221304186, "grad_norm": 0.2890625, "learning_rate": 0.0001624965158420548, "loss": 0.9621, "step": 6215 }, { "epoch": 0.3568867085520842, "grad_norm": 0.265625, "learning_rate": 0.0001624183049612157, "loss": 0.9376, "step": 6220 }, { "epoch": 0.3571735949737499, "grad_norm": 0.2734375, "learning_rate": 0.00016234003147917888, "loss": 0.9817, "step": 6225 }, { "epoch": 0.35746048139541553, "grad_norm": 0.283203125, "learning_rate": 0.0001622616954744472, "loss": 1.0048, "step": 6230 }, { "epoch": 0.3577473678170812, "grad_norm": 0.26953125, "learning_rate": 0.00016218329702558616, "loss": 0.9605, "step": 6235 }, { "epoch": 0.3580342542387469, "grad_norm": 0.271484375, "learning_rate": 0.00016210483621122395, "loss": 0.9197, "step": 6240 }, { "epoch": 0.35832114066041254, "grad_norm": 0.26171875, "learning_rate": 0.00016202631311005124, "loss": 0.9409, "step": 6245 }, { "epoch": 0.35860802708207823, "grad_norm": 0.318359375, "learning_rate": 0.00016194772780082125, "loss": 0.9821, "step": 6250 }, { "epoch": 0.35889491350374386, "grad_norm": 0.2578125, "learning_rate": 0.0001618690803623496, "loss": 0.9944, "step": 6255 }, { "epoch": 0.35918179992540955, "grad_norm": 0.28125, "learning_rate": 0.00016179037087351406, "loss": 0.9923, "step": 6260 }, { "epoch": 0.3594686863470752, "grad_norm": 0.328125, "learning_rate": 0.00016171159941325483, "loss": 0.9718, "step": 6265 }, { "epoch": 0.35975557276874087, "grad_norm": 0.2734375, "learning_rate": 0.00016163276606057415, "loss": 0.9481, "step": 6270 }, { "epoch": 0.3600424591904065, "grad_norm": 0.3125, "learning_rate": 0.00016155387089453638, "loss": 0.945, "step": 6275 }, { "epoch": 0.3603293456120722, "grad_norm": 0.298828125, "learning_rate": 0.00016147491399426785, "loss": 0.9435, "step": 6280 }, { "epoch": 0.3606162320337378, "grad_norm": 0.2578125, "learning_rate": 0.0001613958954389568, "loss": 0.9478, "step": 6285 }, { "epoch": 0.3609031184554035, "grad_norm": 0.26171875, "learning_rate": 0.00016131681530785335, "loss": 0.9932, "step": 6290 }, { "epoch": 0.36119000487706915, "grad_norm": 0.271484375, "learning_rate": 0.00016123767368026929, "loss": 0.8874, "step": 6295 }, { "epoch": 0.36147689129873484, "grad_norm": 0.255859375, "learning_rate": 0.0001611584706355782, "loss": 0.9521, "step": 6300 }, { "epoch": 0.36176377772040047, "grad_norm": 0.275390625, "learning_rate": 0.0001610792062532152, "loss": 0.9425, "step": 6305 }, { "epoch": 0.36205066414206616, "grad_norm": 0.4140625, "learning_rate": 0.00016099988061267688, "loss": 0.9188, "step": 6310 }, { "epoch": 0.3623375505637318, "grad_norm": 0.263671875, "learning_rate": 0.00016092049379352132, "loss": 0.9339, "step": 6315 }, { "epoch": 0.3626244369853975, "grad_norm": 0.267578125, "learning_rate": 0.000160841045875368, "loss": 0.9773, "step": 6320 }, { "epoch": 0.36291132340706317, "grad_norm": 0.275390625, "learning_rate": 0.0001607615369378976, "loss": 0.993, "step": 6325 }, { "epoch": 0.3631982098287288, "grad_norm": 0.28515625, "learning_rate": 0.00016068196706085197, "loss": 0.9681, "step": 6330 }, { "epoch": 0.3634850962503945, "grad_norm": 0.263671875, "learning_rate": 0.00016060233632403422, "loss": 0.888, "step": 6335 }, { "epoch": 0.3637719826720601, "grad_norm": 0.291015625, "learning_rate": 0.0001605226448073084, "loss": 0.9844, "step": 6340 }, { "epoch": 0.3640588690937258, "grad_norm": 0.2890625, "learning_rate": 0.0001604428925905995, "loss": 0.975, "step": 6345 }, { "epoch": 0.36434575551539145, "grad_norm": 0.267578125, "learning_rate": 0.00016036307975389344, "loss": 0.8831, "step": 6350 }, { "epoch": 0.36463264193705713, "grad_norm": 0.2734375, "learning_rate": 0.00016028320637723694, "loss": 0.8748, "step": 6355 }, { "epoch": 0.36491952835872277, "grad_norm": 0.361328125, "learning_rate": 0.00016020327254073736, "loss": 0.9945, "step": 6360 }, { "epoch": 0.36520641478038846, "grad_norm": 0.29296875, "learning_rate": 0.0001601232783245628, "loss": 0.9287, "step": 6365 }, { "epoch": 0.3654933012020541, "grad_norm": 0.2578125, "learning_rate": 0.00016004322380894182, "loss": 0.9783, "step": 6370 }, { "epoch": 0.3657801876237198, "grad_norm": 0.2734375, "learning_rate": 0.00015996310907416355, "loss": 0.9824, "step": 6375 }, { "epoch": 0.3660670740453854, "grad_norm": 0.263671875, "learning_rate": 0.00015988293420057744, "loss": 1.0362, "step": 6380 }, { "epoch": 0.3663539604670511, "grad_norm": 0.28125, "learning_rate": 0.0001598026992685933, "loss": 0.9992, "step": 6385 }, { "epoch": 0.36664084688871673, "grad_norm": 0.259765625, "learning_rate": 0.00015972240435868117, "loss": 0.9615, "step": 6390 }, { "epoch": 0.3669277333103824, "grad_norm": 0.29296875, "learning_rate": 0.00015964204955137124, "loss": 0.9546, "step": 6395 }, { "epoch": 0.3672146197320481, "grad_norm": 0.26171875, "learning_rate": 0.00015956163492725372, "loss": 0.9229, "step": 6400 }, { "epoch": 0.36750150615371374, "grad_norm": 0.28125, "learning_rate": 0.00015948116056697888, "loss": 0.9666, "step": 6405 }, { "epoch": 0.36778839257537943, "grad_norm": 0.25, "learning_rate": 0.0001594006265512569, "loss": 0.9889, "step": 6410 }, { "epoch": 0.36807527899704506, "grad_norm": 0.265625, "learning_rate": 0.00015932003296085774, "loss": 0.9284, "step": 6415 }, { "epoch": 0.36836216541871075, "grad_norm": 0.2734375, "learning_rate": 0.00015923937987661116, "loss": 0.998, "step": 6420 }, { "epoch": 0.3686490518403764, "grad_norm": 0.265625, "learning_rate": 0.00015915866737940655, "loss": 0.9247, "step": 6425 }, { "epoch": 0.3689359382620421, "grad_norm": 0.26953125, "learning_rate": 0.00015907789555019296, "loss": 0.9529, "step": 6430 }, { "epoch": 0.3692228246837077, "grad_norm": 0.271484375, "learning_rate": 0.0001589970644699788, "loss": 0.9881, "step": 6435 }, { "epoch": 0.3695097111053734, "grad_norm": 0.263671875, "learning_rate": 0.00015891617421983205, "loss": 0.9, "step": 6440 }, { "epoch": 0.36979659752703903, "grad_norm": 0.283203125, "learning_rate": 0.00015883522488087994, "loss": 0.9568, "step": 6445 }, { "epoch": 0.3700834839487047, "grad_norm": 0.265625, "learning_rate": 0.00015875421653430903, "loss": 0.9365, "step": 6450 }, { "epoch": 0.37037037037037035, "grad_norm": 0.267578125, "learning_rate": 0.00015867314926136509, "loss": 0.9851, "step": 6455 }, { "epoch": 0.37065725679203604, "grad_norm": 0.26953125, "learning_rate": 0.0001585920231433528, "loss": 0.9373, "step": 6460 }, { "epoch": 0.37094414321370167, "grad_norm": 0.265625, "learning_rate": 0.00015851083826163607, "loss": 0.9481, "step": 6465 }, { "epoch": 0.37123102963536736, "grad_norm": 0.2734375, "learning_rate": 0.00015842959469763765, "loss": 0.9843, "step": 6470 }, { "epoch": 0.371517916057033, "grad_norm": 0.2734375, "learning_rate": 0.00015834829253283915, "loss": 0.9618, "step": 6475 }, { "epoch": 0.3718048024786987, "grad_norm": 0.2431640625, "learning_rate": 0.00015826693184878095, "loss": 0.9339, "step": 6480 }, { "epoch": 0.37209168890036437, "grad_norm": 0.279296875, "learning_rate": 0.00015818551272706217, "loss": 0.9379, "step": 6485 }, { "epoch": 0.37237857532203, "grad_norm": 0.30078125, "learning_rate": 0.00015810403524934042, "loss": 0.9895, "step": 6490 }, { "epoch": 0.3726654617436957, "grad_norm": 0.263671875, "learning_rate": 0.00015802249949733202, "loss": 1.0319, "step": 6495 }, { "epoch": 0.3729523481653613, "grad_norm": 0.275390625, "learning_rate": 0.00015794090555281155, "loss": 0.9238, "step": 6500 }, { "epoch": 0.373239234587027, "grad_norm": 0.2734375, "learning_rate": 0.00015785925349761208, "loss": 0.909, "step": 6505 }, { "epoch": 0.37352612100869265, "grad_norm": 0.271484375, "learning_rate": 0.00015777754341362487, "loss": 0.9754, "step": 6510 }, { "epoch": 0.37381300743035834, "grad_norm": 0.26171875, "learning_rate": 0.00015769577538279949, "loss": 0.9287, "step": 6515 }, { "epoch": 0.37409989385202397, "grad_norm": 0.267578125, "learning_rate": 0.0001576139494871435, "loss": 0.9303, "step": 6520 }, { "epoch": 0.37438678027368966, "grad_norm": 0.2734375, "learning_rate": 0.00015753206580872256, "loss": 1.0229, "step": 6525 }, { "epoch": 0.3746736666953553, "grad_norm": 0.2734375, "learning_rate": 0.0001574501244296603, "loss": 0.9759, "step": 6530 }, { "epoch": 0.374960553117021, "grad_norm": 0.259765625, "learning_rate": 0.0001573681254321382, "loss": 0.955, "step": 6535 }, { "epoch": 0.3752474395386866, "grad_norm": 0.263671875, "learning_rate": 0.00015728606889839553, "loss": 0.9068, "step": 6540 }, { "epoch": 0.3755343259603523, "grad_norm": 0.287109375, "learning_rate": 0.00015720395491072918, "loss": 0.999, "step": 6545 }, { "epoch": 0.37582121238201793, "grad_norm": 0.25390625, "learning_rate": 0.00015712178355149385, "loss": 0.9308, "step": 6550 }, { "epoch": 0.3761080988036836, "grad_norm": 0.259765625, "learning_rate": 0.00015703955490310162, "loss": 0.9073, "step": 6555 }, { "epoch": 0.3763949852253493, "grad_norm": 0.275390625, "learning_rate": 0.00015695726904802208, "loss": 0.9493, "step": 6560 }, { "epoch": 0.37668187164701494, "grad_norm": 0.279296875, "learning_rate": 0.0001568749260687822, "loss": 1.0237, "step": 6565 }, { "epoch": 0.37696875806868063, "grad_norm": 0.296875, "learning_rate": 0.00015679252604796623, "loss": 0.9191, "step": 6570 }, { "epoch": 0.37725564449034626, "grad_norm": 0.2578125, "learning_rate": 0.0001567100690682156, "loss": 0.9601, "step": 6575 }, { "epoch": 0.37754253091201195, "grad_norm": 0.267578125, "learning_rate": 0.00015662755521222895, "loss": 0.9521, "step": 6580 }, { "epoch": 0.3778294173336776, "grad_norm": 0.263671875, "learning_rate": 0.00015654498456276188, "loss": 0.9252, "step": 6585 }, { "epoch": 0.3781163037553433, "grad_norm": 0.283203125, "learning_rate": 0.000156462357202627, "loss": 1.0132, "step": 6590 }, { "epoch": 0.3784031901770089, "grad_norm": 0.271484375, "learning_rate": 0.0001563796732146938, "loss": 0.9454, "step": 6595 }, { "epoch": 0.3786900765986746, "grad_norm": 0.27734375, "learning_rate": 0.00015629693268188842, "loss": 1.0079, "step": 6600 }, { "epoch": 0.37897696302034023, "grad_norm": 0.265625, "learning_rate": 0.000156214135687194, "loss": 1.0616, "step": 6605 }, { "epoch": 0.3792638494420059, "grad_norm": 0.28125, "learning_rate": 0.00015613128231365002, "loss": 0.9529, "step": 6610 }, { "epoch": 0.37955073586367155, "grad_norm": 0.25390625, "learning_rate": 0.00015604837264435268, "loss": 0.9917, "step": 6615 }, { "epoch": 0.37983762228533724, "grad_norm": 0.267578125, "learning_rate": 0.00015596540676245454, "loss": 1.0218, "step": 6620 }, { "epoch": 0.3801245087070029, "grad_norm": 0.265625, "learning_rate": 0.00015588238475116464, "loss": 0.9605, "step": 6625 }, { "epoch": 0.38041139512866856, "grad_norm": 0.263671875, "learning_rate": 0.0001557993066937482, "loss": 0.9693, "step": 6630 }, { "epoch": 0.38069828155033425, "grad_norm": 0.2578125, "learning_rate": 0.0001557161726735268, "loss": 0.9227, "step": 6635 }, { "epoch": 0.3809851679719999, "grad_norm": 0.26953125, "learning_rate": 0.0001556329827738779, "loss": 0.9259, "step": 6640 }, { "epoch": 0.38127205439366557, "grad_norm": 0.26953125, "learning_rate": 0.00015554973707823525, "loss": 0.9341, "step": 6645 }, { "epoch": 0.3815589408153312, "grad_norm": 0.267578125, "learning_rate": 0.00015546643567008848, "loss": 0.9537, "step": 6650 }, { "epoch": 0.3818458272369969, "grad_norm": 0.26171875, "learning_rate": 0.00015538307863298303, "loss": 0.9681, "step": 6655 }, { "epoch": 0.3821327136586625, "grad_norm": 0.2890625, "learning_rate": 0.00015529966605052023, "loss": 0.9962, "step": 6660 }, { "epoch": 0.3824196000803282, "grad_norm": 0.2734375, "learning_rate": 0.00015521619800635704, "loss": 1.0101, "step": 6665 }, { "epoch": 0.38270648650199385, "grad_norm": 0.27734375, "learning_rate": 0.00015513267458420606, "loss": 0.9589, "step": 6670 }, { "epoch": 0.38299337292365954, "grad_norm": 0.279296875, "learning_rate": 0.0001550490958678355, "loss": 1.001, "step": 6675 }, { "epoch": 0.38328025934532517, "grad_norm": 0.291015625, "learning_rate": 0.00015496546194106888, "loss": 0.9067, "step": 6680 }, { "epoch": 0.38356714576699086, "grad_norm": 0.265625, "learning_rate": 0.00015488177288778532, "loss": 0.94, "step": 6685 }, { "epoch": 0.3838540321886565, "grad_norm": 0.30078125, "learning_rate": 0.00015479802879191898, "loss": 0.9649, "step": 6690 }, { "epoch": 0.3841409186103222, "grad_norm": 0.26953125, "learning_rate": 0.00015471422973745936, "loss": 0.9982, "step": 6695 }, { "epoch": 0.3844278050319878, "grad_norm": 0.28125, "learning_rate": 0.00015463037580845107, "loss": 0.9649, "step": 6700 }, { "epoch": 0.3847146914536535, "grad_norm": 0.26953125, "learning_rate": 0.00015454646708899374, "loss": 0.9905, "step": 6705 }, { "epoch": 0.38500157787531913, "grad_norm": 0.255859375, "learning_rate": 0.00015446250366324196, "loss": 0.9492, "step": 6710 }, { "epoch": 0.3852884642969848, "grad_norm": 0.263671875, "learning_rate": 0.00015437848561540517, "loss": 0.9667, "step": 6715 }, { "epoch": 0.3855753507186505, "grad_norm": 0.279296875, "learning_rate": 0.00015429441302974755, "loss": 0.9155, "step": 6720 }, { "epoch": 0.38586223714031614, "grad_norm": 0.275390625, "learning_rate": 0.00015421028599058812, "loss": 0.8806, "step": 6725 }, { "epoch": 0.38614912356198183, "grad_norm": 0.28125, "learning_rate": 0.00015412610458230037, "loss": 0.9388, "step": 6730 }, { "epoch": 0.38643600998364747, "grad_norm": 0.26171875, "learning_rate": 0.00015404186888931233, "loss": 0.9418, "step": 6735 }, { "epoch": 0.38672289640531315, "grad_norm": 0.2734375, "learning_rate": 0.00015395757899610662, "loss": 1.0182, "step": 6740 }, { "epoch": 0.3870097828269788, "grad_norm": 0.24609375, "learning_rate": 0.00015387323498722, "loss": 0.981, "step": 6745 }, { "epoch": 0.3872966692486445, "grad_norm": 0.275390625, "learning_rate": 0.00015378883694724369, "loss": 0.9555, "step": 6750 }, { "epoch": 0.3875835556703101, "grad_norm": 0.291015625, "learning_rate": 0.00015370438496082302, "loss": 0.9822, "step": 6755 }, { "epoch": 0.3878704420919758, "grad_norm": 0.26953125, "learning_rate": 0.0001536198791126574, "loss": 0.9368, "step": 6760 }, { "epoch": 0.38815732851364143, "grad_norm": 0.267578125, "learning_rate": 0.00015353531948750026, "loss": 0.8964, "step": 6765 }, { "epoch": 0.3884442149353071, "grad_norm": 0.26953125, "learning_rate": 0.0001534507061701591, "loss": 0.8962, "step": 6770 }, { "epoch": 0.38873110135697275, "grad_norm": 0.275390625, "learning_rate": 0.00015336603924549503, "loss": 1.0406, "step": 6775 }, { "epoch": 0.38901798777863844, "grad_norm": 0.263671875, "learning_rate": 0.0001532813187984232, "loss": 0.9456, "step": 6780 }, { "epoch": 0.3893048742003041, "grad_norm": 0.267578125, "learning_rate": 0.0001531965449139122, "loss": 0.923, "step": 6785 }, { "epoch": 0.38959176062196976, "grad_norm": 0.294921875, "learning_rate": 0.00015311171767698435, "loss": 0.9228, "step": 6790 }, { "epoch": 0.38987864704363545, "grad_norm": 0.271484375, "learning_rate": 0.0001530268371727154, "loss": 0.9183, "step": 6795 }, { "epoch": 0.3901655334653011, "grad_norm": 0.29296875, "learning_rate": 0.0001529419034862346, "loss": 0.9978, "step": 6800 }, { "epoch": 0.3904524198869668, "grad_norm": 0.259765625, "learning_rate": 0.00015285691670272451, "loss": 0.9385, "step": 6805 }, { "epoch": 0.3907393063086324, "grad_norm": 0.29296875, "learning_rate": 0.0001527718769074209, "loss": 0.9563, "step": 6810 }, { "epoch": 0.3910261927302981, "grad_norm": 0.2890625, "learning_rate": 0.00015268678418561276, "loss": 1.0063, "step": 6815 }, { "epoch": 0.3913130791519637, "grad_norm": 0.310546875, "learning_rate": 0.00015260163862264217, "loss": 0.9524, "step": 6820 }, { "epoch": 0.3915999655736294, "grad_norm": 0.275390625, "learning_rate": 0.00015251644030390415, "loss": 0.9097, "step": 6825 }, { "epoch": 0.39188685199529505, "grad_norm": 0.3046875, "learning_rate": 0.00015243118931484667, "loss": 1.0325, "step": 6830 }, { "epoch": 0.39217373841696074, "grad_norm": 0.25390625, "learning_rate": 0.0001523458857409705, "loss": 0.9408, "step": 6835 }, { "epoch": 0.39246062483862637, "grad_norm": 0.2734375, "learning_rate": 0.00015226052966782914, "loss": 0.9173, "step": 6840 }, { "epoch": 0.39274751126029206, "grad_norm": 0.267578125, "learning_rate": 0.0001521751211810288, "loss": 0.9434, "step": 6845 }, { "epoch": 0.3930343976819577, "grad_norm": 0.265625, "learning_rate": 0.00015208966036622825, "loss": 0.9733, "step": 6850 }, { "epoch": 0.3933212841036234, "grad_norm": 0.259765625, "learning_rate": 0.00015200414730913865, "loss": 1.0244, "step": 6855 }, { "epoch": 0.393608170525289, "grad_norm": 0.279296875, "learning_rate": 0.00015191858209552368, "loss": 1.0525, "step": 6860 }, { "epoch": 0.3938950569469547, "grad_norm": 0.275390625, "learning_rate": 0.00015183296481119924, "loss": 1.0148, "step": 6865 }, { "epoch": 0.39418194336862034, "grad_norm": 0.32421875, "learning_rate": 0.00015174729554203348, "loss": 0.9721, "step": 6870 }, { "epoch": 0.394468829790286, "grad_norm": 0.263671875, "learning_rate": 0.00015166157437394672, "loss": 0.9897, "step": 6875 }, { "epoch": 0.3947557162119517, "grad_norm": 0.2890625, "learning_rate": 0.00015157580139291124, "loss": 1.0325, "step": 6880 }, { "epoch": 0.39504260263361735, "grad_norm": 0.28125, "learning_rate": 0.00015148997668495143, "loss": 0.9371, "step": 6885 }, { "epoch": 0.39532948905528303, "grad_norm": 0.251953125, "learning_rate": 0.0001514041003361434, "loss": 0.9715, "step": 6890 }, { "epoch": 0.39561637547694867, "grad_norm": 0.26171875, "learning_rate": 0.00015131817243261512, "loss": 0.897, "step": 6895 }, { "epoch": 0.39590326189861436, "grad_norm": 0.263671875, "learning_rate": 0.00015123219306054634, "loss": 0.9151, "step": 6900 }, { "epoch": 0.39619014832028, "grad_norm": 0.275390625, "learning_rate": 0.00015114616230616835, "loss": 1.021, "step": 6905 }, { "epoch": 0.3964770347419457, "grad_norm": 0.267578125, "learning_rate": 0.00015106008025576393, "loss": 0.8911, "step": 6910 }, { "epoch": 0.3967639211636113, "grad_norm": 0.279296875, "learning_rate": 0.00015097394699566737, "loss": 0.9241, "step": 6915 }, { "epoch": 0.397050807585277, "grad_norm": 0.251953125, "learning_rate": 0.0001508877626122644, "loss": 0.924, "step": 6920 }, { "epoch": 0.39733769400694263, "grad_norm": 0.267578125, "learning_rate": 0.00015080152719199183, "loss": 1.0014, "step": 6925 }, { "epoch": 0.3976245804286083, "grad_norm": 0.26171875, "learning_rate": 0.00015071524082133778, "loss": 0.8877, "step": 6930 }, { "epoch": 0.39791146685027395, "grad_norm": 0.29296875, "learning_rate": 0.00015062890358684148, "loss": 1.0247, "step": 6935 }, { "epoch": 0.39819835327193964, "grad_norm": 0.26953125, "learning_rate": 0.0001505425155750931, "loss": 1.0053, "step": 6940 }, { "epoch": 0.3984852396936053, "grad_norm": 0.2734375, "learning_rate": 0.00015045607687273383, "loss": 0.9355, "step": 6945 }, { "epoch": 0.39877212611527096, "grad_norm": 0.255859375, "learning_rate": 0.00015036958756645564, "loss": 1.0198, "step": 6950 }, { "epoch": 0.39905901253693665, "grad_norm": 0.26953125, "learning_rate": 0.00015028304774300123, "loss": 0.894, "step": 6955 }, { "epoch": 0.3993458989586023, "grad_norm": 0.291015625, "learning_rate": 0.00015019645748916402, "loss": 1.0006, "step": 6960 }, { "epoch": 0.399632785380268, "grad_norm": 0.2734375, "learning_rate": 0.00015010981689178796, "loss": 0.9742, "step": 6965 }, { "epoch": 0.3999196718019336, "grad_norm": 0.3046875, "learning_rate": 0.00015002312603776754, "loss": 0.9005, "step": 6970 }, { "epoch": 0.4002065582235993, "grad_norm": 0.275390625, "learning_rate": 0.00014993638501404762, "loss": 0.957, "step": 6975 }, { "epoch": 0.40049344464526493, "grad_norm": 0.26171875, "learning_rate": 0.00014984959390762335, "loss": 1.0227, "step": 6980 }, { "epoch": 0.4007803310669306, "grad_norm": 0.26953125, "learning_rate": 0.00014976275280554016, "loss": 0.982, "step": 6985 }, { "epoch": 0.40106721748859625, "grad_norm": 0.267578125, "learning_rate": 0.00014967586179489366, "loss": 1.0078, "step": 6990 }, { "epoch": 0.40135410391026194, "grad_norm": 0.28125, "learning_rate": 0.0001495889209628294, "loss": 0.9488, "step": 6995 }, { "epoch": 0.40164099033192757, "grad_norm": 0.279296875, "learning_rate": 0.00014950193039654297, "loss": 0.9791, "step": 7000 }, { "epoch": 0.40192787675359326, "grad_norm": 0.287109375, "learning_rate": 0.00014941489018327988, "loss": 0.9355, "step": 7005 }, { "epoch": 0.4022147631752589, "grad_norm": 0.28515625, "learning_rate": 0.0001493278004103353, "loss": 0.9966, "step": 7010 }, { "epoch": 0.4025016495969246, "grad_norm": 0.259765625, "learning_rate": 0.00014924066116505427, "loss": 0.9108, "step": 7015 }, { "epoch": 0.4027885360185902, "grad_norm": 0.2578125, "learning_rate": 0.00014915347253483126, "loss": 1.0087, "step": 7020 }, { "epoch": 0.4030754224402559, "grad_norm": 0.296875, "learning_rate": 0.00014906623460711046, "loss": 0.9532, "step": 7025 }, { "epoch": 0.4033623088619216, "grad_norm": 0.251953125, "learning_rate": 0.00014897894746938536, "loss": 0.8847, "step": 7030 }, { "epoch": 0.4036491952835872, "grad_norm": 0.29296875, "learning_rate": 0.00014889161120919893, "loss": 0.9371, "step": 7035 }, { "epoch": 0.4039360817052529, "grad_norm": 0.26171875, "learning_rate": 0.00014880422591414323, "loss": 0.9383, "step": 7040 }, { "epoch": 0.40422296812691855, "grad_norm": 0.26171875, "learning_rate": 0.00014871679167185973, "loss": 0.9603, "step": 7045 }, { "epoch": 0.40450985454858424, "grad_norm": 0.267578125, "learning_rate": 0.00014862930857003877, "loss": 0.9217, "step": 7050 }, { "epoch": 0.40479674097024987, "grad_norm": 0.283203125, "learning_rate": 0.00014854177669641983, "loss": 0.9461, "step": 7055 }, { "epoch": 0.40508362739191556, "grad_norm": 0.26953125, "learning_rate": 0.0001484541961387912, "loss": 1.0052, "step": 7060 }, { "epoch": 0.4053705138135812, "grad_norm": 0.263671875, "learning_rate": 0.00014836656698499016, "loss": 0.9165, "step": 7065 }, { "epoch": 0.4056574002352469, "grad_norm": 0.283203125, "learning_rate": 0.00014827888932290257, "loss": 0.9311, "step": 7070 }, { "epoch": 0.4059442866569125, "grad_norm": 0.271484375, "learning_rate": 0.000148191163240463, "loss": 0.986, "step": 7075 }, { "epoch": 0.4062311730785782, "grad_norm": 0.267578125, "learning_rate": 0.00014810338882565455, "loss": 0.9295, "step": 7080 }, { "epoch": 0.40651805950024383, "grad_norm": 0.259765625, "learning_rate": 0.00014801556616650887, "loss": 0.9476, "step": 7085 }, { "epoch": 0.4068049459219095, "grad_norm": 0.267578125, "learning_rate": 0.00014792769535110597, "loss": 0.9008, "step": 7090 }, { "epoch": 0.40709183234357516, "grad_norm": 0.28125, "learning_rate": 0.00014783977646757403, "loss": 0.935, "step": 7095 }, { "epoch": 0.40737871876524084, "grad_norm": 0.25, "learning_rate": 0.00014775180960408966, "loss": 0.9481, "step": 7100 }, { "epoch": 0.4076656051869065, "grad_norm": 0.287109375, "learning_rate": 0.00014766379484887744, "loss": 1.0298, "step": 7105 }, { "epoch": 0.40795249160857217, "grad_norm": 0.263671875, "learning_rate": 0.00014757573229021002, "loss": 1.0119, "step": 7110 }, { "epoch": 0.40823937803023785, "grad_norm": 0.267578125, "learning_rate": 0.00014748762201640796, "loss": 0.9029, "step": 7115 }, { "epoch": 0.4085262644519035, "grad_norm": 0.265625, "learning_rate": 0.00014739946411583977, "loss": 0.9884, "step": 7120 }, { "epoch": 0.4088131508735692, "grad_norm": 0.2890625, "learning_rate": 0.00014731125867692158, "loss": 0.9622, "step": 7125 }, { "epoch": 0.4091000372952348, "grad_norm": 0.294921875, "learning_rate": 0.00014722300578811734, "loss": 0.9128, "step": 7130 }, { "epoch": 0.4093869237169005, "grad_norm": 0.267578125, "learning_rate": 0.00014713470553793853, "loss": 0.9838, "step": 7135 }, { "epoch": 0.40967381013856613, "grad_norm": 0.291015625, "learning_rate": 0.0001470463580149441, "loss": 1.0012, "step": 7140 }, { "epoch": 0.4099606965602318, "grad_norm": 0.25, "learning_rate": 0.00014695796330774048, "loss": 0.9605, "step": 7145 }, { "epoch": 0.41024758298189745, "grad_norm": 0.267578125, "learning_rate": 0.00014686952150498134, "loss": 0.9623, "step": 7150 }, { "epoch": 0.41053446940356314, "grad_norm": 0.2890625, "learning_rate": 0.00014678103269536762, "loss": 0.9733, "step": 7155 }, { "epoch": 0.4108213558252288, "grad_norm": 0.251953125, "learning_rate": 0.00014669249696764748, "loss": 0.9318, "step": 7160 }, { "epoch": 0.41110824224689446, "grad_norm": 0.275390625, "learning_rate": 0.00014660391441061603, "loss": 0.9397, "step": 7165 }, { "epoch": 0.4113951286685601, "grad_norm": 0.275390625, "learning_rate": 0.00014651528511311538, "loss": 0.9972, "step": 7170 }, { "epoch": 0.4116820150902258, "grad_norm": 0.271484375, "learning_rate": 0.0001464266091640345, "loss": 0.9362, "step": 7175 }, { "epoch": 0.4119689015118914, "grad_norm": 0.26953125, "learning_rate": 0.0001463378866523092, "loss": 0.9756, "step": 7180 }, { "epoch": 0.4122557879335571, "grad_norm": 0.28515625, "learning_rate": 0.00014624911766692196, "loss": 0.9809, "step": 7185 }, { "epoch": 0.4125426743552228, "grad_norm": 0.251953125, "learning_rate": 0.00014616030229690186, "loss": 0.9358, "step": 7190 }, { "epoch": 0.4128295607768884, "grad_norm": 0.298828125, "learning_rate": 0.0001460714406313245, "loss": 0.9231, "step": 7195 }, { "epoch": 0.4131164471985541, "grad_norm": 0.26171875, "learning_rate": 0.00014598253275931197, "loss": 0.944, "step": 7200 }, { "epoch": 0.41340333362021975, "grad_norm": 0.291015625, "learning_rate": 0.00014589357877003257, "loss": 0.9755, "step": 7205 }, { "epoch": 0.41369022004188544, "grad_norm": 0.25390625, "learning_rate": 0.00014580457875270098, "loss": 0.9444, "step": 7210 }, { "epoch": 0.41397710646355107, "grad_norm": 0.28515625, "learning_rate": 0.00014571553279657803, "loss": 0.927, "step": 7215 }, { "epoch": 0.41426399288521676, "grad_norm": 0.255859375, "learning_rate": 0.00014562644099097048, "loss": 1.0201, "step": 7220 }, { "epoch": 0.4145508793068824, "grad_norm": 0.27734375, "learning_rate": 0.00014553730342523134, "loss": 0.9543, "step": 7225 }, { "epoch": 0.4148377657285481, "grad_norm": 0.255859375, "learning_rate": 0.0001454481201887592, "loss": 0.9338, "step": 7230 }, { "epoch": 0.4151246521502137, "grad_norm": 0.26953125, "learning_rate": 0.00014535889137099877, "loss": 0.9955, "step": 7235 }, { "epoch": 0.4154115385718794, "grad_norm": 0.255859375, "learning_rate": 0.00014526961706144023, "loss": 0.9394, "step": 7240 }, { "epoch": 0.41569842499354503, "grad_norm": 0.25390625, "learning_rate": 0.00014518029734961947, "loss": 0.9759, "step": 7245 }, { "epoch": 0.4159853114152107, "grad_norm": 0.26953125, "learning_rate": 0.00014509093232511791, "loss": 0.9383, "step": 7250 }, { "epoch": 0.41627219783687636, "grad_norm": 0.2578125, "learning_rate": 0.00014500152207756246, "loss": 0.9081, "step": 7255 }, { "epoch": 0.41655908425854205, "grad_norm": 0.287109375, "learning_rate": 0.00014491206669662533, "loss": 1.0072, "step": 7260 }, { "epoch": 0.4168459706802077, "grad_norm": 0.2490234375, "learning_rate": 0.00014482256627202405, "loss": 0.9506, "step": 7265 }, { "epoch": 0.41713285710187337, "grad_norm": 0.2734375, "learning_rate": 0.00014473302089352123, "loss": 0.8998, "step": 7270 }, { "epoch": 0.41741974352353906, "grad_norm": 0.255859375, "learning_rate": 0.00014464343065092466, "loss": 0.9917, "step": 7275 }, { "epoch": 0.4177066299452047, "grad_norm": 0.271484375, "learning_rate": 0.00014455379563408713, "loss": 0.9395, "step": 7280 }, { "epoch": 0.4179935163668704, "grad_norm": 0.267578125, "learning_rate": 0.00014446411593290625, "loss": 0.8846, "step": 7285 }, { "epoch": 0.418280402788536, "grad_norm": 0.279296875, "learning_rate": 0.0001443743916373245, "loss": 0.9978, "step": 7290 }, { "epoch": 0.4185672892102017, "grad_norm": 0.259765625, "learning_rate": 0.00014428462283732908, "loss": 0.956, "step": 7295 }, { "epoch": 0.41885417563186733, "grad_norm": 0.265625, "learning_rate": 0.0001441948096229518, "loss": 0.914, "step": 7300 }, { "epoch": 0.419141062053533, "grad_norm": 0.25390625, "learning_rate": 0.00014410495208426908, "loss": 0.9081, "step": 7305 }, { "epoch": 0.41942794847519865, "grad_norm": 0.2734375, "learning_rate": 0.00014401505031140171, "loss": 0.949, "step": 7310 }, { "epoch": 0.41971483489686434, "grad_norm": 0.2578125, "learning_rate": 0.00014392510439451494, "loss": 0.9664, "step": 7315 }, { "epoch": 0.42000172131853, "grad_norm": 0.26171875, "learning_rate": 0.00014383511442381822, "loss": 0.9836, "step": 7320 }, { "epoch": 0.42028860774019566, "grad_norm": 0.283203125, "learning_rate": 0.00014374508048956515, "loss": 0.982, "step": 7325 }, { "epoch": 0.4205754941618613, "grad_norm": 0.279296875, "learning_rate": 0.00014365500268205352, "loss": 0.9764, "step": 7330 }, { "epoch": 0.420862380583527, "grad_norm": 0.28515625, "learning_rate": 0.0001435648810916251, "loss": 0.9683, "step": 7335 }, { "epoch": 0.4211492670051926, "grad_norm": 0.259765625, "learning_rate": 0.0001434747158086655, "loss": 0.9833, "step": 7340 }, { "epoch": 0.4214361534268583, "grad_norm": 0.26171875, "learning_rate": 0.00014338450692360418, "loss": 0.9659, "step": 7345 }, { "epoch": 0.421723039848524, "grad_norm": 0.2890625, "learning_rate": 0.0001432942545269144, "loss": 0.9956, "step": 7350 }, { "epoch": 0.42200992627018963, "grad_norm": 0.267578125, "learning_rate": 0.000143203958709113, "loss": 0.9204, "step": 7355 }, { "epoch": 0.4222968126918553, "grad_norm": 0.267578125, "learning_rate": 0.00014311361956076036, "loss": 0.98, "step": 7360 }, { "epoch": 0.42258369911352095, "grad_norm": 0.275390625, "learning_rate": 0.00014302323717246032, "loss": 0.9782, "step": 7365 }, { "epoch": 0.42287058553518664, "grad_norm": 0.275390625, "learning_rate": 0.0001429328116348601, "loss": 0.9533, "step": 7370 }, { "epoch": 0.42315747195685227, "grad_norm": 0.265625, "learning_rate": 0.00014284234303865026, "loss": 0.9648, "step": 7375 }, { "epoch": 0.42344435837851796, "grad_norm": 0.279296875, "learning_rate": 0.0001427518314745644, "loss": 0.9278, "step": 7380 }, { "epoch": 0.4237312448001836, "grad_norm": 0.2734375, "learning_rate": 0.0001426612770333793, "loss": 0.9649, "step": 7385 }, { "epoch": 0.4240181312218493, "grad_norm": 0.28125, "learning_rate": 0.00014257067980591475, "loss": 0.9862, "step": 7390 }, { "epoch": 0.4243050176435149, "grad_norm": 0.275390625, "learning_rate": 0.00014248003988303346, "loss": 1.0041, "step": 7395 }, { "epoch": 0.4245919040651806, "grad_norm": 0.267578125, "learning_rate": 0.00014238935735564094, "loss": 1.0584, "step": 7400 }, { "epoch": 0.42487879048684624, "grad_norm": 0.275390625, "learning_rate": 0.00014229863231468538, "loss": 0.9154, "step": 7405 }, { "epoch": 0.4251656769085119, "grad_norm": 0.2890625, "learning_rate": 0.00014220786485115772, "loss": 0.9809, "step": 7410 }, { "epoch": 0.42545256333017756, "grad_norm": 0.287109375, "learning_rate": 0.0001421170550560913, "loss": 0.9214, "step": 7415 }, { "epoch": 0.42573944975184325, "grad_norm": 0.255859375, "learning_rate": 0.0001420262030205621, "loss": 1.0506, "step": 7420 }, { "epoch": 0.42602633617350893, "grad_norm": 0.302734375, "learning_rate": 0.0001419353088356883, "loss": 1.0369, "step": 7425 }, { "epoch": 0.42631322259517457, "grad_norm": 0.28125, "learning_rate": 0.00014184437259263038, "loss": 0.9779, "step": 7430 }, { "epoch": 0.42660010901684026, "grad_norm": 0.267578125, "learning_rate": 0.00014175339438259112, "loss": 0.9454, "step": 7435 }, { "epoch": 0.4268869954385059, "grad_norm": 0.2734375, "learning_rate": 0.00014166237429681525, "loss": 0.9195, "step": 7440 }, { "epoch": 0.4271738818601716, "grad_norm": 0.259765625, "learning_rate": 0.00014157131242658957, "loss": 0.9841, "step": 7445 }, { "epoch": 0.4274607682818372, "grad_norm": 0.263671875, "learning_rate": 0.0001414802088632428, "loss": 0.8795, "step": 7450 }, { "epoch": 0.4277476547035029, "grad_norm": 0.27734375, "learning_rate": 0.00014138906369814538, "loss": 0.8885, "step": 7455 }, { "epoch": 0.42803454112516853, "grad_norm": 0.2734375, "learning_rate": 0.0001412978770227096, "loss": 0.9524, "step": 7460 }, { "epoch": 0.4283214275468342, "grad_norm": 0.267578125, "learning_rate": 0.00014120664892838933, "loss": 0.9775, "step": 7465 }, { "epoch": 0.42860831396849985, "grad_norm": 0.296875, "learning_rate": 0.0001411153795066799, "loss": 0.9537, "step": 7470 }, { "epoch": 0.42889520039016554, "grad_norm": 0.287109375, "learning_rate": 0.00014102406884911826, "loss": 0.9987, "step": 7475 }, { "epoch": 0.4291820868118312, "grad_norm": 0.279296875, "learning_rate": 0.00014093271704728252, "loss": 0.98, "step": 7480 }, { "epoch": 0.42946897323349686, "grad_norm": 0.271484375, "learning_rate": 0.00014084132419279224, "loss": 0.9802, "step": 7485 }, { "epoch": 0.4297558596551625, "grad_norm": 0.279296875, "learning_rate": 0.00014074989037730798, "loss": 1.0003, "step": 7490 }, { "epoch": 0.4300427460768282, "grad_norm": 0.2578125, "learning_rate": 0.00014065841569253155, "loss": 0.9735, "step": 7495 }, { "epoch": 0.4303296324984938, "grad_norm": 0.3203125, "learning_rate": 0.00014056690023020566, "loss": 0.9811, "step": 7500 }, { "epoch": 0.4306165189201595, "grad_norm": 0.259765625, "learning_rate": 0.00014047534408211383, "loss": 0.9719, "step": 7505 }, { "epoch": 0.4309034053418252, "grad_norm": 0.271484375, "learning_rate": 0.00014038374734008058, "loss": 0.9586, "step": 7510 }, { "epoch": 0.43119029176349083, "grad_norm": 0.25, "learning_rate": 0.00014029211009597097, "loss": 0.9227, "step": 7515 }, { "epoch": 0.4314771781851565, "grad_norm": 0.287109375, "learning_rate": 0.00014020043244169082, "loss": 0.9959, "step": 7520 }, { "epoch": 0.43176406460682215, "grad_norm": 0.267578125, "learning_rate": 0.00014010871446918635, "loss": 0.8981, "step": 7525 }, { "epoch": 0.43205095102848784, "grad_norm": 0.259765625, "learning_rate": 0.00014001695627044428, "loss": 0.9033, "step": 7530 }, { "epoch": 0.4323378374501535, "grad_norm": 0.283203125, "learning_rate": 0.00013992515793749172, "loss": 0.9581, "step": 7535 }, { "epoch": 0.43262472387181916, "grad_norm": 0.3359375, "learning_rate": 0.00013983331956239596, "loss": 0.9514, "step": 7540 }, { "epoch": 0.4329116102934848, "grad_norm": 0.267578125, "learning_rate": 0.00013974144123726442, "loss": 0.9712, "step": 7545 }, { "epoch": 0.4331984967151505, "grad_norm": 0.255859375, "learning_rate": 0.00013964952305424474, "loss": 0.9752, "step": 7550 }, { "epoch": 0.4334853831368161, "grad_norm": 0.275390625, "learning_rate": 0.00013955756510552437, "loss": 1.009, "step": 7555 }, { "epoch": 0.4337722695584818, "grad_norm": 0.279296875, "learning_rate": 0.0001394655674833307, "loss": 1.0456, "step": 7560 }, { "epoch": 0.43405915598014744, "grad_norm": 0.27734375, "learning_rate": 0.00013937353027993092, "loss": 0.9044, "step": 7565 }, { "epoch": 0.4343460424018131, "grad_norm": 0.26171875, "learning_rate": 0.00013928145358763194, "loss": 0.9205, "step": 7570 }, { "epoch": 0.43463292882347876, "grad_norm": 0.26171875, "learning_rate": 0.00013918933749878024, "loss": 0.9606, "step": 7575 }, { "epoch": 0.43491981524514445, "grad_norm": 0.2890625, "learning_rate": 0.00013909718210576179, "loss": 0.9864, "step": 7580 }, { "epoch": 0.43520670166681014, "grad_norm": 0.2578125, "learning_rate": 0.000139004987501002, "loss": 0.9468, "step": 7585 }, { "epoch": 0.43549358808847577, "grad_norm": 0.240234375, "learning_rate": 0.0001389127537769657, "loss": 0.9274, "step": 7590 }, { "epoch": 0.43578047451014146, "grad_norm": 0.275390625, "learning_rate": 0.00013882048102615676, "loss": 0.9332, "step": 7595 }, { "epoch": 0.4360673609318071, "grad_norm": 0.279296875, "learning_rate": 0.00013872816934111838, "loss": 0.9163, "step": 7600 }, { "epoch": 0.4363542473534728, "grad_norm": 0.279296875, "learning_rate": 0.00013863581881443275, "loss": 0.9248, "step": 7605 }, { "epoch": 0.4366411337751384, "grad_norm": 0.2734375, "learning_rate": 0.0001385434295387209, "loss": 0.9486, "step": 7610 }, { "epoch": 0.4369280201968041, "grad_norm": 0.28125, "learning_rate": 0.00013845100160664287, "loss": 0.9743, "step": 7615 }, { "epoch": 0.43721490661846973, "grad_norm": 0.26953125, "learning_rate": 0.0001383585351108974, "loss": 0.9523, "step": 7620 }, { "epoch": 0.4375017930401354, "grad_norm": 0.265625, "learning_rate": 0.00013826603014422192, "loss": 1.0153, "step": 7625 }, { "epoch": 0.43778867946180106, "grad_norm": 0.26953125, "learning_rate": 0.0001381734867993925, "loss": 1.0418, "step": 7630 }, { "epoch": 0.43807556588346674, "grad_norm": 0.28125, "learning_rate": 0.0001380809051692236, "loss": 0.9956, "step": 7635 }, { "epoch": 0.4383624523051324, "grad_norm": 0.263671875, "learning_rate": 0.00013798828534656812, "loss": 1.0244, "step": 7640 }, { "epoch": 0.43864933872679807, "grad_norm": 0.275390625, "learning_rate": 0.00013789562742431727, "loss": 0.9126, "step": 7645 }, { "epoch": 0.4389362251484637, "grad_norm": 0.263671875, "learning_rate": 0.00013780293149540053, "loss": 0.9695, "step": 7650 }, { "epoch": 0.4392231115701294, "grad_norm": 0.263671875, "learning_rate": 0.00013771019765278537, "loss": 0.9484, "step": 7655 }, { "epoch": 0.439509997991795, "grad_norm": 0.302734375, "learning_rate": 0.00013761742598947734, "loss": 0.9468, "step": 7660 }, { "epoch": 0.4397968844134607, "grad_norm": 0.298828125, "learning_rate": 0.00013752461659852, "loss": 0.9873, "step": 7665 }, { "epoch": 0.4400837708351264, "grad_norm": 0.2578125, "learning_rate": 0.00013743176957299464, "loss": 0.9757, "step": 7670 }, { "epoch": 0.44037065725679203, "grad_norm": 0.24609375, "learning_rate": 0.00013733888500602038, "loss": 0.969, "step": 7675 }, { "epoch": 0.4406575436784577, "grad_norm": 0.267578125, "learning_rate": 0.00013724596299075388, "loss": 0.9465, "step": 7680 }, { "epoch": 0.44094443010012335, "grad_norm": 0.265625, "learning_rate": 0.0001371530036203895, "loss": 0.9525, "step": 7685 }, { "epoch": 0.44123131652178904, "grad_norm": 0.2734375, "learning_rate": 0.00013706000698815893, "loss": 0.9346, "step": 7690 }, { "epoch": 0.4415182029434547, "grad_norm": 0.2734375, "learning_rate": 0.00013696697318733134, "loss": 0.9275, "step": 7695 }, { "epoch": 0.44180508936512036, "grad_norm": 0.287109375, "learning_rate": 0.00013687390231121314, "loss": 0.922, "step": 7700 }, { "epoch": 0.442091975786786, "grad_norm": 0.255859375, "learning_rate": 0.00013678079445314783, "loss": 0.9008, "step": 7705 }, { "epoch": 0.4423788622084517, "grad_norm": 0.267578125, "learning_rate": 0.00013668764970651615, "loss": 0.9691, "step": 7710 }, { "epoch": 0.4426657486301173, "grad_norm": 0.259765625, "learning_rate": 0.0001365944681647358, "loss": 0.9503, "step": 7715 }, { "epoch": 0.442952635051783, "grad_norm": 0.29296875, "learning_rate": 0.00013650124992126128, "loss": 1.003, "step": 7720 }, { "epoch": 0.44323952147344864, "grad_norm": 0.310546875, "learning_rate": 0.00013640799506958403, "loss": 0.986, "step": 7725 }, { "epoch": 0.4435264078951143, "grad_norm": 0.265625, "learning_rate": 0.00013631470370323214, "loss": 0.9394, "step": 7730 }, { "epoch": 0.44381329431677996, "grad_norm": 0.2734375, "learning_rate": 0.0001362213759157703, "loss": 0.984, "step": 7735 }, { "epoch": 0.44410018073844565, "grad_norm": 0.275390625, "learning_rate": 0.0001361280118007998, "loss": 1.026, "step": 7740 }, { "epoch": 0.44438706716011134, "grad_norm": 0.2890625, "learning_rate": 0.0001360346114519583, "loss": 0.953, "step": 7745 }, { "epoch": 0.44467395358177697, "grad_norm": 0.30078125, "learning_rate": 0.0001359411749629198, "loss": 0.9711, "step": 7750 }, { "epoch": 0.44496084000344266, "grad_norm": 0.298828125, "learning_rate": 0.0001358477024273946, "loss": 0.9772, "step": 7755 }, { "epoch": 0.4452477264251083, "grad_norm": 0.25390625, "learning_rate": 0.0001357541939391291, "loss": 0.9291, "step": 7760 }, { "epoch": 0.445534612846774, "grad_norm": 0.2890625, "learning_rate": 0.00013566064959190583, "loss": 1.0049, "step": 7765 }, { "epoch": 0.4458214992684396, "grad_norm": 0.28515625, "learning_rate": 0.0001355670694795432, "loss": 0.992, "step": 7770 }, { "epoch": 0.4461083856901053, "grad_norm": 0.291015625, "learning_rate": 0.0001354734536958955, "loss": 0.964, "step": 7775 }, { "epoch": 0.44639527211177094, "grad_norm": 0.2734375, "learning_rate": 0.0001353798023348528, "loss": 0.9363, "step": 7780 }, { "epoch": 0.4466821585334366, "grad_norm": 0.265625, "learning_rate": 0.00013528611549034096, "loss": 0.9233, "step": 7785 }, { "epoch": 0.44696904495510226, "grad_norm": 0.265625, "learning_rate": 0.00013519239325632124, "loss": 0.9871, "step": 7790 }, { "epoch": 0.44725593137676795, "grad_norm": 0.28515625, "learning_rate": 0.00013509863572679057, "loss": 0.9317, "step": 7795 }, { "epoch": 0.4475428177984336, "grad_norm": 0.2890625, "learning_rate": 0.00013500484299578116, "loss": 0.9727, "step": 7800 }, { "epoch": 0.44782970422009927, "grad_norm": 0.26171875, "learning_rate": 0.00013491101515736057, "loss": 0.923, "step": 7805 }, { "epoch": 0.4481165906417649, "grad_norm": 0.275390625, "learning_rate": 0.00013481715230563153, "loss": 1.0512, "step": 7810 }, { "epoch": 0.4484034770634306, "grad_norm": 0.275390625, "learning_rate": 0.00013472325453473197, "loss": 0.9272, "step": 7815 }, { "epoch": 0.4486903634850963, "grad_norm": 0.29296875, "learning_rate": 0.00013462932193883482, "loss": 0.9548, "step": 7820 }, { "epoch": 0.4489772499067619, "grad_norm": 0.251953125, "learning_rate": 0.0001345353546121478, "loss": 0.897, "step": 7825 }, { "epoch": 0.4492641363284276, "grad_norm": 0.267578125, "learning_rate": 0.00013444135264891371, "loss": 0.9414, "step": 7830 }, { "epoch": 0.44955102275009323, "grad_norm": 0.275390625, "learning_rate": 0.00013434731614340984, "loss": 0.9026, "step": 7835 }, { "epoch": 0.4498379091717589, "grad_norm": 0.26953125, "learning_rate": 0.00013425324518994826, "loss": 0.9086, "step": 7840 }, { "epoch": 0.45012479559342455, "grad_norm": 0.283203125, "learning_rate": 0.0001341591398828756, "loss": 1.0101, "step": 7845 }, { "epoch": 0.45041168201509024, "grad_norm": 0.26171875, "learning_rate": 0.00013406500031657283, "loss": 1.0249, "step": 7850 }, { "epoch": 0.4506985684367559, "grad_norm": 0.28125, "learning_rate": 0.00013397082658545543, "loss": 0.9401, "step": 7855 }, { "epoch": 0.45098545485842156, "grad_norm": 0.26171875, "learning_rate": 0.00013387661878397307, "loss": 0.9076, "step": 7860 }, { "epoch": 0.4512723412800872, "grad_norm": 0.26953125, "learning_rate": 0.00013378237700660957, "loss": 0.9293, "step": 7865 }, { "epoch": 0.4515592277017529, "grad_norm": 0.26171875, "learning_rate": 0.00013368810134788278, "loss": 0.9777, "step": 7870 }, { "epoch": 0.4518461141234185, "grad_norm": 0.248046875, "learning_rate": 0.00013359379190234472, "loss": 0.8768, "step": 7875 }, { "epoch": 0.4521330005450842, "grad_norm": 0.248046875, "learning_rate": 0.00013349944876458108, "loss": 0.9264, "step": 7880 }, { "epoch": 0.45241988696674984, "grad_norm": 0.28515625, "learning_rate": 0.00013340507202921152, "loss": 0.917, "step": 7885 }, { "epoch": 0.45270677338841553, "grad_norm": 0.2490234375, "learning_rate": 0.0001333106617908892, "loss": 0.9334, "step": 7890 }, { "epoch": 0.45299365981008116, "grad_norm": 0.2578125, "learning_rate": 0.00013321621814430106, "loss": 1.002, "step": 7895 }, { "epoch": 0.45328054623174685, "grad_norm": 0.275390625, "learning_rate": 0.0001331217411841675, "loss": 0.9649, "step": 7900 }, { "epoch": 0.45356743265341254, "grad_norm": 0.3125, "learning_rate": 0.00013302723100524222, "loss": 0.9087, "step": 7905 }, { "epoch": 0.45385431907507817, "grad_norm": 0.26953125, "learning_rate": 0.0001329326877023124, "loss": 0.9809, "step": 7910 }, { "epoch": 0.45414120549674386, "grad_norm": 0.259765625, "learning_rate": 0.00013283811137019836, "loss": 0.9605, "step": 7915 }, { "epoch": 0.4544280919184095, "grad_norm": 0.267578125, "learning_rate": 0.00013274350210375357, "loss": 0.9423, "step": 7920 }, { "epoch": 0.4547149783400752, "grad_norm": 0.30859375, "learning_rate": 0.00013264885999786442, "loss": 0.9342, "step": 7925 }, { "epoch": 0.4550018647617408, "grad_norm": 0.2890625, "learning_rate": 0.0001325541851474504, "loss": 0.9755, "step": 7930 }, { "epoch": 0.4552887511834065, "grad_norm": 0.267578125, "learning_rate": 0.0001324594776474638, "loss": 0.9757, "step": 7935 }, { "epoch": 0.45557563760507214, "grad_norm": 0.279296875, "learning_rate": 0.0001323647375928895, "loss": 0.9759, "step": 7940 }, { "epoch": 0.4558625240267378, "grad_norm": 0.263671875, "learning_rate": 0.00013226996507874526, "loss": 1.0153, "step": 7945 }, { "epoch": 0.45614941044840346, "grad_norm": 0.271484375, "learning_rate": 0.00013217516020008128, "loss": 0.98, "step": 7950 }, { "epoch": 0.45643629687006915, "grad_norm": 0.2470703125, "learning_rate": 0.00013208032305198018, "loss": 1.0167, "step": 7955 }, { "epoch": 0.4567231832917348, "grad_norm": 0.265625, "learning_rate": 0.00013198545372955706, "loss": 0.988, "step": 7960 }, { "epoch": 0.45701006971340047, "grad_norm": 0.267578125, "learning_rate": 0.00013189055232795915, "loss": 0.9788, "step": 7965 }, { "epoch": 0.4572969561350661, "grad_norm": 0.275390625, "learning_rate": 0.00013179561894236598, "loss": 1.0491, "step": 7970 }, { "epoch": 0.4575838425567318, "grad_norm": 0.26171875, "learning_rate": 0.00013170065366798907, "loss": 0.9504, "step": 7975 }, { "epoch": 0.4578707289783975, "grad_norm": 0.26953125, "learning_rate": 0.00013160565660007195, "loss": 0.9434, "step": 7980 }, { "epoch": 0.4581576154000631, "grad_norm": 0.267578125, "learning_rate": 0.00013151062783389007, "loss": 0.8654, "step": 7985 }, { "epoch": 0.4584445018217288, "grad_norm": 0.3046875, "learning_rate": 0.00013141556746475058, "loss": 0.914, "step": 7990 }, { "epoch": 0.45873138824339443, "grad_norm": 0.267578125, "learning_rate": 0.00013132047558799242, "loss": 0.9539, "step": 7995 }, { "epoch": 0.4590182746650601, "grad_norm": 0.53125, "learning_rate": 0.00013122535229898613, "loss": 0.9955, "step": 8000 }, { "epoch": 0.45930516108672576, "grad_norm": 0.27734375, "learning_rate": 0.0001311301976931337, "loss": 0.887, "step": 8005 }, { "epoch": 0.45959204750839144, "grad_norm": 0.2734375, "learning_rate": 0.00013103501186586855, "loss": 0.9421, "step": 8010 }, { "epoch": 0.4598789339300571, "grad_norm": 0.25390625, "learning_rate": 0.00013093979491265542, "loss": 0.9331, "step": 8015 }, { "epoch": 0.46016582035172277, "grad_norm": 0.26171875, "learning_rate": 0.0001308445469289902, "loss": 1.0029, "step": 8020 }, { "epoch": 0.4604527067733884, "grad_norm": 0.26171875, "learning_rate": 0.00013074926801040005, "loss": 0.9576, "step": 8025 }, { "epoch": 0.4607395931950541, "grad_norm": 0.26171875, "learning_rate": 0.000130653958252443, "loss": 0.9988, "step": 8030 }, { "epoch": 0.4610264796167197, "grad_norm": 0.29296875, "learning_rate": 0.0001305586177507081, "loss": 0.9588, "step": 8035 }, { "epoch": 0.4613133660383854, "grad_norm": 0.267578125, "learning_rate": 0.00013046324660081525, "loss": 0.9585, "step": 8040 }, { "epoch": 0.46160025246005104, "grad_norm": 0.259765625, "learning_rate": 0.00013036784489841495, "loss": 0.9161, "step": 8045 }, { "epoch": 0.46188713888171673, "grad_norm": 0.2578125, "learning_rate": 0.00013027241273918855, "loss": 0.9633, "step": 8050 }, { "epoch": 0.4621740253033824, "grad_norm": 0.259765625, "learning_rate": 0.00013017695021884777, "loss": 0.8822, "step": 8055 }, { "epoch": 0.46246091172504805, "grad_norm": 0.283203125, "learning_rate": 0.00013008145743313487, "loss": 0.9426, "step": 8060 }, { "epoch": 0.46274779814671374, "grad_norm": 0.28125, "learning_rate": 0.00012998593447782246, "loss": 0.9446, "step": 8065 }, { "epoch": 0.4630346845683794, "grad_norm": 0.2734375, "learning_rate": 0.0001298903814487133, "loss": 0.9324, "step": 8070 }, { "epoch": 0.46332157099004506, "grad_norm": 0.271484375, "learning_rate": 0.0001297947984416405, "loss": 0.9068, "step": 8075 }, { "epoch": 0.4636084574117107, "grad_norm": 0.310546875, "learning_rate": 0.0001296991855524671, "loss": 0.9913, "step": 8080 }, { "epoch": 0.4638953438333764, "grad_norm": 0.27734375, "learning_rate": 0.0001296035428770861, "loss": 0.9549, "step": 8085 }, { "epoch": 0.464182230255042, "grad_norm": 0.26171875, "learning_rate": 0.00012950787051142052, "loss": 0.9452, "step": 8090 }, { "epoch": 0.4644691166767077, "grad_norm": 0.26171875, "learning_rate": 0.00012941216855142298, "loss": 0.9959, "step": 8095 }, { "epoch": 0.46475600309837334, "grad_norm": 0.26171875, "learning_rate": 0.00012931643709307588, "loss": 0.9848, "step": 8100 }, { "epoch": 0.465042889520039, "grad_norm": 0.26171875, "learning_rate": 0.00012922067623239117, "loss": 0.9349, "step": 8105 }, { "epoch": 0.46532977594170466, "grad_norm": 0.27734375, "learning_rate": 0.00012912488606541035, "loss": 0.9301, "step": 8110 }, { "epoch": 0.46561666236337035, "grad_norm": 0.291015625, "learning_rate": 0.00012902906668820418, "loss": 0.9538, "step": 8115 }, { "epoch": 0.465903548785036, "grad_norm": 0.23828125, "learning_rate": 0.00012893321819687286, "loss": 0.9802, "step": 8120 }, { "epoch": 0.46619043520670167, "grad_norm": 0.251953125, "learning_rate": 0.0001288373406875457, "loss": 0.8906, "step": 8125 }, { "epoch": 0.4664773216283673, "grad_norm": 0.275390625, "learning_rate": 0.00012874143425638116, "loss": 0.9637, "step": 8130 }, { "epoch": 0.466764208050033, "grad_norm": 0.26953125, "learning_rate": 0.0001286454989995667, "loss": 0.9511, "step": 8135 }, { "epoch": 0.4670510944716987, "grad_norm": 0.255859375, "learning_rate": 0.00012854953501331863, "loss": 1.0333, "step": 8140 }, { "epoch": 0.4673379808933643, "grad_norm": 0.251953125, "learning_rate": 0.0001284535423938822, "loss": 0.9782, "step": 8145 }, { "epoch": 0.46762486731503, "grad_norm": 0.275390625, "learning_rate": 0.0001283575212375312, "loss": 0.981, "step": 8150 }, { "epoch": 0.46791175373669563, "grad_norm": 0.26953125, "learning_rate": 0.00012826147164056822, "loss": 0.962, "step": 8155 }, { "epoch": 0.4681986401583613, "grad_norm": 0.28515625, "learning_rate": 0.00012816539369932422, "loss": 0.9879, "step": 8160 }, { "epoch": 0.46848552658002696, "grad_norm": 0.259765625, "learning_rate": 0.00012806928751015874, "loss": 0.9731, "step": 8165 }, { "epoch": 0.46877241300169264, "grad_norm": 0.275390625, "learning_rate": 0.0001279731531694595, "loss": 0.9965, "step": 8170 }, { "epoch": 0.4690592994233583, "grad_norm": 0.275390625, "learning_rate": 0.00012787699077364262, "loss": 0.8996, "step": 8175 }, { "epoch": 0.46934618584502397, "grad_norm": 0.302734375, "learning_rate": 0.00012778080041915215, "loss": 1.0329, "step": 8180 }, { "epoch": 0.4696330722666896, "grad_norm": 0.271484375, "learning_rate": 0.00012768458220246035, "loss": 1.0113, "step": 8185 }, { "epoch": 0.4699199586883553, "grad_norm": 0.28125, "learning_rate": 0.00012758833622006737, "loss": 0.9769, "step": 8190 }, { "epoch": 0.4702068451100209, "grad_norm": 0.267578125, "learning_rate": 0.0001274920625685012, "loss": 0.9381, "step": 8195 }, { "epoch": 0.4704937315316866, "grad_norm": 0.2490234375, "learning_rate": 0.0001273957613443176, "loss": 0.991, "step": 8200 }, { "epoch": 0.47078061795335224, "grad_norm": 0.2890625, "learning_rate": 0.00012729943264409992, "loss": 0.9723, "step": 8205 }, { "epoch": 0.47106750437501793, "grad_norm": 0.267578125, "learning_rate": 0.00012720307656445914, "loss": 0.992, "step": 8210 }, { "epoch": 0.4713543907966836, "grad_norm": 0.263671875, "learning_rate": 0.0001271066932020337, "loss": 0.9346, "step": 8215 }, { "epoch": 0.47164127721834925, "grad_norm": 0.35546875, "learning_rate": 0.00012701028265348934, "loss": 0.9086, "step": 8220 }, { "epoch": 0.47192816364001494, "grad_norm": 0.265625, "learning_rate": 0.0001269138450155191, "loss": 0.9441, "step": 8225 }, { "epoch": 0.4722150500616806, "grad_norm": 0.283203125, "learning_rate": 0.00012681738038484324, "loss": 0.9497, "step": 8230 }, { "epoch": 0.47250193648334626, "grad_norm": 0.2734375, "learning_rate": 0.00012672088885820897, "loss": 0.9636, "step": 8235 }, { "epoch": 0.4727888229050119, "grad_norm": 0.271484375, "learning_rate": 0.00012662437053239062, "loss": 0.9612, "step": 8240 }, { "epoch": 0.4730757093266776, "grad_norm": 0.287109375, "learning_rate": 0.0001265278255041893, "loss": 0.9312, "step": 8245 }, { "epoch": 0.4733625957483432, "grad_norm": 0.28125, "learning_rate": 0.0001264312538704329, "loss": 0.9868, "step": 8250 }, { "epoch": 0.4736494821700089, "grad_norm": 0.275390625, "learning_rate": 0.00012633465572797604, "loss": 0.9561, "step": 8255 }, { "epoch": 0.47393636859167454, "grad_norm": 0.25390625, "learning_rate": 0.0001262380311736999, "loss": 0.9404, "step": 8260 }, { "epoch": 0.47422325501334023, "grad_norm": 0.291015625, "learning_rate": 0.0001261413803045122, "loss": 0.9134, "step": 8265 }, { "epoch": 0.47451014143500586, "grad_norm": 0.283203125, "learning_rate": 0.00012604470321734694, "loss": 1.0303, "step": 8270 }, { "epoch": 0.47479702785667155, "grad_norm": 0.27734375, "learning_rate": 0.00012594800000916448, "loss": 1.002, "step": 8275 }, { "epoch": 0.4750839142783372, "grad_norm": 0.26171875, "learning_rate": 0.00012585127077695144, "loss": 0.9813, "step": 8280 }, { "epoch": 0.47537080070000287, "grad_norm": 0.255859375, "learning_rate": 0.00012575451561772048, "loss": 0.9432, "step": 8285 }, { "epoch": 0.4756576871216685, "grad_norm": 0.25390625, "learning_rate": 0.00012565773462851017, "loss": 1.0046, "step": 8290 }, { "epoch": 0.4759445735433342, "grad_norm": 0.26953125, "learning_rate": 0.00012556092790638518, "loss": 0.9188, "step": 8295 }, { "epoch": 0.4762314599649999, "grad_norm": 0.265625, "learning_rate": 0.00012546409554843585, "loss": 0.9249, "step": 8300 }, { "epoch": 0.4765183463866655, "grad_norm": 0.26953125, "learning_rate": 0.00012536723765177826, "loss": 0.9318, "step": 8305 }, { "epoch": 0.4768052328083312, "grad_norm": 0.2890625, "learning_rate": 0.0001252703543135541, "loss": 1.0117, "step": 8310 }, { "epoch": 0.47709211922999684, "grad_norm": 0.265625, "learning_rate": 0.0001251734456309306, "loss": 0.8872, "step": 8315 }, { "epoch": 0.4773790056516625, "grad_norm": 0.29296875, "learning_rate": 0.00012507651170110042, "loss": 1.0721, "step": 8320 }, { "epoch": 0.47766589207332816, "grad_norm": 0.28515625, "learning_rate": 0.0001249795526212815, "loss": 0.9386, "step": 8325 }, { "epoch": 0.47795277849499385, "grad_norm": 0.27734375, "learning_rate": 0.000124882568488717, "loss": 0.9458, "step": 8330 }, { "epoch": 0.4782396649166595, "grad_norm": 0.259765625, "learning_rate": 0.00012478555940067528, "loss": 0.9893, "step": 8335 }, { "epoch": 0.47852655133832517, "grad_norm": 0.255859375, "learning_rate": 0.00012468852545444961, "loss": 0.9771, "step": 8340 }, { "epoch": 0.4788134377599908, "grad_norm": 0.267578125, "learning_rate": 0.0001245914667473583, "loss": 0.9857, "step": 8345 }, { "epoch": 0.4791003241816565, "grad_norm": 0.26953125, "learning_rate": 0.00012449438337674447, "loss": 0.9352, "step": 8350 }, { "epoch": 0.4793872106033221, "grad_norm": 0.265625, "learning_rate": 0.00012439727543997586, "loss": 0.9339, "step": 8355 }, { "epoch": 0.4796740970249878, "grad_norm": 0.263671875, "learning_rate": 0.00012430014303444503, "loss": 0.8911, "step": 8360 }, { "epoch": 0.47996098344665344, "grad_norm": 0.255859375, "learning_rate": 0.00012420298625756898, "loss": 0.9066, "step": 8365 }, { "epoch": 0.48024786986831913, "grad_norm": 0.26953125, "learning_rate": 0.0001241058052067892, "loss": 1.0275, "step": 8370 }, { "epoch": 0.4805347562899848, "grad_norm": 0.251953125, "learning_rate": 0.0001240085999795714, "loss": 0.9092, "step": 8375 }, { "epoch": 0.48082164271165045, "grad_norm": 0.28125, "learning_rate": 0.00012391137067340572, "loss": 0.9755, "step": 8380 }, { "epoch": 0.48110852913331614, "grad_norm": 0.263671875, "learning_rate": 0.0001238141173858063, "loss": 0.8828, "step": 8385 }, { "epoch": 0.4813954155549818, "grad_norm": 0.279296875, "learning_rate": 0.00012371684021431144, "loss": 0.9959, "step": 8390 }, { "epoch": 0.48168230197664746, "grad_norm": 0.302734375, "learning_rate": 0.00012361953925648327, "loss": 0.9897, "step": 8395 }, { "epoch": 0.4819691883983131, "grad_norm": 0.279296875, "learning_rate": 0.0001235222146099079, "loss": 0.9686, "step": 8400 }, { "epoch": 0.4822560748199788, "grad_norm": 0.259765625, "learning_rate": 0.00012342486637219517, "loss": 0.9758, "step": 8405 }, { "epoch": 0.4825429612416444, "grad_norm": 0.279296875, "learning_rate": 0.00012332749464097855, "loss": 0.9926, "step": 8410 }, { "epoch": 0.4828298476633101, "grad_norm": 0.265625, "learning_rate": 0.00012323009951391504, "loss": 0.9421, "step": 8415 }, { "epoch": 0.48311673408497574, "grad_norm": 0.291015625, "learning_rate": 0.00012313268108868518, "loss": 0.9655, "step": 8420 }, { "epoch": 0.48340362050664143, "grad_norm": 0.265625, "learning_rate": 0.00012303523946299285, "loss": 0.9819, "step": 8425 }, { "epoch": 0.48369050692830706, "grad_norm": 0.28125, "learning_rate": 0.00012293777473456518, "loss": 0.9449, "step": 8430 }, { "epoch": 0.48397739334997275, "grad_norm": 0.2578125, "learning_rate": 0.00012284028700115245, "loss": 0.9829, "step": 8435 }, { "epoch": 0.4842642797716384, "grad_norm": 0.267578125, "learning_rate": 0.0001227427763605281, "loss": 0.9807, "step": 8440 }, { "epoch": 0.4845511661933041, "grad_norm": 0.28125, "learning_rate": 0.0001226452429104884, "loss": 0.9877, "step": 8445 }, { "epoch": 0.48483805261496976, "grad_norm": 0.29296875, "learning_rate": 0.0001225476867488527, "loss": 1.0281, "step": 8450 }, { "epoch": 0.4851249390366354, "grad_norm": 0.298828125, "learning_rate": 0.00012245010797346296, "loss": 0.9882, "step": 8455 }, { "epoch": 0.4854118254583011, "grad_norm": 0.263671875, "learning_rate": 0.00012235250668218386, "loss": 1.0074, "step": 8460 }, { "epoch": 0.4856987118799667, "grad_norm": 0.265625, "learning_rate": 0.00012225488297290266, "loss": 0.9489, "step": 8465 }, { "epoch": 0.4859855983016324, "grad_norm": 0.2578125, "learning_rate": 0.00012215723694352916, "loss": 0.9546, "step": 8470 }, { "epoch": 0.48627248472329804, "grad_norm": 0.259765625, "learning_rate": 0.00012205956869199549, "loss": 0.9806, "step": 8475 }, { "epoch": 0.4865593711449637, "grad_norm": 0.2578125, "learning_rate": 0.00012196187831625605, "loss": 0.9646, "step": 8480 }, { "epoch": 0.48684625756662936, "grad_norm": 0.25, "learning_rate": 0.00012186416591428751, "loss": 0.9609, "step": 8485 }, { "epoch": 0.48713314398829505, "grad_norm": 0.263671875, "learning_rate": 0.00012176643158408853, "loss": 0.9037, "step": 8490 }, { "epoch": 0.4874200304099607, "grad_norm": 0.259765625, "learning_rate": 0.00012166867542367985, "loss": 0.927, "step": 8495 }, { "epoch": 0.48770691683162637, "grad_norm": 0.2578125, "learning_rate": 0.00012157089753110406, "loss": 0.9143, "step": 8500 }, { "epoch": 0.487993803253292, "grad_norm": 0.259765625, "learning_rate": 0.00012147309800442555, "loss": 1.0134, "step": 8505 }, { "epoch": 0.4882806896749577, "grad_norm": 0.259765625, "learning_rate": 0.00012137527694173038, "loss": 0.9424, "step": 8510 }, { "epoch": 0.4885675760966233, "grad_norm": 0.2470703125, "learning_rate": 0.00012127743444112629, "loss": 0.9291, "step": 8515 }, { "epoch": 0.488854462518289, "grad_norm": 0.265625, "learning_rate": 0.00012117957060074245, "loss": 0.9596, "step": 8520 }, { "epoch": 0.48914134893995465, "grad_norm": 0.259765625, "learning_rate": 0.00012108168551872944, "loss": 0.957, "step": 8525 }, { "epoch": 0.48942823536162033, "grad_norm": 0.287109375, "learning_rate": 0.00012098377929325917, "loss": 0.9623, "step": 8530 }, { "epoch": 0.489715121783286, "grad_norm": 0.302734375, "learning_rate": 0.00012088585202252474, "loss": 1.0269, "step": 8535 }, { "epoch": 0.49000200820495166, "grad_norm": 0.24609375, "learning_rate": 0.00012078790380474037, "loss": 0.9209, "step": 8540 }, { "epoch": 0.49028889462661734, "grad_norm": 0.314453125, "learning_rate": 0.00012068993473814126, "loss": 0.8656, "step": 8545 }, { "epoch": 0.490575781048283, "grad_norm": 0.271484375, "learning_rate": 0.00012059194492098351, "loss": 0.8839, "step": 8550 }, { "epoch": 0.49086266746994867, "grad_norm": 0.259765625, "learning_rate": 0.00012049393445154411, "loss": 0.9081, "step": 8555 }, { "epoch": 0.4911495538916143, "grad_norm": 0.259765625, "learning_rate": 0.0001203959034281207, "loss": 0.9642, "step": 8560 }, { "epoch": 0.49143644031328, "grad_norm": 0.251953125, "learning_rate": 0.00012029785194903153, "loss": 0.9145, "step": 8565 }, { "epoch": 0.4917233267349456, "grad_norm": 0.283203125, "learning_rate": 0.00012019978011261541, "loss": 0.927, "step": 8570 }, { "epoch": 0.4920102131566113, "grad_norm": 0.265625, "learning_rate": 0.00012010168801723149, "loss": 0.9539, "step": 8575 }, { "epoch": 0.49229709957827694, "grad_norm": 0.2578125, "learning_rate": 0.00012000357576125932, "loss": 0.856, "step": 8580 }, { "epoch": 0.49258398599994263, "grad_norm": 0.27734375, "learning_rate": 0.00011990544344309865, "loss": 0.9165, "step": 8585 }, { "epoch": 0.49287087242160826, "grad_norm": 0.28125, "learning_rate": 0.00011980729116116927, "loss": 0.9467, "step": 8590 }, { "epoch": 0.49315775884327395, "grad_norm": 0.251953125, "learning_rate": 0.00011970911901391113, "loss": 0.9961, "step": 8595 }, { "epoch": 0.4934446452649396, "grad_norm": 0.267578125, "learning_rate": 0.00011961092709978402, "loss": 0.9507, "step": 8600 }, { "epoch": 0.4937315316866053, "grad_norm": 0.2890625, "learning_rate": 0.00011951271551726755, "loss": 0.9668, "step": 8605 }, { "epoch": 0.49401841810827096, "grad_norm": 0.27734375, "learning_rate": 0.00011941448436486106, "loss": 1.0119, "step": 8610 }, { "epoch": 0.4943053045299366, "grad_norm": 0.271484375, "learning_rate": 0.00011931623374108358, "loss": 0.9826, "step": 8615 }, { "epoch": 0.4945921909516023, "grad_norm": 0.2734375, "learning_rate": 0.00011921796374447356, "loss": 0.9418, "step": 8620 }, { "epoch": 0.4948790773732679, "grad_norm": 0.26171875, "learning_rate": 0.00011911967447358901, "loss": 0.9033, "step": 8625 }, { "epoch": 0.4951659637949336, "grad_norm": 0.265625, "learning_rate": 0.00011902136602700711, "loss": 0.9738, "step": 8630 }, { "epoch": 0.49545285021659924, "grad_norm": 0.263671875, "learning_rate": 0.00011892303850332443, "loss": 0.952, "step": 8635 }, { "epoch": 0.4957397366382649, "grad_norm": 0.314453125, "learning_rate": 0.00011882469200115656, "loss": 0.9833, "step": 8640 }, { "epoch": 0.49602662305993056, "grad_norm": 0.271484375, "learning_rate": 0.00011872632661913823, "loss": 1.0069, "step": 8645 }, { "epoch": 0.49631350948159625, "grad_norm": 0.2890625, "learning_rate": 0.00011862794245592301, "loss": 0.8768, "step": 8650 }, { "epoch": 0.4966003959032619, "grad_norm": 0.306640625, "learning_rate": 0.00011852953961018332, "loss": 1.0011, "step": 8655 }, { "epoch": 0.49688728232492757, "grad_norm": 0.271484375, "learning_rate": 0.00011843111818061036, "loss": 0.981, "step": 8660 }, { "epoch": 0.4971741687465932, "grad_norm": 0.267578125, "learning_rate": 0.00011833267826591394, "loss": 0.9317, "step": 8665 }, { "epoch": 0.4974610551682589, "grad_norm": 0.27734375, "learning_rate": 0.0001182342199648224, "loss": 0.9526, "step": 8670 }, { "epoch": 0.4977479415899245, "grad_norm": 0.263671875, "learning_rate": 0.00011813574337608258, "loss": 0.9783, "step": 8675 }, { "epoch": 0.4980348280115902, "grad_norm": 0.291015625, "learning_rate": 0.00011803724859845952, "loss": 1.0333, "step": 8680 }, { "epoch": 0.49832171443325585, "grad_norm": 0.2421875, "learning_rate": 0.00011793873573073673, "loss": 0.9438, "step": 8685 }, { "epoch": 0.49860860085492154, "grad_norm": 0.267578125, "learning_rate": 0.00011784020487171566, "loss": 0.9776, "step": 8690 }, { "epoch": 0.4988954872765872, "grad_norm": 0.2890625, "learning_rate": 0.00011774165612021585, "loss": 0.9559, "step": 8695 }, { "epoch": 0.49918237369825286, "grad_norm": 0.283203125, "learning_rate": 0.00011764308957507488, "loss": 0.9299, "step": 8700 }, { "epoch": 0.49946926011991855, "grad_norm": 0.279296875, "learning_rate": 0.00011754450533514807, "loss": 0.95, "step": 8705 }, { "epoch": 0.4997561465415842, "grad_norm": 0.263671875, "learning_rate": 0.00011744590349930849, "loss": 1.001, "step": 8710 }, { "epoch": 0.5000430329632498, "grad_norm": 0.267578125, "learning_rate": 0.00011734728416644694, "loss": 0.8962, "step": 8715 }, { "epoch": 0.5003299193849156, "grad_norm": 0.25, "learning_rate": 0.00011724864743547168, "loss": 0.8902, "step": 8720 }, { "epoch": 0.5006168058065812, "grad_norm": 0.28125, "learning_rate": 0.0001171499934053085, "loss": 0.9538, "step": 8725 }, { "epoch": 0.5009036922282468, "grad_norm": 0.259765625, "learning_rate": 0.00011705132217490047, "loss": 0.8992, "step": 8730 }, { "epoch": 0.5011905786499125, "grad_norm": 0.2734375, "learning_rate": 0.00011695263384320794, "loss": 0.9484, "step": 8735 }, { "epoch": 0.5014774650715782, "grad_norm": 0.26171875, "learning_rate": 0.00011685392850920842, "loss": 0.971, "step": 8740 }, { "epoch": 0.5017643514932438, "grad_norm": 0.28125, "learning_rate": 0.00011675520627189648, "loss": 0.9143, "step": 8745 }, { "epoch": 0.5020512379149095, "grad_norm": 0.30078125, "learning_rate": 0.00011665646723028359, "loss": 0.9677, "step": 8750 }, { "epoch": 0.5023381243365751, "grad_norm": 0.306640625, "learning_rate": 0.00011655771148339812, "loss": 0.9715, "step": 8755 }, { "epoch": 0.5026250107582408, "grad_norm": 0.2578125, "learning_rate": 0.00011645893913028514, "loss": 0.9105, "step": 8760 }, { "epoch": 0.5029118971799065, "grad_norm": 0.263671875, "learning_rate": 0.00011636015027000651, "loss": 0.8818, "step": 8765 }, { "epoch": 0.5031987836015721, "grad_norm": 0.279296875, "learning_rate": 0.00011626134500164047, "loss": 0.9266, "step": 8770 }, { "epoch": 0.5034856700232379, "grad_norm": 0.271484375, "learning_rate": 0.00011616252342428184, "loss": 0.9334, "step": 8775 }, { "epoch": 0.5037725564449035, "grad_norm": 0.271484375, "learning_rate": 0.00011606368563704177, "loss": 0.9179, "step": 8780 }, { "epoch": 0.5040594428665691, "grad_norm": 0.26171875, "learning_rate": 0.00011596483173904762, "loss": 0.9467, "step": 8785 }, { "epoch": 0.5043463292882348, "grad_norm": 0.2578125, "learning_rate": 0.00011586596182944293, "loss": 0.9149, "step": 8790 }, { "epoch": 0.5046332157099005, "grad_norm": 0.263671875, "learning_rate": 0.00011576707600738739, "loss": 0.9662, "step": 8795 }, { "epoch": 0.5049201021315661, "grad_norm": 0.26171875, "learning_rate": 0.00011566817437205643, "loss": 0.9375, "step": 8800 }, { "epoch": 0.5052069885532318, "grad_norm": 0.26953125, "learning_rate": 0.0001155692570226416, "loss": 0.9094, "step": 8805 }, { "epoch": 0.5054938749748974, "grad_norm": 0.265625, "learning_rate": 0.00011547032405835005, "loss": 0.9598, "step": 8810 }, { "epoch": 0.5057807613965631, "grad_norm": 0.265625, "learning_rate": 0.00011537137557840463, "loss": 0.9268, "step": 8815 }, { "epoch": 0.5060676478182288, "grad_norm": 0.259765625, "learning_rate": 0.00011527241168204375, "loss": 0.9564, "step": 8820 }, { "epoch": 0.5063545342398944, "grad_norm": 0.2890625, "learning_rate": 0.00011517343246852126, "loss": 1.0083, "step": 8825 }, { "epoch": 0.50664142066156, "grad_norm": 0.27734375, "learning_rate": 0.00011507443803710643, "loss": 1.0126, "step": 8830 }, { "epoch": 0.5069283070832258, "grad_norm": 0.259765625, "learning_rate": 0.00011497542848708374, "loss": 0.8726, "step": 8835 }, { "epoch": 0.5072151935048914, "grad_norm": 0.291015625, "learning_rate": 0.00011487640391775283, "loss": 0.9107, "step": 8840 }, { "epoch": 0.507502079926557, "grad_norm": 0.255859375, "learning_rate": 0.00011477736442842846, "loss": 0.9928, "step": 8845 }, { "epoch": 0.5077889663482228, "grad_norm": 0.2734375, "learning_rate": 0.00011467831011844027, "loss": 0.961, "step": 8850 }, { "epoch": 0.5080758527698884, "grad_norm": 0.29296875, "learning_rate": 0.00011457924108713287, "loss": 0.9664, "step": 8855 }, { "epoch": 0.5083627391915541, "grad_norm": 0.2734375, "learning_rate": 0.00011448015743386553, "loss": 0.9368, "step": 8860 }, { "epoch": 0.5086496256132197, "grad_norm": 0.412109375, "learning_rate": 0.00011438105925801224, "loss": 0.9562, "step": 8865 }, { "epoch": 0.5089365120348854, "grad_norm": 0.279296875, "learning_rate": 0.00011428194665896155, "loss": 0.8924, "step": 8870 }, { "epoch": 0.5092233984565511, "grad_norm": 0.2470703125, "learning_rate": 0.00011418281973611647, "loss": 0.9168, "step": 8875 }, { "epoch": 0.5095102848782167, "grad_norm": 0.2578125, "learning_rate": 0.00011408367858889437, "loss": 0.9377, "step": 8880 }, { "epoch": 0.5097971712998823, "grad_norm": 0.267578125, "learning_rate": 0.00011398452331672689, "loss": 1.0082, "step": 8885 }, { "epoch": 0.5100840577215481, "grad_norm": 0.26171875, "learning_rate": 0.00011388535401905985, "loss": 0.901, "step": 8890 }, { "epoch": 0.5103709441432137, "grad_norm": 0.26171875, "learning_rate": 0.00011378617079535312, "loss": 0.9243, "step": 8895 }, { "epoch": 0.5106578305648793, "grad_norm": 0.291015625, "learning_rate": 0.00011368697374508052, "loss": 0.9491, "step": 8900 }, { "epoch": 0.510944716986545, "grad_norm": 0.263671875, "learning_rate": 0.00011358776296772982, "loss": 0.973, "step": 8905 }, { "epoch": 0.5112316034082107, "grad_norm": 0.287109375, "learning_rate": 0.00011348853856280244, "loss": 0.9177, "step": 8910 }, { "epoch": 0.5115184898298764, "grad_norm": 0.26171875, "learning_rate": 0.00011338930062981352, "loss": 0.969, "step": 8915 }, { "epoch": 0.511805376251542, "grad_norm": 0.2734375, "learning_rate": 0.0001132900492682918, "loss": 0.9672, "step": 8920 }, { "epoch": 0.5120922626732077, "grad_norm": 0.30078125, "learning_rate": 0.00011319078457777947, "loss": 0.9438, "step": 8925 }, { "epoch": 0.5123791490948734, "grad_norm": 0.28515625, "learning_rate": 0.00011309150665783204, "loss": 0.9689, "step": 8930 }, { "epoch": 0.512666035516539, "grad_norm": 0.279296875, "learning_rate": 0.00011299221560801836, "loss": 0.9375, "step": 8935 }, { "epoch": 0.5129529219382046, "grad_norm": 0.271484375, "learning_rate": 0.00011289291152792038, "loss": 0.9468, "step": 8940 }, { "epoch": 0.5132398083598704, "grad_norm": 0.2578125, "learning_rate": 0.00011279359451713318, "loss": 0.9729, "step": 8945 }, { "epoch": 0.513526694781536, "grad_norm": 0.263671875, "learning_rate": 0.00011269426467526477, "loss": 0.9528, "step": 8950 }, { "epoch": 0.5138135812032016, "grad_norm": 0.265625, "learning_rate": 0.00011259492210193603, "loss": 0.9191, "step": 8955 }, { "epoch": 0.5141004676248673, "grad_norm": 0.255859375, "learning_rate": 0.00011249556689678063, "loss": 0.9982, "step": 8960 }, { "epoch": 0.514387354046533, "grad_norm": 0.26171875, "learning_rate": 0.00011239619915944488, "loss": 0.9644, "step": 8965 }, { "epoch": 0.5146742404681987, "grad_norm": 0.259765625, "learning_rate": 0.00011229681898958775, "loss": 0.9712, "step": 8970 }, { "epoch": 0.5149611268898643, "grad_norm": 0.267578125, "learning_rate": 0.0001121974264868805, "loss": 0.9015, "step": 8975 }, { "epoch": 0.5152480133115299, "grad_norm": 0.26171875, "learning_rate": 0.00011209802175100692, "loss": 0.8879, "step": 8980 }, { "epoch": 0.5155348997331957, "grad_norm": 0.2890625, "learning_rate": 0.00011199860488166302, "loss": 1.0174, "step": 8985 }, { "epoch": 0.5158217861548613, "grad_norm": 0.287109375, "learning_rate": 0.00011189917597855694, "loss": 0.9896, "step": 8990 }, { "epoch": 0.5161086725765269, "grad_norm": 0.259765625, "learning_rate": 0.00011179973514140896, "loss": 0.9473, "step": 8995 }, { "epoch": 0.5163955589981926, "grad_norm": 0.275390625, "learning_rate": 0.00011170028246995123, "loss": 0.9281, "step": 9000 }, { "epoch": 0.5166824454198583, "grad_norm": 0.310546875, "learning_rate": 0.00011160081806392788, "loss": 1.0304, "step": 9005 }, { "epoch": 0.5169693318415239, "grad_norm": 0.267578125, "learning_rate": 0.00011150134202309474, "loss": 0.8893, "step": 9010 }, { "epoch": 0.5172562182631896, "grad_norm": 0.28125, "learning_rate": 0.00011140185444721937, "loss": 0.9668, "step": 9015 }, { "epoch": 0.5175431046848553, "grad_norm": 0.283203125, "learning_rate": 0.00011130235543608081, "loss": 0.9933, "step": 9020 }, { "epoch": 0.517829991106521, "grad_norm": 0.26953125, "learning_rate": 0.00011120284508946959, "loss": 0.9641, "step": 9025 }, { "epoch": 0.5181168775281866, "grad_norm": 0.25, "learning_rate": 0.00011110332350718768, "loss": 0.8897, "step": 9030 }, { "epoch": 0.5184037639498522, "grad_norm": 0.2890625, "learning_rate": 0.00011100379078904828, "loss": 0.9597, "step": 9035 }, { "epoch": 0.518690650371518, "grad_norm": 0.28125, "learning_rate": 0.00011090424703487569, "loss": 1.0005, "step": 9040 }, { "epoch": 0.5189775367931836, "grad_norm": 0.2890625, "learning_rate": 0.0001108046923445054, "loss": 0.9615, "step": 9045 }, { "epoch": 0.5192644232148492, "grad_norm": 0.271484375, "learning_rate": 0.00011070512681778375, "loss": 0.9897, "step": 9050 }, { "epoch": 0.5195513096365149, "grad_norm": 0.275390625, "learning_rate": 0.00011060555055456807, "loss": 0.9487, "step": 9055 }, { "epoch": 0.5198381960581806, "grad_norm": 0.275390625, "learning_rate": 0.00011050596365472637, "loss": 0.9722, "step": 9060 }, { "epoch": 0.5201250824798462, "grad_norm": 0.271484375, "learning_rate": 0.00011040636621813736, "loss": 0.9519, "step": 9065 }, { "epoch": 0.5204119689015119, "grad_norm": 0.271484375, "learning_rate": 0.00011030675834469026, "loss": 0.9566, "step": 9070 }, { "epoch": 0.5206988553231775, "grad_norm": 0.26953125, "learning_rate": 0.00011020714013428484, "loss": 0.9619, "step": 9075 }, { "epoch": 0.5209857417448432, "grad_norm": 0.279296875, "learning_rate": 0.0001101075116868312, "loss": 0.9772, "step": 9080 }, { "epoch": 0.5212726281665089, "grad_norm": 0.294921875, "learning_rate": 0.0001100078731022497, "loss": 0.9155, "step": 9085 }, { "epoch": 0.5215595145881745, "grad_norm": 0.25, "learning_rate": 0.00010990822448047089, "loss": 0.9067, "step": 9090 }, { "epoch": 0.5218464010098403, "grad_norm": 0.267578125, "learning_rate": 0.00010980856592143538, "loss": 0.9713, "step": 9095 }, { "epoch": 0.5221332874315059, "grad_norm": 0.26953125, "learning_rate": 0.00010970889752509374, "loss": 0.945, "step": 9100 }, { "epoch": 0.5224201738531715, "grad_norm": 0.255859375, "learning_rate": 0.00010960921939140638, "loss": 0.9593, "step": 9105 }, { "epoch": 0.5227070602748372, "grad_norm": 0.2734375, "learning_rate": 0.00010950953162034357, "loss": 0.9456, "step": 9110 }, { "epoch": 0.5229939466965029, "grad_norm": 0.25, "learning_rate": 0.00010940983431188508, "loss": 0.9054, "step": 9115 }, { "epoch": 0.5232808331181685, "grad_norm": 0.275390625, "learning_rate": 0.00010931012756602039, "loss": 1.0006, "step": 9120 }, { "epoch": 0.5235677195398342, "grad_norm": 0.275390625, "learning_rate": 0.00010921041148274838, "loss": 0.9529, "step": 9125 }, { "epoch": 0.5238546059614998, "grad_norm": 0.265625, "learning_rate": 0.00010911068616207736, "loss": 0.9472, "step": 9130 }, { "epoch": 0.5241414923831655, "grad_norm": 0.28515625, "learning_rate": 0.00010901095170402479, "loss": 0.9863, "step": 9135 }, { "epoch": 0.5244283788048312, "grad_norm": 0.267578125, "learning_rate": 0.00010891120820861745, "loss": 1.0031, "step": 9140 }, { "epoch": 0.5247152652264968, "grad_norm": 0.265625, "learning_rate": 0.00010881145577589103, "loss": 0.9824, "step": 9145 }, { "epoch": 0.5250021516481624, "grad_norm": 0.29296875, "learning_rate": 0.00010871169450589025, "loss": 0.9784, "step": 9150 }, { "epoch": 0.5252890380698282, "grad_norm": 0.259765625, "learning_rate": 0.00010861192449866871, "loss": 0.9328, "step": 9155 }, { "epoch": 0.5255759244914938, "grad_norm": 0.255859375, "learning_rate": 0.00010851214585428878, "loss": 0.967, "step": 9160 }, { "epoch": 0.5258628109131595, "grad_norm": 0.2734375, "learning_rate": 0.00010841235867282137, "loss": 0.9627, "step": 9165 }, { "epoch": 0.5261496973348252, "grad_norm": 0.28515625, "learning_rate": 0.00010831256305434616, "loss": 0.8964, "step": 9170 }, { "epoch": 0.5264365837564908, "grad_norm": 0.2734375, "learning_rate": 0.00010821275909895115, "loss": 0.9556, "step": 9175 }, { "epoch": 0.5267234701781565, "grad_norm": 0.275390625, "learning_rate": 0.00010811294690673271, "loss": 0.93, "step": 9180 }, { "epoch": 0.5270103565998221, "grad_norm": 0.2734375, "learning_rate": 0.00010801312657779547, "loss": 0.9891, "step": 9185 }, { "epoch": 0.5272972430214878, "grad_norm": 0.34375, "learning_rate": 0.00010791329821225232, "loss": 0.92, "step": 9190 }, { "epoch": 0.5275841294431535, "grad_norm": 0.263671875, "learning_rate": 0.00010781346191022405, "loss": 0.9738, "step": 9195 }, { "epoch": 0.5278710158648191, "grad_norm": 0.271484375, "learning_rate": 0.00010771361777183957, "loss": 1.0151, "step": 9200 }, { "epoch": 0.5281579022864847, "grad_norm": 0.26953125, "learning_rate": 0.00010761376589723553, "loss": 1.0087, "step": 9205 }, { "epoch": 0.5284447887081505, "grad_norm": 0.283203125, "learning_rate": 0.00010751390638655638, "loss": 0.9975, "step": 9210 }, { "epoch": 0.5287316751298161, "grad_norm": 0.275390625, "learning_rate": 0.00010741403933995424, "loss": 0.9519, "step": 9215 }, { "epoch": 0.5290185615514817, "grad_norm": 0.25390625, "learning_rate": 0.00010731416485758879, "loss": 0.889, "step": 9220 }, { "epoch": 0.5293054479731474, "grad_norm": 0.2734375, "learning_rate": 0.00010721428303962713, "loss": 1.0501, "step": 9225 }, { "epoch": 0.5295923343948131, "grad_norm": 0.279296875, "learning_rate": 0.00010711439398624377, "loss": 0.8589, "step": 9230 }, { "epoch": 0.5298792208164788, "grad_norm": 0.271484375, "learning_rate": 0.00010701449779762046, "loss": 0.9429, "step": 9235 }, { "epoch": 0.5301661072381444, "grad_norm": 0.263671875, "learning_rate": 0.00010691459457394604, "loss": 0.9643, "step": 9240 }, { "epoch": 0.5304529936598101, "grad_norm": 0.294921875, "learning_rate": 0.00010681468441541648, "loss": 0.9754, "step": 9245 }, { "epoch": 0.5307398800814758, "grad_norm": 0.28125, "learning_rate": 0.00010671476742223474, "loss": 0.9067, "step": 9250 }, { "epoch": 0.5310267665031414, "grad_norm": 0.30078125, "learning_rate": 0.00010661484369461052, "loss": 0.9117, "step": 9255 }, { "epoch": 0.531313652924807, "grad_norm": 0.302734375, "learning_rate": 0.00010651491333276036, "loss": 0.981, "step": 9260 }, { "epoch": 0.5316005393464728, "grad_norm": 0.265625, "learning_rate": 0.00010641497643690743, "loss": 0.9504, "step": 9265 }, { "epoch": 0.5318874257681384, "grad_norm": 0.267578125, "learning_rate": 0.00010631503310728146, "loss": 0.9156, "step": 9270 }, { "epoch": 0.532174312189804, "grad_norm": 0.259765625, "learning_rate": 0.00010621508344411861, "loss": 0.9353, "step": 9275 }, { "epoch": 0.5324611986114697, "grad_norm": 0.29296875, "learning_rate": 0.0001061151275476614, "loss": 0.8923, "step": 9280 }, { "epoch": 0.5327480850331354, "grad_norm": 0.25390625, "learning_rate": 0.00010601516551815865, "loss": 0.8837, "step": 9285 }, { "epoch": 0.533034971454801, "grad_norm": 0.2578125, "learning_rate": 0.00010591519745586522, "loss": 0.9026, "step": 9290 }, { "epoch": 0.5333218578764667, "grad_norm": 0.265625, "learning_rate": 0.00010581522346104215, "loss": 0.926, "step": 9295 }, { "epoch": 0.5336087442981323, "grad_norm": 0.263671875, "learning_rate": 0.00010571524363395635, "loss": 0.9403, "step": 9300 }, { "epoch": 0.5338956307197981, "grad_norm": 0.28125, "learning_rate": 0.00010561525807488062, "loss": 0.9236, "step": 9305 }, { "epoch": 0.5341825171414637, "grad_norm": 0.27734375, "learning_rate": 0.00010551526688409346, "loss": 0.9872, "step": 9310 }, { "epoch": 0.5344694035631293, "grad_norm": 0.26953125, "learning_rate": 0.00010541527016187903, "loss": 0.9445, "step": 9315 }, { "epoch": 0.5347562899847951, "grad_norm": 0.251953125, "learning_rate": 0.00010531526800852709, "loss": 0.9635, "step": 9320 }, { "epoch": 0.5350431764064607, "grad_norm": 0.248046875, "learning_rate": 0.00010521526052433282, "loss": 0.9379, "step": 9325 }, { "epoch": 0.5353300628281263, "grad_norm": 0.26171875, "learning_rate": 0.00010511524780959667, "loss": 0.9446, "step": 9330 }, { "epoch": 0.535616949249792, "grad_norm": 0.291015625, "learning_rate": 0.0001050152299646245, "loss": 0.9563, "step": 9335 }, { "epoch": 0.5359038356714577, "grad_norm": 0.26171875, "learning_rate": 0.00010491520708972716, "loss": 0.9867, "step": 9340 }, { "epoch": 0.5361907220931234, "grad_norm": 0.2734375, "learning_rate": 0.0001048151792852206, "loss": 0.8738, "step": 9345 }, { "epoch": 0.536477608514789, "grad_norm": 0.279296875, "learning_rate": 0.00010471514665142572, "loss": 1.0163, "step": 9350 }, { "epoch": 0.5367644949364546, "grad_norm": 0.2734375, "learning_rate": 0.00010461510928866828, "loss": 0.9815, "step": 9355 }, { "epoch": 0.5370513813581204, "grad_norm": 0.27734375, "learning_rate": 0.00010451506729727875, "loss": 0.9558, "step": 9360 }, { "epoch": 0.537338267779786, "grad_norm": 0.2578125, "learning_rate": 0.00010441502077759229, "loss": 0.9601, "step": 9365 }, { "epoch": 0.5376251542014516, "grad_norm": 0.2578125, "learning_rate": 0.00010431496982994848, "loss": 0.9688, "step": 9370 }, { "epoch": 0.5379120406231173, "grad_norm": 0.318359375, "learning_rate": 0.00010421491455469153, "loss": 0.9825, "step": 9375 }, { "epoch": 0.538198927044783, "grad_norm": 0.28125, "learning_rate": 0.00010411485505216984, "loss": 1.0181, "step": 9380 }, { "epoch": 0.5384858134664486, "grad_norm": 0.271484375, "learning_rate": 0.00010401479142273611, "loss": 0.9328, "step": 9385 }, { "epoch": 0.5387726998881143, "grad_norm": 0.279296875, "learning_rate": 0.00010391472376674716, "loss": 0.9125, "step": 9390 }, { "epoch": 0.5390595863097799, "grad_norm": 0.265625, "learning_rate": 0.00010381465218456383, "loss": 0.9773, "step": 9395 }, { "epoch": 0.5393464727314456, "grad_norm": 0.29296875, "learning_rate": 0.00010371457677655096, "loss": 0.9982, "step": 9400 }, { "epoch": 0.5396333591531113, "grad_norm": 0.259765625, "learning_rate": 0.0001036144976430772, "loss": 0.9841, "step": 9405 }, { "epoch": 0.5399202455747769, "grad_norm": 0.283203125, "learning_rate": 0.00010351441488451486, "loss": 0.8917, "step": 9410 }, { "epoch": 0.5402071319964427, "grad_norm": 0.26171875, "learning_rate": 0.00010341432860124003, "loss": 0.9447, "step": 9415 }, { "epoch": 0.5404940184181083, "grad_norm": 0.26953125, "learning_rate": 0.00010331423889363223, "loss": 0.9624, "step": 9420 }, { "epoch": 0.5407809048397739, "grad_norm": 0.31640625, "learning_rate": 0.00010321414586207443, "loss": 0.9845, "step": 9425 }, { "epoch": 0.5410677912614396, "grad_norm": 0.265625, "learning_rate": 0.00010311404960695299, "loss": 0.9207, "step": 9430 }, { "epoch": 0.5413546776831053, "grad_norm": 0.2890625, "learning_rate": 0.00010301395022865738, "loss": 0.9677, "step": 9435 }, { "epoch": 0.5416415641047709, "grad_norm": 0.296875, "learning_rate": 0.00010291384782758034, "loss": 0.9794, "step": 9440 }, { "epoch": 0.5419284505264366, "grad_norm": 0.263671875, "learning_rate": 0.00010281374250411755, "loss": 0.963, "step": 9445 }, { "epoch": 0.5422153369481022, "grad_norm": 0.255859375, "learning_rate": 0.00010271363435866765, "loss": 0.9774, "step": 9450 }, { "epoch": 0.5425022233697679, "grad_norm": 0.271484375, "learning_rate": 0.00010261352349163218, "loss": 0.9736, "step": 9455 }, { "epoch": 0.5427891097914336, "grad_norm": 0.263671875, "learning_rate": 0.00010251341000341528, "loss": 0.9856, "step": 9460 }, { "epoch": 0.5430759962130992, "grad_norm": 0.267578125, "learning_rate": 0.00010241329399442379, "loss": 0.937, "step": 9465 }, { "epoch": 0.5433628826347648, "grad_norm": 0.2734375, "learning_rate": 0.00010231317556506708, "loss": 1.0404, "step": 9470 }, { "epoch": 0.5436497690564306, "grad_norm": 0.255859375, "learning_rate": 0.00010221305481575696, "loss": 0.9899, "step": 9475 }, { "epoch": 0.5439366554780962, "grad_norm": 0.283203125, "learning_rate": 0.00010211293184690751, "loss": 0.9576, "step": 9480 }, { "epoch": 0.5442235418997619, "grad_norm": 0.255859375, "learning_rate": 0.00010201280675893507, "loss": 0.9129, "step": 9485 }, { "epoch": 0.5445104283214276, "grad_norm": 0.32421875, "learning_rate": 0.00010191267965225811, "loss": 0.9381, "step": 9490 }, { "epoch": 0.5447973147430932, "grad_norm": 0.267578125, "learning_rate": 0.00010181255062729713, "loss": 0.8722, "step": 9495 }, { "epoch": 0.5450842011647589, "grad_norm": 0.314453125, "learning_rate": 0.00010171241978447455, "loss": 0.9259, "step": 9500 }, { "epoch": 0.5453710875864245, "grad_norm": 0.27734375, "learning_rate": 0.0001016122872242146, "loss": 0.914, "step": 9505 }, { "epoch": 0.5456579740080902, "grad_norm": 0.283203125, "learning_rate": 0.00010151215304694324, "loss": 0.9249, "step": 9510 }, { "epoch": 0.5459448604297559, "grad_norm": 0.2451171875, "learning_rate": 0.00010141201735308805, "loss": 0.9072, "step": 9515 }, { "epoch": 0.5462317468514215, "grad_norm": 0.265625, "learning_rate": 0.00010131188024307817, "loss": 0.9015, "step": 9520 }, { "epoch": 0.5465186332730871, "grad_norm": 0.283203125, "learning_rate": 0.00010121174181734405, "loss": 0.9654, "step": 9525 }, { "epoch": 0.5468055196947529, "grad_norm": 0.314453125, "learning_rate": 0.00010111160217631756, "loss": 0.9653, "step": 9530 }, { "epoch": 0.5470924061164185, "grad_norm": 0.2734375, "learning_rate": 0.00010101146142043178, "loss": 1.0, "step": 9535 }, { "epoch": 0.5473792925380842, "grad_norm": 0.326171875, "learning_rate": 0.0001009113196501209, "loss": 0.8986, "step": 9540 }, { "epoch": 0.5476661789597498, "grad_norm": 0.25390625, "learning_rate": 0.0001008111769658201, "loss": 0.9, "step": 9545 }, { "epoch": 0.5479530653814155, "grad_norm": 0.251953125, "learning_rate": 0.00010071103346796549, "loss": 0.9077, "step": 9550 }, { "epoch": 0.5482399518030812, "grad_norm": 0.263671875, "learning_rate": 0.000100610889256994, "loss": 0.9169, "step": 9555 }, { "epoch": 0.5485268382247468, "grad_norm": 0.2578125, "learning_rate": 0.00010051074443334327, "loss": 0.9981, "step": 9560 }, { "epoch": 0.5488137246464125, "grad_norm": 0.267578125, "learning_rate": 0.00010041059909745156, "loss": 0.9127, "step": 9565 }, { "epoch": 0.5491006110680782, "grad_norm": 0.265625, "learning_rate": 0.00010031045334975768, "loss": 0.9588, "step": 9570 }, { "epoch": 0.5493874974897438, "grad_norm": 0.265625, "learning_rate": 0.00010021030729070076, "loss": 0.9944, "step": 9575 }, { "epoch": 0.5496743839114094, "grad_norm": 0.267578125, "learning_rate": 0.00010011016102072033, "loss": 0.9086, "step": 9580 }, { "epoch": 0.5499612703330752, "grad_norm": 0.263671875, "learning_rate": 0.0001000100146402561, "loss": 0.9508, "step": 9585 }, { "epoch": 0.5502481567547408, "grad_norm": 0.251953125, "learning_rate": 9.990986824974788e-05, "loss": 0.8669, "step": 9590 }, { "epoch": 0.5505350431764064, "grad_norm": 0.26953125, "learning_rate": 9.980972194963552e-05, "loss": 0.9624, "step": 9595 }, { "epoch": 0.5508219295980721, "grad_norm": 0.28515625, "learning_rate": 9.970957584035873e-05, "loss": 0.9104, "step": 9600 }, { "epoch": 0.5511088160197378, "grad_norm": 0.291015625, "learning_rate": 9.96094300223571e-05, "loss": 1.0336, "step": 9605 }, { "epoch": 0.5513957024414035, "grad_norm": 0.275390625, "learning_rate": 9.950928459606984e-05, "loss": 0.929, "step": 9610 }, { "epoch": 0.5516825888630691, "grad_norm": 0.2578125, "learning_rate": 9.940913966193586e-05, "loss": 0.8959, "step": 9615 }, { "epoch": 0.5519694752847347, "grad_norm": 0.3046875, "learning_rate": 9.930899532039347e-05, "loss": 1.0277, "step": 9620 }, { "epoch": 0.5522563617064005, "grad_norm": 0.267578125, "learning_rate": 9.920885167188054e-05, "loss": 0.9278, "step": 9625 }, { "epoch": 0.5525432481280661, "grad_norm": 0.25390625, "learning_rate": 9.910870881683402e-05, "loss": 0.9176, "step": 9630 }, { "epoch": 0.5528301345497317, "grad_norm": 0.267578125, "learning_rate": 9.900856685569027e-05, "loss": 0.9702, "step": 9635 }, { "epoch": 0.5531170209713975, "grad_norm": 0.25390625, "learning_rate": 9.890842588888474e-05, "loss": 0.9026, "step": 9640 }, { "epoch": 0.5534039073930631, "grad_norm": 0.306640625, "learning_rate": 9.88082860168517e-05, "loss": 0.9785, "step": 9645 }, { "epoch": 0.5536907938147287, "grad_norm": 0.25390625, "learning_rate": 9.870814734002456e-05, "loss": 1.0336, "step": 9650 }, { "epoch": 0.5539776802363944, "grad_norm": 0.271484375, "learning_rate": 9.860800995883533e-05, "loss": 0.9716, "step": 9655 }, { "epoch": 0.5542645666580601, "grad_norm": 0.3046875, "learning_rate": 9.850787397371482e-05, "loss": 0.9849, "step": 9660 }, { "epoch": 0.5545514530797258, "grad_norm": 0.265625, "learning_rate": 9.840773948509243e-05, "loss": 0.989, "step": 9665 }, { "epoch": 0.5548383395013914, "grad_norm": 0.279296875, "learning_rate": 9.83076065933961e-05, "loss": 0.9193, "step": 9670 }, { "epoch": 0.555125225923057, "grad_norm": 0.259765625, "learning_rate": 9.820747539905202e-05, "loss": 1.0258, "step": 9675 }, { "epoch": 0.5554121123447228, "grad_norm": 0.26953125, "learning_rate": 9.810734600248486e-05, "loss": 0.8885, "step": 9680 }, { "epoch": 0.5556989987663884, "grad_norm": 0.2734375, "learning_rate": 9.800721850411743e-05, "loss": 0.9265, "step": 9685 }, { "epoch": 0.555985885188054, "grad_norm": 0.259765625, "learning_rate": 9.790709300437052e-05, "loss": 0.9511, "step": 9690 }, { "epoch": 0.5562727716097197, "grad_norm": 0.265625, "learning_rate": 9.780696960366311e-05, "loss": 0.9182, "step": 9695 }, { "epoch": 0.5565596580313854, "grad_norm": 0.2734375, "learning_rate": 9.770684840241191e-05, "loss": 0.9675, "step": 9700 }, { "epoch": 0.556846544453051, "grad_norm": 0.2578125, "learning_rate": 9.76067295010315e-05, "loss": 0.8975, "step": 9705 }, { "epoch": 0.5571334308747167, "grad_norm": 0.255859375, "learning_rate": 9.750661299993415e-05, "loss": 0.8839, "step": 9710 }, { "epoch": 0.5574203172963824, "grad_norm": 0.259765625, "learning_rate": 9.740649899952967e-05, "loss": 0.9149, "step": 9715 }, { "epoch": 0.557707203718048, "grad_norm": 0.259765625, "learning_rate": 9.73063876002255e-05, "loss": 0.914, "step": 9720 }, { "epoch": 0.5579940901397137, "grad_norm": 0.26953125, "learning_rate": 9.720627890242628e-05, "loss": 0.9043, "step": 9725 }, { "epoch": 0.5582809765613793, "grad_norm": 0.27734375, "learning_rate": 9.710617300653412e-05, "loss": 1.0333, "step": 9730 }, { "epoch": 0.5585678629830451, "grad_norm": 0.27734375, "learning_rate": 9.700607001294814e-05, "loss": 0.8718, "step": 9735 }, { "epoch": 0.5588547494047107, "grad_norm": 0.28125, "learning_rate": 9.690597002206477e-05, "loss": 0.9673, "step": 9740 }, { "epoch": 0.5591416358263763, "grad_norm": 0.2734375, "learning_rate": 9.68058731342772e-05, "loss": 0.9382, "step": 9745 }, { "epoch": 0.559428522248042, "grad_norm": 0.271484375, "learning_rate": 9.670577944997566e-05, "loss": 0.9278, "step": 9750 }, { "epoch": 0.5597154086697077, "grad_norm": 0.275390625, "learning_rate": 9.660568906954711e-05, "loss": 0.9444, "step": 9755 }, { "epoch": 0.5600022950913733, "grad_norm": 0.28515625, "learning_rate": 9.65056020933752e-05, "loss": 0.9539, "step": 9760 }, { "epoch": 0.560289181513039, "grad_norm": 0.287109375, "learning_rate": 9.640551862184021e-05, "loss": 0.9755, "step": 9765 }, { "epoch": 0.5605760679347046, "grad_norm": 0.32421875, "learning_rate": 9.630543875531879e-05, "loss": 0.9609, "step": 9770 }, { "epoch": 0.5608629543563703, "grad_norm": 0.2890625, "learning_rate": 9.620536259418416e-05, "loss": 0.8894, "step": 9775 }, { "epoch": 0.561149840778036, "grad_norm": 0.271484375, "learning_rate": 9.610529023880561e-05, "loss": 1.0661, "step": 9780 }, { "epoch": 0.5614367271997016, "grad_norm": 0.26953125, "learning_rate": 9.600522178954879e-05, "loss": 0.9475, "step": 9785 }, { "epoch": 0.5617236136213672, "grad_norm": 0.251953125, "learning_rate": 9.590515734677531e-05, "loss": 0.9669, "step": 9790 }, { "epoch": 0.562010500043033, "grad_norm": 0.27734375, "learning_rate": 9.580509701084286e-05, "loss": 0.915, "step": 9795 }, { "epoch": 0.5622973864646986, "grad_norm": 0.275390625, "learning_rate": 9.570504088210496e-05, "loss": 0.9512, "step": 9800 }, { "epoch": 0.5625842728863643, "grad_norm": 0.263671875, "learning_rate": 9.560498906091085e-05, "loss": 0.925, "step": 9805 }, { "epoch": 0.56287115930803, "grad_norm": 0.279296875, "learning_rate": 9.550494164760562e-05, "loss": 0.9762, "step": 9810 }, { "epoch": 0.5631580457296956, "grad_norm": 0.271484375, "learning_rate": 9.540489874252972e-05, "loss": 0.9598, "step": 9815 }, { "epoch": 0.5634449321513613, "grad_norm": 0.2578125, "learning_rate": 9.53048604460193e-05, "loss": 0.9919, "step": 9820 }, { "epoch": 0.5637318185730269, "grad_norm": 0.251953125, "learning_rate": 9.52048268584057e-05, "loss": 0.8728, "step": 9825 }, { "epoch": 0.5640187049946926, "grad_norm": 0.271484375, "learning_rate": 9.510479808001566e-05, "loss": 0.9707, "step": 9830 }, { "epoch": 0.5643055914163583, "grad_norm": 0.267578125, "learning_rate": 9.500477421117102e-05, "loss": 0.921, "step": 9835 }, { "epoch": 0.5645924778380239, "grad_norm": 0.259765625, "learning_rate": 9.490475535218875e-05, "loss": 0.9341, "step": 9840 }, { "epoch": 0.5648793642596895, "grad_norm": 0.267578125, "learning_rate": 9.480474160338082e-05, "loss": 0.9195, "step": 9845 }, { "epoch": 0.5651662506813553, "grad_norm": 0.30078125, "learning_rate": 9.470473306505392e-05, "loss": 0.8927, "step": 9850 }, { "epoch": 0.5654531371030209, "grad_norm": 0.2578125, "learning_rate": 9.460472983750977e-05, "loss": 0.9275, "step": 9855 }, { "epoch": 0.5657400235246866, "grad_norm": 0.294921875, "learning_rate": 9.450473202104448e-05, "loss": 0.9967, "step": 9860 }, { "epoch": 0.5660269099463522, "grad_norm": 0.296875, "learning_rate": 9.440473971594895e-05, "loss": 0.9919, "step": 9865 }, { "epoch": 0.5663137963680179, "grad_norm": 0.263671875, "learning_rate": 9.430475302250844e-05, "loss": 0.9789, "step": 9870 }, { "epoch": 0.5666006827896836, "grad_norm": 0.27734375, "learning_rate": 9.420477204100264e-05, "loss": 0.952, "step": 9875 }, { "epoch": 0.5668875692113492, "grad_norm": 0.263671875, "learning_rate": 9.41047968717055e-05, "loss": 0.9865, "step": 9880 }, { "epoch": 0.5671744556330149, "grad_norm": 0.265625, "learning_rate": 9.400482761488507e-05, "loss": 0.9289, "step": 9885 }, { "epoch": 0.5674613420546806, "grad_norm": 0.263671875, "learning_rate": 9.390486437080361e-05, "loss": 0.8945, "step": 9890 }, { "epoch": 0.5677482284763462, "grad_norm": 0.27734375, "learning_rate": 9.380490723971717e-05, "loss": 0.9242, "step": 9895 }, { "epoch": 0.5680351148980118, "grad_norm": 0.291015625, "learning_rate": 9.370495632187587e-05, "loss": 0.9461, "step": 9900 }, { "epoch": 0.5683220013196776, "grad_norm": 0.28125, "learning_rate": 9.360501171752339e-05, "loss": 0.9615, "step": 9905 }, { "epoch": 0.5686088877413432, "grad_norm": 0.26171875, "learning_rate": 9.35050735268973e-05, "loss": 0.9437, "step": 9910 }, { "epoch": 0.5688957741630088, "grad_norm": 0.26953125, "learning_rate": 9.340514185022851e-05, "loss": 0.9536, "step": 9915 }, { "epoch": 0.5691826605846745, "grad_norm": 0.259765625, "learning_rate": 9.330521678774157e-05, "loss": 0.9502, "step": 9920 }, { "epoch": 0.5694695470063402, "grad_norm": 0.25390625, "learning_rate": 9.320529843965432e-05, "loss": 0.9087, "step": 9925 }, { "epoch": 0.5697564334280059, "grad_norm": 0.265625, "learning_rate": 9.310538690617788e-05, "loss": 0.9406, "step": 9930 }, { "epoch": 0.5700433198496715, "grad_norm": 0.2578125, "learning_rate": 9.300548228751657e-05, "loss": 0.9437, "step": 9935 }, { "epoch": 0.5703302062713371, "grad_norm": 0.26171875, "learning_rate": 9.290558468386765e-05, "loss": 0.968, "step": 9940 }, { "epoch": 0.5706170926930029, "grad_norm": 0.318359375, "learning_rate": 9.280569419542154e-05, "loss": 1.0125, "step": 9945 }, { "epoch": 0.5709039791146685, "grad_norm": 0.2734375, "learning_rate": 9.270581092236134e-05, "loss": 0.9058, "step": 9950 }, { "epoch": 0.5711908655363341, "grad_norm": 0.267578125, "learning_rate": 9.260593496486302e-05, "loss": 0.9912, "step": 9955 }, { "epoch": 0.5714777519579999, "grad_norm": 0.26953125, "learning_rate": 9.250606642309523e-05, "loss": 0.9665, "step": 9960 }, { "epoch": 0.5717646383796655, "grad_norm": 0.267578125, "learning_rate": 9.240620539721904e-05, "loss": 0.9121, "step": 9965 }, { "epoch": 0.5720515248013311, "grad_norm": 0.26171875, "learning_rate": 9.23063519873882e-05, "loss": 0.9157, "step": 9970 }, { "epoch": 0.5723384112229968, "grad_norm": 0.265625, "learning_rate": 9.220650629374862e-05, "loss": 0.9182, "step": 9975 }, { "epoch": 0.5726252976446625, "grad_norm": 0.275390625, "learning_rate": 9.210666841643857e-05, "loss": 0.9516, "step": 9980 }, { "epoch": 0.5729121840663282, "grad_norm": 0.2578125, "learning_rate": 9.200683845558845e-05, "loss": 0.9678, "step": 9985 }, { "epoch": 0.5731990704879938, "grad_norm": 0.259765625, "learning_rate": 9.190701651132079e-05, "loss": 0.9701, "step": 9990 }, { "epoch": 0.5734859569096594, "grad_norm": 0.2578125, "learning_rate": 9.180720268374992e-05, "loss": 0.9737, "step": 9995 }, { "epoch": 0.5737728433313252, "grad_norm": 0.263671875, "learning_rate": 9.170739707298221e-05, "loss": 0.931, "step": 10000 }, { "epoch": 0.5740597297529908, "grad_norm": 0.298828125, "learning_rate": 9.160759977911576e-05, "loss": 0.8809, "step": 10005 }, { "epoch": 0.5743466161746564, "grad_norm": 0.2578125, "learning_rate": 9.150781090224015e-05, "loss": 0.9776, "step": 10010 }, { "epoch": 0.5746335025963221, "grad_norm": 0.26953125, "learning_rate": 9.140803054243677e-05, "loss": 0.9639, "step": 10015 }, { "epoch": 0.5749203890179878, "grad_norm": 0.28515625, "learning_rate": 9.130825879977828e-05, "loss": 0.9233, "step": 10020 }, { "epoch": 0.5752072754396534, "grad_norm": 0.26953125, "learning_rate": 9.12084957743288e-05, "loss": 0.9529, "step": 10025 }, { "epoch": 0.5754941618613191, "grad_norm": 0.2421875, "learning_rate": 9.110874156614362e-05, "loss": 0.9505, "step": 10030 }, { "epoch": 0.5757810482829848, "grad_norm": 0.330078125, "learning_rate": 9.100899627526933e-05, "loss": 0.915, "step": 10035 }, { "epoch": 0.5760679347046505, "grad_norm": 0.263671875, "learning_rate": 9.090926000174338e-05, "loss": 0.8983, "step": 10040 }, { "epoch": 0.5763548211263161, "grad_norm": 0.251953125, "learning_rate": 9.080953284559433e-05, "loss": 0.9099, "step": 10045 }, { "epoch": 0.5766417075479817, "grad_norm": 0.271484375, "learning_rate": 9.070981490684159e-05, "loss": 0.9564, "step": 10050 }, { "epoch": 0.5769285939696475, "grad_norm": 0.30859375, "learning_rate": 9.061010628549522e-05, "loss": 0.9255, "step": 10055 }, { "epoch": 0.5772154803913131, "grad_norm": 0.248046875, "learning_rate": 9.051040708155606e-05, "loss": 0.9096, "step": 10060 }, { "epoch": 0.5775023668129787, "grad_norm": 0.318359375, "learning_rate": 9.041071739501538e-05, "loss": 0.9518, "step": 10065 }, { "epoch": 0.5777892532346444, "grad_norm": 0.2578125, "learning_rate": 9.0311037325855e-05, "loss": 0.8909, "step": 10070 }, { "epoch": 0.5780761396563101, "grad_norm": 0.26171875, "learning_rate": 9.021136697404706e-05, "loss": 0.9982, "step": 10075 }, { "epoch": 0.5783630260779757, "grad_norm": 0.26171875, "learning_rate": 9.011170643955394e-05, "loss": 0.9137, "step": 10080 }, { "epoch": 0.5786499124996414, "grad_norm": 0.275390625, "learning_rate": 9.001205582232825e-05, "loss": 0.92, "step": 10085 }, { "epoch": 0.578936798921307, "grad_norm": 0.251953125, "learning_rate": 8.991241522231252e-05, "loss": 0.9488, "step": 10090 }, { "epoch": 0.5792236853429727, "grad_norm": 0.2412109375, "learning_rate": 8.981278473943936e-05, "loss": 0.9771, "step": 10095 }, { "epoch": 0.5795105717646384, "grad_norm": 0.2734375, "learning_rate": 8.971316447363115e-05, "loss": 0.9223, "step": 10100 }, { "epoch": 0.579797458186304, "grad_norm": 0.26953125, "learning_rate": 8.96135545248001e-05, "loss": 0.9181, "step": 10105 }, { "epoch": 0.5800843446079698, "grad_norm": 0.271484375, "learning_rate": 8.951395499284797e-05, "loss": 0.9816, "step": 10110 }, { "epoch": 0.5803712310296354, "grad_norm": 0.271484375, "learning_rate": 8.941436597766616e-05, "loss": 0.9894, "step": 10115 }, { "epoch": 0.580658117451301, "grad_norm": 0.2578125, "learning_rate": 8.93147875791355e-05, "loss": 0.9198, "step": 10120 }, { "epoch": 0.5809450038729667, "grad_norm": 0.26171875, "learning_rate": 8.921521989712611e-05, "loss": 0.9714, "step": 10125 }, { "epoch": 0.5812318902946324, "grad_norm": 0.251953125, "learning_rate": 8.91156630314975e-05, "loss": 0.9481, "step": 10130 }, { "epoch": 0.581518776716298, "grad_norm": 0.26953125, "learning_rate": 8.901611708209818e-05, "loss": 0.9611, "step": 10135 }, { "epoch": 0.5818056631379637, "grad_norm": 0.25390625, "learning_rate": 8.891658214876585e-05, "loss": 0.9474, "step": 10140 }, { "epoch": 0.5820925495596293, "grad_norm": 0.244140625, "learning_rate": 8.8817058331327e-05, "loss": 0.9427, "step": 10145 }, { "epoch": 0.582379435981295, "grad_norm": 0.27734375, "learning_rate": 8.871754572959716e-05, "loss": 0.9312, "step": 10150 }, { "epoch": 0.5826663224029607, "grad_norm": 0.251953125, "learning_rate": 8.861804444338045e-05, "loss": 0.9632, "step": 10155 }, { "epoch": 0.5829532088246263, "grad_norm": 0.255859375, "learning_rate": 8.851855457246972e-05, "loss": 0.9219, "step": 10160 }, { "epoch": 0.583240095246292, "grad_norm": 0.296875, "learning_rate": 8.841907621664638e-05, "loss": 0.937, "step": 10165 }, { "epoch": 0.5835269816679577, "grad_norm": 0.26171875, "learning_rate": 8.83196094756802e-05, "loss": 0.8499, "step": 10170 }, { "epoch": 0.5838138680896233, "grad_norm": 0.275390625, "learning_rate": 8.82201544493295e-05, "loss": 0.9717, "step": 10175 }, { "epoch": 0.584100754511289, "grad_norm": 0.2578125, "learning_rate": 8.812071123734058e-05, "loss": 0.9231, "step": 10180 }, { "epoch": 0.5843876409329546, "grad_norm": 0.271484375, "learning_rate": 8.802127993944814e-05, "loss": 0.9177, "step": 10185 }, { "epoch": 0.5846745273546203, "grad_norm": 0.251953125, "learning_rate": 8.792186065537473e-05, "loss": 0.9794, "step": 10190 }, { "epoch": 0.584961413776286, "grad_norm": 0.26171875, "learning_rate": 8.782245348483104e-05, "loss": 0.998, "step": 10195 }, { "epoch": 0.5852483001979516, "grad_norm": 0.25390625, "learning_rate": 8.772305852751542e-05, "loss": 0.9513, "step": 10200 }, { "epoch": 0.5855351866196173, "grad_norm": 0.298828125, "learning_rate": 8.762367588311414e-05, "loss": 0.8855, "step": 10205 }, { "epoch": 0.585822073041283, "grad_norm": 0.2890625, "learning_rate": 8.752430565130103e-05, "loss": 0.9304, "step": 10210 }, { "epoch": 0.5861089594629486, "grad_norm": 0.27734375, "learning_rate": 8.742494793173743e-05, "loss": 0.9583, "step": 10215 }, { "epoch": 0.5863958458846142, "grad_norm": 0.2578125, "learning_rate": 8.73256028240723e-05, "loss": 1.0204, "step": 10220 }, { "epoch": 0.58668273230628, "grad_norm": 0.279296875, "learning_rate": 8.722627042794171e-05, "loss": 0.9869, "step": 10225 }, { "epoch": 0.5869696187279456, "grad_norm": 0.263671875, "learning_rate": 8.712695084296924e-05, "loss": 0.8415, "step": 10230 }, { "epoch": 0.5872565051496113, "grad_norm": 0.275390625, "learning_rate": 8.702764416876537e-05, "loss": 1.0009, "step": 10235 }, { "epoch": 0.5875433915712769, "grad_norm": 0.271484375, "learning_rate": 8.692835050492785e-05, "loss": 0.9342, "step": 10240 }, { "epoch": 0.5878302779929426, "grad_norm": 0.271484375, "learning_rate": 8.682906995104125e-05, "loss": 0.8879, "step": 10245 }, { "epoch": 0.5881171644146083, "grad_norm": 0.287109375, "learning_rate": 8.672980260667702e-05, "loss": 0.9447, "step": 10250 }, { "epoch": 0.5884040508362739, "grad_norm": 0.27734375, "learning_rate": 8.663054857139339e-05, "loss": 0.9813, "step": 10255 }, { "epoch": 0.5886909372579395, "grad_norm": 0.27734375, "learning_rate": 8.653130794473517e-05, "loss": 0.9157, "step": 10260 }, { "epoch": 0.5889778236796053, "grad_norm": 0.251953125, "learning_rate": 8.643208082623386e-05, "loss": 0.9054, "step": 10265 }, { "epoch": 0.5892647101012709, "grad_norm": 0.283203125, "learning_rate": 8.633286731540722e-05, "loss": 0.9475, "step": 10270 }, { "epoch": 0.5895515965229365, "grad_norm": 0.265625, "learning_rate": 8.623366751175958e-05, "loss": 0.9655, "step": 10275 }, { "epoch": 0.5898384829446023, "grad_norm": 0.2490234375, "learning_rate": 8.613448151478131e-05, "loss": 0.9882, "step": 10280 }, { "epoch": 0.5901253693662679, "grad_norm": 0.2578125, "learning_rate": 8.603530942394908e-05, "loss": 0.9706, "step": 10285 }, { "epoch": 0.5904122557879335, "grad_norm": 0.27734375, "learning_rate": 8.593615133872558e-05, "loss": 0.9384, "step": 10290 }, { "epoch": 0.5906991422095992, "grad_norm": 0.279296875, "learning_rate": 8.583700735855941e-05, "loss": 0.9356, "step": 10295 }, { "epoch": 0.5909860286312649, "grad_norm": 0.30078125, "learning_rate": 8.57378775828851e-05, "loss": 1.004, "step": 10300 }, { "epoch": 0.5912729150529306, "grad_norm": 0.255859375, "learning_rate": 8.563876211112282e-05, "loss": 0.9773, "step": 10305 }, { "epoch": 0.5915598014745962, "grad_norm": 0.267578125, "learning_rate": 8.553966104267852e-05, "loss": 0.931, "step": 10310 }, { "epoch": 0.5918466878962618, "grad_norm": 0.26953125, "learning_rate": 8.544057447694358e-05, "loss": 0.9397, "step": 10315 }, { "epoch": 0.5921335743179276, "grad_norm": 0.275390625, "learning_rate": 8.534150251329494e-05, "loss": 0.9633, "step": 10320 }, { "epoch": 0.5924204607395932, "grad_norm": 0.271484375, "learning_rate": 8.52424452510949e-05, "loss": 0.9323, "step": 10325 }, { "epoch": 0.5927073471612588, "grad_norm": 0.2578125, "learning_rate": 8.514340278969089e-05, "loss": 0.8746, "step": 10330 }, { "epoch": 0.5929942335829245, "grad_norm": 0.26953125, "learning_rate": 8.504437522841566e-05, "loss": 0.9724, "step": 10335 }, { "epoch": 0.5932811200045902, "grad_norm": 0.271484375, "learning_rate": 8.494536266658687e-05, "loss": 0.9765, "step": 10340 }, { "epoch": 0.5935680064262558, "grad_norm": 0.27734375, "learning_rate": 8.484636520350724e-05, "loss": 1.0927, "step": 10345 }, { "epoch": 0.5938548928479215, "grad_norm": 0.28515625, "learning_rate": 8.474738293846424e-05, "loss": 0.9808, "step": 10350 }, { "epoch": 0.5941417792695872, "grad_norm": 0.26171875, "learning_rate": 8.464841597073024e-05, "loss": 0.9677, "step": 10355 }, { "epoch": 0.5944286656912529, "grad_norm": 0.27734375, "learning_rate": 8.454946439956213e-05, "loss": 0.96, "step": 10360 }, { "epoch": 0.5947155521129185, "grad_norm": 0.27734375, "learning_rate": 8.445052832420146e-05, "loss": 0.9752, "step": 10365 }, { "epoch": 0.5950024385345841, "grad_norm": 0.26953125, "learning_rate": 8.435160784387423e-05, "loss": 0.9835, "step": 10370 }, { "epoch": 0.5952893249562499, "grad_norm": 0.279296875, "learning_rate": 8.425270305779069e-05, "loss": 0.9595, "step": 10375 }, { "epoch": 0.5955762113779155, "grad_norm": 0.28125, "learning_rate": 8.415381406514551e-05, "loss": 0.9088, "step": 10380 }, { "epoch": 0.5958630977995811, "grad_norm": 0.255859375, "learning_rate": 8.405494096511737e-05, "loss": 0.9592, "step": 10385 }, { "epoch": 0.5961499842212468, "grad_norm": 0.283203125, "learning_rate": 8.395608385686911e-05, "loss": 0.8849, "step": 10390 }, { "epoch": 0.5964368706429125, "grad_norm": 0.2578125, "learning_rate": 8.38572428395475e-05, "loss": 0.9273, "step": 10395 }, { "epoch": 0.5967237570645781, "grad_norm": 0.2734375, "learning_rate": 8.37584180122832e-05, "loss": 0.9745, "step": 10400 }, { "epoch": 0.5970106434862438, "grad_norm": 0.2734375, "learning_rate": 8.365960947419054e-05, "loss": 0.9733, "step": 10405 }, { "epoch": 0.5972975299079094, "grad_norm": 0.265625, "learning_rate": 8.356081732436759e-05, "loss": 0.961, "step": 10410 }, { "epoch": 0.5975844163295752, "grad_norm": 0.2412109375, "learning_rate": 8.346204166189607e-05, "loss": 0.8917, "step": 10415 }, { "epoch": 0.5978713027512408, "grad_norm": 0.263671875, "learning_rate": 8.336328258584093e-05, "loss": 0.9785, "step": 10420 }, { "epoch": 0.5981581891729064, "grad_norm": 0.259765625, "learning_rate": 8.326454019525072e-05, "loss": 0.9333, "step": 10425 }, { "epoch": 0.5984450755945722, "grad_norm": 0.271484375, "learning_rate": 8.316581458915711e-05, "loss": 0.9219, "step": 10430 }, { "epoch": 0.5987319620162378, "grad_norm": 0.267578125, "learning_rate": 8.3067105866575e-05, "loss": 0.9298, "step": 10435 }, { "epoch": 0.5990188484379034, "grad_norm": 0.27734375, "learning_rate": 8.296841412650233e-05, "loss": 1.0153, "step": 10440 }, { "epoch": 0.5993057348595691, "grad_norm": 0.27734375, "learning_rate": 8.286973946792e-05, "loss": 0.9561, "step": 10445 }, { "epoch": 0.5995926212812348, "grad_norm": 0.26171875, "learning_rate": 8.277108198979188e-05, "loss": 0.9661, "step": 10450 }, { "epoch": 0.5998795077029004, "grad_norm": 0.2734375, "learning_rate": 8.267244179106441e-05, "loss": 0.9446, "step": 10455 }, { "epoch": 0.6001663941245661, "grad_norm": 0.263671875, "learning_rate": 8.257381897066691e-05, "loss": 0.9485, "step": 10460 }, { "epoch": 0.6004532805462317, "grad_norm": 0.271484375, "learning_rate": 8.24752136275111e-05, "loss": 0.9191, "step": 10465 }, { "epoch": 0.6007401669678974, "grad_norm": 0.2734375, "learning_rate": 8.237662586049133e-05, "loss": 1.0105, "step": 10470 }, { "epoch": 0.6010270533895631, "grad_norm": 0.302734375, "learning_rate": 8.227805576848418e-05, "loss": 0.9487, "step": 10475 }, { "epoch": 0.6013139398112287, "grad_norm": 0.283203125, "learning_rate": 8.217950345034858e-05, "loss": 0.9419, "step": 10480 }, { "epoch": 0.6016008262328943, "grad_norm": 0.279296875, "learning_rate": 8.208096900492562e-05, "loss": 0.9304, "step": 10485 }, { "epoch": 0.6018877126545601, "grad_norm": 0.267578125, "learning_rate": 8.198245253103843e-05, "loss": 1.0017, "step": 10490 }, { "epoch": 0.6021745990762257, "grad_norm": 0.26953125, "learning_rate": 8.188395412749223e-05, "loss": 1.0351, "step": 10495 }, { "epoch": 0.6024614854978914, "grad_norm": 0.263671875, "learning_rate": 8.178547389307393e-05, "loss": 0.9325, "step": 10500 }, { "epoch": 0.6027483719195571, "grad_norm": 0.28515625, "learning_rate": 8.168701192655243e-05, "loss": 1.027, "step": 10505 }, { "epoch": 0.6030352583412227, "grad_norm": 0.296875, "learning_rate": 8.158856832667811e-05, "loss": 0.9181, "step": 10510 }, { "epoch": 0.6033221447628884, "grad_norm": 0.263671875, "learning_rate": 8.14901431921831e-05, "loss": 0.9326, "step": 10515 }, { "epoch": 0.603609031184554, "grad_norm": 0.263671875, "learning_rate": 8.139173662178086e-05, "loss": 0.9628, "step": 10520 }, { "epoch": 0.6038959176062197, "grad_norm": 0.2734375, "learning_rate": 8.129334871416632e-05, "loss": 0.9514, "step": 10525 }, { "epoch": 0.6041828040278854, "grad_norm": 0.2578125, "learning_rate": 8.119497956801571e-05, "loss": 0.9439, "step": 10530 }, { "epoch": 0.604469690449551, "grad_norm": 0.2734375, "learning_rate": 8.109662928198638e-05, "loss": 1.0132, "step": 10535 }, { "epoch": 0.6047565768712166, "grad_norm": 0.31640625, "learning_rate": 8.099829795471683e-05, "loss": 0.9754, "step": 10540 }, { "epoch": 0.6050434632928824, "grad_norm": 0.271484375, "learning_rate": 8.089998568482643e-05, "loss": 0.9723, "step": 10545 }, { "epoch": 0.605330349714548, "grad_norm": 0.26953125, "learning_rate": 8.080169257091562e-05, "loss": 0.9321, "step": 10550 }, { "epoch": 0.6056172361362137, "grad_norm": 0.279296875, "learning_rate": 8.070341871156541e-05, "loss": 1.0164, "step": 10555 }, { "epoch": 0.6059041225578793, "grad_norm": 0.265625, "learning_rate": 8.060516420533774e-05, "loss": 0.9447, "step": 10560 }, { "epoch": 0.606191008979545, "grad_norm": 0.267578125, "learning_rate": 8.050692915077489e-05, "loss": 0.8857, "step": 10565 }, { "epoch": 0.6064778954012107, "grad_norm": 0.271484375, "learning_rate": 8.040871364639983e-05, "loss": 0.916, "step": 10570 }, { "epoch": 0.6067647818228763, "grad_norm": 0.279296875, "learning_rate": 8.031051779071587e-05, "loss": 0.9563, "step": 10575 }, { "epoch": 0.6070516682445419, "grad_norm": 0.263671875, "learning_rate": 8.021234168220649e-05, "loss": 0.9525, "step": 10580 }, { "epoch": 0.6073385546662077, "grad_norm": 0.26953125, "learning_rate": 8.011418541933558e-05, "loss": 1.0055, "step": 10585 }, { "epoch": 0.6076254410878733, "grad_norm": 0.26953125, "learning_rate": 8.00160491005469e-05, "loss": 0.9487, "step": 10590 }, { "epoch": 0.6079123275095389, "grad_norm": 0.263671875, "learning_rate": 7.991793282426442e-05, "loss": 0.917, "step": 10595 }, { "epoch": 0.6081992139312047, "grad_norm": 0.275390625, "learning_rate": 7.981983668889182e-05, "loss": 0.9883, "step": 10600 }, { "epoch": 0.6084861003528703, "grad_norm": 0.265625, "learning_rate": 7.972176079281275e-05, "loss": 0.9511, "step": 10605 }, { "epoch": 0.608772986774536, "grad_norm": 0.275390625, "learning_rate": 7.962370523439044e-05, "loss": 0.9492, "step": 10610 }, { "epoch": 0.6090598731962016, "grad_norm": 0.26171875, "learning_rate": 7.952567011196774e-05, "loss": 0.9826, "step": 10615 }, { "epoch": 0.6093467596178673, "grad_norm": 0.287109375, "learning_rate": 7.942765552386709e-05, "loss": 0.9432, "step": 10620 }, { "epoch": 0.609633646039533, "grad_norm": 0.255859375, "learning_rate": 7.932966156839018e-05, "loss": 1.0169, "step": 10625 }, { "epoch": 0.6099205324611986, "grad_norm": 0.251953125, "learning_rate": 7.923168834381822e-05, "loss": 0.8777, "step": 10630 }, { "epoch": 0.6102074188828642, "grad_norm": 0.267578125, "learning_rate": 7.913373594841139e-05, "loss": 0.9344, "step": 10635 }, { "epoch": 0.61049430530453, "grad_norm": 0.25, "learning_rate": 7.903580448040917e-05, "loss": 0.9048, "step": 10640 }, { "epoch": 0.6107811917261956, "grad_norm": 0.263671875, "learning_rate": 7.893789403802992e-05, "loss": 0.9632, "step": 10645 }, { "epoch": 0.6110680781478612, "grad_norm": 0.259765625, "learning_rate": 7.884000471947104e-05, "loss": 0.9641, "step": 10650 }, { "epoch": 0.6113549645695269, "grad_norm": 0.279296875, "learning_rate": 7.874213662290862e-05, "loss": 0.9087, "step": 10655 }, { "epoch": 0.6116418509911926, "grad_norm": 0.275390625, "learning_rate": 7.864428984649757e-05, "loss": 0.9175, "step": 10660 }, { "epoch": 0.6119287374128582, "grad_norm": 0.263671875, "learning_rate": 7.854646448837134e-05, "loss": 0.9203, "step": 10665 }, { "epoch": 0.6122156238345239, "grad_norm": 0.26171875, "learning_rate": 7.844866064664189e-05, "loss": 0.9388, "step": 10670 }, { "epoch": 0.6125025102561896, "grad_norm": 0.2578125, "learning_rate": 7.835087841939973e-05, "loss": 0.9698, "step": 10675 }, { "epoch": 0.6127893966778553, "grad_norm": 0.26953125, "learning_rate": 7.82531179047135e-05, "loss": 0.9655, "step": 10680 }, { "epoch": 0.6130762830995209, "grad_norm": 0.251953125, "learning_rate": 7.815537920063019e-05, "loss": 1.0175, "step": 10685 }, { "epoch": 0.6133631695211865, "grad_norm": 0.25, "learning_rate": 7.805766240517498e-05, "loss": 0.9388, "step": 10690 }, { "epoch": 0.6136500559428523, "grad_norm": 0.263671875, "learning_rate": 7.795996761635087e-05, "loss": 0.9418, "step": 10695 }, { "epoch": 0.6139369423645179, "grad_norm": 0.267578125, "learning_rate": 7.786229493213901e-05, "loss": 0.9642, "step": 10700 }, { "epoch": 0.6142238287861835, "grad_norm": 0.255859375, "learning_rate": 7.776464445049817e-05, "loss": 0.9053, "step": 10705 }, { "epoch": 0.6145107152078492, "grad_norm": 0.271484375, "learning_rate": 7.766701626936505e-05, "loss": 0.9985, "step": 10710 }, { "epoch": 0.6147976016295149, "grad_norm": 0.263671875, "learning_rate": 7.75694104866538e-05, "loss": 0.9251, "step": 10715 }, { "epoch": 0.6150844880511805, "grad_norm": 0.275390625, "learning_rate": 7.74718272002563e-05, "loss": 0.967, "step": 10720 }, { "epoch": 0.6153713744728462, "grad_norm": 0.275390625, "learning_rate": 7.737426650804168e-05, "loss": 0.9435, "step": 10725 }, { "epoch": 0.6156582608945118, "grad_norm": 0.26953125, "learning_rate": 7.727672850785651e-05, "loss": 0.9382, "step": 10730 }, { "epoch": 0.6159451473161776, "grad_norm": 0.259765625, "learning_rate": 7.717921329752466e-05, "loss": 1.0313, "step": 10735 }, { "epoch": 0.6162320337378432, "grad_norm": 0.271484375, "learning_rate": 7.708172097484699e-05, "loss": 0.8887, "step": 10740 }, { "epoch": 0.6165189201595088, "grad_norm": 0.27734375, "learning_rate": 7.698425163760156e-05, "loss": 0.9504, "step": 10745 }, { "epoch": 0.6168058065811746, "grad_norm": 0.267578125, "learning_rate": 7.688680538354323e-05, "loss": 0.9591, "step": 10750 }, { "epoch": 0.6170926930028402, "grad_norm": 0.271484375, "learning_rate": 7.678938231040383e-05, "loss": 0.8888, "step": 10755 }, { "epoch": 0.6173795794245058, "grad_norm": 0.2490234375, "learning_rate": 7.669198251589188e-05, "loss": 0.9999, "step": 10760 }, { "epoch": 0.6176664658461715, "grad_norm": 0.25, "learning_rate": 7.659460609769252e-05, "loss": 1.0073, "step": 10765 }, { "epoch": 0.6179533522678372, "grad_norm": 0.259765625, "learning_rate": 7.649725315346761e-05, "loss": 0.9104, "step": 10770 }, { "epoch": 0.6182402386895028, "grad_norm": 0.263671875, "learning_rate": 7.639992378085521e-05, "loss": 0.9227, "step": 10775 }, { "epoch": 0.6185271251111685, "grad_norm": 0.267578125, "learning_rate": 7.630261807747e-05, "loss": 0.9339, "step": 10780 }, { "epoch": 0.6188140115328341, "grad_norm": 0.255859375, "learning_rate": 7.620533614090269e-05, "loss": 0.9802, "step": 10785 }, { "epoch": 0.6191008979544999, "grad_norm": 0.27734375, "learning_rate": 7.610807806872038e-05, "loss": 1.0128, "step": 10790 }, { "epoch": 0.6193877843761655, "grad_norm": 0.28125, "learning_rate": 7.601084395846603e-05, "loss": 0.9487, "step": 10795 }, { "epoch": 0.6196746707978311, "grad_norm": 0.26171875, "learning_rate": 7.591363390765868e-05, "loss": 0.948, "step": 10800 }, { "epoch": 0.6199615572194968, "grad_norm": 0.2578125, "learning_rate": 7.581644801379324e-05, "loss": 0.9213, "step": 10805 }, { "epoch": 0.6202484436411625, "grad_norm": 0.283203125, "learning_rate": 7.571928637434031e-05, "loss": 0.9534, "step": 10810 }, { "epoch": 0.6205353300628281, "grad_norm": 0.263671875, "learning_rate": 7.562214908674633e-05, "loss": 1.0242, "step": 10815 }, { "epoch": 0.6208222164844938, "grad_norm": 0.26171875, "learning_rate": 7.55250362484331e-05, "loss": 0.9748, "step": 10820 }, { "epoch": 0.6211091029061595, "grad_norm": 0.255859375, "learning_rate": 7.542794795679811e-05, "loss": 0.9349, "step": 10825 }, { "epoch": 0.6213959893278251, "grad_norm": 0.275390625, "learning_rate": 7.533088430921402e-05, "loss": 0.9361, "step": 10830 }, { "epoch": 0.6216828757494908, "grad_norm": 0.306640625, "learning_rate": 7.5233845403029e-05, "loss": 0.9245, "step": 10835 }, { "epoch": 0.6219697621711564, "grad_norm": 0.2490234375, "learning_rate": 7.51368313355662e-05, "loss": 0.9017, "step": 10840 }, { "epoch": 0.6222566485928221, "grad_norm": 0.26953125, "learning_rate": 7.5039842204124e-05, "loss": 0.9009, "step": 10845 }, { "epoch": 0.6225435350144878, "grad_norm": 0.25390625, "learning_rate": 7.49428781059757e-05, "loss": 0.91, "step": 10850 }, { "epoch": 0.6228304214361534, "grad_norm": 0.259765625, "learning_rate": 7.484593913836951e-05, "loss": 0.919, "step": 10855 }, { "epoch": 0.623117307857819, "grad_norm": 0.271484375, "learning_rate": 7.474902539852848e-05, "loss": 0.9177, "step": 10860 }, { "epoch": 0.6234041942794848, "grad_norm": 0.265625, "learning_rate": 7.465213698365026e-05, "loss": 0.9752, "step": 10865 }, { "epoch": 0.6236910807011504, "grad_norm": 0.267578125, "learning_rate": 7.455527399090721e-05, "loss": 0.9747, "step": 10870 }, { "epoch": 0.6239779671228161, "grad_norm": 0.267578125, "learning_rate": 7.445843651744609e-05, "loss": 0.9166, "step": 10875 }, { "epoch": 0.6242648535444817, "grad_norm": 0.267578125, "learning_rate": 7.436162466038818e-05, "loss": 0.9544, "step": 10880 }, { "epoch": 0.6245517399661474, "grad_norm": 0.26171875, "learning_rate": 7.426483851682898e-05, "loss": 0.9348, "step": 10885 }, { "epoch": 0.6248386263878131, "grad_norm": 0.259765625, "learning_rate": 7.416807818383817e-05, "loss": 0.9886, "step": 10890 }, { "epoch": 0.6251255128094787, "grad_norm": 0.255859375, "learning_rate": 7.407134375845972e-05, "loss": 0.896, "step": 10895 }, { "epoch": 0.6254123992311444, "grad_norm": 0.25390625, "learning_rate": 7.397463533771139e-05, "loss": 0.9562, "step": 10900 }, { "epoch": 0.6256992856528101, "grad_norm": 0.2734375, "learning_rate": 7.387795301858504e-05, "loss": 0.9069, "step": 10905 }, { "epoch": 0.6259861720744757, "grad_norm": 0.265625, "learning_rate": 7.378129689804623e-05, "loss": 0.964, "step": 10910 }, { "epoch": 0.6262730584961413, "grad_norm": 0.267578125, "learning_rate": 7.368466707303434e-05, "loss": 0.9993, "step": 10915 }, { "epoch": 0.6265599449178071, "grad_norm": 0.27734375, "learning_rate": 7.358806364046226e-05, "loss": 1.0122, "step": 10920 }, { "epoch": 0.6268468313394727, "grad_norm": 0.3125, "learning_rate": 7.349148669721658e-05, "loss": 0.9321, "step": 10925 }, { "epoch": 0.6271337177611384, "grad_norm": 0.263671875, "learning_rate": 7.339493634015711e-05, "loss": 0.9515, "step": 10930 }, { "epoch": 0.627420604182804, "grad_norm": 0.259765625, "learning_rate": 7.329841266611721e-05, "loss": 0.9323, "step": 10935 }, { "epoch": 0.6277074906044697, "grad_norm": 0.2890625, "learning_rate": 7.320191577190336e-05, "loss": 0.9436, "step": 10940 }, { "epoch": 0.6279943770261354, "grad_norm": 0.255859375, "learning_rate": 7.310544575429514e-05, "loss": 0.8917, "step": 10945 }, { "epoch": 0.628281263447801, "grad_norm": 0.275390625, "learning_rate": 7.300900271004534e-05, "loss": 0.9159, "step": 10950 }, { "epoch": 0.6285681498694666, "grad_norm": 0.28515625, "learning_rate": 7.291258673587947e-05, "loss": 0.9851, "step": 10955 }, { "epoch": 0.6288550362911324, "grad_norm": 0.28515625, "learning_rate": 7.281619792849612e-05, "loss": 0.9262, "step": 10960 }, { "epoch": 0.629141922712798, "grad_norm": 0.265625, "learning_rate": 7.271983638456644e-05, "loss": 1.037, "step": 10965 }, { "epoch": 0.6294288091344636, "grad_norm": 0.259765625, "learning_rate": 7.26235022007344e-05, "loss": 0.9866, "step": 10970 }, { "epoch": 0.6297156955561293, "grad_norm": 0.275390625, "learning_rate": 7.252719547361641e-05, "loss": 0.9467, "step": 10975 }, { "epoch": 0.630002581977795, "grad_norm": 0.2734375, "learning_rate": 7.243091629980141e-05, "loss": 0.9895, "step": 10980 }, { "epoch": 0.6302894683994607, "grad_norm": 0.2890625, "learning_rate": 7.233466477585068e-05, "loss": 0.8992, "step": 10985 }, { "epoch": 0.6305763548211263, "grad_norm": 0.275390625, "learning_rate": 7.223844099829773e-05, "loss": 0.907, "step": 10990 }, { "epoch": 0.630863241242792, "grad_norm": 0.2431640625, "learning_rate": 7.214224506364834e-05, "loss": 0.901, "step": 10995 }, { "epoch": 0.6311501276644577, "grad_norm": 0.294921875, "learning_rate": 7.204607706838026e-05, "loss": 0.9398, "step": 11000 }, { "epoch": 0.6314370140861233, "grad_norm": 0.28125, "learning_rate": 7.194993710894335e-05, "loss": 1.0324, "step": 11005 }, { "epoch": 0.6317239005077889, "grad_norm": 0.259765625, "learning_rate": 7.185382528175917e-05, "loss": 0.8882, "step": 11010 }, { "epoch": 0.6320107869294547, "grad_norm": 0.275390625, "learning_rate": 7.175774168322123e-05, "loss": 1.0597, "step": 11015 }, { "epoch": 0.6322976733511203, "grad_norm": 0.2578125, "learning_rate": 7.166168640969464e-05, "loss": 0.9184, "step": 11020 }, { "epoch": 0.6325845597727859, "grad_norm": 0.26953125, "learning_rate": 7.156565955751616e-05, "loss": 0.9656, "step": 11025 }, { "epoch": 0.6328714461944516, "grad_norm": 0.267578125, "learning_rate": 7.146966122299396e-05, "loss": 0.9828, "step": 11030 }, { "epoch": 0.6331583326161173, "grad_norm": 0.26953125, "learning_rate": 7.137369150240769e-05, "loss": 0.9816, "step": 11035 }, { "epoch": 0.633445219037783, "grad_norm": 0.29296875, "learning_rate": 7.127775049200828e-05, "loss": 0.9343, "step": 11040 }, { "epoch": 0.6337321054594486, "grad_norm": 0.263671875, "learning_rate": 7.118183828801781e-05, "loss": 0.9774, "step": 11045 }, { "epoch": 0.6340189918811142, "grad_norm": 0.26171875, "learning_rate": 7.108595498662956e-05, "loss": 0.9291, "step": 11050 }, { "epoch": 0.63430587830278, "grad_norm": 0.263671875, "learning_rate": 7.099010068400781e-05, "loss": 0.9167, "step": 11055 }, { "epoch": 0.6345927647244456, "grad_norm": 0.25390625, "learning_rate": 7.089427547628766e-05, "loss": 0.9648, "step": 11060 }, { "epoch": 0.6348796511461112, "grad_norm": 0.251953125, "learning_rate": 7.079847945957516e-05, "loss": 0.8882, "step": 11065 }, { "epoch": 0.635166537567777, "grad_norm": 0.2890625, "learning_rate": 7.070271272994698e-05, "loss": 0.9479, "step": 11070 }, { "epoch": 0.6354534239894426, "grad_norm": 0.2734375, "learning_rate": 7.060697538345048e-05, "loss": 0.9412, "step": 11075 }, { "epoch": 0.6357403104111082, "grad_norm": 0.263671875, "learning_rate": 7.051126751610346e-05, "loss": 0.9442, "step": 11080 }, { "epoch": 0.6360271968327739, "grad_norm": 0.251953125, "learning_rate": 7.041558922389434e-05, "loss": 0.9543, "step": 11085 }, { "epoch": 0.6363140832544396, "grad_norm": 0.26953125, "learning_rate": 7.031994060278162e-05, "loss": 0.9142, "step": 11090 }, { "epoch": 0.6366009696761052, "grad_norm": 0.271484375, "learning_rate": 7.02243217486943e-05, "loss": 0.9297, "step": 11095 }, { "epoch": 0.6368878560977709, "grad_norm": 0.255859375, "learning_rate": 7.012873275753137e-05, "loss": 0.8885, "step": 11100 }, { "epoch": 0.6371747425194365, "grad_norm": 0.28125, "learning_rate": 7.003317372516189e-05, "loss": 0.9424, "step": 11105 }, { "epoch": 0.6374616289411023, "grad_norm": 0.263671875, "learning_rate": 6.993764474742493e-05, "loss": 0.9036, "step": 11110 }, { "epoch": 0.6377485153627679, "grad_norm": 0.26171875, "learning_rate": 6.984214592012935e-05, "loss": 0.9112, "step": 11115 }, { "epoch": 0.6380354017844335, "grad_norm": 0.294921875, "learning_rate": 6.974667733905377e-05, "loss": 0.9943, "step": 11120 }, { "epoch": 0.6383222882060992, "grad_norm": 0.259765625, "learning_rate": 6.965123909994658e-05, "loss": 0.9293, "step": 11125 }, { "epoch": 0.6386091746277649, "grad_norm": 0.25, "learning_rate": 6.955583129852559e-05, "loss": 0.923, "step": 11130 }, { "epoch": 0.6388960610494305, "grad_norm": 0.2578125, "learning_rate": 6.946045403047821e-05, "loss": 0.9667, "step": 11135 }, { "epoch": 0.6391829474710962, "grad_norm": 0.27734375, "learning_rate": 6.936510739146113e-05, "loss": 0.9108, "step": 11140 }, { "epoch": 0.6394698338927619, "grad_norm": 0.2734375, "learning_rate": 6.926979147710044e-05, "loss": 0.9802, "step": 11145 }, { "epoch": 0.6397567203144275, "grad_norm": 0.287109375, "learning_rate": 6.917450638299123e-05, "loss": 0.9533, "step": 11150 }, { "epoch": 0.6400436067360932, "grad_norm": 0.263671875, "learning_rate": 6.90792522046979e-05, "loss": 0.9407, "step": 11155 }, { "epoch": 0.6403304931577588, "grad_norm": 0.263671875, "learning_rate": 6.898402903775369e-05, "loss": 0.9622, "step": 11160 }, { "epoch": 0.6406173795794246, "grad_norm": 0.275390625, "learning_rate": 6.888883697766076e-05, "loss": 0.911, "step": 11165 }, { "epoch": 0.6409042660010902, "grad_norm": 0.26953125, "learning_rate": 6.87936761198901e-05, "loss": 1.0374, "step": 11170 }, { "epoch": 0.6411911524227558, "grad_norm": 0.275390625, "learning_rate": 6.869854655988139e-05, "loss": 0.9609, "step": 11175 }, { "epoch": 0.6414780388444214, "grad_norm": 0.28125, "learning_rate": 6.860344839304299e-05, "loss": 0.9534, "step": 11180 }, { "epoch": 0.6417649252660872, "grad_norm": 0.275390625, "learning_rate": 6.850838171475165e-05, "loss": 0.9629, "step": 11185 }, { "epoch": 0.6420518116877528, "grad_norm": 0.255859375, "learning_rate": 6.841334662035266e-05, "loss": 0.9682, "step": 11190 }, { "epoch": 0.6423386981094185, "grad_norm": 0.26171875, "learning_rate": 6.83183432051595e-05, "loss": 0.9755, "step": 11195 }, { "epoch": 0.6426255845310841, "grad_norm": 0.259765625, "learning_rate": 6.822337156445406e-05, "loss": 0.944, "step": 11200 }, { "epoch": 0.6429124709527498, "grad_norm": 0.2578125, "learning_rate": 6.812843179348618e-05, "loss": 0.9972, "step": 11205 }, { "epoch": 0.6431993573744155, "grad_norm": 0.275390625, "learning_rate": 6.803352398747384e-05, "loss": 0.9751, "step": 11210 }, { "epoch": 0.6434862437960811, "grad_norm": 0.251953125, "learning_rate": 6.793864824160295e-05, "loss": 0.996, "step": 11215 }, { "epoch": 0.6437731302177468, "grad_norm": 0.298828125, "learning_rate": 6.78438046510272e-05, "loss": 0.9886, "step": 11220 }, { "epoch": 0.6440600166394125, "grad_norm": 0.267578125, "learning_rate": 6.774899331086814e-05, "loss": 1.0218, "step": 11225 }, { "epoch": 0.6443469030610781, "grad_norm": 0.2734375, "learning_rate": 6.765421431621491e-05, "loss": 0.9533, "step": 11230 }, { "epoch": 0.6446337894827437, "grad_norm": 0.2578125, "learning_rate": 6.755946776212421e-05, "loss": 0.921, "step": 11235 }, { "epoch": 0.6449206759044095, "grad_norm": 0.25, "learning_rate": 6.746475374362018e-05, "loss": 0.8925, "step": 11240 }, { "epoch": 0.6452075623260751, "grad_norm": 0.26953125, "learning_rate": 6.737007235569442e-05, "loss": 0.9243, "step": 11245 }, { "epoch": 0.6454944487477408, "grad_norm": 0.28125, "learning_rate": 6.727542369330571e-05, "loss": 0.9651, "step": 11250 }, { "epoch": 0.6457813351694064, "grad_norm": 0.2451171875, "learning_rate": 6.718080785138002e-05, "loss": 0.939, "step": 11255 }, { "epoch": 0.6460682215910721, "grad_norm": 0.2734375, "learning_rate": 6.708622492481051e-05, "loss": 0.9946, "step": 11260 }, { "epoch": 0.6463551080127378, "grad_norm": 0.265625, "learning_rate": 6.699167500845714e-05, "loss": 0.991, "step": 11265 }, { "epoch": 0.6466419944344034, "grad_norm": 0.2578125, "learning_rate": 6.689715819714697e-05, "loss": 0.9257, "step": 11270 }, { "epoch": 0.646928880856069, "grad_norm": 0.279296875, "learning_rate": 6.680267458567366e-05, "loss": 0.9554, "step": 11275 }, { "epoch": 0.6472157672777348, "grad_norm": 0.251953125, "learning_rate": 6.670822426879776e-05, "loss": 0.921, "step": 11280 }, { "epoch": 0.6475026536994004, "grad_norm": 0.267578125, "learning_rate": 6.661380734124625e-05, "loss": 0.9452, "step": 11285 }, { "epoch": 0.647789540121066, "grad_norm": 0.26171875, "learning_rate": 6.65194238977128e-05, "loss": 0.9256, "step": 11290 }, { "epoch": 0.6480764265427318, "grad_norm": 0.2578125, "learning_rate": 6.642507403285732e-05, "loss": 0.9379, "step": 11295 }, { "epoch": 0.6483633129643974, "grad_norm": 0.2734375, "learning_rate": 6.633075784130619e-05, "loss": 0.9369, "step": 11300 }, { "epoch": 0.648650199386063, "grad_norm": 0.32421875, "learning_rate": 6.623647541765195e-05, "loss": 1.0451, "step": 11305 }, { "epoch": 0.6489370858077287, "grad_norm": 0.2734375, "learning_rate": 6.614222685645324e-05, "loss": 1.0281, "step": 11310 }, { "epoch": 0.6492239722293944, "grad_norm": 0.26171875, "learning_rate": 6.604801225223486e-05, "loss": 0.8973, "step": 11315 }, { "epoch": 0.6495108586510601, "grad_norm": 0.2578125, "learning_rate": 6.595383169948738e-05, "loss": 0.8868, "step": 11320 }, { "epoch": 0.6497977450727257, "grad_norm": 0.259765625, "learning_rate": 6.58596852926674e-05, "loss": 0.9136, "step": 11325 }, { "epoch": 0.6500846314943913, "grad_norm": 0.263671875, "learning_rate": 6.576557312619711e-05, "loss": 0.9365, "step": 11330 }, { "epoch": 0.6503715179160571, "grad_norm": 0.2734375, "learning_rate": 6.567149529446447e-05, "loss": 0.9976, "step": 11335 }, { "epoch": 0.6506584043377227, "grad_norm": 0.271484375, "learning_rate": 6.5577451891823e-05, "loss": 0.9617, "step": 11340 }, { "epoch": 0.6509452907593883, "grad_norm": 0.271484375, "learning_rate": 6.548344301259161e-05, "loss": 0.9677, "step": 11345 }, { "epoch": 0.651232177181054, "grad_norm": 0.265625, "learning_rate": 6.53894687510547e-05, "loss": 0.9337, "step": 11350 }, { "epoch": 0.6515190636027197, "grad_norm": 0.2578125, "learning_rate": 6.52955292014618e-05, "loss": 0.9319, "step": 11355 }, { "epoch": 0.6518059500243853, "grad_norm": 0.2734375, "learning_rate": 6.52016244580278e-05, "loss": 0.9615, "step": 11360 }, { "epoch": 0.652092836446051, "grad_norm": 0.271484375, "learning_rate": 6.51077546149325e-05, "loss": 0.9744, "step": 11365 }, { "epoch": 0.6523797228677166, "grad_norm": 0.2421875, "learning_rate": 6.50139197663209e-05, "loss": 0.9281, "step": 11370 }, { "epoch": 0.6526666092893824, "grad_norm": 0.267578125, "learning_rate": 6.492012000630269e-05, "loss": 0.9549, "step": 11375 }, { "epoch": 0.652953495711048, "grad_norm": 0.2734375, "learning_rate": 6.482635542895255e-05, "loss": 0.9484, "step": 11380 }, { "epoch": 0.6532403821327136, "grad_norm": 0.275390625, "learning_rate": 6.473262612830977e-05, "loss": 1.0413, "step": 11385 }, { "epoch": 0.6535272685543794, "grad_norm": 0.279296875, "learning_rate": 6.46389321983783e-05, "loss": 0.9629, "step": 11390 }, { "epoch": 0.653814154976045, "grad_norm": 0.26171875, "learning_rate": 6.45452737331266e-05, "loss": 0.9671, "step": 11395 }, { "epoch": 0.6541010413977106, "grad_norm": 0.26171875, "learning_rate": 6.445165082648755e-05, "loss": 0.9118, "step": 11400 }, { "epoch": 0.6543879278193763, "grad_norm": 0.26171875, "learning_rate": 6.43580635723584e-05, "loss": 0.9599, "step": 11405 }, { "epoch": 0.654674814241042, "grad_norm": 0.283203125, "learning_rate": 6.426451206460061e-05, "loss": 0.9869, "step": 11410 }, { "epoch": 0.6549617006627076, "grad_norm": 0.2578125, "learning_rate": 6.417099639703979e-05, "loss": 0.9547, "step": 11415 }, { "epoch": 0.6552485870843733, "grad_norm": 0.263671875, "learning_rate": 6.407751666346569e-05, "loss": 0.9587, "step": 11420 }, { "epoch": 0.6555354735060389, "grad_norm": 0.259765625, "learning_rate": 6.398407295763187e-05, "loss": 0.9317, "step": 11425 }, { "epoch": 0.6558223599277047, "grad_norm": 0.275390625, "learning_rate": 6.38906653732559e-05, "loss": 0.9809, "step": 11430 }, { "epoch": 0.6561092463493703, "grad_norm": 0.255859375, "learning_rate": 6.3797294004019e-05, "loss": 0.9399, "step": 11435 }, { "epoch": 0.6563961327710359, "grad_norm": 0.296875, "learning_rate": 6.37039589435662e-05, "loss": 0.9468, "step": 11440 }, { "epoch": 0.6566830191927016, "grad_norm": 0.26171875, "learning_rate": 6.361066028550593e-05, "loss": 0.8995, "step": 11445 }, { "epoch": 0.6569699056143673, "grad_norm": 0.267578125, "learning_rate": 6.351739812341036e-05, "loss": 0.9531, "step": 11450 }, { "epoch": 0.6572567920360329, "grad_norm": 0.265625, "learning_rate": 6.342417255081479e-05, "loss": 0.9077, "step": 11455 }, { "epoch": 0.6575436784576986, "grad_norm": 0.27734375, "learning_rate": 6.333098366121804e-05, "loss": 0.9416, "step": 11460 }, { "epoch": 0.6578305648793643, "grad_norm": 0.2470703125, "learning_rate": 6.323783154808205e-05, "loss": 0.9327, "step": 11465 }, { "epoch": 0.6581174513010299, "grad_norm": 0.267578125, "learning_rate": 6.314471630483183e-05, "loss": 1.0023, "step": 11470 }, { "epoch": 0.6584043377226956, "grad_norm": 0.275390625, "learning_rate": 6.305163802485554e-05, "loss": 0.9713, "step": 11475 }, { "epoch": 0.6586912241443612, "grad_norm": 0.25390625, "learning_rate": 6.29585968015041e-05, "loss": 0.9613, "step": 11480 }, { "epoch": 0.658978110566027, "grad_norm": 0.279296875, "learning_rate": 6.286559272809142e-05, "loss": 1.0388, "step": 11485 }, { "epoch": 0.6592649969876926, "grad_norm": 0.25390625, "learning_rate": 6.277262589789406e-05, "loss": 0.9602, "step": 11490 }, { "epoch": 0.6595518834093582, "grad_norm": 0.259765625, "learning_rate": 6.267969640415124e-05, "loss": 0.9162, "step": 11495 }, { "epoch": 0.6598387698310239, "grad_norm": 0.271484375, "learning_rate": 6.258680434006478e-05, "loss": 1.0212, "step": 11500 }, { "epoch": 0.6601256562526896, "grad_norm": 0.25390625, "learning_rate": 6.24939497987989e-05, "loss": 1.0134, "step": 11505 }, { "epoch": 0.6604125426743552, "grad_norm": 0.26953125, "learning_rate": 6.240113287348026e-05, "loss": 0.9589, "step": 11510 }, { "epoch": 0.6606994290960209, "grad_norm": 0.263671875, "learning_rate": 6.230835365719767e-05, "loss": 0.9189, "step": 11515 }, { "epoch": 0.6609863155176865, "grad_norm": 0.263671875, "learning_rate": 6.22156122430023e-05, "loss": 0.9563, "step": 11520 }, { "epoch": 0.6612732019393522, "grad_norm": 0.265625, "learning_rate": 6.212290872390722e-05, "loss": 0.9323, "step": 11525 }, { "epoch": 0.6615600883610179, "grad_norm": 0.255859375, "learning_rate": 6.203024319288762e-05, "loss": 0.8367, "step": 11530 }, { "epoch": 0.6618469747826835, "grad_norm": 0.2578125, "learning_rate": 6.193761574288057e-05, "loss": 0.9449, "step": 11535 }, { "epoch": 0.6621338612043492, "grad_norm": 0.275390625, "learning_rate": 6.184502646678486e-05, "loss": 0.9963, "step": 11540 }, { "epoch": 0.6624207476260149, "grad_norm": 0.28515625, "learning_rate": 6.175247545746116e-05, "loss": 1.013, "step": 11545 }, { "epoch": 0.6627076340476805, "grad_norm": 0.25390625, "learning_rate": 6.165996280773157e-05, "loss": 0.9025, "step": 11550 }, { "epoch": 0.6629945204693461, "grad_norm": 0.330078125, "learning_rate": 6.156748861037991e-05, "loss": 0.9483, "step": 11555 }, { "epoch": 0.6632814068910119, "grad_norm": 0.259765625, "learning_rate": 6.147505295815124e-05, "loss": 0.9291, "step": 11560 }, { "epoch": 0.6635682933126775, "grad_norm": 0.2734375, "learning_rate": 6.138265594375212e-05, "loss": 1.0162, "step": 11565 }, { "epoch": 0.6638551797343432, "grad_norm": 0.263671875, "learning_rate": 6.129029765985028e-05, "loss": 0.928, "step": 11570 }, { "epoch": 0.6641420661560088, "grad_norm": 0.2578125, "learning_rate": 6.119797819907463e-05, "loss": 0.9326, "step": 11575 }, { "epoch": 0.6644289525776745, "grad_norm": 0.263671875, "learning_rate": 6.110569765401513e-05, "loss": 0.9804, "step": 11580 }, { "epoch": 0.6647158389993402, "grad_norm": 0.271484375, "learning_rate": 6.1013456117222686e-05, "loss": 0.9251, "step": 11585 }, { "epoch": 0.6650027254210058, "grad_norm": 0.28515625, "learning_rate": 6.092125368120921e-05, "loss": 0.8896, "step": 11590 }, { "epoch": 0.6652896118426714, "grad_norm": 0.287109375, "learning_rate": 6.082909043844719e-05, "loss": 0.9167, "step": 11595 }, { "epoch": 0.6655764982643372, "grad_norm": 0.2578125, "learning_rate": 6.073696648137001e-05, "loss": 0.9636, "step": 11600 }, { "epoch": 0.6658633846860028, "grad_norm": 0.2734375, "learning_rate": 6.0644881902371474e-05, "loss": 0.9364, "step": 11605 }, { "epoch": 0.6661502711076684, "grad_norm": 0.265625, "learning_rate": 6.055283679380605e-05, "loss": 0.9493, "step": 11610 }, { "epoch": 0.6664371575293342, "grad_norm": 0.26953125, "learning_rate": 6.046083124798851e-05, "loss": 1.0074, "step": 11615 }, { "epoch": 0.6667240439509998, "grad_norm": 0.283203125, "learning_rate": 6.036886535719399e-05, "loss": 0.9114, "step": 11620 }, { "epoch": 0.6670109303726655, "grad_norm": 0.275390625, "learning_rate": 6.027693921365789e-05, "loss": 0.9649, "step": 11625 }, { "epoch": 0.6672978167943311, "grad_norm": 0.267578125, "learning_rate": 6.018505290957565e-05, "loss": 0.9611, "step": 11630 }, { "epoch": 0.6675847032159968, "grad_norm": 0.25390625, "learning_rate": 6.0093206537102866e-05, "loss": 0.9885, "step": 11635 }, { "epoch": 0.6678715896376625, "grad_norm": 0.25, "learning_rate": 6.000140018835497e-05, "loss": 0.997, "step": 11640 }, { "epoch": 0.6681584760593281, "grad_norm": 0.279296875, "learning_rate": 5.990963395540739e-05, "loss": 0.9586, "step": 11645 }, { "epoch": 0.6684453624809937, "grad_norm": 0.26171875, "learning_rate": 5.9817907930295155e-05, "loss": 0.9391, "step": 11650 }, { "epoch": 0.6687322489026595, "grad_norm": 0.279296875, "learning_rate": 5.972622220501315e-05, "loss": 0.9215, "step": 11655 }, { "epoch": 0.6690191353243251, "grad_norm": 0.2734375, "learning_rate": 5.9634576871515656e-05, "loss": 0.8938, "step": 11660 }, { "epoch": 0.6693060217459907, "grad_norm": 0.271484375, "learning_rate": 5.9542972021716616e-05, "loss": 0.9115, "step": 11665 }, { "epoch": 0.6695929081676564, "grad_norm": 0.28125, "learning_rate": 5.945140774748929e-05, "loss": 0.9647, "step": 11670 }, { "epoch": 0.6698797945893221, "grad_norm": 0.28515625, "learning_rate": 5.935988414066617e-05, "loss": 0.9815, "step": 11675 }, { "epoch": 0.6701666810109878, "grad_norm": 0.255859375, "learning_rate": 5.9268401293039125e-05, "loss": 1.0103, "step": 11680 }, { "epoch": 0.6704535674326534, "grad_norm": 0.26171875, "learning_rate": 5.917695929635898e-05, "loss": 0.8659, "step": 11685 }, { "epoch": 0.6707404538543191, "grad_norm": 0.275390625, "learning_rate": 5.908555824233575e-05, "loss": 0.9409, "step": 11690 }, { "epoch": 0.6710273402759848, "grad_norm": 0.2734375, "learning_rate": 5.899419822263822e-05, "loss": 0.9304, "step": 11695 }, { "epoch": 0.6713142266976504, "grad_norm": 0.27734375, "learning_rate": 5.8902879328894156e-05, "loss": 0.897, "step": 11700 }, { "epoch": 0.671601113119316, "grad_norm": 0.2890625, "learning_rate": 5.881160165269004e-05, "loss": 0.979, "step": 11705 }, { "epoch": 0.6718879995409818, "grad_norm": 0.287109375, "learning_rate": 5.872036528557096e-05, "loss": 0.9717, "step": 11710 }, { "epoch": 0.6721748859626474, "grad_norm": 0.30078125, "learning_rate": 5.862917031904066e-05, "loss": 0.9735, "step": 11715 }, { "epoch": 0.672461772384313, "grad_norm": 0.271484375, "learning_rate": 5.853801684456126e-05, "loss": 0.9391, "step": 11720 }, { "epoch": 0.6727486588059787, "grad_norm": 0.27734375, "learning_rate": 5.844690495355338e-05, "loss": 0.9755, "step": 11725 }, { "epoch": 0.6730355452276444, "grad_norm": 0.267578125, "learning_rate": 5.8355834737395856e-05, "loss": 0.8935, "step": 11730 }, { "epoch": 0.67332243164931, "grad_norm": 0.251953125, "learning_rate": 5.8264806287425724e-05, "loss": 0.9377, "step": 11735 }, { "epoch": 0.6736093180709757, "grad_norm": 0.267578125, "learning_rate": 5.817381969493823e-05, "loss": 0.959, "step": 11740 }, { "epoch": 0.6738962044926413, "grad_norm": 0.26953125, "learning_rate": 5.808287505118647e-05, "loss": 1.0001, "step": 11745 }, { "epoch": 0.6741830909143071, "grad_norm": 0.279296875, "learning_rate": 5.799197244738166e-05, "loss": 0.9712, "step": 11750 }, { "epoch": 0.6744699773359727, "grad_norm": 0.265625, "learning_rate": 5.790111197469269e-05, "loss": 0.9161, "step": 11755 }, { "epoch": 0.6747568637576383, "grad_norm": 0.279296875, "learning_rate": 5.781029372424633e-05, "loss": 1.0872, "step": 11760 }, { "epoch": 0.675043750179304, "grad_norm": 0.27734375, "learning_rate": 5.7719517787126856e-05, "loss": 0.9235, "step": 11765 }, { "epoch": 0.6753306366009697, "grad_norm": 0.265625, "learning_rate": 5.762878425437627e-05, "loss": 0.8471, "step": 11770 }, { "epoch": 0.6756175230226353, "grad_norm": 0.25, "learning_rate": 5.753809321699388e-05, "loss": 0.954, "step": 11775 }, { "epoch": 0.675904409444301, "grad_norm": 0.265625, "learning_rate": 5.744744476593652e-05, "loss": 0.9438, "step": 11780 }, { "epoch": 0.6761912958659667, "grad_norm": 0.2578125, "learning_rate": 5.7356838992118277e-05, "loss": 1.0079, "step": 11785 }, { "epoch": 0.6764781822876323, "grad_norm": 0.283203125, "learning_rate": 5.7266275986410324e-05, "loss": 1.0261, "step": 11790 }, { "epoch": 0.676765068709298, "grad_norm": 0.263671875, "learning_rate": 5.717575583964111e-05, "loss": 0.9681, "step": 11795 }, { "epoch": 0.6770519551309636, "grad_norm": 0.28125, "learning_rate": 5.708527864259594e-05, "loss": 0.9181, "step": 11800 }, { "epoch": 0.6773388415526294, "grad_norm": 0.291015625, "learning_rate": 5.6994844486017204e-05, "loss": 0.8705, "step": 11805 }, { "epoch": 0.677625727974295, "grad_norm": 0.265625, "learning_rate": 5.6904453460603955e-05, "loss": 1.0169, "step": 11810 }, { "epoch": 0.6779126143959606, "grad_norm": 0.2578125, "learning_rate": 5.681410565701215e-05, "loss": 0.9531, "step": 11815 }, { "epoch": 0.6781995008176263, "grad_norm": 0.27734375, "learning_rate": 5.672380116585425e-05, "loss": 0.924, "step": 11820 }, { "epoch": 0.678486387239292, "grad_norm": 0.271484375, "learning_rate": 5.663354007769943e-05, "loss": 0.9283, "step": 11825 }, { "epoch": 0.6787732736609576, "grad_norm": 0.251953125, "learning_rate": 5.654332248307319e-05, "loss": 1.0185, "step": 11830 }, { "epoch": 0.6790601600826233, "grad_norm": 0.26171875, "learning_rate": 5.6453148472457476e-05, "loss": 0.9277, "step": 11835 }, { "epoch": 0.6793470465042889, "grad_norm": 0.263671875, "learning_rate": 5.636301813629057e-05, "loss": 0.933, "step": 11840 }, { "epoch": 0.6796339329259546, "grad_norm": 0.271484375, "learning_rate": 5.62729315649668e-05, "loss": 0.9314, "step": 11845 }, { "epoch": 0.6799208193476203, "grad_norm": 0.2734375, "learning_rate": 5.618288884883684e-05, "loss": 0.8851, "step": 11850 }, { "epoch": 0.6802077057692859, "grad_norm": 0.255859375, "learning_rate": 5.6092890078207107e-05, "loss": 0.9085, "step": 11855 }, { "epoch": 0.6804945921909517, "grad_norm": 0.2734375, "learning_rate": 5.600293534334014e-05, "loss": 0.8873, "step": 11860 }, { "epoch": 0.6807814786126173, "grad_norm": 0.259765625, "learning_rate": 5.591302473445429e-05, "loss": 0.9314, "step": 11865 }, { "epoch": 0.6810683650342829, "grad_norm": 0.26171875, "learning_rate": 5.582315834172353e-05, "loss": 0.963, "step": 11870 }, { "epoch": 0.6813552514559486, "grad_norm": 0.271484375, "learning_rate": 5.573333625527767e-05, "loss": 0.9604, "step": 11875 }, { "epoch": 0.6816421378776143, "grad_norm": 0.275390625, "learning_rate": 5.564355856520189e-05, "loss": 0.9937, "step": 11880 }, { "epoch": 0.6819290242992799, "grad_norm": 0.26953125, "learning_rate": 5.555382536153702e-05, "loss": 0.9049, "step": 11885 }, { "epoch": 0.6822159107209456, "grad_norm": 0.248046875, "learning_rate": 5.5464136734279094e-05, "loss": 0.9596, "step": 11890 }, { "epoch": 0.6825027971426112, "grad_norm": 0.265625, "learning_rate": 5.537449277337965e-05, "loss": 0.9714, "step": 11895 }, { "epoch": 0.6827896835642769, "grad_norm": 0.25, "learning_rate": 5.528489356874522e-05, "loss": 0.9063, "step": 11900 }, { "epoch": 0.6830765699859426, "grad_norm": 0.259765625, "learning_rate": 5.5195339210237626e-05, "loss": 0.9748, "step": 11905 }, { "epoch": 0.6833634564076082, "grad_norm": 0.263671875, "learning_rate": 5.510582978767356e-05, "loss": 0.9948, "step": 11910 }, { "epoch": 0.6836503428292738, "grad_norm": 0.29296875, "learning_rate": 5.501636539082478e-05, "loss": 0.9725, "step": 11915 }, { "epoch": 0.6839372292509396, "grad_norm": 0.2412109375, "learning_rate": 5.4926946109417775e-05, "loss": 0.9469, "step": 11920 }, { "epoch": 0.6842241156726052, "grad_norm": 0.2578125, "learning_rate": 5.483757203313383e-05, "loss": 1.0519, "step": 11925 }, { "epoch": 0.6845110020942708, "grad_norm": 0.275390625, "learning_rate": 5.4748243251608965e-05, "loss": 0.9805, "step": 11930 }, { "epoch": 0.6847978885159366, "grad_norm": 0.2578125, "learning_rate": 5.465895985443361e-05, "loss": 0.9927, "step": 11935 }, { "epoch": 0.6850847749376022, "grad_norm": 0.2470703125, "learning_rate": 5.4569721931152864e-05, "loss": 0.881, "step": 11940 }, { "epoch": 0.6853716613592679, "grad_norm": 0.255859375, "learning_rate": 5.448052957126606e-05, "loss": 1.0047, "step": 11945 }, { "epoch": 0.6856585477809335, "grad_norm": 0.287109375, "learning_rate": 5.4391382864226916e-05, "loss": 0.9986, "step": 11950 }, { "epoch": 0.6859454342025992, "grad_norm": 0.263671875, "learning_rate": 5.4302281899443394e-05, "loss": 0.9537, "step": 11955 }, { "epoch": 0.6862323206242649, "grad_norm": 0.28515625, "learning_rate": 5.421322676627747e-05, "loss": 0.9491, "step": 11960 }, { "epoch": 0.6865192070459305, "grad_norm": 0.28125, "learning_rate": 5.412421755404529e-05, "loss": 1.0314, "step": 11965 }, { "epoch": 0.6868060934675961, "grad_norm": 0.255859375, "learning_rate": 5.40352543520168e-05, "loss": 0.9593, "step": 11970 }, { "epoch": 0.6870929798892619, "grad_norm": 0.25, "learning_rate": 5.3946337249415936e-05, "loss": 0.9981, "step": 11975 }, { "epoch": 0.6873798663109275, "grad_norm": 0.28515625, "learning_rate": 5.385746633542027e-05, "loss": 0.9706, "step": 11980 }, { "epoch": 0.6876667527325931, "grad_norm": 0.287109375, "learning_rate": 5.376864169916116e-05, "loss": 0.9865, "step": 11985 }, { "epoch": 0.6879536391542588, "grad_norm": 0.267578125, "learning_rate": 5.367986342972355e-05, "loss": 1.002, "step": 11990 }, { "epoch": 0.6882405255759245, "grad_norm": 0.263671875, "learning_rate": 5.359113161614576e-05, "loss": 0.9297, "step": 11995 }, { "epoch": 0.6885274119975902, "grad_norm": 0.287109375, "learning_rate": 5.3502446347419674e-05, "loss": 0.932, "step": 12000 }, { "epoch": 0.6888142984192558, "grad_norm": 0.267578125, "learning_rate": 5.341380771249037e-05, "loss": 0.9137, "step": 12005 }, { "epoch": 0.6891011848409215, "grad_norm": 0.271484375, "learning_rate": 5.332521580025622e-05, "loss": 1.0213, "step": 12010 }, { "epoch": 0.6893880712625872, "grad_norm": 0.2734375, "learning_rate": 5.323667069956868e-05, "loss": 0.9587, "step": 12015 }, { "epoch": 0.6896749576842528, "grad_norm": 0.267578125, "learning_rate": 5.314817249923236e-05, "loss": 0.9575, "step": 12020 }, { "epoch": 0.6899618441059184, "grad_norm": 0.2578125, "learning_rate": 5.3059721288004714e-05, "loss": 0.9456, "step": 12025 }, { "epoch": 0.6902487305275842, "grad_norm": 0.275390625, "learning_rate": 5.297131715459614e-05, "loss": 0.9663, "step": 12030 }, { "epoch": 0.6905356169492498, "grad_norm": 0.23828125, "learning_rate": 5.288296018766987e-05, "loss": 1.0198, "step": 12035 }, { "epoch": 0.6908225033709154, "grad_norm": 0.2890625, "learning_rate": 5.2794650475841664e-05, "loss": 0.9994, "step": 12040 }, { "epoch": 0.6911093897925811, "grad_norm": 0.24609375, "learning_rate": 5.2706388107680095e-05, "loss": 0.8779, "step": 12045 }, { "epoch": 0.6913962762142468, "grad_norm": 0.265625, "learning_rate": 5.2618173171706064e-05, "loss": 0.957, "step": 12050 }, { "epoch": 0.6916831626359125, "grad_norm": 0.283203125, "learning_rate": 5.253000575639305e-05, "loss": 1.0285, "step": 12055 }, { "epoch": 0.6919700490575781, "grad_norm": 0.267578125, "learning_rate": 5.2441885950166746e-05, "loss": 0.9555, "step": 12060 }, { "epoch": 0.6922569354792437, "grad_norm": 0.25390625, "learning_rate": 5.235381384140519e-05, "loss": 0.9178, "step": 12065 }, { "epoch": 0.6925438219009095, "grad_norm": 0.28125, "learning_rate": 5.226578951843859e-05, "loss": 0.9702, "step": 12070 }, { "epoch": 0.6928307083225751, "grad_norm": 0.26953125, "learning_rate": 5.217781306954912e-05, "loss": 0.9106, "step": 12075 }, { "epoch": 0.6931175947442407, "grad_norm": 0.275390625, "learning_rate": 5.208988458297109e-05, "loss": 0.9334, "step": 12080 }, { "epoch": 0.6934044811659065, "grad_norm": 0.26171875, "learning_rate": 5.2002004146890535e-05, "loss": 0.9384, "step": 12085 }, { "epoch": 0.6936913675875721, "grad_norm": 0.267578125, "learning_rate": 5.191417184944549e-05, "loss": 0.9208, "step": 12090 }, { "epoch": 0.6939782540092377, "grad_norm": 0.2578125, "learning_rate": 5.182638777872555e-05, "loss": 0.8811, "step": 12095 }, { "epoch": 0.6942651404309034, "grad_norm": 0.296875, "learning_rate": 5.1738652022771974e-05, "loss": 0.9103, "step": 12100 }, { "epoch": 0.6945520268525691, "grad_norm": 0.29296875, "learning_rate": 5.165096466957769e-05, "loss": 0.9262, "step": 12105 }, { "epoch": 0.6948389132742347, "grad_norm": 0.27734375, "learning_rate": 5.1563325807086856e-05, "loss": 0.8738, "step": 12110 }, { "epoch": 0.6951257996959004, "grad_norm": 0.275390625, "learning_rate": 5.147573552319526e-05, "loss": 0.8914, "step": 12115 }, { "epoch": 0.695412686117566, "grad_norm": 0.287109375, "learning_rate": 5.138819390574972e-05, "loss": 0.8952, "step": 12120 }, { "epoch": 0.6956995725392318, "grad_norm": 0.255859375, "learning_rate": 5.130070104254847e-05, "loss": 0.9468, "step": 12125 }, { "epoch": 0.6959864589608974, "grad_norm": 0.275390625, "learning_rate": 5.121325702134063e-05, "loss": 0.9501, "step": 12130 }, { "epoch": 0.696273345382563, "grad_norm": 0.353515625, "learning_rate": 5.112586192982653e-05, "loss": 0.9747, "step": 12135 }, { "epoch": 0.6965602318042287, "grad_norm": 0.27734375, "learning_rate": 5.1038515855657264e-05, "loss": 0.9204, "step": 12140 }, { "epoch": 0.6968471182258944, "grad_norm": 0.271484375, "learning_rate": 5.0951218886434884e-05, "loss": 0.9732, "step": 12145 }, { "epoch": 0.69713400464756, "grad_norm": 0.26171875, "learning_rate": 5.086397110971218e-05, "loss": 0.9598, "step": 12150 }, { "epoch": 0.6974208910692257, "grad_norm": 0.26953125, "learning_rate": 5.077677261299251e-05, "loss": 0.8678, "step": 12155 }, { "epoch": 0.6977077774908913, "grad_norm": 0.267578125, "learning_rate": 5.068962348372992e-05, "loss": 0.9874, "step": 12160 }, { "epoch": 0.697994663912557, "grad_norm": 0.2734375, "learning_rate": 5.060252380932886e-05, "loss": 0.9398, "step": 12165 }, { "epoch": 0.6982815503342227, "grad_norm": 0.255859375, "learning_rate": 5.0515473677144254e-05, "loss": 0.958, "step": 12170 }, { "epoch": 0.6985684367558883, "grad_norm": 0.251953125, "learning_rate": 5.042847317448125e-05, "loss": 0.9489, "step": 12175 }, { "epoch": 0.698855323177554, "grad_norm": 0.271484375, "learning_rate": 5.034152238859533e-05, "loss": 0.9528, "step": 12180 }, { "epoch": 0.6991422095992197, "grad_norm": 0.2578125, "learning_rate": 5.025462140669204e-05, "loss": 0.9425, "step": 12185 }, { "epoch": 0.6994290960208853, "grad_norm": 0.267578125, "learning_rate": 5.016777031592694e-05, "loss": 0.962, "step": 12190 }, { "epoch": 0.699715982442551, "grad_norm": 0.2578125, "learning_rate": 5.008096920340568e-05, "loss": 0.9752, "step": 12195 }, { "epoch": 0.7000028688642167, "grad_norm": 0.263671875, "learning_rate": 4.999421815618364e-05, "loss": 0.956, "step": 12200 }, { "epoch": 0.7002897552858823, "grad_norm": 0.255859375, "learning_rate": 4.990751726126612e-05, "loss": 0.9091, "step": 12205 }, { "epoch": 0.700576641707548, "grad_norm": 0.2578125, "learning_rate": 4.9820866605607994e-05, "loss": 0.8879, "step": 12210 }, { "epoch": 0.7008635281292136, "grad_norm": 0.267578125, "learning_rate": 4.973426627611389e-05, "loss": 0.9091, "step": 12215 }, { "epoch": 0.7011504145508793, "grad_norm": 0.265625, "learning_rate": 4.964771635963781e-05, "loss": 0.9416, "step": 12220 }, { "epoch": 0.701437300972545, "grad_norm": 0.271484375, "learning_rate": 4.95612169429833e-05, "loss": 0.982, "step": 12225 }, { "epoch": 0.7017241873942106, "grad_norm": 0.24609375, "learning_rate": 4.9474768112903293e-05, "loss": 0.9896, "step": 12230 }, { "epoch": 0.7020110738158762, "grad_norm": 0.240234375, "learning_rate": 4.9388369956099815e-05, "loss": 0.9021, "step": 12235 }, { "epoch": 0.702297960237542, "grad_norm": 0.265625, "learning_rate": 4.930202255922427e-05, "loss": 0.9309, "step": 12240 }, { "epoch": 0.7025848466592076, "grad_norm": 0.271484375, "learning_rate": 4.9215726008876995e-05, "loss": 0.9511, "step": 12245 }, { "epoch": 0.7028717330808733, "grad_norm": 0.2734375, "learning_rate": 4.9129480391607465e-05, "loss": 0.9546, "step": 12250 }, { "epoch": 0.703158619502539, "grad_norm": 0.28515625, "learning_rate": 4.904328579391393e-05, "loss": 0.9251, "step": 12255 }, { "epoch": 0.7034455059242046, "grad_norm": 0.25, "learning_rate": 4.895714230224363e-05, "loss": 0.9717, "step": 12260 }, { "epoch": 0.7037323923458703, "grad_norm": 0.2734375, "learning_rate": 4.887105000299239e-05, "loss": 1.0073, "step": 12265 }, { "epoch": 0.7040192787675359, "grad_norm": 0.2578125, "learning_rate": 4.8785008982504845e-05, "loss": 0.9337, "step": 12270 }, { "epoch": 0.7043061651892016, "grad_norm": 0.26953125, "learning_rate": 4.8699019327074035e-05, "loss": 0.9608, "step": 12275 }, { "epoch": 0.7045930516108673, "grad_norm": 0.2578125, "learning_rate": 4.861308112294168e-05, "loss": 1.0333, "step": 12280 }, { "epoch": 0.7048799380325329, "grad_norm": 0.287109375, "learning_rate": 4.852719445629773e-05, "loss": 1.0415, "step": 12285 }, { "epoch": 0.7051668244541985, "grad_norm": 0.259765625, "learning_rate": 4.844135941328048e-05, "loss": 0.9759, "step": 12290 }, { "epoch": 0.7054537108758643, "grad_norm": 0.28125, "learning_rate": 4.835557607997656e-05, "loss": 1.0231, "step": 12295 }, { "epoch": 0.7057405972975299, "grad_norm": 0.271484375, "learning_rate": 4.826984454242057e-05, "loss": 0.9123, "step": 12300 }, { "epoch": 0.7060274837191955, "grad_norm": 0.291015625, "learning_rate": 4.818416488659534e-05, "loss": 1.0105, "step": 12305 }, { "epoch": 0.7063143701408612, "grad_norm": 0.26171875, "learning_rate": 4.80985371984315e-05, "loss": 1.0128, "step": 12310 }, { "epoch": 0.7066012565625269, "grad_norm": 0.27734375, "learning_rate": 4.801296156380767e-05, "loss": 0.926, "step": 12315 }, { "epoch": 0.7068881429841926, "grad_norm": 0.26171875, "learning_rate": 4.7927438068550256e-05, "loss": 0.9966, "step": 12320 }, { "epoch": 0.7071750294058582, "grad_norm": 0.271484375, "learning_rate": 4.78419667984333e-05, "loss": 0.9559, "step": 12325 }, { "epoch": 0.7074619158275239, "grad_norm": 0.271484375, "learning_rate": 4.7756547839178564e-05, "loss": 0.9873, "step": 12330 }, { "epoch": 0.7077488022491896, "grad_norm": 0.26953125, "learning_rate": 4.767118127645524e-05, "loss": 0.9543, "step": 12335 }, { "epoch": 0.7080356886708552, "grad_norm": 0.265625, "learning_rate": 4.758586719588007e-05, "loss": 0.9337, "step": 12340 }, { "epoch": 0.7083225750925208, "grad_norm": 0.259765625, "learning_rate": 4.7500605683017076e-05, "loss": 0.9204, "step": 12345 }, { "epoch": 0.7086094615141866, "grad_norm": 0.265625, "learning_rate": 4.74153968233776e-05, "loss": 0.9687, "step": 12350 }, { "epoch": 0.7088963479358522, "grad_norm": 0.248046875, "learning_rate": 4.733024070242024e-05, "loss": 0.8718, "step": 12355 }, { "epoch": 0.7091832343575178, "grad_norm": 0.2734375, "learning_rate": 4.724513740555053e-05, "loss": 0.9764, "step": 12360 }, { "epoch": 0.7094701207791835, "grad_norm": 0.2890625, "learning_rate": 4.716008701812123e-05, "loss": 0.8681, "step": 12365 }, { "epoch": 0.7097570072008492, "grad_norm": 0.259765625, "learning_rate": 4.707508962543188e-05, "loss": 0.9209, "step": 12370 }, { "epoch": 0.7100438936225149, "grad_norm": 0.2890625, "learning_rate": 4.699014531272894e-05, "loss": 0.9734, "step": 12375 }, { "epoch": 0.7103307800441805, "grad_norm": 0.263671875, "learning_rate": 4.690525416520557e-05, "loss": 1.0052, "step": 12380 }, { "epoch": 0.7106176664658461, "grad_norm": 0.2734375, "learning_rate": 4.6820416268001747e-05, "loss": 0.9453, "step": 12385 }, { "epoch": 0.7109045528875119, "grad_norm": 0.30078125, "learning_rate": 4.673563170620385e-05, "loss": 0.8912, "step": 12390 }, { "epoch": 0.7111914393091775, "grad_norm": 0.2578125, "learning_rate": 4.6650900564844935e-05, "loss": 0.9074, "step": 12395 }, { "epoch": 0.7114783257308431, "grad_norm": 0.271484375, "learning_rate": 4.6566222928904436e-05, "loss": 0.9225, "step": 12400 }, { "epoch": 0.7117652121525089, "grad_norm": 0.2734375, "learning_rate": 4.648159888330804e-05, "loss": 0.9795, "step": 12405 }, { "epoch": 0.7120520985741745, "grad_norm": 0.26953125, "learning_rate": 4.639702851292782e-05, "loss": 0.9718, "step": 12410 }, { "epoch": 0.7123389849958401, "grad_norm": 0.2431640625, "learning_rate": 4.631251190258187e-05, "loss": 0.9783, "step": 12415 }, { "epoch": 0.7126258714175058, "grad_norm": 0.287109375, "learning_rate": 4.622804913703452e-05, "loss": 0.928, "step": 12420 }, { "epoch": 0.7129127578391715, "grad_norm": 0.2578125, "learning_rate": 4.614364030099596e-05, "loss": 0.9448, "step": 12425 }, { "epoch": 0.7131996442608372, "grad_norm": 0.27734375, "learning_rate": 4.605928547912237e-05, "loss": 0.9321, "step": 12430 }, { "epoch": 0.7134865306825028, "grad_norm": 0.265625, "learning_rate": 4.597498475601579e-05, "loss": 0.9622, "step": 12435 }, { "epoch": 0.7137734171041684, "grad_norm": 0.26171875, "learning_rate": 4.5890738216223884e-05, "loss": 1.0094, "step": 12440 }, { "epoch": 0.7140603035258342, "grad_norm": 0.30078125, "learning_rate": 4.58065459442401e-05, "loss": 1.0125, "step": 12445 }, { "epoch": 0.7143471899474998, "grad_norm": 0.2734375, "learning_rate": 4.572240802450335e-05, "loss": 0.907, "step": 12450 }, { "epoch": 0.7146340763691654, "grad_norm": 0.25390625, "learning_rate": 4.5638324541398136e-05, "loss": 0.9477, "step": 12455 }, { "epoch": 0.7149209627908311, "grad_norm": 0.265625, "learning_rate": 4.55542955792543e-05, "loss": 0.9153, "step": 12460 }, { "epoch": 0.7152078492124968, "grad_norm": 0.2734375, "learning_rate": 4.547032122234698e-05, "loss": 0.9333, "step": 12465 }, { "epoch": 0.7154947356341624, "grad_norm": 0.26171875, "learning_rate": 4.538640155489666e-05, "loss": 0.9724, "step": 12470 }, { "epoch": 0.7157816220558281, "grad_norm": 0.330078125, "learning_rate": 4.5302536661068816e-05, "loss": 0.9592, "step": 12475 }, { "epoch": 0.7160685084774938, "grad_norm": 0.234375, "learning_rate": 4.521872662497416e-05, "loss": 0.885, "step": 12480 }, { "epoch": 0.7163553948991594, "grad_norm": 0.25390625, "learning_rate": 4.513497153066822e-05, "loss": 0.8931, "step": 12485 }, { "epoch": 0.7166422813208251, "grad_norm": 0.275390625, "learning_rate": 4.505127146215159e-05, "loss": 0.9832, "step": 12490 }, { "epoch": 0.7169291677424907, "grad_norm": 0.25390625, "learning_rate": 4.49676265033695e-05, "loss": 0.9828, "step": 12495 }, { "epoch": 0.7172160541641565, "grad_norm": 0.267578125, "learning_rate": 4.4884036738212074e-05, "loss": 0.9194, "step": 12500 }, { "epoch": 0.7175029405858221, "grad_norm": 0.275390625, "learning_rate": 4.480050225051394e-05, "loss": 0.9192, "step": 12505 }, { "epoch": 0.7177898270074877, "grad_norm": 0.259765625, "learning_rate": 4.4717023124054394e-05, "loss": 0.9769, "step": 12510 }, { "epoch": 0.7180767134291534, "grad_norm": 0.2578125, "learning_rate": 4.463359944255718e-05, "loss": 0.932, "step": 12515 }, { "epoch": 0.7183635998508191, "grad_norm": 0.287109375, "learning_rate": 4.455023128969036e-05, "loss": 0.9208, "step": 12520 }, { "epoch": 0.7186504862724847, "grad_norm": 0.27734375, "learning_rate": 4.446691874906645e-05, "loss": 0.9096, "step": 12525 }, { "epoch": 0.7189373726941504, "grad_norm": 0.25390625, "learning_rate": 4.4383661904242e-05, "loss": 0.9426, "step": 12530 }, { "epoch": 0.719224259115816, "grad_norm": 0.29296875, "learning_rate": 4.430046083871791e-05, "loss": 0.9402, "step": 12535 }, { "epoch": 0.7195111455374817, "grad_norm": 0.267578125, "learning_rate": 4.421731563593895e-05, "loss": 0.9636, "step": 12540 }, { "epoch": 0.7197980319591474, "grad_norm": 0.26171875, "learning_rate": 4.413422637929402e-05, "loss": 0.9838, "step": 12545 }, { "epoch": 0.720084918380813, "grad_norm": 0.265625, "learning_rate": 4.40511931521158e-05, "loss": 0.9323, "step": 12550 }, { "epoch": 0.7203718048024786, "grad_norm": 0.283203125, "learning_rate": 4.396821603768079e-05, "loss": 0.999, "step": 12555 }, { "epoch": 0.7206586912241444, "grad_norm": 0.26171875, "learning_rate": 4.3885295119209294e-05, "loss": 0.98, "step": 12560 }, { "epoch": 0.72094557764581, "grad_norm": 0.2578125, "learning_rate": 4.380243047986513e-05, "loss": 1.0243, "step": 12565 }, { "epoch": 0.7212324640674757, "grad_norm": 0.2734375, "learning_rate": 4.3719622202755816e-05, "loss": 0.9103, "step": 12570 }, { "epoch": 0.7215193504891414, "grad_norm": 0.314453125, "learning_rate": 4.3636870370932194e-05, "loss": 0.9352, "step": 12575 }, { "epoch": 0.721806236910807, "grad_norm": 0.25390625, "learning_rate": 4.3554175067388636e-05, "loss": 0.9956, "step": 12580 }, { "epoch": 0.7220931233324727, "grad_norm": 0.25390625, "learning_rate": 4.3471536375062696e-05, "loss": 0.9594, "step": 12585 }, { "epoch": 0.7223800097541383, "grad_norm": 0.294921875, "learning_rate": 4.338895437683521e-05, "loss": 0.9104, "step": 12590 }, { "epoch": 0.722666896175804, "grad_norm": 0.279296875, "learning_rate": 4.330642915553023e-05, "loss": 0.9707, "step": 12595 }, { "epoch": 0.7229537825974697, "grad_norm": 0.267578125, "learning_rate": 4.322396079391467e-05, "loss": 0.9285, "step": 12600 }, { "epoch": 0.7232406690191353, "grad_norm": 0.26953125, "learning_rate": 4.3141549374698645e-05, "loss": 0.8616, "step": 12605 }, { "epoch": 0.7235275554408009, "grad_norm": 0.275390625, "learning_rate": 4.305919498053495e-05, "loss": 0.9942, "step": 12610 }, { "epoch": 0.7238144418624667, "grad_norm": 0.283203125, "learning_rate": 4.2976897694019356e-05, "loss": 0.9503, "step": 12615 }, { "epoch": 0.7241013282841323, "grad_norm": 0.2734375, "learning_rate": 4.289465759769025e-05, "loss": 0.9108, "step": 12620 }, { "epoch": 0.724388214705798, "grad_norm": 0.271484375, "learning_rate": 4.2812474774028735e-05, "loss": 0.9787, "step": 12625 }, { "epoch": 0.7246751011274636, "grad_norm": 0.2431640625, "learning_rate": 4.27303493054584e-05, "loss": 0.9612, "step": 12630 }, { "epoch": 0.7249619875491293, "grad_norm": 0.259765625, "learning_rate": 4.264828127434539e-05, "loss": 0.9878, "step": 12635 }, { "epoch": 0.725248873970795, "grad_norm": 0.2490234375, "learning_rate": 4.256627076299816e-05, "loss": 0.9706, "step": 12640 }, { "epoch": 0.7255357603924606, "grad_norm": 0.25390625, "learning_rate": 4.248431785366759e-05, "loss": 0.9263, "step": 12645 }, { "epoch": 0.7258226468141263, "grad_norm": 0.267578125, "learning_rate": 4.2402422628546666e-05, "loss": 0.9102, "step": 12650 }, { "epoch": 0.726109533235792, "grad_norm": 0.2890625, "learning_rate": 4.2320585169770565e-05, "loss": 0.9785, "step": 12655 }, { "epoch": 0.7263964196574576, "grad_norm": 0.25, "learning_rate": 4.2238805559416594e-05, "loss": 0.9231, "step": 12660 }, { "epoch": 0.7266833060791232, "grad_norm": 0.265625, "learning_rate": 4.215708387950391e-05, "loss": 1.0038, "step": 12665 }, { "epoch": 0.726970192500789, "grad_norm": 0.271484375, "learning_rate": 4.20754202119937e-05, "loss": 0.9561, "step": 12670 }, { "epoch": 0.7272570789224546, "grad_norm": 0.251953125, "learning_rate": 4.1993814638788944e-05, "loss": 0.9604, "step": 12675 }, { "epoch": 0.7275439653441202, "grad_norm": 0.2451171875, "learning_rate": 4.191226724173426e-05, "loss": 0.9465, "step": 12680 }, { "epoch": 0.7278308517657859, "grad_norm": 0.265625, "learning_rate": 4.1830778102616055e-05, "loss": 0.9697, "step": 12685 }, { "epoch": 0.7281177381874516, "grad_norm": 0.26171875, "learning_rate": 4.174934730316216e-05, "loss": 0.938, "step": 12690 }, { "epoch": 0.7284046246091173, "grad_norm": 0.263671875, "learning_rate": 4.166797492504206e-05, "loss": 0.913, "step": 12695 }, { "epoch": 0.7286915110307829, "grad_norm": 0.24609375, "learning_rate": 4.1586661049866496e-05, "loss": 0.8926, "step": 12700 }, { "epoch": 0.7289783974524485, "grad_norm": 0.28125, "learning_rate": 4.1505405759187666e-05, "loss": 0.9539, "step": 12705 }, { "epoch": 0.7292652838741143, "grad_norm": 0.279296875, "learning_rate": 4.142420913449887e-05, "loss": 0.9492, "step": 12710 }, { "epoch": 0.7295521702957799, "grad_norm": 0.267578125, "learning_rate": 4.13430712572347e-05, "loss": 0.9287, "step": 12715 }, { "epoch": 0.7298390567174455, "grad_norm": 0.2578125, "learning_rate": 4.12619922087708e-05, "loss": 0.8897, "step": 12720 }, { "epoch": 0.7301259431391113, "grad_norm": 0.275390625, "learning_rate": 4.118097207042373e-05, "loss": 0.9018, "step": 12725 }, { "epoch": 0.7304128295607769, "grad_norm": 0.248046875, "learning_rate": 4.1100010923451084e-05, "loss": 0.9338, "step": 12730 }, { "epoch": 0.7306997159824425, "grad_norm": 0.259765625, "learning_rate": 4.10191088490512e-05, "loss": 0.9634, "step": 12735 }, { "epoch": 0.7309866024041082, "grad_norm": 0.267578125, "learning_rate": 4.093826592836322e-05, "loss": 0.935, "step": 12740 }, { "epoch": 0.7312734888257739, "grad_norm": 0.267578125, "learning_rate": 4.0857482242466885e-05, "loss": 0.9184, "step": 12745 }, { "epoch": 0.7315603752474396, "grad_norm": 0.263671875, "learning_rate": 4.077675787238267e-05, "loss": 0.965, "step": 12750 }, { "epoch": 0.7318472616691052, "grad_norm": 0.251953125, "learning_rate": 4.0696092899071416e-05, "loss": 0.8835, "step": 12755 }, { "epoch": 0.7321341480907708, "grad_norm": 0.265625, "learning_rate": 4.061548740343446e-05, "loss": 0.9895, "step": 12760 }, { "epoch": 0.7324210345124366, "grad_norm": 0.265625, "learning_rate": 4.053494146631355e-05, "loss": 0.9351, "step": 12765 }, { "epoch": 0.7327079209341022, "grad_norm": 0.251953125, "learning_rate": 4.045445516849055e-05, "loss": 0.8747, "step": 12770 }, { "epoch": 0.7329948073557678, "grad_norm": 0.25390625, "learning_rate": 4.037402859068764e-05, "loss": 0.906, "step": 12775 }, { "epoch": 0.7332816937774335, "grad_norm": 0.263671875, "learning_rate": 4.029366181356702e-05, "loss": 0.9605, "step": 12780 }, { "epoch": 0.7335685801990992, "grad_norm": 0.265625, "learning_rate": 4.0213354917731004e-05, "loss": 0.9157, "step": 12785 }, { "epoch": 0.7338554666207648, "grad_norm": 0.26953125, "learning_rate": 4.0133107983721726e-05, "loss": 0.9494, "step": 12790 }, { "epoch": 0.7341423530424305, "grad_norm": 0.28125, "learning_rate": 4.005292109202129e-05, "loss": 0.9311, "step": 12795 }, { "epoch": 0.7344292394640962, "grad_norm": 0.2734375, "learning_rate": 3.997279432305158e-05, "loss": 0.8801, "step": 12800 }, { "epoch": 0.7347161258857619, "grad_norm": 0.26171875, "learning_rate": 3.9892727757174074e-05, "loss": 0.9701, "step": 12805 }, { "epoch": 0.7350030123074275, "grad_norm": 0.265625, "learning_rate": 3.981272147469002e-05, "loss": 0.9081, "step": 12810 }, { "epoch": 0.7352898987290931, "grad_norm": 0.2470703125, "learning_rate": 3.973277555584004e-05, "loss": 0.9429, "step": 12815 }, { "epoch": 0.7355767851507589, "grad_norm": 0.26953125, "learning_rate": 3.965289008080438e-05, "loss": 0.9053, "step": 12820 }, { "epoch": 0.7358636715724245, "grad_norm": 0.251953125, "learning_rate": 3.957306512970258e-05, "loss": 0.9727, "step": 12825 }, { "epoch": 0.7361505579940901, "grad_norm": 0.296875, "learning_rate": 3.9493300782593415e-05, "loss": 1.021, "step": 12830 }, { "epoch": 0.7364374444157558, "grad_norm": 0.26953125, "learning_rate": 3.9413597119475044e-05, "loss": 1.004, "step": 12835 }, { "epoch": 0.7367243308374215, "grad_norm": 0.251953125, "learning_rate": 3.9333954220284586e-05, "loss": 0.9596, "step": 12840 }, { "epoch": 0.7370112172590871, "grad_norm": 0.310546875, "learning_rate": 3.925437216489838e-05, "loss": 0.9752, "step": 12845 }, { "epoch": 0.7372981036807528, "grad_norm": 0.302734375, "learning_rate": 3.91748510331316e-05, "loss": 1.0187, "step": 12850 }, { "epoch": 0.7375849901024184, "grad_norm": 0.279296875, "learning_rate": 3.909539090473845e-05, "loss": 0.9257, "step": 12855 }, { "epoch": 0.7378718765240841, "grad_norm": 0.263671875, "learning_rate": 3.9015991859411815e-05, "loss": 1.0112, "step": 12860 }, { "epoch": 0.7381587629457498, "grad_norm": 0.28125, "learning_rate": 3.8936653976783454e-05, "loss": 0.9498, "step": 12865 }, { "epoch": 0.7384456493674154, "grad_norm": 0.279296875, "learning_rate": 3.885737733642366e-05, "loss": 0.995, "step": 12870 }, { "epoch": 0.7387325357890812, "grad_norm": 0.271484375, "learning_rate": 3.877816201784139e-05, "loss": 0.9134, "step": 12875 }, { "epoch": 0.7390194222107468, "grad_norm": 0.271484375, "learning_rate": 3.86990081004841e-05, "loss": 0.9388, "step": 12880 }, { "epoch": 0.7393063086324124, "grad_norm": 0.267578125, "learning_rate": 3.861991566373759e-05, "loss": 0.9267, "step": 12885 }, { "epoch": 0.7395931950540781, "grad_norm": 0.244140625, "learning_rate": 3.85408847869261e-05, "loss": 0.8874, "step": 12890 }, { "epoch": 0.7398800814757438, "grad_norm": 0.271484375, "learning_rate": 3.846191554931201e-05, "loss": 0.9356, "step": 12895 }, { "epoch": 0.7401669678974094, "grad_norm": 0.291015625, "learning_rate": 3.838300803009601e-05, "loss": 0.9441, "step": 12900 }, { "epoch": 0.7404538543190751, "grad_norm": 0.26953125, "learning_rate": 3.8304162308416766e-05, "loss": 0.9811, "step": 12905 }, { "epoch": 0.7407407407407407, "grad_norm": 0.310546875, "learning_rate": 3.822537846335109e-05, "loss": 0.9804, "step": 12910 }, { "epoch": 0.7410276271624064, "grad_norm": 0.28515625, "learning_rate": 3.814665657391365e-05, "loss": 0.9517, "step": 12915 }, { "epoch": 0.7413145135840721, "grad_norm": 0.2578125, "learning_rate": 3.806799671905695e-05, "loss": 0.91, "step": 12920 }, { "epoch": 0.7416014000057377, "grad_norm": 0.267578125, "learning_rate": 3.798939897767141e-05, "loss": 0.9086, "step": 12925 }, { "epoch": 0.7418882864274033, "grad_norm": 0.2578125, "learning_rate": 3.7910863428584985e-05, "loss": 0.8913, "step": 12930 }, { "epoch": 0.7421751728490691, "grad_norm": 0.259765625, "learning_rate": 3.783239015056343e-05, "loss": 0.906, "step": 12935 }, { "epoch": 0.7424620592707347, "grad_norm": 0.26171875, "learning_rate": 3.7753979222309876e-05, "loss": 0.9744, "step": 12940 }, { "epoch": 0.7427489456924004, "grad_norm": 0.26171875, "learning_rate": 3.767563072246508e-05, "loss": 0.9234, "step": 12945 }, { "epoch": 0.743035832114066, "grad_norm": 0.28515625, "learning_rate": 3.7597344729607056e-05, "loss": 0.8801, "step": 12950 }, { "epoch": 0.7433227185357317, "grad_norm": 0.27734375, "learning_rate": 3.751912132225118e-05, "loss": 0.9313, "step": 12955 }, { "epoch": 0.7436096049573974, "grad_norm": 0.2734375, "learning_rate": 3.744096057885014e-05, "loss": 0.9598, "step": 12960 }, { "epoch": 0.743896491379063, "grad_norm": 0.28125, "learning_rate": 3.73628625777936e-05, "loss": 0.9874, "step": 12965 }, { "epoch": 0.7441833778007287, "grad_norm": 0.296875, "learning_rate": 3.7284827397408485e-05, "loss": 0.9683, "step": 12970 }, { "epoch": 0.7444702642223944, "grad_norm": 0.271484375, "learning_rate": 3.720685511595855e-05, "loss": 0.9416, "step": 12975 }, { "epoch": 0.74475715064406, "grad_norm": 0.314453125, "learning_rate": 3.712894581164461e-05, "loss": 0.9525, "step": 12980 }, { "epoch": 0.7450440370657256, "grad_norm": 0.26953125, "learning_rate": 3.705109956260419e-05, "loss": 0.9097, "step": 12985 }, { "epoch": 0.7453309234873914, "grad_norm": 0.2578125, "learning_rate": 3.69733164469117e-05, "loss": 0.9857, "step": 12990 }, { "epoch": 0.745617809909057, "grad_norm": 0.28515625, "learning_rate": 3.68955965425781e-05, "loss": 0.9335, "step": 12995 }, { "epoch": 0.7459046963307226, "grad_norm": 0.2578125, "learning_rate": 3.6817939927551105e-05, "loss": 0.9259, "step": 13000 }, { "epoch": 0.7461915827523883, "grad_norm": 0.265625, "learning_rate": 3.67403466797148e-05, "loss": 0.936, "step": 13005 }, { "epoch": 0.746478469174054, "grad_norm": 0.2734375, "learning_rate": 3.6662816876889837e-05, "loss": 0.9013, "step": 13010 }, { "epoch": 0.7467653555957197, "grad_norm": 0.271484375, "learning_rate": 3.658535059683318e-05, "loss": 0.9263, "step": 13015 }, { "epoch": 0.7470522420173853, "grad_norm": 0.259765625, "learning_rate": 3.650794791723805e-05, "loss": 0.8966, "step": 13020 }, { "epoch": 0.7473391284390509, "grad_norm": 0.275390625, "learning_rate": 3.6430608915734e-05, "loss": 0.9852, "step": 13025 }, { "epoch": 0.7476260148607167, "grad_norm": 0.251953125, "learning_rate": 3.635333366988657e-05, "loss": 0.9679, "step": 13030 }, { "epoch": 0.7479129012823823, "grad_norm": 0.2578125, "learning_rate": 3.6276122257197465e-05, "loss": 0.9413, "step": 13035 }, { "epoch": 0.7481997877040479, "grad_norm": 0.263671875, "learning_rate": 3.6198974755104366e-05, "loss": 0.8877, "step": 13040 }, { "epoch": 0.7484866741257137, "grad_norm": 0.271484375, "learning_rate": 3.6121891240980764e-05, "loss": 0.9677, "step": 13045 }, { "epoch": 0.7487735605473793, "grad_norm": 0.2890625, "learning_rate": 3.604487179213612e-05, "loss": 0.9575, "step": 13050 }, { "epoch": 0.749060446969045, "grad_norm": 0.283203125, "learning_rate": 3.596791648581546e-05, "loss": 0.9758, "step": 13055 }, { "epoch": 0.7493473333907106, "grad_norm": 0.26953125, "learning_rate": 3.589102539919965e-05, "loss": 0.9911, "step": 13060 }, { "epoch": 0.7496342198123763, "grad_norm": 0.259765625, "learning_rate": 3.5814198609405024e-05, "loss": 0.983, "step": 13065 }, { "epoch": 0.749921106234042, "grad_norm": 0.26171875, "learning_rate": 3.5737436193483555e-05, "loss": 0.9201, "step": 13070 }, { "epoch": 0.7502079926557076, "grad_norm": 0.255859375, "learning_rate": 3.56607382284225e-05, "loss": 0.957, "step": 13075 }, { "epoch": 0.7504948790773732, "grad_norm": 0.271484375, "learning_rate": 3.5584104791144603e-05, "loss": 0.9338, "step": 13080 }, { "epoch": 0.750781765499039, "grad_norm": 0.251953125, "learning_rate": 3.5507535958507864e-05, "loss": 0.8666, "step": 13085 }, { "epoch": 0.7510686519207046, "grad_norm": 0.26171875, "learning_rate": 3.543103180730541e-05, "loss": 0.972, "step": 13090 }, { "epoch": 0.7513555383423702, "grad_norm": 0.26171875, "learning_rate": 3.535459241426563e-05, "loss": 0.8875, "step": 13095 }, { "epoch": 0.7516424247640359, "grad_norm": 0.2734375, "learning_rate": 3.5278217856051866e-05, "loss": 0.8928, "step": 13100 }, { "epoch": 0.7519293111857016, "grad_norm": 0.265625, "learning_rate": 3.5201908209262445e-05, "loss": 0.9709, "step": 13105 }, { "epoch": 0.7522161976073672, "grad_norm": 0.27734375, "learning_rate": 3.5125663550430585e-05, "loss": 0.9369, "step": 13110 }, { "epoch": 0.7525030840290329, "grad_norm": 0.25, "learning_rate": 3.504948395602442e-05, "loss": 0.8753, "step": 13115 }, { "epoch": 0.7527899704506986, "grad_norm": 0.27734375, "learning_rate": 3.4973369502446685e-05, "loss": 0.9293, "step": 13120 }, { "epoch": 0.7530768568723643, "grad_norm": 0.279296875, "learning_rate": 3.4897320266034905e-05, "loss": 0.9553, "step": 13125 }, { "epoch": 0.7533637432940299, "grad_norm": 0.2578125, "learning_rate": 3.482133632306117e-05, "loss": 0.9016, "step": 13130 }, { "epoch": 0.7536506297156955, "grad_norm": 0.259765625, "learning_rate": 3.4745417749732003e-05, "loss": 0.9567, "step": 13135 }, { "epoch": 0.7539375161373613, "grad_norm": 0.25, "learning_rate": 3.466956462218849e-05, "loss": 0.9255, "step": 13140 }, { "epoch": 0.7542244025590269, "grad_norm": 0.251953125, "learning_rate": 3.4593777016505946e-05, "loss": 0.934, "step": 13145 }, { "epoch": 0.7545112889806925, "grad_norm": 0.265625, "learning_rate": 3.451805500869413e-05, "loss": 0.9699, "step": 13150 }, { "epoch": 0.7547981754023582, "grad_norm": 0.251953125, "learning_rate": 3.444239867469683e-05, "loss": 0.979, "step": 13155 }, { "epoch": 0.7550850618240239, "grad_norm": 0.28515625, "learning_rate": 3.4366808090392123e-05, "loss": 0.9284, "step": 13160 }, { "epoch": 0.7553719482456895, "grad_norm": 0.248046875, "learning_rate": 3.429128333159208e-05, "loss": 0.8846, "step": 13165 }, { "epoch": 0.7556588346673552, "grad_norm": 0.26953125, "learning_rate": 3.421582447404273e-05, "loss": 0.9769, "step": 13170 }, { "epoch": 0.7559457210890208, "grad_norm": 0.267578125, "learning_rate": 3.414043159342408e-05, "loss": 0.9574, "step": 13175 }, { "epoch": 0.7562326075106865, "grad_norm": 0.26171875, "learning_rate": 3.406510476534985e-05, "loss": 0.9287, "step": 13180 }, { "epoch": 0.7565194939323522, "grad_norm": 0.255859375, "learning_rate": 3.398984406536765e-05, "loss": 0.8996, "step": 13185 }, { "epoch": 0.7568063803540178, "grad_norm": 0.267578125, "learning_rate": 3.391464956895869e-05, "loss": 0.9034, "step": 13190 }, { "epoch": 0.7570932667756836, "grad_norm": 0.30859375, "learning_rate": 3.3839521351537726e-05, "loss": 0.9802, "step": 13195 }, { "epoch": 0.7573801531973492, "grad_norm": 0.2734375, "learning_rate": 3.376445948845322e-05, "loss": 0.9396, "step": 13200 }, { "epoch": 0.7576670396190148, "grad_norm": 0.283203125, "learning_rate": 3.368946405498686e-05, "loss": 0.9319, "step": 13205 }, { "epoch": 0.7579539260406805, "grad_norm": 0.2578125, "learning_rate": 3.361453512635393e-05, "loss": 0.9203, "step": 13210 }, { "epoch": 0.7582408124623462, "grad_norm": 0.26171875, "learning_rate": 3.353967277770282e-05, "loss": 0.9762, "step": 13215 }, { "epoch": 0.7585276988840118, "grad_norm": 0.2578125, "learning_rate": 3.346487708411532e-05, "loss": 0.9259, "step": 13220 }, { "epoch": 0.7588145853056775, "grad_norm": 0.26171875, "learning_rate": 3.3390148120606204e-05, "loss": 0.9831, "step": 13225 }, { "epoch": 0.7591014717273431, "grad_norm": 0.2578125, "learning_rate": 3.331548596212347e-05, "loss": 0.9123, "step": 13230 }, { "epoch": 0.7593883581490088, "grad_norm": 0.251953125, "learning_rate": 3.324089068354797e-05, "loss": 0.9328, "step": 13235 }, { "epoch": 0.7596752445706745, "grad_norm": 0.263671875, "learning_rate": 3.3166362359693596e-05, "loss": 0.9647, "step": 13240 }, { "epoch": 0.7599621309923401, "grad_norm": 0.27734375, "learning_rate": 3.3091901065307084e-05, "loss": 0.9081, "step": 13245 }, { "epoch": 0.7602490174140057, "grad_norm": 0.271484375, "learning_rate": 3.301750687506784e-05, "loss": 0.9484, "step": 13250 }, { "epoch": 0.7605359038356715, "grad_norm": 0.279296875, "learning_rate": 3.29431798635881e-05, "loss": 0.9229, "step": 13255 }, { "epoch": 0.7608227902573371, "grad_norm": 0.29296875, "learning_rate": 3.2868920105412594e-05, "loss": 0.9655, "step": 13260 }, { "epoch": 0.7611096766790028, "grad_norm": 0.26171875, "learning_rate": 3.279472767501876e-05, "loss": 0.8961, "step": 13265 }, { "epoch": 0.7613965631006685, "grad_norm": 0.26171875, "learning_rate": 3.272060264681631e-05, "loss": 0.9216, "step": 13270 }, { "epoch": 0.7616834495223341, "grad_norm": 0.259765625, "learning_rate": 3.264654509514757e-05, "loss": 0.9027, "step": 13275 }, { "epoch": 0.7619703359439998, "grad_norm": 0.259765625, "learning_rate": 3.257255509428705e-05, "loss": 0.9079, "step": 13280 }, { "epoch": 0.7622572223656654, "grad_norm": 0.26171875, "learning_rate": 3.24986327184415e-05, "loss": 0.9147, "step": 13285 }, { "epoch": 0.7625441087873311, "grad_norm": 0.2890625, "learning_rate": 3.2424778041749984e-05, "loss": 0.9353, "step": 13290 }, { "epoch": 0.7628309952089968, "grad_norm": 0.259765625, "learning_rate": 3.235099113828351e-05, "loss": 0.9283, "step": 13295 }, { "epoch": 0.7631178816306624, "grad_norm": 0.27734375, "learning_rate": 3.227727208204523e-05, "loss": 0.8698, "step": 13300 }, { "epoch": 0.763404768052328, "grad_norm": 0.25390625, "learning_rate": 3.2203620946970156e-05, "loss": 0.9619, "step": 13305 }, { "epoch": 0.7636916544739938, "grad_norm": 0.27734375, "learning_rate": 3.213003780692531e-05, "loss": 0.9486, "step": 13310 }, { "epoch": 0.7639785408956594, "grad_norm": 0.259765625, "learning_rate": 3.2056522735709346e-05, "loss": 0.9868, "step": 13315 }, { "epoch": 0.764265427317325, "grad_norm": 0.291015625, "learning_rate": 3.198307580705281e-05, "loss": 0.9784, "step": 13320 }, { "epoch": 0.7645523137389907, "grad_norm": 0.271484375, "learning_rate": 3.190969709461783e-05, "loss": 0.9559, "step": 13325 }, { "epoch": 0.7648392001606564, "grad_norm": 0.2578125, "learning_rate": 3.183638667199809e-05, "loss": 0.9156, "step": 13330 }, { "epoch": 0.7651260865823221, "grad_norm": 0.255859375, "learning_rate": 3.176314461271887e-05, "loss": 1.0296, "step": 13335 }, { "epoch": 0.7654129730039877, "grad_norm": 0.2734375, "learning_rate": 3.1689970990236784e-05, "loss": 0.912, "step": 13340 }, { "epoch": 0.7656998594256533, "grad_norm": 0.283203125, "learning_rate": 3.1616865877939915e-05, "loss": 0.9116, "step": 13345 }, { "epoch": 0.7659867458473191, "grad_norm": 0.255859375, "learning_rate": 3.1543829349147523e-05, "loss": 0.9038, "step": 13350 }, { "epoch": 0.7662736322689847, "grad_norm": 0.259765625, "learning_rate": 3.147086147711022e-05, "loss": 0.9568, "step": 13355 }, { "epoch": 0.7665605186906503, "grad_norm": 0.279296875, "learning_rate": 3.139796233500958e-05, "loss": 0.9531, "step": 13360 }, { "epoch": 0.7668474051123161, "grad_norm": 0.3046875, "learning_rate": 3.132513199595846e-05, "loss": 0.9676, "step": 13365 }, { "epoch": 0.7671342915339817, "grad_norm": 0.26171875, "learning_rate": 3.1252370533000494e-05, "loss": 0.9357, "step": 13370 }, { "epoch": 0.7674211779556473, "grad_norm": 0.263671875, "learning_rate": 3.1179678019110434e-05, "loss": 0.9335, "step": 13375 }, { "epoch": 0.767708064377313, "grad_norm": 0.267578125, "learning_rate": 3.110705452719376e-05, "loss": 0.9837, "step": 13380 }, { "epoch": 0.7679949507989787, "grad_norm": 0.2734375, "learning_rate": 3.1034500130086706e-05, "loss": 0.908, "step": 13385 }, { "epoch": 0.7682818372206444, "grad_norm": 0.275390625, "learning_rate": 3.096201490055635e-05, "loss": 0.9046, "step": 13390 }, { "epoch": 0.76856872364231, "grad_norm": 0.279296875, "learning_rate": 3.088959891130022e-05, "loss": 0.9065, "step": 13395 }, { "epoch": 0.7688556100639756, "grad_norm": 0.283203125, "learning_rate": 3.081725223494656e-05, "loss": 0.9354, "step": 13400 }, { "epoch": 0.7691424964856414, "grad_norm": 0.251953125, "learning_rate": 3.074497494405404e-05, "loss": 0.9483, "step": 13405 }, { "epoch": 0.769429382907307, "grad_norm": 0.2578125, "learning_rate": 3.0672767111111666e-05, "loss": 0.9057, "step": 13410 }, { "epoch": 0.7697162693289726, "grad_norm": 0.2734375, "learning_rate": 3.0600628808538915e-05, "loss": 0.9471, "step": 13415 }, { "epoch": 0.7700031557506383, "grad_norm": 0.25, "learning_rate": 3.05285601086854e-05, "loss": 0.9343, "step": 13420 }, { "epoch": 0.770290042172304, "grad_norm": 0.2578125, "learning_rate": 3.045656108383106e-05, "loss": 0.9745, "step": 13425 }, { "epoch": 0.7705769285939696, "grad_norm": 0.2451171875, "learning_rate": 3.0384631806185815e-05, "loss": 0.9407, "step": 13430 }, { "epoch": 0.7708638150156353, "grad_norm": 0.267578125, "learning_rate": 3.0312772347889773e-05, "loss": 0.943, "step": 13435 }, { "epoch": 0.771150701437301, "grad_norm": 0.26953125, "learning_rate": 3.0240982781012873e-05, "loss": 0.9863, "step": 13440 }, { "epoch": 0.7714375878589667, "grad_norm": 0.283203125, "learning_rate": 3.0169263177555085e-05, "loss": 1.0024, "step": 13445 }, { "epoch": 0.7717244742806323, "grad_norm": 0.263671875, "learning_rate": 3.0097613609446172e-05, "loss": 0.9126, "step": 13450 }, { "epoch": 0.7720113607022979, "grad_norm": 0.283203125, "learning_rate": 3.002603414854559e-05, "loss": 1.0295, "step": 13455 }, { "epoch": 0.7722982471239637, "grad_norm": 0.2890625, "learning_rate": 2.9954524866642585e-05, "loss": 0.9571, "step": 13460 }, { "epoch": 0.7725851335456293, "grad_norm": 0.255859375, "learning_rate": 2.988308583545596e-05, "loss": 0.9613, "step": 13465 }, { "epoch": 0.7728720199672949, "grad_norm": 0.267578125, "learning_rate": 2.9811717126634066e-05, "loss": 0.9705, "step": 13470 }, { "epoch": 0.7731589063889606, "grad_norm": 0.28125, "learning_rate": 2.974041881175468e-05, "loss": 0.9831, "step": 13475 }, { "epoch": 0.7734457928106263, "grad_norm": 0.291015625, "learning_rate": 2.9669190962325112e-05, "loss": 0.966, "step": 13480 }, { "epoch": 0.7737326792322919, "grad_norm": 0.26953125, "learning_rate": 2.959803364978184e-05, "loss": 0.9377, "step": 13485 }, { "epoch": 0.7740195656539576, "grad_norm": 0.259765625, "learning_rate": 2.952694694549073e-05, "loss": 0.9719, "step": 13490 }, { "epoch": 0.7743064520756232, "grad_norm": 0.26953125, "learning_rate": 2.9455930920746778e-05, "loss": 0.8831, "step": 13495 }, { "epoch": 0.774593338497289, "grad_norm": 0.283203125, "learning_rate": 2.9384985646774053e-05, "loss": 1.0191, "step": 13500 }, { "epoch": 0.7748802249189546, "grad_norm": 0.275390625, "learning_rate": 2.9314111194725757e-05, "loss": 0.9275, "step": 13505 }, { "epoch": 0.7751671113406202, "grad_norm": 0.2578125, "learning_rate": 2.9243307635683957e-05, "loss": 0.9317, "step": 13510 }, { "epoch": 0.775453997762286, "grad_norm": 0.26171875, "learning_rate": 2.9172575040659744e-05, "loss": 0.9501, "step": 13515 }, { "epoch": 0.7757408841839516, "grad_norm": 0.2734375, "learning_rate": 2.910191348059289e-05, "loss": 0.8564, "step": 13520 }, { "epoch": 0.7760277706056172, "grad_norm": 0.26171875, "learning_rate": 2.9031323026352053e-05, "loss": 0.9205, "step": 13525 }, { "epoch": 0.7763146570272829, "grad_norm": 0.259765625, "learning_rate": 2.8960803748734534e-05, "loss": 0.9622, "step": 13530 }, { "epoch": 0.7766015434489486, "grad_norm": 0.271484375, "learning_rate": 2.8890355718466177e-05, "loss": 0.8975, "step": 13535 }, { "epoch": 0.7768884298706142, "grad_norm": 0.27734375, "learning_rate": 2.8819979006201526e-05, "loss": 0.9782, "step": 13540 }, { "epoch": 0.7771753162922799, "grad_norm": 0.26171875, "learning_rate": 2.8749673682523404e-05, "loss": 0.9603, "step": 13545 }, { "epoch": 0.7774622027139455, "grad_norm": 0.265625, "learning_rate": 2.8679439817943232e-05, "loss": 0.9029, "step": 13550 }, { "epoch": 0.7777490891356112, "grad_norm": 0.255859375, "learning_rate": 2.860927748290061e-05, "loss": 0.9425, "step": 13555 }, { "epoch": 0.7780359755572769, "grad_norm": 0.271484375, "learning_rate": 2.853918674776345e-05, "loss": 0.9764, "step": 13560 }, { "epoch": 0.7783228619789425, "grad_norm": 0.271484375, "learning_rate": 2.84691676828279e-05, "loss": 0.9246, "step": 13565 }, { "epoch": 0.7786097484006081, "grad_norm": 0.275390625, "learning_rate": 2.8399220358318148e-05, "loss": 0.9608, "step": 13570 }, { "epoch": 0.7788966348222739, "grad_norm": 0.27734375, "learning_rate": 2.832934484438652e-05, "loss": 1.0572, "step": 13575 }, { "epoch": 0.7791835212439395, "grad_norm": 0.263671875, "learning_rate": 2.8259541211113216e-05, "loss": 1.0095, "step": 13580 }, { "epoch": 0.7794704076656052, "grad_norm": 0.3671875, "learning_rate": 2.8189809528506462e-05, "loss": 0.9405, "step": 13585 }, { "epoch": 0.7797572940872709, "grad_norm": 0.2734375, "learning_rate": 2.81201498665022e-05, "loss": 0.857, "step": 13590 }, { "epoch": 0.7800441805089365, "grad_norm": 0.255859375, "learning_rate": 2.8050562294964267e-05, "loss": 0.8924, "step": 13595 }, { "epoch": 0.7803310669306022, "grad_norm": 0.27734375, "learning_rate": 2.798104688368407e-05, "loss": 0.9094, "step": 13600 }, { "epoch": 0.7806179533522678, "grad_norm": 0.267578125, "learning_rate": 2.791160370238075e-05, "loss": 0.9469, "step": 13605 }, { "epoch": 0.7809048397739335, "grad_norm": 0.251953125, "learning_rate": 2.7842232820700977e-05, "loss": 0.9083, "step": 13610 }, { "epoch": 0.7811917261955992, "grad_norm": 0.3046875, "learning_rate": 2.7772934308218846e-05, "loss": 0.973, "step": 13615 }, { "epoch": 0.7814786126172648, "grad_norm": 0.271484375, "learning_rate": 2.7703708234435988e-05, "loss": 0.9516, "step": 13620 }, { "epoch": 0.7817654990389304, "grad_norm": 0.27734375, "learning_rate": 2.7634554668781242e-05, "loss": 0.9682, "step": 13625 }, { "epoch": 0.7820523854605962, "grad_norm": 0.27734375, "learning_rate": 2.7565473680610887e-05, "loss": 0.9785, "step": 13630 }, { "epoch": 0.7823392718822618, "grad_norm": 0.2578125, "learning_rate": 2.7496465339208233e-05, "loss": 0.9353, "step": 13635 }, { "epoch": 0.7826261583039275, "grad_norm": 0.279296875, "learning_rate": 2.7427529713783905e-05, "loss": 0.9497, "step": 13640 }, { "epoch": 0.7829130447255931, "grad_norm": 0.26171875, "learning_rate": 2.7358666873475493e-05, "loss": 0.8938, "step": 13645 }, { "epoch": 0.7831999311472588, "grad_norm": 0.26171875, "learning_rate": 2.7289876887347554e-05, "loss": 0.9411, "step": 13650 }, { "epoch": 0.7834868175689245, "grad_norm": 0.27734375, "learning_rate": 2.722115982439173e-05, "loss": 0.9338, "step": 13655 }, { "epoch": 0.7837737039905901, "grad_norm": 0.2578125, "learning_rate": 2.7152515753526364e-05, "loss": 0.8935, "step": 13660 }, { "epoch": 0.7840605904122558, "grad_norm": 0.265625, "learning_rate": 2.70839447435967e-05, "loss": 1.0433, "step": 13665 }, { "epoch": 0.7843474768339215, "grad_norm": 0.29296875, "learning_rate": 2.7015446863374637e-05, "loss": 0.9904, "step": 13670 }, { "epoch": 0.7846343632555871, "grad_norm": 0.265625, "learning_rate": 2.6947022181558813e-05, "loss": 0.8732, "step": 13675 }, { "epoch": 0.7849212496772527, "grad_norm": 0.271484375, "learning_rate": 2.6878670766774328e-05, "loss": 0.9867, "step": 13680 }, { "epoch": 0.7852081360989185, "grad_norm": 0.26171875, "learning_rate": 2.6810392687572928e-05, "loss": 0.9784, "step": 13685 }, { "epoch": 0.7854950225205841, "grad_norm": 0.255859375, "learning_rate": 2.6742188012432767e-05, "loss": 0.9386, "step": 13690 }, { "epoch": 0.7857819089422498, "grad_norm": 0.255859375, "learning_rate": 2.667405680975831e-05, "loss": 0.928, "step": 13695 }, { "epoch": 0.7860687953639154, "grad_norm": 0.27734375, "learning_rate": 2.6605999147880456e-05, "loss": 1.0328, "step": 13700 }, { "epoch": 0.7863556817855811, "grad_norm": 0.267578125, "learning_rate": 2.6538015095056223e-05, "loss": 0.9818, "step": 13705 }, { "epoch": 0.7866425682072468, "grad_norm": 0.287109375, "learning_rate": 2.6470104719468925e-05, "loss": 0.896, "step": 13710 }, { "epoch": 0.7869294546289124, "grad_norm": 0.26953125, "learning_rate": 2.6402268089227866e-05, "loss": 0.9751, "step": 13715 }, { "epoch": 0.787216341050578, "grad_norm": 0.2734375, "learning_rate": 2.6334505272368493e-05, "loss": 0.9742, "step": 13720 }, { "epoch": 0.7875032274722438, "grad_norm": 0.25, "learning_rate": 2.626681633685213e-05, "loss": 0.975, "step": 13725 }, { "epoch": 0.7877901138939094, "grad_norm": 0.26953125, "learning_rate": 2.6199201350566104e-05, "loss": 0.9139, "step": 13730 }, { "epoch": 0.788077000315575, "grad_norm": 0.2734375, "learning_rate": 2.613166038132345e-05, "loss": 0.9366, "step": 13735 }, { "epoch": 0.7883638867372407, "grad_norm": 0.2578125, "learning_rate": 2.606419349686312e-05, "loss": 0.9983, "step": 13740 }, { "epoch": 0.7886507731589064, "grad_norm": 0.2412109375, "learning_rate": 2.5996800764849638e-05, "loss": 0.8938, "step": 13745 }, { "epoch": 0.788937659580572, "grad_norm": 0.271484375, "learning_rate": 2.5929482252873183e-05, "loss": 0.992, "step": 13750 }, { "epoch": 0.7892245460022377, "grad_norm": 0.2451171875, "learning_rate": 2.5862238028449582e-05, "loss": 0.9313, "step": 13755 }, { "epoch": 0.7895114324239034, "grad_norm": 0.318359375, "learning_rate": 2.579506815902002e-05, "loss": 0.9754, "step": 13760 }, { "epoch": 0.7897983188455691, "grad_norm": 0.271484375, "learning_rate": 2.5727972711951208e-05, "loss": 0.9215, "step": 13765 }, { "epoch": 0.7900852052672347, "grad_norm": 0.27734375, "learning_rate": 2.5660951754535245e-05, "loss": 0.9188, "step": 13770 }, { "epoch": 0.7903720916889003, "grad_norm": 0.259765625, "learning_rate": 2.559400535398938e-05, "loss": 0.9405, "step": 13775 }, { "epoch": 0.7906589781105661, "grad_norm": 0.265625, "learning_rate": 2.5527133577456254e-05, "loss": 1.0501, "step": 13780 }, { "epoch": 0.7909458645322317, "grad_norm": 0.25, "learning_rate": 2.5460336492003522e-05, "loss": 0.9257, "step": 13785 }, { "epoch": 0.7912327509538973, "grad_norm": 0.283203125, "learning_rate": 2.5393614164624047e-05, "loss": 0.9218, "step": 13790 }, { "epoch": 0.791519637375563, "grad_norm": 0.263671875, "learning_rate": 2.5326966662235597e-05, "loss": 0.9927, "step": 13795 }, { "epoch": 0.7918065237972287, "grad_norm": 0.2578125, "learning_rate": 2.5260394051681024e-05, "loss": 1.0309, "step": 13800 }, { "epoch": 0.7920934102188943, "grad_norm": 0.26953125, "learning_rate": 2.5193896399727945e-05, "loss": 0.982, "step": 13805 }, { "epoch": 0.79238029664056, "grad_norm": 0.2734375, "learning_rate": 2.5127473773068888e-05, "loss": 0.9994, "step": 13810 }, { "epoch": 0.7926671830622256, "grad_norm": 0.259765625, "learning_rate": 2.506112623832113e-05, "loss": 0.978, "step": 13815 }, { "epoch": 0.7929540694838914, "grad_norm": 0.271484375, "learning_rate": 2.499485386202659e-05, "loss": 0.9416, "step": 13820 }, { "epoch": 0.793240955905557, "grad_norm": 0.263671875, "learning_rate": 2.49286567106518e-05, "loss": 0.9737, "step": 13825 }, { "epoch": 0.7935278423272226, "grad_norm": 0.267578125, "learning_rate": 2.4862534850587925e-05, "loss": 0.9144, "step": 13830 }, { "epoch": 0.7938147287488884, "grad_norm": 0.25390625, "learning_rate": 2.4796488348150548e-05, "loss": 0.8966, "step": 13835 }, { "epoch": 0.794101615170554, "grad_norm": 0.255859375, "learning_rate": 2.4730517269579667e-05, "loss": 0.9133, "step": 13840 }, { "epoch": 0.7943885015922196, "grad_norm": 0.294921875, "learning_rate": 2.4664621681039723e-05, "loss": 0.9688, "step": 13845 }, { "epoch": 0.7946753880138853, "grad_norm": 0.255859375, "learning_rate": 2.459880164861932e-05, "loss": 1.0074, "step": 13850 }, { "epoch": 0.794962274435551, "grad_norm": 0.28125, "learning_rate": 2.453305723833139e-05, "loss": 0.958, "step": 13855 }, { "epoch": 0.7952491608572166, "grad_norm": 0.267578125, "learning_rate": 2.4467388516113e-05, "loss": 0.9174, "step": 13860 }, { "epoch": 0.7955360472788823, "grad_norm": 0.2578125, "learning_rate": 2.4401795547825234e-05, "loss": 0.915, "step": 13865 }, { "epoch": 0.7958229337005479, "grad_norm": 0.265625, "learning_rate": 2.433627839925332e-05, "loss": 0.9143, "step": 13870 }, { "epoch": 0.7961098201222137, "grad_norm": 0.26171875, "learning_rate": 2.427083713610632e-05, "loss": 0.9074, "step": 13875 }, { "epoch": 0.7963967065438793, "grad_norm": 0.279296875, "learning_rate": 2.42054718240173e-05, "loss": 0.9928, "step": 13880 }, { "epoch": 0.7966835929655449, "grad_norm": 0.251953125, "learning_rate": 2.4140182528543044e-05, "loss": 0.9283, "step": 13885 }, { "epoch": 0.7969704793872106, "grad_norm": 0.28125, "learning_rate": 2.4074969315164176e-05, "loss": 0.9486, "step": 13890 }, { "epoch": 0.7972573658088763, "grad_norm": 0.255859375, "learning_rate": 2.4009832249285035e-05, "loss": 0.9675, "step": 13895 }, { "epoch": 0.7975442522305419, "grad_norm": 0.291015625, "learning_rate": 2.3944771396233467e-05, "loss": 1.0149, "step": 13900 }, { "epoch": 0.7978311386522076, "grad_norm": 0.255859375, "learning_rate": 2.387978682126104e-05, "loss": 0.9606, "step": 13905 }, { "epoch": 0.7981180250738733, "grad_norm": 0.275390625, "learning_rate": 2.3814878589542678e-05, "loss": 0.9347, "step": 13910 }, { "epoch": 0.7984049114955389, "grad_norm": 0.25, "learning_rate": 2.3750046766176846e-05, "loss": 0.9558, "step": 13915 }, { "epoch": 0.7986917979172046, "grad_norm": 0.244140625, "learning_rate": 2.368529141618533e-05, "loss": 0.9776, "step": 13920 }, { "epoch": 0.7989786843388702, "grad_norm": 0.263671875, "learning_rate": 2.362061260451316e-05, "loss": 0.939, "step": 13925 }, { "epoch": 0.799265570760536, "grad_norm": 0.28125, "learning_rate": 2.3556010396028737e-05, "loss": 1.0065, "step": 13930 }, { "epoch": 0.7995524571822016, "grad_norm": 0.2734375, "learning_rate": 2.3491484855523504e-05, "loss": 0.9895, "step": 13935 }, { "epoch": 0.7998393436038672, "grad_norm": 0.265625, "learning_rate": 2.3427036047712125e-05, "loss": 0.9738, "step": 13940 }, { "epoch": 0.8001262300255328, "grad_norm": 0.271484375, "learning_rate": 2.3362664037232184e-05, "loss": 0.9787, "step": 13945 }, { "epoch": 0.8004131164471986, "grad_norm": 0.302734375, "learning_rate": 2.3298368888644386e-05, "loss": 0.9474, "step": 13950 }, { "epoch": 0.8007000028688642, "grad_norm": 0.259765625, "learning_rate": 2.323415066643221e-05, "loss": 0.9172, "step": 13955 }, { "epoch": 0.8009868892905299, "grad_norm": 0.255859375, "learning_rate": 2.3170009435002083e-05, "loss": 0.9855, "step": 13960 }, { "epoch": 0.8012737757121955, "grad_norm": 0.26171875, "learning_rate": 2.3105945258683125e-05, "loss": 0.9349, "step": 13965 }, { "epoch": 0.8015606621338612, "grad_norm": 0.26171875, "learning_rate": 2.3041958201727275e-05, "loss": 0.9141, "step": 13970 }, { "epoch": 0.8018475485555269, "grad_norm": 0.26953125, "learning_rate": 2.29780483283091e-05, "loss": 0.8616, "step": 13975 }, { "epoch": 0.8021344349771925, "grad_norm": 0.2734375, "learning_rate": 2.2914215702525677e-05, "loss": 0.9498, "step": 13980 }, { "epoch": 0.8024213213988582, "grad_norm": 0.306640625, "learning_rate": 2.285046038839672e-05, "loss": 1.0223, "step": 13985 }, { "epoch": 0.8027082078205239, "grad_norm": 0.25390625, "learning_rate": 2.2786782449864298e-05, "loss": 0.9962, "step": 13990 }, { "epoch": 0.8029950942421895, "grad_norm": 0.28515625, "learning_rate": 2.2723181950793004e-05, "loss": 0.9759, "step": 13995 }, { "epoch": 0.8032819806638551, "grad_norm": 0.294921875, "learning_rate": 2.265965895496962e-05, "loss": 0.934, "step": 14000 }, { "epoch": 0.8035688670855209, "grad_norm": 0.255859375, "learning_rate": 2.259621352610333e-05, "loss": 0.9453, "step": 14005 }, { "epoch": 0.8038557535071865, "grad_norm": 0.279296875, "learning_rate": 2.2532845727825445e-05, "loss": 0.895, "step": 14010 }, { "epoch": 0.8041426399288522, "grad_norm": 0.248046875, "learning_rate": 2.246955562368941e-05, "loss": 0.96, "step": 14015 }, { "epoch": 0.8044295263505178, "grad_norm": 0.271484375, "learning_rate": 2.2406343277170827e-05, "loss": 0.9791, "step": 14020 }, { "epoch": 0.8047164127721835, "grad_norm": 0.240234375, "learning_rate": 2.234320875166721e-05, "loss": 0.9636, "step": 14025 }, { "epoch": 0.8050032991938492, "grad_norm": 0.2578125, "learning_rate": 2.228015211049813e-05, "loss": 0.9886, "step": 14030 }, { "epoch": 0.8052901856155148, "grad_norm": 0.26953125, "learning_rate": 2.2217173416904935e-05, "loss": 0.9115, "step": 14035 }, { "epoch": 0.8055770720371804, "grad_norm": 0.2578125, "learning_rate": 2.2154272734050908e-05, "loss": 0.9285, "step": 14040 }, { "epoch": 0.8058639584588462, "grad_norm": 0.30859375, "learning_rate": 2.2091450125020986e-05, "loss": 0.9934, "step": 14045 }, { "epoch": 0.8061508448805118, "grad_norm": 0.287109375, "learning_rate": 2.202870565282188e-05, "loss": 0.9684, "step": 14050 }, { "epoch": 0.8064377313021774, "grad_norm": 0.2734375, "learning_rate": 2.1966039380381944e-05, "loss": 0.9709, "step": 14055 }, { "epoch": 0.8067246177238432, "grad_norm": 0.31640625, "learning_rate": 2.1903451370551e-05, "loss": 0.9703, "step": 14060 }, { "epoch": 0.8070115041455088, "grad_norm": 0.2470703125, "learning_rate": 2.1840941686100524e-05, "loss": 0.922, "step": 14065 }, { "epoch": 0.8072983905671745, "grad_norm": 0.263671875, "learning_rate": 2.1778510389723283e-05, "loss": 0.9288, "step": 14070 }, { "epoch": 0.8075852769888401, "grad_norm": 0.267578125, "learning_rate": 2.1716157544033578e-05, "loss": 0.9018, "step": 14075 }, { "epoch": 0.8078721634105058, "grad_norm": 0.25390625, "learning_rate": 2.1653883211566895e-05, "loss": 0.9552, "step": 14080 }, { "epoch": 0.8081590498321715, "grad_norm": 0.25390625, "learning_rate": 2.1591687454780087e-05, "loss": 0.9906, "step": 14085 }, { "epoch": 0.8084459362538371, "grad_norm": 0.2578125, "learning_rate": 2.152957033605111e-05, "loss": 0.932, "step": 14090 }, { "epoch": 0.8087328226755027, "grad_norm": 0.310546875, "learning_rate": 2.146753191767913e-05, "loss": 0.891, "step": 14095 }, { "epoch": 0.8090197090971685, "grad_norm": 0.279296875, "learning_rate": 2.14055722618843e-05, "loss": 0.8697, "step": 14100 }, { "epoch": 0.8093065955188341, "grad_norm": 0.28125, "learning_rate": 2.1343691430807878e-05, "loss": 0.9596, "step": 14105 }, { "epoch": 0.8095934819404997, "grad_norm": 0.283203125, "learning_rate": 2.1281889486511984e-05, "loss": 1.0848, "step": 14110 }, { "epoch": 0.8098803683621654, "grad_norm": 0.25390625, "learning_rate": 2.122016649097962e-05, "loss": 0.9434, "step": 14115 }, { "epoch": 0.8101672547838311, "grad_norm": 0.27734375, "learning_rate": 2.1158522506114696e-05, "loss": 0.8717, "step": 14120 }, { "epoch": 0.8104541412054967, "grad_norm": 0.25390625, "learning_rate": 2.1096957593741772e-05, "loss": 1.0173, "step": 14125 }, { "epoch": 0.8107410276271624, "grad_norm": 0.279296875, "learning_rate": 2.1035471815606178e-05, "loss": 0.9209, "step": 14130 }, { "epoch": 0.811027914048828, "grad_norm": 0.271484375, "learning_rate": 2.0974065233373864e-05, "loss": 0.9671, "step": 14135 }, { "epoch": 0.8113148004704938, "grad_norm": 0.287109375, "learning_rate": 2.0912737908631296e-05, "loss": 0.9522, "step": 14140 }, { "epoch": 0.8116016868921594, "grad_norm": 0.267578125, "learning_rate": 2.0851489902885556e-05, "loss": 0.9186, "step": 14145 }, { "epoch": 0.811888573313825, "grad_norm": 0.267578125, "learning_rate": 2.079032127756405e-05, "loss": 0.9701, "step": 14150 }, { "epoch": 0.8121754597354908, "grad_norm": 0.267578125, "learning_rate": 2.0729232094014693e-05, "loss": 0.9597, "step": 14155 }, { "epoch": 0.8124623461571564, "grad_norm": 0.263671875, "learning_rate": 2.066822241350561e-05, "loss": 0.9617, "step": 14160 }, { "epoch": 0.812749232578822, "grad_norm": 0.279296875, "learning_rate": 2.06072922972253e-05, "loss": 0.9674, "step": 14165 }, { "epoch": 0.8130361190004877, "grad_norm": 0.267578125, "learning_rate": 2.0546441806282367e-05, "loss": 1.005, "step": 14170 }, { "epoch": 0.8133230054221534, "grad_norm": 0.2734375, "learning_rate": 2.048567100170562e-05, "loss": 0.9959, "step": 14175 }, { "epoch": 0.813609891843819, "grad_norm": 0.26953125, "learning_rate": 2.0424979944443946e-05, "loss": 0.9815, "step": 14180 }, { "epoch": 0.8138967782654847, "grad_norm": 0.287109375, "learning_rate": 2.0364368695366233e-05, "loss": 1.0133, "step": 14185 }, { "epoch": 0.8141836646871503, "grad_norm": 0.259765625, "learning_rate": 2.0303837315261277e-05, "loss": 0.9507, "step": 14190 }, { "epoch": 0.814470551108816, "grad_norm": 0.3046875, "learning_rate": 2.0243385864837884e-05, "loss": 0.9735, "step": 14195 }, { "epoch": 0.8147574375304817, "grad_norm": 0.275390625, "learning_rate": 2.018301440472461e-05, "loss": 1.0265, "step": 14200 }, { "epoch": 0.8150443239521473, "grad_norm": 0.275390625, "learning_rate": 2.012272299546978e-05, "loss": 0.9078, "step": 14205 }, { "epoch": 0.815331210373813, "grad_norm": 0.248046875, "learning_rate": 2.0062511697541485e-05, "loss": 0.8958, "step": 14210 }, { "epoch": 0.8156180967954787, "grad_norm": 0.259765625, "learning_rate": 2.0002380571327482e-05, "loss": 0.9162, "step": 14215 }, { "epoch": 0.8159049832171443, "grad_norm": 0.263671875, "learning_rate": 1.994232967713505e-05, "loss": 0.8864, "step": 14220 }, { "epoch": 0.81619186963881, "grad_norm": 0.265625, "learning_rate": 1.988235907519107e-05, "loss": 0.9853, "step": 14225 }, { "epoch": 0.8164787560604757, "grad_norm": 0.263671875, "learning_rate": 1.9822468825641816e-05, "loss": 0.9142, "step": 14230 }, { "epoch": 0.8167656424821413, "grad_norm": 0.259765625, "learning_rate": 1.9762658988553086e-05, "loss": 0.9686, "step": 14235 }, { "epoch": 0.817052528903807, "grad_norm": 0.279296875, "learning_rate": 1.9702929623909903e-05, "loss": 0.937, "step": 14240 }, { "epoch": 0.8173394153254726, "grad_norm": 0.263671875, "learning_rate": 1.9643280791616702e-05, "loss": 0.9686, "step": 14245 }, { "epoch": 0.8176263017471384, "grad_norm": 0.275390625, "learning_rate": 1.9583712551497036e-05, "loss": 0.9359, "step": 14250 }, { "epoch": 0.817913188168804, "grad_norm": 0.2578125, "learning_rate": 1.9524224963293726e-05, "loss": 0.9685, "step": 14255 }, { "epoch": 0.8182000745904696, "grad_norm": 0.25390625, "learning_rate": 1.946481808666868e-05, "loss": 0.9147, "step": 14260 }, { "epoch": 0.8184869610121353, "grad_norm": 0.2470703125, "learning_rate": 1.9405491981202794e-05, "loss": 0.9796, "step": 14265 }, { "epoch": 0.818773847433801, "grad_norm": 0.267578125, "learning_rate": 1.9346246706396066e-05, "loss": 1.043, "step": 14270 }, { "epoch": 0.8190607338554666, "grad_norm": 0.287109375, "learning_rate": 1.9287082321667295e-05, "loss": 0.8958, "step": 14275 }, { "epoch": 0.8193476202771323, "grad_norm": 0.259765625, "learning_rate": 1.9227998886354305e-05, "loss": 0.9139, "step": 14280 }, { "epoch": 0.8196345066987979, "grad_norm": 0.26953125, "learning_rate": 1.9168996459713594e-05, "loss": 0.9886, "step": 14285 }, { "epoch": 0.8199213931204636, "grad_norm": 0.279296875, "learning_rate": 1.9110075100920466e-05, "loss": 0.9999, "step": 14290 }, { "epoch": 0.8202082795421293, "grad_norm": 0.259765625, "learning_rate": 1.9051234869068978e-05, "loss": 0.9907, "step": 14295 }, { "epoch": 0.8204951659637949, "grad_norm": 0.265625, "learning_rate": 1.8992475823171717e-05, "loss": 0.9527, "step": 14300 }, { "epoch": 0.8207820523854606, "grad_norm": 0.275390625, "learning_rate": 1.8933798022159943e-05, "loss": 0.9802, "step": 14305 }, { "epoch": 0.8210689388071263, "grad_norm": 0.275390625, "learning_rate": 1.8875201524883347e-05, "loss": 0.9615, "step": 14310 }, { "epoch": 0.8213558252287919, "grad_norm": 0.267578125, "learning_rate": 1.8816686390110172e-05, "loss": 0.9785, "step": 14315 }, { "epoch": 0.8216427116504575, "grad_norm": 0.2412109375, "learning_rate": 1.8758252676526945e-05, "loss": 0.8706, "step": 14320 }, { "epoch": 0.8219295980721233, "grad_norm": 0.298828125, "learning_rate": 1.869990044273867e-05, "loss": 0.9158, "step": 14325 }, { "epoch": 0.8222164844937889, "grad_norm": 0.2470703125, "learning_rate": 1.8641629747268486e-05, "loss": 0.8644, "step": 14330 }, { "epoch": 0.8225033709154546, "grad_norm": 0.26953125, "learning_rate": 1.858344064855787e-05, "loss": 0.9103, "step": 14335 }, { "epoch": 0.8227902573371202, "grad_norm": 0.265625, "learning_rate": 1.8525333204966443e-05, "loss": 0.9273, "step": 14340 }, { "epoch": 0.8230771437587859, "grad_norm": 0.265625, "learning_rate": 1.8467307474771855e-05, "loss": 0.9905, "step": 14345 }, { "epoch": 0.8233640301804516, "grad_norm": 0.2578125, "learning_rate": 1.8409363516169907e-05, "loss": 0.9778, "step": 14350 }, { "epoch": 0.8236509166021172, "grad_norm": 0.259765625, "learning_rate": 1.8351501387274307e-05, "loss": 0.9626, "step": 14355 }, { "epoch": 0.8239378030237828, "grad_norm": 0.259765625, "learning_rate": 1.829372114611675e-05, "loss": 0.9024, "step": 14360 }, { "epoch": 0.8242246894454486, "grad_norm": 0.259765625, "learning_rate": 1.823602285064675e-05, "loss": 0.9808, "step": 14365 }, { "epoch": 0.8245115758671142, "grad_norm": 0.2578125, "learning_rate": 1.817840655873171e-05, "loss": 0.8912, "step": 14370 }, { "epoch": 0.8247984622887798, "grad_norm": 0.2490234375, "learning_rate": 1.8120872328156713e-05, "loss": 0.9083, "step": 14375 }, { "epoch": 0.8250853487104456, "grad_norm": 0.259765625, "learning_rate": 1.8063420216624548e-05, "loss": 0.8488, "step": 14380 }, { "epoch": 0.8253722351321112, "grad_norm": 0.26171875, "learning_rate": 1.8006050281755725e-05, "loss": 0.8896, "step": 14385 }, { "epoch": 0.8256591215537769, "grad_norm": 0.255859375, "learning_rate": 1.794876258108823e-05, "loss": 0.8936, "step": 14390 }, { "epoch": 0.8259460079754425, "grad_norm": 0.265625, "learning_rate": 1.7891557172077666e-05, "loss": 1.0125, "step": 14395 }, { "epoch": 0.8262328943971082, "grad_norm": 0.2734375, "learning_rate": 1.783443411209701e-05, "loss": 0.9513, "step": 14400 }, { "epoch": 0.8265197808187739, "grad_norm": 0.255859375, "learning_rate": 1.7777393458436753e-05, "loss": 0.9184, "step": 14405 }, { "epoch": 0.8268066672404395, "grad_norm": 0.27734375, "learning_rate": 1.7720435268304625e-05, "loss": 1.0095, "step": 14410 }, { "epoch": 0.8270935536621051, "grad_norm": 0.27734375, "learning_rate": 1.766355959882575e-05, "loss": 0.943, "step": 14415 }, { "epoch": 0.8273804400837709, "grad_norm": 0.28515625, "learning_rate": 1.7606766507042473e-05, "loss": 0.9648, "step": 14420 }, { "epoch": 0.8276673265054365, "grad_norm": 0.265625, "learning_rate": 1.755005604991423e-05, "loss": 0.974, "step": 14425 }, { "epoch": 0.8279542129271021, "grad_norm": 0.265625, "learning_rate": 1.7493428284317716e-05, "loss": 0.8898, "step": 14430 }, { "epoch": 0.8282410993487678, "grad_norm": 0.263671875, "learning_rate": 1.7436883267046567e-05, "loss": 0.938, "step": 14435 }, { "epoch": 0.8285279857704335, "grad_norm": 0.275390625, "learning_rate": 1.7380421054811524e-05, "loss": 1.0192, "step": 14440 }, { "epoch": 0.8288148721920992, "grad_norm": 0.25390625, "learning_rate": 1.732404170424019e-05, "loss": 0.8849, "step": 14445 }, { "epoch": 0.8291017586137648, "grad_norm": 0.26953125, "learning_rate": 1.7267745271877177e-05, "loss": 0.9837, "step": 14450 }, { "epoch": 0.8293886450354305, "grad_norm": 0.259765625, "learning_rate": 1.7211531814183825e-05, "loss": 0.867, "step": 14455 }, { "epoch": 0.8296755314570962, "grad_norm": 0.26171875, "learning_rate": 1.7155401387538327e-05, "loss": 0.9246, "step": 14460 }, { "epoch": 0.8299624178787618, "grad_norm": 0.2578125, "learning_rate": 1.709935404823555e-05, "loss": 0.8574, "step": 14465 }, { "epoch": 0.8302493043004274, "grad_norm": 0.26953125, "learning_rate": 1.70433898524871e-05, "loss": 0.9351, "step": 14470 }, { "epoch": 0.8305361907220932, "grad_norm": 0.271484375, "learning_rate": 1.698750885642114e-05, "loss": 0.9472, "step": 14475 }, { "epoch": 0.8308230771437588, "grad_norm": 0.263671875, "learning_rate": 1.693171111608237e-05, "loss": 0.9334, "step": 14480 }, { "epoch": 0.8311099635654244, "grad_norm": 0.255859375, "learning_rate": 1.6875996687432073e-05, "loss": 1.0026, "step": 14485 }, { "epoch": 0.8313968499870901, "grad_norm": 0.2734375, "learning_rate": 1.6820365626347878e-05, "loss": 0.9227, "step": 14490 }, { "epoch": 0.8316837364087558, "grad_norm": 0.26171875, "learning_rate": 1.676481798862387e-05, "loss": 0.8875, "step": 14495 }, { "epoch": 0.8319706228304214, "grad_norm": 0.2890625, "learning_rate": 1.6709353829970496e-05, "loss": 0.9082, "step": 14500 }, { "epoch": 0.8322575092520871, "grad_norm": 0.265625, "learning_rate": 1.665397320601436e-05, "loss": 0.9021, "step": 14505 }, { "epoch": 0.8325443956737527, "grad_norm": 0.25390625, "learning_rate": 1.659867617229841e-05, "loss": 0.8888, "step": 14510 }, { "epoch": 0.8328312820954185, "grad_norm": 0.265625, "learning_rate": 1.654346278428166e-05, "loss": 0.947, "step": 14515 }, { "epoch": 0.8331181685170841, "grad_norm": 0.287109375, "learning_rate": 1.6488333097339335e-05, "loss": 0.9312, "step": 14520 }, { "epoch": 0.8334050549387497, "grad_norm": 0.271484375, "learning_rate": 1.64332871667626e-05, "loss": 0.8868, "step": 14525 }, { "epoch": 0.8336919413604154, "grad_norm": 0.279296875, "learning_rate": 1.6378325047758723e-05, "loss": 0.9549, "step": 14530 }, { "epoch": 0.8339788277820811, "grad_norm": 0.259765625, "learning_rate": 1.6323446795450826e-05, "loss": 0.9284, "step": 14535 }, { "epoch": 0.8342657142037467, "grad_norm": 0.2890625, "learning_rate": 1.6268652464877988e-05, "loss": 1.098, "step": 14540 }, { "epoch": 0.8345526006254124, "grad_norm": 0.265625, "learning_rate": 1.6213942110995105e-05, "loss": 1.017, "step": 14545 }, { "epoch": 0.8348394870470781, "grad_norm": 0.296875, "learning_rate": 1.6159315788672825e-05, "loss": 1.0096, "step": 14550 }, { "epoch": 0.8351263734687437, "grad_norm": 0.2451171875, "learning_rate": 1.6104773552697517e-05, "loss": 0.9416, "step": 14555 }, { "epoch": 0.8354132598904094, "grad_norm": 0.2470703125, "learning_rate": 1.6050315457771257e-05, "loss": 0.9599, "step": 14560 }, { "epoch": 0.835700146312075, "grad_norm": 0.248046875, "learning_rate": 1.5995941558511695e-05, "loss": 0.9277, "step": 14565 }, { "epoch": 0.8359870327337408, "grad_norm": 0.2578125, "learning_rate": 1.5941651909452028e-05, "loss": 0.9923, "step": 14570 }, { "epoch": 0.8362739191554064, "grad_norm": 0.30859375, "learning_rate": 1.5887446565041007e-05, "loss": 0.9716, "step": 14575 }, { "epoch": 0.836560805577072, "grad_norm": 0.271484375, "learning_rate": 1.583332557964282e-05, "loss": 0.9944, "step": 14580 }, { "epoch": 0.8368476919987377, "grad_norm": 0.3203125, "learning_rate": 1.5779289007537e-05, "loss": 0.9232, "step": 14585 }, { "epoch": 0.8371345784204034, "grad_norm": 0.267578125, "learning_rate": 1.5725336902918486e-05, "loss": 0.9577, "step": 14590 }, { "epoch": 0.837421464842069, "grad_norm": 0.2578125, "learning_rate": 1.5671469319897425e-05, "loss": 0.9747, "step": 14595 }, { "epoch": 0.8377083512637347, "grad_norm": 0.28125, "learning_rate": 1.561768631249929e-05, "loss": 0.9644, "step": 14600 }, { "epoch": 0.8379952376854003, "grad_norm": 0.345703125, "learning_rate": 1.5563987934664624e-05, "loss": 0.9272, "step": 14605 }, { "epoch": 0.838282124107066, "grad_norm": 0.28515625, "learning_rate": 1.5510374240249205e-05, "loss": 0.963, "step": 14610 }, { "epoch": 0.8385690105287317, "grad_norm": 0.26171875, "learning_rate": 1.5456845283023758e-05, "loss": 0.9626, "step": 14615 }, { "epoch": 0.8388558969503973, "grad_norm": 0.26171875, "learning_rate": 1.540340111667413e-05, "loss": 0.8849, "step": 14620 }, { "epoch": 0.839142783372063, "grad_norm": 0.275390625, "learning_rate": 1.5350041794801097e-05, "loss": 0.9448, "step": 14625 }, { "epoch": 0.8394296697937287, "grad_norm": 0.263671875, "learning_rate": 1.5296767370920273e-05, "loss": 0.9787, "step": 14630 }, { "epoch": 0.8397165562153943, "grad_norm": 0.2578125, "learning_rate": 1.5243577898462246e-05, "loss": 0.9258, "step": 14635 }, { "epoch": 0.84000344263706, "grad_norm": 0.28125, "learning_rate": 1.5190473430772289e-05, "loss": 1.0369, "step": 14640 }, { "epoch": 0.8402903290587257, "grad_norm": 0.27734375, "learning_rate": 1.5137454021110508e-05, "loss": 0.931, "step": 14645 }, { "epoch": 0.8405772154803913, "grad_norm": 0.25390625, "learning_rate": 1.5084519722651658e-05, "loss": 0.8989, "step": 14650 }, { "epoch": 0.840864101902057, "grad_norm": 0.26171875, "learning_rate": 1.5031670588485103e-05, "loss": 0.9113, "step": 14655 }, { "epoch": 0.8411509883237226, "grad_norm": 0.29296875, "learning_rate": 1.497890667161489e-05, "loss": 0.9779, "step": 14660 }, { "epoch": 0.8414378747453883, "grad_norm": 0.2470703125, "learning_rate": 1.4926228024959487e-05, "loss": 0.8672, "step": 14665 }, { "epoch": 0.841724761167054, "grad_norm": 0.25, "learning_rate": 1.4873634701351946e-05, "loss": 0.8913, "step": 14670 }, { "epoch": 0.8420116475887196, "grad_norm": 0.291015625, "learning_rate": 1.4821126753539638e-05, "loss": 0.9864, "step": 14675 }, { "epoch": 0.8422985340103852, "grad_norm": 0.291015625, "learning_rate": 1.4768704234184428e-05, "loss": 0.9811, "step": 14680 }, { "epoch": 0.842585420432051, "grad_norm": 0.2734375, "learning_rate": 1.471636719586238e-05, "loss": 0.9993, "step": 14685 }, { "epoch": 0.8428723068537166, "grad_norm": 0.263671875, "learning_rate": 1.466411569106393e-05, "loss": 0.9146, "step": 14690 }, { "epoch": 0.8431591932753822, "grad_norm": 0.26953125, "learning_rate": 1.4611949772193657e-05, "loss": 0.8918, "step": 14695 }, { "epoch": 0.843446079697048, "grad_norm": 0.2734375, "learning_rate": 1.4559869491570332e-05, "loss": 0.879, "step": 14700 }, { "epoch": 0.8437329661187136, "grad_norm": 0.259765625, "learning_rate": 1.4507874901426877e-05, "loss": 0.9938, "step": 14705 }, { "epoch": 0.8440198525403793, "grad_norm": 0.2890625, "learning_rate": 1.4455966053910187e-05, "loss": 0.9187, "step": 14710 }, { "epoch": 0.8443067389620449, "grad_norm": 0.263671875, "learning_rate": 1.4404143001081238e-05, "loss": 0.9377, "step": 14715 }, { "epoch": 0.8445936253837106, "grad_norm": 0.28515625, "learning_rate": 1.4352405794914892e-05, "loss": 0.9388, "step": 14720 }, { "epoch": 0.8448805118053763, "grad_norm": 0.26953125, "learning_rate": 1.4300754487300006e-05, "loss": 0.9555, "step": 14725 }, { "epoch": 0.8451673982270419, "grad_norm": 0.279296875, "learning_rate": 1.4249189130039175e-05, "loss": 0.9243, "step": 14730 }, { "epoch": 0.8454542846487075, "grad_norm": 0.2578125, "learning_rate": 1.419770977484891e-05, "loss": 0.9909, "step": 14735 }, { "epoch": 0.8457411710703733, "grad_norm": 0.259765625, "learning_rate": 1.4146316473359366e-05, "loss": 0.9172, "step": 14740 }, { "epoch": 0.8460280574920389, "grad_norm": 0.2578125, "learning_rate": 1.4095009277114412e-05, "loss": 0.97, "step": 14745 }, { "epoch": 0.8463149439137045, "grad_norm": 0.267578125, "learning_rate": 1.4043788237571632e-05, "loss": 0.9119, "step": 14750 }, { "epoch": 0.8466018303353702, "grad_norm": 0.279296875, "learning_rate": 1.3992653406102097e-05, "loss": 0.9739, "step": 14755 }, { "epoch": 0.8468887167570359, "grad_norm": 0.259765625, "learning_rate": 1.3941604833990528e-05, "loss": 0.9994, "step": 14760 }, { "epoch": 0.8471756031787016, "grad_norm": 0.265625, "learning_rate": 1.389064257243502e-05, "loss": 0.8985, "step": 14765 }, { "epoch": 0.8474624896003672, "grad_norm": 0.265625, "learning_rate": 1.3839766672547206e-05, "loss": 0.921, "step": 14770 }, { "epoch": 0.8477493760220329, "grad_norm": 0.251953125, "learning_rate": 1.3788977185352003e-05, "loss": 0.9241, "step": 14775 }, { "epoch": 0.8480362624436986, "grad_norm": 0.26953125, "learning_rate": 1.3738274161787768e-05, "loss": 0.9161, "step": 14780 }, { "epoch": 0.8483231488653642, "grad_norm": 0.279296875, "learning_rate": 1.3687657652706076e-05, "loss": 0.9394, "step": 14785 }, { "epoch": 0.8486100352870298, "grad_norm": 0.2470703125, "learning_rate": 1.3637127708871734e-05, "loss": 0.9656, "step": 14790 }, { "epoch": 0.8488969217086956, "grad_norm": 0.26171875, "learning_rate": 1.3586684380962778e-05, "loss": 0.9531, "step": 14795 }, { "epoch": 0.8491838081303612, "grad_norm": 0.259765625, "learning_rate": 1.3536327719570286e-05, "loss": 0.9015, "step": 14800 }, { "epoch": 0.8494706945520268, "grad_norm": 0.267578125, "learning_rate": 1.3486057775198535e-05, "loss": 0.9578, "step": 14805 }, { "epoch": 0.8497575809736925, "grad_norm": 0.314453125, "learning_rate": 1.3435874598264709e-05, "loss": 1.0282, "step": 14810 }, { "epoch": 0.8500444673953582, "grad_norm": 0.2578125, "learning_rate": 1.3385778239099067e-05, "loss": 0.9185, "step": 14815 }, { "epoch": 0.8503313538170238, "grad_norm": 0.255859375, "learning_rate": 1.3335768747944722e-05, "loss": 0.9047, "step": 14820 }, { "epoch": 0.8506182402386895, "grad_norm": 0.2578125, "learning_rate": 1.3285846174957728e-05, "loss": 0.9599, "step": 14825 }, { "epoch": 0.8509051266603551, "grad_norm": 0.2470703125, "learning_rate": 1.3236010570206914e-05, "loss": 0.9059, "step": 14830 }, { "epoch": 0.8511920130820209, "grad_norm": 0.2578125, "learning_rate": 1.3186261983673942e-05, "loss": 0.9243, "step": 14835 }, { "epoch": 0.8514788995036865, "grad_norm": 0.248046875, "learning_rate": 1.3136600465253147e-05, "loss": 0.923, "step": 14840 }, { "epoch": 0.8517657859253521, "grad_norm": 0.26171875, "learning_rate": 1.308702606475154e-05, "loss": 0.9764, "step": 14845 }, { "epoch": 0.8520526723470179, "grad_norm": 0.28125, "learning_rate": 1.3037538831888819e-05, "loss": 0.9518, "step": 14850 }, { "epoch": 0.8523395587686835, "grad_norm": 0.259765625, "learning_rate": 1.298813881629718e-05, "loss": 0.8884, "step": 14855 }, { "epoch": 0.8526264451903491, "grad_norm": 0.279296875, "learning_rate": 1.2938826067521404e-05, "loss": 0.8638, "step": 14860 }, { "epoch": 0.8529133316120148, "grad_norm": 0.265625, "learning_rate": 1.2889600635018762e-05, "loss": 1.0179, "step": 14865 }, { "epoch": 0.8532002180336805, "grad_norm": 0.310546875, "learning_rate": 1.2840462568158874e-05, "loss": 0.8908, "step": 14870 }, { "epoch": 0.8534871044553461, "grad_norm": 0.25390625, "learning_rate": 1.2791411916223827e-05, "loss": 0.923, "step": 14875 }, { "epoch": 0.8537739908770118, "grad_norm": 0.28515625, "learning_rate": 1.2742448728407963e-05, "loss": 0.9229, "step": 14880 }, { "epoch": 0.8540608772986774, "grad_norm": 0.267578125, "learning_rate": 1.2693573053817976e-05, "loss": 0.8928, "step": 14885 }, { "epoch": 0.8543477637203432, "grad_norm": 0.26171875, "learning_rate": 1.2644784941472699e-05, "loss": 0.9357, "step": 14890 }, { "epoch": 0.8546346501420088, "grad_norm": 0.24609375, "learning_rate": 1.2596084440303258e-05, "loss": 0.9196, "step": 14895 }, { "epoch": 0.8549215365636744, "grad_norm": 0.255859375, "learning_rate": 1.2547471599152804e-05, "loss": 0.9682, "step": 14900 }, { "epoch": 0.85520842298534, "grad_norm": 0.27734375, "learning_rate": 1.2498946466776639e-05, "loss": 0.99, "step": 14905 }, { "epoch": 0.8554953094070058, "grad_norm": 0.298828125, "learning_rate": 1.24505090918421e-05, "loss": 0.9751, "step": 14910 }, { "epoch": 0.8557821958286714, "grad_norm": 0.2890625, "learning_rate": 1.240215952292847e-05, "loss": 0.9976, "step": 14915 }, { "epoch": 0.8560690822503371, "grad_norm": 0.26171875, "learning_rate": 1.235389780852696e-05, "loss": 0.951, "step": 14920 }, { "epoch": 0.8563559686720027, "grad_norm": 0.275390625, "learning_rate": 1.2305723997040752e-05, "loss": 0.9541, "step": 14925 }, { "epoch": 0.8566428550936684, "grad_norm": 0.25390625, "learning_rate": 1.2257638136784777e-05, "loss": 0.9363, "step": 14930 }, { "epoch": 0.8569297415153341, "grad_norm": 0.265625, "learning_rate": 1.2209640275985779e-05, "loss": 0.9402, "step": 14935 }, { "epoch": 0.8572166279369997, "grad_norm": 0.26953125, "learning_rate": 1.2161730462782283e-05, "loss": 0.927, "step": 14940 }, { "epoch": 0.8575035143586655, "grad_norm": 0.28125, "learning_rate": 1.211390874522449e-05, "loss": 1.0213, "step": 14945 }, { "epoch": 0.8577904007803311, "grad_norm": 0.265625, "learning_rate": 1.2066175171274219e-05, "loss": 0.9526, "step": 14950 }, { "epoch": 0.8580772872019967, "grad_norm": 0.2314453125, "learning_rate": 1.2018529788804932e-05, "loss": 0.8771, "step": 14955 }, { "epoch": 0.8583641736236624, "grad_norm": 0.26953125, "learning_rate": 1.1970972645601587e-05, "loss": 0.9344, "step": 14960 }, { "epoch": 0.8586510600453281, "grad_norm": 0.2578125, "learning_rate": 1.1923503789360712e-05, "loss": 0.9568, "step": 14965 }, { "epoch": 0.8589379464669937, "grad_norm": 0.2451171875, "learning_rate": 1.1876123267690209e-05, "loss": 0.9081, "step": 14970 }, { "epoch": 0.8592248328886594, "grad_norm": 0.25390625, "learning_rate": 1.1828831128109475e-05, "loss": 0.9976, "step": 14975 }, { "epoch": 0.859511719310325, "grad_norm": 0.2890625, "learning_rate": 1.1781627418049179e-05, "loss": 0.9523, "step": 14980 }, { "epoch": 0.8597986057319907, "grad_norm": 0.267578125, "learning_rate": 1.1734512184851377e-05, "loss": 0.9357, "step": 14985 }, { "epoch": 0.8600854921536564, "grad_norm": 0.234375, "learning_rate": 1.1687485475769343e-05, "loss": 0.9139, "step": 14990 }, { "epoch": 0.860372378575322, "grad_norm": 0.275390625, "learning_rate": 1.1640547337967577e-05, "loss": 0.9907, "step": 14995 }, { "epoch": 0.8606592649969876, "grad_norm": 0.251953125, "learning_rate": 1.1593697818521765e-05, "loss": 0.8954, "step": 15000 }, { "epoch": 0.8609461514186534, "grad_norm": 0.27734375, "learning_rate": 1.1546936964418664e-05, "loss": 0.988, "step": 15005 }, { "epoch": 0.861233037840319, "grad_norm": 0.244140625, "learning_rate": 1.1500264822556194e-05, "loss": 0.9047, "step": 15010 }, { "epoch": 0.8615199242619846, "grad_norm": 0.2451171875, "learning_rate": 1.1453681439743224e-05, "loss": 0.9274, "step": 15015 }, { "epoch": 0.8618068106836504, "grad_norm": 0.24609375, "learning_rate": 1.1407186862699614e-05, "loss": 0.9044, "step": 15020 }, { "epoch": 0.862093697105316, "grad_norm": 0.279296875, "learning_rate": 1.1360781138056209e-05, "loss": 0.967, "step": 15025 }, { "epoch": 0.8623805835269817, "grad_norm": 0.244140625, "learning_rate": 1.1314464312354678e-05, "loss": 1.0125, "step": 15030 }, { "epoch": 0.8626674699486473, "grad_norm": 0.26953125, "learning_rate": 1.1268236432047596e-05, "loss": 0.938, "step": 15035 }, { "epoch": 0.862954356370313, "grad_norm": 0.26953125, "learning_rate": 1.1222097543498244e-05, "loss": 0.9886, "step": 15040 }, { "epoch": 0.8632412427919787, "grad_norm": 0.26953125, "learning_rate": 1.1176047692980773e-05, "loss": 0.9326, "step": 15045 }, { "epoch": 0.8635281292136443, "grad_norm": 0.314453125, "learning_rate": 1.1130086926679894e-05, "loss": 0.9669, "step": 15050 }, { "epoch": 0.8638150156353099, "grad_norm": 0.25, "learning_rate": 1.1084215290691092e-05, "loss": 0.9314, "step": 15055 }, { "epoch": 0.8641019020569757, "grad_norm": 0.259765625, "learning_rate": 1.1038432831020384e-05, "loss": 1.053, "step": 15060 }, { "epoch": 0.8643887884786413, "grad_norm": 0.271484375, "learning_rate": 1.099273959358439e-05, "loss": 0.9351, "step": 15065 }, { "epoch": 0.864675674900307, "grad_norm": 0.25, "learning_rate": 1.0947135624210247e-05, "loss": 0.8548, "step": 15070 }, { "epoch": 0.8649625613219726, "grad_norm": 0.26171875, "learning_rate": 1.0901620968635517e-05, "loss": 0.9116, "step": 15075 }, { "epoch": 0.8652494477436383, "grad_norm": 0.2734375, "learning_rate": 1.0856195672508262e-05, "loss": 0.9776, "step": 15080 }, { "epoch": 0.865536334165304, "grad_norm": 0.46484375, "learning_rate": 1.0810859781386828e-05, "loss": 0.9545, "step": 15085 }, { "epoch": 0.8658232205869696, "grad_norm": 0.294921875, "learning_rate": 1.0765613340739989e-05, "loss": 0.9909, "step": 15090 }, { "epoch": 0.8661101070086353, "grad_norm": 0.38671875, "learning_rate": 1.0720456395946732e-05, "loss": 1.0981, "step": 15095 }, { "epoch": 0.866396993430301, "grad_norm": 0.24609375, "learning_rate": 1.0675388992296353e-05, "loss": 0.9031, "step": 15100 }, { "epoch": 0.8666838798519666, "grad_norm": 0.2578125, "learning_rate": 1.0630411174988275e-05, "loss": 0.9803, "step": 15105 }, { "epoch": 0.8669707662736322, "grad_norm": 0.26171875, "learning_rate": 1.0585522989132102e-05, "loss": 0.9648, "step": 15110 }, { "epoch": 0.867257652695298, "grad_norm": 0.29296875, "learning_rate": 1.0540724479747587e-05, "loss": 0.934, "step": 15115 }, { "epoch": 0.8675445391169636, "grad_norm": 0.275390625, "learning_rate": 1.0496015691764461e-05, "loss": 0.9363, "step": 15120 }, { "epoch": 0.8678314255386292, "grad_norm": 0.265625, "learning_rate": 1.045139667002254e-05, "loss": 0.931, "step": 15125 }, { "epoch": 0.8681183119602949, "grad_norm": 0.2578125, "learning_rate": 1.0406867459271564e-05, "loss": 0.8883, "step": 15130 }, { "epoch": 0.8684051983819606, "grad_norm": 0.275390625, "learning_rate": 1.036242810417124e-05, "loss": 0.8837, "step": 15135 }, { "epoch": 0.8686920848036263, "grad_norm": 0.2578125, "learning_rate": 1.0318078649291119e-05, "loss": 0.9437, "step": 15140 }, { "epoch": 0.8689789712252919, "grad_norm": 0.26171875, "learning_rate": 1.0273819139110608e-05, "loss": 0.9321, "step": 15145 }, { "epoch": 0.8692658576469575, "grad_norm": 0.251953125, "learning_rate": 1.0229649618018933e-05, "loss": 0.9307, "step": 15150 }, { "epoch": 0.8695527440686233, "grad_norm": 0.2734375, "learning_rate": 1.0185570130314991e-05, "loss": 0.9588, "step": 15155 }, { "epoch": 0.8698396304902889, "grad_norm": 0.2578125, "learning_rate": 1.0141580720207466e-05, "loss": 0.9532, "step": 15160 }, { "epoch": 0.8701265169119545, "grad_norm": 0.275390625, "learning_rate": 1.0097681431814621e-05, "loss": 1.0257, "step": 15165 }, { "epoch": 0.8704134033336203, "grad_norm": 0.310546875, "learning_rate": 1.0053872309164414e-05, "loss": 1.0246, "step": 15170 }, { "epoch": 0.8707002897552859, "grad_norm": 0.2734375, "learning_rate": 1.00101533961943e-05, "loss": 0.9567, "step": 15175 }, { "epoch": 0.8709871761769515, "grad_norm": 0.267578125, "learning_rate": 9.966524736751337e-06, "loss": 0.9177, "step": 15180 }, { "epoch": 0.8712740625986172, "grad_norm": 0.2734375, "learning_rate": 9.922986374591969e-06, "loss": 0.9301, "step": 15185 }, { "epoch": 0.8715609490202829, "grad_norm": 0.28515625, "learning_rate": 9.879538353382178e-06, "loss": 0.9605, "step": 15190 }, { "epoch": 0.8718478354419485, "grad_norm": 0.259765625, "learning_rate": 9.836180716697251e-06, "loss": 0.954, "step": 15195 }, { "epoch": 0.8721347218636142, "grad_norm": 0.26171875, "learning_rate": 9.792913508021906e-06, "loss": 0.9541, "step": 15200 }, { "epoch": 0.8724216082852798, "grad_norm": 0.26953125, "learning_rate": 9.749736770750106e-06, "loss": 0.9573, "step": 15205 }, { "epoch": 0.8727084947069456, "grad_norm": 0.271484375, "learning_rate": 9.706650548185091e-06, "loss": 0.9436, "step": 15210 }, { "epoch": 0.8729953811286112, "grad_norm": 0.2734375, "learning_rate": 9.663654883539364e-06, "loss": 0.9438, "step": 15215 }, { "epoch": 0.8732822675502768, "grad_norm": 0.26953125, "learning_rate": 9.620749819934539e-06, "loss": 0.9953, "step": 15220 }, { "epoch": 0.8735691539719425, "grad_norm": 0.3046875, "learning_rate": 9.577935400401406e-06, "loss": 0.9979, "step": 15225 }, { "epoch": 0.8738560403936082, "grad_norm": 0.267578125, "learning_rate": 9.535211667879861e-06, "loss": 0.9719, "step": 15230 }, { "epoch": 0.8741429268152738, "grad_norm": 0.263671875, "learning_rate": 9.492578665218788e-06, "loss": 0.9452, "step": 15235 }, { "epoch": 0.8744298132369395, "grad_norm": 0.263671875, "learning_rate": 9.450036435176136e-06, "loss": 0.9683, "step": 15240 }, { "epoch": 0.8747166996586052, "grad_norm": 0.283203125, "learning_rate": 9.407585020418763e-06, "loss": 0.9658, "step": 15245 }, { "epoch": 0.8750035860802708, "grad_norm": 0.271484375, "learning_rate": 9.365224463522492e-06, "loss": 0.8626, "step": 15250 }, { "epoch": 0.8752904725019365, "grad_norm": 0.248046875, "learning_rate": 9.322954806971985e-06, "loss": 0.9317, "step": 15255 }, { "epoch": 0.8755773589236021, "grad_norm": 0.263671875, "learning_rate": 9.280776093160782e-06, "loss": 0.9955, "step": 15260 }, { "epoch": 0.8758642453452679, "grad_norm": 0.283203125, "learning_rate": 9.238688364391135e-06, "loss": 0.9759, "step": 15265 }, { "epoch": 0.8761511317669335, "grad_norm": 0.26953125, "learning_rate": 9.196691662874135e-06, "loss": 0.9924, "step": 15270 }, { "epoch": 0.8764380181885991, "grad_norm": 0.26171875, "learning_rate": 9.154786030729545e-06, "loss": 0.9888, "step": 15275 }, { "epoch": 0.8767249046102648, "grad_norm": 0.27734375, "learning_rate": 9.112971509985757e-06, "loss": 0.9101, "step": 15280 }, { "epoch": 0.8770117910319305, "grad_norm": 0.279296875, "learning_rate": 9.071248142579825e-06, "loss": 0.9632, "step": 15285 }, { "epoch": 0.8772986774535961, "grad_norm": 0.271484375, "learning_rate": 9.029615970357375e-06, "loss": 0.9665, "step": 15290 }, { "epoch": 0.8775855638752618, "grad_norm": 0.259765625, "learning_rate": 8.988075035072562e-06, "loss": 0.8619, "step": 15295 }, { "epoch": 0.8778724502969274, "grad_norm": 0.28515625, "learning_rate": 8.946625378388029e-06, "loss": 0.9259, "step": 15300 }, { "epoch": 0.8781593367185931, "grad_norm": 0.283203125, "learning_rate": 8.905267041874887e-06, "loss": 0.9454, "step": 15305 }, { "epoch": 0.8784462231402588, "grad_norm": 0.263671875, "learning_rate": 8.864000067012702e-06, "loss": 0.9792, "step": 15310 }, { "epoch": 0.8787331095619244, "grad_norm": 0.259765625, "learning_rate": 8.822824495189297e-06, "loss": 0.9901, "step": 15315 }, { "epoch": 0.87901999598359, "grad_norm": 0.271484375, "learning_rate": 8.781740367700941e-06, "loss": 0.8552, "step": 15320 }, { "epoch": 0.8793068824052558, "grad_norm": 0.25390625, "learning_rate": 8.740747725752118e-06, "loss": 0.9572, "step": 15325 }, { "epoch": 0.8795937688269214, "grad_norm": 0.263671875, "learning_rate": 8.69984661045562e-06, "loss": 0.9567, "step": 15330 }, { "epoch": 0.879880655248587, "grad_norm": 0.263671875, "learning_rate": 8.659037062832342e-06, "loss": 0.9761, "step": 15335 }, { "epoch": 0.8801675416702528, "grad_norm": 0.259765625, "learning_rate": 8.61831912381148e-06, "loss": 0.9221, "step": 15340 }, { "epoch": 0.8804544280919184, "grad_norm": 0.2451171875, "learning_rate": 8.577692834230223e-06, "loss": 0.9304, "step": 15345 }, { "epoch": 0.8807413145135841, "grad_norm": 0.302734375, "learning_rate": 8.53715823483393e-06, "loss": 0.9785, "step": 15350 }, { "epoch": 0.8810282009352497, "grad_norm": 0.2734375, "learning_rate": 8.496715366275976e-06, "loss": 0.9449, "step": 15355 }, { "epoch": 0.8813150873569154, "grad_norm": 0.263671875, "learning_rate": 8.456364269117711e-06, "loss": 0.8776, "step": 15360 }, { "epoch": 0.8816019737785811, "grad_norm": 0.283203125, "learning_rate": 8.416104983828499e-06, "loss": 0.9243, "step": 15365 }, { "epoch": 0.8818888602002467, "grad_norm": 0.283203125, "learning_rate": 8.375937550785539e-06, "loss": 0.9951, "step": 15370 }, { "epoch": 0.8821757466219123, "grad_norm": 0.2734375, "learning_rate": 8.335862010274009e-06, "loss": 1.0172, "step": 15375 }, { "epoch": 0.8824626330435781, "grad_norm": 0.265625, "learning_rate": 8.295878402486856e-06, "loss": 0.9428, "step": 15380 }, { "epoch": 0.8827495194652437, "grad_norm": 0.251953125, "learning_rate": 8.255986767524826e-06, "loss": 0.8969, "step": 15385 }, { "epoch": 0.8830364058869093, "grad_norm": 0.26953125, "learning_rate": 8.21618714539647e-06, "loss": 0.9084, "step": 15390 }, { "epoch": 0.883323292308575, "grad_norm": 0.255859375, "learning_rate": 8.176479576018003e-06, "loss": 0.9383, "step": 15395 }, { "epoch": 0.8836101787302407, "grad_norm": 0.28515625, "learning_rate": 8.136864099213359e-06, "loss": 0.9803, "step": 15400 }, { "epoch": 0.8838970651519064, "grad_norm": 0.275390625, "learning_rate": 8.097340754714078e-06, "loss": 0.973, "step": 15405 }, { "epoch": 0.884183951573572, "grad_norm": 0.2412109375, "learning_rate": 8.05790958215934e-06, "loss": 0.8973, "step": 15410 }, { "epoch": 0.8844708379952377, "grad_norm": 0.271484375, "learning_rate": 8.018570621095822e-06, "loss": 0.991, "step": 15415 }, { "epoch": 0.8847577244169034, "grad_norm": 0.271484375, "learning_rate": 7.979323910977787e-06, "loss": 0.9551, "step": 15420 }, { "epoch": 0.885044610838569, "grad_norm": 0.27734375, "learning_rate": 7.940169491166904e-06, "loss": 0.9941, "step": 15425 }, { "epoch": 0.8853314972602346, "grad_norm": 0.26171875, "learning_rate": 7.90110740093234e-06, "loss": 0.8504, "step": 15430 }, { "epoch": 0.8856183836819004, "grad_norm": 0.263671875, "learning_rate": 7.862137679450653e-06, "loss": 0.8803, "step": 15435 }, { "epoch": 0.885905270103566, "grad_norm": 0.287109375, "learning_rate": 7.823260365805717e-06, "loss": 0.91, "step": 15440 }, { "epoch": 0.8861921565252316, "grad_norm": 0.26171875, "learning_rate": 7.784475498988808e-06, "loss": 0.9697, "step": 15445 }, { "epoch": 0.8864790429468973, "grad_norm": 0.275390625, "learning_rate": 7.745783117898397e-06, "loss": 0.9224, "step": 15450 }, { "epoch": 0.886765929368563, "grad_norm": 0.26953125, "learning_rate": 7.707183261340255e-06, "loss": 1.0197, "step": 15455 }, { "epoch": 0.8870528157902287, "grad_norm": 0.26953125, "learning_rate": 7.668675968027328e-06, "loss": 0.957, "step": 15460 }, { "epoch": 0.8873397022118943, "grad_norm": 0.2734375, "learning_rate": 7.630261276579765e-06, "loss": 0.9196, "step": 15465 }, { "epoch": 0.8876265886335599, "grad_norm": 0.2734375, "learning_rate": 7.5919392255248025e-06, "loss": 0.988, "step": 15470 }, { "epoch": 0.8879134750552257, "grad_norm": 0.27734375, "learning_rate": 7.553709853296764e-06, "loss": 0.9713, "step": 15475 }, { "epoch": 0.8882003614768913, "grad_norm": 0.263671875, "learning_rate": 7.515573198237069e-06, "loss": 0.9196, "step": 15480 }, { "epoch": 0.8884872478985569, "grad_norm": 0.23828125, "learning_rate": 7.477529298594077e-06, "loss": 0.9678, "step": 15485 }, { "epoch": 0.8887741343202227, "grad_norm": 0.26171875, "learning_rate": 7.439578192523211e-06, "loss": 0.9726, "step": 15490 }, { "epoch": 0.8890610207418883, "grad_norm": 0.265625, "learning_rate": 7.4017199180867246e-06, "loss": 1.0294, "step": 15495 }, { "epoch": 0.8893479071635539, "grad_norm": 0.255859375, "learning_rate": 7.363954513253879e-06, "loss": 0.96, "step": 15500 }, { "epoch": 0.8896347935852196, "grad_norm": 0.248046875, "learning_rate": 7.3262820159006765e-06, "loss": 0.8607, "step": 15505 }, { "epoch": 0.8899216800068853, "grad_norm": 0.2490234375, "learning_rate": 7.288702463810026e-06, "loss": 0.9621, "step": 15510 }, { "epoch": 0.890208566428551, "grad_norm": 0.275390625, "learning_rate": 7.2512158946716145e-06, "loss": 0.9429, "step": 15515 }, { "epoch": 0.8904954528502166, "grad_norm": 0.265625, "learning_rate": 7.213822346081822e-06, "loss": 0.942, "step": 15520 }, { "epoch": 0.8907823392718822, "grad_norm": 0.279296875, "learning_rate": 7.176521855543772e-06, "loss": 0.9682, "step": 15525 }, { "epoch": 0.891069225693548, "grad_norm": 0.294921875, "learning_rate": 7.13931446046725e-06, "loss": 0.9848, "step": 15530 }, { "epoch": 0.8913561121152136, "grad_norm": 0.265625, "learning_rate": 7.1022001981686845e-06, "loss": 0.9377, "step": 15535 }, { "epoch": 0.8916429985368792, "grad_norm": 0.287109375, "learning_rate": 7.065179105871056e-06, "loss": 0.9557, "step": 15540 }, { "epoch": 0.8919298849585449, "grad_norm": 0.255859375, "learning_rate": 7.028251220703974e-06, "loss": 0.9163, "step": 15545 }, { "epoch": 0.8922167713802106, "grad_norm": 0.26953125, "learning_rate": 6.99141657970348e-06, "loss": 0.9092, "step": 15550 }, { "epoch": 0.8925036578018762, "grad_norm": 0.2578125, "learning_rate": 6.954675219812201e-06, "loss": 0.9759, "step": 15555 }, { "epoch": 0.8927905442235419, "grad_norm": 0.267578125, "learning_rate": 6.918027177879094e-06, "loss": 0.9452, "step": 15560 }, { "epoch": 0.8930774306452076, "grad_norm": 0.283203125, "learning_rate": 6.881472490659635e-06, "loss": 0.9317, "step": 15565 }, { "epoch": 0.8933643170668732, "grad_norm": 0.2578125, "learning_rate": 6.845011194815598e-06, "loss": 0.9068, "step": 15570 }, { "epoch": 0.8936512034885389, "grad_norm": 0.259765625, "learning_rate": 6.808643326915087e-06, "loss": 0.998, "step": 15575 }, { "epoch": 0.8939380899102045, "grad_norm": 0.27734375, "learning_rate": 6.772368923432593e-06, "loss": 0.9693, "step": 15580 }, { "epoch": 0.8942249763318703, "grad_norm": 0.275390625, "learning_rate": 6.736188020748746e-06, "loss": 0.9591, "step": 15585 }, { "epoch": 0.8945118627535359, "grad_norm": 0.2734375, "learning_rate": 6.700100655150487e-06, "loss": 0.994, "step": 15590 }, { "epoch": 0.8947987491752015, "grad_norm": 0.267578125, "learning_rate": 6.6641068628309545e-06, "loss": 0.9533, "step": 15595 }, { "epoch": 0.8950856355968672, "grad_norm": 0.267578125, "learning_rate": 6.628206679889349e-06, "loss": 0.9325, "step": 15600 }, { "epoch": 0.8953725220185329, "grad_norm": 0.29296875, "learning_rate": 6.5924001423311014e-06, "loss": 0.9467, "step": 15605 }, { "epoch": 0.8956594084401985, "grad_norm": 0.25, "learning_rate": 6.55668728606762e-06, "loss": 0.8933, "step": 15610 }, { "epoch": 0.8959462948618642, "grad_norm": 0.255859375, "learning_rate": 6.521068146916432e-06, "loss": 0.9315, "step": 15615 }, { "epoch": 0.8962331812835298, "grad_norm": 0.255859375, "learning_rate": 6.485542760601027e-06, "loss": 0.9207, "step": 15620 }, { "epoch": 0.8965200677051955, "grad_norm": 0.28125, "learning_rate": 6.450111162750905e-06, "loss": 1.0009, "step": 15625 }, { "epoch": 0.8968069541268612, "grad_norm": 0.251953125, "learning_rate": 6.414773388901452e-06, "loss": 0.8781, "step": 15630 }, { "epoch": 0.8970938405485268, "grad_norm": 0.26953125, "learning_rate": 6.379529474494006e-06, "loss": 0.8925, "step": 15635 }, { "epoch": 0.8973807269701926, "grad_norm": 0.265625, "learning_rate": 6.344379454875771e-06, "loss": 0.9467, "step": 15640 }, { "epoch": 0.8976676133918582, "grad_norm": 0.267578125, "learning_rate": 6.309323365299724e-06, "loss": 0.9559, "step": 15645 }, { "epoch": 0.8979544998135238, "grad_norm": 0.275390625, "learning_rate": 6.274361240924676e-06, "loss": 0.9633, "step": 15650 }, { "epoch": 0.8982413862351895, "grad_norm": 0.2470703125, "learning_rate": 6.239493116815231e-06, "loss": 0.9157, "step": 15655 }, { "epoch": 0.8985282726568552, "grad_norm": 0.291015625, "learning_rate": 6.204719027941641e-06, "loss": 0.8943, "step": 15660 }, { "epoch": 0.8988151590785208, "grad_norm": 0.2578125, "learning_rate": 6.170039009179895e-06, "loss": 0.9013, "step": 15665 }, { "epoch": 0.8991020455001865, "grad_norm": 0.287109375, "learning_rate": 6.135453095311627e-06, "loss": 0.9285, "step": 15670 }, { "epoch": 0.8993889319218521, "grad_norm": 0.255859375, "learning_rate": 6.100961321024112e-06, "loss": 0.9743, "step": 15675 }, { "epoch": 0.8996758183435178, "grad_norm": 0.2578125, "learning_rate": 6.066563720910168e-06, "loss": 1.0059, "step": 15680 }, { "epoch": 0.8999627047651835, "grad_norm": 0.26953125, "learning_rate": 6.032260329468198e-06, "loss": 0.9466, "step": 15685 }, { "epoch": 0.9002495911868491, "grad_norm": 0.26953125, "learning_rate": 5.998051181102082e-06, "loss": 0.8781, "step": 15690 }, { "epoch": 0.9005364776085147, "grad_norm": 0.27734375, "learning_rate": 5.963936310121243e-06, "loss": 0.9677, "step": 15695 }, { "epoch": 0.9008233640301805, "grad_norm": 0.248046875, "learning_rate": 5.929915750740478e-06, "loss": 0.9161, "step": 15700 }, { "epoch": 0.9011102504518461, "grad_norm": 0.26171875, "learning_rate": 5.89598953708006e-06, "loss": 0.9115, "step": 15705 }, { "epoch": 0.9013971368735118, "grad_norm": 0.259765625, "learning_rate": 5.862157703165583e-06, "loss": 0.9553, "step": 15710 }, { "epoch": 0.9016840232951775, "grad_norm": 0.279296875, "learning_rate": 5.828420282928016e-06, "loss": 0.997, "step": 15715 }, { "epoch": 0.9019709097168431, "grad_norm": 0.271484375, "learning_rate": 5.794777310203658e-06, "loss": 0.9453, "step": 15720 }, { "epoch": 0.9022577961385088, "grad_norm": 0.2578125, "learning_rate": 5.761228818734032e-06, "loss": 0.9165, "step": 15725 }, { "epoch": 0.9025446825601744, "grad_norm": 0.26171875, "learning_rate": 5.727774842165956e-06, "loss": 0.9178, "step": 15730 }, { "epoch": 0.9028315689818401, "grad_norm": 0.279296875, "learning_rate": 5.694415414051402e-06, "loss": 0.8639, "step": 15735 }, { "epoch": 0.9031184554035058, "grad_norm": 0.263671875, "learning_rate": 5.6611505678475726e-06, "loss": 0.9805, "step": 15740 }, { "epoch": 0.9034053418251714, "grad_norm": 0.26953125, "learning_rate": 5.627980336916772e-06, "loss": 0.9669, "step": 15745 }, { "epoch": 0.903692228246837, "grad_norm": 0.275390625, "learning_rate": 5.594904754526398e-06, "loss": 0.9872, "step": 15750 }, { "epoch": 0.9039791146685028, "grad_norm": 0.26953125, "learning_rate": 5.561923853848994e-06, "loss": 0.9676, "step": 15755 }, { "epoch": 0.9042660010901684, "grad_norm": 0.263671875, "learning_rate": 5.529037667962067e-06, "loss": 0.967, "step": 15760 }, { "epoch": 0.904552887511834, "grad_norm": 0.271484375, "learning_rate": 5.496246229848179e-06, "loss": 0.9079, "step": 15765 }, { "epoch": 0.9048397739334997, "grad_norm": 0.263671875, "learning_rate": 5.463549572394833e-06, "loss": 0.8895, "step": 15770 }, { "epoch": 0.9051266603551654, "grad_norm": 0.25390625, "learning_rate": 5.4309477283945194e-06, "loss": 0.8952, "step": 15775 }, { "epoch": 0.9054135467768311, "grad_norm": 0.26171875, "learning_rate": 5.3984407305445736e-06, "loss": 0.9741, "step": 15780 }, { "epoch": 0.9057004331984967, "grad_norm": 0.271484375, "learning_rate": 5.366028611447282e-06, "loss": 1.0063, "step": 15785 }, { "epoch": 0.9059873196201623, "grad_norm": 0.255859375, "learning_rate": 5.333711403609698e-06, "loss": 0.9324, "step": 15790 }, { "epoch": 0.9062742060418281, "grad_norm": 0.2490234375, "learning_rate": 5.301489139443738e-06, "loss": 0.9247, "step": 15795 }, { "epoch": 0.9065610924634937, "grad_norm": 0.275390625, "learning_rate": 5.269361851266097e-06, "loss": 1.0244, "step": 15800 }, { "epoch": 0.9068479788851593, "grad_norm": 0.275390625, "learning_rate": 5.237329571298166e-06, "loss": 0.9647, "step": 15805 }, { "epoch": 0.9071348653068251, "grad_norm": 0.265625, "learning_rate": 5.205392331666126e-06, "loss": 0.9641, "step": 15810 }, { "epoch": 0.9074217517284907, "grad_norm": 0.279296875, "learning_rate": 5.173550164400753e-06, "loss": 0.9845, "step": 15815 }, { "epoch": 0.9077086381501563, "grad_norm": 0.306640625, "learning_rate": 5.141803101437559e-06, "loss": 0.9977, "step": 15820 }, { "epoch": 0.907995524571822, "grad_norm": 0.251953125, "learning_rate": 5.110151174616584e-06, "loss": 0.8568, "step": 15825 }, { "epoch": 0.9082824109934877, "grad_norm": 0.271484375, "learning_rate": 5.078594415682536e-06, "loss": 0.9751, "step": 15830 }, { "epoch": 0.9085692974151534, "grad_norm": 0.3125, "learning_rate": 5.04713285628462e-06, "loss": 0.97, "step": 15835 }, { "epoch": 0.908856183836819, "grad_norm": 0.263671875, "learning_rate": 5.015766527976573e-06, "loss": 1.0045, "step": 15840 }, { "epoch": 0.9091430702584846, "grad_norm": 0.2734375, "learning_rate": 4.984495462216654e-06, "loss": 0.9206, "step": 15845 }, { "epoch": 0.9094299566801504, "grad_norm": 0.271484375, "learning_rate": 4.953319690367531e-06, "loss": 1.0479, "step": 15850 }, { "epoch": 0.909716843101816, "grad_norm": 0.28515625, "learning_rate": 4.922239243696358e-06, "loss": 0.9805, "step": 15855 }, { "epoch": 0.9100037295234816, "grad_norm": 0.2578125, "learning_rate": 4.891254153374614e-06, "loss": 0.8655, "step": 15860 }, { "epoch": 0.9102906159451473, "grad_norm": 0.2734375, "learning_rate": 4.860364450478206e-06, "loss": 1.0177, "step": 15865 }, { "epoch": 0.910577502366813, "grad_norm": 0.259765625, "learning_rate": 4.829570165987318e-06, "loss": 0.9443, "step": 15870 }, { "epoch": 0.9108643887884786, "grad_norm": 0.2470703125, "learning_rate": 4.798871330786492e-06, "loss": 0.9409, "step": 15875 }, { "epoch": 0.9111512752101443, "grad_norm": 0.2578125, "learning_rate": 4.7682679756645195e-06, "loss": 0.9746, "step": 15880 }, { "epoch": 0.91143816163181, "grad_norm": 0.25390625, "learning_rate": 4.737760131314406e-06, "loss": 0.8833, "step": 15885 }, { "epoch": 0.9117250480534757, "grad_norm": 0.28515625, "learning_rate": 4.707347828333408e-06, "loss": 0.9432, "step": 15890 }, { "epoch": 0.9120119344751413, "grad_norm": 0.30078125, "learning_rate": 4.677031097222906e-06, "loss": 0.8493, "step": 15895 }, { "epoch": 0.9122988208968069, "grad_norm": 0.271484375, "learning_rate": 4.646809968388499e-06, "loss": 0.9514, "step": 15900 }, { "epoch": 0.9125857073184727, "grad_norm": 0.2578125, "learning_rate": 4.616684472139842e-06, "loss": 0.9089, "step": 15905 }, { "epoch": 0.9128725937401383, "grad_norm": 0.26953125, "learning_rate": 4.586654638690724e-06, "loss": 0.9386, "step": 15910 }, { "epoch": 0.9131594801618039, "grad_norm": 0.27734375, "learning_rate": 4.55672049815895e-06, "loss": 0.9317, "step": 15915 }, { "epoch": 0.9134463665834696, "grad_norm": 0.263671875, "learning_rate": 4.5268820805663855e-06, "loss": 0.8897, "step": 15920 }, { "epoch": 0.9137332530051353, "grad_norm": 0.263671875, "learning_rate": 4.497139415838858e-06, "loss": 0.9088, "step": 15925 }, { "epoch": 0.9140201394268009, "grad_norm": 0.263671875, "learning_rate": 4.467492533806217e-06, "loss": 0.9413, "step": 15930 }, { "epoch": 0.9143070258484666, "grad_norm": 0.265625, "learning_rate": 4.437941464202177e-06, "loss": 0.9366, "step": 15935 }, { "epoch": 0.9145939122701322, "grad_norm": 0.2734375, "learning_rate": 4.408486236664411e-06, "loss": 1.0035, "step": 15940 }, { "epoch": 0.914880798691798, "grad_norm": 0.26171875, "learning_rate": 4.379126880734463e-06, "loss": 0.8072, "step": 15945 }, { "epoch": 0.9151676851134636, "grad_norm": 0.2490234375, "learning_rate": 4.349863425857681e-06, "loss": 0.8787, "step": 15950 }, { "epoch": 0.9154545715351292, "grad_norm": 0.251953125, "learning_rate": 4.320695901383298e-06, "loss": 0.991, "step": 15955 }, { "epoch": 0.915741457956795, "grad_norm": 0.26171875, "learning_rate": 4.291624336564304e-06, "loss": 0.9246, "step": 15960 }, { "epoch": 0.9160283443784606, "grad_norm": 0.26953125, "learning_rate": 4.26264876055743e-06, "loss": 0.9566, "step": 15965 }, { "epoch": 0.9163152308001262, "grad_norm": 0.265625, "learning_rate": 4.233769202423166e-06, "loss": 0.92, "step": 15970 }, { "epoch": 0.9166021172217919, "grad_norm": 0.24609375, "learning_rate": 4.204985691125674e-06, "loss": 0.9485, "step": 15975 }, { "epoch": 0.9168890036434576, "grad_norm": 0.25390625, "learning_rate": 4.176298255532829e-06, "loss": 0.9421, "step": 15980 }, { "epoch": 0.9171758900651232, "grad_norm": 0.26953125, "learning_rate": 4.147706924416095e-06, "loss": 0.9276, "step": 15985 }, { "epoch": 0.9174627764867889, "grad_norm": 0.28515625, "learning_rate": 4.119211726450611e-06, "loss": 0.9399, "step": 15990 }, { "epoch": 0.9177496629084545, "grad_norm": 0.2734375, "learning_rate": 4.0908126902150404e-06, "loss": 1.0004, "step": 15995 }, { "epoch": 0.9180365493301202, "grad_norm": 0.24609375, "learning_rate": 4.062509844191631e-06, "loss": 0.9562, "step": 16000 }, { "epoch": 0.9183234357517859, "grad_norm": 0.275390625, "learning_rate": 4.0343032167662e-06, "loss": 0.9974, "step": 16005 }, { "epoch": 0.9186103221734515, "grad_norm": 0.28515625, "learning_rate": 4.0061928362279846e-06, "loss": 0.9942, "step": 16010 }, { "epoch": 0.9188972085951171, "grad_norm": 0.25, "learning_rate": 3.978178730769733e-06, "loss": 0.9558, "step": 16015 }, { "epoch": 0.9191840950167829, "grad_norm": 0.251953125, "learning_rate": 3.95026092848767e-06, "loss": 0.9057, "step": 16020 }, { "epoch": 0.9194709814384485, "grad_norm": 0.26171875, "learning_rate": 3.922439457381355e-06, "loss": 0.9952, "step": 16025 }, { "epoch": 0.9197578678601142, "grad_norm": 0.275390625, "learning_rate": 3.894714345353801e-06, "loss": 0.8913, "step": 16030 }, { "epoch": 0.9200447542817799, "grad_norm": 0.27734375, "learning_rate": 3.867085620211352e-06, "loss": 0.9954, "step": 16035 }, { "epoch": 0.9203316407034455, "grad_norm": 0.259765625, "learning_rate": 3.8395533096637015e-06, "loss": 0.9582, "step": 16040 }, { "epoch": 0.9206185271251112, "grad_norm": 0.267578125, "learning_rate": 3.8121174413238257e-06, "loss": 0.8973, "step": 16045 }, { "epoch": 0.9209054135467768, "grad_norm": 0.263671875, "learning_rate": 3.7847780427079814e-06, "loss": 0.9567, "step": 16050 }, { "epoch": 0.9211922999684425, "grad_norm": 0.27734375, "learning_rate": 3.7575351412356576e-06, "loss": 0.9315, "step": 16055 }, { "epoch": 0.9214791863901082, "grad_norm": 0.263671875, "learning_rate": 3.73038876422962e-06, "loss": 0.9405, "step": 16060 }, { "epoch": 0.9217660728117738, "grad_norm": 0.279296875, "learning_rate": 3.7033389389157567e-06, "loss": 0.9582, "step": 16065 }, { "epoch": 0.9220529592334394, "grad_norm": 0.28515625, "learning_rate": 3.6763856924231678e-06, "loss": 0.954, "step": 16070 }, { "epoch": 0.9223398456551052, "grad_norm": 0.259765625, "learning_rate": 3.649529051784051e-06, "loss": 0.9762, "step": 16075 }, { "epoch": 0.9226267320767708, "grad_norm": 0.259765625, "learning_rate": 3.62276904393376e-06, "loss": 0.9059, "step": 16080 }, { "epoch": 0.9229136184984364, "grad_norm": 0.267578125, "learning_rate": 3.5961056957107273e-06, "loss": 0.9077, "step": 16085 }, { "epoch": 0.9232005049201021, "grad_norm": 0.26171875, "learning_rate": 3.569539033856406e-06, "loss": 0.9878, "step": 16090 }, { "epoch": 0.9234873913417678, "grad_norm": 0.263671875, "learning_rate": 3.5430690850153046e-06, "loss": 1.0148, "step": 16095 }, { "epoch": 0.9237742777634335, "grad_norm": 0.259765625, "learning_rate": 3.516695875734932e-06, "loss": 0.8691, "step": 16100 }, { "epoch": 0.9240611641850991, "grad_norm": 0.275390625, "learning_rate": 3.4904194324657748e-06, "loss": 0.9384, "step": 16105 }, { "epoch": 0.9243480506067648, "grad_norm": 0.271484375, "learning_rate": 3.4642397815612847e-06, "loss": 0.9701, "step": 16110 }, { "epoch": 0.9246349370284305, "grad_norm": 0.271484375, "learning_rate": 3.438156949277782e-06, "loss": 0.988, "step": 16115 }, { "epoch": 0.9249218234500961, "grad_norm": 0.267578125, "learning_rate": 3.4121709617745745e-06, "loss": 0.9007, "step": 16120 }, { "epoch": 0.9252087098717617, "grad_norm": 0.26953125, "learning_rate": 3.386281845113748e-06, "loss": 0.9432, "step": 16125 }, { "epoch": 0.9254955962934275, "grad_norm": 0.251953125, "learning_rate": 3.3604896252603104e-06, "loss": 0.9452, "step": 16130 }, { "epoch": 0.9257824827150931, "grad_norm": 0.291015625, "learning_rate": 3.334794328082025e-06, "loss": 0.9922, "step": 16135 }, { "epoch": 0.9260693691367587, "grad_norm": 0.318359375, "learning_rate": 3.3091959793495107e-06, "loss": 0.9338, "step": 16140 }, { "epoch": 0.9263562555584244, "grad_norm": 0.2734375, "learning_rate": 3.2836946047360982e-06, "loss": 0.947, "step": 16145 }, { "epoch": 0.9266431419800901, "grad_norm": 0.263671875, "learning_rate": 3.2582902298178953e-06, "loss": 0.9401, "step": 16150 }, { "epoch": 0.9269300284017558, "grad_norm": 0.26171875, "learning_rate": 3.2329828800737096e-06, "loss": 0.9808, "step": 16155 }, { "epoch": 0.9272169148234214, "grad_norm": 0.275390625, "learning_rate": 3.207772580885049e-06, "loss": 0.9434, "step": 16160 }, { "epoch": 0.927503801245087, "grad_norm": 0.314453125, "learning_rate": 3.1826593575360887e-06, "loss": 0.9194, "step": 16165 }, { "epoch": 0.9277906876667528, "grad_norm": 0.263671875, "learning_rate": 3.1576432352136144e-06, "loss": 0.9281, "step": 16170 }, { "epoch": 0.9280775740884184, "grad_norm": 0.275390625, "learning_rate": 3.1327242390070677e-06, "loss": 0.9217, "step": 16175 }, { "epoch": 0.928364460510084, "grad_norm": 0.26953125, "learning_rate": 3.1079023939084573e-06, "loss": 0.9043, "step": 16180 }, { "epoch": 0.9286513469317497, "grad_norm": 0.265625, "learning_rate": 3.0831777248123584e-06, "loss": 0.923, "step": 16185 }, { "epoch": 0.9289382333534154, "grad_norm": 0.267578125, "learning_rate": 3.0585502565158687e-06, "loss": 0.8932, "step": 16190 }, { "epoch": 0.929225119775081, "grad_norm": 0.263671875, "learning_rate": 3.0340200137186526e-06, "loss": 0.8981, "step": 16195 }, { "epoch": 0.9295120061967467, "grad_norm": 0.419921875, "learning_rate": 3.0095870210228083e-06, "loss": 0.983, "step": 16200 }, { "epoch": 0.9297988926184124, "grad_norm": 0.255859375, "learning_rate": 2.985251302932912e-06, "loss": 0.982, "step": 16205 }, { "epoch": 0.930085779040078, "grad_norm": 0.279296875, "learning_rate": 2.9610128838560068e-06, "loss": 0.8345, "step": 16210 }, { "epoch": 0.9303726654617437, "grad_norm": 0.26953125, "learning_rate": 2.936871788101514e-06, "loss": 0.9091, "step": 16215 }, { "epoch": 0.9306595518834093, "grad_norm": 0.26171875, "learning_rate": 2.912828039881288e-06, "loss": 0.8926, "step": 16220 }, { "epoch": 0.9309464383050751, "grad_norm": 0.259765625, "learning_rate": 2.8888816633095063e-06, "loss": 0.9599, "step": 16225 }, { "epoch": 0.9312333247267407, "grad_norm": 0.2734375, "learning_rate": 2.865032682402724e-06, "loss": 0.9284, "step": 16230 }, { "epoch": 0.9315202111484063, "grad_norm": 0.259765625, "learning_rate": 2.8412811210797975e-06, "loss": 0.9112, "step": 16235 }, { "epoch": 0.931807097570072, "grad_norm": 0.26953125, "learning_rate": 2.817627003161882e-06, "loss": 0.9133, "step": 16240 }, { "epoch": 0.9320939839917377, "grad_norm": 0.259765625, "learning_rate": 2.7940703523724134e-06, "loss": 0.899, "step": 16245 }, { "epoch": 0.9323808704134033, "grad_norm": 0.263671875, "learning_rate": 2.770611192337047e-06, "loss": 0.9911, "step": 16250 }, { "epoch": 0.932667756835069, "grad_norm": 0.2734375, "learning_rate": 2.747249546583708e-06, "loss": 0.9853, "step": 16255 }, { "epoch": 0.9329546432567346, "grad_norm": 0.26953125, "learning_rate": 2.7239854385424645e-06, "loss": 0.8525, "step": 16260 }, { "epoch": 0.9332415296784004, "grad_norm": 0.275390625, "learning_rate": 2.70081889154562e-06, "loss": 1.0017, "step": 16265 }, { "epoch": 0.933528416100066, "grad_norm": 0.275390625, "learning_rate": 2.6777499288275775e-06, "loss": 1.0094, "step": 16270 }, { "epoch": 0.9338153025217316, "grad_norm": 0.26171875, "learning_rate": 2.6547785735249187e-06, "loss": 0.9401, "step": 16275 }, { "epoch": 0.9341021889433974, "grad_norm": 0.271484375, "learning_rate": 2.631904848676281e-06, "loss": 0.98, "step": 16280 }, { "epoch": 0.934389075365063, "grad_norm": 0.2890625, "learning_rate": 2.609128777222447e-06, "loss": 1.0471, "step": 16285 }, { "epoch": 0.9346759617867286, "grad_norm": 0.263671875, "learning_rate": 2.586450382006178e-06, "loss": 0.8953, "step": 16290 }, { "epoch": 0.9349628482083943, "grad_norm": 0.291015625, "learning_rate": 2.563869685772358e-06, "loss": 0.9466, "step": 16295 }, { "epoch": 0.93524973463006, "grad_norm": 0.25, "learning_rate": 2.5413867111678367e-06, "loss": 0.9844, "step": 16300 }, { "epoch": 0.9355366210517256, "grad_norm": 0.259765625, "learning_rate": 2.5190014807414454e-06, "loss": 0.9137, "step": 16305 }, { "epoch": 0.9358235074733913, "grad_norm": 0.2578125, "learning_rate": 2.4967140169440464e-06, "loss": 0.9512, "step": 16310 }, { "epoch": 0.9361103938950569, "grad_norm": 0.255859375, "learning_rate": 2.4745243421283706e-06, "loss": 0.9876, "step": 16315 }, { "epoch": 0.9363972803167226, "grad_norm": 0.283203125, "learning_rate": 2.452432478549116e-06, "loss": 0.9733, "step": 16320 }, { "epoch": 0.9366841667383883, "grad_norm": 0.2734375, "learning_rate": 2.430438448362915e-06, "loss": 0.9572, "step": 16325 }, { "epoch": 0.9369710531600539, "grad_norm": 0.279296875, "learning_rate": 2.408542273628189e-06, "loss": 0.98, "step": 16330 }, { "epoch": 0.9372579395817195, "grad_norm": 0.251953125, "learning_rate": 2.3867439763053166e-06, "loss": 0.869, "step": 16335 }, { "epoch": 0.9375448260033853, "grad_norm": 0.279296875, "learning_rate": 2.3650435782564427e-06, "loss": 1.0136, "step": 16340 }, { "epoch": 0.9378317124250509, "grad_norm": 0.265625, "learning_rate": 2.343441101245558e-06, "loss": 0.9063, "step": 16345 }, { "epoch": 0.9381185988467166, "grad_norm": 0.2734375, "learning_rate": 2.3219365669384206e-06, "loss": 0.957, "step": 16350 }, { "epoch": 0.9384054852683823, "grad_norm": 0.26953125, "learning_rate": 2.3005299969026117e-06, "loss": 1.0159, "step": 16355 }, { "epoch": 0.9386923716900479, "grad_norm": 0.2421875, "learning_rate": 2.2792214126073797e-06, "loss": 0.9209, "step": 16360 }, { "epoch": 0.9389792581117136, "grad_norm": 0.255859375, "learning_rate": 2.258010835423774e-06, "loss": 0.9132, "step": 16365 }, { "epoch": 0.9392661445333792, "grad_norm": 0.259765625, "learning_rate": 2.2368982866245225e-06, "loss": 0.9027, "step": 16370 }, { "epoch": 0.9395530309550449, "grad_norm": 0.271484375, "learning_rate": 2.215883787384032e-06, "loss": 0.8756, "step": 16375 }, { "epoch": 0.9398399173767106, "grad_norm": 0.265625, "learning_rate": 2.194967358778366e-06, "loss": 0.96, "step": 16380 }, { "epoch": 0.9401268037983762, "grad_norm": 0.2734375, "learning_rate": 2.1741490217852545e-06, "loss": 0.9063, "step": 16385 }, { "epoch": 0.9404136902200418, "grad_norm": 0.2578125, "learning_rate": 2.1534287972840294e-06, "loss": 0.9121, "step": 16390 }, { "epoch": 0.9407005766417076, "grad_norm": 0.26953125, "learning_rate": 2.1328067060556235e-06, "loss": 0.8915, "step": 16395 }, { "epoch": 0.9409874630633732, "grad_norm": 0.2578125, "learning_rate": 2.1122827687825473e-06, "loss": 0.8799, "step": 16400 }, { "epoch": 0.9412743494850389, "grad_norm": 0.271484375, "learning_rate": 2.091857006048903e-06, "loss": 0.9434, "step": 16405 }, { "epoch": 0.9415612359067045, "grad_norm": 0.2734375, "learning_rate": 2.0715294383402695e-06, "loss": 0.9603, "step": 16410 }, { "epoch": 0.9418481223283702, "grad_norm": 0.271484375, "learning_rate": 2.051300086043806e-06, "loss": 0.9859, "step": 16415 }, { "epoch": 0.9421350087500359, "grad_norm": 0.267578125, "learning_rate": 2.031168969448116e-06, "loss": 0.9843, "step": 16420 }, { "epoch": 0.9424218951717015, "grad_norm": 0.267578125, "learning_rate": 2.0111361087433165e-06, "loss": 0.9443, "step": 16425 }, { "epoch": 0.9427087815933672, "grad_norm": 0.259765625, "learning_rate": 1.9912015240209583e-06, "loss": 0.9405, "step": 16430 }, { "epoch": 0.9429956680150329, "grad_norm": 0.259765625, "learning_rate": 1.9713652352740364e-06, "loss": 0.9171, "step": 16435 }, { "epoch": 0.9432825544366985, "grad_norm": 0.2578125, "learning_rate": 1.951627262396971e-06, "loss": 0.946, "step": 16440 }, { "epoch": 0.9435694408583641, "grad_norm": 0.251953125, "learning_rate": 1.9319876251855606e-06, "loss": 0.9427, "step": 16445 }, { "epoch": 0.9438563272800299, "grad_norm": 0.27734375, "learning_rate": 1.9124463433370045e-06, "loss": 0.9839, "step": 16450 }, { "epoch": 0.9441432137016955, "grad_norm": 0.271484375, "learning_rate": 1.893003436449814e-06, "loss": 0.9677, "step": 16455 }, { "epoch": 0.9444301001233611, "grad_norm": 0.283203125, "learning_rate": 1.8736589240239022e-06, "loss": 0.9607, "step": 16460 }, { "epoch": 0.9447169865450268, "grad_norm": 0.251953125, "learning_rate": 1.8544128254604277e-06, "loss": 0.9521, "step": 16465 }, { "epoch": 0.9450038729666925, "grad_norm": 0.283203125, "learning_rate": 1.8352651600619165e-06, "loss": 0.9267, "step": 16470 }, { "epoch": 0.9452907593883582, "grad_norm": 0.26171875, "learning_rate": 1.8162159470321072e-06, "loss": 0.919, "step": 16475 }, { "epoch": 0.9455776458100238, "grad_norm": 0.259765625, "learning_rate": 1.7972652054760508e-06, "loss": 0.9263, "step": 16480 }, { "epoch": 0.9458645322316894, "grad_norm": 0.287109375, "learning_rate": 1.7784129544000106e-06, "loss": 0.9021, "step": 16485 }, { "epoch": 0.9461514186533552, "grad_norm": 0.279296875, "learning_rate": 1.7596592127114619e-06, "loss": 0.9341, "step": 16490 }, { "epoch": 0.9464383050750208, "grad_norm": 0.279296875, "learning_rate": 1.741003999219115e-06, "loss": 0.9949, "step": 16495 }, { "epoch": 0.9467251914966864, "grad_norm": 0.26171875, "learning_rate": 1.7224473326328484e-06, "loss": 0.9328, "step": 16500 }, { "epoch": 0.9470120779183522, "grad_norm": 0.28125, "learning_rate": 1.703989231563685e-06, "loss": 0.9443, "step": 16505 }, { "epoch": 0.9472989643400178, "grad_norm": 0.259765625, "learning_rate": 1.6856297145238177e-06, "loss": 0.9384, "step": 16510 }, { "epoch": 0.9475858507616834, "grad_norm": 0.271484375, "learning_rate": 1.667368799926572e-06, "loss": 0.9007, "step": 16515 }, { "epoch": 0.9478727371833491, "grad_norm": 0.265625, "learning_rate": 1.6492065060863536e-06, "loss": 0.9356, "step": 16520 }, { "epoch": 0.9481596236050148, "grad_norm": 0.259765625, "learning_rate": 1.6311428512186699e-06, "loss": 0.998, "step": 16525 }, { "epoch": 0.9484465100266805, "grad_norm": 0.259765625, "learning_rate": 1.6131778534401176e-06, "loss": 0.9782, "step": 16530 }, { "epoch": 0.9487333964483461, "grad_norm": 0.26953125, "learning_rate": 1.5953115307683287e-06, "loss": 0.9569, "step": 16535 }, { "epoch": 0.9490202828700117, "grad_norm": 0.271484375, "learning_rate": 1.5775439011219806e-06, "loss": 0.92, "step": 16540 }, { "epoch": 0.9493071692916775, "grad_norm": 0.296875, "learning_rate": 1.5598749823207525e-06, "loss": 0.9067, "step": 16545 }, { "epoch": 0.9495940557133431, "grad_norm": 0.2734375, "learning_rate": 1.5423047920853473e-06, "loss": 0.9675, "step": 16550 }, { "epoch": 0.9498809421350087, "grad_norm": 0.251953125, "learning_rate": 1.5248333480374355e-06, "loss": 0.911, "step": 16555 }, { "epoch": 0.9501678285566744, "grad_norm": 0.3125, "learning_rate": 1.5074606676996561e-06, "loss": 0.9267, "step": 16560 }, { "epoch": 0.9504547149783401, "grad_norm": 0.287109375, "learning_rate": 1.4901867684955829e-06, "loss": 0.9556, "step": 16565 }, { "epoch": 0.9507416014000057, "grad_norm": 0.2578125, "learning_rate": 1.4730116677497351e-06, "loss": 0.9214, "step": 16570 }, { "epoch": 0.9510284878216714, "grad_norm": 0.26953125, "learning_rate": 1.4559353826875344e-06, "loss": 0.9723, "step": 16575 }, { "epoch": 0.951315374243337, "grad_norm": 0.263671875, "learning_rate": 1.438957930435314e-06, "loss": 0.9021, "step": 16580 }, { "epoch": 0.9516022606650028, "grad_norm": 0.2578125, "learning_rate": 1.422079328020265e-06, "loss": 0.9391, "step": 16585 }, { "epoch": 0.9518891470866684, "grad_norm": 0.275390625, "learning_rate": 1.405299592370435e-06, "loss": 0.9706, "step": 16590 }, { "epoch": 0.952176033508334, "grad_norm": 0.265625, "learning_rate": 1.388618740314751e-06, "loss": 0.9549, "step": 16595 }, { "epoch": 0.9524629199299998, "grad_norm": 0.25390625, "learning_rate": 1.37203678858292e-06, "loss": 0.9687, "step": 16600 }, { "epoch": 0.9527498063516654, "grad_norm": 0.2734375, "learning_rate": 1.355553753805483e-06, "loss": 1.0074, "step": 16605 }, { "epoch": 0.953036692773331, "grad_norm": 0.28515625, "learning_rate": 1.3391696525137831e-06, "loss": 0.9855, "step": 16610 }, { "epoch": 0.9533235791949967, "grad_norm": 0.255859375, "learning_rate": 1.3228845011399204e-06, "loss": 0.9408, "step": 16615 }, { "epoch": 0.9536104656166624, "grad_norm": 0.287109375, "learning_rate": 1.3066983160167746e-06, "loss": 1.0093, "step": 16620 }, { "epoch": 0.953897352038328, "grad_norm": 0.271484375, "learning_rate": 1.2906111133779376e-06, "loss": 1.0011, "step": 16625 }, { "epoch": 0.9541842384599937, "grad_norm": 0.248046875, "learning_rate": 1.27462290935777e-06, "loss": 0.9325, "step": 16630 }, { "epoch": 0.9544711248816593, "grad_norm": 0.271484375, "learning_rate": 1.2587337199913118e-06, "loss": 0.8487, "step": 16635 }, { "epoch": 0.954758011303325, "grad_norm": 0.255859375, "learning_rate": 1.2429435612143158e-06, "loss": 0.9819, "step": 16640 }, { "epoch": 0.9550448977249907, "grad_norm": 0.251953125, "learning_rate": 1.227252448863192e-06, "loss": 0.9317, "step": 16645 }, { "epoch": 0.9553317841466563, "grad_norm": 0.26953125, "learning_rate": 1.211660398675052e-06, "loss": 1.054, "step": 16650 }, { "epoch": 0.955618670568322, "grad_norm": 0.271484375, "learning_rate": 1.1961674262876199e-06, "loss": 0.9532, "step": 16655 }, { "epoch": 0.9559055569899877, "grad_norm": 0.263671875, "learning_rate": 1.1807735472392778e-06, "loss": 0.9216, "step": 16660 }, { "epoch": 0.9561924434116533, "grad_norm": 0.27734375, "learning_rate": 1.1654787769689868e-06, "loss": 0.984, "step": 16665 }, { "epoch": 0.956479329833319, "grad_norm": 0.287109375, "learning_rate": 1.150283130816343e-06, "loss": 0.9401, "step": 16670 }, { "epoch": 0.9567662162549847, "grad_norm": 0.275390625, "learning_rate": 1.1351866240215336e-06, "loss": 0.955, "step": 16675 }, { "epoch": 0.9570531026766503, "grad_norm": 0.294921875, "learning_rate": 1.1201892717252692e-06, "loss": 0.9724, "step": 16680 }, { "epoch": 0.957339989098316, "grad_norm": 0.279296875, "learning_rate": 1.1052910889688629e-06, "loss": 0.9075, "step": 16685 }, { "epoch": 0.9576268755199816, "grad_norm": 0.291015625, "learning_rate": 1.0904920906941618e-06, "loss": 0.9695, "step": 16690 }, { "epoch": 0.9579137619416473, "grad_norm": 0.28515625, "learning_rate": 1.075792291743516e-06, "loss": 0.8991, "step": 16695 }, { "epoch": 0.958200648363313, "grad_norm": 0.287109375, "learning_rate": 1.0611917068597877e-06, "loss": 0.954, "step": 16700 }, { "epoch": 0.9584875347849786, "grad_norm": 0.267578125, "learning_rate": 1.0466903506863523e-06, "loss": 0.9536, "step": 16705 }, { "epoch": 0.9587744212066442, "grad_norm": 0.296875, "learning_rate": 1.032288237767065e-06, "loss": 0.9954, "step": 16710 }, { "epoch": 0.95906130762831, "grad_norm": 0.267578125, "learning_rate": 1.0179853825462271e-06, "loss": 0.923, "step": 16715 }, { "epoch": 0.9593481940499756, "grad_norm": 0.275390625, "learning_rate": 1.0037817993686084e-06, "loss": 0.9356, "step": 16720 }, { "epoch": 0.9596350804716413, "grad_norm": 0.27734375, "learning_rate": 9.896775024793914e-07, "loss": 0.9356, "step": 16725 }, { "epoch": 0.9599219668933069, "grad_norm": 0.255859375, "learning_rate": 9.756725060242277e-07, "loss": 0.9212, "step": 16730 }, { "epoch": 0.9602088533149726, "grad_norm": 0.2734375, "learning_rate": 9.617668240491372e-07, "loss": 1.047, "step": 16735 }, { "epoch": 0.9604957397366383, "grad_norm": 0.267578125, "learning_rate": 9.479604705005529e-07, "loss": 0.9376, "step": 16740 }, { "epoch": 0.9607826261583039, "grad_norm": 0.265625, "learning_rate": 9.342534592252761e-07, "loss": 0.9686, "step": 16745 }, { "epoch": 0.9610695125799696, "grad_norm": 0.25390625, "learning_rate": 9.206458039704768e-07, "loss": 0.9913, "step": 16750 }, { "epoch": 0.9613563990016353, "grad_norm": 0.2490234375, "learning_rate": 9.07137518383705e-07, "loss": 0.9494, "step": 16755 }, { "epoch": 0.9616432854233009, "grad_norm": 0.263671875, "learning_rate": 8.9372861601279e-07, "loss": 0.9991, "step": 16760 }, { "epoch": 0.9619301718449665, "grad_norm": 0.28125, "learning_rate": 8.804191103059523e-07, "loss": 0.937, "step": 16765 }, { "epoch": 0.9622170582666323, "grad_norm": 0.271484375, "learning_rate": 8.672090146116917e-07, "loss": 0.9252, "step": 16770 }, { "epoch": 0.9625039446882979, "grad_norm": 0.279296875, "learning_rate": 8.540983421787996e-07, "loss": 0.9812, "step": 16775 }, { "epoch": 0.9627908311099636, "grad_norm": 0.251953125, "learning_rate": 8.410871061563797e-07, "loss": 0.9591, "step": 16780 }, { "epoch": 0.9630777175316292, "grad_norm": 0.2412109375, "learning_rate": 8.281753195937714e-07, "loss": 0.9256, "step": 16785 }, { "epoch": 0.9633646039532949, "grad_norm": 0.26953125, "learning_rate": 8.153629954406161e-07, "loss": 0.8973, "step": 16790 }, { "epoch": 0.9636514903749606, "grad_norm": 0.2451171875, "learning_rate": 8.026501465467684e-07, "loss": 1.0411, "step": 16795 }, { "epoch": 0.9639383767966262, "grad_norm": 0.2578125, "learning_rate": 7.900367856623403e-07, "loss": 0.9045, "step": 16800 }, { "epoch": 0.9642252632182918, "grad_norm": 0.263671875, "learning_rate": 7.775229254376348e-07, "loss": 0.8761, "step": 16805 }, { "epoch": 0.9645121496399576, "grad_norm": 0.255859375, "learning_rate": 7.651085784231793e-07, "loss": 0.946, "step": 16810 }, { "epoch": 0.9647990360616232, "grad_norm": 0.265625, "learning_rate": 7.52793757069703e-07, "loss": 0.9473, "step": 16815 }, { "epoch": 0.9650859224832888, "grad_norm": 0.287109375, "learning_rate": 7.405784737281151e-07, "loss": 0.9959, "step": 16820 }, { "epoch": 0.9653728089049546, "grad_norm": 0.26171875, "learning_rate": 7.284627406494826e-07, "loss": 0.9586, "step": 16825 }, { "epoch": 0.9656596953266202, "grad_norm": 0.251953125, "learning_rate": 7.164465699850409e-07, "loss": 0.9465, "step": 16830 }, { "epoch": 0.9659465817482858, "grad_norm": 0.28125, "learning_rate": 7.045299737861832e-07, "loss": 0.9774, "step": 16835 }, { "epoch": 0.9662334681699515, "grad_norm": 0.27734375, "learning_rate": 6.92712964004405e-07, "loss": 0.9548, "step": 16840 }, { "epoch": 0.9665203545916172, "grad_norm": 0.271484375, "learning_rate": 6.809955524913369e-07, "loss": 0.9791, "step": 16845 }, { "epoch": 0.9668072410132829, "grad_norm": 0.26171875, "learning_rate": 6.693777509987453e-07, "loss": 0.9135, "step": 16850 }, { "epoch": 0.9670941274349485, "grad_norm": 0.251953125, "learning_rate": 6.578595711784541e-07, "loss": 0.9509, "step": 16855 }, { "epoch": 0.9673810138566141, "grad_norm": 0.248046875, "learning_rate": 6.464410245824004e-07, "loss": 1.0064, "step": 16860 }, { "epoch": 0.9676679002782799, "grad_norm": 0.265625, "learning_rate": 6.351221226625903e-07, "loss": 0.9964, "step": 16865 }, { "epoch": 0.9679547866999455, "grad_norm": 0.259765625, "learning_rate": 6.239028767710986e-07, "loss": 0.997, "step": 16870 }, { "epoch": 0.9682416731216111, "grad_norm": 0.2734375, "learning_rate": 6.127832981600246e-07, "loss": 0.8884, "step": 16875 }, { "epoch": 0.9685285595432768, "grad_norm": 0.265625, "learning_rate": 6.017633979815363e-07, "loss": 0.9485, "step": 16880 }, { "epoch": 0.9688154459649425, "grad_norm": 0.271484375, "learning_rate": 5.908431872878372e-07, "loss": 0.9172, "step": 16885 }, { "epoch": 0.9691023323866081, "grad_norm": 0.26171875, "learning_rate": 5.800226770311113e-07, "loss": 0.9511, "step": 16890 }, { "epoch": 0.9693892188082738, "grad_norm": 0.255859375, "learning_rate": 5.693018780635995e-07, "loss": 1.0202, "step": 16895 }, { "epoch": 0.9696761052299395, "grad_norm": 0.26171875, "learning_rate": 5.58680801137501e-07, "loss": 0.933, "step": 16900 }, { "epoch": 0.9699629916516052, "grad_norm": 0.267578125, "learning_rate": 5.481594569050174e-07, "loss": 0.885, "step": 16905 }, { "epoch": 0.9702498780732708, "grad_norm": 0.296875, "learning_rate": 5.377378559183077e-07, "loss": 0.9232, "step": 16910 }, { "epoch": 0.9705367644949364, "grad_norm": 0.275390625, "learning_rate": 5.274160086295332e-07, "loss": 0.9464, "step": 16915 }, { "epoch": 0.9708236509166022, "grad_norm": 0.251953125, "learning_rate": 5.171939253907687e-07, "loss": 0.9384, "step": 16920 }, { "epoch": 0.9711105373382678, "grad_norm": 0.27734375, "learning_rate": 5.070716164540579e-07, "loss": 0.9478, "step": 16925 }, { "epoch": 0.9713974237599334, "grad_norm": 0.26953125, "learning_rate": 4.970490919713577e-07, "loss": 0.9792, "step": 16930 }, { "epoch": 0.9716843101815991, "grad_norm": 0.2734375, "learning_rate": 4.871263619945721e-07, "loss": 0.9806, "step": 16935 }, { "epoch": 0.9719711966032648, "grad_norm": 0.255859375, "learning_rate": 4.773034364754958e-07, "loss": 0.9711, "step": 16940 }, { "epoch": 0.9722580830249304, "grad_norm": 0.26953125, "learning_rate": 4.675803252658484e-07, "loss": 0.9396, "step": 16945 }, { "epoch": 0.9725449694465961, "grad_norm": 0.26953125, "learning_rate": 4.5795703811721825e-07, "loss": 0.9318, "step": 16950 }, { "epoch": 0.9728318558682617, "grad_norm": 0.244140625, "learning_rate": 4.484335846810961e-07, "loss": 0.8472, "step": 16955 }, { "epoch": 0.9731187422899275, "grad_norm": 0.251953125, "learning_rate": 4.3900997450885274e-07, "loss": 0.8956, "step": 16960 }, { "epoch": 0.9734056287115931, "grad_norm": 0.248046875, "learning_rate": 4.2968621705168354e-07, "loss": 0.8557, "step": 16965 }, { "epoch": 0.9736925151332587, "grad_norm": 0.28515625, "learning_rate": 4.204623216606751e-07, "loss": 0.921, "step": 16970 }, { "epoch": 0.9739794015549244, "grad_norm": 0.267578125, "learning_rate": 4.113382975867608e-07, "loss": 0.9776, "step": 16975 }, { "epoch": 0.9742662879765901, "grad_norm": 0.2470703125, "learning_rate": 4.023141539806985e-07, "loss": 0.9555, "step": 16980 }, { "epoch": 0.9745531743982557, "grad_norm": 0.28515625, "learning_rate": 3.9338989989307073e-07, "loss": 0.9091, "step": 16985 }, { "epoch": 0.9748400608199214, "grad_norm": 0.265625, "learning_rate": 3.845655442742624e-07, "loss": 0.9257, "step": 16990 }, { "epoch": 0.9751269472415871, "grad_norm": 0.279296875, "learning_rate": 3.7584109597451623e-07, "loss": 0.9386, "step": 16995 }, { "epoch": 0.9754138336632527, "grad_norm": 0.298828125, "learning_rate": 3.672165637438218e-07, "loss": 0.9565, "step": 17000 }, { "epoch": 0.9757007200849184, "grad_norm": 0.279296875, "learning_rate": 3.586919562319935e-07, "loss": 0.9532, "step": 17005 }, { "epoch": 0.975987606506584, "grad_norm": 0.26953125, "learning_rate": 3.5026728198860324e-07, "loss": 0.9391, "step": 17010 }, { "epoch": 0.9762744929282497, "grad_norm": 0.251953125, "learning_rate": 3.4194254946302573e-07, "loss": 0.9318, "step": 17015 }, { "epoch": 0.9765613793499154, "grad_norm": 0.2470703125, "learning_rate": 3.337177670043823e-07, "loss": 0.9412, "step": 17020 }, { "epoch": 0.976848265771581, "grad_norm": 0.26953125, "learning_rate": 3.255929428615523e-07, "loss": 0.8887, "step": 17025 }, { "epoch": 0.9771351521932466, "grad_norm": 0.275390625, "learning_rate": 3.175680851831619e-07, "loss": 0.9596, "step": 17030 }, { "epoch": 0.9774220386149124, "grad_norm": 0.255859375, "learning_rate": 3.0964320201759545e-07, "loss": 0.9142, "step": 17035 }, { "epoch": 0.977708925036578, "grad_norm": 0.2578125, "learning_rate": 3.0181830131295053e-07, "loss": 0.9786, "step": 17040 }, { "epoch": 0.9779958114582437, "grad_norm": 0.25390625, "learning_rate": 2.9409339091703844e-07, "loss": 0.9415, "step": 17045 }, { "epoch": 0.9782826978799093, "grad_norm": 0.275390625, "learning_rate": 2.8646847857742854e-07, "loss": 0.9181, "step": 17050 }, { "epoch": 0.978569584301575, "grad_norm": 0.267578125, "learning_rate": 2.789435719413813e-07, "loss": 0.9355, "step": 17055 }, { "epoch": 0.9788564707232407, "grad_norm": 0.296875, "learning_rate": 2.7151867855581546e-07, "loss": 0.9364, "step": 17060 }, { "epoch": 0.9791433571449063, "grad_norm": 0.294921875, "learning_rate": 2.641938058674187e-07, "loss": 0.9363, "step": 17065 }, { "epoch": 0.979430243566572, "grad_norm": 0.28125, "learning_rate": 2.569689612225035e-07, "loss": 0.9238, "step": 17070 }, { "epoch": 0.9797171299882377, "grad_norm": 0.2890625, "learning_rate": 2.4984415186709576e-07, "loss": 0.9067, "step": 17075 }, { "epoch": 0.9800040164099033, "grad_norm": 0.26953125, "learning_rate": 2.4281938494686853e-07, "loss": 0.9937, "step": 17080 }, { "epoch": 0.9802909028315689, "grad_norm": 0.255859375, "learning_rate": 2.3589466750718604e-07, "loss": 0.9389, "step": 17085 }, { "epoch": 0.9805777892532347, "grad_norm": 0.271484375, "learning_rate": 2.2907000649304845e-07, "loss": 0.9077, "step": 17090 }, { "epoch": 0.9808646756749003, "grad_norm": 0.283203125, "learning_rate": 2.2234540874911392e-07, "loss": 0.9189, "step": 17095 }, { "epoch": 0.981151562096566, "grad_norm": 0.28125, "learning_rate": 2.1572088101968758e-07, "loss": 1.0134, "step": 17100 }, { "epoch": 0.9814384485182316, "grad_norm": 0.26953125, "learning_rate": 2.0919642994869925e-07, "loss": 0.8936, "step": 17105 }, { "epoch": 0.9817253349398973, "grad_norm": 0.275390625, "learning_rate": 2.0277206207972576e-07, "loss": 0.9482, "step": 17110 }, { "epoch": 0.982012221361563, "grad_norm": 0.298828125, "learning_rate": 1.9644778385596864e-07, "loss": 0.9664, "step": 17115 }, { "epoch": 0.9822991077832286, "grad_norm": 0.26953125, "learning_rate": 1.902236016202208e-07, "loss": 0.9467, "step": 17120 }, { "epoch": 0.9825859942048942, "grad_norm": 0.27734375, "learning_rate": 1.8409952161489997e-07, "loss": 0.916, "step": 17125 }, { "epoch": 0.98287288062656, "grad_norm": 0.263671875, "learning_rate": 1.7807554998203747e-07, "loss": 0.8876, "step": 17130 }, { "epoch": 0.9831597670482256, "grad_norm": 0.255859375, "learning_rate": 1.7215169276325605e-07, "loss": 0.944, "step": 17135 }, { "epoch": 0.9834466534698912, "grad_norm": 0.27734375, "learning_rate": 1.663279558997699e-07, "loss": 0.9699, "step": 17140 }, { "epoch": 0.983733539891557, "grad_norm": 0.26953125, "learning_rate": 1.6060434523238466e-07, "loss": 0.8837, "step": 17145 }, { "epoch": 0.9840204263132226, "grad_norm": 0.248046875, "learning_rate": 1.5498086650147513e-07, "loss": 0.9291, "step": 17150 }, { "epoch": 0.9843073127348883, "grad_norm": 0.275390625, "learning_rate": 1.4945752534699653e-07, "loss": 0.9764, "step": 17155 }, { "epoch": 0.9845941991565539, "grad_norm": 0.26953125, "learning_rate": 1.4403432730847323e-07, "loss": 0.9598, "step": 17160 }, { "epoch": 0.9848810855782196, "grad_norm": 0.2578125, "learning_rate": 1.3871127782500993e-07, "loss": 0.9242, "step": 17165 }, { "epoch": 0.9851679719998853, "grad_norm": 0.279296875, "learning_rate": 1.3348838223523618e-07, "loss": 0.8537, "step": 17170 }, { "epoch": 0.9854548584215509, "grad_norm": 0.271484375, "learning_rate": 1.2836564577735078e-07, "loss": 0.8634, "step": 17175 }, { "epoch": 0.9857417448432165, "grad_norm": 0.244140625, "learning_rate": 1.2334307358911056e-07, "loss": 0.9239, "step": 17180 }, { "epoch": 0.9860286312648823, "grad_norm": 0.287109375, "learning_rate": 1.1842067070779728e-07, "loss": 0.9674, "step": 17185 }, { "epoch": 0.9863155176865479, "grad_norm": 0.251953125, "learning_rate": 1.1359844207023962e-07, "loss": 0.8366, "step": 17190 }, { "epoch": 0.9866024041082135, "grad_norm": 0.27734375, "learning_rate": 1.0887639251280224e-07, "loss": 0.9536, "step": 17195 }, { "epoch": 0.9868892905298792, "grad_norm": 0.26953125, "learning_rate": 1.0425452677135238e-07, "loss": 1.0048, "step": 17200 }, { "epoch": 0.9871761769515449, "grad_norm": 0.31640625, "learning_rate": 9.973284948132656e-08, "loss": 0.9853, "step": 17205 }, { "epoch": 0.9874630633732105, "grad_norm": 0.25390625, "learning_rate": 9.531136517761941e-08, "loss": 0.9073, "step": 17210 }, { "epoch": 0.9877499497948762, "grad_norm": 0.265625, "learning_rate": 9.099007829469486e-08, "loss": 0.8845, "step": 17215 }, { "epoch": 0.9880368362165419, "grad_norm": 0.28515625, "learning_rate": 8.676899316648613e-08, "loss": 0.9607, "step": 17220 }, { "epoch": 0.9883237226382076, "grad_norm": 0.267578125, "learning_rate": 8.264811402646233e-08, "loss": 1.0182, "step": 17225 }, { "epoch": 0.9886106090598732, "grad_norm": 0.275390625, "learning_rate": 7.862744500756192e-08, "loss": 0.9312, "step": 17230 }, { "epoch": 0.9888974954815388, "grad_norm": 0.27734375, "learning_rate": 7.470699014223703e-08, "loss": 0.9567, "step": 17235 }, { "epoch": 0.9891843819032046, "grad_norm": 0.265625, "learning_rate": 7.088675336244244e-08, "loss": 0.9187, "step": 17240 }, { "epoch": 0.9894712683248702, "grad_norm": 0.267578125, "learning_rate": 6.71667384995911e-08, "loss": 0.956, "step": 17245 }, { "epoch": 0.9897581547465358, "grad_norm": 0.259765625, "learning_rate": 6.35469492846208e-08, "loss": 0.9679, "step": 17250 }, { "epoch": 0.9900450411682015, "grad_norm": 0.267578125, "learning_rate": 6.00273893479053e-08, "loss": 0.9105, "step": 17255 }, { "epoch": 0.9903319275898672, "grad_norm": 0.28125, "learning_rate": 5.660806221932102e-08, "loss": 0.9477, "step": 17260 }, { "epoch": 0.9906188140115328, "grad_norm": 0.26171875, "learning_rate": 5.3288971328224747e-08, "loss": 0.962, "step": 17265 }, { "epoch": 0.9909057004331985, "grad_norm": 0.267578125, "learning_rate": 5.0070120003420375e-08, "loss": 0.9531, "step": 17270 }, { "epoch": 0.9911925868548641, "grad_norm": 0.26171875, "learning_rate": 4.6951511473203316e-08, "loss": 0.9721, "step": 17275 }, { "epoch": 0.9914794732765299, "grad_norm": 0.265625, "learning_rate": 4.3933148865316075e-08, "loss": 0.9882, "step": 17280 }, { "epoch": 0.9917663596981955, "grad_norm": 0.263671875, "learning_rate": 4.101503520695937e-08, "loss": 0.9843, "step": 17285 }, { "epoch": 0.9920532461198611, "grad_norm": 0.26171875, "learning_rate": 3.819717342480322e-08, "loss": 0.9372, "step": 17290 }, { "epoch": 0.9923401325415269, "grad_norm": 0.259765625, "learning_rate": 3.547956634495364e-08, "loss": 0.9823, "step": 17295 }, { "epoch": 0.9926270189631925, "grad_norm": 0.25, "learning_rate": 3.286221669299705e-08, "loss": 0.9377, "step": 17300 }, { "epoch": 0.9929139053848581, "grad_norm": 0.265625, "learning_rate": 3.0345127093955875e-08, "loss": 0.9014, "step": 17305 }, { "epoch": 0.9932007918065238, "grad_norm": 0.251953125, "learning_rate": 2.7928300072277424e-08, "loss": 0.8897, "step": 17310 }, { "epoch": 0.9934876782281895, "grad_norm": 0.29296875, "learning_rate": 2.561173805186723e-08, "loss": 0.9683, "step": 17315 }, { "epoch": 0.9937745646498551, "grad_norm": 0.263671875, "learning_rate": 2.339544335610011e-08, "loss": 0.9774, "step": 17320 }, { "epoch": 0.9940614510715208, "grad_norm": 0.263671875, "learning_rate": 2.1279418207742486e-08, "loss": 0.9071, "step": 17325 }, { "epoch": 0.9943483374931864, "grad_norm": 0.267578125, "learning_rate": 1.9263664729030073e-08, "loss": 0.8946, "step": 17330 }, { "epoch": 0.9946352239148522, "grad_norm": 0.263671875, "learning_rate": 1.7348184941623492e-08, "loss": 0.9555, "step": 17335 }, { "epoch": 0.9949221103365178, "grad_norm": 0.263671875, "learning_rate": 1.5532980766608252e-08, "loss": 1.0175, "step": 17340 }, { "epoch": 0.9952089967581834, "grad_norm": 0.279296875, "learning_rate": 1.3818054024516969e-08, "loss": 0.943, "step": 17345 }, { "epoch": 0.995495883179849, "grad_norm": 0.28515625, "learning_rate": 1.2203406435284948e-08, "loss": 0.938, "step": 17350 }, { "epoch": 0.9957827696015148, "grad_norm": 0.263671875, "learning_rate": 1.0689039618305696e-08, "loss": 0.9103, "step": 17355 }, { "epoch": 0.9960696560231804, "grad_norm": 0.27734375, "learning_rate": 9.274955092386516e-09, "loss": 0.9986, "step": 17360 }, { "epoch": 0.9963565424448461, "grad_norm": 0.263671875, "learning_rate": 7.961154275737403e-09, "loss": 0.9417, "step": 17365 }, { "epoch": 0.9966434288665117, "grad_norm": 0.23828125, "learning_rate": 6.747638486026553e-09, "loss": 0.9148, "step": 17370 }, { "epoch": 0.9969303152881774, "grad_norm": 0.291015625, "learning_rate": 5.634408940313751e-09, "loss": 1.0031, "step": 17375 }, { "epoch": 0.9972172017098431, "grad_norm": 0.291015625, "learning_rate": 4.621466755094784e-09, "loss": 0.9821, "step": 17380 }, { "epoch": 0.9975040881315087, "grad_norm": 0.263671875, "learning_rate": 3.7088129462792277e-09, "loss": 0.9413, "step": 17385 }, { "epoch": 0.9977909745531744, "grad_norm": 0.259765625, "learning_rate": 2.896448429201559e-09, "loss": 0.9787, "step": 17390 }, { "epoch": 0.9980778609748401, "grad_norm": 0.30859375, "learning_rate": 2.1843740185878423e-09, "loss": 0.9642, "step": 17395 }, { "epoch": 0.9983647473965057, "grad_norm": 0.26953125, "learning_rate": 1.5725904286223446e-09, "loss": 0.9081, "step": 17400 }, { "epoch": 0.9986516338181713, "grad_norm": 0.265625, "learning_rate": 1.0610982728698203e-09, "loss": 0.9116, "step": 17405 }, { "epoch": 0.9989385202398371, "grad_norm": 0.283203125, "learning_rate": 6.498980643199204e-10, "loss": 0.9469, "step": 17410 }, { "epoch": 0.9992254066615027, "grad_norm": 0.2734375, "learning_rate": 3.389902153760893e-10, "loss": 0.9756, "step": 17415 }, { "epoch": 0.9995122930831684, "grad_norm": 0.283203125, "learning_rate": 1.2837503786666815e-10, "loss": 0.94, "step": 17420 }, { "epoch": 0.999799179504834, "grad_norm": 0.296875, "learning_rate": 1.8052743022689556e-11, "loss": 0.9467, "step": 17425 }, { "epoch": 0.9999713113578335, "eval_loss": 0.9524931311607361, "eval_runtime": 4185.8405, "eval_samples_per_second": 3.686, "eval_steps_per_second": 0.461, "step": 17428 }, { "epoch": 0.9999713113578335, "step": 17428, "total_flos": 1.2254141370633028e+19, "train_loss": 0.9593030543455379, "train_runtime": 137087.8983, "train_samples_per_second": 1.017, "train_steps_per_second": 0.127 } ], "logging_steps": 5, "max_steps": 17428, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.2254141370633028e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }