{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999741314639005, "eval_steps": 500, "global_step": 19328, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.52734375, "learning_rate": 1.0346611484738748e-07, "loss": 2.0672, "step": 1 }, { "epoch": 0.0, "grad_norm": 0.55078125, "learning_rate": 5.173305742369374e-07, "loss": 2.0719, "step": 5 }, { "epoch": 0.0, "grad_norm": 0.60546875, "learning_rate": 1.0346611484738748e-06, "loss": 2.0752, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.65625, "learning_rate": 1.5519917227108122e-06, "loss": 2.0881, "step": 15 }, { "epoch": 0.0, "grad_norm": 0.53125, "learning_rate": 2.0693222969477496e-06, "loss": 2.0211, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.1171875, "learning_rate": 2.586652871184687e-06, "loss": 2.0231, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.5625, "learning_rate": 3.1039834454216244e-06, "loss": 2.0532, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.470703125, "learning_rate": 3.6213140196585623e-06, "loss": 2.0431, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.546875, "learning_rate": 4.138644593895499e-06, "loss": 2.0622, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.5, "learning_rate": 4.655975168132437e-06, "loss": 2.0141, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.57421875, "learning_rate": 5.173305742369374e-06, "loss": 2.0169, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.466796875, "learning_rate": 5.6906363166063115e-06, "loss": 2.0446, "step": 55 }, { "epoch": 0.0, "grad_norm": 0.52734375, "learning_rate": 6.207966890843249e-06, "loss": 2.0514, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.50390625, "learning_rate": 6.725297465080186e-06, "loss": 2.028, "step": 65 }, { "epoch": 0.0, "grad_norm": 0.5078125, "learning_rate": 7.2426280393171246e-06, "loss": 1.9917, "step": 70 }, { "epoch": 0.0, "grad_norm": 0.47265625, "learning_rate": 7.75995861355406e-06, "loss": 2.0271, "step": 75 }, { "epoch": 0.0, "grad_norm": 0.53125, "learning_rate": 8.277289187790999e-06, "loss": 1.9452, "step": 80 }, { "epoch": 0.0, "grad_norm": 0.455078125, "learning_rate": 8.794619762027937e-06, "loss": 1.9469, "step": 85 }, { "epoch": 0.0, "grad_norm": 0.515625, "learning_rate": 9.311950336264873e-06, "loss": 1.9631, "step": 90 }, { "epoch": 0.0, "grad_norm": 0.498046875, "learning_rate": 9.82928091050181e-06, "loss": 1.939, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.4765625, "learning_rate": 1.0346611484738748e-05, "loss": 1.939, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.486328125, "learning_rate": 1.0863942058975686e-05, "loss": 1.9396, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.46484375, "learning_rate": 1.1381272633212623e-05, "loss": 1.9, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 1.1898603207449561e-05, "loss": 1.9245, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.498046875, "learning_rate": 1.2415933781686498e-05, "loss": 1.8493, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.515625, "learning_rate": 1.2933264355923436e-05, "loss": 1.8828, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 1.3450594930160373e-05, "loss": 1.8809, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.46875, "learning_rate": 1.3967925504397311e-05, "loss": 1.8241, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.46484375, "learning_rate": 1.4485256078634249e-05, "loss": 1.8636, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 1.5002586652871187e-05, "loss": 1.8901, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.494140625, "learning_rate": 1.551991722710812e-05, "loss": 1.8464, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.49609375, "learning_rate": 1.603724780134506e-05, "loss": 1.7929, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 1.6554578375581997e-05, "loss": 1.8162, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 1.7071908949818935e-05, "loss": 1.8365, "step": 165 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 1.7589239524055874e-05, "loss": 1.8309, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.6171875, "learning_rate": 1.8106570098292812e-05, "loss": 1.8047, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.5546875, "learning_rate": 1.8623900672529747e-05, "loss": 1.8204, "step": 180 }, { "epoch": 0.01, "grad_norm": 0.72265625, "learning_rate": 1.9141231246766685e-05, "loss": 1.8022, "step": 185 }, { "epoch": 0.01, "grad_norm": 0.578125, "learning_rate": 1.965856182100362e-05, "loss": 1.8294, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 2.0175892395240558e-05, "loss": 1.8189, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.578125, "learning_rate": 2.0693222969477496e-05, "loss": 1.8118, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.57421875, "learning_rate": 2.1210553543714435e-05, "loss": 1.8137, "step": 205 }, { "epoch": 0.01, "grad_norm": 0.58203125, "learning_rate": 2.1727884117951373e-05, "loss": 1.7953, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.61328125, "learning_rate": 2.224521469218831e-05, "loss": 1.8048, "step": 215 }, { "epoch": 0.01, "grad_norm": 0.62109375, "learning_rate": 2.2762545266425246e-05, "loss": 1.7902, "step": 220 }, { "epoch": 0.01, "grad_norm": 0.62890625, "learning_rate": 2.3279875840662184e-05, "loss": 1.8015, "step": 225 }, { "epoch": 0.01, "grad_norm": 0.625, "learning_rate": 2.3797206414899122e-05, "loss": 1.8083, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.63671875, "learning_rate": 2.4314536989136057e-05, "loss": 1.7645, "step": 235 }, { "epoch": 0.01, "grad_norm": 0.6484375, "learning_rate": 2.4831867563372996e-05, "loss": 1.7412, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.66015625, "learning_rate": 2.5349198137609937e-05, "loss": 1.7416, "step": 245 }, { "epoch": 0.01, "grad_norm": 0.640625, "learning_rate": 2.5866528711846872e-05, "loss": 1.7907, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.66015625, "learning_rate": 2.6383859286083807e-05, "loss": 1.7905, "step": 255 }, { "epoch": 0.01, "grad_norm": 0.6640625, "learning_rate": 2.6901189860320745e-05, "loss": 1.7633, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.67578125, "learning_rate": 2.7418520434557683e-05, "loss": 1.7678, "step": 265 }, { "epoch": 0.01, "grad_norm": 0.66015625, "learning_rate": 2.7935851008794622e-05, "loss": 1.7868, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.68359375, "learning_rate": 2.8453181583031557e-05, "loss": 1.7642, "step": 275 }, { "epoch": 0.01, "grad_norm": 0.70703125, "learning_rate": 2.8970512157268498e-05, "loss": 1.7907, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.69921875, "learning_rate": 2.9487842731505433e-05, "loss": 1.7673, "step": 285 }, { "epoch": 0.02, "grad_norm": 0.7109375, "learning_rate": 3.0005173305742375e-05, "loss": 1.7521, "step": 290 }, { "epoch": 0.02, "grad_norm": 0.71875, "learning_rate": 3.052250387997931e-05, "loss": 1.7589, "step": 295 }, { "epoch": 0.02, "grad_norm": 0.69921875, "learning_rate": 3.103983445421624e-05, "loss": 1.7534, "step": 300 }, { "epoch": 0.02, "grad_norm": 0.7109375, "learning_rate": 3.1557165028453186e-05, "loss": 1.7469, "step": 305 }, { "epoch": 0.02, "grad_norm": 0.72265625, "learning_rate": 3.207449560269012e-05, "loss": 1.7285, "step": 310 }, { "epoch": 0.02, "grad_norm": 0.73046875, "learning_rate": 3.259182617692706e-05, "loss": 1.7777, "step": 315 }, { "epoch": 0.02, "grad_norm": 0.74609375, "learning_rate": 3.3109156751163994e-05, "loss": 1.6814, "step": 320 }, { "epoch": 0.02, "grad_norm": 0.76171875, "learning_rate": 3.362648732540093e-05, "loss": 1.739, "step": 325 }, { "epoch": 0.02, "grad_norm": 0.74609375, "learning_rate": 3.414381789963787e-05, "loss": 1.7609, "step": 330 }, { "epoch": 0.02, "grad_norm": 0.73046875, "learning_rate": 3.46611484738748e-05, "loss": 1.7366, "step": 335 }, { "epoch": 0.02, "grad_norm": 0.75, "learning_rate": 3.517847904811175e-05, "loss": 1.7704, "step": 340 }, { "epoch": 0.02, "grad_norm": 0.7578125, "learning_rate": 3.569580962234868e-05, "loss": 1.6942, "step": 345 }, { "epoch": 0.02, "grad_norm": 0.76171875, "learning_rate": 3.6213140196585624e-05, "loss": 1.6884, "step": 350 }, { "epoch": 0.02, "grad_norm": 0.8046875, "learning_rate": 3.6730470770822555e-05, "loss": 1.7542, "step": 355 }, { "epoch": 0.02, "grad_norm": 0.71875, "learning_rate": 3.724780134505949e-05, "loss": 1.7435, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.765625, "learning_rate": 3.776513191929643e-05, "loss": 1.7158, "step": 365 }, { "epoch": 0.02, "grad_norm": 0.7734375, "learning_rate": 3.828246249353337e-05, "loss": 1.7178, "step": 370 }, { "epoch": 0.02, "grad_norm": 0.79296875, "learning_rate": 3.879979306777031e-05, "loss": 1.7613, "step": 375 }, { "epoch": 0.02, "grad_norm": 0.78515625, "learning_rate": 3.931712364200724e-05, "loss": 1.7211, "step": 380 }, { "epoch": 0.02, "grad_norm": 380.0, "learning_rate": 3.9834454216244185e-05, "loss": 1.796, "step": 385 }, { "epoch": 0.02, "grad_norm": 0.77734375, "learning_rate": 4.0351784790481116e-05, "loss": 1.7092, "step": 390 }, { "epoch": 0.02, "grad_norm": 0.765625, "learning_rate": 4.086911536471806e-05, "loss": 1.7148, "step": 395 }, { "epoch": 0.02, "grad_norm": 0.77734375, "learning_rate": 4.138644593895499e-05, "loss": 1.6965, "step": 400 }, { "epoch": 0.02, "grad_norm": 0.7578125, "learning_rate": 4.190377651319193e-05, "loss": 1.7083, "step": 405 }, { "epoch": 0.02, "grad_norm": 0.75390625, "learning_rate": 4.242110708742887e-05, "loss": 1.6792, "step": 410 }, { "epoch": 0.02, "grad_norm": 0.7578125, "learning_rate": 4.293843766166581e-05, "loss": 1.7314, "step": 415 }, { "epoch": 0.02, "grad_norm": 0.8046875, "learning_rate": 4.3455768235902746e-05, "loss": 1.741, "step": 420 }, { "epoch": 0.02, "grad_norm": 0.78125, "learning_rate": 4.397309881013968e-05, "loss": 1.7095, "step": 425 }, { "epoch": 0.02, "grad_norm": 0.8046875, "learning_rate": 4.449042938437662e-05, "loss": 1.7073, "step": 430 }, { "epoch": 0.02, "grad_norm": 0.80078125, "learning_rate": 4.5007759958613554e-05, "loss": 1.7033, "step": 435 }, { "epoch": 0.02, "grad_norm": 0.78515625, "learning_rate": 4.552509053285049e-05, "loss": 1.6986, "step": 440 }, { "epoch": 0.02, "grad_norm": 0.77734375, "learning_rate": 4.604242110708743e-05, "loss": 1.6743, "step": 445 }, { "epoch": 0.02, "grad_norm": 0.82421875, "learning_rate": 4.655975168132437e-05, "loss": 1.6705, "step": 450 }, { "epoch": 0.02, "grad_norm": 0.73828125, "learning_rate": 4.707708225556131e-05, "loss": 1.6937, "step": 455 }, { "epoch": 0.02, "grad_norm": 0.79296875, "learning_rate": 4.7594412829798245e-05, "loss": 1.7355, "step": 460 }, { "epoch": 0.02, "grad_norm": 0.76953125, "learning_rate": 4.811174340403518e-05, "loss": 1.6826, "step": 465 }, { "epoch": 0.02, "grad_norm": 0.83984375, "learning_rate": 4.8629073978272115e-05, "loss": 1.6627, "step": 470 }, { "epoch": 0.02, "grad_norm": 0.78125, "learning_rate": 4.914640455250906e-05, "loss": 1.6788, "step": 475 }, { "epoch": 0.02, "grad_norm": 0.78125, "learning_rate": 4.966373512674599e-05, "loss": 1.7126, "step": 480 }, { "epoch": 0.03, "grad_norm": 0.77734375, "learning_rate": 5.018106570098293e-05, "loss": 1.7152, "step": 485 }, { "epoch": 0.03, "grad_norm": 0.80859375, "learning_rate": 5.0698396275219874e-05, "loss": 1.6685, "step": 490 }, { "epoch": 0.03, "grad_norm": 0.74609375, "learning_rate": 5.12157268494568e-05, "loss": 1.6562, "step": 495 }, { "epoch": 0.03, "grad_norm": 0.7578125, "learning_rate": 5.1733057423693744e-05, "loss": 1.651, "step": 500 }, { "epoch": 0.03, "grad_norm": 0.76953125, "learning_rate": 5.225038799793068e-05, "loss": 1.6997, "step": 505 }, { "epoch": 0.03, "grad_norm": 0.74609375, "learning_rate": 5.2767718572167614e-05, "loss": 1.6904, "step": 510 }, { "epoch": 0.03, "grad_norm": 0.7890625, "learning_rate": 5.328504914640455e-05, "loss": 1.6754, "step": 515 }, { "epoch": 0.03, "grad_norm": 0.7578125, "learning_rate": 5.380237972064149e-05, "loss": 1.7106, "step": 520 }, { "epoch": 0.03, "grad_norm": 0.7890625, "learning_rate": 5.4319710294878435e-05, "loss": 1.6913, "step": 525 }, { "epoch": 0.03, "grad_norm": 0.80078125, "learning_rate": 5.483704086911537e-05, "loss": 1.6779, "step": 530 }, { "epoch": 0.03, "grad_norm": 0.796875, "learning_rate": 5.5354371443352305e-05, "loss": 1.6658, "step": 535 }, { "epoch": 0.03, "grad_norm": 0.76953125, "learning_rate": 5.5871702017589243e-05, "loss": 1.6931, "step": 540 }, { "epoch": 0.03, "grad_norm": 0.76953125, "learning_rate": 5.6389032591826175e-05, "loss": 1.6653, "step": 545 }, { "epoch": 0.03, "grad_norm": 0.75, "learning_rate": 5.690636316606311e-05, "loss": 1.6921, "step": 550 }, { "epoch": 0.03, "grad_norm": 0.74609375, "learning_rate": 5.742369374030006e-05, "loss": 1.6621, "step": 555 }, { "epoch": 0.03, "grad_norm": 0.74609375, "learning_rate": 5.7941024314536996e-05, "loss": 1.6868, "step": 560 }, { "epoch": 0.03, "grad_norm": 0.75, "learning_rate": 5.845835488877393e-05, "loss": 1.672, "step": 565 }, { "epoch": 0.03, "grad_norm": 0.75, "learning_rate": 5.8975685463010866e-05, "loss": 1.6741, "step": 570 }, { "epoch": 0.03, "grad_norm": 0.75390625, "learning_rate": 5.9493016037247804e-05, "loss": 1.6989, "step": 575 }, { "epoch": 0.03, "grad_norm": 0.734375, "learning_rate": 6.001034661148475e-05, "loss": 1.6492, "step": 580 }, { "epoch": 0.03, "grad_norm": 0.78125, "learning_rate": 6.0527677185721674e-05, "loss": 1.7075, "step": 585 }, { "epoch": 0.03, "grad_norm": 0.7421875, "learning_rate": 6.104500775995862e-05, "loss": 1.6889, "step": 590 }, { "epoch": 0.03, "grad_norm": 0.71875, "learning_rate": 6.156233833419556e-05, "loss": 1.6749, "step": 595 }, { "epoch": 0.03, "grad_norm": 0.73046875, "learning_rate": 6.207966890843248e-05, "loss": 1.6923, "step": 600 }, { "epoch": 0.03, "grad_norm": 0.72265625, "learning_rate": 6.259699948266943e-05, "loss": 1.6846, "step": 605 }, { "epoch": 0.03, "grad_norm": 0.72265625, "learning_rate": 6.311433005690637e-05, "loss": 1.6878, "step": 610 }, { "epoch": 0.03, "grad_norm": 0.7265625, "learning_rate": 6.363166063114331e-05, "loss": 1.665, "step": 615 }, { "epoch": 0.03, "grad_norm": 0.72265625, "learning_rate": 6.414899120538024e-05, "loss": 1.6745, "step": 620 }, { "epoch": 0.03, "grad_norm": 0.7890625, "learning_rate": 6.466632177961717e-05, "loss": 1.6813, "step": 625 }, { "epoch": 0.03, "grad_norm": 0.73828125, "learning_rate": 6.518365235385413e-05, "loss": 1.6718, "step": 630 }, { "epoch": 0.03, "grad_norm": 0.796875, "learning_rate": 6.570098292809105e-05, "loss": 1.6575, "step": 635 }, { "epoch": 0.03, "grad_norm": 0.703125, "learning_rate": 6.621831350232799e-05, "loss": 1.6663, "step": 640 }, { "epoch": 0.03, "grad_norm": 0.73828125, "learning_rate": 6.673564407656493e-05, "loss": 1.659, "step": 645 }, { "epoch": 0.03, "grad_norm": 0.71875, "learning_rate": 6.725297465080186e-05, "loss": 1.6705, "step": 650 }, { "epoch": 0.03, "grad_norm": 0.75390625, "learning_rate": 6.77703052250388e-05, "loss": 1.6884, "step": 655 }, { "epoch": 0.03, "grad_norm": 0.71875, "learning_rate": 6.828763579927574e-05, "loss": 1.6974, "step": 660 }, { "epoch": 0.03, "grad_norm": 0.70703125, "learning_rate": 6.880496637351268e-05, "loss": 1.6686, "step": 665 }, { "epoch": 0.03, "grad_norm": 0.69140625, "learning_rate": 6.93222969477496e-05, "loss": 1.6708, "step": 670 }, { "epoch": 0.03, "grad_norm": 0.70703125, "learning_rate": 6.983962752198656e-05, "loss": 1.6826, "step": 675 }, { "epoch": 0.04, "grad_norm": 0.70703125, "learning_rate": 7.03569580962235e-05, "loss": 1.6771, "step": 680 }, { "epoch": 0.04, "grad_norm": 0.67578125, "learning_rate": 7.087428867046043e-05, "loss": 1.657, "step": 685 }, { "epoch": 0.04, "grad_norm": 0.671875, "learning_rate": 7.139161924469736e-05, "loss": 1.6522, "step": 690 }, { "epoch": 0.04, "grad_norm": 0.69140625, "learning_rate": 7.19089498189343e-05, "loss": 1.6453, "step": 695 }, { "epoch": 0.04, "grad_norm": 0.71875, "learning_rate": 7.242628039317125e-05, "loss": 1.6442, "step": 700 }, { "epoch": 0.04, "grad_norm": 0.671875, "learning_rate": 7.294361096740819e-05, "loss": 1.631, "step": 705 }, { "epoch": 0.04, "grad_norm": 0.71484375, "learning_rate": 7.346094154164511e-05, "loss": 1.6205, "step": 710 }, { "epoch": 0.04, "grad_norm": 0.6875, "learning_rate": 7.397827211588205e-05, "loss": 1.6374, "step": 715 }, { "epoch": 0.04, "grad_norm": 0.68359375, "learning_rate": 7.449560269011899e-05, "loss": 1.6693, "step": 720 }, { "epoch": 0.04, "grad_norm": 0.68359375, "learning_rate": 7.501293326435593e-05, "loss": 1.6841, "step": 725 }, { "epoch": 0.04, "grad_norm": 0.68359375, "learning_rate": 7.553026383859286e-05, "loss": 1.6304, "step": 730 }, { "epoch": 0.04, "grad_norm": 0.68359375, "learning_rate": 7.60475944128298e-05, "loss": 1.6373, "step": 735 }, { "epoch": 0.04, "grad_norm": 0.7109375, "learning_rate": 7.656492498706674e-05, "loss": 1.6395, "step": 740 }, { "epoch": 0.04, "grad_norm": 0.6875, "learning_rate": 7.708225556130368e-05, "loss": 1.6679, "step": 745 }, { "epoch": 0.04, "grad_norm": 0.67578125, "learning_rate": 7.759958613554062e-05, "loss": 1.6473, "step": 750 }, { "epoch": 0.04, "grad_norm": 0.6796875, "learning_rate": 7.811691670977755e-05, "loss": 1.6029, "step": 755 }, { "epoch": 0.04, "grad_norm": 0.6640625, "learning_rate": 7.863424728401448e-05, "loss": 1.6523, "step": 760 }, { "epoch": 0.04, "grad_norm": 0.7109375, "learning_rate": 7.915157785825143e-05, "loss": 1.6281, "step": 765 }, { "epoch": 0.04, "grad_norm": 0.6953125, "learning_rate": 7.966890843248837e-05, "loss": 1.6032, "step": 770 }, { "epoch": 0.04, "grad_norm": 0.703125, "learning_rate": 8.018623900672531e-05, "loss": 1.6495, "step": 775 }, { "epoch": 0.04, "grad_norm": 0.66796875, "learning_rate": 8.070356958096223e-05, "loss": 1.6309, "step": 780 }, { "epoch": 0.04, "grad_norm": 0.6796875, "learning_rate": 8.122090015519917e-05, "loss": 1.6321, "step": 785 }, { "epoch": 0.04, "grad_norm": 0.6640625, "learning_rate": 8.173823072943612e-05, "loss": 1.6487, "step": 790 }, { "epoch": 0.04, "grad_norm": 0.6171875, "learning_rate": 8.225556130367305e-05, "loss": 1.648, "step": 795 }, { "epoch": 0.04, "grad_norm": 0.65234375, "learning_rate": 8.277289187790999e-05, "loss": 1.6622, "step": 800 }, { "epoch": 0.04, "grad_norm": 0.65625, "learning_rate": 8.329022245214692e-05, "loss": 1.6737, "step": 805 }, { "epoch": 0.04, "grad_norm": 0.66015625, "learning_rate": 8.380755302638386e-05, "loss": 1.6437, "step": 810 }, { "epoch": 0.04, "grad_norm": 0.640625, "learning_rate": 8.43248836006208e-05, "loss": 1.6797, "step": 815 }, { "epoch": 0.04, "grad_norm": 0.6328125, "learning_rate": 8.484221417485774e-05, "loss": 1.6351, "step": 820 }, { "epoch": 0.04, "grad_norm": 0.66796875, "learning_rate": 8.535954474909468e-05, "loss": 1.6527, "step": 825 }, { "epoch": 0.04, "grad_norm": 0.640625, "learning_rate": 8.587687532333161e-05, "loss": 1.6279, "step": 830 }, { "epoch": 0.04, "grad_norm": 0.6796875, "learning_rate": 8.639420589756855e-05, "loss": 1.6513, "step": 835 }, { "epoch": 0.04, "grad_norm": 0.67578125, "learning_rate": 8.691153647180549e-05, "loss": 1.6415, "step": 840 }, { "epoch": 0.04, "grad_norm": 0.65234375, "learning_rate": 8.742886704604243e-05, "loss": 1.6349, "step": 845 }, { "epoch": 0.04, "grad_norm": 0.6484375, "learning_rate": 8.794619762027935e-05, "loss": 1.6161, "step": 850 }, { "epoch": 0.04, "grad_norm": 0.6484375, "learning_rate": 8.846352819451629e-05, "loss": 1.6249, "step": 855 }, { "epoch": 0.04, "grad_norm": 0.671875, "learning_rate": 8.898085876875324e-05, "loss": 1.6353, "step": 860 }, { "epoch": 0.04, "grad_norm": 0.65234375, "learning_rate": 8.949818934299018e-05, "loss": 1.6625, "step": 865 }, { "epoch": 0.05, "grad_norm": 0.68359375, "learning_rate": 9.001551991722711e-05, "loss": 1.6277, "step": 870 }, { "epoch": 0.05, "grad_norm": 0.62109375, "learning_rate": 9.053285049146405e-05, "loss": 1.675, "step": 875 }, { "epoch": 0.05, "grad_norm": 0.6328125, "learning_rate": 9.105018106570098e-05, "loss": 1.654, "step": 880 }, { "epoch": 0.05, "grad_norm": 0.640625, "learning_rate": 9.156751163993792e-05, "loss": 1.6671, "step": 885 }, { "epoch": 0.05, "grad_norm": 0.62890625, "learning_rate": 9.208484221417486e-05, "loss": 1.6186, "step": 890 }, { "epoch": 0.05, "grad_norm": 0.625, "learning_rate": 9.26021727884118e-05, "loss": 1.643, "step": 895 }, { "epoch": 0.05, "grad_norm": 0.625, "learning_rate": 9.311950336264874e-05, "loss": 1.6187, "step": 900 }, { "epoch": 0.05, "grad_norm": 0.62109375, "learning_rate": 9.363683393688568e-05, "loss": 1.6145, "step": 905 }, { "epoch": 0.05, "grad_norm": 0.62890625, "learning_rate": 9.415416451112261e-05, "loss": 1.6412, "step": 910 }, { "epoch": 0.05, "grad_norm": 0.66015625, "learning_rate": 9.467149508535955e-05, "loss": 1.6214, "step": 915 }, { "epoch": 0.05, "grad_norm": 0.62890625, "learning_rate": 9.518882565959649e-05, "loss": 1.6311, "step": 920 }, { "epoch": 0.05, "grad_norm": 0.6171875, "learning_rate": 9.570615623383343e-05, "loss": 1.6373, "step": 925 }, { "epoch": 0.05, "grad_norm": 0.6171875, "learning_rate": 9.622348680807037e-05, "loss": 1.6238, "step": 930 }, { "epoch": 0.05, "grad_norm": 0.625, "learning_rate": 9.67408173823073e-05, "loss": 1.6447, "step": 935 }, { "epoch": 0.05, "grad_norm": 0.65625, "learning_rate": 9.725814795654423e-05, "loss": 1.6108, "step": 940 }, { "epoch": 0.05, "grad_norm": 0.6484375, "learning_rate": 9.777547853078117e-05, "loss": 1.6195, "step": 945 }, { "epoch": 0.05, "grad_norm": 0.66015625, "learning_rate": 9.829280910501812e-05, "loss": 1.6216, "step": 950 }, { "epoch": 0.05, "grad_norm": 0.6015625, "learning_rate": 9.881013967925506e-05, "loss": 1.6312, "step": 955 }, { "epoch": 0.05, "grad_norm": 0.609375, "learning_rate": 9.932747025349198e-05, "loss": 1.6351, "step": 960 }, { "epoch": 0.05, "grad_norm": 0.6171875, "learning_rate": 9.984480082772892e-05, "loss": 1.6219, "step": 965 }, { "epoch": 0.05, "grad_norm": 0.58984375, "learning_rate": 0.00010036213140196586, "loss": 1.6085, "step": 970 }, { "epoch": 0.05, "grad_norm": 0.59375, "learning_rate": 0.0001008794619762028, "loss": 1.5999, "step": 975 }, { "epoch": 0.05, "grad_norm": 0.6015625, "learning_rate": 0.00010139679255043975, "loss": 1.638, "step": 980 }, { "epoch": 0.05, "grad_norm": 0.62109375, "learning_rate": 0.00010191412312467667, "loss": 1.6279, "step": 985 }, { "epoch": 0.05, "grad_norm": 0.59375, "learning_rate": 0.0001024314536989136, "loss": 1.6012, "step": 990 }, { "epoch": 0.05, "grad_norm": 0.609375, "learning_rate": 0.00010294878427315055, "loss": 1.5978, "step": 995 }, { "epoch": 0.05, "grad_norm": 0.62890625, "learning_rate": 0.00010346611484738749, "loss": 1.6574, "step": 1000 }, { "epoch": 0.05, "grad_norm": 0.62890625, "learning_rate": 0.00010398344542162441, "loss": 1.6144, "step": 1005 }, { "epoch": 0.05, "grad_norm": 0.57421875, "learning_rate": 0.00010450077599586136, "loss": 1.5865, "step": 1010 }, { "epoch": 0.05, "grad_norm": 0.59375, "learning_rate": 0.00010501810657009829, "loss": 1.5854, "step": 1015 }, { "epoch": 0.05, "grad_norm": 0.58984375, "learning_rate": 0.00010553543714433523, "loss": 1.5843, "step": 1020 }, { "epoch": 0.05, "grad_norm": 28.125, "learning_rate": 0.00010605276771857218, "loss": 1.737, "step": 1025 }, { "epoch": 0.05, "grad_norm": 0.59765625, "learning_rate": 0.0001065700982928091, "loss": 1.6076, "step": 1030 }, { "epoch": 0.05, "grad_norm": 0.6015625, "learning_rate": 0.00010708742886704606, "loss": 1.6308, "step": 1035 }, { "epoch": 0.05, "grad_norm": 0.59765625, "learning_rate": 0.00010760475944128298, "loss": 1.5846, "step": 1040 }, { "epoch": 0.05, "grad_norm": 0.59765625, "learning_rate": 0.00010812209001551992, "loss": 1.6332, "step": 1045 }, { "epoch": 0.05, "grad_norm": 0.57421875, "learning_rate": 0.00010863942058975687, "loss": 1.6076, "step": 1050 }, { "epoch": 0.05, "grad_norm": 0.60546875, "learning_rate": 0.0001091567511639938, "loss": 1.6014, "step": 1055 }, { "epoch": 0.05, "grad_norm": 0.5703125, "learning_rate": 0.00010967408173823073, "loss": 1.6394, "step": 1060 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.00011019141231246769, "loss": 1.5853, "step": 1065 }, { "epoch": 0.06, "grad_norm": 0.74609375, "learning_rate": 0.00011070874288670461, "loss": 1.5903, "step": 1070 }, { "epoch": 0.06, "grad_norm": 0.609375, "learning_rate": 0.00011122607346094154, "loss": 1.6, "step": 1075 }, { "epoch": 0.06, "grad_norm": 0.6015625, "learning_rate": 0.00011174340403517849, "loss": 1.636, "step": 1080 }, { "epoch": 0.06, "grad_norm": 0.62109375, "learning_rate": 0.00011226073460941543, "loss": 1.632, "step": 1085 }, { "epoch": 0.06, "grad_norm": 0.58984375, "learning_rate": 0.00011277806518365235, "loss": 1.5872, "step": 1090 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.0001132953957578893, "loss": 1.6298, "step": 1095 }, { "epoch": 0.06, "grad_norm": 0.578125, "learning_rate": 0.00011381272633212623, "loss": 1.6073, "step": 1100 }, { "epoch": 0.06, "grad_norm": 0.58203125, "learning_rate": 0.00011433005690636318, "loss": 1.5932, "step": 1105 }, { "epoch": 0.06, "grad_norm": 0.5859375, "learning_rate": 0.00011484738748060012, "loss": 1.5957, "step": 1110 }, { "epoch": 0.06, "grad_norm": 0.7109375, "learning_rate": 0.00011536471805483704, "loss": 1.5781, "step": 1115 }, { "epoch": 0.06, "grad_norm": 0.5859375, "learning_rate": 0.00011588204862907399, "loss": 1.5761, "step": 1120 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.00011639937920331092, "loss": 1.5804, "step": 1125 }, { "epoch": 0.06, "grad_norm": 0.59375, "learning_rate": 0.00011691670977754786, "loss": 1.6005, "step": 1130 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.00011743404035178481, "loss": 1.5513, "step": 1135 }, { "epoch": 0.06, "grad_norm": 0.625, "learning_rate": 0.00011795137092602173, "loss": 1.6244, "step": 1140 }, { "epoch": 0.06, "grad_norm": 0.5703125, "learning_rate": 0.00011846870150025866, "loss": 1.5941, "step": 1145 }, { "epoch": 0.06, "grad_norm": 0.5703125, "learning_rate": 0.00011898603207449561, "loss": 1.6306, "step": 1150 }, { "epoch": 0.06, "grad_norm": 0.625, "learning_rate": 0.00011950336264873255, "loss": 1.6149, "step": 1155 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.0001200206932229695, "loss": 1.603, "step": 1160 }, { "epoch": 0.06, "grad_norm": 0.578125, "learning_rate": 0.00012053802379720642, "loss": 1.5644, "step": 1165 }, { "epoch": 0.06, "grad_norm": 0.59765625, "learning_rate": 0.00012105535437144335, "loss": 1.5883, "step": 1170 }, { "epoch": 0.06, "grad_norm": 0.55078125, "learning_rate": 0.0001215726849456803, "loss": 1.5846, "step": 1175 }, { "epoch": 0.06, "grad_norm": 0.59375, "learning_rate": 0.00012209001551991724, "loss": 1.6083, "step": 1180 }, { "epoch": 0.06, "grad_norm": 0.578125, "learning_rate": 0.00012260734609415415, "loss": 1.6051, "step": 1185 }, { "epoch": 0.06, "grad_norm": 0.5625, "learning_rate": 0.00012312467666839111, "loss": 1.6224, "step": 1190 }, { "epoch": 0.06, "grad_norm": 0.5859375, "learning_rate": 0.00012364200724262805, "loss": 1.6303, "step": 1195 }, { "epoch": 0.06, "grad_norm": 0.56640625, "learning_rate": 0.00012415933781686496, "loss": 1.5796, "step": 1200 }, { "epoch": 0.06, "grad_norm": 0.5703125, "learning_rate": 0.00012467666839110193, "loss": 1.5645, "step": 1205 }, { "epoch": 0.06, "grad_norm": 0.55859375, "learning_rate": 0.00012519399896533887, "loss": 1.591, "step": 1210 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.00012571132953957578, "loss": 1.6042, "step": 1215 }, { "epoch": 0.06, "grad_norm": 0.58203125, "learning_rate": 0.00012622866011381274, "loss": 1.5599, "step": 1220 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.00012674599068804966, "loss": 1.6363, "step": 1225 }, { "epoch": 0.06, "grad_norm": 0.5390625, "learning_rate": 0.00012726332126228662, "loss": 1.5934, "step": 1230 }, { "epoch": 0.06, "grad_norm": 0.55859375, "learning_rate": 0.00012778065183652356, "loss": 1.5754, "step": 1235 }, { "epoch": 0.06, "grad_norm": 0.578125, "learning_rate": 0.00012829798241076047, "loss": 1.586, "step": 1240 }, { "epoch": 0.06, "grad_norm": 0.5625, "learning_rate": 0.00012881531298499744, "loss": 1.5841, "step": 1245 }, { "epoch": 0.06, "grad_norm": 0.57421875, "learning_rate": 0.00012933264355923435, "loss": 1.5874, "step": 1250 }, { "epoch": 0.06, "grad_norm": 0.5546875, "learning_rate": 0.00012984997413347129, "loss": 1.592, "step": 1255 }, { "epoch": 0.07, "grad_norm": 0.59765625, "learning_rate": 0.00013036730470770825, "loss": 1.6185, "step": 1260 }, { "epoch": 0.07, "grad_norm": 0.57421875, "learning_rate": 0.00013088463528194516, "loss": 1.603, "step": 1265 }, { "epoch": 0.07, "grad_norm": 0.578125, "learning_rate": 0.0001314019658561821, "loss": 1.542, "step": 1270 }, { "epoch": 0.07, "grad_norm": 0.578125, "learning_rate": 0.00013191929643041904, "loss": 1.5686, "step": 1275 }, { "epoch": 0.07, "grad_norm": 0.59765625, "learning_rate": 0.00013243662700465598, "loss": 1.5893, "step": 1280 }, { "epoch": 0.07, "grad_norm": 0.5859375, "learning_rate": 0.00013295395757889294, "loss": 1.6039, "step": 1285 }, { "epoch": 0.07, "grad_norm": 0.5703125, "learning_rate": 0.00013347128815312985, "loss": 1.6043, "step": 1290 }, { "epoch": 0.07, "grad_norm": 0.56640625, "learning_rate": 0.0001339886187273668, "loss": 1.5886, "step": 1295 }, { "epoch": 0.07, "grad_norm": 0.53125, "learning_rate": 0.00013450594930160373, "loss": 1.6044, "step": 1300 }, { "epoch": 0.07, "grad_norm": 0.56640625, "learning_rate": 0.00013502327987584067, "loss": 1.5665, "step": 1305 }, { "epoch": 0.07, "grad_norm": 0.5703125, "learning_rate": 0.0001355406104500776, "loss": 1.5608, "step": 1310 }, { "epoch": 0.07, "grad_norm": 0.546875, "learning_rate": 0.00013605794102431454, "loss": 1.6247, "step": 1315 }, { "epoch": 0.07, "grad_norm": 0.55859375, "learning_rate": 0.00013657527159855148, "loss": 1.5377, "step": 1320 }, { "epoch": 0.07, "grad_norm": 0.5390625, "learning_rate": 0.00013709260217278842, "loss": 1.5695, "step": 1325 }, { "epoch": 0.07, "grad_norm": 0.55078125, "learning_rate": 0.00013760993274702536, "loss": 1.5705, "step": 1330 }, { "epoch": 0.07, "grad_norm": 0.55078125, "learning_rate": 0.0001381272633212623, "loss": 1.5698, "step": 1335 }, { "epoch": 0.07, "grad_norm": 0.546875, "learning_rate": 0.0001386445938954992, "loss": 1.5721, "step": 1340 }, { "epoch": 0.07, "grad_norm": 0.55859375, "learning_rate": 0.00013916192446973617, "loss": 1.5982, "step": 1345 }, { "epoch": 0.07, "grad_norm": 0.578125, "learning_rate": 0.0001396792550439731, "loss": 1.5812, "step": 1350 }, { "epoch": 0.07, "grad_norm": 0.5546875, "learning_rate": 0.00014019658561821005, "loss": 1.62, "step": 1355 }, { "epoch": 0.07, "grad_norm": 0.55078125, "learning_rate": 0.000140713916192447, "loss": 1.6233, "step": 1360 }, { "epoch": 0.07, "grad_norm": 0.57421875, "learning_rate": 0.0001412312467666839, "loss": 1.5828, "step": 1365 }, { "epoch": 0.07, "grad_norm": 0.5390625, "learning_rate": 0.00014174857734092087, "loss": 1.594, "step": 1370 }, { "epoch": 0.07, "grad_norm": 0.56640625, "learning_rate": 0.0001422659079151578, "loss": 1.6036, "step": 1375 }, { "epoch": 0.07, "grad_norm": 0.55078125, "learning_rate": 0.00014278323848939471, "loss": 1.616, "step": 1380 }, { "epoch": 0.07, "grad_norm": 0.53125, "learning_rate": 0.00014330056906363168, "loss": 1.5825, "step": 1385 }, { "epoch": 0.07, "grad_norm": 0.55078125, "learning_rate": 0.0001438178996378686, "loss": 1.5786, "step": 1390 }, { "epoch": 0.07, "grad_norm": 0.54296875, "learning_rate": 0.00014433523021210553, "loss": 1.5681, "step": 1395 }, { "epoch": 0.07, "grad_norm": 0.55859375, "learning_rate": 0.0001448525607863425, "loss": 1.5683, "step": 1400 }, { "epoch": 0.07, "grad_norm": 0.54296875, "learning_rate": 0.0001453698913605794, "loss": 1.5987, "step": 1405 }, { "epoch": 0.07, "grad_norm": 0.53515625, "learning_rate": 0.00014588722193481637, "loss": 1.5762, "step": 1410 }, { "epoch": 0.07, "grad_norm": 0.61328125, "learning_rate": 0.00014640455250905328, "loss": 1.5804, "step": 1415 }, { "epoch": 0.07, "grad_norm": 0.58203125, "learning_rate": 0.00014692188308329022, "loss": 1.5967, "step": 1420 }, { "epoch": 0.07, "grad_norm": 0.55078125, "learning_rate": 0.00014743921365752719, "loss": 1.564, "step": 1425 }, { "epoch": 0.07, "grad_norm": 0.53515625, "learning_rate": 0.0001479565442317641, "loss": 1.5913, "step": 1430 }, { "epoch": 0.07, "grad_norm": 0.546875, "learning_rate": 0.00014847387480600104, "loss": 1.5789, "step": 1435 }, { "epoch": 0.07, "grad_norm": 0.55859375, "learning_rate": 0.00014899120538023797, "loss": 1.5692, "step": 1440 }, { "epoch": 0.07, "grad_norm": 0.53125, "learning_rate": 0.0001495085359544749, "loss": 1.5682, "step": 1445 }, { "epoch": 0.08, "grad_norm": 0.54296875, "learning_rate": 0.00015002586652871185, "loss": 1.5949, "step": 1450 }, { "epoch": 0.08, "grad_norm": 0.578125, "learning_rate": 0.0001505431971029488, "loss": 1.6124, "step": 1455 }, { "epoch": 0.08, "grad_norm": 0.5234375, "learning_rate": 0.00015106052767718573, "loss": 1.611, "step": 1460 }, { "epoch": 0.08, "grad_norm": 0.55859375, "learning_rate": 0.00015157785825142266, "loss": 1.5875, "step": 1465 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.0001520951888256596, "loss": 1.5513, "step": 1470 }, { "epoch": 0.08, "grad_norm": 0.55859375, "learning_rate": 0.00015261251939989654, "loss": 1.5774, "step": 1475 }, { "epoch": 0.08, "grad_norm": 0.54296875, "learning_rate": 0.00015312984997413348, "loss": 1.58, "step": 1480 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00015364718054837042, "loss": 1.584, "step": 1485 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00015416451112260736, "loss": 1.6072, "step": 1490 }, { "epoch": 0.08, "grad_norm": 0.55859375, "learning_rate": 0.0001546818416968443, "loss": 1.545, "step": 1495 }, { "epoch": 0.08, "grad_norm": 0.54296875, "learning_rate": 0.00015519917227108123, "loss": 1.59, "step": 1500 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00015571650284531817, "loss": 1.5463, "step": 1505 }, { "epoch": 0.08, "grad_norm": 0.53515625, "learning_rate": 0.0001562338334195551, "loss": 1.5767, "step": 1510 }, { "epoch": 0.08, "grad_norm": 0.55078125, "learning_rate": 0.00015675116399379205, "loss": 1.6077, "step": 1515 }, { "epoch": 0.08, "grad_norm": 0.54296875, "learning_rate": 0.00015726849456802896, "loss": 1.5653, "step": 1520 }, { "epoch": 0.08, "grad_norm": 0.55859375, "learning_rate": 0.00015778582514226592, "loss": 1.5685, "step": 1525 }, { "epoch": 0.08, "grad_norm": 0.52734375, "learning_rate": 0.00015830315571650286, "loss": 1.5647, "step": 1530 }, { "epoch": 0.08, "grad_norm": 0.52734375, "learning_rate": 0.0001588204862907398, "loss": 1.5662, "step": 1535 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00015933781686497674, "loss": 1.5859, "step": 1540 }, { "epoch": 0.08, "grad_norm": 0.55078125, "learning_rate": 0.00015985514743921365, "loss": 1.5762, "step": 1545 }, { "epoch": 0.08, "grad_norm": 0.5234375, "learning_rate": 0.00016037247801345062, "loss": 1.5691, "step": 1550 }, { "epoch": 0.08, "grad_norm": 0.5625, "learning_rate": 0.00016088980858768755, "loss": 1.5798, "step": 1555 }, { "epoch": 0.08, "grad_norm": 0.54296875, "learning_rate": 0.00016140713916192446, "loss": 1.5793, "step": 1560 }, { "epoch": 0.08, "grad_norm": 0.52734375, "learning_rate": 0.00016192446973616143, "loss": 1.5979, "step": 1565 }, { "epoch": 0.08, "grad_norm": 0.5234375, "learning_rate": 0.00016244180031039834, "loss": 1.5795, "step": 1570 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00016295913088463528, "loss": 1.5537, "step": 1575 }, { "epoch": 0.08, "grad_norm": 0.53515625, "learning_rate": 0.00016347646145887224, "loss": 1.5306, "step": 1580 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00016399379203310916, "loss": 1.5812, "step": 1585 }, { "epoch": 0.08, "grad_norm": 0.53515625, "learning_rate": 0.0001645111226073461, "loss": 1.5892, "step": 1590 }, { "epoch": 0.08, "grad_norm": 0.55859375, "learning_rate": 0.00016502845318158303, "loss": 1.5867, "step": 1595 }, { "epoch": 0.08, "grad_norm": 0.5390625, "learning_rate": 0.00016554578375581997, "loss": 1.5385, "step": 1600 }, { "epoch": 0.08, "grad_norm": 0.5234375, "learning_rate": 0.00016606311433005694, "loss": 1.5428, "step": 1605 }, { "epoch": 0.08, "grad_norm": 0.53125, "learning_rate": 0.00016658044490429385, "loss": 1.5596, "step": 1610 }, { "epoch": 0.08, "grad_norm": 0.5390625, "learning_rate": 0.00016709777547853079, "loss": 1.5558, "step": 1615 }, { "epoch": 0.08, "grad_norm": 0.546875, "learning_rate": 0.00016761510605276772, "loss": 1.5684, "step": 1620 }, { "epoch": 0.08, "grad_norm": 0.5625, "learning_rate": 0.00016813243662700466, "loss": 1.572, "step": 1625 }, { "epoch": 0.08, "grad_norm": 0.52734375, "learning_rate": 0.0001686497672012416, "loss": 1.5505, "step": 1630 }, { "epoch": 0.08, "grad_norm": 0.53125, "learning_rate": 0.00016916709777547854, "loss": 1.5732, "step": 1635 }, { "epoch": 0.08, "grad_norm": 0.51953125, "learning_rate": 0.00016968442834971548, "loss": 1.5629, "step": 1640 }, { "epoch": 0.09, "grad_norm": 0.5390625, "learning_rate": 0.00017020175892395241, "loss": 1.5986, "step": 1645 }, { "epoch": 0.09, "grad_norm": 0.54296875, "learning_rate": 0.00017071908949818935, "loss": 1.5384, "step": 1650 }, { "epoch": 0.09, "grad_norm": 0.53515625, "learning_rate": 0.0001712364200724263, "loss": 1.5552, "step": 1655 }, { "epoch": 0.09, "grad_norm": 0.54296875, "learning_rate": 0.00017175375064666323, "loss": 1.5812, "step": 1660 }, { "epoch": 0.09, "grad_norm": 0.51171875, "learning_rate": 0.00017227108122090017, "loss": 1.5385, "step": 1665 }, { "epoch": 0.09, "grad_norm": 0.53125, "learning_rate": 0.0001727884117951371, "loss": 1.5383, "step": 1670 }, { "epoch": 0.09, "grad_norm": 0.51953125, "learning_rate": 0.00017330574236937404, "loss": 1.5549, "step": 1675 }, { "epoch": 0.09, "grad_norm": 0.53515625, "learning_rate": 0.00017382307294361098, "loss": 1.5509, "step": 1680 }, { "epoch": 0.09, "grad_norm": 0.53515625, "learning_rate": 0.0001743404035178479, "loss": 1.6073, "step": 1685 }, { "epoch": 0.09, "grad_norm": 0.55078125, "learning_rate": 0.00017485773409208486, "loss": 1.5538, "step": 1690 }, { "epoch": 0.09, "grad_norm": 0.56640625, "learning_rate": 0.0001753750646663218, "loss": 1.595, "step": 1695 }, { "epoch": 0.09, "grad_norm": 0.55859375, "learning_rate": 0.0001758923952405587, "loss": 1.5658, "step": 1700 }, { "epoch": 0.09, "grad_norm": 0.51953125, "learning_rate": 0.00017640972581479567, "loss": 1.5508, "step": 1705 }, { "epoch": 0.09, "grad_norm": 0.51953125, "learning_rate": 0.00017692705638903259, "loss": 1.5406, "step": 1710 }, { "epoch": 0.09, "grad_norm": 0.55078125, "learning_rate": 0.00017744438696326955, "loss": 1.5676, "step": 1715 }, { "epoch": 0.09, "grad_norm": 0.53515625, "learning_rate": 0.0001779617175375065, "loss": 1.5622, "step": 1720 }, { "epoch": 0.09, "grad_norm": 0.5390625, "learning_rate": 0.0001784790481117434, "loss": 1.5609, "step": 1725 }, { "epoch": 0.09, "grad_norm": 0.546875, "learning_rate": 0.00017899637868598037, "loss": 1.578, "step": 1730 }, { "epoch": 0.09, "grad_norm": 0.53125, "learning_rate": 0.00017951370926021728, "loss": 1.5461, "step": 1735 }, { "epoch": 0.09, "grad_norm": 0.5234375, "learning_rate": 0.00018003103983445421, "loss": 1.5818, "step": 1740 }, { "epoch": 0.09, "grad_norm": 0.5234375, "learning_rate": 0.00018054837040869118, "loss": 1.5628, "step": 1745 }, { "epoch": 0.09, "grad_norm": 5.3125, "learning_rate": 0.0001810657009829281, "loss": 1.568, "step": 1750 }, { "epoch": 0.09, "grad_norm": 0.51171875, "learning_rate": 0.00018158303155716503, "loss": 1.5412, "step": 1755 }, { "epoch": 0.09, "grad_norm": 0.57421875, "learning_rate": 0.00018210036213140197, "loss": 1.6057, "step": 1760 }, { "epoch": 0.09, "grad_norm": 0.546875, "learning_rate": 0.0001826176927056389, "loss": 1.5805, "step": 1765 }, { "epoch": 0.09, "grad_norm": 0.82421875, "learning_rate": 0.00018313502327987584, "loss": 1.5736, "step": 1770 }, { "epoch": 0.09, "grad_norm": 0.6015625, "learning_rate": 0.00018365235385411278, "loss": 1.5586, "step": 1775 }, { "epoch": 0.09, "grad_norm": 2.8125, "learning_rate": 0.00018416968442834972, "loss": 1.5524, "step": 1780 }, { "epoch": 0.09, "grad_norm": 0.56640625, "learning_rate": 0.00018468701500258666, "loss": 1.5908, "step": 1785 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 0.0001852043455768236, "loss": 1.5742, "step": 1790 }, { "epoch": 0.09, "grad_norm": 0.50390625, "learning_rate": 0.00018572167615106054, "loss": 1.5759, "step": 1795 }, { "epoch": 0.09, "grad_norm": 0.9140625, "learning_rate": 0.00018623900672529747, "loss": 1.6109, "step": 1800 }, { "epoch": 0.09, "grad_norm": 0.53125, "learning_rate": 0.0001867563372995344, "loss": 1.545, "step": 1805 }, { "epoch": 0.09, "grad_norm": 0.546875, "learning_rate": 0.00018727366787377135, "loss": 1.5727, "step": 1810 }, { "epoch": 0.09, "grad_norm": 0.5625, "learning_rate": 0.0001877909984480083, "loss": 1.5265, "step": 1815 }, { "epoch": 0.09, "grad_norm": 0.53515625, "learning_rate": 0.00018830832902224523, "loss": 1.5541, "step": 1820 }, { "epoch": 0.09, "grad_norm": 0.5390625, "learning_rate": 0.00018882565959648216, "loss": 1.5675, "step": 1825 }, { "epoch": 0.09, "grad_norm": 1.328125, "learning_rate": 0.0001893429901707191, "loss": 1.5542, "step": 1830 }, { "epoch": 0.09, "grad_norm": 0.5390625, "learning_rate": 0.00018986032074495604, "loss": 1.5244, "step": 1835 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019037765131919298, "loss": 1.5473, "step": 1840 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019089498189342992, "loss": 1.5554, "step": 1845 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.00019141231246766686, "loss": 1.5603, "step": 1850 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.0001919296430419038, "loss": 1.5157, "step": 1855 }, { "epoch": 0.1, "grad_norm": 0.51171875, "learning_rate": 0.00019244697361614073, "loss": 1.5392, "step": 1860 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.00019296430419037764, "loss": 1.5601, "step": 1865 }, { "epoch": 0.1, "grad_norm": 0.490234375, "learning_rate": 0.0001934816347646146, "loss": 1.5473, "step": 1870 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019399896533885155, "loss": 1.5782, "step": 1875 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.00019451629591308846, "loss": 1.5659, "step": 1880 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.00019503362648732542, "loss": 1.5565, "step": 1885 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019555095706156234, "loss": 1.5245, "step": 1890 }, { "epoch": 0.1, "grad_norm": 0.515625, "learning_rate": 0.00019606828763579927, "loss": 1.5682, "step": 1895 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019658561821003624, "loss": 1.5439, "step": 1900 }, { "epoch": 0.1, "grad_norm": 5.0, "learning_rate": 0.00019710294878427315, "loss": 1.5547, "step": 1905 }, { "epoch": 0.1, "grad_norm": 0.56640625, "learning_rate": 0.00019762027935851012, "loss": 1.5742, "step": 1910 }, { "epoch": 0.1, "grad_norm": 0.5390625, "learning_rate": 0.00019813760993274703, "loss": 1.524, "step": 1915 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.00019865494050698396, "loss": 1.5328, "step": 1920 }, { "epoch": 0.1, "grad_norm": 0.578125, "learning_rate": 0.00019917227108122093, "loss": 1.5554, "step": 1925 }, { "epoch": 0.1, "grad_norm": 0.5625, "learning_rate": 0.00019968960165545784, "loss": 1.57, "step": 1930 }, { "epoch": 0.1, "grad_norm": 19.5, "learning_rate": 0.00019999999347649694, "loss": 1.5967, "step": 1935 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.00019999992008709735, "loss": 1.5606, "step": 1940 }, { "epoch": 0.1, "grad_norm": 0.51953125, "learning_rate": 0.00019999976515397937, "loss": 1.5559, "step": 1945 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019999952867726936, "loss": 1.5675, "step": 1950 }, { "epoch": 0.1, "grad_norm": 0.5390625, "learning_rate": 0.0001999992106571601, "loss": 1.5612, "step": 1955 }, { "epoch": 0.1, "grad_norm": 0.55859375, "learning_rate": 0.00019999881109391098, "loss": 1.5634, "step": 1960 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.0001999983299878478, "loss": 1.5404, "step": 1965 }, { "epoch": 0.1, "grad_norm": 0.55078125, "learning_rate": 0.00019999776733936286, "loss": 1.5875, "step": 1970 }, { "epoch": 0.1, "grad_norm": 0.61328125, "learning_rate": 0.00019999712314891496, "loss": 1.5651, "step": 1975 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.00019999639741702943, "loss": 1.5764, "step": 1980 }, { "epoch": 0.1, "grad_norm": 0.5390625, "learning_rate": 0.00019999559014429802, "loss": 1.5411, "step": 1985 }, { "epoch": 0.1, "grad_norm": 0.50390625, "learning_rate": 0.00019999470133137906, "loss": 1.5388, "step": 1990 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 0.00019999373097899728, "loss": 1.5659, "step": 1995 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.00019999267908794394, "loss": 1.5484, "step": 2000 }, { "epoch": 0.1, "grad_norm": 0.53125, "learning_rate": 0.00019999154565907682, "loss": 1.5623, "step": 2005 }, { "epoch": 0.1, "grad_norm": 0.546875, "learning_rate": 0.00019999033069332013, "loss": 1.5429, "step": 2010 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.0001999890341916646, "loss": 1.5245, "step": 2015 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.0001999876561551675, "loss": 1.5415, "step": 2020 }, { "epoch": 0.1, "grad_norm": 0.52734375, "learning_rate": 0.00019998619658495245, "loss": 1.5505, "step": 2025 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.00019998465548220972, "loss": 1.5406, "step": 2030 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019998303284819594, "loss": 1.5452, "step": 2035 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.00019998132868423427, "loss": 1.5519, "step": 2040 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.00019997954299171434, "loss": 1.5671, "step": 2045 }, { "epoch": 0.11, "grad_norm": 0.53515625, "learning_rate": 0.0001999776757720923, "loss": 1.5425, "step": 2050 }, { "epoch": 0.11, "grad_norm": 0.5390625, "learning_rate": 0.00019997572702689073, "loss": 1.5056, "step": 2055 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019997369675769873, "loss": 1.511, "step": 2060 }, { "epoch": 0.11, "grad_norm": 0.53125, "learning_rate": 0.00019997158496617184, "loss": 1.5355, "step": 2065 }, { "epoch": 0.11, "grad_norm": 0.5390625, "learning_rate": 0.00019996939165403208, "loss": 1.5132, "step": 2070 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.000199967116823068, "loss": 1.5543, "step": 2075 }, { "epoch": 0.11, "grad_norm": 0.51171875, "learning_rate": 0.00019996476047513454, "loss": 1.5596, "step": 2080 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.0001999623226121532, "loss": 1.5632, "step": 2085 }, { "epoch": 0.11, "grad_norm": 0.494140625, "learning_rate": 0.0001999598032361119, "loss": 1.5241, "step": 2090 }, { "epoch": 0.11, "grad_norm": 0.55859375, "learning_rate": 0.00019995720234906498, "loss": 1.5281, "step": 2095 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019995451995313335, "loss": 1.5327, "step": 2100 }, { "epoch": 0.11, "grad_norm": 0.53125, "learning_rate": 0.00019995175605050434, "loss": 1.5475, "step": 2105 }, { "epoch": 0.11, "grad_norm": 0.51953125, "learning_rate": 0.0001999489106434317, "loss": 1.5329, "step": 2110 }, { "epoch": 0.11, "grad_norm": 0.55078125, "learning_rate": 0.0001999459837342357, "loss": 1.5358, "step": 2115 }, { "epoch": 0.11, "grad_norm": 0.51171875, "learning_rate": 0.00019994297532530312, "loss": 1.5547, "step": 2120 }, { "epoch": 0.11, "grad_norm": 0.498046875, "learning_rate": 0.00019993988541908703, "loss": 1.552, "step": 2125 }, { "epoch": 0.11, "grad_norm": 0.55078125, "learning_rate": 0.00019993671401810712, "loss": 1.561, "step": 2130 }, { "epoch": 0.11, "grad_norm": 0.5, "learning_rate": 0.00019993346112494946, "loss": 1.5502, "step": 2135 }, { "epoch": 0.11, "grad_norm": 0.54296875, "learning_rate": 0.00019993012674226655, "loss": 1.5832, "step": 2140 }, { "epoch": 0.11, "grad_norm": 0.5546875, "learning_rate": 0.0001999267108727774, "loss": 1.5401, "step": 2145 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 0.00019992321351926744, "loss": 1.5634, "step": 2150 }, { "epoch": 0.11, "grad_norm": 0.53125, "learning_rate": 0.00019991963468458853, "loss": 1.5423, "step": 2155 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.00019991597437165899, "loss": 1.5138, "step": 2160 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019991223258346362, "loss": 1.4988, "step": 2165 }, { "epoch": 0.11, "grad_norm": 0.54296875, "learning_rate": 0.00019990840932305353, "loss": 1.5531, "step": 2170 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.0001999045045935464, "loss": 1.522, "step": 2175 }, { "epoch": 0.11, "grad_norm": 0.50390625, "learning_rate": 0.00019990051839812633, "loss": 1.5382, "step": 2180 }, { "epoch": 0.11, "grad_norm": 0.5078125, "learning_rate": 0.00019989645074004376, "loss": 1.5066, "step": 2185 }, { "epoch": 0.11, "grad_norm": 0.51953125, "learning_rate": 0.0001998923016226156, "loss": 1.5335, "step": 2190 }, { "epoch": 0.11, "grad_norm": 0.51953125, "learning_rate": 0.0001998880710492253, "loss": 1.5735, "step": 2195 }, { "epoch": 0.11, "grad_norm": 0.51953125, "learning_rate": 0.0001998837590233225, "loss": 1.5011, "step": 2200 }, { "epoch": 0.11, "grad_norm": 0.51171875, "learning_rate": 0.00019987936554842346, "loss": 1.5417, "step": 2205 }, { "epoch": 0.11, "grad_norm": 0.5234375, "learning_rate": 0.00019987489062811076, "loss": 1.4801, "step": 2210 }, { "epoch": 0.11, "grad_norm": 0.515625, "learning_rate": 0.00019987033426603344, "loss": 1.5865, "step": 2215 }, { "epoch": 0.11, "grad_norm": 1.03125, "learning_rate": 0.00019986569646590692, "loss": 1.5352, "step": 2220 }, { "epoch": 0.12, "grad_norm": 0.546875, "learning_rate": 0.00019986097723151305, "loss": 1.5486, "step": 2225 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.00019985617656670005, "loss": 1.5046, "step": 2230 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.00019985129447538258, "loss": 1.5207, "step": 2235 }, { "epoch": 0.12, "grad_norm": 0.53515625, "learning_rate": 0.00019984633096154167, "loss": 1.5484, "step": 2240 }, { "epoch": 0.12, "grad_norm": 0.53515625, "learning_rate": 0.00019984128602922477, "loss": 1.4857, "step": 2245 }, { "epoch": 0.12, "grad_norm": 0.50390625, "learning_rate": 0.00019983615968254573, "loss": 1.5185, "step": 2250 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.0001998309519256847, "loss": 1.5341, "step": 2255 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.00019982566276288834, "loss": 1.5127, "step": 2260 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.00019982029219846962, "loss": 1.5261, "step": 2265 }, { "epoch": 0.12, "grad_norm": 0.546875, "learning_rate": 0.00019981484023680787, "loss": 1.5209, "step": 2270 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.00019980930688234886, "loss": 1.5647, "step": 2275 }, { "epoch": 0.12, "grad_norm": 0.498046875, "learning_rate": 0.00019980369213960472, "loss": 1.5466, "step": 2280 }, { "epoch": 0.12, "grad_norm": 0.50390625, "learning_rate": 0.00019979799601315388, "loss": 1.5414, "step": 2285 }, { "epoch": 0.12, "grad_norm": 0.50390625, "learning_rate": 0.00019979221850764117, "loss": 1.5479, "step": 2290 }, { "epoch": 0.12, "grad_norm": 0.70703125, "learning_rate": 0.0001997863596277778, "loss": 1.5523, "step": 2295 }, { "epoch": 0.12, "grad_norm": 0.546875, "learning_rate": 0.00019978041937834137, "loss": 1.5381, "step": 2300 }, { "epoch": 0.12, "grad_norm": 0.49609375, "learning_rate": 0.0001997743977641757, "loss": 1.5089, "step": 2305 }, { "epoch": 0.12, "grad_norm": 6.5625, "learning_rate": 0.00019976829479019113, "loss": 1.5425, "step": 2310 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.0001997621104613642, "loss": 1.5251, "step": 2315 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.00019975584478273782, "loss": 1.5214, "step": 2320 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.00019974949775942134, "loss": 1.5123, "step": 2325 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.00019974306939659026, "loss": 1.5431, "step": 2330 }, { "epoch": 0.12, "grad_norm": 0.53515625, "learning_rate": 0.00019973655969948663, "loss": 1.5478, "step": 2335 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.00019972996867341863, "loss": 1.5273, "step": 2340 }, { "epoch": 0.12, "grad_norm": 0.498046875, "learning_rate": 0.00019972329632376084, "loss": 1.5277, "step": 2345 }, { "epoch": 0.12, "grad_norm": 0.53515625, "learning_rate": 0.00019971654265595415, "loss": 1.5046, "step": 2350 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.00019970970767550577, "loss": 1.5588, "step": 2355 }, { "epoch": 0.12, "grad_norm": 0.52734375, "learning_rate": 0.0001997027913879892, "loss": 1.6231, "step": 2360 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.0001996957937990442, "loss": 1.5189, "step": 2365 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.00019968871491437691, "loss": 1.5559, "step": 2370 }, { "epoch": 0.12, "grad_norm": 0.5078125, "learning_rate": 0.00019968155473975974, "loss": 1.5194, "step": 2375 }, { "epoch": 0.12, "grad_norm": 0.5, "learning_rate": 0.0001996743132810313, "loss": 1.5081, "step": 2380 }, { "epoch": 0.12, "grad_norm": 0.51171875, "learning_rate": 0.0001996669905440966, "loss": 1.5502, "step": 2385 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.0001996595865349269, "loss": 1.5206, "step": 2390 }, { "epoch": 0.12, "grad_norm": 0.515625, "learning_rate": 0.00019965210125955966, "loss": 1.5631, "step": 2395 }, { "epoch": 0.12, "grad_norm": 0.51953125, "learning_rate": 0.00019964453472409867, "loss": 1.4963, "step": 2400 }, { "epoch": 0.12, "grad_norm": 0.5234375, "learning_rate": 0.00019963688693471396, "loss": 1.5388, "step": 2405 }, { "epoch": 0.12, "grad_norm": 0.50390625, "learning_rate": 0.00019962915789764182, "loss": 1.5142, "step": 2410 }, { "epoch": 0.12, "grad_norm": 0.5, "learning_rate": 0.00019962134761918488, "loss": 1.5213, "step": 2415 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019961345610571183, "loss": 1.5471, "step": 2420 }, { "epoch": 0.13, "grad_norm": 0.53515625, "learning_rate": 0.00019960548336365774, "loss": 1.507, "step": 2425 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019959742939952392, "loss": 1.5371, "step": 2430 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019958929421987783, "loss": 1.5465, "step": 2435 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.00019958107783135326, "loss": 1.4912, "step": 2440 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.00019957278024065013, "loss": 1.5464, "step": 2445 }, { "epoch": 0.13, "grad_norm": 0.50390625, "learning_rate": 0.00019956440145453458, "loss": 1.5109, "step": 2450 }, { "epoch": 0.13, "grad_norm": 0.55078125, "learning_rate": 0.00019955594147983905, "loss": 1.5218, "step": 2455 }, { "epoch": 0.13, "grad_norm": 0.55859375, "learning_rate": 0.00019954740032346208, "loss": 1.4954, "step": 2460 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.0001995387779923685, "loss": 1.5223, "step": 2465 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.0001995300744935892, "loss": 1.5373, "step": 2470 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019952128983422146, "loss": 1.5302, "step": 2475 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.00019951242402142848, "loss": 1.5762, "step": 2480 }, { "epoch": 0.13, "grad_norm": 0.498046875, "learning_rate": 0.0001995034770624399, "loss": 1.4838, "step": 2485 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019949444896455137, "loss": 1.5001, "step": 2490 }, { "epoch": 0.13, "grad_norm": 0.52734375, "learning_rate": 0.00019948533973512472, "loss": 1.5255, "step": 2495 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.000199476149381588, "loss": 1.5226, "step": 2500 }, { "epoch": 0.13, "grad_norm": 0.53125, "learning_rate": 0.0001994668779114353, "loss": 1.5017, "step": 2505 }, { "epoch": 0.13, "grad_norm": 0.53515625, "learning_rate": 0.00019945752533222704, "loss": 1.5007, "step": 2510 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019944809165158955, "loss": 1.5411, "step": 2515 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.0001994385768772155, "loss": 1.5656, "step": 2520 }, { "epoch": 0.13, "grad_norm": 0.51953125, "learning_rate": 0.00019942898101686356, "loss": 1.5273, "step": 2525 }, { "epoch": 0.13, "grad_norm": 0.50390625, "learning_rate": 0.00019941930407835857, "loss": 1.5197, "step": 2530 }, { "epoch": 0.13, "grad_norm": 0.52734375, "learning_rate": 0.00019940954606959143, "loss": 1.4927, "step": 2535 }, { "epoch": 0.13, "grad_norm": 0.52734375, "learning_rate": 0.00019939970699851925, "loss": 1.5054, "step": 2540 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.0001993897868731651, "loss": 1.5288, "step": 2545 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.00019937978570161834, "loss": 1.4935, "step": 2550 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019936970349203423, "loss": 1.5252, "step": 2555 }, { "epoch": 0.13, "grad_norm": 0.515625, "learning_rate": 0.00019935954025263416, "loss": 1.5234, "step": 2560 }, { "epoch": 0.13, "grad_norm": 0.53125, "learning_rate": 0.00019934929599170568, "loss": 1.5277, "step": 2565 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.00019933897071760235, "loss": 1.5002, "step": 2570 }, { "epoch": 0.13, "grad_norm": 0.51171875, "learning_rate": 0.00019932856443874374, "loss": 1.4713, "step": 2575 }, { "epoch": 0.13, "grad_norm": 0.5390625, "learning_rate": 0.00019931807716361554, "loss": 1.5328, "step": 2580 }, { "epoch": 0.13, "grad_norm": 0.494140625, "learning_rate": 0.00019930750890076947, "loss": 1.5025, "step": 2585 }, { "epoch": 0.13, "grad_norm": 0.5234375, "learning_rate": 0.0001992968596588233, "loss": 1.5213, "step": 2590 }, { "epoch": 0.13, "grad_norm": 0.4921875, "learning_rate": 0.00019928612944646084, "loss": 1.4832, "step": 2595 }, { "epoch": 0.13, "grad_norm": 0.5, "learning_rate": 0.00019927531827243188, "loss": 1.5389, "step": 2600 }, { "epoch": 0.13, "grad_norm": 0.5078125, "learning_rate": 0.0001992644261455523, "loss": 1.5366, "step": 2605 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.0001992534530747039, "loss": 1.5488, "step": 2610 }, { "epoch": 0.14, "grad_norm": 0.498046875, "learning_rate": 0.00019924239906883457, "loss": 1.4925, "step": 2615 }, { "epoch": 0.14, "grad_norm": 0.50390625, "learning_rate": 0.00019923126413695817, "loss": 1.4764, "step": 2620 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019922004828815454, "loss": 1.5102, "step": 2625 }, { "epoch": 0.14, "grad_norm": 0.54296875, "learning_rate": 0.0001992087515315695, "loss": 1.5488, "step": 2630 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019919737387641485, "loss": 1.5403, "step": 2635 }, { "epoch": 0.14, "grad_norm": 0.5078125, "learning_rate": 0.00019918591533196834, "loss": 1.5077, "step": 2640 }, { "epoch": 0.14, "grad_norm": 0.5234375, "learning_rate": 0.00019917437590757375, "loss": 1.4922, "step": 2645 }, { "epoch": 0.14, "grad_norm": 0.4921875, "learning_rate": 0.00019916275561264075, "loss": 1.5049, "step": 2650 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019915105445664493, "loss": 1.5126, "step": 2655 }, { "epoch": 0.14, "grad_norm": 0.5078125, "learning_rate": 0.00019913927244912788, "loss": 1.5371, "step": 2660 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.0001991274095996971, "loss": 1.4777, "step": 2665 }, { "epoch": 0.14, "grad_norm": 0.5234375, "learning_rate": 0.00019911546591802604, "loss": 1.5001, "step": 2670 }, { "epoch": 0.14, "grad_norm": 0.5390625, "learning_rate": 0.00019910344141385396, "loss": 1.5312, "step": 2675 }, { "epoch": 0.14, "grad_norm": 0.52734375, "learning_rate": 0.00019909133609698616, "loss": 1.4799, "step": 2680 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019907914997729372, "loss": 1.4944, "step": 2685 }, { "epoch": 0.14, "grad_norm": 0.54296875, "learning_rate": 0.00019906688306471366, "loss": 1.4979, "step": 2690 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019905453536924893, "loss": 1.5127, "step": 2695 }, { "epoch": 0.14, "grad_norm": 0.494140625, "learning_rate": 0.0001990421069009683, "loss": 1.5091, "step": 2700 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.0001990295976700064, "loss": 1.5043, "step": 2705 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019901700768656372, "loss": 1.5113, "step": 2710 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.0001990043369609066, "loss": 1.5135, "step": 2715 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.00019899158550336729, "loss": 1.5443, "step": 2720 }, { "epoch": 0.14, "grad_norm": 0.50390625, "learning_rate": 0.00019897875332434376, "loss": 1.4857, "step": 2725 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019896584043429988, "loss": 1.5084, "step": 2730 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019895284684376524, "loss": 1.4913, "step": 2735 }, { "epoch": 0.14, "grad_norm": 0.50390625, "learning_rate": 0.0001989397725633354, "loss": 1.5016, "step": 2740 }, { "epoch": 0.14, "grad_norm": 0.5078125, "learning_rate": 0.00019892661760367156, "loss": 1.5035, "step": 2745 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019891338197550081, "loss": 1.5148, "step": 2750 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019890006568961597, "loss": 1.4888, "step": 2755 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019888666875687565, "loss": 1.5469, "step": 2760 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 0.00019887319118820418, "loss": 1.5647, "step": 2765 }, { "epoch": 0.14, "grad_norm": 0.5, "learning_rate": 0.0001988596329945917, "loss": 1.5234, "step": 2770 }, { "epoch": 0.14, "grad_norm": 0.53515625, "learning_rate": 0.0001988459941870941, "loss": 1.5043, "step": 2775 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.00019883227477683296, "loss": 1.5245, "step": 2780 }, { "epoch": 0.14, "grad_norm": 0.53125, "learning_rate": 0.00019881847477499557, "loss": 1.5148, "step": 2785 }, { "epoch": 0.14, "grad_norm": 0.5078125, "learning_rate": 0.00019880459419283503, "loss": 1.5096, "step": 2790 }, { "epoch": 0.14, "grad_norm": 0.51953125, "learning_rate": 0.0001987906330416701, "loss": 1.5181, "step": 2795 }, { "epoch": 0.14, "grad_norm": 0.515625, "learning_rate": 0.00019877659133288515, "loss": 1.5406, "step": 2800 }, { "epoch": 0.15, "grad_norm": 0.498046875, "learning_rate": 0.0001987624690779304, "loss": 1.5078, "step": 2805 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.00019874826628832164, "loss": 1.5313, "step": 2810 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.00019873398297564037, "loss": 1.4807, "step": 2815 }, { "epoch": 0.15, "grad_norm": 0.494140625, "learning_rate": 0.0001987196191515337, "loss": 1.5308, "step": 2820 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.0001987051748277145, "loss": 1.5101, "step": 2825 }, { "epoch": 0.15, "grad_norm": 0.498046875, "learning_rate": 0.00019869065001596118, "loss": 1.4953, "step": 2830 }, { "epoch": 0.15, "grad_norm": 0.5, "learning_rate": 0.00019867604472811786, "loss": 1.4817, "step": 2835 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.00019866135897609423, "loss": 1.4956, "step": 2840 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019864659277186555, "loss": 1.5151, "step": 2845 }, { "epoch": 0.15, "grad_norm": 0.515625, "learning_rate": 0.0001986317461274728, "loss": 1.4996, "step": 2850 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.00019861681905502246, "loss": 1.5088, "step": 2855 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.0001986018115666867, "loss": 1.507, "step": 2860 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019858672367470312, "loss": 1.516, "step": 2865 }, { "epoch": 0.15, "grad_norm": 0.48828125, "learning_rate": 0.000198571555391375, "loss": 1.5231, "step": 2870 }, { "epoch": 0.15, "grad_norm": 0.5, "learning_rate": 0.00019855630672907108, "loss": 1.4969, "step": 2875 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.00019854097770022577, "loss": 1.4924, "step": 2880 }, { "epoch": 0.15, "grad_norm": 0.52734375, "learning_rate": 0.0001985255683173389, "loss": 1.5237, "step": 2885 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.00019851007859297585, "loss": 1.5364, "step": 2890 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 0.00019849450853976755, "loss": 1.5116, "step": 2895 }, { "epoch": 0.15, "grad_norm": 0.486328125, "learning_rate": 0.0001984788581704104, "loss": 1.4999, "step": 2900 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.0001984631274976663, "loss": 1.4907, "step": 2905 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019844731653436264, "loss": 1.5079, "step": 2910 }, { "epoch": 0.15, "grad_norm": 0.53125, "learning_rate": 0.0001984314252933923, "loss": 1.4708, "step": 2915 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.00019841545378771356, "loss": 1.4926, "step": 2920 }, { "epoch": 0.15, "grad_norm": 0.51953125, "learning_rate": 0.0001983994020303502, "loss": 1.4863, "step": 2925 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.00019838327003439147, "loss": 1.4937, "step": 2930 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.00019836705781299196, "loss": 1.535, "step": 2935 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 0.00019835076537937178, "loss": 1.4663, "step": 2940 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.00019833439274681634, "loss": 1.5312, "step": 2945 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 0.00019831793992867652, "loss": 1.5367, "step": 2950 }, { "epoch": 0.15, "grad_norm": 0.5234375, "learning_rate": 0.0001983014069383686, "loss": 1.5108, "step": 2955 }, { "epoch": 0.15, "grad_norm": 0.5390625, "learning_rate": 0.00019828479378937417, "loss": 1.5443, "step": 2960 }, { "epoch": 0.15, "grad_norm": 0.51171875, "learning_rate": 0.00019826810049524026, "loss": 1.506, "step": 2965 }, { "epoch": 0.15, "grad_norm": 0.48828125, "learning_rate": 0.00019825132706957917, "loss": 1.5365, "step": 2970 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.00019823447352606858, "loss": 1.4662, "step": 2975 }, { "epoch": 0.15, "grad_norm": 0.49609375, "learning_rate": 0.00019821753987845156, "loss": 1.5271, "step": 2980 }, { "epoch": 0.15, "grad_norm": 0.54296875, "learning_rate": 0.0001982005261405364, "loss": 1.4906, "step": 2985 }, { "epoch": 0.15, "grad_norm": 0.53515625, "learning_rate": 0.0001981834323261968, "loss": 1.4914, "step": 2990 }, { "epoch": 0.15, "grad_norm": 0.5, "learning_rate": 0.00019816625844937163, "loss": 1.5608, "step": 2995 }, { "epoch": 0.16, "grad_norm": 0.51953125, "learning_rate": 0.0001981490045240652, "loss": 1.4996, "step": 3000 }, { "epoch": 0.16, "grad_norm": 0.51953125, "learning_rate": 0.00019813167056434693, "loss": 1.4952, "step": 3005 }, { "epoch": 0.16, "grad_norm": 0.53125, "learning_rate": 0.00019811425658435166, "loss": 1.5259, "step": 3010 }, { "epoch": 0.16, "grad_norm": 0.5078125, "learning_rate": 0.00019809676259827935, "loss": 1.4858, "step": 3015 }, { "epoch": 0.16, "grad_norm": 0.5, "learning_rate": 0.0001980791886203953, "loss": 1.5333, "step": 3020 }, { "epoch": 0.16, "grad_norm": 0.5234375, "learning_rate": 0.00019806153466502997, "loss": 1.5455, "step": 3025 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019804380074657906, "loss": 1.5162, "step": 3030 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019802598687950352, "loss": 1.5035, "step": 3035 }, { "epoch": 0.16, "grad_norm": 0.52734375, "learning_rate": 0.00019800809307832942, "loss": 1.4887, "step": 3040 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 0.00019799011935764803, "loss": 1.4994, "step": 3045 }, { "epoch": 0.16, "grad_norm": 0.51171875, "learning_rate": 0.0001979720657321158, "loss": 1.5093, "step": 3050 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019795393221645437, "loss": 1.5287, "step": 3055 }, { "epoch": 0.16, "grad_norm": 0.52734375, "learning_rate": 0.00019793571882545047, "loss": 1.4979, "step": 3060 }, { "epoch": 0.16, "grad_norm": 0.51953125, "learning_rate": 0.00019791742557395602, "loss": 1.5138, "step": 3065 }, { "epoch": 0.16, "grad_norm": 0.5234375, "learning_rate": 0.000197899052476888, "loss": 1.525, "step": 3070 }, { "epoch": 0.16, "grad_norm": 0.53515625, "learning_rate": 0.00019788059954922856, "loss": 1.4849, "step": 3075 }, { "epoch": 0.16, "grad_norm": 0.50390625, "learning_rate": 0.00019786206680602486, "loss": 1.531, "step": 3080 }, { "epoch": 0.16, "grad_norm": 0.56640625, "learning_rate": 0.00019784345426238927, "loss": 1.528, "step": 3085 }, { "epoch": 0.16, "grad_norm": 0.51171875, "learning_rate": 0.00019782476193349905, "loss": 1.5096, "step": 3090 }, { "epoch": 0.16, "grad_norm": 0.50390625, "learning_rate": 0.00019780598983459678, "loss": 1.5177, "step": 3095 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019778713798098983, "loss": 1.5012, "step": 3100 }, { "epoch": 0.16, "grad_norm": 0.5078125, "learning_rate": 0.00019776820638805077, "loss": 1.4774, "step": 3105 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.0001977491950712171, "loss": 1.5024, "step": 3110 }, { "epoch": 0.16, "grad_norm": 0.498046875, "learning_rate": 0.0001977301040459914, "loss": 1.5284, "step": 3115 }, { "epoch": 0.16, "grad_norm": 0.5234375, "learning_rate": 0.00019771093332794117, "loss": 1.5153, "step": 3120 }, { "epoch": 0.16, "grad_norm": 0.53125, "learning_rate": 0.000197691682932699, "loss": 1.5114, "step": 3125 }, { "epoch": 0.16, "grad_norm": 0.54296875, "learning_rate": 0.00019767235287596237, "loss": 1.4915, "step": 3130 }, { "epoch": 0.16, "grad_norm": 0.5078125, "learning_rate": 0.0001976529431734937, "loss": 1.4751, "step": 3135 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019763345384112043, "loss": 1.5189, "step": 3140 }, { "epoch": 0.16, "grad_norm": 0.50390625, "learning_rate": 0.0001976138848947349, "loss": 1.5114, "step": 3145 }, { "epoch": 0.16, "grad_norm": 0.55859375, "learning_rate": 0.00019759423635029434, "loss": 1.5104, "step": 3150 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.00019757450822382094, "loss": 1.5031, "step": 3155 }, { "epoch": 0.16, "grad_norm": 0.50390625, "learning_rate": 0.00019755470053140178, "loss": 1.4643, "step": 3160 }, { "epoch": 0.16, "grad_norm": 0.515625, "learning_rate": 0.0001975348132891888, "loss": 1.483, "step": 3165 }, { "epoch": 0.16, "grad_norm": 0.498046875, "learning_rate": 0.00019751484651339877, "loss": 1.4759, "step": 3170 }, { "epoch": 0.16, "grad_norm": 0.51171875, "learning_rate": 0.00019749480022031337, "loss": 1.4971, "step": 3175 }, { "epoch": 0.16, "grad_norm": 0.51171875, "learning_rate": 0.00019747467442627912, "loss": 1.4768, "step": 3180 }, { "epoch": 0.16, "grad_norm": 0.51171875, "learning_rate": 0.00019745446914770732, "loss": 1.4869, "step": 3185 }, { "epoch": 0.17, "grad_norm": 0.5234375, "learning_rate": 0.00019743418440107418, "loss": 1.5524, "step": 3190 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.00019741382020292063, "loss": 1.4935, "step": 3195 }, { "epoch": 0.17, "grad_norm": 0.52734375, "learning_rate": 0.00019739337656985234, "loss": 1.5014, "step": 3200 }, { "epoch": 0.17, "grad_norm": 0.494140625, "learning_rate": 0.0001973728535185399, "loss": 1.5191, "step": 3205 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.00019735225106571854, "loss": 1.4759, "step": 3210 }, { "epoch": 0.17, "grad_norm": 0.49609375, "learning_rate": 0.00019733156922818835, "loss": 1.5358, "step": 3215 }, { "epoch": 0.17, "grad_norm": 0.515625, "learning_rate": 0.00019731080802281396, "loss": 1.4926, "step": 3220 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.00019728996746652496, "loss": 1.4928, "step": 3225 }, { "epoch": 0.17, "grad_norm": 0.51171875, "learning_rate": 0.00019726904757631544, "loss": 1.4744, "step": 3230 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.0001972480483692443, "loss": 1.516, "step": 3235 }, { "epoch": 0.17, "grad_norm": 0.51171875, "learning_rate": 0.00019722696986243515, "loss": 1.4968, "step": 3240 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.00019720581207307612, "loss": 1.5516, "step": 3245 }, { "epoch": 0.17, "grad_norm": 0.48046875, "learning_rate": 0.0001971845750184201, "loss": 1.4762, "step": 3250 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.00019716325871578462, "loss": 1.4957, "step": 3255 }, { "epoch": 0.17, "grad_norm": 0.490234375, "learning_rate": 0.0001971418631825517, "loss": 1.4826, "step": 3260 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.00019712038843616817, "loss": 1.4764, "step": 3265 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.00019709883449414535, "loss": 1.4964, "step": 3270 }, { "epoch": 0.17, "grad_norm": 0.515625, "learning_rate": 0.00019707720137405907, "loss": 1.5065, "step": 3275 }, { "epoch": 0.17, "grad_norm": 0.5546875, "learning_rate": 0.00019705548909354983, "loss": 1.4792, "step": 3280 }, { "epoch": 0.17, "grad_norm": 0.57421875, "learning_rate": 0.00019703369767032266, "loss": 1.4646, "step": 3285 }, { "epoch": 0.17, "grad_norm": 0.546875, "learning_rate": 0.0001970118271221471, "loss": 1.5114, "step": 3290 }, { "epoch": 0.17, "grad_norm": 0.5390625, "learning_rate": 0.0001969898774668572, "loss": 1.4886, "step": 3295 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.00019696784872235158, "loss": 1.509, "step": 3300 }, { "epoch": 0.17, "grad_norm": 0.5234375, "learning_rate": 0.0001969457409065933, "loss": 1.4883, "step": 3305 }, { "epoch": 0.17, "grad_norm": 0.54296875, "learning_rate": 0.00019692355403760987, "loss": 1.5194, "step": 3310 }, { "epoch": 0.17, "grad_norm": 0.5078125, "learning_rate": 0.00019690128813349333, "loss": 1.5052, "step": 3315 }, { "epoch": 0.17, "grad_norm": 0.49609375, "learning_rate": 0.00019687894321240016, "loss": 1.4941, "step": 3320 }, { "epoch": 0.17, "grad_norm": 0.5, "learning_rate": 0.00019685651929255123, "loss": 1.5108, "step": 3325 }, { "epoch": 0.17, "grad_norm": 0.50390625, "learning_rate": 0.0001968340163922319, "loss": 1.5232, "step": 3330 }, { "epoch": 0.17, "grad_norm": 0.50390625, "learning_rate": 0.00019681143452979178, "loss": 1.5024, "step": 3335 }, { "epoch": 0.17, "grad_norm": 0.5390625, "learning_rate": 0.0001967887737236451, "loss": 1.4631, "step": 3340 }, { "epoch": 0.17, "grad_norm": 0.53515625, "learning_rate": 0.00019676603399227023, "loss": 1.5092, "step": 3345 }, { "epoch": 0.17, "grad_norm": 0.4921875, "learning_rate": 0.0001967432153542101, "loss": 1.5084, "step": 3350 }, { "epoch": 0.17, "grad_norm": 0.53125, "learning_rate": 0.00019672031782807178, "loss": 1.4948, "step": 3355 }, { "epoch": 0.17, "grad_norm": 0.490234375, "learning_rate": 0.0001966973414325269, "loss": 1.4951, "step": 3360 }, { "epoch": 0.17, "grad_norm": 0.52734375, "learning_rate": 0.00019667428618631126, "loss": 1.5071, "step": 3365 }, { "epoch": 0.17, "grad_norm": 0.51953125, "learning_rate": 0.00019665115210822489, "loss": 1.4793, "step": 3370 }, { "epoch": 0.17, "grad_norm": 0.4921875, "learning_rate": 0.00019662793921713226, "loss": 1.5236, "step": 3375 }, { "epoch": 0.17, "grad_norm": 0.5078125, "learning_rate": 0.00019660464753196207, "loss": 1.4882, "step": 3380 }, { "epoch": 0.18, "grad_norm": 0.51953125, "learning_rate": 0.00019658127707170716, "loss": 1.4904, "step": 3385 }, { "epoch": 0.18, "grad_norm": 0.5, "learning_rate": 0.00019655782785542476, "loss": 1.4852, "step": 3390 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.0001965342999022362, "loss": 1.4834, "step": 3395 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.0001965106932313271, "loss": 1.5002, "step": 3400 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.0001964870078619472, "loss": 1.4868, "step": 3405 }, { "epoch": 0.18, "grad_norm": 0.51953125, "learning_rate": 0.00019646324381341045, "loss": 1.514, "step": 3410 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.000196439401105095, "loss": 1.4797, "step": 3415 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.00019641547975644304, "loss": 1.4759, "step": 3420 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.00019639147978696097, "loss": 1.5039, "step": 3425 }, { "epoch": 0.18, "grad_norm": 0.490234375, "learning_rate": 0.0001963674012162193, "loss": 1.4789, "step": 3430 }, { "epoch": 0.18, "grad_norm": 0.5, "learning_rate": 0.00019634324406385252, "loss": 1.5134, "step": 3435 }, { "epoch": 0.18, "grad_norm": 0.51953125, "learning_rate": 0.00019631900834955935, "loss": 1.4714, "step": 3440 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.00019629469409310253, "loss": 1.5027, "step": 3445 }, { "epoch": 0.18, "grad_norm": 0.51171875, "learning_rate": 0.00019627030131430875, "loss": 1.5045, "step": 3450 }, { "epoch": 0.18, "grad_norm": 0.51171875, "learning_rate": 0.0001962458300330689, "loss": 1.5231, "step": 3455 }, { "epoch": 0.18, "grad_norm": 0.52734375, "learning_rate": 0.0001962212802693377, "loss": 1.5213, "step": 3460 }, { "epoch": 0.18, "grad_norm": 0.53125, "learning_rate": 0.000196196652043134, "loss": 1.5295, "step": 3465 }, { "epoch": 0.18, "grad_norm": 0.4921875, "learning_rate": 0.0001961719453745406, "loss": 1.5081, "step": 3470 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.0001961471602837042, "loss": 1.4992, "step": 3475 }, { "epoch": 0.18, "grad_norm": 0.48828125, "learning_rate": 0.00019612229679083555, "loss": 1.5028, "step": 3480 }, { "epoch": 0.18, "grad_norm": 0.5234375, "learning_rate": 0.0001960973549162093, "loss": 1.5116, "step": 3485 }, { "epoch": 0.18, "grad_norm": 0.494140625, "learning_rate": 0.00019607233468016392, "loss": 1.4824, "step": 3490 }, { "epoch": 0.18, "grad_norm": 0.54296875, "learning_rate": 0.00019604723610310194, "loss": 1.5318, "step": 3495 }, { "epoch": 0.18, "grad_norm": 0.5078125, "learning_rate": 0.00019602205920548965, "loss": 1.5108, "step": 3500 }, { "epoch": 0.18, "grad_norm": 0.5, "learning_rate": 0.0001959968040078572, "loss": 1.5155, "step": 3505 }, { "epoch": 0.18, "grad_norm": 0.51171875, "learning_rate": 0.00019597147053079873, "loss": 1.4939, "step": 3510 }, { "epoch": 0.18, "grad_norm": 0.5390625, "learning_rate": 0.00019594605879497202, "loss": 1.4775, "step": 3515 }, { "epoch": 0.18, "grad_norm": 0.51171875, "learning_rate": 0.00019592056882109885, "loss": 1.5044, "step": 3520 }, { "epoch": 0.18, "grad_norm": 0.53125, "learning_rate": 0.00019589500062996463, "loss": 1.4672, "step": 3525 }, { "epoch": 0.18, "grad_norm": 0.52734375, "learning_rate": 0.00019586935424241873, "loss": 1.4761, "step": 3530 }, { "epoch": 0.18, "grad_norm": 0.51953125, "learning_rate": 0.00019584362967937406, "loss": 1.4901, "step": 3535 }, { "epoch": 0.18, "grad_norm": 0.51953125, "learning_rate": 0.00019581782696180748, "loss": 1.4989, "step": 3540 }, { "epoch": 0.18, "grad_norm": 0.5, "learning_rate": 0.0001957919461107595, "loss": 1.5152, "step": 3545 }, { "epoch": 0.18, "grad_norm": 0.5546875, "learning_rate": 0.00019576598714733431, "loss": 1.5188, "step": 3550 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 0.00019573995009269988, "loss": 1.4681, "step": 3555 }, { "epoch": 0.18, "grad_norm": 0.49609375, "learning_rate": 0.00019571383496808775, "loss": 1.4673, "step": 3560 }, { "epoch": 0.18, "grad_norm": 0.51171875, "learning_rate": 0.00019568764179479323, "loss": 1.4654, "step": 3565 }, { "epoch": 0.18, "grad_norm": 0.5078125, "learning_rate": 0.0001956613705941752, "loss": 1.5073, "step": 3570 }, { "epoch": 0.18, "grad_norm": 0.5078125, "learning_rate": 0.00019563502138765618, "loss": 1.4786, "step": 3575 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019560859419672237, "loss": 1.5432, "step": 3580 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019558208904292342, "loss": 1.497, "step": 3585 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.0001955555059478727, "loss": 1.4615, "step": 3590 }, { "epoch": 0.19, "grad_norm": 0.484375, "learning_rate": 0.00019552884493324703, "loss": 1.4729, "step": 3595 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019550210602078684, "loss": 1.5153, "step": 3600 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.000195475289232296, "loss": 1.4976, "step": 3605 }, { "epoch": 0.19, "grad_norm": 0.53125, "learning_rate": 0.00019544839458964202, "loss": 1.5, "step": 3610 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.0001954214221147557, "loss": 1.4885, "step": 3615 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019539437182963153, "loss": 1.4852, "step": 3620 }, { "epoch": 0.19, "grad_norm": 0.486328125, "learning_rate": 0.00019536724375632727, "loss": 1.4751, "step": 3625 }, { "epoch": 0.19, "grad_norm": 0.52734375, "learning_rate": 0.00019534003791696417, "loss": 1.4512, "step": 3630 }, { "epoch": 0.19, "grad_norm": 0.515625, "learning_rate": 0.00019531275433372694, "loss": 1.4634, "step": 3635 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.00019528539302886362, "loss": 1.4936, "step": 3640 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019525795402468567, "loss": 1.4721, "step": 3645 }, { "epoch": 0.19, "grad_norm": 0.5078125, "learning_rate": 0.00019523043734356787, "loss": 1.5107, "step": 3650 }, { "epoch": 0.19, "grad_norm": 0.515625, "learning_rate": 0.00019520284300794837, "loss": 1.5082, "step": 3655 }, { "epoch": 0.19, "grad_norm": 0.5234375, "learning_rate": 0.00019517517104032864, "loss": 1.4966, "step": 3660 }, { "epoch": 0.19, "grad_norm": 0.50390625, "learning_rate": 0.00019514742146327344, "loss": 1.4804, "step": 3665 }, { "epoch": 0.19, "grad_norm": 0.515625, "learning_rate": 0.00019511959429941087, "loss": 1.5073, "step": 3670 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.0001950916895714322, "loss": 1.5222, "step": 3675 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.000195063707302092, "loss": 1.4928, "step": 3680 }, { "epoch": 0.19, "grad_norm": 0.5078125, "learning_rate": 0.0001950356475142081, "loss": 1.4753, "step": 3685 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019500751023066154, "loss": 1.4971, "step": 3690 }, { "epoch": 0.19, "grad_norm": 0.490234375, "learning_rate": 0.00019497929547439643, "loss": 1.4678, "step": 3695 }, { "epoch": 0.19, "grad_norm": 0.5078125, "learning_rate": 0.0001949510032684202, "loss": 1.4907, "step": 3700 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.00019492263363580343, "loss": 1.5243, "step": 3705 }, { "epoch": 0.19, "grad_norm": 0.53515625, "learning_rate": 0.00019489418659967975, "loss": 1.485, "step": 3710 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.00019486566218324597, "loss": 1.4388, "step": 3715 }, { "epoch": 0.19, "grad_norm": 0.48828125, "learning_rate": 0.00019483706040976194, "loss": 1.4807, "step": 3720 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 0.0001948083813025506, "loss": 1.4988, "step": 3725 }, { "epoch": 0.19, "grad_norm": 0.49609375, "learning_rate": 0.0001947796248849981, "loss": 1.4606, "step": 3730 }, { "epoch": 0.19, "grad_norm": 0.53125, "learning_rate": 0.0001947507911805534, "loss": 1.4861, "step": 3735 }, { "epoch": 0.19, "grad_norm": 0.498046875, "learning_rate": 0.00019472188021272868, "loss": 1.4952, "step": 3740 }, { "epoch": 0.19, "grad_norm": 0.5390625, "learning_rate": 0.00019469289200509896, "loss": 1.4767, "step": 3745 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 0.00019466382658130232, "loss": 1.5096, "step": 3750 }, { "epoch": 0.19, "grad_norm": 0.4921875, "learning_rate": 0.00019463468396503989, "loss": 1.4809, "step": 3755 }, { "epoch": 0.19, "grad_norm": 0.5078125, "learning_rate": 0.0001946054641800756, "loss": 1.4876, "step": 3760 }, { "epoch": 0.19, "grad_norm": 0.5, "learning_rate": 0.00019457616725023635, "loss": 1.4583, "step": 3765 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 0.000194546793199412, "loss": 1.4932, "step": 3770 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.00019451734205155527, "loss": 1.4892, "step": 3775 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.00019448781383068174, "loss": 1.5205, "step": 3780 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.0001944582085608698, "loss": 1.5026, "step": 3785 }, { "epoch": 0.2, "grad_norm": 0.4921875, "learning_rate": 0.00019442852626626076, "loss": 1.4821, "step": 3790 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 0.0001943987669710586, "loss": 1.5057, "step": 3795 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.0001943689306995303, "loss": 1.466, "step": 3800 }, { "epoch": 0.2, "grad_norm": 0.4921875, "learning_rate": 0.00019433901747600537, "loss": 1.4606, "step": 3805 }, { "epoch": 0.2, "grad_norm": 1.453125, "learning_rate": 0.00019430902732487626, "loss": 1.4627, "step": 3810 }, { "epoch": 0.2, "grad_norm": 2.890625, "learning_rate": 0.00019427896027059802, "loss": 1.4864, "step": 3815 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 0.00019424881633768853, "loss": 1.497, "step": 3820 }, { "epoch": 0.2, "grad_norm": 0.58203125, "learning_rate": 0.00019421859555072822, "loss": 1.4953, "step": 3825 }, { "epoch": 0.2, "grad_norm": 0.5078125, "learning_rate": 0.0001941882979343603, "loss": 1.4895, "step": 3830 }, { "epoch": 0.2, "grad_norm": 0.5, "learning_rate": 0.00019415792351329058, "loss": 1.4654, "step": 3835 }, { "epoch": 0.2, "grad_norm": 0.53515625, "learning_rate": 0.00019412747231228753, "loss": 1.5066, "step": 3840 }, { "epoch": 0.2, "grad_norm": 0.6015625, "learning_rate": 0.00019409694435618222, "loss": 1.5092, "step": 3845 }, { "epoch": 0.2, "grad_norm": 0.5078125, "learning_rate": 0.00019406633966986828, "loss": 1.4891, "step": 3850 }, { "epoch": 0.2, "grad_norm": 0.51953125, "learning_rate": 0.0001940356582783019, "loss": 1.4741, "step": 3855 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.0001940049002065019, "loss": 1.4906, "step": 3860 }, { "epoch": 0.2, "grad_norm": 0.5234375, "learning_rate": 0.00019397406547954954, "loss": 1.4775, "step": 3865 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 0.00019394315412258868, "loss": 1.4981, "step": 3870 }, { "epoch": 0.2, "grad_norm": 0.5078125, "learning_rate": 0.00019391216616082552, "loss": 1.4714, "step": 3875 }, { "epoch": 0.2, "grad_norm": 0.54296875, "learning_rate": 0.0001938811016195289, "loss": 1.504, "step": 3880 }, { "epoch": 0.2, "grad_norm": 0.5546875, "learning_rate": 0.00019384996052402995, "loss": 1.5086, "step": 3885 }, { "epoch": 0.2, "grad_norm": 0.52734375, "learning_rate": 0.00019381874289972238, "loss": 1.4597, "step": 3890 }, { "epoch": 0.2, "grad_norm": 0.52734375, "learning_rate": 0.0001937874487720621, "loss": 1.527, "step": 3895 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.00019375607816656768, "loss": 1.4881, "step": 3900 }, { "epoch": 0.2, "grad_norm": 0.51171875, "learning_rate": 0.0001937246311088198, "loss": 1.4398, "step": 3905 }, { "epoch": 0.2, "grad_norm": 0.5234375, "learning_rate": 0.0001936931076244616, "loss": 1.5191, "step": 3910 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.0001936615077391985, "loss": 1.5062, "step": 3915 }, { "epoch": 0.2, "grad_norm": 0.5234375, "learning_rate": 0.00019362983147879826, "loss": 1.5024, "step": 3920 }, { "epoch": 0.2, "grad_norm": 0.5, "learning_rate": 0.00019359807886909093, "loss": 1.4988, "step": 3925 }, { "epoch": 0.2, "grad_norm": 0.5234375, "learning_rate": 0.00019356624993596878, "loss": 1.4855, "step": 3930 }, { "epoch": 0.2, "grad_norm": 0.5, "learning_rate": 0.00019353434470538629, "loss": 1.5001, "step": 3935 }, { "epoch": 0.2, "grad_norm": 0.5078125, "learning_rate": 0.00019350236320336023, "loss": 1.4953, "step": 3940 }, { "epoch": 0.2, "grad_norm": 0.494140625, "learning_rate": 0.0001934703054559695, "loss": 1.4843, "step": 3945 }, { "epoch": 0.2, "grad_norm": 0.57421875, "learning_rate": 0.0001934381714893552, "loss": 1.4679, "step": 3950 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 0.00019340596132972062, "loss": 1.4638, "step": 3955 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 0.0001933736750033311, "loss": 1.465, "step": 3960 }, { "epoch": 0.21, "grad_norm": 0.51953125, "learning_rate": 0.00019334131253651414, "loss": 1.4728, "step": 3965 }, { "epoch": 0.21, "grad_norm": 0.5078125, "learning_rate": 0.00019330887395565936, "loss": 1.4941, "step": 3970 }, { "epoch": 0.21, "grad_norm": 0.5234375, "learning_rate": 0.00019327635928721834, "loss": 1.4869, "step": 3975 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.00019324376855770484, "loss": 1.5167, "step": 3980 }, { "epoch": 0.21, "grad_norm": 0.5078125, "learning_rate": 0.00019321110179369448, "loss": 1.4614, "step": 3985 }, { "epoch": 0.21, "grad_norm": 0.5078125, "learning_rate": 0.00019317835902182506, "loss": 1.516, "step": 3990 }, { "epoch": 0.21, "grad_norm": 0.52734375, "learning_rate": 0.0001931455402687963, "loss": 1.4819, "step": 3995 }, { "epoch": 0.21, "grad_norm": 0.5, "learning_rate": 0.00019311264556136975, "loss": 1.4911, "step": 4000 }, { "epoch": 0.21, "grad_norm": 0.52734375, "learning_rate": 0.00019307967492636905, "loss": 1.4753, "step": 4005 }, { "epoch": 0.21, "grad_norm": 0.546875, "learning_rate": 0.00019304662839067974, "loss": 1.487, "step": 4010 }, { "epoch": 0.21, "grad_norm": 0.50390625, "learning_rate": 0.00019301350598124913, "loss": 1.4955, "step": 4015 }, { "epoch": 0.21, "grad_norm": 0.54296875, "learning_rate": 0.00019298030772508658, "loss": 1.4849, "step": 4020 }, { "epoch": 0.21, "grad_norm": 0.51171875, "learning_rate": 0.00019294703364926315, "loss": 1.5116, "step": 4025 }, { "epoch": 0.21, "grad_norm": 0.5, "learning_rate": 0.00019291368378091176, "loss": 1.4563, "step": 4030 }, { "epoch": 0.21, "grad_norm": 0.53125, "learning_rate": 0.0001928802581472272, "loss": 1.4931, "step": 4035 }, { "epoch": 0.21, "grad_norm": 0.515625, "learning_rate": 0.000192846756775466, "loss": 1.4745, "step": 4040 }, { "epoch": 0.21, "grad_norm": 0.51953125, "learning_rate": 0.00019281317969294643, "loss": 1.4956, "step": 4045 }, { "epoch": 0.21, "grad_norm": 0.53515625, "learning_rate": 0.00019277952692704848, "loss": 1.4962, "step": 4050 }, { "epoch": 0.21, "grad_norm": 0.51171875, "learning_rate": 0.00019274579850521393, "loss": 1.4451, "step": 4055 }, { "epoch": 0.21, "grad_norm": 0.5546875, "learning_rate": 0.00019271199445494624, "loss": 1.5035, "step": 4060 }, { "epoch": 0.21, "grad_norm": 0.515625, "learning_rate": 0.00019267811480381042, "loss": 1.4918, "step": 4065 }, { "epoch": 0.21, "grad_norm": 0.5078125, "learning_rate": 0.0001926441595794333, "loss": 1.4766, "step": 4070 }, { "epoch": 0.21, "grad_norm": 0.5234375, "learning_rate": 0.00019261012880950323, "loss": 1.4601, "step": 4075 }, { "epoch": 0.21, "grad_norm": 0.53515625, "learning_rate": 0.00019257602252177017, "loss": 1.4964, "step": 4080 }, { "epoch": 0.21, "grad_norm": 0.51171875, "learning_rate": 0.00019254184074404568, "loss": 1.4682, "step": 4085 }, { "epoch": 0.21, "grad_norm": 0.490234375, "learning_rate": 0.0001925075835042029, "loss": 1.4707, "step": 4090 }, { "epoch": 0.21, "grad_norm": 0.50390625, "learning_rate": 0.00019247325083017648, "loss": 1.4259, "step": 4095 }, { "epoch": 0.21, "grad_norm": 0.51953125, "learning_rate": 0.00019243884274996255, "loss": 1.4643, "step": 4100 }, { "epoch": 0.21, "grad_norm": 0.515625, "learning_rate": 0.00019240435929161878, "loss": 1.4715, "step": 4105 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.00019236980048326427, "loss": 1.4774, "step": 4110 }, { "epoch": 0.21, "grad_norm": 0.51171875, "learning_rate": 0.0001923351663530796, "loss": 1.4701, "step": 4115 }, { "epoch": 0.21, "grad_norm": 0.5390625, "learning_rate": 0.00019230045692930677, "loss": 1.4838, "step": 4120 }, { "epoch": 0.21, "grad_norm": 0.54296875, "learning_rate": 0.00019226567224024912, "loss": 1.4794, "step": 4125 }, { "epoch": 0.21, "grad_norm": 0.53125, "learning_rate": 0.0001922308123142714, "loss": 1.4795, "step": 4130 }, { "epoch": 0.21, "grad_norm": 0.52734375, "learning_rate": 0.00019219587717979973, "loss": 1.4763, "step": 4135 }, { "epoch": 0.21, "grad_norm": 0.515625, "learning_rate": 0.00019216086686532153, "loss": 1.4965, "step": 4140 }, { "epoch": 0.21, "grad_norm": 0.5078125, "learning_rate": 0.00019212578139938554, "loss": 1.5114, "step": 4145 }, { "epoch": 0.21, "grad_norm": 0.53125, "learning_rate": 0.00019209062081060178, "loss": 1.4927, "step": 4150 }, { "epoch": 0.21, "grad_norm": 0.51953125, "learning_rate": 0.00019205538512764156, "loss": 1.4886, "step": 4155 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.0001920200743792373, "loss": 1.4667, "step": 4160 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019198468859418278, "loss": 1.4878, "step": 4165 }, { "epoch": 0.22, "grad_norm": 0.50390625, "learning_rate": 0.00019194922780133293, "loss": 1.5009, "step": 4170 }, { "epoch": 0.22, "grad_norm": 0.51171875, "learning_rate": 0.00019191369202960378, "loss": 1.4706, "step": 4175 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019187808130797254, "loss": 1.4836, "step": 4180 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.00019184239566547755, "loss": 1.4744, "step": 4185 }, { "epoch": 0.22, "grad_norm": 0.52734375, "learning_rate": 0.00019180663513121825, "loss": 1.4759, "step": 4190 }, { "epoch": 0.22, "grad_norm": 0.4921875, "learning_rate": 0.0001917707997343551, "loss": 1.4465, "step": 4195 }, { "epoch": 0.22, "grad_norm": 0.52734375, "learning_rate": 0.00019173488950410968, "loss": 1.4752, "step": 4200 }, { "epoch": 0.22, "grad_norm": 0.5, "learning_rate": 0.00019169890446976454, "loss": 1.4593, "step": 4205 }, { "epoch": 0.22, "grad_norm": 0.5078125, "learning_rate": 0.00019166284466066319, "loss": 1.4992, "step": 4210 }, { "epoch": 0.22, "grad_norm": 0.53125, "learning_rate": 0.00019162671010621024, "loss": 1.4524, "step": 4215 }, { "epoch": 0.22, "grad_norm": 0.484375, "learning_rate": 0.0001915905008358711, "loss": 1.4715, "step": 4220 }, { "epoch": 0.22, "grad_norm": 0.54296875, "learning_rate": 0.0001915542168791722, "loss": 1.4902, "step": 4225 }, { "epoch": 0.22, "grad_norm": 0.5, "learning_rate": 0.0001915178582657009, "loss": 1.4699, "step": 4230 }, { "epoch": 0.22, "grad_norm": 0.5234375, "learning_rate": 0.00019148142502510533, "loss": 1.4632, "step": 4235 }, { "epoch": 0.22, "grad_norm": 0.515625, "learning_rate": 0.00019144491718709456, "loss": 1.4785, "step": 4240 }, { "epoch": 0.22, "grad_norm": 0.50390625, "learning_rate": 0.00019140833478143847, "loss": 1.4622, "step": 4245 }, { "epoch": 0.22, "grad_norm": 0.5, "learning_rate": 0.0001913716778379677, "loss": 1.4746, "step": 4250 }, { "epoch": 0.22, "grad_norm": 0.51171875, "learning_rate": 0.00019133494638657374, "loss": 1.4865, "step": 4255 }, { "epoch": 0.22, "grad_norm": 0.53125, "learning_rate": 0.0001912981404572088, "loss": 1.4808, "step": 4260 }, { "epoch": 0.22, "grad_norm": 0.515625, "learning_rate": 0.00019126126007988585, "loss": 1.5115, "step": 4265 }, { "epoch": 0.22, "grad_norm": 0.50390625, "learning_rate": 0.0001912243052846785, "loss": 1.4847, "step": 4270 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.0001911872761017211, "loss": 1.4664, "step": 4275 }, { "epoch": 0.22, "grad_norm": 0.55078125, "learning_rate": 0.00019115017256120866, "loss": 1.4713, "step": 4280 }, { "epoch": 0.22, "grad_norm": 0.5078125, "learning_rate": 0.0001911129946933968, "loss": 1.4886, "step": 4285 }, { "epoch": 0.22, "grad_norm": 0.50390625, "learning_rate": 0.00019107574252860178, "loss": 1.4937, "step": 4290 }, { "epoch": 0.22, "grad_norm": 0.515625, "learning_rate": 0.00019103841609720043, "loss": 1.433, "step": 4295 }, { "epoch": 0.22, "grad_norm": 0.494140625, "learning_rate": 0.0001910010154296301, "loss": 1.4664, "step": 4300 }, { "epoch": 0.22, "grad_norm": 0.51171875, "learning_rate": 0.0001909635405563887, "loss": 1.4946, "step": 4305 }, { "epoch": 0.22, "grad_norm": 0.4921875, "learning_rate": 0.0001909259915080347, "loss": 1.4688, "step": 4310 }, { "epoch": 0.22, "grad_norm": 0.51171875, "learning_rate": 0.000190888368315187, "loss": 1.4631, "step": 4315 }, { "epoch": 0.22, "grad_norm": 0.52734375, "learning_rate": 0.000190850671008525, "loss": 1.458, "step": 4320 }, { "epoch": 0.22, "grad_norm": 0.77734375, "learning_rate": 0.00019081289961878848, "loss": 1.4863, "step": 4325 }, { "epoch": 0.22, "grad_norm": 0.490234375, "learning_rate": 0.00019077505417677764, "loss": 1.4789, "step": 4330 }, { "epoch": 0.22, "grad_norm": 0.51953125, "learning_rate": 0.00019073713471335312, "loss": 1.4919, "step": 4335 }, { "epoch": 0.22, "grad_norm": 0.515625, "learning_rate": 0.00019069914125943586, "loss": 1.4988, "step": 4340 }, { "epoch": 0.22, "grad_norm": 0.515625, "learning_rate": 0.0001906610738460072, "loss": 1.5027, "step": 4345 }, { "epoch": 0.23, "grad_norm": 1.3203125, "learning_rate": 0.00019062293250410873, "loss": 1.4981, "step": 4350 }, { "epoch": 0.23, "grad_norm": 0.5546875, "learning_rate": 0.00019058471726484232, "loss": 1.4197, "step": 4355 }, { "epoch": 0.23, "grad_norm": 0.5078125, "learning_rate": 0.00019054642815937012, "loss": 1.4616, "step": 4360 }, { "epoch": 0.23, "grad_norm": 0.5078125, "learning_rate": 0.00019050806521891456, "loss": 1.4806, "step": 4365 }, { "epoch": 0.23, "grad_norm": 0.490234375, "learning_rate": 0.0001904696284747582, "loss": 1.4921, "step": 4370 }, { "epoch": 0.23, "grad_norm": 0.50390625, "learning_rate": 0.00019043111795824383, "loss": 1.4368, "step": 4375 }, { "epoch": 0.23, "grad_norm": 0.52734375, "learning_rate": 0.00019039253370077436, "loss": 1.4956, "step": 4380 }, { "epoch": 0.23, "grad_norm": 0.5390625, "learning_rate": 0.0001903538757338129, "loss": 1.4884, "step": 4385 }, { "epoch": 0.23, "grad_norm": 0.54296875, "learning_rate": 0.00019031514408888257, "loss": 1.5047, "step": 4390 }, { "epoch": 0.23, "grad_norm": 0.52734375, "learning_rate": 0.00019027633879756663, "loss": 1.441, "step": 4395 }, { "epoch": 0.23, "grad_norm": 0.53515625, "learning_rate": 0.0001902374598915084, "loss": 1.4652, "step": 4400 }, { "epoch": 0.23, "grad_norm": 0.53515625, "learning_rate": 0.0001901985074024112, "loss": 1.4759, "step": 4405 }, { "epoch": 0.23, "grad_norm": 0.52734375, "learning_rate": 0.00019015948136203836, "loss": 1.5075, "step": 4410 }, { "epoch": 0.23, "grad_norm": 0.51171875, "learning_rate": 0.00019012038180221322, "loss": 1.5099, "step": 4415 }, { "epoch": 0.23, "grad_norm": 0.515625, "learning_rate": 0.00019008120875481897, "loss": 1.4607, "step": 4420 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.00019004196225179886, "loss": 1.4744, "step": 4425 }, { "epoch": 0.23, "grad_norm": 0.5390625, "learning_rate": 0.00019000264232515594, "loss": 1.488, "step": 4430 }, { "epoch": 0.23, "grad_norm": 0.51953125, "learning_rate": 0.00018996324900695318, "loss": 1.462, "step": 4435 }, { "epoch": 0.23, "grad_norm": 0.49609375, "learning_rate": 0.0001899237823293134, "loss": 1.4705, "step": 4440 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.00018988424232441918, "loss": 1.4757, "step": 4445 }, { "epoch": 0.23, "grad_norm": 0.52734375, "learning_rate": 0.000189844629024513, "loss": 1.4953, "step": 4450 }, { "epoch": 0.23, "grad_norm": 0.51953125, "learning_rate": 0.00018980494246189698, "loss": 1.5236, "step": 4455 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.00018976518266893304, "loss": 1.4874, "step": 4460 }, { "epoch": 0.23, "grad_norm": 0.51953125, "learning_rate": 0.00018972534967804286, "loss": 1.4537, "step": 4465 }, { "epoch": 0.23, "grad_norm": 0.5078125, "learning_rate": 0.00018968544352170776, "loss": 1.4815, "step": 4470 }, { "epoch": 0.23, "grad_norm": 0.5078125, "learning_rate": 0.00018964546423246871, "loss": 1.4856, "step": 4475 }, { "epoch": 0.23, "grad_norm": 0.5234375, "learning_rate": 0.00018960541184292638, "loss": 1.4881, "step": 4480 }, { "epoch": 0.23, "grad_norm": 0.5234375, "learning_rate": 0.00018956528638574096, "loss": 1.4695, "step": 4485 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.00018952508789363227, "loss": 1.4625, "step": 4490 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.0001894848163993797, "loss": 1.475, "step": 4495 }, { "epoch": 0.23, "grad_norm": 0.5390625, "learning_rate": 0.00018944447193582217, "loss": 1.4838, "step": 4500 }, { "epoch": 0.23, "grad_norm": 0.55078125, "learning_rate": 0.00018940405453585798, "loss": 1.4659, "step": 4505 }, { "epoch": 0.23, "grad_norm": 0.53515625, "learning_rate": 0.00018936356423244512, "loss": 1.5054, "step": 4510 }, { "epoch": 0.23, "grad_norm": 0.515625, "learning_rate": 0.0001893230010586009, "loss": 1.4518, "step": 4515 }, { "epoch": 0.23, "grad_norm": 0.5, "learning_rate": 0.000189282365047402, "loss": 1.4761, "step": 4520 }, { "epoch": 0.23, "grad_norm": 0.5546875, "learning_rate": 0.00018924165623198462, "loss": 1.4798, "step": 4525 }, { "epoch": 0.23, "grad_norm": 0.50390625, "learning_rate": 0.00018920087464554427, "loss": 1.4675, "step": 4530 }, { "epoch": 0.23, "grad_norm": 0.53125, "learning_rate": 0.00018916002032133574, "loss": 1.4589, "step": 4535 }, { "epoch": 0.23, "grad_norm": 0.50390625, "learning_rate": 0.00018911909329267325, "loss": 1.4513, "step": 4540 }, { "epoch": 0.24, "grad_norm": 0.51953125, "learning_rate": 0.00018907809359293025, "loss": 1.4666, "step": 4545 }, { "epoch": 0.24, "grad_norm": 0.5, "learning_rate": 0.0001890370212555394, "loss": 1.4913, "step": 4550 }, { "epoch": 0.24, "grad_norm": 0.53515625, "learning_rate": 0.00018899587631399266, "loss": 1.4915, "step": 4555 }, { "epoch": 0.24, "grad_norm": 0.51953125, "learning_rate": 0.0001889546588018412, "loss": 1.476, "step": 4560 }, { "epoch": 0.24, "grad_norm": 0.51953125, "learning_rate": 0.0001889133687526953, "loss": 1.4865, "step": 4565 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.00018887200620022442, "loss": 1.4912, "step": 4570 }, { "epoch": 0.24, "grad_norm": 0.5234375, "learning_rate": 0.0001888305711781572, "loss": 1.493, "step": 4575 }, { "epoch": 0.24, "grad_norm": 0.5234375, "learning_rate": 0.0001887890637202813, "loss": 1.5079, "step": 4580 }, { "epoch": 0.24, "grad_norm": 0.50390625, "learning_rate": 0.00018874748386044345, "loss": 1.448, "step": 4585 }, { "epoch": 0.24, "grad_norm": 0.50390625, "learning_rate": 0.00018870583163254948, "loss": 1.496, "step": 4590 }, { "epoch": 0.24, "grad_norm": 0.5078125, "learning_rate": 0.00018866410707056417, "loss": 1.5013, "step": 4595 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.0001886223102085113, "loss": 1.4647, "step": 4600 }, { "epoch": 0.24, "grad_norm": 0.54296875, "learning_rate": 0.00018858044108047365, "loss": 1.4696, "step": 4605 }, { "epoch": 0.24, "grad_norm": 0.515625, "learning_rate": 0.00018853849972059282, "loss": 1.4539, "step": 4610 }, { "epoch": 0.24, "grad_norm": 0.494140625, "learning_rate": 0.00018849648616306943, "loss": 1.4826, "step": 4615 }, { "epoch": 0.24, "grad_norm": 0.6328125, "learning_rate": 0.00018845440044216294, "loss": 1.4955, "step": 4620 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.0001884122425921916, "loss": 1.4647, "step": 4625 }, { "epoch": 0.24, "grad_norm": 0.52734375, "learning_rate": 0.00018837001264753256, "loss": 1.4642, "step": 4630 }, { "epoch": 0.24, "grad_norm": 0.5703125, "learning_rate": 0.00018832771064262167, "loss": 1.5195, "step": 4635 }, { "epoch": 0.24, "grad_norm": 0.46875, "learning_rate": 0.0001882853366119536, "loss": 1.4337, "step": 4640 }, { "epoch": 0.24, "grad_norm": 0.5078125, "learning_rate": 0.00018824289059008175, "loss": 1.4322, "step": 4645 }, { "epoch": 0.24, "grad_norm": 0.5, "learning_rate": 0.0001882003726116182, "loss": 1.4621, "step": 4650 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.00018815778271123374, "loss": 1.4737, "step": 4655 }, { "epoch": 0.24, "grad_norm": 0.53125, "learning_rate": 0.00018811512092365776, "loss": 1.481, "step": 4660 }, { "epoch": 0.24, "grad_norm": 0.52734375, "learning_rate": 0.00018807238728367828, "loss": 1.4978, "step": 4665 }, { "epoch": 0.24, "grad_norm": 0.515625, "learning_rate": 0.000188029581826142, "loss": 1.491, "step": 4670 }, { "epoch": 0.24, "grad_norm": 0.53125, "learning_rate": 0.00018798670458595402, "loss": 1.4686, "step": 4675 }, { "epoch": 0.24, "grad_norm": 0.482421875, "learning_rate": 0.0001879437555980781, "loss": 1.4545, "step": 4680 }, { "epoch": 0.24, "grad_norm": 0.53125, "learning_rate": 0.0001879007348975365, "loss": 1.4859, "step": 4685 }, { "epoch": 0.24, "grad_norm": 0.50390625, "learning_rate": 0.0001878576425194099, "loss": 1.4621, "step": 4690 }, { "epoch": 0.24, "grad_norm": 0.515625, "learning_rate": 0.00018781447849883744, "loss": 1.502, "step": 4695 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.00018777124287101672, "loss": 1.4946, "step": 4700 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.0001877279356712037, "loss": 1.4527, "step": 4705 }, { "epoch": 0.24, "grad_norm": 0.51171875, "learning_rate": 0.00018768455693471273, "loss": 1.4475, "step": 4710 }, { "epoch": 0.24, "grad_norm": 0.5546875, "learning_rate": 0.0001876411066969164, "loss": 1.4761, "step": 4715 }, { "epoch": 0.24, "grad_norm": 0.51953125, "learning_rate": 0.00018759758499324578, "loss": 1.4905, "step": 4720 }, { "epoch": 0.24, "grad_norm": 0.515625, "learning_rate": 0.00018755399185919002, "loss": 1.4484, "step": 4725 }, { "epoch": 0.24, "grad_norm": 0.5, "learning_rate": 0.0001875103273302967, "loss": 1.4452, "step": 4730 }, { "epoch": 0.24, "grad_norm": 0.51953125, "learning_rate": 0.00018746659144217148, "loss": 1.4934, "step": 4735 }, { "epoch": 0.25, "grad_norm": 0.5234375, "learning_rate": 0.00018742278423047824, "loss": 1.4497, "step": 4740 }, { "epoch": 0.25, "grad_norm": 0.5, "learning_rate": 0.00018737890573093907, "loss": 1.4335, "step": 4745 }, { "epoch": 0.25, "grad_norm": 0.5, "learning_rate": 0.00018733495597933412, "loss": 1.4861, "step": 4750 }, { "epoch": 0.25, "grad_norm": 0.515625, "learning_rate": 0.00018729093501150174, "loss": 1.4859, "step": 4755 }, { "epoch": 0.25, "grad_norm": 0.52734375, "learning_rate": 0.00018724684286333822, "loss": 1.4934, "step": 4760 }, { "epoch": 0.25, "grad_norm": 0.5078125, "learning_rate": 0.00018720267957079805, "loss": 1.4894, "step": 4765 }, { "epoch": 0.25, "grad_norm": 0.515625, "learning_rate": 0.0001871584451698936, "loss": 1.4395, "step": 4770 }, { "epoch": 0.25, "grad_norm": 0.51953125, "learning_rate": 0.00018711413969669526, "loss": 1.4548, "step": 4775 }, { "epoch": 0.25, "grad_norm": 0.5078125, "learning_rate": 0.00018706976318733141, "loss": 1.486, "step": 4780 }, { "epoch": 0.25, "grad_norm": 0.52734375, "learning_rate": 0.00018702531567798837, "loss": 1.4525, "step": 4785 }, { "epoch": 0.25, "grad_norm": 0.515625, "learning_rate": 0.00018698079720491024, "loss": 1.5032, "step": 4790 }, { "epoch": 0.25, "grad_norm": 0.51171875, "learning_rate": 0.00018693620780439916, "loss": 1.4398, "step": 4795 }, { "epoch": 0.25, "grad_norm": 0.51953125, "learning_rate": 0.00018689154751281494, "loss": 1.4601, "step": 4800 }, { "epoch": 0.25, "grad_norm": 0.5, "learning_rate": 0.00018684681636657529, "loss": 1.4332, "step": 4805 }, { "epoch": 0.25, "grad_norm": 0.53125, "learning_rate": 0.0001868020144021557, "loss": 1.4677, "step": 4810 }, { "epoch": 0.25, "grad_norm": 0.5390625, "learning_rate": 0.00018675714165608935, "loss": 1.445, "step": 4815 }, { "epoch": 0.25, "grad_norm": 0.50390625, "learning_rate": 0.00018671219816496722, "loss": 1.4806, "step": 4820 }, { "epoch": 0.25, "grad_norm": 0.51171875, "learning_rate": 0.00018666718396543792, "loss": 1.4517, "step": 4825 }, { "epoch": 0.25, "grad_norm": 0.53125, "learning_rate": 0.00018662209909420772, "loss": 1.4778, "step": 4830 }, { "epoch": 0.25, "grad_norm": 0.5546875, "learning_rate": 0.0001865769435880405, "loss": 1.4846, "step": 4835 }, { "epoch": 0.25, "grad_norm": 0.51953125, "learning_rate": 0.00018653171748375785, "loss": 1.4785, "step": 4840 }, { "epoch": 0.25, "grad_norm": 0.5078125, "learning_rate": 0.00018648642081823877, "loss": 1.4718, "step": 4845 }, { "epoch": 0.25, "grad_norm": 0.515625, "learning_rate": 0.0001864410536284199, "loss": 1.4274, "step": 4850 }, { "epoch": 0.25, "grad_norm": 0.52734375, "learning_rate": 0.00018639561595129537, "loss": 1.5004, "step": 4855 }, { "epoch": 0.25, "grad_norm": 0.53125, "learning_rate": 0.0001863501078239168, "loss": 1.4704, "step": 4860 }, { "epoch": 0.25, "grad_norm": 0.5078125, "learning_rate": 0.0001863045292833932, "loss": 1.4659, "step": 4865 }, { "epoch": 0.25, "grad_norm": 0.55078125, "learning_rate": 0.00018625888036689103, "loss": 1.4741, "step": 4870 }, { "epoch": 0.25, "grad_norm": 0.53515625, "learning_rate": 0.0001862131611116342, "loss": 1.5111, "step": 4875 }, { "epoch": 0.25, "grad_norm": 0.5, "learning_rate": 0.0001861673715549039, "loss": 1.4657, "step": 4880 }, { "epoch": 0.25, "grad_norm": 0.53125, "learning_rate": 0.0001861215117340386, "loss": 1.4569, "step": 4885 }, { "epoch": 0.25, "grad_norm": 0.51171875, "learning_rate": 0.00018607558168643422, "loss": 1.4685, "step": 4890 }, { "epoch": 0.25, "grad_norm": 0.5078125, "learning_rate": 0.0001860295814495438, "loss": 1.4891, "step": 4895 }, { "epoch": 0.25, "grad_norm": 0.55078125, "learning_rate": 0.00018598351106087772, "loss": 1.4572, "step": 4900 }, { "epoch": 0.25, "grad_norm": 0.51953125, "learning_rate": 0.0001859373705580035, "loss": 1.4894, "step": 4905 }, { "epoch": 0.25, "grad_norm": 0.515625, "learning_rate": 0.00018589115997854586, "loss": 1.4507, "step": 4910 }, { "epoch": 0.25, "grad_norm": 0.53515625, "learning_rate": 0.00018584487936018661, "loss": 1.4602, "step": 4915 }, { "epoch": 0.25, "grad_norm": 0.5078125, "learning_rate": 0.00018579852874066476, "loss": 1.4457, "step": 4920 }, { "epoch": 0.25, "grad_norm": 0.50390625, "learning_rate": 0.0001857521081577764, "loss": 1.462, "step": 4925 }, { "epoch": 0.26, "grad_norm": 0.51953125, "learning_rate": 0.0001857056176493745, "loss": 1.4695, "step": 4930 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.00018565905725336933, "loss": 1.4426, "step": 4935 }, { "epoch": 0.26, "grad_norm": 0.51953125, "learning_rate": 0.00018561242700772788, "loss": 1.5005, "step": 4940 }, { "epoch": 0.26, "grad_norm": 0.53125, "learning_rate": 0.00018556572695047427, "loss": 1.4763, "step": 4945 }, { "epoch": 0.26, "grad_norm": 0.498046875, "learning_rate": 0.0001855189571196895, "loss": 1.4547, "step": 4950 }, { "epoch": 0.26, "grad_norm": 0.5390625, "learning_rate": 0.00018547211755351147, "loss": 1.4776, "step": 4955 }, { "epoch": 0.26, "grad_norm": 0.515625, "learning_rate": 0.0001854252082901349, "loss": 1.4449, "step": 4960 }, { "epoch": 0.26, "grad_norm": 0.53515625, "learning_rate": 0.00018537822936781132, "loss": 1.4937, "step": 4965 }, { "epoch": 0.26, "grad_norm": 0.52734375, "learning_rate": 0.00018533118082484927, "loss": 1.4612, "step": 4970 }, { "epoch": 0.26, "grad_norm": 0.515625, "learning_rate": 0.0001852840626996138, "loss": 1.4653, "step": 4975 }, { "epoch": 0.26, "grad_norm": 0.5, "learning_rate": 0.00018523687503052685, "loss": 1.4569, "step": 4980 }, { "epoch": 0.26, "grad_norm": 0.5, "learning_rate": 0.00018518961785606703, "loss": 1.4476, "step": 4985 }, { "epoch": 0.26, "grad_norm": 0.5390625, "learning_rate": 0.00018514229121476962, "loss": 1.4912, "step": 4990 }, { "epoch": 0.26, "grad_norm": 0.51171875, "learning_rate": 0.00018509489514522657, "loss": 1.4891, "step": 4995 }, { "epoch": 0.26, "grad_norm": 0.52734375, "learning_rate": 0.00018504742968608639, "loss": 1.4496, "step": 5000 }, { "epoch": 0.26, "grad_norm": 0.515625, "learning_rate": 0.00018499989487605423, "loss": 1.4243, "step": 5005 }, { "epoch": 0.26, "grad_norm": 0.50390625, "learning_rate": 0.00018495229075389183, "loss": 1.4679, "step": 5010 }, { "epoch": 0.26, "grad_norm": 0.52734375, "learning_rate": 0.00018490461735841732, "loss": 1.4744, "step": 5015 }, { "epoch": 0.26, "grad_norm": 0.51953125, "learning_rate": 0.0001848568747285054, "loss": 1.483, "step": 5020 }, { "epoch": 0.26, "grad_norm": 0.53125, "learning_rate": 0.00018480906290308722, "loss": 1.4682, "step": 5025 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.00018476118192115037, "loss": 1.4677, "step": 5030 }, { "epoch": 0.26, "grad_norm": 0.5, "learning_rate": 0.00018471323182173884, "loss": 1.4572, "step": 5035 }, { "epoch": 0.26, "grad_norm": 0.515625, "learning_rate": 0.00018466521264395288, "loss": 1.4501, "step": 5040 }, { "epoch": 0.26, "grad_norm": 0.51171875, "learning_rate": 0.0001846171244269492, "loss": 1.486, "step": 5045 }, { "epoch": 0.26, "grad_norm": 0.51171875, "learning_rate": 0.00018456896720994072, "loss": 1.484, "step": 5050 }, { "epoch": 0.26, "grad_norm": 0.54296875, "learning_rate": 0.0001845207410321967, "loss": 1.4618, "step": 5055 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.00018447244593304253, "loss": 1.4758, "step": 5060 }, { "epoch": 0.26, "grad_norm": 0.515625, "learning_rate": 0.0001844240819518599, "loss": 1.4699, "step": 5065 }, { "epoch": 0.26, "grad_norm": 0.5234375, "learning_rate": 0.00018437564912808665, "loss": 1.4397, "step": 5070 }, { "epoch": 0.26, "grad_norm": 0.50390625, "learning_rate": 0.0001843271475012167, "loss": 1.4299, "step": 5075 }, { "epoch": 0.26, "grad_norm": 0.5546875, "learning_rate": 0.00018427857711080013, "loss": 1.4455, "step": 5080 }, { "epoch": 0.26, "grad_norm": 0.5078125, "learning_rate": 0.00018422993799644302, "loss": 1.5103, "step": 5085 }, { "epoch": 0.26, "grad_norm": 0.53515625, "learning_rate": 0.00018418123019780765, "loss": 1.5017, "step": 5090 }, { "epoch": 0.26, "grad_norm": 0.5390625, "learning_rate": 0.0001841324537546121, "loss": 1.481, "step": 5095 }, { "epoch": 0.26, "grad_norm": 0.51171875, "learning_rate": 0.00018408360870663063, "loss": 1.4721, "step": 5100 }, { "epoch": 0.26, "grad_norm": 0.4921875, "learning_rate": 0.0001840346950936932, "loss": 1.4436, "step": 5105 }, { "epoch": 0.26, "grad_norm": 0.498046875, "learning_rate": 0.00018398571295568595, "loss": 1.4581, "step": 5110 }, { "epoch": 0.26, "grad_norm": 0.51171875, "learning_rate": 0.00018393666233255073, "loss": 1.4752, "step": 5115 }, { "epoch": 0.26, "grad_norm": 0.51171875, "learning_rate": 0.00018388754326428524, "loss": 1.5162, "step": 5120 }, { "epoch": 0.27, "grad_norm": 0.5390625, "learning_rate": 0.00018383835579094304, "loss": 1.4762, "step": 5125 }, { "epoch": 0.27, "grad_norm": 0.53125, "learning_rate": 0.0001837890999526335, "loss": 1.431, "step": 5130 }, { "epoch": 0.27, "grad_norm": 0.5390625, "learning_rate": 0.0001837397757895216, "loss": 1.4589, "step": 5135 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.00018369038334182825, "loss": 1.4569, "step": 5140 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018364092264982985, "loss": 1.4379, "step": 5145 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018359139375385852, "loss": 1.4665, "step": 5150 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.000183541796694302, "loss": 1.4636, "step": 5155 }, { "epoch": 0.27, "grad_norm": 0.50390625, "learning_rate": 0.00018349213151160366, "loss": 1.4701, "step": 5160 }, { "epoch": 0.27, "grad_norm": 0.5234375, "learning_rate": 0.00018344239824626227, "loss": 1.4937, "step": 5165 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.0001833925969388323, "loss": 1.5028, "step": 5170 }, { "epoch": 0.27, "grad_norm": 0.5078125, "learning_rate": 0.00018334272762992354, "loss": 1.4863, "step": 5175 }, { "epoch": 0.27, "grad_norm": 0.50390625, "learning_rate": 0.0001832927903602014, "loss": 1.4618, "step": 5180 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.0001832427851703866, "loss": 1.5137, "step": 5185 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.00018319271210125523, "loss": 1.4636, "step": 5190 }, { "epoch": 0.27, "grad_norm": 0.50390625, "learning_rate": 0.00018314257119363876, "loss": 1.4666, "step": 5195 }, { "epoch": 0.27, "grad_norm": 0.52734375, "learning_rate": 0.00018309236248842403, "loss": 1.4487, "step": 5200 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018304208602655306, "loss": 1.4146, "step": 5205 }, { "epoch": 0.27, "grad_norm": 0.5078125, "learning_rate": 0.00018299174184902323, "loss": 1.442, "step": 5210 }, { "epoch": 0.27, "grad_norm": 0.498046875, "learning_rate": 0.0001829413299968871, "loss": 1.4594, "step": 5215 }, { "epoch": 0.27, "grad_norm": 0.498046875, "learning_rate": 0.00018289085051125233, "loss": 1.4576, "step": 5220 }, { "epoch": 0.27, "grad_norm": 0.50390625, "learning_rate": 0.00018284030343328181, "loss": 1.4952, "step": 5225 }, { "epoch": 0.27, "grad_norm": 0.5546875, "learning_rate": 0.00018278968880419363, "loss": 1.4293, "step": 5230 }, { "epoch": 0.27, "grad_norm": 0.50390625, "learning_rate": 0.00018273900666526078, "loss": 1.4402, "step": 5235 }, { "epoch": 0.27, "grad_norm": 0.53515625, "learning_rate": 0.00018268825705781145, "loss": 1.4513, "step": 5240 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018263744002322874, "loss": 1.4658, "step": 5245 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.00018258655560295087, "loss": 1.4694, "step": 5250 }, { "epoch": 0.27, "grad_norm": 0.51953125, "learning_rate": 0.00018253560383847082, "loss": 1.4721, "step": 5255 }, { "epoch": 0.27, "grad_norm": 0.53125, "learning_rate": 0.00018248458477133662, "loss": 1.4458, "step": 5260 }, { "epoch": 0.27, "grad_norm": 0.52734375, "learning_rate": 0.00018243349844315117, "loss": 1.4631, "step": 5265 }, { "epoch": 0.27, "grad_norm": 0.53125, "learning_rate": 0.00018238234489557215, "loss": 1.4368, "step": 5270 }, { "epoch": 0.27, "grad_norm": 0.51953125, "learning_rate": 0.0001823311241703122, "loss": 1.4617, "step": 5275 }, { "epoch": 0.27, "grad_norm": 0.5546875, "learning_rate": 0.0001822798363091385, "loss": 1.4589, "step": 5280 }, { "epoch": 0.27, "grad_norm": 0.51171875, "learning_rate": 0.00018222848135387323, "loss": 1.4841, "step": 5285 }, { "epoch": 0.27, "grad_norm": 0.51953125, "learning_rate": 0.0001821770593463931, "loss": 1.4629, "step": 5290 }, { "epoch": 0.27, "grad_norm": 0.515625, "learning_rate": 0.00018212557032862953, "loss": 1.4797, "step": 5295 }, { "epoch": 0.27, "grad_norm": 0.53125, "learning_rate": 0.0001820740143425687, "loss": 1.4725, "step": 5300 }, { "epoch": 0.27, "grad_norm": 0.5078125, "learning_rate": 0.00018202239143025125, "loss": 1.4393, "step": 5305 }, { "epoch": 0.27, "grad_norm": 0.49609375, "learning_rate": 0.00018197070163377248, "loss": 1.4215, "step": 5310 }, { "epoch": 0.27, "grad_norm": 0.53515625, "learning_rate": 0.0001819189449952822, "loss": 1.5017, "step": 5315 }, { "epoch": 0.28, "grad_norm": 0.5078125, "learning_rate": 0.00018186712155698475, "loss": 1.4348, "step": 5320 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.0001818152313611389, "loss": 1.4518, "step": 5325 }, { "epoch": 0.28, "grad_norm": 0.53515625, "learning_rate": 0.00018176327445005788, "loss": 1.4392, "step": 5330 }, { "epoch": 0.28, "grad_norm": 0.5390625, "learning_rate": 0.0001817112508661093, "loss": 1.4806, "step": 5335 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.0001816591606517152, "loss": 1.4508, "step": 5340 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.0001816070038493519, "loss": 1.4718, "step": 5345 }, { "epoch": 0.28, "grad_norm": 0.5546875, "learning_rate": 0.00018155478050155, "loss": 1.4973, "step": 5350 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.00018150249065089445, "loss": 1.466, "step": 5355 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.00018145013434002434, "loss": 1.4631, "step": 5360 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.00018139771161163295, "loss": 1.4552, "step": 5365 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.00018134522250846783, "loss": 1.4604, "step": 5370 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.00018129266707333052, "loss": 1.4751, "step": 5375 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.00018124004534907675, "loss": 1.4653, "step": 5380 }, { "epoch": 0.28, "grad_norm": 0.498046875, "learning_rate": 0.00018118735737861625, "loss": 1.4767, "step": 5385 }, { "epoch": 0.28, "grad_norm": 0.494140625, "learning_rate": 0.00018113460320491278, "loss": 1.4715, "step": 5390 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.0001810817828709841, "loss": 1.4591, "step": 5395 }, { "epoch": 0.28, "grad_norm": 0.53515625, "learning_rate": 0.0001810288964199019, "loss": 1.458, "step": 5400 }, { "epoch": 0.28, "grad_norm": 0.52734375, "learning_rate": 0.00018097594389479178, "loss": 1.4588, "step": 5405 }, { "epoch": 0.28, "grad_norm": 0.474609375, "learning_rate": 0.00018092292533883325, "loss": 1.4609, "step": 5410 }, { "epoch": 0.28, "grad_norm": 0.51953125, "learning_rate": 0.00018086984079525965, "loss": 1.4566, "step": 5415 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.00018081669030735814, "loss": 1.4482, "step": 5420 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.0001807634739184696, "loss": 1.4717, "step": 5425 }, { "epoch": 0.28, "grad_norm": 0.55078125, "learning_rate": 0.00018071019167198872, "loss": 1.4687, "step": 5430 }, { "epoch": 0.28, "grad_norm": 0.53515625, "learning_rate": 0.0001806568436113638, "loss": 1.4822, "step": 5435 }, { "epoch": 0.28, "grad_norm": 0.52734375, "learning_rate": 0.00018060342978009697, "loss": 1.489, "step": 5440 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.00018054995022174377, "loss": 1.4717, "step": 5445 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.00018049640497991355, "loss": 1.4647, "step": 5450 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.0001804427940982691, "loss": 1.4401, "step": 5455 }, { "epoch": 0.28, "grad_norm": 0.51171875, "learning_rate": 0.00018038911762052675, "loss": 1.4547, "step": 5460 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.00018033537559045633, "loss": 1.4169, "step": 5465 }, { "epoch": 0.28, "grad_norm": 0.52734375, "learning_rate": 0.00018028156805188113, "loss": 1.4402, "step": 5470 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 0.00018022769504867788, "loss": 1.3978, "step": 5475 }, { "epoch": 0.28, "grad_norm": 0.53515625, "learning_rate": 0.00018017375662477658, "loss": 1.4334, "step": 5480 }, { "epoch": 0.28, "grad_norm": 0.5078125, "learning_rate": 0.00018011975282416077, "loss": 1.5001, "step": 5485 }, { "epoch": 0.28, "grad_norm": 0.51171875, "learning_rate": 0.00018006568369086708, "loss": 1.471, "step": 5490 }, { "epoch": 0.28, "grad_norm": 0.52734375, "learning_rate": 0.00018001154926898565, "loss": 1.472, "step": 5495 }, { "epoch": 0.28, "grad_norm": 0.515625, "learning_rate": 0.00017995734960265963, "loss": 1.4583, "step": 5500 }, { "epoch": 0.28, "grad_norm": 0.53125, "learning_rate": 0.00017990308473608555, "loss": 1.4341, "step": 5505 }, { "epoch": 0.29, "grad_norm": 0.50390625, "learning_rate": 0.00017984875471351302, "loss": 1.4571, "step": 5510 }, { "epoch": 0.29, "grad_norm": 0.5546875, "learning_rate": 0.00017979435957924476, "loss": 1.4274, "step": 5515 }, { "epoch": 0.29, "grad_norm": 0.53515625, "learning_rate": 0.00017973989937763665, "loss": 1.436, "step": 5520 }, { "epoch": 0.29, "grad_norm": 0.5078125, "learning_rate": 0.0001796853741530976, "loss": 1.4752, "step": 5525 }, { "epoch": 0.29, "grad_norm": 1.0078125, "learning_rate": 0.0001796307839500895, "loss": 1.4609, "step": 5530 }, { "epoch": 0.29, "grad_norm": 0.51171875, "learning_rate": 0.00017957612881312732, "loss": 1.4496, "step": 5535 }, { "epoch": 0.29, "grad_norm": 0.51171875, "learning_rate": 0.00017952140878677895, "loss": 1.4494, "step": 5540 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.0001794666239156651, "loss": 1.4503, "step": 5545 }, { "epoch": 0.29, "grad_norm": 0.52734375, "learning_rate": 0.00017941177424445943, "loss": 1.4531, "step": 5550 }, { "epoch": 0.29, "grad_norm": 0.5234375, "learning_rate": 0.00017935685981788847, "loss": 1.4794, "step": 5555 }, { "epoch": 0.29, "grad_norm": 0.50390625, "learning_rate": 0.00017930188068073153, "loss": 1.4549, "step": 5560 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017924683687782066, "loss": 1.4862, "step": 5565 }, { "epoch": 0.29, "grad_norm": 0.53515625, "learning_rate": 0.00017919172845404067, "loss": 1.4818, "step": 5570 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017913655545432903, "loss": 1.4655, "step": 5575 }, { "epoch": 0.29, "grad_norm": 0.5078125, "learning_rate": 0.00017908131792367587, "loss": 1.4568, "step": 5580 }, { "epoch": 0.29, "grad_norm": 0.52734375, "learning_rate": 0.00017902601590712408, "loss": 1.4522, "step": 5585 }, { "epoch": 0.29, "grad_norm": 0.5078125, "learning_rate": 0.00017897064944976887, "loss": 1.4274, "step": 5590 }, { "epoch": 0.29, "grad_norm": 0.5234375, "learning_rate": 0.00017891521859675824, "loss": 1.4413, "step": 5595 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017885972339329255, "loss": 1.4872, "step": 5600 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017880416388462472, "loss": 1.4727, "step": 5605 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.0001787485401160601, "loss": 1.4454, "step": 5610 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017869285213295634, "loss": 1.4622, "step": 5615 }, { "epoch": 0.29, "grad_norm": 0.5234375, "learning_rate": 0.0001786370999807236, "loss": 1.4323, "step": 5620 }, { "epoch": 0.29, "grad_norm": 0.5234375, "learning_rate": 0.00017858128370482426, "loss": 1.4826, "step": 5625 }, { "epoch": 0.29, "grad_norm": 0.52734375, "learning_rate": 0.00017852540335077302, "loss": 1.468, "step": 5630 }, { "epoch": 0.29, "grad_norm": 0.498046875, "learning_rate": 0.00017846945896413685, "loss": 1.4539, "step": 5635 }, { "epoch": 0.29, "grad_norm": 0.53125, "learning_rate": 0.00017841345059053492, "loss": 1.4769, "step": 5640 }, { "epoch": 0.29, "grad_norm": 0.53515625, "learning_rate": 0.00017835737827563857, "loss": 1.4582, "step": 5645 }, { "epoch": 0.29, "grad_norm": 0.52734375, "learning_rate": 0.00017830124206517128, "loss": 1.4439, "step": 5650 }, { "epoch": 0.29, "grad_norm": 0.546875, "learning_rate": 0.00017824504200490866, "loss": 1.4926, "step": 5655 }, { "epoch": 0.29, "grad_norm": 0.5078125, "learning_rate": 0.00017818877814067833, "loss": 1.4687, "step": 5660 }, { "epoch": 0.29, "grad_norm": 0.5703125, "learning_rate": 0.00017813245051836, "loss": 1.4605, "step": 5665 }, { "epoch": 0.29, "grad_norm": 0.51953125, "learning_rate": 0.00017807605918388538, "loss": 1.4395, "step": 5670 }, { "epoch": 0.29, "grad_norm": 0.51171875, "learning_rate": 0.00017801960418323802, "loss": 1.4392, "step": 5675 }, { "epoch": 0.29, "grad_norm": 0.5390625, "learning_rate": 0.0001779630855624535, "loss": 1.4397, "step": 5680 }, { "epoch": 0.29, "grad_norm": 0.5078125, "learning_rate": 0.00017790650336761926, "loss": 1.4667, "step": 5685 }, { "epoch": 0.29, "grad_norm": 0.53515625, "learning_rate": 0.00017784985764487455, "loss": 1.4608, "step": 5690 }, { "epoch": 0.29, "grad_norm": 0.5078125, "learning_rate": 0.00017779314844041047, "loss": 1.4725, "step": 5695 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 0.00017773637580046974, "loss": 1.4486, "step": 5700 }, { "epoch": 0.3, "grad_norm": 0.52734375, "learning_rate": 0.00017767953977134704, "loss": 1.4713, "step": 5705 }, { "epoch": 0.3, "grad_norm": 0.5234375, "learning_rate": 0.00017762264039938855, "loss": 1.4404, "step": 5710 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.0001775656777309922, "loss": 1.4726, "step": 5715 }, { "epoch": 0.3, "grad_norm": 0.51953125, "learning_rate": 0.0001775086518126075, "loss": 1.472, "step": 5720 }, { "epoch": 0.3, "grad_norm": 0.51171875, "learning_rate": 0.00017745156269073555, "loss": 1.412, "step": 5725 }, { "epoch": 0.3, "grad_norm": 0.53125, "learning_rate": 0.00017739441041192896, "loss": 1.4437, "step": 5730 }, { "epoch": 0.3, "grad_norm": 0.51171875, "learning_rate": 0.00017733719502279185, "loss": 1.4726, "step": 5735 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.00017727991656997985, "loss": 1.4665, "step": 5740 }, { "epoch": 0.3, "grad_norm": 0.546875, "learning_rate": 0.00017722257510019996, "loss": 1.4804, "step": 5745 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.00017716517066021056, "loss": 1.4558, "step": 5750 }, { "epoch": 0.3, "grad_norm": 0.5, "learning_rate": 0.00017710770329682144, "loss": 1.4385, "step": 5755 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.00017705017305689365, "loss": 1.4651, "step": 5760 }, { "epoch": 0.3, "grad_norm": 0.53125, "learning_rate": 0.00017699257998733952, "loss": 1.4543, "step": 5765 }, { "epoch": 0.3, "grad_norm": 0.515625, "learning_rate": 0.00017693492413512263, "loss": 1.4433, "step": 5770 }, { "epoch": 0.3, "grad_norm": 0.515625, "learning_rate": 0.00017687720554725772, "loss": 1.4974, "step": 5775 }, { "epoch": 0.3, "grad_norm": 0.515625, "learning_rate": 0.00017681942427081072, "loss": 1.4828, "step": 5780 }, { "epoch": 0.3, "grad_norm": 0.49609375, "learning_rate": 0.00017676158035289868, "loss": 1.4557, "step": 5785 }, { "epoch": 0.3, "grad_norm": 0.48828125, "learning_rate": 0.00017670367384068971, "loss": 1.4556, "step": 5790 }, { "epoch": 0.3, "grad_norm": 0.50390625, "learning_rate": 0.00017664570478140296, "loss": 1.4639, "step": 5795 }, { "epoch": 0.3, "grad_norm": 0.51953125, "learning_rate": 0.00017658767322230862, "loss": 1.4575, "step": 5800 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.00017652957921072783, "loss": 1.4221, "step": 5805 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.0001764714227940326, "loss": 1.4913, "step": 5810 }, { "epoch": 0.3, "grad_norm": 0.515625, "learning_rate": 0.0001764132040196459, "loss": 1.477, "step": 5815 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.0001763549229350415, "loss": 1.4424, "step": 5820 }, { "epoch": 0.3, "grad_norm": 0.50390625, "learning_rate": 0.00017629657958774403, "loss": 1.4894, "step": 5825 }, { "epoch": 0.3, "grad_norm": 0.51953125, "learning_rate": 0.00017623817402532884, "loss": 1.4499, "step": 5830 }, { "epoch": 0.3, "grad_norm": 0.5078125, "learning_rate": 0.00017617970629542207, "loss": 1.4797, "step": 5835 }, { "epoch": 0.3, "grad_norm": 0.53125, "learning_rate": 0.00017612117644570047, "loss": 1.4671, "step": 5840 }, { "epoch": 0.3, "grad_norm": 0.498046875, "learning_rate": 0.0001760625845238915, "loss": 1.4581, "step": 5845 }, { "epoch": 0.3, "grad_norm": 0.5390625, "learning_rate": 0.0001760039305777733, "loss": 1.4995, "step": 5850 }, { "epoch": 0.3, "grad_norm": 0.50390625, "learning_rate": 0.0001759452146551744, "loss": 1.4475, "step": 5855 }, { "epoch": 0.3, "grad_norm": 0.58203125, "learning_rate": 0.00017588643680397408, "loss": 1.4706, "step": 5860 }, { "epoch": 0.3, "grad_norm": 3.921875, "learning_rate": 0.00017582759707210203, "loss": 1.4339, "step": 5865 }, { "epoch": 0.3, "grad_norm": 0.51953125, "learning_rate": 0.0001757686955075383, "loss": 1.4899, "step": 5870 }, { "epoch": 0.3, "grad_norm": 0.5546875, "learning_rate": 0.00017570973215831357, "loss": 1.4016, "step": 5875 }, { "epoch": 0.3, "grad_norm": 0.5234375, "learning_rate": 0.00017565070707250868, "loss": 1.4766, "step": 5880 }, { "epoch": 0.3, "grad_norm": 0.515625, "learning_rate": 0.000175591620298255, "loss": 1.4583, "step": 5885 }, { "epoch": 0.3, "grad_norm": 0.50390625, "learning_rate": 0.00017553247188373402, "loss": 1.4627, "step": 5890 }, { "epoch": 0.3, "grad_norm": 0.515625, "learning_rate": 0.00017547326187717773, "loss": 1.476, "step": 5895 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.00017541399032686811, "loss": 1.4523, "step": 5900 }, { "epoch": 0.31, "grad_norm": 0.5, "learning_rate": 0.00017535465728113746, "loss": 1.4557, "step": 5905 }, { "epoch": 0.31, "grad_norm": 0.50390625, "learning_rate": 0.0001752952627883682, "loss": 1.4563, "step": 5910 }, { "epoch": 0.31, "grad_norm": 0.5, "learning_rate": 0.0001752358068969928, "loss": 1.4079, "step": 5915 }, { "epoch": 0.31, "grad_norm": 0.5078125, "learning_rate": 0.0001751762896554939, "loss": 1.4517, "step": 5920 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.0001751167111124041, "loss": 1.4412, "step": 5925 }, { "epoch": 0.31, "grad_norm": 0.498046875, "learning_rate": 0.00017505707131630597, "loss": 1.4473, "step": 5930 }, { "epoch": 0.31, "grad_norm": 0.51171875, "learning_rate": 0.00017499737031583207, "loss": 1.4326, "step": 5935 }, { "epoch": 0.31, "grad_norm": 0.5390625, "learning_rate": 0.00017493760815966486, "loss": 1.4869, "step": 5940 }, { "epoch": 0.31, "grad_norm": 0.51171875, "learning_rate": 0.00017487778489653667, "loss": 1.4441, "step": 5945 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.00017481790057522964, "loss": 1.4568, "step": 5950 }, { "epoch": 0.31, "grad_norm": 0.5234375, "learning_rate": 0.00017475795524457568, "loss": 1.4663, "step": 5955 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017469794895345656, "loss": 1.4548, "step": 5960 }, { "epoch": 0.31, "grad_norm": 0.48828125, "learning_rate": 0.0001746378817508036, "loss": 1.4363, "step": 5965 }, { "epoch": 0.31, "grad_norm": 0.5078125, "learning_rate": 0.00017457775368559793, "loss": 1.463, "step": 5970 }, { "epoch": 0.31, "grad_norm": 0.515625, "learning_rate": 0.00017451756480687017, "loss": 1.4627, "step": 5975 }, { "epoch": 0.31, "grad_norm": 0.515625, "learning_rate": 0.0001744573151637007, "loss": 1.4662, "step": 5980 }, { "epoch": 0.31, "grad_norm": 0.486328125, "learning_rate": 0.00017439700480521934, "loss": 1.4552, "step": 5985 }, { "epoch": 0.31, "grad_norm": 0.498046875, "learning_rate": 0.0001743366337806054, "loss": 1.4738, "step": 5990 }, { "epoch": 0.31, "grad_norm": 0.5234375, "learning_rate": 0.0001742762021390877, "loss": 1.4809, "step": 5995 }, { "epoch": 0.31, "grad_norm": 0.52734375, "learning_rate": 0.0001742157099299445, "loss": 1.4729, "step": 6000 }, { "epoch": 0.31, "grad_norm": 0.53125, "learning_rate": 0.00017415515720250346, "loss": 1.4271, "step": 6005 }, { "epoch": 0.31, "grad_norm": 0.52734375, "learning_rate": 0.00017409454400614153, "loss": 1.4528, "step": 6010 }, { "epoch": 0.31, "grad_norm": 0.51171875, "learning_rate": 0.00017403387039028503, "loss": 1.4727, "step": 6015 }, { "epoch": 0.31, "grad_norm": 0.5234375, "learning_rate": 0.0001739731364044095, "loss": 1.4952, "step": 6020 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.00017391234209803975, "loss": 1.4583, "step": 6025 }, { "epoch": 0.31, "grad_norm": 0.6171875, "learning_rate": 0.00017385148752074975, "loss": 1.4748, "step": 6030 }, { "epoch": 0.31, "grad_norm": 0.5234375, "learning_rate": 0.0001737905727221626, "loss": 1.4519, "step": 6035 }, { "epoch": 0.31, "grad_norm": 0.515625, "learning_rate": 0.00017372959775195057, "loss": 1.4362, "step": 6040 }, { "epoch": 0.31, "grad_norm": 0.50390625, "learning_rate": 0.00017366856265983493, "loss": 1.4305, "step": 6045 }, { "epoch": 0.31, "grad_norm": 0.53515625, "learning_rate": 0.00017360746749558602, "loss": 1.4634, "step": 6050 }, { "epoch": 0.31, "grad_norm": 0.51953125, "learning_rate": 0.00017354631230902316, "loss": 1.4423, "step": 6055 }, { "epoch": 0.31, "grad_norm": 0.5, "learning_rate": 0.00017348509715001457, "loss": 1.3851, "step": 6060 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 0.00017342382206847744, "loss": 1.4521, "step": 6065 }, { "epoch": 0.31, "grad_norm": 0.51171875, "learning_rate": 0.00017336248711437774, "loss": 1.4607, "step": 6070 }, { "epoch": 0.31, "grad_norm": 0.53515625, "learning_rate": 0.00017330109233773037, "loss": 1.4446, "step": 6075 }, { "epoch": 0.31, "grad_norm": 0.5078125, "learning_rate": 0.00017323963778859892, "loss": 1.4198, "step": 6080 }, { "epoch": 0.31, "grad_norm": 0.5, "learning_rate": 0.00017317812351709576, "loss": 1.4496, "step": 6085 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.00017311654957338196, "loss": 1.4998, "step": 6090 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.00017305491600766725, "loss": 1.4964, "step": 6095 }, { "epoch": 0.32, "grad_norm": 0.5078125, "learning_rate": 0.00017299322287020995, "loss": 1.467, "step": 6100 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.00017293147021131701, "loss": 1.4281, "step": 6105 }, { "epoch": 0.32, "grad_norm": 0.55078125, "learning_rate": 0.00017286965808134387, "loss": 1.4406, "step": 6110 }, { "epoch": 0.32, "grad_norm": 0.51953125, "learning_rate": 0.00017280778653069442, "loss": 1.446, "step": 6115 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017274585560982117, "loss": 1.4484, "step": 6120 }, { "epoch": 0.32, "grad_norm": 0.5390625, "learning_rate": 0.00017268386536922487, "loss": 1.457, "step": 6125 }, { "epoch": 0.32, "grad_norm": 0.515625, "learning_rate": 0.00017262181585945473, "loss": 1.4568, "step": 6130 }, { "epoch": 0.32, "grad_norm": 0.51171875, "learning_rate": 0.00017255970713110825, "loss": 1.4554, "step": 6135 }, { "epoch": 0.32, "grad_norm": 0.498046875, "learning_rate": 0.00017249753923483124, "loss": 1.4656, "step": 6140 }, { "epoch": 0.32, "grad_norm": 0.51953125, "learning_rate": 0.00017243531222131778, "loss": 1.466, "step": 6145 }, { "epoch": 0.32, "grad_norm": 0.53125, "learning_rate": 0.0001723730261413101, "loss": 1.4508, "step": 6150 }, { "epoch": 0.32, "grad_norm": 0.66796875, "learning_rate": 0.00017231068104559864, "loss": 1.4772, "step": 6155 }, { "epoch": 0.32, "grad_norm": 0.5078125, "learning_rate": 0.00017224827698502195, "loss": 1.4592, "step": 6160 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.00017218581401046666, "loss": 1.4591, "step": 6165 }, { "epoch": 0.32, "grad_norm": 0.56640625, "learning_rate": 0.00017212329217286743, "loss": 1.4274, "step": 6170 }, { "epoch": 0.32, "grad_norm": 0.52734375, "learning_rate": 0.000172060711523207, "loss": 1.4351, "step": 6175 }, { "epoch": 0.32, "grad_norm": 0.57421875, "learning_rate": 0.00017199807211251588, "loss": 1.4678, "step": 6180 }, { "epoch": 0.32, "grad_norm": 0.51171875, "learning_rate": 0.00017193537399187272, "loss": 1.4544, "step": 6185 }, { "epoch": 0.32, "grad_norm": 0.5, "learning_rate": 0.00017187261721240388, "loss": 1.4647, "step": 6190 }, { "epoch": 0.32, "grad_norm": 0.5390625, "learning_rate": 0.00017180980182528364, "loss": 1.4611, "step": 6195 }, { "epoch": 0.32, "grad_norm": 0.51171875, "learning_rate": 0.00017174692788173403, "loss": 1.4442, "step": 6200 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017168399543302486, "loss": 1.4441, "step": 6205 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.00017162100453047363, "loss": 1.4559, "step": 6210 }, { "epoch": 0.32, "grad_norm": 0.51171875, "learning_rate": 0.00017155795522544548, "loss": 1.449, "step": 6215 }, { "epoch": 0.32, "grad_norm": 0.5703125, "learning_rate": 0.0001714948475693532, "loss": 1.4631, "step": 6220 }, { "epoch": 0.32, "grad_norm": 0.50390625, "learning_rate": 0.0001714316816136572, "loss": 1.4371, "step": 6225 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017136845740986533, "loss": 1.4613, "step": 6230 }, { "epoch": 0.32, "grad_norm": 0.5234375, "learning_rate": 0.00017130517500953306, "loss": 1.4082, "step": 6235 }, { "epoch": 0.32, "grad_norm": 0.5390625, "learning_rate": 0.0001712418344642632, "loss": 1.4541, "step": 6240 }, { "epoch": 0.32, "grad_norm": 0.55859375, "learning_rate": 0.00017117843582570608, "loss": 1.4396, "step": 6245 }, { "epoch": 0.32, "grad_norm": 0.50390625, "learning_rate": 0.0001711149791455593, "loss": 1.4454, "step": 6250 }, { "epoch": 0.32, "grad_norm": 0.54296875, "learning_rate": 0.00017105146447556787, "loss": 1.4688, "step": 6255 }, { "epoch": 0.32, "grad_norm": 0.515625, "learning_rate": 0.00017098789186752403, "loss": 1.5107, "step": 6260 }, { "epoch": 0.32, "grad_norm": 0.494140625, "learning_rate": 0.0001709242613732673, "loss": 1.4628, "step": 6265 }, { "epoch": 0.32, "grad_norm": 0.53515625, "learning_rate": 0.0001708605730446844, "loss": 1.4343, "step": 6270 }, { "epoch": 0.32, "grad_norm": 0.515625, "learning_rate": 0.0001707968269337092, "loss": 1.4394, "step": 6275 }, { "epoch": 0.32, "grad_norm": 0.546875, "learning_rate": 0.00017073302309232268, "loss": 1.4945, "step": 6280 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00017066916157255292, "loss": 1.4309, "step": 6285 }, { "epoch": 0.33, "grad_norm": 0.50390625, "learning_rate": 0.00017060524242647502, "loss": 1.4539, "step": 6290 }, { "epoch": 0.33, "grad_norm": 0.498046875, "learning_rate": 0.00017054126570621107, "loss": 1.4287, "step": 6295 }, { "epoch": 0.33, "grad_norm": 0.494140625, "learning_rate": 0.00017047723146393012, "loss": 1.4452, "step": 6300 }, { "epoch": 0.33, "grad_norm": 0.5234375, "learning_rate": 0.00017041313975184807, "loss": 1.484, "step": 6305 }, { "epoch": 0.33, "grad_norm": 0.5, "learning_rate": 0.00017034899062222776, "loss": 1.4645, "step": 6310 }, { "epoch": 0.33, "grad_norm": 0.546875, "learning_rate": 0.00017028478412737882, "loss": 1.4366, "step": 6315 }, { "epoch": 0.33, "grad_norm": 0.515625, "learning_rate": 0.00017022052031965762, "loss": 1.4023, "step": 6320 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00017015619925146735, "loss": 1.4506, "step": 6325 }, { "epoch": 0.33, "grad_norm": 0.51171875, "learning_rate": 0.0001700918209752578, "loss": 1.4303, "step": 6330 }, { "epoch": 0.33, "grad_norm": 0.5, "learning_rate": 0.00017002738554352552, "loss": 1.4716, "step": 6335 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00016996289300881353, "loss": 1.4636, "step": 6340 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00016989834342371146, "loss": 1.4566, "step": 6345 }, { "epoch": 0.33, "grad_norm": 0.50390625, "learning_rate": 0.00016983373684085557, "loss": 1.4486, "step": 6350 }, { "epoch": 0.33, "grad_norm": 0.50390625, "learning_rate": 0.00016976907331292846, "loss": 1.4181, "step": 6355 }, { "epoch": 0.33, "grad_norm": 0.52734375, "learning_rate": 0.00016970435289265923, "loss": 1.4574, "step": 6360 }, { "epoch": 0.33, "grad_norm": 0.51171875, "learning_rate": 0.00016963957563282336, "loss": 1.4554, "step": 6365 }, { "epoch": 0.33, "grad_norm": 0.51171875, "learning_rate": 0.00016957474158624266, "loss": 1.4864, "step": 6370 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.0001695098508057853, "loss": 1.4319, "step": 6375 }, { "epoch": 0.33, "grad_norm": 0.51171875, "learning_rate": 0.00016944490334436566, "loss": 1.4662, "step": 6380 }, { "epoch": 0.33, "grad_norm": 0.52734375, "learning_rate": 0.00016937989925494432, "loss": 1.4459, "step": 6385 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00016931483859052813, "loss": 1.4277, "step": 6390 }, { "epoch": 0.33, "grad_norm": 0.53515625, "learning_rate": 0.00016924972140417, "loss": 1.4903, "step": 6395 }, { "epoch": 0.33, "grad_norm": 0.48828125, "learning_rate": 0.00016918454774896892, "loss": 1.4032, "step": 6400 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00016911931767807, "loss": 1.4108, "step": 6405 }, { "epoch": 0.33, "grad_norm": 0.5, "learning_rate": 0.00016905403124466427, "loss": 1.466, "step": 6410 }, { "epoch": 0.33, "grad_norm": 0.53515625, "learning_rate": 0.00016898868850198878, "loss": 1.4487, "step": 6415 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.0001689232895033265, "loss": 1.4416, "step": 6420 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 0.00016885783430200616, "loss": 1.4537, "step": 6425 }, { "epoch": 0.33, "grad_norm": 0.5390625, "learning_rate": 0.0001687923229514025, "loss": 1.444, "step": 6430 }, { "epoch": 0.33, "grad_norm": 0.51171875, "learning_rate": 0.00016872675550493594, "loss": 1.4532, "step": 6435 }, { "epoch": 0.33, "grad_norm": 0.51953125, "learning_rate": 0.00016866113201607257, "loss": 1.4436, "step": 6440 }, { "epoch": 0.33, "grad_norm": 0.5234375, "learning_rate": 0.0001685954525383244, "loss": 1.4328, "step": 6445 }, { "epoch": 0.33, "grad_norm": 0.5078125, "learning_rate": 0.0001685297171252488, "loss": 1.4651, "step": 6450 }, { "epoch": 0.33, "grad_norm": 0.5, "learning_rate": 0.0001684639258304491, "loss": 1.4652, "step": 6455 }, { "epoch": 0.33, "grad_norm": 0.5234375, "learning_rate": 0.00016839807870757387, "loss": 1.4639, "step": 6460 }, { "epoch": 0.33, "grad_norm": 0.5, "learning_rate": 0.00016833217581031738, "loss": 1.4381, "step": 6465 }, { "epoch": 0.33, "grad_norm": 0.53515625, "learning_rate": 0.00016826621719241938, "loss": 1.4469, "step": 6470 }, { "epoch": 0.33, "grad_norm": 0.48828125, "learning_rate": 0.00016820020290766498, "loss": 1.434, "step": 6475 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.00016813413300988478, "loss": 1.4403, "step": 6480 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.0001680680075529546, "loss": 1.4353, "step": 6485 }, { "epoch": 0.34, "grad_norm": 0.52734375, "learning_rate": 0.00016800182659079568, "loss": 1.4958, "step": 6490 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.0001679355901773745, "loss": 1.4549, "step": 6495 }, { "epoch": 0.34, "grad_norm": 0.53515625, "learning_rate": 0.0001678692983667027, "loss": 1.4271, "step": 6500 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016780295121283717, "loss": 1.4726, "step": 6505 }, { "epoch": 0.34, "grad_norm": 0.53125, "learning_rate": 0.00016773654876987983, "loss": 1.4884, "step": 6510 }, { "epoch": 0.34, "grad_norm": 0.51953125, "learning_rate": 0.00016767009109197782, "loss": 1.4783, "step": 6515 }, { "epoch": 0.34, "grad_norm": 0.5, "learning_rate": 0.00016760357823332318, "loss": 1.4433, "step": 6520 }, { "epoch": 0.34, "grad_norm": 0.494140625, "learning_rate": 0.00016753701024815304, "loss": 1.4583, "step": 6525 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016747038719074945, "loss": 1.4556, "step": 6530 }, { "epoch": 0.34, "grad_norm": 0.486328125, "learning_rate": 0.00016740370911543938, "loss": 1.4543, "step": 6535 }, { "epoch": 0.34, "grad_norm": 0.51171875, "learning_rate": 0.00016733697607659463, "loss": 1.4557, "step": 6540 }, { "epoch": 0.34, "grad_norm": 0.54296875, "learning_rate": 0.0001672701881286319, "loss": 1.4402, "step": 6545 }, { "epoch": 0.34, "grad_norm": 0.55078125, "learning_rate": 0.00016720334532601254, "loss": 1.4782, "step": 6550 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.00016713644772324275, "loss": 1.4437, "step": 6555 }, { "epoch": 0.34, "grad_norm": 0.51171875, "learning_rate": 0.00016706949537487336, "loss": 1.4701, "step": 6560 }, { "epoch": 0.34, "grad_norm": 0.53515625, "learning_rate": 0.0001670024883354998, "loss": 1.437, "step": 6565 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.0001669354266597622, "loss": 1.4347, "step": 6570 }, { "epoch": 0.34, "grad_norm": 0.5, "learning_rate": 0.0001668683104023452, "loss": 1.463, "step": 6575 }, { "epoch": 0.34, "grad_norm": 0.5703125, "learning_rate": 0.00016680113961797788, "loss": 1.4704, "step": 6580 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.00016673391436143384, "loss": 1.4307, "step": 6585 }, { "epoch": 0.34, "grad_norm": 0.51953125, "learning_rate": 0.00016666663468753118, "loss": 1.4539, "step": 6590 }, { "epoch": 0.34, "grad_norm": 0.51171875, "learning_rate": 0.00016659930065113219, "loss": 1.4447, "step": 6595 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.00016653191230714366, "loss": 1.44, "step": 6600 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016646446971051653, "loss": 1.4302, "step": 6605 }, { "epoch": 0.34, "grad_norm": 0.515625, "learning_rate": 0.00016639697291624615, "loss": 1.4595, "step": 6610 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.00016632942197937185, "loss": 1.4512, "step": 6615 }, { "epoch": 0.34, "grad_norm": 0.52734375, "learning_rate": 0.00016626181695497726, "loss": 1.4692, "step": 6620 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.00016619415789819012, "loss": 1.4509, "step": 6625 }, { "epoch": 0.34, "grad_norm": 0.546875, "learning_rate": 0.00016612644486418211, "loss": 1.47, "step": 6630 }, { "epoch": 0.34, "grad_norm": 0.56640625, "learning_rate": 0.00016605867790816901, "loss": 1.463, "step": 6635 }, { "epoch": 0.34, "grad_norm": 0.5078125, "learning_rate": 0.00016599085708541065, "loss": 1.423, "step": 6640 }, { "epoch": 0.34, "grad_norm": 0.51953125, "learning_rate": 0.0001659229824512106, "loss": 1.4258, "step": 6645 }, { "epoch": 0.34, "grad_norm": 0.53515625, "learning_rate": 0.0001658550540609164, "loss": 1.4762, "step": 6650 }, { "epoch": 0.34, "grad_norm": 0.55078125, "learning_rate": 0.00016578707196991953, "loss": 1.4485, "step": 6655 }, { "epoch": 0.34, "grad_norm": 0.5234375, "learning_rate": 0.00016571903623365506, "loss": 1.4438, "step": 6660 }, { "epoch": 0.34, "grad_norm": 0.5390625, "learning_rate": 0.00016565094690760193, "loss": 1.4411, "step": 6665 }, { "epoch": 0.35, "grad_norm": 0.51953125, "learning_rate": 0.00016558280404728275, "loss": 1.4442, "step": 6670 }, { "epoch": 0.35, "grad_norm": 0.51953125, "learning_rate": 0.00016551460770826383, "loss": 1.4678, "step": 6675 }, { "epoch": 0.35, "grad_norm": 0.50390625, "learning_rate": 0.00016544635794615498, "loss": 1.4506, "step": 6680 }, { "epoch": 0.35, "grad_norm": 0.49609375, "learning_rate": 0.00016537805481660968, "loss": 1.4264, "step": 6685 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.00016530969837532487, "loss": 1.4492, "step": 6690 }, { "epoch": 0.35, "grad_norm": 0.515625, "learning_rate": 0.000165241288678041, "loss": 1.4742, "step": 6695 }, { "epoch": 0.35, "grad_norm": 0.5078125, "learning_rate": 0.00016517282578054187, "loss": 1.454, "step": 6700 }, { "epoch": 0.35, "grad_norm": 0.515625, "learning_rate": 0.0001651043097386548, "loss": 1.4484, "step": 6705 }, { "epoch": 0.35, "grad_norm": 0.494140625, "learning_rate": 0.0001650357406082503, "loss": 1.4599, "step": 6710 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016496711844524224, "loss": 1.434, "step": 6715 }, { "epoch": 0.35, "grad_norm": 0.5078125, "learning_rate": 0.00016489844330558773, "loss": 1.4689, "step": 6720 }, { "epoch": 0.35, "grad_norm": 0.55078125, "learning_rate": 0.00016482971524528714, "loss": 1.4879, "step": 6725 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.00016476093432038385, "loss": 1.4342, "step": 6730 }, { "epoch": 0.35, "grad_norm": 0.5078125, "learning_rate": 0.00016469210058696446, "loss": 1.4312, "step": 6735 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.0001646232141011586, "loss": 1.4278, "step": 6740 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016455427491913888, "loss": 1.4728, "step": 6745 }, { "epoch": 0.35, "grad_norm": 0.53515625, "learning_rate": 0.000164485283097121, "loss": 1.4484, "step": 6750 }, { "epoch": 0.35, "grad_norm": 0.515625, "learning_rate": 0.00016441623869136343, "loss": 1.4546, "step": 6755 }, { "epoch": 0.35, "grad_norm": 0.53125, "learning_rate": 0.00016434714175816764, "loss": 1.4523, "step": 6760 }, { "epoch": 0.35, "grad_norm": 0.5703125, "learning_rate": 0.00016427799235387784, "loss": 1.4494, "step": 6765 }, { "epoch": 0.35, "grad_norm": 0.494140625, "learning_rate": 0.00016420879053488107, "loss": 1.4351, "step": 6770 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016413953635760714, "loss": 1.4547, "step": 6775 }, { "epoch": 0.35, "grad_norm": 0.515625, "learning_rate": 0.0001640702298785285, "loss": 1.4584, "step": 6780 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016400087115416034, "loss": 1.4429, "step": 6785 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.0001639314602410603, "loss": 1.4234, "step": 6790 }, { "epoch": 0.35, "grad_norm": 0.50390625, "learning_rate": 0.00016386199719582874, "loss": 1.4738, "step": 6795 }, { "epoch": 0.35, "grad_norm": 0.5078125, "learning_rate": 0.0001637924820751084, "loss": 1.4311, "step": 6800 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016372291493558453, "loss": 1.4664, "step": 6805 }, { "epoch": 0.35, "grad_norm": 0.52734375, "learning_rate": 0.00016365329583398487, "loss": 1.4486, "step": 6810 }, { "epoch": 0.35, "grad_norm": 0.5390625, "learning_rate": 0.00016358362482707942, "loss": 1.4347, "step": 6815 }, { "epoch": 0.35, "grad_norm": 0.490234375, "learning_rate": 0.0001635139019716806, "loss": 1.3988, "step": 6820 }, { "epoch": 0.35, "grad_norm": 0.515625, "learning_rate": 0.000163444127324643, "loss": 1.4205, "step": 6825 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016337430094286358, "loss": 1.4269, "step": 6830 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 0.00016330442288328134, "loss": 1.4432, "step": 6835 }, { "epoch": 0.35, "grad_norm": 0.54296875, "learning_rate": 0.00016323449320287755, "loss": 1.4516, "step": 6840 }, { "epoch": 0.35, "grad_norm": 0.50390625, "learning_rate": 0.0001631645119586755, "loss": 1.4376, "step": 6845 }, { "epoch": 0.35, "grad_norm": 0.5078125, "learning_rate": 0.0001630944792077405, "loss": 1.457, "step": 6850 }, { "epoch": 0.35, "grad_norm": 0.6015625, "learning_rate": 0.00016302439500718002, "loss": 1.472, "step": 6855 }, { "epoch": 0.35, "grad_norm": 0.5234375, "learning_rate": 0.00016295425941414323, "loss": 1.4462, "step": 6860 }, { "epoch": 0.36, "grad_norm": 0.498046875, "learning_rate": 0.00016288407248582146, "loss": 1.4394, "step": 6865 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.0001628138342794477, "loss": 1.4604, "step": 6870 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016274354485229688, "loss": 1.4599, "step": 6875 }, { "epoch": 0.36, "grad_norm": 0.51171875, "learning_rate": 0.0001626732042616857, "loss": 1.4046, "step": 6880 }, { "epoch": 0.36, "grad_norm": 0.5, "learning_rate": 0.00016260281256497247, "loss": 1.4412, "step": 6885 }, { "epoch": 0.36, "grad_norm": 0.52734375, "learning_rate": 0.00016253236981955726, "loss": 1.4617, "step": 6890 }, { "epoch": 0.36, "grad_norm": 0.515625, "learning_rate": 0.00016246187608288178, "loss": 1.4696, "step": 6895 }, { "epoch": 0.36, "grad_norm": 0.515625, "learning_rate": 0.00016239133141242925, "loss": 1.4534, "step": 6900 }, { "epoch": 0.36, "grad_norm": 0.51171875, "learning_rate": 0.0001623207358657245, "loss": 1.4349, "step": 6905 }, { "epoch": 0.36, "grad_norm": 0.5078125, "learning_rate": 0.0001622500895003338, "loss": 1.4505, "step": 6910 }, { "epoch": 0.36, "grad_norm": 0.5625, "learning_rate": 0.00016217939237386485, "loss": 1.432, "step": 6915 }, { "epoch": 0.36, "grad_norm": 0.515625, "learning_rate": 0.00016210864454396678, "loss": 1.4469, "step": 6920 }, { "epoch": 0.36, "grad_norm": 0.51171875, "learning_rate": 0.00016203784606833, "loss": 1.4463, "step": 6925 }, { "epoch": 0.36, "grad_norm": 0.50390625, "learning_rate": 0.00016196699700468634, "loss": 1.4452, "step": 6930 }, { "epoch": 0.36, "grad_norm": 2.90625, "learning_rate": 0.0001618960974108088, "loss": 1.4029, "step": 6935 }, { "epoch": 0.36, "grad_norm": 0.5234375, "learning_rate": 0.0001618251473445115, "loss": 1.4377, "step": 6940 }, { "epoch": 0.36, "grad_norm": 0.5078125, "learning_rate": 0.00016175414686364994, "loss": 1.4317, "step": 6945 }, { "epoch": 0.36, "grad_norm": 0.53515625, "learning_rate": 0.00016168309602612052, "loss": 1.4971, "step": 6950 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016161199488986077, "loss": 1.4458, "step": 6955 }, { "epoch": 0.36, "grad_norm": 0.54296875, "learning_rate": 0.00016154084351284925, "loss": 1.4253, "step": 6960 }, { "epoch": 0.36, "grad_norm": 0.4921875, "learning_rate": 0.00016146964195310555, "loss": 1.4211, "step": 6965 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016139839026869005, "loss": 1.4844, "step": 6970 }, { "epoch": 0.36, "grad_norm": 0.5078125, "learning_rate": 0.00016132708851770408, "loss": 1.4599, "step": 6975 }, { "epoch": 0.36, "grad_norm": 0.51953125, "learning_rate": 0.00016125573675828983, "loss": 1.4546, "step": 6980 }, { "epoch": 0.36, "grad_norm": 0.5078125, "learning_rate": 0.00016118433504863012, "loss": 1.4241, "step": 6985 }, { "epoch": 0.36, "grad_norm": 0.51953125, "learning_rate": 0.00016111288344694875, "loss": 1.4573, "step": 6990 }, { "epoch": 0.36, "grad_norm": 0.55078125, "learning_rate": 0.00016104138201150994, "loss": 1.4118, "step": 6995 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016096983080061874, "loss": 1.4024, "step": 7000 }, { "epoch": 0.36, "grad_norm": 0.49609375, "learning_rate": 0.00016089822987262067, "loss": 1.4301, "step": 7005 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016082657928590183, "loss": 1.4426, "step": 7010 }, { "epoch": 0.36, "grad_norm": 0.53515625, "learning_rate": 0.00016075487909888886, "loss": 1.4379, "step": 7015 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.0001606831293700488, "loss": 1.4465, "step": 7020 }, { "epoch": 0.36, "grad_norm": 0.50390625, "learning_rate": 0.00016061133015788905, "loss": 1.466, "step": 7025 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016053948152095745, "loss": 1.4491, "step": 7030 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.0001604675835178421, "loss": 1.4804, "step": 7035 }, { "epoch": 0.36, "grad_norm": 0.53125, "learning_rate": 0.00016039563620717128, "loss": 1.4269, "step": 7040 }, { "epoch": 0.36, "grad_norm": 0.5546875, "learning_rate": 0.00016032363964761363, "loss": 1.4239, "step": 7045 }, { "epoch": 0.36, "grad_norm": 0.51171875, "learning_rate": 0.00016025159389787788, "loss": 1.4139, "step": 7050 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.00016017949901671276, "loss": 1.444, "step": 7055 }, { "epoch": 0.37, "grad_norm": 0.765625, "learning_rate": 0.00016010735506290726, "loss": 1.4524, "step": 7060 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.00016003516209529023, "loss": 1.4677, "step": 7065 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015996292017273058, "loss": 1.4294, "step": 7070 }, { "epoch": 0.37, "grad_norm": 0.51171875, "learning_rate": 0.0001598906293541371, "loss": 1.4313, "step": 7075 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015981828969845844, "loss": 1.4712, "step": 7080 }, { "epoch": 0.37, "grad_norm": 0.53125, "learning_rate": 0.00015974590126468315, "loss": 1.4572, "step": 7085 }, { "epoch": 0.37, "grad_norm": 0.498046875, "learning_rate": 0.00015967346411183941, "loss": 1.4446, "step": 7090 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015960097829899528, "loss": 1.4755, "step": 7095 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.0001595284438852584, "loss": 1.4535, "step": 7100 }, { "epoch": 0.37, "grad_norm": 0.55078125, "learning_rate": 0.00015945586092977612, "loss": 1.4407, "step": 7105 }, { "epoch": 0.37, "grad_norm": 0.515625, "learning_rate": 0.00015938322949173527, "loss": 1.4454, "step": 7110 }, { "epoch": 0.37, "grad_norm": 0.51171875, "learning_rate": 0.00015931054963036232, "loss": 1.4415, "step": 7115 }, { "epoch": 0.37, "grad_norm": 0.5, "learning_rate": 0.00015923782140492317, "loss": 1.4375, "step": 7120 }, { "epoch": 0.37, "grad_norm": 0.55859375, "learning_rate": 0.00015916504487472314, "loss": 1.4466, "step": 7125 }, { "epoch": 0.37, "grad_norm": 0.5078125, "learning_rate": 0.000159092220099107, "loss": 1.4097, "step": 7130 }, { "epoch": 0.37, "grad_norm": 0.5390625, "learning_rate": 0.0001590193471374588, "loss": 1.4065, "step": 7135 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.00015894642604920192, "loss": 1.4808, "step": 7140 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015887345689379897, "loss": 1.4544, "step": 7145 }, { "epoch": 0.37, "grad_norm": 0.61328125, "learning_rate": 0.00015880043973075177, "loss": 1.4441, "step": 7150 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.00015872737461960126, "loss": 1.4611, "step": 7155 }, { "epoch": 0.37, "grad_norm": 0.5390625, "learning_rate": 0.00015865426161992753, "loss": 1.4104, "step": 7160 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015858110079134966, "loss": 1.4313, "step": 7165 }, { "epoch": 0.37, "grad_norm": 0.54296875, "learning_rate": 0.00015850789219352577, "loss": 1.4332, "step": 7170 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.0001584346358861529, "loss": 1.4422, "step": 7175 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.00015836133192896702, "loss": 1.4478, "step": 7180 }, { "epoch": 0.37, "grad_norm": 0.494140625, "learning_rate": 0.00015828798038174298, "loss": 1.4422, "step": 7185 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.0001582145813042944, "loss": 1.4665, "step": 7190 }, { "epoch": 0.37, "grad_norm": 0.53515625, "learning_rate": 0.0001581411347564736, "loss": 1.4325, "step": 7195 }, { "epoch": 0.37, "grad_norm": 0.546875, "learning_rate": 0.00015806764079817178, "loss": 1.3919, "step": 7200 }, { "epoch": 0.37, "grad_norm": 0.515625, "learning_rate": 0.0001579940994893186, "loss": 1.4532, "step": 7205 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015792051088988246, "loss": 1.4209, "step": 7210 }, { "epoch": 0.37, "grad_norm": 1.546875, "learning_rate": 0.00015784687505987033, "loss": 1.4561, "step": 7215 }, { "epoch": 0.37, "grad_norm": 0.51171875, "learning_rate": 0.00015777319205932758, "loss": 1.442, "step": 7220 }, { "epoch": 0.37, "grad_norm": 0.494140625, "learning_rate": 0.00015769946194833817, "loss": 1.4268, "step": 7225 }, { "epoch": 0.37, "grad_norm": 0.52734375, "learning_rate": 0.00015762568478702436, "loss": 1.4749, "step": 7230 }, { "epoch": 0.37, "grad_norm": 0.515625, "learning_rate": 0.00015755186063554696, "loss": 1.4237, "step": 7235 }, { "epoch": 0.37, "grad_norm": 0.51953125, "learning_rate": 0.00015747798955410483, "loss": 1.4823, "step": 7240 }, { "epoch": 0.37, "grad_norm": 0.5078125, "learning_rate": 0.00015740407160293535, "loss": 1.4482, "step": 7245 }, { "epoch": 0.38, "grad_norm": 0.49609375, "learning_rate": 0.00015733010684231395, "loss": 1.4474, "step": 7250 }, { "epoch": 0.38, "grad_norm": 0.5, "learning_rate": 0.00015725609533255434, "loss": 1.4608, "step": 7255 }, { "epoch": 0.38, "grad_norm": 0.51171875, "learning_rate": 0.00015718203713400828, "loss": 1.4731, "step": 7260 }, { "epoch": 0.38, "grad_norm": 0.494140625, "learning_rate": 0.0001571079323070656, "loss": 1.412, "step": 7265 }, { "epoch": 0.38, "grad_norm": 0.51953125, "learning_rate": 0.00015703378091215428, "loss": 1.4288, "step": 7270 }, { "epoch": 0.38, "grad_norm": 0.546875, "learning_rate": 0.00015695958300974007, "loss": 1.4882, "step": 7275 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.0001568853386603268, "loss": 1.4557, "step": 7280 }, { "epoch": 0.38, "grad_norm": 0.5390625, "learning_rate": 0.0001568110479244561, "loss": 1.4651, "step": 7285 }, { "epoch": 0.38, "grad_norm": 0.5703125, "learning_rate": 0.00015673671086270741, "loss": 1.4711, "step": 7290 }, { "epoch": 0.38, "grad_norm": 0.5390625, "learning_rate": 0.00015666232753569807, "loss": 1.4877, "step": 7295 }, { "epoch": 0.38, "grad_norm": 0.5078125, "learning_rate": 0.000156587898004083, "loss": 1.4273, "step": 7300 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.00015651342232855486, "loss": 1.4321, "step": 7305 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.00015643890056984394, "loss": 1.4391, "step": 7310 }, { "epoch": 0.38, "grad_norm": 0.51953125, "learning_rate": 0.00015636433278871814, "loss": 1.453, "step": 7315 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.00015628971904598277, "loss": 1.4338, "step": 7320 }, { "epoch": 0.38, "grad_norm": 0.54296875, "learning_rate": 0.00015621505940248076, "loss": 1.4304, "step": 7325 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.00015614035391909242, "loss": 1.4304, "step": 7330 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.00015606560265673535, "loss": 1.4565, "step": 7335 }, { "epoch": 0.38, "grad_norm": 0.52734375, "learning_rate": 0.00015599080567636463, "loss": 1.465, "step": 7340 }, { "epoch": 0.38, "grad_norm": 0.5078125, "learning_rate": 0.00015591596303897256, "loss": 1.449, "step": 7345 }, { "epoch": 0.38, "grad_norm": 0.57421875, "learning_rate": 0.00015584107480558858, "loss": 1.4558, "step": 7350 }, { "epoch": 0.38, "grad_norm": 0.5078125, "learning_rate": 0.00015576614103727946, "loss": 1.464, "step": 7355 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.00015569116179514896, "loss": 1.4592, "step": 7360 }, { "epoch": 0.38, "grad_norm": 0.5390625, "learning_rate": 0.00015561613714033804, "loss": 1.4774, "step": 7365 }, { "epoch": 0.38, "grad_norm": 0.49609375, "learning_rate": 0.00015554106713402466, "loss": 1.4422, "step": 7370 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.00015546595183742372, "loss": 1.3744, "step": 7375 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 0.00015539079131178705, "loss": 1.4724, "step": 7380 }, { "epoch": 0.38, "grad_norm": 0.515625, "learning_rate": 0.00015531558561840343, "loss": 1.4461, "step": 7385 }, { "epoch": 0.38, "grad_norm": 0.5234375, "learning_rate": 0.00015524033481859842, "loss": 1.4322, "step": 7390 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 0.00015516503897373434, "loss": 1.4249, "step": 7395 }, { "epoch": 0.38, "grad_norm": 0.52734375, "learning_rate": 0.00015508969814521025, "loss": 1.4321, "step": 7400 }, { "epoch": 0.38, "grad_norm": 0.51171875, "learning_rate": 0.00015501431239446197, "loss": 1.4365, "step": 7405 }, { "epoch": 0.38, "grad_norm": 0.515625, "learning_rate": 0.00015493888178296191, "loss": 1.4282, "step": 7410 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 0.00015486340637221895, "loss": 1.4296, "step": 7415 }, { "epoch": 0.38, "grad_norm": 0.546875, "learning_rate": 0.00015478788622377872, "loss": 1.4462, "step": 7420 }, { "epoch": 0.38, "grad_norm": 0.51171875, "learning_rate": 0.00015471232139922312, "loss": 1.443, "step": 7425 }, { "epoch": 0.38, "grad_norm": 0.53515625, "learning_rate": 0.00015463671196017055, "loss": 1.4506, "step": 7430 }, { "epoch": 0.38, "grad_norm": 0.515625, "learning_rate": 0.00015456105796827588, "loss": 1.4532, "step": 7435 }, { "epoch": 0.38, "grad_norm": 0.53125, "learning_rate": 0.00015448535948523018, "loss": 1.45, "step": 7440 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.00015440961657276088, "loss": 1.4341, "step": 7445 }, { "epoch": 0.39, "grad_norm": 0.50390625, "learning_rate": 0.0001543338292926316, "loss": 1.4292, "step": 7450 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.0001542579977066422, "loss": 1.4081, "step": 7455 }, { "epoch": 0.39, "grad_norm": 0.5, "learning_rate": 0.00015418212187662858, "loss": 1.4163, "step": 7460 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.00015410620186446277, "loss": 1.4281, "step": 7465 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.00015403023773205286, "loss": 1.4514, "step": 7470 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.00015395422954134278, "loss": 1.4534, "step": 7475 }, { "epoch": 0.39, "grad_norm": 0.5546875, "learning_rate": 0.0001538781773543126, "loss": 1.4488, "step": 7480 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.0001538020812329781, "loss": 1.4453, "step": 7485 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015372594123939094, "loss": 1.4087, "step": 7490 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.00015364975743563858, "loss": 1.4497, "step": 7495 }, { "epoch": 0.39, "grad_norm": 0.55078125, "learning_rate": 0.0001535735298838441, "loss": 1.4217, "step": 7500 }, { "epoch": 0.39, "grad_norm": 0.5, "learning_rate": 0.00015349725864616639, "loss": 1.4277, "step": 7505 }, { "epoch": 0.39, "grad_norm": 0.5625, "learning_rate": 0.00015342094378479988, "loss": 1.4472, "step": 7510 }, { "epoch": 0.39, "grad_norm": 0.54296875, "learning_rate": 0.0001533445853619746, "loss": 1.4571, "step": 7515 }, { "epoch": 0.39, "grad_norm": 0.53125, "learning_rate": 0.0001532681834399561, "loss": 1.4408, "step": 7520 }, { "epoch": 0.39, "grad_norm": 0.498046875, "learning_rate": 0.0001531917380810454, "loss": 1.4034, "step": 7525 }, { "epoch": 0.39, "grad_norm": 0.546875, "learning_rate": 0.00015311524934757893, "loss": 1.4527, "step": 7530 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.0001530387173019285, "loss": 1.4673, "step": 7535 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.00015296214200650126, "loss": 1.4032, "step": 7540 }, { "epoch": 0.39, "grad_norm": 0.5, "learning_rate": 0.00015288552352373956, "loss": 1.4031, "step": 7545 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.000152808861916121, "loss": 1.4263, "step": 7550 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.00015273215724615846, "loss": 1.4538, "step": 7555 }, { "epoch": 0.39, "grad_norm": 0.50390625, "learning_rate": 0.00015265540957639973, "loss": 1.4294, "step": 7560 }, { "epoch": 0.39, "grad_norm": 0.50390625, "learning_rate": 0.00015257861896942777, "loss": 1.4324, "step": 7565 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 0.0001525017854878606, "loss": 1.4431, "step": 7570 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.0001524249091943511, "loss": 1.4652, "step": 7575 }, { "epoch": 0.39, "grad_norm": 0.50390625, "learning_rate": 0.00015234799015158713, "loss": 1.446, "step": 7580 }, { "epoch": 0.39, "grad_norm": 0.546875, "learning_rate": 0.00015227102842229134, "loss": 1.4417, "step": 7585 }, { "epoch": 0.39, "grad_norm": 0.498046875, "learning_rate": 0.0001521940240692213, "loss": 1.426, "step": 7590 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.00015211697715516927, "loss": 1.4178, "step": 7595 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.0001520398877429622, "loss": 1.4294, "step": 7600 }, { "epoch": 0.39, "grad_norm": 0.515625, "learning_rate": 0.00015196275589546168, "loss": 1.442, "step": 7605 }, { "epoch": 0.39, "grad_norm": 0.5234375, "learning_rate": 0.000151885581675564, "loss": 1.4626, "step": 7610 }, { "epoch": 0.39, "grad_norm": 0.5390625, "learning_rate": 0.0001518083651461999, "loss": 1.4439, "step": 7615 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.0001517311063703347, "loss": 1.3871, "step": 7620 }, { "epoch": 0.39, "grad_norm": 0.55078125, "learning_rate": 0.00015165380541096803, "loss": 1.4404, "step": 7625 }, { "epoch": 0.39, "grad_norm": 0.53515625, "learning_rate": 0.00015157646233113412, "loss": 1.4486, "step": 7630 }, { "epoch": 0.4, "grad_norm": 0.5, "learning_rate": 0.0001514990771939014, "loss": 1.4766, "step": 7635 }, { "epoch": 0.4, "grad_norm": 0.51171875, "learning_rate": 0.00015142165006237266, "loss": 1.4596, "step": 7640 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.0001513441809996849, "loss": 1.4263, "step": 7645 }, { "epoch": 0.4, "grad_norm": 0.5078125, "learning_rate": 0.0001512666700690093, "loss": 1.4117, "step": 7650 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00015118911733355125, "loss": 1.4577, "step": 7655 }, { "epoch": 0.4, "grad_norm": 0.52734375, "learning_rate": 0.00015111152285655013, "loss": 1.4462, "step": 7660 }, { "epoch": 0.4, "grad_norm": 0.51953125, "learning_rate": 0.00015103388670127947, "loss": 1.433, "step": 7665 }, { "epoch": 0.4, "grad_norm": 0.5, "learning_rate": 0.0001509562089310467, "loss": 1.4355, "step": 7670 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.0001508784896091932, "loss": 1.3756, "step": 7675 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.0001508007287990943, "loss": 1.4544, "step": 7680 }, { "epoch": 0.4, "grad_norm": 0.5078125, "learning_rate": 0.00015072292656415906, "loss": 1.4182, "step": 7685 }, { "epoch": 0.4, "grad_norm": 0.53125, "learning_rate": 0.00015064508296783037, "loss": 1.4554, "step": 7690 }, { "epoch": 0.4, "grad_norm": 0.498046875, "learning_rate": 0.00015056719807358485, "loss": 1.4284, "step": 7695 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00015048927194493276, "loss": 1.4262, "step": 7700 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00015041130464541808, "loss": 1.4831, "step": 7705 }, { "epoch": 0.4, "grad_norm": 0.51953125, "learning_rate": 0.00015033329623861822, "loss": 1.4461, "step": 7710 }, { "epoch": 0.4, "grad_norm": 0.4921875, "learning_rate": 0.00015025524678814427, "loss": 1.4351, "step": 7715 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.00015017715635764063, "loss": 1.4615, "step": 7720 }, { "epoch": 0.4, "grad_norm": 0.498046875, "learning_rate": 0.00015009902501078525, "loss": 1.4451, "step": 7725 }, { "epoch": 0.4, "grad_norm": 0.484375, "learning_rate": 0.0001500208528112893, "loss": 1.424, "step": 7730 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00014994263982289746, "loss": 1.4405, "step": 7735 }, { "epoch": 0.4, "grad_norm": 0.51171875, "learning_rate": 0.00014986438610938748, "loss": 1.4386, "step": 7740 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00014978609173457044, "loss": 1.4277, "step": 7745 }, { "epoch": 0.4, "grad_norm": 0.5546875, "learning_rate": 0.0001497077567622905, "loss": 1.4664, "step": 7750 }, { "epoch": 0.4, "grad_norm": 0.515625, "learning_rate": 0.00014962938125642503, "loss": 1.4372, "step": 7755 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00014955096528088428, "loss": 1.4382, "step": 7760 }, { "epoch": 0.4, "grad_norm": 0.51953125, "learning_rate": 0.00014947250889961168, "loss": 1.4066, "step": 7765 }, { "epoch": 0.4, "grad_norm": 0.5078125, "learning_rate": 0.0001493940121765835, "loss": 1.4029, "step": 7770 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.00014931547517580898, "loss": 1.4455, "step": 7775 }, { "epoch": 0.4, "grad_norm": 0.51171875, "learning_rate": 0.00014923689796133007, "loss": 1.4802, "step": 7780 }, { "epoch": 0.4, "grad_norm": 0.498046875, "learning_rate": 0.0001491582805972217, "loss": 1.4362, "step": 7785 }, { "epoch": 0.4, "grad_norm": 0.5390625, "learning_rate": 0.00014907962314759143, "loss": 1.4446, "step": 7790 }, { "epoch": 0.4, "grad_norm": 0.5078125, "learning_rate": 0.00014900092567657946, "loss": 1.4295, "step": 7795 }, { "epoch": 0.4, "grad_norm": 0.50390625, "learning_rate": 0.00014892218824835872, "loss": 1.4239, "step": 7800 }, { "epoch": 0.4, "grad_norm": 0.5234375, "learning_rate": 0.0001488434109271347, "loss": 1.4603, "step": 7805 }, { "epoch": 0.4, "grad_norm": 0.51171875, "learning_rate": 0.00014876459377714541, "loss": 1.458, "step": 7810 }, { "epoch": 0.4, "grad_norm": 0.5, "learning_rate": 0.0001486857368626613, "loss": 1.4326, "step": 7815 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 0.00014860684024798536, "loss": 1.4336, "step": 7820 }, { "epoch": 0.4, "grad_norm": 0.51953125, "learning_rate": 0.00014852790399745276, "loss": 1.4285, "step": 7825 }, { "epoch": 0.41, "grad_norm": 0.515625, "learning_rate": 0.00014844892817543118, "loss": 1.444, "step": 7830 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014836991284632048, "loss": 1.4552, "step": 7835 }, { "epoch": 0.41, "grad_norm": 0.53515625, "learning_rate": 0.00014829085807455274, "loss": 1.4585, "step": 7840 }, { "epoch": 0.41, "grad_norm": 0.51953125, "learning_rate": 0.00014821176392459224, "loss": 1.4027, "step": 7845 }, { "epoch": 0.41, "grad_norm": 0.546875, "learning_rate": 0.0001481326304609353, "loss": 1.4385, "step": 7850 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.0001480534577481104, "loss": 1.4407, "step": 7855 }, { "epoch": 0.41, "grad_norm": 0.51953125, "learning_rate": 0.00014797424585067789, "loss": 1.4317, "step": 7860 }, { "epoch": 0.41, "grad_norm": 0.53125, "learning_rate": 0.0001478949948332302, "loss": 1.4478, "step": 7865 }, { "epoch": 0.41, "grad_norm": 0.546875, "learning_rate": 0.00014781570476039163, "loss": 1.4349, "step": 7870 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.00014773637569681823, "loss": 1.4618, "step": 7875 }, { "epoch": 0.41, "grad_norm": 0.515625, "learning_rate": 0.00014765700770719796, "loss": 1.4413, "step": 7880 }, { "epoch": 0.41, "grad_norm": 0.515625, "learning_rate": 0.00014757760085625047, "loss": 1.459, "step": 7885 }, { "epoch": 0.41, "grad_norm": 0.55078125, "learning_rate": 0.00014749815520872717, "loss": 1.4563, "step": 7890 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014741867082941095, "loss": 1.461, "step": 7895 }, { "epoch": 0.41, "grad_norm": 0.5078125, "learning_rate": 0.00014733914778311647, "loss": 1.4266, "step": 7900 }, { "epoch": 0.41, "grad_norm": 0.49609375, "learning_rate": 0.00014725958613468976, "loss": 1.4292, "step": 7905 }, { "epoch": 0.41, "grad_norm": 0.546875, "learning_rate": 0.00014717998594900844, "loss": 1.4546, "step": 7910 }, { "epoch": 0.41, "grad_norm": 0.51953125, "learning_rate": 0.0001471003472909815, "loss": 1.4307, "step": 7915 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.0001470206702255493, "loss": 1.4127, "step": 7920 }, { "epoch": 0.41, "grad_norm": 0.546875, "learning_rate": 0.00014694095481768358, "loss": 1.418, "step": 7925 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014686120113238725, "loss": 1.4108, "step": 7930 }, { "epoch": 0.41, "grad_norm": 0.49609375, "learning_rate": 0.00014678140923469452, "loss": 1.4327, "step": 7935 }, { "epoch": 0.41, "grad_norm": 0.5390625, "learning_rate": 0.0001467015791896707, "loss": 1.4625, "step": 7940 }, { "epoch": 0.41, "grad_norm": 0.546875, "learning_rate": 0.00014662171106241223, "loss": 1.4198, "step": 7945 }, { "epoch": 0.41, "grad_norm": 0.54296875, "learning_rate": 0.0001465418049180466, "loss": 1.4784, "step": 7950 }, { "epoch": 0.41, "grad_norm": 0.5234375, "learning_rate": 0.00014646186082173233, "loss": 1.4499, "step": 7955 }, { "epoch": 0.41, "grad_norm": 0.51953125, "learning_rate": 0.0001463818788386588, "loss": 1.4569, "step": 7960 }, { "epoch": 0.41, "grad_norm": 0.53515625, "learning_rate": 0.00014630185903404642, "loss": 1.4456, "step": 7965 }, { "epoch": 0.41, "grad_norm": 0.55078125, "learning_rate": 0.00014622180147314632, "loss": 1.4491, "step": 7970 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.0001461417062212405, "loss": 1.464, "step": 7975 }, { "epoch": 0.41, "grad_norm": 0.51171875, "learning_rate": 0.00014606157334364162, "loss": 1.4202, "step": 7980 }, { "epoch": 0.41, "grad_norm": 0.5078125, "learning_rate": 0.00014598140290569307, "loss": 1.4205, "step": 7985 }, { "epoch": 0.41, "grad_norm": 0.52734375, "learning_rate": 0.00014590119497276887, "loss": 1.4561, "step": 7990 }, { "epoch": 0.41, "grad_norm": 0.5078125, "learning_rate": 0.0001458209496102736, "loss": 1.4398, "step": 7995 }, { "epoch": 0.41, "grad_norm": 0.53515625, "learning_rate": 0.00014574066688364235, "loss": 1.4299, "step": 8000 }, { "epoch": 0.41, "grad_norm": 0.5390625, "learning_rate": 0.0001456603468583407, "loss": 1.4382, "step": 8005 }, { "epoch": 0.41, "grad_norm": 0.5234375, "learning_rate": 0.00014557998959986466, "loss": 1.4097, "step": 8010 }, { "epoch": 0.41, "grad_norm": 0.53125, "learning_rate": 0.00014549959517374056, "loss": 1.4518, "step": 8015 }, { "epoch": 0.41, "grad_norm": 0.50390625, "learning_rate": 0.00014541916364552504, "loss": 1.438, "step": 8020 }, { "epoch": 0.42, "grad_norm": 0.5546875, "learning_rate": 0.00014533869508080504, "loss": 1.458, "step": 8025 }, { "epoch": 0.42, "grad_norm": 0.5078125, "learning_rate": 0.00014525818954519765, "loss": 1.4477, "step": 8030 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.0001451776471043502, "loss": 1.4434, "step": 8035 }, { "epoch": 0.42, "grad_norm": 0.53125, "learning_rate": 0.00014509706782393992, "loss": 1.4283, "step": 8040 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.00014501645176967428, "loss": 1.452, "step": 8045 }, { "epoch": 0.42, "grad_norm": 0.51171875, "learning_rate": 0.00014493579900729065, "loss": 1.4493, "step": 8050 }, { "epoch": 0.42, "grad_norm": 0.49609375, "learning_rate": 0.00014485510960255638, "loss": 1.4199, "step": 8055 }, { "epoch": 0.42, "grad_norm": 0.515625, "learning_rate": 0.0001447743836212686, "loss": 1.4594, "step": 8060 }, { "epoch": 0.42, "grad_norm": 0.5546875, "learning_rate": 0.00014469362112925436, "loss": 1.4306, "step": 8065 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.0001446128221923704, "loss": 1.4349, "step": 8070 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.00014453198687650336, "loss": 1.459, "step": 8075 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.0001444511152475693, "loss": 1.4249, "step": 8080 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.00014437020737151403, "loss": 1.4304, "step": 8085 }, { "epoch": 0.42, "grad_norm": 0.53125, "learning_rate": 0.00014428926331431293, "loss": 1.428, "step": 8090 }, { "epoch": 0.42, "grad_norm": 0.51171875, "learning_rate": 0.00014420828314197078, "loss": 1.4365, "step": 8095 }, { "epoch": 0.42, "grad_norm": 0.50390625, "learning_rate": 0.00014412726692052195, "loss": 1.4066, "step": 8100 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.00014404621471603005, "loss": 1.4333, "step": 8105 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.00014396512659458824, "loss": 1.4428, "step": 8110 }, { "epoch": 0.42, "grad_norm": 0.54296875, "learning_rate": 0.0001438840026223187, "loss": 1.4507, "step": 8115 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.00014380284286537307, "loss": 1.4596, "step": 8120 }, { "epoch": 0.42, "grad_norm": 0.51953125, "learning_rate": 0.00014372164738993206, "loss": 1.4695, "step": 8125 }, { "epoch": 0.42, "grad_norm": 0.484375, "learning_rate": 0.00014364041626220556, "loss": 1.4101, "step": 8130 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.00014355914954843247, "loss": 1.424, "step": 8135 }, { "epoch": 0.42, "grad_norm": 0.5625, "learning_rate": 0.00014347784731488078, "loss": 1.448, "step": 8140 }, { "epoch": 0.42, "grad_norm": 0.51171875, "learning_rate": 0.00014339650962784736, "loss": 1.4513, "step": 8145 }, { "epoch": 0.42, "grad_norm": 0.515625, "learning_rate": 0.00014331513655365806, "loss": 1.4363, "step": 8150 }, { "epoch": 0.42, "grad_norm": 0.5078125, "learning_rate": 0.00014323372815866757, "loss": 1.44, "step": 8155 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.00014315228450925943, "loss": 1.469, "step": 8160 }, { "epoch": 0.42, "grad_norm": 0.515625, "learning_rate": 0.00014307080567184575, "loss": 1.4107, "step": 8165 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.00014298929171286753, "loss": 1.4158, "step": 8170 }, { "epoch": 0.42, "grad_norm": 0.53125, "learning_rate": 0.00014290774269879434, "loss": 1.417, "step": 8175 }, { "epoch": 0.42, "grad_norm": 0.53515625, "learning_rate": 0.00014282615869612433, "loss": 1.45, "step": 8180 }, { "epoch": 0.42, "grad_norm": 0.5, "learning_rate": 0.00014274453977138415, "loss": 1.4191, "step": 8185 }, { "epoch": 0.42, "grad_norm": 0.5234375, "learning_rate": 0.000142662885991129, "loss": 1.446, "step": 8190 }, { "epoch": 0.42, "grad_norm": 0.50390625, "learning_rate": 0.00014258119742194242, "loss": 1.4128, "step": 8195 }, { "epoch": 0.42, "grad_norm": 0.5390625, "learning_rate": 0.00014249947413043642, "loss": 1.4124, "step": 8200 }, { "epoch": 0.42, "grad_norm": 0.52734375, "learning_rate": 0.00014241771618325123, "loss": 1.4235, "step": 8205 }, { "epoch": 0.42, "grad_norm": 0.55859375, "learning_rate": 0.00014233592364705535, "loss": 1.4351, "step": 8210 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.0001422540965885455, "loss": 1.4278, "step": 8215 }, { "epoch": 0.43, "grad_norm": 0.52734375, "learning_rate": 0.00014217223507444662, "loss": 1.4011, "step": 8220 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014209033917151167, "loss": 1.4438, "step": 8225 }, { "epoch": 0.43, "grad_norm": 0.5390625, "learning_rate": 0.00014200840894652167, "loss": 1.4237, "step": 8230 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014192644446628556, "loss": 1.3854, "step": 8235 }, { "epoch": 0.43, "grad_norm": 0.515625, "learning_rate": 0.00014184444579764036, "loss": 1.4199, "step": 8240 }, { "epoch": 0.43, "grad_norm": 0.52734375, "learning_rate": 0.0001417624130074508, "loss": 1.4375, "step": 8245 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014168034616260963, "loss": 1.4375, "step": 8250 }, { "epoch": 0.43, "grad_norm": 0.515625, "learning_rate": 0.00014159824533003718, "loss": 1.4656, "step": 8255 }, { "epoch": 0.43, "grad_norm": 0.546875, "learning_rate": 0.0001415161105766816, "loss": 1.4362, "step": 8260 }, { "epoch": 0.43, "grad_norm": 0.51171875, "learning_rate": 0.0001414339419695187, "loss": 1.4265, "step": 8265 }, { "epoch": 0.43, "grad_norm": 0.53515625, "learning_rate": 0.00014135173957555182, "loss": 1.4623, "step": 8270 }, { "epoch": 0.43, "grad_norm": 0.5, "learning_rate": 0.00014126950346181195, "loss": 1.4236, "step": 8275 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014118723369535747, "loss": 1.4621, "step": 8280 }, { "epoch": 0.43, "grad_norm": 0.53515625, "learning_rate": 0.0001411049303432743, "loss": 1.4385, "step": 8285 }, { "epoch": 0.43, "grad_norm": 0.5703125, "learning_rate": 0.00014102259347267574, "loss": 1.4639, "step": 8290 }, { "epoch": 0.43, "grad_norm": 0.55859375, "learning_rate": 0.00014094022315070236, "loss": 1.4278, "step": 8295 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 0.00014085781944452201, "loss": 1.426, "step": 8300 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 0.0001407753824213298, "loss": 1.4421, "step": 8305 }, { "epoch": 0.43, "grad_norm": 0.51953125, "learning_rate": 0.00014069291214834802, "loss": 1.4385, "step": 8310 }, { "epoch": 0.43, "grad_norm": 0.51953125, "learning_rate": 0.00014061040869282608, "loss": 1.4284, "step": 8315 }, { "epoch": 0.43, "grad_norm": 0.515625, "learning_rate": 0.00014052787212204032, "loss": 1.4321, "step": 8320 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00014044530250329425, "loss": 1.416, "step": 8325 }, { "epoch": 0.43, "grad_norm": 0.50390625, "learning_rate": 0.0001403626999039183, "loss": 1.4127, "step": 8330 }, { "epoch": 0.43, "grad_norm": 0.546875, "learning_rate": 0.00014028006439126967, "loss": 1.4603, "step": 8335 }, { "epoch": 0.43, "grad_norm": 0.53125, "learning_rate": 0.00014019739603273251, "loss": 1.4149, "step": 8340 }, { "epoch": 0.43, "grad_norm": 0.5546875, "learning_rate": 0.00014011469489571776, "loss": 1.4229, "step": 8345 }, { "epoch": 0.43, "grad_norm": 0.50390625, "learning_rate": 0.00014003196104766304, "loss": 1.4199, "step": 8350 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 0.00013994919455603263, "loss": 1.4012, "step": 8355 }, { "epoch": 0.43, "grad_norm": 0.546875, "learning_rate": 0.00013986639548831752, "loss": 1.4263, "step": 8360 }, { "epoch": 0.43, "grad_norm": 0.5234375, "learning_rate": 0.00013978356391203514, "loss": 1.4384, "step": 8365 }, { "epoch": 0.43, "grad_norm": 0.50390625, "learning_rate": 0.0001397006998947295, "loss": 1.4276, "step": 8370 }, { "epoch": 0.43, "grad_norm": 0.53125, "learning_rate": 0.00013961780350397112, "loss": 1.4205, "step": 8375 }, { "epoch": 0.43, "grad_norm": 0.515625, "learning_rate": 0.00013953487480735679, "loss": 1.4579, "step": 8380 }, { "epoch": 0.43, "grad_norm": 0.51171875, "learning_rate": 0.00013945191387250972, "loss": 1.4331, "step": 8385 }, { "epoch": 0.43, "grad_norm": 0.52734375, "learning_rate": 0.00013936892076707937, "loss": 1.4885, "step": 8390 }, { "epoch": 0.43, "grad_norm": 0.51953125, "learning_rate": 0.0001392858955587415, "loss": 1.397, "step": 8395 }, { "epoch": 0.43, "grad_norm": 0.51953125, "learning_rate": 0.00013920283831519802, "loss": 1.4128, "step": 8400 }, { "epoch": 0.43, "grad_norm": 0.5, "learning_rate": 0.0001391197491041769, "loss": 1.4026, "step": 8405 }, { "epoch": 0.44, "grad_norm": 0.5703125, "learning_rate": 0.00013903662799343226, "loss": 1.4581, "step": 8410 }, { "epoch": 0.44, "grad_norm": 0.55078125, "learning_rate": 0.00013895347505074417, "loss": 1.4506, "step": 8415 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.0001388702903439187, "loss": 1.4552, "step": 8420 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.00013878707394078782, "loss": 1.433, "step": 8425 }, { "epoch": 0.44, "grad_norm": 0.5078125, "learning_rate": 0.00013870382590920933, "loss": 1.4658, "step": 8430 }, { "epoch": 0.44, "grad_norm": 1.25, "learning_rate": 0.0001386205463170668, "loss": 1.4267, "step": 8435 }, { "epoch": 0.44, "grad_norm": 0.52734375, "learning_rate": 0.00013853723523226955, "loss": 1.4343, "step": 8440 }, { "epoch": 0.44, "grad_norm": 0.546875, "learning_rate": 0.00013845389272275268, "loss": 1.4299, "step": 8445 }, { "epoch": 0.44, "grad_norm": 0.515625, "learning_rate": 0.0001383705188564767, "loss": 1.4275, "step": 8450 }, { "epoch": 0.44, "grad_norm": 0.53515625, "learning_rate": 0.00013828711370142792, "loss": 1.4335, "step": 8455 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.00013820367732561803, "loss": 1.4597, "step": 8460 }, { "epoch": 0.44, "grad_norm": 0.51171875, "learning_rate": 0.00013812020979708418, "loss": 1.422, "step": 8465 }, { "epoch": 0.44, "grad_norm": 0.494140625, "learning_rate": 0.00013803671118388895, "loss": 1.4521, "step": 8470 }, { "epoch": 0.44, "grad_norm": 0.5078125, "learning_rate": 0.0001379531815541203, "loss": 1.4216, "step": 8475 }, { "epoch": 0.44, "grad_norm": 0.51953125, "learning_rate": 0.00013786962097589144, "loss": 1.4213, "step": 8480 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.0001377860295173408, "loss": 1.4642, "step": 8485 }, { "epoch": 0.44, "grad_norm": 0.484375, "learning_rate": 0.00013770240724663208, "loss": 1.397, "step": 8490 }, { "epoch": 0.44, "grad_norm": 0.52734375, "learning_rate": 0.00013761875423195396, "loss": 1.4256, "step": 8495 }, { "epoch": 0.44, "grad_norm": 0.51171875, "learning_rate": 0.00013753507054152034, "loss": 1.4535, "step": 8500 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013745135624357007, "loss": 1.445, "step": 8505 }, { "epoch": 0.44, "grad_norm": 0.5078125, "learning_rate": 0.0001373676114063669, "loss": 1.4233, "step": 8510 }, { "epoch": 0.44, "grad_norm": 0.515625, "learning_rate": 0.00013728383609819958, "loss": 1.4573, "step": 8515 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.00013720003038738163, "loss": 1.4007, "step": 8520 }, { "epoch": 0.44, "grad_norm": 0.494140625, "learning_rate": 0.00013711619434225145, "loss": 1.4374, "step": 8525 }, { "epoch": 0.44, "grad_norm": 0.51953125, "learning_rate": 0.0001370323280311721, "loss": 1.4193, "step": 8530 }, { "epoch": 0.44, "grad_norm": 0.494140625, "learning_rate": 0.00013694843152253132, "loss": 1.4188, "step": 8535 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.00013686450488474154, "loss": 1.4665, "step": 8540 }, { "epoch": 0.44, "grad_norm": 0.53515625, "learning_rate": 0.00013678054818623965, "loss": 1.4242, "step": 8545 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.00013669656149548718, "loss": 1.4485, "step": 8550 }, { "epoch": 0.44, "grad_norm": 0.5390625, "learning_rate": 0.00013661254488097003, "loss": 1.4056, "step": 8555 }, { "epoch": 0.44, "grad_norm": 0.515625, "learning_rate": 0.00013652849841119856, "loss": 1.4034, "step": 8560 }, { "epoch": 0.44, "grad_norm": 0.53515625, "learning_rate": 0.00013644442215470737, "loss": 1.4411, "step": 8565 }, { "epoch": 0.44, "grad_norm": 0.5234375, "learning_rate": 0.00013636031618005553, "loss": 1.4342, "step": 8570 }, { "epoch": 0.44, "grad_norm": 0.53125, "learning_rate": 0.0001362761805558261, "loss": 1.4277, "step": 8575 }, { "epoch": 0.44, "grad_norm": 0.51171875, "learning_rate": 0.00013619201535062657, "loss": 1.4415, "step": 8580 }, { "epoch": 0.44, "grad_norm": 0.5546875, "learning_rate": 0.00013610782063308837, "loss": 1.4548, "step": 8585 }, { "epoch": 0.44, "grad_norm": 0.515625, "learning_rate": 0.00013602359647186708, "loss": 1.4578, "step": 8590 }, { "epoch": 0.44, "grad_norm": 0.515625, "learning_rate": 0.00013593934293564222, "loss": 1.4366, "step": 8595 }, { "epoch": 0.44, "grad_norm": 0.5078125, "learning_rate": 0.00013585506009311738, "loss": 1.4126, "step": 8600 }, { "epoch": 0.45, "grad_norm": 0.515625, "learning_rate": 0.00013577074801301992, "loss": 1.4599, "step": 8605 }, { "epoch": 0.45, "grad_norm": 0.51953125, "learning_rate": 0.00013568640676410115, "loss": 1.3858, "step": 8610 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013560203641513606, "loss": 1.4707, "step": 8615 }, { "epoch": 0.45, "grad_norm": 0.5, "learning_rate": 0.0001355176370349235, "loss": 1.4366, "step": 8620 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013543320869228585, "loss": 1.4617, "step": 8625 }, { "epoch": 0.45, "grad_norm": 0.5, "learning_rate": 0.00013534875145606925, "loss": 1.3993, "step": 8630 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.00013526426539514324, "loss": 1.4384, "step": 8635 }, { "epoch": 0.45, "grad_norm": 0.5, "learning_rate": 0.00013517975057840097, "loss": 1.4434, "step": 8640 }, { "epoch": 0.45, "grad_norm": 0.4921875, "learning_rate": 0.00013509520707475907, "loss": 1.4362, "step": 8645 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013501063495315743, "loss": 1.4159, "step": 8650 }, { "epoch": 0.45, "grad_norm": 0.51953125, "learning_rate": 0.0001349260342825595, "loss": 1.3869, "step": 8655 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.00013484140513195166, "loss": 1.3795, "step": 8660 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.0001347567475703439, "loss": 1.3984, "step": 8665 }, { "epoch": 0.45, "grad_norm": 0.53125, "learning_rate": 0.00013467206166676914, "loss": 1.4237, "step": 8670 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.0001345873474902835, "loss": 1.4428, "step": 8675 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.000134502605109966, "loss": 1.4228, "step": 8680 }, { "epoch": 0.45, "grad_norm": 0.55859375, "learning_rate": 0.00013441783459491893, "loss": 1.4034, "step": 8685 }, { "epoch": 0.45, "grad_norm": 0.5703125, "learning_rate": 0.00013433303601426727, "loss": 1.4506, "step": 8690 }, { "epoch": 0.45, "grad_norm": 0.50390625, "learning_rate": 0.0001342482094371591, "loss": 1.4052, "step": 8695 }, { "epoch": 0.45, "grad_norm": 0.54296875, "learning_rate": 0.00013416335493276511, "loss": 1.4684, "step": 8700 }, { "epoch": 0.45, "grad_norm": 0.5390625, "learning_rate": 0.00013407847257027896, "loss": 1.3952, "step": 8705 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013399356241891686, "loss": 1.4229, "step": 8710 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013390862454791785, "loss": 1.4695, "step": 8715 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.00013382365902654336, "loss": 1.4372, "step": 8720 }, { "epoch": 0.45, "grad_norm": 0.49609375, "learning_rate": 0.00013373866592407765, "loss": 1.4514, "step": 8725 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.00013365364530982716, "loss": 1.4135, "step": 8730 }, { "epoch": 0.45, "grad_norm": 0.5234375, "learning_rate": 0.00013356859725312104, "loss": 1.4251, "step": 8735 }, { "epoch": 0.45, "grad_norm": 0.5078125, "learning_rate": 0.0001334835218233106, "loss": 1.4028, "step": 8740 }, { "epoch": 0.45, "grad_norm": 0.53125, "learning_rate": 0.00013339841908976963, "loss": 1.3512, "step": 8745 }, { "epoch": 0.45, "grad_norm": 0.55078125, "learning_rate": 0.00013331328912189407, "loss": 1.4351, "step": 8750 }, { "epoch": 0.45, "grad_norm": 0.51953125, "learning_rate": 0.00013322813198910212, "loss": 1.4621, "step": 8755 }, { "epoch": 0.45, "grad_norm": 0.51953125, "learning_rate": 0.0001331429477608342, "loss": 1.4065, "step": 8760 }, { "epoch": 0.45, "grad_norm": 0.52734375, "learning_rate": 0.00013305773650655267, "loss": 1.4684, "step": 8765 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013297249829574202, "loss": 1.4672, "step": 8770 }, { "epoch": 0.45, "grad_norm": 0.53125, "learning_rate": 0.00013288723319790875, "loss": 1.4093, "step": 8775 }, { "epoch": 0.45, "grad_norm": 0.498046875, "learning_rate": 0.00013280194128258122, "loss": 1.4403, "step": 8780 }, { "epoch": 0.45, "grad_norm": 0.53125, "learning_rate": 0.00013271662261930971, "loss": 1.4036, "step": 8785 }, { "epoch": 0.45, "grad_norm": 0.53515625, "learning_rate": 0.00013263127727766624, "loss": 1.4239, "step": 8790 }, { "epoch": 0.46, "grad_norm": 0.51171875, "learning_rate": 0.00013254590532724468, "loss": 1.4219, "step": 8795 }, { "epoch": 0.46, "grad_norm": 0.5078125, "learning_rate": 0.00013246050683766048, "loss": 1.4247, "step": 8800 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00013237508187855093, "loss": 1.4521, "step": 8805 }, { "epoch": 0.46, "grad_norm": 0.51953125, "learning_rate": 0.0001322896305195746, "loss": 1.4264, "step": 8810 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00013220415283041195, "loss": 1.4408, "step": 8815 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00013211864888076457, "loss": 1.4116, "step": 8820 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00013203311874035567, "loss": 1.4312, "step": 8825 }, { "epoch": 0.46, "grad_norm": 0.53125, "learning_rate": 0.00013194756247892977, "loss": 1.4617, "step": 8830 }, { "epoch": 0.46, "grad_norm": 0.54296875, "learning_rate": 0.00013186198016625268, "loss": 1.4574, "step": 8835 }, { "epoch": 0.46, "grad_norm": 0.5625, "learning_rate": 0.00013177637187211143, "loss": 1.433, "step": 8840 }, { "epoch": 0.46, "grad_norm": 0.515625, "learning_rate": 0.00013169073766631427, "loss": 1.4582, "step": 8845 }, { "epoch": 0.46, "grad_norm": 0.53125, "learning_rate": 0.00013160507761869063, "loss": 1.4727, "step": 8850 }, { "epoch": 0.46, "grad_norm": 0.51953125, "learning_rate": 0.00013151939179909086, "loss": 1.4673, "step": 8855 }, { "epoch": 0.46, "grad_norm": 0.53515625, "learning_rate": 0.0001314336802773865, "loss": 1.4465, "step": 8860 }, { "epoch": 0.46, "grad_norm": 0.49609375, "learning_rate": 0.00013134794312346992, "loss": 1.4471, "step": 8865 }, { "epoch": 0.46, "grad_norm": 0.515625, "learning_rate": 0.00013126218040725447, "loss": 1.4217, "step": 8870 }, { "epoch": 0.46, "grad_norm": 0.51953125, "learning_rate": 0.00013117639219867427, "loss": 1.4203, "step": 8875 }, { "epoch": 0.46, "grad_norm": 0.53125, "learning_rate": 0.00013109057856768434, "loss": 1.4375, "step": 8880 }, { "epoch": 0.46, "grad_norm": 0.546875, "learning_rate": 0.00013100473958426028, "loss": 1.4119, "step": 8885 }, { "epoch": 0.46, "grad_norm": 0.55078125, "learning_rate": 0.00013091887531839852, "loss": 1.4325, "step": 8890 }, { "epoch": 0.46, "grad_norm": 0.50390625, "learning_rate": 0.00013083298584011597, "loss": 1.3835, "step": 8895 }, { "epoch": 0.46, "grad_norm": 0.5625, "learning_rate": 0.0001307470712194502, "loss": 1.4462, "step": 8900 }, { "epoch": 0.46, "grad_norm": 0.515625, "learning_rate": 0.0001306611315264592, "loss": 1.4318, "step": 8905 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00013057516683122152, "loss": 1.431, "step": 8910 }, { "epoch": 0.46, "grad_norm": 0.54296875, "learning_rate": 0.00013048917720383593, "loss": 1.435, "step": 8915 }, { "epoch": 0.46, "grad_norm": 0.5078125, "learning_rate": 0.00013040316271442173, "loss": 1.4275, "step": 8920 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.0001303171234331183, "loss": 1.422, "step": 8925 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00013023105943008539, "loss": 1.4462, "step": 8930 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.0001301449707755028, "loss": 1.4093, "step": 8935 }, { "epoch": 0.46, "grad_norm": 0.5546875, "learning_rate": 0.00013005885753957048, "loss": 1.4153, "step": 8940 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00012997271979250843, "loss": 1.4319, "step": 8945 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 0.00012988655760455667, "loss": 1.4326, "step": 8950 }, { "epoch": 0.46, "grad_norm": 0.53125, "learning_rate": 0.000129800371045975, "loss": 1.3916, "step": 8955 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00012971416018704333, "loss": 1.4428, "step": 8960 }, { "epoch": 0.46, "grad_norm": 0.515625, "learning_rate": 0.00012962792509806117, "loss": 1.4368, "step": 8965 }, { "epoch": 0.46, "grad_norm": 0.5078125, "learning_rate": 0.0001295416658493479, "loss": 1.4149, "step": 8970 }, { "epoch": 0.46, "grad_norm": 0.5703125, "learning_rate": 0.0001294553825112426, "loss": 1.4172, "step": 8975 }, { "epoch": 0.46, "grad_norm": 0.52734375, "learning_rate": 0.00012936907515410392, "loss": 1.4337, "step": 8980 }, { "epoch": 0.46, "grad_norm": 0.5390625, "learning_rate": 0.00012928274384831014, "loss": 1.3825, "step": 8985 }, { "epoch": 0.47, "grad_norm": 0.5, "learning_rate": 0.00012919638866425913, "loss": 1.4085, "step": 8990 }, { "epoch": 0.47, "grad_norm": 0.51953125, "learning_rate": 0.00012911000967236815, "loss": 1.4035, "step": 8995 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.00012902360694307387, "loss": 1.4052, "step": 9000 }, { "epoch": 0.47, "grad_norm": 0.515625, "learning_rate": 0.00012893718054683242, "loss": 1.4425, "step": 9005 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.00012885073055411903, "loss": 1.4268, "step": 9010 }, { "epoch": 0.47, "grad_norm": 0.53515625, "learning_rate": 0.00012876425703542844, "loss": 1.4251, "step": 9015 }, { "epoch": 0.47, "grad_norm": 0.53515625, "learning_rate": 0.00012867776006127428, "loss": 1.4199, "step": 9020 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012859123970218958, "loss": 1.4357, "step": 9025 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.00012850469602872623, "loss": 1.4119, "step": 9030 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.0001284181291114553, "loss": 1.3873, "step": 9035 }, { "epoch": 0.47, "grad_norm": 0.52734375, "learning_rate": 0.00012833153902096664, "loss": 1.3895, "step": 9040 }, { "epoch": 0.47, "grad_norm": 0.53125, "learning_rate": 0.00012824492582786916, "loss": 1.4456, "step": 9045 }, { "epoch": 0.47, "grad_norm": 0.515625, "learning_rate": 0.00012815828960279047, "loss": 1.41, "step": 9050 }, { "epoch": 0.47, "grad_norm": 0.52734375, "learning_rate": 0.00012807163041637706, "loss": 1.4373, "step": 9055 }, { "epoch": 0.47, "grad_norm": 0.52734375, "learning_rate": 0.0001279849483392941, "loss": 1.3842, "step": 9060 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.00012789824344222546, "loss": 1.4345, "step": 9065 }, { "epoch": 0.47, "grad_norm": 0.5078125, "learning_rate": 0.00012781151579587357, "loss": 1.4377, "step": 9070 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.00012772476547095944, "loss": 1.4504, "step": 9075 }, { "epoch": 0.47, "grad_norm": 0.5078125, "learning_rate": 0.00012763799253822256, "loss": 1.4384, "step": 9080 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.00012755119706842088, "loss": 1.4835, "step": 9085 }, { "epoch": 0.47, "grad_norm": 0.494140625, "learning_rate": 0.00012746437913233066, "loss": 1.4315, "step": 9090 }, { "epoch": 0.47, "grad_norm": 0.5078125, "learning_rate": 0.0001273775388007466, "loss": 1.4752, "step": 9095 }, { "epoch": 0.47, "grad_norm": 0.5625, "learning_rate": 0.00012729067614448156, "loss": 1.4255, "step": 9100 }, { "epoch": 0.47, "grad_norm": 0.50390625, "learning_rate": 0.00012720379123436665, "loss": 1.422, "step": 9105 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012711688414125108, "loss": 1.44, "step": 9110 }, { "epoch": 0.47, "grad_norm": 0.515625, "learning_rate": 0.0001270299549360022, "loss": 1.4008, "step": 9115 }, { "epoch": 0.47, "grad_norm": 0.5234375, "learning_rate": 0.0001269430036895054, "loss": 1.4218, "step": 9120 }, { "epoch": 0.47, "grad_norm": 0.5390625, "learning_rate": 0.00012685603047266398, "loss": 1.4274, "step": 9125 }, { "epoch": 0.47, "grad_norm": 0.51953125, "learning_rate": 0.0001267690353563992, "loss": 1.4504, "step": 9130 }, { "epoch": 0.47, "grad_norm": 0.53125, "learning_rate": 0.00012668201841165017, "loss": 1.4366, "step": 9135 }, { "epoch": 0.47, "grad_norm": 0.55078125, "learning_rate": 0.0001265949797093738, "loss": 1.4325, "step": 9140 }, { "epoch": 0.47, "grad_norm": 0.53125, "learning_rate": 0.00012650791932054473, "loss": 1.4445, "step": 9145 }, { "epoch": 0.47, "grad_norm": 0.498046875, "learning_rate": 0.00012642083731615532, "loss": 1.4038, "step": 9150 }, { "epoch": 0.47, "grad_norm": 0.515625, "learning_rate": 0.0001263337337672155, "loss": 1.4125, "step": 9155 }, { "epoch": 0.47, "grad_norm": 0.58203125, "learning_rate": 0.00012624660874475287, "loss": 1.442, "step": 9160 }, { "epoch": 0.47, "grad_norm": 0.51171875, "learning_rate": 0.00012615946231981238, "loss": 1.4092, "step": 9165 }, { "epoch": 0.47, "grad_norm": 0.5078125, "learning_rate": 0.00012607229456345658, "loss": 1.4424, "step": 9170 }, { "epoch": 0.47, "grad_norm": 0.546875, "learning_rate": 0.0001259851055467653, "loss": 1.4196, "step": 9175 }, { "epoch": 0.47, "grad_norm": 0.5625, "learning_rate": 0.00012589789534083582, "loss": 1.422, "step": 9180 }, { "epoch": 0.48, "grad_norm": 0.54296875, "learning_rate": 0.0001258106640167826, "loss": 1.4399, "step": 9185 }, { "epoch": 0.48, "grad_norm": 0.478515625, "learning_rate": 0.0001257234116457374, "loss": 1.4069, "step": 9190 }, { "epoch": 0.48, "grad_norm": 0.51171875, "learning_rate": 0.0001256361382988491, "loss": 1.4012, "step": 9195 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.00012554884404728368, "loss": 1.3978, "step": 9200 }, { "epoch": 0.48, "grad_norm": 0.51953125, "learning_rate": 0.00012546152896222417, "loss": 1.3993, "step": 9205 }, { "epoch": 0.48, "grad_norm": 0.52734375, "learning_rate": 0.00012537419311487057, "loss": 1.419, "step": 9210 }, { "epoch": 0.48, "grad_norm": 0.51953125, "learning_rate": 0.00012528683657643988, "loss": 1.4224, "step": 9215 }, { "epoch": 0.48, "grad_norm": 0.56640625, "learning_rate": 0.0001251994594181659, "loss": 1.4741, "step": 9220 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.00012511206171129927, "loss": 1.4216, "step": 9225 }, { "epoch": 0.48, "grad_norm": 0.55078125, "learning_rate": 0.00012502464352710742, "loss": 1.4371, "step": 9230 }, { "epoch": 0.48, "grad_norm": 0.5, "learning_rate": 0.0001249372049368744, "loss": 1.4272, "step": 9235 }, { "epoch": 0.48, "grad_norm": 0.51953125, "learning_rate": 0.00012484974601190097, "loss": 1.4451, "step": 9240 }, { "epoch": 0.48, "grad_norm": 0.5078125, "learning_rate": 0.00012476226682350442, "loss": 1.4301, "step": 9245 }, { "epoch": 0.48, "grad_norm": 0.515625, "learning_rate": 0.00012467476744301866, "loss": 1.4413, "step": 9250 }, { "epoch": 0.48, "grad_norm": 0.52734375, "learning_rate": 0.00012458724794179392, "loss": 1.3858, "step": 9255 }, { "epoch": 0.48, "grad_norm": 0.5390625, "learning_rate": 0.00012449970839119697, "loss": 1.4114, "step": 9260 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.00012441214886261076, "loss": 1.4585, "step": 9265 }, { "epoch": 0.48, "grad_norm": 0.5234375, "learning_rate": 0.00012432456942743477, "loss": 1.4263, "step": 9270 }, { "epoch": 0.48, "grad_norm": 0.51953125, "learning_rate": 0.00012423697015708456, "loss": 1.4141, "step": 9275 }, { "epoch": 0.48, "grad_norm": 0.53515625, "learning_rate": 0.0001241493511229918, "loss": 1.4286, "step": 9280 }, { "epoch": 0.48, "grad_norm": 0.5078125, "learning_rate": 0.0001240617123966045, "loss": 1.404, "step": 9285 }, { "epoch": 0.48, "grad_norm": 0.5546875, "learning_rate": 0.00012397405404938652, "loss": 1.4444, "step": 9290 }, { "epoch": 0.48, "grad_norm": 0.51171875, "learning_rate": 0.00012388637615281777, "loss": 1.3767, "step": 9295 }, { "epoch": 0.48, "grad_norm": 0.51953125, "learning_rate": 0.00012379867877839414, "loss": 1.4326, "step": 9300 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.00012371096199762747, "loss": 1.4363, "step": 9305 }, { "epoch": 0.48, "grad_norm": 0.5078125, "learning_rate": 0.0001236232258820452, "loss": 1.3958, "step": 9310 }, { "epoch": 0.48, "grad_norm": 0.52734375, "learning_rate": 0.0001235354705031908, "loss": 1.4464, "step": 9315 }, { "epoch": 0.48, "grad_norm": 0.52734375, "learning_rate": 0.00012344769593262324, "loss": 1.3948, "step": 9320 }, { "epoch": 0.48, "grad_norm": 0.546875, "learning_rate": 0.0001233599022419173, "loss": 1.4061, "step": 9325 }, { "epoch": 0.48, "grad_norm": 0.53125, "learning_rate": 0.0001232720895026632, "loss": 1.4299, "step": 9330 }, { "epoch": 0.48, "grad_norm": 0.51171875, "learning_rate": 0.00012318425778646685, "loss": 1.4029, "step": 9335 }, { "epoch": 0.48, "grad_norm": 0.50390625, "learning_rate": 0.0001230964071649495, "loss": 1.37, "step": 9340 }, { "epoch": 0.48, "grad_norm": 0.51171875, "learning_rate": 0.00012300853770974787, "loss": 1.3944, "step": 9345 }, { "epoch": 0.48, "grad_norm": 0.5078125, "learning_rate": 0.00012292064949251405, "loss": 1.3961, "step": 9350 }, { "epoch": 0.48, "grad_norm": 0.54296875, "learning_rate": 0.00012283274258491543, "loss": 1.3656, "step": 9355 }, { "epoch": 0.48, "grad_norm": 0.53515625, "learning_rate": 0.00012274481705863463, "loss": 1.4532, "step": 9360 }, { "epoch": 0.48, "grad_norm": 0.50390625, "learning_rate": 0.00012265687298536942, "loss": 1.4187, "step": 9365 }, { "epoch": 0.48, "grad_norm": 0.50390625, "learning_rate": 0.00012256891043683276, "loss": 1.4343, "step": 9370 }, { "epoch": 0.49, "grad_norm": 0.515625, "learning_rate": 0.00012248092948475263, "loss": 1.4114, "step": 9375 }, { "epoch": 0.49, "grad_norm": 0.53515625, "learning_rate": 0.000122392930200872, "loss": 1.3942, "step": 9380 }, { "epoch": 0.49, "grad_norm": 0.52734375, "learning_rate": 0.00012230491265694888, "loss": 1.4235, "step": 9385 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.000122216876924756, "loss": 1.4181, "step": 9390 }, { "epoch": 0.49, "grad_norm": 0.5078125, "learning_rate": 0.00012212882307608116, "loss": 1.4147, "step": 9395 }, { "epoch": 0.49, "grad_norm": 0.53515625, "learning_rate": 0.00012204075118272669, "loss": 1.3878, "step": 9400 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.0001219526613165098, "loss": 1.4422, "step": 9405 }, { "epoch": 0.49, "grad_norm": 0.52734375, "learning_rate": 0.00012186455354926228, "loss": 1.451, "step": 9410 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012177642795283053, "loss": 1.4068, "step": 9415 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.00012168828459907551, "loss": 1.4096, "step": 9420 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00012160012355987265, "loss": 1.4282, "step": 9425 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.00012151194490711178, "loss": 1.4695, "step": 9430 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.00012142374871269713, "loss": 1.4556, "step": 9435 }, { "epoch": 0.49, "grad_norm": 0.50390625, "learning_rate": 0.00012133553504854718, "loss": 1.3891, "step": 9440 }, { "epoch": 0.49, "grad_norm": 0.5, "learning_rate": 0.00012124730398659474, "loss": 1.4015, "step": 9445 }, { "epoch": 0.49, "grad_norm": 0.51953125, "learning_rate": 0.0001211590555987867, "loss": 1.4103, "step": 9450 }, { "epoch": 0.49, "grad_norm": 0.53125, "learning_rate": 0.00012107078995708417, "loss": 1.4507, "step": 9455 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012098250713346231, "loss": 1.4796, "step": 9460 }, { "epoch": 0.49, "grad_norm": 0.55078125, "learning_rate": 0.00012089420719991022, "loss": 1.4649, "step": 9465 }, { "epoch": 0.49, "grad_norm": 0.515625, "learning_rate": 0.00012080589022843107, "loss": 1.4003, "step": 9470 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.0001207175562910418, "loss": 1.4453, "step": 9475 }, { "epoch": 0.49, "grad_norm": 0.515625, "learning_rate": 0.00012062920545977327, "loss": 1.4391, "step": 9480 }, { "epoch": 0.49, "grad_norm": 0.515625, "learning_rate": 0.00012054083780667012, "loss": 1.4181, "step": 9485 }, { "epoch": 0.49, "grad_norm": 0.51171875, "learning_rate": 0.00012045245340379063, "loss": 1.4388, "step": 9490 }, { "epoch": 0.49, "grad_norm": 0.5078125, "learning_rate": 0.0001203640523232068, "loss": 1.44, "step": 9495 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012027563463700427, "loss": 1.4697, "step": 9500 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00012018720041728206, "loss": 1.4354, "step": 9505 }, { "epoch": 0.49, "grad_norm": 0.51171875, "learning_rate": 0.00012009874973615287, "loss": 1.3918, "step": 9510 }, { "epoch": 0.49, "grad_norm": 0.51171875, "learning_rate": 0.00012001028266574268, "loss": 1.4441, "step": 9515 }, { "epoch": 0.49, "grad_norm": 0.5078125, "learning_rate": 0.00011992179927819093, "loss": 1.4441, "step": 9520 }, { "epoch": 0.49, "grad_norm": 0.5390625, "learning_rate": 0.00011983329964565028, "loss": 1.378, "step": 9525 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.00011974478384028672, "loss": 1.4261, "step": 9530 }, { "epoch": 0.49, "grad_norm": 0.54296875, "learning_rate": 0.00011965625193427934, "loss": 1.4045, "step": 9535 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.00011956770399982045, "loss": 1.4679, "step": 9540 }, { "epoch": 0.49, "grad_norm": 0.5078125, "learning_rate": 0.00011947914010911534, "loss": 1.4444, "step": 9545 }, { "epoch": 0.49, "grad_norm": 0.52734375, "learning_rate": 0.0001193905603343824, "loss": 1.434, "step": 9550 }, { "epoch": 0.49, "grad_norm": 0.50390625, "learning_rate": 0.00011930196474785294, "loss": 1.4215, "step": 9555 }, { "epoch": 0.49, "grad_norm": 0.5234375, "learning_rate": 0.00011921335342177111, "loss": 1.4442, "step": 9560 }, { "epoch": 0.49, "grad_norm": 0.515625, "learning_rate": 0.00011912472642839394, "loss": 1.4107, "step": 9565 }, { "epoch": 0.5, "grad_norm": 0.515625, "learning_rate": 0.00011903608383999125, "loss": 1.3996, "step": 9570 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011894742572884554, "loss": 1.3999, "step": 9575 }, { "epoch": 0.5, "grad_norm": 0.51953125, "learning_rate": 0.00011885875216725205, "loss": 1.4767, "step": 9580 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.00011877006322751847, "loss": 1.4236, "step": 9585 }, { "epoch": 0.5, "grad_norm": 0.51171875, "learning_rate": 0.00011868135898196519, "loss": 1.429, "step": 9590 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011859263950292496, "loss": 1.4233, "step": 9595 }, { "epoch": 0.5, "grad_norm": 0.515625, "learning_rate": 0.00011850390486274303, "loss": 1.4237, "step": 9600 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.00011841515513377697, "loss": 1.4137, "step": 9605 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.00011832639038839666, "loss": 1.4343, "step": 9610 }, { "epoch": 0.5, "grad_norm": 0.484375, "learning_rate": 0.00011823761069898425, "loss": 1.4201, "step": 9615 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011814881613793404, "loss": 1.4431, "step": 9620 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.0001180600067776525, "loss": 1.4408, "step": 9625 }, { "epoch": 0.5, "grad_norm": 0.51171875, "learning_rate": 0.00011797118269055812, "loss": 1.4132, "step": 9630 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.0001178823439490814, "loss": 1.4418, "step": 9635 }, { "epoch": 0.5, "grad_norm": 0.5078125, "learning_rate": 0.00011779349062566485, "loss": 1.4081, "step": 9640 }, { "epoch": 0.5, "grad_norm": 0.54296875, "learning_rate": 0.00011770462279276282, "loss": 1.4312, "step": 9645 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.0001176157405228415, "loss": 1.4477, "step": 9650 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.0001175268438883789, "loss": 1.4527, "step": 9655 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.0001174379329618646, "loss": 1.3892, "step": 9660 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.00011734900781580003, "loss": 1.435, "step": 9665 }, { "epoch": 0.5, "grad_norm": 0.51953125, "learning_rate": 0.00011726006852269804, "loss": 1.4074, "step": 9670 }, { "epoch": 0.5, "grad_norm": 0.546875, "learning_rate": 0.00011717111515508319, "loss": 1.4093, "step": 9675 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011708214778549131, "loss": 1.4405, "step": 9680 }, { "epoch": 0.5, "grad_norm": 0.53515625, "learning_rate": 0.00011699316648646986, "loss": 1.4514, "step": 9685 }, { "epoch": 0.5, "grad_norm": 0.50390625, "learning_rate": 0.00011690417133057747, "loss": 1.4149, "step": 9690 }, { "epoch": 0.5, "grad_norm": 0.51171875, "learning_rate": 0.00011681516239038423, "loss": 1.4184, "step": 9695 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.00011672613973847136, "loss": 1.4329, "step": 9700 }, { "epoch": 0.5, "grad_norm": 0.53125, "learning_rate": 0.00011663710344743135, "loss": 1.4197, "step": 9705 }, { "epoch": 0.5, "grad_norm": 0.52734375, "learning_rate": 0.00011654805358986766, "loss": 1.425, "step": 9710 }, { "epoch": 0.5, "grad_norm": 0.54296875, "learning_rate": 0.00011645899023839499, "loss": 1.4101, "step": 9715 }, { "epoch": 0.5, "grad_norm": 0.5625, "learning_rate": 0.00011636991346563893, "loss": 1.4285, "step": 9720 }, { "epoch": 0.5, "grad_norm": 0.515625, "learning_rate": 0.00011628082334423608, "loss": 1.3963, "step": 9725 }, { "epoch": 0.5, "grad_norm": 0.55078125, "learning_rate": 0.00011619171994683389, "loss": 1.4059, "step": 9730 }, { "epoch": 0.5, "grad_norm": 0.55859375, "learning_rate": 0.00011610260334609063, "loss": 1.3875, "step": 9735 }, { "epoch": 0.5, "grad_norm": 0.51171875, "learning_rate": 0.00011601347361467534, "loss": 1.4455, "step": 9740 }, { "epoch": 0.5, "grad_norm": 0.57421875, "learning_rate": 0.00011592433082526781, "loss": 1.4541, "step": 9745 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 0.00011583517505055839, "loss": 1.4156, "step": 9750 }, { "epoch": 0.5, "grad_norm": 0.50390625, "learning_rate": 0.00011574600636324813, "loss": 1.3933, "step": 9755 }, { "epoch": 0.5, "grad_norm": 0.55078125, "learning_rate": 0.00011565682483604852, "loss": 1.3996, "step": 9760 }, { "epoch": 0.51, "grad_norm": 0.5078125, "learning_rate": 0.00011556763054168154, "loss": 1.4398, "step": 9765 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011547842355287961, "loss": 1.4131, "step": 9770 }, { "epoch": 0.51, "grad_norm": 0.51171875, "learning_rate": 0.00011538920394238551, "loss": 1.3948, "step": 9775 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.00011529997178295223, "loss": 1.4212, "step": 9780 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011521072714734309, "loss": 1.4149, "step": 9785 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011512147010833152, "loss": 1.4217, "step": 9790 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011503220073870111, "loss": 1.4138, "step": 9795 }, { "epoch": 0.51, "grad_norm": 0.515625, "learning_rate": 0.00011494291911124544, "loss": 1.4112, "step": 9800 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.0001148536252987682, "loss": 1.3989, "step": 9805 }, { "epoch": 0.51, "grad_norm": 0.515625, "learning_rate": 0.00011476431937408285, "loss": 1.4537, "step": 9810 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.0001146750014100129, "loss": 1.4093, "step": 9815 }, { "epoch": 0.51, "grad_norm": 0.53125, "learning_rate": 0.00011458567147939154, "loss": 1.4279, "step": 9820 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011449632965506183, "loss": 1.4378, "step": 9825 }, { "epoch": 0.51, "grad_norm": 0.515625, "learning_rate": 0.00011440697600987642, "loss": 1.435, "step": 9830 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011431761061669768, "loss": 1.4388, "step": 9835 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011422823354839753, "loss": 1.4265, "step": 9840 }, { "epoch": 0.51, "grad_norm": 0.5703125, "learning_rate": 0.00011413884487785742, "loss": 1.4337, "step": 9845 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011404944467796828, "loss": 1.4373, "step": 9850 }, { "epoch": 0.51, "grad_norm": 0.5234375, "learning_rate": 0.00011396003302163034, "loss": 1.4282, "step": 9855 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011387060998175329, "loss": 1.4358, "step": 9860 }, { "epoch": 0.51, "grad_norm": 0.546875, "learning_rate": 0.00011378117563125608, "loss": 1.4105, "step": 9865 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.00011369173004306683, "loss": 1.4332, "step": 9870 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011360227329012287, "loss": 1.443, "step": 9875 }, { "epoch": 0.51, "grad_norm": 0.55859375, "learning_rate": 0.00011351280544537064, "loss": 1.4231, "step": 9880 }, { "epoch": 0.51, "grad_norm": 0.51953125, "learning_rate": 0.00011342332658176555, "loss": 1.4377, "step": 9885 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.00011333383677227214, "loss": 1.4763, "step": 9890 }, { "epoch": 0.51, "grad_norm": 0.50390625, "learning_rate": 0.00011324433608986369, "loss": 1.3878, "step": 9895 }, { "epoch": 0.51, "grad_norm": 0.515625, "learning_rate": 0.00011315482460752252, "loss": 1.4342, "step": 9900 }, { "epoch": 0.51, "grad_norm": 0.51171875, "learning_rate": 0.0001130653023982396, "loss": 1.4205, "step": 9905 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.00011297576953501481, "loss": 1.4384, "step": 9910 }, { "epoch": 0.51, "grad_norm": 0.51171875, "learning_rate": 0.00011288622609085657, "loss": 1.4072, "step": 9915 }, { "epoch": 0.51, "grad_norm": 0.53515625, "learning_rate": 0.00011279667213878205, "loss": 1.417, "step": 9920 }, { "epoch": 0.51, "grad_norm": 0.52734375, "learning_rate": 0.00011270710775181687, "loss": 1.3894, "step": 9925 }, { "epoch": 0.51, "grad_norm": 0.54296875, "learning_rate": 0.00011261753300299529, "loss": 1.3755, "step": 9930 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011252794796535988, "loss": 1.3901, "step": 9935 }, { "epoch": 0.51, "grad_norm": 0.546875, "learning_rate": 0.0001124383527119617, "loss": 1.4307, "step": 9940 }, { "epoch": 0.51, "grad_norm": 0.5390625, "learning_rate": 0.00011234874731586012, "loss": 1.463, "step": 9945 }, { "epoch": 0.51, "grad_norm": 0.51953125, "learning_rate": 0.00011225913185012276, "loss": 1.3902, "step": 9950 }, { "epoch": 0.52, "grad_norm": 0.5234375, "learning_rate": 0.00011216950638782545, "loss": 1.3977, "step": 9955 }, { "epoch": 0.52, "grad_norm": 0.51953125, "learning_rate": 0.00011207987100205219, "loss": 1.4353, "step": 9960 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00011199022576589506, "loss": 1.4362, "step": 9965 }, { "epoch": 0.52, "grad_norm": 0.515625, "learning_rate": 0.00011190057075245422, "loss": 1.4327, "step": 9970 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00011181090603483768, "loss": 1.4427, "step": 9975 }, { "epoch": 0.52, "grad_norm": 0.5859375, "learning_rate": 0.00011172123168616153, "loss": 1.4526, "step": 9980 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.00011163154777954956, "loss": 1.3659, "step": 9985 }, { "epoch": 0.52, "grad_norm": 0.5234375, "learning_rate": 0.00011154185438813345, "loss": 1.4265, "step": 9990 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00011145215158505258, "loss": 1.4005, "step": 9995 }, { "epoch": 0.52, "grad_norm": 0.5703125, "learning_rate": 0.00011136243944345402, "loss": 1.4192, "step": 10000 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.00011127271803649243, "loss": 1.4063, "step": 10005 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00011118298743733004, "loss": 1.4345, "step": 10010 }, { "epoch": 0.52, "grad_norm": 0.51953125, "learning_rate": 0.00011109324771913659, "loss": 1.4432, "step": 10015 }, { "epoch": 0.52, "grad_norm": 0.5703125, "learning_rate": 0.00011100349895508921, "loss": 1.4541, "step": 10020 }, { "epoch": 0.52, "grad_norm": 0.5078125, "learning_rate": 0.0001109137412183725, "loss": 1.4266, "step": 10025 }, { "epoch": 0.52, "grad_norm": 0.5234375, "learning_rate": 0.00011082397458217823, "loss": 1.4216, "step": 10030 }, { "epoch": 0.52, "grad_norm": 0.5390625, "learning_rate": 0.0001107341991197056, "loss": 1.4363, "step": 10035 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00011064441490416083, "loss": 1.3989, "step": 10040 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00011055462200875743, "loss": 1.4238, "step": 10045 }, { "epoch": 0.52, "grad_norm": 0.53125, "learning_rate": 0.00011046482050671589, "loss": 1.3771, "step": 10050 }, { "epoch": 0.52, "grad_norm": 0.51171875, "learning_rate": 0.00011037501047126379, "loss": 1.4204, "step": 10055 }, { "epoch": 0.52, "grad_norm": 0.56640625, "learning_rate": 0.0001102851919756356, "loss": 1.4242, "step": 10060 }, { "epoch": 0.52, "grad_norm": 0.50390625, "learning_rate": 0.00011019536509307276, "loss": 1.443, "step": 10065 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.00011010552989682343, "loss": 1.4455, "step": 10070 }, { "epoch": 0.52, "grad_norm": 0.52734375, "learning_rate": 0.00011001568646014269, "loss": 1.4177, "step": 10075 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00010992583485629227, "loss": 1.4191, "step": 10080 }, { "epoch": 0.52, "grad_norm": 0.5234375, "learning_rate": 0.00010983597515854055, "loss": 1.394, "step": 10085 }, { "epoch": 0.52, "grad_norm": 0.53125, "learning_rate": 0.00010974610744016254, "loss": 1.4331, "step": 10090 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00010965623177443978, "loss": 1.4209, "step": 10095 }, { "epoch": 0.52, "grad_norm": 0.5234375, "learning_rate": 0.00010956634823466028, "loss": 1.4153, "step": 10100 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00010947645689411849, "loss": 1.4491, "step": 10105 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.00010938655782611517, "loss": 1.3941, "step": 10110 }, { "epoch": 0.52, "grad_norm": 0.55078125, "learning_rate": 0.0001092966511039575, "loss": 1.4261, "step": 10115 }, { "epoch": 0.52, "grad_norm": 0.5625, "learning_rate": 0.00010920673680095874, "loss": 1.4626, "step": 10120 }, { "epoch": 0.52, "grad_norm": 0.5078125, "learning_rate": 0.00010911681499043849, "loss": 1.4246, "step": 10125 }, { "epoch": 0.52, "grad_norm": 0.5, "learning_rate": 0.00010902688574572233, "loss": 1.4021, "step": 10130 }, { "epoch": 0.52, "grad_norm": 0.546875, "learning_rate": 0.00010893694914014201, "loss": 1.4188, "step": 10135 }, { "epoch": 0.52, "grad_norm": 0.53515625, "learning_rate": 0.0001088470052470352, "loss": 1.4214, "step": 10140 }, { "epoch": 0.52, "grad_norm": 0.54296875, "learning_rate": 0.00010875705413974561, "loss": 1.4519, "step": 10145 }, { "epoch": 0.53, "grad_norm": 0.50390625, "learning_rate": 0.00010866709589162276, "loss": 1.4177, "step": 10150 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.00010857713057602197, "loss": 1.4231, "step": 10155 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.0001084871582663044, "loss": 1.4382, "step": 10160 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.00010839717903583684, "loss": 1.415, "step": 10165 }, { "epoch": 0.53, "grad_norm": 0.5546875, "learning_rate": 0.00010830719295799181, "loss": 1.4295, "step": 10170 }, { "epoch": 0.53, "grad_norm": 0.498046875, "learning_rate": 0.00010821720010614733, "loss": 1.4191, "step": 10175 }, { "epoch": 0.53, "grad_norm": 0.53125, "learning_rate": 0.000108127200553687, "loss": 1.3981, "step": 10180 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.0001080371943739998, "loss": 1.4236, "step": 10185 }, { "epoch": 0.53, "grad_norm": 0.5078125, "learning_rate": 0.00010794718164048026, "loss": 1.4121, "step": 10190 }, { "epoch": 0.53, "grad_norm": 0.5078125, "learning_rate": 0.00010785716242652809, "loss": 1.4075, "step": 10195 }, { "epoch": 0.53, "grad_norm": 0.5390625, "learning_rate": 0.00010776713680554842, "loss": 1.4368, "step": 10200 }, { "epoch": 0.53, "grad_norm": 0.53125, "learning_rate": 0.00010767710485095151, "loss": 1.3704, "step": 10205 }, { "epoch": 0.53, "grad_norm": 0.53125, "learning_rate": 0.00010758706663615284, "loss": 1.4133, "step": 10210 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.00010749702223457299, "loss": 1.4003, "step": 10215 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.00010740697171963754, "loss": 1.4234, "step": 10220 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.0001073169151647771, "loss": 1.4117, "step": 10225 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010722685264342722, "loss": 1.4075, "step": 10230 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.00010713678422902825, "loss": 1.4053, "step": 10235 }, { "epoch": 0.53, "grad_norm": 0.53125, "learning_rate": 0.0001070467099950254, "loss": 1.4544, "step": 10240 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.0001069566300148686, "loss": 1.4289, "step": 10245 }, { "epoch": 0.53, "grad_norm": 0.515625, "learning_rate": 0.00010686654436201249, "loss": 1.4284, "step": 10250 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.00010677645310991628, "loss": 1.4181, "step": 10255 }, { "epoch": 0.53, "grad_norm": 0.55078125, "learning_rate": 0.00010668635633204384, "loss": 1.4545, "step": 10260 }, { "epoch": 0.53, "grad_norm": 0.546875, "learning_rate": 0.00010659625410186345, "loss": 1.4424, "step": 10265 }, { "epoch": 0.53, "grad_norm": 0.51953125, "learning_rate": 0.00010650614649284791, "loss": 1.4275, "step": 10270 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.00010641603357847434, "loss": 1.4008, "step": 10275 }, { "epoch": 0.53, "grad_norm": 0.5390625, "learning_rate": 0.00010632591543222426, "loss": 1.4401, "step": 10280 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.00010623579212758336, "loss": 1.3828, "step": 10285 }, { "epoch": 0.53, "grad_norm": 0.5234375, "learning_rate": 0.00010614566373804167, "loss": 1.4171, "step": 10290 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.00010605553033709321, "loss": 1.4277, "step": 10295 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.0001059653919982362, "loss": 1.4299, "step": 10300 }, { "epoch": 0.53, "grad_norm": 0.51953125, "learning_rate": 0.00010587524879497286, "loss": 1.4266, "step": 10305 }, { "epoch": 0.53, "grad_norm": 0.54296875, "learning_rate": 0.00010578510080080937, "loss": 1.418, "step": 10310 }, { "epoch": 0.53, "grad_norm": 0.5703125, "learning_rate": 0.0001056949480892558, "loss": 1.4423, "step": 10315 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.00010560479073382605, "loss": 1.4533, "step": 10320 }, { "epoch": 0.53, "grad_norm": 0.52734375, "learning_rate": 0.00010551462880803793, "loss": 1.4045, "step": 10325 }, { "epoch": 0.53, "grad_norm": 0.53515625, "learning_rate": 0.0001054244623854128, "loss": 1.4342, "step": 10330 }, { "epoch": 0.53, "grad_norm": 0.51953125, "learning_rate": 0.00010533429153947582, "loss": 1.4028, "step": 10335 }, { "epoch": 0.53, "grad_norm": 0.5078125, "learning_rate": 0.00010524411634375566, "loss": 1.4093, "step": 10340 }, { "epoch": 0.54, "grad_norm": 0.546875, "learning_rate": 0.00010515393687178467, "loss": 1.4353, "step": 10345 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010506375319709852, "loss": 1.4377, "step": 10350 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010497356539323643, "loss": 1.4349, "step": 10355 }, { "epoch": 0.54, "grad_norm": 0.53125, "learning_rate": 0.00010488337353374093, "loss": 1.421, "step": 10360 }, { "epoch": 0.54, "grad_norm": 0.515625, "learning_rate": 0.00010479317769215793, "loss": 1.4066, "step": 10365 }, { "epoch": 0.54, "grad_norm": 0.51171875, "learning_rate": 0.00010470297794203643, "loss": 1.4299, "step": 10370 }, { "epoch": 0.54, "grad_norm": 0.5, "learning_rate": 0.00010461277435692882, "loss": 1.4323, "step": 10375 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010452256701039045, "loss": 1.4183, "step": 10380 }, { "epoch": 0.54, "grad_norm": 0.5625, "learning_rate": 0.00010443235597597985, "loss": 1.4495, "step": 10385 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010434214132725846, "loss": 1.4152, "step": 10390 }, { "epoch": 0.54, "grad_norm": 0.51171875, "learning_rate": 0.00010425192313779075, "loss": 1.4081, "step": 10395 }, { "epoch": 0.54, "grad_norm": 0.55078125, "learning_rate": 0.00010416170148114404, "loss": 1.4235, "step": 10400 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.0001040714764308885, "loss": 1.3903, "step": 10405 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010398124806059701, "loss": 1.4415, "step": 10410 }, { "epoch": 0.54, "grad_norm": 0.53515625, "learning_rate": 0.00010389101644384524, "loss": 1.4446, "step": 10415 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010380078165421144, "loss": 1.4458, "step": 10420 }, { "epoch": 0.54, "grad_norm": 0.5625, "learning_rate": 0.00010371054376527647, "loss": 1.4719, "step": 10425 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010362030285062369, "loss": 1.4487, "step": 10430 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010353005898383905, "loss": 1.4335, "step": 10435 }, { "epoch": 0.54, "grad_norm": 0.5234375, "learning_rate": 0.00010343981223851074, "loss": 1.4065, "step": 10440 }, { "epoch": 0.54, "grad_norm": 0.50390625, "learning_rate": 0.00010334956268822937, "loss": 1.4032, "step": 10445 }, { "epoch": 0.54, "grad_norm": 0.56640625, "learning_rate": 0.00010325931040658783, "loss": 1.4184, "step": 10450 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010316905546718128, "loss": 1.4337, "step": 10455 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010307879794360701, "loss": 1.3685, "step": 10460 }, { "epoch": 0.54, "grad_norm": 0.5859375, "learning_rate": 0.0001029885379094644, "loss": 1.4441, "step": 10465 }, { "epoch": 0.54, "grad_norm": 0.52734375, "learning_rate": 0.00010289827543835493, "loss": 1.4085, "step": 10470 }, { "epoch": 0.54, "grad_norm": 0.546875, "learning_rate": 0.00010280801060388199, "loss": 1.4465, "step": 10475 }, { "epoch": 0.54, "grad_norm": 0.53515625, "learning_rate": 0.00010271774347965097, "loss": 1.4511, "step": 10480 }, { "epoch": 0.54, "grad_norm": 0.53515625, "learning_rate": 0.00010262747413926907, "loss": 1.4433, "step": 10485 }, { "epoch": 0.54, "grad_norm": 0.515625, "learning_rate": 0.00010253720265634537, "loss": 1.4181, "step": 10490 }, { "epoch": 0.54, "grad_norm": 0.51953125, "learning_rate": 0.00010244692910449061, "loss": 1.4608, "step": 10495 }, { "epoch": 0.54, "grad_norm": 0.482421875, "learning_rate": 0.00010235665355731727, "loss": 1.4125, "step": 10500 }, { "epoch": 0.54, "grad_norm": 0.54296875, "learning_rate": 0.00010226637608843947, "loss": 1.4069, "step": 10505 }, { "epoch": 0.54, "grad_norm": 0.51953125, "learning_rate": 0.00010217609677147287, "loss": 1.4075, "step": 10510 }, { "epoch": 0.54, "grad_norm": 0.53125, "learning_rate": 0.00010208581568003459, "loss": 1.3924, "step": 10515 }, { "epoch": 0.54, "grad_norm": 0.5625, "learning_rate": 0.00010199553288774333, "loss": 1.4404, "step": 10520 }, { "epoch": 0.54, "grad_norm": 0.51171875, "learning_rate": 0.00010190524846821903, "loss": 1.4055, "step": 10525 }, { "epoch": 0.54, "grad_norm": 0.5703125, "learning_rate": 0.00010181496249508305, "loss": 1.4068, "step": 10530 }, { "epoch": 0.55, "grad_norm": 0.578125, "learning_rate": 0.00010172467504195798, "loss": 1.4726, "step": 10535 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 0.00010163438618246763, "loss": 1.4363, "step": 10540 }, { "epoch": 0.55, "grad_norm": 0.5546875, "learning_rate": 0.00010154409599023693, "loss": 1.4466, "step": 10545 }, { "epoch": 0.55, "grad_norm": 0.5625, "learning_rate": 0.00010145380453889195, "loss": 1.4278, "step": 10550 }, { "epoch": 0.55, "grad_norm": 0.51953125, "learning_rate": 0.00010136351190205975, "loss": 1.4285, "step": 10555 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 0.00010127321815336837, "loss": 1.4437, "step": 10560 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 0.00010118292336644668, "loss": 1.3674, "step": 10565 }, { "epoch": 0.55, "grad_norm": 0.51953125, "learning_rate": 0.00010109262761492458, "loss": 1.3475, "step": 10570 }, { "epoch": 0.55, "grad_norm": 0.5390625, "learning_rate": 0.00010100233097243255, "loss": 1.4186, "step": 10575 }, { "epoch": 0.55, "grad_norm": 0.50390625, "learning_rate": 0.00010091203351260194, "loss": 1.3973, "step": 10580 }, { "epoch": 0.55, "grad_norm": 0.498046875, "learning_rate": 0.00010082173530906467, "loss": 1.4284, "step": 10585 }, { "epoch": 0.55, "grad_norm": 0.52734375, "learning_rate": 0.00010073143643545339, "loss": 1.4689, "step": 10590 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 0.00010064113696540111, "loss": 1.3903, "step": 10595 }, { "epoch": 0.55, "grad_norm": 0.52734375, "learning_rate": 0.00010055083697254156, "loss": 1.4076, "step": 10600 }, { "epoch": 0.55, "grad_norm": 0.48828125, "learning_rate": 0.00010046053653050862, "loss": 1.4381, "step": 10605 }, { "epoch": 0.55, "grad_norm": 0.51171875, "learning_rate": 0.00010037023571293682, "loss": 1.4145, "step": 10610 }, { "epoch": 0.55, "grad_norm": 0.55859375, "learning_rate": 0.00010027993459346079, "loss": 1.4353, "step": 10615 }, { "epoch": 0.55, "grad_norm": 0.5234375, "learning_rate": 0.00010018963324571551, "loss": 1.4071, "step": 10620 }, { "epoch": 0.55, "grad_norm": 0.5234375, "learning_rate": 0.00010009933174333608, "loss": 1.3875, "step": 10625 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 0.00010000903015995783, "loss": 1.4194, "step": 10630 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.991872856921601e-05, "loss": 1.4326, "step": 10635 }, { "epoch": 0.55, "grad_norm": 0.5234375, "learning_rate": 9.9828427044746e-05, "loss": 1.4118, "step": 10640 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 9.973812566018309e-05, "loss": 1.4773, "step": 10645 }, { "epoch": 0.55, "grad_norm": 0.53515625, "learning_rate": 9.96478244891624e-05, "loss": 1.4413, "step": 10650 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 9.955752360531896e-05, "loss": 1.3984, "step": 10655 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.94672230822875e-05, "loss": 1.4695, "step": 10660 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.937692299370251e-05, "loss": 1.404, "step": 10665 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 9.928662341319808e-05, "loss": 1.4331, "step": 10670 }, { "epoch": 0.55, "grad_norm": 0.53515625, "learning_rate": 9.919632441440791e-05, "loss": 1.441, "step": 10675 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 9.910602607096522e-05, "loss": 1.4424, "step": 10680 }, { "epoch": 0.55, "grad_norm": 0.5390625, "learning_rate": 9.90157284565027e-05, "loss": 1.4336, "step": 10685 }, { "epoch": 0.55, "grad_norm": 0.53125, "learning_rate": 9.892543164465243e-05, "loss": 1.4476, "step": 10690 }, { "epoch": 0.55, "grad_norm": 0.54296875, "learning_rate": 9.883513570904587e-05, "loss": 1.4135, "step": 10695 }, { "epoch": 0.55, "grad_norm": 0.5, "learning_rate": 9.874484072331371e-05, "loss": 1.3951, "step": 10700 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 9.865454676108592e-05, "loss": 1.4369, "step": 10705 }, { "epoch": 0.55, "grad_norm": 0.546875, "learning_rate": 9.856425389599159e-05, "loss": 1.3727, "step": 10710 }, { "epoch": 0.55, "grad_norm": 0.515625, "learning_rate": 9.847396220165898e-05, "loss": 1.4404, "step": 10715 }, { "epoch": 0.55, "grad_norm": 0.494140625, "learning_rate": 9.838367175171531e-05, "loss": 1.4034, "step": 10720 }, { "epoch": 0.55, "grad_norm": 0.51171875, "learning_rate": 9.829338261978686e-05, "loss": 1.4248, "step": 10725 }, { "epoch": 0.56, "grad_norm": 0.5625, "learning_rate": 9.82030948794988e-05, "loss": 1.4239, "step": 10730 }, { "epoch": 0.56, "grad_norm": 0.515625, "learning_rate": 9.811280860447515e-05, "loss": 1.3853, "step": 10735 }, { "epoch": 0.56, "grad_norm": 0.55859375, "learning_rate": 9.802252386833875e-05, "loss": 1.394, "step": 10740 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.793224074471125e-05, "loss": 1.4399, "step": 10745 }, { "epoch": 0.56, "grad_norm": 0.546875, "learning_rate": 9.784195930721284e-05, "loss": 1.3797, "step": 10750 }, { "epoch": 0.56, "grad_norm": 0.5234375, "learning_rate": 9.775167962946248e-05, "loss": 1.3967, "step": 10755 }, { "epoch": 0.56, "grad_norm": 0.515625, "learning_rate": 9.76614017850776e-05, "loss": 1.4251, "step": 10760 }, { "epoch": 0.56, "grad_norm": 0.5234375, "learning_rate": 9.757112584767422e-05, "loss": 1.3978, "step": 10765 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.748085189086668e-05, "loss": 1.4219, "step": 10770 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.739057998826786e-05, "loss": 1.41, "step": 10775 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.730031021348881e-05, "loss": 1.4403, "step": 10780 }, { "epoch": 0.56, "grad_norm": 0.515625, "learning_rate": 9.721004264013899e-05, "loss": 1.3703, "step": 10785 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.711977734182593e-05, "loss": 1.4361, "step": 10790 }, { "epoch": 0.56, "grad_norm": 0.5859375, "learning_rate": 9.702951439215543e-05, "loss": 1.4309, "step": 10795 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.693925386473127e-05, "loss": 1.3903, "step": 10800 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.684899583315531e-05, "loss": 1.4016, "step": 10805 }, { "epoch": 0.56, "grad_norm": 0.546875, "learning_rate": 9.67587403710274e-05, "loss": 1.4281, "step": 10810 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.666848755194519e-05, "loss": 1.4201, "step": 10815 }, { "epoch": 0.56, "grad_norm": 0.53515625, "learning_rate": 9.65782374495043e-05, "loss": 1.4347, "step": 10820 }, { "epoch": 0.56, "grad_norm": 0.53125, "learning_rate": 9.648799013729802e-05, "loss": 1.4321, "step": 10825 }, { "epoch": 0.56, "grad_norm": 0.53125, "learning_rate": 9.63977456889175e-05, "loss": 1.4376, "step": 10830 }, { "epoch": 0.56, "grad_norm": 0.5234375, "learning_rate": 9.630750417795141e-05, "loss": 1.4297, "step": 10835 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.621726567798614e-05, "loss": 1.457, "step": 10840 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.612703026260553e-05, "loss": 1.3704, "step": 10845 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.603679800539102e-05, "loss": 1.3975, "step": 10850 }, { "epoch": 0.56, "grad_norm": 0.55078125, "learning_rate": 9.594656897992133e-05, "loss": 1.4303, "step": 10855 }, { "epoch": 0.56, "grad_norm": 0.55859375, "learning_rate": 9.585634325977268e-05, "loss": 1.4059, "step": 10860 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.57661209185185e-05, "loss": 1.4084, "step": 10865 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.567590202972952e-05, "loss": 1.4019, "step": 10870 }, { "epoch": 0.56, "grad_norm": 0.54296875, "learning_rate": 9.558568666697362e-05, "loss": 1.4165, "step": 10875 }, { "epoch": 0.56, "grad_norm": 0.51953125, "learning_rate": 9.549547490381585e-05, "loss": 1.4164, "step": 10880 }, { "epoch": 0.56, "grad_norm": 0.51171875, "learning_rate": 9.540526681381824e-05, "loss": 1.3971, "step": 10885 }, { "epoch": 0.56, "grad_norm": 0.5546875, "learning_rate": 9.531506247053995e-05, "loss": 1.4246, "step": 10890 }, { "epoch": 0.56, "grad_norm": 0.5703125, "learning_rate": 9.522486194753695e-05, "loss": 1.4365, "step": 10895 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.513466531836221e-05, "loss": 1.3987, "step": 10900 }, { "epoch": 0.56, "grad_norm": 0.52734375, "learning_rate": 9.504447265656544e-05, "loss": 1.4129, "step": 10905 }, { "epoch": 0.56, "grad_norm": 0.5078125, "learning_rate": 9.495428403569317e-05, "loss": 1.4259, "step": 10910 }, { "epoch": 0.56, "grad_norm": 0.53515625, "learning_rate": 9.486409952928858e-05, "loss": 1.429, "step": 10915 }, { "epoch": 0.56, "grad_norm": 0.5390625, "learning_rate": 9.477391921089158e-05, "loss": 1.4268, "step": 10920 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.468374315403858e-05, "loss": 1.3592, "step": 10925 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.459357143226255e-05, "loss": 1.4077, "step": 10930 }, { "epoch": 0.57, "grad_norm": 0.5625, "learning_rate": 9.450340411909293e-05, "loss": 1.4344, "step": 10935 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.441324128805555e-05, "loss": 1.3846, "step": 10940 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.432308301267261e-05, "loss": 1.3887, "step": 10945 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.423292936646257e-05, "loss": 1.4068, "step": 10950 }, { "epoch": 0.57, "grad_norm": 0.55078125, "learning_rate": 9.414278042294012e-05, "loss": 1.389, "step": 10955 }, { "epoch": 0.57, "grad_norm": 0.5078125, "learning_rate": 9.405263625561613e-05, "loss": 1.408, "step": 10960 }, { "epoch": 0.57, "grad_norm": 0.51171875, "learning_rate": 9.396249693799754e-05, "loss": 1.4058, "step": 10965 }, { "epoch": 0.57, "grad_norm": 0.53125, "learning_rate": 9.387236254358741e-05, "loss": 1.4122, "step": 10970 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.378223314588467e-05, "loss": 1.3976, "step": 10975 }, { "epoch": 0.57, "grad_norm": 0.53515625, "learning_rate": 9.36921088183843e-05, "loss": 1.439, "step": 10980 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.360198963457705e-05, "loss": 1.4165, "step": 10985 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.351187566794953e-05, "loss": 1.4397, "step": 10990 }, { "epoch": 0.57, "grad_norm": 0.55078125, "learning_rate": 9.342176699198406e-05, "loss": 1.4671, "step": 10995 }, { "epoch": 0.57, "grad_norm": 0.53125, "learning_rate": 9.333166368015869e-05, "loss": 1.44, "step": 11000 }, { "epoch": 0.57, "grad_norm": 0.51171875, "learning_rate": 9.324156580594704e-05, "loss": 1.4439, "step": 11005 }, { "epoch": 0.57, "grad_norm": 0.5234375, "learning_rate": 9.315147344281836e-05, "loss": 1.4235, "step": 11010 }, { "epoch": 0.57, "grad_norm": 0.53515625, "learning_rate": 9.306138666423733e-05, "loss": 1.4024, "step": 11015 }, { "epoch": 0.57, "grad_norm": 0.53125, "learning_rate": 9.297130554366413e-05, "loss": 1.3865, "step": 11020 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.288123015455436e-05, "loss": 1.4025, "step": 11025 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.279116057035882e-05, "loss": 1.4177, "step": 11030 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.270109686452375e-05, "loss": 1.4183, "step": 11035 }, { "epoch": 0.57, "grad_norm": 0.5390625, "learning_rate": 9.261103911049041e-05, "loss": 1.4063, "step": 11040 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.252098738169538e-05, "loss": 1.4324, "step": 11045 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.24309417515702e-05, "loss": 1.469, "step": 11050 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.234090229354149e-05, "loss": 1.3761, "step": 11055 }, { "epoch": 0.57, "grad_norm": 0.53515625, "learning_rate": 9.225086908103082e-05, "loss": 1.386, "step": 11060 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.216084218745472e-05, "loss": 1.4339, "step": 11065 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.207082168622448e-05, "loss": 1.3859, "step": 11070 }, { "epoch": 0.57, "grad_norm": 0.578125, "learning_rate": 9.198080765074625e-05, "loss": 1.396, "step": 11075 }, { "epoch": 0.57, "grad_norm": 0.52734375, "learning_rate": 9.189080015442085e-05, "loss": 1.3841, "step": 11080 }, { "epoch": 0.57, "grad_norm": 0.57421875, "learning_rate": 9.180079927064386e-05, "loss": 1.382, "step": 11085 }, { "epoch": 0.57, "grad_norm": 0.56640625, "learning_rate": 9.171080507280532e-05, "loss": 1.4463, "step": 11090 }, { "epoch": 0.57, "grad_norm": 0.54296875, "learning_rate": 9.162081763428999e-05, "loss": 1.4456, "step": 11095 }, { "epoch": 0.57, "grad_norm": 0.55078125, "learning_rate": 9.153083702847695e-05, "loss": 1.4228, "step": 11100 }, { "epoch": 0.57, "grad_norm": 0.515625, "learning_rate": 9.14408633287399e-05, "loss": 1.3921, "step": 11105 }, { "epoch": 0.57, "grad_norm": 0.51953125, "learning_rate": 9.135089660844669e-05, "loss": 1.4088, "step": 11110 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 9.126093694095961e-05, "loss": 1.3919, "step": 11115 }, { "epoch": 0.58, "grad_norm": 0.5078125, "learning_rate": 9.117098439963522e-05, "loss": 1.4446, "step": 11120 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 9.108103905782419e-05, "loss": 1.3774, "step": 11125 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 9.099110098887136e-05, "loss": 1.407, "step": 11130 }, { "epoch": 0.58, "grad_norm": 0.58203125, "learning_rate": 9.090117026611564e-05, "loss": 1.4042, "step": 11135 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 9.081124696288995e-05, "loss": 1.4179, "step": 11140 }, { "epoch": 0.58, "grad_norm": 0.546875, "learning_rate": 9.072133115252112e-05, "loss": 1.4189, "step": 11145 }, { "epoch": 0.58, "grad_norm": 0.5546875, "learning_rate": 9.063142290832997e-05, "loss": 1.3894, "step": 11150 }, { "epoch": 0.58, "grad_norm": 0.5703125, "learning_rate": 9.054152230363102e-05, "loss": 1.4122, "step": 11155 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 9.045162941173266e-05, "loss": 1.406, "step": 11160 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 9.036174430593694e-05, "loss": 1.4228, "step": 11165 }, { "epoch": 0.58, "grad_norm": 0.51171875, "learning_rate": 9.027186705953958e-05, "loss": 1.4127, "step": 11170 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 9.018199774582988e-05, "loss": 1.3923, "step": 11175 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 9.009213643809072e-05, "loss": 1.4126, "step": 11180 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 9.000228320959833e-05, "loss": 1.3914, "step": 11185 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 8.991243813362252e-05, "loss": 1.4298, "step": 11190 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 8.982260128342628e-05, "loss": 1.42, "step": 11195 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 8.973277273226607e-05, "loss": 1.4267, "step": 11200 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 8.96429525533914e-05, "loss": 1.4044, "step": 11205 }, { "epoch": 0.58, "grad_norm": 0.5546875, "learning_rate": 8.95531408200451e-05, "loss": 1.3861, "step": 11210 }, { "epoch": 0.58, "grad_norm": 0.54296875, "learning_rate": 8.946333760546303e-05, "loss": 1.4079, "step": 11215 }, { "epoch": 0.58, "grad_norm": 0.5234375, "learning_rate": 8.937354298287414e-05, "loss": 1.4124, "step": 11220 }, { "epoch": 0.58, "grad_norm": 0.50390625, "learning_rate": 8.928375702550036e-05, "loss": 1.3697, "step": 11225 }, { "epoch": 0.58, "grad_norm": 0.5703125, "learning_rate": 8.919397980655657e-05, "loss": 1.4066, "step": 11230 }, { "epoch": 0.58, "grad_norm": 0.51171875, "learning_rate": 8.910421139925045e-05, "loss": 1.4038, "step": 11235 }, { "epoch": 0.58, "grad_norm": 0.54296875, "learning_rate": 8.901445187678264e-05, "loss": 1.4115, "step": 11240 }, { "epoch": 0.58, "grad_norm": 0.5546875, "learning_rate": 8.892470131234639e-05, "loss": 1.4057, "step": 11245 }, { "epoch": 0.58, "grad_norm": 0.55859375, "learning_rate": 8.883495977912775e-05, "loss": 1.4335, "step": 11250 }, { "epoch": 0.58, "grad_norm": 0.50390625, "learning_rate": 8.874522735030532e-05, "loss": 1.3702, "step": 11255 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 8.865550409905037e-05, "loss": 1.3975, "step": 11260 }, { "epoch": 0.58, "grad_norm": 0.50390625, "learning_rate": 8.856579009852657e-05, "loss": 1.4373, "step": 11265 }, { "epoch": 0.58, "grad_norm": 0.51953125, "learning_rate": 8.847608542189017e-05, "loss": 1.418, "step": 11270 }, { "epoch": 0.58, "grad_norm": 0.52734375, "learning_rate": 8.83863901422897e-05, "loss": 1.4033, "step": 11275 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 8.829670433286613e-05, "loss": 1.4357, "step": 11280 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 8.820702806675263e-05, "loss": 1.4209, "step": 11285 }, { "epoch": 0.58, "grad_norm": 0.53125, "learning_rate": 8.811736141707466e-05, "loss": 1.4346, "step": 11290 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 8.802770445694975e-05, "loss": 1.4062, "step": 11295 }, { "epoch": 0.58, "grad_norm": 0.5390625, "learning_rate": 8.793805725948764e-05, "loss": 1.4005, "step": 11300 }, { "epoch": 0.58, "grad_norm": 0.53515625, "learning_rate": 8.784841989778996e-05, "loss": 1.4151, "step": 11305 }, { "epoch": 0.59, "grad_norm": 0.5546875, "learning_rate": 8.775879244495052e-05, "loss": 1.4449, "step": 11310 }, { "epoch": 0.59, "grad_norm": 0.5546875, "learning_rate": 8.766917497405481e-05, "loss": 1.4397, "step": 11315 }, { "epoch": 0.59, "grad_norm": 0.5625, "learning_rate": 8.757956755818041e-05, "loss": 1.424, "step": 11320 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.748997027039653e-05, "loss": 1.4169, "step": 11325 }, { "epoch": 0.59, "grad_norm": 0.51953125, "learning_rate": 8.740038318376423e-05, "loss": 1.4071, "step": 11330 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.731080637133618e-05, "loss": 1.4427, "step": 11335 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.722123990615673e-05, "loss": 1.4193, "step": 11340 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.713168386126173e-05, "loss": 1.3958, "step": 11345 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.704213830967861e-05, "loss": 1.457, "step": 11350 }, { "epoch": 0.59, "grad_norm": 0.5390625, "learning_rate": 8.695260332442616e-05, "loss": 1.4133, "step": 11355 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.686307897851463e-05, "loss": 1.4035, "step": 11360 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.677356534494553e-05, "loss": 1.3896, "step": 11365 }, { "epoch": 0.59, "grad_norm": 0.51171875, "learning_rate": 8.668406249671169e-05, "loss": 1.4115, "step": 11370 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.65945705067971e-05, "loss": 1.4179, "step": 11375 }, { "epoch": 0.59, "grad_norm": 0.5546875, "learning_rate": 8.650508944817692e-05, "loss": 1.4196, "step": 11380 }, { "epoch": 0.59, "grad_norm": 0.56640625, "learning_rate": 8.641561939381737e-05, "loss": 1.4511, "step": 11385 }, { "epoch": 0.59, "grad_norm": 0.53515625, "learning_rate": 8.632616041667577e-05, "loss": 1.4401, "step": 11390 }, { "epoch": 0.59, "grad_norm": 0.53515625, "learning_rate": 8.623671258970028e-05, "loss": 1.4221, "step": 11395 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.614727598583015e-05, "loss": 1.4375, "step": 11400 }, { "epoch": 0.59, "grad_norm": 0.53125, "learning_rate": 8.605785067799527e-05, "loss": 1.3985, "step": 11405 }, { "epoch": 0.59, "grad_norm": 0.53125, "learning_rate": 8.596843673911643e-05, "loss": 1.4173, "step": 11410 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.58790342421052e-05, "loss": 1.443, "step": 11415 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.578964325986368e-05, "loss": 1.3976, "step": 11420 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.570026386528475e-05, "loss": 1.3657, "step": 11425 }, { "epoch": 0.59, "grad_norm": 0.53125, "learning_rate": 8.561089613125166e-05, "loss": 1.4348, "step": 11430 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.55215401306383e-05, "loss": 1.4046, "step": 11435 }, { "epoch": 0.59, "grad_norm": 0.52734375, "learning_rate": 8.543219593630892e-05, "loss": 1.4324, "step": 11440 }, { "epoch": 0.59, "grad_norm": 0.54296875, "learning_rate": 8.534286362111812e-05, "loss": 1.42, "step": 11445 }, { "epoch": 0.59, "grad_norm": 0.546875, "learning_rate": 8.525354325791092e-05, "loss": 1.4366, "step": 11450 }, { "epoch": 0.59, "grad_norm": 0.5703125, "learning_rate": 8.516423491952247e-05, "loss": 1.4362, "step": 11455 }, { "epoch": 0.59, "grad_norm": 0.5390625, "learning_rate": 8.50749386787782e-05, "loss": 1.384, "step": 11460 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.498565460849362e-05, "loss": 1.3842, "step": 11465 }, { "epoch": 0.59, "grad_norm": 0.51953125, "learning_rate": 8.489638278147433e-05, "loss": 1.4209, "step": 11470 }, { "epoch": 0.59, "grad_norm": 0.53515625, "learning_rate": 8.480712327051599e-05, "loss": 1.3873, "step": 11475 }, { "epoch": 0.59, "grad_norm": 0.5546875, "learning_rate": 8.471787614840416e-05, "loss": 1.4494, "step": 11480 }, { "epoch": 0.59, "grad_norm": 0.50390625, "learning_rate": 8.462864148791432e-05, "loss": 1.422, "step": 11485 }, { "epoch": 0.59, "grad_norm": 0.5234375, "learning_rate": 8.453941936181181e-05, "loss": 1.434, "step": 11490 }, { "epoch": 0.59, "grad_norm": 0.53515625, "learning_rate": 8.445020984285169e-05, "loss": 1.4391, "step": 11495 }, { "epoch": 0.59, "grad_norm": 0.55078125, "learning_rate": 8.436101300377881e-05, "loss": 1.3965, "step": 11500 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.427182891732762e-05, "loss": 1.4083, "step": 11505 }, { "epoch": 0.6, "grad_norm": 0.5234375, "learning_rate": 8.418265765622225e-05, "loss": 1.3746, "step": 11510 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.409349929317623e-05, "loss": 1.3924, "step": 11515 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.400435390089277e-05, "loss": 1.3971, "step": 11520 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.391522155206429e-05, "loss": 1.395, "step": 11525 }, { "epoch": 0.6, "grad_norm": 0.5234375, "learning_rate": 8.382610231937276e-05, "loss": 1.439, "step": 11530 }, { "epoch": 0.6, "grad_norm": 0.578125, "learning_rate": 8.373699627548934e-05, "loss": 1.425, "step": 11535 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.364790349307448e-05, "loss": 1.389, "step": 11540 }, { "epoch": 0.6, "grad_norm": 0.5234375, "learning_rate": 8.355882404477778e-05, "loss": 1.4014, "step": 11545 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.346975800323804e-05, "loss": 1.4194, "step": 11550 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.338070544108304e-05, "loss": 1.4122, "step": 11555 }, { "epoch": 0.6, "grad_norm": 0.51171875, "learning_rate": 8.329166643092963e-05, "loss": 1.4194, "step": 11560 }, { "epoch": 0.6, "grad_norm": 0.53515625, "learning_rate": 8.320264104538357e-05, "loss": 1.3873, "step": 11565 }, { "epoch": 0.6, "grad_norm": 0.55859375, "learning_rate": 8.311362935703955e-05, "loss": 1.4282, "step": 11570 }, { "epoch": 0.6, "grad_norm": 0.5625, "learning_rate": 8.302463143848102e-05, "loss": 1.4116, "step": 11575 }, { "epoch": 0.6, "grad_norm": 0.53515625, "learning_rate": 8.293564736228034e-05, "loss": 1.4194, "step": 11580 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.284667720099839e-05, "loss": 1.4211, "step": 11585 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.275772102718489e-05, "loss": 1.4032, "step": 11590 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.2668778913378e-05, "loss": 1.3712, "step": 11595 }, { "epoch": 0.6, "grad_norm": 0.54296875, "learning_rate": 8.257985093210455e-05, "loss": 1.4273, "step": 11600 }, { "epoch": 0.6, "grad_norm": 0.53515625, "learning_rate": 8.249093715587972e-05, "loss": 1.4499, "step": 11605 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.240203765720722e-05, "loss": 1.3837, "step": 11610 }, { "epoch": 0.6, "grad_norm": 0.515625, "learning_rate": 8.231315250857902e-05, "loss": 1.4142, "step": 11615 }, { "epoch": 0.6, "grad_norm": 0.56640625, "learning_rate": 8.222428178247548e-05, "loss": 1.3888, "step": 11620 }, { "epoch": 0.6, "grad_norm": 0.51953125, "learning_rate": 8.21354255513651e-05, "loss": 1.384, "step": 11625 }, { "epoch": 0.6, "grad_norm": 0.55078125, "learning_rate": 8.204658388770466e-05, "loss": 1.3775, "step": 11630 }, { "epoch": 0.6, "grad_norm": 0.51171875, "learning_rate": 8.195775686393897e-05, "loss": 1.4199, "step": 11635 }, { "epoch": 0.6, "grad_norm": 0.5234375, "learning_rate": 8.1868944552501e-05, "loss": 1.3916, "step": 11640 }, { "epoch": 0.6, "grad_norm": 0.5078125, "learning_rate": 8.178014702581162e-05, "loss": 1.3924, "step": 11645 }, { "epoch": 0.6, "grad_norm": 0.546875, "learning_rate": 8.169136435627971e-05, "loss": 1.4131, "step": 11650 }, { "epoch": 0.6, "grad_norm": 0.54296875, "learning_rate": 8.160259661630201e-05, "loss": 1.3814, "step": 11655 }, { "epoch": 0.6, "grad_norm": 0.5390625, "learning_rate": 8.151384387826313e-05, "loss": 1.3941, "step": 11660 }, { "epoch": 0.6, "grad_norm": 0.51171875, "learning_rate": 8.142510621453536e-05, "loss": 1.395, "step": 11665 }, { "epoch": 0.6, "grad_norm": 0.53515625, "learning_rate": 8.13363836974788e-05, "loss": 1.3861, "step": 11670 }, { "epoch": 0.6, "grad_norm": 0.53125, "learning_rate": 8.124767639944109e-05, "loss": 1.4077, "step": 11675 }, { "epoch": 0.6, "grad_norm": 0.51171875, "learning_rate": 8.115898439275756e-05, "loss": 1.4453, "step": 11680 }, { "epoch": 0.6, "grad_norm": 0.5, "learning_rate": 8.107030774975101e-05, "loss": 1.4159, "step": 11685 }, { "epoch": 0.6, "grad_norm": 0.52734375, "learning_rate": 8.098164654273174e-05, "loss": 1.4389, "step": 11690 }, { "epoch": 0.61, "grad_norm": 0.54296875, "learning_rate": 8.089300084399747e-05, "loss": 1.3747, "step": 11695 }, { "epoch": 0.61, "grad_norm": 0.54296875, "learning_rate": 8.08043707258332e-05, "loss": 1.4058, "step": 11700 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 8.071575626051133e-05, "loss": 1.4204, "step": 11705 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 8.062715752029142e-05, "loss": 1.4552, "step": 11710 }, { "epoch": 0.61, "grad_norm": 0.50390625, "learning_rate": 8.053857457742025e-05, "loss": 1.4289, "step": 11715 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 8.045000750413169e-05, "loss": 1.4027, "step": 11720 }, { "epoch": 0.61, "grad_norm": 0.53515625, "learning_rate": 8.036145637264673e-05, "loss": 1.4515, "step": 11725 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 8.027292125517324e-05, "loss": 1.4288, "step": 11730 }, { "epoch": 0.61, "grad_norm": 0.50390625, "learning_rate": 8.018440222390616e-05, "loss": 1.4238, "step": 11735 }, { "epoch": 0.61, "grad_norm": 0.52734375, "learning_rate": 8.009589935102723e-05, "loss": 1.4264, "step": 11740 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 8.000741270870507e-05, "loss": 1.4174, "step": 11745 }, { "epoch": 0.61, "grad_norm": 0.51171875, "learning_rate": 7.991894236909498e-05, "loss": 1.4294, "step": 11750 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.98304884043391e-05, "loss": 1.4169, "step": 11755 }, { "epoch": 0.61, "grad_norm": 0.55859375, "learning_rate": 7.974205088656606e-05, "loss": 1.4359, "step": 11760 }, { "epoch": 0.61, "grad_norm": 0.5234375, "learning_rate": 7.965362988789121e-05, "loss": 1.4157, "step": 11765 }, { "epoch": 0.61, "grad_norm": 0.5078125, "learning_rate": 7.956522548041635e-05, "loss": 1.4213, "step": 11770 }, { "epoch": 0.61, "grad_norm": 0.5546875, "learning_rate": 7.947683773622982e-05, "loss": 1.4131, "step": 11775 }, { "epoch": 0.61, "grad_norm": 0.515625, "learning_rate": 7.938846672740627e-05, "loss": 1.4234, "step": 11780 }, { "epoch": 0.61, "grad_norm": 0.51953125, "learning_rate": 7.930011252600683e-05, "loss": 1.4135, "step": 11785 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.92117752040788e-05, "loss": 1.4611, "step": 11790 }, { "epoch": 0.61, "grad_norm": 0.50390625, "learning_rate": 7.912345483365581e-05, "loss": 1.4269, "step": 11795 }, { "epoch": 0.61, "grad_norm": 0.51171875, "learning_rate": 7.903515148675762e-05, "loss": 1.4278, "step": 11800 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.894686523539013e-05, "loss": 1.3783, "step": 11805 }, { "epoch": 0.61, "grad_norm": 0.5234375, "learning_rate": 7.885859615154527e-05, "loss": 1.4143, "step": 11810 }, { "epoch": 0.61, "grad_norm": 0.52734375, "learning_rate": 7.877034430720102e-05, "loss": 1.4065, "step": 11815 }, { "epoch": 0.61, "grad_norm": 0.5, "learning_rate": 7.868210977432123e-05, "loss": 1.389, "step": 11820 }, { "epoch": 0.61, "grad_norm": 0.56640625, "learning_rate": 7.85938926248557e-05, "loss": 1.4302, "step": 11825 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.850569293074006e-05, "loss": 1.416, "step": 11830 }, { "epoch": 0.61, "grad_norm": 0.5546875, "learning_rate": 7.841751076389563e-05, "loss": 1.3923, "step": 11835 }, { "epoch": 0.61, "grad_norm": 0.52734375, "learning_rate": 7.832934619622954e-05, "loss": 1.4171, "step": 11840 }, { "epoch": 0.61, "grad_norm": 0.53125, "learning_rate": 7.824119929963444e-05, "loss": 1.4211, "step": 11845 }, { "epoch": 0.61, "grad_norm": 0.5390625, "learning_rate": 7.81530701459887e-05, "loss": 1.4153, "step": 11850 }, { "epoch": 0.61, "grad_norm": 0.49609375, "learning_rate": 7.806495880715614e-05, "loss": 1.4169, "step": 11855 }, { "epoch": 0.61, "grad_norm": 0.5625, "learning_rate": 7.797686535498611e-05, "loss": 1.4031, "step": 11860 }, { "epoch": 0.61, "grad_norm": 0.52734375, "learning_rate": 7.788878986131331e-05, "loss": 1.4202, "step": 11865 }, { "epoch": 0.61, "grad_norm": 0.55078125, "learning_rate": 7.780073239795787e-05, "loss": 1.3808, "step": 11870 }, { "epoch": 0.61, "grad_norm": 0.51953125, "learning_rate": 7.771269303672513e-05, "loss": 1.4002, "step": 11875 }, { "epoch": 0.61, "grad_norm": 0.51953125, "learning_rate": 7.762467184940574e-05, "loss": 1.3846, "step": 11880 }, { "epoch": 0.61, "grad_norm": 0.55859375, "learning_rate": 7.75366689077755e-05, "loss": 1.4054, "step": 11885 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.744868428359536e-05, "loss": 1.3909, "step": 11890 }, { "epoch": 0.62, "grad_norm": 0.5234375, "learning_rate": 7.736071804861127e-05, "loss": 1.4172, "step": 11895 }, { "epoch": 0.62, "grad_norm": 0.53125, "learning_rate": 7.727277027455428e-05, "loss": 1.3996, "step": 11900 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.718484103314026e-05, "loss": 1.4351, "step": 11905 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.709693039607012e-05, "loss": 1.3794, "step": 11910 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.700903843502947e-05, "loss": 1.3866, "step": 11915 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.692116522168877e-05, "loss": 1.428, "step": 11920 }, { "epoch": 0.62, "grad_norm": 0.5703125, "learning_rate": 7.683331082770311e-05, "loss": 1.4656, "step": 11925 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.674547532471235e-05, "loss": 1.4387, "step": 11930 }, { "epoch": 0.62, "grad_norm": 0.515625, "learning_rate": 7.665765878434084e-05, "loss": 1.4312, "step": 11935 }, { "epoch": 0.62, "grad_norm": 0.5390625, "learning_rate": 7.656986127819754e-05, "loss": 1.4365, "step": 11940 }, { "epoch": 0.62, "grad_norm": 0.62890625, "learning_rate": 7.648208287787584e-05, "loss": 1.3558, "step": 11945 }, { "epoch": 0.62, "grad_norm": 0.51171875, "learning_rate": 7.639432365495357e-05, "loss": 1.3889, "step": 11950 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.630658368099291e-05, "loss": 1.422, "step": 11955 }, { "epoch": 0.62, "grad_norm": 0.51171875, "learning_rate": 7.62188630275404e-05, "loss": 1.3943, "step": 11960 }, { "epoch": 0.62, "grad_norm": 0.54296875, "learning_rate": 7.613116176612672e-05, "loss": 1.3815, "step": 11965 }, { "epoch": 0.62, "grad_norm": 0.52734375, "learning_rate": 7.604347996826682e-05, "loss": 1.4084, "step": 11970 }, { "epoch": 0.62, "grad_norm": 0.515625, "learning_rate": 7.595581770545978e-05, "loss": 1.4034, "step": 11975 }, { "epoch": 0.62, "grad_norm": 0.54296875, "learning_rate": 7.58681750491887e-05, "loss": 1.412, "step": 11980 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.578055207092071e-05, "loss": 1.3972, "step": 11985 }, { "epoch": 0.62, "grad_norm": 0.546875, "learning_rate": 7.569294884210694e-05, "loss": 1.3899, "step": 11990 }, { "epoch": 0.62, "grad_norm": 0.55859375, "learning_rate": 7.560536543418235e-05, "loss": 1.4266, "step": 11995 }, { "epoch": 0.62, "grad_norm": 0.54296875, "learning_rate": 7.551780191856575e-05, "loss": 1.4088, "step": 12000 }, { "epoch": 0.62, "grad_norm": 0.55078125, "learning_rate": 7.543025836665977e-05, "loss": 1.4214, "step": 12005 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.53427348498507e-05, "loss": 1.4133, "step": 12010 }, { "epoch": 0.62, "grad_norm": 0.51171875, "learning_rate": 7.525523143950859e-05, "loss": 1.3915, "step": 12015 }, { "epoch": 0.62, "grad_norm": 0.546875, "learning_rate": 7.516774820698695e-05, "loss": 1.4307, "step": 12020 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.5080285223623e-05, "loss": 1.4384, "step": 12025 }, { "epoch": 0.62, "grad_norm": 0.5234375, "learning_rate": 7.499284256073731e-05, "loss": 1.3982, "step": 12030 }, { "epoch": 0.62, "grad_norm": 0.53515625, "learning_rate": 7.490542028963396e-05, "loss": 1.4055, "step": 12035 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 7.481801848160035e-05, "loss": 1.4069, "step": 12040 }, { "epoch": 0.62, "grad_norm": 0.546875, "learning_rate": 7.473063720790727e-05, "loss": 1.3999, "step": 12045 }, { "epoch": 0.62, "grad_norm": 0.5234375, "learning_rate": 7.464327653980865e-05, "loss": 1.4145, "step": 12050 }, { "epoch": 0.62, "grad_norm": 0.546875, "learning_rate": 7.455593654854176e-05, "loss": 1.4143, "step": 12055 }, { "epoch": 0.62, "grad_norm": 0.5234375, "learning_rate": 7.446861730532688e-05, "loss": 1.435, "step": 12060 }, { "epoch": 0.62, "grad_norm": 0.546875, "learning_rate": 7.438131888136746e-05, "loss": 1.4205, "step": 12065 }, { "epoch": 0.62, "grad_norm": 0.515625, "learning_rate": 7.429404134784987e-05, "loss": 1.389, "step": 12070 }, { "epoch": 0.62, "grad_norm": 0.5390625, "learning_rate": 7.420678477594361e-05, "loss": 1.4084, "step": 12075 }, { "epoch": 0.62, "grad_norm": 0.578125, "learning_rate": 7.411954923680091e-05, "loss": 1.4572, "step": 12080 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.403233480155697e-05, "loss": 1.3838, "step": 12085 }, { "epoch": 0.63, "grad_norm": 0.5, "learning_rate": 7.394514154132975e-05, "loss": 1.3921, "step": 12090 }, { "epoch": 0.63, "grad_norm": 0.515625, "learning_rate": 7.385796952721991e-05, "loss": 1.4163, "step": 12095 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.377081883031079e-05, "loss": 1.4066, "step": 12100 }, { "epoch": 0.63, "grad_norm": 0.546875, "learning_rate": 7.368368952166839e-05, "loss": 1.43, "step": 12105 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.359658167234125e-05, "loss": 1.4091, "step": 12110 }, { "epoch": 0.63, "grad_norm": 0.515625, "learning_rate": 7.350949535336041e-05, "loss": 1.3923, "step": 12115 }, { "epoch": 0.63, "grad_norm": 0.53125, "learning_rate": 7.342243063573932e-05, "loss": 1.4047, "step": 12120 }, { "epoch": 0.63, "grad_norm": 0.515625, "learning_rate": 7.333538759047389e-05, "loss": 1.3858, "step": 12125 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.324836628854226e-05, "loss": 1.407, "step": 12130 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.316136680090494e-05, "loss": 1.4015, "step": 12135 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.307438919850456e-05, "loss": 1.3805, "step": 12140 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.298743355226599e-05, "loss": 1.4403, "step": 12145 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.290049993309611e-05, "loss": 1.4015, "step": 12150 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.281358841188392e-05, "loss": 1.4074, "step": 12155 }, { "epoch": 0.63, "grad_norm": 0.5234375, "learning_rate": 7.272669905950036e-05, "loss": 1.409, "step": 12160 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.263983194679827e-05, "loss": 1.4018, "step": 12165 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.25529871446124e-05, "loss": 1.4238, "step": 12170 }, { "epoch": 0.63, "grad_norm": 0.53125, "learning_rate": 7.246616472375928e-05, "loss": 1.4122, "step": 12175 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.237936475503719e-05, "loss": 1.4233, "step": 12180 }, { "epoch": 0.63, "grad_norm": 0.55859375, "learning_rate": 7.229258730922615e-05, "loss": 1.4236, "step": 12185 }, { "epoch": 0.63, "grad_norm": 0.53515625, "learning_rate": 7.22058324570877e-05, "loss": 1.3993, "step": 12190 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.21191002693651e-05, "loss": 1.4025, "step": 12195 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.203239081678299e-05, "loss": 1.4303, "step": 12200 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.194570417004759e-05, "loss": 1.3899, "step": 12205 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.185904039984648e-05, "loss": 1.4111, "step": 12210 }, { "epoch": 0.63, "grad_norm": 0.51171875, "learning_rate": 7.177239957684851e-05, "loss": 1.3872, "step": 12215 }, { "epoch": 0.63, "grad_norm": 0.54296875, "learning_rate": 7.168578177170397e-05, "loss": 1.3984, "step": 12220 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.159918705504424e-05, "loss": 1.4333, "step": 12225 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.151261549748195e-05, "loss": 1.4313, "step": 12230 }, { "epoch": 0.63, "grad_norm": 0.5625, "learning_rate": 7.14260671696108e-05, "loss": 1.4067, "step": 12235 }, { "epoch": 0.63, "grad_norm": 0.5078125, "learning_rate": 7.13395421420056e-05, "loss": 1.4108, "step": 12240 }, { "epoch": 0.63, "grad_norm": 0.52734375, "learning_rate": 7.125304048522211e-05, "loss": 1.4413, "step": 12245 }, { "epoch": 0.63, "grad_norm": 0.5078125, "learning_rate": 7.116656226979708e-05, "loss": 1.3683, "step": 12250 }, { "epoch": 0.63, "grad_norm": 0.51171875, "learning_rate": 7.108010756624808e-05, "loss": 1.3989, "step": 12255 }, { "epoch": 0.63, "grad_norm": 0.55078125, "learning_rate": 7.099367644507357e-05, "loss": 1.4072, "step": 12260 }, { "epoch": 0.63, "grad_norm": 0.49609375, "learning_rate": 7.090726897675277e-05, "loss": 1.3872, "step": 12265 }, { "epoch": 0.63, "grad_norm": 0.515625, "learning_rate": 7.082088523174558e-05, "loss": 1.4026, "step": 12270 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 7.073452528049254e-05, "loss": 1.3791, "step": 12275 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 7.06481891934149e-05, "loss": 1.3941, "step": 12280 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 7.056187704091434e-05, "loss": 1.3936, "step": 12285 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 7.047558889337302e-05, "loss": 1.4152, "step": 12290 }, { "epoch": 0.64, "grad_norm": 0.5625, "learning_rate": 7.03893248211536e-05, "loss": 1.4064, "step": 12295 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 7.030308489459904e-05, "loss": 1.394, "step": 12300 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 7.021686918403266e-05, "loss": 1.4682, "step": 12305 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 7.013067775975799e-05, "loss": 1.415, "step": 12310 }, { "epoch": 0.64, "grad_norm": 0.6484375, "learning_rate": 7.004451069205881e-05, "loss": 1.3852, "step": 12315 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 6.995836805119897e-05, "loss": 1.4032, "step": 12320 }, { "epoch": 0.64, "grad_norm": 0.55859375, "learning_rate": 6.98722499074225e-05, "loss": 1.4187, "step": 12325 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.978615633095331e-05, "loss": 1.4065, "step": 12330 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 6.970008739199543e-05, "loss": 1.418, "step": 12335 }, { "epoch": 0.64, "grad_norm": 0.5859375, "learning_rate": 6.961404316073267e-05, "loss": 1.4124, "step": 12340 }, { "epoch": 0.64, "grad_norm": 0.5234375, "learning_rate": 6.95280237073288e-05, "loss": 1.4179, "step": 12345 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 6.944202910192732e-05, "loss": 1.4075, "step": 12350 }, { "epoch": 0.64, "grad_norm": 0.5703125, "learning_rate": 6.93560594146515e-05, "loss": 1.367, "step": 12355 }, { "epoch": 0.64, "grad_norm": 0.54296875, "learning_rate": 6.927011471560422e-05, "loss": 1.4334, "step": 12360 }, { "epoch": 0.64, "grad_norm": 0.54296875, "learning_rate": 6.918419507486813e-05, "loss": 1.4224, "step": 12365 }, { "epoch": 0.64, "grad_norm": 0.498046875, "learning_rate": 6.909830056250527e-05, "loss": 1.3795, "step": 12370 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.901243124855733e-05, "loss": 1.3588, "step": 12375 }, { "epoch": 0.64, "grad_norm": 0.51953125, "learning_rate": 6.892658720304535e-05, "loss": 1.4085, "step": 12380 }, { "epoch": 0.64, "grad_norm": 0.52734375, "learning_rate": 6.884076849596988e-05, "loss": 1.3963, "step": 12385 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.875497519731067e-05, "loss": 1.3857, "step": 12390 }, { "epoch": 0.64, "grad_norm": 0.53125, "learning_rate": 6.866920737702688e-05, "loss": 1.4228, "step": 12395 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.858346510505678e-05, "loss": 1.3961, "step": 12400 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.849774845131791e-05, "loss": 1.425, "step": 12405 }, { "epoch": 0.64, "grad_norm": 0.546875, "learning_rate": 6.841205748570685e-05, "loss": 1.3729, "step": 12410 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.832639227809927e-05, "loss": 1.4257, "step": 12415 }, { "epoch": 0.64, "grad_norm": 0.51953125, "learning_rate": 6.82407528983498e-05, "loss": 1.3877, "step": 12420 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.815513941629204e-05, "loss": 1.4204, "step": 12425 }, { "epoch": 0.64, "grad_norm": 0.5078125, "learning_rate": 6.806955190173848e-05, "loss": 1.431, "step": 12430 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.798399042448039e-05, "loss": 1.3797, "step": 12435 }, { "epoch": 0.64, "grad_norm": 0.53515625, "learning_rate": 6.789845505428782e-05, "loss": 1.4246, "step": 12440 }, { "epoch": 0.64, "grad_norm": 0.515625, "learning_rate": 6.781294586090962e-05, "loss": 1.3974, "step": 12445 }, { "epoch": 0.64, "grad_norm": 0.54296875, "learning_rate": 6.772746291407315e-05, "loss": 1.4657, "step": 12450 }, { "epoch": 0.64, "grad_norm": 0.51171875, "learning_rate": 6.764200628348449e-05, "loss": 1.3938, "step": 12455 }, { "epoch": 0.64, "grad_norm": 0.5390625, "learning_rate": 6.755657603882816e-05, "loss": 1.3811, "step": 12460 }, { "epoch": 0.64, "grad_norm": 0.56640625, "learning_rate": 6.747117224976726e-05, "loss": 1.3983, "step": 12465 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.738579498594322e-05, "loss": 1.4247, "step": 12470 }, { "epoch": 0.65, "grad_norm": 0.50390625, "learning_rate": 6.730044431697595e-05, "loss": 1.3805, "step": 12475 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.721512031246358e-05, "loss": 1.395, "step": 12480 }, { "epoch": 0.65, "grad_norm": 0.5078125, "learning_rate": 6.712982304198254e-05, "loss": 1.417, "step": 12485 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.704455257508743e-05, "loss": 1.4102, "step": 12490 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.695930898131107e-05, "loss": 1.4234, "step": 12495 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.687409233016422e-05, "loss": 1.4232, "step": 12500 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.678890269113587e-05, "loss": 1.396, "step": 12505 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.670374013369279e-05, "loss": 1.4411, "step": 12510 }, { "epoch": 0.65, "grad_norm": 0.515625, "learning_rate": 6.661860472727981e-05, "loss": 1.4217, "step": 12515 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.65334965413195e-05, "loss": 1.4315, "step": 12520 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.644841564521237e-05, "loss": 1.4366, "step": 12525 }, { "epoch": 0.65, "grad_norm": 0.5625, "learning_rate": 6.636336210833654e-05, "loss": 1.4163, "step": 12530 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.627833600004791e-05, "loss": 1.4071, "step": 12535 }, { "epoch": 0.65, "grad_norm": 0.515625, "learning_rate": 6.619333738967996e-05, "loss": 1.3856, "step": 12540 }, { "epoch": 0.65, "grad_norm": 0.58984375, "learning_rate": 6.610836634654382e-05, "loss": 1.441, "step": 12545 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.602342293992805e-05, "loss": 1.409, "step": 12550 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.593850723909875e-05, "loss": 1.3984, "step": 12555 }, { "epoch": 0.65, "grad_norm": 0.486328125, "learning_rate": 6.585361931329937e-05, "loss": 1.3951, "step": 12560 }, { "epoch": 0.65, "grad_norm": 0.51953125, "learning_rate": 6.576875923175075e-05, "loss": 1.4047, "step": 12565 }, { "epoch": 0.65, "grad_norm": 0.515625, "learning_rate": 6.568392706365099e-05, "loss": 1.3699, "step": 12570 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.559912287817547e-05, "loss": 1.3845, "step": 12575 }, { "epoch": 0.65, "grad_norm": 0.5703125, "learning_rate": 6.551434674447676e-05, "loss": 1.3977, "step": 12580 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.542959873168446e-05, "loss": 1.4172, "step": 12585 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 6.534487890890536e-05, "loss": 1.4026, "step": 12590 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.526018734522317e-05, "loss": 1.4244, "step": 12595 }, { "epoch": 0.65, "grad_norm": 0.57421875, "learning_rate": 6.517552410969863e-05, "loss": 1.4092, "step": 12600 }, { "epoch": 0.65, "grad_norm": 0.5625, "learning_rate": 6.50908892713693e-05, "loss": 1.4277, "step": 12605 }, { "epoch": 0.65, "grad_norm": 0.51953125, "learning_rate": 6.50062828992497e-05, "loss": 1.4094, "step": 12610 }, { "epoch": 0.65, "grad_norm": 0.546875, "learning_rate": 6.4921705062331e-05, "loss": 1.4182, "step": 12615 }, { "epoch": 0.65, "grad_norm": 0.54296875, "learning_rate": 6.48371558295812e-05, "loss": 1.4341, "step": 12620 }, { "epoch": 0.65, "grad_norm": 0.5390625, "learning_rate": 6.475263526994494e-05, "loss": 1.3804, "step": 12625 }, { "epoch": 0.65, "grad_norm": 0.5546875, "learning_rate": 6.466814345234348e-05, "loss": 1.4144, "step": 12630 }, { "epoch": 0.65, "grad_norm": 0.61328125, "learning_rate": 6.458368044567466e-05, "loss": 1.4298, "step": 12635 }, { "epoch": 0.65, "grad_norm": 0.55078125, "learning_rate": 6.449924631881277e-05, "loss": 1.3918, "step": 12640 }, { "epoch": 0.65, "grad_norm": 0.5234375, "learning_rate": 6.441484114060865e-05, "loss": 1.4225, "step": 12645 }, { "epoch": 0.65, "grad_norm": 0.578125, "learning_rate": 6.43304649798894e-05, "loss": 1.4302, "step": 12650 }, { "epoch": 0.65, "grad_norm": 0.53515625, "learning_rate": 6.424611790545862e-05, "loss": 1.4131, "step": 12655 }, { "epoch": 0.65, "grad_norm": 0.53125, "learning_rate": 6.416179998609604e-05, "loss": 1.4137, "step": 12660 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.407751129055772e-05, "loss": 1.384, "step": 12665 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.399325188757583e-05, "loss": 1.4397, "step": 12670 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.390902184585869e-05, "loss": 1.4054, "step": 12675 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.382482123409064e-05, "loss": 1.4166, "step": 12680 }, { "epoch": 0.66, "grad_norm": 0.5625, "learning_rate": 6.374065012093206e-05, "loss": 1.4157, "step": 12685 }, { "epoch": 0.66, "grad_norm": 0.54296875, "learning_rate": 6.365650857501926e-05, "loss": 1.4305, "step": 12690 }, { "epoch": 0.66, "grad_norm": 0.5, "learning_rate": 6.357239666496446e-05, "loss": 1.3844, "step": 12695 }, { "epoch": 0.66, "grad_norm": 0.55859375, "learning_rate": 6.348831445935566e-05, "loss": 1.444, "step": 12700 }, { "epoch": 0.66, "grad_norm": 0.55078125, "learning_rate": 6.340426202675669e-05, "loss": 1.45, "step": 12705 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.332023943570706e-05, "loss": 1.4239, "step": 12710 }, { "epoch": 0.66, "grad_norm": 0.55859375, "learning_rate": 6.323624675472202e-05, "loss": 1.4083, "step": 12715 }, { "epoch": 0.66, "grad_norm": 0.55078125, "learning_rate": 6.315228405229232e-05, "loss": 1.4053, "step": 12720 }, { "epoch": 0.66, "grad_norm": 0.54296875, "learning_rate": 6.306835139688438e-05, "loss": 1.4305, "step": 12725 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.298444885694001e-05, "loss": 1.3939, "step": 12730 }, { "epoch": 0.66, "grad_norm": 0.51953125, "learning_rate": 6.290057650087656e-05, "loss": 1.3813, "step": 12735 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.281673439708668e-05, "loss": 1.4162, "step": 12740 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.273292261393846e-05, "loss": 1.4266, "step": 12745 }, { "epoch": 0.66, "grad_norm": 0.546875, "learning_rate": 6.264914121977512e-05, "loss": 1.417, "step": 12750 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.256539028291523e-05, "loss": 1.4123, "step": 12755 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.248166987165247e-05, "loss": 1.4137, "step": 12760 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.239798005425561e-05, "loss": 1.4262, "step": 12765 }, { "epoch": 0.66, "grad_norm": 0.56640625, "learning_rate": 6.231432089896848e-05, "loss": 1.4328, "step": 12770 }, { "epoch": 0.66, "grad_norm": 0.53125, "learning_rate": 6.223069247400998e-05, "loss": 1.4066, "step": 12775 }, { "epoch": 0.66, "grad_norm": 0.57421875, "learning_rate": 6.214709484757382e-05, "loss": 1.4116, "step": 12780 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.206352808782873e-05, "loss": 1.3878, "step": 12785 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.197999226291816e-05, "loss": 1.4295, "step": 12790 }, { "epoch": 0.66, "grad_norm": 0.546875, "learning_rate": 6.189648744096043e-05, "loss": 1.4139, "step": 12795 }, { "epoch": 0.66, "grad_norm": 0.5546875, "learning_rate": 6.181301369004847e-05, "loss": 1.4571, "step": 12800 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.172957107824999e-05, "loss": 1.4152, "step": 12805 }, { "epoch": 0.66, "grad_norm": 0.52734375, "learning_rate": 6.164615967360723e-05, "loss": 1.4134, "step": 12810 }, { "epoch": 0.66, "grad_norm": 0.5078125, "learning_rate": 6.156277954413701e-05, "loss": 1.3994, "step": 12815 }, { "epoch": 0.66, "grad_norm": 0.55078125, "learning_rate": 6.147943075783062e-05, "loss": 1.4376, "step": 12820 }, { "epoch": 0.66, "grad_norm": 0.5390625, "learning_rate": 6.139611338265386e-05, "loss": 1.4031, "step": 12825 }, { "epoch": 0.66, "grad_norm": 0.53125, "learning_rate": 6.131282748654681e-05, "loss": 1.362, "step": 12830 }, { "epoch": 0.66, "grad_norm": 0.53125, "learning_rate": 6.1229573137424e-05, "loss": 1.4034, "step": 12835 }, { "epoch": 0.66, "grad_norm": 0.4921875, "learning_rate": 6.114635040317414e-05, "loss": 1.3764, "step": 12840 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 6.10631593516602e-05, "loss": 1.4128, "step": 12845 }, { "epoch": 0.66, "grad_norm": 0.55859375, "learning_rate": 6.098000005071933e-05, "loss": 1.4145, "step": 12850 }, { "epoch": 0.67, "grad_norm": 0.5546875, "learning_rate": 6.089687256816276e-05, "loss": 1.4306, "step": 12855 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 6.081377697177576e-05, "loss": 1.4322, "step": 12860 }, { "epoch": 0.67, "grad_norm": 0.50390625, "learning_rate": 6.073071332931768e-05, "loss": 1.4077, "step": 12865 }, { "epoch": 0.67, "grad_norm": 0.5625, "learning_rate": 6.064768170852169e-05, "loss": 1.3876, "step": 12870 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 6.0564682177094976e-05, "loss": 1.4286, "step": 12875 }, { "epoch": 0.67, "grad_norm": 0.5234375, "learning_rate": 6.048171480271847e-05, "loss": 1.4198, "step": 12880 }, { "epoch": 0.67, "grad_norm": 0.51171875, "learning_rate": 6.0398779653046876e-05, "loss": 1.4129, "step": 12885 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 6.031587679570869e-05, "loss": 1.3972, "step": 12890 }, { "epoch": 0.67, "grad_norm": 0.5546875, "learning_rate": 6.0233006298306024e-05, "loss": 1.3879, "step": 12895 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 6.015016822841465e-05, "loss": 1.4311, "step": 12900 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 6.006736265358381e-05, "loss": 1.4233, "step": 12905 }, { "epoch": 0.67, "grad_norm": 0.51953125, "learning_rate": 5.9984589641336354e-05, "loss": 1.4113, "step": 12910 }, { "epoch": 0.67, "grad_norm": 0.5234375, "learning_rate": 5.9901849259168484e-05, "loss": 1.4208, "step": 12915 }, { "epoch": 0.67, "grad_norm": 0.546875, "learning_rate": 5.981914157454988e-05, "loss": 1.4341, "step": 12920 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 5.9736466654923476e-05, "loss": 1.3909, "step": 12925 }, { "epoch": 0.67, "grad_norm": 0.515625, "learning_rate": 5.9653824567705564e-05, "loss": 1.3685, "step": 12930 }, { "epoch": 0.67, "grad_norm": 0.50390625, "learning_rate": 5.9571215380285604e-05, "loss": 1.4367, "step": 12935 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 5.9488639160026274e-05, "loss": 1.4329, "step": 12940 }, { "epoch": 0.67, "grad_norm": 0.57421875, "learning_rate": 5.940609597426332e-05, "loss": 1.4143, "step": 12945 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 5.932358589030562e-05, "loss": 1.407, "step": 12950 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 5.9241108975434976e-05, "loss": 1.4059, "step": 12955 }, { "epoch": 0.67, "grad_norm": 0.55859375, "learning_rate": 5.9158665296906235e-05, "loss": 1.4186, "step": 12960 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.9076254921947024e-05, "loss": 1.4117, "step": 12965 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 5.899387791775794e-05, "loss": 1.3814, "step": 12970 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.8911534351512276e-05, "loss": 1.4122, "step": 12975 }, { "epoch": 0.67, "grad_norm": 0.53515625, "learning_rate": 5.882922429035611e-05, "loss": 1.4037, "step": 12980 }, { "epoch": 0.67, "grad_norm": 0.5546875, "learning_rate": 5.874694780140817e-05, "loss": 1.435, "step": 12985 }, { "epoch": 0.67, "grad_norm": 0.55078125, "learning_rate": 5.866470495175982e-05, "loss": 1.4324, "step": 12990 }, { "epoch": 0.67, "grad_norm": 0.55078125, "learning_rate": 5.858249580847499e-05, "loss": 1.4104, "step": 12995 }, { "epoch": 0.67, "grad_norm": 0.54296875, "learning_rate": 5.850032043859013e-05, "loss": 1.411, "step": 13000 }, { "epoch": 0.67, "grad_norm": 0.51953125, "learning_rate": 5.841817890911413e-05, "loss": 1.4082, "step": 13005 }, { "epoch": 0.67, "grad_norm": 0.55078125, "learning_rate": 5.8336071287028315e-05, "loss": 1.4199, "step": 13010 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 5.825399763928634e-05, "loss": 1.4118, "step": 13015 }, { "epoch": 0.67, "grad_norm": 0.52734375, "learning_rate": 5.817195803281421e-05, "loss": 1.3896, "step": 13020 }, { "epoch": 0.67, "grad_norm": 0.55078125, "learning_rate": 5.808995253451006e-05, "loss": 1.4292, "step": 13025 }, { "epoch": 0.67, "grad_norm": 0.51953125, "learning_rate": 5.8007981211244276e-05, "loss": 1.4138, "step": 13030 }, { "epoch": 0.67, "grad_norm": 0.515625, "learning_rate": 5.79260441298594e-05, "loss": 1.3991, "step": 13035 }, { "epoch": 0.67, "grad_norm": 0.51171875, "learning_rate": 5.7844141357170087e-05, "loss": 1.4028, "step": 13040 }, { "epoch": 0.67, "grad_norm": 0.53125, "learning_rate": 5.776227295996284e-05, "loss": 1.3874, "step": 13045 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.768043900499631e-05, "loss": 1.4333, "step": 13050 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.759863955900099e-05, "loss": 1.4307, "step": 13055 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.751687468867929e-05, "loss": 1.4232, "step": 13060 }, { "epoch": 0.68, "grad_norm": 0.55078125, "learning_rate": 5.74351444607053e-05, "loss": 1.4245, "step": 13065 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.7353448941724966e-05, "loss": 1.3847, "step": 13070 }, { "epoch": 0.68, "grad_norm": 0.5703125, "learning_rate": 5.727178819835592e-05, "loss": 1.4309, "step": 13075 }, { "epoch": 0.68, "grad_norm": 0.55078125, "learning_rate": 5.7190162297187475e-05, "loss": 1.4215, "step": 13080 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.7108571304780355e-05, "loss": 1.4169, "step": 13085 }, { "epoch": 0.68, "grad_norm": 0.5390625, "learning_rate": 5.702701528766703e-05, "loss": 1.4033, "step": 13090 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.694549431235133e-05, "loss": 1.4193, "step": 13095 }, { "epoch": 0.68, "grad_norm": 0.546875, "learning_rate": 5.6864008445308603e-05, "loss": 1.4367, "step": 13100 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.678255775298542e-05, "loss": 1.4123, "step": 13105 }, { "epoch": 0.68, "grad_norm": 0.56640625, "learning_rate": 5.6701142301799784e-05, "loss": 1.4158, "step": 13110 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.6619762158140955e-05, "loss": 1.4283, "step": 13115 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.6538417388369404e-05, "loss": 1.4124, "step": 13120 }, { "epoch": 0.68, "grad_norm": 0.5703125, "learning_rate": 5.6457108058816674e-05, "loss": 1.4147, "step": 13125 }, { "epoch": 0.68, "grad_norm": 0.55078125, "learning_rate": 5.6375834235785495e-05, "loss": 1.4181, "step": 13130 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.62945959855496e-05, "loss": 1.3914, "step": 13135 }, { "epoch": 0.68, "grad_norm": 0.5390625, "learning_rate": 5.6213393374353814e-05, "loss": 1.4017, "step": 13140 }, { "epoch": 0.68, "grad_norm": 0.51171875, "learning_rate": 5.6132226468413715e-05, "loss": 1.3974, "step": 13145 }, { "epoch": 0.68, "grad_norm": 0.55078125, "learning_rate": 5.60510953339159e-05, "loss": 1.4176, "step": 13150 }, { "epoch": 0.68, "grad_norm": 0.52734375, "learning_rate": 5.597000003701779e-05, "loss": 1.4182, "step": 13155 }, { "epoch": 0.68, "grad_norm": 0.5703125, "learning_rate": 5.5888940643847574e-05, "loss": 1.4126, "step": 13160 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.580791722050408e-05, "loss": 1.41, "step": 13165 }, { "epoch": 0.68, "grad_norm": 0.55078125, "learning_rate": 5.5726929833056954e-05, "loss": 1.3945, "step": 13170 }, { "epoch": 0.68, "grad_norm": 0.51953125, "learning_rate": 5.5645978547546284e-05, "loss": 1.3876, "step": 13175 }, { "epoch": 0.68, "grad_norm": 0.5390625, "learning_rate": 5.5565063429982865e-05, "loss": 1.4207, "step": 13180 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.5484184546347983e-05, "loss": 1.4061, "step": 13185 }, { "epoch": 0.68, "grad_norm": 0.546875, "learning_rate": 5.540334196259326e-05, "loss": 1.3876, "step": 13190 }, { "epoch": 0.68, "grad_norm": 0.56640625, "learning_rate": 5.532253574464083e-05, "loss": 1.4197, "step": 13195 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.5241765958383154e-05, "loss": 1.4209, "step": 13200 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.516103266968299e-05, "loss": 1.3889, "step": 13205 }, { "epoch": 0.68, "grad_norm": 0.53515625, "learning_rate": 5.508033594437325e-05, "loss": 1.4103, "step": 13210 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.4999675848257147e-05, "loss": 1.3954, "step": 13215 }, { "epoch": 0.68, "grad_norm": 0.55859375, "learning_rate": 5.491905244710796e-05, "loss": 1.4404, "step": 13220 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.48384658066691e-05, "loss": 1.3895, "step": 13225 }, { "epoch": 0.68, "grad_norm": 0.5390625, "learning_rate": 5.47579159926539e-05, "loss": 1.4484, "step": 13230 }, { "epoch": 0.68, "grad_norm": 0.54296875, "learning_rate": 5.467740307074574e-05, "loss": 1.4115, "step": 13235 }, { "epoch": 0.68, "grad_norm": 0.53125, "learning_rate": 5.459692710659792e-05, "loss": 1.4206, "step": 13240 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.451648816583362e-05, "loss": 1.429, "step": 13245 }, { "epoch": 0.69, "grad_norm": 0.5546875, "learning_rate": 5.443608631404573e-05, "loss": 1.4297, "step": 13250 }, { "epoch": 0.69, "grad_norm": 0.53515625, "learning_rate": 5.435572161679698e-05, "loss": 1.3839, "step": 13255 }, { "epoch": 0.69, "grad_norm": 0.5390625, "learning_rate": 5.42753941396198e-05, "loss": 1.3952, "step": 13260 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.419510394801628e-05, "loss": 1.3986, "step": 13265 }, { "epoch": 0.69, "grad_norm": 0.51953125, "learning_rate": 5.411485110745802e-05, "loss": 1.4064, "step": 13270 }, { "epoch": 0.69, "grad_norm": 0.5234375, "learning_rate": 5.4034635683386245e-05, "loss": 1.4064, "step": 13275 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.395445774121166e-05, "loss": 1.3951, "step": 13280 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.387431734631443e-05, "loss": 1.4237, "step": 13285 }, { "epoch": 0.69, "grad_norm": 0.515625, "learning_rate": 5.379421456404397e-05, "loss": 1.4112, "step": 13290 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.371414945971918e-05, "loss": 1.4125, "step": 13295 }, { "epoch": 0.69, "grad_norm": 0.578125, "learning_rate": 5.3634122098628146e-05, "loss": 1.4689, "step": 13300 }, { "epoch": 0.69, "grad_norm": 0.5625, "learning_rate": 5.3554132546028294e-05, "loss": 1.3824, "step": 13305 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.3474180867146004e-05, "loss": 1.4219, "step": 13310 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.339426712717697e-05, "loss": 1.4107, "step": 13315 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.331439139128587e-05, "loss": 1.4263, "step": 13320 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.323455372460644e-05, "loss": 1.3911, "step": 13325 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.315475419224124e-05, "loss": 1.4176, "step": 13330 }, { "epoch": 0.69, "grad_norm": 0.55078125, "learning_rate": 5.3074992859261895e-05, "loss": 1.4085, "step": 13335 }, { "epoch": 0.69, "grad_norm": 0.578125, "learning_rate": 5.299526979070879e-05, "loss": 1.3893, "step": 13340 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.2915585051591196e-05, "loss": 1.3686, "step": 13345 }, { "epoch": 0.69, "grad_norm": 0.53125, "learning_rate": 5.2835938706886966e-05, "loss": 1.3746, "step": 13350 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.275633082154279e-05, "loss": 1.3683, "step": 13355 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.2676761460473934e-05, "loss": 1.4154, "step": 13360 }, { "epoch": 0.69, "grad_norm": 0.77734375, "learning_rate": 5.259723068856434e-05, "loss": 1.387, "step": 13365 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.251773857066629e-05, "loss": 1.3847, "step": 13370 }, { "epoch": 0.69, "grad_norm": 0.5078125, "learning_rate": 5.243828517160072e-05, "loss": 1.3706, "step": 13375 }, { "epoch": 0.69, "grad_norm": 0.5234375, "learning_rate": 5.235887055615696e-05, "loss": 1.4202, "step": 13380 }, { "epoch": 0.69, "grad_norm": 0.51171875, "learning_rate": 5.227949478909265e-05, "loss": 1.3961, "step": 13385 }, { "epoch": 0.69, "grad_norm": 0.5546875, "learning_rate": 5.2200157935133865e-05, "loss": 1.4123, "step": 13390 }, { "epoch": 0.69, "grad_norm": 0.53515625, "learning_rate": 5.2120860058974786e-05, "loss": 1.4374, "step": 13395 }, { "epoch": 0.69, "grad_norm": 0.5546875, "learning_rate": 5.204160122527795e-05, "loss": 1.3853, "step": 13400 }, { "epoch": 0.69, "grad_norm": 0.54296875, "learning_rate": 5.196238149867398e-05, "loss": 1.4036, "step": 13405 }, { "epoch": 0.69, "grad_norm": 0.56640625, "learning_rate": 5.188320094376172e-05, "loss": 1.4003, "step": 13410 }, { "epoch": 0.69, "grad_norm": 0.546875, "learning_rate": 5.180405962510789e-05, "loss": 1.4341, "step": 13415 }, { "epoch": 0.69, "grad_norm": 0.52734375, "learning_rate": 5.172495760724736e-05, "loss": 1.4119, "step": 13420 }, { "epoch": 0.69, "grad_norm": 0.51171875, "learning_rate": 5.1645894954682896e-05, "loss": 1.3961, "step": 13425 }, { "epoch": 0.69, "grad_norm": 0.52734375, "learning_rate": 5.156687173188521e-05, "loss": 1.3973, "step": 13430 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 5.148788800329278e-05, "loss": 1.411, "step": 13435 }, { "epoch": 0.7, "grad_norm": 0.51953125, "learning_rate": 5.140894383331196e-05, "loss": 1.4306, "step": 13440 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 5.133003928631679e-05, "loss": 1.3455, "step": 13445 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 5.1251174426649076e-05, "loss": 1.4141, "step": 13450 }, { "epoch": 0.7, "grad_norm": 0.55078125, "learning_rate": 5.117234931861813e-05, "loss": 1.3502, "step": 13455 }, { "epoch": 0.7, "grad_norm": 0.5234375, "learning_rate": 5.109356402650096e-05, "loss": 1.4024, "step": 13460 }, { "epoch": 0.7, "grad_norm": 0.546875, "learning_rate": 5.1014818614542116e-05, "loss": 1.415, "step": 13465 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 5.0936113146953525e-05, "loss": 1.4265, "step": 13470 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 5.085744768791465e-05, "loss": 1.3924, "step": 13475 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 5.0778822301572226e-05, "loss": 1.3742, "step": 13480 }, { "epoch": 0.7, "grad_norm": 0.5234375, "learning_rate": 5.070023705204041e-05, "loss": 1.4227, "step": 13485 }, { "epoch": 0.7, "grad_norm": 0.546875, "learning_rate": 5.062169200340058e-05, "loss": 1.4204, "step": 13490 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 5.054318721970137e-05, "loss": 1.3805, "step": 13495 }, { "epoch": 0.7, "grad_norm": 0.54296875, "learning_rate": 5.046472276495848e-05, "loss": 1.3879, "step": 13500 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 5.038629870315486e-05, "loss": 1.3919, "step": 13505 }, { "epoch": 0.7, "grad_norm": 0.5390625, "learning_rate": 5.030791509824041e-05, "loss": 1.4157, "step": 13510 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 5.0229572014132156e-05, "loss": 1.4404, "step": 13515 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 5.0151269514713927e-05, "loss": 1.4115, "step": 13520 }, { "epoch": 0.7, "grad_norm": 0.5546875, "learning_rate": 5.007300766383659e-05, "loss": 1.4164, "step": 13525 }, { "epoch": 0.7, "grad_norm": 0.546875, "learning_rate": 4.999478652531782e-05, "loss": 1.3713, "step": 13530 }, { "epoch": 0.7, "grad_norm": 0.55078125, "learning_rate": 4.99166061629421e-05, "loss": 1.3609, "step": 13535 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.9838466640460627e-05, "loss": 1.3923, "step": 13540 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.976036802159133e-05, "loss": 1.4069, "step": 13545 }, { "epoch": 0.7, "grad_norm": 0.57421875, "learning_rate": 4.968231037001879e-05, "loss": 1.3967, "step": 13550 }, { "epoch": 0.7, "grad_norm": 0.5390625, "learning_rate": 4.96042937493942e-05, "loss": 1.3987, "step": 13555 }, { "epoch": 0.7, "grad_norm": 0.5, "learning_rate": 4.95263182233352e-05, "loss": 1.4095, "step": 13560 }, { "epoch": 0.7, "grad_norm": 0.546875, "learning_rate": 4.9448383855426006e-05, "loss": 1.3958, "step": 13565 }, { "epoch": 0.7, "grad_norm": 0.5859375, "learning_rate": 4.937049070921727e-05, "loss": 1.4014, "step": 13570 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.9292638848226024e-05, "loss": 1.4096, "step": 13575 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.9214828335935556e-05, "loss": 1.4241, "step": 13580 }, { "epoch": 0.7, "grad_norm": 0.53125, "learning_rate": 4.913705923579556e-05, "loss": 1.4062, "step": 13585 }, { "epoch": 0.7, "grad_norm": 0.546875, "learning_rate": 4.905933161122187e-05, "loss": 1.3818, "step": 13590 }, { "epoch": 0.7, "grad_norm": 0.5390625, "learning_rate": 4.89816455255966e-05, "loss": 1.4079, "step": 13595 }, { "epoch": 0.7, "grad_norm": 0.5234375, "learning_rate": 4.890400104226782e-05, "loss": 1.4027, "step": 13600 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.882639822454983e-05, "loss": 1.3885, "step": 13605 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 4.87488371357229e-05, "loss": 1.4119, "step": 13610 }, { "epoch": 0.7, "grad_norm": 0.5625, "learning_rate": 4.867131783903333e-05, "loss": 1.4109, "step": 13615 }, { "epoch": 0.7, "grad_norm": 0.53515625, "learning_rate": 4.859384039769319e-05, "loss": 1.4487, "step": 13620 }, { "epoch": 0.7, "grad_norm": 0.52734375, "learning_rate": 4.851640487488057e-05, "loss": 1.404, "step": 13625 }, { "epoch": 0.71, "grad_norm": 0.51171875, "learning_rate": 4.8439011333739314e-05, "loss": 1.4212, "step": 13630 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.836165983737909e-05, "loss": 1.3863, "step": 13635 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.828435044887516e-05, "loss": 1.3797, "step": 13640 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.820708323126856e-05, "loss": 1.4255, "step": 13645 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.81298582475659e-05, "loss": 1.3684, "step": 13650 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.805267556073938e-05, "loss": 1.3921, "step": 13655 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.797553523372663e-05, "loss": 1.419, "step": 13660 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.7898437329430815e-05, "loss": 1.4267, "step": 13665 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.7821381910720484e-05, "loss": 1.3806, "step": 13670 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.774436904042959e-05, "loss": 1.4022, "step": 13675 }, { "epoch": 0.71, "grad_norm": 0.51171875, "learning_rate": 4.766739878135725e-05, "loss": 1.4005, "step": 13680 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.759047119626798e-05, "loss": 1.4232, "step": 13685 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.751358634789143e-05, "loss": 1.4182, "step": 13690 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.743674429892245e-05, "loss": 1.4279, "step": 13695 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.73599451120209e-05, "loss": 1.3974, "step": 13700 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.728318884981175e-05, "loss": 1.3948, "step": 13705 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.7206475574884976e-05, "loss": 1.4153, "step": 13710 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.712980534979553e-05, "loss": 1.394, "step": 13715 }, { "epoch": 0.71, "grad_norm": 0.5234375, "learning_rate": 4.7053178237063135e-05, "loss": 1.3956, "step": 13720 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.697659429917246e-05, "loss": 1.4027, "step": 13725 }, { "epoch": 0.71, "grad_norm": 0.53125, "learning_rate": 4.690005359857297e-05, "loss": 1.3886, "step": 13730 }, { "epoch": 0.71, "grad_norm": 0.5078125, "learning_rate": 4.6823556197678865e-05, "loss": 1.4122, "step": 13735 }, { "epoch": 0.71, "grad_norm": 0.5390625, "learning_rate": 4.674710215886895e-05, "loss": 1.408, "step": 13740 }, { "epoch": 0.71, "grad_norm": 0.54296875, "learning_rate": 4.667069154448679e-05, "loss": 1.4116, "step": 13745 }, { "epoch": 0.71, "grad_norm": 0.498046875, "learning_rate": 4.659432441684047e-05, "loss": 1.3956, "step": 13750 }, { "epoch": 0.71, "grad_norm": 0.55078125, "learning_rate": 4.6518000838202694e-05, "loss": 1.4042, "step": 13755 }, { "epoch": 0.71, "grad_norm": 0.546875, "learning_rate": 4.6441720870810545e-05, "loss": 1.4179, "step": 13760 }, { "epoch": 0.71, "grad_norm": 0.52734375, "learning_rate": 4.636548457686557e-05, "loss": 1.3851, "step": 13765 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.628929201853375e-05, "loss": 1.4158, "step": 13770 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.621314325794539e-05, "loss": 1.3648, "step": 13775 }, { "epoch": 0.71, "grad_norm": 0.5546875, "learning_rate": 4.613703835719511e-05, "loss": 1.3847, "step": 13780 }, { "epoch": 0.71, "grad_norm": 0.55859375, "learning_rate": 4.606097737834163e-05, "loss": 1.4183, "step": 13785 }, { "epoch": 0.71, "grad_norm": 0.53515625, "learning_rate": 4.5984960383408005e-05, "loss": 1.3988, "step": 13790 }, { "epoch": 0.71, "grad_norm": 0.5390625, "learning_rate": 4.590898743438138e-05, "loss": 1.4255, "step": 13795 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.5833058593212984e-05, "loss": 1.4159, "step": 13800 }, { "epoch": 0.71, "grad_norm": 0.55078125, "learning_rate": 4.575717392181801e-05, "loss": 1.4114, "step": 13805 }, { "epoch": 0.71, "grad_norm": 0.56640625, "learning_rate": 4.568133348207572e-05, "loss": 1.4001, "step": 13810 }, { "epoch": 0.71, "grad_norm": 0.515625, "learning_rate": 4.5605537335829275e-05, "loss": 1.3891, "step": 13815 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.5529785544885715e-05, "loss": 1.4303, "step": 13820 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.545407817101598e-05, "loss": 1.4256, "step": 13825 }, { "epoch": 0.72, "grad_norm": 0.55078125, "learning_rate": 4.5378415275954634e-05, "loss": 1.4324, "step": 13830 }, { "epoch": 0.72, "grad_norm": 0.5703125, "learning_rate": 4.53027969214001e-05, "loss": 1.4147, "step": 13835 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.5227223169014456e-05, "loss": 1.4096, "step": 13840 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.5151694080423414e-05, "loss": 1.417, "step": 13845 }, { "epoch": 0.72, "grad_norm": 0.57421875, "learning_rate": 4.50762097172162e-05, "loss": 1.4225, "step": 13850 }, { "epoch": 0.72, "grad_norm": 0.5546875, "learning_rate": 4.500077014094566e-05, "loss": 1.4455, "step": 13855 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.492537541312805e-05, "loss": 1.415, "step": 13860 }, { "epoch": 0.72, "grad_norm": 0.53515625, "learning_rate": 4.485002559524314e-05, "loss": 1.4214, "step": 13865 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.477472074873396e-05, "loss": 1.424, "step": 13870 }, { "epoch": 0.72, "grad_norm": 0.53515625, "learning_rate": 4.469946093500694e-05, "loss": 1.3948, "step": 13875 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.4624246215431796e-05, "loss": 1.3954, "step": 13880 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.4549076651341493e-05, "loss": 1.449, "step": 13885 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.4473952304032065e-05, "loss": 1.3918, "step": 13890 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.439887323476277e-05, "loss": 1.4122, "step": 13895 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.432383950475595e-05, "loss": 1.4061, "step": 13900 }, { "epoch": 0.72, "grad_norm": 0.578125, "learning_rate": 4.4248851175196956e-05, "loss": 1.4182, "step": 13905 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.4173908307234045e-05, "loss": 1.4331, "step": 13910 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.40990109619785e-05, "loss": 1.4382, "step": 13915 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.402415920050447e-05, "loss": 1.406, "step": 13920 }, { "epoch": 0.72, "grad_norm": 0.5703125, "learning_rate": 4.394935308384893e-05, "loss": 1.4181, "step": 13925 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.387459267301155e-05, "loss": 1.4027, "step": 13930 }, { "epoch": 0.72, "grad_norm": 0.50390625, "learning_rate": 4.379987802895483e-05, "loss": 1.3861, "step": 13935 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.3725209212603925e-05, "loss": 1.402, "step": 13940 }, { "epoch": 0.72, "grad_norm": 0.55859375, "learning_rate": 4.3650586284846636e-05, "loss": 1.4507, "step": 13945 }, { "epoch": 0.72, "grad_norm": 0.546875, "learning_rate": 4.357600930653327e-05, "loss": 1.4307, "step": 13950 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 4.350147833847674e-05, "loss": 1.4185, "step": 13955 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.3426993441452414e-05, "loss": 1.3886, "step": 13960 }, { "epoch": 0.72, "grad_norm": 0.53515625, "learning_rate": 4.335255467619814e-05, "loss": 1.402, "step": 13965 }, { "epoch": 0.72, "grad_norm": 0.5390625, "learning_rate": 4.3278162103414033e-05, "loss": 1.4353, "step": 13970 }, { "epoch": 0.72, "grad_norm": 0.515625, "learning_rate": 4.320381578376264e-05, "loss": 1.4512, "step": 13975 }, { "epoch": 0.72, "grad_norm": 0.5234375, "learning_rate": 4.312951577786876e-05, "loss": 1.4378, "step": 13980 }, { "epoch": 0.72, "grad_norm": 0.53515625, "learning_rate": 4.305526214631948e-05, "loss": 1.4088, "step": 13985 }, { "epoch": 0.72, "grad_norm": 0.54296875, "learning_rate": 4.2981054949663926e-05, "loss": 1.3837, "step": 13990 }, { "epoch": 0.72, "grad_norm": 0.53125, "learning_rate": 4.290689424841351e-05, "loss": 1.3985, "step": 13995 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.283278010304167e-05, "loss": 1.411, "step": 14000 }, { "epoch": 0.72, "grad_norm": 0.51953125, "learning_rate": 4.2758712573983915e-05, "loss": 1.3721, "step": 14005 }, { "epoch": 0.72, "grad_norm": 0.52734375, "learning_rate": 4.268469172163764e-05, "loss": 1.4247, "step": 14010 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.261071760636228e-05, "loss": 1.4248, "step": 14015 }, { "epoch": 0.73, "grad_norm": 0.5234375, "learning_rate": 4.2536790288479135e-05, "loss": 1.4128, "step": 14020 }, { "epoch": 0.73, "grad_norm": 0.5546875, "learning_rate": 4.246290982827137e-05, "loss": 1.385, "step": 14025 }, { "epoch": 0.73, "grad_norm": 0.5390625, "learning_rate": 4.238907628598384e-05, "loss": 1.3891, "step": 14030 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 4.231528972182324e-05, "loss": 1.4083, "step": 14035 }, { "epoch": 0.73, "grad_norm": 0.53125, "learning_rate": 4.2241550195957924e-05, "loss": 1.4192, "step": 14040 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 4.2167857768517935e-05, "loss": 1.4116, "step": 14045 }, { "epoch": 0.73, "grad_norm": 0.56640625, "learning_rate": 4.2094212499594785e-05, "loss": 1.3968, "step": 14050 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.2020614449241705e-05, "loss": 1.4164, "step": 14055 }, { "epoch": 0.73, "grad_norm": 0.55078125, "learning_rate": 4.194706367747323e-05, "loss": 1.3963, "step": 14060 }, { "epoch": 0.73, "grad_norm": 0.5234375, "learning_rate": 4.187356024426549e-05, "loss": 1.4072, "step": 14065 }, { "epoch": 0.73, "grad_norm": 0.53125, "learning_rate": 4.1800104209556e-05, "loss": 1.4131, "step": 14070 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.1726695633243527e-05, "loss": 1.4076, "step": 14075 }, { "epoch": 0.73, "grad_norm": 0.51171875, "learning_rate": 4.165333457518823e-05, "loss": 1.3496, "step": 14080 }, { "epoch": 0.73, "grad_norm": 0.5234375, "learning_rate": 4.1580021095211486e-05, "loss": 1.4122, "step": 14085 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 4.150675525309593e-05, "loss": 1.4152, "step": 14090 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 4.1433537108585216e-05, "loss": 1.4124, "step": 14095 }, { "epoch": 0.73, "grad_norm": 0.56640625, "learning_rate": 4.1360366721384234e-05, "loss": 1.4217, "step": 14100 }, { "epoch": 0.73, "grad_norm": 0.53515625, "learning_rate": 4.128724415115889e-05, "loss": 1.4085, "step": 14105 }, { "epoch": 0.73, "grad_norm": 0.51953125, "learning_rate": 4.121416945753611e-05, "loss": 1.3978, "step": 14110 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 4.114114270010372e-05, "loss": 1.3935, "step": 14115 }, { "epoch": 0.73, "grad_norm": 0.5625, "learning_rate": 4.106816393841052e-05, "loss": 1.4313, "step": 14120 }, { "epoch": 0.73, "grad_norm": 0.5625, "learning_rate": 4.099523323196616e-05, "loss": 1.4194, "step": 14125 }, { "epoch": 0.73, "grad_norm": 0.53125, "learning_rate": 4.092235064024111e-05, "loss": 1.3693, "step": 14130 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 4.0849516222666564e-05, "loss": 1.4015, "step": 14135 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.077673003863446e-05, "loss": 1.4321, "step": 14140 }, { "epoch": 0.73, "grad_norm": 0.53515625, "learning_rate": 4.0703992147497425e-05, "loss": 1.397, "step": 14145 }, { "epoch": 0.73, "grad_norm": 0.546875, "learning_rate": 4.063130260856872e-05, "loss": 1.4254, "step": 14150 }, { "epoch": 0.73, "grad_norm": 0.5234375, "learning_rate": 4.055866148112208e-05, "loss": 1.3843, "step": 14155 }, { "epoch": 0.73, "grad_norm": 0.53515625, "learning_rate": 4.0486068824391856e-05, "loss": 1.3909, "step": 14160 }, { "epoch": 0.73, "grad_norm": 0.52734375, "learning_rate": 4.041352469757283e-05, "loss": 1.4008, "step": 14165 }, { "epoch": 0.73, "grad_norm": 0.55859375, "learning_rate": 4.034102915982031e-05, "loss": 1.4209, "step": 14170 }, { "epoch": 0.73, "grad_norm": 0.54296875, "learning_rate": 4.026858227024978e-05, "loss": 1.4422, "step": 14175 }, { "epoch": 0.73, "grad_norm": 0.56640625, "learning_rate": 4.0196184087937235e-05, "loss": 1.424, "step": 14180 }, { "epoch": 0.73, "grad_norm": 0.5625, "learning_rate": 4.012383467191889e-05, "loss": 1.4286, "step": 14185 }, { "epoch": 0.73, "grad_norm": 0.59765625, "learning_rate": 4.005153408119123e-05, "loss": 1.4388, "step": 14190 }, { "epoch": 0.73, "grad_norm": 0.5390625, "learning_rate": 3.9979282374710824e-05, "loss": 1.3725, "step": 14195 }, { "epoch": 0.73, "grad_norm": 0.53125, "learning_rate": 3.9907079611394485e-05, "loss": 1.4091, "step": 14200 }, { "epoch": 0.73, "grad_norm": 0.51171875, "learning_rate": 3.983492585011906e-05, "loss": 1.3604, "step": 14205 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.9762821149721485e-05, "loss": 1.4136, "step": 14210 }, { "epoch": 0.74, "grad_norm": 0.5234375, "learning_rate": 3.9690765568998665e-05, "loss": 1.3978, "step": 14215 }, { "epoch": 0.74, "grad_norm": 0.5234375, "learning_rate": 3.9618759166707396e-05, "loss": 1.375, "step": 14220 }, { "epoch": 0.74, "grad_norm": 0.5546875, "learning_rate": 3.9546802001564454e-05, "loss": 1.4229, "step": 14225 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.9474894132246435e-05, "loss": 1.4022, "step": 14230 }, { "epoch": 0.74, "grad_norm": 0.52734375, "learning_rate": 3.940303561738977e-05, "loss": 1.4145, "step": 14235 }, { "epoch": 0.74, "grad_norm": 0.5234375, "learning_rate": 3.933122651559054e-05, "loss": 1.4006, "step": 14240 }, { "epoch": 0.74, "grad_norm": 0.515625, "learning_rate": 3.925946688540464e-05, "loss": 1.4169, "step": 14245 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.918775678534759e-05, "loss": 1.4083, "step": 14250 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.911609627389453e-05, "loss": 1.4037, "step": 14255 }, { "epoch": 0.74, "grad_norm": 0.57421875, "learning_rate": 3.904448540948012e-05, "loss": 1.4371, "step": 14260 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.897292425049859e-05, "loss": 1.3692, "step": 14265 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.89014128553036e-05, "loss": 1.4395, "step": 14270 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.8829951282208297e-05, "loss": 1.3866, "step": 14275 }, { "epoch": 0.74, "grad_norm": 0.5390625, "learning_rate": 3.875853958948508e-05, "loss": 1.4246, "step": 14280 }, { "epoch": 0.74, "grad_norm": 0.60546875, "learning_rate": 3.868717783536578e-05, "loss": 1.4123, "step": 14285 }, { "epoch": 0.74, "grad_norm": 0.5390625, "learning_rate": 3.861586607804147e-05, "loss": 1.4189, "step": 14290 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.8544604375662495e-05, "loss": 1.4224, "step": 14295 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.847339278633827e-05, "loss": 1.4446, "step": 14300 }, { "epoch": 0.74, "grad_norm": 0.50390625, "learning_rate": 3.8402231368137454e-05, "loss": 1.3983, "step": 14305 }, { "epoch": 0.74, "grad_norm": 0.5390625, "learning_rate": 3.8331120179087754e-05, "loss": 1.4149, "step": 14310 }, { "epoch": 0.74, "grad_norm": 0.56640625, "learning_rate": 3.8260059277175965e-05, "loss": 1.3856, "step": 14315 }, { "epoch": 0.74, "grad_norm": 0.55078125, "learning_rate": 3.818904872034777e-05, "loss": 1.4143, "step": 14320 }, { "epoch": 0.74, "grad_norm": 0.5625, "learning_rate": 3.8118088566507884e-05, "loss": 1.4227, "step": 14325 }, { "epoch": 0.74, "grad_norm": 0.5546875, "learning_rate": 3.804717887351991e-05, "loss": 1.412, "step": 14330 }, { "epoch": 0.74, "grad_norm": 0.5234375, "learning_rate": 3.797631969920633e-05, "loss": 1.424, "step": 14335 }, { "epoch": 0.74, "grad_norm": 0.5390625, "learning_rate": 3.7905511101348334e-05, "loss": 1.432, "step": 14340 }, { "epoch": 0.74, "grad_norm": 0.51953125, "learning_rate": 3.7834753137685955e-05, "loss": 1.4125, "step": 14345 }, { "epoch": 0.74, "grad_norm": 0.546875, "learning_rate": 3.776404586591794e-05, "loss": 1.4444, "step": 14350 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.769338934370163e-05, "loss": 1.4182, "step": 14355 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.762278362865308e-05, "loss": 1.4039, "step": 14360 }, { "epoch": 0.74, "grad_norm": 0.5234375, "learning_rate": 3.755222877834679e-05, "loss": 1.4062, "step": 14365 }, { "epoch": 0.74, "grad_norm": 0.54296875, "learning_rate": 3.7481724850315894e-05, "loss": 1.3981, "step": 14370 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 3.741127190205196e-05, "loss": 1.3981, "step": 14375 }, { "epoch": 0.74, "grad_norm": 0.515625, "learning_rate": 3.734086999100502e-05, "loss": 1.4209, "step": 14380 }, { "epoch": 0.74, "grad_norm": 0.53125, "learning_rate": 3.7270519174583404e-05, "loss": 1.3797, "step": 14385 }, { "epoch": 0.74, "grad_norm": 0.5625, "learning_rate": 3.7200219510153845e-05, "loss": 1.406, "step": 14390 }, { "epoch": 0.74, "grad_norm": 0.5390625, "learning_rate": 3.7129971055041345e-05, "loss": 1.4059, "step": 14395 }, { "epoch": 0.75, "grad_norm": 0.57421875, "learning_rate": 3.705977386652921e-05, "loss": 1.3911, "step": 14400 }, { "epoch": 0.75, "grad_norm": 0.68359375, "learning_rate": 3.69896280018588e-05, "loss": 1.438, "step": 14405 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.6919533518229734e-05, "loss": 1.3953, "step": 14410 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.6849490472799716e-05, "loss": 1.4092, "step": 14415 }, { "epoch": 0.75, "grad_norm": 0.52734375, "learning_rate": 3.677949892268453e-05, "loss": 1.4013, "step": 14420 }, { "epoch": 0.75, "grad_norm": 0.53515625, "learning_rate": 3.670955892495787e-05, "loss": 1.4047, "step": 14425 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.663967053665147e-05, "loss": 1.3838, "step": 14430 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.6569833814754995e-05, "loss": 1.3798, "step": 14435 }, { "epoch": 0.75, "grad_norm": 0.51171875, "learning_rate": 3.650004881621596e-05, "loss": 1.4116, "step": 14440 }, { "epoch": 0.75, "grad_norm": 0.53515625, "learning_rate": 3.6430315597939636e-05, "loss": 1.3809, "step": 14445 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.636063421678917e-05, "loss": 1.4039, "step": 14450 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.629100472958538e-05, "loss": 1.4236, "step": 14455 }, { "epoch": 0.75, "grad_norm": 0.546875, "learning_rate": 3.6221427193106814e-05, "loss": 1.3737, "step": 14460 }, { "epoch": 0.75, "grad_norm": 0.5703125, "learning_rate": 3.615190166408959e-05, "loss": 1.391, "step": 14465 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.608242819922746e-05, "loss": 1.4241, "step": 14470 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.6013006855171726e-05, "loss": 1.4078, "step": 14475 }, { "epoch": 0.75, "grad_norm": 0.56640625, "learning_rate": 3.5943637688531216e-05, "loss": 1.4076, "step": 14480 }, { "epoch": 0.75, "grad_norm": 0.546875, "learning_rate": 3.58743207558721e-05, "loss": 1.3783, "step": 14485 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.580505611371806e-05, "loss": 1.4014, "step": 14490 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.573584381855012e-05, "loss": 1.3963, "step": 14495 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.566668392680662e-05, "loss": 1.4465, "step": 14500 }, { "epoch": 0.75, "grad_norm": 0.54296875, "learning_rate": 3.5597576494883086e-05, "loss": 1.3737, "step": 14505 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.552852157913238e-05, "loss": 1.4044, "step": 14510 }, { "epoch": 0.75, "grad_norm": 0.5625, "learning_rate": 3.545951923586448e-05, "loss": 1.4472, "step": 14515 }, { "epoch": 0.75, "grad_norm": 0.5390625, "learning_rate": 3.539056952134655e-05, "loss": 1.3723, "step": 14520 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.532167249180271e-05, "loss": 1.4226, "step": 14525 }, { "epoch": 0.75, "grad_norm": 0.53515625, "learning_rate": 3.525282820341428e-05, "loss": 1.3928, "step": 14530 }, { "epoch": 0.75, "grad_norm": 0.55859375, "learning_rate": 3.5184036712319444e-05, "loss": 1.4194, "step": 14535 }, { "epoch": 0.75, "grad_norm": 0.54296875, "learning_rate": 3.5115298074613466e-05, "loss": 1.4332, "step": 14540 }, { "epoch": 0.75, "grad_norm": 0.54296875, "learning_rate": 3.504661234634834e-05, "loss": 1.3965, "step": 14545 }, { "epoch": 0.75, "grad_norm": 0.5546875, "learning_rate": 3.497797958353305e-05, "loss": 1.4264, "step": 14550 }, { "epoch": 0.75, "grad_norm": 0.5546875, "learning_rate": 3.490939984213334e-05, "loss": 1.3683, "step": 14555 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.484087317807176e-05, "loss": 1.4378, "step": 14560 }, { "epoch": 0.75, "grad_norm": 0.51953125, "learning_rate": 3.477239964722748e-05, "loss": 1.3627, "step": 14565 }, { "epoch": 0.75, "grad_norm": 0.52734375, "learning_rate": 3.470397930543645e-05, "loss": 1.3664, "step": 14570 }, { "epoch": 0.75, "grad_norm": 0.53125, "learning_rate": 3.4635612208491194e-05, "loss": 1.3869, "step": 14575 }, { "epoch": 0.75, "grad_norm": 0.5546875, "learning_rate": 3.456729841214083e-05, "loss": 1.3984, "step": 14580 }, { "epoch": 0.75, "grad_norm": 0.5234375, "learning_rate": 3.4499037972091064e-05, "loss": 1.3961, "step": 14585 }, { "epoch": 0.75, "grad_norm": 0.55078125, "learning_rate": 3.443083094400395e-05, "loss": 1.3964, "step": 14590 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.4362677383498123e-05, "loss": 1.4334, "step": 14595 }, { "epoch": 0.76, "grad_norm": 0.515625, "learning_rate": 3.429457734614857e-05, "loss": 1.3997, "step": 14600 }, { "epoch": 0.76, "grad_norm": 0.50390625, "learning_rate": 3.422653088748668e-05, "loss": 1.382, "step": 14605 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.4158538063000046e-05, "loss": 1.4022, "step": 14610 }, { "epoch": 0.76, "grad_norm": 0.51171875, "learning_rate": 3.409059892813261e-05, "loss": 1.3996, "step": 14615 }, { "epoch": 0.76, "grad_norm": 0.5625, "learning_rate": 3.402271353828452e-05, "loss": 1.444, "step": 14620 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.3954881948812125e-05, "loss": 1.4276, "step": 14625 }, { "epoch": 0.76, "grad_norm": 0.5859375, "learning_rate": 3.38871042150278e-05, "loss": 1.3799, "step": 14630 }, { "epoch": 0.76, "grad_norm": 0.5234375, "learning_rate": 3.381938039220011e-05, "loss": 1.4047, "step": 14635 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.3751710535553615e-05, "loss": 1.3985, "step": 14640 }, { "epoch": 0.76, "grad_norm": 0.5234375, "learning_rate": 3.368409470026892e-05, "loss": 1.3995, "step": 14645 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.3616532941482494e-05, "loss": 1.4014, "step": 14650 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.354902531428673e-05, "loss": 1.4169, "step": 14655 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.3481571873729924e-05, "loss": 1.4251, "step": 14660 }, { "epoch": 0.76, "grad_norm": 0.5078125, "learning_rate": 3.341417267481616e-05, "loss": 1.4041, "step": 14665 }, { "epoch": 0.76, "grad_norm": 0.5390625, "learning_rate": 3.334682777250534e-05, "loss": 1.4032, "step": 14670 }, { "epoch": 0.76, "grad_norm": 0.5234375, "learning_rate": 3.3279537221712975e-05, "loss": 1.4211, "step": 14675 }, { "epoch": 0.76, "grad_norm": 0.54296875, "learning_rate": 3.321230107731035e-05, "loss": 1.3886, "step": 14680 }, { "epoch": 0.76, "grad_norm": 0.55078125, "learning_rate": 3.314511939412438e-05, "loss": 1.3968, "step": 14685 }, { "epoch": 0.76, "grad_norm": 0.5234375, "learning_rate": 3.307799222693756e-05, "loss": 1.4073, "step": 14690 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.301091963048788e-05, "loss": 1.3847, "step": 14695 }, { "epoch": 0.76, "grad_norm": 0.52734375, "learning_rate": 3.294390165946889e-05, "loss": 1.4143, "step": 14700 }, { "epoch": 0.76, "grad_norm": 0.5390625, "learning_rate": 3.287693836852959e-05, "loss": 1.3882, "step": 14705 }, { "epoch": 0.76, "grad_norm": 0.54296875, "learning_rate": 3.281002981227439e-05, "loss": 1.3611, "step": 14710 }, { "epoch": 0.76, "grad_norm": 0.796875, "learning_rate": 3.2743176045263024e-05, "loss": 1.3952, "step": 14715 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.2676377122010605e-05, "loss": 1.4149, "step": 14720 }, { "epoch": 0.76, "grad_norm": 0.53125, "learning_rate": 3.260963309698749e-05, "loss": 1.3637, "step": 14725 }, { "epoch": 0.76, "grad_norm": 0.57421875, "learning_rate": 3.254294402461933e-05, "loss": 1.4297, "step": 14730 }, { "epoch": 0.76, "grad_norm": 0.5390625, "learning_rate": 3.2476309959286846e-05, "loss": 1.3733, "step": 14735 }, { "epoch": 0.76, "grad_norm": 0.5546875, "learning_rate": 3.240973095532601e-05, "loss": 1.4047, "step": 14740 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.2343207067027856e-05, "loss": 1.4128, "step": 14745 }, { "epoch": 0.76, "grad_norm": 0.54296875, "learning_rate": 3.227673834863852e-05, "loss": 1.4404, "step": 14750 }, { "epoch": 0.76, "grad_norm": 0.56640625, "learning_rate": 3.221032485435904e-05, "loss": 1.4148, "step": 14755 }, { "epoch": 0.76, "grad_norm": 0.53515625, "learning_rate": 3.214396663834553e-05, "loss": 1.4185, "step": 14760 }, { "epoch": 0.76, "grad_norm": 0.55859375, "learning_rate": 3.2077663754708983e-05, "loss": 1.4212, "step": 14765 }, { "epoch": 0.76, "grad_norm": 0.5390625, "learning_rate": 3.201141625751532e-05, "loss": 1.4082, "step": 14770 }, { "epoch": 0.76, "grad_norm": 0.53125, "learning_rate": 3.194522420078518e-05, "loss": 1.3706, "step": 14775 }, { "epoch": 0.76, "grad_norm": 0.546875, "learning_rate": 3.187908763849412e-05, "loss": 1.4007, "step": 14780 }, { "epoch": 0.76, "grad_norm": 0.53125, "learning_rate": 3.181300662457237e-05, "loss": 1.4022, "step": 14785 }, { "epoch": 0.77, "grad_norm": 0.55859375, "learning_rate": 3.1746981212904944e-05, "loss": 1.4264, "step": 14790 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 3.168101145733139e-05, "loss": 1.4189, "step": 14795 }, { "epoch": 0.77, "grad_norm": 0.5625, "learning_rate": 3.161509741164596e-05, "loss": 1.4334, "step": 14800 }, { "epoch": 0.77, "grad_norm": 0.51953125, "learning_rate": 3.1549239129597484e-05, "loss": 1.4155, "step": 14805 }, { "epoch": 0.77, "grad_norm": 0.52734375, "learning_rate": 3.148343666488931e-05, "loss": 1.3467, "step": 14810 }, { "epoch": 0.77, "grad_norm": 0.5390625, "learning_rate": 3.141769007117921e-05, "loss": 1.4213, "step": 14815 }, { "epoch": 0.77, "grad_norm": 0.5234375, "learning_rate": 3.135199940207947e-05, "loss": 1.3895, "step": 14820 }, { "epoch": 0.77, "grad_norm": 0.5625, "learning_rate": 3.1286364711156734e-05, "loss": 1.4077, "step": 14825 }, { "epoch": 0.77, "grad_norm": 0.5078125, "learning_rate": 3.1220786051932064e-05, "loss": 1.4263, "step": 14830 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 3.1155263477880703e-05, "loss": 1.3859, "step": 14835 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 3.108979704243228e-05, "loss": 1.4295, "step": 14840 }, { "epoch": 0.77, "grad_norm": 0.52734375, "learning_rate": 3.1024386798970586e-05, "loss": 1.3763, "step": 14845 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 3.0959032800833657e-05, "loss": 1.4086, "step": 14850 }, { "epoch": 0.77, "grad_norm": 0.5546875, "learning_rate": 3.089373510131354e-05, "loss": 1.4125, "step": 14855 }, { "epoch": 0.77, "grad_norm": 0.5390625, "learning_rate": 3.0828493753656495e-05, "loss": 1.4476, "step": 14860 }, { "epoch": 0.77, "grad_norm": 0.578125, "learning_rate": 3.076330881106278e-05, "loss": 1.4296, "step": 14865 }, { "epoch": 0.77, "grad_norm": 0.53125, "learning_rate": 3.069818032668668e-05, "loss": 1.434, "step": 14870 }, { "epoch": 0.77, "grad_norm": 0.52734375, "learning_rate": 3.0633108353636376e-05, "loss": 1.4098, "step": 14875 }, { "epoch": 0.77, "grad_norm": 0.546875, "learning_rate": 3.056809294497406e-05, "loss": 1.3864, "step": 14880 }, { "epoch": 0.77, "grad_norm": 0.5625, "learning_rate": 3.050313415371573e-05, "loss": 1.3848, "step": 14885 }, { "epoch": 0.77, "grad_norm": 0.54296875, "learning_rate": 3.0438232032831292e-05, "loss": 1.3968, "step": 14890 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 3.0373386635244327e-05, "loss": 1.4014, "step": 14895 }, { "epoch": 0.77, "grad_norm": 0.515625, "learning_rate": 3.0308598013832256e-05, "loss": 1.3744, "step": 14900 }, { "epoch": 0.77, "grad_norm": 0.55859375, "learning_rate": 3.0243866221426166e-05, "loss": 1.3868, "step": 14905 }, { "epoch": 0.77, "grad_norm": 0.5390625, "learning_rate": 3.0179191310810838e-05, "loss": 1.3965, "step": 14910 }, { "epoch": 0.77, "grad_norm": 0.52734375, "learning_rate": 3.0114573334724592e-05, "loss": 1.4171, "step": 14915 }, { "epoch": 0.77, "grad_norm": 0.5234375, "learning_rate": 3.005001234585939e-05, "loss": 1.4186, "step": 14920 }, { "epoch": 0.77, "grad_norm": 0.55859375, "learning_rate": 2.9985508396860717e-05, "loss": 1.4451, "step": 14925 }, { "epoch": 0.77, "grad_norm": 0.55078125, "learning_rate": 2.9921061540327545e-05, "loss": 1.4018, "step": 14930 }, { "epoch": 0.77, "grad_norm": 0.5390625, "learning_rate": 2.9856671828812244e-05, "loss": 1.3988, "step": 14935 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 2.9792339314820662e-05, "loss": 1.3915, "step": 14940 }, { "epoch": 0.77, "grad_norm": 0.578125, "learning_rate": 2.972806405081191e-05, "loss": 1.4495, "step": 14945 }, { "epoch": 0.77, "grad_norm": 0.5390625, "learning_rate": 2.96638460891985e-05, "loss": 1.4278, "step": 14950 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 2.9599685482346218e-05, "loss": 1.3901, "step": 14955 }, { "epoch": 0.77, "grad_norm": 0.57421875, "learning_rate": 2.9535582282573982e-05, "loss": 1.3876, "step": 14960 }, { "epoch": 0.77, "grad_norm": 0.55078125, "learning_rate": 2.947153654215402e-05, "loss": 1.4109, "step": 14965 }, { "epoch": 0.77, "grad_norm": 0.53515625, "learning_rate": 2.940754831331163e-05, "loss": 1.3847, "step": 14970 }, { "epoch": 0.77, "grad_norm": 0.54296875, "learning_rate": 2.9343617648225273e-05, "loss": 1.3946, "step": 14975 }, { "epoch": 0.78, "grad_norm": 0.5625, "learning_rate": 2.927974459902637e-05, "loss": 1.4048, "step": 14980 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 2.9215929217799454e-05, "loss": 1.4128, "step": 14985 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.9152171556581998e-05, "loss": 1.3907, "step": 14990 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.9088471667364447e-05, "loss": 1.4094, "step": 14995 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.9024829602090033e-05, "loss": 1.4178, "step": 15000 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 2.8961245412654936e-05, "loss": 1.4357, "step": 15005 }, { "epoch": 0.78, "grad_norm": 0.51171875, "learning_rate": 2.889771915090812e-05, "loss": 1.433, "step": 15010 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.883425086865129e-05, "loss": 1.4404, "step": 15015 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.8770840617638927e-05, "loss": 1.3897, "step": 15020 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 2.8707488449578068e-05, "loss": 1.4184, "step": 15025 }, { "epoch": 0.78, "grad_norm": 0.55859375, "learning_rate": 2.8644194416128523e-05, "loss": 1.4287, "step": 15030 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.8580958568902616e-05, "loss": 1.409, "step": 15035 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 2.85177809594653e-05, "loss": 1.4083, "step": 15040 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.8454661639333923e-05, "loss": 1.3748, "step": 15045 }, { "epoch": 0.78, "grad_norm": 0.515625, "learning_rate": 2.839160065997839e-05, "loss": 1.4489, "step": 15050 }, { "epoch": 0.78, "grad_norm": 0.53515625, "learning_rate": 2.832859807282102e-05, "loss": 1.3881, "step": 15055 }, { "epoch": 0.78, "grad_norm": 0.56640625, "learning_rate": 2.8265653929236537e-05, "loss": 1.4145, "step": 15060 }, { "epoch": 0.78, "grad_norm": 0.51953125, "learning_rate": 2.8202768280551894e-05, "loss": 1.4271, "step": 15065 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.813994117804648e-05, "loss": 1.3984, "step": 15070 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.807717267295189e-05, "loss": 1.3794, "step": 15075 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.8014462816451958e-05, "loss": 1.4091, "step": 15080 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.7951811659682625e-05, "loss": 1.4287, "step": 15085 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.7889219253732046e-05, "loss": 1.4234, "step": 15090 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.7826685649640428e-05, "loss": 1.4419, "step": 15095 }, { "epoch": 0.78, "grad_norm": 0.56640625, "learning_rate": 2.7764210898400066e-05, "loss": 1.4219, "step": 15100 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.770179505095518e-05, "loss": 1.4274, "step": 15105 }, { "epoch": 0.78, "grad_norm": 0.5859375, "learning_rate": 2.7639438158202037e-05, "loss": 1.4231, "step": 15110 }, { "epoch": 0.78, "grad_norm": 0.55859375, "learning_rate": 2.757714027098882e-05, "loss": 1.3985, "step": 15115 }, { "epoch": 0.78, "grad_norm": 0.53125, "learning_rate": 2.7514901440115615e-05, "loss": 1.3931, "step": 15120 }, { "epoch": 0.78, "grad_norm": 0.515625, "learning_rate": 2.745272171633424e-05, "loss": 1.4121, "step": 15125 }, { "epoch": 0.78, "grad_norm": 0.5546875, "learning_rate": 2.7390601150348437e-05, "loss": 1.3921, "step": 15130 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.7328539792813668e-05, "loss": 1.382, "step": 15135 }, { "epoch": 0.78, "grad_norm": 0.5234375, "learning_rate": 2.7266537694337147e-05, "loss": 1.42, "step": 15140 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.7204594905477655e-05, "loss": 1.3715, "step": 15145 }, { "epoch": 0.78, "grad_norm": 0.546875, "learning_rate": 2.714271147674572e-05, "loss": 1.3723, "step": 15150 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.7080887458603432e-05, "loss": 1.4189, "step": 15155 }, { "epoch": 0.78, "grad_norm": 0.52734375, "learning_rate": 2.7019122901464477e-05, "loss": 1.4318, "step": 15160 }, { "epoch": 0.78, "grad_norm": 0.54296875, "learning_rate": 2.6957417855693934e-05, "loss": 1.3811, "step": 15165 }, { "epoch": 0.78, "grad_norm": 0.55078125, "learning_rate": 2.6895772371608473e-05, "loss": 1.4058, "step": 15170 }, { "epoch": 0.79, "grad_norm": 0.53125, "learning_rate": 2.6834186499476145e-05, "loss": 1.3842, "step": 15175 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.677266028951645e-05, "loss": 1.3844, "step": 15180 }, { "epoch": 0.79, "grad_norm": 0.51171875, "learning_rate": 2.67111937919001e-05, "loss": 1.4222, "step": 15185 }, { "epoch": 0.79, "grad_norm": 0.56640625, "learning_rate": 2.6649787056749254e-05, "loss": 1.4068, "step": 15190 }, { "epoch": 0.79, "grad_norm": 0.5625, "learning_rate": 2.658844013413727e-05, "loss": 1.4152, "step": 15195 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.6527153074088797e-05, "loss": 1.3608, "step": 15200 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.6465925926579548e-05, "loss": 1.4029, "step": 15205 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.6404758741536505e-05, "loss": 1.386, "step": 15210 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.634365156883768e-05, "loss": 1.4222, "step": 15215 }, { "epoch": 0.79, "grad_norm": 0.5234375, "learning_rate": 2.628260445831222e-05, "loss": 1.3958, "step": 15220 }, { "epoch": 0.79, "grad_norm": 0.5546875, "learning_rate": 2.622161745974019e-05, "loss": 1.4211, "step": 15225 }, { "epoch": 0.79, "grad_norm": 0.51171875, "learning_rate": 2.6160690622852746e-05, "loss": 1.4084, "step": 15230 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.6099823997331886e-05, "loss": 1.3864, "step": 15235 }, { "epoch": 0.79, "grad_norm": 0.53125, "learning_rate": 2.6039017632810582e-05, "loss": 1.4074, "step": 15240 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.597827157887267e-05, "loss": 1.4254, "step": 15245 }, { "epoch": 0.79, "grad_norm": 0.5078125, "learning_rate": 2.5917585885052742e-05, "loss": 1.3996, "step": 15250 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.585696060083621e-05, "loss": 1.4215, "step": 15255 }, { "epoch": 0.79, "grad_norm": 0.51171875, "learning_rate": 2.5796395775659243e-05, "loss": 1.3848, "step": 15260 }, { "epoch": 0.79, "grad_norm": 0.54296875, "learning_rate": 2.5735891458908713e-05, "loss": 1.3692, "step": 15265 }, { "epoch": 0.79, "grad_norm": 0.5546875, "learning_rate": 2.5675447699922084e-05, "loss": 1.4185, "step": 15270 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.5615064547987487e-05, "loss": 1.4189, "step": 15275 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.555474205234366e-05, "loss": 1.353, "step": 15280 }, { "epoch": 0.79, "grad_norm": 0.54296875, "learning_rate": 2.5494480262179855e-05, "loss": 1.3896, "step": 15285 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.543427922663576e-05, "loss": 1.4108, "step": 15290 }, { "epoch": 0.79, "grad_norm": 0.5625, "learning_rate": 2.537413899480161e-05, "loss": 1.3963, "step": 15295 }, { "epoch": 0.79, "grad_norm": 0.53125, "learning_rate": 2.5314059615718034e-05, "loss": 1.4301, "step": 15300 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.525404113837605e-05, "loss": 1.3944, "step": 15305 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 2.5194083611716935e-05, "loss": 1.4192, "step": 15310 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.5134187084632356e-05, "loss": 1.4197, "step": 15315 }, { "epoch": 0.79, "grad_norm": 0.53125, "learning_rate": 2.507435160596422e-05, "loss": 1.4077, "step": 15320 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.5014577224504642e-05, "loss": 1.3889, "step": 15325 }, { "epoch": 0.79, "grad_norm": 0.546875, "learning_rate": 2.4954863988995892e-05, "loss": 1.4001, "step": 15330 }, { "epoch": 0.79, "grad_norm": 0.515625, "learning_rate": 2.4895211948130394e-05, "loss": 1.4222, "step": 15335 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.48356211505507e-05, "loss": 1.3745, "step": 15340 }, { "epoch": 0.79, "grad_norm": 0.5390625, "learning_rate": 2.4776091644849432e-05, "loss": 1.4021, "step": 15345 }, { "epoch": 0.79, "grad_norm": 0.53515625, "learning_rate": 2.4716623479569136e-05, "loss": 1.4166, "step": 15350 }, { "epoch": 0.79, "grad_norm": 0.55078125, "learning_rate": 2.4657216703202435e-05, "loss": 1.4277, "step": 15355 }, { "epoch": 0.79, "grad_norm": 0.515625, "learning_rate": 2.459787136419186e-05, "loss": 1.3755, "step": 15360 }, { "epoch": 0.79, "grad_norm": 0.51953125, "learning_rate": 2.4538587510929878e-05, "loss": 1.4201, "step": 15365 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.4479365191758717e-05, "loss": 1.4043, "step": 15370 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.4420204454970542e-05, "loss": 1.4268, "step": 15375 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.4361105348807256e-05, "loss": 1.4083, "step": 15380 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.430206792146049e-05, "loss": 1.3965, "step": 15385 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.424309222107164e-05, "loss": 1.4135, "step": 15390 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.418417829573165e-05, "loss": 1.431, "step": 15395 }, { "epoch": 0.8, "grad_norm": 0.53125, "learning_rate": 2.4125326193481213e-05, "loss": 1.3714, "step": 15400 }, { "epoch": 0.8, "grad_norm": 0.546875, "learning_rate": 2.4066535962310553e-05, "loss": 1.4141, "step": 15405 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.4007807650159464e-05, "loss": 1.425, "step": 15410 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.394914130491719e-05, "loss": 1.4037, "step": 15415 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.3890536974422518e-05, "loss": 1.381, "step": 15420 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.3831994706463623e-05, "loss": 1.401, "step": 15425 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.3773514548778132e-05, "loss": 1.4207, "step": 15430 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.3715096549052908e-05, "loss": 1.4072, "step": 15435 }, { "epoch": 0.8, "grad_norm": 0.5078125, "learning_rate": 2.3656740754924233e-05, "loss": 1.3835, "step": 15440 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.3598447213977625e-05, "loss": 1.3809, "step": 15445 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.354021597374787e-05, "loss": 1.3715, "step": 15450 }, { "epoch": 0.8, "grad_norm": 0.51953125, "learning_rate": 2.3482047081718884e-05, "loss": 1.3965, "step": 15455 }, { "epoch": 0.8, "grad_norm": 0.54296875, "learning_rate": 2.342394058532378e-05, "loss": 1.3943, "step": 15460 }, { "epoch": 0.8, "grad_norm": 0.5546875, "learning_rate": 2.336589653194482e-05, "loss": 1.4223, "step": 15465 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 2.3307914968913347e-05, "loss": 1.3886, "step": 15470 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 2.324999594350965e-05, "loss": 1.3564, "step": 15475 }, { "epoch": 0.8, "grad_norm": 0.56640625, "learning_rate": 2.319213950296314e-05, "loss": 1.4224, "step": 15480 }, { "epoch": 0.8, "grad_norm": 0.56640625, "learning_rate": 2.313434569445213e-05, "loss": 1.392, "step": 15485 }, { "epoch": 0.8, "grad_norm": 0.53515625, "learning_rate": 2.3076614565103916e-05, "loss": 1.4152, "step": 15490 }, { "epoch": 0.8, "grad_norm": 0.58203125, "learning_rate": 2.3018946161994594e-05, "loss": 1.4077, "step": 15495 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.2961340532149177e-05, "loss": 1.4043, "step": 15500 }, { "epoch": 0.8, "grad_norm": 0.53125, "learning_rate": 2.2903797722541487e-05, "loss": 1.3987, "step": 15505 }, { "epoch": 0.8, "grad_norm": 0.5078125, "learning_rate": 2.2846317780094127e-05, "loss": 1.3903, "step": 15510 }, { "epoch": 0.8, "grad_norm": 0.5234375, "learning_rate": 2.2788900751678367e-05, "loss": 1.4161, "step": 15515 }, { "epoch": 0.8, "grad_norm": 0.52734375, "learning_rate": 2.2731546684114247e-05, "loss": 1.3782, "step": 15520 }, { "epoch": 0.8, "grad_norm": 0.55859375, "learning_rate": 2.2674255624170472e-05, "loss": 1.3911, "step": 15525 }, { "epoch": 0.8, "grad_norm": 0.53125, "learning_rate": 2.261702761856429e-05, "loss": 1.3903, "step": 15530 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.2559862713961632e-05, "loss": 1.4021, "step": 15535 }, { "epoch": 0.8, "grad_norm": 0.5625, "learning_rate": 2.2502760956976877e-05, "loss": 1.4184, "step": 15540 }, { "epoch": 0.8, "grad_norm": 0.53125, "learning_rate": 2.2445722394172973e-05, "loss": 1.3844, "step": 15545 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.2388747072061335e-05, "loss": 1.3948, "step": 15550 }, { "epoch": 0.8, "grad_norm": 0.55078125, "learning_rate": 2.2331835037101823e-05, "loss": 1.411, "step": 15555 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.2274986335702597e-05, "loss": 1.4426, "step": 15560 }, { "epoch": 0.81, "grad_norm": 0.55859375, "learning_rate": 2.2218201014220263e-05, "loss": 1.3654, "step": 15565 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.2161479118959737e-05, "loss": 1.414, "step": 15570 }, { "epoch": 0.81, "grad_norm": 0.5703125, "learning_rate": 2.2104820696174235e-05, "loss": 1.3961, "step": 15575 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.204822579206509e-05, "loss": 1.4196, "step": 15580 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.1991694452781975e-05, "loss": 1.4202, "step": 15585 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.1935226724422686e-05, "loss": 1.4266, "step": 15590 }, { "epoch": 0.81, "grad_norm": 0.55078125, "learning_rate": 2.187882265303317e-05, "loss": 1.4199, "step": 15595 }, { "epoch": 0.81, "grad_norm": 0.55078125, "learning_rate": 2.182248228460738e-05, "loss": 1.4131, "step": 15600 }, { "epoch": 0.81, "grad_norm": 0.5390625, "learning_rate": 2.1766205665087426e-05, "loss": 1.4076, "step": 15605 }, { "epoch": 0.81, "grad_norm": 0.5078125, "learning_rate": 2.170999284036338e-05, "loss": 1.3784, "step": 15610 }, { "epoch": 0.81, "grad_norm": 0.515625, "learning_rate": 2.1653843856273325e-05, "loss": 1.4066, "step": 15615 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.1597758758603236e-05, "loss": 1.4271, "step": 15620 }, { "epoch": 0.81, "grad_norm": 0.56640625, "learning_rate": 2.154173759308703e-05, "loss": 1.3941, "step": 15625 }, { "epoch": 0.81, "grad_norm": 0.515625, "learning_rate": 2.1485780405406498e-05, "loss": 1.377, "step": 15630 }, { "epoch": 0.81, "grad_norm": 0.578125, "learning_rate": 2.142988724119127e-05, "loss": 1.4119, "step": 15635 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.1374058146018693e-05, "loss": 1.4651, "step": 15640 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.131829316541395e-05, "loss": 1.3771, "step": 15645 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.126259234484992e-05, "loss": 1.4179, "step": 15650 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.120695572974718e-05, "loss": 1.3984, "step": 15655 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.1151383365473875e-05, "loss": 1.4018, "step": 15660 }, { "epoch": 0.81, "grad_norm": 0.57421875, "learning_rate": 2.109587529734586e-05, "loss": 1.4305, "step": 15665 }, { "epoch": 0.81, "grad_norm": 0.55859375, "learning_rate": 2.1040431570626483e-05, "loss": 1.4443, "step": 15670 }, { "epoch": 0.81, "grad_norm": 0.55078125, "learning_rate": 2.0985052230526714e-05, "loss": 1.423, "step": 15675 }, { "epoch": 0.81, "grad_norm": 0.54296875, "learning_rate": 2.092973732220489e-05, "loss": 1.3989, "step": 15680 }, { "epoch": 0.81, "grad_norm": 0.5703125, "learning_rate": 2.0874486890766908e-05, "loss": 1.3782, "step": 15685 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.0819300981266066e-05, "loss": 1.3892, "step": 15690 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.0764179638703076e-05, "loss": 1.3963, "step": 15695 }, { "epoch": 0.81, "grad_norm": 0.53125, "learning_rate": 2.070912290802589e-05, "loss": 1.415, "step": 15700 }, { "epoch": 0.81, "grad_norm": 0.5234375, "learning_rate": 2.0654130834129903e-05, "loss": 1.4013, "step": 15705 }, { "epoch": 0.81, "grad_norm": 0.53515625, "learning_rate": 2.0599203461857707e-05, "loss": 1.4032, "step": 15710 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.054434083599921e-05, "loss": 1.4276, "step": 15715 }, { "epoch": 0.81, "grad_norm": 0.5703125, "learning_rate": 2.0489543001291402e-05, "loss": 1.4058, "step": 15720 }, { "epoch": 0.81, "grad_norm": 0.546875, "learning_rate": 2.0434810002418547e-05, "loss": 1.414, "step": 15725 }, { "epoch": 0.81, "grad_norm": 0.51953125, "learning_rate": 2.0380141884012004e-05, "loss": 1.3732, "step": 15730 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.0325538690650236e-05, "loss": 1.3867, "step": 15735 }, { "epoch": 0.81, "grad_norm": 0.57421875, "learning_rate": 2.0271000466858726e-05, "loss": 1.4122, "step": 15740 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.0216527257110006e-05, "loss": 1.4012, "step": 15745 }, { "epoch": 0.81, "grad_norm": 0.52734375, "learning_rate": 2.0162119105823607e-05, "loss": 1.4146, "step": 15750 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 2.010777605736599e-05, "loss": 1.3965, "step": 15755 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 2.0053498156050555e-05, "loss": 1.3938, "step": 15760 }, { "epoch": 0.82, "grad_norm": 0.54296875, "learning_rate": 1.9999285446137518e-05, "loss": 1.3813, "step": 15765 }, { "epoch": 0.82, "grad_norm": 0.5625, "learning_rate": 1.9945137971833983e-05, "loss": 1.3778, "step": 15770 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.9891055777293865e-05, "loss": 1.4193, "step": 15775 }, { "epoch": 0.82, "grad_norm": 0.54296875, "learning_rate": 1.9837038906617843e-05, "loss": 1.4143, "step": 15780 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.9783087403853273e-05, "loss": 1.4312, "step": 15785 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.9729201312994273e-05, "loss": 1.4178, "step": 15790 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.9675380677981603e-05, "loss": 1.4112, "step": 15795 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.962162554270267e-05, "loss": 1.3914, "step": 15800 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.95679359509914e-05, "loss": 1.4105, "step": 15805 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.951431194662834e-05, "loss": 1.3702, "step": 15810 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.946075357334053e-05, "loss": 1.3917, "step": 15815 }, { "epoch": 0.82, "grad_norm": 0.5390625, "learning_rate": 1.9407260874801513e-05, "loss": 1.3756, "step": 15820 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.9353833894631247e-05, "loss": 1.3712, "step": 15825 }, { "epoch": 0.82, "grad_norm": 0.55078125, "learning_rate": 1.9300472676396076e-05, "loss": 1.3687, "step": 15830 }, { "epoch": 0.82, "grad_norm": 0.5546875, "learning_rate": 1.9247177263608794e-05, "loss": 1.4126, "step": 15835 }, { "epoch": 0.82, "grad_norm": 0.5703125, "learning_rate": 1.9193947699728488e-05, "loss": 1.4815, "step": 15840 }, { "epoch": 0.82, "grad_norm": 0.53515625, "learning_rate": 1.9140784028160574e-05, "loss": 1.3857, "step": 15845 }, { "epoch": 0.82, "grad_norm": 0.578125, "learning_rate": 1.908768629225669e-05, "loss": 1.4191, "step": 15850 }, { "epoch": 0.82, "grad_norm": 0.5625, "learning_rate": 1.9034654535314767e-05, "loss": 1.3807, "step": 15855 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.8981688800578877e-05, "loss": 1.454, "step": 15860 }, { "epoch": 0.82, "grad_norm": 0.65625, "learning_rate": 1.8928789131239343e-05, "loss": 1.3978, "step": 15865 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.887595557043248e-05, "loss": 1.3991, "step": 15870 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.8823188161240813e-05, "loss": 1.4167, "step": 15875 }, { "epoch": 0.82, "grad_norm": 0.54296875, "learning_rate": 1.8770486946692876e-05, "loss": 1.4233, "step": 15880 }, { "epoch": 0.82, "grad_norm": 0.53515625, "learning_rate": 1.8717851969763266e-05, "loss": 1.4152, "step": 15885 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.866528327337249e-05, "loss": 1.4035, "step": 15890 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.861278090038705e-05, "loss": 1.3953, "step": 15895 }, { "epoch": 0.82, "grad_norm": 0.5546875, "learning_rate": 1.8560344893619396e-05, "loss": 1.4082, "step": 15900 }, { "epoch": 0.82, "grad_norm": 0.5234375, "learning_rate": 1.850797529582785e-05, "loss": 1.4083, "step": 15905 }, { "epoch": 0.82, "grad_norm": 0.52734375, "learning_rate": 1.8455672149716496e-05, "loss": 1.4182, "step": 15910 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.840343549793535e-05, "loss": 1.3979, "step": 15915 }, { "epoch": 0.82, "grad_norm": 0.55859375, "learning_rate": 1.835126538308013e-05, "loss": 1.4196, "step": 15920 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.8299161847692358e-05, "loss": 1.3812, "step": 15925 }, { "epoch": 0.82, "grad_norm": 0.53515625, "learning_rate": 1.8247124934259186e-05, "loss": 1.4142, "step": 15930 }, { "epoch": 0.82, "grad_norm": 0.546875, "learning_rate": 1.819515468521349e-05, "loss": 1.4185, "step": 15935 }, { "epoch": 0.82, "grad_norm": 0.58203125, "learning_rate": 1.8143251142933793e-05, "loss": 1.4173, "step": 15940 }, { "epoch": 0.82, "grad_norm": 0.53125, "learning_rate": 1.809141434974423e-05, "loss": 1.4131, "step": 15945 }, { "epoch": 0.83, "grad_norm": 0.546875, "learning_rate": 1.803964434791442e-05, "loss": 1.4071, "step": 15950 }, { "epoch": 0.83, "grad_norm": 0.53515625, "learning_rate": 1.7987941179659608e-05, "loss": 1.356, "step": 15955 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.793630488714053e-05, "loss": 1.4231, "step": 15960 }, { "epoch": 0.83, "grad_norm": 0.51171875, "learning_rate": 1.788473551246339e-05, "loss": 1.3839, "step": 15965 }, { "epoch": 0.83, "grad_norm": 0.55078125, "learning_rate": 1.7833233097679746e-05, "loss": 1.4259, "step": 15970 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.778179768478666e-05, "loss": 1.4312, "step": 15975 }, { "epoch": 0.83, "grad_norm": 0.5078125, "learning_rate": 1.7730429315726494e-05, "loss": 1.3773, "step": 15980 }, { "epoch": 0.83, "grad_norm": 0.5625, "learning_rate": 1.7679128032387004e-05, "loss": 1.3844, "step": 15985 }, { "epoch": 0.83, "grad_norm": 0.57421875, "learning_rate": 1.762789387660113e-05, "loss": 1.4109, "step": 15990 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.7576726890147177e-05, "loss": 1.3568, "step": 15995 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.7525627114748645e-05, "loss": 1.4048, "step": 16000 }, { "epoch": 0.83, "grad_norm": 0.5546875, "learning_rate": 1.7474594592074235e-05, "loss": 1.4195, "step": 16005 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.742362936373776e-05, "loss": 1.4214, "step": 16010 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.737273147129821e-05, "loss": 1.41, "step": 16015 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.7321900956259653e-05, "loss": 1.4071, "step": 16020 }, { "epoch": 0.83, "grad_norm": 0.515625, "learning_rate": 1.727113786007125e-05, "loss": 1.3967, "step": 16025 }, { "epoch": 0.83, "grad_norm": 0.56640625, "learning_rate": 1.7220442224127097e-05, "loss": 1.3973, "step": 16030 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.7169814089766344e-05, "loss": 1.4093, "step": 16035 }, { "epoch": 0.83, "grad_norm": 0.52734375, "learning_rate": 1.7119253498273113e-05, "loss": 1.4139, "step": 16040 }, { "epoch": 0.83, "grad_norm": 0.55078125, "learning_rate": 1.7068760490876422e-05, "loss": 1.3937, "step": 16045 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.701833510875015e-05, "loss": 1.4067, "step": 16050 }, { "epoch": 0.83, "grad_norm": 0.51953125, "learning_rate": 1.696797739301308e-05, "loss": 1.3547, "step": 16055 }, { "epoch": 0.83, "grad_norm": 0.55078125, "learning_rate": 1.6917687384728785e-05, "loss": 1.4478, "step": 16060 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.686746512490569e-05, "loss": 1.4055, "step": 16065 }, { "epoch": 0.83, "grad_norm": 0.54296875, "learning_rate": 1.6817310654496852e-05, "loss": 1.4043, "step": 16070 }, { "epoch": 0.83, "grad_norm": 0.546875, "learning_rate": 1.6767224014400173e-05, "loss": 1.4098, "step": 16075 }, { "epoch": 0.83, "grad_norm": 0.53515625, "learning_rate": 1.6717205245458178e-05, "loss": 1.4182, "step": 16080 }, { "epoch": 0.83, "grad_norm": 0.51953125, "learning_rate": 1.6667254388458088e-05, "loss": 1.3897, "step": 16085 }, { "epoch": 0.83, "grad_norm": 0.5390625, "learning_rate": 1.661737148413167e-05, "loss": 1.3928, "step": 16090 }, { "epoch": 0.83, "grad_norm": 0.578125, "learning_rate": 1.6567556573155374e-05, "loss": 1.401, "step": 16095 }, { "epoch": 0.83, "grad_norm": 0.5390625, "learning_rate": 1.6517809696150143e-05, "loss": 1.4434, "step": 16100 }, { "epoch": 0.83, "grad_norm": 0.5234375, "learning_rate": 1.64681308936815e-05, "loss": 1.4293, "step": 16105 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.641852020625937e-05, "loss": 1.403, "step": 16110 }, { "epoch": 0.83, "grad_norm": 0.5390625, "learning_rate": 1.6368977674338216e-05, "loss": 1.4235, "step": 16115 }, { "epoch": 0.83, "grad_norm": 0.53515625, "learning_rate": 1.631950333831688e-05, "loss": 1.4061, "step": 16120 }, { "epoch": 0.83, "grad_norm": 0.546875, "learning_rate": 1.6270097238538597e-05, "loss": 1.4604, "step": 16125 }, { "epoch": 0.83, "grad_norm": 0.53125, "learning_rate": 1.6220759415290998e-05, "loss": 1.396, "step": 16130 }, { "epoch": 0.83, "grad_norm": 0.53515625, "learning_rate": 1.6171489908805992e-05, "loss": 1.3742, "step": 16135 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.6122288759259795e-05, "loss": 1.4133, "step": 16140 }, { "epoch": 0.84, "grad_norm": 0.5625, "learning_rate": 1.6073156006772893e-05, "loss": 1.3686, "step": 16145 }, { "epoch": 0.84, "grad_norm": 0.56640625, "learning_rate": 1.6024091691410013e-05, "loss": 1.3744, "step": 16150 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.597509585318001e-05, "loss": 1.3963, "step": 16155 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.592616853203597e-05, "loss": 1.3879, "step": 16160 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.587730976787508e-05, "loss": 1.4037, "step": 16165 }, { "epoch": 0.84, "grad_norm": 0.515625, "learning_rate": 1.582851960053865e-05, "loss": 1.3981, "step": 16170 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.577979806981198e-05, "loss": 1.4068, "step": 16175 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.573114521542447e-05, "loss": 1.4194, "step": 16180 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.5682561077049496e-05, "loss": 1.4033, "step": 16185 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.5634045694304412e-05, "loss": 1.4413, "step": 16190 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.5585599106750515e-05, "loss": 1.3756, "step": 16195 }, { "epoch": 0.84, "grad_norm": 0.5546875, "learning_rate": 1.553722135389294e-05, "loss": 1.4271, "step": 16200 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.548891247518075e-05, "loss": 1.4217, "step": 16205 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.5440672510006848e-05, "loss": 1.418, "step": 16210 }, { "epoch": 0.84, "grad_norm": 0.5234375, "learning_rate": 1.5392501497707945e-05, "loss": 1.3715, "step": 16215 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.5344399477564462e-05, "loss": 1.3594, "step": 16220 }, { "epoch": 0.84, "grad_norm": 0.52734375, "learning_rate": 1.529636648880063e-05, "loss": 1.4132, "step": 16225 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.5248402570584353e-05, "loss": 1.4164, "step": 16230 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.520050776202726e-05, "loss": 1.4124, "step": 16235 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.5152682102184546e-05, "loss": 1.3827, "step": 16240 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.5104925630055078e-05, "loss": 1.4168, "step": 16245 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.5057238384581296e-05, "loss": 1.4161, "step": 16250 }, { "epoch": 0.84, "grad_norm": 0.51953125, "learning_rate": 1.5009620404649193e-05, "loss": 1.424, "step": 16255 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.4962071729088255e-05, "loss": 1.3947, "step": 16260 }, { "epoch": 0.84, "grad_norm": 0.53125, "learning_rate": 1.4914592396671468e-05, "loss": 1.4185, "step": 16265 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.486718244611528e-05, "loss": 1.4036, "step": 16270 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.481984191607959e-05, "loss": 1.397, "step": 16275 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.477257084516761e-05, "loss": 1.4109, "step": 16280 }, { "epoch": 0.84, "grad_norm": 0.5390625, "learning_rate": 1.4725369271925982e-05, "loss": 1.3913, "step": 16285 }, { "epoch": 0.84, "grad_norm": 0.546875, "learning_rate": 1.4678237234844649e-05, "loss": 1.4272, "step": 16290 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.4631174772356881e-05, "loss": 1.4109, "step": 16295 }, { "epoch": 0.84, "grad_norm": 0.55859375, "learning_rate": 1.4584181922839157e-05, "loss": 1.4192, "step": 16300 }, { "epoch": 0.84, "grad_norm": 0.5234375, "learning_rate": 1.4537258724611235e-05, "loss": 1.3831, "step": 16305 }, { "epoch": 0.84, "grad_norm": 0.53515625, "learning_rate": 1.4490405215936066e-05, "loss": 1.3819, "step": 16310 }, { "epoch": 0.84, "grad_norm": 0.54296875, "learning_rate": 1.4443621435019793e-05, "loss": 1.3904, "step": 16315 }, { "epoch": 0.84, "grad_norm": 0.55078125, "learning_rate": 1.4396907420011651e-05, "loss": 1.4392, "step": 16320 }, { "epoch": 0.84, "grad_norm": 0.5703125, "learning_rate": 1.4350263209004034e-05, "loss": 1.453, "step": 16325 }, { "epoch": 0.84, "grad_norm": 0.51953125, "learning_rate": 1.4303688840032381e-05, "loss": 1.4595, "step": 16330 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.4257184351075237e-05, "loss": 1.4095, "step": 16335 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 1.4210749780054066e-05, "loss": 1.3962, "step": 16340 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.4164385164833394e-05, "loss": 1.3973, "step": 16345 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.4118090543220697e-05, "loss": 1.4014, "step": 16350 }, { "epoch": 0.85, "grad_norm": 0.51171875, "learning_rate": 1.4071865952966368e-05, "loss": 1.3986, "step": 16355 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.4025711431763644e-05, "loss": 1.3975, "step": 16360 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.3979627017248687e-05, "loss": 1.4011, "step": 16365 }, { "epoch": 0.85, "grad_norm": 0.53515625, "learning_rate": 1.393361274700049e-05, "loss": 1.4472, "step": 16370 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.3887668658540842e-05, "loss": 1.4033, "step": 16375 }, { "epoch": 0.85, "grad_norm": 0.53515625, "learning_rate": 1.3841794789334239e-05, "loss": 1.4049, "step": 16380 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3795991176788004e-05, "loss": 1.4162, "step": 16385 }, { "epoch": 0.85, "grad_norm": 0.54296875, "learning_rate": 1.3750257858252124e-05, "loss": 1.406, "step": 16390 }, { "epoch": 0.85, "grad_norm": 0.54296875, "learning_rate": 1.3704594871019305e-05, "loss": 1.425, "step": 16395 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.3659002252324838e-05, "loss": 1.3878, "step": 16400 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.3613480039346682e-05, "loss": 1.4231, "step": 16405 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3568028269205391e-05, "loss": 1.4099, "step": 16410 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3522646978964027e-05, "loss": 1.4498, "step": 16415 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.3477336205628233e-05, "loss": 1.435, "step": 16420 }, { "epoch": 0.85, "grad_norm": 0.494140625, "learning_rate": 1.3432095986146109e-05, "loss": 1.4224, "step": 16425 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 1.3386926357408257e-05, "loss": 1.3966, "step": 16430 }, { "epoch": 0.85, "grad_norm": 0.53515625, "learning_rate": 1.3341827356247682e-05, "loss": 1.3741, "step": 16435 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.3296799019439865e-05, "loss": 1.4017, "step": 16440 }, { "epoch": 0.85, "grad_norm": 0.52734375, "learning_rate": 1.3251841383702557e-05, "loss": 1.4242, "step": 16445 }, { "epoch": 0.85, "grad_norm": 0.578125, "learning_rate": 1.3206954485695944e-05, "loss": 1.3895, "step": 16450 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3162138362022491e-05, "loss": 1.405, "step": 16455 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.3117393049226978e-05, "loss": 1.4151, "step": 16460 }, { "epoch": 0.85, "grad_norm": 0.51953125, "learning_rate": 1.3072718583796405e-05, "loss": 1.3724, "step": 16465 }, { "epoch": 0.85, "grad_norm": 0.57421875, "learning_rate": 1.3028115002160035e-05, "loss": 1.4355, "step": 16470 }, { "epoch": 0.85, "grad_norm": 0.53125, "learning_rate": 1.2983582340689304e-05, "loss": 1.4304, "step": 16475 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.2939120635697855e-05, "loss": 1.3647, "step": 16480 }, { "epoch": 0.85, "grad_norm": 0.515625, "learning_rate": 1.2894729923441407e-05, "loss": 1.3749, "step": 16485 }, { "epoch": 0.85, "grad_norm": 0.54296875, "learning_rate": 1.2850410240117849e-05, "loss": 1.403, "step": 16490 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 1.280616162186713e-05, "loss": 1.4169, "step": 16495 }, { "epoch": 0.85, "grad_norm": 0.58203125, "learning_rate": 1.2761984104771252e-05, "loss": 1.3677, "step": 16500 }, { "epoch": 0.85, "grad_norm": 0.54296875, "learning_rate": 1.271787772485421e-05, "loss": 1.4086, "step": 16505 }, { "epoch": 0.85, "grad_norm": 0.53515625, "learning_rate": 1.2673842518082024e-05, "loss": 1.4118, "step": 16510 }, { "epoch": 0.85, "grad_norm": 0.53125, "learning_rate": 1.262987852036267e-05, "loss": 1.4067, "step": 16515 }, { "epoch": 0.85, "grad_norm": 0.5390625, "learning_rate": 1.2585985767546083e-05, "loss": 1.4026, "step": 16520 }, { "epoch": 0.85, "grad_norm": 0.55078125, "learning_rate": 1.2542164295424031e-05, "loss": 1.4008, "step": 16525 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 1.249841413973022e-05, "loss": 1.3992, "step": 16530 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.2454735336140167e-05, "loss": 1.3753, "step": 16535 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.2411127920271271e-05, "loss": 1.4067, "step": 16540 }, { "epoch": 0.86, "grad_norm": 0.5703125, "learning_rate": 1.2367591927682598e-05, "loss": 1.4082, "step": 16545 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.2324127393875084e-05, "loss": 1.431, "step": 16550 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 1.2280734354291346e-05, "loss": 1.3781, "step": 16555 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 1.2237412844315722e-05, "loss": 1.4195, "step": 16560 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.2194162899274208e-05, "loss": 1.3911, "step": 16565 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.215098455443443e-05, "loss": 1.3816, "step": 16570 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.2107877845005644e-05, "loss": 1.4202, "step": 16575 }, { "epoch": 0.86, "grad_norm": 0.52734375, "learning_rate": 1.2064842806138698e-05, "loss": 1.3935, "step": 16580 }, { "epoch": 0.86, "grad_norm": 0.57421875, "learning_rate": 1.2021879472926023e-05, "loss": 1.4332, "step": 16585 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.1978987880401493e-05, "loss": 1.3974, "step": 16590 }, { "epoch": 0.86, "grad_norm": 0.51171875, "learning_rate": 1.1936168063540554e-05, "loss": 1.418, "step": 16595 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.1893420057260118e-05, "loss": 1.4039, "step": 16600 }, { "epoch": 0.86, "grad_norm": 0.52734375, "learning_rate": 1.1850743896418537e-05, "loss": 1.3886, "step": 16605 }, { "epoch": 0.86, "grad_norm": 0.53125, "learning_rate": 1.1808139615815527e-05, "loss": 1.3926, "step": 16610 }, { "epoch": 0.86, "grad_norm": 0.54296875, "learning_rate": 1.1765607250192245e-05, "loss": 1.4107, "step": 16615 }, { "epoch": 0.86, "grad_norm": 0.55859375, "learning_rate": 1.1723146834231214e-05, "loss": 1.4003, "step": 16620 }, { "epoch": 0.86, "grad_norm": 0.5390625, "learning_rate": 1.1680758402556257e-05, "loss": 1.4199, "step": 16625 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.1638441989732473e-05, "loss": 1.3873, "step": 16630 }, { "epoch": 0.86, "grad_norm": 0.5703125, "learning_rate": 1.1596197630266292e-05, "loss": 1.4124, "step": 16635 }, { "epoch": 0.86, "grad_norm": 0.5390625, "learning_rate": 1.1554025358605369e-05, "loss": 1.4054, "step": 16640 }, { "epoch": 0.86, "grad_norm": 0.57421875, "learning_rate": 1.1511925209138575e-05, "loss": 1.4418, "step": 16645 }, { "epoch": 0.86, "grad_norm": 0.515625, "learning_rate": 1.1469897216195924e-05, "loss": 1.4035, "step": 16650 }, { "epoch": 0.86, "grad_norm": 0.5546875, "learning_rate": 1.142794141404866e-05, "loss": 1.3955, "step": 16655 }, { "epoch": 0.86, "grad_norm": 0.5234375, "learning_rate": 1.1386057836909137e-05, "loss": 1.3921, "step": 16660 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.1344246518930823e-05, "loss": 1.4074, "step": 16665 }, { "epoch": 0.86, "grad_norm": 0.5390625, "learning_rate": 1.1302507494208191e-05, "loss": 1.3626, "step": 16670 }, { "epoch": 0.86, "grad_norm": 0.55078125, "learning_rate": 1.1260840796776873e-05, "loss": 1.4112, "step": 16675 }, { "epoch": 0.86, "grad_norm": 0.578125, "learning_rate": 1.1219246460613452e-05, "loss": 1.4386, "step": 16680 }, { "epoch": 0.86, "grad_norm": 0.5625, "learning_rate": 1.1177724519635547e-05, "loss": 1.4362, "step": 16685 }, { "epoch": 0.86, "grad_norm": 0.55859375, "learning_rate": 1.113627500770167e-05, "loss": 1.3881, "step": 16690 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.109489795861135e-05, "loss": 1.392, "step": 16695 }, { "epoch": 0.86, "grad_norm": 0.546875, "learning_rate": 1.1053593406105001e-05, "loss": 1.3767, "step": 16700 }, { "epoch": 0.86, "grad_norm": 0.58203125, "learning_rate": 1.1012361383863946e-05, "loss": 1.3679, "step": 16705 }, { "epoch": 0.86, "grad_norm": 0.51953125, "learning_rate": 1.0971201925510288e-05, "loss": 1.4073, "step": 16710 }, { "epoch": 0.86, "grad_norm": 0.546875, "learning_rate": 1.0930115064607016e-05, "loss": 1.4165, "step": 16715 }, { "epoch": 0.87, "grad_norm": 0.51953125, "learning_rate": 1.0889100834657917e-05, "loss": 1.403, "step": 16720 }, { "epoch": 0.87, "grad_norm": 0.56640625, "learning_rate": 1.0848159269107538e-05, "loss": 1.3617, "step": 16725 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 1.0807290401341219e-05, "loss": 1.3885, "step": 16730 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.0766494264684934e-05, "loss": 1.3835, "step": 16735 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 1.0725770892405407e-05, "loss": 1.3934, "step": 16740 }, { "epoch": 0.87, "grad_norm": 0.51953125, "learning_rate": 1.0685120317710029e-05, "loss": 1.3962, "step": 16745 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.064454257374683e-05, "loss": 1.4156, "step": 16750 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.0604037693604396e-05, "loss": 1.427, "step": 16755 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 1.0563605710311974e-05, "loss": 1.3918, "step": 16760 }, { "epoch": 0.87, "grad_norm": 0.5625, "learning_rate": 1.0523246656839314e-05, "loss": 1.4136, "step": 16765 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.0482960566096733e-05, "loss": 1.4192, "step": 16770 }, { "epoch": 0.87, "grad_norm": 0.53515625, "learning_rate": 1.0442747470935022e-05, "loss": 1.4162, "step": 16775 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 1.0402607404145449e-05, "loss": 1.3932, "step": 16780 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.0362540398459752e-05, "loss": 1.3889, "step": 16785 }, { "epoch": 0.87, "grad_norm": 0.55859375, "learning_rate": 1.0322546486550112e-05, "loss": 1.4413, "step": 16790 }, { "epoch": 0.87, "grad_norm": 0.5234375, "learning_rate": 1.0282625701029037e-05, "loss": 1.4081, "step": 16795 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 1.0242778074449455e-05, "loss": 1.4092, "step": 16800 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 1.0203003639304643e-05, "loss": 1.4168, "step": 16805 }, { "epoch": 0.87, "grad_norm": 0.53125, "learning_rate": 1.0163302428028188e-05, "loss": 1.4001, "step": 16810 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 1.0123674472993916e-05, "loss": 1.4336, "step": 16815 }, { "epoch": 0.87, "grad_norm": 0.5234375, "learning_rate": 1.0084119806516001e-05, "loss": 1.4195, "step": 16820 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 1.0044638460848798e-05, "loss": 1.4218, "step": 16825 }, { "epoch": 0.87, "grad_norm": 0.55078125, "learning_rate": 1.000523046818691e-05, "loss": 1.3904, "step": 16830 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 9.965895860665075e-06, "loss": 1.4194, "step": 16835 }, { "epoch": 0.87, "grad_norm": 0.51953125, "learning_rate": 9.926634670358236e-06, "loss": 1.386, "step": 16840 }, { "epoch": 0.87, "grad_norm": 0.55859375, "learning_rate": 9.887446929281453e-06, "loss": 1.4482, "step": 16845 }, { "epoch": 0.87, "grad_norm": 0.515625, "learning_rate": 9.848332669389916e-06, "loss": 1.405, "step": 16850 }, { "epoch": 0.87, "grad_norm": 0.546875, "learning_rate": 9.809291922578823e-06, "loss": 1.4228, "step": 16855 }, { "epoch": 0.87, "grad_norm": 0.515625, "learning_rate": 9.7703247206835e-06, "loss": 1.3764, "step": 16860 }, { "epoch": 0.87, "grad_norm": 0.5234375, "learning_rate": 9.731431095479281e-06, "loss": 1.3871, "step": 16865 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 9.692611078681513e-06, "loss": 1.4231, "step": 16870 }, { "epoch": 0.87, "grad_norm": 0.5546875, "learning_rate": 9.653864701945469e-06, "loss": 1.4045, "step": 16875 }, { "epoch": 0.87, "grad_norm": 0.5390625, "learning_rate": 9.615191996866446e-06, "loss": 1.4119, "step": 16880 }, { "epoch": 0.87, "grad_norm": 0.53515625, "learning_rate": 9.576592994979617e-06, "loss": 1.3788, "step": 16885 }, { "epoch": 0.87, "grad_norm": 0.54296875, "learning_rate": 9.53806772776008e-06, "loss": 1.4353, "step": 16890 }, { "epoch": 0.87, "grad_norm": 0.56640625, "learning_rate": 9.499616226622766e-06, "loss": 1.4092, "step": 16895 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 9.4612385229225e-06, "loss": 1.4103, "step": 16900 }, { "epoch": 0.87, "grad_norm": 0.55859375, "learning_rate": 9.422934647953929e-06, "loss": 1.4503, "step": 16905 }, { "epoch": 0.87, "grad_norm": 0.52734375, "learning_rate": 9.38470463295148e-06, "loss": 1.4105, "step": 16910 }, { "epoch": 0.88, "grad_norm": 0.53125, "learning_rate": 9.346548509089326e-06, "loss": 1.4304, "step": 16915 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 9.308466307481423e-06, "loss": 1.3896, "step": 16920 }, { "epoch": 0.88, "grad_norm": 0.55859375, "learning_rate": 9.270458059181452e-06, "loss": 1.4248, "step": 16925 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 9.23252379518279e-06, "loss": 1.4388, "step": 16930 }, { "epoch": 0.88, "grad_norm": 0.609375, "learning_rate": 9.194663546418436e-06, "loss": 1.3883, "step": 16935 }, { "epoch": 0.88, "grad_norm": 0.5546875, "learning_rate": 9.156877343761094e-06, "loss": 1.3937, "step": 16940 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 9.11916521802304e-06, "loss": 1.4132, "step": 16945 }, { "epoch": 0.88, "grad_norm": 0.515625, "learning_rate": 9.081527199956196e-06, "loss": 1.4238, "step": 16950 }, { "epoch": 0.88, "grad_norm": 0.5078125, "learning_rate": 9.043963320252025e-06, "loss": 1.3871, "step": 16955 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 9.006473609541511e-06, "loss": 1.4014, "step": 16960 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 8.96905809839519e-06, "loss": 1.4254, "step": 16965 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 8.931716817323099e-06, "loss": 1.4089, "step": 16970 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 8.89444979677474e-06, "loss": 1.4142, "step": 16975 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 8.857257067139013e-06, "loss": 1.4016, "step": 16980 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 8.820138658744304e-06, "loss": 1.4131, "step": 16985 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 8.783094601858355e-06, "loss": 1.4196, "step": 16990 }, { "epoch": 0.88, "grad_norm": 0.515625, "learning_rate": 8.746124926688325e-06, "loss": 1.3525, "step": 16995 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 8.709229663380658e-06, "loss": 1.3739, "step": 17000 }, { "epoch": 0.88, "grad_norm": 0.515625, "learning_rate": 8.67240884202113e-06, "loss": 1.4177, "step": 17005 }, { "epoch": 0.88, "grad_norm": 0.5078125, "learning_rate": 8.635662492634855e-06, "loss": 1.3755, "step": 17010 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 8.59899064518619e-06, "loss": 1.3941, "step": 17015 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 8.562393329578767e-06, "loss": 1.4086, "step": 17020 }, { "epoch": 0.88, "grad_norm": 0.546875, "learning_rate": 8.525870575655392e-06, "loss": 1.4255, "step": 17025 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 8.489422413198112e-06, "loss": 1.3802, "step": 17030 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 8.453048871928138e-06, "loss": 1.4057, "step": 17035 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 8.416749981505856e-06, "loss": 1.3872, "step": 17040 }, { "epoch": 0.88, "grad_norm": 0.5234375, "learning_rate": 8.380525771530701e-06, "loss": 1.4199, "step": 17045 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 8.34437627154131e-06, "loss": 1.3667, "step": 17050 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 8.308301511015327e-06, "loss": 1.4286, "step": 17055 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 8.272301519369519e-06, "loss": 1.4153, "step": 17060 }, { "epoch": 0.88, "grad_norm": 0.55078125, "learning_rate": 8.236376325959583e-06, "loss": 1.4186, "step": 17065 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 8.200525960080308e-06, "loss": 1.387, "step": 17070 }, { "epoch": 0.88, "grad_norm": 0.51171875, "learning_rate": 8.16475045096543e-06, "loss": 1.418, "step": 17075 }, { "epoch": 0.88, "grad_norm": 0.53515625, "learning_rate": 8.129049827787693e-06, "loss": 1.3995, "step": 17080 }, { "epoch": 0.88, "grad_norm": 0.51953125, "learning_rate": 8.093424119658678e-06, "loss": 1.3728, "step": 17085 }, { "epoch": 0.88, "grad_norm": 0.54296875, "learning_rate": 8.057873355628964e-06, "loss": 1.3687, "step": 17090 }, { "epoch": 0.88, "grad_norm": 0.52734375, "learning_rate": 8.022397564687989e-06, "loss": 1.4024, "step": 17095 }, { "epoch": 0.88, "grad_norm": 0.5546875, "learning_rate": 7.986996775764077e-06, "loss": 1.4226, "step": 17100 }, { "epoch": 0.88, "grad_norm": 0.5390625, "learning_rate": 7.951671017724316e-06, "loss": 1.4087, "step": 17105 }, { "epoch": 0.89, "grad_norm": 0.51171875, "learning_rate": 7.916420319374707e-06, "loss": 1.3865, "step": 17110 }, { "epoch": 0.89, "grad_norm": 0.515625, "learning_rate": 7.88124470945999e-06, "loss": 1.413, "step": 17115 }, { "epoch": 0.89, "grad_norm": 0.52734375, "learning_rate": 7.846144216663697e-06, "loss": 1.3887, "step": 17120 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.811118869608081e-06, "loss": 1.4014, "step": 17125 }, { "epoch": 0.89, "grad_norm": 0.5546875, "learning_rate": 7.776168696854147e-06, "loss": 1.4044, "step": 17130 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 7.741293726901589e-06, "loss": 1.4081, "step": 17135 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 7.70649398818879e-06, "loss": 1.3762, "step": 17140 }, { "epoch": 0.89, "grad_norm": 0.51953125, "learning_rate": 7.671769509092741e-06, "loss": 1.3945, "step": 17145 }, { "epoch": 0.89, "grad_norm": 0.52734375, "learning_rate": 7.637120317929114e-06, "loss": 1.374, "step": 17150 }, { "epoch": 0.89, "grad_norm": 0.5234375, "learning_rate": 7.6025464429521635e-06, "loss": 1.3886, "step": 17155 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 7.56804791235477e-06, "loss": 1.3781, "step": 17160 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.533624754268287e-06, "loss": 1.4105, "step": 17165 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.499276996762694e-06, "loss": 1.4119, "step": 17170 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 7.465004667846431e-06, "loss": 1.4016, "step": 17175 }, { "epoch": 0.89, "grad_norm": 0.55078125, "learning_rate": 7.430807795466488e-06, "loss": 1.4167, "step": 17180 }, { "epoch": 0.89, "grad_norm": 0.58984375, "learning_rate": 7.396686407508246e-06, "loss": 1.4252, "step": 17185 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.362640531795606e-06, "loss": 1.3954, "step": 17190 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.328670196090836e-06, "loss": 1.3732, "step": 17195 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.294775428094669e-06, "loss": 1.3902, "step": 17200 }, { "epoch": 0.89, "grad_norm": 0.515625, "learning_rate": 7.26095625544615e-06, "loss": 1.39, "step": 17205 }, { "epoch": 0.89, "grad_norm": 0.55078125, "learning_rate": 7.227212705722719e-06, "loss": 1.444, "step": 17210 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 7.1935448064401445e-06, "loss": 1.4021, "step": 17215 }, { "epoch": 0.89, "grad_norm": 0.5546875, "learning_rate": 7.159952585052532e-06, "loss": 1.4215, "step": 17220 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 7.126436068952202e-06, "loss": 1.4201, "step": 17225 }, { "epoch": 0.89, "grad_norm": 0.578125, "learning_rate": 7.092995285469816e-06, "loss": 1.4256, "step": 17230 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 7.05963026187425e-06, "loss": 1.4086, "step": 17235 }, { "epoch": 0.89, "grad_norm": 0.5234375, "learning_rate": 7.026341025372629e-06, "loss": 1.3837, "step": 17240 }, { "epoch": 0.89, "grad_norm": 0.515625, "learning_rate": 6.993127603110216e-06, "loss": 1.4214, "step": 17245 }, { "epoch": 0.89, "grad_norm": 0.58203125, "learning_rate": 6.959990022170515e-06, "loss": 1.4172, "step": 17250 }, { "epoch": 0.89, "grad_norm": 0.5390625, "learning_rate": 6.926928309575154e-06, "loss": 1.4337, "step": 17255 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 6.893942492283934e-06, "loss": 1.4106, "step": 17260 }, { "epoch": 0.89, "grad_norm": 0.5234375, "learning_rate": 6.861032597194683e-06, "loss": 1.3771, "step": 17265 }, { "epoch": 0.89, "grad_norm": 0.55078125, "learning_rate": 6.828198651143425e-06, "loss": 1.4111, "step": 17270 }, { "epoch": 0.89, "grad_norm": 0.53515625, "learning_rate": 6.795440680904164e-06, "loss": 1.3926, "step": 17275 }, { "epoch": 0.89, "grad_norm": 0.50390625, "learning_rate": 6.762758713189044e-06, "loss": 1.4102, "step": 17280 }, { "epoch": 0.89, "grad_norm": 0.546875, "learning_rate": 6.730152774648113e-06, "loss": 1.3879, "step": 17285 }, { "epoch": 0.89, "grad_norm": 0.55078125, "learning_rate": 6.697622891869515e-06, "loss": 1.3946, "step": 17290 }, { "epoch": 0.89, "grad_norm": 0.53125, "learning_rate": 6.665169091379364e-06, "loss": 1.3882, "step": 17295 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 6.63279139964168e-06, "loss": 1.3692, "step": 17300 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 6.6004898430585e-06, "loss": 1.3792, "step": 17305 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 6.568264447969697e-06, "loss": 1.4321, "step": 17310 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 6.536115240653096e-06, "loss": 1.445, "step": 17315 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 6.504042247324371e-06, "loss": 1.411, "step": 17320 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 6.47204549413708e-06, "loss": 1.4009, "step": 17325 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 6.440125007182551e-06, "loss": 1.4019, "step": 17330 }, { "epoch": 0.9, "grad_norm": 0.55078125, "learning_rate": 6.408280812489964e-06, "loss": 1.4255, "step": 17335 }, { "epoch": 0.9, "grad_norm": 0.56640625, "learning_rate": 6.37651293602628e-06, "loss": 1.3937, "step": 17340 }, { "epoch": 0.9, "grad_norm": 0.53515625, "learning_rate": 6.344821403696255e-06, "loss": 1.3765, "step": 17345 }, { "epoch": 0.9, "grad_norm": 0.5078125, "learning_rate": 6.313206241342328e-06, "loss": 1.3954, "step": 17350 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 6.281667474744712e-06, "loss": 1.4106, "step": 17355 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 6.2502051296213226e-06, "loss": 1.4296, "step": 17360 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 6.2188192316277374e-06, "loss": 1.3906, "step": 17365 }, { "epoch": 0.9, "grad_norm": 0.51953125, "learning_rate": 6.1875098063571835e-06, "loss": 1.4083, "step": 17370 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 6.156276879340583e-06, "loss": 1.4249, "step": 17375 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 6.125120476046431e-06, "loss": 1.396, "step": 17380 }, { "epoch": 0.9, "grad_norm": 0.57421875, "learning_rate": 6.094040621880837e-06, "loss": 1.3768, "step": 17385 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 6.0630373421875055e-06, "loss": 1.4468, "step": 17390 }, { "epoch": 0.9, "grad_norm": 0.51953125, "learning_rate": 6.032110662247659e-06, "loss": 1.4076, "step": 17395 }, { "epoch": 0.9, "grad_norm": 0.52734375, "learning_rate": 6.0012606072800905e-06, "loss": 1.387, "step": 17400 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 5.970487202441122e-06, "loss": 1.3739, "step": 17405 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 5.939790472824535e-06, "loss": 1.4175, "step": 17410 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.909170443461598e-06, "loss": 1.421, "step": 17415 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 5.878627139321047e-06, "loss": 1.4289, "step": 17420 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 5.848160585309048e-06, "loss": 1.408, "step": 17425 }, { "epoch": 0.9, "grad_norm": 0.52734375, "learning_rate": 5.817770806269207e-06, "loss": 1.4263, "step": 17430 }, { "epoch": 0.9, "grad_norm": 0.53125, "learning_rate": 5.787457826982457e-06, "loss": 1.4055, "step": 17435 }, { "epoch": 0.9, "grad_norm": 0.55859375, "learning_rate": 5.757221672167168e-06, "loss": 1.4094, "step": 17440 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.727062366479041e-06, "loss": 1.3952, "step": 17445 }, { "epoch": 0.9, "grad_norm": 0.52734375, "learning_rate": 5.696979934511137e-06, "loss": 1.3921, "step": 17450 }, { "epoch": 0.9, "grad_norm": 0.546875, "learning_rate": 5.666974400793779e-06, "loss": 1.4552, "step": 17455 }, { "epoch": 0.9, "grad_norm": 0.5234375, "learning_rate": 5.637045789794626e-06, "loss": 1.4358, "step": 17460 }, { "epoch": 0.9, "grad_norm": 0.54296875, "learning_rate": 5.607194125918602e-06, "loss": 1.43, "step": 17465 }, { "epoch": 0.9, "grad_norm": 0.5546875, "learning_rate": 5.577419433507891e-06, "loss": 1.4005, "step": 17470 }, { "epoch": 0.9, "grad_norm": 0.53125, "learning_rate": 5.547721736841871e-06, "loss": 1.4112, "step": 17475 }, { "epoch": 0.9, "grad_norm": 0.53515625, "learning_rate": 5.518101060137204e-06, "loss": 1.3764, "step": 17480 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 5.488557427547692e-06, "loss": 1.4062, "step": 17485 }, { "epoch": 0.9, "grad_norm": 0.5390625, "learning_rate": 5.459090863164351e-06, "loss": 1.4151, "step": 17490 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 5.429701391015296e-06, "loss": 1.4052, "step": 17495 }, { "epoch": 0.91, "grad_norm": 0.5546875, "learning_rate": 5.400389035065845e-06, "loss": 1.4168, "step": 17500 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 5.371153819218389e-06, "loss": 1.4148, "step": 17505 }, { "epoch": 0.91, "grad_norm": 0.546875, "learning_rate": 5.341995767312435e-06, "loss": 1.4043, "step": 17510 }, { "epoch": 0.91, "grad_norm": 0.51953125, "learning_rate": 5.312914903124566e-06, "loss": 1.398, "step": 17515 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 5.2839112503684e-06, "loss": 1.426, "step": 17520 }, { "epoch": 0.91, "grad_norm": 0.5390625, "learning_rate": 5.254984832694632e-06, "loss": 1.3925, "step": 17525 }, { "epoch": 0.91, "grad_norm": 0.51953125, "learning_rate": 5.226135673690957e-06, "loss": 1.3968, "step": 17530 }, { "epoch": 0.91, "grad_norm": 0.5546875, "learning_rate": 5.19736379688206e-06, "loss": 1.3513, "step": 17535 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 5.168669225729616e-06, "loss": 1.3746, "step": 17540 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 5.140051983632266e-06, "loss": 1.4029, "step": 17545 }, { "epoch": 0.91, "grad_norm": 0.5390625, "learning_rate": 5.111512093925619e-06, "loss": 1.4421, "step": 17550 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 5.083049579882149e-06, "loss": 1.3623, "step": 17555 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 5.054664464711267e-06, "loss": 1.4099, "step": 17560 }, { "epoch": 0.91, "grad_norm": 0.5390625, "learning_rate": 5.026356771559282e-06, "loss": 1.4274, "step": 17565 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 4.998126523509361e-06, "loss": 1.3686, "step": 17570 }, { "epoch": 0.91, "grad_norm": 0.54296875, "learning_rate": 4.969973743581502e-06, "loss": 1.3974, "step": 17575 }, { "epoch": 0.91, "grad_norm": 0.546875, "learning_rate": 4.941898454732563e-06, "loss": 1.4286, "step": 17580 }, { "epoch": 0.91, "grad_norm": 0.5625, "learning_rate": 4.913900679856176e-06, "loss": 1.4128, "step": 17585 }, { "epoch": 0.91, "grad_norm": 0.5234375, "learning_rate": 4.885980441782823e-06, "loss": 1.4221, "step": 17590 }, { "epoch": 0.91, "grad_norm": 0.5078125, "learning_rate": 4.858137763279702e-06, "loss": 1.403, "step": 17595 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 4.830372667050753e-06, "loss": 1.4253, "step": 17600 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 4.802685175736732e-06, "loss": 1.393, "step": 17605 }, { "epoch": 0.91, "grad_norm": 0.56640625, "learning_rate": 4.775075311915045e-06, "loss": 1.4162, "step": 17610 }, { "epoch": 0.91, "grad_norm": 0.515625, "learning_rate": 4.747543098099838e-06, "loss": 1.3916, "step": 17615 }, { "epoch": 0.91, "grad_norm": 0.546875, "learning_rate": 4.720088556741897e-06, "loss": 1.4114, "step": 17620 }, { "epoch": 0.91, "grad_norm": 0.578125, "learning_rate": 4.6927117102287034e-06, "loss": 1.375, "step": 17625 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 4.665412580884365e-06, "loss": 1.4022, "step": 17630 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 4.638191190969665e-06, "loss": 1.4275, "step": 17635 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 4.611047562681903e-06, "loss": 1.3914, "step": 17640 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 4.58398171815504e-06, "loss": 1.3729, "step": 17645 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 4.556993679459587e-06, "loss": 1.3844, "step": 17650 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 4.530083468602631e-06, "loss": 1.4087, "step": 17655 }, { "epoch": 0.91, "grad_norm": 0.53515625, "learning_rate": 4.503251107527751e-06, "loss": 1.4423, "step": 17660 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 4.476496618115078e-06, "loss": 1.3984, "step": 17665 }, { "epoch": 0.91, "grad_norm": 0.5390625, "learning_rate": 4.449820022181239e-06, "loss": 1.4143, "step": 17670 }, { "epoch": 0.91, "grad_norm": 0.52734375, "learning_rate": 4.423221341479344e-06, "loss": 1.3943, "step": 17675 }, { "epoch": 0.91, "grad_norm": 0.55078125, "learning_rate": 4.396700597698955e-06, "loss": 1.3885, "step": 17680 }, { "epoch": 0.91, "grad_norm": 0.57421875, "learning_rate": 4.3702578124660834e-06, "loss": 1.3702, "step": 17685 }, { "epoch": 0.92, "grad_norm": 0.54296875, "learning_rate": 4.34389300734318e-06, "loss": 1.4119, "step": 17690 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 4.317606203829127e-06, "loss": 1.3952, "step": 17695 }, { "epoch": 0.92, "grad_norm": 0.53515625, "learning_rate": 4.291397423359156e-06, "loss": 1.4155, "step": 17700 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 4.265266687304892e-06, "loss": 1.4023, "step": 17705 }, { "epoch": 0.92, "grad_norm": 0.5390625, "learning_rate": 4.239214016974335e-06, "loss": 1.3853, "step": 17710 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 4.213239433611848e-06, "loss": 1.4075, "step": 17715 }, { "epoch": 0.92, "grad_norm": 0.53515625, "learning_rate": 4.1873429583980325e-06, "loss": 1.4139, "step": 17720 }, { "epoch": 0.92, "grad_norm": 0.53515625, "learning_rate": 4.161524612449896e-06, "loss": 1.4018, "step": 17725 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 4.135784416820665e-06, "loss": 1.4292, "step": 17730 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 4.110122392499915e-06, "loss": 1.3752, "step": 17735 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 4.0845385604133755e-06, "loss": 1.3981, "step": 17740 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 4.059032941423113e-06, "loss": 1.3676, "step": 17745 }, { "epoch": 0.92, "grad_norm": 0.54296875, "learning_rate": 4.033605556327347e-06, "loss": 1.3836, "step": 17750 }, { "epoch": 0.92, "grad_norm": 0.55859375, "learning_rate": 4.008256425860546e-06, "loss": 1.4058, "step": 17755 }, { "epoch": 0.92, "grad_norm": 0.546875, "learning_rate": 3.982985570693354e-06, "loss": 1.4037, "step": 17760 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 3.957793011432564e-06, "loss": 1.4318, "step": 17765 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 3.932678768621145e-06, "loss": 1.381, "step": 17770 }, { "epoch": 0.92, "grad_norm": 0.5546875, "learning_rate": 3.907642862738214e-06, "loss": 1.4374, "step": 17775 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 3.882685314199009e-06, "loss": 1.3676, "step": 17780 }, { "epoch": 0.92, "grad_norm": 0.5625, "learning_rate": 3.857806143354814e-06, "loss": 1.4109, "step": 17785 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 3.833005370493081e-06, "loss": 1.3809, "step": 17790 }, { "epoch": 0.92, "grad_norm": 0.54296875, "learning_rate": 3.808283015837277e-06, "loss": 1.4036, "step": 17795 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 3.7836390995469873e-06, "loss": 1.383, "step": 17800 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.7590736417177365e-06, "loss": 1.4214, "step": 17805 }, { "epoch": 0.92, "grad_norm": 0.5234375, "learning_rate": 3.7345866623811677e-06, "loss": 1.381, "step": 17810 }, { "epoch": 0.92, "grad_norm": 0.53515625, "learning_rate": 3.7101781815048753e-06, "loss": 1.3782, "step": 17815 }, { "epoch": 0.92, "grad_norm": 0.54296875, "learning_rate": 3.6858482189924716e-06, "loss": 1.4236, "step": 17820 }, { "epoch": 0.92, "grad_norm": 0.55078125, "learning_rate": 3.6615967946835084e-06, "loss": 1.3932, "step": 17825 }, { "epoch": 0.92, "grad_norm": 0.5078125, "learning_rate": 3.637423928353523e-06, "loss": 1.3883, "step": 17830 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 3.6133296397139804e-06, "loss": 1.4071, "step": 17835 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.5893139484122982e-06, "loss": 1.3812, "step": 17840 }, { "epoch": 0.92, "grad_norm": 0.54296875, "learning_rate": 3.565376874031756e-06, "loss": 1.3953, "step": 17845 }, { "epoch": 0.92, "grad_norm": 0.53515625, "learning_rate": 3.541518436091562e-06, "loss": 1.4084, "step": 17850 }, { "epoch": 0.92, "grad_norm": 0.52734375, "learning_rate": 3.517738654046776e-06, "loss": 1.4153, "step": 17855 }, { "epoch": 0.92, "grad_norm": 0.5390625, "learning_rate": 3.4940375472883536e-06, "loss": 1.4134, "step": 17860 }, { "epoch": 0.92, "grad_norm": 0.51953125, "learning_rate": 3.470415135143046e-06, "loss": 1.4192, "step": 17865 }, { "epoch": 0.92, "grad_norm": 0.546875, "learning_rate": 3.446871436873478e-06, "loss": 1.4571, "step": 17870 }, { "epoch": 0.92, "grad_norm": 0.53125, "learning_rate": 3.42340647167807e-06, "loss": 1.3937, "step": 17875 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 3.400020258691061e-06, "loss": 1.3724, "step": 17880 }, { "epoch": 0.93, "grad_norm": 0.51171875, "learning_rate": 3.3767128169824304e-06, "loss": 1.4317, "step": 17885 }, { "epoch": 0.93, "grad_norm": 0.53125, "learning_rate": 3.353484165557941e-06, "loss": 1.4134, "step": 17890 }, { "epoch": 0.93, "grad_norm": 0.58203125, "learning_rate": 3.33033432335913e-06, "loss": 1.4244, "step": 17895 }, { "epoch": 0.93, "grad_norm": 0.5078125, "learning_rate": 3.307263309263242e-06, "loss": 1.3976, "step": 17900 }, { "epoch": 0.93, "grad_norm": 0.53125, "learning_rate": 3.284271142083284e-06, "loss": 1.4189, "step": 17905 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 3.2613578405679023e-06, "loss": 1.3897, "step": 17910 }, { "epoch": 0.93, "grad_norm": 0.53515625, "learning_rate": 3.238523423401496e-06, "loss": 1.4193, "step": 17915 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 3.2157679092040927e-06, "loss": 1.4181, "step": 17920 }, { "epoch": 0.93, "grad_norm": 0.53125, "learning_rate": 3.193091316531427e-06, "loss": 1.407, "step": 17925 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 3.1704936638748296e-06, "loss": 1.4167, "step": 17930 }, { "epoch": 0.93, "grad_norm": 0.55078125, "learning_rate": 3.1479749696612713e-06, "loss": 1.3896, "step": 17935 }, { "epoch": 0.93, "grad_norm": 0.5546875, "learning_rate": 3.1255352522533755e-06, "loss": 1.4004, "step": 17940 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 3.1031745299493266e-06, "loss": 1.395, "step": 17945 }, { "epoch": 0.93, "grad_norm": 0.55078125, "learning_rate": 3.0808928209828837e-06, "loss": 1.408, "step": 17950 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 3.058690143523424e-06, "loss": 1.3846, "step": 17955 }, { "epoch": 0.93, "grad_norm": 0.5390625, "learning_rate": 3.0365665156758315e-06, "loss": 1.4238, "step": 17960 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 3.014521955480565e-06, "loss": 1.4062, "step": 17965 }, { "epoch": 0.93, "grad_norm": 0.51953125, "learning_rate": 2.9925564809135776e-06, "loss": 1.3923, "step": 17970 }, { "epoch": 0.93, "grad_norm": 0.51171875, "learning_rate": 2.970670109886353e-06, "loss": 1.4395, "step": 17975 }, { "epoch": 0.93, "grad_norm": 0.53515625, "learning_rate": 2.94886286024586e-06, "loss": 1.4237, "step": 17980 }, { "epoch": 0.93, "grad_norm": 0.5859375, "learning_rate": 2.927134749774585e-06, "loss": 1.4126, "step": 17985 }, { "epoch": 0.93, "grad_norm": 0.5, "learning_rate": 2.9054857961904216e-06, "loss": 1.3602, "step": 17990 }, { "epoch": 0.93, "grad_norm": 0.52734375, "learning_rate": 2.8839160171467485e-06, "loss": 1.4304, "step": 17995 }, { "epoch": 0.93, "grad_norm": 0.53515625, "learning_rate": 2.862425430232385e-06, "loss": 1.4215, "step": 18000 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 2.84101405297158e-06, "loss": 1.3593, "step": 18005 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 2.8196819028239565e-06, "loss": 1.4182, "step": 18010 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 2.7984289971845657e-06, "loss": 1.4155, "step": 18015 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 2.777255353383845e-06, "loss": 1.3748, "step": 18020 }, { "epoch": 0.93, "grad_norm": 0.515625, "learning_rate": 2.756160988687573e-06, "loss": 1.3857, "step": 18025 }, { "epoch": 0.93, "grad_norm": 0.53125, "learning_rate": 2.735145920296889e-06, "loss": 1.4111, "step": 18030 }, { "epoch": 0.93, "grad_norm": 0.55078125, "learning_rate": 2.7142101653482852e-06, "loss": 1.4205, "step": 18035 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 2.693353740913562e-06, "loss": 1.4234, "step": 18040 }, { "epoch": 0.93, "grad_norm": 0.56640625, "learning_rate": 2.6725766639998485e-06, "loss": 1.4098, "step": 18045 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 2.651878951549536e-06, "loss": 1.3929, "step": 18050 }, { "epoch": 0.93, "grad_norm": 0.546875, "learning_rate": 2.6312606204403343e-06, "loss": 1.4238, "step": 18055 }, { "epoch": 0.93, "grad_norm": 0.5234375, "learning_rate": 2.610721687485207e-06, "loss": 1.3936, "step": 18060 }, { "epoch": 0.93, "grad_norm": 0.54296875, "learning_rate": 2.5902621694324005e-06, "loss": 1.3911, "step": 18065 }, { "epoch": 0.93, "grad_norm": 0.55078125, "learning_rate": 2.569882082965358e-06, "loss": 1.4094, "step": 18070 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 2.5495814447027643e-06, "loss": 1.3685, "step": 18075 }, { "epoch": 0.94, "grad_norm": 0.515625, "learning_rate": 2.5293602711985444e-06, "loss": 1.3835, "step": 18080 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 2.5092185789418078e-06, "loss": 1.3872, "step": 18085 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.48915638435685e-06, "loss": 1.3883, "step": 18090 }, { "epoch": 0.94, "grad_norm": 0.55078125, "learning_rate": 2.469173703803129e-06, "loss": 1.4209, "step": 18095 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.4492705535753003e-06, "loss": 1.389, "step": 18100 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 2.429446949903147e-06, "loss": 1.4392, "step": 18105 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 2.409702908951561e-06, "loss": 1.3789, "step": 18110 }, { "epoch": 0.94, "grad_norm": 0.498046875, "learning_rate": 2.3900384468205974e-06, "loss": 1.3633, "step": 18115 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.3704535795454065e-06, "loss": 1.3901, "step": 18120 }, { "epoch": 0.94, "grad_norm": 0.55859375, "learning_rate": 2.350948323096214e-06, "loss": 1.4299, "step": 18125 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.331522693378374e-06, "loss": 1.3986, "step": 18130 }, { "epoch": 0.94, "grad_norm": 0.55859375, "learning_rate": 2.3121767062322387e-06, "loss": 1.4388, "step": 18135 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 2.2929103774332882e-06, "loss": 1.3955, "step": 18140 }, { "epoch": 0.94, "grad_norm": 0.51953125, "learning_rate": 2.2737237226920003e-06, "loss": 1.4058, "step": 18145 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 2.2546167576539155e-06, "loss": 1.4167, "step": 18150 }, { "epoch": 0.94, "grad_norm": 0.515625, "learning_rate": 2.2355894978995593e-06, "loss": 1.4242, "step": 18155 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 2.2166419589444875e-06, "loss": 1.4274, "step": 18160 }, { "epoch": 0.94, "grad_norm": 0.5390625, "learning_rate": 2.1977741562392294e-06, "loss": 1.414, "step": 18165 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 2.178986105169334e-06, "loss": 1.4187, "step": 18170 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.1602778210552564e-06, "loss": 1.3972, "step": 18175 }, { "epoch": 0.94, "grad_norm": 0.546875, "learning_rate": 2.141649319152461e-06, "loss": 1.3914, "step": 18180 }, { "epoch": 0.94, "grad_norm": 0.53515625, "learning_rate": 2.1231006146513187e-06, "loss": 1.4213, "step": 18185 }, { "epoch": 0.94, "grad_norm": 0.54296875, "learning_rate": 2.1046317226771417e-06, "loss": 1.4014, "step": 18190 }, { "epoch": 0.94, "grad_norm": 0.53125, "learning_rate": 2.086242658290194e-06, "loss": 1.4132, "step": 18195 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 2.0679334364855806e-06, "loss": 1.3834, "step": 18200 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 2.049704072193337e-06, "loss": 1.4112, "step": 18205 }, { "epoch": 0.94, "grad_norm": 0.5390625, "learning_rate": 2.031554580278394e-06, "loss": 1.4272, "step": 18210 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 2.013484975540536e-06, "loss": 1.4039, "step": 18215 }, { "epoch": 0.94, "grad_norm": 0.5859375, "learning_rate": 1.995495272714376e-06, "loss": 1.406, "step": 18220 }, { "epoch": 0.94, "grad_norm": 0.51171875, "learning_rate": 1.9775854864694134e-06, "loss": 1.3711, "step": 18225 }, { "epoch": 0.94, "grad_norm": 0.53515625, "learning_rate": 1.959755631409976e-06, "loss": 1.4097, "step": 18230 }, { "epoch": 0.94, "grad_norm": 0.58984375, "learning_rate": 1.9420057220751907e-06, "loss": 1.4077, "step": 18235 }, { "epoch": 0.94, "grad_norm": 0.64453125, "learning_rate": 1.924335772939012e-06, "loss": 1.3996, "step": 18240 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 1.90674579841017e-06, "loss": 1.4285, "step": 18245 }, { "epoch": 0.94, "grad_norm": 0.57421875, "learning_rate": 1.8892358128322018e-06, "loss": 1.43, "step": 18250 }, { "epoch": 0.94, "grad_norm": 0.55078125, "learning_rate": 1.8718058304834307e-06, "loss": 1.4526, "step": 18255 }, { "epoch": 0.94, "grad_norm": 0.5234375, "learning_rate": 1.8544558655768983e-06, "loss": 1.4166, "step": 18260 }, { "epoch": 0.94, "grad_norm": 0.5546875, "learning_rate": 1.8371859322604323e-06, "loss": 1.4201, "step": 18265 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.8199960446165898e-06, "loss": 1.3988, "step": 18270 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.8028862166626691e-06, "loss": 1.3912, "step": 18275 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.7858564623506547e-06, "loss": 1.3984, "step": 18280 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.768906795567249e-06, "loss": 1.4118, "step": 18285 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.7520372301338516e-06, "loss": 1.37, "step": 18290 }, { "epoch": 0.95, "grad_norm": 0.5625, "learning_rate": 1.7352477798065703e-06, "loss": 1.3932, "step": 18295 }, { "epoch": 0.95, "grad_norm": 0.55859375, "learning_rate": 1.718538458276131e-06, "loss": 1.4033, "step": 18300 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.701909279167946e-06, "loss": 1.402, "step": 18305 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.6853602560421012e-06, "loss": 1.4002, "step": 18310 }, { "epoch": 0.95, "grad_norm": 0.51953125, "learning_rate": 1.6688914023932801e-06, "loss": 1.4128, "step": 18315 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.6525027316507957e-06, "loss": 1.3474, "step": 18320 }, { "epoch": 0.95, "grad_norm": 0.5390625, "learning_rate": 1.6361942571786138e-06, "loss": 1.3617, "step": 18325 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.619965992275274e-06, "loss": 1.4078, "step": 18330 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.6038179501739138e-06, "loss": 1.4057, "step": 18335 }, { "epoch": 0.95, "grad_norm": 0.515625, "learning_rate": 1.587750144042266e-06, "loss": 1.3938, "step": 18340 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 1.5717625869826168e-06, "loss": 1.3701, "step": 18345 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.555855292031827e-06, "loss": 1.4177, "step": 18350 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 1.540028272161309e-06, "loss": 1.4019, "step": 18355 }, { "epoch": 0.95, "grad_norm": 0.51953125, "learning_rate": 1.5242815402770172e-06, "loss": 1.3823, "step": 18360 }, { "epoch": 0.95, "grad_norm": 0.55078125, "learning_rate": 1.5086151092194356e-06, "loss": 1.4226, "step": 18365 }, { "epoch": 0.95, "grad_norm": 0.5703125, "learning_rate": 1.4930289917635453e-06, "loss": 1.3835, "step": 18370 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.4775232006188799e-06, "loss": 1.3652, "step": 18375 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.4620977484294362e-06, "loss": 1.4092, "step": 18380 }, { "epoch": 0.95, "grad_norm": 0.5390625, "learning_rate": 1.4467526477737082e-06, "loss": 1.4041, "step": 18385 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.4314879111646861e-06, "loss": 1.4034, "step": 18390 }, { "epoch": 0.95, "grad_norm": 0.51953125, "learning_rate": 1.4163035510498023e-06, "loss": 1.4054, "step": 18395 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 1.4011995798109522e-06, "loss": 1.3963, "step": 18400 }, { "epoch": 0.95, "grad_norm": 0.5390625, "learning_rate": 1.386176009764506e-06, "loss": 1.4419, "step": 18405 }, { "epoch": 0.95, "grad_norm": 0.5390625, "learning_rate": 1.3712328531612306e-06, "loss": 1.414, "step": 18410 }, { "epoch": 0.95, "grad_norm": 0.53515625, "learning_rate": 1.3563701221863567e-06, "loss": 1.3657, "step": 18415 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.3415878289595008e-06, "loss": 1.4276, "step": 18420 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.3268859855347093e-06, "loss": 1.3805, "step": 18425 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.312264603900437e-06, "loss": 1.4341, "step": 18430 }, { "epoch": 0.95, "grad_norm": 0.5234375, "learning_rate": 1.2977236959795025e-06, "loss": 1.3819, "step": 18435 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.2832632736290983e-06, "loss": 1.4432, "step": 18440 }, { "epoch": 0.95, "grad_norm": 0.54296875, "learning_rate": 1.2688833486408257e-06, "loss": 1.4064, "step": 18445 }, { "epoch": 0.95, "grad_norm": 0.546875, "learning_rate": 1.254583932740594e-06, "loss": 1.4189, "step": 18450 }, { "epoch": 0.95, "grad_norm": 0.52734375, "learning_rate": 1.2403650375887088e-06, "loss": 1.3865, "step": 18455 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 1.2262266747797847e-06, "loss": 1.4215, "step": 18460 }, { "epoch": 0.96, "grad_norm": 0.5625, "learning_rate": 1.2121688558427768e-06, "loss": 1.3978, "step": 18465 }, { "epoch": 0.96, "grad_norm": 0.52734375, "learning_rate": 1.1981915922409603e-06, "loss": 1.4087, "step": 18470 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 1.1842948953719403e-06, "loss": 1.3789, "step": 18475 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 1.1704787765675963e-06, "loss": 1.3762, "step": 18480 }, { "epoch": 0.96, "grad_norm": 0.5390625, "learning_rate": 1.1567432470941163e-06, "loss": 1.4136, "step": 18485 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 1.1430883181519635e-06, "loss": 1.4364, "step": 18490 }, { "epoch": 0.96, "grad_norm": 0.51953125, "learning_rate": 1.1295140008758864e-06, "loss": 1.4176, "step": 18495 }, { "epoch": 0.96, "grad_norm": 0.5390625, "learning_rate": 1.1160203063349195e-06, "loss": 1.4348, "step": 18500 }, { "epoch": 0.96, "grad_norm": 0.56640625, "learning_rate": 1.1026072455322945e-06, "loss": 1.4194, "step": 18505 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 1.089274829405562e-06, "loss": 1.3916, "step": 18510 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 1.0760230688264593e-06, "loss": 1.4221, "step": 18515 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 1.0628519746009757e-06, "loss": 1.3982, "step": 18520 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 1.0497615574693309e-06, "loss": 1.4214, "step": 18525 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 1.0367518281059307e-06, "loss": 1.4061, "step": 18530 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 1.0238227971194004e-06, "loss": 1.3746, "step": 18535 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 1.010974475052584e-06, "loss": 1.4275, "step": 18540 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 9.982068723824677e-07, "loss": 1.3601, "step": 18545 }, { "epoch": 0.96, "grad_norm": 0.546875, "learning_rate": 9.855199995202457e-07, "loss": 1.3907, "step": 18550 }, { "epoch": 0.96, "grad_norm": 0.5546875, "learning_rate": 9.729138668112648e-07, "loss": 1.4076, "step": 18555 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 9.603884845350575e-07, "loss": 1.4104, "step": 18560 }, { "epoch": 0.96, "grad_norm": 0.5703125, "learning_rate": 9.479438629052873e-07, "loss": 1.3915, "step": 18565 }, { "epoch": 0.96, "grad_norm": 0.5703125, "learning_rate": 9.3558001206977e-07, "loss": 1.4368, "step": 18570 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 9.232969421104521e-07, "loss": 1.4154, "step": 18575 }, { "epoch": 0.96, "grad_norm": 0.54296875, "learning_rate": 9.110946630434214e-07, "loss": 1.4285, "step": 18580 }, { "epoch": 0.96, "grad_norm": 0.54296875, "learning_rate": 8.989731848188743e-07, "loss": 1.4052, "step": 18585 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 8.869325173211262e-07, "loss": 1.3765, "step": 18590 }, { "epoch": 0.96, "grad_norm": 0.55859375, "learning_rate": 8.749726703685901e-07, "loss": 1.4041, "step": 18595 }, { "epoch": 0.96, "grad_norm": 0.57421875, "learning_rate": 8.630936537137757e-07, "loss": 1.3355, "step": 18600 }, { "epoch": 0.96, "grad_norm": 0.515625, "learning_rate": 8.51295477043279e-07, "loss": 1.4103, "step": 18605 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 8.395781499777932e-07, "loss": 1.3912, "step": 18610 }, { "epoch": 0.96, "grad_norm": 0.5234375, "learning_rate": 8.279416820720531e-07, "loss": 1.41, "step": 18615 }, { "epoch": 0.96, "grad_norm": 0.54296875, "learning_rate": 8.163860828148906e-07, "loss": 1.3804, "step": 18620 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 8.049113616291793e-07, "loss": 1.4257, "step": 18625 }, { "epoch": 0.96, "grad_norm": 0.53515625, "learning_rate": 7.935175278718232e-07, "loss": 1.3851, "step": 18630 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 7.822045908337905e-07, "loss": 1.3834, "step": 18635 }, { "epoch": 0.96, "grad_norm": 0.51953125, "learning_rate": 7.709725597400908e-07, "loss": 1.4218, "step": 18640 }, { "epoch": 0.96, "grad_norm": 0.53125, "learning_rate": 7.598214437497531e-07, "loss": 1.3998, "step": 18645 }, { "epoch": 0.96, "grad_norm": 0.52734375, "learning_rate": 7.487512519557815e-07, "loss": 1.3893, "step": 18650 }, { "epoch": 0.97, "grad_norm": 0.53515625, "learning_rate": 7.377619933852664e-07, "loss": 1.3919, "step": 18655 }, { "epoch": 0.97, "grad_norm": 0.56640625, "learning_rate": 7.268536769992507e-07, "loss": 1.408, "step": 18660 }, { "epoch": 0.97, "grad_norm": 0.52734375, "learning_rate": 7.160263116927968e-07, "loss": 1.388, "step": 18665 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 7.052799062949312e-07, "loss": 1.4445, "step": 18670 }, { "epoch": 0.97, "grad_norm": 0.58984375, "learning_rate": 6.946144695686885e-07, "loss": 1.3965, "step": 18675 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 6.840300102110785e-07, "loss": 1.4341, "step": 18680 }, { "epoch": 0.97, "grad_norm": 0.57421875, "learning_rate": 6.735265368530641e-07, "loss": 1.3783, "step": 18685 }, { "epoch": 0.97, "grad_norm": 0.53515625, "learning_rate": 6.631040580595605e-07, "loss": 1.3876, "step": 18690 }, { "epoch": 0.97, "grad_norm": 0.5234375, "learning_rate": 6.527625823294692e-07, "loss": 1.3976, "step": 18695 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 6.425021180956114e-07, "loss": 1.3861, "step": 18700 }, { "epoch": 0.97, "grad_norm": 0.53515625, "learning_rate": 6.323226737247723e-07, "loss": 1.4201, "step": 18705 }, { "epoch": 0.97, "grad_norm": 0.53515625, "learning_rate": 6.222242575176341e-07, "loss": 1.4344, "step": 18710 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 6.122068777088319e-07, "loss": 1.433, "step": 18715 }, { "epoch": 0.97, "grad_norm": 0.58203125, "learning_rate": 6.022705424669317e-07, "loss": 1.4217, "step": 18720 }, { "epoch": 0.97, "grad_norm": 0.5625, "learning_rate": 5.924152598943966e-07, "loss": 1.4262, "step": 18725 }, { "epoch": 0.97, "grad_norm": 0.55859375, "learning_rate": 5.826410380275759e-07, "loss": 1.3997, "step": 18730 }, { "epoch": 0.97, "grad_norm": 0.56640625, "learning_rate": 5.729478848367609e-07, "loss": 1.4183, "step": 18735 }, { "epoch": 0.97, "grad_norm": 0.54296875, "learning_rate": 5.633358082260954e-07, "loss": 1.3599, "step": 18740 }, { "epoch": 0.97, "grad_norm": 0.51953125, "learning_rate": 5.53804816033654e-07, "loss": 1.408, "step": 18745 }, { "epoch": 0.97, "grad_norm": 0.55859375, "learning_rate": 5.443549160313421e-07, "loss": 1.404, "step": 18750 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 5.349861159249959e-07, "loss": 1.4162, "step": 18755 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 5.256984233542595e-07, "loss": 1.3979, "step": 18760 }, { "epoch": 0.97, "grad_norm": 0.5078125, "learning_rate": 5.16491845892697e-07, "loss": 1.3693, "step": 18765 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 5.073663910476811e-07, "loss": 1.3507, "step": 18770 }, { "epoch": 0.97, "grad_norm": 0.515625, "learning_rate": 4.983220662604482e-07, "loss": 1.4163, "step": 18775 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 4.893588789060988e-07, "loss": 1.393, "step": 18780 }, { "epoch": 0.97, "grad_norm": 0.52734375, "learning_rate": 4.80476836293542e-07, "loss": 1.4092, "step": 18785 }, { "epoch": 0.97, "grad_norm": 0.5234375, "learning_rate": 4.7167594566555064e-07, "loss": 1.3873, "step": 18790 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 4.6295621419868427e-07, "loss": 1.401, "step": 18795 }, { "epoch": 0.97, "grad_norm": 0.52734375, "learning_rate": 4.5431764900334404e-07, "loss": 1.4024, "step": 18800 }, { "epoch": 0.97, "grad_norm": 0.546875, "learning_rate": 4.457602571237507e-07, "loss": 1.3943, "step": 18805 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 4.3728404553793344e-07, "loss": 1.395, "step": 18810 }, { "epoch": 0.97, "grad_norm": 0.5234375, "learning_rate": 4.288890211576857e-07, "loss": 1.3996, "step": 18815 }, { "epoch": 0.97, "grad_norm": 0.53125, "learning_rate": 4.205751908286537e-07, "loss": 1.4113, "step": 18820 }, { "epoch": 0.97, "grad_norm": 0.51953125, "learning_rate": 4.1234256133024785e-07, "loss": 1.4166, "step": 18825 }, { "epoch": 0.97, "grad_norm": 0.52734375, "learning_rate": 4.0419113937566475e-07, "loss": 1.4103, "step": 18830 }, { "epoch": 0.97, "grad_norm": 0.5390625, "learning_rate": 3.961209316118653e-07, "loss": 1.3753, "step": 18835 }, { "epoch": 0.97, "grad_norm": 0.5546875, "learning_rate": 3.8813194461961856e-07, "loss": 1.4105, "step": 18840 }, { "epoch": 0.97, "grad_norm": 0.53515625, "learning_rate": 3.8022418491344693e-07, "loss": 1.4321, "step": 18845 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 3.723976589416256e-07, "loss": 1.4311, "step": 18850 }, { "epoch": 0.98, "grad_norm": 0.5390625, "learning_rate": 3.6465237308621615e-07, "loss": 1.4195, "step": 18855 }, { "epoch": 0.98, "grad_norm": 0.54296875, "learning_rate": 3.5698833366299975e-07, "loss": 1.4145, "step": 18860 }, { "epoch": 0.98, "grad_norm": 0.55859375, "learning_rate": 3.4940554692154405e-07, "loss": 1.3957, "step": 18865 }, { "epoch": 0.98, "grad_norm": 0.51171875, "learning_rate": 3.41904019045125e-07, "loss": 1.4162, "step": 18870 }, { "epoch": 0.98, "grad_norm": 0.5234375, "learning_rate": 3.344837561507719e-07, "loss": 1.3873, "step": 18875 }, { "epoch": 0.98, "grad_norm": 0.54296875, "learning_rate": 3.2714476428925553e-07, "loss": 1.4291, "step": 18880 }, { "epoch": 0.98, "grad_norm": 0.5625, "learning_rate": 3.1988704944506677e-07, "loss": 1.4272, "step": 18885 }, { "epoch": 0.98, "grad_norm": 0.55078125, "learning_rate": 3.127106175364158e-07, "loss": 1.4175, "step": 18890 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 3.05615474415244e-07, "loss": 1.422, "step": 18895 }, { "epoch": 0.98, "grad_norm": 0.52734375, "learning_rate": 2.9860162586718974e-07, "loss": 1.4079, "step": 18900 }, { "epoch": 0.98, "grad_norm": 0.55078125, "learning_rate": 2.9166907761162264e-07, "loss": 1.4483, "step": 18905 }, { "epoch": 0.98, "grad_norm": 0.52734375, "learning_rate": 2.8481783530159843e-07, "loss": 1.4063, "step": 18910 }, { "epoch": 0.98, "grad_norm": 0.54296875, "learning_rate": 2.780479045238704e-07, "loss": 1.4074, "step": 18915 }, { "epoch": 0.98, "grad_norm": 0.54296875, "learning_rate": 2.7135929079891156e-07, "loss": 1.4116, "step": 18920 }, { "epoch": 0.98, "grad_norm": 0.5390625, "learning_rate": 2.6475199958085897e-07, "loss": 1.3984, "step": 18925 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 2.582260362575584e-07, "loss": 1.4304, "step": 18930 }, { "epoch": 0.98, "grad_norm": 0.51953125, "learning_rate": 2.5178140615051973e-07, "loss": 1.404, "step": 18935 }, { "epoch": 0.98, "grad_norm": 0.51171875, "learning_rate": 2.4541811451493925e-07, "loss": 1.4332, "step": 18940 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 2.391361665396885e-07, "loss": 1.4137, "step": 18945 }, { "epoch": 0.98, "grad_norm": 0.5703125, "learning_rate": 2.3293556734730326e-07, "loss": 1.4516, "step": 18950 }, { "epoch": 0.98, "grad_norm": 0.53515625, "learning_rate": 2.268163219939945e-07, "loss": 1.4191, "step": 18955 }, { "epoch": 0.98, "grad_norm": 0.5390625, "learning_rate": 2.2077843546960408e-07, "loss": 1.3984, "step": 18960 }, { "epoch": 0.98, "grad_norm": 0.56640625, "learning_rate": 2.1482191269768247e-07, "loss": 1.4172, "step": 18965 }, { "epoch": 0.98, "grad_norm": 0.55078125, "learning_rate": 2.089467585353777e-07, "loss": 1.3938, "step": 18970 }, { "epoch": 0.98, "grad_norm": 0.5390625, "learning_rate": 2.0315297777353525e-07, "loss": 1.4371, "step": 18975 }, { "epoch": 0.98, "grad_norm": 0.52734375, "learning_rate": 1.9744057513660928e-07, "loss": 1.4102, "step": 18980 }, { "epoch": 0.98, "grad_norm": 0.5234375, "learning_rate": 1.9180955528270706e-07, "loss": 1.4085, "step": 18985 }, { "epoch": 0.98, "grad_norm": 0.546875, "learning_rate": 1.8625992280357773e-07, "loss": 1.3943, "step": 18990 }, { "epoch": 0.98, "grad_norm": 0.52734375, "learning_rate": 1.8079168222461252e-07, "loss": 1.4169, "step": 18995 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 1.7540483800481122e-07, "loss": 1.379, "step": 19000 }, { "epoch": 0.98, "grad_norm": 0.51171875, "learning_rate": 1.7009939453680456e-07, "loss": 1.3613, "step": 19005 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 1.6487535614687633e-07, "loss": 1.4412, "step": 19010 }, { "epoch": 0.98, "grad_norm": 0.5546875, "learning_rate": 1.5973272709487453e-07, "loss": 1.4065, "step": 19015 }, { "epoch": 0.98, "grad_norm": 0.49609375, "learning_rate": 1.5467151157431136e-07, "loss": 1.3788, "step": 19020 }, { "epoch": 0.98, "grad_norm": 0.5703125, "learning_rate": 1.4969171371228552e-07, "loss": 1.4031, "step": 19025 }, { "epoch": 0.98, "grad_norm": 0.53125, "learning_rate": 1.447933375695265e-07, "loss": 1.376, "step": 19030 }, { "epoch": 0.98, "grad_norm": 0.5625, "learning_rate": 1.3997638714033923e-07, "loss": 1.3969, "step": 19035 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 1.3524086635265942e-07, "loss": 1.3911, "step": 19040 }, { "epoch": 0.99, "grad_norm": 0.54296875, "learning_rate": 1.305867790679982e-07, "loss": 1.4282, "step": 19045 }, { "epoch": 0.99, "grad_norm": 0.55078125, "learning_rate": 1.2601412908147536e-07, "loss": 1.4233, "step": 19050 }, { "epoch": 0.99, "grad_norm": 0.5390625, "learning_rate": 1.2152292012181932e-07, "loss": 1.4246, "step": 19055 }, { "epoch": 0.99, "grad_norm": 0.52734375, "learning_rate": 1.1711315585131166e-07, "loss": 1.4038, "step": 19060 }, { "epoch": 0.99, "grad_norm": 0.50390625, "learning_rate": 1.1278483986586486e-07, "loss": 1.3726, "step": 19065 }, { "epoch": 0.99, "grad_norm": 0.55078125, "learning_rate": 1.085379756949223e-07, "loss": 1.4142, "step": 19070 }, { "epoch": 0.99, "grad_norm": 0.515625, "learning_rate": 1.0437256680155827e-07, "loss": 1.3689, "step": 19075 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 1.0028861658238909e-07, "loss": 1.4363, "step": 19080 }, { "epoch": 0.99, "grad_norm": 0.515625, "learning_rate": 9.628612836763973e-08, "loss": 1.4179, "step": 19085 }, { "epoch": 0.99, "grad_norm": 0.55078125, "learning_rate": 9.236510542107723e-08, "loss": 1.3981, "step": 19090 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 8.85255509400662e-08, "loss": 1.4081, "step": 19095 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 8.476746805550218e-08, "loss": 1.4231, "step": 19100 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 8.109085983188936e-08, "loss": 1.3895, "step": 19105 }, { "epoch": 0.99, "grad_norm": 0.55078125, "learning_rate": 7.74957292672629e-08, "loss": 1.4086, "step": 19110 }, { "epoch": 0.99, "grad_norm": 0.52734375, "learning_rate": 7.39820792932333e-08, "loss": 1.3892, "step": 19115 }, { "epoch": 0.99, "grad_norm": 0.51171875, "learning_rate": 7.054991277496425e-08, "loss": 1.4197, "step": 19120 }, { "epoch": 0.99, "grad_norm": 0.5625, "learning_rate": 6.719923251116145e-08, "loss": 1.4062, "step": 19125 }, { "epoch": 0.99, "grad_norm": 0.546875, "learning_rate": 6.393004123411706e-08, "loss": 1.424, "step": 19130 }, { "epoch": 0.99, "grad_norm": 0.52734375, "learning_rate": 6.074234160963199e-08, "loss": 1.3763, "step": 19135 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 5.763613623709363e-08, "loss": 1.4039, "step": 19140 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 5.461142764940919e-08, "loss": 1.4028, "step": 19145 }, { "epoch": 0.99, "grad_norm": 0.54296875, "learning_rate": 5.166821831305013e-08, "loss": 1.3935, "step": 19150 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 4.880651062800779e-08, "loss": 1.4228, "step": 19155 }, { "epoch": 0.99, "grad_norm": 0.55859375, "learning_rate": 4.6026306927848814e-08, "loss": 1.4005, "step": 19160 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 4.332760947962644e-08, "loss": 1.4044, "step": 19165 }, { "epoch": 0.99, "grad_norm": 0.56640625, "learning_rate": 4.0710420483980326e-08, "loss": 1.4302, "step": 19170 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 3.817474207505889e-08, "loss": 1.3764, "step": 19175 }, { "epoch": 0.99, "grad_norm": 0.5546875, "learning_rate": 3.572057632055259e-08, "loss": 1.4099, "step": 19180 }, { "epoch": 0.99, "grad_norm": 0.515625, "learning_rate": 3.3347925221682844e-08, "loss": 1.3482, "step": 19185 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 3.1056790713202e-08, "loss": 1.4299, "step": 19190 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 2.884717466338227e-08, "loss": 1.4004, "step": 19195 }, { "epoch": 0.99, "grad_norm": 0.54296875, "learning_rate": 2.6719078874026803e-08, "loss": 1.4316, "step": 19200 }, { "epoch": 0.99, "grad_norm": 0.53515625, "learning_rate": 2.4672505080458597e-08, "loss": 1.3985, "step": 19205 }, { "epoch": 0.99, "grad_norm": 0.52734375, "learning_rate": 2.2707454951553797e-08, "loss": 1.4001, "step": 19210 }, { "epoch": 0.99, "grad_norm": 0.5078125, "learning_rate": 2.082393008966399e-08, "loss": 1.4053, "step": 19215 }, { "epoch": 0.99, "grad_norm": 0.5234375, "learning_rate": 1.9021932030705015e-08, "loss": 1.4332, "step": 19220 }, { "epoch": 0.99, "grad_norm": 0.53125, "learning_rate": 1.7301462244079246e-08, "loss": 1.4338, "step": 19225 }, { "epoch": 0.99, "grad_norm": 0.5234375, "learning_rate": 1.5662522132742218e-08, "loss": 1.3848, "step": 19230 }, { "epoch": 1.0, "grad_norm": 0.5859375, "learning_rate": 1.4105113033124895e-08, "loss": 1.4195, "step": 19235 }, { "epoch": 1.0, "grad_norm": 0.5546875, "learning_rate": 1.2629236215211393e-08, "loss": 1.3918, "step": 19240 }, { "epoch": 1.0, "grad_norm": 0.53125, "learning_rate": 1.1234892882494574e-08, "loss": 1.4252, "step": 19245 }, { "epoch": 1.0, "grad_norm": 0.55078125, "learning_rate": 9.922084171953839e-09, "loss": 1.4026, "step": 19250 }, { "epoch": 1.0, "grad_norm": 0.55859375, "learning_rate": 8.690811154121737e-09, "loss": 1.3924, "step": 19255 }, { "epoch": 1.0, "grad_norm": 0.54296875, "learning_rate": 7.541074833006257e-09, "loss": 1.3749, "step": 19260 }, { "epoch": 1.0, "grad_norm": 0.55859375, "learning_rate": 6.472876146168538e-09, "loss": 1.4085, "step": 19265 }, { "epoch": 1.0, "grad_norm": 0.53125, "learning_rate": 5.486215964645158e-09, "loss": 1.442, "step": 19270 }, { "epoch": 1.0, "grad_norm": 0.54296875, "learning_rate": 4.581095092992538e-09, "loss": 1.4407, "step": 19275 }, { "epoch": 1.0, "grad_norm": 0.5390625, "learning_rate": 3.757514269286944e-09, "loss": 1.4129, "step": 19280 }, { "epoch": 1.0, "grad_norm": 0.53515625, "learning_rate": 3.0154741651022833e-09, "loss": 1.4254, "step": 19285 }, { "epoch": 1.0, "grad_norm": 0.53125, "learning_rate": 2.354975385543412e-09, "loss": 1.413, "step": 19290 }, { "epoch": 1.0, "grad_norm": 0.52734375, "learning_rate": 1.776018469179519e-09, "loss": 1.397, "step": 19295 }, { "epoch": 1.0, "grad_norm": 0.5234375, "learning_rate": 1.2786038881329455e-09, "loss": 1.4139, "step": 19300 }, { "epoch": 1.0, "grad_norm": 0.515625, "learning_rate": 8.627320480125711e-10, "loss": 1.4221, "step": 19305 }, { "epoch": 1.0, "grad_norm": 0.53125, "learning_rate": 5.284032879249168e-10, "loss": 1.3758, "step": 19310 }, { "epoch": 1.0, "grad_norm": 0.515625, "learning_rate": 2.7561788049634827e-10, "loss": 1.3815, "step": 19315 }, { "epoch": 1.0, "grad_norm": 0.56640625, "learning_rate": 1.0437603187307688e-10, "loss": 1.4298, "step": 19320 }, { "epoch": 1.0, "grad_norm": 0.51953125, "learning_rate": 1.4677881676750105e-11, "loss": 1.3775, "step": 19325 }, { "epoch": 1.0, "eval_loss": 1.4011043310165405, "eval_runtime": 25105.0488, "eval_samples_per_second": 6.821, "eval_steps_per_second": 1.705, "step": 19328 }, { "epoch": 1.0, "step": 19328, "total_flos": 1.3590087469130318e+19, "train_loss": 1.456397372168421, "train_runtime": 104474.4259, "train_samples_per_second": 1.48, "train_steps_per_second": 0.185 } ], "logging_steps": 5, "max_steps": 19328, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.3590087469130318e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }