{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4553, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 7.812648680225853, "learning_rate": 2.9197080291970804e-07, "loss": 2.184, "step": 1 }, { "epoch": 0.0, "grad_norm": 8.477363513420425, "learning_rate": 5.839416058394161e-07, "loss": 2.3547, "step": 2 }, { "epoch": 0.0, "grad_norm": 8.667187992946625, "learning_rate": 8.759124087591242e-07, "loss": 2.3479, "step": 3 }, { "epoch": 0.0, "grad_norm": 8.975762686028327, "learning_rate": 1.1678832116788322e-06, "loss": 2.5346, "step": 4 }, { "epoch": 0.0, "grad_norm": 9.085661236932879, "learning_rate": 1.4598540145985402e-06, "loss": 2.4729, "step": 5 }, { "epoch": 0.0, "grad_norm": 7.885329238897419, "learning_rate": 1.7518248175182485e-06, "loss": 2.2296, "step": 6 }, { "epoch": 0.0, "grad_norm": 8.312140664960022, "learning_rate": 2.0437956204379563e-06, "loss": 2.3955, "step": 7 }, { "epoch": 0.0, "grad_norm": 7.9862669851230255, "learning_rate": 2.3357664233576643e-06, "loss": 2.3144, "step": 8 }, { "epoch": 0.0, "grad_norm": 6.660452935107435, "learning_rate": 2.627737226277373e-06, "loss": 2.2619, "step": 9 }, { "epoch": 0.0, "grad_norm": 5.839865306189313, "learning_rate": 2.9197080291970804e-06, "loss": 2.0876, "step": 10 }, { "epoch": 0.0, "grad_norm": 6.218399728082995, "learning_rate": 3.2116788321167884e-06, "loss": 2.1238, "step": 11 }, { "epoch": 0.0, "grad_norm": 4.2177468696706075, "learning_rate": 3.503649635036497e-06, "loss": 1.9576, "step": 12 }, { "epoch": 0.0, "grad_norm": 4.546161818531478, "learning_rate": 3.7956204379562045e-06, "loss": 1.8759, "step": 13 }, { "epoch": 0.0, "grad_norm": 3.6162250465985224, "learning_rate": 4.0875912408759126e-06, "loss": 1.7203, "step": 14 }, { "epoch": 0.0, "grad_norm": 3.134376192710703, "learning_rate": 4.379562043795621e-06, "loss": 1.6647, "step": 15 }, { "epoch": 0.0, "grad_norm": 2.625610462231106, "learning_rate": 4.671532846715329e-06, "loss": 1.6871, "step": 16 }, { "epoch": 0.0, "grad_norm": 3.90084477103016, "learning_rate": 4.963503649635037e-06, "loss": 1.5972, "step": 17 }, { "epoch": 0.0, "grad_norm": 3.074734287101781, "learning_rate": 5.255474452554746e-06, "loss": 1.5691, "step": 18 }, { "epoch": 0.0, "grad_norm": 2.3580179922502973, "learning_rate": 5.547445255474453e-06, "loss": 1.4562, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.1643910072609365, "learning_rate": 5.839416058394161e-06, "loss": 1.4711, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.9961717444782574, "learning_rate": 6.13138686131387e-06, "loss": 1.5118, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.9070903614201673, "learning_rate": 6.423357664233577e-06, "loss": 1.5549, "step": 22 }, { "epoch": 0.01, "grad_norm": 2.1352623364264467, "learning_rate": 6.715328467153285e-06, "loss": 1.5622, "step": 23 }, { "epoch": 0.01, "grad_norm": 2.1105178562720255, "learning_rate": 7.007299270072994e-06, "loss": 1.526, "step": 24 }, { "epoch": 0.01, "grad_norm": 1.943169696695018, "learning_rate": 7.299270072992701e-06, "loss": 1.308, "step": 25 }, { "epoch": 0.01, "grad_norm": 1.9445026964211596, "learning_rate": 7.591240875912409e-06, "loss": 1.4118, "step": 26 }, { "epoch": 0.01, "grad_norm": 1.6960437094301213, "learning_rate": 7.883211678832117e-06, "loss": 1.3774, "step": 27 }, { "epoch": 0.01, "grad_norm": 1.806297937832825, "learning_rate": 8.175182481751825e-06, "loss": 1.3251, "step": 28 }, { "epoch": 0.01, "grad_norm": 1.925963480221896, "learning_rate": 8.467153284671533e-06, "loss": 1.2795, "step": 29 }, { "epoch": 0.01, "grad_norm": 1.761128734910925, "learning_rate": 8.759124087591241e-06, "loss": 1.3643, "step": 30 }, { "epoch": 0.01, "grad_norm": 1.6845485873825556, "learning_rate": 9.05109489051095e-06, "loss": 1.1851, "step": 31 }, { "epoch": 0.01, "grad_norm": 1.6619942577047109, "learning_rate": 9.343065693430657e-06, "loss": 1.3317, "step": 32 }, { "epoch": 0.01, "grad_norm": 1.684565641977221, "learning_rate": 9.635036496350367e-06, "loss": 1.4255, "step": 33 }, { "epoch": 0.01, "grad_norm": 1.7809624690835513, "learning_rate": 9.927007299270073e-06, "loss": 1.24, "step": 34 }, { "epoch": 0.01, "grad_norm": 1.5927230667953016, "learning_rate": 1.0218978102189783e-05, "loss": 1.1678, "step": 35 }, { "epoch": 0.01, "grad_norm": 1.708251796110156, "learning_rate": 1.0510948905109491e-05, "loss": 1.2783, "step": 36 }, { "epoch": 0.01, "grad_norm": 1.61533474408533, "learning_rate": 1.0802919708029198e-05, "loss": 1.2122, "step": 37 }, { "epoch": 0.01, "grad_norm": 1.534946029285862, "learning_rate": 1.1094890510948906e-05, "loss": 1.086, "step": 38 }, { "epoch": 0.01, "grad_norm": 1.7288084999717537, "learning_rate": 1.1386861313868614e-05, "loss": 1.2476, "step": 39 }, { "epoch": 0.01, "grad_norm": 1.6147373823283309, "learning_rate": 1.1678832116788322e-05, "loss": 1.2814, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.6600660591309946, "learning_rate": 1.1970802919708031e-05, "loss": 1.3103, "step": 41 }, { "epoch": 0.01, "grad_norm": 1.5377864291720114, "learning_rate": 1.226277372262774e-05, "loss": 1.2692, "step": 42 }, { "epoch": 0.01, "grad_norm": 1.5523788716951157, "learning_rate": 1.2554744525547446e-05, "loss": 1.1435, "step": 43 }, { "epoch": 0.01, "grad_norm": 1.5435394390617185, "learning_rate": 1.2846715328467154e-05, "loss": 1.1664, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.6764266187454582, "learning_rate": 1.3138686131386862e-05, "loss": 1.1319, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.5873994149609632, "learning_rate": 1.343065693430657e-05, "loss": 1.2192, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.8001990049344387, "learning_rate": 1.372262773722628e-05, "loss": 1.1245, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.8115756867886097, "learning_rate": 1.4014598540145988e-05, "loss": 1.2252, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.577246544210627, "learning_rate": 1.4306569343065696e-05, "loss": 1.1797, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.5385036325197972, "learning_rate": 1.4598540145985402e-05, "loss": 1.0706, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.5936588934494904, "learning_rate": 1.489051094890511e-05, "loss": 1.1354, "step": 51 }, { "epoch": 0.01, "grad_norm": 1.5968142951430568, "learning_rate": 1.5182481751824818e-05, "loss": 1.1386, "step": 52 }, { "epoch": 0.01, "grad_norm": 1.5492790883712821, "learning_rate": 1.5474452554744528e-05, "loss": 1.1177, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.6938133819094698, "learning_rate": 1.5766423357664234e-05, "loss": 1.178, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.4934152874568871, "learning_rate": 1.6058394160583944e-05, "loss": 1.1386, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.5257065527660285, "learning_rate": 1.635036496350365e-05, "loss": 0.9883, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.3479382206148167, "learning_rate": 1.664233576642336e-05, "loss": 0.9845, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.4843433577527922, "learning_rate": 1.6934306569343066e-05, "loss": 1.034, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.5578311593823568, "learning_rate": 1.7226277372262773e-05, "loss": 1.0024, "step": 59 }, { "epoch": 0.01, "grad_norm": 1.6841397207836093, "learning_rate": 1.7518248175182482e-05, "loss": 1.1098, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.5967023842041272, "learning_rate": 1.7810218978102192e-05, "loss": 1.0532, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.458201329751063, "learning_rate": 1.81021897810219e-05, "loss": 1.0601, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.4896668719717416, "learning_rate": 1.8394160583941608e-05, "loss": 0.9523, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.594477543540855, "learning_rate": 1.8686131386861315e-05, "loss": 1.0128, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.5053984138120584, "learning_rate": 1.897810218978102e-05, "loss": 1.0969, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.4337321118516833, "learning_rate": 1.9270072992700734e-05, "loss": 0.978, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.5514572553484482, "learning_rate": 1.956204379562044e-05, "loss": 1.0519, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.5646238007711162, "learning_rate": 1.9854014598540147e-05, "loss": 1.0664, "step": 68 }, { "epoch": 0.02, "grad_norm": 1.5916594463537463, "learning_rate": 2.0145985401459857e-05, "loss": 1.0785, "step": 69 }, { "epoch": 0.02, "grad_norm": 1.3923766430469164, "learning_rate": 2.0437956204379566e-05, "loss": 0.9919, "step": 70 }, { "epoch": 0.02, "grad_norm": 1.6016619395577614, "learning_rate": 2.0729927007299273e-05, "loss": 1.0618, "step": 71 }, { "epoch": 0.02, "grad_norm": 1.5406397758843127, "learning_rate": 2.1021897810218982e-05, "loss": 1.0485, "step": 72 }, { "epoch": 0.02, "grad_norm": 1.6912492146176519, "learning_rate": 2.131386861313869e-05, "loss": 1.1137, "step": 73 }, { "epoch": 0.02, "grad_norm": 1.4428980274558936, "learning_rate": 2.1605839416058395e-05, "loss": 1.0067, "step": 74 }, { "epoch": 0.02, "grad_norm": 1.4853785284781014, "learning_rate": 2.1897810218978105e-05, "loss": 1.0771, "step": 75 }, { "epoch": 0.02, "grad_norm": 1.5161668261227093, "learning_rate": 2.218978102189781e-05, "loss": 1.0348, "step": 76 }, { "epoch": 0.02, "grad_norm": 1.4922789700419026, "learning_rate": 2.248175182481752e-05, "loss": 1.0129, "step": 77 }, { "epoch": 0.02, "grad_norm": 1.3967521784164405, "learning_rate": 2.2773722627737227e-05, "loss": 0.9375, "step": 78 }, { "epoch": 0.02, "grad_norm": 1.4611514337742868, "learning_rate": 2.3065693430656934e-05, "loss": 1.0035, "step": 79 }, { "epoch": 0.02, "grad_norm": 1.4726560071545307, "learning_rate": 2.3357664233576643e-05, "loss": 0.9118, "step": 80 }, { "epoch": 0.02, "grad_norm": 1.509851608690166, "learning_rate": 2.3649635036496353e-05, "loss": 0.9736, "step": 81 }, { "epoch": 0.02, "grad_norm": 1.4854110314617655, "learning_rate": 2.3941605839416063e-05, "loss": 1.0382, "step": 82 }, { "epoch": 0.02, "grad_norm": 1.4438095939193825, "learning_rate": 2.423357664233577e-05, "loss": 0.9426, "step": 83 }, { "epoch": 0.02, "grad_norm": 1.588853832486513, "learning_rate": 2.452554744525548e-05, "loss": 1.0912, "step": 84 }, { "epoch": 0.02, "grad_norm": 1.4629616000754389, "learning_rate": 2.4817518248175185e-05, "loss": 0.9363, "step": 85 }, { "epoch": 0.02, "grad_norm": 1.5126486586979293, "learning_rate": 2.510948905109489e-05, "loss": 0.8875, "step": 86 }, { "epoch": 0.02, "grad_norm": 1.6442965886013419, "learning_rate": 2.54014598540146e-05, "loss": 1.0676, "step": 87 }, { "epoch": 0.02, "grad_norm": 1.5786077686311497, "learning_rate": 2.5693430656934308e-05, "loss": 1.0544, "step": 88 }, { "epoch": 0.02, "grad_norm": 1.5424329382663844, "learning_rate": 2.5985401459854017e-05, "loss": 1.0603, "step": 89 }, { "epoch": 0.02, "grad_norm": 1.3763163074731595, "learning_rate": 2.6277372262773724e-05, "loss": 0.9341, "step": 90 }, { "epoch": 0.02, "grad_norm": 1.36645217739804, "learning_rate": 2.656934306569343e-05, "loss": 0.9277, "step": 91 }, { "epoch": 0.02, "grad_norm": 1.476560239436927, "learning_rate": 2.686131386861314e-05, "loss": 0.9549, "step": 92 }, { "epoch": 0.02, "grad_norm": 1.651790661601902, "learning_rate": 2.7153284671532846e-05, "loss": 1.0434, "step": 93 }, { "epoch": 0.02, "grad_norm": 1.564711569632748, "learning_rate": 2.744525547445256e-05, "loss": 0.9528, "step": 94 }, { "epoch": 0.02, "grad_norm": 1.3892575516439725, "learning_rate": 2.7737226277372266e-05, "loss": 0.9055, "step": 95 }, { "epoch": 0.02, "grad_norm": 1.47338517258931, "learning_rate": 2.8029197080291975e-05, "loss": 0.9045, "step": 96 }, { "epoch": 0.02, "grad_norm": 1.4283014468117288, "learning_rate": 2.832116788321168e-05, "loss": 0.9418, "step": 97 }, { "epoch": 0.02, "grad_norm": 1.4984556035926158, "learning_rate": 2.861313868613139e-05, "loss": 1.0147, "step": 98 }, { "epoch": 0.02, "grad_norm": 1.5189781457158873, "learning_rate": 2.8905109489051098e-05, "loss": 0.9159, "step": 99 }, { "epoch": 0.02, "grad_norm": 1.4628925807313362, "learning_rate": 2.9197080291970804e-05, "loss": 0.9209, "step": 100 }, { "epoch": 0.02, "grad_norm": 1.5929870275147635, "learning_rate": 2.9489051094890514e-05, "loss": 0.9557, "step": 101 }, { "epoch": 0.02, "grad_norm": 1.4503689132203692, "learning_rate": 2.978102189781022e-05, "loss": 0.8777, "step": 102 }, { "epoch": 0.02, "grad_norm": 1.387439894877902, "learning_rate": 3.007299270072993e-05, "loss": 0.8537, "step": 103 }, { "epoch": 0.02, "grad_norm": 1.4809151733801909, "learning_rate": 3.0364963503649636e-05, "loss": 0.8067, "step": 104 }, { "epoch": 0.02, "grad_norm": 1.4872114687064437, "learning_rate": 3.0656934306569346e-05, "loss": 0.9561, "step": 105 }, { "epoch": 0.02, "grad_norm": 1.3704334201331871, "learning_rate": 3.0948905109489056e-05, "loss": 0.832, "step": 106 }, { "epoch": 0.02, "grad_norm": 1.2970664101409684, "learning_rate": 3.1240875912408765e-05, "loss": 0.8222, "step": 107 }, { "epoch": 0.02, "grad_norm": 1.4920176464295187, "learning_rate": 3.153284671532847e-05, "loss": 0.9368, "step": 108 }, { "epoch": 0.02, "grad_norm": 1.3703280318629414, "learning_rate": 3.182481751824818e-05, "loss": 0.9345, "step": 109 }, { "epoch": 0.02, "grad_norm": 1.3550291640320893, "learning_rate": 3.211678832116789e-05, "loss": 0.8403, "step": 110 }, { "epoch": 0.02, "grad_norm": 1.4486058010850487, "learning_rate": 3.24087591240876e-05, "loss": 0.8456, "step": 111 }, { "epoch": 0.02, "grad_norm": 1.5455373509810715, "learning_rate": 3.27007299270073e-05, "loss": 0.9237, "step": 112 }, { "epoch": 0.02, "grad_norm": 1.562125351812911, "learning_rate": 3.299270072992701e-05, "loss": 0.8496, "step": 113 }, { "epoch": 0.03, "grad_norm": 1.409157185100568, "learning_rate": 3.328467153284672e-05, "loss": 0.8066, "step": 114 }, { "epoch": 0.03, "grad_norm": 1.5594940262435777, "learning_rate": 3.357664233576642e-05, "loss": 0.8461, "step": 115 }, { "epoch": 0.03, "grad_norm": 1.587613727971374, "learning_rate": 3.386861313868613e-05, "loss": 0.8593, "step": 116 }, { "epoch": 0.03, "grad_norm": 1.4364812807164014, "learning_rate": 3.416058394160584e-05, "loss": 0.9084, "step": 117 }, { "epoch": 0.03, "grad_norm": 1.4354830980730897, "learning_rate": 3.4452554744525545e-05, "loss": 0.8356, "step": 118 }, { "epoch": 0.03, "grad_norm": 1.370928545179788, "learning_rate": 3.474452554744526e-05, "loss": 0.8, "step": 119 }, { "epoch": 0.03, "grad_norm": 1.4805453237896125, "learning_rate": 3.5036496350364965e-05, "loss": 0.8707, "step": 120 }, { "epoch": 0.03, "grad_norm": 1.3529579147537623, "learning_rate": 3.5328467153284675e-05, "loss": 0.7764, "step": 121 }, { "epoch": 0.03, "grad_norm": 1.547681039749328, "learning_rate": 3.5620437956204384e-05, "loss": 0.8775, "step": 122 }, { "epoch": 0.03, "grad_norm": 1.5332301823846584, "learning_rate": 3.5912408759124094e-05, "loss": 0.9876, "step": 123 }, { "epoch": 0.03, "grad_norm": 1.512126463485808, "learning_rate": 3.62043795620438e-05, "loss": 0.9528, "step": 124 }, { "epoch": 0.03, "grad_norm": 1.4554233218365884, "learning_rate": 3.649635036496351e-05, "loss": 0.8889, "step": 125 }, { "epoch": 0.03, "grad_norm": 1.3808554901543213, "learning_rate": 3.6788321167883217e-05, "loss": 0.7677, "step": 126 }, { "epoch": 0.03, "grad_norm": 1.4277159223916567, "learning_rate": 3.708029197080292e-05, "loss": 0.7742, "step": 127 }, { "epoch": 0.03, "grad_norm": 1.49818015966614, "learning_rate": 3.737226277372263e-05, "loss": 0.7219, "step": 128 }, { "epoch": 0.03, "grad_norm": 1.6176138053372855, "learning_rate": 3.766423357664234e-05, "loss": 0.9805, "step": 129 }, { "epoch": 0.03, "grad_norm": 1.6421738352692505, "learning_rate": 3.795620437956204e-05, "loss": 0.9051, "step": 130 }, { "epoch": 0.03, "grad_norm": 1.3299414386620134, "learning_rate": 3.824817518248176e-05, "loss": 0.7942, "step": 131 }, { "epoch": 0.03, "grad_norm": 1.390975222259264, "learning_rate": 3.854014598540147e-05, "loss": 0.9009, "step": 132 }, { "epoch": 0.03, "grad_norm": 1.33912799723986, "learning_rate": 3.883211678832117e-05, "loss": 0.8367, "step": 133 }, { "epoch": 0.03, "grad_norm": 1.302621175946519, "learning_rate": 3.912408759124088e-05, "loss": 0.7549, "step": 134 }, { "epoch": 0.03, "grad_norm": 1.405998800941922, "learning_rate": 3.941605839416059e-05, "loss": 0.7584, "step": 135 }, { "epoch": 0.03, "grad_norm": 1.5325238903977623, "learning_rate": 3.9708029197080294e-05, "loss": 0.8071, "step": 136 }, { "epoch": 0.03, "grad_norm": 1.4485039194756526, "learning_rate": 4e-05, "loss": 0.7941, "step": 137 }, { "epoch": 0.03, "grad_norm": 1.4259171930589734, "learning_rate": 3.99999949389387e-05, "loss": 0.8034, "step": 138 }, { "epoch": 0.03, "grad_norm": 1.3034281791915299, "learning_rate": 3.999997975575736e-05, "loss": 0.7876, "step": 139 }, { "epoch": 0.03, "grad_norm": 1.5026318191108297, "learning_rate": 3.9999954450463665e-05, "loss": 0.8762, "step": 140 }, { "epoch": 0.03, "grad_norm": 1.3526883590322558, "learning_rate": 3.9999919023070414e-05, "loss": 0.8062, "step": 141 }, { "epoch": 0.03, "grad_norm": 1.385679946587705, "learning_rate": 3.999987347359555e-05, "loss": 0.7728, "step": 142 }, { "epoch": 0.03, "grad_norm": 1.4683155472393012, "learning_rate": 3.999981780206212e-05, "loss": 0.7596, "step": 143 }, { "epoch": 0.03, "grad_norm": 1.3424429191942802, "learning_rate": 3.99997520084983e-05, "loss": 0.7176, "step": 144 }, { "epoch": 0.03, "grad_norm": 1.4314773279087492, "learning_rate": 3.999967609293739e-05, "loss": 0.9188, "step": 145 }, { "epoch": 0.03, "grad_norm": 1.3056482672181737, "learning_rate": 3.99995900554178e-05, "loss": 0.7416, "step": 146 }, { "epoch": 0.03, "grad_norm": 1.4585949890098764, "learning_rate": 3.999949389598309e-05, "loss": 0.824, "step": 147 }, { "epoch": 0.03, "grad_norm": 1.337799603783892, "learning_rate": 3.999938761468192e-05, "loss": 0.6902, "step": 148 }, { "epoch": 0.03, "grad_norm": 1.5249693382807843, "learning_rate": 3.9999271211568084e-05, "loss": 0.7957, "step": 149 }, { "epoch": 0.03, "grad_norm": 1.5249336134421996, "learning_rate": 3.999914468670048e-05, "loss": 0.7885, "step": 150 }, { "epoch": 0.03, "grad_norm": 1.3788826483849044, "learning_rate": 3.999900804014317e-05, "loss": 0.807, "step": 151 }, { "epoch": 0.03, "grad_norm": 1.408791428424769, "learning_rate": 3.9998861271965285e-05, "loss": 0.7954, "step": 152 }, { "epoch": 0.03, "grad_norm": 1.288136513782112, "learning_rate": 3.999870438224111e-05, "loss": 0.7176, "step": 153 }, { "epoch": 0.03, "grad_norm": 1.3975412917477223, "learning_rate": 3.999853737105007e-05, "loss": 0.8284, "step": 154 }, { "epoch": 0.03, "grad_norm": 1.5646770569437787, "learning_rate": 3.9998360238476655e-05, "loss": 0.8673, "step": 155 }, { "epoch": 0.03, "grad_norm": 1.4448607692497883, "learning_rate": 3.999817298461054e-05, "loss": 0.8099, "step": 156 }, { "epoch": 0.03, "grad_norm": 1.4221062105434186, "learning_rate": 3.999797560954649e-05, "loss": 0.8186, "step": 157 }, { "epoch": 0.03, "grad_norm": 1.3541790105795066, "learning_rate": 3.999776811338439e-05, "loss": 0.7527, "step": 158 }, { "epoch": 0.03, "grad_norm": 1.310569615173891, "learning_rate": 3.999755049622926e-05, "loss": 0.6947, "step": 159 }, { "epoch": 0.04, "grad_norm": 1.2855053459421322, "learning_rate": 3.9997322758191244e-05, "loss": 0.6425, "step": 160 }, { "epoch": 0.04, "grad_norm": 1.456957613232235, "learning_rate": 3.999708489938559e-05, "loss": 0.7138, "step": 161 }, { "epoch": 0.04, "grad_norm": 1.3154442325459081, "learning_rate": 3.999683691993268e-05, "loss": 0.6217, "step": 162 }, { "epoch": 0.04, "grad_norm": 1.3707120967385478, "learning_rate": 3.999657881995802e-05, "loss": 0.734, "step": 163 }, { "epoch": 0.04, "grad_norm": 1.3464468131540774, "learning_rate": 3.9996310599592244e-05, "loss": 0.8018, "step": 164 }, { "epoch": 0.04, "grad_norm": 1.3131685370844959, "learning_rate": 3.9996032258971097e-05, "loss": 0.7634, "step": 165 }, { "epoch": 0.04, "grad_norm": 1.3014680349591794, "learning_rate": 3.9995743798235445e-05, "loss": 0.7904, "step": 166 }, { "epoch": 0.04, "grad_norm": 1.3306836781749012, "learning_rate": 3.999544521753128e-05, "loss": 0.7516, "step": 167 }, { "epoch": 0.04, "grad_norm": 1.3280616745156995, "learning_rate": 3.999513651700971e-05, "loss": 0.7404, "step": 168 }, { "epoch": 0.04, "grad_norm": 1.382294035015746, "learning_rate": 3.999481769682699e-05, "loss": 0.7423, "step": 169 }, { "epoch": 0.04, "grad_norm": 1.289816572248431, "learning_rate": 3.9994488757144454e-05, "loss": 0.6838, "step": 170 }, { "epoch": 0.04, "grad_norm": 1.351114849421247, "learning_rate": 3.999414969812859e-05, "loss": 0.7064, "step": 171 }, { "epoch": 0.04, "grad_norm": 1.112602374906335, "learning_rate": 3.9993800519951e-05, "loss": 0.5968, "step": 172 }, { "epoch": 0.04, "grad_norm": 1.3237891836733933, "learning_rate": 3.99934412227884e-05, "loss": 0.7149, "step": 173 }, { "epoch": 0.04, "grad_norm": 1.3127998282064572, "learning_rate": 3.999307180682264e-05, "loss": 0.6785, "step": 174 }, { "epoch": 0.04, "grad_norm": 1.3832342075926638, "learning_rate": 3.9992692272240684e-05, "loss": 0.681, "step": 175 }, { "epoch": 0.04, "grad_norm": 1.2908340765054303, "learning_rate": 3.99923026192346e-05, "loss": 0.6676, "step": 176 }, { "epoch": 0.04, "grad_norm": 1.3544599313339964, "learning_rate": 3.999190284800162e-05, "loss": 0.7121, "step": 177 }, { "epoch": 0.04, "grad_norm": 1.2556712245455064, "learning_rate": 3.9991492958744046e-05, "loss": 0.7016, "step": 178 }, { "epoch": 0.04, "grad_norm": 1.3426939784728575, "learning_rate": 3.9991072951669334e-05, "loss": 0.7133, "step": 179 }, { "epoch": 0.04, "grad_norm": 1.2358437986095847, "learning_rate": 3.999064282699006e-05, "loss": 0.6931, "step": 180 }, { "epoch": 0.04, "grad_norm": 1.4329296933567661, "learning_rate": 3.999020258492391e-05, "loss": 0.7873, "step": 181 }, { "epoch": 0.04, "grad_norm": 1.3413303680374504, "learning_rate": 3.998975222569368e-05, "loss": 0.668, "step": 182 }, { "epoch": 0.04, "grad_norm": 1.3699727488682398, "learning_rate": 3.9989291749527314e-05, "loss": 0.744, "step": 183 }, { "epoch": 0.04, "grad_norm": 1.2672579556409398, "learning_rate": 3.998882115665786e-05, "loss": 0.7541, "step": 184 }, { "epoch": 0.04, "grad_norm": 1.1930919291337678, "learning_rate": 3.998834044732348e-05, "loss": 0.6611, "step": 185 }, { "epoch": 0.04, "grad_norm": 1.3237952171145917, "learning_rate": 3.9987849621767473e-05, "loss": 0.6954, "step": 186 }, { "epoch": 0.04, "grad_norm": 1.3491031122269155, "learning_rate": 3.998734868023825e-05, "loss": 0.6532, "step": 187 }, { "epoch": 0.04, "grad_norm": 1.2474273911255802, "learning_rate": 3.998683762298933e-05, "loss": 0.614, "step": 188 }, { "epoch": 0.04, "grad_norm": 1.4339242497046318, "learning_rate": 3.9986316450279365e-05, "loss": 0.7592, "step": 189 }, { "epoch": 0.04, "grad_norm": 1.3155313182105586, "learning_rate": 3.9985785162372135e-05, "loss": 0.6463, "step": 190 }, { "epoch": 0.04, "grad_norm": 1.2720545649011348, "learning_rate": 3.998524375953651e-05, "loss": 0.6925, "step": 191 }, { "epoch": 0.04, "grad_norm": 1.3250032424887084, "learning_rate": 3.998469224204652e-05, "loss": 0.6736, "step": 192 }, { "epoch": 0.04, "grad_norm": 1.3797040149914255, "learning_rate": 3.998413061018126e-05, "loss": 0.7955, "step": 193 }, { "epoch": 0.04, "grad_norm": 1.2805602845669852, "learning_rate": 3.9983558864225005e-05, "loss": 0.7063, "step": 194 }, { "epoch": 0.04, "grad_norm": 1.4422115567764362, "learning_rate": 3.9982977004467106e-05, "loss": 0.6954, "step": 195 }, { "epoch": 0.04, "grad_norm": 1.207992023189052, "learning_rate": 3.998238503120205e-05, "loss": 0.6594, "step": 196 }, { "epoch": 0.04, "grad_norm": 1.363506484348838, "learning_rate": 3.998178294472944e-05, "loss": 0.6448, "step": 197 }, { "epoch": 0.04, "grad_norm": 1.322121278442904, "learning_rate": 3.998117074535398e-05, "loss": 0.6708, "step": 198 }, { "epoch": 0.04, "grad_norm": 1.349313308768587, "learning_rate": 3.9980548433385525e-05, "loss": 0.6601, "step": 199 }, { "epoch": 0.04, "grad_norm": 1.3259708662327934, "learning_rate": 3.997991600913903e-05, "loss": 0.6067, "step": 200 }, { "epoch": 0.04, "grad_norm": 1.2868747989456146, "learning_rate": 3.9979273472934556e-05, "loss": 0.6959, "step": 201 }, { "epoch": 0.04, "grad_norm": 1.2581457798279778, "learning_rate": 3.9978620825097306e-05, "loss": 0.6414, "step": 202 }, { "epoch": 0.04, "grad_norm": 1.2659091865878211, "learning_rate": 3.997795806595758e-05, "loss": 0.6121, "step": 203 }, { "epoch": 0.04, "grad_norm": 1.2788722607588214, "learning_rate": 3.9977285195850816e-05, "loss": 0.7051, "step": 204 }, { "epoch": 0.05, "grad_norm": 1.268616331979891, "learning_rate": 3.9976602215117554e-05, "loss": 0.5972, "step": 205 }, { "epoch": 0.05, "grad_norm": 1.2646466465057298, "learning_rate": 3.997590912410345e-05, "loss": 0.5288, "step": 206 }, { "epoch": 0.05, "grad_norm": 1.4566461703664726, "learning_rate": 3.997520592315929e-05, "loss": 0.706, "step": 207 }, { "epoch": 0.05, "grad_norm": 1.298420685844027, "learning_rate": 3.997449261264095e-05, "loss": 0.673, "step": 208 }, { "epoch": 0.05, "grad_norm": 1.2174582849832838, "learning_rate": 3.997376919290946e-05, "loss": 0.5606, "step": 209 }, { "epoch": 0.05, "grad_norm": 1.164802930233094, "learning_rate": 3.997303566433094e-05, "loss": 0.5624, "step": 210 }, { "epoch": 0.05, "grad_norm": 1.1716197435217768, "learning_rate": 3.997229202727663e-05, "loss": 0.5921, "step": 211 }, { "epoch": 0.05, "grad_norm": 1.2121105296122996, "learning_rate": 3.99715382821229e-05, "loss": 0.6432, "step": 212 }, { "epoch": 0.05, "grad_norm": 1.2524098531752716, "learning_rate": 3.997077442925122e-05, "loss": 0.6192, "step": 213 }, { "epoch": 0.05, "grad_norm": 1.1942955627853422, "learning_rate": 3.997000046904817e-05, "loss": 0.6344, "step": 214 }, { "epoch": 0.05, "grad_norm": 1.2520875665187947, "learning_rate": 3.996921640190547e-05, "loss": 0.6857, "step": 215 }, { "epoch": 0.05, "grad_norm": 1.177316721281303, "learning_rate": 3.996842222821994e-05, "loss": 0.5736, "step": 216 }, { "epoch": 0.05, "grad_norm": 1.224012311462119, "learning_rate": 3.9967617948393504e-05, "loss": 0.6102, "step": 217 }, { "epoch": 0.05, "grad_norm": 1.2894347607016468, "learning_rate": 3.996680356283322e-05, "loss": 0.6487, "step": 218 }, { "epoch": 0.05, "grad_norm": 1.3895495252866978, "learning_rate": 3.996597907195126e-05, "loss": 0.661, "step": 219 }, { "epoch": 0.05, "grad_norm": 1.32050710293565, "learning_rate": 3.996514447616489e-05, "loss": 0.6459, "step": 220 }, { "epoch": 0.05, "grad_norm": 1.180191235644313, "learning_rate": 3.996429977589653e-05, "loss": 0.6245, "step": 221 }, { "epoch": 0.05, "grad_norm": 1.249228191518979, "learning_rate": 3.9963444971573656e-05, "loss": 0.6258, "step": 222 }, { "epoch": 0.05, "grad_norm": 1.2439700598103596, "learning_rate": 3.996258006362891e-05, "loss": 0.6282, "step": 223 }, { "epoch": 0.05, "grad_norm": 1.262995307376024, "learning_rate": 3.996170505250002e-05, "loss": 0.5968, "step": 224 }, { "epoch": 0.05, "grad_norm": 1.2049038110227466, "learning_rate": 3.9960819938629834e-05, "loss": 0.6372, "step": 225 }, { "epoch": 0.05, "grad_norm": 1.1953981655564385, "learning_rate": 3.995992472246632e-05, "loss": 0.5788, "step": 226 }, { "epoch": 0.05, "grad_norm": 1.1900866094695106, "learning_rate": 3.995901940446254e-05, "loss": 0.571, "step": 227 }, { "epoch": 0.05, "grad_norm": 1.2301740989906733, "learning_rate": 3.995810398507669e-05, "loss": 0.5917, "step": 228 }, { "epoch": 0.05, "grad_norm": 1.2523850099236096, "learning_rate": 3.995717846477207e-05, "loss": 0.5853, "step": 229 }, { "epoch": 0.05, "grad_norm": 1.2378971695538876, "learning_rate": 3.9956242844017094e-05, "loss": 0.5738, "step": 230 }, { "epoch": 0.05, "grad_norm": 1.1288816455944937, "learning_rate": 3.995529712328528e-05, "loss": 0.5542, "step": 231 }, { "epoch": 0.05, "grad_norm": 1.1148924829622244, "learning_rate": 3.995434130305526e-05, "loss": 0.5093, "step": 232 }, { "epoch": 0.05, "grad_norm": 1.2799793548707197, "learning_rate": 3.995337538381079e-05, "loss": 0.5565, "step": 233 }, { "epoch": 0.05, "grad_norm": 1.231420239912531, "learning_rate": 3.995239936604072e-05, "loss": 0.5714, "step": 234 }, { "epoch": 0.05, "grad_norm": 1.3992004870442785, "learning_rate": 3.995141325023902e-05, "loss": 0.6154, "step": 235 }, { "epoch": 0.05, "grad_norm": 1.1949779565924818, "learning_rate": 3.995041703690477e-05, "loss": 0.5143, "step": 236 }, { "epoch": 0.05, "grad_norm": 1.198730767088812, "learning_rate": 3.994941072654215e-05, "loss": 0.4691, "step": 237 }, { "epoch": 0.05, "grad_norm": 1.2731486267833902, "learning_rate": 3.9948394319660485e-05, "loss": 0.5808, "step": 238 }, { "epoch": 0.05, "grad_norm": 1.3653911887989123, "learning_rate": 3.994736781677416e-05, "loss": 0.65, "step": 239 }, { "epoch": 0.05, "grad_norm": 1.3105154929120355, "learning_rate": 3.994633121840271e-05, "loss": 0.6434, "step": 240 }, { "epoch": 0.05, "grad_norm": 1.2381977811292837, "learning_rate": 3.994528452507076e-05, "loss": 0.5829, "step": 241 }, { "epoch": 0.05, "grad_norm": 1.1402137746645646, "learning_rate": 3.994422773730803e-05, "loss": 0.509, "step": 242 }, { "epoch": 0.05, "grad_norm": 1.208429458784759, "learning_rate": 3.99431608556494e-05, "loss": 0.6454, "step": 243 }, { "epoch": 0.05, "grad_norm": 1.1542773043398307, "learning_rate": 3.99420838806348e-05, "loss": 0.5745, "step": 244 }, { "epoch": 0.05, "grad_norm": 1.0919978139422268, "learning_rate": 3.99409968128093e-05, "loss": 0.4986, "step": 245 }, { "epoch": 0.05, "grad_norm": 1.113345977507724, "learning_rate": 3.993989965272308e-05, "loss": 0.5712, "step": 246 }, { "epoch": 0.05, "grad_norm": 1.3593819278233579, "learning_rate": 3.99387924009314e-05, "loss": 0.6304, "step": 247 }, { "epoch": 0.05, "grad_norm": 1.1946094913942888, "learning_rate": 3.9937675057994666e-05, "loss": 0.5803, "step": 248 }, { "epoch": 0.05, "grad_norm": 1.1889538648909175, "learning_rate": 3.993654762447837e-05, "loss": 0.5862, "step": 249 }, { "epoch": 0.05, "grad_norm": 1.0987200486198738, "learning_rate": 3.9935410100953105e-05, "loss": 0.4049, "step": 250 }, { "epoch": 0.06, "grad_norm": 1.2101428962694776, "learning_rate": 3.993426248799458e-05, "loss": 0.57, "step": 251 }, { "epoch": 0.06, "grad_norm": 1.234140639450306, "learning_rate": 3.993310478618361e-05, "loss": 0.5653, "step": 252 }, { "epoch": 0.06, "grad_norm": 1.2708140773043917, "learning_rate": 3.993193699610612e-05, "loss": 0.588, "step": 253 }, { "epoch": 0.06, "grad_norm": 1.2785513299746012, "learning_rate": 3.9930759118353124e-05, "loss": 0.6005, "step": 254 }, { "epoch": 0.06, "grad_norm": 1.4274452845681305, "learning_rate": 3.992957115352077e-05, "loss": 0.6807, "step": 255 }, { "epoch": 0.06, "grad_norm": 1.2004749788586693, "learning_rate": 3.992837310221028e-05, "loss": 0.587, "step": 256 }, { "epoch": 0.06, "grad_norm": 1.1666004752501002, "learning_rate": 3.9927164965028006e-05, "loss": 0.4766, "step": 257 }, { "epoch": 0.06, "grad_norm": 1.18853834833233, "learning_rate": 3.9925946742585385e-05, "loss": 0.5422, "step": 258 }, { "epoch": 0.06, "grad_norm": 1.2094667424537495, "learning_rate": 3.9924718435498964e-05, "loss": 0.559, "step": 259 }, { "epoch": 0.06, "grad_norm": 1.2747637155785871, "learning_rate": 3.9923480044390405e-05, "loss": 0.5448, "step": 260 }, { "epoch": 0.06, "grad_norm": 1.1455729564410346, "learning_rate": 3.9922231569886464e-05, "loss": 0.4595, "step": 261 }, { "epoch": 0.06, "grad_norm": 1.2280623868943858, "learning_rate": 3.9920973012619e-05, "loss": 0.4862, "step": 262 }, { "epoch": 0.06, "grad_norm": 1.0514020822946684, "learning_rate": 3.9919704373224984e-05, "loss": 0.4702, "step": 263 }, { "epoch": 0.06, "grad_norm": 1.1276534365607926, "learning_rate": 3.991842565234647e-05, "loss": 0.5056, "step": 264 }, { "epoch": 0.06, "grad_norm": 1.1465460843532795, "learning_rate": 3.991713685063063e-05, "loss": 0.4813, "step": 265 }, { "epoch": 0.06, "grad_norm": 1.0890863993360724, "learning_rate": 3.991583796872974e-05, "loss": 0.5131, "step": 266 }, { "epoch": 0.06, "grad_norm": 1.291958832256156, "learning_rate": 3.991452900730116e-05, "loss": 0.5547, "step": 267 }, { "epoch": 0.06, "grad_norm": 1.1784316698414166, "learning_rate": 3.991320996700737e-05, "loss": 0.4902, "step": 268 }, { "epoch": 0.06, "grad_norm": 1.1724553006564273, "learning_rate": 3.991188084851596e-05, "loss": 0.4878, "step": 269 }, { "epoch": 0.06, "grad_norm": 1.2264566558071297, "learning_rate": 3.991054165249958e-05, "loss": 0.5361, "step": 270 }, { "epoch": 0.06, "grad_norm": 1.204194868916881, "learning_rate": 3.990919237963602e-05, "loss": 0.4693, "step": 271 }, { "epoch": 0.06, "grad_norm": 1.165114880738373, "learning_rate": 3.9907833030608153e-05, "loss": 0.4728, "step": 272 }, { "epoch": 0.06, "grad_norm": 1.2662323801183593, "learning_rate": 3.990646360610395e-05, "loss": 0.5497, "step": 273 }, { "epoch": 0.06, "grad_norm": 1.3041430039539232, "learning_rate": 3.9905084106816494e-05, "loss": 0.5175, "step": 274 }, { "epoch": 0.06, "grad_norm": 1.2265639578452223, "learning_rate": 3.990369453344394e-05, "loss": 0.5045, "step": 275 }, { "epoch": 0.06, "grad_norm": 1.100467868486642, "learning_rate": 3.9902294886689576e-05, "loss": 0.5279, "step": 276 }, { "epoch": 0.06, "grad_norm": 1.2000185011391165, "learning_rate": 3.990088516726177e-05, "loss": 0.508, "step": 277 }, { "epoch": 0.06, "grad_norm": 1.2701732714039602, "learning_rate": 3.9899465375873985e-05, "loss": 0.5277, "step": 278 }, { "epoch": 0.06, "grad_norm": 1.3908324783365231, "learning_rate": 3.989803551324479e-05, "loss": 0.6504, "step": 279 }, { "epoch": 0.06, "grad_norm": 1.2523779661533918, "learning_rate": 3.989659558009784e-05, "loss": 0.5366, "step": 280 }, { "epoch": 0.06, "grad_norm": 1.122515159453593, "learning_rate": 3.98951455771619e-05, "loss": 0.4976, "step": 281 }, { "epoch": 0.06, "grad_norm": 1.2132206711933338, "learning_rate": 3.989368550517083e-05, "loss": 0.5127, "step": 282 }, { "epoch": 0.06, "grad_norm": 1.0510171391498828, "learning_rate": 3.989221536486357e-05, "loss": 0.3852, "step": 283 }, { "epoch": 0.06, "grad_norm": 1.084079497691504, "learning_rate": 3.989073515698417e-05, "loss": 0.4238, "step": 284 }, { "epoch": 0.06, "grad_norm": 1.2574323944666181, "learning_rate": 3.988924488228178e-05, "loss": 0.4225, "step": 285 }, { "epoch": 0.06, "grad_norm": 1.2767442702237093, "learning_rate": 3.988774454151063e-05, "loss": 0.4731, "step": 286 }, { "epoch": 0.06, "grad_norm": 1.1875667553258227, "learning_rate": 3.988623413543006e-05, "loss": 0.4941, "step": 287 }, { "epoch": 0.06, "grad_norm": 1.1722931179545868, "learning_rate": 3.9884713664804485e-05, "loss": 0.5455, "step": 288 }, { "epoch": 0.06, "grad_norm": 1.1571276658690817, "learning_rate": 3.9883183130403424e-05, "loss": 0.5155, "step": 289 }, { "epoch": 0.06, "grad_norm": 1.273868698414468, "learning_rate": 3.98816425330015e-05, "loss": 0.6212, "step": 290 }, { "epoch": 0.06, "grad_norm": 1.1349200407927071, "learning_rate": 3.9880091873378416e-05, "loss": 0.5138, "step": 291 }, { "epoch": 0.06, "grad_norm": 1.2036075862726578, "learning_rate": 3.9878531152318966e-05, "loss": 0.4458, "step": 292 }, { "epoch": 0.06, "grad_norm": 1.2523328946048258, "learning_rate": 3.987696037061304e-05, "loss": 0.5277, "step": 293 }, { "epoch": 0.06, "grad_norm": 1.0786026711932888, "learning_rate": 3.9875379529055624e-05, "loss": 0.4084, "step": 294 }, { "epoch": 0.06, "grad_norm": 1.3213455402401926, "learning_rate": 3.987378862844679e-05, "loss": 0.5018, "step": 295 }, { "epoch": 0.07, "grad_norm": 1.3459430807302186, "learning_rate": 3.987218766959171e-05, "loss": 0.5742, "step": 296 }, { "epoch": 0.07, "grad_norm": 1.16850411383607, "learning_rate": 3.987057665330063e-05, "loss": 0.5138, "step": 297 }, { "epoch": 0.07, "grad_norm": 1.1359638447179328, "learning_rate": 3.986895558038889e-05, "loss": 0.4713, "step": 298 }, { "epoch": 0.07, "grad_norm": 1.2118807164844807, "learning_rate": 3.986732445167694e-05, "loss": 0.4895, "step": 299 }, { "epoch": 0.07, "grad_norm": 1.1783109806499985, "learning_rate": 3.9865683267990295e-05, "loss": 0.4614, "step": 300 }, { "epoch": 0.07, "grad_norm": 1.2716055957300278, "learning_rate": 3.986403203015957e-05, "loss": 0.4847, "step": 301 }, { "epoch": 0.07, "grad_norm": 1.293195952307599, "learning_rate": 3.9862370739020455e-05, "loss": 0.5353, "step": 302 }, { "epoch": 0.07, "grad_norm": 1.2657770607105185, "learning_rate": 3.9860699395413764e-05, "loss": 0.499, "step": 303 }, { "epoch": 0.07, "grad_norm": 1.0391015640602386, "learning_rate": 3.985901800018535e-05, "loss": 0.4169, "step": 304 }, { "epoch": 0.07, "grad_norm": 1.215289852748404, "learning_rate": 3.98573265541862e-05, "loss": 0.5027, "step": 305 }, { "epoch": 0.07, "grad_norm": 1.1711716130426661, "learning_rate": 3.985562505827235e-05, "loss": 0.4303, "step": 306 }, { "epoch": 0.07, "grad_norm": 1.1602282774687112, "learning_rate": 3.985391351330494e-05, "loss": 0.4306, "step": 307 }, { "epoch": 0.07, "grad_norm": 1.0702587587550187, "learning_rate": 3.985219192015019e-05, "loss": 0.4604, "step": 308 }, { "epoch": 0.07, "grad_norm": 1.163744991959594, "learning_rate": 3.985046027967943e-05, "loss": 0.5091, "step": 309 }, { "epoch": 0.07, "grad_norm": 1.0533315348509231, "learning_rate": 3.984871859276902e-05, "loss": 0.4756, "step": 310 }, { "epoch": 0.07, "grad_norm": 1.164256649339924, "learning_rate": 3.984696686030046e-05, "loss": 0.4769, "step": 311 }, { "epoch": 0.07, "grad_norm": 1.099305458146492, "learning_rate": 3.9845205083160315e-05, "loss": 0.441, "step": 312 }, { "epoch": 0.07, "grad_norm": 1.0560989542749786, "learning_rate": 3.984343326224022e-05, "loss": 0.4744, "step": 313 }, { "epoch": 0.07, "grad_norm": 1.2728584236684755, "learning_rate": 3.9841651398436907e-05, "loss": 0.5036, "step": 314 }, { "epoch": 0.07, "grad_norm": 1.092996828428869, "learning_rate": 3.983985949265219e-05, "loss": 0.4692, "step": 315 }, { "epoch": 0.07, "grad_norm": 1.0802089067026577, "learning_rate": 3.983805754579297e-05, "loss": 0.3904, "step": 316 }, { "epoch": 0.07, "grad_norm": 1.1006761055323477, "learning_rate": 3.98362455587712e-05, "loss": 0.4057, "step": 317 }, { "epoch": 0.07, "grad_norm": 1.109374892543734, "learning_rate": 3.9834423532503975e-05, "loss": 0.4865, "step": 318 }, { "epoch": 0.07, "grad_norm": 1.0895982144411165, "learning_rate": 3.9832591467913405e-05, "loss": 0.4018, "step": 319 }, { "epoch": 0.07, "grad_norm": 1.1037257711790474, "learning_rate": 3.9830749365926716e-05, "loss": 0.4063, "step": 320 }, { "epoch": 0.07, "grad_norm": 1.137745763617826, "learning_rate": 3.982889722747621e-05, "loss": 0.4296, "step": 321 }, { "epoch": 0.07, "grad_norm": 1.0638032101547767, "learning_rate": 3.9827035053499264e-05, "loss": 0.4478, "step": 322 }, { "epoch": 0.07, "grad_norm": 1.1201766246933287, "learning_rate": 3.982516284493834e-05, "loss": 0.4129, "step": 323 }, { "epoch": 0.07, "grad_norm": 1.0943915529159343, "learning_rate": 3.982328060274097e-05, "loss": 0.4141, "step": 324 }, { "epoch": 0.07, "grad_norm": 1.0233495543454487, "learning_rate": 3.982138832785976e-05, "loss": 0.4061, "step": 325 }, { "epoch": 0.07, "grad_norm": 1.2608735646201592, "learning_rate": 3.981948602125242e-05, "loss": 0.4925, "step": 326 }, { "epoch": 0.07, "grad_norm": 1.1493183831878495, "learning_rate": 3.98175736838817e-05, "loss": 0.465, "step": 327 }, { "epoch": 0.07, "grad_norm": 1.1636103321511055, "learning_rate": 3.981565131671546e-05, "loss": 0.5018, "step": 328 }, { "epoch": 0.07, "grad_norm": 1.0613981592394197, "learning_rate": 3.981371892072661e-05, "loss": 0.4161, "step": 329 }, { "epoch": 0.07, "grad_norm": 1.1859586148399008, "learning_rate": 3.981177649689317e-05, "loss": 0.4475, "step": 330 }, { "epoch": 0.07, "grad_norm": 1.0116317888392918, "learning_rate": 3.980982404619819e-05, "loss": 0.42, "step": 331 }, { "epoch": 0.07, "grad_norm": 1.1123049018464455, "learning_rate": 3.9807861569629815e-05, "loss": 0.4625, "step": 332 }, { "epoch": 0.07, "grad_norm": 1.0650799226994028, "learning_rate": 3.980588906818129e-05, "loss": 0.3434, "step": 333 }, { "epoch": 0.07, "grad_norm": 1.1856989001065437, "learning_rate": 3.980390654285088e-05, "loss": 0.4546, "step": 334 }, { "epoch": 0.07, "grad_norm": 1.1165114271140357, "learning_rate": 3.980191399464198e-05, "loss": 0.4023, "step": 335 }, { "epoch": 0.07, "grad_norm": 1.1062497456868197, "learning_rate": 3.979991142456302e-05, "loss": 0.3757, "step": 336 }, { "epoch": 0.07, "grad_norm": 1.1251817132637552, "learning_rate": 3.9797898833627514e-05, "loss": 0.4204, "step": 337 }, { "epoch": 0.07, "grad_norm": 1.076857499009745, "learning_rate": 3.979587622285404e-05, "loss": 0.3577, "step": 338 }, { "epoch": 0.07, "grad_norm": 1.149526689402929, "learning_rate": 3.979384359326626e-05, "loss": 0.5064, "step": 339 }, { "epoch": 0.07, "grad_norm": 1.0409359785175913, "learning_rate": 3.97918009458929e-05, "loss": 0.3905, "step": 340 }, { "epoch": 0.07, "grad_norm": 1.0025633502264812, "learning_rate": 3.9789748281767754e-05, "loss": 0.3792, "step": 341 }, { "epoch": 0.08, "grad_norm": 1.1626214445933571, "learning_rate": 3.978768560192969e-05, "loss": 0.4412, "step": 342 }, { "epoch": 0.08, "grad_norm": 1.2902301008688999, "learning_rate": 3.978561290742265e-05, "loss": 0.4021, "step": 343 }, { "epoch": 0.08, "grad_norm": 1.1143634555494666, "learning_rate": 3.978353019929562e-05, "loss": 0.4048, "step": 344 }, { "epoch": 0.08, "grad_norm": 1.195219148468686, "learning_rate": 3.978143747860269e-05, "loss": 0.4526, "step": 345 }, { "epoch": 0.08, "grad_norm": 1.0725898738809163, "learning_rate": 3.977933474640298e-05, "loss": 0.4468, "step": 346 }, { "epoch": 0.08, "grad_norm": 1.1025137615426255, "learning_rate": 3.9777222003760714e-05, "loss": 0.4239, "step": 347 }, { "epoch": 0.08, "grad_norm": 1.0611781143806014, "learning_rate": 3.977509925174515e-05, "loss": 0.4622, "step": 348 }, { "epoch": 0.08, "grad_norm": 1.1455237866366754, "learning_rate": 3.977296649143064e-05, "loss": 0.4485, "step": 349 }, { "epoch": 0.08, "grad_norm": 1.0029408603777608, "learning_rate": 3.9770823723896574e-05, "loss": 0.3871, "step": 350 }, { "epoch": 0.08, "grad_norm": 1.0213749026395467, "learning_rate": 3.976867095022742e-05, "loss": 0.4471, "step": 351 }, { "epoch": 0.08, "grad_norm": 1.148935430626447, "learning_rate": 3.9766508171512715e-05, "loss": 0.4755, "step": 352 }, { "epoch": 0.08, "grad_norm": 1.0381439853059833, "learning_rate": 3.976433538884706e-05, "loss": 0.376, "step": 353 }, { "epoch": 0.08, "grad_norm": 1.2431017791896115, "learning_rate": 3.97621526033301e-05, "loss": 0.478, "step": 354 }, { "epoch": 0.08, "grad_norm": 1.1850241399471089, "learning_rate": 3.9759959816066575e-05, "loss": 0.4424, "step": 355 }, { "epoch": 0.08, "grad_norm": 1.0482157192652117, "learning_rate": 3.975775702816625e-05, "loss": 0.3953, "step": 356 }, { "epoch": 0.08, "grad_norm": 1.173974001130304, "learning_rate": 3.975554424074397e-05, "loss": 0.4347, "step": 357 }, { "epoch": 0.08, "grad_norm": 1.1813716361579474, "learning_rate": 3.975332145491965e-05, "loss": 0.4359, "step": 358 }, { "epoch": 0.08, "grad_norm": 1.1235249173635753, "learning_rate": 3.975108867181826e-05, "loss": 0.3873, "step": 359 }, { "epoch": 0.08, "grad_norm": 1.1060145348426775, "learning_rate": 3.974884589256981e-05, "loss": 0.3915, "step": 360 }, { "epoch": 0.08, "grad_norm": 1.2229112353142, "learning_rate": 3.97465931183094e-05, "loss": 0.4774, "step": 361 }, { "epoch": 0.08, "grad_norm": 1.0023051158568568, "learning_rate": 3.9744330350177156e-05, "loss": 0.3639, "step": 362 }, { "epoch": 0.08, "grad_norm": 1.1390226422572052, "learning_rate": 3.974205758931828e-05, "loss": 0.3841, "step": 363 }, { "epoch": 0.08, "grad_norm": 1.079691315794632, "learning_rate": 3.973977483688305e-05, "loss": 0.3615, "step": 364 }, { "epoch": 0.08, "grad_norm": 1.1729070569328504, "learning_rate": 3.9737482094026764e-05, "loss": 0.3758, "step": 365 }, { "epoch": 0.08, "grad_norm": 1.250347184603744, "learning_rate": 3.9735179361909803e-05, "loss": 0.4381, "step": 366 }, { "epoch": 0.08, "grad_norm": 1.0533736911619336, "learning_rate": 3.9732866641697586e-05, "loss": 0.3279, "step": 367 }, { "epoch": 0.08, "grad_norm": 1.454656798533515, "learning_rate": 3.9730543934560595e-05, "loss": 0.3942, "step": 368 }, { "epoch": 0.08, "grad_norm": 1.079306576961687, "learning_rate": 3.9728211241674363e-05, "loss": 0.3655, "step": 369 }, { "epoch": 0.08, "grad_norm": 1.1195722616367776, "learning_rate": 3.972586856421949e-05, "loss": 0.4542, "step": 370 }, { "epoch": 0.08, "grad_norm": 1.1305671274736946, "learning_rate": 3.9723515903381625e-05, "loss": 0.4273, "step": 371 }, { "epoch": 0.08, "grad_norm": 1.050735189002656, "learning_rate": 3.9721153260351446e-05, "loss": 0.3854, "step": 372 }, { "epoch": 0.08, "grad_norm": 1.0818517653131625, "learning_rate": 3.971878063632471e-05, "loss": 0.395, "step": 373 }, { "epoch": 0.08, "grad_norm": 1.0001708123235014, "learning_rate": 3.971639803250221e-05, "loss": 0.4188, "step": 374 }, { "epoch": 0.08, "grad_norm": 1.0316954142109276, "learning_rate": 3.9714005450089815e-05, "loss": 0.3553, "step": 375 }, { "epoch": 0.08, "grad_norm": 1.0061576919391308, "learning_rate": 3.971160289029841e-05, "loss": 0.3474, "step": 376 }, { "epoch": 0.08, "grad_norm": 1.07023146942143, "learning_rate": 3.9709190354343936e-05, "loss": 0.382, "step": 377 }, { "epoch": 0.08, "grad_norm": 0.9957014321050548, "learning_rate": 3.9706767843447417e-05, "loss": 0.3575, "step": 378 }, { "epoch": 0.08, "grad_norm": 0.9866634592012732, "learning_rate": 3.970433535883489e-05, "loss": 0.3528, "step": 379 }, { "epoch": 0.08, "grad_norm": 1.2674516282642205, "learning_rate": 3.970189290173744e-05, "loss": 0.4578, "step": 380 }, { "epoch": 0.08, "grad_norm": 1.0813481346976104, "learning_rate": 3.969944047339122e-05, "loss": 0.3588, "step": 381 }, { "epoch": 0.08, "grad_norm": 1.035604185679093, "learning_rate": 3.969697807503742e-05, "loss": 0.3632, "step": 382 }, { "epoch": 0.08, "grad_norm": 1.0593263688079588, "learning_rate": 3.969450570792227e-05, "loss": 0.3982, "step": 383 }, { "epoch": 0.08, "grad_norm": 1.1305488858558645, "learning_rate": 3.969202337329705e-05, "loss": 0.4326, "step": 384 }, { "epoch": 0.08, "grad_norm": 0.9800345078543079, "learning_rate": 3.968953107241809e-05, "loss": 0.3461, "step": 385 }, { "epoch": 0.08, "grad_norm": 1.1507518570325785, "learning_rate": 3.9687028806546756e-05, "loss": 0.4266, "step": 386 }, { "epoch": 0.08, "grad_norm": 1.160973771402753, "learning_rate": 3.968451657694946e-05, "loss": 0.3961, "step": 387 }, { "epoch": 0.09, "grad_norm": 1.0522505120629397, "learning_rate": 3.9681994384897654e-05, "loss": 0.3832, "step": 388 }, { "epoch": 0.09, "grad_norm": 1.0262066600350739, "learning_rate": 3.967946223166784e-05, "loss": 0.3564, "step": 389 }, { "epoch": 0.09, "grad_norm": 1.1021390447735178, "learning_rate": 3.967692011854155e-05, "loss": 0.3057, "step": 390 }, { "epoch": 0.09, "grad_norm": 1.1459370595048486, "learning_rate": 3.967436804680537e-05, "loss": 0.406, "step": 391 }, { "epoch": 0.09, "grad_norm": 1.1801973971437896, "learning_rate": 3.9671806017750915e-05, "loss": 0.4478, "step": 392 }, { "epoch": 0.09, "grad_norm": 1.142012235916646, "learning_rate": 3.966923403267485e-05, "loss": 0.3831, "step": 393 }, { "epoch": 0.09, "grad_norm": 1.0224250852753491, "learning_rate": 3.9666652092878856e-05, "loss": 0.3376, "step": 394 }, { "epoch": 0.09, "grad_norm": 1.0248021935123073, "learning_rate": 3.966406019966968e-05, "loss": 0.3892, "step": 395 }, { "epoch": 0.09, "grad_norm": 1.0170305356568081, "learning_rate": 3.9661458354359105e-05, "loss": 0.3874, "step": 396 }, { "epoch": 0.09, "grad_norm": 1.1045133957989504, "learning_rate": 3.9658846558263925e-05, "loss": 0.4759, "step": 397 }, { "epoch": 0.09, "grad_norm": 1.0138047559369607, "learning_rate": 3.965622481270599e-05, "loss": 0.3802, "step": 398 }, { "epoch": 0.09, "grad_norm": 0.9892519680502024, "learning_rate": 3.9653593119012185e-05, "loss": 0.3557, "step": 399 }, { "epoch": 0.09, "grad_norm": 1.0388442075496735, "learning_rate": 3.965095147851442e-05, "loss": 0.4092, "step": 400 }, { "epoch": 0.09, "grad_norm": 1.0368870498665264, "learning_rate": 3.9648299892549654e-05, "loss": 0.3768, "step": 401 }, { "epoch": 0.09, "grad_norm": 1.0832108220495158, "learning_rate": 3.964563836245987e-05, "loss": 0.359, "step": 402 }, { "epoch": 0.09, "grad_norm": 1.03824313562385, "learning_rate": 3.964296688959208e-05, "loss": 0.4537, "step": 403 }, { "epoch": 0.09, "grad_norm": 1.054298611752923, "learning_rate": 3.964028547529832e-05, "loss": 0.3528, "step": 404 }, { "epoch": 0.09, "grad_norm": 0.9577731934987448, "learning_rate": 3.9637594120935697e-05, "loss": 0.366, "step": 405 }, { "epoch": 0.09, "grad_norm": 0.9435210790316217, "learning_rate": 3.9634892827866306e-05, "loss": 0.3064, "step": 406 }, { "epoch": 0.09, "grad_norm": 0.9685154907886202, "learning_rate": 3.9632181597457296e-05, "loss": 0.3803, "step": 407 }, { "epoch": 0.09, "grad_norm": 1.1840539421603884, "learning_rate": 3.9629460431080825e-05, "loss": 0.4352, "step": 408 }, { "epoch": 0.09, "grad_norm": 1.0350869209537155, "learning_rate": 3.96267293301141e-05, "loss": 0.3756, "step": 409 }, { "epoch": 0.09, "grad_norm": 0.9668692821784871, "learning_rate": 3.962398829593935e-05, "loss": 0.3315, "step": 410 }, { "epoch": 0.09, "grad_norm": 1.0181116034631583, "learning_rate": 3.962123732994383e-05, "loss": 0.3729, "step": 411 }, { "epoch": 0.09, "grad_norm": 1.0769434549688277, "learning_rate": 3.961847643351981e-05, "loss": 0.344, "step": 412 }, { "epoch": 0.09, "grad_norm": 1.0925582933041047, "learning_rate": 3.961570560806461e-05, "loss": 0.4162, "step": 413 }, { "epoch": 0.09, "grad_norm": 1.102557118811125, "learning_rate": 3.9612924854980556e-05, "loss": 0.4166, "step": 414 }, { "epoch": 0.09, "grad_norm": 1.1286347270584014, "learning_rate": 3.9610134175675e-05, "loss": 0.3693, "step": 415 }, { "epoch": 0.09, "grad_norm": 0.9415423662860831, "learning_rate": 3.960733357156033e-05, "loss": 0.3004, "step": 416 }, { "epoch": 0.09, "grad_norm": 1.0366728990271383, "learning_rate": 3.960452304405394e-05, "loss": 0.3846, "step": 417 }, { "epoch": 0.09, "grad_norm": 0.9635313119417835, "learning_rate": 3.960170259457826e-05, "loss": 0.2909, "step": 418 }, { "epoch": 0.09, "grad_norm": 0.9654734792005484, "learning_rate": 3.959887222456075e-05, "loss": 0.3031, "step": 419 }, { "epoch": 0.09, "grad_norm": 0.95233152536584, "learning_rate": 3.959603193543385e-05, "loss": 0.3166, "step": 420 }, { "epoch": 0.09, "grad_norm": 1.0463975415402151, "learning_rate": 3.959318172863506e-05, "loss": 0.3488, "step": 421 }, { "epoch": 0.09, "grad_norm": 1.1291429212168487, "learning_rate": 3.95903216056069e-05, "loss": 0.4022, "step": 422 }, { "epoch": 0.09, "grad_norm": 1.0365884913215422, "learning_rate": 3.958745156779688e-05, "loss": 0.347, "step": 423 }, { "epoch": 0.09, "grad_norm": 1.0666907074325134, "learning_rate": 3.9584571616657544e-05, "loss": 0.3873, "step": 424 }, { "epoch": 0.09, "grad_norm": 0.9862395898344498, "learning_rate": 3.958168175364646e-05, "loss": 0.3259, "step": 425 }, { "epoch": 0.09, "grad_norm": 0.8618518508668179, "learning_rate": 3.957878198022621e-05, "loss": 0.2429, "step": 426 }, { "epoch": 0.09, "grad_norm": 0.9302577665948445, "learning_rate": 3.957587229786437e-05, "loss": 0.3339, "step": 427 }, { "epoch": 0.09, "grad_norm": 0.9680432848986547, "learning_rate": 3.9572952708033564e-05, "loss": 0.3006, "step": 428 }, { "epoch": 0.09, "grad_norm": 1.0056531379893139, "learning_rate": 3.9570023212211405e-05, "loss": 0.3533, "step": 429 }, { "epoch": 0.09, "grad_norm": 1.042314188290271, "learning_rate": 3.956708381188054e-05, "loss": 0.3704, "step": 430 }, { "epoch": 0.09, "grad_norm": 0.9955369477688994, "learning_rate": 3.95641345085286e-05, "loss": 0.3224, "step": 431 }, { "epoch": 0.09, "grad_norm": 1.1437350162712814, "learning_rate": 3.956117530364826e-05, "loss": 0.3429, "step": 432 }, { "epoch": 0.1, "grad_norm": 1.1434482489093942, "learning_rate": 3.955820619873719e-05, "loss": 0.3979, "step": 433 }, { "epoch": 0.1, "grad_norm": 1.0885754934405945, "learning_rate": 3.955522719529807e-05, "loss": 0.3778, "step": 434 }, { "epoch": 0.1, "grad_norm": 1.006883414746575, "learning_rate": 3.9552238294838584e-05, "loss": 0.3259, "step": 435 }, { "epoch": 0.1, "grad_norm": 0.9424097245084614, "learning_rate": 3.954923949887144e-05, "loss": 0.2967, "step": 436 }, { "epoch": 0.1, "grad_norm": 0.879260419905861, "learning_rate": 3.954623080891435e-05, "loss": 0.2797, "step": 437 }, { "epoch": 0.1, "grad_norm": 1.0029412763867633, "learning_rate": 3.954321222649003e-05, "loss": 0.3128, "step": 438 }, { "epoch": 0.1, "grad_norm": 0.9197166521725636, "learning_rate": 3.95401837531262e-05, "loss": 0.281, "step": 439 }, { "epoch": 0.1, "grad_norm": 1.1461508686625692, "learning_rate": 3.953714539035558e-05, "loss": 0.3273, "step": 440 }, { "epoch": 0.1, "grad_norm": 1.0014668673416294, "learning_rate": 3.9534097139715926e-05, "loss": 0.3021, "step": 441 }, { "epoch": 0.1, "grad_norm": 1.0224160491504546, "learning_rate": 3.9531039002749955e-05, "loss": 0.331, "step": 442 }, { "epoch": 0.1, "grad_norm": 1.0680427709794582, "learning_rate": 3.952797098100543e-05, "loss": 0.3789, "step": 443 }, { "epoch": 0.1, "grad_norm": 0.9893873455653348, "learning_rate": 3.952489307603507e-05, "loss": 0.309, "step": 444 }, { "epoch": 0.1, "grad_norm": 1.0181761172525154, "learning_rate": 3.9521805289396645e-05, "loss": 0.3104, "step": 445 }, { "epoch": 0.1, "grad_norm": 1.1361743895130338, "learning_rate": 3.951870762265288e-05, "loss": 0.3481, "step": 446 }, { "epoch": 0.1, "grad_norm": 0.9401021448397183, "learning_rate": 3.9515600077371545e-05, "loss": 0.2785, "step": 447 }, { "epoch": 0.1, "grad_norm": 1.0281058266194, "learning_rate": 3.951248265512538e-05, "loss": 0.3919, "step": 448 }, { "epoch": 0.1, "grad_norm": 1.0209799810421567, "learning_rate": 3.950935535749213e-05, "loss": 0.3407, "step": 449 }, { "epoch": 0.1, "grad_norm": 1.0680804960868895, "learning_rate": 3.950621818605453e-05, "loss": 0.3009, "step": 450 }, { "epoch": 0.1, "grad_norm": 0.9809830673051324, "learning_rate": 3.950307114240034e-05, "loss": 0.2788, "step": 451 }, { "epoch": 0.1, "grad_norm": 1.091927999903504, "learning_rate": 3.9499914228122286e-05, "loss": 0.3584, "step": 452 }, { "epoch": 0.1, "grad_norm": 1.0496342294388752, "learning_rate": 3.9496747444818105e-05, "loss": 0.3215, "step": 453 }, { "epoch": 0.1, "grad_norm": 0.9472906994204717, "learning_rate": 3.9493570794090524e-05, "loss": 0.2863, "step": 454 }, { "epoch": 0.1, "grad_norm": 1.026604796399034, "learning_rate": 3.9490384277547266e-05, "loss": 0.3253, "step": 455 }, { "epoch": 0.1, "grad_norm": 1.0059919251445888, "learning_rate": 3.9487187896801054e-05, "loss": 0.3646, "step": 456 }, { "epoch": 0.1, "grad_norm": 1.1146466364962124, "learning_rate": 3.9483981653469586e-05, "loss": 0.3671, "step": 457 }, { "epoch": 0.1, "grad_norm": 0.8839794649332503, "learning_rate": 3.948076554917556e-05, "loss": 0.2903, "step": 458 }, { "epoch": 0.1, "grad_norm": 1.0456336903595724, "learning_rate": 3.9477539585546676e-05, "loss": 0.335, "step": 459 }, { "epoch": 0.1, "grad_norm": 1.052518634948309, "learning_rate": 3.9474303764215606e-05, "loss": 0.345, "step": 460 }, { "epoch": 0.1, "grad_norm": 1.0089797719510034, "learning_rate": 3.9471058086820024e-05, "loss": 0.3258, "step": 461 }, { "epoch": 0.1, "grad_norm": 0.9364948924689621, "learning_rate": 3.9467802555002584e-05, "loss": 0.2942, "step": 462 }, { "epoch": 0.1, "grad_norm": 1.027574984559381, "learning_rate": 3.946453717041093e-05, "loss": 0.3173, "step": 463 }, { "epoch": 0.1, "grad_norm": 1.019409640137024, "learning_rate": 3.94612619346977e-05, "loss": 0.3705, "step": 464 }, { "epoch": 0.1, "grad_norm": 0.9900787652596897, "learning_rate": 3.94579768495205e-05, "loss": 0.338, "step": 465 }, { "epoch": 0.1, "grad_norm": 0.9350070869462811, "learning_rate": 3.9454681916541936e-05, "loss": 0.2733, "step": 466 }, { "epoch": 0.1, "grad_norm": 1.005931903483432, "learning_rate": 3.94513771374296e-05, "loss": 0.2998, "step": 467 }, { "epoch": 0.1, "grad_norm": 0.9176156521492399, "learning_rate": 3.9448062513856056e-05, "loss": 0.277, "step": 468 }, { "epoch": 0.1, "grad_norm": 1.0603063040051155, "learning_rate": 3.944473804749885e-05, "loss": 0.3182, "step": 469 }, { "epoch": 0.1, "grad_norm": 0.878378035447139, "learning_rate": 3.944140374004052e-05, "loss": 0.2254, "step": 470 }, { "epoch": 0.1, "grad_norm": 1.0312910360783618, "learning_rate": 3.9438059593168586e-05, "loss": 0.3281, "step": 471 }, { "epoch": 0.1, "grad_norm": 0.9213199884441453, "learning_rate": 3.943470560857553e-05, "loss": 0.2669, "step": 472 }, { "epoch": 0.1, "grad_norm": 1.0196847617381182, "learning_rate": 3.943134178795883e-05, "loss": 0.2907, "step": 473 }, { "epoch": 0.1, "grad_norm": 1.0983891331319187, "learning_rate": 3.942796813302094e-05, "loss": 0.3776, "step": 474 }, { "epoch": 0.1, "grad_norm": 0.9573627754065492, "learning_rate": 3.942458464546928e-05, "loss": 0.3094, "step": 475 }, { "epoch": 0.1, "grad_norm": 1.002048777872047, "learning_rate": 3.942119132701625e-05, "loss": 0.3293, "step": 476 }, { "epoch": 0.1, "grad_norm": 0.9371921987692436, "learning_rate": 3.9417788179379245e-05, "loss": 0.3096, "step": 477 }, { "epoch": 0.1, "grad_norm": 0.9228511103876207, "learning_rate": 3.941437520428061e-05, "loss": 0.2665, "step": 478 }, { "epoch": 0.11, "grad_norm": 0.7805922600482893, "learning_rate": 3.941095240344766e-05, "loss": 0.2244, "step": 479 }, { "epoch": 0.11, "grad_norm": 0.9002954594099436, "learning_rate": 3.940751977861272e-05, "loss": 0.341, "step": 480 }, { "epoch": 0.11, "grad_norm": 0.8502514747667408, "learning_rate": 3.9404077331513044e-05, "loss": 0.3013, "step": 481 }, { "epoch": 0.11, "grad_norm": 0.9184259777508003, "learning_rate": 3.940062506389089e-05, "loss": 0.2894, "step": 482 }, { "epoch": 0.11, "grad_norm": 0.973871506001312, "learning_rate": 3.9397162977493455e-05, "loss": 0.2658, "step": 483 }, { "epoch": 0.11, "grad_norm": 0.9796821204711598, "learning_rate": 3.939369107407293e-05, "loss": 0.3231, "step": 484 }, { "epoch": 0.11, "grad_norm": 0.9145901127233058, "learning_rate": 3.939020935538647e-05, "loss": 0.2403, "step": 485 }, { "epoch": 0.11, "grad_norm": 0.9041537672139351, "learning_rate": 3.938671782319619e-05, "loss": 0.253, "step": 486 }, { "epoch": 0.11, "grad_norm": 0.9090908183292864, "learning_rate": 3.938321647926918e-05, "loss": 0.3126, "step": 487 }, { "epoch": 0.11, "grad_norm": 0.939022195366846, "learning_rate": 3.937970532537749e-05, "loss": 0.2991, "step": 488 }, { "epoch": 0.11, "grad_norm": 0.9406854553058501, "learning_rate": 3.937618436329813e-05, "loss": 0.2799, "step": 489 }, { "epoch": 0.11, "grad_norm": 1.028239160942009, "learning_rate": 3.937265359481309e-05, "loss": 0.2919, "step": 490 }, { "epoch": 0.11, "grad_norm": 0.9503870238436994, "learning_rate": 3.936911302170931e-05, "loss": 0.2791, "step": 491 }, { "epoch": 0.11, "grad_norm": 1.0751408174236268, "learning_rate": 3.936556264577869e-05, "loss": 0.331, "step": 492 }, { "epoch": 0.11, "grad_norm": 0.8973227509717273, "learning_rate": 3.9362002468818105e-05, "loss": 0.2709, "step": 493 }, { "epoch": 0.11, "grad_norm": 0.9372297533259436, "learning_rate": 3.935843249262939e-05, "loss": 0.2964, "step": 494 }, { "epoch": 0.11, "grad_norm": 0.8989041982170147, "learning_rate": 3.9354852719019306e-05, "loss": 0.2787, "step": 495 }, { "epoch": 0.11, "grad_norm": 0.8896321903424332, "learning_rate": 3.935126314979962e-05, "loss": 0.3095, "step": 496 }, { "epoch": 0.11, "grad_norm": 0.8254472878283721, "learning_rate": 3.934766378678704e-05, "loss": 0.2711, "step": 497 }, { "epoch": 0.11, "grad_norm": 0.9562348458080392, "learning_rate": 3.93440546318032e-05, "loss": 0.2691, "step": 498 }, { "epoch": 0.11, "grad_norm": 0.9090852124946016, "learning_rate": 3.934043568667473e-05, "loss": 0.2988, "step": 499 }, { "epoch": 0.11, "grad_norm": 1.056004246219132, "learning_rate": 3.933680695323321e-05, "loss": 0.3244, "step": 500 }, { "epoch": 0.11, "grad_norm": 0.927229316161898, "learning_rate": 3.9333168433315144e-05, "loss": 0.2705, "step": 501 }, { "epoch": 0.11, "grad_norm": 0.9840266504477935, "learning_rate": 3.932952012876203e-05, "loss": 0.2898, "step": 502 }, { "epoch": 0.11, "grad_norm": 0.7816009496163775, "learning_rate": 3.9325862041420275e-05, "loss": 0.2449, "step": 503 }, { "epoch": 0.11, "grad_norm": 0.8185233814439627, "learning_rate": 3.9322194173141284e-05, "loss": 0.2487, "step": 504 }, { "epoch": 0.11, "grad_norm": 0.9029351150313263, "learning_rate": 3.931851652578137e-05, "loss": 0.2685, "step": 505 }, { "epoch": 0.11, "grad_norm": 0.827850296234817, "learning_rate": 3.9314829101201814e-05, "loss": 0.2831, "step": 506 }, { "epoch": 0.11, "grad_norm": 0.7696270568383994, "learning_rate": 3.9311131901268855e-05, "loss": 0.2205, "step": 507 }, { "epoch": 0.11, "grad_norm": 1.0038396909743488, "learning_rate": 3.930742492785366e-05, "loss": 0.3061, "step": 508 }, { "epoch": 0.11, "grad_norm": 0.9429509639247784, "learning_rate": 3.930370818283235e-05, "loss": 0.2612, "step": 509 }, { "epoch": 0.11, "grad_norm": 0.938932373628078, "learning_rate": 3.9299981668085997e-05, "loss": 0.288, "step": 510 }, { "epoch": 0.11, "grad_norm": 0.9599716170406715, "learning_rate": 3.929624538550061e-05, "loss": 0.3057, "step": 511 }, { "epoch": 0.11, "grad_norm": 0.9938410999118946, "learning_rate": 3.929249933696715e-05, "loss": 0.3066, "step": 512 }, { "epoch": 0.11, "grad_norm": 0.9474069706180214, "learning_rate": 3.92887435243815e-05, "loss": 0.2881, "step": 513 }, { "epoch": 0.11, "grad_norm": 0.9390446335906655, "learning_rate": 3.928497794964452e-05, "loss": 0.3123, "step": 514 }, { "epoch": 0.11, "grad_norm": 0.8349768564591767, "learning_rate": 3.928120261466198e-05, "loss": 0.2915, "step": 515 }, { "epoch": 0.11, "grad_norm": 0.7805330038112448, "learning_rate": 3.92774175213446e-05, "loss": 0.2132, "step": 516 }, { "epoch": 0.11, "grad_norm": 0.9165333015064896, "learning_rate": 3.927362267160804e-05, "loss": 0.2749, "step": 517 }, { "epoch": 0.11, "grad_norm": 0.9711977889904639, "learning_rate": 3.92698180673729e-05, "loss": 0.2505, "step": 518 }, { "epoch": 0.11, "grad_norm": 1.0065510862094755, "learning_rate": 3.9266003710564706e-05, "loss": 0.3477, "step": 519 }, { "epoch": 0.11, "grad_norm": 0.876275699363774, "learning_rate": 3.9262179603113934e-05, "loss": 0.2555, "step": 520 }, { "epoch": 0.11, "grad_norm": 0.8644224193306435, "learning_rate": 3.925834574695599e-05, "loss": 0.2725, "step": 521 }, { "epoch": 0.11, "grad_norm": 1.0584898049535252, "learning_rate": 3.9254502144031204e-05, "loss": 0.3504, "step": 522 }, { "epoch": 0.11, "grad_norm": 0.869473785667698, "learning_rate": 3.925064879628485e-05, "loss": 0.2564, "step": 523 }, { "epoch": 0.12, "grad_norm": 0.9656636079150069, "learning_rate": 3.924678570566714e-05, "loss": 0.3155, "step": 524 }, { "epoch": 0.12, "grad_norm": 0.8794120519948292, "learning_rate": 3.9242912874133186e-05, "loss": 0.2562, "step": 525 }, { "epoch": 0.12, "grad_norm": 0.9395312239131293, "learning_rate": 3.9239030303643074e-05, "loss": 0.2911, "step": 526 }, { "epoch": 0.12, "grad_norm": 0.9857349991843363, "learning_rate": 3.9235137996161786e-05, "loss": 0.307, "step": 527 }, { "epoch": 0.12, "grad_norm": 0.8903683074995272, "learning_rate": 3.9231235953659244e-05, "loss": 0.2542, "step": 528 }, { "epoch": 0.12, "grad_norm": 0.9085176641041314, "learning_rate": 3.9227324178110295e-05, "loss": 0.2314, "step": 529 }, { "epoch": 0.12, "grad_norm": 0.7851733685045973, "learning_rate": 3.922340267149472e-05, "loss": 0.2062, "step": 530 }, { "epoch": 0.12, "grad_norm": 1.0448656298749703, "learning_rate": 3.9219471435797205e-05, "loss": 0.3111, "step": 531 }, { "epoch": 0.12, "grad_norm": 0.9052352485708491, "learning_rate": 3.921553047300739e-05, "loss": 0.2788, "step": 532 }, { "epoch": 0.12, "grad_norm": 0.9514362508272822, "learning_rate": 3.9211579785119804e-05, "loss": 0.3458, "step": 533 }, { "epoch": 0.12, "grad_norm": 0.9124283488112793, "learning_rate": 3.9207619374133917e-05, "loss": 0.2927, "step": 534 }, { "epoch": 0.12, "grad_norm": 0.7971053071157986, "learning_rate": 3.920364924205412e-05, "loss": 0.2527, "step": 535 }, { "epoch": 0.12, "grad_norm": 0.8210814368924492, "learning_rate": 3.9199669390889725e-05, "loss": 0.2371, "step": 536 }, { "epoch": 0.12, "grad_norm": 0.8722992155557622, "learning_rate": 3.919567982265495e-05, "loss": 0.2327, "step": 537 }, { "epoch": 0.12, "grad_norm": 1.0838935882765854, "learning_rate": 3.9191680539368956e-05, "loss": 0.2947, "step": 538 }, { "epoch": 0.12, "grad_norm": 0.9371555967328974, "learning_rate": 3.9187671543055785e-05, "loss": 0.2702, "step": 539 }, { "epoch": 0.12, "grad_norm": 1.017192862829085, "learning_rate": 3.918365283574443e-05, "loss": 0.3159, "step": 540 }, { "epoch": 0.12, "grad_norm": 0.9847778827974156, "learning_rate": 3.9179624419468766e-05, "loss": 0.3008, "step": 541 }, { "epoch": 0.12, "grad_norm": 0.9191660114354011, "learning_rate": 3.917558629626762e-05, "loss": 0.2984, "step": 542 }, { "epoch": 0.12, "grad_norm": 0.8005048321274897, "learning_rate": 3.917153846818471e-05, "loss": 0.258, "step": 543 }, { "epoch": 0.12, "grad_norm": 0.8634314427972728, "learning_rate": 3.916748093726864e-05, "loss": 0.2936, "step": 544 }, { "epoch": 0.12, "grad_norm": 0.8338975347471274, "learning_rate": 3.9163413705572984e-05, "loss": 0.2798, "step": 545 }, { "epoch": 0.12, "grad_norm": 0.8007292939985748, "learning_rate": 3.9159336775156165e-05, "loss": 0.2621, "step": 546 }, { "epoch": 0.12, "grad_norm": 0.7741079025905953, "learning_rate": 3.9155250148081564e-05, "loss": 0.2351, "step": 547 }, { "epoch": 0.12, "grad_norm": 0.7709212940307969, "learning_rate": 3.9151153826417436e-05, "loss": 0.2426, "step": 548 }, { "epoch": 0.12, "grad_norm": 1.028936449677963, "learning_rate": 3.914704781223696e-05, "loss": 0.3413, "step": 549 }, { "epoch": 0.12, "grad_norm": 0.8438898606413698, "learning_rate": 3.9142932107618214e-05, "loss": 0.28, "step": 550 }, { "epoch": 0.12, "grad_norm": 0.8175362974968846, "learning_rate": 3.913880671464418e-05, "loss": 0.2164, "step": 551 }, { "epoch": 0.12, "grad_norm": 0.9028316681419125, "learning_rate": 3.9134671635402745e-05, "loss": 0.2527, "step": 552 }, { "epoch": 0.12, "grad_norm": 0.9539675506215372, "learning_rate": 3.91305268719867e-05, "loss": 0.3183, "step": 553 }, { "epoch": 0.12, "grad_norm": 0.9654179150393969, "learning_rate": 3.912637242649373e-05, "loss": 0.2934, "step": 554 }, { "epoch": 0.12, "grad_norm": 0.997502725449283, "learning_rate": 3.912220830102643e-05, "loss": 0.3352, "step": 555 }, { "epoch": 0.12, "grad_norm": 0.903468665235631, "learning_rate": 3.911803449769228e-05, "loss": 0.2729, "step": 556 }, { "epoch": 0.12, "grad_norm": 0.866565928350931, "learning_rate": 3.911385101860369e-05, "loss": 0.2233, "step": 557 }, { "epoch": 0.12, "grad_norm": 0.940595000286064, "learning_rate": 3.9109657865877924e-05, "loss": 0.282, "step": 558 }, { "epoch": 0.12, "grad_norm": 0.9123056270044905, "learning_rate": 3.910545504163716e-05, "loss": 0.2633, "step": 559 }, { "epoch": 0.12, "grad_norm": 0.8137460104380139, "learning_rate": 3.9101242548008496e-05, "loss": 0.2456, "step": 560 }, { "epoch": 0.12, "grad_norm": 0.9614642800546382, "learning_rate": 3.9097020387123876e-05, "loss": 0.2924, "step": 561 }, { "epoch": 0.12, "grad_norm": 0.816181425687044, "learning_rate": 3.9092788561120174e-05, "loss": 0.2334, "step": 562 }, { "epoch": 0.12, "grad_norm": 0.8107074624429237, "learning_rate": 3.9088547072139145e-05, "loss": 0.2132, "step": 563 }, { "epoch": 0.12, "grad_norm": 1.000443360272963, "learning_rate": 3.9084295922327414e-05, "loss": 0.2928, "step": 564 }, { "epoch": 0.12, "grad_norm": 0.8417879179369573, "learning_rate": 3.908003511383654e-05, "loss": 0.2363, "step": 565 }, { "epoch": 0.12, "grad_norm": 0.8597390530824697, "learning_rate": 3.907576464882294e-05, "loss": 0.2598, "step": 566 }, { "epoch": 0.12, "grad_norm": 0.8753681089747455, "learning_rate": 3.90714845294479e-05, "loss": 0.2594, "step": 567 }, { "epoch": 0.12, "grad_norm": 0.8363784268898656, "learning_rate": 3.9067194757877635e-05, "loss": 0.2449, "step": 568 }, { "epoch": 0.12, "grad_norm": 0.7827517000446115, "learning_rate": 3.906289533628322e-05, "loss": 0.2065, "step": 569 }, { "epoch": 0.13, "grad_norm": 0.7850525441169758, "learning_rate": 3.9058586266840614e-05, "loss": 0.2414, "step": 570 }, { "epoch": 0.13, "grad_norm": 0.7769308862816632, "learning_rate": 3.905426755173068e-05, "loss": 0.2141, "step": 571 }, { "epoch": 0.13, "grad_norm": 0.8177360400387669, "learning_rate": 3.904993919313912e-05, "loss": 0.247, "step": 572 }, { "epoch": 0.13, "grad_norm": 0.8041845064375056, "learning_rate": 3.9045601193256564e-05, "loss": 0.2307, "step": 573 }, { "epoch": 0.13, "grad_norm": 0.9279095293640436, "learning_rate": 3.9041253554278486e-05, "loss": 0.2471, "step": 574 }, { "epoch": 0.13, "grad_norm": 0.9726708652363452, "learning_rate": 3.9036896278405264e-05, "loss": 0.3427, "step": 575 }, { "epoch": 0.13, "grad_norm": 0.9646956604271677, "learning_rate": 3.9032529367842145e-05, "loss": 0.2814, "step": 576 }, { "epoch": 0.13, "grad_norm": 0.893512462181103, "learning_rate": 3.902815282479923e-05, "loss": 0.2492, "step": 577 }, { "epoch": 0.13, "grad_norm": 0.7250958066974518, "learning_rate": 3.902376665149153e-05, "loss": 0.2237, "step": 578 }, { "epoch": 0.13, "grad_norm": 0.9281262022068445, "learning_rate": 3.9019370850138915e-05, "loss": 0.3319, "step": 579 }, { "epoch": 0.13, "grad_norm": 0.9334790785845333, "learning_rate": 3.9014965422966115e-05, "loss": 0.2829, "step": 580 }, { "epoch": 0.13, "grad_norm": 0.718761526927567, "learning_rate": 3.9010550372202756e-05, "loss": 0.1868, "step": 581 }, { "epoch": 0.13, "grad_norm": 0.860903057648702, "learning_rate": 3.900612570008331e-05, "loss": 0.2548, "step": 582 }, { "epoch": 0.13, "grad_norm": 0.8865750234134181, "learning_rate": 3.900169140884715e-05, "loss": 0.2613, "step": 583 }, { "epoch": 0.13, "grad_norm": 0.8633144251460134, "learning_rate": 3.899724750073848e-05, "loss": 0.2258, "step": 584 }, { "epoch": 0.13, "grad_norm": 0.7977090846094947, "learning_rate": 3.899279397800639e-05, "loss": 0.2384, "step": 585 }, { "epoch": 0.13, "grad_norm": 0.8879218656965611, "learning_rate": 3.8988330842904844e-05, "loss": 0.2288, "step": 586 }, { "epoch": 0.13, "grad_norm": 0.9116832056120259, "learning_rate": 3.8983858097692656e-05, "loss": 0.2735, "step": 587 }, { "epoch": 0.13, "grad_norm": 0.9106254187002378, "learning_rate": 3.8979375744633515e-05, "loss": 0.3284, "step": 588 }, { "epoch": 0.13, "grad_norm": 0.7889815185084196, "learning_rate": 3.897488378599596e-05, "loss": 0.2806, "step": 589 }, { "epoch": 0.13, "grad_norm": 0.7409425835211813, "learning_rate": 3.8970382224053414e-05, "loss": 0.2252, "step": 590 }, { "epoch": 0.13, "grad_norm": 0.8416200881445274, "learning_rate": 3.8965871061084126e-05, "loss": 0.2315, "step": 591 }, { "epoch": 0.13, "grad_norm": 0.8657765637959218, "learning_rate": 3.896135029937123e-05, "loss": 0.2482, "step": 592 }, { "epoch": 0.13, "grad_norm": 0.9129124767347182, "learning_rate": 3.895681994120272e-05, "loss": 0.2974, "step": 593 }, { "epoch": 0.13, "grad_norm": 0.8568144364938592, "learning_rate": 3.8952279988871425e-05, "loss": 0.2579, "step": 594 }, { "epoch": 0.13, "grad_norm": 0.8096790861857919, "learning_rate": 3.894773044467505e-05, "loss": 0.2494, "step": 595 }, { "epoch": 0.13, "grad_norm": 0.9138980293654226, "learning_rate": 3.8943171310916146e-05, "loss": 0.2804, "step": 596 }, { "epoch": 0.13, "grad_norm": 0.7697566907591287, "learning_rate": 3.893860258990212e-05, "loss": 0.2106, "step": 597 }, { "epoch": 0.13, "grad_norm": 0.8346549918050894, "learning_rate": 3.893402428394522e-05, "loss": 0.2223, "step": 598 }, { "epoch": 0.13, "grad_norm": 0.923388192914631, "learning_rate": 3.892943639536257e-05, "loss": 0.2345, "step": 599 }, { "epoch": 0.13, "grad_norm": 0.88024735599371, "learning_rate": 3.8924838926476114e-05, "loss": 0.2564, "step": 600 }, { "epoch": 0.13, "grad_norm": 0.7871692719872424, "learning_rate": 3.892023187961268e-05, "loss": 0.2286, "step": 601 }, { "epoch": 0.13, "grad_norm": 0.9327809454662354, "learning_rate": 3.891561525710389e-05, "loss": 0.2585, "step": 602 }, { "epoch": 0.13, "grad_norm": 0.7632589174906094, "learning_rate": 3.891098906128628e-05, "loss": 0.211, "step": 603 }, { "epoch": 0.13, "grad_norm": 1.020860879227997, "learning_rate": 3.890635329450118e-05, "loss": 0.2721, "step": 604 }, { "epoch": 0.13, "grad_norm": 0.8419214497236249, "learning_rate": 3.890170795909477e-05, "loss": 0.2499, "step": 605 }, { "epoch": 0.13, "grad_norm": 0.6970832009057898, "learning_rate": 3.88970530574181e-05, "loss": 0.1971, "step": 606 }, { "epoch": 0.13, "grad_norm": 0.9467412696800499, "learning_rate": 3.889238859182703e-05, "loss": 0.2719, "step": 607 }, { "epoch": 0.13, "grad_norm": 0.9102254685698031, "learning_rate": 3.888771456468229e-05, "loss": 0.2563, "step": 608 }, { "epoch": 0.13, "grad_norm": 0.8363262947985433, "learning_rate": 3.8883030978349416e-05, "loss": 0.196, "step": 609 }, { "epoch": 0.13, "grad_norm": 0.8584818620470888, "learning_rate": 3.887833783519882e-05, "loss": 0.2561, "step": 610 }, { "epoch": 0.13, "grad_norm": 0.839118786563085, "learning_rate": 3.887363513760571e-05, "loss": 0.275, "step": 611 }, { "epoch": 0.13, "grad_norm": 0.8060011279995126, "learning_rate": 3.8868922887950165e-05, "loss": 0.2507, "step": 612 }, { "epoch": 0.13, "grad_norm": 0.8452849730912027, "learning_rate": 3.886420108861708e-05, "loss": 0.2726, "step": 613 }, { "epoch": 0.13, "grad_norm": 0.7714129837419096, "learning_rate": 3.885946974199618e-05, "loss": 0.2003, "step": 614 }, { "epoch": 0.14, "grad_norm": 0.6995611577801152, "learning_rate": 3.8854728850482034e-05, "loss": 0.1937, "step": 615 }, { "epoch": 0.14, "grad_norm": 0.9346788556596127, "learning_rate": 3.884997841647404e-05, "loss": 0.2539, "step": 616 }, { "epoch": 0.14, "grad_norm": 0.9487142595839332, "learning_rate": 3.8845218442376416e-05, "loss": 0.2369, "step": 617 }, { "epoch": 0.14, "grad_norm": 0.7760025415649283, "learning_rate": 3.8840448930598216e-05, "loss": 0.2482, "step": 618 }, { "epoch": 0.14, "grad_norm": 0.7882415731253646, "learning_rate": 3.8835669883553315e-05, "loss": 0.199, "step": 619 }, { "epoch": 0.14, "grad_norm": 0.8341878959807444, "learning_rate": 3.883088130366042e-05, "loss": 0.2105, "step": 620 }, { "epoch": 0.14, "grad_norm": 0.8429857961152651, "learning_rate": 3.882608319334306e-05, "loss": 0.2527, "step": 621 }, { "epoch": 0.14, "grad_norm": 0.8876945403277204, "learning_rate": 3.88212755550296e-05, "loss": 0.29, "step": 622 }, { "epoch": 0.14, "grad_norm": 0.8439356988078114, "learning_rate": 3.88164583911532e-05, "loss": 0.2386, "step": 623 }, { "epoch": 0.14, "grad_norm": 0.7962827164352877, "learning_rate": 3.881163170415186e-05, "loss": 0.1941, "step": 624 }, { "epoch": 0.14, "grad_norm": 0.8120727149134773, "learning_rate": 3.88067954964684e-05, "loss": 0.2458, "step": 625 }, { "epoch": 0.14, "grad_norm": 0.8753465238779636, "learning_rate": 3.880194977055045e-05, "loss": 0.2773, "step": 626 }, { "epoch": 0.14, "grad_norm": 0.8029545020424, "learning_rate": 3.8797094528850474e-05, "loss": 0.2832, "step": 627 }, { "epoch": 0.14, "grad_norm": 0.7780470996760221, "learning_rate": 3.8792229773825716e-05, "loss": 0.2225, "step": 628 }, { "epoch": 0.14, "grad_norm": 0.7320854126262574, "learning_rate": 3.878735550793827e-05, "loss": 0.1794, "step": 629 }, { "epoch": 0.14, "grad_norm": 0.7694790430319758, "learning_rate": 3.8782471733655044e-05, "loss": 0.1954, "step": 630 }, { "epoch": 0.14, "grad_norm": 0.8376801766691149, "learning_rate": 3.877757845344773e-05, "loss": 0.2288, "step": 631 }, { "epoch": 0.14, "grad_norm": 0.8453942983822659, "learning_rate": 3.8772675669792855e-05, "loss": 0.2335, "step": 632 }, { "epoch": 0.14, "grad_norm": 0.8760557277124035, "learning_rate": 3.876776338517174e-05, "loss": 0.2096, "step": 633 }, { "epoch": 0.14, "grad_norm": 1.0169787261523278, "learning_rate": 3.876284160207053e-05, "loss": 0.2896, "step": 634 }, { "epoch": 0.14, "grad_norm": 0.9192946577722143, "learning_rate": 3.875791032298017e-05, "loss": 0.2784, "step": 635 }, { "epoch": 0.14, "grad_norm": 0.9142417405933866, "learning_rate": 3.875296955039641e-05, "loss": 0.302, "step": 636 }, { "epoch": 0.14, "grad_norm": 0.8178038976593689, "learning_rate": 3.874801928681979e-05, "loss": 0.1832, "step": 637 }, { "epoch": 0.14, "grad_norm": 0.760477864838044, "learning_rate": 3.87430595347557e-05, "loss": 0.2009, "step": 638 }, { "epoch": 0.14, "grad_norm": 0.8154029704759584, "learning_rate": 3.873809029671427e-05, "loss": 0.2425, "step": 639 }, { "epoch": 0.14, "grad_norm": 0.8286592451665006, "learning_rate": 3.873311157521048e-05, "loss": 0.2414, "step": 640 }, { "epoch": 0.14, "grad_norm": 0.9469051028965093, "learning_rate": 3.8728123372764085e-05, "loss": 0.2503, "step": 641 }, { "epoch": 0.14, "grad_norm": 0.727655593449102, "learning_rate": 3.8723125691899646e-05, "loss": 0.2048, "step": 642 }, { "epoch": 0.14, "grad_norm": 1.1400898762459823, "learning_rate": 3.871811853514652e-05, "loss": 0.1988, "step": 643 }, { "epoch": 0.14, "grad_norm": 0.8699188520141753, "learning_rate": 3.871310190503886e-05, "loss": 0.2335, "step": 644 }, { "epoch": 0.14, "grad_norm": 0.9371101840525764, "learning_rate": 3.870807580411561e-05, "loss": 0.2717, "step": 645 }, { "epoch": 0.14, "grad_norm": 0.9328906871545495, "learning_rate": 3.870304023492051e-05, "loss": 0.2805, "step": 646 }, { "epoch": 0.14, "grad_norm": 0.7801770662800196, "learning_rate": 3.8697995200002105e-05, "loss": 0.236, "step": 647 }, { "epoch": 0.14, "grad_norm": 1.1148783153797293, "learning_rate": 3.8692940701913706e-05, "loss": 0.2292, "step": 648 }, { "epoch": 0.14, "grad_norm": 0.7670811410430906, "learning_rate": 3.868787674321343e-05, "loss": 0.2143, "step": 649 }, { "epoch": 0.14, "grad_norm": 0.8094460238957172, "learning_rate": 3.868280332646417e-05, "loss": 0.2613, "step": 650 }, { "epoch": 0.14, "grad_norm": 0.7476612820351332, "learning_rate": 3.867772045423362e-05, "loss": 0.2234, "step": 651 }, { "epoch": 0.14, "grad_norm": 0.784818700094082, "learning_rate": 3.8672628129094255e-05, "loss": 0.2476, "step": 652 }, { "epoch": 0.14, "grad_norm": 0.793072773541811, "learning_rate": 3.8667526353623326e-05, "loss": 0.194, "step": 653 }, { "epoch": 0.14, "grad_norm": 0.892512980615161, "learning_rate": 3.866241513040288e-05, "loss": 0.2593, "step": 654 }, { "epoch": 0.14, "grad_norm": 0.9285416794821778, "learning_rate": 3.8657294462019735e-05, "loss": 0.2425, "step": 655 }, { "epoch": 0.14, "grad_norm": 0.8956017231761512, "learning_rate": 3.865216435106549e-05, "loss": 0.2275, "step": 656 }, { "epoch": 0.14, "grad_norm": 0.7304122413360475, "learning_rate": 3.8647024800136524e-05, "loss": 0.1792, "step": 657 }, { "epoch": 0.14, "grad_norm": 0.8574688318554834, "learning_rate": 3.8641875811834004e-05, "loss": 0.2151, "step": 658 }, { "epoch": 0.14, "grad_norm": 0.7538143754231571, "learning_rate": 3.863671738876385e-05, "loss": 0.2138, "step": 659 }, { "epoch": 0.14, "grad_norm": 0.8502167733846998, "learning_rate": 3.863154953353679e-05, "loss": 0.2274, "step": 660 }, { "epoch": 0.15, "grad_norm": 0.8107420169367895, "learning_rate": 3.8626372248768295e-05, "loss": 0.2394, "step": 661 }, { "epoch": 0.15, "grad_norm": 0.9014610828498489, "learning_rate": 3.862118553707863e-05, "loss": 0.3027, "step": 662 }, { "epoch": 0.15, "grad_norm": 0.9112825423972205, "learning_rate": 3.86159894010928e-05, "loss": 0.2908, "step": 663 }, { "epoch": 0.15, "grad_norm": 0.6628921068182834, "learning_rate": 3.8610783843440626e-05, "loss": 0.1389, "step": 664 }, { "epoch": 0.15, "grad_norm": 0.7176790136393478, "learning_rate": 3.8605568866756666e-05, "loss": 0.1723, "step": 665 }, { "epoch": 0.15, "grad_norm": 0.7682664032440877, "learning_rate": 3.860034447368024e-05, "loss": 0.2148, "step": 666 }, { "epoch": 0.15, "grad_norm": 0.9302215965381995, "learning_rate": 3.8595110666855466e-05, "loss": 0.296, "step": 667 }, { "epoch": 0.15, "grad_norm": 0.8521139476720233, "learning_rate": 3.858986744893119e-05, "loss": 0.23, "step": 668 }, { "epoch": 0.15, "grad_norm": 0.7464405472134348, "learning_rate": 3.858461482256103e-05, "loss": 0.2147, "step": 669 }, { "epoch": 0.15, "grad_norm": 0.8596453154852116, "learning_rate": 3.8579352790403395e-05, "loss": 0.2161, "step": 670 }, { "epoch": 0.15, "grad_norm": 0.809061992905901, "learning_rate": 3.857408135512142e-05, "loss": 0.2014, "step": 671 }, { "epoch": 0.15, "grad_norm": 0.8437509889950078, "learning_rate": 3.8568800519383e-05, "loss": 0.2255, "step": 672 }, { "epoch": 0.15, "grad_norm": 0.7568962614568128, "learning_rate": 3.856351028586082e-05, "loss": 0.1787, "step": 673 }, { "epoch": 0.15, "grad_norm": 0.8055013226380432, "learning_rate": 3.855821065723228e-05, "loss": 0.2017, "step": 674 }, { "epoch": 0.15, "grad_norm": 0.8826729993400942, "learning_rate": 3.855290163617956e-05, "loss": 0.2608, "step": 675 }, { "epoch": 0.15, "grad_norm": 0.7806975127749172, "learning_rate": 3.8547583225389596e-05, "loss": 0.2001, "step": 676 }, { "epoch": 0.15, "grad_norm": 0.7618422970469434, "learning_rate": 3.8542255427554065e-05, "loss": 0.2289, "step": 677 }, { "epoch": 0.15, "grad_norm": 0.7862885694034616, "learning_rate": 3.85369182453694e-05, "loss": 0.2798, "step": 678 }, { "epoch": 0.15, "grad_norm": 0.6671069424883396, "learning_rate": 3.853157168153677e-05, "loss": 0.1671, "step": 679 }, { "epoch": 0.15, "grad_norm": 0.739590608981885, "learning_rate": 3.852621573876212e-05, "loss": 0.2328, "step": 680 }, { "epoch": 0.15, "grad_norm": 0.7928258081145453, "learning_rate": 3.8520850419756104e-05, "loss": 0.2329, "step": 681 }, { "epoch": 0.15, "grad_norm": 0.8702197972343868, "learning_rate": 3.851547572723416e-05, "loss": 0.2651, "step": 682 }, { "epoch": 0.15, "grad_norm": 0.8421114800562508, "learning_rate": 3.851009166391646e-05, "loss": 0.2441, "step": 683 }, { "epoch": 0.15, "grad_norm": 0.8450856410310613, "learning_rate": 3.850469823252789e-05, "loss": 0.2525, "step": 684 }, { "epoch": 0.15, "grad_norm": 0.7375467269448306, "learning_rate": 3.849929543579812e-05, "loss": 0.187, "step": 685 }, { "epoch": 0.15, "grad_norm": 0.807894049241486, "learning_rate": 3.849388327646152e-05, "loss": 0.2275, "step": 686 }, { "epoch": 0.15, "grad_norm": 0.8135449219336123, "learning_rate": 3.848846175725722e-05, "loss": 0.2376, "step": 687 }, { "epoch": 0.15, "grad_norm": 0.8856364968360584, "learning_rate": 3.84830308809291e-05, "loss": 0.2332, "step": 688 }, { "epoch": 0.15, "grad_norm": 0.6883587675575743, "learning_rate": 3.8477590650225735e-05, "loss": 0.164, "step": 689 }, { "epoch": 0.15, "grad_norm": 0.7784342568726977, "learning_rate": 3.8472141067900485e-05, "loss": 0.1987, "step": 690 }, { "epoch": 0.15, "grad_norm": 0.7494893719948371, "learning_rate": 3.84666821367114e-05, "loss": 0.2167, "step": 691 }, { "epoch": 0.15, "grad_norm": 0.739221327572983, "learning_rate": 3.846121385942128e-05, "loss": 0.1996, "step": 692 }, { "epoch": 0.15, "grad_norm": 0.8289543173872378, "learning_rate": 3.845573623879766e-05, "loss": 0.1907, "step": 693 }, { "epoch": 0.15, "grad_norm": 0.8561945235943589, "learning_rate": 3.845024927761279e-05, "loss": 0.2405, "step": 694 }, { "epoch": 0.15, "grad_norm": 0.7586867804796876, "learning_rate": 3.844475297864366e-05, "loss": 0.1756, "step": 695 }, { "epoch": 0.15, "grad_norm": 0.820059891816306, "learning_rate": 3.843924734467199e-05, "loss": 0.2047, "step": 696 }, { "epoch": 0.15, "grad_norm": 0.8113901187168117, "learning_rate": 3.843373237848419e-05, "loss": 0.1931, "step": 697 }, { "epoch": 0.15, "grad_norm": 0.8109688269530724, "learning_rate": 3.842820808287144e-05, "loss": 0.2202, "step": 698 }, { "epoch": 0.15, "grad_norm": 0.8227791892675015, "learning_rate": 3.842267446062962e-05, "loss": 0.2283, "step": 699 }, { "epoch": 0.15, "grad_norm": 0.7573331516312104, "learning_rate": 3.841713151455931e-05, "loss": 0.1784, "step": 700 }, { "epoch": 0.15, "grad_norm": 0.7528941421925391, "learning_rate": 3.8411579247465845e-05, "loss": 0.2109, "step": 701 }, { "epoch": 0.15, "grad_norm": 0.7843007603501683, "learning_rate": 3.840601766215926e-05, "loss": 0.21, "step": 702 }, { "epoch": 0.15, "grad_norm": 0.8236345145931844, "learning_rate": 3.840044676145431e-05, "loss": 0.2033, "step": 703 }, { "epoch": 0.15, "grad_norm": 0.752640763991213, "learning_rate": 3.839486654817045e-05, "loss": 0.2086, "step": 704 }, { "epoch": 0.15, "grad_norm": 0.8262639276251157, "learning_rate": 3.838927702513187e-05, "loss": 0.2025, "step": 705 }, { "epoch": 0.16, "grad_norm": 0.7761474298353154, "learning_rate": 3.838367819516746e-05, "loss": 0.1692, "step": 706 }, { "epoch": 0.16, "grad_norm": 0.8734125314600135, "learning_rate": 3.837807006111082e-05, "loss": 0.2258, "step": 707 }, { "epoch": 0.16, "grad_norm": 0.7484943854071063, "learning_rate": 3.837245262580027e-05, "loss": 0.1844, "step": 708 }, { "epoch": 0.16, "grad_norm": 0.8271628494538433, "learning_rate": 3.836682589207882e-05, "loss": 0.1746, "step": 709 }, { "epoch": 0.16, "grad_norm": 0.8443352117983162, "learning_rate": 3.836118986279419e-05, "loss": 0.235, "step": 710 }, { "epoch": 0.16, "grad_norm": 0.8112915295285071, "learning_rate": 3.835554454079882e-05, "loss": 0.2069, "step": 711 }, { "epoch": 0.16, "grad_norm": 0.7067376891994592, "learning_rate": 3.834988992894983e-05, "loss": 0.1963, "step": 712 }, { "epoch": 0.16, "grad_norm": 0.7762592744503582, "learning_rate": 3.834422603010906e-05, "loss": 0.2214, "step": 713 }, { "epoch": 0.16, "grad_norm": 0.7241916326324417, "learning_rate": 3.833855284714305e-05, "loss": 0.2028, "step": 714 }, { "epoch": 0.16, "grad_norm": 0.8083965411167431, "learning_rate": 3.833287038292303e-05, "loss": 0.2359, "step": 715 }, { "epoch": 0.16, "grad_norm": 0.7589641747017328, "learning_rate": 3.832717864032492e-05, "loss": 0.1956, "step": 716 }, { "epoch": 0.16, "grad_norm": 0.7667360613003436, "learning_rate": 3.832147762222936e-05, "loss": 0.1972, "step": 717 }, { "epoch": 0.16, "grad_norm": 0.7712735817641144, "learning_rate": 3.8315767331521655e-05, "loss": 0.1818, "step": 718 }, { "epoch": 0.16, "grad_norm": 0.9010674834916976, "learning_rate": 3.831004777109183e-05, "loss": 0.2536, "step": 719 }, { "epoch": 0.16, "grad_norm": 0.8485400958054092, "learning_rate": 3.8304318943834584e-05, "loss": 0.2258, "step": 720 }, { "epoch": 0.16, "grad_norm": 0.7302916613026862, "learning_rate": 3.8298580852649316e-05, "loss": 0.2157, "step": 721 }, { "epoch": 0.16, "grad_norm": 0.7151270654118754, "learning_rate": 3.82928335004401e-05, "loss": 0.1962, "step": 722 }, { "epoch": 0.16, "grad_norm": 0.7447894614304839, "learning_rate": 3.828707689011572e-05, "loss": 0.2116, "step": 723 }, { "epoch": 0.16, "grad_norm": 0.7271553000291014, "learning_rate": 3.828131102458962e-05, "loss": 0.1966, "step": 724 }, { "epoch": 0.16, "grad_norm": 0.7429093314076324, "learning_rate": 3.827553590677996e-05, "loss": 0.1906, "step": 725 }, { "epoch": 0.16, "grad_norm": 0.716800949542447, "learning_rate": 3.8269751539609525e-05, "loss": 0.1946, "step": 726 }, { "epoch": 0.16, "grad_norm": 0.7666347229218324, "learning_rate": 3.8263957926005855e-05, "loss": 0.2524, "step": 727 }, { "epoch": 0.16, "grad_norm": 0.8161614155859114, "learning_rate": 3.825815506890111e-05, "loss": 0.2168, "step": 728 }, { "epoch": 0.16, "grad_norm": 0.6637236010874694, "learning_rate": 3.825234297123216e-05, "loss": 0.1729, "step": 729 }, { "epoch": 0.16, "grad_norm": 0.8388439526076809, "learning_rate": 3.824652163594056e-05, "loss": 0.2296, "step": 730 }, { "epoch": 0.16, "grad_norm": 0.8218573419682884, "learning_rate": 3.8240691065972486e-05, "loss": 0.2196, "step": 731 }, { "epoch": 0.16, "grad_norm": 0.8293467452318114, "learning_rate": 3.823485126427886e-05, "loss": 0.2345, "step": 732 }, { "epoch": 0.16, "grad_norm": 0.6922598919352861, "learning_rate": 3.822900223381522e-05, "loss": 0.1656, "step": 733 }, { "epoch": 0.16, "grad_norm": 0.8068375981982576, "learning_rate": 3.8223143977541806e-05, "loss": 0.2273, "step": 734 }, { "epoch": 0.16, "grad_norm": 0.7797871053978261, "learning_rate": 3.821727649842352e-05, "loss": 0.2594, "step": 735 }, { "epoch": 0.16, "grad_norm": 0.7766407226832587, "learning_rate": 3.821139979942992e-05, "loss": 0.1986, "step": 736 }, { "epoch": 0.16, "grad_norm": 0.8017956519170995, "learning_rate": 3.820551388353525e-05, "loss": 0.2132, "step": 737 }, { "epoch": 0.16, "grad_norm": 0.7925431937110463, "learning_rate": 3.819961875371839e-05, "loss": 0.2119, "step": 738 }, { "epoch": 0.16, "grad_norm": 0.7698235900481465, "learning_rate": 3.819371441296292e-05, "loss": 0.2025, "step": 739 }, { "epoch": 0.16, "grad_norm": 0.8134358225280137, "learning_rate": 3.8187800864257065e-05, "loss": 0.2063, "step": 740 }, { "epoch": 0.16, "grad_norm": 0.7621643328146783, "learning_rate": 3.818187811059369e-05, "loss": 0.1876, "step": 741 }, { "epoch": 0.16, "grad_norm": 0.9041169813600348, "learning_rate": 3.817594615497035e-05, "loss": 0.2496, "step": 742 }, { "epoch": 0.16, "grad_norm": 0.7800227635069699, "learning_rate": 3.817000500038924e-05, "loss": 0.2249, "step": 743 }, { "epoch": 0.16, "grad_norm": 0.7763990863508718, "learning_rate": 3.8164054649857206e-05, "loss": 0.204, "step": 744 }, { "epoch": 0.16, "grad_norm": 0.6792244923631663, "learning_rate": 3.815809510638578e-05, "loss": 0.1702, "step": 745 }, { "epoch": 0.16, "grad_norm": 0.8416913667837724, "learning_rate": 3.81521263729911e-05, "loss": 0.2529, "step": 746 }, { "epoch": 0.16, "grad_norm": 0.6874297279510242, "learning_rate": 3.8146148452694e-05, "loss": 0.182, "step": 747 }, { "epoch": 0.16, "grad_norm": 0.7552469616361313, "learning_rate": 3.8140161348519924e-05, "loss": 0.238, "step": 748 }, { "epoch": 0.16, "grad_norm": 0.7362886210805616, "learning_rate": 3.813416506349899e-05, "loss": 0.1778, "step": 749 }, { "epoch": 0.16, "grad_norm": 0.6942190723817684, "learning_rate": 3.8128159600665954e-05, "loss": 0.184, "step": 750 }, { "epoch": 0.16, "grad_norm": 0.7012048145090823, "learning_rate": 3.812214496306022e-05, "loss": 0.1744, "step": 751 }, { "epoch": 0.17, "grad_norm": 0.7449986674789698, "learning_rate": 3.8116121153725824e-05, "loss": 0.1862, "step": 752 }, { "epoch": 0.17, "grad_norm": 0.7682776915316732, "learning_rate": 3.8110088175711456e-05, "loss": 0.2081, "step": 753 }, { "epoch": 0.17, "grad_norm": 0.7913974923405264, "learning_rate": 3.810404603207045e-05, "loss": 0.2172, "step": 754 }, { "epoch": 0.17, "grad_norm": 0.9037616043612097, "learning_rate": 3.809799472586077e-05, "loss": 0.2525, "step": 755 }, { "epoch": 0.17, "grad_norm": 0.9625775987134346, "learning_rate": 3.809193426014501e-05, "loss": 0.2522, "step": 756 }, { "epoch": 0.17, "grad_norm": 0.7113503315961358, "learning_rate": 3.808586463799042e-05, "loss": 0.1654, "step": 757 }, { "epoch": 0.17, "grad_norm": 0.748331040872349, "learning_rate": 3.807978586246887e-05, "loss": 0.1693, "step": 758 }, { "epoch": 0.17, "grad_norm": 0.6747668375753214, "learning_rate": 3.8073697936656866e-05, "loss": 0.1564, "step": 759 }, { "epoch": 0.17, "grad_norm": 0.7857764152943993, "learning_rate": 3.806760086363554e-05, "loss": 0.2224, "step": 760 }, { "epoch": 0.17, "grad_norm": 0.8058116800725232, "learning_rate": 3.806149464649066e-05, "loss": 0.2057, "step": 761 }, { "epoch": 0.17, "grad_norm": 0.7572603869023338, "learning_rate": 3.8055379288312625e-05, "loss": 0.1831, "step": 762 }, { "epoch": 0.17, "grad_norm": 0.8191501193026167, "learning_rate": 3.8049254792196443e-05, "loss": 0.1991, "step": 763 }, { "epoch": 0.17, "grad_norm": 0.7519661245832501, "learning_rate": 3.804312116124177e-05, "loss": 0.2015, "step": 764 }, { "epoch": 0.17, "grad_norm": 0.8708368444295916, "learning_rate": 3.8036978398552876e-05, "loss": 0.2625, "step": 765 }, { "epoch": 0.17, "grad_norm": 0.6962016132327279, "learning_rate": 3.803082650723864e-05, "loss": 0.1752, "step": 766 }, { "epoch": 0.17, "grad_norm": 0.7236730484580767, "learning_rate": 3.802466549041258e-05, "loss": 0.2141, "step": 767 }, { "epoch": 0.17, "grad_norm": 0.7697581619865724, "learning_rate": 3.8018495351192825e-05, "loss": 0.2054, "step": 768 }, { "epoch": 0.17, "grad_norm": 0.769659506086983, "learning_rate": 3.801231609270212e-05, "loss": 0.203, "step": 769 }, { "epoch": 0.17, "grad_norm": 0.6636546733887948, "learning_rate": 3.800612771806781e-05, "loss": 0.1446, "step": 770 }, { "epoch": 0.17, "grad_norm": 0.6869472535775166, "learning_rate": 3.79999302304219e-05, "loss": 0.1652, "step": 771 }, { "epoch": 0.17, "grad_norm": 0.7367351024507836, "learning_rate": 3.799372363290095e-05, "loss": 0.1737, "step": 772 }, { "epoch": 0.17, "grad_norm": 0.7346502964947343, "learning_rate": 3.798750792864617e-05, "loss": 0.1551, "step": 773 }, { "epoch": 0.17, "grad_norm": 0.8866153270215831, "learning_rate": 3.798128312080336e-05, "loss": 0.2897, "step": 774 }, { "epoch": 0.17, "grad_norm": 0.7273975618740585, "learning_rate": 3.7975049212522934e-05, "loss": 0.1959, "step": 775 }, { "epoch": 0.17, "grad_norm": 0.7774320772682493, "learning_rate": 3.7968806206959915e-05, "loss": 0.1848, "step": 776 }, { "epoch": 0.17, "grad_norm": 0.8070108520057053, "learning_rate": 3.7962554107273926e-05, "loss": 0.2094, "step": 777 }, { "epoch": 0.17, "grad_norm": 0.7002195516351798, "learning_rate": 3.795629291662919e-05, "loss": 0.2065, "step": 778 }, { "epoch": 0.17, "grad_norm": 0.7795505349867997, "learning_rate": 3.795002263819453e-05, "loss": 0.1989, "step": 779 }, { "epoch": 0.17, "grad_norm": 0.7985563678683656, "learning_rate": 3.7943743275143384e-05, "loss": 0.2017, "step": 780 }, { "epoch": 0.17, "grad_norm": 0.8227484003895893, "learning_rate": 3.793745483065377e-05, "loss": 0.2073, "step": 781 }, { "epoch": 0.17, "grad_norm": 0.7756298858710011, "learning_rate": 3.7931157307908304e-05, "loss": 0.1646, "step": 782 }, { "epoch": 0.17, "grad_norm": 0.7802675554915577, "learning_rate": 3.792485071009421e-05, "loss": 0.2172, "step": 783 }, { "epoch": 0.17, "grad_norm": 0.7938318675917536, "learning_rate": 3.7918535040403284e-05, "loss": 0.2116, "step": 784 }, { "epoch": 0.17, "grad_norm": 0.788083214737397, "learning_rate": 3.791221030203193e-05, "loss": 0.1677, "step": 785 }, { "epoch": 0.17, "grad_norm": 0.8406960709035459, "learning_rate": 3.790587649818115e-05, "loss": 0.2083, "step": 786 }, { "epoch": 0.17, "grad_norm": 0.8092112371843942, "learning_rate": 3.78995336320565e-05, "loss": 0.2049, "step": 787 }, { "epoch": 0.17, "grad_norm": 0.7295052196559106, "learning_rate": 3.789318170686816e-05, "loss": 0.2061, "step": 788 }, { "epoch": 0.17, "grad_norm": 0.8338696939204928, "learning_rate": 3.788682072583087e-05, "loss": 0.206, "step": 789 }, { "epoch": 0.17, "grad_norm": 0.8162035530743967, "learning_rate": 3.788045069216396e-05, "loss": 0.2347, "step": 790 }, { "epoch": 0.17, "grad_norm": 0.7600016092609635, "learning_rate": 3.787407160909134e-05, "loss": 0.2107, "step": 791 }, { "epoch": 0.17, "grad_norm": 0.7609946305841209, "learning_rate": 3.786768347984152e-05, "loss": 0.1974, "step": 792 }, { "epoch": 0.17, "grad_norm": 0.735952873636344, "learning_rate": 3.7861286307647555e-05, "loss": 0.1909, "step": 793 }, { "epoch": 0.17, "grad_norm": 0.6722596642043193, "learning_rate": 3.78548800957471e-05, "loss": 0.1568, "step": 794 }, { "epoch": 0.17, "grad_norm": 0.6333915980625863, "learning_rate": 3.7848464847382376e-05, "loss": 0.1725, "step": 795 }, { "epoch": 0.17, "grad_norm": 0.796500772603936, "learning_rate": 3.7842040565800184e-05, "loss": 0.1909, "step": 796 }, { "epoch": 0.18, "grad_norm": 0.7852543631975609, "learning_rate": 3.783560725425188e-05, "loss": 0.1969, "step": 797 }, { "epoch": 0.18, "grad_norm": 0.7451596629183987, "learning_rate": 3.782916491599341e-05, "loss": 0.1965, "step": 798 }, { "epoch": 0.18, "grad_norm": 0.6560391132655947, "learning_rate": 3.782271355428529e-05, "loss": 0.1756, "step": 799 }, { "epoch": 0.18, "grad_norm": 0.859883106182944, "learning_rate": 3.781625317239258e-05, "loss": 0.2243, "step": 800 }, { "epoch": 0.18, "grad_norm": 0.7195191207343877, "learning_rate": 3.780978377358493e-05, "loss": 0.153, "step": 801 }, { "epoch": 0.18, "grad_norm": 0.6798048904594878, "learning_rate": 3.7803305361136534e-05, "loss": 0.1773, "step": 802 }, { "epoch": 0.18, "grad_norm": 0.7079466905190117, "learning_rate": 3.7796817938326155e-05, "loss": 0.1877, "step": 803 }, { "epoch": 0.18, "grad_norm": 0.7535864591552314, "learning_rate": 3.7790321508437124e-05, "loss": 0.1981, "step": 804 }, { "epoch": 0.18, "grad_norm": 0.7119576397655025, "learning_rate": 3.778381607475732e-05, "loss": 0.1625, "step": 805 }, { "epoch": 0.18, "grad_norm": 0.8590587033895257, "learning_rate": 3.777730164057919e-05, "loss": 0.2357, "step": 806 }, { "epoch": 0.18, "grad_norm": 0.6929512219539972, "learning_rate": 3.777077820919972e-05, "loss": 0.1484, "step": 807 }, { "epoch": 0.18, "grad_norm": 0.7499874829201456, "learning_rate": 3.776424578392045e-05, "loss": 0.2245, "step": 808 }, { "epoch": 0.18, "grad_norm": 0.7393534307485781, "learning_rate": 3.775770436804751e-05, "loss": 0.1982, "step": 809 }, { "epoch": 0.18, "grad_norm": 0.7135289926633145, "learning_rate": 3.775115396489153e-05, "loss": 0.1488, "step": 810 }, { "epoch": 0.18, "grad_norm": 0.7734600025332912, "learning_rate": 3.77445945777677e-05, "loss": 0.1963, "step": 811 }, { "epoch": 0.18, "grad_norm": 0.710812400458968, "learning_rate": 3.773802620999579e-05, "loss": 0.1591, "step": 812 }, { "epoch": 0.18, "grad_norm": 0.7657793629017298, "learning_rate": 3.773144886490007e-05, "loss": 0.1688, "step": 813 }, { "epoch": 0.18, "grad_norm": 0.811511355240138, "learning_rate": 3.7724862545809394e-05, "loss": 0.1988, "step": 814 }, { "epoch": 0.18, "grad_norm": 0.7990316490808798, "learning_rate": 3.771826725605713e-05, "loss": 0.192, "step": 815 }, { "epoch": 0.18, "grad_norm": 0.6723617298825829, "learning_rate": 3.771166299898118e-05, "loss": 0.169, "step": 816 }, { "epoch": 0.18, "grad_norm": 0.7521643403897306, "learning_rate": 3.770504977792402e-05, "loss": 0.1875, "step": 817 }, { "epoch": 0.18, "grad_norm": 0.7114388930055676, "learning_rate": 3.7698427596232636e-05, "loss": 0.1927, "step": 818 }, { "epoch": 0.18, "grad_norm": 0.7085657907035885, "learning_rate": 3.7691796457258546e-05, "loss": 0.1811, "step": 819 }, { "epoch": 0.18, "grad_norm": 0.7836401046171007, "learning_rate": 3.7685156364357825e-05, "loss": 0.1845, "step": 820 }, { "epoch": 0.18, "grad_norm": 0.7266652024397258, "learning_rate": 3.767850732089105e-05, "loss": 0.2094, "step": 821 }, { "epoch": 0.18, "grad_norm": 0.8105800759742975, "learning_rate": 3.7671849330223345e-05, "loss": 0.199, "step": 822 }, { "epoch": 0.18, "grad_norm": 0.7686728710156864, "learning_rate": 3.766518239572437e-05, "loss": 0.2106, "step": 823 }, { "epoch": 0.18, "grad_norm": 0.7493906724832401, "learning_rate": 3.76585065207683e-05, "loss": 0.2183, "step": 824 }, { "epoch": 0.18, "grad_norm": 0.6940159657288101, "learning_rate": 3.765182170873383e-05, "loss": 0.1491, "step": 825 }, { "epoch": 0.18, "grad_norm": 0.6327228718145429, "learning_rate": 3.7645127963004176e-05, "loss": 0.129, "step": 826 }, { "epoch": 0.18, "grad_norm": 0.7669510937458179, "learning_rate": 3.76384252869671e-05, "loss": 0.191, "step": 827 }, { "epoch": 0.18, "grad_norm": 0.757024453588316, "learning_rate": 3.7631713684014866e-05, "loss": 0.2127, "step": 828 }, { "epoch": 0.18, "grad_norm": 0.7048436247771179, "learning_rate": 3.7624993157544246e-05, "loss": 0.1713, "step": 829 }, { "epoch": 0.18, "grad_norm": 0.8488451798433339, "learning_rate": 3.761826371095655e-05, "loss": 0.2248, "step": 830 }, { "epoch": 0.18, "grad_norm": 0.6127937468869562, "learning_rate": 3.7611525347657584e-05, "loss": 0.1252, "step": 831 }, { "epoch": 0.18, "grad_norm": 0.6806426735887422, "learning_rate": 3.7604778071057685e-05, "loss": 0.1563, "step": 832 }, { "epoch": 0.18, "grad_norm": 0.6743973690986225, "learning_rate": 3.759802188457168e-05, "loss": 0.1774, "step": 833 }, { "epoch": 0.18, "grad_norm": 0.7603683630524931, "learning_rate": 3.759125679161893e-05, "loss": 0.236, "step": 834 }, { "epoch": 0.18, "grad_norm": 0.6793108367801289, "learning_rate": 3.758448279562327e-05, "loss": 0.1412, "step": 835 }, { "epoch": 0.18, "grad_norm": 0.6519421923379566, "learning_rate": 3.757769990001308e-05, "loss": 0.1167, "step": 836 }, { "epoch": 0.18, "grad_norm": 0.7396227239586961, "learning_rate": 3.757090810822122e-05, "loss": 0.1992, "step": 837 }, { "epoch": 0.18, "grad_norm": 0.7742992955873289, "learning_rate": 3.756410742368505e-05, "loss": 0.204, "step": 838 }, { "epoch": 0.18, "grad_norm": 0.7887451364575835, "learning_rate": 3.7557297849846444e-05, "loss": 0.1708, "step": 839 }, { "epoch": 0.18, "grad_norm": 0.7601953776719075, "learning_rate": 3.7550479390151766e-05, "loss": 0.169, "step": 840 }, { "epoch": 0.18, "grad_norm": 0.6972835762854639, "learning_rate": 3.754365204805189e-05, "loss": 0.1534, "step": 841 }, { "epoch": 0.18, "grad_norm": 0.7799931143187769, "learning_rate": 3.753681582700216e-05, "loss": 0.1763, "step": 842 }, { "epoch": 0.19, "grad_norm": 0.7701780437869349, "learning_rate": 3.752997073046244e-05, "loss": 0.1606, "step": 843 }, { "epoch": 0.19, "grad_norm": 0.8026366534002741, "learning_rate": 3.752311676189708e-05, "loss": 0.1872, "step": 844 }, { "epoch": 0.19, "grad_norm": 0.8119168389561734, "learning_rate": 3.75162539247749e-05, "loss": 0.2032, "step": 845 }, { "epoch": 0.19, "grad_norm": 0.6718226124056262, "learning_rate": 3.750938222256924e-05, "loss": 0.1569, "step": 846 }, { "epoch": 0.19, "grad_norm": 0.7637510136724011, "learning_rate": 3.75025016587579e-05, "loss": 0.2004, "step": 847 }, { "epoch": 0.19, "grad_norm": 0.6415349498989384, "learning_rate": 3.7495612236823175e-05, "loss": 0.1509, "step": 848 }, { "epoch": 0.19, "grad_norm": 0.8286716528290092, "learning_rate": 3.7488713960251845e-05, "loss": 0.1616, "step": 849 }, { "epoch": 0.19, "grad_norm": 0.6569549543464369, "learning_rate": 3.748180683253518e-05, "loss": 0.1562, "step": 850 }, { "epoch": 0.19, "grad_norm": 0.7173550378920462, "learning_rate": 3.747489085716891e-05, "loss": 0.1558, "step": 851 }, { "epoch": 0.19, "grad_norm": 0.7504036135246938, "learning_rate": 3.746796603765325e-05, "loss": 0.1769, "step": 852 }, { "epoch": 0.19, "grad_norm": 0.6710972165250231, "learning_rate": 3.7461032377492905e-05, "loss": 0.1642, "step": 853 }, { "epoch": 0.19, "grad_norm": 0.7352966450199, "learning_rate": 3.745408988019703e-05, "loss": 0.1819, "step": 854 }, { "epoch": 0.19, "grad_norm": 0.7553549569396881, "learning_rate": 3.744713854927928e-05, "loss": 0.1826, "step": 855 }, { "epoch": 0.19, "grad_norm": 0.8046090541490418, "learning_rate": 3.7440178388257746e-05, "loss": 0.2045, "step": 856 }, { "epoch": 0.19, "grad_norm": 0.7820578784963078, "learning_rate": 3.743320940065503e-05, "loss": 0.1769, "step": 857 }, { "epoch": 0.19, "grad_norm": 0.6971775715256504, "learning_rate": 3.7426231589998166e-05, "loss": 0.1782, "step": 858 }, { "epoch": 0.19, "grad_norm": 0.782002353801258, "learning_rate": 3.741924495981867e-05, "loss": 0.2065, "step": 859 }, { "epoch": 0.19, "grad_norm": 0.6984916614807525, "learning_rate": 3.741224951365251e-05, "loss": 0.1677, "step": 860 }, { "epoch": 0.19, "grad_norm": 0.6849913343173801, "learning_rate": 3.740524525504014e-05, "loss": 0.156, "step": 861 }, { "epoch": 0.19, "grad_norm": 0.6846778338963099, "learning_rate": 3.739823218752645e-05, "loss": 0.1699, "step": 862 }, { "epoch": 0.19, "grad_norm": 0.7003902445939452, "learning_rate": 3.7391210314660796e-05, "loss": 0.1621, "step": 863 }, { "epoch": 0.19, "grad_norm": 0.6867990822182168, "learning_rate": 3.7384179639996997e-05, "loss": 0.1499, "step": 864 }, { "epoch": 0.19, "grad_norm": 0.644974619639601, "learning_rate": 3.7377140167093316e-05, "loss": 0.1218, "step": 865 }, { "epoch": 0.19, "grad_norm": 0.783606332695021, "learning_rate": 3.7370091899512464e-05, "loss": 0.1907, "step": 866 }, { "epoch": 0.19, "grad_norm": 0.6615394006948083, "learning_rate": 3.736303484082163e-05, "loss": 0.1477, "step": 867 }, { "epoch": 0.19, "grad_norm": 0.6937158662114102, "learning_rate": 3.7355968994592414e-05, "loss": 0.1625, "step": 868 }, { "epoch": 0.19, "grad_norm": 0.7429604370422286, "learning_rate": 3.7348894364400914e-05, "loss": 0.1805, "step": 869 }, { "epoch": 0.19, "grad_norm": 0.7475581950878672, "learning_rate": 3.734181095382761e-05, "loss": 0.204, "step": 870 }, { "epoch": 0.19, "grad_norm": 0.6430529894982431, "learning_rate": 3.733471876645749e-05, "loss": 0.1358, "step": 871 }, { "epoch": 0.19, "grad_norm": 0.7923755023892722, "learning_rate": 3.732761780587993e-05, "loss": 0.1778, "step": 872 }, { "epoch": 0.19, "grad_norm": 0.7170539000161786, "learning_rate": 3.732050807568878e-05, "loss": 0.1676, "step": 873 }, { "epoch": 0.19, "grad_norm": 0.6665991411184689, "learning_rate": 3.7313389579482315e-05, "loss": 0.1833, "step": 874 }, { "epoch": 0.19, "grad_norm": 0.6874944513270407, "learning_rate": 3.7306262320863245e-05, "loss": 0.161, "step": 875 }, { "epoch": 0.19, "grad_norm": 0.6105940436302919, "learning_rate": 3.729912630343874e-05, "loss": 0.1261, "step": 876 }, { "epoch": 0.19, "grad_norm": 0.6619533928976752, "learning_rate": 3.729198153082036e-05, "loss": 0.1569, "step": 877 }, { "epoch": 0.19, "grad_norm": 0.569243487381569, "learning_rate": 3.7284828006624125e-05, "loss": 0.1229, "step": 878 }, { "epoch": 0.19, "grad_norm": 0.7142199102812501, "learning_rate": 3.7277665734470476e-05, "loss": 0.1871, "step": 879 }, { "epoch": 0.19, "grad_norm": 0.629269678533343, "learning_rate": 3.727049471798429e-05, "loss": 0.1472, "step": 880 }, { "epoch": 0.19, "grad_norm": 0.654901322109771, "learning_rate": 3.726331496079486e-05, "loss": 0.149, "step": 881 }, { "epoch": 0.19, "grad_norm": 0.6841025284739801, "learning_rate": 3.7256126466535896e-05, "loss": 0.1534, "step": 882 }, { "epoch": 0.19, "grad_norm": 0.7865678235231162, "learning_rate": 3.724892923884555e-05, "loss": 0.2295, "step": 883 }, { "epoch": 0.19, "grad_norm": 0.7841110484485813, "learning_rate": 3.724172328136638e-05, "loss": 0.2037, "step": 884 }, { "epoch": 0.19, "grad_norm": 0.6599561399278121, "learning_rate": 3.723450859774536e-05, "loss": 0.1311, "step": 885 }, { "epoch": 0.19, "grad_norm": 0.6987130451495835, "learning_rate": 3.7227285191633894e-05, "loss": 0.1495, "step": 886 }, { "epoch": 0.19, "grad_norm": 0.7402887248919082, "learning_rate": 3.722005306668778e-05, "loss": 0.1579, "step": 887 }, { "epoch": 0.2, "grad_norm": 0.6984442998567899, "learning_rate": 3.721281222656725e-05, "loss": 0.1619, "step": 888 }, { "epoch": 0.2, "grad_norm": 0.7065217305585293, "learning_rate": 3.7205562674936945e-05, "loss": 0.1709, "step": 889 }, { "epoch": 0.2, "grad_norm": 0.732226984161801, "learning_rate": 3.719830441546589e-05, "loss": 0.1782, "step": 890 }, { "epoch": 0.2, "grad_norm": 0.6858639758075553, "learning_rate": 3.7191037451827545e-05, "loss": 0.1719, "step": 891 }, { "epoch": 0.2, "grad_norm": 0.7299863476978986, "learning_rate": 3.718376178769976e-05, "loss": 0.2216, "step": 892 }, { "epoch": 0.2, "grad_norm": 0.8304332736342658, "learning_rate": 3.71764774267648e-05, "loss": 0.2096, "step": 893 }, { "epoch": 0.2, "grad_norm": 0.7169787516591115, "learning_rate": 3.716918437270932e-05, "loss": 0.1684, "step": 894 }, { "epoch": 0.2, "grad_norm": 0.7166803665478112, "learning_rate": 3.7161882629224386e-05, "loss": 0.1607, "step": 895 }, { "epoch": 0.2, "grad_norm": 0.724778723656452, "learning_rate": 3.7154572200005446e-05, "loss": 0.1759, "step": 896 }, { "epoch": 0.2, "grad_norm": 0.7554750870258391, "learning_rate": 3.714725308875236e-05, "loss": 0.2013, "step": 897 }, { "epoch": 0.2, "grad_norm": 0.7150835979381013, "learning_rate": 3.713992529916936e-05, "loss": 0.1539, "step": 898 }, { "epoch": 0.2, "grad_norm": 0.6787440010634678, "learning_rate": 3.7132588834965104e-05, "loss": 0.1945, "step": 899 }, { "epoch": 0.2, "grad_norm": 0.6542829825221722, "learning_rate": 3.712524369985262e-05, "loss": 0.1392, "step": 900 }, { "epoch": 0.2, "grad_norm": 0.665774640200682, "learning_rate": 3.711788989754931e-05, "loss": 0.1615, "step": 901 }, { "epoch": 0.2, "grad_norm": 0.7030846690161123, "learning_rate": 3.711052743177699e-05, "loss": 0.1855, "step": 902 }, { "epoch": 0.2, "grad_norm": 0.7936334689648732, "learning_rate": 3.710315630626185e-05, "loss": 0.2092, "step": 903 }, { "epoch": 0.2, "grad_norm": 0.6753815367370155, "learning_rate": 3.7095776524734464e-05, "loss": 0.1527, "step": 904 }, { "epoch": 0.2, "grad_norm": 0.7084634511777324, "learning_rate": 3.7088388090929776e-05, "loss": 0.1491, "step": 905 }, { "epoch": 0.2, "grad_norm": 0.7112534208835603, "learning_rate": 3.708099100858712e-05, "loss": 0.1755, "step": 906 }, { "epoch": 0.2, "grad_norm": 0.6211746451040592, "learning_rate": 3.7073585281450206e-05, "loss": 0.1484, "step": 907 }, { "epoch": 0.2, "grad_norm": 0.6404982883466234, "learning_rate": 3.706617091326712e-05, "loss": 0.1445, "step": 908 }, { "epoch": 0.2, "grad_norm": 0.7272965199740121, "learning_rate": 3.705874790779032e-05, "loss": 0.1616, "step": 909 }, { "epoch": 0.2, "grad_norm": 0.7658933441744524, "learning_rate": 3.705131626877664e-05, "loss": 0.2129, "step": 910 }, { "epoch": 0.2, "grad_norm": 0.6850082803142199, "learning_rate": 3.7043875999987254e-05, "loss": 0.1677, "step": 911 }, { "epoch": 0.2, "grad_norm": 0.6362997911677422, "learning_rate": 3.7036427105187754e-05, "loss": 0.1608, "step": 912 }, { "epoch": 0.2, "grad_norm": 0.6987606444597272, "learning_rate": 3.7028969588148056e-05, "loss": 0.1946, "step": 913 }, { "epoch": 0.2, "grad_norm": 0.7026652210384798, "learning_rate": 3.702150345264247e-05, "loss": 0.1781, "step": 914 }, { "epoch": 0.2, "grad_norm": 0.7209204369399616, "learning_rate": 3.701402870244963e-05, "loss": 0.1754, "step": 915 }, { "epoch": 0.2, "grad_norm": 0.727384123209215, "learning_rate": 3.700654534135257e-05, "loss": 0.1557, "step": 916 }, { "epoch": 0.2, "grad_norm": 0.6332642391018083, "learning_rate": 3.699905337313866e-05, "loss": 0.1417, "step": 917 }, { "epoch": 0.2, "grad_norm": 0.6780052852874722, "learning_rate": 3.699155280159964e-05, "loss": 0.1562, "step": 918 }, { "epoch": 0.2, "grad_norm": 0.7154214327932445, "learning_rate": 3.698404363053158e-05, "loss": 0.1461, "step": 919 }, { "epoch": 0.2, "grad_norm": 0.700188076774634, "learning_rate": 3.697652586373493e-05, "loss": 0.1707, "step": 920 }, { "epoch": 0.2, "grad_norm": 0.7230025827617601, "learning_rate": 3.696899950501447e-05, "loss": 0.1257, "step": 921 }, { "epoch": 0.2, "grad_norm": 0.778229060740858, "learning_rate": 3.6961464558179333e-05, "loss": 0.1885, "step": 922 }, { "epoch": 0.2, "grad_norm": 0.7561932991777188, "learning_rate": 3.695392102704302e-05, "loss": 0.171, "step": 923 }, { "epoch": 0.2, "grad_norm": 0.7260621922208205, "learning_rate": 3.694636891542334e-05, "loss": 0.1571, "step": 924 }, { "epoch": 0.2, "grad_norm": 0.7317835353052108, "learning_rate": 3.693880822714247e-05, "loss": 0.1773, "step": 925 }, { "epoch": 0.2, "grad_norm": 0.6661179092038415, "learning_rate": 3.693123896602692e-05, "loss": 0.1748, "step": 926 }, { "epoch": 0.2, "grad_norm": 0.6743277867135127, "learning_rate": 3.692366113590754e-05, "loss": 0.1851, "step": 927 }, { "epoch": 0.2, "grad_norm": 0.6938020824093218, "learning_rate": 3.691607474061951e-05, "loss": 0.164, "step": 928 }, { "epoch": 0.2, "grad_norm": 0.6384162741338156, "learning_rate": 3.690847978400236e-05, "loss": 0.1469, "step": 929 }, { "epoch": 0.2, "grad_norm": 0.6410681192219297, "learning_rate": 3.690087626989994e-05, "loss": 0.1861, "step": 930 }, { "epoch": 0.2, "grad_norm": 0.5976785393840714, "learning_rate": 3.689326420216044e-05, "loss": 0.1303, "step": 931 }, { "epoch": 0.2, "grad_norm": 0.6056843527575427, "learning_rate": 3.6885643584636366e-05, "loss": 0.1374, "step": 932 }, { "epoch": 0.2, "grad_norm": 0.6691852000232525, "learning_rate": 3.6878014421184565e-05, "loss": 0.1767, "step": 933 }, { "epoch": 0.21, "grad_norm": 0.6218312041996754, "learning_rate": 3.68703767156662e-05, "loss": 0.1642, "step": 934 }, { "epoch": 0.21, "grad_norm": 0.6618795080274562, "learning_rate": 3.6862730471946766e-05, "loss": 0.1342, "step": 935 }, { "epoch": 0.21, "grad_norm": 0.6863753483288044, "learning_rate": 3.685507569389606e-05, "loss": 0.1342, "step": 936 }, { "epoch": 0.21, "grad_norm": 0.6551060924803995, "learning_rate": 3.6847412385388236e-05, "loss": 0.1366, "step": 937 }, { "epoch": 0.21, "grad_norm": 0.7465156840052052, "learning_rate": 3.683974055030172e-05, "loss": 0.1627, "step": 938 }, { "epoch": 0.21, "grad_norm": 0.6625105281209221, "learning_rate": 3.6832060192519286e-05, "loss": 0.1526, "step": 939 }, { "epoch": 0.21, "grad_norm": 0.7086275742437467, "learning_rate": 3.6824371315928e-05, "loss": 0.1485, "step": 940 }, { "epoch": 0.21, "grad_norm": 0.7417666798211808, "learning_rate": 3.681667392441926e-05, "loss": 0.1433, "step": 941 }, { "epoch": 0.21, "grad_norm": 0.7386253806277999, "learning_rate": 3.680896802188876e-05, "loss": 0.1707, "step": 942 }, { "epoch": 0.21, "grad_norm": 0.7037793611394684, "learning_rate": 3.6801253612236506e-05, "loss": 0.1838, "step": 943 }, { "epoch": 0.21, "grad_norm": 0.6457543991417521, "learning_rate": 3.679353069936681e-05, "loss": 0.1514, "step": 944 }, { "epoch": 0.21, "grad_norm": 0.7350562567817532, "learning_rate": 3.678579928718827e-05, "loss": 0.1374, "step": 945 }, { "epoch": 0.21, "grad_norm": 0.6461300988449086, "learning_rate": 3.6778059379613815e-05, "loss": 0.1402, "step": 946 }, { "epoch": 0.21, "grad_norm": 0.6394658184714149, "learning_rate": 3.6770310980560654e-05, "loss": 0.1477, "step": 947 }, { "epoch": 0.21, "grad_norm": 0.7594333893105324, "learning_rate": 3.676255409395031e-05, "loss": 0.2031, "step": 948 }, { "epoch": 0.21, "grad_norm": 0.7019386880161659, "learning_rate": 3.675478872370858e-05, "loss": 0.1665, "step": 949 }, { "epoch": 0.21, "grad_norm": 0.6626999283463161, "learning_rate": 3.674701487376557e-05, "loss": 0.1439, "step": 950 }, { "epoch": 0.21, "grad_norm": 0.6551620456940492, "learning_rate": 3.673923254805566e-05, "loss": 0.1425, "step": 951 }, { "epoch": 0.21, "grad_norm": 0.63563867244813, "learning_rate": 3.6731441750517566e-05, "loss": 0.1256, "step": 952 }, { "epoch": 0.21, "grad_norm": 0.6957176828119622, "learning_rate": 3.672364248509422e-05, "loss": 0.147, "step": 953 }, { "epoch": 0.21, "grad_norm": 0.7562490242565576, "learning_rate": 3.67158347557329e-05, "loss": 0.2017, "step": 954 }, { "epoch": 0.21, "grad_norm": 0.6225715905123154, "learning_rate": 3.670801856638514e-05, "loss": 0.1394, "step": 955 }, { "epoch": 0.21, "grad_norm": 0.678394908191864, "learning_rate": 3.6700193921006766e-05, "loss": 0.1524, "step": 956 }, { "epoch": 0.21, "grad_norm": 0.7398774587490798, "learning_rate": 3.669236082355787e-05, "loss": 0.1792, "step": 957 }, { "epoch": 0.21, "grad_norm": 0.6206214596588789, "learning_rate": 3.668451927800283e-05, "loss": 0.1363, "step": 958 }, { "epoch": 0.21, "grad_norm": 0.6027596192132293, "learning_rate": 3.667666928831032e-05, "loss": 0.1196, "step": 959 }, { "epoch": 0.21, "grad_norm": 0.7351357598038659, "learning_rate": 3.666881085845324e-05, "loss": 0.2001, "step": 960 }, { "epoch": 0.21, "grad_norm": 0.5988897173734646, "learning_rate": 3.6660943992408817e-05, "loss": 0.1227, "step": 961 }, { "epoch": 0.21, "grad_norm": 0.6458349279158401, "learning_rate": 3.66530686941585e-05, "loss": 0.1292, "step": 962 }, { "epoch": 0.21, "grad_norm": 0.7829938118934919, "learning_rate": 3.664518496768802e-05, "loss": 0.1853, "step": 963 }, { "epoch": 0.21, "grad_norm": 0.6575365851561895, "learning_rate": 3.663729281698741e-05, "loss": 0.1553, "step": 964 }, { "epoch": 0.21, "grad_norm": 0.6642437855720914, "learning_rate": 3.662939224605091e-05, "loss": 0.142, "step": 965 }, { "epoch": 0.21, "grad_norm": 0.6479234151294552, "learning_rate": 3.6621483258877055e-05, "loss": 0.1318, "step": 966 }, { "epoch": 0.21, "grad_norm": 0.7309571127932903, "learning_rate": 3.6613565859468626e-05, "loss": 0.1765, "step": 967 }, { "epoch": 0.21, "grad_norm": 0.7002418747321221, "learning_rate": 3.660564005183268e-05, "loss": 0.179, "step": 968 }, { "epoch": 0.21, "grad_norm": 0.7563400514440753, "learning_rate": 3.659770583998051e-05, "loss": 0.1672, "step": 969 }, { "epoch": 0.21, "grad_norm": 0.77837280709059, "learning_rate": 3.658976322792766e-05, "loss": 0.2449, "step": 970 }, { "epoch": 0.21, "grad_norm": 0.6816422864134033, "learning_rate": 3.658181221969395e-05, "loss": 0.1447, "step": 971 }, { "epoch": 0.21, "grad_norm": 0.6553011346867443, "learning_rate": 3.657385281930343e-05, "loss": 0.1439, "step": 972 }, { "epoch": 0.21, "grad_norm": 0.5941559257267682, "learning_rate": 3.65658850307844e-05, "loss": 0.1584, "step": 973 }, { "epoch": 0.21, "grad_norm": 0.6940833811429217, "learning_rate": 3.65579088581694e-05, "loss": 0.1608, "step": 974 }, { "epoch": 0.21, "grad_norm": 0.6568072768488372, "learning_rate": 3.6549924305495225e-05, "loss": 0.1553, "step": 975 }, { "epoch": 0.21, "grad_norm": 0.7325600048784592, "learning_rate": 3.6541931376802906e-05, "loss": 0.1493, "step": 976 }, { "epoch": 0.21, "grad_norm": 0.5516350533636646, "learning_rate": 3.653393007613771e-05, "loss": 0.1195, "step": 977 }, { "epoch": 0.21, "grad_norm": 0.7327823413100717, "learning_rate": 3.652592040754917e-05, "loss": 0.1737, "step": 978 }, { "epoch": 0.22, "grad_norm": 0.6378932955160965, "learning_rate": 3.651790237509098e-05, "loss": 0.1621, "step": 979 }, { "epoch": 0.22, "grad_norm": 0.6945962967680264, "learning_rate": 3.650987598282116e-05, "loss": 0.1377, "step": 980 }, { "epoch": 0.22, "grad_norm": 0.6427290487016571, "learning_rate": 3.6501841234801886e-05, "loss": 0.1622, "step": 981 }, { "epoch": 0.22, "grad_norm": 0.6658021914902404, "learning_rate": 3.649379813509961e-05, "loss": 0.1562, "step": 982 }, { "epoch": 0.22, "grad_norm": 0.6810380080871552, "learning_rate": 3.648574668778499e-05, "loss": 0.1648, "step": 983 }, { "epoch": 0.22, "grad_norm": 0.7029924267717719, "learning_rate": 3.647768689693291e-05, "loss": 0.1484, "step": 984 }, { "epoch": 0.22, "grad_norm": 0.8445207819259016, "learning_rate": 3.646961876662248e-05, "loss": 0.2061, "step": 985 }, { "epoch": 0.22, "grad_norm": 0.6621453829626172, "learning_rate": 3.6461542300937035e-05, "loss": 0.1546, "step": 986 }, { "epoch": 0.22, "grad_norm": 0.7710667892427043, "learning_rate": 3.645345750396412e-05, "loss": 0.186, "step": 987 }, { "epoch": 0.22, "grad_norm": 0.6722052228392159, "learning_rate": 3.64453643797955e-05, "loss": 0.1712, "step": 988 }, { "epoch": 0.22, "grad_norm": 0.6294382345032234, "learning_rate": 3.643726293252717e-05, "loss": 0.1267, "step": 989 }, { "epoch": 0.22, "grad_norm": 0.6357988604761875, "learning_rate": 3.642915316625929e-05, "loss": 0.14, "step": 990 }, { "epoch": 0.22, "grad_norm": 0.7048080222855339, "learning_rate": 3.642103508509629e-05, "loss": 0.1741, "step": 991 }, { "epoch": 0.22, "grad_norm": 0.5709092662827024, "learning_rate": 3.641290869314676e-05, "loss": 0.1459, "step": 992 }, { "epoch": 0.22, "grad_norm": 0.7015535808582499, "learning_rate": 3.640477399452354e-05, "loss": 0.1733, "step": 993 }, { "epoch": 0.22, "grad_norm": 0.6066420477109242, "learning_rate": 3.639663099334363e-05, "loss": 0.1353, "step": 994 }, { "epoch": 0.22, "grad_norm": 0.5647230459909748, "learning_rate": 3.6388479693728266e-05, "loss": 0.1051, "step": 995 }, { "epoch": 0.22, "grad_norm": 0.6302293400902045, "learning_rate": 3.638032009980286e-05, "loss": 0.1498, "step": 996 }, { "epoch": 0.22, "grad_norm": 0.7179210692669409, "learning_rate": 3.637215221569705e-05, "loss": 0.1741, "step": 997 }, { "epoch": 0.22, "grad_norm": 0.6376237580841241, "learning_rate": 3.636397604554463e-05, "loss": 0.1267, "step": 998 }, { "epoch": 0.22, "grad_norm": 0.6459328410696206, "learning_rate": 3.635579159348362e-05, "loss": 0.1058, "step": 999 }, { "epoch": 0.22, "grad_norm": 0.7772120071799035, "learning_rate": 3.634759886365623e-05, "loss": 0.1715, "step": 1000 }, { "epoch": 0.22, "grad_norm": 0.6411320145391166, "learning_rate": 3.633939786020884e-05, "loss": 0.1518, "step": 1001 }, { "epoch": 0.22, "grad_norm": 0.6501649573892895, "learning_rate": 3.633118858729203e-05, "loss": 0.1375, "step": 1002 }, { "epoch": 0.22, "grad_norm": 0.6743897461008136, "learning_rate": 3.632297104906057e-05, "loss": 0.156, "step": 1003 }, { "epoch": 0.22, "grad_norm": 0.6672403479914584, "learning_rate": 3.63147452496734e-05, "loss": 0.1124, "step": 1004 }, { "epoch": 0.22, "grad_norm": 0.6429425165843409, "learning_rate": 3.6306511193293636e-05, "loss": 0.1337, "step": 1005 }, { "epoch": 0.22, "grad_norm": 0.6776541626704364, "learning_rate": 3.629826888408861e-05, "loss": 0.1574, "step": 1006 }, { "epoch": 0.22, "grad_norm": 0.7718525068581041, "learning_rate": 3.629001832622979e-05, "loss": 0.2403, "step": 1007 }, { "epoch": 0.22, "grad_norm": 0.6612823182052278, "learning_rate": 3.628175952389283e-05, "loss": 0.1602, "step": 1008 }, { "epoch": 0.22, "grad_norm": 0.6752689823461115, "learning_rate": 3.627349248125757e-05, "loss": 0.1424, "step": 1009 }, { "epoch": 0.22, "grad_norm": 0.5796774048453173, "learning_rate": 3.6265217202508006e-05, "loss": 0.134, "step": 1010 }, { "epoch": 0.22, "grad_norm": 0.6309566599944132, "learning_rate": 3.625693369183231e-05, "loss": 0.1365, "step": 1011 }, { "epoch": 0.22, "grad_norm": 0.6002505226450737, "learning_rate": 3.624864195342281e-05, "loss": 0.144, "step": 1012 }, { "epoch": 0.22, "grad_norm": 0.7674195414564663, "learning_rate": 3.624034199147602e-05, "loss": 0.1914, "step": 1013 }, { "epoch": 0.22, "grad_norm": 0.7049269580203319, "learning_rate": 3.623203381019259e-05, "loss": 0.1618, "step": 1014 }, { "epoch": 0.22, "grad_norm": 0.6315551794383809, "learning_rate": 3.6223717413777346e-05, "loss": 0.116, "step": 1015 }, { "epoch": 0.22, "grad_norm": 0.6859663850789501, "learning_rate": 3.621539280643926e-05, "loss": 0.1655, "step": 1016 }, { "epoch": 0.22, "grad_norm": 0.6495537170312613, "learning_rate": 3.620705999239148e-05, "loss": 0.1481, "step": 1017 }, { "epoch": 0.22, "grad_norm": 0.6116131249215796, "learning_rate": 3.619871897585129e-05, "loss": 0.1341, "step": 1018 }, { "epoch": 0.22, "grad_norm": 0.5413486818915718, "learning_rate": 3.6190369761040116e-05, "loss": 0.1103, "step": 1019 }, { "epoch": 0.22, "grad_norm": 0.5985065221200078, "learning_rate": 3.618201235218356e-05, "loss": 0.1247, "step": 1020 }, { "epoch": 0.22, "grad_norm": 0.6556592734863363, "learning_rate": 3.617364675351136e-05, "loss": 0.1522, "step": 1021 }, { "epoch": 0.22, "grad_norm": 0.5977013764143297, "learning_rate": 3.61652729692574e-05, "loss": 0.1161, "step": 1022 }, { "epoch": 0.22, "grad_norm": 0.7310760747668861, "learning_rate": 3.615689100365968e-05, "loss": 0.1906, "step": 1023 }, { "epoch": 0.22, "grad_norm": 0.6738576689062206, "learning_rate": 3.6148500860960386e-05, "loss": 0.1552, "step": 1024 }, { "epoch": 0.23, "grad_norm": 0.5956457892169247, "learning_rate": 3.614010254540581e-05, "loss": 0.1199, "step": 1025 }, { "epoch": 0.23, "grad_norm": 0.6877198301331007, "learning_rate": 3.6131696061246405e-05, "loss": 0.1557, "step": 1026 }, { "epoch": 0.23, "grad_norm": 0.6930864253545237, "learning_rate": 3.612328141273673e-05, "loss": 0.1761, "step": 1027 }, { "epoch": 0.23, "grad_norm": 0.711283547158347, "learning_rate": 3.6114858604135496e-05, "loss": 0.1623, "step": 1028 }, { "epoch": 0.23, "grad_norm": 0.729195653248427, "learning_rate": 3.610642763970553e-05, "loss": 0.1848, "step": 1029 }, { "epoch": 0.23, "grad_norm": 0.563524690420537, "learning_rate": 3.6097988523713816e-05, "loss": 0.1184, "step": 1030 }, { "epoch": 0.23, "grad_norm": 0.6577990052567404, "learning_rate": 3.608954126043141e-05, "loss": 0.1756, "step": 1031 }, { "epoch": 0.23, "grad_norm": 0.6586686025055664, "learning_rate": 3.608108585413356e-05, "loss": 0.15, "step": 1032 }, { "epoch": 0.23, "grad_norm": 0.5790512809345955, "learning_rate": 3.6072622309099566e-05, "loss": 0.1078, "step": 1033 }, { "epoch": 0.23, "grad_norm": 0.7169676117411847, "learning_rate": 3.60641506296129e-05, "loss": 0.1548, "step": 1034 }, { "epoch": 0.23, "grad_norm": 0.6659877021730962, "learning_rate": 3.605567081996113e-05, "loss": 0.1395, "step": 1035 }, { "epoch": 0.23, "grad_norm": 0.6933439870926703, "learning_rate": 3.604718288443593e-05, "loss": 0.162, "step": 1036 }, { "epoch": 0.23, "grad_norm": 0.6841192351087376, "learning_rate": 3.60386868273331e-05, "loss": 0.1639, "step": 1037 }, { "epoch": 0.23, "grad_norm": 0.7478730320930891, "learning_rate": 3.603018265295255e-05, "loss": 0.1938, "step": 1038 }, { "epoch": 0.23, "grad_norm": 0.6873603592288243, "learning_rate": 3.60216703655983e-05, "loss": 0.1807, "step": 1039 }, { "epoch": 0.23, "grad_norm": 0.6615960711149769, "learning_rate": 3.601314996957845e-05, "loss": 0.1431, "step": 1040 }, { "epoch": 0.23, "grad_norm": 0.6790527141864818, "learning_rate": 3.600462146920525e-05, "loss": 0.1507, "step": 1041 }, { "epoch": 0.23, "grad_norm": 0.6228395314339072, "learning_rate": 3.5996084868795015e-05, "loss": 0.1275, "step": 1042 }, { "epoch": 0.23, "grad_norm": 0.6983925184063805, "learning_rate": 3.5987540172668164e-05, "loss": 0.1587, "step": 1043 }, { "epoch": 0.23, "grad_norm": 0.6764855215492669, "learning_rate": 3.597898738514923e-05, "loss": 0.1606, "step": 1044 }, { "epoch": 0.23, "grad_norm": 0.6701212772521431, "learning_rate": 3.5970426510566824e-05, "loss": 0.1587, "step": 1045 }, { "epoch": 0.23, "grad_norm": 0.6212399628517273, "learning_rate": 3.5961857553253665e-05, "loss": 0.1464, "step": 1046 }, { "epoch": 0.23, "grad_norm": 0.622063222033276, "learning_rate": 3.595328051754654e-05, "loss": 0.1172, "step": 1047 }, { "epoch": 0.23, "grad_norm": 0.6384653346584779, "learning_rate": 3.594469540778637e-05, "loss": 0.1121, "step": 1048 }, { "epoch": 0.23, "grad_norm": 0.704067827281616, "learning_rate": 3.593610222831809e-05, "loss": 0.1264, "step": 1049 }, { "epoch": 0.23, "grad_norm": 0.655922558247646, "learning_rate": 3.59275009834908e-05, "loss": 0.1078, "step": 1050 }, { "epoch": 0.23, "grad_norm": 0.7779504143522625, "learning_rate": 3.591889167765762e-05, "loss": 0.1519, "step": 1051 }, { "epoch": 0.23, "grad_norm": 0.7507066576498731, "learning_rate": 3.591027431517577e-05, "loss": 0.1231, "step": 1052 }, { "epoch": 0.23, "grad_norm": 0.6901421494497663, "learning_rate": 3.590164890040657e-05, "loss": 0.1529, "step": 1053 }, { "epoch": 0.23, "grad_norm": 0.7730705084134945, "learning_rate": 3.589301543771537e-05, "loss": 0.1819, "step": 1054 }, { "epoch": 0.23, "grad_norm": 0.6758688500577673, "learning_rate": 3.588437393147164e-05, "loss": 0.1753, "step": 1055 }, { "epoch": 0.23, "grad_norm": 0.6888931204638528, "learning_rate": 3.587572438604889e-05, "loss": 0.1456, "step": 1056 }, { "epoch": 0.23, "grad_norm": 0.7073774385951197, "learning_rate": 3.586706680582471e-05, "loss": 0.1722, "step": 1057 }, { "epoch": 0.23, "grad_norm": 0.6509485915727481, "learning_rate": 3.585840119518075e-05, "loss": 0.1305, "step": 1058 }, { "epoch": 0.23, "grad_norm": 0.6538960381990047, "learning_rate": 3.584972755850273e-05, "loss": 0.1341, "step": 1059 }, { "epoch": 0.23, "grad_norm": 0.6963019242777403, "learning_rate": 3.584104590018044e-05, "loss": 0.1436, "step": 1060 }, { "epoch": 0.23, "grad_norm": 0.6382712644732104, "learning_rate": 3.58323562246077e-05, "loss": 0.1604, "step": 1061 }, { "epoch": 0.23, "grad_norm": 0.6796882673237294, "learning_rate": 3.5823658536182426e-05, "loss": 0.1758, "step": 1062 }, { "epoch": 0.23, "grad_norm": 0.6260051512475788, "learning_rate": 3.5814952839306574e-05, "loss": 0.1561, "step": 1063 }, { "epoch": 0.23, "grad_norm": 0.6347693340486589, "learning_rate": 3.580623913838613e-05, "loss": 0.1605, "step": 1064 }, { "epoch": 0.23, "grad_norm": 0.6308013133443539, "learning_rate": 3.579751743783118e-05, "loss": 0.1308, "step": 1065 }, { "epoch": 0.23, "grad_norm": 0.5812309702967815, "learning_rate": 3.578878774205581e-05, "loss": 0.1118, "step": 1066 }, { "epoch": 0.23, "grad_norm": 0.6310631390462856, "learning_rate": 3.578005005547817e-05, "loss": 0.1333, "step": 1067 }, { "epoch": 0.23, "grad_norm": 0.6877280203948094, "learning_rate": 3.577130438252046e-05, "loss": 0.1531, "step": 1068 }, { "epoch": 0.23, "grad_norm": 0.6483847585163999, "learning_rate": 3.576255072760893e-05, "loss": 0.1143, "step": 1069 }, { "epoch": 0.24, "grad_norm": 0.6452634640367501, "learning_rate": 3.575378909517385e-05, "loss": 0.1313, "step": 1070 }, { "epoch": 0.24, "grad_norm": 0.6900713690875435, "learning_rate": 3.574501948964954e-05, "loss": 0.1479, "step": 1071 }, { "epoch": 0.24, "grad_norm": 0.7639374722338221, "learning_rate": 3.5736241915474345e-05, "loss": 0.1593, "step": 1072 }, { "epoch": 0.24, "grad_norm": 0.6134283533254882, "learning_rate": 3.572745637709065e-05, "loss": 0.1205, "step": 1073 }, { "epoch": 0.24, "grad_norm": 0.7151564450937763, "learning_rate": 3.5718662878944876e-05, "loss": 0.1524, "step": 1074 }, { "epoch": 0.24, "grad_norm": 0.5888968945754794, "learning_rate": 3.570986142548746e-05, "loss": 0.1039, "step": 1075 }, { "epoch": 0.24, "grad_norm": 0.6295134178319302, "learning_rate": 3.5701052021172874e-05, "loss": 0.1472, "step": 1076 }, { "epoch": 0.24, "grad_norm": 0.6032800307183738, "learning_rate": 3.5692234670459615e-05, "loss": 0.165, "step": 1077 }, { "epoch": 0.24, "grad_norm": 0.7610619476509329, "learning_rate": 3.5683409377810185e-05, "loss": 0.1877, "step": 1078 }, { "epoch": 0.24, "grad_norm": 0.6134598830225814, "learning_rate": 3.567457614769113e-05, "loss": 0.1307, "step": 1079 }, { "epoch": 0.24, "grad_norm": 0.6395293146696429, "learning_rate": 3.566573498457301e-05, "loss": 0.1499, "step": 1080 }, { "epoch": 0.24, "grad_norm": 0.6567376459395161, "learning_rate": 3.5656885892930376e-05, "loss": 0.1432, "step": 1081 }, { "epoch": 0.24, "grad_norm": 0.6329789825719756, "learning_rate": 3.564802887724181e-05, "loss": 0.1358, "step": 1082 }, { "epoch": 0.24, "grad_norm": 0.7292218096636005, "learning_rate": 3.563916394198991e-05, "loss": 0.1604, "step": 1083 }, { "epoch": 0.24, "grad_norm": 0.683673200077468, "learning_rate": 3.5630291091661276e-05, "loss": 0.136, "step": 1084 }, { "epoch": 0.24, "grad_norm": 0.5416118215035685, "learning_rate": 3.562141033074649e-05, "loss": 0.1176, "step": 1085 }, { "epoch": 0.24, "grad_norm": 0.61694461534053, "learning_rate": 3.5612521663740183e-05, "loss": 0.1251, "step": 1086 }, { "epoch": 0.24, "grad_norm": 0.6421387164295951, "learning_rate": 3.560362509514096e-05, "loss": 0.1551, "step": 1087 }, { "epoch": 0.24, "grad_norm": 0.6245773793912889, "learning_rate": 3.5594720629451414e-05, "loss": 0.1375, "step": 1088 }, { "epoch": 0.24, "grad_norm": 0.5643563897157868, "learning_rate": 3.558580827117817e-05, "loss": 0.1104, "step": 1089 }, { "epoch": 0.24, "grad_norm": 0.6756127207978516, "learning_rate": 3.557688802483181e-05, "loss": 0.1397, "step": 1090 }, { "epoch": 0.24, "grad_norm": 0.6775988792391023, "learning_rate": 3.556795989492694e-05, "loss": 0.1342, "step": 1091 }, { "epoch": 0.24, "grad_norm": 0.7394963916306223, "learning_rate": 3.555902388598213e-05, "loss": 0.1246, "step": 1092 }, { "epoch": 0.24, "grad_norm": 0.6586100511807396, "learning_rate": 3.555008000251995e-05, "loss": 0.1442, "step": 1093 }, { "epoch": 0.24, "grad_norm": 0.6084882691317554, "learning_rate": 3.554112824906696e-05, "loss": 0.1148, "step": 1094 }, { "epoch": 0.24, "grad_norm": 0.6846720447127864, "learning_rate": 3.55321686301537e-05, "loss": 0.1451, "step": 1095 }, { "epoch": 0.24, "grad_norm": 0.7111722537625023, "learning_rate": 3.552320115031468e-05, "loss": 0.1472, "step": 1096 }, { "epoch": 0.24, "grad_norm": 0.6502074020788126, "learning_rate": 3.55142258140884e-05, "loss": 0.15, "step": 1097 }, { "epoch": 0.24, "grad_norm": 0.6718778610168578, "learning_rate": 3.5505242626017326e-05, "loss": 0.1447, "step": 1098 }, { "epoch": 0.24, "grad_norm": 0.6165806889739158, "learning_rate": 3.549625159064792e-05, "loss": 0.133, "step": 1099 }, { "epoch": 0.24, "grad_norm": 0.6153270348904686, "learning_rate": 3.5487252712530583e-05, "loss": 0.1295, "step": 1100 }, { "epoch": 0.24, "grad_norm": 0.6274481272059175, "learning_rate": 3.547824599621971e-05, "loss": 0.1329, "step": 1101 }, { "epoch": 0.24, "grad_norm": 0.661905555524102, "learning_rate": 3.546923144627366e-05, "loss": 0.1649, "step": 1102 }, { "epoch": 0.24, "grad_norm": 0.5984050572617661, "learning_rate": 3.546020906725474e-05, "loss": 0.1405, "step": 1103 }, { "epoch": 0.24, "grad_norm": 0.5674648783650594, "learning_rate": 3.5451178863729244e-05, "loss": 0.1001, "step": 1104 }, { "epoch": 0.24, "grad_norm": 0.5261440618084035, "learning_rate": 3.5442140840267404e-05, "loss": 0.1103, "step": 1105 }, { "epoch": 0.24, "grad_norm": 0.6960247977514382, "learning_rate": 3.543309500144343e-05, "loss": 0.159, "step": 1106 }, { "epoch": 0.24, "grad_norm": 0.6214548177062327, "learning_rate": 3.542404135183547e-05, "loss": 0.1372, "step": 1107 }, { "epoch": 0.24, "grad_norm": 0.6376840494046262, "learning_rate": 3.541497989602562e-05, "loss": 0.1349, "step": 1108 }, { "epoch": 0.24, "grad_norm": 0.6090079449196457, "learning_rate": 3.540591063859996e-05, "loss": 0.124, "step": 1109 }, { "epoch": 0.24, "grad_norm": 0.6539680223900682, "learning_rate": 3.539683358414848e-05, "loss": 0.1531, "step": 1110 }, { "epoch": 0.24, "grad_norm": 0.7891131847457237, "learning_rate": 3.538774873726514e-05, "loss": 0.172, "step": 1111 }, { "epoch": 0.24, "grad_norm": 0.6280573215367934, "learning_rate": 3.537865610254784e-05, "loss": 0.1262, "step": 1112 }, { "epoch": 0.24, "grad_norm": 0.6193859444660762, "learning_rate": 3.536955568459841e-05, "loss": 0.1145, "step": 1113 }, { "epoch": 0.24, "grad_norm": 0.5785477613220341, "learning_rate": 3.536044748802263e-05, "loss": 0.143, "step": 1114 }, { "epoch": 0.24, "grad_norm": 0.5395700856893703, "learning_rate": 3.535133151743022e-05, "loss": 0.1128, "step": 1115 }, { "epoch": 0.25, "grad_norm": 0.5701944934567039, "learning_rate": 3.534220777743482e-05, "loss": 0.1186, "step": 1116 }, { "epoch": 0.25, "grad_norm": 0.6152525157373263, "learning_rate": 3.5333076272654014e-05, "loss": 0.128, "step": 1117 }, { "epoch": 0.25, "grad_norm": 0.6256664061206213, "learning_rate": 3.532393700770932e-05, "loss": 0.1221, "step": 1118 }, { "epoch": 0.25, "grad_norm": 0.6757214911173773, "learning_rate": 3.5314789987226156e-05, "loss": 0.1129, "step": 1119 }, { "epoch": 0.25, "grad_norm": 0.6246653137541823, "learning_rate": 3.5305635215833914e-05, "loss": 0.1228, "step": 1120 }, { "epoch": 0.25, "grad_norm": 0.6707085530900672, "learning_rate": 3.5296472698165856e-05, "loss": 0.1376, "step": 1121 }, { "epoch": 0.25, "grad_norm": 0.6179075025066263, "learning_rate": 3.5287302438859204e-05, "loss": 0.1042, "step": 1122 }, { "epoch": 0.25, "grad_norm": 0.6481782785733887, "learning_rate": 3.5278124442555066e-05, "loss": 0.1291, "step": 1123 }, { "epoch": 0.25, "grad_norm": 0.6074076709129159, "learning_rate": 3.526893871389849e-05, "loss": 0.1111, "step": 1124 }, { "epoch": 0.25, "grad_norm": 0.6630142240551968, "learning_rate": 3.5259745257538443e-05, "loss": 0.1322, "step": 1125 }, { "epoch": 0.25, "grad_norm": 0.7361292481937083, "learning_rate": 3.525054407812777e-05, "loss": 0.1826, "step": 1126 }, { "epoch": 0.25, "grad_norm": 0.5535894189934888, "learning_rate": 3.524133518032325e-05, "loss": 0.1087, "step": 1127 }, { "epoch": 0.25, "grad_norm": 0.8351112988813633, "learning_rate": 3.5232118568785565e-05, "loss": 0.244, "step": 1128 }, { "epoch": 0.25, "grad_norm": 0.6121295635932749, "learning_rate": 3.52228942481793e-05, "loss": 0.1496, "step": 1129 }, { "epoch": 0.25, "grad_norm": 0.5354744737740527, "learning_rate": 3.5213662223172935e-05, "loss": 0.1045, "step": 1130 }, { "epoch": 0.25, "grad_norm": 0.6452633485710266, "learning_rate": 3.520442249843887e-05, "loss": 0.1192, "step": 1131 }, { "epoch": 0.25, "grad_norm": 0.6178008785901008, "learning_rate": 3.5195175078653355e-05, "loss": 0.1412, "step": 1132 }, { "epoch": 0.25, "grad_norm": 0.5797864392513596, "learning_rate": 3.51859199684966e-05, "loss": 0.0976, "step": 1133 }, { "epoch": 0.25, "grad_norm": 0.6622076271098327, "learning_rate": 3.517665717265265e-05, "loss": 0.1363, "step": 1134 }, { "epoch": 0.25, "grad_norm": 0.6045944053491373, "learning_rate": 3.516738669580947e-05, "loss": 0.1131, "step": 1135 }, { "epoch": 0.25, "grad_norm": 0.6398177294162027, "learning_rate": 3.5158108542658915e-05, "loss": 0.1404, "step": 1136 }, { "epoch": 0.25, "grad_norm": 0.6773543157930411, "learning_rate": 3.5148822717896694e-05, "loss": 0.1461, "step": 1137 }, { "epoch": 0.25, "grad_norm": 0.5875629005848713, "learning_rate": 3.513952922622243e-05, "loss": 0.1133, "step": 1138 }, { "epoch": 0.25, "grad_norm": 0.6161178059033141, "learning_rate": 3.513022807233964e-05, "loss": 0.1102, "step": 1139 }, { "epoch": 0.25, "grad_norm": 0.6611613235073424, "learning_rate": 3.5120919260955655e-05, "loss": 0.1372, "step": 1140 }, { "epoch": 0.25, "grad_norm": 0.6502130397718782, "learning_rate": 3.511160279678174e-05, "loss": 0.1408, "step": 1141 }, { "epoch": 0.25, "grad_norm": 0.592408848966039, "learning_rate": 3.510227868453302e-05, "loss": 0.104, "step": 1142 }, { "epoch": 0.25, "grad_norm": 0.6089071776647552, "learning_rate": 3.509294692892847e-05, "loss": 0.1135, "step": 1143 }, { "epoch": 0.25, "grad_norm": 0.6034376470564629, "learning_rate": 3.508360753469097e-05, "loss": 0.1398, "step": 1144 }, { "epoch": 0.25, "grad_norm": 0.6381865824576154, "learning_rate": 3.5074260506547225e-05, "loss": 0.1279, "step": 1145 }, { "epoch": 0.25, "grad_norm": 0.6098214250407791, "learning_rate": 3.506490584922784e-05, "loss": 0.1167, "step": 1146 }, { "epoch": 0.25, "grad_norm": 0.699568101782631, "learning_rate": 3.5055543567467244e-05, "loss": 0.1656, "step": 1147 }, { "epoch": 0.25, "grad_norm": 0.5408009953766895, "learning_rate": 3.504617366600376e-05, "loss": 0.115, "step": 1148 }, { "epoch": 0.25, "grad_norm": 0.5839136648587737, "learning_rate": 3.503679614957955e-05, "loss": 0.1162, "step": 1149 }, { "epoch": 0.25, "grad_norm": 0.7622599708603124, "learning_rate": 3.502741102294063e-05, "loss": 0.1708, "step": 1150 }, { "epoch": 0.25, "grad_norm": 0.6580762882445695, "learning_rate": 3.501801829083688e-05, "loss": 0.1396, "step": 1151 }, { "epoch": 0.25, "grad_norm": 0.5034837652030538, "learning_rate": 3.500861795802201e-05, "loss": 0.1035, "step": 1152 }, { "epoch": 0.25, "grad_norm": 0.6301220343485374, "learning_rate": 3.499921002925357e-05, "loss": 0.134, "step": 1153 }, { "epoch": 0.25, "grad_norm": 0.6759966027552546, "learning_rate": 3.4989794509293005e-05, "loss": 0.134, "step": 1154 }, { "epoch": 0.25, "grad_norm": 0.5163242338440555, "learning_rate": 3.498037140290555e-05, "loss": 0.0933, "step": 1155 }, { "epoch": 0.25, "grad_norm": 0.6237718913913396, "learning_rate": 3.497094071486029e-05, "loss": 0.1475, "step": 1156 }, { "epoch": 0.25, "grad_norm": 0.5704668175929202, "learning_rate": 3.4961502449930165e-05, "loss": 0.1297, "step": 1157 }, { "epoch": 0.25, "grad_norm": 0.6364792917633192, "learning_rate": 3.495205661289193e-05, "loss": 0.1467, "step": 1158 }, { "epoch": 0.25, "grad_norm": 0.6446432305362467, "learning_rate": 3.494260320852619e-05, "loss": 0.1305, "step": 1159 }, { "epoch": 0.25, "grad_norm": 0.6163755701743595, "learning_rate": 3.493314224161737e-05, "loss": 0.104, "step": 1160 }, { "epoch": 0.25, "grad_norm": 0.5823381370149056, "learning_rate": 3.4923673716953717e-05, "loss": 0.1106, "step": 1161 }, { "epoch": 0.26, "grad_norm": 0.5806140158264087, "learning_rate": 3.4914197639327306e-05, "loss": 0.1231, "step": 1162 }, { "epoch": 0.26, "grad_norm": 0.5533175408118538, "learning_rate": 3.490471401353405e-05, "loss": 0.0922, "step": 1163 }, { "epoch": 0.26, "grad_norm": 0.7623329229694302, "learning_rate": 3.489522284437366e-05, "loss": 0.1804, "step": 1164 }, { "epoch": 0.26, "grad_norm": 0.6246931753430318, "learning_rate": 3.488572413664969e-05, "loss": 0.164, "step": 1165 }, { "epoch": 0.26, "grad_norm": 0.5766952465211387, "learning_rate": 3.4876217895169474e-05, "loss": 0.1108, "step": 1166 }, { "epoch": 0.26, "grad_norm": 0.5636533991904469, "learning_rate": 3.4866704124744196e-05, "loss": 0.1229, "step": 1167 }, { "epoch": 0.26, "grad_norm": 0.5361245854759259, "learning_rate": 3.4857182830188816e-05, "loss": 0.0994, "step": 1168 }, { "epoch": 0.26, "grad_norm": 0.6510711408936877, "learning_rate": 3.484765401632214e-05, "loss": 0.1251, "step": 1169 }, { "epoch": 0.26, "grad_norm": 0.6100764028197836, "learning_rate": 3.483811768796674e-05, "loss": 0.1093, "step": 1170 }, { "epoch": 0.26, "grad_norm": 0.6169278286668669, "learning_rate": 3.482857384994903e-05, "loss": 0.1295, "step": 1171 }, { "epoch": 0.26, "grad_norm": 0.6201544082164446, "learning_rate": 3.4819022507099184e-05, "loss": 0.122, "step": 1172 }, { "epoch": 0.26, "grad_norm": 0.5516199530485807, "learning_rate": 3.480946366425121e-05, "loss": 0.1092, "step": 1173 }, { "epoch": 0.26, "grad_norm": 0.7000214215815579, "learning_rate": 3.4799897326242895e-05, "loss": 0.1739, "step": 1174 }, { "epoch": 0.26, "grad_norm": 0.632988563816966, "learning_rate": 3.479032349791581e-05, "loss": 0.1376, "step": 1175 }, { "epoch": 0.26, "grad_norm": 0.6082218703833914, "learning_rate": 3.478074218411534e-05, "loss": 0.1378, "step": 1176 }, { "epoch": 0.26, "grad_norm": 0.5413108598529882, "learning_rate": 3.477115338969065e-05, "loss": 0.0979, "step": 1177 }, { "epoch": 0.26, "grad_norm": 0.5812498390033458, "learning_rate": 3.476155711949467e-05, "loss": 0.1012, "step": 1178 }, { "epoch": 0.26, "grad_norm": 0.6965512588572845, "learning_rate": 3.475195337838415e-05, "loss": 0.1541, "step": 1179 }, { "epoch": 0.26, "grad_norm": 0.5625723951164735, "learning_rate": 3.474234217121959e-05, "loss": 0.1207, "step": 1180 }, { "epoch": 0.26, "grad_norm": 0.596036773798448, "learning_rate": 3.473272350286529e-05, "loss": 0.1274, "step": 1181 }, { "epoch": 0.26, "grad_norm": 0.7216373167904762, "learning_rate": 3.4723097378189306e-05, "loss": 0.1523, "step": 1182 }, { "epoch": 0.26, "grad_norm": 0.6263755919104075, "learning_rate": 3.471346380206349e-05, "loss": 0.1208, "step": 1183 }, { "epoch": 0.26, "grad_norm": 0.5691475139297871, "learning_rate": 3.470382277936345e-05, "loss": 0.113, "step": 1184 }, { "epoch": 0.26, "grad_norm": 0.5948601182830424, "learning_rate": 3.4694174314968564e-05, "loss": 0.1156, "step": 1185 }, { "epoch": 0.26, "grad_norm": 0.7207090796247909, "learning_rate": 3.468451841376198e-05, "loss": 0.1529, "step": 1186 }, { "epoch": 0.26, "grad_norm": 0.634722053793067, "learning_rate": 3.467485508063061e-05, "loss": 0.1158, "step": 1187 }, { "epoch": 0.26, "grad_norm": 0.5649868774578078, "learning_rate": 3.466518432046512e-05, "loss": 0.1024, "step": 1188 }, { "epoch": 0.26, "grad_norm": 0.6426538115570382, "learning_rate": 3.4655506138159954e-05, "loss": 0.1087, "step": 1189 }, { "epoch": 0.26, "grad_norm": 0.555267433780788, "learning_rate": 3.464582053861329e-05, "loss": 0.1083, "step": 1190 }, { "epoch": 0.26, "grad_norm": 0.5448565153350091, "learning_rate": 3.463612752672707e-05, "loss": 0.133, "step": 1191 }, { "epoch": 0.26, "grad_norm": 0.5585144360127174, "learning_rate": 3.462642710740699e-05, "loss": 0.1085, "step": 1192 }, { "epoch": 0.26, "grad_norm": 0.6569064580859775, "learning_rate": 3.461671928556248e-05, "loss": 0.136, "step": 1193 }, { "epoch": 0.26, "grad_norm": 0.6260130301816523, "learning_rate": 3.4607004066106754e-05, "loss": 0.1196, "step": 1194 }, { "epoch": 0.26, "grad_norm": 0.5934602130643271, "learning_rate": 3.459728145395671e-05, "loss": 0.1346, "step": 1195 }, { "epoch": 0.26, "grad_norm": 0.7028667611513228, "learning_rate": 3.458755145403306e-05, "loss": 0.1429, "step": 1196 }, { "epoch": 0.26, "grad_norm": 0.648635856899249, "learning_rate": 3.457781407126018e-05, "loss": 0.1357, "step": 1197 }, { "epoch": 0.26, "grad_norm": 0.5471637780816928, "learning_rate": 3.456806931056624e-05, "loss": 0.1034, "step": 1198 }, { "epoch": 0.26, "grad_norm": 0.642561405045521, "learning_rate": 3.4558317176883116e-05, "loss": 0.1435, "step": 1199 }, { "epoch": 0.26, "grad_norm": 0.6153785899846203, "learning_rate": 3.454855767514643e-05, "loss": 0.1289, "step": 1200 }, { "epoch": 0.26, "grad_norm": 0.5295401151516663, "learning_rate": 3.453879081029552e-05, "loss": 0.0893, "step": 1201 }, { "epoch": 0.26, "grad_norm": 0.6474491768483753, "learning_rate": 3.452901658727345e-05, "loss": 0.1256, "step": 1202 }, { "epoch": 0.26, "grad_norm": 0.6736327841856329, "learning_rate": 3.451923501102703e-05, "loss": 0.1364, "step": 1203 }, { "epoch": 0.26, "grad_norm": 0.5755152177033249, "learning_rate": 3.450944608650677e-05, "loss": 0.1296, "step": 1204 }, { "epoch": 0.26, "grad_norm": 0.5771117222914954, "learning_rate": 3.449964981866689e-05, "loss": 0.1182, "step": 1205 }, { "epoch": 0.26, "grad_norm": 0.6209119854673251, "learning_rate": 3.4489846212465356e-05, "loss": 0.1205, "step": 1206 }, { "epoch": 0.27, "grad_norm": 0.7319852214009278, "learning_rate": 3.448003527286383e-05, "loss": 0.1527, "step": 1207 }, { "epoch": 0.27, "grad_norm": 0.5456084707930992, "learning_rate": 3.447021700482769e-05, "loss": 0.106, "step": 1208 }, { "epoch": 0.27, "grad_norm": 0.6069089189394817, "learning_rate": 3.446039141332602e-05, "loss": 0.0915, "step": 1209 }, { "epoch": 0.27, "grad_norm": 0.6142817069593046, "learning_rate": 3.4450558503331606e-05, "loss": 0.1182, "step": 1210 }, { "epoch": 0.27, "grad_norm": 0.7099143830243355, "learning_rate": 3.444071827982096e-05, "loss": 0.1767, "step": 1211 }, { "epoch": 0.27, "grad_norm": 0.5684238934684104, "learning_rate": 3.4430870747774266e-05, "loss": 0.1236, "step": 1212 }, { "epoch": 0.27, "grad_norm": 0.6007230524614405, "learning_rate": 3.442101591217542e-05, "loss": 0.1112, "step": 1213 }, { "epoch": 0.27, "grad_norm": 0.6021835292176488, "learning_rate": 3.441115377801202e-05, "loss": 0.1253, "step": 1214 }, { "epoch": 0.27, "grad_norm": 0.5635790541604533, "learning_rate": 3.440128435027536e-05, "loss": 0.1188, "step": 1215 }, { "epoch": 0.27, "grad_norm": 0.57701656687504, "learning_rate": 3.43914076339604e-05, "loss": 0.1268, "step": 1216 }, { "epoch": 0.27, "grad_norm": 0.6599674519186035, "learning_rate": 3.438152363406582e-05, "loss": 0.1276, "step": 1217 }, { "epoch": 0.27, "grad_norm": 0.6034804645653913, "learning_rate": 3.437163235559396e-05, "loss": 0.1168, "step": 1218 }, { "epoch": 0.27, "grad_norm": 0.6539808050788924, "learning_rate": 3.4361733803550874e-05, "loss": 0.1581, "step": 1219 }, { "epoch": 0.27, "grad_norm": 0.6012704313617491, "learning_rate": 3.4351827982946274e-05, "loss": 0.1041, "step": 1220 }, { "epoch": 0.27, "grad_norm": 0.6296357135917684, "learning_rate": 3.434191489879355e-05, "loss": 0.1284, "step": 1221 }, { "epoch": 0.27, "grad_norm": 0.6326536049743969, "learning_rate": 3.433199455610978e-05, "loss": 0.1221, "step": 1222 }, { "epoch": 0.27, "grad_norm": 0.6212516919732799, "learning_rate": 3.43220669599157e-05, "loss": 0.1399, "step": 1223 }, { "epoch": 0.27, "grad_norm": 0.5812341236694485, "learning_rate": 3.431213211523574e-05, "loss": 0.1036, "step": 1224 }, { "epoch": 0.27, "grad_norm": 0.6630829711308373, "learning_rate": 3.430219002709799e-05, "loss": 0.123, "step": 1225 }, { "epoch": 0.27, "grad_norm": 0.6440885931112753, "learning_rate": 3.429224070053419e-05, "loss": 0.136, "step": 1226 }, { "epoch": 0.27, "grad_norm": 0.6477891829287996, "learning_rate": 3.428228414057975e-05, "loss": 0.1256, "step": 1227 }, { "epoch": 0.27, "grad_norm": 0.589110113997143, "learning_rate": 3.427232035227377e-05, "loss": 0.1169, "step": 1228 }, { "epoch": 0.27, "grad_norm": 0.6328822438930977, "learning_rate": 3.426234934065896e-05, "loss": 0.1068, "step": 1229 }, { "epoch": 0.27, "grad_norm": 0.5505090947435014, "learning_rate": 3.4252371110781716e-05, "loss": 0.142, "step": 1230 }, { "epoch": 0.27, "grad_norm": 0.6575949374797614, "learning_rate": 3.424238566769209e-05, "loss": 0.1311, "step": 1231 }, { "epoch": 0.27, "grad_norm": 0.5399678961164646, "learning_rate": 3.423239301644377e-05, "loss": 0.108, "step": 1232 }, { "epoch": 0.27, "grad_norm": 0.636209225537825, "learning_rate": 3.42223931620941e-05, "loss": 0.1432, "step": 1233 }, { "epoch": 0.27, "grad_norm": 0.5430976659746752, "learning_rate": 3.421238610970406e-05, "loss": 0.0976, "step": 1234 }, { "epoch": 0.27, "grad_norm": 0.654773185785552, "learning_rate": 3.4202371864338295e-05, "loss": 0.1374, "step": 1235 }, { "epoch": 0.27, "grad_norm": 0.6577328323362479, "learning_rate": 3.419235043106506e-05, "loss": 0.1299, "step": 1236 }, { "epoch": 0.27, "grad_norm": 0.6343967950769432, "learning_rate": 3.4182321814956274e-05, "loss": 0.1374, "step": 1237 }, { "epoch": 0.27, "grad_norm": 0.5689775703313454, "learning_rate": 3.4172286021087475e-05, "loss": 0.1115, "step": 1238 }, { "epoch": 0.27, "grad_norm": 0.5321393422744918, "learning_rate": 3.416224305453785e-05, "loss": 0.0976, "step": 1239 }, { "epoch": 0.27, "grad_norm": 0.5895353104496781, "learning_rate": 3.4152192920390195e-05, "loss": 0.115, "step": 1240 }, { "epoch": 0.27, "grad_norm": 0.6693416117685603, "learning_rate": 3.4142135623730954e-05, "loss": 0.1299, "step": 1241 }, { "epoch": 0.27, "grad_norm": 0.5223463271451472, "learning_rate": 3.413207116965018e-05, "loss": 0.0913, "step": 1242 }, { "epoch": 0.27, "grad_norm": 0.5742671712584116, "learning_rate": 3.412199956324155e-05, "loss": 0.1103, "step": 1243 }, { "epoch": 0.27, "grad_norm": 0.6471738567231234, "learning_rate": 3.4111920809602374e-05, "loss": 0.1202, "step": 1244 }, { "epoch": 0.27, "grad_norm": 0.5738606536557671, "learning_rate": 3.4101834913833576e-05, "loss": 0.1253, "step": 1245 }, { "epoch": 0.27, "grad_norm": 0.5329715099951039, "learning_rate": 3.4091741881039677e-05, "loss": 0.0931, "step": 1246 }, { "epoch": 0.27, "grad_norm": 0.6004768334508961, "learning_rate": 3.4081641716328826e-05, "loss": 0.1212, "step": 1247 }, { "epoch": 0.27, "grad_norm": 0.6238993728835703, "learning_rate": 3.407153442481278e-05, "loss": 0.1188, "step": 1248 }, { "epoch": 0.27, "grad_norm": 0.6391662877126049, "learning_rate": 3.4061420011606906e-05, "loss": 0.1327, "step": 1249 }, { "epoch": 0.27, "grad_norm": 0.6850578541931815, "learning_rate": 3.405129848183017e-05, "loss": 0.1346, "step": 1250 }, { "epoch": 0.27, "grad_norm": 0.6620741753237898, "learning_rate": 3.404116984060513e-05, "loss": 0.1424, "step": 1251 }, { "epoch": 0.27, "grad_norm": 0.5884064094161954, "learning_rate": 3.403103409305796e-05, "loss": 0.1271, "step": 1252 }, { "epoch": 0.28, "grad_norm": 0.592150818385704, "learning_rate": 3.402089124431843e-05, "loss": 0.1224, "step": 1253 }, { "epoch": 0.28, "grad_norm": 0.5933580610751684, "learning_rate": 3.4010741299519885e-05, "loss": 0.0974, "step": 1254 }, { "epoch": 0.28, "grad_norm": 0.5395084968672039, "learning_rate": 3.400058426379929e-05, "loss": 0.0956, "step": 1255 }, { "epoch": 0.28, "grad_norm": 0.5568274468905576, "learning_rate": 3.3990420142297165e-05, "loss": 0.0934, "step": 1256 }, { "epoch": 0.28, "grad_norm": 0.6464044573387644, "learning_rate": 3.398024894015764e-05, "loss": 0.1278, "step": 1257 }, { "epoch": 0.28, "grad_norm": 0.6397880110723696, "learning_rate": 3.3970070662528436e-05, "loss": 0.1261, "step": 1258 }, { "epoch": 0.28, "grad_norm": 0.580808956213012, "learning_rate": 3.395988531456083e-05, "loss": 0.1214, "step": 1259 }, { "epoch": 0.28, "grad_norm": 0.6264461475891724, "learning_rate": 3.394969290140969e-05, "loss": 0.1241, "step": 1260 }, { "epoch": 0.28, "grad_norm": 0.6984251622896039, "learning_rate": 3.393949342823346e-05, "loss": 0.1567, "step": 1261 }, { "epoch": 0.28, "grad_norm": 0.5048838159783609, "learning_rate": 3.3929286900194154e-05, "loss": 0.0969, "step": 1262 }, { "epoch": 0.28, "grad_norm": 0.5949825243427104, "learning_rate": 3.3919073322457364e-05, "loss": 0.1053, "step": 1263 }, { "epoch": 0.28, "grad_norm": 0.5917974541286745, "learning_rate": 3.3908852700192236e-05, "loss": 0.1162, "step": 1264 }, { "epoch": 0.28, "grad_norm": 0.6482849401229682, "learning_rate": 3.38986250385715e-05, "loss": 0.1305, "step": 1265 }, { "epoch": 0.28, "grad_norm": 0.5296304643919447, "learning_rate": 3.388839034277142e-05, "loss": 0.0985, "step": 1266 }, { "epoch": 0.28, "grad_norm": 0.6002721447464341, "learning_rate": 3.387814861797186e-05, "loss": 0.1077, "step": 1267 }, { "epoch": 0.28, "grad_norm": 0.5918012813995788, "learning_rate": 3.386789986935621e-05, "loss": 0.1221, "step": 1268 }, { "epoch": 0.28, "grad_norm": 0.5493203124861112, "learning_rate": 3.385764410211143e-05, "loss": 0.1078, "step": 1269 }, { "epoch": 0.28, "grad_norm": 0.5263530608990402, "learning_rate": 3.3847381321428e-05, "loss": 0.1154, "step": 1270 }, { "epoch": 0.28, "grad_norm": 0.5414700579233093, "learning_rate": 3.383711153250002e-05, "loss": 0.1058, "step": 1271 }, { "epoch": 0.28, "grad_norm": 0.6080252787344901, "learning_rate": 3.382683474052506e-05, "loss": 0.1299, "step": 1272 }, { "epoch": 0.28, "grad_norm": 0.5519697744311083, "learning_rate": 3.381655095070428e-05, "loss": 0.0927, "step": 1273 }, { "epoch": 0.28, "grad_norm": 0.5399789621528959, "learning_rate": 3.3806260168242365e-05, "loss": 0.0976, "step": 1274 }, { "epoch": 0.28, "grad_norm": 0.6127765546933293, "learning_rate": 3.379596239834755e-05, "loss": 0.1375, "step": 1275 }, { "epoch": 0.28, "grad_norm": 0.5594481223279179, "learning_rate": 3.3785657646231596e-05, "loss": 0.105, "step": 1276 }, { "epoch": 0.28, "grad_norm": 0.5644887629151611, "learning_rate": 3.37753459171098e-05, "loss": 0.1016, "step": 1277 }, { "epoch": 0.28, "grad_norm": 0.5622478820791422, "learning_rate": 3.376502721620098e-05, "loss": 0.118, "step": 1278 }, { "epoch": 0.28, "grad_norm": 0.6754545844501156, "learning_rate": 3.375470154872751e-05, "loss": 0.13, "step": 1279 }, { "epoch": 0.28, "grad_norm": 0.6377381197945493, "learning_rate": 3.3744368919915275e-05, "loss": 0.1602, "step": 1280 }, { "epoch": 0.28, "grad_norm": 0.6207918116959827, "learning_rate": 3.3734029334993675e-05, "loss": 0.1371, "step": 1281 }, { "epoch": 0.28, "grad_norm": 0.538156355942291, "learning_rate": 3.372368279919563e-05, "loss": 0.1094, "step": 1282 }, { "epoch": 0.28, "grad_norm": 0.7058682228466938, "learning_rate": 3.3713329317757594e-05, "loss": 0.163, "step": 1283 }, { "epoch": 0.28, "grad_norm": 0.5479527616275841, "learning_rate": 3.370296889591953e-05, "loss": 0.093, "step": 1284 }, { "epoch": 0.28, "grad_norm": 0.5901569015180179, "learning_rate": 3.369260153892491e-05, "loss": 0.1216, "step": 1285 }, { "epoch": 0.28, "grad_norm": 0.5840498973615459, "learning_rate": 3.3682227252020716e-05, "loss": 0.1125, "step": 1286 }, { "epoch": 0.28, "grad_norm": 0.522568565049669, "learning_rate": 3.367184604045743e-05, "loss": 0.1061, "step": 1287 }, { "epoch": 0.28, "grad_norm": 0.5738316483256367, "learning_rate": 3.3661457909489056e-05, "loss": 0.1105, "step": 1288 }, { "epoch": 0.28, "grad_norm": 0.6034257939219156, "learning_rate": 3.365106286437309e-05, "loss": 0.1189, "step": 1289 }, { "epoch": 0.28, "grad_norm": 0.5368924066832583, "learning_rate": 3.364066091037052e-05, "loss": 0.0988, "step": 1290 }, { "epoch": 0.28, "grad_norm": 0.6711078744452189, "learning_rate": 3.3630252052745844e-05, "loss": 0.1559, "step": 1291 }, { "epoch": 0.28, "grad_norm": 0.5399400230496249, "learning_rate": 3.361983629676705e-05, "loss": 0.1077, "step": 1292 }, { "epoch": 0.28, "grad_norm": 0.6108261338608086, "learning_rate": 3.360941364770562e-05, "loss": 0.1294, "step": 1293 }, { "epoch": 0.28, "grad_norm": 0.5468824113616102, "learning_rate": 3.359898411083652e-05, "loss": 0.1225, "step": 1294 }, { "epoch": 0.28, "grad_norm": 0.6249837634838636, "learning_rate": 3.358854769143819e-05, "loss": 0.1312, "step": 1295 }, { "epoch": 0.28, "grad_norm": 0.5940600639666558, "learning_rate": 3.357810439479258e-05, "loss": 0.1086, "step": 1296 }, { "epoch": 0.28, "grad_norm": 0.5821095356360554, "learning_rate": 3.356765422618509e-05, "loss": 0.108, "step": 1297 }, { "epoch": 0.29, "grad_norm": 0.6157257055801524, "learning_rate": 3.355719719090465e-05, "loss": 0.1548, "step": 1298 }, { "epoch": 0.29, "grad_norm": 0.5744449922340548, "learning_rate": 3.3546733294243585e-05, "loss": 0.1333, "step": 1299 }, { "epoch": 0.29, "grad_norm": 0.6436444510808119, "learning_rate": 3.353626254149776e-05, "loss": 0.1208, "step": 1300 }, { "epoch": 0.29, "grad_norm": 0.5962607853391843, "learning_rate": 3.3525784937966474e-05, "loss": 0.1112, "step": 1301 }, { "epoch": 0.29, "grad_norm": 0.5523535289195513, "learning_rate": 3.3515300488952534e-05, "loss": 0.0944, "step": 1302 }, { "epoch": 0.29, "grad_norm": 0.501809006719086, "learning_rate": 3.350480919976216e-05, "loss": 0.087, "step": 1303 }, { "epoch": 0.29, "grad_norm": 0.5981814750628635, "learning_rate": 3.349431107570506e-05, "loss": 0.1275, "step": 1304 }, { "epoch": 0.29, "grad_norm": 0.6180237524967851, "learning_rate": 3.348380612209441e-05, "loss": 0.1173, "step": 1305 }, { "epoch": 0.29, "grad_norm": 0.6130160104393875, "learning_rate": 3.347329434424683e-05, "loss": 0.1229, "step": 1306 }, { "epoch": 0.29, "grad_norm": 0.544583943908318, "learning_rate": 3.346277574748238e-05, "loss": 0.1194, "step": 1307 }, { "epoch": 0.29, "grad_norm": 0.5317881607186777, "learning_rate": 3.345225033712459e-05, "loss": 0.0983, "step": 1308 }, { "epoch": 0.29, "grad_norm": 0.5799913180868896, "learning_rate": 3.344171811850045e-05, "loss": 0.1168, "step": 1309 }, { "epoch": 0.29, "grad_norm": 0.5500312731261546, "learning_rate": 3.3431179096940375e-05, "loss": 0.1332, "step": 1310 }, { "epoch": 0.29, "grad_norm": 0.6583755222113519, "learning_rate": 3.3420633277778214e-05, "loss": 0.1362, "step": 1311 }, { "epoch": 0.29, "grad_norm": 0.5739367824277034, "learning_rate": 3.341008066635129e-05, "loss": 0.114, "step": 1312 }, { "epoch": 0.29, "grad_norm": 0.6181973031072495, "learning_rate": 3.339952126800033e-05, "loss": 0.1252, "step": 1313 }, { "epoch": 0.29, "grad_norm": 0.5774385397685252, "learning_rate": 3.3388955088069524e-05, "loss": 0.1402, "step": 1314 }, { "epoch": 0.29, "grad_norm": 0.6174955926961799, "learning_rate": 3.3378382131906465e-05, "loss": 0.1398, "step": 1315 }, { "epoch": 0.29, "grad_norm": 0.5129309703011087, "learning_rate": 3.33678024048622e-05, "loss": 0.0974, "step": 1316 }, { "epoch": 0.29, "grad_norm": 0.6284531566427997, "learning_rate": 3.335721591229119e-05, "loss": 0.1331, "step": 1317 }, { "epoch": 0.29, "grad_norm": 0.7647961002582618, "learning_rate": 3.334662265955133e-05, "loss": 0.1739, "step": 1318 }, { "epoch": 0.29, "grad_norm": 0.6942168615205172, "learning_rate": 3.3336022652003924e-05, "loss": 0.1737, "step": 1319 }, { "epoch": 0.29, "grad_norm": 0.5579231077747826, "learning_rate": 3.33254158950137e-05, "loss": 0.0998, "step": 1320 }, { "epoch": 0.29, "grad_norm": 0.500234608445358, "learning_rate": 3.331480239394881e-05, "loss": 0.1234, "step": 1321 }, { "epoch": 0.29, "grad_norm": 0.5624056313086268, "learning_rate": 3.330418215418081e-05, "loss": 0.1177, "step": 1322 }, { "epoch": 0.29, "grad_norm": 0.544769976921429, "learning_rate": 3.329355518108466e-05, "loss": 0.086, "step": 1323 }, { "epoch": 0.29, "grad_norm": 0.6480806356860086, "learning_rate": 3.328292148003875e-05, "loss": 0.1699, "step": 1324 }, { "epoch": 0.29, "grad_norm": 0.5556130008667703, "learning_rate": 3.3272281056424854e-05, "loss": 0.117, "step": 1325 }, { "epoch": 0.29, "grad_norm": 0.546150790661389, "learning_rate": 3.326163391562814e-05, "loss": 0.1028, "step": 1326 }, { "epoch": 0.29, "grad_norm": 0.5950563265080882, "learning_rate": 3.325098006303722e-05, "loss": 0.1133, "step": 1327 }, { "epoch": 0.29, "grad_norm": 0.606291157023492, "learning_rate": 3.324031950404406e-05, "loss": 0.0992, "step": 1328 }, { "epoch": 0.29, "grad_norm": 0.6167163257536733, "learning_rate": 3.322965224404403e-05, "loss": 0.1478, "step": 1329 }, { "epoch": 0.29, "grad_norm": 0.5856656270301086, "learning_rate": 3.3218978288435896e-05, "loss": 0.1117, "step": 1330 }, { "epoch": 0.29, "grad_norm": 0.5518055042821899, "learning_rate": 3.3208297642621824e-05, "loss": 0.0941, "step": 1331 }, { "epoch": 0.29, "grad_norm": 0.553741508057109, "learning_rate": 3.319761031200735e-05, "loss": 0.1246, "step": 1332 }, { "epoch": 0.29, "grad_norm": 0.5034305485838049, "learning_rate": 3.318691630200138e-05, "loss": 0.0952, "step": 1333 }, { "epoch": 0.29, "grad_norm": 0.49705691216473424, "learning_rate": 3.317621561801624e-05, "loss": 0.0896, "step": 1334 }, { "epoch": 0.29, "grad_norm": 0.5258869535349345, "learning_rate": 3.316550826546761e-05, "loss": 0.0988, "step": 1335 }, { "epoch": 0.29, "grad_norm": 0.5489673641506772, "learning_rate": 3.315479424977453e-05, "loss": 0.1149, "step": 1336 }, { "epoch": 0.29, "grad_norm": 0.5114559925851911, "learning_rate": 3.3144073576359455e-05, "loss": 0.1157, "step": 1337 }, { "epoch": 0.29, "grad_norm": 0.5566812333135119, "learning_rate": 3.313334625064816e-05, "loss": 0.1118, "step": 1338 }, { "epoch": 0.29, "grad_norm": 0.5873765440338292, "learning_rate": 3.312261227806982e-05, "loss": 0.1208, "step": 1339 }, { "epoch": 0.29, "grad_norm": 0.5055540186849343, "learning_rate": 3.311187166405696e-05, "loss": 0.086, "step": 1340 }, { "epoch": 0.29, "grad_norm": 0.5983374210667861, "learning_rate": 3.310112441404548e-05, "loss": 0.1095, "step": 1341 }, { "epoch": 0.29, "grad_norm": 0.5834239707649026, "learning_rate": 3.309037053347462e-05, "loss": 0.1269, "step": 1342 }, { "epoch": 0.29, "grad_norm": 0.5894128349595298, "learning_rate": 3.3079610027786985e-05, "loss": 0.1095, "step": 1343 }, { "epoch": 0.3, "grad_norm": 0.5633359895686817, "learning_rate": 3.306884290242854e-05, "loss": 0.1089, "step": 1344 }, { "epoch": 0.3, "grad_norm": 0.5123450254265632, "learning_rate": 3.3058069162848586e-05, "loss": 0.1056, "step": 1345 }, { "epoch": 0.3, "grad_norm": 0.6086173605103516, "learning_rate": 3.3047288814499786e-05, "loss": 0.0937, "step": 1346 }, { "epoch": 0.3, "grad_norm": 0.6991284701068382, "learning_rate": 3.3036501862838125e-05, "loss": 0.1692, "step": 1347 }, { "epoch": 0.3, "grad_norm": 0.5441333712760471, "learning_rate": 3.302570831332297e-05, "loss": 0.108, "step": 1348 }, { "epoch": 0.3, "grad_norm": 0.5533949069537027, "learning_rate": 3.301490817141698e-05, "loss": 0.1199, "step": 1349 }, { "epoch": 0.3, "grad_norm": 0.5405876312130093, "learning_rate": 3.300410144258619e-05, "loss": 0.1341, "step": 1350 }, { "epoch": 0.3, "grad_norm": 0.4227404547560645, "learning_rate": 3.2993288132299935e-05, "loss": 0.0827, "step": 1351 }, { "epoch": 0.3, "grad_norm": 0.627268037704672, "learning_rate": 3.298246824603091e-05, "loss": 0.1427, "step": 1352 }, { "epoch": 0.3, "grad_norm": 0.5433477821457182, "learning_rate": 3.297164178925512e-05, "loss": 0.1175, "step": 1353 }, { "epoch": 0.3, "grad_norm": 0.525409683959608, "learning_rate": 3.2960808767451905e-05, "loss": 0.1077, "step": 1354 }, { "epoch": 0.3, "grad_norm": 0.4823978124632984, "learning_rate": 3.294996918610393e-05, "loss": 0.0896, "step": 1355 }, { "epoch": 0.3, "grad_norm": 0.5202600057428901, "learning_rate": 3.293912305069715e-05, "loss": 0.0906, "step": 1356 }, { "epoch": 0.3, "grad_norm": 0.6476774932359233, "learning_rate": 3.292827036672089e-05, "loss": 0.1117, "step": 1357 }, { "epoch": 0.3, "grad_norm": 0.5763221674018213, "learning_rate": 3.291741113966773e-05, "loss": 0.1023, "step": 1358 }, { "epoch": 0.3, "grad_norm": 0.6494560056062909, "learning_rate": 3.290654537503362e-05, "loss": 0.1113, "step": 1359 }, { "epoch": 0.3, "grad_norm": 0.7034436033874668, "learning_rate": 3.2895673078317775e-05, "loss": 0.1229, "step": 1360 }, { "epoch": 0.3, "grad_norm": 0.6143205425695772, "learning_rate": 3.288479425502273e-05, "loss": 0.1194, "step": 1361 }, { "epoch": 0.3, "grad_norm": 0.6209591173806414, "learning_rate": 3.287390891065433e-05, "loss": 0.1485, "step": 1362 }, { "epoch": 0.3, "grad_norm": 0.5999097786066504, "learning_rate": 3.2863017050721715e-05, "loss": 0.1246, "step": 1363 }, { "epoch": 0.3, "grad_norm": 0.48418642803392065, "learning_rate": 3.2852118680737306e-05, "loss": 0.0786, "step": 1364 }, { "epoch": 0.3, "grad_norm": 0.540208482983803, "learning_rate": 3.2841213806216864e-05, "loss": 0.1044, "step": 1365 }, { "epoch": 0.3, "grad_norm": 0.5094061859507117, "learning_rate": 3.283030243267939e-05, "loss": 0.0803, "step": 1366 }, { "epoch": 0.3, "grad_norm": 0.5646400010314275, "learning_rate": 3.281938456564721e-05, "loss": 0.1106, "step": 1367 }, { "epoch": 0.3, "grad_norm": 0.5686819203471248, "learning_rate": 3.2808460210645906e-05, "loss": 0.1155, "step": 1368 }, { "epoch": 0.3, "grad_norm": 0.6112084804857806, "learning_rate": 3.2797529373204375e-05, "loss": 0.1225, "step": 1369 }, { "epoch": 0.3, "grad_norm": 0.5162515878999319, "learning_rate": 3.278659205885479e-05, "loss": 0.0996, "step": 1370 }, { "epoch": 0.3, "grad_norm": 0.6340510175256542, "learning_rate": 3.2775648273132574e-05, "loss": 0.1383, "step": 1371 }, { "epoch": 0.3, "grad_norm": 0.5671948435074174, "learning_rate": 3.2764698021576446e-05, "loss": 0.1048, "step": 1372 }, { "epoch": 0.3, "grad_norm": 0.6487673644312671, "learning_rate": 3.27537413097284e-05, "loss": 0.1625, "step": 1373 }, { "epoch": 0.3, "grad_norm": 0.5601474787877964, "learning_rate": 3.27427781431337e-05, "loss": 0.1142, "step": 1374 }, { "epoch": 0.3, "grad_norm": 0.5813905402472606, "learning_rate": 3.273180852734087e-05, "loss": 0.1012, "step": 1375 }, { "epoch": 0.3, "grad_norm": 0.4734692792644643, "learning_rate": 3.27208324679017e-05, "loss": 0.0724, "step": 1376 }, { "epoch": 0.3, "grad_norm": 0.5780253453324077, "learning_rate": 3.270984997037123e-05, "loss": 0.1006, "step": 1377 }, { "epoch": 0.3, "grad_norm": 0.5225369977400518, "learning_rate": 3.269886104030778e-05, "loss": 0.1089, "step": 1378 }, { "epoch": 0.3, "grad_norm": 0.5194797060071085, "learning_rate": 3.268786568327291e-05, "loss": 0.1096, "step": 1379 }, { "epoch": 0.3, "grad_norm": 0.5409759066217806, "learning_rate": 3.2676863904831444e-05, "loss": 0.1027, "step": 1380 }, { "epoch": 0.3, "grad_norm": 0.5970778315366738, "learning_rate": 3.266585571055145e-05, "loss": 0.1028, "step": 1381 }, { "epoch": 0.3, "grad_norm": 0.6425400928500881, "learning_rate": 3.2654841106004225e-05, "loss": 0.1467, "step": 1382 }, { "epoch": 0.3, "grad_norm": 0.5410733519236234, "learning_rate": 3.264382009676435e-05, "loss": 0.1025, "step": 1383 }, { "epoch": 0.3, "grad_norm": 0.5964767435581186, "learning_rate": 3.263279268840961e-05, "loss": 0.1334, "step": 1384 }, { "epoch": 0.3, "grad_norm": 0.5198936345823255, "learning_rate": 3.262175888652106e-05, "loss": 0.0953, "step": 1385 }, { "epoch": 0.3, "grad_norm": 0.656659815755759, "learning_rate": 3.261071869668296e-05, "loss": 0.1355, "step": 1386 }, { "epoch": 0.3, "grad_norm": 0.588144393341704, "learning_rate": 3.259967212448282e-05, "loss": 0.1398, "step": 1387 }, { "epoch": 0.3, "grad_norm": 0.5605675563918083, "learning_rate": 3.2588619175511387e-05, "loss": 0.1048, "step": 1388 }, { "epoch": 0.31, "grad_norm": 0.452692499602161, "learning_rate": 3.2577559855362614e-05, "loss": 0.0811, "step": 1389 }, { "epoch": 0.31, "grad_norm": 0.5681878057077676, "learning_rate": 3.2566494169633693e-05, "loss": 0.1066, "step": 1390 }, { "epoch": 0.31, "grad_norm": 0.5795194716502409, "learning_rate": 3.255542212392505e-05, "loss": 0.1226, "step": 1391 }, { "epoch": 0.31, "grad_norm": 0.565216129538394, "learning_rate": 3.2544343723840296e-05, "loss": 0.0917, "step": 1392 }, { "epoch": 0.31, "grad_norm": 0.4864753285882939, "learning_rate": 3.253325897498629e-05, "loss": 0.0918, "step": 1393 }, { "epoch": 0.31, "grad_norm": 0.5830497790492641, "learning_rate": 3.2522167882973085e-05, "loss": 0.0995, "step": 1394 }, { "epoch": 0.31, "grad_norm": 0.5171751360841577, "learning_rate": 3.251107045341395e-05, "loss": 0.0975, "step": 1395 }, { "epoch": 0.31, "grad_norm": 0.6417617249571287, "learning_rate": 3.249996669192537e-05, "loss": 0.1543, "step": 1396 }, { "epoch": 0.31, "grad_norm": 0.5739693132332222, "learning_rate": 3.248885660412701e-05, "loss": 0.115, "step": 1397 }, { "epoch": 0.31, "grad_norm": 0.5815615711071502, "learning_rate": 3.247774019564178e-05, "loss": 0.0829, "step": 1398 }, { "epoch": 0.31, "grad_norm": 0.6249244644296427, "learning_rate": 3.2466617472095736e-05, "loss": 0.0991, "step": 1399 }, { "epoch": 0.31, "grad_norm": 0.5152293334436637, "learning_rate": 3.245548843911817e-05, "loss": 0.0931, "step": 1400 }, { "epoch": 0.31, "grad_norm": 0.5970091713898942, "learning_rate": 3.244435310234156e-05, "loss": 0.1193, "step": 1401 }, { "epoch": 0.31, "grad_norm": 0.5920520899415288, "learning_rate": 3.243321146740155e-05, "loss": 0.0919, "step": 1402 }, { "epoch": 0.31, "grad_norm": 0.5128584538616451, "learning_rate": 3.2422063539937006e-05, "loss": 0.0919, "step": 1403 }, { "epoch": 0.31, "grad_norm": 0.5298398319280635, "learning_rate": 3.2410909325589954e-05, "loss": 0.085, "step": 1404 }, { "epoch": 0.31, "grad_norm": 0.5807596435304535, "learning_rate": 3.239974883000561e-05, "loss": 0.1002, "step": 1405 }, { "epoch": 0.31, "grad_norm": 0.5915929109199566, "learning_rate": 3.2388582058832375e-05, "loss": 0.117, "step": 1406 }, { "epoch": 0.31, "grad_norm": 0.5410412944116243, "learning_rate": 3.237740901772181e-05, "loss": 0.1044, "step": 1407 }, { "epoch": 0.31, "grad_norm": 0.5061377860140891, "learning_rate": 3.2366229712328675e-05, "loss": 0.0859, "step": 1408 }, { "epoch": 0.31, "grad_norm": 0.5453665501606407, "learning_rate": 3.235504414831087e-05, "loss": 0.1005, "step": 1409 }, { "epoch": 0.31, "grad_norm": 0.6237674002810024, "learning_rate": 3.234385233132949e-05, "loss": 0.1188, "step": 1410 }, { "epoch": 0.31, "grad_norm": 0.5550178833391438, "learning_rate": 3.233265426704877e-05, "loss": 0.0948, "step": 1411 }, { "epoch": 0.31, "grad_norm": 0.5620625702416668, "learning_rate": 3.232144996113613e-05, "loss": 0.0876, "step": 1412 }, { "epoch": 0.31, "grad_norm": 0.6278011493027039, "learning_rate": 3.231023941926213e-05, "loss": 0.1235, "step": 1413 }, { "epoch": 0.31, "grad_norm": 0.6123232800643602, "learning_rate": 3.22990226471005e-05, "loss": 0.1085, "step": 1414 }, { "epoch": 0.31, "grad_norm": 0.5878673054578677, "learning_rate": 3.2287799650328116e-05, "loss": 0.1088, "step": 1415 }, { "epoch": 0.31, "grad_norm": 0.5936979973757809, "learning_rate": 3.2276570434625e-05, "loss": 0.108, "step": 1416 }, { "epoch": 0.31, "grad_norm": 0.5982325150532656, "learning_rate": 3.226533500567433e-05, "loss": 0.121, "step": 1417 }, { "epoch": 0.31, "grad_norm": 0.5867558677097133, "learning_rate": 3.2254093369162425e-05, "loss": 0.115, "step": 1418 }, { "epoch": 0.31, "grad_norm": 0.5545845473477063, "learning_rate": 3.2242845530778755e-05, "loss": 0.1124, "step": 1419 }, { "epoch": 0.31, "grad_norm": 0.4871739458666332, "learning_rate": 3.22315914962159e-05, "loss": 0.0867, "step": 1420 }, { "epoch": 0.31, "grad_norm": 0.4646669259868776, "learning_rate": 3.2220331271169614e-05, "loss": 0.1008, "step": 1421 }, { "epoch": 0.31, "grad_norm": 0.4921344698236326, "learning_rate": 3.220906486133876e-05, "loss": 0.0877, "step": 1422 }, { "epoch": 0.31, "grad_norm": 0.506246499649945, "learning_rate": 3.219779227242534e-05, "loss": 0.1104, "step": 1423 }, { "epoch": 0.31, "grad_norm": 0.5522790387999251, "learning_rate": 3.218651351013447e-05, "loss": 0.0856, "step": 1424 }, { "epoch": 0.31, "grad_norm": 0.571981261756722, "learning_rate": 3.217522858017442e-05, "loss": 0.0969, "step": 1425 }, { "epoch": 0.31, "grad_norm": 0.6090202766293858, "learning_rate": 3.216393748825654e-05, "loss": 0.1176, "step": 1426 }, { "epoch": 0.31, "grad_norm": 0.44838768752167724, "learning_rate": 3.2152640240095335e-05, "loss": 0.0705, "step": 1427 }, { "epoch": 0.31, "grad_norm": 0.46354196491303296, "learning_rate": 3.2141336841408406e-05, "loss": 0.0671, "step": 1428 }, { "epoch": 0.31, "grad_norm": 0.5880174468365021, "learning_rate": 3.2130027297916476e-05, "loss": 0.1175, "step": 1429 }, { "epoch": 0.31, "grad_norm": 0.5015029372740631, "learning_rate": 3.2118711615343366e-05, "loss": 0.0792, "step": 1430 }, { "epoch": 0.31, "grad_norm": 0.6066306502092649, "learning_rate": 3.210738979941603e-05, "loss": 0.1205, "step": 1431 }, { "epoch": 0.31, "grad_norm": 0.5556678704800044, "learning_rate": 3.2096061855864485e-05, "loss": 0.0861, "step": 1432 }, { "epoch": 0.31, "grad_norm": 0.5559779455558638, "learning_rate": 3.2084727790421895e-05, "loss": 0.1002, "step": 1433 }, { "epoch": 0.31, "grad_norm": 0.5594356834272175, "learning_rate": 3.207338760882448e-05, "loss": 0.0945, "step": 1434 }, { "epoch": 0.32, "grad_norm": 0.49659919521560014, "learning_rate": 3.20620413168116e-05, "loss": 0.0853, "step": 1435 }, { "epoch": 0.32, "grad_norm": 0.5243465444052824, "learning_rate": 3.205068892012565e-05, "loss": 0.0922, "step": 1436 }, { "epoch": 0.32, "grad_norm": 0.5947183943564954, "learning_rate": 3.203933042451218e-05, "loss": 0.1038, "step": 1437 }, { "epoch": 0.32, "grad_norm": 0.5286617057434441, "learning_rate": 3.202796583571977e-05, "loss": 0.0944, "step": 1438 }, { "epoch": 0.32, "grad_norm": 0.6296246613226489, "learning_rate": 3.2016595159500127e-05, "loss": 0.1377, "step": 1439 }, { "epoch": 0.32, "grad_norm": 0.6802923646189153, "learning_rate": 3.2005218401608006e-05, "loss": 0.1245, "step": 1440 }, { "epoch": 0.32, "grad_norm": 0.4712275674018901, "learning_rate": 3.1993835567801266e-05, "loss": 0.0807, "step": 1441 }, { "epoch": 0.32, "grad_norm": 0.6099906647859015, "learning_rate": 3.19824466638408e-05, "loss": 0.1082, "step": 1442 }, { "epoch": 0.32, "grad_norm": 0.5531651196773758, "learning_rate": 3.1971051695490644e-05, "loss": 0.1041, "step": 1443 }, { "epoch": 0.32, "grad_norm": 0.5385036889218184, "learning_rate": 3.195965066851784e-05, "loss": 0.1112, "step": 1444 }, { "epoch": 0.32, "grad_norm": 0.5480856709740979, "learning_rate": 3.194824358869252e-05, "loss": 0.0909, "step": 1445 }, { "epoch": 0.32, "grad_norm": 0.5267635403138891, "learning_rate": 3.1936830461787866e-05, "loss": 0.1064, "step": 1446 }, { "epoch": 0.32, "grad_norm": 0.5324786787734438, "learning_rate": 3.192541129358014e-05, "loss": 0.0897, "step": 1447 }, { "epoch": 0.32, "grad_norm": 0.5094036410184004, "learning_rate": 3.191398608984867e-05, "loss": 0.093, "step": 1448 }, { "epoch": 0.32, "grad_norm": 0.5502984927559366, "learning_rate": 3.19025548563758e-05, "loss": 0.1035, "step": 1449 }, { "epoch": 0.32, "grad_norm": 0.579324277408475, "learning_rate": 3.189111759894695e-05, "loss": 0.1183, "step": 1450 }, { "epoch": 0.32, "grad_norm": 0.5289617793855, "learning_rate": 3.1879674323350594e-05, "loss": 0.0944, "step": 1451 }, { "epoch": 0.32, "grad_norm": 0.560668692826493, "learning_rate": 3.186822503537823e-05, "loss": 0.1117, "step": 1452 }, { "epoch": 0.32, "grad_norm": 0.5176155796534162, "learning_rate": 3.1856769740824426e-05, "loss": 0.0911, "step": 1453 }, { "epoch": 0.32, "grad_norm": 0.49774506343824937, "learning_rate": 3.184530844548678e-05, "loss": 0.0838, "step": 1454 }, { "epoch": 0.32, "grad_norm": 0.512447042752987, "learning_rate": 3.183384115516591e-05, "loss": 0.09, "step": 1455 }, { "epoch": 0.32, "grad_norm": 0.6195166856706205, "learning_rate": 3.182236787566549e-05, "loss": 0.1248, "step": 1456 }, { "epoch": 0.32, "grad_norm": 0.49141038368318024, "learning_rate": 3.181088861279222e-05, "loss": 0.0838, "step": 1457 }, { "epoch": 0.32, "grad_norm": 0.5814312908518064, "learning_rate": 3.179940337235582e-05, "loss": 0.1247, "step": 1458 }, { "epoch": 0.32, "grad_norm": 0.5920081186495002, "learning_rate": 3.178791216016904e-05, "loss": 0.1222, "step": 1459 }, { "epoch": 0.32, "grad_norm": 0.5004816119982474, "learning_rate": 3.177641498204765e-05, "loss": 0.0956, "step": 1460 }, { "epoch": 0.32, "grad_norm": 0.682944398095459, "learning_rate": 3.1764911843810456e-05, "loss": 0.1432, "step": 1461 }, { "epoch": 0.32, "grad_norm": 0.547596346725736, "learning_rate": 3.175340275127925e-05, "loss": 0.0909, "step": 1462 }, { "epoch": 0.32, "grad_norm": 0.4748960029795314, "learning_rate": 3.1741887710278874e-05, "loss": 0.0804, "step": 1463 }, { "epoch": 0.32, "grad_norm": 0.5348561413037483, "learning_rate": 3.173036672663714e-05, "loss": 0.0884, "step": 1464 }, { "epoch": 0.32, "grad_norm": 0.5017013215085425, "learning_rate": 3.17188398061849e-05, "loss": 0.1001, "step": 1465 }, { "epoch": 0.32, "grad_norm": 0.5777986223814436, "learning_rate": 3.170730695475599e-05, "loss": 0.126, "step": 1466 }, { "epoch": 0.32, "grad_norm": 0.533350410659888, "learning_rate": 3.1695768178187267e-05, "loss": 0.1064, "step": 1467 }, { "epoch": 0.32, "grad_norm": 0.5274072962020079, "learning_rate": 3.168422348231857e-05, "loss": 0.095, "step": 1468 }, { "epoch": 0.32, "grad_norm": 0.5806139901619206, "learning_rate": 3.1672672872992755e-05, "loss": 0.1023, "step": 1469 }, { "epoch": 0.32, "grad_norm": 0.5420230616118438, "learning_rate": 3.166111635605564e-05, "loss": 0.1033, "step": 1470 }, { "epoch": 0.32, "grad_norm": 0.5674809224959466, "learning_rate": 3.164955393735605e-05, "loss": 0.0948, "step": 1471 }, { "epoch": 0.32, "grad_norm": 0.5598604472015583, "learning_rate": 3.1637985622745795e-05, "loss": 0.0861, "step": 1472 }, { "epoch": 0.32, "grad_norm": 0.48932312245861176, "learning_rate": 3.1626411418079684e-05, "loss": 0.0933, "step": 1473 }, { "epoch": 0.32, "grad_norm": 0.49542347239489665, "learning_rate": 3.1614831329215475e-05, "loss": 0.0905, "step": 1474 }, { "epoch": 0.32, "grad_norm": 0.5307744365947702, "learning_rate": 3.160324536201393e-05, "loss": 0.0922, "step": 1475 }, { "epoch": 0.32, "grad_norm": 0.6242590804075524, "learning_rate": 3.159165352233879e-05, "loss": 0.1098, "step": 1476 }, { "epoch": 0.32, "grad_norm": 0.5361180260014112, "learning_rate": 3.158005581605673e-05, "loss": 0.0815, "step": 1477 }, { "epoch": 0.32, "grad_norm": 0.5969933467395213, "learning_rate": 3.156845224903745e-05, "loss": 0.1015, "step": 1478 }, { "epoch": 0.32, "grad_norm": 0.5662072456926389, "learning_rate": 3.1556842827153556e-05, "loss": 0.1035, "step": 1479 }, { "epoch": 0.33, "grad_norm": 0.5801233344651101, "learning_rate": 3.154522755628067e-05, "loss": 0.0885, "step": 1480 }, { "epoch": 0.33, "grad_norm": 0.5516999070831768, "learning_rate": 3.153360644229735e-05, "loss": 0.0943, "step": 1481 }, { "epoch": 0.33, "grad_norm": 0.5700633928952555, "learning_rate": 3.1521979491085095e-05, "loss": 0.1038, "step": 1482 }, { "epoch": 0.33, "grad_norm": 0.5798399465153998, "learning_rate": 3.15103467085284e-05, "loss": 0.1173, "step": 1483 }, { "epoch": 0.33, "grad_norm": 0.5076361716853368, "learning_rate": 3.149870810051467e-05, "loss": 0.0959, "step": 1484 }, { "epoch": 0.33, "grad_norm": 0.5473599327454586, "learning_rate": 3.148706367293428e-05, "loss": 0.0934, "step": 1485 }, { "epoch": 0.33, "grad_norm": 0.5861413728164516, "learning_rate": 3.147541343168055e-05, "loss": 0.1274, "step": 1486 }, { "epoch": 0.33, "grad_norm": 0.5542745530605213, "learning_rate": 3.146375738264975e-05, "loss": 0.0949, "step": 1487 }, { "epoch": 0.33, "grad_norm": 0.5921784986551651, "learning_rate": 3.145209553174105e-05, "loss": 0.1391, "step": 1488 }, { "epoch": 0.33, "grad_norm": 0.5181984692655839, "learning_rate": 3.14404278848566e-05, "loss": 0.0863, "step": 1489 }, { "epoch": 0.33, "grad_norm": 0.5962280462130014, "learning_rate": 3.142875444790147e-05, "loss": 0.0991, "step": 1490 }, { "epoch": 0.33, "grad_norm": 0.48386436125900834, "learning_rate": 3.141707522678365e-05, "loss": 0.0813, "step": 1491 }, { "epoch": 0.33, "grad_norm": 0.48607849700451455, "learning_rate": 3.140539022741408e-05, "loss": 0.0923, "step": 1492 }, { "epoch": 0.33, "grad_norm": 0.4443590095314898, "learning_rate": 3.139369945570659e-05, "loss": 0.0806, "step": 1493 }, { "epoch": 0.33, "grad_norm": 0.605275350877237, "learning_rate": 3.138200291757797e-05, "loss": 0.1126, "step": 1494 }, { "epoch": 0.33, "grad_norm": 0.5292985641621455, "learning_rate": 3.137030061894789e-05, "loss": 0.0933, "step": 1495 }, { "epoch": 0.33, "grad_norm": 0.5313127705010521, "learning_rate": 3.135859256573898e-05, "loss": 0.0958, "step": 1496 }, { "epoch": 0.33, "grad_norm": 0.5485840930268028, "learning_rate": 3.134687876387673e-05, "loss": 0.096, "step": 1497 }, { "epoch": 0.33, "grad_norm": 0.5495443320540766, "learning_rate": 3.1335159219289585e-05, "loss": 0.0903, "step": 1498 }, { "epoch": 0.33, "grad_norm": 0.49349570123777126, "learning_rate": 3.132343393790887e-05, "loss": 0.0879, "step": 1499 }, { "epoch": 0.33, "grad_norm": 0.48489367494468105, "learning_rate": 3.131170292566883e-05, "loss": 0.092, "step": 1500 }, { "epoch": 0.33, "grad_norm": 0.5636963836988381, "learning_rate": 3.12999661885066e-05, "loss": 0.1014, "step": 1501 }, { "epoch": 0.33, "grad_norm": 0.5906622809294992, "learning_rate": 3.12882237323622e-05, "loss": 0.107, "step": 1502 }, { "epoch": 0.33, "grad_norm": 0.49269801109528827, "learning_rate": 3.127647556317858e-05, "loss": 0.0805, "step": 1503 }, { "epoch": 0.33, "grad_norm": 0.5129773917096273, "learning_rate": 3.126472168690156e-05, "loss": 0.0737, "step": 1504 }, { "epoch": 0.33, "grad_norm": 0.4865049017917022, "learning_rate": 3.125296210947983e-05, "loss": 0.0882, "step": 1505 }, { "epoch": 0.33, "grad_norm": 0.5390699634864613, "learning_rate": 3.1241196836865e-05, "loss": 0.0939, "step": 1506 }, { "epoch": 0.33, "grad_norm": 0.4798041608770732, "learning_rate": 3.1229425875011534e-05, "loss": 0.1018, "step": 1507 }, { "epoch": 0.33, "grad_norm": 0.47996977350808995, "learning_rate": 3.12176492298768e-05, "loss": 0.0691, "step": 1508 }, { "epoch": 0.33, "grad_norm": 0.44069173253425836, "learning_rate": 3.120586690742102e-05, "loss": 0.0631, "step": 1509 }, { "epoch": 0.33, "grad_norm": 0.5452199141375765, "learning_rate": 3.119407891360732e-05, "loss": 0.1006, "step": 1510 }, { "epoch": 0.33, "grad_norm": 0.5161302576367768, "learning_rate": 3.118228525440165e-05, "loss": 0.0915, "step": 1511 }, { "epoch": 0.33, "grad_norm": 0.49472325169915915, "learning_rate": 3.1170485935772864e-05, "loss": 0.0884, "step": 1512 }, { "epoch": 0.33, "grad_norm": 0.5517885722810728, "learning_rate": 3.1158680963692676e-05, "loss": 0.0884, "step": 1513 }, { "epoch": 0.33, "grad_norm": 0.46487889838167595, "learning_rate": 3.114687034413564e-05, "loss": 0.0756, "step": 1514 }, { "epoch": 0.33, "grad_norm": 0.5892782721730733, "learning_rate": 3.1135054083079194e-05, "loss": 0.1095, "step": 1515 }, { "epoch": 0.33, "grad_norm": 0.513953012326265, "learning_rate": 3.112323218650362e-05, "loss": 0.0992, "step": 1516 }, { "epoch": 0.33, "grad_norm": 0.4966780008099141, "learning_rate": 3.111140466039205e-05, "loss": 0.08, "step": 1517 }, { "epoch": 0.33, "grad_norm": 0.494700088726365, "learning_rate": 3.1099571510730466e-05, "loss": 0.0836, "step": 1518 }, { "epoch": 0.33, "grad_norm": 0.46990742839572375, "learning_rate": 3.1087732743507704e-05, "loss": 0.0791, "step": 1519 }, { "epoch": 0.33, "grad_norm": 0.45231846028789485, "learning_rate": 3.107588836471542e-05, "loss": 0.0751, "step": 1520 }, { "epoch": 0.33, "grad_norm": 0.5732737179976433, "learning_rate": 3.106403838034815e-05, "loss": 0.0994, "step": 1521 }, { "epoch": 0.33, "grad_norm": 0.555422497418479, "learning_rate": 3.1052182796403225e-05, "loss": 0.1109, "step": 1522 }, { "epoch": 0.33, "grad_norm": 0.44664638246672456, "learning_rate": 3.104032161888084e-05, "loss": 0.0782, "step": 1523 }, { "epoch": 0.33, "grad_norm": 0.49068254145545215, "learning_rate": 3.1028454853784e-05, "loss": 0.0817, "step": 1524 }, { "epoch": 0.33, "grad_norm": 0.5007696784195629, "learning_rate": 3.101658250711856e-05, "loss": 0.0835, "step": 1525 }, { "epoch": 0.34, "grad_norm": 0.47998469089890194, "learning_rate": 3.100470458489318e-05, "loss": 0.0867, "step": 1526 }, { "epoch": 0.34, "grad_norm": 0.5557227346756548, "learning_rate": 3.099282109311934e-05, "loss": 0.1048, "step": 1527 }, { "epoch": 0.34, "grad_norm": 0.5085157659837836, "learning_rate": 3.098093203781137e-05, "loss": 0.1068, "step": 1528 }, { "epoch": 0.34, "grad_norm": 0.4919272294724993, "learning_rate": 3.0969037424986376e-05, "loss": 0.0793, "step": 1529 }, { "epoch": 0.34, "grad_norm": 0.4970668800100589, "learning_rate": 3.09571372606643e-05, "loss": 0.0718, "step": 1530 }, { "epoch": 0.34, "grad_norm": 0.6050020449580161, "learning_rate": 3.09452315508679e-05, "loss": 0.1104, "step": 1531 }, { "epoch": 0.34, "grad_norm": 0.5542740153786689, "learning_rate": 3.09333203016227e-05, "loss": 0.0835, "step": 1532 }, { "epoch": 0.34, "grad_norm": 0.5153961251662009, "learning_rate": 3.0921403518957076e-05, "loss": 0.0965, "step": 1533 }, { "epoch": 0.34, "grad_norm": 0.6651957547171047, "learning_rate": 3.0909481208902185e-05, "loss": 0.1351, "step": 1534 }, { "epoch": 0.34, "grad_norm": 0.5472869956005133, "learning_rate": 3.089755337749198e-05, "loss": 0.0888, "step": 1535 }, { "epoch": 0.34, "grad_norm": 0.6644839351985476, "learning_rate": 3.08856200307632e-05, "loss": 0.114, "step": 1536 }, { "epoch": 0.34, "grad_norm": 0.5567185196834562, "learning_rate": 3.08736811747554e-05, "loss": 0.0958, "step": 1537 }, { "epoch": 0.34, "grad_norm": 0.5060256625780866, "learning_rate": 3.08617368155109e-05, "loss": 0.0637, "step": 1538 }, { "epoch": 0.34, "grad_norm": 0.5294189030931142, "learning_rate": 3.084978695907482e-05, "loss": 0.0915, "step": 1539 }, { "epoch": 0.34, "grad_norm": 0.5011807744937476, "learning_rate": 3.0837831611495036e-05, "loss": 0.0851, "step": 1540 }, { "epoch": 0.34, "grad_norm": 0.5303344691440417, "learning_rate": 3.082587077882225e-05, "loss": 0.0788, "step": 1541 }, { "epoch": 0.34, "grad_norm": 0.5673118455006193, "learning_rate": 3.081390446710989e-05, "loss": 0.0943, "step": 1542 }, { "epoch": 0.34, "grad_norm": 0.5125749137834809, "learning_rate": 3.080193268241419e-05, "loss": 0.0832, "step": 1543 }, { "epoch": 0.34, "grad_norm": 0.5498719250505671, "learning_rate": 3.0789955430794145e-05, "loss": 0.1065, "step": 1544 }, { "epoch": 0.34, "grad_norm": 0.5784083522581387, "learning_rate": 3.077797271831152e-05, "loss": 0.104, "step": 1545 }, { "epoch": 0.34, "grad_norm": 0.5247577414829844, "learning_rate": 3.076598455103081e-05, "loss": 0.0856, "step": 1546 }, { "epoch": 0.34, "grad_norm": 0.5081610437163209, "learning_rate": 3.0753990935019345e-05, "loss": 0.0854, "step": 1547 }, { "epoch": 0.34, "grad_norm": 0.47914656479452084, "learning_rate": 3.074199187634713e-05, "loss": 0.0884, "step": 1548 }, { "epoch": 0.34, "grad_norm": 0.4793126533479205, "learning_rate": 3.072998738108699e-05, "loss": 0.084, "step": 1549 }, { "epoch": 0.34, "grad_norm": 0.5118582440755662, "learning_rate": 3.071797745531445e-05, "loss": 0.0909, "step": 1550 }, { "epoch": 0.34, "grad_norm": 0.5628049871355234, "learning_rate": 3.070596210510783e-05, "loss": 0.1084, "step": 1551 }, { "epoch": 0.34, "grad_norm": 0.6168438884461107, "learning_rate": 3.069394133654815e-05, "loss": 0.1213, "step": 1552 }, { "epoch": 0.34, "grad_norm": 0.5079984177861392, "learning_rate": 3.068191515571921e-05, "loss": 0.0857, "step": 1553 }, { "epoch": 0.34, "grad_norm": 0.46789837405160123, "learning_rate": 3.066988356870752e-05, "loss": 0.0895, "step": 1554 }, { "epoch": 0.34, "grad_norm": 0.5426420148085699, "learning_rate": 3.0657846581602355e-05, "loss": 0.1254, "step": 1555 }, { "epoch": 0.34, "grad_norm": 0.558099434620568, "learning_rate": 3.06458042004957e-05, "loss": 0.0789, "step": 1556 }, { "epoch": 0.34, "grad_norm": 0.4035657639202378, "learning_rate": 3.063375643148228e-05, "loss": 0.062, "step": 1557 }, { "epoch": 0.34, "grad_norm": 0.5096574573562189, "learning_rate": 3.062170328065954e-05, "loss": 0.1382, "step": 1558 }, { "epoch": 0.34, "grad_norm": 0.4840329254405113, "learning_rate": 3.060964475412766e-05, "loss": 0.0796, "step": 1559 }, { "epoch": 0.34, "grad_norm": 0.49819952925350103, "learning_rate": 3.059758085798954e-05, "loss": 0.0909, "step": 1560 }, { "epoch": 0.34, "grad_norm": 0.5484340243759227, "learning_rate": 3.058551159835078e-05, "loss": 0.0892, "step": 1561 }, { "epoch": 0.34, "grad_norm": 0.553949052585398, "learning_rate": 3.057343698131971e-05, "loss": 0.0914, "step": 1562 }, { "epoch": 0.34, "grad_norm": 0.5722146906660561, "learning_rate": 3.056135701300736e-05, "loss": 0.0987, "step": 1563 }, { "epoch": 0.34, "grad_norm": 0.5633963225303233, "learning_rate": 3.054927169952749e-05, "loss": 0.103, "step": 1564 }, { "epoch": 0.34, "grad_norm": 0.6369758742073995, "learning_rate": 3.053718104699654e-05, "loss": 0.1306, "step": 1565 }, { "epoch": 0.34, "grad_norm": 0.5158833810020567, "learning_rate": 3.052508506153368e-05, "loss": 0.0986, "step": 1566 }, { "epoch": 0.34, "grad_norm": 0.46353267453790725, "learning_rate": 3.051298374926074e-05, "loss": 0.0915, "step": 1567 }, { "epoch": 0.34, "grad_norm": 0.5066324461289279, "learning_rate": 3.0500877116302284e-05, "loss": 0.0788, "step": 1568 }, { "epoch": 0.34, "grad_norm": 0.5822538423849566, "learning_rate": 3.0488765168785544e-05, "loss": 0.111, "step": 1569 }, { "epoch": 0.34, "grad_norm": 0.5673723334758027, "learning_rate": 3.047664791284046e-05, "loss": 0.1206, "step": 1570 }, { "epoch": 0.35, "grad_norm": 0.5087601549133268, "learning_rate": 3.046452535459963e-05, "loss": 0.0846, "step": 1571 }, { "epoch": 0.35, "grad_norm": 0.5376127346602497, "learning_rate": 3.045239750019839e-05, "loss": 0.0835, "step": 1572 }, { "epoch": 0.35, "grad_norm": 0.5174346149067873, "learning_rate": 3.044026435577469e-05, "loss": 0.0971, "step": 1573 }, { "epoch": 0.35, "grad_norm": 0.5149142539760165, "learning_rate": 3.0428125927469198e-05, "loss": 0.0895, "step": 1574 }, { "epoch": 0.35, "grad_norm": 0.5280456957441996, "learning_rate": 3.0415982221425257e-05, "loss": 0.1009, "step": 1575 }, { "epoch": 0.35, "grad_norm": 0.4897789378386961, "learning_rate": 3.040383324378885e-05, "loss": 0.0722, "step": 1576 }, { "epoch": 0.35, "grad_norm": 0.5358871703246333, "learning_rate": 3.0391679000708673e-05, "loss": 0.1011, "step": 1577 }, { "epoch": 0.35, "grad_norm": 0.5310148953074585, "learning_rate": 3.0379519498336054e-05, "loss": 0.0898, "step": 1578 }, { "epoch": 0.35, "grad_norm": 0.5301812866397277, "learning_rate": 3.036735474282498e-05, "loss": 0.0927, "step": 1579 }, { "epoch": 0.35, "grad_norm": 0.6411726169481955, "learning_rate": 3.035518474033212e-05, "loss": 0.1478, "step": 1580 }, { "epoch": 0.35, "grad_norm": 0.5214365676511248, "learning_rate": 3.0343009497016787e-05, "loss": 0.0989, "step": 1581 }, { "epoch": 0.35, "grad_norm": 0.5757916505202934, "learning_rate": 3.0330829019040945e-05, "loss": 0.1243, "step": 1582 }, { "epoch": 0.35, "grad_norm": 0.5416318503218875, "learning_rate": 3.0318643312569204e-05, "loss": 0.1017, "step": 1583 }, { "epoch": 0.35, "grad_norm": 0.5471932166567245, "learning_rate": 3.0306452383768833e-05, "loss": 0.0784, "step": 1584 }, { "epoch": 0.35, "grad_norm": 0.5177630400072355, "learning_rate": 3.0294256238809727e-05, "loss": 0.1019, "step": 1585 }, { "epoch": 0.35, "grad_norm": 0.45850777738104637, "learning_rate": 3.0282054883864434e-05, "loss": 0.0764, "step": 1586 }, { "epoch": 0.35, "grad_norm": 0.4849117443070667, "learning_rate": 3.026984832510814e-05, "loss": 0.0741, "step": 1587 }, { "epoch": 0.35, "grad_norm": 0.5921277169521008, "learning_rate": 3.025763656871865e-05, "loss": 0.1184, "step": 1588 }, { "epoch": 0.35, "grad_norm": 0.6143843819104651, "learning_rate": 3.024541962087641e-05, "loss": 0.1227, "step": 1589 }, { "epoch": 0.35, "grad_norm": 0.4186554019034123, "learning_rate": 3.0233197487764494e-05, "loss": 0.0758, "step": 1590 }, { "epoch": 0.35, "grad_norm": 0.4419283184812778, "learning_rate": 3.0220970175568604e-05, "loss": 0.0936, "step": 1591 }, { "epoch": 0.35, "grad_norm": 0.49122642356168894, "learning_rate": 3.020873769047705e-05, "loss": 0.1048, "step": 1592 }, { "epoch": 0.35, "grad_norm": 0.5101086861605791, "learning_rate": 3.019650003868077e-05, "loss": 0.0856, "step": 1593 }, { "epoch": 0.35, "grad_norm": 0.5561429928068005, "learning_rate": 3.0184257226373317e-05, "loss": 0.0963, "step": 1594 }, { "epoch": 0.35, "grad_norm": 0.4809035682977758, "learning_rate": 3.0172009259750852e-05, "loss": 0.0893, "step": 1595 }, { "epoch": 0.35, "grad_norm": 0.5498611936108522, "learning_rate": 3.015975614501214e-05, "loss": 0.1033, "step": 1596 }, { "epoch": 0.35, "grad_norm": 0.5622349220448989, "learning_rate": 3.0147497888358564e-05, "loss": 0.0949, "step": 1597 }, { "epoch": 0.35, "grad_norm": 0.479641613334266, "learning_rate": 3.0135234495994107e-05, "loss": 0.0908, "step": 1598 }, { "epoch": 0.35, "grad_norm": 0.5252805686745193, "learning_rate": 3.0122965974125335e-05, "loss": 0.0927, "step": 1599 }, { "epoch": 0.35, "grad_norm": 0.598178610317444, "learning_rate": 3.0110692328961435e-05, "loss": 0.118, "step": 1600 }, { "epoch": 0.35, "grad_norm": 0.5620250816314273, "learning_rate": 3.0098413566714165e-05, "loss": 0.1106, "step": 1601 }, { "epoch": 0.35, "grad_norm": 0.5314593183248383, "learning_rate": 3.008612969359788e-05, "loss": 0.0918, "step": 1602 }, { "epoch": 0.35, "grad_norm": 0.4614640800284727, "learning_rate": 3.0073840715829532e-05, "loss": 0.0706, "step": 1603 }, { "epoch": 0.35, "grad_norm": 0.4640629244972625, "learning_rate": 3.006154663962865e-05, "loss": 0.0763, "step": 1604 }, { "epoch": 0.35, "grad_norm": 0.4099952563441726, "learning_rate": 3.0049247471217326e-05, "loss": 0.0592, "step": 1605 }, { "epoch": 0.35, "grad_norm": 0.5684856523583793, "learning_rate": 3.0036943216820256e-05, "loss": 0.1109, "step": 1606 }, { "epoch": 0.35, "grad_norm": 0.47359148612090135, "learning_rate": 3.00246338826647e-05, "loss": 0.0738, "step": 1607 }, { "epoch": 0.35, "grad_norm": 0.47715175657175857, "learning_rate": 3.001231947498048e-05, "loss": 0.0643, "step": 1608 }, { "epoch": 0.35, "grad_norm": 0.5851278178459499, "learning_rate": 3.0000000000000004e-05, "loss": 0.1059, "step": 1609 }, { "epoch": 0.35, "grad_norm": 0.6081181794555448, "learning_rate": 2.998767546395822e-05, "loss": 0.1035, "step": 1610 }, { "epoch": 0.35, "grad_norm": 0.47406968880158457, "learning_rate": 2.9975345873092662e-05, "loss": 0.0911, "step": 1611 }, { "epoch": 0.35, "grad_norm": 0.511908372296521, "learning_rate": 2.996301123364341e-05, "loss": 0.0761, "step": 1612 }, { "epoch": 0.35, "grad_norm": 0.5312066902009361, "learning_rate": 2.9950671551853094e-05, "loss": 0.0902, "step": 1613 }, { "epoch": 0.35, "grad_norm": 0.5479439778163137, "learning_rate": 2.9938326833966914e-05, "loss": 0.1056, "step": 1614 }, { "epoch": 0.35, "grad_norm": 0.5139200169640844, "learning_rate": 2.992597708623259e-05, "loss": 0.1016, "step": 1615 }, { "epoch": 0.35, "grad_norm": 0.4303417426917004, "learning_rate": 2.991362231490042e-05, "loss": 0.0837, "step": 1616 }, { "epoch": 0.36, "grad_norm": 0.5217671859754873, "learning_rate": 2.990126252622323e-05, "loss": 0.091, "step": 1617 }, { "epoch": 0.36, "grad_norm": 0.5249282333731543, "learning_rate": 2.9888897726456374e-05, "loss": 0.0917, "step": 1618 }, { "epoch": 0.36, "grad_norm": 0.5065982386168, "learning_rate": 2.9876527921857756e-05, "loss": 0.1009, "step": 1619 }, { "epoch": 0.36, "grad_norm": 0.5157414796063478, "learning_rate": 2.986415311868782e-05, "loss": 0.091, "step": 1620 }, { "epoch": 0.36, "grad_norm": 0.4295084493558709, "learning_rate": 2.985177332320952e-05, "loss": 0.0606, "step": 1621 }, { "epoch": 0.36, "grad_norm": 0.5135798788910292, "learning_rate": 2.9839388541688352e-05, "loss": 0.0907, "step": 1622 }, { "epoch": 0.36, "grad_norm": 0.5477632937661516, "learning_rate": 2.9826998780392324e-05, "loss": 0.1087, "step": 1623 }, { "epoch": 0.36, "grad_norm": 0.5377968013052618, "learning_rate": 2.9814604045591974e-05, "loss": 0.082, "step": 1624 }, { "epoch": 0.36, "grad_norm": 0.4219926210962844, "learning_rate": 2.980220434356035e-05, "loss": 0.0731, "step": 1625 }, { "epoch": 0.36, "grad_norm": 0.45800952057021754, "learning_rate": 2.9789799680573014e-05, "loss": 0.0815, "step": 1626 }, { "epoch": 0.36, "grad_norm": 0.4190818901895578, "learning_rate": 2.9777390062908056e-05, "loss": 0.0612, "step": 1627 }, { "epoch": 0.36, "grad_norm": 0.654880934842561, "learning_rate": 2.976497549684605e-05, "loss": 0.0936, "step": 1628 }, { "epoch": 0.36, "grad_norm": 0.44223417433418194, "learning_rate": 2.9752555988670084e-05, "loss": 0.064, "step": 1629 }, { "epoch": 0.36, "grad_norm": 0.5239911069788755, "learning_rate": 2.9740131544665748e-05, "loss": 0.0842, "step": 1630 }, { "epoch": 0.36, "grad_norm": 0.4960591161457972, "learning_rate": 2.9727702171121125e-05, "loss": 0.082, "step": 1631 }, { "epoch": 0.36, "grad_norm": 0.4905212900166773, "learning_rate": 2.9715267874326805e-05, "loss": 0.0901, "step": 1632 }, { "epoch": 0.36, "grad_norm": 0.46224708730131725, "learning_rate": 2.970282866057586e-05, "loss": 0.0767, "step": 1633 }, { "epoch": 0.36, "grad_norm": 0.5464459507312848, "learning_rate": 2.969038453616385e-05, "loss": 0.0953, "step": 1634 }, { "epoch": 0.36, "grad_norm": 0.4984873899167725, "learning_rate": 2.9677935507388817e-05, "loss": 0.0708, "step": 1635 }, { "epoch": 0.36, "grad_norm": 0.534677360835388, "learning_rate": 2.96654815805513e-05, "loss": 0.0912, "step": 1636 }, { "epoch": 0.36, "grad_norm": 0.4562672481150706, "learning_rate": 2.965302276195431e-05, "loss": 0.0751, "step": 1637 }, { "epoch": 0.36, "grad_norm": 0.37727624751445854, "learning_rate": 2.9640559057903325e-05, "loss": 0.0692, "step": 1638 }, { "epoch": 0.36, "grad_norm": 0.47634405851813993, "learning_rate": 2.9628090474706304e-05, "loss": 0.088, "step": 1639 }, { "epoch": 0.36, "grad_norm": 0.45687423356353307, "learning_rate": 2.9615617018673663e-05, "loss": 0.0689, "step": 1640 }, { "epoch": 0.36, "grad_norm": 0.4847616221641512, "learning_rate": 2.9603138696118315e-05, "loss": 0.0768, "step": 1641 }, { "epoch": 0.36, "grad_norm": 0.4417883982633577, "learning_rate": 2.9590655513355598e-05, "loss": 0.0727, "step": 1642 }, { "epoch": 0.36, "grad_norm": 0.44089015395594955, "learning_rate": 2.957816747670334e-05, "loss": 0.06, "step": 1643 }, { "epoch": 0.36, "grad_norm": 0.42681674855841306, "learning_rate": 2.956567459248181e-05, "loss": 0.0702, "step": 1644 }, { "epoch": 0.36, "grad_norm": 0.566858413689165, "learning_rate": 2.9553176867013714e-05, "loss": 0.102, "step": 1645 }, { "epoch": 0.36, "grad_norm": 0.5197223799349769, "learning_rate": 2.9540674306624262e-05, "loss": 0.0942, "step": 1646 }, { "epoch": 0.36, "grad_norm": 0.5280340692011758, "learning_rate": 2.9528166917641048e-05, "loss": 0.0882, "step": 1647 }, { "epoch": 0.36, "grad_norm": 0.5924065599923164, "learning_rate": 2.951565470639415e-05, "loss": 0.1015, "step": 1648 }, { "epoch": 0.36, "grad_norm": 0.5174930718552494, "learning_rate": 2.9503137679216073e-05, "loss": 0.0926, "step": 1649 }, { "epoch": 0.36, "grad_norm": 0.5359374699717699, "learning_rate": 2.9490615842441764e-05, "loss": 0.0907, "step": 1650 }, { "epoch": 0.36, "grad_norm": 0.4148972931125151, "learning_rate": 2.94780892024086e-05, "loss": 0.0677, "step": 1651 }, { "epoch": 0.36, "grad_norm": 0.5461000946942457, "learning_rate": 2.9465557765456387e-05, "loss": 0.0973, "step": 1652 }, { "epoch": 0.36, "grad_norm": 0.4813988529715445, "learning_rate": 2.9453021537927363e-05, "loss": 0.077, "step": 1653 }, { "epoch": 0.36, "grad_norm": 0.46848046182768466, "learning_rate": 2.9440480526166193e-05, "loss": 0.0686, "step": 1654 }, { "epoch": 0.36, "grad_norm": 0.5593676507989175, "learning_rate": 2.9427934736519962e-05, "loss": 0.0954, "step": 1655 }, { "epoch": 0.36, "grad_norm": 0.5000285200091341, "learning_rate": 2.9415384175338154e-05, "loss": 0.0729, "step": 1656 }, { "epoch": 0.36, "grad_norm": 0.4873686387291929, "learning_rate": 2.9402828848972706e-05, "loss": 0.0838, "step": 1657 }, { "epoch": 0.36, "grad_norm": 0.4687000724905962, "learning_rate": 2.9390268763777938e-05, "loss": 0.0793, "step": 1658 }, { "epoch": 0.36, "grad_norm": 0.47911341170825583, "learning_rate": 2.937770392611058e-05, "loss": 0.089, "step": 1659 }, { "epoch": 0.36, "grad_norm": 0.5289099148013381, "learning_rate": 2.9365134342329783e-05, "loss": 0.0908, "step": 1660 }, { "epoch": 0.36, "grad_norm": 0.5187240295343717, "learning_rate": 2.935256001879709e-05, "loss": 0.0853, "step": 1661 }, { "epoch": 0.37, "grad_norm": 0.5361045176781944, "learning_rate": 2.9339980961876434e-05, "loss": 0.1128, "step": 1662 }, { "epoch": 0.37, "grad_norm": 0.5844729876855916, "learning_rate": 2.932739717793416e-05, "loss": 0.0931, "step": 1663 }, { "epoch": 0.37, "grad_norm": 0.48204295918542, "learning_rate": 2.9314808673338997e-05, "loss": 0.0693, "step": 1664 }, { "epoch": 0.37, "grad_norm": 0.5227459566825613, "learning_rate": 2.9302215454462063e-05, "loss": 0.0818, "step": 1665 }, { "epoch": 0.37, "grad_norm": 0.49666751512139234, "learning_rate": 2.928961752767686e-05, "loss": 0.0805, "step": 1666 }, { "epoch": 0.37, "grad_norm": 0.5397645000579105, "learning_rate": 2.9277014899359284e-05, "loss": 0.0955, "step": 1667 }, { "epoch": 0.37, "grad_norm": 0.49132051250609765, "learning_rate": 2.92644075758876e-05, "loss": 0.0943, "step": 1668 }, { "epoch": 0.37, "grad_norm": 0.49830369498658783, "learning_rate": 2.9251795563642445e-05, "loss": 0.0933, "step": 1669 }, { "epoch": 0.37, "grad_norm": 0.4875517622048043, "learning_rate": 2.923917886900685e-05, "loss": 0.0717, "step": 1670 }, { "epoch": 0.37, "grad_norm": 0.45240061506285206, "learning_rate": 2.922655749836618e-05, "loss": 0.0757, "step": 1671 }, { "epoch": 0.37, "grad_norm": 0.4454040265973597, "learning_rate": 2.921393145810821e-05, "loss": 0.0709, "step": 1672 }, { "epoch": 0.37, "grad_norm": 0.511675650841848, "learning_rate": 2.9201300754623046e-05, "loss": 0.0962, "step": 1673 }, { "epoch": 0.37, "grad_norm": 0.5622649231419086, "learning_rate": 2.9188665394303163e-05, "loss": 0.1, "step": 1674 }, { "epoch": 0.37, "grad_norm": 0.5357272311190564, "learning_rate": 2.9176025383543395e-05, "loss": 0.0753, "step": 1675 }, { "epoch": 0.37, "grad_norm": 0.4992327167306387, "learning_rate": 2.916338072874093e-05, "loss": 0.0919, "step": 1676 }, { "epoch": 0.37, "grad_norm": 0.5875316043225938, "learning_rate": 2.915073143629531e-05, "loss": 0.111, "step": 1677 }, { "epoch": 0.37, "grad_norm": 0.5132310956027424, "learning_rate": 2.9138077512608417e-05, "loss": 0.0993, "step": 1678 }, { "epoch": 0.37, "grad_norm": 0.4525387029271844, "learning_rate": 2.9125418964084474e-05, "loss": 0.0912, "step": 1679 }, { "epoch": 0.37, "grad_norm": 0.5493623027701254, "learning_rate": 2.9112755797130052e-05, "loss": 0.1015, "step": 1680 }, { "epoch": 0.37, "grad_norm": 0.47496973493428624, "learning_rate": 2.910008801815406e-05, "loss": 0.0692, "step": 1681 }, { "epoch": 0.37, "grad_norm": 0.487745387722754, "learning_rate": 2.908741563356774e-05, "loss": 0.0816, "step": 1682 }, { "epoch": 0.37, "grad_norm": 0.5300960630503472, "learning_rate": 2.9074738649784665e-05, "loss": 0.0796, "step": 1683 }, { "epoch": 0.37, "grad_norm": 0.4775093397006017, "learning_rate": 2.9062057073220723e-05, "loss": 0.0837, "step": 1684 }, { "epoch": 0.37, "grad_norm": 0.4841079591075937, "learning_rate": 2.9049370910294143e-05, "loss": 0.0792, "step": 1685 }, { "epoch": 0.37, "grad_norm": 0.44417570809304496, "learning_rate": 2.9036680167425476e-05, "loss": 0.0658, "step": 1686 }, { "epoch": 0.37, "grad_norm": 0.5095910836808137, "learning_rate": 2.902398485103758e-05, "loss": 0.1016, "step": 1687 }, { "epoch": 0.37, "grad_norm": 0.4611040557981612, "learning_rate": 2.901128496755564e-05, "loss": 0.0745, "step": 1688 }, { "epoch": 0.37, "grad_norm": 0.5023244234107879, "learning_rate": 2.899858052340713e-05, "loss": 0.077, "step": 1689 }, { "epoch": 0.37, "grad_norm": 0.4784058581203675, "learning_rate": 2.8985871525021857e-05, "loss": 0.0832, "step": 1690 }, { "epoch": 0.37, "grad_norm": 0.5384177623104506, "learning_rate": 2.897315797883192e-05, "loss": 0.1033, "step": 1691 }, { "epoch": 0.37, "grad_norm": 0.4415949696551052, "learning_rate": 2.896043989127172e-05, "loss": 0.0798, "step": 1692 }, { "epoch": 0.37, "grad_norm": 0.46916304510129275, "learning_rate": 2.8947717268777968e-05, "loss": 0.0854, "step": 1693 }, { "epoch": 0.37, "grad_norm": 0.5489348718405892, "learning_rate": 2.8934990117789658e-05, "loss": 0.1041, "step": 1694 }, { "epoch": 0.37, "grad_norm": 0.4737551187122979, "learning_rate": 2.8922258444748074e-05, "loss": 0.0661, "step": 1695 }, { "epoch": 0.37, "grad_norm": 0.529193290906736, "learning_rate": 2.8909522256096795e-05, "loss": 0.0812, "step": 1696 }, { "epoch": 0.37, "grad_norm": 0.47969049521141716, "learning_rate": 2.8896781558281688e-05, "loss": 0.0744, "step": 1697 }, { "epoch": 0.37, "grad_norm": 0.4855578500331324, "learning_rate": 2.88840363577509e-05, "loss": 0.0651, "step": 1698 }, { "epoch": 0.37, "grad_norm": 0.5289464261954826, "learning_rate": 2.8871286660954846e-05, "loss": 0.0764, "step": 1699 }, { "epoch": 0.37, "grad_norm": 0.45181143211991037, "learning_rate": 2.8858532474346232e-05, "loss": 0.0728, "step": 1700 }, { "epoch": 0.37, "grad_norm": 0.48186171593767424, "learning_rate": 2.8845773804380028e-05, "loss": 0.0715, "step": 1701 }, { "epoch": 0.37, "grad_norm": 0.5211316176442896, "learning_rate": 2.883301065751348e-05, "loss": 0.1015, "step": 1702 }, { "epoch": 0.37, "grad_norm": 0.4493606509118241, "learning_rate": 2.882024304020609e-05, "loss": 0.0631, "step": 1703 }, { "epoch": 0.37, "grad_norm": 0.45422305871096313, "learning_rate": 2.8807470958919626e-05, "loss": 0.0674, "step": 1704 }, { "epoch": 0.37, "grad_norm": 0.4887822899310735, "learning_rate": 2.8794694420118117e-05, "loss": 0.0814, "step": 1705 }, { "epoch": 0.37, "grad_norm": 0.5160722238022565, "learning_rate": 2.8781913430267857e-05, "loss": 0.0777, "step": 1706 }, { "epoch": 0.37, "grad_norm": 0.5285768573530069, "learning_rate": 2.876912799583737e-05, "loss": 0.0714, "step": 1707 }, { "epoch": 0.38, "grad_norm": 0.6155821286682815, "learning_rate": 2.8756338123297455e-05, "loss": 0.1196, "step": 1708 }, { "epoch": 0.38, "grad_norm": 0.48713356943010355, "learning_rate": 2.8743543819121132e-05, "loss": 0.0688, "step": 1709 }, { "epoch": 0.38, "grad_norm": 0.5582549394781079, "learning_rate": 2.8730745089783686e-05, "loss": 0.0951, "step": 1710 }, { "epoch": 0.38, "grad_norm": 0.5635245846493944, "learning_rate": 2.871794194176263e-05, "loss": 0.1159, "step": 1711 }, { "epoch": 0.38, "grad_norm": 0.4709760578978905, "learning_rate": 2.8705134381537718e-05, "loss": 0.0635, "step": 1712 }, { "epoch": 0.38, "grad_norm": 0.5130103896057514, "learning_rate": 2.869232241559093e-05, "loss": 0.0764, "step": 1713 }, { "epoch": 0.38, "grad_norm": 0.4696199133462722, "learning_rate": 2.8679506050406475e-05, "loss": 0.0656, "step": 1714 }, { "epoch": 0.38, "grad_norm": 0.5931310187899642, "learning_rate": 2.8666685292470814e-05, "loss": 0.1018, "step": 1715 }, { "epoch": 0.38, "grad_norm": 0.5262454428214967, "learning_rate": 2.8653860148272596e-05, "loss": 0.0946, "step": 1716 }, { "epoch": 0.38, "grad_norm": 0.5290788996994328, "learning_rate": 2.8641030624302704e-05, "loss": 0.0929, "step": 1717 }, { "epoch": 0.38, "grad_norm": 0.4861553758570568, "learning_rate": 2.8628196727054244e-05, "loss": 0.0931, "step": 1718 }, { "epoch": 0.38, "grad_norm": 0.44621232790038645, "learning_rate": 2.8615358463022533e-05, "loss": 0.0729, "step": 1719 }, { "epoch": 0.38, "grad_norm": 0.4687295909253299, "learning_rate": 2.860251583870509e-05, "loss": 0.095, "step": 1720 }, { "epoch": 0.38, "grad_norm": 0.41306681848024107, "learning_rate": 2.8589668860601643e-05, "loss": 0.0783, "step": 1721 }, { "epoch": 0.38, "grad_norm": 0.4714748814598099, "learning_rate": 2.857681753521413e-05, "loss": 0.0862, "step": 1722 }, { "epoch": 0.38, "grad_norm": 0.47383239316472453, "learning_rate": 2.856396186904669e-05, "loss": 0.0586, "step": 1723 }, { "epoch": 0.38, "grad_norm": 0.49578643651846827, "learning_rate": 2.8551101868605644e-05, "loss": 0.081, "step": 1724 }, { "epoch": 0.38, "grad_norm": 0.5084300467725035, "learning_rate": 2.8538237540399528e-05, "loss": 0.0899, "step": 1725 }, { "epoch": 0.38, "grad_norm": 0.5145259762148203, "learning_rate": 2.8525368890939055e-05, "loss": 0.1061, "step": 1726 }, { "epoch": 0.38, "grad_norm": 0.4566929861417621, "learning_rate": 2.851249592673712e-05, "loss": 0.0661, "step": 1727 }, { "epoch": 0.38, "grad_norm": 0.5720207810244956, "learning_rate": 2.8499618654308815e-05, "loss": 0.1057, "step": 1728 }, { "epoch": 0.38, "grad_norm": 0.5089245284548529, "learning_rate": 2.8486737080171405e-05, "loss": 0.0875, "step": 1729 }, { "epoch": 0.38, "grad_norm": 0.4464601818882488, "learning_rate": 2.847385121084434e-05, "loss": 0.059, "step": 1730 }, { "epoch": 0.38, "grad_norm": 0.47582491093389556, "learning_rate": 2.8460961052849222e-05, "loss": 0.0879, "step": 1731 }, { "epoch": 0.38, "grad_norm": 0.426657448845374, "learning_rate": 2.8448066612709854e-05, "loss": 0.0684, "step": 1732 }, { "epoch": 0.38, "grad_norm": 0.44690866676969737, "learning_rate": 2.843516789695219e-05, "loss": 0.0584, "step": 1733 }, { "epoch": 0.38, "grad_norm": 0.5096894422876526, "learning_rate": 2.842226491210434e-05, "loss": 0.0747, "step": 1734 }, { "epoch": 0.38, "grad_norm": 0.4899342442689138, "learning_rate": 2.8409357664696585e-05, "loss": 0.0949, "step": 1735 }, { "epoch": 0.38, "grad_norm": 0.5672997103580923, "learning_rate": 2.8396446161261372e-05, "loss": 0.0891, "step": 1736 }, { "epoch": 0.38, "grad_norm": 0.5439526542017444, "learning_rate": 2.8383530408333285e-05, "loss": 0.0949, "step": 1737 }, { "epoch": 0.38, "grad_norm": 0.48880725756917676, "learning_rate": 2.8370610412449066e-05, "loss": 0.0777, "step": 1738 }, { "epoch": 0.38, "grad_norm": 0.46023548137096015, "learning_rate": 2.8357686180147604e-05, "loss": 0.0708, "step": 1739 }, { "epoch": 0.38, "grad_norm": 0.44760904912110805, "learning_rate": 2.834475771796993e-05, "loss": 0.0571, "step": 1740 }, { "epoch": 0.38, "grad_norm": 0.5765089956129296, "learning_rate": 2.8331825032459228e-05, "loss": 0.1086, "step": 1741 }, { "epoch": 0.38, "grad_norm": 0.44519501525006766, "learning_rate": 2.8318888130160796e-05, "loss": 0.076, "step": 1742 }, { "epoch": 0.38, "grad_norm": 0.4456507586007182, "learning_rate": 2.830594701762209e-05, "loss": 0.0683, "step": 1743 }, { "epoch": 0.38, "grad_norm": 0.4807542557401542, "learning_rate": 2.8293001701392677e-05, "loss": 0.0814, "step": 1744 }, { "epoch": 0.38, "grad_norm": 0.46088174709060936, "learning_rate": 2.828005218802427e-05, "loss": 0.0681, "step": 1745 }, { "epoch": 0.38, "grad_norm": 0.48053604523088983, "learning_rate": 2.8267098484070693e-05, "loss": 0.0894, "step": 1746 }, { "epoch": 0.38, "grad_norm": 0.6099883440733267, "learning_rate": 2.8254140596087897e-05, "loss": 0.1019, "step": 1747 }, { "epoch": 0.38, "grad_norm": 0.4979950821817546, "learning_rate": 2.8241178530633947e-05, "loss": 0.099, "step": 1748 }, { "epoch": 0.38, "grad_norm": 0.5786325700981236, "learning_rate": 2.822821229426902e-05, "loss": 0.1113, "step": 1749 }, { "epoch": 0.38, "grad_norm": 0.5114477182388685, "learning_rate": 2.8215241893555415e-05, "loss": 0.0924, "step": 1750 }, { "epoch": 0.38, "grad_norm": 0.6553502044963802, "learning_rate": 2.8202267335057522e-05, "loss": 0.1022, "step": 1751 }, { "epoch": 0.38, "grad_norm": 0.44788021856062976, "learning_rate": 2.818928862534185e-05, "loss": 0.0545, "step": 1752 }, { "epoch": 0.39, "grad_norm": 0.5089788979669672, "learning_rate": 2.817630577097701e-05, "loss": 0.0936, "step": 1753 }, { "epoch": 0.39, "grad_norm": 0.5125735183646032, "learning_rate": 2.8163318778533692e-05, "loss": 0.0874, "step": 1754 }, { "epoch": 0.39, "grad_norm": 0.47712190033503193, "learning_rate": 2.81503276545847e-05, "loss": 0.0893, "step": 1755 }, { "epoch": 0.39, "grad_norm": 0.4760484424362048, "learning_rate": 2.8137332405704922e-05, "loss": 0.0736, "step": 1756 }, { "epoch": 0.39, "grad_norm": 0.4860070942294226, "learning_rate": 2.812433303847133e-05, "loss": 0.083, "step": 1757 }, { "epoch": 0.39, "grad_norm": 0.4364222125430108, "learning_rate": 2.811132955946298e-05, "loss": 0.0692, "step": 1758 }, { "epoch": 0.39, "grad_norm": 0.476793749000669, "learning_rate": 2.8098321975261026e-05, "loss": 0.071, "step": 1759 }, { "epoch": 0.39, "grad_norm": 0.4861955576919296, "learning_rate": 2.8085310292448666e-05, "loss": 0.0799, "step": 1760 }, { "epoch": 0.39, "grad_norm": 0.541403703559733, "learning_rate": 2.8072294517611208e-05, "loss": 0.094, "step": 1761 }, { "epoch": 0.39, "grad_norm": 0.4389854315999451, "learning_rate": 2.805927465733601e-05, "loss": 0.0738, "step": 1762 }, { "epoch": 0.39, "grad_norm": 0.4030064060918374, "learning_rate": 2.8046250718212507e-05, "loss": 0.0583, "step": 1763 }, { "epoch": 0.39, "grad_norm": 0.4475027397007963, "learning_rate": 2.8033222706832187e-05, "loss": 0.0786, "step": 1764 }, { "epoch": 0.39, "grad_norm": 0.5462551418734595, "learning_rate": 2.802019062978861e-05, "loss": 0.0936, "step": 1765 }, { "epoch": 0.39, "grad_norm": 0.45795786896413304, "learning_rate": 2.80071544936774e-05, "loss": 0.0723, "step": 1766 }, { "epoch": 0.39, "grad_norm": 0.4787774284563339, "learning_rate": 2.7994114305096208e-05, "loss": 0.0738, "step": 1767 }, { "epoch": 0.39, "grad_norm": 0.47482042368927674, "learning_rate": 2.7981070070644764e-05, "loss": 0.0885, "step": 1768 }, { "epoch": 0.39, "grad_norm": 0.512290693201746, "learning_rate": 2.7968021796924834e-05, "loss": 0.1083, "step": 1769 }, { "epoch": 0.39, "grad_norm": 0.5731261621710955, "learning_rate": 2.7954969490540223e-05, "loss": 0.0942, "step": 1770 }, { "epoch": 0.39, "grad_norm": 0.4821992429933812, "learning_rate": 2.7941913158096792e-05, "loss": 0.0758, "step": 1771 }, { "epoch": 0.39, "grad_norm": 0.44004913602999246, "learning_rate": 2.7928852806202424e-05, "loss": 0.0578, "step": 1772 }, { "epoch": 0.39, "grad_norm": 0.4511290690935369, "learning_rate": 2.7915788441467052e-05, "loss": 0.0722, "step": 1773 }, { "epoch": 0.39, "grad_norm": 0.5211129169284835, "learning_rate": 2.790272007050262e-05, "loss": 0.0794, "step": 1774 }, { "epoch": 0.39, "grad_norm": 0.4790374868523092, "learning_rate": 2.7889647699923114e-05, "loss": 0.0806, "step": 1775 }, { "epoch": 0.39, "grad_norm": 0.4536289832412529, "learning_rate": 2.7876571336344546e-05, "loss": 0.0626, "step": 1776 }, { "epoch": 0.39, "grad_norm": 0.4069042439652636, "learning_rate": 2.7863490986384945e-05, "loss": 0.0739, "step": 1777 }, { "epoch": 0.39, "grad_norm": 0.4723581682503797, "learning_rate": 2.7850406656664346e-05, "loss": 0.0899, "step": 1778 }, { "epoch": 0.39, "grad_norm": 0.5206130388403967, "learning_rate": 2.783731835380482e-05, "loss": 0.0877, "step": 1779 }, { "epoch": 0.39, "grad_norm": 0.4326390418050192, "learning_rate": 2.782422608443043e-05, "loss": 0.0569, "step": 1780 }, { "epoch": 0.39, "grad_norm": 0.4592044870680303, "learning_rate": 2.781112985516725e-05, "loss": 0.0694, "step": 1781 }, { "epoch": 0.39, "grad_norm": 0.5556496080285102, "learning_rate": 2.7798029672643375e-05, "loss": 0.0923, "step": 1782 }, { "epoch": 0.39, "grad_norm": 0.5149313856452441, "learning_rate": 2.778492554348887e-05, "loss": 0.0843, "step": 1783 }, { "epoch": 0.39, "grad_norm": 0.5557231100718248, "learning_rate": 2.7771817474335835e-05, "loss": 0.09, "step": 1784 }, { "epoch": 0.39, "grad_norm": 0.4594406943803711, "learning_rate": 2.7758705471818327e-05, "loss": 0.0775, "step": 1785 }, { "epoch": 0.39, "grad_norm": 0.4263839655470079, "learning_rate": 2.7745589542572424e-05, "loss": 0.0609, "step": 1786 }, { "epoch": 0.39, "grad_norm": 0.43457029535529285, "learning_rate": 2.7732469693236166e-05, "loss": 0.0683, "step": 1787 }, { "epoch": 0.39, "grad_norm": 0.549615261585891, "learning_rate": 2.77193459304496e-05, "loss": 0.0943, "step": 1788 }, { "epoch": 0.39, "grad_norm": 0.4156646680903677, "learning_rate": 2.7706218260854738e-05, "loss": 0.0583, "step": 1789 }, { "epoch": 0.39, "grad_norm": 0.4454491221674145, "learning_rate": 2.7693086691095573e-05, "loss": 0.0791, "step": 1790 }, { "epoch": 0.39, "grad_norm": 0.4934414527256401, "learning_rate": 2.767995122781807e-05, "loss": 0.0768, "step": 1791 }, { "epoch": 0.39, "grad_norm": 0.47652166222870884, "learning_rate": 2.7666811877670177e-05, "loss": 0.0723, "step": 1792 }, { "epoch": 0.39, "grad_norm": 0.4011121621863688, "learning_rate": 2.7653668647301797e-05, "loss": 0.0605, "step": 1793 }, { "epoch": 0.39, "grad_norm": 0.4426381298575097, "learning_rate": 2.7640521543364797e-05, "loss": 0.0705, "step": 1794 }, { "epoch": 0.39, "grad_norm": 0.5711811713692367, "learning_rate": 2.7627370572513005e-05, "loss": 0.0787, "step": 1795 }, { "epoch": 0.39, "grad_norm": 0.5513816158664855, "learning_rate": 2.7614215741402204e-05, "loss": 0.0921, "step": 1796 }, { "epoch": 0.39, "grad_norm": 0.4770801109582723, "learning_rate": 2.7601057056690148e-05, "loss": 0.0572, "step": 1797 }, { "epoch": 0.39, "grad_norm": 0.4465907973766518, "learning_rate": 2.7587894525036517e-05, "loss": 0.0676, "step": 1798 }, { "epoch": 0.4, "grad_norm": 0.43528794602088433, "learning_rate": 2.7574728153102956e-05, "loss": 0.064, "step": 1799 }, { "epoch": 0.4, "grad_norm": 0.45708921668069113, "learning_rate": 2.7561557947553037e-05, "loss": 0.0721, "step": 1800 }, { "epoch": 0.4, "grad_norm": 0.49661595351522414, "learning_rate": 2.7548383915052287e-05, "loss": 0.0833, "step": 1801 }, { "epoch": 0.4, "grad_norm": 0.4647576709194282, "learning_rate": 2.7535206062268174e-05, "loss": 0.0874, "step": 1802 }, { "epoch": 0.4, "grad_norm": 0.4758847529637113, "learning_rate": 2.7522024395870075e-05, "loss": 0.073, "step": 1803 }, { "epoch": 0.4, "grad_norm": 0.5512991289922461, "learning_rate": 2.7508838922529316e-05, "loss": 0.0728, "step": 1804 }, { "epoch": 0.4, "grad_norm": 0.45794397490796895, "learning_rate": 2.7495649648919153e-05, "loss": 0.082, "step": 1805 }, { "epoch": 0.4, "grad_norm": 0.5781542280902109, "learning_rate": 2.7482456581714757e-05, "loss": 0.1036, "step": 1806 }, { "epoch": 0.4, "grad_norm": 0.5296405647661102, "learning_rate": 2.7469259727593213e-05, "loss": 0.0862, "step": 1807 }, { "epoch": 0.4, "grad_norm": 0.43297235105373694, "learning_rate": 2.7456059093233537e-05, "loss": 0.0723, "step": 1808 }, { "epoch": 0.4, "grad_norm": 0.47428964905001236, "learning_rate": 2.7442854685316643e-05, "loss": 0.082, "step": 1809 }, { "epoch": 0.4, "grad_norm": 0.45791286631225797, "learning_rate": 2.7429646510525373e-05, "loss": 0.0568, "step": 1810 }, { "epoch": 0.4, "grad_norm": 0.442750554782858, "learning_rate": 2.7416434575544455e-05, "loss": 0.0675, "step": 1811 }, { "epoch": 0.4, "grad_norm": 0.47779904642868215, "learning_rate": 2.7403218887060538e-05, "loss": 0.0828, "step": 1812 }, { "epoch": 0.4, "grad_norm": 0.4937883108703817, "learning_rate": 2.738999945176215e-05, "loss": 0.0599, "step": 1813 }, { "epoch": 0.4, "grad_norm": 0.46984544546305385, "learning_rate": 2.7376776276339745e-05, "loss": 0.0701, "step": 1814 }, { "epoch": 0.4, "grad_norm": 0.4381758034347087, "learning_rate": 2.7363549367485648e-05, "loss": 0.0646, "step": 1815 }, { "epoch": 0.4, "grad_norm": 0.46041729230823847, "learning_rate": 2.7350318731894075e-05, "loss": 0.0812, "step": 1816 }, { "epoch": 0.4, "grad_norm": 0.4205301713824055, "learning_rate": 2.7337084376261135e-05, "loss": 0.0705, "step": 1817 }, { "epoch": 0.4, "grad_norm": 0.4711574720111425, "learning_rate": 2.7323846307284814e-05, "loss": 0.0731, "step": 1818 }, { "epoch": 0.4, "grad_norm": 0.46499916491894955, "learning_rate": 2.7310604531664983e-05, "loss": 0.0679, "step": 1819 }, { "epoch": 0.4, "grad_norm": 0.4927366614179062, "learning_rate": 2.7297359056103378e-05, "loss": 0.085, "step": 1820 }, { "epoch": 0.4, "grad_norm": 0.45650688477575435, "learning_rate": 2.7284109887303628e-05, "loss": 0.071, "step": 1821 }, { "epoch": 0.4, "grad_norm": 0.5742329446504342, "learning_rate": 2.7270857031971203e-05, "loss": 0.1062, "step": 1822 }, { "epoch": 0.4, "grad_norm": 0.531793288511195, "learning_rate": 2.7257600496813475e-05, "loss": 0.0912, "step": 1823 }, { "epoch": 0.4, "grad_norm": 0.502106461315447, "learning_rate": 2.7244340288539638e-05, "loss": 0.0726, "step": 1824 }, { "epoch": 0.4, "grad_norm": 0.5183084194087579, "learning_rate": 2.7231076413860774e-05, "loss": 0.0635, "step": 1825 }, { "epoch": 0.4, "grad_norm": 0.45798770559459295, "learning_rate": 2.721780887948981e-05, "loss": 0.0539, "step": 1826 }, { "epoch": 0.4, "grad_norm": 0.4696202623789847, "learning_rate": 2.7204537692141526e-05, "loss": 0.0771, "step": 1827 }, { "epoch": 0.4, "grad_norm": 0.43115573073857216, "learning_rate": 2.7191262858532552e-05, "loss": 0.0682, "step": 1828 }, { "epoch": 0.4, "grad_norm": 0.49340640619357506, "learning_rate": 2.7177984385381366e-05, "loss": 0.0853, "step": 1829 }, { "epoch": 0.4, "grad_norm": 0.47025112752865444, "learning_rate": 2.7164702279408275e-05, "loss": 0.0615, "step": 1830 }, { "epoch": 0.4, "grad_norm": 0.4015113404633209, "learning_rate": 2.715141654733544e-05, "loss": 0.0517, "step": 1831 }, { "epoch": 0.4, "grad_norm": 0.5527488248304472, "learning_rate": 2.7138127195886856e-05, "loss": 0.0959, "step": 1832 }, { "epoch": 0.4, "grad_norm": 0.4134978456925406, "learning_rate": 2.712483423178834e-05, "loss": 0.0639, "step": 1833 }, { "epoch": 0.4, "grad_norm": 0.38267705428554666, "learning_rate": 2.7111537661767537e-05, "loss": 0.0503, "step": 1834 }, { "epoch": 0.4, "grad_norm": 0.502958604296117, "learning_rate": 2.7098237492553937e-05, "loss": 0.0683, "step": 1835 }, { "epoch": 0.4, "grad_norm": 0.4738071710618732, "learning_rate": 2.7084933730878824e-05, "loss": 0.0812, "step": 1836 }, { "epoch": 0.4, "grad_norm": 0.5045485432184268, "learning_rate": 2.7071626383475327e-05, "loss": 0.0827, "step": 1837 }, { "epoch": 0.4, "grad_norm": 0.4528071340923516, "learning_rate": 2.7058315457078358e-05, "loss": 0.0705, "step": 1838 }, { "epoch": 0.4, "grad_norm": 0.4197753118195681, "learning_rate": 2.7045000958424674e-05, "loss": 0.0679, "step": 1839 }, { "epoch": 0.4, "grad_norm": 0.5344421188410078, "learning_rate": 2.7031682894252816e-05, "loss": 0.0901, "step": 1840 }, { "epoch": 0.4, "grad_norm": 0.48017108039036727, "learning_rate": 2.701836127130314e-05, "loss": 0.0642, "step": 1841 }, { "epoch": 0.4, "grad_norm": 0.4774242801446497, "learning_rate": 2.7005036096317802e-05, "loss": 0.0709, "step": 1842 }, { "epoch": 0.4, "grad_norm": 0.4355363374128159, "learning_rate": 2.6991707376040755e-05, "loss": 0.0642, "step": 1843 }, { "epoch": 0.41, "grad_norm": 0.5430117802755907, "learning_rate": 2.6978375117217743e-05, "loss": 0.0812, "step": 1844 }, { "epoch": 0.41, "grad_norm": 0.44822825823647566, "learning_rate": 2.696503932659631e-05, "loss": 0.0683, "step": 1845 }, { "epoch": 0.41, "grad_norm": 0.44747005604877643, "learning_rate": 2.6951700010925774e-05, "loss": 0.0627, "step": 1846 }, { "epoch": 0.41, "grad_norm": 0.6233900076002594, "learning_rate": 2.6938357176957243e-05, "loss": 0.0998, "step": 1847 }, { "epoch": 0.41, "grad_norm": 0.5089348933656639, "learning_rate": 2.692501083144362e-05, "loss": 0.0581, "step": 1848 }, { "epoch": 0.41, "grad_norm": 0.4220702284742675, "learning_rate": 2.6911660981139563e-05, "loss": 0.0589, "step": 1849 }, { "epoch": 0.41, "grad_norm": 0.4690353796410688, "learning_rate": 2.6898307632801515e-05, "loss": 0.0624, "step": 1850 }, { "epoch": 0.41, "grad_norm": 0.4031684164575929, "learning_rate": 2.6884950793187684e-05, "loss": 0.0523, "step": 1851 }, { "epoch": 0.41, "grad_norm": 0.4621623301535883, "learning_rate": 2.6871590469058052e-05, "loss": 0.0745, "step": 1852 }, { "epoch": 0.41, "grad_norm": 0.46212234801320057, "learning_rate": 2.6858226667174362e-05, "loss": 0.0803, "step": 1853 }, { "epoch": 0.41, "grad_norm": 0.4780853049312022, "learning_rate": 2.684485939430011e-05, "loss": 0.0621, "step": 1854 }, { "epoch": 0.41, "grad_norm": 0.5359598516579986, "learning_rate": 2.683148865720056e-05, "loss": 0.1006, "step": 1855 }, { "epoch": 0.41, "grad_norm": 0.3983820708371192, "learning_rate": 2.6818114462642726e-05, "loss": 0.0472, "step": 1856 }, { "epoch": 0.41, "grad_norm": 0.4445445650382663, "learning_rate": 2.6804736817395362e-05, "loss": 0.0584, "step": 1857 }, { "epoch": 0.41, "grad_norm": 0.5937913830038436, "learning_rate": 2.6791355728228986e-05, "loss": 0.1188, "step": 1858 }, { "epoch": 0.41, "grad_norm": 0.49584486117172055, "learning_rate": 2.6777971201915843e-05, "loss": 0.0693, "step": 1859 }, { "epoch": 0.41, "grad_norm": 0.5385538271988911, "learning_rate": 2.676458324522992e-05, "loss": 0.0734, "step": 1860 }, { "epoch": 0.41, "grad_norm": 0.5635618148652323, "learning_rate": 2.675119186494696e-05, "loss": 0.0983, "step": 1861 }, { "epoch": 0.41, "grad_norm": 0.48687003097642834, "learning_rate": 2.6737797067844403e-05, "loss": 0.068, "step": 1862 }, { "epoch": 0.41, "grad_norm": 0.5590274434322308, "learning_rate": 2.6724398860701453e-05, "loss": 0.0995, "step": 1863 }, { "epoch": 0.41, "grad_norm": 0.4434149114972751, "learning_rate": 2.6710997250299012e-05, "loss": 0.0634, "step": 1864 }, { "epoch": 0.41, "grad_norm": 0.41477458829925434, "learning_rate": 2.6697592243419723e-05, "loss": 0.0588, "step": 1865 }, { "epoch": 0.41, "grad_norm": 0.41729900141627835, "learning_rate": 2.668418384684795e-05, "loss": 0.0632, "step": 1866 }, { "epoch": 0.41, "grad_norm": 0.4770965866541249, "learning_rate": 2.6670772067369754e-05, "loss": 0.0783, "step": 1867 }, { "epoch": 0.41, "grad_norm": 0.4641929366087564, "learning_rate": 2.6657356911772922e-05, "loss": 0.0657, "step": 1868 }, { "epoch": 0.41, "grad_norm": 0.38559123115511384, "learning_rate": 2.6643938386846945e-05, "loss": 0.0578, "step": 1869 }, { "epoch": 0.41, "grad_norm": 0.47675891643241647, "learning_rate": 2.663051649938303e-05, "loss": 0.0861, "step": 1870 }, { "epoch": 0.41, "grad_norm": 0.4886254585109774, "learning_rate": 2.6617091256174058e-05, "loss": 0.0805, "step": 1871 }, { "epoch": 0.41, "grad_norm": 0.45498374647935974, "learning_rate": 2.6603662664014644e-05, "loss": 0.08, "step": 1872 }, { "epoch": 0.41, "grad_norm": 0.4548434360441476, "learning_rate": 2.659023072970107e-05, "loss": 0.0832, "step": 1873 }, { "epoch": 0.41, "grad_norm": 0.4640783853091864, "learning_rate": 2.657679546003133e-05, "loss": 0.0824, "step": 1874 }, { "epoch": 0.41, "grad_norm": 0.44275094182535174, "learning_rate": 2.656335686180509e-05, "loss": 0.0513, "step": 1875 }, { "epoch": 0.41, "grad_norm": 0.4445752851132524, "learning_rate": 2.6549914941823713e-05, "loss": 0.0763, "step": 1876 }, { "epoch": 0.41, "grad_norm": 0.4123212080066735, "learning_rate": 2.6536469706890226e-05, "loss": 0.084, "step": 1877 }, { "epoch": 0.41, "grad_norm": 0.5086670654238233, "learning_rate": 2.652302116380935e-05, "loss": 0.087, "step": 1878 }, { "epoch": 0.41, "grad_norm": 0.48448247640076997, "learning_rate": 2.6509569319387477e-05, "loss": 0.0784, "step": 1879 }, { "epoch": 0.41, "grad_norm": 0.4387107172929702, "learning_rate": 2.6496114180432672e-05, "loss": 0.062, "step": 1880 }, { "epoch": 0.41, "grad_norm": 0.4821219496508101, "learning_rate": 2.6482655753754657e-05, "loss": 0.0841, "step": 1881 }, { "epoch": 0.41, "grad_norm": 0.5209558215749556, "learning_rate": 2.6469194046164818e-05, "loss": 0.0942, "step": 1882 }, { "epoch": 0.41, "grad_norm": 0.40424438725563716, "learning_rate": 2.6455729064476227e-05, "loss": 0.0701, "step": 1883 }, { "epoch": 0.41, "grad_norm": 0.4744976367419273, "learning_rate": 2.6442260815503575e-05, "loss": 0.0695, "step": 1884 }, { "epoch": 0.41, "grad_norm": 0.45511068792452036, "learning_rate": 2.6428789306063233e-05, "loss": 0.0689, "step": 1885 }, { "epoch": 0.41, "grad_norm": 0.39648076699405155, "learning_rate": 2.6415314542973214e-05, "loss": 0.0569, "step": 1886 }, { "epoch": 0.41, "grad_norm": 0.39929021953304733, "learning_rate": 2.6401836533053186e-05, "loss": 0.0633, "step": 1887 }, { "epoch": 0.41, "grad_norm": 0.5730543722482679, "learning_rate": 2.6388355283124435e-05, "loss": 0.0875, "step": 1888 }, { "epoch": 0.41, "grad_norm": 0.47096256371662787, "learning_rate": 2.637487080000992e-05, "loss": 0.0705, "step": 1889 }, { "epoch": 0.42, "grad_norm": 0.4661904390045287, "learning_rate": 2.636138309053421e-05, "loss": 0.0533, "step": 1890 }, { "epoch": 0.42, "grad_norm": 0.503316933883678, "learning_rate": 2.634789216152353e-05, "loss": 0.07, "step": 1891 }, { "epoch": 0.42, "grad_norm": 0.5061427615048223, "learning_rate": 2.63343980198057e-05, "loss": 0.0905, "step": 1892 }, { "epoch": 0.42, "grad_norm": 0.4891768824481601, "learning_rate": 2.6320900672210216e-05, "loss": 0.0663, "step": 1893 }, { "epoch": 0.42, "grad_norm": 0.46323637785237165, "learning_rate": 2.6307400125568147e-05, "loss": 0.0685, "step": 1894 }, { "epoch": 0.42, "grad_norm": 0.5383597506348967, "learning_rate": 2.629389638671221e-05, "loss": 0.0838, "step": 1895 }, { "epoch": 0.42, "grad_norm": 0.47678387299932706, "learning_rate": 2.6280389462476733e-05, "loss": 0.0821, "step": 1896 }, { "epoch": 0.42, "grad_norm": 0.46968283023403673, "learning_rate": 2.6266879359697647e-05, "loss": 0.0678, "step": 1897 }, { "epoch": 0.42, "grad_norm": 0.4568098298074498, "learning_rate": 2.6253366085212503e-05, "loss": 0.075, "step": 1898 }, { "epoch": 0.42, "grad_norm": 0.4514430333537384, "learning_rate": 2.6239849645860447e-05, "loss": 0.0744, "step": 1899 }, { "epoch": 0.42, "grad_norm": 0.44452307815951564, "learning_rate": 2.6226330048482233e-05, "loss": 0.0718, "step": 1900 }, { "epoch": 0.42, "grad_norm": 0.4951100245452337, "learning_rate": 2.6212807299920218e-05, "loss": 0.0713, "step": 1901 }, { "epoch": 0.42, "grad_norm": 0.4672619725199249, "learning_rate": 2.6199281407018338e-05, "loss": 0.0758, "step": 1902 }, { "epoch": 0.42, "grad_norm": 0.35243519444327964, "learning_rate": 2.618575237662214e-05, "loss": 0.0512, "step": 1903 }, { "epoch": 0.42, "grad_norm": 0.40736542307882356, "learning_rate": 2.6172220215578743e-05, "loss": 0.0544, "step": 1904 }, { "epoch": 0.42, "grad_norm": 0.4704904194392552, "learning_rate": 2.615868493073686e-05, "loss": 0.0806, "step": 1905 }, { "epoch": 0.42, "grad_norm": 0.36351862709614324, "learning_rate": 2.614514652894678e-05, "loss": 0.0582, "step": 1906 }, { "epoch": 0.42, "grad_norm": 0.4428274181493983, "learning_rate": 2.613160501706037e-05, "loss": 0.062, "step": 1907 }, { "epoch": 0.42, "grad_norm": 0.37773973289563717, "learning_rate": 2.6118060401931073e-05, "loss": 0.057, "step": 1908 }, { "epoch": 0.42, "grad_norm": 0.4885177039781334, "learning_rate": 2.6104512690413906e-05, "loss": 0.0975, "step": 1909 }, { "epoch": 0.42, "grad_norm": 0.49160308049558293, "learning_rate": 2.609096188936544e-05, "loss": 0.0669, "step": 1910 }, { "epoch": 0.42, "grad_norm": 0.4757599091402448, "learning_rate": 2.607740800564383e-05, "loss": 0.0663, "step": 1911 }, { "epoch": 0.42, "grad_norm": 0.4879069304967567, "learning_rate": 2.6063851046108766e-05, "loss": 0.0749, "step": 1912 }, { "epoch": 0.42, "grad_norm": 0.6329272601773189, "learning_rate": 2.605029101762152e-05, "loss": 0.1222, "step": 1913 }, { "epoch": 0.42, "grad_norm": 0.4395667713293501, "learning_rate": 2.6036727927044897e-05, "loss": 0.0693, "step": 1914 }, { "epoch": 0.42, "grad_norm": 0.42974012226050445, "learning_rate": 2.602316178124327e-05, "loss": 0.0624, "step": 1915 }, { "epoch": 0.42, "grad_norm": 0.5026687685967235, "learning_rate": 2.6009592587082538e-05, "loss": 0.0715, "step": 1916 }, { "epoch": 0.42, "grad_norm": 0.4787859406190129, "learning_rate": 2.5996020351430163e-05, "loss": 0.0772, "step": 1917 }, { "epoch": 0.42, "grad_norm": 0.49640720707144015, "learning_rate": 2.598244508115513e-05, "loss": 0.0869, "step": 1918 }, { "epoch": 0.42, "grad_norm": 0.44704691774444677, "learning_rate": 2.596886678312797e-05, "loss": 0.0717, "step": 1919 }, { "epoch": 0.42, "grad_norm": 0.566518259483125, "learning_rate": 2.5955285464220738e-05, "loss": 0.1036, "step": 1920 }, { "epoch": 0.42, "grad_norm": 0.49886906395355324, "learning_rate": 2.594170113130703e-05, "loss": 0.0717, "step": 1921 }, { "epoch": 0.42, "grad_norm": 0.3631885369239595, "learning_rate": 2.5928113791261952e-05, "loss": 0.0446, "step": 1922 }, { "epoch": 0.42, "grad_norm": 0.42634171162088436, "learning_rate": 2.5914523450962147e-05, "loss": 0.0643, "step": 1923 }, { "epoch": 0.42, "grad_norm": 0.49661806889154125, "learning_rate": 2.590093011728577e-05, "loss": 0.0865, "step": 1924 }, { "epoch": 0.42, "grad_norm": 0.37300980468627076, "learning_rate": 2.588733379711248e-05, "loss": 0.0465, "step": 1925 }, { "epoch": 0.42, "grad_norm": 0.41554669022488777, "learning_rate": 2.587373449732347e-05, "loss": 0.051, "step": 1926 }, { "epoch": 0.42, "grad_norm": 0.45649580278378604, "learning_rate": 2.5860132224801424e-05, "loss": 0.0712, "step": 1927 }, { "epoch": 0.42, "grad_norm": 0.40900949293536104, "learning_rate": 2.584652698643054e-05, "loss": 0.0591, "step": 1928 }, { "epoch": 0.42, "grad_norm": 0.3939289367631103, "learning_rate": 2.58329187890965e-05, "loss": 0.0472, "step": 1929 }, { "epoch": 0.42, "grad_norm": 0.46501062106749136, "learning_rate": 2.581930763968651e-05, "loss": 0.0741, "step": 1930 }, { "epoch": 0.42, "grad_norm": 0.4760442792819372, "learning_rate": 2.580569354508925e-05, "loss": 0.0687, "step": 1931 }, { "epoch": 0.42, "grad_norm": 0.4572477561474321, "learning_rate": 2.5792076512194895e-05, "loss": 0.075, "step": 1932 }, { "epoch": 0.42, "grad_norm": 0.5661992714329125, "learning_rate": 2.5778456547895117e-05, "loss": 0.0905, "step": 1933 }, { "epoch": 0.42, "grad_norm": 0.4468749226389998, "learning_rate": 2.5764833659083053e-05, "loss": 0.0656, "step": 1934 }, { "epoch": 0.42, "grad_norm": 0.5459863800019874, "learning_rate": 2.5751207852653334e-05, "loss": 0.1169, "step": 1935 }, { "epoch": 0.43, "grad_norm": 0.4620661092127289, "learning_rate": 2.5737579135502068e-05, "loss": 0.0691, "step": 1936 }, { "epoch": 0.43, "grad_norm": 0.47962413766985756, "learning_rate": 2.572394751452683e-05, "loss": 0.0738, "step": 1937 }, { "epoch": 0.43, "grad_norm": 0.4390047116787088, "learning_rate": 2.5710312996626667e-05, "loss": 0.0751, "step": 1938 }, { "epoch": 0.43, "grad_norm": 0.4369903218236427, "learning_rate": 2.569667558870209e-05, "loss": 0.0652, "step": 1939 }, { "epoch": 0.43, "grad_norm": 0.45743852384713024, "learning_rate": 2.5683035297655076e-05, "loss": 0.075, "step": 1940 }, { "epoch": 0.43, "grad_norm": 0.4280689098342205, "learning_rate": 2.566939213038906e-05, "loss": 0.0711, "step": 1941 }, { "epoch": 0.43, "grad_norm": 0.46608190968404795, "learning_rate": 2.5655746093808934e-05, "loss": 0.0796, "step": 1942 }, { "epoch": 0.43, "grad_norm": 0.440689855902259, "learning_rate": 2.564209719482104e-05, "loss": 0.0509, "step": 1943 }, { "epoch": 0.43, "grad_norm": 0.47352029309397264, "learning_rate": 2.5628445440333164e-05, "loss": 0.0917, "step": 1944 }, { "epoch": 0.43, "grad_norm": 0.44388221799202127, "learning_rate": 2.5614790837254555e-05, "loss": 0.0565, "step": 1945 }, { "epoch": 0.43, "grad_norm": 0.4410939757738066, "learning_rate": 2.5601133392495886e-05, "loss": 0.0636, "step": 1946 }, { "epoch": 0.43, "grad_norm": 0.4855035279152683, "learning_rate": 2.558747311296926e-05, "loss": 0.0593, "step": 1947 }, { "epoch": 0.43, "grad_norm": 0.4578283320218462, "learning_rate": 2.5573810005588245e-05, "loss": 0.0786, "step": 1948 }, { "epoch": 0.43, "grad_norm": 0.49591954995008763, "learning_rate": 2.5560144077267826e-05, "loss": 0.0731, "step": 1949 }, { "epoch": 0.43, "grad_norm": 0.4308347471916173, "learning_rate": 2.5546475334924398e-05, "loss": 0.0584, "step": 1950 }, { "epoch": 0.43, "grad_norm": 0.4204925562690745, "learning_rate": 2.5532803785475802e-05, "loss": 0.0656, "step": 1951 }, { "epoch": 0.43, "grad_norm": 0.49925856453889894, "learning_rate": 2.5519129435841298e-05, "loss": 0.077, "step": 1952 }, { "epoch": 0.43, "grad_norm": 0.4948463978529427, "learning_rate": 2.550545229294155e-05, "loss": 0.0663, "step": 1953 }, { "epoch": 0.43, "grad_norm": 0.4999508982628857, "learning_rate": 2.549177236369865e-05, "loss": 0.0838, "step": 1954 }, { "epoch": 0.43, "grad_norm": 0.45857363239203563, "learning_rate": 2.5478089655036086e-05, "loss": 0.061, "step": 1955 }, { "epoch": 0.43, "grad_norm": 0.35649923674406764, "learning_rate": 2.5464404173878775e-05, "loss": 0.0626, "step": 1956 }, { "epoch": 0.43, "grad_norm": 0.4048285926917149, "learning_rate": 2.5450715927153012e-05, "loss": 0.0536, "step": 1957 }, { "epoch": 0.43, "grad_norm": 0.38869547173262164, "learning_rate": 2.54370249217865e-05, "loss": 0.0501, "step": 1958 }, { "epoch": 0.43, "grad_norm": 0.5734012519543342, "learning_rate": 2.542333116470835e-05, "loss": 0.0768, "step": 1959 }, { "epoch": 0.43, "grad_norm": 0.4480250545333302, "learning_rate": 2.5409634662849053e-05, "loss": 0.0644, "step": 1960 }, { "epoch": 0.43, "grad_norm": 0.47188119947390483, "learning_rate": 2.5395935423140487e-05, "loss": 0.0771, "step": 1961 }, { "epoch": 0.43, "grad_norm": 0.4498040408396079, "learning_rate": 2.5382233452515927e-05, "loss": 0.0795, "step": 1962 }, { "epoch": 0.43, "grad_norm": 0.5292149441641056, "learning_rate": 2.5368528757910027e-05, "loss": 0.0749, "step": 1963 }, { "epoch": 0.43, "grad_norm": 0.44870011618221406, "learning_rate": 2.5354821346258813e-05, "loss": 0.0622, "step": 1964 }, { "epoch": 0.43, "grad_norm": 0.4956262894189136, "learning_rate": 2.534111122449969e-05, "loss": 0.0914, "step": 1965 }, { "epoch": 0.43, "grad_norm": 0.5129120286948241, "learning_rate": 2.532739839957143e-05, "loss": 0.0824, "step": 1966 }, { "epoch": 0.43, "grad_norm": 0.4525574549928641, "learning_rate": 2.5313682878414185e-05, "loss": 0.0588, "step": 1967 }, { "epoch": 0.43, "grad_norm": 0.49355155904583753, "learning_rate": 2.529996466796946e-05, "loss": 0.0858, "step": 1968 }, { "epoch": 0.43, "grad_norm": 0.47739849869196777, "learning_rate": 2.5286243775180128e-05, "loss": 0.0738, "step": 1969 }, { "epoch": 0.43, "grad_norm": 0.3867738087309617, "learning_rate": 2.5272520206990418e-05, "loss": 0.0636, "step": 1970 }, { "epoch": 0.43, "grad_norm": 0.45189343176114144, "learning_rate": 2.5258793970345908e-05, "loss": 0.0667, "step": 1971 }, { "epoch": 0.43, "grad_norm": 0.45433940606363016, "learning_rate": 2.5245065072193534e-05, "loss": 0.069, "step": 1972 }, { "epoch": 0.43, "grad_norm": 0.519966287528791, "learning_rate": 2.5231333519481577e-05, "loss": 0.0744, "step": 1973 }, { "epoch": 0.43, "grad_norm": 0.4226544385437567, "learning_rate": 2.5217599319159654e-05, "loss": 0.0551, "step": 1974 }, { "epoch": 0.43, "grad_norm": 0.3707658825119907, "learning_rate": 2.5203862478178732e-05, "loss": 0.0505, "step": 1975 }, { "epoch": 0.43, "grad_norm": 0.5165477933826751, "learning_rate": 2.519012300349111e-05, "loss": 0.0848, "step": 1976 }, { "epoch": 0.43, "grad_norm": 0.4305647132097901, "learning_rate": 2.5176380902050418e-05, "loss": 0.0624, "step": 1977 }, { "epoch": 0.43, "grad_norm": 0.4972505673558224, "learning_rate": 2.516263618081162e-05, "loss": 0.0629, "step": 1978 }, { "epoch": 0.43, "grad_norm": 0.46207126902088547, "learning_rate": 2.5148888846731007e-05, "loss": 0.0777, "step": 1979 }, { "epoch": 0.43, "grad_norm": 0.44448269945982694, "learning_rate": 2.5135138906766185e-05, "loss": 0.0675, "step": 1980 }, { "epoch": 0.44, "grad_norm": 0.4387582668384212, "learning_rate": 2.512138636787608e-05, "loss": 0.0724, "step": 1981 }, { "epoch": 0.44, "grad_norm": 0.48427690004796514, "learning_rate": 2.510763123702094e-05, "loss": 0.0745, "step": 1982 }, { "epoch": 0.44, "grad_norm": 0.401680074912499, "learning_rate": 2.5093873521162323e-05, "loss": 0.05, "step": 1983 }, { "epoch": 0.44, "grad_norm": 0.48928307565120804, "learning_rate": 2.5080113227263093e-05, "loss": 0.0863, "step": 1984 }, { "epoch": 0.44, "grad_norm": 0.3701655575188742, "learning_rate": 2.5066350362287407e-05, "loss": 0.0556, "step": 1985 }, { "epoch": 0.44, "grad_norm": 0.37219148199736224, "learning_rate": 2.5052584933200756e-05, "loss": 0.0562, "step": 1986 }, { "epoch": 0.44, "grad_norm": 0.522504487406247, "learning_rate": 2.5038816946969894e-05, "loss": 0.0867, "step": 1987 }, { "epoch": 0.44, "grad_norm": 0.4148666203116047, "learning_rate": 2.5025046410562888e-05, "loss": 0.076, "step": 1988 }, { "epoch": 0.44, "grad_norm": 0.4241673390970852, "learning_rate": 2.501127333094909e-05, "loss": 0.058, "step": 1989 }, { "epoch": 0.44, "grad_norm": 0.524471741528751, "learning_rate": 2.4997497715099134e-05, "loss": 0.0695, "step": 1990 }, { "epoch": 0.44, "grad_norm": 0.48102112938397373, "learning_rate": 2.4983719569984955e-05, "loss": 0.0668, "step": 1991 }, { "epoch": 0.44, "grad_norm": 0.3793570795807171, "learning_rate": 2.496993890257975e-05, "loss": 0.0638, "step": 1992 }, { "epoch": 0.44, "grad_norm": 0.3804833732473676, "learning_rate": 2.4956155719858e-05, "loss": 0.0473, "step": 1993 }, { "epoch": 0.44, "grad_norm": 0.4429668419744718, "learning_rate": 2.4942370028795456e-05, "loss": 0.0663, "step": 1994 }, { "epoch": 0.44, "grad_norm": 0.5238030139613532, "learning_rate": 2.4928581836369147e-05, "loss": 0.089, "step": 1995 }, { "epoch": 0.44, "grad_norm": 0.4618954483378638, "learning_rate": 2.4914791149557358e-05, "loss": 0.0645, "step": 1996 }, { "epoch": 0.44, "grad_norm": 0.40761575008590367, "learning_rate": 2.490099797533964e-05, "loss": 0.0684, "step": 1997 }, { "epoch": 0.44, "grad_norm": 0.39386441748205986, "learning_rate": 2.48872023206968e-05, "loss": 0.0582, "step": 1998 }, { "epoch": 0.44, "grad_norm": 0.580610602439528, "learning_rate": 2.487340419261091e-05, "loss": 0.1076, "step": 1999 }, { "epoch": 0.44, "grad_norm": 0.41888062660197195, "learning_rate": 2.485960359806528e-05, "loss": 0.0584, "step": 2000 }, { "epoch": 0.44, "grad_norm": 0.4820066510667588, "learning_rate": 2.4845800544044483e-05, "loss": 0.0792, "step": 2001 }, { "epoch": 0.44, "grad_norm": 0.4113433733628816, "learning_rate": 2.4831995037534325e-05, "loss": 0.0639, "step": 2002 }, { "epoch": 0.44, "grad_norm": 0.4688180556167094, "learning_rate": 2.481818708552185e-05, "loss": 0.0657, "step": 2003 }, { "epoch": 0.44, "grad_norm": 0.557453565759295, "learning_rate": 2.480437669499537e-05, "loss": 0.0744, "step": 2004 }, { "epoch": 0.44, "grad_norm": 0.45971494928556966, "learning_rate": 2.479056387294438e-05, "loss": 0.0745, "step": 2005 }, { "epoch": 0.44, "grad_norm": 0.3851151984198529, "learning_rate": 2.4776748626359656e-05, "loss": 0.0543, "step": 2006 }, { "epoch": 0.44, "grad_norm": 0.5287876565298385, "learning_rate": 2.4762930962233164e-05, "loss": 0.0922, "step": 2007 }, { "epoch": 0.44, "grad_norm": 0.5038474884074328, "learning_rate": 2.4749110887558114e-05, "loss": 0.0628, "step": 2008 }, { "epoch": 0.44, "grad_norm": 0.43435565919150204, "learning_rate": 2.4735288409328937e-05, "loss": 0.0619, "step": 2009 }, { "epoch": 0.44, "grad_norm": 0.4305378735493745, "learning_rate": 2.472146353454127e-05, "loss": 0.0666, "step": 2010 }, { "epoch": 0.44, "grad_norm": 0.42052916150574776, "learning_rate": 2.4707636270191956e-05, "loss": 0.0669, "step": 2011 }, { "epoch": 0.44, "grad_norm": 0.4320426553114052, "learning_rate": 2.4693806623279074e-05, "loss": 0.0643, "step": 2012 }, { "epoch": 0.44, "grad_norm": 0.45603134128190864, "learning_rate": 2.4679974600801882e-05, "loss": 0.0756, "step": 2013 }, { "epoch": 0.44, "grad_norm": 0.5432398825807966, "learning_rate": 2.4666140209760862e-05, "loss": 0.1047, "step": 2014 }, { "epoch": 0.44, "grad_norm": 0.3501122018088189, "learning_rate": 2.4652303457157677e-05, "loss": 0.0458, "step": 2015 }, { "epoch": 0.44, "grad_norm": 0.4849934014382745, "learning_rate": 2.4638464349995186e-05, "loss": 0.0739, "step": 2016 }, { "epoch": 0.44, "grad_norm": 0.4127435189367342, "learning_rate": 2.4624622895277462e-05, "loss": 0.0532, "step": 2017 }, { "epoch": 0.44, "grad_norm": 0.3852790853300477, "learning_rate": 2.461077910000974e-05, "loss": 0.0526, "step": 2018 }, { "epoch": 0.44, "grad_norm": 0.39799662114630846, "learning_rate": 2.4596932971198446e-05, "loss": 0.0678, "step": 2019 }, { "epoch": 0.44, "grad_norm": 0.42710675392442127, "learning_rate": 2.4583084515851194e-05, "loss": 0.0618, "step": 2020 }, { "epoch": 0.44, "grad_norm": 0.4730950517637139, "learning_rate": 2.456923374097678e-05, "loss": 0.0679, "step": 2021 }, { "epoch": 0.44, "grad_norm": 0.3588081118074976, "learning_rate": 2.4555380653585158e-05, "loss": 0.0578, "step": 2022 }, { "epoch": 0.44, "grad_norm": 0.4513238923236032, "learning_rate": 2.4541525260687468e-05, "loss": 0.0626, "step": 2023 }, { "epoch": 0.44, "grad_norm": 0.5074836840144351, "learning_rate": 2.4527667569295996e-05, "loss": 0.0722, "step": 2024 }, { "epoch": 0.44, "grad_norm": 0.45947534801574375, "learning_rate": 2.4513807586424214e-05, "loss": 0.0532, "step": 2025 }, { "epoch": 0.44, "grad_norm": 0.41736844885552515, "learning_rate": 2.449994531908675e-05, "loss": 0.0625, "step": 2026 }, { "epoch": 0.45, "grad_norm": 0.4226561308347636, "learning_rate": 2.4486080774299364e-05, "loss": 0.0602, "step": 2027 }, { "epoch": 0.45, "grad_norm": 0.44196260886816885, "learning_rate": 2.4472213959079002e-05, "loss": 0.0608, "step": 2028 }, { "epoch": 0.45, "grad_norm": 0.48246632112555954, "learning_rate": 2.4458344880443735e-05, "loss": 0.0785, "step": 2029 }, { "epoch": 0.45, "grad_norm": 0.43183535502932363, "learning_rate": 2.4444473545412804e-05, "loss": 0.0519, "step": 2030 }, { "epoch": 0.45, "grad_norm": 0.47867407219085056, "learning_rate": 2.4430599961006563e-05, "loss": 0.0679, "step": 2031 }, { "epoch": 0.45, "grad_norm": 0.385196541309476, "learning_rate": 2.441672413424652e-05, "loss": 0.0545, "step": 2032 }, { "epoch": 0.45, "grad_norm": 0.38971940926330956, "learning_rate": 2.4402846072155313e-05, "loss": 0.0598, "step": 2033 }, { "epoch": 0.45, "grad_norm": 0.4747944694621555, "learning_rate": 2.4388965781756727e-05, "loss": 0.0663, "step": 2034 }, { "epoch": 0.45, "grad_norm": 0.5031381711953609, "learning_rate": 2.437508327007565e-05, "loss": 0.0781, "step": 2035 }, { "epoch": 0.45, "grad_norm": 0.3809346246962901, "learning_rate": 2.4361198544138117e-05, "loss": 0.0534, "step": 2036 }, { "epoch": 0.45, "grad_norm": 0.34526724195138403, "learning_rate": 2.4347311610971255e-05, "loss": 0.0496, "step": 2037 }, { "epoch": 0.45, "grad_norm": 0.4932194744743591, "learning_rate": 2.4333422477603342e-05, "loss": 0.0835, "step": 2038 }, { "epoch": 0.45, "grad_norm": 0.48806754967168864, "learning_rate": 2.4319531151063753e-05, "loss": 0.0631, "step": 2039 }, { "epoch": 0.45, "grad_norm": 0.3881418457614026, "learning_rate": 2.4305637638382967e-05, "loss": 0.0453, "step": 2040 }, { "epoch": 0.45, "grad_norm": 0.3958838664809399, "learning_rate": 2.4291741946592575e-05, "loss": 0.0618, "step": 2041 }, { "epoch": 0.45, "grad_norm": 0.4232946985585955, "learning_rate": 2.427784408272528e-05, "loss": 0.0558, "step": 2042 }, { "epoch": 0.45, "grad_norm": 0.45405005706414564, "learning_rate": 2.4263944053814866e-05, "loss": 0.072, "step": 2043 }, { "epoch": 0.45, "grad_norm": 0.41057790245168874, "learning_rate": 2.4250041866896234e-05, "loss": 0.0633, "step": 2044 }, { "epoch": 0.45, "grad_norm": 0.49894964639299666, "learning_rate": 2.4236137529005355e-05, "loss": 0.0733, "step": 2045 }, { "epoch": 0.45, "grad_norm": 0.4828462008717174, "learning_rate": 2.4222231047179303e-05, "loss": 0.0645, "step": 2046 }, { "epoch": 0.45, "grad_norm": 0.4851688985358276, "learning_rate": 2.420832242845624e-05, "loss": 0.0518, "step": 2047 }, { "epoch": 0.45, "grad_norm": 0.47643881896966234, "learning_rate": 2.41944116798754e-05, "loss": 0.0719, "step": 2048 }, { "epoch": 0.45, "grad_norm": 0.46149104227028975, "learning_rate": 2.4180498808477096e-05, "loss": 0.0696, "step": 2049 }, { "epoch": 0.45, "grad_norm": 0.4211329716125816, "learning_rate": 2.4166583821302712e-05, "loss": 0.0714, "step": 2050 }, { "epoch": 0.45, "grad_norm": 0.3851136120143572, "learning_rate": 2.4152666725394717e-05, "loss": 0.0594, "step": 2051 }, { "epoch": 0.45, "grad_norm": 0.4271611243632751, "learning_rate": 2.413874752779664e-05, "loss": 0.0637, "step": 2052 }, { "epoch": 0.45, "grad_norm": 0.4608236592549507, "learning_rate": 2.412482623555307e-05, "loss": 0.0786, "step": 2053 }, { "epoch": 0.45, "grad_norm": 0.4462314293039966, "learning_rate": 2.411090285570965e-05, "loss": 0.0636, "step": 2054 }, { "epoch": 0.45, "grad_norm": 0.41341095170918657, "learning_rate": 2.4096977395313096e-05, "loss": 0.0691, "step": 2055 }, { "epoch": 0.45, "grad_norm": 0.3892252639047788, "learning_rate": 2.4083049861411173e-05, "loss": 0.0557, "step": 2056 }, { "epoch": 0.45, "grad_norm": 0.43831609495639573, "learning_rate": 2.4069120261052682e-05, "loss": 0.0713, "step": 2057 }, { "epoch": 0.45, "grad_norm": 0.4160579785202587, "learning_rate": 2.4055188601287483e-05, "loss": 0.0649, "step": 2058 }, { "epoch": 0.45, "grad_norm": 0.35123628740593144, "learning_rate": 2.404125488916647e-05, "loss": 0.0519, "step": 2059 }, { "epoch": 0.45, "grad_norm": 0.3956505788576386, "learning_rate": 2.402731913174159e-05, "loss": 0.0516, "step": 2060 }, { "epoch": 0.45, "grad_norm": 0.5417443335922905, "learning_rate": 2.4013381336065805e-05, "loss": 0.0875, "step": 2061 }, { "epoch": 0.45, "grad_norm": 0.4087644851229479, "learning_rate": 2.399944150919313e-05, "loss": 0.0595, "step": 2062 }, { "epoch": 0.45, "grad_norm": 0.4386785504995785, "learning_rate": 2.398549965817858e-05, "loss": 0.0634, "step": 2063 }, { "epoch": 0.45, "grad_norm": 0.3772173533599839, "learning_rate": 2.3971555790078228e-05, "loss": 0.0626, "step": 2064 }, { "epoch": 0.45, "grad_norm": 0.49008808764987444, "learning_rate": 2.3957609911949146e-05, "loss": 0.0765, "step": 2065 }, { "epoch": 0.45, "grad_norm": 0.3784129283938517, "learning_rate": 2.3943662030849426e-05, "loss": 0.0541, "step": 2066 }, { "epoch": 0.45, "grad_norm": 0.42390100230516714, "learning_rate": 2.3929712153838173e-05, "loss": 0.0645, "step": 2067 }, { "epoch": 0.45, "grad_norm": 0.5508192333353973, "learning_rate": 2.3915760287975515e-05, "loss": 0.0865, "step": 2068 }, { "epoch": 0.45, "grad_norm": 0.48057344117933043, "learning_rate": 2.390180644032257e-05, "loss": 0.0718, "step": 2069 }, { "epoch": 0.45, "grad_norm": 0.44469013559146137, "learning_rate": 2.3887850617941464e-05, "loss": 0.0731, "step": 2070 }, { "epoch": 0.45, "grad_norm": 0.37449150577991236, "learning_rate": 2.3873892827895332e-05, "loss": 0.0592, "step": 2071 }, { "epoch": 0.46, "grad_norm": 0.3453121668611188, "learning_rate": 2.3859933077248285e-05, "loss": 0.0572, "step": 2072 }, { "epoch": 0.46, "grad_norm": 0.3831193628254262, "learning_rate": 2.3845971373065452e-05, "loss": 0.0448, "step": 2073 }, { "epoch": 0.46, "grad_norm": 0.4014324311553023, "learning_rate": 2.3832007722412934e-05, "loss": 0.066, "step": 2074 }, { "epoch": 0.46, "grad_norm": 0.4756276701803358, "learning_rate": 2.3818042132357812e-05, "loss": 0.0615, "step": 2075 }, { "epoch": 0.46, "grad_norm": 0.46100292711300944, "learning_rate": 2.3804074609968158e-05, "loss": 0.0621, "step": 2076 }, { "epoch": 0.46, "grad_norm": 0.40052272199260675, "learning_rate": 2.3790105162313032e-05, "loss": 0.0569, "step": 2077 }, { "epoch": 0.46, "grad_norm": 0.47979434684151695, "learning_rate": 2.3776133796462446e-05, "loss": 0.0566, "step": 2078 }, { "epoch": 0.46, "grad_norm": 0.503394081000372, "learning_rate": 2.3762160519487402e-05, "loss": 0.089, "step": 2079 }, { "epoch": 0.46, "grad_norm": 0.47883710377584393, "learning_rate": 2.3748185338459847e-05, "loss": 0.0656, "step": 2080 }, { "epoch": 0.46, "grad_norm": 0.4445039536286361, "learning_rate": 2.3734208260452727e-05, "loss": 0.0679, "step": 2081 }, { "epoch": 0.46, "grad_norm": 0.6269723526807839, "learning_rate": 2.372022929253991e-05, "loss": 0.0928, "step": 2082 }, { "epoch": 0.46, "grad_norm": 0.45459800573575326, "learning_rate": 2.3706248441796246e-05, "loss": 0.0678, "step": 2083 }, { "epoch": 0.46, "grad_norm": 0.48578360533979176, "learning_rate": 2.369226571529752e-05, "loss": 0.0953, "step": 2084 }, { "epoch": 0.46, "grad_norm": 0.3936335860836555, "learning_rate": 2.3678281120120485e-05, "loss": 0.0661, "step": 2085 }, { "epoch": 0.46, "grad_norm": 0.3621791611864308, "learning_rate": 2.366429466334283e-05, "loss": 0.0416, "step": 2086 }, { "epoch": 0.46, "grad_norm": 0.4244046662682168, "learning_rate": 2.3650306352043182e-05, "loss": 0.0617, "step": 2087 }, { "epoch": 0.46, "grad_norm": 0.3979055930722182, "learning_rate": 2.3636316193301107e-05, "loss": 0.047, "step": 2088 }, { "epoch": 0.46, "grad_norm": 0.5196930482789566, "learning_rate": 2.3622324194197118e-05, "loss": 0.0734, "step": 2089 }, { "epoch": 0.46, "grad_norm": 0.3873937091870021, "learning_rate": 2.3608330361812652e-05, "loss": 0.0708, "step": 2090 }, { "epoch": 0.46, "grad_norm": 0.3548939847473483, "learning_rate": 2.3594334703230065e-05, "loss": 0.0524, "step": 2091 }, { "epoch": 0.46, "grad_norm": 0.3971874411438089, "learning_rate": 2.3580337225532663e-05, "loss": 0.0568, "step": 2092 }, { "epoch": 0.46, "grad_norm": 0.4571903636475039, "learning_rate": 2.356633793580463e-05, "loss": 0.0694, "step": 2093 }, { "epoch": 0.46, "grad_norm": 0.38851891196183885, "learning_rate": 2.355233684113111e-05, "loss": 0.0735, "step": 2094 }, { "epoch": 0.46, "grad_norm": 0.4311069970136712, "learning_rate": 2.3538333948598142e-05, "loss": 0.0633, "step": 2095 }, { "epoch": 0.46, "grad_norm": 0.5541316457548243, "learning_rate": 2.3524329265292668e-05, "loss": 0.0989, "step": 2096 }, { "epoch": 0.46, "grad_norm": 0.5119881248531218, "learning_rate": 2.3510322798302553e-05, "loss": 0.0733, "step": 2097 }, { "epoch": 0.46, "grad_norm": 0.35231347937292995, "learning_rate": 2.3496314554716543e-05, "loss": 0.0499, "step": 2098 }, { "epoch": 0.46, "grad_norm": 0.4671012811618288, "learning_rate": 2.348230454162431e-05, "loss": 0.0646, "step": 2099 }, { "epoch": 0.46, "grad_norm": 0.41502455742116545, "learning_rate": 2.34682927661164e-05, "loss": 0.0609, "step": 2100 }, { "epoch": 0.46, "grad_norm": 0.36580421750435826, "learning_rate": 2.3454279235284264e-05, "loss": 0.0516, "step": 2101 }, { "epoch": 0.46, "grad_norm": 0.3288414490352913, "learning_rate": 2.344026395622023e-05, "loss": 0.0411, "step": 2102 }, { "epoch": 0.46, "grad_norm": 0.5474729266505735, "learning_rate": 2.3426246936017514e-05, "loss": 0.0827, "step": 2103 }, { "epoch": 0.46, "grad_norm": 0.3939901361416779, "learning_rate": 2.3412228181770224e-05, "loss": 0.0615, "step": 2104 }, { "epoch": 0.46, "grad_norm": 0.4041682605449802, "learning_rate": 2.3398207700573336e-05, "loss": 0.066, "step": 2105 }, { "epoch": 0.46, "grad_norm": 0.45185146919493696, "learning_rate": 2.3384185499522696e-05, "loss": 0.0771, "step": 2106 }, { "epoch": 0.46, "grad_norm": 0.39996471547427803, "learning_rate": 2.337016158571503e-05, "loss": 0.0537, "step": 2107 }, { "epoch": 0.46, "grad_norm": 0.441245767232677, "learning_rate": 2.335613596624793e-05, "loss": 0.0729, "step": 2108 }, { "epoch": 0.46, "grad_norm": 0.4054840643605466, "learning_rate": 2.334210864821984e-05, "loss": 0.0524, "step": 2109 }, { "epoch": 0.46, "grad_norm": 0.48563810981326716, "learning_rate": 2.3328079638730073e-05, "loss": 0.0801, "step": 2110 }, { "epoch": 0.46, "grad_norm": 0.4039880983221286, "learning_rate": 2.3314048944878804e-05, "loss": 0.0548, "step": 2111 }, { "epoch": 0.46, "grad_norm": 0.48363093170054183, "learning_rate": 2.330001657376705e-05, "loss": 0.0823, "step": 2112 }, { "epoch": 0.46, "grad_norm": 0.3865247490288611, "learning_rate": 2.3285982532496676e-05, "loss": 0.0684, "step": 2113 }, { "epoch": 0.46, "grad_norm": 0.37701545951017845, "learning_rate": 2.32719468281704e-05, "loss": 0.0563, "step": 2114 }, { "epoch": 0.46, "grad_norm": 0.4805666350852702, "learning_rate": 2.325790946789178e-05, "loss": 0.0873, "step": 2115 }, { "epoch": 0.46, "grad_norm": 0.394559689242731, "learning_rate": 2.32438704587652e-05, "loss": 0.0542, "step": 2116 }, { "epoch": 0.46, "grad_norm": 0.3775259419236919, "learning_rate": 2.3229829807895904e-05, "loss": 0.0574, "step": 2117 }, { "epoch": 0.47, "grad_norm": 0.39554686176948745, "learning_rate": 2.3215787522389935e-05, "loss": 0.0521, "step": 2118 }, { "epoch": 0.47, "grad_norm": 0.41370662654906654, "learning_rate": 2.3201743609354187e-05, "loss": 0.0639, "step": 2119 }, { "epoch": 0.47, "grad_norm": 0.34958462121375417, "learning_rate": 2.3187698075896378e-05, "loss": 0.0453, "step": 2120 }, { "epoch": 0.47, "grad_norm": 0.4804601591008989, "learning_rate": 2.317365092912503e-05, "loss": 0.0712, "step": 2121 }, { "epoch": 0.47, "grad_norm": 0.4395080871120076, "learning_rate": 2.3159602176149493e-05, "loss": 0.078, "step": 2122 }, { "epoch": 0.47, "grad_norm": 0.49353773105297966, "learning_rate": 2.314555182407992e-05, "loss": 0.0694, "step": 2123 }, { "epoch": 0.47, "grad_norm": 0.41822454165715695, "learning_rate": 2.3131499880027294e-05, "loss": 0.0651, "step": 2124 }, { "epoch": 0.47, "grad_norm": 0.4385989215954988, "learning_rate": 2.311744635110338e-05, "loss": 0.0643, "step": 2125 }, { "epoch": 0.47, "grad_norm": 0.45052014793681966, "learning_rate": 2.3103391244420754e-05, "loss": 0.0683, "step": 2126 }, { "epoch": 0.47, "grad_norm": 0.454793816685309, "learning_rate": 2.30893345670928e-05, "loss": 0.0699, "step": 2127 }, { "epoch": 0.47, "grad_norm": 0.44657509817367813, "learning_rate": 2.3075276326233676e-05, "loss": 0.0803, "step": 2128 }, { "epoch": 0.47, "grad_norm": 0.40947483315993416, "learning_rate": 2.306121652895836e-05, "loss": 0.062, "step": 2129 }, { "epoch": 0.47, "grad_norm": 0.38736266655873886, "learning_rate": 2.3047155182382584e-05, "loss": 0.0543, "step": 2130 }, { "epoch": 0.47, "grad_norm": 0.3548728643355111, "learning_rate": 2.3033092293622903e-05, "loss": 0.0476, "step": 2131 }, { "epoch": 0.47, "grad_norm": 0.4771604694937133, "learning_rate": 2.3019027869796607e-05, "loss": 0.0645, "step": 2132 }, { "epoch": 0.47, "grad_norm": 0.4048655468083591, "learning_rate": 2.3004961918021804e-05, "loss": 0.0533, "step": 2133 }, { "epoch": 0.47, "grad_norm": 0.3945217886819489, "learning_rate": 2.2990894445417355e-05, "loss": 0.0698, "step": 2134 }, { "epoch": 0.47, "grad_norm": 0.44724783383095956, "learning_rate": 2.2976825459102898e-05, "loss": 0.0602, "step": 2135 }, { "epoch": 0.47, "grad_norm": 0.4704754068846899, "learning_rate": 2.2962754966198815e-05, "loss": 0.0769, "step": 2136 }, { "epoch": 0.47, "grad_norm": 0.4445963002428909, "learning_rate": 2.2948682973826292e-05, "loss": 0.072, "step": 2137 }, { "epoch": 0.47, "grad_norm": 0.38627250047225226, "learning_rate": 2.2934609489107236e-05, "loss": 0.0499, "step": 2138 }, { "epoch": 0.47, "grad_norm": 0.5143966493187458, "learning_rate": 2.292053451916433e-05, "loss": 0.0722, "step": 2139 }, { "epoch": 0.47, "grad_norm": 0.42852994882712525, "learning_rate": 2.2906458071121e-05, "loss": 0.0503, "step": 2140 }, { "epoch": 0.47, "grad_norm": 0.3497800144203384, "learning_rate": 2.289238015210142e-05, "loss": 0.0421, "step": 2141 }, { "epoch": 0.47, "grad_norm": 0.4088490317765311, "learning_rate": 2.2878300769230522e-05, "loss": 0.0426, "step": 2142 }, { "epoch": 0.47, "grad_norm": 0.45912554578699355, "learning_rate": 2.2864219929633956e-05, "loss": 0.0635, "step": 2143 }, { "epoch": 0.47, "grad_norm": 0.4630192606083607, "learning_rate": 2.2850137640438126e-05, "loss": 0.0781, "step": 2144 }, { "epoch": 0.47, "grad_norm": 0.45750754553403933, "learning_rate": 2.2836053908770165e-05, "loss": 0.0743, "step": 2145 }, { "epoch": 0.47, "grad_norm": 0.46505834931196594, "learning_rate": 2.2821968741757935e-05, "loss": 0.0754, "step": 2146 }, { "epoch": 0.47, "grad_norm": 0.38697667623807835, "learning_rate": 2.280788214653003e-05, "loss": 0.0662, "step": 2147 }, { "epoch": 0.47, "grad_norm": 0.376535846827464, "learning_rate": 2.2793794130215753e-05, "loss": 0.0484, "step": 2148 }, { "epoch": 0.47, "grad_norm": 0.41352409770681026, "learning_rate": 2.2779704699945136e-05, "loss": 0.0645, "step": 2149 }, { "epoch": 0.47, "grad_norm": 0.3455735087539365, "learning_rate": 2.2765613862848936e-05, "loss": 0.0504, "step": 2150 }, { "epoch": 0.47, "grad_norm": 0.42004527563202043, "learning_rate": 2.2751521626058607e-05, "loss": 0.0643, "step": 2151 }, { "epoch": 0.47, "grad_norm": 0.40863652926214156, "learning_rate": 2.2737427996706316e-05, "loss": 0.0544, "step": 2152 }, { "epoch": 0.47, "grad_norm": 0.3377745875628734, "learning_rate": 2.2723332981924937e-05, "loss": 0.0459, "step": 2153 }, { "epoch": 0.47, "grad_norm": 0.39150200622356357, "learning_rate": 2.2709236588848036e-05, "loss": 0.0535, "step": 2154 }, { "epoch": 0.47, "grad_norm": 0.5176623006064641, "learning_rate": 2.269513882460989e-05, "loss": 0.0888, "step": 2155 }, { "epoch": 0.47, "grad_norm": 0.40359070514234835, "learning_rate": 2.268103969634547e-05, "loss": 0.0482, "step": 2156 }, { "epoch": 0.47, "grad_norm": 0.45307968176111113, "learning_rate": 2.266693921119042e-05, "loss": 0.0661, "step": 2157 }, { "epoch": 0.47, "grad_norm": 0.6422601162212864, "learning_rate": 2.2652837376281087e-05, "loss": 0.0816, "step": 2158 }, { "epoch": 0.47, "grad_norm": 0.3880241018409379, "learning_rate": 2.2638734198754496e-05, "loss": 0.0642, "step": 2159 }, { "epoch": 0.47, "grad_norm": 0.3337084539500318, "learning_rate": 2.2624629685748353e-05, "loss": 0.0465, "step": 2160 }, { "epoch": 0.47, "grad_norm": 0.37604125816273715, "learning_rate": 2.261052384440104e-05, "loss": 0.0629, "step": 2161 }, { "epoch": 0.47, "grad_norm": 0.41380834876811373, "learning_rate": 2.2596416681851595e-05, "loss": 0.0567, "step": 2162 }, { "epoch": 0.48, "grad_norm": 0.4765554959142064, "learning_rate": 2.2582308205239757e-05, "loss": 0.0697, "step": 2163 }, { "epoch": 0.48, "grad_norm": 0.34652462567446174, "learning_rate": 2.256819842170591e-05, "loss": 0.0428, "step": 2164 }, { "epoch": 0.48, "grad_norm": 0.3953894167167263, "learning_rate": 2.2554087338391098e-05, "loss": 0.0622, "step": 2165 }, { "epoch": 0.48, "grad_norm": 0.47163784244279544, "learning_rate": 2.2539974962437022e-05, "loss": 0.0849, "step": 2166 }, { "epoch": 0.48, "grad_norm": 0.4278849506541713, "learning_rate": 2.252586130098605e-05, "loss": 0.0663, "step": 2167 }, { "epoch": 0.48, "grad_norm": 0.49671398648944554, "learning_rate": 2.251174636118119e-05, "loss": 0.0574, "step": 2168 }, { "epoch": 0.48, "grad_norm": 0.40812914237217485, "learning_rate": 2.2497630150166102e-05, "loss": 0.0607, "step": 2169 }, { "epoch": 0.48, "grad_norm": 0.4092688451970953, "learning_rate": 2.2483512675085085e-05, "loss": 0.0657, "step": 2170 }, { "epoch": 0.48, "grad_norm": 0.41064415023368733, "learning_rate": 2.2469393943083068e-05, "loss": 0.0525, "step": 2171 }, { "epoch": 0.48, "grad_norm": 0.4334403445391836, "learning_rate": 2.245527396130565e-05, "loss": 0.062, "step": 2172 }, { "epoch": 0.48, "grad_norm": 0.4424358614004642, "learning_rate": 2.2441152736899026e-05, "loss": 0.0805, "step": 2173 }, { "epoch": 0.48, "grad_norm": 0.4298174228183831, "learning_rate": 2.242703027701004e-05, "loss": 0.0587, "step": 2174 }, { "epoch": 0.48, "grad_norm": 0.3311299151354249, "learning_rate": 2.2412906588786147e-05, "loss": 0.0548, "step": 2175 }, { "epoch": 0.48, "grad_norm": 0.5231035220386947, "learning_rate": 2.2398781679375445e-05, "loss": 0.0883, "step": 2176 }, { "epoch": 0.48, "grad_norm": 0.432308870762196, "learning_rate": 2.2384655555926625e-05, "loss": 0.0757, "step": 2177 }, { "epoch": 0.48, "grad_norm": 0.4268838796627569, "learning_rate": 2.237052822558901e-05, "loss": 0.0714, "step": 2178 }, { "epoch": 0.48, "grad_norm": 0.4251954835217631, "learning_rate": 2.235639969551253e-05, "loss": 0.0518, "step": 2179 }, { "epoch": 0.48, "grad_norm": 0.38094984104985813, "learning_rate": 2.2342269972847718e-05, "loss": 0.0544, "step": 2180 }, { "epoch": 0.48, "grad_norm": 0.4359203499322573, "learning_rate": 2.232813906474572e-05, "loss": 0.0518, "step": 2181 }, { "epoch": 0.48, "grad_norm": 0.42064737147089587, "learning_rate": 2.2314006978358263e-05, "loss": 0.0462, "step": 2182 }, { "epoch": 0.48, "grad_norm": 0.426380243595148, "learning_rate": 2.2299873720837692e-05, "loss": 0.0452, "step": 2183 }, { "epoch": 0.48, "grad_norm": 0.3966205907889756, "learning_rate": 2.2285739299336933e-05, "loss": 0.0465, "step": 2184 }, { "epoch": 0.48, "grad_norm": 0.3828731994773382, "learning_rate": 2.22716037210095e-05, "loss": 0.0639, "step": 2185 }, { "epoch": 0.48, "grad_norm": 0.39471942832875806, "learning_rate": 2.2257466993009503e-05, "loss": 0.0642, "step": 2186 }, { "epoch": 0.48, "grad_norm": 0.4215792042821033, "learning_rate": 2.2243329122491617e-05, "loss": 0.0574, "step": 2187 }, { "epoch": 0.48, "grad_norm": 0.49431560268048147, "learning_rate": 2.222919011661111e-05, "loss": 0.077, "step": 2188 }, { "epoch": 0.48, "grad_norm": 0.4424619456539286, "learning_rate": 2.2215049982523827e-05, "loss": 0.0567, "step": 2189 }, { "epoch": 0.48, "grad_norm": 0.4930441228976846, "learning_rate": 2.2200908727386167e-05, "loss": 0.0829, "step": 2190 }, { "epoch": 0.48, "grad_norm": 0.43646680221292344, "learning_rate": 2.2186766358355106e-05, "loss": 0.0687, "step": 2191 }, { "epoch": 0.48, "grad_norm": 0.3875802010940222, "learning_rate": 2.217262288258818e-05, "loss": 0.0563, "step": 2192 }, { "epoch": 0.48, "grad_norm": 0.4571296553349933, "learning_rate": 2.2158478307243507e-05, "loss": 0.0694, "step": 2193 }, { "epoch": 0.48, "grad_norm": 0.40632144593168007, "learning_rate": 2.2144332639479722e-05, "loss": 0.0535, "step": 2194 }, { "epoch": 0.48, "grad_norm": 0.4048639641812654, "learning_rate": 2.213018588645605e-05, "loss": 0.0625, "step": 2195 }, { "epoch": 0.48, "grad_norm": 0.4720688945731608, "learning_rate": 2.2116038055332238e-05, "loss": 0.0681, "step": 2196 }, { "epoch": 0.48, "grad_norm": 0.4141419892215765, "learning_rate": 2.2101889153268595e-05, "loss": 0.0575, "step": 2197 }, { "epoch": 0.48, "grad_norm": 0.38017011040985954, "learning_rate": 2.2087739187425967e-05, "loss": 0.0561, "step": 2198 }, { "epoch": 0.48, "grad_norm": 0.42091886466391487, "learning_rate": 2.2073588164965737e-05, "loss": 0.0536, "step": 2199 }, { "epoch": 0.48, "grad_norm": 0.395803308184746, "learning_rate": 2.205943609304983e-05, "loss": 0.0507, "step": 2200 }, { "epoch": 0.48, "grad_norm": 0.41410210258103936, "learning_rate": 2.2045282978840684e-05, "loss": 0.0505, "step": 2201 }, { "epoch": 0.48, "grad_norm": 0.31556133204089304, "learning_rate": 2.2031128829501293e-05, "loss": 0.0407, "step": 2202 }, { "epoch": 0.48, "grad_norm": 0.4462056154997179, "learning_rate": 2.2016973652195145e-05, "loss": 0.0724, "step": 2203 }, { "epoch": 0.48, "grad_norm": 0.4439268303853345, "learning_rate": 2.200281745408627e-05, "loss": 0.0596, "step": 2204 }, { "epoch": 0.48, "grad_norm": 0.4004929016486724, "learning_rate": 2.1988660242339205e-05, "loss": 0.0568, "step": 2205 }, { "epoch": 0.48, "grad_norm": 0.3522127284277402, "learning_rate": 2.1974502024119002e-05, "loss": 0.0456, "step": 2206 }, { "epoch": 0.48, "grad_norm": 0.5424500327237654, "learning_rate": 2.196034280659122e-05, "loss": 0.0967, "step": 2207 }, { "epoch": 0.48, "grad_norm": 0.3807729378451628, "learning_rate": 2.1946182596921917e-05, "loss": 0.0496, "step": 2208 }, { "epoch": 0.49, "grad_norm": 0.45126173437400224, "learning_rate": 2.1932021402277682e-05, "loss": 0.0748, "step": 2209 }, { "epoch": 0.49, "grad_norm": 0.4133062834630545, "learning_rate": 2.1917859229825565e-05, "loss": 0.0555, "step": 2210 }, { "epoch": 0.49, "grad_norm": 0.41212271718488375, "learning_rate": 2.1903696086733142e-05, "loss": 0.0635, "step": 2211 }, { "epoch": 0.49, "grad_norm": 0.36619925925389807, "learning_rate": 2.188953198016846e-05, "loss": 0.0457, "step": 2212 }, { "epoch": 0.49, "grad_norm": 0.3598676704542792, "learning_rate": 2.1875366917300057e-05, "loss": 0.0431, "step": 2213 }, { "epoch": 0.49, "grad_norm": 0.5540880806015249, "learning_rate": 2.1861200905296952e-05, "loss": 0.0816, "step": 2214 }, { "epoch": 0.49, "grad_norm": 0.43048077962859, "learning_rate": 2.1847033951328673e-05, "loss": 0.0618, "step": 2215 }, { "epoch": 0.49, "grad_norm": 0.3485028107336857, "learning_rate": 2.1832866062565183e-05, "loss": 0.0458, "step": 2216 }, { "epoch": 0.49, "grad_norm": 0.45006451806515346, "learning_rate": 2.1818697246176943e-05, "loss": 0.0561, "step": 2217 }, { "epoch": 0.49, "grad_norm": 0.414669886935835, "learning_rate": 2.1804527509334875e-05, "loss": 0.0639, "step": 2218 }, { "epoch": 0.49, "grad_norm": 0.38722038486539806, "learning_rate": 2.1790356859210378e-05, "loss": 0.0507, "step": 2219 }, { "epoch": 0.49, "grad_norm": 0.3876689642497525, "learning_rate": 2.17761853029753e-05, "loss": 0.0625, "step": 2220 }, { "epoch": 0.49, "grad_norm": 0.3570540990550638, "learning_rate": 2.176201284780195e-05, "loss": 0.0452, "step": 2221 }, { "epoch": 0.49, "grad_norm": 0.4030564116747038, "learning_rate": 2.1747839500863096e-05, "loss": 0.0664, "step": 2222 }, { "epoch": 0.49, "grad_norm": 0.4450630526663112, "learning_rate": 2.1733665269331953e-05, "loss": 0.0569, "step": 2223 }, { "epoch": 0.49, "grad_norm": 0.34117537533148296, "learning_rate": 2.1719490160382196e-05, "loss": 0.043, "step": 2224 }, { "epoch": 0.49, "grad_norm": 0.43426080890571217, "learning_rate": 2.1705314181187922e-05, "loss": 0.057, "step": 2225 }, { "epoch": 0.49, "grad_norm": 0.3532074486008631, "learning_rate": 2.169113733892369e-05, "loss": 0.0622, "step": 2226 }, { "epoch": 0.49, "grad_norm": 0.5107080218410214, "learning_rate": 2.1676959640764484e-05, "loss": 0.0657, "step": 2227 }, { "epoch": 0.49, "grad_norm": 0.37515124608793443, "learning_rate": 2.166278109388572e-05, "loss": 0.0508, "step": 2228 }, { "epoch": 0.49, "grad_norm": 0.3836904306804695, "learning_rate": 2.1648601705463263e-05, "loss": 0.0482, "step": 2229 }, { "epoch": 0.49, "grad_norm": 0.3251608294742402, "learning_rate": 2.1634421482673368e-05, "loss": 0.0361, "step": 2230 }, { "epoch": 0.49, "grad_norm": 0.3571923357885689, "learning_rate": 2.1620240432692737e-05, "loss": 0.062, "step": 2231 }, { "epoch": 0.49, "grad_norm": 0.44233513042496453, "learning_rate": 2.1606058562698496e-05, "loss": 0.0776, "step": 2232 }, { "epoch": 0.49, "grad_norm": 0.44027822389523236, "learning_rate": 2.1591875879868177e-05, "loss": 0.0623, "step": 2233 }, { "epoch": 0.49, "grad_norm": 0.4128785029628535, "learning_rate": 2.157769239137971e-05, "loss": 0.0465, "step": 2234 }, { "epoch": 0.49, "grad_norm": 0.3351935421486037, "learning_rate": 2.1563508104411457e-05, "loss": 0.0516, "step": 2235 }, { "epoch": 0.49, "grad_norm": 0.3493431292244364, "learning_rate": 2.1549323026142168e-05, "loss": 0.0478, "step": 2236 }, { "epoch": 0.49, "grad_norm": 0.4243760676082615, "learning_rate": 2.153513716375099e-05, "loss": 0.0732, "step": 2237 }, { "epoch": 0.49, "grad_norm": 0.3524577186976111, "learning_rate": 2.1520950524417484e-05, "loss": 0.0507, "step": 2238 }, { "epoch": 0.49, "grad_norm": 0.4432719980591418, "learning_rate": 2.1506763115321602e-05, "loss": 0.0595, "step": 2239 }, { "epoch": 0.49, "grad_norm": 0.4883926569683455, "learning_rate": 2.1492574943643666e-05, "loss": 0.0712, "step": 2240 }, { "epoch": 0.49, "grad_norm": 0.41592459958861955, "learning_rate": 2.1478386016564406e-05, "loss": 0.0588, "step": 2241 }, { "epoch": 0.49, "grad_norm": 0.3325416680808275, "learning_rate": 2.1464196341264915e-05, "loss": 0.0545, "step": 2242 }, { "epoch": 0.49, "grad_norm": 0.3711408484837303, "learning_rate": 2.145000592492668e-05, "loss": 0.0559, "step": 2243 }, { "epoch": 0.49, "grad_norm": 0.38396880621795976, "learning_rate": 2.1435814774731557e-05, "loss": 0.0454, "step": 2244 }, { "epoch": 0.49, "grad_norm": 0.4628181078800962, "learning_rate": 2.1421622897861777e-05, "loss": 0.0704, "step": 2245 }, { "epoch": 0.49, "grad_norm": 0.3477668479133176, "learning_rate": 2.1407430301499934e-05, "loss": 0.051, "step": 2246 }, { "epoch": 0.49, "grad_norm": 0.4742206818542849, "learning_rate": 2.139323699282899e-05, "loss": 0.0551, "step": 2247 }, { "epoch": 0.49, "grad_norm": 0.44695137678370234, "learning_rate": 2.1379042979032256e-05, "loss": 0.0703, "step": 2248 }, { "epoch": 0.49, "grad_norm": 0.4000663002023182, "learning_rate": 2.1364848267293424e-05, "loss": 0.0516, "step": 2249 }, { "epoch": 0.49, "grad_norm": 0.43719202509645305, "learning_rate": 2.1350652864796513e-05, "loss": 0.0638, "step": 2250 }, { "epoch": 0.49, "grad_norm": 0.39235092344908234, "learning_rate": 2.133645677872591e-05, "loss": 0.0548, "step": 2251 }, { "epoch": 0.49, "grad_norm": 0.3668704388739481, "learning_rate": 2.1322260016266337e-05, "loss": 0.0408, "step": 2252 }, { "epoch": 0.49, "grad_norm": 0.3150238402367162, "learning_rate": 2.1308062584602865e-05, "loss": 0.041, "step": 2253 }, { "epoch": 0.5, "grad_norm": 0.5245566289748852, "learning_rate": 2.1293864490920897e-05, "loss": 0.0728, "step": 2254 }, { "epoch": 0.5, "grad_norm": 0.3644941243520977, "learning_rate": 2.1279665742406187e-05, "loss": 0.0399, "step": 2255 }, { "epoch": 0.5, "grad_norm": 0.40779250084958746, "learning_rate": 2.126546634624479e-05, "loss": 0.0548, "step": 2256 }, { "epoch": 0.5, "grad_norm": 0.3788580323047344, "learning_rate": 2.125126630962312e-05, "loss": 0.0603, "step": 2257 }, { "epoch": 0.5, "grad_norm": 0.39045975050796067, "learning_rate": 2.1237065639727906e-05, "loss": 0.0525, "step": 2258 }, { "epoch": 0.5, "grad_norm": 0.4137295517953831, "learning_rate": 2.1222864343746185e-05, "loss": 0.052, "step": 2259 }, { "epoch": 0.5, "grad_norm": 0.4671759084073663, "learning_rate": 2.1208662428865326e-05, "loss": 0.0558, "step": 2260 }, { "epoch": 0.5, "grad_norm": 0.3834009707770895, "learning_rate": 2.1194459902272997e-05, "loss": 0.0619, "step": 2261 }, { "epoch": 0.5, "grad_norm": 0.33960149615028973, "learning_rate": 2.1180256771157194e-05, "loss": 0.0547, "step": 2262 }, { "epoch": 0.5, "grad_norm": 0.3576002665943287, "learning_rate": 2.1166053042706204e-05, "loss": 0.0446, "step": 2263 }, { "epoch": 0.5, "grad_norm": 0.35063333642538436, "learning_rate": 2.115184872410862e-05, "loss": 0.0528, "step": 2264 }, { "epoch": 0.5, "grad_norm": 0.39879788200945676, "learning_rate": 2.113764382255334e-05, "loss": 0.0545, "step": 2265 }, { "epoch": 0.5, "grad_norm": 0.5747178691662079, "learning_rate": 2.1123438345229537e-05, "loss": 0.0866, "step": 2266 }, { "epoch": 0.5, "grad_norm": 0.572943588549069, "learning_rate": 2.110923229932671e-05, "loss": 0.0792, "step": 2267 }, { "epoch": 0.5, "grad_norm": 0.3623014794385008, "learning_rate": 2.1095025692034614e-05, "loss": 0.0482, "step": 2268 }, { "epoch": 0.5, "grad_norm": 0.5828839910191299, "learning_rate": 2.1080818530543304e-05, "loss": 0.0825, "step": 2269 }, { "epoch": 0.5, "grad_norm": 0.478978707586908, "learning_rate": 2.106661082204311e-05, "loss": 0.0578, "step": 2270 }, { "epoch": 0.5, "grad_norm": 0.36041172424779067, "learning_rate": 2.105240257372464e-05, "loss": 0.0578, "step": 2271 }, { "epoch": 0.5, "grad_norm": 0.38219240140228306, "learning_rate": 2.1038193792778775e-05, "loss": 0.057, "step": 2272 }, { "epoch": 0.5, "grad_norm": 0.36689056402462356, "learning_rate": 2.102398448639667e-05, "loss": 0.0555, "step": 2273 }, { "epoch": 0.5, "grad_norm": 0.41942277567971076, "learning_rate": 2.100977466176973e-05, "loss": 0.0474, "step": 2274 }, { "epoch": 0.5, "grad_norm": 0.4291063713877314, "learning_rate": 2.099556432608965e-05, "loss": 0.069, "step": 2275 }, { "epoch": 0.5, "grad_norm": 0.32794329743943423, "learning_rate": 2.0981353486548363e-05, "loss": 0.0444, "step": 2276 }, { "epoch": 0.5, "grad_norm": 0.4401056489035421, "learning_rate": 2.096714215033806e-05, "loss": 0.0674, "step": 2277 }, { "epoch": 0.5, "grad_norm": 0.3810406766864367, "learning_rate": 2.095293032465119e-05, "loss": 0.0634, "step": 2278 }, { "epoch": 0.5, "grad_norm": 0.42038816246072847, "learning_rate": 2.0938718016680433e-05, "loss": 0.069, "step": 2279 }, { "epoch": 0.5, "grad_norm": 0.3479869212907461, "learning_rate": 2.0924505233618734e-05, "loss": 0.05, "step": 2280 }, { "epoch": 0.5, "grad_norm": 0.3993746637811574, "learning_rate": 2.0910291982659277e-05, "loss": 0.06, "step": 2281 }, { "epoch": 0.5, "grad_norm": 0.4005270748702094, "learning_rate": 2.0896078270995463e-05, "loss": 0.0489, "step": 2282 }, { "epoch": 0.5, "grad_norm": 0.396981721331143, "learning_rate": 2.0881864105820936e-05, "loss": 0.0685, "step": 2283 }, { "epoch": 0.5, "grad_norm": 0.36236271560600947, "learning_rate": 2.0867649494329587e-05, "loss": 0.0481, "step": 2284 }, { "epoch": 0.5, "grad_norm": 0.3550763741797328, "learning_rate": 2.085343444371551e-05, "loss": 0.051, "step": 2285 }, { "epoch": 0.5, "grad_norm": 0.3787923425353629, "learning_rate": 2.083921896117303e-05, "loss": 0.0529, "step": 2286 }, { "epoch": 0.5, "grad_norm": 0.39855603717447186, "learning_rate": 2.0825003053896686e-05, "loss": 0.0469, "step": 2287 }, { "epoch": 0.5, "grad_norm": 0.36050130735945973, "learning_rate": 2.0810786729081237e-05, "loss": 0.0589, "step": 2288 }, { "epoch": 0.5, "grad_norm": 0.373236063972357, "learning_rate": 2.079656999392166e-05, "loss": 0.0405, "step": 2289 }, { "epoch": 0.5, "grad_norm": 0.4681168094320828, "learning_rate": 2.0782352855613128e-05, "loss": 0.075, "step": 2290 }, { "epoch": 0.5, "grad_norm": 0.4689835125713308, "learning_rate": 2.0768135321351016e-05, "loss": 0.0728, "step": 2291 }, { "epoch": 0.5, "grad_norm": 0.331002402888966, "learning_rate": 2.0753917398330902e-05, "loss": 0.0493, "step": 2292 }, { "epoch": 0.5, "grad_norm": 0.36350895298160313, "learning_rate": 2.073969909374858e-05, "loss": 0.0413, "step": 2293 }, { "epoch": 0.5, "grad_norm": 0.43343697540317466, "learning_rate": 2.0725480414800012e-05, "loss": 0.0616, "step": 2294 }, { "epoch": 0.5, "grad_norm": 0.3888972027018617, "learning_rate": 2.0711261368681356e-05, "loss": 0.0509, "step": 2295 }, { "epoch": 0.5, "grad_norm": 0.3704147551794404, "learning_rate": 2.069704196258896e-05, "loss": 0.0412, "step": 2296 }, { "epoch": 0.5, "grad_norm": 0.390403971128073, "learning_rate": 2.068282220371936e-05, "loss": 0.0567, "step": 2297 }, { "epoch": 0.5, "grad_norm": 0.42857761960439805, "learning_rate": 2.066860209926925e-05, "loss": 0.0492, "step": 2298 }, { "epoch": 0.5, "grad_norm": 0.4495829059886345, "learning_rate": 2.0654381656435526e-05, "loss": 0.0656, "step": 2299 }, { "epoch": 0.51, "grad_norm": 0.314731332667737, "learning_rate": 2.064016088241523e-05, "loss": 0.0397, "step": 2300 }, { "epoch": 0.51, "grad_norm": 0.4139443714850748, "learning_rate": 2.0625939784405586e-05, "loss": 0.0552, "step": 2301 }, { "epoch": 0.51, "grad_norm": 0.4813590291607367, "learning_rate": 2.0611718369603982e-05, "loss": 0.0897, "step": 2302 }, { "epoch": 0.51, "grad_norm": 0.4462453207203422, "learning_rate": 2.0597496645207964e-05, "loss": 0.0727, "step": 2303 }, { "epoch": 0.51, "grad_norm": 0.4211409505176882, "learning_rate": 2.0583274618415227e-05, "loss": 0.0639, "step": 2304 }, { "epoch": 0.51, "grad_norm": 0.35422693001649885, "learning_rate": 2.056905229642363e-05, "loss": 0.0498, "step": 2305 }, { "epoch": 0.51, "grad_norm": 0.3386480188822363, "learning_rate": 2.055482968643118e-05, "loss": 0.0424, "step": 2306 }, { "epoch": 0.51, "grad_norm": 0.4217757355504114, "learning_rate": 2.0540606795636022e-05, "loss": 0.0708, "step": 2307 }, { "epoch": 0.51, "grad_norm": 0.38629292635355156, "learning_rate": 2.0526383631236454e-05, "loss": 0.0576, "step": 2308 }, { "epoch": 0.51, "grad_norm": 0.3800503672611192, "learning_rate": 2.0512160200430896e-05, "loss": 0.0543, "step": 2309 }, { "epoch": 0.51, "grad_norm": 0.3909405768720875, "learning_rate": 2.0497936510417928e-05, "loss": 0.065, "step": 2310 }, { "epoch": 0.51, "grad_norm": 0.30695811315431687, "learning_rate": 2.048371256839624e-05, "loss": 0.0409, "step": 2311 }, { "epoch": 0.51, "grad_norm": 0.45774239859423366, "learning_rate": 2.046948838156465e-05, "loss": 0.0712, "step": 2312 }, { "epoch": 0.51, "grad_norm": 0.40685103045526805, "learning_rate": 2.0455263957122113e-05, "loss": 0.0633, "step": 2313 }, { "epoch": 0.51, "grad_norm": 0.3732654071254558, "learning_rate": 2.04410393022677e-05, "loss": 0.0518, "step": 2314 }, { "epoch": 0.51, "grad_norm": 0.4410804964372272, "learning_rate": 2.0426814424200592e-05, "loss": 0.0498, "step": 2315 }, { "epoch": 0.51, "grad_norm": 0.48450435941465114, "learning_rate": 2.041258933012009e-05, "loss": 0.0722, "step": 2316 }, { "epoch": 0.51, "grad_norm": 0.33728466935387436, "learning_rate": 2.0398364027225593e-05, "loss": 0.0481, "step": 2317 }, { "epoch": 0.51, "grad_norm": 0.42652390832959736, "learning_rate": 2.0384138522716626e-05, "loss": 0.0631, "step": 2318 }, { "epoch": 0.51, "grad_norm": 0.4007601451708101, "learning_rate": 2.036991282379279e-05, "loss": 0.0543, "step": 2319 }, { "epoch": 0.51, "grad_norm": 0.3920582037638183, "learning_rate": 2.0355686937653818e-05, "loss": 0.048, "step": 2320 }, { "epoch": 0.51, "grad_norm": 0.400337680944842, "learning_rate": 2.03414608714995e-05, "loss": 0.0511, "step": 2321 }, { "epoch": 0.51, "grad_norm": 0.3276742041075169, "learning_rate": 2.0327234632529738e-05, "loss": 0.042, "step": 2322 }, { "epoch": 0.51, "grad_norm": 0.38644815861691056, "learning_rate": 2.0313008227944527e-05, "loss": 0.0457, "step": 2323 }, { "epoch": 0.51, "grad_norm": 0.42529913528824476, "learning_rate": 2.029878166494393e-05, "loss": 0.0631, "step": 2324 }, { "epoch": 0.51, "grad_norm": 0.4331816871780329, "learning_rate": 2.0284554950728106e-05, "loss": 0.0625, "step": 2325 }, { "epoch": 0.51, "grad_norm": 0.3881718279896845, "learning_rate": 2.0270328092497266e-05, "loss": 0.0331, "step": 2326 }, { "epoch": 0.51, "grad_norm": 0.3789899674574855, "learning_rate": 2.025610109745173e-05, "loss": 0.064, "step": 2327 }, { "epoch": 0.51, "grad_norm": 0.3858736417812594, "learning_rate": 2.024187397279186e-05, "loss": 0.0475, "step": 2328 }, { "epoch": 0.51, "grad_norm": 0.36311207185275024, "learning_rate": 2.0227646725718085e-05, "loss": 0.0522, "step": 2329 }, { "epoch": 0.51, "grad_norm": 0.42080811418689446, "learning_rate": 2.021341936343091e-05, "loss": 0.0669, "step": 2330 }, { "epoch": 0.51, "grad_norm": 0.4367811223292118, "learning_rate": 2.0199191893130893e-05, "loss": 0.0642, "step": 2331 }, { "epoch": 0.51, "grad_norm": 0.44304571946339333, "learning_rate": 2.018496432201863e-05, "loss": 0.0667, "step": 2332 }, { "epoch": 0.51, "grad_norm": 0.39111846272706985, "learning_rate": 2.017073665729479e-05, "loss": 0.0576, "step": 2333 }, { "epoch": 0.51, "grad_norm": 0.3756661458893297, "learning_rate": 2.0156508906160083e-05, "loss": 0.0558, "step": 2334 }, { "epoch": 0.51, "grad_norm": 0.36196762405032007, "learning_rate": 2.0142281075815253e-05, "loss": 0.0538, "step": 2335 }, { "epoch": 0.51, "grad_norm": 0.41753657449199966, "learning_rate": 2.0128053173461105e-05, "loss": 0.0533, "step": 2336 }, { "epoch": 0.51, "grad_norm": 0.35695799445414533, "learning_rate": 2.0113825206298458e-05, "loss": 0.049, "step": 2337 }, { "epoch": 0.51, "grad_norm": 0.43629295869942647, "learning_rate": 2.009959718152818e-05, "loss": 0.0629, "step": 2338 }, { "epoch": 0.51, "grad_norm": 0.36733210536684613, "learning_rate": 2.008536910635115e-05, "loss": 0.0533, "step": 2339 }, { "epoch": 0.51, "grad_norm": 0.34239426858627797, "learning_rate": 2.00711409879683e-05, "loss": 0.0422, "step": 2340 }, { "epoch": 0.51, "grad_norm": 0.370170287497713, "learning_rate": 2.0056912833580557e-05, "loss": 0.0583, "step": 2341 }, { "epoch": 0.51, "grad_norm": 0.37309516474357096, "learning_rate": 2.0042684650388882e-05, "loss": 0.0557, "step": 2342 }, { "epoch": 0.51, "grad_norm": 0.3704571936199428, "learning_rate": 2.0028456445594234e-05, "loss": 0.0485, "step": 2343 }, { "epoch": 0.51, "grad_norm": 0.5111271590786414, "learning_rate": 2.0014228226397618e-05, "loss": 0.0692, "step": 2344 }, { "epoch": 0.52, "grad_norm": 0.408704732406459, "learning_rate": 2e-05, "loss": 0.0522, "step": 2345 }, { "epoch": 0.52, "grad_norm": 0.4179396931182973, "learning_rate": 1.998577177360239e-05, "loss": 0.0465, "step": 2346 }, { "epoch": 0.52, "grad_norm": 0.4131803652646927, "learning_rate": 1.997154355440577e-05, "loss": 0.0621, "step": 2347 }, { "epoch": 0.52, "grad_norm": 0.5090762625579097, "learning_rate": 1.995731534961113e-05, "loss": 0.0812, "step": 2348 }, { "epoch": 0.52, "grad_norm": 0.3399041878463311, "learning_rate": 1.9943087166419453e-05, "loss": 0.0424, "step": 2349 }, { "epoch": 0.52, "grad_norm": 0.4759776482082502, "learning_rate": 1.9928859012031703e-05, "loss": 0.0664, "step": 2350 }, { "epoch": 0.52, "grad_norm": 0.40435171446462115, "learning_rate": 1.991463089364885e-05, "loss": 0.0494, "step": 2351 }, { "epoch": 0.52, "grad_norm": 0.34789546494594176, "learning_rate": 1.9900402818471825e-05, "loss": 0.0511, "step": 2352 }, { "epoch": 0.52, "grad_norm": 0.33811444934171764, "learning_rate": 1.9886174793701546e-05, "loss": 0.0375, "step": 2353 }, { "epoch": 0.52, "grad_norm": 0.44538547520252336, "learning_rate": 1.98719468265389e-05, "loss": 0.0686, "step": 2354 }, { "epoch": 0.52, "grad_norm": 0.35413298259696435, "learning_rate": 1.985771892418475e-05, "loss": 0.0517, "step": 2355 }, { "epoch": 0.52, "grad_norm": 0.3550469127739452, "learning_rate": 1.9843491093839927e-05, "loss": 0.0567, "step": 2356 }, { "epoch": 0.52, "grad_norm": 0.39031416923953993, "learning_rate": 1.982926334270522e-05, "loss": 0.0485, "step": 2357 }, { "epoch": 0.52, "grad_norm": 0.35131035345630074, "learning_rate": 1.9815035677981378e-05, "loss": 0.0486, "step": 2358 }, { "epoch": 0.52, "grad_norm": 0.4054292677985494, "learning_rate": 1.9800808106869117e-05, "loss": 0.0615, "step": 2359 }, { "epoch": 0.52, "grad_norm": 0.29001962872784276, "learning_rate": 1.9786580636569092e-05, "loss": 0.0312, "step": 2360 }, { "epoch": 0.52, "grad_norm": 0.3415296109097626, "learning_rate": 1.9772353274281918e-05, "loss": 0.035, "step": 2361 }, { "epoch": 0.52, "grad_norm": 0.30966855244264235, "learning_rate": 1.9758126027208146e-05, "loss": 0.031, "step": 2362 }, { "epoch": 0.52, "grad_norm": 0.3744315965633129, "learning_rate": 1.9743898902548273e-05, "loss": 0.0589, "step": 2363 }, { "epoch": 0.52, "grad_norm": 0.4780805205555588, "learning_rate": 1.972967190750274e-05, "loss": 0.0575, "step": 2364 }, { "epoch": 0.52, "grad_norm": 0.4121418980786573, "learning_rate": 1.9715445049271907e-05, "loss": 0.0533, "step": 2365 }, { "epoch": 0.52, "grad_norm": 0.46827083574793354, "learning_rate": 1.9701218335056076e-05, "loss": 0.0667, "step": 2366 }, { "epoch": 0.52, "grad_norm": 0.3978848832437258, "learning_rate": 1.9686991772055476e-05, "loss": 0.0542, "step": 2367 }, { "epoch": 0.52, "grad_norm": 0.39031993397702963, "learning_rate": 1.9672765367470265e-05, "loss": 0.0574, "step": 2368 }, { "epoch": 0.52, "grad_norm": 0.3806681226158581, "learning_rate": 1.9658539128500507e-05, "loss": 0.0535, "step": 2369 }, { "epoch": 0.52, "grad_norm": 0.5512545560791239, "learning_rate": 1.964431306234619e-05, "loss": 0.0692, "step": 2370 }, { "epoch": 0.52, "grad_norm": 0.39036734666751816, "learning_rate": 1.9630087176207212e-05, "loss": 0.0645, "step": 2371 }, { "epoch": 0.52, "grad_norm": 0.3950232657691014, "learning_rate": 1.9615861477283384e-05, "loss": 0.0426, "step": 2372 }, { "epoch": 0.52, "grad_norm": 0.3738941337640487, "learning_rate": 1.9601635972774414e-05, "loss": 0.0541, "step": 2373 }, { "epoch": 0.52, "grad_norm": 0.36719159874759005, "learning_rate": 1.958741066987992e-05, "loss": 0.061, "step": 2374 }, { "epoch": 0.52, "grad_norm": 0.3758156370104676, "learning_rate": 1.9573185575799414e-05, "loss": 0.0546, "step": 2375 }, { "epoch": 0.52, "grad_norm": 0.3452507119112734, "learning_rate": 1.95589606977323e-05, "loss": 0.0481, "step": 2376 }, { "epoch": 0.52, "grad_norm": 0.36708523969916607, "learning_rate": 1.9544736042877886e-05, "loss": 0.0505, "step": 2377 }, { "epoch": 0.52, "grad_norm": 0.35413298259696435, "learning_rate": 1.9530511618435352e-05, "loss": 0.0432, "step": 2378 }, { "epoch": 0.52, "grad_norm": 0.35248344384197294, "learning_rate": 1.9516287431603767e-05, "loss": 0.0411, "step": 2379 }, { "epoch": 0.52, "grad_norm": 0.41347060093390803, "learning_rate": 1.950206348958208e-05, "loss": 0.06, "step": 2380 }, { "epoch": 0.52, "grad_norm": 0.3132393316635138, "learning_rate": 1.948783979956911e-05, "loss": 0.0455, "step": 2381 }, { "epoch": 0.52, "grad_norm": 0.48297148065402884, "learning_rate": 1.9473616368763556e-05, "loss": 0.0713, "step": 2382 }, { "epoch": 0.52, "grad_norm": 0.3520791601840363, "learning_rate": 1.9459393204363988e-05, "loss": 0.0344, "step": 2383 }, { "epoch": 0.52, "grad_norm": 0.44459028405128037, "learning_rate": 1.944517031356882e-05, "loss": 0.0627, "step": 2384 }, { "epoch": 0.52, "grad_norm": 0.3420051594032704, "learning_rate": 1.9430947703576373e-05, "loss": 0.0582, "step": 2385 }, { "epoch": 0.52, "grad_norm": 0.3404259134894278, "learning_rate": 1.9416725381584777e-05, "loss": 0.0393, "step": 2386 }, { "epoch": 0.52, "grad_norm": 0.289782799432669, "learning_rate": 1.9402503354792043e-05, "loss": 0.048, "step": 2387 }, { "epoch": 0.52, "grad_norm": 0.3397732584839841, "learning_rate": 1.938828163039602e-05, "loss": 0.0401, "step": 2388 }, { "epoch": 0.52, "grad_norm": 0.32293331451981655, "learning_rate": 1.9374060215594417e-05, "loss": 0.0371, "step": 2389 }, { "epoch": 0.52, "grad_norm": 0.4402223424612593, "learning_rate": 1.9359839117584775e-05, "loss": 0.0523, "step": 2390 }, { "epoch": 0.53, "grad_norm": 0.3906156538798913, "learning_rate": 1.934561834356448e-05, "loss": 0.0559, "step": 2391 }, { "epoch": 0.53, "grad_norm": 0.4286245550140536, "learning_rate": 1.9331397900730754e-05, "loss": 0.0484, "step": 2392 }, { "epoch": 0.53, "grad_norm": 0.3395809275399548, "learning_rate": 1.9317177796280643e-05, "loss": 0.0467, "step": 2393 }, { "epoch": 0.53, "grad_norm": 0.3323436613670127, "learning_rate": 1.930295803741104e-05, "loss": 0.0462, "step": 2394 }, { "epoch": 0.53, "grad_norm": 0.40455667008457696, "learning_rate": 1.9288738631318648e-05, "loss": 0.0459, "step": 2395 }, { "epoch": 0.53, "grad_norm": 0.47881007574553464, "learning_rate": 1.9274519585199995e-05, "loss": 0.0668, "step": 2396 }, { "epoch": 0.53, "grad_norm": 0.4284875239822807, "learning_rate": 1.9260300906251422e-05, "loss": 0.0618, "step": 2397 }, { "epoch": 0.53, "grad_norm": 0.41261309532758883, "learning_rate": 1.92460826016691e-05, "loss": 0.0531, "step": 2398 }, { "epoch": 0.53, "grad_norm": 0.3862499130890881, "learning_rate": 1.9231864678648994e-05, "loss": 0.0604, "step": 2399 }, { "epoch": 0.53, "grad_norm": 0.4244319113266689, "learning_rate": 1.9217647144386885e-05, "loss": 0.0706, "step": 2400 }, { "epoch": 0.53, "grad_norm": 0.38182235654596364, "learning_rate": 1.9203430006078348e-05, "loss": 0.0493, "step": 2401 }, { "epoch": 0.53, "grad_norm": 0.3904403440806351, "learning_rate": 1.918921327091876e-05, "loss": 0.055, "step": 2402 }, { "epoch": 0.53, "grad_norm": 0.3772309223455192, "learning_rate": 1.9174996946103318e-05, "loss": 0.0557, "step": 2403 }, { "epoch": 0.53, "grad_norm": 0.3871718555457978, "learning_rate": 1.9160781038826973e-05, "loss": 0.044, "step": 2404 }, { "epoch": 0.53, "grad_norm": 0.3547079727381328, "learning_rate": 1.9146565556284492e-05, "loss": 0.0448, "step": 2405 }, { "epoch": 0.53, "grad_norm": 0.37406062645948895, "learning_rate": 1.9132350505670416e-05, "loss": 0.0505, "step": 2406 }, { "epoch": 0.53, "grad_norm": 0.35909978030619516, "learning_rate": 1.9118135894179067e-05, "loss": 0.0537, "step": 2407 }, { "epoch": 0.53, "grad_norm": 0.3432187613770574, "learning_rate": 1.910392172900455e-05, "loss": 0.0558, "step": 2408 }, { "epoch": 0.53, "grad_norm": 0.36632735372906744, "learning_rate": 1.9089708017340733e-05, "loss": 0.0451, "step": 2409 }, { "epoch": 0.53, "grad_norm": 0.37284915737457436, "learning_rate": 1.9075494766381263e-05, "loss": 0.0422, "step": 2410 }, { "epoch": 0.53, "grad_norm": 0.3631603901305983, "learning_rate": 1.906128198331957e-05, "loss": 0.0448, "step": 2411 }, { "epoch": 0.53, "grad_norm": 0.4002529372751417, "learning_rate": 1.9047069675348816e-05, "loss": 0.0488, "step": 2412 }, { "epoch": 0.53, "grad_norm": 0.34655052250355045, "learning_rate": 1.9032857849661942e-05, "loss": 0.0377, "step": 2413 }, { "epoch": 0.53, "grad_norm": 0.392345625319532, "learning_rate": 1.901864651345164e-05, "loss": 0.0498, "step": 2414 }, { "epoch": 0.53, "grad_norm": 0.37201830482951675, "learning_rate": 1.9004435673910356e-05, "loss": 0.0472, "step": 2415 }, { "epoch": 0.53, "grad_norm": 0.34400427690331264, "learning_rate": 1.8990225338230276e-05, "loss": 0.0372, "step": 2416 }, { "epoch": 0.53, "grad_norm": 0.34003686350944584, "learning_rate": 1.8976015513603344e-05, "loss": 0.0426, "step": 2417 }, { "epoch": 0.53, "grad_norm": 0.4402325478679173, "learning_rate": 1.8961806207221235e-05, "loss": 0.0523, "step": 2418 }, { "epoch": 0.53, "grad_norm": 0.377458855742547, "learning_rate": 1.8947597426275368e-05, "loss": 0.0394, "step": 2419 }, { "epoch": 0.53, "grad_norm": 0.3755874006904432, "learning_rate": 1.8933389177956896e-05, "loss": 0.0408, "step": 2420 }, { "epoch": 0.53, "grad_norm": 0.4364706771337179, "learning_rate": 1.8919181469456703e-05, "loss": 0.0583, "step": 2421 }, { "epoch": 0.53, "grad_norm": 0.3461203132234935, "learning_rate": 1.8904974307965393e-05, "loss": 0.0426, "step": 2422 }, { "epoch": 0.53, "grad_norm": 0.34278681584956155, "learning_rate": 1.8890767700673296e-05, "loss": 0.0375, "step": 2423 }, { "epoch": 0.53, "grad_norm": 0.38381606533579277, "learning_rate": 1.8876561654770466e-05, "loss": 0.0504, "step": 2424 }, { "epoch": 0.53, "grad_norm": 0.31069624805201673, "learning_rate": 1.8862356177446667e-05, "loss": 0.0406, "step": 2425 }, { "epoch": 0.53, "grad_norm": 0.37474832434021943, "learning_rate": 1.8848151275891383e-05, "loss": 0.0469, "step": 2426 }, { "epoch": 0.53, "grad_norm": 0.3399929508362034, "learning_rate": 1.8833946957293796e-05, "loss": 0.035, "step": 2427 }, { "epoch": 0.53, "grad_norm": 0.29861208018557206, "learning_rate": 1.8819743228842806e-05, "loss": 0.0445, "step": 2428 }, { "epoch": 0.53, "grad_norm": 0.3823212371359132, "learning_rate": 1.8805540097727003e-05, "loss": 0.0523, "step": 2429 }, { "epoch": 0.53, "grad_norm": 0.3661808052382058, "learning_rate": 1.8791337571134677e-05, "loss": 0.0437, "step": 2430 }, { "epoch": 0.53, "grad_norm": 0.43966059555670706, "learning_rate": 1.877713565625382e-05, "loss": 0.0717, "step": 2431 }, { "epoch": 0.53, "grad_norm": 0.42020291527145803, "learning_rate": 1.8762934360272097e-05, "loss": 0.0399, "step": 2432 }, { "epoch": 0.53, "grad_norm": 0.33925193469677256, "learning_rate": 1.8748733690376883e-05, "loss": 0.0396, "step": 2433 }, { "epoch": 0.53, "grad_norm": 0.3561620344345107, "learning_rate": 1.8734533653755216e-05, "loss": 0.0548, "step": 2434 }, { "epoch": 0.53, "grad_norm": 0.3905977048874348, "learning_rate": 1.8720334257593826e-05, "loss": 0.0555, "step": 2435 }, { "epoch": 0.54, "grad_norm": 0.3188506005050462, "learning_rate": 1.8706135509079103e-05, "loss": 0.0429, "step": 2436 }, { "epoch": 0.54, "grad_norm": 0.29873289198951014, "learning_rate": 1.869193741539714e-05, "loss": 0.0397, "step": 2437 }, { "epoch": 0.54, "grad_norm": 0.33672384891056945, "learning_rate": 1.8677739983733666e-05, "loss": 0.0382, "step": 2438 }, { "epoch": 0.54, "grad_norm": 0.38256970315878586, "learning_rate": 1.8663543221274096e-05, "loss": 0.0551, "step": 2439 }, { "epoch": 0.54, "grad_norm": 0.357453475138913, "learning_rate": 1.8649347135203494e-05, "loss": 0.0482, "step": 2440 }, { "epoch": 0.54, "grad_norm": 0.41836751650342313, "learning_rate": 1.8635151732706586e-05, "loss": 0.0528, "step": 2441 }, { "epoch": 0.54, "grad_norm": 0.45123432602435154, "learning_rate": 1.862095702096775e-05, "loss": 0.0814, "step": 2442 }, { "epoch": 0.54, "grad_norm": 0.3612207407800378, "learning_rate": 1.860676300717102e-05, "loss": 0.0443, "step": 2443 }, { "epoch": 0.54, "grad_norm": 0.38921682215656894, "learning_rate": 1.8592569698500076e-05, "loss": 0.0546, "step": 2444 }, { "epoch": 0.54, "grad_norm": 0.4375983876905605, "learning_rate": 1.8578377102138223e-05, "loss": 0.0591, "step": 2445 }, { "epoch": 0.54, "grad_norm": 0.3719401094194151, "learning_rate": 1.8564185225268446e-05, "loss": 0.0482, "step": 2446 }, { "epoch": 0.54, "grad_norm": 0.3118690319675185, "learning_rate": 1.8549994075073327e-05, "loss": 0.0492, "step": 2447 }, { "epoch": 0.54, "grad_norm": 0.30169929010979746, "learning_rate": 1.853580365873509e-05, "loss": 0.035, "step": 2448 }, { "epoch": 0.54, "grad_norm": 0.31670811464494275, "learning_rate": 1.8521613983435604e-05, "loss": 0.0383, "step": 2449 }, { "epoch": 0.54, "grad_norm": 0.39067542704295666, "learning_rate": 1.8507425056356338e-05, "loss": 0.0519, "step": 2450 }, { "epoch": 0.54, "grad_norm": 0.4427692334043251, "learning_rate": 1.8493236884678405e-05, "loss": 0.0681, "step": 2451 }, { "epoch": 0.54, "grad_norm": 0.33943585877574, "learning_rate": 1.847904947558252e-05, "loss": 0.0401, "step": 2452 }, { "epoch": 0.54, "grad_norm": 0.3351146467659251, "learning_rate": 1.8464862836249014e-05, "loss": 0.0492, "step": 2453 }, { "epoch": 0.54, "grad_norm": 0.2713231355761402, "learning_rate": 1.8450676973857842e-05, "loss": 0.0354, "step": 2454 }, { "epoch": 0.54, "grad_norm": 0.35652747056555917, "learning_rate": 1.843649189558855e-05, "loss": 0.0473, "step": 2455 }, { "epoch": 0.54, "grad_norm": 0.37071698788929364, "learning_rate": 1.8422307608620292e-05, "loss": 0.047, "step": 2456 }, { "epoch": 0.54, "grad_norm": 0.3180103963623487, "learning_rate": 1.840812412013183e-05, "loss": 0.0448, "step": 2457 }, { "epoch": 0.54, "grad_norm": 0.371256983142258, "learning_rate": 1.8393941437301507e-05, "loss": 0.0463, "step": 2458 }, { "epoch": 0.54, "grad_norm": 0.3359779843620849, "learning_rate": 1.8379759567307266e-05, "loss": 0.049, "step": 2459 }, { "epoch": 0.54, "grad_norm": 0.3967052464630342, "learning_rate": 1.8365578517326642e-05, "loss": 0.0514, "step": 2460 }, { "epoch": 0.54, "grad_norm": 0.35972747931859106, "learning_rate": 1.8351398294536747e-05, "loss": 0.0389, "step": 2461 }, { "epoch": 0.54, "grad_norm": 0.4577699218323339, "learning_rate": 1.833721890611428e-05, "loss": 0.0621, "step": 2462 }, { "epoch": 0.54, "grad_norm": 0.4291514086865876, "learning_rate": 1.832304035923552e-05, "loss": 0.0661, "step": 2463 }, { "epoch": 0.54, "grad_norm": 0.43516989503352316, "learning_rate": 1.8308862661076313e-05, "loss": 0.0589, "step": 2464 }, { "epoch": 0.54, "grad_norm": 0.38741815302530797, "learning_rate": 1.829468581881208e-05, "loss": 0.0564, "step": 2465 }, { "epoch": 0.54, "grad_norm": 0.37852651700468604, "learning_rate": 1.8280509839617814e-05, "loss": 0.0466, "step": 2466 }, { "epoch": 0.54, "grad_norm": 0.32719829805658424, "learning_rate": 1.8266334730668054e-05, "loss": 0.0385, "step": 2467 }, { "epoch": 0.54, "grad_norm": 0.40737122086407546, "learning_rate": 1.8252160499136914e-05, "loss": 0.051, "step": 2468 }, { "epoch": 0.54, "grad_norm": 0.36836061366207723, "learning_rate": 1.8237987152198063e-05, "loss": 0.0511, "step": 2469 }, { "epoch": 0.54, "grad_norm": 0.4453552292518177, "learning_rate": 1.822381469702471e-05, "loss": 0.0661, "step": 2470 }, { "epoch": 0.54, "grad_norm": 0.32987103935269346, "learning_rate": 1.8209643140789622e-05, "loss": 0.0463, "step": 2471 }, { "epoch": 0.54, "grad_norm": 0.4150262987754753, "learning_rate": 1.8195472490665125e-05, "loss": 0.0531, "step": 2472 }, { "epoch": 0.54, "grad_norm": 0.4306926761039924, "learning_rate": 1.8181302753823064e-05, "loss": 0.064, "step": 2473 }, { "epoch": 0.54, "grad_norm": 0.39084008970839546, "learning_rate": 1.8167133937434823e-05, "loss": 0.0502, "step": 2474 }, { "epoch": 0.54, "grad_norm": 0.5909688967670853, "learning_rate": 1.8152966048671334e-05, "loss": 0.0722, "step": 2475 }, { "epoch": 0.54, "grad_norm": 0.38884734980182967, "learning_rate": 1.813879909470305e-05, "loss": 0.0555, "step": 2476 }, { "epoch": 0.54, "grad_norm": 0.37595676518621624, "learning_rate": 1.8124633082699956e-05, "loss": 0.0502, "step": 2477 }, { "epoch": 0.54, "grad_norm": 0.474914844935941, "learning_rate": 1.8110468019831553e-05, "loss": 0.0721, "step": 2478 }, { "epoch": 0.54, "grad_norm": 0.3843573907369612, "learning_rate": 1.8096303913266864e-05, "loss": 0.0756, "step": 2479 }, { "epoch": 0.54, "grad_norm": 0.40612778292424834, "learning_rate": 1.808214077017444e-05, "loss": 0.0581, "step": 2480 }, { "epoch": 0.54, "grad_norm": 0.3720001394825335, "learning_rate": 1.8067978597722325e-05, "loss": 0.0549, "step": 2481 }, { "epoch": 0.55, "grad_norm": 0.3975497809613756, "learning_rate": 1.8053817403078087e-05, "loss": 0.0665, "step": 2482 }, { "epoch": 0.55, "grad_norm": 0.3122717977815116, "learning_rate": 1.8039657193408788e-05, "loss": 0.043, "step": 2483 }, { "epoch": 0.55, "grad_norm": 0.40600406098096514, "learning_rate": 1.8025497975881004e-05, "loss": 0.0543, "step": 2484 }, { "epoch": 0.55, "grad_norm": 0.300010101327112, "learning_rate": 1.8011339757660798e-05, "loss": 0.0365, "step": 2485 }, { "epoch": 0.55, "grad_norm": 0.3967849268788347, "learning_rate": 1.7997182545913732e-05, "loss": 0.0409, "step": 2486 }, { "epoch": 0.55, "grad_norm": 0.29865259737449684, "learning_rate": 1.798302634780486e-05, "loss": 0.0439, "step": 2487 }, { "epoch": 0.55, "grad_norm": 0.3211444888899818, "learning_rate": 1.796887117049871e-05, "loss": 0.0387, "step": 2488 }, { "epoch": 0.55, "grad_norm": 0.3812861472331901, "learning_rate": 1.7954717021159316e-05, "loss": 0.055, "step": 2489 }, { "epoch": 0.55, "grad_norm": 0.3748673760814513, "learning_rate": 1.7940563906950175e-05, "loss": 0.0447, "step": 2490 }, { "epoch": 0.55, "grad_norm": 0.3460907783122389, "learning_rate": 1.7926411835034267e-05, "loss": 0.0545, "step": 2491 }, { "epoch": 0.55, "grad_norm": 0.41721838147453766, "learning_rate": 1.791226081257404e-05, "loss": 0.0578, "step": 2492 }, { "epoch": 0.55, "grad_norm": 0.3697827924639646, "learning_rate": 1.7898110846731415e-05, "loss": 0.0587, "step": 2493 }, { "epoch": 0.55, "grad_norm": 0.4420947385348038, "learning_rate": 1.7883961944667772e-05, "loss": 0.0588, "step": 2494 }, { "epoch": 0.55, "grad_norm": 0.33188920067903177, "learning_rate": 1.786981411354396e-05, "loss": 0.0486, "step": 2495 }, { "epoch": 0.55, "grad_norm": 0.40870752154700785, "learning_rate": 1.7855667360520277e-05, "loss": 0.0512, "step": 2496 }, { "epoch": 0.55, "grad_norm": 0.35365349625987297, "learning_rate": 1.7841521692756497e-05, "loss": 0.05, "step": 2497 }, { "epoch": 0.55, "grad_norm": 0.3862683148655958, "learning_rate": 1.782737711741182e-05, "loss": 0.0457, "step": 2498 }, { "epoch": 0.55, "grad_norm": 0.34301279953855635, "learning_rate": 1.7813233641644904e-05, "loss": 0.0419, "step": 2499 }, { "epoch": 0.55, "grad_norm": 0.326006270529698, "learning_rate": 1.7799091272613843e-05, "loss": 0.0404, "step": 2500 }, { "epoch": 0.55, "grad_norm": 0.38251507162182874, "learning_rate": 1.778495001747618e-05, "loss": 0.0534, "step": 2501 }, { "epoch": 0.55, "grad_norm": 0.3813334717597466, "learning_rate": 1.7770809883388896e-05, "loss": 0.044, "step": 2502 }, { "epoch": 0.55, "grad_norm": 0.39002479217662644, "learning_rate": 1.775667087750839e-05, "loss": 0.0435, "step": 2503 }, { "epoch": 0.55, "grad_norm": 0.3337172281948741, "learning_rate": 1.774253300699051e-05, "loss": 0.0428, "step": 2504 }, { "epoch": 0.55, "grad_norm": 0.36579148748737944, "learning_rate": 1.77283962789905e-05, "loss": 0.0432, "step": 2505 }, { "epoch": 0.55, "grad_norm": 0.3800404866196099, "learning_rate": 1.771426070066307e-05, "loss": 0.0534, "step": 2506 }, { "epoch": 0.55, "grad_norm": 0.35188721389983824, "learning_rate": 1.770012627916231e-05, "loss": 0.0443, "step": 2507 }, { "epoch": 0.55, "grad_norm": 0.35454603054259937, "learning_rate": 1.768599302164174e-05, "loss": 0.0421, "step": 2508 }, { "epoch": 0.55, "grad_norm": 0.37317982646238296, "learning_rate": 1.7671860935254285e-05, "loss": 0.0587, "step": 2509 }, { "epoch": 0.55, "grad_norm": 0.2835275173347284, "learning_rate": 1.7657730027152286e-05, "loss": 0.036, "step": 2510 }, { "epoch": 0.55, "grad_norm": 0.35067588481115647, "learning_rate": 1.7643600304487475e-05, "loss": 0.042, "step": 2511 }, { "epoch": 0.55, "grad_norm": 0.3442711239562008, "learning_rate": 1.7629471774410997e-05, "loss": 0.0367, "step": 2512 }, { "epoch": 0.55, "grad_norm": 0.3680934484137127, "learning_rate": 1.7615344444073385e-05, "loss": 0.0415, "step": 2513 }, { "epoch": 0.55, "grad_norm": 0.35494281301469954, "learning_rate": 1.7601218320624562e-05, "loss": 0.047, "step": 2514 }, { "epoch": 0.55, "grad_norm": 0.41312054788957775, "learning_rate": 1.7587093411213856e-05, "loss": 0.0511, "step": 2515 }, { "epoch": 0.55, "grad_norm": 0.32683657684756035, "learning_rate": 1.7572969722989967e-05, "loss": 0.0441, "step": 2516 }, { "epoch": 0.55, "grad_norm": 0.4399027579856521, "learning_rate": 1.755884726310098e-05, "loss": 0.062, "step": 2517 }, { "epoch": 0.55, "grad_norm": 0.4171422826714955, "learning_rate": 1.754472603869436e-05, "loss": 0.0502, "step": 2518 }, { "epoch": 0.55, "grad_norm": 0.38591179318083907, "learning_rate": 1.7530606056916935e-05, "loss": 0.0452, "step": 2519 }, { "epoch": 0.55, "grad_norm": 0.32631634465151743, "learning_rate": 1.751648732491493e-05, "loss": 0.046, "step": 2520 }, { "epoch": 0.55, "grad_norm": 0.3409260255227103, "learning_rate": 1.7502369849833908e-05, "loss": 0.0461, "step": 2521 }, { "epoch": 0.55, "grad_norm": 0.3786171075383073, "learning_rate": 1.748825363881881e-05, "loss": 0.046, "step": 2522 }, { "epoch": 0.55, "grad_norm": 0.35722992125319664, "learning_rate": 1.7474138699013953e-05, "loss": 0.0563, "step": 2523 }, { "epoch": 0.55, "grad_norm": 0.298713450151254, "learning_rate": 1.746002503756298e-05, "loss": 0.0374, "step": 2524 }, { "epoch": 0.55, "grad_norm": 0.30910242384037684, "learning_rate": 1.7445912661608912e-05, "loss": 0.0407, "step": 2525 }, { "epoch": 0.55, "grad_norm": 0.46861530593996237, "learning_rate": 1.7431801578294097e-05, "loss": 0.0517, "step": 2526 }, { "epoch": 0.56, "grad_norm": 0.394621111767584, "learning_rate": 1.7417691794760247e-05, "loss": 0.0524, "step": 2527 }, { "epoch": 0.56, "grad_norm": 0.3686938129873323, "learning_rate": 1.740358331814841e-05, "loss": 0.0452, "step": 2528 }, { "epoch": 0.56, "grad_norm": 0.4074832829749992, "learning_rate": 1.7389476155598974e-05, "loss": 0.0514, "step": 2529 }, { "epoch": 0.56, "grad_norm": 0.342856394410026, "learning_rate": 1.7375370314251657e-05, "loss": 0.0429, "step": 2530 }, { "epoch": 0.56, "grad_norm": 0.3690525333308805, "learning_rate": 1.7361265801245504e-05, "loss": 0.0548, "step": 2531 }, { "epoch": 0.56, "grad_norm": 0.37886233911028505, "learning_rate": 1.7347162623718913e-05, "loss": 0.0615, "step": 2532 }, { "epoch": 0.56, "grad_norm": 0.35522498691184834, "learning_rate": 1.7333060788809582e-05, "loss": 0.0384, "step": 2533 }, { "epoch": 0.56, "grad_norm": 0.35634166307916276, "learning_rate": 1.7318960303654534e-05, "loss": 0.0404, "step": 2534 }, { "epoch": 0.56, "grad_norm": 0.4526984574928489, "learning_rate": 1.7304861175390112e-05, "loss": 0.0644, "step": 2535 }, { "epoch": 0.56, "grad_norm": 0.3635237509931962, "learning_rate": 1.729076341115197e-05, "loss": 0.0689, "step": 2536 }, { "epoch": 0.56, "grad_norm": 0.3394390414952252, "learning_rate": 1.7276667018075073e-05, "loss": 0.0521, "step": 2537 }, { "epoch": 0.56, "grad_norm": 0.358893528049773, "learning_rate": 1.726257200329369e-05, "loss": 0.0439, "step": 2538 }, { "epoch": 0.56, "grad_norm": 0.3475280696866417, "learning_rate": 1.72484783739414e-05, "loss": 0.0516, "step": 2539 }, { "epoch": 0.56, "grad_norm": 0.31888040371514104, "learning_rate": 1.7234386137151067e-05, "loss": 0.0389, "step": 2540 }, { "epoch": 0.56, "grad_norm": 0.3433524563974395, "learning_rate": 1.7220295300054867e-05, "loss": 0.0416, "step": 2541 }, { "epoch": 0.56, "grad_norm": 0.3505328039852608, "learning_rate": 1.7206205869784254e-05, "loss": 0.0434, "step": 2542 }, { "epoch": 0.56, "grad_norm": 0.31793390406469835, "learning_rate": 1.719211785346998e-05, "loss": 0.0378, "step": 2543 }, { "epoch": 0.56, "grad_norm": 0.3443520648747606, "learning_rate": 1.717803125824207e-05, "loss": 0.0513, "step": 2544 }, { "epoch": 0.56, "grad_norm": 0.328554734885512, "learning_rate": 1.716394609122984e-05, "loss": 0.0448, "step": 2545 }, { "epoch": 0.56, "grad_norm": 0.2885518589813259, "learning_rate": 1.714986235956188e-05, "loss": 0.0401, "step": 2546 }, { "epoch": 0.56, "grad_norm": 0.4041937728850449, "learning_rate": 1.713578007036605e-05, "loss": 0.0582, "step": 2547 }, { "epoch": 0.56, "grad_norm": 0.39700011361334303, "learning_rate": 1.712169923076948e-05, "loss": 0.058, "step": 2548 }, { "epoch": 0.56, "grad_norm": 0.29387935265903975, "learning_rate": 1.710761984789858e-05, "loss": 0.0382, "step": 2549 }, { "epoch": 0.56, "grad_norm": 0.503105355110246, "learning_rate": 1.7093541928879004e-05, "loss": 0.0946, "step": 2550 }, { "epoch": 0.56, "grad_norm": 0.33110208087717735, "learning_rate": 1.7079465480835677e-05, "loss": 0.0399, "step": 2551 }, { "epoch": 0.56, "grad_norm": 0.26865055107203595, "learning_rate": 1.7065390510892767e-05, "loss": 0.0423, "step": 2552 }, { "epoch": 0.56, "grad_norm": 0.4193389039629841, "learning_rate": 1.7051317026173715e-05, "loss": 0.0548, "step": 2553 }, { "epoch": 0.56, "grad_norm": 0.3646464384051765, "learning_rate": 1.703724503380119e-05, "loss": 0.0496, "step": 2554 }, { "epoch": 0.56, "grad_norm": 0.328030254672491, "learning_rate": 1.7023174540897112e-05, "loss": 0.0367, "step": 2555 }, { "epoch": 0.56, "grad_norm": 0.37608506259623486, "learning_rate": 1.7009105554582652e-05, "loss": 0.0529, "step": 2556 }, { "epoch": 0.56, "grad_norm": 0.3777206197480758, "learning_rate": 1.6995038081978193e-05, "loss": 0.0584, "step": 2557 }, { "epoch": 0.56, "grad_norm": 0.30499260860226335, "learning_rate": 1.6980972130203396e-05, "loss": 0.0367, "step": 2558 }, { "epoch": 0.56, "grad_norm": 0.3273897106889949, "learning_rate": 1.6966907706377103e-05, "loss": 0.0396, "step": 2559 }, { "epoch": 0.56, "grad_norm": 0.3536332498520532, "learning_rate": 1.695284481761742e-05, "loss": 0.0504, "step": 2560 }, { "epoch": 0.56, "grad_norm": 0.388388218718895, "learning_rate": 1.6938783471041647e-05, "loss": 0.0537, "step": 2561 }, { "epoch": 0.56, "grad_norm": 0.2954232205110963, "learning_rate": 1.692472367376633e-05, "loss": 0.039, "step": 2562 }, { "epoch": 0.56, "grad_norm": 0.464982372739429, "learning_rate": 1.691066543290721e-05, "loss": 0.0805, "step": 2563 }, { "epoch": 0.56, "grad_norm": 0.27277565284516625, "learning_rate": 1.6896608755579256e-05, "loss": 0.0413, "step": 2564 }, { "epoch": 0.56, "grad_norm": 0.35453117303355675, "learning_rate": 1.6882553648896625e-05, "loss": 0.0425, "step": 2565 }, { "epoch": 0.56, "grad_norm": 0.27941979857681537, "learning_rate": 1.686850011997271e-05, "loss": 0.0342, "step": 2566 }, { "epoch": 0.56, "grad_norm": 0.3715773793955695, "learning_rate": 1.685444817592008e-05, "loss": 0.0618, "step": 2567 }, { "epoch": 0.56, "grad_norm": 0.373749675112682, "learning_rate": 1.6840397823850513e-05, "loss": 0.0532, "step": 2568 }, { "epoch": 0.56, "grad_norm": 0.3291121008569529, "learning_rate": 1.6826349070874973e-05, "loss": 0.0406, "step": 2569 }, { "epoch": 0.56, "grad_norm": 0.35252667772513235, "learning_rate": 1.6812301924103626e-05, "loss": 0.0529, "step": 2570 }, { "epoch": 0.56, "grad_norm": 0.3799685501586278, "learning_rate": 1.6798256390645816e-05, "loss": 0.0451, "step": 2571 }, { "epoch": 0.56, "grad_norm": 0.35622731688714665, "learning_rate": 1.6784212477610075e-05, "loss": 0.0442, "step": 2572 }, { "epoch": 0.57, "grad_norm": 0.3117283831568278, "learning_rate": 1.6770170192104107e-05, "loss": 0.0331, "step": 2573 }, { "epoch": 0.57, "grad_norm": 0.32447007790578225, "learning_rate": 1.67561295412348e-05, "loss": 0.0452, "step": 2574 }, { "epoch": 0.57, "grad_norm": 0.3309813336600599, "learning_rate": 1.6742090532108228e-05, "loss": 0.0392, "step": 2575 }, { "epoch": 0.57, "grad_norm": 0.40694373758744323, "learning_rate": 1.6728053171829603e-05, "loss": 0.0606, "step": 2576 }, { "epoch": 0.57, "grad_norm": 0.5453939819669776, "learning_rate": 1.6714017467503328e-05, "loss": 0.1091, "step": 2577 }, { "epoch": 0.57, "grad_norm": 0.4217280732488526, "learning_rate": 1.6699983426232955e-05, "loss": 0.048, "step": 2578 }, { "epoch": 0.57, "grad_norm": 0.3787519396364005, "learning_rate": 1.6685951055121203e-05, "loss": 0.0446, "step": 2579 }, { "epoch": 0.57, "grad_norm": 0.335666791384736, "learning_rate": 1.667192036126993e-05, "loss": 0.047, "step": 2580 }, { "epoch": 0.57, "grad_norm": 0.26844197075243886, "learning_rate": 1.665789135178017e-05, "loss": 0.0343, "step": 2581 }, { "epoch": 0.57, "grad_norm": 0.36392243519255474, "learning_rate": 1.664386403375208e-05, "loss": 0.0453, "step": 2582 }, { "epoch": 0.57, "grad_norm": 0.4301809598543906, "learning_rate": 1.6629838414284972e-05, "loss": 0.0472, "step": 2583 }, { "epoch": 0.57, "grad_norm": 0.3268916702396021, "learning_rate": 1.6615814500477307e-05, "loss": 0.0423, "step": 2584 }, { "epoch": 0.57, "grad_norm": 0.3298473680154127, "learning_rate": 1.6601792299426668e-05, "loss": 0.0391, "step": 2585 }, { "epoch": 0.57, "grad_norm": 0.3484045718761079, "learning_rate": 1.658777181822978e-05, "loss": 0.0436, "step": 2586 }, { "epoch": 0.57, "grad_norm": 0.3389652780822943, "learning_rate": 1.6573753063982492e-05, "loss": 0.0448, "step": 2587 }, { "epoch": 0.57, "grad_norm": 0.37909805712879996, "learning_rate": 1.655973604377978e-05, "loss": 0.06, "step": 2588 }, { "epoch": 0.57, "grad_norm": 0.3678902536237636, "learning_rate": 1.6545720764715746e-05, "loss": 0.039, "step": 2589 }, { "epoch": 0.57, "grad_norm": 0.3260284611076184, "learning_rate": 1.6531707233883607e-05, "loss": 0.0459, "step": 2590 }, { "epoch": 0.57, "grad_norm": 0.40296197818244606, "learning_rate": 1.651769545837569e-05, "loss": 0.0643, "step": 2591 }, { "epoch": 0.57, "grad_norm": 0.2994351050540486, "learning_rate": 1.650368544528346e-05, "loss": 0.03, "step": 2592 }, { "epoch": 0.57, "grad_norm": 0.352678202806525, "learning_rate": 1.6489677201697453e-05, "loss": 0.0436, "step": 2593 }, { "epoch": 0.57, "grad_norm": 0.35567778425230934, "learning_rate": 1.6475670734707336e-05, "loss": 0.0538, "step": 2594 }, { "epoch": 0.57, "grad_norm": 0.31933516756017416, "learning_rate": 1.6461666051401865e-05, "loss": 0.0378, "step": 2595 }, { "epoch": 0.57, "grad_norm": 0.3431270003998748, "learning_rate": 1.6447663158868897e-05, "loss": 0.0368, "step": 2596 }, { "epoch": 0.57, "grad_norm": 0.3797904247919991, "learning_rate": 1.6433662064195378e-05, "loss": 0.0427, "step": 2597 }, { "epoch": 0.57, "grad_norm": 0.40201406952923524, "learning_rate": 1.641966277446735e-05, "loss": 0.0773, "step": 2598 }, { "epoch": 0.57, "grad_norm": 0.3433373423549848, "learning_rate": 1.6405665296769942e-05, "loss": 0.0334, "step": 2599 }, { "epoch": 0.57, "grad_norm": 0.41390034361412825, "learning_rate": 1.6391669638187355e-05, "loss": 0.0634, "step": 2600 }, { "epoch": 0.57, "grad_norm": 0.3183565198150102, "learning_rate": 1.6377675805802882e-05, "loss": 0.0384, "step": 2601 }, { "epoch": 0.57, "grad_norm": 0.34967596602657924, "learning_rate": 1.6363683806698896e-05, "loss": 0.0466, "step": 2602 }, { "epoch": 0.57, "grad_norm": 0.3991341204438977, "learning_rate": 1.6349693647956824e-05, "loss": 0.0667, "step": 2603 }, { "epoch": 0.57, "grad_norm": 0.3746840417493953, "learning_rate": 1.6335705336657176e-05, "loss": 0.0417, "step": 2604 }, { "epoch": 0.57, "grad_norm": 0.30447485423593174, "learning_rate": 1.632171887987952e-05, "loss": 0.0398, "step": 2605 }, { "epoch": 0.57, "grad_norm": 0.27851170434104927, "learning_rate": 1.6307734284702484e-05, "loss": 0.0446, "step": 2606 }, { "epoch": 0.57, "grad_norm": 0.3622261229518271, "learning_rate": 1.6293751558203764e-05, "loss": 0.0417, "step": 2607 }, { "epoch": 0.57, "grad_norm": 0.3671122128835008, "learning_rate": 1.6279770707460096e-05, "loss": 0.0481, "step": 2608 }, { "epoch": 0.57, "grad_norm": 0.29716355956407814, "learning_rate": 1.6265791739547276e-05, "loss": 0.0413, "step": 2609 }, { "epoch": 0.57, "grad_norm": 0.3207904459828551, "learning_rate": 1.625181466154015e-05, "loss": 0.0335, "step": 2610 }, { "epoch": 0.57, "grad_norm": 0.3842392433599277, "learning_rate": 1.62378394805126e-05, "loss": 0.0432, "step": 2611 }, { "epoch": 0.57, "grad_norm": 0.3508987306493617, "learning_rate": 1.6223866203537558e-05, "loss": 0.0439, "step": 2612 }, { "epoch": 0.57, "grad_norm": 0.3887107101284892, "learning_rate": 1.6209894837686974e-05, "loss": 0.0493, "step": 2613 }, { "epoch": 0.57, "grad_norm": 0.34506361480253667, "learning_rate": 1.6195925390031845e-05, "loss": 0.0385, "step": 2614 }, { "epoch": 0.57, "grad_norm": 0.3061896731845957, "learning_rate": 1.61819578676422e-05, "loss": 0.0326, "step": 2615 }, { "epoch": 0.57, "grad_norm": 0.32517755591683667, "learning_rate": 1.616799227758708e-05, "loss": 0.0453, "step": 2616 }, { "epoch": 0.57, "grad_norm": 0.3009063658647218, "learning_rate": 1.6154028626934548e-05, "loss": 0.0353, "step": 2617 }, { "epoch": 0.58, "grad_norm": 0.34151820130246274, "learning_rate": 1.6140066922751715e-05, "loss": 0.0423, "step": 2618 }, { "epoch": 0.58, "grad_norm": 0.3868738884362936, "learning_rate": 1.612610717210467e-05, "loss": 0.0526, "step": 2619 }, { "epoch": 0.58, "grad_norm": 0.29858877532184536, "learning_rate": 1.611214938205854e-05, "loss": 0.0415, "step": 2620 }, { "epoch": 0.58, "grad_norm": 0.30091310063498156, "learning_rate": 1.609819355967744e-05, "loss": 0.0368, "step": 2621 }, { "epoch": 0.58, "grad_norm": 0.43227457296134053, "learning_rate": 1.6084239712024492e-05, "loss": 0.0533, "step": 2622 }, { "epoch": 0.58, "grad_norm": 0.36195687927499137, "learning_rate": 1.6070287846161834e-05, "loss": 0.041, "step": 2623 }, { "epoch": 0.58, "grad_norm": 0.2939982699218436, "learning_rate": 1.6056337969150584e-05, "loss": 0.0298, "step": 2624 }, { "epoch": 0.58, "grad_norm": 0.33352795025660326, "learning_rate": 1.6042390088050864e-05, "loss": 0.0448, "step": 2625 }, { "epoch": 0.58, "grad_norm": 0.49970352800821244, "learning_rate": 1.6028444209921775e-05, "loss": 0.0493, "step": 2626 }, { "epoch": 0.58, "grad_norm": 0.3028238717966314, "learning_rate": 1.601450034182142e-05, "loss": 0.0352, "step": 2627 }, { "epoch": 0.58, "grad_norm": 0.33227471793707164, "learning_rate": 1.6000558490806877e-05, "loss": 0.0548, "step": 2628 }, { "epoch": 0.58, "grad_norm": 0.2640987190823617, "learning_rate": 1.59866186639342e-05, "loss": 0.0317, "step": 2629 }, { "epoch": 0.58, "grad_norm": 0.3609816875079501, "learning_rate": 1.597268086825842e-05, "loss": 0.0461, "step": 2630 }, { "epoch": 0.58, "grad_norm": 0.3688559266875854, "learning_rate": 1.5958745110833536e-05, "loss": 0.0521, "step": 2631 }, { "epoch": 0.58, "grad_norm": 0.366729450630457, "learning_rate": 1.5944811398712527e-05, "loss": 0.052, "step": 2632 }, { "epoch": 0.58, "grad_norm": 0.35198386689963646, "learning_rate": 1.5930879738947328e-05, "loss": 0.0561, "step": 2633 }, { "epoch": 0.58, "grad_norm": 0.32985824394095403, "learning_rate": 1.5916950138588834e-05, "loss": 0.0519, "step": 2634 }, { "epoch": 0.58, "grad_norm": 0.3273032320388355, "learning_rate": 1.5903022604686908e-05, "loss": 0.047, "step": 2635 }, { "epoch": 0.58, "grad_norm": 0.2991084361099232, "learning_rate": 1.5889097144290357e-05, "loss": 0.0357, "step": 2636 }, { "epoch": 0.58, "grad_norm": 0.3294866894962564, "learning_rate": 1.587517376444694e-05, "loss": 0.0484, "step": 2637 }, { "epoch": 0.58, "grad_norm": 0.34033152773148057, "learning_rate": 1.5861252472203367e-05, "loss": 0.0433, "step": 2638 }, { "epoch": 0.58, "grad_norm": 0.32795379350134185, "learning_rate": 1.5847333274605286e-05, "loss": 0.0445, "step": 2639 }, { "epoch": 0.58, "grad_norm": 0.44996915420142114, "learning_rate": 1.5833416178697298e-05, "loss": 0.082, "step": 2640 }, { "epoch": 0.58, "grad_norm": 0.310305219470742, "learning_rate": 1.5819501191522917e-05, "loss": 0.0355, "step": 2641 }, { "epoch": 0.58, "grad_norm": 0.2931443006346977, "learning_rate": 1.5805588320124607e-05, "loss": 0.0338, "step": 2642 }, { "epoch": 0.58, "grad_norm": 0.3440901958568532, "learning_rate": 1.5791677571543762e-05, "loss": 0.0477, "step": 2643 }, { "epoch": 0.58, "grad_norm": 0.43826924543888984, "learning_rate": 1.5777768952820697e-05, "loss": 0.0781, "step": 2644 }, { "epoch": 0.58, "grad_norm": 0.38238867828954753, "learning_rate": 1.576386247099465e-05, "loss": 0.0561, "step": 2645 }, { "epoch": 0.58, "grad_norm": 0.3758116521453048, "learning_rate": 1.5749958133103772e-05, "loss": 0.0616, "step": 2646 }, { "epoch": 0.58, "grad_norm": 0.2914145634669556, "learning_rate": 1.5736055946185137e-05, "loss": 0.0326, "step": 2647 }, { "epoch": 0.58, "grad_norm": 0.3146572398322556, "learning_rate": 1.572215591727473e-05, "loss": 0.0393, "step": 2648 }, { "epoch": 0.58, "grad_norm": 0.3275491907399743, "learning_rate": 1.570825805340743e-05, "loss": 0.0462, "step": 2649 }, { "epoch": 0.58, "grad_norm": 0.3821830245198099, "learning_rate": 1.5694362361617043e-05, "loss": 0.0479, "step": 2650 }, { "epoch": 0.58, "grad_norm": 0.38612555273676535, "learning_rate": 1.568046884893626e-05, "loss": 0.0585, "step": 2651 }, { "epoch": 0.58, "grad_norm": 0.35829701664699615, "learning_rate": 1.5666577522396658e-05, "loss": 0.0501, "step": 2652 }, { "epoch": 0.58, "grad_norm": 0.3198429364267204, "learning_rate": 1.565268838902875e-05, "loss": 0.039, "step": 2653 }, { "epoch": 0.58, "grad_norm": 0.33457842637181534, "learning_rate": 1.5638801455861893e-05, "loss": 0.0462, "step": 2654 }, { "epoch": 0.58, "grad_norm": 0.34628602363815075, "learning_rate": 1.5624916729924354e-05, "loss": 0.0505, "step": 2655 }, { "epoch": 0.58, "grad_norm": 0.28053922378784835, "learning_rate": 1.561103421824328e-05, "loss": 0.0305, "step": 2656 }, { "epoch": 0.58, "grad_norm": 0.3882918488804002, "learning_rate": 1.5597153927844693e-05, "loss": 0.0448, "step": 2657 }, { "epoch": 0.58, "grad_norm": 0.4070758311921904, "learning_rate": 1.5583275865753492e-05, "loss": 0.0483, "step": 2658 }, { "epoch": 0.58, "grad_norm": 0.3473070941618986, "learning_rate": 1.556940003899345e-05, "loss": 0.0444, "step": 2659 }, { "epoch": 0.58, "grad_norm": 0.4384647699142405, "learning_rate": 1.55555264545872e-05, "loss": 0.0759, "step": 2660 }, { "epoch": 0.58, "grad_norm": 0.321066527129186, "learning_rate": 1.5541655119556262e-05, "loss": 0.0393, "step": 2661 }, { "epoch": 0.58, "grad_norm": 0.32812148046876627, "learning_rate": 1.5527786040921e-05, "loss": 0.0379, "step": 2662 }, { "epoch": 0.58, "grad_norm": 0.2679898790309503, "learning_rate": 1.551391922570064e-05, "loss": 0.0298, "step": 2663 }, { "epoch": 0.59, "grad_norm": 0.4259319213815324, "learning_rate": 1.550005468091326e-05, "loss": 0.0726, "step": 2664 }, { "epoch": 0.59, "grad_norm": 0.2978625936497866, "learning_rate": 1.548619241357579e-05, "loss": 0.0356, "step": 2665 }, { "epoch": 0.59, "grad_norm": 0.4194003392848951, "learning_rate": 1.5472332430704007e-05, "loss": 0.0549, "step": 2666 }, { "epoch": 0.59, "grad_norm": 0.30088993684977516, "learning_rate": 1.545847473931254e-05, "loss": 0.0426, "step": 2667 }, { "epoch": 0.59, "grad_norm": 0.44909686217909617, "learning_rate": 1.5444619346414845e-05, "loss": 0.0627, "step": 2668 }, { "epoch": 0.59, "grad_norm": 0.3775639896284503, "learning_rate": 1.543076625902322e-05, "loss": 0.0522, "step": 2669 }, { "epoch": 0.59, "grad_norm": 0.3054246299862562, "learning_rate": 1.5416915484148805e-05, "loss": 0.0386, "step": 2670 }, { "epoch": 0.59, "grad_norm": 0.39041315056117604, "learning_rate": 1.5403067028801558e-05, "loss": 0.0507, "step": 2671 }, { "epoch": 0.59, "grad_norm": 0.31897717776320117, "learning_rate": 1.5389220899990267e-05, "loss": 0.0503, "step": 2672 }, { "epoch": 0.59, "grad_norm": 0.307557708802915, "learning_rate": 1.5375377104722545e-05, "loss": 0.043, "step": 2673 }, { "epoch": 0.59, "grad_norm": 0.3631592617515664, "learning_rate": 1.5361535650004818e-05, "loss": 0.0489, "step": 2674 }, { "epoch": 0.59, "grad_norm": 0.33431035022896627, "learning_rate": 1.5347696542842333e-05, "loss": 0.0384, "step": 2675 }, { "epoch": 0.59, "grad_norm": 0.3262609257458062, "learning_rate": 1.5333859790239148e-05, "loss": 0.042, "step": 2676 }, { "epoch": 0.59, "grad_norm": 0.3015285843470328, "learning_rate": 1.5320025399198125e-05, "loss": 0.0466, "step": 2677 }, { "epoch": 0.59, "grad_norm": 0.4392304937366175, "learning_rate": 1.530619337672093e-05, "loss": 0.0576, "step": 2678 }, { "epoch": 0.59, "grad_norm": 0.3149699353448315, "learning_rate": 1.5292363729808048e-05, "loss": 0.0371, "step": 2679 }, { "epoch": 0.59, "grad_norm": 0.3775983831905447, "learning_rate": 1.5278536465458738e-05, "loss": 0.0641, "step": 2680 }, { "epoch": 0.59, "grad_norm": 0.3950621366217049, "learning_rate": 1.5264711590671067e-05, "loss": 0.0524, "step": 2681 }, { "epoch": 0.59, "grad_norm": 0.3429047857260409, "learning_rate": 1.5250889112441889e-05, "loss": 0.0512, "step": 2682 }, { "epoch": 0.59, "grad_norm": 0.2749659869443981, "learning_rate": 1.5237069037766843e-05, "loss": 0.0361, "step": 2683 }, { "epoch": 0.59, "grad_norm": 0.3164070859356422, "learning_rate": 1.5223251373640354e-05, "loss": 0.0386, "step": 2684 }, { "epoch": 0.59, "grad_norm": 0.28880691828118865, "learning_rate": 1.5209436127055627e-05, "loss": 0.0301, "step": 2685 }, { "epoch": 0.59, "grad_norm": 0.3426884602907715, "learning_rate": 1.5195623305004637e-05, "loss": 0.037, "step": 2686 }, { "epoch": 0.59, "grad_norm": 0.32675668975035227, "learning_rate": 1.5181812914478146e-05, "loss": 0.039, "step": 2687 }, { "epoch": 0.59, "grad_norm": 0.36544928118454606, "learning_rate": 1.5168004962465681e-05, "loss": 0.0533, "step": 2688 }, { "epoch": 0.59, "grad_norm": 0.27877802104980726, "learning_rate": 1.5154199455955523e-05, "loss": 0.0308, "step": 2689 }, { "epoch": 0.59, "grad_norm": 0.3260941327124316, "learning_rate": 1.5140396401934725e-05, "loss": 0.0497, "step": 2690 }, { "epoch": 0.59, "grad_norm": 0.29156792479249044, "learning_rate": 1.5126595807389098e-05, "loss": 0.0418, "step": 2691 }, { "epoch": 0.59, "grad_norm": 0.3199316994412665, "learning_rate": 1.5112797679303206e-05, "loss": 0.0384, "step": 2692 }, { "epoch": 0.59, "grad_norm": 0.35845826192788094, "learning_rate": 1.5099002024660368e-05, "loss": 0.0579, "step": 2693 }, { "epoch": 0.59, "grad_norm": 0.3281439367007041, "learning_rate": 1.5085208850442649e-05, "loss": 0.0431, "step": 2694 }, { "epoch": 0.59, "grad_norm": 0.38845412705942295, "learning_rate": 1.5071418163630855e-05, "loss": 0.0475, "step": 2695 }, { "epoch": 0.59, "grad_norm": 0.38170965111248956, "learning_rate": 1.5057629971204546e-05, "loss": 0.0535, "step": 2696 }, { "epoch": 0.59, "grad_norm": 0.3093982061679945, "learning_rate": 1.5043844280142005e-05, "loss": 0.0325, "step": 2697 }, { "epoch": 0.59, "grad_norm": 0.28981425502855296, "learning_rate": 1.5030061097420255e-05, "loss": 0.0433, "step": 2698 }, { "epoch": 0.59, "grad_norm": 0.31525880655108296, "learning_rate": 1.5016280430015052e-05, "loss": 0.0359, "step": 2699 }, { "epoch": 0.59, "grad_norm": 0.3033155093331658, "learning_rate": 1.5002502284900871e-05, "loss": 0.0319, "step": 2700 }, { "epoch": 0.59, "grad_norm": 0.3376865101202634, "learning_rate": 1.4988726669050917e-05, "loss": 0.0498, "step": 2701 }, { "epoch": 0.59, "grad_norm": 0.41858342686618616, "learning_rate": 1.4974953589437117e-05, "loss": 0.0576, "step": 2702 }, { "epoch": 0.59, "grad_norm": 0.3182429118187405, "learning_rate": 1.4961183053030106e-05, "loss": 0.0403, "step": 2703 }, { "epoch": 0.59, "grad_norm": 0.3506493152510276, "learning_rate": 1.4947415066799247e-05, "loss": 0.0492, "step": 2704 }, { "epoch": 0.59, "grad_norm": 0.2502868020514377, "learning_rate": 1.4933649637712593e-05, "loss": 0.03, "step": 2705 }, { "epoch": 0.59, "grad_norm": 0.33765236497062373, "learning_rate": 1.4919886772736915e-05, "loss": 0.0325, "step": 2706 }, { "epoch": 0.59, "grad_norm": 0.29693614179525, "learning_rate": 1.4906126478837683e-05, "loss": 0.0356, "step": 2707 }, { "epoch": 0.59, "grad_norm": 0.3101886626634362, "learning_rate": 1.4892368762979067e-05, "loss": 0.0302, "step": 2708 }, { "epoch": 0.59, "grad_norm": 0.3130714198962901, "learning_rate": 1.4878613632123928e-05, "loss": 0.0318, "step": 2709 }, { "epoch": 0.6, "grad_norm": 0.3793925281604337, "learning_rate": 1.4864861093233827e-05, "loss": 0.0504, "step": 2710 }, { "epoch": 0.6, "grad_norm": 0.3023329996568408, "learning_rate": 1.4851111153269005e-05, "loss": 0.0421, "step": 2711 }, { "epoch": 0.6, "grad_norm": 0.3567602360211933, "learning_rate": 1.4837363819188379e-05, "loss": 0.0402, "step": 2712 }, { "epoch": 0.6, "grad_norm": 0.33821174488992095, "learning_rate": 1.4823619097949584e-05, "loss": 0.0482, "step": 2713 }, { "epoch": 0.6, "grad_norm": 0.3185716751967196, "learning_rate": 1.4809876996508897e-05, "loss": 0.0481, "step": 2714 }, { "epoch": 0.6, "grad_norm": 0.3710762421342836, "learning_rate": 1.4796137521821274e-05, "loss": 0.0469, "step": 2715 }, { "epoch": 0.6, "grad_norm": 0.2994532560113849, "learning_rate": 1.4782400680840352e-05, "loss": 0.0335, "step": 2716 }, { "epoch": 0.6, "grad_norm": 0.32841658125444484, "learning_rate": 1.4768666480518432e-05, "loss": 0.0407, "step": 2717 }, { "epoch": 0.6, "grad_norm": 0.3524634367329856, "learning_rate": 1.4754934927806473e-05, "loss": 0.0466, "step": 2718 }, { "epoch": 0.6, "grad_norm": 0.3246530817861148, "learning_rate": 1.4741206029654098e-05, "loss": 0.0337, "step": 2719 }, { "epoch": 0.6, "grad_norm": 0.3791839722292734, "learning_rate": 1.472747979300959e-05, "loss": 0.0434, "step": 2720 }, { "epoch": 0.6, "grad_norm": 0.336279440002278, "learning_rate": 1.4713756224819872e-05, "loss": 0.0417, "step": 2721 }, { "epoch": 0.6, "grad_norm": 0.2882385625480761, "learning_rate": 1.4700035332030545e-05, "loss": 0.0374, "step": 2722 }, { "epoch": 0.6, "grad_norm": 0.28726647419164664, "learning_rate": 1.468631712158582e-05, "loss": 0.044, "step": 2723 }, { "epoch": 0.6, "grad_norm": 0.49525735590255704, "learning_rate": 1.4672601600428578e-05, "loss": 0.0589, "step": 2724 }, { "epoch": 0.6, "grad_norm": 0.3239146725506776, "learning_rate": 1.465888877550032e-05, "loss": 0.0361, "step": 2725 }, { "epoch": 0.6, "grad_norm": 0.31194783543268184, "learning_rate": 1.4645178653741194e-05, "loss": 0.0417, "step": 2726 }, { "epoch": 0.6, "grad_norm": 0.39087053211734085, "learning_rate": 1.4631471242089978e-05, "loss": 0.0435, "step": 2727 }, { "epoch": 0.6, "grad_norm": 0.34705907989168727, "learning_rate": 1.4617766547484075e-05, "loss": 0.0473, "step": 2728 }, { "epoch": 0.6, "grad_norm": 0.32899760973128733, "learning_rate": 1.4604064576859513e-05, "loss": 0.0309, "step": 2729 }, { "epoch": 0.6, "grad_norm": 0.4117188795693934, "learning_rate": 1.459036533715095e-05, "loss": 0.0578, "step": 2730 }, { "epoch": 0.6, "grad_norm": 0.37993141000758085, "learning_rate": 1.4576668835291654e-05, "loss": 0.0381, "step": 2731 }, { "epoch": 0.6, "grad_norm": 0.35835120282232763, "learning_rate": 1.4562975078213504e-05, "loss": 0.0458, "step": 2732 }, { "epoch": 0.6, "grad_norm": 0.4180931726097148, "learning_rate": 1.4549284072846996e-05, "loss": 0.0523, "step": 2733 }, { "epoch": 0.6, "grad_norm": 0.368766958848891, "learning_rate": 1.4535595826121233e-05, "loss": 0.0461, "step": 2734 }, { "epoch": 0.6, "grad_norm": 0.37347475448631995, "learning_rate": 1.4521910344963918e-05, "loss": 0.0523, "step": 2735 }, { "epoch": 0.6, "grad_norm": 0.2811875141454974, "learning_rate": 1.450822763630136e-05, "loss": 0.0296, "step": 2736 }, { "epoch": 0.6, "grad_norm": 0.3818052430755057, "learning_rate": 1.4494547707058459e-05, "loss": 0.0525, "step": 2737 }, { "epoch": 0.6, "grad_norm": 0.2714308816365113, "learning_rate": 1.4480870564158704e-05, "loss": 0.0256, "step": 2738 }, { "epoch": 0.6, "grad_norm": 0.3279851547325715, "learning_rate": 1.44671962145242e-05, "loss": 0.0396, "step": 2739 }, { "epoch": 0.6, "grad_norm": 0.3168864080154981, "learning_rate": 1.4453524665075607e-05, "loss": 0.0375, "step": 2740 }, { "epoch": 0.6, "grad_norm": 0.2895756110984442, "learning_rate": 1.4439855922732182e-05, "loss": 0.0352, "step": 2741 }, { "epoch": 0.6, "grad_norm": 0.3121535049197099, "learning_rate": 1.4426189994411756e-05, "loss": 0.0447, "step": 2742 }, { "epoch": 0.6, "grad_norm": 0.3011568251237467, "learning_rate": 1.4412526887030745e-05, "loss": 0.0447, "step": 2743 }, { "epoch": 0.6, "grad_norm": 0.34098788844481187, "learning_rate": 1.4398866607504128e-05, "loss": 0.0515, "step": 2744 }, { "epoch": 0.6, "grad_norm": 0.3039500285882022, "learning_rate": 1.4385209162745453e-05, "loss": 0.035, "step": 2745 }, { "epoch": 0.6, "grad_norm": 0.36113540046057563, "learning_rate": 1.4371554559666843e-05, "loss": 0.0576, "step": 2746 }, { "epoch": 0.6, "grad_norm": 0.4289938442926769, "learning_rate": 1.4357902805178965e-05, "loss": 0.0448, "step": 2747 }, { "epoch": 0.6, "grad_norm": 0.39897293689915525, "learning_rate": 1.434425390619107e-05, "loss": 0.0571, "step": 2748 }, { "epoch": 0.6, "grad_norm": 0.3216986321138276, "learning_rate": 1.4330607869610945e-05, "loss": 0.0371, "step": 2749 }, { "epoch": 0.6, "grad_norm": 0.4036541127822171, "learning_rate": 1.431696470234493e-05, "loss": 0.051, "step": 2750 }, { "epoch": 0.6, "grad_norm": 0.31114819926250076, "learning_rate": 1.4303324411297918e-05, "loss": 0.0492, "step": 2751 }, { "epoch": 0.6, "grad_norm": 0.3387100998933343, "learning_rate": 1.4289687003373342e-05, "loss": 0.0468, "step": 2752 }, { "epoch": 0.6, "grad_norm": 0.2328871549688841, "learning_rate": 1.4276052485473177e-05, "loss": 0.0239, "step": 2753 }, { "epoch": 0.6, "grad_norm": 0.3202228653331459, "learning_rate": 1.4262420864497939e-05, "loss": 0.0432, "step": 2754 }, { "epoch": 0.61, "grad_norm": 0.2696762524200947, "learning_rate": 1.4248792147346668e-05, "loss": 0.0317, "step": 2755 }, { "epoch": 0.61, "grad_norm": 0.31967655879116696, "learning_rate": 1.4235166340916955e-05, "loss": 0.045, "step": 2756 }, { "epoch": 0.61, "grad_norm": 0.2970760568117321, "learning_rate": 1.4221543452104891e-05, "loss": 0.0308, "step": 2757 }, { "epoch": 0.61, "grad_norm": 0.42458892253619795, "learning_rate": 1.4207923487805108e-05, "loss": 0.0672, "step": 2758 }, { "epoch": 0.61, "grad_norm": 0.33230513328162714, "learning_rate": 1.4194306454910757e-05, "loss": 0.0397, "step": 2759 }, { "epoch": 0.61, "grad_norm": 0.42762986739777514, "learning_rate": 1.4180692360313494e-05, "loss": 0.0554, "step": 2760 }, { "epoch": 0.61, "grad_norm": 0.27931077295684037, "learning_rate": 1.4167081210903501e-05, "loss": 0.04, "step": 2761 }, { "epoch": 0.61, "grad_norm": 0.41547331587630704, "learning_rate": 1.4153473013569468e-05, "loss": 0.0458, "step": 2762 }, { "epoch": 0.61, "grad_norm": 0.3026604592932078, "learning_rate": 1.413986777519858e-05, "loss": 0.0389, "step": 2763 }, { "epoch": 0.61, "grad_norm": 0.40036944821808274, "learning_rate": 1.412626550267653e-05, "loss": 0.0751, "step": 2764 }, { "epoch": 0.61, "grad_norm": 0.47896027439194794, "learning_rate": 1.4112666202887522e-05, "loss": 0.0722, "step": 2765 }, { "epoch": 0.61, "grad_norm": 0.27886565471312885, "learning_rate": 1.4099069882714236e-05, "loss": 0.038, "step": 2766 }, { "epoch": 0.61, "grad_norm": 0.30368425797912313, "learning_rate": 1.4085476549037856e-05, "loss": 0.0446, "step": 2767 }, { "epoch": 0.61, "grad_norm": 0.2981647908149259, "learning_rate": 1.4071886208738053e-05, "loss": 0.0388, "step": 2768 }, { "epoch": 0.61, "grad_norm": 0.33278599396225034, "learning_rate": 1.4058298868692979e-05, "loss": 0.0512, "step": 2769 }, { "epoch": 0.61, "grad_norm": 0.33682190007711665, "learning_rate": 1.4044714535779269e-05, "loss": 0.0424, "step": 2770 }, { "epoch": 0.61, "grad_norm": 0.30438516973860114, "learning_rate": 1.403113321687204e-05, "loss": 0.0483, "step": 2771 }, { "epoch": 0.61, "grad_norm": 0.33630166165650016, "learning_rate": 1.4017554918844872e-05, "loss": 0.0401, "step": 2772 }, { "epoch": 0.61, "grad_norm": 0.340924135150723, "learning_rate": 1.4003979648569839e-05, "loss": 0.0489, "step": 2773 }, { "epoch": 0.61, "grad_norm": 0.28300769245153057, "learning_rate": 1.3990407412917462e-05, "loss": 0.0283, "step": 2774 }, { "epoch": 0.61, "grad_norm": 0.3466167661491617, "learning_rate": 1.3976838218756733e-05, "loss": 0.0487, "step": 2775 }, { "epoch": 0.61, "grad_norm": 0.3346830945121258, "learning_rate": 1.3963272072955106e-05, "loss": 0.0462, "step": 2776 }, { "epoch": 0.61, "grad_norm": 0.29863913799445185, "learning_rate": 1.3949708982378487e-05, "loss": 0.0415, "step": 2777 }, { "epoch": 0.61, "grad_norm": 0.253393359450197, "learning_rate": 1.3936148953891242e-05, "loss": 0.0321, "step": 2778 }, { "epoch": 0.61, "grad_norm": 0.3637070570173247, "learning_rate": 1.392259199435618e-05, "loss": 0.0524, "step": 2779 }, { "epoch": 0.61, "grad_norm": 0.3759873425808452, "learning_rate": 1.3909038110634567e-05, "loss": 0.0573, "step": 2780 }, { "epoch": 0.61, "grad_norm": 0.28541006262766017, "learning_rate": 1.3895487309586097e-05, "loss": 0.0416, "step": 2781 }, { "epoch": 0.61, "grad_norm": 0.40106802601906233, "learning_rate": 1.388193959806893e-05, "loss": 0.0618, "step": 2782 }, { "epoch": 0.61, "grad_norm": 0.3662019652221195, "learning_rate": 1.3868394982939636e-05, "loss": 0.0489, "step": 2783 }, { "epoch": 0.61, "grad_norm": 0.34370404716597247, "learning_rate": 1.3854853471053225e-05, "loss": 0.0477, "step": 2784 }, { "epoch": 0.61, "grad_norm": 0.3543753733624572, "learning_rate": 1.3841315069263146e-05, "loss": 0.0482, "step": 2785 }, { "epoch": 0.61, "grad_norm": 0.35525774719252445, "learning_rate": 1.3827779784421262e-05, "loss": 0.0406, "step": 2786 }, { "epoch": 0.61, "grad_norm": 0.3808691561494414, "learning_rate": 1.3814247623377868e-05, "loss": 0.0578, "step": 2787 }, { "epoch": 0.61, "grad_norm": 0.23788466046753176, "learning_rate": 1.3800718592981668e-05, "loss": 0.0244, "step": 2788 }, { "epoch": 0.61, "grad_norm": 0.3964649815819659, "learning_rate": 1.3787192700079792e-05, "loss": 0.0508, "step": 2789 }, { "epoch": 0.61, "grad_norm": 0.26043611771738767, "learning_rate": 1.377366995151777e-05, "loss": 0.0312, "step": 2790 }, { "epoch": 0.61, "grad_norm": 0.37760732145197373, "learning_rate": 1.3760150354139558e-05, "loss": 0.0401, "step": 2791 }, { "epoch": 0.61, "grad_norm": 0.2952347920625057, "learning_rate": 1.3746633914787504e-05, "loss": 0.036, "step": 2792 }, { "epoch": 0.61, "grad_norm": 0.27096026759420505, "learning_rate": 1.3733120640302358e-05, "loss": 0.0276, "step": 2793 }, { "epoch": 0.61, "grad_norm": 0.28976262851872026, "learning_rate": 1.3719610537523274e-05, "loss": 0.0368, "step": 2794 }, { "epoch": 0.61, "grad_norm": 0.2947502773848208, "learning_rate": 1.3706103613287796e-05, "loss": 0.0366, "step": 2795 }, { "epoch": 0.61, "grad_norm": 0.36182000985970075, "learning_rate": 1.369259987443186e-05, "loss": 0.0538, "step": 2796 }, { "epoch": 0.61, "grad_norm": 0.2963917463149579, "learning_rate": 1.3679099327789794e-05, "loss": 0.0267, "step": 2797 }, { "epoch": 0.61, "grad_norm": 0.3148398893701771, "learning_rate": 1.3665601980194297e-05, "loss": 0.0496, "step": 2798 }, { "epoch": 0.61, "grad_norm": 0.3313389348355963, "learning_rate": 1.3652107838476476e-05, "loss": 0.0375, "step": 2799 }, { "epoch": 0.61, "grad_norm": 0.2519413849217148, "learning_rate": 1.3638616909465791e-05, "loss": 0.0359, "step": 2800 }, { "epoch": 0.62, "grad_norm": 0.32790310486681473, "learning_rate": 1.3625129199990083e-05, "loss": 0.0364, "step": 2801 }, { "epoch": 0.62, "grad_norm": 0.37081440902313717, "learning_rate": 1.3611644716875568e-05, "loss": 0.061, "step": 2802 }, { "epoch": 0.62, "grad_norm": 0.3476633542652982, "learning_rate": 1.3598163466946823e-05, "loss": 0.0408, "step": 2803 }, { "epoch": 0.62, "grad_norm": 0.3780332911227297, "learning_rate": 1.3584685457026789e-05, "loss": 0.0504, "step": 2804 }, { "epoch": 0.62, "grad_norm": 0.31770609896556845, "learning_rate": 1.3571210693936774e-05, "loss": 0.0388, "step": 2805 }, { "epoch": 0.62, "grad_norm": 0.31575286182698364, "learning_rate": 1.3557739184496435e-05, "loss": 0.0437, "step": 2806 }, { "epoch": 0.62, "grad_norm": 0.3594767177759614, "learning_rate": 1.3544270935523778e-05, "loss": 0.0447, "step": 2807 }, { "epoch": 0.62, "grad_norm": 0.36754325109400565, "learning_rate": 1.3530805953835182e-05, "loss": 0.0515, "step": 2808 }, { "epoch": 0.62, "grad_norm": 0.4071580934482072, "learning_rate": 1.351734424624535e-05, "loss": 0.0578, "step": 2809 }, { "epoch": 0.62, "grad_norm": 0.3646757576366893, "learning_rate": 1.3503885819567335e-05, "loss": 0.0547, "step": 2810 }, { "epoch": 0.62, "grad_norm": 0.2684636603321318, "learning_rate": 1.3490430680612528e-05, "loss": 0.0354, "step": 2811 }, { "epoch": 0.62, "grad_norm": 0.28134491431539715, "learning_rate": 1.3476978836190658e-05, "loss": 0.0264, "step": 2812 }, { "epoch": 0.62, "grad_norm": 0.33385682598409755, "learning_rate": 1.3463530293109783e-05, "loss": 0.0379, "step": 2813 }, { "epoch": 0.62, "grad_norm": 0.32932744636579747, "learning_rate": 1.34500850581763e-05, "loss": 0.0367, "step": 2814 }, { "epoch": 0.62, "grad_norm": 0.37019697552233266, "learning_rate": 1.3436643138194918e-05, "loss": 0.0495, "step": 2815 }, { "epoch": 0.62, "grad_norm": 0.358547314897313, "learning_rate": 1.3423204539968677e-05, "loss": 0.0321, "step": 2816 }, { "epoch": 0.62, "grad_norm": 0.2801113922919793, "learning_rate": 1.3409769270298934e-05, "loss": 0.029, "step": 2817 }, { "epoch": 0.62, "grad_norm": 0.3508957261880292, "learning_rate": 1.3396337335985361e-05, "loss": 0.0336, "step": 2818 }, { "epoch": 0.62, "grad_norm": 0.3909390712800989, "learning_rate": 1.3382908743825947e-05, "loss": 0.0522, "step": 2819 }, { "epoch": 0.62, "grad_norm": 0.2928226106459401, "learning_rate": 1.336948350061698e-05, "loss": 0.0346, "step": 2820 }, { "epoch": 0.62, "grad_norm": 0.2954126026903477, "learning_rate": 1.335606161315306e-05, "loss": 0.0261, "step": 2821 }, { "epoch": 0.62, "grad_norm": 0.2717804015217529, "learning_rate": 1.3342643088227085e-05, "loss": 0.0398, "step": 2822 }, { "epoch": 0.62, "grad_norm": 0.33073288121978467, "learning_rate": 1.3329227932630255e-05, "loss": 0.0388, "step": 2823 }, { "epoch": 0.62, "grad_norm": 0.33370518308471864, "learning_rate": 1.3315816153152055e-05, "loss": 0.0439, "step": 2824 }, { "epoch": 0.62, "grad_norm": 0.30608677504159165, "learning_rate": 1.3302407756580278e-05, "loss": 0.0387, "step": 2825 }, { "epoch": 0.62, "grad_norm": 0.33089457778381837, "learning_rate": 1.3289002749700992e-05, "loss": 0.0325, "step": 2826 }, { "epoch": 0.62, "grad_norm": 0.4467097997419248, "learning_rate": 1.3275601139298556e-05, "loss": 0.0457, "step": 2827 }, { "epoch": 0.62, "grad_norm": 0.3706083636868158, "learning_rate": 1.3262202932155602e-05, "loss": 0.0484, "step": 2828 }, { "epoch": 0.62, "grad_norm": 0.3404835345982751, "learning_rate": 1.3248808135053048e-05, "loss": 0.0498, "step": 2829 }, { "epoch": 0.62, "grad_norm": 0.2949020555922916, "learning_rate": 1.3235416754770082e-05, "loss": 0.0475, "step": 2830 }, { "epoch": 0.62, "grad_norm": 0.377446064790745, "learning_rate": 1.3222028798084165e-05, "loss": 0.0534, "step": 2831 }, { "epoch": 0.62, "grad_norm": 0.32032310654361856, "learning_rate": 1.3208644271771026e-05, "loss": 0.0443, "step": 2832 }, { "epoch": 0.62, "grad_norm": 0.37997190318260876, "learning_rate": 1.3195263182604638e-05, "loss": 0.0436, "step": 2833 }, { "epoch": 0.62, "grad_norm": 0.3834620822777257, "learning_rate": 1.3181885537357277e-05, "loss": 0.0493, "step": 2834 }, { "epoch": 0.62, "grad_norm": 0.2942071173338933, "learning_rate": 1.3168511342799444e-05, "loss": 0.0337, "step": 2835 }, { "epoch": 0.62, "grad_norm": 0.30761294891828145, "learning_rate": 1.3155140605699894e-05, "loss": 0.0422, "step": 2836 }, { "epoch": 0.62, "grad_norm": 0.3319158914665372, "learning_rate": 1.3141773332825647e-05, "loss": 0.0331, "step": 2837 }, { "epoch": 0.62, "grad_norm": 0.44578035773700425, "learning_rate": 1.3128409530941957e-05, "loss": 0.0581, "step": 2838 }, { "epoch": 0.62, "grad_norm": 0.32801149312387384, "learning_rate": 1.3115049206812325e-05, "loss": 0.0507, "step": 2839 }, { "epoch": 0.62, "grad_norm": 0.382789377565198, "learning_rate": 1.3101692367198498e-05, "loss": 0.0921, "step": 2840 }, { "epoch": 0.62, "grad_norm": 0.30446166447700507, "learning_rate": 1.3088339018860439e-05, "loss": 0.0435, "step": 2841 }, { "epoch": 0.62, "grad_norm": 0.39309102804653384, "learning_rate": 1.307498916855638e-05, "loss": 0.0453, "step": 2842 }, { "epoch": 0.62, "grad_norm": 0.2857678139199307, "learning_rate": 1.3061642823042757e-05, "loss": 0.039, "step": 2843 }, { "epoch": 0.62, "grad_norm": 0.3226794735584289, "learning_rate": 1.3048299989074234e-05, "loss": 0.0423, "step": 2844 }, { "epoch": 0.62, "grad_norm": 0.22917456866136676, "learning_rate": 1.3034960673403699e-05, "loss": 0.0292, "step": 2845 }, { "epoch": 0.63, "grad_norm": 0.2897688766383443, "learning_rate": 1.3021624882782262e-05, "loss": 0.0356, "step": 2846 }, { "epoch": 0.63, "grad_norm": 0.3024013533413087, "learning_rate": 1.3008292623959253e-05, "loss": 0.0429, "step": 2847 }, { "epoch": 0.63, "grad_norm": 0.3064032443695261, "learning_rate": 1.2994963903682205e-05, "loss": 0.0392, "step": 2848 }, { "epoch": 0.63, "grad_norm": 0.30547864610100867, "learning_rate": 1.2981638728696868e-05, "loss": 0.0475, "step": 2849 }, { "epoch": 0.63, "grad_norm": 0.3186415609393602, "learning_rate": 1.2968317105747189e-05, "loss": 0.043, "step": 2850 }, { "epoch": 0.63, "grad_norm": 0.32282427137613623, "learning_rate": 1.2954999041575331e-05, "loss": 0.0412, "step": 2851 }, { "epoch": 0.63, "grad_norm": 0.3826593365122257, "learning_rate": 1.2941684542921646e-05, "loss": 0.0493, "step": 2852 }, { "epoch": 0.63, "grad_norm": 0.2961884893065571, "learning_rate": 1.2928373616524682e-05, "loss": 0.0288, "step": 2853 }, { "epoch": 0.63, "grad_norm": 0.3475935590133547, "learning_rate": 1.291506626912118e-05, "loss": 0.0475, "step": 2854 }, { "epoch": 0.63, "grad_norm": 0.35536169179632393, "learning_rate": 1.290176250744607e-05, "loss": 0.0425, "step": 2855 }, { "epoch": 0.63, "grad_norm": 0.3053531955003352, "learning_rate": 1.2888462338232466e-05, "loss": 0.0371, "step": 2856 }, { "epoch": 0.63, "grad_norm": 0.2528767475118705, "learning_rate": 1.287516576821167e-05, "loss": 0.0245, "step": 2857 }, { "epoch": 0.63, "grad_norm": 0.2888324182721138, "learning_rate": 1.2861872804113154e-05, "loss": 0.0376, "step": 2858 }, { "epoch": 0.63, "grad_norm": 0.3111029151397963, "learning_rate": 1.284858345266456e-05, "loss": 0.0434, "step": 2859 }, { "epoch": 0.63, "grad_norm": 0.42479953105033164, "learning_rate": 1.2835297720591729e-05, "loss": 0.0472, "step": 2860 }, { "epoch": 0.63, "grad_norm": 0.2913783456363997, "learning_rate": 1.282201561461864e-05, "loss": 0.0347, "step": 2861 }, { "epoch": 0.63, "grad_norm": 0.37374209985708284, "learning_rate": 1.2808737141467451e-05, "loss": 0.0481, "step": 2862 }, { "epoch": 0.63, "grad_norm": 0.3775900958717164, "learning_rate": 1.2795462307858478e-05, "loss": 0.0506, "step": 2863 }, { "epoch": 0.63, "grad_norm": 0.31289014304287066, "learning_rate": 1.2782191120510196e-05, "loss": 0.0361, "step": 2864 }, { "epoch": 0.63, "grad_norm": 0.2958720733681559, "learning_rate": 1.2768923586139232e-05, "loss": 0.0393, "step": 2865 }, { "epoch": 0.63, "grad_norm": 0.3559906869055891, "learning_rate": 1.275565971146037e-05, "loss": 0.048, "step": 2866 }, { "epoch": 0.63, "grad_norm": 0.2940382066111115, "learning_rate": 1.2742399503186528e-05, "loss": 0.0433, "step": 2867 }, { "epoch": 0.63, "grad_norm": 0.3047524162975901, "learning_rate": 1.2729142968028793e-05, "loss": 0.0368, "step": 2868 }, { "epoch": 0.63, "grad_norm": 0.28501375112905175, "learning_rate": 1.2715890112696379e-05, "loss": 0.0292, "step": 2869 }, { "epoch": 0.63, "grad_norm": 0.2598049162784958, "learning_rate": 1.2702640943896625e-05, "loss": 0.0312, "step": 2870 }, { "epoch": 0.63, "grad_norm": 0.3311882312332492, "learning_rate": 1.2689395468335027e-05, "loss": 0.0424, "step": 2871 }, { "epoch": 0.63, "grad_norm": 0.3100638922291191, "learning_rate": 1.2676153692715195e-05, "loss": 0.0322, "step": 2872 }, { "epoch": 0.63, "grad_norm": 0.3158769536699534, "learning_rate": 1.2662915623738874e-05, "loss": 0.0401, "step": 2873 }, { "epoch": 0.63, "grad_norm": 0.3905736317237826, "learning_rate": 1.2649681268105933e-05, "loss": 0.0494, "step": 2874 }, { "epoch": 0.63, "grad_norm": 0.3623962492290295, "learning_rate": 1.263645063251436e-05, "loss": 0.0494, "step": 2875 }, { "epoch": 0.63, "grad_norm": 0.3047706172742518, "learning_rate": 1.2623223723660258e-05, "loss": 0.0402, "step": 2876 }, { "epoch": 0.63, "grad_norm": 0.29039474153923495, "learning_rate": 1.2610000548237851e-05, "loss": 0.0345, "step": 2877 }, { "epoch": 0.63, "grad_norm": 0.42741917183463485, "learning_rate": 1.259678111293947e-05, "loss": 0.0456, "step": 2878 }, { "epoch": 0.63, "grad_norm": 0.36327702509566523, "learning_rate": 1.2583565424455552e-05, "loss": 0.0591, "step": 2879 }, { "epoch": 0.63, "grad_norm": 0.25609254768094003, "learning_rate": 1.2570353489474637e-05, "loss": 0.0385, "step": 2880 }, { "epoch": 0.63, "grad_norm": 0.3580431515431179, "learning_rate": 1.2557145314683364e-05, "loss": 0.0451, "step": 2881 }, { "epoch": 0.63, "grad_norm": 0.24723126894418496, "learning_rate": 1.254394090676647e-05, "loss": 0.0337, "step": 2882 }, { "epoch": 0.63, "grad_norm": 0.24767297062551186, "learning_rate": 1.2530740272406792e-05, "loss": 0.0307, "step": 2883 }, { "epoch": 0.63, "grad_norm": 0.2776789184659262, "learning_rate": 1.2517543418285247e-05, "loss": 0.0334, "step": 2884 }, { "epoch": 0.63, "grad_norm": 0.32881618544241237, "learning_rate": 1.2504350351080845e-05, "loss": 0.0403, "step": 2885 }, { "epoch": 0.63, "grad_norm": 0.3183871649098128, "learning_rate": 1.2491161077470682e-05, "loss": 0.0448, "step": 2886 }, { "epoch": 0.63, "grad_norm": 0.2888848042097438, "learning_rate": 1.2477975604129929e-05, "loss": 0.0389, "step": 2887 }, { "epoch": 0.63, "grad_norm": 0.3045754593476319, "learning_rate": 1.2464793937731831e-05, "loss": 0.0383, "step": 2888 }, { "epoch": 0.63, "grad_norm": 0.42008218588417245, "learning_rate": 1.2451616084947714e-05, "loss": 0.0645, "step": 2889 }, { "epoch": 0.63, "grad_norm": 0.2813345199320012, "learning_rate": 1.243844205244697e-05, "loss": 0.0334, "step": 2890 }, { "epoch": 0.63, "grad_norm": 0.37685954459211185, "learning_rate": 1.2425271846897053e-05, "loss": 0.0558, "step": 2891 }, { "epoch": 0.64, "grad_norm": 0.4008627736124671, "learning_rate": 1.2412105474963491e-05, "loss": 0.0533, "step": 2892 }, { "epoch": 0.64, "grad_norm": 0.4143105789384026, "learning_rate": 1.2398942943309855e-05, "loss": 0.066, "step": 2893 }, { "epoch": 0.64, "grad_norm": 0.34944816154299846, "learning_rate": 1.2385784258597796e-05, "loss": 0.0427, "step": 2894 }, { "epoch": 0.64, "grad_norm": 0.3303130854129566, "learning_rate": 1.2372629427487e-05, "loss": 0.05, "step": 2895 }, { "epoch": 0.64, "grad_norm": 0.2697725459850731, "learning_rate": 1.235947845663521e-05, "loss": 0.027, "step": 2896 }, { "epoch": 0.64, "grad_norm": 0.3182834932663021, "learning_rate": 1.2346331352698206e-05, "loss": 0.0363, "step": 2897 }, { "epoch": 0.64, "grad_norm": 0.24598310647552704, "learning_rate": 1.2333188122329824e-05, "loss": 0.0408, "step": 2898 }, { "epoch": 0.64, "grad_norm": 0.29446130119261393, "learning_rate": 1.2320048772181932e-05, "loss": 0.0401, "step": 2899 }, { "epoch": 0.64, "grad_norm": 0.27915071784669715, "learning_rate": 1.2306913308904435e-05, "loss": 0.0265, "step": 2900 }, { "epoch": 0.64, "grad_norm": 0.31153922205430945, "learning_rate": 1.2293781739145274e-05, "loss": 0.0347, "step": 2901 }, { "epoch": 0.64, "grad_norm": 0.2740717752120284, "learning_rate": 1.2280654069550404e-05, "loss": 0.0375, "step": 2902 }, { "epoch": 0.64, "grad_norm": 0.32661056712366027, "learning_rate": 1.2267530306763837e-05, "loss": 0.0329, "step": 2903 }, { "epoch": 0.64, "grad_norm": 0.25680366284318407, "learning_rate": 1.2254410457427581e-05, "loss": 0.0272, "step": 2904 }, { "epoch": 0.64, "grad_norm": 0.40490566249909893, "learning_rate": 1.2241294528181678e-05, "loss": 0.0406, "step": 2905 }, { "epoch": 0.64, "grad_norm": 0.24799753434259125, "learning_rate": 1.2228182525664175e-05, "loss": 0.0265, "step": 2906 }, { "epoch": 0.64, "grad_norm": 0.32278382224724494, "learning_rate": 1.2215074456511136e-05, "loss": 0.0349, "step": 2907 }, { "epoch": 0.64, "grad_norm": 0.2880573193834743, "learning_rate": 1.2201970327356639e-05, "loss": 0.0378, "step": 2908 }, { "epoch": 0.64, "grad_norm": 0.3387098689255451, "learning_rate": 1.2188870144832758e-05, "loss": 0.0433, "step": 2909 }, { "epoch": 0.64, "grad_norm": 0.3689953957324699, "learning_rate": 1.217577391556958e-05, "loss": 0.0433, "step": 2910 }, { "epoch": 0.64, "grad_norm": 0.28615291381637514, "learning_rate": 1.2162681646195187e-05, "loss": 0.0269, "step": 2911 }, { "epoch": 0.64, "grad_norm": 0.37840625375641673, "learning_rate": 1.2149593343335658e-05, "loss": 0.0417, "step": 2912 }, { "epoch": 0.64, "grad_norm": 0.33880744429424287, "learning_rate": 1.2136509013615063e-05, "loss": 0.0418, "step": 2913 }, { "epoch": 0.64, "grad_norm": 0.2916809260197858, "learning_rate": 1.2123428663655457e-05, "loss": 0.0345, "step": 2914 }, { "epoch": 0.64, "grad_norm": 0.349445347161046, "learning_rate": 1.211035230007689e-05, "loss": 0.0398, "step": 2915 }, { "epoch": 0.64, "grad_norm": 0.30144971408574184, "learning_rate": 1.209727992949739e-05, "loss": 0.0345, "step": 2916 }, { "epoch": 0.64, "grad_norm": 0.28327437195210436, "learning_rate": 1.2084211558532958e-05, "loss": 0.0269, "step": 2917 }, { "epoch": 0.64, "grad_norm": 0.35992758188097324, "learning_rate": 1.2071147193797578e-05, "loss": 0.0456, "step": 2918 }, { "epoch": 0.64, "grad_norm": 0.3139806835397025, "learning_rate": 1.2058086841903211e-05, "loss": 0.0362, "step": 2919 }, { "epoch": 0.64, "grad_norm": 0.33326614964467255, "learning_rate": 1.204503050945978e-05, "loss": 0.0426, "step": 2920 }, { "epoch": 0.64, "grad_norm": 0.26053383893253446, "learning_rate": 1.2031978203075172e-05, "loss": 0.032, "step": 2921 }, { "epoch": 0.64, "grad_norm": 0.2218677210621221, "learning_rate": 1.2018929929355241e-05, "loss": 0.0185, "step": 2922 }, { "epoch": 0.64, "grad_norm": 0.4643167666122263, "learning_rate": 1.2005885694903796e-05, "loss": 0.0613, "step": 2923 }, { "epoch": 0.64, "grad_norm": 0.3133421636518249, "learning_rate": 1.1992845506322607e-05, "loss": 0.0306, "step": 2924 }, { "epoch": 0.64, "grad_norm": 0.3368456784936427, "learning_rate": 1.1979809370211392e-05, "loss": 0.0441, "step": 2925 }, { "epoch": 0.64, "grad_norm": 0.3515826537395515, "learning_rate": 1.196677729316782e-05, "loss": 0.0424, "step": 2926 }, { "epoch": 0.64, "grad_norm": 0.32891554060474937, "learning_rate": 1.1953749281787502e-05, "loss": 0.0388, "step": 2927 }, { "epoch": 0.64, "grad_norm": 0.383314698567998, "learning_rate": 1.194072534266399e-05, "loss": 0.0481, "step": 2928 }, { "epoch": 0.64, "grad_norm": 0.2887688253301843, "learning_rate": 1.1927705482388794e-05, "loss": 0.0403, "step": 2929 }, { "epoch": 0.64, "grad_norm": 0.3175047889483473, "learning_rate": 1.1914689707551337e-05, "loss": 0.0392, "step": 2930 }, { "epoch": 0.64, "grad_norm": 0.3006459960029491, "learning_rate": 1.1901678024738983e-05, "loss": 0.0337, "step": 2931 }, { "epoch": 0.64, "grad_norm": 0.3369660599983346, "learning_rate": 1.1888670440537025e-05, "loss": 0.0335, "step": 2932 }, { "epoch": 0.64, "grad_norm": 0.32840089325379707, "learning_rate": 1.1875666961528679e-05, "loss": 0.0336, "step": 2933 }, { "epoch": 0.64, "grad_norm": 0.31666973116713637, "learning_rate": 1.1862667594295086e-05, "loss": 0.043, "step": 2934 }, { "epoch": 0.64, "grad_norm": 0.28063185632481785, "learning_rate": 1.1849672345415306e-05, "loss": 0.0425, "step": 2935 }, { "epoch": 0.64, "grad_norm": 0.34532325693352134, "learning_rate": 1.1836681221466308e-05, "loss": 0.0393, "step": 2936 }, { "epoch": 0.65, "grad_norm": 0.30076662250967534, "learning_rate": 1.1823694229022995e-05, "loss": 0.0391, "step": 2937 }, { "epoch": 0.65, "grad_norm": 0.3062019003833736, "learning_rate": 1.181071137465815e-05, "loss": 0.035, "step": 2938 }, { "epoch": 0.65, "grad_norm": 0.37222397001806645, "learning_rate": 1.1797732664942481e-05, "loss": 0.0744, "step": 2939 }, { "epoch": 0.65, "grad_norm": 0.2864970600075523, "learning_rate": 1.1784758106444594e-05, "loss": 0.039, "step": 2940 }, { "epoch": 0.65, "grad_norm": 0.2879502575008263, "learning_rate": 1.1771787705730983e-05, "loss": 0.0304, "step": 2941 }, { "epoch": 0.65, "grad_norm": 0.31231118697034904, "learning_rate": 1.175882146936606e-05, "loss": 0.0406, "step": 2942 }, { "epoch": 0.65, "grad_norm": 0.3410455347601863, "learning_rate": 1.1745859403912108e-05, "loss": 0.0366, "step": 2943 }, { "epoch": 0.65, "grad_norm": 0.30274079838983087, "learning_rate": 1.1732901515929312e-05, "loss": 0.0391, "step": 2944 }, { "epoch": 0.65, "grad_norm": 0.3174696348718507, "learning_rate": 1.1719947811975732e-05, "loss": 0.0403, "step": 2945 }, { "epoch": 0.65, "grad_norm": 0.31613914376879376, "learning_rate": 1.1706998298607325e-05, "loss": 0.0428, "step": 2946 }, { "epoch": 0.65, "grad_norm": 0.43372456407309057, "learning_rate": 1.1694052982377915e-05, "loss": 0.0508, "step": 2947 }, { "epoch": 0.65, "grad_norm": 0.27547772700698475, "learning_rate": 1.1681111869839209e-05, "loss": 0.0354, "step": 2948 }, { "epoch": 0.65, "grad_norm": 0.23440111332696512, "learning_rate": 1.166817496754078e-05, "loss": 0.0248, "step": 2949 }, { "epoch": 0.65, "grad_norm": 0.6419558309085079, "learning_rate": 1.1655242282030068e-05, "loss": 0.0934, "step": 2950 }, { "epoch": 0.65, "grad_norm": 0.4207470437930287, "learning_rate": 1.1642313819852405e-05, "loss": 0.0574, "step": 2951 }, { "epoch": 0.65, "grad_norm": 0.2677592757227379, "learning_rate": 1.1629389587550939e-05, "loss": 0.0438, "step": 2952 }, { "epoch": 0.65, "grad_norm": 0.30039862038000953, "learning_rate": 1.1616469591666725e-05, "loss": 0.0329, "step": 2953 }, { "epoch": 0.65, "grad_norm": 0.2554436429253239, "learning_rate": 1.1603553838738635e-05, "loss": 0.0307, "step": 2954 }, { "epoch": 0.65, "grad_norm": 0.3277463772091227, "learning_rate": 1.1590642335303417e-05, "loss": 0.0348, "step": 2955 }, { "epoch": 0.65, "grad_norm": 0.28809162716177045, "learning_rate": 1.1577735087895664e-05, "loss": 0.0267, "step": 2956 }, { "epoch": 0.65, "grad_norm": 0.2916931356045432, "learning_rate": 1.1564832103047818e-05, "loss": 0.0361, "step": 2957 }, { "epoch": 0.65, "grad_norm": 0.2786283695535751, "learning_rate": 1.1551933387290149e-05, "loss": 0.0338, "step": 2958 }, { "epoch": 0.65, "grad_norm": 0.3756814169764887, "learning_rate": 1.1539038947150783e-05, "loss": 0.0556, "step": 2959 }, { "epoch": 0.65, "grad_norm": 0.3044743158896093, "learning_rate": 1.152614878915567e-05, "loss": 0.0357, "step": 2960 }, { "epoch": 0.65, "grad_norm": 0.2520853479294366, "learning_rate": 1.1513262919828603e-05, "loss": 0.0318, "step": 2961 }, { "epoch": 0.65, "grad_norm": 0.30014479788558823, "learning_rate": 1.1500381345691192e-05, "loss": 0.0374, "step": 2962 }, { "epoch": 0.65, "grad_norm": 0.37943443373256613, "learning_rate": 1.1487504073262886e-05, "loss": 0.0519, "step": 2963 }, { "epoch": 0.65, "grad_norm": 0.26828896988529727, "learning_rate": 1.1474631109060957e-05, "loss": 0.0327, "step": 2964 }, { "epoch": 0.65, "grad_norm": 0.3523671584378296, "learning_rate": 1.1461762459600476e-05, "loss": 0.0442, "step": 2965 }, { "epoch": 0.65, "grad_norm": 0.26789693565200207, "learning_rate": 1.1448898131394364e-05, "loss": 0.0293, "step": 2966 }, { "epoch": 0.65, "grad_norm": 0.2616347989618018, "learning_rate": 1.1436038130953317e-05, "loss": 0.0356, "step": 2967 }, { "epoch": 0.65, "grad_norm": 0.23439360385810168, "learning_rate": 1.142318246478588e-05, "loss": 0.0297, "step": 2968 }, { "epoch": 0.65, "grad_norm": 0.27644543837366153, "learning_rate": 1.1410331139398365e-05, "loss": 0.0425, "step": 2969 }, { "epoch": 0.65, "grad_norm": 0.31879390713338973, "learning_rate": 1.1397484161294924e-05, "loss": 0.0387, "step": 2970 }, { "epoch": 0.65, "grad_norm": 0.2862545054093661, "learning_rate": 1.138464153697747e-05, "loss": 0.0386, "step": 2971 }, { "epoch": 0.65, "grad_norm": 0.3208737689501007, "learning_rate": 1.1371803272945759e-05, "loss": 0.0434, "step": 2972 }, { "epoch": 0.65, "grad_norm": 0.3828787259796405, "learning_rate": 1.1358969375697297e-05, "loss": 0.0419, "step": 2973 }, { "epoch": 0.65, "grad_norm": 0.34019136737319144, "learning_rate": 1.1346139851727412e-05, "loss": 0.0394, "step": 2974 }, { "epoch": 0.65, "grad_norm": 0.28714105859439243, "learning_rate": 1.1333314707529188e-05, "loss": 0.0325, "step": 2975 }, { "epoch": 0.65, "grad_norm": 0.24338669906714824, "learning_rate": 1.1320493949593528e-05, "loss": 0.0251, "step": 2976 }, { "epoch": 0.65, "grad_norm": 0.23301690332500383, "learning_rate": 1.1307677584409076e-05, "loss": 0.0299, "step": 2977 }, { "epoch": 0.65, "grad_norm": 0.3014458336750867, "learning_rate": 1.1294865618462294e-05, "loss": 0.0355, "step": 2978 }, { "epoch": 0.65, "grad_norm": 0.2848679116820537, "learning_rate": 1.128205805823737e-05, "loss": 0.0279, "step": 2979 }, { "epoch": 0.65, "grad_norm": 0.3149477107873396, "learning_rate": 1.1269254910216316e-05, "loss": 0.0406, "step": 2980 }, { "epoch": 0.65, "grad_norm": 0.3981554491624712, "learning_rate": 1.1256456180878867e-05, "loss": 0.0541, "step": 2981 }, { "epoch": 0.65, "grad_norm": 0.3531659752357583, "learning_rate": 1.1243661876702552e-05, "loss": 0.0536, "step": 2982 }, { "epoch": 0.66, "grad_norm": 0.3250805534889989, "learning_rate": 1.1230872004162631e-05, "loss": 0.0537, "step": 2983 }, { "epoch": 0.66, "grad_norm": 0.2604980611756454, "learning_rate": 1.1218086569732152e-05, "loss": 0.0323, "step": 2984 }, { "epoch": 0.66, "grad_norm": 0.26375732450122863, "learning_rate": 1.1205305579881883e-05, "loss": 0.0361, "step": 2985 }, { "epoch": 0.66, "grad_norm": 0.2597553996891595, "learning_rate": 1.1192529041080382e-05, "loss": 0.0243, "step": 2986 }, { "epoch": 0.66, "grad_norm": 0.30060744527767147, "learning_rate": 1.1179756959793918e-05, "loss": 0.0306, "step": 2987 }, { "epoch": 0.66, "grad_norm": 0.35249120117934213, "learning_rate": 1.1166989342486524e-05, "loss": 0.0486, "step": 2988 }, { "epoch": 0.66, "grad_norm": 0.26966911052098275, "learning_rate": 1.1154226195619979e-05, "loss": 0.0249, "step": 2989 }, { "epoch": 0.66, "grad_norm": 0.3111691508172709, "learning_rate": 1.1141467525653773e-05, "loss": 0.0332, "step": 2990 }, { "epoch": 0.66, "grad_norm": 0.2694851241517991, "learning_rate": 1.1128713339045162e-05, "loss": 0.0269, "step": 2991 }, { "epoch": 0.66, "grad_norm": 0.4213832001964649, "learning_rate": 1.1115963642249107e-05, "loss": 0.0444, "step": 2992 }, { "epoch": 0.66, "grad_norm": 0.2772025299513124, "learning_rate": 1.110321844171832e-05, "loss": 0.0401, "step": 2993 }, { "epoch": 0.66, "grad_norm": 0.3647805520649611, "learning_rate": 1.1090477743903212e-05, "loss": 0.0505, "step": 2994 }, { "epoch": 0.66, "grad_norm": 0.26937476051921777, "learning_rate": 1.1077741555251938e-05, "loss": 0.0311, "step": 2995 }, { "epoch": 0.66, "grad_norm": 0.2771986729672004, "learning_rate": 1.1065009882210352e-05, "loss": 0.0404, "step": 2996 }, { "epoch": 0.66, "grad_norm": 0.2800261839050899, "learning_rate": 1.1052282731222035e-05, "loss": 0.0341, "step": 2997 }, { "epoch": 0.66, "grad_norm": 0.24857849312844407, "learning_rate": 1.1039560108728277e-05, "loss": 0.0315, "step": 2998 }, { "epoch": 0.66, "grad_norm": 0.3018654689903294, "learning_rate": 1.1026842021168088e-05, "loss": 0.032, "step": 2999 }, { "epoch": 0.66, "grad_norm": 0.3464088564074809, "learning_rate": 1.101412847497815e-05, "loss": 0.0424, "step": 3000 }, { "epoch": 0.66, "grad_norm": 0.3691593750727993, "learning_rate": 1.100141947659288e-05, "loss": 0.0518, "step": 3001 }, { "epoch": 0.66, "grad_norm": 0.3224610978512565, "learning_rate": 1.0988715032444369e-05, "loss": 0.0417, "step": 3002 }, { "epoch": 0.66, "grad_norm": 0.2211300173153049, "learning_rate": 1.0976015148962427e-05, "loss": 0.0245, "step": 3003 }, { "epoch": 0.66, "grad_norm": 0.3657974757425489, "learning_rate": 1.0963319832574528e-05, "loss": 0.0489, "step": 3004 }, { "epoch": 0.66, "grad_norm": 0.2633445827705902, "learning_rate": 1.0950629089705857e-05, "loss": 0.028, "step": 3005 }, { "epoch": 0.66, "grad_norm": 0.24596810539431577, "learning_rate": 1.0937942926779279e-05, "loss": 0.0301, "step": 3006 }, { "epoch": 0.66, "grad_norm": 0.266475549896758, "learning_rate": 1.0925261350215344e-05, "loss": 0.0342, "step": 3007 }, { "epoch": 0.66, "grad_norm": 0.31072261324263634, "learning_rate": 1.091258436643226e-05, "loss": 0.0427, "step": 3008 }, { "epoch": 0.66, "grad_norm": 0.2857607873897929, "learning_rate": 1.0899911981845946e-05, "loss": 0.038, "step": 3009 }, { "epoch": 0.66, "grad_norm": 0.24947990020506072, "learning_rate": 1.0887244202869951e-05, "loss": 0.0364, "step": 3010 }, { "epoch": 0.66, "grad_norm": 0.27201095119687674, "learning_rate": 1.0874581035915534e-05, "loss": 0.0297, "step": 3011 }, { "epoch": 0.66, "grad_norm": 0.2753170131482302, "learning_rate": 1.0861922487391588e-05, "loss": 0.0312, "step": 3012 }, { "epoch": 0.66, "grad_norm": 0.29611178219925743, "learning_rate": 1.0849268563704696e-05, "loss": 0.034, "step": 3013 }, { "epoch": 0.66, "grad_norm": 0.3435229722174912, "learning_rate": 1.0836619271259072e-05, "loss": 0.0441, "step": 3014 }, { "epoch": 0.66, "grad_norm": 0.2658495654982007, "learning_rate": 1.0823974616456607e-05, "loss": 0.0338, "step": 3015 }, { "epoch": 0.66, "grad_norm": 0.29094652981374264, "learning_rate": 1.0811334605696837e-05, "loss": 0.0371, "step": 3016 }, { "epoch": 0.66, "grad_norm": 0.252277608889838, "learning_rate": 1.0798699245376959e-05, "loss": 0.0273, "step": 3017 }, { "epoch": 0.66, "grad_norm": 0.2975109341359133, "learning_rate": 1.078606854189179e-05, "loss": 0.0383, "step": 3018 }, { "epoch": 0.66, "grad_norm": 0.36072595267535207, "learning_rate": 1.0773442501633822e-05, "loss": 0.0433, "step": 3019 }, { "epoch": 0.66, "grad_norm": 0.3628036271553774, "learning_rate": 1.0760821130993157e-05, "loss": 0.0625, "step": 3020 }, { "epoch": 0.66, "grad_norm": 0.3527759379658487, "learning_rate": 1.0748204436357562e-05, "loss": 0.0415, "step": 3021 }, { "epoch": 0.66, "grad_norm": 0.37273677699622976, "learning_rate": 1.0735592424112404e-05, "loss": 0.0464, "step": 3022 }, { "epoch": 0.66, "grad_norm": 0.3031672797261527, "learning_rate": 1.0722985100640717e-05, "loss": 0.0437, "step": 3023 }, { "epoch": 0.66, "grad_norm": 0.4999697348256345, "learning_rate": 1.0710382472323145e-05, "loss": 0.099, "step": 3024 }, { "epoch": 0.66, "grad_norm": 0.29739183060223257, "learning_rate": 1.0697784545537943e-05, "loss": 0.0391, "step": 3025 }, { "epoch": 0.66, "grad_norm": 0.3408796045707868, "learning_rate": 1.0685191326661015e-05, "loss": 0.0495, "step": 3026 }, { "epoch": 0.66, "grad_norm": 0.3168906283615832, "learning_rate": 1.0672602822065845e-05, "loss": 0.0412, "step": 3027 }, { "epoch": 0.67, "grad_norm": 0.22778330125223972, "learning_rate": 1.0660019038123577e-05, "loss": 0.0265, "step": 3028 }, { "epoch": 0.67, "grad_norm": 0.31655710650685553, "learning_rate": 1.0647439981202918e-05, "loss": 0.0323, "step": 3029 }, { "epoch": 0.67, "grad_norm": 0.2903265250999385, "learning_rate": 1.0634865657670227e-05, "loss": 0.0455, "step": 3030 }, { "epoch": 0.67, "grad_norm": 0.2576086654583902, "learning_rate": 1.0622296073889417e-05, "loss": 0.0364, "step": 3031 }, { "epoch": 0.67, "grad_norm": 0.2611284573747479, "learning_rate": 1.0609731236222069e-05, "loss": 0.0346, "step": 3032 }, { "epoch": 0.67, "grad_norm": 0.2360144101401728, "learning_rate": 1.0597171151027297e-05, "loss": 0.0349, "step": 3033 }, { "epoch": 0.67, "grad_norm": 0.2767069046268152, "learning_rate": 1.058461582466185e-05, "loss": 0.036, "step": 3034 }, { "epoch": 0.67, "grad_norm": 0.29520680387895365, "learning_rate": 1.0572065263480046e-05, "loss": 0.0328, "step": 3035 }, { "epoch": 0.67, "grad_norm": 0.2730848217792552, "learning_rate": 1.0559519473833815e-05, "loss": 0.0235, "step": 3036 }, { "epoch": 0.67, "grad_norm": 0.3385652741735248, "learning_rate": 1.0546978462072642e-05, "loss": 0.0386, "step": 3037 }, { "epoch": 0.67, "grad_norm": 0.2996355629707367, "learning_rate": 1.0534442234543623e-05, "loss": 0.0436, "step": 3038 }, { "epoch": 0.67, "grad_norm": 0.3374086702363696, "learning_rate": 1.0521910797591408e-05, "loss": 0.0393, "step": 3039 }, { "epoch": 0.67, "grad_norm": 0.34725073405930423, "learning_rate": 1.0509384157558236e-05, "loss": 0.0497, "step": 3040 }, { "epoch": 0.67, "grad_norm": 0.3113323809669054, "learning_rate": 1.0496862320783926e-05, "loss": 0.043, "step": 3041 }, { "epoch": 0.67, "grad_norm": 0.32109315459110344, "learning_rate": 1.0484345293605853e-05, "loss": 0.0342, "step": 3042 }, { "epoch": 0.67, "grad_norm": 0.2767139456583379, "learning_rate": 1.0471833082358954e-05, "loss": 0.0296, "step": 3043 }, { "epoch": 0.67, "grad_norm": 0.3111162424314804, "learning_rate": 1.0459325693375746e-05, "loss": 0.042, "step": 3044 }, { "epoch": 0.67, "grad_norm": 0.3189332388597514, "learning_rate": 1.0446823132986283e-05, "loss": 0.0361, "step": 3045 }, { "epoch": 0.67, "grad_norm": 0.31497175676644285, "learning_rate": 1.0434325407518204e-05, "loss": 0.0347, "step": 3046 }, { "epoch": 0.67, "grad_norm": 0.2779880749191047, "learning_rate": 1.0421832523296665e-05, "loss": 0.0261, "step": 3047 }, { "epoch": 0.67, "grad_norm": 0.31046564011343325, "learning_rate": 1.04093444866444e-05, "loss": 0.0403, "step": 3048 }, { "epoch": 0.67, "grad_norm": 0.2831393860747274, "learning_rate": 1.0396861303881691e-05, "loss": 0.0286, "step": 3049 }, { "epoch": 0.67, "grad_norm": 0.2727575294371441, "learning_rate": 1.0384382981326336e-05, "loss": 0.0359, "step": 3050 }, { "epoch": 0.67, "grad_norm": 0.3663008312520523, "learning_rate": 1.0371909525293709e-05, "loss": 0.0523, "step": 3051 }, { "epoch": 0.67, "grad_norm": 0.30116656010510984, "learning_rate": 1.0359440942096682e-05, "loss": 0.0309, "step": 3052 }, { "epoch": 0.67, "grad_norm": 0.3353678619920413, "learning_rate": 1.0346977238045699e-05, "loss": 0.0397, "step": 3053 }, { "epoch": 0.67, "grad_norm": 0.3399476298151822, "learning_rate": 1.0334518419448703e-05, "loss": 0.0396, "step": 3054 }, { "epoch": 0.67, "grad_norm": 0.23916380456187727, "learning_rate": 1.0322064492611195e-05, "loss": 0.0299, "step": 3055 }, { "epoch": 0.67, "grad_norm": 0.25943841618555563, "learning_rate": 1.0309615463836162e-05, "loss": 0.0241, "step": 3056 }, { "epoch": 0.67, "grad_norm": 0.3557983798400247, "learning_rate": 1.0297171339424148e-05, "loss": 0.049, "step": 3057 }, { "epoch": 0.67, "grad_norm": 0.324009885498815, "learning_rate": 1.0284732125673198e-05, "loss": 0.0409, "step": 3058 }, { "epoch": 0.67, "grad_norm": 0.20160362060773568, "learning_rate": 1.0272297828878881e-05, "loss": 0.0209, "step": 3059 }, { "epoch": 0.67, "grad_norm": 0.26653484585635645, "learning_rate": 1.0259868455334259e-05, "loss": 0.0284, "step": 3060 }, { "epoch": 0.67, "grad_norm": 0.2883275328288837, "learning_rate": 1.0247444011329928e-05, "loss": 0.0343, "step": 3061 }, { "epoch": 0.67, "grad_norm": 0.30784072020467396, "learning_rate": 1.0235024503153956e-05, "loss": 0.0337, "step": 3062 }, { "epoch": 0.67, "grad_norm": 0.39213557944528954, "learning_rate": 1.0222609937091952e-05, "loss": 0.0571, "step": 3063 }, { "epoch": 0.67, "grad_norm": 0.35248830540852255, "learning_rate": 1.0210200319426988e-05, "loss": 0.0289, "step": 3064 }, { "epoch": 0.67, "grad_norm": 0.24872786988966386, "learning_rate": 1.0197795656439662e-05, "loss": 0.0281, "step": 3065 }, { "epoch": 0.67, "grad_norm": 0.3033114439975192, "learning_rate": 1.0185395954408031e-05, "loss": 0.0372, "step": 3066 }, { "epoch": 0.67, "grad_norm": 0.28971036273483797, "learning_rate": 1.0173001219607683e-05, "loss": 0.0481, "step": 3067 }, { "epoch": 0.67, "grad_norm": 0.3235645357995982, "learning_rate": 1.0160611458311651e-05, "loss": 0.0369, "step": 3068 }, { "epoch": 0.67, "grad_norm": 0.405914241250726, "learning_rate": 1.0148226676790482e-05, "loss": 0.047, "step": 3069 }, { "epoch": 0.67, "grad_norm": 0.33128543565256136, "learning_rate": 1.013584688131218e-05, "loss": 0.0559, "step": 3070 }, { "epoch": 0.67, "grad_norm": 0.2835106988154603, "learning_rate": 1.0123472078142248e-05, "loss": 0.0301, "step": 3071 }, { "epoch": 0.67, "grad_norm": 0.3415746784045918, "learning_rate": 1.011110227354363e-05, "loss": 0.047, "step": 3072 }, { "epoch": 0.67, "grad_norm": 0.2912476657806638, "learning_rate": 1.0098737473776781e-05, "loss": 0.0398, "step": 3073 }, { "epoch": 0.68, "grad_norm": 0.24532364133573092, "learning_rate": 1.0086377685099578e-05, "loss": 0.0244, "step": 3074 }, { "epoch": 0.68, "grad_norm": 0.32993961565654917, "learning_rate": 1.0074022913767411e-05, "loss": 0.0449, "step": 3075 }, { "epoch": 0.68, "grad_norm": 0.3160682568379425, "learning_rate": 1.006167316603309e-05, "loss": 0.042, "step": 3076 }, { "epoch": 0.68, "grad_norm": 0.36620874022778316, "learning_rate": 1.0049328448146908e-05, "loss": 0.0427, "step": 3077 }, { "epoch": 0.68, "grad_norm": 0.2819488901785845, "learning_rate": 1.0036988766356592e-05, "loss": 0.0289, "step": 3078 }, { "epoch": 0.68, "grad_norm": 0.3035661039266114, "learning_rate": 1.0024654126907343e-05, "loss": 0.0415, "step": 3079 }, { "epoch": 0.68, "grad_norm": 0.2880209639389726, "learning_rate": 1.0012324536041781e-05, "loss": 0.0335, "step": 3080 }, { "epoch": 0.68, "grad_norm": 0.2384137582520422, "learning_rate": 1.0000000000000006e-05, "loss": 0.0306, "step": 3081 }, { "epoch": 0.68, "grad_norm": 0.3243717499020729, "learning_rate": 9.987680525019521e-06, "loss": 0.0373, "step": 3082 }, { "epoch": 0.68, "grad_norm": 0.37508876067967445, "learning_rate": 9.975366117335301e-06, "loss": 0.0556, "step": 3083 }, { "epoch": 0.68, "grad_norm": 0.25705657300879875, "learning_rate": 9.96305678317975e-06, "loss": 0.0302, "step": 3084 }, { "epoch": 0.68, "grad_norm": 0.2486956514718051, "learning_rate": 9.950752528782679e-06, "loss": 0.0277, "step": 3085 }, { "epoch": 0.68, "grad_norm": 0.31257510474818884, "learning_rate": 9.938453360371363e-06, "loss": 0.0405, "step": 3086 }, { "epoch": 0.68, "grad_norm": 0.2958070972835022, "learning_rate": 9.926159284170471e-06, "loss": 0.0378, "step": 3087 }, { "epoch": 0.68, "grad_norm": 0.2978037311107582, "learning_rate": 9.913870306402129e-06, "loss": 0.0359, "step": 3088 }, { "epoch": 0.68, "grad_norm": 0.24685634047575178, "learning_rate": 9.901586433285845e-06, "loss": 0.0246, "step": 3089 }, { "epoch": 0.68, "grad_norm": 0.29914513770600243, "learning_rate": 9.889307671038579e-06, "loss": 0.0297, "step": 3090 }, { "epoch": 0.68, "grad_norm": 0.25373522060085896, "learning_rate": 9.877034025874675e-06, "loss": 0.0279, "step": 3091 }, { "epoch": 0.68, "grad_norm": 0.28816754753091417, "learning_rate": 9.864765504005901e-06, "loss": 0.0401, "step": 3092 }, { "epoch": 0.68, "grad_norm": 0.2882007434774494, "learning_rate": 9.852502111641438e-06, "loss": 0.0426, "step": 3093 }, { "epoch": 0.68, "grad_norm": 0.25232177201688216, "learning_rate": 9.840243854987868e-06, "loss": 0.0345, "step": 3094 }, { "epoch": 0.68, "grad_norm": 0.24840152348033523, "learning_rate": 9.827990740249156e-06, "loss": 0.03, "step": 3095 }, { "epoch": 0.68, "grad_norm": 0.2506674410256598, "learning_rate": 9.815742773626693e-06, "loss": 0.0342, "step": 3096 }, { "epoch": 0.68, "grad_norm": 0.23513415096530796, "learning_rate": 9.803499961319234e-06, "loss": 0.0199, "step": 3097 }, { "epoch": 0.68, "grad_norm": 0.3594340193010657, "learning_rate": 9.791262309522959e-06, "loss": 0.0534, "step": 3098 }, { "epoch": 0.68, "grad_norm": 0.26262614250660854, "learning_rate": 9.779029824431403e-06, "loss": 0.0328, "step": 3099 }, { "epoch": 0.68, "grad_norm": 0.3819389106850532, "learning_rate": 9.766802512235507e-06, "loss": 0.0734, "step": 3100 }, { "epoch": 0.68, "grad_norm": 0.3023854983385063, "learning_rate": 9.75458037912359e-06, "loss": 0.0339, "step": 3101 }, { "epoch": 0.68, "grad_norm": 0.3121884819870665, "learning_rate": 9.742363431281356e-06, "loss": 0.0437, "step": 3102 }, { "epoch": 0.68, "grad_norm": 0.25550331208570076, "learning_rate": 9.73015167489186e-06, "loss": 0.0253, "step": 3103 }, { "epoch": 0.68, "grad_norm": 0.3430055880911585, "learning_rate": 9.717945116135568e-06, "loss": 0.0373, "step": 3104 }, { "epoch": 0.68, "grad_norm": 0.31624401313030304, "learning_rate": 9.705743761190273e-06, "loss": 0.0406, "step": 3105 }, { "epoch": 0.68, "grad_norm": 0.2834614200655747, "learning_rate": 9.693547616231173e-06, "loss": 0.0361, "step": 3106 }, { "epoch": 0.68, "grad_norm": 0.3513387179755282, "learning_rate": 9.681356687430798e-06, "loss": 0.0434, "step": 3107 }, { "epoch": 0.68, "grad_norm": 0.28196666062468867, "learning_rate": 9.669170980959063e-06, "loss": 0.0335, "step": 3108 }, { "epoch": 0.68, "grad_norm": 0.33908513780252647, "learning_rate": 9.656990502983216e-06, "loss": 0.0317, "step": 3109 }, { "epoch": 0.68, "grad_norm": 0.313221265991117, "learning_rate": 9.644815259667881e-06, "loss": 0.035, "step": 3110 }, { "epoch": 0.68, "grad_norm": 0.26410877623606116, "learning_rate": 9.632645257175027e-06, "loss": 0.0354, "step": 3111 }, { "epoch": 0.68, "grad_norm": 0.28774523227496873, "learning_rate": 9.620480501663954e-06, "loss": 0.0274, "step": 3112 }, { "epoch": 0.68, "grad_norm": 0.3178335188120151, "learning_rate": 9.608320999291333e-06, "loss": 0.0353, "step": 3113 }, { "epoch": 0.68, "grad_norm": 0.3229114765344905, "learning_rate": 9.59616675621115e-06, "loss": 0.0352, "step": 3114 }, { "epoch": 0.68, "grad_norm": 0.27446791096771567, "learning_rate": 9.58401777857475e-06, "loss": 0.026, "step": 3115 }, { "epoch": 0.68, "grad_norm": 0.28892722701999873, "learning_rate": 9.571874072530809e-06, "loss": 0.0355, "step": 3116 }, { "epoch": 0.68, "grad_norm": 0.31541426299089276, "learning_rate": 9.559735644225316e-06, "loss": 0.0263, "step": 3117 }, { "epoch": 0.68, "grad_norm": 0.25088864578936493, "learning_rate": 9.547602499801616e-06, "loss": 0.026, "step": 3118 }, { "epoch": 0.69, "grad_norm": 0.30759405624372854, "learning_rate": 9.53547464540037e-06, "loss": 0.0459, "step": 3119 }, { "epoch": 0.69, "grad_norm": 0.2569584862348946, "learning_rate": 9.523352087159548e-06, "loss": 0.0328, "step": 3120 }, { "epoch": 0.69, "grad_norm": 0.2631748601423334, "learning_rate": 9.511234831214464e-06, "loss": 0.0301, "step": 3121 }, { "epoch": 0.69, "grad_norm": 0.22610547568516606, "learning_rate": 9.499122883697724e-06, "loss": 0.031, "step": 3122 }, { "epoch": 0.69, "grad_norm": 0.30749758862891285, "learning_rate": 9.487016250739269e-06, "loss": 0.0391, "step": 3123 }, { "epoch": 0.69, "grad_norm": 0.2865122859893621, "learning_rate": 9.474914938466328e-06, "loss": 0.0266, "step": 3124 }, { "epoch": 0.69, "grad_norm": 0.3161344066840338, "learning_rate": 9.462818953003465e-06, "loss": 0.0372, "step": 3125 }, { "epoch": 0.69, "grad_norm": 0.3230785751011246, "learning_rate": 9.45072830047251e-06, "loss": 0.0471, "step": 3126 }, { "epoch": 0.69, "grad_norm": 0.3371732737427641, "learning_rate": 9.438642986992641e-06, "loss": 0.0389, "step": 3127 }, { "epoch": 0.69, "grad_norm": 0.25125590412357773, "learning_rate": 9.426563018680293e-06, "loss": 0.0219, "step": 3128 }, { "epoch": 0.69, "grad_norm": 0.3338988345174679, "learning_rate": 9.414488401649227e-06, "loss": 0.0361, "step": 3129 }, { "epoch": 0.69, "grad_norm": 0.3217851467083712, "learning_rate": 9.40241914201046e-06, "loss": 0.0336, "step": 3130 }, { "epoch": 0.69, "grad_norm": 0.35707314994576855, "learning_rate": 9.390355245872337e-06, "loss": 0.0482, "step": 3131 }, { "epoch": 0.69, "grad_norm": 0.2898732748699959, "learning_rate": 9.378296719340459e-06, "loss": 0.0295, "step": 3132 }, { "epoch": 0.69, "grad_norm": 0.2828287214822192, "learning_rate": 9.366243568517726e-06, "loss": 0.0373, "step": 3133 }, { "epoch": 0.69, "grad_norm": 0.2516824465950768, "learning_rate": 9.354195799504305e-06, "loss": 0.0264, "step": 3134 }, { "epoch": 0.69, "grad_norm": 0.2505504389748185, "learning_rate": 9.342153418397647e-06, "loss": 0.0372, "step": 3135 }, { "epoch": 0.69, "grad_norm": 0.29056852673906025, "learning_rate": 9.330116431292478e-06, "loss": 0.0407, "step": 3136 }, { "epoch": 0.69, "grad_norm": 0.26675860352481984, "learning_rate": 9.318084844280798e-06, "loss": 0.0393, "step": 3137 }, { "epoch": 0.69, "grad_norm": 0.26130883298179075, "learning_rate": 9.306058663451852e-06, "loss": 0.0258, "step": 3138 }, { "epoch": 0.69, "grad_norm": 0.2881059025457402, "learning_rate": 9.294037894892178e-06, "loss": 0.0365, "step": 3139 }, { "epoch": 0.69, "grad_norm": 0.3193162684551438, "learning_rate": 9.28202254468555e-06, "loss": 0.0387, "step": 3140 }, { "epoch": 0.69, "grad_norm": 0.31524669429337604, "learning_rate": 9.270012618913018e-06, "loss": 0.0431, "step": 3141 }, { "epoch": 0.69, "grad_norm": 0.29463283952180785, "learning_rate": 9.258008123652868e-06, "loss": 0.0413, "step": 3142 }, { "epoch": 0.69, "grad_norm": 0.3625294895344755, "learning_rate": 9.246009064980657e-06, "loss": 0.0479, "step": 3143 }, { "epoch": 0.69, "grad_norm": 0.29733082010017053, "learning_rate": 9.23401544896919e-06, "loss": 0.0338, "step": 3144 }, { "epoch": 0.69, "grad_norm": 0.2953035650970452, "learning_rate": 9.22202728168849e-06, "loss": 0.0384, "step": 3145 }, { "epoch": 0.69, "grad_norm": 0.26778874148341164, "learning_rate": 9.210044569205863e-06, "loss": 0.0262, "step": 3146 }, { "epoch": 0.69, "grad_norm": 0.2789062088611049, "learning_rate": 9.198067317585816e-06, "loss": 0.0368, "step": 3147 }, { "epoch": 0.69, "grad_norm": 0.27855798043463303, "learning_rate": 9.186095532890121e-06, "loss": 0.0423, "step": 3148 }, { "epoch": 0.69, "grad_norm": 0.24564879928196806, "learning_rate": 9.174129221177762e-06, "loss": 0.034, "step": 3149 }, { "epoch": 0.69, "grad_norm": 0.2798131159847416, "learning_rate": 9.162168388504972e-06, "loss": 0.0291, "step": 3150 }, { "epoch": 0.69, "grad_norm": 0.29632821676191, "learning_rate": 9.150213040925193e-06, "loss": 0.0328, "step": 3151 }, { "epoch": 0.69, "grad_norm": 0.42087156559602495, "learning_rate": 9.138263184489104e-06, "loss": 0.0482, "step": 3152 }, { "epoch": 0.69, "grad_norm": 0.30010754823131197, "learning_rate": 9.1263188252446e-06, "loss": 0.0341, "step": 3153 }, { "epoch": 0.69, "grad_norm": 0.3008349234511671, "learning_rate": 9.114379969236802e-06, "loss": 0.0331, "step": 3154 }, { "epoch": 0.69, "grad_norm": 0.2628119822938154, "learning_rate": 9.102446622508025e-06, "loss": 0.0341, "step": 3155 }, { "epoch": 0.69, "grad_norm": 0.2439599627576946, "learning_rate": 9.090518791097822e-06, "loss": 0.0304, "step": 3156 }, { "epoch": 0.69, "grad_norm": 0.3738203169466667, "learning_rate": 9.078596481042927e-06, "loss": 0.0526, "step": 3157 }, { "epoch": 0.69, "grad_norm": 0.28116721683341717, "learning_rate": 9.066679698377311e-06, "loss": 0.0219, "step": 3158 }, { "epoch": 0.69, "grad_norm": 0.26721846640871477, "learning_rate": 9.054768449132115e-06, "loss": 0.0354, "step": 3159 }, { "epoch": 0.69, "grad_norm": 0.30702480620011646, "learning_rate": 9.042862739335707e-06, "loss": 0.0344, "step": 3160 }, { "epoch": 0.69, "grad_norm": 0.26577193740057614, "learning_rate": 9.030962575013622e-06, "loss": 0.0343, "step": 3161 }, { "epoch": 0.69, "grad_norm": 0.27656957541860233, "learning_rate": 9.019067962188634e-06, "loss": 0.0274, "step": 3162 }, { "epoch": 0.69, "grad_norm": 0.24374774601701155, "learning_rate": 9.007178906880655e-06, "loss": 0.0234, "step": 3163 }, { "epoch": 0.69, "grad_norm": 0.2787367800070968, "learning_rate": 8.995295415106829e-06, "loss": 0.0299, "step": 3164 }, { "epoch": 0.7, "grad_norm": 0.2559452349619959, "learning_rate": 8.983417492881443e-06, "loss": 0.0324, "step": 3165 }, { "epoch": 0.7, "grad_norm": 0.2008699540297915, "learning_rate": 8.971545146216005e-06, "loss": 0.0235, "step": 3166 }, { "epoch": 0.7, "grad_norm": 0.23118591258238033, "learning_rate": 8.959678381119166e-06, "loss": 0.0305, "step": 3167 }, { "epoch": 0.7, "grad_norm": 0.32931410946889816, "learning_rate": 8.947817203596785e-06, "loss": 0.0272, "step": 3168 }, { "epoch": 0.7, "grad_norm": 0.27844357354404126, "learning_rate": 8.935961619651859e-06, "loss": 0.0347, "step": 3169 }, { "epoch": 0.7, "grad_norm": 0.3423909501339809, "learning_rate": 8.924111635284582e-06, "loss": 0.0372, "step": 3170 }, { "epoch": 0.7, "grad_norm": 0.36290249512540756, "learning_rate": 8.91226725649231e-06, "loss": 0.0532, "step": 3171 }, { "epoch": 0.7, "grad_norm": 0.2738942418836486, "learning_rate": 8.900428489269541e-06, "loss": 0.0275, "step": 3172 }, { "epoch": 0.7, "grad_norm": 0.3044333253627893, "learning_rate": 8.888595339607961e-06, "loss": 0.0354, "step": 3173 }, { "epoch": 0.7, "grad_norm": 0.22113366457544695, "learning_rate": 8.876767813496388e-06, "loss": 0.0244, "step": 3174 }, { "epoch": 0.7, "grad_norm": 0.33908047958026905, "learning_rate": 8.86494591692081e-06, "loss": 0.0385, "step": 3175 }, { "epoch": 0.7, "grad_norm": 0.2877423063537248, "learning_rate": 8.85312965586437e-06, "loss": 0.0375, "step": 3176 }, { "epoch": 0.7, "grad_norm": 0.29744477556872667, "learning_rate": 8.841319036307334e-06, "loss": 0.0412, "step": 3177 }, { "epoch": 0.7, "grad_norm": 0.39751737601611026, "learning_rate": 8.829514064227138e-06, "loss": 0.0539, "step": 3178 }, { "epoch": 0.7, "grad_norm": 0.28259364348374433, "learning_rate": 8.817714745598358e-06, "loss": 0.0314, "step": 3179 }, { "epoch": 0.7, "grad_norm": 0.25615170210053695, "learning_rate": 8.805921086392686e-06, "loss": 0.0305, "step": 3180 }, { "epoch": 0.7, "grad_norm": 0.27951022982259094, "learning_rate": 8.79413309257898e-06, "loss": 0.0299, "step": 3181 }, { "epoch": 0.7, "grad_norm": 0.27529645892553, "learning_rate": 8.782350770123202e-06, "loss": 0.033, "step": 3182 }, { "epoch": 0.7, "grad_norm": 0.27557445369722716, "learning_rate": 8.770574124988474e-06, "loss": 0.0348, "step": 3183 }, { "epoch": 0.7, "grad_norm": 0.2881043250471307, "learning_rate": 8.758803163135008e-06, "loss": 0.0392, "step": 3184 }, { "epoch": 0.7, "grad_norm": 0.25932623335834537, "learning_rate": 8.74703789052018e-06, "loss": 0.0322, "step": 3185 }, { "epoch": 0.7, "grad_norm": 0.28605773253215566, "learning_rate": 8.73527831309844e-06, "loss": 0.0305, "step": 3186 }, { "epoch": 0.7, "grad_norm": 0.3632188353707027, "learning_rate": 8.723524436821418e-06, "loss": 0.0359, "step": 3187 }, { "epoch": 0.7, "grad_norm": 0.30155488632157246, "learning_rate": 8.711776267637794e-06, "loss": 0.0304, "step": 3188 }, { "epoch": 0.7, "grad_norm": 0.29998057173364495, "learning_rate": 8.700033811493407e-06, "loss": 0.0343, "step": 3189 }, { "epoch": 0.7, "grad_norm": 0.3343495163164025, "learning_rate": 8.688297074331171e-06, "loss": 0.0513, "step": 3190 }, { "epoch": 0.7, "grad_norm": 0.2502815627983121, "learning_rate": 8.676566062091135e-06, "loss": 0.0284, "step": 3191 }, { "epoch": 0.7, "grad_norm": 0.27547491420485215, "learning_rate": 8.66484078071042e-06, "loss": 0.0343, "step": 3192 }, { "epoch": 0.7, "grad_norm": 0.3151790461619625, "learning_rate": 8.653121236123278e-06, "loss": 0.0319, "step": 3193 }, { "epoch": 0.7, "grad_norm": 0.2732800984344735, "learning_rate": 8.641407434261031e-06, "loss": 0.0316, "step": 3194 }, { "epoch": 0.7, "grad_norm": 0.3315842255890883, "learning_rate": 8.62969938105211e-06, "loss": 0.0378, "step": 3195 }, { "epoch": 0.7, "grad_norm": 0.26288651665797536, "learning_rate": 8.617997082422031e-06, "loss": 0.0311, "step": 3196 }, { "epoch": 0.7, "grad_norm": 0.2804867268488633, "learning_rate": 8.606300544293412e-06, "loss": 0.0364, "step": 3197 }, { "epoch": 0.7, "grad_norm": 0.34573039793205407, "learning_rate": 8.594609772585922e-06, "loss": 0.0449, "step": 3198 }, { "epoch": 0.7, "grad_norm": 0.24380722932954038, "learning_rate": 8.582924773216353e-06, "loss": 0.0244, "step": 3199 }, { "epoch": 0.7, "grad_norm": 0.27146298166414884, "learning_rate": 8.571245552098533e-06, "loss": 0.0351, "step": 3200 }, { "epoch": 0.7, "grad_norm": 0.291641356300972, "learning_rate": 8.559572115143406e-06, "loss": 0.0251, "step": 3201 }, { "epoch": 0.7, "grad_norm": 0.2927027066573506, "learning_rate": 8.547904468258957e-06, "loss": 0.0287, "step": 3202 }, { "epoch": 0.7, "grad_norm": 0.26510552491525224, "learning_rate": 8.536242617350265e-06, "loss": 0.0352, "step": 3203 }, { "epoch": 0.7, "grad_norm": 0.286560637094647, "learning_rate": 8.524586568319451e-06, "loss": 0.041, "step": 3204 }, { "epoch": 0.7, "grad_norm": 0.2858820001489877, "learning_rate": 8.51293632706572e-06, "loss": 0.0278, "step": 3205 }, { "epoch": 0.7, "grad_norm": 0.38231843088964895, "learning_rate": 8.501291899485337e-06, "loss": 0.0433, "step": 3206 }, { "epoch": 0.7, "grad_norm": 0.4060582662170565, "learning_rate": 8.489653291471607e-06, "loss": 0.0549, "step": 3207 }, { "epoch": 0.7, "grad_norm": 0.28511768237017565, "learning_rate": 8.47802050891491e-06, "loss": 0.035, "step": 3208 }, { "epoch": 0.7, "grad_norm": 0.2831822880939062, "learning_rate": 8.466393557702659e-06, "loss": 0.0385, "step": 3209 }, { "epoch": 0.71, "grad_norm": 0.3482690938759456, "learning_rate": 8.454772443719339e-06, "loss": 0.0435, "step": 3210 }, { "epoch": 0.71, "grad_norm": 0.2907088281881407, "learning_rate": 8.443157172846448e-06, "loss": 0.0307, "step": 3211 }, { "epoch": 0.71, "grad_norm": 0.26492070343199503, "learning_rate": 8.43154775096256e-06, "loss": 0.0347, "step": 3212 }, { "epoch": 0.71, "grad_norm": 0.3079662703787326, "learning_rate": 8.419944183943266e-06, "loss": 0.0357, "step": 3213 }, { "epoch": 0.71, "grad_norm": 0.2970429372118634, "learning_rate": 8.408346477661218e-06, "loss": 0.029, "step": 3214 }, { "epoch": 0.71, "grad_norm": 0.295109165165925, "learning_rate": 8.39675463798607e-06, "loss": 0.0315, "step": 3215 }, { "epoch": 0.71, "grad_norm": 0.26527102109089074, "learning_rate": 8.385168670784532e-06, "loss": 0.0337, "step": 3216 }, { "epoch": 0.71, "grad_norm": 0.3220551699472881, "learning_rate": 8.373588581920325e-06, "loss": 0.0315, "step": 3217 }, { "epoch": 0.71, "grad_norm": 0.2518849539387254, "learning_rate": 8.362014377254213e-06, "loss": 0.0275, "step": 3218 }, { "epoch": 0.71, "grad_norm": 0.2867164128387573, "learning_rate": 8.35044606264396e-06, "loss": 0.035, "step": 3219 }, { "epoch": 0.71, "grad_norm": 0.3426814485669959, "learning_rate": 8.338883643944375e-06, "loss": 0.0445, "step": 3220 }, { "epoch": 0.71, "grad_norm": 0.22337205335160346, "learning_rate": 8.327327127007247e-06, "loss": 0.0261, "step": 3221 }, { "epoch": 0.71, "grad_norm": 0.31098813071247333, "learning_rate": 8.315776517681428e-06, "loss": 0.0372, "step": 3222 }, { "epoch": 0.71, "grad_norm": 0.27028504987487467, "learning_rate": 8.304231821812733e-06, "loss": 0.0302, "step": 3223 }, { "epoch": 0.71, "grad_norm": 0.3178525060794622, "learning_rate": 8.292693045244016e-06, "loss": 0.0364, "step": 3224 }, { "epoch": 0.71, "grad_norm": 0.3628950219355067, "learning_rate": 8.281160193815108e-06, "loss": 0.045, "step": 3225 }, { "epoch": 0.71, "grad_norm": 0.44063406048921605, "learning_rate": 8.269633273362872e-06, "loss": 0.0511, "step": 3226 }, { "epoch": 0.71, "grad_norm": 0.2285235476954415, "learning_rate": 8.258112289721134e-06, "loss": 0.021, "step": 3227 }, { "epoch": 0.71, "grad_norm": 0.3376012235303488, "learning_rate": 8.246597248720756e-06, "loss": 0.0408, "step": 3228 }, { "epoch": 0.71, "grad_norm": 0.2039304225668567, "learning_rate": 8.23508815618955e-06, "loss": 0.0212, "step": 3229 }, { "epoch": 0.71, "grad_norm": 0.357302119303284, "learning_rate": 8.22358501795235e-06, "loss": 0.0481, "step": 3230 }, { "epoch": 0.71, "grad_norm": 0.31132384935073115, "learning_rate": 8.212087839830968e-06, "loss": 0.0267, "step": 3231 }, { "epoch": 0.71, "grad_norm": 0.2606308660144364, "learning_rate": 8.200596627644187e-06, "loss": 0.0324, "step": 3232 }, { "epoch": 0.71, "grad_norm": 0.30183494841210046, "learning_rate": 8.189111387207782e-06, "loss": 0.0378, "step": 3233 }, { "epoch": 0.71, "grad_norm": 0.2965052083452161, "learning_rate": 8.177632124334513e-06, "loss": 0.05, "step": 3234 }, { "epoch": 0.71, "grad_norm": 0.3487436348153068, "learning_rate": 8.16615884483409e-06, "loss": 0.0393, "step": 3235 }, { "epoch": 0.71, "grad_norm": 0.24122902188482195, "learning_rate": 8.154691554513228e-06, "loss": 0.0295, "step": 3236 }, { "epoch": 0.71, "grad_norm": 0.3403639155904013, "learning_rate": 8.143230259175574e-06, "loss": 0.0389, "step": 3237 }, { "epoch": 0.71, "grad_norm": 0.2303562859744233, "learning_rate": 8.13177496462177e-06, "loss": 0.0251, "step": 3238 }, { "epoch": 0.71, "grad_norm": 0.28828770958504507, "learning_rate": 8.120325676649416e-06, "loss": 0.0324, "step": 3239 }, { "epoch": 0.71, "grad_norm": 0.3063426422594093, "learning_rate": 8.108882401053055e-06, "loss": 0.0243, "step": 3240 }, { "epoch": 0.71, "grad_norm": 0.31969377026789886, "learning_rate": 8.09744514362421e-06, "loss": 0.0453, "step": 3241 }, { "epoch": 0.71, "grad_norm": 0.28191090126797036, "learning_rate": 8.086013910151334e-06, "loss": 0.0288, "step": 3242 }, { "epoch": 0.71, "grad_norm": 0.2359073376480803, "learning_rate": 8.07458870641986e-06, "loss": 0.0261, "step": 3243 }, { "epoch": 0.71, "grad_norm": 0.31954006938375107, "learning_rate": 8.063169538212139e-06, "loss": 0.0395, "step": 3244 }, { "epoch": 0.71, "grad_norm": 0.2573348880995303, "learning_rate": 8.051756411307494e-06, "loss": 0.0297, "step": 3245 }, { "epoch": 0.71, "grad_norm": 0.22732547837337838, "learning_rate": 8.040349331482167e-06, "loss": 0.0315, "step": 3246 }, { "epoch": 0.71, "grad_norm": 0.2483432531029029, "learning_rate": 8.028948304509356e-06, "loss": 0.0351, "step": 3247 }, { "epoch": 0.71, "grad_norm": 0.36573764989291757, "learning_rate": 8.017553336159192e-06, "loss": 0.0394, "step": 3248 }, { "epoch": 0.71, "grad_norm": 0.2565588816574629, "learning_rate": 8.006164432198747e-06, "loss": 0.0366, "step": 3249 }, { "epoch": 0.71, "grad_norm": 0.28825887887882923, "learning_rate": 7.994781598391995e-06, "loss": 0.0302, "step": 3250 }, { "epoch": 0.71, "grad_norm": 0.3507524907614378, "learning_rate": 7.983404840499882e-06, "loss": 0.0483, "step": 3251 }, { "epoch": 0.71, "grad_norm": 0.2725297711015889, "learning_rate": 7.972034164280231e-06, "loss": 0.0336, "step": 3252 }, { "epoch": 0.71, "grad_norm": 0.2762138156797559, "learning_rate": 7.96066957548783e-06, "loss": 0.0378, "step": 3253 }, { "epoch": 0.71, "grad_norm": 0.2705830355830464, "learning_rate": 7.949311079874352e-06, "loss": 0.0311, "step": 3254 }, { "epoch": 0.71, "grad_norm": 0.3045217601223277, "learning_rate": 7.937958683188407e-06, "loss": 0.0322, "step": 3255 }, { "epoch": 0.72, "grad_norm": 0.26781638181581713, "learning_rate": 7.926612391175516e-06, "loss": 0.0326, "step": 3256 }, { "epoch": 0.72, "grad_norm": 0.3282592589589826, "learning_rate": 7.915272209578112e-06, "loss": 0.0357, "step": 3257 }, { "epoch": 0.72, "grad_norm": 0.25635829358510154, "learning_rate": 7.903938144135515e-06, "loss": 0.0265, "step": 3258 }, { "epoch": 0.72, "grad_norm": 0.2292502081203452, "learning_rate": 7.892610200583979e-06, "loss": 0.0252, "step": 3259 }, { "epoch": 0.72, "grad_norm": 0.2563374980548708, "learning_rate": 7.881288384656634e-06, "loss": 0.0226, "step": 3260 }, { "epoch": 0.72, "grad_norm": 0.3228283910172808, "learning_rate": 7.869972702083532e-06, "loss": 0.0306, "step": 3261 }, { "epoch": 0.72, "grad_norm": 0.2937913662617634, "learning_rate": 7.8586631585916e-06, "loss": 0.0297, "step": 3262 }, { "epoch": 0.72, "grad_norm": 0.27880336936959554, "learning_rate": 7.847359759904675e-06, "loss": 0.0303, "step": 3263 }, { "epoch": 0.72, "grad_norm": 0.2654205404899196, "learning_rate": 7.836062511743468e-06, "loss": 0.0291, "step": 3264 }, { "epoch": 0.72, "grad_norm": 0.35197320895624984, "learning_rate": 7.824771419825588e-06, "loss": 0.0528, "step": 3265 }, { "epoch": 0.72, "grad_norm": 0.3555739110978662, "learning_rate": 7.813486489865534e-06, "loss": 0.0394, "step": 3266 }, { "epoch": 0.72, "grad_norm": 0.27169094871366306, "learning_rate": 7.802207727574665e-06, "loss": 0.029, "step": 3267 }, { "epoch": 0.72, "grad_norm": 0.24856026153573255, "learning_rate": 7.790935138661246e-06, "loss": 0.0354, "step": 3268 }, { "epoch": 0.72, "grad_norm": 0.2589370304520512, "learning_rate": 7.779668728830389e-06, "loss": 0.0287, "step": 3269 }, { "epoch": 0.72, "grad_norm": 0.2636273240492979, "learning_rate": 7.768408503784108e-06, "loss": 0.0259, "step": 3270 }, { "epoch": 0.72, "grad_norm": 0.27505692087975914, "learning_rate": 7.757154469221257e-06, "loss": 0.035, "step": 3271 }, { "epoch": 0.72, "grad_norm": 0.26944460374515017, "learning_rate": 7.745906630837586e-06, "loss": 0.0297, "step": 3272 }, { "epoch": 0.72, "grad_norm": 0.2780305927222107, "learning_rate": 7.734664994325672e-06, "loss": 0.0289, "step": 3273 }, { "epoch": 0.72, "grad_norm": 0.33649667510576525, "learning_rate": 7.723429565375006e-06, "loss": 0.0413, "step": 3274 }, { "epoch": 0.72, "grad_norm": 0.3260934244253876, "learning_rate": 7.71220034967189e-06, "loss": 0.0342, "step": 3275 }, { "epoch": 0.72, "grad_norm": 0.34872161845458016, "learning_rate": 7.700977352899506e-06, "loss": 0.0428, "step": 3276 }, { "epoch": 0.72, "grad_norm": 0.29277538276010856, "learning_rate": 7.68976058073787e-06, "loss": 0.0297, "step": 3277 }, { "epoch": 0.72, "grad_norm": 0.27543608648700496, "learning_rate": 7.678550038863877e-06, "loss": 0.0332, "step": 3278 }, { "epoch": 0.72, "grad_norm": 0.2683624967035358, "learning_rate": 7.667345732951233e-06, "loss": 0.0341, "step": 3279 }, { "epoch": 0.72, "grad_norm": 0.29395466532988246, "learning_rate": 7.656147668670519e-06, "loss": 0.0345, "step": 3280 }, { "epoch": 0.72, "grad_norm": 0.2438548131709034, "learning_rate": 7.644955851689129e-06, "loss": 0.0278, "step": 3281 }, { "epoch": 0.72, "grad_norm": 0.23469584280035205, "learning_rate": 7.63377028767133e-06, "loss": 0.0252, "step": 3282 }, { "epoch": 0.72, "grad_norm": 0.26294336342626873, "learning_rate": 7.622590982278189e-06, "loss": 0.0297, "step": 3283 }, { "epoch": 0.72, "grad_norm": 0.2637056117197377, "learning_rate": 7.611417941167634e-06, "loss": 0.0414, "step": 3284 }, { "epoch": 0.72, "grad_norm": 0.24425409350483882, "learning_rate": 7.600251169994392e-06, "loss": 0.0275, "step": 3285 }, { "epoch": 0.72, "grad_norm": 0.3440145969738492, "learning_rate": 7.589090674410056e-06, "loss": 0.0407, "step": 3286 }, { "epoch": 0.72, "grad_norm": 0.32265773384916846, "learning_rate": 7.577936460063e-06, "loss": 0.0457, "step": 3287 }, { "epoch": 0.72, "grad_norm": 0.21486533663414784, "learning_rate": 7.566788532598457e-06, "loss": 0.0242, "step": 3288 }, { "epoch": 0.72, "grad_norm": 0.3426672398743925, "learning_rate": 7.555646897658448e-06, "loss": 0.0418, "step": 3289 }, { "epoch": 0.72, "grad_norm": 0.2978583663413471, "learning_rate": 7.544511560881829e-06, "loss": 0.0507, "step": 3290 }, { "epoch": 0.72, "grad_norm": 0.29657699033028645, "learning_rate": 7.533382527904263e-06, "loss": 0.0372, "step": 3291 }, { "epoch": 0.72, "grad_norm": 0.30881177467626963, "learning_rate": 7.5222598043582274e-06, "loss": 0.0374, "step": 3292 }, { "epoch": 0.72, "grad_norm": 0.26934463841005973, "learning_rate": 7.511143395872986e-06, "loss": 0.0333, "step": 3293 }, { "epoch": 0.72, "grad_norm": 0.28768593124767466, "learning_rate": 7.500033308074639e-06, "loss": 0.0252, "step": 3294 }, { "epoch": 0.72, "grad_norm": 0.3364082852313981, "learning_rate": 7.488929546586053e-06, "loss": 0.037, "step": 3295 }, { "epoch": 0.72, "grad_norm": 0.22541449997196128, "learning_rate": 7.477832117026924e-06, "loss": 0.0245, "step": 3296 }, { "epoch": 0.72, "grad_norm": 0.24872789984440977, "learning_rate": 7.466741025013715e-06, "loss": 0.0285, "step": 3297 }, { "epoch": 0.72, "grad_norm": 0.22554073458836682, "learning_rate": 7.455656276159713e-06, "loss": 0.025, "step": 3298 }, { "epoch": 0.72, "grad_norm": 0.29784291989654366, "learning_rate": 7.444577876074956e-06, "loss": 0.0274, "step": 3299 }, { "epoch": 0.72, "grad_norm": 0.2899906362057722, "learning_rate": 7.4335058303663056e-06, "loss": 0.0322, "step": 3300 }, { "epoch": 0.73, "grad_norm": 0.29269440839549676, "learning_rate": 7.422440144637395e-06, "loss": 0.0427, "step": 3301 }, { "epoch": 0.73, "grad_norm": 0.3068639585835522, "learning_rate": 7.411380824488621e-06, "loss": 0.0359, "step": 3302 }, { "epoch": 0.73, "grad_norm": 0.25979565324964654, "learning_rate": 7.400327875517188e-06, "loss": 0.03, "step": 3303 }, { "epoch": 0.73, "grad_norm": 0.31275196646339726, "learning_rate": 7.389281303317046e-06, "loss": 0.0399, "step": 3304 }, { "epoch": 0.73, "grad_norm": 0.247167598282481, "learning_rate": 7.37824111347895e-06, "loss": 0.0331, "step": 3305 }, { "epoch": 0.73, "grad_norm": 0.25559863425551843, "learning_rate": 7.367207311590392e-06, "loss": 0.0323, "step": 3306 }, { "epoch": 0.73, "grad_norm": 0.2560772877837243, "learning_rate": 7.356179903235654e-06, "loss": 0.0357, "step": 3307 }, { "epoch": 0.73, "grad_norm": 0.25963678248029315, "learning_rate": 7.345158893995774e-06, "loss": 0.0271, "step": 3308 }, { "epoch": 0.73, "grad_norm": 0.30030508869803, "learning_rate": 7.33414428944856e-06, "loss": 0.0376, "step": 3309 }, { "epoch": 0.73, "grad_norm": 0.3277322712263529, "learning_rate": 7.3231360951685574e-06, "loss": 0.0448, "step": 3310 }, { "epoch": 0.73, "grad_norm": 0.23406026688899606, "learning_rate": 7.312134316727093e-06, "loss": 0.0227, "step": 3311 }, { "epoch": 0.73, "grad_norm": 0.27825149220668854, "learning_rate": 7.301138959692225e-06, "loss": 0.0339, "step": 3312 }, { "epoch": 0.73, "grad_norm": 0.2623883975162797, "learning_rate": 7.290150029628777e-06, "loss": 0.0306, "step": 3313 }, { "epoch": 0.73, "grad_norm": 0.31569686298159066, "learning_rate": 7.2791675320983076e-06, "loss": 0.0406, "step": 3314 }, { "epoch": 0.73, "grad_norm": 0.2641515959588397, "learning_rate": 7.268191472659136e-06, "loss": 0.0256, "step": 3315 }, { "epoch": 0.73, "grad_norm": 0.26511555792736424, "learning_rate": 7.257221856866295e-06, "loss": 0.0291, "step": 3316 }, { "epoch": 0.73, "grad_norm": 0.25792506679317007, "learning_rate": 7.246258690271599e-06, "loss": 0.0268, "step": 3317 }, { "epoch": 0.73, "grad_norm": 0.5339755656834957, "learning_rate": 7.235301978423555e-06, "loss": 0.0309, "step": 3318 }, { "epoch": 0.73, "grad_norm": 0.2740130090122586, "learning_rate": 7.224351726867433e-06, "loss": 0.032, "step": 3319 }, { "epoch": 0.73, "grad_norm": 0.22585862262855183, "learning_rate": 7.213407941145214e-06, "loss": 0.0195, "step": 3320 }, { "epoch": 0.73, "grad_norm": 0.2225228462116293, "learning_rate": 7.202470626795626e-06, "loss": 0.0243, "step": 3321 }, { "epoch": 0.73, "grad_norm": 0.2747098746967936, "learning_rate": 7.191539789354096e-06, "loss": 0.0319, "step": 3322 }, { "epoch": 0.73, "grad_norm": 0.24156826594726816, "learning_rate": 7.180615434352802e-06, "loss": 0.0217, "step": 3323 }, { "epoch": 0.73, "grad_norm": 0.2107368857465454, "learning_rate": 7.1696975673206125e-06, "loss": 0.0284, "step": 3324 }, { "epoch": 0.73, "grad_norm": 0.2335866384795216, "learning_rate": 7.158786193783138e-06, "loss": 0.0234, "step": 3325 }, { "epoch": 0.73, "grad_norm": 0.2748073054489228, "learning_rate": 7.147881319262695e-06, "loss": 0.0379, "step": 3326 }, { "epoch": 0.73, "grad_norm": 0.2523174461142641, "learning_rate": 7.136982949278293e-06, "loss": 0.0279, "step": 3327 }, { "epoch": 0.73, "grad_norm": 0.25476723458425904, "learning_rate": 7.126091089345679e-06, "loss": 0.0313, "step": 3328 }, { "epoch": 0.73, "grad_norm": 0.26992223549141114, "learning_rate": 7.115205744977276e-06, "loss": 0.0309, "step": 3329 }, { "epoch": 0.73, "grad_norm": 0.28147717414007484, "learning_rate": 7.104326921682236e-06, "loss": 0.0256, "step": 3330 }, { "epoch": 0.73, "grad_norm": 0.2750101911217013, "learning_rate": 7.093454624966387e-06, "loss": 0.0323, "step": 3331 }, { "epoch": 0.73, "grad_norm": 0.2562045469910545, "learning_rate": 7.082588860332271e-06, "loss": 0.0309, "step": 3332 }, { "epoch": 0.73, "grad_norm": 0.27352463832409785, "learning_rate": 7.071729633279118e-06, "loss": 0.0288, "step": 3333 }, { "epoch": 0.73, "grad_norm": 0.2512556075898497, "learning_rate": 7.060876949302855e-06, "loss": 0.0236, "step": 3334 }, { "epoch": 0.73, "grad_norm": 0.2677385726256808, "learning_rate": 7.050030813896078e-06, "loss": 0.0278, "step": 3335 }, { "epoch": 0.73, "grad_norm": 0.28504892184507447, "learning_rate": 7.0391912325481e-06, "loss": 0.0325, "step": 3336 }, { "epoch": 0.73, "grad_norm": 0.36006169432920915, "learning_rate": 7.028358210744881e-06, "loss": 0.0477, "step": 3337 }, { "epoch": 0.73, "grad_norm": 0.21469547183354776, "learning_rate": 7.017531753969098e-06, "loss": 0.024, "step": 3338 }, { "epoch": 0.73, "grad_norm": 0.259636954657241, "learning_rate": 7.006711867700069e-06, "loss": 0.0307, "step": 3339 }, { "epoch": 0.73, "grad_norm": 0.24688085446478022, "learning_rate": 6.995898557413823e-06, "loss": 0.0296, "step": 3340 }, { "epoch": 0.73, "grad_norm": 0.2831645150009331, "learning_rate": 6.985091828583024e-06, "loss": 0.0363, "step": 3341 }, { "epoch": 0.73, "grad_norm": 0.278823985728811, "learning_rate": 6.974291686677035e-06, "loss": 0.0413, "step": 3342 }, { "epoch": 0.73, "grad_norm": 0.2746523572511557, "learning_rate": 6.96349813716187e-06, "loss": 0.0298, "step": 3343 }, { "epoch": 0.73, "grad_norm": 0.2719467808247288, "learning_rate": 6.952711185500223e-06, "loss": 0.0312, "step": 3344 }, { "epoch": 0.73, "grad_norm": 0.23624465193321353, "learning_rate": 6.941930837151416e-06, "loss": 0.0232, "step": 3345 }, { "epoch": 0.73, "grad_norm": 0.21903564332030248, "learning_rate": 6.931157097571468e-06, "loss": 0.0294, "step": 3346 }, { "epoch": 0.74, "grad_norm": 0.2691619388462333, "learning_rate": 6.920389972213017e-06, "loss": 0.0357, "step": 3347 }, { "epoch": 0.74, "grad_norm": 0.28274667722063346, "learning_rate": 6.909629466525389e-06, "loss": 0.0351, "step": 3348 }, { "epoch": 0.74, "grad_norm": 0.30060128612135306, "learning_rate": 6.898875585954527e-06, "loss": 0.0315, "step": 3349 }, { "epoch": 0.74, "grad_norm": 0.3506542341179284, "learning_rate": 6.888128335943041e-06, "loss": 0.046, "step": 3350 }, { "epoch": 0.74, "grad_norm": 0.3748067914417686, "learning_rate": 6.877387721930182e-06, "loss": 0.0425, "step": 3351 }, { "epoch": 0.74, "grad_norm": 0.3375190279153201, "learning_rate": 6.866653749351846e-06, "loss": 0.0399, "step": 3352 }, { "epoch": 0.74, "grad_norm": 0.29486608925001806, "learning_rate": 6.855926423640549e-06, "loss": 0.0347, "step": 3353 }, { "epoch": 0.74, "grad_norm": 0.280396105674734, "learning_rate": 6.84520575022547e-06, "loss": 0.0352, "step": 3354 }, { "epoch": 0.74, "grad_norm": 0.28398588460864693, "learning_rate": 6.8344917345323935e-06, "loss": 0.0439, "step": 3355 }, { "epoch": 0.74, "grad_norm": 0.31521055564088574, "learning_rate": 6.823784381983764e-06, "loss": 0.0381, "step": 3356 }, { "epoch": 0.74, "grad_norm": 0.2925692186413323, "learning_rate": 6.8130836979986236e-06, "loss": 0.0437, "step": 3357 }, { "epoch": 0.74, "grad_norm": 0.2670446214626016, "learning_rate": 6.802389687992666e-06, "loss": 0.0302, "step": 3358 }, { "epoch": 0.74, "grad_norm": 0.3065052462551992, "learning_rate": 6.791702357378185e-06, "loss": 0.0364, "step": 3359 }, { "epoch": 0.74, "grad_norm": 0.30816710228955385, "learning_rate": 6.781021711564107e-06, "loss": 0.0401, "step": 3360 }, { "epoch": 0.74, "grad_norm": 0.26630850813961854, "learning_rate": 6.770347755955982e-06, "loss": 0.0307, "step": 3361 }, { "epoch": 0.74, "grad_norm": 0.26040227214448936, "learning_rate": 6.7596804959559494e-06, "loss": 0.0314, "step": 3362 }, { "epoch": 0.74, "grad_norm": 0.23877831503346528, "learning_rate": 6.749019936962791e-06, "loss": 0.0354, "step": 3363 }, { "epoch": 0.74, "grad_norm": 0.2416854392082955, "learning_rate": 6.7383660843718635e-06, "loss": 0.0322, "step": 3364 }, { "epoch": 0.74, "grad_norm": 0.3053072470737272, "learning_rate": 6.727718943575161e-06, "loss": 0.0345, "step": 3365 }, { "epoch": 0.74, "grad_norm": 0.2022255644424565, "learning_rate": 6.717078519961257e-06, "loss": 0.0174, "step": 3366 }, { "epoch": 0.74, "grad_norm": 0.2389068286781463, "learning_rate": 6.706444818915345e-06, "loss": 0.024, "step": 3367 }, { "epoch": 0.74, "grad_norm": 0.30114718879034874, "learning_rate": 6.695817845819188e-06, "loss": 0.0386, "step": 3368 }, { "epoch": 0.74, "grad_norm": 0.2640765581395411, "learning_rate": 6.68519760605119e-06, "loss": 0.0272, "step": 3369 }, { "epoch": 0.74, "grad_norm": 0.254462659503167, "learning_rate": 6.674584104986295e-06, "loss": 0.0303, "step": 3370 }, { "epoch": 0.74, "grad_norm": 0.353073391476766, "learning_rate": 6.66397734799608e-06, "loss": 0.0402, "step": 3371 }, { "epoch": 0.74, "grad_norm": 0.2137880832225216, "learning_rate": 6.653377340448673e-06, "loss": 0.0215, "step": 3372 }, { "epoch": 0.74, "grad_norm": 0.2648998909920871, "learning_rate": 6.642784087708814e-06, "loss": 0.0256, "step": 3373 }, { "epoch": 0.74, "grad_norm": 0.2506035433595592, "learning_rate": 6.6321975951378034e-06, "loss": 0.0279, "step": 3374 }, { "epoch": 0.74, "grad_norm": 0.22480728717961235, "learning_rate": 6.621617868093544e-06, "loss": 0.0218, "step": 3375 }, { "epoch": 0.74, "grad_norm": 0.2735224591828485, "learning_rate": 6.611044911930477e-06, "loss": 0.0343, "step": 3376 }, { "epoch": 0.74, "grad_norm": 0.23876396906016184, "learning_rate": 6.6004787319996714e-06, "loss": 0.0291, "step": 3377 }, { "epoch": 0.74, "grad_norm": 0.3272250071635811, "learning_rate": 6.589919333648711e-06, "loss": 0.0406, "step": 3378 }, { "epoch": 0.74, "grad_norm": 0.2897342145880577, "learning_rate": 6.579366722221789e-06, "loss": 0.0368, "step": 3379 }, { "epoch": 0.74, "grad_norm": 0.3224209613385021, "learning_rate": 6.568820903059632e-06, "loss": 0.0395, "step": 3380 }, { "epoch": 0.74, "grad_norm": 0.26667334739442633, "learning_rate": 6.558281881499556e-06, "loss": 0.0408, "step": 3381 }, { "epoch": 0.74, "grad_norm": 0.2810547733903214, "learning_rate": 6.547749662875411e-06, "loss": 0.0356, "step": 3382 }, { "epoch": 0.74, "grad_norm": 0.27188158136930185, "learning_rate": 6.537224252517633e-06, "loss": 0.0232, "step": 3383 }, { "epoch": 0.74, "grad_norm": 0.24156934543400135, "learning_rate": 6.526705655753183e-06, "loss": 0.034, "step": 3384 }, { "epoch": 0.74, "grad_norm": 0.23455296753242358, "learning_rate": 6.516193877905592e-06, "loss": 0.029, "step": 3385 }, { "epoch": 0.74, "grad_norm": 0.3370798117890245, "learning_rate": 6.505688924294944e-06, "loss": 0.0356, "step": 3386 }, { "epoch": 0.74, "grad_norm": 0.2723097159370534, "learning_rate": 6.495190800237845e-06, "loss": 0.0322, "step": 3387 }, { "epoch": 0.74, "grad_norm": 0.23418612817301912, "learning_rate": 6.484699511047474e-06, "loss": 0.0225, "step": 3388 }, { "epoch": 0.74, "grad_norm": 0.2419803256251913, "learning_rate": 6.474215062033527e-06, "loss": 0.0231, "step": 3389 }, { "epoch": 0.74, "grad_norm": 0.30069783527957566, "learning_rate": 6.463737458502255e-06, "loss": 0.0358, "step": 3390 }, { "epoch": 0.74, "grad_norm": 0.26570232051378745, "learning_rate": 6.453266705756427e-06, "loss": 0.03, "step": 3391 }, { "epoch": 0.75, "grad_norm": 0.2755429272846124, "learning_rate": 6.442802809095363e-06, "loss": 0.0344, "step": 3392 }, { "epoch": 0.75, "grad_norm": 0.2552849093532269, "learning_rate": 6.4323457738149034e-06, "loss": 0.0264, "step": 3393 }, { "epoch": 0.75, "grad_norm": 0.3070690782068707, "learning_rate": 6.421895605207427e-06, "loss": 0.0228, "step": 3394 }, { "epoch": 0.75, "grad_norm": 0.21608959849101006, "learning_rate": 6.41145230856181e-06, "loss": 0.0219, "step": 3395 }, { "epoch": 0.75, "grad_norm": 0.3256684718703453, "learning_rate": 6.401015889163489e-06, "loss": 0.0457, "step": 3396 }, { "epoch": 0.75, "grad_norm": 0.27969183039376333, "learning_rate": 6.3905863522943786e-06, "loss": 0.0294, "step": 3397 }, { "epoch": 0.75, "grad_norm": 0.32161444562324737, "learning_rate": 6.380163703232953e-06, "loss": 0.0523, "step": 3398 }, { "epoch": 0.75, "grad_norm": 0.30481554666388455, "learning_rate": 6.369747947254159e-06, "loss": 0.038, "step": 3399 }, { "epoch": 0.75, "grad_norm": 0.2658260510208518, "learning_rate": 6.35933908962949e-06, "loss": 0.0303, "step": 3400 }, { "epoch": 0.75, "grad_norm": 0.2595659506770729, "learning_rate": 6.348937135626922e-06, "loss": 0.0311, "step": 3401 }, { "epoch": 0.75, "grad_norm": 0.20705685367249238, "learning_rate": 6.338542090510951e-06, "loss": 0.022, "step": 3402 }, { "epoch": 0.75, "grad_norm": 0.28922769620260047, "learning_rate": 6.328153959542573e-06, "loss": 0.0294, "step": 3403 }, { "epoch": 0.75, "grad_norm": 0.32985597391757204, "learning_rate": 6.3177727479792914e-06, "loss": 0.0394, "step": 3404 }, { "epoch": 0.75, "grad_norm": 0.23242230775937095, "learning_rate": 6.307398461075091e-06, "loss": 0.0285, "step": 3405 }, { "epoch": 0.75, "grad_norm": 0.27369074674458943, "learning_rate": 6.297031104080471e-06, "loss": 0.0238, "step": 3406 }, { "epoch": 0.75, "grad_norm": 0.262031994012767, "learning_rate": 6.286670682242404e-06, "loss": 0.0308, "step": 3407 }, { "epoch": 0.75, "grad_norm": 0.26526526326030525, "learning_rate": 6.276317200804376e-06, "loss": 0.0274, "step": 3408 }, { "epoch": 0.75, "grad_norm": 0.3540060319507838, "learning_rate": 6.265970665006334e-06, "loss": 0.0469, "step": 3409 }, { "epoch": 0.75, "grad_norm": 0.2258842114700864, "learning_rate": 6.255631080084735e-06, "loss": 0.027, "step": 3410 }, { "epoch": 0.75, "grad_norm": 0.31748298824722165, "learning_rate": 6.245298451272486e-06, "loss": 0.0358, "step": 3411 }, { "epoch": 0.75, "grad_norm": 0.2888240733028864, "learning_rate": 6.234972783799023e-06, "loss": 0.0362, "step": 3412 }, { "epoch": 0.75, "grad_norm": 0.2723556916884701, "learning_rate": 6.224654082890207e-06, "loss": 0.03, "step": 3413 }, { "epoch": 0.75, "grad_norm": 0.2978367536021447, "learning_rate": 6.214342353768412e-06, "loss": 0.0324, "step": 3414 }, { "epoch": 0.75, "grad_norm": 0.29954612094059346, "learning_rate": 6.2040376016524506e-06, "loss": 0.0492, "step": 3415 }, { "epoch": 0.75, "grad_norm": 0.30504933904023807, "learning_rate": 6.193739831757637e-06, "loss": 0.0362, "step": 3416 }, { "epoch": 0.75, "grad_norm": 0.3184118403165737, "learning_rate": 6.183449049295722e-06, "loss": 0.0334, "step": 3417 }, { "epoch": 0.75, "grad_norm": 0.2485101608567878, "learning_rate": 6.1731652594749465e-06, "loss": 0.0288, "step": 3418 }, { "epoch": 0.75, "grad_norm": 0.23786661938343767, "learning_rate": 6.162888467499988e-06, "loss": 0.0282, "step": 3419 }, { "epoch": 0.75, "grad_norm": 0.29388923997343264, "learning_rate": 6.152618678571996e-06, "loss": 0.0257, "step": 3420 }, { "epoch": 0.75, "grad_norm": 0.414291264659481, "learning_rate": 6.1423558978885836e-06, "loss": 0.0706, "step": 3421 }, { "epoch": 0.75, "grad_norm": 0.24517493308692564, "learning_rate": 6.1321001306437946e-06, "loss": 0.0268, "step": 3422 }, { "epoch": 0.75, "grad_norm": 0.27522385058717846, "learning_rate": 6.121851382028146e-06, "loss": 0.0316, "step": 3423 }, { "epoch": 0.75, "grad_norm": 0.2731825730117969, "learning_rate": 6.111609657228581e-06, "loss": 0.04, "step": 3424 }, { "epoch": 0.75, "grad_norm": 0.3421778054801919, "learning_rate": 6.101374961428512e-06, "loss": 0.0295, "step": 3425 }, { "epoch": 0.75, "grad_norm": 0.2716664314464027, "learning_rate": 6.091147299807769e-06, "loss": 0.0325, "step": 3426 }, { "epoch": 0.75, "grad_norm": 0.37625543729839095, "learning_rate": 6.080926677542646e-06, "loss": 0.0461, "step": 3427 }, { "epoch": 0.75, "grad_norm": 0.2710786161128589, "learning_rate": 6.070713099805845e-06, "loss": 0.0327, "step": 3428 }, { "epoch": 0.75, "grad_norm": 0.2750153250108817, "learning_rate": 6.0605065717665445e-06, "loss": 0.0272, "step": 3429 }, { "epoch": 0.75, "grad_norm": 0.27125907715758363, "learning_rate": 6.050307098590311e-06, "loss": 0.0327, "step": 3430 }, { "epoch": 0.75, "grad_norm": 0.24568234978981157, "learning_rate": 6.040114685439175e-06, "loss": 0.0278, "step": 3431 }, { "epoch": 0.75, "grad_norm": 0.26473579513348117, "learning_rate": 6.029929337471565e-06, "loss": 0.0251, "step": 3432 }, { "epoch": 0.75, "grad_norm": 0.2821258180316564, "learning_rate": 6.019751059842362e-06, "loss": 0.0373, "step": 3433 }, { "epoch": 0.75, "grad_norm": 0.2629471744972731, "learning_rate": 6.009579857702843e-06, "loss": 0.0273, "step": 3434 }, { "epoch": 0.75, "grad_norm": 0.2585106845912787, "learning_rate": 5.999415736200724e-06, "loss": 0.0311, "step": 3435 }, { "epoch": 0.75, "grad_norm": 0.3633553009034456, "learning_rate": 5.98925870048012e-06, "loss": 0.0366, "step": 3436 }, { "epoch": 0.75, "grad_norm": 0.35135252298030384, "learning_rate": 5.979108755681575e-06, "loss": 0.0429, "step": 3437 }, { "epoch": 0.76, "grad_norm": 0.2768956715291986, "learning_rate": 5.968965906942039e-06, "loss": 0.0386, "step": 3438 }, { "epoch": 0.76, "grad_norm": 0.2373179757750094, "learning_rate": 5.958830159394875e-06, "loss": 0.0292, "step": 3439 }, { "epoch": 0.76, "grad_norm": 0.27913695872408384, "learning_rate": 5.948701518169835e-06, "loss": 0.0353, "step": 3440 }, { "epoch": 0.76, "grad_norm": 0.26605618765585465, "learning_rate": 5.938579988393099e-06, "loss": 0.0366, "step": 3441 }, { "epoch": 0.76, "grad_norm": 0.23421451319195605, "learning_rate": 5.928465575187221e-06, "loss": 0.0304, "step": 3442 }, { "epoch": 0.76, "grad_norm": 0.2615844894609264, "learning_rate": 5.918358283671182e-06, "loss": 0.0386, "step": 3443 }, { "epoch": 0.76, "grad_norm": 0.27336979436518566, "learning_rate": 5.90825811896033e-06, "loss": 0.0379, "step": 3444 }, { "epoch": 0.76, "grad_norm": 0.365486384468251, "learning_rate": 5.89816508616643e-06, "loss": 0.0427, "step": 3445 }, { "epoch": 0.76, "grad_norm": 0.29642779145819365, "learning_rate": 5.888079190397628e-06, "loss": 0.0477, "step": 3446 }, { "epoch": 0.76, "grad_norm": 0.22215723703476092, "learning_rate": 5.878000436758453e-06, "loss": 0.0203, "step": 3447 }, { "epoch": 0.76, "grad_norm": 0.21405507026085965, "learning_rate": 5.86792883034983e-06, "loss": 0.0232, "step": 3448 }, { "epoch": 0.76, "grad_norm": 0.2628546164396591, "learning_rate": 5.857864376269051e-06, "loss": 0.0271, "step": 3449 }, { "epoch": 0.76, "grad_norm": 0.2734873453757123, "learning_rate": 5.847807079609804e-06, "loss": 0.0305, "step": 3450 }, { "epoch": 0.76, "grad_norm": 0.22837991841975297, "learning_rate": 5.837756945462154e-06, "loss": 0.0294, "step": 3451 }, { "epoch": 0.76, "grad_norm": 0.2840929849505443, "learning_rate": 5.827713978912524e-06, "loss": 0.0252, "step": 3452 }, { "epoch": 0.76, "grad_norm": 0.21994348626473945, "learning_rate": 5.817678185043733e-06, "loss": 0.0356, "step": 3453 }, { "epoch": 0.76, "grad_norm": 0.2650177690558583, "learning_rate": 5.807649568934945e-06, "loss": 0.0319, "step": 3454 }, { "epoch": 0.76, "grad_norm": 0.2915132735135509, "learning_rate": 5.79762813566171e-06, "loss": 0.0342, "step": 3455 }, { "epoch": 0.76, "grad_norm": 0.2420591891239867, "learning_rate": 5.7876138902959445e-06, "loss": 0.0258, "step": 3456 }, { "epoch": 0.76, "grad_norm": 0.272814886513111, "learning_rate": 5.777606837905905e-06, "loss": 0.0363, "step": 3457 }, { "epoch": 0.76, "grad_norm": 0.22757156696554626, "learning_rate": 5.767606983556237e-06, "loss": 0.0327, "step": 3458 }, { "epoch": 0.76, "grad_norm": 0.25811792835034714, "learning_rate": 5.757614332307912e-06, "loss": 0.025, "step": 3459 }, { "epoch": 0.76, "grad_norm": 0.2748675417510592, "learning_rate": 5.7476288892182905e-06, "loss": 0.0406, "step": 3460 }, { "epoch": 0.76, "grad_norm": 0.29713312020558075, "learning_rate": 5.737650659341048e-06, "loss": 0.0299, "step": 3461 }, { "epoch": 0.76, "grad_norm": 0.27745443472639314, "learning_rate": 5.7276796477262365e-06, "loss": 0.0344, "step": 3462 }, { "epoch": 0.76, "grad_norm": 0.17985481267953204, "learning_rate": 5.717715859420246e-06, "loss": 0.018, "step": 3463 }, { "epoch": 0.76, "grad_norm": 0.19989098468629776, "learning_rate": 5.707759299465816e-06, "loss": 0.0179, "step": 3464 }, { "epoch": 0.76, "grad_norm": 0.2669343513181597, "learning_rate": 5.6978099729020105e-06, "loss": 0.0304, "step": 3465 }, { "epoch": 0.76, "grad_norm": 0.21945043379538026, "learning_rate": 5.68786788476426e-06, "loss": 0.0249, "step": 3466 }, { "epoch": 0.76, "grad_norm": 0.27184858522676725, "learning_rate": 5.6779330400843e-06, "loss": 0.0297, "step": 3467 }, { "epoch": 0.76, "grad_norm": 0.26262306439441285, "learning_rate": 5.66800544389023e-06, "loss": 0.023, "step": 3468 }, { "epoch": 0.76, "grad_norm": 0.32370174504924254, "learning_rate": 5.658085101206456e-06, "loss": 0.0523, "step": 3469 }, { "epoch": 0.76, "grad_norm": 0.27778446749075747, "learning_rate": 5.648172017053737e-06, "loss": 0.0287, "step": 3470 }, { "epoch": 0.76, "grad_norm": 0.4630715543716684, "learning_rate": 5.638266196449123e-06, "loss": 0.0579, "step": 3471 }, { "epoch": 0.76, "grad_norm": 0.2009417597347354, "learning_rate": 5.628367644406039e-06, "loss": 0.0205, "step": 3472 }, { "epoch": 0.76, "grad_norm": 0.3200980375658781, "learning_rate": 5.618476365934184e-06, "loss": 0.0349, "step": 3473 }, { "epoch": 0.76, "grad_norm": 0.28005963986738336, "learning_rate": 5.608592366039607e-06, "loss": 0.0403, "step": 3474 }, { "epoch": 0.76, "grad_norm": 0.25529178241170714, "learning_rate": 5.598715649724647e-06, "loss": 0.0283, "step": 3475 }, { "epoch": 0.76, "grad_norm": 0.2284862303779472, "learning_rate": 5.588846221987982e-06, "loss": 0.0334, "step": 3476 }, { "epoch": 0.76, "grad_norm": 0.23788867724142732, "learning_rate": 5.578984087824581e-06, "loss": 0.0219, "step": 3477 }, { "epoch": 0.76, "grad_norm": 0.2614670582312282, "learning_rate": 5.569129252225745e-06, "loss": 0.0258, "step": 3478 }, { "epoch": 0.76, "grad_norm": 0.24618739662400665, "learning_rate": 5.559281720179046e-06, "loss": 0.0221, "step": 3479 }, { "epoch": 0.76, "grad_norm": 0.2834080185115215, "learning_rate": 5.549441496668393e-06, "loss": 0.0385, "step": 3480 }, { "epoch": 0.76, "grad_norm": 0.20664769757287502, "learning_rate": 5.539608586673988e-06, "loss": 0.0302, "step": 3481 }, { "epoch": 0.76, "grad_norm": 0.25517500283450656, "learning_rate": 5.529782995172315e-06, "loss": 0.0257, "step": 3482 }, { "epoch": 0.76, "grad_norm": 0.2605643648285357, "learning_rate": 5.519964727136178e-06, "loss": 0.0308, "step": 3483 }, { "epoch": 0.77, "grad_norm": 0.2539689646809514, "learning_rate": 5.510153787534651e-06, "loss": 0.0201, "step": 3484 }, { "epoch": 0.77, "grad_norm": 0.2587773590320521, "learning_rate": 5.500350181333121e-06, "loss": 0.034, "step": 3485 }, { "epoch": 0.77, "grad_norm": 0.24054653541860618, "learning_rate": 5.490553913493242e-06, "loss": 0.0216, "step": 3486 }, { "epoch": 0.77, "grad_norm": 0.33018462590373493, "learning_rate": 5.48076498897298e-06, "loss": 0.0315, "step": 3487 }, { "epoch": 0.77, "grad_norm": 0.27005919291089847, "learning_rate": 5.470983412726547e-06, "loss": 0.0232, "step": 3488 }, { "epoch": 0.77, "grad_norm": 0.20541222015878996, "learning_rate": 5.461209189704486e-06, "loss": 0.0172, "step": 3489 }, { "epoch": 0.77, "grad_norm": 0.23395304844311773, "learning_rate": 5.451442324853571e-06, "loss": 0.0257, "step": 3490 }, { "epoch": 0.77, "grad_norm": 0.24403252306408385, "learning_rate": 5.441682823116887e-06, "loss": 0.0237, "step": 3491 }, { "epoch": 0.77, "grad_norm": 0.2592484625124243, "learning_rate": 5.431930689433762e-06, "loss": 0.0246, "step": 3492 }, { "epoch": 0.77, "grad_norm": 0.26405085419475327, "learning_rate": 5.422185928739827e-06, "loss": 0.0247, "step": 3493 }, { "epoch": 0.77, "grad_norm": 0.23156080301978307, "learning_rate": 5.4124485459669485e-06, "loss": 0.0264, "step": 3494 }, { "epoch": 0.77, "grad_norm": 0.2927319905143484, "learning_rate": 5.402718546043293e-06, "loss": 0.0275, "step": 3495 }, { "epoch": 0.77, "grad_norm": 0.3207400538826871, "learning_rate": 5.392995933893255e-06, "loss": 0.0259, "step": 3496 }, { "epoch": 0.77, "grad_norm": 0.28107603308849083, "learning_rate": 5.383280714437518e-06, "loss": 0.0252, "step": 3497 }, { "epoch": 0.77, "grad_norm": 0.28216600923457863, "learning_rate": 5.373572892593013e-06, "loss": 0.0379, "step": 3498 }, { "epoch": 0.77, "grad_norm": 0.22892613503596634, "learning_rate": 5.363872473272935e-06, "loss": 0.0222, "step": 3499 }, { "epoch": 0.77, "grad_norm": 0.24876891199660664, "learning_rate": 5.354179461386712e-06, "loss": 0.039, "step": 3500 }, { "epoch": 0.77, "grad_norm": 0.38444579333155693, "learning_rate": 5.3444938618400524e-06, "loss": 0.0455, "step": 3501 }, { "epoch": 0.77, "grad_norm": 0.26324160755669274, "learning_rate": 5.334815679534882e-06, "loss": 0.0241, "step": 3502 }, { "epoch": 0.77, "grad_norm": 0.24986183550745963, "learning_rate": 5.325144919369398e-06, "loss": 0.0287, "step": 3503 }, { "epoch": 0.77, "grad_norm": 0.2715735803501177, "learning_rate": 5.315481586238025e-06, "loss": 0.0253, "step": 3504 }, { "epoch": 0.77, "grad_norm": 0.2895472817426594, "learning_rate": 5.305825685031445e-06, "loss": 0.0296, "step": 3505 }, { "epoch": 0.77, "grad_norm": 0.25753565576129356, "learning_rate": 5.296177220636556e-06, "loss": 0.0263, "step": 3506 }, { "epoch": 0.77, "grad_norm": 0.25278871061247116, "learning_rate": 5.286536197936512e-06, "loss": 0.0349, "step": 3507 }, { "epoch": 0.77, "grad_norm": 0.3275757120245091, "learning_rate": 5.276902621810691e-06, "loss": 0.0298, "step": 3508 }, { "epoch": 0.77, "grad_norm": 0.2513118834223532, "learning_rate": 5.267276497134715e-06, "loss": 0.0292, "step": 3509 }, { "epoch": 0.77, "grad_norm": 0.2804987463676603, "learning_rate": 5.257657828780409e-06, "loss": 0.0289, "step": 3510 }, { "epoch": 0.77, "grad_norm": 0.2999060270032961, "learning_rate": 5.2480466216158565e-06, "loss": 0.0376, "step": 3511 }, { "epoch": 0.77, "grad_norm": 0.2903810148621985, "learning_rate": 5.2384428805053325e-06, "loss": 0.0302, "step": 3512 }, { "epoch": 0.77, "grad_norm": 0.3013708108225558, "learning_rate": 5.228846610309359e-06, "loss": 0.0365, "step": 3513 }, { "epoch": 0.77, "grad_norm": 0.3061993089879895, "learning_rate": 5.219257815884662e-06, "loss": 0.0314, "step": 3514 }, { "epoch": 0.77, "grad_norm": 0.3321456487709721, "learning_rate": 5.209676502084191e-06, "loss": 0.0341, "step": 3515 }, { "epoch": 0.77, "grad_norm": 0.2771528150926492, "learning_rate": 5.200102673757115e-06, "loss": 0.033, "step": 3516 }, { "epoch": 0.77, "grad_norm": 0.2362254842017043, "learning_rate": 5.190536335748792e-06, "loss": 0.0235, "step": 3517 }, { "epoch": 0.77, "grad_norm": 0.32312161596950045, "learning_rate": 5.180977492900823e-06, "loss": 0.0386, "step": 3518 }, { "epoch": 0.77, "grad_norm": 0.2725945558677096, "learning_rate": 5.171426150050977e-06, "loss": 0.0316, "step": 3519 }, { "epoch": 0.77, "grad_norm": 0.35931874954513726, "learning_rate": 5.161882312033264e-06, "loss": 0.0579, "step": 3520 }, { "epoch": 0.77, "grad_norm": 0.23317933272713304, "learning_rate": 5.152345983677866e-06, "loss": 0.0182, "step": 3521 }, { "epoch": 0.77, "grad_norm": 0.24431754758865426, "learning_rate": 5.142817169811189e-06, "loss": 0.0259, "step": 3522 }, { "epoch": 0.77, "grad_norm": 0.23309584279315684, "learning_rate": 5.133295875255808e-06, "loss": 0.0228, "step": 3523 }, { "epoch": 0.77, "grad_norm": 0.24801257038945074, "learning_rate": 5.1237821048305305e-06, "loss": 0.0295, "step": 3524 }, { "epoch": 0.77, "grad_norm": 0.22829640298379072, "learning_rate": 5.114275863350313e-06, "loss": 0.0222, "step": 3525 }, { "epoch": 0.77, "grad_norm": 0.29244651024135737, "learning_rate": 5.104777155626341e-06, "loss": 0.0262, "step": 3526 }, { "epoch": 0.77, "grad_norm": 0.2948691845239951, "learning_rate": 5.095285986465952e-06, "loss": 0.0307, "step": 3527 }, { "epoch": 0.77, "grad_norm": 0.25266239963235654, "learning_rate": 5.085802360672701e-06, "loss": 0.0258, "step": 3528 }, { "epoch": 0.78, "grad_norm": 0.19868677769673954, "learning_rate": 5.076326283046291e-06, "loss": 0.02, "step": 3529 }, { "epoch": 0.78, "grad_norm": 0.361861026704966, "learning_rate": 5.066857758382642e-06, "loss": 0.0369, "step": 3530 }, { "epoch": 0.78, "grad_norm": 0.2777206385309938, "learning_rate": 5.057396791473807e-06, "loss": 0.027, "step": 3531 }, { "epoch": 0.78, "grad_norm": 0.24217204844498547, "learning_rate": 5.047943387108072e-06, "loss": 0.0343, "step": 3532 }, { "epoch": 0.78, "grad_norm": 0.27128613046774036, "learning_rate": 5.038497550069836e-06, "loss": 0.0323, "step": 3533 }, { "epoch": 0.78, "grad_norm": 0.25555773418961775, "learning_rate": 5.029059285139715e-06, "loss": 0.0259, "step": 3534 }, { "epoch": 0.78, "grad_norm": 0.26456868282566975, "learning_rate": 5.019628597094455e-06, "loss": 0.0216, "step": 3535 }, { "epoch": 0.78, "grad_norm": 0.1992101760964003, "learning_rate": 5.010205490706998e-06, "loss": 0.0241, "step": 3536 }, { "epoch": 0.78, "grad_norm": 0.2213759420941348, "learning_rate": 5.000789970746427e-06, "loss": 0.0179, "step": 3537 }, { "epoch": 0.78, "grad_norm": 0.3218004163420789, "learning_rate": 4.9913820419780035e-06, "loss": 0.0291, "step": 3538 }, { "epoch": 0.78, "grad_norm": 0.24229933094903675, "learning_rate": 4.981981709163126e-06, "loss": 0.025, "step": 3539 }, { "epoch": 0.78, "grad_norm": 0.26500260138134524, "learning_rate": 4.972588977059369e-06, "loss": 0.0312, "step": 3540 }, { "epoch": 0.78, "grad_norm": 0.2412525558089232, "learning_rate": 4.963203850420455e-06, "loss": 0.0268, "step": 3541 }, { "epoch": 0.78, "grad_norm": 0.2521682973733539, "learning_rate": 4.953826333996243e-06, "loss": 0.0292, "step": 3542 }, { "epoch": 0.78, "grad_norm": 0.2608333813928269, "learning_rate": 4.944456432532765e-06, "loss": 0.0319, "step": 3543 }, { "epoch": 0.78, "grad_norm": 0.2768863210021235, "learning_rate": 4.93509415077217e-06, "loss": 0.0426, "step": 3544 }, { "epoch": 0.78, "grad_norm": 0.28003648051964447, "learning_rate": 4.925739493452783e-06, "loss": 0.0377, "step": 3545 }, { "epoch": 0.78, "grad_norm": 0.2550543284744202, "learning_rate": 4.916392465309037e-06, "loss": 0.0293, "step": 3546 }, { "epoch": 0.78, "grad_norm": 0.35349700415028396, "learning_rate": 4.907053071071535e-06, "loss": 0.0384, "step": 3547 }, { "epoch": 0.78, "grad_norm": 0.2566989489840341, "learning_rate": 4.89772131546699e-06, "loss": 0.0259, "step": 3548 }, { "epoch": 0.78, "grad_norm": 0.21212567254501036, "learning_rate": 4.888397203218265e-06, "loss": 0.0248, "step": 3549 }, { "epoch": 0.78, "grad_norm": 0.28878337683718536, "learning_rate": 4.879080739044351e-06, "loss": 0.0309, "step": 3550 }, { "epoch": 0.78, "grad_norm": 0.24755283025469316, "learning_rate": 4.869771927660371e-06, "loss": 0.0331, "step": 3551 }, { "epoch": 0.78, "grad_norm": 0.21122812630478413, "learning_rate": 4.860470773777566e-06, "loss": 0.0178, "step": 3552 }, { "epoch": 0.78, "grad_norm": 0.2693034328794336, "learning_rate": 4.851177282103312e-06, "loss": 0.0347, "step": 3553 }, { "epoch": 0.78, "grad_norm": 0.23671622762815486, "learning_rate": 4.841891457341095e-06, "loss": 0.0317, "step": 3554 }, { "epoch": 0.78, "grad_norm": 0.26564499835858235, "learning_rate": 4.832613304190537e-06, "loss": 0.0306, "step": 3555 }, { "epoch": 0.78, "grad_norm": 0.276596284408431, "learning_rate": 4.823342827347357e-06, "loss": 0.0291, "step": 3556 }, { "epoch": 0.78, "grad_norm": 0.2284591882249527, "learning_rate": 4.814080031503407e-06, "loss": 0.0222, "step": 3557 }, { "epoch": 0.78, "grad_norm": 0.288734481898206, "learning_rate": 4.804824921346645e-06, "loss": 0.0365, "step": 3558 }, { "epoch": 0.78, "grad_norm": 0.3201243964751489, "learning_rate": 4.795577501561144e-06, "loss": 0.0397, "step": 3559 }, { "epoch": 0.78, "grad_norm": 0.24722216014706502, "learning_rate": 4.786337776827066e-06, "loss": 0.0272, "step": 3560 }, { "epoch": 0.78, "grad_norm": 0.217336635706269, "learning_rate": 4.777105751820708e-06, "loss": 0.0205, "step": 3561 }, { "epoch": 0.78, "grad_norm": 0.22388667061618933, "learning_rate": 4.767881431214441e-06, "loss": 0.0247, "step": 3562 }, { "epoch": 0.78, "grad_norm": 0.2911109512169312, "learning_rate": 4.758664819676759e-06, "loss": 0.0424, "step": 3563 }, { "epoch": 0.78, "grad_norm": 0.2558790738042841, "learning_rate": 4.7494559218722395e-06, "loss": 0.0295, "step": 3564 }, { "epoch": 0.78, "grad_norm": 0.271038856225442, "learning_rate": 4.74025474246157e-06, "loss": 0.0316, "step": 3565 }, { "epoch": 0.78, "grad_norm": 0.21785685733395657, "learning_rate": 4.7310612861015125e-06, "loss": 0.0276, "step": 3566 }, { "epoch": 0.78, "grad_norm": 0.3025124505530959, "learning_rate": 4.7218755574449394e-06, "loss": 0.0348, "step": 3567 }, { "epoch": 0.78, "grad_norm": 0.2646960535885467, "learning_rate": 4.712697561140802e-06, "loss": 0.0415, "step": 3568 }, { "epoch": 0.78, "grad_norm": 0.2712372677535224, "learning_rate": 4.703527301834148e-06, "loss": 0.0301, "step": 3569 }, { "epoch": 0.78, "grad_norm": 0.3025306754676668, "learning_rate": 4.69436478416609e-06, "loss": 0.0361, "step": 3570 }, { "epoch": 0.78, "grad_norm": 0.2173128089024891, "learning_rate": 4.685210012773844e-06, "loss": 0.0255, "step": 3571 }, { "epoch": 0.78, "grad_norm": 0.279395026186865, "learning_rate": 4.676062992290686e-06, "loss": 0.0243, "step": 3572 }, { "epoch": 0.78, "grad_norm": 0.3091795467682432, "learning_rate": 4.666923727345991e-06, "loss": 0.0309, "step": 3573 }, { "epoch": 0.78, "grad_norm": 0.2500044405065996, "learning_rate": 4.657792222565185e-06, "loss": 0.0299, "step": 3574 }, { "epoch": 0.79, "grad_norm": 0.27663942010892373, "learning_rate": 4.6486684825697845e-06, "loss": 0.0426, "step": 3575 }, { "epoch": 0.79, "grad_norm": 0.32554082300480536, "learning_rate": 4.639552511977374e-06, "loss": 0.0415, "step": 3576 }, { "epoch": 0.79, "grad_norm": 0.23337846135799326, "learning_rate": 4.630444315401594e-06, "loss": 0.0297, "step": 3577 }, { "epoch": 0.79, "grad_norm": 0.21699514000752693, "learning_rate": 4.621343897452169e-06, "loss": 0.0246, "step": 3578 }, { "epoch": 0.79, "grad_norm": 0.2296995565273345, "learning_rate": 4.612251262734864e-06, "loss": 0.0282, "step": 3579 }, { "epoch": 0.79, "grad_norm": 0.2650624798273476, "learning_rate": 4.603166415851527e-06, "loss": 0.0381, "step": 3580 }, { "epoch": 0.79, "grad_norm": 0.24872530874554657, "learning_rate": 4.594089361400047e-06, "loss": 0.0313, "step": 3581 }, { "epoch": 0.79, "grad_norm": 0.22568069053377085, "learning_rate": 4.585020103974387e-06, "loss": 0.0187, "step": 3582 }, { "epoch": 0.79, "grad_norm": 0.2579961469440179, "learning_rate": 4.575958648164536e-06, "loss": 0.0306, "step": 3583 }, { "epoch": 0.79, "grad_norm": 0.25226387554943125, "learning_rate": 4.5669049985565735e-06, "loss": 0.0305, "step": 3584 }, { "epoch": 0.79, "grad_norm": 0.2301970755656676, "learning_rate": 4.5578591597325935e-06, "loss": 0.0235, "step": 3585 }, { "epoch": 0.79, "grad_norm": 0.273026429858453, "learning_rate": 4.54882113627076e-06, "loss": 0.0295, "step": 3586 }, { "epoch": 0.79, "grad_norm": 0.25671518772600055, "learning_rate": 4.53979093274526e-06, "loss": 0.0295, "step": 3587 }, { "epoch": 0.79, "grad_norm": 0.24103823359464405, "learning_rate": 4.530768553726348e-06, "loss": 0.0281, "step": 3588 }, { "epoch": 0.79, "grad_norm": 0.2895857997258674, "learning_rate": 4.521754003780294e-06, "loss": 0.0335, "step": 3589 }, { "epoch": 0.79, "grad_norm": 0.23744475387192743, "learning_rate": 4.512747287469426e-06, "loss": 0.0282, "step": 3590 }, { "epoch": 0.79, "grad_norm": 0.22834568551656584, "learning_rate": 4.503748409352089e-06, "loss": 0.0291, "step": 3591 }, { "epoch": 0.79, "grad_norm": 0.24846882112866903, "learning_rate": 4.494757373982674e-06, "loss": 0.0255, "step": 3592 }, { "epoch": 0.79, "grad_norm": 0.24942828909284545, "learning_rate": 4.4857741859116024e-06, "loss": 0.028, "step": 3593 }, { "epoch": 0.79, "grad_norm": 0.2613375150307862, "learning_rate": 4.476798849685322e-06, "loss": 0.0219, "step": 3594 }, { "epoch": 0.79, "grad_norm": 0.26663221804814036, "learning_rate": 4.467831369846301e-06, "loss": 0.0359, "step": 3595 }, { "epoch": 0.79, "grad_norm": 0.21777006753956984, "learning_rate": 4.458871750933038e-06, "loss": 0.0219, "step": 3596 }, { "epoch": 0.79, "grad_norm": 0.25977810134181817, "learning_rate": 4.449919997480047e-06, "loss": 0.0283, "step": 3597 }, { "epoch": 0.79, "grad_norm": 0.276873794837206, "learning_rate": 4.4409761140178765e-06, "loss": 0.026, "step": 3598 }, { "epoch": 0.79, "grad_norm": 0.31331411650670776, "learning_rate": 4.432040105073065e-06, "loss": 0.0369, "step": 3599 }, { "epoch": 0.79, "grad_norm": 0.25877502691246485, "learning_rate": 4.4231119751681885e-06, "loss": 0.0414, "step": 3600 }, { "epoch": 0.79, "grad_norm": 0.2487674519392905, "learning_rate": 4.414191728821838e-06, "loss": 0.0343, "step": 3601 }, { "epoch": 0.79, "grad_norm": 0.234888951728711, "learning_rate": 4.405279370548587e-06, "loss": 0.0313, "step": 3602 }, { "epoch": 0.79, "grad_norm": 0.2263207543502551, "learning_rate": 4.396374904859051e-06, "loss": 0.0324, "step": 3603 }, { "epoch": 0.79, "grad_norm": 0.34164101452982987, "learning_rate": 4.387478336259821e-06, "loss": 0.0471, "step": 3604 }, { "epoch": 0.79, "grad_norm": 0.2783808859779696, "learning_rate": 4.3785896692535165e-06, "loss": 0.0417, "step": 3605 }, { "epoch": 0.79, "grad_norm": 0.3006577671775209, "learning_rate": 4.369708908338735e-06, "loss": 0.0285, "step": 3606 }, { "epoch": 0.79, "grad_norm": 0.20801565665914815, "learning_rate": 4.360836058010096e-06, "loss": 0.0216, "step": 3607 }, { "epoch": 0.79, "grad_norm": 0.2478282971656624, "learning_rate": 4.351971122758194e-06, "loss": 0.0242, "step": 3608 }, { "epoch": 0.79, "grad_norm": 0.30743639038019077, "learning_rate": 4.343114107069628e-06, "loss": 0.0414, "step": 3609 }, { "epoch": 0.79, "grad_norm": 0.28202020301212055, "learning_rate": 4.334265015426993e-06, "loss": 0.0363, "step": 3610 }, { "epoch": 0.79, "grad_norm": 0.3470685577777562, "learning_rate": 4.3254238523088695e-06, "loss": 0.0313, "step": 3611 }, { "epoch": 0.79, "grad_norm": 0.28720296255665057, "learning_rate": 4.316590622189815e-06, "loss": 0.0329, "step": 3612 }, { "epoch": 0.79, "grad_norm": 0.2573910794628964, "learning_rate": 4.307765329540394e-06, "loss": 0.0252, "step": 3613 }, { "epoch": 0.79, "grad_norm": 0.23434111032246852, "learning_rate": 4.298947978827128e-06, "loss": 0.033, "step": 3614 }, { "epoch": 0.79, "grad_norm": 0.28824083722117905, "learning_rate": 4.290138574512546e-06, "loss": 0.0279, "step": 3615 }, { "epoch": 0.79, "grad_norm": 0.24409707253133334, "learning_rate": 4.2813371210551294e-06, "loss": 0.0283, "step": 3616 }, { "epoch": 0.79, "grad_norm": 0.21835811457671117, "learning_rate": 4.272543622909355e-06, "loss": 0.0252, "step": 3617 }, { "epoch": 0.79, "grad_norm": 0.1936975565814078, "learning_rate": 4.263758084525656e-06, "loss": 0.0187, "step": 3618 }, { "epoch": 0.79, "grad_norm": 0.27033475993678024, "learning_rate": 4.254980510350464e-06, "loss": 0.0257, "step": 3619 }, { "epoch": 0.8, "grad_norm": 0.2880795106332577, "learning_rate": 4.246210904826149e-06, "loss": 0.0263, "step": 3620 }, { "epoch": 0.8, "grad_norm": 0.21054254750349924, "learning_rate": 4.237449272391072e-06, "loss": 0.0221, "step": 3621 }, { "epoch": 0.8, "grad_norm": 0.23125726971927127, "learning_rate": 4.228695617479541e-06, "loss": 0.0217, "step": 3622 }, { "epoch": 0.8, "grad_norm": 0.22920103881009266, "learning_rate": 4.219949944521842e-06, "loss": 0.0282, "step": 3623 }, { "epoch": 0.8, "grad_norm": 0.2115455622915119, "learning_rate": 4.2112122579442015e-06, "loss": 0.0296, "step": 3624 }, { "epoch": 0.8, "grad_norm": 0.2582877143193991, "learning_rate": 4.202482562168832e-06, "loss": 0.0284, "step": 3625 }, { "epoch": 0.8, "grad_norm": 0.257448546398354, "learning_rate": 4.193760861613865e-06, "loss": 0.0287, "step": 3626 }, { "epoch": 0.8, "grad_norm": 0.2342688001988582, "learning_rate": 4.185047160693432e-06, "loss": 0.0344, "step": 3627 }, { "epoch": 0.8, "grad_norm": 0.19579876449193206, "learning_rate": 4.176341463817573e-06, "loss": 0.0227, "step": 3628 }, { "epoch": 0.8, "grad_norm": 0.20072648826742953, "learning_rate": 4.167643775392305e-06, "loss": 0.0227, "step": 3629 }, { "epoch": 0.8, "grad_norm": 0.27461042878774905, "learning_rate": 4.1589540998195695e-06, "loss": 0.0346, "step": 3630 }, { "epoch": 0.8, "grad_norm": 0.23646947085972797, "learning_rate": 4.150272441497276e-06, "loss": 0.0307, "step": 3631 }, { "epoch": 0.8, "grad_norm": 0.2571087392858843, "learning_rate": 4.141598804819256e-06, "loss": 0.0355, "step": 3632 }, { "epoch": 0.8, "grad_norm": 0.20020142522644224, "learning_rate": 4.132933194175299e-06, "loss": 0.0203, "step": 3633 }, { "epoch": 0.8, "grad_norm": 0.40853096605429784, "learning_rate": 4.124275613951114e-06, "loss": 0.0491, "step": 3634 }, { "epoch": 0.8, "grad_norm": 0.36319887601451634, "learning_rate": 4.115626068528362e-06, "loss": 0.0279, "step": 3635 }, { "epoch": 0.8, "grad_norm": 0.27277745555992244, "learning_rate": 4.106984562284633e-06, "loss": 0.0317, "step": 3636 }, { "epoch": 0.8, "grad_norm": 0.21533601026253893, "learning_rate": 4.0983510995934365e-06, "loss": 0.0253, "step": 3637 }, { "epoch": 0.8, "grad_norm": 0.28834596956729397, "learning_rate": 4.089725684824235e-06, "loss": 0.029, "step": 3638 }, { "epoch": 0.8, "grad_norm": 0.23269264092864095, "learning_rate": 4.081108322342389e-06, "loss": 0.0249, "step": 3639 }, { "epoch": 0.8, "grad_norm": 0.2555612909769337, "learning_rate": 4.07249901650921e-06, "loss": 0.0271, "step": 3640 }, { "epoch": 0.8, "grad_norm": 0.32616875698403835, "learning_rate": 4.0638977716819105e-06, "loss": 0.0393, "step": 3641 }, { "epoch": 0.8, "grad_norm": 0.22646813236034966, "learning_rate": 4.055304592213645e-06, "loss": 0.0183, "step": 3642 }, { "epoch": 0.8, "grad_norm": 0.2554617697085855, "learning_rate": 4.046719482453461e-06, "loss": 0.0242, "step": 3643 }, { "epoch": 0.8, "grad_norm": 0.32966515704916, "learning_rate": 4.038142446746342e-06, "loss": 0.0483, "step": 3644 }, { "epoch": 0.8, "grad_norm": 0.22952046321610411, "learning_rate": 4.029573489433179e-06, "loss": 0.0205, "step": 3645 }, { "epoch": 0.8, "grad_norm": 0.17493135242590682, "learning_rate": 4.021012614850779e-06, "loss": 0.0172, "step": 3646 }, { "epoch": 0.8, "grad_norm": 0.1928107541498316, "learning_rate": 4.012459827331841e-06, "loss": 0.0206, "step": 3647 }, { "epoch": 0.8, "grad_norm": 0.2356499327867729, "learning_rate": 4.003915131204996e-06, "loss": 0.0277, "step": 3648 }, { "epoch": 0.8, "grad_norm": 0.39366916856177986, "learning_rate": 3.995378530794754e-06, "loss": 0.05, "step": 3649 }, { "epoch": 0.8, "grad_norm": 0.23756317691259934, "learning_rate": 3.986850030421554e-06, "loss": 0.0241, "step": 3650 }, { "epoch": 0.8, "grad_norm": 0.29357554855893153, "learning_rate": 3.97832963440171e-06, "loss": 0.0287, "step": 3651 }, { "epoch": 0.8, "grad_norm": 0.2522710228865532, "learning_rate": 3.969817347047451e-06, "loss": 0.0264, "step": 3652 }, { "epoch": 0.8, "grad_norm": 0.2546196525397989, "learning_rate": 3.961313172666898e-06, "loss": 0.0284, "step": 3653 }, { "epoch": 0.8, "grad_norm": 0.19070338607451412, "learning_rate": 3.952817115564076e-06, "loss": 0.0169, "step": 3654 }, { "epoch": 0.8, "grad_norm": 0.2269978205123538, "learning_rate": 3.944329180038875e-06, "loss": 0.0285, "step": 3655 }, { "epoch": 0.8, "grad_norm": 0.2821964657057303, "learning_rate": 3.935849370387104e-06, "loss": 0.0272, "step": 3656 }, { "epoch": 0.8, "grad_norm": 0.22076824375348655, "learning_rate": 3.927377690900436e-06, "loss": 0.0247, "step": 3657 }, { "epoch": 0.8, "grad_norm": 0.21780141300077158, "learning_rate": 3.91891414586645e-06, "loss": 0.0282, "step": 3658 }, { "epoch": 0.8, "grad_norm": 0.3060406446126063, "learning_rate": 3.91045873956859e-06, "loss": 0.0352, "step": 3659 }, { "epoch": 0.8, "grad_norm": 0.2893880608826457, "learning_rate": 3.902011476286196e-06, "loss": 0.0318, "step": 3660 }, { "epoch": 0.8, "grad_norm": 0.25120418334859473, "learning_rate": 3.893572360294471e-06, "loss": 0.0281, "step": 3661 }, { "epoch": 0.8, "grad_norm": 0.27315269351638816, "learning_rate": 3.885141395864509e-06, "loss": 0.0232, "step": 3662 }, { "epoch": 0.8, "grad_norm": 0.27229528279275966, "learning_rate": 3.876718587263278e-06, "loss": 0.0318, "step": 3663 }, { "epoch": 0.8, "grad_norm": 0.2782702484612129, "learning_rate": 3.868303938753599e-06, "loss": 0.0335, "step": 3664 }, { "epoch": 0.8, "grad_norm": 0.18907673797905694, "learning_rate": 3.859897454594192e-06, "loss": 0.0235, "step": 3665 }, { "epoch": 0.81, "grad_norm": 0.23696425237289911, "learning_rate": 3.851499139039618e-06, "loss": 0.0255, "step": 3666 }, { "epoch": 0.81, "grad_norm": 0.2530445441962811, "learning_rate": 3.843108996340323e-06, "loss": 0.0231, "step": 3667 }, { "epoch": 0.81, "grad_norm": 0.1827468819515304, "learning_rate": 3.834727030742613e-06, "loss": 0.0173, "step": 3668 }, { "epoch": 0.81, "grad_norm": 0.25870133838473247, "learning_rate": 3.826353246488641e-06, "loss": 0.0331, "step": 3669 }, { "epoch": 0.81, "grad_norm": 0.26622786548554345, "learning_rate": 3.817987647816437e-06, "loss": 0.0368, "step": 3670 }, { "epoch": 0.81, "grad_norm": 0.32275130924443896, "learning_rate": 3.809630238959887e-06, "loss": 0.0367, "step": 3671 }, { "epoch": 0.81, "grad_norm": 0.25701007813794574, "learning_rate": 3.8012810241487175e-06, "loss": 0.0246, "step": 3672 }, { "epoch": 0.81, "grad_norm": 0.22454690479132108, "learning_rate": 3.7929400076085255e-06, "loss": 0.0302, "step": 3673 }, { "epoch": 0.81, "grad_norm": 0.26587275563033913, "learning_rate": 3.7846071935607408e-06, "loss": 0.0326, "step": 3674 }, { "epoch": 0.81, "grad_norm": 0.2474751604624967, "learning_rate": 3.7762825862226637e-06, "loss": 0.0312, "step": 3675 }, { "epoch": 0.81, "grad_norm": 0.30273048644491024, "learning_rate": 3.767966189807415e-06, "loss": 0.0369, "step": 3676 }, { "epoch": 0.81, "grad_norm": 0.2579598294846661, "learning_rate": 3.7596580085239897e-06, "loss": 0.0294, "step": 3677 }, { "epoch": 0.81, "grad_norm": 0.2648196493565275, "learning_rate": 3.7513580465771893e-06, "loss": 0.0258, "step": 3678 }, { "epoch": 0.81, "grad_norm": 0.3436867699123688, "learning_rate": 3.7430663081676977e-06, "loss": 0.0559, "step": 3679 }, { "epoch": 0.81, "grad_norm": 0.3231971801587294, "learning_rate": 3.734782797491998e-06, "loss": 0.0412, "step": 3680 }, { "epoch": 0.81, "grad_norm": 0.21947458017702118, "learning_rate": 3.7265075187424373e-06, "loss": 0.0209, "step": 3681 }, { "epoch": 0.81, "grad_norm": 0.23357392740506586, "learning_rate": 3.7182404761071735e-06, "loss": 0.0304, "step": 3682 }, { "epoch": 0.81, "grad_norm": 0.23760398389086507, "learning_rate": 3.7099816737702197e-06, "loss": 0.0267, "step": 3683 }, { "epoch": 0.81, "grad_norm": 0.29003768817820047, "learning_rate": 3.7017311159113956e-06, "loss": 0.0271, "step": 3684 }, { "epoch": 0.81, "grad_norm": 0.24743537921386893, "learning_rate": 3.6934888067063667e-06, "loss": 0.0256, "step": 3685 }, { "epoch": 0.81, "grad_norm": 0.2274675625296988, "learning_rate": 3.68525475032661e-06, "loss": 0.0284, "step": 3686 }, { "epoch": 0.81, "grad_norm": 0.2360116794638995, "learning_rate": 3.677028950939434e-06, "loss": 0.023, "step": 3687 }, { "epoch": 0.81, "grad_norm": 0.24521057659949916, "learning_rate": 3.6688114127079665e-06, "loss": 0.0228, "step": 3688 }, { "epoch": 0.81, "grad_norm": 0.2635946796584002, "learning_rate": 3.6606021397911605e-06, "loss": 0.0273, "step": 3689 }, { "epoch": 0.81, "grad_norm": 0.24556618863648857, "learning_rate": 3.652401136343768e-06, "loss": 0.03, "step": 3690 }, { "epoch": 0.81, "grad_norm": 0.2609608908122948, "learning_rate": 3.6442084065163784e-06, "loss": 0.0344, "step": 3691 }, { "epoch": 0.81, "grad_norm": 0.28327570018181836, "learning_rate": 3.636023954455372e-06, "loss": 0.0301, "step": 3692 }, { "epoch": 0.81, "grad_norm": 0.23041111823593596, "learning_rate": 3.6278477843029603e-06, "loss": 0.0287, "step": 3693 }, { "epoch": 0.81, "grad_norm": 0.2625289730703892, "learning_rate": 3.6196799001971416e-06, "loss": 0.0256, "step": 3694 }, { "epoch": 0.81, "grad_norm": 0.1630802210645409, "learning_rate": 3.6115203062717386e-06, "loss": 0.0228, "step": 3695 }, { "epoch": 0.81, "grad_norm": 0.3190133451470103, "learning_rate": 3.6033690066563765e-06, "loss": 0.0414, "step": 3696 }, { "epoch": 0.81, "grad_norm": 0.2435520707321006, "learning_rate": 3.5952260054764663e-06, "loss": 0.0283, "step": 3697 }, { "epoch": 0.81, "grad_norm": 0.22822638897501876, "learning_rate": 3.5870913068532455e-06, "loss": 0.0257, "step": 3698 }, { "epoch": 0.81, "grad_norm": 0.2763722961816296, "learning_rate": 3.5789649149037197e-06, "loss": 0.0305, "step": 3699 }, { "epoch": 0.81, "grad_norm": 0.2695684269130349, "learning_rate": 3.5708468337407177e-06, "loss": 0.0291, "step": 3700 }, { "epoch": 0.81, "grad_norm": 0.20398215857725774, "learning_rate": 3.562737067472841e-06, "loss": 0.0195, "step": 3701 }, { "epoch": 0.81, "grad_norm": 0.2502541740080814, "learning_rate": 3.554635620204503e-06, "loss": 0.0315, "step": 3702 }, { "epoch": 0.81, "grad_norm": 0.24771777420698718, "learning_rate": 3.546542496035883e-06, "loss": 0.0328, "step": 3703 }, { "epoch": 0.81, "grad_norm": 0.25344037083741355, "learning_rate": 3.5384576990629672e-06, "loss": 0.0224, "step": 3704 }, { "epoch": 0.81, "grad_norm": 0.26966825403295247, "learning_rate": 3.53038123337752e-06, "loss": 0.0457, "step": 3705 }, { "epoch": 0.81, "grad_norm": 0.4245201824319466, "learning_rate": 3.5223131030670942e-06, "loss": 0.0387, "step": 3706 }, { "epoch": 0.81, "grad_norm": 0.2605416601937401, "learning_rate": 3.5142533122150147e-06, "loss": 0.0341, "step": 3707 }, { "epoch": 0.81, "grad_norm": 0.2730210811890883, "learning_rate": 3.506201864900396e-06, "loss": 0.0277, "step": 3708 }, { "epoch": 0.81, "grad_norm": 0.2157021971740995, "learning_rate": 3.4981587651981185e-06, "loss": 0.0288, "step": 3709 }, { "epoch": 0.81, "grad_norm": 0.2760890194326243, "learning_rate": 3.490124017178851e-06, "loss": 0.0322, "step": 3710 }, { "epoch": 0.82, "grad_norm": 0.27453212981064085, "learning_rate": 3.482097624909022e-06, "loss": 0.0333, "step": 3711 }, { "epoch": 0.82, "grad_norm": 0.253931058992105, "learning_rate": 3.474079592450845e-06, "loss": 0.0282, "step": 3712 }, { "epoch": 0.82, "grad_norm": 0.24944489661127137, "learning_rate": 3.466069923862283e-06, "loss": 0.0248, "step": 3713 }, { "epoch": 0.82, "grad_norm": 0.2315743324149911, "learning_rate": 3.458068623197097e-06, "loss": 0.0299, "step": 3714 }, { "epoch": 0.82, "grad_norm": 0.23188678469876328, "learning_rate": 3.4500756945047774e-06, "loss": 0.0175, "step": 3715 }, { "epoch": 0.82, "grad_norm": 0.23487875366002967, "learning_rate": 3.442091141830608e-06, "loss": 0.026, "step": 3716 }, { "epoch": 0.82, "grad_norm": 0.2520699045582025, "learning_rate": 3.4341149692156074e-06, "loss": 0.0262, "step": 3717 }, { "epoch": 0.82, "grad_norm": 0.22399663690028476, "learning_rate": 3.426147180696577e-06, "loss": 0.0334, "step": 3718 }, { "epoch": 0.82, "grad_norm": 0.25049011051335446, "learning_rate": 3.4181877803060528e-06, "loss": 0.0299, "step": 3719 }, { "epoch": 0.82, "grad_norm": 0.2646873698859137, "learning_rate": 3.4102367720723438e-06, "loss": 0.0315, "step": 3720 }, { "epoch": 0.82, "grad_norm": 0.2442497314795245, "learning_rate": 3.402294160019499e-06, "loss": 0.0297, "step": 3721 }, { "epoch": 0.82, "grad_norm": 0.3376258407979168, "learning_rate": 3.394359948167325e-06, "loss": 0.0407, "step": 3722 }, { "epoch": 0.82, "grad_norm": 0.2851709205302547, "learning_rate": 3.386434140531378e-06, "loss": 0.0291, "step": 3723 }, { "epoch": 0.82, "grad_norm": 0.3110857792115305, "learning_rate": 3.3785167411229523e-06, "loss": 0.0428, "step": 3724 }, { "epoch": 0.82, "grad_norm": 0.24018609717101114, "learning_rate": 3.3706077539490933e-06, "loss": 0.0259, "step": 3725 }, { "epoch": 0.82, "grad_norm": 0.24668818539972917, "learning_rate": 3.362707183012597e-06, "loss": 0.0232, "step": 3726 }, { "epoch": 0.82, "grad_norm": 0.23018877351239234, "learning_rate": 3.354815032311978e-06, "loss": 0.0303, "step": 3727 }, { "epoch": 0.82, "grad_norm": 0.22833097773982017, "learning_rate": 3.34693130584151e-06, "loss": 0.0215, "step": 3728 }, { "epoch": 0.82, "grad_norm": 0.24650305059085142, "learning_rate": 3.3390560075911906e-06, "loss": 0.0277, "step": 3729 }, { "epoch": 0.82, "grad_norm": 0.17666722183901298, "learning_rate": 3.331189141546758e-06, "loss": 0.0202, "step": 3730 }, { "epoch": 0.82, "grad_norm": 0.26592299631376487, "learning_rate": 3.3233307116896874e-06, "loss": 0.0443, "step": 3731 }, { "epoch": 0.82, "grad_norm": 0.22041344832436627, "learning_rate": 3.3154807219971684e-06, "loss": 0.024, "step": 3732 }, { "epoch": 0.82, "grad_norm": 0.34140833978428003, "learning_rate": 3.307639176442137e-06, "loss": 0.0325, "step": 3733 }, { "epoch": 0.82, "grad_norm": 0.23034179552941636, "learning_rate": 3.299806078993242e-06, "loss": 0.0202, "step": 3734 }, { "epoch": 0.82, "grad_norm": 0.1835631081191456, "learning_rate": 3.2919814336148657e-06, "loss": 0.0205, "step": 3735 }, { "epoch": 0.82, "grad_norm": 0.20295680858908874, "learning_rate": 3.2841652442671033e-06, "loss": 0.0222, "step": 3736 }, { "epoch": 0.82, "grad_norm": 0.2775041089741753, "learning_rate": 3.276357514905788e-06, "loss": 0.0322, "step": 3737 }, { "epoch": 0.82, "grad_norm": 0.22677679450652197, "learning_rate": 3.2685582494824386e-06, "loss": 0.0285, "step": 3738 }, { "epoch": 0.82, "grad_norm": 0.28460758151071236, "learning_rate": 3.260767451944338e-06, "loss": 0.0383, "step": 3739 }, { "epoch": 0.82, "grad_norm": 0.23390609406113763, "learning_rate": 3.252985126234434e-06, "loss": 0.0309, "step": 3740 }, { "epoch": 0.82, "grad_norm": 0.20212903136937763, "learning_rate": 3.245211276291427e-06, "loss": 0.0258, "step": 3741 }, { "epoch": 0.82, "grad_norm": 0.26395564880049716, "learning_rate": 3.237445906049694e-06, "loss": 0.0457, "step": 3742 }, { "epoch": 0.82, "grad_norm": 0.29903038510175467, "learning_rate": 3.229689019439348e-06, "loss": 0.0315, "step": 3743 }, { "epoch": 0.82, "grad_norm": 0.25464142224590275, "learning_rate": 3.2219406203861903e-06, "loss": 0.0321, "step": 3744 }, { "epoch": 0.82, "grad_norm": 0.28081431020683456, "learning_rate": 3.2142007128117393e-06, "loss": 0.0265, "step": 3745 }, { "epoch": 0.82, "grad_norm": 0.281312485217518, "learning_rate": 3.2064693006332013e-06, "loss": 0.028, "step": 3746 }, { "epoch": 0.82, "grad_norm": 0.22573948873242403, "learning_rate": 3.1987463877634962e-06, "loss": 0.0204, "step": 3747 }, { "epoch": 0.82, "grad_norm": 0.22625983509268627, "learning_rate": 3.1910319781112364e-06, "loss": 0.0235, "step": 3748 }, { "epoch": 0.82, "grad_norm": 0.2566258691874267, "learning_rate": 3.1833260755807392e-06, "loss": 0.0261, "step": 3749 }, { "epoch": 0.82, "grad_norm": 0.216310359009026, "learning_rate": 3.1756286840719987e-06, "loss": 0.0293, "step": 3750 }, { "epoch": 0.82, "grad_norm": 0.2803729209720778, "learning_rate": 3.16793980748072e-06, "loss": 0.0373, "step": 3751 }, { "epoch": 0.82, "grad_norm": 0.2628738760749563, "learning_rate": 3.160259449698282e-06, "loss": 0.0312, "step": 3752 }, { "epoch": 0.82, "grad_norm": 0.23420699774006093, "learning_rate": 3.1525876146117707e-06, "loss": 0.0315, "step": 3753 }, { "epoch": 0.82, "grad_norm": 0.2373883684596302, "learning_rate": 3.144924306103938e-06, "loss": 0.0253, "step": 3754 }, { "epoch": 0.82, "grad_norm": 0.21807232689411402, "learning_rate": 3.1372695280532415e-06, "loss": 0.0314, "step": 3755 }, { "epoch": 0.82, "grad_norm": 0.26653987743804225, "learning_rate": 3.129623284333805e-06, "loss": 0.0357, "step": 3756 }, { "epoch": 0.83, "grad_norm": 0.25989470465701053, "learning_rate": 3.1219855788154385e-06, "loss": 0.0293, "step": 3757 }, { "epoch": 0.83, "grad_norm": 0.23634236663278327, "learning_rate": 3.1143564153636395e-06, "loss": 0.0179, "step": 3758 }, { "epoch": 0.83, "grad_norm": 0.23741298135441422, "learning_rate": 3.1067357978395663e-06, "loss": 0.0263, "step": 3759 }, { "epoch": 0.83, "grad_norm": 0.20682910731405757, "learning_rate": 3.0991237301000664e-06, "loss": 0.0218, "step": 3760 }, { "epoch": 0.83, "grad_norm": 0.26802454553833704, "learning_rate": 3.0915202159976453e-06, "loss": 0.0309, "step": 3761 }, { "epoch": 0.83, "grad_norm": 0.21710761028010478, "learning_rate": 3.083925259380498e-06, "loss": 0.0217, "step": 3762 }, { "epoch": 0.83, "grad_norm": 0.2641650215307287, "learning_rate": 3.0763388640924698e-06, "loss": 0.032, "step": 3763 }, { "epoch": 0.83, "grad_norm": 0.24072832019634527, "learning_rate": 3.068761033973087e-06, "loss": 0.0305, "step": 3764 }, { "epoch": 0.83, "grad_norm": 0.27969197690566716, "learning_rate": 3.0611917728575347e-06, "loss": 0.0324, "step": 3765 }, { "epoch": 0.83, "grad_norm": 0.20708361449276919, "learning_rate": 3.053631084576667e-06, "loss": 0.0265, "step": 3766 }, { "epoch": 0.83, "grad_norm": 0.2899234297188242, "learning_rate": 3.046078972956985e-06, "loss": 0.0331, "step": 3767 }, { "epoch": 0.83, "grad_norm": 0.24798203168945265, "learning_rate": 3.038535441820669e-06, "loss": 0.0205, "step": 3768 }, { "epoch": 0.83, "grad_norm": 0.20767294205972725, "learning_rate": 3.0310004949855366e-06, "loss": 0.025, "step": 3769 }, { "epoch": 0.83, "grad_norm": 0.21678776764856741, "learning_rate": 3.0234741362650787e-06, "loss": 0.0276, "step": 3770 }, { "epoch": 0.83, "grad_norm": 0.3020937016231474, "learning_rate": 3.0159563694684245e-06, "loss": 0.0505, "step": 3771 }, { "epoch": 0.83, "grad_norm": 0.23336752682863882, "learning_rate": 3.008447198400368e-06, "loss": 0.0223, "step": 3772 }, { "epoch": 0.83, "grad_norm": 0.21756051435717852, "learning_rate": 3.0009466268613384e-06, "loss": 0.027, "step": 3773 }, { "epoch": 0.83, "grad_norm": 0.2660732974190698, "learning_rate": 2.9934546586474346e-06, "loss": 0.0283, "step": 3774 }, { "epoch": 0.83, "grad_norm": 0.18405623723241546, "learning_rate": 2.985971297550374e-06, "loss": 0.0187, "step": 3775 }, { "epoch": 0.83, "grad_norm": 0.21215769391278613, "learning_rate": 2.9784965473575434e-06, "loss": 0.0231, "step": 3776 }, { "epoch": 0.83, "grad_norm": 0.19297202644192088, "learning_rate": 2.9710304118519473e-06, "loss": 0.0198, "step": 3777 }, { "epoch": 0.83, "grad_norm": 0.2579171806283081, "learning_rate": 2.9635728948122542e-06, "loss": 0.023, "step": 3778 }, { "epoch": 0.83, "grad_norm": 0.28781569138454693, "learning_rate": 2.95612400001275e-06, "loss": 0.0362, "step": 3779 }, { "epoch": 0.83, "grad_norm": 0.24115771119516768, "learning_rate": 2.9486837312233742e-06, "loss": 0.0222, "step": 3780 }, { "epoch": 0.83, "grad_norm": 0.26588425886678607, "learning_rate": 2.9412520922096834e-06, "loss": 0.0305, "step": 3781 }, { "epoch": 0.83, "grad_norm": 0.25585837036401493, "learning_rate": 2.93382908673288e-06, "loss": 0.0304, "step": 3782 }, { "epoch": 0.83, "grad_norm": 0.3162272853760593, "learning_rate": 2.926414718549797e-06, "loss": 0.0363, "step": 3783 }, { "epoch": 0.83, "grad_norm": 0.2875859075196345, "learning_rate": 2.9190089914128837e-06, "loss": 0.0312, "step": 3784 }, { "epoch": 0.83, "grad_norm": 0.2679100483916866, "learning_rate": 2.911611909070229e-06, "loss": 0.0248, "step": 3785 }, { "epoch": 0.83, "grad_norm": 0.23112737974506606, "learning_rate": 2.9042234752655417e-06, "loss": 0.0205, "step": 3786 }, { "epoch": 0.83, "grad_norm": 0.20902051345813139, "learning_rate": 2.8968436937381515e-06, "loss": 0.024, "step": 3787 }, { "epoch": 0.83, "grad_norm": 0.28679984138719145, "learning_rate": 2.889472568223015e-06, "loss": 0.0317, "step": 3788 }, { "epoch": 0.83, "grad_norm": 0.38947517921805536, "learning_rate": 2.8821101024506947e-06, "loss": 0.0397, "step": 3789 }, { "epoch": 0.83, "grad_norm": 0.2625566137662427, "learning_rate": 2.874756300147388e-06, "loss": 0.0259, "step": 3790 }, { "epoch": 0.83, "grad_norm": 0.3466975462705927, "learning_rate": 2.867411165034901e-06, "loss": 0.0433, "step": 3791 }, { "epoch": 0.83, "grad_norm": 0.30872740446645297, "learning_rate": 2.8600747008306417e-06, "loss": 0.0287, "step": 3792 }, { "epoch": 0.83, "grad_norm": 0.24879147066498217, "learning_rate": 2.8527469112476524e-06, "loss": 0.0271, "step": 3793 }, { "epoch": 0.83, "grad_norm": 0.22946090485031406, "learning_rate": 2.8454277999945603e-06, "loss": 0.0186, "step": 3794 }, { "epoch": 0.83, "grad_norm": 0.3252357251599501, "learning_rate": 2.8381173707756214e-06, "loss": 0.0337, "step": 3795 }, { "epoch": 0.83, "grad_norm": 0.22712515976584763, "learning_rate": 2.8308156272906794e-06, "loss": 0.0229, "step": 3796 }, { "epoch": 0.83, "grad_norm": 0.20611607509466556, "learning_rate": 2.8235225732352043e-06, "loss": 0.031, "step": 3797 }, { "epoch": 0.83, "grad_norm": 0.2323427626660013, "learning_rate": 2.8162382123002418e-06, "loss": 0.0179, "step": 3798 }, { "epoch": 0.83, "grad_norm": 0.2634233928672666, "learning_rate": 2.8089625481724604e-06, "loss": 0.027, "step": 3799 }, { "epoch": 0.83, "grad_norm": 0.23105383007828828, "learning_rate": 2.8016955845341143e-06, "loss": 0.0235, "step": 3800 }, { "epoch": 0.83, "grad_norm": 0.2677735359976356, "learning_rate": 2.794437325063064e-06, "loss": 0.0227, "step": 3801 }, { "epoch": 0.84, "grad_norm": 0.24656583525090428, "learning_rate": 2.7871877734327514e-06, "loss": 0.0297, "step": 3802 }, { "epoch": 0.84, "grad_norm": 0.23939083838791006, "learning_rate": 2.7799469333122275e-06, "loss": 0.0255, "step": 3803 }, { "epoch": 0.84, "grad_norm": 0.24008755733334192, "learning_rate": 2.772714808366115e-06, "loss": 0.0316, "step": 3804 }, { "epoch": 0.84, "grad_norm": 0.2231639043966675, "learning_rate": 2.76549140225465e-06, "loss": 0.0303, "step": 3805 }, { "epoch": 0.84, "grad_norm": 0.25595756277460696, "learning_rate": 2.758276718633628e-06, "loss": 0.027, "step": 3806 }, { "epoch": 0.84, "grad_norm": 0.211849651128429, "learning_rate": 2.751070761154453e-06, "loss": 0.0201, "step": 3807 }, { "epoch": 0.84, "grad_norm": 0.26129634418740444, "learning_rate": 2.743873533464105e-06, "loss": 0.0265, "step": 3808 }, { "epoch": 0.84, "grad_norm": 0.24285469137835775, "learning_rate": 2.7366850392051468e-06, "loss": 0.0235, "step": 3809 }, { "epoch": 0.84, "grad_norm": 0.28917501167290083, "learning_rate": 2.7295052820157097e-06, "loss": 0.029, "step": 3810 }, { "epoch": 0.84, "grad_norm": 0.277645363473503, "learning_rate": 2.722334265529527e-06, "loss": 0.0318, "step": 3811 }, { "epoch": 0.84, "grad_norm": 0.2101197108159598, "learning_rate": 2.715171993375878e-06, "loss": 0.0219, "step": 3812 }, { "epoch": 0.84, "grad_norm": 0.3028963455553397, "learning_rate": 2.7080184691796474e-06, "loss": 0.0292, "step": 3813 }, { "epoch": 0.84, "grad_norm": 0.22889463668631502, "learning_rate": 2.7008736965612658e-06, "loss": 0.018, "step": 3814 }, { "epoch": 0.84, "grad_norm": 0.24035723576142085, "learning_rate": 2.6937376791367566e-06, "loss": 0.0256, "step": 3815 }, { "epoch": 0.84, "grad_norm": 0.2424393006312234, "learning_rate": 2.6866104205176925e-06, "loss": 0.0333, "step": 3816 }, { "epoch": 0.84, "grad_norm": 0.24892566481864783, "learning_rate": 2.679491924311226e-06, "loss": 0.0237, "step": 3817 }, { "epoch": 0.84, "grad_norm": 0.2524314302875021, "learning_rate": 2.67238219412008e-06, "loss": 0.0269, "step": 3818 }, { "epoch": 0.84, "grad_norm": 0.3008899987543034, "learning_rate": 2.6652812335425184e-06, "loss": 0.0384, "step": 3819 }, { "epoch": 0.84, "grad_norm": 0.2663023530727557, "learning_rate": 2.6581890461723925e-06, "loss": 0.0364, "step": 3820 }, { "epoch": 0.84, "grad_norm": 0.23916030765402485, "learning_rate": 2.651105635599094e-06, "loss": 0.0288, "step": 3821 }, { "epoch": 0.84, "grad_norm": 0.314366417534471, "learning_rate": 2.6440310054075877e-06, "loss": 0.0396, "step": 3822 }, { "epoch": 0.84, "grad_norm": 0.19393635409315052, "learning_rate": 2.6369651591783774e-06, "loss": 0.0222, "step": 3823 }, { "epoch": 0.84, "grad_norm": 0.2970231715412625, "learning_rate": 2.629908100487544e-06, "loss": 0.0367, "step": 3824 }, { "epoch": 0.84, "grad_norm": 0.2730624897084373, "learning_rate": 2.6228598329066902e-06, "loss": 0.0329, "step": 3825 }, { "epoch": 0.84, "grad_norm": 0.2221304474051573, "learning_rate": 2.6158203600030076e-06, "loss": 0.0208, "step": 3826 }, { "epoch": 0.84, "grad_norm": 0.21295674198523606, "learning_rate": 2.6087896853392037e-06, "loss": 0.0213, "step": 3827 }, { "epoch": 0.84, "grad_norm": 0.2634736057395541, "learning_rate": 2.6017678124735545e-06, "loss": 0.0249, "step": 3828 }, { "epoch": 0.84, "grad_norm": 0.2801856325424695, "learning_rate": 2.594754744959862e-06, "loss": 0.0331, "step": 3829 }, { "epoch": 0.84, "grad_norm": 0.38273736644602885, "learning_rate": 2.5877504863474933e-06, "loss": 0.0441, "step": 3830 }, { "epoch": 0.84, "grad_norm": 0.23987318038548458, "learning_rate": 2.58075504018134e-06, "loss": 0.0247, "step": 3831 }, { "epoch": 0.84, "grad_norm": 0.3165671210408318, "learning_rate": 2.5737684100018446e-06, "loss": 0.0504, "step": 3832 }, { "epoch": 0.84, "grad_norm": 0.26761475194029594, "learning_rate": 2.566790599344973e-06, "loss": 0.0264, "step": 3833 }, { "epoch": 0.84, "grad_norm": 0.28997572133362387, "learning_rate": 2.5598216117422547e-06, "loss": 0.0292, "step": 3834 }, { "epoch": 0.84, "grad_norm": 0.23359317715679853, "learning_rate": 2.552861450720725e-06, "loss": 0.0239, "step": 3835 }, { "epoch": 0.84, "grad_norm": 0.225136801073581, "learning_rate": 2.5459101198029724e-06, "loss": 0.0188, "step": 3836 }, { "epoch": 0.84, "grad_norm": 0.2199612868449696, "learning_rate": 2.538967622507098e-06, "loss": 0.0228, "step": 3837 }, { "epoch": 0.84, "grad_norm": 0.263409052655707, "learning_rate": 2.532033962346754e-06, "loss": 0.0244, "step": 3838 }, { "epoch": 0.84, "grad_norm": 0.23126387425634926, "learning_rate": 2.525109142831095e-06, "loss": 0.0264, "step": 3839 }, { "epoch": 0.84, "grad_norm": 0.22309392442868756, "learning_rate": 2.5181931674648265e-06, "loss": 0.022, "step": 3840 }, { "epoch": 0.84, "grad_norm": 0.24698059805352435, "learning_rate": 2.5112860397481553e-06, "loss": 0.0276, "step": 3841 }, { "epoch": 0.84, "grad_norm": 0.4268208681918181, "learning_rate": 2.50438776317683e-06, "loss": 0.0494, "step": 3842 }, { "epoch": 0.84, "grad_norm": 0.2269383387911252, "learning_rate": 2.497498341242104e-06, "loss": 0.0291, "step": 3843 }, { "epoch": 0.84, "grad_norm": 0.25532006069273766, "learning_rate": 2.490617777430766e-06, "loss": 0.0289, "step": 3844 }, { "epoch": 0.84, "grad_norm": 0.19921207416793138, "learning_rate": 2.4837460752251e-06, "loss": 0.0219, "step": 3845 }, { "epoch": 0.84, "grad_norm": 0.23851431720373856, "learning_rate": 2.476883238102925e-06, "loss": 0.0269, "step": 3846 }, { "epoch": 0.84, "grad_norm": 0.28687080540218757, "learning_rate": 2.4700292695375596e-06, "loss": 0.0336, "step": 3847 }, { "epoch": 0.85, "grad_norm": 0.2877835383842551, "learning_rate": 2.4631841729978435e-06, "loss": 0.0386, "step": 3848 }, { "epoch": 0.85, "grad_norm": 0.21595209508503585, "learning_rate": 2.456347951948115e-06, "loss": 0.027, "step": 3849 }, { "epoch": 0.85, "grad_norm": 0.25590705427331534, "learning_rate": 2.449520609848237e-06, "loss": 0.0285, "step": 3850 }, { "epoch": 0.85, "grad_norm": 0.2914738983095523, "learning_rate": 2.442702150153562e-06, "loss": 0.0302, "step": 3851 }, { "epoch": 0.85, "grad_norm": 0.22516376244575387, "learning_rate": 2.4358925763149557e-06, "loss": 0.0266, "step": 3852 }, { "epoch": 0.85, "grad_norm": 0.2483287171253741, "learning_rate": 2.4290918917787876e-06, "loss": 0.0265, "step": 3853 }, { "epoch": 0.85, "grad_norm": 0.32799978364742516, "learning_rate": 2.4223000999869227e-06, "loss": 0.0379, "step": 3854 }, { "epoch": 0.85, "grad_norm": 0.21206941491648676, "learning_rate": 2.4155172043767337e-06, "loss": 0.0253, "step": 3855 }, { "epoch": 0.85, "grad_norm": 0.2549331586084214, "learning_rate": 2.4087432083810792e-06, "loss": 0.0241, "step": 3856 }, { "epoch": 0.85, "grad_norm": 0.26321166106479543, "learning_rate": 2.401978115428325e-06, "loss": 0.0232, "step": 3857 }, { "epoch": 0.85, "grad_norm": 0.2478637469868672, "learning_rate": 2.395221928942322e-06, "loss": 0.029, "step": 3858 }, { "epoch": 0.85, "grad_norm": 0.25795024024458996, "learning_rate": 2.388474652342416e-06, "loss": 0.0296, "step": 3859 }, { "epoch": 0.85, "grad_norm": 0.2210709536496926, "learning_rate": 2.3817362890434526e-06, "loss": 0.0384, "step": 3860 }, { "epoch": 0.85, "grad_norm": 0.27839401340087205, "learning_rate": 2.375006842455756e-06, "loss": 0.0324, "step": 3861 }, { "epoch": 0.85, "grad_norm": 0.29559086088363035, "learning_rate": 2.3682863159851377e-06, "loss": 0.0322, "step": 3862 }, { "epoch": 0.85, "grad_norm": 0.258958163884098, "learning_rate": 2.3615747130329013e-06, "loss": 0.045, "step": 3863 }, { "epoch": 0.85, "grad_norm": 0.23485219378085304, "learning_rate": 2.3548720369958256e-06, "loss": 0.0288, "step": 3864 }, { "epoch": 0.85, "grad_norm": 0.2157679964756096, "learning_rate": 2.3481782912661788e-06, "loss": 0.0228, "step": 3865 }, { "epoch": 0.85, "grad_norm": 0.3899911506272396, "learning_rate": 2.3414934792317047e-06, "loss": 0.0413, "step": 3866 }, { "epoch": 0.85, "grad_norm": 0.2272902507691261, "learning_rate": 2.334817604275632e-06, "loss": 0.0268, "step": 3867 }, { "epoch": 0.85, "grad_norm": 0.22339759350247212, "learning_rate": 2.3281506697766522e-06, "loss": 0.0252, "step": 3868 }, { "epoch": 0.85, "grad_norm": 0.2745319398362205, "learning_rate": 2.3214926791089563e-06, "loss": 0.0263, "step": 3869 }, { "epoch": 0.85, "grad_norm": 0.3004511222885687, "learning_rate": 2.3148436356421813e-06, "loss": 0.0405, "step": 3870 }, { "epoch": 0.85, "grad_norm": 0.18937862543847042, "learning_rate": 2.3082035427414585e-06, "loss": 0.0212, "step": 3871 }, { "epoch": 0.85, "grad_norm": 0.2781247439008241, "learning_rate": 2.301572403767369e-06, "loss": 0.028, "step": 3872 }, { "epoch": 0.85, "grad_norm": 0.25762808590872704, "learning_rate": 2.2949502220759866e-06, "loss": 0.0247, "step": 3873 }, { "epoch": 0.85, "grad_norm": 0.2648198885000602, "learning_rate": 2.2883370010188232e-06, "loss": 0.0364, "step": 3874 }, { "epoch": 0.85, "grad_norm": 0.22559464012633032, "learning_rate": 2.2817327439428836e-06, "loss": 0.0267, "step": 3875 }, { "epoch": 0.85, "grad_norm": 0.2344780456991246, "learning_rate": 2.2751374541906122e-06, "loss": 0.0327, "step": 3876 }, { "epoch": 0.85, "grad_norm": 0.22897060453096788, "learning_rate": 2.26855113509993e-06, "loss": 0.0278, "step": 3877 }, { "epoch": 0.85, "grad_norm": 0.23947667601923092, "learning_rate": 2.261973790004217e-06, "loss": 0.0281, "step": 3878 }, { "epoch": 0.85, "grad_norm": 0.2541858893822508, "learning_rate": 2.2554054222323018e-06, "loss": 0.0265, "step": 3879 }, { "epoch": 0.85, "grad_norm": 0.2625970621268977, "learning_rate": 2.2488460351084827e-06, "loss": 0.0302, "step": 3880 }, { "epoch": 0.85, "grad_norm": 0.23400592354466196, "learning_rate": 2.242295631952496e-06, "loss": 0.0231, "step": 3881 }, { "epoch": 0.85, "grad_norm": 0.24455926478727372, "learning_rate": 2.235754216079551e-06, "loss": 0.0295, "step": 3882 }, { "epoch": 0.85, "grad_norm": 0.29506134373317705, "learning_rate": 2.229221790800291e-06, "loss": 0.0319, "step": 3883 }, { "epoch": 0.85, "grad_norm": 0.25740314988611324, "learning_rate": 2.2226983594208187e-06, "loss": 0.0342, "step": 3884 }, { "epoch": 0.85, "grad_norm": 0.32531390161423046, "learning_rate": 2.216183925242681e-06, "loss": 0.0268, "step": 3885 }, { "epoch": 0.85, "grad_norm": 0.25434132493678374, "learning_rate": 2.209678491562881e-06, "loss": 0.0227, "step": 3886 }, { "epoch": 0.85, "grad_norm": 0.2397500094154205, "learning_rate": 2.2031820616738477e-06, "loss": 0.0202, "step": 3887 }, { "epoch": 0.85, "grad_norm": 0.2139124016890787, "learning_rate": 2.1966946388634746e-06, "loss": 0.0291, "step": 3888 }, { "epoch": 0.85, "grad_norm": 0.2131140088158562, "learning_rate": 2.190216226415074e-06, "loss": 0.0207, "step": 3889 }, { "epoch": 0.85, "grad_norm": 0.2961527295114186, "learning_rate": 2.1837468276074227e-06, "loss": 0.0418, "step": 3890 }, { "epoch": 0.85, "grad_norm": 0.23153809405938847, "learning_rate": 2.1772864457147126e-06, "loss": 0.0268, "step": 3891 }, { "epoch": 0.85, "grad_norm": 0.28574910654176955, "learning_rate": 2.1708350840065927e-06, "loss": 0.0344, "step": 3892 }, { "epoch": 0.86, "grad_norm": 0.23484012227863682, "learning_rate": 2.164392745748125e-06, "loss": 0.0257, "step": 3893 }, { "epoch": 0.86, "grad_norm": 0.23499878683943798, "learning_rate": 2.1579594341998235e-06, "loss": 0.0337, "step": 3894 }, { "epoch": 0.86, "grad_norm": 0.2245127594514266, "learning_rate": 2.151535152617625e-06, "loss": 0.0246, "step": 3895 }, { "epoch": 0.86, "grad_norm": 0.22720426068665847, "learning_rate": 2.1451199042529035e-06, "loss": 0.0211, "step": 3896 }, { "epoch": 0.86, "grad_norm": 0.17705131105658264, "learning_rate": 2.1387136923524475e-06, "loss": 0.017, "step": 3897 }, { "epoch": 0.86, "grad_norm": 0.25607005756259693, "learning_rate": 2.1323165201584863e-06, "loss": 0.0269, "step": 3898 }, { "epoch": 0.86, "grad_norm": 0.19983322148628807, "learning_rate": 2.125928390908658e-06, "loss": 0.023, "step": 3899 }, { "epoch": 0.86, "grad_norm": 0.2594664579030888, "learning_rate": 2.1195493078360486e-06, "loss": 0.0205, "step": 3900 }, { "epoch": 0.86, "grad_norm": 0.2109651459131922, "learning_rate": 2.113179274169137e-06, "loss": 0.0227, "step": 3901 }, { "epoch": 0.86, "grad_norm": 0.24432628438712112, "learning_rate": 2.1068182931318424e-06, "loss": 0.0229, "step": 3902 }, { "epoch": 0.86, "grad_norm": 0.19328943586785619, "learning_rate": 2.1004663679434987e-06, "loss": 0.0148, "step": 3903 }, { "epoch": 0.86, "grad_norm": 0.22900019726057594, "learning_rate": 2.0941235018188543e-06, "loss": 0.0318, "step": 3904 }, { "epoch": 0.86, "grad_norm": 0.27477682985090945, "learning_rate": 2.0877896979680654e-06, "loss": 0.0248, "step": 3905 }, { "epoch": 0.86, "grad_norm": 0.26366659088491357, "learning_rate": 2.0814649595967194e-06, "loss": 0.0333, "step": 3906 }, { "epoch": 0.86, "grad_norm": 0.21312036279908078, "learning_rate": 2.0751492899057957e-06, "loss": 0.0204, "step": 3907 }, { "epoch": 0.86, "grad_norm": 0.20255649311395135, "learning_rate": 2.0688426920916992e-06, "loss": 0.0229, "step": 3908 }, { "epoch": 0.86, "grad_norm": 0.29645066302079465, "learning_rate": 2.062545169346235e-06, "loss": 0.0228, "step": 3909 }, { "epoch": 0.86, "grad_norm": 0.29417591617827665, "learning_rate": 2.05625672485662e-06, "loss": 0.032, "step": 3910 }, { "epoch": 0.86, "grad_norm": 0.2254233331599424, "learning_rate": 2.049977361805471e-06, "loss": 0.0238, "step": 3911 }, { "epoch": 0.86, "grad_norm": 0.28845767590186433, "learning_rate": 2.043707083370814e-06, "loss": 0.0301, "step": 3912 }, { "epoch": 0.86, "grad_norm": 0.21304851766358, "learning_rate": 2.03744589272608e-06, "loss": 0.0307, "step": 3913 }, { "epoch": 0.86, "grad_norm": 0.3292309877269268, "learning_rate": 2.031193793040087e-06, "loss": 0.0344, "step": 3914 }, { "epoch": 0.86, "grad_norm": 0.20086938838265608, "learning_rate": 2.0249507874770714e-06, "loss": 0.0248, "step": 3915 }, { "epoch": 0.86, "grad_norm": 0.2349814278411253, "learning_rate": 2.018716879196645e-06, "loss": 0.0286, "step": 3916 }, { "epoch": 0.86, "grad_norm": 0.3030606388311023, "learning_rate": 2.0124920713538378e-06, "loss": 0.0334, "step": 3917 }, { "epoch": 0.86, "grad_norm": 0.2677680128398089, "learning_rate": 2.006276367099054e-06, "loss": 0.0301, "step": 3918 }, { "epoch": 0.86, "grad_norm": 0.18866716146011084, "learning_rate": 2.000069769578108e-06, "loss": 0.02, "step": 3919 }, { "epoch": 0.86, "grad_norm": 0.2781445132064626, "learning_rate": 1.9938722819321854e-06, "loss": 0.0259, "step": 3920 }, { "epoch": 0.86, "grad_norm": 0.21489925534501872, "learning_rate": 1.987683907297888e-06, "loss": 0.0226, "step": 3921 }, { "epoch": 0.86, "grad_norm": 0.22646346887563482, "learning_rate": 1.9815046488071774e-06, "loss": 0.0239, "step": 3922 }, { "epoch": 0.86, "grad_norm": 0.2401493666437388, "learning_rate": 1.9753345095874234e-06, "loss": 0.0238, "step": 3923 }, { "epoch": 0.86, "grad_norm": 0.29931022052689477, "learning_rate": 1.9691734927613625e-06, "loss": 0.0297, "step": 3924 }, { "epoch": 0.86, "grad_norm": 0.22429214133725534, "learning_rate": 1.9630216014471326e-06, "loss": 0.023, "step": 3925 }, { "epoch": 0.86, "grad_norm": 0.2424895187698589, "learning_rate": 1.9568788387582338e-06, "loss": 0.0251, "step": 3926 }, { "epoch": 0.86, "grad_norm": 0.2084886071605321, "learning_rate": 1.950745207803566e-06, "loss": 0.0137, "step": 3927 }, { "epoch": 0.86, "grad_norm": 0.26072221315148936, "learning_rate": 1.9446207116873815e-06, "loss": 0.0333, "step": 3928 }, { "epoch": 0.86, "grad_norm": 0.178197163888115, "learning_rate": 1.9385053535093455e-06, "loss": 0.0212, "step": 3929 }, { "epoch": 0.86, "grad_norm": 0.2847289973644325, "learning_rate": 1.9323991363644645e-06, "loss": 0.0288, "step": 3930 }, { "epoch": 0.86, "grad_norm": 0.27226567535181007, "learning_rate": 1.9263020633431416e-06, "loss": 0.0214, "step": 3931 }, { "epoch": 0.86, "grad_norm": 0.31075256061803264, "learning_rate": 1.9202141375311335e-06, "loss": 0.0388, "step": 3932 }, { "epoch": 0.86, "grad_norm": 0.2823310579403776, "learning_rate": 1.9141353620095835e-06, "loss": 0.0372, "step": 3933 }, { "epoch": 0.86, "grad_norm": 0.18758217679707598, "learning_rate": 1.9080657398549916e-06, "loss": 0.0209, "step": 3934 }, { "epoch": 0.86, "grad_norm": 0.19805705065045512, "learning_rate": 1.902005274139238e-06, "loss": 0.02, "step": 3935 }, { "epoch": 0.86, "grad_norm": 0.23035887345797168, "learning_rate": 1.8959539679295536e-06, "loss": 0.0225, "step": 3936 }, { "epoch": 0.86, "grad_norm": 0.22014119339755225, "learning_rate": 1.8899118242885462e-06, "loss": 0.0318, "step": 3937 }, { "epoch": 0.86, "grad_norm": 0.230950296961087, "learning_rate": 1.8838788462741852e-06, "loss": 0.024, "step": 3938 }, { "epoch": 0.87, "grad_norm": 0.24027613145127033, "learning_rate": 1.8778550369397886e-06, "loss": 0.0231, "step": 3939 }, { "epoch": 0.87, "grad_norm": 0.27697689347173, "learning_rate": 1.8718403993340528e-06, "loss": 0.024, "step": 3940 }, { "epoch": 0.87, "grad_norm": 0.2174718847409269, "learning_rate": 1.865834936501012e-06, "loss": 0.0245, "step": 3941 }, { "epoch": 0.87, "grad_norm": 0.19060331401042857, "learning_rate": 1.8598386514800793e-06, "loss": 0.0176, "step": 3942 }, { "epoch": 0.87, "grad_norm": 0.3065493380980718, "learning_rate": 1.8538515473060026e-06, "loss": 0.0351, "step": 3943 }, { "epoch": 0.87, "grad_norm": 0.2501655715787742, "learning_rate": 1.847873627008896e-06, "loss": 0.0307, "step": 3944 }, { "epoch": 0.87, "grad_norm": 0.22951989513899249, "learning_rate": 1.8419048936142191e-06, "loss": 0.0315, "step": 3945 }, { "epoch": 0.87, "grad_norm": 0.21929841608063866, "learning_rate": 1.8359453501427916e-06, "loss": 0.0228, "step": 3946 }, { "epoch": 0.87, "grad_norm": 0.2332128800695291, "learning_rate": 1.8299949996107646e-06, "loss": 0.0281, "step": 3947 }, { "epoch": 0.87, "grad_norm": 0.19833979529150592, "learning_rate": 1.8240538450296563e-06, "loss": 0.026, "step": 3948 }, { "epoch": 0.87, "grad_norm": 0.2472978983664071, "learning_rate": 1.8181218894063146e-06, "loss": 0.0397, "step": 3949 }, { "epoch": 0.87, "grad_norm": 0.2029199757606438, "learning_rate": 1.8121991357429425e-06, "loss": 0.0159, "step": 3950 }, { "epoch": 0.87, "grad_norm": 0.24460092258866165, "learning_rate": 1.8062855870370798e-06, "loss": 0.0306, "step": 3951 }, { "epoch": 0.87, "grad_norm": 0.19939599847379869, "learning_rate": 1.8003812462816127e-06, "loss": 0.0234, "step": 3952 }, { "epoch": 0.87, "grad_norm": 0.27247723493064596, "learning_rate": 1.7944861164647576e-06, "loss": 0.0372, "step": 3953 }, { "epoch": 0.87, "grad_norm": 0.22841116971338493, "learning_rate": 1.788600200570083e-06, "loss": 0.0255, "step": 3954 }, { "epoch": 0.87, "grad_norm": 0.2392800145649786, "learning_rate": 1.782723501576482e-06, "loss": 0.0317, "step": 3955 }, { "epoch": 0.87, "grad_norm": 0.25418792652631794, "learning_rate": 1.7768560224581955e-06, "loss": 0.0335, "step": 3956 }, { "epoch": 0.87, "grad_norm": 0.21703919623420148, "learning_rate": 1.770997766184781e-06, "loss": 0.0245, "step": 3957 }, { "epoch": 0.87, "grad_norm": 0.16896704715874653, "learning_rate": 1.7651487357211472e-06, "loss": 0.0108, "step": 3958 }, { "epoch": 0.87, "grad_norm": 0.2984374241054898, "learning_rate": 1.7593089340275149e-06, "loss": 0.0298, "step": 3959 }, { "epoch": 0.87, "grad_norm": 0.22443900073523565, "learning_rate": 1.7534783640594533e-06, "loss": 0.0202, "step": 3960 }, { "epoch": 0.87, "grad_norm": 0.20475501992519377, "learning_rate": 1.7476570287678396e-06, "loss": 0.0191, "step": 3961 }, { "epoch": 0.87, "grad_norm": 0.20974471025253075, "learning_rate": 1.741844931098895e-06, "loss": 0.0205, "step": 3962 }, { "epoch": 0.87, "grad_norm": 0.2384955974233491, "learning_rate": 1.7360420739941486e-06, "loss": 0.0263, "step": 3963 }, { "epoch": 0.87, "grad_norm": 0.22069584994858502, "learning_rate": 1.7302484603904756e-06, "loss": 0.0314, "step": 3964 }, { "epoch": 0.87, "grad_norm": 0.19494000199605882, "learning_rate": 1.7244640932200484e-06, "loss": 0.0234, "step": 3965 }, { "epoch": 0.87, "grad_norm": 0.2347603887776966, "learning_rate": 1.7186889754103763e-06, "loss": 0.0297, "step": 3966 }, { "epoch": 0.87, "grad_norm": 0.23574376172896755, "learning_rate": 1.7129231098842791e-06, "loss": 0.0297, "step": 3967 }, { "epoch": 0.87, "grad_norm": 0.26678069527800613, "learning_rate": 1.707166499559898e-06, "loss": 0.0412, "step": 3968 }, { "epoch": 0.87, "grad_norm": 0.25157095142644803, "learning_rate": 1.701419147350687e-06, "loss": 0.026, "step": 3969 }, { "epoch": 0.87, "grad_norm": 0.24909355821890453, "learning_rate": 1.6956810561654213e-06, "loss": 0.0236, "step": 3970 }, { "epoch": 0.87, "grad_norm": 0.2158988964917277, "learning_rate": 1.6899522289081737e-06, "loss": 0.0178, "step": 3971 }, { "epoch": 0.87, "grad_norm": 0.2413588622411202, "learning_rate": 1.6842326684783473e-06, "loss": 0.0203, "step": 3972 }, { "epoch": 0.87, "grad_norm": 0.2026649179715184, "learning_rate": 1.6785223777706482e-06, "loss": 0.0254, "step": 3973 }, { "epoch": 0.87, "grad_norm": 0.33542930621663797, "learning_rate": 1.6728213596750831e-06, "loss": 0.0343, "step": 3974 }, { "epoch": 0.87, "grad_norm": 0.26128103176874784, "learning_rate": 1.667129617076977e-06, "loss": 0.0356, "step": 3975 }, { "epoch": 0.87, "grad_norm": 0.25916595365103207, "learning_rate": 1.661447152856952e-06, "loss": 0.029, "step": 3976 }, { "epoch": 0.87, "grad_norm": 0.21586879341047518, "learning_rate": 1.6557739698909436e-06, "loss": 0.0251, "step": 3977 }, { "epoch": 0.87, "grad_norm": 0.24176543898825847, "learning_rate": 1.650110071050175e-06, "loss": 0.025, "step": 3978 }, { "epoch": 0.87, "grad_norm": 0.25397719345805286, "learning_rate": 1.6444554592011909e-06, "loss": 0.0189, "step": 3979 }, { "epoch": 0.87, "grad_norm": 0.19706409849080536, "learning_rate": 1.638810137205813e-06, "loss": 0.0212, "step": 3980 }, { "epoch": 0.87, "grad_norm": 0.24386950888774606, "learning_rate": 1.6331741079211872e-06, "loss": 0.0263, "step": 3981 }, { "epoch": 0.87, "grad_norm": 0.22030736666982426, "learning_rate": 1.627547374199734e-06, "loss": 0.0244, "step": 3982 }, { "epoch": 0.87, "grad_norm": 0.2842806738262328, "learning_rate": 1.6219299388891797e-06, "loss": 0.0219, "step": 3983 }, { "epoch": 0.88, "grad_norm": 0.20590756121999668, "learning_rate": 1.6163218048325413e-06, "loss": 0.0238, "step": 3984 }, { "epoch": 0.88, "grad_norm": 0.38306856349578233, "learning_rate": 1.610722974868133e-06, "loss": 0.0324, "step": 3985 }, { "epoch": 0.88, "grad_norm": 0.26639646808642814, "learning_rate": 1.6051334518295546e-06, "loss": 0.0246, "step": 3986 }, { "epoch": 0.88, "grad_norm": 0.2887242116187527, "learning_rate": 1.5995532385456992e-06, "loss": 0.0276, "step": 3987 }, { "epoch": 0.88, "grad_norm": 0.33405953392589066, "learning_rate": 1.5939823378407426e-06, "loss": 0.0417, "step": 3988 }, { "epoch": 0.88, "grad_norm": 0.21000298367378883, "learning_rate": 1.5884207525341566e-06, "loss": 0.0158, "step": 3989 }, { "epoch": 0.88, "grad_norm": 0.26179161765461234, "learning_rate": 1.5828684854406918e-06, "loss": 0.0276, "step": 3990 }, { "epoch": 0.88, "grad_norm": 0.28524516300225977, "learning_rate": 1.577325539370389e-06, "loss": 0.0297, "step": 3991 }, { "epoch": 0.88, "grad_norm": 0.25805574559513955, "learning_rate": 1.57179191712856e-06, "loss": 0.0322, "step": 3992 }, { "epoch": 0.88, "grad_norm": 0.2801519256785691, "learning_rate": 1.5662676215158112e-06, "loss": 0.034, "step": 3993 }, { "epoch": 0.88, "grad_norm": 0.2271380103121437, "learning_rate": 1.5607526553280172e-06, "loss": 0.0216, "step": 3994 }, { "epoch": 0.88, "grad_norm": 0.23207426122064762, "learning_rate": 1.5552470213563408e-06, "loss": 0.0264, "step": 3995 }, { "epoch": 0.88, "grad_norm": 0.2378034099816618, "learning_rate": 1.549750722387211e-06, "loss": 0.0285, "step": 3996 }, { "epoch": 0.88, "grad_norm": 0.35806127587094355, "learning_rate": 1.5442637612023425e-06, "loss": 0.0507, "step": 3997 }, { "epoch": 0.88, "grad_norm": 0.2183148961496767, "learning_rate": 1.5387861405787252e-06, "loss": 0.0228, "step": 3998 }, { "epoch": 0.88, "grad_norm": 0.2775771945043418, "learning_rate": 1.5333178632886058e-06, "loss": 0.0353, "step": 3999 }, { "epoch": 0.88, "grad_norm": 0.1723180176519468, "learning_rate": 1.5278589320995218e-06, "loss": 0.0178, "step": 4000 }, { "epoch": 0.88, "grad_norm": 0.2291028880465536, "learning_rate": 1.5224093497742654e-06, "loss": 0.0254, "step": 4001 }, { "epoch": 0.88, "grad_norm": 0.21540815603745844, "learning_rate": 1.5169691190709057e-06, "loss": 0.0336, "step": 4002 }, { "epoch": 0.88, "grad_norm": 0.2567826857678153, "learning_rate": 1.5115382427427827e-06, "loss": 0.0345, "step": 4003 }, { "epoch": 0.88, "grad_norm": 0.2195455017079472, "learning_rate": 1.5061167235384867e-06, "loss": 0.0308, "step": 4004 }, { "epoch": 0.88, "grad_norm": 0.25006646524482823, "learning_rate": 1.5007045642018868e-06, "loss": 0.0289, "step": 4005 }, { "epoch": 0.88, "grad_norm": 0.21073223652648765, "learning_rate": 1.4953017674721083e-06, "loss": 0.0253, "step": 4006 }, { "epoch": 0.88, "grad_norm": 0.256005805825172, "learning_rate": 1.4899083360835408e-06, "loss": 0.0326, "step": 4007 }, { "epoch": 0.88, "grad_norm": 0.27258169581314906, "learning_rate": 1.484524272765837e-06, "loss": 0.0268, "step": 4008 }, { "epoch": 0.88, "grad_norm": 0.2409342982553185, "learning_rate": 1.479149580243895e-06, "loss": 0.0274, "step": 4009 }, { "epoch": 0.88, "grad_norm": 0.20566295612613625, "learning_rate": 1.4737842612378894e-06, "loss": 0.02, "step": 4010 }, { "epoch": 0.88, "grad_norm": 0.18890379873346602, "learning_rate": 1.468428318463233e-06, "loss": 0.0205, "step": 4011 }, { "epoch": 0.88, "grad_norm": 0.20006802862239356, "learning_rate": 1.4630817546306087e-06, "loss": 0.0236, "step": 4012 }, { "epoch": 0.88, "grad_norm": 0.23546096032008776, "learning_rate": 1.4577445724459382e-06, "loss": 0.0379, "step": 4013 }, { "epoch": 0.88, "grad_norm": 0.1940759232465805, "learning_rate": 1.4524167746104034e-06, "loss": 0.0301, "step": 4014 }, { "epoch": 0.88, "grad_norm": 0.2289597930390186, "learning_rate": 1.4470983638204384e-06, "loss": 0.0319, "step": 4015 }, { "epoch": 0.88, "grad_norm": 0.22939968273863018, "learning_rate": 1.4417893427677276e-06, "loss": 0.0298, "step": 4016 }, { "epoch": 0.88, "grad_norm": 0.25927656787820885, "learning_rate": 1.4364897141391888e-06, "loss": 0.0262, "step": 4017 }, { "epoch": 0.88, "grad_norm": 0.24248916542782697, "learning_rate": 1.4311994806170048e-06, "loss": 0.0365, "step": 4018 }, { "epoch": 0.88, "grad_norm": 0.24264834765102183, "learning_rate": 1.42591864487859e-06, "loss": 0.0251, "step": 4019 }, { "epoch": 0.88, "grad_norm": 0.23210383541824384, "learning_rate": 1.4206472095966107e-06, "loss": 0.029, "step": 4020 }, { "epoch": 0.88, "grad_norm": 0.2245166089428411, "learning_rate": 1.4153851774389703e-06, "loss": 0.0233, "step": 4021 }, { "epoch": 0.88, "grad_norm": 0.22277273510764165, "learning_rate": 1.4101325510688192e-06, "loss": 0.0209, "step": 4022 }, { "epoch": 0.88, "grad_norm": 0.24405514555826774, "learning_rate": 1.4048893331445367e-06, "loss": 0.0162, "step": 4023 }, { "epoch": 0.88, "grad_norm": 0.21945974468325163, "learning_rate": 1.3996555263197587e-06, "loss": 0.0211, "step": 4024 }, { "epoch": 0.88, "grad_norm": 0.21372582279354385, "learning_rate": 1.3944311332433368e-06, "loss": 0.0276, "step": 4025 }, { "epoch": 0.88, "grad_norm": 0.32695586956517514, "learning_rate": 1.3892161565593743e-06, "loss": 0.0343, "step": 4026 }, { "epoch": 0.88, "grad_norm": 0.22012335659477245, "learning_rate": 1.3840105989071995e-06, "loss": 0.0294, "step": 4027 }, { "epoch": 0.88, "grad_norm": 0.3275218369942323, "learning_rate": 1.3788144629213785e-06, "loss": 0.0367, "step": 4028 }, { "epoch": 0.88, "grad_norm": 0.29462431745081913, "learning_rate": 1.3736277512317076e-06, "loss": 0.0359, "step": 4029 }, { "epoch": 0.89, "grad_norm": 0.2192795508608225, "learning_rate": 1.3684504664632137e-06, "loss": 0.0198, "step": 4030 }, { "epoch": 0.89, "grad_norm": 0.280980113131807, "learning_rate": 1.3632826112361497e-06, "loss": 0.0376, "step": 4031 }, { "epoch": 0.89, "grad_norm": 0.2036203982346656, "learning_rate": 1.3581241881660011e-06, "loss": 0.0202, "step": 4032 }, { "epoch": 0.89, "grad_norm": 0.2446505142993538, "learning_rate": 1.352975199863482e-06, "loss": 0.0273, "step": 4033 }, { "epoch": 0.89, "grad_norm": 0.3287036992419058, "learning_rate": 1.3478356489345168e-06, "loss": 0.0291, "step": 4034 }, { "epoch": 0.89, "grad_norm": 0.19690709533655248, "learning_rate": 1.3427055379802733e-06, "loss": 0.0292, "step": 4035 }, { "epoch": 0.89, "grad_norm": 0.20444550805776926, "learning_rate": 1.3375848695971239e-06, "loss": 0.0188, "step": 4036 }, { "epoch": 0.89, "grad_norm": 0.20293176152451714, "learning_rate": 1.3324736463766775e-06, "loss": 0.0229, "step": 4037 }, { "epoch": 0.89, "grad_norm": 0.22411925703638305, "learning_rate": 1.3273718709057493e-06, "loss": 0.0255, "step": 4038 }, { "epoch": 0.89, "grad_norm": 0.20742693462055836, "learning_rate": 1.322279545766385e-06, "loss": 0.0184, "step": 4039 }, { "epoch": 0.89, "grad_norm": 0.22124226303366012, "learning_rate": 1.3171966735358343e-06, "loss": 0.0238, "step": 4040 }, { "epoch": 0.89, "grad_norm": 0.21374453332669846, "learning_rate": 1.3121232567865793e-06, "loss": 0.0222, "step": 4041 }, { "epoch": 0.89, "grad_norm": 0.2995226151281877, "learning_rate": 1.3070592980862994e-06, "loss": 0.0282, "step": 4042 }, { "epoch": 0.89, "grad_norm": 0.20906475339624503, "learning_rate": 1.3020047999979002e-06, "loss": 0.0268, "step": 4043 }, { "epoch": 0.89, "grad_norm": 0.22014655769406827, "learning_rate": 1.2969597650794907e-06, "loss": 0.0238, "step": 4044 }, { "epoch": 0.89, "grad_norm": 0.22428625333450491, "learning_rate": 1.2919241958843975e-06, "loss": 0.0227, "step": 4045 }, { "epoch": 0.89, "grad_norm": 0.24069317372127222, "learning_rate": 1.2868980949611486e-06, "loss": 0.0284, "step": 4046 }, { "epoch": 0.89, "grad_norm": 0.2820574772017969, "learning_rate": 1.2818814648534895e-06, "loss": 0.0351, "step": 4047 }, { "epoch": 0.89, "grad_norm": 0.24369227630638465, "learning_rate": 1.2768743081003598e-06, "loss": 0.0232, "step": 4048 }, { "epoch": 0.89, "grad_norm": 0.25864173006102703, "learning_rate": 1.2718766272359195e-06, "loss": 0.0289, "step": 4049 }, { "epoch": 0.89, "grad_norm": 0.2382527631397271, "learning_rate": 1.266888424789523e-06, "loss": 0.0228, "step": 4050 }, { "epoch": 0.89, "grad_norm": 0.3481749832938333, "learning_rate": 1.261909703285733e-06, "loss": 0.0343, "step": 4051 }, { "epoch": 0.89, "grad_norm": 0.22609741882848491, "learning_rate": 1.2569404652443073e-06, "loss": 0.0244, "step": 4052 }, { "epoch": 0.89, "grad_norm": 0.2095581360449896, "learning_rate": 1.2519807131802097e-06, "loss": 0.0287, "step": 4053 }, { "epoch": 0.89, "grad_norm": 0.2500651244931791, "learning_rate": 1.2470304496035968e-06, "loss": 0.0362, "step": 4054 }, { "epoch": 0.89, "grad_norm": 0.22651535398660863, "learning_rate": 1.2420896770198355e-06, "loss": 0.0238, "step": 4055 }, { "epoch": 0.89, "grad_norm": 0.1854365472547319, "learning_rate": 1.237158397929472e-06, "loss": 0.014, "step": 4056 }, { "epoch": 0.89, "grad_norm": 0.2610574880424356, "learning_rate": 1.2322366148282638e-06, "loss": 0.0227, "step": 4057 }, { "epoch": 0.89, "grad_norm": 0.19375172698881735, "learning_rate": 1.2273243302071513e-06, "loss": 0.0163, "step": 4058 }, { "epoch": 0.89, "grad_norm": 0.21801280222549693, "learning_rate": 1.2224215465522726e-06, "loss": 0.0292, "step": 4059 }, { "epoch": 0.89, "grad_norm": 0.21858894004737908, "learning_rate": 1.2175282663449584e-06, "loss": 0.0311, "step": 4060 }, { "epoch": 0.89, "grad_norm": 0.23674585923679517, "learning_rate": 1.2126444920617297e-06, "loss": 0.0244, "step": 4061 }, { "epoch": 0.89, "grad_norm": 0.24901589774890992, "learning_rate": 1.2077702261742875e-06, "loss": 0.0328, "step": 4062 }, { "epoch": 0.89, "grad_norm": 0.21182642939812502, "learning_rate": 1.2029054711495358e-06, "loss": 0.0235, "step": 4063 }, { "epoch": 0.89, "grad_norm": 0.24339609680372667, "learning_rate": 1.19805022944955e-06, "loss": 0.0215, "step": 4064 }, { "epoch": 0.89, "grad_norm": 0.23009335122623728, "learning_rate": 1.193204503531602e-06, "loss": 0.0266, "step": 4065 }, { "epoch": 0.89, "grad_norm": 0.21858024822689778, "learning_rate": 1.1883682958481413e-06, "loss": 0.0203, "step": 4066 }, { "epoch": 0.89, "grad_norm": 0.1924488225475941, "learning_rate": 1.1835416088468033e-06, "loss": 0.0201, "step": 4067 }, { "epoch": 0.89, "grad_norm": 0.2545215926378505, "learning_rate": 1.178724444970405e-06, "loss": 0.0242, "step": 4068 }, { "epoch": 0.89, "grad_norm": 0.22614522029108927, "learning_rate": 1.1739168066569406e-06, "loss": 0.0203, "step": 4069 }, { "epoch": 0.89, "grad_norm": 0.23238722759341868, "learning_rate": 1.1691186963395861e-06, "loss": 0.0219, "step": 4070 }, { "epoch": 0.89, "grad_norm": 0.18852350905397555, "learning_rate": 1.1643301164466926e-06, "loss": 0.0153, "step": 4071 }, { "epoch": 0.89, "grad_norm": 0.2369111490974642, "learning_rate": 1.1595510694017943e-06, "loss": 0.0218, "step": 4072 }, { "epoch": 0.89, "grad_norm": 0.21487258370856713, "learning_rate": 1.154781557623592e-06, "loss": 0.027, "step": 4073 }, { "epoch": 0.89, "grad_norm": 0.17336881153748232, "learning_rate": 1.1500215835259664e-06, "loss": 0.0178, "step": 4074 }, { "epoch": 0.9, "grad_norm": 0.23694717100406254, "learning_rate": 1.1452711495179659e-06, "loss": 0.0344, "step": 4075 }, { "epoch": 0.9, "grad_norm": 0.25646757681452675, "learning_rate": 1.1405302580038224e-06, "loss": 0.0252, "step": 4076 }, { "epoch": 0.9, "grad_norm": 0.22545380458878736, "learning_rate": 1.1357989113829237e-06, "loss": 0.0318, "step": 4077 }, { "epoch": 0.9, "grad_norm": 0.26999264077912627, "learning_rate": 1.1310771120498386e-06, "loss": 0.0314, "step": 4078 }, { "epoch": 0.9, "grad_norm": 0.2638899995894788, "learning_rate": 1.1263648623942912e-06, "loss": 0.0306, "step": 4079 }, { "epoch": 0.9, "grad_norm": 0.23883802215613728, "learning_rate": 1.1216621648011873e-06, "loss": 0.0292, "step": 4080 }, { "epoch": 0.9, "grad_norm": 0.24956217813602427, "learning_rate": 1.1169690216505846e-06, "loss": 0.0202, "step": 4081 }, { "epoch": 0.9, "grad_norm": 0.30855110936179464, "learning_rate": 1.1122854353177171e-06, "loss": 0.0309, "step": 4082 }, { "epoch": 0.9, "grad_norm": 0.25117188211918606, "learning_rate": 1.1076114081729682e-06, "loss": 0.0228, "step": 4083 }, { "epoch": 0.9, "grad_norm": 0.23851976026841812, "learning_rate": 1.1029469425819039e-06, "loss": 0.0268, "step": 4084 }, { "epoch": 0.9, "grad_norm": 0.20732132365666606, "learning_rate": 1.0982920409052312e-06, "loss": 0.0222, "step": 4085 }, { "epoch": 0.9, "grad_norm": 0.1951781001201858, "learning_rate": 1.0936467054988276e-06, "loss": 0.0291, "step": 4086 }, { "epoch": 0.9, "grad_norm": 0.20133334225995106, "learning_rate": 1.0890109387137216e-06, "loss": 0.0231, "step": 4087 }, { "epoch": 0.9, "grad_norm": 0.23568867642602953, "learning_rate": 1.0843847428961074e-06, "loss": 0.0279, "step": 4088 }, { "epoch": 0.9, "grad_norm": 0.22112487061724806, "learning_rate": 1.0797681203873255e-06, "loss": 0.0309, "step": 4089 }, { "epoch": 0.9, "grad_norm": 0.2557224897586447, "learning_rate": 1.0751610735238848e-06, "loss": 0.0389, "step": 4090 }, { "epoch": 0.9, "grad_norm": 0.25351980585021766, "learning_rate": 1.0705636046374334e-06, "loss": 0.0269, "step": 4091 }, { "epoch": 0.9, "grad_norm": 0.2546228420351522, "learning_rate": 1.0659757160547813e-06, "loss": 0.0255, "step": 4092 }, { "epoch": 0.9, "grad_norm": 0.27955351555875024, "learning_rate": 1.0613974100978885e-06, "loss": 0.0308, "step": 4093 }, { "epoch": 0.9, "grad_norm": 0.22525865998805114, "learning_rate": 1.0568286890838575e-06, "loss": 0.0188, "step": 4094 }, { "epoch": 0.9, "grad_norm": 0.2952819419874532, "learning_rate": 1.0522695553249562e-06, "loss": 0.0275, "step": 4095 }, { "epoch": 0.9, "grad_norm": 0.2355088938746616, "learning_rate": 1.047720011128579e-06, "loss": 0.032, "step": 4096 }, { "epoch": 0.9, "grad_norm": 0.23432050707250046, "learning_rate": 1.0431800587972862e-06, "loss": 0.0189, "step": 4097 }, { "epoch": 0.9, "grad_norm": 0.22924152239045292, "learning_rate": 1.038649700628771e-06, "loss": 0.0309, "step": 4098 }, { "epoch": 0.9, "grad_norm": 0.3333575741576425, "learning_rate": 1.0341289389158793e-06, "loss": 0.0321, "step": 4099 }, { "epoch": 0.9, "grad_norm": 0.21430262129224475, "learning_rate": 1.029617775946592e-06, "loss": 0.0222, "step": 4100 }, { "epoch": 0.9, "grad_norm": 0.21626876395465008, "learning_rate": 1.0251162140040383e-06, "loss": 0.0233, "step": 4101 }, { "epoch": 0.9, "grad_norm": 0.22993769318753188, "learning_rate": 1.0206242553664868e-06, "loss": 0.0222, "step": 4102 }, { "epoch": 0.9, "grad_norm": 0.2886638598948931, "learning_rate": 1.016141902307346e-06, "loss": 0.035, "step": 4103 }, { "epoch": 0.9, "grad_norm": 0.23592357847054904, "learning_rate": 1.011669157095161e-06, "loss": 0.032, "step": 4104 }, { "epoch": 0.9, "grad_norm": 0.2396318431478083, "learning_rate": 1.0072060219936164e-06, "loss": 0.0183, "step": 4105 }, { "epoch": 0.9, "grad_norm": 0.26774349810437653, "learning_rate": 1.002752499261528e-06, "loss": 0.0318, "step": 4106 }, { "epoch": 0.9, "grad_norm": 0.23850047079925532, "learning_rate": 9.98308591152859e-07, "loss": 0.0339, "step": 4107 }, { "epoch": 0.9, "grad_norm": 0.2522215337607944, "learning_rate": 9.93874299916693e-07, "loss": 0.0243, "step": 4108 }, { "epoch": 0.9, "grad_norm": 0.22719153686620427, "learning_rate": 9.894496277972498e-07, "loss": 0.03, "step": 4109 }, { "epoch": 0.9, "grad_norm": 0.1965662389439065, "learning_rate": 9.850345770338875e-07, "loss": 0.0282, "step": 4110 }, { "epoch": 0.9, "grad_norm": 0.24668700750247183, "learning_rate": 9.80629149861092e-07, "loss": 0.032, "step": 4111 }, { "epoch": 0.9, "grad_norm": 0.23108659783490207, "learning_rate": 9.76233348508473e-07, "loss": 0.0265, "step": 4112 }, { "epoch": 0.9, "grad_norm": 0.2106170339238177, "learning_rate": 9.718471752007753e-07, "loss": 0.0189, "step": 4113 }, { "epoch": 0.9, "grad_norm": 0.21015933222442607, "learning_rate": 9.67470632157863e-07, "loss": 0.0221, "step": 4114 }, { "epoch": 0.9, "grad_norm": 0.20480083611108585, "learning_rate": 9.63103721594738e-07, "loss": 0.0241, "step": 4115 }, { "epoch": 0.9, "grad_norm": 0.2944418429963534, "learning_rate": 9.587464457215146e-07, "loss": 0.0288, "step": 4116 }, { "epoch": 0.9, "grad_norm": 0.23625826788366722, "learning_rate": 9.54398806743444e-07, "loss": 0.0224, "step": 4117 }, { "epoch": 0.9, "grad_norm": 0.263884818661961, "learning_rate": 9.500608068608841e-07, "loss": 0.0247, "step": 4118 }, { "epoch": 0.9, "grad_norm": 0.23296904069458949, "learning_rate": 9.457324482693275e-07, "loss": 0.0228, "step": 4119 }, { "epoch": 0.9, "grad_norm": 0.28249035291319474, "learning_rate": 9.414137331593842e-07, "loss": 0.0379, "step": 4120 }, { "epoch": 0.91, "grad_norm": 0.23230560985053014, "learning_rate": 9.371046637167835e-07, "loss": 0.0167, "step": 4121 }, { "epoch": 0.91, "grad_norm": 0.25321784965640365, "learning_rate": 9.328052421223676e-07, "loss": 0.0247, "step": 4122 }, { "epoch": 0.91, "grad_norm": 0.22432202741914464, "learning_rate": 9.285154705521048e-07, "loss": 0.029, "step": 4123 }, { "epoch": 0.91, "grad_norm": 0.21346787402733183, "learning_rate": 9.242353511770697e-07, "loss": 0.0219, "step": 4124 }, { "epoch": 0.91, "grad_norm": 0.23649280894740557, "learning_rate": 9.199648861634625e-07, "loss": 0.0261, "step": 4125 }, { "epoch": 0.91, "grad_norm": 0.1963177180843091, "learning_rate": 9.157040776725856e-07, "loss": 0.0204, "step": 4126 }, { "epoch": 0.91, "grad_norm": 0.23517643281117567, "learning_rate": 9.11452927860863e-07, "loss": 0.0234, "step": 4127 }, { "epoch": 0.91, "grad_norm": 0.2606570501311804, "learning_rate": 9.072114388798314e-07, "loss": 0.0229, "step": 4128 }, { "epoch": 0.91, "grad_norm": 0.2713708297069939, "learning_rate": 9.029796128761292e-07, "loss": 0.0274, "step": 4129 }, { "epoch": 0.91, "grad_norm": 0.21933885064572553, "learning_rate": 8.987574519915121e-07, "loss": 0.0144, "step": 4130 }, { "epoch": 0.91, "grad_norm": 0.24057086391334367, "learning_rate": 8.945449583628396e-07, "loss": 0.0228, "step": 4131 }, { "epoch": 0.91, "grad_norm": 0.20083751483840492, "learning_rate": 8.903421341220842e-07, "loss": 0.0253, "step": 4132 }, { "epoch": 0.91, "grad_norm": 0.23787963353213165, "learning_rate": 8.861489813963154e-07, "loss": 0.0222, "step": 4133 }, { "epoch": 0.91, "grad_norm": 0.2820054610447314, "learning_rate": 8.819655023077201e-07, "loss": 0.0304, "step": 4134 }, { "epoch": 0.91, "grad_norm": 0.2659333346855394, "learning_rate": 8.777916989735736e-07, "loss": 0.0293, "step": 4135 }, { "epoch": 0.91, "grad_norm": 0.2236776098834561, "learning_rate": 8.736275735062749e-07, "loss": 0.0203, "step": 4136 }, { "epoch": 0.91, "grad_norm": 0.3545968818018609, "learning_rate": 8.694731280133051e-07, "loss": 0.04, "step": 4137 }, { "epoch": 0.91, "grad_norm": 0.19651733713495328, "learning_rate": 8.653283645972598e-07, "loss": 0.0195, "step": 4138 }, { "epoch": 0.91, "grad_norm": 0.24283739537981863, "learning_rate": 8.611932853558236e-07, "loss": 0.0356, "step": 4139 }, { "epoch": 0.91, "grad_norm": 0.18475203675836635, "learning_rate": 8.570678923817888e-07, "loss": 0.0156, "step": 4140 }, { "epoch": 0.91, "grad_norm": 0.2951715057089647, "learning_rate": 8.529521877630409e-07, "loss": 0.0328, "step": 4141 }, { "epoch": 0.91, "grad_norm": 0.21674076419565533, "learning_rate": 8.48846173582567e-07, "loss": 0.0212, "step": 4142 }, { "epoch": 0.91, "grad_norm": 0.24360912451878924, "learning_rate": 8.447498519184405e-07, "loss": 0.0233, "step": 4143 }, { "epoch": 0.91, "grad_norm": 0.1933249240586514, "learning_rate": 8.406632248438362e-07, "loss": 0.0147, "step": 4144 }, { "epoch": 0.91, "grad_norm": 0.2590958414376381, "learning_rate": 8.365862944270243e-07, "loss": 0.0361, "step": 4145 }, { "epoch": 0.91, "grad_norm": 0.23042954903530702, "learning_rate": 8.325190627313628e-07, "loss": 0.0283, "step": 4146 }, { "epoch": 0.91, "grad_norm": 0.2168782742937472, "learning_rate": 8.284615318152988e-07, "loss": 0.0256, "step": 4147 }, { "epoch": 0.91, "grad_norm": 0.23256130203793526, "learning_rate": 8.244137037323807e-07, "loss": 0.0243, "step": 4148 }, { "epoch": 0.91, "grad_norm": 0.2065068928120067, "learning_rate": 8.203755805312319e-07, "loss": 0.027, "step": 4149 }, { "epoch": 0.91, "grad_norm": 0.2840058099562093, "learning_rate": 8.163471642555798e-07, "loss": 0.0318, "step": 4150 }, { "epoch": 0.91, "grad_norm": 0.24845108378959502, "learning_rate": 8.123284569442203e-07, "loss": 0.0214, "step": 4151 }, { "epoch": 0.91, "grad_norm": 0.26372225244833675, "learning_rate": 8.083194606310507e-07, "loss": 0.0255, "step": 4152 }, { "epoch": 0.91, "grad_norm": 0.222776832053034, "learning_rate": 8.043201773450526e-07, "loss": 0.0328, "step": 4153 }, { "epoch": 0.91, "grad_norm": 0.2358572422346872, "learning_rate": 8.003306091102803e-07, "loss": 0.0363, "step": 4154 }, { "epoch": 0.91, "grad_norm": 0.20142653929065527, "learning_rate": 7.963507579458851e-07, "loss": 0.0197, "step": 4155 }, { "epoch": 0.91, "grad_norm": 0.23690345186906567, "learning_rate": 7.923806258660893e-07, "loss": 0.0185, "step": 4156 }, { "epoch": 0.91, "grad_norm": 0.27213312753447716, "learning_rate": 7.884202148802056e-07, "loss": 0.0234, "step": 4157 }, { "epoch": 0.91, "grad_norm": 0.24306988711717298, "learning_rate": 7.844695269926194e-07, "loss": 0.0212, "step": 4158 }, { "epoch": 0.91, "grad_norm": 0.2129738146430894, "learning_rate": 7.805285642027983e-07, "loss": 0.0246, "step": 4159 }, { "epoch": 0.91, "grad_norm": 0.25549705710181264, "learning_rate": 7.765973285052863e-07, "loss": 0.0319, "step": 4160 }, { "epoch": 0.91, "grad_norm": 0.22458338379785636, "learning_rate": 7.726758218897079e-07, "loss": 0.0234, "step": 4161 }, { "epoch": 0.91, "grad_norm": 0.23688323660850585, "learning_rate": 7.687640463407597e-07, "loss": 0.0226, "step": 4162 }, { "epoch": 0.91, "grad_norm": 0.20468908374843428, "learning_rate": 7.648620038382204e-07, "loss": 0.0262, "step": 4163 }, { "epoch": 0.91, "grad_norm": 0.23873796624918533, "learning_rate": 7.609696963569325e-07, "loss": 0.0238, "step": 4164 }, { "epoch": 0.91, "grad_norm": 0.25653442844889807, "learning_rate": 7.5708712586682e-07, "loss": 0.027, "step": 4165 }, { "epoch": 0.92, "grad_norm": 0.22679305677521136, "learning_rate": 7.532142943328713e-07, "loss": 0.0226, "step": 4166 }, { "epoch": 0.92, "grad_norm": 0.2986472835490037, "learning_rate": 7.493512037151563e-07, "loss": 0.0373, "step": 4167 }, { "epoch": 0.92, "grad_norm": 0.19956714422279945, "learning_rate": 7.454978559688019e-07, "loss": 0.0188, "step": 4168 }, { "epoch": 0.92, "grad_norm": 0.2401934798253411, "learning_rate": 7.416542530440174e-07, "loss": 0.0289, "step": 4169 }, { "epoch": 0.92, "grad_norm": 0.25598255133583186, "learning_rate": 7.378203968860643e-07, "loss": 0.031, "step": 4170 }, { "epoch": 0.92, "grad_norm": 0.2686609508781585, "learning_rate": 7.339962894352925e-07, "loss": 0.0337, "step": 4171 }, { "epoch": 0.92, "grad_norm": 0.2355377600378674, "learning_rate": 7.30181932627101e-07, "loss": 0.0296, "step": 4172 }, { "epoch": 0.92, "grad_norm": 0.23873494683739044, "learning_rate": 7.263773283919584e-07, "loss": 0.029, "step": 4173 }, { "epoch": 0.92, "grad_norm": 0.2928635599121976, "learning_rate": 7.225824786553981e-07, "loss": 0.0269, "step": 4174 }, { "epoch": 0.92, "grad_norm": 0.2290730321214082, "learning_rate": 7.187973853380215e-07, "loss": 0.021, "step": 4175 }, { "epoch": 0.92, "grad_norm": 0.2189374018796047, "learning_rate": 7.150220503554783e-07, "loss": 0.0225, "step": 4176 }, { "epoch": 0.92, "grad_norm": 0.23741704533576782, "learning_rate": 7.112564756184981e-07, "loss": 0.0263, "step": 4177 }, { "epoch": 0.92, "grad_norm": 0.24110244905868689, "learning_rate": 7.075006630328518e-07, "loss": 0.0175, "step": 4178 }, { "epoch": 0.92, "grad_norm": 0.2777066073372374, "learning_rate": 7.037546144993901e-07, "loss": 0.0327, "step": 4179 }, { "epoch": 0.92, "grad_norm": 0.28038260696353856, "learning_rate": 7.000183319140053e-07, "loss": 0.0229, "step": 4180 }, { "epoch": 0.92, "grad_norm": 0.28097797855363493, "learning_rate": 6.962918171676536e-07, "loss": 0.033, "step": 4181 }, { "epoch": 0.92, "grad_norm": 0.2777447286787186, "learning_rate": 6.925750721463443e-07, "loss": 0.03, "step": 4182 }, { "epoch": 0.92, "grad_norm": 0.3036071747282006, "learning_rate": 6.88868098731148e-07, "loss": 0.0404, "step": 4183 }, { "epoch": 0.92, "grad_norm": 0.2548057468877089, "learning_rate": 6.851708987981865e-07, "loss": 0.0246, "step": 4184 }, { "epoch": 0.92, "grad_norm": 0.20866771129708744, "learning_rate": 6.814834742186361e-07, "loss": 0.0165, "step": 4185 }, { "epoch": 0.92, "grad_norm": 0.24086860735556023, "learning_rate": 6.778058268587217e-07, "loss": 0.0374, "step": 4186 }, { "epoch": 0.92, "grad_norm": 0.23213997743465795, "learning_rate": 6.741379585797236e-07, "loss": 0.0236, "step": 4187 }, { "epoch": 0.92, "grad_norm": 0.2619117480694197, "learning_rate": 6.704798712379768e-07, "loss": 0.0334, "step": 4188 }, { "epoch": 0.92, "grad_norm": 0.2228225789458934, "learning_rate": 6.66831566684858e-07, "loss": 0.026, "step": 4189 }, { "epoch": 0.92, "grad_norm": 0.23261587877650092, "learning_rate": 6.631930467667991e-07, "loss": 0.0254, "step": 4190 }, { "epoch": 0.92, "grad_norm": 0.23538675480397928, "learning_rate": 6.595643133252716e-07, "loss": 0.0234, "step": 4191 }, { "epoch": 0.92, "grad_norm": 0.20758722435386293, "learning_rate": 6.559453681968064e-07, "loss": 0.0261, "step": 4192 }, { "epoch": 0.92, "grad_norm": 0.23417337009794903, "learning_rate": 6.523362132129718e-07, "loss": 0.0214, "step": 4193 }, { "epoch": 0.92, "grad_norm": 0.2359065086018697, "learning_rate": 6.487368502003821e-07, "loss": 0.0197, "step": 4194 }, { "epoch": 0.92, "grad_norm": 0.2548183929766835, "learning_rate": 6.451472809806958e-07, "loss": 0.0254, "step": 4195 }, { "epoch": 0.92, "grad_norm": 0.24662451047698913, "learning_rate": 6.415675073706174e-07, "loss": 0.0226, "step": 4196 }, { "epoch": 0.92, "grad_norm": 0.2464448150470119, "learning_rate": 6.379975311818931e-07, "loss": 0.0378, "step": 4197 }, { "epoch": 0.92, "grad_norm": 0.2398910472719313, "learning_rate": 6.344373542213112e-07, "loss": 0.0254, "step": 4198 }, { "epoch": 0.92, "grad_norm": 0.195640993439512, "learning_rate": 6.308869782906946e-07, "loss": 0.0181, "step": 4199 }, { "epoch": 0.92, "grad_norm": 0.23935219596327983, "learning_rate": 6.27346405186915e-07, "loss": 0.0238, "step": 4200 }, { "epoch": 0.92, "grad_norm": 0.2357560713882237, "learning_rate": 6.238156367018744e-07, "loss": 0.0276, "step": 4201 }, { "epoch": 0.92, "grad_norm": 0.2454419207707864, "learning_rate": 6.202946746225191e-07, "loss": 0.0321, "step": 4202 }, { "epoch": 0.92, "grad_norm": 0.2008752487806573, "learning_rate": 6.16783520730826e-07, "loss": 0.0179, "step": 4203 }, { "epoch": 0.92, "grad_norm": 0.23999128215324347, "learning_rate": 6.132821768038133e-07, "loss": 0.0412, "step": 4204 }, { "epoch": 0.92, "grad_norm": 0.18943407021881573, "learning_rate": 6.097906446135349e-07, "loss": 0.0196, "step": 4205 }, { "epoch": 0.92, "grad_norm": 0.24820520949456476, "learning_rate": 6.063089259270749e-07, "loss": 0.0241, "step": 4206 }, { "epoch": 0.92, "grad_norm": 0.22108302713818226, "learning_rate": 6.028370225065527e-07, "loss": 0.0256, "step": 4207 }, { "epoch": 0.92, "grad_norm": 0.20942082686440533, "learning_rate": 5.993749361091206e-07, "loss": 0.0233, "step": 4208 }, { "epoch": 0.92, "grad_norm": 0.21624351021780297, "learning_rate": 5.95922668486959e-07, "loss": 0.0221, "step": 4209 }, { "epoch": 0.92, "grad_norm": 0.2030324908655009, "learning_rate": 5.92480221387286e-07, "loss": 0.0266, "step": 4210 }, { "epoch": 0.92, "grad_norm": 0.2234644610137812, "learning_rate": 5.890475965523412e-07, "loss": 0.0195, "step": 4211 }, { "epoch": 0.93, "grad_norm": 0.20601259497684363, "learning_rate": 5.856247957193995e-07, "loss": 0.021, "step": 4212 }, { "epoch": 0.93, "grad_norm": 0.2268001855233441, "learning_rate": 5.822118206207594e-07, "loss": 0.0288, "step": 4213 }, { "epoch": 0.93, "grad_norm": 0.231620561267959, "learning_rate": 5.788086729837505e-07, "loss": 0.014, "step": 4214 }, { "epoch": 0.93, "grad_norm": 0.27735890091896426, "learning_rate": 5.754153545307262e-07, "loss": 0.033, "step": 4215 }, { "epoch": 0.93, "grad_norm": 0.22598401570866886, "learning_rate": 5.720318669790636e-07, "loss": 0.0209, "step": 4216 }, { "epoch": 0.93, "grad_norm": 0.2529445187259624, "learning_rate": 5.68658212041171e-07, "loss": 0.0325, "step": 4217 }, { "epoch": 0.93, "grad_norm": 0.28292747763192433, "learning_rate": 5.652943914244713e-07, "loss": 0.0184, "step": 4218 }, { "epoch": 0.93, "grad_norm": 0.23992488277864113, "learning_rate": 5.61940406831416e-07, "loss": 0.0251, "step": 4219 }, { "epoch": 0.93, "grad_norm": 0.2524621095413231, "learning_rate": 5.585962599594807e-07, "loss": 0.0261, "step": 4220 }, { "epoch": 0.93, "grad_norm": 0.2279851821378975, "learning_rate": 5.552619525011538e-07, "loss": 0.0177, "step": 4221 }, { "epoch": 0.93, "grad_norm": 0.23148640939756462, "learning_rate": 5.519374861439497e-07, "loss": 0.0286, "step": 4222 }, { "epoch": 0.93, "grad_norm": 0.26922703589088404, "learning_rate": 5.486228625704049e-07, "loss": 0.0315, "step": 4223 }, { "epoch": 0.93, "grad_norm": 0.25237760346266275, "learning_rate": 5.453180834580663e-07, "loss": 0.0243, "step": 4224 }, { "epoch": 0.93, "grad_norm": 0.28562854167820906, "learning_rate": 5.42023150479507e-07, "loss": 0.0342, "step": 4225 }, { "epoch": 0.93, "grad_norm": 0.2511582366420523, "learning_rate": 5.387380653023066e-07, "loss": 0.029, "step": 4226 }, { "epoch": 0.93, "grad_norm": 0.1821205709968757, "learning_rate": 5.354628295890729e-07, "loss": 0.0203, "step": 4227 }, { "epoch": 0.93, "grad_norm": 0.210666880264267, "learning_rate": 5.321974449974198e-07, "loss": 0.0224, "step": 4228 }, { "epoch": 0.93, "grad_norm": 0.19076632553010706, "learning_rate": 5.289419131799811e-07, "loss": 0.0215, "step": 4229 }, { "epoch": 0.93, "grad_norm": 0.21570515041374458, "learning_rate": 5.256962357843942e-07, "loss": 0.0194, "step": 4230 }, { "epoch": 0.93, "grad_norm": 0.23654925822055214, "learning_rate": 5.224604144533274e-07, "loss": 0.0227, "step": 4231 }, { "epoch": 0.93, "grad_norm": 0.2083108333517541, "learning_rate": 5.192344508244418e-07, "loss": 0.0241, "step": 4232 }, { "epoch": 0.93, "grad_norm": 0.2713138676090215, "learning_rate": 5.160183465304203e-07, "loss": 0.0346, "step": 4233 }, { "epoch": 0.93, "grad_norm": 0.20912935451025808, "learning_rate": 5.128121031989497e-07, "loss": 0.0157, "step": 4234 }, { "epoch": 0.93, "grad_norm": 0.19525838102139567, "learning_rate": 5.096157224527343e-07, "loss": 0.02, "step": 4235 }, { "epoch": 0.93, "grad_norm": 0.3220334690689413, "learning_rate": 5.0642920590948e-07, "loss": 0.0321, "step": 4236 }, { "epoch": 0.93, "grad_norm": 0.2030117379374764, "learning_rate": 5.032525551819012e-07, "loss": 0.0231, "step": 4237 }, { "epoch": 0.93, "grad_norm": 0.26590538653566287, "learning_rate": 5.000857718777186e-07, "loss": 0.0315, "step": 4238 }, { "epoch": 0.93, "grad_norm": 0.3333967292621206, "learning_rate": 4.969288575996656e-07, "loss": 0.0372, "step": 4239 }, { "epoch": 0.93, "grad_norm": 0.282552722045364, "learning_rate": 4.937818139454709e-07, "loss": 0.0245, "step": 4240 }, { "epoch": 0.93, "grad_norm": 0.28079548508313534, "learning_rate": 4.906446425078782e-07, "loss": 0.0313, "step": 4241 }, { "epoch": 0.93, "grad_norm": 0.23541741617190348, "learning_rate": 4.87517344874624e-07, "loss": 0.0271, "step": 4242 }, { "epoch": 0.93, "grad_norm": 0.20766293227191449, "learning_rate": 4.843999226284579e-07, "loss": 0.0203, "step": 4243 }, { "epoch": 0.93, "grad_norm": 0.24228220287830854, "learning_rate": 4.812923773471201e-07, "loss": 0.0349, "step": 4244 }, { "epoch": 0.93, "grad_norm": 0.20977995413316466, "learning_rate": 4.781947106033635e-07, "loss": 0.0138, "step": 4245 }, { "epoch": 0.93, "grad_norm": 0.22777955603260774, "learning_rate": 4.7510692396493197e-07, "loss": 0.0256, "step": 4246 }, { "epoch": 0.93, "grad_norm": 0.2576554426894255, "learning_rate": 4.720290189945775e-07, "loss": 0.0282, "step": 4247 }, { "epoch": 0.93, "grad_norm": 0.2607966734003366, "learning_rate": 4.689609972500453e-07, "loss": 0.0224, "step": 4248 }, { "epoch": 0.93, "grad_norm": 0.26018610602543085, "learning_rate": 4.659028602840776e-07, "loss": 0.0223, "step": 4249 }, { "epoch": 0.93, "grad_norm": 0.24925940310591385, "learning_rate": 4.628546096444186e-07, "loss": 0.0354, "step": 4250 }, { "epoch": 0.93, "grad_norm": 0.23503375459028097, "learning_rate": 4.5981624687380764e-07, "loss": 0.0189, "step": 4251 }, { "epoch": 0.93, "grad_norm": 0.2559029927821459, "learning_rate": 4.567877735099768e-07, "loss": 0.0327, "step": 4252 }, { "epoch": 0.93, "grad_norm": 0.22518056306733947, "learning_rate": 4.5376919108565345e-07, "loss": 0.0331, "step": 4253 }, { "epoch": 0.93, "grad_norm": 0.2204743355679495, "learning_rate": 4.507605011285643e-07, "loss": 0.0225, "step": 4254 }, { "epoch": 0.93, "grad_norm": 0.24288990851315317, "learning_rate": 4.477617051614225e-07, "loss": 0.0244, "step": 4255 }, { "epoch": 0.93, "grad_norm": 0.24696868193397514, "learning_rate": 4.4477280470194064e-07, "loss": 0.0357, "step": 4256 }, { "epoch": 0.93, "grad_norm": 0.19652242690213034, "learning_rate": 4.4179380126281533e-07, "loss": 0.0196, "step": 4257 }, { "epoch": 0.94, "grad_norm": 0.26521811456515404, "learning_rate": 4.3882469635174287e-07, "loss": 0.036, "step": 4258 }, { "epoch": 0.94, "grad_norm": 0.22014588081843334, "learning_rate": 4.358654914714033e-07, "loss": 0.0313, "step": 4259 }, { "epoch": 0.94, "grad_norm": 0.2394356203137378, "learning_rate": 4.329161881194677e-07, "loss": 0.0297, "step": 4260 }, { "epoch": 0.94, "grad_norm": 0.23013257708396465, "learning_rate": 4.299767877885974e-07, "loss": 0.0272, "step": 4261 }, { "epoch": 0.94, "grad_norm": 0.19900803548497645, "learning_rate": 4.270472919664426e-07, "loss": 0.0279, "step": 4262 }, { "epoch": 0.94, "grad_norm": 0.24045887203637645, "learning_rate": 4.241277021356327e-07, "loss": 0.025, "step": 4263 }, { "epoch": 0.94, "grad_norm": 0.2620758637838076, "learning_rate": 4.212180197737992e-07, "loss": 0.0243, "step": 4264 }, { "epoch": 0.94, "grad_norm": 0.23535719742103473, "learning_rate": 4.183182463535418e-07, "loss": 0.0227, "step": 4265 }, { "epoch": 0.94, "grad_norm": 0.20651391007562137, "learning_rate": 4.1542838334245994e-07, "loss": 0.0206, "step": 4266 }, { "epoch": 0.94, "grad_norm": 0.28741586635869343, "learning_rate": 4.1254843220312814e-07, "loss": 0.0243, "step": 4267 }, { "epoch": 0.94, "grad_norm": 0.22432157903277913, "learning_rate": 4.096783943931093e-07, "loss": 0.0302, "step": 4268 }, { "epoch": 0.94, "grad_norm": 0.2236200437273739, "learning_rate": 4.0681827136494157e-07, "loss": 0.0234, "step": 4269 }, { "epoch": 0.94, "grad_norm": 0.22921955065597152, "learning_rate": 4.039680645661581e-07, "loss": 0.022, "step": 4270 }, { "epoch": 0.94, "grad_norm": 0.2059903699825295, "learning_rate": 4.011277754392606e-07, "loss": 0.0307, "step": 4271 }, { "epoch": 0.94, "grad_norm": 0.24625171405418378, "learning_rate": 3.9829740542174143e-07, "loss": 0.0331, "step": 4272 }, { "epoch": 0.94, "grad_norm": 0.280307993602784, "learning_rate": 3.954769559460614e-07, "loss": 0.0223, "step": 4273 }, { "epoch": 0.94, "grad_norm": 0.2639314292337824, "learning_rate": 3.9266642843967415e-07, "loss": 0.0255, "step": 4274 }, { "epoch": 0.94, "grad_norm": 0.15444482880435126, "learning_rate": 3.8986582432500196e-07, "loss": 0.0118, "step": 4275 }, { "epoch": 0.94, "grad_norm": 0.2156307803636951, "learning_rate": 3.8707514501944657e-07, "loss": 0.0222, "step": 4276 }, { "epoch": 0.94, "grad_norm": 0.24637660921685095, "learning_rate": 3.842943919353914e-07, "loss": 0.0297, "step": 4277 }, { "epoch": 0.94, "grad_norm": 0.19506373775960947, "learning_rate": 3.815235664801908e-07, "loss": 0.0225, "step": 4278 }, { "epoch": 0.94, "grad_norm": 0.21452376204645704, "learning_rate": 3.787626700561742e-07, "loss": 0.0253, "step": 4279 }, { "epoch": 0.94, "grad_norm": 0.2523996551046947, "learning_rate": 3.7601170406065034e-07, "loss": 0.0214, "step": 4280 }, { "epoch": 0.94, "grad_norm": 0.20145856927965056, "learning_rate": 3.732706698859012e-07, "loss": 0.0212, "step": 4281 }, { "epoch": 0.94, "grad_norm": 0.2139929832573777, "learning_rate": 3.705395689191771e-07, "loss": 0.0232, "step": 4282 }, { "epoch": 0.94, "grad_norm": 0.23201054149913145, "learning_rate": 3.6781840254271227e-07, "loss": 0.0255, "step": 4283 }, { "epoch": 0.94, "grad_norm": 0.2101806378306389, "learning_rate": 3.651071721336963e-07, "loss": 0.0256, "step": 4284 }, { "epoch": 0.94, "grad_norm": 0.258124278577854, "learning_rate": 3.62405879064307e-07, "loss": 0.0235, "step": 4285 }, { "epoch": 0.94, "grad_norm": 0.20574487654342996, "learning_rate": 3.59714524701682e-07, "loss": 0.0196, "step": 4286 }, { "epoch": 0.94, "grad_norm": 0.2911754015930831, "learning_rate": 3.5703311040793167e-07, "loss": 0.0259, "step": 4287 }, { "epoch": 0.94, "grad_norm": 0.24479935089522756, "learning_rate": 3.543616375401393e-07, "loss": 0.0251, "step": 4288 }, { "epoch": 0.94, "grad_norm": 0.22234509292754578, "learning_rate": 3.517001074503501e-07, "loss": 0.0296, "step": 4289 }, { "epoch": 0.94, "grad_norm": 0.2600904312728723, "learning_rate": 3.490485214855799e-07, "loss": 0.032, "step": 4290 }, { "epoch": 0.94, "grad_norm": 0.22179503947671178, "learning_rate": 3.464068809878196e-07, "loss": 0.023, "step": 4291 }, { "epoch": 0.94, "grad_norm": 0.3118749447098188, "learning_rate": 3.4377518729401317e-07, "loss": 0.0336, "step": 4292 }, { "epoch": 0.94, "grad_norm": 0.20872304745239587, "learning_rate": 3.4115344173607957e-07, "loss": 0.0196, "step": 4293 }, { "epoch": 0.94, "grad_norm": 0.23229821706240353, "learning_rate": 3.3854164564089964e-07, "loss": 0.028, "step": 4294 }, { "epoch": 0.94, "grad_norm": 0.2519289049439303, "learning_rate": 3.359398003303183e-07, "loss": 0.0213, "step": 4295 }, { "epoch": 0.94, "grad_norm": 0.25250354053828866, "learning_rate": 3.333479071211465e-07, "loss": 0.0296, "step": 4296 }, { "epoch": 0.94, "grad_norm": 0.3185874262580183, "learning_rate": 3.307659673251595e-07, "loss": 0.036, "step": 4297 }, { "epoch": 0.94, "grad_norm": 0.267967650630909, "learning_rate": 3.281939822490876e-07, "loss": 0.0359, "step": 4298 }, { "epoch": 0.94, "grad_norm": 0.2060807469237559, "learning_rate": 3.256319531946317e-07, "loss": 0.0157, "step": 4299 }, { "epoch": 0.94, "grad_norm": 0.21211101678876754, "learning_rate": 3.230798814584502e-07, "loss": 0.0258, "step": 4300 }, { "epoch": 0.94, "grad_norm": 0.23040547552978455, "learning_rate": 3.2053776833216533e-07, "loss": 0.0255, "step": 4301 }, { "epoch": 0.94, "grad_norm": 0.25107194149449047, "learning_rate": 3.1800561510234805e-07, "loss": 0.0193, "step": 4302 }, { "epoch": 0.95, "grad_norm": 0.2353953641280843, "learning_rate": 3.1548342305054435e-07, "loss": 0.0261, "step": 4303 }, { "epoch": 0.95, "grad_norm": 0.21677579863228158, "learning_rate": 3.1297119345324645e-07, "loss": 0.0252, "step": 4304 }, { "epoch": 0.95, "grad_norm": 0.2627323790992519, "learning_rate": 3.104689275819128e-07, "loss": 0.0226, "step": 4305 }, { "epoch": 0.95, "grad_norm": 0.18438741916920828, "learning_rate": 3.079766267029527e-07, "loss": 0.0179, "step": 4306 }, { "epoch": 0.95, "grad_norm": 0.18393177001251199, "learning_rate": 3.0549429207773483e-07, "loss": 0.0199, "step": 4307 }, { "epoch": 0.95, "grad_norm": 0.19469604482620562, "learning_rate": 3.030219249625854e-07, "loss": 0.0169, "step": 4308 }, { "epoch": 0.95, "grad_norm": 0.24246372344830378, "learning_rate": 3.005595266087835e-07, "loss": 0.0278, "step": 4309 }, { "epoch": 0.95, "grad_norm": 0.18704342884511982, "learning_rate": 2.9810709826256557e-07, "loss": 0.0179, "step": 4310 }, { "epoch": 0.95, "grad_norm": 0.27612142790059196, "learning_rate": 2.956646411651165e-07, "loss": 0.0339, "step": 4311 }, { "epoch": 0.95, "grad_norm": 0.252322613566112, "learning_rate": 2.932321565525853e-07, "loss": 0.028, "step": 4312 }, { "epoch": 0.95, "grad_norm": 0.23652565007706597, "learning_rate": 2.9080964565606273e-07, "loss": 0.0327, "step": 4313 }, { "epoch": 0.95, "grad_norm": 0.22163762100975448, "learning_rate": 2.883971097015992e-07, "loss": 0.0256, "step": 4314 }, { "epoch": 0.95, "grad_norm": 0.23073831246607718, "learning_rate": 2.859945499101913e-07, "loss": 0.0242, "step": 4315 }, { "epoch": 0.95, "grad_norm": 0.23499828748890664, "learning_rate": 2.8360196749778857e-07, "loss": 0.0294, "step": 4316 }, { "epoch": 0.95, "grad_norm": 0.2615324752419736, "learning_rate": 2.8121936367529357e-07, "loss": 0.0289, "step": 4317 }, { "epoch": 0.95, "grad_norm": 0.21855633541573388, "learning_rate": 2.788467396485595e-07, "loss": 0.0237, "step": 4318 }, { "epoch": 0.95, "grad_norm": 0.2838499246799449, "learning_rate": 2.7648409661837903e-07, "loss": 0.0325, "step": 4319 }, { "epoch": 0.95, "grad_norm": 0.2482569248197878, "learning_rate": 2.7413143578050915e-07, "loss": 0.0243, "step": 4320 }, { "epoch": 0.95, "grad_norm": 0.24498061064465365, "learning_rate": 2.7178875832563734e-07, "loss": 0.0209, "step": 4321 }, { "epoch": 0.95, "grad_norm": 0.2504161559849217, "learning_rate": 2.6945606543941073e-07, "loss": 0.0322, "step": 4322 }, { "epoch": 0.95, "grad_norm": 0.1695524452953065, "learning_rate": 2.671333583024205e-07, "loss": 0.019, "step": 4323 }, { "epoch": 0.95, "grad_norm": 0.24340795824962608, "learning_rate": 2.6482063809020186e-07, "loss": 0.0296, "step": 4324 }, { "epoch": 0.95, "grad_norm": 0.19713535350830463, "learning_rate": 2.625179059732341e-07, "loss": 0.019, "step": 4325 }, { "epoch": 0.95, "grad_norm": 0.24370940463376908, "learning_rate": 2.6022516311695166e-07, "loss": 0.0241, "step": 4326 }, { "epoch": 0.95, "grad_norm": 0.3164895148273076, "learning_rate": 2.579424106817174e-07, "loss": 0.0373, "step": 4327 }, { "epoch": 0.95, "grad_norm": 0.23927869121772133, "learning_rate": 2.556696498228495e-07, "loss": 0.0268, "step": 4328 }, { "epoch": 0.95, "grad_norm": 0.23079495055507038, "learning_rate": 2.5340688169060767e-07, "loss": 0.0241, "step": 4329 }, { "epoch": 0.95, "grad_norm": 0.2125157196177094, "learning_rate": 2.511541074301915e-07, "loss": 0.0244, "step": 4330 }, { "epoch": 0.95, "grad_norm": 0.4308091868465902, "learning_rate": 2.489113281817424e-07, "loss": 0.0395, "step": 4331 }, { "epoch": 0.95, "grad_norm": 0.21628613495322382, "learning_rate": 2.4667854508034774e-07, "loss": 0.0283, "step": 4332 }, { "epoch": 0.95, "grad_norm": 0.2564300695277641, "learning_rate": 2.444557592560304e-07, "loss": 0.0321, "step": 4333 }, { "epoch": 0.95, "grad_norm": 0.22927813997690163, "learning_rate": 2.4224297183375487e-07, "loss": 0.0219, "step": 4334 }, { "epoch": 0.95, "grad_norm": 0.2885607540390936, "learning_rate": 2.400401839334299e-07, "loss": 0.0306, "step": 4335 }, { "epoch": 0.95, "grad_norm": 0.18273473210359006, "learning_rate": 2.378473966698991e-07, "loss": 0.0251, "step": 4336 }, { "epoch": 0.95, "grad_norm": 0.2038541873515383, "learning_rate": 2.356646111529415e-07, "loss": 0.0163, "step": 4337 }, { "epoch": 0.95, "grad_norm": 0.22756448693732054, "learning_rate": 2.3349182848728447e-07, "loss": 0.0209, "step": 4338 }, { "epoch": 0.95, "grad_norm": 0.25275938272936144, "learning_rate": 2.3132904977258175e-07, "loss": 0.0275, "step": 4339 }, { "epoch": 0.95, "grad_norm": 0.25280394797837497, "learning_rate": 2.291762761034333e-07, "loss": 0.034, "step": 4340 }, { "epoch": 0.95, "grad_norm": 0.2656973011126717, "learning_rate": 2.2703350856936534e-07, "loss": 0.0339, "step": 4341 }, { "epoch": 0.95, "grad_norm": 0.2092671130621699, "learning_rate": 2.249007482548482e-07, "loss": 0.0234, "step": 4342 }, { "epoch": 0.95, "grad_norm": 0.23992350864382142, "learning_rate": 2.2277799623928953e-07, "loss": 0.0296, "step": 4343 }, { "epoch": 0.95, "grad_norm": 0.2881623764785107, "learning_rate": 2.2066525359701885e-07, "loss": 0.04, "step": 4344 }, { "epoch": 0.95, "grad_norm": 0.20662916472588463, "learning_rate": 2.1856252139731637e-07, "loss": 0.0307, "step": 4345 }, { "epoch": 0.95, "grad_norm": 0.2559865096959685, "learning_rate": 2.1646980070437973e-07, "loss": 0.0398, "step": 4346 }, { "epoch": 0.95, "grad_norm": 0.2727011438698973, "learning_rate": 2.14387092577355e-07, "loss": 0.0281, "step": 4347 }, { "epoch": 0.95, "grad_norm": 0.2275752092064004, "learning_rate": 2.1231439807031019e-07, "loss": 0.0227, "step": 4348 }, { "epoch": 0.96, "grad_norm": 0.25661856732154437, "learning_rate": 2.102517182322483e-07, "loss": 0.0212, "step": 4349 }, { "epoch": 0.96, "grad_norm": 0.2125043426799214, "learning_rate": 2.0819905410710327e-07, "loss": 0.0185, "step": 4350 }, { "epoch": 0.96, "grad_norm": 0.1752174395363728, "learning_rate": 2.0615640673374181e-07, "loss": 0.0182, "step": 4351 }, { "epoch": 0.96, "grad_norm": 0.17804550354493404, "learning_rate": 2.0412377714596365e-07, "loss": 0.0159, "step": 4352 }, { "epoch": 0.96, "grad_norm": 0.24171920077251674, "learning_rate": 2.0210116637249032e-07, "loss": 0.0254, "step": 4353 }, { "epoch": 0.96, "grad_norm": 0.23536818987728242, "learning_rate": 2.0008857543698078e-07, "loss": 0.0203, "step": 4354 }, { "epoch": 0.96, "grad_norm": 0.19003395630424846, "learning_rate": 1.9808600535802024e-07, "loss": 0.0193, "step": 4355 }, { "epoch": 0.96, "grad_norm": 0.21540936662109153, "learning_rate": 1.9609345714911575e-07, "loss": 0.0286, "step": 4356 }, { "epoch": 0.96, "grad_norm": 0.25794112724783735, "learning_rate": 1.941109318187162e-07, "loss": 0.0263, "step": 4357 }, { "epoch": 0.96, "grad_norm": 0.20516243064617495, "learning_rate": 1.9213843037018344e-07, "loss": 0.0215, "step": 4358 }, { "epoch": 0.96, "grad_norm": 0.2646495776838115, "learning_rate": 1.9017595380181442e-07, "loss": 0.0318, "step": 4359 }, { "epoch": 0.96, "grad_norm": 0.26166491168237466, "learning_rate": 1.8822350310683246e-07, "loss": 0.0251, "step": 4360 }, { "epoch": 0.96, "grad_norm": 0.2060624071455964, "learning_rate": 1.862810792733849e-07, "loss": 0.0249, "step": 4361 }, { "epoch": 0.96, "grad_norm": 0.2474567798364588, "learning_rate": 1.843486832845409e-07, "loss": 0.0254, "step": 4362 }, { "epoch": 0.96, "grad_norm": 0.2384592704533461, "learning_rate": 1.8242631611830263e-07, "loss": 0.0279, "step": 4363 }, { "epoch": 0.96, "grad_norm": 0.21230885870940985, "learning_rate": 1.8051397874758736e-07, "loss": 0.0316, "step": 4364 }, { "epoch": 0.96, "grad_norm": 0.26554681524874807, "learning_rate": 1.786116721402431e-07, "loss": 0.0214, "step": 4365 }, { "epoch": 0.96, "grad_norm": 0.20386617493254652, "learning_rate": 1.7671939725903752e-07, "loss": 0.0249, "step": 4366 }, { "epoch": 0.96, "grad_norm": 0.23737255742813607, "learning_rate": 1.7483715506166455e-07, "loss": 0.0291, "step": 4367 }, { "epoch": 0.96, "grad_norm": 0.19966449611016848, "learning_rate": 1.729649465007377e-07, "loss": 0.0269, "step": 4368 }, { "epoch": 0.96, "grad_norm": 0.24499828752824235, "learning_rate": 1.7110277252379238e-07, "loss": 0.0312, "step": 4369 }, { "epoch": 0.96, "grad_norm": 0.23564291367368878, "learning_rate": 1.692506340732858e-07, "loss": 0.0234, "step": 4370 }, { "epoch": 0.96, "grad_norm": 0.2292955570735456, "learning_rate": 1.6740853208659923e-07, "loss": 0.0176, "step": 4371 }, { "epoch": 0.96, "grad_norm": 0.3094994858065757, "learning_rate": 1.655764674960292e-07, "loss": 0.0357, "step": 4372 }, { "epoch": 0.96, "grad_norm": 0.22058824064100482, "learning_rate": 1.6375444122879613e-07, "loss": 0.0266, "step": 4373 }, { "epoch": 0.96, "grad_norm": 0.24158084937870627, "learning_rate": 1.6194245420704025e-07, "loss": 0.0243, "step": 4374 }, { "epoch": 0.96, "grad_norm": 0.2617999847509731, "learning_rate": 1.6014050734781461e-07, "loss": 0.0193, "step": 4375 }, { "epoch": 0.96, "grad_norm": 0.21384703310406136, "learning_rate": 1.583486015630986e-07, "loss": 0.0278, "step": 4376 }, { "epoch": 0.96, "grad_norm": 0.2059478121015002, "learning_rate": 1.565667377597868e-07, "loss": 0.0222, "step": 4377 }, { "epoch": 0.96, "grad_norm": 0.25593105797118465, "learning_rate": 1.5479491683969117e-07, "loss": 0.0291, "step": 4378 }, { "epoch": 0.96, "grad_norm": 0.2607773603511524, "learning_rate": 1.5303313969954103e-07, "loss": 0.0239, "step": 4379 }, { "epoch": 0.96, "grad_norm": 0.18820977971594646, "learning_rate": 1.5128140723098317e-07, "loss": 0.0193, "step": 4380 }, { "epoch": 0.96, "grad_norm": 0.22737894441256667, "learning_rate": 1.4953972032057952e-07, "loss": 0.02, "step": 4381 }, { "epoch": 0.96, "grad_norm": 0.22069038084251644, "learning_rate": 1.4780807984980716e-07, "loss": 0.0256, "step": 4382 }, { "epoch": 0.96, "grad_norm": 0.2673301940370567, "learning_rate": 1.4608648669506287e-07, "loss": 0.0311, "step": 4383 }, { "epoch": 0.96, "grad_norm": 0.21062860123586485, "learning_rate": 1.443749417276541e-07, "loss": 0.0309, "step": 4384 }, { "epoch": 0.96, "grad_norm": 0.25730030158581574, "learning_rate": 1.4267344581380127e-07, "loss": 0.03, "step": 4385 }, { "epoch": 0.96, "grad_norm": 0.23933088782605375, "learning_rate": 1.4098199981464887e-07, "loss": 0.0316, "step": 4386 }, { "epoch": 0.96, "grad_norm": 0.27638496638288323, "learning_rate": 1.3930060458624106e-07, "loss": 0.0289, "step": 4387 }, { "epoch": 0.96, "grad_norm": 0.231611256723867, "learning_rate": 1.37629260979546e-07, "loss": 0.0222, "step": 4388 }, { "epoch": 0.96, "grad_norm": 0.21467912609032905, "learning_rate": 1.3596796984044037e-07, "loss": 0.0209, "step": 4389 }, { "epoch": 0.96, "grad_norm": 0.21185405602366125, "learning_rate": 1.3431673200971386e-07, "loss": 0.0229, "step": 4390 }, { "epoch": 0.96, "grad_norm": 0.31429962297338676, "learning_rate": 1.3267554832306463e-07, "loss": 0.0379, "step": 4391 }, { "epoch": 0.96, "grad_norm": 0.25666941476925725, "learning_rate": 1.310444196111127e-07, "loss": 0.0329, "step": 4392 }, { "epoch": 0.96, "grad_norm": 0.24319215788408693, "learning_rate": 1.2942334669937773e-07, "loss": 0.0285, "step": 4393 }, { "epoch": 0.97, "grad_norm": 0.22563324469404789, "learning_rate": 1.2781233040829234e-07, "loss": 0.0186, "step": 4394 }, { "epoch": 0.97, "grad_norm": 0.24097933441907754, "learning_rate": 1.2621137155320872e-07, "loss": 0.0315, "step": 4395 }, { "epoch": 0.97, "grad_norm": 0.22624021660480337, "learning_rate": 1.2462047094437657e-07, "loss": 0.0185, "step": 4396 }, { "epoch": 0.97, "grad_norm": 0.2228838276219386, "learning_rate": 1.2303962938696068e-07, "loss": 0.0244, "step": 4397 }, { "epoch": 0.97, "grad_norm": 0.24666175686507735, "learning_rate": 1.2146884768103883e-07, "loss": 0.0245, "step": 4398 }, { "epoch": 0.97, "grad_norm": 0.23184753427529117, "learning_rate": 1.1990812662158846e-07, "loss": 0.0288, "step": 4399 }, { "epoch": 0.97, "grad_norm": 0.31526595551894643, "learning_rate": 1.1835746699850215e-07, "loss": 0.0262, "step": 4400 }, { "epoch": 0.97, "grad_norm": 0.2519075219335554, "learning_rate": 1.1681686959657879e-07, "loss": 0.0254, "step": 4401 }, { "epoch": 0.97, "grad_norm": 0.2899146535678704, "learning_rate": 1.1528633519552357e-07, "loss": 0.0271, "step": 4402 }, { "epoch": 0.97, "grad_norm": 0.1967784891024214, "learning_rate": 1.1376586456994798e-07, "loss": 0.0216, "step": 4403 }, { "epoch": 0.97, "grad_norm": 0.20469461639787942, "learning_rate": 1.1225545848937203e-07, "loss": 0.0271, "step": 4404 }, { "epoch": 0.97, "grad_norm": 0.2547110347756392, "learning_rate": 1.1075511771822423e-07, "loss": 0.0289, "step": 4405 }, { "epoch": 0.97, "grad_norm": 0.24842515021044465, "learning_rate": 1.0926484301583273e-07, "loss": 0.0342, "step": 4406 }, { "epoch": 0.97, "grad_norm": 0.27344049724571035, "learning_rate": 1.0778463513643645e-07, "loss": 0.041, "step": 4407 }, { "epoch": 0.97, "grad_norm": 0.2866099288819449, "learning_rate": 1.0631449482917833e-07, "loss": 0.0293, "step": 4408 }, { "epoch": 0.97, "grad_norm": 0.2351114860967714, "learning_rate": 1.0485442283810321e-07, "loss": 0.0276, "step": 4409 }, { "epoch": 0.97, "grad_norm": 0.21697494136147136, "learning_rate": 1.0340441990216443e-07, "loss": 0.022, "step": 4410 }, { "epoch": 0.97, "grad_norm": 0.21252921688956675, "learning_rate": 1.019644867552172e-07, "loss": 0.024, "step": 4411 }, { "epoch": 0.97, "grad_norm": 0.23640651821365824, "learning_rate": 1.0053462412601855e-07, "loss": 0.0249, "step": 4412 }, { "epoch": 0.97, "grad_norm": 0.20857305245457847, "learning_rate": 9.911483273823408e-08, "loss": 0.021, "step": 4413 }, { "epoch": 0.97, "grad_norm": 0.23242865480423908, "learning_rate": 9.770511331042454e-08, "loss": 0.0347, "step": 4414 }, { "epoch": 0.97, "grad_norm": 0.2642728949258274, "learning_rate": 9.630546655606365e-08, "loss": 0.0216, "step": 4415 }, { "epoch": 0.97, "grad_norm": 0.28584540713271855, "learning_rate": 9.491589318351368e-08, "loss": 0.0381, "step": 4416 }, { "epoch": 0.97, "grad_norm": 0.19898143358756218, "learning_rate": 9.353639389605207e-08, "loss": 0.0191, "step": 4417 }, { "epoch": 0.97, "grad_norm": 0.23981266690114933, "learning_rate": 9.216696939184922e-08, "loss": 0.0248, "step": 4418 }, { "epoch": 0.97, "grad_norm": 0.2955660826219277, "learning_rate": 9.080762036398184e-08, "loss": 0.0298, "step": 4419 }, { "epoch": 0.97, "grad_norm": 0.27698898459903903, "learning_rate": 8.94583475004196e-08, "loss": 0.0337, "step": 4420 }, { "epoch": 0.97, "grad_norm": 0.2443484680475507, "learning_rate": 8.811915148404294e-08, "loss": 0.03, "step": 4421 }, { "epoch": 0.97, "grad_norm": 0.20734399888944313, "learning_rate": 8.679003299262523e-08, "loss": 0.0265, "step": 4422 }, { "epoch": 0.97, "grad_norm": 0.2711587228885377, "learning_rate": 8.547099269884396e-08, "loss": 0.029, "step": 4423 }, { "epoch": 0.97, "grad_norm": 0.22715330372892692, "learning_rate": 8.416203127026734e-08, "loss": 0.0272, "step": 4424 }, { "epoch": 0.97, "grad_norm": 0.18137627510868737, "learning_rate": 8.286314936937434e-08, "loss": 0.0207, "step": 4425 }, { "epoch": 0.97, "grad_norm": 0.25726488504265727, "learning_rate": 8.157434765353466e-08, "loss": 0.0222, "step": 4426 }, { "epoch": 0.97, "grad_norm": 0.22919501685576507, "learning_rate": 8.029562677502212e-08, "loss": 0.0216, "step": 4427 }, { "epoch": 0.97, "grad_norm": 0.22722395988706445, "learning_rate": 7.902698738099901e-08, "loss": 0.0251, "step": 4428 }, { "epoch": 0.97, "grad_norm": 0.25666707801063404, "learning_rate": 7.776843011353619e-08, "loss": 0.0306, "step": 4429 }, { "epoch": 0.97, "grad_norm": 0.26100535452765306, "learning_rate": 7.651995560959525e-08, "loss": 0.0265, "step": 4430 }, { "epoch": 0.97, "grad_norm": 0.21609393419766798, "learning_rate": 7.528156450103963e-08, "loss": 0.0225, "step": 4431 }, { "epoch": 0.97, "grad_norm": 0.21134781666393448, "learning_rate": 7.405325741462354e-08, "loss": 0.0201, "step": 4432 }, { "epoch": 0.97, "grad_norm": 0.17877245193474503, "learning_rate": 7.283503497200083e-08, "loss": 0.0137, "step": 4433 }, { "epoch": 0.97, "grad_norm": 0.22853090773120874, "learning_rate": 7.162689778972276e-08, "loss": 0.0273, "step": 4434 }, { "epoch": 0.97, "grad_norm": 0.2219781169228068, "learning_rate": 7.042884647923353e-08, "loss": 0.0245, "step": 4435 }, { "epoch": 0.97, "grad_norm": 0.21269905150098456, "learning_rate": 6.924088164687703e-08, "loss": 0.0239, "step": 4436 }, { "epoch": 0.97, "grad_norm": 0.2551449125986645, "learning_rate": 6.806300389388565e-08, "loss": 0.0367, "step": 4437 }, { "epoch": 0.97, "grad_norm": 0.24152648610438776, "learning_rate": 6.689521381639363e-08, "loss": 0.0222, "step": 4438 }, { "epoch": 0.97, "grad_norm": 0.2695999196245218, "learning_rate": 6.573751200542599e-08, "loss": 0.0194, "step": 4439 }, { "epoch": 0.98, "grad_norm": 0.18773310793839185, "learning_rate": 6.458989904690072e-08, "loss": 0.0177, "step": 4440 }, { "epoch": 0.98, "grad_norm": 0.3021901684419358, "learning_rate": 6.345237552163541e-08, "loss": 0.026, "step": 4441 }, { "epoch": 0.98, "grad_norm": 0.16928288465784194, "learning_rate": 6.232494200533623e-08, "loss": 0.0136, "step": 4442 }, { "epoch": 0.98, "grad_norm": 0.3627623677343884, "learning_rate": 6.120759906860008e-08, "loss": 0.0468, "step": 4443 }, { "epoch": 0.98, "grad_norm": 0.2693787295266967, "learning_rate": 6.010034727692792e-08, "loss": 0.0301, "step": 4444 }, { "epoch": 0.98, "grad_norm": 0.2592147780396339, "learning_rate": 5.900318719070264e-08, "loss": 0.0259, "step": 4445 }, { "epoch": 0.98, "grad_norm": 0.3031344323804535, "learning_rate": 5.791611936520447e-08, "loss": 0.0305, "step": 4446 }, { "epoch": 0.98, "grad_norm": 0.227992224592611, "learning_rate": 5.683914435060445e-08, "loss": 0.0258, "step": 4447 }, { "epoch": 0.98, "grad_norm": 0.19873245575682397, "learning_rate": 5.577226269196656e-08, "loss": 0.0151, "step": 4448 }, { "epoch": 0.98, "grad_norm": 0.2479018741222908, "learning_rate": 5.471547492924778e-08, "loss": 0.0242, "step": 4449 }, { "epoch": 0.98, "grad_norm": 0.2322767429661271, "learning_rate": 5.3668781597291386e-08, "loss": 0.0269, "step": 4450 }, { "epoch": 0.98, "grad_norm": 0.21112779581354238, "learning_rate": 5.263218322584029e-08, "loss": 0.0245, "step": 4451 }, { "epoch": 0.98, "grad_norm": 0.23287870886567938, "learning_rate": 5.160568033951929e-08, "loss": 0.0309, "step": 4452 }, { "epoch": 0.98, "grad_norm": 0.2579778516701443, "learning_rate": 5.058927345784836e-08, "loss": 0.0317, "step": 4453 }, { "epoch": 0.98, "grad_norm": 0.2629862737455073, "learning_rate": 4.9582963095238247e-08, "loss": 0.0267, "step": 4454 }, { "epoch": 0.98, "grad_norm": 0.2353377120505107, "learning_rate": 4.8586749760985987e-08, "loss": 0.0295, "step": 4455 }, { "epoch": 0.98, "grad_norm": 0.26850899047468857, "learning_rate": 4.7600633959286044e-08, "loss": 0.0267, "step": 4456 }, { "epoch": 0.98, "grad_norm": 0.25478691547878657, "learning_rate": 4.6624616189214765e-08, "loss": 0.025, "step": 4457 }, { "epoch": 0.98, "grad_norm": 0.25562566901681366, "learning_rate": 4.565869694474367e-08, "loss": 0.0298, "step": 4458 }, { "epoch": 0.98, "grad_norm": 0.2124237838973614, "learning_rate": 4.470287671472395e-08, "loss": 0.0176, "step": 4459 }, { "epoch": 0.98, "grad_norm": 0.24257266273430095, "learning_rate": 4.375715598290864e-08, "loss": 0.0304, "step": 4460 }, { "epoch": 0.98, "grad_norm": 0.2193001317922933, "learning_rate": 4.2821535227930424e-08, "loss": 0.0226, "step": 4461 }, { "epoch": 0.98, "grad_norm": 0.23471578618040925, "learning_rate": 4.1896014923310525e-08, "loss": 0.0235, "step": 4462 }, { "epoch": 0.98, "grad_norm": 0.17518535913199723, "learning_rate": 4.098059553746536e-08, "loss": 0.0186, "step": 4463 }, { "epoch": 0.98, "grad_norm": 0.22191256420057429, "learning_rate": 4.0075277533688784e-08, "loss": 0.0261, "step": 4464 }, { "epoch": 0.98, "grad_norm": 0.26132643888696494, "learning_rate": 3.918006137017205e-08, "loss": 0.0243, "step": 4465 }, { "epoch": 0.98, "grad_norm": 0.18770673401504426, "learning_rate": 3.829494749998608e-08, "loss": 0.0165, "step": 4466 }, { "epoch": 0.98, "grad_norm": 0.20540545543875058, "learning_rate": 3.7419936371094756e-08, "loss": 0.0281, "step": 4467 }, { "epoch": 0.98, "grad_norm": 0.22627322869373032, "learning_rate": 3.655502842634606e-08, "loss": 0.039, "step": 4468 }, { "epoch": 0.98, "grad_norm": 0.23842989084493904, "learning_rate": 3.570022410347651e-08, "loss": 0.0272, "step": 4469 }, { "epoch": 0.98, "grad_norm": 0.2099454474834303, "learning_rate": 3.485552383510671e-08, "loss": 0.0195, "step": 4470 }, { "epoch": 0.98, "grad_norm": 0.21160685337741267, "learning_rate": 3.402092804874357e-08, "loss": 0.0223, "step": 4471 }, { "epoch": 0.98, "grad_norm": 0.22340505571162192, "learning_rate": 3.3196437166780336e-08, "loss": 0.0287, "step": 4472 }, { "epoch": 0.98, "grad_norm": 0.19631909382666662, "learning_rate": 3.2382051606500986e-08, "loss": 0.0196, "step": 4473 }, { "epoch": 0.98, "grad_norm": 0.2376322898599393, "learning_rate": 3.1577771780066936e-08, "loss": 0.0277, "step": 4474 }, { "epoch": 0.98, "grad_norm": 0.2635491827444607, "learning_rate": 3.078359809453257e-08, "loss": 0.0348, "step": 4475 }, { "epoch": 0.98, "grad_norm": 0.18508521419488638, "learning_rate": 2.999953095182972e-08, "loss": 0.0123, "step": 4476 }, { "epoch": 0.98, "grad_norm": 0.2867455805144428, "learning_rate": 2.9225570748785402e-08, "loss": 0.0334, "step": 4477 }, { "epoch": 0.98, "grad_norm": 0.21363303896832175, "learning_rate": 2.8461717877099615e-08, "loss": 0.0207, "step": 4478 }, { "epoch": 0.98, "grad_norm": 0.21129233926357469, "learning_rate": 2.770797272336756e-08, "loss": 0.0248, "step": 4479 }, { "epoch": 0.98, "grad_norm": 0.2801433088352883, "learning_rate": 2.696433566905965e-08, "loss": 0.0351, "step": 4480 }, { "epoch": 0.98, "grad_norm": 0.2532927363554509, "learning_rate": 2.623080709054149e-08, "loss": 0.0282, "step": 4481 }, { "epoch": 0.98, "grad_norm": 0.2864289817800324, "learning_rate": 2.550738735905167e-08, "loss": 0.0324, "step": 4482 }, { "epoch": 0.98, "grad_norm": 0.2373376988185147, "learning_rate": 2.479407684071733e-08, "loss": 0.0258, "step": 4483 }, { "epoch": 0.98, "grad_norm": 0.2127057068581972, "learning_rate": 2.4090875896551903e-08, "loss": 0.0177, "step": 4484 }, { "epoch": 0.99, "grad_norm": 0.23692122823597622, "learning_rate": 2.3397784882448483e-08, "loss": 0.0299, "step": 4485 }, { "epoch": 0.99, "grad_norm": 0.20988079581968236, "learning_rate": 2.2714804149184256e-08, "loss": 0.0262, "step": 4486 }, { "epoch": 0.99, "grad_norm": 0.23558350368400716, "learning_rate": 2.2041934042420497e-08, "loss": 0.0209, "step": 4487 }, { "epoch": 0.99, "grad_norm": 0.2822744071559274, "learning_rate": 2.137917490269814e-08, "loss": 0.031, "step": 4488 }, { "epoch": 0.99, "grad_norm": 0.32218783046509364, "learning_rate": 2.0726527065448865e-08, "loss": 0.0352, "step": 4489 }, { "epoch": 0.99, "grad_norm": 0.2496145301608871, "learning_rate": 2.0083990860977343e-08, "loss": 0.033, "step": 4490 }, { "epoch": 0.99, "grad_norm": 0.2534664306167664, "learning_rate": 1.9451566614479e-08, "loss": 0.0314, "step": 4491 }, { "epoch": 0.99, "grad_norm": 0.21548093461183634, "learning_rate": 1.8829254646022256e-08, "loss": 0.0247, "step": 4492 }, { "epoch": 0.99, "grad_norm": 0.2361792103104299, "learning_rate": 1.8217055270568497e-08, "loss": 0.0211, "step": 4493 }, { "epoch": 0.99, "grad_norm": 0.2320042231582333, "learning_rate": 1.7614968797952102e-08, "loss": 0.0239, "step": 4494 }, { "epoch": 0.99, "grad_norm": 0.2202331890576311, "learning_rate": 1.702299553289377e-08, "loss": 0.0336, "step": 4495 }, { "epoch": 0.99, "grad_norm": 0.23760916560119846, "learning_rate": 1.6441135774996066e-08, "loss": 0.0317, "step": 4496 }, { "epoch": 0.99, "grad_norm": 0.29437743706311953, "learning_rate": 1.586938981873898e-08, "loss": 0.0276, "step": 4497 }, { "epoch": 0.99, "grad_norm": 0.18441166189966846, "learning_rate": 1.530775795348882e-08, "loss": 0.0143, "step": 4498 }, { "epoch": 0.99, "grad_norm": 0.23832741274571254, "learning_rate": 1.4756240463491555e-08, "loss": 0.0232, "step": 4499 }, { "epoch": 0.99, "grad_norm": 0.2247094470461646, "learning_rate": 1.421483762787057e-08, "loss": 0.0274, "step": 4500 }, { "epoch": 0.99, "grad_norm": 0.3082288685382838, "learning_rate": 1.368354972063557e-08, "loss": 0.0349, "step": 4501 }, { "epoch": 0.99, "grad_norm": 0.19854662215208568, "learning_rate": 1.3162377010673689e-08, "loss": 0.027, "step": 4502 }, { "epoch": 0.99, "grad_norm": 0.31196752721804216, "learning_rate": 1.2651319761753933e-08, "loss": 0.0464, "step": 4503 }, { "epoch": 0.99, "grad_norm": 0.30917682369252936, "learning_rate": 1.2150378232527183e-08, "loss": 0.0323, "step": 4504 }, { "epoch": 0.99, "grad_norm": 0.22311947137196755, "learning_rate": 1.1659552676519525e-08, "loss": 0.0258, "step": 4505 }, { "epoch": 0.99, "grad_norm": 0.2824374931944922, "learning_rate": 1.1178843342143363e-08, "loss": 0.03, "step": 4506 }, { "epoch": 0.99, "grad_norm": 0.2217890767832452, "learning_rate": 1.070825047268631e-08, "loss": 0.0223, "step": 4507 }, { "epoch": 0.99, "grad_norm": 0.2394927603447689, "learning_rate": 1.024777430632229e-08, "loss": 0.0262, "step": 4508 }, { "epoch": 0.99, "grad_norm": 0.30117619581870964, "learning_rate": 9.797415076095996e-09, "loss": 0.0328, "step": 4509 }, { "epoch": 0.99, "grad_norm": 0.28627625082660674, "learning_rate": 9.357173009942878e-09, "loss": 0.0275, "step": 4510 }, { "epoch": 0.99, "grad_norm": 0.20651706686452403, "learning_rate": 8.927048330666932e-09, "loss": 0.0214, "step": 4511 }, { "epoch": 0.99, "grad_norm": 0.2013347207359353, "learning_rate": 8.50704125595847e-09, "loss": 0.0292, "step": 4512 }, { "epoch": 0.99, "grad_norm": 0.24912173257265088, "learning_rate": 8.097151998387453e-09, "loss": 0.0236, "step": 4513 }, { "epoch": 0.99, "grad_norm": 0.2045073513613524, "learning_rate": 7.697380765399053e-09, "loss": 0.0181, "step": 4514 }, { "epoch": 0.99, "grad_norm": 0.2715851713271294, "learning_rate": 7.3077277593203155e-09, "loss": 0.0358, "step": 4515 }, { "epoch": 0.99, "grad_norm": 0.24233401389028633, "learning_rate": 6.928193177360154e-09, "loss": 0.0288, "step": 4516 }, { "epoch": 0.99, "grad_norm": 0.2407937244795581, "learning_rate": 6.558777211598255e-09, "loss": 0.0264, "step": 4517 }, { "epoch": 0.99, "grad_norm": 0.27028214168412523, "learning_rate": 6.199480049000617e-09, "loss": 0.0313, "step": 4518 }, { "epoch": 0.99, "grad_norm": 0.22730838559569844, "learning_rate": 5.850301871410668e-09, "loss": 0.0315, "step": 4519 }, { "epoch": 0.99, "grad_norm": 0.22956373032952396, "learning_rate": 5.51124285554927e-09, "loss": 0.0258, "step": 4520 }, { "epoch": 0.99, "grad_norm": 0.22859658331428653, "learning_rate": 5.182303173016934e-09, "loss": 0.0302, "step": 4521 }, { "epoch": 0.99, "grad_norm": 0.20019196298169717, "learning_rate": 4.8634829902893806e-09, "loss": 0.02, "step": 4522 }, { "epoch": 0.99, "grad_norm": 0.256797976281126, "learning_rate": 4.554782468726426e-09, "loss": 0.0336, "step": 4523 }, { "epoch": 0.99, "grad_norm": 0.24382401345259824, "learning_rate": 4.256201764560874e-09, "loss": 0.0276, "step": 4524 }, { "epoch": 0.99, "grad_norm": 0.26355353631491457, "learning_rate": 3.967741028907401e-09, "loss": 0.0321, "step": 4525 }, { "epoch": 0.99, "grad_norm": 0.236170992355893, "learning_rate": 3.6894004077558942e-09, "loss": 0.0261, "step": 4526 }, { "epoch": 0.99, "grad_norm": 0.21118369570046402, "learning_rate": 3.421180041980332e-09, "loss": 0.026, "step": 4527 }, { "epoch": 0.99, "grad_norm": 0.215920213733391, "learning_rate": 3.1630800673254636e-09, "loss": 0.0233, "step": 4528 }, { "epoch": 0.99, "grad_norm": 0.341207246372847, "learning_rate": 2.9151006144201298e-09, "loss": 0.0292, "step": 4529 }, { "epoch": 0.99, "grad_norm": 0.27762589408390465, "learning_rate": 2.6772418087639417e-09, "loss": 0.0251, "step": 4530 }, { "epoch": 1.0, "grad_norm": 0.19320572422128685, "learning_rate": 2.4495037707428226e-09, "loss": 0.0163, "step": 4531 }, { "epoch": 1.0, "grad_norm": 0.26428603242243764, "learning_rate": 2.2318866156134654e-09, "loss": 0.0248, "step": 4532 }, { "epoch": 1.0, "grad_norm": 0.33253742231504263, "learning_rate": 2.0243904535144353e-09, "loss": 0.0453, "step": 4533 }, { "epoch": 1.0, "grad_norm": 0.22122698196730783, "learning_rate": 1.8270153894617282e-09, "loss": 0.0256, "step": 4534 }, { "epoch": 1.0, "grad_norm": 0.23089916641515254, "learning_rate": 1.6397615233465503e-09, "loss": 0.0264, "step": 4535 }, { "epoch": 1.0, "grad_norm": 0.22087355590784324, "learning_rate": 1.4626289499397596e-09, "loss": 0.0225, "step": 4536 }, { "epoch": 1.0, "grad_norm": 0.24501147023180128, "learning_rate": 1.2956177588896445e-09, "loss": 0.0271, "step": 4537 }, { "epoch": 1.0, "grad_norm": 0.23313726392912676, "learning_rate": 1.138728034719705e-09, "loss": 0.0281, "step": 4538 }, { "epoch": 1.0, "grad_norm": 0.24477027569640544, "learning_rate": 9.919598568353118e-10, "loss": 0.0282, "step": 4539 }, { "epoch": 1.0, "grad_norm": 0.29386165607098685, "learning_rate": 8.553132995170466e-10, "loss": 0.0264, "step": 4540 }, { "epoch": 1.0, "grad_norm": 0.36618149702624436, "learning_rate": 7.287884319184813e-10, "loss": 0.0326, "step": 4541 }, { "epoch": 1.0, "grad_norm": 0.28540606856102807, "learning_rate": 6.123853180795003e-10, "loss": 0.0362, "step": 4542 }, { "epoch": 1.0, "grad_norm": 0.20529983887905334, "learning_rate": 5.061040169107578e-10, "loss": 0.0167, "step": 4543 }, { "epoch": 1.0, "grad_norm": 0.2795280753393063, "learning_rate": 4.0994458220033896e-10, "loss": 0.0294, "step": 4544 }, { "epoch": 1.0, "grad_norm": 0.2661258659536841, "learning_rate": 3.2390706261598015e-10, "loss": 0.0309, "step": 4545 }, { "epoch": 1.0, "grad_norm": 0.2145268096519866, "learning_rate": 2.479915017028489e-10, "loss": 0.0283, "step": 4546 }, { "epoch": 1.0, "grad_norm": 0.18357432039405988, "learning_rate": 1.8219793788132322e-10, "loss": 0.0201, "step": 4547 }, { "epoch": 1.0, "grad_norm": 0.2166040353315691, "learning_rate": 1.265264044514325e-10, "loss": 0.0244, "step": 4548 }, { "epoch": 1.0, "grad_norm": 0.23382594647015617, "learning_rate": 8.097692958619619e-11, "loss": 0.0268, "step": 4549 }, { "epoch": 1.0, "grad_norm": 0.24069606796661666, "learning_rate": 4.5549536340505627e-11, "loss": 0.0201, "step": 4550 }, { "epoch": 1.0, "grad_norm": 0.22354764024397278, "learning_rate": 2.024424264224223e-11, "loss": 0.0247, "step": 4551 }, { "epoch": 1.0, "grad_norm": 0.22004441054170246, "learning_rate": 5.061061301159242e-12, "loss": 0.0244, "step": 4552 }, { "epoch": 1.0, "grad_norm": 0.27832210607118085, "learning_rate": 0.0, "loss": 0.0444, "step": 4553 }, { "epoch": 1.0, "step": 4553, "total_flos": 1.7101442093126403e+22, "train_loss": 0.1402213812993695, "train_runtime": 25492.8985, "train_samples_per_second": 11.43, "train_steps_per_second": 0.179 } ], "logging_steps": 1.0, "max_steps": 4553, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 1.7101442093126403e+22, "train_batch_size": 2, "trial_name": null, "trial_params": null }