{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999521645539344, "eval_steps": 2613, "global_step": 10452, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.240769189720181, "learning_rate": 1.9138755980861247e-07, "loss": 1.9865, "step": 1 }, { "epoch": 0.0, "eval_loss": 2.280837297439575, "eval_runtime": 4185.9784, "eval_samples_per_second": 19.976, "eval_steps_per_second": 2.497, "step": 1 }, { "epoch": 0.0, "grad_norm": 3.223090418187222, "learning_rate": 3.8277511961722493e-07, "loss": 1.9703, "step": 2 }, { "epoch": 0.0, "grad_norm": 3.6145046678806407, "learning_rate": 5.741626794258373e-07, "loss": 2.1381, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.6296311605868885, "learning_rate": 7.655502392344499e-07, "loss": 2.456, "step": 4 }, { "epoch": 0.0, "grad_norm": 3.8401650197500428, "learning_rate": 9.569377990430622e-07, "loss": 2.1077, "step": 5 }, { "epoch": 0.0, "grad_norm": 3.683890691252437, "learning_rate": 1.1483253588516746e-06, "loss": 2.1413, "step": 6 }, { "epoch": 0.0, "grad_norm": 3.6432534368591782, "learning_rate": 1.339712918660287e-06, "loss": 2.1568, "step": 7 }, { "epoch": 0.0, "grad_norm": 4.663365277527622, "learning_rate": 1.5311004784688997e-06, "loss": 2.3463, "step": 8 }, { "epoch": 0.0, "grad_norm": 3.6372978147069697, "learning_rate": 1.722488038277512e-06, "loss": 2.1712, "step": 9 }, { "epoch": 0.0, "grad_norm": 3.852872878211269, "learning_rate": 1.9138755980861244e-06, "loss": 2.2722, "step": 10 }, { "epoch": 0.0, "grad_norm": 3.6549109607459043, "learning_rate": 2.105263157894737e-06, "loss": 1.9729, "step": 11 }, { "epoch": 0.0, "grad_norm": 3.2318002906734535, "learning_rate": 2.2966507177033493e-06, "loss": 2.0595, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.6991822490019413, "learning_rate": 2.488038277511962e-06, "loss": 2.0928, "step": 13 }, { "epoch": 0.0, "grad_norm": 4.159355481440864, "learning_rate": 2.679425837320574e-06, "loss": 2.1411, "step": 14 }, { "epoch": 0.0, "grad_norm": 4.061437005322893, "learning_rate": 2.870813397129187e-06, "loss": 2.3334, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.7145200953671766, "learning_rate": 3.0622009569377995e-06, "loss": 1.9779, "step": 16 }, { "epoch": 0.0, "grad_norm": 4.3951391847417565, "learning_rate": 3.2535885167464113e-06, "loss": 2.24, "step": 17 }, { "epoch": 0.0, "grad_norm": 4.225335307764433, "learning_rate": 3.444976076555024e-06, "loss": 2.2625, "step": 18 }, { "epoch": 0.0, "grad_norm": 4.08014522806263, "learning_rate": 3.636363636363636e-06, "loss": 2.1072, "step": 19 }, { "epoch": 0.0, "grad_norm": 4.750695114221436, "learning_rate": 3.827751196172249e-06, "loss": 2.2998, "step": 20 }, { "epoch": 0.0, "grad_norm": 4.576295112434383, "learning_rate": 4.019138755980861e-06, "loss": 2.1414, "step": 21 }, { "epoch": 0.0, "grad_norm": 5.5573739807898805, "learning_rate": 4.210526315789474e-06, "loss": 2.3373, "step": 22 }, { "epoch": 0.0, "grad_norm": 4.369397245889477, "learning_rate": 4.401913875598086e-06, "loss": 2.1658, "step": 23 }, { "epoch": 0.0, "grad_norm": 5.28624090773153, "learning_rate": 4.5933014354066986e-06, "loss": 2.2938, "step": 24 }, { "epoch": 0.0, "grad_norm": 4.9534751396409655, "learning_rate": 4.784688995215311e-06, "loss": 2.2733, "step": 25 }, { "epoch": 0.0, "grad_norm": 5.381089984082533, "learning_rate": 4.976076555023924e-06, "loss": 2.2244, "step": 26 }, { "epoch": 0.0, "grad_norm": 6.226111694547378, "learning_rate": 5.167464114832536e-06, "loss": 2.2223, "step": 27 }, { "epoch": 0.0, "grad_norm": 3.5666909272147245, "learning_rate": 5.358851674641148e-06, "loss": 1.7762, "step": 28 }, { "epoch": 0.0, "grad_norm": 5.398756537107184, "learning_rate": 5.550239234449761e-06, "loss": 2.4073, "step": 29 }, { "epoch": 0.0, "grad_norm": 4.315805192926013, "learning_rate": 5.741626794258374e-06, "loss": 2.1847, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.532277066830244, "learning_rate": 5.933014354066986e-06, "loss": 2.2245, "step": 31 }, { "epoch": 0.0, "grad_norm": 3.413171467566141, "learning_rate": 6.124401913875599e-06, "loss": 2.0755, "step": 32 }, { "epoch": 0.0, "grad_norm": 3.2310871639043746, "learning_rate": 6.315789473684211e-06, "loss": 2.2345, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.3848849049261207, "learning_rate": 6.5071770334928226e-06, "loss": 2.0588, "step": 34 }, { "epoch": 0.0, "grad_norm": 3.0082260641673706, "learning_rate": 6.698564593301436e-06, "loss": 2.1575, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.45419363377176, "learning_rate": 6.889952153110048e-06, "loss": 2.2532, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.316572782291838, "learning_rate": 7.081339712918661e-06, "loss": 1.9725, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.2302956707592998, "learning_rate": 7.272727272727272e-06, "loss": 2.2166, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.2417025568909201, "learning_rate": 7.464114832535886e-06, "loss": 2.2785, "step": 39 }, { "epoch": 0.0, "grad_norm": 1.0623452341947435, "learning_rate": 7.655502392344498e-06, "loss": 2.1188, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.9459508227302174, "learning_rate": 7.846889952153112e-06, "loss": 2.1225, "step": 41 }, { "epoch": 0.0, "grad_norm": 0.9821381236389229, "learning_rate": 8.038277511961722e-06, "loss": 1.9644, "step": 42 }, { "epoch": 0.0, "grad_norm": 0.8227720071849114, "learning_rate": 8.229665071770336e-06, "loss": 1.8818, "step": 43 }, { "epoch": 0.0, "grad_norm": 0.9772589694714745, "learning_rate": 8.421052631578948e-06, "loss": 2.1738, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.8288391194584817, "learning_rate": 8.61244019138756e-06, "loss": 2.0598, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.7365091983688079, "learning_rate": 8.803827751196173e-06, "loss": 2.0051, "step": 46 }, { "epoch": 0.0, "grad_norm": 0.8202389292532828, "learning_rate": 8.995215311004785e-06, "loss": 2.0005, "step": 47 }, { "epoch": 0.0, "grad_norm": 0.7517874679840845, "learning_rate": 9.186602870813397e-06, "loss": 1.9756, "step": 48 }, { "epoch": 0.0, "grad_norm": 0.8041110653388241, "learning_rate": 9.377990430622011e-06, "loss": 2.1019, "step": 49 }, { "epoch": 0.0, "grad_norm": 0.7364867668939584, "learning_rate": 9.569377990430622e-06, "loss": 1.8784, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.5933286532299948, "learning_rate": 9.760765550239234e-06, "loss": 1.743, "step": 51 }, { "epoch": 0.0, "grad_norm": 0.792868413879173, "learning_rate": 9.952153110047848e-06, "loss": 1.8373, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.8312221240251918, "learning_rate": 1.014354066985646e-05, "loss": 1.9098, "step": 53 }, { "epoch": 0.01, "grad_norm": 0.8539046550616419, "learning_rate": 1.0334928229665072e-05, "loss": 2.0752, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.7680076367144099, "learning_rate": 1.0526315789473684e-05, "loss": 2.0076, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.6386760911858004, "learning_rate": 1.0717703349282297e-05, "loss": 1.8095, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.7287884197298926, "learning_rate": 1.0909090909090909e-05, "loss": 1.9806, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.7722026786294885, "learning_rate": 1.1100478468899523e-05, "loss": 1.9635, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.7231409481435274, "learning_rate": 1.1291866028708133e-05, "loss": 1.8613, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.7824727930965788, "learning_rate": 1.1483253588516747e-05, "loss": 2.1725, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.6677563917530093, "learning_rate": 1.167464114832536e-05, "loss": 1.9352, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.7009328416411565, "learning_rate": 1.1866028708133972e-05, "loss": 1.8884, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.9109348341862854, "learning_rate": 1.2057416267942584e-05, "loss": 2.3183, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.8411347289624691, "learning_rate": 1.2248803827751198e-05, "loss": 1.8075, "step": 64 }, { "epoch": 0.01, "grad_norm": 0.7674394625955453, "learning_rate": 1.2440191387559808e-05, "loss": 1.9963, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.7848289715014719, "learning_rate": 1.2631578947368422e-05, "loss": 1.9276, "step": 66 }, { "epoch": 0.01, "grad_norm": 0.7238409990110771, "learning_rate": 1.2822966507177035e-05, "loss": 1.9353, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.8235100929493389, "learning_rate": 1.3014354066985645e-05, "loss": 1.8938, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.6283052176051154, "learning_rate": 1.320574162679426e-05, "loss": 1.8028, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.654778519712797, "learning_rate": 1.3397129186602871e-05, "loss": 1.6469, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.7728600382414659, "learning_rate": 1.3588516746411483e-05, "loss": 1.7839, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.9160985506203067, "learning_rate": 1.3779904306220096e-05, "loss": 1.8154, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.8592513182940328, "learning_rate": 1.397129186602871e-05, "loss": 1.8178, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.0716389160468884, "learning_rate": 1.4162679425837322e-05, "loss": 1.9752, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.0526260733114412, "learning_rate": 1.4354066985645934e-05, "loss": 1.8998, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.9676822741503439, "learning_rate": 1.4545454545454545e-05, "loss": 1.7514, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.2545561036432802, "learning_rate": 1.4736842105263157e-05, "loss": 1.8165, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.834675739891747, "learning_rate": 1.4928229665071772e-05, "loss": 1.8987, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.25568745378183, "learning_rate": 1.5119617224880383e-05, "loss": 1.8836, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.8547248072580093, "learning_rate": 1.5311004784688995e-05, "loss": 1.625, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.9130624521617219, "learning_rate": 1.5502392344497607e-05, "loss": 1.65, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.4571618225829908, "learning_rate": 1.5693779904306223e-05, "loss": 1.7592, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.5681812827811266, "learning_rate": 1.5885167464114832e-05, "loss": 1.7274, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.7707101692645586, "learning_rate": 1.6076555023923444e-05, "loss": 1.7496, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.4920166991759014, "learning_rate": 1.6267942583732056e-05, "loss": 1.815, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.765151570274179, "learning_rate": 1.6459330143540672e-05, "loss": 1.6131, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.7780322419019302, "learning_rate": 1.6650717703349284e-05, "loss": 1.6553, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.6971849026842456, "learning_rate": 1.6842105263157896e-05, "loss": 1.6212, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.6737627863204966, "learning_rate": 1.7033492822966505e-05, "loss": 1.6319, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.7581487338796276, "learning_rate": 1.722488038277512e-05, "loss": 1.5457, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.6884712501693219, "learning_rate": 1.7416267942583733e-05, "loss": 1.682, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.6472187205118166, "learning_rate": 1.7607655502392345e-05, "loss": 1.7893, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.5392317136449472, "learning_rate": 1.7799043062200958e-05, "loss": 1.5724, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.613083864173068, "learning_rate": 1.799043062200957e-05, "loss": 1.5933, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.554854239010525, "learning_rate": 1.8181818181818182e-05, "loss": 1.4444, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.5895819501317506, "learning_rate": 1.8373205741626794e-05, "loss": 1.463, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.6834369586439761, "learning_rate": 1.8564593301435407e-05, "loss": 1.6334, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.5941595870489239, "learning_rate": 1.8755980861244022e-05, "loss": 1.6881, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.6765509171345243, "learning_rate": 1.8947368421052634e-05, "loss": 1.5896, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.5523045136213458, "learning_rate": 1.9138755980861243e-05, "loss": 1.4551, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.6537194645181346, "learning_rate": 1.9330143540669855e-05, "loss": 1.5513, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.5087498837023968, "learning_rate": 1.9521531100478468e-05, "loss": 1.4715, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.6335719742406299, "learning_rate": 1.9712918660287083e-05, "loss": 1.5313, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.647032541641365, "learning_rate": 1.9904306220095696e-05, "loss": 1.4794, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.5641054209541718, "learning_rate": 2.0095693779904308e-05, "loss": 1.5301, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.623411201252464, "learning_rate": 2.028708133971292e-05, "loss": 1.4618, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.6469566283028377, "learning_rate": 2.0478468899521532e-05, "loss": 1.5438, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.5328026134772941, "learning_rate": 2.0669856459330144e-05, "loss": 1.4587, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.5404945621124031, "learning_rate": 2.0861244019138757e-05, "loss": 1.3749, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.6198671572468233, "learning_rate": 2.105263157894737e-05, "loss": 1.5662, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.5344071404450242, "learning_rate": 2.1244019138755985e-05, "loss": 1.6258, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.5077736180550365, "learning_rate": 2.1435406698564593e-05, "loss": 1.4085, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.5916690974950262, "learning_rate": 2.1626794258373206e-05, "loss": 1.4315, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.4799015981708095, "learning_rate": 2.1818181818181818e-05, "loss": 1.4183, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.4571232168458159, "learning_rate": 2.2009569377990433e-05, "loss": 1.3658, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.4917591648756044, "learning_rate": 2.2200956937799046e-05, "loss": 1.3724, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.4170683829952185, "learning_rate": 2.2392344497607658e-05, "loss": 1.4197, "step": 117 }, { "epoch": 0.01, "grad_norm": 0.49893891430948417, "learning_rate": 2.2583732057416267e-05, "loss": 1.4005, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.4056789281529712, "learning_rate": 2.2775119617224882e-05, "loss": 1.4419, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.47274513283719816, "learning_rate": 2.2966507177033495e-05, "loss": 1.3163, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.4495315355981231, "learning_rate": 2.3157894736842107e-05, "loss": 1.3599, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.4142057365520808, "learning_rate": 2.334928229665072e-05, "loss": 1.4484, "step": 122 }, { "epoch": 0.01, "grad_norm": 0.4421864791497608, "learning_rate": 2.354066985645933e-05, "loss": 1.3629, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.44850267009277406, "learning_rate": 2.3732057416267943e-05, "loss": 1.4036, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.49021107734394687, "learning_rate": 2.3923444976076556e-05, "loss": 1.3724, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.47738353199872885, "learning_rate": 2.4114832535885168e-05, "loss": 1.3304, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.4704616937001858, "learning_rate": 2.4306220095693784e-05, "loss": 1.376, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.4351806446488345, "learning_rate": 2.4497607655502396e-05, "loss": 1.3438, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.5275547600152736, "learning_rate": 2.4688995215311005e-05, "loss": 1.2731, "step": 129 }, { "epoch": 0.01, "grad_norm": 0.3787982803600526, "learning_rate": 2.4880382775119617e-05, "loss": 1.3392, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.3850735556535156, "learning_rate": 2.507177033492823e-05, "loss": 1.2673, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.4339128757111585, "learning_rate": 2.5263157894736845e-05, "loss": 1.3284, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.39005187564392557, "learning_rate": 2.5454545454545454e-05, "loss": 1.3347, "step": 133 }, { "epoch": 0.01, "grad_norm": 0.35602922226403455, "learning_rate": 2.564593301435407e-05, "loss": 1.2362, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.40601160369590406, "learning_rate": 2.583732057416268e-05, "loss": 1.3591, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.4408392546545607, "learning_rate": 2.602870813397129e-05, "loss": 1.2837, "step": 136 }, { "epoch": 0.01, "grad_norm": 0.46362552165830334, "learning_rate": 2.6220095693779906e-05, "loss": 1.316, "step": 137 }, { "epoch": 0.01, "grad_norm": 0.3504892333833388, "learning_rate": 2.641148325358852e-05, "loss": 1.1857, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.4362884443543931, "learning_rate": 2.660287081339713e-05, "loss": 1.2435, "step": 139 }, { "epoch": 0.01, "grad_norm": 0.50850093231113, "learning_rate": 2.6794258373205743e-05, "loss": 1.3235, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.4187742784108479, "learning_rate": 2.698564593301435e-05, "loss": 1.3693, "step": 141 }, { "epoch": 0.01, "grad_norm": 0.44107080428520234, "learning_rate": 2.7177033492822967e-05, "loss": 1.202, "step": 142 }, { "epoch": 0.01, "grad_norm": 0.42167608964495673, "learning_rate": 2.7368421052631583e-05, "loss": 1.2693, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.4269486411389392, "learning_rate": 2.755980861244019e-05, "loss": 1.1825, "step": 144 }, { "epoch": 0.01, "grad_norm": 0.4220464029961541, "learning_rate": 2.7751196172248807e-05, "loss": 1.3205, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.4620642882432513, "learning_rate": 2.794258373205742e-05, "loss": 1.2891, "step": 146 }, { "epoch": 0.01, "grad_norm": 0.43743294942437483, "learning_rate": 2.8133971291866028e-05, "loss": 1.1969, "step": 147 }, { "epoch": 0.01, "grad_norm": 0.4414019243397515, "learning_rate": 2.8325358851674644e-05, "loss": 1.1672, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.1295244665123245, "learning_rate": 2.8516746411483253e-05, "loss": 1.3175, "step": 149 }, { "epoch": 0.01, "grad_norm": 0.5055922001428373, "learning_rate": 2.8708133971291868e-05, "loss": 1.2696, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.48319661000499564, "learning_rate": 2.889952153110048e-05, "loss": 1.3168, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.5720466946520338, "learning_rate": 2.909090909090909e-05, "loss": 1.2982, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.68753321360197, "learning_rate": 2.9282296650717705e-05, "loss": 1.2791, "step": 153 }, { "epoch": 0.01, "grad_norm": 0.6221880802372888, "learning_rate": 2.9473684210526314e-05, "loss": 1.2647, "step": 154 }, { "epoch": 0.01, "grad_norm": 0.5504261765445813, "learning_rate": 2.966507177033493e-05, "loss": 1.3102, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.6618113787746298, "learning_rate": 2.9856459330143545e-05, "loss": 1.292, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.6002633450221915, "learning_rate": 3.0047846889952154e-05, "loss": 1.33, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.7118508153621136, "learning_rate": 3.0239234449760766e-05, "loss": 1.2062, "step": 158 }, { "epoch": 0.02, "grad_norm": 0.6591754857765905, "learning_rate": 3.043062200956938e-05, "loss": 1.2539, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.4714338006090391, "learning_rate": 3.062200956937799e-05, "loss": 1.2384, "step": 160 }, { "epoch": 0.02, "grad_norm": 1.0300987484894157, "learning_rate": 3.08133971291866e-05, "loss": 1.2171, "step": 161 }, { "epoch": 0.02, "grad_norm": 0.6469298394859133, "learning_rate": 3.1004784688995215e-05, "loss": 1.0836, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.5601908591127592, "learning_rate": 3.119617224880383e-05, "loss": 1.1867, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.7363177930575833, "learning_rate": 3.1387559808612446e-05, "loss": 1.3128, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.7107903409156414, "learning_rate": 3.157894736842105e-05, "loss": 1.2501, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.464636577316954, "learning_rate": 3.1770334928229664e-05, "loss": 1.1671, "step": 166 }, { "epoch": 0.02, "grad_norm": 0.8259444575108333, "learning_rate": 3.196172248803828e-05, "loss": 1.2928, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.7945572957938246, "learning_rate": 3.215311004784689e-05, "loss": 1.2516, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.5895739273419747, "learning_rate": 3.234449760765551e-05, "loss": 1.162, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.6927568627921579, "learning_rate": 3.253588516746411e-05, "loss": 1.2802, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.639241808296525, "learning_rate": 3.272727272727273e-05, "loss": 1.1999, "step": 171 }, { "epoch": 0.02, "grad_norm": 0.8979192020036151, "learning_rate": 3.2918660287081344e-05, "loss": 1.1854, "step": 172 }, { "epoch": 0.02, "grad_norm": 0.5314730657868201, "learning_rate": 3.311004784688995e-05, "loss": 1.2365, "step": 173 }, { "epoch": 0.02, "grad_norm": 0.3835011610660225, "learning_rate": 3.330143540669857e-05, "loss": 1.3014, "step": 174 }, { "epoch": 0.02, "grad_norm": 0.46600481942740146, "learning_rate": 3.349282296650718e-05, "loss": 1.2222, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.568331869349057, "learning_rate": 3.368421052631579e-05, "loss": 1.2219, "step": 176 }, { "epoch": 0.02, "grad_norm": 0.5161782745938108, "learning_rate": 3.3875598086124405e-05, "loss": 1.0828, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.5678907934059946, "learning_rate": 3.406698564593301e-05, "loss": 1.241, "step": 178 }, { "epoch": 0.02, "grad_norm": 0.40729089487435055, "learning_rate": 3.425837320574163e-05, "loss": 1.2201, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.5273474366443621, "learning_rate": 3.444976076555024e-05, "loss": 1.1262, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.4807276904108668, "learning_rate": 3.4641148325358854e-05, "loss": 1.1957, "step": 181 }, { "epoch": 0.02, "grad_norm": 0.5372452514446321, "learning_rate": 3.4832535885167466e-05, "loss": 1.1621, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.6605408366116067, "learning_rate": 3.502392344497608e-05, "loss": 1.2327, "step": 183 }, { "epoch": 0.02, "grad_norm": 0.5555779108688886, "learning_rate": 3.521531100478469e-05, "loss": 1.1938, "step": 184 }, { "epoch": 0.02, "grad_norm": 0.5517608446497938, "learning_rate": 3.54066985645933e-05, "loss": 1.2651, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.4524359827250868, "learning_rate": 3.5598086124401915e-05, "loss": 1.2182, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.4495056117139968, "learning_rate": 3.578947368421053e-05, "loss": 1.2133, "step": 187 }, { "epoch": 0.02, "grad_norm": 0.43420633627744476, "learning_rate": 3.598086124401914e-05, "loss": 1.078, "step": 188 }, { "epoch": 0.02, "grad_norm": 0.4991578407930919, "learning_rate": 3.617224880382775e-05, "loss": 1.0683, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.46736895171034254, "learning_rate": 3.6363636363636364e-05, "loss": 1.1533, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.48830760264155376, "learning_rate": 3.6555023923444976e-05, "loss": 1.2539, "step": 191 }, { "epoch": 0.02, "grad_norm": 0.5148588474362115, "learning_rate": 3.674641148325359e-05, "loss": 1.0486, "step": 192 }, { "epoch": 0.02, "grad_norm": 0.4669961130515612, "learning_rate": 3.693779904306221e-05, "loss": 1.1949, "step": 193 }, { "epoch": 0.02, "grad_norm": 0.32879504352969363, "learning_rate": 3.712918660287081e-05, "loss": 1.189, "step": 194 }, { "epoch": 0.02, "grad_norm": 0.35633398240141656, "learning_rate": 3.7320574162679425e-05, "loss": 1.2112, "step": 195 }, { "epoch": 0.02, "grad_norm": 0.42378382756263683, "learning_rate": 3.7511961722488044e-05, "loss": 1.1999, "step": 196 }, { "epoch": 0.02, "grad_norm": 0.41483450868577093, "learning_rate": 3.770334928229665e-05, "loss": 1.1283, "step": 197 }, { "epoch": 0.02, "grad_norm": 0.4603179290827383, "learning_rate": 3.789473684210527e-05, "loss": 1.2353, "step": 198 }, { "epoch": 0.02, "grad_norm": 0.5574543274654291, "learning_rate": 3.8086124401913874e-05, "loss": 1.1948, "step": 199 }, { "epoch": 0.02, "grad_norm": 0.6846239185739854, "learning_rate": 3.8277511961722486e-05, "loss": 1.3231, "step": 200 }, { "epoch": 0.02, "grad_norm": 0.520683870817622, "learning_rate": 3.8468899521531105e-05, "loss": 1.2201, "step": 201 }, { "epoch": 0.02, "grad_norm": 0.42683935885681074, "learning_rate": 3.866028708133971e-05, "loss": 1.165, "step": 202 }, { "epoch": 0.02, "grad_norm": 0.39470003294526096, "learning_rate": 3.885167464114833e-05, "loss": 1.1994, "step": 203 }, { "epoch": 0.02, "grad_norm": 0.4391753396869876, "learning_rate": 3.9043062200956935e-05, "loss": 1.1813, "step": 204 }, { "epoch": 0.02, "grad_norm": 0.4449276816002856, "learning_rate": 3.9234449760765554e-05, "loss": 1.1764, "step": 205 }, { "epoch": 0.02, "grad_norm": 0.45472581470602813, "learning_rate": 3.9425837320574167e-05, "loss": 1.1766, "step": 206 }, { "epoch": 0.02, "grad_norm": 0.37367376899076454, "learning_rate": 3.961722488038277e-05, "loss": 1.0581, "step": 207 }, { "epoch": 0.02, "grad_norm": 0.5183447282360463, "learning_rate": 3.980861244019139e-05, "loss": 1.1351, "step": 208 }, { "epoch": 0.02, "grad_norm": 0.5895054254055697, "learning_rate": 4e-05, "loss": 1.2017, "step": 209 }, { "epoch": 0.02, "grad_norm": 0.39968348371397444, "learning_rate": 4.0191387559808616e-05, "loss": 1.2353, "step": 210 }, { "epoch": 0.02, "grad_norm": 0.4089518582688394, "learning_rate": 4.038277511961723e-05, "loss": 1.2072, "step": 211 }, { "epoch": 0.02, "grad_norm": 0.5490805672572786, "learning_rate": 4.057416267942584e-05, "loss": 1.2593, "step": 212 }, { "epoch": 0.02, "grad_norm": 0.42397521041394676, "learning_rate": 4.076555023923445e-05, "loss": 1.155, "step": 213 }, { "epoch": 0.02, "grad_norm": 0.4981026360180907, "learning_rate": 4.0956937799043064e-05, "loss": 1.2279, "step": 214 }, { "epoch": 0.02, "grad_norm": 0.503200310794026, "learning_rate": 4.114832535885168e-05, "loss": 1.1134, "step": 215 }, { "epoch": 0.02, "grad_norm": 0.5127306365983669, "learning_rate": 4.133971291866029e-05, "loss": 1.1993, "step": 216 }, { "epoch": 0.02, "grad_norm": 0.4935452094233486, "learning_rate": 4.15311004784689e-05, "loss": 1.1962, "step": 217 }, { "epoch": 0.02, "grad_norm": 0.38834813884933866, "learning_rate": 4.172248803827751e-05, "loss": 1.2529, "step": 218 }, { "epoch": 0.02, "grad_norm": 0.4742710182667002, "learning_rate": 4.1913875598086126e-05, "loss": 1.1861, "step": 219 }, { "epoch": 0.02, "grad_norm": 0.4762816225973413, "learning_rate": 4.210526315789474e-05, "loss": 1.2221, "step": 220 }, { "epoch": 0.02, "grad_norm": 0.5405607329270613, "learning_rate": 4.229665071770335e-05, "loss": 1.3099, "step": 221 }, { "epoch": 0.02, "grad_norm": 0.38474621695351074, "learning_rate": 4.248803827751197e-05, "loss": 1.1669, "step": 222 }, { "epoch": 0.02, "grad_norm": 0.47522706488556143, "learning_rate": 4.2679425837320574e-05, "loss": 1.1813, "step": 223 }, { "epoch": 0.02, "grad_norm": 0.43816325603257117, "learning_rate": 4.287081339712919e-05, "loss": 1.1912, "step": 224 }, { "epoch": 0.02, "grad_norm": 0.47579537819240086, "learning_rate": 4.3062200956937806e-05, "loss": 1.165, "step": 225 }, { "epoch": 0.02, "grad_norm": 0.5148567080870619, "learning_rate": 4.325358851674641e-05, "loss": 1.1997, "step": 226 }, { "epoch": 0.02, "grad_norm": 0.4019413785534228, "learning_rate": 4.344497607655503e-05, "loss": 1.2448, "step": 227 }, { "epoch": 0.02, "grad_norm": 0.4766231281692962, "learning_rate": 4.3636363636363636e-05, "loss": 1.1374, "step": 228 }, { "epoch": 0.02, "grad_norm": 0.4375975135050194, "learning_rate": 4.382775119617225e-05, "loss": 1.2484, "step": 229 }, { "epoch": 0.02, "grad_norm": 0.4312959866778293, "learning_rate": 4.401913875598087e-05, "loss": 1.1143, "step": 230 }, { "epoch": 0.02, "grad_norm": 0.4329834158025715, "learning_rate": 4.421052631578947e-05, "loss": 1.2335, "step": 231 }, { "epoch": 0.02, "grad_norm": 0.37836691609878936, "learning_rate": 4.440191387559809e-05, "loss": 1.118, "step": 232 }, { "epoch": 0.02, "grad_norm": 0.37526619539689987, "learning_rate": 4.45933014354067e-05, "loss": 1.2968, "step": 233 }, { "epoch": 0.02, "grad_norm": 0.4175158591003783, "learning_rate": 4.4784688995215316e-05, "loss": 1.1954, "step": 234 }, { "epoch": 0.02, "grad_norm": 0.37094222885704037, "learning_rate": 4.497607655502393e-05, "loss": 1.0976, "step": 235 }, { "epoch": 0.02, "grad_norm": 0.4586856505320375, "learning_rate": 4.5167464114832533e-05, "loss": 1.1628, "step": 236 }, { "epoch": 0.02, "grad_norm": 0.44587593738328374, "learning_rate": 4.535885167464115e-05, "loss": 1.1917, "step": 237 }, { "epoch": 0.02, "grad_norm": 0.3594860823761438, "learning_rate": 4.5550239234449765e-05, "loss": 1.192, "step": 238 }, { "epoch": 0.02, "grad_norm": 0.38283214874386773, "learning_rate": 4.574162679425838e-05, "loss": 1.2205, "step": 239 }, { "epoch": 0.02, "grad_norm": 0.33492904991363276, "learning_rate": 4.593301435406699e-05, "loss": 1.2155, "step": 240 }, { "epoch": 0.02, "grad_norm": 0.4113511012996176, "learning_rate": 4.6124401913875595e-05, "loss": 1.2222, "step": 241 }, { "epoch": 0.02, "grad_norm": 0.4337572841707065, "learning_rate": 4.6315789473684214e-05, "loss": 1.0193, "step": 242 }, { "epoch": 0.02, "grad_norm": 0.34650969754183997, "learning_rate": 4.6507177033492826e-05, "loss": 1.2571, "step": 243 }, { "epoch": 0.02, "grad_norm": 0.4758935405313615, "learning_rate": 4.669856459330144e-05, "loss": 1.1967, "step": 244 }, { "epoch": 0.02, "grad_norm": 0.39684085109310996, "learning_rate": 4.688995215311005e-05, "loss": 1.1169, "step": 245 }, { "epoch": 0.02, "grad_norm": 0.4823470965603195, "learning_rate": 4.708133971291866e-05, "loss": 1.2347, "step": 246 }, { "epoch": 0.02, "grad_norm": 0.37717028125791024, "learning_rate": 4.7272727272727275e-05, "loss": 1.094, "step": 247 }, { "epoch": 0.02, "grad_norm": 0.3667379872819918, "learning_rate": 4.746411483253589e-05, "loss": 1.2391, "step": 248 }, { "epoch": 0.02, "grad_norm": 0.3407975972338634, "learning_rate": 4.76555023923445e-05, "loss": 1.0632, "step": 249 }, { "epoch": 0.02, "grad_norm": 0.49238987132507245, "learning_rate": 4.784688995215311e-05, "loss": 1.2121, "step": 250 }, { "epoch": 0.02, "grad_norm": 0.46706882037804415, "learning_rate": 4.8038277511961724e-05, "loss": 1.2437, "step": 251 }, { "epoch": 0.02, "grad_norm": 0.4088473587226235, "learning_rate": 4.8229665071770336e-05, "loss": 1.1572, "step": 252 }, { "epoch": 0.02, "grad_norm": 0.3821462444529073, "learning_rate": 4.842105263157895e-05, "loss": 1.1603, "step": 253 }, { "epoch": 0.02, "grad_norm": 0.4782990514183433, "learning_rate": 4.861244019138757e-05, "loss": 1.2409, "step": 254 }, { "epoch": 0.02, "grad_norm": 0.45087555655452305, "learning_rate": 4.880382775119617e-05, "loss": 1.1556, "step": 255 }, { "epoch": 0.02, "grad_norm": 0.3865676678179632, "learning_rate": 4.899521531100479e-05, "loss": 1.1945, "step": 256 }, { "epoch": 0.02, "grad_norm": 0.4099982787948705, "learning_rate": 4.91866028708134e-05, "loss": 1.17, "step": 257 }, { "epoch": 0.02, "grad_norm": 0.46016977600480446, "learning_rate": 4.937799043062201e-05, "loss": 1.2024, "step": 258 }, { "epoch": 0.02, "grad_norm": 0.4712335677622079, "learning_rate": 4.956937799043063e-05, "loss": 1.1252, "step": 259 }, { "epoch": 0.02, "grad_norm": 0.3955435220531026, "learning_rate": 4.9760765550239234e-05, "loss": 1.201, "step": 260 }, { "epoch": 0.02, "grad_norm": 0.48382588647342273, "learning_rate": 4.995215311004785e-05, "loss": 1.3135, "step": 261 }, { "epoch": 0.03, "grad_norm": 0.39028060459262043, "learning_rate": 5.014354066985646e-05, "loss": 1.2573, "step": 262 }, { "epoch": 0.03, "grad_norm": 0.4094204023390644, "learning_rate": 5.033492822966508e-05, "loss": 1.1411, "step": 263 }, { "epoch": 0.03, "grad_norm": 0.37405702073279246, "learning_rate": 5.052631578947369e-05, "loss": 1.1924, "step": 264 }, { "epoch": 0.03, "grad_norm": 0.38656220760519894, "learning_rate": 5.0717703349282295e-05, "loss": 1.2062, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.4152109433386491, "learning_rate": 5.090909090909091e-05, "loss": 1.2412, "step": 266 }, { "epoch": 0.03, "grad_norm": 0.3485732917182752, "learning_rate": 5.1100478468899526e-05, "loss": 1.2917, "step": 267 }, { "epoch": 0.03, "grad_norm": 0.3551920352066169, "learning_rate": 5.129186602870814e-05, "loss": 1.1494, "step": 268 }, { "epoch": 0.03, "grad_norm": 0.3817527119708618, "learning_rate": 5.1483253588516744e-05, "loss": 1.2689, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.35043446712727827, "learning_rate": 5.167464114832536e-05, "loss": 1.1263, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.4560107489672836, "learning_rate": 5.1866028708133975e-05, "loss": 1.2355, "step": 271 }, { "epoch": 0.03, "grad_norm": 0.411519693827521, "learning_rate": 5.205741626794258e-05, "loss": 1.2213, "step": 272 }, { "epoch": 0.03, "grad_norm": 0.4017892181431639, "learning_rate": 5.22488038277512e-05, "loss": 1.2103, "step": 273 }, { "epoch": 0.03, "grad_norm": 0.46457066156222404, "learning_rate": 5.244019138755981e-05, "loss": 1.1638, "step": 274 }, { "epoch": 0.03, "grad_norm": 0.41073989648253845, "learning_rate": 5.2631578947368424e-05, "loss": 1.2773, "step": 275 }, { "epoch": 0.03, "grad_norm": 0.3730027098705776, "learning_rate": 5.282296650717704e-05, "loss": 1.1376, "step": 276 }, { "epoch": 0.03, "grad_norm": 0.3860157617102252, "learning_rate": 5.301435406698565e-05, "loss": 1.2109, "step": 277 }, { "epoch": 0.03, "grad_norm": 0.4349386298334119, "learning_rate": 5.320574162679426e-05, "loss": 1.1575, "step": 278 }, { "epoch": 0.03, "grad_norm": 0.4640773330272392, "learning_rate": 5.339712918660288e-05, "loss": 1.1985, "step": 279 }, { "epoch": 0.03, "grad_norm": 0.45865729558497004, "learning_rate": 5.3588516746411485e-05, "loss": 1.2145, "step": 280 }, { "epoch": 0.03, "grad_norm": 0.37111837421497373, "learning_rate": 5.37799043062201e-05, "loss": 1.1538, "step": 281 }, { "epoch": 0.03, "grad_norm": 0.40099943789483994, "learning_rate": 5.39712918660287e-05, "loss": 1.0804, "step": 282 }, { "epoch": 0.03, "grad_norm": 0.42817755702563415, "learning_rate": 5.416267942583733e-05, "loss": 1.202, "step": 283 }, { "epoch": 0.03, "grad_norm": 0.34712198940947375, "learning_rate": 5.4354066985645934e-05, "loss": 1.2341, "step": 284 }, { "epoch": 0.03, "grad_norm": 0.39457404932238177, "learning_rate": 5.4545454545454546e-05, "loss": 1.1571, "step": 285 }, { "epoch": 0.03, "grad_norm": 0.36930568124767427, "learning_rate": 5.4736842105263165e-05, "loss": 1.1275, "step": 286 }, { "epoch": 0.03, "grad_norm": 0.4662030178725213, "learning_rate": 5.492822966507177e-05, "loss": 1.1811, "step": 287 }, { "epoch": 0.03, "grad_norm": 0.40587674329213624, "learning_rate": 5.511961722488038e-05, "loss": 1.102, "step": 288 }, { "epoch": 0.03, "grad_norm": 0.35779077893034156, "learning_rate": 5.5311004784689e-05, "loss": 1.2788, "step": 289 }, { "epoch": 0.03, "grad_norm": 0.31159530938785, "learning_rate": 5.5502392344497614e-05, "loss": 1.1778, "step": 290 }, { "epoch": 0.03, "grad_norm": 0.4160898781250681, "learning_rate": 5.569377990430622e-05, "loss": 1.2064, "step": 291 }, { "epoch": 0.03, "grad_norm": 0.43660026046564104, "learning_rate": 5.588516746411484e-05, "loss": 1.1494, "step": 292 }, { "epoch": 0.03, "grad_norm": 0.40119987919562433, "learning_rate": 5.607655502392345e-05, "loss": 1.1623, "step": 293 }, { "epoch": 0.03, "grad_norm": 0.37717288362355916, "learning_rate": 5.6267942583732056e-05, "loss": 1.1194, "step": 294 }, { "epoch": 0.03, "grad_norm": 0.4057229096001704, "learning_rate": 5.645933014354067e-05, "loss": 1.0581, "step": 295 }, { "epoch": 0.03, "grad_norm": 0.4670818825975868, "learning_rate": 5.665071770334929e-05, "loss": 1.171, "step": 296 }, { "epoch": 0.03, "grad_norm": 0.5482039795115164, "learning_rate": 5.68421052631579e-05, "loss": 1.1082, "step": 297 }, { "epoch": 0.03, "grad_norm": 0.4468277071138632, "learning_rate": 5.7033492822966505e-05, "loss": 1.2785, "step": 298 }, { "epoch": 0.03, "grad_norm": 0.3500497863626851, "learning_rate": 5.7224880382775124e-05, "loss": 1.1713, "step": 299 }, { "epoch": 0.03, "grad_norm": 0.5457539009822924, "learning_rate": 5.7416267942583736e-05, "loss": 1.1259, "step": 300 }, { "epoch": 0.03, "grad_norm": 0.3521279155346515, "learning_rate": 5.760765550239234e-05, "loss": 1.2143, "step": 301 }, { "epoch": 0.03, "grad_norm": 0.38014559184795155, "learning_rate": 5.779904306220096e-05, "loss": 1.2169, "step": 302 }, { "epoch": 0.03, "grad_norm": 0.4368587205355613, "learning_rate": 5.799043062200957e-05, "loss": 1.1354, "step": 303 }, { "epoch": 0.03, "grad_norm": 0.3773750084543655, "learning_rate": 5.818181818181818e-05, "loss": 1.204, "step": 304 }, { "epoch": 0.03, "grad_norm": 0.4661908984222087, "learning_rate": 5.8373205741626804e-05, "loss": 1.2339, "step": 305 }, { "epoch": 0.03, "grad_norm": 0.4233656313317972, "learning_rate": 5.856459330143541e-05, "loss": 1.3069, "step": 306 }, { "epoch": 0.03, "grad_norm": 0.4452233614591402, "learning_rate": 5.875598086124402e-05, "loss": 1.1966, "step": 307 }, { "epoch": 0.03, "grad_norm": 0.379869871739064, "learning_rate": 5.894736842105263e-05, "loss": 1.1233, "step": 308 }, { "epoch": 0.03, "grad_norm": 0.3634900550395493, "learning_rate": 5.9138755980861246e-05, "loss": 1.2106, "step": 309 }, { "epoch": 0.03, "grad_norm": 0.38506818193045056, "learning_rate": 5.933014354066986e-05, "loss": 1.2411, "step": 310 }, { "epoch": 0.03, "grad_norm": 0.41159846103302217, "learning_rate": 5.9521531100478464e-05, "loss": 1.3279, "step": 311 }, { "epoch": 0.03, "grad_norm": 0.28921210577002654, "learning_rate": 5.971291866028709e-05, "loss": 1.0785, "step": 312 }, { "epoch": 0.03, "grad_norm": 0.38616302251379914, "learning_rate": 5.9904306220095695e-05, "loss": 1.2536, "step": 313 }, { "epoch": 0.03, "grad_norm": 0.4101883225822147, "learning_rate": 6.009569377990431e-05, "loss": 1.1666, "step": 314 }, { "epoch": 0.03, "grad_norm": 0.3423514370886581, "learning_rate": 6.028708133971293e-05, "loss": 1.2331, "step": 315 }, { "epoch": 0.03, "grad_norm": 0.41207456262790537, "learning_rate": 6.047846889952153e-05, "loss": 1.1403, "step": 316 }, { "epoch": 0.03, "grad_norm": 0.40588734564935647, "learning_rate": 6.0669856459330144e-05, "loss": 1.1986, "step": 317 }, { "epoch": 0.03, "grad_norm": 0.37104840888562607, "learning_rate": 6.086124401913876e-05, "loss": 1.169, "step": 318 }, { "epoch": 0.03, "grad_norm": 0.39655898223668296, "learning_rate": 6.105263157894737e-05, "loss": 1.1296, "step": 319 }, { "epoch": 0.03, "grad_norm": 0.5221245550161207, "learning_rate": 6.124401913875598e-05, "loss": 1.0809, "step": 320 }, { "epoch": 0.03, "grad_norm": 0.4021445879644258, "learning_rate": 6.143540669856461e-05, "loss": 1.1897, "step": 321 }, { "epoch": 0.03, "grad_norm": 0.38180254930918234, "learning_rate": 6.16267942583732e-05, "loss": 1.1859, "step": 322 }, { "epoch": 0.03, "grad_norm": 0.3828401683172079, "learning_rate": 6.181818181818182e-05, "loss": 1.0985, "step": 323 }, { "epoch": 0.03, "grad_norm": 0.43356536766643855, "learning_rate": 6.200956937799043e-05, "loss": 1.1524, "step": 324 }, { "epoch": 0.03, "grad_norm": 0.39197554253988764, "learning_rate": 6.220095693779904e-05, "loss": 1.1607, "step": 325 }, { "epoch": 0.03, "grad_norm": 0.3768192836960789, "learning_rate": 6.239234449760765e-05, "loss": 1.2115, "step": 326 }, { "epoch": 0.03, "grad_norm": 0.4123016217273888, "learning_rate": 6.258373205741627e-05, "loss": 1.2046, "step": 327 }, { "epoch": 0.03, "grad_norm": 0.38690332283125584, "learning_rate": 6.277511961722489e-05, "loss": 1.2207, "step": 328 }, { "epoch": 0.03, "grad_norm": 0.3958359496615601, "learning_rate": 6.296650717703349e-05, "loss": 1.1334, "step": 329 }, { "epoch": 0.03, "grad_norm": 0.36561214007046366, "learning_rate": 6.31578947368421e-05, "loss": 1.1196, "step": 330 }, { "epoch": 0.03, "grad_norm": 0.36953124111301633, "learning_rate": 6.334928229665073e-05, "loss": 1.1203, "step": 331 }, { "epoch": 0.03, "grad_norm": 0.391180586728009, "learning_rate": 6.354066985645933e-05, "loss": 1.2479, "step": 332 }, { "epoch": 0.03, "grad_norm": 0.483739471817649, "learning_rate": 6.373205741626794e-05, "loss": 1.1225, "step": 333 }, { "epoch": 0.03, "grad_norm": 0.4233252985553476, "learning_rate": 6.392344497607657e-05, "loss": 1.1772, "step": 334 }, { "epoch": 0.03, "grad_norm": 0.4261190391654762, "learning_rate": 6.411483253588518e-05, "loss": 1.1752, "step": 335 }, { "epoch": 0.03, "grad_norm": 0.4217550233368759, "learning_rate": 6.430622009569378e-05, "loss": 1.2335, "step": 336 }, { "epoch": 0.03, "grad_norm": 0.4126104400962645, "learning_rate": 6.449760765550239e-05, "loss": 1.1095, "step": 337 }, { "epoch": 0.03, "grad_norm": 0.48994576863601885, "learning_rate": 6.468899521531101e-05, "loss": 1.2933, "step": 338 }, { "epoch": 0.03, "grad_norm": 0.3928982582790676, "learning_rate": 6.488038277511961e-05, "loss": 1.1598, "step": 339 }, { "epoch": 0.03, "grad_norm": 0.38476512783934874, "learning_rate": 6.507177033492823e-05, "loss": 1.3208, "step": 340 }, { "epoch": 0.03, "grad_norm": 0.3810331248367921, "learning_rate": 6.526315789473685e-05, "loss": 1.1183, "step": 341 }, { "epoch": 0.03, "grad_norm": 0.38985962649469763, "learning_rate": 6.545454545454546e-05, "loss": 1.2319, "step": 342 }, { "epoch": 0.03, "grad_norm": 0.44280071804731985, "learning_rate": 6.564593301435406e-05, "loss": 1.2374, "step": 343 }, { "epoch": 0.03, "grad_norm": 0.3221949054184306, "learning_rate": 6.583732057416269e-05, "loss": 1.182, "step": 344 }, { "epoch": 0.03, "grad_norm": 0.3765883193637935, "learning_rate": 6.60287081339713e-05, "loss": 1.085, "step": 345 }, { "epoch": 0.03, "grad_norm": 0.36077725709386577, "learning_rate": 6.62200956937799e-05, "loss": 1.1741, "step": 346 }, { "epoch": 0.03, "grad_norm": 0.3844614909912839, "learning_rate": 6.641148325358852e-05, "loss": 1.1308, "step": 347 }, { "epoch": 0.03, "grad_norm": 0.356160190962461, "learning_rate": 6.660287081339714e-05, "loss": 1.2303, "step": 348 }, { "epoch": 0.03, "grad_norm": 0.40199466397728084, "learning_rate": 6.679425837320575e-05, "loss": 1.1404, "step": 349 }, { "epoch": 0.03, "grad_norm": 0.40760457805411343, "learning_rate": 6.698564593301436e-05, "loss": 1.1441, "step": 350 }, { "epoch": 0.03, "grad_norm": 0.36664650507067836, "learning_rate": 6.717703349282297e-05, "loss": 1.0822, "step": 351 }, { "epoch": 0.03, "grad_norm": 0.3774008230280487, "learning_rate": 6.736842105263159e-05, "loss": 1.1882, "step": 352 }, { "epoch": 0.03, "grad_norm": 0.3743284992535933, "learning_rate": 6.755980861244018e-05, "loss": 1.1174, "step": 353 }, { "epoch": 0.03, "grad_norm": 0.3803668006858151, "learning_rate": 6.775119617224881e-05, "loss": 1.2074, "step": 354 }, { "epoch": 0.03, "grad_norm": 0.3777040131335034, "learning_rate": 6.794258373205742e-05, "loss": 1.0946, "step": 355 }, { "epoch": 0.03, "grad_norm": 0.3794465077922223, "learning_rate": 6.813397129186602e-05, "loss": 1.0683, "step": 356 }, { "epoch": 0.03, "grad_norm": 0.3794400803152343, "learning_rate": 6.832535885167465e-05, "loss": 1.1377, "step": 357 }, { "epoch": 0.03, "grad_norm": 0.3246160370634747, "learning_rate": 6.851674641148326e-05, "loss": 1.2539, "step": 358 }, { "epoch": 0.03, "grad_norm": 0.3663994418682161, "learning_rate": 6.870813397129187e-05, "loss": 1.179, "step": 359 }, { "epoch": 0.03, "grad_norm": 0.4165644814006045, "learning_rate": 6.889952153110048e-05, "loss": 1.2068, "step": 360 }, { "epoch": 0.03, "grad_norm": 0.40002794816857074, "learning_rate": 6.90909090909091e-05, "loss": 1.236, "step": 361 }, { "epoch": 0.03, "grad_norm": 0.36752801689828113, "learning_rate": 6.928229665071771e-05, "loss": 1.1806, "step": 362 }, { "epoch": 0.03, "grad_norm": 0.4118641376720516, "learning_rate": 6.947368421052632e-05, "loss": 1.2099, "step": 363 }, { "epoch": 0.03, "grad_norm": 0.342905855911731, "learning_rate": 6.966507177033493e-05, "loss": 1.1753, "step": 364 }, { "epoch": 0.03, "grad_norm": 0.37601728725049927, "learning_rate": 6.985645933014354e-05, "loss": 1.2123, "step": 365 }, { "epoch": 0.04, "grad_norm": 0.3996876187572624, "learning_rate": 7.004784688995216e-05, "loss": 1.2063, "step": 366 }, { "epoch": 0.04, "grad_norm": 0.4995513430917594, "learning_rate": 7.023923444976077e-05, "loss": 1.1249, "step": 367 }, { "epoch": 0.04, "grad_norm": 0.38011883982869116, "learning_rate": 7.043062200956938e-05, "loss": 1.091, "step": 368 }, { "epoch": 0.04, "grad_norm": 0.40185997614954744, "learning_rate": 7.0622009569378e-05, "loss": 1.2879, "step": 369 }, { "epoch": 0.04, "grad_norm": 0.37530412149005404, "learning_rate": 7.08133971291866e-05, "loss": 1.2832, "step": 370 }, { "epoch": 0.04, "grad_norm": 0.4067556360842582, "learning_rate": 7.100478468899522e-05, "loss": 1.0513, "step": 371 }, { "epoch": 0.04, "grad_norm": 0.4450195271251879, "learning_rate": 7.119617224880383e-05, "loss": 1.1781, "step": 372 }, { "epoch": 0.04, "grad_norm": 0.3878038365086055, "learning_rate": 7.138755980861244e-05, "loss": 1.2654, "step": 373 }, { "epoch": 0.04, "grad_norm": 0.34902250016212555, "learning_rate": 7.157894736842105e-05, "loss": 1.0807, "step": 374 }, { "epoch": 0.04, "grad_norm": 0.4789371923266489, "learning_rate": 7.177033492822967e-05, "loss": 1.0782, "step": 375 }, { "epoch": 0.04, "grad_norm": 0.3443621112474482, "learning_rate": 7.196172248803828e-05, "loss": 1.0874, "step": 376 }, { "epoch": 0.04, "grad_norm": 0.4354953964559702, "learning_rate": 7.215311004784689e-05, "loss": 1.1893, "step": 377 }, { "epoch": 0.04, "grad_norm": 0.3806981757319183, "learning_rate": 7.23444976076555e-05, "loss": 1.1033, "step": 378 }, { "epoch": 0.04, "grad_norm": 0.3640708591532273, "learning_rate": 7.253588516746413e-05, "loss": 1.1008, "step": 379 }, { "epoch": 0.04, "grad_norm": 0.40487824027906655, "learning_rate": 7.272727272727273e-05, "loss": 1.2676, "step": 380 }, { "epoch": 0.04, "grad_norm": 0.3974099352341619, "learning_rate": 7.291866028708134e-05, "loss": 1.1297, "step": 381 }, { "epoch": 0.04, "grad_norm": 0.4412922873426402, "learning_rate": 7.311004784688995e-05, "loss": 1.2022, "step": 382 }, { "epoch": 0.04, "grad_norm": 0.4177424000999219, "learning_rate": 7.330143540669856e-05, "loss": 1.0769, "step": 383 }, { "epoch": 0.04, "grad_norm": 0.37843676467423115, "learning_rate": 7.349282296650718e-05, "loss": 1.1895, "step": 384 }, { "epoch": 0.04, "grad_norm": 0.37904784029757954, "learning_rate": 7.368421052631579e-05, "loss": 1.0346, "step": 385 }, { "epoch": 0.04, "grad_norm": 0.3678229762318383, "learning_rate": 7.387559808612442e-05, "loss": 1.1415, "step": 386 }, { "epoch": 0.04, "grad_norm": 0.44631509974989175, "learning_rate": 7.406698564593301e-05, "loss": 1.1593, "step": 387 }, { "epoch": 0.04, "grad_norm": 0.4904633395013365, "learning_rate": 7.425837320574163e-05, "loss": 1.1944, "step": 388 }, { "epoch": 0.04, "grad_norm": 0.37152475181241584, "learning_rate": 7.444976076555025e-05, "loss": 1.1698, "step": 389 }, { "epoch": 0.04, "grad_norm": 0.4011204212131284, "learning_rate": 7.464114832535885e-05, "loss": 1.205, "step": 390 }, { "epoch": 0.04, "grad_norm": 0.36761682195572815, "learning_rate": 7.483253588516746e-05, "loss": 1.2973, "step": 391 }, { "epoch": 0.04, "grad_norm": 0.40374546939981903, "learning_rate": 7.502392344497609e-05, "loss": 1.1507, "step": 392 }, { "epoch": 0.04, "grad_norm": 0.4255006771988514, "learning_rate": 7.52153110047847e-05, "loss": 1.1891, "step": 393 }, { "epoch": 0.04, "grad_norm": 0.3865518214834786, "learning_rate": 7.54066985645933e-05, "loss": 1.185, "step": 394 }, { "epoch": 0.04, "grad_norm": 0.41827717268535636, "learning_rate": 7.559808612440191e-05, "loss": 1.0622, "step": 395 }, { "epoch": 0.04, "grad_norm": 0.37110879057589674, "learning_rate": 7.578947368421054e-05, "loss": 1.1381, "step": 396 }, { "epoch": 0.04, "grad_norm": 0.46618966480421226, "learning_rate": 7.598086124401914e-05, "loss": 1.2105, "step": 397 }, { "epoch": 0.04, "grad_norm": 0.4266096767917638, "learning_rate": 7.617224880382775e-05, "loss": 1.2304, "step": 398 }, { "epoch": 0.04, "grad_norm": 0.3951434711007925, "learning_rate": 7.636363636363637e-05, "loss": 1.1194, "step": 399 }, { "epoch": 0.04, "grad_norm": 0.5249468851294161, "learning_rate": 7.655502392344497e-05, "loss": 1.1557, "step": 400 }, { "epoch": 0.04, "grad_norm": 0.35849242393679126, "learning_rate": 7.674641148325359e-05, "loss": 1.1586, "step": 401 }, { "epoch": 0.04, "grad_norm": 0.4163074752217252, "learning_rate": 7.693779904306221e-05, "loss": 1.1884, "step": 402 }, { "epoch": 0.04, "grad_norm": 0.39359654231287206, "learning_rate": 7.712918660287082e-05, "loss": 1.2162, "step": 403 }, { "epoch": 0.04, "grad_norm": 0.40661594470681456, "learning_rate": 7.732057416267942e-05, "loss": 1.1578, "step": 404 }, { "epoch": 0.04, "grad_norm": 0.4060870336417891, "learning_rate": 7.751196172248805e-05, "loss": 1.2462, "step": 405 }, { "epoch": 0.04, "grad_norm": 0.34776732343372285, "learning_rate": 7.770334928229666e-05, "loss": 1.1907, "step": 406 }, { "epoch": 0.04, "grad_norm": 0.37012929061225097, "learning_rate": 7.789473684210526e-05, "loss": 1.084, "step": 407 }, { "epoch": 0.04, "grad_norm": 0.3191687282557576, "learning_rate": 7.808612440191387e-05, "loss": 1.1337, "step": 408 }, { "epoch": 0.04, "grad_norm": 0.428711696888013, "learning_rate": 7.82775119617225e-05, "loss": 1.2142, "step": 409 }, { "epoch": 0.04, "grad_norm": 0.5197761387762115, "learning_rate": 7.846889952153111e-05, "loss": 1.2915, "step": 410 }, { "epoch": 0.04, "grad_norm": 0.41273170978967216, "learning_rate": 7.866028708133971e-05, "loss": 1.1028, "step": 411 }, { "epoch": 0.04, "grad_norm": 0.3568549043871766, "learning_rate": 7.885167464114833e-05, "loss": 1.1575, "step": 412 }, { "epoch": 0.04, "grad_norm": 0.43257689825178397, "learning_rate": 7.904306220095695e-05, "loss": 1.1115, "step": 413 }, { "epoch": 0.04, "grad_norm": 0.4355392516104201, "learning_rate": 7.923444976076554e-05, "loss": 1.208, "step": 414 }, { "epoch": 0.04, "grad_norm": 0.48359013518775035, "learning_rate": 7.942583732057417e-05, "loss": 1.1864, "step": 415 }, { "epoch": 0.04, "grad_norm": 0.3730295702560985, "learning_rate": 7.961722488038278e-05, "loss": 1.1261, "step": 416 }, { "epoch": 0.04, "grad_norm": 0.3966685187279031, "learning_rate": 7.98086124401914e-05, "loss": 1.2786, "step": 417 }, { "epoch": 0.04, "grad_norm": 0.3786523559458125, "learning_rate": 8e-05, "loss": 1.1127, "step": 418 }, { "epoch": 0.04, "grad_norm": 0.4039420784784596, "learning_rate": 8.019138755980862e-05, "loss": 1.1653, "step": 419 }, { "epoch": 0.04, "grad_norm": 0.3836563304661635, "learning_rate": 8.038277511961723e-05, "loss": 1.1504, "step": 420 }, { "epoch": 0.04, "grad_norm": 0.3915882211031514, "learning_rate": 8.057416267942584e-05, "loss": 1.1881, "step": 421 }, { "epoch": 0.04, "grad_norm": 0.3585833381861096, "learning_rate": 8.076555023923446e-05, "loss": 1.1215, "step": 422 }, { "epoch": 0.04, "grad_norm": 0.34093033582588217, "learning_rate": 8.095693779904307e-05, "loss": 1.1502, "step": 423 }, { "epoch": 0.04, "grad_norm": 0.35015224800454925, "learning_rate": 8.114832535885168e-05, "loss": 1.1377, "step": 424 }, { "epoch": 0.04, "grad_norm": 0.36225302300603957, "learning_rate": 8.133971291866029e-05, "loss": 1.146, "step": 425 }, { "epoch": 0.04, "grad_norm": 0.401290151391233, "learning_rate": 8.15311004784689e-05, "loss": 1.1257, "step": 426 }, { "epoch": 0.04, "grad_norm": 0.42354295033157774, "learning_rate": 8.172248803827752e-05, "loss": 1.1861, "step": 427 }, { "epoch": 0.04, "grad_norm": 0.47531433461725525, "learning_rate": 8.191387559808613e-05, "loss": 1.2387, "step": 428 }, { "epoch": 0.04, "grad_norm": 0.3956478442168383, "learning_rate": 8.210526315789474e-05, "loss": 1.2046, "step": 429 }, { "epoch": 0.04, "grad_norm": 0.40300702532566746, "learning_rate": 8.229665071770335e-05, "loss": 1.2882, "step": 430 }, { "epoch": 0.04, "grad_norm": 0.4247965626832039, "learning_rate": 8.248803827751197e-05, "loss": 1.2151, "step": 431 }, { "epoch": 0.04, "grad_norm": 0.3667653514802216, "learning_rate": 8.267942583732058e-05, "loss": 1.2397, "step": 432 }, { "epoch": 0.04, "grad_norm": 0.368289021497025, "learning_rate": 8.287081339712919e-05, "loss": 1.1336, "step": 433 }, { "epoch": 0.04, "grad_norm": 0.35548517154282255, "learning_rate": 8.30622009569378e-05, "loss": 1.0854, "step": 434 }, { "epoch": 0.04, "grad_norm": 0.43281064565162874, "learning_rate": 8.325358851674641e-05, "loss": 1.1418, "step": 435 }, { "epoch": 0.04, "grad_norm": 0.36804478901968624, "learning_rate": 8.344497607655503e-05, "loss": 1.1754, "step": 436 }, { "epoch": 0.04, "grad_norm": 0.3680855445618144, "learning_rate": 8.363636363636364e-05, "loss": 1.1909, "step": 437 }, { "epoch": 0.04, "grad_norm": 0.38659927520583764, "learning_rate": 8.382775119617225e-05, "loss": 1.2076, "step": 438 }, { "epoch": 0.04, "grad_norm": 0.341767677446713, "learning_rate": 8.401913875598086e-05, "loss": 1.0057, "step": 439 }, { "epoch": 0.04, "grad_norm": 0.4256342728148254, "learning_rate": 8.421052631578948e-05, "loss": 1.2104, "step": 440 }, { "epoch": 0.04, "grad_norm": 0.37216536384041526, "learning_rate": 8.440191387559809e-05, "loss": 1.1144, "step": 441 }, { "epoch": 0.04, "grad_norm": 0.3791754076001631, "learning_rate": 8.45933014354067e-05, "loss": 1.2319, "step": 442 }, { "epoch": 0.04, "grad_norm": 0.42421700255160355, "learning_rate": 8.478468899521531e-05, "loss": 1.1934, "step": 443 }, { "epoch": 0.04, "grad_norm": 0.3678445305043515, "learning_rate": 8.497607655502394e-05, "loss": 1.1886, "step": 444 }, { "epoch": 0.04, "grad_norm": 0.4219847133147515, "learning_rate": 8.516746411483254e-05, "loss": 1.1748, "step": 445 }, { "epoch": 0.04, "grad_norm": 0.33966219534041503, "learning_rate": 8.535885167464115e-05, "loss": 1.1416, "step": 446 }, { "epoch": 0.04, "grad_norm": 0.3426950270667347, "learning_rate": 8.555023923444977e-05, "loss": 1.2117, "step": 447 }, { "epoch": 0.04, "grad_norm": 0.31268235060757094, "learning_rate": 8.574162679425837e-05, "loss": 1.1099, "step": 448 }, { "epoch": 0.04, "grad_norm": 0.4283403832969395, "learning_rate": 8.593301435406699e-05, "loss": 1.1204, "step": 449 }, { "epoch": 0.04, "grad_norm": 0.360981174023238, "learning_rate": 8.612440191387561e-05, "loss": 1.1203, "step": 450 }, { "epoch": 0.04, "grad_norm": 0.31441146897954253, "learning_rate": 8.631578947368421e-05, "loss": 1.1703, "step": 451 }, { "epoch": 0.04, "grad_norm": 0.6665116249955306, "learning_rate": 8.650717703349282e-05, "loss": 1.1965, "step": 452 }, { "epoch": 0.04, "grad_norm": 0.34282354864583103, "learning_rate": 8.669856459330143e-05, "loss": 1.1108, "step": 453 }, { "epoch": 0.04, "grad_norm": 0.3353048313959663, "learning_rate": 8.688995215311006e-05, "loss": 1.0893, "step": 454 }, { "epoch": 0.04, "grad_norm": 0.3851891541705526, "learning_rate": 8.708133971291866e-05, "loss": 1.065, "step": 455 }, { "epoch": 0.04, "grad_norm": 0.40582359225585646, "learning_rate": 8.727272727272727e-05, "loss": 1.1551, "step": 456 }, { "epoch": 0.04, "grad_norm": 0.3310055383140587, "learning_rate": 8.74641148325359e-05, "loss": 1.1374, "step": 457 }, { "epoch": 0.04, "grad_norm": 0.3697503368052755, "learning_rate": 8.76555023923445e-05, "loss": 1.0852, "step": 458 }, { "epoch": 0.04, "grad_norm": 0.3494933513359935, "learning_rate": 8.784688995215311e-05, "loss": 1.1092, "step": 459 }, { "epoch": 0.04, "grad_norm": 0.359799849220072, "learning_rate": 8.803827751196173e-05, "loss": 1.0291, "step": 460 }, { "epoch": 0.04, "grad_norm": 0.3692901471865079, "learning_rate": 8.822966507177035e-05, "loss": 1.1516, "step": 461 }, { "epoch": 0.04, "grad_norm": 0.31771285202360866, "learning_rate": 8.842105263157894e-05, "loss": 1.0414, "step": 462 }, { "epoch": 0.04, "grad_norm": 0.39382351019262535, "learning_rate": 8.861244019138757e-05, "loss": 1.1987, "step": 463 }, { "epoch": 0.04, "grad_norm": 0.37948154502600623, "learning_rate": 8.880382775119618e-05, "loss": 1.0162, "step": 464 }, { "epoch": 0.04, "grad_norm": 0.4454086610740419, "learning_rate": 8.899521531100478e-05, "loss": 1.1686, "step": 465 }, { "epoch": 0.04, "grad_norm": 0.3992597027171503, "learning_rate": 8.91866028708134e-05, "loss": 1.1067, "step": 466 }, { "epoch": 0.04, "grad_norm": 0.402787251335542, "learning_rate": 8.937799043062202e-05, "loss": 1.1733, "step": 467 }, { "epoch": 0.04, "grad_norm": 0.3772514735030477, "learning_rate": 8.956937799043063e-05, "loss": 1.1926, "step": 468 }, { "epoch": 0.04, "grad_norm": 0.38379331799775773, "learning_rate": 8.976076555023923e-05, "loss": 1.2207, "step": 469 }, { "epoch": 0.04, "grad_norm": 0.37849416076396786, "learning_rate": 8.995215311004786e-05, "loss": 1.158, "step": 470 }, { "epoch": 0.05, "grad_norm": 0.40932072449271345, "learning_rate": 9.014354066985647e-05, "loss": 1.124, "step": 471 }, { "epoch": 0.05, "grad_norm": 0.34313554427354404, "learning_rate": 9.033492822966507e-05, "loss": 1.2141, "step": 472 }, { "epoch": 0.05, "grad_norm": 0.2960457574671995, "learning_rate": 9.052631578947369e-05, "loss": 1.1269, "step": 473 }, { "epoch": 0.05, "grad_norm": 0.3364724543703671, "learning_rate": 9.07177033492823e-05, "loss": 1.0963, "step": 474 }, { "epoch": 0.05, "grad_norm": 0.4102740455894671, "learning_rate": 9.090909090909092e-05, "loss": 0.9836, "step": 475 }, { "epoch": 0.05, "grad_norm": 0.3576390479541009, "learning_rate": 9.110047846889953e-05, "loss": 1.088, "step": 476 }, { "epoch": 0.05, "grad_norm": 0.4063740081724684, "learning_rate": 9.129186602870814e-05, "loss": 1.0916, "step": 477 }, { "epoch": 0.05, "grad_norm": 0.3866688811135483, "learning_rate": 9.148325358851675e-05, "loss": 1.1582, "step": 478 }, { "epoch": 0.05, "grad_norm": 0.35233155736688976, "learning_rate": 9.167464114832537e-05, "loss": 1.2166, "step": 479 }, { "epoch": 0.05, "grad_norm": 0.3523742613417453, "learning_rate": 9.186602870813398e-05, "loss": 1.1492, "step": 480 }, { "epoch": 0.05, "grad_norm": 0.4091175614367992, "learning_rate": 9.205741626794259e-05, "loss": 1.2015, "step": 481 }, { "epoch": 0.05, "grad_norm": 0.40461705008021104, "learning_rate": 9.224880382775119e-05, "loss": 1.1021, "step": 482 }, { "epoch": 0.05, "grad_norm": 0.3555229450892527, "learning_rate": 9.244019138755981e-05, "loss": 1.1677, "step": 483 }, { "epoch": 0.05, "grad_norm": 0.39219146946985217, "learning_rate": 9.263157894736843e-05, "loss": 1.1448, "step": 484 }, { "epoch": 0.05, "grad_norm": 0.3959881230046531, "learning_rate": 9.282296650717704e-05, "loss": 1.1031, "step": 485 }, { "epoch": 0.05, "grad_norm": 0.37437224905160343, "learning_rate": 9.301435406698565e-05, "loss": 1.2172, "step": 486 }, { "epoch": 0.05, "grad_norm": 0.34627056702655357, "learning_rate": 9.320574162679426e-05, "loss": 1.0524, "step": 487 }, { "epoch": 0.05, "grad_norm": 0.34958866976468206, "learning_rate": 9.339712918660288e-05, "loss": 1.1962, "step": 488 }, { "epoch": 0.05, "grad_norm": 0.3592293846050586, "learning_rate": 9.358851674641149e-05, "loss": 1.1292, "step": 489 }, { "epoch": 0.05, "grad_norm": 0.4512710733697596, "learning_rate": 9.37799043062201e-05, "loss": 1.1657, "step": 490 }, { "epoch": 0.05, "grad_norm": 0.3630522232972779, "learning_rate": 9.397129186602871e-05, "loss": 1.1852, "step": 491 }, { "epoch": 0.05, "grad_norm": 0.37647580929450847, "learning_rate": 9.416267942583733e-05, "loss": 1.1968, "step": 492 }, { "epoch": 0.05, "grad_norm": 0.4947723206359184, "learning_rate": 9.435406698564594e-05, "loss": 1.1535, "step": 493 }, { "epoch": 0.05, "grad_norm": 0.384481476371926, "learning_rate": 9.454545454545455e-05, "loss": 1.0762, "step": 494 }, { "epoch": 0.05, "grad_norm": 0.30759408641071373, "learning_rate": 9.473684210526316e-05, "loss": 1.2025, "step": 495 }, { "epoch": 0.05, "grad_norm": 0.39262382714536653, "learning_rate": 9.492822966507177e-05, "loss": 1.1072, "step": 496 }, { "epoch": 0.05, "grad_norm": 0.3707577579487458, "learning_rate": 9.511961722488039e-05, "loss": 1.201, "step": 497 }, { "epoch": 0.05, "grad_norm": 0.37259701318014815, "learning_rate": 9.5311004784689e-05, "loss": 1.1808, "step": 498 }, { "epoch": 0.05, "grad_norm": 0.3899497483331902, "learning_rate": 9.550239234449761e-05, "loss": 1.155, "step": 499 }, { "epoch": 0.05, "grad_norm": 0.3056736695030619, "learning_rate": 9.569377990430622e-05, "loss": 1.1485, "step": 500 }, { "epoch": 0.05, "grad_norm": 0.3771828447592661, "learning_rate": 9.588516746411484e-05, "loss": 1.1475, "step": 501 }, { "epoch": 0.05, "grad_norm": 0.3512506645855738, "learning_rate": 9.607655502392345e-05, "loss": 1.137, "step": 502 }, { "epoch": 0.05, "grad_norm": 0.44787972025939427, "learning_rate": 9.626794258373206e-05, "loss": 1.1137, "step": 503 }, { "epoch": 0.05, "grad_norm": 0.43899468578920847, "learning_rate": 9.645933014354067e-05, "loss": 1.198, "step": 504 }, { "epoch": 0.05, "grad_norm": 0.37520028378057024, "learning_rate": 9.66507177033493e-05, "loss": 1.12, "step": 505 }, { "epoch": 0.05, "grad_norm": 0.44437839309325783, "learning_rate": 9.68421052631579e-05, "loss": 1.1266, "step": 506 }, { "epoch": 0.05, "grad_norm": 0.3713684887370264, "learning_rate": 9.703349282296651e-05, "loss": 1.0703, "step": 507 }, { "epoch": 0.05, "grad_norm": 0.3785759236620265, "learning_rate": 9.722488038277513e-05, "loss": 1.1825, "step": 508 }, { "epoch": 0.05, "grad_norm": 0.3949986518868616, "learning_rate": 9.741626794258373e-05, "loss": 1.205, "step": 509 }, { "epoch": 0.05, "grad_norm": 0.32820866590924014, "learning_rate": 9.760765550239235e-05, "loss": 1.0473, "step": 510 }, { "epoch": 0.05, "grad_norm": 0.3438055422000543, "learning_rate": 9.779904306220096e-05, "loss": 1.2331, "step": 511 }, { "epoch": 0.05, "grad_norm": 0.4782013192332416, "learning_rate": 9.799043062200958e-05, "loss": 1.0641, "step": 512 }, { "epoch": 0.05, "grad_norm": 0.37127997208806374, "learning_rate": 9.818181818181818e-05, "loss": 1.1557, "step": 513 }, { "epoch": 0.05, "grad_norm": 0.37810255782547136, "learning_rate": 9.83732057416268e-05, "loss": 1.1545, "step": 514 }, { "epoch": 0.05, "grad_norm": 0.4105466030008024, "learning_rate": 9.856459330143542e-05, "loss": 1.2272, "step": 515 }, { "epoch": 0.05, "grad_norm": 0.4517743884986355, "learning_rate": 9.875598086124402e-05, "loss": 1.1229, "step": 516 }, { "epoch": 0.05, "grad_norm": 0.399218104135688, "learning_rate": 9.894736842105263e-05, "loss": 1.1467, "step": 517 }, { "epoch": 0.05, "grad_norm": 0.3693460754043359, "learning_rate": 9.913875598086126e-05, "loss": 1.169, "step": 518 }, { "epoch": 0.05, "grad_norm": 0.3446333300820591, "learning_rate": 9.933014354066987e-05, "loss": 1.0783, "step": 519 }, { "epoch": 0.05, "grad_norm": 0.41719266096581403, "learning_rate": 9.952153110047847e-05, "loss": 1.211, "step": 520 }, { "epoch": 0.05, "grad_norm": 0.4139824733210239, "learning_rate": 9.97129186602871e-05, "loss": 1.2271, "step": 521 }, { "epoch": 0.05, "grad_norm": 0.3284583647624778, "learning_rate": 9.99043062200957e-05, "loss": 1.1363, "step": 522 }, { "epoch": 0.05, "grad_norm": 0.3743628377274405, "learning_rate": 0.0001000956937799043, "loss": 1.1254, "step": 523 }, { "epoch": 0.05, "grad_norm": 0.4187480747840408, "learning_rate": 0.00010028708133971292, "loss": 1.1542, "step": 524 }, { "epoch": 0.05, "grad_norm": 0.35701214246846424, "learning_rate": 0.00010047846889952153, "loss": 1.1318, "step": 525 }, { "epoch": 0.05, "grad_norm": 0.39966360139311247, "learning_rate": 0.00010066985645933015, "loss": 1.0679, "step": 526 }, { "epoch": 0.05, "grad_norm": 0.42408986083219885, "learning_rate": 0.00010086124401913877, "loss": 1.086, "step": 527 }, { "epoch": 0.05, "grad_norm": 0.33931985700987544, "learning_rate": 0.00010105263157894738, "loss": 1.1684, "step": 528 }, { "epoch": 0.05, "grad_norm": 0.33040989829144524, "learning_rate": 0.00010124401913875599, "loss": 1.1301, "step": 529 }, { "epoch": 0.05, "grad_norm": 0.3404311052002135, "learning_rate": 0.00010143540669856459, "loss": 1.0848, "step": 530 }, { "epoch": 0.05, "grad_norm": 0.34611637641008364, "learning_rate": 0.0001016267942583732, "loss": 1.1635, "step": 531 }, { "epoch": 0.05, "grad_norm": 0.3672822694004909, "learning_rate": 0.00010181818181818181, "loss": 1.0811, "step": 532 }, { "epoch": 0.05, "grad_norm": 0.34761252015363225, "learning_rate": 0.00010200956937799044, "loss": 1.1502, "step": 533 }, { "epoch": 0.05, "grad_norm": 0.27503858553543464, "learning_rate": 0.00010220095693779905, "loss": 1.1257, "step": 534 }, { "epoch": 0.05, "grad_norm": 0.3368407495501332, "learning_rate": 0.00010239234449760766, "loss": 1.1195, "step": 535 }, { "epoch": 0.05, "grad_norm": 0.3448997570516004, "learning_rate": 0.00010258373205741628, "loss": 1.1955, "step": 536 }, { "epoch": 0.05, "grad_norm": 0.4366845539188124, "learning_rate": 0.00010277511961722488, "loss": 1.1175, "step": 537 }, { "epoch": 0.05, "grad_norm": 0.35681716286224935, "learning_rate": 0.00010296650717703349, "loss": 1.1577, "step": 538 }, { "epoch": 0.05, "grad_norm": 0.3359937686441125, "learning_rate": 0.00010315789473684211, "loss": 1.1318, "step": 539 }, { "epoch": 0.05, "grad_norm": 0.3398927803425864, "learning_rate": 0.00010334928229665073, "loss": 1.1278, "step": 540 }, { "epoch": 0.05, "grad_norm": 0.38532827109393014, "learning_rate": 0.00010354066985645934, "loss": 1.1273, "step": 541 }, { "epoch": 0.05, "grad_norm": 0.3740363062511281, "learning_rate": 0.00010373205741626795, "loss": 1.0253, "step": 542 }, { "epoch": 0.05, "grad_norm": 0.34342518179762227, "learning_rate": 0.00010392344497607656, "loss": 1.1462, "step": 543 }, { "epoch": 0.05, "grad_norm": 0.3590449087862375, "learning_rate": 0.00010411483253588516, "loss": 1.1392, "step": 544 }, { "epoch": 0.05, "grad_norm": 0.37655451333728496, "learning_rate": 0.00010430622009569377, "loss": 1.1218, "step": 545 }, { "epoch": 0.05, "grad_norm": 0.3336387102623628, "learning_rate": 0.0001044976076555024, "loss": 1.2268, "step": 546 }, { "epoch": 0.05, "grad_norm": 0.38550698369605846, "learning_rate": 0.00010468899521531101, "loss": 1.1752, "step": 547 }, { "epoch": 0.05, "grad_norm": 0.39228288833022, "learning_rate": 0.00010488038277511962, "loss": 1.2562, "step": 548 }, { "epoch": 0.05, "grad_norm": 0.34746795623865406, "learning_rate": 0.00010507177033492824, "loss": 1.2464, "step": 549 }, { "epoch": 0.05, "grad_norm": 0.350081344824654, "learning_rate": 0.00010526315789473685, "loss": 1.0909, "step": 550 }, { "epoch": 0.05, "grad_norm": 0.35258812822080665, "learning_rate": 0.00010545454545454545, "loss": 1.1647, "step": 551 }, { "epoch": 0.05, "grad_norm": 0.3894512796451852, "learning_rate": 0.00010564593301435409, "loss": 1.0903, "step": 552 }, { "epoch": 0.05, "grad_norm": 0.3560681821041678, "learning_rate": 0.00010583732057416268, "loss": 1.0612, "step": 553 }, { "epoch": 0.05, "grad_norm": 0.37166266505359585, "learning_rate": 0.0001060287081339713, "loss": 1.2101, "step": 554 }, { "epoch": 0.05, "grad_norm": 0.36530860942069354, "learning_rate": 0.00010622009569377991, "loss": 1.044, "step": 555 }, { "epoch": 0.05, "grad_norm": 0.33955376662887854, "learning_rate": 0.00010641148325358852, "loss": 1.093, "step": 556 }, { "epoch": 0.05, "grad_norm": 0.31114780875878933, "learning_rate": 0.00010660287081339712, "loss": 1.146, "step": 557 }, { "epoch": 0.05, "grad_norm": 0.3676757652032558, "learning_rate": 0.00010679425837320576, "loss": 1.0386, "step": 558 }, { "epoch": 0.05, "grad_norm": 0.32774291097340136, "learning_rate": 0.00010698564593301437, "loss": 1.0935, "step": 559 }, { "epoch": 0.05, "grad_norm": 0.33490784632156506, "learning_rate": 0.00010717703349282297, "loss": 1.1692, "step": 560 }, { "epoch": 0.05, "grad_norm": 0.3502773499002359, "learning_rate": 0.00010736842105263158, "loss": 1.1153, "step": 561 }, { "epoch": 0.05, "grad_norm": 0.36351558175076165, "learning_rate": 0.0001075598086124402, "loss": 1.0359, "step": 562 }, { "epoch": 0.05, "grad_norm": 0.32317295912626365, "learning_rate": 0.00010775119617224881, "loss": 1.0597, "step": 563 }, { "epoch": 0.05, "grad_norm": 0.37457465032706805, "learning_rate": 0.0001079425837320574, "loss": 1.2346, "step": 564 }, { "epoch": 0.05, "grad_norm": 0.3230784156429463, "learning_rate": 0.00010813397129186604, "loss": 1.0292, "step": 565 }, { "epoch": 0.05, "grad_norm": 0.29938140614694264, "learning_rate": 0.00010832535885167466, "loss": 1.068, "step": 566 }, { "epoch": 0.05, "grad_norm": 0.37830783204331137, "learning_rate": 0.00010851674641148326, "loss": 1.1449, "step": 567 }, { "epoch": 0.05, "grad_norm": 0.36419764707025626, "learning_rate": 0.00010870813397129187, "loss": 1.0981, "step": 568 }, { "epoch": 0.05, "grad_norm": 0.3748154030309419, "learning_rate": 0.00010889952153110048, "loss": 1.2252, "step": 569 }, { "epoch": 0.05, "grad_norm": 0.3368617151990764, "learning_rate": 0.00010909090909090909, "loss": 1.2124, "step": 570 }, { "epoch": 0.05, "grad_norm": 0.37988770907164504, "learning_rate": 0.00010928229665071772, "loss": 1.2043, "step": 571 }, { "epoch": 0.05, "grad_norm": 0.31449121687746323, "learning_rate": 0.00010947368421052633, "loss": 1.1775, "step": 572 }, { "epoch": 0.05, "grad_norm": 0.3568124415520435, "learning_rate": 0.00010966507177033494, "loss": 1.1014, "step": 573 }, { "epoch": 0.05, "grad_norm": 0.3673211031508777, "learning_rate": 0.00010985645933014354, "loss": 1.2182, "step": 574 }, { "epoch": 0.06, "grad_norm": 0.34202187757429514, "learning_rate": 0.00011004784688995215, "loss": 1.0369, "step": 575 }, { "epoch": 0.06, "grad_norm": 0.3905415136176411, "learning_rate": 0.00011023923444976077, "loss": 1.1507, "step": 576 }, { "epoch": 0.06, "grad_norm": 0.3468401555240628, "learning_rate": 0.00011043062200956938, "loss": 1.1642, "step": 577 }, { "epoch": 0.06, "grad_norm": 0.4192072995004005, "learning_rate": 0.000110622009569378, "loss": 1.164, "step": 578 }, { "epoch": 0.06, "grad_norm": 0.379758035778582, "learning_rate": 0.00011081339712918662, "loss": 1.1536, "step": 579 }, { "epoch": 0.06, "grad_norm": 0.41419578240684846, "learning_rate": 0.00011100478468899523, "loss": 1.0945, "step": 580 }, { "epoch": 0.06, "grad_norm": 0.3250523681546178, "learning_rate": 0.00011119617224880383, "loss": 1.099, "step": 581 }, { "epoch": 0.06, "grad_norm": 0.38315025894656307, "learning_rate": 0.00011138755980861244, "loss": 1.1642, "step": 582 }, { "epoch": 0.06, "grad_norm": 0.30382004794249146, "learning_rate": 0.00011157894736842105, "loss": 1.252, "step": 583 }, { "epoch": 0.06, "grad_norm": 0.36873995920664776, "learning_rate": 0.00011177033492822968, "loss": 1.1613, "step": 584 }, { "epoch": 0.06, "grad_norm": 0.40209606862367175, "learning_rate": 0.00011196172248803829, "loss": 1.2187, "step": 585 }, { "epoch": 0.06, "grad_norm": 0.39972092223451644, "learning_rate": 0.0001121531100478469, "loss": 1.1339, "step": 586 }, { "epoch": 0.06, "grad_norm": 0.3581792841100828, "learning_rate": 0.00011234449760765551, "loss": 1.1467, "step": 587 }, { "epoch": 0.06, "grad_norm": 0.33476559121109767, "learning_rate": 0.00011253588516746411, "loss": 1.1809, "step": 588 }, { "epoch": 0.06, "grad_norm": 0.3450568464002908, "learning_rate": 0.00011272727272727272, "loss": 1.1955, "step": 589 }, { "epoch": 0.06, "grad_norm": 0.30713056981495374, "learning_rate": 0.00011291866028708134, "loss": 1.153, "step": 590 }, { "epoch": 0.06, "grad_norm": 0.40056590609091713, "learning_rate": 0.00011311004784688996, "loss": 1.0824, "step": 591 }, { "epoch": 0.06, "grad_norm": 0.32202083458180564, "learning_rate": 0.00011330143540669858, "loss": 1.1039, "step": 592 }, { "epoch": 0.06, "grad_norm": 0.28281909786135145, "learning_rate": 0.00011349282296650719, "loss": 1.1722, "step": 593 }, { "epoch": 0.06, "grad_norm": 0.2930441788185507, "learning_rate": 0.0001136842105263158, "loss": 1.1902, "step": 594 }, { "epoch": 0.06, "grad_norm": 0.3491373061731604, "learning_rate": 0.0001138755980861244, "loss": 1.1247, "step": 595 }, { "epoch": 0.06, "grad_norm": 0.3110622779886572, "learning_rate": 0.00011406698564593301, "loss": 1.2289, "step": 596 }, { "epoch": 0.06, "grad_norm": 0.34519065720613423, "learning_rate": 0.00011425837320574164, "loss": 1.1169, "step": 597 }, { "epoch": 0.06, "grad_norm": 0.3066625621843041, "learning_rate": 0.00011444976076555025, "loss": 1.1645, "step": 598 }, { "epoch": 0.06, "grad_norm": 0.32116731229953854, "learning_rate": 0.00011464114832535886, "loss": 1.0933, "step": 599 }, { "epoch": 0.06, "grad_norm": 0.3511568531959789, "learning_rate": 0.00011483253588516747, "loss": 1.1087, "step": 600 }, { "epoch": 0.06, "grad_norm": 0.32112239871920967, "learning_rate": 0.00011502392344497607, "loss": 1.1406, "step": 601 }, { "epoch": 0.06, "grad_norm": 0.39367325401303266, "learning_rate": 0.00011521531100478468, "loss": 1.1545, "step": 602 }, { "epoch": 0.06, "grad_norm": 0.3392107735520774, "learning_rate": 0.0001154066985645933, "loss": 1.1566, "step": 603 }, { "epoch": 0.06, "grad_norm": 0.35936783606471423, "learning_rate": 0.00011559808612440192, "loss": 1.1135, "step": 604 }, { "epoch": 0.06, "grad_norm": 0.3453223570806925, "learning_rate": 0.00011578947368421053, "loss": 1.1679, "step": 605 }, { "epoch": 0.06, "grad_norm": 0.3988207022091826, "learning_rate": 0.00011598086124401915, "loss": 1.1266, "step": 606 }, { "epoch": 0.06, "grad_norm": 0.35616581701014133, "learning_rate": 0.00011617224880382776, "loss": 1.0747, "step": 607 }, { "epoch": 0.06, "grad_norm": 0.34856430848542924, "learning_rate": 0.00011636363636363636, "loss": 1.1737, "step": 608 }, { "epoch": 0.06, "grad_norm": 0.39749502570874873, "learning_rate": 0.00011655502392344497, "loss": 1.1367, "step": 609 }, { "epoch": 0.06, "grad_norm": 0.3817892480214725, "learning_rate": 0.00011674641148325361, "loss": 1.1423, "step": 610 }, { "epoch": 0.06, "grad_norm": 0.37169774084550616, "learning_rate": 0.00011693779904306221, "loss": 1.2363, "step": 611 }, { "epoch": 0.06, "grad_norm": 0.36680842275104286, "learning_rate": 0.00011712918660287082, "loss": 1.1137, "step": 612 }, { "epoch": 0.06, "grad_norm": 0.30862259202802894, "learning_rate": 0.00011732057416267943, "loss": 1.1156, "step": 613 }, { "epoch": 0.06, "grad_norm": 0.3381253043590406, "learning_rate": 0.00011751196172248804, "loss": 1.1913, "step": 614 }, { "epoch": 0.06, "grad_norm": 0.35640646013161875, "learning_rate": 0.00011770334928229664, "loss": 1.2953, "step": 615 }, { "epoch": 0.06, "grad_norm": 0.3180351478003401, "learning_rate": 0.00011789473684210525, "loss": 1.1151, "step": 616 }, { "epoch": 0.06, "grad_norm": 0.2838039245590443, "learning_rate": 0.0001180861244019139, "loss": 1.1178, "step": 617 }, { "epoch": 0.06, "grad_norm": 0.32734113884885613, "learning_rate": 0.00011827751196172249, "loss": 1.1095, "step": 618 }, { "epoch": 0.06, "grad_norm": 0.4976044214884747, "learning_rate": 0.0001184688995215311, "loss": 1.1425, "step": 619 }, { "epoch": 0.06, "grad_norm": 0.32042584160221055, "learning_rate": 0.00011866028708133972, "loss": 1.0799, "step": 620 }, { "epoch": 0.06, "grad_norm": 0.3258958466495425, "learning_rate": 0.00011885167464114833, "loss": 1.221, "step": 621 }, { "epoch": 0.06, "grad_norm": 0.3112060174372619, "learning_rate": 0.00011904306220095693, "loss": 1.2115, "step": 622 }, { "epoch": 0.06, "grad_norm": 0.30118176636144206, "learning_rate": 0.00011923444976076557, "loss": 1.1361, "step": 623 }, { "epoch": 0.06, "grad_norm": 0.3361051300196263, "learning_rate": 0.00011942583732057418, "loss": 1.191, "step": 624 }, { "epoch": 0.06, "grad_norm": 0.2931267938868614, "learning_rate": 0.00011961722488038278, "loss": 1.0851, "step": 625 }, { "epoch": 0.06, "grad_norm": 0.32113885088697364, "learning_rate": 0.00011980861244019139, "loss": 1.1867, "step": 626 }, { "epoch": 0.06, "grad_norm": 0.34101527118946584, "learning_rate": 0.00012, "loss": 1.1836, "step": 627 }, { "epoch": 0.06, "grad_norm": 0.27834089475655605, "learning_rate": 0.00012019138755980862, "loss": 1.2618, "step": 628 }, { "epoch": 0.06, "grad_norm": 0.3689617429853991, "learning_rate": 0.00012038277511961724, "loss": 1.1289, "step": 629 }, { "epoch": 0.06, "grad_norm": 0.32141982153995574, "learning_rate": 0.00012057416267942585, "loss": 1.245, "step": 630 }, { "epoch": 0.06, "grad_norm": 0.3577846092529067, "learning_rate": 0.00012076555023923447, "loss": 1.1214, "step": 631 }, { "epoch": 0.06, "grad_norm": 0.3547093306453314, "learning_rate": 0.00012095693779904306, "loss": 1.1151, "step": 632 }, { "epoch": 0.06, "grad_norm": 0.3235863986240357, "learning_rate": 0.00012114832535885168, "loss": 1.1431, "step": 633 }, { "epoch": 0.06, "grad_norm": 0.3315145079366769, "learning_rate": 0.00012133971291866029, "loss": 1.1589, "step": 634 }, { "epoch": 0.06, "grad_norm": 0.44616643435072917, "learning_rate": 0.0001215311004784689, "loss": 1.1899, "step": 635 }, { "epoch": 0.06, "grad_norm": 0.34695172958733533, "learning_rate": 0.00012172248803827753, "loss": 1.2423, "step": 636 }, { "epoch": 0.06, "grad_norm": 0.32832699890038897, "learning_rate": 0.00012191387559808614, "loss": 1.0669, "step": 637 }, { "epoch": 0.06, "grad_norm": 0.34250522043183074, "learning_rate": 0.00012210526315789474, "loss": 1.1031, "step": 638 }, { "epoch": 0.06, "grad_norm": 0.3536036436534466, "learning_rate": 0.00012229665071770336, "loss": 1.1833, "step": 639 }, { "epoch": 0.06, "grad_norm": 0.38952860754869895, "learning_rate": 0.00012248803827751196, "loss": 1.2269, "step": 640 }, { "epoch": 0.06, "grad_norm": 0.38372067281414196, "learning_rate": 0.00012267942583732056, "loss": 1.1696, "step": 641 }, { "epoch": 0.06, "grad_norm": 0.36604610363956575, "learning_rate": 0.00012287081339712921, "loss": 1.143, "step": 642 }, { "epoch": 0.06, "grad_norm": 0.29457638902628325, "learning_rate": 0.0001230622009569378, "loss": 1.1118, "step": 643 }, { "epoch": 0.06, "grad_norm": 0.31971053516113995, "learning_rate": 0.0001232535885167464, "loss": 1.2257, "step": 644 }, { "epoch": 0.06, "grad_norm": 0.3479548829091419, "learning_rate": 0.00012344497607655504, "loss": 1.2003, "step": 645 }, { "epoch": 0.06, "grad_norm": 0.39895729607686864, "learning_rate": 0.00012363636363636364, "loss": 1.2362, "step": 646 }, { "epoch": 0.06, "grad_norm": 0.32542242184693576, "learning_rate": 0.00012382775119617226, "loss": 1.1924, "step": 647 }, { "epoch": 0.06, "grad_norm": 0.37946269414290873, "learning_rate": 0.00012401913875598086, "loss": 1.2259, "step": 648 }, { "epoch": 0.06, "grad_norm": 0.37785595963877666, "learning_rate": 0.00012421052631578949, "loss": 1.252, "step": 649 }, { "epoch": 0.06, "grad_norm": 0.39908004119966145, "learning_rate": 0.00012440191387559808, "loss": 1.1444, "step": 650 }, { "epoch": 0.06, "grad_norm": 0.38865614676807153, "learning_rate": 0.0001245933014354067, "loss": 1.2314, "step": 651 }, { "epoch": 0.06, "grad_norm": 0.3204842742106689, "learning_rate": 0.0001247846889952153, "loss": 1.0824, "step": 652 }, { "epoch": 0.06, "grad_norm": 0.35369352498295387, "learning_rate": 0.00012497607655502393, "loss": 1.0264, "step": 653 }, { "epoch": 0.06, "grad_norm": 0.3305618992525529, "learning_rate": 0.00012516746411483253, "loss": 1.1012, "step": 654 }, { "epoch": 0.06, "grad_norm": 0.3757616845139893, "learning_rate": 0.00012535885167464116, "loss": 1.2916, "step": 655 }, { "epoch": 0.06, "grad_norm": 0.32567276622705355, "learning_rate": 0.00012555023923444978, "loss": 1.218, "step": 656 }, { "epoch": 0.06, "grad_norm": 0.30320222866051544, "learning_rate": 0.00012574162679425838, "loss": 1.0551, "step": 657 }, { "epoch": 0.06, "grad_norm": 0.47041450898052456, "learning_rate": 0.00012593301435406698, "loss": 1.2101, "step": 658 }, { "epoch": 0.06, "grad_norm": 0.3265512828583142, "learning_rate": 0.0001261244019138756, "loss": 1.0902, "step": 659 }, { "epoch": 0.06, "grad_norm": 0.3095200661644063, "learning_rate": 0.0001263157894736842, "loss": 1.2483, "step": 660 }, { "epoch": 0.06, "grad_norm": 0.39865694866961127, "learning_rate": 0.0001265071770334928, "loss": 1.0507, "step": 661 }, { "epoch": 0.06, "grad_norm": 0.35606148522081404, "learning_rate": 0.00012669856459330146, "loss": 1.0753, "step": 662 }, { "epoch": 0.06, "grad_norm": 0.3445720816931114, "learning_rate": 0.00012688995215311006, "loss": 1.143, "step": 663 }, { "epoch": 0.06, "grad_norm": 0.3677160012348687, "learning_rate": 0.00012708133971291866, "loss": 1.0608, "step": 664 }, { "epoch": 0.06, "grad_norm": 0.2980120877326159, "learning_rate": 0.00012727272727272728, "loss": 1.0872, "step": 665 }, { "epoch": 0.06, "grad_norm": 0.2896118505469009, "learning_rate": 0.00012746411483253588, "loss": 1.1485, "step": 666 }, { "epoch": 0.06, "grad_norm": 0.3350125319603418, "learning_rate": 0.0001276555023923445, "loss": 1.1236, "step": 667 }, { "epoch": 0.06, "grad_norm": 0.3517268797460554, "learning_rate": 0.00012784688995215313, "loss": 1.1378, "step": 668 }, { "epoch": 0.06, "grad_norm": 0.4220707921759215, "learning_rate": 0.00012803827751196173, "loss": 1.1656, "step": 669 }, { "epoch": 0.06, "grad_norm": 0.3098050517214006, "learning_rate": 0.00012822966507177036, "loss": 1.0732, "step": 670 }, { "epoch": 0.06, "grad_norm": 0.38036416406983276, "learning_rate": 0.00012842105263157895, "loss": 1.1597, "step": 671 }, { "epoch": 0.06, "grad_norm": 0.32201151129472433, "learning_rate": 0.00012861244019138755, "loss": 1.1557, "step": 672 }, { "epoch": 0.06, "grad_norm": 0.3477368553208273, "learning_rate": 0.00012880382775119618, "loss": 1.093, "step": 673 }, { "epoch": 0.06, "grad_norm": 0.33206153473346633, "learning_rate": 0.00012899521531100478, "loss": 1.0872, "step": 674 }, { "epoch": 0.06, "grad_norm": 0.3797973671348287, "learning_rate": 0.0001291866028708134, "loss": 1.1932, "step": 675 }, { "epoch": 0.06, "grad_norm": 0.38021465107794655, "learning_rate": 0.00012937799043062203, "loss": 1.2037, "step": 676 }, { "epoch": 0.06, "grad_norm": 0.40680529142131094, "learning_rate": 0.00012956937799043063, "loss": 1.13, "step": 677 }, { "epoch": 0.06, "grad_norm": 0.3662653154346482, "learning_rate": 0.00012976076555023923, "loss": 1.137, "step": 678 }, { "epoch": 0.06, "grad_norm": 0.389523749301837, "learning_rate": 0.00012995215311004785, "loss": 1.1714, "step": 679 }, { "epoch": 0.07, "grad_norm": 0.33672031522727297, "learning_rate": 0.00013014354066985645, "loss": 1.08, "step": 680 }, { "epoch": 0.07, "grad_norm": 0.3259489924855725, "learning_rate": 0.00013033492822966508, "loss": 1.1592, "step": 681 }, { "epoch": 0.07, "grad_norm": 0.4310205006695421, "learning_rate": 0.0001305263157894737, "loss": 1.1125, "step": 682 }, { "epoch": 0.07, "grad_norm": 0.3354208256542673, "learning_rate": 0.0001307177033492823, "loss": 1.1612, "step": 683 }, { "epoch": 0.07, "grad_norm": 0.33633246177327786, "learning_rate": 0.00013090909090909093, "loss": 1.1075, "step": 684 }, { "epoch": 0.07, "grad_norm": 0.31028161222188255, "learning_rate": 0.00013110047846889953, "loss": 1.1152, "step": 685 }, { "epoch": 0.07, "grad_norm": 0.4524180110599837, "learning_rate": 0.00013129186602870812, "loss": 1.1259, "step": 686 }, { "epoch": 0.07, "grad_norm": 0.3742481866033862, "learning_rate": 0.00013148325358851675, "loss": 1.2494, "step": 687 }, { "epoch": 0.07, "grad_norm": 0.3619012526518613, "learning_rate": 0.00013167464114832538, "loss": 1.101, "step": 688 }, { "epoch": 0.07, "grad_norm": 0.32394020898287806, "learning_rate": 0.00013186602870813397, "loss": 1.1392, "step": 689 }, { "epoch": 0.07, "grad_norm": 0.33391715304609637, "learning_rate": 0.0001320574162679426, "loss": 1.1594, "step": 690 }, { "epoch": 0.07, "grad_norm": 0.28785180948649514, "learning_rate": 0.0001322488038277512, "loss": 1.0496, "step": 691 }, { "epoch": 0.07, "grad_norm": 0.38088138786466363, "learning_rate": 0.0001324401913875598, "loss": 1.065, "step": 692 }, { "epoch": 0.07, "grad_norm": 0.3208927040153503, "learning_rate": 0.00013263157894736842, "loss": 1.1636, "step": 693 }, { "epoch": 0.07, "grad_norm": 0.3033641718971694, "learning_rate": 0.00013282296650717705, "loss": 1.1452, "step": 694 }, { "epoch": 0.07, "grad_norm": 0.2948469058966827, "learning_rate": 0.00013301435406698565, "loss": 1.1589, "step": 695 }, { "epoch": 0.07, "grad_norm": 0.30477569159510964, "learning_rate": 0.00013320574162679427, "loss": 1.181, "step": 696 }, { "epoch": 0.07, "grad_norm": 0.3410300113129024, "learning_rate": 0.00013339712918660287, "loss": 1.1628, "step": 697 }, { "epoch": 0.07, "grad_norm": 0.3058561315902832, "learning_rate": 0.0001335885167464115, "loss": 1.084, "step": 698 }, { "epoch": 0.07, "grad_norm": 0.34051994364928995, "learning_rate": 0.0001337799043062201, "loss": 1.0576, "step": 699 }, { "epoch": 0.07, "grad_norm": 0.3096222099295184, "learning_rate": 0.00013397129186602872, "loss": 1.0741, "step": 700 }, { "epoch": 0.07, "grad_norm": 0.28113955153682396, "learning_rate": 0.00013416267942583732, "loss": 1.1019, "step": 701 }, { "epoch": 0.07, "grad_norm": 0.2956254577619277, "learning_rate": 0.00013435406698564595, "loss": 1.1044, "step": 702 }, { "epoch": 0.07, "grad_norm": 0.31157316700478505, "learning_rate": 0.00013454545454545455, "loss": 1.1694, "step": 703 }, { "epoch": 0.07, "grad_norm": 0.3072975606896904, "learning_rate": 0.00013473684210526317, "loss": 1.1256, "step": 704 }, { "epoch": 0.07, "grad_norm": 0.35422315692846823, "learning_rate": 0.00013492822966507177, "loss": 1.202, "step": 705 }, { "epoch": 0.07, "grad_norm": 0.3039926886966394, "learning_rate": 0.00013511961722488037, "loss": 1.1784, "step": 706 }, { "epoch": 0.07, "grad_norm": 0.272337523028655, "learning_rate": 0.00013531100478468902, "loss": 1.126, "step": 707 }, { "epoch": 0.07, "grad_norm": 0.3396634306821353, "learning_rate": 0.00013550239234449762, "loss": 1.1249, "step": 708 }, { "epoch": 0.07, "grad_norm": 0.32221861659032364, "learning_rate": 0.00013569377990430622, "loss": 1.1312, "step": 709 }, { "epoch": 0.07, "grad_norm": 0.30678629631733856, "learning_rate": 0.00013588516746411485, "loss": 1.1462, "step": 710 }, { "epoch": 0.07, "grad_norm": 0.331787225449244, "learning_rate": 0.00013607655502392344, "loss": 1.1713, "step": 711 }, { "epoch": 0.07, "grad_norm": 0.2617883116352453, "learning_rate": 0.00013626794258373204, "loss": 1.1893, "step": 712 }, { "epoch": 0.07, "grad_norm": 0.34346153319515627, "learning_rate": 0.0001364593301435407, "loss": 1.1891, "step": 713 }, { "epoch": 0.07, "grad_norm": 0.3606280112508664, "learning_rate": 0.0001366507177033493, "loss": 1.3581, "step": 714 }, { "epoch": 0.07, "grad_norm": 0.30602332471532506, "learning_rate": 0.0001368421052631579, "loss": 1.2075, "step": 715 }, { "epoch": 0.07, "grad_norm": 0.2966643308304905, "learning_rate": 0.00013703349282296652, "loss": 1.1248, "step": 716 }, { "epoch": 0.07, "grad_norm": 0.3692677324708085, "learning_rate": 0.00013722488038277512, "loss": 1.1325, "step": 717 }, { "epoch": 0.07, "grad_norm": 0.3153436398786279, "learning_rate": 0.00013741626794258374, "loss": 1.141, "step": 718 }, { "epoch": 0.07, "grad_norm": 0.2993620796785782, "learning_rate": 0.00013760765550239234, "loss": 1.151, "step": 719 }, { "epoch": 0.07, "grad_norm": 0.360199053324579, "learning_rate": 0.00013779904306220097, "loss": 1.1671, "step": 720 }, { "epoch": 0.07, "grad_norm": 0.34616040756962774, "learning_rate": 0.0001379904306220096, "loss": 1.1314, "step": 721 }, { "epoch": 0.07, "grad_norm": 0.32093543405134595, "learning_rate": 0.0001381818181818182, "loss": 1.1017, "step": 722 }, { "epoch": 0.07, "grad_norm": 0.3072115942434, "learning_rate": 0.0001383732057416268, "loss": 1.2032, "step": 723 }, { "epoch": 0.07, "grad_norm": 0.33680085828062, "learning_rate": 0.00013856459330143542, "loss": 1.191, "step": 724 }, { "epoch": 0.07, "grad_norm": 0.27852291513017413, "learning_rate": 0.00013875598086124402, "loss": 1.2035, "step": 725 }, { "epoch": 0.07, "grad_norm": 0.3269080941652961, "learning_rate": 0.00013894736842105264, "loss": 1.1417, "step": 726 }, { "epoch": 0.07, "grad_norm": 0.2911551586198448, "learning_rate": 0.00013913875598086127, "loss": 1.2055, "step": 727 }, { "epoch": 0.07, "grad_norm": 0.3478754660709439, "learning_rate": 0.00013933014354066987, "loss": 1.1967, "step": 728 }, { "epoch": 0.07, "grad_norm": 0.31136552748186935, "learning_rate": 0.00013952153110047846, "loss": 1.1666, "step": 729 }, { "epoch": 0.07, "grad_norm": 0.29853571203421375, "learning_rate": 0.0001397129186602871, "loss": 1.1213, "step": 730 }, { "epoch": 0.07, "grad_norm": 0.29830710212889877, "learning_rate": 0.0001399043062200957, "loss": 1.2283, "step": 731 }, { "epoch": 0.07, "grad_norm": 0.2988658722078936, "learning_rate": 0.00014009569377990431, "loss": 1.2391, "step": 732 }, { "epoch": 0.07, "grad_norm": 0.3453871024753651, "learning_rate": 0.00014028708133971294, "loss": 1.2111, "step": 733 }, { "epoch": 0.07, "grad_norm": 0.3495768486847103, "learning_rate": 0.00014047846889952154, "loss": 1.1027, "step": 734 }, { "epoch": 0.07, "grad_norm": 0.3164024002188871, "learning_rate": 0.00014066985645933016, "loss": 1.1015, "step": 735 }, { "epoch": 0.07, "grad_norm": 0.3044139633248399, "learning_rate": 0.00014086124401913876, "loss": 1.0509, "step": 736 }, { "epoch": 0.07, "grad_norm": 0.281079329494108, "learning_rate": 0.00014105263157894736, "loss": 1.0826, "step": 737 }, { "epoch": 0.07, "grad_norm": 0.30636375000054217, "learning_rate": 0.000141244019138756, "loss": 1.2202, "step": 738 }, { "epoch": 0.07, "grad_norm": 0.29149180784544115, "learning_rate": 0.0001414354066985646, "loss": 1.1551, "step": 739 }, { "epoch": 0.07, "grad_norm": 0.3073819307679817, "learning_rate": 0.0001416267942583732, "loss": 1.2248, "step": 740 }, { "epoch": 0.07, "grad_norm": 0.3217985704338287, "learning_rate": 0.00014181818181818184, "loss": 1.2045, "step": 741 }, { "epoch": 0.07, "grad_norm": 0.3369269229369114, "learning_rate": 0.00014200956937799044, "loss": 1.1671, "step": 742 }, { "epoch": 0.07, "grad_norm": 0.2981155510935532, "learning_rate": 0.00014220095693779904, "loss": 1.1354, "step": 743 }, { "epoch": 0.07, "grad_norm": 0.3002935893022973, "learning_rate": 0.00014239234449760766, "loss": 1.0369, "step": 744 }, { "epoch": 0.07, "grad_norm": 0.3061234355072447, "learning_rate": 0.00014258373205741626, "loss": 1.0122, "step": 745 }, { "epoch": 0.07, "grad_norm": 0.28759317860073835, "learning_rate": 0.00014277511961722489, "loss": 1.0997, "step": 746 }, { "epoch": 0.07, "grad_norm": 0.3064828735905134, "learning_rate": 0.0001429665071770335, "loss": 1.1225, "step": 747 }, { "epoch": 0.07, "grad_norm": 0.3604086247045263, "learning_rate": 0.0001431578947368421, "loss": 1.1635, "step": 748 }, { "epoch": 0.07, "grad_norm": 0.2914789050629064, "learning_rate": 0.0001433492822966507, "loss": 1.1704, "step": 749 }, { "epoch": 0.07, "grad_norm": 0.3105462532363453, "learning_rate": 0.00014354066985645933, "loss": 1.144, "step": 750 }, { "epoch": 0.07, "grad_norm": 0.29242484393022483, "learning_rate": 0.00014373205741626793, "loss": 1.0991, "step": 751 }, { "epoch": 0.07, "grad_norm": 0.3009843941043775, "learning_rate": 0.00014392344497607656, "loss": 1.1409, "step": 752 }, { "epoch": 0.07, "grad_norm": 0.35368948528183997, "learning_rate": 0.00014411483253588518, "loss": 1.0839, "step": 753 }, { "epoch": 0.07, "grad_norm": 0.2908104621427735, "learning_rate": 0.00014430622009569378, "loss": 1.0997, "step": 754 }, { "epoch": 0.07, "grad_norm": 0.2761031765983028, "learning_rate": 0.0001444976076555024, "loss": 1.0389, "step": 755 }, { "epoch": 0.07, "grad_norm": 0.34458640320872364, "learning_rate": 0.000144688995215311, "loss": 1.0666, "step": 756 }, { "epoch": 0.07, "grad_norm": 0.3426791854461418, "learning_rate": 0.0001448803827751196, "loss": 1.0227, "step": 757 }, { "epoch": 0.07, "grad_norm": 0.33484757936373594, "learning_rate": 0.00014507177033492826, "loss": 1.1276, "step": 758 }, { "epoch": 0.07, "grad_norm": 0.31113235116750904, "learning_rate": 0.00014526315789473686, "loss": 1.1435, "step": 759 }, { "epoch": 0.07, "grad_norm": 0.3187397115174627, "learning_rate": 0.00014545454545454546, "loss": 1.2091, "step": 760 }, { "epoch": 0.07, "grad_norm": 0.322859636752832, "learning_rate": 0.00014564593301435408, "loss": 1.1463, "step": 761 }, { "epoch": 0.07, "grad_norm": 0.3054079698873811, "learning_rate": 0.00014583732057416268, "loss": 1.1532, "step": 762 }, { "epoch": 0.07, "grad_norm": 0.37827200804472255, "learning_rate": 0.00014602870813397128, "loss": 1.1536, "step": 763 }, { "epoch": 0.07, "grad_norm": 0.33688004627148077, "learning_rate": 0.0001462200956937799, "loss": 1.1633, "step": 764 }, { "epoch": 0.07, "grad_norm": 0.3057781806456222, "learning_rate": 0.00014641148325358853, "loss": 1.1336, "step": 765 }, { "epoch": 0.07, "grad_norm": 0.3214472678202446, "learning_rate": 0.00014660287081339713, "loss": 1.178, "step": 766 }, { "epoch": 0.07, "grad_norm": 0.3615283182183831, "learning_rate": 0.00014679425837320576, "loss": 1.1158, "step": 767 }, { "epoch": 0.07, "grad_norm": 0.3147571028922824, "learning_rate": 0.00014698564593301435, "loss": 1.131, "step": 768 }, { "epoch": 0.07, "grad_norm": 0.269304950091198, "learning_rate": 0.00014717703349282298, "loss": 1.1885, "step": 769 }, { "epoch": 0.07, "grad_norm": 0.2817004499058875, "learning_rate": 0.00014736842105263158, "loss": 1.0634, "step": 770 }, { "epoch": 0.07, "grad_norm": 0.34677018154047495, "learning_rate": 0.0001475598086124402, "loss": 1.2329, "step": 771 }, { "epoch": 0.07, "grad_norm": 0.33187657643162116, "learning_rate": 0.00014775119617224883, "loss": 1.0673, "step": 772 }, { "epoch": 0.07, "grad_norm": 0.33397275501257906, "learning_rate": 0.00014794258373205743, "loss": 1.0975, "step": 773 }, { "epoch": 0.07, "grad_norm": 0.2977218730080119, "learning_rate": 0.00014813397129186603, "loss": 1.0581, "step": 774 }, { "epoch": 0.07, "grad_norm": 0.3480266756123412, "learning_rate": 0.00014832535885167465, "loss": 1.2395, "step": 775 }, { "epoch": 0.07, "grad_norm": 0.33681513429762355, "learning_rate": 0.00014851674641148325, "loss": 1.1306, "step": 776 }, { "epoch": 0.07, "grad_norm": 0.31749818370425387, "learning_rate": 0.00014870813397129185, "loss": 1.0901, "step": 777 }, { "epoch": 0.07, "grad_norm": 0.33455138564966774, "learning_rate": 0.0001488995215311005, "loss": 1.2032, "step": 778 }, { "epoch": 0.07, "grad_norm": 0.3504419380990198, "learning_rate": 0.0001490909090909091, "loss": 1.0602, "step": 779 }, { "epoch": 0.07, "grad_norm": 0.3023880222584541, "learning_rate": 0.0001492822966507177, "loss": 1.1374, "step": 780 }, { "epoch": 0.07, "grad_norm": 0.5469036927255182, "learning_rate": 0.00014947368421052633, "loss": 1.1802, "step": 781 }, { "epoch": 0.07, "grad_norm": 0.308842167335779, "learning_rate": 0.00014966507177033493, "loss": 1.0936, "step": 782 }, { "epoch": 0.07, "grad_norm": 0.2986359721179498, "learning_rate": 0.00014985645933014355, "loss": 1.1307, "step": 783 }, { "epoch": 0.08, "grad_norm": 0.31664348432490785, "learning_rate": 0.00015004784688995218, "loss": 1.2007, "step": 784 }, { "epoch": 0.08, "grad_norm": 0.265541663614485, "learning_rate": 0.00015023923444976078, "loss": 1.1276, "step": 785 }, { "epoch": 0.08, "grad_norm": 0.3204030653032886, "learning_rate": 0.0001504306220095694, "loss": 1.1439, "step": 786 }, { "epoch": 0.08, "grad_norm": 0.2783784466928858, "learning_rate": 0.000150622009569378, "loss": 1.2329, "step": 787 }, { "epoch": 0.08, "grad_norm": 0.30216230746794037, "learning_rate": 0.0001508133971291866, "loss": 1.1853, "step": 788 }, { "epoch": 0.08, "grad_norm": 0.3085281413718923, "learning_rate": 0.00015100478468899522, "loss": 1.1631, "step": 789 }, { "epoch": 0.08, "grad_norm": 0.3221875710439296, "learning_rate": 0.00015119617224880382, "loss": 1.0776, "step": 790 }, { "epoch": 0.08, "grad_norm": 0.2846073440656778, "learning_rate": 0.00015138755980861245, "loss": 1.1563, "step": 791 }, { "epoch": 0.08, "grad_norm": 0.26550401235533877, "learning_rate": 0.00015157894736842108, "loss": 1.0467, "step": 792 }, { "epoch": 0.08, "grad_norm": 0.37881120245858113, "learning_rate": 0.00015177033492822967, "loss": 1.1699, "step": 793 }, { "epoch": 0.08, "grad_norm": 0.33594596707038277, "learning_rate": 0.00015196172248803827, "loss": 1.1653, "step": 794 }, { "epoch": 0.08, "grad_norm": 0.3233311908095246, "learning_rate": 0.0001521531100478469, "loss": 1.1507, "step": 795 }, { "epoch": 0.08, "grad_norm": 0.267677768320064, "learning_rate": 0.0001523444976076555, "loss": 1.1356, "step": 796 }, { "epoch": 0.08, "grad_norm": 0.29484155965355746, "learning_rate": 0.00015253588516746412, "loss": 1.0681, "step": 797 }, { "epoch": 0.08, "grad_norm": 0.3284519128368135, "learning_rate": 0.00015272727272727275, "loss": 1.0024, "step": 798 }, { "epoch": 0.08, "grad_norm": 0.28701234783478413, "learning_rate": 0.00015291866028708135, "loss": 1.0892, "step": 799 }, { "epoch": 0.08, "grad_norm": 0.31864235047065265, "learning_rate": 0.00015311004784688995, "loss": 1.1727, "step": 800 }, { "epoch": 0.08, "grad_norm": 0.2745618500591329, "learning_rate": 0.00015330143540669857, "loss": 1.0224, "step": 801 }, { "epoch": 0.08, "grad_norm": 0.30047657670046785, "learning_rate": 0.00015349282296650717, "loss": 1.1017, "step": 802 }, { "epoch": 0.08, "grad_norm": 0.2914367942025512, "learning_rate": 0.0001536842105263158, "loss": 1.0163, "step": 803 }, { "epoch": 0.08, "grad_norm": 0.2816221100141218, "learning_rate": 0.00015387559808612442, "loss": 1.1372, "step": 804 }, { "epoch": 0.08, "grad_norm": 0.34419073166680986, "learning_rate": 0.00015406698564593302, "loss": 1.1991, "step": 805 }, { "epoch": 0.08, "grad_norm": 0.2888432316245811, "learning_rate": 0.00015425837320574165, "loss": 1.1627, "step": 806 }, { "epoch": 0.08, "grad_norm": 0.3191302154072048, "learning_rate": 0.00015444976076555024, "loss": 1.2458, "step": 807 }, { "epoch": 0.08, "grad_norm": 0.2727293598902053, "learning_rate": 0.00015464114832535884, "loss": 1.1085, "step": 808 }, { "epoch": 0.08, "grad_norm": 0.3029996217533104, "learning_rate": 0.00015483253588516747, "loss": 1.1319, "step": 809 }, { "epoch": 0.08, "grad_norm": 0.2874429714766323, "learning_rate": 0.0001550239234449761, "loss": 1.1123, "step": 810 }, { "epoch": 0.08, "grad_norm": 0.2665739546686572, "learning_rate": 0.0001552153110047847, "loss": 1.143, "step": 811 }, { "epoch": 0.08, "grad_norm": 0.2958054625397739, "learning_rate": 0.00015540669856459332, "loss": 1.135, "step": 812 }, { "epoch": 0.08, "grad_norm": 0.255412895550101, "learning_rate": 0.00015559808612440192, "loss": 1.1859, "step": 813 }, { "epoch": 0.08, "grad_norm": 0.31547097343732156, "learning_rate": 0.00015578947368421052, "loss": 1.1657, "step": 814 }, { "epoch": 0.08, "grad_norm": 0.34867394487181774, "learning_rate": 0.00015598086124401914, "loss": 1.0777, "step": 815 }, { "epoch": 0.08, "grad_norm": 0.2921678038171233, "learning_rate": 0.00015617224880382774, "loss": 1.0522, "step": 816 }, { "epoch": 0.08, "grad_norm": 0.28919534963089716, "learning_rate": 0.00015636363636363637, "loss": 1.0779, "step": 817 }, { "epoch": 0.08, "grad_norm": 0.3114505303709412, "learning_rate": 0.000156555023923445, "loss": 1.1119, "step": 818 }, { "epoch": 0.08, "grad_norm": 0.3065682691442617, "learning_rate": 0.0001567464114832536, "loss": 1.1352, "step": 819 }, { "epoch": 0.08, "grad_norm": 0.3614199195441891, "learning_rate": 0.00015693779904306222, "loss": 1.1612, "step": 820 }, { "epoch": 0.08, "grad_norm": 0.3101608207788147, "learning_rate": 0.00015712918660287082, "loss": 1.252, "step": 821 }, { "epoch": 0.08, "grad_norm": 0.2975075722366304, "learning_rate": 0.00015732057416267941, "loss": 1.0687, "step": 822 }, { "epoch": 0.08, "grad_norm": 0.2664048730695144, "learning_rate": 0.00015751196172248807, "loss": 1.0832, "step": 823 }, { "epoch": 0.08, "grad_norm": 0.2952527620974602, "learning_rate": 0.00015770334928229667, "loss": 1.1082, "step": 824 }, { "epoch": 0.08, "grad_norm": 0.3295245234429144, "learning_rate": 0.00015789473684210527, "loss": 1.162, "step": 825 }, { "epoch": 0.08, "grad_norm": 0.3102397113238992, "learning_rate": 0.0001580861244019139, "loss": 1.0738, "step": 826 }, { "epoch": 0.08, "grad_norm": 0.2693269386909286, "learning_rate": 0.0001582775119617225, "loss": 1.028, "step": 827 }, { "epoch": 0.08, "grad_norm": 0.3669225930993825, "learning_rate": 0.0001584688995215311, "loss": 1.15, "step": 828 }, { "epoch": 0.08, "grad_norm": 0.29318593683220057, "learning_rate": 0.00015866028708133974, "loss": 1.2408, "step": 829 }, { "epoch": 0.08, "grad_norm": 0.2894248048442511, "learning_rate": 0.00015885167464114834, "loss": 1.1752, "step": 830 }, { "epoch": 0.08, "grad_norm": 0.29125115224083087, "learning_rate": 0.00015904306220095694, "loss": 0.9574, "step": 831 }, { "epoch": 0.08, "grad_norm": 0.40878890760263803, "learning_rate": 0.00015923444976076556, "loss": 1.2104, "step": 832 }, { "epoch": 0.08, "grad_norm": 0.27861032872082103, "learning_rate": 0.00015942583732057416, "loss": 1.1413, "step": 833 }, { "epoch": 0.08, "grad_norm": 0.2982611167661862, "learning_rate": 0.0001596172248803828, "loss": 1.2593, "step": 834 }, { "epoch": 0.08, "grad_norm": 0.29326214489772795, "learning_rate": 0.0001598086124401914, "loss": 1.18, "step": 835 }, { "epoch": 0.08, "grad_norm": 0.27131394008530785, "learning_rate": 0.00016, "loss": 1.1674, "step": 836 }, { "epoch": 0.08, "grad_norm": 0.26712034034407034, "learning_rate": 0.00016019138755980864, "loss": 1.0161, "step": 837 }, { "epoch": 0.08, "grad_norm": 0.36369725154573823, "learning_rate": 0.00016038277511961724, "loss": 1.175, "step": 838 }, { "epoch": 0.08, "grad_norm": 0.3085671724318983, "learning_rate": 0.00016057416267942584, "loss": 1.1461, "step": 839 }, { "epoch": 0.08, "grad_norm": 0.28077141855727894, "learning_rate": 0.00016076555023923446, "loss": 1.0922, "step": 840 }, { "epoch": 0.08, "grad_norm": 0.3270351461507469, "learning_rate": 0.00016095693779904306, "loss": 1.0463, "step": 841 }, { "epoch": 0.08, "grad_norm": 0.23981764247780088, "learning_rate": 0.0001611483253588517, "loss": 0.9635, "step": 842 }, { "epoch": 0.08, "grad_norm": 0.28201419160149344, "learning_rate": 0.0001613397129186603, "loss": 1.1173, "step": 843 }, { "epoch": 0.08, "grad_norm": 0.26889491956006867, "learning_rate": 0.0001615311004784689, "loss": 1.1132, "step": 844 }, { "epoch": 0.08, "grad_norm": 0.27688897066555573, "learning_rate": 0.0001617224880382775, "loss": 1.0963, "step": 845 }, { "epoch": 0.08, "grad_norm": 0.24565660227717426, "learning_rate": 0.00016191387559808614, "loss": 1.0694, "step": 846 }, { "epoch": 0.08, "grad_norm": 0.28311675225629357, "learning_rate": 0.00016210526315789473, "loss": 1.0727, "step": 847 }, { "epoch": 0.08, "grad_norm": 0.25275425247450756, "learning_rate": 0.00016229665071770336, "loss": 1.1726, "step": 848 }, { "epoch": 0.08, "grad_norm": 0.3073349091629191, "learning_rate": 0.00016248803827751199, "loss": 1.237, "step": 849 }, { "epoch": 0.08, "grad_norm": 0.3698105893782691, "learning_rate": 0.00016267942583732058, "loss": 1.1529, "step": 850 }, { "epoch": 0.08, "grad_norm": 0.3066504421764291, "learning_rate": 0.00016287081339712918, "loss": 1.1875, "step": 851 }, { "epoch": 0.08, "grad_norm": 0.2853734077261547, "learning_rate": 0.0001630622009569378, "loss": 1.1232, "step": 852 }, { "epoch": 0.08, "grad_norm": 0.27498683022213083, "learning_rate": 0.0001632535885167464, "loss": 1.2345, "step": 853 }, { "epoch": 0.08, "grad_norm": 0.26436373680139863, "learning_rate": 0.00016344497607655503, "loss": 1.1443, "step": 854 }, { "epoch": 0.08, "grad_norm": 0.29039546604591765, "learning_rate": 0.00016363636363636366, "loss": 1.1451, "step": 855 }, { "epoch": 0.08, "grad_norm": 0.2845332734411919, "learning_rate": 0.00016382775119617226, "loss": 1.1658, "step": 856 }, { "epoch": 0.08, "grad_norm": 0.3118984941168386, "learning_rate": 0.00016401913875598088, "loss": 1.133, "step": 857 }, { "epoch": 0.08, "grad_norm": 0.2910324342007811, "learning_rate": 0.00016421052631578948, "loss": 1.138, "step": 858 }, { "epoch": 0.08, "grad_norm": 0.3067211385198509, "learning_rate": 0.00016440191387559808, "loss": 1.1517, "step": 859 }, { "epoch": 0.08, "grad_norm": 0.290740982507053, "learning_rate": 0.0001645933014354067, "loss": 1.0561, "step": 860 }, { "epoch": 0.08, "grad_norm": 0.3144516777697552, "learning_rate": 0.0001647846889952153, "loss": 1.0661, "step": 861 }, { "epoch": 0.08, "grad_norm": 0.2970636821654555, "learning_rate": 0.00016497607655502393, "loss": 1.1634, "step": 862 }, { "epoch": 0.08, "grad_norm": 0.3146333025319219, "learning_rate": 0.00016516746411483256, "loss": 1.0652, "step": 863 }, { "epoch": 0.08, "grad_norm": 0.2644767264588937, "learning_rate": 0.00016535885167464116, "loss": 1.1516, "step": 864 }, { "epoch": 0.08, "grad_norm": 0.3006840203451009, "learning_rate": 0.00016555023923444975, "loss": 1.1175, "step": 865 }, { "epoch": 0.08, "grad_norm": 0.2809420339644184, "learning_rate": 0.00016574162679425838, "loss": 1.1057, "step": 866 }, { "epoch": 0.08, "grad_norm": 0.3769059520574524, "learning_rate": 0.00016593301435406698, "loss": 1.1713, "step": 867 }, { "epoch": 0.08, "grad_norm": 0.33622542833176833, "learning_rate": 0.0001661244019138756, "loss": 1.2223, "step": 868 }, { "epoch": 0.08, "grad_norm": 0.2715119578007493, "learning_rate": 0.00016631578947368423, "loss": 1.0926, "step": 869 }, { "epoch": 0.08, "grad_norm": 0.2788006611781337, "learning_rate": 0.00016650717703349283, "loss": 1.0598, "step": 870 }, { "epoch": 0.08, "grad_norm": 0.29918887354582546, "learning_rate": 0.00016669856459330145, "loss": 1.1363, "step": 871 }, { "epoch": 0.08, "grad_norm": 0.27116956033088324, "learning_rate": 0.00016688995215311005, "loss": 1.1357, "step": 872 }, { "epoch": 0.08, "grad_norm": 0.3651972053932287, "learning_rate": 0.00016708133971291865, "loss": 1.1862, "step": 873 }, { "epoch": 0.08, "grad_norm": 0.2941314020229377, "learning_rate": 0.00016727272727272728, "loss": 1.229, "step": 874 }, { "epoch": 0.08, "grad_norm": 0.29667386462622886, "learning_rate": 0.0001674641148325359, "loss": 1.105, "step": 875 }, { "epoch": 0.08, "grad_norm": 0.2888327808151174, "learning_rate": 0.0001676555023923445, "loss": 1.1328, "step": 876 }, { "epoch": 0.08, "grad_norm": 0.31564538771648376, "learning_rate": 0.00016784688995215313, "loss": 1.2407, "step": 877 }, { "epoch": 0.08, "grad_norm": 0.31419100780147885, "learning_rate": 0.00016803827751196173, "loss": 1.0472, "step": 878 }, { "epoch": 0.08, "grad_norm": 0.26956520480679047, "learning_rate": 0.00016822966507177033, "loss": 1.1524, "step": 879 }, { "epoch": 0.08, "grad_norm": 0.2726391902939466, "learning_rate": 0.00016842105263157895, "loss": 1.11, "step": 880 }, { "epoch": 0.08, "grad_norm": 0.2993169367221595, "learning_rate": 0.00016861244019138758, "loss": 1.2059, "step": 881 }, { "epoch": 0.08, "grad_norm": 0.2936290798225595, "learning_rate": 0.00016880382775119618, "loss": 1.1026, "step": 882 }, { "epoch": 0.08, "grad_norm": 0.25728140759420537, "learning_rate": 0.0001689952153110048, "loss": 1.136, "step": 883 }, { "epoch": 0.08, "grad_norm": 0.2659884049250215, "learning_rate": 0.0001691866028708134, "loss": 1.0311, "step": 884 }, { "epoch": 0.08, "grad_norm": 0.29849696827544475, "learning_rate": 0.00016937799043062203, "loss": 1.0995, "step": 885 }, { "epoch": 0.08, "grad_norm": 0.28395796526200556, "learning_rate": 0.00016956937799043062, "loss": 1.0948, "step": 886 }, { "epoch": 0.08, "grad_norm": 0.3445355283030851, "learning_rate": 0.00016976076555023925, "loss": 1.1103, "step": 887 }, { "epoch": 0.08, "grad_norm": 0.2538735838657434, "learning_rate": 0.00016995215311004788, "loss": 1.0752, "step": 888 }, { "epoch": 0.09, "grad_norm": 0.3026322031952384, "learning_rate": 0.00017014354066985647, "loss": 1.1359, "step": 889 }, { "epoch": 0.09, "grad_norm": 0.3200296529545863, "learning_rate": 0.00017033492822966507, "loss": 1.1851, "step": 890 }, { "epoch": 0.09, "grad_norm": 0.29333134950174405, "learning_rate": 0.0001705263157894737, "loss": 1.0547, "step": 891 }, { "epoch": 0.09, "grad_norm": 0.3189167751421413, "learning_rate": 0.0001707177033492823, "loss": 1.0605, "step": 892 }, { "epoch": 0.09, "grad_norm": 0.28038685627245685, "learning_rate": 0.0001709090909090909, "loss": 1.1087, "step": 893 }, { "epoch": 0.09, "grad_norm": 0.27600976689482803, "learning_rate": 0.00017110047846889955, "loss": 1.1681, "step": 894 }, { "epoch": 0.09, "grad_norm": 0.30930194872855393, "learning_rate": 0.00017129186602870815, "loss": 1.1325, "step": 895 }, { "epoch": 0.09, "grad_norm": 0.31300532276472537, "learning_rate": 0.00017148325358851675, "loss": 1.1002, "step": 896 }, { "epoch": 0.09, "grad_norm": 0.2991016495421357, "learning_rate": 0.00017167464114832537, "loss": 1.076, "step": 897 }, { "epoch": 0.09, "grad_norm": 0.31594108017658423, "learning_rate": 0.00017186602870813397, "loss": 1.136, "step": 898 }, { "epoch": 0.09, "grad_norm": 0.27497095491638146, "learning_rate": 0.0001720574162679426, "loss": 1.2323, "step": 899 }, { "epoch": 0.09, "grad_norm": 0.310962024257193, "learning_rate": 0.00017224880382775122, "loss": 1.1398, "step": 900 }, { "epoch": 0.09, "grad_norm": 0.29697512814616595, "learning_rate": 0.00017244019138755982, "loss": 1.1342, "step": 901 }, { "epoch": 0.09, "grad_norm": 0.239946301235031, "learning_rate": 0.00017263157894736842, "loss": 1.2081, "step": 902 }, { "epoch": 0.09, "grad_norm": 0.2874130680609666, "learning_rate": 0.00017282296650717705, "loss": 1.125, "step": 903 }, { "epoch": 0.09, "grad_norm": 0.3210023421862061, "learning_rate": 0.00017301435406698564, "loss": 1.2268, "step": 904 }, { "epoch": 0.09, "grad_norm": 0.25985019789372976, "learning_rate": 0.00017320574162679427, "loss": 1.0704, "step": 905 }, { "epoch": 0.09, "grad_norm": 0.28932579761775323, "learning_rate": 0.00017339712918660287, "loss": 1.1939, "step": 906 }, { "epoch": 0.09, "grad_norm": 0.2984436701321717, "learning_rate": 0.0001735885167464115, "loss": 1.1441, "step": 907 }, { "epoch": 0.09, "grad_norm": 0.33279429925895665, "learning_rate": 0.00017377990430622012, "loss": 1.2299, "step": 908 }, { "epoch": 0.09, "grad_norm": 0.28785481315035893, "learning_rate": 0.00017397129186602872, "loss": 1.118, "step": 909 }, { "epoch": 0.09, "grad_norm": 0.31655385538112546, "learning_rate": 0.00017416267942583732, "loss": 1.189, "step": 910 }, { "epoch": 0.09, "grad_norm": 0.3038855880357351, "learning_rate": 0.00017435406698564594, "loss": 1.0654, "step": 911 }, { "epoch": 0.09, "grad_norm": 0.3042729440177485, "learning_rate": 0.00017454545454545454, "loss": 1.1553, "step": 912 }, { "epoch": 0.09, "grad_norm": 0.266680706575244, "learning_rate": 0.00017473684210526317, "loss": 1.1006, "step": 913 }, { "epoch": 0.09, "grad_norm": 0.2741425104907168, "learning_rate": 0.0001749282296650718, "loss": 1.2319, "step": 914 }, { "epoch": 0.09, "grad_norm": 0.2520129822636353, "learning_rate": 0.0001751196172248804, "loss": 1.1394, "step": 915 }, { "epoch": 0.09, "grad_norm": 0.28607836069753895, "learning_rate": 0.000175311004784689, "loss": 1.2108, "step": 916 }, { "epoch": 0.09, "grad_norm": 0.28836296745411716, "learning_rate": 0.00017550239234449762, "loss": 1.0772, "step": 917 }, { "epoch": 0.09, "grad_norm": 0.26291170956841414, "learning_rate": 0.00017569377990430622, "loss": 1.1532, "step": 918 }, { "epoch": 0.09, "grad_norm": 0.27624746568071396, "learning_rate": 0.00017588516746411484, "loss": 1.1178, "step": 919 }, { "epoch": 0.09, "grad_norm": 0.28200386776822395, "learning_rate": 0.00017607655502392347, "loss": 1.1105, "step": 920 }, { "epoch": 0.09, "grad_norm": 0.25462518315632554, "learning_rate": 0.00017626794258373207, "loss": 1.0717, "step": 921 }, { "epoch": 0.09, "grad_norm": 0.27932944411599797, "learning_rate": 0.0001764593301435407, "loss": 1.2486, "step": 922 }, { "epoch": 0.09, "grad_norm": 0.29462379215808215, "learning_rate": 0.0001766507177033493, "loss": 1.226, "step": 923 }, { "epoch": 0.09, "grad_norm": 0.2741976731865599, "learning_rate": 0.0001768421052631579, "loss": 1.1797, "step": 924 }, { "epoch": 0.09, "grad_norm": 0.2532434659032646, "learning_rate": 0.00017703349282296652, "loss": 1.0828, "step": 925 }, { "epoch": 0.09, "grad_norm": 0.329346060797211, "learning_rate": 0.00017722488038277514, "loss": 1.1125, "step": 926 }, { "epoch": 0.09, "grad_norm": 0.2644644824352827, "learning_rate": 0.00017741626794258374, "loss": 1.1048, "step": 927 }, { "epoch": 0.09, "grad_norm": 0.2617940651450908, "learning_rate": 0.00017760765550239237, "loss": 1.2178, "step": 928 }, { "epoch": 0.09, "grad_norm": 0.29432756373678265, "learning_rate": 0.00017779904306220096, "loss": 1.1336, "step": 929 }, { "epoch": 0.09, "grad_norm": 0.28911304731696175, "learning_rate": 0.00017799043062200956, "loss": 1.1578, "step": 930 }, { "epoch": 0.09, "grad_norm": 0.3006870934673598, "learning_rate": 0.0001781818181818182, "loss": 1.0588, "step": 931 }, { "epoch": 0.09, "grad_norm": 0.31210608325092193, "learning_rate": 0.0001783732057416268, "loss": 1.2426, "step": 932 }, { "epoch": 0.09, "grad_norm": 0.27626145357478726, "learning_rate": 0.0001785645933014354, "loss": 1.1609, "step": 933 }, { "epoch": 0.09, "grad_norm": 0.2683905399507039, "learning_rate": 0.00017875598086124404, "loss": 1.1457, "step": 934 }, { "epoch": 0.09, "grad_norm": 0.2661353870666551, "learning_rate": 0.00017894736842105264, "loss": 1.2095, "step": 935 }, { "epoch": 0.09, "grad_norm": 0.33062559297582395, "learning_rate": 0.00017913875598086126, "loss": 1.2396, "step": 936 }, { "epoch": 0.09, "grad_norm": 0.26950737804952357, "learning_rate": 0.00017933014354066986, "loss": 1.1983, "step": 937 }, { "epoch": 0.09, "grad_norm": 0.29499843784362234, "learning_rate": 0.00017952153110047846, "loss": 1.2442, "step": 938 }, { "epoch": 0.09, "grad_norm": 0.31186904072609634, "learning_rate": 0.00017971291866028709, "loss": 1.1494, "step": 939 }, { "epoch": 0.09, "grad_norm": 0.2514154775647367, "learning_rate": 0.0001799043062200957, "loss": 1.1061, "step": 940 }, { "epoch": 0.09, "grad_norm": 0.28595401946483395, "learning_rate": 0.0001800956937799043, "loss": 1.0322, "step": 941 }, { "epoch": 0.09, "grad_norm": 0.32459069445525873, "learning_rate": 0.00018028708133971294, "loss": 1.2007, "step": 942 }, { "epoch": 0.09, "grad_norm": 0.31789984803696647, "learning_rate": 0.00018047846889952154, "loss": 1.1057, "step": 943 }, { "epoch": 0.09, "grad_norm": 0.2893543986536651, "learning_rate": 0.00018066985645933013, "loss": 1.1108, "step": 944 }, { "epoch": 0.09, "grad_norm": 0.225754938363265, "learning_rate": 0.00018086124401913876, "loss": 1.0842, "step": 945 }, { "epoch": 0.09, "grad_norm": 0.3473860341063463, "learning_rate": 0.00018105263157894739, "loss": 1.0824, "step": 946 }, { "epoch": 0.09, "grad_norm": 0.2922950981615233, "learning_rate": 0.00018124401913875598, "loss": 1.1143, "step": 947 }, { "epoch": 0.09, "grad_norm": 0.29161352434420446, "learning_rate": 0.0001814354066985646, "loss": 1.2325, "step": 948 }, { "epoch": 0.09, "grad_norm": 0.29481985803408, "learning_rate": 0.0001816267942583732, "loss": 1.1527, "step": 949 }, { "epoch": 0.09, "grad_norm": 0.2874729386092549, "learning_rate": 0.00018181818181818183, "loss": 1.1476, "step": 950 }, { "epoch": 0.09, "grad_norm": 0.24921692256090058, "learning_rate": 0.00018200956937799043, "loss": 1.1567, "step": 951 }, { "epoch": 0.09, "grad_norm": 0.31742487463024494, "learning_rate": 0.00018220095693779906, "loss": 1.1013, "step": 952 }, { "epoch": 0.09, "grad_norm": 0.27456172247325683, "learning_rate": 0.00018239234449760766, "loss": 1.2069, "step": 953 }, { "epoch": 0.09, "grad_norm": 0.28859064993450634, "learning_rate": 0.00018258373205741628, "loss": 1.2123, "step": 954 }, { "epoch": 0.09, "grad_norm": 0.2750397123362856, "learning_rate": 0.00018277511961722488, "loss": 1.1231, "step": 955 }, { "epoch": 0.09, "grad_norm": 0.23915748062608722, "learning_rate": 0.0001829665071770335, "loss": 1.1612, "step": 956 }, { "epoch": 0.09, "grad_norm": 0.2722479832588104, "learning_rate": 0.0001831578947368421, "loss": 1.1897, "step": 957 }, { "epoch": 0.09, "grad_norm": 0.2863352675014705, "learning_rate": 0.00018334928229665073, "loss": 1.1062, "step": 958 }, { "epoch": 0.09, "grad_norm": 0.25258807482282203, "learning_rate": 0.00018354066985645936, "loss": 1.1574, "step": 959 }, { "epoch": 0.09, "grad_norm": 0.27455220282180104, "learning_rate": 0.00018373205741626796, "loss": 1.0886, "step": 960 }, { "epoch": 0.09, "grad_norm": 0.2693559937731345, "learning_rate": 0.00018392344497607656, "loss": 1.1873, "step": 961 }, { "epoch": 0.09, "grad_norm": 0.2487980816971801, "learning_rate": 0.00018411483253588518, "loss": 1.2245, "step": 962 }, { "epoch": 0.09, "grad_norm": 0.6732577941816555, "learning_rate": 0.00018430622009569378, "loss": 1.1813, "step": 963 }, { "epoch": 0.09, "grad_norm": 0.2912759304052633, "learning_rate": 0.00018449760765550238, "loss": 1.168, "step": 964 }, { "epoch": 0.09, "grad_norm": 0.26989856763778836, "learning_rate": 0.00018468899521531103, "loss": 1.0137, "step": 965 }, { "epoch": 0.09, "grad_norm": 0.25602835842131616, "learning_rate": 0.00018488038277511963, "loss": 1.1879, "step": 966 }, { "epoch": 0.09, "grad_norm": 0.25725078226468107, "learning_rate": 0.00018507177033492823, "loss": 1.174, "step": 967 }, { "epoch": 0.09, "grad_norm": 0.27889203556658276, "learning_rate": 0.00018526315789473685, "loss": 1.2013, "step": 968 }, { "epoch": 0.09, "grad_norm": 0.28462011286220296, "learning_rate": 0.00018545454545454545, "loss": 1.0878, "step": 969 }, { "epoch": 0.09, "grad_norm": 0.27682759364760257, "learning_rate": 0.00018564593301435408, "loss": 1.085, "step": 970 }, { "epoch": 0.09, "grad_norm": 0.28136179421463786, "learning_rate": 0.0001858373205741627, "loss": 1.1807, "step": 971 }, { "epoch": 0.09, "grad_norm": 0.2506017584700625, "learning_rate": 0.0001860287081339713, "loss": 1.1538, "step": 972 }, { "epoch": 0.09, "grad_norm": 0.2681898339952538, "learning_rate": 0.00018622009569377993, "loss": 1.0615, "step": 973 }, { "epoch": 0.09, "grad_norm": 0.2838246571307257, "learning_rate": 0.00018641148325358853, "loss": 1.1778, "step": 974 }, { "epoch": 0.09, "grad_norm": 0.2758038504041395, "learning_rate": 0.00018660287081339713, "loss": 1.1038, "step": 975 }, { "epoch": 0.09, "grad_norm": 0.28266068816982276, "learning_rate": 0.00018679425837320575, "loss": 1.1487, "step": 976 }, { "epoch": 0.09, "grad_norm": 0.2655825547541941, "learning_rate": 0.00018698564593301435, "loss": 1.0846, "step": 977 }, { "epoch": 0.09, "grad_norm": 0.2750864417199089, "learning_rate": 0.00018717703349282298, "loss": 1.0925, "step": 978 }, { "epoch": 0.09, "grad_norm": 0.28328763891237363, "learning_rate": 0.0001873684210526316, "loss": 1.1602, "step": 979 }, { "epoch": 0.09, "grad_norm": 0.274427495879147, "learning_rate": 0.0001875598086124402, "loss": 1.0184, "step": 980 }, { "epoch": 0.09, "grad_norm": 0.29677822769592865, "learning_rate": 0.0001877511961722488, "loss": 1.1316, "step": 981 }, { "epoch": 0.09, "grad_norm": 0.35675044865453487, "learning_rate": 0.00018794258373205743, "loss": 1.1299, "step": 982 }, { "epoch": 0.09, "grad_norm": 0.27471990871455726, "learning_rate": 0.00018813397129186602, "loss": 1.0977, "step": 983 }, { "epoch": 0.09, "grad_norm": 0.32677576558264015, "learning_rate": 0.00018832535885167465, "loss": 1.0416, "step": 984 }, { "epoch": 0.09, "grad_norm": 0.3449420887466517, "learning_rate": 0.00018851674641148328, "loss": 1.218, "step": 985 }, { "epoch": 0.09, "grad_norm": 0.3187055721961639, "learning_rate": 0.00018870813397129187, "loss": 1.1091, "step": 986 }, { "epoch": 0.09, "grad_norm": 0.3143792697319127, "learning_rate": 0.0001888995215311005, "loss": 1.067, "step": 987 }, { "epoch": 0.09, "grad_norm": 0.2742909947428014, "learning_rate": 0.0001890909090909091, "loss": 1.225, "step": 988 }, { "epoch": 0.09, "grad_norm": 0.27319677319302543, "learning_rate": 0.0001892822966507177, "loss": 1.1487, "step": 989 }, { "epoch": 0.09, "grad_norm": 0.2758157497549949, "learning_rate": 0.00018947368421052632, "loss": 1.1109, "step": 990 }, { "epoch": 0.09, "grad_norm": 0.2574079506381213, "learning_rate": 0.00018966507177033495, "loss": 1.1476, "step": 991 }, { "epoch": 0.09, "grad_norm": 0.332702187603211, "learning_rate": 0.00018985645933014355, "loss": 1.0896, "step": 992 }, { "epoch": 0.1, "grad_norm": 0.2861721583962, "learning_rate": 0.00019004784688995217, "loss": 1.2234, "step": 993 }, { "epoch": 0.1, "grad_norm": 0.2467936267051518, "learning_rate": 0.00019023923444976077, "loss": 1.1656, "step": 994 }, { "epoch": 0.1, "grad_norm": 0.28781558903434595, "learning_rate": 0.00019043062200956937, "loss": 1.1853, "step": 995 }, { "epoch": 0.1, "grad_norm": 0.2916455357271407, "learning_rate": 0.000190622009569378, "loss": 1.0269, "step": 996 }, { "epoch": 0.1, "grad_norm": 0.3201870144576391, "learning_rate": 0.00019081339712918662, "loss": 1.1852, "step": 997 }, { "epoch": 0.1, "grad_norm": 0.27545254213477577, "learning_rate": 0.00019100478468899522, "loss": 1.0957, "step": 998 }, { "epoch": 0.1, "grad_norm": 0.2826496819385951, "learning_rate": 0.00019119617224880385, "loss": 1.2255, "step": 999 }, { "epoch": 0.1, "grad_norm": 0.2967102485192698, "learning_rate": 0.00019138755980861245, "loss": 1.1536, "step": 1000 }, { "epoch": 0.1, "grad_norm": 0.29117608778714893, "learning_rate": 0.00019157894736842104, "loss": 1.0878, "step": 1001 }, { "epoch": 0.1, "grad_norm": 0.28851304804169287, "learning_rate": 0.00019177033492822967, "loss": 1.0898, "step": 1002 }, { "epoch": 0.1, "grad_norm": 0.27111717804566754, "learning_rate": 0.00019196172248803827, "loss": 1.2214, "step": 1003 }, { "epoch": 0.1, "grad_norm": 0.29632228590140464, "learning_rate": 0.0001921531100478469, "loss": 1.1534, "step": 1004 }, { "epoch": 0.1, "grad_norm": 0.30166486227944156, "learning_rate": 0.00019234449760765552, "loss": 1.1784, "step": 1005 }, { "epoch": 0.1, "grad_norm": 0.261168294050402, "learning_rate": 0.00019253588516746412, "loss": 1.2274, "step": 1006 }, { "epoch": 0.1, "grad_norm": 0.2696524388115216, "learning_rate": 0.00019272727272727274, "loss": 1.1256, "step": 1007 }, { "epoch": 0.1, "grad_norm": 0.30883168940001077, "learning_rate": 0.00019291866028708134, "loss": 1.1804, "step": 1008 }, { "epoch": 0.1, "grad_norm": 0.2901725454324794, "learning_rate": 0.00019311004784688994, "loss": 1.0656, "step": 1009 }, { "epoch": 0.1, "grad_norm": 0.30050679633218647, "learning_rate": 0.0001933014354066986, "loss": 1.1217, "step": 1010 }, { "epoch": 0.1, "grad_norm": 0.2763711001518656, "learning_rate": 0.0001934928229665072, "loss": 1.2114, "step": 1011 }, { "epoch": 0.1, "grad_norm": 0.2676109407157463, "learning_rate": 0.0001936842105263158, "loss": 1.0474, "step": 1012 }, { "epoch": 0.1, "grad_norm": 0.2747480845011328, "learning_rate": 0.00019387559808612442, "loss": 1.038, "step": 1013 }, { "epoch": 0.1, "grad_norm": 0.24960295337688276, "learning_rate": 0.00019406698564593302, "loss": 1.0625, "step": 1014 }, { "epoch": 0.1, "grad_norm": 0.2721591800223072, "learning_rate": 0.00019425837320574162, "loss": 1.1327, "step": 1015 }, { "epoch": 0.1, "grad_norm": 0.2877329511310855, "learning_rate": 0.00019444976076555027, "loss": 1.228, "step": 1016 }, { "epoch": 0.1, "grad_norm": 0.2568028077694964, "learning_rate": 0.00019464114832535887, "loss": 1.0683, "step": 1017 }, { "epoch": 0.1, "grad_norm": 0.2678405294971607, "learning_rate": 0.00019483253588516747, "loss": 1.1125, "step": 1018 }, { "epoch": 0.1, "grad_norm": 0.2963652522200652, "learning_rate": 0.0001950239234449761, "loss": 1.0905, "step": 1019 }, { "epoch": 0.1, "grad_norm": 0.26009393679319537, "learning_rate": 0.0001952153110047847, "loss": 1.1036, "step": 1020 }, { "epoch": 0.1, "grad_norm": 0.3049720818580699, "learning_rate": 0.00019540669856459332, "loss": 1.1964, "step": 1021 }, { "epoch": 0.1, "grad_norm": 0.3050130613963167, "learning_rate": 0.00019559808612440191, "loss": 1.1293, "step": 1022 }, { "epoch": 0.1, "grad_norm": 0.24297369971258104, "learning_rate": 0.00019578947368421054, "loss": 1.1143, "step": 1023 }, { "epoch": 0.1, "grad_norm": 0.24077286684290172, "learning_rate": 0.00019598086124401917, "loss": 1.0764, "step": 1024 }, { "epoch": 0.1, "grad_norm": 0.3113100418888948, "learning_rate": 0.00019617224880382777, "loss": 1.1246, "step": 1025 }, { "epoch": 0.1, "grad_norm": 0.2784731985247703, "learning_rate": 0.00019636363636363636, "loss": 1.0998, "step": 1026 }, { "epoch": 0.1, "grad_norm": 0.2542533680624268, "learning_rate": 0.000196555023923445, "loss": 1.114, "step": 1027 }, { "epoch": 0.1, "grad_norm": 0.28332309977048276, "learning_rate": 0.0001967464114832536, "loss": 1.1719, "step": 1028 }, { "epoch": 0.1, "grad_norm": 0.25261282572279636, "learning_rate": 0.00019693779904306221, "loss": 1.1069, "step": 1029 }, { "epoch": 0.1, "grad_norm": 0.28908512950153364, "learning_rate": 0.00019712918660287084, "loss": 1.0939, "step": 1030 }, { "epoch": 0.1, "grad_norm": 0.2624681443069945, "learning_rate": 0.00019732057416267944, "loss": 1.1426, "step": 1031 }, { "epoch": 0.1, "grad_norm": 0.26954809036931093, "learning_rate": 0.00019751196172248804, "loss": 1.0777, "step": 1032 }, { "epoch": 0.1, "grad_norm": 0.2921435580998635, "learning_rate": 0.00019770334928229666, "loss": 1.0915, "step": 1033 }, { "epoch": 0.1, "grad_norm": 0.21257005701595452, "learning_rate": 0.00019789473684210526, "loss": 1.1055, "step": 1034 }, { "epoch": 0.1, "grad_norm": 0.27523674720420943, "learning_rate": 0.0001980861244019139, "loss": 1.0515, "step": 1035 }, { "epoch": 0.1, "grad_norm": 0.26415508019617007, "learning_rate": 0.0001982775119617225, "loss": 1.0515, "step": 1036 }, { "epoch": 0.1, "grad_norm": 0.25592610307218705, "learning_rate": 0.0001984688995215311, "loss": 1.1751, "step": 1037 }, { "epoch": 0.1, "grad_norm": 0.25430310175648296, "learning_rate": 0.00019866028708133974, "loss": 1.123, "step": 1038 }, { "epoch": 0.1, "grad_norm": 0.2861528947212422, "learning_rate": 0.00019885167464114834, "loss": 1.0859, "step": 1039 }, { "epoch": 0.1, "grad_norm": 0.2738046774076065, "learning_rate": 0.00019904306220095693, "loss": 1.1405, "step": 1040 }, { "epoch": 0.1, "grad_norm": 0.2726143048105954, "learning_rate": 0.00019923444976076556, "loss": 1.264, "step": 1041 }, { "epoch": 0.1, "grad_norm": 0.27872152562297303, "learning_rate": 0.0001994258373205742, "loss": 1.3155, "step": 1042 }, { "epoch": 0.1, "grad_norm": 0.2852218650666301, "learning_rate": 0.00019961722488038279, "loss": 1.1655, "step": 1043 }, { "epoch": 0.1, "grad_norm": 0.23588395214808744, "learning_rate": 0.0001998086124401914, "loss": 1.0397, "step": 1044 }, { "epoch": 0.1, "grad_norm": 0.2666368869674148, "learning_rate": 0.0002, "loss": 1.1416, "step": 1045 }, { "epoch": 0.1, "grad_norm": 0.2930022628207633, "learning_rate": 0.00019999999874871857, "loss": 1.0405, "step": 1046 }, { "epoch": 0.1, "grad_norm": 0.2710774549293637, "learning_rate": 0.00019999999499487433, "loss": 1.1506, "step": 1047 }, { "epoch": 0.1, "grad_norm": 0.25385995300238745, "learning_rate": 0.00019999998873846737, "loss": 1.267, "step": 1048 }, { "epoch": 0.1, "grad_norm": 0.27774997441775784, "learning_rate": 0.00019999997997949785, "loss": 1.1939, "step": 1049 }, { "epoch": 0.1, "grad_norm": 0.26076832391011084, "learning_rate": 0.00019999996871796597, "loss": 1.1117, "step": 1050 }, { "epoch": 0.1, "grad_norm": 0.235868498103916, "learning_rate": 0.00019999995495387202, "loss": 1.1701, "step": 1051 }, { "epoch": 0.1, "grad_norm": 0.24077986955464514, "learning_rate": 0.00019999993868721638, "loss": 1.137, "step": 1052 }, { "epoch": 0.1, "grad_norm": 0.2512166776788439, "learning_rate": 0.0001999999199179994, "loss": 1.1414, "step": 1053 }, { "epoch": 0.1, "grad_norm": 0.2654407191870313, "learning_rate": 0.00019999989864622159, "loss": 1.0333, "step": 1054 }, { "epoch": 0.1, "grad_norm": 0.2642901952276395, "learning_rate": 0.00019999987487188348, "loss": 1.1385, "step": 1055 }, { "epoch": 0.1, "grad_norm": 0.23723222653673273, "learning_rate": 0.00019999984859498562, "loss": 1.1103, "step": 1056 }, { "epoch": 0.1, "grad_norm": 0.2576508658140534, "learning_rate": 0.00019999981981552872, "loss": 1.0641, "step": 1057 }, { "epoch": 0.1, "grad_norm": 0.2512702002371694, "learning_rate": 0.00019999978853351346, "loss": 1.1742, "step": 1058 }, { "epoch": 0.1, "grad_norm": 0.2544113883838849, "learning_rate": 0.0001999997547489407, "loss": 1.249, "step": 1059 }, { "epoch": 0.1, "grad_norm": 0.29453181610522905, "learning_rate": 0.00019999971846181117, "loss": 1.0817, "step": 1060 }, { "epoch": 0.1, "grad_norm": 0.3046660451518799, "learning_rate": 0.00019999967967212587, "loss": 1.1202, "step": 1061 }, { "epoch": 0.1, "grad_norm": 0.2711240525076058, "learning_rate": 0.0001999996383798857, "loss": 1.1488, "step": 1062 }, { "epoch": 0.1, "grad_norm": 0.26069781523913904, "learning_rate": 0.0001999995945850918, "loss": 1.1204, "step": 1063 }, { "epoch": 0.1, "grad_norm": 0.2759936162347398, "learning_rate": 0.00019999954828774514, "loss": 1.2418, "step": 1064 }, { "epoch": 0.1, "grad_norm": 0.32559445650786323, "learning_rate": 0.00019999949948784696, "loss": 1.0396, "step": 1065 }, { "epoch": 0.1, "grad_norm": 0.29446236701732303, "learning_rate": 0.00019999944818539843, "loss": 0.9828, "step": 1066 }, { "epoch": 0.1, "grad_norm": 0.2512414839667447, "learning_rate": 0.00019999939438040092, "loss": 1.0964, "step": 1067 }, { "epoch": 0.1, "grad_norm": 0.2522166834487399, "learning_rate": 0.00019999933807285567, "loss": 1.1991, "step": 1068 }, { "epoch": 0.1, "grad_norm": 0.2776488352071124, "learning_rate": 0.00019999927926276417, "loss": 1.2357, "step": 1069 }, { "epoch": 0.1, "grad_norm": 0.3107756422517976, "learning_rate": 0.00019999921795012783, "loss": 1.1875, "step": 1070 }, { "epoch": 0.1, "grad_norm": 0.23140904026513692, "learning_rate": 0.00019999915413494823, "loss": 1.0612, "step": 1071 }, { "epoch": 0.1, "grad_norm": 0.28830876945790945, "learning_rate": 0.00019999908781722693, "loss": 1.0622, "step": 1072 }, { "epoch": 0.1, "grad_norm": 0.24641652710238304, "learning_rate": 0.00019999901899696564, "loss": 1.1553, "step": 1073 }, { "epoch": 0.1, "grad_norm": 0.3285726477920543, "learning_rate": 0.00019999894767416603, "loss": 1.1287, "step": 1074 }, { "epoch": 0.1, "grad_norm": 0.2868313621923491, "learning_rate": 0.00019999887384882992, "loss": 1.1679, "step": 1075 }, { "epoch": 0.1, "grad_norm": 0.2888935086026084, "learning_rate": 0.00019999879752095914, "loss": 1.078, "step": 1076 }, { "epoch": 0.1, "grad_norm": 0.25581751198117825, "learning_rate": 0.0001999987186905556, "loss": 1.1637, "step": 1077 }, { "epoch": 0.1, "grad_norm": 0.28668728467855203, "learning_rate": 0.0001999986373576213, "loss": 1.1557, "step": 1078 }, { "epoch": 0.1, "grad_norm": 0.2858855815822476, "learning_rate": 0.00019999855352215824, "loss": 1.1543, "step": 1079 }, { "epoch": 0.1, "grad_norm": 0.24866679351848656, "learning_rate": 0.0001999984671841685, "loss": 1.088, "step": 1080 }, { "epoch": 0.1, "grad_norm": 0.2802988000108613, "learning_rate": 0.00019999837834365432, "loss": 1.1045, "step": 1081 }, { "epoch": 0.1, "grad_norm": 0.28290564790646, "learning_rate": 0.00019999828700061786, "loss": 1.1013, "step": 1082 }, { "epoch": 0.1, "grad_norm": 0.2670174309559056, "learning_rate": 0.0001999981931550614, "loss": 1.0202, "step": 1083 }, { "epoch": 0.1, "grad_norm": 0.29742334914408336, "learning_rate": 0.00019999809680698734, "loss": 1.1634, "step": 1084 }, { "epoch": 0.1, "grad_norm": 0.2613603430268145, "learning_rate": 0.00019999799795639804, "loss": 1.1906, "step": 1085 }, { "epoch": 0.1, "grad_norm": 0.2368844788947555, "learning_rate": 0.000199997896603296, "loss": 1.1789, "step": 1086 }, { "epoch": 0.1, "grad_norm": 0.28495588713062425, "learning_rate": 0.00019999779274768376, "loss": 1.1759, "step": 1087 }, { "epoch": 0.1, "grad_norm": 0.2822715528001476, "learning_rate": 0.0001999976863895639, "loss": 1.0508, "step": 1088 }, { "epoch": 0.1, "grad_norm": 0.28574997489486803, "learning_rate": 0.0001999975775289391, "loss": 1.1224, "step": 1089 }, { "epoch": 0.1, "grad_norm": 0.261898017632014, "learning_rate": 0.00019999746616581208, "loss": 1.1035, "step": 1090 }, { "epoch": 0.1, "grad_norm": 0.28859851706983464, "learning_rate": 0.00019999735230018562, "loss": 1.1726, "step": 1091 }, { "epoch": 0.1, "grad_norm": 0.26017489671317706, "learning_rate": 0.00019999723593206256, "loss": 1.0777, "step": 1092 }, { "epoch": 0.1, "grad_norm": 0.2808346316808804, "learning_rate": 0.00019999711706144584, "loss": 1.1169, "step": 1093 }, { "epoch": 0.1, "grad_norm": 0.26961618081539596, "learning_rate": 0.0001999969956883384, "loss": 1.1525, "step": 1094 }, { "epoch": 0.1, "grad_norm": 0.23998423085029977, "learning_rate": 0.0001999968718127433, "loss": 1.1088, "step": 1095 }, { "epoch": 0.1, "grad_norm": 0.27133623294565795, "learning_rate": 0.00019999674543466368, "loss": 1.0187, "step": 1096 }, { "epoch": 0.1, "grad_norm": 0.28888071684005906, "learning_rate": 0.00019999661655410261, "loss": 1.0476, "step": 1097 }, { "epoch": 0.11, "grad_norm": 0.26874358976360063, "learning_rate": 0.0001999964851710634, "loss": 1.1429, "step": 1098 }, { "epoch": 0.11, "grad_norm": 0.2803133560421145, "learning_rate": 0.0001999963512855493, "loss": 1.2504, "step": 1099 }, { "epoch": 0.11, "grad_norm": 0.2913063287806824, "learning_rate": 0.00019999621489756364, "loss": 1.1604, "step": 1100 }, { "epoch": 0.11, "grad_norm": 0.27440490495841235, "learning_rate": 0.00019999607600710984, "loss": 1.1517, "step": 1101 }, { "epoch": 0.11, "grad_norm": 0.277572309820751, "learning_rate": 0.00019999593461419144, "loss": 1.0957, "step": 1102 }, { "epoch": 0.11, "grad_norm": 0.3075924298382781, "learning_rate": 0.0001999957907188119, "loss": 1.142, "step": 1103 }, { "epoch": 0.11, "grad_norm": 0.23985413219751897, "learning_rate": 0.00019999564432097487, "loss": 1.1932, "step": 1104 }, { "epoch": 0.11, "grad_norm": 0.2408338302884486, "learning_rate": 0.00019999549542068395, "loss": 1.0735, "step": 1105 }, { "epoch": 0.11, "grad_norm": 0.26874856387294116, "learning_rate": 0.00019999534401794297, "loss": 1.1553, "step": 1106 }, { "epoch": 0.11, "grad_norm": 0.3014584853984502, "learning_rate": 0.00019999519011275566, "loss": 1.1655, "step": 1107 }, { "epoch": 0.11, "grad_norm": 0.2843833242046219, "learning_rate": 0.00019999503370512583, "loss": 1.1877, "step": 1108 }, { "epoch": 0.11, "grad_norm": 0.2512315616335756, "learning_rate": 0.00019999487479505746, "loss": 1.2, "step": 1109 }, { "epoch": 0.11, "grad_norm": 0.3854687733857706, "learning_rate": 0.00019999471338255452, "loss": 1.1755, "step": 1110 }, { "epoch": 0.11, "grad_norm": 0.2957638364283729, "learning_rate": 0.00019999454946762103, "loss": 1.1496, "step": 1111 }, { "epoch": 0.11, "grad_norm": 0.2866505879252708, "learning_rate": 0.00019999438305026108, "loss": 0.9671, "step": 1112 }, { "epoch": 0.11, "grad_norm": 0.31100005319009444, "learning_rate": 0.00019999421413047886, "loss": 1.1924, "step": 1113 }, { "epoch": 0.11, "grad_norm": 0.2736868129625665, "learning_rate": 0.00019999404270827856, "loss": 1.0565, "step": 1114 }, { "epoch": 0.11, "grad_norm": 0.3082559508155182, "learning_rate": 0.00019999386878366454, "loss": 1.1636, "step": 1115 }, { "epoch": 0.11, "grad_norm": 0.2709734888315765, "learning_rate": 0.0001999936923566411, "loss": 1.1289, "step": 1116 }, { "epoch": 0.11, "grad_norm": 0.32185710854614685, "learning_rate": 0.00019999351342721262, "loss": 1.1404, "step": 1117 }, { "epoch": 0.11, "grad_norm": 0.31162451372291133, "learning_rate": 0.0001999933319953837, "loss": 1.112, "step": 1118 }, { "epoch": 0.11, "grad_norm": 0.2752825720487004, "learning_rate": 0.00019999314806115872, "loss": 1.143, "step": 1119 }, { "epoch": 0.11, "grad_norm": 0.2917340741765025, "learning_rate": 0.0001999929616245424, "loss": 1.0736, "step": 1120 }, { "epoch": 0.11, "grad_norm": 0.302518080441679, "learning_rate": 0.0001999927726855394, "loss": 1.0372, "step": 1121 }, { "epoch": 0.11, "grad_norm": 0.25312327730893897, "learning_rate": 0.00019999258124415442, "loss": 1.1355, "step": 1122 }, { "epoch": 0.11, "grad_norm": 0.2656439197184839, "learning_rate": 0.00019999238730039222, "loss": 1.0496, "step": 1123 }, { "epoch": 0.11, "grad_norm": 0.24862847164472834, "learning_rate": 0.00019999219085425768, "loss": 1.0786, "step": 1124 }, { "epoch": 0.11, "grad_norm": 0.28410932219305585, "learning_rate": 0.00019999199190575575, "loss": 1.0904, "step": 1125 }, { "epoch": 0.11, "grad_norm": 0.2720824714159536, "learning_rate": 0.00019999179045489135, "loss": 1.0153, "step": 1126 }, { "epoch": 0.11, "grad_norm": 0.2759096778009793, "learning_rate": 0.00019999158650166958, "loss": 1.1001, "step": 1127 }, { "epoch": 0.11, "grad_norm": 0.6106436469666682, "learning_rate": 0.0001999913800460955, "loss": 1.1342, "step": 1128 }, { "epoch": 0.11, "grad_norm": 0.23702252854532238, "learning_rate": 0.00019999117108817428, "loss": 1.0916, "step": 1129 }, { "epoch": 0.11, "grad_norm": 0.25849383000967896, "learning_rate": 0.0001999909596279112, "loss": 1.1749, "step": 1130 }, { "epoch": 0.11, "grad_norm": 0.28015440839970107, "learning_rate": 0.0001999907456653115, "loss": 1.1269, "step": 1131 }, { "epoch": 0.11, "grad_norm": 0.26460543807236786, "learning_rate": 0.00019999052920038053, "loss": 1.1749, "step": 1132 }, { "epoch": 0.11, "grad_norm": 0.27199116543714963, "learning_rate": 0.0001999903102331237, "loss": 1.0885, "step": 1133 }, { "epoch": 0.11, "grad_norm": 0.22326728149419828, "learning_rate": 0.00019999008876354658, "loss": 1.106, "step": 1134 }, { "epoch": 0.11, "grad_norm": 0.2957916257263048, "learning_rate": 0.0001999898647916546, "loss": 1.0432, "step": 1135 }, { "epoch": 0.11, "grad_norm": 0.2926664019325837, "learning_rate": 0.00019998963831745344, "loss": 1.0905, "step": 1136 }, { "epoch": 0.11, "grad_norm": 0.2560158671609372, "learning_rate": 0.00019998940934094872, "loss": 1.0585, "step": 1137 }, { "epoch": 0.11, "grad_norm": 0.26739898061958195, "learning_rate": 0.00019998917786214618, "loss": 1.0375, "step": 1138 }, { "epoch": 0.11, "grad_norm": 0.28363806649148315, "learning_rate": 0.00019998894388105164, "loss": 1.1372, "step": 1139 }, { "epoch": 0.11, "grad_norm": 0.24811695289905492, "learning_rate": 0.00019998870739767094, "loss": 0.963, "step": 1140 }, { "epoch": 0.11, "grad_norm": 0.28924944955268556, "learning_rate": 0.00019998846841201, "loss": 1.084, "step": 1141 }, { "epoch": 0.11, "grad_norm": 0.2636078259266071, "learning_rate": 0.00019998822692407478, "loss": 1.0698, "step": 1142 }, { "epoch": 0.11, "grad_norm": 0.2960715052303091, "learning_rate": 0.0001999879829338714, "loss": 1.2418, "step": 1143 }, { "epoch": 0.11, "grad_norm": 0.23055523423338184, "learning_rate": 0.00019998773644140584, "loss": 1.169, "step": 1144 }, { "epoch": 0.11, "grad_norm": 0.3043989132237114, "learning_rate": 0.00019998748744668436, "loss": 1.1707, "step": 1145 }, { "epoch": 0.11, "grad_norm": 0.2652220574427321, "learning_rate": 0.00019998723594971316, "loss": 1.0908, "step": 1146 }, { "epoch": 0.11, "grad_norm": 0.3564678490888735, "learning_rate": 0.00019998698195049857, "loss": 1.2161, "step": 1147 }, { "epoch": 0.11, "grad_norm": 0.31147303496629464, "learning_rate": 0.0001999867254490469, "loss": 1.0795, "step": 1148 }, { "epoch": 0.11, "grad_norm": 0.33889241778400275, "learning_rate": 0.00019998646644536457, "loss": 1.2739, "step": 1149 }, { "epoch": 0.11, "grad_norm": 0.27545717862181845, "learning_rate": 0.00019998620493945807, "loss": 1.074, "step": 1150 }, { "epoch": 0.11, "grad_norm": 0.2576593733645889, "learning_rate": 0.00019998594093133395, "loss": 1.1171, "step": 1151 }, { "epoch": 0.11, "grad_norm": 0.25688278200109543, "learning_rate": 0.00019998567442099888, "loss": 1.081, "step": 1152 }, { "epoch": 0.11, "grad_norm": 0.3149168655482506, "learning_rate": 0.0001999854054084594, "loss": 1.1463, "step": 1153 }, { "epoch": 0.11, "grad_norm": 0.28337563224538714, "learning_rate": 0.00019998513389372233, "loss": 1.1503, "step": 1154 }, { "epoch": 0.11, "grad_norm": 0.28770905143410885, "learning_rate": 0.00019998485987679447, "loss": 1.0847, "step": 1155 }, { "epoch": 0.11, "grad_norm": 0.2606963858756736, "learning_rate": 0.00019998458335768264, "loss": 1.2108, "step": 1156 }, { "epoch": 0.11, "grad_norm": 0.2764798228490211, "learning_rate": 0.00019998430433639376, "loss": 1.1206, "step": 1157 }, { "epoch": 0.11, "grad_norm": 0.3028071620221027, "learning_rate": 0.00019998402281293484, "loss": 1.1628, "step": 1158 }, { "epoch": 0.11, "grad_norm": 0.23132033284887418, "learning_rate": 0.00019998373878731291, "loss": 1.0603, "step": 1159 }, { "epoch": 0.11, "grad_norm": 0.3197463940127305, "learning_rate": 0.0001999834522595351, "loss": 1.1337, "step": 1160 }, { "epoch": 0.11, "grad_norm": 0.258332321698546, "learning_rate": 0.00019998316322960853, "loss": 1.1347, "step": 1161 }, { "epoch": 0.11, "grad_norm": 0.37002001593224093, "learning_rate": 0.00019998287169754045, "loss": 1.0973, "step": 1162 }, { "epoch": 0.11, "grad_norm": 0.35455352761567094, "learning_rate": 0.00019998257766333822, "loss": 1.0645, "step": 1163 }, { "epoch": 0.11, "grad_norm": 0.25846010518779355, "learning_rate": 0.00019998228112700912, "loss": 1.099, "step": 1164 }, { "epoch": 0.11, "grad_norm": 0.45574094165617823, "learning_rate": 0.00019998198208856058, "loss": 1.2218, "step": 1165 }, { "epoch": 0.11, "grad_norm": 0.2806569349689396, "learning_rate": 0.0001999816805480001, "loss": 1.163, "step": 1166 }, { "epoch": 0.11, "grad_norm": 0.3230556556910955, "learning_rate": 0.00019998137650533527, "loss": 1.0275, "step": 1167 }, { "epoch": 0.11, "grad_norm": 0.295834882980768, "learning_rate": 0.0001999810699605736, "loss": 1.0928, "step": 1168 }, { "epoch": 0.11, "grad_norm": 0.2838870309959414, "learning_rate": 0.0001999807609137229, "loss": 1.1008, "step": 1169 }, { "epoch": 0.11, "grad_norm": 0.3164419453755688, "learning_rate": 0.00019998044936479076, "loss": 1.1307, "step": 1170 }, { "epoch": 0.11, "grad_norm": 0.22581223994903243, "learning_rate": 0.00019998013531378504, "loss": 1.1228, "step": 1171 }, { "epoch": 0.11, "grad_norm": 0.2611545463660394, "learning_rate": 0.00019997981876071364, "loss": 1.1299, "step": 1172 }, { "epoch": 0.11, "grad_norm": 0.2531091040846973, "learning_rate": 0.00019997949970558437, "loss": 1.2127, "step": 1173 }, { "epoch": 0.11, "grad_norm": 0.3026109003824534, "learning_rate": 0.00019997917814840537, "loss": 1.1661, "step": 1174 }, { "epoch": 0.11, "grad_norm": 0.3246636144590807, "learning_rate": 0.00019997885408918454, "loss": 1.0933, "step": 1175 }, { "epoch": 0.11, "grad_norm": 0.2574562133107501, "learning_rate": 0.0001999785275279301, "loss": 1.1417, "step": 1176 }, { "epoch": 0.11, "grad_norm": 0.2774322857021015, "learning_rate": 0.00019997819846465014, "loss": 1.2012, "step": 1177 }, { "epoch": 0.11, "grad_norm": 0.2616318983640859, "learning_rate": 0.00019997786689935292, "loss": 1.064, "step": 1178 }, { "epoch": 0.11, "grad_norm": 0.28254029261985597, "learning_rate": 0.00019997753283204677, "loss": 1.0777, "step": 1179 }, { "epoch": 0.11, "grad_norm": 0.27452271119130867, "learning_rate": 0.00019997719626274, "loss": 1.2698, "step": 1180 }, { "epoch": 0.11, "grad_norm": 0.28281251810785174, "learning_rate": 0.0001999768571914411, "loss": 1.0866, "step": 1181 }, { "epoch": 0.11, "grad_norm": 0.28877654959328175, "learning_rate": 0.00019997651561815848, "loss": 1.0607, "step": 1182 }, { "epoch": 0.11, "grad_norm": 0.31037965805002504, "learning_rate": 0.00019997617154290077, "loss": 1.1267, "step": 1183 }, { "epoch": 0.11, "grad_norm": 0.28875914132128616, "learning_rate": 0.0001999758249656765, "loss": 1.159, "step": 1184 }, { "epoch": 0.11, "grad_norm": 0.26263865798211755, "learning_rate": 0.00019997547588649438, "loss": 1.1947, "step": 1185 }, { "epoch": 0.11, "grad_norm": 0.25456732695723555, "learning_rate": 0.00019997512430536314, "loss": 1.1032, "step": 1186 }, { "epoch": 0.11, "grad_norm": 0.32332294350656676, "learning_rate": 0.00019997477022229158, "loss": 1.1283, "step": 1187 }, { "epoch": 0.11, "grad_norm": 0.3066913219600098, "learning_rate": 0.00019997441363728857, "loss": 1.2178, "step": 1188 }, { "epoch": 0.11, "grad_norm": 0.2982903122596879, "learning_rate": 0.00019997405455036304, "loss": 1.1613, "step": 1189 }, { "epoch": 0.11, "grad_norm": 0.3072555573162715, "learning_rate": 0.00019997369296152396, "loss": 1.1927, "step": 1190 }, { "epoch": 0.11, "grad_norm": 0.27576655968710867, "learning_rate": 0.00019997332887078034, "loss": 1.148, "step": 1191 }, { "epoch": 0.11, "grad_norm": 0.2651145511693654, "learning_rate": 0.0001999729622781414, "loss": 1.061, "step": 1192 }, { "epoch": 0.11, "grad_norm": 0.26492079668590324, "learning_rate": 0.00019997259318361622, "loss": 1.0942, "step": 1193 }, { "epoch": 0.11, "grad_norm": 0.268256812103858, "learning_rate": 0.00019997222158721405, "loss": 1.0478, "step": 1194 }, { "epoch": 0.11, "grad_norm": 0.29544097416774406, "learning_rate": 0.00019997184748894422, "loss": 1.084, "step": 1195 }, { "epoch": 0.11, "grad_norm": 0.24315111594316274, "learning_rate": 0.00019997147088881607, "loss": 1.1187, "step": 1196 }, { "epoch": 0.11, "grad_norm": 0.2887864253869539, "learning_rate": 0.00019997109178683905, "loss": 1.1425, "step": 1197 }, { "epoch": 0.11, "grad_norm": 0.243613837120699, "learning_rate": 0.0001999707101830226, "loss": 1.2192, "step": 1198 }, { "epoch": 0.11, "grad_norm": 0.2670339437152679, "learning_rate": 0.00019997032607737633, "loss": 0.9346, "step": 1199 }, { "epoch": 0.11, "grad_norm": 0.286415306705152, "learning_rate": 0.0001999699394699098, "loss": 1.2044, "step": 1200 }, { "epoch": 0.11, "grad_norm": 0.2649888516882499, "learning_rate": 0.0001999695503606327, "loss": 1.1028, "step": 1201 }, { "epoch": 0.11, "grad_norm": 0.2784005327190465, "learning_rate": 0.00019996915874955477, "loss": 1.1883, "step": 1202 }, { "epoch": 0.12, "grad_norm": 0.2827618352465213, "learning_rate": 0.00019996876463668586, "loss": 1.1373, "step": 1203 }, { "epoch": 0.12, "grad_norm": 0.27252281665016315, "learning_rate": 0.00019996836802203575, "loss": 1.1434, "step": 1204 }, { "epoch": 0.12, "grad_norm": 0.2829042974144935, "learning_rate": 0.00019996796890561438, "loss": 1.1242, "step": 1205 }, { "epoch": 0.12, "grad_norm": 0.25919288560265524, "learning_rate": 0.0001999675672874318, "loss": 1.0836, "step": 1206 }, { "epoch": 0.12, "grad_norm": 0.2462264710662166, "learning_rate": 0.00019996716316749802, "loss": 1.0824, "step": 1207 }, { "epoch": 0.12, "grad_norm": 0.24248848464047051, "learning_rate": 0.00019996675654582313, "loss": 1.1398, "step": 1208 }, { "epoch": 0.12, "grad_norm": 0.2542004323802939, "learning_rate": 0.00019996634742241732, "loss": 1.0721, "step": 1209 }, { "epoch": 0.12, "grad_norm": 0.2665303881003603, "learning_rate": 0.0001999659357972909, "loss": 1.1183, "step": 1210 }, { "epoch": 0.12, "grad_norm": 0.2776270813403137, "learning_rate": 0.00019996552167045407, "loss": 1.2601, "step": 1211 }, { "epoch": 0.12, "grad_norm": 0.3169789236787061, "learning_rate": 0.00019996510504191722, "loss": 1.2331, "step": 1212 }, { "epoch": 0.12, "grad_norm": 0.247880062307769, "learning_rate": 0.00019996468591169082, "loss": 1.1088, "step": 1213 }, { "epoch": 0.12, "grad_norm": 0.2743889662351891, "learning_rate": 0.00019996426427978532, "loss": 1.0541, "step": 1214 }, { "epoch": 0.12, "grad_norm": 0.2911803551557875, "learning_rate": 0.00019996384014621128, "loss": 1.0826, "step": 1215 }, { "epoch": 0.12, "grad_norm": 0.26095098186965116, "learning_rate": 0.0001999634135109793, "loss": 1.3256, "step": 1216 }, { "epoch": 0.12, "grad_norm": 0.2856385392323691, "learning_rate": 0.0001999629843741001, "loss": 1.1093, "step": 1217 }, { "epoch": 0.12, "grad_norm": 0.2530173370522601, "learning_rate": 0.00019996255273558436, "loss": 1.0579, "step": 1218 }, { "epoch": 0.12, "grad_norm": 0.2549728915097247, "learning_rate": 0.00019996211859544296, "loss": 0.9691, "step": 1219 }, { "epoch": 0.12, "grad_norm": 0.2799442188519433, "learning_rate": 0.00019996168195368668, "loss": 1.0273, "step": 1220 }, { "epoch": 0.12, "grad_norm": 0.30580035630418173, "learning_rate": 0.0001999612428103265, "loss": 1.2193, "step": 1221 }, { "epoch": 0.12, "grad_norm": 0.2773508640042361, "learning_rate": 0.00019996080116537339, "loss": 1.059, "step": 1222 }, { "epoch": 0.12, "grad_norm": 0.2917592835447524, "learning_rate": 0.0001999603570188384, "loss": 1.1454, "step": 1223 }, { "epoch": 0.12, "grad_norm": 0.27511773778172, "learning_rate": 0.00019995991037073267, "loss": 1.0708, "step": 1224 }, { "epoch": 0.12, "grad_norm": 0.2299812326773386, "learning_rate": 0.00019995946122106735, "loss": 0.9796, "step": 1225 }, { "epoch": 0.12, "grad_norm": 0.28024776222182823, "learning_rate": 0.00019995900956985369, "loss": 1.1255, "step": 1226 }, { "epoch": 0.12, "grad_norm": 0.26814475354624795, "learning_rate": 0.000199958555417103, "loss": 1.0957, "step": 1227 }, { "epoch": 0.12, "grad_norm": 0.2872677212088015, "learning_rate": 0.00019995809876282664, "loss": 1.035, "step": 1228 }, { "epoch": 0.12, "grad_norm": 0.2521840908110662, "learning_rate": 0.00019995763960703605, "loss": 1.0637, "step": 1229 }, { "epoch": 0.12, "grad_norm": 0.40673410489243833, "learning_rate": 0.00019995717794974268, "loss": 1.0153, "step": 1230 }, { "epoch": 0.12, "grad_norm": 0.31023442713481986, "learning_rate": 0.0001999567137909581, "loss": 1.1233, "step": 1231 }, { "epoch": 0.12, "grad_norm": 0.2712959430519531, "learning_rate": 0.000199956247130694, "loss": 1.1379, "step": 1232 }, { "epoch": 0.12, "grad_norm": 0.28625127223535557, "learning_rate": 0.0001999557779689619, "loss": 1.2184, "step": 1233 }, { "epoch": 0.12, "grad_norm": 0.30076306094072325, "learning_rate": 0.0001999553063057737, "loss": 1.1837, "step": 1234 }, { "epoch": 0.12, "grad_norm": 0.2653252660175731, "learning_rate": 0.00019995483214114114, "loss": 1.0155, "step": 1235 }, { "epoch": 0.12, "grad_norm": 0.2752439328777632, "learning_rate": 0.0001999543554750761, "loss": 1.1711, "step": 1236 }, { "epoch": 0.12, "grad_norm": 0.27211117879122465, "learning_rate": 0.00019995387630759046, "loss": 1.1123, "step": 1237 }, { "epoch": 0.12, "grad_norm": 0.26487361645343366, "learning_rate": 0.00019995339463869626, "loss": 1.074, "step": 1238 }, { "epoch": 0.12, "grad_norm": 0.3021321790418319, "learning_rate": 0.00019995291046840554, "loss": 1.119, "step": 1239 }, { "epoch": 0.12, "grad_norm": 0.2361755130550678, "learning_rate": 0.00019995242379673041, "loss": 1.1614, "step": 1240 }, { "epoch": 0.12, "grad_norm": 0.254336538394881, "learning_rate": 0.00019995193462368308, "loss": 1.0516, "step": 1241 }, { "epoch": 0.12, "grad_norm": 0.26752053595877906, "learning_rate": 0.00019995144294927575, "loss": 1.0767, "step": 1242 }, { "epoch": 0.12, "grad_norm": 0.2597082485713151, "learning_rate": 0.00019995094877352075, "loss": 1.0225, "step": 1243 }, { "epoch": 0.12, "grad_norm": 0.2455539656845191, "learning_rate": 0.00019995045209643042, "loss": 1.1359, "step": 1244 }, { "epoch": 0.12, "grad_norm": 0.27400725474036985, "learning_rate": 0.00019994995291801725, "loss": 1.1361, "step": 1245 }, { "epoch": 0.12, "grad_norm": 0.2924655478762353, "learning_rate": 0.00019994945123829366, "loss": 1.1821, "step": 1246 }, { "epoch": 0.12, "grad_norm": 0.29710840841514063, "learning_rate": 0.00019994894705727224, "loss": 1.2383, "step": 1247 }, { "epoch": 0.12, "grad_norm": 0.2813768369686393, "learning_rate": 0.0001999484403749656, "loss": 1.0407, "step": 1248 }, { "epoch": 0.12, "grad_norm": 0.28144059564025686, "learning_rate": 0.00019994793119138644, "loss": 1.2073, "step": 1249 }, { "epoch": 0.12, "grad_norm": 0.24328115129403632, "learning_rate": 0.00019994741950654746, "loss": 1.1233, "step": 1250 }, { "epoch": 0.12, "grad_norm": 0.2779013414605166, "learning_rate": 0.00019994690532046155, "loss": 1.1908, "step": 1251 }, { "epoch": 0.12, "grad_norm": 0.2939081176965555, "learning_rate": 0.00019994638863314146, "loss": 1.0965, "step": 1252 }, { "epoch": 0.12, "grad_norm": 0.267694708686987, "learning_rate": 0.0001999458694446002, "loss": 1.0733, "step": 1253 }, { "epoch": 0.12, "grad_norm": 0.31511116622951807, "learning_rate": 0.00019994534775485075, "loss": 1.0266, "step": 1254 }, { "epoch": 0.12, "grad_norm": 0.2718983181420745, "learning_rate": 0.00019994482356390617, "loss": 1.1895, "step": 1255 }, { "epoch": 0.12, "grad_norm": 0.2558317058922546, "learning_rate": 0.00019994429687177957, "loss": 1.1239, "step": 1256 }, { "epoch": 0.12, "grad_norm": 0.2744763799941594, "learning_rate": 0.00019994376767848407, "loss": 1.2122, "step": 1257 }, { "epoch": 0.12, "grad_norm": 0.2643964783968129, "learning_rate": 0.00019994323598403302, "loss": 1.0028, "step": 1258 }, { "epoch": 0.12, "grad_norm": 0.25268007251056396, "learning_rate": 0.0001999427017884397, "loss": 1.1376, "step": 1259 }, { "epoch": 0.12, "grad_norm": 0.230312980862343, "learning_rate": 0.0001999421650917174, "loss": 1.0732, "step": 1260 }, { "epoch": 0.12, "grad_norm": 0.2740089140496981, "learning_rate": 0.00019994162589387964, "loss": 0.9982, "step": 1261 }, { "epoch": 0.12, "grad_norm": 0.2726058611551938, "learning_rate": 0.0001999410841949399, "loss": 1.039, "step": 1262 }, { "epoch": 0.12, "grad_norm": 0.3233351751878856, "learning_rate": 0.00019994053999491167, "loss": 1.2084, "step": 1263 }, { "epoch": 0.12, "grad_norm": 0.24718142863026807, "learning_rate": 0.00019993999329380864, "loss": 0.9947, "step": 1264 }, { "epoch": 0.12, "grad_norm": 0.2877635724046095, "learning_rate": 0.00019993944409164448, "loss": 1.1189, "step": 1265 }, { "epoch": 0.12, "grad_norm": 0.3002448997033507, "learning_rate": 0.00019993889238843288, "loss": 1.0936, "step": 1266 }, { "epoch": 0.12, "grad_norm": 0.24948366812390463, "learning_rate": 0.00019993833818418772, "loss": 1.1574, "step": 1267 }, { "epoch": 0.12, "grad_norm": 0.24037766838141317, "learning_rate": 0.00019993778147892285, "loss": 1.1475, "step": 1268 }, { "epoch": 0.12, "grad_norm": 0.24578207537112048, "learning_rate": 0.00019993722227265218, "loss": 1.1365, "step": 1269 }, { "epoch": 0.12, "grad_norm": 0.24088318104194462, "learning_rate": 0.00019993666056538972, "loss": 1.0947, "step": 1270 }, { "epoch": 0.12, "grad_norm": 0.2921571119742658, "learning_rate": 0.0001999360963571495, "loss": 1.0772, "step": 1271 }, { "epoch": 0.12, "grad_norm": 0.30049773628170273, "learning_rate": 0.00019993552964794566, "loss": 1.2072, "step": 1272 }, { "epoch": 0.12, "grad_norm": 0.3160778257013834, "learning_rate": 0.0001999349604377924, "loss": 1.0676, "step": 1273 }, { "epoch": 0.12, "grad_norm": 0.23884600224412095, "learning_rate": 0.00019993438872670396, "loss": 1.0855, "step": 1274 }, { "epoch": 0.12, "grad_norm": 0.2615500721708398, "learning_rate": 0.0001999338145146946, "loss": 1.1958, "step": 1275 }, { "epoch": 0.12, "grad_norm": 0.2591847182045251, "learning_rate": 0.00019993323780177874, "loss": 1.0991, "step": 1276 }, { "epoch": 0.12, "grad_norm": 0.2800525884700228, "learning_rate": 0.00019993265858797083, "loss": 1.0018, "step": 1277 }, { "epoch": 0.12, "grad_norm": 0.25703108671920066, "learning_rate": 0.0001999320768732853, "loss": 1.0842, "step": 1278 }, { "epoch": 0.12, "grad_norm": 0.2814109826464174, "learning_rate": 0.00019993149265773674, "loss": 1.1056, "step": 1279 }, { "epoch": 0.12, "grad_norm": 0.26560101203311826, "learning_rate": 0.0001999309059413398, "loss": 1.1028, "step": 1280 }, { "epoch": 0.12, "grad_norm": 0.2592301570333206, "learning_rate": 0.00019993031672410912, "loss": 1.2395, "step": 1281 }, { "epoch": 0.12, "grad_norm": 0.2903887529235589, "learning_rate": 0.00019992972500605945, "loss": 1.2269, "step": 1282 }, { "epoch": 0.12, "grad_norm": 0.30985749070799845, "learning_rate": 0.00019992913078720559, "loss": 1.0394, "step": 1283 }, { "epoch": 0.12, "grad_norm": 0.2427582461596586, "learning_rate": 0.00019992853406756246, "loss": 1.0323, "step": 1284 }, { "epoch": 0.12, "grad_norm": 0.2674764639506977, "learning_rate": 0.00019992793484714495, "loss": 1.0569, "step": 1285 }, { "epoch": 0.12, "grad_norm": 0.27159902681019893, "learning_rate": 0.00019992733312596808, "loss": 1.0051, "step": 1286 }, { "epoch": 0.12, "grad_norm": 0.27222923746834743, "learning_rate": 0.00019992672890404689, "loss": 1.1311, "step": 1287 }, { "epoch": 0.12, "grad_norm": 0.2557430982813261, "learning_rate": 0.0001999261221813965, "loss": 1.121, "step": 1288 }, { "epoch": 0.12, "grad_norm": 0.2927850888283984, "learning_rate": 0.0001999255129580321, "loss": 1.1497, "step": 1289 }, { "epoch": 0.12, "grad_norm": 0.2859310412286254, "learning_rate": 0.00019992490123396897, "loss": 1.1786, "step": 1290 }, { "epoch": 0.12, "grad_norm": 0.25665923491793874, "learning_rate": 0.00019992428700922236, "loss": 1.0947, "step": 1291 }, { "epoch": 0.12, "grad_norm": 0.24916615055802435, "learning_rate": 0.00019992367028380764, "loss": 1.1687, "step": 1292 }, { "epoch": 0.12, "grad_norm": 0.2731992285743491, "learning_rate": 0.00019992305105774033, "loss": 1.1253, "step": 1293 }, { "epoch": 0.12, "grad_norm": 0.26082758101857634, "learning_rate": 0.0001999224293310358, "loss": 1.1551, "step": 1294 }, { "epoch": 0.12, "grad_norm": 0.2677306810392585, "learning_rate": 0.00019992180510370976, "loss": 1.2005, "step": 1295 }, { "epoch": 0.12, "grad_norm": 0.23143531134792353, "learning_rate": 0.00019992117837577768, "loss": 1.0862, "step": 1296 }, { "epoch": 0.12, "grad_norm": 0.2655840524523936, "learning_rate": 0.00019992054914725533, "loss": 1.1536, "step": 1297 }, { "epoch": 0.12, "grad_norm": 0.2640787870858716, "learning_rate": 0.00019991991741815849, "loss": 1.1011, "step": 1298 }, { "epoch": 0.12, "grad_norm": 0.26001791207694314, "learning_rate": 0.00019991928318850285, "loss": 1.191, "step": 1299 }, { "epoch": 0.12, "grad_norm": 0.34274104078637135, "learning_rate": 0.0001999186464583044, "loss": 1.1189, "step": 1300 }, { "epoch": 0.12, "grad_norm": 0.25164999546725786, "learning_rate": 0.000199918007227579, "loss": 1.1755, "step": 1301 }, { "epoch": 0.12, "grad_norm": 0.2619736635468605, "learning_rate": 0.00019991736549634267, "loss": 1.0093, "step": 1302 }, { "epoch": 0.12, "grad_norm": 0.2610328681252208, "learning_rate": 0.00019991672126461147, "loss": 1.2091, "step": 1303 }, { "epoch": 0.12, "grad_norm": 0.27609879841972346, "learning_rate": 0.00019991607453240153, "loss": 1.1545, "step": 1304 }, { "epoch": 0.12, "grad_norm": 0.2438919416348471, "learning_rate": 0.00019991542529972905, "loss": 1.0793, "step": 1305 }, { "epoch": 0.12, "grad_norm": 0.34769105624706653, "learning_rate": 0.00019991477356661022, "loss": 1.0689, "step": 1306 }, { "epoch": 0.13, "grad_norm": 0.24988099416617487, "learning_rate": 0.0001999141193330614, "loss": 0.9971, "step": 1307 }, { "epoch": 0.13, "grad_norm": 0.26587548843318687, "learning_rate": 0.00019991346259909897, "loss": 1.0847, "step": 1308 }, { "epoch": 0.13, "grad_norm": 0.27946757540245054, "learning_rate": 0.00019991280336473935, "loss": 1.137, "step": 1309 }, { "epoch": 0.13, "grad_norm": 0.2928663604215012, "learning_rate": 0.000199912141629999, "loss": 1.157, "step": 1310 }, { "epoch": 0.13, "grad_norm": 0.24871198294790245, "learning_rate": 0.00019991147739489455, "loss": 1.0734, "step": 1311 }, { "epoch": 0.13, "grad_norm": 0.2384639920280004, "learning_rate": 0.00019991081065944254, "loss": 1.0737, "step": 1312 }, { "epoch": 0.13, "grad_norm": 0.2656285356842293, "learning_rate": 0.00019991014142365976, "loss": 1.0513, "step": 1313 }, { "epoch": 0.13, "grad_norm": 0.28562521186701684, "learning_rate": 0.00019990946968756286, "loss": 1.1639, "step": 1314 }, { "epoch": 0.13, "grad_norm": 0.2648382753477716, "learning_rate": 0.0001999087954511687, "loss": 1.1691, "step": 1315 }, { "epoch": 0.13, "grad_norm": 0.3183214028731789, "learning_rate": 0.00019990811871449412, "loss": 1.1523, "step": 1316 }, { "epoch": 0.13, "grad_norm": 0.27420941387779174, "learning_rate": 0.0001999074394775561, "loss": 1.0882, "step": 1317 }, { "epoch": 0.13, "grad_norm": 0.24879969547328998, "learning_rate": 0.00019990675774037164, "loss": 1.0498, "step": 1318 }, { "epoch": 0.13, "grad_norm": 0.2738940434550004, "learning_rate": 0.00019990607350295776, "loss": 1.145, "step": 1319 }, { "epoch": 0.13, "grad_norm": 0.25098756631874913, "learning_rate": 0.0001999053867653316, "loss": 1.0748, "step": 1320 }, { "epoch": 0.13, "grad_norm": 0.285510083056974, "learning_rate": 0.00019990469752751032, "loss": 1.0964, "step": 1321 }, { "epoch": 0.13, "grad_norm": 0.2528776643597391, "learning_rate": 0.00019990400578951125, "loss": 1.1219, "step": 1322 }, { "epoch": 0.13, "grad_norm": 0.2737172954753307, "learning_rate": 0.0001999033115513516, "loss": 1.144, "step": 1323 }, { "epoch": 0.13, "grad_norm": 0.2788543402132724, "learning_rate": 0.00019990261481304882, "loss": 1.2348, "step": 1324 }, { "epoch": 0.13, "grad_norm": 0.26612476420962583, "learning_rate": 0.00019990191557462032, "loss": 1.1158, "step": 1325 }, { "epoch": 0.13, "grad_norm": 0.27339420148068405, "learning_rate": 0.00019990121383608357, "loss": 1.193, "step": 1326 }, { "epoch": 0.13, "grad_norm": 0.2660334700185283, "learning_rate": 0.0001999005095974562, "loss": 1.0692, "step": 1327 }, { "epoch": 0.13, "grad_norm": 0.2779459146879295, "learning_rate": 0.00019989980285875576, "loss": 1.1296, "step": 1328 }, { "epoch": 0.13, "grad_norm": 0.30588845858874797, "learning_rate": 0.00019989909361999998, "loss": 1.102, "step": 1329 }, { "epoch": 0.13, "grad_norm": 0.27026409826467807, "learning_rate": 0.0001998983818812066, "loss": 1.0786, "step": 1330 }, { "epoch": 0.13, "grad_norm": 0.23833256583341253, "learning_rate": 0.00019989766764239342, "loss": 1.1167, "step": 1331 }, { "epoch": 0.13, "grad_norm": 0.21010010951243135, "learning_rate": 0.00019989695090357832, "loss": 0.9995, "step": 1332 }, { "epoch": 0.13, "grad_norm": 0.28853987325226016, "learning_rate": 0.00019989623166477926, "loss": 0.9722, "step": 1333 }, { "epoch": 0.13, "grad_norm": 0.27581828527816954, "learning_rate": 0.0001998955099260142, "loss": 1.1367, "step": 1334 }, { "epoch": 0.13, "grad_norm": 0.3266981204077468, "learning_rate": 0.00019989478568730124, "loss": 1.0954, "step": 1335 }, { "epoch": 0.13, "grad_norm": 0.26634585407279676, "learning_rate": 0.00019989405894865848, "loss": 1.0318, "step": 1336 }, { "epoch": 0.13, "grad_norm": 0.2862324766372512, "learning_rate": 0.0001998933297101041, "loss": 1.2006, "step": 1337 }, { "epoch": 0.13, "grad_norm": 0.3219899860952693, "learning_rate": 0.0001998925979716564, "loss": 1.2428, "step": 1338 }, { "epoch": 0.13, "grad_norm": 0.2593333848699128, "learning_rate": 0.0001998918637333336, "loss": 1.0661, "step": 1339 }, { "epoch": 0.13, "grad_norm": 0.25460421291647545, "learning_rate": 0.00019989112699515417, "loss": 1.1549, "step": 1340 }, { "epoch": 0.13, "grad_norm": 0.2806093932847469, "learning_rate": 0.0001998903877571365, "loss": 1.1396, "step": 1341 }, { "epoch": 0.13, "grad_norm": 0.28375827740366566, "learning_rate": 0.00019988964601929911, "loss": 1.1933, "step": 1342 }, { "epoch": 0.13, "grad_norm": 0.25635075031051086, "learning_rate": 0.00019988890178166053, "loss": 1.0908, "step": 1343 }, { "epoch": 0.13, "grad_norm": 0.2888250115582476, "learning_rate": 0.00019988815504423942, "loss": 1.1556, "step": 1344 }, { "epoch": 0.13, "grad_norm": 0.23088975772081866, "learning_rate": 0.00019988740580705443, "loss": 1.1304, "step": 1345 }, { "epoch": 0.13, "grad_norm": 0.27596330498232263, "learning_rate": 0.00019988665407012435, "loss": 1.11, "step": 1346 }, { "epoch": 0.13, "grad_norm": 0.24512542720942831, "learning_rate": 0.00019988589983346798, "loss": 1.1509, "step": 1347 }, { "epoch": 0.13, "grad_norm": 0.25605979489959907, "learning_rate": 0.00019988514309710417, "loss": 1.0923, "step": 1348 }, { "epoch": 0.13, "grad_norm": 0.2532094527125873, "learning_rate": 0.0001998843838610519, "loss": 1.0768, "step": 1349 }, { "epoch": 0.13, "grad_norm": 0.2951636432930729, "learning_rate": 0.00019988362212533013, "loss": 1.1431, "step": 1350 }, { "epoch": 0.13, "grad_norm": 0.25484076775146086, "learning_rate": 0.000199882857889958, "loss": 1.1345, "step": 1351 }, { "epoch": 0.13, "grad_norm": 0.23726743985931845, "learning_rate": 0.0001998820911549545, "loss": 1.106, "step": 1352 }, { "epoch": 0.13, "grad_norm": 0.2587569140280319, "learning_rate": 0.00019988132192033892, "loss": 1.1131, "step": 1353 }, { "epoch": 0.13, "grad_norm": 0.26259253430908974, "learning_rate": 0.0001998805501861305, "loss": 1.1066, "step": 1354 }, { "epoch": 0.13, "grad_norm": 0.24656763039460808, "learning_rate": 0.00019987977595234852, "loss": 1.1207, "step": 1355 }, { "epoch": 0.13, "grad_norm": 0.27728609474508775, "learning_rate": 0.0001998789992190124, "loss": 1.0683, "step": 1356 }, { "epoch": 0.13, "grad_norm": 0.266917547376331, "learning_rate": 0.00019987821998614154, "loss": 1.1693, "step": 1357 }, { "epoch": 0.13, "grad_norm": 0.2915661042761893, "learning_rate": 0.00019987743825375544, "loss": 1.1064, "step": 1358 }, { "epoch": 0.13, "grad_norm": 0.24772664636633338, "learning_rate": 0.00019987665402187367, "loss": 0.9948, "step": 1359 }, { "epoch": 0.13, "grad_norm": 0.2855087767458927, "learning_rate": 0.0001998758672905159, "loss": 1.1449, "step": 1360 }, { "epoch": 0.13, "grad_norm": 0.2772392094730354, "learning_rate": 0.00019987507805970176, "loss": 1.1349, "step": 1361 }, { "epoch": 0.13, "grad_norm": 0.2612704349828971, "learning_rate": 0.000199874286329451, "loss": 1.0995, "step": 1362 }, { "epoch": 0.13, "grad_norm": 0.24900155104384822, "learning_rate": 0.00019987349209978352, "loss": 1.1157, "step": 1363 }, { "epoch": 0.13, "grad_norm": 0.26136521449937644, "learning_rate": 0.0001998726953707191, "loss": 1.1324, "step": 1364 }, { "epoch": 0.13, "grad_norm": 0.27010231238155247, "learning_rate": 0.0001998718961422777, "loss": 1.1295, "step": 1365 }, { "epoch": 0.13, "grad_norm": 0.28056706441584167, "learning_rate": 0.00019987109441447934, "loss": 1.1236, "step": 1366 }, { "epoch": 0.13, "grad_norm": 0.24673474376997923, "learning_rate": 0.00019987029018734407, "loss": 1.1493, "step": 1367 }, { "epoch": 0.13, "grad_norm": 0.25553237785153865, "learning_rate": 0.00019986948346089201, "loss": 1.1698, "step": 1368 }, { "epoch": 0.13, "grad_norm": 0.2370749606006542, "learning_rate": 0.0001998686742351434, "loss": 1.03, "step": 1369 }, { "epoch": 0.13, "grad_norm": 0.2786623699017042, "learning_rate": 0.00019986786251011842, "loss": 1.0002, "step": 1370 }, { "epoch": 0.13, "grad_norm": 0.2582710457594854, "learning_rate": 0.0001998670482858374, "loss": 1.1957, "step": 1371 }, { "epoch": 0.13, "grad_norm": 0.2414613658655144, "learning_rate": 0.00019986623156232076, "loss": 1.1471, "step": 1372 }, { "epoch": 0.13, "grad_norm": 0.28959428619565936, "learning_rate": 0.0001998654123395889, "loss": 1.1716, "step": 1373 }, { "epoch": 0.13, "grad_norm": 0.22250465518264687, "learning_rate": 0.00019986459061766234, "loss": 1.146, "step": 1374 }, { "epoch": 0.13, "grad_norm": 0.26717391829997156, "learning_rate": 0.00019986376639656163, "loss": 1.1858, "step": 1375 }, { "epoch": 0.13, "grad_norm": 0.2580258297035148, "learning_rate": 0.00019986293967630742, "loss": 1.1115, "step": 1376 }, { "epoch": 0.13, "grad_norm": 0.2678948286097016, "learning_rate": 0.0001998621104569204, "loss": 1.1202, "step": 1377 }, { "epoch": 0.13, "grad_norm": 0.291208905746566, "learning_rate": 0.00019986127873842128, "loss": 1.1424, "step": 1378 }, { "epoch": 0.13, "grad_norm": 0.21902207523941578, "learning_rate": 0.00019986044452083087, "loss": 1.1687, "step": 1379 }, { "epoch": 0.13, "grad_norm": 0.2907782173595465, "learning_rate": 0.00019985960780417012, "loss": 1.068, "step": 1380 }, { "epoch": 0.13, "grad_norm": 0.2629226322026941, "learning_rate": 0.0001998587685884599, "loss": 1.0816, "step": 1381 }, { "epoch": 0.13, "grad_norm": 0.2828450761974692, "learning_rate": 0.00019985792687372126, "loss": 1.0958, "step": 1382 }, { "epoch": 0.13, "grad_norm": 0.26457153514550896, "learning_rate": 0.00019985708265997523, "loss": 1.1454, "step": 1383 }, { "epoch": 0.13, "grad_norm": 0.25857136554982324, "learning_rate": 0.00019985623594724294, "loss": 1.074, "step": 1384 }, { "epoch": 0.13, "grad_norm": 0.24297032009355968, "learning_rate": 0.00019985538673554558, "loss": 1.2311, "step": 1385 }, { "epoch": 0.13, "grad_norm": 0.29342141362537133, "learning_rate": 0.00019985453502490447, "loss": 1.0765, "step": 1386 }, { "epoch": 0.13, "grad_norm": 0.27688151236884495, "learning_rate": 0.0001998536808153408, "loss": 1.1203, "step": 1387 }, { "epoch": 0.13, "grad_norm": 0.288243343301449, "learning_rate": 0.000199852824106876, "loss": 1.0883, "step": 1388 }, { "epoch": 0.13, "grad_norm": 0.252259697983258, "learning_rate": 0.00019985196489953158, "loss": 1.2147, "step": 1389 }, { "epoch": 0.13, "grad_norm": 0.26515095142272627, "learning_rate": 0.00019985110319332896, "loss": 1.0793, "step": 1390 }, { "epoch": 0.13, "grad_norm": 0.28200900869959167, "learning_rate": 0.00019985023898828972, "loss": 1.0851, "step": 1391 }, { "epoch": 0.13, "grad_norm": 0.22101241462701582, "learning_rate": 0.0001998493722844355, "loss": 1.0172, "step": 1392 }, { "epoch": 0.13, "grad_norm": 0.29717507654200653, "learning_rate": 0.000199848503081788, "loss": 1.1634, "step": 1393 }, { "epoch": 0.13, "grad_norm": 0.2734078285310837, "learning_rate": 0.00019984763138036893, "loss": 1.206, "step": 1394 }, { "epoch": 0.13, "grad_norm": 0.31430473861156777, "learning_rate": 0.00019984675718020016, "loss": 1.1355, "step": 1395 }, { "epoch": 0.13, "grad_norm": 0.25927920918090913, "learning_rate": 0.00019984588048130352, "loss": 1.0166, "step": 1396 }, { "epoch": 0.13, "grad_norm": 0.2943475145072853, "learning_rate": 0.000199845001283701, "loss": 1.0848, "step": 1397 }, { "epoch": 0.13, "grad_norm": 0.26595828195937876, "learning_rate": 0.0001998441195874145, "loss": 1.0692, "step": 1398 }, { "epoch": 0.13, "grad_norm": 0.27821823753565383, "learning_rate": 0.00019984323539246624, "loss": 1.2192, "step": 1399 }, { "epoch": 0.13, "grad_norm": 0.2678637821368645, "learning_rate": 0.00019984234869887825, "loss": 1.121, "step": 1400 }, { "epoch": 0.13, "grad_norm": 0.25399068189204, "learning_rate": 0.0001998414595066727, "loss": 1.0925, "step": 1401 }, { "epoch": 0.13, "grad_norm": 0.23859094414183193, "learning_rate": 0.00019984056781587191, "loss": 1.0955, "step": 1402 }, { "epoch": 0.13, "grad_norm": 0.23969916810545017, "learning_rate": 0.00019983967362649814, "loss": 1.0125, "step": 1403 }, { "epoch": 0.13, "grad_norm": 0.2542524563052129, "learning_rate": 0.0001998387769385738, "loss": 1.0373, "step": 1404 }, { "epoch": 0.13, "grad_norm": 0.27487621829351494, "learning_rate": 0.0001998378777521213, "loss": 1.0952, "step": 1405 }, { "epoch": 0.13, "grad_norm": 0.2654518507826389, "learning_rate": 0.0001998369760671632, "loss": 1.1369, "step": 1406 }, { "epoch": 0.13, "grad_norm": 0.273289916929377, "learning_rate": 0.000199836071883722, "loss": 1.1703, "step": 1407 }, { "epoch": 0.13, "grad_norm": 0.26544162174771085, "learning_rate": 0.0001998351652018204, "loss": 1.1784, "step": 1408 }, { "epoch": 0.13, "grad_norm": 0.2746593440350384, "learning_rate": 0.000199834256021481, "loss": 1.0443, "step": 1409 }, { "epoch": 0.13, "grad_norm": 0.2501215693772626, "learning_rate": 0.00019983334434272662, "loss": 1.089, "step": 1410 }, { "epoch": 0.13, "grad_norm": 0.268614062421362, "learning_rate": 0.00019983243016558007, "loss": 0.9597, "step": 1411 }, { "epoch": 0.14, "grad_norm": 0.25030986160663105, "learning_rate": 0.00019983151349006417, "loss": 1.2056, "step": 1412 }, { "epoch": 0.14, "grad_norm": 0.2603381688146406, "learning_rate": 0.00019983059431620195, "loss": 1.1547, "step": 1413 }, { "epoch": 0.14, "grad_norm": 0.2421120755485211, "learning_rate": 0.0001998296726440163, "loss": 1.1084, "step": 1414 }, { "epoch": 0.14, "grad_norm": 0.2521346105354822, "learning_rate": 0.00019982874847353043, "loss": 1.0624, "step": 1415 }, { "epoch": 0.14, "grad_norm": 0.27862535658172966, "learning_rate": 0.00019982782180476733, "loss": 1.1186, "step": 1416 }, { "epoch": 0.14, "grad_norm": 0.2947914398618809, "learning_rate": 0.00019982689263775026, "loss": 1.0843, "step": 1417 }, { "epoch": 0.14, "grad_norm": 0.28536726878487906, "learning_rate": 0.0001998259609725025, "loss": 1.0653, "step": 1418 }, { "epoch": 0.14, "grad_norm": 0.2854261291390814, "learning_rate": 0.00019982502680904732, "loss": 1.1266, "step": 1419 }, { "epoch": 0.14, "grad_norm": 0.2788285385425596, "learning_rate": 0.0001998240901474081, "loss": 1.1592, "step": 1420 }, { "epoch": 0.14, "grad_norm": 0.2919295150525652, "learning_rate": 0.00019982315098760825, "loss": 1.23, "step": 1421 }, { "epoch": 0.14, "grad_norm": 0.2667438613342299, "learning_rate": 0.00019982220932967135, "loss": 1.1653, "step": 1422 }, { "epoch": 0.14, "grad_norm": 0.2937856653774384, "learning_rate": 0.00019982126517362092, "loss": 1.054, "step": 1423 }, { "epoch": 0.14, "grad_norm": 0.2787585191603891, "learning_rate": 0.0001998203185194806, "loss": 1.1195, "step": 1424 }, { "epoch": 0.14, "grad_norm": 0.2808724466906523, "learning_rate": 0.00019981936936727402, "loss": 1.1324, "step": 1425 }, { "epoch": 0.14, "grad_norm": 0.24401430651001996, "learning_rate": 0.00019981841771702505, "loss": 1.138, "step": 1426 }, { "epoch": 0.14, "grad_norm": 0.25174511987554116, "learning_rate": 0.00019981746356875744, "loss": 1.1906, "step": 1427 }, { "epoch": 0.14, "grad_norm": 0.2538503054933492, "learning_rate": 0.00019981650692249504, "loss": 1.1029, "step": 1428 }, { "epoch": 0.14, "grad_norm": 0.2764211181550846, "learning_rate": 0.00019981554777826185, "loss": 1.058, "step": 1429 }, { "epoch": 0.14, "grad_norm": 0.2952219050552689, "learning_rate": 0.00019981458613608182, "loss": 1.0941, "step": 1430 }, { "epoch": 0.14, "grad_norm": 0.2787064009838231, "learning_rate": 0.00019981362199597907, "loss": 1.1565, "step": 1431 }, { "epoch": 0.14, "grad_norm": 0.2737728015545912, "learning_rate": 0.00019981265535797766, "loss": 1.181, "step": 1432 }, { "epoch": 0.14, "grad_norm": 0.24759285916434062, "learning_rate": 0.00019981168622210184, "loss": 1.1008, "step": 1433 }, { "epoch": 0.14, "grad_norm": 0.25095588755500636, "learning_rate": 0.00019981071458837586, "loss": 1.1312, "step": 1434 }, { "epoch": 0.14, "grad_norm": 0.24482769279016886, "learning_rate": 0.00019980974045682399, "loss": 1.0652, "step": 1435 }, { "epoch": 0.14, "grad_norm": 0.3060859989141741, "learning_rate": 0.00019980876382747064, "loss": 1.157, "step": 1436 }, { "epoch": 0.14, "grad_norm": 0.2880153757669946, "learning_rate": 0.00019980778470034025, "loss": 1.2203, "step": 1437 }, { "epoch": 0.14, "grad_norm": 0.27454359894274216, "learning_rate": 0.00019980680307545733, "loss": 1.0434, "step": 1438 }, { "epoch": 0.14, "grad_norm": 0.2883237368727595, "learning_rate": 0.00019980581895284646, "loss": 1.1272, "step": 1439 }, { "epoch": 0.14, "grad_norm": 0.2703448143010634, "learning_rate": 0.0001998048323325322, "loss": 1.0698, "step": 1440 }, { "epoch": 0.14, "grad_norm": 0.2583337496657767, "learning_rate": 0.00019980384321453931, "loss": 1.0424, "step": 1441 }, { "epoch": 0.14, "grad_norm": 0.2698951572905667, "learning_rate": 0.00019980285159889251, "loss": 1.1694, "step": 1442 }, { "epoch": 0.14, "grad_norm": 0.3289970000039555, "learning_rate": 0.00019980185748561663, "loss": 1.1242, "step": 1443 }, { "epoch": 0.14, "grad_norm": 0.33181434848256175, "learning_rate": 0.00019980086087473655, "loss": 1.2397, "step": 1444 }, { "epoch": 0.14, "grad_norm": 0.3011135638992476, "learning_rate": 0.0001997998617662772, "loss": 1.1714, "step": 1445 }, { "epoch": 0.14, "grad_norm": 0.26023630385034113, "learning_rate": 0.0001997988601602636, "loss": 1.1072, "step": 1446 }, { "epoch": 0.14, "grad_norm": 0.27251940674998737, "learning_rate": 0.00019979785605672078, "loss": 1.0825, "step": 1447 }, { "epoch": 0.14, "grad_norm": 0.2554656785010738, "learning_rate": 0.0001997968494556739, "loss": 1.0752, "step": 1448 }, { "epoch": 0.14, "grad_norm": 0.23684498161731513, "learning_rate": 0.00019979584035714813, "loss": 1.06, "step": 1449 }, { "epoch": 0.14, "grad_norm": 0.31872148847175163, "learning_rate": 0.00019979482876116876, "loss": 1.1667, "step": 1450 }, { "epoch": 0.14, "grad_norm": 0.25697407458502153, "learning_rate": 0.00019979381466776107, "loss": 1.0822, "step": 1451 }, { "epoch": 0.14, "grad_norm": 0.2680749561732018, "learning_rate": 0.00019979279807695046, "loss": 1.1473, "step": 1452 }, { "epoch": 0.14, "grad_norm": 0.23738996733574236, "learning_rate": 0.00019979177898876233, "loss": 1.1218, "step": 1453 }, { "epoch": 0.14, "grad_norm": 0.2518660152482727, "learning_rate": 0.00019979075740322224, "loss": 1.1364, "step": 1454 }, { "epoch": 0.14, "grad_norm": 0.2843340075762548, "learning_rate": 0.00019978973332035574, "loss": 1.1052, "step": 1455 }, { "epoch": 0.14, "grad_norm": 0.23668848969056627, "learning_rate": 0.0001997887067401884, "loss": 1.1692, "step": 1456 }, { "epoch": 0.14, "grad_norm": 0.28502539435405605, "learning_rate": 0.000199787677662746, "loss": 1.2374, "step": 1457 }, { "epoch": 0.14, "grad_norm": 0.25655559740700473, "learning_rate": 0.00019978664608805423, "loss": 1.1088, "step": 1458 }, { "epoch": 0.14, "grad_norm": 0.2585840843019671, "learning_rate": 0.00019978561201613895, "loss": 1.1601, "step": 1459 }, { "epoch": 0.14, "grad_norm": 0.2550422041426665, "learning_rate": 0.00019978457544702602, "loss": 1.1033, "step": 1460 }, { "epoch": 0.14, "grad_norm": 0.2899278429933367, "learning_rate": 0.00019978353638074137, "loss": 1.1611, "step": 1461 }, { "epoch": 0.14, "grad_norm": 0.234358263645623, "learning_rate": 0.000199782494817311, "loss": 1.0151, "step": 1462 }, { "epoch": 0.14, "grad_norm": 0.24241424067059672, "learning_rate": 0.00019978145075676097, "loss": 1.1854, "step": 1463 }, { "epoch": 0.14, "grad_norm": 0.2685159364940143, "learning_rate": 0.00019978040419911744, "loss": 1.0691, "step": 1464 }, { "epoch": 0.14, "grad_norm": 0.31415333163030407, "learning_rate": 0.0001997793551444066, "loss": 1.0344, "step": 1465 }, { "epoch": 0.14, "grad_norm": 0.2366061685991993, "learning_rate": 0.00019977830359265472, "loss": 1.1004, "step": 1466 }, { "epoch": 0.14, "grad_norm": 0.2485341188969311, "learning_rate": 0.00019977724954388802, "loss": 1.1078, "step": 1467 }, { "epoch": 0.14, "grad_norm": 0.29387809184755664, "learning_rate": 0.00019977619299813297, "loss": 1.1114, "step": 1468 }, { "epoch": 0.14, "grad_norm": 0.2652962832837201, "learning_rate": 0.00019977513395541598, "loss": 1.1429, "step": 1469 }, { "epoch": 0.14, "grad_norm": 0.296485600586474, "learning_rate": 0.00019977407241576355, "loss": 1.1725, "step": 1470 }, { "epoch": 0.14, "grad_norm": 0.25206336596375595, "learning_rate": 0.00019977300837920227, "loss": 1.1611, "step": 1471 }, { "epoch": 0.14, "grad_norm": 0.2550564024652486, "learning_rate": 0.00019977194184575873, "loss": 1.1339, "step": 1472 }, { "epoch": 0.14, "grad_norm": 0.27675386051052386, "learning_rate": 0.00019977087281545966, "loss": 1.1179, "step": 1473 }, { "epoch": 0.14, "grad_norm": 0.2615226908183446, "learning_rate": 0.00019976980128833178, "loss": 1.0797, "step": 1474 }, { "epoch": 0.14, "grad_norm": 0.268113224944456, "learning_rate": 0.00019976872726440193, "loss": 1.1163, "step": 1475 }, { "epoch": 0.14, "grad_norm": 0.23912506420391394, "learning_rate": 0.00019976765074369697, "loss": 1.099, "step": 1476 }, { "epoch": 0.14, "grad_norm": 0.28513459887618126, "learning_rate": 0.00019976657172624383, "loss": 1.0811, "step": 1477 }, { "epoch": 0.14, "grad_norm": 0.28114711060770275, "learning_rate": 0.00019976549021206958, "loss": 1.2168, "step": 1478 }, { "epoch": 0.14, "grad_norm": 0.2572724456046117, "learning_rate": 0.0001997644062012012, "loss": 1.0752, "step": 1479 }, { "epoch": 0.14, "grad_norm": 0.3019482336468289, "learning_rate": 0.00019976331969366587, "loss": 1.1129, "step": 1480 }, { "epoch": 0.14, "grad_norm": 0.2566195486989078, "learning_rate": 0.00019976223068949076, "loss": 1.1825, "step": 1481 }, { "epoch": 0.14, "grad_norm": 0.27980667259100733, "learning_rate": 0.00019976113918870314, "loss": 1.1828, "step": 1482 }, { "epoch": 0.14, "grad_norm": 0.30150809201942436, "learning_rate": 0.0001997600451913303, "loss": 1.1247, "step": 1483 }, { "epoch": 0.14, "grad_norm": 0.2716498140574145, "learning_rate": 0.0001997589486973996, "loss": 1.1254, "step": 1484 }, { "epoch": 0.14, "grad_norm": 0.2625386839698671, "learning_rate": 0.00019975784970693855, "loss": 1.1081, "step": 1485 }, { "epoch": 0.14, "grad_norm": 0.29792002537853113, "learning_rate": 0.00019975674821997463, "loss": 1.1597, "step": 1486 }, { "epoch": 0.14, "grad_norm": 0.2301276867707596, "learning_rate": 0.00019975564423653538, "loss": 1.1342, "step": 1487 }, { "epoch": 0.14, "grad_norm": 0.27119741051691004, "learning_rate": 0.00019975453775664844, "loss": 1.0453, "step": 1488 }, { "epoch": 0.14, "grad_norm": 0.25640967867486597, "learning_rate": 0.0001997534287803415, "loss": 1.0959, "step": 1489 }, { "epoch": 0.14, "grad_norm": 0.2817881208330018, "learning_rate": 0.00019975231730764227, "loss": 1.1004, "step": 1490 }, { "epoch": 0.14, "grad_norm": 0.2632972390210799, "learning_rate": 0.00019975120333857866, "loss": 1.0682, "step": 1491 }, { "epoch": 0.14, "grad_norm": 0.256251122254461, "learning_rate": 0.0001997500868731785, "loss": 1.1663, "step": 1492 }, { "epoch": 0.14, "grad_norm": 0.26242859708220295, "learning_rate": 0.0001997489679114697, "loss": 1.0843, "step": 1493 }, { "epoch": 0.14, "grad_norm": 0.28009382446859793, "learning_rate": 0.0001997478464534803, "loss": 1.1744, "step": 1494 }, { "epoch": 0.14, "grad_norm": 0.2448192879010323, "learning_rate": 0.0001997467224992383, "loss": 1.1268, "step": 1495 }, { "epoch": 0.14, "grad_norm": 0.23428824139945728, "learning_rate": 0.00019974559604877195, "loss": 1.0997, "step": 1496 }, { "epoch": 0.14, "grad_norm": 0.2833166934983789, "learning_rate": 0.00019974446710210934, "loss": 1.0867, "step": 1497 }, { "epoch": 0.14, "grad_norm": 0.25022212744952455, "learning_rate": 0.00019974333565927878, "loss": 1.0903, "step": 1498 }, { "epoch": 0.14, "grad_norm": 0.26855972657496696, "learning_rate": 0.00019974220172030852, "loss": 1.0304, "step": 1499 }, { "epoch": 0.14, "grad_norm": 0.26473122846509034, "learning_rate": 0.000199741065285227, "loss": 1.0981, "step": 1500 }, { "epoch": 0.14, "grad_norm": 0.2915333904654244, "learning_rate": 0.00019973992635406265, "loss": 1.157, "step": 1501 }, { "epoch": 0.14, "grad_norm": 0.37217781009539613, "learning_rate": 0.00019973878492684393, "loss": 1.1401, "step": 1502 }, { "epoch": 0.14, "grad_norm": 0.2745430144825149, "learning_rate": 0.00019973764100359942, "loss": 1.1318, "step": 1503 }, { "epoch": 0.14, "grad_norm": 0.2755977808247509, "learning_rate": 0.0001997364945843578, "loss": 1.1609, "step": 1504 }, { "epoch": 0.14, "grad_norm": 0.2915733276431069, "learning_rate": 0.00019973534566914772, "loss": 1.086, "step": 1505 }, { "epoch": 0.14, "grad_norm": 0.2599783190286603, "learning_rate": 0.00019973419425799792, "loss": 1.1108, "step": 1506 }, { "epoch": 0.14, "grad_norm": 0.280753933923634, "learning_rate": 0.0001997330403509372, "loss": 1.147, "step": 1507 }, { "epoch": 0.14, "grad_norm": 0.27055165527737557, "learning_rate": 0.0001997318839479945, "loss": 1.0952, "step": 1508 }, { "epoch": 0.14, "grad_norm": 0.2491475388699138, "learning_rate": 0.00019973072504919875, "loss": 1.0893, "step": 1509 }, { "epoch": 0.14, "grad_norm": 0.22529007248240643, "learning_rate": 0.00019972956365457887, "loss": 0.9453, "step": 1510 }, { "epoch": 0.14, "grad_norm": 0.29669549846388793, "learning_rate": 0.000199728399764164, "loss": 1.2225, "step": 1511 }, { "epoch": 0.14, "grad_norm": 0.27716026577726227, "learning_rate": 0.00019972723337798327, "loss": 1.2158, "step": 1512 }, { "epoch": 0.14, "grad_norm": 0.2615727080933855, "learning_rate": 0.00019972606449606583, "loss": 1.1745, "step": 1513 }, { "epoch": 0.14, "grad_norm": 0.2613301650841691, "learning_rate": 0.00019972489311844097, "loss": 1.0105, "step": 1514 }, { "epoch": 0.14, "grad_norm": 0.2637350666912871, "learning_rate": 0.00019972371924513796, "loss": 1.2445, "step": 1515 }, { "epoch": 0.15, "grad_norm": 0.23442111345358183, "learning_rate": 0.0001997225428761862, "loss": 1.0897, "step": 1516 }, { "epoch": 0.15, "grad_norm": 0.2790398743404188, "learning_rate": 0.00019972136401161516, "loss": 1.0976, "step": 1517 }, { "epoch": 0.15, "grad_norm": 0.24161573374816125, "learning_rate": 0.00019972018265145428, "loss": 1.0778, "step": 1518 }, { "epoch": 0.15, "grad_norm": 0.2599798006860897, "learning_rate": 0.00019971899879573317, "loss": 1.0797, "step": 1519 }, { "epoch": 0.15, "grad_norm": 0.24106641987255334, "learning_rate": 0.00019971781244448145, "loss": 1.0863, "step": 1520 }, { "epoch": 0.15, "grad_norm": 0.28462310372801436, "learning_rate": 0.0001997166235977288, "loss": 1.2261, "step": 1521 }, { "epoch": 0.15, "grad_norm": 0.2715980154291507, "learning_rate": 0.00019971543225550498, "loss": 1.074, "step": 1522 }, { "epoch": 0.15, "grad_norm": 0.24961681655649778, "learning_rate": 0.0001997142384178398, "loss": 1.102, "step": 1523 }, { "epoch": 0.15, "grad_norm": 0.27808524258122086, "learning_rate": 0.00019971304208476313, "loss": 1.0456, "step": 1524 }, { "epoch": 0.15, "grad_norm": 0.272393710564596, "learning_rate": 0.0001997118432563049, "loss": 0.9979, "step": 1525 }, { "epoch": 0.15, "grad_norm": 0.2985225834683353, "learning_rate": 0.00019971064193249517, "loss": 1.1062, "step": 1526 }, { "epoch": 0.15, "grad_norm": 0.31751577230140715, "learning_rate": 0.0001997094381133639, "loss": 1.1574, "step": 1527 }, { "epoch": 0.15, "grad_norm": 0.2311208784177862, "learning_rate": 0.00019970823179894134, "loss": 1.0605, "step": 1528 }, { "epoch": 0.15, "grad_norm": 0.27395265358450693, "learning_rate": 0.00019970702298925756, "loss": 1.0394, "step": 1529 }, { "epoch": 0.15, "grad_norm": 0.31050969831593717, "learning_rate": 0.0001997058116843429, "loss": 1.21, "step": 1530 }, { "epoch": 0.15, "grad_norm": 0.29740261342037966, "learning_rate": 0.00019970459788422762, "loss": 1.0973, "step": 1531 }, { "epoch": 0.15, "grad_norm": 0.30710726934897853, "learning_rate": 0.00019970338158894213, "loss": 1.0899, "step": 1532 }, { "epoch": 0.15, "grad_norm": 0.38054688041895546, "learning_rate": 0.00019970216279851686, "loss": 1.0608, "step": 1533 }, { "epoch": 0.15, "grad_norm": 0.26727890820952926, "learning_rate": 0.0001997009415129823, "loss": 1.1018, "step": 1534 }, { "epoch": 0.15, "grad_norm": 0.29384471950665425, "learning_rate": 0.000199699717732369, "loss": 1.088, "step": 1535 }, { "epoch": 0.15, "grad_norm": 0.245541461264911, "learning_rate": 0.00019969849145670763, "loss": 1.0829, "step": 1536 }, { "epoch": 0.15, "grad_norm": 0.3031137108785239, "learning_rate": 0.0001996972626860288, "loss": 1.138, "step": 1537 }, { "epoch": 0.15, "grad_norm": 0.27477763946901834, "learning_rate": 0.0001996960314203634, "loss": 1.1612, "step": 1538 }, { "epoch": 0.15, "grad_norm": 0.2970092386217553, "learning_rate": 0.0001996947976597421, "loss": 1.0688, "step": 1539 }, { "epoch": 0.15, "grad_norm": 0.3258862801086761, "learning_rate": 0.00019969356140419584, "loss": 1.1302, "step": 1540 }, { "epoch": 0.15, "grad_norm": 0.2995257919569518, "learning_rate": 0.00019969232265375556, "loss": 1.0475, "step": 1541 }, { "epoch": 0.15, "grad_norm": 0.26050135512792294, "learning_rate": 0.00019969108140845224, "loss": 1.1356, "step": 1542 }, { "epoch": 0.15, "grad_norm": 0.258577882146825, "learning_rate": 0.00019968983766831695, "loss": 1.0545, "step": 1543 }, { "epoch": 0.15, "grad_norm": 0.2627007677252112, "learning_rate": 0.00019968859143338084, "loss": 1.1386, "step": 1544 }, { "epoch": 0.15, "grad_norm": 0.2633365267212909, "learning_rate": 0.00019968734270367505, "loss": 1.1784, "step": 1545 }, { "epoch": 0.15, "grad_norm": 0.28411120690910896, "learning_rate": 0.0001996860914792309, "loss": 1.0888, "step": 1546 }, { "epoch": 0.15, "grad_norm": 0.26646479947544477, "learning_rate": 0.00019968483776007962, "loss": 1.1659, "step": 1547 }, { "epoch": 0.15, "grad_norm": 0.24786146098955286, "learning_rate": 0.00019968358154625265, "loss": 1.2185, "step": 1548 }, { "epoch": 0.15, "grad_norm": 0.23607348527883257, "learning_rate": 0.0001996823228377814, "loss": 1.0735, "step": 1549 }, { "epoch": 0.15, "grad_norm": 0.2570468565891483, "learning_rate": 0.00019968106163469735, "loss": 1.0616, "step": 1550 }, { "epoch": 0.15, "grad_norm": 0.25852020457845143, "learning_rate": 0.00019967979793703212, "loss": 1.093, "step": 1551 }, { "epoch": 0.15, "grad_norm": 0.28278864970040046, "learning_rate": 0.00019967853174481727, "loss": 1.0388, "step": 1552 }, { "epoch": 0.15, "grad_norm": 0.2941535073545967, "learning_rate": 0.00019967726305808453, "loss": 1.0706, "step": 1553 }, { "epoch": 0.15, "grad_norm": 0.2622528773036849, "learning_rate": 0.00019967599187686562, "loss": 1.0883, "step": 1554 }, { "epoch": 0.15, "grad_norm": 0.27742024001398574, "learning_rate": 0.00019967471820119242, "loss": 1.0728, "step": 1555 }, { "epoch": 0.15, "grad_norm": 0.3052593236911406, "learning_rate": 0.00019967344203109671, "loss": 1.1789, "step": 1556 }, { "epoch": 0.15, "grad_norm": 0.28599434696605697, "learning_rate": 0.0001996721633666105, "loss": 1.1562, "step": 1557 }, { "epoch": 0.15, "grad_norm": 0.24494683600954692, "learning_rate": 0.00019967088220776573, "loss": 1.2048, "step": 1558 }, { "epoch": 0.15, "grad_norm": 0.25067138267095485, "learning_rate": 0.0001996695985545945, "loss": 1.1256, "step": 1559 }, { "epoch": 0.15, "grad_norm": 0.3027501173185624, "learning_rate": 0.00019966831240712893, "loss": 1.1089, "step": 1560 }, { "epoch": 0.15, "grad_norm": 0.29832312031178204, "learning_rate": 0.0001996670237654012, "loss": 1.1435, "step": 1561 }, { "epoch": 0.15, "grad_norm": 0.2602686000520101, "learning_rate": 0.00019966573262944357, "loss": 1.2332, "step": 1562 }, { "epoch": 0.15, "grad_norm": 0.2805057631189005, "learning_rate": 0.00019966443899928831, "loss": 1.0837, "step": 1563 }, { "epoch": 0.15, "grad_norm": 0.38166616945412846, "learning_rate": 0.00019966314287496787, "loss": 1.059, "step": 1564 }, { "epoch": 0.15, "grad_norm": 0.2764938914379305, "learning_rate": 0.00019966184425651464, "loss": 1.1139, "step": 1565 }, { "epoch": 0.15, "grad_norm": 0.24932842836394434, "learning_rate": 0.0001996605431439611, "loss": 1.1142, "step": 1566 }, { "epoch": 0.15, "grad_norm": 0.29196882597427254, "learning_rate": 0.00019965923953733987, "loss": 1.1607, "step": 1567 }, { "epoch": 0.15, "grad_norm": 0.2709530754699249, "learning_rate": 0.00019965793343668347, "loss": 1.0495, "step": 1568 }, { "epoch": 0.15, "grad_norm": 0.2598432982826659, "learning_rate": 0.0001996566248420247, "loss": 1.1288, "step": 1569 }, { "epoch": 0.15, "grad_norm": 0.46312766196550453, "learning_rate": 0.00019965531375339628, "loss": 1.1307, "step": 1570 }, { "epoch": 0.15, "grad_norm": 0.26262906846828266, "learning_rate": 0.00019965400017083097, "loss": 1.1543, "step": 1571 }, { "epoch": 0.15, "grad_norm": 0.25314149624310883, "learning_rate": 0.00019965268409436168, "loss": 1.0466, "step": 1572 }, { "epoch": 0.15, "grad_norm": 0.2687356408453828, "learning_rate": 0.00019965136552402136, "loss": 1.1159, "step": 1573 }, { "epoch": 0.15, "grad_norm": 0.2708428789609682, "learning_rate": 0.00019965004445984298, "loss": 1.055, "step": 1574 }, { "epoch": 0.15, "grad_norm": 0.3839155117397319, "learning_rate": 0.0001996487209018596, "loss": 1.0844, "step": 1575 }, { "epoch": 0.15, "grad_norm": 0.7734934544648255, "learning_rate": 0.00019964739485010436, "loss": 1.1704, "step": 1576 }, { "epoch": 0.15, "grad_norm": 0.2985808342595769, "learning_rate": 0.00019964606630461042, "loss": 1.1233, "step": 1577 }, { "epoch": 0.15, "grad_norm": 0.28199357321934904, "learning_rate": 0.00019964473526541107, "loss": 1.1306, "step": 1578 }, { "epoch": 0.15, "grad_norm": 0.2708719269403245, "learning_rate": 0.0001996434017325396, "loss": 1.2283, "step": 1579 }, { "epoch": 0.15, "grad_norm": 0.2792985547025937, "learning_rate": 0.00019964206570602936, "loss": 1.1385, "step": 1580 }, { "epoch": 0.15, "grad_norm": 0.23415050247231725, "learning_rate": 0.0001996407271859138, "loss": 1.0389, "step": 1581 }, { "epoch": 0.15, "grad_norm": 0.2740160527199919, "learning_rate": 0.00019963938617222643, "loss": 1.1243, "step": 1582 }, { "epoch": 0.15, "grad_norm": 0.27399387661110913, "learning_rate": 0.0001996380426650008, "loss": 1.1087, "step": 1583 }, { "epoch": 0.15, "grad_norm": 0.2692759658009343, "learning_rate": 0.0001996366966642705, "loss": 1.1163, "step": 1584 }, { "epoch": 0.15, "grad_norm": 0.9607464989916183, "learning_rate": 0.0001996353481700693, "loss": 1.0442, "step": 1585 }, { "epoch": 0.15, "grad_norm": 0.24459484067831805, "learning_rate": 0.00019963399718243084, "loss": 1.0841, "step": 1586 }, { "epoch": 0.15, "grad_norm": 0.2762521737160385, "learning_rate": 0.00019963264370138903, "loss": 1.1496, "step": 1587 }, { "epoch": 0.15, "grad_norm": 0.25933505174178323, "learning_rate": 0.0001996312877269777, "loss": 1.2522, "step": 1588 }, { "epoch": 0.15, "grad_norm": 0.27189774149978774, "learning_rate": 0.00019962992925923073, "loss": 1.1413, "step": 1589 }, { "epoch": 0.15, "grad_norm": 3.576402306240423, "learning_rate": 0.00019962856829818223, "loss": 1.1702, "step": 1590 }, { "epoch": 0.15, "grad_norm": 0.28151915869781446, "learning_rate": 0.00019962720484386614, "loss": 1.1608, "step": 1591 }, { "epoch": 0.15, "grad_norm": 3.487170407807799, "learning_rate": 0.00019962583889631663, "loss": 1.1038, "step": 1592 }, { "epoch": 0.15, "grad_norm": 0.25611542436265444, "learning_rate": 0.00019962447045556792, "loss": 1.0629, "step": 1593 }, { "epoch": 0.15, "grad_norm": 0.24414987846237798, "learning_rate": 0.00019962309952165425, "loss": 1.0264, "step": 1594 }, { "epoch": 0.15, "grad_norm": 0.26011148045017146, "learning_rate": 0.00019962172609460982, "loss": 1.0993, "step": 1595 }, { "epoch": 0.15, "grad_norm": 0.2618849192631391, "learning_rate": 0.00019962035017446916, "loss": 1.1054, "step": 1596 }, { "epoch": 0.15, "grad_norm": 0.24456979964789494, "learning_rate": 0.0001996189717612666, "loss": 1.1605, "step": 1597 }, { "epoch": 0.15, "grad_norm": 0.2832924711395636, "learning_rate": 0.00019961759085503666, "loss": 1.1245, "step": 1598 }, { "epoch": 0.15, "grad_norm": 0.24882178646084718, "learning_rate": 0.00019961620745581387, "loss": 1.0725, "step": 1599 }, { "epoch": 0.15, "grad_norm": 0.22925300602806323, "learning_rate": 0.00019961482156363296, "loss": 1.0953, "step": 1600 }, { "epoch": 0.15, "grad_norm": 0.2531531358177607, "learning_rate": 0.00019961343317852846, "loss": 1.0204, "step": 1601 }, { "epoch": 0.15, "grad_norm": 0.2738032960535575, "learning_rate": 0.00019961204230053525, "loss": 1.1092, "step": 1602 }, { "epoch": 0.15, "grad_norm": 0.2731400838916497, "learning_rate": 0.00019961064892968806, "loss": 1.1173, "step": 1603 }, { "epoch": 0.15, "grad_norm": 0.2493741546517925, "learning_rate": 0.00019960925306602176, "loss": 1.0769, "step": 1604 }, { "epoch": 0.15, "grad_norm": 0.25050950474007155, "learning_rate": 0.0001996078547095713, "loss": 1.133, "step": 1605 }, { "epoch": 0.15, "grad_norm": 0.29817105037980673, "learning_rate": 0.0001996064538603717, "loss": 1.1855, "step": 1606 }, { "epoch": 0.15, "grad_norm": 0.28263632425868207, "learning_rate": 0.00019960505051845796, "loss": 1.136, "step": 1607 }, { "epoch": 0.15, "grad_norm": 0.23133396852491225, "learning_rate": 0.00019960364468386526, "loss": 0.9476, "step": 1608 }, { "epoch": 0.15, "grad_norm": 0.2775442108767544, "learning_rate": 0.00019960223635662874, "loss": 1.1606, "step": 1609 }, { "epoch": 0.15, "grad_norm": 0.2805090162171342, "learning_rate": 0.00019960082553678365, "loss": 1.143, "step": 1610 }, { "epoch": 0.15, "grad_norm": 0.2537997461349602, "learning_rate": 0.0001995994122243653, "loss": 1.1834, "step": 1611 }, { "epoch": 0.15, "grad_norm": 0.32560983069595756, "learning_rate": 0.00019959799641940907, "loss": 0.9919, "step": 1612 }, { "epoch": 0.15, "grad_norm": 0.2777624913037504, "learning_rate": 0.0001995965781219504, "loss": 1.1872, "step": 1613 }, { "epoch": 0.15, "grad_norm": 0.2758857420110973, "learning_rate": 0.00019959515733202477, "loss": 1.137, "step": 1614 }, { "epoch": 0.15, "grad_norm": 0.256755743287946, "learning_rate": 0.0001995937340496677, "loss": 1.0922, "step": 1615 }, { "epoch": 0.15, "grad_norm": 0.2517079524462437, "learning_rate": 0.00019959230827491488, "loss": 1.0859, "step": 1616 }, { "epoch": 0.15, "grad_norm": 0.2724013994730525, "learning_rate": 0.00019959088000780193, "loss": 1.0288, "step": 1617 }, { "epoch": 0.15, "grad_norm": 0.3008600441450719, "learning_rate": 0.00019958944924836463, "loss": 1.044, "step": 1618 }, { "epoch": 0.15, "grad_norm": 0.29350285017468836, "learning_rate": 0.00019958801599663877, "loss": 1.1309, "step": 1619 }, { "epoch": 0.15, "grad_norm": 0.26935905493782025, "learning_rate": 0.0001995865802526602, "loss": 1.1497, "step": 1620 }, { "epoch": 0.16, "grad_norm": 0.2619749727571851, "learning_rate": 0.0001995851420164649, "loss": 1.226, "step": 1621 }, { "epoch": 0.16, "grad_norm": 0.2566559504175784, "learning_rate": 0.00019958370128808883, "loss": 1.126, "step": 1622 }, { "epoch": 0.16, "grad_norm": 0.2950868154557528, "learning_rate": 0.00019958225806756806, "loss": 1.1165, "step": 1623 }, { "epoch": 0.16, "grad_norm": 0.262703858238673, "learning_rate": 0.00019958081235493867, "loss": 1.1535, "step": 1624 }, { "epoch": 0.16, "grad_norm": 0.27043560750628914, "learning_rate": 0.00019957936415023687, "loss": 1.1192, "step": 1625 }, { "epoch": 0.16, "grad_norm": 0.263242304281921, "learning_rate": 0.00019957791345349892, "loss": 1.0326, "step": 1626 }, { "epoch": 0.16, "grad_norm": 0.2815449484727248, "learning_rate": 0.0001995764602647611, "loss": 1.0835, "step": 1627 }, { "epoch": 0.16, "grad_norm": 0.2868280654211211, "learning_rate": 0.00019957500458405976, "loss": 1.1983, "step": 1628 }, { "epoch": 0.16, "grad_norm": 0.31775561559603943, "learning_rate": 0.00019957354641143136, "loss": 1.163, "step": 1629 }, { "epoch": 0.16, "grad_norm": 0.2962143104484358, "learning_rate": 0.00019957208574691238, "loss": 1.2085, "step": 1630 }, { "epoch": 0.16, "grad_norm": 0.33134737467969283, "learning_rate": 0.0001995706225905394, "loss": 1.1736, "step": 1631 }, { "epoch": 0.16, "grad_norm": 0.2973914160477879, "learning_rate": 0.00019956915694234895, "loss": 1.1877, "step": 1632 }, { "epoch": 0.16, "grad_norm": 0.23797305266943983, "learning_rate": 0.00019956768880237781, "loss": 1.13, "step": 1633 }, { "epoch": 0.16, "grad_norm": 0.28430135940967705, "learning_rate": 0.0001995662181706627, "loss": 1.1628, "step": 1634 }, { "epoch": 0.16, "grad_norm": 0.2606274355294148, "learning_rate": 0.00019956474504724038, "loss": 1.1124, "step": 1635 }, { "epoch": 0.16, "grad_norm": 0.26540643513755174, "learning_rate": 0.00019956326943214775, "loss": 1.0509, "step": 1636 }, { "epoch": 0.16, "grad_norm": 0.25782459518811457, "learning_rate": 0.00019956179132542173, "loss": 1.0932, "step": 1637 }, { "epoch": 0.16, "grad_norm": 0.24697183220142635, "learning_rate": 0.00019956031072709932, "loss": 1.2274, "step": 1638 }, { "epoch": 0.16, "grad_norm": 0.2609501474414402, "learning_rate": 0.0001995588276372175, "loss": 1.0552, "step": 1639 }, { "epoch": 0.16, "grad_norm": 0.2649049635321031, "learning_rate": 0.00019955734205581352, "loss": 0.9704, "step": 1640 }, { "epoch": 0.16, "grad_norm": 0.2593017975988553, "learning_rate": 0.00019955585398292447, "loss": 1.1551, "step": 1641 }, { "epoch": 0.16, "grad_norm": 0.25956189155243276, "learning_rate": 0.0001995543634185876, "loss": 1.1989, "step": 1642 }, { "epoch": 0.16, "grad_norm": 0.2560637809135954, "learning_rate": 0.0001995528703628402, "loss": 1.1312, "step": 1643 }, { "epoch": 0.16, "grad_norm": 0.27697911347482446, "learning_rate": 0.00019955137481571968, "loss": 1.2054, "step": 1644 }, { "epoch": 0.16, "grad_norm": 0.24778928635339068, "learning_rate": 0.00019954987677726343, "loss": 1.1358, "step": 1645 }, { "epoch": 0.16, "grad_norm": 0.26610781310481363, "learning_rate": 0.00019954837624750895, "loss": 1.1007, "step": 1646 }, { "epoch": 0.16, "grad_norm": 0.2799296986376897, "learning_rate": 0.0001995468732264938, "loss": 1.0621, "step": 1647 }, { "epoch": 0.16, "grad_norm": 0.26342475793330683, "learning_rate": 0.00019954536771425556, "loss": 1.1325, "step": 1648 }, { "epoch": 0.16, "grad_norm": 0.2809166549010256, "learning_rate": 0.00019954385971083193, "loss": 1.0778, "step": 1649 }, { "epoch": 0.16, "grad_norm": 0.28395265589643537, "learning_rate": 0.00019954234921626068, "loss": 0.9792, "step": 1650 }, { "epoch": 0.16, "grad_norm": 0.3037040271080715, "learning_rate": 0.00019954083623057955, "loss": 1.1754, "step": 1651 }, { "epoch": 0.16, "grad_norm": 0.23645934008105318, "learning_rate": 0.00019953932075382646, "loss": 1.1307, "step": 1652 }, { "epoch": 0.16, "grad_norm": 0.2723890058669645, "learning_rate": 0.00019953780278603932, "loss": 1.1161, "step": 1653 }, { "epoch": 0.16, "grad_norm": 0.2607964386960627, "learning_rate": 0.00019953628232725608, "loss": 1.1741, "step": 1654 }, { "epoch": 0.16, "grad_norm": 0.27487552426540823, "learning_rate": 0.0001995347593775148, "loss": 1.131, "step": 1655 }, { "epoch": 0.16, "grad_norm": 0.24819637701868438, "learning_rate": 0.00019953323393685367, "loss": 1.1246, "step": 1656 }, { "epoch": 0.16, "grad_norm": 0.2880503601951672, "learning_rate": 0.00019953170600531074, "loss": 1.1414, "step": 1657 }, { "epoch": 0.16, "grad_norm": 0.2661262998136285, "learning_rate": 0.00019953017558292438, "loss": 1.1857, "step": 1658 }, { "epoch": 0.16, "grad_norm": 0.2660986350757843, "learning_rate": 0.00019952864266973278, "loss": 1.1092, "step": 1659 }, { "epoch": 0.16, "grad_norm": 0.2713659116488214, "learning_rate": 0.00019952710726577435, "loss": 1.0772, "step": 1660 }, { "epoch": 0.16, "grad_norm": 0.29223182156019084, "learning_rate": 0.00019952556937108753, "loss": 1.0789, "step": 1661 }, { "epoch": 0.16, "grad_norm": 0.2582755115758311, "learning_rate": 0.00019952402898571077, "loss": 1.1875, "step": 1662 }, { "epoch": 0.16, "grad_norm": 0.2654852860667591, "learning_rate": 0.00019952248610968264, "loss": 1.0323, "step": 1663 }, { "epoch": 0.16, "grad_norm": 0.2935124336052021, "learning_rate": 0.00019952094074304175, "loss": 0.9859, "step": 1664 }, { "epoch": 0.16, "grad_norm": 0.2546178333136953, "learning_rate": 0.00019951939288582676, "loss": 1.0756, "step": 1665 }, { "epoch": 0.16, "grad_norm": 0.3002327247319587, "learning_rate": 0.0001995178425380764, "loss": 1.1356, "step": 1666 }, { "epoch": 0.16, "grad_norm": 0.2792232404834516, "learning_rate": 0.00019951628969982953, "loss": 1.1251, "step": 1667 }, { "epoch": 0.16, "grad_norm": 0.28042254510601033, "learning_rate": 0.00019951473437112495, "loss": 1.0406, "step": 1668 }, { "epoch": 0.16, "grad_norm": 0.2869293910747367, "learning_rate": 0.0001995131765520016, "loss": 1.0321, "step": 1669 }, { "epoch": 0.16, "grad_norm": 0.26731773733638436, "learning_rate": 0.00019951161624249844, "loss": 1.1865, "step": 1670 }, { "epoch": 0.16, "grad_norm": 0.26494146088310505, "learning_rate": 0.00019951005344265462, "loss": 1.105, "step": 1671 }, { "epoch": 0.16, "grad_norm": 0.26633126397973417, "learning_rate": 0.0001995084881525091, "loss": 1.0366, "step": 1672 }, { "epoch": 0.16, "grad_norm": 0.27563381490550426, "learning_rate": 0.00019950692037210113, "loss": 1.1346, "step": 1673 }, { "epoch": 0.16, "grad_norm": 0.2808572907767721, "learning_rate": 0.00019950535010146994, "loss": 1.1304, "step": 1674 }, { "epoch": 0.16, "grad_norm": 0.249082820217972, "learning_rate": 0.00019950377734065486, "loss": 1.1375, "step": 1675 }, { "epoch": 0.16, "grad_norm": 0.242030163260417, "learning_rate": 0.00019950220208969519, "loss": 1.0647, "step": 1676 }, { "epoch": 0.16, "grad_norm": 0.23873021173771233, "learning_rate": 0.00019950062434863038, "loss": 1.1427, "step": 1677 }, { "epoch": 0.16, "grad_norm": 0.2526857334515431, "learning_rate": 0.00019949904411749995, "loss": 1.0652, "step": 1678 }, { "epoch": 0.16, "grad_norm": 0.22005647549892257, "learning_rate": 0.00019949746139634336, "loss": 1.2141, "step": 1679 }, { "epoch": 0.16, "grad_norm": 0.29830008364083893, "learning_rate": 0.0001994958761852003, "loss": 0.9724, "step": 1680 }, { "epoch": 0.16, "grad_norm": 0.2336810810860809, "learning_rate": 0.00019949428848411036, "loss": 1.0718, "step": 1681 }, { "epoch": 0.16, "grad_norm": 0.2596500357999934, "learning_rate": 0.00019949269829311336, "loss": 1.1449, "step": 1682 }, { "epoch": 0.16, "grad_norm": 0.28287000384537997, "learning_rate": 0.00019949110561224905, "loss": 1.1513, "step": 1683 }, { "epoch": 0.16, "grad_norm": 0.27286503590527955, "learning_rate": 0.00019948951044155728, "loss": 1.095, "step": 1684 }, { "epoch": 0.16, "grad_norm": 0.26619367233558344, "learning_rate": 0.000199487912781078, "loss": 1.0861, "step": 1685 }, { "epoch": 0.16, "grad_norm": 0.29588681836665753, "learning_rate": 0.0001994863126308512, "loss": 1.0361, "step": 1686 }, { "epoch": 0.16, "grad_norm": 0.27036346205815504, "learning_rate": 0.00019948470999091685, "loss": 1.1066, "step": 1687 }, { "epoch": 0.16, "grad_norm": 0.24182695997990206, "learning_rate": 0.00019948310486131513, "loss": 0.9643, "step": 1688 }, { "epoch": 0.16, "grad_norm": 0.26214221449152814, "learning_rate": 0.0001994814972420862, "loss": 1.1246, "step": 1689 }, { "epoch": 0.16, "grad_norm": 0.2804122217034823, "learning_rate": 0.0001994798871332703, "loss": 1.1795, "step": 1690 }, { "epoch": 0.16, "grad_norm": 0.2594172377288309, "learning_rate": 0.00019947827453490767, "loss": 1.1388, "step": 1691 }, { "epoch": 0.16, "grad_norm": 0.2893954998932536, "learning_rate": 0.0001994766594470387, "loss": 1.0079, "step": 1692 }, { "epoch": 0.16, "grad_norm": 0.2936972888903826, "learning_rate": 0.0001994750418697038, "loss": 1.1258, "step": 1693 }, { "epoch": 0.16, "grad_norm": 0.26900132587121695, "learning_rate": 0.00019947342180294346, "loss": 1.2546, "step": 1694 }, { "epoch": 0.16, "grad_norm": 0.26577176120374074, "learning_rate": 0.00019947179924679825, "loss": 1.1571, "step": 1695 }, { "epoch": 0.16, "grad_norm": 0.2734445337941885, "learning_rate": 0.00019947017420130872, "loss": 1.0859, "step": 1696 }, { "epoch": 0.16, "grad_norm": 0.24000154406623206, "learning_rate": 0.0001994685466665156, "loss": 1.1031, "step": 1697 }, { "epoch": 0.16, "grad_norm": 0.22779641522136282, "learning_rate": 0.00019946691664245956, "loss": 1.0854, "step": 1698 }, { "epoch": 0.16, "grad_norm": 0.2479814495549998, "learning_rate": 0.0001994652841291814, "loss": 1.1121, "step": 1699 }, { "epoch": 0.16, "grad_norm": 0.250268222311568, "learning_rate": 0.00019946364912672203, "loss": 1.0779, "step": 1700 }, { "epoch": 0.16, "grad_norm": 0.24912184436362922, "learning_rate": 0.00019946201163512233, "loss": 1.0067, "step": 1701 }, { "epoch": 0.16, "grad_norm": 0.3207294751787716, "learning_rate": 0.00019946037165442327, "loss": 1.303, "step": 1702 }, { "epoch": 0.16, "grad_norm": 0.28907607252287926, "learning_rate": 0.0001994587291846659, "loss": 1.1877, "step": 1703 }, { "epoch": 0.16, "grad_norm": 0.25848051240176273, "learning_rate": 0.0001994570842258913, "loss": 1.0081, "step": 1704 }, { "epoch": 0.16, "grad_norm": 0.2756315421016595, "learning_rate": 0.00019945543677814067, "loss": 1.0329, "step": 1705 }, { "epoch": 0.16, "grad_norm": 0.2505304319664102, "learning_rate": 0.00019945378684145526, "loss": 1.1502, "step": 1706 }, { "epoch": 0.16, "grad_norm": 0.3140138337651653, "learning_rate": 0.00019945213441587633, "loss": 1.108, "step": 1707 }, { "epoch": 0.16, "grad_norm": 0.2650198799790111, "learning_rate": 0.0001994504795014452, "loss": 1.0843, "step": 1708 }, { "epoch": 0.16, "grad_norm": 0.27741715785219834, "learning_rate": 0.00019944882209820333, "loss": 1.0471, "step": 1709 }, { "epoch": 0.16, "grad_norm": 0.28688576394081394, "learning_rate": 0.0001994471622061922, "loss": 1.0745, "step": 1710 }, { "epoch": 0.16, "grad_norm": 0.3160625561308828, "learning_rate": 0.0001994454998254533, "loss": 1.0412, "step": 1711 }, { "epoch": 0.16, "grad_norm": 0.29390242181574067, "learning_rate": 0.0001994438349560283, "loss": 1.0297, "step": 1712 }, { "epoch": 0.16, "grad_norm": 0.2662667067917202, "learning_rate": 0.00019944216759795885, "loss": 1.0189, "step": 1713 }, { "epoch": 0.16, "grad_norm": 0.2652127225373576, "learning_rate": 0.00019944049775128661, "loss": 1.0433, "step": 1714 }, { "epoch": 0.16, "grad_norm": 0.28198238645107127, "learning_rate": 0.00019943882541605343, "loss": 1.1984, "step": 1715 }, { "epoch": 0.16, "grad_norm": 0.2930815699661024, "learning_rate": 0.00019943715059230117, "loss": 1.0741, "step": 1716 }, { "epoch": 0.16, "grad_norm": 0.32027322958606264, "learning_rate": 0.0001994354732800717, "loss": 1.097, "step": 1717 }, { "epoch": 0.16, "grad_norm": 0.23663660962171493, "learning_rate": 0.00019943379347940704, "loss": 1.0879, "step": 1718 }, { "epoch": 0.16, "grad_norm": 0.26819927151645284, "learning_rate": 0.0001994321111903492, "loss": 1.1009, "step": 1719 }, { "epoch": 0.16, "grad_norm": 0.23575422949387753, "learning_rate": 0.00019943042641294028, "loss": 1.1155, "step": 1720 }, { "epoch": 0.16, "grad_norm": 0.23615429167267799, "learning_rate": 0.00019942873914722243, "loss": 1.1978, "step": 1721 }, { "epoch": 0.16, "grad_norm": 0.23075710530915816, "learning_rate": 0.00019942704939323794, "loss": 1.1802, "step": 1722 }, { "epoch": 0.16, "grad_norm": 0.26619797669604706, "learning_rate": 0.00019942535715102903, "loss": 1.2049, "step": 1723 }, { "epoch": 0.16, "grad_norm": 0.2766557794492051, "learning_rate": 0.00019942366242063807, "loss": 1.1062, "step": 1724 }, { "epoch": 0.17, "grad_norm": 0.2650480870677421, "learning_rate": 0.00019942196520210748, "loss": 1.2134, "step": 1725 }, { "epoch": 0.17, "grad_norm": 0.25567802198203454, "learning_rate": 0.00019942026549547973, "loss": 1.1747, "step": 1726 }, { "epoch": 0.17, "grad_norm": 0.27010898900806307, "learning_rate": 0.00019941856330079732, "loss": 1.2341, "step": 1727 }, { "epoch": 0.17, "grad_norm": 0.2672243888343421, "learning_rate": 0.0001994168586181029, "loss": 1.038, "step": 1728 }, { "epoch": 0.17, "grad_norm": 0.27037675730748634, "learning_rate": 0.00019941515144743913, "loss": 1.1484, "step": 1729 }, { "epoch": 0.17, "grad_norm": 0.27423511003272355, "learning_rate": 0.00019941344178884868, "loss": 1.1925, "step": 1730 }, { "epoch": 0.17, "grad_norm": 0.25846818596543564, "learning_rate": 0.0001994117296423744, "loss": 1.0063, "step": 1731 }, { "epoch": 0.17, "grad_norm": 0.2686589532404364, "learning_rate": 0.0001994100150080591, "loss": 1.0285, "step": 1732 }, { "epoch": 0.17, "grad_norm": 0.2766088906498271, "learning_rate": 0.00019940829788594569, "loss": 1.0765, "step": 1733 }, { "epoch": 0.17, "grad_norm": 0.26339315358801624, "learning_rate": 0.00019940657827607715, "loss": 1.1692, "step": 1734 }, { "epoch": 0.17, "grad_norm": 0.2603560134384319, "learning_rate": 0.0001994048561784965, "loss": 1.0406, "step": 1735 }, { "epoch": 0.17, "grad_norm": 0.2904525258177709, "learning_rate": 0.0001994031315932469, "loss": 0.9866, "step": 1736 }, { "epoch": 0.17, "grad_norm": 0.254967597751732, "learning_rate": 0.00019940140452037142, "loss": 1.1711, "step": 1737 }, { "epoch": 0.17, "grad_norm": 0.2587539173182572, "learning_rate": 0.00019939967495991332, "loss": 1.1377, "step": 1738 }, { "epoch": 0.17, "grad_norm": 0.26239329017748414, "learning_rate": 0.0001993979429119159, "loss": 1.0846, "step": 1739 }, { "epoch": 0.17, "grad_norm": 0.2641030306092387, "learning_rate": 0.00019939620837642247, "loss": 1.1515, "step": 1740 }, { "epoch": 0.17, "grad_norm": 0.29629448536301656, "learning_rate": 0.00019939447135347647, "loss": 1.1464, "step": 1741 }, { "epoch": 0.17, "grad_norm": 0.259586106165695, "learning_rate": 0.00019939273184312137, "loss": 1.0899, "step": 1742 }, { "epoch": 0.17, "grad_norm": 0.26296382387837713, "learning_rate": 0.0001993909898454007, "loss": 1.0482, "step": 1743 }, { "epoch": 0.17, "grad_norm": 0.28051523053769833, "learning_rate": 0.000199389245360358, "loss": 1.1547, "step": 1744 }, { "epoch": 0.17, "grad_norm": 0.2671245457129155, "learning_rate": 0.00019938749838803696, "loss": 1.0592, "step": 1745 }, { "epoch": 0.17, "grad_norm": 0.3181482392262407, "learning_rate": 0.00019938574892848135, "loss": 1.0635, "step": 1746 }, { "epoch": 0.17, "grad_norm": 0.2899294422265618, "learning_rate": 0.0001993839969817349, "loss": 1.0768, "step": 1747 }, { "epoch": 0.17, "grad_norm": 0.25002771184882505, "learning_rate": 0.00019938224254784147, "loss": 1.1619, "step": 1748 }, { "epoch": 0.17, "grad_norm": 0.2568140049401059, "learning_rate": 0.00019938048562684495, "loss": 1.0229, "step": 1749 }, { "epoch": 0.17, "grad_norm": 0.36292519666598, "learning_rate": 0.00019937872621878934, "loss": 1.1794, "step": 1750 }, { "epoch": 0.17, "grad_norm": 0.2932029976728667, "learning_rate": 0.0001993769643237186, "loss": 1.1902, "step": 1751 }, { "epoch": 0.17, "grad_norm": 0.2771533942225469, "learning_rate": 0.00019937519994167694, "loss": 1.0731, "step": 1752 }, { "epoch": 0.17, "grad_norm": 0.30070233491956233, "learning_rate": 0.00019937343307270842, "loss": 1.1186, "step": 1753 }, { "epoch": 0.17, "grad_norm": 0.23520740536849988, "learning_rate": 0.00019937166371685727, "loss": 1.0768, "step": 1754 }, { "epoch": 0.17, "grad_norm": 0.2689615596060478, "learning_rate": 0.0001993698918741678, "loss": 1.0889, "step": 1755 }, { "epoch": 0.17, "grad_norm": 0.2667096465377454, "learning_rate": 0.0001993681175446843, "loss": 1.1273, "step": 1756 }, { "epoch": 0.17, "grad_norm": 0.2774024784883662, "learning_rate": 0.00019936634072845126, "loss": 1.0687, "step": 1757 }, { "epoch": 0.17, "grad_norm": 0.2948555113393165, "learning_rate": 0.00019936456142551306, "loss": 1.1369, "step": 1758 }, { "epoch": 0.17, "grad_norm": 0.2783762471401322, "learning_rate": 0.00019936277963591428, "loss": 1.1218, "step": 1759 }, { "epoch": 0.17, "grad_norm": 0.24719261245618435, "learning_rate": 0.00019936099535969946, "loss": 1.1024, "step": 1760 }, { "epoch": 0.17, "grad_norm": 0.3182614470412637, "learning_rate": 0.00019935920859691332, "loss": 1.1849, "step": 1761 }, { "epoch": 0.17, "grad_norm": 0.23710818113640875, "learning_rate": 0.00019935741934760053, "loss": 1.0466, "step": 1762 }, { "epoch": 0.17, "grad_norm": 0.2638247973270966, "learning_rate": 0.00019935562761180586, "loss": 1.046, "step": 1763 }, { "epoch": 0.17, "grad_norm": 0.29051740614485994, "learning_rate": 0.0001993538333895742, "loss": 1.0124, "step": 1764 }, { "epoch": 0.17, "grad_norm": 0.344576484247007, "learning_rate": 0.0001993520366809504, "loss": 1.0553, "step": 1765 }, { "epoch": 0.17, "grad_norm": 0.2734046389788186, "learning_rate": 0.00019935023748597942, "loss": 1.1175, "step": 1766 }, { "epoch": 0.17, "grad_norm": 0.2658344263426795, "learning_rate": 0.00019934843580470633, "loss": 1.1288, "step": 1767 }, { "epoch": 0.17, "grad_norm": 0.24386557141888807, "learning_rate": 0.0001993466316371762, "loss": 1.0556, "step": 1768 }, { "epoch": 0.17, "grad_norm": 0.2903532438407926, "learning_rate": 0.00019934482498343417, "loss": 1.1214, "step": 1769 }, { "epoch": 0.17, "grad_norm": 0.23273429637205256, "learning_rate": 0.00019934301584352543, "loss": 1.1819, "step": 1770 }, { "epoch": 0.17, "grad_norm": 0.26436150903465644, "learning_rate": 0.0001993412042174953, "loss": 1.2129, "step": 1771 }, { "epoch": 0.17, "grad_norm": 0.272699055168821, "learning_rate": 0.00019933939010538914, "loss": 1.0845, "step": 1772 }, { "epoch": 0.17, "grad_norm": 0.24313514050010182, "learning_rate": 0.00019933757350725227, "loss": 1.0947, "step": 1773 }, { "epoch": 0.17, "grad_norm": 0.2743855476662608, "learning_rate": 0.00019933575442313022, "loss": 1.0721, "step": 1774 }, { "epoch": 0.17, "grad_norm": 0.27484803850628725, "learning_rate": 0.00019933393285306847, "loss": 1.086, "step": 1775 }, { "epoch": 0.17, "grad_norm": 0.2862674448464463, "learning_rate": 0.0001993321087971126, "loss": 1.1687, "step": 1776 }, { "epoch": 0.17, "grad_norm": 0.2654433158884731, "learning_rate": 0.00019933028225530832, "loss": 1.1524, "step": 1777 }, { "epoch": 0.17, "grad_norm": 0.28570985555687595, "learning_rate": 0.00019932845322770127, "loss": 1.1032, "step": 1778 }, { "epoch": 0.17, "grad_norm": 0.2398384669725776, "learning_rate": 0.00019932662171433726, "loss": 1.1805, "step": 1779 }, { "epoch": 0.17, "grad_norm": 0.27132648388882097, "learning_rate": 0.00019932478771526212, "loss": 1.1706, "step": 1780 }, { "epoch": 0.17, "grad_norm": 0.2490907055488986, "learning_rate": 0.00019932295123052175, "loss": 1.0303, "step": 1781 }, { "epoch": 0.17, "grad_norm": 0.2483810784888273, "learning_rate": 0.0001993211122601621, "loss": 1.183, "step": 1782 }, { "epoch": 0.17, "grad_norm": 0.23864167216181773, "learning_rate": 0.00019931927080422921, "loss": 1.0438, "step": 1783 }, { "epoch": 0.17, "grad_norm": 0.27530929268313675, "learning_rate": 0.0001993174268627691, "loss": 1.0515, "step": 1784 }, { "epoch": 0.17, "grad_norm": 0.24646180451212257, "learning_rate": 0.00019931558043582802, "loss": 1.1064, "step": 1785 }, { "epoch": 0.17, "grad_norm": 0.3072867379708468, "learning_rate": 0.00019931373152345206, "loss": 1.0433, "step": 1786 }, { "epoch": 0.17, "grad_norm": 0.2699984246251364, "learning_rate": 0.0001993118801256876, "loss": 1.2135, "step": 1787 }, { "epoch": 0.17, "grad_norm": 0.29491554910347884, "learning_rate": 0.00019931002624258093, "loss": 1.0451, "step": 1788 }, { "epoch": 0.17, "grad_norm": 0.2592446364845811, "learning_rate": 0.00019930816987417843, "loss": 1.0678, "step": 1789 }, { "epoch": 0.17, "grad_norm": 0.27551160462797714, "learning_rate": 0.00019930631102052656, "loss": 1.099, "step": 1790 }, { "epoch": 0.17, "grad_norm": 0.3004312874610158, "learning_rate": 0.00019930444968167184, "loss": 1.2349, "step": 1791 }, { "epoch": 0.17, "grad_norm": 0.2902276940371733, "learning_rate": 0.00019930258585766083, "loss": 1.0191, "step": 1792 }, { "epoch": 0.17, "grad_norm": 0.2880266394681308, "learning_rate": 0.00019930071954854026, "loss": 1.0946, "step": 1793 }, { "epoch": 0.17, "grad_norm": 0.27739961453298684, "learning_rate": 0.00019929885075435673, "loss": 1.0883, "step": 1794 }, { "epoch": 0.17, "grad_norm": 0.26237306161853147, "learning_rate": 0.00019929697947515705, "loss": 1.13, "step": 1795 }, { "epoch": 0.17, "grad_norm": 0.25732915392100997, "learning_rate": 0.0001992951057109881, "loss": 1.0223, "step": 1796 }, { "epoch": 0.17, "grad_norm": 0.26231079106015476, "learning_rate": 0.00019929322946189669, "loss": 1.2334, "step": 1797 }, { "epoch": 0.17, "grad_norm": 0.2689310820422752, "learning_rate": 0.00019929135072792979, "loss": 1.0859, "step": 1798 }, { "epoch": 0.17, "grad_norm": 0.28231810159636933, "learning_rate": 0.00019928946950913446, "loss": 1.1499, "step": 1799 }, { "epoch": 0.17, "grad_norm": 0.26114756115035764, "learning_rate": 0.00019928758580555777, "loss": 1.0692, "step": 1800 }, { "epoch": 0.17, "grad_norm": 0.21499560470777954, "learning_rate": 0.00019928569961724684, "loss": 1.0246, "step": 1801 }, { "epoch": 0.17, "grad_norm": 0.2499670112727543, "learning_rate": 0.00019928381094424887, "loss": 1.0571, "step": 1802 }, { "epoch": 0.17, "grad_norm": 0.2668951326275685, "learning_rate": 0.00019928191978661112, "loss": 1.0914, "step": 1803 }, { "epoch": 0.17, "grad_norm": 0.2436534609761875, "learning_rate": 0.00019928002614438096, "loss": 1.115, "step": 1804 }, { "epoch": 0.17, "grad_norm": 0.24543054408218462, "learning_rate": 0.00019927813001760573, "loss": 1.1398, "step": 1805 }, { "epoch": 0.17, "grad_norm": 0.275192488837919, "learning_rate": 0.0001992762314063329, "loss": 1.157, "step": 1806 }, { "epoch": 0.17, "grad_norm": 0.24910350712903606, "learning_rate": 0.00019927433031061, "loss": 1.1897, "step": 1807 }, { "epoch": 0.17, "grad_norm": 0.2440628864265639, "learning_rate": 0.0001992724267304846, "loss": 1.1515, "step": 1808 }, { "epoch": 0.17, "grad_norm": 0.2878309034150722, "learning_rate": 0.0001992705206660043, "loss": 1.2264, "step": 1809 }, { "epoch": 0.17, "grad_norm": 0.2910816442764025, "learning_rate": 0.00019926861211721684, "loss": 1.0837, "step": 1810 }, { "epoch": 0.17, "grad_norm": 0.26250435365108804, "learning_rate": 0.00019926670108416997, "loss": 1.2048, "step": 1811 }, { "epoch": 0.17, "grad_norm": 0.2870815342486622, "learning_rate": 0.00019926478756691153, "loss": 1.1559, "step": 1812 }, { "epoch": 0.17, "grad_norm": 0.27090400410537946, "learning_rate": 0.0001992628715654894, "loss": 1.2068, "step": 1813 }, { "epoch": 0.17, "grad_norm": 0.28120763276720057, "learning_rate": 0.0001992609530799515, "loss": 1.0354, "step": 1814 }, { "epoch": 0.17, "grad_norm": 0.2894660442297703, "learning_rate": 0.0001992590321103459, "loss": 1.0584, "step": 1815 }, { "epoch": 0.17, "grad_norm": 0.2780933045178876, "learning_rate": 0.00019925710865672063, "loss": 1.2506, "step": 1816 }, { "epoch": 0.17, "grad_norm": 0.28692036245587266, "learning_rate": 0.0001992551827191238, "loss": 1.1621, "step": 1817 }, { "epoch": 0.17, "grad_norm": 0.2982135909302484, "learning_rate": 0.00019925325429760368, "loss": 1.0948, "step": 1818 }, { "epoch": 0.17, "grad_norm": 0.2820173648746567, "learning_rate": 0.0001992513233922085, "loss": 1.1477, "step": 1819 }, { "epoch": 0.17, "grad_norm": 0.2639471711581256, "learning_rate": 0.00019924939000298656, "loss": 1.0992, "step": 1820 }, { "epoch": 0.17, "grad_norm": 0.3258628877911058, "learning_rate": 0.00019924745412998625, "loss": 1.0337, "step": 1821 }, { "epoch": 0.17, "grad_norm": 0.32533873777010336, "learning_rate": 0.00019924551577325605, "loss": 1.036, "step": 1822 }, { "epoch": 0.17, "grad_norm": 0.29923641265792245, "learning_rate": 0.00019924357493284443, "loss": 1.1199, "step": 1823 }, { "epoch": 0.17, "grad_norm": 0.2871942608589218, "learning_rate": 0.00019924163160879997, "loss": 1.111, "step": 1824 }, { "epoch": 0.17, "grad_norm": 0.32570726041176123, "learning_rate": 0.0001992396858011713, "loss": 1.208, "step": 1825 }, { "epoch": 0.17, "grad_norm": 0.2745227229675084, "learning_rate": 0.00019923773751000714, "loss": 1.0936, "step": 1826 }, { "epoch": 0.17, "grad_norm": 0.29321444725480744, "learning_rate": 0.00019923578673535622, "loss": 1.1939, "step": 1827 }, { "epoch": 0.17, "grad_norm": 0.31320266548147835, "learning_rate": 0.0001992338334772674, "loss": 1.092, "step": 1828 }, { "epoch": 0.17, "grad_norm": 0.27134903395742416, "learning_rate": 0.0001992318777357895, "loss": 1.1936, "step": 1829 }, { "epoch": 0.18, "grad_norm": 0.25932835062459336, "learning_rate": 0.0001992299195109715, "loss": 1.1709, "step": 1830 }, { "epoch": 0.18, "grad_norm": 0.2821426140168247, "learning_rate": 0.0001992279588028624, "loss": 1.0848, "step": 1831 }, { "epoch": 0.18, "grad_norm": 0.27780532527873364, "learning_rate": 0.00019922599561151126, "loss": 1.0701, "step": 1832 }, { "epoch": 0.18, "grad_norm": 0.3213413443644143, "learning_rate": 0.00019922402993696725, "loss": 1.2066, "step": 1833 }, { "epoch": 0.18, "grad_norm": 0.24152642775118963, "learning_rate": 0.00019922206177927948, "loss": 1.0779, "step": 1834 }, { "epoch": 0.18, "grad_norm": 0.26358149683834353, "learning_rate": 0.00019922009113849728, "loss": 1.0631, "step": 1835 }, { "epoch": 0.18, "grad_norm": 0.2771938797580255, "learning_rate": 0.00019921811801466995, "loss": 1.0627, "step": 1836 }, { "epoch": 0.18, "grad_norm": 0.24390434386104548, "learning_rate": 0.00019921614240784688, "loss": 1.0826, "step": 1837 }, { "epoch": 0.18, "grad_norm": 0.2883536207722251, "learning_rate": 0.00019921416431807748, "loss": 1.0587, "step": 1838 }, { "epoch": 0.18, "grad_norm": 0.26058794322850115, "learning_rate": 0.00019921218374541124, "loss": 1.0926, "step": 1839 }, { "epoch": 0.18, "grad_norm": 0.253199761945334, "learning_rate": 0.00019921020068989776, "loss": 1.0659, "step": 1840 }, { "epoch": 0.18, "grad_norm": 0.27605817699682705, "learning_rate": 0.00019920821515158666, "loss": 1.0807, "step": 1841 }, { "epoch": 0.18, "grad_norm": 0.34296685061041043, "learning_rate": 0.0001992062271305276, "loss": 1.0399, "step": 1842 }, { "epoch": 0.18, "grad_norm": 0.24791123495573145, "learning_rate": 0.0001992042366267704, "loss": 1.0986, "step": 1843 }, { "epoch": 0.18, "grad_norm": 0.2730759255331824, "learning_rate": 0.0001992022436403648, "loss": 1.1249, "step": 1844 }, { "epoch": 0.18, "grad_norm": 0.25825382605152414, "learning_rate": 0.0001992002481713607, "loss": 1.1515, "step": 1845 }, { "epoch": 0.18, "grad_norm": 0.2709947998255462, "learning_rate": 0.0001991982502198081, "loss": 1.0644, "step": 1846 }, { "epoch": 0.18, "grad_norm": 0.3041130032344396, "learning_rate": 0.0001991962497857569, "loss": 1.2027, "step": 1847 }, { "epoch": 0.18, "grad_norm": 0.2514054383827712, "learning_rate": 0.00019919424686925722, "loss": 1.1181, "step": 1848 }, { "epoch": 0.18, "grad_norm": 0.2671075622096655, "learning_rate": 0.00019919224147035914, "loss": 1.0748, "step": 1849 }, { "epoch": 0.18, "grad_norm": 0.2563779371324541, "learning_rate": 0.00019919023358911292, "loss": 1.1708, "step": 1850 }, { "epoch": 0.18, "grad_norm": 0.27518701288612535, "learning_rate": 0.00019918822322556877, "loss": 1.068, "step": 1851 }, { "epoch": 0.18, "grad_norm": 0.23464980973730304, "learning_rate": 0.00019918621037977693, "loss": 1.115, "step": 1852 }, { "epoch": 0.18, "grad_norm": 0.22754310999457095, "learning_rate": 0.0001991841950517879, "loss": 1.143, "step": 1853 }, { "epoch": 0.18, "grad_norm": 0.2402610477626132, "learning_rate": 0.00019918217724165205, "loss": 1.045, "step": 1854 }, { "epoch": 0.18, "grad_norm": 0.2466867783180613, "learning_rate": 0.00019918015694941988, "loss": 1.1413, "step": 1855 }, { "epoch": 0.18, "grad_norm": 0.27021081362108734, "learning_rate": 0.00019917813417514194, "loss": 1.1366, "step": 1856 }, { "epoch": 0.18, "grad_norm": 0.2290266939618748, "learning_rate": 0.00019917610891886884, "loss": 1.077, "step": 1857 }, { "epoch": 0.18, "grad_norm": 0.2912099506203328, "learning_rate": 0.0001991740811806513, "loss": 1.2381, "step": 1858 }, { "epoch": 0.18, "grad_norm": 0.25710098640782725, "learning_rate": 0.00019917205096054005, "loss": 1.1494, "step": 1859 }, { "epoch": 0.18, "grad_norm": 0.23444875824179745, "learning_rate": 0.00019917001825858592, "loss": 1.1993, "step": 1860 }, { "epoch": 0.18, "grad_norm": 0.27972502638604985, "learning_rate": 0.00019916798307483973, "loss": 1.0881, "step": 1861 }, { "epoch": 0.18, "grad_norm": 0.22927317304767958, "learning_rate": 0.00019916594540935246, "loss": 1.1226, "step": 1862 }, { "epoch": 0.18, "grad_norm": 0.2777824230056472, "learning_rate": 0.00019916390526217507, "loss": 1.2791, "step": 1863 }, { "epoch": 0.18, "grad_norm": 0.29153525342371706, "learning_rate": 0.0001991618626333586, "loss": 1.1707, "step": 1864 }, { "epoch": 0.18, "grad_norm": 0.26506907160815607, "learning_rate": 0.00019915981752295422, "loss": 1.1309, "step": 1865 }, { "epoch": 0.18, "grad_norm": 0.24281279217203466, "learning_rate": 0.00019915776993101311, "loss": 1.117, "step": 1866 }, { "epoch": 0.18, "grad_norm": 0.2915237131661395, "learning_rate": 0.00019915571985758645, "loss": 1.1615, "step": 1867 }, { "epoch": 0.18, "grad_norm": 0.26598319618634586, "learning_rate": 0.00019915366730272562, "loss": 1.2443, "step": 1868 }, { "epoch": 0.18, "grad_norm": 0.25083927497077174, "learning_rate": 0.00019915161226648193, "loss": 1.091, "step": 1869 }, { "epoch": 0.18, "grad_norm": 0.24871843858798745, "learning_rate": 0.00019914955474890683, "loss": 1.2225, "step": 1870 }, { "epoch": 0.18, "grad_norm": 0.2807205864271564, "learning_rate": 0.00019914749475005182, "loss": 1.0856, "step": 1871 }, { "epoch": 0.18, "grad_norm": 0.2564207640831039, "learning_rate": 0.00019914543226996846, "loss": 1.1381, "step": 1872 }, { "epoch": 0.18, "grad_norm": 0.2711542979084927, "learning_rate": 0.00019914336730870828, "loss": 1.1482, "step": 1873 }, { "epoch": 0.18, "grad_norm": 0.24605707766197607, "learning_rate": 0.00019914129986632308, "loss": 1.0468, "step": 1874 }, { "epoch": 0.18, "grad_norm": 0.25895981792303296, "learning_rate": 0.00019913922994286453, "loss": 1.1124, "step": 1875 }, { "epoch": 0.18, "grad_norm": 0.26071698636999885, "learning_rate": 0.00019913715753838444, "loss": 1.0977, "step": 1876 }, { "epoch": 0.18, "grad_norm": 0.26270406723403217, "learning_rate": 0.00019913508265293468, "loss": 0.9724, "step": 1877 }, { "epoch": 0.18, "grad_norm": 0.2729621283459525, "learning_rate": 0.00019913300528656718, "loss": 1.0379, "step": 1878 }, { "epoch": 0.18, "grad_norm": 0.2667009672221466, "learning_rate": 0.00019913092543933392, "loss": 1.201, "step": 1879 }, { "epoch": 0.18, "grad_norm": 0.3062682088725694, "learning_rate": 0.00019912884311128692, "loss": 1.2133, "step": 1880 }, { "epoch": 0.18, "grad_norm": 0.2471603219169982, "learning_rate": 0.00019912675830247834, "loss": 1.0426, "step": 1881 }, { "epoch": 0.18, "grad_norm": 0.2706944754253154, "learning_rate": 0.00019912467101296035, "loss": 1.0739, "step": 1882 }, { "epoch": 0.18, "grad_norm": 0.23877771124529687, "learning_rate": 0.00019912258124278517, "loss": 1.1371, "step": 1883 }, { "epoch": 0.18, "grad_norm": 0.2501800385911424, "learning_rate": 0.00019912048899200507, "loss": 0.985, "step": 1884 }, { "epoch": 0.18, "grad_norm": 0.2543490544283289, "learning_rate": 0.00019911839426067245, "loss": 1.0922, "step": 1885 }, { "epoch": 0.18, "grad_norm": 0.2610356534075215, "learning_rate": 0.0001991162970488397, "loss": 1.062, "step": 1886 }, { "epoch": 0.18, "grad_norm": 0.31809853336105126, "learning_rate": 0.0001991141973565594, "loss": 1.0172, "step": 1887 }, { "epoch": 0.18, "grad_norm": 0.3037942696710212, "learning_rate": 0.00019911209518388393, "loss": 1.2201, "step": 1888 }, { "epoch": 0.18, "grad_norm": 0.2612653091466676, "learning_rate": 0.00019910999053086604, "loss": 1.1529, "step": 1889 }, { "epoch": 0.18, "grad_norm": 0.25927362339534166, "learning_rate": 0.00019910788339755833, "loss": 1.1727, "step": 1890 }, { "epoch": 0.18, "grad_norm": 0.26339036209636246, "learning_rate": 0.00019910577378401355, "loss": 1.0759, "step": 1891 }, { "epoch": 0.18, "grad_norm": 0.26180032269848985, "learning_rate": 0.00019910366169028452, "loss": 1.0782, "step": 1892 }, { "epoch": 0.18, "grad_norm": 0.27889254421508974, "learning_rate": 0.00019910154711642403, "loss": 1.1011, "step": 1893 }, { "epoch": 0.18, "grad_norm": 0.2549984542896029, "learning_rate": 0.00019909943006248505, "loss": 1.15, "step": 1894 }, { "epoch": 0.18, "grad_norm": 0.2274655301809786, "learning_rate": 0.0001990973105285206, "loss": 1.1843, "step": 1895 }, { "epoch": 0.18, "grad_norm": 0.259196981448463, "learning_rate": 0.00019909518851458363, "loss": 1.0451, "step": 1896 }, { "epoch": 0.18, "grad_norm": 0.37452396398591586, "learning_rate": 0.0001990930640207273, "loss": 1.1319, "step": 1897 }, { "epoch": 0.18, "grad_norm": 0.30511870306850003, "learning_rate": 0.00019909093704700473, "loss": 1.1613, "step": 1898 }, { "epoch": 0.18, "grad_norm": 0.29479478078757926, "learning_rate": 0.00019908880759346925, "loss": 1.1725, "step": 1899 }, { "epoch": 0.18, "grad_norm": 0.3086910524148668, "learning_rate": 0.00019908667566017406, "loss": 1.1686, "step": 1900 }, { "epoch": 0.18, "grad_norm": 0.2974264651570825, "learning_rate": 0.0001990845412471725, "loss": 1.1257, "step": 1901 }, { "epoch": 0.18, "grad_norm": 0.256728492572231, "learning_rate": 0.00019908240435451805, "loss": 1.0166, "step": 1902 }, { "epoch": 0.18, "grad_norm": 0.23937868610540455, "learning_rate": 0.00019908026498226418, "loss": 1.1205, "step": 1903 }, { "epoch": 0.18, "grad_norm": 0.272693028176549, "learning_rate": 0.00019907812313046437, "loss": 1.1055, "step": 1904 }, { "epoch": 0.18, "grad_norm": 0.27354401730685546, "learning_rate": 0.00019907597879917227, "loss": 1.1253, "step": 1905 }, { "epoch": 0.18, "grad_norm": 0.27305416183963116, "learning_rate": 0.00019907383198844157, "loss": 1.0841, "step": 1906 }, { "epoch": 0.18, "grad_norm": 0.31852024294594067, "learning_rate": 0.00019907168269832592, "loss": 1.1546, "step": 1907 }, { "epoch": 0.18, "grad_norm": 0.29709692567412604, "learning_rate": 0.00019906953092887916, "loss": 1.2313, "step": 1908 }, { "epoch": 0.18, "grad_norm": 0.2617821076298669, "learning_rate": 0.00019906737668015515, "loss": 0.943, "step": 1909 }, { "epoch": 0.18, "grad_norm": 0.2636246176724674, "learning_rate": 0.00019906521995220774, "loss": 1.0627, "step": 1910 }, { "epoch": 0.18, "grad_norm": 0.2679413444345284, "learning_rate": 0.00019906306074509095, "loss": 1.1503, "step": 1911 }, { "epoch": 0.18, "grad_norm": 0.24358831048061988, "learning_rate": 0.0001990608990588588, "loss": 1.0467, "step": 1912 }, { "epoch": 0.18, "grad_norm": 0.2873239577441084, "learning_rate": 0.0001990587348935654, "loss": 1.2148, "step": 1913 }, { "epoch": 0.18, "grad_norm": 0.2970349427468681, "learning_rate": 0.00019905656824926492, "loss": 1.1718, "step": 1914 }, { "epoch": 0.18, "grad_norm": 0.328114785718262, "learning_rate": 0.00019905439912601156, "loss": 1.0894, "step": 1915 }, { "epoch": 0.18, "grad_norm": 0.2768655282500371, "learning_rate": 0.00019905222752385958, "loss": 0.9798, "step": 1916 }, { "epoch": 0.18, "grad_norm": 0.24581149927304233, "learning_rate": 0.00019905005344286338, "loss": 1.1947, "step": 1917 }, { "epoch": 0.18, "grad_norm": 0.24905142815716402, "learning_rate": 0.00019904787688307735, "loss": 1.0603, "step": 1918 }, { "epoch": 0.18, "grad_norm": 0.25006568481196073, "learning_rate": 0.00019904569784455592, "loss": 1.1451, "step": 1919 }, { "epoch": 0.18, "grad_norm": 0.24640239497730002, "learning_rate": 0.0001990435163273537, "loss": 1.1513, "step": 1920 }, { "epoch": 0.18, "grad_norm": 0.3158678665771197, "learning_rate": 0.00019904133233152518, "loss": 1.1675, "step": 1921 }, { "epoch": 0.18, "grad_norm": 0.2925864535264506, "learning_rate": 0.0001990391458571251, "loss": 1.0303, "step": 1922 }, { "epoch": 0.18, "grad_norm": 0.2919025168815423, "learning_rate": 0.00019903695690420817, "loss": 1.2033, "step": 1923 }, { "epoch": 0.18, "grad_norm": 0.2537949728039277, "learning_rate": 0.00019903476547282914, "loss": 1.144, "step": 1924 }, { "epoch": 0.18, "grad_norm": 0.2583763269969566, "learning_rate": 0.00019903257156304285, "loss": 1.1037, "step": 1925 }, { "epoch": 0.18, "grad_norm": 0.2613319587588986, "learning_rate": 0.00019903037517490422, "loss": 1.0958, "step": 1926 }, { "epoch": 0.18, "grad_norm": 0.2595089644687432, "learning_rate": 0.00019902817630846822, "loss": 1.1155, "step": 1927 }, { "epoch": 0.18, "grad_norm": 0.25297594336763907, "learning_rate": 0.00019902597496378985, "loss": 1.1028, "step": 1928 }, { "epoch": 0.18, "grad_norm": 0.30513489135176775, "learning_rate": 0.00019902377114092425, "loss": 1.1394, "step": 1929 }, { "epoch": 0.18, "grad_norm": 0.3021684093102426, "learning_rate": 0.00019902156483992653, "loss": 0.9847, "step": 1930 }, { "epoch": 0.18, "grad_norm": 0.2590031991887478, "learning_rate": 0.00019901935606085193, "loss": 1.036, "step": 1931 }, { "epoch": 0.18, "grad_norm": 0.27555119779022413, "learning_rate": 0.00019901714480375572, "loss": 1.0828, "step": 1932 }, { "epoch": 0.18, "grad_norm": 0.2590125935515052, "learning_rate": 0.0001990149310686932, "loss": 1.0109, "step": 1933 }, { "epoch": 0.19, "grad_norm": 0.27535539380394297, "learning_rate": 0.0001990127148557198, "loss": 1.0645, "step": 1934 }, { "epoch": 0.19, "grad_norm": 0.2604038154667027, "learning_rate": 0.000199010496164891, "loss": 1.1085, "step": 1935 }, { "epoch": 0.19, "grad_norm": 0.24979308709369744, "learning_rate": 0.0001990082749962623, "loss": 1.084, "step": 1936 }, { "epoch": 0.19, "grad_norm": 0.3032327587240259, "learning_rate": 0.0001990060513498893, "loss": 1.1131, "step": 1937 }, { "epoch": 0.19, "grad_norm": 0.2678255255634505, "learning_rate": 0.00019900382522582765, "loss": 1.1368, "step": 1938 }, { "epoch": 0.19, "grad_norm": 0.2180195498119384, "learning_rate": 0.00019900159662413305, "loss": 1.0595, "step": 1939 }, { "epoch": 0.19, "grad_norm": 0.23825147073097672, "learning_rate": 0.00019899936554486128, "loss": 1.1574, "step": 1940 }, { "epoch": 0.19, "grad_norm": 0.23730357351239173, "learning_rate": 0.00019899713198806812, "loss": 1.1184, "step": 1941 }, { "epoch": 0.19, "grad_norm": 0.2805064829854749, "learning_rate": 0.00019899489595380957, "loss": 1.2007, "step": 1942 }, { "epoch": 0.19, "grad_norm": 0.2473758737702457, "learning_rate": 0.00019899265744214152, "loss": 1.0602, "step": 1943 }, { "epoch": 0.19, "grad_norm": 0.2718909446342109, "learning_rate": 0.00019899041645312, "loss": 1.1384, "step": 1944 }, { "epoch": 0.19, "grad_norm": 0.2541735443801485, "learning_rate": 0.0001989881729868011, "loss": 1.0676, "step": 1945 }, { "epoch": 0.19, "grad_norm": 0.2500138203165472, "learning_rate": 0.00019898592704324094, "loss": 1.0983, "step": 1946 }, { "epoch": 0.19, "grad_norm": 0.2593377151457589, "learning_rate": 0.00019898367862249575, "loss": 1.0257, "step": 1947 }, { "epoch": 0.19, "grad_norm": 0.2495745540042679, "learning_rate": 0.00019898142772462182, "loss": 1.0384, "step": 1948 }, { "epoch": 0.19, "grad_norm": 0.2969538903134302, "learning_rate": 0.00019897917434967544, "loss": 1.1127, "step": 1949 }, { "epoch": 0.19, "grad_norm": 0.26632242247343, "learning_rate": 0.00019897691849771301, "loss": 1.1186, "step": 1950 }, { "epoch": 0.19, "grad_norm": 0.29353632758131, "learning_rate": 0.00019897466016879098, "loss": 1.0999, "step": 1951 }, { "epoch": 0.19, "grad_norm": 0.2888464592603483, "learning_rate": 0.00019897239936296588, "loss": 1.0546, "step": 1952 }, { "epoch": 0.19, "grad_norm": 0.2576028803170492, "learning_rate": 0.00019897013608029428, "loss": 1.0409, "step": 1953 }, { "epoch": 0.19, "grad_norm": 0.310447192134622, "learning_rate": 0.00019896787032083285, "loss": 1.1755, "step": 1954 }, { "epoch": 0.19, "grad_norm": 0.28394813161935395, "learning_rate": 0.00019896560208463825, "loss": 1.071, "step": 1955 }, { "epoch": 0.19, "grad_norm": 0.23857662880995645, "learning_rate": 0.00019896333137176726, "loss": 0.9972, "step": 1956 }, { "epoch": 0.19, "grad_norm": 0.27471986726786446, "learning_rate": 0.00019896105818227673, "loss": 1.1453, "step": 1957 }, { "epoch": 0.19, "grad_norm": 0.26890257495666114, "learning_rate": 0.00019895878251622348, "loss": 1.0331, "step": 1958 }, { "epoch": 0.19, "grad_norm": 0.2663478542354145, "learning_rate": 0.00019895650437366452, "loss": 1.0474, "step": 1959 }, { "epoch": 0.19, "grad_norm": 0.2604279634361445, "learning_rate": 0.00019895422375465686, "loss": 1.1096, "step": 1960 }, { "epoch": 0.19, "grad_norm": 0.2606384507083339, "learning_rate": 0.00019895194065925754, "loss": 1.0248, "step": 1961 }, { "epoch": 0.19, "grad_norm": 0.27935688700196437, "learning_rate": 0.00019894965508752375, "loss": 1.2211, "step": 1962 }, { "epoch": 0.19, "grad_norm": 0.2745748469246835, "learning_rate": 0.00019894736703951263, "loss": 1.0072, "step": 1963 }, { "epoch": 0.19, "grad_norm": 0.23091984266360946, "learning_rate": 0.00019894507651528148, "loss": 1.043, "step": 1964 }, { "epoch": 0.19, "grad_norm": 0.22878065896236086, "learning_rate": 0.00019894278351488757, "loss": 1.0798, "step": 1965 }, { "epoch": 0.19, "grad_norm": 0.2554600035367144, "learning_rate": 0.00019894048803838834, "loss": 1.1627, "step": 1966 }, { "epoch": 0.19, "grad_norm": 0.2737920223861705, "learning_rate": 0.00019893819008584123, "loss": 1.1778, "step": 1967 }, { "epoch": 0.19, "grad_norm": 0.2708368675723523, "learning_rate": 0.0001989358896573037, "loss": 0.9277, "step": 1968 }, { "epoch": 0.19, "grad_norm": 0.24550243352794962, "learning_rate": 0.00019893358675283337, "loss": 1.1226, "step": 1969 }, { "epoch": 0.19, "grad_norm": 0.2642424699796039, "learning_rate": 0.00019893128137248787, "loss": 1.1078, "step": 1970 }, { "epoch": 0.19, "grad_norm": 0.2573419222534839, "learning_rate": 0.00019892897351632484, "loss": 1.2793, "step": 1971 }, { "epoch": 0.19, "grad_norm": 0.27224735814088064, "learning_rate": 0.00019892666318440213, "loss": 1.0788, "step": 1972 }, { "epoch": 0.19, "grad_norm": 0.2766376630314068, "learning_rate": 0.00019892435037677746, "loss": 1.1132, "step": 1973 }, { "epoch": 0.19, "grad_norm": 0.3067049268603965, "learning_rate": 0.00019892203509350875, "loss": 1.0906, "step": 1974 }, { "epoch": 0.19, "grad_norm": 0.27698131540064885, "learning_rate": 0.00019891971733465395, "loss": 1.1791, "step": 1975 }, { "epoch": 0.19, "grad_norm": 0.2874873201823343, "learning_rate": 0.00019891739710027105, "loss": 1.1604, "step": 1976 }, { "epoch": 0.19, "grad_norm": 0.28015375821188654, "learning_rate": 0.00019891507439041814, "loss": 1.1313, "step": 1977 }, { "epoch": 0.19, "grad_norm": 0.2690366043454793, "learning_rate": 0.0001989127492051533, "loss": 1.199, "step": 1978 }, { "epoch": 0.19, "grad_norm": 0.23644122244331564, "learning_rate": 0.00019891042154453477, "loss": 1.0604, "step": 1979 }, { "epoch": 0.19, "grad_norm": 0.23968738721131738, "learning_rate": 0.00019890809140862077, "loss": 1.0409, "step": 1980 }, { "epoch": 0.19, "grad_norm": 0.24461539048772374, "learning_rate": 0.0001989057587974696, "loss": 1.103, "step": 1981 }, { "epoch": 0.19, "grad_norm": 0.2826392500904235, "learning_rate": 0.0001989034237111397, "loss": 1.1352, "step": 1982 }, { "epoch": 0.19, "grad_norm": 0.2772470869290075, "learning_rate": 0.0001989010861496894, "loss": 1.1124, "step": 1983 }, { "epoch": 0.19, "grad_norm": 0.2848620471084776, "learning_rate": 0.00019889874611317732, "loss": 1.0845, "step": 1984 }, { "epoch": 0.19, "grad_norm": 0.2797268047479781, "learning_rate": 0.00019889640360166194, "loss": 1.1135, "step": 1985 }, { "epoch": 0.19, "grad_norm": 0.2569875278431773, "learning_rate": 0.00019889405861520188, "loss": 1.1096, "step": 1986 }, { "epoch": 0.19, "grad_norm": 0.2617687295928765, "learning_rate": 0.0001988917111538559, "loss": 1.0682, "step": 1987 }, { "epoch": 0.19, "grad_norm": 0.28290160215390237, "learning_rate": 0.00019888936121768266, "loss": 1.1322, "step": 1988 }, { "epoch": 0.19, "grad_norm": 0.2626538158195342, "learning_rate": 0.00019888700880674103, "loss": 1.1404, "step": 1989 }, { "epoch": 0.19, "grad_norm": 0.27468104620820544, "learning_rate": 0.00019888465392108986, "loss": 1.211, "step": 1990 }, { "epoch": 0.19, "grad_norm": 0.2684528257690631, "learning_rate": 0.00019888229656078808, "loss": 1.086, "step": 1991 }, { "epoch": 0.19, "grad_norm": 0.2701652519028749, "learning_rate": 0.00019887993672589466, "loss": 1.1998, "step": 1992 }, { "epoch": 0.19, "grad_norm": 0.26483806146239974, "learning_rate": 0.00019887757441646868, "loss": 1.0015, "step": 1993 }, { "epoch": 0.19, "grad_norm": 0.25776041537869526, "learning_rate": 0.00019887520963256927, "loss": 1.1646, "step": 1994 }, { "epoch": 0.19, "grad_norm": 0.2857416415267135, "learning_rate": 0.00019887284237425558, "loss": 1.1295, "step": 1995 }, { "epoch": 0.19, "grad_norm": 0.2720535216646917, "learning_rate": 0.00019887047264158692, "loss": 1.0362, "step": 1996 }, { "epoch": 0.19, "grad_norm": 0.2556899383886345, "learning_rate": 0.0001988681004346225, "loss": 1.239, "step": 1997 }, { "epoch": 0.19, "grad_norm": 0.29555721414719655, "learning_rate": 0.00019886572575342174, "loss": 1.1347, "step": 1998 }, { "epoch": 0.19, "grad_norm": 0.28921149656501194, "learning_rate": 0.00019886334859804406, "loss": 1.1826, "step": 1999 }, { "epoch": 0.19, "grad_norm": 0.274012668267724, "learning_rate": 0.00019886096896854896, "loss": 1.0865, "step": 2000 }, { "epoch": 0.19, "grad_norm": 0.2422334744946127, "learning_rate": 0.00019885858686499594, "loss": 1.0813, "step": 2001 }, { "epoch": 0.19, "grad_norm": 0.30780112818652483, "learning_rate": 0.00019885620228744468, "loss": 1.0997, "step": 2002 }, { "epoch": 0.19, "grad_norm": 0.24632043466385958, "learning_rate": 0.00019885381523595484, "loss": 1.0984, "step": 2003 }, { "epoch": 0.19, "grad_norm": 0.27662537602831366, "learning_rate": 0.00019885142571058614, "loss": 1.1465, "step": 2004 }, { "epoch": 0.19, "grad_norm": 0.24285561219836202, "learning_rate": 0.00019884903371139838, "loss": 1.0971, "step": 2005 }, { "epoch": 0.19, "grad_norm": 0.24656395139808884, "learning_rate": 0.00019884663923845142, "loss": 1.143, "step": 2006 }, { "epoch": 0.19, "grad_norm": 0.2629422784962355, "learning_rate": 0.0001988442422918052, "loss": 1.1407, "step": 2007 }, { "epoch": 0.19, "grad_norm": 0.2811274440605523, "learning_rate": 0.0001988418428715197, "loss": 1.159, "step": 2008 }, { "epoch": 0.19, "grad_norm": 0.28599512163427754, "learning_rate": 0.00019883944097765497, "loss": 1.2293, "step": 2009 }, { "epoch": 0.19, "grad_norm": 0.2637511197971286, "learning_rate": 0.0001988370366102711, "loss": 1.0897, "step": 2010 }, { "epoch": 0.19, "grad_norm": 0.2621878190951607, "learning_rate": 0.00019883462976942826, "loss": 1.0737, "step": 2011 }, { "epoch": 0.19, "grad_norm": 0.2762514383867885, "learning_rate": 0.0001988322204551867, "loss": 1.1701, "step": 2012 }, { "epoch": 0.19, "grad_norm": 0.24133383394031283, "learning_rate": 0.00019882980866760673, "loss": 1.0147, "step": 2013 }, { "epoch": 0.19, "grad_norm": 0.26354770911760284, "learning_rate": 0.00019882739440674863, "loss": 1.1734, "step": 2014 }, { "epoch": 0.19, "grad_norm": 0.28989400649087416, "learning_rate": 0.00019882497767267294, "loss": 0.9902, "step": 2015 }, { "epoch": 0.19, "grad_norm": 0.27932170163937037, "learning_rate": 0.00019882255846544005, "loss": 1.1016, "step": 2016 }, { "epoch": 0.19, "grad_norm": 0.25887068615609143, "learning_rate": 0.00019882013678511052, "loss": 1.0908, "step": 2017 }, { "epoch": 0.19, "grad_norm": 0.28911454498855693, "learning_rate": 0.000198817712631745, "loss": 1.1229, "step": 2018 }, { "epoch": 0.19, "grad_norm": 0.2546367208219804, "learning_rate": 0.00019881528600540404, "loss": 1.1906, "step": 2019 }, { "epoch": 0.19, "grad_norm": 0.2736344196999536, "learning_rate": 0.0001988128569061485, "loss": 1.1621, "step": 2020 }, { "epoch": 0.19, "grad_norm": 0.2818744152091652, "learning_rate": 0.0001988104253340391, "loss": 1.0773, "step": 2021 }, { "epoch": 0.19, "grad_norm": 0.2826631297854352, "learning_rate": 0.00019880799128913672, "loss": 1.043, "step": 2022 }, { "epoch": 0.19, "grad_norm": 0.22842355541316772, "learning_rate": 0.00019880555477150223, "loss": 0.8834, "step": 2023 }, { "epoch": 0.19, "grad_norm": 0.3137419246856175, "learning_rate": 0.00019880311578119667, "loss": 1.1337, "step": 2024 }, { "epoch": 0.19, "grad_norm": 0.2733714490698032, "learning_rate": 0.00019880067431828102, "loss": 1.183, "step": 2025 }, { "epoch": 0.19, "grad_norm": 0.26580523535512374, "learning_rate": 0.00019879823038281642, "loss": 1.121, "step": 2026 }, { "epoch": 0.19, "grad_norm": 0.26858458548658276, "learning_rate": 0.000198795783974864, "loss": 1.0527, "step": 2027 }, { "epoch": 0.19, "grad_norm": 0.2763698286676362, "learning_rate": 0.00019879333509448496, "loss": 1.0042, "step": 2028 }, { "epoch": 0.19, "grad_norm": 0.24269742693449786, "learning_rate": 0.00019879088374174066, "loss": 1.1615, "step": 2029 }, { "epoch": 0.19, "grad_norm": 0.30073652024639724, "learning_rate": 0.0001987884299166924, "loss": 1.121, "step": 2030 }, { "epoch": 0.19, "grad_norm": 0.2556748923041058, "learning_rate": 0.00019878597361940161, "loss": 0.8961, "step": 2031 }, { "epoch": 0.19, "grad_norm": 0.2591634855819998, "learning_rate": 0.00019878351484992974, "loss": 1.1487, "step": 2032 }, { "epoch": 0.19, "grad_norm": 0.2600632642281181, "learning_rate": 0.00019878105360833832, "loss": 1.1916, "step": 2033 }, { "epoch": 0.19, "grad_norm": 0.28116821489552873, "learning_rate": 0.00019877858989468894, "loss": 1.0512, "step": 2034 }, { "epoch": 0.19, "grad_norm": 0.20261666938146597, "learning_rate": 0.0001987761237090433, "loss": 1.1838, "step": 2035 }, { "epoch": 0.19, "grad_norm": 0.29561896466102555, "learning_rate": 0.00019877365505146304, "loss": 1.0852, "step": 2036 }, { "epoch": 0.19, "grad_norm": 0.29527708211041853, "learning_rate": 0.00019877118392201, "loss": 1.1186, "step": 2037 }, { "epoch": 0.19, "grad_norm": 0.250583627663694, "learning_rate": 0.00019876871032074603, "loss": 1.045, "step": 2038 }, { "epoch": 0.2, "grad_norm": 0.3097749132789377, "learning_rate": 0.000198766234247733, "loss": 1.1162, "step": 2039 }, { "epoch": 0.2, "grad_norm": 0.2557816245578032, "learning_rate": 0.0001987637557030329, "loss": 1.0323, "step": 2040 }, { "epoch": 0.2, "grad_norm": 0.28034027348239304, "learning_rate": 0.00019876127468670772, "loss": 1.1111, "step": 2041 }, { "epoch": 0.2, "grad_norm": 0.27069634417343275, "learning_rate": 0.00019875879119881957, "loss": 1.0432, "step": 2042 }, { "epoch": 0.2, "grad_norm": 0.2579239339061907, "learning_rate": 0.00019875630523943062, "loss": 1.0104, "step": 2043 }, { "epoch": 0.2, "grad_norm": 0.28379405935029695, "learning_rate": 0.00019875381680860304, "loss": 1.1044, "step": 2044 }, { "epoch": 0.2, "grad_norm": 0.25276024384054346, "learning_rate": 0.00019875132590639917, "loss": 1.0816, "step": 2045 }, { "epoch": 0.2, "grad_norm": 0.2529581499654312, "learning_rate": 0.00019874883253288126, "loss": 1.0982, "step": 2046 }, { "epoch": 0.2, "grad_norm": 0.27524747771098035, "learning_rate": 0.00019874633668811177, "loss": 1.1365, "step": 2047 }, { "epoch": 0.2, "grad_norm": 0.28979122397869284, "learning_rate": 0.00019874383837215314, "loss": 1.1472, "step": 2048 }, { "epoch": 0.2, "grad_norm": 0.26208338201886544, "learning_rate": 0.00019874133758506792, "loss": 1.0844, "step": 2049 }, { "epoch": 0.2, "grad_norm": 0.23244493332423305, "learning_rate": 0.00019873883432691868, "loss": 1.0652, "step": 2050 }, { "epoch": 0.2, "grad_norm": 0.26326298210214855, "learning_rate": 0.000198736328597768, "loss": 1.1114, "step": 2051 }, { "epoch": 0.2, "grad_norm": 0.25775993234870526, "learning_rate": 0.0001987338203976787, "loss": 0.9868, "step": 2052 }, { "epoch": 0.2, "grad_norm": 0.2558696856240754, "learning_rate": 0.00019873130972671347, "loss": 1.0485, "step": 2053 }, { "epoch": 0.2, "grad_norm": 0.25810577145871305, "learning_rate": 0.00019872879658493515, "loss": 1.0948, "step": 2054 }, { "epoch": 0.2, "grad_norm": 0.26764993308160495, "learning_rate": 0.00019872628097240667, "loss": 1.1752, "step": 2055 }, { "epoch": 0.2, "grad_norm": 0.2844642341098131, "learning_rate": 0.00019872376288919093, "loss": 1.1397, "step": 2056 }, { "epoch": 0.2, "grad_norm": 0.27934363473211593, "learning_rate": 0.00019872124233535102, "loss": 1.2909, "step": 2057 }, { "epoch": 0.2, "grad_norm": 0.28305414844226917, "learning_rate": 0.00019871871931094996, "loss": 1.1307, "step": 2058 }, { "epoch": 0.2, "grad_norm": 0.2547448994013944, "learning_rate": 0.0001987161938160509, "loss": 1.0631, "step": 2059 }, { "epoch": 0.2, "grad_norm": 0.2457309364326485, "learning_rate": 0.00019871366585071706, "loss": 1.0862, "step": 2060 }, { "epoch": 0.2, "grad_norm": 0.2892837695062467, "learning_rate": 0.00019871113541501168, "loss": 1.1139, "step": 2061 }, { "epoch": 0.2, "grad_norm": 0.297033511165508, "learning_rate": 0.0001987086025089981, "loss": 1.158, "step": 2062 }, { "epoch": 0.2, "grad_norm": 0.28710128200720647, "learning_rate": 0.00019870606713273968, "loss": 1.0244, "step": 2063 }, { "epoch": 0.2, "grad_norm": 0.29989733418311626, "learning_rate": 0.00019870352928629993, "loss": 1.0888, "step": 2064 }, { "epoch": 0.2, "grad_norm": 0.2698108293542839, "learning_rate": 0.00019870098896974234, "loss": 1.0765, "step": 2065 }, { "epoch": 0.2, "grad_norm": 0.2833643780662451, "learning_rate": 0.00019869844618313046, "loss": 1.0251, "step": 2066 }, { "epoch": 0.2, "grad_norm": 0.3349028897187998, "learning_rate": 0.00019869590092652791, "loss": 1.1527, "step": 2067 }, { "epoch": 0.2, "grad_norm": 0.26736402567126216, "learning_rate": 0.0001986933531999984, "loss": 1.1222, "step": 2068 }, { "epoch": 0.2, "grad_norm": 0.24629779305771238, "learning_rate": 0.00019869080300360576, "loss": 1.0542, "step": 2069 }, { "epoch": 0.2, "grad_norm": 0.2659353555548773, "learning_rate": 0.00019868825033741373, "loss": 1.1196, "step": 2070 }, { "epoch": 0.2, "grad_norm": 0.2880977765710642, "learning_rate": 0.00019868569520148618, "loss": 1.1662, "step": 2071 }, { "epoch": 0.2, "grad_norm": 0.26777705281811776, "learning_rate": 0.0001986831375958871, "loss": 1.1153, "step": 2072 }, { "epoch": 0.2, "grad_norm": 0.2446161703167875, "learning_rate": 0.0001986805775206805, "loss": 1.1845, "step": 2073 }, { "epoch": 0.2, "grad_norm": 0.2873479752588241, "learning_rate": 0.00019867801497593042, "loss": 1.19, "step": 2074 }, { "epoch": 0.2, "grad_norm": 0.2574173866976346, "learning_rate": 0.000198675449961701, "loss": 1.1004, "step": 2075 }, { "epoch": 0.2, "grad_norm": 0.31037055152728826, "learning_rate": 0.00019867288247805642, "loss": 1.1266, "step": 2076 }, { "epoch": 0.2, "grad_norm": 0.25230875618755544, "learning_rate": 0.00019867031252506095, "loss": 1.0861, "step": 2077 }, { "epoch": 0.2, "grad_norm": 0.28027907388788925, "learning_rate": 0.0001986677401027789, "loss": 1.0899, "step": 2078 }, { "epoch": 0.2, "grad_norm": 0.2629017191349244, "learning_rate": 0.00019866516521127462, "loss": 1.1268, "step": 2079 }, { "epoch": 0.2, "grad_norm": 0.2858944012914975, "learning_rate": 0.0001986625878506126, "loss": 1.1248, "step": 2080 }, { "epoch": 0.2, "grad_norm": 0.3010750896726883, "learning_rate": 0.00019866000802085728, "loss": 1.109, "step": 2081 }, { "epoch": 0.2, "grad_norm": 0.27335673435624314, "learning_rate": 0.0001986574257220733, "loss": 1.0929, "step": 2082 }, { "epoch": 0.2, "grad_norm": 0.25277983760592904, "learning_rate": 0.0001986548409543252, "loss": 1.0946, "step": 2083 }, { "epoch": 0.2, "grad_norm": 0.2522955007250379, "learning_rate": 0.00019865225371767773, "loss": 1.1279, "step": 2084 }, { "epoch": 0.2, "grad_norm": 0.2876473859106391, "learning_rate": 0.00019864966401219559, "loss": 1.044, "step": 2085 }, { "epoch": 0.2, "grad_norm": 0.5481907916561444, "learning_rate": 0.00019864707183794362, "loss": 1.3456, "step": 2086 }, { "epoch": 0.2, "grad_norm": 0.26281784160346944, "learning_rate": 0.00019864447719498667, "loss": 1.2029, "step": 2087 }, { "epoch": 0.2, "grad_norm": 0.27656267392775247, "learning_rate": 0.00019864188008338968, "loss": 1.1244, "step": 2088 }, { "epoch": 0.2, "grad_norm": 0.2712507307178155, "learning_rate": 0.00019863928050321765, "loss": 1.2326, "step": 2089 }, { "epoch": 0.2, "grad_norm": 0.27024578206691424, "learning_rate": 0.00019863667845453563, "loss": 1.1642, "step": 2090 }, { "epoch": 0.2, "grad_norm": 0.305876067098806, "learning_rate": 0.00019863407393740876, "loss": 1.2, "step": 2091 }, { "epoch": 0.2, "grad_norm": 0.2665786067215833, "learning_rate": 0.00019863146695190217, "loss": 1.1217, "step": 2092 }, { "epoch": 0.2, "grad_norm": 0.23808439538640014, "learning_rate": 0.00019862885749808115, "loss": 1.089, "step": 2093 }, { "epoch": 0.2, "grad_norm": 0.27236917331109767, "learning_rate": 0.00019862624557601103, "loss": 1.1333, "step": 2094 }, { "epoch": 0.2, "grad_norm": 0.26305710425726253, "learning_rate": 0.00019862363118575705, "loss": 1.1396, "step": 2095 }, { "epoch": 0.2, "grad_norm": 0.2302881958735561, "learning_rate": 0.00019862101432738475, "loss": 1.0263, "step": 2096 }, { "epoch": 0.2, "grad_norm": 0.2954108631439019, "learning_rate": 0.0001986183950009596, "loss": 1.1058, "step": 2097 }, { "epoch": 0.2, "grad_norm": 0.2948895500433461, "learning_rate": 0.00019861577320654712, "loss": 1.1621, "step": 2098 }, { "epoch": 0.2, "grad_norm": 0.24612354772654924, "learning_rate": 0.00019861314894421294, "loss": 1.1682, "step": 2099 }, { "epoch": 0.2, "grad_norm": 0.2770485117180903, "learning_rate": 0.00019861052221402275, "loss": 1.0537, "step": 2100 }, { "epoch": 0.2, "grad_norm": 0.2803003393924788, "learning_rate": 0.00019860789301604222, "loss": 1.1575, "step": 2101 }, { "epoch": 0.2, "grad_norm": 0.263398275519541, "learning_rate": 0.00019860526135033723, "loss": 1.1161, "step": 2102 }, { "epoch": 0.2, "grad_norm": 0.2735697531308213, "learning_rate": 0.0001986026272169736, "loss": 1.1304, "step": 2103 }, { "epoch": 0.2, "grad_norm": 0.2837690209815238, "learning_rate": 0.00019859999061601726, "loss": 0.9939, "step": 2104 }, { "epoch": 0.2, "grad_norm": 0.2611549781543971, "learning_rate": 0.00019859735154753418, "loss": 1.0968, "step": 2105 }, { "epoch": 0.2, "grad_norm": 0.2858960886543411, "learning_rate": 0.0001985947100115904, "loss": 1.1623, "step": 2106 }, { "epoch": 0.2, "grad_norm": 0.3657978801967696, "learning_rate": 0.00019859206600825207, "loss": 1.2114, "step": 2107 }, { "epoch": 0.2, "grad_norm": 0.24528859351726237, "learning_rate": 0.0001985894195375853, "loss": 1.1096, "step": 2108 }, { "epoch": 0.2, "grad_norm": 0.309781272595587, "learning_rate": 0.00019858677059965632, "loss": 1.1382, "step": 2109 }, { "epoch": 0.2, "grad_norm": 0.3015108916795954, "learning_rate": 0.0001985841191945315, "loss": 1.0789, "step": 2110 }, { "epoch": 0.2, "grad_norm": 0.27510018422236365, "learning_rate": 0.0001985814653222771, "loss": 1.1214, "step": 2111 }, { "epoch": 0.2, "grad_norm": 0.2504556220073607, "learning_rate": 0.0001985788089829596, "loss": 1.1829, "step": 2112 }, { "epoch": 0.2, "grad_norm": 0.27607247184581263, "learning_rate": 0.00019857615017664543, "loss": 1.2014, "step": 2113 }, { "epoch": 0.2, "grad_norm": 0.28257879262143415, "learning_rate": 0.00019857348890340117, "loss": 1.1302, "step": 2114 }, { "epoch": 0.2, "grad_norm": 0.2961265516298664, "learning_rate": 0.0001985708251632934, "loss": 1.0324, "step": 2115 }, { "epoch": 0.2, "grad_norm": 0.2942307299808682, "learning_rate": 0.00019856815895638876, "loss": 1.0799, "step": 2116 }, { "epoch": 0.2, "grad_norm": 0.2541933617332, "learning_rate": 0.000198565490282754, "loss": 1.1498, "step": 2117 }, { "epoch": 0.2, "grad_norm": 0.28011641730308906, "learning_rate": 0.0001985628191424559, "loss": 1.1392, "step": 2118 }, { "epoch": 0.2, "grad_norm": 0.2807759455450216, "learning_rate": 0.0001985601455355613, "loss": 1.1776, "step": 2119 }, { "epoch": 0.2, "grad_norm": 0.32430654597893255, "learning_rate": 0.00019855746946213714, "loss": 1.1778, "step": 2120 }, { "epoch": 0.2, "grad_norm": 0.2525816278621571, "learning_rate": 0.00019855479092225037, "loss": 1.1537, "step": 2121 }, { "epoch": 0.2, "grad_norm": 0.26640266147857056, "learning_rate": 0.00019855210991596796, "loss": 1.096, "step": 2122 }, { "epoch": 0.2, "grad_norm": 0.25884771414681745, "learning_rate": 0.00019854942644335712, "loss": 1.1562, "step": 2123 }, { "epoch": 0.2, "grad_norm": 0.27617724462201587, "learning_rate": 0.00019854674050448493, "loss": 1.1385, "step": 2124 }, { "epoch": 0.2, "grad_norm": 0.2919548651872331, "learning_rate": 0.00019854405209941863, "loss": 1.0791, "step": 2125 }, { "epoch": 0.2, "grad_norm": 0.24993403620332835, "learning_rate": 0.00019854136122822547, "loss": 1.0431, "step": 2126 }, { "epoch": 0.2, "grad_norm": 0.23940290308480794, "learning_rate": 0.0001985386678909728, "loss": 1.0944, "step": 2127 }, { "epoch": 0.2, "grad_norm": 0.2852028804707256, "learning_rate": 0.00019853597208772808, "loss": 1.0735, "step": 2128 }, { "epoch": 0.2, "grad_norm": 0.2799825280793891, "learning_rate": 0.0001985332738185587, "loss": 1.1108, "step": 2129 }, { "epoch": 0.2, "grad_norm": 0.2803961566058768, "learning_rate": 0.00019853057308353225, "loss": 1.1428, "step": 2130 }, { "epoch": 0.2, "grad_norm": 0.2681024606858511, "learning_rate": 0.00019852786988271628, "loss": 1.1777, "step": 2131 }, { "epoch": 0.2, "grad_norm": 0.28971370065149094, "learning_rate": 0.0001985251642161784, "loss": 1.1166, "step": 2132 }, { "epoch": 0.2, "grad_norm": 0.2809462172886824, "learning_rate": 0.0001985224560839864, "loss": 1.1337, "step": 2133 }, { "epoch": 0.2, "grad_norm": 0.2662105547019178, "learning_rate": 0.00019851974548620803, "loss": 1.2131, "step": 2134 }, { "epoch": 0.2, "grad_norm": 0.2689850661970803, "learning_rate": 0.0001985170324229111, "loss": 1.1857, "step": 2135 }, { "epoch": 0.2, "grad_norm": 0.2831472805779883, "learning_rate": 0.00019851431689416353, "loss": 1.1575, "step": 2136 }, { "epoch": 0.2, "grad_norm": 0.2877033555483126, "learning_rate": 0.00019851159890003323, "loss": 1.0868, "step": 2137 }, { "epoch": 0.2, "grad_norm": 0.29781126767542937, "learning_rate": 0.00019850887844058827, "loss": 1.1535, "step": 2138 }, { "epoch": 0.2, "grad_norm": 0.2528619996193506, "learning_rate": 0.00019850615551589672, "loss": 1.0632, "step": 2139 }, { "epoch": 0.2, "grad_norm": 0.2605060917972941, "learning_rate": 0.00019850343012602672, "loss": 1.1709, "step": 2140 }, { "epoch": 0.2, "grad_norm": 0.2773145775379898, "learning_rate": 0.0001985007022710465, "loss": 1.1957, "step": 2141 }, { "epoch": 0.2, "grad_norm": 0.28927051493387645, "learning_rate": 0.00019849797195102426, "loss": 1.0608, "step": 2142 }, { "epoch": 0.21, "grad_norm": 0.31331821900541645, "learning_rate": 0.0001984952391660284, "loss": 1.1371, "step": 2143 }, { "epoch": 0.21, "grad_norm": 0.2785760487723139, "learning_rate": 0.00019849250391612726, "loss": 1.1553, "step": 2144 }, { "epoch": 0.21, "grad_norm": 0.28557512665641493, "learning_rate": 0.0001984897662013893, "loss": 1.1505, "step": 2145 }, { "epoch": 0.21, "grad_norm": 0.2640270431957491, "learning_rate": 0.00019848702602188304, "loss": 1.0196, "step": 2146 }, { "epoch": 0.21, "grad_norm": 0.26301343020312196, "learning_rate": 0.00019848428337767708, "loss": 1.0716, "step": 2147 }, { "epoch": 0.21, "grad_norm": 0.24955497958144957, "learning_rate": 0.00019848153826884004, "loss": 1.1068, "step": 2148 }, { "epoch": 0.21, "grad_norm": 0.25592481094445924, "learning_rate": 0.00019847879069544058, "loss": 1.0493, "step": 2149 }, { "epoch": 0.21, "grad_norm": 0.2690607872687816, "learning_rate": 0.0001984760406575475, "loss": 1.1645, "step": 2150 }, { "epoch": 0.21, "grad_norm": 0.29603325449239903, "learning_rate": 0.00019847328815522964, "loss": 1.0333, "step": 2151 }, { "epoch": 0.21, "grad_norm": 0.25742762041890327, "learning_rate": 0.00019847053318855582, "loss": 1.2017, "step": 2152 }, { "epoch": 0.21, "grad_norm": 0.30645008656891, "learning_rate": 0.00019846777575759504, "loss": 1.1346, "step": 2153 }, { "epoch": 0.21, "grad_norm": 0.27044205667054494, "learning_rate": 0.00019846501586241627, "loss": 1.097, "step": 2154 }, { "epoch": 0.21, "grad_norm": 0.23220679441493658, "learning_rate": 0.00019846225350308864, "loss": 1.0664, "step": 2155 }, { "epoch": 0.21, "grad_norm": 0.26546624754158665, "learning_rate": 0.00019845948867968117, "loss": 1.0479, "step": 2156 }, { "epoch": 0.21, "grad_norm": 0.2798970718045841, "learning_rate": 0.00019845672139226316, "loss": 1.0244, "step": 2157 }, { "epoch": 0.21, "grad_norm": 0.2784787690728781, "learning_rate": 0.00019845395164090382, "loss": 1.1114, "step": 2158 }, { "epoch": 0.21, "grad_norm": 0.24443020869424956, "learning_rate": 0.00019845117942567244, "loss": 1.1341, "step": 2159 }, { "epoch": 0.21, "grad_norm": 0.244756739484968, "learning_rate": 0.00019844840474663843, "loss": 1.0807, "step": 2160 }, { "epoch": 0.21, "grad_norm": 0.2702201314078314, "learning_rate": 0.00019844562760387122, "loss": 1.1269, "step": 2161 }, { "epoch": 0.21, "grad_norm": 0.29077240998538223, "learning_rate": 0.00019844284799744032, "loss": 1.1688, "step": 2162 }, { "epoch": 0.21, "grad_norm": 0.2683927419703879, "learning_rate": 0.00019844006592741525, "loss": 1.0173, "step": 2163 }, { "epoch": 0.21, "grad_norm": 0.3109361300962534, "learning_rate": 0.0001984372813938657, "loss": 1.148, "step": 2164 }, { "epoch": 0.21, "grad_norm": 0.2501468531327423, "learning_rate": 0.00019843449439686128, "loss": 1.1907, "step": 2165 }, { "epoch": 0.21, "grad_norm": 0.2664858040953975, "learning_rate": 0.0001984317049364718, "loss": 1.1097, "step": 2166 }, { "epoch": 0.21, "grad_norm": 0.2549104447589198, "learning_rate": 0.00019842891301276704, "loss": 1.0737, "step": 2167 }, { "epoch": 0.21, "grad_norm": 0.2908504505180112, "learning_rate": 0.00019842611862581685, "loss": 1.0539, "step": 2168 }, { "epoch": 0.21, "grad_norm": 0.2959941920542236, "learning_rate": 0.00019842332177569122, "loss": 1.1418, "step": 2169 }, { "epoch": 0.21, "grad_norm": 0.26475147358616613, "learning_rate": 0.00019842052246246008, "loss": 1.0866, "step": 2170 }, { "epoch": 0.21, "grad_norm": 0.28262317137702664, "learning_rate": 0.0001984177206861935, "loss": 1.1431, "step": 2171 }, { "epoch": 0.21, "grad_norm": 0.2615494504849684, "learning_rate": 0.00019841491644696164, "loss": 1.1576, "step": 2172 }, { "epoch": 0.21, "grad_norm": 0.27419165343889973, "learning_rate": 0.00019841210974483464, "loss": 1.1325, "step": 2173 }, { "epoch": 0.21, "grad_norm": 0.24040329342282296, "learning_rate": 0.0001984093005798827, "loss": 1.1437, "step": 2174 }, { "epoch": 0.21, "grad_norm": 0.29409408115598895, "learning_rate": 0.00019840648895217623, "loss": 1.1064, "step": 2175 }, { "epoch": 0.21, "grad_norm": 0.2523665722905447, "learning_rate": 0.00019840367486178548, "loss": 1.07, "step": 2176 }, { "epoch": 0.21, "grad_norm": 0.25402746178920604, "learning_rate": 0.00019840085830878095, "loss": 1.0573, "step": 2177 }, { "epoch": 0.21, "grad_norm": 0.24450222623833068, "learning_rate": 0.00019839803929323305, "loss": 1.1127, "step": 2178 }, { "epoch": 0.21, "grad_norm": 0.24554337014527297, "learning_rate": 0.00019839521781521245, "loss": 1.0781, "step": 2179 }, { "epoch": 0.21, "grad_norm": 0.29644405786503714, "learning_rate": 0.00019839239387478962, "loss": 1.1072, "step": 2180 }, { "epoch": 0.21, "grad_norm": 0.23532298664328116, "learning_rate": 0.00019838956747203533, "loss": 0.9529, "step": 2181 }, { "epoch": 0.21, "grad_norm": 0.255567234762623, "learning_rate": 0.00019838673860702027, "loss": 1.2165, "step": 2182 }, { "epoch": 0.21, "grad_norm": 0.2565392465769596, "learning_rate": 0.00019838390727981527, "loss": 1.049, "step": 2183 }, { "epoch": 0.21, "grad_norm": 0.28869850717775036, "learning_rate": 0.00019838107349049111, "loss": 1.2043, "step": 2184 }, { "epoch": 0.21, "grad_norm": 0.2717398071388341, "learning_rate": 0.0001983782372391188, "loss": 1.1689, "step": 2185 }, { "epoch": 0.21, "grad_norm": 0.2714997167115452, "learning_rate": 0.00019837539852576923, "loss": 1.0412, "step": 2186 }, { "epoch": 0.21, "grad_norm": 0.2528223715764014, "learning_rate": 0.0001983725573505135, "loss": 1.0636, "step": 2187 }, { "epoch": 0.21, "grad_norm": 0.24457509080188328, "learning_rate": 0.0001983697137134227, "loss": 1.0427, "step": 2188 }, { "epoch": 0.21, "grad_norm": 0.2647502073171626, "learning_rate": 0.00019836686761456803, "loss": 1.1109, "step": 2189 }, { "epoch": 0.21, "grad_norm": 0.25621160412291943, "learning_rate": 0.00019836401905402062, "loss": 1.1426, "step": 2190 }, { "epoch": 0.21, "grad_norm": 0.2875669800942636, "learning_rate": 0.00019836116803185184, "loss": 1.0843, "step": 2191 }, { "epoch": 0.21, "grad_norm": 0.26793735322362255, "learning_rate": 0.000198358314548133, "loss": 1.2198, "step": 2192 }, { "epoch": 0.21, "grad_norm": 0.30265207091393975, "learning_rate": 0.00019835545860293551, "loss": 0.9996, "step": 2193 }, { "epoch": 0.21, "grad_norm": 0.25384247043679864, "learning_rate": 0.0001983526001963309, "loss": 1.1222, "step": 2194 }, { "epoch": 0.21, "grad_norm": 0.25800061296353, "learning_rate": 0.00019834973932839062, "loss": 1.0905, "step": 2195 }, { "epoch": 0.21, "grad_norm": 0.27769824178444574, "learning_rate": 0.00019834687599918632, "loss": 1.0538, "step": 2196 }, { "epoch": 0.21, "grad_norm": 0.2772132245071213, "learning_rate": 0.00019834401020878963, "loss": 1.0624, "step": 2197 }, { "epoch": 0.21, "grad_norm": 0.26363087570930127, "learning_rate": 0.0001983411419572723, "loss": 0.9887, "step": 2198 }, { "epoch": 0.21, "grad_norm": 0.2815107276805014, "learning_rate": 0.00019833827124470608, "loss": 1.1811, "step": 2199 }, { "epoch": 0.21, "grad_norm": 0.2785544171403854, "learning_rate": 0.0001983353980711628, "loss": 1.1437, "step": 2200 }, { "epoch": 0.21, "grad_norm": 0.3011117821316356, "learning_rate": 0.0001983325224367144, "loss": 1.0398, "step": 2201 }, { "epoch": 0.21, "grad_norm": 0.2445670553372607, "learning_rate": 0.00019832964434143282, "loss": 1.101, "step": 2202 }, { "epoch": 0.21, "grad_norm": 0.25914004062255874, "learning_rate": 0.00019832676378539005, "loss": 1.1808, "step": 2203 }, { "epoch": 0.21, "grad_norm": 0.2754672779595424, "learning_rate": 0.00019832388076865826, "loss": 1.0929, "step": 2204 }, { "epoch": 0.21, "grad_norm": 0.2565507943348922, "learning_rate": 0.00019832099529130959, "loss": 1.0699, "step": 2205 }, { "epoch": 0.21, "grad_norm": 0.3343174008427606, "learning_rate": 0.00019831810735341618, "loss": 1.0145, "step": 2206 }, { "epoch": 0.21, "grad_norm": 0.2804796337948149, "learning_rate": 0.00019831521695505035, "loss": 1.0897, "step": 2207 }, { "epoch": 0.21, "grad_norm": 0.2525345630451486, "learning_rate": 0.00019831232409628445, "loss": 1.0794, "step": 2208 }, { "epoch": 0.21, "grad_norm": 0.24602578478108195, "learning_rate": 0.0001983094287771908, "loss": 1.1439, "step": 2209 }, { "epoch": 0.21, "grad_norm": 0.26742569319862003, "learning_rate": 0.00019830653099784195, "loss": 1.1399, "step": 2210 }, { "epoch": 0.21, "grad_norm": 0.30626650715688947, "learning_rate": 0.00019830363075831037, "loss": 1.2276, "step": 2211 }, { "epoch": 0.21, "grad_norm": 0.2978427240509176, "learning_rate": 0.00019830072805866866, "loss": 1.215, "step": 2212 }, { "epoch": 0.21, "grad_norm": 0.2795761909559458, "learning_rate": 0.00019829782289898943, "loss": 1.2044, "step": 2213 }, { "epoch": 0.21, "grad_norm": 0.28310847050083876, "learning_rate": 0.0001982949152793454, "loss": 1.1433, "step": 2214 }, { "epoch": 0.21, "grad_norm": 0.2796955122824297, "learning_rate": 0.00019829200519980937, "loss": 1.0606, "step": 2215 }, { "epoch": 0.21, "grad_norm": 0.2528523703698838, "learning_rate": 0.0001982890926604541, "loss": 1.068, "step": 2216 }, { "epoch": 0.21, "grad_norm": 0.2523940552498862, "learning_rate": 0.00019828617766135255, "loss": 1.0647, "step": 2217 }, { "epoch": 0.21, "grad_norm": 0.2970231511295705, "learning_rate": 0.0001982832602025776, "loss": 1.2357, "step": 2218 }, { "epoch": 0.21, "grad_norm": 0.27974233774133495, "learning_rate": 0.00019828034028420232, "loss": 1.0735, "step": 2219 }, { "epoch": 0.21, "grad_norm": 0.269451164229955, "learning_rate": 0.00019827741790629975, "loss": 1.0784, "step": 2220 }, { "epoch": 0.21, "grad_norm": 0.2658199878337128, "learning_rate": 0.00019827449306894304, "loss": 1.0841, "step": 2221 }, { "epoch": 0.21, "grad_norm": 0.257731802421506, "learning_rate": 0.00019827156577220537, "loss": 1.2333, "step": 2222 }, { "epoch": 0.21, "grad_norm": 0.32039613850942644, "learning_rate": 0.00019826863601616, "loss": 1.1436, "step": 2223 }, { "epoch": 0.21, "grad_norm": 0.23336247900235474, "learning_rate": 0.00019826570380088025, "loss": 1.1719, "step": 2224 }, { "epoch": 0.21, "grad_norm": 0.28395673225685364, "learning_rate": 0.0001982627691264395, "loss": 1.183, "step": 2225 }, { "epoch": 0.21, "grad_norm": 0.2848669382303132, "learning_rate": 0.00019825983199291122, "loss": 1.1098, "step": 2226 }, { "epoch": 0.21, "grad_norm": 0.27510914460004676, "learning_rate": 0.0001982568924003689, "loss": 1.1285, "step": 2227 }, { "epoch": 0.21, "grad_norm": 0.2921265346308141, "learning_rate": 0.00019825395034888605, "loss": 1.1692, "step": 2228 }, { "epoch": 0.21, "grad_norm": 0.2761631545730766, "learning_rate": 0.00019825100583853637, "loss": 1.1872, "step": 2229 }, { "epoch": 0.21, "grad_norm": 0.2592595323871488, "learning_rate": 0.00019824805886939353, "loss": 1.0289, "step": 2230 }, { "epoch": 0.21, "grad_norm": 0.29309277372861997, "learning_rate": 0.00019824510944153125, "loss": 1.1123, "step": 2231 }, { "epoch": 0.21, "grad_norm": 0.27306258003376527, "learning_rate": 0.00019824215755502337, "loss": 1.1453, "step": 2232 }, { "epoch": 0.21, "grad_norm": 0.263424668387752, "learning_rate": 0.00019823920320994373, "loss": 1.1002, "step": 2233 }, { "epoch": 0.21, "grad_norm": 0.2938351047841059, "learning_rate": 0.00019823624640636633, "loss": 1.127, "step": 2234 }, { "epoch": 0.21, "grad_norm": 0.24927930137531826, "learning_rate": 0.0001982332871443651, "loss": 1.0708, "step": 2235 }, { "epoch": 0.21, "grad_norm": 0.2786877424279345, "learning_rate": 0.00019823032542401413, "loss": 1.0868, "step": 2236 }, { "epoch": 0.21, "grad_norm": 0.2798063724399594, "learning_rate": 0.00019822736124538754, "loss": 1.1573, "step": 2237 }, { "epoch": 0.21, "grad_norm": 0.2633597183766863, "learning_rate": 0.00019822439460855947, "loss": 1.1058, "step": 2238 }, { "epoch": 0.21, "grad_norm": 0.24785877289941977, "learning_rate": 0.00019822142551360422, "loss": 1.0471, "step": 2239 }, { "epoch": 0.21, "grad_norm": 0.2713868952406667, "learning_rate": 0.00019821845396059606, "loss": 1.0428, "step": 2240 }, { "epoch": 0.21, "grad_norm": 0.2811381014767392, "learning_rate": 0.0001982154799496094, "loss": 1.0762, "step": 2241 }, { "epoch": 0.21, "grad_norm": 0.2725029043198944, "learning_rate": 0.00019821250348071856, "loss": 1.1293, "step": 2242 }, { "epoch": 0.21, "grad_norm": 0.2827100509016029, "learning_rate": 0.00019820952455399814, "loss": 1.1447, "step": 2243 }, { "epoch": 0.21, "grad_norm": 0.2758022374658988, "learning_rate": 0.00019820654316952263, "loss": 1.1659, "step": 2244 }, { "epoch": 0.21, "grad_norm": 0.2633396491372797, "learning_rate": 0.00019820355932736666, "loss": 1.0462, "step": 2245 }, { "epoch": 0.21, "grad_norm": 0.2731226166685037, "learning_rate": 0.00019820057302760488, "loss": 0.9548, "step": 2246 }, { "epoch": 0.21, "grad_norm": 0.25556114167089006, "learning_rate": 0.00019819758427031206, "loss": 1.2312, "step": 2247 }, { "epoch": 0.22, "grad_norm": 0.28848997593382414, "learning_rate": 0.00019819459305556297, "loss": 1.0739, "step": 2248 }, { "epoch": 0.22, "grad_norm": 0.23351405165987268, "learning_rate": 0.0001981915993834325, "loss": 1.0641, "step": 2249 }, { "epoch": 0.22, "grad_norm": 0.26791227089364905, "learning_rate": 0.00019818860325399552, "loss": 1.1015, "step": 2250 }, { "epoch": 0.22, "grad_norm": 0.2820213434051579, "learning_rate": 0.00019818560466732706, "loss": 1.063, "step": 2251 }, { "epoch": 0.22, "grad_norm": 0.24527078074648306, "learning_rate": 0.00019818260362350213, "loss": 1.1702, "step": 2252 }, { "epoch": 0.22, "grad_norm": 0.2720420411260554, "learning_rate": 0.0001981796001225958, "loss": 1.0912, "step": 2253 }, { "epoch": 0.22, "grad_norm": 0.2713012314046693, "learning_rate": 0.00019817659416468332, "loss": 1.0524, "step": 2254 }, { "epoch": 0.22, "grad_norm": 0.26924822640795093, "learning_rate": 0.00019817358574983983, "loss": 1.0871, "step": 2255 }, { "epoch": 0.22, "grad_norm": 0.27363733386951783, "learning_rate": 0.0001981705748781407, "loss": 1.0598, "step": 2256 }, { "epoch": 0.22, "grad_norm": 0.24189809697792714, "learning_rate": 0.0001981675615496612, "loss": 1.084, "step": 2257 }, { "epoch": 0.22, "grad_norm": 0.2706773118754261, "learning_rate": 0.0001981645457644768, "loss": 1.0637, "step": 2258 }, { "epoch": 0.22, "grad_norm": 0.27820113281091335, "learning_rate": 0.00019816152752266292, "loss": 1.1624, "step": 2259 }, { "epoch": 0.22, "grad_norm": 0.23093374468477146, "learning_rate": 0.00019815850682429516, "loss": 1.1735, "step": 2260 }, { "epoch": 0.22, "grad_norm": 0.26705391917092386, "learning_rate": 0.00019815548366944904, "loss": 1.049, "step": 2261 }, { "epoch": 0.22, "grad_norm": 0.28355313423369083, "learning_rate": 0.00019815245805820028, "loss": 1.0949, "step": 2262 }, { "epoch": 0.22, "grad_norm": 0.2395712965169708, "learning_rate": 0.00019814942999062457, "loss": 1.05, "step": 2263 }, { "epoch": 0.22, "grad_norm": 0.285804478616941, "learning_rate": 0.00019814639946679768, "loss": 1.1369, "step": 2264 }, { "epoch": 0.22, "grad_norm": 0.25061529704124386, "learning_rate": 0.00019814336648679546, "loss": 1.0655, "step": 2265 }, { "epoch": 0.22, "grad_norm": 0.2909795934470434, "learning_rate": 0.0001981403310506938, "loss": 1.1807, "step": 2266 }, { "epoch": 0.22, "grad_norm": 0.23297851873356334, "learning_rate": 0.00019813729315856869, "loss": 1.152, "step": 2267 }, { "epoch": 0.22, "grad_norm": 0.25889655866668293, "learning_rate": 0.00019813425281049613, "loss": 1.1054, "step": 2268 }, { "epoch": 0.22, "grad_norm": 0.23977654506120644, "learning_rate": 0.00019813121000655223, "loss": 1.1002, "step": 2269 }, { "epoch": 0.22, "grad_norm": 0.2549715877517098, "learning_rate": 0.00019812816474681314, "loss": 1.057, "step": 2270 }, { "epoch": 0.22, "grad_norm": 0.26247547673776234, "learning_rate": 0.00019812511703135504, "loss": 1.0619, "step": 2271 }, { "epoch": 0.22, "grad_norm": 0.2867669044848128, "learning_rate": 0.00019812206686025424, "loss": 1.1794, "step": 2272 }, { "epoch": 0.22, "grad_norm": 0.27304725331072943, "learning_rate": 0.000198119014233587, "loss": 1.1895, "step": 2273 }, { "epoch": 0.22, "grad_norm": 0.25831673662414345, "learning_rate": 0.00019811595915142979, "loss": 1.088, "step": 2274 }, { "epoch": 0.22, "grad_norm": 0.23021022674148214, "learning_rate": 0.00019811290161385906, "loss": 1.0841, "step": 2275 }, { "epoch": 0.22, "grad_norm": 0.2575794159303839, "learning_rate": 0.00019810984162095129, "loss": 1.0906, "step": 2276 }, { "epoch": 0.22, "grad_norm": 0.2609400179291492, "learning_rate": 0.00019810677917278305, "loss": 1.1717, "step": 2277 }, { "epoch": 0.22, "grad_norm": 0.28398870543915045, "learning_rate": 0.00019810371426943105, "loss": 1.1347, "step": 2278 }, { "epoch": 0.22, "grad_norm": 0.26309231855699067, "learning_rate": 0.0001981006469109719, "loss": 1.1804, "step": 2279 }, { "epoch": 0.22, "grad_norm": 0.2902283356657511, "learning_rate": 0.00019809757709748243, "loss": 1.1167, "step": 2280 }, { "epoch": 0.22, "grad_norm": 0.25748170041372764, "learning_rate": 0.00019809450482903942, "loss": 1.1476, "step": 2281 }, { "epoch": 0.22, "grad_norm": 0.2667597093950612, "learning_rate": 0.0001980914301057198, "loss": 1.1277, "step": 2282 }, { "epoch": 0.22, "grad_norm": 0.27836946786170796, "learning_rate": 0.0001980883529276005, "loss": 1.1525, "step": 2283 }, { "epoch": 0.22, "grad_norm": 0.3083167991873422, "learning_rate": 0.0001980852732947585, "loss": 1.1216, "step": 2284 }, { "epoch": 0.22, "grad_norm": 0.2676745480686396, "learning_rate": 0.00019808219120727086, "loss": 1.1328, "step": 2285 }, { "epoch": 0.22, "grad_norm": 0.25527058852259726, "learning_rate": 0.0001980791066652148, "loss": 1.065, "step": 2286 }, { "epoch": 0.22, "grad_norm": 0.28337351811282757, "learning_rate": 0.00019807601966866746, "loss": 1.1723, "step": 2287 }, { "epoch": 0.22, "grad_norm": 0.27319098266987507, "learning_rate": 0.00019807293021770604, "loss": 1.0549, "step": 2288 }, { "epoch": 0.22, "grad_norm": 0.30841736311542484, "learning_rate": 0.00019806983831240795, "loss": 1.1445, "step": 2289 }, { "epoch": 0.22, "grad_norm": 0.2555510965247522, "learning_rate": 0.0001980667439528505, "loss": 1.1424, "step": 2290 }, { "epoch": 0.22, "grad_norm": 0.2641571799003314, "learning_rate": 0.00019806364713911116, "loss": 1.033, "step": 2291 }, { "epoch": 0.22, "grad_norm": 0.2838900082793651, "learning_rate": 0.0001980605478712674, "loss": 1.0774, "step": 2292 }, { "epoch": 0.22, "grad_norm": 0.31407713147896055, "learning_rate": 0.00019805744614939682, "loss": 1.2683, "step": 2293 }, { "epoch": 0.22, "grad_norm": 0.27082803879903133, "learning_rate": 0.00019805434197357703, "loss": 1.1711, "step": 2294 }, { "epoch": 0.22, "grad_norm": 0.27007517574821516, "learning_rate": 0.0001980512353438857, "loss": 1.1142, "step": 2295 }, { "epoch": 0.22, "grad_norm": 0.25200965101215933, "learning_rate": 0.00019804812626040056, "loss": 1.1365, "step": 2296 }, { "epoch": 0.22, "grad_norm": 0.2482335861309017, "learning_rate": 0.00019804501472319946, "loss": 1.0387, "step": 2297 }, { "epoch": 0.22, "grad_norm": 0.27093750047305093, "learning_rate": 0.0001980419007323602, "loss": 1.0562, "step": 2298 }, { "epoch": 0.22, "grad_norm": 0.2823581467965368, "learning_rate": 0.00019803878428796082, "loss": 1.2542, "step": 2299 }, { "epoch": 0.22, "grad_norm": 0.27114630287941716, "learning_rate": 0.00019803566539007924, "loss": 1.1863, "step": 2300 }, { "epoch": 0.22, "grad_norm": 0.27533847009087203, "learning_rate": 0.0001980325440387935, "loss": 0.9211, "step": 2301 }, { "epoch": 0.22, "grad_norm": 0.24736978230602902, "learning_rate": 0.00019802942023418175, "loss": 1.1474, "step": 2302 }, { "epoch": 0.22, "grad_norm": 0.24528908482065118, "learning_rate": 0.00019802629397632212, "loss": 1.0203, "step": 2303 }, { "epoch": 0.22, "grad_norm": 0.3102766690223985, "learning_rate": 0.00019802316526529293, "loss": 1.1166, "step": 2304 }, { "epoch": 0.22, "grad_norm": 0.2645211126197188, "learning_rate": 0.00019802003410117238, "loss": 1.09, "step": 2305 }, { "epoch": 0.22, "grad_norm": 0.24314341601375852, "learning_rate": 0.0001980169004840389, "loss": 1.067, "step": 2306 }, { "epoch": 0.22, "grad_norm": 0.28901370914218866, "learning_rate": 0.00019801376441397087, "loss": 1.125, "step": 2307 }, { "epoch": 0.22, "grad_norm": 0.22977734256634133, "learning_rate": 0.00019801062589104676, "loss": 1.1017, "step": 2308 }, { "epoch": 0.22, "grad_norm": 0.2763909341056602, "learning_rate": 0.00019800748491534517, "loss": 1.1466, "step": 2309 }, { "epoch": 0.22, "grad_norm": 0.2638965585187832, "learning_rate": 0.00019800434148694468, "loss": 1.0884, "step": 2310 }, { "epoch": 0.22, "grad_norm": 0.25365372028597144, "learning_rate": 0.00019800119560592393, "loss": 1.1063, "step": 2311 }, { "epoch": 0.22, "grad_norm": 0.2946212685821378, "learning_rate": 0.0001979980472723617, "loss": 1.0209, "step": 2312 }, { "epoch": 0.22, "grad_norm": 0.28135433865494547, "learning_rate": 0.00019799489648633675, "loss": 1.1626, "step": 2313 }, { "epoch": 0.22, "grad_norm": 0.3064233206742547, "learning_rate": 0.00019799174324792787, "loss": 1.1433, "step": 2314 }, { "epoch": 0.22, "grad_norm": 0.2800872851032662, "learning_rate": 0.00019798858755721405, "loss": 1.039, "step": 2315 }, { "epoch": 0.22, "grad_norm": 0.25898676357853834, "learning_rate": 0.00019798542941427426, "loss": 1.1401, "step": 2316 }, { "epoch": 0.22, "grad_norm": 0.26001355286806555, "learning_rate": 0.00019798226881918753, "loss": 1.0741, "step": 2317 }, { "epoch": 0.22, "grad_norm": 0.31195060460939816, "learning_rate": 0.00019797910577203293, "loss": 1.155, "step": 2318 }, { "epoch": 0.22, "grad_norm": 0.2853360314799912, "learning_rate": 0.00019797594027288963, "loss": 1.1006, "step": 2319 }, { "epoch": 0.22, "grad_norm": 0.2282844809122414, "learning_rate": 0.00019797277232183684, "loss": 1.0532, "step": 2320 }, { "epoch": 0.22, "grad_norm": 0.28551517768089857, "learning_rate": 0.00019796960191895385, "loss": 1.0486, "step": 2321 }, { "epoch": 0.22, "grad_norm": 0.2590261403859847, "learning_rate": 0.00019796642906432004, "loss": 1.0397, "step": 2322 }, { "epoch": 0.22, "grad_norm": 0.23480527152036285, "learning_rate": 0.0001979632537580147, "loss": 1.0072, "step": 2323 }, { "epoch": 0.22, "grad_norm": 0.26973107870500546, "learning_rate": 0.00019796007600011742, "loss": 1.1077, "step": 2324 }, { "epoch": 0.22, "grad_norm": 0.26492201462618237, "learning_rate": 0.0001979568957907077, "loss": 1.1157, "step": 2325 }, { "epoch": 0.22, "grad_norm": 0.279425391462633, "learning_rate": 0.00019795371312986504, "loss": 1.0199, "step": 2326 }, { "epoch": 0.22, "grad_norm": 0.2836141793634087, "learning_rate": 0.00019795052801766915, "loss": 1.1172, "step": 2327 }, { "epoch": 0.22, "grad_norm": 0.2544431831518797, "learning_rate": 0.0001979473404541998, "loss": 1.0867, "step": 2328 }, { "epoch": 0.22, "grad_norm": 0.2402666227462647, "learning_rate": 0.0001979441504395366, "loss": 1.0139, "step": 2329 }, { "epoch": 0.22, "grad_norm": 0.2503197635519736, "learning_rate": 0.00019794095797375953, "loss": 1.0556, "step": 2330 }, { "epoch": 0.22, "grad_norm": 0.2581127509866034, "learning_rate": 0.00019793776305694846, "loss": 0.9451, "step": 2331 }, { "epoch": 0.22, "grad_norm": 0.24403281634446966, "learning_rate": 0.0001979345656891833, "loss": 1.1031, "step": 2332 }, { "epoch": 0.22, "grad_norm": 0.2532440703626388, "learning_rate": 0.00019793136587054405, "loss": 1.0738, "step": 2333 }, { "epoch": 0.22, "grad_norm": 0.2597097391411967, "learning_rate": 0.00019792816360111087, "loss": 1.1359, "step": 2334 }, { "epoch": 0.22, "grad_norm": 0.27991032105875546, "learning_rate": 0.00019792495888096382, "loss": 0.9373, "step": 2335 }, { "epoch": 0.22, "grad_norm": 0.2713405323422737, "learning_rate": 0.00019792175171018313, "loss": 1.0818, "step": 2336 }, { "epoch": 0.22, "grad_norm": 0.2942256218211258, "learning_rate": 0.00019791854208884907, "loss": 1.1304, "step": 2337 }, { "epoch": 0.22, "grad_norm": 0.24670357822710387, "learning_rate": 0.00019791533001704194, "loss": 1.0164, "step": 2338 }, { "epoch": 0.22, "grad_norm": 0.25797879496591175, "learning_rate": 0.00019791211549484216, "loss": 1.0922, "step": 2339 }, { "epoch": 0.22, "grad_norm": 0.28874847418524446, "learning_rate": 0.00019790889852233016, "loss": 1.2126, "step": 2340 }, { "epoch": 0.22, "grad_norm": 0.2831045631432414, "learning_rate": 0.00019790567909958644, "loss": 1.1913, "step": 2341 }, { "epoch": 0.22, "grad_norm": 0.25052218126412557, "learning_rate": 0.00019790245722669153, "loss": 1.1242, "step": 2342 }, { "epoch": 0.22, "grad_norm": 0.2980993557396919, "learning_rate": 0.00019789923290372614, "loss": 1.0499, "step": 2343 }, { "epoch": 0.22, "grad_norm": 0.26321626908133683, "learning_rate": 0.00019789600613077092, "loss": 1.0864, "step": 2344 }, { "epoch": 0.22, "grad_norm": 0.26596294842052304, "learning_rate": 0.0001978927769079066, "loss": 1.0538, "step": 2345 }, { "epoch": 0.22, "grad_norm": 0.25422065022309154, "learning_rate": 0.00019788954523521402, "loss": 1.2115, "step": 2346 }, { "epoch": 0.22, "grad_norm": 0.23598171182692104, "learning_rate": 0.00019788631111277406, "loss": 1.0686, "step": 2347 }, { "epoch": 0.22, "grad_norm": 0.2847052874921601, "learning_rate": 0.00019788307454066763, "loss": 1.0641, "step": 2348 }, { "epoch": 0.22, "grad_norm": 0.25357051014186355, "learning_rate": 0.00019787983551897576, "loss": 1.0484, "step": 2349 }, { "epoch": 0.22, "grad_norm": 0.2411514518029808, "learning_rate": 0.00019787659404777946, "loss": 1.133, "step": 2350 }, { "epoch": 0.22, "grad_norm": 0.25446854039811434, "learning_rate": 0.0001978733501271599, "loss": 1.1672, "step": 2351 }, { "epoch": 0.23, "grad_norm": 0.26777410057209505, "learning_rate": 0.00019787010375719826, "loss": 1.138, "step": 2352 }, { "epoch": 0.23, "grad_norm": 0.27808638420934434, "learning_rate": 0.0001978668549379757, "loss": 1.1782, "step": 2353 }, { "epoch": 0.23, "grad_norm": 0.2592972824957828, "learning_rate": 0.00019786360366957367, "loss": 1.1013, "step": 2354 }, { "epoch": 0.23, "grad_norm": 0.25628836959733703, "learning_rate": 0.0001978603499520734, "loss": 0.9967, "step": 2355 }, { "epoch": 0.23, "grad_norm": 0.2636173075017777, "learning_rate": 0.0001978570937855564, "loss": 1.1131, "step": 2356 }, { "epoch": 0.23, "grad_norm": 0.2965203958377406, "learning_rate": 0.0001978538351701041, "loss": 1.1412, "step": 2357 }, { "epoch": 0.23, "grad_norm": 0.2551730645828324, "learning_rate": 0.0001978505741057981, "loss": 1.1347, "step": 2358 }, { "epoch": 0.23, "grad_norm": 0.3173189097283464, "learning_rate": 0.00019784731059271996, "loss": 1.1354, "step": 2359 }, { "epoch": 0.23, "grad_norm": 0.287566565828461, "learning_rate": 0.00019784404463095144, "loss": 1.0052, "step": 2360 }, { "epoch": 0.23, "grad_norm": 0.24804179463343937, "learning_rate": 0.00019784077622057416, "loss": 1.1699, "step": 2361 }, { "epoch": 0.23, "grad_norm": 0.3073578708099378, "learning_rate": 0.00019783750536166993, "loss": 1.1715, "step": 2362 }, { "epoch": 0.23, "grad_norm": 0.22734700217911738, "learning_rate": 0.0001978342320543207, "loss": 0.9563, "step": 2363 }, { "epoch": 0.23, "grad_norm": 0.2590258564685437, "learning_rate": 0.0001978309562986083, "loss": 1.0816, "step": 2364 }, { "epoch": 0.23, "grad_norm": 0.27557855270603093, "learning_rate": 0.00019782767809461475, "loss": 1.077, "step": 2365 }, { "epoch": 0.23, "grad_norm": 0.25603874246559705, "learning_rate": 0.00019782439744242205, "loss": 1.146, "step": 2366 }, { "epoch": 0.23, "grad_norm": 0.2761244679810728, "learning_rate": 0.00019782111434211235, "loss": 1.107, "step": 2367 }, { "epoch": 0.23, "grad_norm": 0.26568572466382623, "learning_rate": 0.00019781782879376775, "loss": 1.1306, "step": 2368 }, { "epoch": 0.23, "grad_norm": 0.2723644753582144, "learning_rate": 0.00019781454079747054, "loss": 1.1705, "step": 2369 }, { "epoch": 0.23, "grad_norm": 0.26902633260641967, "learning_rate": 0.00019781125035330297, "loss": 1.0754, "step": 2370 }, { "epoch": 0.23, "grad_norm": 0.2711384423626026, "learning_rate": 0.0001978079574613474, "loss": 1.1202, "step": 2371 }, { "epoch": 0.23, "grad_norm": 0.28204033368789444, "learning_rate": 0.0001978046621216862, "loss": 1.1706, "step": 2372 }, { "epoch": 0.23, "grad_norm": 0.26064152152322406, "learning_rate": 0.00019780136433440184, "loss": 1.0596, "step": 2373 }, { "epoch": 0.23, "grad_norm": 0.2907116807628749, "learning_rate": 0.00019779806409957692, "loss": 1.1855, "step": 2374 }, { "epoch": 0.23, "grad_norm": 0.2739091926383725, "learning_rate": 0.00019779476141729396, "loss": 1.1093, "step": 2375 }, { "epoch": 0.23, "grad_norm": 0.24773766770558472, "learning_rate": 0.00019779145628763564, "loss": 1.0423, "step": 2376 }, { "epoch": 0.23, "grad_norm": 0.31039070403873764, "learning_rate": 0.00019778814871068465, "loss": 1.125, "step": 2377 }, { "epoch": 0.23, "grad_norm": 0.25711704080946696, "learning_rate": 0.0001977848386865238, "loss": 1.1752, "step": 2378 }, { "epoch": 0.23, "grad_norm": 0.24583579381340756, "learning_rate": 0.0001977815262152359, "loss": 1.0188, "step": 2379 }, { "epoch": 0.23, "grad_norm": 0.28912054275819865, "learning_rate": 0.00019777821129690387, "loss": 1.0406, "step": 2380 }, { "epoch": 0.23, "grad_norm": 0.27568971892193483, "learning_rate": 0.0001977748939316106, "loss": 1.1377, "step": 2381 }, { "epoch": 0.23, "grad_norm": 0.2703312754633334, "learning_rate": 0.0001977715741194392, "loss": 1.0889, "step": 2382 }, { "epoch": 0.23, "grad_norm": 0.3244168867345363, "learning_rate": 0.00019776825186047268, "loss": 1.2365, "step": 2383 }, { "epoch": 0.23, "grad_norm": 0.26694652233140037, "learning_rate": 0.00019776492715479428, "loss": 0.9792, "step": 2384 }, { "epoch": 0.23, "grad_norm": 0.25774010566880007, "learning_rate": 0.00019776160000248706, "loss": 1.0835, "step": 2385 }, { "epoch": 0.23, "grad_norm": 0.24706317723666119, "learning_rate": 0.0001977582704036344, "loss": 1.0586, "step": 2386 }, { "epoch": 0.23, "grad_norm": 0.2990804163988819, "learning_rate": 0.00019775493835831959, "loss": 1.0996, "step": 2387 }, { "epoch": 0.23, "grad_norm": 0.25726590762789603, "learning_rate": 0.00019775160386662597, "loss": 1.117, "step": 2388 }, { "epoch": 0.23, "grad_norm": 0.2650577582941275, "learning_rate": 0.00019774826692863705, "loss": 1.0128, "step": 2389 }, { "epoch": 0.23, "grad_norm": 0.2668462355680707, "learning_rate": 0.00019774492754443635, "loss": 1.0804, "step": 2390 }, { "epoch": 0.23, "grad_norm": 0.2992539843834909, "learning_rate": 0.00019774158571410737, "loss": 1.2372, "step": 2391 }, { "epoch": 0.23, "grad_norm": 0.2864988762111991, "learning_rate": 0.00019773824143773377, "loss": 1.0984, "step": 2392 }, { "epoch": 0.23, "grad_norm": 0.304882593233717, "learning_rate": 0.00019773489471539926, "loss": 1.1334, "step": 2393 }, { "epoch": 0.23, "grad_norm": 0.28019107709650354, "learning_rate": 0.00019773154554718762, "loss": 1.0708, "step": 2394 }, { "epoch": 0.23, "grad_norm": 0.22866457976217683, "learning_rate": 0.00019772819393318262, "loss": 1.0887, "step": 2395 }, { "epoch": 0.23, "grad_norm": 0.26481598326652184, "learning_rate": 0.00019772483987346812, "loss": 1.1488, "step": 2396 }, { "epoch": 0.23, "grad_norm": 0.27476032047452265, "learning_rate": 0.0001977214833681281, "loss": 1.0884, "step": 2397 }, { "epoch": 0.23, "grad_norm": 0.2671685597445549, "learning_rate": 0.00019771812441724652, "loss": 1.021, "step": 2398 }, { "epoch": 0.23, "grad_norm": 0.2777768402686966, "learning_rate": 0.00019771476302090754, "loss": 1.0786, "step": 2399 }, { "epoch": 0.23, "grad_norm": 0.25286519332057394, "learning_rate": 0.00019771139917919512, "loss": 0.9957, "step": 2400 }, { "epoch": 0.23, "grad_norm": 0.25899496107583564, "learning_rate": 0.00019770803289219355, "loss": 1.232, "step": 2401 }, { "epoch": 0.23, "grad_norm": 0.2554893399049988, "learning_rate": 0.00019770466415998706, "loss": 1.1702, "step": 2402 }, { "epoch": 0.23, "grad_norm": 0.26615765059825036, "learning_rate": 0.00019770129298265994, "loss": 1.1589, "step": 2403 }, { "epoch": 0.23, "grad_norm": 0.2867731760890007, "learning_rate": 0.00019769791936029657, "loss": 1.0231, "step": 2404 }, { "epoch": 0.23, "grad_norm": 0.25865612120055026, "learning_rate": 0.00019769454329298134, "loss": 1.0742, "step": 2405 }, { "epoch": 0.23, "grad_norm": 0.27168406194912653, "learning_rate": 0.00019769116478079876, "loss": 1.11, "step": 2406 }, { "epoch": 0.23, "grad_norm": 0.29872484561116197, "learning_rate": 0.00019768778382383344, "loss": 1.0637, "step": 2407 }, { "epoch": 0.23, "grad_norm": 0.27164584328580743, "learning_rate": 0.0001976844004221699, "loss": 1.0909, "step": 2408 }, { "epoch": 0.23, "grad_norm": 0.2739762871753536, "learning_rate": 0.00019768101457589283, "loss": 1.0961, "step": 2409 }, { "epoch": 0.23, "grad_norm": 0.25357273039005795, "learning_rate": 0.00019767762628508702, "loss": 1.0625, "step": 2410 }, { "epoch": 0.23, "grad_norm": 0.26504750469407357, "learning_rate": 0.00019767423554983718, "loss": 1.0843, "step": 2411 }, { "epoch": 0.23, "grad_norm": 0.274182183625254, "learning_rate": 0.00019767084237022823, "loss": 1.144, "step": 2412 }, { "epoch": 0.23, "grad_norm": 0.282485486193323, "learning_rate": 0.00019766744674634508, "loss": 1.1495, "step": 2413 }, { "epoch": 0.23, "grad_norm": 0.24729551379445136, "learning_rate": 0.00019766404867827269, "loss": 1.1148, "step": 2414 }, { "epoch": 0.23, "grad_norm": 0.23229018534328089, "learning_rate": 0.00019766064816609607, "loss": 1.0309, "step": 2415 }, { "epoch": 0.23, "grad_norm": 0.252259979313865, "learning_rate": 0.00019765724520990038, "loss": 1.1207, "step": 2416 }, { "epoch": 0.23, "grad_norm": 0.23700561147186552, "learning_rate": 0.00019765383980977074, "loss": 1.1039, "step": 2417 }, { "epoch": 0.23, "grad_norm": 0.26556810321631696, "learning_rate": 0.0001976504319657924, "loss": 1.1749, "step": 2418 }, { "epoch": 0.23, "grad_norm": 0.23143332904854963, "learning_rate": 0.00019764702167805064, "loss": 1.1775, "step": 2419 }, { "epoch": 0.23, "grad_norm": 0.27696732231432797, "learning_rate": 0.00019764360894663076, "loss": 1.0399, "step": 2420 }, { "epoch": 0.23, "grad_norm": 0.27237322850717344, "learning_rate": 0.00019764019377161823, "loss": 1.0703, "step": 2421 }, { "epoch": 0.23, "grad_norm": 0.2535297853960644, "learning_rate": 0.00019763677615309847, "loss": 1.0938, "step": 2422 }, { "epoch": 0.23, "grad_norm": 0.26018058934136795, "learning_rate": 0.00019763335609115703, "loss": 1.0601, "step": 2423 }, { "epoch": 0.23, "grad_norm": 0.3439807499205691, "learning_rate": 0.0001976299335858795, "loss": 1.04, "step": 2424 }, { "epoch": 0.23, "grad_norm": 0.24885428737327994, "learning_rate": 0.0001976265086373515, "loss": 1.0878, "step": 2425 }, { "epoch": 0.23, "grad_norm": 0.2518594237134699, "learning_rate": 0.0001976230812456588, "loss": 1.1054, "step": 2426 }, { "epoch": 0.23, "grad_norm": 0.27045653240229783, "learning_rate": 0.0001976196514108871, "loss": 0.9974, "step": 2427 }, { "epoch": 0.23, "grad_norm": 0.27897290427116067, "learning_rate": 0.0001976162191331223, "loss": 1.1238, "step": 2428 }, { "epoch": 0.23, "grad_norm": 0.26488970207228696, "learning_rate": 0.00019761278441245023, "loss": 1.0545, "step": 2429 }, { "epoch": 0.23, "grad_norm": 0.2676698182906196, "learning_rate": 0.00019760934724895692, "loss": 1.1392, "step": 2430 }, { "epoch": 0.23, "grad_norm": 0.2554504815510488, "learning_rate": 0.00019760590764272834, "loss": 1.1628, "step": 2431 }, { "epoch": 0.23, "grad_norm": 0.23803751811537502, "learning_rate": 0.0001976024655938506, "loss": 1.1082, "step": 2432 }, { "epoch": 0.23, "grad_norm": 0.2843747176664285, "learning_rate": 0.00019759902110240977, "loss": 1.0482, "step": 2433 }, { "epoch": 0.23, "grad_norm": 0.23996308884375614, "learning_rate": 0.00019759557416849214, "loss": 1.0972, "step": 2434 }, { "epoch": 0.23, "grad_norm": 0.26431108924886854, "learning_rate": 0.00019759212479218393, "loss": 1.1708, "step": 2435 }, { "epoch": 0.23, "grad_norm": 0.26205638413878823, "learning_rate": 0.0001975886729735714, "loss": 1.1489, "step": 2436 }, { "epoch": 0.23, "grad_norm": 0.26602482241211184, "learning_rate": 0.00019758521871274107, "loss": 1.1432, "step": 2437 }, { "epoch": 0.23, "grad_norm": 0.2611287789955635, "learning_rate": 0.00019758176200977928, "loss": 1.0747, "step": 2438 }, { "epoch": 0.23, "grad_norm": 0.2512098131069008, "learning_rate": 0.00019757830286477258, "loss": 1.1516, "step": 2439 }, { "epoch": 0.23, "grad_norm": 0.300485183834668, "learning_rate": 0.0001975748412778075, "loss": 1.0355, "step": 2440 }, { "epoch": 0.23, "grad_norm": 0.24888115485358228, "learning_rate": 0.00019757137724897073, "loss": 1.1842, "step": 2441 }, { "epoch": 0.23, "grad_norm": 0.24322023911969542, "learning_rate": 0.0001975679107783489, "loss": 1.1434, "step": 2442 }, { "epoch": 0.23, "grad_norm": 0.25278550537075395, "learning_rate": 0.00019756444186602877, "loss": 1.1414, "step": 2443 }, { "epoch": 0.23, "grad_norm": 0.2629822500133049, "learning_rate": 0.0001975609705120972, "loss": 1.1144, "step": 2444 }, { "epoch": 0.23, "grad_norm": 0.25330431801210734, "learning_rate": 0.00019755749671664102, "loss": 1.1006, "step": 2445 }, { "epoch": 0.23, "grad_norm": 0.27277917321975415, "learning_rate": 0.00019755402047974717, "loss": 1.1218, "step": 2446 }, { "epoch": 0.23, "grad_norm": 0.24954891050559894, "learning_rate": 0.00019755054180150262, "loss": 1.168, "step": 2447 }, { "epoch": 0.23, "grad_norm": 0.24441850562430112, "learning_rate": 0.00019754706068199446, "loss": 0.9717, "step": 2448 }, { "epoch": 0.23, "grad_norm": 0.26980275249400415, "learning_rate": 0.00019754357712130984, "loss": 1.0781, "step": 2449 }, { "epoch": 0.23, "grad_norm": 0.2644012899065647, "learning_rate": 0.00019754009111953586, "loss": 1.2219, "step": 2450 }, { "epoch": 0.23, "grad_norm": 0.28502445348284167, "learning_rate": 0.00019753660267675982, "loss": 1.1411, "step": 2451 }, { "epoch": 0.23, "grad_norm": 0.24105312734962844, "learning_rate": 0.000197533111793069, "loss": 1.0395, "step": 2452 }, { "epoch": 0.23, "grad_norm": 0.28402705301365877, "learning_rate": 0.0001975296184685507, "loss": 1.1438, "step": 2453 }, { "epoch": 0.23, "grad_norm": 0.251590448441616, "learning_rate": 0.00019752612270329247, "loss": 1.0017, "step": 2454 }, { "epoch": 0.23, "grad_norm": 0.27385681060181105, "learning_rate": 0.0001975226244973817, "loss": 1.2012, "step": 2455 }, { "epoch": 0.23, "grad_norm": 0.24837745278204337, "learning_rate": 0.000197519123850906, "loss": 1.1111, "step": 2456 }, { "epoch": 0.24, "grad_norm": 0.25263755603908267, "learning_rate": 0.0001975156207639529, "loss": 1.1382, "step": 2457 }, { "epoch": 0.24, "grad_norm": 0.2913062612133686, "learning_rate": 0.0001975121152366101, "loss": 1.144, "step": 2458 }, { "epoch": 0.24, "grad_norm": 0.279969123492799, "learning_rate": 0.00019750860726896536, "loss": 1.1385, "step": 2459 }, { "epoch": 0.24, "grad_norm": 0.24114428531190943, "learning_rate": 0.00019750509686110643, "loss": 1.0758, "step": 2460 }, { "epoch": 0.24, "grad_norm": 0.24865099555297668, "learning_rate": 0.00019750158401312117, "loss": 1.0137, "step": 2461 }, { "epoch": 0.24, "grad_norm": 0.30049591909461865, "learning_rate": 0.0001974980687250975, "loss": 1.324, "step": 2462 }, { "epoch": 0.24, "grad_norm": 0.2535091796023302, "learning_rate": 0.00019749455099712332, "loss": 1.1444, "step": 2463 }, { "epoch": 0.24, "grad_norm": 0.24893191882319649, "learning_rate": 0.00019749103082928682, "loss": 0.8933, "step": 2464 }, { "epoch": 0.24, "grad_norm": 0.2930991676695541, "learning_rate": 0.00019748750822167594, "loss": 1.0118, "step": 2465 }, { "epoch": 0.24, "grad_norm": 0.2436760336989602, "learning_rate": 0.00019748398317437894, "loss": 1.0733, "step": 2466 }, { "epoch": 0.24, "grad_norm": 0.2815078161913315, "learning_rate": 0.00019748045568748396, "loss": 1.1311, "step": 2467 }, { "epoch": 0.24, "grad_norm": 0.2707645703704046, "learning_rate": 0.00019747692576107935, "loss": 1.1313, "step": 2468 }, { "epoch": 0.24, "grad_norm": 0.2605533882651966, "learning_rate": 0.00019747339339525337, "loss": 1.0691, "step": 2469 }, { "epoch": 0.24, "grad_norm": 0.25404802860541503, "learning_rate": 0.00019746985859009448, "loss": 1.1801, "step": 2470 }, { "epoch": 0.24, "grad_norm": 0.2769596865608125, "learning_rate": 0.00019746632134569114, "loss": 1.0646, "step": 2471 }, { "epoch": 0.24, "grad_norm": 0.28755610236991974, "learning_rate": 0.0001974627816621318, "loss": 1.0567, "step": 2472 }, { "epoch": 0.24, "grad_norm": 0.2561327873358053, "learning_rate": 0.00019745923953950516, "loss": 1.1097, "step": 2473 }, { "epoch": 0.24, "grad_norm": 0.27123359888401705, "learning_rate": 0.00019745569497789975, "loss": 1.0804, "step": 2474 }, { "epoch": 0.24, "grad_norm": 0.3011360941789264, "learning_rate": 0.00019745214797740437, "loss": 0.9762, "step": 2475 }, { "epoch": 0.24, "grad_norm": 0.23621703283724582, "learning_rate": 0.00019744859853810772, "loss": 1.2314, "step": 2476 }, { "epoch": 0.24, "grad_norm": 0.2585948578333519, "learning_rate": 0.00019744504666009864, "loss": 1.1219, "step": 2477 }, { "epoch": 0.24, "grad_norm": 0.23886114739760986, "learning_rate": 0.00019744149234346604, "loss": 1.0854, "step": 2478 }, { "epoch": 0.24, "grad_norm": 0.27090142697493064, "learning_rate": 0.00019743793558829885, "loss": 1.1247, "step": 2479 }, { "epoch": 0.24, "grad_norm": 0.26214502250978616, "learning_rate": 0.00019743437639468606, "loss": 1.0928, "step": 2480 }, { "epoch": 0.24, "grad_norm": 0.26399673853102246, "learning_rate": 0.00019743081476271675, "loss": 1.229, "step": 2481 }, { "epoch": 0.24, "grad_norm": 0.3147492977324059, "learning_rate": 0.00019742725069248014, "loss": 1.1473, "step": 2482 }, { "epoch": 0.24, "grad_norm": 0.26237262208219775, "learning_rate": 0.0001974236841840653, "loss": 1.0796, "step": 2483 }, { "epoch": 0.24, "grad_norm": 0.2556663951288371, "learning_rate": 0.00019742011523756154, "loss": 1.0103, "step": 2484 }, { "epoch": 0.24, "grad_norm": 0.2780031870200213, "learning_rate": 0.0001974165438530582, "loss": 1.1362, "step": 2485 }, { "epoch": 0.24, "grad_norm": 0.27186052495109936, "learning_rate": 0.0001974129700306446, "loss": 1.0304, "step": 2486 }, { "epoch": 0.24, "grad_norm": 0.28105904722620484, "learning_rate": 0.0001974093937704102, "loss": 1.1844, "step": 2487 }, { "epoch": 0.24, "grad_norm": 0.25863957400723964, "learning_rate": 0.00019740581507244449, "loss": 1.1119, "step": 2488 }, { "epoch": 0.24, "grad_norm": 0.28394214465783835, "learning_rate": 0.00019740223393683706, "loss": 1.1119, "step": 2489 }, { "epoch": 0.24, "grad_norm": 0.26485094517848373, "learning_rate": 0.00019739865036367751, "loss": 1.1412, "step": 2490 }, { "epoch": 0.24, "grad_norm": 0.28470688440068453, "learning_rate": 0.0001973950643530555, "loss": 1.0705, "step": 2491 }, { "epoch": 0.24, "grad_norm": 0.26331398514634713, "learning_rate": 0.00019739147590506085, "loss": 1.052, "step": 2492 }, { "epoch": 0.24, "grad_norm": 0.25097699443135785, "learning_rate": 0.00019738788501978325, "loss": 1.0758, "step": 2493 }, { "epoch": 0.24, "grad_norm": 0.24766896766086546, "learning_rate": 0.00019738429169731262, "loss": 1.0952, "step": 2494 }, { "epoch": 0.24, "grad_norm": 0.28022175227433616, "learning_rate": 0.00019738069593773893, "loss": 1.0738, "step": 2495 }, { "epoch": 0.24, "grad_norm": 0.2522932834715685, "learning_rate": 0.0001973770977411521, "loss": 1.1787, "step": 2496 }, { "epoch": 0.24, "grad_norm": 0.2926767224083834, "learning_rate": 0.0001973734971076422, "loss": 1.1114, "step": 2497 }, { "epoch": 0.24, "grad_norm": 0.2915842765013931, "learning_rate": 0.00019736989403729935, "loss": 1.0235, "step": 2498 }, { "epoch": 0.24, "grad_norm": 0.2462394398490279, "learning_rate": 0.0001973662885302137, "loss": 1.0616, "step": 2499 }, { "epoch": 0.24, "grad_norm": 0.2608600216455592, "learning_rate": 0.00019736268058647547, "loss": 1.2348, "step": 2500 }, { "epoch": 0.24, "grad_norm": 0.27127694754468956, "learning_rate": 0.000197359070206175, "loss": 1.2243, "step": 2501 }, { "epoch": 0.24, "grad_norm": 0.27181980704622394, "learning_rate": 0.00019735545738940258, "loss": 1.0685, "step": 2502 }, { "epoch": 0.24, "grad_norm": 0.26463599414343286, "learning_rate": 0.00019735184213624866, "loss": 1.083, "step": 2503 }, { "epoch": 0.24, "grad_norm": 0.2858932841784855, "learning_rate": 0.00019734822444680372, "loss": 1.1853, "step": 2504 }, { "epoch": 0.24, "grad_norm": 0.29334417625161935, "learning_rate": 0.00019734460432115826, "loss": 1.149, "step": 2505 }, { "epoch": 0.24, "grad_norm": 0.30013981243424426, "learning_rate": 0.00019734098175940292, "loss": 1.1454, "step": 2506 }, { "epoch": 0.24, "grad_norm": 0.25388848360987426, "learning_rate": 0.00019733735676162833, "loss": 1.0328, "step": 2507 }, { "epoch": 0.24, "grad_norm": 0.2911267465827057, "learning_rate": 0.0001973337293279252, "loss": 1.0848, "step": 2508 }, { "epoch": 0.24, "grad_norm": 0.3184503213445064, "learning_rate": 0.00019733009945838435, "loss": 1.1812, "step": 2509 }, { "epoch": 0.24, "grad_norm": 0.24860610501636035, "learning_rate": 0.00019732646715309656, "loss": 1.0944, "step": 2510 }, { "epoch": 0.24, "grad_norm": 0.2523314374067835, "learning_rate": 0.00019732283241215276, "loss": 1.0563, "step": 2511 }, { "epoch": 0.24, "grad_norm": 0.26493511638695577, "learning_rate": 0.00019731919523564395, "loss": 1.0797, "step": 2512 }, { "epoch": 0.24, "grad_norm": 0.34257964691618786, "learning_rate": 0.00019731555562366108, "loss": 1.0442, "step": 2513 }, { "epoch": 0.24, "grad_norm": 0.278969034097831, "learning_rate": 0.0001973119135762953, "loss": 1.085, "step": 2514 }, { "epoch": 0.24, "grad_norm": 0.2821218023861396, "learning_rate": 0.00019730826909363771, "loss": 1.1072, "step": 2515 }, { "epoch": 0.24, "grad_norm": 0.2550975254765698, "learning_rate": 0.00019730462217577955, "loss": 1.0796, "step": 2516 }, { "epoch": 0.24, "grad_norm": 0.24932942154107635, "learning_rate": 0.00019730097282281202, "loss": 1.0744, "step": 2517 }, { "epoch": 0.24, "grad_norm": 0.278741555046221, "learning_rate": 0.00019729732103482652, "loss": 1.2485, "step": 2518 }, { "epoch": 0.24, "grad_norm": 0.28093405939217236, "learning_rate": 0.0001972936668119144, "loss": 1.107, "step": 2519 }, { "epoch": 0.24, "grad_norm": 0.25703049143132695, "learning_rate": 0.00019729001015416714, "loss": 1.1391, "step": 2520 }, { "epoch": 0.24, "grad_norm": 0.26010024870942694, "learning_rate": 0.00019728635106167622, "loss": 1.0808, "step": 2521 }, { "epoch": 0.24, "grad_norm": 0.2640446473647651, "learning_rate": 0.00019728268953453324, "loss": 1.0537, "step": 2522 }, { "epoch": 0.24, "grad_norm": 0.26949297336392625, "learning_rate": 0.0001972790255728298, "loss": 1.0032, "step": 2523 }, { "epoch": 0.24, "grad_norm": 0.27078666739320834, "learning_rate": 0.00019727535917665764, "loss": 1.0726, "step": 2524 }, { "epoch": 0.24, "grad_norm": 0.2270614158675183, "learning_rate": 0.00019727169034610843, "loss": 1.084, "step": 2525 }, { "epoch": 0.24, "grad_norm": 0.2772909155669702, "learning_rate": 0.00019726801908127403, "loss": 1.0104, "step": 2526 }, { "epoch": 0.24, "grad_norm": 0.26805528081435065, "learning_rate": 0.00019726434538224638, "loss": 1.0985, "step": 2527 }, { "epoch": 0.24, "grad_norm": 0.3021364267615302, "learning_rate": 0.00019726066924911732, "loss": 1.1961, "step": 2528 }, { "epoch": 0.24, "grad_norm": 0.2742459883930204, "learning_rate": 0.0001972569906819789, "loss": 0.8326, "step": 2529 }, { "epoch": 0.24, "grad_norm": 0.2965953559002404, "learning_rate": 0.00019725330968092315, "loss": 1.107, "step": 2530 }, { "epoch": 0.24, "grad_norm": 0.2684835136930391, "learning_rate": 0.0001972496262460422, "loss": 1.0751, "step": 2531 }, { "epoch": 0.24, "grad_norm": 0.2718707210872007, "learning_rate": 0.00019724594037742824, "loss": 1.0515, "step": 2532 }, { "epoch": 0.24, "grad_norm": 0.25725745750943513, "learning_rate": 0.00019724225207517354, "loss": 1.1485, "step": 2533 }, { "epoch": 0.24, "grad_norm": 0.2890461500320697, "learning_rate": 0.0001972385613393703, "loss": 1.1419, "step": 2534 }, { "epoch": 0.24, "grad_norm": 0.2440614465912653, "learning_rate": 0.000197234868170111, "loss": 1.0766, "step": 2535 }, { "epoch": 0.24, "grad_norm": 0.2631890740774793, "learning_rate": 0.00019723117256748802, "loss": 1.0583, "step": 2536 }, { "epoch": 0.24, "grad_norm": 0.2654794914552144, "learning_rate": 0.0001972274745315938, "loss": 1.103, "step": 2537 }, { "epoch": 0.24, "grad_norm": 0.25772820159308557, "learning_rate": 0.00019722377406252095, "loss": 1.1091, "step": 2538 }, { "epoch": 0.24, "grad_norm": 0.2759349478590902, "learning_rate": 0.00019722007116036204, "loss": 1.0997, "step": 2539 }, { "epoch": 0.24, "grad_norm": 0.29182935407079413, "learning_rate": 0.00019721636582520978, "loss": 0.9975, "step": 2540 }, { "epoch": 0.24, "grad_norm": 0.2706259485506027, "learning_rate": 0.00019721265805715686, "loss": 1.0848, "step": 2541 }, { "epoch": 0.24, "grad_norm": 0.27778012967187665, "learning_rate": 0.00019720894785629604, "loss": 1.1421, "step": 2542 }, { "epoch": 0.24, "grad_norm": 0.24224758551615272, "learning_rate": 0.00019720523522272023, "loss": 1.1094, "step": 2543 }, { "epoch": 0.24, "grad_norm": 0.27897485799673416, "learning_rate": 0.0001972015201565223, "loss": 0.9736, "step": 2544 }, { "epoch": 0.24, "grad_norm": 0.25755355735125685, "learning_rate": 0.00019719780265779527, "loss": 1.1512, "step": 2545 }, { "epoch": 0.24, "grad_norm": 0.26839822619910775, "learning_rate": 0.00019719408272663211, "loss": 1.0875, "step": 2546 }, { "epoch": 0.24, "grad_norm": 0.3230859730143896, "learning_rate": 0.00019719036036312595, "loss": 1.1185, "step": 2547 }, { "epoch": 0.24, "grad_norm": 0.3000203530525595, "learning_rate": 0.00019718663556736997, "loss": 0.9699, "step": 2548 }, { "epoch": 0.24, "grad_norm": 0.2673877046628615, "learning_rate": 0.00019718290833945732, "loss": 1.0584, "step": 2549 }, { "epoch": 0.24, "grad_norm": 0.28638413313248523, "learning_rate": 0.00019717917867948136, "loss": 1.1215, "step": 2550 }, { "epoch": 0.24, "grad_norm": 0.27932539137807955, "learning_rate": 0.00019717544658753533, "loss": 1.0187, "step": 2551 }, { "epoch": 0.24, "grad_norm": 0.29577216947674934, "learning_rate": 0.00019717171206371268, "loss": 1.0782, "step": 2552 }, { "epoch": 0.24, "grad_norm": 0.29132618922289466, "learning_rate": 0.00019716797510810688, "loss": 1.1716, "step": 2553 }, { "epoch": 0.24, "grad_norm": 0.30978702723114215, "learning_rate": 0.00019716423572081144, "loss": 1.176, "step": 2554 }, { "epoch": 0.24, "grad_norm": 0.26767419284947913, "learning_rate": 0.0001971604939019199, "loss": 0.9729, "step": 2555 }, { "epoch": 0.24, "grad_norm": 0.24950178862061026, "learning_rate": 0.000197156749651526, "loss": 1.1032, "step": 2556 }, { "epoch": 0.24, "grad_norm": 0.2676153474407372, "learning_rate": 0.00019715300296972333, "loss": 1.0011, "step": 2557 }, { "epoch": 0.24, "grad_norm": 0.24233870876913635, "learning_rate": 0.00019714925385660572, "loss": 1.0203, "step": 2558 }, { "epoch": 0.24, "grad_norm": 0.26922786191679665, "learning_rate": 0.00019714550231226697, "loss": 1.0598, "step": 2559 }, { "epoch": 0.24, "grad_norm": 0.25818023471817164, "learning_rate": 0.000197141748336801, "loss": 1.1347, "step": 2560 }, { "epoch": 0.25, "grad_norm": 0.2618807690441127, "learning_rate": 0.00019713799193030166, "loss": 1.1211, "step": 2561 }, { "epoch": 0.25, "grad_norm": 0.29741704565126115, "learning_rate": 0.00019713423309286309, "loss": 1.0597, "step": 2562 }, { "epoch": 0.25, "grad_norm": 0.28884125218560003, "learning_rate": 0.00019713047182457928, "loss": 1.0358, "step": 2563 }, { "epoch": 0.25, "grad_norm": 0.26649025648335334, "learning_rate": 0.00019712670812554434, "loss": 1.162, "step": 2564 }, { "epoch": 0.25, "grad_norm": 0.2947735363660406, "learning_rate": 0.00019712294199585248, "loss": 1.0586, "step": 2565 }, { "epoch": 0.25, "grad_norm": 0.25677075754484635, "learning_rate": 0.000197119173435598, "loss": 1.1841, "step": 2566 }, { "epoch": 0.25, "grad_norm": 0.2637205544668129, "learning_rate": 0.00019711540244487515, "loss": 1.0261, "step": 2567 }, { "epoch": 0.25, "grad_norm": 0.24823082695362084, "learning_rate": 0.0001971116290237783, "loss": 1.0277, "step": 2568 }, { "epoch": 0.25, "grad_norm": 0.25229952638255043, "learning_rate": 0.0001971078531724019, "loss": 1.0751, "step": 2569 }, { "epoch": 0.25, "grad_norm": 0.30254391196407815, "learning_rate": 0.00019710407489084047, "loss": 1.1231, "step": 2570 }, { "epoch": 0.25, "grad_norm": 0.3021948667516261, "learning_rate": 0.00019710029417918854, "loss": 1.0033, "step": 2571 }, { "epoch": 0.25, "grad_norm": 0.23588733965722716, "learning_rate": 0.00019709651103754067, "loss": 1.1007, "step": 2572 }, { "epoch": 0.25, "grad_norm": 0.24382613129524622, "learning_rate": 0.00019709272546599164, "loss": 1.0672, "step": 2573 }, { "epoch": 0.25, "grad_norm": 0.2733353690652961, "learning_rate": 0.00019708893746463613, "loss": 1.1596, "step": 2574 }, { "epoch": 0.25, "grad_norm": 0.26912681562116336, "learning_rate": 0.00019708514703356894, "loss": 1.1428, "step": 2575 }, { "epoch": 0.25, "grad_norm": 0.2956110006161434, "learning_rate": 0.00019708135417288491, "loss": 1.0804, "step": 2576 }, { "epoch": 0.25, "grad_norm": 0.2630689064650718, "learning_rate": 0.000197077558882679, "loss": 1.0586, "step": 2577 }, { "epoch": 0.25, "grad_norm": 0.2689950863610693, "learning_rate": 0.00019707376116304617, "loss": 1.1105, "step": 2578 }, { "epoch": 0.25, "grad_norm": 0.2910234057476756, "learning_rate": 0.00019706996101408146, "loss": 1.0988, "step": 2579 }, { "epoch": 0.25, "grad_norm": 0.27854677846223636, "learning_rate": 0.00019706615843587995, "loss": 1.1806, "step": 2580 }, { "epoch": 0.25, "grad_norm": 0.24191714015622726, "learning_rate": 0.00019706235342853683, "loss": 1.0358, "step": 2581 }, { "epoch": 0.25, "grad_norm": 0.2654110236015743, "learning_rate": 0.00019705854599214734, "loss": 1.1535, "step": 2582 }, { "epoch": 0.25, "grad_norm": 0.24454579378976135, "learning_rate": 0.0001970547361268067, "loss": 1.0781, "step": 2583 }, { "epoch": 0.25, "grad_norm": 0.24819542572155914, "learning_rate": 0.00019705092383261028, "loss": 1.1531, "step": 2584 }, { "epoch": 0.25, "grad_norm": 0.31712338038836446, "learning_rate": 0.00019704710910965352, "loss": 1.2182, "step": 2585 }, { "epoch": 0.25, "grad_norm": 0.2770504057135462, "learning_rate": 0.00019704329195803188, "loss": 0.9587, "step": 2586 }, { "epoch": 0.25, "grad_norm": 0.2381187276476534, "learning_rate": 0.00019703947237784087, "loss": 0.9862, "step": 2587 }, { "epoch": 0.25, "grad_norm": 0.29302550861588544, "learning_rate": 0.00019703565036917605, "loss": 1.0806, "step": 2588 }, { "epoch": 0.25, "grad_norm": 0.26005985628816053, "learning_rate": 0.0001970318259321331, "loss": 0.9799, "step": 2589 }, { "epoch": 0.25, "grad_norm": 0.27882886267706336, "learning_rate": 0.0001970279990668077, "loss": 1.1686, "step": 2590 }, { "epoch": 0.25, "grad_norm": 0.26205774855080516, "learning_rate": 0.0001970241697732957, "loss": 1.1234, "step": 2591 }, { "epoch": 0.25, "grad_norm": 0.26668517374001244, "learning_rate": 0.00019702033805169285, "loss": 1.0884, "step": 2592 }, { "epoch": 0.25, "grad_norm": 0.25715862286863395, "learning_rate": 0.00019701650390209504, "loss": 1.2244, "step": 2593 }, { "epoch": 0.25, "grad_norm": 0.3058673752020222, "learning_rate": 0.00019701266732459827, "loss": 1.0458, "step": 2594 }, { "epoch": 0.25, "grad_norm": 0.2529497741528999, "learning_rate": 0.00019700882831929852, "loss": 1.3244, "step": 2595 }, { "epoch": 0.25, "grad_norm": 0.2943742754766903, "learning_rate": 0.0001970049868862919, "loss": 1.0521, "step": 2596 }, { "epoch": 0.25, "grad_norm": 0.25522996903552037, "learning_rate": 0.0001970011430256745, "loss": 1.0895, "step": 2597 }, { "epoch": 0.25, "grad_norm": 0.2671836556503752, "learning_rate": 0.00019699729673754255, "loss": 1.1154, "step": 2598 }, { "epoch": 0.25, "grad_norm": 0.3007590432393228, "learning_rate": 0.00019699344802199224, "loss": 1.2871, "step": 2599 }, { "epoch": 0.25, "grad_norm": 0.2991338944551984, "learning_rate": 0.00019698959687911998, "loss": 1.0738, "step": 2600 }, { "epoch": 0.25, "grad_norm": 0.26091410208452626, "learning_rate": 0.00019698574330902208, "loss": 1.2443, "step": 2601 }, { "epoch": 0.25, "grad_norm": 0.27563048614294183, "learning_rate": 0.00019698188731179502, "loss": 1.0806, "step": 2602 }, { "epoch": 0.25, "grad_norm": 0.26585816626226483, "learning_rate": 0.00019697802888753526, "loss": 0.9916, "step": 2603 }, { "epoch": 0.25, "grad_norm": 0.27522846970536974, "learning_rate": 0.0001969741680363394, "loss": 1.0998, "step": 2604 }, { "epoch": 0.25, "grad_norm": 0.2602657313030757, "learning_rate": 0.00019697030475830402, "loss": 1.0419, "step": 2605 }, { "epoch": 0.25, "grad_norm": 0.2453226312120246, "learning_rate": 0.00019696643905352582, "loss": 0.9631, "step": 2606 }, { "epoch": 0.25, "grad_norm": 0.29471825123420187, "learning_rate": 0.00019696257092210155, "loss": 1.0666, "step": 2607 }, { "epoch": 0.25, "grad_norm": 0.2902443561039826, "learning_rate": 0.000196958700364128, "loss": 1.0988, "step": 2608 }, { "epoch": 0.25, "grad_norm": 0.2538912185129694, "learning_rate": 0.00019695482737970202, "loss": 1.1586, "step": 2609 }, { "epoch": 0.25, "grad_norm": 0.28579871153918274, "learning_rate": 0.0001969509519689206, "loss": 1.0839, "step": 2610 }, { "epoch": 0.25, "grad_norm": 0.23553918792552284, "learning_rate": 0.00019694707413188062, "loss": 1.0165, "step": 2611 }, { "epoch": 0.25, "grad_norm": 0.28381053637003834, "learning_rate": 0.0001969431938686792, "loss": 1.0899, "step": 2612 }, { "epoch": 0.25, "grad_norm": 0.2605318408738508, "learning_rate": 0.00019693931117941346, "loss": 1.1033, "step": 2613 }, { "epoch": 0.25, "eval_loss": 1.1338403224945068, "eval_runtime": 4230.6339, "eval_samples_per_second": 19.765, "eval_steps_per_second": 2.471, "step": 2613 }, { "epoch": 0.25, "grad_norm": 0.255252333620048, "learning_rate": 0.00019693542606418052, "loss": 1.1488, "step": 2614 }, { "epoch": 0.25, "grad_norm": 0.30633153587103285, "learning_rate": 0.00019693153852307757, "loss": 1.0757, "step": 2615 }, { "epoch": 0.25, "grad_norm": 0.27505703870757664, "learning_rate": 0.000196927648556202, "loss": 1.1299, "step": 2616 }, { "epoch": 0.25, "grad_norm": 0.2713935955775416, "learning_rate": 0.00019692375616365112, "loss": 1.0189, "step": 2617 }, { "epoch": 0.25, "grad_norm": 0.2684321316986978, "learning_rate": 0.00019691986134552227, "loss": 1.1241, "step": 2618 }, { "epoch": 0.25, "grad_norm": 0.25621412932428106, "learning_rate": 0.00019691596410191303, "loss": 1.0671, "step": 2619 }, { "epoch": 0.25, "grad_norm": 0.26289530369330816, "learning_rate": 0.00019691206443292085, "loss": 0.9654, "step": 2620 }, { "epoch": 0.25, "grad_norm": 0.2650174904695066, "learning_rate": 0.00019690816233864337, "loss": 1.0996, "step": 2621 }, { "epoch": 0.25, "grad_norm": 0.2575327507563281, "learning_rate": 0.0001969042578191782, "loss": 1.1166, "step": 2622 }, { "epoch": 0.25, "grad_norm": 0.26454046915240054, "learning_rate": 0.00019690035087462307, "loss": 0.9949, "step": 2623 }, { "epoch": 0.25, "grad_norm": 0.2805971262563047, "learning_rate": 0.0001968964415050758, "loss": 1.0859, "step": 2624 }, { "epoch": 0.25, "grad_norm": 0.21973703426240196, "learning_rate": 0.00019689252971063416, "loss": 1.1679, "step": 2625 }, { "epoch": 0.25, "grad_norm": 0.3139171336564937, "learning_rate": 0.00019688861549139607, "loss": 1.2573, "step": 2626 }, { "epoch": 0.25, "grad_norm": 0.3076875510868648, "learning_rate": 0.0001968846988474595, "loss": 1.1237, "step": 2627 }, { "epoch": 0.25, "grad_norm": 0.28812414941928133, "learning_rate": 0.00019688077977892245, "loss": 1.0934, "step": 2628 }, { "epoch": 0.25, "grad_norm": 0.31182412232961415, "learning_rate": 0.00019687685828588297, "loss": 1.0891, "step": 2629 }, { "epoch": 0.25, "grad_norm": 0.2625520463054526, "learning_rate": 0.00019687293436843926, "loss": 1.1112, "step": 2630 }, { "epoch": 0.25, "grad_norm": 0.25837520805297015, "learning_rate": 0.00019686900802668946, "loss": 0.9673, "step": 2631 }, { "epoch": 0.25, "grad_norm": 0.27460647995152365, "learning_rate": 0.00019686507926073188, "loss": 1.1495, "step": 2632 }, { "epoch": 0.25, "grad_norm": 0.26861650854795094, "learning_rate": 0.00019686114807066478, "loss": 1.0432, "step": 2633 }, { "epoch": 0.25, "grad_norm": 0.28385341495610805, "learning_rate": 0.0001968572144565866, "loss": 1.1461, "step": 2634 }, { "epoch": 0.25, "grad_norm": 0.271570918418974, "learning_rate": 0.00019685327841859572, "loss": 1.185, "step": 2635 }, { "epoch": 0.25, "grad_norm": 0.2673269371926348, "learning_rate": 0.00019684933995679074, "loss": 1.1282, "step": 2636 }, { "epoch": 0.25, "grad_norm": 0.2403632191882647, "learning_rate": 0.0001968453990712701, "loss": 1.0608, "step": 2637 }, { "epoch": 0.25, "grad_norm": 0.2586849785633008, "learning_rate": 0.00019684145576213252, "loss": 1.1015, "step": 2638 }, { "epoch": 0.25, "grad_norm": 0.2649189546870879, "learning_rate": 0.00019683751002947663, "loss": 1.1428, "step": 2639 }, { "epoch": 0.25, "grad_norm": 0.2524600423422356, "learning_rate": 0.0001968335618734012, "loss": 1.0497, "step": 2640 }, { "epoch": 0.25, "grad_norm": 0.25016994107064466, "learning_rate": 0.00019682961129400503, "loss": 1.1158, "step": 2641 }, { "epoch": 0.25, "grad_norm": 0.26421571146347217, "learning_rate": 0.000196825658291387, "loss": 1.0628, "step": 2642 }, { "epoch": 0.25, "grad_norm": 0.282173839472887, "learning_rate": 0.00019682170286564597, "loss": 0.998, "step": 2643 }, { "epoch": 0.25, "grad_norm": 0.2323812037769926, "learning_rate": 0.00019681774501688102, "loss": 1.0137, "step": 2644 }, { "epoch": 0.25, "grad_norm": 0.2901381801505397, "learning_rate": 0.0001968137847451911, "loss": 1.0866, "step": 2645 }, { "epoch": 0.25, "grad_norm": 0.26576724197777407, "learning_rate": 0.0001968098220506754, "loss": 1.0927, "step": 2646 }, { "epoch": 0.25, "grad_norm": 0.2696230704622509, "learning_rate": 0.0001968058569334331, "loss": 1.1894, "step": 2647 }, { "epoch": 0.25, "grad_norm": 0.26017036741055427, "learning_rate": 0.00019680188939356336, "loss": 0.9647, "step": 2648 }, { "epoch": 0.25, "grad_norm": 0.2677987402758377, "learning_rate": 0.0001967979194311655, "loss": 1.0862, "step": 2649 }, { "epoch": 0.25, "grad_norm": 0.28711631837028534, "learning_rate": 0.00019679394704633888, "loss": 1.2582, "step": 2650 }, { "epoch": 0.25, "grad_norm": 0.26765916334917605, "learning_rate": 0.00019678997223918288, "loss": 1.0659, "step": 2651 }, { "epoch": 0.25, "grad_norm": 0.26777059535455733, "learning_rate": 0.000196785995009797, "loss": 1.1978, "step": 2652 }, { "epoch": 0.25, "grad_norm": 0.22518302167201712, "learning_rate": 0.00019678201535828076, "loss": 1.0836, "step": 2653 }, { "epoch": 0.25, "grad_norm": 0.2639486825941676, "learning_rate": 0.00019677803328473377, "loss": 1.1509, "step": 2654 }, { "epoch": 0.25, "grad_norm": 0.23538041430872067, "learning_rate": 0.00019677404878925566, "loss": 0.9895, "step": 2655 }, { "epoch": 0.25, "grad_norm": 0.295994436523556, "learning_rate": 0.00019677006187194618, "loss": 1.0979, "step": 2656 }, { "epoch": 0.25, "grad_norm": 0.28307792530010506, "learning_rate": 0.00019676607253290508, "loss": 1.0768, "step": 2657 }, { "epoch": 0.25, "grad_norm": 0.2761519436955159, "learning_rate": 0.0001967620807722322, "loss": 1.0224, "step": 2658 }, { "epoch": 0.25, "grad_norm": 0.2879025567486724, "learning_rate": 0.00019675808659002744, "loss": 1.1083, "step": 2659 }, { "epoch": 0.25, "grad_norm": 0.3004898697546929, "learning_rate": 0.00019675408998639076, "loss": 1.1356, "step": 2660 }, { "epoch": 0.25, "grad_norm": 0.31015042588546826, "learning_rate": 0.00019675009096142214, "loss": 1.1486, "step": 2661 }, { "epoch": 0.25, "grad_norm": 0.2811558967690104, "learning_rate": 0.0001967460895152217, "loss": 1.0928, "step": 2662 }, { "epoch": 0.25, "grad_norm": 0.2633365965205071, "learning_rate": 0.00019674208564788957, "loss": 1.1153, "step": 2663 }, { "epoch": 0.25, "grad_norm": 0.2860499750304183, "learning_rate": 0.00019673807935952596, "loss": 1.1082, "step": 2664 }, { "epoch": 0.25, "grad_norm": 0.29758088608843136, "learning_rate": 0.0001967340706502311, "loss": 1.2912, "step": 2665 }, { "epoch": 0.26, "grad_norm": 0.2568418533290371, "learning_rate": 0.00019673005952010534, "loss": 1.1485, "step": 2666 }, { "epoch": 0.26, "grad_norm": 0.26096215810194917, "learning_rate": 0.00019672604596924904, "loss": 1.0748, "step": 2667 }, { "epoch": 0.26, "grad_norm": 0.28867342954233294, "learning_rate": 0.00019672202999776266, "loss": 1.0794, "step": 2668 }, { "epoch": 0.26, "grad_norm": 0.21142091669081053, "learning_rate": 0.0001967180116057467, "loss": 1.0324, "step": 2669 }, { "epoch": 0.26, "grad_norm": 0.2592661813230898, "learning_rate": 0.00019671399079330168, "loss": 1.1463, "step": 2670 }, { "epoch": 0.26, "grad_norm": 0.23960804899506707, "learning_rate": 0.00019670996756052827, "loss": 1.0908, "step": 2671 }, { "epoch": 0.26, "grad_norm": 0.24684475939341896, "learning_rate": 0.00019670594190752713, "loss": 1.036, "step": 2672 }, { "epoch": 0.26, "grad_norm": 0.28350271509951525, "learning_rate": 0.00019670191383439907, "loss": 1.0251, "step": 2673 }, { "epoch": 0.26, "grad_norm": 0.5535529462100653, "learning_rate": 0.00019669788334124476, "loss": 1.052, "step": 2674 }, { "epoch": 0.26, "grad_norm": 0.3077728064262205, "learning_rate": 0.0001966938504281652, "loss": 1.116, "step": 2675 }, { "epoch": 0.26, "grad_norm": 0.2656350026116821, "learning_rate": 0.00019668981509526128, "loss": 1.2018, "step": 2676 }, { "epoch": 0.26, "grad_norm": 0.2479927609636305, "learning_rate": 0.00019668577734263394, "loss": 1.104, "step": 2677 }, { "epoch": 0.26, "grad_norm": 0.2777215172332179, "learning_rate": 0.00019668173717038426, "loss": 1.1844, "step": 2678 }, { "epoch": 0.26, "grad_norm": 0.2829530059947927, "learning_rate": 0.00019667769457861335, "loss": 1.0842, "step": 2679 }, { "epoch": 0.26, "grad_norm": 0.2550816083760124, "learning_rate": 0.00019667364956742236, "loss": 1.1394, "step": 2680 }, { "epoch": 0.26, "grad_norm": 0.2753775575550518, "learning_rate": 0.00019666960213691255, "loss": 1.1293, "step": 2681 }, { "epoch": 0.26, "grad_norm": 0.2767717867669593, "learning_rate": 0.0001966655522871852, "loss": 1.0783, "step": 2682 }, { "epoch": 0.26, "grad_norm": 0.26993060617287373, "learning_rate": 0.00019666150001834164, "loss": 1.0235, "step": 2683 }, { "epoch": 0.26, "grad_norm": 0.2870195128020576, "learning_rate": 0.00019665744533048328, "loss": 1.0422, "step": 2684 }, { "epoch": 0.26, "grad_norm": 0.27640315301309837, "learning_rate": 0.0001966533882237116, "loss": 1.1165, "step": 2685 }, { "epoch": 0.26, "grad_norm": 0.2694095090611699, "learning_rate": 0.00019664932869812814, "loss": 1.0239, "step": 2686 }, { "epoch": 0.26, "grad_norm": 0.32106995191546245, "learning_rate": 0.0001966452667538345, "loss": 1.1492, "step": 2687 }, { "epoch": 0.26, "grad_norm": 0.27424915699699154, "learning_rate": 0.00019664120239093233, "loss": 1.1685, "step": 2688 }, { "epoch": 0.26, "grad_norm": 0.29135122016294623, "learning_rate": 0.0001966371356095233, "loss": 1.1547, "step": 2689 }, { "epoch": 0.26, "grad_norm": 0.26527031382403177, "learning_rate": 0.00019663306640970926, "loss": 1.1219, "step": 2690 }, { "epoch": 0.26, "grad_norm": 0.27127353395064424, "learning_rate": 0.00019662899479159197, "loss": 0.9899, "step": 2691 }, { "epoch": 0.26, "grad_norm": 0.3033029254082761, "learning_rate": 0.00019662492075527336, "loss": 1.1249, "step": 2692 }, { "epoch": 0.26, "grad_norm": 0.26657961186508666, "learning_rate": 0.00019662084430085538, "loss": 1.1842, "step": 2693 }, { "epoch": 0.26, "grad_norm": 0.28599030101340955, "learning_rate": 0.00019661676542844007, "loss": 1.1839, "step": 2694 }, { "epoch": 0.26, "grad_norm": 0.30087762030890813, "learning_rate": 0.00019661268413812946, "loss": 1.0863, "step": 2695 }, { "epoch": 0.26, "grad_norm": 0.325088281089176, "learning_rate": 0.00019660860043002574, "loss": 1.049, "step": 2696 }, { "epoch": 0.26, "grad_norm": 0.2828097114488758, "learning_rate": 0.00019660451430423103, "loss": 1.1857, "step": 2697 }, { "epoch": 0.26, "grad_norm": 0.2701302351874178, "learning_rate": 0.00019660042576084767, "loss": 1.1144, "step": 2698 }, { "epoch": 0.26, "grad_norm": 0.2835386842245424, "learning_rate": 0.00019659633479997794, "loss": 1.1066, "step": 2699 }, { "epoch": 0.26, "grad_norm": 0.243379840423929, "learning_rate": 0.00019659224142172424, "loss": 1.0204, "step": 2700 }, { "epoch": 0.26, "grad_norm": 0.24266059096127535, "learning_rate": 0.00019658814562618896, "loss": 1.1088, "step": 2701 }, { "epoch": 0.26, "grad_norm": 0.2617930016670921, "learning_rate": 0.00019658404741347462, "loss": 1.1113, "step": 2702 }, { "epoch": 0.26, "grad_norm": 0.2547097198503651, "learning_rate": 0.00019657994678368385, "loss": 1.0444, "step": 2703 }, { "epoch": 0.26, "grad_norm": 0.2625250549016416, "learning_rate": 0.00019657584373691917, "loss": 1.1396, "step": 2704 }, { "epoch": 0.26, "grad_norm": 0.26689206957202594, "learning_rate": 0.0001965717382732833, "loss": 1.1596, "step": 2705 }, { "epoch": 0.26, "grad_norm": 0.2903595268276037, "learning_rate": 0.000196567630392879, "loss": 1.0474, "step": 2706 }, { "epoch": 0.26, "grad_norm": 0.2572495572058499, "learning_rate": 0.00019656352009580908, "loss": 1.0761, "step": 2707 }, { "epoch": 0.26, "grad_norm": 0.2470130498530773, "learning_rate": 0.00019655940738217635, "loss": 1.0546, "step": 2708 }, { "epoch": 0.26, "grad_norm": 0.2930008567798905, "learning_rate": 0.00019655529225208378, "loss": 1.1107, "step": 2709 }, { "epoch": 0.26, "grad_norm": 0.25989997031388734, "learning_rate": 0.00019655117470563434, "loss": 1.0839, "step": 2710 }, { "epoch": 0.26, "grad_norm": 0.30098329989662703, "learning_rate": 0.00019654705474293107, "loss": 1.1056, "step": 2711 }, { "epoch": 0.26, "grad_norm": 0.2424595664908627, "learning_rate": 0.00019654293236407707, "loss": 1.0923, "step": 2712 }, { "epoch": 0.26, "grad_norm": 0.28519149075066125, "learning_rate": 0.00019653880756917552, "loss": 1.0625, "step": 2713 }, { "epoch": 0.26, "grad_norm": 0.31144165591888, "learning_rate": 0.00019653468035832965, "loss": 1.2098, "step": 2714 }, { "epoch": 0.26, "grad_norm": 0.2714581157840239, "learning_rate": 0.0001965305507316427, "loss": 1.092, "step": 2715 }, { "epoch": 0.26, "grad_norm": 0.28440142688999087, "learning_rate": 0.0001965264186892181, "loss": 1.0165, "step": 2716 }, { "epoch": 0.26, "grad_norm": 0.24262222631275346, "learning_rate": 0.00019652228423115917, "loss": 1.0364, "step": 2717 }, { "epoch": 0.26, "grad_norm": 0.2743500722654148, "learning_rate": 0.00019651814735756942, "loss": 1.0864, "step": 2718 }, { "epoch": 0.26, "grad_norm": 0.28526861621648486, "learning_rate": 0.00019651400806855237, "loss": 1.1124, "step": 2719 }, { "epoch": 0.26, "grad_norm": 0.31205779575841586, "learning_rate": 0.00019650986636421164, "loss": 1.102, "step": 2720 }, { "epoch": 0.26, "grad_norm": 0.2661120177899195, "learning_rate": 0.00019650572224465084, "loss": 1.1081, "step": 2721 }, { "epoch": 0.26, "grad_norm": 0.29878434646252183, "learning_rate": 0.00019650157570997364, "loss": 1.1004, "step": 2722 }, { "epoch": 0.26, "grad_norm": 0.2612778406780732, "learning_rate": 0.00019649742676028394, "loss": 1.2379, "step": 2723 }, { "epoch": 0.26, "grad_norm": 0.2933566914172725, "learning_rate": 0.00019649327539568543, "loss": 1.162, "step": 2724 }, { "epoch": 0.26, "grad_norm": 0.24094136996286655, "learning_rate": 0.0001964891216162821, "loss": 1.0807, "step": 2725 }, { "epoch": 0.26, "grad_norm": 0.2943459316325484, "learning_rate": 0.00019648496542217783, "loss": 0.998, "step": 2726 }, { "epoch": 0.26, "grad_norm": 0.22961741180973028, "learning_rate": 0.00019648080681347664, "loss": 1.1305, "step": 2727 }, { "epoch": 0.26, "grad_norm": 0.2701859846638287, "learning_rate": 0.00019647664579028267, "loss": 1.0889, "step": 2728 }, { "epoch": 0.26, "grad_norm": 0.26857093567006685, "learning_rate": 0.0001964724823527, "loss": 1.2013, "step": 2729 }, { "epoch": 0.26, "grad_norm": 0.31949382152840944, "learning_rate": 0.0001964683165008328, "loss": 1.1718, "step": 2730 }, { "epoch": 0.26, "grad_norm": 0.28367090867227224, "learning_rate": 0.00019646414823478535, "loss": 1.1491, "step": 2731 }, { "epoch": 0.26, "grad_norm": 0.2504701281061607, "learning_rate": 0.000196459977554662, "loss": 1.1017, "step": 2732 }, { "epoch": 0.26, "grad_norm": 0.25221421853430986, "learning_rate": 0.00019645580446056706, "loss": 1.1185, "step": 2733 }, { "epoch": 0.26, "grad_norm": 0.28143888782957394, "learning_rate": 0.000196451628952605, "loss": 1.1759, "step": 2734 }, { "epoch": 0.26, "grad_norm": 0.2578191227270372, "learning_rate": 0.00019644745103088033, "loss": 1.1787, "step": 2735 }, { "epoch": 0.26, "grad_norm": 0.26056470647250435, "learning_rate": 0.00019644327069549754, "loss": 1.1649, "step": 2736 }, { "epoch": 0.26, "grad_norm": 0.27176880852893454, "learning_rate": 0.00019643908794656135, "loss": 1.1057, "step": 2737 }, { "epoch": 0.26, "grad_norm": 0.2882235242635738, "learning_rate": 0.00019643490278417632, "loss": 1.1081, "step": 2738 }, { "epoch": 0.26, "grad_norm": 0.27263458978440536, "learning_rate": 0.00019643071520844725, "loss": 1.0712, "step": 2739 }, { "epoch": 0.26, "grad_norm": 0.25866965428198324, "learning_rate": 0.00019642652521947894, "loss": 1.1159, "step": 2740 }, { "epoch": 0.26, "grad_norm": 0.2630902327301495, "learning_rate": 0.00019642233281737625, "loss": 1.0603, "step": 2741 }, { "epoch": 0.26, "grad_norm": 0.25820714973330133, "learning_rate": 0.00019641813800224406, "loss": 0.9922, "step": 2742 }, { "epoch": 0.26, "grad_norm": 0.2724772043679332, "learning_rate": 0.00019641394077418736, "loss": 1.0461, "step": 2743 }, { "epoch": 0.26, "grad_norm": 0.27088818855216434, "learning_rate": 0.00019640974113331123, "loss": 1.1959, "step": 2744 }, { "epoch": 0.26, "grad_norm": 0.2733607733632018, "learning_rate": 0.00019640553907972072, "loss": 1.1886, "step": 2745 }, { "epoch": 0.26, "grad_norm": 0.24676149904011196, "learning_rate": 0.000196401334613521, "loss": 1.0969, "step": 2746 }, { "epoch": 0.26, "grad_norm": 0.27834584403561474, "learning_rate": 0.00019639712773481728, "loss": 1.1597, "step": 2747 }, { "epoch": 0.26, "grad_norm": 0.25755736145056124, "learning_rate": 0.0001963929184437149, "loss": 0.9978, "step": 2748 }, { "epoch": 0.26, "grad_norm": 0.31079398376305944, "learning_rate": 0.00019638870674031913, "loss": 1.1268, "step": 2749 }, { "epoch": 0.26, "grad_norm": 0.2774704968716273, "learning_rate": 0.0001963844926247354, "loss": 1.1641, "step": 2750 }, { "epoch": 0.26, "grad_norm": 0.2781649290866802, "learning_rate": 0.00019638027609706916, "loss": 1.0707, "step": 2751 }, { "epoch": 0.26, "grad_norm": 0.2552137884918176, "learning_rate": 0.00019637605715742593, "loss": 1.2287, "step": 2752 }, { "epoch": 0.26, "grad_norm": 0.2599068780978502, "learning_rate": 0.00019637183580591133, "loss": 1.0331, "step": 2753 }, { "epoch": 0.26, "grad_norm": 0.27220701541938597, "learning_rate": 0.00019636761204263093, "loss": 1.0395, "step": 2754 }, { "epoch": 0.26, "grad_norm": 0.24397827889832657, "learning_rate": 0.0001963633858676905, "loss": 1.1166, "step": 2755 }, { "epoch": 0.26, "grad_norm": 0.2497024411579182, "learning_rate": 0.00019635915728119575, "loss": 1.0847, "step": 2756 }, { "epoch": 0.26, "grad_norm": 0.2589117765444782, "learning_rate": 0.00019635492628325256, "loss": 1.0086, "step": 2757 }, { "epoch": 0.26, "grad_norm": 0.29313802184742693, "learning_rate": 0.00019635069287396678, "loss": 1.1229, "step": 2758 }, { "epoch": 0.26, "grad_norm": 0.2618259520734273, "learning_rate": 0.00019634645705344435, "loss": 1.1459, "step": 2759 }, { "epoch": 0.26, "grad_norm": 0.2454773691105509, "learning_rate": 0.0001963422188217913, "loss": 0.968, "step": 2760 }, { "epoch": 0.26, "grad_norm": 0.28372490824875446, "learning_rate": 0.00019633797817911365, "loss": 1.1127, "step": 2761 }, { "epoch": 0.26, "grad_norm": 0.3273752692424284, "learning_rate": 0.00019633373512551754, "loss": 1.1669, "step": 2762 }, { "epoch": 0.26, "grad_norm": 0.2366079234217832, "learning_rate": 0.0001963294896611092, "loss": 1.0848, "step": 2763 }, { "epoch": 0.26, "grad_norm": 0.5261995752384835, "learning_rate": 0.00019632524178599483, "loss": 1.1333, "step": 2764 }, { "epoch": 0.26, "grad_norm": 0.26665251541131973, "learning_rate": 0.00019632099150028074, "loss": 1.0651, "step": 2765 }, { "epoch": 0.26, "grad_norm": 0.2584540431624957, "learning_rate": 0.0001963167388040733, "loss": 1.107, "step": 2766 }, { "epoch": 0.26, "grad_norm": 0.2634462477386324, "learning_rate": 0.00019631248369747893, "loss": 1.124, "step": 2767 }, { "epoch": 0.26, "grad_norm": 0.29816863160086016, "learning_rate": 0.00019630822618060413, "loss": 1.1173, "step": 2768 }, { "epoch": 0.26, "grad_norm": 0.25963330208582736, "learning_rate": 0.00019630396625355546, "loss": 1.0285, "step": 2769 }, { "epoch": 0.27, "grad_norm": 0.2887065947096526, "learning_rate": 0.00019629970391643947, "loss": 1.1987, "step": 2770 }, { "epoch": 0.27, "grad_norm": 0.27628742653188076, "learning_rate": 0.0001962954391693629, "loss": 1.067, "step": 2771 }, { "epoch": 0.27, "grad_norm": 0.2707072531440452, "learning_rate": 0.00019629117201243242, "loss": 0.9484, "step": 2772 }, { "epoch": 0.27, "grad_norm": 0.2973518749444581, "learning_rate": 0.0001962869024457549, "loss": 1.0383, "step": 2773 }, { "epoch": 0.27, "grad_norm": 0.26365187418455077, "learning_rate": 0.0001962826304694371, "loss": 1.0947, "step": 2774 }, { "epoch": 0.27, "grad_norm": 0.2591589967435732, "learning_rate": 0.00019627835608358596, "loss": 1.07, "step": 2775 }, { "epoch": 0.27, "grad_norm": 0.26492792550338157, "learning_rate": 0.00019627407928830842, "loss": 1.1614, "step": 2776 }, { "epoch": 0.27, "grad_norm": 0.25376976035220183, "learning_rate": 0.00019626980008371158, "loss": 1.0263, "step": 2777 }, { "epoch": 0.27, "grad_norm": 0.23447258204438867, "learning_rate": 0.0001962655184699025, "loss": 1.1122, "step": 2778 }, { "epoch": 0.27, "grad_norm": 0.2562093782282185, "learning_rate": 0.00019626123444698828, "loss": 1.0457, "step": 2779 }, { "epoch": 0.27, "grad_norm": 0.2723192435370688, "learning_rate": 0.00019625694801507618, "loss": 1.0636, "step": 2780 }, { "epoch": 0.27, "grad_norm": 0.29084143851467165, "learning_rate": 0.00019625265917427346, "loss": 1.1492, "step": 2781 }, { "epoch": 0.27, "grad_norm": 0.2759691568719512, "learning_rate": 0.00019624836792468746, "loss": 1.0947, "step": 2782 }, { "epoch": 0.27, "grad_norm": 0.2847571657532783, "learning_rate": 0.00019624407426642557, "loss": 1.0998, "step": 2783 }, { "epoch": 0.27, "grad_norm": 0.2821857453688257, "learning_rate": 0.00019623977819959522, "loss": 1.0525, "step": 2784 }, { "epoch": 0.27, "grad_norm": 0.2710683595054374, "learning_rate": 0.00019623547972430394, "loss": 1.1189, "step": 2785 }, { "epoch": 0.27, "grad_norm": 0.2650861466646489, "learning_rate": 0.00019623117884065932, "loss": 1.0532, "step": 2786 }, { "epoch": 0.27, "grad_norm": 0.28632313517563235, "learning_rate": 0.00019622687554876893, "loss": 1.0432, "step": 2787 }, { "epoch": 0.27, "grad_norm": 0.2927818066954993, "learning_rate": 0.00019622256984874053, "loss": 1.0847, "step": 2788 }, { "epoch": 0.27, "grad_norm": 0.2631443348972877, "learning_rate": 0.00019621826174068185, "loss": 1.1038, "step": 2789 }, { "epoch": 0.27, "grad_norm": 0.26694650397049624, "learning_rate": 0.00019621395122470066, "loss": 0.9954, "step": 2790 }, { "epoch": 0.27, "grad_norm": 0.27909721930965364, "learning_rate": 0.00019620963830090492, "loss": 0.9486, "step": 2791 }, { "epoch": 0.27, "grad_norm": 0.24079022570344805, "learning_rate": 0.0001962053229694025, "loss": 0.9676, "step": 2792 }, { "epoch": 0.27, "grad_norm": 0.2559668456069455, "learning_rate": 0.0001962010052303014, "loss": 1.1104, "step": 2793 }, { "epoch": 0.27, "grad_norm": 0.25016354937216784, "learning_rate": 0.0001961966850837097, "loss": 1.1744, "step": 2794 }, { "epoch": 0.27, "grad_norm": 0.25412464453761213, "learning_rate": 0.0001961923625297355, "loss": 1.1563, "step": 2795 }, { "epoch": 0.27, "grad_norm": 0.25700418834836664, "learning_rate": 0.00019618803756848695, "loss": 1.155, "step": 2796 }, { "epoch": 0.27, "grad_norm": 0.29832287781586436, "learning_rate": 0.0001961837102000723, "loss": 1.0929, "step": 2797 }, { "epoch": 0.27, "grad_norm": 0.24969396689469361, "learning_rate": 0.00019617938042459988, "loss": 1.0692, "step": 2798 }, { "epoch": 0.27, "grad_norm": 0.27702787637233217, "learning_rate": 0.00019617504824217803, "loss": 1.0812, "step": 2799 }, { "epoch": 0.27, "grad_norm": 0.2900431406784246, "learning_rate": 0.00019617071365291512, "loss": 1.1213, "step": 2800 }, { "epoch": 0.27, "grad_norm": 0.2650773606437207, "learning_rate": 0.0001961663766569197, "loss": 1.0438, "step": 2801 }, { "epoch": 0.27, "grad_norm": 0.23346920952561745, "learning_rate": 0.00019616203725430023, "loss": 0.9972, "step": 2802 }, { "epoch": 0.27, "grad_norm": 0.2739900839005771, "learning_rate": 0.00019615769544516532, "loss": 1.0435, "step": 2803 }, { "epoch": 0.27, "grad_norm": 0.3190423287534823, "learning_rate": 0.00019615335122962372, "loss": 1.1342, "step": 2804 }, { "epoch": 0.27, "grad_norm": 0.2830709781599123, "learning_rate": 0.00019614900460778403, "loss": 1.0853, "step": 2805 }, { "epoch": 0.27, "grad_norm": 0.26452593616904946, "learning_rate": 0.00019614465557975507, "loss": 1.1323, "step": 2806 }, { "epoch": 0.27, "grad_norm": 0.2592995682321865, "learning_rate": 0.00019614030414564568, "loss": 1.068, "step": 2807 }, { "epoch": 0.27, "grad_norm": 0.2827474115219191, "learning_rate": 0.00019613595030556477, "loss": 1.1488, "step": 2808 }, { "epoch": 0.27, "grad_norm": 0.2649963078693095, "learning_rate": 0.0001961315940596213, "loss": 0.996, "step": 2809 }, { "epoch": 0.27, "grad_norm": 0.26346285838705263, "learning_rate": 0.00019612723540792426, "loss": 1.1175, "step": 2810 }, { "epoch": 0.27, "grad_norm": 0.28954499176521176, "learning_rate": 0.00019612287435058273, "loss": 1.1733, "step": 2811 }, { "epoch": 0.27, "grad_norm": 0.2761795119898941, "learning_rate": 0.00019611851088770585, "loss": 1.1321, "step": 2812 }, { "epoch": 0.27, "grad_norm": 0.277378324643611, "learning_rate": 0.00019611414501940284, "loss": 1.291, "step": 2813 }, { "epoch": 0.27, "grad_norm": 0.29898305586569235, "learning_rate": 0.00019610977674578296, "loss": 1.1056, "step": 2814 }, { "epoch": 0.27, "grad_norm": 0.2728332232087226, "learning_rate": 0.00019610540606695547, "loss": 1.1051, "step": 2815 }, { "epoch": 0.27, "grad_norm": 0.3020216145302391, "learning_rate": 0.0001961010329830298, "loss": 0.9909, "step": 2816 }, { "epoch": 0.27, "grad_norm": 0.2614179979610706, "learning_rate": 0.00019609665749411543, "loss": 1.0927, "step": 2817 }, { "epoch": 0.27, "grad_norm": 0.2486123416932773, "learning_rate": 0.00019609227960032177, "loss": 1.1096, "step": 2818 }, { "epoch": 0.27, "grad_norm": 0.31135834548170077, "learning_rate": 0.00019608789930175845, "loss": 1.1178, "step": 2819 }, { "epoch": 0.27, "grad_norm": 0.277755259399668, "learning_rate": 0.00019608351659853503, "loss": 1.1473, "step": 2820 }, { "epoch": 0.27, "grad_norm": 0.2584745367908558, "learning_rate": 0.00019607913149076125, "loss": 1.0122, "step": 2821 }, { "epoch": 0.27, "grad_norm": 0.2619607556881495, "learning_rate": 0.0001960747439785468, "loss": 1.128, "step": 2822 }, { "epoch": 0.27, "grad_norm": 0.29050519695473265, "learning_rate": 0.00019607035406200152, "loss": 1.0491, "step": 2823 }, { "epoch": 0.27, "grad_norm": 0.21812110204389035, "learning_rate": 0.00019606596174123525, "loss": 1.1075, "step": 2824 }, { "epoch": 0.27, "grad_norm": 0.2839251249015717, "learning_rate": 0.00019606156701635792, "loss": 1.1837, "step": 2825 }, { "epoch": 0.27, "grad_norm": 0.2958308386052112, "learning_rate": 0.0001960571698874795, "loss": 1.0984, "step": 2826 }, { "epoch": 0.27, "grad_norm": 0.2671536081657146, "learning_rate": 0.00019605277035470998, "loss": 1.0137, "step": 2827 }, { "epoch": 0.27, "grad_norm": 0.260771384058964, "learning_rate": 0.00019604836841815958, "loss": 1.0675, "step": 2828 }, { "epoch": 0.27, "grad_norm": 0.2615419961091573, "learning_rate": 0.00019604396407793835, "loss": 1.055, "step": 2829 }, { "epoch": 0.27, "grad_norm": 0.27494047722891274, "learning_rate": 0.0001960395573341566, "loss": 1.1479, "step": 2830 }, { "epoch": 0.27, "grad_norm": 0.3121477873782516, "learning_rate": 0.00019603514818692454, "loss": 1.0033, "step": 2831 }, { "epoch": 0.27, "grad_norm": 0.26737638033087213, "learning_rate": 0.00019603073663635256, "loss": 1.1077, "step": 2832 }, { "epoch": 0.27, "grad_norm": 0.24631307751232157, "learning_rate": 0.00019602632268255103, "loss": 1.0545, "step": 2833 }, { "epoch": 0.27, "grad_norm": 0.3013621704790624, "learning_rate": 0.00019602190632563043, "loss": 1.0969, "step": 2834 }, { "epoch": 0.27, "grad_norm": 0.2783637530154318, "learning_rate": 0.00019601748756570126, "loss": 1.0622, "step": 2835 }, { "epoch": 0.27, "grad_norm": 0.2574957740041804, "learning_rate": 0.00019601306640287415, "loss": 1.101, "step": 2836 }, { "epoch": 0.27, "grad_norm": 0.26247517047321034, "learning_rate": 0.00019600864283725967, "loss": 1.0651, "step": 2837 }, { "epoch": 0.27, "grad_norm": 0.25876279601882096, "learning_rate": 0.0001960042168689686, "loss": 1.062, "step": 2838 }, { "epoch": 0.27, "grad_norm": 0.25523429862063185, "learning_rate": 0.00019599978849811164, "loss": 1.1267, "step": 2839 }, { "epoch": 0.27, "grad_norm": 0.23537001704505256, "learning_rate": 0.00019599535772479968, "loss": 1.0823, "step": 2840 }, { "epoch": 0.27, "grad_norm": 0.2462574908056951, "learning_rate": 0.00019599092454914351, "loss": 1.0301, "step": 2841 }, { "epoch": 0.27, "grad_norm": 0.2721311709805547, "learning_rate": 0.00019598648897125416, "loss": 1.067, "step": 2842 }, { "epoch": 0.27, "grad_norm": 0.3069559448268241, "learning_rate": 0.0001959820509912426, "loss": 1.013, "step": 2843 }, { "epoch": 0.27, "grad_norm": 0.2702912452683172, "learning_rate": 0.00019597761060921985, "loss": 1.1137, "step": 2844 }, { "epoch": 0.27, "grad_norm": 0.2797213146710235, "learning_rate": 0.00019597316782529715, "loss": 1.0939, "step": 2845 }, { "epoch": 0.27, "grad_norm": 0.2787776466517135, "learning_rate": 0.00019596872263958552, "loss": 1.1058, "step": 2846 }, { "epoch": 0.27, "grad_norm": 0.26116233626175234, "learning_rate": 0.00019596427505219635, "loss": 1.0094, "step": 2847 }, { "epoch": 0.27, "grad_norm": 0.2693281423942913, "learning_rate": 0.0001959598250632409, "loss": 1.1635, "step": 2848 }, { "epoch": 0.27, "grad_norm": 0.2506743951314745, "learning_rate": 0.00019595537267283047, "loss": 1.1075, "step": 2849 }, { "epoch": 0.27, "grad_norm": 0.24106080467021557, "learning_rate": 0.00019595091788107656, "loss": 0.9761, "step": 2850 }, { "epoch": 0.27, "grad_norm": 0.2860007338075784, "learning_rate": 0.0001959464606880906, "loss": 1.0474, "step": 2851 }, { "epoch": 0.27, "grad_norm": 0.2359433024977078, "learning_rate": 0.00019594200109398417, "loss": 1.0804, "step": 2852 }, { "epoch": 0.27, "grad_norm": 0.24715768704519478, "learning_rate": 0.0001959375390988689, "loss": 1.1157, "step": 2853 }, { "epoch": 0.27, "grad_norm": 0.2586351713329031, "learning_rate": 0.0001959330747028564, "loss": 1.0448, "step": 2854 }, { "epoch": 0.27, "grad_norm": 0.25499580779707726, "learning_rate": 0.00019592860790605842, "loss": 1.1657, "step": 2855 }, { "epoch": 0.27, "grad_norm": 0.29734674634840746, "learning_rate": 0.0001959241387085867, "loss": 1.1446, "step": 2856 }, { "epoch": 0.27, "grad_norm": 0.2647465723279589, "learning_rate": 0.00019591966711055315, "loss": 1.1668, "step": 2857 }, { "epoch": 0.27, "grad_norm": 0.26368966136330935, "learning_rate": 0.00019591519311206964, "loss": 1.1992, "step": 2858 }, { "epoch": 0.27, "grad_norm": 0.3126849078518487, "learning_rate": 0.00019591071671324817, "loss": 1.141, "step": 2859 }, { "epoch": 0.27, "grad_norm": 0.25583665345229606, "learning_rate": 0.00019590623791420071, "loss": 1.1441, "step": 2860 }, { "epoch": 0.27, "grad_norm": 0.23927224607942188, "learning_rate": 0.00019590175671503938, "loss": 1.1212, "step": 2861 }, { "epoch": 0.27, "grad_norm": 0.28570558886340036, "learning_rate": 0.00019589727311587632, "loss": 1.0623, "step": 2862 }, { "epoch": 0.27, "grad_norm": 0.23141130803687024, "learning_rate": 0.00019589278711682373, "loss": 1.1051, "step": 2863 }, { "epoch": 0.27, "grad_norm": 0.31126189932366843, "learning_rate": 0.00019588829871799388, "loss": 1.098, "step": 2864 }, { "epoch": 0.27, "grad_norm": 0.2368691173217574, "learning_rate": 0.00019588380791949906, "loss": 0.937, "step": 2865 }, { "epoch": 0.27, "grad_norm": 0.2905469407114906, "learning_rate": 0.0001958793147214517, "loss": 1.1837, "step": 2866 }, { "epoch": 0.27, "grad_norm": 0.25681129260211033, "learning_rate": 0.00019587481912396426, "loss": 1.0659, "step": 2867 }, { "epoch": 0.27, "grad_norm": 0.3115969168588661, "learning_rate": 0.0001958703211271492, "loss": 1.1245, "step": 2868 }, { "epoch": 0.27, "grad_norm": 0.2629500967052983, "learning_rate": 0.0001958658207311191, "loss": 1.0451, "step": 2869 }, { "epoch": 0.27, "grad_norm": 0.26897588144338785, "learning_rate": 0.0001958613179359866, "loss": 1.0232, "step": 2870 }, { "epoch": 0.27, "grad_norm": 0.2745821063034373, "learning_rate": 0.00019585681274186434, "loss": 1.1058, "step": 2871 }, { "epoch": 0.27, "grad_norm": 0.28134411874328125, "learning_rate": 0.00019585230514886513, "loss": 1.0646, "step": 2872 }, { "epoch": 0.27, "grad_norm": 0.2739086818209478, "learning_rate": 0.0001958477951571017, "loss": 1.0774, "step": 2873 }, { "epoch": 0.27, "grad_norm": 0.24039032053442347, "learning_rate": 0.000195843282766687, "loss": 1.1367, "step": 2874 }, { "epoch": 0.28, "grad_norm": 0.2729224048445481, "learning_rate": 0.00019583876797773391, "loss": 1.0894, "step": 2875 }, { "epoch": 0.28, "grad_norm": 0.2548563216514712, "learning_rate": 0.0001958342507903554, "loss": 1.1571, "step": 2876 }, { "epoch": 0.28, "grad_norm": 0.2611894266017868, "learning_rate": 0.00019582973120466454, "loss": 1.1219, "step": 2877 }, { "epoch": 0.28, "grad_norm": 0.2517135134023402, "learning_rate": 0.00019582520922077444, "loss": 1.1457, "step": 2878 }, { "epoch": 0.28, "grad_norm": 0.25685092446562596, "learning_rate": 0.00019582068483879822, "loss": 1.0947, "step": 2879 }, { "epoch": 0.28, "grad_norm": 0.2531800123875422, "learning_rate": 0.00019581615805884918, "loss": 1.0824, "step": 2880 }, { "epoch": 0.28, "grad_norm": 0.29746440693794585, "learning_rate": 0.00019581162888104056, "loss": 1.127, "step": 2881 }, { "epoch": 0.28, "grad_norm": 0.2951013215212664, "learning_rate": 0.0001958070973054857, "loss": 0.9708, "step": 2882 }, { "epoch": 0.28, "grad_norm": 0.26703604465532904, "learning_rate": 0.00019580256333229804, "loss": 1.0996, "step": 2883 }, { "epoch": 0.28, "grad_norm": 0.3054149559628321, "learning_rate": 0.00019579802696159098, "loss": 1.1972, "step": 2884 }, { "epoch": 0.28, "grad_norm": 0.2554844575267877, "learning_rate": 0.00019579348819347814, "loss": 1.1815, "step": 2885 }, { "epoch": 0.28, "grad_norm": 0.27725302476518116, "learning_rate": 0.00019578894702807303, "loss": 1.1779, "step": 2886 }, { "epoch": 0.28, "grad_norm": 0.2812121223614431, "learning_rate": 0.0001957844034654893, "loss": 1.0931, "step": 2887 }, { "epoch": 0.28, "grad_norm": 0.28657879183155893, "learning_rate": 0.0001957798575058407, "loss": 1.209, "step": 2888 }, { "epoch": 0.28, "grad_norm": 0.32607385402952277, "learning_rate": 0.00019577530914924096, "loss": 1.083, "step": 2889 }, { "epoch": 0.28, "grad_norm": 0.2622053283912726, "learning_rate": 0.00019577075839580395, "loss": 1.1331, "step": 2890 }, { "epoch": 0.28, "grad_norm": 0.27330452773103864, "learning_rate": 0.00019576620524564347, "loss": 1.1223, "step": 2891 }, { "epoch": 0.28, "grad_norm": 0.28722568397660336, "learning_rate": 0.00019576164969887353, "loss": 1.0848, "step": 2892 }, { "epoch": 0.28, "grad_norm": 0.25111702949361947, "learning_rate": 0.00019575709175560815, "loss": 1.1015, "step": 2893 }, { "epoch": 0.28, "grad_norm": 0.27784956399299426, "learning_rate": 0.00019575253141596136, "loss": 1.0712, "step": 2894 }, { "epoch": 0.28, "grad_norm": 0.29027995612195606, "learning_rate": 0.00019574796868004728, "loss": 1.0522, "step": 2895 }, { "epoch": 0.28, "grad_norm": 0.2662180849874586, "learning_rate": 0.00019574340354798012, "loss": 1.0711, "step": 2896 }, { "epoch": 0.28, "grad_norm": 0.272996240282476, "learning_rate": 0.00019573883601987409, "loss": 1.1081, "step": 2897 }, { "epoch": 0.28, "grad_norm": 0.2500926481674787, "learning_rate": 0.00019573426609584353, "loss": 1.0818, "step": 2898 }, { "epoch": 0.28, "grad_norm": 0.2690599907288768, "learning_rate": 0.00019572969377600278, "loss": 1.1512, "step": 2899 }, { "epoch": 0.28, "grad_norm": 0.26895706721452967, "learning_rate": 0.00019572511906046632, "loss": 1.106, "step": 2900 }, { "epoch": 0.28, "grad_norm": 0.28870985265507426, "learning_rate": 0.00019572054194934855, "loss": 1.0406, "step": 2901 }, { "epoch": 0.28, "grad_norm": 0.2569564435809099, "learning_rate": 0.00019571596244276408, "loss": 1.1162, "step": 2902 }, { "epoch": 0.28, "grad_norm": 0.26799107456705956, "learning_rate": 0.0001957113805408275, "loss": 1.0301, "step": 2903 }, { "epoch": 0.28, "grad_norm": 0.2476133822775531, "learning_rate": 0.00019570679624365348, "loss": 1.1889, "step": 2904 }, { "epoch": 0.28, "grad_norm": 0.28688265263367885, "learning_rate": 0.00019570220955135673, "loss": 1.0879, "step": 2905 }, { "epoch": 0.28, "grad_norm": 0.279647668676218, "learning_rate": 0.000195697620464052, "loss": 1.1741, "step": 2906 }, { "epoch": 0.28, "grad_norm": 0.26565186704575483, "learning_rate": 0.0001956930289818542, "loss": 1.0533, "step": 2907 }, { "epoch": 0.28, "grad_norm": 0.27848478850209946, "learning_rate": 0.00019568843510487822, "loss": 1.0685, "step": 2908 }, { "epoch": 0.28, "grad_norm": 0.27824963257277385, "learning_rate": 0.00019568383883323902, "loss": 1.154, "step": 2909 }, { "epoch": 0.28, "grad_norm": 0.2889721158688345, "learning_rate": 0.0001956792401670516, "loss": 1.0229, "step": 2910 }, { "epoch": 0.28, "grad_norm": 0.2433224132508536, "learning_rate": 0.00019567463910643106, "loss": 1.0934, "step": 2911 }, { "epoch": 0.28, "grad_norm": 0.2558852060337151, "learning_rate": 0.00019567003565149256, "loss": 1.035, "step": 2912 }, { "epoch": 0.28, "grad_norm": 0.25619363373049336, "learning_rate": 0.0001956654298023513, "loss": 1.1415, "step": 2913 }, { "epoch": 0.28, "grad_norm": 0.2913829589793719, "learning_rate": 0.0001956608215591225, "loss": 1.0902, "step": 2914 }, { "epoch": 0.28, "grad_norm": 0.25636509940637525, "learning_rate": 0.00019565621092192156, "loss": 1.0804, "step": 2915 }, { "epoch": 0.28, "grad_norm": 0.2911863913856473, "learning_rate": 0.00019565159789086377, "loss": 1.0234, "step": 2916 }, { "epoch": 0.28, "grad_norm": 0.3098448701698118, "learning_rate": 0.00019564698246606467, "loss": 1.018, "step": 2917 }, { "epoch": 0.28, "grad_norm": 0.28894296908926365, "learning_rate": 0.00019564236464763971, "loss": 1.0444, "step": 2918 }, { "epoch": 0.28, "grad_norm": 0.2969290728405071, "learning_rate": 0.00019563774443570448, "loss": 1.0826, "step": 2919 }, { "epoch": 0.28, "grad_norm": 0.26737166147445435, "learning_rate": 0.00019563312183037458, "loss": 1.1668, "step": 2920 }, { "epoch": 0.28, "grad_norm": 0.28206692002567496, "learning_rate": 0.0001956284968317657, "loss": 1.0922, "step": 2921 }, { "epoch": 0.28, "grad_norm": 0.24812211567721631, "learning_rate": 0.0001956238694399936, "loss": 1.1432, "step": 2922 }, { "epoch": 0.28, "grad_norm": 0.2599840736277116, "learning_rate": 0.00019561923965517405, "loss": 1.0521, "step": 2923 }, { "epoch": 0.28, "grad_norm": 0.2543940271199818, "learning_rate": 0.00019561460747742295, "loss": 1.1435, "step": 2924 }, { "epoch": 0.28, "grad_norm": 0.27672619084132033, "learning_rate": 0.0001956099729068562, "loss": 1.0804, "step": 2925 }, { "epoch": 0.28, "grad_norm": 0.257274251896411, "learning_rate": 0.0001956053359435898, "loss": 1.1605, "step": 2926 }, { "epoch": 0.28, "grad_norm": 0.2700086089277536, "learning_rate": 0.00019560069658773976, "loss": 1.1006, "step": 2927 }, { "epoch": 0.28, "grad_norm": 0.2729066392922699, "learning_rate": 0.00019559605483942223, "loss": 0.9848, "step": 2928 }, { "epoch": 0.28, "grad_norm": 0.2819343853836847, "learning_rate": 0.0001955914106987533, "loss": 1.0987, "step": 2929 }, { "epoch": 0.28, "grad_norm": 0.28151027604586293, "learning_rate": 0.00019558676416584929, "loss": 1.1282, "step": 2930 }, { "epoch": 0.28, "grad_norm": 0.26677614820796297, "learning_rate": 0.0001955821152408264, "loss": 1.1034, "step": 2931 }, { "epoch": 0.28, "grad_norm": 0.3059666303412851, "learning_rate": 0.00019557746392380104, "loss": 1.1612, "step": 2932 }, { "epoch": 0.28, "grad_norm": 0.2819684123621462, "learning_rate": 0.00019557281021488957, "loss": 1.0681, "step": 2933 }, { "epoch": 0.28, "grad_norm": 0.23809460047261669, "learning_rate": 0.00019556815411420842, "loss": 1.0539, "step": 2934 }, { "epoch": 0.28, "grad_norm": 0.26195198259872626, "learning_rate": 0.0001955634956218742, "loss": 1.0663, "step": 2935 }, { "epoch": 0.28, "grad_norm": 0.2776958285854396, "learning_rate": 0.00019555883473800344, "loss": 1.2042, "step": 2936 }, { "epoch": 0.28, "grad_norm": 0.2776878056257854, "learning_rate": 0.00019555417146271275, "loss": 1.0723, "step": 2937 }, { "epoch": 0.28, "grad_norm": 0.22289840395374008, "learning_rate": 0.00019554950579611888, "loss": 1.101, "step": 2938 }, { "epoch": 0.28, "grad_norm": 0.2765820638955678, "learning_rate": 0.00019554483773833855, "loss": 1.1081, "step": 2939 }, { "epoch": 0.28, "grad_norm": 0.313800298123316, "learning_rate": 0.00019554016728948865, "loss": 1.1322, "step": 2940 }, { "epoch": 0.28, "grad_norm": 0.25205600427022146, "learning_rate": 0.00019553549444968602, "loss": 1.0419, "step": 2941 }, { "epoch": 0.28, "grad_norm": 0.271795514200843, "learning_rate": 0.00019553081921904757, "loss": 1.1375, "step": 2942 }, { "epoch": 0.28, "grad_norm": 0.28101236125294443, "learning_rate": 0.00019552614159769034, "loss": 1.0403, "step": 2943 }, { "epoch": 0.28, "grad_norm": 0.25207664771282795, "learning_rate": 0.0001955214615857314, "loss": 1.0785, "step": 2944 }, { "epoch": 0.28, "grad_norm": 0.27380543511172994, "learning_rate": 0.00019551677918328784, "loss": 1.1187, "step": 2945 }, { "epoch": 0.28, "grad_norm": 0.2732262776480482, "learning_rate": 0.00019551209439047683, "loss": 1.1426, "step": 2946 }, { "epoch": 0.28, "grad_norm": 0.25773244127240097, "learning_rate": 0.00019550740720741564, "loss": 1.019, "step": 2947 }, { "epoch": 0.28, "grad_norm": 0.2649249368780316, "learning_rate": 0.0001955027176342216, "loss": 1.0553, "step": 2948 }, { "epoch": 0.28, "grad_norm": 0.2778435224973774, "learning_rate": 0.00019549802567101198, "loss": 1.1484, "step": 2949 }, { "epoch": 0.28, "grad_norm": 0.2835832433902629, "learning_rate": 0.00019549333131790427, "loss": 1.1626, "step": 2950 }, { "epoch": 0.28, "grad_norm": 0.2684901729578864, "learning_rate": 0.00019548863457501592, "loss": 1.0469, "step": 2951 }, { "epoch": 0.28, "grad_norm": 0.2917738619766324, "learning_rate": 0.0001954839354424645, "loss": 1.0706, "step": 2952 }, { "epoch": 0.28, "grad_norm": 0.2719180117243129, "learning_rate": 0.00019547923392036756, "loss": 1.069, "step": 2953 }, { "epoch": 0.28, "grad_norm": 0.32365658454747653, "learning_rate": 0.00019547453000884278, "loss": 1.2248, "step": 2954 }, { "epoch": 0.28, "grad_norm": 0.22919570649820376, "learning_rate": 0.0001954698237080079, "loss": 0.9762, "step": 2955 }, { "epoch": 0.28, "grad_norm": 0.27049554069580856, "learning_rate": 0.00019546511501798068, "loss": 1.0445, "step": 2956 }, { "epoch": 0.28, "grad_norm": 0.2321876330280462, "learning_rate": 0.00019546040393887896, "loss": 0.9582, "step": 2957 }, { "epoch": 0.28, "grad_norm": 0.2866826620321833, "learning_rate": 0.00019545569047082063, "loss": 1.0803, "step": 2958 }, { "epoch": 0.28, "grad_norm": 0.2513540583580118, "learning_rate": 0.00019545097461392364, "loss": 1.083, "step": 2959 }, { "epoch": 0.28, "grad_norm": 0.26654262390528605, "learning_rate": 0.00019544625636830606, "loss": 1.0319, "step": 2960 }, { "epoch": 0.28, "grad_norm": 0.2413056145935159, "learning_rate": 0.00019544153573408592, "loss": 1.158, "step": 2961 }, { "epoch": 0.28, "grad_norm": 0.26314675374807356, "learning_rate": 0.00019543681271138135, "loss": 1.1581, "step": 2962 }, { "epoch": 0.28, "grad_norm": 0.302120035744808, "learning_rate": 0.00019543208730031056, "loss": 1.0621, "step": 2963 }, { "epoch": 0.28, "grad_norm": 0.2784231038369151, "learning_rate": 0.0001954273595009918, "loss": 1.1521, "step": 2964 }, { "epoch": 0.28, "grad_norm": 0.3301489966327534, "learning_rate": 0.00019542262931354342, "loss": 1.1991, "step": 2965 }, { "epoch": 0.28, "grad_norm": 0.2625920950709, "learning_rate": 0.00019541789673808378, "loss": 1.1439, "step": 2966 }, { "epoch": 0.28, "grad_norm": 0.2799876119710994, "learning_rate": 0.00019541316177473127, "loss": 1.2343, "step": 2967 }, { "epoch": 0.28, "grad_norm": 0.25861702387999425, "learning_rate": 0.00019540842442360444, "loss": 1.0334, "step": 2968 }, { "epoch": 0.28, "grad_norm": 0.26861453164120885, "learning_rate": 0.00019540368468482183, "loss": 1.0876, "step": 2969 }, { "epoch": 0.28, "grad_norm": 0.2790820406297911, "learning_rate": 0.00019539894255850203, "loss": 1.2192, "step": 2970 }, { "epoch": 0.28, "grad_norm": 0.26958726146743006, "learning_rate": 0.00019539419804476377, "loss": 1.071, "step": 2971 }, { "epoch": 0.28, "grad_norm": 0.2677639222097805, "learning_rate": 0.00019538945114372573, "loss": 1.2223, "step": 2972 }, { "epoch": 0.28, "grad_norm": 0.26245494649962503, "learning_rate": 0.00019538470185550674, "loss": 1.105, "step": 2973 }, { "epoch": 0.28, "grad_norm": 0.2693830042475195, "learning_rate": 0.00019537995018022563, "loss": 1.1118, "step": 2974 }, { "epoch": 0.28, "grad_norm": 0.24631907921671092, "learning_rate": 0.0001953751961180013, "loss": 1.0035, "step": 2975 }, { "epoch": 0.28, "grad_norm": 0.30613365197670445, "learning_rate": 0.00019537043966895277, "loss": 1.0775, "step": 2976 }, { "epoch": 0.28, "grad_norm": 0.2776873480898374, "learning_rate": 0.00019536568083319903, "loss": 1.0197, "step": 2977 }, { "epoch": 0.28, "grad_norm": 0.28885110017090304, "learning_rate": 0.00019536091961085922, "loss": 1.1091, "step": 2978 }, { "epoch": 0.29, "grad_norm": 0.2567410212869977, "learning_rate": 0.00019535615600205247, "loss": 1.0443, "step": 2979 }, { "epoch": 0.29, "grad_norm": 0.28945345981693216, "learning_rate": 0.00019535139000689795, "loss": 1.0625, "step": 2980 }, { "epoch": 0.29, "grad_norm": 0.2774312492094651, "learning_rate": 0.000195346621625515, "loss": 1.1077, "step": 2981 }, { "epoch": 0.29, "grad_norm": 0.26395699634012787, "learning_rate": 0.00019534185085802293, "loss": 1.0201, "step": 2982 }, { "epoch": 0.29, "grad_norm": 0.28496537247736714, "learning_rate": 0.0001953370777045411, "loss": 1.1266, "step": 2983 }, { "epoch": 0.29, "grad_norm": 0.3243637973540252, "learning_rate": 0.00019533230216518897, "loss": 1.1888, "step": 2984 }, { "epoch": 0.29, "grad_norm": 0.29081576092276984, "learning_rate": 0.00019532752424008607, "loss": 1.0315, "step": 2985 }, { "epoch": 0.29, "grad_norm": 0.2850948882845963, "learning_rate": 0.00019532274392935198, "loss": 1.0013, "step": 2986 }, { "epoch": 0.29, "grad_norm": 0.2923430922930697, "learning_rate": 0.0001953179612331063, "loss": 0.9897, "step": 2987 }, { "epoch": 0.29, "grad_norm": 0.29321038881690914, "learning_rate": 0.00019531317615146873, "loss": 1.1548, "step": 2988 }, { "epoch": 0.29, "grad_norm": 0.28328990204826776, "learning_rate": 0.00019530838868455906, "loss": 1.0857, "step": 2989 }, { "epoch": 0.29, "grad_norm": 0.25769647168620946, "learning_rate": 0.00019530359883249701, "loss": 1.025, "step": 2990 }, { "epoch": 0.29, "grad_norm": 0.2796837930311345, "learning_rate": 0.00019529880659540256, "loss": 1.1879, "step": 2991 }, { "epoch": 0.29, "grad_norm": 0.2787744820467792, "learning_rate": 0.00019529401197339557, "loss": 1.1248, "step": 2992 }, { "epoch": 0.29, "grad_norm": 0.269889282961234, "learning_rate": 0.00019528921496659603, "loss": 1.0331, "step": 2993 }, { "epoch": 0.29, "grad_norm": 0.28222433704753624, "learning_rate": 0.00019528441557512398, "loss": 1.0523, "step": 2994 }, { "epoch": 0.29, "grad_norm": 0.27863870289129955, "learning_rate": 0.00019527961379909957, "loss": 1.1745, "step": 2995 }, { "epoch": 0.29, "grad_norm": 0.31883034906636215, "learning_rate": 0.00019527480963864294, "loss": 1.0541, "step": 2996 }, { "epoch": 0.29, "grad_norm": 0.27111306738362173, "learning_rate": 0.0001952700030938743, "loss": 1.1796, "step": 2997 }, { "epoch": 0.29, "grad_norm": 0.27408759182730913, "learning_rate": 0.00019526519416491401, "loss": 1.0041, "step": 2998 }, { "epoch": 0.29, "grad_norm": 0.24522235103299234, "learning_rate": 0.0001952603828518823, "loss": 0.8971, "step": 2999 }, { "epoch": 0.29, "grad_norm": 0.2583032592016034, "learning_rate": 0.00019525556915489967, "loss": 0.9294, "step": 3000 }, { "epoch": 0.29, "grad_norm": 0.25624359117015355, "learning_rate": 0.00019525075307408655, "loss": 0.9701, "step": 3001 }, { "epoch": 0.29, "grad_norm": 0.28720231190552714, "learning_rate": 0.0001952459346095635, "loss": 1.0356, "step": 3002 }, { "epoch": 0.29, "grad_norm": 0.29696988241107114, "learning_rate": 0.00019524111376145105, "loss": 1.0428, "step": 3003 }, { "epoch": 0.29, "grad_norm": 0.26750283784091977, "learning_rate": 0.00019523629052986988, "loss": 1.0176, "step": 3004 }, { "epoch": 0.29, "grad_norm": 0.28397737724131106, "learning_rate": 0.00019523146491494067, "loss": 1.0977, "step": 3005 }, { "epoch": 0.29, "grad_norm": 0.30294167237041875, "learning_rate": 0.0001952266369167842, "loss": 1.1875, "step": 3006 }, { "epoch": 0.29, "grad_norm": 0.2580269760608724, "learning_rate": 0.00019522180653552132, "loss": 1.0923, "step": 3007 }, { "epoch": 0.29, "grad_norm": 0.2788977172196587, "learning_rate": 0.00019521697377127285, "loss": 1.1057, "step": 3008 }, { "epoch": 0.29, "grad_norm": 0.26291500633347537, "learning_rate": 0.00019521213862415979, "loss": 1.1551, "step": 3009 }, { "epoch": 0.29, "grad_norm": 0.2760993256298351, "learning_rate": 0.00019520730109430314, "loss": 1.0357, "step": 3010 }, { "epoch": 0.29, "grad_norm": 0.2851223724038372, "learning_rate": 0.0001952024611818239, "loss": 1.1088, "step": 3011 }, { "epoch": 0.29, "grad_norm": 0.2613411081969399, "learning_rate": 0.00019519761888684326, "loss": 1.0058, "step": 3012 }, { "epoch": 0.29, "grad_norm": 0.2374352044099212, "learning_rate": 0.0001951927742094824, "loss": 1.0545, "step": 3013 }, { "epoch": 0.29, "grad_norm": 0.27080671804410106, "learning_rate": 0.00019518792714986254, "loss": 1.1475, "step": 3014 }, { "epoch": 0.29, "grad_norm": 0.2993708318879525, "learning_rate": 0.00019518307770810496, "loss": 1.0931, "step": 3015 }, { "epoch": 0.29, "grad_norm": 0.29400732138156965, "learning_rate": 0.00019517822588433102, "loss": 1.0799, "step": 3016 }, { "epoch": 0.29, "grad_norm": 0.25464534887846263, "learning_rate": 0.0001951733716786622, "loss": 0.993, "step": 3017 }, { "epoch": 0.29, "grad_norm": 0.2652448178700676, "learning_rate": 0.0001951685150912199, "loss": 1.1835, "step": 3018 }, { "epoch": 0.29, "grad_norm": 0.3077147987981465, "learning_rate": 0.00019516365612212572, "loss": 1.0706, "step": 3019 }, { "epoch": 0.29, "grad_norm": 0.30685934393160413, "learning_rate": 0.00019515879477150123, "loss": 1.0244, "step": 3020 }, { "epoch": 0.29, "grad_norm": 0.2856291230251649, "learning_rate": 0.00019515393103946812, "loss": 1.0963, "step": 3021 }, { "epoch": 0.29, "grad_norm": 0.2767228605351276, "learning_rate": 0.00019514906492614805, "loss": 0.9945, "step": 3022 }, { "epoch": 0.29, "grad_norm": 0.27035783848571304, "learning_rate": 0.00019514419643166283, "loss": 1.1075, "step": 3023 }, { "epoch": 0.29, "grad_norm": 0.2990769279659998, "learning_rate": 0.0001951393255561343, "loss": 1.1556, "step": 3024 }, { "epoch": 0.29, "grad_norm": 0.2844357764929915, "learning_rate": 0.00019513445229968438, "loss": 0.9933, "step": 3025 }, { "epoch": 0.29, "grad_norm": 0.3070197133609208, "learning_rate": 0.000195129576662435, "loss": 1.0885, "step": 3026 }, { "epoch": 0.29, "grad_norm": 0.2820008198156176, "learning_rate": 0.0001951246986445082, "loss": 1.0819, "step": 3027 }, { "epoch": 0.29, "grad_norm": 0.27229352040640303, "learning_rate": 0.00019511981824602598, "loss": 1.046, "step": 3028 }, { "epoch": 0.29, "grad_norm": 0.2739544259171004, "learning_rate": 0.00019511493546711054, "loss": 1.0647, "step": 3029 }, { "epoch": 0.29, "grad_norm": 0.2620879572449313, "learning_rate": 0.00019511005030788407, "loss": 1.1027, "step": 3030 }, { "epoch": 0.29, "grad_norm": 0.251134914749705, "learning_rate": 0.00019510516276846884, "loss": 1.0464, "step": 3031 }, { "epoch": 0.29, "grad_norm": 0.23480951173895892, "learning_rate": 0.0001951002728489871, "loss": 1.0258, "step": 3032 }, { "epoch": 0.29, "grad_norm": 0.2637238517781191, "learning_rate": 0.0001950953805495613, "loss": 1.0609, "step": 3033 }, { "epoch": 0.29, "grad_norm": 0.23941970843056096, "learning_rate": 0.0001950904858703138, "loss": 0.9995, "step": 3034 }, { "epoch": 0.29, "grad_norm": 0.2674317195338645, "learning_rate": 0.00019508558881136716, "loss": 1.1166, "step": 3035 }, { "epoch": 0.29, "grad_norm": 0.2671056081371556, "learning_rate": 0.0001950806893728439, "loss": 1.1134, "step": 3036 }, { "epoch": 0.29, "grad_norm": 0.25637597126512307, "learning_rate": 0.0001950757875548666, "loss": 1.0847, "step": 3037 }, { "epoch": 0.29, "grad_norm": 0.26941442259152115, "learning_rate": 0.000195070883357558, "loss": 1.1351, "step": 3038 }, { "epoch": 0.29, "grad_norm": 0.26139367123922513, "learning_rate": 0.00019506597678104078, "loss": 1.1819, "step": 3039 }, { "epoch": 0.29, "grad_norm": 0.2730769424356869, "learning_rate": 0.00019506106782543774, "loss": 1.0862, "step": 3040 }, { "epoch": 0.29, "grad_norm": 0.2564183571077773, "learning_rate": 0.00019505615649087173, "loss": 1.057, "step": 3041 }, { "epoch": 0.29, "grad_norm": 0.27496154581521765, "learning_rate": 0.00019505124277746568, "loss": 1.0365, "step": 3042 }, { "epoch": 0.29, "grad_norm": 0.2906578614460428, "learning_rate": 0.00019504632668534253, "loss": 1.0765, "step": 3043 }, { "epoch": 0.29, "grad_norm": 0.24889624819261374, "learning_rate": 0.00019504140821462534, "loss": 1.0847, "step": 3044 }, { "epoch": 0.29, "grad_norm": 0.26592584153440635, "learning_rate": 0.00019503648736543715, "loss": 1.0803, "step": 3045 }, { "epoch": 0.29, "grad_norm": 0.2944881481822344, "learning_rate": 0.00019503156413790113, "loss": 1.0591, "step": 3046 }, { "epoch": 0.29, "grad_norm": 0.2918642575968384, "learning_rate": 0.00019502663853214052, "loss": 1.0976, "step": 3047 }, { "epoch": 0.29, "grad_norm": 0.26006791100294435, "learning_rate": 0.00019502171054827856, "loss": 1.1608, "step": 3048 }, { "epoch": 0.29, "grad_norm": 0.3177927171422205, "learning_rate": 0.00019501678018643854, "loss": 1.1429, "step": 3049 }, { "epoch": 0.29, "grad_norm": 0.26535744168889774, "learning_rate": 0.0001950118474467439, "loss": 1.0564, "step": 3050 }, { "epoch": 0.29, "grad_norm": 0.30516176813295376, "learning_rate": 0.00019500691232931806, "loss": 1.085, "step": 3051 }, { "epoch": 0.29, "grad_norm": 0.3062397007408206, "learning_rate": 0.00019500197483428454, "loss": 1.202, "step": 3052 }, { "epoch": 0.29, "grad_norm": 0.30600809859823863, "learning_rate": 0.0001949970349617669, "loss": 1.0306, "step": 3053 }, { "epoch": 0.29, "grad_norm": 0.26553989672712847, "learning_rate": 0.00019499209271188874, "loss": 1.1253, "step": 3054 }, { "epoch": 0.29, "grad_norm": 0.3115655362592651, "learning_rate": 0.00019498714808477375, "loss": 1.0844, "step": 3055 }, { "epoch": 0.29, "grad_norm": 0.2733439826555348, "learning_rate": 0.00019498220108054573, "loss": 1.0594, "step": 3056 }, { "epoch": 0.29, "grad_norm": 0.27824607858191935, "learning_rate": 0.00019497725169932839, "loss": 1.1842, "step": 3057 }, { "epoch": 0.29, "grad_norm": 0.29361249395554595, "learning_rate": 0.00019497229994124563, "loss": 1.156, "step": 3058 }, { "epoch": 0.29, "grad_norm": 0.2909096666085275, "learning_rate": 0.00019496734580642139, "loss": 1.0713, "step": 3059 }, { "epoch": 0.29, "grad_norm": 0.3008406748557583, "learning_rate": 0.00019496238929497968, "loss": 0.9974, "step": 3060 }, { "epoch": 0.29, "grad_norm": 0.2557208115014124, "learning_rate": 0.00019495743040704445, "loss": 1.1056, "step": 3061 }, { "epoch": 0.29, "grad_norm": 0.2619314513826558, "learning_rate": 0.00019495246914273985, "loss": 1.121, "step": 3062 }, { "epoch": 0.29, "grad_norm": 0.2777726951083541, "learning_rate": 0.00019494750550219, "loss": 1.1167, "step": 3063 }, { "epoch": 0.29, "grad_norm": 0.27592070412234426, "learning_rate": 0.00019494253948551922, "loss": 1.1754, "step": 3064 }, { "epoch": 0.29, "grad_norm": 0.27823194441497656, "learning_rate": 0.0001949375710928517, "loss": 1.0839, "step": 3065 }, { "epoch": 0.29, "grad_norm": 0.2982548750310373, "learning_rate": 0.00019493260032431176, "loss": 1.1597, "step": 3066 }, { "epoch": 0.29, "grad_norm": 0.2859522245926003, "learning_rate": 0.00019492762718002386, "loss": 1.1175, "step": 3067 }, { "epoch": 0.29, "grad_norm": 0.25703939560705324, "learning_rate": 0.00019492265166011244, "loss": 1.0775, "step": 3068 }, { "epoch": 0.29, "grad_norm": 0.28439233978147976, "learning_rate": 0.000194917673764702, "loss": 1.1447, "step": 3069 }, { "epoch": 0.29, "grad_norm": 0.2794544690967338, "learning_rate": 0.00019491269349391712, "loss": 1.1776, "step": 3070 }, { "epoch": 0.29, "grad_norm": 0.2438574837351192, "learning_rate": 0.00019490771084788242, "loss": 1.1019, "step": 3071 }, { "epoch": 0.29, "grad_norm": 0.24023569609710485, "learning_rate": 0.00019490272582672262, "loss": 1.1135, "step": 3072 }, { "epoch": 0.29, "grad_norm": 0.29972615964686367, "learning_rate": 0.00019489773843056244, "loss": 1.069, "step": 3073 }, { "epoch": 0.29, "grad_norm": 0.2589384956974427, "learning_rate": 0.00019489274865952676, "loss": 1.2025, "step": 3074 }, { "epoch": 0.29, "grad_norm": 0.2597338399690944, "learning_rate": 0.00019488775651374038, "loss": 1.0932, "step": 3075 }, { "epoch": 0.29, "grad_norm": 0.31065520260111457, "learning_rate": 0.00019488276199332825, "loss": 1.2195, "step": 3076 }, { "epoch": 0.29, "grad_norm": 0.25453729205708414, "learning_rate": 0.0001948777650984154, "loss": 1.107, "step": 3077 }, { "epoch": 0.29, "grad_norm": 0.29862291936090024, "learning_rate": 0.00019487276582912683, "loss": 1.1301, "step": 3078 }, { "epoch": 0.29, "grad_norm": 0.3221818146284146, "learning_rate": 0.00019486776418558766, "loss": 1.1191, "step": 3079 }, { "epoch": 0.29, "grad_norm": 0.3782095978448149, "learning_rate": 0.0001948627601679231, "loss": 1.0482, "step": 3080 }, { "epoch": 0.29, "grad_norm": 0.28378395622830654, "learning_rate": 0.0001948577537762583, "loss": 1.1376, "step": 3081 }, { "epoch": 0.29, "grad_norm": 0.3319042029552207, "learning_rate": 0.00019485274501071864, "loss": 1.0665, "step": 3082 }, { "epoch": 0.29, "grad_norm": 0.26347510714336875, "learning_rate": 0.00019484773387142942, "loss": 1.1515, "step": 3083 }, { "epoch": 0.3, "grad_norm": 0.3101715271811368, "learning_rate": 0.000194842720358516, "loss": 1.1255, "step": 3084 }, { "epoch": 0.3, "grad_norm": 0.25293432322802967, "learning_rate": 0.00019483770447210397, "loss": 1.0296, "step": 3085 }, { "epoch": 0.3, "grad_norm": 0.28621886274041924, "learning_rate": 0.00019483268621231875, "loss": 0.9487, "step": 3086 }, { "epoch": 0.3, "grad_norm": 0.2747791927139919, "learning_rate": 0.00019482766557928592, "loss": 1.0543, "step": 3087 }, { "epoch": 0.3, "grad_norm": 0.26223503943348814, "learning_rate": 0.00019482264257313122, "loss": 1.0122, "step": 3088 }, { "epoch": 0.3, "grad_norm": 0.27709490802994835, "learning_rate": 0.00019481761719398027, "loss": 1.119, "step": 3089 }, { "epoch": 0.3, "grad_norm": 0.26196843932694963, "learning_rate": 0.00019481258944195886, "loss": 1.0707, "step": 3090 }, { "epoch": 0.3, "grad_norm": 0.2782018365147699, "learning_rate": 0.00019480755931719281, "loss": 1.038, "step": 3091 }, { "epoch": 0.3, "grad_norm": 0.24361390545083103, "learning_rate": 0.00019480252681980802, "loss": 1.056, "step": 3092 }, { "epoch": 0.3, "grad_norm": 0.2674864730406895, "learning_rate": 0.0001947974919499304, "loss": 1.1546, "step": 3093 }, { "epoch": 0.3, "grad_norm": 0.25038494984812143, "learning_rate": 0.00019479245470768595, "loss": 1.0509, "step": 3094 }, { "epoch": 0.3, "grad_norm": 0.27600843103065575, "learning_rate": 0.00019478741509320076, "loss": 1.1192, "step": 3095 }, { "epoch": 0.3, "grad_norm": 0.2736297017538167, "learning_rate": 0.00019478237310660093, "loss": 1.1505, "step": 3096 }, { "epoch": 0.3, "grad_norm": 0.2545876917992688, "learning_rate": 0.00019477732874801265, "loss": 1.0992, "step": 3097 }, { "epoch": 0.3, "grad_norm": 0.2539192301177348, "learning_rate": 0.0001947722820175622, "loss": 1.044, "step": 3098 }, { "epoch": 0.3, "grad_norm": 0.25814263130227105, "learning_rate": 0.00019476723291537575, "loss": 1.1827, "step": 3099 }, { "epoch": 0.3, "grad_norm": 0.2976356116278242, "learning_rate": 0.0001947621814415798, "loss": 0.9778, "step": 3100 }, { "epoch": 0.3, "grad_norm": 0.2625055643669641, "learning_rate": 0.00019475712759630068, "loss": 1.0887, "step": 3101 }, { "epoch": 0.3, "grad_norm": 0.26312176570889856, "learning_rate": 0.00019475207137966487, "loss": 1.0807, "step": 3102 }, { "epoch": 0.3, "grad_norm": 0.2618322867889844, "learning_rate": 0.00019474701279179895, "loss": 1.2045, "step": 3103 }, { "epoch": 0.3, "grad_norm": 0.2891550174279668, "learning_rate": 0.00019474195183282947, "loss": 1.0771, "step": 3104 }, { "epoch": 0.3, "grad_norm": 0.3202983567379544, "learning_rate": 0.00019473688850288312, "loss": 1.1852, "step": 3105 }, { "epoch": 0.3, "grad_norm": 0.25021772062444586, "learning_rate": 0.0001947318228020866, "loss": 1.1832, "step": 3106 }, { "epoch": 0.3, "grad_norm": 0.2930599815597174, "learning_rate": 0.00019472675473056666, "loss": 1.0763, "step": 3107 }, { "epoch": 0.3, "grad_norm": 0.30676853669698495, "learning_rate": 0.00019472168428845014, "loss": 1.0405, "step": 3108 }, { "epoch": 0.3, "grad_norm": 0.2648409443563144, "learning_rate": 0.00019471661147586395, "loss": 1.1125, "step": 3109 }, { "epoch": 0.3, "grad_norm": 0.26584314244912965, "learning_rate": 0.00019471153629293503, "loss": 1.0697, "step": 3110 }, { "epoch": 0.3, "grad_norm": 0.37140190776674137, "learning_rate": 0.0001947064587397904, "loss": 1.0939, "step": 3111 }, { "epoch": 0.3, "grad_norm": 0.259017588096064, "learning_rate": 0.00019470137881655712, "loss": 1.0809, "step": 3112 }, { "epoch": 0.3, "grad_norm": 0.26998271747435276, "learning_rate": 0.00019469629652336232, "loss": 1.0425, "step": 3113 }, { "epoch": 0.3, "grad_norm": 0.25718343306878455, "learning_rate": 0.0001946912118603332, "loss": 1.1163, "step": 3114 }, { "epoch": 0.3, "grad_norm": 0.2568678014483129, "learning_rate": 0.00019468612482759695, "loss": 0.9441, "step": 3115 }, { "epoch": 0.3, "grad_norm": 0.2765085464241682, "learning_rate": 0.00019468103542528094, "loss": 1.0876, "step": 3116 }, { "epoch": 0.3, "grad_norm": 0.23905715647271397, "learning_rate": 0.0001946759436535125, "loss": 1.1495, "step": 3117 }, { "epoch": 0.3, "grad_norm": 0.23414722392686665, "learning_rate": 0.00019467084951241907, "loss": 1.0045, "step": 3118 }, { "epoch": 0.3, "grad_norm": 0.2825675728276391, "learning_rate": 0.00019466575300212816, "loss": 1.0469, "step": 3119 }, { "epoch": 0.3, "grad_norm": 0.27456256708811555, "learning_rate": 0.00019466065412276727, "loss": 0.952, "step": 3120 }, { "epoch": 0.3, "grad_norm": 0.23902984443073147, "learning_rate": 0.00019465555287446402, "loss": 1.1261, "step": 3121 }, { "epoch": 0.3, "grad_norm": 0.2860878827274362, "learning_rate": 0.00019465044925734605, "loss": 0.9592, "step": 3122 }, { "epoch": 0.3, "grad_norm": 0.27511877405249713, "learning_rate": 0.00019464534327154112, "loss": 1.0913, "step": 3123 }, { "epoch": 0.3, "grad_norm": 0.25798267940905334, "learning_rate": 0.000194640234917177, "loss": 1.0157, "step": 3124 }, { "epoch": 0.3, "grad_norm": 0.284100415728136, "learning_rate": 0.00019463512419438153, "loss": 1.1027, "step": 3125 }, { "epoch": 0.3, "grad_norm": 0.2382259658744238, "learning_rate": 0.00019463001110328257, "loss": 1.1828, "step": 3126 }, { "epoch": 0.3, "grad_norm": 0.31222857704200785, "learning_rate": 0.0001946248956440081, "loss": 1.1124, "step": 3127 }, { "epoch": 0.3, "grad_norm": 0.2625066692419136, "learning_rate": 0.00019461977781668618, "loss": 1.0737, "step": 3128 }, { "epoch": 0.3, "grad_norm": 0.2793854323707662, "learning_rate": 0.00019461465762144487, "loss": 1.1363, "step": 3129 }, { "epoch": 0.3, "grad_norm": 0.27108476180470265, "learning_rate": 0.00019460953505841223, "loss": 1.1485, "step": 3130 }, { "epoch": 0.3, "grad_norm": 0.26903383341011894, "learning_rate": 0.0001946044101277166, "loss": 1.0214, "step": 3131 }, { "epoch": 0.3, "grad_norm": 0.30317173630025673, "learning_rate": 0.00019459928282948607, "loss": 1.0941, "step": 3132 }, { "epoch": 0.3, "grad_norm": 0.2833482336806812, "learning_rate": 0.00019459415316384906, "loss": 1.1549, "step": 3133 }, { "epoch": 0.3, "grad_norm": 0.2652521067523786, "learning_rate": 0.00019458902113093395, "loss": 1.0997, "step": 3134 }, { "epoch": 0.3, "grad_norm": 0.2796165666023849, "learning_rate": 0.0001945838867308691, "loss": 1.1974, "step": 3135 }, { "epoch": 0.3, "grad_norm": 0.2795419726491581, "learning_rate": 0.00019457874996378304, "loss": 1.0421, "step": 3136 }, { "epoch": 0.3, "grad_norm": 0.27461888378734867, "learning_rate": 0.00019457361082980432, "loss": 1.0375, "step": 3137 }, { "epoch": 0.3, "grad_norm": 0.2751564840738016, "learning_rate": 0.00019456846932906156, "loss": 0.9755, "step": 3138 }, { "epoch": 0.3, "grad_norm": 0.27520201040938214, "learning_rate": 0.00019456332546168343, "loss": 0.9982, "step": 3139 }, { "epoch": 0.3, "grad_norm": 0.28743296092030146, "learning_rate": 0.00019455817922779868, "loss": 0.8786, "step": 3140 }, { "epoch": 0.3, "grad_norm": 0.2791638788893473, "learning_rate": 0.000194553030627536, "loss": 1.1424, "step": 3141 }, { "epoch": 0.3, "grad_norm": 0.2605528838298395, "learning_rate": 0.00019454787966102435, "loss": 1.0785, "step": 3142 }, { "epoch": 0.3, "grad_norm": 0.2652776678480821, "learning_rate": 0.00019454272632839255, "loss": 1.0047, "step": 3143 }, { "epoch": 0.3, "grad_norm": 0.2770395767937961, "learning_rate": 0.00019453757062976964, "loss": 1.1224, "step": 3144 }, { "epoch": 0.3, "grad_norm": 0.2774545055078794, "learning_rate": 0.00019453241256528462, "loss": 1.0218, "step": 3145 }, { "epoch": 0.3, "grad_norm": 0.29292827749120715, "learning_rate": 0.00019452725213506654, "loss": 1.1559, "step": 3146 }, { "epoch": 0.3, "grad_norm": 0.2769766053696637, "learning_rate": 0.00019452208933924459, "loss": 1.0685, "step": 3147 }, { "epoch": 0.3, "grad_norm": 0.24168206829639038, "learning_rate": 0.00019451692417794792, "loss": 1.092, "step": 3148 }, { "epoch": 0.3, "grad_norm": 0.3243097289376712, "learning_rate": 0.00019451175665130584, "loss": 1.1109, "step": 3149 }, { "epoch": 0.3, "grad_norm": 0.2851118834219872, "learning_rate": 0.00019450658675944764, "loss": 1.0859, "step": 3150 }, { "epoch": 0.3, "grad_norm": 0.26646697928523183, "learning_rate": 0.00019450141450250272, "loss": 1.092, "step": 3151 }, { "epoch": 0.3, "grad_norm": 0.31149135380894666, "learning_rate": 0.0001944962398806005, "loss": 1.0533, "step": 3152 }, { "epoch": 0.3, "grad_norm": 0.31641860365957436, "learning_rate": 0.00019449106289387048, "loss": 1.0906, "step": 3153 }, { "epoch": 0.3, "grad_norm": 0.2861114243921771, "learning_rate": 0.00019448588354244227, "loss": 1.1436, "step": 3154 }, { "epoch": 0.3, "grad_norm": 0.2678359187873902, "learning_rate": 0.0001944807018264454, "loss": 1.0751, "step": 3155 }, { "epoch": 0.3, "grad_norm": 0.2795381844318716, "learning_rate": 0.00019447551774600958, "loss": 1.0243, "step": 3156 }, { "epoch": 0.3, "grad_norm": 0.2646096339194622, "learning_rate": 0.00019447033130126458, "loss": 1.0279, "step": 3157 }, { "epoch": 0.3, "grad_norm": 0.295589829682441, "learning_rate": 0.00019446514249234017, "loss": 1.0735, "step": 3158 }, { "epoch": 0.3, "grad_norm": 0.2980645819323912, "learning_rate": 0.0001944599513193662, "loss": 1.145, "step": 3159 }, { "epoch": 0.3, "grad_norm": 0.3115102825868531, "learning_rate": 0.00019445475778247256, "loss": 1.1983, "step": 3160 }, { "epoch": 0.3, "grad_norm": 0.2813320240238785, "learning_rate": 0.00019444956188178927, "loss": 1.1677, "step": 3161 }, { "epoch": 0.3, "grad_norm": 0.23562367846120347, "learning_rate": 0.00019444436361744632, "loss": 1.1973, "step": 3162 }, { "epoch": 0.3, "grad_norm": 0.2928636477767414, "learning_rate": 0.0001944391629895738, "loss": 1.1904, "step": 3163 }, { "epoch": 0.3, "grad_norm": 0.2938628446043182, "learning_rate": 0.0001944339599983019, "loss": 1.1338, "step": 3164 }, { "epoch": 0.3, "grad_norm": 0.2698493025290344, "learning_rate": 0.00019442875464376077, "loss": 1.085, "step": 3165 }, { "epoch": 0.3, "grad_norm": 0.30254516206760285, "learning_rate": 0.00019442354692608075, "loss": 1.21, "step": 3166 }, { "epoch": 0.3, "grad_norm": 0.26210762133120497, "learning_rate": 0.0001944183368453921, "loss": 1.0347, "step": 3167 }, { "epoch": 0.3, "grad_norm": 0.2307520308777141, "learning_rate": 0.00019441312440182524, "loss": 1.0734, "step": 3168 }, { "epoch": 0.3, "grad_norm": 0.256456644213979, "learning_rate": 0.0001944079095955106, "loss": 1.1074, "step": 3169 }, { "epoch": 0.3, "grad_norm": 0.27238535013463366, "learning_rate": 0.00019440269242657868, "loss": 1.1605, "step": 3170 }, { "epoch": 0.3, "grad_norm": 0.27536976098604626, "learning_rate": 0.00019439747289516009, "loss": 1.2405, "step": 3171 }, { "epoch": 0.3, "grad_norm": 0.2537359030538808, "learning_rate": 0.00019439225100138536, "loss": 1.0907, "step": 3172 }, { "epoch": 0.3, "grad_norm": 0.27291761429286016, "learning_rate": 0.00019438702674538525, "loss": 1.0893, "step": 3173 }, { "epoch": 0.3, "grad_norm": 0.2650287732828565, "learning_rate": 0.00019438180012729047, "loss": 1.0594, "step": 3174 }, { "epoch": 0.3, "grad_norm": 0.2566176933125399, "learning_rate": 0.00019437657114723184, "loss": 1.0371, "step": 3175 }, { "epoch": 0.3, "grad_norm": 0.27936305636263997, "learning_rate": 0.0001943713398053402, "loss": 1.1312, "step": 3176 }, { "epoch": 0.3, "grad_norm": 0.29167361844632245, "learning_rate": 0.00019436610610174646, "loss": 1.116, "step": 3177 }, { "epoch": 0.3, "grad_norm": 0.28843098771577746, "learning_rate": 0.00019436087003658163, "loss": 1.0541, "step": 3178 }, { "epoch": 0.3, "grad_norm": 0.32516725974523086, "learning_rate": 0.0001943556316099767, "loss": 1.1834, "step": 3179 }, { "epoch": 0.3, "grad_norm": 0.2541028278026109, "learning_rate": 0.0001943503908220628, "loss": 1.1216, "step": 3180 }, { "epoch": 0.3, "grad_norm": 0.2834034653405795, "learning_rate": 0.00019434514767297108, "loss": 1.2544, "step": 3181 }, { "epoch": 0.3, "grad_norm": 0.30931710915213423, "learning_rate": 0.00019433990216283274, "loss": 1.0865, "step": 3182 }, { "epoch": 0.3, "grad_norm": 0.2777726505717253, "learning_rate": 0.00019433465429177904, "loss": 1.0133, "step": 3183 }, { "epoch": 0.3, "grad_norm": 0.2893309338829881, "learning_rate": 0.00019432940405994135, "loss": 1.1005, "step": 3184 }, { "epoch": 0.3, "grad_norm": 0.2718000624288272, "learning_rate": 0.00019432415146745103, "loss": 0.9954, "step": 3185 }, { "epoch": 0.3, "grad_norm": 0.2963971328585791, "learning_rate": 0.00019431889651443953, "loss": 1.0576, "step": 3186 }, { "epoch": 0.3, "grad_norm": 0.2767798358909039, "learning_rate": 0.00019431363920103837, "loss": 1.1268, "step": 3187 }, { "epoch": 0.3, "grad_norm": 0.31700853184697214, "learning_rate": 0.00019430837952737914, "loss": 1.061, "step": 3188 }, { "epoch": 0.31, "grad_norm": 0.25763423615478015, "learning_rate": 0.0001943031174935934, "loss": 1.0927, "step": 3189 }, { "epoch": 0.31, "grad_norm": 0.2610139408028157, "learning_rate": 0.00019429785309981292, "loss": 1.0666, "step": 3190 }, { "epoch": 0.31, "grad_norm": 0.2884216831748455, "learning_rate": 0.00019429258634616941, "loss": 1.064, "step": 3191 }, { "epoch": 0.31, "grad_norm": 0.2502879838444048, "learning_rate": 0.00019428731723279463, "loss": 1.1431, "step": 3192 }, { "epoch": 0.31, "grad_norm": 0.25880669047725313, "learning_rate": 0.0001942820457598205, "loss": 1.0648, "step": 3193 }, { "epoch": 0.31, "grad_norm": 0.251763418883559, "learning_rate": 0.0001942767719273789, "loss": 1.095, "step": 3194 }, { "epoch": 0.31, "grad_norm": 0.28454723290611905, "learning_rate": 0.00019427149573560183, "loss": 1.0639, "step": 3195 }, { "epoch": 0.31, "grad_norm": 0.2740113209524548, "learning_rate": 0.00019426621718462137, "loss": 1.0383, "step": 3196 }, { "epoch": 0.31, "grad_norm": 0.2894651427378075, "learning_rate": 0.00019426093627456954, "loss": 1.0393, "step": 3197 }, { "epoch": 0.31, "grad_norm": 0.272356851821048, "learning_rate": 0.00019425565300557857, "loss": 1.0492, "step": 3198 }, { "epoch": 0.31, "grad_norm": 0.2834745514772499, "learning_rate": 0.00019425036737778063, "loss": 1.1115, "step": 3199 }, { "epoch": 0.31, "grad_norm": 0.290735855354118, "learning_rate": 0.00019424507939130802, "loss": 1.1519, "step": 3200 }, { "epoch": 0.31, "grad_norm": 0.26309247215034526, "learning_rate": 0.00019423978904629303, "loss": 1.1589, "step": 3201 }, { "epoch": 0.31, "grad_norm": 0.3034788416160877, "learning_rate": 0.00019423449634286812, "loss": 1.0927, "step": 3202 }, { "epoch": 0.31, "grad_norm": 0.24953378202378695, "learning_rate": 0.00019422920128116573, "loss": 1.0734, "step": 3203 }, { "epoch": 0.31, "grad_norm": 0.30391403166399206, "learning_rate": 0.00019422390386131835, "loss": 1.1223, "step": 3204 }, { "epoch": 0.31, "grad_norm": 0.26768113646614333, "learning_rate": 0.00019421860408345856, "loss": 1.074, "step": 3205 }, { "epoch": 0.31, "grad_norm": 0.26968925174933783, "learning_rate": 0.000194213301947719, "loss": 1.1126, "step": 3206 }, { "epoch": 0.31, "grad_norm": 0.25338870451527706, "learning_rate": 0.0001942079974542323, "loss": 1.2239, "step": 3207 }, { "epoch": 0.31, "grad_norm": 0.27878381417056575, "learning_rate": 0.0001942026906031313, "loss": 1.184, "step": 3208 }, { "epoch": 0.31, "grad_norm": 0.25893716244148623, "learning_rate": 0.00019419738139454874, "loss": 1.1045, "step": 3209 }, { "epoch": 0.31, "grad_norm": 0.2657479758737288, "learning_rate": 0.0001941920698286175, "loss": 1.0898, "step": 3210 }, { "epoch": 0.31, "grad_norm": 0.274528853814968, "learning_rate": 0.00019418675590547054, "loss": 1.2649, "step": 3211 }, { "epoch": 0.31, "grad_norm": 0.28015720519786214, "learning_rate": 0.00019418143962524084, "loss": 1.1167, "step": 3212 }, { "epoch": 0.31, "grad_norm": 0.26974290585106064, "learning_rate": 0.00019417612098806137, "loss": 1.0439, "step": 3213 }, { "epoch": 0.31, "grad_norm": 0.26730225585717254, "learning_rate": 0.00019417079999406532, "loss": 1.1091, "step": 3214 }, { "epoch": 0.31, "grad_norm": 0.24578940286887485, "learning_rate": 0.0001941654766433858, "loss": 1.1115, "step": 3215 }, { "epoch": 0.31, "grad_norm": 0.24916795650749093, "learning_rate": 0.00019416015093615604, "loss": 0.9763, "step": 3216 }, { "epoch": 0.31, "grad_norm": 0.3032785764886797, "learning_rate": 0.00019415482287250935, "loss": 1.0748, "step": 3217 }, { "epoch": 0.31, "grad_norm": 0.2840877489578721, "learning_rate": 0.00019414949245257903, "loss": 1.0943, "step": 3218 }, { "epoch": 0.31, "grad_norm": 0.2721658275584308, "learning_rate": 0.0001941441596764985, "loss": 1.0798, "step": 3219 }, { "epoch": 0.31, "grad_norm": 0.27711342122729105, "learning_rate": 0.00019413882454440118, "loss": 1.0857, "step": 3220 }, { "epoch": 0.31, "grad_norm": 0.264240156198682, "learning_rate": 0.00019413348705642065, "loss": 1.1476, "step": 3221 }, { "epoch": 0.31, "grad_norm": 0.2672728525407875, "learning_rate": 0.00019412814721269042, "loss": 1.1006, "step": 3222 }, { "epoch": 0.31, "grad_norm": 0.2790227898377365, "learning_rate": 0.00019412280501334418, "loss": 1.0214, "step": 3223 }, { "epoch": 0.31, "grad_norm": 0.26686766116618904, "learning_rate": 0.00019411746045851553, "loss": 1.0939, "step": 3224 }, { "epoch": 0.31, "grad_norm": 0.26689843862224927, "learning_rate": 0.00019411211354833832, "loss": 1.1118, "step": 3225 }, { "epoch": 0.31, "grad_norm": 0.2645315881506633, "learning_rate": 0.00019410676428294633, "loss": 1.064, "step": 3226 }, { "epoch": 0.31, "grad_norm": 0.27750130260616634, "learning_rate": 0.00019410141266247338, "loss": 1.1626, "step": 3227 }, { "epoch": 0.31, "grad_norm": 0.2739213945232073, "learning_rate": 0.0001940960586870535, "loss": 1.0854, "step": 3228 }, { "epoch": 0.31, "grad_norm": 0.28287550537217365, "learning_rate": 0.00019409070235682055, "loss": 1.0474, "step": 3229 }, { "epoch": 0.31, "grad_norm": 0.2731751302387, "learning_rate": 0.0001940853436719087, "loss": 0.9977, "step": 3230 }, { "epoch": 0.31, "grad_norm": 0.2950771929234802, "learning_rate": 0.00019407998263245194, "loss": 1.1031, "step": 3231 }, { "epoch": 0.31, "grad_norm": 0.274205168506372, "learning_rate": 0.0001940746192385845, "loss": 1.0148, "step": 3232 }, { "epoch": 0.31, "grad_norm": 0.24694847878806808, "learning_rate": 0.0001940692534904406, "loss": 1.0189, "step": 3233 }, { "epoch": 0.31, "grad_norm": 0.2884458695976643, "learning_rate": 0.00019406388538815454, "loss": 1.0534, "step": 3234 }, { "epoch": 0.31, "grad_norm": 0.25614325988090403, "learning_rate": 0.0001940585149318606, "loss": 1.1938, "step": 3235 }, { "epoch": 0.31, "grad_norm": 0.28634252057649984, "learning_rate": 0.0001940531421216932, "loss": 1.1215, "step": 3236 }, { "epoch": 0.31, "grad_norm": 0.26353357452427006, "learning_rate": 0.00019404776695778684, "loss": 1.0671, "step": 3237 }, { "epoch": 0.31, "grad_norm": 0.252301189841136, "learning_rate": 0.00019404238944027596, "loss": 1.0318, "step": 3238 }, { "epoch": 0.31, "grad_norm": 0.2921004823006027, "learning_rate": 0.0001940370095692952, "loss": 1.1743, "step": 3239 }, { "epoch": 0.31, "grad_norm": 0.3010241530589875, "learning_rate": 0.0001940316273449792, "loss": 1.1167, "step": 3240 }, { "epoch": 0.31, "grad_norm": 0.26470231728020255, "learning_rate": 0.00019402624276746263, "loss": 1.1322, "step": 3241 }, { "epoch": 0.31, "grad_norm": 0.26518892722531195, "learning_rate": 0.00019402085583688022, "loss": 1.043, "step": 3242 }, { "epoch": 0.31, "grad_norm": 0.28690580948021155, "learning_rate": 0.0001940154665533668, "loss": 1.0667, "step": 3243 }, { "epoch": 0.31, "grad_norm": 0.2810395515843624, "learning_rate": 0.00019401007491705725, "loss": 0.9801, "step": 3244 }, { "epoch": 0.31, "grad_norm": 0.2572499963401515, "learning_rate": 0.00019400468092808647, "loss": 1.159, "step": 3245 }, { "epoch": 0.31, "grad_norm": 0.2644492737063085, "learning_rate": 0.00019399928458658952, "loss": 1.119, "step": 3246 }, { "epoch": 0.31, "grad_norm": 0.2693774572143553, "learning_rate": 0.00019399388589270134, "loss": 1.1763, "step": 3247 }, { "epoch": 0.31, "grad_norm": 0.3074129041443745, "learning_rate": 0.00019398848484655714, "loss": 1.1109, "step": 3248 }, { "epoch": 0.31, "grad_norm": 0.22885608780661518, "learning_rate": 0.00019398308144829202, "loss": 1.1484, "step": 3249 }, { "epoch": 0.31, "grad_norm": 0.2431406554688736, "learning_rate": 0.0001939776756980412, "loss": 1.1024, "step": 3250 }, { "epoch": 0.31, "grad_norm": 0.2508813769274511, "learning_rate": 0.00019397226759594003, "loss": 1.1161, "step": 3251 }, { "epoch": 0.31, "grad_norm": 0.259498174018731, "learning_rate": 0.00019396685714212378, "loss": 1.1121, "step": 3252 }, { "epoch": 0.31, "grad_norm": 0.25690080144023086, "learning_rate": 0.00019396144433672787, "loss": 1.1951, "step": 3253 }, { "epoch": 0.31, "grad_norm": 0.2703955152553523, "learning_rate": 0.00019395602917988774, "loss": 1.0848, "step": 3254 }, { "epoch": 0.31, "grad_norm": 0.3248488191794343, "learning_rate": 0.00019395061167173895, "loss": 1.1507, "step": 3255 }, { "epoch": 0.31, "grad_norm": 0.28991024777070024, "learning_rate": 0.00019394519181241705, "loss": 1.1015, "step": 3256 }, { "epoch": 0.31, "grad_norm": 0.2764459982278797, "learning_rate": 0.00019393976960205772, "loss": 0.9972, "step": 3257 }, { "epoch": 0.31, "grad_norm": 0.26862234853504874, "learning_rate": 0.00019393434504079657, "loss": 1.1453, "step": 3258 }, { "epoch": 0.31, "grad_norm": 0.27971969886015396, "learning_rate": 0.00019392891812876944, "loss": 1.0825, "step": 3259 }, { "epoch": 0.31, "grad_norm": 0.2770468588755897, "learning_rate": 0.00019392348886611207, "loss": 1.0536, "step": 3260 }, { "epoch": 0.31, "grad_norm": 0.271609510880968, "learning_rate": 0.00019391805725296038, "loss": 1.0481, "step": 3261 }, { "epoch": 0.31, "grad_norm": 0.24387465904724714, "learning_rate": 0.00019391262328945027, "loss": 1.0953, "step": 3262 }, { "epoch": 0.31, "grad_norm": 0.306543464049848, "learning_rate": 0.00019390718697571776, "loss": 1.1486, "step": 3263 }, { "epoch": 0.31, "grad_norm": 0.30439534530685314, "learning_rate": 0.00019390174831189887, "loss": 1.0251, "step": 3264 }, { "epoch": 0.31, "grad_norm": 0.2702567545507524, "learning_rate": 0.0001938963072981297, "loss": 1.0219, "step": 3265 }, { "epoch": 0.31, "grad_norm": 0.24990979733983004, "learning_rate": 0.00019389086393454644, "loss": 0.9841, "step": 3266 }, { "epoch": 0.31, "grad_norm": 0.30401365328102387, "learning_rate": 0.0001938854182212853, "loss": 1.0608, "step": 3267 }, { "epoch": 0.31, "grad_norm": 0.3051517353842027, "learning_rate": 0.00019387997015848254, "loss": 1.0624, "step": 3268 }, { "epoch": 0.31, "grad_norm": 0.28665306616118164, "learning_rate": 0.00019387451974627455, "loss": 1.0742, "step": 3269 }, { "epoch": 0.31, "grad_norm": 0.3231516980435964, "learning_rate": 0.0001938690669847977, "loss": 1.1872, "step": 3270 }, { "epoch": 0.31, "grad_norm": 0.2956462625808931, "learning_rate": 0.00019386361187418848, "loss": 1.1729, "step": 3271 }, { "epoch": 0.31, "grad_norm": 0.2657542153528369, "learning_rate": 0.00019385815441458335, "loss": 1.0359, "step": 3272 }, { "epoch": 0.31, "grad_norm": 0.3012539639681833, "learning_rate": 0.0001938526946061189, "loss": 1.224, "step": 3273 }, { "epoch": 0.31, "grad_norm": 0.23858077659378998, "learning_rate": 0.00019384723244893182, "loss": 0.9866, "step": 3274 }, { "epoch": 0.31, "grad_norm": 0.28237967640012435, "learning_rate": 0.00019384176794315876, "loss": 1.1095, "step": 3275 }, { "epoch": 0.31, "grad_norm": 0.2599923150922761, "learning_rate": 0.0001938363010889365, "loss": 1.1204, "step": 3276 }, { "epoch": 0.31, "grad_norm": 0.2869091154710219, "learning_rate": 0.00019383083188640178, "loss": 1.0668, "step": 3277 }, { "epoch": 0.31, "grad_norm": 0.31710794291071975, "learning_rate": 0.00019382536033569155, "loss": 1.0612, "step": 3278 }, { "epoch": 0.31, "grad_norm": 0.29559893780311114, "learning_rate": 0.0001938198864369427, "loss": 1.077, "step": 3279 }, { "epoch": 0.31, "grad_norm": 0.2898590799989514, "learning_rate": 0.00019381441019029224, "loss": 0.9822, "step": 3280 }, { "epoch": 0.31, "grad_norm": 0.2653384099576511, "learning_rate": 0.00019380893159587722, "loss": 1.0328, "step": 3281 }, { "epoch": 0.31, "grad_norm": 0.3059551368813422, "learning_rate": 0.00019380345065383468, "loss": 1.1349, "step": 3282 }, { "epoch": 0.31, "grad_norm": 0.24886086837418994, "learning_rate": 0.0001937979673643019, "loss": 1.068, "step": 3283 }, { "epoch": 0.31, "grad_norm": 0.23919413973427292, "learning_rate": 0.000193792481727416, "loss": 1.1181, "step": 3284 }, { "epoch": 0.31, "grad_norm": 0.25404095588714376, "learning_rate": 0.0001937869937433143, "loss": 1.045, "step": 3285 }, { "epoch": 0.31, "grad_norm": 0.223637661799907, "learning_rate": 0.00019378150341213416, "loss": 1.0104, "step": 3286 }, { "epoch": 0.31, "grad_norm": 0.2779252243873626, "learning_rate": 0.00019377601073401293, "loss": 1.1353, "step": 3287 }, { "epoch": 0.31, "grad_norm": 0.27508596502440386, "learning_rate": 0.0001937705157090881, "loss": 1.0028, "step": 3288 }, { "epoch": 0.31, "grad_norm": 0.2829961966551457, "learning_rate": 0.0001937650183374972, "loss": 1.1252, "step": 3289 }, { "epoch": 0.31, "grad_norm": 0.2547718896278451, "learning_rate": 0.00019375951861937775, "loss": 1.1001, "step": 3290 }, { "epoch": 0.31, "grad_norm": 0.2386176095295127, "learning_rate": 0.00019375401655486745, "loss": 0.9711, "step": 3291 }, { "epoch": 0.31, "grad_norm": 0.26234578344158516, "learning_rate": 0.00019374851214410397, "loss": 1.0339, "step": 3292 }, { "epoch": 0.32, "grad_norm": 0.2919799926949464, "learning_rate": 0.00019374300538722503, "loss": 1.0436, "step": 3293 }, { "epoch": 0.32, "grad_norm": 0.26522986184686614, "learning_rate": 0.00019373749628436848, "loss": 1.1032, "step": 3294 }, { "epoch": 0.32, "grad_norm": 0.27747724141686647, "learning_rate": 0.00019373198483567215, "loss": 1.1199, "step": 3295 }, { "epoch": 0.32, "grad_norm": 0.251558053545879, "learning_rate": 0.00019372647104127401, "loss": 1.0908, "step": 3296 }, { "epoch": 0.32, "grad_norm": 0.2866203569622034, "learning_rate": 0.00019372095490131206, "loss": 1.1136, "step": 3297 }, { "epoch": 0.32, "grad_norm": 0.26519901166294035, "learning_rate": 0.00019371543641592427, "loss": 1.0744, "step": 3298 }, { "epoch": 0.32, "grad_norm": 0.2670922090903857, "learning_rate": 0.0001937099155852488, "loss": 1.0004, "step": 3299 }, { "epoch": 0.32, "grad_norm": 0.2582252671213613, "learning_rate": 0.0001937043924094238, "loss": 1.0158, "step": 3300 }, { "epoch": 0.32, "grad_norm": 0.2780671456060699, "learning_rate": 0.00019369886688858746, "loss": 1.0033, "step": 3301 }, { "epoch": 0.32, "grad_norm": 0.26115496930707605, "learning_rate": 0.00019369333902287812, "loss": 1.1021, "step": 3302 }, { "epoch": 0.32, "grad_norm": 0.2806226279994436, "learning_rate": 0.00019368780881243408, "loss": 1.0112, "step": 3303 }, { "epoch": 0.32, "grad_norm": 0.2544906876251208, "learning_rate": 0.00019368227625739376, "loss": 1.1054, "step": 3304 }, { "epoch": 0.32, "grad_norm": 0.2856721128498456, "learning_rate": 0.00019367674135789559, "loss": 1.1403, "step": 3305 }, { "epoch": 0.32, "grad_norm": 0.28275774405141135, "learning_rate": 0.00019367120411407807, "loss": 1.1926, "step": 3306 }, { "epoch": 0.32, "grad_norm": 0.30945016209372866, "learning_rate": 0.00019366566452607984, "loss": 1.0632, "step": 3307 }, { "epoch": 0.32, "grad_norm": 0.28757906013236484, "learning_rate": 0.00019366012259403945, "loss": 1.0334, "step": 3308 }, { "epoch": 0.32, "grad_norm": 0.28623289799348706, "learning_rate": 0.00019365457831809564, "loss": 1.0065, "step": 3309 }, { "epoch": 0.32, "grad_norm": 0.31283779658466215, "learning_rate": 0.00019364903169838714, "loss": 1.1444, "step": 3310 }, { "epoch": 0.32, "grad_norm": 0.25353872879290523, "learning_rate": 0.0001936434827350528, "loss": 1.1551, "step": 3311 }, { "epoch": 0.32, "grad_norm": 0.28420385173740026, "learning_rate": 0.00019363793142823142, "loss": 1.0866, "step": 3312 }, { "epoch": 0.32, "grad_norm": 0.2756468967799023, "learning_rate": 0.00019363237777806193, "loss": 1.1621, "step": 3313 }, { "epoch": 0.32, "grad_norm": 0.2725051208626741, "learning_rate": 0.0001936268217846834, "loss": 1.0359, "step": 3314 }, { "epoch": 0.32, "grad_norm": 0.2785478077945499, "learning_rate": 0.0001936212634482348, "loss": 1.1558, "step": 3315 }, { "epoch": 0.32, "grad_norm": 0.2506394562927538, "learning_rate": 0.00019361570276885522, "loss": 1.1897, "step": 3316 }, { "epoch": 0.32, "grad_norm": 0.2606058706252523, "learning_rate": 0.00019361013974668385, "loss": 1.1776, "step": 3317 }, { "epoch": 0.32, "grad_norm": 0.2496048710743572, "learning_rate": 0.0001936045743818599, "loss": 0.9721, "step": 3318 }, { "epoch": 0.32, "grad_norm": 0.26993436748357164, "learning_rate": 0.00019359900667452264, "loss": 1.104, "step": 3319 }, { "epoch": 0.32, "grad_norm": 0.255141548047229, "learning_rate": 0.0001935934366248114, "loss": 1.0802, "step": 3320 }, { "epoch": 0.32, "grad_norm": 0.27647316322378807, "learning_rate": 0.00019358786423286564, "loss": 1.0241, "step": 3321 }, { "epoch": 0.32, "grad_norm": 0.2923601566113196, "learning_rate": 0.00019358228949882474, "loss": 1.0406, "step": 3322 }, { "epoch": 0.32, "grad_norm": 0.24263042595969897, "learning_rate": 0.00019357671242282821, "loss": 0.9864, "step": 3323 }, { "epoch": 0.32, "grad_norm": 0.27774846850692353, "learning_rate": 0.00019357113300501566, "loss": 1.0937, "step": 3324 }, { "epoch": 0.32, "grad_norm": 0.27923854319931557, "learning_rate": 0.0001935655512455267, "loss": 1.0601, "step": 3325 }, { "epoch": 0.32, "grad_norm": 0.3061443475173794, "learning_rate": 0.000193559967144501, "loss": 1.2078, "step": 3326 }, { "epoch": 0.32, "grad_norm": 0.2846307312850083, "learning_rate": 0.00019355438070207834, "loss": 1.0301, "step": 3327 }, { "epoch": 0.32, "grad_norm": 0.27792518753199547, "learning_rate": 0.0001935487919183985, "loss": 1.0656, "step": 3328 }, { "epoch": 0.32, "grad_norm": 0.2845450991292036, "learning_rate": 0.00019354320079360132, "loss": 1.0507, "step": 3329 }, { "epoch": 0.32, "grad_norm": 0.31509060671206557, "learning_rate": 0.0001935376073278268, "loss": 1.1854, "step": 3330 }, { "epoch": 0.32, "grad_norm": 0.27538788262531033, "learning_rate": 0.00019353201152121484, "loss": 1.0761, "step": 3331 }, { "epoch": 0.32, "grad_norm": 0.3085113970962017, "learning_rate": 0.00019352641337390552, "loss": 1.0149, "step": 3332 }, { "epoch": 0.32, "grad_norm": 0.2649187417930221, "learning_rate": 0.00019352081288603895, "loss": 0.9846, "step": 3333 }, { "epoch": 0.32, "grad_norm": 0.2702951480669255, "learning_rate": 0.0001935152100577552, "loss": 1.1289, "step": 3334 }, { "epoch": 0.32, "grad_norm": 0.2587123130961769, "learning_rate": 0.00019350960488919458, "loss": 1.1603, "step": 3335 }, { "epoch": 0.32, "grad_norm": 0.26093692625675025, "learning_rate": 0.00019350399738049735, "loss": 1.189, "step": 3336 }, { "epoch": 0.32, "grad_norm": 0.2571900526712086, "learning_rate": 0.0001934983875318038, "loss": 1.1219, "step": 3337 }, { "epoch": 0.32, "grad_norm": 0.2823069020729405, "learning_rate": 0.0001934927753432543, "loss": 1.0871, "step": 3338 }, { "epoch": 0.32, "grad_norm": 0.29397830877053815, "learning_rate": 0.00019348716081498942, "loss": 1.0944, "step": 3339 }, { "epoch": 0.32, "grad_norm": 0.29592501153858697, "learning_rate": 0.00019348154394714952, "loss": 1.0847, "step": 3340 }, { "epoch": 0.32, "grad_norm": 0.27462052813299775, "learning_rate": 0.00019347592473987528, "loss": 1.1752, "step": 3341 }, { "epoch": 0.32, "grad_norm": 0.24616995149088777, "learning_rate": 0.00019347030319330727, "loss": 1.1025, "step": 3342 }, { "epoch": 0.32, "grad_norm": 0.27129435980716926, "learning_rate": 0.00019346467930758614, "loss": 1.1367, "step": 3343 }, { "epoch": 0.32, "grad_norm": 0.27336188709510006, "learning_rate": 0.0001934590530828527, "loss": 1.1193, "step": 3344 }, { "epoch": 0.32, "grad_norm": 0.27826604163520996, "learning_rate": 0.0001934534245192477, "loss": 1.1751, "step": 3345 }, { "epoch": 0.32, "grad_norm": 0.2998027219687268, "learning_rate": 0.00019344779361691203, "loss": 1.1368, "step": 3346 }, { "epoch": 0.32, "grad_norm": 0.28056799541807836, "learning_rate": 0.0001934421603759866, "loss": 1.0334, "step": 3347 }, { "epoch": 0.32, "grad_norm": 0.24723826046732345, "learning_rate": 0.00019343652479661237, "loss": 1.0881, "step": 3348 }, { "epoch": 0.32, "grad_norm": 0.3324987905537479, "learning_rate": 0.00019343088687893037, "loss": 1.0454, "step": 3349 }, { "epoch": 0.32, "grad_norm": 0.2612587061661008, "learning_rate": 0.00019342524662308174, "loss": 1.1285, "step": 3350 }, { "epoch": 0.32, "grad_norm": 0.3175396328178754, "learning_rate": 0.0001934196040292076, "loss": 1.1002, "step": 3351 }, { "epoch": 0.32, "grad_norm": 0.2982419959527945, "learning_rate": 0.00019341395909744914, "loss": 1.1169, "step": 3352 }, { "epoch": 0.32, "grad_norm": 0.2665630466583381, "learning_rate": 0.00019340831182794763, "loss": 1.0946, "step": 3353 }, { "epoch": 0.32, "grad_norm": 0.2747715419393994, "learning_rate": 0.00019340266222084445, "loss": 1.0806, "step": 3354 }, { "epoch": 0.32, "grad_norm": 0.26678051390620816, "learning_rate": 0.00019339701027628093, "loss": 1.0541, "step": 3355 }, { "epoch": 0.32, "grad_norm": 0.2598902398444097, "learning_rate": 0.00019339135599439852, "loss": 1.0699, "step": 3356 }, { "epoch": 0.32, "grad_norm": 0.254933790104563, "learning_rate": 0.00019338569937533872, "loss": 1.174, "step": 3357 }, { "epoch": 0.32, "grad_norm": 0.3128276440616595, "learning_rate": 0.00019338004041924314, "loss": 1.1016, "step": 3358 }, { "epoch": 0.32, "grad_norm": 0.296430797427781, "learning_rate": 0.00019337437912625332, "loss": 1.108, "step": 3359 }, { "epoch": 0.32, "grad_norm": 0.2970936872836418, "learning_rate": 0.00019336871549651102, "loss": 1.0936, "step": 3360 }, { "epoch": 0.32, "grad_norm": 0.30049312775342935, "learning_rate": 0.0001933630495301579, "loss": 1.1077, "step": 3361 }, { "epoch": 0.32, "grad_norm": 0.25869029149150324, "learning_rate": 0.0001933573812273358, "loss": 1.1524, "step": 3362 }, { "epoch": 0.32, "grad_norm": 0.25180878439069226, "learning_rate": 0.00019335171058818657, "loss": 0.9934, "step": 3363 }, { "epoch": 0.32, "grad_norm": 0.2582499973976482, "learning_rate": 0.0001933460376128521, "loss": 1.0993, "step": 3364 }, { "epoch": 0.32, "grad_norm": 0.27195751975656457, "learning_rate": 0.0001933403623014744, "loss": 1.0789, "step": 3365 }, { "epoch": 0.32, "grad_norm": 0.2725473211575861, "learning_rate": 0.00019333468465419545, "loss": 1.1782, "step": 3366 }, { "epoch": 0.32, "grad_norm": 0.2696499000977069, "learning_rate": 0.00019332900467115735, "loss": 1.079, "step": 3367 }, { "epoch": 0.32, "grad_norm": 0.26468748026222183, "learning_rate": 0.00019332332235250227, "loss": 1.0032, "step": 3368 }, { "epoch": 0.32, "grad_norm": 0.27136388855246457, "learning_rate": 0.00019331763769837239, "loss": 1.1859, "step": 3369 }, { "epoch": 0.32, "grad_norm": 0.3147503114706967, "learning_rate": 0.00019331195070890997, "loss": 0.9721, "step": 3370 }, { "epoch": 0.32, "grad_norm": 0.26623718570346866, "learning_rate": 0.00019330626138425733, "loss": 1.1342, "step": 3371 }, { "epoch": 0.32, "grad_norm": 0.238352475784514, "learning_rate": 0.0001933005697245569, "loss": 1.0439, "step": 3372 }, { "epoch": 0.32, "grad_norm": 0.28164020535606377, "learning_rate": 0.00019329487572995104, "loss": 1.0531, "step": 3373 }, { "epoch": 0.32, "grad_norm": 0.2798281524042237, "learning_rate": 0.0001932891794005823, "loss": 1.0947, "step": 3374 }, { "epoch": 0.32, "grad_norm": 0.2593240400068639, "learning_rate": 0.0001932834807365932, "loss": 1.3053, "step": 3375 }, { "epoch": 0.32, "grad_norm": 0.2956714093140169, "learning_rate": 0.0001932777797381264, "loss": 1.1146, "step": 3376 }, { "epoch": 0.32, "grad_norm": 0.24933340729423376, "learning_rate": 0.0001932720764053245, "loss": 1.0169, "step": 3377 }, { "epoch": 0.32, "grad_norm": 0.2746804152121392, "learning_rate": 0.0001932663707383303, "loss": 1.2153, "step": 3378 }, { "epoch": 0.32, "grad_norm": 0.2438039762627365, "learning_rate": 0.00019326066273728652, "loss": 1.1174, "step": 3379 }, { "epoch": 0.32, "grad_norm": 0.2667417249557865, "learning_rate": 0.00019325495240233608, "loss": 1.1442, "step": 3380 }, { "epoch": 0.32, "grad_norm": 0.282105093224216, "learning_rate": 0.0001932492397336218, "loss": 1.0328, "step": 3381 }, { "epoch": 0.32, "grad_norm": 0.2700271902826537, "learning_rate": 0.00019324352473128675, "loss": 1.1098, "step": 3382 }, { "epoch": 0.32, "grad_norm": 0.2810010796311137, "learning_rate": 0.00019323780739547382, "loss": 1.1228, "step": 3383 }, { "epoch": 0.32, "grad_norm": 0.27543586143634324, "learning_rate": 0.00019323208772632623, "loss": 1.0899, "step": 3384 }, { "epoch": 0.32, "grad_norm": 0.28148476530118366, "learning_rate": 0.00019322636572398705, "loss": 1.0954, "step": 3385 }, { "epoch": 0.32, "grad_norm": 0.24691162930671992, "learning_rate": 0.00019322064138859943, "loss": 1.0638, "step": 3386 }, { "epoch": 0.32, "grad_norm": 0.2766771037796505, "learning_rate": 0.0001932149147203067, "loss": 1.0988, "step": 3387 }, { "epoch": 0.32, "grad_norm": 0.27879746565463825, "learning_rate": 0.00019320918571925214, "loss": 1.1285, "step": 3388 }, { "epoch": 0.32, "grad_norm": 0.22878400180565586, "learning_rate": 0.00019320345438557913, "loss": 1.0721, "step": 3389 }, { "epoch": 0.32, "grad_norm": 0.28209510083017475, "learning_rate": 0.0001931977207194311, "loss": 1.0092, "step": 3390 }, { "epoch": 0.32, "grad_norm": 0.2718261063085623, "learning_rate": 0.00019319198472095154, "loss": 1.118, "step": 3391 }, { "epoch": 0.32, "grad_norm": 0.29842791503503757, "learning_rate": 0.00019318624639028397, "loss": 1.0923, "step": 3392 }, { "epoch": 0.32, "grad_norm": 0.24518361819708703, "learning_rate": 0.00019318050572757206, "loss": 1.0292, "step": 3393 }, { "epoch": 0.32, "grad_norm": 0.24792628965630142, "learning_rate": 0.00019317476273295937, "loss": 1.0833, "step": 3394 }, { "epoch": 0.32, "grad_norm": 0.29078999381758364, "learning_rate": 0.00019316901740658974, "loss": 1.1031, "step": 3395 }, { "epoch": 0.32, "grad_norm": 0.2567096843477097, "learning_rate": 0.00019316326974860688, "loss": 1.0013, "step": 3396 }, { "epoch": 0.32, "grad_norm": 0.2554741348697831, "learning_rate": 0.00019315751975915464, "loss": 1.0173, "step": 3397 }, { "epoch": 0.33, "grad_norm": 0.28242380704250564, "learning_rate": 0.00019315176743837692, "loss": 1.1378, "step": 3398 }, { "epoch": 0.33, "grad_norm": 0.31001384373991414, "learning_rate": 0.00019314601278641767, "loss": 1.1999, "step": 3399 }, { "epoch": 0.33, "grad_norm": 0.2833937019206038, "learning_rate": 0.0001931402558034209, "loss": 1.1491, "step": 3400 }, { "epoch": 0.33, "grad_norm": 0.3026457744535161, "learning_rate": 0.00019313449648953075, "loss": 1.2268, "step": 3401 }, { "epoch": 0.33, "grad_norm": 0.26414264616917044, "learning_rate": 0.00019312873484489122, "loss": 1.0955, "step": 3402 }, { "epoch": 0.33, "grad_norm": 0.24532958588409323, "learning_rate": 0.0001931229708696466, "loss": 1.0478, "step": 3403 }, { "epoch": 0.33, "grad_norm": 0.30331695478379483, "learning_rate": 0.00019311720456394115, "loss": 1.0953, "step": 3404 }, { "epoch": 0.33, "grad_norm": 0.28074250597379036, "learning_rate": 0.00019311143592791908, "loss": 1.1166, "step": 3405 }, { "epoch": 0.33, "grad_norm": 0.2409279290445585, "learning_rate": 0.00019310566496172482, "loss": 1.1022, "step": 3406 }, { "epoch": 0.33, "grad_norm": 0.27337622876374895, "learning_rate": 0.00019309989166550276, "loss": 1.0369, "step": 3407 }, { "epoch": 0.33, "grad_norm": 0.27239368467190694, "learning_rate": 0.00019309411603939746, "loss": 1.0825, "step": 3408 }, { "epoch": 0.33, "grad_norm": 0.2510203565258273, "learning_rate": 0.00019308833808355335, "loss": 1.1297, "step": 3409 }, { "epoch": 0.33, "grad_norm": 0.3019608047256465, "learning_rate": 0.0001930825577981151, "loss": 1.0537, "step": 3410 }, { "epoch": 0.33, "grad_norm": 0.27782305027611853, "learning_rate": 0.00019307677518322732, "loss": 1.0568, "step": 3411 }, { "epoch": 0.33, "grad_norm": 0.27292941976306373, "learning_rate": 0.00019307099023903475, "loss": 1.1049, "step": 3412 }, { "epoch": 0.33, "grad_norm": 0.31644950630512886, "learning_rate": 0.00019306520296568213, "loss": 0.9, "step": 3413 }, { "epoch": 0.33, "grad_norm": 0.25546779805105374, "learning_rate": 0.00019305941336331437, "loss": 1.1242, "step": 3414 }, { "epoch": 0.33, "grad_norm": 0.2792316426100012, "learning_rate": 0.00019305362143207629, "loss": 1.0101, "step": 3415 }, { "epoch": 0.33, "grad_norm": 0.28856119813600223, "learning_rate": 0.00019304782717211282, "loss": 1.0683, "step": 3416 }, { "epoch": 0.33, "grad_norm": 0.2538864469655074, "learning_rate": 0.00019304203058356903, "loss": 1.0736, "step": 3417 }, { "epoch": 0.33, "grad_norm": 0.27697511534173397, "learning_rate": 0.00019303623166658994, "loss": 1.0237, "step": 3418 }, { "epoch": 0.33, "grad_norm": 0.2816777125591762, "learning_rate": 0.00019303043042132067, "loss": 0.9735, "step": 3419 }, { "epoch": 0.33, "grad_norm": 0.270336650155053, "learning_rate": 0.00019302462684790643, "loss": 1.0452, "step": 3420 }, { "epoch": 0.33, "grad_norm": 0.3171677773770914, "learning_rate": 0.00019301882094649244, "loss": 1.0691, "step": 3421 }, { "epoch": 0.33, "grad_norm": 0.30382729652830914, "learning_rate": 0.00019301301271722397, "loss": 1.1365, "step": 3422 }, { "epoch": 0.33, "grad_norm": 0.2811965071217192, "learning_rate": 0.00019300720216024642, "loss": 1.0391, "step": 3423 }, { "epoch": 0.33, "grad_norm": 0.2525300639055227, "learning_rate": 0.00019300138927570517, "loss": 1.1168, "step": 3424 }, { "epoch": 0.33, "grad_norm": 0.2566849695783432, "learning_rate": 0.00019299557406374574, "loss": 1.0444, "step": 3425 }, { "epoch": 0.33, "grad_norm": 0.29423746734781697, "learning_rate": 0.00019298975652451357, "loss": 1.1018, "step": 3426 }, { "epoch": 0.33, "grad_norm": 0.27988383125452326, "learning_rate": 0.00019298393665815434, "loss": 1.0843, "step": 3427 }, { "epoch": 0.33, "grad_norm": 0.2914047802879343, "learning_rate": 0.00019297811446481364, "loss": 1.1238, "step": 3428 }, { "epoch": 0.33, "grad_norm": 0.26554271237108157, "learning_rate": 0.0001929722899446372, "loss": 1.0654, "step": 3429 }, { "epoch": 0.33, "grad_norm": 0.2626759072606683, "learning_rate": 0.00019296646309777078, "loss": 1.0299, "step": 3430 }, { "epoch": 0.33, "grad_norm": 0.24310810286273438, "learning_rate": 0.00019296063392436016, "loss": 1.1926, "step": 3431 }, { "epoch": 0.33, "grad_norm": 0.2681389299412346, "learning_rate": 0.0001929548024245513, "loss": 1.0535, "step": 3432 }, { "epoch": 0.33, "grad_norm": 0.2705554100674985, "learning_rate": 0.00019294896859849007, "loss": 1.1006, "step": 3433 }, { "epoch": 0.33, "grad_norm": 0.29956584728704405, "learning_rate": 0.00019294313244632246, "loss": 1.1618, "step": 3434 }, { "epoch": 0.33, "grad_norm": 0.25979972995620976, "learning_rate": 0.00019293729396819455, "loss": 1.1341, "step": 3435 }, { "epoch": 0.33, "grad_norm": 0.31840805453945864, "learning_rate": 0.0001929314531642525, "loss": 1.0505, "step": 3436 }, { "epoch": 0.33, "grad_norm": 0.30601505649557453, "learning_rate": 0.0001929256100346424, "loss": 1.1247, "step": 3437 }, { "epoch": 0.33, "grad_norm": 0.255162069191932, "learning_rate": 0.0001929197645795105, "loss": 1.0914, "step": 3438 }, { "epoch": 0.33, "grad_norm": 0.27220081852340894, "learning_rate": 0.00019291391679900308, "loss": 1.1204, "step": 3439 }, { "epoch": 0.33, "grad_norm": 0.2680103064629152, "learning_rate": 0.00019290806669326651, "loss": 1.1374, "step": 3440 }, { "epoch": 0.33, "grad_norm": 0.26907374179829274, "learning_rate": 0.0001929022142624472, "loss": 1.104, "step": 3441 }, { "epoch": 0.33, "grad_norm": 0.25948281052343475, "learning_rate": 0.00019289635950669158, "loss": 1.1315, "step": 3442 }, { "epoch": 0.33, "grad_norm": 0.25383699970065504, "learning_rate": 0.00019289050242614616, "loss": 1.0563, "step": 3443 }, { "epoch": 0.33, "grad_norm": 0.28630705685350616, "learning_rate": 0.00019288464302095757, "loss": 1.0699, "step": 3444 }, { "epoch": 0.33, "grad_norm": 0.2622608246535195, "learning_rate": 0.00019287878129127238, "loss": 1.0172, "step": 3445 }, { "epoch": 0.33, "grad_norm": 0.27001089784818566, "learning_rate": 0.00019287291723723735, "loss": 0.9808, "step": 3446 }, { "epoch": 0.33, "grad_norm": 0.3040076339208099, "learning_rate": 0.00019286705085899916, "loss": 1.2147, "step": 3447 }, { "epoch": 0.33, "grad_norm": 0.2996187859781569, "learning_rate": 0.00019286118215670471, "loss": 1.1517, "step": 3448 }, { "epoch": 0.33, "grad_norm": 0.33317490146125284, "learning_rate": 0.00019285531113050075, "loss": 1.0603, "step": 3449 }, { "epoch": 0.33, "grad_norm": 0.2900875599565296, "learning_rate": 0.00019284943778053433, "loss": 1.1299, "step": 3450 }, { "epoch": 0.33, "grad_norm": 0.28251347088813633, "learning_rate": 0.00019284356210695234, "loss": 1.1064, "step": 3451 }, { "epoch": 0.33, "grad_norm": 0.28341004906992046, "learning_rate": 0.00019283768410990185, "loss": 1.0892, "step": 3452 }, { "epoch": 0.33, "grad_norm": 0.2883977918976394, "learning_rate": 0.00019283180378953, "loss": 1.1733, "step": 3453 }, { "epoch": 0.33, "grad_norm": 0.260390677587717, "learning_rate": 0.0001928259211459839, "loss": 0.9542, "step": 3454 }, { "epoch": 0.33, "grad_norm": 0.29136611867774026, "learning_rate": 0.0001928200361794108, "loss": 1.0044, "step": 3455 }, { "epoch": 0.33, "grad_norm": 0.26446944983636694, "learning_rate": 0.00019281414888995795, "loss": 1.083, "step": 3456 }, { "epoch": 0.33, "grad_norm": 0.2483864576667101, "learning_rate": 0.0001928082592777727, "loss": 1.0629, "step": 3457 }, { "epoch": 0.33, "grad_norm": 0.24787693382866996, "learning_rate": 0.00019280236734300243, "loss": 1.0367, "step": 3458 }, { "epoch": 0.33, "grad_norm": 0.3038463165988747, "learning_rate": 0.00019279647308579457, "loss": 1.0523, "step": 3459 }, { "epoch": 0.33, "grad_norm": 0.278602231033513, "learning_rate": 0.00019279057650629667, "loss": 1.168, "step": 3460 }, { "epoch": 0.33, "grad_norm": 0.2488098065303038, "learning_rate": 0.0001927846776046563, "loss": 1.0209, "step": 3461 }, { "epoch": 0.33, "grad_norm": 0.2758374803041767, "learning_rate": 0.00019277877638102103, "loss": 1.2443, "step": 3462 }, { "epoch": 0.33, "grad_norm": 0.26309237485251713, "learning_rate": 0.00019277287283553856, "loss": 0.9914, "step": 3463 }, { "epoch": 0.33, "grad_norm": 0.23795915412482324, "learning_rate": 0.00019276696696835668, "loss": 1.1086, "step": 3464 }, { "epoch": 0.33, "grad_norm": 0.3224730190795291, "learning_rate": 0.0001927610587796231, "loss": 1.2036, "step": 3465 }, { "epoch": 0.33, "grad_norm": 0.282804393013691, "learning_rate": 0.00019275514826948577, "loss": 1.137, "step": 3466 }, { "epoch": 0.33, "grad_norm": 0.26066639065374003, "learning_rate": 0.00019274923543809253, "loss": 1.127, "step": 3467 }, { "epoch": 0.33, "grad_norm": 0.24788428251360412, "learning_rate": 0.00019274332028559142, "loss": 1.0519, "step": 3468 }, { "epoch": 0.33, "grad_norm": 0.290526276121983, "learning_rate": 0.0001927374028121304, "loss": 1.1116, "step": 3469 }, { "epoch": 0.33, "grad_norm": 0.2673170033387275, "learning_rate": 0.00019273148301785759, "loss": 1.1649, "step": 3470 }, { "epoch": 0.33, "grad_norm": 0.264616160691623, "learning_rate": 0.00019272556090292115, "loss": 1.1035, "step": 3471 }, { "epoch": 0.33, "grad_norm": 0.22386779170530668, "learning_rate": 0.00019271963646746927, "loss": 1.0443, "step": 3472 }, { "epoch": 0.33, "grad_norm": 0.29588656285029913, "learning_rate": 0.00019271370971165022, "loss": 1.1571, "step": 3473 }, { "epoch": 0.33, "grad_norm": 0.26869130366990424, "learning_rate": 0.00019270778063561233, "loss": 0.8483, "step": 3474 }, { "epoch": 0.33, "grad_norm": 0.2442917412236581, "learning_rate": 0.00019270184923950395, "loss": 1.1119, "step": 3475 }, { "epoch": 0.33, "grad_norm": 0.27434823182121076, "learning_rate": 0.00019269591552347352, "loss": 1.0498, "step": 3476 }, { "epoch": 0.33, "grad_norm": 0.26614974020743387, "learning_rate": 0.00019268997948766956, "loss": 1.0791, "step": 3477 }, { "epoch": 0.33, "grad_norm": 0.2959411757891701, "learning_rate": 0.00019268404113224059, "loss": 1.1704, "step": 3478 }, { "epoch": 0.33, "grad_norm": 0.26576214735143877, "learning_rate": 0.00019267810045733527, "loss": 1.0451, "step": 3479 }, { "epoch": 0.33, "grad_norm": 0.26797549883523514, "learning_rate": 0.00019267215746310222, "loss": 1.1066, "step": 3480 }, { "epoch": 0.33, "grad_norm": 0.26415504530857153, "learning_rate": 0.0001926662121496902, "loss": 0.9906, "step": 3481 }, { "epoch": 0.33, "grad_norm": 0.2811191242637275, "learning_rate": 0.000192660264517248, "loss": 1.2062, "step": 3482 }, { "epoch": 0.33, "grad_norm": 0.26967275386109973, "learning_rate": 0.0001926543145659244, "loss": 1.1001, "step": 3483 }, { "epoch": 0.33, "grad_norm": 0.24658725442432267, "learning_rate": 0.00019264836229586837, "loss": 1.0698, "step": 3484 }, { "epoch": 0.33, "grad_norm": 0.2716734887510684, "learning_rate": 0.00019264240770722885, "loss": 1.0129, "step": 3485 }, { "epoch": 0.33, "grad_norm": 0.24634587706914243, "learning_rate": 0.00019263645080015485, "loss": 0.9294, "step": 3486 }, { "epoch": 0.33, "grad_norm": 0.26665979653082733, "learning_rate": 0.00019263049157479544, "loss": 1.0361, "step": 3487 }, { "epoch": 0.33, "grad_norm": 0.262098888665953, "learning_rate": 0.0001926245300312998, "loss": 1.0617, "step": 3488 }, { "epoch": 0.33, "grad_norm": 0.27380325184122906, "learning_rate": 0.00019261856616981703, "loss": 1.0235, "step": 3489 }, { "epoch": 0.33, "grad_norm": 0.25372809019534937, "learning_rate": 0.00019261259999049646, "loss": 1.0733, "step": 3490 }, { "epoch": 0.33, "grad_norm": 0.2703271495420422, "learning_rate": 0.00019260663149348736, "loss": 1.1621, "step": 3491 }, { "epoch": 0.33, "grad_norm": 0.285631485877341, "learning_rate": 0.00019260066067893915, "loss": 1.119, "step": 3492 }, { "epoch": 0.33, "grad_norm": 0.28067090620266755, "learning_rate": 0.00019259468754700114, "loss": 1.1283, "step": 3493 }, { "epoch": 0.33, "grad_norm": 0.25704313202950074, "learning_rate": 0.00019258871209782292, "loss": 1.1286, "step": 3494 }, { "epoch": 0.33, "grad_norm": 0.29208835332994176, "learning_rate": 0.00019258273433155399, "loss": 1.1172, "step": 3495 }, { "epoch": 0.33, "grad_norm": 0.26926728161587604, "learning_rate": 0.00019257675424834395, "loss": 1.0684, "step": 3496 }, { "epoch": 0.33, "grad_norm": 0.2732931154751818, "learning_rate": 0.00019257077184834244, "loss": 1.1041, "step": 3497 }, { "epoch": 0.33, "grad_norm": 0.26420765126059936, "learning_rate": 0.00019256478713169917, "loss": 1.0886, "step": 3498 }, { "epoch": 0.33, "grad_norm": 0.26082579867661154, "learning_rate": 0.00019255880009856396, "loss": 1.0275, "step": 3499 }, { "epoch": 0.33, "grad_norm": 0.28137431081329883, "learning_rate": 0.0001925528107490866, "loss": 1.0764, "step": 3500 }, { "epoch": 0.33, "grad_norm": 0.26907207440559944, "learning_rate": 0.00019254681908341696, "loss": 1.0703, "step": 3501 }, { "epoch": 0.34, "grad_norm": 0.31906228924539476, "learning_rate": 0.00019254082510170503, "loss": 1.0448, "step": 3502 }, { "epoch": 0.34, "grad_norm": 0.27170761831755913, "learning_rate": 0.0001925348288041008, "loss": 1.1028, "step": 3503 }, { "epoch": 0.34, "grad_norm": 0.2855022956911831, "learning_rate": 0.00019252883019075433, "loss": 1.0736, "step": 3504 }, { "epoch": 0.34, "grad_norm": 0.2857537178954334, "learning_rate": 0.0001925228292618157, "loss": 1.0172, "step": 3505 }, { "epoch": 0.34, "grad_norm": 0.26288096894393326, "learning_rate": 0.0001925168260174351, "loss": 1.0262, "step": 3506 }, { "epoch": 0.34, "grad_norm": 0.2682150517518947, "learning_rate": 0.00019251082045776283, "loss": 1.0227, "step": 3507 }, { "epoch": 0.34, "grad_norm": 0.28175497164571117, "learning_rate": 0.00019250481258294911, "loss": 1.0507, "step": 3508 }, { "epoch": 0.34, "grad_norm": 0.28227600468816066, "learning_rate": 0.00019249880239314435, "loss": 1.1972, "step": 3509 }, { "epoch": 0.34, "grad_norm": 0.2933560845393136, "learning_rate": 0.0001924927898884989, "loss": 1.0141, "step": 3510 }, { "epoch": 0.34, "grad_norm": 0.2659979972720397, "learning_rate": 0.0001924867750691633, "loss": 1.1605, "step": 3511 }, { "epoch": 0.34, "grad_norm": 0.28458190404309464, "learning_rate": 0.00019248075793528794, "loss": 1.1147, "step": 3512 }, { "epoch": 0.34, "grad_norm": 0.30482251375794783, "learning_rate": 0.00019247473848702358, "loss": 1.2108, "step": 3513 }, { "epoch": 0.34, "grad_norm": 0.30589628132765245, "learning_rate": 0.00019246871672452072, "loss": 1.1377, "step": 3514 }, { "epoch": 0.34, "grad_norm": 0.2515859551958094, "learning_rate": 0.00019246269264793013, "loss": 1.0792, "step": 3515 }, { "epoch": 0.34, "grad_norm": 0.2822974779571469, "learning_rate": 0.00019245666625740252, "loss": 1.1263, "step": 3516 }, { "epoch": 0.34, "grad_norm": 0.26320433675688415, "learning_rate": 0.00019245063755308873, "loss": 1.0395, "step": 3517 }, { "epoch": 0.34, "grad_norm": 0.27747602120555126, "learning_rate": 0.00019244460653513966, "loss": 1.0819, "step": 3518 }, { "epoch": 0.34, "grad_norm": 0.27754855559046226, "learning_rate": 0.00019243857320370622, "loss": 1.1293, "step": 3519 }, { "epoch": 0.34, "grad_norm": 0.2718497648773705, "learning_rate": 0.00019243253755893934, "loss": 1.0904, "step": 3520 }, { "epoch": 0.34, "grad_norm": 0.24497317555871573, "learning_rate": 0.00019242649960099018, "loss": 1.0817, "step": 3521 }, { "epoch": 0.34, "grad_norm": 0.2832235216362736, "learning_rate": 0.00019242045933000974, "loss": 1.1896, "step": 3522 }, { "epoch": 0.34, "grad_norm": 0.2804650893498078, "learning_rate": 0.00019241441674614925, "loss": 1.0998, "step": 3523 }, { "epoch": 0.34, "grad_norm": 0.2746480898368063, "learning_rate": 0.00019240837184955986, "loss": 1.1329, "step": 3524 }, { "epoch": 0.34, "grad_norm": 0.27811869745054746, "learning_rate": 0.0001924023246403929, "loss": 1.2432, "step": 3525 }, { "epoch": 0.34, "grad_norm": 0.2438034625107248, "learning_rate": 0.0001923962751187997, "loss": 1.0454, "step": 3526 }, { "epoch": 0.34, "grad_norm": 0.29872712215291863, "learning_rate": 0.00019239022328493166, "loss": 1.0986, "step": 3527 }, { "epoch": 0.34, "grad_norm": 0.28352993745178234, "learning_rate": 0.00019238416913894022, "loss": 1.0937, "step": 3528 }, { "epoch": 0.34, "grad_norm": 0.2573940016691036, "learning_rate": 0.00019237811268097685, "loss": 1.061, "step": 3529 }, { "epoch": 0.34, "grad_norm": 0.26547291167111237, "learning_rate": 0.00019237205391119317, "loss": 1.1062, "step": 3530 }, { "epoch": 0.34, "grad_norm": 0.2434569652472095, "learning_rate": 0.0001923659928297408, "loss": 0.994, "step": 3531 }, { "epoch": 0.34, "grad_norm": 0.24604489384038644, "learning_rate": 0.00019235992943677138, "loss": 1.0407, "step": 3532 }, { "epoch": 0.34, "grad_norm": 0.2566897641461854, "learning_rate": 0.0001923538637324367, "loss": 1.0814, "step": 3533 }, { "epoch": 0.34, "grad_norm": 0.2833118574755522, "learning_rate": 0.00019234779571688856, "loss": 1.0133, "step": 3534 }, { "epoch": 0.34, "grad_norm": 0.2641589772291687, "learning_rate": 0.00019234172539027875, "loss": 1.0779, "step": 3535 }, { "epoch": 0.34, "grad_norm": 0.26348102788720923, "learning_rate": 0.00019233565275275926, "loss": 1.0349, "step": 3536 }, { "epoch": 0.34, "grad_norm": 0.26442584979280237, "learning_rate": 0.00019232957780448203, "loss": 1.0908, "step": 3537 }, { "epoch": 0.34, "grad_norm": 0.21497138036257077, "learning_rate": 0.00019232350054559908, "loss": 1.0206, "step": 3538 }, { "epoch": 0.34, "grad_norm": 0.28584115929317455, "learning_rate": 0.00019231742097626248, "loss": 1.0948, "step": 3539 }, { "epoch": 0.34, "grad_norm": 0.28169323923794704, "learning_rate": 0.00019231133909662442, "loss": 1.1113, "step": 3540 }, { "epoch": 0.34, "grad_norm": 0.27099757848452277, "learning_rate": 0.0001923052549068371, "loss": 1.1069, "step": 3541 }, { "epoch": 0.34, "grad_norm": 0.2576793427814868, "learning_rate": 0.00019229916840705276, "loss": 0.9917, "step": 3542 }, { "epoch": 0.34, "grad_norm": 0.261636637446303, "learning_rate": 0.0001922930795974237, "loss": 1.0584, "step": 3543 }, { "epoch": 0.34, "grad_norm": 0.27616031671725766, "learning_rate": 0.0001922869884781023, "loss": 1.0674, "step": 3544 }, { "epoch": 0.34, "grad_norm": 0.2603326493271169, "learning_rate": 0.0001922808950492411, "loss": 0.9925, "step": 3545 }, { "epoch": 0.34, "grad_norm": 0.2611098162609211, "learning_rate": 0.00019227479931099243, "loss": 1.1144, "step": 3546 }, { "epoch": 0.34, "grad_norm": 0.28213224868364634, "learning_rate": 0.00019226870126350893, "loss": 0.9999, "step": 3547 }, { "epoch": 0.34, "grad_norm": 0.26909330897869277, "learning_rate": 0.00019226260090694322, "loss": 1.1827, "step": 3548 }, { "epoch": 0.34, "grad_norm": 0.2204883513538407, "learning_rate": 0.00019225649824144788, "loss": 1.0633, "step": 3549 }, { "epoch": 0.34, "grad_norm": 0.2800738248678148, "learning_rate": 0.00019225039326717575, "loss": 1.0618, "step": 3550 }, { "epoch": 0.34, "grad_norm": 0.3026024017055852, "learning_rate": 0.0001922442859842795, "loss": 1.0372, "step": 3551 }, { "epoch": 0.34, "grad_norm": 0.25052538631469556, "learning_rate": 0.00019223817639291206, "loss": 1.1017, "step": 3552 }, { "epoch": 0.34, "grad_norm": 0.2534940085440102, "learning_rate": 0.00019223206449322627, "loss": 1.063, "step": 3553 }, { "epoch": 0.34, "grad_norm": 0.2829769188702781, "learning_rate": 0.0001922259502853751, "loss": 1.0918, "step": 3554 }, { "epoch": 0.34, "grad_norm": 0.2515647647546932, "learning_rate": 0.0001922198337695116, "loss": 1.1084, "step": 3555 }, { "epoch": 0.34, "grad_norm": 0.2716176574803204, "learning_rate": 0.00019221371494578874, "loss": 1.1048, "step": 3556 }, { "epoch": 0.34, "grad_norm": 0.2544586203933426, "learning_rate": 0.00019220759381435976, "loss": 0.9334, "step": 3557 }, { "epoch": 0.34, "grad_norm": 0.25686564637963666, "learning_rate": 0.00019220147037537775, "loss": 1.1342, "step": 3558 }, { "epoch": 0.34, "grad_norm": 0.2856418475083659, "learning_rate": 0.00019219534462899603, "loss": 1.0993, "step": 3559 }, { "epoch": 0.34, "grad_norm": 0.25892037090159264, "learning_rate": 0.00019218921657536785, "loss": 1.1242, "step": 3560 }, { "epoch": 0.34, "grad_norm": 0.22637468596243374, "learning_rate": 0.00019218308621464657, "loss": 0.9695, "step": 3561 }, { "epoch": 0.34, "grad_norm": 0.24205716240527825, "learning_rate": 0.00019217695354698566, "loss": 0.9977, "step": 3562 }, { "epoch": 0.34, "grad_norm": 0.28419771649107817, "learning_rate": 0.00019217081857253855, "loss": 1.1305, "step": 3563 }, { "epoch": 0.34, "grad_norm": 0.2997025561180733, "learning_rate": 0.00019216468129145878, "loss": 1.1392, "step": 3564 }, { "epoch": 0.34, "grad_norm": 0.2575883782828923, "learning_rate": 0.00019215854170389992, "loss": 1.1146, "step": 3565 }, { "epoch": 0.34, "grad_norm": 0.2378156355843756, "learning_rate": 0.00019215239981001565, "loss": 1.1623, "step": 3566 }, { "epoch": 0.34, "grad_norm": 0.22766346459594225, "learning_rate": 0.00019214625560995963, "loss": 1.1813, "step": 3567 }, { "epoch": 0.34, "grad_norm": 0.2815971957249664, "learning_rate": 0.0001921401091038857, "loss": 1.0946, "step": 3568 }, { "epoch": 0.34, "grad_norm": 0.24854801853444775, "learning_rate": 0.0001921339602919476, "loss": 1.1996, "step": 3569 }, { "epoch": 0.34, "grad_norm": 0.28777374446942766, "learning_rate": 0.00019212780917429923, "loss": 1.2163, "step": 3570 }, { "epoch": 0.34, "grad_norm": 0.2755310826907456, "learning_rate": 0.00019212165575109452, "loss": 1.2163, "step": 3571 }, { "epoch": 0.34, "grad_norm": 0.23202223527427898, "learning_rate": 0.00019211550002248755, "loss": 1.0259, "step": 3572 }, { "epoch": 0.34, "grad_norm": 0.25126040011653317, "learning_rate": 0.00019210934198863225, "loss": 1.1719, "step": 3573 }, { "epoch": 0.34, "grad_norm": 0.31182851203253326, "learning_rate": 0.00019210318164968276, "loss": 0.9726, "step": 3574 }, { "epoch": 0.34, "grad_norm": 0.30037163533010824, "learning_rate": 0.00019209701900579332, "loss": 1.1423, "step": 3575 }, { "epoch": 0.34, "grad_norm": 0.2769715350587386, "learning_rate": 0.00019209085405711806, "loss": 1.023, "step": 3576 }, { "epoch": 0.34, "grad_norm": 0.2737139517166542, "learning_rate": 0.0001920846868038113, "loss": 1.1156, "step": 3577 }, { "epoch": 0.34, "grad_norm": 0.27215926972723625, "learning_rate": 0.00019207851724602738, "loss": 1.2292, "step": 3578 }, { "epoch": 0.34, "grad_norm": 0.25589677103292885, "learning_rate": 0.0001920723453839207, "loss": 1.066, "step": 3579 }, { "epoch": 0.34, "grad_norm": 0.27063989588024306, "learning_rate": 0.00019206617121764573, "loss": 1.1828, "step": 3580 }, { "epoch": 0.34, "grad_norm": 0.2621987437844738, "learning_rate": 0.00019205999474735695, "loss": 1.113, "step": 3581 }, { "epoch": 0.34, "grad_norm": 0.27664343721303936, "learning_rate": 0.00019205381597320895, "loss": 1.0935, "step": 3582 }, { "epoch": 0.34, "grad_norm": 0.2521436526457091, "learning_rate": 0.00019204763489535633, "loss": 1.0474, "step": 3583 }, { "epoch": 0.34, "grad_norm": 0.29893749568707256, "learning_rate": 0.00019204145151395383, "loss": 1.0565, "step": 3584 }, { "epoch": 0.34, "grad_norm": 0.2865188185320406, "learning_rate": 0.00019203526582915615, "loss": 1.0099, "step": 3585 }, { "epoch": 0.34, "grad_norm": 0.30840096394075645, "learning_rate": 0.0001920290778411181, "loss": 1.0744, "step": 3586 }, { "epoch": 0.34, "grad_norm": 0.27719489340614456, "learning_rate": 0.00019202288754999454, "loss": 1.1818, "step": 3587 }, { "epoch": 0.34, "grad_norm": 0.26038266306240404, "learning_rate": 0.00019201669495594036, "loss": 1.1681, "step": 3588 }, { "epoch": 0.34, "grad_norm": 0.2535864656959433, "learning_rate": 0.00019201050005911057, "loss": 1.0594, "step": 3589 }, { "epoch": 0.34, "grad_norm": 0.26996214011774267, "learning_rate": 0.0001920043028596602, "loss": 1.0319, "step": 3590 }, { "epoch": 0.34, "grad_norm": 0.293644543486573, "learning_rate": 0.00019199810335774432, "loss": 1.0956, "step": 3591 }, { "epoch": 0.34, "grad_norm": 0.24802640655532923, "learning_rate": 0.0001919919015535181, "loss": 1.028, "step": 3592 }, { "epoch": 0.34, "grad_norm": 0.288494054514753, "learning_rate": 0.0001919856974471367, "loss": 1.2163, "step": 3593 }, { "epoch": 0.34, "grad_norm": 0.28835626422546046, "learning_rate": 0.00019197949103875542, "loss": 1.1595, "step": 3594 }, { "epoch": 0.34, "grad_norm": 0.2834452761231032, "learning_rate": 0.00019197328232852957, "loss": 1.2283, "step": 3595 }, { "epoch": 0.34, "grad_norm": 0.2712391659872608, "learning_rate": 0.00019196707131661456, "loss": 0.9865, "step": 3596 }, { "epoch": 0.34, "grad_norm": 0.28960322854085857, "learning_rate": 0.00019196085800316577, "loss": 1.0389, "step": 3597 }, { "epoch": 0.34, "grad_norm": 0.26819478685827985, "learning_rate": 0.00019195464238833872, "loss": 1.0676, "step": 3598 }, { "epoch": 0.34, "grad_norm": 0.27909258767693773, "learning_rate": 0.00019194842447228894, "loss": 1.1414, "step": 3599 }, { "epoch": 0.34, "grad_norm": 0.29782335526237375, "learning_rate": 0.00019194220425517203, "loss": 1.0944, "step": 3600 }, { "epoch": 0.34, "grad_norm": 0.2990681773116163, "learning_rate": 0.00019193598173714368, "loss": 1.2351, "step": 3601 }, { "epoch": 0.34, "grad_norm": 0.25309601233066126, "learning_rate": 0.00019192975691835967, "loss": 0.9833, "step": 3602 }, { "epoch": 0.34, "grad_norm": 0.2530498249236964, "learning_rate": 0.00019192352979897564, "loss": 1.1116, "step": 3603 }, { "epoch": 0.34, "grad_norm": 0.261372825515641, "learning_rate": 0.00019191730037914755, "loss": 1.1451, "step": 3604 }, { "epoch": 0.34, "grad_norm": 0.2224404725086543, "learning_rate": 0.00019191106865903125, "loss": 1.1346, "step": 3605 }, { "epoch": 0.34, "grad_norm": 0.24745135431170612, "learning_rate": 0.00019190483463878266, "loss": 1.0474, "step": 3606 }, { "epoch": 0.35, "grad_norm": 0.2680064496452726, "learning_rate": 0.00019189859831855786, "loss": 1.0726, "step": 3607 }, { "epoch": 0.35, "grad_norm": 0.2728343814388248, "learning_rate": 0.00019189235969851285, "loss": 1.143, "step": 3608 }, { "epoch": 0.35, "grad_norm": 0.2758236497986754, "learning_rate": 0.0001918861187788038, "loss": 1.1073, "step": 3609 }, { "epoch": 0.35, "grad_norm": 0.24891564132409086, "learning_rate": 0.00019187987555958688, "loss": 0.9501, "step": 3610 }, { "epoch": 0.35, "grad_norm": 0.28175698358764867, "learning_rate": 0.00019187363004101834, "loss": 1.119, "step": 3611 }, { "epoch": 0.35, "grad_norm": 0.29465454747771924, "learning_rate": 0.00019186738222325446, "loss": 1.1071, "step": 3612 }, { "epoch": 0.35, "grad_norm": 0.28563894659906613, "learning_rate": 0.00019186113210645158, "loss": 0.9567, "step": 3613 }, { "epoch": 0.35, "grad_norm": 0.2665261670344539, "learning_rate": 0.00019185487969076618, "loss": 1.0447, "step": 3614 }, { "epoch": 0.35, "grad_norm": 0.29057112065403007, "learning_rate": 0.00019184862497635466, "loss": 1.094, "step": 3615 }, { "epoch": 0.35, "grad_norm": 0.24917921323756612, "learning_rate": 0.0001918423679633736, "loss": 1.0332, "step": 3616 }, { "epoch": 0.35, "grad_norm": 0.25724912043463755, "learning_rate": 0.0001918361086519795, "loss": 1.1526, "step": 3617 }, { "epoch": 0.35, "grad_norm": 0.30458714457974434, "learning_rate": 0.00019182984704232912, "loss": 1.1059, "step": 3618 }, { "epoch": 0.35, "grad_norm": 0.233003803525938, "learning_rate": 0.00019182358313457907, "loss": 1.0059, "step": 3619 }, { "epoch": 0.35, "grad_norm": 0.3095013736710616, "learning_rate": 0.00019181731692888615, "loss": 1.2226, "step": 3620 }, { "epoch": 0.35, "grad_norm": 0.3112580093485402, "learning_rate": 0.0001918110484254072, "loss": 1.0111, "step": 3621 }, { "epoch": 0.35, "grad_norm": 0.2615583637323915, "learning_rate": 0.00019180477762429905, "loss": 1.0417, "step": 3622 }, { "epoch": 0.35, "grad_norm": 0.25331226852742855, "learning_rate": 0.00019179850452571864, "loss": 1.1169, "step": 3623 }, { "epoch": 0.35, "grad_norm": 0.2653623351810319, "learning_rate": 0.00019179222912982295, "loss": 1.0001, "step": 3624 }, { "epoch": 0.35, "grad_norm": 0.3027616299847975, "learning_rate": 0.00019178595143676903, "loss": 1.0122, "step": 3625 }, { "epoch": 0.35, "grad_norm": 0.2596463436944122, "learning_rate": 0.00019177967144671403, "loss": 1.0603, "step": 3626 }, { "epoch": 0.35, "grad_norm": 0.26322578302738187, "learning_rate": 0.00019177338915981503, "loss": 1.1179, "step": 3627 }, { "epoch": 0.35, "grad_norm": 0.26304450867502877, "learning_rate": 0.00019176710457622932, "loss": 1.1117, "step": 3628 }, { "epoch": 0.35, "grad_norm": 0.26025672008441436, "learning_rate": 0.00019176081769611413, "loss": 1.0988, "step": 3629 }, { "epoch": 0.35, "grad_norm": 0.264676786106681, "learning_rate": 0.00019175452851962678, "loss": 1.1449, "step": 3630 }, { "epoch": 0.35, "grad_norm": 0.28753505819216246, "learning_rate": 0.00019174823704692473, "loss": 0.9785, "step": 3631 }, { "epoch": 0.35, "grad_norm": 0.27286886659302656, "learning_rate": 0.00019174194327816534, "loss": 1.0712, "step": 3632 }, { "epoch": 0.35, "grad_norm": 0.2851788136509597, "learning_rate": 0.0001917356472135062, "loss": 1.1066, "step": 3633 }, { "epoch": 0.35, "grad_norm": 0.2596390703916512, "learning_rate": 0.00019172934885310484, "loss": 1.03, "step": 3634 }, { "epoch": 0.35, "grad_norm": 0.27793648005875066, "learning_rate": 0.00019172304819711886, "loss": 1.0643, "step": 3635 }, { "epoch": 0.35, "grad_norm": 0.2785181884043944, "learning_rate": 0.00019171674524570595, "loss": 1.1464, "step": 3636 }, { "epoch": 0.35, "grad_norm": 0.30776612944819687, "learning_rate": 0.00019171043999902386, "loss": 1.0927, "step": 3637 }, { "epoch": 0.35, "grad_norm": 0.2477006043242119, "learning_rate": 0.00019170413245723036, "loss": 1.0933, "step": 3638 }, { "epoch": 0.35, "grad_norm": 0.28543848973925623, "learning_rate": 0.00019169782262048332, "loss": 1.0738, "step": 3639 }, { "epoch": 0.35, "grad_norm": 0.23369616317344152, "learning_rate": 0.00019169151048894066, "loss": 1.0539, "step": 3640 }, { "epoch": 0.35, "grad_norm": 0.26607663383639485, "learning_rate": 0.00019168519606276027, "loss": 1.0068, "step": 3641 }, { "epoch": 0.35, "grad_norm": 0.27489969296500305, "learning_rate": 0.00019167887934210028, "loss": 1.1252, "step": 3642 }, { "epoch": 0.35, "grad_norm": 0.26853165269525164, "learning_rate": 0.00019167256032711868, "loss": 1.1099, "step": 3643 }, { "epoch": 0.35, "grad_norm": 0.26605252525945594, "learning_rate": 0.00019166623901797366, "loss": 1.063, "step": 3644 }, { "epoch": 0.35, "grad_norm": 0.2857813746118553, "learning_rate": 0.0001916599154148234, "loss": 1.1668, "step": 3645 }, { "epoch": 0.35, "grad_norm": 0.3004731926385603, "learning_rate": 0.00019165358951782612, "loss": 1.1223, "step": 3646 }, { "epoch": 0.35, "grad_norm": 0.30149091959544416, "learning_rate": 0.0001916472613271402, "loss": 1.0124, "step": 3647 }, { "epoch": 0.35, "grad_norm": 0.24733679580665724, "learning_rate": 0.00019164093084292393, "loss": 1.1462, "step": 3648 }, { "epoch": 0.35, "grad_norm": 0.21990278220565118, "learning_rate": 0.00019163459806533582, "loss": 1.0147, "step": 3649 }, { "epoch": 0.35, "grad_norm": 0.26246347077998894, "learning_rate": 0.00019162826299453427, "loss": 1.0042, "step": 3650 }, { "epoch": 0.35, "grad_norm": 0.2552863623874917, "learning_rate": 0.00019162192563067785, "loss": 1.0509, "step": 3651 }, { "epoch": 0.35, "grad_norm": 0.26792837644378786, "learning_rate": 0.00019161558597392516, "loss": 1.0793, "step": 3652 }, { "epoch": 0.35, "grad_norm": 0.26602863063633964, "learning_rate": 0.00019160924402443488, "loss": 1.0786, "step": 3653 }, { "epoch": 0.35, "grad_norm": 0.2748453626108522, "learning_rate": 0.00019160289978236567, "loss": 1.1711, "step": 3654 }, { "epoch": 0.35, "grad_norm": 0.2892103795799046, "learning_rate": 0.00019159655324787634, "loss": 1.0855, "step": 3655 }, { "epoch": 0.35, "grad_norm": 0.29356881676510077, "learning_rate": 0.00019159020442112567, "loss": 1.0765, "step": 3656 }, { "epoch": 0.35, "grad_norm": 0.25797959221770866, "learning_rate": 0.0001915838533022726, "loss": 1.0762, "step": 3657 }, { "epoch": 0.35, "grad_norm": 0.24086139476910498, "learning_rate": 0.00019157749989147602, "loss": 1.0265, "step": 3658 }, { "epoch": 0.35, "grad_norm": 0.29161814627285754, "learning_rate": 0.00019157114418889498, "loss": 1.0909, "step": 3659 }, { "epoch": 0.35, "grad_norm": 0.2899831061438026, "learning_rate": 0.0001915647861946885, "loss": 1.0793, "step": 3660 }, { "epoch": 0.35, "grad_norm": 0.2444771404216185, "learning_rate": 0.00019155842590901564, "loss": 1.0686, "step": 3661 }, { "epoch": 0.35, "grad_norm": 0.2757863183788381, "learning_rate": 0.0001915520633320357, "loss": 1.1698, "step": 3662 }, { "epoch": 0.35, "grad_norm": 0.2850756152744979, "learning_rate": 0.00019154569846390783, "loss": 1.2098, "step": 3663 }, { "epoch": 0.35, "grad_norm": 0.2229292059572456, "learning_rate": 0.00019153933130479128, "loss": 1.1241, "step": 3664 }, { "epoch": 0.35, "grad_norm": 0.2763336437474556, "learning_rate": 0.00019153296185484545, "loss": 1.0689, "step": 3665 }, { "epoch": 0.35, "grad_norm": 0.26355818754319077, "learning_rate": 0.00019152659011422975, "loss": 1.1286, "step": 3666 }, { "epoch": 0.35, "grad_norm": 0.2834332018069187, "learning_rate": 0.0001915202160831036, "loss": 1.1524, "step": 3667 }, { "epoch": 0.35, "grad_norm": 0.2829528091832065, "learning_rate": 0.0001915138397616265, "loss": 1.1356, "step": 3668 }, { "epoch": 0.35, "grad_norm": 0.2549124521154805, "learning_rate": 0.00019150746114995809, "loss": 1.0091, "step": 3669 }, { "epoch": 0.35, "grad_norm": 0.2787455502134115, "learning_rate": 0.00019150108024825792, "loss": 1.1973, "step": 3670 }, { "epoch": 0.35, "grad_norm": 0.3045237462568005, "learning_rate": 0.00019149469705668574, "loss": 1.0021, "step": 3671 }, { "epoch": 0.35, "grad_norm": 0.2563873686787024, "learning_rate": 0.00019148831157540122, "loss": 1.0652, "step": 3672 }, { "epoch": 0.35, "grad_norm": 0.26639470855690445, "learning_rate": 0.0001914819238045642, "loss": 1.0402, "step": 3673 }, { "epoch": 0.35, "grad_norm": 0.24615772856913087, "learning_rate": 0.00019147553374433457, "loss": 1.1036, "step": 3674 }, { "epoch": 0.35, "grad_norm": 0.26050952114504417, "learning_rate": 0.00019146914139487223, "loss": 1.082, "step": 3675 }, { "epoch": 0.35, "grad_norm": 0.2711202214111707, "learning_rate": 0.00019146274675633712, "loss": 1.022, "step": 3676 }, { "epoch": 0.35, "grad_norm": 0.27642386341556874, "learning_rate": 0.0001914563498288893, "loss": 1.1196, "step": 3677 }, { "epoch": 0.35, "grad_norm": 0.26017602194644773, "learning_rate": 0.00019144995061268886, "loss": 1.0743, "step": 3678 }, { "epoch": 0.35, "grad_norm": 0.2802611591626233, "learning_rate": 0.00019144354910789592, "loss": 1.1542, "step": 3679 }, { "epoch": 0.35, "grad_norm": 0.30109865674446373, "learning_rate": 0.00019143714531467067, "loss": 1.0761, "step": 3680 }, { "epoch": 0.35, "grad_norm": 0.29232637101985814, "learning_rate": 0.00019143073923317343, "loss": 1.1051, "step": 3681 }, { "epoch": 0.35, "grad_norm": 0.26726588819259994, "learning_rate": 0.00019142433086356445, "loss": 1.1052, "step": 3682 }, { "epoch": 0.35, "grad_norm": 0.3170937940560179, "learning_rate": 0.00019141792020600416, "loss": 1.0991, "step": 3683 }, { "epoch": 0.35, "grad_norm": 0.26117278170269886, "learning_rate": 0.00019141150726065292, "loss": 1.1537, "step": 3684 }, { "epoch": 0.35, "grad_norm": 0.23933035001939557, "learning_rate": 0.00019140509202767128, "loss": 1.0034, "step": 3685 }, { "epoch": 0.35, "grad_norm": 0.261470828936781, "learning_rate": 0.00019139867450721978, "loss": 1.0356, "step": 3686 }, { "epoch": 0.35, "grad_norm": 0.28669585869759606, "learning_rate": 0.000191392254699459, "loss": 1.1138, "step": 3687 }, { "epoch": 0.35, "grad_norm": 0.28522961501456195, "learning_rate": 0.00019138583260454962, "loss": 1.1261, "step": 3688 }, { "epoch": 0.35, "grad_norm": 0.24515845894099997, "learning_rate": 0.00019137940822265234, "loss": 1.0393, "step": 3689 }, { "epoch": 0.35, "grad_norm": 0.30181790008831666, "learning_rate": 0.00019137298155392794, "loss": 1.1297, "step": 3690 }, { "epoch": 0.35, "grad_norm": 0.23025123557623256, "learning_rate": 0.0001913665525985372, "loss": 0.9475, "step": 3691 }, { "epoch": 0.35, "grad_norm": 0.2916644640665306, "learning_rate": 0.0001913601213566411, "loss": 1.0052, "step": 3692 }, { "epoch": 0.35, "grad_norm": 0.2914800928299035, "learning_rate": 0.00019135368782840058, "loss": 1.0656, "step": 3693 }, { "epoch": 0.35, "grad_norm": 0.2638508110242151, "learning_rate": 0.00019134725201397655, "loss": 1.1046, "step": 3694 }, { "epoch": 0.35, "grad_norm": 0.2862216325349102, "learning_rate": 0.00019134081391353018, "loss": 1.1441, "step": 3695 }, { "epoch": 0.35, "grad_norm": 0.31225707110177847, "learning_rate": 0.00019133437352722253, "loss": 1.143, "step": 3696 }, { "epoch": 0.35, "grad_norm": 0.30593521073577173, "learning_rate": 0.00019132793085521477, "loss": 1.1625, "step": 3697 }, { "epoch": 0.35, "grad_norm": 0.29258514429430604, "learning_rate": 0.00019132148589766812, "loss": 1.1549, "step": 3698 }, { "epoch": 0.35, "grad_norm": 0.25691353350560925, "learning_rate": 0.00019131503865474388, "loss": 1.086, "step": 3699 }, { "epoch": 0.35, "grad_norm": 0.28413967582202954, "learning_rate": 0.00019130858912660346, "loss": 1.0516, "step": 3700 }, { "epoch": 0.35, "grad_norm": 0.2486682279993249, "learning_rate": 0.0001913021373134082, "loss": 1.144, "step": 3701 }, { "epoch": 0.35, "grad_norm": 0.2799664249634028, "learning_rate": 0.00019129568321531957, "loss": 1.203, "step": 3702 }, { "epoch": 0.35, "grad_norm": 0.2360258518505738, "learning_rate": 0.00019128922683249905, "loss": 1.0592, "step": 3703 }, { "epoch": 0.35, "grad_norm": 0.28969445762878065, "learning_rate": 0.0001912827681651083, "loss": 1.2039, "step": 3704 }, { "epoch": 0.35, "grad_norm": 0.2925284783203515, "learning_rate": 0.0001912763072133089, "loss": 1.0849, "step": 3705 }, { "epoch": 0.35, "grad_norm": 0.33511999769210443, "learning_rate": 0.00019126984397726252, "loss": 1.0263, "step": 3706 }, { "epoch": 0.35, "grad_norm": 0.2898231101162137, "learning_rate": 0.00019126337845713098, "loss": 1.042, "step": 3707 }, { "epoch": 0.35, "grad_norm": 0.2787807763701908, "learning_rate": 0.000191256910653076, "loss": 0.9801, "step": 3708 }, { "epoch": 0.35, "grad_norm": 0.2655169550180212, "learning_rate": 0.0001912504405652595, "loss": 1.024, "step": 3709 }, { "epoch": 0.35, "grad_norm": 0.23484914814404617, "learning_rate": 0.00019124396819384336, "loss": 1.083, "step": 3710 }, { "epoch": 0.36, "grad_norm": 0.2399809754737731, "learning_rate": 0.00019123749353898957, "loss": 0.8806, "step": 3711 }, { "epoch": 0.36, "grad_norm": 0.25071481306083293, "learning_rate": 0.00019123101660086018, "loss": 1.0832, "step": 3712 }, { "epoch": 0.36, "grad_norm": 0.26019964031949383, "learning_rate": 0.00019122453737961724, "loss": 1.105, "step": 3713 }, { "epoch": 0.36, "grad_norm": 0.27184915954233857, "learning_rate": 0.00019121805587542294, "loss": 1.1295, "step": 3714 }, { "epoch": 0.36, "grad_norm": 0.3232217248521111, "learning_rate": 0.00019121157208843947, "loss": 1.0362, "step": 3715 }, { "epoch": 0.36, "grad_norm": 0.2484485517327626, "learning_rate": 0.00019120508601882906, "loss": 1.0417, "step": 3716 }, { "epoch": 0.36, "grad_norm": 0.28882594941774437, "learning_rate": 0.00019119859766675407, "loss": 1.0759, "step": 3717 }, { "epoch": 0.36, "grad_norm": 0.26746773610204805, "learning_rate": 0.00019119210703237685, "loss": 0.9896, "step": 3718 }, { "epoch": 0.36, "grad_norm": 0.3030903175749709, "learning_rate": 0.00019118561411585986, "loss": 1.0477, "step": 3719 }, { "epoch": 0.36, "grad_norm": 0.2647502528125401, "learning_rate": 0.00019117911891736552, "loss": 1.0581, "step": 3720 }, { "epoch": 0.36, "grad_norm": 0.31861248281328974, "learning_rate": 0.00019117262143705647, "loss": 1.144, "step": 3721 }, { "epoch": 0.36, "grad_norm": 0.25783949986971005, "learning_rate": 0.00019116612167509526, "loss": 1.2301, "step": 3722 }, { "epoch": 0.36, "grad_norm": 0.24441169216486786, "learning_rate": 0.00019115961963164454, "loss": 1.118, "step": 3723 }, { "epoch": 0.36, "grad_norm": 0.2659364840495592, "learning_rate": 0.00019115311530686706, "loss": 1.0523, "step": 3724 }, { "epoch": 0.36, "grad_norm": 0.2522530337190297, "learning_rate": 0.00019114660870092558, "loss": 0.9005, "step": 3725 }, { "epoch": 0.36, "grad_norm": 0.2647152274884048, "learning_rate": 0.00019114009981398294, "loss": 1.0931, "step": 3726 }, { "epoch": 0.36, "grad_norm": 0.2702016934536367, "learning_rate": 0.000191133588646202, "loss": 1.0832, "step": 3727 }, { "epoch": 0.36, "grad_norm": 0.24668859082298314, "learning_rate": 0.00019112707519774576, "loss": 0.9758, "step": 3728 }, { "epoch": 0.36, "grad_norm": 0.2672446623933551, "learning_rate": 0.0001911205594687772, "loss": 1.149, "step": 3729 }, { "epoch": 0.36, "grad_norm": 0.2600484573038422, "learning_rate": 0.00019111404145945933, "loss": 1.057, "step": 3730 }, { "epoch": 0.36, "grad_norm": 0.31076021763085576, "learning_rate": 0.00019110752116995535, "loss": 1.1161, "step": 3731 }, { "epoch": 0.36, "grad_norm": 0.2626503992229002, "learning_rate": 0.00019110099860042835, "loss": 1.1429, "step": 3732 }, { "epoch": 0.36, "grad_norm": 0.2550015497297485, "learning_rate": 0.00019109447375104165, "loss": 1.207, "step": 3733 }, { "epoch": 0.36, "grad_norm": 0.29381829699592593, "learning_rate": 0.00019108794662195847, "loss": 1.071, "step": 3734 }, { "epoch": 0.36, "grad_norm": 0.2582343525213059, "learning_rate": 0.00019108141721334217, "loss": 1.0302, "step": 3735 }, { "epoch": 0.36, "grad_norm": 0.3033664358520454, "learning_rate": 0.00019107488552535617, "loss": 1.0559, "step": 3736 }, { "epoch": 0.36, "grad_norm": 0.2804498013242838, "learning_rate": 0.00019106835155816395, "loss": 1.1039, "step": 3737 }, { "epoch": 0.36, "grad_norm": 0.2804541814983532, "learning_rate": 0.00019106181531192894, "loss": 1.1296, "step": 3738 }, { "epoch": 0.36, "grad_norm": 0.30659613447065276, "learning_rate": 0.0001910552767868148, "loss": 1.1612, "step": 3739 }, { "epoch": 0.36, "grad_norm": 0.2891921914653932, "learning_rate": 0.00019104873598298517, "loss": 1.0669, "step": 3740 }, { "epoch": 0.36, "grad_norm": 0.28558383598746206, "learning_rate": 0.00019104219290060366, "loss": 1.2211, "step": 3741 }, { "epoch": 0.36, "grad_norm": 0.29494252698624446, "learning_rate": 0.00019103564753983405, "loss": 0.9863, "step": 3742 }, { "epoch": 0.36, "grad_norm": 0.2556255730742099, "learning_rate": 0.00019102909990084018, "loss": 1.0618, "step": 3743 }, { "epoch": 0.36, "grad_norm": 0.2506965201053014, "learning_rate": 0.00019102254998378584, "loss": 0.9738, "step": 3744 }, { "epoch": 0.36, "grad_norm": 0.29913681320353497, "learning_rate": 0.00019101599778883498, "loss": 1.0876, "step": 3745 }, { "epoch": 0.36, "grad_norm": 0.2773312706953958, "learning_rate": 0.0001910094433161516, "loss": 1.0741, "step": 3746 }, { "epoch": 0.36, "grad_norm": 0.2917033659607459, "learning_rate": 0.0001910028865658997, "loss": 1.1748, "step": 3747 }, { "epoch": 0.36, "grad_norm": 0.2296887419120594, "learning_rate": 0.00019099632753824335, "loss": 0.9938, "step": 3748 }, { "epoch": 0.36, "grad_norm": 0.2704584908164929, "learning_rate": 0.00019098976623334673, "loss": 1.0166, "step": 3749 }, { "epoch": 0.36, "grad_norm": 0.21917669304416104, "learning_rate": 0.00019098320265137402, "loss": 1.0756, "step": 3750 }, { "epoch": 0.36, "grad_norm": 0.26280707594546593, "learning_rate": 0.0001909766367924895, "loss": 1.2615, "step": 3751 }, { "epoch": 0.36, "grad_norm": 0.29377384953319624, "learning_rate": 0.00019097006865685743, "loss": 1.0886, "step": 3752 }, { "epoch": 0.36, "grad_norm": 0.23301855169248875, "learning_rate": 0.00019096349824464226, "loss": 0.9808, "step": 3753 }, { "epoch": 0.36, "grad_norm": 0.253520755622324, "learning_rate": 0.00019095692555600832, "loss": 1.1587, "step": 3754 }, { "epoch": 0.36, "grad_norm": 0.2754796183392769, "learning_rate": 0.00019095035059112024, "loss": 1.1064, "step": 3755 }, { "epoch": 0.36, "grad_norm": 0.2747255493731151, "learning_rate": 0.00019094377335014242, "loss": 1.055, "step": 3756 }, { "epoch": 0.36, "grad_norm": 0.28773064135087933, "learning_rate": 0.00019093719383323952, "loss": 1.1881, "step": 3757 }, { "epoch": 0.36, "grad_norm": 0.2806691047427897, "learning_rate": 0.0001909306120405762, "loss": 1.1801, "step": 3758 }, { "epoch": 0.36, "grad_norm": 0.3132693357401042, "learning_rate": 0.00019092402797231715, "loss": 1.0493, "step": 3759 }, { "epoch": 0.36, "grad_norm": 0.25209467298228705, "learning_rate": 0.00019091744162862717, "loss": 1.03, "step": 3760 }, { "epoch": 0.36, "grad_norm": 0.31060570827727507, "learning_rate": 0.0001909108530096711, "loss": 1.21, "step": 3761 }, { "epoch": 0.36, "grad_norm": 0.25830756717657666, "learning_rate": 0.00019090426211561376, "loss": 1.0167, "step": 3762 }, { "epoch": 0.36, "grad_norm": 0.2535702002191736, "learning_rate": 0.00019089766894662014, "loss": 1.0515, "step": 3763 }, { "epoch": 0.36, "grad_norm": 0.278969068298096, "learning_rate": 0.00019089107350285522, "loss": 1.1331, "step": 3764 }, { "epoch": 0.36, "grad_norm": 0.2807324463593699, "learning_rate": 0.00019088447578448407, "loss": 1.0715, "step": 3765 }, { "epoch": 0.36, "grad_norm": 0.28955522038757464, "learning_rate": 0.0001908778757916718, "loss": 1.0668, "step": 3766 }, { "epoch": 0.36, "grad_norm": 0.2595700503045936, "learning_rate": 0.00019087127352458358, "loss": 1.1446, "step": 3767 }, { "epoch": 0.36, "grad_norm": 0.28444433757950754, "learning_rate": 0.00019086466898338462, "loss": 1.0789, "step": 3768 }, { "epoch": 0.36, "grad_norm": 0.2787597254903445, "learning_rate": 0.00019085806216824017, "loss": 0.9789, "step": 3769 }, { "epoch": 0.36, "grad_norm": 0.2941116656712117, "learning_rate": 0.0001908514530793157, "loss": 1.0751, "step": 3770 }, { "epoch": 0.36, "grad_norm": 0.22941662757128714, "learning_rate": 0.00019084484171677646, "loss": 1.1191, "step": 3771 }, { "epoch": 0.36, "grad_norm": 0.24999713030985823, "learning_rate": 0.00019083822808078798, "loss": 1.1661, "step": 3772 }, { "epoch": 0.36, "grad_norm": 0.28806317138126275, "learning_rate": 0.00019083161217151574, "loss": 1.138, "step": 3773 }, { "epoch": 0.36, "grad_norm": 0.26401411481347825, "learning_rate": 0.00019082499398912533, "loss": 1.0933, "step": 3774 }, { "epoch": 0.36, "grad_norm": 0.27780628237715865, "learning_rate": 0.00019081837353378237, "loss": 1.1479, "step": 3775 }, { "epoch": 0.36, "grad_norm": 0.2567041443182185, "learning_rate": 0.00019081175080565253, "loss": 0.9848, "step": 3776 }, { "epoch": 0.36, "grad_norm": 0.33134929043752975, "learning_rate": 0.00019080512580490154, "loss": 1.005, "step": 3777 }, { "epoch": 0.36, "grad_norm": 0.25790224327167643, "learning_rate": 0.0001907984985316952, "loss": 1.063, "step": 3778 }, { "epoch": 0.36, "grad_norm": 0.2542673197030738, "learning_rate": 0.0001907918689861994, "loss": 1.1054, "step": 3779 }, { "epoch": 0.36, "grad_norm": 0.29841436435986246, "learning_rate": 0.00019078523716858, "loss": 1.0715, "step": 3780 }, { "epoch": 0.36, "grad_norm": 0.2607650355473665, "learning_rate": 0.000190778603079003, "loss": 1.0562, "step": 3781 }, { "epoch": 0.36, "grad_norm": 0.27320429258836315, "learning_rate": 0.00019077196671763436, "loss": 1.0277, "step": 3782 }, { "epoch": 0.36, "grad_norm": 0.2548164654391468, "learning_rate": 0.00019076532808464026, "loss": 1.0822, "step": 3783 }, { "epoch": 0.36, "grad_norm": 0.3085428982977869, "learning_rate": 0.00019075868718018677, "loss": 1.1456, "step": 3784 }, { "epoch": 0.36, "grad_norm": 0.31897081017855045, "learning_rate": 0.0001907520440044401, "loss": 1.0907, "step": 3785 }, { "epoch": 0.36, "grad_norm": 0.2491436372725657, "learning_rate": 0.00019074539855756646, "loss": 1.0872, "step": 3786 }, { "epoch": 0.36, "grad_norm": 0.2844533490165419, "learning_rate": 0.00019073875083973222, "loss": 1.0052, "step": 3787 }, { "epoch": 0.36, "grad_norm": 0.3018741634697304, "learning_rate": 0.0001907321008511037, "loss": 0.9965, "step": 3788 }, { "epoch": 0.36, "grad_norm": 0.2423698565672897, "learning_rate": 0.0001907254485918473, "loss": 1.0812, "step": 3789 }, { "epoch": 0.36, "grad_norm": 0.2975822751648389, "learning_rate": 0.0001907187940621296, "loss": 0.9894, "step": 3790 }, { "epoch": 0.36, "grad_norm": 0.29200730112195994, "learning_rate": 0.000190712137262117, "loss": 1.0714, "step": 3791 }, { "epoch": 0.36, "grad_norm": 0.28370419210343656, "learning_rate": 0.0001907054781919762, "loss": 1.077, "step": 3792 }, { "epoch": 0.36, "grad_norm": 0.2722830323949, "learning_rate": 0.0001906988168518738, "loss": 1.0713, "step": 3793 }, { "epoch": 0.36, "grad_norm": 0.24874541049005577, "learning_rate": 0.00019069215324197646, "loss": 1.0738, "step": 3794 }, { "epoch": 0.36, "grad_norm": 0.28546406332135904, "learning_rate": 0.00019068548736245102, "loss": 1.1608, "step": 3795 }, { "epoch": 0.36, "grad_norm": 0.29760842933931714, "learning_rate": 0.00019067881921346427, "loss": 1.0152, "step": 3796 }, { "epoch": 0.36, "grad_norm": 0.2656364460449629, "learning_rate": 0.00019067214879518306, "loss": 1.1331, "step": 3797 }, { "epoch": 0.36, "grad_norm": 0.2807658825912433, "learning_rate": 0.00019066547610777437, "loss": 1.0905, "step": 3798 }, { "epoch": 0.36, "grad_norm": 0.2821813716967829, "learning_rate": 0.00019065880115140513, "loss": 1.0497, "step": 3799 }, { "epoch": 0.36, "grad_norm": 0.26730901537371593, "learning_rate": 0.00019065212392624243, "loss": 1.0252, "step": 3800 }, { "epoch": 0.36, "grad_norm": 0.26067574808177124, "learning_rate": 0.00019064544443245335, "loss": 0.9627, "step": 3801 }, { "epoch": 0.36, "grad_norm": 0.26226327026433305, "learning_rate": 0.00019063876267020507, "loss": 1.0757, "step": 3802 }, { "epoch": 0.36, "grad_norm": 0.2804595303992103, "learning_rate": 0.00019063207863966478, "loss": 1.1859, "step": 3803 }, { "epoch": 0.36, "grad_norm": 0.265318805788959, "learning_rate": 0.00019062539234099973, "loss": 1.0262, "step": 3804 }, { "epoch": 0.36, "grad_norm": 0.2530307087999173, "learning_rate": 0.00019061870377437733, "loss": 1.0954, "step": 3805 }, { "epoch": 0.36, "grad_norm": 0.2734697145712888, "learning_rate": 0.00019061201293996488, "loss": 1.0704, "step": 3806 }, { "epoch": 0.36, "grad_norm": 0.24462035828995993, "learning_rate": 0.00019060531983792987, "loss": 1.1139, "step": 3807 }, { "epoch": 0.36, "grad_norm": 0.2894517260409347, "learning_rate": 0.00019059862446843982, "loss": 1.0643, "step": 3808 }, { "epoch": 0.36, "grad_norm": 0.28221499051147075, "learning_rate": 0.00019059192683166222, "loss": 1.0379, "step": 3809 }, { "epoch": 0.36, "grad_norm": 0.2596597115877193, "learning_rate": 0.00019058522692776473, "loss": 1.1256, "step": 3810 }, { "epoch": 0.36, "grad_norm": 0.26056578947550624, "learning_rate": 0.00019057852475691498, "loss": 1.002, "step": 3811 }, { "epoch": 0.36, "grad_norm": 0.27913230085219015, "learning_rate": 0.00019057182031928074, "loss": 1.1348, "step": 3812 }, { "epoch": 0.36, "grad_norm": 0.27929429057142885, "learning_rate": 0.00019056511361502975, "loss": 1.1987, "step": 3813 }, { "epoch": 0.36, "grad_norm": 0.235530597825584, "learning_rate": 0.0001905584046443299, "loss": 1.1361, "step": 3814 }, { "epoch": 0.36, "grad_norm": 0.2522023187837088, "learning_rate": 0.00019055169340734908, "loss": 1.0713, "step": 3815 }, { "epoch": 0.37, "grad_norm": 0.280427344983831, "learning_rate": 0.0001905449799042552, "loss": 1.0557, "step": 3816 }, { "epoch": 0.37, "grad_norm": 0.2969054368145568, "learning_rate": 0.0001905382641352163, "loss": 1.1357, "step": 3817 }, { "epoch": 0.37, "grad_norm": 0.2440950945037354, "learning_rate": 0.00019053154610040044, "loss": 0.9862, "step": 3818 }, { "epoch": 0.37, "grad_norm": 0.26826804773960794, "learning_rate": 0.0001905248257999757, "loss": 1.1239, "step": 3819 }, { "epoch": 0.37, "grad_norm": 0.2505950276337422, "learning_rate": 0.00019051810323411034, "loss": 1.0886, "step": 3820 }, { "epoch": 0.37, "grad_norm": 0.27676795581151226, "learning_rate": 0.00019051137840297256, "loss": 1.1414, "step": 3821 }, { "epoch": 0.37, "grad_norm": 0.2571438975717034, "learning_rate": 0.00019050465130673067, "loss": 1.0289, "step": 3822 }, { "epoch": 0.37, "grad_norm": 0.25111218618944997, "learning_rate": 0.00019049792194555294, "loss": 0.9651, "step": 3823 }, { "epoch": 0.37, "grad_norm": 0.2610987856579739, "learning_rate": 0.00019049119031960788, "loss": 1.1635, "step": 3824 }, { "epoch": 0.37, "grad_norm": 0.2432839279077218, "learning_rate": 0.00019048445642906388, "loss": 1.1106, "step": 3825 }, { "epoch": 0.37, "grad_norm": 0.25411198588402173, "learning_rate": 0.00019047772027408954, "loss": 1.0766, "step": 3826 }, { "epoch": 0.37, "grad_norm": 0.2611685254036077, "learning_rate": 0.00019047098185485335, "loss": 1.0616, "step": 3827 }, { "epoch": 0.37, "grad_norm": 0.2486133581840728, "learning_rate": 0.00019046424117152402, "loss": 0.9964, "step": 3828 }, { "epoch": 0.37, "grad_norm": 0.3298032342837613, "learning_rate": 0.00019045749822427016, "loss": 0.9625, "step": 3829 }, { "epoch": 0.37, "grad_norm": 0.29292144210717325, "learning_rate": 0.00019045075301326057, "loss": 1.0938, "step": 3830 }, { "epoch": 0.37, "grad_norm": 0.2906727228238908, "learning_rate": 0.00019044400553866405, "loss": 1.0653, "step": 3831 }, { "epoch": 0.37, "grad_norm": 0.25691594404144225, "learning_rate": 0.00019043725580064939, "loss": 1.0817, "step": 3832 }, { "epoch": 0.37, "grad_norm": 0.2787203095759583, "learning_rate": 0.00019043050379938565, "loss": 1.0643, "step": 3833 }, { "epoch": 0.37, "grad_norm": 0.26242429857133936, "learning_rate": 0.00019042374953504165, "loss": 1.0383, "step": 3834 }, { "epoch": 0.37, "grad_norm": 0.2750170989226645, "learning_rate": 0.00019041699300778654, "loss": 1.0113, "step": 3835 }, { "epoch": 0.37, "grad_norm": 0.24689367063544376, "learning_rate": 0.00019041023421778933, "loss": 1.0589, "step": 3836 }, { "epoch": 0.37, "grad_norm": 0.3101949979537771, "learning_rate": 0.0001904034731652192, "loss": 1.0844, "step": 3837 }, { "epoch": 0.37, "grad_norm": 0.3026108279779279, "learning_rate": 0.00019039670985024533, "loss": 0.9748, "step": 3838 }, { "epoch": 0.37, "grad_norm": 0.28436273819827806, "learning_rate": 0.00019038994427303697, "loss": 1.13, "step": 3839 }, { "epoch": 0.37, "grad_norm": 0.26672874828160903, "learning_rate": 0.00019038317643376346, "loss": 0.9425, "step": 3840 }, { "epoch": 0.37, "grad_norm": 0.2551937537263912, "learning_rate": 0.00019037640633259417, "loss": 1.1193, "step": 3841 }, { "epoch": 0.37, "grad_norm": 0.2421616493910976, "learning_rate": 0.00019036963396969848, "loss": 1.1925, "step": 3842 }, { "epoch": 0.37, "grad_norm": 0.27403481599431856, "learning_rate": 0.00019036285934524594, "loss": 0.9328, "step": 3843 }, { "epoch": 0.37, "grad_norm": 0.2736343566787731, "learning_rate": 0.00019035608245940603, "loss": 1.1229, "step": 3844 }, { "epoch": 0.37, "grad_norm": 0.2984644122142435, "learning_rate": 0.00019034930331234836, "loss": 1.0706, "step": 3845 }, { "epoch": 0.37, "grad_norm": 0.25600173944542287, "learning_rate": 0.00019034252190424264, "loss": 1.1189, "step": 3846 }, { "epoch": 0.37, "grad_norm": 0.27750211746463055, "learning_rate": 0.0001903357382352585, "loss": 1.2051, "step": 3847 }, { "epoch": 0.37, "grad_norm": 0.308006852697485, "learning_rate": 0.00019032895230556573, "loss": 1.1935, "step": 3848 }, { "epoch": 0.37, "grad_norm": 0.2606236441816338, "learning_rate": 0.00019032216411533415, "loss": 1.1713, "step": 3849 }, { "epoch": 0.37, "grad_norm": 0.28436108514736663, "learning_rate": 0.00019031537366473369, "loss": 1.0714, "step": 3850 }, { "epoch": 0.37, "grad_norm": 0.3140913304294229, "learning_rate": 0.00019030858095393422, "loss": 1.109, "step": 3851 }, { "epoch": 0.37, "grad_norm": 0.27820186741488906, "learning_rate": 0.00019030178598310573, "loss": 1.1456, "step": 3852 }, { "epoch": 0.37, "grad_norm": 0.25313171010488306, "learning_rate": 0.00019029498875241832, "loss": 1.0996, "step": 3853 }, { "epoch": 0.37, "grad_norm": 0.26632647868200215, "learning_rate": 0.00019028818926204207, "loss": 1.0288, "step": 3854 }, { "epoch": 0.37, "grad_norm": 0.2802151300023979, "learning_rate": 0.00019028138751214714, "loss": 1.1178, "step": 3855 }, { "epoch": 0.37, "grad_norm": 0.28551319793457147, "learning_rate": 0.00019027458350290375, "loss": 1.1742, "step": 3856 }, { "epoch": 0.37, "grad_norm": 0.29354399099059836, "learning_rate": 0.00019026777723448214, "loss": 1.1434, "step": 3857 }, { "epoch": 0.37, "grad_norm": 0.28766227685882445, "learning_rate": 0.00019026096870705274, "loss": 1.1227, "step": 3858 }, { "epoch": 0.37, "grad_norm": 0.25123810673486535, "learning_rate": 0.0001902541579207858, "loss": 1.0514, "step": 3859 }, { "epoch": 0.37, "grad_norm": 0.3035800237589507, "learning_rate": 0.00019024734487585186, "loss": 1.0958, "step": 3860 }, { "epoch": 0.37, "grad_norm": 0.24205745440797985, "learning_rate": 0.0001902405295724214, "loss": 1.1692, "step": 3861 }, { "epoch": 0.37, "grad_norm": 0.27865447448135033, "learning_rate": 0.00019023371201066497, "loss": 1.0144, "step": 3862 }, { "epoch": 0.37, "grad_norm": 0.2660377758295032, "learning_rate": 0.0001902268921907532, "loss": 0.9894, "step": 3863 }, { "epoch": 0.37, "grad_norm": 0.2726012616002911, "learning_rate": 0.00019022007011285674, "loss": 1.0454, "step": 3864 }, { "epoch": 0.37, "grad_norm": 0.2726846785840144, "learning_rate": 0.0001902132457771463, "loss": 1.0598, "step": 3865 }, { "epoch": 0.37, "grad_norm": 0.25701844846319727, "learning_rate": 0.0001902064191837927, "loss": 1.1679, "step": 3866 }, { "epoch": 0.37, "grad_norm": 0.2921405168078913, "learning_rate": 0.00019019959033296678, "loss": 1.0524, "step": 3867 }, { "epoch": 0.37, "grad_norm": 0.27129522822886254, "learning_rate": 0.00019019275922483943, "loss": 1.105, "step": 3868 }, { "epoch": 0.37, "grad_norm": 0.2718161044722209, "learning_rate": 0.0001901859258595816, "loss": 1.0613, "step": 3869 }, { "epoch": 0.37, "grad_norm": 0.2536896991586868, "learning_rate": 0.00019017909023736428, "loss": 0.9996, "step": 3870 }, { "epoch": 0.37, "grad_norm": 0.29175405932195664, "learning_rate": 0.00019017225235835853, "loss": 1.1386, "step": 3871 }, { "epoch": 0.37, "grad_norm": 0.30144877670952286, "learning_rate": 0.00019016541222273553, "loss": 1.1468, "step": 3872 }, { "epoch": 0.37, "grad_norm": 0.2931426576366692, "learning_rate": 0.00019015856983066644, "loss": 1.1721, "step": 3873 }, { "epoch": 0.37, "grad_norm": 0.2916770600223048, "learning_rate": 0.00019015172518232242, "loss": 1.0945, "step": 3874 }, { "epoch": 0.37, "grad_norm": 0.27166976489578, "learning_rate": 0.00019014487827787483, "loss": 1.0891, "step": 3875 }, { "epoch": 0.37, "grad_norm": 0.2839705782430795, "learning_rate": 0.00019013802911749505, "loss": 1.1197, "step": 3876 }, { "epoch": 0.37, "grad_norm": 0.298375581966575, "learning_rate": 0.0001901311777013544, "loss": 1.0689, "step": 3877 }, { "epoch": 0.37, "grad_norm": 0.3013484850412901, "learning_rate": 0.0001901243240296244, "loss": 1.0352, "step": 3878 }, { "epoch": 0.37, "grad_norm": 0.25624648167577585, "learning_rate": 0.00019011746810247658, "loss": 1.0749, "step": 3879 }, { "epoch": 0.37, "grad_norm": 0.25490116870005675, "learning_rate": 0.00019011060992008244, "loss": 1.1551, "step": 3880 }, { "epoch": 0.37, "grad_norm": 0.28069672688477176, "learning_rate": 0.00019010374948261367, "loss": 0.9975, "step": 3881 }, { "epoch": 0.37, "grad_norm": 0.2651997155968314, "learning_rate": 0.0001900968867902419, "loss": 1.1731, "step": 3882 }, { "epoch": 0.37, "grad_norm": 0.3115313345415178, "learning_rate": 0.00019009002184313897, "loss": 1.1077, "step": 3883 }, { "epoch": 0.37, "grad_norm": 0.2775305592186655, "learning_rate": 0.00019008315464147662, "loss": 1.1736, "step": 3884 }, { "epoch": 0.37, "grad_norm": 0.28786278339401167, "learning_rate": 0.0001900762851854267, "loss": 1.0521, "step": 3885 }, { "epoch": 0.37, "grad_norm": 0.23797474326903245, "learning_rate": 0.0001900694134751611, "loss": 1.1405, "step": 3886 }, { "epoch": 0.37, "grad_norm": 0.2605654562908968, "learning_rate": 0.00019006253951085186, "loss": 1.029, "step": 3887 }, { "epoch": 0.37, "grad_norm": 0.25694650291010046, "learning_rate": 0.00019005566329267096, "loss": 1.0665, "step": 3888 }, { "epoch": 0.37, "grad_norm": 0.26716631196903606, "learning_rate": 0.0001900487848207905, "loss": 1.1494, "step": 3889 }, { "epoch": 0.37, "grad_norm": 0.3116260242272765, "learning_rate": 0.00019004190409538255, "loss": 1.117, "step": 3890 }, { "epoch": 0.37, "grad_norm": 0.26774746733662874, "learning_rate": 0.00019003502111661943, "loss": 1.1987, "step": 3891 }, { "epoch": 0.37, "grad_norm": 0.25363750120087714, "learning_rate": 0.0001900281358846733, "loss": 1.0831, "step": 3892 }, { "epoch": 0.37, "grad_norm": 0.28339718118220925, "learning_rate": 0.00019002124839971647, "loss": 1.1161, "step": 3893 }, { "epoch": 0.37, "grad_norm": 0.2541180184606548, "learning_rate": 0.00019001435866192133, "loss": 1.037, "step": 3894 }, { "epoch": 0.37, "grad_norm": 0.2662101976131836, "learning_rate": 0.0001900074666714603, "loss": 1.0495, "step": 3895 }, { "epoch": 0.37, "grad_norm": 0.24244962084116864, "learning_rate": 0.00019000057242850584, "loss": 1.143, "step": 3896 }, { "epoch": 0.37, "grad_norm": 0.2815830104446293, "learning_rate": 0.00018999367593323048, "loss": 1.1087, "step": 3897 }, { "epoch": 0.37, "grad_norm": 0.2661433168734344, "learning_rate": 0.00018998677718580687, "loss": 1.0699, "step": 3898 }, { "epoch": 0.37, "grad_norm": 0.278452400141501, "learning_rate": 0.00018997987618640756, "loss": 0.9984, "step": 3899 }, { "epoch": 0.37, "grad_norm": 0.2950352883266951, "learning_rate": 0.00018997297293520533, "loss": 1.0725, "step": 3900 }, { "epoch": 0.37, "grad_norm": 0.24855309318205562, "learning_rate": 0.00018996606743237288, "loss": 1.1091, "step": 3901 }, { "epoch": 0.37, "grad_norm": 0.26157079774866104, "learning_rate": 0.00018995915967808305, "loss": 1.137, "step": 3902 }, { "epoch": 0.37, "grad_norm": 0.2817964268191354, "learning_rate": 0.00018995224967250873, "loss": 1.0887, "step": 3903 }, { "epoch": 0.37, "grad_norm": 0.303055127418326, "learning_rate": 0.00018994533741582283, "loss": 1.0509, "step": 3904 }, { "epoch": 0.37, "grad_norm": 0.29319713632683003, "learning_rate": 0.00018993842290819833, "loss": 1.1527, "step": 3905 }, { "epoch": 0.37, "grad_norm": 0.28460911802279176, "learning_rate": 0.00018993150614980824, "loss": 1.1663, "step": 3906 }, { "epoch": 0.37, "grad_norm": 0.24323458888587937, "learning_rate": 0.00018992458714082574, "loss": 1.107, "step": 3907 }, { "epoch": 0.37, "grad_norm": 0.2507473651775092, "learning_rate": 0.0001899176658814239, "loss": 1.1291, "step": 3908 }, { "epoch": 0.37, "grad_norm": 0.26851442265034603, "learning_rate": 0.00018991074237177595, "loss": 1.0296, "step": 3909 }, { "epoch": 0.37, "grad_norm": 0.27910401854051703, "learning_rate": 0.0001899038166120552, "loss": 1.0243, "step": 3910 }, { "epoch": 0.37, "grad_norm": 0.2643335688729103, "learning_rate": 0.0001898968886024349, "loss": 1.1176, "step": 3911 }, { "epoch": 0.37, "grad_norm": 0.2845122162938141, "learning_rate": 0.0001898899583430885, "loss": 1.1254, "step": 3912 }, { "epoch": 0.37, "grad_norm": 0.2631105012541486, "learning_rate": 0.00018988302583418937, "loss": 1.0436, "step": 3913 }, { "epoch": 0.37, "grad_norm": 0.26527130872751753, "learning_rate": 0.00018987609107591104, "loss": 1.0063, "step": 3914 }, { "epoch": 0.37, "grad_norm": 0.26612171965346043, "learning_rate": 0.00018986915406842708, "loss": 1.1018, "step": 3915 }, { "epoch": 0.37, "grad_norm": 0.29895893771415827, "learning_rate": 0.000189862214811911, "loss": 1.0477, "step": 3916 }, { "epoch": 0.37, "grad_norm": 0.298508391495162, "learning_rate": 0.00018985527330653653, "loss": 1.1202, "step": 3917 }, { "epoch": 0.37, "grad_norm": 0.2862203670774759, "learning_rate": 0.0001898483295524774, "loss": 1.1676, "step": 3918 }, { "epoch": 0.37, "grad_norm": 0.7409275636159137, "learning_rate": 0.00018984138354990736, "loss": 1.4645, "step": 3919 }, { "epoch": 0.38, "grad_norm": 0.26519753642569927, "learning_rate": 0.0001898344352990002, "loss": 1.0743, "step": 3920 }, { "epoch": 0.38, "grad_norm": 0.29786591251006633, "learning_rate": 0.00018982748479992988, "loss": 1.1278, "step": 3921 }, { "epoch": 0.38, "grad_norm": 0.278707089601725, "learning_rate": 0.00018982053205287024, "loss": 1.0982, "step": 3922 }, { "epoch": 0.38, "grad_norm": 0.233468164464521, "learning_rate": 0.00018981357705799538, "loss": 1.0338, "step": 3923 }, { "epoch": 0.38, "grad_norm": 0.32242913731155076, "learning_rate": 0.0001898066198154793, "loss": 1.1282, "step": 3924 }, { "epoch": 0.38, "grad_norm": 0.2712636448427821, "learning_rate": 0.00018979966032549612, "loss": 1.0868, "step": 3925 }, { "epoch": 0.38, "grad_norm": 0.27812674663111897, "learning_rate": 0.00018979269858822, "loss": 0.9507, "step": 3926 }, { "epoch": 0.38, "grad_norm": 0.31036023474014135, "learning_rate": 0.00018978573460382516, "loss": 1.066, "step": 3927 }, { "epoch": 0.38, "grad_norm": 0.2741036928468329, "learning_rate": 0.00018977876837248587, "loss": 1.114, "step": 3928 }, { "epoch": 0.38, "grad_norm": 0.32960721822124844, "learning_rate": 0.0001897717998943765, "loss": 0.9916, "step": 3929 }, { "epoch": 0.38, "grad_norm": 0.25938355281972847, "learning_rate": 0.0001897648291696714, "loss": 1.0566, "step": 3930 }, { "epoch": 0.38, "grad_norm": 0.27023081057739146, "learning_rate": 0.00018975785619854504, "loss": 1.1365, "step": 3931 }, { "epoch": 0.38, "grad_norm": 0.30667049605255553, "learning_rate": 0.00018975088098117194, "loss": 1.0995, "step": 3932 }, { "epoch": 0.38, "grad_norm": 0.24394315516483825, "learning_rate": 0.00018974390351772665, "loss": 1.0412, "step": 3933 }, { "epoch": 0.38, "grad_norm": 0.2801905314214369, "learning_rate": 0.00018973692380838371, "loss": 1.0848, "step": 3934 }, { "epoch": 0.38, "grad_norm": 0.2381280727680932, "learning_rate": 0.00018972994185331788, "loss": 1.0596, "step": 3935 }, { "epoch": 0.38, "grad_norm": 0.23812773003536764, "learning_rate": 0.00018972295765270388, "loss": 1.0666, "step": 3936 }, { "epoch": 0.38, "grad_norm": 0.32096602227564136, "learning_rate": 0.00018971597120671647, "loss": 1.0948, "step": 3937 }, { "epoch": 0.38, "grad_norm": 0.26277005671959386, "learning_rate": 0.0001897089825155305, "loss": 0.992, "step": 3938 }, { "epoch": 0.38, "grad_norm": 0.25895544694478345, "learning_rate": 0.00018970199157932084, "loss": 1.0121, "step": 3939 }, { "epoch": 0.38, "grad_norm": 0.33276374338059417, "learning_rate": 0.0001896949983982625, "loss": 1.1811, "step": 3940 }, { "epoch": 0.38, "grad_norm": 0.2899952697366727, "learning_rate": 0.00018968800297253043, "loss": 1.0459, "step": 3941 }, { "epoch": 0.38, "grad_norm": 0.28332378815166603, "learning_rate": 0.0001896810053022997, "loss": 1.1406, "step": 3942 }, { "epoch": 0.38, "grad_norm": 0.2563768370833164, "learning_rate": 0.00018967400538774548, "loss": 1.0918, "step": 3943 }, { "epoch": 0.38, "grad_norm": 0.2527720587986432, "learning_rate": 0.00018966700322904293, "loss": 1.0922, "step": 3944 }, { "epoch": 0.38, "grad_norm": 0.25592462221706, "learning_rate": 0.00018965999882636725, "loss": 1.0781, "step": 3945 }, { "epoch": 0.38, "grad_norm": 0.3089279882540109, "learning_rate": 0.00018965299217989375, "loss": 1.1559, "step": 3946 }, { "epoch": 0.38, "grad_norm": 0.25525835490586896, "learning_rate": 0.00018964598328979776, "loss": 1.1349, "step": 3947 }, { "epoch": 0.38, "grad_norm": 0.2841608784955134, "learning_rate": 0.00018963897215625472, "loss": 0.984, "step": 3948 }, { "epoch": 0.38, "grad_norm": 0.272016327051106, "learning_rate": 0.00018963195877944007, "loss": 1.058, "step": 3949 }, { "epoch": 0.38, "grad_norm": 0.3037390334949688, "learning_rate": 0.0001896249431595293, "loss": 1.0846, "step": 3950 }, { "epoch": 0.38, "grad_norm": 0.2497613489471199, "learning_rate": 0.000189617925296698, "loss": 1.0793, "step": 3951 }, { "epoch": 0.38, "grad_norm": 0.27852691608884916, "learning_rate": 0.00018961090519112182, "loss": 1.1262, "step": 3952 }, { "epoch": 0.38, "grad_norm": 0.2844330777819289, "learning_rate": 0.0001896038828429764, "loss": 1.0902, "step": 3953 }, { "epoch": 0.38, "grad_norm": 0.30372667850264373, "learning_rate": 0.0001895968582524375, "loss": 0.9642, "step": 3954 }, { "epoch": 0.38, "grad_norm": 0.24532996718177777, "learning_rate": 0.00018958983141968095, "loss": 0.9768, "step": 3955 }, { "epoch": 0.38, "grad_norm": 0.27188695012581215, "learning_rate": 0.0001895828023448825, "loss": 1.0809, "step": 3956 }, { "epoch": 0.38, "grad_norm": 0.28620857758847607, "learning_rate": 0.00018957577102821817, "loss": 1.1338, "step": 3957 }, { "epoch": 0.38, "grad_norm": 0.2967269096451259, "learning_rate": 0.00018956873746986386, "loss": 1.0498, "step": 3958 }, { "epoch": 0.38, "grad_norm": 0.28197346617437485, "learning_rate": 0.00018956170166999558, "loss": 1.1001, "step": 3959 }, { "epoch": 0.38, "grad_norm": 0.32447316500456774, "learning_rate": 0.00018955466362878943, "loss": 1.1346, "step": 3960 }, { "epoch": 0.38, "grad_norm": 0.29206790559618895, "learning_rate": 0.00018954762334642158, "loss": 1.1531, "step": 3961 }, { "epoch": 0.38, "grad_norm": 0.31059482758918283, "learning_rate": 0.00018954058082306817, "loss": 1.0824, "step": 3962 }, { "epoch": 0.38, "grad_norm": 0.2599356809697041, "learning_rate": 0.0001895335360589054, "loss": 1.0651, "step": 3963 }, { "epoch": 0.38, "grad_norm": 0.28568670159243, "learning_rate": 0.00018952648905410966, "loss": 1.111, "step": 3964 }, { "epoch": 0.38, "grad_norm": 0.2741580155840068, "learning_rate": 0.0001895194398088573, "loss": 1.0367, "step": 3965 }, { "epoch": 0.38, "grad_norm": 0.2460551214193779, "learning_rate": 0.00018951238832332464, "loss": 1.1306, "step": 3966 }, { "epoch": 0.38, "grad_norm": 0.28164939664440597, "learning_rate": 0.00018950533459768823, "loss": 1.0736, "step": 3967 }, { "epoch": 0.38, "grad_norm": 0.2935359891153058, "learning_rate": 0.00018949827863212456, "loss": 1.1287, "step": 3968 }, { "epoch": 0.38, "grad_norm": 0.27127546853360357, "learning_rate": 0.00018949122042681023, "loss": 0.9919, "step": 3969 }, { "epoch": 0.38, "grad_norm": 0.3153198801953517, "learning_rate": 0.00018948415998192182, "loss": 1.1561, "step": 3970 }, { "epoch": 0.38, "grad_norm": 0.2639733592331646, "learning_rate": 0.0001894770972976361, "loss": 1.0033, "step": 3971 }, { "epoch": 0.38, "grad_norm": 0.26313646288953435, "learning_rate": 0.0001894700323741298, "loss": 1.1798, "step": 3972 }, { "epoch": 0.38, "grad_norm": 0.2711515260554288, "learning_rate": 0.0001894629652115797, "loss": 1.1126, "step": 3973 }, { "epoch": 0.38, "grad_norm": 0.272492854127571, "learning_rate": 0.0001894558958101627, "loss": 0.9806, "step": 3974 }, { "epoch": 0.38, "grad_norm": 0.27502865859424286, "learning_rate": 0.00018944882417005565, "loss": 1.088, "step": 3975 }, { "epoch": 0.38, "grad_norm": 0.3343809998581711, "learning_rate": 0.00018944175029143558, "loss": 1.1771, "step": 3976 }, { "epoch": 0.38, "grad_norm": 0.2894367965617339, "learning_rate": 0.0001894346741744795, "loss": 1.1597, "step": 3977 }, { "epoch": 0.38, "grad_norm": 0.25286298932737533, "learning_rate": 0.00018942759581936446, "loss": 1.0403, "step": 3978 }, { "epoch": 0.38, "grad_norm": 0.27494211784286493, "learning_rate": 0.00018942051522626764, "loss": 1.1405, "step": 3979 }, { "epoch": 0.38, "grad_norm": 0.28818353355011783, "learning_rate": 0.00018941343239536624, "loss": 1.0295, "step": 3980 }, { "epoch": 0.38, "grad_norm": 0.25536310488024094, "learning_rate": 0.0001894063473268375, "loss": 1.0547, "step": 3981 }, { "epoch": 0.38, "grad_norm": 0.32569914313859244, "learning_rate": 0.00018939926002085872, "loss": 1.2414, "step": 3982 }, { "epoch": 0.38, "grad_norm": 0.2869364080234295, "learning_rate": 0.0001893921704776073, "loss": 0.9819, "step": 3983 }, { "epoch": 0.38, "grad_norm": 0.2657350930953723, "learning_rate": 0.0001893850786972606, "loss": 1.0408, "step": 3984 }, { "epoch": 0.38, "grad_norm": 0.29775662133242436, "learning_rate": 0.0001893779846799961, "loss": 1.1624, "step": 3985 }, { "epoch": 0.38, "grad_norm": 0.27355863349278053, "learning_rate": 0.00018937088842599142, "loss": 1.2827, "step": 3986 }, { "epoch": 0.38, "grad_norm": 0.2732303129004138, "learning_rate": 0.00018936378993542408, "loss": 1.0976, "step": 3987 }, { "epoch": 0.38, "grad_norm": 0.3021647282609973, "learning_rate": 0.00018935668920847171, "loss": 1.0696, "step": 3988 }, { "epoch": 0.38, "grad_norm": 0.272227355179039, "learning_rate": 0.00018934958624531207, "loss": 1.0169, "step": 3989 }, { "epoch": 0.38, "grad_norm": 0.2958029148879499, "learning_rate": 0.00018934248104612283, "loss": 1.1677, "step": 3990 }, { "epoch": 0.38, "grad_norm": 0.2961165738395803, "learning_rate": 0.00018933537361108188, "loss": 1.1413, "step": 3991 }, { "epoch": 0.38, "grad_norm": 0.3276060272777321, "learning_rate": 0.00018932826394036707, "loss": 1.1431, "step": 3992 }, { "epoch": 0.38, "grad_norm": 0.24414858635640996, "learning_rate": 0.00018932115203415631, "loss": 0.9642, "step": 3993 }, { "epoch": 0.38, "grad_norm": 0.24286855215872527, "learning_rate": 0.0001893140378926276, "loss": 1.0458, "step": 3994 }, { "epoch": 0.38, "grad_norm": 0.2846207625112956, "learning_rate": 0.0001893069215159589, "loss": 1.1316, "step": 3995 }, { "epoch": 0.38, "grad_norm": 0.28248248879909615, "learning_rate": 0.00018929980290432842, "loss": 1.1581, "step": 3996 }, { "epoch": 0.38, "grad_norm": 0.27218202374232026, "learning_rate": 0.00018929268205791422, "loss": 1.2227, "step": 3997 }, { "epoch": 0.38, "grad_norm": 0.26852560065398273, "learning_rate": 0.00018928555897689456, "loss": 0.9302, "step": 3998 }, { "epoch": 0.38, "grad_norm": 0.2776665044902241, "learning_rate": 0.00018927843366144765, "loss": 1.0248, "step": 3999 }, { "epoch": 0.38, "grad_norm": 0.27797522607330477, "learning_rate": 0.00018927130611175183, "loss": 1.166, "step": 4000 }, { "epoch": 0.38, "grad_norm": 0.23621425637102375, "learning_rate": 0.00018926417632798547, "loss": 1.1106, "step": 4001 }, { "epoch": 0.38, "grad_norm": 0.2926545007076596, "learning_rate": 0.000189257044310327, "loss": 1.1281, "step": 4002 }, { "epoch": 0.38, "grad_norm": 0.27712375321038807, "learning_rate": 0.00018924991005895493, "loss": 1.0626, "step": 4003 }, { "epoch": 0.38, "grad_norm": 0.231003959100629, "learning_rate": 0.0001892427735740477, "loss": 0.9582, "step": 4004 }, { "epoch": 0.38, "grad_norm": 0.29692646912091747, "learning_rate": 0.00018923563485578405, "loss": 1.0728, "step": 4005 }, { "epoch": 0.38, "grad_norm": 0.26325491425794023, "learning_rate": 0.0001892284939043425, "loss": 1.1163, "step": 4006 }, { "epoch": 0.38, "grad_norm": 0.27243932891523137, "learning_rate": 0.00018922135071990185, "loss": 1.0374, "step": 4007 }, { "epoch": 0.38, "grad_norm": 0.28198690740674986, "learning_rate": 0.0001892142053026408, "loss": 1.1425, "step": 4008 }, { "epoch": 0.38, "grad_norm": 0.28509269273338117, "learning_rate": 0.00018920705765273818, "loss": 1.3779, "step": 4009 }, { "epoch": 0.38, "grad_norm": 0.28126037107058494, "learning_rate": 0.0001891999077703729, "loss": 1.1616, "step": 4010 }, { "epoch": 0.38, "grad_norm": 0.3282196814898794, "learning_rate": 0.00018919275565572387, "loss": 1.1184, "step": 4011 }, { "epoch": 0.38, "grad_norm": 0.2796592489259632, "learning_rate": 0.00018918560130897006, "loss": 1.1493, "step": 4012 }, { "epoch": 0.38, "grad_norm": 0.35705040515515696, "learning_rate": 0.00018917844473029054, "loss": 1.035, "step": 4013 }, { "epoch": 0.38, "grad_norm": 0.26954678750551575, "learning_rate": 0.00018917128591986439, "loss": 1.1217, "step": 4014 }, { "epoch": 0.38, "grad_norm": 0.268134645202258, "learning_rate": 0.00018916412487787076, "loss": 1.1415, "step": 4015 }, { "epoch": 0.38, "grad_norm": 0.2869059780412037, "learning_rate": 0.0001891569616044889, "loss": 1.1142, "step": 4016 }, { "epoch": 0.38, "grad_norm": 0.2866248425276922, "learning_rate": 0.000189149796099898, "loss": 0.968, "step": 4017 }, { "epoch": 0.38, "grad_norm": 0.2854847382998254, "learning_rate": 0.00018914262836427744, "loss": 1.1804, "step": 4018 }, { "epoch": 0.38, "grad_norm": 0.28804702455957, "learning_rate": 0.00018913545839780658, "loss": 0.9394, "step": 4019 }, { "epoch": 0.38, "grad_norm": 0.26449776920625534, "learning_rate": 0.00018912828620066486, "loss": 1.1066, "step": 4020 }, { "epoch": 0.38, "grad_norm": 0.2946737668152781, "learning_rate": 0.00018912111177303177, "loss": 1.2112, "step": 4021 }, { "epoch": 0.38, "grad_norm": 0.2901645389196644, "learning_rate": 0.00018911393511508685, "loss": 1.039, "step": 4022 }, { "epoch": 0.38, "grad_norm": 0.27790893974884995, "learning_rate": 0.00018910675622700967, "loss": 1.058, "step": 4023 }, { "epoch": 0.38, "grad_norm": 0.27176601010684964, "learning_rate": 0.00018909957510897992, "loss": 1.1212, "step": 4024 }, { "epoch": 0.39, "grad_norm": 0.27542708140447214, "learning_rate": 0.00018909239176117732, "loss": 1.0686, "step": 4025 }, { "epoch": 0.39, "grad_norm": 0.27230675704373336, "learning_rate": 0.0001890852061837816, "loss": 1.1652, "step": 4026 }, { "epoch": 0.39, "grad_norm": 0.2557788458829725, "learning_rate": 0.00018907801837697265, "loss": 0.955, "step": 4027 }, { "epoch": 0.39, "grad_norm": 0.2601254941936526, "learning_rate": 0.00018907082834093028, "loss": 1.0526, "step": 4028 }, { "epoch": 0.39, "grad_norm": 0.266059316761016, "learning_rate": 0.00018906363607583445, "loss": 1.0664, "step": 4029 }, { "epoch": 0.39, "grad_norm": 0.26037123171682397, "learning_rate": 0.00018905644158186515, "loss": 1.027, "step": 4030 }, { "epoch": 0.39, "grad_norm": 0.2563328237402112, "learning_rate": 0.00018904924485920247, "loss": 1.1243, "step": 4031 }, { "epoch": 0.39, "grad_norm": 0.28925250146928455, "learning_rate": 0.0001890420459080264, "loss": 1.1726, "step": 4032 }, { "epoch": 0.39, "grad_norm": 0.28975657876816185, "learning_rate": 0.0001890348447285172, "loss": 1.0362, "step": 4033 }, { "epoch": 0.39, "grad_norm": 0.266527827382827, "learning_rate": 0.00018902764132085507, "loss": 1.1282, "step": 4034 }, { "epoch": 0.39, "grad_norm": 0.2819868839387067, "learning_rate": 0.00018902043568522027, "loss": 1.0481, "step": 4035 }, { "epoch": 0.39, "grad_norm": 0.26260787323629486, "learning_rate": 0.0001890132278217931, "loss": 1.0494, "step": 4036 }, { "epoch": 0.39, "grad_norm": 0.29231684987402756, "learning_rate": 0.00018900601773075396, "loss": 1.0668, "step": 4037 }, { "epoch": 0.39, "grad_norm": 0.2689538216980018, "learning_rate": 0.00018899880541228332, "loss": 1.0549, "step": 4038 }, { "epoch": 0.39, "grad_norm": 0.2748948697066311, "learning_rate": 0.0001889915908665616, "loss": 1.0691, "step": 4039 }, { "epoch": 0.39, "grad_norm": 0.26070699821281274, "learning_rate": 0.00018898437409376942, "loss": 1.0906, "step": 4040 }, { "epoch": 0.39, "grad_norm": 0.29273143034067534, "learning_rate": 0.00018897715509408734, "loss": 1.0254, "step": 4041 }, { "epoch": 0.39, "grad_norm": 0.3032571667610144, "learning_rate": 0.00018896993386769602, "loss": 0.9515, "step": 4042 }, { "epoch": 0.39, "grad_norm": 0.2567507749894945, "learning_rate": 0.0001889627104147762, "loss": 1.054, "step": 4043 }, { "epoch": 0.39, "grad_norm": 0.3048904231170557, "learning_rate": 0.00018895548473550866, "loss": 1.0738, "step": 4044 }, { "epoch": 0.39, "grad_norm": 0.27093062547894164, "learning_rate": 0.00018894825683007417, "loss": 1.1361, "step": 4045 }, { "epoch": 0.39, "grad_norm": 0.2907476290882117, "learning_rate": 0.00018894102669865368, "loss": 1.2182, "step": 4046 }, { "epoch": 0.39, "grad_norm": 0.27932158114529015, "learning_rate": 0.0001889337943414281, "loss": 1.0292, "step": 4047 }, { "epoch": 0.39, "grad_norm": 0.303163185427342, "learning_rate": 0.00018892655975857842, "loss": 1.0809, "step": 4048 }, { "epoch": 0.39, "grad_norm": 0.28389248942158896, "learning_rate": 0.0001889193229502857, "loss": 0.9802, "step": 4049 }, { "epoch": 0.39, "grad_norm": 0.23743556853791362, "learning_rate": 0.000188912083916731, "loss": 1.0415, "step": 4050 }, { "epoch": 0.39, "grad_norm": 0.2607618534899767, "learning_rate": 0.00018890484265809558, "loss": 1.1187, "step": 4051 }, { "epoch": 0.39, "grad_norm": 0.282981189123625, "learning_rate": 0.00018889759917456057, "loss": 1.1592, "step": 4052 }, { "epoch": 0.39, "grad_norm": 0.2875209522933082, "learning_rate": 0.00018889035346630726, "loss": 1.1722, "step": 4053 }, { "epoch": 0.39, "grad_norm": 0.28469142917045703, "learning_rate": 0.000188883105533517, "loss": 1.0714, "step": 4054 }, { "epoch": 0.39, "grad_norm": 0.27563667585065, "learning_rate": 0.00018887585537637116, "loss": 0.9172, "step": 4055 }, { "epoch": 0.39, "grad_norm": 0.2735767551268766, "learning_rate": 0.00018886860299505118, "loss": 1.1189, "step": 4056 }, { "epoch": 0.39, "grad_norm": 0.30194229419916335, "learning_rate": 0.00018886134838973857, "loss": 1.0886, "step": 4057 }, { "epoch": 0.39, "grad_norm": 0.2932088348927251, "learning_rate": 0.00018885409156061488, "loss": 1.115, "step": 4058 }, { "epoch": 0.39, "grad_norm": 0.27110810019888465, "learning_rate": 0.00018884683250786167, "loss": 1.0398, "step": 4059 }, { "epoch": 0.39, "grad_norm": 0.33237664113893134, "learning_rate": 0.00018883957123166066, "loss": 1.1189, "step": 4060 }, { "epoch": 0.39, "grad_norm": 0.2507604291402785, "learning_rate": 0.00018883230773219354, "loss": 1.0053, "step": 4061 }, { "epoch": 0.39, "grad_norm": 0.2972413921137987, "learning_rate": 0.00018882504200964207, "loss": 1.1487, "step": 4062 }, { "epoch": 0.39, "grad_norm": 0.26170579658272336, "learning_rate": 0.00018881777406418816, "loss": 1.1655, "step": 4063 }, { "epoch": 0.39, "grad_norm": 0.30453124626017863, "learning_rate": 0.00018881050389601357, "loss": 0.9793, "step": 4064 }, { "epoch": 0.39, "grad_norm": 0.27274778300480246, "learning_rate": 0.00018880323150530034, "loss": 1.019, "step": 4065 }, { "epoch": 0.39, "grad_norm": 0.2597880316659366, "learning_rate": 0.0001887959568922304, "loss": 1.13, "step": 4066 }, { "epoch": 0.39, "grad_norm": 0.30696703463261027, "learning_rate": 0.00018878868005698586, "loss": 1.068, "step": 4067 }, { "epoch": 0.39, "grad_norm": 0.254482856569276, "learning_rate": 0.0001887814009997488, "loss": 1.156, "step": 4068 }, { "epoch": 0.39, "grad_norm": 0.29100192996613405, "learning_rate": 0.00018877411972070135, "loss": 1.1195, "step": 4069 }, { "epoch": 0.39, "grad_norm": 0.2592354961216204, "learning_rate": 0.0001887668362200258, "loss": 1.1087, "step": 4070 }, { "epoch": 0.39, "grad_norm": 0.28232033919806615, "learning_rate": 0.00018875955049790438, "loss": 1.0374, "step": 4071 }, { "epoch": 0.39, "grad_norm": 0.2710379764058582, "learning_rate": 0.00018875226255451942, "loss": 1.0692, "step": 4072 }, { "epoch": 0.39, "grad_norm": 0.3211139224345676, "learning_rate": 0.00018874497239005332, "loss": 1.0422, "step": 4073 }, { "epoch": 0.39, "grad_norm": 0.26011924754300747, "learning_rate": 0.0001887376800046885, "loss": 1.0197, "step": 4074 }, { "epoch": 0.39, "grad_norm": 0.2596113402217534, "learning_rate": 0.00018873038539860747, "loss": 1.0965, "step": 4075 }, { "epoch": 0.39, "grad_norm": 0.2666964093730143, "learning_rate": 0.0001887230885719928, "loss": 1.1003, "step": 4076 }, { "epoch": 0.39, "grad_norm": 0.24755453906410413, "learning_rate": 0.00018871578952502703, "loss": 1.0753, "step": 4077 }, { "epoch": 0.39, "grad_norm": 0.25330256715469024, "learning_rate": 0.0001887084882578929, "loss": 1.0962, "step": 4078 }, { "epoch": 0.39, "grad_norm": 0.2664417858327983, "learning_rate": 0.00018870118477077309, "loss": 1.1026, "step": 4079 }, { "epoch": 0.39, "grad_norm": 0.2887769686304296, "learning_rate": 0.00018869387906385044, "loss": 1.085, "step": 4080 }, { "epoch": 0.39, "grad_norm": 0.2945622145717073, "learning_rate": 0.00018868657113730764, "loss": 1.0453, "step": 4081 }, { "epoch": 0.39, "grad_norm": 0.2914962906932826, "learning_rate": 0.0001886792609913277, "loss": 1.1669, "step": 4082 }, { "epoch": 0.39, "grad_norm": 0.2982094102526812, "learning_rate": 0.00018867194862609354, "loss": 1.1398, "step": 4083 }, { "epoch": 0.39, "grad_norm": 0.2943744623045212, "learning_rate": 0.0001886646340417881, "loss": 1.1049, "step": 4084 }, { "epoch": 0.39, "grad_norm": 0.2808334985745818, "learning_rate": 0.0001886573172385945, "loss": 1.0434, "step": 4085 }, { "epoch": 0.39, "grad_norm": 0.28765243366041743, "learning_rate": 0.0001886499982166958, "loss": 1.1537, "step": 4086 }, { "epoch": 0.39, "grad_norm": 0.2838538473305344, "learning_rate": 0.0001886426769762752, "loss": 1.1621, "step": 4087 }, { "epoch": 0.39, "grad_norm": 0.2779123375968445, "learning_rate": 0.00018863535351751586, "loss": 1.0426, "step": 4088 }, { "epoch": 0.39, "grad_norm": 0.26234781180349137, "learning_rate": 0.00018862802784060115, "loss": 1.1284, "step": 4089 }, { "epoch": 0.39, "grad_norm": 0.27792379078088625, "learning_rate": 0.00018862069994571428, "loss": 0.9789, "step": 4090 }, { "epoch": 0.39, "grad_norm": 0.3213818567797798, "learning_rate": 0.00018861336983303875, "loss": 1.0857, "step": 4091 }, { "epoch": 0.39, "grad_norm": 0.2942679485146761, "learning_rate": 0.0001886060375027579, "loss": 1.0244, "step": 4092 }, { "epoch": 0.39, "grad_norm": 0.2934982143646054, "learning_rate": 0.0001885987029550553, "loss": 1.0908, "step": 4093 }, { "epoch": 0.39, "grad_norm": 0.2705085832069158, "learning_rate": 0.00018859136619011447, "loss": 1.0971, "step": 4094 }, { "epoch": 0.39, "grad_norm": 0.2645736238106595, "learning_rate": 0.00018858402720811905, "loss": 1.0806, "step": 4095 }, { "epoch": 0.39, "grad_norm": 0.25805002703777125, "learning_rate": 0.00018857668600925264, "loss": 1.0815, "step": 4096 }, { "epoch": 0.39, "grad_norm": 0.2768106728894951, "learning_rate": 0.00018856934259369902, "loss": 1.0963, "step": 4097 }, { "epoch": 0.39, "grad_norm": 0.2952517992902994, "learning_rate": 0.00018856199696164194, "loss": 1.0913, "step": 4098 }, { "epoch": 0.39, "grad_norm": 0.25015646961672283, "learning_rate": 0.0001885546491132652, "loss": 1.0521, "step": 4099 }, { "epoch": 0.39, "grad_norm": 0.2933146156325368, "learning_rate": 0.00018854729904875273, "loss": 1.1188, "step": 4100 }, { "epoch": 0.39, "grad_norm": 0.2906550664804429, "learning_rate": 0.00018853994676828846, "loss": 1.1433, "step": 4101 }, { "epoch": 0.39, "grad_norm": 0.26884740955424163, "learning_rate": 0.00018853259227205634, "loss": 1.1072, "step": 4102 }, { "epoch": 0.39, "grad_norm": 0.3113311790033681, "learning_rate": 0.0001885252355602405, "loss": 1.1282, "step": 4103 }, { "epoch": 0.39, "grad_norm": 0.28155446924580935, "learning_rate": 0.00018851787663302498, "loss": 1.083, "step": 4104 }, { "epoch": 0.39, "grad_norm": 0.29809592194674794, "learning_rate": 0.00018851051549059397, "loss": 1.0569, "step": 4105 }, { "epoch": 0.39, "grad_norm": 0.24384181868224059, "learning_rate": 0.0001885031521331317, "loss": 1.0561, "step": 4106 }, { "epoch": 0.39, "grad_norm": 0.27108319846787104, "learning_rate": 0.0001884957865608224, "loss": 1.058, "step": 4107 }, { "epoch": 0.39, "grad_norm": 0.2943767607152993, "learning_rate": 0.00018848841877385045, "loss": 1.1393, "step": 4108 }, { "epoch": 0.39, "grad_norm": 0.29474696198802675, "learning_rate": 0.00018848104877240015, "loss": 1.1804, "step": 4109 }, { "epoch": 0.39, "grad_norm": 0.2951250932743141, "learning_rate": 0.00018847367655665606, "loss": 1.2261, "step": 4110 }, { "epoch": 0.39, "grad_norm": 0.2608593829479291, "learning_rate": 0.0001884663021268026, "loss": 1.0769, "step": 4111 }, { "epoch": 0.39, "grad_norm": 0.29600065532000475, "learning_rate": 0.0001884589254830243, "loss": 1.0772, "step": 4112 }, { "epoch": 0.39, "grad_norm": 0.2990313407932539, "learning_rate": 0.0001884515466255058, "loss": 1.1775, "step": 4113 }, { "epoch": 0.39, "grad_norm": 0.2843669340987528, "learning_rate": 0.00018844416555443178, "loss": 1.1374, "step": 4114 }, { "epoch": 0.39, "grad_norm": 0.2508179755628321, "learning_rate": 0.00018843678226998693, "loss": 1.1355, "step": 4115 }, { "epoch": 0.39, "grad_norm": 0.2754853279028242, "learning_rate": 0.000188429396772356, "loss": 1.0528, "step": 4116 }, { "epoch": 0.39, "grad_norm": 0.2692652041119292, "learning_rate": 0.00018842200906172386, "loss": 1.081, "step": 4117 }, { "epoch": 0.39, "grad_norm": 0.2581020541470848, "learning_rate": 0.00018841461913827537, "loss": 1.1674, "step": 4118 }, { "epoch": 0.39, "grad_norm": 0.30950720631732437, "learning_rate": 0.0001884072270021955, "loss": 1.1665, "step": 4119 }, { "epoch": 0.39, "grad_norm": 0.2485313737562046, "learning_rate": 0.00018839983265366917, "loss": 1.1825, "step": 4120 }, { "epoch": 0.39, "grad_norm": 0.2644284997612591, "learning_rate": 0.0001883924360928815, "loss": 1.1264, "step": 4121 }, { "epoch": 0.39, "grad_norm": 0.29112561286000443, "learning_rate": 0.0001883850373200175, "loss": 1.1339, "step": 4122 }, { "epoch": 0.39, "grad_norm": 0.27090825993266066, "learning_rate": 0.00018837763633526247, "loss": 1.1998, "step": 4123 }, { "epoch": 0.39, "grad_norm": 0.3146596625378274, "learning_rate": 0.0001883702331388015, "loss": 1.1325, "step": 4124 }, { "epoch": 0.39, "grad_norm": 0.31943559497066387, "learning_rate": 0.00018836282773081992, "loss": 1.0685, "step": 4125 }, { "epoch": 0.39, "grad_norm": 0.3067968110428308, "learning_rate": 0.00018835542011150303, "loss": 1.1561, "step": 4126 }, { "epoch": 0.39, "grad_norm": 0.27469103129370837, "learning_rate": 0.00018834801028103627, "loss": 1.0606, "step": 4127 }, { "epoch": 0.39, "grad_norm": 0.2565388108324173, "learning_rate": 0.00018834059823960497, "loss": 1.1264, "step": 4128 }, { "epoch": 0.4, "grad_norm": 0.26620235976121565, "learning_rate": 0.0001883331839873947, "loss": 0.9498, "step": 4129 }, { "epoch": 0.4, "grad_norm": 0.2736769679639783, "learning_rate": 0.00018832576752459099, "loss": 1.1824, "step": 4130 }, { "epoch": 0.4, "grad_norm": 0.28039373078224655, "learning_rate": 0.00018831834885137943, "loss": 1.0567, "step": 4131 }, { "epoch": 0.4, "grad_norm": 0.28360004650576354, "learning_rate": 0.00018831092796794572, "loss": 1.1355, "step": 4132 }, { "epoch": 0.4, "grad_norm": 0.3007266751162003, "learning_rate": 0.0001883035048744755, "loss": 1.1805, "step": 4133 }, { "epoch": 0.4, "grad_norm": 0.27426609206190533, "learning_rate": 0.00018829607957115458, "loss": 1.1591, "step": 4134 }, { "epoch": 0.4, "grad_norm": 0.2975062542878613, "learning_rate": 0.00018828865205816877, "loss": 1.1326, "step": 4135 }, { "epoch": 0.4, "grad_norm": 0.24138546192014423, "learning_rate": 0.00018828122233570396, "loss": 1.0062, "step": 4136 }, { "epoch": 0.4, "grad_norm": 0.3259428242836633, "learning_rate": 0.00018827379040394607, "loss": 1.0638, "step": 4137 }, { "epoch": 0.4, "grad_norm": 0.2893519126284774, "learning_rate": 0.00018826635626308113, "loss": 1.0938, "step": 4138 }, { "epoch": 0.4, "grad_norm": 0.2649524956937564, "learning_rate": 0.00018825891991329513, "loss": 1.0869, "step": 4139 }, { "epoch": 0.4, "grad_norm": 0.28942290188391856, "learning_rate": 0.00018825148135477417, "loss": 0.9653, "step": 4140 }, { "epoch": 0.4, "grad_norm": 0.303875931112067, "learning_rate": 0.00018824404058770443, "loss": 1.1875, "step": 4141 }, { "epoch": 0.4, "grad_norm": 0.27790521276702573, "learning_rate": 0.00018823659761227216, "loss": 1.1442, "step": 4142 }, { "epoch": 0.4, "grad_norm": 0.2499769206341798, "learning_rate": 0.00018822915242866354, "loss": 0.9754, "step": 4143 }, { "epoch": 0.4, "grad_norm": 0.30156640795095413, "learning_rate": 0.00018822170503706494, "loss": 1.1384, "step": 4144 }, { "epoch": 0.4, "grad_norm": 0.2551627106250161, "learning_rate": 0.00018821425543766275, "loss": 0.9835, "step": 4145 }, { "epoch": 0.4, "grad_norm": 0.2699256367377609, "learning_rate": 0.00018820680363064335, "loss": 1.0811, "step": 4146 }, { "epoch": 0.4, "grad_norm": 0.2685887519613943, "learning_rate": 0.00018819934961619323, "loss": 1.1884, "step": 4147 }, { "epoch": 0.4, "grad_norm": 0.27522442156799526, "learning_rate": 0.000188191893394499, "loss": 1.1393, "step": 4148 }, { "epoch": 0.4, "grad_norm": 0.25866933190885216, "learning_rate": 0.0001881844349657472, "loss": 1.0642, "step": 4149 }, { "epoch": 0.4, "grad_norm": 0.2767798366498473, "learning_rate": 0.00018817697433012447, "loss": 1.1294, "step": 4150 }, { "epoch": 0.4, "grad_norm": 0.25741272329454395, "learning_rate": 0.00018816951148781756, "loss": 1.2252, "step": 4151 }, { "epoch": 0.4, "grad_norm": 0.2703985849781623, "learning_rate": 0.00018816204643901322, "loss": 1.0734, "step": 4152 }, { "epoch": 0.4, "grad_norm": 0.3049033588778724, "learning_rate": 0.00018815457918389822, "loss": 1.053, "step": 4153 }, { "epoch": 0.4, "grad_norm": 0.22456734765420605, "learning_rate": 0.00018814710972265953, "loss": 0.9969, "step": 4154 }, { "epoch": 0.4, "grad_norm": 0.2773921790872893, "learning_rate": 0.00018813963805548397, "loss": 1.0376, "step": 4155 }, { "epoch": 0.4, "grad_norm": 0.34700507029136146, "learning_rate": 0.0001881321641825586, "loss": 1.1532, "step": 4156 }, { "epoch": 0.4, "grad_norm": 0.3113422889257847, "learning_rate": 0.00018812468810407043, "loss": 1.0595, "step": 4157 }, { "epoch": 0.4, "grad_norm": 0.29393485580712975, "learning_rate": 0.00018811720982020655, "loss": 1.1448, "step": 4158 }, { "epoch": 0.4, "grad_norm": 0.2958948874029899, "learning_rate": 0.00018810972933115412, "loss": 1.1131, "step": 4159 }, { "epoch": 0.4, "grad_norm": 0.2465662069981653, "learning_rate": 0.00018810224663710033, "loss": 0.8469, "step": 4160 }, { "epoch": 0.4, "grad_norm": 0.2876082268093369, "learning_rate": 0.00018809476173823247, "loss": 1.0811, "step": 4161 }, { "epoch": 0.4, "grad_norm": 0.3097135244404772, "learning_rate": 0.0001880872746347378, "loss": 1.1378, "step": 4162 }, { "epoch": 0.4, "grad_norm": 0.2793139934409337, "learning_rate": 0.00018807978532680374, "loss": 1.0819, "step": 4163 }, { "epoch": 0.4, "grad_norm": 0.2657493887824555, "learning_rate": 0.0001880722938146177, "loss": 1.1998, "step": 4164 }, { "epoch": 0.4, "grad_norm": 0.2504323188479509, "learning_rate": 0.00018806480009836716, "loss": 1.136, "step": 4165 }, { "epoch": 0.4, "grad_norm": 0.2770886792479756, "learning_rate": 0.00018805730417823964, "loss": 1.0626, "step": 4166 }, { "epoch": 0.4, "grad_norm": 0.28327442888376786, "learning_rate": 0.00018804980605442273, "loss": 1.2524, "step": 4167 }, { "epoch": 0.4, "grad_norm": 0.2738803155422817, "learning_rate": 0.00018804230572710411, "loss": 1.2178, "step": 4168 }, { "epoch": 0.4, "grad_norm": 0.2621464862724536, "learning_rate": 0.00018803480319647145, "loss": 1.1003, "step": 4169 }, { "epoch": 0.4, "grad_norm": 0.2829875367760781, "learning_rate": 0.0001880272984627125, "loss": 1.1652, "step": 4170 }, { "epoch": 0.4, "grad_norm": 0.25110962790800634, "learning_rate": 0.00018801979152601508, "loss": 1.0741, "step": 4171 }, { "epoch": 0.4, "grad_norm": 0.3021846850869496, "learning_rate": 0.0001880122823865671, "loss": 1.1427, "step": 4172 }, { "epoch": 0.4, "grad_norm": 0.25970989561092755, "learning_rate": 0.00018800477104455638, "loss": 1.1105, "step": 4173 }, { "epoch": 0.4, "grad_norm": 0.3037823815880924, "learning_rate": 0.00018799725750017098, "loss": 1.04, "step": 4174 }, { "epoch": 0.4, "grad_norm": 0.28340725697761254, "learning_rate": 0.00018798974175359892, "loss": 1.0287, "step": 4175 }, { "epoch": 0.4, "grad_norm": 0.2907181694596812, "learning_rate": 0.00018798222380502825, "loss": 1.1459, "step": 4176 }, { "epoch": 0.4, "grad_norm": 0.2798317614131016, "learning_rate": 0.00018797470365464718, "loss": 1.0269, "step": 4177 }, { "epoch": 0.4, "grad_norm": 0.2451699568307834, "learning_rate": 0.0001879671813026438, "loss": 1.0953, "step": 4178 }, { "epoch": 0.4, "grad_norm": 0.26115132881368164, "learning_rate": 0.00018795965674920647, "loss": 0.95, "step": 4179 }, { "epoch": 0.4, "grad_norm": 0.28240121314882904, "learning_rate": 0.00018795212999452344, "loss": 1.0437, "step": 4180 }, { "epoch": 0.4, "grad_norm": 0.30408179576379274, "learning_rate": 0.00018794460103878306, "loss": 1.1509, "step": 4181 }, { "epoch": 0.4, "grad_norm": 0.2613648521070335, "learning_rate": 0.00018793706988217378, "loss": 1.0649, "step": 4182 }, { "epoch": 0.4, "grad_norm": 0.2807710295201798, "learning_rate": 0.00018792953652488405, "loss": 1.2016, "step": 4183 }, { "epoch": 0.4, "grad_norm": 0.27595401782163975, "learning_rate": 0.0001879220009671024, "loss": 1.12, "step": 4184 }, { "epoch": 0.4, "grad_norm": 0.28286269456870644, "learning_rate": 0.00018791446320901747, "loss": 1.0609, "step": 4185 }, { "epoch": 0.4, "grad_norm": 0.28563194802618863, "learning_rate": 0.0001879069232508178, "loss": 1.0778, "step": 4186 }, { "epoch": 0.4, "grad_norm": 0.270002889739277, "learning_rate": 0.00018789938109269215, "loss": 1.0788, "step": 4187 }, { "epoch": 0.4, "grad_norm": 0.25196325847643697, "learning_rate": 0.00018789183673482924, "loss": 1.1108, "step": 4188 }, { "epoch": 0.4, "grad_norm": 0.25601666120815847, "learning_rate": 0.00018788429017741785, "loss": 1.043, "step": 4189 }, { "epoch": 0.4, "grad_norm": 0.2743217941272287, "learning_rate": 0.0001878767414206469, "loss": 1.0379, "step": 4190 }, { "epoch": 0.4, "grad_norm": 0.2971315421501434, "learning_rate": 0.00018786919046470527, "loss": 1.1175, "step": 4191 }, { "epoch": 0.4, "grad_norm": 0.24701289964452056, "learning_rate": 0.0001878616373097819, "loss": 1.047, "step": 4192 }, { "epoch": 0.4, "grad_norm": 0.28093581228292824, "learning_rate": 0.00018785408195606587, "loss": 1.1656, "step": 4193 }, { "epoch": 0.4, "grad_norm": 0.2738956970254292, "learning_rate": 0.0001878465244037462, "loss": 1.0897, "step": 4194 }, { "epoch": 0.4, "grad_norm": 0.3300931758890875, "learning_rate": 0.00018783896465301205, "loss": 1.1008, "step": 4195 }, { "epoch": 0.4, "grad_norm": 0.2339082478589511, "learning_rate": 0.0001878314027040526, "loss": 1.1647, "step": 4196 }, { "epoch": 0.4, "grad_norm": 0.2575164996133438, "learning_rate": 0.0001878238385570571, "loss": 1.1316, "step": 4197 }, { "epoch": 0.4, "grad_norm": 0.28065885782328104, "learning_rate": 0.00018781627221221484, "loss": 1.1204, "step": 4198 }, { "epoch": 0.4, "grad_norm": 0.27240538013841803, "learning_rate": 0.0001878087036697152, "loss": 1.1995, "step": 4199 }, { "epoch": 0.4, "grad_norm": 0.28177346756683214, "learning_rate": 0.00018780113292974756, "loss": 1.0969, "step": 4200 }, { "epoch": 0.4, "grad_norm": 0.2869344753327611, "learning_rate": 0.00018779355999250135, "loss": 1.02, "step": 4201 }, { "epoch": 0.4, "grad_norm": 0.317258761790818, "learning_rate": 0.00018778598485816618, "loss": 1.0979, "step": 4202 }, { "epoch": 0.4, "grad_norm": 0.28088629903560547, "learning_rate": 0.00018777840752693152, "loss": 1.0249, "step": 4203 }, { "epoch": 0.4, "grad_norm": 0.2796415407726609, "learning_rate": 0.00018777082799898705, "loss": 1.2466, "step": 4204 }, { "epoch": 0.4, "grad_norm": 0.28573228367442755, "learning_rate": 0.00018776324627452247, "loss": 1.1008, "step": 4205 }, { "epoch": 0.4, "grad_norm": 0.36653558077222376, "learning_rate": 0.0001877556623537275, "loss": 1.1889, "step": 4206 }, { "epoch": 0.4, "grad_norm": 0.257060176220037, "learning_rate": 0.00018774807623679192, "loss": 1.096, "step": 4207 }, { "epoch": 0.4, "grad_norm": 0.26689142065335364, "learning_rate": 0.00018774048792390559, "loss": 1.1396, "step": 4208 }, { "epoch": 0.4, "grad_norm": 0.27797803343333893, "learning_rate": 0.0001877328974152584, "loss": 1.0059, "step": 4209 }, { "epoch": 0.4, "grad_norm": 0.26890403496189735, "learning_rate": 0.00018772530471104028, "loss": 1.1074, "step": 4210 }, { "epoch": 0.4, "grad_norm": 0.29380901154741923, "learning_rate": 0.00018771770981144132, "loss": 1.1014, "step": 4211 }, { "epoch": 0.4, "grad_norm": 0.2777299581369351, "learning_rate": 0.00018771011271665153, "loss": 1.0938, "step": 4212 }, { "epoch": 0.4, "grad_norm": 0.26403372932978386, "learning_rate": 0.00018770251342686104, "loss": 1.1595, "step": 4213 }, { "epoch": 0.4, "grad_norm": 0.28636002615157274, "learning_rate": 0.00018769491194226006, "loss": 1.0455, "step": 4214 }, { "epoch": 0.4, "grad_norm": 0.2844974569194305, "learning_rate": 0.00018768730826303876, "loss": 1.0772, "step": 4215 }, { "epoch": 0.4, "grad_norm": 0.25117753184917296, "learning_rate": 0.0001876797023893875, "loss": 1.1215, "step": 4216 }, { "epoch": 0.4, "grad_norm": 0.25290828237696705, "learning_rate": 0.00018767209432149652, "loss": 1.0751, "step": 4217 }, { "epoch": 0.4, "grad_norm": 0.2641125806261801, "learning_rate": 0.0001876644840595563, "loss": 1.1248, "step": 4218 }, { "epoch": 0.4, "grad_norm": 0.26789458127024735, "learning_rate": 0.00018765687160375732, "loss": 1.058, "step": 4219 }, { "epoch": 0.4, "grad_norm": 0.2656234103720025, "learning_rate": 0.00018764925695428998, "loss": 1.0255, "step": 4220 }, { "epoch": 0.4, "grad_norm": 0.24975067381842872, "learning_rate": 0.00018764164011134495, "loss": 1.1212, "step": 4221 }, { "epoch": 0.4, "grad_norm": 0.2395528550635482, "learning_rate": 0.00018763402107511276, "loss": 0.9989, "step": 4222 }, { "epoch": 0.4, "grad_norm": 0.2532908262725297, "learning_rate": 0.00018762639984578412, "loss": 1.1901, "step": 4223 }, { "epoch": 0.4, "grad_norm": 0.31719585665339967, "learning_rate": 0.00018761877642354977, "loss": 1.2269, "step": 4224 }, { "epoch": 0.4, "grad_norm": 0.31874436060610234, "learning_rate": 0.00018761115080860046, "loss": 0.9891, "step": 4225 }, { "epoch": 0.4, "grad_norm": 0.29050294955105876, "learning_rate": 0.00018760352300112705, "loss": 1.1358, "step": 4226 }, { "epoch": 0.4, "grad_norm": 0.27399249257978747, "learning_rate": 0.00018759589300132041, "loss": 1.0868, "step": 4227 }, { "epoch": 0.4, "grad_norm": 0.2696276778720872, "learning_rate": 0.00018758826080937148, "loss": 1.1893, "step": 4228 }, { "epoch": 0.4, "grad_norm": 0.28473533529326145, "learning_rate": 0.00018758062642547133, "loss": 1.1131, "step": 4229 }, { "epoch": 0.4, "grad_norm": 0.27137640986272094, "learning_rate": 0.00018757298984981092, "loss": 0.976, "step": 4230 }, { "epoch": 0.4, "grad_norm": 0.3035287993677886, "learning_rate": 0.0001875653510825814, "loss": 1.2036, "step": 4231 }, { "epoch": 0.4, "grad_norm": 0.2730310702863054, "learning_rate": 0.00018755771012397393, "loss": 1.1172, "step": 4232 }, { "epoch": 0.4, "grad_norm": 0.2515152904223668, "learning_rate": 0.00018755006697417976, "loss": 1.0117, "step": 4233 }, { "epoch": 0.41, "grad_norm": 0.2628436719293594, "learning_rate": 0.00018754242163339014, "loss": 1.0782, "step": 4234 }, { "epoch": 0.41, "grad_norm": 0.26021326272006884, "learning_rate": 0.0001875347741017964, "loss": 1.1058, "step": 4235 }, { "epoch": 0.41, "grad_norm": 0.2739556248198109, "learning_rate": 0.0001875271243795899, "loss": 0.9982, "step": 4236 }, { "epoch": 0.41, "grad_norm": 0.30134796736592306, "learning_rate": 0.00018751947246696212, "loss": 1.0949, "step": 4237 }, { "epoch": 0.41, "grad_norm": 0.271764675869869, "learning_rate": 0.00018751181836410455, "loss": 1.0182, "step": 4238 }, { "epoch": 0.41, "grad_norm": 0.24250081027639273, "learning_rate": 0.0001875041620712087, "loss": 1.0884, "step": 4239 }, { "epoch": 0.41, "grad_norm": 0.2869641568770455, "learning_rate": 0.0001874965035884662, "loss": 1.1567, "step": 4240 }, { "epoch": 0.41, "grad_norm": 0.2969381271520231, "learning_rate": 0.00018748884291606874, "loss": 1.0765, "step": 4241 }, { "epoch": 0.41, "grad_norm": 0.26178640355726157, "learning_rate": 0.00018748118005420798, "loss": 1.1142, "step": 4242 }, { "epoch": 0.41, "grad_norm": 0.2800257202847653, "learning_rate": 0.0001874735150030757, "loss": 1.1046, "step": 4243 }, { "epoch": 0.41, "grad_norm": 0.2944863333696688, "learning_rate": 0.00018746584776286376, "loss": 1.038, "step": 4244 }, { "epoch": 0.41, "grad_norm": 0.30216624143418275, "learning_rate": 0.00018745817833376398, "loss": 1.2037, "step": 4245 }, { "epoch": 0.41, "grad_norm": 0.2663185997548643, "learning_rate": 0.00018745050671596834, "loss": 1.1279, "step": 4246 }, { "epoch": 0.41, "grad_norm": 0.2447130737177253, "learning_rate": 0.00018744283290966882, "loss": 1.0917, "step": 4247 }, { "epoch": 0.41, "grad_norm": 0.290268659831609, "learning_rate": 0.00018743515691505743, "loss": 1.031, "step": 4248 }, { "epoch": 0.41, "grad_norm": 0.28648517218701464, "learning_rate": 0.0001874274787323263, "loss": 1.0971, "step": 4249 }, { "epoch": 0.41, "grad_norm": 0.2682660047938872, "learning_rate": 0.00018741979836166755, "loss": 1.1424, "step": 4250 }, { "epoch": 0.41, "grad_norm": 0.3030211943382245, "learning_rate": 0.00018741211580327344, "loss": 1.254, "step": 4251 }, { "epoch": 0.41, "grad_norm": 0.3098473572044177, "learning_rate": 0.00018740443105733613, "loss": 0.9803, "step": 4252 }, { "epoch": 0.41, "grad_norm": 0.2797263417102469, "learning_rate": 0.00018739674412404807, "loss": 1.0723, "step": 4253 }, { "epoch": 0.41, "grad_norm": 0.27611130556479674, "learning_rate": 0.00018738905500360154, "loss": 1.0926, "step": 4254 }, { "epoch": 0.41, "grad_norm": 0.2787572653472917, "learning_rate": 0.00018738136369618897, "loss": 1.0504, "step": 4255 }, { "epoch": 0.41, "grad_norm": 0.28676439879084453, "learning_rate": 0.00018737367020200285, "loss": 1.0677, "step": 4256 }, { "epoch": 0.41, "grad_norm": 0.2898408979989211, "learning_rate": 0.00018736597452123575, "loss": 1.0629, "step": 4257 }, { "epoch": 0.41, "grad_norm": 0.265340677178945, "learning_rate": 0.00018735827665408022, "loss": 1.0293, "step": 4258 }, { "epoch": 0.41, "grad_norm": 0.2910287914202519, "learning_rate": 0.0001873505766007289, "loss": 0.9698, "step": 4259 }, { "epoch": 0.41, "grad_norm": 0.2899174476618089, "learning_rate": 0.00018734287436137452, "loss": 1.1953, "step": 4260 }, { "epoch": 0.41, "grad_norm": 0.2566780706363218, "learning_rate": 0.0001873351699362098, "loss": 1.0803, "step": 4261 }, { "epoch": 0.41, "grad_norm": 0.25026487846526846, "learning_rate": 0.00018732746332542758, "loss": 1.0512, "step": 4262 }, { "epoch": 0.41, "grad_norm": 0.2634865581228525, "learning_rate": 0.00018731975452922073, "loss": 1.0902, "step": 4263 }, { "epoch": 0.41, "grad_norm": 0.2739235925675129, "learning_rate": 0.0001873120435477821, "loss": 1.2044, "step": 4264 }, { "epoch": 0.41, "grad_norm": 0.3392678199356469, "learning_rate": 0.00018730433038130473, "loss": 1.1055, "step": 4265 }, { "epoch": 0.41, "grad_norm": 0.2394900592357321, "learning_rate": 0.0001872966150299816, "loss": 1.0701, "step": 4266 }, { "epoch": 0.41, "grad_norm": 0.2700640038637131, "learning_rate": 0.00018728889749400584, "loss": 1.0162, "step": 4267 }, { "epoch": 0.41, "grad_norm": 0.2912889000502306, "learning_rate": 0.00018728117777357055, "loss": 1.1419, "step": 4268 }, { "epoch": 0.41, "grad_norm": 0.2760619669010758, "learning_rate": 0.00018727345586886892, "loss": 1.0408, "step": 4269 }, { "epoch": 0.41, "grad_norm": 0.2927376911193287, "learning_rate": 0.00018726573178009422, "loss": 1.0906, "step": 4270 }, { "epoch": 0.41, "grad_norm": 0.25519561721394524, "learning_rate": 0.00018725800550743976, "loss": 1.0385, "step": 4271 }, { "epoch": 0.41, "grad_norm": 0.2830277309204389, "learning_rate": 0.00018725027705109886, "loss": 1.0272, "step": 4272 }, { "epoch": 0.41, "grad_norm": 0.2664819202376281, "learning_rate": 0.00018724254641126493, "loss": 1.1779, "step": 4273 }, { "epoch": 0.41, "grad_norm": 0.2574469843738646, "learning_rate": 0.00018723481358813145, "loss": 1.0864, "step": 4274 }, { "epoch": 0.41, "grad_norm": 0.2757193054824571, "learning_rate": 0.00018722707858189193, "loss": 1.0145, "step": 4275 }, { "epoch": 0.41, "grad_norm": 0.29257610913040466, "learning_rate": 0.00018721934139273998, "loss": 0.9679, "step": 4276 }, { "epoch": 0.41, "grad_norm": 0.289091197361481, "learning_rate": 0.00018721160202086914, "loss": 1.092, "step": 4277 }, { "epoch": 0.41, "grad_norm": 0.24344559699327611, "learning_rate": 0.0001872038604664732, "loss": 1.1082, "step": 4278 }, { "epoch": 0.41, "grad_norm": 0.2810770812752845, "learning_rate": 0.0001871961167297458, "loss": 1.0673, "step": 4279 }, { "epoch": 0.41, "grad_norm": 0.2861371909971846, "learning_rate": 0.0001871883708108808, "loss": 1.0974, "step": 4280 }, { "epoch": 0.41, "grad_norm": 0.31326751338971903, "learning_rate": 0.00018718062271007204, "loss": 1.1654, "step": 4281 }, { "epoch": 0.41, "grad_norm": 0.2695673863745955, "learning_rate": 0.00018717287242751341, "loss": 1.045, "step": 4282 }, { "epoch": 0.41, "grad_norm": 0.3078415478161515, "learning_rate": 0.00018716511996339885, "loss": 1.0805, "step": 4283 }, { "epoch": 0.41, "grad_norm": 0.26900716775364764, "learning_rate": 0.00018715736531792237, "loss": 1.1007, "step": 4284 }, { "epoch": 0.41, "grad_norm": 0.28023295676688076, "learning_rate": 0.00018714960849127804, "loss": 1.0815, "step": 4285 }, { "epoch": 0.41, "grad_norm": 0.2898989628594484, "learning_rate": 0.00018714184948366, "loss": 1.0418, "step": 4286 }, { "epoch": 0.41, "grad_norm": 0.26061571588826044, "learning_rate": 0.00018713408829526242, "loss": 1.1429, "step": 4287 }, { "epoch": 0.41, "grad_norm": 0.2745641023863704, "learning_rate": 0.0001871263249262795, "loss": 1.1773, "step": 4288 }, { "epoch": 0.41, "grad_norm": 0.26470950342417765, "learning_rate": 0.00018711855937690556, "loss": 1.1311, "step": 4289 }, { "epoch": 0.41, "grad_norm": 0.29468498976148555, "learning_rate": 0.00018711079164733491, "loss": 1.0414, "step": 4290 }, { "epoch": 0.41, "grad_norm": 0.26226927485345436, "learning_rate": 0.00018710302173776194, "loss": 1.0876, "step": 4291 }, { "epoch": 0.41, "grad_norm": 0.24393119618376538, "learning_rate": 0.00018709524964838115, "loss": 1.0936, "step": 4292 }, { "epoch": 0.41, "grad_norm": 0.2871335693752671, "learning_rate": 0.00018708747537938696, "loss": 1.0862, "step": 4293 }, { "epoch": 0.41, "grad_norm": 0.25454898572621976, "learning_rate": 0.00018707969893097399, "loss": 0.9553, "step": 4294 }, { "epoch": 0.41, "grad_norm": 0.2612510649191512, "learning_rate": 0.0001870719203033368, "loss": 1.015, "step": 4295 }, { "epoch": 0.41, "grad_norm": 0.2735824234461878, "learning_rate": 0.0001870641394966701, "loss": 1.1366, "step": 4296 }, { "epoch": 0.41, "grad_norm": 0.2561249258130775, "learning_rate": 0.00018705635651116857, "loss": 0.9771, "step": 4297 }, { "epoch": 0.41, "grad_norm": 0.2724097621208985, "learning_rate": 0.00018704857134702705, "loss": 1.0373, "step": 4298 }, { "epoch": 0.41, "grad_norm": 0.2624090546858682, "learning_rate": 0.00018704078400444028, "loss": 1.0986, "step": 4299 }, { "epoch": 0.41, "grad_norm": 0.2933893818570987, "learning_rate": 0.0001870329944836032, "loss": 1.0659, "step": 4300 }, { "epoch": 0.41, "grad_norm": 0.25403733960977537, "learning_rate": 0.00018702520278471074, "loss": 1.1726, "step": 4301 }, { "epoch": 0.41, "grad_norm": 0.27542609220308545, "learning_rate": 0.00018701740890795788, "loss": 1.1237, "step": 4302 }, { "epoch": 0.41, "grad_norm": 0.2536691041486995, "learning_rate": 0.0001870096128535397, "loss": 0.9981, "step": 4303 }, { "epoch": 0.41, "grad_norm": 0.2732550236694586, "learning_rate": 0.00018700181462165126, "loss": 1.0504, "step": 4304 }, { "epoch": 0.41, "grad_norm": 0.26940609611280475, "learning_rate": 0.0001869940142124877, "loss": 1.0977, "step": 4305 }, { "epoch": 0.41, "grad_norm": 0.2912691081559738, "learning_rate": 0.0001869862116262443, "loss": 1.0703, "step": 4306 }, { "epoch": 0.41, "grad_norm": 0.27335596729191, "learning_rate": 0.00018697840686311628, "loss": 1.1206, "step": 4307 }, { "epoch": 0.41, "grad_norm": 0.2882836785046496, "learning_rate": 0.00018697059992329895, "loss": 1.0942, "step": 4308 }, { "epoch": 0.41, "grad_norm": 0.2736335086651974, "learning_rate": 0.0001869627908069877, "loss": 1.09, "step": 4309 }, { "epoch": 0.41, "grad_norm": 0.25546265700852766, "learning_rate": 0.00018695497951437795, "loss": 1.1653, "step": 4310 }, { "epoch": 0.41, "grad_norm": 0.27099441480074316, "learning_rate": 0.0001869471660456652, "loss": 1.0797, "step": 4311 }, { "epoch": 0.41, "grad_norm": 0.2734354890739324, "learning_rate": 0.00018693935040104497, "loss": 1.0753, "step": 4312 }, { "epoch": 0.41, "grad_norm": 0.3009809400535224, "learning_rate": 0.00018693153258071286, "loss": 1.1689, "step": 4313 }, { "epoch": 0.41, "grad_norm": 0.29754474015094556, "learning_rate": 0.00018692371258486451, "loss": 1.0379, "step": 4314 }, { "epoch": 0.41, "grad_norm": 0.2996282403269194, "learning_rate": 0.00018691589041369564, "loss": 1.1081, "step": 4315 }, { "epoch": 0.41, "grad_norm": 0.24314970149389548, "learning_rate": 0.000186908066067402, "loss": 1.0464, "step": 4316 }, { "epoch": 0.41, "grad_norm": 0.24701089925472527, "learning_rate": 0.00018690023954617932, "loss": 1.1741, "step": 4317 }, { "epoch": 0.41, "grad_norm": 0.27607286639486517, "learning_rate": 0.0001868924108502236, "loss": 1.0594, "step": 4318 }, { "epoch": 0.41, "grad_norm": 0.3017023954760616, "learning_rate": 0.00018688457997973065, "loss": 1.0294, "step": 4319 }, { "epoch": 0.41, "grad_norm": 0.24937940795309205, "learning_rate": 0.00018687674693489647, "loss": 1.0421, "step": 4320 }, { "epoch": 0.41, "grad_norm": 0.23892217344372102, "learning_rate": 0.00018686891171591712, "loss": 1.0908, "step": 4321 }, { "epoch": 0.41, "grad_norm": 0.28068063323508174, "learning_rate": 0.00018686107432298868, "loss": 1.1319, "step": 4322 }, { "epoch": 0.41, "grad_norm": 0.26786810608469297, "learning_rate": 0.00018685323475630723, "loss": 1.0355, "step": 4323 }, { "epoch": 0.41, "grad_norm": 0.2435421171517636, "learning_rate": 0.000186845393016069, "loss": 1.0141, "step": 4324 }, { "epoch": 0.41, "grad_norm": 0.27884414310128647, "learning_rate": 0.00018683754910247025, "loss": 1.1473, "step": 4325 }, { "epoch": 0.41, "grad_norm": 0.2890938734276742, "learning_rate": 0.00018682970301570726, "loss": 1.1203, "step": 4326 }, { "epoch": 0.41, "grad_norm": 0.2769615698559789, "learning_rate": 0.00018682185475597636, "loss": 1.0541, "step": 4327 }, { "epoch": 0.41, "grad_norm": 0.2785590604016616, "learning_rate": 0.00018681400432347397, "loss": 1.0544, "step": 4328 }, { "epoch": 0.41, "grad_norm": 0.29418313155446657, "learning_rate": 0.00018680615171839658, "loss": 1.1175, "step": 4329 }, { "epoch": 0.41, "grad_norm": 0.3025538424684769, "learning_rate": 0.00018679829694094068, "loss": 1.2113, "step": 4330 }, { "epoch": 0.41, "grad_norm": 0.2910730421005133, "learning_rate": 0.00018679043999130288, "loss": 1.132, "step": 4331 }, { "epoch": 0.41, "grad_norm": 0.29253393692155966, "learning_rate": 0.00018678258086967975, "loss": 1.095, "step": 4332 }, { "epoch": 0.41, "grad_norm": 0.28305224842813903, "learning_rate": 0.00018677471957626797, "loss": 1.1026, "step": 4333 }, { "epoch": 0.41, "grad_norm": 0.2924260897779223, "learning_rate": 0.0001867668561112643, "loss": 1.0386, "step": 4334 }, { "epoch": 0.41, "grad_norm": 0.293768434817011, "learning_rate": 0.00018675899047486557, "loss": 1.0448, "step": 4335 }, { "epoch": 0.41, "grad_norm": 0.2941288478482527, "learning_rate": 0.00018675112266726854, "loss": 1.0662, "step": 4336 }, { "epoch": 0.41, "grad_norm": 0.2957044806998864, "learning_rate": 0.00018674325268867016, "loss": 0.9872, "step": 4337 }, { "epoch": 0.42, "grad_norm": 0.3170426741111006, "learning_rate": 0.00018673538053926735, "loss": 1.0532, "step": 4338 }, { "epoch": 0.42, "grad_norm": 0.3000921754928047, "learning_rate": 0.00018672750621925714, "loss": 1.1413, "step": 4339 }, { "epoch": 0.42, "grad_norm": 0.2998372677155844, "learning_rate": 0.00018671962972883658, "loss": 1.1425, "step": 4340 }, { "epoch": 0.42, "grad_norm": 0.2645728037896027, "learning_rate": 0.00018671175106820277, "loss": 1.0747, "step": 4341 }, { "epoch": 0.42, "grad_norm": 0.28183953656999067, "learning_rate": 0.00018670387023755295, "loss": 0.9737, "step": 4342 }, { "epoch": 0.42, "grad_norm": 0.256299564446901, "learning_rate": 0.00018669598723708422, "loss": 1.1139, "step": 4343 }, { "epoch": 0.42, "grad_norm": 0.26935203909823185, "learning_rate": 0.00018668810206699395, "loss": 1.0717, "step": 4344 }, { "epoch": 0.42, "grad_norm": 0.23352656548172643, "learning_rate": 0.00018668021472747944, "loss": 1.0427, "step": 4345 }, { "epoch": 0.42, "grad_norm": 0.2778191273219274, "learning_rate": 0.00018667232521873807, "loss": 1.0442, "step": 4346 }, { "epoch": 0.42, "grad_norm": 0.2708092397068009, "learning_rate": 0.00018666443354096733, "loss": 1.1642, "step": 4347 }, { "epoch": 0.42, "grad_norm": 0.2628086430153175, "learning_rate": 0.00018665653969436466, "loss": 1.0567, "step": 4348 }, { "epoch": 0.42, "grad_norm": 0.2689137058024899, "learning_rate": 0.00018664864367912758, "loss": 1.055, "step": 4349 }, { "epoch": 0.42, "grad_norm": 0.27522982747891467, "learning_rate": 0.00018664074549545377, "loss": 0.9796, "step": 4350 }, { "epoch": 0.42, "grad_norm": 0.2585842007123571, "learning_rate": 0.00018663284514354084, "loss": 1.0592, "step": 4351 }, { "epoch": 0.42, "grad_norm": 0.2698306914125685, "learning_rate": 0.0001866249426235865, "loss": 1.084, "step": 4352 }, { "epoch": 0.42, "grad_norm": 0.30565305866824105, "learning_rate": 0.00018661703793578855, "loss": 1.0316, "step": 4353 }, { "epoch": 0.42, "grad_norm": 0.26114362713963013, "learning_rate": 0.00018660913108034478, "loss": 1.1677, "step": 4354 }, { "epoch": 0.42, "grad_norm": 0.3016235809178839, "learning_rate": 0.00018660122205745313, "loss": 1.1933, "step": 4355 }, { "epoch": 0.42, "grad_norm": 0.2863853851725708, "learning_rate": 0.0001865933108673114, "loss": 0.9809, "step": 4356 }, { "epoch": 0.42, "grad_norm": 0.2912386308622707, "learning_rate": 0.00018658539751011767, "loss": 1.1289, "step": 4357 }, { "epoch": 0.42, "grad_norm": 0.2776875205740512, "learning_rate": 0.00018657748198606995, "loss": 1.0565, "step": 4358 }, { "epoch": 0.42, "grad_norm": 0.2748712764200445, "learning_rate": 0.00018656956429536633, "loss": 1.0782, "step": 4359 }, { "epoch": 0.42, "grad_norm": 0.255166953661312, "learning_rate": 0.00018656164443820494, "loss": 1.0291, "step": 4360 }, { "epoch": 0.42, "grad_norm": 0.2665620217061101, "learning_rate": 0.00018655372241478403, "loss": 1.114, "step": 4361 }, { "epoch": 0.42, "grad_norm": 0.2544779490108305, "learning_rate": 0.00018654579822530179, "loss": 1.2265, "step": 4362 }, { "epoch": 0.42, "grad_norm": 0.23854453003136591, "learning_rate": 0.00018653787186995654, "loss": 1.033, "step": 4363 }, { "epoch": 0.42, "grad_norm": 0.2710547109595493, "learning_rate": 0.00018652994334894668, "loss": 1.0567, "step": 4364 }, { "epoch": 0.42, "grad_norm": 0.27823107428416105, "learning_rate": 0.00018652201266247063, "loss": 1.0357, "step": 4365 }, { "epoch": 0.42, "grad_norm": 0.3065920295229727, "learning_rate": 0.0001865140798107268, "loss": 0.9394, "step": 4366 }, { "epoch": 0.42, "grad_norm": 0.29060026851055215, "learning_rate": 0.00018650614479391378, "loss": 1.0582, "step": 4367 }, { "epoch": 0.42, "grad_norm": 0.2798363947842491, "learning_rate": 0.00018649820761223012, "loss": 1.1008, "step": 4368 }, { "epoch": 0.42, "grad_norm": 0.2524281791152147, "learning_rate": 0.00018649026826587442, "loss": 1.0788, "step": 4369 }, { "epoch": 0.42, "grad_norm": 0.3175783594422539, "learning_rate": 0.00018648232675504543, "loss": 1.0091, "step": 4370 }, { "epoch": 0.42, "grad_norm": 0.2605770620877395, "learning_rate": 0.00018647438307994185, "loss": 1.0315, "step": 4371 }, { "epoch": 0.42, "grad_norm": 0.3074184313253669, "learning_rate": 0.0001864664372407625, "loss": 1.11, "step": 4372 }, { "epoch": 0.42, "grad_norm": 0.2804678028999845, "learning_rate": 0.0001864584892377062, "loss": 1.0521, "step": 4373 }, { "epoch": 0.42, "grad_norm": 0.25285663187552027, "learning_rate": 0.00018645053907097187, "loss": 1.0407, "step": 4374 }, { "epoch": 0.42, "grad_norm": 0.32876163956484417, "learning_rate": 0.00018644258674075848, "loss": 0.9872, "step": 4375 }, { "epoch": 0.42, "grad_norm": 0.25777775379310247, "learning_rate": 0.000186434632247265, "loss": 1.0567, "step": 4376 }, { "epoch": 0.42, "grad_norm": 0.29826590288671295, "learning_rate": 0.00018642667559069055, "loss": 1.0757, "step": 4377 }, { "epoch": 0.42, "grad_norm": 0.27317079924980964, "learning_rate": 0.0001864187167712342, "loss": 1.1448, "step": 4378 }, { "epoch": 0.42, "grad_norm": 0.3070229873978444, "learning_rate": 0.00018641075578909518, "loss": 1.0379, "step": 4379 }, { "epoch": 0.42, "grad_norm": 0.2718896024874906, "learning_rate": 0.0001864027926444727, "loss": 1.1072, "step": 4380 }, { "epoch": 0.42, "grad_norm": 0.2620818390598612, "learning_rate": 0.00018639482733756601, "loss": 1.0537, "step": 4381 }, { "epoch": 0.42, "grad_norm": 0.2289284295029631, "learning_rate": 0.00018638685986857448, "loss": 1.0194, "step": 4382 }, { "epoch": 0.42, "grad_norm": 0.29452954070001014, "learning_rate": 0.00018637889023769748, "loss": 1.1051, "step": 4383 }, { "epoch": 0.42, "grad_norm": 0.24409495634240713, "learning_rate": 0.00018637091844513445, "loss": 1.089, "step": 4384 }, { "epoch": 0.42, "grad_norm": 0.2444141149494506, "learning_rate": 0.00018636294449108493, "loss": 0.9955, "step": 4385 }, { "epoch": 0.42, "grad_norm": 0.25886299860296796, "learning_rate": 0.00018635496837574844, "loss": 1.0442, "step": 4386 }, { "epoch": 0.42, "grad_norm": 0.2788912752950406, "learning_rate": 0.00018634699009932462, "loss": 1.1965, "step": 4387 }, { "epoch": 0.42, "grad_norm": 0.2671936525699648, "learning_rate": 0.00018633900966201304, "loss": 1.0077, "step": 4388 }, { "epoch": 0.42, "grad_norm": 0.28071558942931124, "learning_rate": 0.00018633102706401355, "loss": 1.1462, "step": 4389 }, { "epoch": 0.42, "grad_norm": 0.34829423214490346, "learning_rate": 0.00018632304230552582, "loss": 1.0623, "step": 4390 }, { "epoch": 0.42, "grad_norm": 0.24191132707171756, "learning_rate": 0.0001863150553867497, "loss": 1.0765, "step": 4391 }, { "epoch": 0.42, "grad_norm": 0.27536728708801206, "learning_rate": 0.00018630706630788505, "loss": 1.0193, "step": 4392 }, { "epoch": 0.42, "grad_norm": 0.2697499088562797, "learning_rate": 0.00018629907506913186, "loss": 1.1339, "step": 4393 }, { "epoch": 0.42, "grad_norm": 0.26546348358382443, "learning_rate": 0.00018629108167069006, "loss": 1.1509, "step": 4394 }, { "epoch": 0.42, "grad_norm": 0.2627657556993223, "learning_rate": 0.00018628308611275972, "loss": 1.1698, "step": 4395 }, { "epoch": 0.42, "grad_norm": 0.2681938025996225, "learning_rate": 0.00018627508839554093, "loss": 1.0299, "step": 4396 }, { "epoch": 0.42, "grad_norm": 0.26016966609609554, "learning_rate": 0.00018626708851923382, "loss": 1.132, "step": 4397 }, { "epoch": 0.42, "grad_norm": 0.28096390529787246, "learning_rate": 0.0001862590864840386, "loss": 1.1004, "step": 4398 }, { "epoch": 0.42, "grad_norm": 0.2914134240329839, "learning_rate": 0.00018625108229015555, "loss": 1.0259, "step": 4399 }, { "epoch": 0.42, "grad_norm": 0.27633727064382735, "learning_rate": 0.00018624307593778495, "loss": 1.0006, "step": 4400 }, { "epoch": 0.42, "grad_norm": 0.26565720029010786, "learning_rate": 0.00018623506742712715, "loss": 1.0687, "step": 4401 }, { "epoch": 0.42, "grad_norm": 0.24736946692607406, "learning_rate": 0.00018622705675838263, "loss": 1.1022, "step": 4402 }, { "epoch": 0.42, "grad_norm": 0.23869319091908384, "learning_rate": 0.0001862190439317518, "loss": 1.1136, "step": 4403 }, { "epoch": 0.42, "grad_norm": 0.26085938439573125, "learning_rate": 0.0001862110289474352, "loss": 1.0647, "step": 4404 }, { "epoch": 0.42, "grad_norm": 0.2615746057557681, "learning_rate": 0.00018620301180563342, "loss": 1.026, "step": 4405 }, { "epoch": 0.42, "grad_norm": 0.265385119223784, "learning_rate": 0.0001861949925065471, "loss": 1.0762, "step": 4406 }, { "epoch": 0.42, "grad_norm": 0.2621600644598475, "learning_rate": 0.00018618697105037693, "loss": 1.0342, "step": 4407 }, { "epoch": 0.42, "grad_norm": 0.28497896413460505, "learning_rate": 0.00018617894743732361, "loss": 1.0353, "step": 4408 }, { "epoch": 0.42, "grad_norm": 0.2537528026008887, "learning_rate": 0.00018617092166758802, "loss": 0.9979, "step": 4409 }, { "epoch": 0.42, "grad_norm": 0.273548636864498, "learning_rate": 0.00018616289374137092, "loss": 1.1967, "step": 4410 }, { "epoch": 0.42, "grad_norm": 0.32894219686067755, "learning_rate": 0.0001861548636588733, "loss": 1.082, "step": 4411 }, { "epoch": 0.42, "grad_norm": 0.26569438454996513, "learning_rate": 0.00018614683142029602, "loss": 0.8886, "step": 4412 }, { "epoch": 0.42, "grad_norm": 0.25788494163824816, "learning_rate": 0.00018613879702584013, "loss": 1.0712, "step": 4413 }, { "epoch": 0.42, "grad_norm": 0.272910401506832, "learning_rate": 0.00018613076047570678, "loss": 1.0169, "step": 4414 }, { "epoch": 0.42, "grad_norm": 0.27161074974964666, "learning_rate": 0.00018612272177009694, "loss": 1.1233, "step": 4415 }, { "epoch": 0.42, "grad_norm": 0.28453465725372484, "learning_rate": 0.0001861146809092119, "loss": 1.0644, "step": 4416 }, { "epoch": 0.42, "grad_norm": 0.26695328078689184, "learning_rate": 0.00018610663789325288, "loss": 1.0859, "step": 4417 }, { "epoch": 0.42, "grad_norm": 0.28644342117192206, "learning_rate": 0.00018609859272242108, "loss": 1.1014, "step": 4418 }, { "epoch": 0.42, "grad_norm": 0.28232507692715614, "learning_rate": 0.0001860905453969179, "loss": 1.0183, "step": 4419 }, { "epoch": 0.42, "grad_norm": 0.28914880496465856, "learning_rate": 0.0001860824959169447, "loss": 1.0779, "step": 4420 }, { "epoch": 0.42, "grad_norm": 0.27341424599975833, "learning_rate": 0.000186074444282703, "loss": 0.9407, "step": 4421 }, { "epoch": 0.42, "grad_norm": 0.31013888814670343, "learning_rate": 0.00018606639049439415, "loss": 1.1336, "step": 4422 }, { "epoch": 0.42, "grad_norm": 0.2804627470142664, "learning_rate": 0.00018605833455221984, "loss": 1.1237, "step": 4423 }, { "epoch": 0.42, "grad_norm": 0.27060408840367484, "learning_rate": 0.00018605027645638163, "loss": 1.0914, "step": 4424 }, { "epoch": 0.42, "grad_norm": 0.2949572609779446, "learning_rate": 0.00018604221620708113, "loss": 1.0142, "step": 4425 }, { "epoch": 0.42, "grad_norm": 0.2978775593613743, "learning_rate": 0.00018603415380452013, "loss": 1.0299, "step": 4426 }, { "epoch": 0.42, "grad_norm": 0.27422717885209724, "learning_rate": 0.00018602608924890034, "loss": 0.9475, "step": 4427 }, { "epoch": 0.42, "grad_norm": 0.24881221794442596, "learning_rate": 0.0001860180225404236, "loss": 1.0083, "step": 4428 }, { "epoch": 0.42, "grad_norm": 0.28296329045097385, "learning_rate": 0.00018600995367929182, "loss": 1.1519, "step": 4429 }, { "epoch": 0.42, "grad_norm": 0.3132917134168698, "learning_rate": 0.00018600188266570687, "loss": 1.1355, "step": 4430 }, { "epoch": 0.42, "grad_norm": 0.2656930447471748, "learning_rate": 0.00018599380949987072, "loss": 1.1187, "step": 4431 }, { "epoch": 0.42, "grad_norm": 0.29159128798152506, "learning_rate": 0.0001859857341819855, "loss": 1.0272, "step": 4432 }, { "epoch": 0.42, "grad_norm": 0.26189338837080134, "learning_rate": 0.00018597765671225322, "loss": 1.0971, "step": 4433 }, { "epoch": 0.42, "grad_norm": 0.28052538093117707, "learning_rate": 0.00018596957709087603, "loss": 1.0194, "step": 4434 }, { "epoch": 0.42, "grad_norm": 0.2652637453080487, "learning_rate": 0.0001859614953180562, "loss": 1.0746, "step": 4435 }, { "epoch": 0.42, "grad_norm": 0.2829916437751392, "learning_rate": 0.00018595341139399584, "loss": 1.1021, "step": 4436 }, { "epoch": 0.42, "grad_norm": 0.25812189396799773, "learning_rate": 0.0001859453253188974, "loss": 1.0858, "step": 4437 }, { "epoch": 0.42, "grad_norm": 0.26939995257598304, "learning_rate": 0.00018593723709296316, "loss": 1.0994, "step": 4438 }, { "epoch": 0.42, "grad_norm": 0.2860192381686427, "learning_rate": 0.00018592914671639553, "loss": 1.0819, "step": 4439 }, { "epoch": 0.42, "grad_norm": 0.2791379239990837, "learning_rate": 0.00018592105418939705, "loss": 1.0618, "step": 4440 }, { "epoch": 0.42, "grad_norm": 0.2740072559598784, "learning_rate": 0.00018591295951217015, "loss": 1.1346, "step": 4441 }, { "epoch": 0.42, "grad_norm": 0.2889247502983577, "learning_rate": 0.00018590486268491748, "loss": 1.0459, "step": 4442 }, { "epoch": 0.43, "grad_norm": 0.2978227410800553, "learning_rate": 0.0001858967637078416, "loss": 1.0501, "step": 4443 }, { "epoch": 0.43, "grad_norm": 0.3018285492782609, "learning_rate": 0.00018588866258114524, "loss": 1.0894, "step": 4444 }, { "epoch": 0.43, "grad_norm": 0.28346415215862286, "learning_rate": 0.0001858805593050311, "loss": 0.9718, "step": 4445 }, { "epoch": 0.43, "grad_norm": 0.26039184338131427, "learning_rate": 0.000185872453879702, "loss": 1.1089, "step": 4446 }, { "epoch": 0.43, "grad_norm": 0.3206288442226837, "learning_rate": 0.0001858643463053608, "loss": 1.0651, "step": 4447 }, { "epoch": 0.43, "grad_norm": 0.265449658618547, "learning_rate": 0.00018585623658221034, "loss": 1.0637, "step": 4448 }, { "epoch": 0.43, "grad_norm": 0.29272693696344226, "learning_rate": 0.0001858481247104536, "loss": 1.0931, "step": 4449 }, { "epoch": 0.43, "grad_norm": 0.29578928026152973, "learning_rate": 0.0001858400106902936, "loss": 1.1793, "step": 4450 }, { "epoch": 0.43, "grad_norm": 0.27791665518502323, "learning_rate": 0.00018583189452193338, "loss": 1.0318, "step": 4451 }, { "epoch": 0.43, "grad_norm": 0.28708172386603614, "learning_rate": 0.00018582377620557602, "loss": 1.0001, "step": 4452 }, { "epoch": 0.43, "grad_norm": 0.2945398920234109, "learning_rate": 0.0001858156557414248, "loss": 1.0574, "step": 4453 }, { "epoch": 0.43, "grad_norm": 0.23921340949830877, "learning_rate": 0.0001858075331296828, "loss": 1.0104, "step": 4454 }, { "epoch": 0.43, "grad_norm": 0.3568337762102039, "learning_rate": 0.00018579940837055338, "loss": 0.9811, "step": 4455 }, { "epoch": 0.43, "grad_norm": 0.3247650997857825, "learning_rate": 0.00018579128146423984, "loss": 1.1677, "step": 4456 }, { "epoch": 0.43, "grad_norm": 0.3235988446494336, "learning_rate": 0.00018578315241094554, "loss": 1.1063, "step": 4457 }, { "epoch": 0.43, "grad_norm": 0.2910859031590426, "learning_rate": 0.00018577502121087396, "loss": 1.1122, "step": 4458 }, { "epoch": 0.43, "grad_norm": 0.2855175850221935, "learning_rate": 0.00018576688786422856, "loss": 1.0616, "step": 4459 }, { "epoch": 0.43, "grad_norm": 0.24829230055660786, "learning_rate": 0.0001857587523712129, "loss": 1.0424, "step": 4460 }, { "epoch": 0.43, "grad_norm": 0.2840641696870963, "learning_rate": 0.00018575061473203054, "loss": 1.2132, "step": 4461 }, { "epoch": 0.43, "grad_norm": 0.27078441085947547, "learning_rate": 0.0001857424749468852, "loss": 1.0858, "step": 4462 }, { "epoch": 0.43, "grad_norm": 0.285656535390425, "learning_rate": 0.0001857343330159805, "loss": 1.0003, "step": 4463 }, { "epoch": 0.43, "grad_norm": 0.2687099265187277, "learning_rate": 0.00018572618893952024, "loss": 1.0664, "step": 4464 }, { "epoch": 0.43, "grad_norm": 0.3181574270847497, "learning_rate": 0.00018571804271770822, "loss": 0.9845, "step": 4465 }, { "epoch": 0.43, "grad_norm": 0.25226801914556535, "learning_rate": 0.0001857098943507483, "loss": 1.1452, "step": 4466 }, { "epoch": 0.43, "grad_norm": 0.3055598706754259, "learning_rate": 0.00018570174383884442, "loss": 1.0501, "step": 4467 }, { "epoch": 0.43, "grad_norm": 0.2382755864156548, "learning_rate": 0.00018569359118220056, "loss": 1.0189, "step": 4468 }, { "epoch": 0.43, "grad_norm": 0.26719151556229226, "learning_rate": 0.00018568543638102072, "loss": 1.1856, "step": 4469 }, { "epoch": 0.43, "grad_norm": 0.2671695573995491, "learning_rate": 0.00018567727943550897, "loss": 1.0382, "step": 4470 }, { "epoch": 0.43, "grad_norm": 0.27765193324490084, "learning_rate": 0.00018566912034586946, "loss": 1.2756, "step": 4471 }, { "epoch": 0.43, "grad_norm": 0.27186513834244824, "learning_rate": 0.00018566095911230638, "loss": 1.0309, "step": 4472 }, { "epoch": 0.43, "grad_norm": 0.29391579590132144, "learning_rate": 0.00018565279573502392, "loss": 1.1029, "step": 4473 }, { "epoch": 0.43, "grad_norm": 0.30436200911544314, "learning_rate": 0.00018564463021422645, "loss": 1.0607, "step": 4474 }, { "epoch": 0.43, "grad_norm": 0.29885017590562324, "learning_rate": 0.00018563646255011828, "loss": 1.1022, "step": 4475 }, { "epoch": 0.43, "grad_norm": 0.30524559756359343, "learning_rate": 0.0001856282927429038, "loss": 1.1297, "step": 4476 }, { "epoch": 0.43, "grad_norm": 0.2862736844725348, "learning_rate": 0.0001856201207927875, "loss": 1.0444, "step": 4477 }, { "epoch": 0.43, "grad_norm": 0.27357248947278107, "learning_rate": 0.00018561194669997386, "loss": 1.1338, "step": 4478 }, { "epoch": 0.43, "grad_norm": 0.2642596700408733, "learning_rate": 0.00018560377046466747, "loss": 1.0775, "step": 4479 }, { "epoch": 0.43, "grad_norm": 0.28175880695705074, "learning_rate": 0.00018559559208707288, "loss": 1.1622, "step": 4480 }, { "epoch": 0.43, "grad_norm": 0.29569453857541267, "learning_rate": 0.00018558741156739483, "loss": 1.0761, "step": 4481 }, { "epoch": 0.43, "grad_norm": 0.2751922546444621, "learning_rate": 0.000185579228905838, "loss": 1.0507, "step": 4482 }, { "epoch": 0.43, "grad_norm": 0.27122561641213205, "learning_rate": 0.00018557104410260722, "loss": 1.174, "step": 4483 }, { "epoch": 0.43, "grad_norm": 0.2912196997412804, "learning_rate": 0.00018556285715790724, "loss": 1.0137, "step": 4484 }, { "epoch": 0.43, "grad_norm": 0.2696109196990019, "learning_rate": 0.00018555466807194303, "loss": 0.8727, "step": 4485 }, { "epoch": 0.43, "grad_norm": 0.27947099836689787, "learning_rate": 0.00018554647684491943, "loss": 1.1609, "step": 4486 }, { "epoch": 0.43, "grad_norm": 0.26675781929194203, "learning_rate": 0.00018553828347704152, "loss": 1.0248, "step": 4487 }, { "epoch": 0.43, "grad_norm": 0.2588683196044053, "learning_rate": 0.00018553008796851428, "loss": 1.0563, "step": 4488 }, { "epoch": 0.43, "grad_norm": 0.27999097742810836, "learning_rate": 0.00018552189031954285, "loss": 1.0313, "step": 4489 }, { "epoch": 0.43, "grad_norm": 0.2895515425733008, "learning_rate": 0.00018551369053033237, "loss": 1.0168, "step": 4490 }, { "epoch": 0.43, "grad_norm": 0.2545597330861161, "learning_rate": 0.00018550548860108804, "loss": 1.0956, "step": 4491 }, { "epoch": 0.43, "grad_norm": 0.26512014338659784, "learning_rate": 0.00018549728453201513, "loss": 1.1138, "step": 4492 }, { "epoch": 0.43, "grad_norm": 0.27434370289340704, "learning_rate": 0.0001854890783233189, "loss": 1.0605, "step": 4493 }, { "epoch": 0.43, "grad_norm": 0.2936485774839461, "learning_rate": 0.0001854808699752048, "loss": 1.0747, "step": 4494 }, { "epoch": 0.43, "grad_norm": 0.27403335709343807, "learning_rate": 0.00018547265948787818, "loss": 1.0193, "step": 4495 }, { "epoch": 0.43, "grad_norm": 0.28508114808522955, "learning_rate": 0.00018546444686154455, "loss": 1.0224, "step": 4496 }, { "epoch": 0.43, "grad_norm": 0.3342643589174626, "learning_rate": 0.00018545623209640941, "loss": 1.0092, "step": 4497 }, { "epoch": 0.43, "grad_norm": 0.26332940264787036, "learning_rate": 0.0001854480151926784, "loss": 1.1283, "step": 4498 }, { "epoch": 0.43, "grad_norm": 0.30979393657373167, "learning_rate": 0.00018543979615055705, "loss": 1.1229, "step": 4499 }, { "epoch": 0.43, "grad_norm": 0.29782251228163314, "learning_rate": 0.00018543157497025113, "loss": 1.0053, "step": 4500 }, { "epoch": 0.43, "grad_norm": 0.26274351011419134, "learning_rate": 0.00018542335165196635, "loss": 1.1258, "step": 4501 }, { "epoch": 0.43, "grad_norm": 0.3085288258220842, "learning_rate": 0.00018541512619590854, "loss": 1.0965, "step": 4502 }, { "epoch": 0.43, "grad_norm": 0.30190633074116374, "learning_rate": 0.00018540689860228348, "loss": 1.2104, "step": 4503 }, { "epoch": 0.43, "grad_norm": 0.2754205944826528, "learning_rate": 0.0001853986688712971, "loss": 1.1199, "step": 4504 }, { "epoch": 0.43, "grad_norm": 0.2590596310721408, "learning_rate": 0.00018539043700315538, "loss": 1.1025, "step": 4505 }, { "epoch": 0.43, "grad_norm": 0.24485764570082005, "learning_rate": 0.0001853822029980643, "loss": 0.9729, "step": 4506 }, { "epoch": 0.43, "grad_norm": 0.27774076617481674, "learning_rate": 0.00018537396685622994, "loss": 1.103, "step": 4507 }, { "epoch": 0.43, "grad_norm": 0.2682689587887445, "learning_rate": 0.00018536572857785842, "loss": 1.105, "step": 4508 }, { "epoch": 0.43, "grad_norm": 0.29122611914479773, "learning_rate": 0.00018535748816315585, "loss": 1.1096, "step": 4509 }, { "epoch": 0.43, "grad_norm": 0.23291917772580553, "learning_rate": 0.0001853492456123285, "loss": 1.1044, "step": 4510 }, { "epoch": 0.43, "grad_norm": 0.33929513760098673, "learning_rate": 0.00018534100092558266, "loss": 1.1069, "step": 4511 }, { "epoch": 0.43, "grad_norm": 0.2541464302266371, "learning_rate": 0.00018533275410312464, "loss": 1.043, "step": 4512 }, { "epoch": 0.43, "grad_norm": 0.2729757201162812, "learning_rate": 0.0001853245051451608, "loss": 1.0917, "step": 4513 }, { "epoch": 0.43, "grad_norm": 0.2881511710111317, "learning_rate": 0.00018531625405189761, "loss": 1.0334, "step": 4514 }, { "epoch": 0.43, "grad_norm": 0.3004052066232476, "learning_rate": 0.00018530800082354153, "loss": 1.2131, "step": 4515 }, { "epoch": 0.43, "grad_norm": 0.296301949311051, "learning_rate": 0.0001852997454602991, "loss": 1.153, "step": 4516 }, { "epoch": 0.43, "grad_norm": 0.26666605205862975, "learning_rate": 0.00018529148796237696, "loss": 1.0267, "step": 4517 }, { "epoch": 0.43, "grad_norm": 0.27466066458804184, "learning_rate": 0.00018528322832998172, "loss": 1.0631, "step": 4518 }, { "epoch": 0.43, "grad_norm": 0.2913602893847047, "learning_rate": 0.0001852749665633201, "loss": 1.1576, "step": 4519 }, { "epoch": 0.43, "grad_norm": 0.2631891203792342, "learning_rate": 0.00018526670266259885, "loss": 1.0626, "step": 4520 }, { "epoch": 0.43, "grad_norm": 0.33347156766562963, "learning_rate": 0.00018525843662802477, "loss": 1.0737, "step": 4521 }, { "epoch": 0.43, "grad_norm": 0.2790606063839219, "learning_rate": 0.00018525016845980473, "loss": 1.0466, "step": 4522 }, { "epoch": 0.43, "grad_norm": 0.2836733588666177, "learning_rate": 0.00018524189815814565, "loss": 1.0932, "step": 4523 }, { "epoch": 0.43, "grad_norm": 0.3028905196190124, "learning_rate": 0.0001852336257232545, "loss": 1.0616, "step": 4524 }, { "epoch": 0.43, "grad_norm": 0.2845873599717972, "learning_rate": 0.00018522535115533828, "loss": 1.0551, "step": 4525 }, { "epoch": 0.43, "grad_norm": 0.24471061186570162, "learning_rate": 0.0001852170744546041, "loss": 1.0166, "step": 4526 }, { "epoch": 0.43, "grad_norm": 0.2877270171258153, "learning_rate": 0.00018520879562125905, "loss": 1.0145, "step": 4527 }, { "epoch": 0.43, "grad_norm": 0.2770560293900152, "learning_rate": 0.00018520051465551038, "loss": 1.1089, "step": 4528 }, { "epoch": 0.43, "grad_norm": 0.27958349827422824, "learning_rate": 0.00018519223155756526, "loss": 1.1539, "step": 4529 }, { "epoch": 0.43, "grad_norm": 0.24834333728465557, "learning_rate": 0.000185183946327631, "loss": 1.1034, "step": 4530 }, { "epoch": 0.43, "grad_norm": 0.2749131019387281, "learning_rate": 0.00018517565896591494, "loss": 0.9705, "step": 4531 }, { "epoch": 0.43, "grad_norm": 0.2717292532230968, "learning_rate": 0.00018516736947262453, "loss": 1.0913, "step": 4532 }, { "epoch": 0.43, "grad_norm": 0.22308007763116086, "learning_rate": 0.00018515907784796712, "loss": 1.0885, "step": 4533 }, { "epoch": 0.43, "grad_norm": 0.2723580969909008, "learning_rate": 0.00018515078409215029, "loss": 1.0822, "step": 4534 }, { "epoch": 0.43, "grad_norm": 0.2723449059110442, "learning_rate": 0.00018514248820538157, "loss": 1.1214, "step": 4535 }, { "epoch": 0.43, "grad_norm": 0.2985439043563554, "learning_rate": 0.0001851341901878686, "loss": 1.0749, "step": 4536 }, { "epoch": 0.43, "grad_norm": 0.26995390366164945, "learning_rate": 0.000185125890039819, "loss": 1.1233, "step": 4537 }, { "epoch": 0.43, "grad_norm": 0.2852650589577388, "learning_rate": 0.00018511758776144048, "loss": 1.135, "step": 4538 }, { "epoch": 0.43, "grad_norm": 0.25495506751354075, "learning_rate": 0.0001851092833529408, "loss": 1.0329, "step": 4539 }, { "epoch": 0.43, "grad_norm": 0.2788212963607499, "learning_rate": 0.0001851009768145279, "loss": 1.0452, "step": 4540 }, { "epoch": 0.43, "grad_norm": 0.29286510945056726, "learning_rate": 0.00018509266814640952, "loss": 1.0701, "step": 4541 }, { "epoch": 0.43, "grad_norm": 0.298753776585502, "learning_rate": 0.00018508435734879367, "loss": 1.0797, "step": 4542 }, { "epoch": 0.43, "grad_norm": 0.2893400312544143, "learning_rate": 0.00018507604442188826, "loss": 1.1236, "step": 4543 }, { "epoch": 0.43, "grad_norm": 0.297108222760269, "learning_rate": 0.0001850677293659014, "loss": 1.1958, "step": 4544 }, { "epoch": 0.43, "grad_norm": 0.3026573746982335, "learning_rate": 0.00018505941218104112, "loss": 1.0817, "step": 4545 }, { "epoch": 0.43, "grad_norm": 0.27275443083535883, "learning_rate": 0.00018505109286751564, "loss": 1.0752, "step": 4546 }, { "epoch": 0.44, "grad_norm": 0.27885346389765003, "learning_rate": 0.00018504277142553308, "loss": 1.0219, "step": 4547 }, { "epoch": 0.44, "grad_norm": 0.3180339146414116, "learning_rate": 0.00018503444785530172, "loss": 1.0987, "step": 4548 }, { "epoch": 0.44, "grad_norm": 0.2808155483149112, "learning_rate": 0.00018502612215702988, "loss": 1.073, "step": 4549 }, { "epoch": 0.44, "grad_norm": 0.25391611727242125, "learning_rate": 0.00018501779433092587, "loss": 1.0876, "step": 4550 }, { "epoch": 0.44, "grad_norm": 0.2261609383943027, "learning_rate": 0.00018500946437719813, "loss": 1.0297, "step": 4551 }, { "epoch": 0.44, "grad_norm": 0.2754443425644491, "learning_rate": 0.00018500113229605512, "loss": 1.082, "step": 4552 }, { "epoch": 0.44, "grad_norm": 0.27586769729030647, "learning_rate": 0.00018499279808770536, "loss": 1.137, "step": 4553 }, { "epoch": 0.44, "grad_norm": 0.31905598519632444, "learning_rate": 0.0001849844617523574, "loss": 1.1845, "step": 4554 }, { "epoch": 0.44, "grad_norm": 0.23729193980262855, "learning_rate": 0.00018497612329021988, "loss": 0.9061, "step": 4555 }, { "epoch": 0.44, "grad_norm": 0.2782155585428631, "learning_rate": 0.00018496778270150145, "loss": 1.1527, "step": 4556 }, { "epoch": 0.44, "grad_norm": 0.2710366956832406, "learning_rate": 0.0001849594399864109, "loss": 1.0705, "step": 4557 }, { "epoch": 0.44, "grad_norm": 0.22796189873292866, "learning_rate": 0.00018495109514515693, "loss": 1.0695, "step": 4558 }, { "epoch": 0.44, "grad_norm": 0.2831241401573468, "learning_rate": 0.00018494274817794842, "loss": 0.9582, "step": 4559 }, { "epoch": 0.44, "grad_norm": 0.2824780217557077, "learning_rate": 0.0001849343990849943, "loss": 1.0315, "step": 4560 }, { "epoch": 0.44, "grad_norm": 0.270556872472652, "learning_rate": 0.0001849260478665034, "loss": 1.0019, "step": 4561 }, { "epoch": 0.44, "grad_norm": 0.25499089437909833, "learning_rate": 0.00018491769452268482, "loss": 1.0728, "step": 4562 }, { "epoch": 0.44, "grad_norm": 0.2908571742460526, "learning_rate": 0.00018490933905374754, "loss": 1.0077, "step": 4563 }, { "epoch": 0.44, "grad_norm": 0.28487382701620256, "learning_rate": 0.0001849009814599007, "loss": 1.0629, "step": 4564 }, { "epoch": 0.44, "grad_norm": 0.26175206299618, "learning_rate": 0.00018489262174135345, "loss": 1.1802, "step": 4565 }, { "epoch": 0.44, "grad_norm": 0.2881582041583295, "learning_rate": 0.00018488425989831496, "loss": 0.9577, "step": 4566 }, { "epoch": 0.44, "grad_norm": 0.2833504131778664, "learning_rate": 0.00018487589593099455, "loss": 1.2034, "step": 4567 }, { "epoch": 0.44, "grad_norm": 0.2623734320038995, "learning_rate": 0.00018486752983960146, "loss": 1.1153, "step": 4568 }, { "epoch": 0.44, "grad_norm": 0.2522517398867482, "learning_rate": 0.00018485916162434515, "loss": 1.0618, "step": 4569 }, { "epoch": 0.44, "grad_norm": 0.24153711019667667, "learning_rate": 0.00018485079128543496, "loss": 1.0822, "step": 4570 }, { "epoch": 0.44, "grad_norm": 0.2882476151134348, "learning_rate": 0.0001848424188230804, "loss": 1.1657, "step": 4571 }, { "epoch": 0.44, "grad_norm": 0.2964951820277572, "learning_rate": 0.00018483404423749096, "loss": 1.1222, "step": 4572 }, { "epoch": 0.44, "grad_norm": 0.27784406217795626, "learning_rate": 0.00018482566752887628, "loss": 1.0545, "step": 4573 }, { "epoch": 0.44, "grad_norm": 0.26707431862694997, "learning_rate": 0.00018481728869744596, "loss": 1.0567, "step": 4574 }, { "epoch": 0.44, "grad_norm": 0.3017539840667631, "learning_rate": 0.00018480890774340964, "loss": 1.1777, "step": 4575 }, { "epoch": 0.44, "grad_norm": 0.3113524234554564, "learning_rate": 0.00018480052466697715, "loss": 1.1013, "step": 4576 }, { "epoch": 0.44, "grad_norm": 0.308306701323019, "learning_rate": 0.00018479213946835822, "loss": 1.1211, "step": 4577 }, { "epoch": 0.44, "grad_norm": 0.2710413287763368, "learning_rate": 0.00018478375214776272, "loss": 1.1882, "step": 4578 }, { "epoch": 0.44, "grad_norm": 0.31364976820788937, "learning_rate": 0.00018477536270540052, "loss": 1.0807, "step": 4579 }, { "epoch": 0.44, "grad_norm": 0.25048911528630063, "learning_rate": 0.00018476697114148158, "loss": 1.0959, "step": 4580 }, { "epoch": 0.44, "grad_norm": 0.2663984503966, "learning_rate": 0.00018475857745621594, "loss": 1.0236, "step": 4581 }, { "epoch": 0.44, "grad_norm": 0.26614213923311075, "learning_rate": 0.00018475018164981362, "loss": 1.0751, "step": 4582 }, { "epoch": 0.44, "grad_norm": 0.2814084782252928, "learning_rate": 0.00018474178372248474, "loss": 0.9935, "step": 4583 }, { "epoch": 0.44, "grad_norm": 0.25798031430362306, "learning_rate": 0.00018473338367443946, "loss": 0.9938, "step": 4584 }, { "epoch": 0.44, "grad_norm": 0.2865428384395744, "learning_rate": 0.00018472498150588803, "loss": 1.0878, "step": 4585 }, { "epoch": 0.44, "grad_norm": 0.3077652321307214, "learning_rate": 0.00018471657721704066, "loss": 1.1567, "step": 4586 }, { "epoch": 0.44, "grad_norm": 0.31452880319912974, "learning_rate": 0.0001847081708081077, "loss": 1.1304, "step": 4587 }, { "epoch": 0.44, "grad_norm": 0.2763324776356194, "learning_rate": 0.00018469976227929955, "loss": 1.0634, "step": 4588 }, { "epoch": 0.44, "grad_norm": 0.29691042342244073, "learning_rate": 0.0001846913516308266, "loss": 1.1236, "step": 4589 }, { "epoch": 0.44, "grad_norm": 0.2846334629692399, "learning_rate": 0.00018468293886289935, "loss": 0.9717, "step": 4590 }, { "epoch": 0.44, "grad_norm": 0.2668935421303119, "learning_rate": 0.00018467452397572833, "loss": 1.082, "step": 4591 }, { "epoch": 0.44, "grad_norm": 0.28376985073992644, "learning_rate": 0.00018466610696952416, "loss": 1.088, "step": 4592 }, { "epoch": 0.44, "grad_norm": 0.3014664421717131, "learning_rate": 0.00018465768784449742, "loss": 0.9671, "step": 4593 }, { "epoch": 0.44, "grad_norm": 0.27756706076476795, "learning_rate": 0.00018464926660085885, "loss": 1.0486, "step": 4594 }, { "epoch": 0.44, "grad_norm": 0.31276907084202593, "learning_rate": 0.00018464084323881918, "loss": 1.1944, "step": 4595 }, { "epoch": 0.44, "grad_norm": 0.2710547392306981, "learning_rate": 0.00018463241775858923, "loss": 1.0873, "step": 4596 }, { "epoch": 0.44, "grad_norm": 0.2726412664274147, "learning_rate": 0.00018462399016037982, "loss": 0.975, "step": 4597 }, { "epoch": 0.44, "grad_norm": 0.28160638881053474, "learning_rate": 0.00018461556044440186, "loss": 1.092, "step": 4598 }, { "epoch": 0.44, "grad_norm": 0.2725964852327896, "learning_rate": 0.00018460712861086633, "loss": 1.1128, "step": 4599 }, { "epoch": 0.44, "grad_norm": 0.27984134666173177, "learning_rate": 0.00018459869465998425, "loss": 1.1321, "step": 4600 }, { "epoch": 0.44, "grad_norm": 0.2863490711350883, "learning_rate": 0.00018459025859196663, "loss": 1.1366, "step": 4601 }, { "epoch": 0.44, "grad_norm": 0.2879506583130578, "learning_rate": 0.00018458182040702466, "loss": 1.1197, "step": 4602 }, { "epoch": 0.44, "grad_norm": 0.26719713518673577, "learning_rate": 0.00018457338010536946, "loss": 0.9944, "step": 4603 }, { "epoch": 0.44, "grad_norm": 0.3121012835925408, "learning_rate": 0.0001845649376872123, "loss": 1.198, "step": 4604 }, { "epoch": 0.44, "grad_norm": 0.30448939179247264, "learning_rate": 0.0001845564931527644, "loss": 0.9822, "step": 4605 }, { "epoch": 0.44, "grad_norm": 0.2735559381750156, "learning_rate": 0.00018454804650223713, "loss": 1.0978, "step": 4606 }, { "epoch": 0.44, "grad_norm": 0.2555127503868371, "learning_rate": 0.0001845395977358418, "loss": 1.0871, "step": 4607 }, { "epoch": 0.44, "grad_norm": 0.2784512056488346, "learning_rate": 0.00018453114685379, "loss": 1.1117, "step": 4608 }, { "epoch": 0.44, "grad_norm": 0.27412619492911333, "learning_rate": 0.0001845226938562931, "loss": 0.9858, "step": 4609 }, { "epoch": 0.44, "grad_norm": 0.3198996291332153, "learning_rate": 0.00018451423874356261, "loss": 1.0908, "step": 4610 }, { "epoch": 0.44, "grad_norm": 0.2893727277713405, "learning_rate": 0.00018450578151581022, "loss": 1.1565, "step": 4611 }, { "epoch": 0.44, "grad_norm": 0.3012087634762126, "learning_rate": 0.00018449732217324754, "loss": 1.1766, "step": 4612 }, { "epoch": 0.44, "grad_norm": 0.29522825483614856, "learning_rate": 0.00018448886071608625, "loss": 1.2191, "step": 4613 }, { "epoch": 0.44, "grad_norm": 0.27761023884520314, "learning_rate": 0.00018448039714453814, "loss": 1.2029, "step": 4614 }, { "epoch": 0.44, "grad_norm": 0.22548736584501833, "learning_rate": 0.000184471931458815, "loss": 1.0772, "step": 4615 }, { "epoch": 0.44, "grad_norm": 0.28551218282274554, "learning_rate": 0.00018446346365912867, "loss": 1.1001, "step": 4616 }, { "epoch": 0.44, "grad_norm": 0.29640261684492736, "learning_rate": 0.0001844549937456911, "loss": 1.0567, "step": 4617 }, { "epoch": 0.44, "grad_norm": 0.26167728449746586, "learning_rate": 0.0001844465217187142, "loss": 1.0648, "step": 4618 }, { "epoch": 0.44, "grad_norm": 0.27822656705075727, "learning_rate": 0.00018443804757841003, "loss": 1.1127, "step": 4619 }, { "epoch": 0.44, "grad_norm": 0.2904755993031546, "learning_rate": 0.00018442957132499069, "loss": 1.0909, "step": 4620 }, { "epoch": 0.44, "grad_norm": 0.28052003011517346, "learning_rate": 0.00018442109295866823, "loss": 1.0709, "step": 4621 }, { "epoch": 0.44, "grad_norm": 0.25770690828327825, "learning_rate": 0.00018441261247965487, "loss": 1.1385, "step": 4622 }, { "epoch": 0.44, "grad_norm": 0.256212600739427, "learning_rate": 0.00018440412988816283, "loss": 1.0721, "step": 4623 }, { "epoch": 0.44, "grad_norm": 0.2749581272974963, "learning_rate": 0.0001843956451844044, "loss": 1.1453, "step": 4624 }, { "epoch": 0.44, "grad_norm": 0.3046942593317576, "learning_rate": 0.0001843871583685919, "loss": 1.189, "step": 4625 }, { "epoch": 0.44, "grad_norm": 0.28752913128850266, "learning_rate": 0.00018437866944093773, "loss": 1.2043, "step": 4626 }, { "epoch": 0.44, "grad_norm": 0.2900180388718663, "learning_rate": 0.00018437017840165434, "loss": 1.1533, "step": 4627 }, { "epoch": 0.44, "grad_norm": 0.26073951801493656, "learning_rate": 0.0001843616852509542, "loss": 1.0558, "step": 4628 }, { "epoch": 0.44, "grad_norm": 0.3077527251913004, "learning_rate": 0.00018435318998904986, "loss": 1.0387, "step": 4629 }, { "epoch": 0.44, "grad_norm": 0.29483737359579054, "learning_rate": 0.00018434469261615393, "loss": 1.0913, "step": 4630 }, { "epoch": 0.44, "grad_norm": 0.2933924970818525, "learning_rate": 0.00018433619313247906, "loss": 1.0347, "step": 4631 }, { "epoch": 0.44, "grad_norm": 0.26212171413860036, "learning_rate": 0.00018432769153823797, "loss": 1.0702, "step": 4632 }, { "epoch": 0.44, "grad_norm": 0.2546684009326573, "learning_rate": 0.00018431918783364337, "loss": 1.028, "step": 4633 }, { "epoch": 0.44, "grad_norm": 0.24838393365899913, "learning_rate": 0.00018431068201890812, "loss": 1.0165, "step": 4634 }, { "epoch": 0.44, "grad_norm": 0.3099036311014584, "learning_rate": 0.00018430217409424505, "loss": 0.9548, "step": 4635 }, { "epoch": 0.44, "grad_norm": 0.2759430315688091, "learning_rate": 0.00018429366405986713, "loss": 1.149, "step": 4636 }, { "epoch": 0.44, "grad_norm": 0.2953749611748448, "learning_rate": 0.00018428515191598726, "loss": 1.1464, "step": 4637 }, { "epoch": 0.44, "grad_norm": 0.2654249301676195, "learning_rate": 0.0001842766376628185, "loss": 1.0327, "step": 4638 }, { "epoch": 0.44, "grad_norm": 0.2802886640388649, "learning_rate": 0.0001842681213005739, "loss": 1.1143, "step": 4639 }, { "epoch": 0.44, "grad_norm": 0.29564078608147715, "learning_rate": 0.00018425960282946661, "loss": 0.8881, "step": 4640 }, { "epoch": 0.44, "grad_norm": 0.2925251530239038, "learning_rate": 0.00018425108224970983, "loss": 1.1731, "step": 4641 }, { "epoch": 0.44, "grad_norm": 0.2900983438301286, "learning_rate": 0.00018424255956151674, "loss": 1.177, "step": 4642 }, { "epoch": 0.44, "grad_norm": 0.31019044957642683, "learning_rate": 0.00018423403476510065, "loss": 1.1355, "step": 4643 }, { "epoch": 0.44, "grad_norm": 0.3213591456521516, "learning_rate": 0.00018422550786067492, "loss": 1.077, "step": 4644 }, { "epoch": 0.44, "grad_norm": 0.2677075004163226, "learning_rate": 0.0001842169788484529, "loss": 1.0312, "step": 4645 }, { "epoch": 0.44, "grad_norm": 0.25726221862210036, "learning_rate": 0.0001842084477286481, "loss": 0.9965, "step": 4646 }, { "epoch": 0.44, "grad_norm": 0.29706048227507303, "learning_rate": 0.00018419991450147394, "loss": 1.0823, "step": 4647 }, { "epoch": 0.44, "grad_norm": 0.2567670140513427, "learning_rate": 0.000184191379167144, "loss": 1.0802, "step": 4648 }, { "epoch": 0.44, "grad_norm": 0.2730948766094166, "learning_rate": 0.00018418284172587188, "loss": 1.0743, "step": 4649 }, { "epoch": 0.44, "grad_norm": 0.2576764085593571, "learning_rate": 0.00018417430217787124, "loss": 1.0818, "step": 4650 }, { "epoch": 0.44, "grad_norm": 0.28479973480576726, "learning_rate": 0.00018416576052335582, "loss": 1.0513, "step": 4651 }, { "epoch": 0.45, "grad_norm": 0.28752546745083385, "learning_rate": 0.0001841572167625393, "loss": 1.1685, "step": 4652 }, { "epoch": 0.45, "grad_norm": 0.2846736795661952, "learning_rate": 0.00018414867089563557, "loss": 1.2087, "step": 4653 }, { "epoch": 0.45, "grad_norm": 0.27455162051832954, "learning_rate": 0.00018414012292285845, "loss": 1.0672, "step": 4654 }, { "epoch": 0.45, "grad_norm": 0.26145375283055017, "learning_rate": 0.00018413157284442186, "loss": 1.0861, "step": 4655 }, { "epoch": 0.45, "grad_norm": 0.2871748977574908, "learning_rate": 0.0001841230206605398, "loss": 1.168, "step": 4656 }, { "epoch": 0.45, "grad_norm": 0.26086270240508314, "learning_rate": 0.00018411446637142632, "loss": 1.1131, "step": 4657 }, { "epoch": 0.45, "grad_norm": 0.26968127971681205, "learning_rate": 0.0001841059099772954, "loss": 1.0536, "step": 4658 }, { "epoch": 0.45, "grad_norm": 0.2665866276637478, "learning_rate": 0.00018409735147836124, "loss": 1.2219, "step": 4659 }, { "epoch": 0.45, "grad_norm": 0.2788104688731091, "learning_rate": 0.000184088790874838, "loss": 1.0498, "step": 4660 }, { "epoch": 0.45, "grad_norm": 0.3042257990864148, "learning_rate": 0.00018408022816693994, "loss": 1.1032, "step": 4661 }, { "epoch": 0.45, "grad_norm": 0.23891458103453578, "learning_rate": 0.0001840716633548813, "loss": 1.1667, "step": 4662 }, { "epoch": 0.45, "grad_norm": 0.2668685305184029, "learning_rate": 0.00018406309643887649, "loss": 1.0313, "step": 4663 }, { "epoch": 0.45, "grad_norm": 0.26923096074860575, "learning_rate": 0.0001840545274191398, "loss": 1.096, "step": 4664 }, { "epoch": 0.45, "grad_norm": 0.26120691585456396, "learning_rate": 0.0001840459562958858, "loss": 1.0147, "step": 4665 }, { "epoch": 0.45, "grad_norm": 0.279287076136616, "learning_rate": 0.0001840373830693289, "loss": 1.12, "step": 4666 }, { "epoch": 0.45, "grad_norm": 0.2517878873784173, "learning_rate": 0.00018402880773968363, "loss": 1.0207, "step": 4667 }, { "epoch": 0.45, "grad_norm": 0.2554752277219163, "learning_rate": 0.00018402023030716469, "loss": 1.1272, "step": 4668 }, { "epoch": 0.45, "grad_norm": 0.24985971217131164, "learning_rate": 0.00018401165077198666, "loss": 1.0905, "step": 4669 }, { "epoch": 0.45, "grad_norm": 0.2515290610443281, "learning_rate": 0.0001840030691343643, "loss": 1.1553, "step": 4670 }, { "epoch": 0.45, "grad_norm": 0.2632716739172169, "learning_rate": 0.00018399448539451228, "loss": 1.0571, "step": 4671 }, { "epoch": 0.45, "grad_norm": 0.29388108923128464, "learning_rate": 0.00018398589955264552, "loss": 1.0905, "step": 4672 }, { "epoch": 0.45, "grad_norm": 0.34065302184228613, "learning_rate": 0.00018397731160897882, "loss": 1.118, "step": 4673 }, { "epoch": 0.45, "grad_norm": 0.26474232885623084, "learning_rate": 0.00018396872156372713, "loss": 1.1804, "step": 4674 }, { "epoch": 0.45, "grad_norm": 0.23096264177639642, "learning_rate": 0.00018396012941710542, "loss": 1.0124, "step": 4675 }, { "epoch": 0.45, "grad_norm": 0.26143495936449157, "learning_rate": 0.00018395153516932868, "loss": 1.0354, "step": 4676 }, { "epoch": 0.45, "grad_norm": 0.2909688243868478, "learning_rate": 0.00018394293882061203, "loss": 1.0682, "step": 4677 }, { "epoch": 0.45, "grad_norm": 0.2786275126719699, "learning_rate": 0.00018393434037117056, "loss": 1.1167, "step": 4678 }, { "epoch": 0.45, "grad_norm": 0.31697489334219714, "learning_rate": 0.0001839257398212195, "loss": 1.0991, "step": 4679 }, { "epoch": 0.45, "grad_norm": 0.27077228765384886, "learning_rate": 0.00018391713717097404, "loss": 0.9453, "step": 4680 }, { "epoch": 0.45, "grad_norm": 0.2762723786687761, "learning_rate": 0.0001839085324206495, "loss": 1.1394, "step": 4681 }, { "epoch": 0.45, "grad_norm": 0.2904037438324207, "learning_rate": 0.00018389992557046116, "loss": 0.9806, "step": 4682 }, { "epoch": 0.45, "grad_norm": 0.33416394497118107, "learning_rate": 0.00018389131662062449, "loss": 1.0519, "step": 4683 }, { "epoch": 0.45, "grad_norm": 0.280572824730302, "learning_rate": 0.00018388270557135488, "loss": 1.0679, "step": 4684 }, { "epoch": 0.45, "grad_norm": 0.318148944370186, "learning_rate": 0.00018387409242286786, "loss": 1.0326, "step": 4685 }, { "epoch": 0.45, "grad_norm": 0.2592469384169005, "learning_rate": 0.00018386547717537895, "loss": 1.154, "step": 4686 }, { "epoch": 0.45, "grad_norm": 0.24639073939502068, "learning_rate": 0.00018385685982910376, "loss": 1.0404, "step": 4687 }, { "epoch": 0.45, "grad_norm": 0.305988694893456, "learning_rate": 0.00018384824038425796, "loss": 1.0658, "step": 4688 }, { "epoch": 0.45, "grad_norm": 0.29161415294102544, "learning_rate": 0.00018383961884105724, "loss": 1.0198, "step": 4689 }, { "epoch": 0.45, "grad_norm": 0.2679552986606862, "learning_rate": 0.00018383099519971737, "loss": 1.081, "step": 4690 }, { "epoch": 0.45, "grad_norm": 0.27720828972286177, "learning_rate": 0.00018382236946045416, "loss": 1.1043, "step": 4691 }, { "epoch": 0.45, "grad_norm": 0.26516410427337717, "learning_rate": 0.0001838137416234835, "loss": 0.9742, "step": 4692 }, { "epoch": 0.45, "grad_norm": 0.27803791295610497, "learning_rate": 0.00018380511168902128, "loss": 1.0465, "step": 4693 }, { "epoch": 0.45, "grad_norm": 0.3744328239378393, "learning_rate": 0.00018379647965728344, "loss": 1.1089, "step": 4694 }, { "epoch": 0.45, "grad_norm": 0.2973942314834355, "learning_rate": 0.00018378784552848605, "loss": 1.0153, "step": 4695 }, { "epoch": 0.45, "grad_norm": 0.26659388968085823, "learning_rate": 0.00018377920930284515, "loss": 1.1312, "step": 4696 }, { "epoch": 0.45, "grad_norm": 0.25981005331538953, "learning_rate": 0.0001837705709805769, "loss": 1.0054, "step": 4697 }, { "epoch": 0.45, "grad_norm": 0.22213075334205962, "learning_rate": 0.00018376193056189745, "loss": 1.015, "step": 4698 }, { "epoch": 0.45, "grad_norm": 0.275135826725204, "learning_rate": 0.00018375328804702304, "loss": 0.965, "step": 4699 }, { "epoch": 0.45, "grad_norm": 0.29035632968889363, "learning_rate": 0.00018374464343617, "loss": 1.0289, "step": 4700 }, { "epoch": 0.45, "grad_norm": 0.27764015636676737, "learning_rate": 0.0001837359967295546, "loss": 1.0597, "step": 4701 }, { "epoch": 0.45, "grad_norm": 0.25677252798632294, "learning_rate": 0.00018372734792739323, "loss": 1.0364, "step": 4702 }, { "epoch": 0.45, "grad_norm": 0.3040774439028437, "learning_rate": 0.0001837186970299024, "loss": 1.157, "step": 4703 }, { "epoch": 0.45, "grad_norm": 0.2693075199539607, "learning_rate": 0.00018371004403729853, "loss": 1.0576, "step": 4704 }, { "epoch": 0.45, "grad_norm": 0.29196540371409324, "learning_rate": 0.0001837013889497982, "loss": 1.1876, "step": 4705 }, { "epoch": 0.45, "grad_norm": 0.2926321409398997, "learning_rate": 0.00018369273176761802, "loss": 1.1449, "step": 4706 }, { "epoch": 0.45, "grad_norm": 0.26895329592482214, "learning_rate": 0.00018368407249097466, "loss": 1.0901, "step": 4707 }, { "epoch": 0.45, "grad_norm": 0.2606964589821753, "learning_rate": 0.00018367541112008476, "loss": 1.1665, "step": 4708 }, { "epoch": 0.45, "grad_norm": 0.3222647861912937, "learning_rate": 0.0001836667476551651, "loss": 1.0423, "step": 4709 }, { "epoch": 0.45, "grad_norm": 0.28474742820692894, "learning_rate": 0.00018365808209643253, "loss": 1.0235, "step": 4710 }, { "epoch": 0.45, "grad_norm": 0.29857635596649795, "learning_rate": 0.00018364941444410385, "loss": 1.1574, "step": 4711 }, { "epoch": 0.45, "grad_norm": 0.3222102111839974, "learning_rate": 0.00018364074469839602, "loss": 1.1667, "step": 4712 }, { "epoch": 0.45, "grad_norm": 0.31112582358071006, "learning_rate": 0.00018363207285952595, "loss": 1.152, "step": 4713 }, { "epoch": 0.45, "grad_norm": 0.26706361932201095, "learning_rate": 0.00018362339892771072, "loss": 1.1072, "step": 4714 }, { "epoch": 0.45, "grad_norm": 0.3008566236074501, "learning_rate": 0.00018361472290316736, "loss": 1.0795, "step": 4715 }, { "epoch": 0.45, "grad_norm": 0.2802364960499926, "learning_rate": 0.00018360604478611303, "loss": 1.062, "step": 4716 }, { "epoch": 0.45, "grad_norm": 0.25442275739177317, "learning_rate": 0.00018359736457676488, "loss": 1.0775, "step": 4717 }, { "epoch": 0.45, "grad_norm": 0.28931573899470037, "learning_rate": 0.00018358868227534014, "loss": 1.1024, "step": 4718 }, { "epoch": 0.45, "grad_norm": 0.256028110974589, "learning_rate": 0.0001835799978820561, "loss": 1.1924, "step": 4719 }, { "epoch": 0.45, "grad_norm": 0.2645246955746137, "learning_rate": 0.00018357131139713008, "loss": 1.1301, "step": 4720 }, { "epoch": 0.45, "grad_norm": 0.3130330679624715, "learning_rate": 0.0001835626228207795, "loss": 1.0055, "step": 4721 }, { "epoch": 0.45, "grad_norm": 0.28521400562617844, "learning_rate": 0.00018355393215322173, "loss": 1.1705, "step": 4722 }, { "epoch": 0.45, "grad_norm": 0.23137236678760192, "learning_rate": 0.0001835452393946743, "loss": 1.0553, "step": 4723 }, { "epoch": 0.45, "grad_norm": 0.2716401502501313, "learning_rate": 0.00018353654454535473, "loss": 0.9819, "step": 4724 }, { "epoch": 0.45, "grad_norm": 0.25530206196101923, "learning_rate": 0.00018352784760548066, "loss": 1.0459, "step": 4725 }, { "epoch": 0.45, "grad_norm": 0.3028518361428745, "learning_rate": 0.0001835191485752697, "loss": 1.1051, "step": 4726 }, { "epoch": 0.45, "grad_norm": 0.266666477668348, "learning_rate": 0.00018351044745493957, "loss": 1.0546, "step": 4727 }, { "epoch": 0.45, "grad_norm": 0.270858102859276, "learning_rate": 0.000183501744244708, "loss": 1.1381, "step": 4728 }, { "epoch": 0.45, "grad_norm": 0.2626211922077772, "learning_rate": 0.0001834930389447928, "loss": 1.0571, "step": 4729 }, { "epoch": 0.45, "grad_norm": 0.25526998893008135, "learning_rate": 0.00018348433155541182, "loss": 1.0052, "step": 4730 }, { "epoch": 0.45, "grad_norm": 0.2578903383304173, "learning_rate": 0.000183475622076783, "loss": 0.9924, "step": 4731 }, { "epoch": 0.45, "grad_norm": 0.2413218945569206, "learning_rate": 0.00018346691050912423, "loss": 1.1513, "step": 4732 }, { "epoch": 0.45, "grad_norm": 0.2446885085320821, "learning_rate": 0.0001834581968526536, "loss": 1.0376, "step": 4733 }, { "epoch": 0.45, "grad_norm": 0.3680136627218759, "learning_rate": 0.00018344948110758912, "loss": 0.9561, "step": 4734 }, { "epoch": 0.45, "grad_norm": 0.2723843485796278, "learning_rate": 0.00018344076327414896, "loss": 1.0291, "step": 4735 }, { "epoch": 0.45, "grad_norm": 0.3027706624868208, "learning_rate": 0.00018343204335255123, "loss": 1.1075, "step": 4736 }, { "epoch": 0.45, "grad_norm": 0.27483257758567703, "learning_rate": 0.00018342332134301418, "loss": 1.0461, "step": 4737 }, { "epoch": 0.45, "grad_norm": 0.30639774434247813, "learning_rate": 0.00018341459724575612, "loss": 1.1396, "step": 4738 }, { "epoch": 0.45, "grad_norm": 0.30130628173745716, "learning_rate": 0.00018340587106099532, "loss": 1.0496, "step": 4739 }, { "epoch": 0.45, "grad_norm": 0.2951096190186332, "learning_rate": 0.00018339714278895017, "loss": 1.0468, "step": 4740 }, { "epoch": 0.45, "grad_norm": 0.255641750420738, "learning_rate": 0.0001833884124298391, "loss": 0.9368, "step": 4741 }, { "epoch": 0.45, "grad_norm": 0.28182596587401804, "learning_rate": 0.00018337967998388062, "loss": 1.1615, "step": 4742 }, { "epoch": 0.45, "grad_norm": 0.2654967829673649, "learning_rate": 0.00018337094545129327, "loss": 1.1393, "step": 4743 }, { "epoch": 0.45, "grad_norm": 0.2809627165424522, "learning_rate": 0.00018336220883229557, "loss": 1.0288, "step": 4744 }, { "epoch": 0.45, "grad_norm": 0.3063917931895973, "learning_rate": 0.0001833534701271062, "loss": 1.1096, "step": 4745 }, { "epoch": 0.45, "grad_norm": 0.2978525520912866, "learning_rate": 0.00018334472933594388, "loss": 1.1158, "step": 4746 }, { "epoch": 0.45, "grad_norm": 0.2957907023562577, "learning_rate": 0.00018333598645902733, "loss": 1.0369, "step": 4747 }, { "epoch": 0.45, "grad_norm": 0.23971767785327205, "learning_rate": 0.00018332724149657534, "loss": 1.0324, "step": 4748 }, { "epoch": 0.45, "grad_norm": 0.25958963718826317, "learning_rate": 0.00018331849444880676, "loss": 1.0323, "step": 4749 }, { "epoch": 0.45, "grad_norm": 0.2702035936636161, "learning_rate": 0.00018330974531594046, "loss": 1.146, "step": 4750 }, { "epoch": 0.45, "grad_norm": 0.30569382745175333, "learning_rate": 0.00018330099409819548, "loss": 1.1357, "step": 4751 }, { "epoch": 0.45, "grad_norm": 0.2820062684104576, "learning_rate": 0.00018329224079579072, "loss": 1.1124, "step": 4752 }, { "epoch": 0.45, "grad_norm": 0.2829441998287804, "learning_rate": 0.0001832834854089453, "loss": 1.0883, "step": 4753 }, { "epoch": 0.45, "grad_norm": 0.30058225164600477, "learning_rate": 0.00018327472793787833, "loss": 1.1226, "step": 4754 }, { "epoch": 0.45, "grad_norm": 0.2806946975067004, "learning_rate": 0.00018326596838280897, "loss": 1.0858, "step": 4755 }, { "epoch": 0.46, "grad_norm": 0.26835184504189624, "learning_rate": 0.0001832572067439564, "loss": 1.0372, "step": 4756 }, { "epoch": 0.46, "grad_norm": 0.2284738744280186, "learning_rate": 0.00018324844302153992, "loss": 0.9814, "step": 4757 }, { "epoch": 0.46, "grad_norm": 0.2678772918055495, "learning_rate": 0.00018323967721577881, "loss": 1.1667, "step": 4758 }, { "epoch": 0.46, "grad_norm": 0.2842258914883526, "learning_rate": 0.00018323090932689248, "loss": 1.0497, "step": 4759 }, { "epoch": 0.46, "grad_norm": 0.2909027276625095, "learning_rate": 0.00018322213935510035, "loss": 1.083, "step": 4760 }, { "epoch": 0.46, "grad_norm": 0.2861834538414527, "learning_rate": 0.00018321336730062185, "loss": 1.2472, "step": 4761 }, { "epoch": 0.46, "grad_norm": 0.3061550280271623, "learning_rate": 0.00018320459316367656, "loss": 1.0457, "step": 4762 }, { "epoch": 0.46, "grad_norm": 0.29646221655536087, "learning_rate": 0.00018319581694448402, "loss": 1.2093, "step": 4763 }, { "epoch": 0.46, "grad_norm": 0.24948311138239332, "learning_rate": 0.00018318703864326387, "loss": 1.0602, "step": 4764 }, { "epoch": 0.46, "grad_norm": 0.2972189282962595, "learning_rate": 0.0001831782582602358, "loss": 1.0545, "step": 4765 }, { "epoch": 0.46, "grad_norm": 0.2766121044623654, "learning_rate": 0.00018316947579561955, "loss": 1.1347, "step": 4766 }, { "epoch": 0.46, "grad_norm": 0.24848006867021413, "learning_rate": 0.0001831606912496349, "loss": 0.9744, "step": 4767 }, { "epoch": 0.46, "grad_norm": 0.31237542451843275, "learning_rate": 0.00018315190462250166, "loss": 1.1843, "step": 4768 }, { "epoch": 0.46, "grad_norm": 0.29004056616059715, "learning_rate": 0.00018314311591443978, "loss": 1.1249, "step": 4769 }, { "epoch": 0.46, "grad_norm": 0.26116055277908656, "learning_rate": 0.00018313432512566914, "loss": 1.1072, "step": 4770 }, { "epoch": 0.46, "grad_norm": 0.28883119567906423, "learning_rate": 0.0001831255322564098, "loss": 1.0457, "step": 4771 }, { "epoch": 0.46, "grad_norm": 0.303739333682482, "learning_rate": 0.00018311673730688174, "loss": 1.0541, "step": 4772 }, { "epoch": 0.46, "grad_norm": 0.29152935043740263, "learning_rate": 0.00018310794027730513, "loss": 0.9989, "step": 4773 }, { "epoch": 0.46, "grad_norm": 0.2642591264939651, "learning_rate": 0.00018309914116790006, "loss": 1.0887, "step": 4774 }, { "epoch": 0.46, "grad_norm": 0.2737816497235381, "learning_rate": 0.00018309033997888677, "loss": 0.9973, "step": 4775 }, { "epoch": 0.46, "grad_norm": 0.27257528194967345, "learning_rate": 0.0001830815367104855, "loss": 1.0495, "step": 4776 }, { "epoch": 0.46, "grad_norm": 0.28425805359861567, "learning_rate": 0.00018307273136291654, "loss": 1.0373, "step": 4777 }, { "epoch": 0.46, "grad_norm": 0.29026318888377095, "learning_rate": 0.00018306392393640025, "loss": 1.0727, "step": 4778 }, { "epoch": 0.46, "grad_norm": 0.27166227285923494, "learning_rate": 0.0001830551144311571, "loss": 1.0168, "step": 4779 }, { "epoch": 0.46, "grad_norm": 0.2755929576909545, "learning_rate": 0.00018304630284740752, "loss": 1.1526, "step": 4780 }, { "epoch": 0.46, "grad_norm": 0.2899920317630215, "learning_rate": 0.00018303748918537197, "loss": 1.1746, "step": 4781 }, { "epoch": 0.46, "grad_norm": 0.2986410508568978, "learning_rate": 0.00018302867344527113, "loss": 0.9996, "step": 4782 }, { "epoch": 0.46, "grad_norm": 0.265948409051619, "learning_rate": 0.00018301985562732548, "loss": 1.0769, "step": 4783 }, { "epoch": 0.46, "grad_norm": 0.28694971143700526, "learning_rate": 0.0001830110357317558, "loss": 1.1081, "step": 4784 }, { "epoch": 0.46, "grad_norm": 0.27742537509438936, "learning_rate": 0.00018300221375878282, "loss": 1.0131, "step": 4785 }, { "epoch": 0.46, "grad_norm": 0.2625193602157892, "learning_rate": 0.00018299338970862724, "loss": 1.1648, "step": 4786 }, { "epoch": 0.46, "grad_norm": 0.2738231281645282, "learning_rate": 0.00018298456358150996, "loss": 1.0869, "step": 4787 }, { "epoch": 0.46, "grad_norm": 0.28533535787671566, "learning_rate": 0.00018297573537765175, "loss": 1.1657, "step": 4788 }, { "epoch": 0.46, "grad_norm": 0.24149533195194994, "learning_rate": 0.00018296690509727367, "loss": 1.1252, "step": 4789 }, { "epoch": 0.46, "grad_norm": 0.2513397282973885, "learning_rate": 0.00018295807274059663, "loss": 1.1539, "step": 4790 }, { "epoch": 0.46, "grad_norm": 0.26483605282680023, "learning_rate": 0.00018294923830784168, "loss": 1.1032, "step": 4791 }, { "epoch": 0.46, "grad_norm": 0.24791217448830763, "learning_rate": 0.0001829404017992299, "loss": 1.019, "step": 4792 }, { "epoch": 0.46, "grad_norm": 0.2666231822052287, "learning_rate": 0.00018293156321498247, "loss": 1.0347, "step": 4793 }, { "epoch": 0.46, "grad_norm": 0.29874993409578854, "learning_rate": 0.0001829227225553205, "loss": 1.178, "step": 4794 }, { "epoch": 0.46, "grad_norm": 0.2738977464194366, "learning_rate": 0.00018291387982046536, "loss": 1.1841, "step": 4795 }, { "epoch": 0.46, "grad_norm": 0.26913204839505855, "learning_rate": 0.00018290503501063819, "loss": 1.1249, "step": 4796 }, { "epoch": 0.46, "grad_norm": 0.2642140627592377, "learning_rate": 0.00018289618812606046, "loss": 1.1228, "step": 4797 }, { "epoch": 0.46, "grad_norm": 0.2923403068856712, "learning_rate": 0.00018288733916695351, "loss": 1.0207, "step": 4798 }, { "epoch": 0.46, "grad_norm": 0.2574187448061279, "learning_rate": 0.0001828784881335388, "loss": 1.022, "step": 4799 }, { "epoch": 0.46, "grad_norm": 0.2705462102994065, "learning_rate": 0.00018286963502603786, "loss": 1.1524, "step": 4800 }, { "epoch": 0.46, "grad_norm": 0.2570338839265344, "learning_rate": 0.0001828607798446722, "loss": 1.0328, "step": 4801 }, { "epoch": 0.46, "grad_norm": 0.3210236176915547, "learning_rate": 0.00018285192258966343, "loss": 1.1115, "step": 4802 }, { "epoch": 0.46, "grad_norm": 0.28608741562007683, "learning_rate": 0.00018284306326123327, "loss": 1.1011, "step": 4803 }, { "epoch": 0.46, "grad_norm": 0.2945292323022467, "learning_rate": 0.00018283420185960338, "loss": 1.0293, "step": 4804 }, { "epoch": 0.46, "grad_norm": 0.2921369853346895, "learning_rate": 0.00018282533838499552, "loss": 1.0956, "step": 4805 }, { "epoch": 0.46, "grad_norm": 0.2508835050525997, "learning_rate": 0.0001828164728376315, "loss": 0.9944, "step": 4806 }, { "epoch": 0.46, "grad_norm": 0.31162618222530697, "learning_rate": 0.00018280760521773322, "loss": 1.1071, "step": 4807 }, { "epoch": 0.46, "grad_norm": 0.26840278702905573, "learning_rate": 0.00018279873552552256, "loss": 1.0196, "step": 4808 }, { "epoch": 0.46, "grad_norm": 0.28949640843261154, "learning_rate": 0.0001827898637612215, "loss": 1.0671, "step": 4809 }, { "epoch": 0.46, "grad_norm": 0.26451664824875853, "learning_rate": 0.00018278098992505207, "loss": 1.037, "step": 4810 }, { "epoch": 0.46, "grad_norm": 0.3163920468320238, "learning_rate": 0.00018277211401723634, "loss": 1.0914, "step": 4811 }, { "epoch": 0.46, "grad_norm": 0.293156846011273, "learning_rate": 0.00018276323603799645, "loss": 1.0247, "step": 4812 }, { "epoch": 0.46, "grad_norm": 0.3167908615898505, "learning_rate": 0.00018275435598755457, "loss": 1.0896, "step": 4813 }, { "epoch": 0.46, "grad_norm": 0.29195900263136115, "learning_rate": 0.0001827454738661329, "loss": 1.106, "step": 4814 }, { "epoch": 0.46, "grad_norm": 0.24880439045988673, "learning_rate": 0.00018273658967395378, "loss": 1.0249, "step": 4815 }, { "epoch": 0.46, "grad_norm": 0.27945847139446783, "learning_rate": 0.00018272770341123948, "loss": 1.1502, "step": 4816 }, { "epoch": 0.46, "grad_norm": 0.2826003957306108, "learning_rate": 0.0001827188150782124, "loss": 1.0295, "step": 4817 }, { "epoch": 0.46, "grad_norm": 0.2733756686159319, "learning_rate": 0.000182709924675095, "loss": 1.1557, "step": 4818 }, { "epoch": 0.46, "grad_norm": 0.2719146164107582, "learning_rate": 0.00018270103220210975, "loss": 1.1701, "step": 4819 }, { "epoch": 0.46, "grad_norm": 0.27143925540828834, "learning_rate": 0.0001826921376594792, "loss": 1.094, "step": 4820 }, { "epoch": 0.46, "grad_norm": 0.3064338525945154, "learning_rate": 0.00018268324104742592, "loss": 1.1517, "step": 4821 }, { "epoch": 0.46, "grad_norm": 0.31652916261666003, "learning_rate": 0.00018267434236617257, "loss": 1.1124, "step": 4822 }, { "epoch": 0.46, "grad_norm": 0.28456338644193074, "learning_rate": 0.00018266544161594185, "loss": 1.1934, "step": 4823 }, { "epoch": 0.46, "grad_norm": 0.24199134880492237, "learning_rate": 0.0001826565387969565, "loss": 0.9981, "step": 4824 }, { "epoch": 0.46, "grad_norm": 0.26907228489033597, "learning_rate": 0.00018264763390943932, "loss": 0.9906, "step": 4825 }, { "epoch": 0.46, "grad_norm": 0.2885973158602934, "learning_rate": 0.00018263872695361316, "loss": 1.0922, "step": 4826 }, { "epoch": 0.46, "grad_norm": 0.2618403285446365, "learning_rate": 0.00018262981792970093, "loss": 1.0619, "step": 4827 }, { "epoch": 0.46, "grad_norm": 0.2508955108209627, "learning_rate": 0.00018262090683792556, "loss": 1.0296, "step": 4828 }, { "epoch": 0.46, "grad_norm": 0.2805927803935353, "learning_rate": 0.00018261199367851008, "loss": 1.0554, "step": 4829 }, { "epoch": 0.46, "grad_norm": 0.2703573079658423, "learning_rate": 0.00018260307845167754, "loss": 1.0561, "step": 4830 }, { "epoch": 0.46, "grad_norm": 0.2674244714124627, "learning_rate": 0.00018259416115765103, "loss": 1.1403, "step": 4831 }, { "epoch": 0.46, "grad_norm": 0.28142267132464976, "learning_rate": 0.00018258524179665377, "loss": 1.142, "step": 4832 }, { "epoch": 0.46, "grad_norm": 0.28299880159536434, "learning_rate": 0.00018257632036890891, "loss": 1.0733, "step": 4833 }, { "epoch": 0.46, "grad_norm": 0.26287599628896613, "learning_rate": 0.0001825673968746397, "loss": 1.036, "step": 4834 }, { "epoch": 0.46, "grad_norm": 0.27196439255395516, "learning_rate": 0.00018255847131406954, "loss": 1.0408, "step": 4835 }, { "epoch": 0.46, "grad_norm": 0.27846018633896047, "learning_rate": 0.00018254954368742172, "loss": 1.0871, "step": 4836 }, { "epoch": 0.46, "grad_norm": 0.29707264756040697, "learning_rate": 0.00018254061399491968, "loss": 1.0648, "step": 4837 }, { "epoch": 0.46, "grad_norm": 0.26505419324604984, "learning_rate": 0.00018253168223678694, "loss": 1.1028, "step": 4838 }, { "epoch": 0.46, "grad_norm": 0.255410178872581, "learning_rate": 0.00018252274841324697, "loss": 1.0632, "step": 4839 }, { "epoch": 0.46, "grad_norm": 0.31740617878546407, "learning_rate": 0.00018251381252452334, "loss": 1.0536, "step": 4840 }, { "epoch": 0.46, "grad_norm": 0.283151238470317, "learning_rate": 0.0001825048745708397, "loss": 1.0617, "step": 4841 }, { "epoch": 0.46, "grad_norm": 0.2754257741606846, "learning_rate": 0.0001824959345524197, "loss": 1.1151, "step": 4842 }, { "epoch": 0.46, "grad_norm": 0.27843031061325174, "learning_rate": 0.00018248699246948714, "loss": 1.0461, "step": 4843 }, { "epoch": 0.46, "grad_norm": 0.28725916243319277, "learning_rate": 0.00018247804832226573, "loss": 1.1349, "step": 4844 }, { "epoch": 0.46, "grad_norm": 0.2656780830493282, "learning_rate": 0.00018246910211097933, "loss": 1.0609, "step": 4845 }, { "epoch": 0.46, "grad_norm": 0.2735044242141262, "learning_rate": 0.0001824601538358518, "loss": 1.1276, "step": 4846 }, { "epoch": 0.46, "grad_norm": 0.2758621380929105, "learning_rate": 0.00018245120349710708, "loss": 0.9056, "step": 4847 }, { "epoch": 0.46, "grad_norm": 0.2399901693338529, "learning_rate": 0.00018244225109496922, "loss": 1.0778, "step": 4848 }, { "epoch": 0.46, "grad_norm": 0.24569048440162722, "learning_rate": 0.0001824332966296622, "loss": 1.1481, "step": 4849 }, { "epoch": 0.46, "grad_norm": 0.24697964457020974, "learning_rate": 0.00018242434010141013, "loss": 1.0456, "step": 4850 }, { "epoch": 0.46, "grad_norm": 0.26864292763639014, "learning_rate": 0.0001824153815104371, "loss": 1.0611, "step": 4851 }, { "epoch": 0.46, "grad_norm": 0.32764299735013946, "learning_rate": 0.0001824064208569674, "loss": 1.1193, "step": 4852 }, { "epoch": 0.46, "grad_norm": 0.2841433629068385, "learning_rate": 0.00018239745814122523, "loss": 1.1592, "step": 4853 }, { "epoch": 0.46, "grad_norm": 0.2803323057557425, "learning_rate": 0.00018238849336343487, "loss": 1.103, "step": 4854 }, { "epoch": 0.46, "grad_norm": 0.2403783130731807, "learning_rate": 0.00018237952652382067, "loss": 1.1279, "step": 4855 }, { "epoch": 0.46, "grad_norm": 0.2644107034178308, "learning_rate": 0.00018237055762260708, "loss": 1.0965, "step": 4856 }, { "epoch": 0.46, "grad_norm": 0.30697640163453677, "learning_rate": 0.0001823615866600185, "loss": 1.2236, "step": 4857 }, { "epoch": 0.46, "grad_norm": 0.2735457493491036, "learning_rate": 0.00018235261363627945, "loss": 1.0618, "step": 4858 }, { "epoch": 0.46, "grad_norm": 0.2406737920100741, "learning_rate": 0.00018234363855161448, "loss": 1.004, "step": 4859 }, { "epoch": 0.46, "grad_norm": 0.2484692248494346, "learning_rate": 0.00018233466140624822, "loss": 0.9887, "step": 4860 }, { "epoch": 0.47, "grad_norm": 0.2896830439213678, "learning_rate": 0.00018232568220040532, "loss": 1.1294, "step": 4861 }, { "epoch": 0.47, "grad_norm": 0.26324509988169137, "learning_rate": 0.00018231670093431042, "loss": 1.1409, "step": 4862 }, { "epoch": 0.47, "grad_norm": 0.2545994960965595, "learning_rate": 0.00018230771760818844, "loss": 1.0028, "step": 4863 }, { "epoch": 0.47, "grad_norm": 0.2708183217546789, "learning_rate": 0.000182298732222264, "loss": 1.0793, "step": 4864 }, { "epoch": 0.47, "grad_norm": 0.2917108244983868, "learning_rate": 0.00018228974477676216, "loss": 1.0369, "step": 4865 }, { "epoch": 0.47, "grad_norm": 0.261745568565937, "learning_rate": 0.0001822807552719077, "loss": 0.962, "step": 4866 }, { "epoch": 0.47, "grad_norm": 0.2743799469305386, "learning_rate": 0.0001822717637079256, "loss": 1.2131, "step": 4867 }, { "epoch": 0.47, "grad_norm": 0.2979549118848966, "learning_rate": 0.0001822627700850409, "loss": 1.0951, "step": 4868 }, { "epoch": 0.47, "grad_norm": 0.2589301792916495, "learning_rate": 0.00018225377440347874, "loss": 1.1224, "step": 4869 }, { "epoch": 0.47, "grad_norm": 0.27151217602723077, "learning_rate": 0.00018224477666346414, "loss": 1.2002, "step": 4870 }, { "epoch": 0.47, "grad_norm": 0.2678272434705896, "learning_rate": 0.00018223577686522232, "loss": 1.0903, "step": 4871 }, { "epoch": 0.47, "grad_norm": 0.28664523382221585, "learning_rate": 0.0001822267750089785, "loss": 1.1004, "step": 4872 }, { "epoch": 0.47, "grad_norm": 0.2947186333319964, "learning_rate": 0.00018221777109495797, "loss": 1.0248, "step": 4873 }, { "epoch": 0.47, "grad_norm": 0.2911818485318527, "learning_rate": 0.00018220876512338604, "loss": 1.1243, "step": 4874 }, { "epoch": 0.47, "grad_norm": 0.2389316378471969, "learning_rate": 0.0001821997570944881, "loss": 1.0098, "step": 4875 }, { "epoch": 0.47, "grad_norm": 0.29112691824378567, "learning_rate": 0.00018219074700848956, "loss": 1.1284, "step": 4876 }, { "epoch": 0.47, "grad_norm": 0.32234639877925986, "learning_rate": 0.00018218173486561593, "loss": 1.1974, "step": 4877 }, { "epoch": 0.47, "grad_norm": 0.28693422182466444, "learning_rate": 0.00018217272066609275, "loss": 1.07, "step": 4878 }, { "epoch": 0.47, "grad_norm": 0.24948119387159542, "learning_rate": 0.00018216370441014558, "loss": 1.0697, "step": 4879 }, { "epoch": 0.47, "grad_norm": 0.2766198420685631, "learning_rate": 0.00018215468609800007, "loss": 1.1055, "step": 4880 }, { "epoch": 0.47, "grad_norm": 0.2913825743782603, "learning_rate": 0.0001821456657298819, "loss": 1.0749, "step": 4881 }, { "epoch": 0.47, "grad_norm": 0.2688271558729764, "learning_rate": 0.00018213664330601683, "loss": 0.9326, "step": 4882 }, { "epoch": 0.47, "grad_norm": 0.30551493928355505, "learning_rate": 0.00018212761882663062, "loss": 1.1667, "step": 4883 }, { "epoch": 0.47, "grad_norm": 0.3272843823903593, "learning_rate": 0.00018211859229194918, "loss": 1.0908, "step": 4884 }, { "epoch": 0.47, "grad_norm": 0.2683326547425987, "learning_rate": 0.00018210956370219832, "loss": 1.1501, "step": 4885 }, { "epoch": 0.47, "grad_norm": 0.2508457951874583, "learning_rate": 0.00018210053305760403, "loss": 1.0294, "step": 4886 }, { "epoch": 0.47, "grad_norm": 0.3066517541478313, "learning_rate": 0.0001820915003583923, "loss": 1.1289, "step": 4887 }, { "epoch": 0.47, "grad_norm": 0.2979558635215368, "learning_rate": 0.0001820824656047892, "loss": 1.0369, "step": 4888 }, { "epoch": 0.47, "grad_norm": 0.24297953137434605, "learning_rate": 0.0001820734287970208, "loss": 1.0952, "step": 4889 }, { "epoch": 0.47, "grad_norm": 0.2642818716032307, "learning_rate": 0.00018206438993531324, "loss": 1.1495, "step": 4890 }, { "epoch": 0.47, "grad_norm": 0.28250099631091213, "learning_rate": 0.0001820553490198928, "loss": 1.2023, "step": 4891 }, { "epoch": 0.47, "grad_norm": 0.24244241524853985, "learning_rate": 0.00018204630605098563, "loss": 1.1123, "step": 4892 }, { "epoch": 0.47, "grad_norm": 0.28117721265077, "learning_rate": 0.00018203726102881807, "loss": 1.1124, "step": 4893 }, { "epoch": 0.47, "grad_norm": 0.24950954002584746, "learning_rate": 0.00018202821395361656, "loss": 1.0812, "step": 4894 }, { "epoch": 0.47, "grad_norm": 0.29484743456459533, "learning_rate": 0.0001820191648256074, "loss": 1.019, "step": 4895 }, { "epoch": 0.47, "grad_norm": 0.26478910448375825, "learning_rate": 0.00018201011364501712, "loss": 1.061, "step": 4896 }, { "epoch": 0.47, "grad_norm": 0.2802200200065187, "learning_rate": 0.00018200106041207218, "loss": 1.2153, "step": 4897 }, { "epoch": 0.47, "grad_norm": 0.2931006305889275, "learning_rate": 0.00018199200512699918, "loss": 1.1586, "step": 4898 }, { "epoch": 0.47, "grad_norm": 0.28693848326326754, "learning_rate": 0.00018198294779002473, "loss": 0.8363, "step": 4899 }, { "epoch": 0.47, "grad_norm": 0.2488053532878251, "learning_rate": 0.00018197388840137548, "loss": 1.0084, "step": 4900 }, { "epoch": 0.47, "grad_norm": 0.30279834945613493, "learning_rate": 0.00018196482696127814, "loss": 1.0889, "step": 4901 }, { "epoch": 0.47, "grad_norm": 0.27315676586595977, "learning_rate": 0.0001819557634699595, "loss": 1.194, "step": 4902 }, { "epoch": 0.47, "grad_norm": 0.27870113697280036, "learning_rate": 0.0001819466979276464, "loss": 1.0237, "step": 4903 }, { "epoch": 0.47, "grad_norm": 0.26068511407455874, "learning_rate": 0.00018193763033456565, "loss": 1.141, "step": 4904 }, { "epoch": 0.47, "grad_norm": 0.25403699988609485, "learning_rate": 0.0001819285606909442, "loss": 0.985, "step": 4905 }, { "epoch": 0.47, "grad_norm": 0.25520183475888464, "learning_rate": 0.00018191948899700904, "loss": 0.9452, "step": 4906 }, { "epoch": 0.47, "grad_norm": 0.2890128875837949, "learning_rate": 0.00018191041525298719, "loss": 0.9948, "step": 4907 }, { "epoch": 0.47, "grad_norm": 0.2826684783407896, "learning_rate": 0.00018190133945910573, "loss": 1.1965, "step": 4908 }, { "epoch": 0.47, "grad_norm": 0.27927424785621435, "learning_rate": 0.00018189226161559175, "loss": 1.0749, "step": 4909 }, { "epoch": 0.47, "grad_norm": 0.28389430568060914, "learning_rate": 0.00018188318172267245, "loss": 1.1575, "step": 4910 }, { "epoch": 0.47, "grad_norm": 0.21938938517365983, "learning_rate": 0.0001818740997805751, "loss": 1.0206, "step": 4911 }, { "epoch": 0.47, "grad_norm": 0.29366370308740425, "learning_rate": 0.00018186501578952693, "loss": 1.0559, "step": 4912 }, { "epoch": 0.47, "grad_norm": 0.29917686177952224, "learning_rate": 0.0001818559297497553, "loss": 1.164, "step": 4913 }, { "epoch": 0.47, "grad_norm": 0.24934981554280172, "learning_rate": 0.00018184684166148754, "loss": 0.978, "step": 4914 }, { "epoch": 0.47, "grad_norm": 0.25613501770670105, "learning_rate": 0.00018183775152495117, "loss": 1.1179, "step": 4915 }, { "epoch": 0.47, "grad_norm": 0.28150065228278026, "learning_rate": 0.00018182865934037362, "loss": 1.0, "step": 4916 }, { "epoch": 0.47, "grad_norm": 0.2736305655565639, "learning_rate": 0.00018181956510798246, "loss": 0.9436, "step": 4917 }, { "epoch": 0.47, "grad_norm": 0.23963815077384734, "learning_rate": 0.00018181046882800525, "loss": 1.1872, "step": 4918 }, { "epoch": 0.47, "grad_norm": 0.26272710687943224, "learning_rate": 0.00018180137050066963, "loss": 1.1077, "step": 4919 }, { "epoch": 0.47, "grad_norm": 0.2491631914332162, "learning_rate": 0.00018179227012620332, "loss": 1.0311, "step": 4920 }, { "epoch": 0.47, "grad_norm": 0.2850474055751791, "learning_rate": 0.00018178316770483405, "loss": 1.072, "step": 4921 }, { "epoch": 0.47, "grad_norm": 0.2563447984596202, "learning_rate": 0.0001817740632367896, "loss": 1.0942, "step": 4922 }, { "epoch": 0.47, "grad_norm": 0.28963605095483796, "learning_rate": 0.00018176495672229782, "loss": 1.1151, "step": 4923 }, { "epoch": 0.47, "grad_norm": 0.29799210873069965, "learning_rate": 0.0001817558481615866, "loss": 1.0651, "step": 4924 }, { "epoch": 0.47, "grad_norm": 0.2900355369937844, "learning_rate": 0.0001817467375548839, "loss": 1.0395, "step": 4925 }, { "epoch": 0.47, "grad_norm": 0.26790765666005045, "learning_rate": 0.00018173762490241777, "loss": 1.0195, "step": 4926 }, { "epoch": 0.47, "grad_norm": 0.27029423974434824, "learning_rate": 0.00018172851020441616, "loss": 1.0376, "step": 4927 }, { "epoch": 0.47, "grad_norm": 0.29781631086031485, "learning_rate": 0.00018171939346110723, "loss": 1.1626, "step": 4928 }, { "epoch": 0.47, "grad_norm": 0.24153000189250098, "learning_rate": 0.0001817102746727191, "loss": 1.1416, "step": 4929 }, { "epoch": 0.47, "grad_norm": 0.2832270079995993, "learning_rate": 0.00018170115383948001, "loss": 1.0366, "step": 4930 }, { "epoch": 0.47, "grad_norm": 0.28980962720099673, "learning_rate": 0.0001816920309616182, "loss": 1.1199, "step": 4931 }, { "epoch": 0.47, "grad_norm": 0.2885127084279082, "learning_rate": 0.00018168290603936198, "loss": 0.9624, "step": 4932 }, { "epoch": 0.47, "grad_norm": 0.264979114595309, "learning_rate": 0.00018167377907293966, "loss": 1.1314, "step": 4933 }, { "epoch": 0.47, "grad_norm": 0.29190107567170176, "learning_rate": 0.00018166465006257972, "loss": 1.1603, "step": 4934 }, { "epoch": 0.47, "grad_norm": 0.29218193365264744, "learning_rate": 0.0001816555190085106, "loss": 1.1024, "step": 4935 }, { "epoch": 0.47, "grad_norm": 0.290286097531124, "learning_rate": 0.00018164638591096078, "loss": 1.0179, "step": 4936 }, { "epoch": 0.47, "grad_norm": 0.3045929894370889, "learning_rate": 0.00018163725077015883, "loss": 1.1458, "step": 4937 }, { "epoch": 0.47, "grad_norm": 0.3096182938272493, "learning_rate": 0.0001816281135863334, "loss": 1.0445, "step": 4938 }, { "epoch": 0.47, "grad_norm": 0.2989704049787465, "learning_rate": 0.00018161897435971312, "loss": 1.0597, "step": 4939 }, { "epoch": 0.47, "grad_norm": 0.33521100047691355, "learning_rate": 0.00018160983309052671, "loss": 1.0351, "step": 4940 }, { "epoch": 0.47, "grad_norm": 0.27937510049751796, "learning_rate": 0.00018160068977900293, "loss": 1.0309, "step": 4941 }, { "epoch": 0.47, "grad_norm": 0.30607280379755825, "learning_rate": 0.00018159154442537058, "loss": 1.0614, "step": 4942 }, { "epoch": 0.47, "grad_norm": 0.2942443003492716, "learning_rate": 0.0001815823970298586, "loss": 1.0296, "step": 4943 }, { "epoch": 0.47, "grad_norm": 0.27168124667388854, "learning_rate": 0.00018157324759269583, "loss": 1.2025, "step": 4944 }, { "epoch": 0.47, "grad_norm": 0.3040911262976451, "learning_rate": 0.00018156409611411127, "loss": 1.1002, "step": 4945 }, { "epoch": 0.47, "grad_norm": 0.2932905809708139, "learning_rate": 0.00018155494259433397, "loss": 1.1174, "step": 4946 }, { "epoch": 0.47, "grad_norm": 0.27057969270876014, "learning_rate": 0.00018154578703359294, "loss": 1.1873, "step": 4947 }, { "epoch": 0.47, "grad_norm": 0.23291125191630732, "learning_rate": 0.00018153662943211737, "loss": 1.0286, "step": 4948 }, { "epoch": 0.47, "grad_norm": 0.2935414983637233, "learning_rate": 0.00018152746979013638, "loss": 1.0808, "step": 4949 }, { "epoch": 0.47, "grad_norm": 0.28416378913000195, "learning_rate": 0.00018151830810787925, "loss": 1.1802, "step": 4950 }, { "epoch": 0.47, "grad_norm": 0.2804898105323499, "learning_rate": 0.00018150914438557522, "loss": 1.0825, "step": 4951 }, { "epoch": 0.47, "grad_norm": 0.2908103000886573, "learning_rate": 0.0001814999786234536, "loss": 1.1133, "step": 4952 }, { "epoch": 0.47, "grad_norm": 0.25695695427126014, "learning_rate": 0.0001814908108217438, "loss": 1.1093, "step": 4953 }, { "epoch": 0.47, "grad_norm": 0.27219006337780743, "learning_rate": 0.0001814816409806753, "loss": 1.1098, "step": 4954 }, { "epoch": 0.47, "grad_norm": 0.3013175495467479, "learning_rate": 0.00018147246910047747, "loss": 1.0026, "step": 4955 }, { "epoch": 0.47, "grad_norm": 0.24847902288191978, "learning_rate": 0.0001814632951813799, "loss": 0.999, "step": 4956 }, { "epoch": 0.47, "grad_norm": 0.2691757208602004, "learning_rate": 0.00018145411922361219, "loss": 1.0259, "step": 4957 }, { "epoch": 0.47, "grad_norm": 0.2601243300778845, "learning_rate": 0.00018144494122740394, "loss": 0.9758, "step": 4958 }, { "epoch": 0.47, "grad_norm": 0.27967276767781657, "learning_rate": 0.00018143576119298484, "loss": 1.0271, "step": 4959 }, { "epoch": 0.47, "grad_norm": 0.3093700281906271, "learning_rate": 0.00018142657912058465, "loss": 0.9627, "step": 4960 }, { "epoch": 0.47, "grad_norm": 0.2951333334350976, "learning_rate": 0.00018141739501043315, "loss": 1.0788, "step": 4961 }, { "epoch": 0.47, "grad_norm": 0.2754995868240653, "learning_rate": 0.00018140820886276018, "loss": 0.987, "step": 4962 }, { "epoch": 0.47, "grad_norm": 0.2650597795296595, "learning_rate": 0.0001813990206777956, "loss": 1.108, "step": 4963 }, { "epoch": 0.47, "grad_norm": 0.2680629620506122, "learning_rate": 0.00018138983045576937, "loss": 1.0824, "step": 4964 }, { "epoch": 0.48, "grad_norm": 0.2745197902744774, "learning_rate": 0.00018138063819691147, "loss": 1.087, "step": 4965 }, { "epoch": 0.48, "grad_norm": 0.2951908263527352, "learning_rate": 0.00018137144390145194, "loss": 1.0986, "step": 4966 }, { "epoch": 0.48, "grad_norm": 0.3006255204787449, "learning_rate": 0.00018136224756962093, "loss": 1.0538, "step": 4967 }, { "epoch": 0.48, "grad_norm": 0.28348543906022167, "learning_rate": 0.00018135304920164854, "loss": 1.0905, "step": 4968 }, { "epoch": 0.48, "grad_norm": 0.2572498672154097, "learning_rate": 0.00018134384879776497, "loss": 1.1133, "step": 4969 }, { "epoch": 0.48, "grad_norm": 0.30267258785481194, "learning_rate": 0.00018133464635820042, "loss": 1.1097, "step": 4970 }, { "epoch": 0.48, "grad_norm": 0.2645184466065096, "learning_rate": 0.00018132544188318526, "loss": 1.111, "step": 4971 }, { "epoch": 0.48, "grad_norm": 0.2981463331584167, "learning_rate": 0.0001813162353729498, "loss": 1.0594, "step": 4972 }, { "epoch": 0.48, "grad_norm": 0.3028092831683072, "learning_rate": 0.0001813070268277244, "loss": 1.227, "step": 4973 }, { "epoch": 0.48, "grad_norm": 0.276916640360599, "learning_rate": 0.00018129781624773961, "loss": 1.1563, "step": 4974 }, { "epoch": 0.48, "grad_norm": 0.26497515296266566, "learning_rate": 0.00018128860363322586, "loss": 1.0489, "step": 4975 }, { "epoch": 0.48, "grad_norm": 0.2870089379191648, "learning_rate": 0.00018127938898441373, "loss": 1.1085, "step": 4976 }, { "epoch": 0.48, "grad_norm": 0.2804072923871364, "learning_rate": 0.00018127017230153378, "loss": 1.1697, "step": 4977 }, { "epoch": 0.48, "grad_norm": 0.26600268228720364, "learning_rate": 0.0001812609535848167, "loss": 1.0651, "step": 4978 }, { "epoch": 0.48, "grad_norm": 0.3017636575581846, "learning_rate": 0.0001812517328344932, "loss": 1.0479, "step": 4979 }, { "epoch": 0.48, "grad_norm": 0.2848522851547357, "learning_rate": 0.000181242510050794, "loss": 1.0961, "step": 4980 }, { "epoch": 0.48, "grad_norm": 0.29706092141646745, "learning_rate": 0.00018123328523394992, "loss": 1.0572, "step": 4981 }, { "epoch": 0.48, "grad_norm": 0.24623214065631238, "learning_rate": 0.00018122405838419186, "loss": 1.0209, "step": 4982 }, { "epoch": 0.48, "grad_norm": 0.27148089077495985, "learning_rate": 0.00018121482950175067, "loss": 1.0425, "step": 4983 }, { "epoch": 0.48, "grad_norm": 0.27374692095758846, "learning_rate": 0.00018120559858685734, "loss": 1.0984, "step": 4984 }, { "epoch": 0.48, "grad_norm": 0.28314322190570707, "learning_rate": 0.00018119636563974285, "loss": 1.0245, "step": 4985 }, { "epoch": 0.48, "grad_norm": 0.27988607306147967, "learning_rate": 0.0001811871306606383, "loss": 1.0786, "step": 4986 }, { "epoch": 0.48, "grad_norm": 0.28485376383594274, "learning_rate": 0.0001811778936497748, "loss": 1.0639, "step": 4987 }, { "epoch": 0.48, "grad_norm": 0.23607692697159477, "learning_rate": 0.00018116865460738343, "loss": 1.0506, "step": 4988 }, { "epoch": 0.48, "grad_norm": 0.29135188062241957, "learning_rate": 0.0001811594135336955, "loss": 1.0689, "step": 4989 }, { "epoch": 0.48, "grad_norm": 0.26704486335325794, "learning_rate": 0.00018115017042894227, "loss": 1.1443, "step": 4990 }, { "epoch": 0.48, "grad_norm": 0.26331319852301815, "learning_rate": 0.00018114092529335497, "loss": 1.0934, "step": 4991 }, { "epoch": 0.48, "grad_norm": 0.26377752564879786, "learning_rate": 0.00018113167812716506, "loss": 0.9711, "step": 4992 }, { "epoch": 0.48, "grad_norm": 0.26534776082769385, "learning_rate": 0.0001811224289306039, "loss": 1.0002, "step": 4993 }, { "epoch": 0.48, "grad_norm": 0.2711195707498488, "learning_rate": 0.00018111317770390297, "loss": 1.2213, "step": 4994 }, { "epoch": 0.48, "grad_norm": 0.24845192655050088, "learning_rate": 0.0001811039244472938, "loss": 1.1603, "step": 4995 }, { "epoch": 0.48, "grad_norm": 0.2774236359347863, "learning_rate": 0.00018109466916100793, "loss": 1.0153, "step": 4996 }, { "epoch": 0.48, "grad_norm": 0.291748829551597, "learning_rate": 0.000181085411845277, "loss": 1.139, "step": 4997 }, { "epoch": 0.48, "grad_norm": 0.2707487926064819, "learning_rate": 0.0001810761525003327, "loss": 0.9485, "step": 4998 }, { "epoch": 0.48, "grad_norm": 0.31763719053401285, "learning_rate": 0.0001810668911264067, "loss": 1.0691, "step": 4999 }, { "epoch": 0.48, "grad_norm": 0.271398772008147, "learning_rate": 0.00018105762772373086, "loss": 1.0501, "step": 5000 }, { "epoch": 0.48, "grad_norm": 0.2723475184642883, "learning_rate": 0.00018104836229253688, "loss": 1.0046, "step": 5001 }, { "epoch": 0.48, "grad_norm": 0.23738205025152834, "learning_rate": 0.00018103909483305672, "loss": 1.1247, "step": 5002 }, { "epoch": 0.48, "grad_norm": 0.28212585900181886, "learning_rate": 0.00018102982534552226, "loss": 0.966, "step": 5003 }, { "epoch": 0.48, "grad_norm": 0.2923303986208393, "learning_rate": 0.00018102055383016554, "loss": 1.177, "step": 5004 }, { "epoch": 0.48, "grad_norm": 0.261063011725201, "learning_rate": 0.0001810112802872185, "loss": 0.9956, "step": 5005 }, { "epoch": 0.48, "grad_norm": 0.28452157947524576, "learning_rate": 0.0001810020047169133, "loss": 1.1531, "step": 5006 }, { "epoch": 0.48, "grad_norm": 0.30435514362331084, "learning_rate": 0.00018099272711948197, "loss": 1.101, "step": 5007 }, { "epoch": 0.48, "grad_norm": 0.3171881336122615, "learning_rate": 0.0001809834474951568, "loss": 1.1477, "step": 5008 }, { "epoch": 0.48, "grad_norm": 0.3021432074002858, "learning_rate": 0.00018097416584416992, "loss": 1.1089, "step": 5009 }, { "epoch": 0.48, "grad_norm": 0.2584355712285805, "learning_rate": 0.00018096488216675364, "loss": 1.0171, "step": 5010 }, { "epoch": 0.48, "grad_norm": 0.2784662384579421, "learning_rate": 0.00018095559646314033, "loss": 1.1563, "step": 5011 }, { "epoch": 0.48, "grad_norm": 0.2590883277277557, "learning_rate": 0.00018094630873356234, "loss": 0.9934, "step": 5012 }, { "epoch": 0.48, "grad_norm": 0.31013602501461507, "learning_rate": 0.0001809370189782521, "loss": 1.0347, "step": 5013 }, { "epoch": 0.48, "grad_norm": 0.2908179871344749, "learning_rate": 0.00018092772719744207, "loss": 1.1276, "step": 5014 }, { "epoch": 0.48, "grad_norm": 0.27894866086973424, "learning_rate": 0.0001809184333913648, "loss": 1.1603, "step": 5015 }, { "epoch": 0.48, "grad_norm": 0.30799725337128564, "learning_rate": 0.0001809091375602529, "loss": 0.9902, "step": 5016 }, { "epoch": 0.48, "grad_norm": 0.26185924608898803, "learning_rate": 0.00018089983970433896, "loss": 1.0665, "step": 5017 }, { "epoch": 0.48, "grad_norm": 0.23371144120926612, "learning_rate": 0.0001808905398238557, "loss": 1.0455, "step": 5018 }, { "epoch": 0.48, "grad_norm": 0.27647347653440757, "learning_rate": 0.00018088123791903588, "loss": 1.1008, "step": 5019 }, { "epoch": 0.48, "grad_norm": 0.2619795224095397, "learning_rate": 0.0001808719339901122, "loss": 1.111, "step": 5020 }, { "epoch": 0.48, "grad_norm": 0.28812383140009523, "learning_rate": 0.00018086262803731758, "loss": 1.1625, "step": 5021 }, { "epoch": 0.48, "grad_norm": 0.29790943849759494, "learning_rate": 0.00018085332006088486, "loss": 1.0231, "step": 5022 }, { "epoch": 0.48, "grad_norm": 0.2584586663721738, "learning_rate": 0.00018084401006104699, "loss": 0.9985, "step": 5023 }, { "epoch": 0.48, "grad_norm": 0.3204107870274487, "learning_rate": 0.00018083469803803696, "loss": 1.0371, "step": 5024 }, { "epoch": 0.48, "grad_norm": 0.32851014719630056, "learning_rate": 0.0001808253839920878, "loss": 1.0865, "step": 5025 }, { "epoch": 0.48, "grad_norm": 0.28813789079186414, "learning_rate": 0.00018081606792343262, "loss": 1.0732, "step": 5026 }, { "epoch": 0.48, "grad_norm": 0.2778577229940317, "learning_rate": 0.00018080674983230455, "loss": 0.9632, "step": 5027 }, { "epoch": 0.48, "grad_norm": 0.25371536354442387, "learning_rate": 0.00018079742971893677, "loss": 1.0637, "step": 5028 }, { "epoch": 0.48, "grad_norm": 0.2608878940566144, "learning_rate": 0.00018078810758356256, "loss": 1.1195, "step": 5029 }, { "epoch": 0.48, "grad_norm": 0.2880007088671419, "learning_rate": 0.00018077878342641514, "loss": 1.0541, "step": 5030 }, { "epoch": 0.48, "grad_norm": 0.27719099372774664, "learning_rate": 0.0001807694572477279, "loss": 1.14, "step": 5031 }, { "epoch": 0.48, "grad_norm": 0.26236080400560896, "learning_rate": 0.00018076012904773427, "loss": 1.0992, "step": 5032 }, { "epoch": 0.48, "grad_norm": 0.28662749205806315, "learning_rate": 0.00018075079882666763, "loss": 1.1681, "step": 5033 }, { "epoch": 0.48, "grad_norm": 0.24357430650710507, "learning_rate": 0.0001807414665847615, "loss": 1.0774, "step": 5034 }, { "epoch": 0.48, "grad_norm": 0.22511154512758563, "learning_rate": 0.00018073213232224945, "loss": 0.9704, "step": 5035 }, { "epoch": 0.48, "grad_norm": 0.25551533972771157, "learning_rate": 0.000180722796039365, "loss": 1.1366, "step": 5036 }, { "epoch": 0.48, "grad_norm": 0.27262724569941255, "learning_rate": 0.0001807134577363419, "loss": 0.9719, "step": 5037 }, { "epoch": 0.48, "grad_norm": 0.2640695671336433, "learning_rate": 0.00018070411741341377, "loss": 1.0389, "step": 5038 }, { "epoch": 0.48, "grad_norm": 0.25865882131840123, "learning_rate": 0.00018069477507081438, "loss": 1.0127, "step": 5039 }, { "epoch": 0.48, "grad_norm": 0.3041466915560575, "learning_rate": 0.00018068543070877752, "loss": 1.1345, "step": 5040 }, { "epoch": 0.48, "grad_norm": 0.280532311208517, "learning_rate": 0.00018067608432753706, "loss": 0.98, "step": 5041 }, { "epoch": 0.48, "grad_norm": 0.3067608848042604, "learning_rate": 0.0001806667359273269, "loss": 1.1467, "step": 5042 }, { "epoch": 0.48, "grad_norm": 0.28794591958027166, "learning_rate": 0.00018065738550838094, "loss": 1.1362, "step": 5043 }, { "epoch": 0.48, "grad_norm": 0.25975883602188704, "learning_rate": 0.00018064803307093325, "loss": 1.0846, "step": 5044 }, { "epoch": 0.48, "grad_norm": 0.28326599642955586, "learning_rate": 0.00018063867861521784, "loss": 1.0675, "step": 5045 }, { "epoch": 0.48, "grad_norm": 0.2558529563820642, "learning_rate": 0.00018062932214146882, "loss": 1.1281, "step": 5046 }, { "epoch": 0.48, "grad_norm": 0.28221647075585127, "learning_rate": 0.0001806199636499203, "loss": 1.0499, "step": 5047 }, { "epoch": 0.48, "grad_norm": 0.25047524197955967, "learning_rate": 0.00018061060314080658, "loss": 1.1112, "step": 5048 }, { "epoch": 0.48, "grad_norm": 0.25460102742828555, "learning_rate": 0.00018060124061436184, "loss": 1.139, "step": 5049 }, { "epoch": 0.48, "grad_norm": 0.2684967059066573, "learning_rate": 0.00018059187607082037, "loss": 1.0121, "step": 5050 }, { "epoch": 0.48, "grad_norm": 0.2740879178050141, "learning_rate": 0.00018058250951041656, "loss": 1.2213, "step": 5051 }, { "epoch": 0.48, "grad_norm": 0.23814111073074376, "learning_rate": 0.0001805731409333848, "loss": 0.946, "step": 5052 }, { "epoch": 0.48, "grad_norm": 0.2846550890405717, "learning_rate": 0.00018056377033995959, "loss": 1.0843, "step": 5053 }, { "epoch": 0.48, "grad_norm": 0.26517227533600335, "learning_rate": 0.00018055439773037536, "loss": 1.0066, "step": 5054 }, { "epoch": 0.48, "grad_norm": 0.27053031387998794, "learning_rate": 0.0001805450231048667, "loss": 0.9459, "step": 5055 }, { "epoch": 0.48, "grad_norm": 0.3019675939625031, "learning_rate": 0.00018053564646366822, "loss": 1.2236, "step": 5056 }, { "epoch": 0.48, "grad_norm": 0.25798882391808686, "learning_rate": 0.00018052626780701457, "loss": 1.1254, "step": 5057 }, { "epoch": 0.48, "grad_norm": 0.27311813604505014, "learning_rate": 0.00018051688713514047, "loss": 1.0087, "step": 5058 }, { "epoch": 0.48, "grad_norm": 0.24271338025668723, "learning_rate": 0.00018050750444828067, "loss": 1.0985, "step": 5059 }, { "epoch": 0.48, "grad_norm": 0.28513204364807454, "learning_rate": 0.00018049811974666996, "loss": 0.968, "step": 5060 }, { "epoch": 0.48, "grad_norm": 0.2684639300431927, "learning_rate": 0.00018048873303054324, "loss": 1.0187, "step": 5061 }, { "epoch": 0.48, "grad_norm": 0.28668337168219155, "learning_rate": 0.00018047934430013535, "loss": 1.1447, "step": 5062 }, { "epoch": 0.48, "grad_norm": 0.2526134197715635, "learning_rate": 0.0001804699535556813, "loss": 1.1222, "step": 5063 }, { "epoch": 0.48, "grad_norm": 0.2734155691633195, "learning_rate": 0.0001804605607974161, "loss": 1.0668, "step": 5064 }, { "epoch": 0.48, "grad_norm": 0.29009495050980677, "learning_rate": 0.0001804511660255748, "loss": 1.143, "step": 5065 }, { "epoch": 0.48, "grad_norm": 0.24381730481982744, "learning_rate": 0.0001804417692403925, "loss": 1.2, "step": 5066 }, { "epoch": 0.48, "grad_norm": 0.28667698702842986, "learning_rate": 0.00018043237044210438, "loss": 0.9782, "step": 5067 }, { "epoch": 0.48, "grad_norm": 0.2290214469722657, "learning_rate": 0.00018042296963094562, "loss": 1.0481, "step": 5068 }, { "epoch": 0.48, "grad_norm": 0.25712013816908574, "learning_rate": 0.00018041356680715152, "loss": 1.0808, "step": 5069 }, { "epoch": 0.49, "grad_norm": 0.27191198243103304, "learning_rate": 0.00018040416197095737, "loss": 1.0839, "step": 5070 }, { "epoch": 0.49, "grad_norm": 0.2896020255383838, "learning_rate": 0.00018039475512259855, "loss": 1.0719, "step": 5071 }, { "epoch": 0.49, "grad_norm": 0.2722755649971534, "learning_rate": 0.0001803853462623104, "loss": 1.111, "step": 5072 }, { "epoch": 0.49, "grad_norm": 0.296096353606665, "learning_rate": 0.0001803759353903285, "loss": 1.1522, "step": 5073 }, { "epoch": 0.49, "grad_norm": 0.2516486412186821, "learning_rate": 0.0001803665225068883, "loss": 1.0666, "step": 5074 }, { "epoch": 0.49, "grad_norm": 0.28162660247716065, "learning_rate": 0.00018035710761222533, "loss": 1.0581, "step": 5075 }, { "epoch": 0.49, "grad_norm": 0.2573369100267432, "learning_rate": 0.00018034769070657524, "loss": 1.0313, "step": 5076 }, { "epoch": 0.49, "grad_norm": 0.25964129622911775, "learning_rate": 0.00018033827179017372, "loss": 1.0417, "step": 5077 }, { "epoch": 0.49, "grad_norm": 0.2584869469852085, "learning_rate": 0.00018032885086325645, "loss": 1.0705, "step": 5078 }, { "epoch": 0.49, "grad_norm": 0.2803573003244443, "learning_rate": 0.0001803194279260592, "loss": 1.1134, "step": 5079 }, { "epoch": 0.49, "grad_norm": 0.25948244844278057, "learning_rate": 0.00018031000297881778, "loss": 1.1251, "step": 5080 }, { "epoch": 0.49, "grad_norm": 0.27477615836677105, "learning_rate": 0.00018030057602176806, "loss": 1.0987, "step": 5081 }, { "epoch": 0.49, "grad_norm": 0.2949596444499225, "learning_rate": 0.00018029114705514596, "loss": 1.1267, "step": 5082 }, { "epoch": 0.49, "grad_norm": 0.3126152488004548, "learning_rate": 0.00018028171607918747, "loss": 1.1043, "step": 5083 }, { "epoch": 0.49, "grad_norm": 0.3158944201979104, "learning_rate": 0.00018027228309412853, "loss": 1.1333, "step": 5084 }, { "epoch": 0.49, "grad_norm": 0.23721139976987007, "learning_rate": 0.00018026284810020532, "loss": 1.0958, "step": 5085 }, { "epoch": 0.49, "grad_norm": 0.2620591461537738, "learning_rate": 0.00018025341109765384, "loss": 1.0396, "step": 5086 }, { "epoch": 0.49, "grad_norm": 0.29187944723111464, "learning_rate": 0.00018024397208671035, "loss": 1.1689, "step": 5087 }, { "epoch": 0.49, "grad_norm": 0.2740567842867544, "learning_rate": 0.000180234531067611, "loss": 1.0742, "step": 5088 }, { "epoch": 0.49, "grad_norm": 0.28997111975839956, "learning_rate": 0.00018022508804059207, "loss": 1.1359, "step": 5089 }, { "epoch": 0.49, "grad_norm": 0.264169160568814, "learning_rate": 0.00018021564300588994, "loss": 0.9856, "step": 5090 }, { "epoch": 0.49, "grad_norm": 0.25799277848982, "learning_rate": 0.0001802061959637409, "loss": 1.0802, "step": 5091 }, { "epoch": 0.49, "grad_norm": 0.27789264617161874, "learning_rate": 0.0001801967469143814, "loss": 1.1495, "step": 5092 }, { "epoch": 0.49, "grad_norm": 0.284905774011098, "learning_rate": 0.0001801872958580479, "loss": 1.1043, "step": 5093 }, { "epoch": 0.49, "grad_norm": 0.23840852278323477, "learning_rate": 0.00018017784279497693, "loss": 1.0545, "step": 5094 }, { "epoch": 0.49, "grad_norm": 0.29365200369529876, "learning_rate": 0.00018016838772540506, "loss": 1.0454, "step": 5095 }, { "epoch": 0.49, "grad_norm": 0.2905391396194257, "learning_rate": 0.0001801589306495689, "loss": 1.0989, "step": 5096 }, { "epoch": 0.49, "grad_norm": 0.30717926924064015, "learning_rate": 0.00018014947156770513, "loss": 1.0989, "step": 5097 }, { "epoch": 0.49, "grad_norm": 0.24633770830565768, "learning_rate": 0.00018014001048005044, "loss": 0.9554, "step": 5098 }, { "epoch": 0.49, "grad_norm": 0.269877103345373, "learning_rate": 0.00018013054738684166, "loss": 1.1133, "step": 5099 }, { "epoch": 0.49, "grad_norm": 0.2995678521690841, "learning_rate": 0.00018012108228831556, "loss": 1.1077, "step": 5100 }, { "epoch": 0.49, "grad_norm": 0.24440039782838777, "learning_rate": 0.000180111615184709, "loss": 1.0012, "step": 5101 }, { "epoch": 0.49, "grad_norm": 0.27251554211773565, "learning_rate": 0.00018010214607625894, "loss": 1.1004, "step": 5102 }, { "epoch": 0.49, "grad_norm": 0.2810534155604437, "learning_rate": 0.0001800926749632023, "loss": 1.0193, "step": 5103 }, { "epoch": 0.49, "grad_norm": 0.27100726353528887, "learning_rate": 0.0001800832018457762, "loss": 1.0602, "step": 5104 }, { "epoch": 0.49, "grad_norm": 0.26564758624041535, "learning_rate": 0.00018007372672421756, "loss": 1.079, "step": 5105 }, { "epoch": 0.49, "grad_norm": 0.2974717566929695, "learning_rate": 0.00018006424959876363, "loss": 1.1326, "step": 5106 }, { "epoch": 0.49, "grad_norm": 0.30488571850565943, "learning_rate": 0.00018005477046965153, "loss": 1.0309, "step": 5107 }, { "epoch": 0.49, "grad_norm": 0.2145246313949886, "learning_rate": 0.0001800452893371185, "loss": 1.0762, "step": 5108 }, { "epoch": 0.49, "grad_norm": 0.2636249075639135, "learning_rate": 0.00018003580620140177, "loss": 1.153, "step": 5109 }, { "epoch": 0.49, "grad_norm": 0.27782368506119703, "learning_rate": 0.0001800263210627387, "loss": 1.117, "step": 5110 }, { "epoch": 0.49, "grad_norm": 0.3007327930211106, "learning_rate": 0.00018001683392136666, "loss": 1.1193, "step": 5111 }, { "epoch": 0.49, "grad_norm": 0.28929437068723235, "learning_rate": 0.00018000734477752306, "loss": 1.0561, "step": 5112 }, { "epoch": 0.49, "grad_norm": 0.28549761815962377, "learning_rate": 0.00017999785363144536, "loss": 1.1016, "step": 5113 }, { "epoch": 0.49, "grad_norm": 0.2863390229819487, "learning_rate": 0.0001799883604833711, "loss": 1.1143, "step": 5114 }, { "epoch": 0.49, "grad_norm": 0.2758612911370492, "learning_rate": 0.00017997886533353786, "loss": 1.1496, "step": 5115 }, { "epoch": 0.49, "grad_norm": 0.26382558236784287, "learning_rate": 0.00017996936818218324, "loss": 1.0343, "step": 5116 }, { "epoch": 0.49, "grad_norm": 0.2695740841257572, "learning_rate": 0.00017995986902954493, "loss": 1.1589, "step": 5117 }, { "epoch": 0.49, "grad_norm": 0.315794471443836, "learning_rate": 0.00017995036787586064, "loss": 0.9799, "step": 5118 }, { "epoch": 0.49, "grad_norm": 0.2707540246275962, "learning_rate": 0.00017994086472136815, "loss": 1.089, "step": 5119 }, { "epoch": 0.49, "grad_norm": 0.3337912375773768, "learning_rate": 0.0001799313595663053, "loss": 1.0462, "step": 5120 }, { "epoch": 0.49, "grad_norm": 0.30849482259896643, "learning_rate": 0.0001799218524109099, "loss": 0.9913, "step": 5121 }, { "epoch": 0.49, "grad_norm": 0.2794905681957685, "learning_rate": 0.00017991234325541995, "loss": 1.0798, "step": 5122 }, { "epoch": 0.49, "grad_norm": 0.26906629376110763, "learning_rate": 0.00017990283210007335, "loss": 0.9918, "step": 5123 }, { "epoch": 0.49, "grad_norm": 0.2640810736886688, "learning_rate": 0.00017989331894510818, "loss": 1.0668, "step": 5124 }, { "epoch": 0.49, "grad_norm": 0.3403000076240741, "learning_rate": 0.0001798838037907625, "loss": 1.1871, "step": 5125 }, { "epoch": 0.49, "grad_norm": 0.29390271181136557, "learning_rate": 0.00017987428663727441, "loss": 1.0834, "step": 5126 }, { "epoch": 0.49, "grad_norm": 0.29242522687517936, "learning_rate": 0.00017986476748488214, "loss": 0.9395, "step": 5127 }, { "epoch": 0.49, "grad_norm": 0.30030389910750893, "learning_rate": 0.00017985524633382381, "loss": 1.0064, "step": 5128 }, { "epoch": 0.49, "grad_norm": 0.26767260775313606, "learning_rate": 0.00017984572318433778, "loss": 1.2037, "step": 5129 }, { "epoch": 0.49, "grad_norm": 0.2883783786958614, "learning_rate": 0.00017983619803666235, "loss": 1.0901, "step": 5130 }, { "epoch": 0.49, "grad_norm": 0.2615336186142248, "learning_rate": 0.00017982667089103588, "loss": 1.1084, "step": 5131 }, { "epoch": 0.49, "grad_norm": 0.2806392247590465, "learning_rate": 0.0001798171417476968, "loss": 1.0532, "step": 5132 }, { "epoch": 0.49, "grad_norm": 0.2563049389592253, "learning_rate": 0.0001798076106068836, "loss": 1.0789, "step": 5133 }, { "epoch": 0.49, "grad_norm": 0.32787128373934044, "learning_rate": 0.0001797980774688348, "loss": 1.0862, "step": 5134 }, { "epoch": 0.49, "grad_norm": 0.26980316995262976, "learning_rate": 0.00017978854233378891, "loss": 1.2006, "step": 5135 }, { "epoch": 0.49, "grad_norm": 0.2359678414179539, "learning_rate": 0.00017977900520198465, "loss": 0.9747, "step": 5136 }, { "epoch": 0.49, "grad_norm": 0.28300881314834403, "learning_rate": 0.00017976946607366063, "loss": 1.0696, "step": 5137 }, { "epoch": 0.49, "grad_norm": 0.27366802597990486, "learning_rate": 0.0001797599249490556, "loss": 0.9662, "step": 5138 }, { "epoch": 0.49, "grad_norm": 0.2714308944537401, "learning_rate": 0.00017975038182840828, "loss": 1.083, "step": 5139 }, { "epoch": 0.49, "grad_norm": 0.2644809976461488, "learning_rate": 0.00017974083671195757, "loss": 1.081, "step": 5140 }, { "epoch": 0.49, "grad_norm": 0.2813538147596539, "learning_rate": 0.0001797312895999423, "loss": 1.1335, "step": 5141 }, { "epoch": 0.49, "grad_norm": 0.2910923380431433, "learning_rate": 0.0001797217404926014, "loss": 1.0232, "step": 5142 }, { "epoch": 0.49, "grad_norm": 0.26419502096815084, "learning_rate": 0.00017971218939017382, "loss": 1.106, "step": 5143 }, { "epoch": 0.49, "grad_norm": 0.30598831197367454, "learning_rate": 0.00017970263629289864, "loss": 1.1303, "step": 5144 }, { "epoch": 0.49, "grad_norm": 0.24615417333770134, "learning_rate": 0.00017969308120101488, "loss": 0.969, "step": 5145 }, { "epoch": 0.49, "grad_norm": 0.2592721166455555, "learning_rate": 0.00017968352411476166, "loss": 1.0971, "step": 5146 }, { "epoch": 0.49, "grad_norm": 0.2868843022304741, "learning_rate": 0.00017967396503437816, "loss": 0.986, "step": 5147 }, { "epoch": 0.49, "grad_norm": 0.2936385306501026, "learning_rate": 0.00017966440396010366, "loss": 1.0833, "step": 5148 }, { "epoch": 0.49, "grad_norm": 0.2947617623317851, "learning_rate": 0.00017965484089217735, "loss": 1.0711, "step": 5149 }, { "epoch": 0.49, "grad_norm": 0.2628043083883782, "learning_rate": 0.0001796452758308386, "loss": 1.1146, "step": 5150 }, { "epoch": 0.49, "grad_norm": 0.31452969252198026, "learning_rate": 0.00017963570877632676, "loss": 1.1144, "step": 5151 }, { "epoch": 0.49, "grad_norm": 0.2596821467826261, "learning_rate": 0.00017962613972888125, "loss": 1.0938, "step": 5152 }, { "epoch": 0.49, "grad_norm": 0.27739044259675993, "learning_rate": 0.00017961656868874156, "loss": 1.0211, "step": 5153 }, { "epoch": 0.49, "grad_norm": 0.2937922180821596, "learning_rate": 0.0001796069956561472, "loss": 1.0647, "step": 5154 }, { "epoch": 0.49, "grad_norm": 0.27965187717460716, "learning_rate": 0.00017959742063133774, "loss": 1.0117, "step": 5155 }, { "epoch": 0.49, "grad_norm": 0.2840025373723606, "learning_rate": 0.00017958784361455282, "loss": 0.9706, "step": 5156 }, { "epoch": 0.49, "grad_norm": 0.3178568601522771, "learning_rate": 0.00017957826460603205, "loss": 1.018, "step": 5157 }, { "epoch": 0.49, "grad_norm": 0.2860032058314914, "learning_rate": 0.00017956868360601526, "loss": 1.1566, "step": 5158 }, { "epoch": 0.49, "grad_norm": 0.28443581551498176, "learning_rate": 0.00017955910061474213, "loss": 1.0321, "step": 5159 }, { "epoch": 0.49, "grad_norm": 0.27187438960276544, "learning_rate": 0.0001795495156324525, "loss": 1.0505, "step": 5160 }, { "epoch": 0.49, "grad_norm": 0.29288869429900377, "learning_rate": 0.00017953992865938622, "loss": 1.0511, "step": 5161 }, { "epoch": 0.49, "grad_norm": 0.30056027600724244, "learning_rate": 0.00017953033969578326, "loss": 1.1062, "step": 5162 }, { "epoch": 0.49, "grad_norm": 0.26821431519549427, "learning_rate": 0.00017952074874188356, "loss": 1.092, "step": 5163 }, { "epoch": 0.49, "grad_norm": 0.24906638557874644, "learning_rate": 0.00017951115579792717, "loss": 1.1022, "step": 5164 }, { "epoch": 0.49, "grad_norm": 0.26309218013010355, "learning_rate": 0.0001795015608641541, "loss": 1.0265, "step": 5165 }, { "epoch": 0.49, "grad_norm": 0.26268394260057937, "learning_rate": 0.00017949196394080453, "loss": 1.0653, "step": 5166 }, { "epoch": 0.49, "grad_norm": 0.2896832205130746, "learning_rate": 0.00017948236502811859, "loss": 1.0854, "step": 5167 }, { "epoch": 0.49, "grad_norm": 0.2625237394571958, "learning_rate": 0.00017947276412633652, "loss": 0.9619, "step": 5168 }, { "epoch": 0.49, "grad_norm": 0.27986610289816, "learning_rate": 0.0001794631612356986, "loss": 1.1716, "step": 5169 }, { "epoch": 0.49, "grad_norm": 0.28211340979754457, "learning_rate": 0.0001794535563564451, "loss": 1.1021, "step": 5170 }, { "epoch": 0.49, "grad_norm": 0.24926141918189934, "learning_rate": 0.00017944394948881642, "loss": 1.056, "step": 5171 }, { "epoch": 0.49, "grad_norm": 0.2869730653075155, "learning_rate": 0.00017943434063305298, "loss": 0.9492, "step": 5172 }, { "epoch": 0.49, "grad_norm": 0.29187379432960536, "learning_rate": 0.00017942472978939525, "loss": 1.1833, "step": 5173 }, { "epoch": 0.5, "grad_norm": 0.28973741108012707, "learning_rate": 0.00017941511695808372, "loss": 1.0489, "step": 5174 }, { "epoch": 0.5, "grad_norm": 0.27689121010931406, "learning_rate": 0.000179405502139359, "loss": 1.1769, "step": 5175 }, { "epoch": 0.5, "grad_norm": 0.3067616467203378, "learning_rate": 0.00017939588533346168, "loss": 1.1473, "step": 5176 }, { "epoch": 0.5, "grad_norm": 0.2757139090006968, "learning_rate": 0.0001793862665406324, "loss": 1.1183, "step": 5177 }, { "epoch": 0.5, "grad_norm": 0.27386836919563157, "learning_rate": 0.00017937664576111198, "loss": 1.0154, "step": 5178 }, { "epoch": 0.5, "grad_norm": 0.2918051909413474, "learning_rate": 0.00017936702299514105, "loss": 1.1863, "step": 5179 }, { "epoch": 0.5, "grad_norm": 0.2964159774568591, "learning_rate": 0.00017935739824296052, "loss": 1.0043, "step": 5180 }, { "epoch": 0.5, "grad_norm": 0.31639186034892197, "learning_rate": 0.0001793477715048112, "loss": 1.1946, "step": 5181 }, { "epoch": 0.5, "grad_norm": 0.3060832768680089, "learning_rate": 0.00017933814278093407, "loss": 0.9377, "step": 5182 }, { "epoch": 0.5, "grad_norm": 0.3453844813102695, "learning_rate": 0.00017932851207157002, "loss": 1.0465, "step": 5183 }, { "epoch": 0.5, "grad_norm": 0.25905466276473943, "learning_rate": 0.0001793188793769601, "loss": 1.1672, "step": 5184 }, { "epoch": 0.5, "grad_norm": 0.29227551232364024, "learning_rate": 0.00017930924469734537, "loss": 1.0898, "step": 5185 }, { "epoch": 0.5, "grad_norm": 0.2513616979563531, "learning_rate": 0.00017929960803296697, "loss": 1.0656, "step": 5186 }, { "epoch": 0.5, "grad_norm": 0.275133905403024, "learning_rate": 0.00017928996938406603, "loss": 1.002, "step": 5187 }, { "epoch": 0.5, "grad_norm": 0.28951906430321944, "learning_rate": 0.00017928032875088375, "loss": 1.1197, "step": 5188 }, { "epoch": 0.5, "grad_norm": 0.26163177751335326, "learning_rate": 0.00017927068613366145, "loss": 1.1226, "step": 5189 }, { "epoch": 0.5, "grad_norm": 0.308746805614135, "learning_rate": 0.00017926104153264042, "loss": 1.0741, "step": 5190 }, { "epoch": 0.5, "grad_norm": 0.29075350653382204, "learning_rate": 0.00017925139494806198, "loss": 1.1404, "step": 5191 }, { "epoch": 0.5, "grad_norm": 0.2760956753624024, "learning_rate": 0.0001792417463801676, "loss": 1.132, "step": 5192 }, { "epoch": 0.5, "grad_norm": 0.30467872599141255, "learning_rate": 0.0001792320958291987, "loss": 1.1227, "step": 5193 }, { "epoch": 0.5, "grad_norm": 0.269599432070437, "learning_rate": 0.0001792224432953968, "loss": 1.1915, "step": 5194 }, { "epoch": 0.5, "grad_norm": 0.2540692313113988, "learning_rate": 0.00017921278877900348, "loss": 1.1215, "step": 5195 }, { "epoch": 0.5, "grad_norm": 0.25608501554671015, "learning_rate": 0.0001792031322802603, "loss": 1.0529, "step": 5196 }, { "epoch": 0.5, "grad_norm": 0.2646934249716702, "learning_rate": 0.00017919347379940904, "loss": 1.1269, "step": 5197 }, { "epoch": 0.5, "grad_norm": 0.2458734180042231, "learning_rate": 0.00017918381333669126, "loss": 1.0294, "step": 5198 }, { "epoch": 0.5, "grad_norm": 0.2629321130779039, "learning_rate": 0.0001791741508923488, "loss": 1.0545, "step": 5199 }, { "epoch": 0.5, "grad_norm": 0.29932796860263766, "learning_rate": 0.00017916448646662346, "loss": 1.1029, "step": 5200 }, { "epoch": 0.5, "grad_norm": 0.2794636942689881, "learning_rate": 0.00017915482005975708, "loss": 0.9605, "step": 5201 }, { "epoch": 0.5, "grad_norm": 0.2805023432544276, "learning_rate": 0.00017914515167199158, "loss": 1.0897, "step": 5202 }, { "epoch": 0.5, "grad_norm": 0.25784189464219454, "learning_rate": 0.00017913548130356894, "loss": 1.013, "step": 5203 }, { "epoch": 0.5, "grad_norm": 0.31553870897854386, "learning_rate": 0.00017912580895473114, "loss": 1.1689, "step": 5204 }, { "epoch": 0.5, "grad_norm": 0.31827443288264134, "learning_rate": 0.00017911613462572024, "loss": 1.0521, "step": 5205 }, { "epoch": 0.5, "grad_norm": 0.3025879844146086, "learning_rate": 0.00017910645831677836, "loss": 0.997, "step": 5206 }, { "epoch": 0.5, "grad_norm": 0.26057117438515826, "learning_rate": 0.0001790967800281476, "loss": 1.1074, "step": 5207 }, { "epoch": 0.5, "grad_norm": 0.2611725357612145, "learning_rate": 0.00017908709976007024, "loss": 1.0784, "step": 5208 }, { "epoch": 0.5, "grad_norm": 0.2344376304389919, "learning_rate": 0.0001790774175127885, "loss": 1.0707, "step": 5209 }, { "epoch": 0.5, "grad_norm": 0.29122733484693347, "learning_rate": 0.00017906773328654472, "loss": 1.1213, "step": 5210 }, { "epoch": 0.5, "grad_norm": 0.2759187534926237, "learning_rate": 0.00017905804708158118, "loss": 1.0693, "step": 5211 }, { "epoch": 0.5, "grad_norm": 0.2777254558995927, "learning_rate": 0.00017904835889814033, "loss": 1.0366, "step": 5212 }, { "epoch": 0.5, "grad_norm": 0.2714206402525035, "learning_rate": 0.00017903866873646463, "loss": 1.1107, "step": 5213 }, { "epoch": 0.5, "grad_norm": 0.3012099609994761, "learning_rate": 0.0001790289765967966, "loss": 1.0848, "step": 5214 }, { "epoch": 0.5, "grad_norm": 0.3032845339156945, "learning_rate": 0.00017901928247937872, "loss": 1.1453, "step": 5215 }, { "epoch": 0.5, "grad_norm": 0.26396959858562596, "learning_rate": 0.00017900958638445365, "loss": 1.1375, "step": 5216 }, { "epoch": 0.5, "grad_norm": 0.2727644440686032, "learning_rate": 0.00017899988831226402, "loss": 1.0937, "step": 5217 }, { "epoch": 0.5, "grad_norm": 0.3053107974870675, "learning_rate": 0.00017899018826305252, "loss": 1.1792, "step": 5218 }, { "epoch": 0.5, "grad_norm": 0.26793488874015303, "learning_rate": 0.00017898048623706195, "loss": 1.1571, "step": 5219 }, { "epoch": 0.5, "grad_norm": 0.2597411208953679, "learning_rate": 0.00017897078223453504, "loss": 1.052, "step": 5220 }, { "epoch": 0.5, "grad_norm": 0.31195341891857165, "learning_rate": 0.0001789610762557147, "loss": 1.0902, "step": 5221 }, { "epoch": 0.5, "grad_norm": 0.2744676529459866, "learning_rate": 0.0001789513683008438, "loss": 1.0829, "step": 5222 }, { "epoch": 0.5, "grad_norm": 0.26093271474594537, "learning_rate": 0.00017894165837016528, "loss": 0.9847, "step": 5223 }, { "epoch": 0.5, "grad_norm": 0.290962254361299, "learning_rate": 0.00017893194646392214, "loss": 1.0686, "step": 5224 }, { "epoch": 0.5, "grad_norm": 0.26375436627341603, "learning_rate": 0.00017892223258235746, "loss": 1.1454, "step": 5225 }, { "epoch": 0.5, "grad_norm": 0.2646502633063915, "learning_rate": 0.00017891251672571428, "loss": 1.0035, "step": 5226 }, { "epoch": 0.5, "eval_loss": 1.130812406539917, "eval_runtime": 4229.0908, "eval_samples_per_second": 19.772, "eval_steps_per_second": 2.472, "step": 5226 }, { "epoch": 0.5, "grad_norm": 0.27086856649700053, "learning_rate": 0.00017890279889423577, "loss": 1.2004, "step": 5227 }, { "epoch": 0.5, "grad_norm": 0.26907032517761026, "learning_rate": 0.00017889307908816514, "loss": 1.1269, "step": 5228 }, { "epoch": 0.5, "grad_norm": 0.2559621504798168, "learning_rate": 0.00017888335730774563, "loss": 1.0099, "step": 5229 }, { "epoch": 0.5, "grad_norm": 0.2886629743550709, "learning_rate": 0.00017887363355322054, "loss": 1.0698, "step": 5230 }, { "epoch": 0.5, "grad_norm": 0.3107523248743581, "learning_rate": 0.00017886390782483318, "loss": 1.086, "step": 5231 }, { "epoch": 0.5, "grad_norm": 0.2731347287540774, "learning_rate": 0.00017885418012282696, "loss": 1.1274, "step": 5232 }, { "epoch": 0.5, "grad_norm": 0.3032030305104551, "learning_rate": 0.00017884445044744532, "loss": 1.0157, "step": 5233 }, { "epoch": 0.5, "grad_norm": 0.29204175835395896, "learning_rate": 0.00017883471879893176, "loss": 1.0925, "step": 5234 }, { "epoch": 0.5, "grad_norm": 0.27875381267269567, "learning_rate": 0.00017882498517752984, "loss": 1.0, "step": 5235 }, { "epoch": 0.5, "grad_norm": 0.2744649218236222, "learning_rate": 0.00017881524958348311, "loss": 1.1309, "step": 5236 }, { "epoch": 0.5, "grad_norm": 0.2638597475413694, "learning_rate": 0.00017880551201703522, "loss": 1.1368, "step": 5237 }, { "epoch": 0.5, "grad_norm": 0.29740497162072044, "learning_rate": 0.00017879577247842984, "loss": 1.163, "step": 5238 }, { "epoch": 0.5, "grad_norm": 0.2792113343183714, "learning_rate": 0.00017878603096791078, "loss": 0.9453, "step": 5239 }, { "epoch": 0.5, "grad_norm": 0.3252881204864866, "learning_rate": 0.00017877628748572176, "loss": 0.9687, "step": 5240 }, { "epoch": 0.5, "grad_norm": 0.2993197408644714, "learning_rate": 0.00017876654203210666, "loss": 1.1889, "step": 5241 }, { "epoch": 0.5, "grad_norm": 0.25588679975602296, "learning_rate": 0.0001787567946073093, "loss": 1.0073, "step": 5242 }, { "epoch": 0.5, "grad_norm": 0.25844292628782845, "learning_rate": 0.00017874704521157368, "loss": 1.1023, "step": 5243 }, { "epoch": 0.5, "grad_norm": 0.2701484124438951, "learning_rate": 0.00017873729384514374, "loss": 1.1446, "step": 5244 }, { "epoch": 0.5, "grad_norm": 0.28163616631177285, "learning_rate": 0.00017872754050826358, "loss": 1.1451, "step": 5245 }, { "epoch": 0.5, "grad_norm": 0.24357842637206142, "learning_rate": 0.00017871778520117722, "loss": 1.0879, "step": 5246 }, { "epoch": 0.5, "grad_norm": 0.2854538423239079, "learning_rate": 0.0001787080279241288, "loss": 1.1346, "step": 5247 }, { "epoch": 0.5, "grad_norm": 0.28966646255088585, "learning_rate": 0.00017869826867736253, "loss": 1.1342, "step": 5248 }, { "epoch": 0.5, "grad_norm": 0.269137927932761, "learning_rate": 0.0001786885074611226, "loss": 1.2118, "step": 5249 }, { "epoch": 0.5, "grad_norm": 0.24965315368924026, "learning_rate": 0.00017867874427565336, "loss": 1.0445, "step": 5250 }, { "epoch": 0.5, "grad_norm": 0.3268586937562193, "learning_rate": 0.00017866897912119907, "loss": 1.0391, "step": 5251 }, { "epoch": 0.5, "grad_norm": 0.2628697700403388, "learning_rate": 0.00017865921199800415, "loss": 1.0788, "step": 5252 }, { "epoch": 0.5, "grad_norm": 0.25466346365515374, "learning_rate": 0.00017864944290631301, "loss": 1.1108, "step": 5253 }, { "epoch": 0.5, "grad_norm": 0.2785430342878654, "learning_rate": 0.00017863967184637014, "loss": 0.9847, "step": 5254 }, { "epoch": 0.5, "grad_norm": 0.2821653592665947, "learning_rate": 0.00017862989881842003, "loss": 1.1659, "step": 5255 }, { "epoch": 0.5, "grad_norm": 0.28381789349374786, "learning_rate": 0.0001786201238227073, "loss": 1.0368, "step": 5256 }, { "epoch": 0.5, "grad_norm": 0.28526194687906037, "learning_rate": 0.00017861034685947658, "loss": 1.0789, "step": 5257 }, { "epoch": 0.5, "grad_norm": 0.2646173880017078, "learning_rate": 0.0001786005679289725, "loss": 1.0564, "step": 5258 }, { "epoch": 0.5, "grad_norm": 0.23710824870546326, "learning_rate": 0.0001785907870314398, "loss": 0.9625, "step": 5259 }, { "epoch": 0.5, "grad_norm": 0.26259679119084095, "learning_rate": 0.0001785810041671233, "loss": 1.1021, "step": 5260 }, { "epoch": 0.5, "grad_norm": 0.2709420502635169, "learning_rate": 0.00017857121933626777, "loss": 1.0062, "step": 5261 }, { "epoch": 0.5, "grad_norm": 0.2613609827266697, "learning_rate": 0.0001785614325391181, "loss": 1.0678, "step": 5262 }, { "epoch": 0.5, "grad_norm": 0.28813131561794453, "learning_rate": 0.00017855164377591918, "loss": 0.9172, "step": 5263 }, { "epoch": 0.5, "grad_norm": 0.27234699286481917, "learning_rate": 0.000178541853046916, "loss": 1.1471, "step": 5264 }, { "epoch": 0.5, "grad_norm": 0.2506386238760721, "learning_rate": 0.0001785320603523536, "loss": 1.0259, "step": 5265 }, { "epoch": 0.5, "grad_norm": 0.2540092486293724, "learning_rate": 0.00017852226569247708, "loss": 1.0877, "step": 5266 }, { "epoch": 0.5, "grad_norm": 0.29602747120892153, "learning_rate": 0.00017851246906753145, "loss": 1.109, "step": 5267 }, { "epoch": 0.5, "grad_norm": 0.2992977885795131, "learning_rate": 0.00017850267047776197, "loss": 1.0069, "step": 5268 }, { "epoch": 0.5, "grad_norm": 0.28770619817428295, "learning_rate": 0.0001784928699234138, "loss": 1.0456, "step": 5269 }, { "epoch": 0.5, "grad_norm": 0.2621648485300015, "learning_rate": 0.00017848306740473227, "loss": 1.0159, "step": 5270 }, { "epoch": 0.5, "grad_norm": 0.27475331658348695, "learning_rate": 0.00017847326292196261, "loss": 1.026, "step": 5271 }, { "epoch": 0.5, "grad_norm": 0.2883171040107314, "learning_rate": 0.00017846345647535026, "loss": 1.0636, "step": 5272 }, { "epoch": 0.5, "grad_norm": 0.2679836068828528, "learning_rate": 0.0001784536480651406, "loss": 1.1411, "step": 5273 }, { "epoch": 0.5, "grad_norm": 0.2718315189359781, "learning_rate": 0.00017844383769157905, "loss": 1.0714, "step": 5274 }, { "epoch": 0.5, "grad_norm": 0.2679104057163504, "learning_rate": 0.0001784340253549112, "loss": 1.0169, "step": 5275 }, { "epoch": 0.5, "grad_norm": 0.28568786629626025, "learning_rate": 0.00017842421105538256, "loss": 1.0771, "step": 5276 }, { "epoch": 0.5, "grad_norm": 0.2548173055279772, "learning_rate": 0.00017841439479323877, "loss": 1.1359, "step": 5277 }, { "epoch": 0.5, "grad_norm": 0.24999339748919044, "learning_rate": 0.00017840457656872544, "loss": 1.0132, "step": 5278 }, { "epoch": 0.51, "grad_norm": 0.3003717665696886, "learning_rate": 0.00017839475638208832, "loss": 1.0838, "step": 5279 }, { "epoch": 0.51, "grad_norm": 0.31887733553865744, "learning_rate": 0.00017838493423357314, "loss": 1.0885, "step": 5280 }, { "epoch": 0.51, "grad_norm": 0.2625967483202807, "learning_rate": 0.00017837511012342572, "loss": 1.1534, "step": 5281 }, { "epoch": 0.51, "grad_norm": 0.31614913625651914, "learning_rate": 0.0001783652840518919, "loss": 1.0403, "step": 5282 }, { "epoch": 0.51, "grad_norm": 0.26033042065792134, "learning_rate": 0.00017835545601921764, "loss": 1.0581, "step": 5283 }, { "epoch": 0.51, "grad_norm": 0.2856070132728536, "learning_rate": 0.00017834562602564883, "loss": 1.0107, "step": 5284 }, { "epoch": 0.51, "grad_norm": 0.2536339900536725, "learning_rate": 0.00017833579407143147, "loss": 0.9525, "step": 5285 }, { "epoch": 0.51, "grad_norm": 0.26624532509069715, "learning_rate": 0.00017832596015681165, "loss": 1.0309, "step": 5286 }, { "epoch": 0.51, "grad_norm": 0.26639398423968824, "learning_rate": 0.00017831612428203543, "loss": 1.103, "step": 5287 }, { "epoch": 0.51, "grad_norm": 0.2854143682160096, "learning_rate": 0.00017830628644734898, "loss": 1.1898, "step": 5288 }, { "epoch": 0.51, "grad_norm": 0.2647981085190368, "learning_rate": 0.0001782964466529985, "loss": 1.0559, "step": 5289 }, { "epoch": 0.51, "grad_norm": 0.2692642057535311, "learning_rate": 0.00017828660489923025, "loss": 1.1121, "step": 5290 }, { "epoch": 0.51, "grad_norm": 0.2790653912792434, "learning_rate": 0.00017827676118629054, "loss": 1.119, "step": 5291 }, { "epoch": 0.51, "grad_norm": 0.2731285711487075, "learning_rate": 0.00017826691551442564, "loss": 1.1326, "step": 5292 }, { "epoch": 0.51, "grad_norm": 0.27848520893646905, "learning_rate": 0.000178257067883882, "loss": 1.0876, "step": 5293 }, { "epoch": 0.51, "grad_norm": 0.27766691943147104, "learning_rate": 0.00017824721829490608, "loss": 1.0075, "step": 5294 }, { "epoch": 0.51, "grad_norm": 0.24812476086316548, "learning_rate": 0.00017823736674774432, "loss": 0.9859, "step": 5295 }, { "epoch": 0.51, "grad_norm": 0.24136690327253105, "learning_rate": 0.00017822751324264328, "loss": 0.9295, "step": 5296 }, { "epoch": 0.51, "grad_norm": 0.22887560800126114, "learning_rate": 0.00017821765777984957, "loss": 1.1376, "step": 5297 }, { "epoch": 0.51, "grad_norm": 0.2746004567986452, "learning_rate": 0.0001782078003596098, "loss": 1.0057, "step": 5298 }, { "epoch": 0.51, "grad_norm": 0.2425791039008265, "learning_rate": 0.0001781979409821707, "loss": 1.1405, "step": 5299 }, { "epoch": 0.51, "grad_norm": 0.2718956254441623, "learning_rate": 0.00017818807964777898, "loss": 1.0884, "step": 5300 }, { "epoch": 0.51, "grad_norm": 0.2616875209648741, "learning_rate": 0.0001781782163566814, "loss": 1.0947, "step": 5301 }, { "epoch": 0.51, "grad_norm": 0.24842391055912055, "learning_rate": 0.00017816835110912485, "loss": 0.9207, "step": 5302 }, { "epoch": 0.51, "grad_norm": 0.2747180515251053, "learning_rate": 0.00017815848390535617, "loss": 0.9877, "step": 5303 }, { "epoch": 0.51, "grad_norm": 0.2875077951068339, "learning_rate": 0.00017814861474562232, "loss": 1.0289, "step": 5304 }, { "epoch": 0.51, "grad_norm": 0.2763116859597204, "learning_rate": 0.00017813874363017027, "loss": 1.0357, "step": 5305 }, { "epoch": 0.51, "grad_norm": 0.28069395829776694, "learning_rate": 0.00017812887055924703, "loss": 1.1048, "step": 5306 }, { "epoch": 0.51, "grad_norm": 0.30047513289045635, "learning_rate": 0.00017811899553309975, "loss": 1.0333, "step": 5307 }, { "epoch": 0.51, "grad_norm": 0.2650398736977853, "learning_rate": 0.00017810911855197547, "loss": 1.0565, "step": 5308 }, { "epoch": 0.51, "grad_norm": 0.24640154980602844, "learning_rate": 0.0001780992396161214, "loss": 1.1352, "step": 5309 }, { "epoch": 0.51, "grad_norm": 0.28475244026032503, "learning_rate": 0.00017808935872578482, "loss": 1.0479, "step": 5310 }, { "epoch": 0.51, "grad_norm": 0.2756255403166274, "learning_rate": 0.00017807947588121295, "loss": 1.0636, "step": 5311 }, { "epoch": 0.51, "grad_norm": 0.3137323153824364, "learning_rate": 0.00017806959108265308, "loss": 1.2068, "step": 5312 }, { "epoch": 0.51, "grad_norm": 0.2842470117681127, "learning_rate": 0.00017805970433035266, "loss": 1.229, "step": 5313 }, { "epoch": 0.51, "grad_norm": 0.2981813888336136, "learning_rate": 0.00017804981562455908, "loss": 1.1881, "step": 5314 }, { "epoch": 0.51, "grad_norm": 0.28279186295473696, "learning_rate": 0.00017803992496551982, "loss": 1.0685, "step": 5315 }, { "epoch": 0.51, "grad_norm": 0.2702456382826126, "learning_rate": 0.0001780300323534824, "loss": 0.9998, "step": 5316 }, { "epoch": 0.51, "grad_norm": 0.276914892943855, "learning_rate": 0.00017802013778869436, "loss": 1.0531, "step": 5317 }, { "epoch": 0.51, "grad_norm": 0.27960841961549454, "learning_rate": 0.0001780102412714033, "loss": 1.1359, "step": 5318 }, { "epoch": 0.51, "grad_norm": 0.25797705830410184, "learning_rate": 0.00017800034280185699, "loss": 1.0185, "step": 5319 }, { "epoch": 0.51, "grad_norm": 0.2857057108150563, "learning_rate": 0.00017799044238030307, "loss": 1.0631, "step": 5320 }, { "epoch": 0.51, "grad_norm": 0.25584721058778875, "learning_rate": 0.0001779805400069893, "loss": 0.987, "step": 5321 }, { "epoch": 0.51, "grad_norm": 0.2325654159439427, "learning_rate": 0.0001779706356821635, "loss": 1.0651, "step": 5322 }, { "epoch": 0.51, "grad_norm": 0.2970569353859898, "learning_rate": 0.00017796072940607353, "loss": 1.13, "step": 5323 }, { "epoch": 0.51, "grad_norm": 0.2929022741608403, "learning_rate": 0.00017795082117896734, "loss": 1.0719, "step": 5324 }, { "epoch": 0.51, "grad_norm": 0.2609795745084068, "learning_rate": 0.00017794091100109283, "loss": 1.0067, "step": 5325 }, { "epoch": 0.51, "grad_norm": 0.30762587611696396, "learning_rate": 0.0001779309988726981, "loss": 1.147, "step": 5326 }, { "epoch": 0.51, "grad_norm": 0.2661981298944252, "learning_rate": 0.00017792108479403106, "loss": 1.0734, "step": 5327 }, { "epoch": 0.51, "grad_norm": 0.2764342632822514, "learning_rate": 0.00017791116876533994, "loss": 1.1101, "step": 5328 }, { "epoch": 0.51, "grad_norm": 0.24938502770088783, "learning_rate": 0.00017790125078687288, "loss": 1.0666, "step": 5329 }, { "epoch": 0.51, "grad_norm": 0.3336408049693267, "learning_rate": 0.000177891330858878, "loss": 1.0902, "step": 5330 }, { "epoch": 0.51, "grad_norm": 0.3218184473435925, "learning_rate": 0.00017788140898160367, "loss": 1.0904, "step": 5331 }, { "epoch": 0.51, "grad_norm": 0.2789740899659255, "learning_rate": 0.0001778714851552981, "loss": 1.0243, "step": 5332 }, { "epoch": 0.51, "grad_norm": 0.266597050786924, "learning_rate": 0.00017786155938020968, "loss": 1.0069, "step": 5333 }, { "epoch": 0.51, "grad_norm": 0.28223064418935384, "learning_rate": 0.00017785163165658685, "loss": 1.1401, "step": 5334 }, { "epoch": 0.51, "grad_norm": 0.272562171370601, "learning_rate": 0.00017784170198467797, "loss": 1.0984, "step": 5335 }, { "epoch": 0.51, "grad_norm": 0.23364083669723149, "learning_rate": 0.00017783177036473155, "loss": 1.0585, "step": 5336 }, { "epoch": 0.51, "grad_norm": 0.2423799752683352, "learning_rate": 0.0001778218367969962, "loss": 0.9586, "step": 5337 }, { "epoch": 0.51, "grad_norm": 0.28056034525742624, "learning_rate": 0.00017781190128172045, "loss": 1.0374, "step": 5338 }, { "epoch": 0.51, "grad_norm": 0.31575138951832793, "learning_rate": 0.000177801963819153, "loss": 1.1025, "step": 5339 }, { "epoch": 0.51, "grad_norm": 0.2756564243357627, "learning_rate": 0.00017779202440954247, "loss": 1.073, "step": 5340 }, { "epoch": 0.51, "grad_norm": 0.2855725418706688, "learning_rate": 0.00017778208305313766, "loss": 0.9841, "step": 5341 }, { "epoch": 0.51, "grad_norm": 0.2622842600538124, "learning_rate": 0.00017777213975018734, "loss": 0.9923, "step": 5342 }, { "epoch": 0.51, "grad_norm": 0.27868325397356525, "learning_rate": 0.00017776219450094032, "loss": 1.0104, "step": 5343 }, { "epoch": 0.51, "grad_norm": 0.2910306279110282, "learning_rate": 0.00017775224730564554, "loss": 0.9543, "step": 5344 }, { "epoch": 0.51, "grad_norm": 0.30979629764973976, "learning_rate": 0.0001777422981645519, "loss": 1.1145, "step": 5345 }, { "epoch": 0.51, "grad_norm": 0.30299227741795537, "learning_rate": 0.00017773234707790838, "loss": 1.0723, "step": 5346 }, { "epoch": 0.51, "grad_norm": 0.2925383103599133, "learning_rate": 0.00017772239404596402, "loss": 1.1494, "step": 5347 }, { "epoch": 0.51, "grad_norm": 0.25939612879354135, "learning_rate": 0.00017771243906896793, "loss": 1.0362, "step": 5348 }, { "epoch": 0.51, "grad_norm": 0.292894121185352, "learning_rate": 0.00017770248214716918, "loss": 1.0974, "step": 5349 }, { "epoch": 0.51, "grad_norm": 0.2934306234314348, "learning_rate": 0.000177692523280817, "loss": 1.0471, "step": 5350 }, { "epoch": 0.51, "grad_norm": 0.2620038040723942, "learning_rate": 0.0001776825624701606, "loss": 1.05, "step": 5351 }, { "epoch": 0.51, "grad_norm": 0.258906223183488, "learning_rate": 0.00017767259971544923, "loss": 1.0016, "step": 5352 }, { "epoch": 0.51, "grad_norm": 0.2693727580254897, "learning_rate": 0.00017766263501693222, "loss": 1.1004, "step": 5353 }, { "epoch": 0.51, "grad_norm": 0.2656780441277641, "learning_rate": 0.000177652668374859, "loss": 1.1314, "step": 5354 }, { "epoch": 0.51, "grad_norm": 0.26233985901713786, "learning_rate": 0.00017764269978947893, "loss": 1.0327, "step": 5355 }, { "epoch": 0.51, "grad_norm": 0.2816892908241542, "learning_rate": 0.00017763272926104152, "loss": 1.1384, "step": 5356 }, { "epoch": 0.51, "grad_norm": 0.29263486374255826, "learning_rate": 0.00017762275678979625, "loss": 1.1649, "step": 5357 }, { "epoch": 0.51, "grad_norm": 0.25054193247048423, "learning_rate": 0.00017761278237599272, "loss": 0.9422, "step": 5358 }, { "epoch": 0.51, "grad_norm": 0.2728514545749119, "learning_rate": 0.00017760280601988052, "loss": 1.1264, "step": 5359 }, { "epoch": 0.51, "grad_norm": 0.2787897298192036, "learning_rate": 0.00017759282772170933, "loss": 1.069, "step": 5360 }, { "epoch": 0.51, "grad_norm": 0.28051149945852427, "learning_rate": 0.00017758284748172889, "loss": 1.0434, "step": 5361 }, { "epoch": 0.51, "grad_norm": 0.2434867215928191, "learning_rate": 0.0001775728653001889, "loss": 1.094, "step": 5362 }, { "epoch": 0.51, "grad_norm": 0.298894712000934, "learning_rate": 0.00017756288117733922, "loss": 1.0858, "step": 5363 }, { "epoch": 0.51, "grad_norm": 0.2989152889611858, "learning_rate": 0.00017755289511342968, "loss": 1.0862, "step": 5364 }, { "epoch": 0.51, "grad_norm": 0.26061563505956564, "learning_rate": 0.0001775429071087102, "loss": 1.1229, "step": 5365 }, { "epoch": 0.51, "grad_norm": 0.3078317769181313, "learning_rate": 0.00017753291716343075, "loss": 1.0443, "step": 5366 }, { "epoch": 0.51, "grad_norm": 0.271922412205937, "learning_rate": 0.00017752292527784132, "loss": 1.0569, "step": 5367 }, { "epoch": 0.51, "grad_norm": 0.2981868268132513, "learning_rate": 0.00017751293145219194, "loss": 1.1245, "step": 5368 }, { "epoch": 0.51, "grad_norm": 0.28791587214094305, "learning_rate": 0.00017750293568673275, "loss": 1.1032, "step": 5369 }, { "epoch": 0.51, "grad_norm": 0.2770400531641322, "learning_rate": 0.00017749293798171388, "loss": 1.1548, "step": 5370 }, { "epoch": 0.51, "grad_norm": 0.31035234976544707, "learning_rate": 0.00017748293833738554, "loss": 1.0248, "step": 5371 }, { "epoch": 0.51, "grad_norm": 0.32563997547553536, "learning_rate": 0.000177472936753998, "loss": 1.0142, "step": 5372 }, { "epoch": 0.51, "grad_norm": 0.2965449735119415, "learning_rate": 0.0001774629332318015, "loss": 1.074, "step": 5373 }, { "epoch": 0.51, "grad_norm": 0.24827461718041824, "learning_rate": 0.00017745292777104638, "loss": 1.0601, "step": 5374 }, { "epoch": 0.51, "grad_norm": 0.28164100400780073, "learning_rate": 0.00017744292037198312, "loss": 1.1411, "step": 5375 }, { "epoch": 0.51, "grad_norm": 0.255726016665569, "learning_rate": 0.00017743291103486207, "loss": 1.0318, "step": 5376 }, { "epoch": 0.51, "grad_norm": 0.25998427104390703, "learning_rate": 0.0001774228997599338, "loss": 1.0986, "step": 5377 }, { "epoch": 0.51, "grad_norm": 0.28575851348545084, "learning_rate": 0.00017741288654744874, "loss": 1.0325, "step": 5378 }, { "epoch": 0.51, "grad_norm": 0.31525644009615983, "learning_rate": 0.0001774028713976576, "loss": 1.0881, "step": 5379 }, { "epoch": 0.51, "grad_norm": 0.27772393103651905, "learning_rate": 0.00017739285431081093, "loss": 1.0819, "step": 5380 }, { "epoch": 0.51, "grad_norm": 0.28458960847371934, "learning_rate": 0.00017738283528715944, "loss": 1.053, "step": 5381 }, { "epoch": 0.51, "grad_norm": 0.2579176716986825, "learning_rate": 0.00017737281432695387, "loss": 1.0221, "step": 5382 }, { "epoch": 0.51, "grad_norm": 0.2682071919326544, "learning_rate": 0.000177362791430445, "loss": 1.0632, "step": 5383 }, { "epoch": 0.52, "grad_norm": 0.2817647505413598, "learning_rate": 0.00017735276659788365, "loss": 1.0892, "step": 5384 }, { "epoch": 0.52, "grad_norm": 0.27370042339496314, "learning_rate": 0.0001773427398295207, "loss": 1.1051, "step": 5385 }, { "epoch": 0.52, "grad_norm": 0.29120157753379705, "learning_rate": 0.00017733271112560707, "loss": 1.057, "step": 5386 }, { "epoch": 0.52, "grad_norm": 0.274235384389006, "learning_rate": 0.00017732268048639376, "loss": 1.1645, "step": 5387 }, { "epoch": 0.52, "grad_norm": 0.2554060673031894, "learning_rate": 0.00017731264791213177, "loss": 1.1206, "step": 5388 }, { "epoch": 0.52, "grad_norm": 0.29038444135208097, "learning_rate": 0.00017730261340307216, "loss": 1.0847, "step": 5389 }, { "epoch": 0.52, "grad_norm": 0.2927716873164211, "learning_rate": 0.00017729257695946608, "loss": 1.1842, "step": 5390 }, { "epoch": 0.52, "grad_norm": 0.28575627634335404, "learning_rate": 0.00017728253858156467, "loss": 0.9855, "step": 5391 }, { "epoch": 0.52, "grad_norm": 0.2741064932408415, "learning_rate": 0.0001772724982696192, "loss": 1.0681, "step": 5392 }, { "epoch": 0.52, "grad_norm": 0.3221555396964685, "learning_rate": 0.00017726245602388087, "loss": 1.1304, "step": 5393 }, { "epoch": 0.52, "grad_norm": 0.29940916808744256, "learning_rate": 0.00017725241184460101, "loss": 1.1972, "step": 5394 }, { "epoch": 0.52, "grad_norm": 0.2555325051925566, "learning_rate": 0.000177242365732031, "loss": 1.0663, "step": 5395 }, { "epoch": 0.52, "grad_norm": 0.29478956996412253, "learning_rate": 0.00017723231768642227, "loss": 1.1792, "step": 5396 }, { "epoch": 0.52, "grad_norm": 0.2691856309392673, "learning_rate": 0.0001772222677080262, "loss": 1.0935, "step": 5397 }, { "epoch": 0.52, "grad_norm": 0.26554481426737436, "learning_rate": 0.00017721221579709438, "loss": 1.1013, "step": 5398 }, { "epoch": 0.52, "grad_norm": 0.2974199816061973, "learning_rate": 0.00017720216195387834, "loss": 1.1026, "step": 5399 }, { "epoch": 0.52, "grad_norm": 0.25988903587070844, "learning_rate": 0.00017719210617862967, "loss": 0.9989, "step": 5400 }, { "epoch": 0.52, "grad_norm": 0.2644737230686996, "learning_rate": 0.00017718204847160004, "loss": 1.1928, "step": 5401 }, { "epoch": 0.52, "grad_norm": 0.321621116578329, "learning_rate": 0.0001771719888330411, "loss": 1.0699, "step": 5402 }, { "epoch": 0.52, "grad_norm": 0.3117270874799254, "learning_rate": 0.00017716192726320468, "loss": 1.0265, "step": 5403 }, { "epoch": 0.52, "grad_norm": 0.26397074260532366, "learning_rate": 0.0001771518637623425, "loss": 1.0595, "step": 5404 }, { "epoch": 0.52, "grad_norm": 0.26070999892086566, "learning_rate": 0.00017714179833070646, "loss": 0.9732, "step": 5405 }, { "epoch": 0.52, "grad_norm": 0.29516458483721847, "learning_rate": 0.00017713173096854846, "loss": 1.244, "step": 5406 }, { "epoch": 0.52, "grad_norm": 0.28006511868535894, "learning_rate": 0.0001771216616761204, "loss": 1.0229, "step": 5407 }, { "epoch": 0.52, "grad_norm": 0.2785160995661278, "learning_rate": 0.0001771115904536743, "loss": 1.0974, "step": 5408 }, { "epoch": 0.52, "grad_norm": 0.3039976285784444, "learning_rate": 0.00017710151730146215, "loss": 1.1096, "step": 5409 }, { "epoch": 0.52, "grad_norm": 0.2725502855772708, "learning_rate": 0.0001770914422197361, "loss": 1.0443, "step": 5410 }, { "epoch": 0.52, "grad_norm": 0.2968618423089058, "learning_rate": 0.00017708136520874822, "loss": 1.0383, "step": 5411 }, { "epoch": 0.52, "grad_norm": 0.2787402009547091, "learning_rate": 0.00017707128626875078, "loss": 1.1659, "step": 5412 }, { "epoch": 0.52, "grad_norm": 0.2873480043549193, "learning_rate": 0.00017706120539999595, "loss": 0.9287, "step": 5413 }, { "epoch": 0.52, "grad_norm": 0.27057828580034904, "learning_rate": 0.00017705112260273602, "loss": 0.9655, "step": 5414 }, { "epoch": 0.52, "grad_norm": 0.2705987981572107, "learning_rate": 0.00017704103787722332, "loss": 1.1033, "step": 5415 }, { "epoch": 0.52, "grad_norm": 0.2883167160040527, "learning_rate": 0.00017703095122371024, "loss": 1.1945, "step": 5416 }, { "epoch": 0.52, "grad_norm": 0.28040572260172153, "learning_rate": 0.00017702086264244918, "loss": 1.1136, "step": 5417 }, { "epoch": 0.52, "grad_norm": 0.23953916000277492, "learning_rate": 0.0001770107721336926, "loss": 1.127, "step": 5418 }, { "epoch": 0.52, "grad_norm": 0.2666697798476311, "learning_rate": 0.0001770006796976931, "loss": 1.1178, "step": 5419 }, { "epoch": 0.52, "grad_norm": 0.26595583947693385, "learning_rate": 0.00017699058533470318, "loss": 1.1537, "step": 5420 }, { "epoch": 0.52, "grad_norm": 0.3018520026520265, "learning_rate": 0.00017698048904497547, "loss": 1.0672, "step": 5421 }, { "epoch": 0.52, "grad_norm": 0.2585809987651865, "learning_rate": 0.00017697039082876264, "loss": 0.9036, "step": 5422 }, { "epoch": 0.52, "grad_norm": 0.30520095394033386, "learning_rate": 0.0001769602906863174, "loss": 1.1266, "step": 5423 }, { "epoch": 0.52, "grad_norm": 0.26703193999809494, "learning_rate": 0.00017695018861789254, "loss": 1.0113, "step": 5424 }, { "epoch": 0.52, "grad_norm": 0.2665069045940571, "learning_rate": 0.00017694008462374082, "loss": 1.1435, "step": 5425 }, { "epoch": 0.52, "grad_norm": 0.3066957091926963, "learning_rate": 0.00017692997870411513, "loss": 1.1477, "step": 5426 }, { "epoch": 0.52, "grad_norm": 0.3060426948221189, "learning_rate": 0.0001769198708592684, "loss": 1.0636, "step": 5427 }, { "epoch": 0.52, "grad_norm": 0.31035289985606923, "learning_rate": 0.00017690976108945353, "loss": 1.1531, "step": 5428 }, { "epoch": 0.52, "grad_norm": 0.27105532382356223, "learning_rate": 0.00017689964939492358, "loss": 1.1738, "step": 5429 }, { "epoch": 0.52, "grad_norm": 0.30441191653566924, "learning_rate": 0.00017688953577593158, "loss": 1.272, "step": 5430 }, { "epoch": 0.52, "grad_norm": 0.30963973002090833, "learning_rate": 0.0001768794202327306, "loss": 1.2062, "step": 5431 }, { "epoch": 0.52, "grad_norm": 0.27923640796457394, "learning_rate": 0.0001768693027655738, "loss": 1.1393, "step": 5432 }, { "epoch": 0.52, "grad_norm": 0.2714745780737401, "learning_rate": 0.00017685918337471442, "loss": 1.0229, "step": 5433 }, { "epoch": 0.52, "grad_norm": 0.2746763330279501, "learning_rate": 0.00017684906206040567, "loss": 1.025, "step": 5434 }, { "epoch": 0.52, "grad_norm": 0.25420475121865566, "learning_rate": 0.0001768389388229008, "loss": 1.0173, "step": 5435 }, { "epoch": 0.52, "grad_norm": 0.23858794911194617, "learning_rate": 0.00017682881366245322, "loss": 1.0893, "step": 5436 }, { "epoch": 0.52, "grad_norm": 0.30224482212537046, "learning_rate": 0.0001768186865793163, "loss": 1.1222, "step": 5437 }, { "epoch": 0.52, "grad_norm": 0.23278958691465643, "learning_rate": 0.00017680855757374345, "loss": 1.0458, "step": 5438 }, { "epoch": 0.52, "grad_norm": 0.26428873295787486, "learning_rate": 0.0001767984266459882, "loss": 1.0792, "step": 5439 }, { "epoch": 0.52, "grad_norm": 0.2594486178953975, "learning_rate": 0.00017678829379630406, "loss": 1.0737, "step": 5440 }, { "epoch": 0.52, "grad_norm": 0.2743596761285101, "learning_rate": 0.0001767781590249446, "loss": 1.0216, "step": 5441 }, { "epoch": 0.52, "grad_norm": 0.3014933928943263, "learning_rate": 0.00017676802233216346, "loss": 1.0686, "step": 5442 }, { "epoch": 0.52, "grad_norm": 0.30975481238566765, "learning_rate": 0.00017675788371821432, "loss": 1.1826, "step": 5443 }, { "epoch": 0.52, "grad_norm": 0.24533869531288546, "learning_rate": 0.00017674774318335085, "loss": 1.162, "step": 5444 }, { "epoch": 0.52, "grad_norm": 0.27791112618813957, "learning_rate": 0.0001767376007278269, "loss": 0.9609, "step": 5445 }, { "epoch": 0.52, "grad_norm": 0.31278080093016536, "learning_rate": 0.00017672745635189633, "loss": 1.1661, "step": 5446 }, { "epoch": 0.52, "grad_norm": 0.29874080218471216, "learning_rate": 0.00017671731005581287, "loss": 1.1068, "step": 5447 }, { "epoch": 0.52, "grad_norm": 0.2569503493201101, "learning_rate": 0.0001767071618398305, "loss": 1.0538, "step": 5448 }, { "epoch": 0.52, "grad_norm": 0.29826323414906175, "learning_rate": 0.00017669701170420322, "loss": 1.1264, "step": 5449 }, { "epoch": 0.52, "grad_norm": 0.2812826097473197, "learning_rate": 0.00017668685964918504, "loss": 1.0982, "step": 5450 }, { "epoch": 0.52, "grad_norm": 0.2973251253473092, "learning_rate": 0.00017667670567502998, "loss": 1.0728, "step": 5451 }, { "epoch": 0.52, "grad_norm": 0.24818251204984365, "learning_rate": 0.0001766665497819922, "loss": 1.0567, "step": 5452 }, { "epoch": 0.52, "grad_norm": 0.2884278666490467, "learning_rate": 0.00017665639197032582, "loss": 1.0685, "step": 5453 }, { "epoch": 0.52, "grad_norm": 0.262999153831761, "learning_rate": 0.00017664623224028503, "loss": 0.9473, "step": 5454 }, { "epoch": 0.52, "grad_norm": 0.3152269030127414, "learning_rate": 0.0001766360705921241, "loss": 1.169, "step": 5455 }, { "epoch": 0.52, "grad_norm": 0.25251227559062966, "learning_rate": 0.00017662590702609737, "loss": 1.0352, "step": 5456 }, { "epoch": 0.52, "grad_norm": 0.32106633074490737, "learning_rate": 0.00017661574154245914, "loss": 1.0894, "step": 5457 }, { "epoch": 0.52, "grad_norm": 0.2693010524825926, "learning_rate": 0.00017660557414146384, "loss": 1.0703, "step": 5458 }, { "epoch": 0.52, "grad_norm": 0.33501583797096984, "learning_rate": 0.0001765954048233659, "loss": 1.2, "step": 5459 }, { "epoch": 0.52, "grad_norm": 0.2766667108631253, "learning_rate": 0.0001765852335884198, "loss": 1.1096, "step": 5460 }, { "epoch": 0.52, "grad_norm": 0.28642621560137427, "learning_rate": 0.0001765750604368801, "loss": 1.0589, "step": 5461 }, { "epoch": 0.52, "grad_norm": 0.29446092888521896, "learning_rate": 0.0001765648853690014, "loss": 1.0758, "step": 5462 }, { "epoch": 0.52, "grad_norm": 0.26574617011082774, "learning_rate": 0.00017655470838503834, "loss": 1.0517, "step": 5463 }, { "epoch": 0.52, "grad_norm": 0.29380273713342564, "learning_rate": 0.00017654452948524555, "loss": 1.1355, "step": 5464 }, { "epoch": 0.52, "grad_norm": 0.26610600805445106, "learning_rate": 0.00017653434866987783, "loss": 1.156, "step": 5465 }, { "epoch": 0.52, "grad_norm": 0.23226425198871892, "learning_rate": 0.00017652416593918994, "loss": 1.1446, "step": 5466 }, { "epoch": 0.52, "grad_norm": 0.3136944620672886, "learning_rate": 0.00017651398129343667, "loss": 1.1359, "step": 5467 }, { "epoch": 0.52, "grad_norm": 0.29970255604775625, "learning_rate": 0.00017650379473287296, "loss": 1.0718, "step": 5468 }, { "epoch": 0.52, "grad_norm": 0.2782035372010567, "learning_rate": 0.0001764936062577537, "loss": 1.0908, "step": 5469 }, { "epoch": 0.52, "grad_norm": 0.2616477257641661, "learning_rate": 0.00017648341586833387, "loss": 0.9107, "step": 5470 }, { "epoch": 0.52, "grad_norm": 0.2663523876553537, "learning_rate": 0.00017647322356486848, "loss": 0.9981, "step": 5471 }, { "epoch": 0.52, "grad_norm": 0.26092658264988006, "learning_rate": 0.0001764630293476126, "loss": 1.1115, "step": 5472 }, { "epoch": 0.52, "grad_norm": 0.27133782980711774, "learning_rate": 0.0001764528332168214, "loss": 1.1506, "step": 5473 }, { "epoch": 0.52, "grad_norm": 0.29494184409148894, "learning_rate": 0.00017644263517274997, "loss": 1.0381, "step": 5474 }, { "epoch": 0.52, "grad_norm": 0.293344464237028, "learning_rate": 0.00017643243521565355, "loss": 1.0955, "step": 5475 }, { "epoch": 0.52, "grad_norm": 0.3113070820581294, "learning_rate": 0.0001764222333457874, "loss": 0.9733, "step": 5476 }, { "epoch": 0.52, "grad_norm": 0.2686929294263848, "learning_rate": 0.00017641202956340685, "loss": 1.141, "step": 5477 }, { "epoch": 0.52, "grad_norm": 0.2963778388036239, "learning_rate": 0.0001764018238687672, "loss": 1.049, "step": 5478 }, { "epoch": 0.52, "grad_norm": 0.27712005782523735, "learning_rate": 0.00017639161626212393, "loss": 0.9666, "step": 5479 }, { "epoch": 0.52, "grad_norm": 0.25074497020772385, "learning_rate": 0.00017638140674373245, "loss": 1.091, "step": 5480 }, { "epoch": 0.52, "grad_norm": 0.27481911625527383, "learning_rate": 0.00017637119531384822, "loss": 0.9804, "step": 5481 }, { "epoch": 0.52, "grad_norm": 0.30326990661840897, "learning_rate": 0.00017636098197272687, "loss": 1.0196, "step": 5482 }, { "epoch": 0.52, "grad_norm": 0.2536248159519646, "learning_rate": 0.00017635076672062395, "loss": 1.0655, "step": 5483 }, { "epoch": 0.52, "grad_norm": 0.24871071887818122, "learning_rate": 0.0001763405495577951, "loss": 1.0337, "step": 5484 }, { "epoch": 0.52, "grad_norm": 0.27076559378833587, "learning_rate": 0.00017633033048449607, "loss": 1.0868, "step": 5485 }, { "epoch": 0.52, "grad_norm": 0.26447743099546034, "learning_rate": 0.00017632010950098247, "loss": 1.1067, "step": 5486 }, { "epoch": 0.52, "grad_norm": 0.2354573802397408, "learning_rate": 0.00017630988660751018, "loss": 0.9972, "step": 5487 }, { "epoch": 0.53, "grad_norm": 0.31657477736419243, "learning_rate": 0.00017629966180433503, "loss": 1.1436, "step": 5488 }, { "epoch": 0.53, "grad_norm": 0.31018143648114505, "learning_rate": 0.0001762894350917129, "loss": 1.1186, "step": 5489 }, { "epoch": 0.53, "grad_norm": 0.2928001553899392, "learning_rate": 0.00017627920646989971, "loss": 1.1422, "step": 5490 }, { "epoch": 0.53, "grad_norm": 0.30032975451874694, "learning_rate": 0.00017626897593915142, "loss": 1.0623, "step": 5491 }, { "epoch": 0.53, "grad_norm": 0.3019231833502392, "learning_rate": 0.0001762587434997241, "loss": 1.0632, "step": 5492 }, { "epoch": 0.53, "grad_norm": 0.2647986946052283, "learning_rate": 0.0001762485091518738, "loss": 1.0093, "step": 5493 }, { "epoch": 0.53, "grad_norm": 0.2490169986270707, "learning_rate": 0.0001762382728958566, "loss": 1.1866, "step": 5494 }, { "epoch": 0.53, "grad_norm": 0.27623003094440113, "learning_rate": 0.00017622803473192874, "loss": 1.0377, "step": 5495 }, { "epoch": 0.53, "grad_norm": 0.29379957830634423, "learning_rate": 0.0001762177946603464, "loss": 1.1129, "step": 5496 }, { "epoch": 0.53, "grad_norm": 0.26006713944566145, "learning_rate": 0.00017620755268136584, "loss": 1.0786, "step": 5497 }, { "epoch": 0.53, "grad_norm": 0.2984517115544308, "learning_rate": 0.00017619730879524337, "loss": 1.145, "step": 5498 }, { "epoch": 0.53, "grad_norm": 0.2634404683614875, "learning_rate": 0.00017618706300223536, "loss": 1.0225, "step": 5499 }, { "epoch": 0.53, "grad_norm": 0.2770749610740461, "learning_rate": 0.00017617681530259822, "loss": 1.0321, "step": 5500 }, { "epoch": 0.53, "grad_norm": 0.29909952213865876, "learning_rate": 0.00017616656569658843, "loss": 0.9445, "step": 5501 }, { "epoch": 0.53, "grad_norm": 0.28919886120746485, "learning_rate": 0.00017615631418446242, "loss": 1.0648, "step": 5502 }, { "epoch": 0.53, "grad_norm": 0.28680714124802115, "learning_rate": 0.00017614606076647683, "loss": 1.1729, "step": 5503 }, { "epoch": 0.53, "grad_norm": 0.3018876386617487, "learning_rate": 0.00017613580544288817, "loss": 0.9817, "step": 5504 }, { "epoch": 0.53, "grad_norm": 0.28707084465144506, "learning_rate": 0.00017612554821395314, "loss": 1.1636, "step": 5505 }, { "epoch": 0.53, "grad_norm": 0.31719224150490105, "learning_rate": 0.00017611528907992844, "loss": 1.0808, "step": 5506 }, { "epoch": 0.53, "grad_norm": 0.2625161917732417, "learning_rate": 0.00017610502804107082, "loss": 1.1023, "step": 5507 }, { "epoch": 0.53, "grad_norm": 0.2872356902170121, "learning_rate": 0.00017609476509763698, "loss": 1.2038, "step": 5508 }, { "epoch": 0.53, "grad_norm": 0.25936712334845474, "learning_rate": 0.00017608450024988382, "loss": 0.9567, "step": 5509 }, { "epoch": 0.53, "grad_norm": 0.2791045831669703, "learning_rate": 0.0001760742334980683, "loss": 1.1003, "step": 5510 }, { "epoch": 0.53, "grad_norm": 0.2647336003050076, "learning_rate": 0.00017606396484244721, "loss": 1.1102, "step": 5511 }, { "epoch": 0.53, "grad_norm": 0.27574681080006713, "learning_rate": 0.00017605369428327761, "loss": 1.048, "step": 5512 }, { "epoch": 0.53, "grad_norm": 0.27994961262509854, "learning_rate": 0.00017604342182081653, "loss": 0.9866, "step": 5513 }, { "epoch": 0.53, "grad_norm": 0.261516937266442, "learning_rate": 0.000176033147455321, "loss": 1.0791, "step": 5514 }, { "epoch": 0.53, "grad_norm": 0.2995061891654347, "learning_rate": 0.0001760228711870482, "loss": 1.1309, "step": 5515 }, { "epoch": 0.53, "grad_norm": 0.2820497610571214, "learning_rate": 0.00017601259301625524, "loss": 1.0, "step": 5516 }, { "epoch": 0.53, "grad_norm": 0.24255160815288854, "learning_rate": 0.0001760023129431994, "loss": 1.0015, "step": 5517 }, { "epoch": 0.53, "grad_norm": 0.25094239441876454, "learning_rate": 0.0001759920309681379, "loss": 1.0133, "step": 5518 }, { "epoch": 0.53, "grad_norm": 0.283208208913356, "learning_rate": 0.00017598174709132803, "loss": 1.1402, "step": 5519 }, { "epoch": 0.53, "grad_norm": 0.2826249410012862, "learning_rate": 0.00017597146131302722, "loss": 1.0725, "step": 5520 }, { "epoch": 0.53, "grad_norm": 0.28680753301102535, "learning_rate": 0.00017596117363349282, "loss": 1.1058, "step": 5521 }, { "epoch": 0.53, "grad_norm": 0.2915298750178331, "learning_rate": 0.00017595088405298234, "loss": 1.0234, "step": 5522 }, { "epoch": 0.53, "grad_norm": 0.29543680871161176, "learning_rate": 0.00017594059257175325, "loss": 1.1357, "step": 5523 }, { "epoch": 0.53, "grad_norm": 0.2734660068763002, "learning_rate": 0.0001759302991900631, "loss": 0.9803, "step": 5524 }, { "epoch": 0.53, "grad_norm": 0.29698197284064065, "learning_rate": 0.0001759200039081695, "loss": 1.0425, "step": 5525 }, { "epoch": 0.53, "grad_norm": 0.28542474538774965, "learning_rate": 0.00017590970672633007, "loss": 1.0379, "step": 5526 }, { "epoch": 0.53, "grad_norm": 0.2891844662811193, "learning_rate": 0.00017589940764480252, "loss": 1.0073, "step": 5527 }, { "epoch": 0.53, "grad_norm": 0.28262400994939874, "learning_rate": 0.00017588910666384462, "loss": 1.0782, "step": 5528 }, { "epoch": 0.53, "grad_norm": 0.2780628600213389, "learning_rate": 0.00017587880378371412, "loss": 1.0688, "step": 5529 }, { "epoch": 0.53, "grad_norm": 0.2884519639543894, "learning_rate": 0.00017586849900466883, "loss": 1.0724, "step": 5530 }, { "epoch": 0.53, "grad_norm": 0.2815574093742316, "learning_rate": 0.00017585819232696675, "loss": 1.0518, "step": 5531 }, { "epoch": 0.53, "grad_norm": 0.28038754616537154, "learning_rate": 0.00017584788375086565, "loss": 1.0102, "step": 5532 }, { "epoch": 0.53, "grad_norm": 0.2525746581044452, "learning_rate": 0.00017583757327662363, "loss": 1.0554, "step": 5533 }, { "epoch": 0.53, "grad_norm": 0.2922109980484516, "learning_rate": 0.00017582726090449867, "loss": 1.1027, "step": 5534 }, { "epoch": 0.53, "grad_norm": 0.2751484254089623, "learning_rate": 0.00017581694663474886, "loss": 1.0159, "step": 5535 }, { "epoch": 0.53, "grad_norm": 0.26863647079539626, "learning_rate": 0.00017580663046763231, "loss": 1.0621, "step": 5536 }, { "epoch": 0.53, "grad_norm": 0.2985191792690513, "learning_rate": 0.00017579631240340716, "loss": 1.1758, "step": 5537 }, { "epoch": 0.53, "grad_norm": 0.27614555889342746, "learning_rate": 0.00017578599244233168, "loss": 1.061, "step": 5538 }, { "epoch": 0.53, "grad_norm": 0.25905510867782044, "learning_rate": 0.00017577567058466414, "loss": 1.1777, "step": 5539 }, { "epoch": 0.53, "grad_norm": 0.3278506380359264, "learning_rate": 0.00017576534683066278, "loss": 1.1552, "step": 5540 }, { "epoch": 0.53, "grad_norm": 0.273031516195577, "learning_rate": 0.000175755021180586, "loss": 1.1249, "step": 5541 }, { "epoch": 0.53, "grad_norm": 0.29918764475128873, "learning_rate": 0.00017574469363469222, "loss": 1.0937, "step": 5542 }, { "epoch": 0.53, "grad_norm": 0.2753312938615492, "learning_rate": 0.00017573436419323986, "loss": 1.1123, "step": 5543 }, { "epoch": 0.53, "grad_norm": 0.3076985822417243, "learning_rate": 0.00017572403285648743, "loss": 1.1392, "step": 5544 }, { "epoch": 0.53, "grad_norm": 0.2858977995760822, "learning_rate": 0.00017571369962469352, "loss": 1.0683, "step": 5545 }, { "epoch": 0.53, "grad_norm": 0.2826635982664086, "learning_rate": 0.00017570336449811667, "loss": 1.01, "step": 5546 }, { "epoch": 0.53, "grad_norm": 0.25331688616473097, "learning_rate": 0.00017569302747701558, "loss": 1.0202, "step": 5547 }, { "epoch": 0.53, "grad_norm": 0.25299991837835445, "learning_rate": 0.00017568268856164886, "loss": 0.9968, "step": 5548 }, { "epoch": 0.53, "grad_norm": 0.26907253785105184, "learning_rate": 0.00017567234775227533, "loss": 1.0216, "step": 5549 }, { "epoch": 0.53, "grad_norm": 0.2638154646751891, "learning_rate": 0.0001756620050491537, "loss": 1.169, "step": 5550 }, { "epoch": 0.53, "grad_norm": 0.2674914565002557, "learning_rate": 0.0001756516604525429, "loss": 1.0112, "step": 5551 }, { "epoch": 0.53, "grad_norm": 0.27204031327263634, "learning_rate": 0.00017564131396270168, "loss": 1.0392, "step": 5552 }, { "epoch": 0.53, "grad_norm": 0.2808788802646591, "learning_rate": 0.0001756309655798891, "loss": 1.1142, "step": 5553 }, { "epoch": 0.53, "grad_norm": 0.2737824930333972, "learning_rate": 0.00017562061530436405, "loss": 1.1227, "step": 5554 }, { "epoch": 0.53, "grad_norm": 0.28525600932665507, "learning_rate": 0.00017561026313638557, "loss": 1.0276, "step": 5555 }, { "epoch": 0.53, "grad_norm": 0.26176420613090035, "learning_rate": 0.00017559990907621274, "loss": 1.1253, "step": 5556 }, { "epoch": 0.53, "grad_norm": 0.25918345855645814, "learning_rate": 0.00017558955312410468, "loss": 1.1035, "step": 5557 }, { "epoch": 0.53, "grad_norm": 0.29529919222022755, "learning_rate": 0.00017557919528032054, "loss": 1.1018, "step": 5558 }, { "epoch": 0.53, "grad_norm": 0.28939151113442757, "learning_rate": 0.00017556883554511953, "loss": 1.0248, "step": 5559 }, { "epoch": 0.53, "grad_norm": 0.3092935541500706, "learning_rate": 0.00017555847391876093, "loss": 1.133, "step": 5560 }, { "epoch": 0.53, "grad_norm": 0.268156083959369, "learning_rate": 0.00017554811040150403, "loss": 1.1213, "step": 5561 }, { "epoch": 0.53, "grad_norm": 0.2621845882914305, "learning_rate": 0.0001755377449936082, "loss": 1.0878, "step": 5562 }, { "epoch": 0.53, "grad_norm": 0.2661667850236094, "learning_rate": 0.0001755273776953328, "loss": 1.0683, "step": 5563 }, { "epoch": 0.53, "grad_norm": 0.25877418531921775, "learning_rate": 0.00017551700850693732, "loss": 1.1574, "step": 5564 }, { "epoch": 0.53, "grad_norm": 0.28288655943020663, "learning_rate": 0.00017550663742868126, "loss": 1.1328, "step": 5565 }, { "epoch": 0.53, "grad_norm": 0.26185751105253613, "learning_rate": 0.00017549626446082412, "loss": 0.973, "step": 5566 }, { "epoch": 0.53, "grad_norm": 0.295683213989208, "learning_rate": 0.0001754858896036255, "loss": 0.9697, "step": 5567 }, { "epoch": 0.53, "grad_norm": 0.3264307309216949, "learning_rate": 0.0001754755128573451, "loss": 1.0159, "step": 5568 }, { "epoch": 0.53, "grad_norm": 0.2753013510908348, "learning_rate": 0.00017546513422224253, "loss": 1.0423, "step": 5569 }, { "epoch": 0.53, "grad_norm": 0.25968975647863907, "learning_rate": 0.00017545475369857755, "loss": 1.1648, "step": 5570 }, { "epoch": 0.53, "grad_norm": 0.2804203410262089, "learning_rate": 0.00017544437128660993, "loss": 1.0268, "step": 5571 }, { "epoch": 0.53, "grad_norm": 0.31239158563885117, "learning_rate": 0.0001754339869865995, "loss": 1.1495, "step": 5572 }, { "epoch": 0.53, "grad_norm": 0.2733500389923679, "learning_rate": 0.00017542360079880615, "loss": 1.1128, "step": 5573 }, { "epoch": 0.53, "grad_norm": 0.24320614149769154, "learning_rate": 0.00017541321272348978, "loss": 1.0674, "step": 5574 }, { "epoch": 0.53, "grad_norm": 0.3181218743470043, "learning_rate": 0.00017540282276091039, "loss": 1.0091, "step": 5575 }, { "epoch": 0.53, "grad_norm": 0.3031447060986254, "learning_rate": 0.00017539243091132793, "loss": 1.1715, "step": 5576 }, { "epoch": 0.53, "grad_norm": 0.34380612538960487, "learning_rate": 0.00017538203717500252, "loss": 0.9964, "step": 5577 }, { "epoch": 0.53, "grad_norm": 0.2661056584426639, "learning_rate": 0.00017537164155219428, "loss": 1.0928, "step": 5578 }, { "epoch": 0.53, "grad_norm": 0.26522368104967226, "learning_rate": 0.0001753612440431633, "loss": 1.0348, "step": 5579 }, { "epoch": 0.53, "grad_norm": 0.28303690003405435, "learning_rate": 0.00017535084464816985, "loss": 1.0077, "step": 5580 }, { "epoch": 0.53, "grad_norm": 0.29888676081156723, "learning_rate": 0.00017534044336747418, "loss": 0.9651, "step": 5581 }, { "epoch": 0.53, "grad_norm": 0.23610813701308117, "learning_rate": 0.00017533004020133653, "loss": 1.0869, "step": 5582 }, { "epoch": 0.53, "grad_norm": 0.2473777150573363, "learning_rate": 0.00017531963515001725, "loss": 1.0581, "step": 5583 }, { "epoch": 0.53, "grad_norm": 0.2878283161585639, "learning_rate": 0.00017530922821377683, "loss": 1.1182, "step": 5584 }, { "epoch": 0.53, "grad_norm": 0.2895723267837339, "learning_rate": 0.0001752988193928756, "loss": 1.0193, "step": 5585 }, { "epoch": 0.53, "grad_norm": 0.25924626472676504, "learning_rate": 0.00017528840868757413, "loss": 1.0764, "step": 5586 }, { "epoch": 0.53, "grad_norm": 0.2697046584787591, "learning_rate": 0.00017527799609813287, "loss": 1.2123, "step": 5587 }, { "epoch": 0.53, "grad_norm": 0.2779570564976789, "learning_rate": 0.00017526758162481247, "loss": 1.0463, "step": 5588 }, { "epoch": 0.53, "grad_norm": 0.2659291393898864, "learning_rate": 0.00017525716526787353, "loss": 0.8657, "step": 5589 }, { "epoch": 0.53, "grad_norm": 0.24940778956498255, "learning_rate": 0.00017524674702757676, "loss": 0.9349, "step": 5590 }, { "epoch": 0.53, "grad_norm": 0.3512612443921498, "learning_rate": 0.00017523632690418281, "loss": 1.0309, "step": 5591 }, { "epoch": 0.53, "grad_norm": 0.27949443291870935, "learning_rate": 0.0001752259048979525, "loss": 1.0867, "step": 5592 }, { "epoch": 0.54, "grad_norm": 0.25241117168172916, "learning_rate": 0.00017521548100914668, "loss": 0.9595, "step": 5593 }, { "epoch": 0.54, "grad_norm": 0.31177379926003695, "learning_rate": 0.00017520505523802615, "loss": 1.1147, "step": 5594 }, { "epoch": 0.54, "grad_norm": 0.3035376452103113, "learning_rate": 0.00017519462758485186, "loss": 1.1091, "step": 5595 }, { "epoch": 0.54, "grad_norm": 0.28334338339825155, "learning_rate": 0.00017518419804988473, "loss": 1.0027, "step": 5596 }, { "epoch": 0.54, "grad_norm": 0.27635425273795633, "learning_rate": 0.00017517376663338583, "loss": 1.0832, "step": 5597 }, { "epoch": 0.54, "grad_norm": 0.27479092014996065, "learning_rate": 0.00017516333333561615, "loss": 1.0254, "step": 5598 }, { "epoch": 0.54, "grad_norm": 0.2923804540268794, "learning_rate": 0.00017515289815683683, "loss": 1.0939, "step": 5599 }, { "epoch": 0.54, "grad_norm": 0.2689789333872261, "learning_rate": 0.000175142461097309, "loss": 1.0924, "step": 5600 }, { "epoch": 0.54, "grad_norm": 0.2712229566662846, "learning_rate": 0.00017513202215729384, "loss": 1.1212, "step": 5601 }, { "epoch": 0.54, "grad_norm": 0.28499521398979466, "learning_rate": 0.0001751215813370526, "loss": 1.1271, "step": 5602 }, { "epoch": 0.54, "grad_norm": 0.272591489298431, "learning_rate": 0.00017511113863684662, "loss": 1.1602, "step": 5603 }, { "epoch": 0.54, "grad_norm": 0.3008741005719719, "learning_rate": 0.00017510069405693714, "loss": 1.115, "step": 5604 }, { "epoch": 0.54, "grad_norm": 0.2531166951692093, "learning_rate": 0.00017509024759758561, "loss": 1.0802, "step": 5605 }, { "epoch": 0.54, "grad_norm": 0.24148412845313758, "learning_rate": 0.00017507979925905347, "loss": 0.953, "step": 5606 }, { "epoch": 0.54, "grad_norm": 0.2534357412942829, "learning_rate": 0.00017506934904160213, "loss": 1.0784, "step": 5607 }, { "epoch": 0.54, "grad_norm": 0.270455450479634, "learning_rate": 0.00017505889694549316, "loss": 1.0495, "step": 5608 }, { "epoch": 0.54, "grad_norm": 0.2718743775512695, "learning_rate": 0.00017504844297098812, "loss": 1.1477, "step": 5609 }, { "epoch": 0.54, "grad_norm": 0.2627508977482193, "learning_rate": 0.00017503798711834863, "loss": 0.9724, "step": 5610 }, { "epoch": 0.54, "grad_norm": 0.28688718615510195, "learning_rate": 0.00017502752938783637, "loss": 1.0391, "step": 5611 }, { "epoch": 0.54, "grad_norm": 0.3088622269070572, "learning_rate": 0.000175017069779713, "loss": 1.07, "step": 5612 }, { "epoch": 0.54, "grad_norm": 0.2519490699405952, "learning_rate": 0.00017500660829424035, "loss": 0.9973, "step": 5613 }, { "epoch": 0.54, "grad_norm": 0.2593258598031295, "learning_rate": 0.00017499614493168017, "loss": 1.1488, "step": 5614 }, { "epoch": 0.54, "grad_norm": 0.28609091063527664, "learning_rate": 0.00017498567969229432, "loss": 1.1316, "step": 5615 }, { "epoch": 0.54, "grad_norm": 0.2876241038098421, "learning_rate": 0.00017497521257634472, "loss": 1.142, "step": 5616 }, { "epoch": 0.54, "grad_norm": 0.2699264999096545, "learning_rate": 0.0001749647435840933, "loss": 1.1133, "step": 5617 }, { "epoch": 0.54, "grad_norm": 0.23212212244543703, "learning_rate": 0.00017495427271580207, "loss": 1.0177, "step": 5618 }, { "epoch": 0.54, "grad_norm": 0.3191035435173965, "learning_rate": 0.00017494379997173306, "loss": 1.0753, "step": 5619 }, { "epoch": 0.54, "grad_norm": 0.28382503691739164, "learning_rate": 0.00017493332535214835, "loss": 1.0277, "step": 5620 }, { "epoch": 0.54, "grad_norm": 0.2620982386743405, "learning_rate": 0.00017492284885731006, "loss": 1.1087, "step": 5621 }, { "epoch": 0.54, "grad_norm": 0.24048768135309437, "learning_rate": 0.00017491237048748042, "loss": 1.0179, "step": 5622 }, { "epoch": 0.54, "grad_norm": 0.29297621702120474, "learning_rate": 0.00017490189024292157, "loss": 1.031, "step": 5623 }, { "epoch": 0.54, "grad_norm": 0.24169868093525804, "learning_rate": 0.00017489140812389591, "loss": 1.1275, "step": 5624 }, { "epoch": 0.54, "grad_norm": 0.3265517682655214, "learning_rate": 0.00017488092413066566, "loss": 1.09, "step": 5625 }, { "epoch": 0.54, "grad_norm": 0.2889070937315453, "learning_rate": 0.00017487043826349324, "loss": 1.0827, "step": 5626 }, { "epoch": 0.54, "grad_norm": 0.2616726142921512, "learning_rate": 0.00017485995052264107, "loss": 1.1226, "step": 5627 }, { "epoch": 0.54, "grad_norm": 0.2823144964930083, "learning_rate": 0.00017484946090837153, "loss": 0.9189, "step": 5628 }, { "epoch": 0.54, "grad_norm": 0.26247896162782614, "learning_rate": 0.0001748389694209472, "loss": 1.0863, "step": 5629 }, { "epoch": 0.54, "grad_norm": 0.29529068834973227, "learning_rate": 0.0001748284760606307, "loss": 1.1572, "step": 5630 }, { "epoch": 0.54, "grad_norm": 0.2725314072487987, "learning_rate": 0.00017481798082768447, "loss": 1.0527, "step": 5631 }, { "epoch": 0.54, "grad_norm": 0.3268266589015598, "learning_rate": 0.0001748074837223713, "loss": 1.1146, "step": 5632 }, { "epoch": 0.54, "grad_norm": 0.31412407097134504, "learning_rate": 0.0001747969847449538, "loss": 1.0571, "step": 5633 }, { "epoch": 0.54, "grad_norm": 0.3129044080305532, "learning_rate": 0.0001747864838956948, "loss": 1.1354, "step": 5634 }, { "epoch": 0.54, "grad_norm": 0.26484366612789245, "learning_rate": 0.00017477598117485697, "loss": 1.0219, "step": 5635 }, { "epoch": 0.54, "grad_norm": 0.2881609277060793, "learning_rate": 0.00017476547658270327, "loss": 1.0661, "step": 5636 }, { "epoch": 0.54, "grad_norm": 0.2785791649184835, "learning_rate": 0.0001747549701194965, "loss": 1.009, "step": 5637 }, { "epoch": 0.54, "grad_norm": 0.2781943063291394, "learning_rate": 0.00017474446178549963, "loss": 1.1618, "step": 5638 }, { "epoch": 0.54, "grad_norm": 0.286700570393826, "learning_rate": 0.00017473395158097566, "loss": 1.1184, "step": 5639 }, { "epoch": 0.54, "grad_norm": 0.332805537003659, "learning_rate": 0.00017472343950618755, "loss": 1.0486, "step": 5640 }, { "epoch": 0.54, "grad_norm": 0.27971090508186786, "learning_rate": 0.0001747129255613984, "loss": 1.1044, "step": 5641 }, { "epoch": 0.54, "grad_norm": 0.28836371050426973, "learning_rate": 0.00017470240974687133, "loss": 1.0402, "step": 5642 }, { "epoch": 0.54, "grad_norm": 0.30716094757551443, "learning_rate": 0.00017469189206286952, "loss": 1.113, "step": 5643 }, { "epoch": 0.54, "grad_norm": 0.2743312373131173, "learning_rate": 0.00017468137250965617, "loss": 1.0299, "step": 5644 }, { "epoch": 0.54, "grad_norm": 0.2609975564796487, "learning_rate": 0.00017467085108749454, "loss": 1.104, "step": 5645 }, { "epoch": 0.54, "grad_norm": 0.29364822283678577, "learning_rate": 0.0001746603277966479, "loss": 1.0865, "step": 5646 }, { "epoch": 0.54, "grad_norm": 0.2745578477787385, "learning_rate": 0.00017464980263737968, "loss": 1.1527, "step": 5647 }, { "epoch": 0.54, "grad_norm": 0.26626720698396383, "learning_rate": 0.00017463927560995321, "loss": 1.0189, "step": 5648 }, { "epoch": 0.54, "grad_norm": 0.27709851029020705, "learning_rate": 0.000174628746714632, "loss": 1.1241, "step": 5649 }, { "epoch": 0.54, "grad_norm": 0.306265199776124, "learning_rate": 0.00017461821595167945, "loss": 1.1816, "step": 5650 }, { "epoch": 0.54, "grad_norm": 0.23094636122975018, "learning_rate": 0.00017460768332135918, "loss": 1.0002, "step": 5651 }, { "epoch": 0.54, "grad_norm": 0.25739454457437366, "learning_rate": 0.00017459714882393473, "loss": 1.0748, "step": 5652 }, { "epoch": 0.54, "grad_norm": 0.24839067246041194, "learning_rate": 0.00017458661245966974, "loss": 1.1297, "step": 5653 }, { "epoch": 0.54, "grad_norm": 0.2592947317889122, "learning_rate": 0.0001745760742288279, "loss": 1.1097, "step": 5654 }, { "epoch": 0.54, "grad_norm": 0.29910161986761424, "learning_rate": 0.00017456553413167293, "loss": 1.0156, "step": 5655 }, { "epoch": 0.54, "grad_norm": 0.2740550204065586, "learning_rate": 0.00017455499216846864, "loss": 1.1618, "step": 5656 }, { "epoch": 0.54, "grad_norm": 0.2773348179265606, "learning_rate": 0.00017454444833947877, "loss": 1.0988, "step": 5657 }, { "epoch": 0.54, "grad_norm": 0.2813065691260808, "learning_rate": 0.00017453390264496728, "loss": 0.9826, "step": 5658 }, { "epoch": 0.54, "grad_norm": 0.2775051391562762, "learning_rate": 0.000174523355085198, "loss": 1.0356, "step": 5659 }, { "epoch": 0.54, "grad_norm": 0.28015989674393643, "learning_rate": 0.00017451280566043492, "loss": 1.1507, "step": 5660 }, { "epoch": 0.54, "grad_norm": 0.3011891640480555, "learning_rate": 0.00017450225437094208, "loss": 1.16, "step": 5661 }, { "epoch": 0.54, "grad_norm": 0.38100561539213623, "learning_rate": 0.00017449170121698347, "loss": 1.0236, "step": 5662 }, { "epoch": 0.54, "grad_norm": 0.27806057374613613, "learning_rate": 0.00017448114619882321, "loss": 1.0843, "step": 5663 }, { "epoch": 0.54, "grad_norm": 0.2851575165624332, "learning_rate": 0.0001744705893167255, "loss": 1.1458, "step": 5664 }, { "epoch": 0.54, "grad_norm": 0.2913077381839934, "learning_rate": 0.00017446003057095447, "loss": 1.1039, "step": 5665 }, { "epoch": 0.54, "grad_norm": 0.28664285006948, "learning_rate": 0.00017444946996177433, "loss": 1.1214, "step": 5666 }, { "epoch": 0.54, "grad_norm": 0.2838055565232777, "learning_rate": 0.00017443890748944946, "loss": 1.0184, "step": 5667 }, { "epoch": 0.54, "grad_norm": 0.2511465662279637, "learning_rate": 0.00017442834315424416, "loss": 1.0698, "step": 5668 }, { "epoch": 0.54, "grad_norm": 0.27586967822568237, "learning_rate": 0.0001744177769564228, "loss": 1.0234, "step": 5669 }, { "epoch": 0.54, "grad_norm": 0.23851868493807254, "learning_rate": 0.00017440720889624978, "loss": 1.109, "step": 5670 }, { "epoch": 0.54, "grad_norm": 0.29280665470416317, "learning_rate": 0.00017439663897398958, "loss": 1.1473, "step": 5671 }, { "epoch": 0.54, "grad_norm": 0.27293284903881204, "learning_rate": 0.00017438606718990675, "loss": 1.0855, "step": 5672 }, { "epoch": 0.54, "grad_norm": 0.3102331601705705, "learning_rate": 0.00017437549354426586, "loss": 0.979, "step": 5673 }, { "epoch": 0.54, "grad_norm": 0.2904424568361712, "learning_rate": 0.00017436491803733147, "loss": 1.0758, "step": 5674 }, { "epoch": 0.54, "grad_norm": 0.3038807854764857, "learning_rate": 0.00017435434066936828, "loss": 1.0813, "step": 5675 }, { "epoch": 0.54, "grad_norm": 0.2982170129842694, "learning_rate": 0.00017434376144064096, "loss": 0.9269, "step": 5676 }, { "epoch": 0.54, "grad_norm": 0.29066232686572985, "learning_rate": 0.00017433318035141432, "loss": 1.0761, "step": 5677 }, { "epoch": 0.54, "grad_norm": 0.36384380111246045, "learning_rate": 0.0001743225974019531, "loss": 1.1277, "step": 5678 }, { "epoch": 0.54, "grad_norm": 0.28539850304066966, "learning_rate": 0.00017431201259252222, "loss": 1.1838, "step": 5679 }, { "epoch": 0.54, "grad_norm": 0.3010319695376003, "learning_rate": 0.00017430142592338648, "loss": 0.9309, "step": 5680 }, { "epoch": 0.54, "grad_norm": 0.2639994319325743, "learning_rate": 0.00017429083739481087, "loss": 1.1049, "step": 5681 }, { "epoch": 0.54, "grad_norm": 0.2979159287960645, "learning_rate": 0.00017428024700706036, "loss": 1.1221, "step": 5682 }, { "epoch": 0.54, "grad_norm": 0.2819099394302753, "learning_rate": 0.0001742696547604, "loss": 1.0916, "step": 5683 }, { "epoch": 0.54, "grad_norm": 0.260104363656028, "learning_rate": 0.00017425906065509484, "loss": 1.0034, "step": 5684 }, { "epoch": 0.54, "grad_norm": 0.2970820007503617, "learning_rate": 0.00017424846469141, "loss": 1.1024, "step": 5685 }, { "epoch": 0.54, "grad_norm": 0.2620883115817067, "learning_rate": 0.0001742378668696107, "loss": 1.0562, "step": 5686 }, { "epoch": 0.54, "grad_norm": 0.2491078043946645, "learning_rate": 0.0001742272671899621, "loss": 1.0625, "step": 5687 }, { "epoch": 0.54, "grad_norm": 0.30629114639440114, "learning_rate": 0.00017421666565272948, "loss": 1.202, "step": 5688 }, { "epoch": 0.54, "grad_norm": 0.30830018039821566, "learning_rate": 0.0001742060622581782, "loss": 1.0535, "step": 5689 }, { "epoch": 0.54, "grad_norm": 0.2724222943145667, "learning_rate": 0.00017419545700657354, "loss": 1.1325, "step": 5690 }, { "epoch": 0.54, "grad_norm": 0.23708317781034122, "learning_rate": 0.00017418484989818096, "loss": 1.0638, "step": 5691 }, { "epoch": 0.54, "grad_norm": 0.26229220094670336, "learning_rate": 0.00017417424093326588, "loss": 0.9439, "step": 5692 }, { "epoch": 0.54, "grad_norm": 0.25017286913990483, "learning_rate": 0.0001741636301120938, "loss": 1.079, "step": 5693 }, { "epoch": 0.54, "grad_norm": 0.2860341282853943, "learning_rate": 0.00017415301743493026, "loss": 1.109, "step": 5694 }, { "epoch": 0.54, "grad_norm": 0.27000375628865514, "learning_rate": 0.00017414240290204087, "loss": 1.0366, "step": 5695 }, { "epoch": 0.54, "grad_norm": 0.2944772274521784, "learning_rate": 0.00017413178651369123, "loss": 1.106, "step": 5696 }, { "epoch": 0.55, "grad_norm": 0.29923112387635076, "learning_rate": 0.00017412116827014707, "loss": 1.118, "step": 5697 }, { "epoch": 0.55, "grad_norm": 0.27744625628639163, "learning_rate": 0.00017411054817167407, "loss": 1.0582, "step": 5698 }, { "epoch": 0.55, "grad_norm": 0.2872887689318743, "learning_rate": 0.00017409992621853803, "loss": 1.135, "step": 5699 }, { "epoch": 0.55, "grad_norm": 0.2868590588865866, "learning_rate": 0.00017408930241100476, "loss": 1.1218, "step": 5700 }, { "epoch": 0.55, "grad_norm": 0.28039756959308726, "learning_rate": 0.00017407867674934014, "loss": 1.0089, "step": 5701 }, { "epoch": 0.55, "grad_norm": 0.26834033841073884, "learning_rate": 0.00017406804923381008, "loss": 1.0931, "step": 5702 }, { "epoch": 0.55, "grad_norm": 0.2679055295777244, "learning_rate": 0.00017405741986468054, "loss": 1.0362, "step": 5703 }, { "epoch": 0.55, "grad_norm": 0.31948114555469886, "learning_rate": 0.00017404678864221752, "loss": 1.1499, "step": 5704 }, { "epoch": 0.55, "grad_norm": 0.254735913314033, "learning_rate": 0.00017403615556668708, "loss": 1.0521, "step": 5705 }, { "epoch": 0.55, "grad_norm": 0.2758568173458296, "learning_rate": 0.00017402552063835533, "loss": 1.0276, "step": 5706 }, { "epoch": 0.55, "grad_norm": 0.2881056253518769, "learning_rate": 0.0001740148838574884, "loss": 0.9892, "step": 5707 }, { "epoch": 0.55, "grad_norm": 0.28754810292102423, "learning_rate": 0.00017400424522435247, "loss": 1.0138, "step": 5708 }, { "epoch": 0.55, "grad_norm": 0.26622897762881104, "learning_rate": 0.0001739936047392138, "loss": 1.0121, "step": 5709 }, { "epoch": 0.55, "grad_norm": 0.3257799896467255, "learning_rate": 0.00017398296240233866, "loss": 1.0488, "step": 5710 }, { "epoch": 0.55, "grad_norm": 0.300245783416075, "learning_rate": 0.0001739723182139934, "loss": 1.199, "step": 5711 }, { "epoch": 0.55, "grad_norm": 0.29793504189936215, "learning_rate": 0.00017396167217444437, "loss": 1.0326, "step": 5712 }, { "epoch": 0.55, "grad_norm": 0.2895788092153529, "learning_rate": 0.00017395102428395803, "loss": 1.0282, "step": 5713 }, { "epoch": 0.55, "grad_norm": 0.2759129672263007, "learning_rate": 0.0001739403745428008, "loss": 1.1353, "step": 5714 }, { "epoch": 0.55, "grad_norm": 0.2736823382715656, "learning_rate": 0.0001739297229512393, "loss": 1.0918, "step": 5715 }, { "epoch": 0.55, "grad_norm": 0.30836802392427065, "learning_rate": 0.00017391906950953994, "loss": 1.1101, "step": 5716 }, { "epoch": 0.55, "grad_norm": 0.2923956322766849, "learning_rate": 0.00017390841421796943, "loss": 1.1005, "step": 5717 }, { "epoch": 0.55, "grad_norm": 0.2582193114884755, "learning_rate": 0.00017389775707679444, "loss": 1.0708, "step": 5718 }, { "epoch": 0.55, "grad_norm": 0.24558151292411884, "learning_rate": 0.0001738870980862816, "loss": 1.1303, "step": 5719 }, { "epoch": 0.55, "grad_norm": 0.29322536081033934, "learning_rate": 0.0001738764372466977, "loss": 1.0927, "step": 5720 }, { "epoch": 0.55, "grad_norm": 0.2659755712941869, "learning_rate": 0.00017386577455830952, "loss": 1.0032, "step": 5721 }, { "epoch": 0.55, "grad_norm": 0.2635454724857395, "learning_rate": 0.00017385511002138393, "loss": 1.2808, "step": 5722 }, { "epoch": 0.55, "grad_norm": 0.26786955193926654, "learning_rate": 0.0001738444436361878, "loss": 1.0475, "step": 5723 }, { "epoch": 0.55, "grad_norm": 0.28425119186729847, "learning_rate": 0.00017383377540298805, "loss": 1.0817, "step": 5724 }, { "epoch": 0.55, "grad_norm": 0.28386565821296494, "learning_rate": 0.00017382310532205165, "loss": 0.9009, "step": 5725 }, { "epoch": 0.55, "grad_norm": 0.2455076247487801, "learning_rate": 0.00017381243339364565, "loss": 0.9972, "step": 5726 }, { "epoch": 0.55, "grad_norm": 0.2949244070026697, "learning_rate": 0.00017380175961803713, "loss": 1.0042, "step": 5727 }, { "epoch": 0.55, "grad_norm": 0.2806527868238312, "learning_rate": 0.00017379108399549317, "loss": 1.1932, "step": 5728 }, { "epoch": 0.55, "grad_norm": 0.27785708544616866, "learning_rate": 0.000173780406526281, "loss": 1.0914, "step": 5729 }, { "epoch": 0.55, "grad_norm": 0.28683282908042806, "learning_rate": 0.00017376972721066776, "loss": 1.1145, "step": 5730 }, { "epoch": 0.55, "grad_norm": 0.2838770112878104, "learning_rate": 0.00017375904604892073, "loss": 1.0765, "step": 5731 }, { "epoch": 0.55, "grad_norm": 0.2934314888524707, "learning_rate": 0.0001737483630413072, "loss": 1.0595, "step": 5732 }, { "epoch": 0.55, "grad_norm": 0.25830945410699585, "learning_rate": 0.00017373767818809456, "loss": 0.9669, "step": 5733 }, { "epoch": 0.55, "grad_norm": 0.2665953859888515, "learning_rate": 0.00017372699148955018, "loss": 1.0714, "step": 5734 }, { "epoch": 0.55, "grad_norm": 0.2731137251463795, "learning_rate": 0.0001737163029459415, "loss": 1.0746, "step": 5735 }, { "epoch": 0.55, "grad_norm": 0.25825981988483343, "learning_rate": 0.00017370561255753602, "loss": 0.9534, "step": 5736 }, { "epoch": 0.55, "grad_norm": 0.30383684481721607, "learning_rate": 0.00017369492032460123, "loss": 1.1384, "step": 5737 }, { "epoch": 0.55, "grad_norm": 0.31417342120569464, "learning_rate": 0.00017368422624740478, "loss": 1.0576, "step": 5738 }, { "epoch": 0.55, "grad_norm": 0.2593381194717112, "learning_rate": 0.00017367353032621426, "loss": 1.0573, "step": 5739 }, { "epoch": 0.55, "grad_norm": 0.2778893532357316, "learning_rate": 0.00017366283256129732, "loss": 0.9766, "step": 5740 }, { "epoch": 0.55, "grad_norm": 0.3072153615495613, "learning_rate": 0.0001736521329529217, "loss": 1.1247, "step": 5741 }, { "epoch": 0.55, "grad_norm": 0.28745857193672664, "learning_rate": 0.00017364143150135517, "loss": 1.1141, "step": 5742 }, { "epoch": 0.55, "grad_norm": 0.2651127640673508, "learning_rate": 0.00017363072820686552, "loss": 1.0829, "step": 5743 }, { "epoch": 0.55, "grad_norm": 0.2902298639371502, "learning_rate": 0.00017362002306972065, "loss": 0.987, "step": 5744 }, { "epoch": 0.55, "grad_norm": 0.27655920423756686, "learning_rate": 0.00017360931609018842, "loss": 1.0076, "step": 5745 }, { "epoch": 0.55, "grad_norm": 0.2567390152989073, "learning_rate": 0.0001735986072685368, "loss": 1.0375, "step": 5746 }, { "epoch": 0.55, "grad_norm": 0.24515803399806585, "learning_rate": 0.00017358789660503377, "loss": 1.0745, "step": 5747 }, { "epoch": 0.55, "grad_norm": 0.2575517841295073, "learning_rate": 0.0001735771840999474, "loss": 1.0153, "step": 5748 }, { "epoch": 0.55, "grad_norm": 0.2764368481508909, "learning_rate": 0.0001735664697535457, "loss": 1.1584, "step": 5749 }, { "epoch": 0.55, "grad_norm": 0.27351695694822586, "learning_rate": 0.0001735557535660969, "loss": 1.0281, "step": 5750 }, { "epoch": 0.55, "grad_norm": 0.2667030418236546, "learning_rate": 0.00017354503553786916, "loss": 1.1321, "step": 5751 }, { "epoch": 0.55, "grad_norm": 0.29279846320754066, "learning_rate": 0.00017353431566913066, "loss": 0.9984, "step": 5752 }, { "epoch": 0.55, "grad_norm": 0.2415120154785472, "learning_rate": 0.0001735235939601497, "loss": 1.0611, "step": 5753 }, { "epoch": 0.55, "grad_norm": 0.25955357361068576, "learning_rate": 0.00017351287041119458, "loss": 1.1294, "step": 5754 }, { "epoch": 0.55, "grad_norm": 0.294206732380647, "learning_rate": 0.0001735021450225337, "loss": 1.0812, "step": 5755 }, { "epoch": 0.55, "grad_norm": 0.29031247550246536, "learning_rate": 0.00017349141779443542, "loss": 1.0576, "step": 5756 }, { "epoch": 0.55, "grad_norm": 0.2672750801707316, "learning_rate": 0.00017348068872716823, "loss": 1.0851, "step": 5757 }, { "epoch": 0.55, "grad_norm": 0.26957335518481657, "learning_rate": 0.00017346995782100062, "loss": 1.0858, "step": 5758 }, { "epoch": 0.55, "grad_norm": 0.32734612669273955, "learning_rate": 0.00017345922507620116, "loss": 1.0656, "step": 5759 }, { "epoch": 0.55, "grad_norm": 0.24154451886063427, "learning_rate": 0.00017344849049303842, "loss": 0.9896, "step": 5760 }, { "epoch": 0.55, "grad_norm": 0.27709567176869415, "learning_rate": 0.00017343775407178104, "loss": 1.0805, "step": 5761 }, { "epoch": 0.55, "grad_norm": 0.272244078427738, "learning_rate": 0.0001734270158126977, "loss": 1.0433, "step": 5762 }, { "epoch": 0.55, "grad_norm": 0.2673872164348252, "learning_rate": 0.00017341627571605716, "loss": 1.1464, "step": 5763 }, { "epoch": 0.55, "grad_norm": 0.27315997652853985, "learning_rate": 0.00017340553378212816, "loss": 1.1567, "step": 5764 }, { "epoch": 0.55, "grad_norm": 0.23474379585480734, "learning_rate": 0.00017339479001117955, "loss": 1.0089, "step": 5765 }, { "epoch": 0.55, "grad_norm": 0.27652850547755903, "learning_rate": 0.00017338404440348022, "loss": 1.0461, "step": 5766 }, { "epoch": 0.55, "grad_norm": 0.31896644764441623, "learning_rate": 0.00017337329695929902, "loss": 1.0359, "step": 5767 }, { "epoch": 0.55, "grad_norm": 0.274574680140215, "learning_rate": 0.00017336254767890498, "loss": 1.0201, "step": 5768 }, { "epoch": 0.55, "grad_norm": 0.2630831469195627, "learning_rate": 0.00017335179656256705, "loss": 1.0975, "step": 5769 }, { "epoch": 0.55, "grad_norm": 0.2793728024955909, "learning_rate": 0.00017334104361055436, "loss": 1.1332, "step": 5770 }, { "epoch": 0.55, "grad_norm": 0.27408839587852935, "learning_rate": 0.0001733302888231359, "loss": 0.939, "step": 5771 }, { "epoch": 0.55, "grad_norm": 0.27297436472505615, "learning_rate": 0.0001733195322005809, "loss": 1.0168, "step": 5772 }, { "epoch": 0.55, "grad_norm": 0.30630256010686685, "learning_rate": 0.00017330877374315855, "loss": 1.1169, "step": 5773 }, { "epoch": 0.55, "grad_norm": 0.2767879583144832, "learning_rate": 0.00017329801345113802, "loss": 1.0233, "step": 5774 }, { "epoch": 0.55, "grad_norm": 0.29684472098320214, "learning_rate": 0.0001732872513247887, "loss": 1.0463, "step": 5775 }, { "epoch": 0.55, "grad_norm": 0.2785233560114013, "learning_rate": 0.00017327648736437977, "loss": 1.142, "step": 5776 }, { "epoch": 0.55, "grad_norm": 0.3208276011124166, "learning_rate": 0.00017326572157018078, "loss": 1.1932, "step": 5777 }, { "epoch": 0.55, "grad_norm": 0.22684754778328245, "learning_rate": 0.000173254953942461, "loss": 1.0656, "step": 5778 }, { "epoch": 0.55, "grad_norm": 0.3374242730404179, "learning_rate": 0.00017324418448148998, "loss": 1.049, "step": 5779 }, { "epoch": 0.55, "grad_norm": 0.3428578883405557, "learning_rate": 0.0001732334131875372, "loss": 1.1952, "step": 5780 }, { "epoch": 0.55, "grad_norm": 0.27472370192399703, "learning_rate": 0.00017322264006087225, "loss": 1.2441, "step": 5781 }, { "epoch": 0.55, "grad_norm": 0.27491429642165643, "learning_rate": 0.0001732118651017647, "loss": 1.0994, "step": 5782 }, { "epoch": 0.55, "grad_norm": 0.27459654102558345, "learning_rate": 0.00017320108831048422, "loss": 1.0496, "step": 5783 }, { "epoch": 0.55, "grad_norm": 0.2699347475330186, "learning_rate": 0.0001731903096873005, "loss": 1.1214, "step": 5784 }, { "epoch": 0.55, "grad_norm": 0.296175698180158, "learning_rate": 0.00017317952923248328, "loss": 1.0308, "step": 5785 }, { "epoch": 0.55, "grad_norm": 0.29058979499652565, "learning_rate": 0.00017316874694630236, "loss": 1.0828, "step": 5786 }, { "epoch": 0.55, "grad_norm": 0.2997585209236349, "learning_rate": 0.00017315796282902753, "loss": 1.1366, "step": 5787 }, { "epoch": 0.55, "grad_norm": 0.27673867013826936, "learning_rate": 0.00017314717688092873, "loss": 0.945, "step": 5788 }, { "epoch": 0.55, "grad_norm": 0.23598291014298442, "learning_rate": 0.00017313638910227585, "loss": 0.9402, "step": 5789 }, { "epoch": 0.55, "grad_norm": 0.26553154287144975, "learning_rate": 0.00017312559949333886, "loss": 1.1785, "step": 5790 }, { "epoch": 0.55, "grad_norm": 0.28452803788674824, "learning_rate": 0.0001731148080543878, "loss": 1.124, "step": 5791 }, { "epoch": 0.55, "grad_norm": 0.2705626232253567, "learning_rate": 0.00017310401478569273, "loss": 1.0191, "step": 5792 }, { "epoch": 0.55, "grad_norm": 0.24817380433889646, "learning_rate": 0.0001730932196875237, "loss": 1.0482, "step": 5793 }, { "epoch": 0.55, "grad_norm": 0.26652308262279306, "learning_rate": 0.00017308242276015094, "loss": 1.0738, "step": 5794 }, { "epoch": 0.55, "grad_norm": 0.24818290945260654, "learning_rate": 0.00017307162400384462, "loss": 1.0175, "step": 5795 }, { "epoch": 0.55, "grad_norm": 0.26331611360600954, "learning_rate": 0.000173060823418875, "loss": 1.0788, "step": 5796 }, { "epoch": 0.55, "grad_norm": 0.25070740236687494, "learning_rate": 0.00017305002100551233, "loss": 0.9522, "step": 5797 }, { "epoch": 0.55, "grad_norm": 0.27552761246559027, "learning_rate": 0.000173039216764027, "loss": 1.1854, "step": 5798 }, { "epoch": 0.55, "grad_norm": 0.24607203901766253, "learning_rate": 0.00017302841069468934, "loss": 1.0574, "step": 5799 }, { "epoch": 0.55, "grad_norm": 0.24789972386779166, "learning_rate": 0.00017301760279776982, "loss": 1.0585, "step": 5800 }, { "epoch": 0.55, "grad_norm": 0.22933484912929925, "learning_rate": 0.00017300679307353888, "loss": 1.0806, "step": 5801 }, { "epoch": 0.56, "grad_norm": 0.27649671163848355, "learning_rate": 0.0001729959815222671, "loss": 1.1766, "step": 5802 }, { "epoch": 0.56, "grad_norm": 0.24226545584694414, "learning_rate": 0.00017298516814422498, "loss": 0.9117, "step": 5803 }, { "epoch": 0.56, "grad_norm": 0.26242930789541363, "learning_rate": 0.00017297435293968315, "loss": 1.1165, "step": 5804 }, { "epoch": 0.56, "grad_norm": 0.3120420916073829, "learning_rate": 0.0001729635359089123, "loss": 1.0574, "step": 5805 }, { "epoch": 0.56, "grad_norm": 0.2701256919492001, "learning_rate": 0.00017295271705218307, "loss": 1.1047, "step": 5806 }, { "epoch": 0.56, "grad_norm": 0.29645770053433096, "learning_rate": 0.0001729418963697663, "loss": 1.1329, "step": 5807 }, { "epoch": 0.56, "grad_norm": 0.24632237776935018, "learning_rate": 0.0001729310738619327, "loss": 1.1359, "step": 5808 }, { "epoch": 0.56, "grad_norm": 0.27163467992709467, "learning_rate": 0.00017292024952895313, "loss": 1.1006, "step": 5809 }, { "epoch": 0.56, "grad_norm": 0.26959200346069406, "learning_rate": 0.0001729094233710985, "loss": 1.0042, "step": 5810 }, { "epoch": 0.56, "grad_norm": 0.289959983695105, "learning_rate": 0.00017289859538863973, "loss": 1.1085, "step": 5811 }, { "epoch": 0.56, "grad_norm": 0.3213875350320641, "learning_rate": 0.0001728877655818478, "loss": 1.1062, "step": 5812 }, { "epoch": 0.56, "grad_norm": 0.3270439141750809, "learning_rate": 0.0001728769339509937, "loss": 1.0258, "step": 5813 }, { "epoch": 0.56, "grad_norm": 0.3409072299007263, "learning_rate": 0.00017286610049634856, "loss": 0.9644, "step": 5814 }, { "epoch": 0.56, "grad_norm": 0.24496972615719473, "learning_rate": 0.00017285526521818346, "loss": 1.1259, "step": 5815 }, { "epoch": 0.56, "grad_norm": 0.3139089621120804, "learning_rate": 0.00017284442811676953, "loss": 1.0404, "step": 5816 }, { "epoch": 0.56, "grad_norm": 0.26970603580675145, "learning_rate": 0.00017283358919237802, "loss": 1.0835, "step": 5817 }, { "epoch": 0.56, "grad_norm": 0.2733732615441018, "learning_rate": 0.0001728227484452802, "loss": 1.0744, "step": 5818 }, { "epoch": 0.56, "grad_norm": 0.28036462116083893, "learning_rate": 0.00017281190587574728, "loss": 1.1692, "step": 5819 }, { "epoch": 0.56, "grad_norm": 0.2871660582969163, "learning_rate": 0.0001728010614840507, "loss": 1.0495, "step": 5820 }, { "epoch": 0.56, "grad_norm": 0.23884476735212962, "learning_rate": 0.00017279021527046178, "loss": 1.0443, "step": 5821 }, { "epoch": 0.56, "grad_norm": 0.2873207540071599, "learning_rate": 0.00017277936723525197, "loss": 0.993, "step": 5822 }, { "epoch": 0.56, "grad_norm": 0.30547644132922835, "learning_rate": 0.00017276851737869274, "loss": 1.0843, "step": 5823 }, { "epoch": 0.56, "grad_norm": 0.3057414154299913, "learning_rate": 0.00017275766570105567, "loss": 1.0655, "step": 5824 }, { "epoch": 0.56, "grad_norm": 0.2632962822442975, "learning_rate": 0.00017274681220261226, "loss": 1.0939, "step": 5825 }, { "epoch": 0.56, "grad_norm": 0.30877886648124775, "learning_rate": 0.00017273595688363416, "loss": 1.1277, "step": 5826 }, { "epoch": 0.56, "grad_norm": 0.26307234053148654, "learning_rate": 0.00017272509974439304, "loss": 1.0079, "step": 5827 }, { "epoch": 0.56, "grad_norm": 0.2841767029229268, "learning_rate": 0.00017271424078516055, "loss": 1.1768, "step": 5828 }, { "epoch": 0.56, "grad_norm": 0.2603214619363828, "learning_rate": 0.00017270338000620856, "loss": 1.0806, "step": 5829 }, { "epoch": 0.56, "grad_norm": 0.2545067452234432, "learning_rate": 0.00017269251740780874, "loss": 1.0412, "step": 5830 }, { "epoch": 0.56, "grad_norm": 0.27855295154779947, "learning_rate": 0.000172681652990233, "loss": 1.2206, "step": 5831 }, { "epoch": 0.56, "grad_norm": 0.28261805590758377, "learning_rate": 0.00017267078675375322, "loss": 1.0033, "step": 5832 }, { "epoch": 0.56, "grad_norm": 0.27667495248962876, "learning_rate": 0.00017265991869864128, "loss": 1.0831, "step": 5833 }, { "epoch": 0.56, "grad_norm": 0.2714857639489485, "learning_rate": 0.00017264904882516928, "loss": 1.109, "step": 5834 }, { "epoch": 0.56, "grad_norm": 0.27229604294119086, "learning_rate": 0.00017263817713360915, "loss": 1.0874, "step": 5835 }, { "epoch": 0.56, "grad_norm": 0.293920707567098, "learning_rate": 0.00017262730362423297, "loss": 1.0823, "step": 5836 }, { "epoch": 0.56, "grad_norm": 0.2613726158263339, "learning_rate": 0.00017261642829731287, "loss": 1.0599, "step": 5837 }, { "epoch": 0.56, "grad_norm": 0.2889492784232317, "learning_rate": 0.00017260555115312104, "loss": 1.1224, "step": 5838 }, { "epoch": 0.56, "grad_norm": 0.25970536207303385, "learning_rate": 0.00017259467219192968, "loss": 1.1015, "step": 5839 }, { "epoch": 0.56, "grad_norm": 0.28971995596673233, "learning_rate": 0.00017258379141401098, "loss": 1.0696, "step": 5840 }, { "epoch": 0.56, "grad_norm": 0.30430404198918504, "learning_rate": 0.00017257290881963732, "loss": 1.0823, "step": 5841 }, { "epoch": 0.56, "grad_norm": 0.2713987221161718, "learning_rate": 0.00017256202440908095, "loss": 1.0604, "step": 5842 }, { "epoch": 0.56, "grad_norm": 0.3001859725452708, "learning_rate": 0.00017255113818261437, "loss": 0.8537, "step": 5843 }, { "epoch": 0.56, "grad_norm": 0.24037659758164331, "learning_rate": 0.00017254025014050995, "loss": 1.0035, "step": 5844 }, { "epoch": 0.56, "grad_norm": 0.29289291269851125, "learning_rate": 0.00017252936028304015, "loss": 0.9527, "step": 5845 }, { "epoch": 0.56, "grad_norm": 0.28320339512377446, "learning_rate": 0.00017251846861047755, "loss": 1.0616, "step": 5846 }, { "epoch": 0.56, "grad_norm": 0.2844954523450815, "learning_rate": 0.0001725075751230947, "loss": 1.1248, "step": 5847 }, { "epoch": 0.56, "grad_norm": 0.25465891657756395, "learning_rate": 0.0001724966798211642, "loss": 1.2268, "step": 5848 }, { "epoch": 0.56, "grad_norm": 0.318243854381763, "learning_rate": 0.00017248578270495873, "loss": 1.1983, "step": 5849 }, { "epoch": 0.56, "grad_norm": 0.24392495708256506, "learning_rate": 0.00017247488377475102, "loss": 1.1131, "step": 5850 }, { "epoch": 0.56, "grad_norm": 0.339924875849168, "learning_rate": 0.00017246398303081377, "loss": 1.1255, "step": 5851 }, { "epoch": 0.56, "grad_norm": 0.2824682986273372, "learning_rate": 0.00017245308047341977, "loss": 1.0928, "step": 5852 }, { "epoch": 0.56, "grad_norm": 0.294553844921709, "learning_rate": 0.00017244217610284194, "loss": 1.1941, "step": 5853 }, { "epoch": 0.56, "grad_norm": 0.3175139266139292, "learning_rate": 0.0001724312699193531, "loss": 1.0488, "step": 5854 }, { "epoch": 0.56, "grad_norm": 0.26336543686584807, "learning_rate": 0.0001724203619232262, "loss": 1.0406, "step": 5855 }, { "epoch": 0.56, "grad_norm": 0.28698720470502975, "learning_rate": 0.00017240945211473426, "loss": 1.1673, "step": 5856 }, { "epoch": 0.56, "grad_norm": 0.2705696042656757, "learning_rate": 0.0001723985404941503, "loss": 1.0654, "step": 5857 }, { "epoch": 0.56, "grad_norm": 0.2609774023108237, "learning_rate": 0.0001723876270617473, "loss": 1.0166, "step": 5858 }, { "epoch": 0.56, "grad_norm": 0.2879084621496255, "learning_rate": 0.0001723767118177985, "loss": 1.1731, "step": 5859 }, { "epoch": 0.56, "grad_norm": 0.25995995206356726, "learning_rate": 0.00017236579476257694, "loss": 1.1141, "step": 5860 }, { "epoch": 0.56, "grad_norm": 0.2832052274386346, "learning_rate": 0.00017235487589635593, "loss": 1.0356, "step": 5861 }, { "epoch": 0.56, "grad_norm": 0.3009633175427442, "learning_rate": 0.00017234395521940866, "loss": 1.0634, "step": 5862 }, { "epoch": 0.56, "grad_norm": 0.28724524890382835, "learning_rate": 0.00017233303273200842, "loss": 1.0978, "step": 5863 }, { "epoch": 0.56, "grad_norm": 0.30150303153924973, "learning_rate": 0.0001723221084344286, "loss": 1.0883, "step": 5864 }, { "epoch": 0.56, "grad_norm": 0.2602169228739236, "learning_rate": 0.00017231118232694255, "loss": 0.8809, "step": 5865 }, { "epoch": 0.56, "grad_norm": 0.2597086205288419, "learning_rate": 0.00017230025440982373, "loss": 1.0913, "step": 5866 }, { "epoch": 0.56, "grad_norm": 0.2627390421998769, "learning_rate": 0.0001722893246833456, "loss": 0.9786, "step": 5867 }, { "epoch": 0.56, "grad_norm": 0.29086276852649495, "learning_rate": 0.0001722783931477817, "loss": 1.1312, "step": 5868 }, { "epoch": 0.56, "grad_norm": 0.2631914398523322, "learning_rate": 0.00017226745980340556, "loss": 1.1383, "step": 5869 }, { "epoch": 0.56, "grad_norm": 0.293675216189509, "learning_rate": 0.00017225652465049086, "loss": 1.037, "step": 5870 }, { "epoch": 0.56, "grad_norm": 0.27269400481933986, "learning_rate": 0.0001722455876893112, "loss": 1.0594, "step": 5871 }, { "epoch": 0.56, "grad_norm": 0.2913003315526418, "learning_rate": 0.00017223464892014028, "loss": 1.0343, "step": 5872 }, { "epoch": 0.56, "grad_norm": 0.30749338727180847, "learning_rate": 0.0001722237083432519, "loss": 1.0609, "step": 5873 }, { "epoch": 0.56, "grad_norm": 0.2780051476505012, "learning_rate": 0.00017221276595891984, "loss": 1.0403, "step": 5874 }, { "epoch": 0.56, "grad_norm": 0.28351257017042697, "learning_rate": 0.0001722018217674179, "loss": 1.1222, "step": 5875 }, { "epoch": 0.56, "grad_norm": 0.30556461707747595, "learning_rate": 0.00017219087576902, "loss": 1.1095, "step": 5876 }, { "epoch": 0.56, "grad_norm": 0.2747330702712088, "learning_rate": 0.00017217992796400005, "loss": 1.0258, "step": 5877 }, { "epoch": 0.56, "grad_norm": 0.3102546058358946, "learning_rate": 0.00017216897835263209, "loss": 0.9957, "step": 5878 }, { "epoch": 0.56, "grad_norm": 0.3232197983937078, "learning_rate": 0.00017215802693519003, "loss": 1.0768, "step": 5879 }, { "epoch": 0.56, "grad_norm": 0.269670235437008, "learning_rate": 0.00017214707371194802, "loss": 1.1033, "step": 5880 }, { "epoch": 0.56, "grad_norm": 0.2633897286188578, "learning_rate": 0.00017213611868318015, "loss": 1.0035, "step": 5881 }, { "epoch": 0.56, "grad_norm": 0.3042144256612858, "learning_rate": 0.00017212516184916056, "loss": 1.0715, "step": 5882 }, { "epoch": 0.56, "grad_norm": 0.266078239126639, "learning_rate": 0.0001721142032101635, "loss": 1.1544, "step": 5883 }, { "epoch": 0.56, "grad_norm": 0.30458594940724576, "learning_rate": 0.00017210324276646316, "loss": 1.1251, "step": 5884 }, { "epoch": 0.56, "grad_norm": 0.28395232279028726, "learning_rate": 0.00017209228051833387, "loss": 1.0925, "step": 5885 }, { "epoch": 0.56, "grad_norm": 0.29349557855153724, "learning_rate": 0.00017208131646604993, "loss": 1.1322, "step": 5886 }, { "epoch": 0.56, "grad_norm": 0.2729717369394539, "learning_rate": 0.00017207035060988574, "loss": 1.0009, "step": 5887 }, { "epoch": 0.56, "grad_norm": 0.278418729052514, "learning_rate": 0.00017205938295011575, "loss": 1.0637, "step": 5888 }, { "epoch": 0.56, "grad_norm": 0.2689002291046586, "learning_rate": 0.00017204841348701438, "loss": 1.1421, "step": 5889 }, { "epoch": 0.56, "grad_norm": 0.2936863871934149, "learning_rate": 0.00017203744222085623, "loss": 1.078, "step": 5890 }, { "epoch": 0.56, "grad_norm": 0.291267791636599, "learning_rate": 0.00017202646915191578, "loss": 1.2519, "step": 5891 }, { "epoch": 0.56, "grad_norm": 0.2717676026532944, "learning_rate": 0.0001720154942804677, "loss": 1.0844, "step": 5892 }, { "epoch": 0.56, "grad_norm": 0.2774147698904939, "learning_rate": 0.0001720045176067866, "loss": 1.0487, "step": 5893 }, { "epoch": 0.56, "grad_norm": 0.27887254178612914, "learning_rate": 0.00017199353913114717, "loss": 1.1334, "step": 5894 }, { "epoch": 0.56, "grad_norm": 0.28978059963913083, "learning_rate": 0.00017198255885382421, "loss": 1.0868, "step": 5895 }, { "epoch": 0.56, "grad_norm": 0.2766397079264888, "learning_rate": 0.00017197157677509246, "loss": 1.1068, "step": 5896 }, { "epoch": 0.56, "grad_norm": 0.2851293309828098, "learning_rate": 0.00017196059289522678, "loss": 1.0717, "step": 5897 }, { "epoch": 0.56, "grad_norm": 0.2549062649316722, "learning_rate": 0.00017194960721450206, "loss": 1.0199, "step": 5898 }, { "epoch": 0.56, "grad_norm": 0.2461949098073852, "learning_rate": 0.00017193861973319316, "loss": 1.0461, "step": 5899 }, { "epoch": 0.56, "grad_norm": 0.27203801042895437, "learning_rate": 0.0001719276304515751, "loss": 1.1691, "step": 5900 }, { "epoch": 0.56, "grad_norm": 0.27460790915022437, "learning_rate": 0.00017191663936992288, "loss": 1.105, "step": 5901 }, { "epoch": 0.56, "grad_norm": 0.29915013883951985, "learning_rate": 0.0001719056464885116, "loss": 0.9848, "step": 5902 }, { "epoch": 0.56, "grad_norm": 0.29579312716715256, "learning_rate": 0.00017189465180761628, "loss": 1.1115, "step": 5903 }, { "epoch": 0.56, "grad_norm": 0.30761711337398795, "learning_rate": 0.00017188365532751213, "loss": 1.164, "step": 5904 }, { "epoch": 0.56, "grad_norm": 0.32410192655697073, "learning_rate": 0.00017187265704847433, "loss": 1.089, "step": 5905 }, { "epoch": 0.57, "grad_norm": 0.2629249103887719, "learning_rate": 0.00017186165697077809, "loss": 0.9588, "step": 5906 }, { "epoch": 0.57, "grad_norm": 0.28511065582542094, "learning_rate": 0.00017185065509469876, "loss": 1.1679, "step": 5907 }, { "epoch": 0.57, "grad_norm": 0.2813371419019583, "learning_rate": 0.00017183965142051163, "loss": 1.0888, "step": 5908 }, { "epoch": 0.57, "grad_norm": 0.27646089721769807, "learning_rate": 0.00017182864594849205, "loss": 1.0513, "step": 5909 }, { "epoch": 0.57, "grad_norm": 0.30831778021556755, "learning_rate": 0.00017181763867891547, "loss": 1.1152, "step": 5910 }, { "epoch": 0.57, "grad_norm": 0.31669929806320846, "learning_rate": 0.00017180662961205733, "loss": 1.0554, "step": 5911 }, { "epoch": 0.57, "grad_norm": 0.2881244715767111, "learning_rate": 0.00017179561874819318, "loss": 1.0702, "step": 5912 }, { "epoch": 0.57, "grad_norm": 0.28072579932667696, "learning_rate": 0.00017178460608759853, "loss": 0.9758, "step": 5913 }, { "epoch": 0.57, "grad_norm": 0.2943649793795249, "learning_rate": 0.00017177359163054903, "loss": 1.0715, "step": 5914 }, { "epoch": 0.57, "grad_norm": 0.24478814096705148, "learning_rate": 0.00017176257537732025, "loss": 1.0606, "step": 5915 }, { "epoch": 0.57, "grad_norm": 0.27922088346647145, "learning_rate": 0.00017175155732818796, "loss": 1.1677, "step": 5916 }, { "epoch": 0.57, "grad_norm": 0.25994214062479126, "learning_rate": 0.00017174053748342783, "loss": 0.9742, "step": 5917 }, { "epoch": 0.57, "grad_norm": 0.282588772331292, "learning_rate": 0.00017172951584331565, "loss": 0.9751, "step": 5918 }, { "epoch": 0.57, "grad_norm": 0.2878741945844007, "learning_rate": 0.0001717184924081273, "loss": 1.0041, "step": 5919 }, { "epoch": 0.57, "grad_norm": 0.3029923936191996, "learning_rate": 0.00017170746717813854, "loss": 1.1071, "step": 5920 }, { "epoch": 0.57, "grad_norm": 0.27290844629846106, "learning_rate": 0.0001716964401536254, "loss": 0.8786, "step": 5921 }, { "epoch": 0.57, "grad_norm": 0.31966494850943117, "learning_rate": 0.00017168541133486377, "loss": 1.1118, "step": 5922 }, { "epoch": 0.57, "grad_norm": 0.2689850435328001, "learning_rate": 0.00017167438072212968, "loss": 1.1024, "step": 5923 }, { "epoch": 0.57, "grad_norm": 0.27928731290969944, "learning_rate": 0.00017166334831569916, "loss": 1.1399, "step": 5924 }, { "epoch": 0.57, "grad_norm": 0.29591002661910865, "learning_rate": 0.00017165231411584827, "loss": 1.1074, "step": 5925 }, { "epoch": 0.57, "grad_norm": 0.3104516805825287, "learning_rate": 0.00017164127812285324, "loss": 1.0186, "step": 5926 }, { "epoch": 0.57, "grad_norm": 0.3329888234877537, "learning_rate": 0.00017163024033699017, "loss": 1.1095, "step": 5927 }, { "epoch": 0.57, "grad_norm": 0.27420800800981127, "learning_rate": 0.00017161920075853534, "loss": 1.1286, "step": 5928 }, { "epoch": 0.57, "grad_norm": 0.2687683336478087, "learning_rate": 0.000171608159387765, "loss": 1.1067, "step": 5929 }, { "epoch": 0.57, "grad_norm": 0.28848253158286347, "learning_rate": 0.00017159711622495544, "loss": 1.1874, "step": 5930 }, { "epoch": 0.57, "grad_norm": 0.28239325672690274, "learning_rate": 0.0001715860712703831, "loss": 1.1561, "step": 5931 }, { "epoch": 0.57, "grad_norm": 0.2918884091102849, "learning_rate": 0.00017157502452432429, "loss": 1.0429, "step": 5932 }, { "epoch": 0.57, "grad_norm": 0.24790386180695678, "learning_rate": 0.00017156397598705548, "loss": 0.9923, "step": 5933 }, { "epoch": 0.57, "grad_norm": 0.24570072442193652, "learning_rate": 0.00017155292565885328, "loss": 1.0945, "step": 5934 }, { "epoch": 0.57, "grad_norm": 0.23125439331824077, "learning_rate": 0.0001715418735399941, "loss": 0.9957, "step": 5935 }, { "epoch": 0.57, "grad_norm": 0.2554465249970479, "learning_rate": 0.00017153081963075458, "loss": 1.1284, "step": 5936 }, { "epoch": 0.57, "grad_norm": 0.2794800525821183, "learning_rate": 0.00017151976393141132, "loss": 0.9941, "step": 5937 }, { "epoch": 0.57, "grad_norm": 0.30723518305938063, "learning_rate": 0.000171508706442241, "loss": 1.1475, "step": 5938 }, { "epoch": 0.57, "grad_norm": 0.30203130938607553, "learning_rate": 0.00017149764716352045, "loss": 1.0853, "step": 5939 }, { "epoch": 0.57, "grad_norm": 0.260328670983559, "learning_rate": 0.00017148658609552627, "loss": 1.1423, "step": 5940 }, { "epoch": 0.57, "grad_norm": 0.293768400385368, "learning_rate": 0.00017147552323853538, "loss": 1.0503, "step": 5941 }, { "epoch": 0.57, "grad_norm": 0.26640544240980046, "learning_rate": 0.00017146445859282457, "loss": 1.0018, "step": 5942 }, { "epoch": 0.57, "grad_norm": 0.3159499546692293, "learning_rate": 0.00017145339215867078, "loss": 1.0303, "step": 5943 }, { "epoch": 0.57, "grad_norm": 0.25980174549043206, "learning_rate": 0.00017144232393635094, "loss": 1.1472, "step": 5944 }, { "epoch": 0.57, "grad_norm": 0.2727359561908714, "learning_rate": 0.00017143125392614207, "loss": 1.1425, "step": 5945 }, { "epoch": 0.57, "grad_norm": 0.28270017496530925, "learning_rate": 0.0001714201821283212, "loss": 1.1023, "step": 5946 }, { "epoch": 0.57, "grad_norm": 0.299277933501389, "learning_rate": 0.0001714091085431653, "loss": 1.1593, "step": 5947 }, { "epoch": 0.57, "grad_norm": 0.2762476973789334, "learning_rate": 0.00017139803317095165, "loss": 1.0942, "step": 5948 }, { "epoch": 0.57, "grad_norm": 0.3120883708584473, "learning_rate": 0.00017138695601195733, "loss": 1.0574, "step": 5949 }, { "epoch": 0.57, "grad_norm": 0.27024241469527543, "learning_rate": 0.00017137587706645956, "loss": 1.12, "step": 5950 }, { "epoch": 0.57, "grad_norm": 0.2584604551495038, "learning_rate": 0.00017136479633473562, "loss": 1.0682, "step": 5951 }, { "epoch": 0.57, "grad_norm": 0.2525657024101026, "learning_rate": 0.0001713537138170628, "loss": 1.1037, "step": 5952 }, { "epoch": 0.57, "grad_norm": 0.28274675021766404, "learning_rate": 0.00017134262951371842, "loss": 1.0135, "step": 5953 }, { "epoch": 0.57, "grad_norm": 0.2774062534984412, "learning_rate": 0.00017133154342497995, "loss": 0.9824, "step": 5954 }, { "epoch": 0.57, "grad_norm": 0.2832539855963225, "learning_rate": 0.00017132045555112474, "loss": 0.9708, "step": 5955 }, { "epoch": 0.57, "grad_norm": 0.24641803171377838, "learning_rate": 0.0001713093658924303, "loss": 0.9826, "step": 5956 }, { "epoch": 0.57, "grad_norm": 0.29062056115142054, "learning_rate": 0.0001712982744491742, "loss": 0.9739, "step": 5957 }, { "epoch": 0.57, "grad_norm": 0.2922884188711339, "learning_rate": 0.00017128718122163395, "loss": 0.9959, "step": 5958 }, { "epoch": 0.57, "grad_norm": 0.26793724543335196, "learning_rate": 0.00017127608621008718, "loss": 1.0553, "step": 5959 }, { "epoch": 0.57, "grad_norm": 0.276065664300244, "learning_rate": 0.00017126498941481155, "loss": 1.1004, "step": 5960 }, { "epoch": 0.57, "grad_norm": 0.26445107517178895, "learning_rate": 0.00017125389083608479, "loss": 1.0136, "step": 5961 }, { "epoch": 0.57, "grad_norm": 0.2724968692637002, "learning_rate": 0.00017124279047418464, "loss": 0.9554, "step": 5962 }, { "epoch": 0.57, "grad_norm": 0.2638896257757664, "learning_rate": 0.00017123168832938886, "loss": 1.0249, "step": 5963 }, { "epoch": 0.57, "grad_norm": 0.25113449381499126, "learning_rate": 0.00017122058440197533, "loss": 1.0309, "step": 5964 }, { "epoch": 0.57, "grad_norm": 0.29069944989919566, "learning_rate": 0.0001712094786922219, "loss": 1.0919, "step": 5965 }, { "epoch": 0.57, "grad_norm": 0.28312314556833795, "learning_rate": 0.00017119837120040652, "loss": 1.0403, "step": 5966 }, { "epoch": 0.57, "grad_norm": 0.26755188643974503, "learning_rate": 0.00017118726192680717, "loss": 1.0553, "step": 5967 }, { "epoch": 0.57, "grad_norm": 0.26486022885030275, "learning_rate": 0.0001711761508717018, "loss": 1.0489, "step": 5968 }, { "epoch": 0.57, "grad_norm": 0.3020328536278636, "learning_rate": 0.00017116503803536856, "loss": 1.1584, "step": 5969 }, { "epoch": 0.57, "grad_norm": 0.34335564008695746, "learning_rate": 0.00017115392341808555, "loss": 1.1107, "step": 5970 }, { "epoch": 0.57, "grad_norm": 0.30126089791181193, "learning_rate": 0.00017114280702013084, "loss": 1.0434, "step": 5971 }, { "epoch": 0.57, "grad_norm": 0.28087942592247517, "learning_rate": 0.00017113168884178267, "loss": 1.0781, "step": 5972 }, { "epoch": 0.57, "grad_norm": 0.2728680461126566, "learning_rate": 0.0001711205688833193, "loss": 1.0257, "step": 5973 }, { "epoch": 0.57, "grad_norm": 0.2985709605948755, "learning_rate": 0.000171109447145019, "loss": 1.1728, "step": 5974 }, { "epoch": 0.57, "grad_norm": 0.3082359218373612, "learning_rate": 0.0001710983236271601, "loss": 1.0554, "step": 5975 }, { "epoch": 0.57, "grad_norm": 0.2795284645154645, "learning_rate": 0.00017108719833002094, "loss": 1.0948, "step": 5976 }, { "epoch": 0.57, "grad_norm": 0.28120362222029427, "learning_rate": 0.00017107607125387998, "loss": 0.9671, "step": 5977 }, { "epoch": 0.57, "grad_norm": 0.2823788055491324, "learning_rate": 0.00017106494239901566, "loss": 1.0073, "step": 5978 }, { "epoch": 0.57, "grad_norm": 0.31065181033843386, "learning_rate": 0.00017105381176570652, "loss": 1.1014, "step": 5979 }, { "epoch": 0.57, "grad_norm": 0.30900428726246143, "learning_rate": 0.00017104267935423107, "loss": 1.0998, "step": 5980 }, { "epoch": 0.57, "grad_norm": 0.30988350895081024, "learning_rate": 0.0001710315451648679, "loss": 1.0667, "step": 5981 }, { "epoch": 0.57, "grad_norm": 0.2981798835140071, "learning_rate": 0.0001710204091978957, "loss": 1.065, "step": 5982 }, { "epoch": 0.57, "grad_norm": 0.2732778988161561, "learning_rate": 0.0001710092714535931, "loss": 1.184, "step": 5983 }, { "epoch": 0.57, "grad_norm": 0.27164434920682934, "learning_rate": 0.00017099813193223887, "loss": 1.0524, "step": 5984 }, { "epoch": 0.57, "grad_norm": 0.29410498736707635, "learning_rate": 0.00017098699063411178, "loss": 1.0777, "step": 5985 }, { "epoch": 0.57, "grad_norm": 0.28163842201092487, "learning_rate": 0.0001709758475594906, "loss": 1.0853, "step": 5986 }, { "epoch": 0.57, "grad_norm": 0.27782202027455466, "learning_rate": 0.00017096470270865427, "loss": 1.0146, "step": 5987 }, { "epoch": 0.57, "grad_norm": 0.2768790776720804, "learning_rate": 0.0001709535560818816, "loss": 1.1312, "step": 5988 }, { "epoch": 0.57, "grad_norm": 0.2824780756717249, "learning_rate": 0.00017094240767945166, "loss": 1.059, "step": 5989 }, { "epoch": 0.57, "grad_norm": 0.27033728590320183, "learning_rate": 0.00017093125750164333, "loss": 1.197, "step": 5990 }, { "epoch": 0.57, "grad_norm": 0.3100948675967715, "learning_rate": 0.00017092010554873574, "loss": 1.1042, "step": 5991 }, { "epoch": 0.57, "grad_norm": 0.28820788597927405, "learning_rate": 0.0001709089518210079, "loss": 1.1635, "step": 5992 }, { "epoch": 0.57, "grad_norm": 0.28201539629588646, "learning_rate": 0.00017089779631873904, "loss": 0.9284, "step": 5993 }, { "epoch": 0.57, "grad_norm": 0.27679231851615277, "learning_rate": 0.0001708866390422082, "loss": 1.0167, "step": 5994 }, { "epoch": 0.57, "grad_norm": 0.27944884514951135, "learning_rate": 0.0001708754799916947, "loss": 1.139, "step": 5995 }, { "epoch": 0.57, "grad_norm": 0.3002084383540107, "learning_rate": 0.00017086431916747778, "loss": 1.0454, "step": 5996 }, { "epoch": 0.57, "grad_norm": 0.26816351694504426, "learning_rate": 0.0001708531565698367, "loss": 1.0773, "step": 5997 }, { "epoch": 0.57, "grad_norm": 0.30990517015210073, "learning_rate": 0.0001708419921990509, "loss": 1.1122, "step": 5998 }, { "epoch": 0.57, "grad_norm": 0.30574794780484754, "learning_rate": 0.0001708308260553997, "loss": 1.1214, "step": 5999 }, { "epoch": 0.57, "grad_norm": 0.2690373353619774, "learning_rate": 0.00017081965813916253, "loss": 1.062, "step": 6000 }, { "epoch": 0.57, "grad_norm": 0.27472519148333513, "learning_rate": 0.00017080848845061892, "loss": 1.0179, "step": 6001 }, { "epoch": 0.57, "grad_norm": 0.27470990890182256, "learning_rate": 0.0001707973169900484, "loss": 1.0747, "step": 6002 }, { "epoch": 0.57, "grad_norm": 0.29061144311513604, "learning_rate": 0.00017078614375773052, "loss": 1.1307, "step": 6003 }, { "epoch": 0.57, "grad_norm": 0.2713055223809409, "learning_rate": 0.00017077496875394493, "loss": 1.0008, "step": 6004 }, { "epoch": 0.57, "grad_norm": 0.2925514548076972, "learning_rate": 0.00017076379197897122, "loss": 1.1317, "step": 6005 }, { "epoch": 0.57, "grad_norm": 0.31101769602212137, "learning_rate": 0.00017075261343308916, "loss": 1.1037, "step": 6006 }, { "epoch": 0.57, "grad_norm": 0.2647833297981797, "learning_rate": 0.00017074143311657852, "loss": 1.0076, "step": 6007 }, { "epoch": 0.57, "grad_norm": 0.3013454403605899, "learning_rate": 0.00017073025102971903, "loss": 1.0792, "step": 6008 }, { "epoch": 0.57, "grad_norm": 0.263491300792746, "learning_rate": 0.00017071906717279053, "loss": 0.9797, "step": 6009 }, { "epoch": 0.57, "grad_norm": 0.2724539339009908, "learning_rate": 0.00017070788154607293, "loss": 0.9805, "step": 6010 }, { "epoch": 0.58, "grad_norm": 0.31840312141141264, "learning_rate": 0.00017069669414984618, "loss": 1.142, "step": 6011 }, { "epoch": 0.58, "grad_norm": 0.2983653164774024, "learning_rate": 0.00017068550498439025, "loss": 1.103, "step": 6012 }, { "epoch": 0.58, "grad_norm": 0.282150969685538, "learning_rate": 0.00017067431404998507, "loss": 1.0682, "step": 6013 }, { "epoch": 0.58, "grad_norm": 0.2771901829735762, "learning_rate": 0.00017066312134691083, "loss": 1.009, "step": 6014 }, { "epoch": 0.58, "grad_norm": 0.2933084300533384, "learning_rate": 0.00017065192687544753, "loss": 1.102, "step": 6015 }, { "epoch": 0.58, "grad_norm": 0.27679497108587947, "learning_rate": 0.00017064073063587535, "loss": 1.1393, "step": 6016 }, { "epoch": 0.58, "grad_norm": 0.28895991711416996, "learning_rate": 0.00017062953262847455, "loss": 1.0694, "step": 6017 }, { "epoch": 0.58, "grad_norm": 0.27823308988900375, "learning_rate": 0.00017061833285352527, "loss": 1.0905, "step": 6018 }, { "epoch": 0.58, "grad_norm": 0.2748970724583933, "learning_rate": 0.00017060713131130778, "loss": 1.1278, "step": 6019 }, { "epoch": 0.58, "grad_norm": 0.2730226539296915, "learning_rate": 0.00017059592800210252, "loss": 1.0858, "step": 6020 }, { "epoch": 0.58, "grad_norm": 0.271328807355675, "learning_rate": 0.00017058472292618977, "loss": 0.9972, "step": 6021 }, { "epoch": 0.58, "grad_norm": 0.27544958968420746, "learning_rate": 0.00017057351608384995, "loss": 1.1456, "step": 6022 }, { "epoch": 0.58, "grad_norm": 0.2833122301378086, "learning_rate": 0.00017056230747536355, "loss": 1.1727, "step": 6023 }, { "epoch": 0.58, "grad_norm": 0.2894626470697644, "learning_rate": 0.00017055109710101108, "loss": 1.2121, "step": 6024 }, { "epoch": 0.58, "grad_norm": 0.3306470002946546, "learning_rate": 0.00017053988496107305, "loss": 1.0776, "step": 6025 }, { "epoch": 0.58, "grad_norm": 0.27472508074504254, "learning_rate": 0.00017052867105583005, "loss": 0.973, "step": 6026 }, { "epoch": 0.58, "grad_norm": 0.3021983840922597, "learning_rate": 0.00017051745538556278, "loss": 1.0972, "step": 6027 }, { "epoch": 0.58, "grad_norm": 0.2970721247856769, "learning_rate": 0.0001705062379505518, "loss": 1.1562, "step": 6028 }, { "epoch": 0.58, "grad_norm": 0.2830455403468363, "learning_rate": 0.00017049501875107795, "loss": 0.9997, "step": 6029 }, { "epoch": 0.58, "grad_norm": 0.28710721053351523, "learning_rate": 0.00017048379778742193, "loss": 1.1453, "step": 6030 }, { "epoch": 0.58, "grad_norm": 0.27478245168556914, "learning_rate": 0.00017047257505986457, "loss": 1.0092, "step": 6031 }, { "epoch": 0.58, "grad_norm": 0.26896422201399867, "learning_rate": 0.00017046135056868677, "loss": 1.0855, "step": 6032 }, { "epoch": 0.58, "grad_norm": 0.24707636757742915, "learning_rate": 0.00017045012431416936, "loss": 1.0971, "step": 6033 }, { "epoch": 0.58, "grad_norm": 0.2493183820340796, "learning_rate": 0.0001704388962965933, "loss": 0.9823, "step": 6034 }, { "epoch": 0.58, "grad_norm": 0.2805800589880677, "learning_rate": 0.00017042766651623962, "loss": 1.0523, "step": 6035 }, { "epoch": 0.58, "grad_norm": 0.3038587968507171, "learning_rate": 0.00017041643497338931, "loss": 1.0162, "step": 6036 }, { "epoch": 0.58, "grad_norm": 0.30638016511222943, "learning_rate": 0.00017040520166832344, "loss": 1.0421, "step": 6037 }, { "epoch": 0.58, "grad_norm": 0.26320036877135733, "learning_rate": 0.00017039396660132317, "loss": 0.946, "step": 6038 }, { "epoch": 0.58, "grad_norm": 0.2754763891016706, "learning_rate": 0.00017038272977266966, "loss": 1.0652, "step": 6039 }, { "epoch": 0.58, "grad_norm": 0.26134384808972805, "learning_rate": 0.0001703714911826441, "loss": 1.1064, "step": 6040 }, { "epoch": 0.58, "grad_norm": 0.2651686121986517, "learning_rate": 0.0001703602508315277, "loss": 1.1166, "step": 6041 }, { "epoch": 0.58, "grad_norm": 0.25447003550809477, "learning_rate": 0.00017034900871960184, "loss": 1.0199, "step": 6042 }, { "epoch": 0.58, "grad_norm": 0.28285744813232555, "learning_rate": 0.0001703377648471478, "loss": 1.0086, "step": 6043 }, { "epoch": 0.58, "grad_norm": 0.2651737161168531, "learning_rate": 0.000170326519214447, "loss": 1.0729, "step": 6044 }, { "epoch": 0.58, "grad_norm": 0.2855727067410992, "learning_rate": 0.00017031527182178092, "loss": 1.0919, "step": 6045 }, { "epoch": 0.58, "grad_norm": 0.26177841865296425, "learning_rate": 0.0001703040226694309, "loss": 0.9408, "step": 6046 }, { "epoch": 0.58, "grad_norm": 0.23691819299378247, "learning_rate": 0.00017029277175767854, "loss": 1.0849, "step": 6047 }, { "epoch": 0.58, "grad_norm": 0.2511952002294269, "learning_rate": 0.0001702815190868054, "loss": 1.0779, "step": 6048 }, { "epoch": 0.58, "grad_norm": 0.29110924767645496, "learning_rate": 0.00017027026465709307, "loss": 0.9933, "step": 6049 }, { "epoch": 0.58, "grad_norm": 0.2830264573548182, "learning_rate": 0.00017025900846882321, "loss": 1.0192, "step": 6050 }, { "epoch": 0.58, "grad_norm": 0.28455741758111286, "learning_rate": 0.00017024775052227752, "loss": 1.0588, "step": 6051 }, { "epoch": 0.58, "grad_norm": 0.28932106749302483, "learning_rate": 0.0001702364908177377, "loss": 1.1211, "step": 6052 }, { "epoch": 0.58, "grad_norm": 0.3307206565435221, "learning_rate": 0.00017022522935548554, "loss": 1.0975, "step": 6053 }, { "epoch": 0.58, "grad_norm": 0.25352957645066343, "learning_rate": 0.0001702139661358029, "loss": 1.0298, "step": 6054 }, { "epoch": 0.58, "grad_norm": 0.2572310440026188, "learning_rate": 0.00017020270115897164, "loss": 1.0728, "step": 6055 }, { "epoch": 0.58, "grad_norm": 0.3172532579427463, "learning_rate": 0.00017019143442527365, "loss": 1.1023, "step": 6056 }, { "epoch": 0.58, "grad_norm": 0.282565292417829, "learning_rate": 0.0001701801659349909, "loss": 1.0488, "step": 6057 }, { "epoch": 0.58, "grad_norm": 0.275135543466168, "learning_rate": 0.00017016889568840542, "loss": 1.1721, "step": 6058 }, { "epoch": 0.58, "grad_norm": 0.26182321188521385, "learning_rate": 0.00017015762368579918, "loss": 1.0598, "step": 6059 }, { "epoch": 0.58, "grad_norm": 0.26125752561442744, "learning_rate": 0.00017014634992745434, "loss": 1.1684, "step": 6060 }, { "epoch": 0.58, "grad_norm": 0.27290128374782624, "learning_rate": 0.000170135074413653, "loss": 0.9901, "step": 6061 }, { "epoch": 0.58, "grad_norm": 0.2653240038158296, "learning_rate": 0.00017012379714467736, "loss": 1.1086, "step": 6062 }, { "epoch": 0.58, "grad_norm": 0.3293682851680674, "learning_rate": 0.0001701125181208096, "loss": 1.117, "step": 6063 }, { "epoch": 0.58, "grad_norm": 0.2329956724588023, "learning_rate": 0.00017010123734233204, "loss": 1.0925, "step": 6064 }, { "epoch": 0.58, "grad_norm": 0.2755426850023404, "learning_rate": 0.00017008995480952694, "loss": 1.1292, "step": 6065 }, { "epoch": 0.58, "grad_norm": 0.29650880006845154, "learning_rate": 0.00017007867052267666, "loss": 1.0334, "step": 6066 }, { "epoch": 0.58, "grad_norm": 0.2803458685525201, "learning_rate": 0.00017006738448206363, "loss": 1.0168, "step": 6067 }, { "epoch": 0.58, "grad_norm": 0.2916272901141261, "learning_rate": 0.00017005609668797024, "loss": 1.0039, "step": 6068 }, { "epoch": 0.58, "grad_norm": 0.2691648334575106, "learning_rate": 0.00017004480714067903, "loss": 1.0727, "step": 6069 }, { "epoch": 0.58, "grad_norm": 0.3270372768288688, "learning_rate": 0.00017003351584047249, "loss": 1.1879, "step": 6070 }, { "epoch": 0.58, "grad_norm": 0.2782777828019017, "learning_rate": 0.0001700222227876332, "loss": 1.0006, "step": 6071 }, { "epoch": 0.58, "grad_norm": 0.2661246430524472, "learning_rate": 0.00017001092798244377, "loss": 1.0393, "step": 6072 }, { "epoch": 0.58, "grad_norm": 0.281715885539005, "learning_rate": 0.00016999963142518687, "loss": 1.1322, "step": 6073 }, { "epoch": 0.58, "grad_norm": 0.2986783823949029, "learning_rate": 0.0001699883331161452, "loss": 0.9984, "step": 6074 }, { "epoch": 0.58, "grad_norm": 0.28065012498362923, "learning_rate": 0.00016997703305560153, "loss": 1.06, "step": 6075 }, { "epoch": 0.58, "grad_norm": 0.3292647162053623, "learning_rate": 0.00016996573124383862, "loss": 1.075, "step": 6076 }, { "epoch": 0.58, "grad_norm": 0.3073957042018125, "learning_rate": 0.0001699544276811393, "loss": 1.1219, "step": 6077 }, { "epoch": 0.58, "grad_norm": 0.3076254806022189, "learning_rate": 0.00016994312236778646, "loss": 1.1214, "step": 6078 }, { "epoch": 0.58, "grad_norm": 0.24563100316558975, "learning_rate": 0.00016993181530406304, "loss": 1.0868, "step": 6079 }, { "epoch": 0.58, "grad_norm": 0.32781029130768446, "learning_rate": 0.00016992050649025197, "loss": 1.0481, "step": 6080 }, { "epoch": 0.58, "grad_norm": 0.27854015122577225, "learning_rate": 0.0001699091959266363, "loss": 1.0749, "step": 6081 }, { "epoch": 0.58, "grad_norm": 0.2844546319680473, "learning_rate": 0.00016989788361349906, "loss": 1.0692, "step": 6082 }, { "epoch": 0.58, "grad_norm": 0.26692237610439323, "learning_rate": 0.00016988656955112337, "loss": 1.1151, "step": 6083 }, { "epoch": 0.58, "grad_norm": 0.26269204364410026, "learning_rate": 0.00016987525373979233, "loss": 0.9348, "step": 6084 }, { "epoch": 0.58, "grad_norm": 0.27306822357867006, "learning_rate": 0.00016986393617978918, "loss": 1.2159, "step": 6085 }, { "epoch": 0.58, "grad_norm": 0.2827921168255787, "learning_rate": 0.0001698526168713971, "loss": 0.9376, "step": 6086 }, { "epoch": 0.58, "grad_norm": 0.23947335215371623, "learning_rate": 0.00016984129581489935, "loss": 1.0817, "step": 6087 }, { "epoch": 0.58, "grad_norm": 0.3343810073022071, "learning_rate": 0.0001698299730105793, "loss": 1.1555, "step": 6088 }, { "epoch": 0.58, "grad_norm": 0.26515752282559285, "learning_rate": 0.00016981864845872033, "loss": 1.1123, "step": 6089 }, { "epoch": 0.58, "grad_norm": 0.27681937200219026, "learning_rate": 0.00016980732215960575, "loss": 1.0367, "step": 6090 }, { "epoch": 0.58, "grad_norm": 0.2519010649566014, "learning_rate": 0.0001697959941135191, "loss": 1.0861, "step": 6091 }, { "epoch": 0.58, "grad_norm": 0.2704443985177932, "learning_rate": 0.00016978466432074381, "loss": 1.0398, "step": 6092 }, { "epoch": 0.58, "grad_norm": 0.28884051221369256, "learning_rate": 0.00016977333278156347, "loss": 1.0501, "step": 6093 }, { "epoch": 0.58, "grad_norm": 0.2749924386950333, "learning_rate": 0.0001697619994962616, "loss": 1.0743, "step": 6094 }, { "epoch": 0.58, "grad_norm": 0.279690239046547, "learning_rate": 0.00016975066446512185, "loss": 1.1504, "step": 6095 }, { "epoch": 0.58, "grad_norm": 0.24616630543118756, "learning_rate": 0.00016973932768842787, "loss": 1.1121, "step": 6096 }, { "epoch": 0.58, "grad_norm": 0.25087990953142, "learning_rate": 0.00016972798916646336, "loss": 1.0454, "step": 6097 }, { "epoch": 0.58, "grad_norm": 0.2766189199572125, "learning_rate": 0.00016971664889951215, "loss": 1.0582, "step": 6098 }, { "epoch": 0.58, "grad_norm": 0.28980752883287725, "learning_rate": 0.00016970530688785798, "loss": 1.1664, "step": 6099 }, { "epoch": 0.58, "grad_norm": 0.334844857943314, "learning_rate": 0.0001696939631317847, "loss": 1.1833, "step": 6100 }, { "epoch": 0.58, "grad_norm": 0.2923718680275688, "learning_rate": 0.0001696826176315762, "loss": 1.063, "step": 6101 }, { "epoch": 0.58, "grad_norm": 0.2927999981743822, "learning_rate": 0.00016967127038751637, "loss": 1.0568, "step": 6102 }, { "epoch": 0.58, "grad_norm": 0.30725008382724595, "learning_rate": 0.0001696599213998892, "loss": 1.113, "step": 6103 }, { "epoch": 0.58, "grad_norm": 0.2864392247154817, "learning_rate": 0.00016964857066897876, "loss": 1.0959, "step": 6104 }, { "epoch": 0.58, "grad_norm": 0.29730179190891975, "learning_rate": 0.00016963721819506904, "loss": 1.0681, "step": 6105 }, { "epoch": 0.58, "grad_norm": 0.2338409227151746, "learning_rate": 0.00016962586397844417, "loss": 0.9203, "step": 6106 }, { "epoch": 0.58, "grad_norm": 0.30140296809951767, "learning_rate": 0.0001696145080193883, "loss": 1.1501, "step": 6107 }, { "epoch": 0.58, "grad_norm": 0.29957908772250824, "learning_rate": 0.00016960315031818563, "loss": 0.9797, "step": 6108 }, { "epoch": 0.58, "grad_norm": 0.2703387299476374, "learning_rate": 0.00016959179087512038, "loss": 1.1028, "step": 6109 }, { "epoch": 0.58, "grad_norm": 0.24345511227668115, "learning_rate": 0.0001695804296904768, "loss": 1.1393, "step": 6110 }, { "epoch": 0.58, "grad_norm": 0.28466641760021855, "learning_rate": 0.00016956906676453927, "loss": 1.0691, "step": 6111 }, { "epoch": 0.58, "grad_norm": 0.26942292513037813, "learning_rate": 0.00016955770209759206, "loss": 1.1351, "step": 6112 }, { "epoch": 0.58, "grad_norm": 0.2432370590626464, "learning_rate": 0.0001695463356899197, "loss": 1.111, "step": 6113 }, { "epoch": 0.58, "grad_norm": 0.2768776116799371, "learning_rate": 0.00016953496754180657, "loss": 1.0402, "step": 6114 }, { "epoch": 0.59, "grad_norm": 0.2782542666733751, "learning_rate": 0.00016952359765353716, "loss": 1.016, "step": 6115 }, { "epoch": 0.59, "grad_norm": 0.2606113670203369, "learning_rate": 0.00016951222602539604, "loss": 1.1145, "step": 6116 }, { "epoch": 0.59, "grad_norm": 0.28152618216858916, "learning_rate": 0.00016950085265766775, "loss": 1.0144, "step": 6117 }, { "epoch": 0.59, "grad_norm": 0.2762557373966403, "learning_rate": 0.000169489477550637, "loss": 1.0188, "step": 6118 }, { "epoch": 0.59, "grad_norm": 0.27464396847770955, "learning_rate": 0.00016947810070458836, "loss": 1.0246, "step": 6119 }, { "epoch": 0.59, "grad_norm": 0.2553714383261195, "learning_rate": 0.00016946672211980656, "loss": 1.0676, "step": 6120 }, { "epoch": 0.59, "grad_norm": 0.2663697635305504, "learning_rate": 0.00016945534179657642, "loss": 1.0607, "step": 6121 }, { "epoch": 0.59, "grad_norm": 0.2724085363177121, "learning_rate": 0.00016944395973518273, "loss": 1.0022, "step": 6122 }, { "epoch": 0.59, "grad_norm": 0.2830680768080686, "learning_rate": 0.00016943257593591025, "loss": 1.1051, "step": 6123 }, { "epoch": 0.59, "grad_norm": 0.28224872460306244, "learning_rate": 0.00016942119039904392, "loss": 1.066, "step": 6124 }, { "epoch": 0.59, "grad_norm": 0.26377009917280125, "learning_rate": 0.0001694098031248687, "loss": 1.0324, "step": 6125 }, { "epoch": 0.59, "grad_norm": 0.2718917219979864, "learning_rate": 0.0001693984141136695, "loss": 1.0499, "step": 6126 }, { "epoch": 0.59, "grad_norm": 0.259809210248362, "learning_rate": 0.0001693870233657314, "loss": 1.0218, "step": 6127 }, { "epoch": 0.59, "grad_norm": 0.2757737836543942, "learning_rate": 0.00016937563088133942, "loss": 1.0728, "step": 6128 }, { "epoch": 0.59, "grad_norm": 0.3068744212914902, "learning_rate": 0.0001693642366607787, "loss": 1.0689, "step": 6129 }, { "epoch": 0.59, "grad_norm": 0.2559474725346445, "learning_rate": 0.00016935284070433436, "loss": 0.9259, "step": 6130 }, { "epoch": 0.59, "grad_norm": 0.2889397480153887, "learning_rate": 0.00016934144301229155, "loss": 1.1747, "step": 6131 }, { "epoch": 0.59, "grad_norm": 0.3088905871334589, "learning_rate": 0.0001693300435849356, "loss": 1.172, "step": 6132 }, { "epoch": 0.59, "grad_norm": 0.30471384991519385, "learning_rate": 0.00016931864242255171, "loss": 1.0735, "step": 6133 }, { "epoch": 0.59, "grad_norm": 0.5872459177211959, "learning_rate": 0.00016930723952542523, "loss": 0.9714, "step": 6134 }, { "epoch": 0.59, "grad_norm": 0.26880571869976677, "learning_rate": 0.0001692958348938415, "loss": 1.0637, "step": 6135 }, { "epoch": 0.59, "grad_norm": 0.2842892019706417, "learning_rate": 0.000169284428528086, "loss": 1.0986, "step": 6136 }, { "epoch": 0.59, "grad_norm": 0.3139992368614553, "learning_rate": 0.0001692730204284441, "loss": 1.0542, "step": 6137 }, { "epoch": 0.59, "grad_norm": 0.2939973190730885, "learning_rate": 0.00016926161059520133, "loss": 1.018, "step": 6138 }, { "epoch": 0.59, "grad_norm": 0.2745235711764705, "learning_rate": 0.00016925019902864325, "loss": 1.0936, "step": 6139 }, { "epoch": 0.59, "grad_norm": 0.295538764046495, "learning_rate": 0.0001692387857290554, "loss": 1.238, "step": 6140 }, { "epoch": 0.59, "grad_norm": 0.2918293376304929, "learning_rate": 0.00016922737069672344, "loss": 1.0324, "step": 6141 }, { "epoch": 0.59, "grad_norm": 0.2661937879474406, "learning_rate": 0.000169215953931933, "loss": 1.1461, "step": 6142 }, { "epoch": 0.59, "grad_norm": 0.29698439715428343, "learning_rate": 0.0001692045354349698, "loss": 1.0072, "step": 6143 }, { "epoch": 0.59, "grad_norm": 0.2841385747639344, "learning_rate": 0.0001691931152061196, "loss": 1.0169, "step": 6144 }, { "epoch": 0.59, "grad_norm": 0.2525826472239577, "learning_rate": 0.00016918169324566825, "loss": 1.0821, "step": 6145 }, { "epoch": 0.59, "grad_norm": 0.30426659332063666, "learning_rate": 0.0001691702695539015, "loss": 1.0161, "step": 6146 }, { "epoch": 0.59, "grad_norm": 0.3247256246142582, "learning_rate": 0.00016915884413110536, "loss": 1.0768, "step": 6147 }, { "epoch": 0.59, "grad_norm": 0.28154171353151247, "learning_rate": 0.0001691474169775656, "loss": 1.0462, "step": 6148 }, { "epoch": 0.59, "grad_norm": 0.32206989451215157, "learning_rate": 0.0001691359880935683, "loss": 1.0335, "step": 6149 }, { "epoch": 0.59, "grad_norm": 0.29975094798814433, "learning_rate": 0.00016912455747939946, "loss": 1.08, "step": 6150 }, { "epoch": 0.59, "grad_norm": 0.2940671992246637, "learning_rate": 0.0001691131251353451, "loss": 1.0279, "step": 6151 }, { "epoch": 0.59, "grad_norm": 0.30973218098094907, "learning_rate": 0.00016910169106169138, "loss": 1.1113, "step": 6152 }, { "epoch": 0.59, "grad_norm": 0.2985525692876313, "learning_rate": 0.0001690902552587244, "loss": 1.0637, "step": 6153 }, { "epoch": 0.59, "grad_norm": 0.2539451576750145, "learning_rate": 0.00016907881772673032, "loss": 1.0424, "step": 6154 }, { "epoch": 0.59, "grad_norm": 0.2687758329509323, "learning_rate": 0.00016906737846599548, "loss": 1.0618, "step": 6155 }, { "epoch": 0.59, "grad_norm": 0.27971131955326073, "learning_rate": 0.00016905593747680602, "loss": 1.0893, "step": 6156 }, { "epoch": 0.59, "grad_norm": 0.2543125383692707, "learning_rate": 0.00016904449475944837, "loss": 1.1021, "step": 6157 }, { "epoch": 0.59, "grad_norm": 0.2789446404233202, "learning_rate": 0.00016903305031420885, "loss": 1.1103, "step": 6158 }, { "epoch": 0.59, "grad_norm": 0.28334066222200344, "learning_rate": 0.00016902160414137383, "loss": 1.065, "step": 6159 }, { "epoch": 0.59, "grad_norm": 0.2358577232682755, "learning_rate": 0.0001690101562412298, "loss": 1.021, "step": 6160 }, { "epoch": 0.59, "grad_norm": 0.27559840264421875, "learning_rate": 0.00016899870661406325, "loss": 1.1527, "step": 6161 }, { "epoch": 0.59, "grad_norm": 0.30571401380369834, "learning_rate": 0.0001689872552601607, "loss": 1.1118, "step": 6162 }, { "epoch": 0.59, "grad_norm": 0.29488378034589946, "learning_rate": 0.00016897580217980872, "loss": 1.1922, "step": 6163 }, { "epoch": 0.59, "grad_norm": 0.31253654893817673, "learning_rate": 0.00016896434737329394, "loss": 0.9932, "step": 6164 }, { "epoch": 0.59, "grad_norm": 0.271362147934014, "learning_rate": 0.00016895289084090304, "loss": 1.1036, "step": 6165 }, { "epoch": 0.59, "grad_norm": 0.2703212529224629, "learning_rate": 0.0001689414325829227, "loss": 1.1416, "step": 6166 }, { "epoch": 0.59, "grad_norm": 0.2824957796659181, "learning_rate": 0.0001689299725996397, "loss": 1.0145, "step": 6167 }, { "epoch": 0.59, "grad_norm": 0.2976711651462754, "learning_rate": 0.00016891851089134079, "loss": 1.0386, "step": 6168 }, { "epoch": 0.59, "grad_norm": 0.26975195101683597, "learning_rate": 0.00016890704745831282, "loss": 1.0879, "step": 6169 }, { "epoch": 0.59, "grad_norm": 0.2643048862785792, "learning_rate": 0.00016889558230084273, "loss": 1.0786, "step": 6170 }, { "epoch": 0.59, "grad_norm": 0.2717495195222049, "learning_rate": 0.00016888411541921735, "loss": 1.1159, "step": 6171 }, { "epoch": 0.59, "grad_norm": 0.2833620530227191, "learning_rate": 0.0001688726468137237, "loss": 1.097, "step": 6172 }, { "epoch": 0.59, "grad_norm": 0.25257033358758985, "learning_rate": 0.00016886117648464878, "loss": 1.0967, "step": 6173 }, { "epoch": 0.59, "grad_norm": 0.27530661316906846, "learning_rate": 0.00016884970443227965, "loss": 1.0395, "step": 6174 }, { "epoch": 0.59, "grad_norm": 0.2527678120212698, "learning_rate": 0.00016883823065690337, "loss": 1.0305, "step": 6175 }, { "epoch": 0.59, "grad_norm": 0.2574184683768036, "learning_rate": 0.00016882675515880714, "loss": 1.0878, "step": 6176 }, { "epoch": 0.59, "grad_norm": 0.25761118308699854, "learning_rate": 0.00016881527793827808, "loss": 0.9574, "step": 6177 }, { "epoch": 0.59, "grad_norm": 0.27107127416113136, "learning_rate": 0.00016880379899560346, "loss": 1.1092, "step": 6178 }, { "epoch": 0.59, "grad_norm": 0.2842811642803488, "learning_rate": 0.0001687923183310705, "loss": 1.1637, "step": 6179 }, { "epoch": 0.59, "grad_norm": 0.2728440826452907, "learning_rate": 0.00016878083594496656, "loss": 0.9771, "step": 6180 }, { "epoch": 0.59, "grad_norm": 0.2886790568351847, "learning_rate": 0.000168769351837579, "loss": 0.9462, "step": 6181 }, { "epoch": 0.59, "grad_norm": 0.25378061719229456, "learning_rate": 0.00016875786600919514, "loss": 1.0195, "step": 6182 }, { "epoch": 0.59, "grad_norm": 0.2942815181656749, "learning_rate": 0.00016874637846010248, "loss": 1.1533, "step": 6183 }, { "epoch": 0.59, "grad_norm": 0.26947359910559543, "learning_rate": 0.00016873488919058854, "loss": 0.9764, "step": 6184 }, { "epoch": 0.59, "grad_norm": 0.2997412184794112, "learning_rate": 0.00016872339820094074, "loss": 1.0704, "step": 6185 }, { "epoch": 0.59, "grad_norm": 0.241116336668137, "learning_rate": 0.00016871190549144673, "loss": 1.0213, "step": 6186 }, { "epoch": 0.59, "grad_norm": 0.29798499768309145, "learning_rate": 0.00016870041106239412, "loss": 1.0051, "step": 6187 }, { "epoch": 0.59, "grad_norm": 0.28286155546938635, "learning_rate": 0.00016868891491407054, "loss": 1.1061, "step": 6188 }, { "epoch": 0.59, "grad_norm": 0.24398969837876033, "learning_rate": 0.00016867741704676368, "loss": 1.0401, "step": 6189 }, { "epoch": 0.59, "grad_norm": 0.28740259957474623, "learning_rate": 0.0001686659174607613, "loss": 1.1156, "step": 6190 }, { "epoch": 0.59, "grad_norm": 0.2892058554716867, "learning_rate": 0.0001686544161563512, "loss": 1.0922, "step": 6191 }, { "epoch": 0.59, "grad_norm": 0.31955410366097675, "learning_rate": 0.00016864291313382115, "loss": 1.0641, "step": 6192 }, { "epoch": 0.59, "grad_norm": 0.2794578951295337, "learning_rate": 0.00016863140839345908, "loss": 1.0945, "step": 6193 }, { "epoch": 0.59, "grad_norm": 0.30665149624277827, "learning_rate": 0.00016861990193555292, "loss": 1.0357, "step": 6194 }, { "epoch": 0.59, "grad_norm": 0.27440411091642547, "learning_rate": 0.00016860839376039053, "loss": 1.2011, "step": 6195 }, { "epoch": 0.59, "grad_norm": 0.2817725535852179, "learning_rate": 0.00016859688386826, "loss": 1.0482, "step": 6196 }, { "epoch": 0.59, "grad_norm": 0.3011982191792621, "learning_rate": 0.0001685853722594493, "loss": 1.0853, "step": 6197 }, { "epoch": 0.59, "grad_norm": 0.29959655201744034, "learning_rate": 0.00016857385893424658, "loss": 1.0111, "step": 6198 }, { "epoch": 0.59, "grad_norm": 0.2802735304530325, "learning_rate": 0.00016856234389293995, "loss": 1.1118, "step": 6199 }, { "epoch": 0.59, "grad_norm": 0.30399590914216984, "learning_rate": 0.00016855082713581758, "loss": 1.0576, "step": 6200 }, { "epoch": 0.59, "grad_norm": 0.28089634625081406, "learning_rate": 0.00016853930866316766, "loss": 1.1154, "step": 6201 }, { "epoch": 0.59, "grad_norm": 0.33064903413133756, "learning_rate": 0.00016852778847527847, "loss": 1.0004, "step": 6202 }, { "epoch": 0.59, "grad_norm": 0.28719188673845863, "learning_rate": 0.00016851626657243834, "loss": 1.0599, "step": 6203 }, { "epoch": 0.59, "grad_norm": 0.2562638224677485, "learning_rate": 0.0001685047429549355, "loss": 1.0726, "step": 6204 }, { "epoch": 0.59, "grad_norm": 0.27769483083836394, "learning_rate": 0.00016849321762305846, "loss": 1.0452, "step": 6205 }, { "epoch": 0.59, "grad_norm": 0.311376243880295, "learning_rate": 0.00016848169057709562, "loss": 1.1139, "step": 6206 }, { "epoch": 0.59, "grad_norm": 0.33186169126683296, "learning_rate": 0.00016847016181733543, "loss": 1.1708, "step": 6207 }, { "epoch": 0.59, "grad_norm": 0.31561407579512624, "learning_rate": 0.0001684586313440664, "loss": 1.0678, "step": 6208 }, { "epoch": 0.59, "grad_norm": 0.2907024924078061, "learning_rate": 0.00016844709915757707, "loss": 1.0885, "step": 6209 }, { "epoch": 0.59, "grad_norm": 0.29061404968369564, "learning_rate": 0.0001684355652581561, "loss": 1.0829, "step": 6210 }, { "epoch": 0.59, "grad_norm": 0.2379708775530534, "learning_rate": 0.00016842402964609209, "loss": 1.0688, "step": 6211 }, { "epoch": 0.59, "grad_norm": 0.278600564728756, "learning_rate": 0.00016841249232167372, "loss": 0.975, "step": 6212 }, { "epoch": 0.59, "grad_norm": 0.2995894305662367, "learning_rate": 0.00016840095328518975, "loss": 1.0657, "step": 6213 }, { "epoch": 0.59, "grad_norm": 0.28752497877593164, "learning_rate": 0.00016838941253692895, "loss": 1.1441, "step": 6214 }, { "epoch": 0.59, "grad_norm": 0.24322263909794603, "learning_rate": 0.00016837787007718008, "loss": 0.9605, "step": 6215 }, { "epoch": 0.59, "grad_norm": 0.2706035311294752, "learning_rate": 0.00016836632590623208, "loss": 1.0975, "step": 6216 }, { "epoch": 0.59, "grad_norm": 0.32174695455051877, "learning_rate": 0.0001683547800243738, "loss": 1.0976, "step": 6217 }, { "epoch": 0.59, "grad_norm": 0.2886586498798772, "learning_rate": 0.00016834323243189415, "loss": 1.1534, "step": 6218 }, { "epoch": 0.59, "grad_norm": 0.2855976937236902, "learning_rate": 0.00016833168312908222, "loss": 1.1865, "step": 6219 }, { "epoch": 0.6, "grad_norm": 0.2667954790754751, "learning_rate": 0.00016832013211622694, "loss": 1.2281, "step": 6220 }, { "epoch": 0.6, "grad_norm": 0.24933114672059709, "learning_rate": 0.00016830857939361738, "loss": 0.9391, "step": 6221 }, { "epoch": 0.6, "grad_norm": 0.27072321221050405, "learning_rate": 0.0001682970249615427, "loss": 1.0534, "step": 6222 }, { "epoch": 0.6, "grad_norm": 0.28347062120739175, "learning_rate": 0.0001682854688202921, "loss": 1.0556, "step": 6223 }, { "epoch": 0.6, "grad_norm": 0.31128711878834986, "learning_rate": 0.00016827391097015473, "loss": 1.0196, "step": 6224 }, { "epoch": 0.6, "grad_norm": 0.29466538688649235, "learning_rate": 0.00016826235141141976, "loss": 1.1365, "step": 6225 }, { "epoch": 0.6, "grad_norm": 0.25144131922524754, "learning_rate": 0.0001682507901443766, "loss": 1.01, "step": 6226 }, { "epoch": 0.6, "grad_norm": 0.2613185058399168, "learning_rate": 0.00016823922716931451, "loss": 1.0745, "step": 6227 }, { "epoch": 0.6, "grad_norm": 0.27346829355051, "learning_rate": 0.00016822766248652288, "loss": 1.0676, "step": 6228 }, { "epoch": 0.6, "grad_norm": 0.28753894574718736, "learning_rate": 0.0001682160960962911, "loss": 1.1616, "step": 6229 }, { "epoch": 0.6, "grad_norm": 0.27774064585558034, "learning_rate": 0.00016820452799890865, "loss": 1.0945, "step": 6230 }, { "epoch": 0.6, "grad_norm": 0.29844620149160067, "learning_rate": 0.00016819295819466503, "loss": 1.1999, "step": 6231 }, { "epoch": 0.6, "grad_norm": 0.23278252939346147, "learning_rate": 0.00016818138668384976, "loss": 1.1572, "step": 6232 }, { "epoch": 0.6, "grad_norm": 0.2910036401832354, "learning_rate": 0.00016816981346675242, "loss": 1.0418, "step": 6233 }, { "epoch": 0.6, "grad_norm": 0.2679309567861235, "learning_rate": 0.00016815823854366267, "loss": 1.1591, "step": 6234 }, { "epoch": 0.6, "grad_norm": 0.28959484717417816, "learning_rate": 0.0001681466619148702, "loss": 1.0724, "step": 6235 }, { "epoch": 0.6, "grad_norm": 0.24969815430510078, "learning_rate": 0.00016813508358066466, "loss": 1.0834, "step": 6236 }, { "epoch": 0.6, "grad_norm": 0.2817944152182526, "learning_rate": 0.00016812350354133583, "loss": 1.0752, "step": 6237 }, { "epoch": 0.6, "grad_norm": 0.3095644924541878, "learning_rate": 0.00016811192179717353, "loss": 1.0517, "step": 6238 }, { "epoch": 0.6, "grad_norm": 0.2826064816728548, "learning_rate": 0.00016810033834846754, "loss": 0.9918, "step": 6239 }, { "epoch": 0.6, "grad_norm": 0.2670350115993338, "learning_rate": 0.0001680887531955078, "loss": 1.0787, "step": 6240 }, { "epoch": 0.6, "grad_norm": 0.2786656526566983, "learning_rate": 0.00016807716633858425, "loss": 1.1872, "step": 6241 }, { "epoch": 0.6, "grad_norm": 0.24633827043340034, "learning_rate": 0.00016806557777798676, "loss": 1.1754, "step": 6242 }, { "epoch": 0.6, "grad_norm": 0.2510571689731006, "learning_rate": 0.00016805398751400548, "loss": 1.0483, "step": 6243 }, { "epoch": 0.6, "grad_norm": 0.25726341692210536, "learning_rate": 0.00016804239554693036, "loss": 1.0626, "step": 6244 }, { "epoch": 0.6, "grad_norm": 0.2973987113917484, "learning_rate": 0.00016803080187705152, "loss": 1.1194, "step": 6245 }, { "epoch": 0.6, "grad_norm": 0.24985679408993092, "learning_rate": 0.00016801920650465912, "loss": 0.9716, "step": 6246 }, { "epoch": 0.6, "grad_norm": 0.2373094805089608, "learning_rate": 0.00016800760943004334, "loss": 0.9422, "step": 6247 }, { "epoch": 0.6, "grad_norm": 0.30043718466849206, "learning_rate": 0.0001679960106534944, "loss": 1.0484, "step": 6248 }, { "epoch": 0.6, "grad_norm": 0.286122704125855, "learning_rate": 0.0001679844101753025, "loss": 1.1008, "step": 6249 }, { "epoch": 0.6, "grad_norm": 0.262959271095953, "learning_rate": 0.00016797280799575804, "loss": 1.1192, "step": 6250 }, { "epoch": 0.6, "grad_norm": 0.2715165644061917, "learning_rate": 0.00016796120411515138, "loss": 1.0921, "step": 6251 }, { "epoch": 0.6, "grad_norm": 0.28638836988275007, "learning_rate": 0.00016794959853377284, "loss": 1.1773, "step": 6252 }, { "epoch": 0.6, "grad_norm": 0.2776266911958128, "learning_rate": 0.00016793799125191288, "loss": 1.0659, "step": 6253 }, { "epoch": 0.6, "grad_norm": 0.28766018953162986, "learning_rate": 0.00016792638226986202, "loss": 1.0234, "step": 6254 }, { "epoch": 0.6, "grad_norm": 0.24147276451994776, "learning_rate": 0.00016791477158791077, "loss": 1.0404, "step": 6255 }, { "epoch": 0.6, "grad_norm": 0.28431529367278485, "learning_rate": 0.00016790315920634964, "loss": 1.1169, "step": 6256 }, { "epoch": 0.6, "grad_norm": 0.297855580080018, "learning_rate": 0.00016789154512546927, "loss": 1.1081, "step": 6257 }, { "epoch": 0.6, "grad_norm": 0.26389657019849716, "learning_rate": 0.00016787992934556032, "loss": 1.0314, "step": 6258 }, { "epoch": 0.6, "grad_norm": 0.2928163841306302, "learning_rate": 0.0001678683118669135, "loss": 1.2123, "step": 6259 }, { "epoch": 0.6, "grad_norm": 0.2975129278676374, "learning_rate": 0.00016785669268981949, "loss": 1.1446, "step": 6260 }, { "epoch": 0.6, "grad_norm": 0.2853505641305865, "learning_rate": 0.00016784507181456912, "loss": 0.9765, "step": 6261 }, { "epoch": 0.6, "grad_norm": 0.29085919322220416, "learning_rate": 0.0001678334492414532, "loss": 1.0566, "step": 6262 }, { "epoch": 0.6, "grad_norm": 0.2951008224142174, "learning_rate": 0.00016782182497076257, "loss": 1.0795, "step": 6263 }, { "epoch": 0.6, "grad_norm": 0.2866385453022363, "learning_rate": 0.00016781019900278813, "loss": 1.043, "step": 6264 }, { "epoch": 0.6, "grad_norm": 0.28440677949059723, "learning_rate": 0.00016779857133782087, "loss": 1.0659, "step": 6265 }, { "epoch": 0.6, "grad_norm": 0.2513177187771401, "learning_rate": 0.0001677869419761517, "loss": 1.0256, "step": 6266 }, { "epoch": 0.6, "grad_norm": 0.28595770700712264, "learning_rate": 0.00016777531091807175, "loss": 1.1266, "step": 6267 }, { "epoch": 0.6, "grad_norm": 0.2514182299644808, "learning_rate": 0.00016776367816387206, "loss": 1.118, "step": 6268 }, { "epoch": 0.6, "grad_norm": 0.2766744455914722, "learning_rate": 0.00016775204371384373, "loss": 1.0138, "step": 6269 }, { "epoch": 0.6, "grad_norm": 0.30206625962707667, "learning_rate": 0.0001677404075682779, "loss": 1.194, "step": 6270 }, { "epoch": 0.6, "grad_norm": 0.2631889376384602, "learning_rate": 0.0001677287697274658, "loss": 0.9856, "step": 6271 }, { "epoch": 0.6, "grad_norm": 0.25406693794632235, "learning_rate": 0.0001677171301916987, "loss": 1.0063, "step": 6272 }, { "epoch": 0.6, "grad_norm": 0.28950694169287894, "learning_rate": 0.00016770548896126783, "loss": 1.0609, "step": 6273 }, { "epoch": 0.6, "grad_norm": 0.2761692174626653, "learning_rate": 0.00016769384603646455, "loss": 1.0876, "step": 6274 }, { "epoch": 0.6, "grad_norm": 0.27628317931169183, "learning_rate": 0.00016768220141758023, "loss": 1.0243, "step": 6275 }, { "epoch": 0.6, "grad_norm": 0.2660866756756718, "learning_rate": 0.0001676705551049063, "loss": 1.0615, "step": 6276 }, { "epoch": 0.6, "grad_norm": 0.24048261615675012, "learning_rate": 0.0001676589070987342, "loss": 1.152, "step": 6277 }, { "epoch": 0.6, "grad_norm": 0.30207736888758835, "learning_rate": 0.0001676472573993554, "loss": 1.1271, "step": 6278 }, { "epoch": 0.6, "grad_norm": 0.28419649601292135, "learning_rate": 0.0001676356060070615, "loss": 1.0366, "step": 6279 }, { "epoch": 0.6, "grad_norm": 0.25939328583829085, "learning_rate": 0.00016762395292214404, "loss": 1.1239, "step": 6280 }, { "epoch": 0.6, "grad_norm": 0.24732667648488688, "learning_rate": 0.00016761229814489466, "loss": 1.0374, "step": 6281 }, { "epoch": 0.6, "grad_norm": 0.28033363094981284, "learning_rate": 0.00016760064167560502, "loss": 1.1033, "step": 6282 }, { "epoch": 0.6, "grad_norm": 0.2837131506670996, "learning_rate": 0.00016758898351456683, "loss": 1.1314, "step": 6283 }, { "epoch": 0.6, "grad_norm": 0.3341961682280646, "learning_rate": 0.00016757732366207186, "loss": 1.1564, "step": 6284 }, { "epoch": 0.6, "grad_norm": 0.2789710186742297, "learning_rate": 0.00016756566211841188, "loss": 1.0238, "step": 6285 }, { "epoch": 0.6, "grad_norm": 0.3018369450049017, "learning_rate": 0.00016755399888387874, "loss": 1.0962, "step": 6286 }, { "epoch": 0.6, "grad_norm": 0.265390590494802, "learning_rate": 0.00016754233395876439, "loss": 1.0181, "step": 6287 }, { "epoch": 0.6, "grad_norm": 0.2849229079192597, "learning_rate": 0.0001675306673433606, "loss": 1.0362, "step": 6288 }, { "epoch": 0.6, "grad_norm": 0.279551424881973, "learning_rate": 0.00016751899903795947, "loss": 1.0863, "step": 6289 }, { "epoch": 0.6, "grad_norm": 0.30798802423123717, "learning_rate": 0.00016750732904285292, "loss": 1.0984, "step": 6290 }, { "epoch": 0.6, "grad_norm": 0.28386059483142134, "learning_rate": 0.00016749565735833306, "loss": 1.0513, "step": 6291 }, { "epoch": 0.6, "grad_norm": 0.2971213923065991, "learning_rate": 0.0001674839839846919, "loss": 1.0989, "step": 6292 }, { "epoch": 0.6, "grad_norm": 0.279028774197727, "learning_rate": 0.0001674723089222217, "loss": 0.9628, "step": 6293 }, { "epoch": 0.6, "grad_norm": 0.3005236299374139, "learning_rate": 0.00016746063217121452, "loss": 0.9983, "step": 6294 }, { "epoch": 0.6, "grad_norm": 0.2556549845006676, "learning_rate": 0.00016744895373196265, "loss": 1.0838, "step": 6295 }, { "epoch": 0.6, "grad_norm": 0.2754893169400004, "learning_rate": 0.00016743727360475833, "loss": 1.0528, "step": 6296 }, { "epoch": 0.6, "grad_norm": 0.2525035036290481, "learning_rate": 0.00016742559178989383, "loss": 1.1438, "step": 6297 }, { "epoch": 0.6, "grad_norm": 0.2994082116871912, "learning_rate": 0.00016741390828766152, "loss": 1.0641, "step": 6298 }, { "epoch": 0.6, "grad_norm": 0.2915966725527121, "learning_rate": 0.00016740222309835382, "loss": 1.1256, "step": 6299 }, { "epoch": 0.6, "grad_norm": 0.2976305302047498, "learning_rate": 0.00016739053622226305, "loss": 1.0256, "step": 6300 }, { "epoch": 0.6, "grad_norm": 0.30003197401374204, "learning_rate": 0.00016737884765968185, "loss": 1.1071, "step": 6301 }, { "epoch": 0.6, "grad_norm": 0.25781935696696723, "learning_rate": 0.0001673671574109026, "loss": 1.0172, "step": 6302 }, { "epoch": 0.6, "grad_norm": 0.2854845428390189, "learning_rate": 0.00016735546547621787, "loss": 1.1589, "step": 6303 }, { "epoch": 0.6, "grad_norm": 0.30166239723579663, "learning_rate": 0.00016734377185592032, "loss": 1.1384, "step": 6304 }, { "epoch": 0.6, "grad_norm": 0.28301402595792946, "learning_rate": 0.00016733207655030254, "loss": 1.0882, "step": 6305 }, { "epoch": 0.6, "grad_norm": 0.25169386672382876, "learning_rate": 0.00016732037955965724, "loss": 1.1254, "step": 6306 }, { "epoch": 0.6, "grad_norm": 0.28028422765627997, "learning_rate": 0.00016730868088427712, "loss": 1.0061, "step": 6307 }, { "epoch": 0.6, "grad_norm": 0.29537665905438676, "learning_rate": 0.00016729698052445497, "loss": 1.0539, "step": 6308 }, { "epoch": 0.6, "grad_norm": 0.28211755903503166, "learning_rate": 0.0001672852784804836, "loss": 1.0818, "step": 6309 }, { "epoch": 0.6, "grad_norm": 0.2685003161595143, "learning_rate": 0.00016727357475265582, "loss": 0.9727, "step": 6310 }, { "epoch": 0.6, "grad_norm": 0.271237791328004, "learning_rate": 0.00016726186934126457, "loss": 1.0693, "step": 6311 }, { "epoch": 0.6, "grad_norm": 0.29234966306787097, "learning_rate": 0.00016725016224660274, "loss": 1.0985, "step": 6312 }, { "epoch": 0.6, "grad_norm": 0.2415128764029434, "learning_rate": 0.00016723845346896336, "loss": 1.0949, "step": 6313 }, { "epoch": 0.6, "grad_norm": 0.28487373231440377, "learning_rate": 0.00016722674300863942, "loss": 1.0031, "step": 6314 }, { "epoch": 0.6, "grad_norm": 0.24380319070580195, "learning_rate": 0.00016721503086592398, "loss": 0.9791, "step": 6315 }, { "epoch": 0.6, "grad_norm": 0.2854973044582919, "learning_rate": 0.00016720331704111015, "loss": 1.0771, "step": 6316 }, { "epoch": 0.6, "grad_norm": 0.2926702434021319, "learning_rate": 0.0001671916015344911, "loss": 1.0796, "step": 6317 }, { "epoch": 0.6, "grad_norm": 0.2625594361259241, "learning_rate": 0.00016717988434636, "loss": 1.0848, "step": 6318 }, { "epoch": 0.6, "grad_norm": 0.272123940034111, "learning_rate": 0.00016716816547701003, "loss": 1.0751, "step": 6319 }, { "epoch": 0.6, "grad_norm": 0.25026070785323234, "learning_rate": 0.00016715644492673452, "loss": 1.0925, "step": 6320 }, { "epoch": 0.6, "grad_norm": 0.28111670498694746, "learning_rate": 0.00016714472269582678, "loss": 1.1395, "step": 6321 }, { "epoch": 0.6, "grad_norm": 0.30210515943962146, "learning_rate": 0.00016713299878458012, "loss": 1.117, "step": 6322 }, { "epoch": 0.6, "grad_norm": 0.2717616248041951, "learning_rate": 0.00016712127319328803, "loss": 1.0637, "step": 6323 }, { "epoch": 0.61, "grad_norm": 0.24801900424048062, "learning_rate": 0.00016710954592224386, "loss": 1.1057, "step": 6324 }, { "epoch": 0.61, "grad_norm": 0.2898349287109403, "learning_rate": 0.00016709781697174113, "loss": 1.0904, "step": 6325 }, { "epoch": 0.61, "grad_norm": 0.2615926237179176, "learning_rate": 0.00016708608634207338, "loss": 1.013, "step": 6326 }, { "epoch": 0.61, "grad_norm": 0.25404922506608696, "learning_rate": 0.00016707435403353412, "loss": 1.0322, "step": 6327 }, { "epoch": 0.61, "grad_norm": 0.2993573106392043, "learning_rate": 0.000167062620046417, "loss": 1.1574, "step": 6328 }, { "epoch": 0.61, "grad_norm": 0.27922342474327827, "learning_rate": 0.0001670508843810157, "loss": 1.0711, "step": 6329 }, { "epoch": 0.61, "grad_norm": 0.3011854686091883, "learning_rate": 0.00016703914703762387, "loss": 1.054, "step": 6330 }, { "epoch": 0.61, "grad_norm": 0.25394776998572083, "learning_rate": 0.00016702740801653523, "loss": 1.0363, "step": 6331 }, { "epoch": 0.61, "grad_norm": 0.32790248402799294, "learning_rate": 0.00016701566731804358, "loss": 1.1621, "step": 6332 }, { "epoch": 0.61, "grad_norm": 0.2825346749177906, "learning_rate": 0.00016700392494244277, "loss": 0.9924, "step": 6333 }, { "epoch": 0.61, "grad_norm": 0.2615222834120971, "learning_rate": 0.0001669921808900266, "loss": 1.1129, "step": 6334 }, { "epoch": 0.61, "grad_norm": 0.2685877115229659, "learning_rate": 0.000166980435161089, "loss": 1.0624, "step": 6335 }, { "epoch": 0.61, "grad_norm": 0.2782305195102996, "learning_rate": 0.00016696868775592394, "loss": 1.1125, "step": 6336 }, { "epoch": 0.61, "grad_norm": 0.26429721749726304, "learning_rate": 0.00016695693867482535, "loss": 1.0898, "step": 6337 }, { "epoch": 0.61, "grad_norm": 0.3006719307055734, "learning_rate": 0.0001669451879180873, "loss": 1.1202, "step": 6338 }, { "epoch": 0.61, "grad_norm": 0.2546442290098055, "learning_rate": 0.00016693343548600386, "loss": 1.0205, "step": 6339 }, { "epoch": 0.61, "grad_norm": 0.3163145075653169, "learning_rate": 0.00016692168137886912, "loss": 1.0082, "step": 6340 }, { "epoch": 0.61, "grad_norm": 0.26767591471900337, "learning_rate": 0.00016690992559697726, "loss": 1.0872, "step": 6341 }, { "epoch": 0.61, "grad_norm": 0.3092574347023085, "learning_rate": 0.00016689816814062245, "loss": 1.0457, "step": 6342 }, { "epoch": 0.61, "grad_norm": 0.2637784366965473, "learning_rate": 0.00016688640901009894, "loss": 1.0477, "step": 6343 }, { "epoch": 0.61, "grad_norm": 0.30076256747451274, "learning_rate": 0.000166874648205701, "loss": 1.1079, "step": 6344 }, { "epoch": 0.61, "grad_norm": 0.2923545840530909, "learning_rate": 0.00016686288572772295, "loss": 1.2222, "step": 6345 }, { "epoch": 0.61, "grad_norm": 0.3153084026273102, "learning_rate": 0.00016685112157645916, "loss": 0.9666, "step": 6346 }, { "epoch": 0.61, "grad_norm": 0.2963713357046184, "learning_rate": 0.00016683935575220407, "loss": 1.0839, "step": 6347 }, { "epoch": 0.61, "grad_norm": 0.2737775027067931, "learning_rate": 0.00016682758825525208, "loss": 1.1275, "step": 6348 }, { "epoch": 0.61, "grad_norm": 0.2800942528032241, "learning_rate": 0.00016681581908589772, "loss": 0.9692, "step": 6349 }, { "epoch": 0.61, "grad_norm": 0.3109943800000079, "learning_rate": 0.00016680404824443546, "loss": 1.0421, "step": 6350 }, { "epoch": 0.61, "grad_norm": 0.27462352106623983, "learning_rate": 0.0001667922757311599, "loss": 1.1845, "step": 6351 }, { "epoch": 0.61, "grad_norm": 0.32025726456616344, "learning_rate": 0.0001667805015463657, "loss": 1.0136, "step": 6352 }, { "epoch": 0.61, "grad_norm": 0.301660874521527, "learning_rate": 0.00016676872569034744, "loss": 1.1865, "step": 6353 }, { "epoch": 0.61, "grad_norm": 0.29842518248408173, "learning_rate": 0.00016675694816339987, "loss": 1.2503, "step": 6354 }, { "epoch": 0.61, "grad_norm": 0.26028217660743824, "learning_rate": 0.00016674516896581773, "loss": 0.9257, "step": 6355 }, { "epoch": 0.61, "grad_norm": 0.290277459931021, "learning_rate": 0.00016673338809789577, "loss": 1.0985, "step": 6356 }, { "epoch": 0.61, "grad_norm": 0.27210815585379544, "learning_rate": 0.00016672160555992885, "loss": 1.1111, "step": 6357 }, { "epoch": 0.61, "grad_norm": 0.27064221588352777, "learning_rate": 0.0001667098213522118, "loss": 0.9628, "step": 6358 }, { "epoch": 0.61, "grad_norm": 0.31149953692608723, "learning_rate": 0.00016669803547503958, "loss": 1.0404, "step": 6359 }, { "epoch": 0.61, "grad_norm": 0.2859751349658579, "learning_rate": 0.0001666862479287071, "loss": 0.9679, "step": 6360 }, { "epoch": 0.61, "grad_norm": 0.29734676588988057, "learning_rate": 0.0001666744587135093, "loss": 1.1466, "step": 6361 }, { "epoch": 0.61, "grad_norm": 0.2695100397906395, "learning_rate": 0.00016666266782974133, "loss": 1.0106, "step": 6362 }, { "epoch": 0.61, "grad_norm": 0.33059150738359744, "learning_rate": 0.00016665087527769815, "loss": 1.1697, "step": 6363 }, { "epoch": 0.61, "grad_norm": 0.2783320081837106, "learning_rate": 0.00016663908105767495, "loss": 1.0822, "step": 6364 }, { "epoch": 0.61, "grad_norm": 0.31565170424334077, "learning_rate": 0.00016662728516996688, "loss": 1.1806, "step": 6365 }, { "epoch": 0.61, "grad_norm": 0.2709155225707643, "learning_rate": 0.0001666154876148691, "loss": 1.0718, "step": 6366 }, { "epoch": 0.61, "grad_norm": 0.2743956028640576, "learning_rate": 0.00016660368839267693, "loss": 1.1101, "step": 6367 }, { "epoch": 0.61, "grad_norm": 0.3083906678441462, "learning_rate": 0.00016659188750368554, "loss": 1.1813, "step": 6368 }, { "epoch": 0.61, "grad_norm": 0.27005425077077816, "learning_rate": 0.00016658008494819032, "loss": 1.01, "step": 6369 }, { "epoch": 0.61, "grad_norm": 0.3190770015107477, "learning_rate": 0.00016656828072648665, "loss": 1.0918, "step": 6370 }, { "epoch": 0.61, "grad_norm": 0.2786920758144284, "learning_rate": 0.00016655647483886988, "loss": 0.9536, "step": 6371 }, { "epoch": 0.61, "grad_norm": 0.28799801927513924, "learning_rate": 0.00016654466728563557, "loss": 1.1653, "step": 6372 }, { "epoch": 0.61, "grad_norm": 0.2740543474851488, "learning_rate": 0.00016653285806707908, "loss": 1.0493, "step": 6373 }, { "epoch": 0.61, "grad_norm": 0.283012370411115, "learning_rate": 0.000166521047183496, "loss": 1.0719, "step": 6374 }, { "epoch": 0.61, "grad_norm": 0.2866968650229277, "learning_rate": 0.00016650923463518196, "loss": 0.9907, "step": 6375 }, { "epoch": 0.61, "grad_norm": 0.2802944367538119, "learning_rate": 0.00016649742042243248, "loss": 1.0706, "step": 6376 }, { "epoch": 0.61, "grad_norm": 0.27154248970374756, "learning_rate": 0.00016648560454554328, "loss": 1.1223, "step": 6377 }, { "epoch": 0.61, "grad_norm": 0.2838775500172997, "learning_rate": 0.00016647378700481005, "loss": 1.0581, "step": 6378 }, { "epoch": 0.61, "grad_norm": 0.2920529073936615, "learning_rate": 0.00016646196780052848, "loss": 1.0481, "step": 6379 }, { "epoch": 0.61, "grad_norm": 0.2670726827629129, "learning_rate": 0.00016645014693299442, "loss": 1.0974, "step": 6380 }, { "epoch": 0.61, "grad_norm": 0.24730959097979163, "learning_rate": 0.00016643832440250367, "loss": 1.1209, "step": 6381 }, { "epoch": 0.61, "grad_norm": 0.2947114263680111, "learning_rate": 0.00016642650020935214, "loss": 0.9936, "step": 6382 }, { "epoch": 0.61, "grad_norm": 0.3233060078932617, "learning_rate": 0.00016641467435383564, "loss": 1.1597, "step": 6383 }, { "epoch": 0.61, "grad_norm": 0.26002181675498415, "learning_rate": 0.00016640284683625017, "loss": 0.9699, "step": 6384 }, { "epoch": 0.61, "grad_norm": 0.26273440181552443, "learning_rate": 0.0001663910176568918, "loss": 1.1475, "step": 6385 }, { "epoch": 0.61, "grad_norm": 0.24674523967677028, "learning_rate": 0.00016637918681605639, "loss": 1.0134, "step": 6386 }, { "epoch": 0.61, "grad_norm": 0.2618701917902359, "learning_rate": 0.00016636735431404019, "loss": 1.1473, "step": 6387 }, { "epoch": 0.61, "grad_norm": 0.28468586915692684, "learning_rate": 0.00016635552015113918, "loss": 1.0526, "step": 6388 }, { "epoch": 0.61, "grad_norm": 0.3028992395965799, "learning_rate": 0.0001663436843276496, "loss": 1.0536, "step": 6389 }, { "epoch": 0.61, "grad_norm": 0.2789358949175681, "learning_rate": 0.00016633184684386763, "loss": 1.1303, "step": 6390 }, { "epoch": 0.61, "grad_norm": 0.29126579487824283, "learning_rate": 0.00016632000770008947, "loss": 1.0763, "step": 6391 }, { "epoch": 0.61, "grad_norm": 0.27542318323495574, "learning_rate": 0.0001663081668966115, "loss": 1.1802, "step": 6392 }, { "epoch": 0.61, "grad_norm": 0.30182241732147397, "learning_rate": 0.00016629632443372993, "loss": 1.054, "step": 6393 }, { "epoch": 0.61, "grad_norm": 0.2819317111614903, "learning_rate": 0.0001662844803117412, "loss": 1.0247, "step": 6394 }, { "epoch": 0.61, "grad_norm": 0.2813661907175226, "learning_rate": 0.00016627263453094168, "loss": 1.153, "step": 6395 }, { "epoch": 0.61, "grad_norm": 0.3156855165232155, "learning_rate": 0.00016626078709162782, "loss": 1.1216, "step": 6396 }, { "epoch": 0.61, "grad_norm": 0.2762036811217373, "learning_rate": 0.00016624893799409613, "loss": 1.0578, "step": 6397 }, { "epoch": 0.61, "grad_norm": 0.25813174874980066, "learning_rate": 0.00016623708723864314, "loss": 1.0028, "step": 6398 }, { "epoch": 0.61, "grad_norm": 0.2547111721851036, "learning_rate": 0.0001662252348255654, "loss": 1.0322, "step": 6399 }, { "epoch": 0.61, "grad_norm": 0.2670666010121307, "learning_rate": 0.00016621338075515954, "loss": 1.0951, "step": 6400 }, { "epoch": 0.61, "grad_norm": 0.25941920255451006, "learning_rate": 0.00016620152502772224, "loss": 1.009, "step": 6401 }, { "epoch": 0.61, "grad_norm": 0.32953249149853675, "learning_rate": 0.00016618966764355016, "loss": 1.1824, "step": 6402 }, { "epoch": 0.61, "grad_norm": 0.3400097911397109, "learning_rate": 0.00016617780860294002, "loss": 1.0785, "step": 6403 }, { "epoch": 0.61, "grad_norm": 0.2606374235637558, "learning_rate": 0.00016616594790618865, "loss": 1.0066, "step": 6404 }, { "epoch": 0.61, "grad_norm": 0.28689829208173906, "learning_rate": 0.00016615408555359284, "loss": 1.1033, "step": 6405 }, { "epoch": 0.61, "grad_norm": 0.275071242493895, "learning_rate": 0.00016614222154544948, "loss": 1.0145, "step": 6406 }, { "epoch": 0.61, "grad_norm": 0.3036143858185991, "learning_rate": 0.00016613035588205542, "loss": 0.976, "step": 6407 }, { "epoch": 0.61, "grad_norm": 0.2739250339275323, "learning_rate": 0.00016611848856370768, "loss": 0.9842, "step": 6408 }, { "epoch": 0.61, "grad_norm": 0.269718907859172, "learning_rate": 0.0001661066195907032, "loss": 1.0573, "step": 6409 }, { "epoch": 0.61, "grad_norm": 0.27815112294798416, "learning_rate": 0.000166094748963339, "loss": 1.1291, "step": 6410 }, { "epoch": 0.61, "grad_norm": 0.2810699578192083, "learning_rate": 0.0001660828766819122, "loss": 0.9918, "step": 6411 }, { "epoch": 0.61, "grad_norm": 0.25506946682445086, "learning_rate": 0.00016607100274671982, "loss": 1.1449, "step": 6412 }, { "epoch": 0.61, "grad_norm": 0.2890524163750954, "learning_rate": 0.00016605912715805915, "loss": 1.124, "step": 6413 }, { "epoch": 0.61, "grad_norm": 0.29021337271256664, "learning_rate": 0.00016604724991622726, "loss": 1.0194, "step": 6414 }, { "epoch": 0.61, "grad_norm": 0.27539899144845825, "learning_rate": 0.00016603537102152145, "loss": 1.077, "step": 6415 }, { "epoch": 0.61, "grad_norm": 0.29031381020521185, "learning_rate": 0.00016602349047423895, "loss": 1.068, "step": 6416 }, { "epoch": 0.61, "grad_norm": 0.259955978828572, "learning_rate": 0.00016601160827467713, "loss": 0.9664, "step": 6417 }, { "epoch": 0.61, "grad_norm": 0.29036829398239994, "learning_rate": 0.00016599972442313333, "loss": 1.0306, "step": 6418 }, { "epoch": 0.61, "grad_norm": 0.2771649514160956, "learning_rate": 0.00016598783891990496, "loss": 1.0726, "step": 6419 }, { "epoch": 0.61, "grad_norm": 0.2597924047332126, "learning_rate": 0.00016597595176528942, "loss": 1.0015, "step": 6420 }, { "epoch": 0.61, "grad_norm": 0.291533841923736, "learning_rate": 0.00016596406295958421, "loss": 1.0385, "step": 6421 }, { "epoch": 0.61, "grad_norm": 0.27196336627994283, "learning_rate": 0.0001659521725030869, "loss": 1.0915, "step": 6422 }, { "epoch": 0.61, "grad_norm": 0.3091516771574349, "learning_rate": 0.00016594028039609504, "loss": 1.1498, "step": 6423 }, { "epoch": 0.61, "grad_norm": 0.269385742381281, "learning_rate": 0.00016592838663890617, "loss": 1.1685, "step": 6424 }, { "epoch": 0.61, "grad_norm": 0.31387068153422903, "learning_rate": 0.00016591649123181803, "loss": 1.0578, "step": 6425 }, { "epoch": 0.61, "grad_norm": 0.2715541694948404, "learning_rate": 0.00016590459417512824, "loss": 1.0411, "step": 6426 }, { "epoch": 0.61, "grad_norm": 0.31668645833456616, "learning_rate": 0.00016589269546913457, "loss": 1.045, "step": 6427 }, { "epoch": 0.61, "grad_norm": 0.26537674929552535, "learning_rate": 0.0001658807951141348, "loss": 0.9741, "step": 6428 }, { "epoch": 0.62, "grad_norm": 0.30617980217717927, "learning_rate": 0.00016586889311042674, "loss": 1.0323, "step": 6429 }, { "epoch": 0.62, "grad_norm": 0.2893643055407271, "learning_rate": 0.00016585698945830818, "loss": 0.9841, "step": 6430 }, { "epoch": 0.62, "grad_norm": 0.26428036894894746, "learning_rate": 0.00016584508415807712, "loss": 1.0233, "step": 6431 }, { "epoch": 0.62, "grad_norm": 0.25987613015099514, "learning_rate": 0.00016583317721003142, "loss": 0.9113, "step": 6432 }, { "epoch": 0.62, "grad_norm": 0.2676269631291612, "learning_rate": 0.0001658212686144691, "loss": 1.0322, "step": 6433 }, { "epoch": 0.62, "grad_norm": 0.27792570272364814, "learning_rate": 0.00016580935837168817, "loss": 1.1305, "step": 6434 }, { "epoch": 0.62, "grad_norm": 0.3054138054002042, "learning_rate": 0.00016579744648198666, "loss": 0.9909, "step": 6435 }, { "epoch": 0.62, "grad_norm": 0.28755696638880873, "learning_rate": 0.0001657855329456627, "loss": 1.1987, "step": 6436 }, { "epoch": 0.62, "grad_norm": 0.2937260358743029, "learning_rate": 0.0001657736177630145, "loss": 1.175, "step": 6437 }, { "epoch": 0.62, "grad_norm": 0.25543914091501174, "learning_rate": 0.00016576170093434008, "loss": 0.9054, "step": 6438 }, { "epoch": 0.62, "grad_norm": 0.2358502828421239, "learning_rate": 0.00016574978245993783, "loss": 1.1184, "step": 6439 }, { "epoch": 0.62, "grad_norm": 0.32159479122906715, "learning_rate": 0.00016573786234010593, "loss": 1.0697, "step": 6440 }, { "epoch": 0.62, "grad_norm": 0.29597420637835464, "learning_rate": 0.0001657259405751427, "loss": 1.0147, "step": 6441 }, { "epoch": 0.62, "grad_norm": 0.27502750185112845, "learning_rate": 0.0001657140171653465, "loss": 1.1519, "step": 6442 }, { "epoch": 0.62, "grad_norm": 0.28880378692907466, "learning_rate": 0.00016570209211101578, "loss": 1.0251, "step": 6443 }, { "epoch": 0.62, "grad_norm": 0.29201502579906263, "learning_rate": 0.00016569016541244884, "loss": 1.1337, "step": 6444 }, { "epoch": 0.62, "grad_norm": 0.2399540511362018, "learning_rate": 0.00016567823706994426, "loss": 1.0998, "step": 6445 }, { "epoch": 0.62, "grad_norm": 0.3268134405014831, "learning_rate": 0.00016566630708380052, "loss": 0.9888, "step": 6446 }, { "epoch": 0.62, "grad_norm": 0.3103957574422285, "learning_rate": 0.00016565437545431618, "loss": 1.1474, "step": 6447 }, { "epoch": 0.62, "grad_norm": 0.2718259809863342, "learning_rate": 0.0001656424421817898, "loss": 0.9704, "step": 6448 }, { "epoch": 0.62, "grad_norm": 0.27353989936463624, "learning_rate": 0.00016563050726652007, "loss": 1.0992, "step": 6449 }, { "epoch": 0.62, "grad_norm": 0.2856249195760054, "learning_rate": 0.00016561857070880565, "loss": 1.1137, "step": 6450 }, { "epoch": 0.62, "grad_norm": 0.273725036502166, "learning_rate": 0.00016560663250894526, "loss": 0.9277, "step": 6451 }, { "epoch": 0.62, "grad_norm": 0.2763856735936746, "learning_rate": 0.00016559469266723767, "loss": 1.0204, "step": 6452 }, { "epoch": 0.62, "grad_norm": 0.28085668794591295, "learning_rate": 0.00016558275118398164, "loss": 0.9921, "step": 6453 }, { "epoch": 0.62, "grad_norm": 0.2735774703202355, "learning_rate": 0.00016557080805947605, "loss": 0.9692, "step": 6454 }, { "epoch": 0.62, "grad_norm": 0.26616410789129574, "learning_rate": 0.0001655588632940198, "loss": 0.9541, "step": 6455 }, { "epoch": 0.62, "grad_norm": 0.2890311486765243, "learning_rate": 0.0001655469168879118, "loss": 1.0578, "step": 6456 }, { "epoch": 0.62, "grad_norm": 0.29609104767351785, "learning_rate": 0.00016553496884145097, "loss": 1.0883, "step": 6457 }, { "epoch": 0.62, "grad_norm": 0.2608637836134015, "learning_rate": 0.0001655230191549364, "loss": 1.0304, "step": 6458 }, { "epoch": 0.62, "grad_norm": 0.27959926336448077, "learning_rate": 0.00016551106782866705, "loss": 1.0061, "step": 6459 }, { "epoch": 0.62, "grad_norm": 0.2771253997843709, "learning_rate": 0.0001654991148629421, "loss": 1.1194, "step": 6460 }, { "epoch": 0.62, "grad_norm": 0.28449814950864316, "learning_rate": 0.00016548716025806062, "loss": 1.088, "step": 6461 }, { "epoch": 0.62, "grad_norm": 0.2691654812600947, "learning_rate": 0.0001654752040143218, "loss": 1.0968, "step": 6462 }, { "epoch": 0.62, "grad_norm": 0.31954023097003986, "learning_rate": 0.00016546324613202483, "loss": 0.9794, "step": 6463 }, { "epoch": 0.62, "grad_norm": 0.2649303795932421, "learning_rate": 0.000165451286611469, "loss": 1.0693, "step": 6464 }, { "epoch": 0.62, "grad_norm": 0.2630512812762983, "learning_rate": 0.0001654393254529536, "loss": 1.0332, "step": 6465 }, { "epoch": 0.62, "grad_norm": 0.2938590102565347, "learning_rate": 0.00016542736265677795, "loss": 1.1377, "step": 6466 }, { "epoch": 0.62, "grad_norm": 0.29423857954744015, "learning_rate": 0.0001654153982232414, "loss": 1.0742, "step": 6467 }, { "epoch": 0.62, "grad_norm": 0.2770519185899091, "learning_rate": 0.00016540343215264342, "loss": 0.9763, "step": 6468 }, { "epoch": 0.62, "grad_norm": 0.27149015096394086, "learning_rate": 0.00016539146444528345, "loss": 1.2458, "step": 6469 }, { "epoch": 0.62, "grad_norm": 0.2723835734113916, "learning_rate": 0.00016537949510146097, "loss": 1.0588, "step": 6470 }, { "epoch": 0.62, "grad_norm": 0.2860060696336331, "learning_rate": 0.00016536752412147555, "loss": 0.9471, "step": 6471 }, { "epoch": 0.62, "grad_norm": 0.256259821327159, "learning_rate": 0.0001653555515056268, "loss": 1.0662, "step": 6472 }, { "epoch": 0.62, "grad_norm": 0.26590917435909617, "learning_rate": 0.00016534357725421422, "loss": 0.9938, "step": 6473 }, { "epoch": 0.62, "grad_norm": 0.30036253247255307, "learning_rate": 0.0001653316013675376, "loss": 0.9861, "step": 6474 }, { "epoch": 0.62, "grad_norm": 0.24447569581162235, "learning_rate": 0.00016531962384589655, "loss": 1.0813, "step": 6475 }, { "epoch": 0.62, "grad_norm": 0.31129076026873814, "learning_rate": 0.0001653076446895909, "loss": 1.0898, "step": 6476 }, { "epoch": 0.62, "grad_norm": 0.2755133042122456, "learning_rate": 0.00016529566389892039, "loss": 1.0924, "step": 6477 }, { "epoch": 0.62, "grad_norm": 0.24784520601298288, "learning_rate": 0.00016528368147418485, "loss": 1.2024, "step": 6478 }, { "epoch": 0.62, "grad_norm": 0.2869637235166629, "learning_rate": 0.00016527169741568416, "loss": 1.0971, "step": 6479 }, { "epoch": 0.62, "grad_norm": 0.25871213333736603, "learning_rate": 0.00016525971172371822, "loss": 0.993, "step": 6480 }, { "epoch": 0.62, "grad_norm": 0.2604087934206153, "learning_rate": 0.00016524772439858694, "loss": 1.1067, "step": 6481 }, { "epoch": 0.62, "grad_norm": 0.2816319137305283, "learning_rate": 0.0001652357354405904, "loss": 1.0467, "step": 6482 }, { "epoch": 0.62, "grad_norm": 0.27702751178720125, "learning_rate": 0.0001652237448500286, "loss": 1.0449, "step": 6483 }, { "epoch": 0.62, "grad_norm": 0.27403004840211326, "learning_rate": 0.00016521175262720154, "loss": 1.1382, "step": 6484 }, { "epoch": 0.62, "grad_norm": 0.27012660723215115, "learning_rate": 0.00016519975877240942, "loss": 1.1069, "step": 6485 }, { "epoch": 0.62, "grad_norm": 0.262756289576385, "learning_rate": 0.00016518776328595234, "loss": 1.0257, "step": 6486 }, { "epoch": 0.62, "grad_norm": 0.2898435765096172, "learning_rate": 0.0001651757661681305, "loss": 1.2361, "step": 6487 }, { "epoch": 0.62, "grad_norm": 0.2778059074062683, "learning_rate": 0.0001651637674192442, "loss": 1.116, "step": 6488 }, { "epoch": 0.62, "grad_norm": 0.29747157960661386, "learning_rate": 0.00016515176703959364, "loss": 1.0449, "step": 6489 }, { "epoch": 0.62, "grad_norm": 0.2758337700493432, "learning_rate": 0.00016513976502947913, "loss": 0.9708, "step": 6490 }, { "epoch": 0.62, "grad_norm": 0.29558292743334763, "learning_rate": 0.00016512776138920108, "loss": 1.0372, "step": 6491 }, { "epoch": 0.62, "grad_norm": 0.26433384453275566, "learning_rate": 0.0001651157561190599, "loss": 1.1413, "step": 6492 }, { "epoch": 0.62, "grad_norm": 0.3164936697696587, "learning_rate": 0.00016510374921935598, "loss": 1.1305, "step": 6493 }, { "epoch": 0.62, "grad_norm": 0.2680448222970725, "learning_rate": 0.00016509174069038985, "loss": 1.2003, "step": 6494 }, { "epoch": 0.62, "grad_norm": 0.26701918502526756, "learning_rate": 0.00016507973053246197, "loss": 0.9809, "step": 6495 }, { "epoch": 0.62, "grad_norm": 0.28698630137463055, "learning_rate": 0.00016506771874587296, "loss": 1.1306, "step": 6496 }, { "epoch": 0.62, "grad_norm": 0.2861081221048606, "learning_rate": 0.00016505570533092333, "loss": 1.2112, "step": 6497 }, { "epoch": 0.62, "grad_norm": 0.27462328764365, "learning_rate": 0.00016504369028791382, "loss": 1.0667, "step": 6498 }, { "epoch": 0.62, "grad_norm": 0.2542616461897349, "learning_rate": 0.0001650316736171451, "loss": 1.1647, "step": 6499 }, { "epoch": 0.62, "grad_norm": 0.268648077124173, "learning_rate": 0.00016501965531891786, "loss": 1.023, "step": 6500 }, { "epoch": 0.62, "grad_norm": 0.30932903309172555, "learning_rate": 0.0001650076353935329, "loss": 1.0771, "step": 6501 }, { "epoch": 0.62, "grad_norm": 0.2711465953776904, "learning_rate": 0.000164995613841291, "loss": 1.023, "step": 6502 }, { "epoch": 0.62, "grad_norm": 0.2488690322666522, "learning_rate": 0.000164983590662493, "loss": 1.1685, "step": 6503 }, { "epoch": 0.62, "grad_norm": 0.2447593866202204, "learning_rate": 0.00016497156585743982, "loss": 1.0553, "step": 6504 }, { "epoch": 0.62, "grad_norm": 0.28996537913588744, "learning_rate": 0.00016495953942643237, "loss": 1.0453, "step": 6505 }, { "epoch": 0.62, "grad_norm": 0.3100091579089353, "learning_rate": 0.00016494751136977165, "loss": 1.0363, "step": 6506 }, { "epoch": 0.62, "grad_norm": 0.26438432513334514, "learning_rate": 0.0001649354816877586, "loss": 1.0239, "step": 6507 }, { "epoch": 0.62, "grad_norm": 0.3136841961736258, "learning_rate": 0.0001649234503806943, "loss": 1.1726, "step": 6508 }, { "epoch": 0.62, "grad_norm": 0.2793775958515165, "learning_rate": 0.0001649114174488799, "loss": 1.0953, "step": 6509 }, { "epoch": 0.62, "grad_norm": 0.2991111217534248, "learning_rate": 0.0001648993828926164, "loss": 0.9635, "step": 6510 }, { "epoch": 0.62, "grad_norm": 0.33123211260741653, "learning_rate": 0.00016488734671220512, "loss": 1.09, "step": 6511 }, { "epoch": 0.62, "grad_norm": 0.3071684112575216, "learning_rate": 0.0001648753089079472, "loss": 1.112, "step": 6512 }, { "epoch": 0.62, "grad_norm": 0.2919753257761195, "learning_rate": 0.0001648632694801439, "loss": 1.2142, "step": 6513 }, { "epoch": 0.62, "grad_norm": 0.3047462341213951, "learning_rate": 0.00016485122842909653, "loss": 1.0747, "step": 6514 }, { "epoch": 0.62, "grad_norm": 0.2561697275636995, "learning_rate": 0.00016483918575510638, "loss": 1.0719, "step": 6515 }, { "epoch": 0.62, "grad_norm": 0.29441141071756, "learning_rate": 0.00016482714145847488, "loss": 1.1003, "step": 6516 }, { "epoch": 0.62, "grad_norm": 0.2507495981732362, "learning_rate": 0.0001648150955395034, "loss": 1.0107, "step": 6517 }, { "epoch": 0.62, "grad_norm": 0.2745896765591778, "learning_rate": 0.00016480304799849343, "loss": 1.0221, "step": 6518 }, { "epoch": 0.62, "grad_norm": 0.2956074131649605, "learning_rate": 0.00016479099883574648, "loss": 1.0595, "step": 6519 }, { "epoch": 0.62, "grad_norm": 0.2755124978473152, "learning_rate": 0.00016477894805156404, "loss": 0.9841, "step": 6520 }, { "epoch": 0.62, "grad_norm": 0.2653332774638079, "learning_rate": 0.00016476689564624773, "loss": 1.1523, "step": 6521 }, { "epoch": 0.62, "grad_norm": 0.28336035642857826, "learning_rate": 0.00016475484162009913, "loss": 1.0458, "step": 6522 }, { "epoch": 0.62, "grad_norm": 0.27133324752026045, "learning_rate": 0.00016474278597341995, "loss": 1.0068, "step": 6523 }, { "epoch": 0.62, "grad_norm": 0.254789334192467, "learning_rate": 0.00016473072870651183, "loss": 1.0911, "step": 6524 }, { "epoch": 0.62, "grad_norm": 0.2645683646629639, "learning_rate": 0.00016471866981967654, "loss": 1.0735, "step": 6525 }, { "epoch": 0.62, "grad_norm": 0.29514733454194775, "learning_rate": 0.0001647066093132159, "loss": 1.1258, "step": 6526 }, { "epoch": 0.62, "grad_norm": 0.30773726775104837, "learning_rate": 0.00016469454718743166, "loss": 1.2085, "step": 6527 }, { "epoch": 0.62, "grad_norm": 0.28753719268726, "learning_rate": 0.00016468248344262575, "loss": 1.1034, "step": 6528 }, { "epoch": 0.62, "grad_norm": 0.28935088461393205, "learning_rate": 0.00016467041807910002, "loss": 0.9435, "step": 6529 }, { "epoch": 0.62, "grad_norm": 0.2910181115331406, "learning_rate": 0.00016465835109715643, "loss": 0.8991, "step": 6530 }, { "epoch": 0.62, "grad_norm": 0.26222425373411046, "learning_rate": 0.00016464628249709699, "loss": 1.1253, "step": 6531 }, { "epoch": 0.62, "grad_norm": 0.24656232690052832, "learning_rate": 0.00016463421227922367, "loss": 0.979, "step": 6532 }, { "epoch": 0.63, "grad_norm": 0.2997042750535666, "learning_rate": 0.0001646221404438386, "loss": 1.0947, "step": 6533 }, { "epoch": 0.63, "grad_norm": 0.26285239590686826, "learning_rate": 0.0001646100669912438, "loss": 0.9987, "step": 6534 }, { "epoch": 0.63, "grad_norm": 0.2810386117270015, "learning_rate": 0.00016459799192174152, "loss": 1.08, "step": 6535 }, { "epoch": 0.63, "grad_norm": 0.2899490777892802, "learning_rate": 0.0001645859152356339, "loss": 1.0636, "step": 6536 }, { "epoch": 0.63, "grad_norm": 0.32728289531293414, "learning_rate": 0.00016457383693322314, "loss": 1.1029, "step": 6537 }, { "epoch": 0.63, "grad_norm": 0.2873444747610267, "learning_rate": 0.0001645617570148115, "loss": 1.1222, "step": 6538 }, { "epoch": 0.63, "grad_norm": 0.26266152625659334, "learning_rate": 0.00016454967548070135, "loss": 1.1339, "step": 6539 }, { "epoch": 0.63, "grad_norm": 0.2803672145152364, "learning_rate": 0.00016453759233119503, "loss": 1.094, "step": 6540 }, { "epoch": 0.63, "grad_norm": 0.27084575822983264, "learning_rate": 0.00016452550756659482, "loss": 0.937, "step": 6541 }, { "epoch": 0.63, "grad_norm": 0.2701911111109992, "learning_rate": 0.00016451342118720328, "loss": 1.029, "step": 6542 }, { "epoch": 0.63, "grad_norm": 0.2716711794064477, "learning_rate": 0.00016450133319332282, "loss": 0.8668, "step": 6543 }, { "epoch": 0.63, "grad_norm": 0.2794432093629184, "learning_rate": 0.00016448924358525595, "loss": 1.0109, "step": 6544 }, { "epoch": 0.63, "grad_norm": 0.28134829606945316, "learning_rate": 0.00016447715236330524, "loss": 1.191, "step": 6545 }, { "epoch": 0.63, "grad_norm": 0.22932974730919156, "learning_rate": 0.0001644650595277733, "loss": 1.0067, "step": 6546 }, { "epoch": 0.63, "grad_norm": 0.2627137036762324, "learning_rate": 0.00016445296507896267, "loss": 1.0458, "step": 6547 }, { "epoch": 0.63, "grad_norm": 0.31351571153357294, "learning_rate": 0.00016444086901717614, "loss": 1.0699, "step": 6548 }, { "epoch": 0.63, "grad_norm": 0.3113263872681726, "learning_rate": 0.00016442877134271633, "loss": 1.1383, "step": 6549 }, { "epoch": 0.63, "grad_norm": 0.24271597078080498, "learning_rate": 0.00016441667205588603, "loss": 0.9184, "step": 6550 }, { "epoch": 0.63, "grad_norm": 0.27790009137065697, "learning_rate": 0.00016440457115698802, "loss": 1.2613, "step": 6551 }, { "epoch": 0.63, "grad_norm": 0.27765509036577246, "learning_rate": 0.0001643924686463252, "loss": 1.1003, "step": 6552 }, { "epoch": 0.63, "grad_norm": 0.29484287527906927, "learning_rate": 0.00016438036452420032, "loss": 1.0934, "step": 6553 }, { "epoch": 0.63, "grad_norm": 0.29018553943805164, "learning_rate": 0.00016436825879091635, "loss": 1.0676, "step": 6554 }, { "epoch": 0.63, "grad_norm": 0.28130026896984306, "learning_rate": 0.00016435615144677629, "loss": 1.1587, "step": 6555 }, { "epoch": 0.63, "grad_norm": 0.2878221364125203, "learning_rate": 0.00016434404249208306, "loss": 1.0381, "step": 6556 }, { "epoch": 0.63, "grad_norm": 0.2777652669244412, "learning_rate": 0.00016433193192713974, "loss": 1.0429, "step": 6557 }, { "epoch": 0.63, "grad_norm": 0.26453569833758483, "learning_rate": 0.00016431981975224938, "loss": 1.1111, "step": 6558 }, { "epoch": 0.63, "grad_norm": 0.28622435501200705, "learning_rate": 0.00016430770596771512, "loss": 1.0154, "step": 6559 }, { "epoch": 0.63, "grad_norm": 0.2847988417285998, "learning_rate": 0.00016429559057384011, "loss": 0.9996, "step": 6560 }, { "epoch": 0.63, "grad_norm": 0.26384397601020537, "learning_rate": 0.00016428347357092755, "loss": 0.9437, "step": 6561 }, { "epoch": 0.63, "grad_norm": 0.2775557302168176, "learning_rate": 0.00016427135495928062, "loss": 1.1182, "step": 6562 }, { "epoch": 0.63, "grad_norm": 0.30234120956824934, "learning_rate": 0.00016425923473920267, "loss": 1.0162, "step": 6563 }, { "epoch": 0.63, "grad_norm": 0.2667444994608358, "learning_rate": 0.000164247112910997, "loss": 0.9534, "step": 6564 }, { "epoch": 0.63, "grad_norm": 0.295387882239055, "learning_rate": 0.0001642349894749669, "loss": 1.1292, "step": 6565 }, { "epoch": 0.63, "grad_norm": 0.25364552624174114, "learning_rate": 0.00016422286443141585, "loss": 1.0281, "step": 6566 }, { "epoch": 0.63, "grad_norm": 0.30870163058659145, "learning_rate": 0.00016421073778064726, "loss": 0.9925, "step": 6567 }, { "epoch": 0.63, "grad_norm": 0.31418016446029906, "learning_rate": 0.0001641986095229646, "loss": 1.0183, "step": 6568 }, { "epoch": 0.63, "grad_norm": 0.30568890062391635, "learning_rate": 0.0001641864796586714, "loss": 1.1813, "step": 6569 }, { "epoch": 0.63, "grad_norm": 0.28403262150412845, "learning_rate": 0.00016417434818807118, "loss": 1.1134, "step": 6570 }, { "epoch": 0.63, "grad_norm": 0.26397330303772965, "learning_rate": 0.00016416221511146757, "loss": 1.113, "step": 6571 }, { "epoch": 0.63, "grad_norm": 0.2863559947419068, "learning_rate": 0.0001641500804291642, "loss": 1.1201, "step": 6572 }, { "epoch": 0.63, "grad_norm": 0.24406593687365424, "learning_rate": 0.00016413794414146476, "loss": 1.0542, "step": 6573 }, { "epoch": 0.63, "grad_norm": 0.27139228105504454, "learning_rate": 0.00016412580624867299, "loss": 1.012, "step": 6574 }, { "epoch": 0.63, "grad_norm": 0.27171095999953015, "learning_rate": 0.00016411366675109256, "loss": 0.9942, "step": 6575 }, { "epoch": 0.63, "grad_norm": 0.2692825756718052, "learning_rate": 0.00016410152564902734, "loss": 1.0619, "step": 6576 }, { "epoch": 0.63, "grad_norm": 0.2694897408553471, "learning_rate": 0.00016408938294278118, "loss": 1.1153, "step": 6577 }, { "epoch": 0.63, "grad_norm": 0.2946068069084391, "learning_rate": 0.0001640772386326579, "loss": 0.9959, "step": 6578 }, { "epoch": 0.63, "grad_norm": 0.2732041052214766, "learning_rate": 0.0001640650927189615, "loss": 1.1325, "step": 6579 }, { "epoch": 0.63, "grad_norm": 0.25828674961643106, "learning_rate": 0.00016405294520199586, "loss": 1.0311, "step": 6580 }, { "epoch": 0.63, "grad_norm": 0.2528935519865577, "learning_rate": 0.000164040796082065, "loss": 1.0198, "step": 6581 }, { "epoch": 0.63, "grad_norm": 0.24788820972140796, "learning_rate": 0.00016402864535947298, "loss": 1.0777, "step": 6582 }, { "epoch": 0.63, "grad_norm": 0.27027895661109375, "learning_rate": 0.00016401649303452386, "loss": 0.9911, "step": 6583 }, { "epoch": 0.63, "grad_norm": 0.29776619082303163, "learning_rate": 0.0001640043391075218, "loss": 0.995, "step": 6584 }, { "epoch": 0.63, "grad_norm": 0.25164405759102015, "learning_rate": 0.0001639921835787709, "loss": 1.0057, "step": 6585 }, { "epoch": 0.63, "grad_norm": 0.28442402581138315, "learning_rate": 0.00016398002644857538, "loss": 1.0983, "step": 6586 }, { "epoch": 0.63, "grad_norm": 0.27597236003640674, "learning_rate": 0.00016396786771723953, "loss": 1.0816, "step": 6587 }, { "epoch": 0.63, "grad_norm": 0.3036495741618543, "learning_rate": 0.00016395570738506754, "loss": 1.0313, "step": 6588 }, { "epoch": 0.63, "grad_norm": 0.2983086346865117, "learning_rate": 0.0001639435454523638, "loss": 1.1382, "step": 6589 }, { "epoch": 0.63, "grad_norm": 0.24415345911957764, "learning_rate": 0.00016393138191943266, "loss": 0.9764, "step": 6590 }, { "epoch": 0.63, "grad_norm": 0.26401568236315054, "learning_rate": 0.0001639192167865785, "loss": 1.0753, "step": 6591 }, { "epoch": 0.63, "grad_norm": 0.30616078316140916, "learning_rate": 0.00016390705005410577, "loss": 1.1654, "step": 6592 }, { "epoch": 0.63, "grad_norm": 0.268584602967286, "learning_rate": 0.00016389488172231895, "loss": 1.1281, "step": 6593 }, { "epoch": 0.63, "grad_norm": 0.3042044038945393, "learning_rate": 0.00016388271179152255, "loss": 1.1451, "step": 6594 }, { "epoch": 0.63, "grad_norm": 0.30865017852012816, "learning_rate": 0.00016387054026202114, "loss": 1.0497, "step": 6595 }, { "epoch": 0.63, "grad_norm": 0.2981587654841934, "learning_rate": 0.00016385836713411932, "loss": 1.0117, "step": 6596 }, { "epoch": 0.63, "grad_norm": 0.30634610666151335, "learning_rate": 0.00016384619240812173, "loss": 1.0624, "step": 6597 }, { "epoch": 0.63, "grad_norm": 0.31330552508402865, "learning_rate": 0.00016383401608433305, "loss": 1.0021, "step": 6598 }, { "epoch": 0.63, "grad_norm": 0.2746478862766659, "learning_rate": 0.00016382183816305798, "loss": 1.092, "step": 6599 }, { "epoch": 0.63, "grad_norm": 0.31666878049663144, "learning_rate": 0.00016380965864460135, "loss": 1.1224, "step": 6600 }, { "epoch": 0.63, "grad_norm": 0.2805806328462776, "learning_rate": 0.00016379747752926787, "loss": 1.0326, "step": 6601 }, { "epoch": 0.63, "grad_norm": 0.27859538132326334, "learning_rate": 0.00016378529481736242, "loss": 1.0901, "step": 6602 }, { "epoch": 0.63, "grad_norm": 0.27971190411521635, "learning_rate": 0.00016377311050918989, "loss": 1.0349, "step": 6603 }, { "epoch": 0.63, "grad_norm": 0.35928083318694953, "learning_rate": 0.0001637609246050552, "loss": 1.0325, "step": 6604 }, { "epoch": 0.63, "grad_norm": 0.2791809789319585, "learning_rate": 0.00016374873710526327, "loss": 1.101, "step": 6605 }, { "epoch": 0.63, "grad_norm": 0.25414723944354833, "learning_rate": 0.00016373654801011913, "loss": 0.9845, "step": 6606 }, { "epoch": 0.63, "grad_norm": 0.29501863717394433, "learning_rate": 0.00016372435731992784, "loss": 1.0297, "step": 6607 }, { "epoch": 0.63, "grad_norm": 0.27739025744682017, "learning_rate": 0.00016371216503499443, "loss": 1.1649, "step": 6608 }, { "epoch": 0.63, "grad_norm": 0.27992773939480853, "learning_rate": 0.0001636999711556241, "loss": 1.1623, "step": 6609 }, { "epoch": 0.63, "grad_norm": 0.256055609384181, "learning_rate": 0.00016368777568212192, "loss": 1.0349, "step": 6610 }, { "epoch": 0.63, "grad_norm": 0.27352283442582775, "learning_rate": 0.00016367557861479316, "loss": 1.081, "step": 6611 }, { "epoch": 0.63, "grad_norm": 0.2878898287968566, "learning_rate": 0.00016366337995394296, "loss": 1.0915, "step": 6612 }, { "epoch": 0.63, "grad_norm": 0.29458815084779255, "learning_rate": 0.0001636511796998767, "loss": 1.0093, "step": 6613 }, { "epoch": 0.63, "grad_norm": 0.2833302474424508, "learning_rate": 0.0001636389778528997, "loss": 1.1375, "step": 6614 }, { "epoch": 0.63, "grad_norm": 0.2827351352279611, "learning_rate": 0.00016362677441331727, "loss": 1.133, "step": 6615 }, { "epoch": 0.63, "grad_norm": 0.2776267400325992, "learning_rate": 0.0001636145693814348, "loss": 1.082, "step": 6616 }, { "epoch": 0.63, "grad_norm": 0.29210767304951707, "learning_rate": 0.00016360236275755777, "loss": 1.0961, "step": 6617 }, { "epoch": 0.63, "grad_norm": 0.3111465892344082, "learning_rate": 0.00016359015454199161, "loss": 1.0517, "step": 6618 }, { "epoch": 0.63, "grad_norm": 0.30230280360893014, "learning_rate": 0.0001635779447350419, "loss": 1.0257, "step": 6619 }, { "epoch": 0.63, "grad_norm": 0.2640103618422073, "learning_rate": 0.00016356573333701414, "loss": 0.9745, "step": 6620 }, { "epoch": 0.63, "grad_norm": 0.27301899027846943, "learning_rate": 0.00016355352034821396, "loss": 0.974, "step": 6621 }, { "epoch": 0.63, "grad_norm": 0.2738570363795273, "learning_rate": 0.00016354130576894698, "loss": 0.995, "step": 6622 }, { "epoch": 0.63, "grad_norm": 0.29413026629702177, "learning_rate": 0.00016352908959951892, "loss": 1.16, "step": 6623 }, { "epoch": 0.63, "grad_norm": 0.30276135434523793, "learning_rate": 0.00016351687184023547, "loss": 1.0801, "step": 6624 }, { "epoch": 0.63, "grad_norm": 0.26212091458025155, "learning_rate": 0.00016350465249140235, "loss": 1.1354, "step": 6625 }, { "epoch": 0.63, "grad_norm": 0.295200448114549, "learning_rate": 0.0001634924315533254, "loss": 1.1294, "step": 6626 }, { "epoch": 0.63, "grad_norm": 0.2719331912386029, "learning_rate": 0.00016348020902631047, "loss": 1.0, "step": 6627 }, { "epoch": 0.63, "grad_norm": 0.2801548078187952, "learning_rate": 0.0001634679849106634, "loss": 1.0472, "step": 6628 }, { "epoch": 0.63, "grad_norm": 0.28890264685867756, "learning_rate": 0.0001634557592066901, "loss": 1.159, "step": 6629 }, { "epoch": 0.63, "grad_norm": 0.2972148378840613, "learning_rate": 0.00016344353191469657, "loss": 1.0649, "step": 6630 }, { "epoch": 0.63, "grad_norm": 0.30972438758134824, "learning_rate": 0.00016343130303498877, "loss": 1.1313, "step": 6631 }, { "epoch": 0.63, "grad_norm": 0.2787430037836057, "learning_rate": 0.00016341907256787273, "loss": 1.0461, "step": 6632 }, { "epoch": 0.63, "grad_norm": 0.2509205560032734, "learning_rate": 0.00016340684051365458, "loss": 1.1465, "step": 6633 }, { "epoch": 0.63, "grad_norm": 0.3371469012042158, "learning_rate": 0.00016339460687264039, "loss": 0.9917, "step": 6634 }, { "epoch": 0.63, "grad_norm": 0.28031780574967835, "learning_rate": 0.0001633823716451363, "loss": 1.0859, "step": 6635 }, { "epoch": 0.63, "grad_norm": 0.32014706377626473, "learning_rate": 0.00016337013483144853, "loss": 1.0125, "step": 6636 }, { "epoch": 0.63, "grad_norm": 0.28048230557063836, "learning_rate": 0.00016335789643188333, "loss": 1.119, "step": 6637 }, { "epoch": 0.64, "grad_norm": 0.27484347493922334, "learning_rate": 0.0001633456564467469, "loss": 1.001, "step": 6638 }, { "epoch": 0.64, "grad_norm": 0.26589912392685283, "learning_rate": 0.00016333341487634567, "loss": 1.2356, "step": 6639 }, { "epoch": 0.64, "grad_norm": 0.3261860350977622, "learning_rate": 0.0001633211717209859, "loss": 1.1768, "step": 6640 }, { "epoch": 0.64, "grad_norm": 0.30705986319442513, "learning_rate": 0.000163308926980974, "loss": 1.072, "step": 6641 }, { "epoch": 0.64, "grad_norm": 0.3098762264842706, "learning_rate": 0.00016329668065661644, "loss": 1.088, "step": 6642 }, { "epoch": 0.64, "grad_norm": 0.24610152808075564, "learning_rate": 0.00016328443274821964, "loss": 1.0679, "step": 6643 }, { "epoch": 0.64, "grad_norm": 0.293387945611742, "learning_rate": 0.00016327218325609018, "loss": 1.0764, "step": 6644 }, { "epoch": 0.64, "grad_norm": 0.24880390773011782, "learning_rate": 0.0001632599321805345, "loss": 1.1687, "step": 6645 }, { "epoch": 0.64, "grad_norm": 0.2871610507138743, "learning_rate": 0.00016324767952185932, "loss": 1.0554, "step": 6646 }, { "epoch": 0.64, "grad_norm": 0.33306363001968703, "learning_rate": 0.00016323542528037116, "loss": 1.066, "step": 6647 }, { "epoch": 0.64, "grad_norm": 0.2601315042428321, "learning_rate": 0.0001632231694563768, "loss": 1.062, "step": 6648 }, { "epoch": 0.64, "grad_norm": 0.3122750901555906, "learning_rate": 0.00016321091205018283, "loss": 0.988, "step": 6649 }, { "epoch": 0.64, "grad_norm": 0.2530794057553161, "learning_rate": 0.0001631986530620961, "loss": 0.9536, "step": 6650 }, { "epoch": 0.64, "grad_norm": 0.25248623075991256, "learning_rate": 0.00016318639249242336, "loss": 1.0528, "step": 6651 }, { "epoch": 0.64, "grad_norm": 0.25241303157165856, "learning_rate": 0.00016317413034147143, "loss": 0.9887, "step": 6652 }, { "epoch": 0.64, "grad_norm": 0.25151161516493253, "learning_rate": 0.00016316186660954716, "loss": 1.0605, "step": 6653 }, { "epoch": 0.64, "grad_norm": 0.2537064242578609, "learning_rate": 0.0001631496012969575, "loss": 1.0014, "step": 6654 }, { "epoch": 0.64, "grad_norm": 0.26974830206999884, "learning_rate": 0.00016313733440400941, "loss": 1.1359, "step": 6655 }, { "epoch": 0.64, "grad_norm": 0.28423801348421174, "learning_rate": 0.0001631250659310098, "loss": 1.0911, "step": 6656 }, { "epoch": 0.64, "grad_norm": 0.3067572434414906, "learning_rate": 0.00016311279587826575, "loss": 1.1511, "step": 6657 }, { "epoch": 0.64, "grad_norm": 0.30349651329317273, "learning_rate": 0.00016310052424608435, "loss": 1.0717, "step": 6658 }, { "epoch": 0.64, "grad_norm": 0.31106706237441717, "learning_rate": 0.00016308825103477262, "loss": 1.0138, "step": 6659 }, { "epoch": 0.64, "grad_norm": 0.30512529577510444, "learning_rate": 0.0001630759762446378, "loss": 1.1175, "step": 6660 }, { "epoch": 0.64, "grad_norm": 0.3156400341146579, "learning_rate": 0.00016306369987598705, "loss": 1.0594, "step": 6661 }, { "epoch": 0.64, "grad_norm": 0.3024624555405739, "learning_rate": 0.00016305142192912754, "loss": 1.1203, "step": 6662 }, { "epoch": 0.64, "grad_norm": 0.24676397866245015, "learning_rate": 0.00016303914240436656, "loss": 1.0936, "step": 6663 }, { "epoch": 0.64, "grad_norm": 0.2585771707617533, "learning_rate": 0.00016302686130201144, "loss": 1.1232, "step": 6664 }, { "epoch": 0.64, "grad_norm": 0.2726696313152824, "learning_rate": 0.00016301457862236954, "loss": 1.0913, "step": 6665 }, { "epoch": 0.64, "grad_norm": 0.2760746274104927, "learning_rate": 0.00016300229436574815, "loss": 0.9746, "step": 6666 }, { "epoch": 0.64, "grad_norm": 0.2755395263384695, "learning_rate": 0.00016299000853245475, "loss": 1.0183, "step": 6667 }, { "epoch": 0.64, "grad_norm": 0.27852069729003387, "learning_rate": 0.00016297772112279683, "loss": 1.0574, "step": 6668 }, { "epoch": 0.64, "grad_norm": 0.27979974439621436, "learning_rate": 0.00016296543213708184, "loss": 0.9499, "step": 6669 }, { "epoch": 0.64, "grad_norm": 0.2960060531441859, "learning_rate": 0.00016295314157561736, "loss": 1.0549, "step": 6670 }, { "epoch": 0.64, "grad_norm": 0.2660000695711491, "learning_rate": 0.00016294084943871092, "loss": 1.1072, "step": 6671 }, { "epoch": 0.64, "grad_norm": 0.2564269859230904, "learning_rate": 0.0001629285557266702, "loss": 1.1333, "step": 6672 }, { "epoch": 0.64, "grad_norm": 0.27504533401306863, "learning_rate": 0.00016291626043980282, "loss": 1.0034, "step": 6673 }, { "epoch": 0.64, "grad_norm": 0.3099324729512832, "learning_rate": 0.00016290396357841646, "loss": 1.0459, "step": 6674 }, { "epoch": 0.64, "grad_norm": 0.2844820883790574, "learning_rate": 0.00016289166514281888, "loss": 0.9184, "step": 6675 }, { "epoch": 0.64, "grad_norm": 0.27114882440422344, "learning_rate": 0.00016287936513331787, "loss": 1.1727, "step": 6676 }, { "epoch": 0.64, "grad_norm": 0.2720012225716546, "learning_rate": 0.00016286706355022118, "loss": 1.0654, "step": 6677 }, { "epoch": 0.64, "grad_norm": 0.2916372120440236, "learning_rate": 0.00016285476039383675, "loss": 1.0744, "step": 6678 }, { "epoch": 0.64, "grad_norm": 0.27052138249686447, "learning_rate": 0.00016284245566447245, "loss": 1.1014, "step": 6679 }, { "epoch": 0.64, "grad_norm": 0.2652236414824706, "learning_rate": 0.0001628301493624362, "loss": 1.1672, "step": 6680 }, { "epoch": 0.64, "grad_norm": 0.27950873661043685, "learning_rate": 0.00016281784148803596, "loss": 1.0014, "step": 6681 }, { "epoch": 0.64, "grad_norm": 0.2827281504818738, "learning_rate": 0.0001628055320415798, "loss": 0.9676, "step": 6682 }, { "epoch": 0.64, "grad_norm": 0.2609854109436044, "learning_rate": 0.00016279322102337565, "loss": 0.8978, "step": 6683 }, { "epoch": 0.64, "grad_norm": 0.24726688155868715, "learning_rate": 0.00016278090843373173, "loss": 1.0843, "step": 6684 }, { "epoch": 0.64, "grad_norm": 0.3193189601858733, "learning_rate": 0.00016276859427295613, "loss": 1.028, "step": 6685 }, { "epoch": 0.64, "grad_norm": 0.2845484123594936, "learning_rate": 0.00016275627854135698, "loss": 0.9256, "step": 6686 }, { "epoch": 0.64, "grad_norm": 0.29028230142099054, "learning_rate": 0.00016274396123924252, "loss": 1.0812, "step": 6687 }, { "epoch": 0.64, "grad_norm": 0.32773446539660916, "learning_rate": 0.000162731642366921, "loss": 1.0608, "step": 6688 }, { "epoch": 0.64, "grad_norm": 0.29790785005575593, "learning_rate": 0.00016271932192470074, "loss": 1.0306, "step": 6689 }, { "epoch": 0.64, "grad_norm": 0.27883966783277836, "learning_rate": 0.00016270699991289, "loss": 1.0423, "step": 6690 }, { "epoch": 0.64, "grad_norm": 0.28349188815486853, "learning_rate": 0.0001626946763317972, "loss": 1.0897, "step": 6691 }, { "epoch": 0.64, "grad_norm": 0.3054195899080486, "learning_rate": 0.00016268235118173068, "loss": 1.0126, "step": 6692 }, { "epoch": 0.64, "grad_norm": 0.297957213586038, "learning_rate": 0.00016267002446299891, "loss": 1.0832, "step": 6693 }, { "epoch": 0.64, "grad_norm": 0.2786420902687923, "learning_rate": 0.00016265769617591046, "loss": 1.1095, "step": 6694 }, { "epoch": 0.64, "grad_norm": 0.28426733069435445, "learning_rate": 0.00016264536632077376, "loss": 1.0612, "step": 6695 }, { "epoch": 0.64, "grad_norm": 0.27433860519495945, "learning_rate": 0.0001626330348978974, "loss": 1.0349, "step": 6696 }, { "epoch": 0.64, "grad_norm": 0.317775496735832, "learning_rate": 0.00016262070190758995, "loss": 1.1712, "step": 6697 }, { "epoch": 0.64, "grad_norm": 0.2568267188069798, "learning_rate": 0.00016260836735016012, "loss": 0.9826, "step": 6698 }, { "epoch": 0.64, "grad_norm": 0.2852886758776049, "learning_rate": 0.00016259603122591653, "loss": 1.0588, "step": 6699 }, { "epoch": 0.64, "grad_norm": 0.28264369508139864, "learning_rate": 0.0001625836935351679, "loss": 1.139, "step": 6700 }, { "epoch": 0.64, "grad_norm": 0.3102222867034462, "learning_rate": 0.00016257135427822302, "loss": 1.0697, "step": 6701 }, { "epoch": 0.64, "grad_norm": 0.268465641442644, "learning_rate": 0.00016255901345539072, "loss": 1.0887, "step": 6702 }, { "epoch": 0.64, "grad_norm": 0.30026142767107994, "learning_rate": 0.00016254667106697972, "loss": 1.1509, "step": 6703 }, { "epoch": 0.64, "grad_norm": 0.2706314630840685, "learning_rate": 0.000162534327113299, "loss": 1.0657, "step": 6704 }, { "epoch": 0.64, "grad_norm": 0.26773703311582037, "learning_rate": 0.00016252198159465744, "loss": 1.1287, "step": 6705 }, { "epoch": 0.64, "grad_norm": 0.2957605324588231, "learning_rate": 0.000162509634511364, "loss": 1.2115, "step": 6706 }, { "epoch": 0.64, "grad_norm": 0.29017697603422365, "learning_rate": 0.00016249728586372765, "loss": 1.0255, "step": 6707 }, { "epoch": 0.64, "grad_norm": 0.27496495480231525, "learning_rate": 0.0001624849356520575, "loss": 1.0261, "step": 6708 }, { "epoch": 0.64, "grad_norm": 0.26386368126415394, "learning_rate": 0.0001624725838766625, "loss": 1.1855, "step": 6709 }, { "epoch": 0.64, "grad_norm": 0.27191856479446785, "learning_rate": 0.00016246023053785184, "loss": 1.033, "step": 6710 }, { "epoch": 0.64, "grad_norm": 0.26761649181678265, "learning_rate": 0.0001624478756359347, "loss": 1.137, "step": 6711 }, { "epoch": 0.64, "grad_norm": 0.26810717439455456, "learning_rate": 0.00016243551917122017, "loss": 1.169, "step": 6712 }, { "epoch": 0.64, "grad_norm": 0.23716095057095146, "learning_rate": 0.00016242316114401754, "loss": 0.9461, "step": 6713 }, { "epoch": 0.64, "grad_norm": 0.2820129032334972, "learning_rate": 0.00016241080155463613, "loss": 1.028, "step": 6714 }, { "epoch": 0.64, "grad_norm": 0.29187354158124584, "learning_rate": 0.00016239844040338513, "loss": 1.1075, "step": 6715 }, { "epoch": 0.64, "grad_norm": 0.2707882691645488, "learning_rate": 0.00016238607769057396, "loss": 1.1043, "step": 6716 }, { "epoch": 0.64, "grad_norm": 0.27825828988627205, "learning_rate": 0.00016237371341651198, "loss": 0.9317, "step": 6717 }, { "epoch": 0.64, "grad_norm": 0.24456292837088042, "learning_rate": 0.00016236134758150863, "loss": 1.115, "step": 6718 }, { "epoch": 0.64, "grad_norm": 0.30642644995469454, "learning_rate": 0.00016234898018587337, "loss": 1.0855, "step": 6719 }, { "epoch": 0.64, "grad_norm": 0.2536012594797928, "learning_rate": 0.00016233661122991568, "loss": 1.0347, "step": 6720 }, { "epoch": 0.64, "grad_norm": 0.28201291743191076, "learning_rate": 0.00016232424071394513, "loss": 0.923, "step": 6721 }, { "epoch": 0.64, "grad_norm": 0.26500412097642506, "learning_rate": 0.00016231186863827128, "loss": 1.1197, "step": 6722 }, { "epoch": 0.64, "grad_norm": 0.28336108524806514, "learning_rate": 0.00016229949500320376, "loss": 0.9663, "step": 6723 }, { "epoch": 0.64, "grad_norm": 0.30535153183981684, "learning_rate": 0.00016228711980905222, "loss": 1.0546, "step": 6724 }, { "epoch": 0.64, "grad_norm": 0.2738980528128915, "learning_rate": 0.00016227474305612635, "loss": 1.1652, "step": 6725 }, { "epoch": 0.64, "grad_norm": 0.29696172073189514, "learning_rate": 0.00016226236474473592, "loss": 1.1307, "step": 6726 }, { "epoch": 0.64, "grad_norm": 0.2575129900484908, "learning_rate": 0.00016224998487519065, "loss": 1.0553, "step": 6727 }, { "epoch": 0.64, "grad_norm": 0.25508951971135796, "learning_rate": 0.0001622376034478004, "loss": 0.9931, "step": 6728 }, { "epoch": 0.64, "grad_norm": 0.3026293198793625, "learning_rate": 0.00016222522046287506, "loss": 1.0979, "step": 6729 }, { "epoch": 0.64, "grad_norm": 0.2598437183575833, "learning_rate": 0.00016221283592072442, "loss": 1.0128, "step": 6730 }, { "epoch": 0.64, "grad_norm": 0.3163844838276975, "learning_rate": 0.00016220044982165845, "loss": 1.1538, "step": 6731 }, { "epoch": 0.64, "grad_norm": 0.2362891477357362, "learning_rate": 0.00016218806216598713, "loss": 1.0506, "step": 6732 }, { "epoch": 0.64, "grad_norm": 0.26662591447108, "learning_rate": 0.00016217567295402052, "loss": 0.962, "step": 6733 }, { "epoch": 0.64, "grad_norm": 0.29725496528185136, "learning_rate": 0.00016216328218606856, "loss": 1.0977, "step": 6734 }, { "epoch": 0.64, "grad_norm": 0.2571941755837014, "learning_rate": 0.00016215088986244145, "loss": 1.0301, "step": 6735 }, { "epoch": 0.64, "grad_norm": 0.24442116258690824, "learning_rate": 0.00016213849598344923, "loss": 1.0068, "step": 6736 }, { "epoch": 0.64, "grad_norm": 0.2747700753479199, "learning_rate": 0.0001621261005494021, "loss": 0.984, "step": 6737 }, { "epoch": 0.64, "grad_norm": 0.2947494451070546, "learning_rate": 0.00016211370356061024, "loss": 0.9723, "step": 6738 }, { "epoch": 0.64, "grad_norm": 0.28377559231615324, "learning_rate": 0.00016210130501738393, "loss": 1.1093, "step": 6739 }, { "epoch": 0.64, "grad_norm": 0.281938530095994, "learning_rate": 0.00016208890492003345, "loss": 1.0411, "step": 6740 }, { "epoch": 0.64, "grad_norm": 0.25782869247051643, "learning_rate": 0.00016207650326886908, "loss": 1.0448, "step": 6741 }, { "epoch": 0.65, "grad_norm": 0.27863659241136324, "learning_rate": 0.0001620641000642012, "loss": 1.0766, "step": 6742 }, { "epoch": 0.65, "grad_norm": 0.3034063935548644, "learning_rate": 0.00016205169530634022, "loss": 1.0668, "step": 6743 }, { "epoch": 0.65, "grad_norm": 0.29522301302575077, "learning_rate": 0.00016203928899559655, "loss": 1.0437, "step": 6744 }, { "epoch": 0.65, "grad_norm": 0.2407728356207695, "learning_rate": 0.0001620268811322807, "loss": 1.1312, "step": 6745 }, { "epoch": 0.65, "grad_norm": 0.26324055091027326, "learning_rate": 0.0001620144717167032, "loss": 1.0093, "step": 6746 }, { "epoch": 0.65, "grad_norm": 0.2804316768014678, "learning_rate": 0.0001620020607491745, "loss": 1.0229, "step": 6747 }, { "epoch": 0.65, "grad_norm": 0.2753277098551558, "learning_rate": 0.00016198964823000531, "loss": 1.0634, "step": 6748 }, { "epoch": 0.65, "grad_norm": 0.2800366285769209, "learning_rate": 0.00016197723415950618, "loss": 1.0365, "step": 6749 }, { "epoch": 0.65, "grad_norm": 0.302146197809392, "learning_rate": 0.00016196481853798783, "loss": 0.9998, "step": 6750 }, { "epoch": 0.65, "grad_norm": 0.3568635660133519, "learning_rate": 0.00016195240136576098, "loss": 1.1658, "step": 6751 }, { "epoch": 0.65, "grad_norm": 0.2716917931898554, "learning_rate": 0.00016193998264313632, "loss": 1.0812, "step": 6752 }, { "epoch": 0.65, "grad_norm": 0.27290621018790484, "learning_rate": 0.0001619275623704247, "loss": 1.094, "step": 6753 }, { "epoch": 0.65, "grad_norm": 0.2682620960621536, "learning_rate": 0.00016191514054793687, "loss": 1.0381, "step": 6754 }, { "epoch": 0.65, "grad_norm": 0.2845594233710903, "learning_rate": 0.00016190271717598376, "loss": 1.1278, "step": 6755 }, { "epoch": 0.65, "grad_norm": 0.27355020118683776, "learning_rate": 0.0001618902922548762, "loss": 0.9993, "step": 6756 }, { "epoch": 0.65, "grad_norm": 0.287166856164538, "learning_rate": 0.00016187786578492527, "loss": 1.1079, "step": 6757 }, { "epoch": 0.65, "grad_norm": 0.31367583134610894, "learning_rate": 0.00016186543776644177, "loss": 1.0392, "step": 6758 }, { "epoch": 0.65, "grad_norm": 0.26476803361212475, "learning_rate": 0.00016185300819973687, "loss": 1.0339, "step": 6759 }, { "epoch": 0.65, "grad_norm": 0.2869935686558382, "learning_rate": 0.00016184057708512156, "loss": 1.0353, "step": 6760 }, { "epoch": 0.65, "grad_norm": 0.2607335209317506, "learning_rate": 0.0001618281444229069, "loss": 1.0328, "step": 6761 }, { "epoch": 0.65, "grad_norm": 0.2819258447169869, "learning_rate": 0.0001618157102134041, "loss": 1.0694, "step": 6762 }, { "epoch": 0.65, "grad_norm": 0.2486602423602287, "learning_rate": 0.0001618032744569243, "loss": 1.0398, "step": 6763 }, { "epoch": 0.65, "grad_norm": 0.2964468238045624, "learning_rate": 0.0001617908371537787, "loss": 1.1674, "step": 6764 }, { "epoch": 0.65, "grad_norm": 0.28968276628283657, "learning_rate": 0.00016177839830427862, "loss": 1.0923, "step": 6765 }, { "epoch": 0.65, "grad_norm": 0.30259410481968824, "learning_rate": 0.00016176595790873526, "loss": 1.1047, "step": 6766 }, { "epoch": 0.65, "grad_norm": 0.299883854383291, "learning_rate": 0.00016175351596745997, "loss": 1.0987, "step": 6767 }, { "epoch": 0.65, "grad_norm": 0.28742268599750015, "learning_rate": 0.00016174107248076414, "loss": 1.1337, "step": 6768 }, { "epoch": 0.65, "grad_norm": 0.27078457130344585, "learning_rate": 0.00016172862744895917, "loss": 1.1273, "step": 6769 }, { "epoch": 0.65, "grad_norm": 0.28329813420365574, "learning_rate": 0.00016171618087235652, "loss": 1.042, "step": 6770 }, { "epoch": 0.65, "grad_norm": 0.2835411240856413, "learning_rate": 0.00016170373275126761, "loss": 1.1055, "step": 6771 }, { "epoch": 0.65, "grad_norm": 0.2697682905187557, "learning_rate": 0.00016169128308600404, "loss": 1.1592, "step": 6772 }, { "epoch": 0.65, "grad_norm": 0.2606901412284456, "learning_rate": 0.00016167883187687737, "loss": 1.0796, "step": 6773 }, { "epoch": 0.65, "grad_norm": 0.2718079210220933, "learning_rate": 0.0001616663791241991, "loss": 0.9178, "step": 6774 }, { "epoch": 0.65, "grad_norm": 0.2992698573782557, "learning_rate": 0.00016165392482828098, "loss": 0.9155, "step": 6775 }, { "epoch": 0.65, "grad_norm": 0.2820273589437464, "learning_rate": 0.00016164146898943463, "loss": 1.1096, "step": 6776 }, { "epoch": 0.65, "grad_norm": 0.27964729949513284, "learning_rate": 0.00016162901160797182, "loss": 1.0301, "step": 6777 }, { "epoch": 0.65, "grad_norm": 0.30035527749172686, "learning_rate": 0.0001616165526842042, "loss": 1.0112, "step": 6778 }, { "epoch": 0.65, "grad_norm": 0.26367756968433076, "learning_rate": 0.0001616040922184437, "loss": 1.062, "step": 6779 }, { "epoch": 0.65, "grad_norm": 0.30454178782431157, "learning_rate": 0.000161591630211002, "loss": 1.0908, "step": 6780 }, { "epoch": 0.65, "grad_norm": 0.2736709005007848, "learning_rate": 0.0001615791666621911, "loss": 1.0642, "step": 6781 }, { "epoch": 0.65, "grad_norm": 0.28955239699459784, "learning_rate": 0.00016156670157232278, "loss": 1.038, "step": 6782 }, { "epoch": 0.65, "grad_norm": 0.3055984799372455, "learning_rate": 0.00016155423494170913, "loss": 1.083, "step": 6783 }, { "epoch": 0.65, "grad_norm": 0.28914856789930005, "learning_rate": 0.00016154176677066204, "loss": 1.0251, "step": 6784 }, { "epoch": 0.65, "grad_norm": 0.2937920207039406, "learning_rate": 0.00016152929705949356, "loss": 1.0395, "step": 6785 }, { "epoch": 0.65, "grad_norm": 0.26681380130500015, "learning_rate": 0.00016151682580851576, "loss": 1.1342, "step": 6786 }, { "epoch": 0.65, "grad_norm": 0.31513605238395026, "learning_rate": 0.00016150435301804072, "loss": 1.1459, "step": 6787 }, { "epoch": 0.65, "grad_norm": 0.2962395662354257, "learning_rate": 0.0001614918786883806, "loss": 0.9866, "step": 6788 }, { "epoch": 0.65, "grad_norm": 0.27715689822063977, "learning_rate": 0.00016147940281984754, "loss": 1.0869, "step": 6789 }, { "epoch": 0.65, "grad_norm": 0.261319217896763, "learning_rate": 0.00016146692541275383, "loss": 0.9879, "step": 6790 }, { "epoch": 0.65, "grad_norm": 0.29020817288316764, "learning_rate": 0.00016145444646741166, "loss": 1.1018, "step": 6791 }, { "epoch": 0.65, "grad_norm": 0.2564090714223762, "learning_rate": 0.00016144196598413336, "loss": 1.0812, "step": 6792 }, { "epoch": 0.65, "grad_norm": 0.252997662859385, "learning_rate": 0.00016142948396323124, "loss": 1.0051, "step": 6793 }, { "epoch": 0.65, "grad_norm": 0.27979442491585477, "learning_rate": 0.00016141700040501767, "loss": 1.0166, "step": 6794 }, { "epoch": 0.65, "grad_norm": 0.2723199327047408, "learning_rate": 0.00016140451530980503, "loss": 1.0498, "step": 6795 }, { "epoch": 0.65, "grad_norm": 0.24997068906233713, "learning_rate": 0.00016139202867790586, "loss": 1.0254, "step": 6796 }, { "epoch": 0.65, "grad_norm": 0.25606449185707403, "learning_rate": 0.00016137954050963256, "loss": 1.0375, "step": 6797 }, { "epoch": 0.65, "grad_norm": 0.27072931875871203, "learning_rate": 0.0001613670508052977, "loss": 1.1278, "step": 6798 }, { "epoch": 0.65, "grad_norm": 0.2861760154233243, "learning_rate": 0.00016135455956521383, "loss": 1.0276, "step": 6799 }, { "epoch": 0.65, "grad_norm": 0.281890088050873, "learning_rate": 0.00016134206678969351, "loss": 1.151, "step": 6800 }, { "epoch": 0.65, "grad_norm": 0.2561578106187753, "learning_rate": 0.00016132957247904948, "loss": 1.0579, "step": 6801 }, { "epoch": 0.65, "grad_norm": 0.296476813043644, "learning_rate": 0.0001613170766335943, "loss": 1.0815, "step": 6802 }, { "epoch": 0.65, "grad_norm": 0.24047219584398974, "learning_rate": 0.00016130457925364074, "loss": 1.0674, "step": 6803 }, { "epoch": 0.65, "grad_norm": 0.28537999657913954, "learning_rate": 0.00016129208033950157, "loss": 1.1703, "step": 6804 }, { "epoch": 0.65, "grad_norm": 0.3071076656903495, "learning_rate": 0.00016127957989148958, "loss": 1.1097, "step": 6805 }, { "epoch": 0.65, "grad_norm": 0.2912482911905167, "learning_rate": 0.00016126707790991757, "loss": 1.0033, "step": 6806 }, { "epoch": 0.65, "grad_norm": 0.2873587122615141, "learning_rate": 0.00016125457439509843, "loss": 1.1402, "step": 6807 }, { "epoch": 0.65, "grad_norm": 0.2822651267978888, "learning_rate": 0.00016124206934734509, "loss": 1.0684, "step": 6808 }, { "epoch": 0.65, "grad_norm": 0.30569024152993257, "learning_rate": 0.0001612295627669705, "loss": 1.0356, "step": 6809 }, { "epoch": 0.65, "grad_norm": 0.2552189424981213, "learning_rate": 0.00016121705465428756, "loss": 1.1015, "step": 6810 }, { "epoch": 0.65, "grad_norm": 0.2780062595426105, "learning_rate": 0.0001612045450096094, "loss": 1.0252, "step": 6811 }, { "epoch": 0.65, "grad_norm": 0.2958626948496943, "learning_rate": 0.000161192033833249, "loss": 1.047, "step": 6812 }, { "epoch": 0.65, "grad_norm": 0.2547715653655437, "learning_rate": 0.0001611795211255195, "loss": 0.9724, "step": 6813 }, { "epoch": 0.65, "grad_norm": 0.26184844172876026, "learning_rate": 0.00016116700688673406, "loss": 0.9989, "step": 6814 }, { "epoch": 0.65, "grad_norm": 0.2903838433558316, "learning_rate": 0.0001611544911172058, "loss": 1.0365, "step": 6815 }, { "epoch": 0.65, "grad_norm": 0.2787578244449706, "learning_rate": 0.00016114197381724798, "loss": 1.1122, "step": 6816 }, { "epoch": 0.65, "grad_norm": 0.2890262361588652, "learning_rate": 0.00016112945498717384, "loss": 0.9743, "step": 6817 }, { "epoch": 0.65, "grad_norm": 0.25957088901852887, "learning_rate": 0.00016111693462729666, "loss": 1.0411, "step": 6818 }, { "epoch": 0.65, "grad_norm": 0.29141337706844284, "learning_rate": 0.0001611044127379298, "loss": 1.0738, "step": 6819 }, { "epoch": 0.65, "grad_norm": 0.27868532305453414, "learning_rate": 0.00016109188931938658, "loss": 1.0309, "step": 6820 }, { "epoch": 0.65, "grad_norm": 0.2865127928357969, "learning_rate": 0.00016107936437198048, "loss": 1.0432, "step": 6821 }, { "epoch": 0.65, "grad_norm": 0.28901415788858237, "learning_rate": 0.00016106683789602485, "loss": 1.0913, "step": 6822 }, { "epoch": 0.65, "grad_norm": 0.31087668233226007, "learning_rate": 0.00016105430989183324, "loss": 1.0044, "step": 6823 }, { "epoch": 0.65, "grad_norm": 0.2907385138727252, "learning_rate": 0.0001610417803597192, "loss": 0.9812, "step": 6824 }, { "epoch": 0.65, "grad_norm": 0.2955311073850929, "learning_rate": 0.00016102924929999618, "loss": 0.9718, "step": 6825 }, { "epoch": 0.65, "grad_norm": 0.25938433521785725, "learning_rate": 0.00016101671671297786, "loss": 1.0653, "step": 6826 }, { "epoch": 0.65, "grad_norm": 0.29835875083842295, "learning_rate": 0.00016100418259897787, "loss": 1.1136, "step": 6827 }, { "epoch": 0.65, "grad_norm": 0.28475427911325607, "learning_rate": 0.00016099164695830987, "loss": 1.0817, "step": 6828 }, { "epoch": 0.65, "grad_norm": 0.29712133149339015, "learning_rate": 0.00016097910979128756, "loss": 1.2519, "step": 6829 }, { "epoch": 0.65, "grad_norm": 0.30942764161840036, "learning_rate": 0.00016096657109822472, "loss": 1.153, "step": 6830 }, { "epoch": 0.65, "grad_norm": 0.24877924990251216, "learning_rate": 0.0001609540308794351, "loss": 1.0787, "step": 6831 }, { "epoch": 0.65, "grad_norm": 0.2491230221886881, "learning_rate": 0.00016094148913523254, "loss": 1.0693, "step": 6832 }, { "epoch": 0.65, "grad_norm": 0.33875565455907103, "learning_rate": 0.00016092894586593098, "loss": 1.0437, "step": 6833 }, { "epoch": 0.65, "grad_norm": 0.25683830659317614, "learning_rate": 0.00016091640107184418, "loss": 1.0761, "step": 6834 }, { "epoch": 0.65, "grad_norm": 0.28029893510677445, "learning_rate": 0.00016090385475328616, "loss": 1.0066, "step": 6835 }, { "epoch": 0.65, "grad_norm": 0.2638298088456665, "learning_rate": 0.00016089130691057096, "loss": 1.0488, "step": 6836 }, { "epoch": 0.65, "grad_norm": 0.2732308385667981, "learning_rate": 0.0001608787575440125, "loss": 1.0224, "step": 6837 }, { "epoch": 0.65, "grad_norm": 0.2652051822786465, "learning_rate": 0.0001608662066539249, "loss": 1.1503, "step": 6838 }, { "epoch": 0.65, "grad_norm": 0.3030626041580578, "learning_rate": 0.00016085365424062218, "loss": 1.0871, "step": 6839 }, { "epoch": 0.65, "grad_norm": 0.30008149751746244, "learning_rate": 0.00016084110030441853, "loss": 1.0668, "step": 6840 }, { "epoch": 0.65, "grad_norm": 0.2888340632480941, "learning_rate": 0.00016082854484562813, "loss": 1.0374, "step": 6841 }, { "epoch": 0.65, "grad_norm": 0.2862699272392478, "learning_rate": 0.00016081598786456516, "loss": 1.1416, "step": 6842 }, { "epoch": 0.65, "grad_norm": 0.2717722410816119, "learning_rate": 0.00016080342936154388, "loss": 1.0422, "step": 6843 }, { "epoch": 0.65, "grad_norm": 0.2726242149316908, "learning_rate": 0.00016079086933687854, "loss": 1.0457, "step": 6844 }, { "epoch": 0.65, "grad_norm": 0.2531509697020992, "learning_rate": 0.0001607783077908835, "loss": 1.0804, "step": 6845 }, { "epoch": 0.65, "grad_norm": 0.2574041172829116, "learning_rate": 0.0001607657447238731, "loss": 1.0583, "step": 6846 }, { "epoch": 0.66, "grad_norm": 0.28649411980651573, "learning_rate": 0.00016075318013616174, "loss": 1.0341, "step": 6847 }, { "epoch": 0.66, "grad_norm": 0.32154960312770714, "learning_rate": 0.0001607406140280639, "loss": 1.0304, "step": 6848 }, { "epoch": 0.66, "grad_norm": 0.27521017806337644, "learning_rate": 0.000160728046399894, "loss": 1.0798, "step": 6849 }, { "epoch": 0.66, "grad_norm": 0.27096865352974614, "learning_rate": 0.00016071547725196657, "loss": 1.1019, "step": 6850 }, { "epoch": 0.66, "grad_norm": 0.2827906410490978, "learning_rate": 0.0001607029065845962, "loss": 1.0481, "step": 6851 }, { "epoch": 0.66, "grad_norm": 0.28755989114991476, "learning_rate": 0.00016069033439809738, "loss": 1.221, "step": 6852 }, { "epoch": 0.66, "grad_norm": 0.2849156830204197, "learning_rate": 0.00016067776069278485, "loss": 1.0805, "step": 6853 }, { "epoch": 0.66, "grad_norm": 0.2738501723030775, "learning_rate": 0.0001606651854689732, "loss": 1.0744, "step": 6854 }, { "epoch": 0.66, "grad_norm": 0.30408775836991736, "learning_rate": 0.00016065260872697717, "loss": 1.0593, "step": 6855 }, { "epoch": 0.66, "grad_norm": 0.2742033230168456, "learning_rate": 0.00016064003046711148, "loss": 1.1384, "step": 6856 }, { "epoch": 0.66, "grad_norm": 0.2623388811156543, "learning_rate": 0.00016062745068969088, "loss": 1.0076, "step": 6857 }, { "epoch": 0.66, "grad_norm": 0.2830978029989236, "learning_rate": 0.00016061486939503028, "loss": 1.0956, "step": 6858 }, { "epoch": 0.66, "grad_norm": 0.2825549299962127, "learning_rate": 0.00016060228658344445, "loss": 0.9938, "step": 6859 }, { "epoch": 0.66, "grad_norm": 0.2787473530770543, "learning_rate": 0.00016058970225524833, "loss": 1.108, "step": 6860 }, { "epoch": 0.66, "grad_norm": 0.2766509594000378, "learning_rate": 0.00016057711641075684, "loss": 1.0104, "step": 6861 }, { "epoch": 0.66, "grad_norm": 0.2835424799524376, "learning_rate": 0.00016056452905028492, "loss": 1.0538, "step": 6862 }, { "epoch": 0.66, "grad_norm": 0.2718902303042608, "learning_rate": 0.0001605519401741476, "loss": 1.1156, "step": 6863 }, { "epoch": 0.66, "grad_norm": 0.31396026069305516, "learning_rate": 0.0001605393497826599, "loss": 1.0302, "step": 6864 }, { "epoch": 0.66, "grad_norm": 0.3105439907019263, "learning_rate": 0.00016052675787613696, "loss": 1.0684, "step": 6865 }, { "epoch": 0.66, "grad_norm": 0.2728613957688195, "learning_rate": 0.00016051416445489385, "loss": 1.0522, "step": 6866 }, { "epoch": 0.66, "grad_norm": 0.26407320682849367, "learning_rate": 0.00016050156951924574, "loss": 0.9828, "step": 6867 }, { "epoch": 0.66, "grad_norm": 0.286389653868074, "learning_rate": 0.00016048897306950784, "loss": 1.0011, "step": 6868 }, { "epoch": 0.66, "grad_norm": 0.20462171644432642, "learning_rate": 0.00016047637510599534, "loss": 0.9521, "step": 6869 }, { "epoch": 0.66, "grad_norm": 0.28123560210233106, "learning_rate": 0.00016046377562902356, "loss": 0.9788, "step": 6870 }, { "epoch": 0.66, "grad_norm": 0.30001191282665973, "learning_rate": 0.0001604511746389078, "loss": 1.0776, "step": 6871 }, { "epoch": 0.66, "grad_norm": 0.24622530091185105, "learning_rate": 0.00016043857213596344, "loss": 1.0529, "step": 6872 }, { "epoch": 0.66, "grad_norm": 0.27492099325556013, "learning_rate": 0.00016042596812050576, "loss": 1.0378, "step": 6873 }, { "epoch": 0.66, "grad_norm": 0.2918899294834224, "learning_rate": 0.00016041336259285031, "loss": 1.0596, "step": 6874 }, { "epoch": 0.66, "grad_norm": 0.2623677600554193, "learning_rate": 0.00016040075555331246, "loss": 1.1632, "step": 6875 }, { "epoch": 0.66, "grad_norm": 0.26087552461867863, "learning_rate": 0.00016038814700220777, "loss": 1.0231, "step": 6876 }, { "epoch": 0.66, "grad_norm": 0.27308513799182704, "learning_rate": 0.00016037553693985172, "loss": 1.0093, "step": 6877 }, { "epoch": 0.66, "grad_norm": 0.3112434726283997, "learning_rate": 0.00016036292536655993, "loss": 1.1635, "step": 6878 }, { "epoch": 0.66, "grad_norm": 0.2884405944631099, "learning_rate": 0.00016035031228264798, "loss": 1.1504, "step": 6879 }, { "epoch": 0.66, "grad_norm": 0.25052783602254847, "learning_rate": 0.00016033769768843153, "loss": 1.0535, "step": 6880 }, { "epoch": 0.66, "grad_norm": 0.2952430681334669, "learning_rate": 0.00016032508158422633, "loss": 1.1563, "step": 6881 }, { "epoch": 0.66, "grad_norm": 0.2775844498309543, "learning_rate": 0.00016031246397034797, "loss": 1.063, "step": 6882 }, { "epoch": 0.66, "grad_norm": 0.2641737504846404, "learning_rate": 0.00016029984484711233, "loss": 0.9881, "step": 6883 }, { "epoch": 0.66, "grad_norm": 0.25954131255382207, "learning_rate": 0.0001602872242148352, "loss": 0.9942, "step": 6884 }, { "epoch": 0.66, "grad_norm": 0.235184004978054, "learning_rate": 0.00016027460207383238, "loss": 1.0592, "step": 6885 }, { "epoch": 0.66, "grad_norm": 0.3208319103526141, "learning_rate": 0.00016026197842441975, "loss": 0.9071, "step": 6886 }, { "epoch": 0.66, "grad_norm": 0.38310981570001407, "learning_rate": 0.00016024935326691323, "loss": 1.0084, "step": 6887 }, { "epoch": 0.66, "grad_norm": 0.2736848488804497, "learning_rate": 0.00016023672660162881, "loss": 1.1093, "step": 6888 }, { "epoch": 0.66, "grad_norm": 0.30255083039667124, "learning_rate": 0.00016022409842888244, "loss": 1.0453, "step": 6889 }, { "epoch": 0.66, "grad_norm": 0.29616770728129344, "learning_rate": 0.00016021146874899015, "loss": 1.1753, "step": 6890 }, { "epoch": 0.66, "grad_norm": 0.26891944823488517, "learning_rate": 0.000160198837562268, "loss": 0.998, "step": 6891 }, { "epoch": 0.66, "grad_norm": 0.3069851887194386, "learning_rate": 0.00016018620486903213, "loss": 0.9517, "step": 6892 }, { "epoch": 0.66, "grad_norm": 0.2878707069041881, "learning_rate": 0.00016017357066959863, "loss": 1.0137, "step": 6893 }, { "epoch": 0.66, "grad_norm": 0.2804636505710241, "learning_rate": 0.0001601609349642837, "loss": 1.0739, "step": 6894 }, { "epoch": 0.66, "grad_norm": 0.272015497269986, "learning_rate": 0.00016014829775340362, "loss": 1.1176, "step": 6895 }, { "epoch": 0.66, "grad_norm": 0.24507472885852935, "learning_rate": 0.00016013565903727454, "loss": 1.1288, "step": 6896 }, { "epoch": 0.66, "grad_norm": 0.30186275659636463, "learning_rate": 0.00016012301881621283, "loss": 1.0384, "step": 6897 }, { "epoch": 0.66, "grad_norm": 0.3163308512937789, "learning_rate": 0.00016011037709053478, "loss": 1.0186, "step": 6898 }, { "epoch": 0.66, "grad_norm": 0.30810804171123124, "learning_rate": 0.00016009773386055676, "loss": 1.1093, "step": 6899 }, { "epoch": 0.66, "grad_norm": 0.2816814074951005, "learning_rate": 0.00016008508912659518, "loss": 1.0513, "step": 6900 }, { "epoch": 0.66, "grad_norm": 0.30201609837419807, "learning_rate": 0.00016007244288896645, "loss": 0.9661, "step": 6901 }, { "epoch": 0.66, "grad_norm": 0.28829302388205935, "learning_rate": 0.00016005979514798713, "loss": 1.0704, "step": 6902 }, { "epoch": 0.66, "grad_norm": 0.26515326393964955, "learning_rate": 0.00016004714590397366, "loss": 1.1436, "step": 6903 }, { "epoch": 0.66, "grad_norm": 0.2971122785850026, "learning_rate": 0.00016003449515724263, "loss": 1.1088, "step": 6904 }, { "epoch": 0.66, "grad_norm": 0.3097599416745419, "learning_rate": 0.00016002184290811065, "loss": 1.0972, "step": 6905 }, { "epoch": 0.66, "grad_norm": 0.2522652174175764, "learning_rate": 0.00016000918915689432, "loss": 1.0747, "step": 6906 }, { "epoch": 0.66, "grad_norm": 0.3182119441827633, "learning_rate": 0.0001599965339039103, "loss": 1.027, "step": 6907 }, { "epoch": 0.66, "grad_norm": 0.2783524829429487, "learning_rate": 0.0001599838771494753, "loss": 1.0525, "step": 6908 }, { "epoch": 0.66, "grad_norm": 0.26729480846780146, "learning_rate": 0.0001599712188939061, "loss": 1.02, "step": 6909 }, { "epoch": 0.66, "grad_norm": 0.27005772910227627, "learning_rate": 0.00015995855913751946, "loss": 1.1131, "step": 6910 }, { "epoch": 0.66, "grad_norm": 0.23819686657027173, "learning_rate": 0.00015994589788063222, "loss": 1.0546, "step": 6911 }, { "epoch": 0.66, "grad_norm": 0.2937203478865386, "learning_rate": 0.00015993323512356118, "loss": 1.1432, "step": 6912 }, { "epoch": 0.66, "grad_norm": 0.2788834474966195, "learning_rate": 0.00015992057086662323, "loss": 1.2207, "step": 6913 }, { "epoch": 0.66, "grad_norm": 0.26680578605709754, "learning_rate": 0.0001599079051101354, "loss": 1.0555, "step": 6914 }, { "epoch": 0.66, "grad_norm": 0.2829489557203681, "learning_rate": 0.00015989523785441456, "loss": 0.9449, "step": 6915 }, { "epoch": 0.66, "grad_norm": 0.29363875973797865, "learning_rate": 0.00015988256909977777, "loss": 0.9878, "step": 6916 }, { "epoch": 0.66, "grad_norm": 0.2543061502185059, "learning_rate": 0.00015986989884654202, "loss": 1.1064, "step": 6917 }, { "epoch": 0.66, "grad_norm": 0.26512820084084004, "learning_rate": 0.00015985722709502444, "loss": 1.036, "step": 6918 }, { "epoch": 0.66, "grad_norm": 0.28652859367622907, "learning_rate": 0.00015984455384554215, "loss": 1.1668, "step": 6919 }, { "epoch": 0.66, "grad_norm": 0.31723374235235535, "learning_rate": 0.00015983187909841226, "loss": 1.0463, "step": 6920 }, { "epoch": 0.66, "grad_norm": 0.29129953499102984, "learning_rate": 0.00015981920285395202, "loss": 1.058, "step": 6921 }, { "epoch": 0.66, "grad_norm": 0.29993610209681537, "learning_rate": 0.0001598065251124786, "loss": 1.04, "step": 6922 }, { "epoch": 0.66, "grad_norm": 0.2947422459230847, "learning_rate": 0.00015979384587430935, "loss": 1.0852, "step": 6923 }, { "epoch": 0.66, "grad_norm": 0.2714231587486264, "learning_rate": 0.00015978116513976152, "loss": 0.9586, "step": 6924 }, { "epoch": 0.66, "grad_norm": 0.31441338998821106, "learning_rate": 0.00015976848290915244, "loss": 1.0261, "step": 6925 }, { "epoch": 0.66, "grad_norm": 0.3020176096145915, "learning_rate": 0.0001597557991827995, "loss": 1.0744, "step": 6926 }, { "epoch": 0.66, "grad_norm": 0.2941393454482125, "learning_rate": 0.00015974311396102015, "loss": 1.0624, "step": 6927 }, { "epoch": 0.66, "grad_norm": 0.29719508910446507, "learning_rate": 0.00015973042724413183, "loss": 1.18, "step": 6928 }, { "epoch": 0.66, "grad_norm": 0.28110919244257476, "learning_rate": 0.00015971773903245202, "loss": 0.9661, "step": 6929 }, { "epoch": 0.66, "grad_norm": 0.2803124705765725, "learning_rate": 0.00015970504932629823, "loss": 1.0396, "step": 6930 }, { "epoch": 0.66, "grad_norm": 0.29208189180807126, "learning_rate": 0.0001596923581259881, "loss": 1.1863, "step": 6931 }, { "epoch": 0.66, "grad_norm": 0.29689595292251414, "learning_rate": 0.0001596796654318392, "loss": 1.0811, "step": 6932 }, { "epoch": 0.66, "grad_norm": 0.3077207175340659, "learning_rate": 0.00015966697124416914, "loss": 1.0538, "step": 6933 }, { "epoch": 0.66, "grad_norm": 0.30686176921995806, "learning_rate": 0.0001596542755632956, "loss": 1.0504, "step": 6934 }, { "epoch": 0.66, "grad_norm": 0.24712629310974943, "learning_rate": 0.00015964157838953638, "loss": 1.0084, "step": 6935 }, { "epoch": 0.66, "grad_norm": 0.26002780535214853, "learning_rate": 0.00015962887972320914, "loss": 1.0972, "step": 6936 }, { "epoch": 0.66, "grad_norm": 0.29963814827197655, "learning_rate": 0.00015961617956463173, "loss": 1.1617, "step": 6937 }, { "epoch": 0.66, "grad_norm": 0.29022102343364803, "learning_rate": 0.00015960347791412196, "loss": 0.9665, "step": 6938 }, { "epoch": 0.66, "grad_norm": 0.30836910286066316, "learning_rate": 0.00015959077477199765, "loss": 1.0885, "step": 6939 }, { "epoch": 0.66, "grad_norm": 0.2836035210572582, "learning_rate": 0.0001595780701385768, "loss": 1.0539, "step": 6940 }, { "epoch": 0.66, "grad_norm": 0.30435356184547685, "learning_rate": 0.0001595653640141773, "loss": 0.976, "step": 6941 }, { "epoch": 0.66, "grad_norm": 0.26960506796384626, "learning_rate": 0.00015955265639911711, "loss": 1.1245, "step": 6942 }, { "epoch": 0.66, "grad_norm": 0.36290217128036273, "learning_rate": 0.00015953994729371427, "loss": 1.0396, "step": 6943 }, { "epoch": 0.66, "grad_norm": 0.32401610966187167, "learning_rate": 0.00015952723669828683, "loss": 1.0519, "step": 6944 }, { "epoch": 0.66, "grad_norm": 0.27984623740696507, "learning_rate": 0.00015951452461315292, "loss": 1.0498, "step": 6945 }, { "epoch": 0.66, "grad_norm": 0.24996113180352433, "learning_rate": 0.00015950181103863056, "loss": 1.0642, "step": 6946 }, { "epoch": 0.66, "grad_norm": 0.2587685668722977, "learning_rate": 0.000159489095975038, "loss": 1.1062, "step": 6947 }, { "epoch": 0.66, "grad_norm": 0.32763762154630816, "learning_rate": 0.00015947637942269343, "loss": 1.1287, "step": 6948 }, { "epoch": 0.66, "grad_norm": 0.2585215704752277, "learning_rate": 0.0001594636613819151, "loss": 1.0892, "step": 6949 }, { "epoch": 0.66, "grad_norm": 0.2885043831452831, "learning_rate": 0.0001594509418530213, "loss": 1.1353, "step": 6950 }, { "epoch": 0.67, "grad_norm": 0.26515145631234566, "learning_rate": 0.00015943822083633026, "loss": 0.9613, "step": 6951 }, { "epoch": 0.67, "grad_norm": 0.2832070363192226, "learning_rate": 0.00015942549833216043, "loss": 1.047, "step": 6952 }, { "epoch": 0.67, "grad_norm": 0.2851606867131872, "learning_rate": 0.00015941277434083014, "loss": 1.1776, "step": 6953 }, { "epoch": 0.67, "grad_norm": 0.26970800321487326, "learning_rate": 0.00015940004886265781, "loss": 1.0471, "step": 6954 }, { "epoch": 0.67, "grad_norm": 0.30024373875561594, "learning_rate": 0.00015938732189796196, "loss": 1.1125, "step": 6955 }, { "epoch": 0.67, "grad_norm": 0.29870880904188896, "learning_rate": 0.00015937459344706105, "loss": 1.106, "step": 6956 }, { "epoch": 0.67, "grad_norm": 0.2990216369851441, "learning_rate": 0.0001593618635102736, "loss": 1.0811, "step": 6957 }, { "epoch": 0.67, "grad_norm": 0.2848511215046961, "learning_rate": 0.00015934913208791825, "loss": 1.0252, "step": 6958 }, { "epoch": 0.67, "grad_norm": 0.26962263498375855, "learning_rate": 0.00015933639918031353, "loss": 1.1282, "step": 6959 }, { "epoch": 0.67, "grad_norm": 0.3226088976764872, "learning_rate": 0.00015932366478777816, "loss": 1.1156, "step": 6960 }, { "epoch": 0.67, "grad_norm": 0.2775279024141733, "learning_rate": 0.00015931092891063078, "loss": 1.0649, "step": 6961 }, { "epoch": 0.67, "grad_norm": 0.29817813856951386, "learning_rate": 0.0001592981915491901, "loss": 1.0693, "step": 6962 }, { "epoch": 0.67, "grad_norm": 0.2685645767583313, "learning_rate": 0.00015928545270377494, "loss": 1.019, "step": 6963 }, { "epoch": 0.67, "grad_norm": 0.2950719704962928, "learning_rate": 0.00015927271237470408, "loss": 1.0845, "step": 6964 }, { "epoch": 0.67, "grad_norm": 0.2834261349957536, "learning_rate": 0.0001592599705622963, "loss": 1.153, "step": 6965 }, { "epoch": 0.67, "grad_norm": 0.2703350081406132, "learning_rate": 0.00015924722726687058, "loss": 1.1079, "step": 6966 }, { "epoch": 0.67, "grad_norm": 0.2573244560178056, "learning_rate": 0.0001592344824887457, "loss": 1.0111, "step": 6967 }, { "epoch": 0.67, "grad_norm": 0.24815316601701018, "learning_rate": 0.0001592217362282407, "loss": 1.0481, "step": 6968 }, { "epoch": 0.67, "grad_norm": 0.30891957392030694, "learning_rate": 0.0001592089884856745, "loss": 1.0926, "step": 6969 }, { "epoch": 0.67, "grad_norm": 0.30606523617707265, "learning_rate": 0.00015919623926136618, "loss": 1.099, "step": 6970 }, { "epoch": 0.67, "grad_norm": 0.28728857946398434, "learning_rate": 0.00015918348855563477, "loss": 1.1097, "step": 6971 }, { "epoch": 0.67, "grad_norm": 0.26905395320578945, "learning_rate": 0.00015917073636879936, "loss": 1.0316, "step": 6972 }, { "epoch": 0.67, "grad_norm": 0.2791301663055386, "learning_rate": 0.00015915798270117905, "loss": 1.078, "step": 6973 }, { "epoch": 0.67, "grad_norm": 0.27771895204228286, "learning_rate": 0.0001591452275530931, "loss": 0.9038, "step": 6974 }, { "epoch": 0.67, "grad_norm": 0.27933418779286445, "learning_rate": 0.0001591324709248606, "loss": 1.0519, "step": 6975 }, { "epoch": 0.67, "grad_norm": 0.2816750044645727, "learning_rate": 0.00015911971281680088, "loss": 0.9544, "step": 6976 }, { "epoch": 0.67, "grad_norm": 0.285922508579133, "learning_rate": 0.0001591069532292332, "loss": 1.0725, "step": 6977 }, { "epoch": 0.67, "grad_norm": 0.33215001802712657, "learning_rate": 0.00015909419216247688, "loss": 1.0101, "step": 6978 }, { "epoch": 0.67, "grad_norm": 0.23393732531173045, "learning_rate": 0.00015908142961685125, "loss": 1.0705, "step": 6979 }, { "epoch": 0.67, "grad_norm": 0.257596551482316, "learning_rate": 0.0001590686655926757, "loss": 1.1013, "step": 6980 }, { "epoch": 0.67, "grad_norm": 0.3135249829190419, "learning_rate": 0.00015905590009026967, "loss": 1.0304, "step": 6981 }, { "epoch": 0.67, "grad_norm": 0.2856466968173445, "learning_rate": 0.00015904313310995263, "loss": 0.9811, "step": 6982 }, { "epoch": 0.67, "grad_norm": 0.2829987935572402, "learning_rate": 0.00015903036465204407, "loss": 1.1061, "step": 6983 }, { "epoch": 0.67, "grad_norm": 0.2780452591589329, "learning_rate": 0.00015901759471686358, "loss": 1.1667, "step": 6984 }, { "epoch": 0.67, "grad_norm": 0.27814002322278586, "learning_rate": 0.00015900482330473062, "loss": 1.1414, "step": 6985 }, { "epoch": 0.67, "grad_norm": 0.2860266253901477, "learning_rate": 0.0001589920504159649, "loss": 0.9647, "step": 6986 }, { "epoch": 0.67, "grad_norm": 0.2760940301311592, "learning_rate": 0.0001589792760508861, "loss": 1.0185, "step": 6987 }, { "epoch": 0.67, "grad_norm": 0.2706769903514577, "learning_rate": 0.00015896650020981378, "loss": 1.2201, "step": 6988 }, { "epoch": 0.67, "grad_norm": 0.2622304954412599, "learning_rate": 0.00015895372289306776, "loss": 1.0844, "step": 6989 }, { "epoch": 0.67, "grad_norm": 0.30180292321521496, "learning_rate": 0.00015894094410096775, "loss": 0.9252, "step": 6990 }, { "epoch": 0.67, "grad_norm": 0.280964051345383, "learning_rate": 0.0001589281638338336, "loss": 1.0206, "step": 6991 }, { "epoch": 0.67, "grad_norm": 0.2722800997770079, "learning_rate": 0.0001589153820919851, "loss": 0.9916, "step": 6992 }, { "epoch": 0.67, "grad_norm": 0.27884975208948837, "learning_rate": 0.00015890259887574215, "loss": 0.9978, "step": 6993 }, { "epoch": 0.67, "grad_norm": 0.24598496860742308, "learning_rate": 0.00015888981418542462, "loss": 1.0904, "step": 6994 }, { "epoch": 0.67, "grad_norm": 0.2961053801063102, "learning_rate": 0.00015887702802135252, "loss": 0.9764, "step": 6995 }, { "epoch": 0.67, "grad_norm": 0.26389220997829704, "learning_rate": 0.00015886424038384577, "loss": 1.0735, "step": 6996 }, { "epoch": 0.67, "grad_norm": 0.3058819655409103, "learning_rate": 0.00015885145127322438, "loss": 1.1001, "step": 6997 }, { "epoch": 0.67, "grad_norm": 0.279716453620408, "learning_rate": 0.00015883866068980846, "loss": 1.1435, "step": 6998 }, { "epoch": 0.67, "grad_norm": 0.2938126610887859, "learning_rate": 0.00015882586863391807, "loss": 1.0411, "step": 6999 }, { "epoch": 0.67, "grad_norm": 0.24676335418681838, "learning_rate": 0.00015881307510587337, "loss": 0.9959, "step": 7000 }, { "epoch": 0.67, "grad_norm": 0.2653940896900372, "learning_rate": 0.0001588002801059945, "loss": 1.0835, "step": 7001 }, { "epoch": 0.67, "grad_norm": 0.3109595788932631, "learning_rate": 0.00015878748363460163, "loss": 1.0282, "step": 7002 }, { "epoch": 0.67, "grad_norm": 0.30081836243424015, "learning_rate": 0.00015877468569201506, "loss": 0.9415, "step": 7003 }, { "epoch": 0.67, "grad_norm": 0.31448590635210893, "learning_rate": 0.00015876188627855507, "loss": 1.074, "step": 7004 }, { "epoch": 0.67, "grad_norm": 0.3017891643305516, "learning_rate": 0.00015874908539454188, "loss": 1.0724, "step": 7005 }, { "epoch": 0.67, "grad_norm": 0.3277818013492165, "learning_rate": 0.00015873628304029596, "loss": 1.1018, "step": 7006 }, { "epoch": 0.67, "grad_norm": 0.2802892779964618, "learning_rate": 0.00015872347921613763, "loss": 1.0991, "step": 7007 }, { "epoch": 0.67, "grad_norm": 0.28604449999798276, "learning_rate": 0.0001587106739223873, "loss": 0.9591, "step": 7008 }, { "epoch": 0.67, "grad_norm": 0.2974499942407421, "learning_rate": 0.0001586978671593655, "loss": 1.0818, "step": 7009 }, { "epoch": 0.67, "grad_norm": 0.26346530008044805, "learning_rate": 0.00015868505892739266, "loss": 1.0774, "step": 7010 }, { "epoch": 0.67, "grad_norm": 0.30282010918915797, "learning_rate": 0.00015867224922678933, "loss": 1.0961, "step": 7011 }, { "epoch": 0.67, "grad_norm": 0.31016719288564637, "learning_rate": 0.0001586594380578761, "loss": 1.1236, "step": 7012 }, { "epoch": 0.67, "grad_norm": 0.28052033504795476, "learning_rate": 0.00015864662542097358, "loss": 1.0947, "step": 7013 }, { "epoch": 0.67, "grad_norm": 0.23638403426307897, "learning_rate": 0.00015863381131640236, "loss": 1.0231, "step": 7014 }, { "epoch": 0.67, "grad_norm": 0.291740553126471, "learning_rate": 0.00015862099574448317, "loss": 0.9802, "step": 7015 }, { "epoch": 0.67, "grad_norm": 0.3029857623248768, "learning_rate": 0.00015860817870553677, "loss": 1.12, "step": 7016 }, { "epoch": 0.67, "grad_norm": 0.2630855945498341, "learning_rate": 0.00015859536019988384, "loss": 0.9507, "step": 7017 }, { "epoch": 0.67, "grad_norm": 0.2758349794123403, "learning_rate": 0.00015858254022784515, "loss": 1.1074, "step": 7018 }, { "epoch": 0.67, "grad_norm": 0.27661938873325614, "learning_rate": 0.00015856971878974163, "loss": 1.0374, "step": 7019 }, { "epoch": 0.67, "grad_norm": 0.2714412044951736, "learning_rate": 0.00015855689588589405, "loss": 1.052, "step": 7020 }, { "epoch": 0.67, "grad_norm": 0.2677737703288316, "learning_rate": 0.00015854407151662337, "loss": 1.0147, "step": 7021 }, { "epoch": 0.67, "grad_norm": 0.2927994644038343, "learning_rate": 0.0001585312456822505, "loss": 1.0889, "step": 7022 }, { "epoch": 0.67, "grad_norm": 0.2790949416690381, "learning_rate": 0.0001585184183830964, "loss": 1.1484, "step": 7023 }, { "epoch": 0.67, "grad_norm": 0.28034621779286634, "learning_rate": 0.00015850558961948217, "loss": 1.0267, "step": 7024 }, { "epoch": 0.67, "grad_norm": 0.2745504280573277, "learning_rate": 0.00015849275939172874, "loss": 1.1027, "step": 7025 }, { "epoch": 0.67, "grad_norm": 0.26978925236279233, "learning_rate": 0.00015847992770015725, "loss": 0.9182, "step": 7026 }, { "epoch": 0.67, "grad_norm": 0.29839939566941587, "learning_rate": 0.00015846709454508883, "loss": 1.0897, "step": 7027 }, { "epoch": 0.67, "grad_norm": 0.26196842338250126, "learning_rate": 0.0001584542599268446, "loss": 1.0644, "step": 7028 }, { "epoch": 0.67, "grad_norm": 0.28373187399028676, "learning_rate": 0.0001584414238457458, "loss": 1.0633, "step": 7029 }, { "epoch": 0.67, "grad_norm": 0.2537841362135921, "learning_rate": 0.0001584285863021136, "loss": 1.0892, "step": 7030 }, { "epoch": 0.67, "grad_norm": 0.27473559874173337, "learning_rate": 0.00015841574729626935, "loss": 0.9814, "step": 7031 }, { "epoch": 0.67, "grad_norm": 0.30313556481972587, "learning_rate": 0.00015840290682853428, "loss": 1.016, "step": 7032 }, { "epoch": 0.67, "grad_norm": 0.28512884288704177, "learning_rate": 0.0001583900648992298, "loss": 0.9909, "step": 7033 }, { "epoch": 0.67, "grad_norm": 0.2551073361295876, "learning_rate": 0.00015837722150867722, "loss": 0.9779, "step": 7034 }, { "epoch": 0.67, "grad_norm": 0.2939908816315301, "learning_rate": 0.00015836437665719798, "loss": 1.1285, "step": 7035 }, { "epoch": 0.67, "grad_norm": 0.2725649426346149, "learning_rate": 0.00015835153034511357, "loss": 1.1472, "step": 7036 }, { "epoch": 0.67, "grad_norm": 0.2718133362148875, "learning_rate": 0.0001583386825727454, "loss": 1.0443, "step": 7037 }, { "epoch": 0.67, "grad_norm": 0.27419111607826285, "learning_rate": 0.00015832583334041505, "loss": 1.0012, "step": 7038 }, { "epoch": 0.67, "grad_norm": 0.2849089224699415, "learning_rate": 0.00015831298264844406, "loss": 1.0507, "step": 7039 }, { "epoch": 0.67, "grad_norm": 0.3068910572836713, "learning_rate": 0.00015830013049715404, "loss": 1.0838, "step": 7040 }, { "epoch": 0.67, "grad_norm": 0.2779576724478555, "learning_rate": 0.0001582872768868666, "loss": 1.0766, "step": 7041 }, { "epoch": 0.67, "grad_norm": 0.3302291612230447, "learning_rate": 0.00015827442181790344, "loss": 0.9843, "step": 7042 }, { "epoch": 0.67, "grad_norm": 0.25384666865589006, "learning_rate": 0.00015826156529058624, "loss": 1.0086, "step": 7043 }, { "epoch": 0.67, "grad_norm": 0.27280371588140856, "learning_rate": 0.00015824870730523675, "loss": 1.0686, "step": 7044 }, { "epoch": 0.67, "grad_norm": 0.3030164504363727, "learning_rate": 0.00015823584786217677, "loss": 1.0226, "step": 7045 }, { "epoch": 0.67, "grad_norm": 0.26909739332574156, "learning_rate": 0.00015822298696172805, "loss": 0.99, "step": 7046 }, { "epoch": 0.67, "grad_norm": 0.2877874749400455, "learning_rate": 0.00015821012460421255, "loss": 0.9923, "step": 7047 }, { "epoch": 0.67, "grad_norm": 0.2877538039040546, "learning_rate": 0.00015819726078995208, "loss": 1.0115, "step": 7048 }, { "epoch": 0.67, "grad_norm": 0.30848201813052933, "learning_rate": 0.00015818439551926856, "loss": 1.0591, "step": 7049 }, { "epoch": 0.67, "grad_norm": 0.29140546183062116, "learning_rate": 0.000158171528792484, "loss": 1.0429, "step": 7050 }, { "epoch": 0.67, "grad_norm": 0.26994340789160964, "learning_rate": 0.00015815866060992035, "loss": 1.011, "step": 7051 }, { "epoch": 0.67, "grad_norm": 0.2792969580723623, "learning_rate": 0.00015814579097189966, "loss": 1.0891, "step": 7052 }, { "epoch": 0.67, "grad_norm": 0.28250949172509243, "learning_rate": 0.00015813291987874407, "loss": 1.1065, "step": 7053 }, { "epoch": 0.67, "grad_norm": 0.2978217599721609, "learning_rate": 0.00015812004733077554, "loss": 0.9388, "step": 7054 }, { "epoch": 0.67, "grad_norm": 0.2908978622536936, "learning_rate": 0.00015810717332831635, "loss": 1.0988, "step": 7055 }, { "epoch": 0.68, "grad_norm": 0.2799056103902689, "learning_rate": 0.0001580942978716886, "loss": 1.1076, "step": 7056 }, { "epoch": 0.68, "grad_norm": 0.30269500535419785, "learning_rate": 0.00015808142096121456, "loss": 1.1737, "step": 7057 }, { "epoch": 0.68, "grad_norm": 0.28238103258320846, "learning_rate": 0.00015806854259721646, "loss": 1.0724, "step": 7058 }, { "epoch": 0.68, "grad_norm": 0.2985043169531798, "learning_rate": 0.00015805566278001657, "loss": 0.9103, "step": 7059 }, { "epoch": 0.68, "grad_norm": 0.293719688850402, "learning_rate": 0.00015804278150993722, "loss": 1.0877, "step": 7060 }, { "epoch": 0.68, "grad_norm": 0.27699549920215416, "learning_rate": 0.00015802989878730084, "loss": 1.1226, "step": 7061 }, { "epoch": 0.68, "grad_norm": 0.26487298015420774, "learning_rate": 0.00015801701461242974, "loss": 1.1144, "step": 7062 }, { "epoch": 0.68, "grad_norm": 0.26324552701590875, "learning_rate": 0.00015800412898564636, "loss": 1.173, "step": 7063 }, { "epoch": 0.68, "grad_norm": 0.27384596081191975, "learning_rate": 0.00015799124190727322, "loss": 0.9868, "step": 7064 }, { "epoch": 0.68, "grad_norm": 0.25963681695290847, "learning_rate": 0.00015797835337763282, "loss": 0.9288, "step": 7065 }, { "epoch": 0.68, "grad_norm": 0.29013003407961624, "learning_rate": 0.00015796546339704766, "loss": 1.0005, "step": 7066 }, { "epoch": 0.68, "grad_norm": 0.3020156469079804, "learning_rate": 0.00015795257196584038, "loss": 0.9843, "step": 7067 }, { "epoch": 0.68, "grad_norm": 0.2895277107493972, "learning_rate": 0.00015793967908433353, "loss": 1.105, "step": 7068 }, { "epoch": 0.68, "grad_norm": 0.2755240824809856, "learning_rate": 0.0001579267847528498, "loss": 1.082, "step": 7069 }, { "epoch": 0.68, "grad_norm": 0.30036604934270644, "learning_rate": 0.00015791388897171192, "loss": 1.2018, "step": 7070 }, { "epoch": 0.68, "grad_norm": 0.30614155747981436, "learning_rate": 0.00015790099174124253, "loss": 1.0759, "step": 7071 }, { "epoch": 0.68, "grad_norm": 0.2941340821458282, "learning_rate": 0.00015788809306176447, "loss": 1.0597, "step": 7072 }, { "epoch": 0.68, "grad_norm": 0.30856171230251395, "learning_rate": 0.00015787519293360044, "loss": 1.1318, "step": 7073 }, { "epoch": 0.68, "grad_norm": 0.29580057657767084, "learning_rate": 0.00015786229135707338, "loss": 1.0522, "step": 7074 }, { "epoch": 0.68, "grad_norm": 0.274616244018645, "learning_rate": 0.0001578493883325061, "loss": 1.0299, "step": 7075 }, { "epoch": 0.68, "grad_norm": 0.3114772800268081, "learning_rate": 0.00015783648386022151, "loss": 1.161, "step": 7076 }, { "epoch": 0.68, "grad_norm": 0.2802238017688198, "learning_rate": 0.0001578235779405426, "loss": 1.0197, "step": 7077 }, { "epoch": 0.68, "grad_norm": 0.2944810135727739, "learning_rate": 0.00015781067057379228, "loss": 1.1013, "step": 7078 }, { "epoch": 0.68, "grad_norm": 0.24998357909349614, "learning_rate": 0.00015779776176029356, "loss": 1.0692, "step": 7079 }, { "epoch": 0.68, "grad_norm": 0.2945811953423457, "learning_rate": 0.0001577848515003696, "loss": 1.068, "step": 7080 }, { "epoch": 0.68, "grad_norm": 0.25806411996635104, "learning_rate": 0.00015777193979434338, "loss": 1.0476, "step": 7081 }, { "epoch": 0.68, "grad_norm": 0.2598704969021484, "learning_rate": 0.00015775902664253808, "loss": 1.0567, "step": 7082 }, { "epoch": 0.68, "grad_norm": 0.28645413042100387, "learning_rate": 0.00015774611204527681, "loss": 1.0646, "step": 7083 }, { "epoch": 0.68, "grad_norm": 0.25410792181961595, "learning_rate": 0.0001577331960028828, "loss": 0.9907, "step": 7084 }, { "epoch": 0.68, "grad_norm": 0.26092421697879675, "learning_rate": 0.00015772027851567927, "loss": 0.9608, "step": 7085 }, { "epoch": 0.68, "grad_norm": 0.28943346794498837, "learning_rate": 0.00015770735958398952, "loss": 1.0073, "step": 7086 }, { "epoch": 0.68, "grad_norm": 0.26566974144868033, "learning_rate": 0.0001576944392081368, "loss": 1.109, "step": 7087 }, { "epoch": 0.68, "grad_norm": 0.28616557026410305, "learning_rate": 0.0001576815173884445, "loss": 1.1788, "step": 7088 }, { "epoch": 0.68, "grad_norm": 0.3140389757089799, "learning_rate": 0.00015766859412523596, "loss": 1.1028, "step": 7089 }, { "epoch": 0.68, "grad_norm": 0.2903534504256985, "learning_rate": 0.0001576556694188346, "loss": 1.0848, "step": 7090 }, { "epoch": 0.68, "grad_norm": 0.2739660674757886, "learning_rate": 0.00015764274326956392, "loss": 1.088, "step": 7091 }, { "epoch": 0.68, "grad_norm": 0.26183889146766914, "learning_rate": 0.00015762981567774733, "loss": 1.045, "step": 7092 }, { "epoch": 0.68, "grad_norm": 0.2773165562228698, "learning_rate": 0.0001576168866437084, "loss": 1.0625, "step": 7093 }, { "epoch": 0.68, "grad_norm": 0.28250207452060827, "learning_rate": 0.00015760395616777064, "loss": 1.0738, "step": 7094 }, { "epoch": 0.68, "grad_norm": 0.2664810800693953, "learning_rate": 0.0001575910242502577, "loss": 1.1199, "step": 7095 }, { "epoch": 0.68, "grad_norm": 0.29450171685022813, "learning_rate": 0.00015757809089149319, "loss": 1.0634, "step": 7096 }, { "epoch": 0.68, "grad_norm": 0.28549635512987454, "learning_rate": 0.00015756515609180073, "loss": 1.0337, "step": 7097 }, { "epoch": 0.68, "grad_norm": 0.28429692726911365, "learning_rate": 0.00015755221985150412, "loss": 1.1475, "step": 7098 }, { "epoch": 0.68, "grad_norm": 0.30253389968364547, "learning_rate": 0.000157539282170927, "loss": 1.047, "step": 7099 }, { "epoch": 0.68, "grad_norm": 0.3098958322121182, "learning_rate": 0.00015752634305039317, "loss": 0.965, "step": 7100 }, { "epoch": 0.68, "grad_norm": 0.25143769789194087, "learning_rate": 0.0001575134024902265, "loss": 0.9581, "step": 7101 }, { "epoch": 0.68, "grad_norm": 0.3016367636629631, "learning_rate": 0.00015750046049075076, "loss": 1.0633, "step": 7102 }, { "epoch": 0.68, "grad_norm": 0.3271227303031079, "learning_rate": 0.00015748751705228984, "loss": 0.9946, "step": 7103 }, { "epoch": 0.68, "grad_norm": 0.25230564082743323, "learning_rate": 0.00015747457217516768, "loss": 0.998, "step": 7104 }, { "epoch": 0.68, "grad_norm": 0.29985887178796994, "learning_rate": 0.00015746162585970826, "loss": 1.0747, "step": 7105 }, { "epoch": 0.68, "grad_norm": 0.3268684025188319, "learning_rate": 0.00015744867810623553, "loss": 1.2323, "step": 7106 }, { "epoch": 0.68, "grad_norm": 0.24907462724647375, "learning_rate": 0.0001574357289150735, "loss": 1.0212, "step": 7107 }, { "epoch": 0.68, "grad_norm": 0.2670820715697481, "learning_rate": 0.0001574227782865463, "loss": 0.9549, "step": 7108 }, { "epoch": 0.68, "grad_norm": 0.2872956326475909, "learning_rate": 0.00015740982622097793, "loss": 1.0685, "step": 7109 }, { "epoch": 0.68, "grad_norm": 0.3131384318834513, "learning_rate": 0.00015739687271869258, "loss": 1.0596, "step": 7110 }, { "epoch": 0.68, "grad_norm": 0.2658074059936171, "learning_rate": 0.00015738391778001446, "loss": 1.0484, "step": 7111 }, { "epoch": 0.68, "grad_norm": 0.2628890032193891, "learning_rate": 0.00015737096140526773, "loss": 0.9861, "step": 7112 }, { "epoch": 0.68, "grad_norm": 0.3111781620355991, "learning_rate": 0.0001573580035947766, "loss": 1.0708, "step": 7113 }, { "epoch": 0.68, "grad_norm": 0.28054369956222347, "learning_rate": 0.0001573450443488654, "loss": 1.0379, "step": 7114 }, { "epoch": 0.68, "grad_norm": 0.2972730553838039, "learning_rate": 0.00015733208366785847, "loss": 1.1547, "step": 7115 }, { "epoch": 0.68, "grad_norm": 0.2731099842689578, "learning_rate": 0.00015731912155208004, "loss": 1.0216, "step": 7116 }, { "epoch": 0.68, "grad_norm": 0.25449973903583906, "learning_rate": 0.0001573061580018546, "loss": 1.0567, "step": 7117 }, { "epoch": 0.68, "grad_norm": 0.3130389985554061, "learning_rate": 0.00015729319301750655, "loss": 1.0886, "step": 7118 }, { "epoch": 0.68, "grad_norm": 0.2764607660175635, "learning_rate": 0.00015728022659936033, "loss": 0.9724, "step": 7119 }, { "epoch": 0.68, "grad_norm": 0.24734793912425435, "learning_rate": 0.0001572672587477404, "loss": 1.0651, "step": 7120 }, { "epoch": 0.68, "grad_norm": 0.28747093554729464, "learning_rate": 0.00015725428946297137, "loss": 1.1001, "step": 7121 }, { "epoch": 0.68, "grad_norm": 0.270729761427449, "learning_rate": 0.0001572413187453778, "loss": 1.0384, "step": 7122 }, { "epoch": 0.68, "grad_norm": 0.2928842971220595, "learning_rate": 0.00015722834659528422, "loss": 1.1656, "step": 7123 }, { "epoch": 0.68, "grad_norm": 0.2793916142108935, "learning_rate": 0.00015721537301301527, "loss": 0.9688, "step": 7124 }, { "epoch": 0.68, "grad_norm": 0.29909818823600665, "learning_rate": 0.0001572023979988957, "loss": 0.9318, "step": 7125 }, { "epoch": 0.68, "grad_norm": 0.2696167713839793, "learning_rate": 0.00015718942155325011, "loss": 1.1889, "step": 7126 }, { "epoch": 0.68, "grad_norm": 0.29698260893246986, "learning_rate": 0.00015717644367640334, "loss": 1.1648, "step": 7127 }, { "epoch": 0.68, "grad_norm": 0.27565722448673946, "learning_rate": 0.00015716346436868016, "loss": 1.0083, "step": 7128 }, { "epoch": 0.68, "grad_norm": 0.27200588461690844, "learning_rate": 0.00015715048363040533, "loss": 1.0808, "step": 7129 }, { "epoch": 0.68, "grad_norm": 0.274483688022852, "learning_rate": 0.00015713750146190372, "loss": 1.0244, "step": 7130 }, { "epoch": 0.68, "grad_norm": 0.2603147750459217, "learning_rate": 0.00015712451786350023, "loss": 1.0541, "step": 7131 }, { "epoch": 0.68, "grad_norm": 0.2611050046236202, "learning_rate": 0.0001571115328355198, "loss": 0.9513, "step": 7132 }, { "epoch": 0.68, "grad_norm": 0.2611354213480838, "learning_rate": 0.00015709854637828733, "loss": 1.0073, "step": 7133 }, { "epoch": 0.68, "grad_norm": 0.29780693358025156, "learning_rate": 0.0001570855584921279, "loss": 1.0521, "step": 7134 }, { "epoch": 0.68, "grad_norm": 0.298538656430755, "learning_rate": 0.00015707256917736647, "loss": 1.2377, "step": 7135 }, { "epoch": 0.68, "grad_norm": 0.2748841653644941, "learning_rate": 0.0001570595784343281, "loss": 0.9403, "step": 7136 }, { "epoch": 0.68, "grad_norm": 0.2746937829833261, "learning_rate": 0.00015704658626333794, "loss": 1.1224, "step": 7137 }, { "epoch": 0.68, "grad_norm": 0.2842293587504884, "learning_rate": 0.00015703359266472112, "loss": 1.1599, "step": 7138 }, { "epoch": 0.68, "grad_norm": 0.2670841998105975, "learning_rate": 0.0001570205976388028, "loss": 1.138, "step": 7139 }, { "epoch": 0.68, "grad_norm": 0.27756874671553816, "learning_rate": 0.00015700760118590815, "loss": 0.9721, "step": 7140 }, { "epoch": 0.68, "grad_norm": 0.28756494584263737, "learning_rate": 0.00015699460330636248, "loss": 1.1181, "step": 7141 }, { "epoch": 0.68, "grad_norm": 0.32898360339581917, "learning_rate": 0.00015698160400049105, "loss": 1.219, "step": 7142 }, { "epoch": 0.68, "grad_norm": 0.2803241831359587, "learning_rate": 0.00015696860326861917, "loss": 1.1124, "step": 7143 }, { "epoch": 0.68, "grad_norm": 0.27535423833378253, "learning_rate": 0.00015695560111107218, "loss": 1.1046, "step": 7144 }, { "epoch": 0.68, "grad_norm": 0.2606122473280347, "learning_rate": 0.0001569425975281755, "loss": 1.158, "step": 7145 }, { "epoch": 0.68, "grad_norm": 0.26610041445452237, "learning_rate": 0.00015692959252025447, "loss": 1.1423, "step": 7146 }, { "epoch": 0.68, "grad_norm": 0.2953405796549938, "learning_rate": 0.00015691658608763467, "loss": 1.1065, "step": 7147 }, { "epoch": 0.68, "grad_norm": 0.2809208524967904, "learning_rate": 0.00015690357823064147, "loss": 1.0091, "step": 7148 }, { "epoch": 0.68, "grad_norm": 0.298379105314993, "learning_rate": 0.00015689056894960054, "loss": 0.9916, "step": 7149 }, { "epoch": 0.68, "grad_norm": 0.29683307271349985, "learning_rate": 0.00015687755824483733, "loss": 1.1125, "step": 7150 }, { "epoch": 0.68, "grad_norm": 0.29899393435333316, "learning_rate": 0.00015686454611667745, "loss": 1.0796, "step": 7151 }, { "epoch": 0.68, "grad_norm": 0.31478792374668824, "learning_rate": 0.00015685153256544658, "loss": 1.0593, "step": 7152 }, { "epoch": 0.68, "grad_norm": 0.2930746130040074, "learning_rate": 0.0001568385175914704, "loss": 1.0662, "step": 7153 }, { "epoch": 0.68, "grad_norm": 0.2882046106628799, "learning_rate": 0.00015682550119507457, "loss": 1.0464, "step": 7154 }, { "epoch": 0.68, "grad_norm": 0.24774454066142032, "learning_rate": 0.0001568124833765849, "loss": 1.0727, "step": 7155 }, { "epoch": 0.68, "grad_norm": 0.28400220165986323, "learning_rate": 0.0001567994641363271, "loss": 1.2001, "step": 7156 }, { "epoch": 0.68, "grad_norm": 0.2847089193485156, "learning_rate": 0.000156786443474627, "loss": 1.0203, "step": 7157 }, { "epoch": 0.68, "grad_norm": 0.25521289971018285, "learning_rate": 0.0001567734213918105, "loss": 1.0875, "step": 7158 }, { "epoch": 0.68, "grad_norm": 0.26777210499225285, "learning_rate": 0.0001567603978882034, "loss": 1.0413, "step": 7159 }, { "epoch": 0.69, "grad_norm": 0.29958229941874526, "learning_rate": 0.00015674737296413171, "loss": 1.1779, "step": 7160 }, { "epoch": 0.69, "grad_norm": 0.2973923709791303, "learning_rate": 0.00015673434661992133, "loss": 1.0605, "step": 7161 }, { "epoch": 0.69, "grad_norm": 0.29890629646161765, "learning_rate": 0.00015672131885589827, "loss": 1.1378, "step": 7162 }, { "epoch": 0.69, "grad_norm": 0.27465315968911586, "learning_rate": 0.00015670828967238857, "loss": 1.0176, "step": 7163 }, { "epoch": 0.69, "grad_norm": 0.27330309587431983, "learning_rate": 0.00015669525906971825, "loss": 1.099, "step": 7164 }, { "epoch": 0.69, "grad_norm": 0.2830955541275837, "learning_rate": 0.00015668222704821346, "loss": 1.0174, "step": 7165 }, { "epoch": 0.69, "grad_norm": 0.2918518106052513, "learning_rate": 0.00015666919360820034, "loss": 1.0828, "step": 7166 }, { "epoch": 0.69, "grad_norm": 0.2938118596159364, "learning_rate": 0.000156656158750005, "loss": 1.1368, "step": 7167 }, { "epoch": 0.69, "grad_norm": 0.33564117245367897, "learning_rate": 0.0001566431224739537, "loss": 1.1594, "step": 7168 }, { "epoch": 0.69, "grad_norm": 0.2791063027357139, "learning_rate": 0.00015663008478037263, "loss": 1.0643, "step": 7169 }, { "epoch": 0.69, "grad_norm": 0.2836589857893266, "learning_rate": 0.00015661704566958816, "loss": 1.0865, "step": 7170 }, { "epoch": 0.69, "grad_norm": 0.3268578411481459, "learning_rate": 0.00015660400514192648, "loss": 1.1146, "step": 7171 }, { "epoch": 0.69, "grad_norm": 0.26432781847496883, "learning_rate": 0.00015659096319771401, "loss": 1.2087, "step": 7172 }, { "epoch": 0.69, "grad_norm": 0.2947543226709198, "learning_rate": 0.00015657791983727715, "loss": 1.0841, "step": 7173 }, { "epoch": 0.69, "grad_norm": 0.3064445919955032, "learning_rate": 0.00015656487506094226, "loss": 1.1338, "step": 7174 }, { "epoch": 0.69, "grad_norm": 0.2768287567420236, "learning_rate": 0.00015655182886903582, "loss": 1.1217, "step": 7175 }, { "epoch": 0.69, "grad_norm": 0.3115480635288813, "learning_rate": 0.00015653878126188433, "loss": 1.1119, "step": 7176 }, { "epoch": 0.69, "grad_norm": 0.26658700110300837, "learning_rate": 0.00015652573223981432, "loss": 1.1532, "step": 7177 }, { "epoch": 0.69, "grad_norm": 0.30729315346172287, "learning_rate": 0.0001565126818031523, "loss": 1.0824, "step": 7178 }, { "epoch": 0.69, "grad_norm": 0.27545018818322364, "learning_rate": 0.00015649962995222493, "loss": 1.0008, "step": 7179 }, { "epoch": 0.69, "grad_norm": 0.2840294196959938, "learning_rate": 0.0001564865766873588, "loss": 0.9888, "step": 7180 }, { "epoch": 0.69, "grad_norm": 0.2704386331778757, "learning_rate": 0.00015647352200888056, "loss": 1.0845, "step": 7181 }, { "epoch": 0.69, "grad_norm": 0.309378454139959, "learning_rate": 0.00015646046591711698, "loss": 1.1094, "step": 7182 }, { "epoch": 0.69, "grad_norm": 0.33236253311844427, "learning_rate": 0.00015644740841239477, "loss": 1.0607, "step": 7183 }, { "epoch": 0.69, "grad_norm": 0.2654938165964374, "learning_rate": 0.00015643434949504066, "loss": 1.0977, "step": 7184 }, { "epoch": 0.69, "grad_norm": 0.25891560294027866, "learning_rate": 0.00015642128916538148, "loss": 1.0862, "step": 7185 }, { "epoch": 0.69, "grad_norm": 0.30191735715591245, "learning_rate": 0.00015640822742374411, "loss": 1.0986, "step": 7186 }, { "epoch": 0.69, "grad_norm": 0.3016522569398602, "learning_rate": 0.00015639516427045538, "loss": 1.1071, "step": 7187 }, { "epoch": 0.69, "grad_norm": 0.3747262023726705, "learning_rate": 0.00015638209970584218, "loss": 0.9347, "step": 7188 }, { "epoch": 0.69, "grad_norm": 0.26969894694899865, "learning_rate": 0.0001563690337302316, "loss": 0.9472, "step": 7189 }, { "epoch": 0.69, "grad_norm": 0.27707566766666775, "learning_rate": 0.00015635596634395045, "loss": 1.0062, "step": 7190 }, { "epoch": 0.69, "grad_norm": 0.299605953934922, "learning_rate": 0.00015634289754732584, "loss": 1.0789, "step": 7191 }, { "epoch": 0.69, "grad_norm": 0.2756403847856143, "learning_rate": 0.00015632982734068479, "loss": 1.0478, "step": 7192 }, { "epoch": 0.69, "grad_norm": 0.28164477492401824, "learning_rate": 0.00015631675572435442, "loss": 1.0234, "step": 7193 }, { "epoch": 0.69, "grad_norm": 0.3139943484530815, "learning_rate": 0.00015630368269866187, "loss": 1.0499, "step": 7194 }, { "epoch": 0.69, "grad_norm": 0.30405542664327845, "learning_rate": 0.0001562906082639342, "loss": 1.023, "step": 7195 }, { "epoch": 0.69, "grad_norm": 0.2874490807251814, "learning_rate": 0.00015627753242049877, "loss": 0.9521, "step": 7196 }, { "epoch": 0.69, "grad_norm": 0.2643259838229851, "learning_rate": 0.0001562644551686827, "loss": 1.0435, "step": 7197 }, { "epoch": 0.69, "grad_norm": 0.2865014648758085, "learning_rate": 0.00015625137650881324, "loss": 1.0175, "step": 7198 }, { "epoch": 0.69, "grad_norm": 0.30454373875693874, "learning_rate": 0.00015623829644121777, "loss": 1.134, "step": 7199 }, { "epoch": 0.69, "grad_norm": 0.31859855773654133, "learning_rate": 0.00015622521496622355, "loss": 1.0659, "step": 7200 }, { "epoch": 0.69, "grad_norm": 0.2858177196834928, "learning_rate": 0.00015621213208415804, "loss": 0.9598, "step": 7201 }, { "epoch": 0.69, "grad_norm": 0.26598227866462754, "learning_rate": 0.00015619904779534856, "loss": 0.9907, "step": 7202 }, { "epoch": 0.69, "grad_norm": 0.2951400583438705, "learning_rate": 0.00015618596210012256, "loss": 1.0765, "step": 7203 }, { "epoch": 0.69, "grad_norm": 0.3114702561846883, "learning_rate": 0.00015617287499880762, "loss": 1.1041, "step": 7204 }, { "epoch": 0.69, "grad_norm": 0.278499185049389, "learning_rate": 0.00015615978649173112, "loss": 0.9533, "step": 7205 }, { "epoch": 0.69, "grad_norm": 0.28555299537963824, "learning_rate": 0.0001561466965792207, "loss": 1.0967, "step": 7206 }, { "epoch": 0.69, "grad_norm": 0.2636715267212951, "learning_rate": 0.00015613360526160392, "loss": 1.086, "step": 7207 }, { "epoch": 0.69, "grad_norm": 0.29854739591999657, "learning_rate": 0.00015612051253920836, "loss": 1.1259, "step": 7208 }, { "epoch": 0.69, "grad_norm": 0.29584977312779975, "learning_rate": 0.00015610741841236173, "loss": 1.1717, "step": 7209 }, { "epoch": 0.69, "grad_norm": 0.2853540873126679, "learning_rate": 0.00015609432288139167, "loss": 1.0673, "step": 7210 }, { "epoch": 0.69, "grad_norm": 0.26104277026079287, "learning_rate": 0.00015608122594662596, "loss": 1.0772, "step": 7211 }, { "epoch": 0.69, "grad_norm": 0.3189693042004204, "learning_rate": 0.00015606812760839226, "loss": 1.0342, "step": 7212 }, { "epoch": 0.69, "grad_norm": 0.27816239495102424, "learning_rate": 0.00015605502786701848, "loss": 1.0825, "step": 7213 }, { "epoch": 0.69, "grad_norm": 0.2635774551759029, "learning_rate": 0.0001560419267228324, "loss": 1.0783, "step": 7214 }, { "epoch": 0.69, "grad_norm": 0.29195022738920845, "learning_rate": 0.00015602882417616184, "loss": 1.1758, "step": 7215 }, { "epoch": 0.69, "grad_norm": 0.3144273867404004, "learning_rate": 0.0001560157202273348, "loss": 1.0973, "step": 7216 }, { "epoch": 0.69, "grad_norm": 0.3065507511047062, "learning_rate": 0.00015600261487667912, "loss": 1.1111, "step": 7217 }, { "epoch": 0.69, "grad_norm": 0.3131853451412242, "learning_rate": 0.0001559895081245228, "loss": 1.2187, "step": 7218 }, { "epoch": 0.69, "grad_norm": 0.26515742013497673, "learning_rate": 0.00015597639997119389, "loss": 1.0605, "step": 7219 }, { "epoch": 0.69, "grad_norm": 0.3129582152206669, "learning_rate": 0.00015596329041702036, "loss": 1.0686, "step": 7220 }, { "epoch": 0.69, "grad_norm": 0.34295960089795735, "learning_rate": 0.00015595017946233033, "loss": 1.0771, "step": 7221 }, { "epoch": 0.69, "grad_norm": 0.2721307338351633, "learning_rate": 0.00015593706710745187, "loss": 1.0284, "step": 7222 }, { "epoch": 0.69, "grad_norm": 0.304506700589439, "learning_rate": 0.00015592395335271316, "loss": 1.055, "step": 7223 }, { "epoch": 0.69, "grad_norm": 0.31053881654022203, "learning_rate": 0.0001559108381984424, "loss": 1.1429, "step": 7224 }, { "epoch": 0.69, "grad_norm": 0.30691636576138964, "learning_rate": 0.00015589772164496774, "loss": 1.1403, "step": 7225 }, { "epoch": 0.69, "grad_norm": 0.2833916505818309, "learning_rate": 0.00015588460369261748, "loss": 0.9685, "step": 7226 }, { "epoch": 0.69, "grad_norm": 0.28649595640599806, "learning_rate": 0.0001558714843417199, "loss": 0.9536, "step": 7227 }, { "epoch": 0.69, "grad_norm": 0.27284801442293516, "learning_rate": 0.0001558583635926033, "loss": 1.1062, "step": 7228 }, { "epoch": 0.69, "grad_norm": 0.27007347326614567, "learning_rate": 0.00015584524144559604, "loss": 1.1615, "step": 7229 }, { "epoch": 0.69, "grad_norm": 0.3039328987168462, "learning_rate": 0.00015583211790102652, "loss": 1.1376, "step": 7230 }, { "epoch": 0.69, "grad_norm": 0.26912448480042633, "learning_rate": 0.00015581899295922318, "loss": 1.0096, "step": 7231 }, { "epoch": 0.69, "grad_norm": 0.2653339147614556, "learning_rate": 0.00015580586662051444, "loss": 1.0514, "step": 7232 }, { "epoch": 0.69, "grad_norm": 0.26697292715319065, "learning_rate": 0.0001557927388852288, "loss": 0.9873, "step": 7233 }, { "epoch": 0.69, "grad_norm": 0.2937309164974668, "learning_rate": 0.00015577960975369484, "loss": 1.1235, "step": 7234 }, { "epoch": 0.69, "grad_norm": 0.2781751496250474, "learning_rate": 0.00015576647922624105, "loss": 1.0094, "step": 7235 }, { "epoch": 0.69, "grad_norm": 0.28016398794516684, "learning_rate": 0.00015575334730319611, "loss": 1.1144, "step": 7236 }, { "epoch": 0.69, "grad_norm": 0.2756048290515205, "learning_rate": 0.00015574021398488862, "loss": 1.0359, "step": 7237 }, { "epoch": 0.69, "grad_norm": 0.24131864496426217, "learning_rate": 0.0001557270792716472, "loss": 1.0668, "step": 7238 }, { "epoch": 0.69, "grad_norm": 0.2840317470056723, "learning_rate": 0.00015571394316380062, "loss": 1.1596, "step": 7239 }, { "epoch": 0.69, "grad_norm": 0.27376264865951977, "learning_rate": 0.0001557008056616776, "loss": 0.9976, "step": 7240 }, { "epoch": 0.69, "grad_norm": 0.3019067450221086, "learning_rate": 0.0001556876667656069, "loss": 1.0813, "step": 7241 }, { "epoch": 0.69, "grad_norm": 0.3162835708665788, "learning_rate": 0.00015567452647591732, "loss": 1.1084, "step": 7242 }, { "epoch": 0.69, "grad_norm": 0.27896797749608704, "learning_rate": 0.00015566138479293775, "loss": 1.0595, "step": 7243 }, { "epoch": 0.69, "grad_norm": 0.2992448181482971, "learning_rate": 0.00015564824171699707, "loss": 0.9487, "step": 7244 }, { "epoch": 0.69, "grad_norm": 0.27553640224406994, "learning_rate": 0.00015563509724842413, "loss": 1.0404, "step": 7245 }, { "epoch": 0.69, "grad_norm": 0.27552371209875953, "learning_rate": 0.00015562195138754792, "loss": 1.1528, "step": 7246 }, { "epoch": 0.69, "grad_norm": 0.29426928783634326, "learning_rate": 0.00015560880413469742, "loss": 1.0984, "step": 7247 }, { "epoch": 0.69, "grad_norm": 0.27853895487179603, "learning_rate": 0.00015559565549020169, "loss": 1.1804, "step": 7248 }, { "epoch": 0.69, "grad_norm": 0.3136870635528011, "learning_rate": 0.00015558250545438972, "loss": 1.0698, "step": 7249 }, { "epoch": 0.69, "grad_norm": 0.2624215588147093, "learning_rate": 0.0001555693540275906, "loss": 1.1105, "step": 7250 }, { "epoch": 0.69, "grad_norm": 0.23782920442038563, "learning_rate": 0.0001555562012101335, "loss": 1.1211, "step": 7251 }, { "epoch": 0.69, "grad_norm": 0.2695440813802512, "learning_rate": 0.00015554304700234747, "loss": 1.0031, "step": 7252 }, { "epoch": 0.69, "grad_norm": 0.30147711904941943, "learning_rate": 0.00015552989140456185, "loss": 1.1955, "step": 7253 }, { "epoch": 0.69, "grad_norm": 0.2430503125575205, "learning_rate": 0.0001555167344171058, "loss": 0.983, "step": 7254 }, { "epoch": 0.69, "grad_norm": 0.29554732708566633, "learning_rate": 0.00015550357604030856, "loss": 1.0728, "step": 7255 }, { "epoch": 0.69, "grad_norm": 0.33084597515765934, "learning_rate": 0.00015549041627449945, "loss": 1.0754, "step": 7256 }, { "epoch": 0.69, "grad_norm": 0.35533675482762406, "learning_rate": 0.0001554772551200078, "loss": 1.1181, "step": 7257 }, { "epoch": 0.69, "grad_norm": 0.27809709182399867, "learning_rate": 0.00015546409257716296, "loss": 1.1706, "step": 7258 }, { "epoch": 0.69, "grad_norm": 0.2644018070633185, "learning_rate": 0.00015545092864629437, "loss": 1.1147, "step": 7259 }, { "epoch": 0.69, "grad_norm": 0.2749285252651721, "learning_rate": 0.00015543776332773142, "loss": 1.1475, "step": 7260 }, { "epoch": 0.69, "grad_norm": 0.26759109696003713, "learning_rate": 0.00015542459662180362, "loss": 1.1167, "step": 7261 }, { "epoch": 0.69, "grad_norm": 0.2744664795704848, "learning_rate": 0.00015541142852884044, "loss": 0.9624, "step": 7262 }, { "epoch": 0.69, "grad_norm": 0.2656603725314074, "learning_rate": 0.00015539825904917147, "loss": 1.0158, "step": 7263 }, { "epoch": 0.69, "grad_norm": 0.27736864447841364, "learning_rate": 0.0001553850881831262, "loss": 1.0009, "step": 7264 }, { "epoch": 0.7, "grad_norm": 0.23778024632317593, "learning_rate": 0.00015537191593103432, "loss": 1.1653, "step": 7265 }, { "epoch": 0.7, "grad_norm": 0.30730004897313506, "learning_rate": 0.00015535874229322545, "loss": 1.0293, "step": 7266 }, { "epoch": 0.7, "grad_norm": 0.26576264865967103, "learning_rate": 0.00015534556727002925, "loss": 1.1149, "step": 7267 }, { "epoch": 0.7, "grad_norm": 0.2910068427246134, "learning_rate": 0.00015533239086177548, "loss": 1.0587, "step": 7268 }, { "epoch": 0.7, "grad_norm": 0.2719761437844997, "learning_rate": 0.0001553192130687938, "loss": 1.0378, "step": 7269 }, { "epoch": 0.7, "grad_norm": 0.2578029237737743, "learning_rate": 0.00015530603389141408, "loss": 1.0591, "step": 7270 }, { "epoch": 0.7, "grad_norm": 0.2799149293630919, "learning_rate": 0.0001552928533299661, "loss": 1.0374, "step": 7271 }, { "epoch": 0.7, "grad_norm": 0.260847576690139, "learning_rate": 0.00015527967138477967, "loss": 0.9975, "step": 7272 }, { "epoch": 0.7, "grad_norm": 0.2907795091020474, "learning_rate": 0.00015526648805618478, "loss": 1.1593, "step": 7273 }, { "epoch": 0.7, "grad_norm": 0.22310418172043128, "learning_rate": 0.00015525330334451127, "loss": 0.9877, "step": 7274 }, { "epoch": 0.7, "grad_norm": 0.29425332767908924, "learning_rate": 0.00015524011725008912, "loss": 1.0992, "step": 7275 }, { "epoch": 0.7, "grad_norm": 0.3105156950201626, "learning_rate": 0.0001552269297732483, "loss": 1.0496, "step": 7276 }, { "epoch": 0.7, "grad_norm": 0.2558739957093287, "learning_rate": 0.00015521374091431888, "loss": 1.0964, "step": 7277 }, { "epoch": 0.7, "grad_norm": 0.28594690539220524, "learning_rate": 0.00015520055067363089, "loss": 1.0916, "step": 7278 }, { "epoch": 0.7, "grad_norm": 0.3061482044460731, "learning_rate": 0.00015518735905151442, "loss": 1.0903, "step": 7279 }, { "epoch": 0.7, "grad_norm": 0.3191580668312093, "learning_rate": 0.00015517416604829962, "loss": 0.9281, "step": 7280 }, { "epoch": 0.7, "grad_norm": 0.26708235106774547, "learning_rate": 0.00015516097166431663, "loss": 1.1284, "step": 7281 }, { "epoch": 0.7, "grad_norm": 0.2741503886756114, "learning_rate": 0.00015514777589989564, "loss": 0.9834, "step": 7282 }, { "epoch": 0.7, "grad_norm": 0.2721226495347994, "learning_rate": 0.00015513457875536692, "loss": 1.0924, "step": 7283 }, { "epoch": 0.7, "grad_norm": 0.2903941706772716, "learning_rate": 0.0001551213802310607, "loss": 1.1032, "step": 7284 }, { "epoch": 0.7, "grad_norm": 0.26719034153148796, "learning_rate": 0.0001551081803273073, "loss": 1.0721, "step": 7285 }, { "epoch": 0.7, "grad_norm": 0.2410538608083278, "learning_rate": 0.00015509497904443706, "loss": 1.0446, "step": 7286 }, { "epoch": 0.7, "grad_norm": 0.288052793290722, "learning_rate": 0.00015508177638278036, "loss": 1.0968, "step": 7287 }, { "epoch": 0.7, "grad_norm": 0.28114598468656504, "learning_rate": 0.00015506857234266755, "loss": 1.2161, "step": 7288 }, { "epoch": 0.7, "grad_norm": 0.30363151390349236, "learning_rate": 0.00015505536692442915, "loss": 1.1299, "step": 7289 }, { "epoch": 0.7, "grad_norm": 0.3430101339177829, "learning_rate": 0.00015504216012839555, "loss": 1.057, "step": 7290 }, { "epoch": 0.7, "grad_norm": 0.31124798109609725, "learning_rate": 0.00015502895195489735, "loss": 1.0329, "step": 7291 }, { "epoch": 0.7, "grad_norm": 0.31977891525097923, "learning_rate": 0.000155015742404265, "loss": 1.0195, "step": 7292 }, { "epoch": 0.7, "grad_norm": 0.2605016731210418, "learning_rate": 0.00015500253147682913, "loss": 1.1187, "step": 7293 }, { "epoch": 0.7, "grad_norm": 0.25377938560950036, "learning_rate": 0.00015498931917292037, "loss": 0.9918, "step": 7294 }, { "epoch": 0.7, "grad_norm": 0.3046468563219072, "learning_rate": 0.0001549761054928693, "loss": 1.112, "step": 7295 }, { "epoch": 0.7, "grad_norm": 0.34132422940513657, "learning_rate": 0.00015496289043700665, "loss": 1.0857, "step": 7296 }, { "epoch": 0.7, "grad_norm": 0.25593975051636847, "learning_rate": 0.00015494967400566311, "loss": 1.0094, "step": 7297 }, { "epoch": 0.7, "grad_norm": 0.30660036228331966, "learning_rate": 0.00015493645619916947, "loss": 1.0384, "step": 7298 }, { "epoch": 0.7, "grad_norm": 0.28934490627014536, "learning_rate": 0.0001549232370178565, "loss": 1.1638, "step": 7299 }, { "epoch": 0.7, "grad_norm": 0.3082067568322258, "learning_rate": 0.00015491001646205496, "loss": 1.1484, "step": 7300 }, { "epoch": 0.7, "grad_norm": 0.24608283441694623, "learning_rate": 0.00015489679453209578, "loss": 1.0935, "step": 7301 }, { "epoch": 0.7, "grad_norm": 0.2878652096204575, "learning_rate": 0.0001548835712283098, "loss": 1.0195, "step": 7302 }, { "epoch": 0.7, "grad_norm": 0.2445700870996487, "learning_rate": 0.00015487034655102796, "loss": 1.0676, "step": 7303 }, { "epoch": 0.7, "grad_norm": 0.2792976608554407, "learning_rate": 0.00015485712050058125, "loss": 1.0282, "step": 7304 }, { "epoch": 0.7, "grad_norm": 0.2792852923073424, "learning_rate": 0.00015484389307730056, "loss": 1.0393, "step": 7305 }, { "epoch": 0.7, "grad_norm": 0.2921531944021332, "learning_rate": 0.00015483066428151703, "loss": 1.1671, "step": 7306 }, { "epoch": 0.7, "grad_norm": 0.2840087290455877, "learning_rate": 0.00015481743411356163, "loss": 1.0849, "step": 7307 }, { "epoch": 0.7, "grad_norm": 0.3040563023087563, "learning_rate": 0.0001548042025737655, "loss": 1.1268, "step": 7308 }, { "epoch": 0.7, "grad_norm": 0.29355520065474283, "learning_rate": 0.00015479096966245978, "loss": 1.03, "step": 7309 }, { "epoch": 0.7, "grad_norm": 0.2760436996975324, "learning_rate": 0.00015477773537997557, "loss": 1.1871, "step": 7310 }, { "epoch": 0.7, "grad_norm": 0.27687371076005574, "learning_rate": 0.00015476449972664412, "loss": 1.1466, "step": 7311 }, { "epoch": 0.7, "grad_norm": 0.2845099189305587, "learning_rate": 0.00015475126270279667, "loss": 1.0636, "step": 7312 }, { "epoch": 0.7, "grad_norm": 0.2505523226102105, "learning_rate": 0.00015473802430876444, "loss": 1.0668, "step": 7313 }, { "epoch": 0.7, "grad_norm": 0.32837255120480047, "learning_rate": 0.00015472478454487876, "loss": 1.1224, "step": 7314 }, { "epoch": 0.7, "grad_norm": 0.26481048965460596, "learning_rate": 0.00015471154341147094, "loss": 1.0428, "step": 7315 }, { "epoch": 0.7, "grad_norm": 0.2690353020587212, "learning_rate": 0.00015469830090887235, "loss": 1.0432, "step": 7316 }, { "epoch": 0.7, "grad_norm": 0.27626600703824794, "learning_rate": 0.00015468505703741442, "loss": 1.0638, "step": 7317 }, { "epoch": 0.7, "grad_norm": 0.3065923845948702, "learning_rate": 0.00015467181179742857, "loss": 0.914, "step": 7318 }, { "epoch": 0.7, "grad_norm": 0.2829656174177215, "learning_rate": 0.0001546585651892463, "loss": 1.0586, "step": 7319 }, { "epoch": 0.7, "grad_norm": 0.277648311025837, "learning_rate": 0.00015464531721319903, "loss": 0.9368, "step": 7320 }, { "epoch": 0.7, "grad_norm": 0.26414058914433497, "learning_rate": 0.00015463206786961838, "loss": 0.9696, "step": 7321 }, { "epoch": 0.7, "grad_norm": 0.3161051076868171, "learning_rate": 0.0001546188171588359, "loss": 1.0229, "step": 7322 }, { "epoch": 0.7, "grad_norm": 0.2799019895774647, "learning_rate": 0.0001546055650811832, "loss": 1.1345, "step": 7323 }, { "epoch": 0.7, "grad_norm": 0.305865812690483, "learning_rate": 0.0001545923116369919, "loss": 1.0023, "step": 7324 }, { "epoch": 0.7, "grad_norm": 0.30382443041697793, "learning_rate": 0.00015457905682659368, "loss": 1.0488, "step": 7325 }, { "epoch": 0.7, "grad_norm": 0.31386464628323874, "learning_rate": 0.0001545658006503203, "loss": 1.0361, "step": 7326 }, { "epoch": 0.7, "grad_norm": 0.27019240058841637, "learning_rate": 0.00015455254310850345, "loss": 1.0565, "step": 7327 }, { "epoch": 0.7, "grad_norm": 0.266426547020745, "learning_rate": 0.0001545392842014749, "loss": 0.9752, "step": 7328 }, { "epoch": 0.7, "grad_norm": 0.2995784354581383, "learning_rate": 0.0001545260239295665, "loss": 1.0005, "step": 7329 }, { "epoch": 0.7, "grad_norm": 0.317973794140023, "learning_rate": 0.0001545127622931101, "loss": 1.0538, "step": 7330 }, { "epoch": 0.7, "grad_norm": 0.2947298100738568, "learning_rate": 0.00015449949929243755, "loss": 1.0116, "step": 7331 }, { "epoch": 0.7, "grad_norm": 0.3348856644428043, "learning_rate": 0.00015448623492788076, "loss": 0.9802, "step": 7332 }, { "epoch": 0.7, "grad_norm": 0.2989455629929353, "learning_rate": 0.00015447296919977172, "loss": 1.0376, "step": 7333 }, { "epoch": 0.7, "grad_norm": 0.3011061423556309, "learning_rate": 0.0001544597021084424, "loss": 1.0908, "step": 7334 }, { "epoch": 0.7, "grad_norm": 0.3049819395201172, "learning_rate": 0.00015444643365422478, "loss": 1.0768, "step": 7335 }, { "epoch": 0.7, "grad_norm": 0.26542548997246496, "learning_rate": 0.00015443316383745095, "loss": 1.024, "step": 7336 }, { "epoch": 0.7, "grad_norm": 0.2797529721086807, "learning_rate": 0.00015441989265845297, "loss": 1.072, "step": 7337 }, { "epoch": 0.7, "grad_norm": 0.2538780155773843, "learning_rate": 0.000154406620117563, "loss": 1.038, "step": 7338 }, { "epoch": 0.7, "grad_norm": 0.2831186717367567, "learning_rate": 0.00015439334621511318, "loss": 1.0638, "step": 7339 }, { "epoch": 0.7, "grad_norm": 0.28787268851064585, "learning_rate": 0.00015438007095143567, "loss": 1.0426, "step": 7340 }, { "epoch": 0.7, "grad_norm": 0.2948120261226757, "learning_rate": 0.0001543667943268627, "loss": 1.1221, "step": 7341 }, { "epoch": 0.7, "grad_norm": 0.32242230412435835, "learning_rate": 0.00015435351634172654, "loss": 1.0958, "step": 7342 }, { "epoch": 0.7, "grad_norm": 0.2654273360662479, "learning_rate": 0.00015434023699635948, "loss": 1.0071, "step": 7343 }, { "epoch": 0.7, "grad_norm": 0.27327197799382885, "learning_rate": 0.00015432695629109385, "loss": 1.0576, "step": 7344 }, { "epoch": 0.7, "grad_norm": 0.2754942416375848, "learning_rate": 0.00015431367422626195, "loss": 1.1498, "step": 7345 }, { "epoch": 0.7, "grad_norm": 0.27801354524717076, "learning_rate": 0.00015430039080219625, "loss": 1.1084, "step": 7346 }, { "epoch": 0.7, "grad_norm": 0.2914103943454639, "learning_rate": 0.00015428710601922914, "loss": 1.0507, "step": 7347 }, { "epoch": 0.7, "grad_norm": 0.2638150945585758, "learning_rate": 0.00015427381987769307, "loss": 1.0324, "step": 7348 }, { "epoch": 0.7, "grad_norm": 0.2526962446018732, "learning_rate": 0.0001542605323779206, "loss": 0.9352, "step": 7349 }, { "epoch": 0.7, "grad_norm": 0.2595273949493344, "learning_rate": 0.00015424724352024413, "loss": 1.0355, "step": 7350 }, { "epoch": 0.7, "grad_norm": 0.28782174706723024, "learning_rate": 0.00015423395330499632, "loss": 1.1083, "step": 7351 }, { "epoch": 0.7, "grad_norm": 0.3115649702709986, "learning_rate": 0.00015422066173250974, "loss": 1.0422, "step": 7352 }, { "epoch": 0.7, "grad_norm": 0.29111738038165, "learning_rate": 0.00015420736880311707, "loss": 1.0315, "step": 7353 }, { "epoch": 0.7, "grad_norm": 0.28032086546612484, "learning_rate": 0.00015419407451715088, "loss": 1.1888, "step": 7354 }, { "epoch": 0.7, "grad_norm": 0.2716417113526186, "learning_rate": 0.00015418077887494394, "loss": 1.0331, "step": 7355 }, { "epoch": 0.7, "grad_norm": 0.28116455658959255, "learning_rate": 0.00015416748187682897, "loss": 1.0433, "step": 7356 }, { "epoch": 0.7, "grad_norm": 0.3165740582098866, "learning_rate": 0.00015415418352313868, "loss": 1.0488, "step": 7357 }, { "epoch": 0.7, "grad_norm": 0.24167939460825494, "learning_rate": 0.00015414088381420594, "loss": 1.1047, "step": 7358 }, { "epoch": 0.7, "grad_norm": 0.2362202925878594, "learning_rate": 0.00015412758275036356, "loss": 1.111, "step": 7359 }, { "epoch": 0.7, "grad_norm": 0.30927705441472164, "learning_rate": 0.0001541142803319444, "loss": 0.9948, "step": 7360 }, { "epoch": 0.7, "grad_norm": 0.26694430612905373, "learning_rate": 0.00015410097655928136, "loss": 1.2141, "step": 7361 }, { "epoch": 0.7, "grad_norm": 0.28735202961238854, "learning_rate": 0.00015408767143270738, "loss": 1.0482, "step": 7362 }, { "epoch": 0.7, "grad_norm": 0.3028944111263906, "learning_rate": 0.00015407436495255543, "loss": 0.9829, "step": 7363 }, { "epoch": 0.7, "grad_norm": 0.3036916238157443, "learning_rate": 0.0001540610571191585, "loss": 1.1743, "step": 7364 }, { "epoch": 0.7, "grad_norm": 0.2897890002077795, "learning_rate": 0.00015404774793284967, "loss": 1.0033, "step": 7365 }, { "epoch": 0.7, "grad_norm": 0.29386071816379244, "learning_rate": 0.00015403443739396195, "loss": 1.1666, "step": 7366 }, { "epoch": 0.7, "grad_norm": 0.32062513986248775, "learning_rate": 0.0001540211255028285, "loss": 1.141, "step": 7367 }, { "epoch": 0.7, "grad_norm": 0.3180897552174907, "learning_rate": 0.00015400781225978242, "loss": 1.0144, "step": 7368 }, { "epoch": 0.7, "grad_norm": 0.27245479542574785, "learning_rate": 0.00015399449766515688, "loss": 1.0604, "step": 7369 }, { "epoch": 0.71, "grad_norm": 0.2915213084795467, "learning_rate": 0.00015398118171928516, "loss": 1.1338, "step": 7370 }, { "epoch": 0.71, "grad_norm": 0.23168432157865712, "learning_rate": 0.00015396786442250035, "loss": 1.0496, "step": 7371 }, { "epoch": 0.71, "grad_norm": 0.2782432262911835, "learning_rate": 0.0001539545457751359, "loss": 1.0979, "step": 7372 }, { "epoch": 0.71, "grad_norm": 0.27977300422557033, "learning_rate": 0.000153941225777525, "loss": 1.0744, "step": 7373 }, { "epoch": 0.71, "grad_norm": 0.2545144680006588, "learning_rate": 0.000153927904430001, "loss": 1.0237, "step": 7374 }, { "epoch": 0.71, "grad_norm": 0.23952050585666343, "learning_rate": 0.00015391458173289734, "loss": 1.1329, "step": 7375 }, { "epoch": 0.71, "grad_norm": 0.27650814084254066, "learning_rate": 0.00015390125768654738, "loss": 1.128, "step": 7376 }, { "epoch": 0.71, "grad_norm": 0.2957362698116198, "learning_rate": 0.00015388793229128455, "loss": 1.1255, "step": 7377 }, { "epoch": 0.71, "grad_norm": 0.24485467093779958, "learning_rate": 0.00015387460554744235, "loss": 1.0197, "step": 7378 }, { "epoch": 0.71, "grad_norm": 0.25214439017148976, "learning_rate": 0.0001538612774553543, "loss": 1.0601, "step": 7379 }, { "epoch": 0.71, "grad_norm": 0.282880344840166, "learning_rate": 0.00015384794801535394, "loss": 1.1426, "step": 7380 }, { "epoch": 0.71, "grad_norm": 0.3152140814752052, "learning_rate": 0.00015383461722777482, "loss": 1.0964, "step": 7381 }, { "epoch": 0.71, "grad_norm": 0.24688971723773373, "learning_rate": 0.00015382128509295058, "loss": 1.0926, "step": 7382 }, { "epoch": 0.71, "grad_norm": 0.2959651478445274, "learning_rate": 0.00015380795161121485, "loss": 1.0349, "step": 7383 }, { "epoch": 0.71, "grad_norm": 0.29305053218157, "learning_rate": 0.0001537946167829013, "loss": 1.1347, "step": 7384 }, { "epoch": 0.71, "grad_norm": 0.2717846083673245, "learning_rate": 0.00015378128060834366, "loss": 1.0096, "step": 7385 }, { "epoch": 0.71, "grad_norm": 0.2620308073344996, "learning_rate": 0.0001537679430878757, "loss": 1.0911, "step": 7386 }, { "epoch": 0.71, "grad_norm": 0.2531071813077481, "learning_rate": 0.00015375460422183116, "loss": 0.9925, "step": 7387 }, { "epoch": 0.71, "grad_norm": 0.2797391994034549, "learning_rate": 0.00015374126401054383, "loss": 1.1076, "step": 7388 }, { "epoch": 0.71, "grad_norm": 0.2727192530520494, "learning_rate": 0.00015372792245434765, "loss": 1.0334, "step": 7389 }, { "epoch": 0.71, "grad_norm": 0.29997950673640783, "learning_rate": 0.00015371457955357643, "loss": 0.9707, "step": 7390 }, { "epoch": 0.71, "grad_norm": 0.2816972910580794, "learning_rate": 0.00015370123530856407, "loss": 1.1254, "step": 7391 }, { "epoch": 0.71, "grad_norm": 0.2959827743331956, "learning_rate": 0.00015368788971964454, "loss": 1.0022, "step": 7392 }, { "epoch": 0.71, "grad_norm": 0.3027989628861647, "learning_rate": 0.0001536745427871519, "loss": 1.0555, "step": 7393 }, { "epoch": 0.71, "grad_norm": 0.27628323794583914, "learning_rate": 0.00015366119451142002, "loss": 0.985, "step": 7394 }, { "epoch": 0.71, "grad_norm": 0.3098642640517758, "learning_rate": 0.00015364784489278304, "loss": 1.0772, "step": 7395 }, { "epoch": 0.71, "grad_norm": 0.3045327679601191, "learning_rate": 0.00015363449393157504, "loss": 1.1383, "step": 7396 }, { "epoch": 0.71, "grad_norm": 0.27647845171066426, "learning_rate": 0.00015362114162813012, "loss": 1.0514, "step": 7397 }, { "epoch": 0.71, "grad_norm": 0.27708090315989137, "learning_rate": 0.00015360778798278243, "loss": 0.9376, "step": 7398 }, { "epoch": 0.71, "grad_norm": 0.2903239285477044, "learning_rate": 0.00015359443299586614, "loss": 1.1016, "step": 7399 }, { "epoch": 0.71, "grad_norm": 0.3010839768456833, "learning_rate": 0.0001535810766677155, "loss": 1.0842, "step": 7400 }, { "epoch": 0.71, "grad_norm": 0.276746949232895, "learning_rate": 0.0001535677189986647, "loss": 1.2018, "step": 7401 }, { "epoch": 0.71, "grad_norm": 0.29483934180185845, "learning_rate": 0.0001535543599890481, "loss": 1.0932, "step": 7402 }, { "epoch": 0.71, "grad_norm": 0.2944209969280942, "learning_rate": 0.0001535409996392, "loss": 1.1482, "step": 7403 }, { "epoch": 0.71, "grad_norm": 0.31126699379727446, "learning_rate": 0.0001535276379494547, "loss": 1.1388, "step": 7404 }, { "epoch": 0.71, "grad_norm": 0.2843244483748962, "learning_rate": 0.00015351427492014662, "loss": 1.122, "step": 7405 }, { "epoch": 0.71, "grad_norm": 0.2940291993707008, "learning_rate": 0.00015350091055161023, "loss": 0.8989, "step": 7406 }, { "epoch": 0.71, "grad_norm": 0.3351477452765215, "learning_rate": 0.0001534875448441799, "loss": 1.1108, "step": 7407 }, { "epoch": 0.71, "grad_norm": 0.2906887381085749, "learning_rate": 0.0001534741777981901, "loss": 0.9912, "step": 7408 }, { "epoch": 0.71, "grad_norm": 0.3122468085486279, "learning_rate": 0.00015346080941397544, "loss": 1.113, "step": 7409 }, { "epoch": 0.71, "grad_norm": 0.28278667430577964, "learning_rate": 0.00015344743969187042, "loss": 1.0403, "step": 7410 }, { "epoch": 0.71, "grad_norm": 0.27569491315509187, "learning_rate": 0.00015343406863220962, "loss": 1.121, "step": 7411 }, { "epoch": 0.71, "grad_norm": 0.2804750598763221, "learning_rate": 0.0001534206962353277, "loss": 1.0365, "step": 7412 }, { "epoch": 0.71, "grad_norm": 0.2524239751697441, "learning_rate": 0.00015340732250155927, "loss": 0.9917, "step": 7413 }, { "epoch": 0.71, "grad_norm": 0.29005569119447944, "learning_rate": 0.000153393947431239, "loss": 1.1511, "step": 7414 }, { "epoch": 0.71, "grad_norm": 0.2749251352441248, "learning_rate": 0.00015338057102470164, "loss": 1.0658, "step": 7415 }, { "epoch": 0.71, "grad_norm": 0.2838189917053858, "learning_rate": 0.000153367193282282, "loss": 1.0391, "step": 7416 }, { "epoch": 0.71, "grad_norm": 0.3244226146833391, "learning_rate": 0.00015335381420431476, "loss": 1.1094, "step": 7417 }, { "epoch": 0.71, "grad_norm": 0.2574324902880777, "learning_rate": 0.0001533404337911348, "loss": 1.1391, "step": 7418 }, { "epoch": 0.71, "grad_norm": 0.30913323555851724, "learning_rate": 0.00015332705204307696, "loss": 1.0444, "step": 7419 }, { "epoch": 0.71, "grad_norm": 0.2902578699002574, "learning_rate": 0.00015331366896047613, "loss": 0.9731, "step": 7420 }, { "epoch": 0.71, "grad_norm": 0.2736910311823876, "learning_rate": 0.00015330028454366723, "loss": 1.096, "step": 7421 }, { "epoch": 0.71, "grad_norm": 0.2595210721180384, "learning_rate": 0.0001532868987929852, "loss": 1.0444, "step": 7422 }, { "epoch": 0.71, "grad_norm": 0.2691043276914828, "learning_rate": 0.00015327351170876504, "loss": 0.9763, "step": 7423 }, { "epoch": 0.71, "grad_norm": 0.2625617634998602, "learning_rate": 0.00015326012329134177, "loss": 1.0655, "step": 7424 }, { "epoch": 0.71, "grad_norm": 0.3040586098635329, "learning_rate": 0.00015324673354105044, "loss": 1.0892, "step": 7425 }, { "epoch": 0.71, "grad_norm": 0.2688279407074845, "learning_rate": 0.00015323334245822613, "loss": 1.0881, "step": 7426 }, { "epoch": 0.71, "grad_norm": 0.2772663570425649, "learning_rate": 0.00015321995004320398, "loss": 1.0775, "step": 7427 }, { "epoch": 0.71, "grad_norm": 0.28558771196591126, "learning_rate": 0.00015320655629631915, "loss": 1.1682, "step": 7428 }, { "epoch": 0.71, "grad_norm": 0.3068876838609169, "learning_rate": 0.00015319316121790676, "loss": 1.1325, "step": 7429 }, { "epoch": 0.71, "grad_norm": 0.25376782398833597, "learning_rate": 0.00015317976480830214, "loss": 1.051, "step": 7430 }, { "epoch": 0.71, "grad_norm": 0.26689013628589564, "learning_rate": 0.00015316636706784047, "loss": 1.0535, "step": 7431 }, { "epoch": 0.71, "grad_norm": 0.27913817719785716, "learning_rate": 0.00015315296799685703, "loss": 1.0799, "step": 7432 }, { "epoch": 0.71, "grad_norm": 0.27105117018470604, "learning_rate": 0.00015313956759568717, "loss": 0.9799, "step": 7433 }, { "epoch": 0.71, "grad_norm": 0.2650201668901937, "learning_rate": 0.00015312616586466625, "loss": 1.0213, "step": 7434 }, { "epoch": 0.71, "grad_norm": 0.2597414213724003, "learning_rate": 0.0001531127628041296, "loss": 1.0696, "step": 7435 }, { "epoch": 0.71, "grad_norm": 0.2720574465290946, "learning_rate": 0.0001530993584144127, "loss": 1.0064, "step": 7436 }, { "epoch": 0.71, "grad_norm": 0.2926790787938213, "learning_rate": 0.000153085952695851, "loss": 1.0582, "step": 7437 }, { "epoch": 0.71, "grad_norm": 0.30451030733173934, "learning_rate": 0.00015307254564877996, "loss": 1.0364, "step": 7438 }, { "epoch": 0.71, "grad_norm": 0.2969242563944365, "learning_rate": 0.00015305913727353508, "loss": 0.9929, "step": 7439 }, { "epoch": 0.71, "grad_norm": 0.2843406055001775, "learning_rate": 0.000153045727570452, "loss": 1.0706, "step": 7440 }, { "epoch": 0.71, "grad_norm": 0.27079473763903633, "learning_rate": 0.0001530323165398662, "loss": 1.13, "step": 7441 }, { "epoch": 0.71, "grad_norm": 0.31673668159080653, "learning_rate": 0.00015301890418211337, "loss": 1.033, "step": 7442 }, { "epoch": 0.71, "grad_norm": 0.25852536375912855, "learning_rate": 0.00015300549049752915, "loss": 0.9558, "step": 7443 }, { "epoch": 0.71, "grad_norm": 0.2610535794849494, "learning_rate": 0.00015299207548644922, "loss": 0.9075, "step": 7444 }, { "epoch": 0.71, "grad_norm": 0.3144932740991477, "learning_rate": 0.0001529786591492093, "loss": 1.088, "step": 7445 }, { "epoch": 0.71, "grad_norm": 0.3002199992017584, "learning_rate": 0.0001529652414861451, "loss": 1.0292, "step": 7446 }, { "epoch": 0.71, "grad_norm": 0.29137989165257083, "learning_rate": 0.00015295182249759246, "loss": 1.1373, "step": 7447 }, { "epoch": 0.71, "grad_norm": 0.3000740319743772, "learning_rate": 0.0001529384021838872, "loss": 1.0349, "step": 7448 }, { "epoch": 0.71, "grad_norm": 0.37952107317617517, "learning_rate": 0.00015292498054536515, "loss": 1.027, "step": 7449 }, { "epoch": 0.71, "grad_norm": 0.2658313512672852, "learning_rate": 0.00015291155758236219, "loss": 1.0365, "step": 7450 }, { "epoch": 0.71, "grad_norm": 0.30711739440319363, "learning_rate": 0.00015289813329521427, "loss": 1.0623, "step": 7451 }, { "epoch": 0.71, "grad_norm": 0.26947346016409285, "learning_rate": 0.0001528847076842573, "loss": 1.0008, "step": 7452 }, { "epoch": 0.71, "grad_norm": 0.294244616915276, "learning_rate": 0.00015287128074982728, "loss": 1.1527, "step": 7453 }, { "epoch": 0.71, "grad_norm": 0.26434224087661645, "learning_rate": 0.00015285785249226025, "loss": 1.0076, "step": 7454 }, { "epoch": 0.71, "grad_norm": 0.2638878195945205, "learning_rate": 0.00015284442291189224, "loss": 1.0129, "step": 7455 }, { "epoch": 0.71, "grad_norm": 0.291822498691914, "learning_rate": 0.0001528309920090593, "loss": 1.0994, "step": 7456 }, { "epoch": 0.71, "grad_norm": 0.24079989611181216, "learning_rate": 0.00015281755978409763, "loss": 0.9589, "step": 7457 }, { "epoch": 0.71, "grad_norm": 0.26439658524128423, "learning_rate": 0.00015280412623734331, "loss": 1.0177, "step": 7458 }, { "epoch": 0.71, "grad_norm": 0.28359751564704094, "learning_rate": 0.00015279069136913252, "loss": 1.0652, "step": 7459 }, { "epoch": 0.71, "grad_norm": 0.27983466260304535, "learning_rate": 0.00015277725517980152, "loss": 1.0485, "step": 7460 }, { "epoch": 0.71, "grad_norm": 0.29708736109513595, "learning_rate": 0.00015276381766968656, "loss": 1.074, "step": 7461 }, { "epoch": 0.71, "grad_norm": 0.29289233835975614, "learning_rate": 0.0001527503788391239, "loss": 1.1547, "step": 7462 }, { "epoch": 0.71, "grad_norm": 0.2505395055508232, "learning_rate": 0.00015273693868844983, "loss": 0.9737, "step": 7463 }, { "epoch": 0.71, "grad_norm": 0.32901669963923147, "learning_rate": 0.00015272349721800075, "loss": 1.0536, "step": 7464 }, { "epoch": 0.71, "grad_norm": 0.256290518603839, "learning_rate": 0.000152710054428113, "loss": 1.1606, "step": 7465 }, { "epoch": 0.71, "grad_norm": 0.26769516189713743, "learning_rate": 0.000152696610319123, "loss": 0.9284, "step": 7466 }, { "epoch": 0.71, "grad_norm": 0.2644771395655536, "learning_rate": 0.00015268316489136722, "loss": 1.1177, "step": 7467 }, { "epoch": 0.71, "grad_norm": 0.2806512107610077, "learning_rate": 0.00015266971814518213, "loss": 0.9686, "step": 7468 }, { "epoch": 0.71, "grad_norm": 0.2845865397349829, "learning_rate": 0.00015265627008090424, "loss": 1.0744, "step": 7469 }, { "epoch": 0.71, "grad_norm": 0.26504898270571114, "learning_rate": 0.00015264282069887012, "loss": 1.1322, "step": 7470 }, { "epoch": 0.71, "grad_norm": 0.2916082135751575, "learning_rate": 0.0001526293699994163, "loss": 1.103, "step": 7471 }, { "epoch": 0.71, "grad_norm": 0.31144997066611424, "learning_rate": 0.00015261591798287945, "loss": 1.0014, "step": 7472 }, { "epoch": 0.71, "grad_norm": 0.2574195705247452, "learning_rate": 0.00015260246464959614, "loss": 1.1275, "step": 7473 }, { "epoch": 0.72, "grad_norm": 0.31598031433107676, "learning_rate": 0.00015258900999990313, "loss": 0.9769, "step": 7474 }, { "epoch": 0.72, "grad_norm": 0.2733626438470682, "learning_rate": 0.00015257555403413707, "loss": 1.1188, "step": 7475 }, { "epoch": 0.72, "grad_norm": 0.27826283583830214, "learning_rate": 0.00015256209675263473, "loss": 0.9995, "step": 7476 }, { "epoch": 0.72, "grad_norm": 0.3089275459108109, "learning_rate": 0.0001525486381557329, "loss": 1.0316, "step": 7477 }, { "epoch": 0.72, "grad_norm": 0.2928951184842569, "learning_rate": 0.00015253517824376838, "loss": 1.0514, "step": 7478 }, { "epoch": 0.72, "grad_norm": 0.26365816516744844, "learning_rate": 0.00015252171701707798, "loss": 1.0454, "step": 7479 }, { "epoch": 0.72, "grad_norm": 0.28372734957336343, "learning_rate": 0.00015250825447599863, "loss": 1.0397, "step": 7480 }, { "epoch": 0.72, "grad_norm": 0.24476440536696728, "learning_rate": 0.0001524947906208672, "loss": 1.1185, "step": 7481 }, { "epoch": 0.72, "grad_norm": 0.29553606635449614, "learning_rate": 0.00015248132545202066, "loss": 1.0536, "step": 7482 }, { "epoch": 0.72, "grad_norm": 0.28192161411982414, "learning_rate": 0.00015246785896979592, "loss": 1.0789, "step": 7483 }, { "epoch": 0.72, "grad_norm": 0.28836399016991415, "learning_rate": 0.00015245439117453005, "loss": 1.1376, "step": 7484 }, { "epoch": 0.72, "grad_norm": 0.285252586401288, "learning_rate": 0.00015244092206656012, "loss": 1.0715, "step": 7485 }, { "epoch": 0.72, "grad_norm": 0.34180059296617976, "learning_rate": 0.0001524274516462231, "loss": 1.0974, "step": 7486 }, { "epoch": 0.72, "grad_norm": 0.28411030591653186, "learning_rate": 0.0001524139799138562, "loss": 1.1046, "step": 7487 }, { "epoch": 0.72, "grad_norm": 0.2688829534104327, "learning_rate": 0.00015240050686979648, "loss": 1.0534, "step": 7488 }, { "epoch": 0.72, "grad_norm": 0.3171051572047796, "learning_rate": 0.00015238703251438116, "loss": 0.9673, "step": 7489 }, { "epoch": 0.72, "grad_norm": 0.2715311978491321, "learning_rate": 0.00015237355684794742, "loss": 1.0009, "step": 7490 }, { "epoch": 0.72, "grad_norm": 0.3107037826927023, "learning_rate": 0.0001523600798708325, "loss": 1.1782, "step": 7491 }, { "epoch": 0.72, "grad_norm": 0.3004283097955692, "learning_rate": 0.00015234660158337367, "loss": 1.1222, "step": 7492 }, { "epoch": 0.72, "grad_norm": 0.34256065220175824, "learning_rate": 0.00015233312198590824, "loss": 1.1572, "step": 7493 }, { "epoch": 0.72, "grad_norm": 0.30264641686373545, "learning_rate": 0.00015231964107877355, "loss": 0.9944, "step": 7494 }, { "epoch": 0.72, "grad_norm": 0.29115852098449335, "learning_rate": 0.00015230615886230696, "loss": 1.0331, "step": 7495 }, { "epoch": 0.72, "grad_norm": 0.2663234013844971, "learning_rate": 0.00015229267533684588, "loss": 0.9686, "step": 7496 }, { "epoch": 0.72, "grad_norm": 0.2706122042940165, "learning_rate": 0.00015227919050272775, "loss": 1.029, "step": 7497 }, { "epoch": 0.72, "grad_norm": 0.28192878540642374, "learning_rate": 0.00015226570436028996, "loss": 1.0831, "step": 7498 }, { "epoch": 0.72, "grad_norm": 0.3136140113978119, "learning_rate": 0.00015225221690987013, "loss": 1.1534, "step": 7499 }, { "epoch": 0.72, "grad_norm": 0.29044795123500067, "learning_rate": 0.0001522387281518057, "loss": 1.0986, "step": 7500 }, { "epoch": 0.72, "grad_norm": 0.25709365283504254, "learning_rate": 0.00015222523808643428, "loss": 1.1544, "step": 7501 }, { "epoch": 0.72, "grad_norm": 0.27642043583466885, "learning_rate": 0.00015221174671409347, "loss": 1.0419, "step": 7502 }, { "epoch": 0.72, "grad_norm": 0.28235849443833444, "learning_rate": 0.00015219825403512086, "loss": 1.1054, "step": 7503 }, { "epoch": 0.72, "grad_norm": 0.3347988942349422, "learning_rate": 0.00015218476004985414, "loss": 1.1444, "step": 7504 }, { "epoch": 0.72, "grad_norm": 0.28011799381802555, "learning_rate": 0.000152171264758631, "loss": 1.0586, "step": 7505 }, { "epoch": 0.72, "grad_norm": 0.29631516225770976, "learning_rate": 0.00015215776816178918, "loss": 1.066, "step": 7506 }, { "epoch": 0.72, "grad_norm": 0.312587038609111, "learning_rate": 0.00015214427025966642, "loss": 1.06, "step": 7507 }, { "epoch": 0.72, "grad_norm": 0.26267971626273046, "learning_rate": 0.00015213077105260053, "loss": 0.99, "step": 7508 }, { "epoch": 0.72, "grad_norm": 0.29362317699509005, "learning_rate": 0.00015211727054092932, "loss": 1.0374, "step": 7509 }, { "epoch": 0.72, "grad_norm": 0.2743031690741454, "learning_rate": 0.00015210376872499068, "loss": 0.9676, "step": 7510 }, { "epoch": 0.72, "grad_norm": 0.3008169494678444, "learning_rate": 0.0001520902656051225, "loss": 1.0962, "step": 7511 }, { "epoch": 0.72, "grad_norm": 0.2510179331269226, "learning_rate": 0.00015207676118166266, "loss": 1.0657, "step": 7512 }, { "epoch": 0.72, "grad_norm": 0.3006570781756079, "learning_rate": 0.00015206325545494913, "loss": 1.1252, "step": 7513 }, { "epoch": 0.72, "grad_norm": 0.3074556798522242, "learning_rate": 0.00015204974842531995, "loss": 1.1561, "step": 7514 }, { "epoch": 0.72, "grad_norm": 0.3251122148768213, "learning_rate": 0.00015203624009311307, "loss": 1.1283, "step": 7515 }, { "epoch": 0.72, "grad_norm": 0.2595576229638888, "learning_rate": 0.0001520227304586666, "loss": 0.9711, "step": 7516 }, { "epoch": 0.72, "grad_norm": 0.29928452954008067, "learning_rate": 0.00015200921952231858, "loss": 1.0824, "step": 7517 }, { "epoch": 0.72, "grad_norm": 0.3139867616135075, "learning_rate": 0.0001519957072844072, "loss": 1.1125, "step": 7518 }, { "epoch": 0.72, "grad_norm": 0.29374530619599887, "learning_rate": 0.00015198219374527053, "loss": 1.1183, "step": 7519 }, { "epoch": 0.72, "grad_norm": 0.2952280509086445, "learning_rate": 0.00015196867890524676, "loss": 1.0937, "step": 7520 }, { "epoch": 0.72, "grad_norm": 0.2826852773745996, "learning_rate": 0.00015195516276467422, "loss": 1.1857, "step": 7521 }, { "epoch": 0.72, "grad_norm": 0.2803858395978367, "learning_rate": 0.000151941645323891, "loss": 0.9876, "step": 7522 }, { "epoch": 0.72, "grad_norm": 0.27577711685224354, "learning_rate": 0.00015192812658323552, "loss": 1.0477, "step": 7523 }, { "epoch": 0.72, "grad_norm": 0.29937814744106744, "learning_rate": 0.00015191460654304602, "loss": 1.0781, "step": 7524 }, { "epoch": 0.72, "grad_norm": 0.344475738203272, "learning_rate": 0.00015190108520366085, "loss": 1.0677, "step": 7525 }, { "epoch": 0.72, "grad_norm": 0.269619028961468, "learning_rate": 0.00015188756256541842, "loss": 1.1207, "step": 7526 }, { "epoch": 0.72, "grad_norm": 0.2856659171795633, "learning_rate": 0.0001518740386286571, "loss": 0.9971, "step": 7527 }, { "epoch": 0.72, "grad_norm": 0.2690448461790446, "learning_rate": 0.0001518605133937154, "loss": 1.0434, "step": 7528 }, { "epoch": 0.72, "grad_norm": 0.3153664787777255, "learning_rate": 0.00015184698686093173, "loss": 1.1609, "step": 7529 }, { "epoch": 0.72, "grad_norm": 0.31002810501031186, "learning_rate": 0.00015183345903064467, "loss": 1.0097, "step": 7530 }, { "epoch": 0.72, "grad_norm": 0.2764530793093042, "learning_rate": 0.00015181992990319265, "loss": 1.0459, "step": 7531 }, { "epoch": 0.72, "grad_norm": 0.2581950187662785, "learning_rate": 0.00015180639947891437, "loss": 1.0612, "step": 7532 }, { "epoch": 0.72, "grad_norm": 0.3228690135774238, "learning_rate": 0.0001517928677581484, "loss": 1.1375, "step": 7533 }, { "epoch": 0.72, "grad_norm": 0.2830701920026826, "learning_rate": 0.0001517793347412333, "loss": 0.9702, "step": 7534 }, { "epoch": 0.72, "grad_norm": 0.30970817365242975, "learning_rate": 0.00015176580042850787, "loss": 1.0011, "step": 7535 }, { "epoch": 0.72, "grad_norm": 0.2879799004430592, "learning_rate": 0.00015175226482031073, "loss": 1.0469, "step": 7536 }, { "epoch": 0.72, "grad_norm": 0.2721982692551152, "learning_rate": 0.0001517387279169806, "loss": 1.0661, "step": 7537 }, { "epoch": 0.72, "grad_norm": 0.30110301529460076, "learning_rate": 0.00015172518971885634, "loss": 1.0512, "step": 7538 }, { "epoch": 0.72, "grad_norm": 0.26509470901065213, "learning_rate": 0.00015171165022627667, "loss": 1.0675, "step": 7539 }, { "epoch": 0.72, "grad_norm": 0.29294036548567454, "learning_rate": 0.00015169810943958044, "loss": 1.12, "step": 7540 }, { "epoch": 0.72, "grad_norm": 0.2756337326490096, "learning_rate": 0.00015168456735910657, "loss": 1.0387, "step": 7541 }, { "epoch": 0.72, "grad_norm": 0.2974971175746808, "learning_rate": 0.0001516710239851939, "loss": 1.1836, "step": 7542 }, { "epoch": 0.72, "grad_norm": 0.31089357056437184, "learning_rate": 0.0001516574793181814, "loss": 1.1023, "step": 7543 }, { "epoch": 0.72, "grad_norm": 0.30077365644294696, "learning_rate": 0.00015164393335840798, "loss": 0.9271, "step": 7544 }, { "epoch": 0.72, "grad_norm": 0.2565124797415133, "learning_rate": 0.00015163038610621269, "loss": 1.0294, "step": 7545 }, { "epoch": 0.72, "grad_norm": 0.2826401399220724, "learning_rate": 0.00015161683756193456, "loss": 1.0563, "step": 7546 }, { "epoch": 0.72, "grad_norm": 0.30362864947281726, "learning_rate": 0.00015160328772591256, "loss": 1.1748, "step": 7547 }, { "epoch": 0.72, "grad_norm": 0.29370524398637515, "learning_rate": 0.00015158973659848592, "loss": 1.0426, "step": 7548 }, { "epoch": 0.72, "grad_norm": 0.2684686193459749, "learning_rate": 0.00015157618417999366, "loss": 1.0464, "step": 7549 }, { "epoch": 0.72, "grad_norm": 0.2761867607331919, "learning_rate": 0.000151562630470775, "loss": 1.0953, "step": 7550 }, { "epoch": 0.72, "grad_norm": 0.25846363686227103, "learning_rate": 0.0001515490754711691, "loss": 1.061, "step": 7551 }, { "epoch": 0.72, "grad_norm": 0.2642419825902769, "learning_rate": 0.0001515355191815152, "loss": 1.1347, "step": 7552 }, { "epoch": 0.72, "grad_norm": 0.2559409653328977, "learning_rate": 0.00015152196160215253, "loss": 1.0638, "step": 7553 }, { "epoch": 0.72, "grad_norm": 0.28318731458349305, "learning_rate": 0.00015150840273342038, "loss": 1.0592, "step": 7554 }, { "epoch": 0.72, "grad_norm": 0.2850162725399112, "learning_rate": 0.00015149484257565813, "loss": 1.1141, "step": 7555 }, { "epoch": 0.72, "grad_norm": 0.26118830684653604, "learning_rate": 0.000151481281129205, "loss": 1.0173, "step": 7556 }, { "epoch": 0.72, "grad_norm": 0.27293247877399707, "learning_rate": 0.0001514677183944005, "loss": 1.0846, "step": 7557 }, { "epoch": 0.72, "grad_norm": 0.304675314149257, "learning_rate": 0.00015145415437158401, "loss": 1.1428, "step": 7558 }, { "epoch": 0.72, "grad_norm": 0.2972819357482946, "learning_rate": 0.00015144058906109496, "loss": 1.0655, "step": 7559 }, { "epoch": 0.72, "grad_norm": 0.28299905657676133, "learning_rate": 0.00015142702246327287, "loss": 1.0705, "step": 7560 }, { "epoch": 0.72, "grad_norm": 0.29542732633039054, "learning_rate": 0.00015141345457845716, "loss": 1.0193, "step": 7561 }, { "epoch": 0.72, "grad_norm": 0.2663372553374203, "learning_rate": 0.00015139988540698748, "loss": 0.9831, "step": 7562 }, { "epoch": 0.72, "grad_norm": 0.25393991962427676, "learning_rate": 0.00015138631494920337, "loss": 1.1387, "step": 7563 }, { "epoch": 0.72, "grad_norm": 0.27744832277420794, "learning_rate": 0.00015137274320544438, "loss": 1.1088, "step": 7564 }, { "epoch": 0.72, "grad_norm": 0.27311375703469265, "learning_rate": 0.00015135917017605025, "loss": 1.092, "step": 7565 }, { "epoch": 0.72, "grad_norm": 0.3264586447066704, "learning_rate": 0.0001513455958613606, "loss": 1.1544, "step": 7566 }, { "epoch": 0.72, "grad_norm": 0.29926443610547104, "learning_rate": 0.00015133202026171514, "loss": 1.0584, "step": 7567 }, { "epoch": 0.72, "grad_norm": 0.2945733521922124, "learning_rate": 0.00015131844337745362, "loss": 0.9397, "step": 7568 }, { "epoch": 0.72, "grad_norm": 0.2730547592785977, "learning_rate": 0.00015130486520891582, "loss": 0.9991, "step": 7569 }, { "epoch": 0.72, "grad_norm": 0.30119301432734397, "learning_rate": 0.00015129128575644147, "loss": 1.0843, "step": 7570 }, { "epoch": 0.72, "grad_norm": 0.30645684857055866, "learning_rate": 0.00015127770502037052, "loss": 1.0142, "step": 7571 }, { "epoch": 0.72, "grad_norm": 0.2821703954502372, "learning_rate": 0.00015126412300104272, "loss": 0.982, "step": 7572 }, { "epoch": 0.72, "grad_norm": 0.27920328503608616, "learning_rate": 0.00015125053969879807, "loss": 1.0294, "step": 7573 }, { "epoch": 0.72, "grad_norm": 0.2385258107376846, "learning_rate": 0.0001512369551139764, "loss": 1.0406, "step": 7574 }, { "epoch": 0.72, "grad_norm": 0.2972641315163676, "learning_rate": 0.0001512233692469178, "loss": 1.1835, "step": 7575 }, { "epoch": 0.72, "grad_norm": 0.2602247995915003, "learning_rate": 0.00015120978209796213, "loss": 1.0921, "step": 7576 }, { "epoch": 0.72, "grad_norm": 0.27839769616677684, "learning_rate": 0.00015119619366744952, "loss": 1.099, "step": 7577 }, { "epoch": 0.72, "grad_norm": 0.316376234085856, "learning_rate": 0.00015118260395571994, "loss": 1.0387, "step": 7578 }, { "epoch": 0.73, "grad_norm": 0.31660368584142895, "learning_rate": 0.00015116901296311356, "loss": 1.1306, "step": 7579 }, { "epoch": 0.73, "grad_norm": 0.260900438740011, "learning_rate": 0.00015115542068997047, "loss": 0.9071, "step": 7580 }, { "epoch": 0.73, "grad_norm": 0.2624881211285536, "learning_rate": 0.00015114182713663082, "loss": 1.1411, "step": 7581 }, { "epoch": 0.73, "grad_norm": 0.257486571814327, "learning_rate": 0.00015112823230343484, "loss": 1.0307, "step": 7582 }, { "epoch": 0.73, "grad_norm": 0.2823321215311315, "learning_rate": 0.00015111463619072265, "loss": 1.1471, "step": 7583 }, { "epoch": 0.73, "grad_norm": 0.30117635588768243, "learning_rate": 0.00015110103879883462, "loss": 1.1452, "step": 7584 }, { "epoch": 0.73, "grad_norm": 0.3022579959192287, "learning_rate": 0.00015108744012811096, "loss": 1.0584, "step": 7585 }, { "epoch": 0.73, "grad_norm": 0.29740329090227624, "learning_rate": 0.000151073840178892, "loss": 1.0415, "step": 7586 }, { "epoch": 0.73, "grad_norm": 0.29372205328515927, "learning_rate": 0.0001510602389515181, "loss": 1.1383, "step": 7587 }, { "epoch": 0.73, "grad_norm": 0.2920566881167881, "learning_rate": 0.00015104663644632962, "loss": 1.0683, "step": 7588 }, { "epoch": 0.73, "grad_norm": 0.22058431176486398, "learning_rate": 0.000151033032663667, "loss": 1.0229, "step": 7589 }, { "epoch": 0.73, "grad_norm": 0.25213010351059206, "learning_rate": 0.00015101942760387065, "loss": 1.1396, "step": 7590 }, { "epoch": 0.73, "grad_norm": 0.25918222437239363, "learning_rate": 0.00015100582126728105, "loss": 1.0704, "step": 7591 }, { "epoch": 0.73, "grad_norm": 0.2961632170304152, "learning_rate": 0.00015099221365423872, "loss": 1.1117, "step": 7592 }, { "epoch": 0.73, "grad_norm": 0.28187090698557027, "learning_rate": 0.0001509786047650842, "loss": 0.9727, "step": 7593 }, { "epoch": 0.73, "grad_norm": 0.2892636190812079, "learning_rate": 0.00015096499460015805, "loss": 1.013, "step": 7594 }, { "epoch": 0.73, "grad_norm": 0.28236820071622526, "learning_rate": 0.0001509513831598009, "loss": 0.9727, "step": 7595 }, { "epoch": 0.73, "grad_norm": 0.2722476913875187, "learning_rate": 0.00015093777044435333, "loss": 1.0157, "step": 7596 }, { "epoch": 0.73, "grad_norm": 0.2652646040859842, "learning_rate": 0.00015092415645415606, "loss": 1.0091, "step": 7597 }, { "epoch": 0.73, "grad_norm": 0.28143692367142764, "learning_rate": 0.00015091054118954978, "loss": 1.0371, "step": 7598 }, { "epoch": 0.73, "grad_norm": 0.2767178043247685, "learning_rate": 0.0001508969246508752, "loss": 1.0008, "step": 7599 }, { "epoch": 0.73, "grad_norm": 0.25719222763437416, "learning_rate": 0.0001508833068384731, "loss": 1.0056, "step": 7600 }, { "epoch": 0.73, "grad_norm": 0.30194394764309046, "learning_rate": 0.00015086968775268427, "loss": 1.0588, "step": 7601 }, { "epoch": 0.73, "grad_norm": 0.2775361029431778, "learning_rate": 0.00015085606739384953, "loss": 1.1844, "step": 7602 }, { "epoch": 0.73, "grad_norm": 0.2529941931845389, "learning_rate": 0.00015084244576230976, "loss": 0.9901, "step": 7603 }, { "epoch": 0.73, "grad_norm": 0.3165846646756356, "learning_rate": 0.00015082882285840578, "loss": 0.997, "step": 7604 }, { "epoch": 0.73, "grad_norm": 0.2782657643449699, "learning_rate": 0.0001508151986824786, "loss": 1.0908, "step": 7605 }, { "epoch": 0.73, "grad_norm": 0.2781877735281842, "learning_rate": 0.00015080157323486915, "loss": 1.1314, "step": 7606 }, { "epoch": 0.73, "grad_norm": 0.281042713599222, "learning_rate": 0.00015078794651591837, "loss": 1.0458, "step": 7607 }, { "epoch": 0.73, "grad_norm": 0.27849574824675116, "learning_rate": 0.0001507743185259673, "loss": 1.1229, "step": 7608 }, { "epoch": 0.73, "grad_norm": 0.2759377287295315, "learning_rate": 0.00015076068926535706, "loss": 1.044, "step": 7609 }, { "epoch": 0.73, "grad_norm": 0.2821493820751014, "learning_rate": 0.00015074705873442863, "loss": 1.0843, "step": 7610 }, { "epoch": 0.73, "grad_norm": 0.2607548413248963, "learning_rate": 0.00015073342693352316, "loss": 1.0291, "step": 7611 }, { "epoch": 0.73, "grad_norm": 0.3107914480290541, "learning_rate": 0.0001507197938629818, "loss": 1.1632, "step": 7612 }, { "epoch": 0.73, "grad_norm": 0.2644427769663581, "learning_rate": 0.0001507061595231457, "loss": 1.112, "step": 7613 }, { "epoch": 0.73, "grad_norm": 0.29569519438471115, "learning_rate": 0.00015069252391435614, "loss": 0.8825, "step": 7614 }, { "epoch": 0.73, "grad_norm": 0.2725562040988613, "learning_rate": 0.00015067888703695426, "loss": 0.975, "step": 7615 }, { "epoch": 0.73, "grad_norm": 0.27141691407540564, "learning_rate": 0.00015066524889128139, "loss": 1.1129, "step": 7616 }, { "epoch": 0.73, "grad_norm": 0.286170558294994, "learning_rate": 0.00015065160947767887, "loss": 1.1549, "step": 7617 }, { "epoch": 0.73, "grad_norm": 0.28827191966294335, "learning_rate": 0.00015063796879648793, "loss": 1.0636, "step": 7618 }, { "epoch": 0.73, "grad_norm": 0.34763534936988866, "learning_rate": 0.00015062432684805, "loss": 1.0821, "step": 7619 }, { "epoch": 0.73, "grad_norm": 0.2928541057535552, "learning_rate": 0.00015061068363270654, "loss": 0.9746, "step": 7620 }, { "epoch": 0.73, "grad_norm": 0.22676940791019204, "learning_rate": 0.00015059703915079888, "loss": 0.9351, "step": 7621 }, { "epoch": 0.73, "grad_norm": 0.2932641803272752, "learning_rate": 0.0001505833934026685, "loss": 1.169, "step": 7622 }, { "epoch": 0.73, "grad_norm": 0.2545708248714656, "learning_rate": 0.0001505697463886569, "loss": 1.0129, "step": 7623 }, { "epoch": 0.73, "grad_norm": 0.31554332098638554, "learning_rate": 0.00015055609810910565, "loss": 1.0664, "step": 7624 }, { "epoch": 0.73, "grad_norm": 0.3159256467723614, "learning_rate": 0.00015054244856435624, "loss": 1.1755, "step": 7625 }, { "epoch": 0.73, "grad_norm": 0.25618170107349153, "learning_rate": 0.0001505287977547503, "loss": 1.0721, "step": 7626 }, { "epoch": 0.73, "grad_norm": 0.3162579358667636, "learning_rate": 0.00015051514568062947, "loss": 0.958, "step": 7627 }, { "epoch": 0.73, "grad_norm": 0.25894438378797824, "learning_rate": 0.00015050149234233532, "loss": 1.0019, "step": 7628 }, { "epoch": 0.73, "grad_norm": 0.2852760760049263, "learning_rate": 0.00015048783774020962, "loss": 1.1271, "step": 7629 }, { "epoch": 0.73, "grad_norm": 0.3912607456759231, "learning_rate": 0.00015047418187459405, "loss": 1.1038, "step": 7630 }, { "epoch": 0.73, "grad_norm": 0.322140496874572, "learning_rate": 0.00015046052474583033, "loss": 1.0758, "step": 7631 }, { "epoch": 0.73, "grad_norm": 0.3229270016075276, "learning_rate": 0.0001504468663542603, "loss": 1.0946, "step": 7632 }, { "epoch": 0.73, "grad_norm": 0.3081581987259254, "learning_rate": 0.0001504332067002257, "loss": 1.0036, "step": 7633 }, { "epoch": 0.73, "grad_norm": 0.29552024618919837, "learning_rate": 0.00015041954578406844, "loss": 1.0949, "step": 7634 }, { "epoch": 0.73, "grad_norm": 0.2963238428604876, "learning_rate": 0.00015040588360613034, "loss": 1.0493, "step": 7635 }, { "epoch": 0.73, "grad_norm": 0.29301025689881727, "learning_rate": 0.00015039222016675332, "loss": 1.0601, "step": 7636 }, { "epoch": 0.73, "grad_norm": 0.31243960952554717, "learning_rate": 0.0001503785554662793, "loss": 1.0141, "step": 7637 }, { "epoch": 0.73, "grad_norm": 0.2810933699464373, "learning_rate": 0.00015036488950505032, "loss": 0.9789, "step": 7638 }, { "epoch": 0.73, "grad_norm": 0.2800782903010916, "learning_rate": 0.0001503512222834083, "loss": 1.0996, "step": 7639 }, { "epoch": 0.73, "grad_norm": 0.2737931904759353, "learning_rate": 0.0001503375538016953, "loss": 1.0249, "step": 7640 }, { "epoch": 0.73, "grad_norm": 0.28069227541654734, "learning_rate": 0.0001503238840602534, "loss": 0.9117, "step": 7641 }, { "epoch": 0.73, "grad_norm": 0.26949283739174246, "learning_rate": 0.00015031021305942464, "loss": 1.0084, "step": 7642 }, { "epoch": 0.73, "grad_norm": 0.31475584607752194, "learning_rate": 0.00015029654079955118, "loss": 1.0414, "step": 7643 }, { "epoch": 0.73, "grad_norm": 0.28698683483189275, "learning_rate": 0.0001502828672809752, "loss": 1.0434, "step": 7644 }, { "epoch": 0.73, "grad_norm": 0.25256982279011375, "learning_rate": 0.00015026919250403883, "loss": 1.0292, "step": 7645 }, { "epoch": 0.73, "grad_norm": 0.29577865157695005, "learning_rate": 0.00015025551646908437, "loss": 1.1659, "step": 7646 }, { "epoch": 0.73, "grad_norm": 0.2728622137260916, "learning_rate": 0.000150241839176454, "loss": 1.0527, "step": 7647 }, { "epoch": 0.73, "grad_norm": 0.30338258907392546, "learning_rate": 0.00015022816062649005, "loss": 1.1148, "step": 7648 }, { "epoch": 0.73, "grad_norm": 0.26931970248244586, "learning_rate": 0.0001502144808195348, "loss": 1.1357, "step": 7649 }, { "epoch": 0.73, "grad_norm": 0.2742596946281973, "learning_rate": 0.0001502007997559306, "loss": 1.0646, "step": 7650 }, { "epoch": 0.73, "grad_norm": 0.3011532902374543, "learning_rate": 0.00015018711743601984, "loss": 1.1351, "step": 7651 }, { "epoch": 0.73, "grad_norm": 0.2821557355247745, "learning_rate": 0.00015017343386014494, "loss": 1.0087, "step": 7652 }, { "epoch": 0.73, "grad_norm": 0.31425399583262464, "learning_rate": 0.0001501597490286483, "loss": 1.0384, "step": 7653 }, { "epoch": 0.73, "grad_norm": 0.2931669718113375, "learning_rate": 0.0001501460629418724, "loss": 1.0466, "step": 7654 }, { "epoch": 0.73, "grad_norm": 0.2875430271869409, "learning_rate": 0.0001501323756001598, "loss": 1.0878, "step": 7655 }, { "epoch": 0.73, "grad_norm": 0.2597750556495404, "learning_rate": 0.00015011868700385298, "loss": 1.0548, "step": 7656 }, { "epoch": 0.73, "grad_norm": 0.27705706713516576, "learning_rate": 0.00015010499715329453, "loss": 0.984, "step": 7657 }, { "epoch": 0.73, "grad_norm": 0.26538202477264977, "learning_rate": 0.00015009130604882702, "loss": 1.0417, "step": 7658 }, { "epoch": 0.73, "grad_norm": 0.28174584601024333, "learning_rate": 0.0001500776136907931, "loss": 1.0257, "step": 7659 }, { "epoch": 0.73, "grad_norm": 0.25036859344556583, "learning_rate": 0.00015006392007953543, "loss": 1.1059, "step": 7660 }, { "epoch": 0.73, "grad_norm": 0.27900752760462394, "learning_rate": 0.00015005022521539672, "loss": 1.1089, "step": 7661 }, { "epoch": 0.73, "grad_norm": 0.31107819751826155, "learning_rate": 0.0001500365290987196, "loss": 1.0433, "step": 7662 }, { "epoch": 0.73, "grad_norm": 0.2843202029978909, "learning_rate": 0.00015002283172984695, "loss": 1.0845, "step": 7663 }, { "epoch": 0.73, "grad_norm": 0.2589541502663174, "learning_rate": 0.00015000913310912148, "loss": 0.9485, "step": 7664 }, { "epoch": 0.73, "grad_norm": 0.300671018081089, "learning_rate": 0.00014999543323688603, "loss": 1.0407, "step": 7665 }, { "epoch": 0.73, "grad_norm": 0.27922119836694304, "learning_rate": 0.00014998173211348343, "loss": 0.9969, "step": 7666 }, { "epoch": 0.73, "grad_norm": 0.29425343441337976, "learning_rate": 0.0001499680297392566, "loss": 0.9274, "step": 7667 }, { "epoch": 0.73, "grad_norm": 0.2941232322325698, "learning_rate": 0.0001499543261145484, "loss": 1.0088, "step": 7668 }, { "epoch": 0.73, "grad_norm": 0.2914706729504272, "learning_rate": 0.0001499406212397018, "loss": 1.0058, "step": 7669 }, { "epoch": 0.73, "grad_norm": 0.2953576562328182, "learning_rate": 0.00014992691511505975, "loss": 1.0051, "step": 7670 }, { "epoch": 0.73, "grad_norm": 0.34306316772779066, "learning_rate": 0.0001499132077409653, "loss": 1.153, "step": 7671 }, { "epoch": 0.73, "grad_norm": 0.2679983439498134, "learning_rate": 0.0001498994991177614, "loss": 1.0104, "step": 7672 }, { "epoch": 0.73, "grad_norm": 0.2594539874622118, "learning_rate": 0.00014988578924579122, "loss": 1.1645, "step": 7673 }, { "epoch": 0.73, "grad_norm": 0.3048213376270825, "learning_rate": 0.0001498720781253978, "loss": 1.098, "step": 7674 }, { "epoch": 0.73, "grad_norm": 0.3077629403566935, "learning_rate": 0.0001498583657569243, "loss": 1.1484, "step": 7675 }, { "epoch": 0.73, "grad_norm": 0.31902620346830557, "learning_rate": 0.00014984465214071385, "loss": 1.009, "step": 7676 }, { "epoch": 0.73, "grad_norm": 0.29403015567312174, "learning_rate": 0.00014983093727710965, "loss": 0.9773, "step": 7677 }, { "epoch": 0.73, "grad_norm": 0.2548359871414429, "learning_rate": 0.00014981722116645495, "loss": 1.0751, "step": 7678 }, { "epoch": 0.73, "grad_norm": 0.2787506686083023, "learning_rate": 0.00014980350380909294, "loss": 1.1147, "step": 7679 }, { "epoch": 0.73, "grad_norm": 0.2992261122500776, "learning_rate": 0.00014978978520536698, "loss": 1.0858, "step": 7680 }, { "epoch": 0.73, "grad_norm": 0.3011621462376174, "learning_rate": 0.00014977606535562034, "loss": 1.046, "step": 7681 }, { "epoch": 0.73, "grad_norm": 0.2986573455838004, "learning_rate": 0.0001497623442601964, "loss": 1.0208, "step": 7682 }, { "epoch": 0.74, "grad_norm": 0.28656498019974624, "learning_rate": 0.00014974862191943848, "loss": 1.1759, "step": 7683 }, { "epoch": 0.74, "grad_norm": 0.2762485785593129, "learning_rate": 0.00014973489833369004, "loss": 1.1052, "step": 7684 }, { "epoch": 0.74, "grad_norm": 0.3083477893434321, "learning_rate": 0.00014972117350329456, "loss": 0.9617, "step": 7685 }, { "epoch": 0.74, "grad_norm": 0.3003280251753417, "learning_rate": 0.0001497074474285954, "loss": 1.029, "step": 7686 }, { "epoch": 0.74, "grad_norm": 0.2572025859628947, "learning_rate": 0.00014969372010993618, "loss": 1.2292, "step": 7687 }, { "epoch": 0.74, "grad_norm": 0.24791565205884394, "learning_rate": 0.00014967999154766036, "loss": 1.1202, "step": 7688 }, { "epoch": 0.74, "grad_norm": 0.2834724391112066, "learning_rate": 0.00014966626174211153, "loss": 1.1773, "step": 7689 }, { "epoch": 0.74, "grad_norm": 0.2793645427994029, "learning_rate": 0.0001496525306936333, "loss": 1.0002, "step": 7690 }, { "epoch": 0.74, "grad_norm": 0.2742995002805119, "learning_rate": 0.00014963879840256927, "loss": 1.1219, "step": 7691 }, { "epoch": 0.74, "grad_norm": 0.31255435123133335, "learning_rate": 0.0001496250648692631, "loss": 1.0062, "step": 7692 }, { "epoch": 0.74, "grad_norm": 0.3095875184013366, "learning_rate": 0.00014961133009405852, "loss": 1.0721, "step": 7693 }, { "epoch": 0.74, "grad_norm": 0.2804828390216687, "learning_rate": 0.00014959759407729922, "loss": 0.9511, "step": 7694 }, { "epoch": 0.74, "grad_norm": 0.28459275473311924, "learning_rate": 0.00014958385681932893, "loss": 0.8945, "step": 7695 }, { "epoch": 0.74, "grad_norm": 0.30693979635734253, "learning_rate": 0.00014957011832049147, "loss": 1.0574, "step": 7696 }, { "epoch": 0.74, "grad_norm": 0.2661866180961575, "learning_rate": 0.00014955637858113065, "loss": 1.1286, "step": 7697 }, { "epoch": 0.74, "grad_norm": 0.2748632910585887, "learning_rate": 0.00014954263760159033, "loss": 1.0264, "step": 7698 }, { "epoch": 0.74, "grad_norm": 0.2639660707256238, "learning_rate": 0.00014952889538221434, "loss": 0.9609, "step": 7699 }, { "epoch": 0.74, "grad_norm": 0.28950568503410234, "learning_rate": 0.00014951515192334665, "loss": 1.0243, "step": 7700 }, { "epoch": 0.74, "grad_norm": 0.2741181044596097, "learning_rate": 0.00014950140722533114, "loss": 1.1703, "step": 7701 }, { "epoch": 0.74, "grad_norm": 0.2939600698983923, "learning_rate": 0.0001494876612885118, "loss": 1.0576, "step": 7702 }, { "epoch": 0.74, "grad_norm": 0.27769959394242444, "learning_rate": 0.00014947391411323263, "loss": 1.032, "step": 7703 }, { "epoch": 0.74, "grad_norm": 0.27126944244428325, "learning_rate": 0.0001494601656998377, "loss": 0.9935, "step": 7704 }, { "epoch": 0.74, "grad_norm": 0.2842820549012656, "learning_rate": 0.000149446416048671, "loss": 1.1503, "step": 7705 }, { "epoch": 0.74, "grad_norm": 0.2669012700932571, "learning_rate": 0.0001494326651600767, "loss": 1.0303, "step": 7706 }, { "epoch": 0.74, "grad_norm": 0.30368700424850303, "learning_rate": 0.00014941891303439886, "loss": 1.0606, "step": 7707 }, { "epoch": 0.74, "grad_norm": 0.30976284634068807, "learning_rate": 0.0001494051596719817, "loss": 1.0383, "step": 7708 }, { "epoch": 0.74, "grad_norm": 0.3052315022073979, "learning_rate": 0.00014939140507316934, "loss": 1.0763, "step": 7709 }, { "epoch": 0.74, "grad_norm": 0.28582065805745044, "learning_rate": 0.00014937764923830598, "loss": 1.2036, "step": 7710 }, { "epoch": 0.74, "grad_norm": 0.2783946932750405, "learning_rate": 0.000149363892167736, "loss": 1.0478, "step": 7711 }, { "epoch": 0.74, "grad_norm": 0.2863324153725781, "learning_rate": 0.00014935013386180353, "loss": 1.0988, "step": 7712 }, { "epoch": 0.74, "grad_norm": 0.2785613479438333, "learning_rate": 0.00014933637432085295, "loss": 1.0189, "step": 7713 }, { "epoch": 0.74, "grad_norm": 0.28499263197797736, "learning_rate": 0.00014932261354522864, "loss": 1.1063, "step": 7714 }, { "epoch": 0.74, "grad_norm": 0.30255835930843594, "learning_rate": 0.00014930885153527492, "loss": 1.0056, "step": 7715 }, { "epoch": 0.74, "grad_norm": 0.23887877046224554, "learning_rate": 0.00014929508829133616, "loss": 1.0982, "step": 7716 }, { "epoch": 0.74, "grad_norm": 0.25941492372004377, "learning_rate": 0.0001492813238137569, "loss": 1.0775, "step": 7717 }, { "epoch": 0.74, "grad_norm": 0.2840455310981083, "learning_rate": 0.00014926755810288147, "loss": 1.0016, "step": 7718 }, { "epoch": 0.74, "grad_norm": 0.3159294197735588, "learning_rate": 0.00014925379115905444, "loss": 1.1289, "step": 7719 }, { "epoch": 0.74, "grad_norm": 0.3001807062258611, "learning_rate": 0.00014924002298262034, "loss": 0.9867, "step": 7720 }, { "epoch": 0.74, "grad_norm": 0.2783216610430766, "learning_rate": 0.00014922625357392376, "loss": 1.0793, "step": 7721 }, { "epoch": 0.74, "grad_norm": 0.28020041552297037, "learning_rate": 0.00014921248293330922, "loss": 1.0532, "step": 7722 }, { "epoch": 0.74, "grad_norm": 0.3424716991146364, "learning_rate": 0.00014919871106112135, "loss": 1.0768, "step": 7723 }, { "epoch": 0.74, "grad_norm": 0.2779795066958149, "learning_rate": 0.00014918493795770482, "loss": 0.9554, "step": 7724 }, { "epoch": 0.74, "grad_norm": 0.3181269730793016, "learning_rate": 0.00014917116362340435, "loss": 1.0211, "step": 7725 }, { "epoch": 0.74, "grad_norm": 0.28616822041259976, "learning_rate": 0.00014915738805856458, "loss": 1.1136, "step": 7726 }, { "epoch": 0.74, "grad_norm": 0.25638910226945644, "learning_rate": 0.00014914361126353026, "loss": 0.9941, "step": 7727 }, { "epoch": 0.74, "grad_norm": 0.25181470798369615, "learning_rate": 0.0001491298332386462, "loss": 1.0635, "step": 7728 }, { "epoch": 0.74, "grad_norm": 0.279163837257328, "learning_rate": 0.0001491160539842572, "loss": 1.0245, "step": 7729 }, { "epoch": 0.74, "grad_norm": 0.28555043885157716, "learning_rate": 0.00014910227350070805, "loss": 1.1297, "step": 7730 }, { "epoch": 0.74, "grad_norm": 0.28507597908988613, "learning_rate": 0.00014908849178834366, "loss": 1.1125, "step": 7731 }, { "epoch": 0.74, "grad_norm": 0.28046175174288296, "learning_rate": 0.00014907470884750892, "loss": 1.1712, "step": 7732 }, { "epoch": 0.74, "grad_norm": 0.2841570962434119, "learning_rate": 0.00014906092467854875, "loss": 1.061, "step": 7733 }, { "epoch": 0.74, "grad_norm": 0.25589378852673406, "learning_rate": 0.00014904713928180806, "loss": 1.0623, "step": 7734 }, { "epoch": 0.74, "grad_norm": 0.29195274168710955, "learning_rate": 0.00014903335265763193, "loss": 1.008, "step": 7735 }, { "epoch": 0.74, "grad_norm": 0.2742605176860971, "learning_rate": 0.00014901956480636535, "loss": 1.153, "step": 7736 }, { "epoch": 0.74, "grad_norm": 0.28128186302274166, "learning_rate": 0.0001490057757283533, "loss": 1.0106, "step": 7737 }, { "epoch": 0.74, "grad_norm": 0.2829056481616711, "learning_rate": 0.00014899198542394094, "loss": 1.1351, "step": 7738 }, { "epoch": 0.74, "grad_norm": 0.2715236434676326, "learning_rate": 0.00014897819389347335, "loss": 1.0404, "step": 7739 }, { "epoch": 0.74, "grad_norm": 0.2911015197712587, "learning_rate": 0.00014896440113729568, "loss": 1.2098, "step": 7740 }, { "epoch": 0.74, "grad_norm": 0.2752066845807117, "learning_rate": 0.0001489506071557531, "loss": 1.0378, "step": 7741 }, { "epoch": 0.74, "grad_norm": 0.33207199501826795, "learning_rate": 0.00014893681194919084, "loss": 0.967, "step": 7742 }, { "epoch": 0.74, "grad_norm": 0.2838659301448611, "learning_rate": 0.00014892301551795408, "loss": 1.0683, "step": 7743 }, { "epoch": 0.74, "grad_norm": 0.29348859811094435, "learning_rate": 0.0001489092178623881, "loss": 1.1144, "step": 7744 }, { "epoch": 0.74, "grad_norm": 0.2682746013538995, "learning_rate": 0.00014889541898283821, "loss": 1.1194, "step": 7745 }, { "epoch": 0.74, "grad_norm": 0.30171726380766734, "learning_rate": 0.00014888161887964974, "loss": 1.023, "step": 7746 }, { "epoch": 0.74, "grad_norm": 0.25296517645030825, "learning_rate": 0.000148867817553168, "loss": 1.1727, "step": 7747 }, { "epoch": 0.74, "grad_norm": 0.31768749485918707, "learning_rate": 0.00014885401500373845, "loss": 1.0391, "step": 7748 }, { "epoch": 0.74, "grad_norm": 0.2438411570540729, "learning_rate": 0.0001488402112317065, "loss": 1.0145, "step": 7749 }, { "epoch": 0.74, "grad_norm": 0.3034976664176374, "learning_rate": 0.0001488264062374175, "loss": 1.1215, "step": 7750 }, { "epoch": 0.74, "grad_norm": 0.265851976673612, "learning_rate": 0.00014881260002121705, "loss": 0.9724, "step": 7751 }, { "epoch": 0.74, "grad_norm": 0.29764121681730704, "learning_rate": 0.00014879879258345057, "loss": 1.0749, "step": 7752 }, { "epoch": 0.74, "grad_norm": 0.2797392427717897, "learning_rate": 0.00014878498392446366, "loss": 0.8735, "step": 7753 }, { "epoch": 0.74, "grad_norm": 0.24366220050786846, "learning_rate": 0.00014877117404460185, "loss": 1.0055, "step": 7754 }, { "epoch": 0.74, "grad_norm": 0.28588196020457907, "learning_rate": 0.00014875736294421078, "loss": 1.16, "step": 7755 }, { "epoch": 0.74, "grad_norm": 0.30525823027606785, "learning_rate": 0.00014874355062363605, "loss": 1.1435, "step": 7756 }, { "epoch": 0.74, "grad_norm": 0.248479759938403, "learning_rate": 0.00014872973708322332, "loss": 1.0148, "step": 7757 }, { "epoch": 0.74, "grad_norm": 0.2705904252639747, "learning_rate": 0.00014871592232331833, "loss": 1.1533, "step": 7758 }, { "epoch": 0.74, "grad_norm": 0.2624337205195208, "learning_rate": 0.0001487021063442667, "loss": 0.9806, "step": 7759 }, { "epoch": 0.74, "grad_norm": 0.2526960220148543, "learning_rate": 0.00014868828914641431, "loss": 0.9377, "step": 7760 }, { "epoch": 0.74, "grad_norm": 0.2974913829902583, "learning_rate": 0.00014867447073010686, "loss": 1.0434, "step": 7761 }, { "epoch": 0.74, "grad_norm": 0.22591683924408296, "learning_rate": 0.0001486606510956902, "loss": 1.145, "step": 7762 }, { "epoch": 0.74, "grad_norm": 0.26106572494311425, "learning_rate": 0.00014864683024351017, "loss": 1.0687, "step": 7763 }, { "epoch": 0.74, "grad_norm": 0.2540501633396988, "learning_rate": 0.00014863300817391262, "loss": 1.0217, "step": 7764 }, { "epoch": 0.74, "grad_norm": 0.2801665117949245, "learning_rate": 0.0001486191848872435, "loss": 1.1218, "step": 7765 }, { "epoch": 0.74, "grad_norm": 0.3187353776069642, "learning_rate": 0.0001486053603838487, "loss": 1.0898, "step": 7766 }, { "epoch": 0.74, "grad_norm": 0.25763576552152534, "learning_rate": 0.0001485915346640742, "loss": 1.0585, "step": 7767 }, { "epoch": 0.74, "grad_norm": 0.2876319448148584, "learning_rate": 0.00014857770772826602, "loss": 1.0169, "step": 7768 }, { "epoch": 0.74, "grad_norm": 0.25997267525577705, "learning_rate": 0.0001485638795767702, "loss": 1.0577, "step": 7769 }, { "epoch": 0.74, "grad_norm": 0.2870532110656952, "learning_rate": 0.00014855005020993276, "loss": 1.1589, "step": 7770 }, { "epoch": 0.74, "grad_norm": 0.28744735564229734, "learning_rate": 0.00014853621962809975, "loss": 1.0786, "step": 7771 }, { "epoch": 0.74, "grad_norm": 0.26983452322850415, "learning_rate": 0.0001485223878316174, "loss": 1.2123, "step": 7772 }, { "epoch": 0.74, "grad_norm": 0.33765627577778073, "learning_rate": 0.00014850855482083177, "loss": 1.1044, "step": 7773 }, { "epoch": 0.74, "grad_norm": 0.272206284878232, "learning_rate": 0.00014849472059608906, "loss": 1.0212, "step": 7774 }, { "epoch": 0.74, "grad_norm": 0.29277937755205086, "learning_rate": 0.00014848088515773553, "loss": 1.1695, "step": 7775 }, { "epoch": 0.74, "grad_norm": 0.25356023491550367, "learning_rate": 0.00014846704850611736, "loss": 1.1737, "step": 7776 }, { "epoch": 0.74, "grad_norm": 0.3022516410233581, "learning_rate": 0.00014845321064158083, "loss": 1.0756, "step": 7777 }, { "epoch": 0.74, "grad_norm": 0.2530511912100064, "learning_rate": 0.00014843937156447227, "loss": 1.0036, "step": 7778 }, { "epoch": 0.74, "grad_norm": 0.3022412340923616, "learning_rate": 0.000148425531275138, "loss": 0.9716, "step": 7779 }, { "epoch": 0.74, "grad_norm": 0.2678799188134652, "learning_rate": 0.00014841168977392432, "loss": 1.1076, "step": 7780 }, { "epoch": 0.74, "grad_norm": 0.2963039241793785, "learning_rate": 0.00014839784706117775, "loss": 1.0797, "step": 7781 }, { "epoch": 0.74, "grad_norm": 0.27673978607030375, "learning_rate": 0.00014838400313724458, "loss": 1.1354, "step": 7782 }, { "epoch": 0.74, "grad_norm": 0.2599969777043114, "learning_rate": 0.00014837015800247137, "loss": 1.0195, "step": 7783 }, { "epoch": 0.74, "grad_norm": 0.28872142479078317, "learning_rate": 0.0001483563116572045, "loss": 0.9883, "step": 7784 }, { "epoch": 0.74, "grad_norm": 0.2728653682651342, "learning_rate": 0.0001483424641017906, "loss": 1.1095, "step": 7785 }, { "epoch": 0.74, "grad_norm": 0.3069290092163076, "learning_rate": 0.00014832861533657613, "loss": 1.093, "step": 7786 }, { "epoch": 0.74, "grad_norm": 0.25616829490396015, "learning_rate": 0.0001483147653619077, "loss": 1.1398, "step": 7787 }, { "epoch": 0.75, "grad_norm": 0.33842983259832143, "learning_rate": 0.00014830091417813188, "loss": 1.0526, "step": 7788 }, { "epoch": 0.75, "grad_norm": 0.29499869905887005, "learning_rate": 0.00014828706178559534, "loss": 1.1995, "step": 7789 }, { "epoch": 0.75, "grad_norm": 0.2867836759486996, "learning_rate": 0.00014827320818464474, "loss": 1.1038, "step": 7790 }, { "epoch": 0.75, "grad_norm": 0.27597630873723183, "learning_rate": 0.00014825935337562673, "loss": 1.1934, "step": 7791 }, { "epoch": 0.75, "grad_norm": 0.2566617689778122, "learning_rate": 0.0001482454973588881, "loss": 1.0383, "step": 7792 }, { "epoch": 0.75, "grad_norm": 0.28408572266180726, "learning_rate": 0.0001482316401347756, "loss": 0.9326, "step": 7793 }, { "epoch": 0.75, "grad_norm": 0.2745042716972083, "learning_rate": 0.00014821778170363595, "loss": 1.0336, "step": 7794 }, { "epoch": 0.75, "grad_norm": 0.3032701457073397, "learning_rate": 0.00014820392206581602, "loss": 1.0234, "step": 7795 }, { "epoch": 0.75, "grad_norm": 0.2751280009313665, "learning_rate": 0.00014819006122166267, "loss": 1.0432, "step": 7796 }, { "epoch": 0.75, "grad_norm": 0.3125193076347104, "learning_rate": 0.00014817619917152275, "loss": 1.1172, "step": 7797 }, { "epoch": 0.75, "grad_norm": 0.26116307629794683, "learning_rate": 0.00014816233591574313, "loss": 1.0669, "step": 7798 }, { "epoch": 0.75, "grad_norm": 0.28349617158355267, "learning_rate": 0.0001481484714546708, "loss": 1.1477, "step": 7799 }, { "epoch": 0.75, "grad_norm": 0.34282406819344863, "learning_rate": 0.00014813460578865274, "loss": 1.009, "step": 7800 }, { "epoch": 0.75, "grad_norm": 0.28578123221740864, "learning_rate": 0.00014812073891803587, "loss": 1.156, "step": 7801 }, { "epoch": 0.75, "grad_norm": 0.2980354742362817, "learning_rate": 0.0001481068708431673, "loss": 1.0768, "step": 7802 }, { "epoch": 0.75, "grad_norm": 0.3220759599695297, "learning_rate": 0.00014809300156439406, "loss": 1.0764, "step": 7803 }, { "epoch": 0.75, "grad_norm": 0.2640835765034414, "learning_rate": 0.00014807913108206322, "loss": 1.0768, "step": 7804 }, { "epoch": 0.75, "grad_norm": 0.27220178093073455, "learning_rate": 0.00014806525939652188, "loss": 1.1449, "step": 7805 }, { "epoch": 0.75, "grad_norm": 0.27363838878553143, "learning_rate": 0.00014805138650811724, "loss": 1.0526, "step": 7806 }, { "epoch": 0.75, "grad_norm": 0.2883264760303332, "learning_rate": 0.0001480375124171965, "loss": 1.0072, "step": 7807 }, { "epoch": 0.75, "grad_norm": 0.28230583793684294, "learning_rate": 0.00014802363712410673, "loss": 1.043, "step": 7808 }, { "epoch": 0.75, "grad_norm": 0.2615628239719908, "learning_rate": 0.00014800976062919532, "loss": 1.0759, "step": 7809 }, { "epoch": 0.75, "grad_norm": 0.29229275325827664, "learning_rate": 0.00014799588293280946, "loss": 1.0116, "step": 7810 }, { "epoch": 0.75, "grad_norm": 0.2738661601965768, "learning_rate": 0.00014798200403529646, "loss": 1.1214, "step": 7811 }, { "epoch": 0.75, "grad_norm": 0.2743653194914421, "learning_rate": 0.00014796812393700368, "loss": 1.0277, "step": 7812 }, { "epoch": 0.75, "grad_norm": 0.27052008019631224, "learning_rate": 0.00014795424263827842, "loss": 1.0564, "step": 7813 }, { "epoch": 0.75, "grad_norm": 0.30231813442957706, "learning_rate": 0.00014794036013946813, "loss": 1.1308, "step": 7814 }, { "epoch": 0.75, "grad_norm": 0.29045069706562976, "learning_rate": 0.00014792647644092016, "loss": 1.0502, "step": 7815 }, { "epoch": 0.75, "grad_norm": 0.2663099880383512, "learning_rate": 0.000147912591542982, "loss": 1.1342, "step": 7816 }, { "epoch": 0.75, "grad_norm": 0.2671935344264389, "learning_rate": 0.00014789870544600116, "loss": 1.0337, "step": 7817 }, { "epoch": 0.75, "grad_norm": 0.2376028787105682, "learning_rate": 0.00014788481815032509, "loss": 1.0951, "step": 7818 }, { "epoch": 0.75, "grad_norm": 0.2873919541494794, "learning_rate": 0.00014787092965630135, "loss": 1.0885, "step": 7819 }, { "epoch": 0.75, "grad_norm": 0.28326748757574954, "learning_rate": 0.00014785703996427754, "loss": 0.9765, "step": 7820 }, { "epoch": 0.75, "grad_norm": 0.26333898401252714, "learning_rate": 0.0001478431490746012, "loss": 0.9915, "step": 7821 }, { "epoch": 0.75, "grad_norm": 0.2638238903309792, "learning_rate": 0.00014782925698761997, "loss": 1.0908, "step": 7822 }, { "epoch": 0.75, "grad_norm": 0.30274071491597526, "learning_rate": 0.00014781536370368157, "loss": 1.0742, "step": 7823 }, { "epoch": 0.75, "grad_norm": 0.2610374416233597, "learning_rate": 0.0001478014692231336, "loss": 1.1589, "step": 7824 }, { "epoch": 0.75, "grad_norm": 0.312177588979833, "learning_rate": 0.00014778757354632382, "loss": 1.023, "step": 7825 }, { "epoch": 0.75, "grad_norm": 0.2817902834771327, "learning_rate": 0.00014777367667360002, "loss": 1.0925, "step": 7826 }, { "epoch": 0.75, "grad_norm": 0.24540218148082688, "learning_rate": 0.00014775977860530988, "loss": 0.9525, "step": 7827 }, { "epoch": 0.75, "grad_norm": 0.292375111038645, "learning_rate": 0.0001477458793418013, "loss": 1.0295, "step": 7828 }, { "epoch": 0.75, "grad_norm": 0.25821078004181164, "learning_rate": 0.0001477319788834221, "loss": 1.0538, "step": 7829 }, { "epoch": 0.75, "grad_norm": 0.26371249547752684, "learning_rate": 0.00014771807723052013, "loss": 1.0396, "step": 7830 }, { "epoch": 0.75, "grad_norm": 0.24548178512781826, "learning_rate": 0.00014770417438344325, "loss": 1.1311, "step": 7831 }, { "epoch": 0.75, "grad_norm": 0.28303419109838207, "learning_rate": 0.00014769027034253944, "loss": 1.0892, "step": 7832 }, { "epoch": 0.75, "grad_norm": 0.27687609304875627, "learning_rate": 0.00014767636510815667, "loss": 1.0869, "step": 7833 }, { "epoch": 0.75, "grad_norm": 0.28746479888055076, "learning_rate": 0.00014766245868064285, "loss": 1.1869, "step": 7834 }, { "epoch": 0.75, "grad_norm": 0.2879143036391422, "learning_rate": 0.00014764855106034607, "loss": 1.0875, "step": 7835 }, { "epoch": 0.75, "grad_norm": 0.2653922351709626, "learning_rate": 0.00014763464224761436, "loss": 1.1238, "step": 7836 }, { "epoch": 0.75, "grad_norm": 0.2748579199210656, "learning_rate": 0.00014762073224279578, "loss": 1.0366, "step": 7837 }, { "epoch": 0.75, "grad_norm": 0.29359776941173216, "learning_rate": 0.00014760682104623845, "loss": 0.9695, "step": 7838 }, { "epoch": 0.75, "grad_norm": 0.2671398097358774, "learning_rate": 0.00014759290865829053, "loss": 1.0089, "step": 7839 }, { "epoch": 0.75, "eval_loss": 1.1271681785583496, "eval_runtime": 4227.9142, "eval_samples_per_second": 19.778, "eval_steps_per_second": 2.472, "step": 7839 }, { "epoch": 0.75, "grad_norm": 0.28108668366507505, "learning_rate": 0.00014757899507930012, "loss": 1.1518, "step": 7840 }, { "epoch": 0.75, "grad_norm": 0.2889156834394568, "learning_rate": 0.00014756508030961543, "loss": 1.057, "step": 7841 }, { "epoch": 0.75, "grad_norm": 0.23620442034659647, "learning_rate": 0.00014755116434958477, "loss": 1.0139, "step": 7842 }, { "epoch": 0.75, "grad_norm": 0.26963408436529646, "learning_rate": 0.00014753724719955634, "loss": 1.2173, "step": 7843 }, { "epoch": 0.75, "grad_norm": 0.2930307296639112, "learning_rate": 0.0001475233288598784, "loss": 1.1191, "step": 7844 }, { "epoch": 0.75, "grad_norm": 0.26447900586343337, "learning_rate": 0.00014750940933089927, "loss": 1.0972, "step": 7845 }, { "epoch": 0.75, "grad_norm": 0.28229996008223884, "learning_rate": 0.00014749548861296734, "loss": 1.1229, "step": 7846 }, { "epoch": 0.75, "grad_norm": 0.2670260276637567, "learning_rate": 0.00014748156670643097, "loss": 1.2654, "step": 7847 }, { "epoch": 0.75, "grad_norm": 0.29233732430571957, "learning_rate": 0.00014746764361163854, "loss": 1.1009, "step": 7848 }, { "epoch": 0.75, "grad_norm": 0.2872352622058822, "learning_rate": 0.00014745371932893848, "loss": 1.0123, "step": 7849 }, { "epoch": 0.75, "grad_norm": 0.26320292513113624, "learning_rate": 0.00014743979385867928, "loss": 1.056, "step": 7850 }, { "epoch": 0.75, "grad_norm": 0.29353850314037555, "learning_rate": 0.00014742586720120943, "loss": 1.0711, "step": 7851 }, { "epoch": 0.75, "grad_norm": 0.24643873017384735, "learning_rate": 0.00014741193935687743, "loss": 0.942, "step": 7852 }, { "epoch": 0.75, "grad_norm": 0.30839731486986416, "learning_rate": 0.00014739801032603186, "loss": 0.932, "step": 7853 }, { "epoch": 0.75, "grad_norm": 0.25565523985069327, "learning_rate": 0.0001473840801090213, "loss": 1.0941, "step": 7854 }, { "epoch": 0.75, "grad_norm": 0.26831609426984876, "learning_rate": 0.00014737014870619438, "loss": 1.079, "step": 7855 }, { "epoch": 0.75, "grad_norm": 0.2782662024037316, "learning_rate": 0.00014735621611789963, "loss": 1.085, "step": 7856 }, { "epoch": 0.75, "grad_norm": 0.29723517036293495, "learning_rate": 0.0001473422823444859, "loss": 1.0157, "step": 7857 }, { "epoch": 0.75, "grad_norm": 0.26674243695427563, "learning_rate": 0.00014732834738630178, "loss": 1.0748, "step": 7858 }, { "epoch": 0.75, "grad_norm": 0.3178208105289837, "learning_rate": 0.00014731441124369598, "loss": 1.047, "step": 7859 }, { "epoch": 0.75, "grad_norm": 0.3797982501806393, "learning_rate": 0.00014730047391701737, "loss": 0.9933, "step": 7860 }, { "epoch": 0.75, "grad_norm": 0.2901368038846893, "learning_rate": 0.00014728653540661463, "loss": 1.0821, "step": 7861 }, { "epoch": 0.75, "grad_norm": 0.2955915921047405, "learning_rate": 0.0001472725957128366, "loss": 0.9773, "step": 7862 }, { "epoch": 0.75, "grad_norm": 0.29647306511427657, "learning_rate": 0.00014725865483603218, "loss": 1.1752, "step": 7863 }, { "epoch": 0.75, "grad_norm": 0.2918367246052348, "learning_rate": 0.00014724471277655023, "loss": 0.9763, "step": 7864 }, { "epoch": 0.75, "grad_norm": 0.2815720894283948, "learning_rate": 0.00014723076953473965, "loss": 1.098, "step": 7865 }, { "epoch": 0.75, "grad_norm": 0.2930097766676262, "learning_rate": 0.00014721682511094935, "loss": 1.0655, "step": 7866 }, { "epoch": 0.75, "grad_norm": 0.29679633835988606, "learning_rate": 0.00014720287950552836, "loss": 1.103, "step": 7867 }, { "epoch": 0.75, "grad_norm": 0.29540666426295414, "learning_rate": 0.00014718893271882562, "loss": 1.1377, "step": 7868 }, { "epoch": 0.75, "grad_norm": 0.28866732431657105, "learning_rate": 0.0001471749847511902, "loss": 1.1552, "step": 7869 }, { "epoch": 0.75, "grad_norm": 0.2897250322336478, "learning_rate": 0.00014716103560297116, "loss": 1.1178, "step": 7870 }, { "epoch": 0.75, "grad_norm": 0.2871031890874088, "learning_rate": 0.00014714708527451752, "loss": 1.0608, "step": 7871 }, { "epoch": 0.75, "grad_norm": 0.2660012537881913, "learning_rate": 0.00014713313376617845, "loss": 0.9785, "step": 7872 }, { "epoch": 0.75, "grad_norm": 0.30288071506941316, "learning_rate": 0.00014711918107830312, "loss": 1.0374, "step": 7873 }, { "epoch": 0.75, "grad_norm": 0.2623082689214895, "learning_rate": 0.00014710522721124062, "loss": 1.1882, "step": 7874 }, { "epoch": 0.75, "grad_norm": 0.29218912698161004, "learning_rate": 0.00014709127216534025, "loss": 1.1121, "step": 7875 }, { "epoch": 0.75, "grad_norm": 0.2926792107209431, "learning_rate": 0.00014707731594095118, "loss": 1.1558, "step": 7876 }, { "epoch": 0.75, "grad_norm": 0.3036786630685544, "learning_rate": 0.00014706335853842268, "loss": 1.1285, "step": 7877 }, { "epoch": 0.75, "grad_norm": 0.3070257247499537, "learning_rate": 0.00014704939995810408, "loss": 1.1566, "step": 7878 }, { "epoch": 0.75, "grad_norm": 0.2753834785951813, "learning_rate": 0.00014703544020034463, "loss": 1.1336, "step": 7879 }, { "epoch": 0.75, "grad_norm": 0.30835133105536705, "learning_rate": 0.00014702147926549377, "loss": 1.0733, "step": 7880 }, { "epoch": 0.75, "grad_norm": 0.30129377981823663, "learning_rate": 0.00014700751715390084, "loss": 1.1398, "step": 7881 }, { "epoch": 0.75, "grad_norm": 0.26568110306618115, "learning_rate": 0.00014699355386591523, "loss": 1.0773, "step": 7882 }, { "epoch": 0.75, "grad_norm": 0.28052397758769976, "learning_rate": 0.00014697958940188642, "loss": 1.0731, "step": 7883 }, { "epoch": 0.75, "grad_norm": 0.2696382620489903, "learning_rate": 0.00014696562376216381, "loss": 0.9844, "step": 7884 }, { "epoch": 0.75, "grad_norm": 0.2532216857709228, "learning_rate": 0.000146951656947097, "loss": 1.0863, "step": 7885 }, { "epoch": 0.75, "grad_norm": 0.2622266542497967, "learning_rate": 0.00014693768895703544, "loss": 1.0305, "step": 7886 }, { "epoch": 0.75, "grad_norm": 0.2800195946028661, "learning_rate": 0.00014692371979232872, "loss": 1.1075, "step": 7887 }, { "epoch": 0.75, "grad_norm": 0.2953080493463318, "learning_rate": 0.00014690974945332644, "loss": 0.9948, "step": 7888 }, { "epoch": 0.75, "grad_norm": 0.2886266231002391, "learning_rate": 0.00014689577794037818, "loss": 0.9764, "step": 7889 }, { "epoch": 0.75, "grad_norm": 0.29706661719229843, "learning_rate": 0.0001468818052538336, "loss": 1.055, "step": 7890 }, { "epoch": 0.75, "grad_norm": 0.28179682965161795, "learning_rate": 0.0001468678313940424, "loss": 1.0903, "step": 7891 }, { "epoch": 0.76, "grad_norm": 0.3030242128434006, "learning_rate": 0.00014685385636135426, "loss": 1.1486, "step": 7892 }, { "epoch": 0.76, "grad_norm": 0.2647061302202202, "learning_rate": 0.00014683988015611892, "loss": 1.0548, "step": 7893 }, { "epoch": 0.76, "grad_norm": 0.300733623893973, "learning_rate": 0.00014682590277868612, "loss": 1.1367, "step": 7894 }, { "epoch": 0.76, "grad_norm": 0.2995793418222508, "learning_rate": 0.0001468119242294057, "loss": 1.0878, "step": 7895 }, { "epoch": 0.76, "grad_norm": 0.28708772163438406, "learning_rate": 0.00014679794450862745, "loss": 0.9396, "step": 7896 }, { "epoch": 0.76, "grad_norm": 0.311063842263335, "learning_rate": 0.0001467839636167012, "loss": 1.1202, "step": 7897 }, { "epoch": 0.76, "grad_norm": 0.2986192805747378, "learning_rate": 0.0001467699815539769, "loss": 1.0607, "step": 7898 }, { "epoch": 0.76, "grad_norm": 0.258096740209968, "learning_rate": 0.0001467559983208044, "loss": 0.9521, "step": 7899 }, { "epoch": 0.76, "grad_norm": 0.27283666156818637, "learning_rate": 0.00014674201391753363, "loss": 1.1342, "step": 7900 }, { "epoch": 0.76, "grad_norm": 0.28773671837545195, "learning_rate": 0.0001467280283445146, "loss": 1.1128, "step": 7901 }, { "epoch": 0.76, "grad_norm": 0.307556564712858, "learning_rate": 0.00014671404160209733, "loss": 0.9963, "step": 7902 }, { "epoch": 0.76, "grad_norm": 0.2684719138920676, "learning_rate": 0.0001467000536906318, "loss": 1.0201, "step": 7903 }, { "epoch": 0.76, "grad_norm": 0.26240667501651854, "learning_rate": 0.00014668606461046806, "loss": 1.0589, "step": 7904 }, { "epoch": 0.76, "grad_norm": 0.27922442657332547, "learning_rate": 0.0001466720743619562, "loss": 1.0683, "step": 7905 }, { "epoch": 0.76, "grad_norm": 0.2540084140712392, "learning_rate": 0.00014665808294544633, "loss": 0.9643, "step": 7906 }, { "epoch": 0.76, "grad_norm": 0.31142085724795016, "learning_rate": 0.00014664409036128866, "loss": 1.045, "step": 7907 }, { "epoch": 0.76, "grad_norm": 0.3206848269424768, "learning_rate": 0.00014663009660983328, "loss": 1.1147, "step": 7908 }, { "epoch": 0.76, "grad_norm": 0.33340917807727916, "learning_rate": 0.00014661610169143044, "loss": 1.0744, "step": 7909 }, { "epoch": 0.76, "grad_norm": 0.32071991522507715, "learning_rate": 0.00014660210560643036, "loss": 1.1297, "step": 7910 }, { "epoch": 0.76, "grad_norm": 0.2761333180916678, "learning_rate": 0.00014658810835518332, "loss": 1.0477, "step": 7911 }, { "epoch": 0.76, "grad_norm": 0.2859125633527157, "learning_rate": 0.00014657410993803956, "loss": 1.0781, "step": 7912 }, { "epoch": 0.76, "grad_norm": 0.3274797315976737, "learning_rate": 0.00014656011035534943, "loss": 1.0144, "step": 7913 }, { "epoch": 0.76, "grad_norm": 0.28598470593343445, "learning_rate": 0.00014654610960746327, "loss": 1.0887, "step": 7914 }, { "epoch": 0.76, "grad_norm": 0.28926204149947815, "learning_rate": 0.00014653210769473147, "loss": 1.0627, "step": 7915 }, { "epoch": 0.76, "grad_norm": 0.26324720206303615, "learning_rate": 0.00014651810461750446, "loss": 1.1053, "step": 7916 }, { "epoch": 0.76, "grad_norm": 0.30675528347879105, "learning_rate": 0.0001465041003761326, "loss": 1.1054, "step": 7917 }, { "epoch": 0.76, "grad_norm": 0.26066528416246476, "learning_rate": 0.0001464900949709664, "loss": 1.0405, "step": 7918 }, { "epoch": 0.76, "grad_norm": 0.29329946245835375, "learning_rate": 0.0001464760884023564, "loss": 1.0615, "step": 7919 }, { "epoch": 0.76, "grad_norm": 0.2679473490528041, "learning_rate": 0.00014646208067065305, "loss": 1.0988, "step": 7920 }, { "epoch": 0.76, "grad_norm": 0.29866219772813785, "learning_rate": 0.00014644807177620694, "loss": 1.1193, "step": 7921 }, { "epoch": 0.76, "grad_norm": 0.3004083603649936, "learning_rate": 0.00014643406171936863, "loss": 1.0458, "step": 7922 }, { "epoch": 0.76, "grad_norm": 0.3255922513505334, "learning_rate": 0.00014642005050048877, "loss": 1.1641, "step": 7923 }, { "epoch": 0.76, "grad_norm": 0.27331269973566386, "learning_rate": 0.00014640603811991794, "loss": 1.0904, "step": 7924 }, { "epoch": 0.76, "grad_norm": 0.3021895089941293, "learning_rate": 0.00014639202457800688, "loss": 1.0426, "step": 7925 }, { "epoch": 0.76, "grad_norm": 0.31651111587188924, "learning_rate": 0.0001463780098751062, "loss": 0.9561, "step": 7926 }, { "epoch": 0.76, "grad_norm": 0.27245193343641233, "learning_rate": 0.00014636399401156668, "loss": 0.9084, "step": 7927 }, { "epoch": 0.76, "grad_norm": 0.28171140443893944, "learning_rate": 0.0001463499769877391, "loss": 1.0122, "step": 7928 }, { "epoch": 0.76, "grad_norm": 0.2722172920609029, "learning_rate": 0.00014633595880397422, "loss": 0.9773, "step": 7929 }, { "epoch": 0.76, "grad_norm": 0.314701712266059, "learning_rate": 0.00014632193946062283, "loss": 1.0967, "step": 7930 }, { "epoch": 0.76, "grad_norm": 0.2766120012080896, "learning_rate": 0.0001463079189580358, "loss": 1.1545, "step": 7931 }, { "epoch": 0.76, "grad_norm": 0.26393567149311037, "learning_rate": 0.00014629389729656399, "loss": 0.8912, "step": 7932 }, { "epoch": 0.76, "grad_norm": 0.30280952487557516, "learning_rate": 0.0001462798744765583, "loss": 1.1161, "step": 7933 }, { "epoch": 0.76, "grad_norm": 0.31616202197131665, "learning_rate": 0.0001462658504983697, "loss": 1.179, "step": 7934 }, { "epoch": 0.76, "grad_norm": 0.26565495442579756, "learning_rate": 0.0001462518253623491, "loss": 1.0568, "step": 7935 }, { "epoch": 0.76, "grad_norm": 0.318280638548035, "learning_rate": 0.00014623779906884748, "loss": 1.1108, "step": 7936 }, { "epoch": 0.76, "grad_norm": 0.2586299065330331, "learning_rate": 0.00014622377161821587, "loss": 1.148, "step": 7937 }, { "epoch": 0.76, "grad_norm": 0.2715217059364236, "learning_rate": 0.00014620974301080537, "loss": 1.0559, "step": 7938 }, { "epoch": 0.76, "grad_norm": 0.24985474931051407, "learning_rate": 0.00014619571324696697, "loss": 1.1047, "step": 7939 }, { "epoch": 0.76, "grad_norm": 0.2961624550424931, "learning_rate": 0.00014618168232705182, "loss": 0.9659, "step": 7940 }, { "epoch": 0.76, "grad_norm": 0.2966955141929237, "learning_rate": 0.00014616765025141106, "loss": 1.0266, "step": 7941 }, { "epoch": 0.76, "grad_norm": 0.2986341505874412, "learning_rate": 0.00014615361702039582, "loss": 1.0165, "step": 7942 }, { "epoch": 0.76, "grad_norm": 0.291306108234052, "learning_rate": 0.00014613958263435734, "loss": 1.0463, "step": 7943 }, { "epoch": 0.76, "grad_norm": 0.24770026210630766, "learning_rate": 0.00014612554709364677, "loss": 1.0362, "step": 7944 }, { "epoch": 0.76, "grad_norm": 0.30727250672800005, "learning_rate": 0.00014611151039861542, "loss": 1.0903, "step": 7945 }, { "epoch": 0.76, "grad_norm": 0.30656468148580795, "learning_rate": 0.00014609747254961452, "loss": 1.0681, "step": 7946 }, { "epoch": 0.76, "grad_norm": 0.28351015720243533, "learning_rate": 0.0001460834335469954, "loss": 1.166, "step": 7947 }, { "epoch": 0.76, "grad_norm": 0.28579851887764157, "learning_rate": 0.0001460693933911094, "loss": 1.0082, "step": 7948 }, { "epoch": 0.76, "grad_norm": 0.26715615685634764, "learning_rate": 0.00014605535208230789, "loss": 1.1272, "step": 7949 }, { "epoch": 0.76, "grad_norm": 0.2908957018166822, "learning_rate": 0.0001460413096209422, "loss": 1.0386, "step": 7950 }, { "epoch": 0.76, "grad_norm": 0.30421751423231963, "learning_rate": 0.00014602726600736388, "loss": 1.1368, "step": 7951 }, { "epoch": 0.76, "grad_norm": 0.24654599232461855, "learning_rate": 0.00014601322124192426, "loss": 1.0704, "step": 7952 }, { "epoch": 0.76, "grad_norm": 0.2858412396022753, "learning_rate": 0.00014599917532497487, "loss": 0.9629, "step": 7953 }, { "epoch": 0.76, "grad_norm": 0.31986567150353395, "learning_rate": 0.00014598512825686718, "loss": 1.1515, "step": 7954 }, { "epoch": 0.76, "grad_norm": 0.26767724316265096, "learning_rate": 0.0001459710800379528, "loss": 1.0711, "step": 7955 }, { "epoch": 0.76, "grad_norm": 0.2683093058976096, "learning_rate": 0.0001459570306685832, "loss": 0.9834, "step": 7956 }, { "epoch": 0.76, "grad_norm": 0.26418718399717656, "learning_rate": 0.00014594298014911005, "loss": 1.1891, "step": 7957 }, { "epoch": 0.76, "grad_norm": 0.2997490177491894, "learning_rate": 0.00014592892847988494, "loss": 1.1303, "step": 7958 }, { "epoch": 0.76, "grad_norm": 0.3092712454899324, "learning_rate": 0.00014591487566125957, "loss": 1.0605, "step": 7959 }, { "epoch": 0.76, "grad_norm": 0.270510283984863, "learning_rate": 0.00014590082169358554, "loss": 1.0886, "step": 7960 }, { "epoch": 0.76, "grad_norm": 0.24910690948882463, "learning_rate": 0.0001458867665772146, "loss": 1.0897, "step": 7961 }, { "epoch": 0.76, "grad_norm": 0.29126461074740784, "learning_rate": 0.0001458727103124985, "loss": 1.1572, "step": 7962 }, { "epoch": 0.76, "grad_norm": 0.32863806873359025, "learning_rate": 0.000145858652899789, "loss": 1.0036, "step": 7963 }, { "epoch": 0.76, "grad_norm": 0.27115363531745906, "learning_rate": 0.00014584459433943786, "loss": 1.0609, "step": 7964 }, { "epoch": 0.76, "grad_norm": 0.26632334307016087, "learning_rate": 0.00014583053463179695, "loss": 0.929, "step": 7965 }, { "epoch": 0.76, "grad_norm": 0.30896648642067515, "learning_rate": 0.00014581647377721812, "loss": 1.1302, "step": 7966 }, { "epoch": 0.76, "grad_norm": 0.31271610841373415, "learning_rate": 0.00014580241177605322, "loss": 1.0555, "step": 7967 }, { "epoch": 0.76, "grad_norm": 0.2862084572430668, "learning_rate": 0.0001457883486286542, "loss": 1.1462, "step": 7968 }, { "epoch": 0.76, "grad_norm": 0.2722638712262504, "learning_rate": 0.00014577428433537297, "loss": 1.0671, "step": 7969 }, { "epoch": 0.76, "grad_norm": 0.29776545230446366, "learning_rate": 0.0001457602188965615, "loss": 1.0892, "step": 7970 }, { "epoch": 0.76, "grad_norm": 0.28339621303738605, "learning_rate": 0.00014574615231257177, "loss": 1.0042, "step": 7971 }, { "epoch": 0.76, "grad_norm": 0.2872649900886519, "learning_rate": 0.00014573208458375586, "loss": 1.0962, "step": 7972 }, { "epoch": 0.76, "grad_norm": 0.29073436106492967, "learning_rate": 0.0001457180157104658, "loss": 1.096, "step": 7973 }, { "epoch": 0.76, "grad_norm": 0.2782075864070279, "learning_rate": 0.00014570394569305366, "loss": 1.0278, "step": 7974 }, { "epoch": 0.76, "grad_norm": 0.31350190862970956, "learning_rate": 0.00014568987453187154, "loss": 1.091, "step": 7975 }, { "epoch": 0.76, "grad_norm": 0.2652268268602932, "learning_rate": 0.0001456758022272716, "loss": 1.128, "step": 7976 }, { "epoch": 0.76, "grad_norm": 0.2810839334912306, "learning_rate": 0.00014566172877960603, "loss": 1.0408, "step": 7977 }, { "epoch": 0.76, "grad_norm": 0.25013935049819563, "learning_rate": 0.00014564765418922696, "loss": 1.1124, "step": 7978 }, { "epoch": 0.76, "grad_norm": 0.23668034177376465, "learning_rate": 0.00014563357845648667, "loss": 1.028, "step": 7979 }, { "epoch": 0.76, "grad_norm": 0.2736819717949704, "learning_rate": 0.0001456195015817374, "loss": 0.9969, "step": 7980 }, { "epoch": 0.76, "grad_norm": 0.2771456339595817, "learning_rate": 0.00014560542356533142, "loss": 1.1049, "step": 7981 }, { "epoch": 0.76, "grad_norm": 0.294782138767455, "learning_rate": 0.00014559134440762108, "loss": 1.13, "step": 7982 }, { "epoch": 0.76, "grad_norm": 0.26345886128180046, "learning_rate": 0.0001455772641089587, "loss": 1.1155, "step": 7983 }, { "epoch": 0.76, "grad_norm": 0.2829069470535079, "learning_rate": 0.00014556318266969656, "loss": 1.0918, "step": 7984 }, { "epoch": 0.76, "grad_norm": 0.28542191746861517, "learning_rate": 0.00014554910009018722, "loss": 1.0391, "step": 7985 }, { "epoch": 0.76, "grad_norm": 0.2538797891395937, "learning_rate": 0.000145535016370783, "loss": 0.9585, "step": 7986 }, { "epoch": 0.76, "grad_norm": 0.26884615169189024, "learning_rate": 0.0001455209315118364, "loss": 1.0022, "step": 7987 }, { "epoch": 0.76, "grad_norm": 0.2716483059032336, "learning_rate": 0.00014550684551369985, "loss": 1.0702, "step": 7988 }, { "epoch": 0.76, "grad_norm": 0.27843349775471815, "learning_rate": 0.00014549275837672586, "loss": 1.1401, "step": 7989 }, { "epoch": 0.76, "grad_norm": 0.2518481347111056, "learning_rate": 0.00014547867010126706, "loss": 1.0111, "step": 7990 }, { "epoch": 0.76, "grad_norm": 0.3216392004906807, "learning_rate": 0.00014546458068767594, "loss": 1.0472, "step": 7991 }, { "epoch": 0.76, "grad_norm": 0.28909463374066535, "learning_rate": 0.00014545049013630512, "loss": 1.0201, "step": 7992 }, { "epoch": 0.76, "grad_norm": 0.31788442875623607, "learning_rate": 0.0001454363984475072, "loss": 1.0955, "step": 7993 }, { "epoch": 0.76, "grad_norm": 0.2758373823123496, "learning_rate": 0.00014542230562163488, "loss": 1.0873, "step": 7994 }, { "epoch": 0.76, "grad_norm": 0.2818308726562138, "learning_rate": 0.0001454082116590408, "loss": 1.2114, "step": 7995 }, { "epoch": 0.76, "grad_norm": 0.2641986472531651, "learning_rate": 0.0001453941165600777, "loss": 1.1287, "step": 7996 }, { "epoch": 0.77, "grad_norm": 0.27941463601912914, "learning_rate": 0.0001453800203250983, "loss": 1.0574, "step": 7997 }, { "epoch": 0.77, "grad_norm": 0.2756789148686793, "learning_rate": 0.00014536592295445532, "loss": 0.9556, "step": 7998 }, { "epoch": 0.77, "grad_norm": 0.27533489457097576, "learning_rate": 0.00014535182444850165, "loss": 1.1021, "step": 7999 }, { "epoch": 0.77, "grad_norm": 0.2964512342749194, "learning_rate": 0.00014533772480759008, "loss": 1.0421, "step": 8000 }, { "epoch": 0.77, "grad_norm": 0.26212904131408093, "learning_rate": 0.00014532362403207346, "loss": 1.0015, "step": 8001 }, { "epoch": 0.77, "grad_norm": 0.2949190595576002, "learning_rate": 0.00014530952212230463, "loss": 1.0511, "step": 8002 }, { "epoch": 0.77, "grad_norm": 0.3066540690434103, "learning_rate": 0.00014529541907863655, "loss": 1.0643, "step": 8003 }, { "epoch": 0.77, "grad_norm": 0.27584654577393586, "learning_rate": 0.00014528131490142217, "loss": 0.9773, "step": 8004 }, { "epoch": 0.77, "grad_norm": 0.27774031420621953, "learning_rate": 0.00014526720959101436, "loss": 0.9925, "step": 8005 }, { "epoch": 0.77, "grad_norm": 0.2512278740693556, "learning_rate": 0.00014525310314776623, "loss": 1.0008, "step": 8006 }, { "epoch": 0.77, "grad_norm": 0.2630439424936692, "learning_rate": 0.00014523899557203075, "loss": 1.1098, "step": 8007 }, { "epoch": 0.77, "grad_norm": 0.27426353058067837, "learning_rate": 0.00014522488686416097, "loss": 1.0728, "step": 8008 }, { "epoch": 0.77, "grad_norm": 0.2763756735231871, "learning_rate": 0.00014521077702450995, "loss": 1.0639, "step": 8009 }, { "epoch": 0.77, "grad_norm": 0.3325734526066199, "learning_rate": 0.00014519666605343083, "loss": 1.0027, "step": 8010 }, { "epoch": 0.77, "grad_norm": 0.26469084339952254, "learning_rate": 0.00014518255395127677, "loss": 1.0773, "step": 8011 }, { "epoch": 0.77, "grad_norm": 0.2755841427968526, "learning_rate": 0.00014516844071840086, "loss": 1.118, "step": 8012 }, { "epoch": 0.77, "grad_norm": 0.2970488079219033, "learning_rate": 0.00014515432635515635, "loss": 1.0362, "step": 8013 }, { "epoch": 0.77, "grad_norm": 0.2954323242845108, "learning_rate": 0.00014514021086189645, "loss": 1.0746, "step": 8014 }, { "epoch": 0.77, "grad_norm": 0.2617764506782068, "learning_rate": 0.00014512609423897438, "loss": 1.0385, "step": 8015 }, { "epoch": 0.77, "grad_norm": 0.30127357515303343, "learning_rate": 0.00014511197648674348, "loss": 1.0966, "step": 8016 }, { "epoch": 0.77, "grad_norm": 0.3004406925080267, "learning_rate": 0.00014509785760555697, "loss": 1.0441, "step": 8017 }, { "epoch": 0.77, "grad_norm": 0.25228201013427193, "learning_rate": 0.00014508373759576824, "loss": 1.0692, "step": 8018 }, { "epoch": 0.77, "grad_norm": 0.3052520210235085, "learning_rate": 0.00014506961645773068, "loss": 1.1513, "step": 8019 }, { "epoch": 0.77, "grad_norm": 0.2709522809700183, "learning_rate": 0.00014505549419179765, "loss": 0.991, "step": 8020 }, { "epoch": 0.77, "grad_norm": 0.26525464351023736, "learning_rate": 0.00014504137079832252, "loss": 0.8964, "step": 8021 }, { "epoch": 0.77, "grad_norm": 0.2877907446668608, "learning_rate": 0.00014502724627765877, "loss": 1.0461, "step": 8022 }, { "epoch": 0.77, "grad_norm": 0.29764029690496574, "learning_rate": 0.00014501312063015993, "loss": 1.0284, "step": 8023 }, { "epoch": 0.77, "grad_norm": 0.2883370401645907, "learning_rate": 0.00014499899385617943, "loss": 1.0268, "step": 8024 }, { "epoch": 0.77, "grad_norm": 0.2563877310771201, "learning_rate": 0.0001449848659560708, "loss": 1.1029, "step": 8025 }, { "epoch": 0.77, "grad_norm": 0.29710595080293883, "learning_rate": 0.00014497073693018768, "loss": 1.1315, "step": 8026 }, { "epoch": 0.77, "grad_norm": 0.2559529397106755, "learning_rate": 0.00014495660677888358, "loss": 1.009, "step": 8027 }, { "epoch": 0.77, "grad_norm": 0.29014013639604846, "learning_rate": 0.00014494247550251213, "loss": 1.0681, "step": 8028 }, { "epoch": 0.77, "grad_norm": 0.2875485426750669, "learning_rate": 0.00014492834310142702, "loss": 1.1195, "step": 8029 }, { "epoch": 0.77, "grad_norm": 0.286722769874282, "learning_rate": 0.00014491420957598184, "loss": 1.0529, "step": 8030 }, { "epoch": 0.77, "grad_norm": 0.2674676429533566, "learning_rate": 0.0001449000749265304, "loss": 1.0475, "step": 8031 }, { "epoch": 0.77, "grad_norm": 0.29840922095847167, "learning_rate": 0.00014488593915342628, "loss": 1.123, "step": 8032 }, { "epoch": 0.77, "grad_norm": 0.29901175240287103, "learning_rate": 0.0001448718022570234, "loss": 1.148, "step": 8033 }, { "epoch": 0.77, "grad_norm": 0.2915382990881953, "learning_rate": 0.00014485766423767544, "loss": 1.0401, "step": 8034 }, { "epoch": 0.77, "grad_norm": 0.26281760733012527, "learning_rate": 0.00014484352509573626, "loss": 1.1292, "step": 8035 }, { "epoch": 0.77, "grad_norm": 0.2756692483739733, "learning_rate": 0.00014482938483155965, "loss": 1.0544, "step": 8036 }, { "epoch": 0.77, "grad_norm": 0.28042375304828776, "learning_rate": 0.00014481524344549953, "loss": 1.1094, "step": 8037 }, { "epoch": 0.77, "grad_norm": 0.2666048364114282, "learning_rate": 0.00014480110093790976, "loss": 1.0748, "step": 8038 }, { "epoch": 0.77, "grad_norm": 0.5049686356926107, "learning_rate": 0.0001447869573091443, "loss": 1.0392, "step": 8039 }, { "epoch": 0.77, "grad_norm": 0.2898604153674101, "learning_rate": 0.0001447728125595571, "loss": 1.0527, "step": 8040 }, { "epoch": 0.77, "grad_norm": 0.2938423642503213, "learning_rate": 0.0001447586666895021, "loss": 1.0593, "step": 8041 }, { "epoch": 0.77, "grad_norm": 0.3199560544400549, "learning_rate": 0.00014474451969933333, "loss": 1.1253, "step": 8042 }, { "epoch": 0.77, "grad_norm": 0.279058551647216, "learning_rate": 0.00014473037158940484, "loss": 1.0904, "step": 8043 }, { "epoch": 0.77, "grad_norm": 0.3317633487011477, "learning_rate": 0.0001447162223600707, "loss": 1.0621, "step": 8044 }, { "epoch": 0.77, "grad_norm": 0.31113174621821804, "learning_rate": 0.00014470207201168497, "loss": 1.1192, "step": 8045 }, { "epoch": 0.77, "grad_norm": 0.2732527847810805, "learning_rate": 0.00014468792054460184, "loss": 1.0248, "step": 8046 }, { "epoch": 0.77, "grad_norm": 0.29153439327487507, "learning_rate": 0.00014467376795917537, "loss": 1.124, "step": 8047 }, { "epoch": 0.77, "grad_norm": 0.2860523862518081, "learning_rate": 0.0001446596142557598, "loss": 1.0621, "step": 8048 }, { "epoch": 0.77, "grad_norm": 0.2862949081963216, "learning_rate": 0.00014464545943470932, "loss": 1.0017, "step": 8049 }, { "epoch": 0.77, "grad_norm": 0.2968576640237807, "learning_rate": 0.00014463130349637814, "loss": 1.0521, "step": 8050 }, { "epoch": 0.77, "grad_norm": 0.3203158707213885, "learning_rate": 0.00014461714644112053, "loss": 1.0283, "step": 8051 }, { "epoch": 0.77, "grad_norm": 0.23962634495433535, "learning_rate": 0.0001446029882692908, "loss": 1.0052, "step": 8052 }, { "epoch": 0.77, "grad_norm": 0.29338455713698386, "learning_rate": 0.0001445888289812433, "loss": 1.1304, "step": 8053 }, { "epoch": 0.77, "grad_norm": 0.2650099607677807, "learning_rate": 0.0001445746685773323, "loss": 0.9916, "step": 8054 }, { "epoch": 0.77, "grad_norm": 0.26198071838847975, "learning_rate": 0.00014456050705791216, "loss": 1.0875, "step": 8055 }, { "epoch": 0.77, "grad_norm": 0.292914913997817, "learning_rate": 0.00014454634442333738, "loss": 1.0666, "step": 8056 }, { "epoch": 0.77, "grad_norm": 0.3015285693686079, "learning_rate": 0.00014453218067396231, "loss": 1.1363, "step": 8057 }, { "epoch": 0.77, "grad_norm": 0.30434995117078845, "learning_rate": 0.00014451801581014147, "loss": 1.0074, "step": 8058 }, { "epoch": 0.77, "grad_norm": 0.30572536430411557, "learning_rate": 0.00014450384983222926, "loss": 1.0941, "step": 8059 }, { "epoch": 0.77, "grad_norm": 0.2665325169928879, "learning_rate": 0.00014448968274058025, "loss": 1.0584, "step": 8060 }, { "epoch": 0.77, "grad_norm": 0.291238529425458, "learning_rate": 0.000144475514535549, "loss": 0.9758, "step": 8061 }, { "epoch": 0.77, "grad_norm": 0.320923065004721, "learning_rate": 0.00014446134521749, "loss": 0.9887, "step": 8062 }, { "epoch": 0.77, "grad_norm": 0.2695062501972563, "learning_rate": 0.00014444717478675792, "loss": 1.063, "step": 8063 }, { "epoch": 0.77, "grad_norm": 0.29036294842452276, "learning_rate": 0.00014443300324370738, "loss": 1.0825, "step": 8064 }, { "epoch": 0.77, "grad_norm": 0.2667481516709462, "learning_rate": 0.00014441883058869298, "loss": 1.0139, "step": 8065 }, { "epoch": 0.77, "grad_norm": 0.31330268446835163, "learning_rate": 0.00014440465682206944, "loss": 1.1519, "step": 8066 }, { "epoch": 0.77, "grad_norm": 0.29426667401104506, "learning_rate": 0.00014439048194419141, "loss": 1.001, "step": 8067 }, { "epoch": 0.77, "grad_norm": 0.30892190035705414, "learning_rate": 0.00014437630595541374, "loss": 1.0714, "step": 8068 }, { "epoch": 0.77, "grad_norm": 0.28625371937125405, "learning_rate": 0.00014436212885609106, "loss": 1.1328, "step": 8069 }, { "epoch": 0.77, "grad_norm": 0.2904730209899161, "learning_rate": 0.00014434795064657827, "loss": 0.9807, "step": 8070 }, { "epoch": 0.77, "grad_norm": 0.29946573442663216, "learning_rate": 0.0001443337713272301, "loss": 1.108, "step": 8071 }, { "epoch": 0.77, "grad_norm": 0.25138650851267935, "learning_rate": 0.0001443195908984015, "loss": 0.9354, "step": 8072 }, { "epoch": 0.77, "grad_norm": 0.3046129695319591, "learning_rate": 0.00014430540936044724, "loss": 1.0464, "step": 8073 }, { "epoch": 0.77, "grad_norm": 0.2815772869608019, "learning_rate": 0.0001442912267137223, "loss": 1.1689, "step": 8074 }, { "epoch": 0.77, "grad_norm": 0.28788534180895897, "learning_rate": 0.00014427704295858154, "loss": 1.1042, "step": 8075 }, { "epoch": 0.77, "grad_norm": 0.28620999022686416, "learning_rate": 0.00014426285809537997, "loss": 1.0644, "step": 8076 }, { "epoch": 0.77, "grad_norm": 0.2718948107560663, "learning_rate": 0.00014424867212447254, "loss": 1.0555, "step": 8077 }, { "epoch": 0.77, "grad_norm": 0.2745313908908246, "learning_rate": 0.0001442344850462143, "loss": 0.953, "step": 8078 }, { "epoch": 0.77, "grad_norm": 0.2782122491457099, "learning_rate": 0.0001442202968609603, "loss": 1.098, "step": 8079 }, { "epoch": 0.77, "grad_norm": 0.27832697424885927, "learning_rate": 0.00014420610756906552, "loss": 0.9896, "step": 8080 }, { "epoch": 0.77, "grad_norm": 0.2850630781424915, "learning_rate": 0.00014419191717088517, "loss": 1.0579, "step": 8081 }, { "epoch": 0.77, "grad_norm": 0.30698153794733524, "learning_rate": 0.00014417772566677428, "loss": 0.9522, "step": 8082 }, { "epoch": 0.77, "grad_norm": 0.2412252387912062, "learning_rate": 0.00014416353305708802, "loss": 0.963, "step": 8083 }, { "epoch": 0.77, "grad_norm": 0.2760933371029509, "learning_rate": 0.00014414933934218165, "loss": 1.039, "step": 8084 }, { "epoch": 0.77, "grad_norm": 0.27478977307311525, "learning_rate": 0.0001441351445224103, "loss": 1.0191, "step": 8085 }, { "epoch": 0.77, "grad_norm": 0.27460908719231536, "learning_rate": 0.0001441209485981292, "loss": 0.942, "step": 8086 }, { "epoch": 0.77, "grad_norm": 0.300097647062556, "learning_rate": 0.00014410675156969362, "loss": 1.1083, "step": 8087 }, { "epoch": 0.77, "grad_norm": 0.301434940986813, "learning_rate": 0.0001440925534374589, "loss": 1.1201, "step": 8088 }, { "epoch": 0.77, "grad_norm": 0.34304082819084863, "learning_rate": 0.00014407835420178028, "loss": 1.2234, "step": 8089 }, { "epoch": 0.77, "grad_norm": 0.30266233132954434, "learning_rate": 0.00014406415386301319, "loss": 1.0731, "step": 8090 }, { "epoch": 0.77, "grad_norm": 0.29052074003899026, "learning_rate": 0.00014404995242151293, "loss": 1.0351, "step": 8091 }, { "epoch": 0.77, "grad_norm": 0.31236829856827036, "learning_rate": 0.00014403574987763493, "loss": 1.1238, "step": 8092 }, { "epoch": 0.77, "grad_norm": 0.27979550559379457, "learning_rate": 0.0001440215462317346, "loss": 1.142, "step": 8093 }, { "epoch": 0.77, "grad_norm": 0.303167287374647, "learning_rate": 0.00014400734148416742, "loss": 1.1546, "step": 8094 }, { "epoch": 0.77, "grad_norm": 0.2722206268311316, "learning_rate": 0.00014399313563528886, "loss": 1.0722, "step": 8095 }, { "epoch": 0.77, "grad_norm": 0.29950103549697416, "learning_rate": 0.00014397892868545442, "loss": 1.0869, "step": 8096 }, { "epoch": 0.77, "grad_norm": 0.29083600055293385, "learning_rate": 0.00014396472063501968, "loss": 1.0576, "step": 8097 }, { "epoch": 0.77, "grad_norm": 0.2883189452608079, "learning_rate": 0.00014395051148434015, "loss": 0.9811, "step": 8098 }, { "epoch": 0.77, "grad_norm": 0.2614201885833928, "learning_rate": 0.0001439363012337715, "loss": 1.0721, "step": 8099 }, { "epoch": 0.77, "grad_norm": 0.27854811920048617, "learning_rate": 0.00014392208988366921, "loss": 1.0133, "step": 8100 }, { "epoch": 0.78, "grad_norm": 0.33867360910508465, "learning_rate": 0.00014390787743438907, "loss": 1.0265, "step": 8101 }, { "epoch": 0.78, "grad_norm": 0.27479933367603354, "learning_rate": 0.0001438936638862867, "loss": 0.9831, "step": 8102 }, { "epoch": 0.78, "grad_norm": 0.3270439622691649, "learning_rate": 0.00014387944923971782, "loss": 1.1035, "step": 8103 }, { "epoch": 0.78, "grad_norm": 0.29562739459578125, "learning_rate": 0.0001438652334950381, "loss": 1.0352, "step": 8104 }, { "epoch": 0.78, "grad_norm": 0.2771019668036566, "learning_rate": 0.00014385101665260338, "loss": 1.0598, "step": 8105 }, { "epoch": 0.78, "grad_norm": 0.26504488308797247, "learning_rate": 0.0001438367987127694, "loss": 1.1604, "step": 8106 }, { "epoch": 0.78, "grad_norm": 0.3218630057212102, "learning_rate": 0.000143822579675892, "loss": 1.0142, "step": 8107 }, { "epoch": 0.78, "grad_norm": 0.28024189474691924, "learning_rate": 0.00014380835954232697, "loss": 1.0721, "step": 8108 }, { "epoch": 0.78, "grad_norm": 0.2852976599263691, "learning_rate": 0.00014379413831243026, "loss": 0.9966, "step": 8109 }, { "epoch": 0.78, "grad_norm": 0.2904612714774512, "learning_rate": 0.00014377991598655765, "loss": 1.0675, "step": 8110 }, { "epoch": 0.78, "grad_norm": 0.26588060857502965, "learning_rate": 0.00014376569256506516, "loss": 1.0215, "step": 8111 }, { "epoch": 0.78, "grad_norm": 0.28963536427741476, "learning_rate": 0.0001437514680483087, "loss": 0.9806, "step": 8112 }, { "epoch": 0.78, "grad_norm": 0.2669044448329335, "learning_rate": 0.00014373724243664423, "loss": 1.1047, "step": 8113 }, { "epoch": 0.78, "grad_norm": 0.2904302678513009, "learning_rate": 0.00014372301573042782, "loss": 1.0147, "step": 8114 }, { "epoch": 0.78, "grad_norm": 0.25491810409780064, "learning_rate": 0.00014370878793001546, "loss": 1.0453, "step": 8115 }, { "epoch": 0.78, "grad_norm": 0.2592085806502541, "learning_rate": 0.0001436945590357632, "loss": 1.0276, "step": 8116 }, { "epoch": 0.78, "grad_norm": 0.2821215453803441, "learning_rate": 0.00014368032904802714, "loss": 1.0488, "step": 8117 }, { "epoch": 0.78, "grad_norm": 0.2615363952942239, "learning_rate": 0.00014366609796716338, "loss": 1.1121, "step": 8118 }, { "epoch": 0.78, "grad_norm": 0.31736889976384736, "learning_rate": 0.0001436518657935281, "loss": 1.1233, "step": 8119 }, { "epoch": 0.78, "grad_norm": 0.2598336195194818, "learning_rate": 0.00014363763252747745, "loss": 1.0805, "step": 8120 }, { "epoch": 0.78, "grad_norm": 0.3284573374828336, "learning_rate": 0.0001436233981693676, "loss": 1.0575, "step": 8121 }, { "epoch": 0.78, "grad_norm": 0.2908417184290244, "learning_rate": 0.00014360916271955482, "loss": 1.0383, "step": 8122 }, { "epoch": 0.78, "grad_norm": 0.28781407930130587, "learning_rate": 0.0001435949261783953, "loss": 0.9912, "step": 8123 }, { "epoch": 0.78, "grad_norm": 0.2929647743747517, "learning_rate": 0.0001435806885462454, "loss": 1.0553, "step": 8124 }, { "epoch": 0.78, "grad_norm": 0.2753194722230229, "learning_rate": 0.00014356644982346133, "loss": 0.9769, "step": 8125 }, { "epoch": 0.78, "grad_norm": 0.3013577274487869, "learning_rate": 0.0001435522100103995, "loss": 1.0244, "step": 8126 }, { "epoch": 0.78, "grad_norm": 0.2901508132919651, "learning_rate": 0.00014353796910741623, "loss": 1.011, "step": 8127 }, { "epoch": 0.78, "grad_norm": 0.29254447956246066, "learning_rate": 0.0001435237271148679, "loss": 1.0236, "step": 8128 }, { "epoch": 0.78, "grad_norm": 0.3042957195012631, "learning_rate": 0.000143509484033111, "loss": 1.0665, "step": 8129 }, { "epoch": 0.78, "grad_norm": 0.29428251771956543, "learning_rate": 0.0001434952398625019, "loss": 1.065, "step": 8130 }, { "epoch": 0.78, "grad_norm": 0.2734033579364972, "learning_rate": 0.00014348099460339707, "loss": 1.001, "step": 8131 }, { "epoch": 0.78, "grad_norm": 0.3303462730760466, "learning_rate": 0.00014346674825615303, "loss": 1.0916, "step": 8132 }, { "epoch": 0.78, "grad_norm": 0.27668942643869243, "learning_rate": 0.0001434525008211263, "loss": 1.0658, "step": 8133 }, { "epoch": 0.78, "grad_norm": 0.2953192209869138, "learning_rate": 0.00014343825229867343, "loss": 1.0055, "step": 8134 }, { "epoch": 0.78, "grad_norm": 0.30590123544431624, "learning_rate": 0.00014342400268915097, "loss": 1.1254, "step": 8135 }, { "epoch": 0.78, "grad_norm": 0.289254633390817, "learning_rate": 0.00014340975199291558, "loss": 0.8961, "step": 8136 }, { "epoch": 0.78, "grad_norm": 0.2554256044992401, "learning_rate": 0.00014339550021032384, "loss": 1.0181, "step": 8137 }, { "epoch": 0.78, "grad_norm": 0.3141083587137128, "learning_rate": 0.00014338124734173245, "loss": 1.044, "step": 8138 }, { "epoch": 0.78, "grad_norm": 0.2569367079798127, "learning_rate": 0.0001433669933874981, "loss": 1.1267, "step": 8139 }, { "epoch": 0.78, "grad_norm": 0.2678458091756298, "learning_rate": 0.00014335273834797745, "loss": 1.1156, "step": 8140 }, { "epoch": 0.78, "grad_norm": 0.29169647370051666, "learning_rate": 0.0001433384822235273, "loss": 1.0323, "step": 8141 }, { "epoch": 0.78, "grad_norm": 0.2742299999761089, "learning_rate": 0.0001433242250145044, "loss": 1.0005, "step": 8142 }, { "epoch": 0.78, "grad_norm": 0.2685403344557798, "learning_rate": 0.00014330996672126553, "loss": 0.9437, "step": 8143 }, { "epoch": 0.78, "grad_norm": 0.29515305992400265, "learning_rate": 0.0001432957073441675, "loss": 1.1712, "step": 8144 }, { "epoch": 0.78, "grad_norm": 0.2932051822985775, "learning_rate": 0.00014328144688356722, "loss": 1.083, "step": 8145 }, { "epoch": 0.78, "grad_norm": 0.28221973458585115, "learning_rate": 0.00014326718533982154, "loss": 1.0657, "step": 8146 }, { "epoch": 0.78, "grad_norm": 0.2528985840125888, "learning_rate": 0.00014325292271328733, "loss": 1.1343, "step": 8147 }, { "epoch": 0.78, "grad_norm": 0.3099604595171793, "learning_rate": 0.00014323865900432153, "loss": 1.1354, "step": 8148 }, { "epoch": 0.78, "grad_norm": 0.28724423689538947, "learning_rate": 0.00014322439421328114, "loss": 1.0258, "step": 8149 }, { "epoch": 0.78, "grad_norm": 0.29908396514945557, "learning_rate": 0.0001432101283405231, "loss": 1.0992, "step": 8150 }, { "epoch": 0.78, "grad_norm": 0.25176928414336436, "learning_rate": 0.00014319586138640447, "loss": 1.127, "step": 8151 }, { "epoch": 0.78, "grad_norm": 0.27010420300606885, "learning_rate": 0.00014318159335128226, "loss": 1.112, "step": 8152 }, { "epoch": 0.78, "grad_norm": 0.27285928105597573, "learning_rate": 0.0001431673242355135, "loss": 1.0926, "step": 8153 }, { "epoch": 0.78, "grad_norm": 0.2606896768983877, "learning_rate": 0.00014315305403945534, "loss": 1.0197, "step": 8154 }, { "epoch": 0.78, "grad_norm": 0.2828962978680743, "learning_rate": 0.0001431387827634649, "loss": 1.1644, "step": 8155 }, { "epoch": 0.78, "grad_norm": 0.30392229600605375, "learning_rate": 0.00014312451040789928, "loss": 1.0046, "step": 8156 }, { "epoch": 0.78, "grad_norm": 0.22992598628886599, "learning_rate": 0.0001431102369731157, "loss": 1.1334, "step": 8157 }, { "epoch": 0.78, "grad_norm": 0.27602772065447234, "learning_rate": 0.00014309596245947134, "loss": 1.1262, "step": 8158 }, { "epoch": 0.78, "grad_norm": 0.25737443548054917, "learning_rate": 0.00014308168686732344, "loss": 0.9536, "step": 8159 }, { "epoch": 0.78, "grad_norm": 0.2674977275846256, "learning_rate": 0.00014306741019702926, "loss": 1.1088, "step": 8160 }, { "epoch": 0.78, "grad_norm": 0.26488508791506327, "learning_rate": 0.00014305313244894604, "loss": 1.0541, "step": 8161 }, { "epoch": 0.78, "grad_norm": 0.2689193524609303, "learning_rate": 0.00014303885362343115, "loss": 1.1577, "step": 8162 }, { "epoch": 0.78, "grad_norm": 0.2879200289332181, "learning_rate": 0.00014302457372084192, "loss": 1.1757, "step": 8163 }, { "epoch": 0.78, "grad_norm": 0.2867128935586349, "learning_rate": 0.00014301029274153563, "loss": 1.0072, "step": 8164 }, { "epoch": 0.78, "grad_norm": 0.24341160678709342, "learning_rate": 0.00014299601068586978, "loss": 1.0225, "step": 8165 }, { "epoch": 0.78, "grad_norm": 0.2762861135172704, "learning_rate": 0.00014298172755420173, "loss": 1.1304, "step": 8166 }, { "epoch": 0.78, "grad_norm": 0.2769763284115676, "learning_rate": 0.00014296744334688893, "loss": 1.0157, "step": 8167 }, { "epoch": 0.78, "grad_norm": 0.26734305962867105, "learning_rate": 0.0001429531580642889, "loss": 1.1552, "step": 8168 }, { "epoch": 0.78, "grad_norm": 0.26480907501831297, "learning_rate": 0.00014293887170675903, "loss": 0.9893, "step": 8169 }, { "epoch": 0.78, "grad_norm": 0.29281043302440196, "learning_rate": 0.00014292458427465695, "loss": 0.9863, "step": 8170 }, { "epoch": 0.78, "grad_norm": 0.2888301828367874, "learning_rate": 0.00014291029576834013, "loss": 1.1172, "step": 8171 }, { "epoch": 0.78, "grad_norm": 0.3039150196032684, "learning_rate": 0.00014289600618816627, "loss": 1.0797, "step": 8172 }, { "epoch": 0.78, "grad_norm": 0.2841118966636821, "learning_rate": 0.0001428817155344928, "loss": 1.1855, "step": 8173 }, { "epoch": 0.78, "grad_norm": 0.28079473707368974, "learning_rate": 0.0001428674238076775, "loss": 1.0272, "step": 8174 }, { "epoch": 0.78, "grad_norm": 0.2862456351557848, "learning_rate": 0.00014285313100807797, "loss": 1.0838, "step": 8175 }, { "epoch": 0.78, "grad_norm": 0.2602397115203145, "learning_rate": 0.00014283883713605192, "loss": 1.083, "step": 8176 }, { "epoch": 0.78, "grad_norm": 0.30104111034073594, "learning_rate": 0.00014282454219195702, "loss": 1.0324, "step": 8177 }, { "epoch": 0.78, "grad_norm": 0.24143983810247652, "learning_rate": 0.00014281024617615105, "loss": 1.0873, "step": 8178 }, { "epoch": 0.78, "grad_norm": 0.2932603381628741, "learning_rate": 0.00014279594908899175, "loss": 1.017, "step": 8179 }, { "epoch": 0.78, "grad_norm": 0.3006594349278346, "learning_rate": 0.00014278165093083696, "loss": 1.0753, "step": 8180 }, { "epoch": 0.78, "grad_norm": 0.28022927773599904, "learning_rate": 0.00014276735170204444, "loss": 1.0561, "step": 8181 }, { "epoch": 0.78, "grad_norm": 0.27844733340976957, "learning_rate": 0.0001427530514029721, "loss": 1.1647, "step": 8182 }, { "epoch": 0.78, "grad_norm": 0.2571022498864135, "learning_rate": 0.00014273875003397774, "loss": 0.92, "step": 8183 }, { "epoch": 0.78, "grad_norm": 0.2692587322659136, "learning_rate": 0.0001427244475954193, "loss": 1.0409, "step": 8184 }, { "epoch": 0.78, "grad_norm": 0.2868029667790984, "learning_rate": 0.00014271014408765472, "loss": 0.9207, "step": 8185 }, { "epoch": 0.78, "grad_norm": 0.3122721251692393, "learning_rate": 0.00014269583951104196, "loss": 1.0165, "step": 8186 }, { "epoch": 0.78, "grad_norm": 0.240316991720147, "learning_rate": 0.00014268153386593898, "loss": 1.1213, "step": 8187 }, { "epoch": 0.78, "grad_norm": 0.283970934006317, "learning_rate": 0.00014266722715270376, "loss": 1.1421, "step": 8188 }, { "epoch": 0.78, "grad_norm": 0.2848531576221525, "learning_rate": 0.0001426529193716944, "loss": 1.0823, "step": 8189 }, { "epoch": 0.78, "grad_norm": 0.2683429644781905, "learning_rate": 0.0001426386105232689, "loss": 1.0785, "step": 8190 }, { "epoch": 0.78, "grad_norm": 0.2862688618200794, "learning_rate": 0.00014262430060778538, "loss": 1.0695, "step": 8191 }, { "epoch": 0.78, "grad_norm": 0.26946047893455116, "learning_rate": 0.00014260998962560195, "loss": 0.9804, "step": 8192 }, { "epoch": 0.78, "grad_norm": 0.3044969174847609, "learning_rate": 0.00014259567757707675, "loss": 1.0052, "step": 8193 }, { "epoch": 0.78, "grad_norm": 0.27698543650468876, "learning_rate": 0.00014258136446256795, "loss": 0.9854, "step": 8194 }, { "epoch": 0.78, "grad_norm": 0.2971561209977439, "learning_rate": 0.00014256705028243375, "loss": 1.0358, "step": 8195 }, { "epoch": 0.78, "grad_norm": 0.2770540731397757, "learning_rate": 0.00014255273503703238, "loss": 1.0042, "step": 8196 }, { "epoch": 0.78, "grad_norm": 0.28561457756539305, "learning_rate": 0.00014253841872672202, "loss": 1.1068, "step": 8197 }, { "epoch": 0.78, "grad_norm": 0.2813535778333693, "learning_rate": 0.00014252410135186103, "loss": 1.1568, "step": 8198 }, { "epoch": 0.78, "grad_norm": 0.3051058643154024, "learning_rate": 0.00014250978291280766, "loss": 1.0966, "step": 8199 }, { "epoch": 0.78, "grad_norm": 0.2995687199626439, "learning_rate": 0.00014249546340992027, "loss": 0.9566, "step": 8200 }, { "epoch": 0.78, "grad_norm": 0.25038359737911414, "learning_rate": 0.0001424811428435572, "loss": 0.9642, "step": 8201 }, { "epoch": 0.78, "grad_norm": 0.2755441297303153, "learning_rate": 0.00014246682121407686, "loss": 0.9964, "step": 8202 }, { "epoch": 0.78, "grad_norm": 0.28771744037000346, "learning_rate": 0.0001424524985218376, "loss": 1.1178, "step": 8203 }, { "epoch": 0.78, "grad_norm": 0.26096812303528166, "learning_rate": 0.00014243817476719789, "loss": 1.0602, "step": 8204 }, { "epoch": 0.78, "grad_norm": 0.2910822777089926, "learning_rate": 0.00014242384995051617, "loss": 1.1028, "step": 8205 }, { "epoch": 0.79, "grad_norm": 0.2946214010693757, "learning_rate": 0.000142409524072151, "loss": 1.0582, "step": 8206 }, { "epoch": 0.79, "grad_norm": 0.2782362063554336, "learning_rate": 0.00014239519713246077, "loss": 1.0472, "step": 8207 }, { "epoch": 0.79, "grad_norm": 0.3096824457906856, "learning_rate": 0.00014238086913180407, "loss": 1.1764, "step": 8208 }, { "epoch": 0.79, "grad_norm": 0.2705085127084864, "learning_rate": 0.00014236654007053956, "loss": 1.0761, "step": 8209 }, { "epoch": 0.79, "grad_norm": 0.2746196439259208, "learning_rate": 0.00014235220994902572, "loss": 1.0842, "step": 8210 }, { "epoch": 0.79, "grad_norm": 0.2818643359206753, "learning_rate": 0.0001423378787676212, "loss": 1.0669, "step": 8211 }, { "epoch": 0.79, "grad_norm": 0.3016521365181387, "learning_rate": 0.0001423235465266847, "loss": 1.1674, "step": 8212 }, { "epoch": 0.79, "grad_norm": 0.3097236944936014, "learning_rate": 0.0001423092132265748, "loss": 1.019, "step": 8213 }, { "epoch": 0.79, "grad_norm": 0.25229115162540394, "learning_rate": 0.00014229487886765026, "loss": 1.1445, "step": 8214 }, { "epoch": 0.79, "grad_norm": 0.27446316421143896, "learning_rate": 0.0001422805434502698, "loss": 1.1201, "step": 8215 }, { "epoch": 0.79, "grad_norm": 0.265891783959781, "learning_rate": 0.00014226620697479217, "loss": 0.9653, "step": 8216 }, { "epoch": 0.79, "grad_norm": 0.2858901663372903, "learning_rate": 0.00014225186944157614, "loss": 1.1012, "step": 8217 }, { "epoch": 0.79, "grad_norm": 0.24368459446009416, "learning_rate": 0.00014223753085098052, "loss": 1.0438, "step": 8218 }, { "epoch": 0.79, "grad_norm": 0.2658297205018217, "learning_rate": 0.00014222319120336415, "loss": 1.0923, "step": 8219 }, { "epoch": 0.79, "grad_norm": 0.27643416390290804, "learning_rate": 0.0001422088504990859, "loss": 1.0201, "step": 8220 }, { "epoch": 0.79, "grad_norm": 0.27516078932172305, "learning_rate": 0.00014219450873850464, "loss": 0.9544, "step": 8221 }, { "epoch": 0.79, "grad_norm": 0.27601315150275746, "learning_rate": 0.00014218016592197925, "loss": 1.0343, "step": 8222 }, { "epoch": 0.79, "grad_norm": 0.24192057612707965, "learning_rate": 0.00014216582204986872, "loss": 1.0806, "step": 8223 }, { "epoch": 0.79, "grad_norm": 0.28077409693408506, "learning_rate": 0.000142151477122532, "loss": 0.8763, "step": 8224 }, { "epoch": 0.79, "grad_norm": 0.27896630435292175, "learning_rate": 0.00014213713114032803, "loss": 1.0493, "step": 8225 }, { "epoch": 0.79, "grad_norm": 0.2737335624375359, "learning_rate": 0.0001421227841036159, "loss": 1.0675, "step": 8226 }, { "epoch": 0.79, "grad_norm": 0.3407111785009769, "learning_rate": 0.00014210843601275466, "loss": 1.0628, "step": 8227 }, { "epoch": 0.79, "grad_norm": 0.2617914284389135, "learning_rate": 0.00014209408686810329, "loss": 1.0987, "step": 8228 }, { "epoch": 0.79, "grad_norm": 0.3013100303523883, "learning_rate": 0.00014207973667002097, "loss": 0.9312, "step": 8229 }, { "epoch": 0.79, "grad_norm": 0.2846920379517571, "learning_rate": 0.00014206538541886677, "loss": 1.0237, "step": 8230 }, { "epoch": 0.79, "grad_norm": 0.30283490816190023, "learning_rate": 0.0001420510331149999, "loss": 1.0535, "step": 8231 }, { "epoch": 0.79, "grad_norm": 0.24872498659564674, "learning_rate": 0.00014203667975877946, "loss": 1.057, "step": 8232 }, { "epoch": 0.79, "grad_norm": 0.27630382799540015, "learning_rate": 0.00014202232535056472, "loss": 1.0121, "step": 8233 }, { "epoch": 0.79, "grad_norm": 0.28477175943154037, "learning_rate": 0.00014200796989071487, "loss": 1.045, "step": 8234 }, { "epoch": 0.79, "grad_norm": 0.2719512123825706, "learning_rate": 0.00014199361337958915, "loss": 1.075, "step": 8235 }, { "epoch": 0.79, "grad_norm": 0.27304851623487275, "learning_rate": 0.0001419792558175469, "loss": 0.9973, "step": 8236 }, { "epoch": 0.79, "grad_norm": 0.2749777621769799, "learning_rate": 0.0001419648972049474, "loss": 1.0225, "step": 8237 }, { "epoch": 0.79, "grad_norm": 0.26176184932542573, "learning_rate": 0.0001419505375421499, "loss": 1.061, "step": 8238 }, { "epoch": 0.79, "grad_norm": 0.2825657955231182, "learning_rate": 0.0001419361768295139, "loss": 1.039, "step": 8239 }, { "epoch": 0.79, "grad_norm": 0.26238648793597996, "learning_rate": 0.00014192181506739868, "loss": 0.9806, "step": 8240 }, { "epoch": 0.79, "grad_norm": 0.2596777187775208, "learning_rate": 0.0001419074522561637, "loss": 1.0234, "step": 8241 }, { "epoch": 0.79, "grad_norm": 0.2883448002693972, "learning_rate": 0.0001418930883961684, "loss": 1.113, "step": 8242 }, { "epoch": 0.79, "grad_norm": 0.31700421775940696, "learning_rate": 0.00014187872348777223, "loss": 0.9246, "step": 8243 }, { "epoch": 0.79, "grad_norm": 0.2502687124845023, "learning_rate": 0.00014186435753133468, "loss": 0.9632, "step": 8244 }, { "epoch": 0.79, "grad_norm": 0.2721584066704403, "learning_rate": 0.00014184999052721528, "loss": 1.0489, "step": 8245 }, { "epoch": 0.79, "grad_norm": 0.2672093711111685, "learning_rate": 0.00014183562247577358, "loss": 1.029, "step": 8246 }, { "epoch": 0.79, "grad_norm": 0.31595242015080555, "learning_rate": 0.00014182125337736912, "loss": 1.0484, "step": 8247 }, { "epoch": 0.79, "grad_norm": 0.217568494553999, "learning_rate": 0.0001418068832323615, "loss": 0.8719, "step": 8248 }, { "epoch": 0.79, "grad_norm": 0.27618501018822034, "learning_rate": 0.00014179251204111037, "loss": 0.9553, "step": 8249 }, { "epoch": 0.79, "grad_norm": 0.28614850226175115, "learning_rate": 0.00014177813980397535, "loss": 1.0244, "step": 8250 }, { "epoch": 0.79, "grad_norm": 0.2804392960583254, "learning_rate": 0.00014176376652131614, "loss": 1.117, "step": 8251 }, { "epoch": 0.79, "grad_norm": 0.296161246928998, "learning_rate": 0.0001417493921934924, "loss": 0.9899, "step": 8252 }, { "epoch": 0.79, "grad_norm": 0.27500753507752407, "learning_rate": 0.00014173501682086389, "loss": 1.0332, "step": 8253 }, { "epoch": 0.79, "grad_norm": 0.27240297524975027, "learning_rate": 0.00014172064040379037, "loss": 1.1169, "step": 8254 }, { "epoch": 0.79, "grad_norm": 0.2862065599891984, "learning_rate": 0.00014170626294263158, "loss": 0.9608, "step": 8255 }, { "epoch": 0.79, "grad_norm": 0.29049647009040475, "learning_rate": 0.00014169188443774737, "loss": 1.0737, "step": 8256 }, { "epoch": 0.79, "grad_norm": 0.30650214779789686, "learning_rate": 0.00014167750488949753, "loss": 1.1447, "step": 8257 }, { "epoch": 0.79, "grad_norm": 0.25682892320136824, "learning_rate": 0.00014166312429824196, "loss": 1.1008, "step": 8258 }, { "epoch": 0.79, "grad_norm": 0.289522043516876, "learning_rate": 0.0001416487426643405, "loss": 0.9996, "step": 8259 }, { "epoch": 0.79, "grad_norm": 0.2612733947909529, "learning_rate": 0.00014163435998815308, "loss": 1.0454, "step": 8260 }, { "epoch": 0.79, "grad_norm": 0.3017746734519574, "learning_rate": 0.00014161997627003964, "loss": 1.1057, "step": 8261 }, { "epoch": 0.79, "grad_norm": 0.3026134541065002, "learning_rate": 0.0001416055915103601, "loss": 1.0563, "step": 8262 }, { "epoch": 0.79, "grad_norm": 0.2921549638360822, "learning_rate": 0.00014159120570947454, "loss": 1.2412, "step": 8263 }, { "epoch": 0.79, "grad_norm": 0.27462991805196874, "learning_rate": 0.00014157681886774293, "loss": 1.1579, "step": 8264 }, { "epoch": 0.79, "grad_norm": 0.30353840216135913, "learning_rate": 0.0001415624309855253, "loss": 1.0529, "step": 8265 }, { "epoch": 0.79, "grad_norm": 0.2628785239134127, "learning_rate": 0.00014154804206318165, "loss": 1.0783, "step": 8266 }, { "epoch": 0.79, "grad_norm": 0.279824355560124, "learning_rate": 0.00014153365210107217, "loss": 0.992, "step": 8267 }, { "epoch": 0.79, "grad_norm": 0.25845741171089465, "learning_rate": 0.00014151926109955696, "loss": 0.9018, "step": 8268 }, { "epoch": 0.79, "grad_norm": 0.2949235009130459, "learning_rate": 0.0001415048690589961, "loss": 1.1514, "step": 8269 }, { "epoch": 0.79, "grad_norm": 0.3091426382260449, "learning_rate": 0.00014149047597974984, "loss": 1.128, "step": 8270 }, { "epoch": 0.79, "grad_norm": 0.261254800590016, "learning_rate": 0.00014147608186217836, "loss": 1.0248, "step": 8271 }, { "epoch": 0.79, "grad_norm": 0.28155062611196985, "learning_rate": 0.0001414616867066418, "loss": 1.1088, "step": 8272 }, { "epoch": 0.79, "grad_norm": 0.2802398085480191, "learning_rate": 0.00014144729051350055, "loss": 1.0411, "step": 8273 }, { "epoch": 0.79, "grad_norm": 0.28950599861246645, "learning_rate": 0.00014143289328311478, "loss": 1.1909, "step": 8274 }, { "epoch": 0.79, "grad_norm": 0.3156838267805892, "learning_rate": 0.0001414184950158448, "loss": 1.0001, "step": 8275 }, { "epoch": 0.79, "grad_norm": 0.2761437699110209, "learning_rate": 0.00014140409571205095, "loss": 1.0747, "step": 8276 }, { "epoch": 0.79, "grad_norm": 0.2556738607507545, "learning_rate": 0.00014138969537209358, "loss": 1.1727, "step": 8277 }, { "epoch": 0.79, "grad_norm": 0.3116455068455913, "learning_rate": 0.0001413752939963331, "loss": 1.0714, "step": 8278 }, { "epoch": 0.79, "grad_norm": 0.3023777310208979, "learning_rate": 0.00014136089158512985, "loss": 1.1541, "step": 8279 }, { "epoch": 0.79, "grad_norm": 0.29881211105271765, "learning_rate": 0.00014134648813884433, "loss": 1.1441, "step": 8280 }, { "epoch": 0.79, "grad_norm": 0.2564362293165476, "learning_rate": 0.00014133208365783693, "loss": 1.2231, "step": 8281 }, { "epoch": 0.79, "grad_norm": 0.25459170299471473, "learning_rate": 0.00014131767814246817, "loss": 1.0638, "step": 8282 }, { "epoch": 0.79, "grad_norm": 0.3164293509155231, "learning_rate": 0.00014130327159309853, "loss": 1.0238, "step": 8283 }, { "epoch": 0.79, "grad_norm": 0.3161096823663635, "learning_rate": 0.0001412888640100886, "loss": 1.0997, "step": 8284 }, { "epoch": 0.79, "grad_norm": 0.2926302601160675, "learning_rate": 0.00014127445539379886, "loss": 1.0269, "step": 8285 }, { "epoch": 0.79, "grad_norm": 0.3026119265810598, "learning_rate": 0.00014126004574458996, "loss": 1.0535, "step": 8286 }, { "epoch": 0.79, "grad_norm": 0.31239765394033814, "learning_rate": 0.00014124563506282247, "loss": 1.1042, "step": 8287 }, { "epoch": 0.79, "grad_norm": 0.2730026191662197, "learning_rate": 0.00014123122334885706, "loss": 1.0406, "step": 8288 }, { "epoch": 0.79, "grad_norm": 0.2802587764341756, "learning_rate": 0.00014121681060305435, "loss": 1.0393, "step": 8289 }, { "epoch": 0.79, "grad_norm": 0.2965134782106928, "learning_rate": 0.00014120239682577506, "loss": 1.1487, "step": 8290 }, { "epoch": 0.79, "grad_norm": 0.3604661013234594, "learning_rate": 0.0001411879820173799, "loss": 1.0004, "step": 8291 }, { "epoch": 0.79, "grad_norm": 0.2754245502665909, "learning_rate": 0.0001411735661782296, "loss": 0.9554, "step": 8292 }, { "epoch": 0.79, "grad_norm": 0.24638138967620207, "learning_rate": 0.00014115914930868493, "loss": 1.1353, "step": 8293 }, { "epoch": 0.79, "grad_norm": 0.30600504343829077, "learning_rate": 0.00014114473140910668, "loss": 1.1201, "step": 8294 }, { "epoch": 0.79, "grad_norm": 0.3035845944676315, "learning_rate": 0.00014113031247985566, "loss": 1.0861, "step": 8295 }, { "epoch": 0.79, "grad_norm": 0.3384934195188291, "learning_rate": 0.00014111589252129272, "loss": 1.1501, "step": 8296 }, { "epoch": 0.79, "grad_norm": 0.2716845796991304, "learning_rate": 0.00014110147153377874, "loss": 1.0534, "step": 8297 }, { "epoch": 0.79, "grad_norm": 0.3062067246424864, "learning_rate": 0.0001410870495176746, "loss": 1.1802, "step": 8298 }, { "epoch": 0.79, "grad_norm": 0.2673331008196946, "learning_rate": 0.0001410726264733412, "loss": 1.0404, "step": 8299 }, { "epoch": 0.79, "grad_norm": 0.2860719333532579, "learning_rate": 0.00014105820240113955, "loss": 1.1296, "step": 8300 }, { "epoch": 0.79, "grad_norm": 0.2924049271387296, "learning_rate": 0.0001410437773014306, "loss": 1.0039, "step": 8301 }, { "epoch": 0.79, "grad_norm": 0.32493379544572254, "learning_rate": 0.00014102935117457524, "loss": 0.9896, "step": 8302 }, { "epoch": 0.79, "grad_norm": 0.308689353469358, "learning_rate": 0.00014101492402093463, "loss": 0.9233, "step": 8303 }, { "epoch": 0.79, "grad_norm": 0.2713635705097109, "learning_rate": 0.00014100049584086979, "loss": 1.0322, "step": 8304 }, { "epoch": 0.79, "grad_norm": 0.2853711362692117, "learning_rate": 0.00014098606663474176, "loss": 1.0401, "step": 8305 }, { "epoch": 0.79, "grad_norm": 0.25822823305650644, "learning_rate": 0.00014097163640291164, "loss": 1.0283, "step": 8306 }, { "epoch": 0.79, "grad_norm": 0.3343555252521715, "learning_rate": 0.00014095720514574058, "loss": 1.0707, "step": 8307 }, { "epoch": 0.79, "grad_norm": 0.28266257518096666, "learning_rate": 0.00014094277286358972, "loss": 1.1636, "step": 8308 }, { "epoch": 0.79, "grad_norm": 0.2649055365462347, "learning_rate": 0.00014092833955682026, "loss": 1.0662, "step": 8309 }, { "epoch": 0.8, "grad_norm": 0.31075468177448395, "learning_rate": 0.00014091390522579333, "loss": 1.0101, "step": 8310 }, { "epoch": 0.8, "grad_norm": 0.3011177221612655, "learning_rate": 0.00014089946987087023, "loss": 1.136, "step": 8311 }, { "epoch": 0.8, "grad_norm": 0.29851607856732926, "learning_rate": 0.00014088503349241223, "loss": 1.1139, "step": 8312 }, { "epoch": 0.8, "grad_norm": 0.29855074002538673, "learning_rate": 0.00014087059609078052, "loss": 1.0843, "step": 8313 }, { "epoch": 0.8, "grad_norm": 0.2768067900609332, "learning_rate": 0.0001408561576663365, "loss": 1.0972, "step": 8314 }, { "epoch": 0.8, "grad_norm": 0.2819211049236916, "learning_rate": 0.00014084171821944144, "loss": 1.1288, "step": 8315 }, { "epoch": 0.8, "grad_norm": 0.28684815717935336, "learning_rate": 0.00014082727775045667, "loss": 1.1531, "step": 8316 }, { "epoch": 0.8, "grad_norm": 0.29080240408238495, "learning_rate": 0.00014081283625974367, "loss": 1.1591, "step": 8317 }, { "epoch": 0.8, "grad_norm": 0.29429553590146723, "learning_rate": 0.0001407983937476638, "loss": 1.1297, "step": 8318 }, { "epoch": 0.8, "grad_norm": 0.30787470171045134, "learning_rate": 0.00014078395021457845, "loss": 1.2024, "step": 8319 }, { "epoch": 0.8, "grad_norm": 0.27975097914183483, "learning_rate": 0.0001407695056608491, "loss": 0.9402, "step": 8320 }, { "epoch": 0.8, "grad_norm": 0.2832395257132092, "learning_rate": 0.0001407550600868373, "loss": 1.0379, "step": 8321 }, { "epoch": 0.8, "grad_norm": 0.29437871608032573, "learning_rate": 0.00014074061349290447, "loss": 1.0362, "step": 8322 }, { "epoch": 0.8, "grad_norm": 0.269143724602035, "learning_rate": 0.00014072616587941218, "loss": 1.0537, "step": 8323 }, { "epoch": 0.8, "grad_norm": 0.29710110944603785, "learning_rate": 0.00014071171724672202, "loss": 1.0264, "step": 8324 }, { "epoch": 0.8, "grad_norm": 0.30415387019725554, "learning_rate": 0.00014069726759519553, "loss": 1.231, "step": 8325 }, { "epoch": 0.8, "grad_norm": 0.29030887320948506, "learning_rate": 0.00014068281692519434, "loss": 1.1106, "step": 8326 }, { "epoch": 0.8, "grad_norm": 0.32054128469538806, "learning_rate": 0.0001406683652370801, "loss": 1.1057, "step": 8327 }, { "epoch": 0.8, "grad_norm": 0.30650933574016137, "learning_rate": 0.00014065391253121446, "loss": 1.1166, "step": 8328 }, { "epoch": 0.8, "grad_norm": 0.2694980594720075, "learning_rate": 0.0001406394588079591, "loss": 0.9278, "step": 8329 }, { "epoch": 0.8, "grad_norm": 0.2696507606928678, "learning_rate": 0.00014062500406767574, "loss": 1.079, "step": 8330 }, { "epoch": 0.8, "grad_norm": 0.2759447283420335, "learning_rate": 0.00014061054831072614, "loss": 0.9918, "step": 8331 }, { "epoch": 0.8, "grad_norm": 0.2738734236935922, "learning_rate": 0.00014059609153747204, "loss": 1.1079, "step": 8332 }, { "epoch": 0.8, "grad_norm": 0.28515443322196976, "learning_rate": 0.00014058163374827521, "loss": 1.0569, "step": 8333 }, { "epoch": 0.8, "grad_norm": 0.3080422030491291, "learning_rate": 0.0001405671749434975, "loss": 1.0172, "step": 8334 }, { "epoch": 0.8, "grad_norm": 0.6582786045483043, "learning_rate": 0.00014055271512350079, "loss": 1.458, "step": 8335 }, { "epoch": 0.8, "grad_norm": 0.29640316228647345, "learning_rate": 0.00014053825428864686, "loss": 0.9889, "step": 8336 }, { "epoch": 0.8, "grad_norm": 0.3224808939799605, "learning_rate": 0.00014052379243929762, "loss": 1.143, "step": 8337 }, { "epoch": 0.8, "grad_norm": 0.26780388495165136, "learning_rate": 0.00014050932957581505, "loss": 1.1065, "step": 8338 }, { "epoch": 0.8, "grad_norm": 0.3007238046192213, "learning_rate": 0.000140494865698561, "loss": 1.2191, "step": 8339 }, { "epoch": 0.8, "grad_norm": 0.3150280632023674, "learning_rate": 0.00014048040080789752, "loss": 1.1275, "step": 8340 }, { "epoch": 0.8, "grad_norm": 0.25501610087568716, "learning_rate": 0.00014046593490418656, "loss": 1.0839, "step": 8341 }, { "epoch": 0.8, "grad_norm": 0.3241245476281749, "learning_rate": 0.00014045146798779014, "loss": 1.0121, "step": 8342 }, { "epoch": 0.8, "grad_norm": 0.28717103288948476, "learning_rate": 0.00014043700005907033, "loss": 1.0748, "step": 8343 }, { "epoch": 0.8, "grad_norm": 0.2949240771446552, "learning_rate": 0.00014042253111838917, "loss": 1.1718, "step": 8344 }, { "epoch": 0.8, "grad_norm": 0.3080849599666515, "learning_rate": 0.00014040806116610873, "loss": 1.1352, "step": 8345 }, { "epoch": 0.8, "grad_norm": 0.2757839282426602, "learning_rate": 0.0001403935902025912, "loss": 1.0233, "step": 8346 }, { "epoch": 0.8, "grad_norm": 0.2993473446158299, "learning_rate": 0.00014037911822819868, "loss": 1.1544, "step": 8347 }, { "epoch": 0.8, "grad_norm": 0.292778124826498, "learning_rate": 0.00014036464524329337, "loss": 1.1381, "step": 8348 }, { "epoch": 0.8, "grad_norm": 0.2958157472586977, "learning_rate": 0.00014035017124823743, "loss": 1.0634, "step": 8349 }, { "epoch": 0.8, "grad_norm": 0.2973614768918341, "learning_rate": 0.00014033569624339308, "loss": 1.0993, "step": 8350 }, { "epoch": 0.8, "grad_norm": 0.2877291411238009, "learning_rate": 0.0001403212202291226, "loss": 1.1405, "step": 8351 }, { "epoch": 0.8, "grad_norm": 0.3161483762081105, "learning_rate": 0.00014030674320578823, "loss": 1.0157, "step": 8352 }, { "epoch": 0.8, "grad_norm": 0.3097883664079197, "learning_rate": 0.0001402922651737523, "loss": 1.1105, "step": 8353 }, { "epoch": 0.8, "grad_norm": 0.3052718311315742, "learning_rate": 0.00014027778613337708, "loss": 1.1271, "step": 8354 }, { "epoch": 0.8, "grad_norm": 0.24254225363550241, "learning_rate": 0.00014026330608502496, "loss": 0.9638, "step": 8355 }, { "epoch": 0.8, "grad_norm": 0.28453519098553043, "learning_rate": 0.00014024882502905833, "loss": 1.095, "step": 8356 }, { "epoch": 0.8, "grad_norm": 0.3148338032717982, "learning_rate": 0.0001402343429658395, "loss": 1.1137, "step": 8357 }, { "epoch": 0.8, "grad_norm": 0.3117328371653278, "learning_rate": 0.000140219859895731, "loss": 1.1287, "step": 8358 }, { "epoch": 0.8, "grad_norm": 0.26695229050684094, "learning_rate": 0.00014020537581909524, "loss": 1.0255, "step": 8359 }, { "epoch": 0.8, "grad_norm": 0.28843280197149884, "learning_rate": 0.00014019089073629464, "loss": 1.0416, "step": 8360 }, { "epoch": 0.8, "grad_norm": 0.3067274794496299, "learning_rate": 0.00014017640464769176, "loss": 1.0825, "step": 8361 }, { "epoch": 0.8, "grad_norm": 0.2739855692621317, "learning_rate": 0.00014016191755364908, "loss": 1.0509, "step": 8362 }, { "epoch": 0.8, "grad_norm": 0.2395995459480104, "learning_rate": 0.0001401474294545292, "loss": 1.0843, "step": 8363 }, { "epoch": 0.8, "grad_norm": 0.2759339406448203, "learning_rate": 0.00014013294035069467, "loss": 1.1089, "step": 8364 }, { "epoch": 0.8, "grad_norm": 0.3012501447545405, "learning_rate": 0.00014011845024250805, "loss": 1.0218, "step": 8365 }, { "epoch": 0.8, "grad_norm": 0.2743545412783308, "learning_rate": 0.00014010395913033202, "loss": 0.9881, "step": 8366 }, { "epoch": 0.8, "grad_norm": 0.22999177193777853, "learning_rate": 0.00014008946701452921, "loss": 1.0122, "step": 8367 }, { "epoch": 0.8, "grad_norm": 0.3145753551754261, "learning_rate": 0.00014007497389546228, "loss": 1.1592, "step": 8368 }, { "epoch": 0.8, "grad_norm": 0.29504190170329436, "learning_rate": 0.000140060479773494, "loss": 1.0783, "step": 8369 }, { "epoch": 0.8, "grad_norm": 0.27650998107466573, "learning_rate": 0.00014004598464898698, "loss": 1.1227, "step": 8370 }, { "epoch": 0.8, "grad_norm": 0.2594677164964031, "learning_rate": 0.00014003148852230403, "loss": 1.0184, "step": 8371 }, { "epoch": 0.8, "grad_norm": 0.26498120130511177, "learning_rate": 0.00014001699139380792, "loss": 1.016, "step": 8372 }, { "epoch": 0.8, "grad_norm": 0.33720870260470887, "learning_rate": 0.00014000249326386147, "loss": 0.9954, "step": 8373 }, { "epoch": 0.8, "grad_norm": 0.27003696605507693, "learning_rate": 0.0001399879941328275, "loss": 1.0899, "step": 8374 }, { "epoch": 0.8, "grad_norm": 0.25731301945171275, "learning_rate": 0.0001399734940010688, "loss": 1.1751, "step": 8375 }, { "epoch": 0.8, "grad_norm": 0.288234443384105, "learning_rate": 0.0001399589928689483, "loss": 1.1105, "step": 8376 }, { "epoch": 0.8, "grad_norm": 0.3086646943980646, "learning_rate": 0.0001399444907368289, "loss": 1.0816, "step": 8377 }, { "epoch": 0.8, "grad_norm": 0.2857761154797601, "learning_rate": 0.00013992998760507352, "loss": 1.1379, "step": 8378 }, { "epoch": 0.8, "grad_norm": 0.2848709741740318, "learning_rate": 0.00013991548347404512, "loss": 1.1337, "step": 8379 }, { "epoch": 0.8, "grad_norm": 0.26144857764795576, "learning_rate": 0.00013990097834410664, "loss": 0.9858, "step": 8380 }, { "epoch": 0.8, "grad_norm": 0.28140647243062356, "learning_rate": 0.0001398864722156211, "loss": 1.1319, "step": 8381 }, { "epoch": 0.8, "grad_norm": 0.2757644481768791, "learning_rate": 0.00013987196508895153, "loss": 0.9251, "step": 8382 }, { "epoch": 0.8, "grad_norm": 0.29461811148669015, "learning_rate": 0.00013985745696446097, "loss": 1.1373, "step": 8383 }, { "epoch": 0.8, "grad_norm": 0.3141106450117521, "learning_rate": 0.0001398429478425125, "loss": 1.0046, "step": 8384 }, { "epoch": 0.8, "grad_norm": 0.2932601643453963, "learning_rate": 0.00013982843772346922, "loss": 1.0836, "step": 8385 }, { "epoch": 0.8, "grad_norm": 0.3080641745939993, "learning_rate": 0.00013981392660769424, "loss": 1.1587, "step": 8386 }, { "epoch": 0.8, "grad_norm": 0.3161172120816325, "learning_rate": 0.00013979941449555075, "loss": 1.0562, "step": 8387 }, { "epoch": 0.8, "grad_norm": 0.2919139460605944, "learning_rate": 0.00013978490138740187, "loss": 1.1742, "step": 8388 }, { "epoch": 0.8, "grad_norm": 0.27923451724228243, "learning_rate": 0.00013977038728361086, "loss": 1.1083, "step": 8389 }, { "epoch": 0.8, "grad_norm": 0.31621277440252016, "learning_rate": 0.0001397558721845409, "loss": 1.0143, "step": 8390 }, { "epoch": 0.8, "grad_norm": 0.3053969306091787, "learning_rate": 0.00013974135609055527, "loss": 1.0729, "step": 8391 }, { "epoch": 0.8, "grad_norm": 0.2825597454845708, "learning_rate": 0.00013972683900201723, "loss": 1.0619, "step": 8392 }, { "epoch": 0.8, "grad_norm": 0.29068615722749824, "learning_rate": 0.00013971232091929006, "loss": 1.065, "step": 8393 }, { "epoch": 0.8, "grad_norm": 0.2853464043964099, "learning_rate": 0.00013969780184273705, "loss": 0.9789, "step": 8394 }, { "epoch": 0.8, "grad_norm": 0.2849572721200672, "learning_rate": 0.0001396832817727217, "loss": 1.0263, "step": 8395 }, { "epoch": 0.8, "grad_norm": 0.28211914596567356, "learning_rate": 0.00013966876070960722, "loss": 1.1037, "step": 8396 }, { "epoch": 0.8, "grad_norm": 0.2995697358994056, "learning_rate": 0.00013965423865375712, "loss": 1.1653, "step": 8397 }, { "epoch": 0.8, "grad_norm": 0.29293561141447033, "learning_rate": 0.0001396397156055347, "loss": 1.094, "step": 8398 }, { "epoch": 0.8, "grad_norm": 0.25956145112228035, "learning_rate": 0.00013962519156530354, "loss": 1.0856, "step": 8399 }, { "epoch": 0.8, "grad_norm": 0.27947255917814445, "learning_rate": 0.00013961066653342706, "loss": 1.0245, "step": 8400 }, { "epoch": 0.8, "grad_norm": 0.2745380656704247, "learning_rate": 0.00013959614051026873, "loss": 1.0483, "step": 8401 }, { "epoch": 0.8, "grad_norm": 0.2980202904500265, "learning_rate": 0.0001395816134961921, "loss": 1.0607, "step": 8402 }, { "epoch": 0.8, "grad_norm": 0.25864322411654844, "learning_rate": 0.00013956708549156072, "loss": 0.9817, "step": 8403 }, { "epoch": 0.8, "grad_norm": 0.27059445234233453, "learning_rate": 0.00013955255649673816, "loss": 1.0721, "step": 8404 }, { "epoch": 0.8, "grad_norm": 0.30462552256042574, "learning_rate": 0.00013953802651208802, "loss": 1.0382, "step": 8405 }, { "epoch": 0.8, "grad_norm": 0.2863563756673883, "learning_rate": 0.0001395234955379739, "loss": 1.1493, "step": 8406 }, { "epoch": 0.8, "grad_norm": 0.2862298189990451, "learning_rate": 0.0001395089635747595, "loss": 1.1067, "step": 8407 }, { "epoch": 0.8, "grad_norm": 0.2993560987033485, "learning_rate": 0.00013949443062280842, "loss": 1.081, "step": 8408 }, { "epoch": 0.8, "grad_norm": 0.2717526863604508, "learning_rate": 0.00013947989668248442, "loss": 1.0246, "step": 8409 }, { "epoch": 0.8, "grad_norm": 0.2843109420913706, "learning_rate": 0.00013946536175415118, "loss": 1.1101, "step": 8410 }, { "epoch": 0.8, "grad_norm": 0.31237587843637793, "learning_rate": 0.00013945082583817245, "loss": 1.0994, "step": 8411 }, { "epoch": 0.8, "grad_norm": 0.2655591048449623, "learning_rate": 0.00013943628893491202, "loss": 1.1926, "step": 8412 }, { "epoch": 0.8, "grad_norm": 0.2923686496830888, "learning_rate": 0.0001394217510447337, "loss": 1.1175, "step": 8413 }, { "epoch": 0.8, "grad_norm": 0.26432275264898286, "learning_rate": 0.00013940721216800127, "loss": 0.9868, "step": 8414 }, { "epoch": 0.81, "grad_norm": 0.2767290301083631, "learning_rate": 0.00013939267230507856, "loss": 0.9424, "step": 8415 }, { "epoch": 0.81, "grad_norm": 0.2865371880691285, "learning_rate": 0.0001393781314563295, "loss": 1.1268, "step": 8416 }, { "epoch": 0.81, "grad_norm": 0.27359931565493517, "learning_rate": 0.00013936358962211794, "loss": 0.894, "step": 8417 }, { "epoch": 0.81, "grad_norm": 0.2682490335376522, "learning_rate": 0.00013934904680280781, "loss": 1.0634, "step": 8418 }, { "epoch": 0.81, "grad_norm": 0.323666262645315, "learning_rate": 0.00013933450299876305, "loss": 1.0572, "step": 8419 }, { "epoch": 0.81, "grad_norm": 0.313542678895405, "learning_rate": 0.00013931995821034766, "loss": 1.0228, "step": 8420 }, { "epoch": 0.81, "grad_norm": 0.2741537767451146, "learning_rate": 0.00013930541243792555, "loss": 1.1194, "step": 8421 }, { "epoch": 0.81, "grad_norm": 0.2727440924938454, "learning_rate": 0.00013929086568186083, "loss": 1.1567, "step": 8422 }, { "epoch": 0.81, "grad_norm": 0.27596495736712096, "learning_rate": 0.0001392763179425175, "loss": 1.0803, "step": 8423 }, { "epoch": 0.81, "grad_norm": 0.2789242022899579, "learning_rate": 0.00013926176922025963, "loss": 0.912, "step": 8424 }, { "epoch": 0.81, "grad_norm": 0.28533265323613294, "learning_rate": 0.00013924721951545128, "loss": 1.1343, "step": 8425 }, { "epoch": 0.81, "grad_norm": 0.29008984332342763, "learning_rate": 0.00013923266882845666, "loss": 1.0476, "step": 8426 }, { "epoch": 0.81, "grad_norm": 0.24250004605487083, "learning_rate": 0.00013921811715963977, "loss": 1.062, "step": 8427 }, { "epoch": 0.81, "grad_norm": 0.29532956323803095, "learning_rate": 0.0001392035645093649, "loss": 1.0561, "step": 8428 }, { "epoch": 0.81, "grad_norm": 0.3001458878270363, "learning_rate": 0.00013918901087799616, "loss": 1.081, "step": 8429 }, { "epoch": 0.81, "grad_norm": 0.3037811128687649, "learning_rate": 0.0001391744562658978, "loss": 1.098, "step": 8430 }, { "epoch": 0.81, "grad_norm": 0.3304936166647709, "learning_rate": 0.00013915990067343408, "loss": 0.9485, "step": 8431 }, { "epoch": 0.81, "grad_norm": 0.2831506029753717, "learning_rate": 0.0001391453441009692, "loss": 1.0727, "step": 8432 }, { "epoch": 0.81, "grad_norm": 0.2528874867012647, "learning_rate": 0.0001391307865488675, "loss": 1.046, "step": 8433 }, { "epoch": 0.81, "grad_norm": 0.2926620284475519, "learning_rate": 0.00013911622801749326, "loss": 1.066, "step": 8434 }, { "epoch": 0.81, "grad_norm": 0.28658384259986724, "learning_rate": 0.00013910166850721086, "loss": 1.1164, "step": 8435 }, { "epoch": 0.81, "grad_norm": 0.2611948977130402, "learning_rate": 0.0001390871080183846, "loss": 1.1024, "step": 8436 }, { "epoch": 0.81, "grad_norm": 0.29623740606511145, "learning_rate": 0.0001390725465513789, "loss": 0.9396, "step": 8437 }, { "epoch": 0.81, "grad_norm": 0.2759634164236242, "learning_rate": 0.00013905798410655817, "loss": 0.991, "step": 8438 }, { "epoch": 0.81, "grad_norm": 0.2559555603571331, "learning_rate": 0.00013904342068428688, "loss": 0.9583, "step": 8439 }, { "epoch": 0.81, "grad_norm": 0.2834397140569843, "learning_rate": 0.00013902885628492938, "loss": 1.1322, "step": 8440 }, { "epoch": 0.81, "grad_norm": 0.2959073999001046, "learning_rate": 0.00013901429090885028, "loss": 1.0687, "step": 8441 }, { "epoch": 0.81, "grad_norm": 0.25399362804027736, "learning_rate": 0.000138999724556414, "loss": 1.0374, "step": 8442 }, { "epoch": 0.81, "grad_norm": 0.29065321813906453, "learning_rate": 0.00013898515722798513, "loss": 1.0508, "step": 8443 }, { "epoch": 0.81, "grad_norm": 0.37210670978861016, "learning_rate": 0.00013897058892392818, "loss": 0.9855, "step": 8444 }, { "epoch": 0.81, "grad_norm": 0.2800545837279327, "learning_rate": 0.00013895601964460775, "loss": 1.1169, "step": 8445 }, { "epoch": 0.81, "grad_norm": 0.2926130011031746, "learning_rate": 0.00013894144939038844, "loss": 1.0492, "step": 8446 }, { "epoch": 0.81, "grad_norm": 0.31446837274856276, "learning_rate": 0.00013892687816163487, "loss": 1.077, "step": 8447 }, { "epoch": 0.81, "grad_norm": 0.2894693179667743, "learning_rate": 0.00013891230595871175, "loss": 1.1005, "step": 8448 }, { "epoch": 0.81, "grad_norm": 0.2658040957365846, "learning_rate": 0.0001388977327819837, "loss": 1.129, "step": 8449 }, { "epoch": 0.81, "grad_norm": 0.30570591750106335, "learning_rate": 0.0001388831586318154, "loss": 1.1457, "step": 8450 }, { "epoch": 0.81, "grad_norm": 0.30282216470657447, "learning_rate": 0.00013886858350857167, "loss": 1.0694, "step": 8451 }, { "epoch": 0.81, "grad_norm": 0.3179740476883664, "learning_rate": 0.00013885400741261717, "loss": 1.1539, "step": 8452 }, { "epoch": 0.81, "grad_norm": 0.2714947725647423, "learning_rate": 0.00013883943034431677, "loss": 1.0887, "step": 8453 }, { "epoch": 0.81, "grad_norm": 0.2617135621115103, "learning_rate": 0.0001388248523040352, "loss": 1.1484, "step": 8454 }, { "epoch": 0.81, "grad_norm": 0.2589470456450907, "learning_rate": 0.00013881027329213727, "loss": 0.976, "step": 8455 }, { "epoch": 0.81, "grad_norm": 0.2971405117388152, "learning_rate": 0.0001387956933089879, "loss": 1.1232, "step": 8456 }, { "epoch": 0.81, "grad_norm": 0.2638200620165654, "learning_rate": 0.0001387811123549519, "loss": 1.1529, "step": 8457 }, { "epoch": 0.81, "grad_norm": 0.2792026970736888, "learning_rate": 0.00013876653043039418, "loss": 1.1957, "step": 8458 }, { "epoch": 0.81, "grad_norm": 0.27823742191201595, "learning_rate": 0.0001387519475356797, "loss": 1.0414, "step": 8459 }, { "epoch": 0.81, "grad_norm": 0.25238962999634595, "learning_rate": 0.00013873736367117336, "loss": 1.0228, "step": 8460 }, { "epoch": 0.81, "grad_norm": 0.28008622103734476, "learning_rate": 0.00013872277883724015, "loss": 1.023, "step": 8461 }, { "epoch": 0.81, "grad_norm": 0.2777949786844138, "learning_rate": 0.00013870819303424506, "loss": 1.1302, "step": 8462 }, { "epoch": 0.81, "grad_norm": 0.24701542829888543, "learning_rate": 0.0001386936062625531, "loss": 1.1174, "step": 8463 }, { "epoch": 0.81, "grad_norm": 0.29033108955425646, "learning_rate": 0.00013867901852252935, "loss": 1.0872, "step": 8464 }, { "epoch": 0.81, "grad_norm": 0.2747150319565986, "learning_rate": 0.00013866442981453887, "loss": 0.95, "step": 8465 }, { "epoch": 0.81, "grad_norm": 0.28133751977616195, "learning_rate": 0.00013864984013894669, "loss": 1.04, "step": 8466 }, { "epoch": 0.81, "grad_norm": 0.2603016003322199, "learning_rate": 0.00013863524949611798, "loss": 1.0926, "step": 8467 }, { "epoch": 0.81, "grad_norm": 0.28873463989684345, "learning_rate": 0.00013862065788641787, "loss": 1.0727, "step": 8468 }, { "epoch": 0.81, "grad_norm": 0.3043522634022851, "learning_rate": 0.00013860606531021155, "loss": 1.017, "step": 8469 }, { "epoch": 0.81, "grad_norm": 0.258266209093827, "learning_rate": 0.00013859147176786417, "loss": 1.0572, "step": 8470 }, { "epoch": 0.81, "grad_norm": 0.2960901666392521, "learning_rate": 0.00013857687725974093, "loss": 0.9861, "step": 8471 }, { "epoch": 0.81, "grad_norm": 0.29910616519981614, "learning_rate": 0.00013856228178620709, "loss": 1.1596, "step": 8472 }, { "epoch": 0.81, "grad_norm": 0.2990576464485771, "learning_rate": 0.00013854768534762795, "loss": 1.0265, "step": 8473 }, { "epoch": 0.81, "grad_norm": 0.31441877420729514, "learning_rate": 0.00013853308794436876, "loss": 1.0613, "step": 8474 }, { "epoch": 0.81, "grad_norm": 0.2634200444546087, "learning_rate": 0.0001385184895767948, "loss": 1.004, "step": 8475 }, { "epoch": 0.81, "grad_norm": 0.2556240322403397, "learning_rate": 0.0001385038902452714, "loss": 1.1238, "step": 8476 }, { "epoch": 0.81, "grad_norm": 0.27888482542250587, "learning_rate": 0.00013848928995016403, "loss": 1.1326, "step": 8477 }, { "epoch": 0.81, "grad_norm": 0.27815317220422986, "learning_rate": 0.00013847468869183796, "loss": 1.123, "step": 8478 }, { "epoch": 0.81, "grad_norm": 0.2831745331072525, "learning_rate": 0.00013846008647065857, "loss": 1.0664, "step": 8479 }, { "epoch": 0.81, "grad_norm": 0.3041401975450099, "learning_rate": 0.0001384454832869914, "loss": 1.0162, "step": 8480 }, { "epoch": 0.81, "grad_norm": 0.27417387963872447, "learning_rate": 0.00013843087914120185, "loss": 1.1522, "step": 8481 }, { "epoch": 0.81, "grad_norm": 0.2551238865778478, "learning_rate": 0.00013841627403365537, "loss": 1.1159, "step": 8482 }, { "epoch": 0.81, "grad_norm": 0.2772835560718097, "learning_rate": 0.0001384016679647175, "loss": 1.0014, "step": 8483 }, { "epoch": 0.81, "grad_norm": 0.266588619864142, "learning_rate": 0.00013838706093475379, "loss": 1.0242, "step": 8484 }, { "epoch": 0.81, "grad_norm": 0.2950429695678513, "learning_rate": 0.0001383724529441297, "loss": 1.0789, "step": 8485 }, { "epoch": 0.81, "grad_norm": 0.26853491046599237, "learning_rate": 0.00013835784399321088, "loss": 1.1386, "step": 8486 }, { "epoch": 0.81, "grad_norm": 0.27338834723695976, "learning_rate": 0.0001383432340823629, "loss": 0.9977, "step": 8487 }, { "epoch": 0.81, "grad_norm": 0.2865977979325858, "learning_rate": 0.00013832862321195143, "loss": 1.0583, "step": 8488 }, { "epoch": 0.81, "grad_norm": 0.28068233052657926, "learning_rate": 0.000138314011382342, "loss": 1.0611, "step": 8489 }, { "epoch": 0.81, "grad_norm": 0.3053917578678752, "learning_rate": 0.0001382993985939004, "loss": 1.1556, "step": 8490 }, { "epoch": 0.81, "grad_norm": 0.26177227751546944, "learning_rate": 0.00013828478484699227, "loss": 1.1853, "step": 8491 }, { "epoch": 0.81, "grad_norm": 0.28197772368176605, "learning_rate": 0.00013827017014198336, "loss": 1.108, "step": 8492 }, { "epoch": 0.81, "grad_norm": 0.25534797811164694, "learning_rate": 0.00013825555447923935, "loss": 1.1122, "step": 8493 }, { "epoch": 0.81, "grad_norm": 0.2985449459023056, "learning_rate": 0.00013824093785912609, "loss": 1.0912, "step": 8494 }, { "epoch": 0.81, "grad_norm": 0.3047078671768318, "learning_rate": 0.0001382263202820093, "loss": 0.909, "step": 8495 }, { "epoch": 0.81, "grad_norm": 0.2566715818782515, "learning_rate": 0.0001382117017482548, "loss": 1.0322, "step": 8496 }, { "epoch": 0.81, "grad_norm": 0.2612952596331514, "learning_rate": 0.0001381970822582285, "loss": 0.9924, "step": 8497 }, { "epoch": 0.81, "grad_norm": 0.2933790624492483, "learning_rate": 0.00013818246181229618, "loss": 1.0607, "step": 8498 }, { "epoch": 0.81, "grad_norm": 0.3051960049265422, "learning_rate": 0.00013816784041082374, "loss": 1.1308, "step": 8499 }, { "epoch": 0.81, "grad_norm": 0.2790257819761861, "learning_rate": 0.0001381532180541772, "loss": 1.0386, "step": 8500 }, { "epoch": 0.81, "grad_norm": 0.3118122919886825, "learning_rate": 0.0001381385947427223, "loss": 1.1372, "step": 8501 }, { "epoch": 0.81, "grad_norm": 0.29570017068411963, "learning_rate": 0.00013812397047682513, "loss": 1.0147, "step": 8502 }, { "epoch": 0.81, "grad_norm": 0.31683426314572966, "learning_rate": 0.00013810934525685165, "loss": 1.0712, "step": 8503 }, { "epoch": 0.81, "grad_norm": 0.2909795486157777, "learning_rate": 0.00013809471908316783, "loss": 1.2234, "step": 8504 }, { "epoch": 0.81, "grad_norm": 0.2955103667345504, "learning_rate": 0.00013808009195613973, "loss": 1.0783, "step": 8505 }, { "epoch": 0.81, "grad_norm": 0.2867522171152345, "learning_rate": 0.0001380654638761334, "loss": 1.0277, "step": 8506 }, { "epoch": 0.81, "grad_norm": 0.3681601782152508, "learning_rate": 0.0001380508348435149, "loss": 1.1069, "step": 8507 }, { "epoch": 0.81, "grad_norm": 0.2932701192842754, "learning_rate": 0.00013803620485865035, "loss": 1.0882, "step": 8508 }, { "epoch": 0.81, "grad_norm": 0.31028006826949395, "learning_rate": 0.0001380215739219059, "loss": 0.9705, "step": 8509 }, { "epoch": 0.81, "grad_norm": 0.311953807926566, "learning_rate": 0.00013800694203364763, "loss": 1.1564, "step": 8510 }, { "epoch": 0.81, "grad_norm": 0.2959800715326205, "learning_rate": 0.00013799230919424175, "loss": 1.0579, "step": 8511 }, { "epoch": 0.81, "grad_norm": 0.2605263218657084, "learning_rate": 0.00013797767540405447, "loss": 1.0171, "step": 8512 }, { "epoch": 0.81, "grad_norm": 0.30387836906175536, "learning_rate": 0.00013796304066345197, "loss": 0.9672, "step": 8513 }, { "epoch": 0.81, "grad_norm": 0.299921065982018, "learning_rate": 0.00013794840497280056, "loss": 1.1199, "step": 8514 }, { "epoch": 0.81, "grad_norm": 0.32487111245703615, "learning_rate": 0.00013793376833246644, "loss": 1.2367, "step": 8515 }, { "epoch": 0.81, "grad_norm": 0.2847819157265213, "learning_rate": 0.00013791913074281595, "loss": 1.1113, "step": 8516 }, { "epoch": 0.81, "grad_norm": 0.2868712720370698, "learning_rate": 0.00013790449220421535, "loss": 1.1692, "step": 8517 }, { "epoch": 0.81, "grad_norm": 0.2678298209756187, "learning_rate": 0.00013788985271703105, "loss": 1.1474, "step": 8518 }, { "epoch": 0.82, "grad_norm": 0.29099670328302274, "learning_rate": 0.00013787521228162934, "loss": 1.0656, "step": 8519 }, { "epoch": 0.82, "grad_norm": 0.30474414530355776, "learning_rate": 0.00013786057089837663, "loss": 1.0057, "step": 8520 }, { "epoch": 0.82, "grad_norm": 0.28018691357325703, "learning_rate": 0.00013784592856763936, "loss": 1.0637, "step": 8521 }, { "epoch": 0.82, "grad_norm": 0.30281968491489586, "learning_rate": 0.00013783128528978395, "loss": 0.9859, "step": 8522 }, { "epoch": 0.82, "grad_norm": 0.2833193148012386, "learning_rate": 0.00013781664106517685, "loss": 1.1878, "step": 8523 }, { "epoch": 0.82, "grad_norm": 0.27855400568860456, "learning_rate": 0.00013780199589418453, "loss": 1.0736, "step": 8524 }, { "epoch": 0.82, "grad_norm": 0.3023424425522343, "learning_rate": 0.00013778734977717348, "loss": 0.9647, "step": 8525 }, { "epoch": 0.82, "grad_norm": 0.24163910511953707, "learning_rate": 0.00013777270271451031, "loss": 1.0617, "step": 8526 }, { "epoch": 0.82, "grad_norm": 0.2643004336760188, "learning_rate": 0.00013775805470656147, "loss": 1.0083, "step": 8527 }, { "epoch": 0.82, "grad_norm": 0.30690050558969756, "learning_rate": 0.00013774340575369357, "loss": 1.1863, "step": 8528 }, { "epoch": 0.82, "grad_norm": 0.2803369681980211, "learning_rate": 0.00013772875585627326, "loss": 0.9811, "step": 8529 }, { "epoch": 0.82, "grad_norm": 0.27882776591624236, "learning_rate": 0.00013771410501466712, "loss": 1.0438, "step": 8530 }, { "epoch": 0.82, "grad_norm": 0.2866694179672929, "learning_rate": 0.00013769945322924179, "loss": 1.0089, "step": 8531 }, { "epoch": 0.82, "grad_norm": 0.3193049906912792, "learning_rate": 0.00013768480050036392, "loss": 0.9945, "step": 8532 }, { "epoch": 0.82, "grad_norm": 0.3134610280410675, "learning_rate": 0.00013767014682840027, "loss": 1.0041, "step": 8533 }, { "epoch": 0.82, "grad_norm": 0.25858959641085166, "learning_rate": 0.0001376554922137175, "loss": 1.0951, "step": 8534 }, { "epoch": 0.82, "grad_norm": 0.27495664936656555, "learning_rate": 0.00013764083665668237, "loss": 1.0078, "step": 8535 }, { "epoch": 0.82, "grad_norm": 0.26221912952359344, "learning_rate": 0.00013762618015766167, "loss": 1.0953, "step": 8536 }, { "epoch": 0.82, "grad_norm": 0.2969797589840666, "learning_rate": 0.00013761152271702214, "loss": 1.104, "step": 8537 }, { "epoch": 0.82, "grad_norm": 0.3003478528891654, "learning_rate": 0.00013759686433513062, "loss": 1.0608, "step": 8538 }, { "epoch": 0.82, "grad_norm": 0.35023226379753625, "learning_rate": 0.00013758220501235396, "loss": 1.0195, "step": 8539 }, { "epoch": 0.82, "grad_norm": 0.2598816030866285, "learning_rate": 0.000137567544749059, "loss": 0.8741, "step": 8540 }, { "epoch": 0.82, "grad_norm": 0.29603444421799885, "learning_rate": 0.0001375528835456126, "loss": 1.124, "step": 8541 }, { "epoch": 0.82, "grad_norm": 0.24876455442000533, "learning_rate": 0.0001375382214023817, "loss": 0.9333, "step": 8542 }, { "epoch": 0.82, "grad_norm": 0.29348410647341444, "learning_rate": 0.00013752355831973324, "loss": 0.9545, "step": 8543 }, { "epoch": 0.82, "grad_norm": 0.31014489060350814, "learning_rate": 0.00013750889429803412, "loss": 1.0946, "step": 8544 }, { "epoch": 0.82, "grad_norm": 0.2901149550164192, "learning_rate": 0.00013749422933765135, "loss": 1.1371, "step": 8545 }, { "epoch": 0.82, "grad_norm": 0.2796054693890663, "learning_rate": 0.00013747956343895194, "loss": 1.0809, "step": 8546 }, { "epoch": 0.82, "grad_norm": 0.2809110179945892, "learning_rate": 0.00013746489660230288, "loss": 1.0668, "step": 8547 }, { "epoch": 0.82, "grad_norm": 0.29179871521386425, "learning_rate": 0.00013745022882807127, "loss": 1.1174, "step": 8548 }, { "epoch": 0.82, "grad_norm": 0.310921629295036, "learning_rate": 0.00013743556011662413, "loss": 1.0533, "step": 8549 }, { "epoch": 0.82, "grad_norm": 0.2538334627557585, "learning_rate": 0.00013742089046832855, "loss": 0.994, "step": 8550 }, { "epoch": 0.82, "grad_norm": 0.2919456298279374, "learning_rate": 0.00013740621988355168, "loss": 1.0132, "step": 8551 }, { "epoch": 0.82, "grad_norm": 0.2532952808062169, "learning_rate": 0.00013739154836266064, "loss": 0.9537, "step": 8552 }, { "epoch": 0.82, "grad_norm": 0.31354688802504277, "learning_rate": 0.0001373768759060226, "loss": 1.1491, "step": 8553 }, { "epoch": 0.82, "grad_norm": 0.2485754538763557, "learning_rate": 0.00013736220251400478, "loss": 1.1273, "step": 8554 }, { "epoch": 0.82, "grad_norm": 0.29503224281382395, "learning_rate": 0.00013734752818697434, "loss": 1.0555, "step": 8555 }, { "epoch": 0.82, "grad_norm": 0.2671248855695662, "learning_rate": 0.00013733285292529855, "loss": 1.001, "step": 8556 }, { "epoch": 0.82, "grad_norm": 0.3018392538404123, "learning_rate": 0.00013731817672934463, "loss": 1.1354, "step": 8557 }, { "epoch": 0.82, "grad_norm": 0.297525745763971, "learning_rate": 0.0001373034995994799, "loss": 1.1386, "step": 8558 }, { "epoch": 0.82, "grad_norm": 0.3031989211002846, "learning_rate": 0.00013728882153607165, "loss": 1.0058, "step": 8559 }, { "epoch": 0.82, "grad_norm": 0.2925383632398921, "learning_rate": 0.00013727414253948719, "loss": 0.9204, "step": 8560 }, { "epoch": 0.82, "grad_norm": 0.28581280375824064, "learning_rate": 0.0001372594626100939, "loss": 1.1115, "step": 8561 }, { "epoch": 0.82, "grad_norm": 0.2849917863325391, "learning_rate": 0.00013724478174825916, "loss": 1.0351, "step": 8562 }, { "epoch": 0.82, "grad_norm": 0.2867357667558398, "learning_rate": 0.0001372300999543503, "loss": 0.9713, "step": 8563 }, { "epoch": 0.82, "grad_norm": 0.2912838079543734, "learning_rate": 0.00013721541722873484, "loss": 1.0435, "step": 8564 }, { "epoch": 0.82, "grad_norm": 0.3121331293675006, "learning_rate": 0.00013720073357178017, "loss": 1.1627, "step": 8565 }, { "epoch": 0.82, "grad_norm": 0.29971607543863277, "learning_rate": 0.00013718604898385375, "loss": 1.1364, "step": 8566 }, { "epoch": 0.82, "grad_norm": 0.2740375080807219, "learning_rate": 0.00013717136346532306, "loss": 1.0376, "step": 8567 }, { "epoch": 0.82, "grad_norm": 0.26055014984233066, "learning_rate": 0.00013715667701655565, "loss": 0.9892, "step": 8568 }, { "epoch": 0.82, "grad_norm": 0.28985982137880956, "learning_rate": 0.00013714198963791908, "loss": 1.0258, "step": 8569 }, { "epoch": 0.82, "grad_norm": 0.24153544179134467, "learning_rate": 0.00013712730132978083, "loss": 1.0979, "step": 8570 }, { "epoch": 0.82, "grad_norm": 0.2819940528233919, "learning_rate": 0.00013711261209250857, "loss": 1.0627, "step": 8571 }, { "epoch": 0.82, "grad_norm": 0.3014061072273147, "learning_rate": 0.00013709792192646985, "loss": 1.0036, "step": 8572 }, { "epoch": 0.82, "grad_norm": 0.2943687582185628, "learning_rate": 0.00013708323083203228, "loss": 1.1475, "step": 8573 }, { "epoch": 0.82, "grad_norm": 0.29418992771044433, "learning_rate": 0.0001370685388095636, "loss": 1.0402, "step": 8574 }, { "epoch": 0.82, "grad_norm": 0.3135438299333758, "learning_rate": 0.00013705384585943145, "loss": 1.1125, "step": 8575 }, { "epoch": 0.82, "grad_norm": 0.28733629193402527, "learning_rate": 0.00013703915198200347, "loss": 1.0161, "step": 8576 }, { "epoch": 0.82, "grad_norm": 0.29180260421642634, "learning_rate": 0.00013702445717764746, "loss": 1.0476, "step": 8577 }, { "epoch": 0.82, "grad_norm": 0.2980601220397487, "learning_rate": 0.00013700976144673116, "loss": 1.0602, "step": 8578 }, { "epoch": 0.82, "grad_norm": 0.3066488522260643, "learning_rate": 0.00013699506478962231, "loss": 1.0332, "step": 8579 }, { "epoch": 0.82, "grad_norm": 0.3353162200492158, "learning_rate": 0.00013698036720668873, "loss": 0.9495, "step": 8580 }, { "epoch": 0.82, "grad_norm": 0.2816337655551374, "learning_rate": 0.00013696566869829816, "loss": 1.1274, "step": 8581 }, { "epoch": 0.82, "grad_norm": 0.3100139223925439, "learning_rate": 0.00013695096926481855, "loss": 1.0753, "step": 8582 }, { "epoch": 0.82, "grad_norm": 0.2746129531995423, "learning_rate": 0.0001369362689066177, "loss": 1.0354, "step": 8583 }, { "epoch": 0.82, "grad_norm": 0.2697359432512603, "learning_rate": 0.00013692156762406347, "loss": 0.9328, "step": 8584 }, { "epoch": 0.82, "grad_norm": 0.290636331567875, "learning_rate": 0.00013690686541752384, "loss": 1.0889, "step": 8585 }, { "epoch": 0.82, "grad_norm": 0.28720885198550766, "learning_rate": 0.0001368921622873667, "loss": 1.1372, "step": 8586 }, { "epoch": 0.82, "grad_norm": 0.30775748112991347, "learning_rate": 0.00013687745823396007, "loss": 1.1094, "step": 8587 }, { "epoch": 0.82, "grad_norm": 0.2624465157124863, "learning_rate": 0.0001368627532576718, "loss": 0.9509, "step": 8588 }, { "epoch": 0.82, "grad_norm": 0.26983758242587186, "learning_rate": 0.00013684804735887, "loss": 1.0852, "step": 8589 }, { "epoch": 0.82, "grad_norm": 0.3019798930382428, "learning_rate": 0.00013683334053792262, "loss": 1.0498, "step": 8590 }, { "epoch": 0.82, "grad_norm": 0.28333579655941465, "learning_rate": 0.00013681863279519776, "loss": 1.0397, "step": 8591 }, { "epoch": 0.82, "grad_norm": 0.27677050640426154, "learning_rate": 0.0001368039241310635, "loss": 1.0302, "step": 8592 }, { "epoch": 0.82, "grad_norm": 0.2740272283421442, "learning_rate": 0.00013678921454588787, "loss": 0.9894, "step": 8593 }, { "epoch": 0.82, "grad_norm": 0.30385602594318123, "learning_rate": 0.00013677450404003905, "loss": 0.9967, "step": 8594 }, { "epoch": 0.82, "grad_norm": 0.2543360042755725, "learning_rate": 0.0001367597926138851, "loss": 1.1073, "step": 8595 }, { "epoch": 0.82, "grad_norm": 0.30243436611460256, "learning_rate": 0.0001367450802677943, "loss": 1.0471, "step": 8596 }, { "epoch": 0.82, "grad_norm": 0.298278417963026, "learning_rate": 0.00013673036700213476, "loss": 1.108, "step": 8597 }, { "epoch": 0.82, "grad_norm": 0.344466314776565, "learning_rate": 0.0001367156528172747, "loss": 1.0931, "step": 8598 }, { "epoch": 0.82, "grad_norm": 0.2597226647292173, "learning_rate": 0.00013670093771358234, "loss": 1.0871, "step": 8599 }, { "epoch": 0.82, "grad_norm": 0.24557911295793564, "learning_rate": 0.00013668622169142597, "loss": 1.0619, "step": 8600 }, { "epoch": 0.82, "grad_norm": 0.2747878102964318, "learning_rate": 0.00013667150475117382, "loss": 1.1528, "step": 8601 }, { "epoch": 0.82, "grad_norm": 0.2853209445830605, "learning_rate": 0.00013665678689319424, "loss": 1.0169, "step": 8602 }, { "epoch": 0.82, "grad_norm": 0.29466027746832646, "learning_rate": 0.00013664206811785554, "loss": 1.0538, "step": 8603 }, { "epoch": 0.82, "grad_norm": 0.28940310201790015, "learning_rate": 0.000136627348425526, "loss": 0.9515, "step": 8604 }, { "epoch": 0.82, "grad_norm": 0.28401605740381525, "learning_rate": 0.0001366126278165741, "loss": 1.1516, "step": 8605 }, { "epoch": 0.82, "grad_norm": 0.2740954715279189, "learning_rate": 0.00013659790629136817, "loss": 1.0099, "step": 8606 }, { "epoch": 0.82, "grad_norm": 0.2874894186741719, "learning_rate": 0.00013658318385027665, "loss": 0.9993, "step": 8607 }, { "epoch": 0.82, "grad_norm": 0.2673309894002083, "learning_rate": 0.0001365684604936679, "loss": 1.1178, "step": 8608 }, { "epoch": 0.82, "grad_norm": 0.28060184644025465, "learning_rate": 0.0001365537362219105, "loss": 1.1111, "step": 8609 }, { "epoch": 0.82, "grad_norm": 0.262926201140649, "learning_rate": 0.00013653901103537287, "loss": 1.0677, "step": 8610 }, { "epoch": 0.82, "grad_norm": 0.2603806384626445, "learning_rate": 0.0001365242849344235, "loss": 1.0661, "step": 8611 }, { "epoch": 0.82, "grad_norm": 0.27821013084562063, "learning_rate": 0.00013650955791943097, "loss": 1.1453, "step": 8612 }, { "epoch": 0.82, "grad_norm": 0.3025962932247498, "learning_rate": 0.0001364948299907638, "loss": 1.2105, "step": 8613 }, { "epoch": 0.82, "grad_norm": 0.27742930612541283, "learning_rate": 0.00013648010114879056, "loss": 0.9934, "step": 8614 }, { "epoch": 0.82, "grad_norm": 0.2436905652104897, "learning_rate": 0.0001364653713938799, "loss": 0.98, "step": 8615 }, { "epoch": 0.82, "grad_norm": 0.308431748553756, "learning_rate": 0.00013645064072640036, "loss": 1.1859, "step": 8616 }, { "epoch": 0.82, "grad_norm": 0.32785207627829216, "learning_rate": 0.00013643590914672065, "loss": 1.0128, "step": 8617 }, { "epoch": 0.82, "grad_norm": 0.3089734401683715, "learning_rate": 0.00013642117665520938, "loss": 1.1553, "step": 8618 }, { "epoch": 0.82, "grad_norm": 0.3077673526631775, "learning_rate": 0.0001364064432522353, "loss": 1.0379, "step": 8619 }, { "epoch": 0.82, "grad_norm": 0.29158179240095367, "learning_rate": 0.00013639170893816713, "loss": 1.0954, "step": 8620 }, { "epoch": 0.82, "grad_norm": 0.2899047445443715, "learning_rate": 0.00013637697371337353, "loss": 1.0493, "step": 8621 }, { "epoch": 0.82, "grad_norm": 0.2409967915205593, "learning_rate": 0.0001363622375782233, "loss": 1.0478, "step": 8622 }, { "epoch": 0.82, "grad_norm": 0.294664521205138, "learning_rate": 0.00013634750053308524, "loss": 1.1147, "step": 8623 }, { "epoch": 0.83, "grad_norm": 0.2691622747456586, "learning_rate": 0.00013633276257832814, "loss": 1.0245, "step": 8624 }, { "epoch": 0.83, "grad_norm": 0.2662427740875133, "learning_rate": 0.0001363180237143208, "loss": 1.1204, "step": 8625 }, { "epoch": 0.83, "grad_norm": 0.315565873248549, "learning_rate": 0.00013630328394143213, "loss": 1.0977, "step": 8626 }, { "epoch": 0.83, "grad_norm": 0.2958160614252357, "learning_rate": 0.00013628854326003093, "loss": 1.0883, "step": 8627 }, { "epoch": 0.83, "grad_norm": 0.27828748808814535, "learning_rate": 0.00013627380167048614, "loss": 0.9, "step": 8628 }, { "epoch": 0.83, "grad_norm": 0.2605213690147047, "learning_rate": 0.00013625905917316665, "loss": 1.1766, "step": 8629 }, { "epoch": 0.83, "grad_norm": 0.28580811147831237, "learning_rate": 0.00013624431576844144, "loss": 1.142, "step": 8630 }, { "epoch": 0.83, "grad_norm": 0.2645180239308723, "learning_rate": 0.00013622957145667945, "loss": 1.0222, "step": 8631 }, { "epoch": 0.83, "grad_norm": 0.27332832418491226, "learning_rate": 0.00013621482623824965, "loss": 1.1453, "step": 8632 }, { "epoch": 0.83, "grad_norm": 0.24290270100131645, "learning_rate": 0.00013620008011352105, "loss": 1.0981, "step": 8633 }, { "epoch": 0.83, "grad_norm": 0.2738275452675957, "learning_rate": 0.0001361853330828627, "loss": 1.087, "step": 8634 }, { "epoch": 0.83, "grad_norm": 0.2973345148003589, "learning_rate": 0.00013617058514664367, "loss": 1.0528, "step": 8635 }, { "epoch": 0.83, "grad_norm": 0.30243238138049405, "learning_rate": 0.000136155836305233, "loss": 1.1979, "step": 8636 }, { "epoch": 0.83, "grad_norm": 0.2973554678344532, "learning_rate": 0.0001361410865589998, "loss": 1.1496, "step": 8637 }, { "epoch": 0.83, "grad_norm": 0.27907427949892327, "learning_rate": 0.00013612633590831319, "loss": 1.1112, "step": 8638 }, { "epoch": 0.83, "grad_norm": 0.2546823283144357, "learning_rate": 0.00013611158435354232, "loss": 0.9379, "step": 8639 }, { "epoch": 0.83, "grad_norm": 0.24491672704420625, "learning_rate": 0.0001360968318950564, "loss": 1.1017, "step": 8640 }, { "epoch": 0.83, "grad_norm": 0.2397150988377803, "learning_rate": 0.00013608207853322454, "loss": 0.9593, "step": 8641 }, { "epoch": 0.83, "grad_norm": 0.27419817711093586, "learning_rate": 0.00013606732426841596, "loss": 0.937, "step": 8642 }, { "epoch": 0.83, "grad_norm": 0.3159034039720681, "learning_rate": 0.00013605256910099997, "loss": 1.1914, "step": 8643 }, { "epoch": 0.83, "grad_norm": 0.27134589534535863, "learning_rate": 0.00013603781303134576, "loss": 1.1164, "step": 8644 }, { "epoch": 0.83, "grad_norm": 0.3025553917420271, "learning_rate": 0.00013602305605982262, "loss": 1.0479, "step": 8645 }, { "epoch": 0.83, "grad_norm": 0.27129927828178074, "learning_rate": 0.0001360082981867999, "loss": 1.1052, "step": 8646 }, { "epoch": 0.83, "grad_norm": 0.31517001904029224, "learning_rate": 0.00013599353941264684, "loss": 1.0203, "step": 8647 }, { "epoch": 0.83, "grad_norm": 0.2851479531322408, "learning_rate": 0.0001359787797377329, "loss": 1.0592, "step": 8648 }, { "epoch": 0.83, "grad_norm": 0.2856016662841958, "learning_rate": 0.00013596401916242732, "loss": 1.0729, "step": 8649 }, { "epoch": 0.83, "grad_norm": 0.2847359988614896, "learning_rate": 0.00013594925768709959, "loss": 1.0519, "step": 8650 }, { "epoch": 0.83, "grad_norm": 0.2656376058979404, "learning_rate": 0.00013593449531211908, "loss": 1.0247, "step": 8651 }, { "epoch": 0.83, "grad_norm": 0.2591314203645854, "learning_rate": 0.00013591973203785524, "loss": 1.099, "step": 8652 }, { "epoch": 0.83, "grad_norm": 0.28779350087950495, "learning_rate": 0.00013590496786467754, "loss": 1.054, "step": 8653 }, { "epoch": 0.83, "grad_norm": 0.2546447105719811, "learning_rate": 0.00013589020279295544, "loss": 1.0369, "step": 8654 }, { "epoch": 0.83, "grad_norm": 0.33112794737534357, "learning_rate": 0.00013587543682305847, "loss": 1.0239, "step": 8655 }, { "epoch": 0.83, "grad_norm": 0.261490090551106, "learning_rate": 0.00013586066995535616, "loss": 1.0201, "step": 8656 }, { "epoch": 0.83, "grad_norm": 0.2880468769072062, "learning_rate": 0.000135845902190218, "loss": 1.1193, "step": 8657 }, { "epoch": 0.83, "grad_norm": 0.288557868149757, "learning_rate": 0.00013583113352801367, "loss": 1.1048, "step": 8658 }, { "epoch": 0.83, "grad_norm": 0.29517378742374495, "learning_rate": 0.00013581636396911266, "loss": 1.0905, "step": 8659 }, { "epoch": 0.83, "grad_norm": 0.2617708019113313, "learning_rate": 0.00013580159351388464, "loss": 1.0682, "step": 8660 }, { "epoch": 0.83, "grad_norm": 0.31436033052025997, "learning_rate": 0.00013578682216269927, "loss": 0.9827, "step": 8661 }, { "epoch": 0.83, "grad_norm": 0.2710558291831824, "learning_rate": 0.00013577204991592617, "loss": 1.0678, "step": 8662 }, { "epoch": 0.83, "grad_norm": 0.30589524951652153, "learning_rate": 0.000135757276773935, "loss": 1.0166, "step": 8663 }, { "epoch": 0.83, "grad_norm": 0.31535012911315774, "learning_rate": 0.00013574250273709555, "loss": 1.0809, "step": 8664 }, { "epoch": 0.83, "grad_norm": 0.34357963327522195, "learning_rate": 0.0001357277278057775, "loss": 1.0025, "step": 8665 }, { "epoch": 0.83, "grad_norm": 0.29336089111450286, "learning_rate": 0.0001357129519803506, "loss": 1.14, "step": 8666 }, { "epoch": 0.83, "grad_norm": 0.2988503565133178, "learning_rate": 0.00013569817526118465, "loss": 0.9728, "step": 8667 }, { "epoch": 0.83, "grad_norm": 0.2586925578598481, "learning_rate": 0.0001356833976486494, "loss": 1.0163, "step": 8668 }, { "epoch": 0.83, "grad_norm": 0.2937963873608329, "learning_rate": 0.0001356686191431147, "loss": 1.0555, "step": 8669 }, { "epoch": 0.83, "grad_norm": 0.29571540658867496, "learning_rate": 0.0001356538397449504, "loss": 1.0379, "step": 8670 }, { "epoch": 0.83, "grad_norm": 0.27868553879152896, "learning_rate": 0.00013563905945452638, "loss": 1.0352, "step": 8671 }, { "epoch": 0.83, "grad_norm": 0.29105897745078474, "learning_rate": 0.00013562427827221244, "loss": 0.993, "step": 8672 }, { "epoch": 0.83, "grad_norm": 0.24758183366141243, "learning_rate": 0.0001356094961983786, "loss": 0.8968, "step": 8673 }, { "epoch": 0.83, "grad_norm": 0.2605888157367459, "learning_rate": 0.0001355947132333947, "loss": 1.0564, "step": 8674 }, { "epoch": 0.83, "grad_norm": 0.2924270653891825, "learning_rate": 0.00013557992937763077, "loss": 1.0911, "step": 8675 }, { "epoch": 0.83, "grad_norm": 0.3153612885581894, "learning_rate": 0.00013556514463145672, "loss": 1.0308, "step": 8676 }, { "epoch": 0.83, "grad_norm": 0.27974012653020464, "learning_rate": 0.00013555035899524257, "loss": 1.0977, "step": 8677 }, { "epoch": 0.83, "grad_norm": 0.2686146240839227, "learning_rate": 0.00013553557246935834, "loss": 0.9855, "step": 8678 }, { "epoch": 0.83, "grad_norm": 0.2809894346574622, "learning_rate": 0.00013552078505417412, "loss": 1.1707, "step": 8679 }, { "epoch": 0.83, "grad_norm": 0.2869178669204025, "learning_rate": 0.00013550599675005986, "loss": 1.1491, "step": 8680 }, { "epoch": 0.83, "grad_norm": 0.2955879869986699, "learning_rate": 0.00013549120755738576, "loss": 1.0608, "step": 8681 }, { "epoch": 0.83, "grad_norm": 0.2875101038157737, "learning_rate": 0.00013547641747652187, "loss": 1.0307, "step": 8682 }, { "epoch": 0.83, "grad_norm": 0.28175365232286614, "learning_rate": 0.00013546162650783836, "loss": 1.0323, "step": 8683 }, { "epoch": 0.83, "grad_norm": 0.27831603189171494, "learning_rate": 0.00013544683465170537, "loss": 1.057, "step": 8684 }, { "epoch": 0.83, "grad_norm": 0.3073334793522284, "learning_rate": 0.00013543204190849303, "loss": 1.0045, "step": 8685 }, { "epoch": 0.83, "grad_norm": 0.29215172333875483, "learning_rate": 0.00013541724827857157, "loss": 1.1192, "step": 8686 }, { "epoch": 0.83, "grad_norm": 0.2771543039799703, "learning_rate": 0.00013540245376231122, "loss": 1.1609, "step": 8687 }, { "epoch": 0.83, "grad_norm": 0.29777594479305014, "learning_rate": 0.00013538765836008224, "loss": 0.9681, "step": 8688 }, { "epoch": 0.83, "grad_norm": 0.2545360895510685, "learning_rate": 0.00013537286207225484, "loss": 1.0139, "step": 8689 }, { "epoch": 0.83, "grad_norm": 0.26647845539942555, "learning_rate": 0.00013535806489919935, "loss": 1.1347, "step": 8690 }, { "epoch": 0.83, "grad_norm": 0.29722399863436927, "learning_rate": 0.00013534326684128605, "loss": 1.1584, "step": 8691 }, { "epoch": 0.83, "grad_norm": 0.30891387675873, "learning_rate": 0.00013532846789888532, "loss": 1.0211, "step": 8692 }, { "epoch": 0.83, "grad_norm": 0.31589225263631776, "learning_rate": 0.00013531366807236742, "loss": 1.0062, "step": 8693 }, { "epoch": 0.83, "grad_norm": 0.2972943846537337, "learning_rate": 0.00013529886736210285, "loss": 1.0088, "step": 8694 }, { "epoch": 0.83, "grad_norm": 0.28937720201774464, "learning_rate": 0.00013528406576846189, "loss": 0.9853, "step": 8695 }, { "epoch": 0.83, "grad_norm": 0.24765821705508315, "learning_rate": 0.000135269263291815, "loss": 0.9191, "step": 8696 }, { "epoch": 0.83, "grad_norm": 0.27414169282442363, "learning_rate": 0.00013525445993253267, "loss": 1.0309, "step": 8697 }, { "epoch": 0.83, "grad_norm": 0.2688265567951683, "learning_rate": 0.0001352396556909853, "loss": 1.0866, "step": 8698 }, { "epoch": 0.83, "grad_norm": 0.26180708297498495, "learning_rate": 0.0001352248505675434, "loss": 1.0702, "step": 8699 }, { "epoch": 0.83, "grad_norm": 0.296931683455755, "learning_rate": 0.00013521004456257748, "loss": 1.0738, "step": 8700 }, { "epoch": 0.83, "grad_norm": 0.2518973661220569, "learning_rate": 0.0001351952376764581, "loss": 1.0174, "step": 8701 }, { "epoch": 0.83, "grad_norm": 0.30852287173282533, "learning_rate": 0.00013518042990955575, "loss": 1.0339, "step": 8702 }, { "epoch": 0.83, "grad_norm": 0.28709043823004965, "learning_rate": 0.000135165621262241, "loss": 1.127, "step": 8703 }, { "epoch": 0.83, "grad_norm": 0.2402225336023348, "learning_rate": 0.00013515081173488453, "loss": 1.0512, "step": 8704 }, { "epoch": 0.83, "grad_norm": 0.3009799139239725, "learning_rate": 0.00013513600132785688, "loss": 0.995, "step": 8705 }, { "epoch": 0.83, "grad_norm": 0.31841563271518275, "learning_rate": 0.0001351211900415287, "loss": 1.1342, "step": 8706 }, { "epoch": 0.83, "grad_norm": 0.2726227896301979, "learning_rate": 0.00013510637787627068, "loss": 1.1005, "step": 8707 }, { "epoch": 0.83, "grad_norm": 0.30718818182896745, "learning_rate": 0.0001350915648324535, "loss": 0.9629, "step": 8708 }, { "epoch": 0.83, "grad_norm": 0.24754498668980748, "learning_rate": 0.00013507675091044787, "loss": 1.1344, "step": 8709 }, { "epoch": 0.83, "grad_norm": 0.3341325246272737, "learning_rate": 0.00013506193611062444, "loss": 1.0433, "step": 8710 }, { "epoch": 0.83, "grad_norm": 0.2831390747623922, "learning_rate": 0.0001350471204333541, "loss": 0.9654, "step": 8711 }, { "epoch": 0.83, "grad_norm": 0.27958419761160536, "learning_rate": 0.0001350323038790075, "loss": 1.0065, "step": 8712 }, { "epoch": 0.83, "grad_norm": 0.3219792061230287, "learning_rate": 0.00013501748644795548, "loss": 1.008, "step": 8713 }, { "epoch": 0.83, "grad_norm": 0.27010290849276297, "learning_rate": 0.00013500266814056886, "loss": 0.9119, "step": 8714 }, { "epoch": 0.83, "grad_norm": 0.3035143869003971, "learning_rate": 0.0001349878489572185, "loss": 1.0691, "step": 8715 }, { "epoch": 0.83, "grad_norm": 0.27992327647557635, "learning_rate": 0.0001349730288982752, "loss": 1.067, "step": 8716 }, { "epoch": 0.83, "grad_norm": 0.31109391121037, "learning_rate": 0.00013495820796410987, "loss": 1.0007, "step": 8717 }, { "epoch": 0.83, "grad_norm": 0.30367401228401525, "learning_rate": 0.00013494338615509344, "loss": 1.1086, "step": 8718 }, { "epoch": 0.83, "grad_norm": 0.27392132119871376, "learning_rate": 0.00013492856347159678, "loss": 0.9861, "step": 8719 }, { "epoch": 0.83, "grad_norm": 0.318345859783819, "learning_rate": 0.00013491373991399088, "loss": 1.042, "step": 8720 }, { "epoch": 0.83, "grad_norm": 0.3354485387598985, "learning_rate": 0.0001348989154826467, "loss": 1.1203, "step": 8721 }, { "epoch": 0.83, "grad_norm": 0.3027876660339266, "learning_rate": 0.0001348840901779352, "loss": 1.1517, "step": 8722 }, { "epoch": 0.83, "grad_norm": 0.3025089370772015, "learning_rate": 0.00013486926400022744, "loss": 1.2937, "step": 8723 }, { "epoch": 0.83, "grad_norm": 0.27699234873722095, "learning_rate": 0.00013485443694989443, "loss": 1.0503, "step": 8724 }, { "epoch": 0.83, "grad_norm": 0.30429620705081767, "learning_rate": 0.00013483960902730725, "loss": 1.0699, "step": 8725 }, { "epoch": 0.83, "grad_norm": 0.3040291893430194, "learning_rate": 0.00013482478023283694, "loss": 1.0117, "step": 8726 }, { "epoch": 0.83, "grad_norm": 0.28916049582699416, "learning_rate": 0.00013480995056685462, "loss": 1.0908, "step": 8727 }, { "epoch": 0.84, "grad_norm": 0.24217865268899172, "learning_rate": 0.00013479512002973143, "loss": 1.056, "step": 8728 }, { "epoch": 0.84, "grad_norm": 0.27032893449561624, "learning_rate": 0.00013478028862183846, "loss": 1.0668, "step": 8729 }, { "epoch": 0.84, "grad_norm": 0.27542193970843054, "learning_rate": 0.00013476545634354692, "loss": 0.9492, "step": 8730 }, { "epoch": 0.84, "grad_norm": 0.23543044415563721, "learning_rate": 0.000134750623195228, "loss": 1.0316, "step": 8731 }, { "epoch": 0.84, "grad_norm": 0.3223797172938787, "learning_rate": 0.0001347357891772529, "loss": 1.1393, "step": 8732 }, { "epoch": 0.84, "grad_norm": 0.2998222335002957, "learning_rate": 0.0001347209542899928, "loss": 1.1096, "step": 8733 }, { "epoch": 0.84, "grad_norm": 0.2845913693982625, "learning_rate": 0.00013470611853381905, "loss": 1.0304, "step": 8734 }, { "epoch": 0.84, "grad_norm": 0.312734748694851, "learning_rate": 0.00013469128190910285, "loss": 1.0816, "step": 8735 }, { "epoch": 0.84, "grad_norm": 0.2784778983582345, "learning_rate": 0.00013467644441621552, "loss": 0.9477, "step": 8736 }, { "epoch": 0.84, "grad_norm": 0.294746691747119, "learning_rate": 0.00013466160605552836, "loss": 1.1006, "step": 8737 }, { "epoch": 0.84, "grad_norm": 0.2729555754078699, "learning_rate": 0.00013464676682741275, "loss": 1.165, "step": 8738 }, { "epoch": 0.84, "grad_norm": 0.27914582732602733, "learning_rate": 0.00013463192673223998, "loss": 1.0035, "step": 8739 }, { "epoch": 0.84, "grad_norm": 0.2759299004442835, "learning_rate": 0.00013461708577038154, "loss": 1.1369, "step": 8740 }, { "epoch": 0.84, "grad_norm": 0.25997588040826375, "learning_rate": 0.00013460224394220871, "loss": 1.0008, "step": 8741 }, { "epoch": 0.84, "grad_norm": 0.3006336454437912, "learning_rate": 0.00013458740124809302, "loss": 1.0918, "step": 8742 }, { "epoch": 0.84, "grad_norm": 0.2865486476111894, "learning_rate": 0.00013457255768840586, "loss": 1.0318, "step": 8743 }, { "epoch": 0.84, "grad_norm": 0.2610230590960148, "learning_rate": 0.00013455771326351874, "loss": 1.0608, "step": 8744 }, { "epoch": 0.84, "grad_norm": 0.2747856141015647, "learning_rate": 0.0001345428679738031, "loss": 0.9236, "step": 8745 }, { "epoch": 0.84, "grad_norm": 0.250840631667433, "learning_rate": 0.0001345280218196305, "loss": 1.076, "step": 8746 }, { "epoch": 0.84, "grad_norm": 0.3427774684297268, "learning_rate": 0.0001345131748013724, "loss": 1.0931, "step": 8747 }, { "epoch": 0.84, "grad_norm": 0.30101078325665653, "learning_rate": 0.0001344983269194005, "loss": 0.9994, "step": 8748 }, { "epoch": 0.84, "grad_norm": 0.285861675215507, "learning_rate": 0.00013448347817408623, "loss": 1.0467, "step": 8749 }, { "epoch": 0.84, "grad_norm": 0.28454224361070146, "learning_rate": 0.00013446862856580127, "loss": 1.0008, "step": 8750 }, { "epoch": 0.84, "grad_norm": 0.33159442196308175, "learning_rate": 0.0001344537780949172, "loss": 1.206, "step": 8751 }, { "epoch": 0.84, "grad_norm": 0.28094929027693405, "learning_rate": 0.0001344389267618057, "loss": 1.182, "step": 8752 }, { "epoch": 0.84, "grad_norm": 0.2904531787454453, "learning_rate": 0.0001344240745668384, "loss": 0.982, "step": 8753 }, { "epoch": 0.84, "grad_norm": 0.3124634413609586, "learning_rate": 0.00013440922151038698, "loss": 1.0623, "step": 8754 }, { "epoch": 0.84, "grad_norm": 0.28809714187038504, "learning_rate": 0.0001343943675928232, "loss": 1.1198, "step": 8755 }, { "epoch": 0.84, "grad_norm": 0.2873805808293868, "learning_rate": 0.00013437951281451875, "loss": 1.0509, "step": 8756 }, { "epoch": 0.84, "grad_norm": 0.3130611893567796, "learning_rate": 0.00013436465717584533, "loss": 1.0459, "step": 8757 }, { "epoch": 0.84, "grad_norm": 0.3002619940368619, "learning_rate": 0.00013434980067717484, "loss": 0.9741, "step": 8758 }, { "epoch": 0.84, "grad_norm": 0.29994300371561994, "learning_rate": 0.00013433494331887896, "loss": 1.0306, "step": 8759 }, { "epoch": 0.84, "grad_norm": 0.2709309546499614, "learning_rate": 0.00013432008510132955, "loss": 1.0472, "step": 8760 }, { "epoch": 0.84, "grad_norm": 0.30275123120474723, "learning_rate": 0.00013430522602489846, "loss": 1.0934, "step": 8761 }, { "epoch": 0.84, "grad_norm": 0.27267018063077797, "learning_rate": 0.0001342903660899575, "loss": 1.0372, "step": 8762 }, { "epoch": 0.84, "grad_norm": 0.3179594835836974, "learning_rate": 0.0001342755052968786, "loss": 1.0563, "step": 8763 }, { "epoch": 0.84, "grad_norm": 0.31426468902967697, "learning_rate": 0.0001342606436460336, "loss": 1.1932, "step": 8764 }, { "epoch": 0.84, "grad_norm": 0.2715991486632539, "learning_rate": 0.00013424578113779452, "loss": 1.0489, "step": 8765 }, { "epoch": 0.84, "grad_norm": 0.31900892380705087, "learning_rate": 0.00013423091777253323, "loss": 1.109, "step": 8766 }, { "epoch": 0.84, "grad_norm": 0.2661697001151372, "learning_rate": 0.0001342160535506217, "loss": 1.0726, "step": 8767 }, { "epoch": 0.84, "grad_norm": 0.29001722553751685, "learning_rate": 0.00013420118847243191, "loss": 1.0504, "step": 8768 }, { "epoch": 0.84, "grad_norm": 0.26543562475469634, "learning_rate": 0.0001341863225383359, "loss": 1.0057, "step": 8769 }, { "epoch": 0.84, "grad_norm": 0.30664308972274495, "learning_rate": 0.0001341714557487057, "loss": 1.0982, "step": 8770 }, { "epoch": 0.84, "grad_norm": 0.29541897062886224, "learning_rate": 0.0001341565881039133, "loss": 1.0859, "step": 8771 }, { "epoch": 0.84, "grad_norm": 0.280865811627392, "learning_rate": 0.00013414171960433085, "loss": 1.119, "step": 8772 }, { "epoch": 0.84, "grad_norm": 0.24611125528299096, "learning_rate": 0.00013412685025033038, "loss": 1.1245, "step": 8773 }, { "epoch": 0.84, "grad_norm": 0.31141735928913117, "learning_rate": 0.00013411198004228405, "loss": 1.0752, "step": 8774 }, { "epoch": 0.84, "grad_norm": 0.23851136770012923, "learning_rate": 0.000134097108980564, "loss": 1.0644, "step": 8775 }, { "epoch": 0.84, "grad_norm": 0.2371811186751139, "learning_rate": 0.00013408223706554235, "loss": 1.0813, "step": 8776 }, { "epoch": 0.84, "grad_norm": 0.2987397006710603, "learning_rate": 0.0001340673642975913, "loss": 1.0765, "step": 8777 }, { "epoch": 0.84, "grad_norm": 0.26831922729943924, "learning_rate": 0.00013405249067708304, "loss": 1.0317, "step": 8778 }, { "epoch": 0.84, "grad_norm": 0.2899859345363733, "learning_rate": 0.00013403761620438983, "loss": 1.0402, "step": 8779 }, { "epoch": 0.84, "grad_norm": 0.2878652206230178, "learning_rate": 0.00013402274087988384, "loss": 0.9933, "step": 8780 }, { "epoch": 0.84, "grad_norm": 0.31702536400845066, "learning_rate": 0.0001340078647039374, "loss": 1.0253, "step": 8781 }, { "epoch": 0.84, "grad_norm": 0.30001278045384056, "learning_rate": 0.00013399298767692277, "loss": 1.123, "step": 8782 }, { "epoch": 0.84, "grad_norm": 0.2624717683492848, "learning_rate": 0.00013397810979921227, "loss": 1.021, "step": 8783 }, { "epoch": 0.84, "grad_norm": 0.316080617053837, "learning_rate": 0.0001339632310711782, "loss": 1.133, "step": 8784 }, { "epoch": 0.84, "grad_norm": 0.27598469027777045, "learning_rate": 0.00013394835149319292, "loss": 0.8538, "step": 8785 }, { "epoch": 0.84, "grad_norm": 0.29532824662066726, "learning_rate": 0.00013393347106562884, "loss": 1.0311, "step": 8786 }, { "epoch": 0.84, "grad_norm": 0.3293468526276591, "learning_rate": 0.00013391858978885828, "loss": 1.0673, "step": 8787 }, { "epoch": 0.84, "grad_norm": 0.2643309414987189, "learning_rate": 0.00013390370766325373, "loss": 1.1561, "step": 8788 }, { "epoch": 0.84, "grad_norm": 0.2763364535801901, "learning_rate": 0.00013388882468918758, "loss": 1.0627, "step": 8789 }, { "epoch": 0.84, "grad_norm": 0.2862222208420642, "learning_rate": 0.0001338739408670323, "loss": 1.0606, "step": 8790 }, { "epoch": 0.84, "grad_norm": 0.2927406980062334, "learning_rate": 0.00013385905619716032, "loss": 1.0987, "step": 8791 }, { "epoch": 0.84, "grad_norm": 0.27073771357355814, "learning_rate": 0.00013384417067994423, "loss": 1.0864, "step": 8792 }, { "epoch": 0.84, "grad_norm": 0.26681668946322573, "learning_rate": 0.00013382928431575648, "loss": 0.9944, "step": 8793 }, { "epoch": 0.84, "grad_norm": 0.2872926853091687, "learning_rate": 0.00013381439710496962, "loss": 1.1929, "step": 8794 }, { "epoch": 0.84, "grad_norm": 0.29473601888234835, "learning_rate": 0.00013379950904795625, "loss": 1.0424, "step": 8795 }, { "epoch": 0.84, "grad_norm": 0.2767351991760329, "learning_rate": 0.0001337846201450889, "loss": 0.976, "step": 8796 }, { "epoch": 0.84, "grad_norm": 0.31278846280951567, "learning_rate": 0.00013376973039674019, "loss": 1.0786, "step": 8797 }, { "epoch": 0.84, "grad_norm": 0.29106202302244005, "learning_rate": 0.00013375483980328275, "loss": 1.0206, "step": 8798 }, { "epoch": 0.84, "grad_norm": 0.3062557626049918, "learning_rate": 0.00013373994836508925, "loss": 1.0934, "step": 8799 }, { "epoch": 0.84, "grad_norm": 0.2930477172648966, "learning_rate": 0.00013372505608253235, "loss": 1.0262, "step": 8800 }, { "epoch": 0.84, "grad_norm": 0.34634712769514453, "learning_rate": 0.0001337101629559847, "loss": 0.9546, "step": 8801 }, { "epoch": 0.84, "grad_norm": 0.2772018365006181, "learning_rate": 0.00013369526898581902, "loss": 1.0512, "step": 8802 }, { "epoch": 0.84, "grad_norm": 0.2709787690909469, "learning_rate": 0.00013368037417240807, "loss": 0.994, "step": 8803 }, { "epoch": 0.84, "grad_norm": 0.2814766172965834, "learning_rate": 0.0001336654785161246, "loss": 1.0845, "step": 8804 }, { "epoch": 0.84, "grad_norm": 0.2853983711193067, "learning_rate": 0.00013365058201734135, "loss": 0.9669, "step": 8805 }, { "epoch": 0.84, "grad_norm": 0.3256691908202577, "learning_rate": 0.00013363568467643117, "loss": 1.0751, "step": 8806 }, { "epoch": 0.84, "grad_norm": 0.2878851657217033, "learning_rate": 0.00013362078649376683, "loss": 1.045, "step": 8807 }, { "epoch": 0.84, "grad_norm": 0.30585377114209605, "learning_rate": 0.00013360588746972118, "loss": 1.0328, "step": 8808 }, { "epoch": 0.84, "grad_norm": 0.2793018113302156, "learning_rate": 0.00013359098760466707, "loss": 1.123, "step": 8809 }, { "epoch": 0.84, "grad_norm": 0.25457785398487776, "learning_rate": 0.0001335760868989774, "loss": 1.1305, "step": 8810 }, { "epoch": 0.84, "grad_norm": 0.26931250840260823, "learning_rate": 0.00013356118535302503, "loss": 1.1142, "step": 8811 }, { "epoch": 0.84, "grad_norm": 0.2779912568855338, "learning_rate": 0.00013354628296718293, "loss": 1.0774, "step": 8812 }, { "epoch": 0.84, "grad_norm": 0.30698926954918604, "learning_rate": 0.000133531379741824, "loss": 1.1206, "step": 8813 }, { "epoch": 0.84, "grad_norm": 0.28571250644527174, "learning_rate": 0.0001335164756773212, "loss": 1.1259, "step": 8814 }, { "epoch": 0.84, "grad_norm": 0.30864009381335133, "learning_rate": 0.00013350157077404755, "loss": 0.9197, "step": 8815 }, { "epoch": 0.84, "grad_norm": 0.2714746153198062, "learning_rate": 0.00013348666503237603, "loss": 0.9842, "step": 8816 }, { "epoch": 0.84, "grad_norm": 0.28173495612303234, "learning_rate": 0.0001334717584526797, "loss": 0.9923, "step": 8817 }, { "epoch": 0.84, "grad_norm": 0.3216688395468582, "learning_rate": 0.00013345685103533154, "loss": 1.0964, "step": 8818 }, { "epoch": 0.84, "grad_norm": 0.2823984993319558, "learning_rate": 0.00013344194278070467, "loss": 1.0409, "step": 8819 }, { "epoch": 0.84, "grad_norm": 0.257746108679655, "learning_rate": 0.00013342703368917217, "loss": 1.1301, "step": 8820 }, { "epoch": 0.84, "grad_norm": 0.3051717861148401, "learning_rate": 0.00013341212376110715, "loss": 1.0672, "step": 8821 }, { "epoch": 0.84, "grad_norm": 0.27852339454292335, "learning_rate": 0.00013339721299688272, "loss": 0.9949, "step": 8822 }, { "epoch": 0.84, "grad_norm": 0.30905296931037346, "learning_rate": 0.00013338230139687206, "loss": 1.0062, "step": 8823 }, { "epoch": 0.84, "grad_norm": 0.24655148826104478, "learning_rate": 0.0001333673889614483, "loss": 1.001, "step": 8824 }, { "epoch": 0.84, "grad_norm": 0.30269701257489295, "learning_rate": 0.00013335247569098467, "loss": 1.0285, "step": 8825 }, { "epoch": 0.84, "grad_norm": 0.29912297649204367, "learning_rate": 0.00013333756158585437, "loss": 1.1004, "step": 8826 }, { "epoch": 0.84, "grad_norm": 0.29035499574876744, "learning_rate": 0.00013332264664643067, "loss": 1.053, "step": 8827 }, { "epoch": 0.84, "grad_norm": 0.2869996333200353, "learning_rate": 0.00013330773087308676, "loss": 1.0357, "step": 8828 }, { "epoch": 0.84, "grad_norm": 0.29232380763052546, "learning_rate": 0.00013329281426619597, "loss": 1.0288, "step": 8829 }, { "epoch": 0.84, "grad_norm": 0.2495577755639189, "learning_rate": 0.0001332778968261316, "loss": 1.0611, "step": 8830 }, { "epoch": 0.84, "grad_norm": 0.2979321424581524, "learning_rate": 0.0001332629785532669, "loss": 0.9719, "step": 8831 }, { "epoch": 0.84, "grad_norm": 0.2821090849292814, "learning_rate": 0.0001332480594479753, "loss": 1.1135, "step": 8832 }, { "epoch": 0.85, "grad_norm": 0.3016300483432541, "learning_rate": 0.0001332331395106301, "loss": 1.1395, "step": 8833 }, { "epoch": 0.85, "grad_norm": 0.3017945019892449, "learning_rate": 0.00013321821874160472, "loss": 0.9148, "step": 8834 }, { "epoch": 0.85, "grad_norm": 0.25675693104083414, "learning_rate": 0.00013320329714127248, "loss": 1.0208, "step": 8835 }, { "epoch": 0.85, "grad_norm": 0.30879551976248903, "learning_rate": 0.0001331883747100069, "loss": 1.1158, "step": 8836 }, { "epoch": 0.85, "grad_norm": 0.2910089252086533, "learning_rate": 0.0001331734514481814, "loss": 1.0531, "step": 8837 }, { "epoch": 0.85, "grad_norm": 0.2800038334110076, "learning_rate": 0.0001331585273561694, "loss": 1.0722, "step": 8838 }, { "epoch": 0.85, "grad_norm": 0.29386882904498585, "learning_rate": 0.00013314360243434442, "loss": 1.0914, "step": 8839 }, { "epoch": 0.85, "grad_norm": 0.32822389242666516, "learning_rate": 0.00013312867668307998, "loss": 1.0664, "step": 8840 }, { "epoch": 0.85, "grad_norm": 0.28150462602342563, "learning_rate": 0.00013311375010274958, "loss": 1.0505, "step": 8841 }, { "epoch": 0.85, "grad_norm": 0.28190761168490724, "learning_rate": 0.00013309882269372676, "loss": 1.0243, "step": 8842 }, { "epoch": 0.85, "grad_norm": 0.324588959170944, "learning_rate": 0.00013308389445638508, "loss": 1.1538, "step": 8843 }, { "epoch": 0.85, "grad_norm": 0.313116228461298, "learning_rate": 0.0001330689653910982, "loss": 1.0932, "step": 8844 }, { "epoch": 0.85, "grad_norm": 0.3028011762250344, "learning_rate": 0.00013305403549823962, "loss": 1.0032, "step": 8845 }, { "epoch": 0.85, "grad_norm": 0.2679913864423184, "learning_rate": 0.00013303910477818306, "loss": 1.0489, "step": 8846 }, { "epoch": 0.85, "grad_norm": 0.23253134929888603, "learning_rate": 0.00013302417323130214, "loss": 1.0339, "step": 8847 }, { "epoch": 0.85, "grad_norm": 0.32940341276625706, "learning_rate": 0.00013300924085797052, "loss": 1.0542, "step": 8848 }, { "epoch": 0.85, "grad_norm": 0.32063577062079657, "learning_rate": 0.0001329943076585619, "loss": 1.029, "step": 8849 }, { "epoch": 0.85, "grad_norm": 0.30995947199370877, "learning_rate": 0.00013297937363345, "loss": 1.0355, "step": 8850 }, { "epoch": 0.85, "grad_norm": 0.25561792258910343, "learning_rate": 0.00013296443878300858, "loss": 1.0827, "step": 8851 }, { "epoch": 0.85, "grad_norm": 0.26801538052648793, "learning_rate": 0.0001329495031076113, "loss": 1.0586, "step": 8852 }, { "epoch": 0.85, "grad_norm": 0.2728699949209647, "learning_rate": 0.00013293456660763204, "loss": 1.033, "step": 8853 }, { "epoch": 0.85, "grad_norm": 0.2807693199525417, "learning_rate": 0.00013291962928344456, "loss": 1.0447, "step": 8854 }, { "epoch": 0.85, "grad_norm": 0.30144064538223736, "learning_rate": 0.00013290469113542264, "loss": 1.0855, "step": 8855 }, { "epoch": 0.85, "grad_norm": 0.2871386350695731, "learning_rate": 0.00013288975216394015, "loss": 1.045, "step": 8856 }, { "epoch": 0.85, "grad_norm": 0.2783823690348202, "learning_rate": 0.00013287481236937094, "loss": 1.0184, "step": 8857 }, { "epoch": 0.85, "grad_norm": 0.28082328661100076, "learning_rate": 0.0001328598717520889, "loss": 1.11, "step": 8858 }, { "epoch": 0.85, "grad_norm": 0.2537605984201099, "learning_rate": 0.00013284493031246792, "loss": 1.0461, "step": 8859 }, { "epoch": 0.85, "grad_norm": 0.29693120122561123, "learning_rate": 0.00013282998805088191, "loss": 0.9376, "step": 8860 }, { "epoch": 0.85, "grad_norm": 0.2736943955631886, "learning_rate": 0.0001328150449677048, "loss": 1.1058, "step": 8861 }, { "epoch": 0.85, "grad_norm": 0.3420821362503677, "learning_rate": 0.00013280010106331058, "loss": 1.1671, "step": 8862 }, { "epoch": 0.85, "grad_norm": 0.2989694017306944, "learning_rate": 0.00013278515633807322, "loss": 1.1161, "step": 8863 }, { "epoch": 0.85, "grad_norm": 0.24965561624077762, "learning_rate": 0.00013277021079236673, "loss": 0.9428, "step": 8864 }, { "epoch": 0.85, "grad_norm": 0.27560232204493745, "learning_rate": 0.0001327552644265651, "loss": 1.0965, "step": 8865 }, { "epoch": 0.85, "grad_norm": 0.31916956119622164, "learning_rate": 0.0001327403172410424, "loss": 1.1617, "step": 8866 }, { "epoch": 0.85, "grad_norm": 0.26752857028568255, "learning_rate": 0.00013272536923617266, "loss": 0.9989, "step": 8867 }, { "epoch": 0.85, "grad_norm": 0.2883358139977724, "learning_rate": 0.00013271042041233003, "loss": 0.9731, "step": 8868 }, { "epoch": 0.85, "grad_norm": 0.33884977680481115, "learning_rate": 0.00013269547076988854, "loss": 1.1386, "step": 8869 }, { "epoch": 0.85, "grad_norm": 0.32546200445254864, "learning_rate": 0.00013268052030922237, "loss": 0.9609, "step": 8870 }, { "epoch": 0.85, "grad_norm": 0.2826007467613128, "learning_rate": 0.00013266556903070563, "loss": 1.0233, "step": 8871 }, { "epoch": 0.85, "grad_norm": 0.2778261537338506, "learning_rate": 0.00013265061693471246, "loss": 1.0323, "step": 8872 }, { "epoch": 0.85, "grad_norm": 0.26913753452272493, "learning_rate": 0.00013263566402161713, "loss": 0.9943, "step": 8873 }, { "epoch": 0.85, "grad_norm": 0.26448807283032155, "learning_rate": 0.0001326207102917938, "loss": 1.0572, "step": 8874 }, { "epoch": 0.85, "grad_norm": 0.3046847456760344, "learning_rate": 0.00013260575574561666, "loss": 1.0784, "step": 8875 }, { "epoch": 0.85, "grad_norm": 0.297455531468288, "learning_rate": 0.00013259080038345998, "loss": 1.1246, "step": 8876 }, { "epoch": 0.85, "grad_norm": 0.31150901502574146, "learning_rate": 0.0001325758442056981, "loss": 1.1541, "step": 8877 }, { "epoch": 0.85, "grad_norm": 0.32499878866127807, "learning_rate": 0.00013256088721270518, "loss": 1.1098, "step": 8878 }, { "epoch": 0.85, "grad_norm": 0.30353184520322335, "learning_rate": 0.00013254592940485562, "loss": 1.1114, "step": 8879 }, { "epoch": 0.85, "grad_norm": 0.2765014106506231, "learning_rate": 0.00013253097078252374, "loss": 1.0607, "step": 8880 }, { "epoch": 0.85, "grad_norm": 0.3232413136845846, "learning_rate": 0.00013251601134608385, "loss": 1.0267, "step": 8881 }, { "epoch": 0.85, "grad_norm": 0.2576596041228671, "learning_rate": 0.00013250105109591034, "loss": 1.1395, "step": 8882 }, { "epoch": 0.85, "grad_norm": 0.299087343065221, "learning_rate": 0.00013248609003237762, "loss": 1.0855, "step": 8883 }, { "epoch": 0.85, "grad_norm": 0.31373524675219927, "learning_rate": 0.00013247112815586008, "loss": 1.0168, "step": 8884 }, { "epoch": 0.85, "grad_norm": 0.29307123636308, "learning_rate": 0.00013245616546673212, "loss": 1.1391, "step": 8885 }, { "epoch": 0.85, "grad_norm": 0.2892716141501793, "learning_rate": 0.00013244120196536825, "loss": 1.0368, "step": 8886 }, { "epoch": 0.85, "grad_norm": 0.30030230235650646, "learning_rate": 0.0001324262376521429, "loss": 0.9953, "step": 8887 }, { "epoch": 0.85, "grad_norm": 0.2958420718954474, "learning_rate": 0.00013241127252743056, "loss": 1.0635, "step": 8888 }, { "epoch": 0.85, "grad_norm": 0.30426355904923175, "learning_rate": 0.00013239630659160577, "loss": 0.9941, "step": 8889 }, { "epoch": 0.85, "grad_norm": 0.26204874988846405, "learning_rate": 0.00013238133984504305, "loss": 1.0014, "step": 8890 }, { "epoch": 0.85, "grad_norm": 0.2808476806284864, "learning_rate": 0.00013236637228811695, "loss": 0.9811, "step": 8891 }, { "epoch": 0.85, "grad_norm": 0.2613165358148853, "learning_rate": 0.00013235140392120202, "loss": 1.0464, "step": 8892 }, { "epoch": 0.85, "grad_norm": 0.2614569034666025, "learning_rate": 0.0001323364347446729, "loss": 1.0346, "step": 8893 }, { "epoch": 0.85, "grad_norm": 0.3048725191683244, "learning_rate": 0.00013232146475890415, "loss": 1.1072, "step": 8894 }, { "epoch": 0.85, "grad_norm": 0.3051855377395641, "learning_rate": 0.00013230649396427048, "loss": 1.2126, "step": 8895 }, { "epoch": 0.85, "grad_norm": 0.2885176846905764, "learning_rate": 0.00013229152236114646, "loss": 1.1485, "step": 8896 }, { "epoch": 0.85, "grad_norm": 0.27063874711622693, "learning_rate": 0.0001322765499499068, "loss": 1.0701, "step": 8897 }, { "epoch": 0.85, "grad_norm": 0.2650234430261914, "learning_rate": 0.0001322615767309262, "loss": 0.9788, "step": 8898 }, { "epoch": 0.85, "grad_norm": 0.3332394548518117, "learning_rate": 0.00013224660270457937, "loss": 1.0501, "step": 8899 }, { "epoch": 0.85, "grad_norm": 0.2622983793530935, "learning_rate": 0.00013223162787124104, "loss": 1.0524, "step": 8900 }, { "epoch": 0.85, "grad_norm": 0.28082192393839644, "learning_rate": 0.00013221665223128593, "loss": 1.2141, "step": 8901 }, { "epoch": 0.85, "grad_norm": 0.30805730414546934, "learning_rate": 0.00013220167578508892, "loss": 1.073, "step": 8902 }, { "epoch": 0.85, "grad_norm": 0.2984108714398881, "learning_rate": 0.00013218669853302467, "loss": 1.075, "step": 8903 }, { "epoch": 0.85, "grad_norm": 0.24970867197130167, "learning_rate": 0.0001321717204754681, "loss": 0.9129, "step": 8904 }, { "epoch": 0.85, "grad_norm": 0.2976975973384336, "learning_rate": 0.00013215674161279402, "loss": 1.0718, "step": 8905 }, { "epoch": 0.85, "grad_norm": 0.26473164124738907, "learning_rate": 0.00013214176194537722, "loss": 1.1786, "step": 8906 }, { "epoch": 0.85, "grad_norm": 0.28433983890808473, "learning_rate": 0.00013212678147359267, "loss": 1.1131, "step": 8907 }, { "epoch": 0.85, "grad_norm": 0.268486990998593, "learning_rate": 0.00013211180019781518, "loss": 1.0515, "step": 8908 }, { "epoch": 0.85, "grad_norm": 0.2737650744067927, "learning_rate": 0.00013209681811841972, "loss": 1.0995, "step": 8909 }, { "epoch": 0.85, "grad_norm": 0.30007514688621423, "learning_rate": 0.00013208183523578124, "loss": 1.0624, "step": 8910 }, { "epoch": 0.85, "grad_norm": 0.26052045873934276, "learning_rate": 0.00013206685155027465, "loss": 1.1553, "step": 8911 }, { "epoch": 0.85, "grad_norm": 0.27092778505487736, "learning_rate": 0.00013205186706227498, "loss": 1.1842, "step": 8912 }, { "epoch": 0.85, "grad_norm": 0.3258021626325885, "learning_rate": 0.00013203688177215714, "loss": 1.081, "step": 8913 }, { "epoch": 0.85, "grad_norm": 0.2984371253963151, "learning_rate": 0.00013202189568029625, "loss": 1.0585, "step": 8914 }, { "epoch": 0.85, "grad_norm": 0.28356401437541234, "learning_rate": 0.00013200690878706724, "loss": 0.9656, "step": 8915 }, { "epoch": 0.85, "grad_norm": 0.27533653430669, "learning_rate": 0.00013199192109284526, "loss": 1.0106, "step": 8916 }, { "epoch": 0.85, "grad_norm": 0.28571972152442254, "learning_rate": 0.00013197693259800534, "loss": 0.9763, "step": 8917 }, { "epoch": 0.85, "grad_norm": 0.2560488227643934, "learning_rate": 0.0001319619433029226, "loss": 1.182, "step": 8918 }, { "epoch": 0.85, "grad_norm": 0.2984853514513387, "learning_rate": 0.00013194695320797214, "loss": 1.1465, "step": 8919 }, { "epoch": 0.85, "grad_norm": 0.25687016849701755, "learning_rate": 0.00013193196231352905, "loss": 0.9482, "step": 8920 }, { "epoch": 0.85, "grad_norm": 0.31648422763460987, "learning_rate": 0.00013191697061996858, "loss": 1.0639, "step": 8921 }, { "epoch": 0.85, "grad_norm": 0.2499110549279051, "learning_rate": 0.00013190197812766588, "loss": 1.0542, "step": 8922 }, { "epoch": 0.85, "grad_norm": 0.26107365347955813, "learning_rate": 0.00013188698483699608, "loss": 1.0763, "step": 8923 }, { "epoch": 0.85, "grad_norm": 0.2890340873975123, "learning_rate": 0.00013187199074833449, "loss": 1.1517, "step": 8924 }, { "epoch": 0.85, "grad_norm": 0.26683546682272963, "learning_rate": 0.00013185699586205628, "loss": 1.1019, "step": 8925 }, { "epoch": 0.85, "grad_norm": 0.2751530786283772, "learning_rate": 0.0001318420001785367, "loss": 1.0552, "step": 8926 }, { "epoch": 0.85, "grad_norm": 0.2805940758160267, "learning_rate": 0.00013182700369815108, "loss": 1.046, "step": 8927 }, { "epoch": 0.85, "grad_norm": 0.2581306634911059, "learning_rate": 0.00013181200642127468, "loss": 0.9718, "step": 8928 }, { "epoch": 0.85, "grad_norm": 0.2699851519716227, "learning_rate": 0.00013179700834828282, "loss": 1.1284, "step": 8929 }, { "epoch": 0.85, "grad_norm": 0.2735704940152805, "learning_rate": 0.00013178200947955087, "loss": 0.9873, "step": 8930 }, { "epoch": 0.85, "grad_norm": 0.29262357118516796, "learning_rate": 0.00013176700981545414, "loss": 1.052, "step": 8931 }, { "epoch": 0.85, "grad_norm": 0.3021541436530508, "learning_rate": 0.00013175200935636804, "loss": 0.9968, "step": 8932 }, { "epoch": 0.85, "grad_norm": 0.2994688515381643, "learning_rate": 0.0001317370081026679, "loss": 1.0423, "step": 8933 }, { "epoch": 0.85, "grad_norm": 0.21622128251751344, "learning_rate": 0.00013172200605472925, "loss": 1.0596, "step": 8934 }, { "epoch": 0.85, "grad_norm": 0.27357520538102, "learning_rate": 0.00013170700321292746, "loss": 1.0868, "step": 8935 }, { "epoch": 0.85, "grad_norm": 0.32314314647789527, "learning_rate": 0.00013169199957763797, "loss": 1.1866, "step": 8936 }, { "epoch": 0.86, "grad_norm": 0.27729229797866944, "learning_rate": 0.00013167699514923624, "loss": 1.0354, "step": 8937 }, { "epoch": 0.86, "grad_norm": 0.2689304675066156, "learning_rate": 0.00013166198992809784, "loss": 1.118, "step": 8938 }, { "epoch": 0.86, "grad_norm": 0.281878748265055, "learning_rate": 0.00013164698391459823, "loss": 1.0886, "step": 8939 }, { "epoch": 0.86, "grad_norm": 0.31972214001347354, "learning_rate": 0.00013163197710911294, "loss": 1.0638, "step": 8940 }, { "epoch": 0.86, "grad_norm": 0.30752537186921586, "learning_rate": 0.00013161696951201755, "loss": 1.0808, "step": 8941 }, { "epoch": 0.86, "grad_norm": 0.3169449014565689, "learning_rate": 0.00013160196112368765, "loss": 1.0815, "step": 8942 }, { "epoch": 0.86, "grad_norm": 0.279353465633034, "learning_rate": 0.00013158695194449878, "loss": 0.9856, "step": 8943 }, { "epoch": 0.86, "grad_norm": 0.2506893330731972, "learning_rate": 0.00013157194197482662, "loss": 1.0427, "step": 8944 }, { "epoch": 0.86, "grad_norm": 0.28379173348711845, "learning_rate": 0.00013155693121504676, "loss": 1.077, "step": 8945 }, { "epoch": 0.86, "grad_norm": 0.35906069380401406, "learning_rate": 0.00013154191966553488, "loss": 1.0853, "step": 8946 }, { "epoch": 0.86, "grad_norm": 0.2682913120576577, "learning_rate": 0.0001315269073266666, "loss": 1.011, "step": 8947 }, { "epoch": 0.86, "grad_norm": 0.2686741445506597, "learning_rate": 0.00013151189419881767, "loss": 1.0058, "step": 8948 }, { "epoch": 0.86, "grad_norm": 0.30034831308239196, "learning_rate": 0.00013149688028236378, "loss": 0.999, "step": 8949 }, { "epoch": 0.86, "grad_norm": 0.2785651237874469, "learning_rate": 0.00013148186557768065, "loss": 1.0743, "step": 8950 }, { "epoch": 0.86, "grad_norm": 0.26665224113450825, "learning_rate": 0.00013146685008514405, "loss": 1.1498, "step": 8951 }, { "epoch": 0.86, "grad_norm": 0.2767238879256942, "learning_rate": 0.00013145183380512977, "loss": 0.9933, "step": 8952 }, { "epoch": 0.86, "grad_norm": 0.27676417086874466, "learning_rate": 0.0001314368167380136, "loss": 1.048, "step": 8953 }, { "epoch": 0.86, "grad_norm": 0.26331762630608485, "learning_rate": 0.00013142179888417127, "loss": 1.0725, "step": 8954 }, { "epoch": 0.86, "grad_norm": 0.2744742895646326, "learning_rate": 0.00013140678024397876, "loss": 1.0698, "step": 8955 }, { "epoch": 0.86, "grad_norm": 0.3284594696993571, "learning_rate": 0.00013139176081781176, "loss": 1.1246, "step": 8956 }, { "epoch": 0.86, "grad_norm": 0.25822096562028224, "learning_rate": 0.00013137674060604627, "loss": 0.9168, "step": 8957 }, { "epoch": 0.86, "grad_norm": 0.26107725204700727, "learning_rate": 0.0001313617196090581, "loss": 1.2021, "step": 8958 }, { "epoch": 0.86, "grad_norm": 0.3179530192263277, "learning_rate": 0.0001313466978272232, "loss": 1.0834, "step": 8959 }, { "epoch": 0.86, "grad_norm": 0.2754800386668831, "learning_rate": 0.00013133167526091746, "loss": 1.0735, "step": 8960 }, { "epoch": 0.86, "grad_norm": 0.28969505946971996, "learning_rate": 0.00013131665191051686, "loss": 1.1824, "step": 8961 }, { "epoch": 0.86, "grad_norm": 0.3050269917318578, "learning_rate": 0.0001313016277763974, "loss": 1.1192, "step": 8962 }, { "epoch": 0.86, "grad_norm": 0.28997353889525096, "learning_rate": 0.00013128660285893502, "loss": 1.0988, "step": 8963 }, { "epoch": 0.86, "grad_norm": 0.2639957610030003, "learning_rate": 0.00013127157715850572, "loss": 1.095, "step": 8964 }, { "epoch": 0.86, "grad_norm": 0.25243571206061105, "learning_rate": 0.00013125655067548555, "loss": 1.0326, "step": 8965 }, { "epoch": 0.86, "grad_norm": 0.27769176135258244, "learning_rate": 0.00013124152341025057, "loss": 1.0556, "step": 8966 }, { "epoch": 0.86, "grad_norm": 0.31119684825896593, "learning_rate": 0.00013122649536317682, "loss": 1.0474, "step": 8967 }, { "epoch": 0.86, "grad_norm": 0.2899032223624125, "learning_rate": 0.0001312114665346404, "loss": 1.1379, "step": 8968 }, { "epoch": 0.86, "grad_norm": 0.26474621772474893, "learning_rate": 0.00013119643692501742, "loss": 0.9737, "step": 8969 }, { "epoch": 0.86, "grad_norm": 0.296070787125954, "learning_rate": 0.000131181406534684, "loss": 1.1303, "step": 8970 }, { "epoch": 0.86, "grad_norm": 0.28196083416898055, "learning_rate": 0.00013116637536401626, "loss": 0.9212, "step": 8971 }, { "epoch": 0.86, "grad_norm": 0.33777731349783074, "learning_rate": 0.00013115134341339042, "loss": 1.1114, "step": 8972 }, { "epoch": 0.86, "grad_norm": 0.2980471209168762, "learning_rate": 0.00013113631068318262, "loss": 1.0419, "step": 8973 }, { "epoch": 0.86, "grad_norm": 0.34067752191686806, "learning_rate": 0.00013112127717376906, "loss": 1.0489, "step": 8974 }, { "epoch": 0.86, "grad_norm": 0.2857921528011418, "learning_rate": 0.000131106242885526, "loss": 1.095, "step": 8975 }, { "epoch": 0.86, "grad_norm": 0.30595650571474364, "learning_rate": 0.0001310912078188297, "loss": 1.0579, "step": 8976 }, { "epoch": 0.86, "grad_norm": 0.3113974291276129, "learning_rate": 0.00013107617197405632, "loss": 1.1008, "step": 8977 }, { "epoch": 0.86, "grad_norm": 0.2835220329748002, "learning_rate": 0.00013106113535158223, "loss": 1.1758, "step": 8978 }, { "epoch": 0.86, "grad_norm": 0.30573600648132765, "learning_rate": 0.00013104609795178373, "loss": 1.0587, "step": 8979 }, { "epoch": 0.86, "grad_norm": 0.2724899362196218, "learning_rate": 0.00013103105977503712, "loss": 1.0202, "step": 8980 }, { "epoch": 0.86, "grad_norm": 0.28699936186440506, "learning_rate": 0.0001310160208217187, "loss": 1.1057, "step": 8981 }, { "epoch": 0.86, "grad_norm": 0.26272510332229243, "learning_rate": 0.00013100098109220486, "loss": 1.0776, "step": 8982 }, { "epoch": 0.86, "grad_norm": 0.27023197555661166, "learning_rate": 0.00013098594058687203, "loss": 1.1075, "step": 8983 }, { "epoch": 0.86, "grad_norm": 0.27849525197693725, "learning_rate": 0.00013097089930609653, "loss": 1.0556, "step": 8984 }, { "epoch": 0.86, "grad_norm": 0.2813628673155407, "learning_rate": 0.00013095585725025481, "loss": 0.9954, "step": 8985 }, { "epoch": 0.86, "grad_norm": 0.23165959095448188, "learning_rate": 0.00013094081441972333, "loss": 1.0472, "step": 8986 }, { "epoch": 0.86, "grad_norm": 0.2865879802186648, "learning_rate": 0.0001309257708148785, "loss": 1.0757, "step": 8987 }, { "epoch": 0.86, "grad_norm": 0.28003705635045895, "learning_rate": 0.00013091072643609683, "loss": 1.027, "step": 8988 }, { "epoch": 0.86, "grad_norm": 0.2518027494418857, "learning_rate": 0.0001308956812837548, "loss": 1.074, "step": 8989 }, { "epoch": 0.86, "grad_norm": 0.2578937586135656, "learning_rate": 0.0001308806353582289, "loss": 1.0831, "step": 8990 }, { "epoch": 0.86, "grad_norm": 0.27260063912350424, "learning_rate": 0.00013086558865989576, "loss": 1.0183, "step": 8991 }, { "epoch": 0.86, "grad_norm": 0.29385408837160987, "learning_rate": 0.0001308505411891318, "loss": 1.0944, "step": 8992 }, { "epoch": 0.86, "grad_norm": 0.293490265358121, "learning_rate": 0.0001308354929463137, "loss": 1.0724, "step": 8993 }, { "epoch": 0.86, "grad_norm": 0.2822276360093321, "learning_rate": 0.00013082044393181798, "loss": 1.0708, "step": 8994 }, { "epoch": 0.86, "grad_norm": 0.31600027722892315, "learning_rate": 0.0001308053941460213, "loss": 1.036, "step": 8995 }, { "epoch": 0.86, "grad_norm": 0.31974027288364265, "learning_rate": 0.00013079034358930028, "loss": 1.1244, "step": 8996 }, { "epoch": 0.86, "grad_norm": 0.2636215501400869, "learning_rate": 0.00013077529226203155, "loss": 0.9505, "step": 8997 }, { "epoch": 0.86, "grad_norm": 0.2732300022362931, "learning_rate": 0.00013076024016459177, "loss": 1.0561, "step": 8998 }, { "epoch": 0.86, "grad_norm": 0.33093910848931535, "learning_rate": 0.0001307451872973577, "loss": 1.0408, "step": 8999 }, { "epoch": 0.86, "grad_norm": 0.2880821403188908, "learning_rate": 0.00013073013366070595, "loss": 1.052, "step": 9000 }, { "epoch": 0.86, "grad_norm": 0.3061067194549959, "learning_rate": 0.0001307150792550133, "loss": 1.0279, "step": 9001 }, { "epoch": 0.86, "grad_norm": 0.2723856651427697, "learning_rate": 0.0001307000240806565, "loss": 1.0259, "step": 9002 }, { "epoch": 0.86, "grad_norm": 0.2997832873455384, "learning_rate": 0.0001306849681380123, "loss": 1.0171, "step": 9003 }, { "epoch": 0.86, "grad_norm": 0.268324862068108, "learning_rate": 0.00013066991142745746, "loss": 1.0015, "step": 9004 }, { "epoch": 0.86, "grad_norm": 0.3313758049625676, "learning_rate": 0.00013065485394936886, "loss": 1.0488, "step": 9005 }, { "epoch": 0.86, "grad_norm": 0.2840867112984875, "learning_rate": 0.00013063979570412324, "loss": 1.0547, "step": 9006 }, { "epoch": 0.86, "grad_norm": 0.2859672508903575, "learning_rate": 0.0001306247366920975, "loss": 1.0588, "step": 9007 }, { "epoch": 0.86, "grad_norm": 0.29147422535829376, "learning_rate": 0.00013060967691366844, "loss": 1.126, "step": 9008 }, { "epoch": 0.86, "grad_norm": 0.2736559351409344, "learning_rate": 0.00013059461636921298, "loss": 0.9615, "step": 9009 }, { "epoch": 0.86, "grad_norm": 0.2772933874203004, "learning_rate": 0.00013057955505910805, "loss": 1.085, "step": 9010 }, { "epoch": 0.86, "grad_norm": 0.29810412332818575, "learning_rate": 0.00013056449298373053, "loss": 1.0038, "step": 9011 }, { "epoch": 0.86, "grad_norm": 0.2844185615999324, "learning_rate": 0.00013054943014345732, "loss": 1.1034, "step": 9012 }, { "epoch": 0.86, "grad_norm": 0.3198834730415027, "learning_rate": 0.0001305343665386655, "loss": 1.0527, "step": 9013 }, { "epoch": 0.86, "grad_norm": 0.29395624791864494, "learning_rate": 0.00013051930216973192, "loss": 1.1132, "step": 9014 }, { "epoch": 0.86, "grad_norm": 0.28969238208023507, "learning_rate": 0.0001305042370370336, "loss": 0.8879, "step": 9015 }, { "epoch": 0.86, "grad_norm": 0.2718247193165485, "learning_rate": 0.0001304891711409476, "loss": 0.9514, "step": 9016 }, { "epoch": 0.86, "grad_norm": 0.2736779348162901, "learning_rate": 0.00013047410448185096, "loss": 1.0625, "step": 9017 }, { "epoch": 0.86, "grad_norm": 0.3190676200490636, "learning_rate": 0.00013045903706012066, "loss": 1.1119, "step": 9018 }, { "epoch": 0.86, "grad_norm": 0.2645211188163359, "learning_rate": 0.00013044396887613383, "loss": 1.0451, "step": 9019 }, { "epoch": 0.86, "grad_norm": 0.27409629322747, "learning_rate": 0.00013042889993026757, "loss": 1.0542, "step": 9020 }, { "epoch": 0.86, "grad_norm": 0.3057021252154314, "learning_rate": 0.00013041383022289893, "loss": 0.9845, "step": 9021 }, { "epoch": 0.86, "grad_norm": 0.31255114430184533, "learning_rate": 0.00013039875975440508, "loss": 0.923, "step": 9022 }, { "epoch": 0.86, "grad_norm": 0.28633949394067004, "learning_rate": 0.00013038368852516318, "loss": 1.0501, "step": 9023 }, { "epoch": 0.86, "grad_norm": 0.2635390208946298, "learning_rate": 0.00013036861653555038, "loss": 1.0946, "step": 9024 }, { "epoch": 0.86, "grad_norm": 0.2938192444403693, "learning_rate": 0.00013035354378594384, "loss": 1.0054, "step": 9025 }, { "epoch": 0.86, "grad_norm": 0.30536276310624666, "learning_rate": 0.0001303384702767208, "loss": 1.0497, "step": 9026 }, { "epoch": 0.86, "grad_norm": 0.2790718788746024, "learning_rate": 0.0001303233960082585, "loss": 1.0417, "step": 9027 }, { "epoch": 0.86, "grad_norm": 0.2816516990804407, "learning_rate": 0.00013030832098093412, "loss": 0.9402, "step": 9028 }, { "epoch": 0.86, "grad_norm": 0.31401103333965236, "learning_rate": 0.00013029324519512497, "loss": 0.9892, "step": 9029 }, { "epoch": 0.86, "grad_norm": 0.26148363698940397, "learning_rate": 0.00013027816865120834, "loss": 1.0242, "step": 9030 }, { "epoch": 0.86, "grad_norm": 0.2998596222624002, "learning_rate": 0.0001302630913495615, "loss": 1.0302, "step": 9031 }, { "epoch": 0.86, "grad_norm": 0.29176595806240635, "learning_rate": 0.00013024801329056178, "loss": 1.2279, "step": 9032 }, { "epoch": 0.86, "grad_norm": 0.3344098434780136, "learning_rate": 0.00013023293447458648, "loss": 1.0633, "step": 9033 }, { "epoch": 0.86, "grad_norm": 0.34296449020035136, "learning_rate": 0.00013021785490201305, "loss": 1.2539, "step": 9034 }, { "epoch": 0.86, "grad_norm": 0.2564122283684667, "learning_rate": 0.00013020277457321877, "loss": 1.0903, "step": 9035 }, { "epoch": 0.86, "grad_norm": 0.2917999377183397, "learning_rate": 0.00013018769348858107, "loss": 0.9977, "step": 9036 }, { "epoch": 0.86, "grad_norm": 0.31170385260658784, "learning_rate": 0.00013017261164847743, "loss": 0.8905, "step": 9037 }, { "epoch": 0.86, "grad_norm": 0.30015787859342813, "learning_rate": 0.00013015752905328514, "loss": 1.0676, "step": 9038 }, { "epoch": 0.86, "grad_norm": 0.43157315991514567, "learning_rate": 0.00013014244570338178, "loss": 1.1419, "step": 9039 }, { "epoch": 0.86, "grad_norm": 0.25061895805404616, "learning_rate": 0.0001301273615991448, "loss": 0.9809, "step": 9040 }, { "epoch": 0.86, "grad_norm": 0.2678433671294428, "learning_rate": 0.00013011227674095162, "loss": 0.951, "step": 9041 }, { "epoch": 0.87, "grad_norm": 0.2886930597012415, "learning_rate": 0.00013009719112917978, "loss": 1.0616, "step": 9042 }, { "epoch": 0.87, "grad_norm": 0.25785608296938134, "learning_rate": 0.00013008210476420684, "loss": 1.0351, "step": 9043 }, { "epoch": 0.87, "grad_norm": 0.25783062009926794, "learning_rate": 0.0001300670176464103, "loss": 1.0019, "step": 9044 }, { "epoch": 0.87, "grad_norm": 0.30369123418177635, "learning_rate": 0.00013005192977616777, "loss": 0.8641, "step": 9045 }, { "epoch": 0.87, "grad_norm": 0.3225428130185902, "learning_rate": 0.0001300368411538568, "loss": 1.1576, "step": 9046 }, { "epoch": 0.87, "grad_norm": 0.2967976819197218, "learning_rate": 0.00013002175177985502, "loss": 1.1703, "step": 9047 }, { "epoch": 0.87, "grad_norm": 0.3014813200032349, "learning_rate": 0.00013000666165454, "loss": 1.1789, "step": 9048 }, { "epoch": 0.87, "grad_norm": 0.2501779735414716, "learning_rate": 0.00012999157077828944, "loss": 1.105, "step": 9049 }, { "epoch": 0.87, "grad_norm": 0.32187566601028167, "learning_rate": 0.000129976479151481, "loss": 1.0835, "step": 9050 }, { "epoch": 0.87, "grad_norm": 0.27746438481780444, "learning_rate": 0.0001299613867744923, "loss": 1.0821, "step": 9051 }, { "epoch": 0.87, "grad_norm": 0.28339489212125185, "learning_rate": 0.00012994629364770102, "loss": 1.0247, "step": 9052 }, { "epoch": 0.87, "grad_norm": 0.27520392235897523, "learning_rate": 0.00012993119977148499, "loss": 0.9562, "step": 9053 }, { "epoch": 0.87, "grad_norm": 0.2575142187774334, "learning_rate": 0.0001299161051462218, "loss": 1.0923, "step": 9054 }, { "epoch": 0.87, "grad_norm": 0.2934030821339123, "learning_rate": 0.00012990100977228934, "loss": 1.0186, "step": 9055 }, { "epoch": 0.87, "grad_norm": 0.2825842909756287, "learning_rate": 0.0001298859136500653, "loss": 1.0244, "step": 9056 }, { "epoch": 0.87, "grad_norm": 0.2732801101975911, "learning_rate": 0.0001298708167799275, "loss": 1.1346, "step": 9057 }, { "epoch": 0.87, "grad_norm": 0.2835565308919408, "learning_rate": 0.0001298557191622537, "loss": 0.9529, "step": 9058 }, { "epoch": 0.87, "grad_norm": 0.32615560594023263, "learning_rate": 0.00012984062079742181, "loss": 1.1499, "step": 9059 }, { "epoch": 0.87, "grad_norm": 0.2869135232591449, "learning_rate": 0.00012982552168580962, "loss": 0.9194, "step": 9060 }, { "epoch": 0.87, "grad_norm": 0.3134787513394852, "learning_rate": 0.000129810421827795, "loss": 1.1952, "step": 9061 }, { "epoch": 0.87, "grad_norm": 0.32346472417562094, "learning_rate": 0.0001297953212237558, "loss": 1.023, "step": 9062 }, { "epoch": 0.87, "grad_norm": 0.3367904265131591, "learning_rate": 0.00012978021987407004, "loss": 1.1234, "step": 9063 }, { "epoch": 0.87, "grad_norm": 0.30432907150859745, "learning_rate": 0.0001297651177791155, "loss": 0.9762, "step": 9064 }, { "epoch": 0.87, "grad_norm": 0.3161449071160964, "learning_rate": 0.00012975001493927018, "loss": 0.9887, "step": 9065 }, { "epoch": 0.87, "grad_norm": 0.24592648677051562, "learning_rate": 0.00012973491135491206, "loss": 1.0069, "step": 9066 }, { "epoch": 0.87, "grad_norm": 0.26653722611873165, "learning_rate": 0.00012971980702641912, "loss": 1.0546, "step": 9067 }, { "epoch": 0.87, "grad_norm": 0.2862693690630182, "learning_rate": 0.00012970470195416931, "loss": 1.1177, "step": 9068 }, { "epoch": 0.87, "grad_norm": 0.29138542499171116, "learning_rate": 0.00012968959613854063, "loss": 1.1238, "step": 9069 }, { "epoch": 0.87, "grad_norm": 0.2859847406201628, "learning_rate": 0.0001296744895799112, "loss": 1.047, "step": 9070 }, { "epoch": 0.87, "grad_norm": 0.2807399557511384, "learning_rate": 0.000129659382278659, "loss": 0.9589, "step": 9071 }, { "epoch": 0.87, "grad_norm": 0.28450650670607563, "learning_rate": 0.0001296442742351621, "loss": 0.9999, "step": 9072 }, { "epoch": 0.87, "grad_norm": 0.31676772977788115, "learning_rate": 0.0001296291654497986, "loss": 0.978, "step": 9073 }, { "epoch": 0.87, "grad_norm": 0.2784851835452743, "learning_rate": 0.00012961405592294665, "loss": 1.0332, "step": 9074 }, { "epoch": 0.87, "grad_norm": 0.26832152402985765, "learning_rate": 0.0001295989456549843, "loss": 1.0811, "step": 9075 }, { "epoch": 0.87, "grad_norm": 0.29162865744847255, "learning_rate": 0.00012958383464628975, "loss": 1.1361, "step": 9076 }, { "epoch": 0.87, "grad_norm": 0.26672809187008667, "learning_rate": 0.00012956872289724116, "loss": 1.0796, "step": 9077 }, { "epoch": 0.87, "grad_norm": 0.27076549176531384, "learning_rate": 0.0001295536104082167, "loss": 1.1287, "step": 9078 }, { "epoch": 0.87, "grad_norm": 0.28696821632322866, "learning_rate": 0.00012953849717959454, "loss": 1.1795, "step": 9079 }, { "epoch": 0.87, "grad_norm": 0.2955303786908336, "learning_rate": 0.00012952338321175293, "loss": 1.0168, "step": 9080 }, { "epoch": 0.87, "grad_norm": 0.3039172006339761, "learning_rate": 0.00012950826850507011, "loss": 0.9734, "step": 9081 }, { "epoch": 0.87, "grad_norm": 0.2680076109464228, "learning_rate": 0.00012949315305992433, "loss": 1.1325, "step": 9082 }, { "epoch": 0.87, "grad_norm": 0.3197355029722075, "learning_rate": 0.00012947803687669385, "loss": 1.1474, "step": 9083 }, { "epoch": 0.87, "grad_norm": 0.260643212258335, "learning_rate": 0.00012946291995575697, "loss": 1.0691, "step": 9084 }, { "epoch": 0.87, "grad_norm": 0.28094689856026667, "learning_rate": 0.00012944780229749201, "loss": 1.1027, "step": 9085 }, { "epoch": 0.87, "grad_norm": 0.3354262462770161, "learning_rate": 0.00012943268390227727, "loss": 0.9943, "step": 9086 }, { "epoch": 0.87, "grad_norm": 0.2863437127555651, "learning_rate": 0.00012941756477049114, "loss": 1.101, "step": 9087 }, { "epoch": 0.87, "grad_norm": 0.24300912831153612, "learning_rate": 0.00012940244490251197, "loss": 0.9077, "step": 9088 }, { "epoch": 0.87, "grad_norm": 0.259039073094172, "learning_rate": 0.0001293873242987181, "loss": 1.117, "step": 9089 }, { "epoch": 0.87, "grad_norm": 0.2512219349760124, "learning_rate": 0.000129372202959488, "loss": 0.9785, "step": 9090 }, { "epoch": 0.87, "grad_norm": 0.27273837493945713, "learning_rate": 0.00012935708088520007, "loss": 0.9931, "step": 9091 }, { "epoch": 0.87, "grad_norm": 0.27530538774617, "learning_rate": 0.0001293419580762327, "loss": 1.0153, "step": 9092 }, { "epoch": 0.87, "grad_norm": 0.2926747917554771, "learning_rate": 0.0001293268345329644, "loss": 1.0461, "step": 9093 }, { "epoch": 0.87, "grad_norm": 0.2500212462256167, "learning_rate": 0.00012931171025577366, "loss": 1.0736, "step": 9094 }, { "epoch": 0.87, "grad_norm": 0.29111956874370015, "learning_rate": 0.00012929658524503894, "loss": 1.0124, "step": 9095 }, { "epoch": 0.87, "grad_norm": 0.263782277974996, "learning_rate": 0.00012928145950113877, "loss": 0.9819, "step": 9096 }, { "epoch": 0.87, "grad_norm": 0.2905200592972542, "learning_rate": 0.00012926633302445164, "loss": 1.0002, "step": 9097 }, { "epoch": 0.87, "grad_norm": 0.29496184216102744, "learning_rate": 0.00012925120581535614, "loss": 1.0787, "step": 9098 }, { "epoch": 0.87, "grad_norm": 0.3117510201468771, "learning_rate": 0.00012923607787423085, "loss": 1.0186, "step": 9099 }, { "epoch": 0.87, "grad_norm": 0.32624449591492205, "learning_rate": 0.00012922094920145432, "loss": 1.1453, "step": 9100 }, { "epoch": 0.87, "grad_norm": 0.33313379774620716, "learning_rate": 0.0001292058197974052, "loss": 1.1103, "step": 9101 }, { "epoch": 0.87, "grad_norm": 0.30476232541112913, "learning_rate": 0.0001291906896624621, "loss": 1.0197, "step": 9102 }, { "epoch": 0.87, "grad_norm": 0.2966527557371042, "learning_rate": 0.00012917555879700358, "loss": 1.0609, "step": 9103 }, { "epoch": 0.87, "grad_norm": 0.3050234189493314, "learning_rate": 0.0001291604272014084, "loss": 1.098, "step": 9104 }, { "epoch": 0.87, "grad_norm": 0.27720296715762255, "learning_rate": 0.0001291452948760552, "loss": 1.0512, "step": 9105 }, { "epoch": 0.87, "grad_norm": 0.3042006172159329, "learning_rate": 0.00012913016182132268, "loss": 0.9907, "step": 9106 }, { "epoch": 0.87, "grad_norm": 0.3068804882948529, "learning_rate": 0.00012911502803758954, "loss": 1.1273, "step": 9107 }, { "epoch": 0.87, "grad_norm": 0.25437915672597605, "learning_rate": 0.00012909989352523455, "loss": 1.0053, "step": 9108 }, { "epoch": 0.87, "grad_norm": 0.2588485696040596, "learning_rate": 0.00012908475828463643, "loss": 1.069, "step": 9109 }, { "epoch": 0.87, "grad_norm": 0.288489014040999, "learning_rate": 0.00012906962231617396, "loss": 1.1162, "step": 9110 }, { "epoch": 0.87, "grad_norm": 0.2902499883183751, "learning_rate": 0.00012905448562022592, "loss": 0.959, "step": 9111 }, { "epoch": 0.87, "grad_norm": 0.2885389820207888, "learning_rate": 0.00012903934819717108, "loss": 1.0781, "step": 9112 }, { "epoch": 0.87, "grad_norm": 0.2434151493288888, "learning_rate": 0.00012902421004738833, "loss": 1.1026, "step": 9113 }, { "epoch": 0.87, "grad_norm": 0.27724613447577373, "learning_rate": 0.0001290090711712565, "loss": 0.9479, "step": 9114 }, { "epoch": 0.87, "grad_norm": 0.2638723940275514, "learning_rate": 0.00012899393156915438, "loss": 1.0193, "step": 9115 }, { "epoch": 0.87, "grad_norm": 0.3146091758887233, "learning_rate": 0.00012897879124146094, "loss": 1.0415, "step": 9116 }, { "epoch": 0.87, "grad_norm": 0.2781231516386979, "learning_rate": 0.00012896365018855502, "loss": 1.1148, "step": 9117 }, { "epoch": 0.87, "grad_norm": 0.2814021450123451, "learning_rate": 0.00012894850841081555, "loss": 0.9947, "step": 9118 }, { "epoch": 0.87, "grad_norm": 0.2951572978095963, "learning_rate": 0.0001289333659086215, "loss": 1.0666, "step": 9119 }, { "epoch": 0.87, "grad_norm": 0.2789547266879171, "learning_rate": 0.00012891822268235175, "loss": 1.1119, "step": 9120 }, { "epoch": 0.87, "grad_norm": 0.2944178727397588, "learning_rate": 0.0001289030787323853, "loss": 1.0622, "step": 9121 }, { "epoch": 0.87, "grad_norm": 0.3002264804462691, "learning_rate": 0.00012888793405910117, "loss": 1.1301, "step": 9122 }, { "epoch": 0.87, "grad_norm": 0.29596892828518445, "learning_rate": 0.0001288727886628783, "loss": 0.9955, "step": 9123 }, { "epoch": 0.87, "grad_norm": 0.28581897006530516, "learning_rate": 0.00012885764254409577, "loss": 1.0991, "step": 9124 }, { "epoch": 0.87, "grad_norm": 0.2848802047710548, "learning_rate": 0.0001288424957031326, "loss": 1.1973, "step": 9125 }, { "epoch": 0.87, "grad_norm": 0.30273121467305963, "learning_rate": 0.00012882734814036783, "loss": 1.1433, "step": 9126 }, { "epoch": 0.87, "grad_norm": 0.26901216004525036, "learning_rate": 0.00012881219985618058, "loss": 0.9651, "step": 9127 }, { "epoch": 0.87, "grad_norm": 0.29357748445834536, "learning_rate": 0.0001287970508509499, "loss": 1.0533, "step": 9128 }, { "epoch": 0.87, "grad_norm": 0.28302245682622934, "learning_rate": 0.00012878190112505496, "loss": 1.1715, "step": 9129 }, { "epoch": 0.87, "grad_norm": 0.32220788805164724, "learning_rate": 0.0001287667506788748, "loss": 1.037, "step": 9130 }, { "epoch": 0.87, "grad_norm": 0.2838656933293558, "learning_rate": 0.00012875159951278867, "loss": 0.9909, "step": 9131 }, { "epoch": 0.87, "grad_norm": 0.31812680763347256, "learning_rate": 0.0001287364476271757, "loss": 1.0599, "step": 9132 }, { "epoch": 0.87, "grad_norm": 0.2729154895834484, "learning_rate": 0.00012872129502241502, "loss": 1.0977, "step": 9133 }, { "epoch": 0.87, "grad_norm": 0.3125996237172212, "learning_rate": 0.0001287061416988859, "loss": 1.0514, "step": 9134 }, { "epoch": 0.87, "grad_norm": 0.2807631918763277, "learning_rate": 0.00012869098765696757, "loss": 1.051, "step": 9135 }, { "epoch": 0.87, "grad_norm": 0.31219828383217524, "learning_rate": 0.0001286758328970392, "loss": 1.0464, "step": 9136 }, { "epoch": 0.87, "grad_norm": 0.2639358482520139, "learning_rate": 0.0001286606774194801, "loss": 1.0578, "step": 9137 }, { "epoch": 0.87, "grad_norm": 0.2974661876294612, "learning_rate": 0.00012864552122466956, "loss": 0.9595, "step": 9138 }, { "epoch": 0.87, "grad_norm": 0.2926598180262982, "learning_rate": 0.00012863036431298684, "loss": 1.1708, "step": 9139 }, { "epoch": 0.87, "grad_norm": 0.25929024999451794, "learning_rate": 0.00012861520668481122, "loss": 0.9799, "step": 9140 }, { "epoch": 0.87, "grad_norm": 0.29654960563184474, "learning_rate": 0.0001286000483405221, "loss": 1.1523, "step": 9141 }, { "epoch": 0.87, "grad_norm": 0.27691848301002864, "learning_rate": 0.00012858488928049882, "loss": 1.0334, "step": 9142 }, { "epoch": 0.87, "grad_norm": 0.29295271269701484, "learning_rate": 0.00012856972950512068, "loss": 1.0546, "step": 9143 }, { "epoch": 0.87, "grad_norm": 0.2803146245701744, "learning_rate": 0.00012855456901476712, "loss": 0.9898, "step": 9144 }, { "epoch": 0.87, "grad_norm": 0.2863989751594063, "learning_rate": 0.00012853940780981751, "loss": 1.1504, "step": 9145 }, { "epoch": 0.88, "grad_norm": 0.2739972569019825, "learning_rate": 0.00012852424589065132, "loss": 0.923, "step": 9146 }, { "epoch": 0.88, "grad_norm": 0.25052615701520137, "learning_rate": 0.0001285090832576479, "loss": 1.0061, "step": 9147 }, { "epoch": 0.88, "grad_norm": 0.3030142194521633, "learning_rate": 0.00012849391991118683, "loss": 1.0694, "step": 9148 }, { "epoch": 0.88, "grad_norm": 0.2714696456433213, "learning_rate": 0.00012847875585164745, "loss": 1.0009, "step": 9149 }, { "epoch": 0.88, "grad_norm": 0.28875446135458116, "learning_rate": 0.00012846359107940931, "loss": 1.1649, "step": 9150 }, { "epoch": 0.88, "grad_norm": 0.3150764287272924, "learning_rate": 0.00012844842559485192, "loss": 0.9091, "step": 9151 }, { "epoch": 0.88, "grad_norm": 0.27701993372597217, "learning_rate": 0.00012843325939835483, "loss": 1.083, "step": 9152 }, { "epoch": 0.88, "grad_norm": 0.29338798312190123, "learning_rate": 0.00012841809249029747, "loss": 0.9318, "step": 9153 }, { "epoch": 0.88, "grad_norm": 0.30076117068870556, "learning_rate": 0.00012840292487105955, "loss": 1.0577, "step": 9154 }, { "epoch": 0.88, "grad_norm": 0.26916632631906984, "learning_rate": 0.0001283877565410206, "loss": 1.0901, "step": 9155 }, { "epoch": 0.88, "grad_norm": 0.2863676566010998, "learning_rate": 0.00012837258750056016, "loss": 1.0916, "step": 9156 }, { "epoch": 0.88, "grad_norm": 0.311513288826919, "learning_rate": 0.0001283574177500579, "loss": 0.9895, "step": 9157 }, { "epoch": 0.88, "grad_norm": 0.2788747044437777, "learning_rate": 0.00012834224728989344, "loss": 1.0058, "step": 9158 }, { "epoch": 0.88, "grad_norm": 0.31246123148504296, "learning_rate": 0.00012832707612044642, "loss": 1.1208, "step": 9159 }, { "epoch": 0.88, "grad_norm": 0.3153487374039508, "learning_rate": 0.00012831190424209655, "loss": 1.0619, "step": 9160 }, { "epoch": 0.88, "grad_norm": 0.2979832645966896, "learning_rate": 0.00012829673165522343, "loss": 1.0788, "step": 9161 }, { "epoch": 0.88, "grad_norm": 0.27761648021394475, "learning_rate": 0.00012828155836020687, "loss": 1.0676, "step": 9162 }, { "epoch": 0.88, "grad_norm": 0.2966190620374098, "learning_rate": 0.00012826638435742654, "loss": 1.127, "step": 9163 }, { "epoch": 0.88, "grad_norm": 0.2677449239886606, "learning_rate": 0.0001282512096472621, "loss": 1.1182, "step": 9164 }, { "epoch": 0.88, "grad_norm": 0.2955895529683899, "learning_rate": 0.00012823603423009347, "loss": 0.8931, "step": 9165 }, { "epoch": 0.88, "grad_norm": 0.2864122311531748, "learning_rate": 0.0001282208581063003, "loss": 1.1078, "step": 9166 }, { "epoch": 0.88, "grad_norm": 0.2901266851305628, "learning_rate": 0.00012820568127626242, "loss": 1.0239, "step": 9167 }, { "epoch": 0.88, "grad_norm": 0.3049079996092775, "learning_rate": 0.00012819050374035962, "loss": 1.0681, "step": 9168 }, { "epoch": 0.88, "grad_norm": 0.27371968074107056, "learning_rate": 0.0001281753254989718, "loss": 1.0393, "step": 9169 }, { "epoch": 0.88, "grad_norm": 0.31873095311089905, "learning_rate": 0.0001281601465524787, "loss": 1.0495, "step": 9170 }, { "epoch": 0.88, "grad_norm": 0.33430466740169595, "learning_rate": 0.00012814496690126027, "loss": 1.0828, "step": 9171 }, { "epoch": 0.88, "grad_norm": 0.2793091890823801, "learning_rate": 0.00012812978654569635, "loss": 1.0, "step": 9172 }, { "epoch": 0.88, "grad_norm": 0.28127672160759526, "learning_rate": 0.00012811460548616682, "loss": 1.1068, "step": 9173 }, { "epoch": 0.88, "grad_norm": 0.2822256164444625, "learning_rate": 0.00012809942372305164, "loss": 1.1039, "step": 9174 }, { "epoch": 0.88, "grad_norm": 0.2989368506283751, "learning_rate": 0.0001280842412567307, "loss": 1.0661, "step": 9175 }, { "epoch": 0.88, "grad_norm": 0.2787834185347091, "learning_rate": 0.000128069058087584, "loss": 1.0754, "step": 9176 }, { "epoch": 0.88, "grad_norm": 0.26459335862211264, "learning_rate": 0.00012805387421599144, "loss": 1.0921, "step": 9177 }, { "epoch": 0.88, "grad_norm": 0.2717879411221796, "learning_rate": 0.0001280386896423331, "loss": 1.0256, "step": 9178 }, { "epoch": 0.88, "grad_norm": 0.3392781499730835, "learning_rate": 0.00012802350436698888, "loss": 0.963, "step": 9179 }, { "epoch": 0.88, "grad_norm": 0.3008890143060967, "learning_rate": 0.0001280083183903389, "loss": 1.0453, "step": 9180 }, { "epoch": 0.88, "grad_norm": 0.2859398503777074, "learning_rate": 0.00012799313171276308, "loss": 1.095, "step": 9181 }, { "epoch": 0.88, "grad_norm": 0.269640842666525, "learning_rate": 0.0001279779443346416, "loss": 1.0724, "step": 9182 }, { "epoch": 0.88, "grad_norm": 0.28038845119609257, "learning_rate": 0.0001279627562563545, "loss": 1.0738, "step": 9183 }, { "epoch": 0.88, "grad_norm": 0.2887167863295914, "learning_rate": 0.00012794756747828179, "loss": 1.0641, "step": 9184 }, { "epoch": 0.88, "grad_norm": 0.29441549775626563, "learning_rate": 0.00012793237800080365, "loss": 1.1956, "step": 9185 }, { "epoch": 0.88, "grad_norm": 0.2614449210833528, "learning_rate": 0.00012791718782430024, "loss": 0.9941, "step": 9186 }, { "epoch": 0.88, "grad_norm": 0.27898220786899924, "learning_rate": 0.00012790199694915163, "loss": 1.0125, "step": 9187 }, { "epoch": 0.88, "grad_norm": 0.2623293441843174, "learning_rate": 0.000127886805375738, "loss": 1.018, "step": 9188 }, { "epoch": 0.88, "grad_norm": 0.24474445860858335, "learning_rate": 0.00012787161310443958, "loss": 0.9925, "step": 9189 }, { "epoch": 0.88, "grad_norm": 0.26691886103805196, "learning_rate": 0.0001278564201356365, "loss": 1.0262, "step": 9190 }, { "epoch": 0.88, "grad_norm": 0.2996480479285695, "learning_rate": 0.000127841226469709, "loss": 1.1404, "step": 9191 }, { "epoch": 0.88, "grad_norm": 0.2831005610084299, "learning_rate": 0.0001278260321070373, "loss": 1.0199, "step": 9192 }, { "epoch": 0.88, "grad_norm": 0.3016403069097424, "learning_rate": 0.00012781083704800167, "loss": 1.1395, "step": 9193 }, { "epoch": 0.88, "grad_norm": 0.258599684442815, "learning_rate": 0.00012779564129298233, "loss": 1.0072, "step": 9194 }, { "epoch": 0.88, "grad_norm": 0.28981820294114957, "learning_rate": 0.00012778044484235964, "loss": 1.0872, "step": 9195 }, { "epoch": 0.88, "grad_norm": 0.2944671368276984, "learning_rate": 0.0001277652476965139, "loss": 0.9736, "step": 9196 }, { "epoch": 0.88, "grad_norm": 0.32250052614896707, "learning_rate": 0.0001277500498558253, "loss": 1.0861, "step": 9197 }, { "epoch": 0.88, "grad_norm": 0.33413812354501804, "learning_rate": 0.00012773485132067428, "loss": 0.9882, "step": 9198 }, { "epoch": 0.88, "grad_norm": 0.2396696634982347, "learning_rate": 0.00012771965209144122, "loss": 0.999, "step": 9199 }, { "epoch": 0.88, "grad_norm": 0.2871461793248707, "learning_rate": 0.00012770445216850638, "loss": 0.9985, "step": 9200 }, { "epoch": 0.88, "grad_norm": 0.27681646471608834, "learning_rate": 0.00012768925155225025, "loss": 0.9664, "step": 9201 }, { "epoch": 0.88, "grad_norm": 0.27957278139129027, "learning_rate": 0.00012767405024305322, "loss": 1.0305, "step": 9202 }, { "epoch": 0.88, "grad_norm": 0.28753086690595353, "learning_rate": 0.00012765884824129565, "loss": 1.0237, "step": 9203 }, { "epoch": 0.88, "grad_norm": 0.3111440689789316, "learning_rate": 0.000127643645547358, "loss": 1.2017, "step": 9204 }, { "epoch": 0.88, "grad_norm": 0.2921443717707693, "learning_rate": 0.0001276284421616208, "loss": 1.029, "step": 9205 }, { "epoch": 0.88, "grad_norm": 0.30261904265294104, "learning_rate": 0.00012761323808446447, "loss": 1.0758, "step": 9206 }, { "epoch": 0.88, "grad_norm": 0.25772792131049205, "learning_rate": 0.00012759803331626948, "loss": 1.0139, "step": 9207 }, { "epoch": 0.88, "grad_norm": 0.3046831790519165, "learning_rate": 0.00012758282785741638, "loss": 1.0825, "step": 9208 }, { "epoch": 0.88, "grad_norm": 0.3183296731093666, "learning_rate": 0.00012756762170828566, "loss": 0.9899, "step": 9209 }, { "epoch": 0.88, "grad_norm": 0.2817939566711674, "learning_rate": 0.0001275524148692579, "loss": 0.9752, "step": 9210 }, { "epoch": 0.88, "grad_norm": 0.2665373598422484, "learning_rate": 0.0001275372073407136, "loss": 1.0575, "step": 9211 }, { "epoch": 0.88, "grad_norm": 0.2725700632449814, "learning_rate": 0.00012752199912303345, "loss": 1.0651, "step": 9212 }, { "epoch": 0.88, "grad_norm": 0.29400308946907394, "learning_rate": 0.00012750679021659794, "loss": 1.0696, "step": 9213 }, { "epoch": 0.88, "grad_norm": 0.297641959611062, "learning_rate": 0.00012749158062178769, "loss": 1.0051, "step": 9214 }, { "epoch": 0.88, "grad_norm": 0.24521868429916097, "learning_rate": 0.0001274763703389834, "loss": 0.9303, "step": 9215 }, { "epoch": 0.88, "grad_norm": 0.29297299579392244, "learning_rate": 0.00012746115936856564, "loss": 1.1431, "step": 9216 }, { "epoch": 0.88, "grad_norm": 0.26851527665671227, "learning_rate": 0.00012744594771091513, "loss": 1.0198, "step": 9217 }, { "epoch": 0.88, "grad_norm": 0.2819204613169668, "learning_rate": 0.0001274307353664125, "loss": 1.1456, "step": 9218 }, { "epoch": 0.88, "grad_norm": 0.3038986733258005, "learning_rate": 0.00012741552233543852, "loss": 0.9887, "step": 9219 }, { "epoch": 0.88, "grad_norm": 0.3047117203847732, "learning_rate": 0.0001274003086183738, "loss": 1.069, "step": 9220 }, { "epoch": 0.88, "grad_norm": 0.2920407974764749, "learning_rate": 0.0001273850942155992, "loss": 1.113, "step": 9221 }, { "epoch": 0.88, "grad_norm": 0.28997161723843756, "learning_rate": 0.0001273698791274954, "loss": 0.9928, "step": 9222 }, { "epoch": 0.88, "grad_norm": 0.2601062102739676, "learning_rate": 0.00012735466335444314, "loss": 1.0277, "step": 9223 }, { "epoch": 0.88, "grad_norm": 0.24196462571085023, "learning_rate": 0.00012733944689682325, "loss": 1.0118, "step": 9224 }, { "epoch": 0.88, "grad_norm": 0.27692067285133026, "learning_rate": 0.00012732422975501653, "loss": 1.1095, "step": 9225 }, { "epoch": 0.88, "grad_norm": 0.25760809183221073, "learning_rate": 0.0001273090119294038, "loss": 0.9548, "step": 9226 }, { "epoch": 0.88, "grad_norm": 0.25859993613149374, "learning_rate": 0.00012729379342036587, "loss": 0.9637, "step": 9227 }, { "epoch": 0.88, "grad_norm": 0.2540462826516634, "learning_rate": 0.00012727857422828359, "loss": 1.1307, "step": 9228 }, { "epoch": 0.88, "grad_norm": 0.2921227707514754, "learning_rate": 0.00012726335435353785, "loss": 1.1008, "step": 9229 }, { "epoch": 0.88, "grad_norm": 0.2984915373574552, "learning_rate": 0.00012724813379650954, "loss": 1.0848, "step": 9230 }, { "epoch": 0.88, "grad_norm": 0.3095681979770164, "learning_rate": 0.00012723291255757957, "loss": 1.1033, "step": 9231 }, { "epoch": 0.88, "grad_norm": 0.28318769682527445, "learning_rate": 0.00012721769063712884, "loss": 1.0971, "step": 9232 }, { "epoch": 0.88, "grad_norm": 0.27525707454107234, "learning_rate": 0.00012720246803553828, "loss": 1.0534, "step": 9233 }, { "epoch": 0.88, "grad_norm": 0.28825651516813144, "learning_rate": 0.0001271872447531889, "loss": 1.1621, "step": 9234 }, { "epoch": 0.88, "grad_norm": 0.3010094105143729, "learning_rate": 0.0001271720207904616, "loss": 1.072, "step": 9235 }, { "epoch": 0.88, "grad_norm": 0.24956098318442146, "learning_rate": 0.00012715679614773738, "loss": 1.2222, "step": 9236 }, { "epoch": 0.88, "grad_norm": 0.2764944338239069, "learning_rate": 0.00012714157082539733, "loss": 1.0995, "step": 9237 }, { "epoch": 0.88, "grad_norm": 0.27634606116433363, "learning_rate": 0.00012712634482382238, "loss": 1.036, "step": 9238 }, { "epoch": 0.88, "grad_norm": 0.2794870761616325, "learning_rate": 0.0001271111181433936, "loss": 1.1254, "step": 9239 }, { "epoch": 0.88, "grad_norm": 0.2601345404562058, "learning_rate": 0.00012709589078449204, "loss": 1.1053, "step": 9240 }, { "epoch": 0.88, "grad_norm": 0.2622935878916725, "learning_rate": 0.0001270806627474988, "loss": 1.0455, "step": 9241 }, { "epoch": 0.88, "grad_norm": 0.29883490193917805, "learning_rate": 0.00012706543403279497, "loss": 0.9762, "step": 9242 }, { "epoch": 0.88, "grad_norm": 0.22869752618561462, "learning_rate": 0.0001270502046407616, "loss": 0.9916, "step": 9243 }, { "epoch": 0.88, "grad_norm": 0.29444348606081083, "learning_rate": 0.00012703497457177988, "loss": 0.9847, "step": 9244 }, { "epoch": 0.88, "grad_norm": 0.2938071041973925, "learning_rate": 0.00012701974382623094, "loss": 1.0545, "step": 9245 }, { "epoch": 0.88, "grad_norm": 0.26894685253793144, "learning_rate": 0.00012700451240449593, "loss": 1.0102, "step": 9246 }, { "epoch": 0.88, "grad_norm": 0.3295306409371883, "learning_rate": 0.00012698928030695602, "loss": 1.1135, "step": 9247 }, { "epoch": 0.88, "grad_norm": 0.29519953492147355, "learning_rate": 0.0001269740475339924, "loss": 1.1716, "step": 9248 }, { "epoch": 0.88, "grad_norm": 0.31644779195394845, "learning_rate": 0.0001269588140859863, "loss": 1.0338, "step": 9249 }, { "epoch": 0.88, "grad_norm": 0.28862661343334506, "learning_rate": 0.00012694357996331893, "loss": 0.9805, "step": 9250 }, { "epoch": 0.89, "grad_norm": 0.29109092097690287, "learning_rate": 0.00012692834516637156, "loss": 1.0948, "step": 9251 }, { "epoch": 0.89, "grad_norm": 0.2755208136905377, "learning_rate": 0.00012691310969552538, "loss": 1.0859, "step": 9252 }, { "epoch": 0.89, "grad_norm": 0.2947036722853116, "learning_rate": 0.00012689787355116177, "loss": 1.0358, "step": 9253 }, { "epoch": 0.89, "grad_norm": 0.2871135631256799, "learning_rate": 0.00012688263673366195, "loss": 1.0249, "step": 9254 }, { "epoch": 0.89, "grad_norm": 0.2562542263019815, "learning_rate": 0.0001268673992434072, "loss": 1.0182, "step": 9255 }, { "epoch": 0.89, "grad_norm": 0.31659182199731395, "learning_rate": 0.00012685216108077895, "loss": 1.0291, "step": 9256 }, { "epoch": 0.89, "grad_norm": 0.2933670124167318, "learning_rate": 0.0001268369222461585, "loss": 1.0631, "step": 9257 }, { "epoch": 0.89, "grad_norm": 0.2657225784590937, "learning_rate": 0.0001268216827399272, "loss": 1.119, "step": 9258 }, { "epoch": 0.89, "grad_norm": 0.2797518168945411, "learning_rate": 0.00012680644256246642, "loss": 0.9506, "step": 9259 }, { "epoch": 0.89, "grad_norm": 0.3020240632022724, "learning_rate": 0.00012679120171415757, "loss": 0.9763, "step": 9260 }, { "epoch": 0.89, "grad_norm": 0.2637706589429481, "learning_rate": 0.00012677596019538206, "loss": 1.0194, "step": 9261 }, { "epoch": 0.89, "grad_norm": 0.31418144937011505, "learning_rate": 0.0001267607180065213, "loss": 1.1327, "step": 9262 }, { "epoch": 0.89, "grad_norm": 0.2952874649931668, "learning_rate": 0.00012674547514795675, "loss": 1.0817, "step": 9263 }, { "epoch": 0.89, "grad_norm": 0.3150374662852804, "learning_rate": 0.00012673023162006989, "loss": 1.0925, "step": 9264 }, { "epoch": 0.89, "grad_norm": 0.3198349522638215, "learning_rate": 0.0001267149874232422, "loss": 1.1409, "step": 9265 }, { "epoch": 0.89, "grad_norm": 0.25854203157035266, "learning_rate": 0.00012669974255785516, "loss": 1.138, "step": 9266 }, { "epoch": 0.89, "grad_norm": 0.2608990015480636, "learning_rate": 0.00012668449702429028, "loss": 1.0271, "step": 9267 }, { "epoch": 0.89, "grad_norm": 0.29909886728393076, "learning_rate": 0.0001266692508229291, "loss": 1.0969, "step": 9268 }, { "epoch": 0.89, "grad_norm": 0.23909051250886207, "learning_rate": 0.0001266540039541531, "loss": 0.9744, "step": 9269 }, { "epoch": 0.89, "grad_norm": 0.28087485327219586, "learning_rate": 0.00012663875641834394, "loss": 1.1004, "step": 9270 }, { "epoch": 0.89, "grad_norm": 0.2552216763566098, "learning_rate": 0.0001266235082158832, "loss": 0.9802, "step": 9271 }, { "epoch": 0.89, "grad_norm": 0.28763301871999136, "learning_rate": 0.00012660825934715235, "loss": 1.0887, "step": 9272 }, { "epoch": 0.89, "grad_norm": 0.29630933468752285, "learning_rate": 0.00012659300981253315, "loss": 1.1228, "step": 9273 }, { "epoch": 0.89, "grad_norm": 0.29194868948582664, "learning_rate": 0.00012657775961240713, "loss": 1.0174, "step": 9274 }, { "epoch": 0.89, "grad_norm": 0.2617941239283223, "learning_rate": 0.000126562508747156, "loss": 1.0721, "step": 9275 }, { "epoch": 0.89, "grad_norm": 0.28279751520459484, "learning_rate": 0.00012654725721716138, "loss": 1.0261, "step": 9276 }, { "epoch": 0.89, "grad_norm": 0.30103442627581245, "learning_rate": 0.00012653200502280498, "loss": 1.1181, "step": 9277 }, { "epoch": 0.89, "grad_norm": 0.27808862986365707, "learning_rate": 0.00012651675216446848, "loss": 1.0745, "step": 9278 }, { "epoch": 0.89, "grad_norm": 0.29656831264442357, "learning_rate": 0.00012650149864253357, "loss": 1.1474, "step": 9279 }, { "epoch": 0.89, "grad_norm": 0.27503893982251604, "learning_rate": 0.000126486244457382, "loss": 1.0173, "step": 9280 }, { "epoch": 0.89, "grad_norm": 0.2657056986153241, "learning_rate": 0.00012647098960939554, "loss": 0.9806, "step": 9281 }, { "epoch": 0.89, "grad_norm": 0.25653477829913407, "learning_rate": 0.0001264557340989559, "loss": 1.0358, "step": 9282 }, { "epoch": 0.89, "grad_norm": 0.29619037014025956, "learning_rate": 0.0001264404779264449, "loss": 1.1536, "step": 9283 }, { "epoch": 0.89, "grad_norm": 0.2822157801841811, "learning_rate": 0.00012642522109224434, "loss": 1.1011, "step": 9284 }, { "epoch": 0.89, "grad_norm": 0.31455283414554064, "learning_rate": 0.000126409963596736, "loss": 0.9813, "step": 9285 }, { "epoch": 0.89, "grad_norm": 0.2775822826731524, "learning_rate": 0.0001263947054403017, "loss": 0.951, "step": 9286 }, { "epoch": 0.89, "grad_norm": 0.2575236283104343, "learning_rate": 0.00012637944662332332, "loss": 0.9371, "step": 9287 }, { "epoch": 0.89, "grad_norm": 0.24149231079738087, "learning_rate": 0.00012636418714618273, "loss": 0.9715, "step": 9288 }, { "epoch": 0.89, "grad_norm": 0.2831753975004831, "learning_rate": 0.00012634892700926178, "loss": 1.0039, "step": 9289 }, { "epoch": 0.89, "grad_norm": 0.29422901876914576, "learning_rate": 0.00012633366621294238, "loss": 1.1093, "step": 9290 }, { "epoch": 0.89, "grad_norm": 0.27392177332876777, "learning_rate": 0.00012631840475760644, "loss": 1.1547, "step": 9291 }, { "epoch": 0.89, "grad_norm": 0.27799764214008904, "learning_rate": 0.00012630314264363584, "loss": 1.1274, "step": 9292 }, { "epoch": 0.89, "grad_norm": 0.31072988212098307, "learning_rate": 0.0001262878798714126, "loss": 1.0469, "step": 9293 }, { "epoch": 0.89, "grad_norm": 0.31492709765160437, "learning_rate": 0.00012627261644131862, "loss": 0.9578, "step": 9294 }, { "epoch": 0.89, "grad_norm": 0.2528035347256275, "learning_rate": 0.00012625735235373593, "loss": 1.0668, "step": 9295 }, { "epoch": 0.89, "grad_norm": 0.3155214475191695, "learning_rate": 0.00012624208760904647, "loss": 0.9916, "step": 9296 }, { "epoch": 0.89, "grad_norm": 0.302087235196872, "learning_rate": 0.00012622682220763228, "loss": 1.0884, "step": 9297 }, { "epoch": 0.89, "grad_norm": 0.2990095102194339, "learning_rate": 0.00012621155614987538, "loss": 1.0101, "step": 9298 }, { "epoch": 0.89, "grad_norm": 0.26706525825653776, "learning_rate": 0.00012619628943615782, "loss": 1.0278, "step": 9299 }, { "epoch": 0.89, "grad_norm": 0.27756280255006816, "learning_rate": 0.00012618102206686166, "loss": 1.0099, "step": 9300 }, { "epoch": 0.89, "grad_norm": 0.30586196916112135, "learning_rate": 0.00012616575404236899, "loss": 1.0915, "step": 9301 }, { "epoch": 0.89, "grad_norm": 0.25272225815414157, "learning_rate": 0.0001261504853630618, "loss": 1.0799, "step": 9302 }, { "epoch": 0.89, "grad_norm": 0.3258028723785073, "learning_rate": 0.00012613521602932237, "loss": 0.9907, "step": 9303 }, { "epoch": 0.89, "grad_norm": 0.3105133570818196, "learning_rate": 0.00012611994604153269, "loss": 1.0494, "step": 9304 }, { "epoch": 0.89, "grad_norm": 0.2853288951774398, "learning_rate": 0.00012610467540007494, "loss": 1.0367, "step": 9305 }, { "epoch": 0.89, "grad_norm": 0.30136724902716244, "learning_rate": 0.00012608940410533127, "loss": 1.0905, "step": 9306 }, { "epoch": 0.89, "grad_norm": 0.27766552230271924, "learning_rate": 0.00012607413215768388, "loss": 1.0067, "step": 9307 }, { "epoch": 0.89, "grad_norm": 0.2839527667018618, "learning_rate": 0.00012605885955751497, "loss": 0.9767, "step": 9308 }, { "epoch": 0.89, "grad_norm": 0.30687156353062944, "learning_rate": 0.0001260435863052067, "loss": 1.0725, "step": 9309 }, { "epoch": 0.89, "grad_norm": 0.3156040190026411, "learning_rate": 0.0001260283124011413, "loss": 1.0457, "step": 9310 }, { "epoch": 0.89, "grad_norm": 0.3104920835342392, "learning_rate": 0.00012601303784570106, "loss": 1.1438, "step": 9311 }, { "epoch": 0.89, "grad_norm": 0.27017582473335783, "learning_rate": 0.0001259977626392682, "loss": 1.052, "step": 9312 }, { "epoch": 0.89, "grad_norm": 0.27144499750698986, "learning_rate": 0.00012598248678222498, "loss": 1.0559, "step": 9313 }, { "epoch": 0.89, "grad_norm": 0.2675880100657242, "learning_rate": 0.0001259672102749537, "loss": 0.9763, "step": 9314 }, { "epoch": 0.89, "grad_norm": 0.2955864081602817, "learning_rate": 0.00012595193311783665, "loss": 0.9867, "step": 9315 }, { "epoch": 0.89, "grad_norm": 0.2813400142046753, "learning_rate": 0.00012593665531125615, "loss": 1.0049, "step": 9316 }, { "epoch": 0.89, "grad_norm": 0.2831320746009813, "learning_rate": 0.00012592137685559458, "loss": 1.029, "step": 9317 }, { "epoch": 0.89, "grad_norm": 0.29831547442685147, "learning_rate": 0.00012590609775123426, "loss": 1.116, "step": 9318 }, { "epoch": 0.89, "grad_norm": 0.2936254006184292, "learning_rate": 0.00012589081799855756, "loss": 1.0433, "step": 9319 }, { "epoch": 0.89, "grad_norm": 0.30202822877591445, "learning_rate": 0.00012587553759794683, "loss": 1.067, "step": 9320 }, { "epoch": 0.89, "grad_norm": 0.31153441957615685, "learning_rate": 0.00012586025654978458, "loss": 1.0185, "step": 9321 }, { "epoch": 0.89, "grad_norm": 0.31241827225447677, "learning_rate": 0.0001258449748544531, "loss": 1.0975, "step": 9322 }, { "epoch": 0.89, "grad_norm": 0.27730753920433476, "learning_rate": 0.0001258296925123349, "loss": 1.069, "step": 9323 }, { "epoch": 0.89, "grad_norm": 0.2897016648466612, "learning_rate": 0.00012581440952381243, "loss": 1.067, "step": 9324 }, { "epoch": 0.89, "grad_norm": 0.24652987673338808, "learning_rate": 0.0001257991258892681, "loss": 1.1338, "step": 9325 }, { "epoch": 0.89, "grad_norm": 0.3071238167163373, "learning_rate": 0.00012578384160908445, "loss": 1.1402, "step": 9326 }, { "epoch": 0.89, "grad_norm": 0.29112654720110537, "learning_rate": 0.00012576855668364396, "loss": 1.0772, "step": 9327 }, { "epoch": 0.89, "grad_norm": 0.28108564003717257, "learning_rate": 0.00012575327111332912, "loss": 1.0895, "step": 9328 }, { "epoch": 0.89, "grad_norm": 0.2697345251089792, "learning_rate": 0.00012573798489852253, "loss": 1.1332, "step": 9329 }, { "epoch": 0.89, "grad_norm": 0.304121790273392, "learning_rate": 0.00012572269803960665, "loss": 1.0902, "step": 9330 }, { "epoch": 0.89, "grad_norm": 0.28853126394768475, "learning_rate": 0.00012570741053696412, "loss": 1.0721, "step": 9331 }, { "epoch": 0.89, "grad_norm": 0.2851724014266135, "learning_rate": 0.0001256921223909775, "loss": 1.1567, "step": 9332 }, { "epoch": 0.89, "grad_norm": 0.28817110546138114, "learning_rate": 0.0001256768336020293, "loss": 1.1432, "step": 9333 }, { "epoch": 0.89, "grad_norm": 0.2674528738842632, "learning_rate": 0.00012566154417050225, "loss": 1.1189, "step": 9334 }, { "epoch": 0.89, "grad_norm": 0.27322750722188155, "learning_rate": 0.00012564625409677895, "loss": 1.0305, "step": 9335 }, { "epoch": 0.89, "grad_norm": 0.29977545197889227, "learning_rate": 0.000125630963381242, "loss": 1.1117, "step": 9336 }, { "epoch": 0.89, "grad_norm": 0.29111294479338623, "learning_rate": 0.00012561567202427407, "loss": 0.9916, "step": 9337 }, { "epoch": 0.89, "grad_norm": 0.3036268811633583, "learning_rate": 0.00012560038002625788, "loss": 1.0984, "step": 9338 }, { "epoch": 0.89, "grad_norm": 0.2549172822007168, "learning_rate": 0.00012558508738757604, "loss": 1.1815, "step": 9339 }, { "epoch": 0.89, "grad_norm": 0.2675936118833171, "learning_rate": 0.00012556979410861135, "loss": 0.962, "step": 9340 }, { "epoch": 0.89, "grad_norm": 0.2773919289873, "learning_rate": 0.00012555450018974647, "loss": 1.1229, "step": 9341 }, { "epoch": 0.89, "grad_norm": 0.2813050855847403, "learning_rate": 0.00012553920563136418, "loss": 1.1681, "step": 9342 }, { "epoch": 0.89, "grad_norm": 0.2798799129707208, "learning_rate": 0.00012552391043384718, "loss": 1.1568, "step": 9343 }, { "epoch": 0.89, "grad_norm": 0.30424213764742, "learning_rate": 0.00012550861459757835, "loss": 1.1593, "step": 9344 }, { "epoch": 0.89, "grad_norm": 0.32261170593030764, "learning_rate": 0.00012549331812294033, "loss": 1.0733, "step": 9345 }, { "epoch": 0.89, "grad_norm": 0.28695317695752437, "learning_rate": 0.00012547802101031604, "loss": 1.0534, "step": 9346 }, { "epoch": 0.89, "grad_norm": 0.297270830182022, "learning_rate": 0.00012546272326008828, "loss": 0.9348, "step": 9347 }, { "epoch": 0.89, "grad_norm": 0.32798136328441224, "learning_rate": 0.00012544742487263983, "loss": 1.2133, "step": 9348 }, { "epoch": 0.89, "grad_norm": 0.276438841709617, "learning_rate": 0.00012543212584835363, "loss": 0.9188, "step": 9349 }, { "epoch": 0.89, "grad_norm": 0.30713210860480056, "learning_rate": 0.00012541682618761243, "loss": 0.936, "step": 9350 }, { "epoch": 0.89, "grad_norm": 0.3058264322272426, "learning_rate": 0.00012540152589079922, "loss": 1.0953, "step": 9351 }, { "epoch": 0.89, "grad_norm": 0.3086810532233868, "learning_rate": 0.00012538622495829687, "loss": 1.0574, "step": 9352 }, { "epoch": 0.89, "grad_norm": 0.26492015598732366, "learning_rate": 0.00012537092339048829, "loss": 1.0913, "step": 9353 }, { "epoch": 0.89, "grad_norm": 0.2613494790949844, "learning_rate": 0.00012535562118775638, "loss": 1.0103, "step": 9354 }, { "epoch": 0.9, "grad_norm": 0.2745257568238265, "learning_rate": 0.00012534031835048412, "loss": 1.016, "step": 9355 }, { "epoch": 0.9, "grad_norm": 0.32547504865315513, "learning_rate": 0.00012532501487905447, "loss": 1.0429, "step": 9356 }, { "epoch": 0.9, "grad_norm": 0.25947151074133556, "learning_rate": 0.0001253097107738504, "loss": 1.0684, "step": 9357 }, { "epoch": 0.9, "grad_norm": 0.2749564028335704, "learning_rate": 0.00012529440603525495, "loss": 1.0026, "step": 9358 }, { "epoch": 0.9, "grad_norm": 0.26444889751740525, "learning_rate": 0.00012527910066365108, "loss": 1.1251, "step": 9359 }, { "epoch": 0.9, "grad_norm": 0.24760688189734706, "learning_rate": 0.00012526379465942179, "loss": 1.0365, "step": 9360 }, { "epoch": 0.9, "grad_norm": 0.2952587878180295, "learning_rate": 0.00012524848802295018, "loss": 1.1582, "step": 9361 }, { "epoch": 0.9, "grad_norm": 0.27540696444772433, "learning_rate": 0.0001252331807546193, "loss": 1.1061, "step": 9362 }, { "epoch": 0.9, "grad_norm": 0.27595222755388815, "learning_rate": 0.00012521787285481222, "loss": 1.0053, "step": 9363 }, { "epoch": 0.9, "grad_norm": 0.3078665527994498, "learning_rate": 0.00012520256432391197, "loss": 1.0609, "step": 9364 }, { "epoch": 0.9, "grad_norm": 0.30912162354934847, "learning_rate": 0.00012518725516230176, "loss": 1.092, "step": 9365 }, { "epoch": 0.9, "grad_norm": 0.24850758690847607, "learning_rate": 0.00012517194537036463, "loss": 0.9636, "step": 9366 }, { "epoch": 0.9, "grad_norm": 0.25462201249484734, "learning_rate": 0.00012515663494848378, "loss": 1.0612, "step": 9367 }, { "epoch": 0.9, "grad_norm": 0.30057529749822187, "learning_rate": 0.0001251413238970423, "loss": 1.1008, "step": 9368 }, { "epoch": 0.9, "grad_norm": 0.28606400293322526, "learning_rate": 0.00012512601221642338, "loss": 0.9646, "step": 9369 }, { "epoch": 0.9, "grad_norm": 0.2718761895079578, "learning_rate": 0.00012511069990701022, "loss": 1.1127, "step": 9370 }, { "epoch": 0.9, "grad_norm": 0.26749842342835667, "learning_rate": 0.00012509538696918606, "loss": 1.0016, "step": 9371 }, { "epoch": 0.9, "grad_norm": 0.24422845567032508, "learning_rate": 0.00012508007340333402, "loss": 1.0075, "step": 9372 }, { "epoch": 0.9, "grad_norm": 0.26485906439128654, "learning_rate": 0.00012506475920983742, "loss": 1.0334, "step": 9373 }, { "epoch": 0.9, "grad_norm": 0.23407388212556063, "learning_rate": 0.00012504944438907945, "loss": 0.9974, "step": 9374 }, { "epoch": 0.9, "grad_norm": 0.2948257650153299, "learning_rate": 0.00012503412894144337, "loss": 1.0004, "step": 9375 }, { "epoch": 0.9, "grad_norm": 0.2624520239691215, "learning_rate": 0.0001250188128673125, "loss": 1.113, "step": 9376 }, { "epoch": 0.9, "grad_norm": 0.28609464800481466, "learning_rate": 0.00012500349616707013, "loss": 0.9897, "step": 9377 }, { "epoch": 0.9, "grad_norm": 0.2647060271102632, "learning_rate": 0.0001249881788410995, "loss": 1.1142, "step": 9378 }, { "epoch": 0.9, "grad_norm": 0.3319227336941671, "learning_rate": 0.00012497286088978407, "loss": 1.1371, "step": 9379 }, { "epoch": 0.9, "grad_norm": 0.290890609063919, "learning_rate": 0.00012495754231350704, "loss": 0.9454, "step": 9380 }, { "epoch": 0.9, "grad_norm": 0.275011083994787, "learning_rate": 0.00012494222311265185, "loss": 1.0498, "step": 9381 }, { "epoch": 0.9, "grad_norm": 0.29328964264372026, "learning_rate": 0.00012492690328760184, "loss": 1.2401, "step": 9382 }, { "epoch": 0.9, "grad_norm": 0.2849134201992309, "learning_rate": 0.00012491158283874042, "loss": 1.0384, "step": 9383 }, { "epoch": 0.9, "grad_norm": 0.2644889309340255, "learning_rate": 0.00012489626176645098, "loss": 1.0729, "step": 9384 }, { "epoch": 0.9, "grad_norm": 0.2671846138345653, "learning_rate": 0.00012488094007111694, "loss": 1.0643, "step": 9385 }, { "epoch": 0.9, "grad_norm": 0.28342755899615574, "learning_rate": 0.00012486561775312176, "loss": 1.0652, "step": 9386 }, { "epoch": 0.9, "grad_norm": 0.2708718366533827, "learning_rate": 0.00012485029481284883, "loss": 1.0131, "step": 9387 }, { "epoch": 0.9, "grad_norm": 0.2893275903976366, "learning_rate": 0.00012483497125068168, "loss": 1.1254, "step": 9388 }, { "epoch": 0.9, "grad_norm": 0.29651203987564034, "learning_rate": 0.00012481964706700374, "loss": 1.0964, "step": 9389 }, { "epoch": 0.9, "grad_norm": 0.2668243860412548, "learning_rate": 0.00012480432226219857, "loss": 1.0646, "step": 9390 }, { "epoch": 0.9, "grad_norm": 0.2607022670832829, "learning_rate": 0.0001247889968366496, "loss": 1.094, "step": 9391 }, { "epoch": 0.9, "grad_norm": 0.3032946090686901, "learning_rate": 0.00012477367079074045, "loss": 1.0649, "step": 9392 }, { "epoch": 0.9, "grad_norm": 0.31332018726986793, "learning_rate": 0.0001247583441248546, "loss": 1.0273, "step": 9393 }, { "epoch": 0.9, "grad_norm": 0.25149055452189184, "learning_rate": 0.00012474301683937562, "loss": 1.0565, "step": 9394 }, { "epoch": 0.9, "grad_norm": 0.31857711281294315, "learning_rate": 0.00012472768893468712, "loss": 1.1011, "step": 9395 }, { "epoch": 0.9, "grad_norm": 0.27584446547396924, "learning_rate": 0.00012471236041117263, "loss": 1.1056, "step": 9396 }, { "epoch": 0.9, "grad_norm": 0.2710593279658963, "learning_rate": 0.00012469703126921582, "loss": 1.0088, "step": 9397 }, { "epoch": 0.9, "grad_norm": 0.2924314964891264, "learning_rate": 0.00012468170150920028, "loss": 1.0443, "step": 9398 }, { "epoch": 0.9, "grad_norm": 0.3100262243859713, "learning_rate": 0.00012466637113150964, "loss": 1.068, "step": 9399 }, { "epoch": 0.9, "grad_norm": 0.3203008665877059, "learning_rate": 0.00012465104013652755, "loss": 1.0662, "step": 9400 }, { "epoch": 0.9, "grad_norm": 0.28159144815769815, "learning_rate": 0.00012463570852463767, "loss": 1.0416, "step": 9401 }, { "epoch": 0.9, "grad_norm": 0.2670293148269375, "learning_rate": 0.00012462037629622374, "loss": 1.0158, "step": 9402 }, { "epoch": 0.9, "grad_norm": 0.28251780180802666, "learning_rate": 0.00012460504345166942, "loss": 1.0844, "step": 9403 }, { "epoch": 0.9, "grad_norm": 0.2762339006289285, "learning_rate": 0.00012458970999135839, "loss": 1.0557, "step": 9404 }, { "epoch": 0.9, "grad_norm": 0.26743346835091364, "learning_rate": 0.00012457437591567442, "loss": 1.0952, "step": 9405 }, { "epoch": 0.9, "grad_norm": 0.28037210231320464, "learning_rate": 0.00012455904122500128, "loss": 0.9932, "step": 9406 }, { "epoch": 0.9, "grad_norm": 0.2629358216186376, "learning_rate": 0.00012454370591972268, "loss": 0.9753, "step": 9407 }, { "epoch": 0.9, "grad_norm": 0.2749296239864377, "learning_rate": 0.0001245283700002224, "loss": 1.0294, "step": 9408 }, { "epoch": 0.9, "grad_norm": 0.3005194418484203, "learning_rate": 0.00012451303346688424, "loss": 0.9774, "step": 9409 }, { "epoch": 0.9, "grad_norm": 0.2994528395818742, "learning_rate": 0.00012449769632009205, "loss": 1.0247, "step": 9410 }, { "epoch": 0.9, "grad_norm": 0.2491825962382407, "learning_rate": 0.00012448235856022958, "loss": 1.0416, "step": 9411 }, { "epoch": 0.9, "grad_norm": 0.24946941218248858, "learning_rate": 0.0001244670201876807, "loss": 1.0701, "step": 9412 }, { "epoch": 0.9, "grad_norm": 0.30337869321931366, "learning_rate": 0.0001244516812028293, "loss": 1.1049, "step": 9413 }, { "epoch": 0.9, "grad_norm": 0.30488539436890616, "learning_rate": 0.00012443634160605918, "loss": 1.0473, "step": 9414 }, { "epoch": 0.9, "grad_norm": 0.307492284380008, "learning_rate": 0.00012442100139775425, "loss": 1.0344, "step": 9415 }, { "epoch": 0.9, "grad_norm": 0.3091356412029321, "learning_rate": 0.00012440566057829843, "loss": 1.0037, "step": 9416 }, { "epoch": 0.9, "grad_norm": 0.27659203912070673, "learning_rate": 0.0001243903191480756, "loss": 0.9788, "step": 9417 }, { "epoch": 0.9, "grad_norm": 0.295213821134966, "learning_rate": 0.00012437497710746974, "loss": 1.0793, "step": 9418 }, { "epoch": 0.9, "grad_norm": 0.2921351405650045, "learning_rate": 0.00012435963445686472, "loss": 1.0692, "step": 9419 }, { "epoch": 0.9, "grad_norm": 0.32103907136316706, "learning_rate": 0.00012434429119664457, "loss": 1.1415, "step": 9420 }, { "epoch": 0.9, "grad_norm": 0.30821488009357795, "learning_rate": 0.0001243289473271932, "loss": 1.1574, "step": 9421 }, { "epoch": 0.9, "grad_norm": 0.3020517741494569, "learning_rate": 0.00012431360284889464, "loss": 1.1252, "step": 9422 }, { "epoch": 0.9, "grad_norm": 0.30111749627739215, "learning_rate": 0.0001242982577621329, "loss": 1.0815, "step": 9423 }, { "epoch": 0.9, "grad_norm": 0.2654478388360173, "learning_rate": 0.000124282912067292, "loss": 1.0287, "step": 9424 }, { "epoch": 0.9, "grad_norm": 0.2845065279316126, "learning_rate": 0.00012426756576475593, "loss": 1.0703, "step": 9425 }, { "epoch": 0.9, "grad_norm": 0.24604429019373228, "learning_rate": 0.00012425221885490882, "loss": 0.982, "step": 9426 }, { "epoch": 0.9, "grad_norm": 0.3204878522592504, "learning_rate": 0.00012423687133813466, "loss": 0.9874, "step": 9427 }, { "epoch": 0.9, "grad_norm": 0.2698848880583595, "learning_rate": 0.00012422152321481754, "loss": 1.1232, "step": 9428 }, { "epoch": 0.9, "grad_norm": 0.2956334280898767, "learning_rate": 0.00012420617448534162, "loss": 1.1245, "step": 9429 }, { "epoch": 0.9, "grad_norm": 0.28215348025828507, "learning_rate": 0.00012419082515009093, "loss": 1.0598, "step": 9430 }, { "epoch": 0.9, "grad_norm": 0.29800328339448867, "learning_rate": 0.00012417547520944967, "loss": 0.9549, "step": 9431 }, { "epoch": 0.9, "grad_norm": 0.2758307830968502, "learning_rate": 0.00012416012466380194, "loss": 1.0299, "step": 9432 }, { "epoch": 0.9, "grad_norm": 0.27789615338120255, "learning_rate": 0.00012414477351353192, "loss": 0.955, "step": 9433 }, { "epoch": 0.9, "grad_norm": 0.28216151632355363, "learning_rate": 0.00012412942175902376, "loss": 1.0228, "step": 9434 }, { "epoch": 0.9, "grad_norm": 0.3646066899027749, "learning_rate": 0.00012411406940066163, "loss": 1.2245, "step": 9435 }, { "epoch": 0.9, "grad_norm": 0.28097256754194205, "learning_rate": 0.0001240987164388298, "loss": 1.0332, "step": 9436 }, { "epoch": 0.9, "grad_norm": 0.25589371187045934, "learning_rate": 0.00012408336287391243, "loss": 1.0571, "step": 9437 }, { "epoch": 0.9, "grad_norm": 0.2924108280699943, "learning_rate": 0.00012406800870629373, "loss": 1.0727, "step": 9438 }, { "epoch": 0.9, "grad_norm": 0.26309668552925775, "learning_rate": 0.00012405265393635804, "loss": 0.9953, "step": 9439 }, { "epoch": 0.9, "grad_norm": 0.3055036695706396, "learning_rate": 0.00012403729856448956, "loss": 1.1705, "step": 9440 }, { "epoch": 0.9, "grad_norm": 0.26404553824001786, "learning_rate": 0.00012402194259107256, "loss": 0.8035, "step": 9441 }, { "epoch": 0.9, "grad_norm": 0.263456979982817, "learning_rate": 0.00012400658601649135, "loss": 1.0515, "step": 9442 }, { "epoch": 0.9, "grad_norm": 0.2697295571066528, "learning_rate": 0.00012399122884113024, "loss": 1.0783, "step": 9443 }, { "epoch": 0.9, "grad_norm": 0.3032996814397787, "learning_rate": 0.00012397587106537355, "loss": 1.107, "step": 9444 }, { "epoch": 0.9, "grad_norm": 0.30209747282548055, "learning_rate": 0.0001239605126896056, "loss": 1.0748, "step": 9445 }, { "epoch": 0.9, "grad_norm": 0.2541798522215149, "learning_rate": 0.0001239451537142108, "loss": 0.9973, "step": 9446 }, { "epoch": 0.9, "grad_norm": 0.26055885474176194, "learning_rate": 0.0001239297941395735, "loss": 0.9793, "step": 9447 }, { "epoch": 0.9, "grad_norm": 0.2937502415391966, "learning_rate": 0.00012391443396607798, "loss": 1.0501, "step": 9448 }, { "epoch": 0.9, "grad_norm": 0.2484096340317221, "learning_rate": 0.00012389907319410877, "loss": 0.9737, "step": 9449 }, { "epoch": 0.9, "grad_norm": 0.34708042996433486, "learning_rate": 0.00012388371182405023, "loss": 1.0665, "step": 9450 }, { "epoch": 0.9, "grad_norm": 0.2870344403909073, "learning_rate": 0.0001238683498562868, "loss": 1.0346, "step": 9451 }, { "epoch": 0.9, "grad_norm": 0.32368230775180656, "learning_rate": 0.00012385298729120287, "loss": 0.9915, "step": 9452 }, { "epoch": 0.9, "grad_norm": 0.2902156260509134, "learning_rate": 0.00012383762412918297, "loss": 1.0838, "step": 9453 }, { "epoch": 0.9, "grad_norm": 0.26483538433540343, "learning_rate": 0.00012382226037061157, "loss": 1.0152, "step": 9454 }, { "epoch": 0.9, "grad_norm": 0.31506688541873007, "learning_rate": 0.0001238068960158731, "loss": 1.0498, "step": 9455 }, { "epoch": 0.9, "grad_norm": 0.31606368328175294, "learning_rate": 0.00012379153106535212, "loss": 1.0568, "step": 9456 }, { "epoch": 0.9, "grad_norm": 0.26749250980306544, "learning_rate": 0.00012377616551943312, "loss": 1.1441, "step": 9457 }, { "epoch": 0.9, "grad_norm": 0.2805411852480236, "learning_rate": 0.0001237607993785006, "loss": 1.0273, "step": 9458 }, { "epoch": 0.9, "grad_norm": 0.29590637384614576, "learning_rate": 0.0001237454326429392, "loss": 1.2318, "step": 9459 }, { "epoch": 0.91, "grad_norm": 0.277157897803464, "learning_rate": 0.00012373006531313338, "loss": 1.0606, "step": 9460 }, { "epoch": 0.91, "grad_norm": 0.2820422335962146, "learning_rate": 0.0001237146973894678, "loss": 1.0724, "step": 9461 }, { "epoch": 0.91, "grad_norm": 0.28970884694413745, "learning_rate": 0.00012369932887232695, "loss": 1.0501, "step": 9462 }, { "epoch": 0.91, "grad_norm": 0.26821460214493453, "learning_rate": 0.00012368395976209554, "loss": 1.1002, "step": 9463 }, { "epoch": 0.91, "grad_norm": 0.3020120162788039, "learning_rate": 0.00012366859005915817, "loss": 1.2129, "step": 9464 }, { "epoch": 0.91, "grad_norm": 0.25290616792649784, "learning_rate": 0.00012365321976389942, "loss": 0.9608, "step": 9465 }, { "epoch": 0.91, "grad_norm": 0.3118979180099709, "learning_rate": 0.000123637848876704, "loss": 1.0709, "step": 9466 }, { "epoch": 0.91, "grad_norm": 0.31154007441669707, "learning_rate": 0.00012362247739795658, "loss": 1.0235, "step": 9467 }, { "epoch": 0.91, "grad_norm": 0.28995095468089827, "learning_rate": 0.00012360710532804178, "loss": 1.201, "step": 9468 }, { "epoch": 0.91, "grad_norm": 0.3110913941186855, "learning_rate": 0.00012359173266734435, "loss": 1.1567, "step": 9469 }, { "epoch": 0.91, "grad_norm": 0.27514587325982237, "learning_rate": 0.00012357635941624898, "loss": 1.0559, "step": 9470 }, { "epoch": 0.91, "grad_norm": 0.30517981996469423, "learning_rate": 0.00012356098557514037, "loss": 1.0068, "step": 9471 }, { "epoch": 0.91, "grad_norm": 0.2725388181887218, "learning_rate": 0.00012354561114440334, "loss": 0.9899, "step": 9472 }, { "epoch": 0.91, "grad_norm": 0.297899718417481, "learning_rate": 0.00012353023612442254, "loss": 1.0983, "step": 9473 }, { "epoch": 0.91, "grad_norm": 0.2848396496021322, "learning_rate": 0.00012351486051558283, "loss": 1.0539, "step": 9474 }, { "epoch": 0.91, "grad_norm": 0.2852186312128984, "learning_rate": 0.00012349948431826895, "loss": 1.0044, "step": 9475 }, { "epoch": 0.91, "grad_norm": 0.29012020789960563, "learning_rate": 0.0001234841075328657, "loss": 0.9673, "step": 9476 }, { "epoch": 0.91, "grad_norm": 0.24672860443444639, "learning_rate": 0.0001234687301597579, "loss": 1.0161, "step": 9477 }, { "epoch": 0.91, "grad_norm": 0.2726877358325203, "learning_rate": 0.0001234533521993304, "loss": 1.0621, "step": 9478 }, { "epoch": 0.91, "grad_norm": 0.28948332424230283, "learning_rate": 0.00012343797365196797, "loss": 0.9913, "step": 9479 }, { "epoch": 0.91, "grad_norm": 0.26969250387705407, "learning_rate": 0.00012342259451805557, "loss": 1.1202, "step": 9480 }, { "epoch": 0.91, "grad_norm": 0.3258916039185605, "learning_rate": 0.000123407214797978, "loss": 1.0168, "step": 9481 }, { "epoch": 0.91, "grad_norm": 0.25973605245424924, "learning_rate": 0.00012339183449212017, "loss": 1.0874, "step": 9482 }, { "epoch": 0.91, "grad_norm": 0.2676443845035316, "learning_rate": 0.00012337645360086698, "loss": 0.9667, "step": 9483 }, { "epoch": 0.91, "grad_norm": 0.30203903279304206, "learning_rate": 0.00012336107212460338, "loss": 1.0537, "step": 9484 }, { "epoch": 0.91, "grad_norm": 0.27476191579196646, "learning_rate": 0.00012334569006371422, "loss": 1.0224, "step": 9485 }, { "epoch": 0.91, "grad_norm": 0.26328678406410266, "learning_rate": 0.0001233303074185845, "loss": 1.1169, "step": 9486 }, { "epoch": 0.91, "grad_norm": 0.29307038009665376, "learning_rate": 0.0001233149241895992, "loss": 1.0547, "step": 9487 }, { "epoch": 0.91, "grad_norm": 0.3029822127610837, "learning_rate": 0.00012329954037714326, "loss": 1.0455, "step": 9488 }, { "epoch": 0.91, "grad_norm": 0.31316815984313306, "learning_rate": 0.00012328415598160167, "loss": 1.0461, "step": 9489 }, { "epoch": 0.91, "grad_norm": 0.26416179108140025, "learning_rate": 0.00012326877100335946, "loss": 0.9973, "step": 9490 }, { "epoch": 0.91, "grad_norm": 0.27917647838809806, "learning_rate": 0.0001232533854428016, "loss": 1.1014, "step": 9491 }, { "epoch": 0.91, "grad_norm": 0.2880924083809222, "learning_rate": 0.00012323799930031318, "loss": 1.1198, "step": 9492 }, { "epoch": 0.91, "grad_norm": 0.2738883771003005, "learning_rate": 0.0001232226125762792, "loss": 0.9753, "step": 9493 }, { "epoch": 0.91, "grad_norm": 0.28505026809880435, "learning_rate": 0.00012320722527108476, "loss": 1.0131, "step": 9494 }, { "epoch": 0.91, "grad_norm": 0.2700388433926024, "learning_rate": 0.00012319183738511495, "loss": 1.0627, "step": 9495 }, { "epoch": 0.91, "grad_norm": 0.21724216250627967, "learning_rate": 0.0001231764489187548, "loss": 0.969, "step": 9496 }, { "epoch": 0.91, "grad_norm": 0.30567642004976847, "learning_rate": 0.00012316105987238946, "loss": 1.1282, "step": 9497 }, { "epoch": 0.91, "grad_norm": 0.2960952448251748, "learning_rate": 0.00012314567024640405, "loss": 1.0719, "step": 9498 }, { "epoch": 0.91, "grad_norm": 0.2679330934652599, "learning_rate": 0.00012313028004118368, "loss": 1.0671, "step": 9499 }, { "epoch": 0.91, "grad_norm": 0.30977923520733813, "learning_rate": 0.00012311488925711352, "loss": 1.0649, "step": 9500 }, { "epoch": 0.91, "grad_norm": 0.2877059133094227, "learning_rate": 0.00012309949789457872, "loss": 0.99, "step": 9501 }, { "epoch": 0.91, "grad_norm": 0.2747597630168646, "learning_rate": 0.0001230841059539645, "loss": 1.0708, "step": 9502 }, { "epoch": 0.91, "grad_norm": 0.2849182513539573, "learning_rate": 0.00012306871343565598, "loss": 1.1457, "step": 9503 }, { "epoch": 0.91, "grad_norm": 0.2816422560556322, "learning_rate": 0.00012305332034003843, "loss": 1.0653, "step": 9504 }, { "epoch": 0.91, "grad_norm": 0.2449196404919519, "learning_rate": 0.00012303792666749704, "loss": 1.0025, "step": 9505 }, { "epoch": 0.91, "grad_norm": 0.28277588561340145, "learning_rate": 0.00012302253241841705, "loss": 1.0981, "step": 9506 }, { "epoch": 0.91, "grad_norm": 0.28881985968739604, "learning_rate": 0.00012300713759318374, "loss": 0.8564, "step": 9507 }, { "epoch": 0.91, "grad_norm": 0.2658935819955603, "learning_rate": 0.00012299174219218236, "loss": 0.9685, "step": 9508 }, { "epoch": 0.91, "grad_norm": 0.33536541922122687, "learning_rate": 0.00012297634621579815, "loss": 1.0644, "step": 9509 }, { "epoch": 0.91, "grad_norm": 0.2883666884244152, "learning_rate": 0.00012296094966441644, "loss": 1.0163, "step": 9510 }, { "epoch": 0.91, "grad_norm": 0.2874022639714324, "learning_rate": 0.00012294555253842258, "loss": 1.0547, "step": 9511 }, { "epoch": 0.91, "grad_norm": 0.28615282013866633, "learning_rate": 0.0001229301548382018, "loss": 1.0267, "step": 9512 }, { "epoch": 0.91, "grad_norm": 0.31559288068892855, "learning_rate": 0.0001229147565641395, "loss": 1.0427, "step": 9513 }, { "epoch": 0.91, "grad_norm": 0.29067613727477026, "learning_rate": 0.000122899357716621, "loss": 1.0771, "step": 9514 }, { "epoch": 0.91, "grad_norm": 0.3091379944841265, "learning_rate": 0.00012288395829603168, "loss": 1.1228, "step": 9515 }, { "epoch": 0.91, "grad_norm": 0.34288634284416575, "learning_rate": 0.0001228685583027569, "loss": 1.07, "step": 9516 }, { "epoch": 0.91, "grad_norm": 0.25806670007511917, "learning_rate": 0.00012285315773718215, "loss": 1.177, "step": 9517 }, { "epoch": 0.91, "grad_norm": 0.28617035042389005, "learning_rate": 0.00012283775659969272, "loss": 0.9902, "step": 9518 }, { "epoch": 0.91, "grad_norm": 0.2699590057863018, "learning_rate": 0.00012282235489067406, "loss": 1.0114, "step": 9519 }, { "epoch": 0.91, "grad_norm": 0.2668076542427271, "learning_rate": 0.00012280695261051168, "loss": 1.0479, "step": 9520 }, { "epoch": 0.91, "grad_norm": 0.27069952546890097, "learning_rate": 0.00012279154975959093, "loss": 0.9784, "step": 9521 }, { "epoch": 0.91, "grad_norm": 0.26601560597027246, "learning_rate": 0.00012277614633829736, "loss": 0.9275, "step": 9522 }, { "epoch": 0.91, "grad_norm": 0.34915490477463507, "learning_rate": 0.00012276074234701637, "loss": 1.0747, "step": 9523 }, { "epoch": 0.91, "grad_norm": 0.2963938117016832, "learning_rate": 0.00012274533778613354, "loss": 0.968, "step": 9524 }, { "epoch": 0.91, "grad_norm": 0.28627744365026025, "learning_rate": 0.00012272993265603432, "loss": 1.0285, "step": 9525 }, { "epoch": 0.91, "grad_norm": 0.2543047213053924, "learning_rate": 0.00012271452695710423, "loss": 0.9785, "step": 9526 }, { "epoch": 0.91, "grad_norm": 0.30369638655565206, "learning_rate": 0.00012269912068972887, "loss": 1.0088, "step": 9527 }, { "epoch": 0.91, "grad_norm": 0.30048051465973585, "learning_rate": 0.00012268371385429371, "loss": 1.0032, "step": 9528 }, { "epoch": 0.91, "grad_norm": 0.24363694434494604, "learning_rate": 0.0001226683064511844, "loss": 1.083, "step": 9529 }, { "epoch": 0.91, "grad_norm": 0.27300145834174794, "learning_rate": 0.0001226528984807864, "loss": 1.056, "step": 9530 }, { "epoch": 0.91, "grad_norm": 0.2840057926179831, "learning_rate": 0.00012263748994348543, "loss": 1.0906, "step": 9531 }, { "epoch": 0.91, "grad_norm": 0.2867767195608796, "learning_rate": 0.00012262208083966707, "loss": 1.0343, "step": 9532 }, { "epoch": 0.91, "grad_norm": 0.30097203291437813, "learning_rate": 0.00012260667116971687, "loss": 1.1535, "step": 9533 }, { "epoch": 0.91, "grad_norm": 0.26057797605093236, "learning_rate": 0.0001225912609340205, "loss": 0.9956, "step": 9534 }, { "epoch": 0.91, "grad_norm": 0.2722485625219177, "learning_rate": 0.00012257585013296368, "loss": 0.9609, "step": 9535 }, { "epoch": 0.91, "grad_norm": 0.31674933662782234, "learning_rate": 0.00012256043876693199, "loss": 1.1484, "step": 9536 }, { "epoch": 0.91, "grad_norm": 0.24732657574619976, "learning_rate": 0.00012254502683631114, "loss": 1.11, "step": 9537 }, { "epoch": 0.91, "grad_norm": 0.302444859564973, "learning_rate": 0.00012252961434148685, "loss": 1.1601, "step": 9538 }, { "epoch": 0.91, "grad_norm": 0.3039631031521198, "learning_rate": 0.0001225142012828448, "loss": 1.1319, "step": 9539 }, { "epoch": 0.91, "grad_norm": 0.2833615843410249, "learning_rate": 0.0001224987876607707, "loss": 1.1211, "step": 9540 }, { "epoch": 0.91, "grad_norm": 0.24087751384697798, "learning_rate": 0.0001224833734756503, "loss": 0.9828, "step": 9541 }, { "epoch": 0.91, "grad_norm": 0.3022341870078711, "learning_rate": 0.00012246795872786938, "loss": 1.0778, "step": 9542 }, { "epoch": 0.91, "grad_norm": 0.3125008887305215, "learning_rate": 0.00012245254341781362, "loss": 0.9715, "step": 9543 }, { "epoch": 0.91, "grad_norm": 0.26961102066302006, "learning_rate": 0.0001224371275458689, "loss": 1.0219, "step": 9544 }, { "epoch": 0.91, "grad_norm": 0.30552221121015993, "learning_rate": 0.00012242171111242093, "loss": 0.9941, "step": 9545 }, { "epoch": 0.91, "grad_norm": 0.30365825120398526, "learning_rate": 0.00012240629411785557, "loss": 0.9531, "step": 9546 }, { "epoch": 0.91, "grad_norm": 0.27544518032809634, "learning_rate": 0.0001223908765625586, "loss": 1.0634, "step": 9547 }, { "epoch": 0.91, "grad_norm": 0.2469345335356712, "learning_rate": 0.00012237545844691585, "loss": 1.0724, "step": 9548 }, { "epoch": 0.91, "grad_norm": 0.2956105861133632, "learning_rate": 0.00012236003977131324, "loss": 1.1523, "step": 9549 }, { "epoch": 0.91, "grad_norm": 0.2968897322282517, "learning_rate": 0.00012234462053613653, "loss": 1.07, "step": 9550 }, { "epoch": 0.91, "grad_norm": 0.26572173065636373, "learning_rate": 0.0001223292007417717, "loss": 1.0555, "step": 9551 }, { "epoch": 0.91, "grad_norm": 0.24837107366849048, "learning_rate": 0.00012231378038860455, "loss": 1.1674, "step": 9552 }, { "epoch": 0.91, "grad_norm": 0.27292429243707583, "learning_rate": 0.00012229835947702103, "loss": 1.1317, "step": 9553 }, { "epoch": 0.91, "grad_norm": 0.31026913454307414, "learning_rate": 0.00012228293800740705, "loss": 1.1446, "step": 9554 }, { "epoch": 0.91, "grad_norm": 0.3072735487423723, "learning_rate": 0.00012226751598014854, "loss": 1.1987, "step": 9555 }, { "epoch": 0.91, "grad_norm": 0.24847902065517485, "learning_rate": 0.00012225209339563145, "loss": 1.1041, "step": 9556 }, { "epoch": 0.91, "grad_norm": 0.2924020829498602, "learning_rate": 0.00012223667025424172, "loss": 1.0721, "step": 9557 }, { "epoch": 0.91, "grad_norm": 0.2613223633837016, "learning_rate": 0.00012222124655636538, "loss": 1.1266, "step": 9558 }, { "epoch": 0.91, "grad_norm": 0.2894905328595956, "learning_rate": 0.00012220582230238839, "loss": 1.0732, "step": 9559 }, { "epoch": 0.91, "grad_norm": 0.28681410216353886, "learning_rate": 0.00012219039749269668, "loss": 1.0915, "step": 9560 }, { "epoch": 0.91, "grad_norm": 0.2943586385473255, "learning_rate": 0.00012217497212767636, "loss": 1.0788, "step": 9561 }, { "epoch": 0.91, "grad_norm": 0.285929389887662, "learning_rate": 0.00012215954620771344, "loss": 1.1005, "step": 9562 }, { "epoch": 0.91, "grad_norm": 0.3149078861825459, "learning_rate": 0.00012214411973319396, "loss": 1.0933, "step": 9563 }, { "epoch": 0.91, "grad_norm": 0.24280112314190866, "learning_rate": 0.0001221286927045039, "loss": 0.954, "step": 9564 }, { "epoch": 0.92, "grad_norm": 0.24225580439211833, "learning_rate": 0.00012211326512202945, "loss": 0.9918, "step": 9565 }, { "epoch": 0.92, "grad_norm": 0.3066920417822219, "learning_rate": 0.00012209783698615665, "loss": 1.0828, "step": 9566 }, { "epoch": 0.92, "grad_norm": 0.3112335184826233, "learning_rate": 0.00012208240829727156, "loss": 1.1426, "step": 9567 }, { "epoch": 0.92, "grad_norm": 0.263752238595472, "learning_rate": 0.00012206697905576034, "loss": 1.0959, "step": 9568 }, { "epoch": 0.92, "grad_norm": 0.3013509779853578, "learning_rate": 0.0001220515492620091, "loss": 1.0237, "step": 9569 }, { "epoch": 0.92, "grad_norm": 0.29198852460496516, "learning_rate": 0.00012203611891640398, "loss": 1.1627, "step": 9570 }, { "epoch": 0.92, "grad_norm": 0.26850931222903324, "learning_rate": 0.00012202068801933112, "loss": 1.1425, "step": 9571 }, { "epoch": 0.92, "grad_norm": 0.30251674022231434, "learning_rate": 0.00012200525657117673, "loss": 1.1273, "step": 9572 }, { "epoch": 0.92, "grad_norm": 0.3018015700917101, "learning_rate": 0.00012198982457232698, "loss": 0.9809, "step": 9573 }, { "epoch": 0.92, "grad_norm": 0.273598131245597, "learning_rate": 0.000121974392023168, "loss": 1.0571, "step": 9574 }, { "epoch": 0.92, "grad_norm": 0.30277174215701025, "learning_rate": 0.00012195895892408609, "loss": 0.9031, "step": 9575 }, { "epoch": 0.92, "grad_norm": 0.28490196205477425, "learning_rate": 0.00012194352527546739, "loss": 1.0131, "step": 9576 }, { "epoch": 0.92, "grad_norm": 0.2715100350623435, "learning_rate": 0.0001219280910776982, "loss": 1.0771, "step": 9577 }, { "epoch": 0.92, "grad_norm": 0.2948896697832583, "learning_rate": 0.00012191265633116473, "loss": 1.0838, "step": 9578 }, { "epoch": 0.92, "grad_norm": 0.2570476008819864, "learning_rate": 0.00012189722103625332, "loss": 1.0897, "step": 9579 }, { "epoch": 0.92, "grad_norm": 0.2923182653513327, "learning_rate": 0.00012188178519335014, "loss": 1.063, "step": 9580 }, { "epoch": 0.92, "grad_norm": 0.24843565354969446, "learning_rate": 0.00012186634880284155, "loss": 1.0832, "step": 9581 }, { "epoch": 0.92, "grad_norm": 0.30405197780168924, "learning_rate": 0.00012185091186511383, "loss": 1.0964, "step": 9582 }, { "epoch": 0.92, "grad_norm": 0.28552359904155467, "learning_rate": 0.00012183547438055334, "loss": 1.0048, "step": 9583 }, { "epoch": 0.92, "grad_norm": 0.25623311580886565, "learning_rate": 0.00012182003634954635, "loss": 1.1127, "step": 9584 }, { "epoch": 0.92, "grad_norm": 0.28043711484669764, "learning_rate": 0.00012180459777247924, "loss": 0.9836, "step": 9585 }, { "epoch": 0.92, "grad_norm": 0.29306608514635557, "learning_rate": 0.00012178915864973839, "loss": 1.0981, "step": 9586 }, { "epoch": 0.92, "grad_norm": 0.2509994097045669, "learning_rate": 0.00012177371898171011, "loss": 1.0955, "step": 9587 }, { "epoch": 0.92, "grad_norm": 0.25595385108043606, "learning_rate": 0.00012175827876878085, "loss": 1.0905, "step": 9588 }, { "epoch": 0.92, "grad_norm": 0.29970697558699483, "learning_rate": 0.00012174283801133701, "loss": 1.08, "step": 9589 }, { "epoch": 0.92, "grad_norm": 0.25132811288192697, "learning_rate": 0.00012172739670976497, "loss": 0.9933, "step": 9590 }, { "epoch": 0.92, "grad_norm": 0.2795967397200255, "learning_rate": 0.00012171195486445115, "loss": 1.0812, "step": 9591 }, { "epoch": 0.92, "grad_norm": 0.27861509817589664, "learning_rate": 0.00012169651247578205, "loss": 1.0368, "step": 9592 }, { "epoch": 0.92, "grad_norm": 0.2916254831937168, "learning_rate": 0.00012168106954414406, "loss": 1.0809, "step": 9593 }, { "epoch": 0.92, "grad_norm": 0.2922945494777507, "learning_rate": 0.00012166562606992368, "loss": 1.1979, "step": 9594 }, { "epoch": 0.92, "grad_norm": 0.27637344653802837, "learning_rate": 0.0001216501820535074, "loss": 0.957, "step": 9595 }, { "epoch": 0.92, "grad_norm": 0.2890101477293797, "learning_rate": 0.00012163473749528169, "loss": 1.0743, "step": 9596 }, { "epoch": 0.92, "grad_norm": 0.27375555736611096, "learning_rate": 0.0001216192923956331, "loss": 1.0294, "step": 9597 }, { "epoch": 0.92, "grad_norm": 0.3020964878540027, "learning_rate": 0.00012160384675494811, "loss": 1.117, "step": 9598 }, { "epoch": 0.92, "grad_norm": 0.31564811381117275, "learning_rate": 0.00012158840057361332, "loss": 1.1155, "step": 9599 }, { "epoch": 0.92, "grad_norm": 0.30570724996592125, "learning_rate": 0.00012157295385201522, "loss": 1.0954, "step": 9600 }, { "epoch": 0.92, "grad_norm": 0.2772559483117768, "learning_rate": 0.00012155750659054035, "loss": 1.0597, "step": 9601 }, { "epoch": 0.92, "grad_norm": 0.27996328612745197, "learning_rate": 0.00012154205878957539, "loss": 1.0686, "step": 9602 }, { "epoch": 0.92, "grad_norm": 0.2862321218720333, "learning_rate": 0.00012152661044950684, "loss": 0.9934, "step": 9603 }, { "epoch": 0.92, "grad_norm": 0.2853452333425892, "learning_rate": 0.00012151116157072132, "loss": 1.1348, "step": 9604 }, { "epoch": 0.92, "grad_norm": 0.2617628600038958, "learning_rate": 0.00012149571215360547, "loss": 0.97, "step": 9605 }, { "epoch": 0.92, "grad_norm": 0.25696280278663025, "learning_rate": 0.00012148026219854594, "loss": 1.0261, "step": 9606 }, { "epoch": 0.92, "grad_norm": 0.310031454374293, "learning_rate": 0.00012146481170592933, "loss": 1.0158, "step": 9607 }, { "epoch": 0.92, "grad_norm": 0.2790355327820539, "learning_rate": 0.0001214493606761423, "loss": 1.1757, "step": 9608 }, { "epoch": 0.92, "grad_norm": 0.30770923269815564, "learning_rate": 0.00012143390910957157, "loss": 1.1518, "step": 9609 }, { "epoch": 0.92, "grad_norm": 0.30029358695910546, "learning_rate": 0.00012141845700660379, "loss": 1.1968, "step": 9610 }, { "epoch": 0.92, "grad_norm": 0.3098726943709629, "learning_rate": 0.00012140300436762564, "loss": 1.0352, "step": 9611 }, { "epoch": 0.92, "grad_norm": 0.2959419907143475, "learning_rate": 0.00012138755119302388, "loss": 1.0186, "step": 9612 }, { "epoch": 0.92, "grad_norm": 0.27785698197656766, "learning_rate": 0.00012137209748318521, "loss": 1.0375, "step": 9613 }, { "epoch": 0.92, "grad_norm": 0.2930400342362595, "learning_rate": 0.00012135664323849634, "loss": 0.9418, "step": 9614 }, { "epoch": 0.92, "grad_norm": 0.28085631013272555, "learning_rate": 0.00012134118845934405, "loss": 1.0465, "step": 9615 }, { "epoch": 0.92, "grad_norm": 0.2541256790591151, "learning_rate": 0.00012132573314611516, "loss": 1.0293, "step": 9616 }, { "epoch": 0.92, "grad_norm": 0.28908641994102546, "learning_rate": 0.00012131027729919633, "loss": 1.078, "step": 9617 }, { "epoch": 0.92, "grad_norm": 0.2772893250241189, "learning_rate": 0.00012129482091897446, "loss": 1.1151, "step": 9618 }, { "epoch": 0.92, "grad_norm": 0.327164314875272, "learning_rate": 0.00012127936400583629, "loss": 1.0591, "step": 9619 }, { "epoch": 0.92, "grad_norm": 0.2886231869799765, "learning_rate": 0.00012126390656016866, "loss": 1.0113, "step": 9620 }, { "epoch": 0.92, "grad_norm": 0.283680263633664, "learning_rate": 0.0001212484485823584, "loss": 1.0249, "step": 9621 }, { "epoch": 0.92, "grad_norm": 0.26111321954434336, "learning_rate": 0.00012123299007279238, "loss": 1.0342, "step": 9622 }, { "epoch": 0.92, "grad_norm": 0.2955195301556354, "learning_rate": 0.00012121753103185745, "loss": 0.9461, "step": 9623 }, { "epoch": 0.92, "grad_norm": 0.27409190677236495, "learning_rate": 0.00012120207145994045, "loss": 1.0637, "step": 9624 }, { "epoch": 0.92, "grad_norm": 0.26924614303903166, "learning_rate": 0.00012118661135742828, "loss": 0.9253, "step": 9625 }, { "epoch": 0.92, "grad_norm": 0.2692477485784918, "learning_rate": 0.00012117115072470788, "loss": 1.0827, "step": 9626 }, { "epoch": 0.92, "grad_norm": 0.26774970205249055, "learning_rate": 0.0001211556895621661, "loss": 1.0528, "step": 9627 }, { "epoch": 0.92, "grad_norm": 0.29867507929874776, "learning_rate": 0.00012114022787018988, "loss": 1.0436, "step": 9628 }, { "epoch": 0.92, "grad_norm": 0.3208070747287111, "learning_rate": 0.00012112476564916622, "loss": 1.1365, "step": 9629 }, { "epoch": 0.92, "grad_norm": 0.2972966476878782, "learning_rate": 0.000121109302899482, "loss": 1.1186, "step": 9630 }, { "epoch": 0.92, "grad_norm": 0.30544954734093066, "learning_rate": 0.00012109383962152416, "loss": 1.1047, "step": 9631 }, { "epoch": 0.92, "grad_norm": 0.2967657674672275, "learning_rate": 0.00012107837581567977, "loss": 1.072, "step": 9632 }, { "epoch": 0.92, "grad_norm": 0.3033096939499795, "learning_rate": 0.00012106291148233579, "loss": 1.0254, "step": 9633 }, { "epoch": 0.92, "grad_norm": 0.26626395824315857, "learning_rate": 0.00012104744662187922, "loss": 0.9941, "step": 9634 }, { "epoch": 0.92, "grad_norm": 0.2895890185705288, "learning_rate": 0.00012103198123469704, "loss": 1.0531, "step": 9635 }, { "epoch": 0.92, "grad_norm": 0.2957215592529329, "learning_rate": 0.00012101651532117632, "loss": 1.0199, "step": 9636 }, { "epoch": 0.92, "grad_norm": 0.2966423558454665, "learning_rate": 0.00012100104888170407, "loss": 0.9559, "step": 9637 }, { "epoch": 0.92, "grad_norm": 0.3146917874326988, "learning_rate": 0.00012098558191666742, "loss": 1.1623, "step": 9638 }, { "epoch": 0.92, "grad_norm": 0.33430522861396766, "learning_rate": 0.00012097011442645337, "loss": 1.0295, "step": 9639 }, { "epoch": 0.92, "grad_norm": 0.2725271575959663, "learning_rate": 0.00012095464641144902, "loss": 1.0745, "step": 9640 }, { "epoch": 0.92, "grad_norm": 0.32836523657529365, "learning_rate": 0.00012093917787204148, "loss": 1.0739, "step": 9641 }, { "epoch": 0.92, "grad_norm": 0.2905477137452141, "learning_rate": 0.00012092370880861786, "loss": 0.9218, "step": 9642 }, { "epoch": 0.92, "grad_norm": 0.28312434963987126, "learning_rate": 0.00012090823922156526, "loss": 1.1302, "step": 9643 }, { "epoch": 0.92, "grad_norm": 0.31119878698668474, "learning_rate": 0.00012089276911127088, "loss": 1.1067, "step": 9644 }, { "epoch": 0.92, "grad_norm": 0.24999334472319124, "learning_rate": 0.00012087729847812176, "loss": 1.0741, "step": 9645 }, { "epoch": 0.92, "grad_norm": 0.2543284979471403, "learning_rate": 0.00012086182732250517, "loss": 0.9579, "step": 9646 }, { "epoch": 0.92, "grad_norm": 0.2594464862081111, "learning_rate": 0.00012084635564480824, "loss": 1.0921, "step": 9647 }, { "epoch": 0.92, "grad_norm": 0.3158864173446626, "learning_rate": 0.00012083088344541813, "loss": 1.1075, "step": 9648 }, { "epoch": 0.92, "grad_norm": 0.29277153816070817, "learning_rate": 0.00012081541072472208, "loss": 1.0819, "step": 9649 }, { "epoch": 0.92, "grad_norm": 0.285551700825859, "learning_rate": 0.00012079993748310729, "loss": 1.1289, "step": 9650 }, { "epoch": 0.92, "grad_norm": 0.26578837779844633, "learning_rate": 0.00012078446372096102, "loss": 1.0786, "step": 9651 }, { "epoch": 0.92, "grad_norm": 0.2770368943423453, "learning_rate": 0.00012076898943867046, "loss": 1.2504, "step": 9652 }, { "epoch": 0.92, "grad_norm": 0.2915791206912121, "learning_rate": 0.0001207535146366229, "loss": 1.0691, "step": 9653 }, { "epoch": 0.92, "grad_norm": 0.29701485898418156, "learning_rate": 0.00012073803931520557, "loss": 1.0343, "step": 9654 }, { "epoch": 0.92, "grad_norm": 0.3164931927910207, "learning_rate": 0.0001207225634748058, "loss": 1.0555, "step": 9655 }, { "epoch": 0.92, "grad_norm": 0.26381700408017583, "learning_rate": 0.00012070708711581083, "loss": 1.1291, "step": 9656 }, { "epoch": 0.92, "grad_norm": 0.29962327944839096, "learning_rate": 0.00012069161023860802, "loss": 1.039, "step": 9657 }, { "epoch": 0.92, "grad_norm": 0.3144393601015309, "learning_rate": 0.00012067613284358461, "loss": 1.1407, "step": 9658 }, { "epoch": 0.92, "grad_norm": 0.29485729590956794, "learning_rate": 0.00012066065493112803, "loss": 0.9772, "step": 9659 }, { "epoch": 0.92, "grad_norm": 0.3317695176709224, "learning_rate": 0.00012064517650162555, "loss": 1.0321, "step": 9660 }, { "epoch": 0.92, "grad_norm": 0.2795257128963298, "learning_rate": 0.00012062969755546456, "loss": 1.1162, "step": 9661 }, { "epoch": 0.92, "grad_norm": 0.3308060051457206, "learning_rate": 0.00012061421809303241, "loss": 0.9852, "step": 9662 }, { "epoch": 0.92, "grad_norm": 0.284967097310632, "learning_rate": 0.00012059873811471651, "loss": 0.9865, "step": 9663 }, { "epoch": 0.92, "grad_norm": 0.31468502602583576, "learning_rate": 0.00012058325762090426, "loss": 1.0112, "step": 9664 }, { "epoch": 0.92, "grad_norm": 0.2760079699322398, "learning_rate": 0.00012056777661198301, "loss": 0.9597, "step": 9665 }, { "epoch": 0.92, "grad_norm": 0.2911837084304875, "learning_rate": 0.00012055229508834027, "loss": 1.0258, "step": 9666 }, { "epoch": 0.92, "grad_norm": 0.30981871374584724, "learning_rate": 0.00012053681305036342, "loss": 1.1091, "step": 9667 }, { "epoch": 0.92, "grad_norm": 0.28199493834041306, "learning_rate": 0.00012052133049843992, "loss": 0.9613, "step": 9668 }, { "epoch": 0.93, "grad_norm": 0.2833626576757252, "learning_rate": 0.00012050584743295718, "loss": 1.0918, "step": 9669 }, { "epoch": 0.93, "grad_norm": 0.32179313490464345, "learning_rate": 0.00012049036385430277, "loss": 1.0589, "step": 9670 }, { "epoch": 0.93, "grad_norm": 0.3529820688530236, "learning_rate": 0.0001204748797628641, "loss": 1.0392, "step": 9671 }, { "epoch": 0.93, "grad_norm": 0.316182685666834, "learning_rate": 0.00012045939515902872, "loss": 1.091, "step": 9672 }, { "epoch": 0.93, "grad_norm": 0.3132427307808237, "learning_rate": 0.00012044391004318409, "loss": 1.1227, "step": 9673 }, { "epoch": 0.93, "grad_norm": 0.2997545510559344, "learning_rate": 0.0001204284244157178, "loss": 1.0994, "step": 9674 }, { "epoch": 0.93, "grad_norm": 0.2536805673335262, "learning_rate": 0.00012041293827701729, "loss": 0.9636, "step": 9675 }, { "epoch": 0.93, "grad_norm": 0.2876440010059215, "learning_rate": 0.00012039745162747022, "loss": 1.1536, "step": 9676 }, { "epoch": 0.93, "grad_norm": 0.2691609443880876, "learning_rate": 0.0001203819644674641, "loss": 1.1113, "step": 9677 }, { "epoch": 0.93, "grad_norm": 0.25701727543705255, "learning_rate": 0.00012036647679738649, "loss": 0.9819, "step": 9678 }, { "epoch": 0.93, "grad_norm": 0.29817078458954743, "learning_rate": 0.00012035098861762502, "loss": 0.9931, "step": 9679 }, { "epoch": 0.93, "grad_norm": 0.2642831901167002, "learning_rate": 0.00012033549992856726, "loss": 1.0036, "step": 9680 }, { "epoch": 0.93, "grad_norm": 0.29959073213056514, "learning_rate": 0.00012032001073060082, "loss": 1.1449, "step": 9681 }, { "epoch": 0.93, "grad_norm": 0.32204374587710666, "learning_rate": 0.00012030452102411333, "loss": 0.9079, "step": 9682 }, { "epoch": 0.93, "grad_norm": 0.30732832828204176, "learning_rate": 0.00012028903080949248, "loss": 0.8944, "step": 9683 }, { "epoch": 0.93, "grad_norm": 0.27203865429830504, "learning_rate": 0.00012027354008712588, "loss": 0.9959, "step": 9684 }, { "epoch": 0.93, "grad_norm": 0.31723507213333874, "learning_rate": 0.0001202580488574012, "loss": 1.0008, "step": 9685 }, { "epoch": 0.93, "grad_norm": 0.2753881383976869, "learning_rate": 0.00012024255712070607, "loss": 0.9831, "step": 9686 }, { "epoch": 0.93, "grad_norm": 0.2905972632173138, "learning_rate": 0.00012022706487742827, "loss": 0.9718, "step": 9687 }, { "epoch": 0.93, "grad_norm": 0.3441351333384633, "learning_rate": 0.00012021157212795544, "loss": 1.1038, "step": 9688 }, { "epoch": 0.93, "grad_norm": 0.30829573394192783, "learning_rate": 0.00012019607887267532, "loss": 1.0747, "step": 9689 }, { "epoch": 0.93, "grad_norm": 0.3116599969382794, "learning_rate": 0.00012018058511197563, "loss": 0.9906, "step": 9690 }, { "epoch": 0.93, "grad_norm": 0.32153118671256326, "learning_rate": 0.00012016509084624413, "loss": 1.0987, "step": 9691 }, { "epoch": 0.93, "grad_norm": 0.30020496331906704, "learning_rate": 0.00012014959607586853, "loss": 0.9893, "step": 9692 }, { "epoch": 0.93, "grad_norm": 0.2488961913424749, "learning_rate": 0.00012013410080123666, "loss": 1.0128, "step": 9693 }, { "epoch": 0.93, "grad_norm": 0.2894864987548495, "learning_rate": 0.00012011860502273625, "loss": 1.0898, "step": 9694 }, { "epoch": 0.93, "grad_norm": 0.339459814596149, "learning_rate": 0.0001201031087407551, "loss": 0.972, "step": 9695 }, { "epoch": 0.93, "grad_norm": 0.28936642655855416, "learning_rate": 0.00012008761195568101, "loss": 1.1054, "step": 9696 }, { "epoch": 0.93, "grad_norm": 0.29865336998518743, "learning_rate": 0.00012007211466790183, "loss": 1.0518, "step": 9697 }, { "epoch": 0.93, "grad_norm": 0.29410810287938477, "learning_rate": 0.00012005661687780537, "loss": 1.0348, "step": 9698 }, { "epoch": 0.93, "grad_norm": 0.28715006026168827, "learning_rate": 0.00012004111858577942, "loss": 1.0277, "step": 9699 }, { "epoch": 0.93, "grad_norm": 0.3246035439472215, "learning_rate": 0.00012002561979221191, "loss": 1.0978, "step": 9700 }, { "epoch": 0.93, "grad_norm": 0.28714503427033067, "learning_rate": 0.00012001012049749067, "loss": 1.0528, "step": 9701 }, { "epoch": 0.93, "grad_norm": 0.2999877264540504, "learning_rate": 0.0001199946207020036, "loss": 1.0535, "step": 9702 }, { "epoch": 0.93, "grad_norm": 0.2831842528289797, "learning_rate": 0.00011997912040613856, "loss": 1.0861, "step": 9703 }, { "epoch": 0.93, "grad_norm": 0.27863916487179596, "learning_rate": 0.00011996361961028351, "loss": 1.0172, "step": 9704 }, { "epoch": 0.93, "grad_norm": 0.3076428593870626, "learning_rate": 0.00011994811831482633, "loss": 1.154, "step": 9705 }, { "epoch": 0.93, "grad_norm": 0.3178394621331127, "learning_rate": 0.00011993261652015493, "loss": 1.0459, "step": 9706 }, { "epoch": 0.93, "grad_norm": 0.2834780610000245, "learning_rate": 0.00011991711422665728, "loss": 0.8991, "step": 9707 }, { "epoch": 0.93, "grad_norm": 0.28248971237831144, "learning_rate": 0.00011990161143472134, "loss": 1.0412, "step": 9708 }, { "epoch": 0.93, "grad_norm": 0.37293406629713854, "learning_rate": 0.00011988610814473504, "loss": 1.1236, "step": 9709 }, { "epoch": 0.93, "grad_norm": 0.29203647947031824, "learning_rate": 0.00011987060435708643, "loss": 0.972, "step": 9710 }, { "epoch": 0.93, "grad_norm": 0.2784065820475021, "learning_rate": 0.00011985510007216343, "loss": 1.0975, "step": 9711 }, { "epoch": 0.93, "grad_norm": 0.2967922411536282, "learning_rate": 0.0001198395952903541, "loss": 1.0979, "step": 9712 }, { "epoch": 0.93, "grad_norm": 0.29200788752529727, "learning_rate": 0.00011982409001204637, "loss": 1.0726, "step": 9713 }, { "epoch": 0.93, "grad_norm": 0.2692696205517818, "learning_rate": 0.00011980858423762837, "loss": 1.0631, "step": 9714 }, { "epoch": 0.93, "grad_norm": 0.2965913502544387, "learning_rate": 0.00011979307796748811, "loss": 1.1334, "step": 9715 }, { "epoch": 0.93, "grad_norm": 0.28309793277521694, "learning_rate": 0.0001197775712020136, "loss": 1.1148, "step": 9716 }, { "epoch": 0.93, "grad_norm": 0.2842362819659992, "learning_rate": 0.00011976206394159297, "loss": 1.089, "step": 9717 }, { "epoch": 0.93, "grad_norm": 0.30869020384451756, "learning_rate": 0.00011974655618661425, "loss": 1.1634, "step": 9718 }, { "epoch": 0.93, "grad_norm": 0.2919947030319265, "learning_rate": 0.00011973104793746554, "loss": 0.9976, "step": 9719 }, { "epoch": 0.93, "grad_norm": 0.28107687880463733, "learning_rate": 0.000119715539194535, "loss": 1.0656, "step": 9720 }, { "epoch": 0.93, "grad_norm": 0.2538194528442572, "learning_rate": 0.00011970002995821069, "loss": 0.9763, "step": 9721 }, { "epoch": 0.93, "grad_norm": 0.3042314896103001, "learning_rate": 0.0001196845202288807, "loss": 1.1705, "step": 9722 }, { "epoch": 0.93, "grad_norm": 0.2572188222835515, "learning_rate": 0.00011966901000693325, "loss": 1.0154, "step": 9723 }, { "epoch": 0.93, "grad_norm": 0.28736803000564753, "learning_rate": 0.00011965349929275646, "loss": 0.9974, "step": 9724 }, { "epoch": 0.93, "grad_norm": 0.31110944588178774, "learning_rate": 0.00011963798808673852, "loss": 1.0147, "step": 9725 }, { "epoch": 0.93, "grad_norm": 0.2875577964788188, "learning_rate": 0.00011962247638926755, "loss": 1.0678, "step": 9726 }, { "epoch": 0.93, "grad_norm": 0.2700897281383975, "learning_rate": 0.00011960696420073181, "loss": 1.1177, "step": 9727 }, { "epoch": 0.93, "grad_norm": 0.2806117435648341, "learning_rate": 0.00011959145152151947, "loss": 1.012, "step": 9728 }, { "epoch": 0.93, "grad_norm": 0.31209826283604314, "learning_rate": 0.00011957593835201875, "loss": 0.9591, "step": 9729 }, { "epoch": 0.93, "grad_norm": 0.2750239320324656, "learning_rate": 0.00011956042469261781, "loss": 1.1095, "step": 9730 }, { "epoch": 0.93, "grad_norm": 0.28264832045438565, "learning_rate": 0.000119544910543705, "loss": 1.037, "step": 9731 }, { "epoch": 0.93, "grad_norm": 0.3300651441181935, "learning_rate": 0.00011952939590566852, "loss": 1.0304, "step": 9732 }, { "epoch": 0.93, "grad_norm": 0.2859521337438915, "learning_rate": 0.0001195138807788966, "loss": 0.969, "step": 9733 }, { "epoch": 0.93, "grad_norm": 0.2828356666065811, "learning_rate": 0.00011949836516377759, "loss": 1.004, "step": 9734 }, { "epoch": 0.93, "grad_norm": 0.2696647965723336, "learning_rate": 0.00011948284906069974, "loss": 1.089, "step": 9735 }, { "epoch": 0.93, "grad_norm": 0.2708560032776781, "learning_rate": 0.00011946733247005131, "loss": 0.9534, "step": 9736 }, { "epoch": 0.93, "grad_norm": 0.3142091199851108, "learning_rate": 0.00011945181539222065, "loss": 1.1159, "step": 9737 }, { "epoch": 0.93, "grad_norm": 0.2580939496791897, "learning_rate": 0.00011943629782759611, "loss": 0.9785, "step": 9738 }, { "epoch": 0.93, "grad_norm": 0.29871072295327306, "learning_rate": 0.00011942077977656601, "loss": 1.072, "step": 9739 }, { "epoch": 0.93, "grad_norm": 0.28134297529949587, "learning_rate": 0.00011940526123951865, "loss": 1.0097, "step": 9740 }, { "epoch": 0.93, "grad_norm": 0.27030245416409626, "learning_rate": 0.00011938974221684248, "loss": 1.0362, "step": 9741 }, { "epoch": 0.93, "grad_norm": 0.27891127006754407, "learning_rate": 0.00011937422270892578, "loss": 1.1503, "step": 9742 }, { "epoch": 0.93, "grad_norm": 0.2527819203001018, "learning_rate": 0.00011935870271615701, "loss": 1.0682, "step": 9743 }, { "epoch": 0.93, "grad_norm": 0.25930692949225115, "learning_rate": 0.00011934318223892451, "loss": 1.062, "step": 9744 }, { "epoch": 0.93, "grad_norm": 0.2875000837745984, "learning_rate": 0.00011932766127761675, "loss": 0.9477, "step": 9745 }, { "epoch": 0.93, "grad_norm": 0.325246715534001, "learning_rate": 0.00011931213983262211, "loss": 1.0165, "step": 9746 }, { "epoch": 0.93, "grad_norm": 0.25662864976415845, "learning_rate": 0.00011929661790432903, "loss": 1.0786, "step": 9747 }, { "epoch": 0.93, "grad_norm": 0.260785274906786, "learning_rate": 0.00011928109549312596, "loss": 1.0819, "step": 9748 }, { "epoch": 0.93, "grad_norm": 0.24971660647552188, "learning_rate": 0.00011926557259940137, "loss": 1.1172, "step": 9749 }, { "epoch": 0.93, "grad_norm": 0.2634144133215548, "learning_rate": 0.00011925004922354368, "loss": 1.0457, "step": 9750 }, { "epoch": 0.93, "grad_norm": 0.28481353084008365, "learning_rate": 0.00011923452536594144, "loss": 1.0667, "step": 9751 }, { "epoch": 0.93, "grad_norm": 0.2480420939966222, "learning_rate": 0.00011921900102698312, "loss": 1.0387, "step": 9752 }, { "epoch": 0.93, "grad_norm": 0.290303015413711, "learning_rate": 0.00011920347620705719, "loss": 0.9624, "step": 9753 }, { "epoch": 0.93, "grad_norm": 0.30582174621304614, "learning_rate": 0.00011918795090655221, "loss": 0.982, "step": 9754 }, { "epoch": 0.93, "grad_norm": 0.2655028218340249, "learning_rate": 0.00011917242512585674, "loss": 1.0536, "step": 9755 }, { "epoch": 0.93, "grad_norm": 0.27477226526624493, "learning_rate": 0.00011915689886535923, "loss": 0.9856, "step": 9756 }, { "epoch": 0.93, "grad_norm": 0.28682754344560973, "learning_rate": 0.00011914137212544831, "loss": 1.1707, "step": 9757 }, { "epoch": 0.93, "grad_norm": 0.26153050639845216, "learning_rate": 0.00011912584490651253, "loss": 1.0497, "step": 9758 }, { "epoch": 0.93, "grad_norm": 0.28229038067976947, "learning_rate": 0.00011911031720894046, "loss": 0.9965, "step": 9759 }, { "epoch": 0.93, "grad_norm": 0.28006029083616607, "learning_rate": 0.00011909478903312066, "loss": 1.0668, "step": 9760 }, { "epoch": 0.93, "grad_norm": 0.27740453645729457, "learning_rate": 0.00011907926037944179, "loss": 1.0366, "step": 9761 }, { "epoch": 0.93, "grad_norm": 0.2384257527120478, "learning_rate": 0.00011906373124829244, "loss": 1.0827, "step": 9762 }, { "epoch": 0.93, "grad_norm": 0.293678354692542, "learning_rate": 0.0001190482016400612, "loss": 1.0645, "step": 9763 }, { "epoch": 0.93, "grad_norm": 0.2938241245866292, "learning_rate": 0.00011903267155513677, "loss": 1.055, "step": 9764 }, { "epoch": 0.93, "grad_norm": 0.26615653569909886, "learning_rate": 0.00011901714099390777, "loss": 1.0972, "step": 9765 }, { "epoch": 0.93, "grad_norm": 0.28171980966420157, "learning_rate": 0.00011900160995676288, "loss": 0.9422, "step": 9766 }, { "epoch": 0.93, "grad_norm": 0.26740119824230185, "learning_rate": 0.00011898607844409073, "loss": 1.0326, "step": 9767 }, { "epoch": 0.93, "grad_norm": 0.27023112187042453, "learning_rate": 0.00011897054645628005, "loss": 0.9777, "step": 9768 }, { "epoch": 0.93, "grad_norm": 0.3200885822581613, "learning_rate": 0.00011895501399371953, "loss": 1.0587, "step": 9769 }, { "epoch": 0.93, "grad_norm": 0.31831634758810756, "learning_rate": 0.00011893948105679787, "loss": 1.0764, "step": 9770 }, { "epoch": 0.93, "grad_norm": 0.3081776019474058, "learning_rate": 0.00011892394764590378, "loss": 1.0104, "step": 9771 }, { "epoch": 0.93, "grad_norm": 0.28957521753453463, "learning_rate": 0.00011890841376142603, "loss": 1.0592, "step": 9772 }, { "epoch": 0.93, "grad_norm": 0.28207411325497395, "learning_rate": 0.00011889287940375334, "loss": 1.104, "step": 9773 }, { "epoch": 0.94, "grad_norm": 0.36180324747623, "learning_rate": 0.00011887734457327443, "loss": 1.0525, "step": 9774 }, { "epoch": 0.94, "grad_norm": 0.2910045433010645, "learning_rate": 0.00011886180927037815, "loss": 1.087, "step": 9775 }, { "epoch": 0.94, "grad_norm": 0.28543697105123744, "learning_rate": 0.00011884627349545323, "loss": 1.0952, "step": 9776 }, { "epoch": 0.94, "grad_norm": 0.2849455865427251, "learning_rate": 0.00011883073724888844, "loss": 1.0036, "step": 9777 }, { "epoch": 0.94, "grad_norm": 0.3445968201656461, "learning_rate": 0.00011881520053107267, "loss": 1.0498, "step": 9778 }, { "epoch": 0.94, "grad_norm": 0.2506321633298875, "learning_rate": 0.00011879966334239466, "loss": 1.1624, "step": 9779 }, { "epoch": 0.94, "grad_norm": 0.29162103318787663, "learning_rate": 0.00011878412568324322, "loss": 1.0233, "step": 9780 }, { "epoch": 0.94, "grad_norm": 0.29500101419963837, "learning_rate": 0.00011876858755400728, "loss": 1.0922, "step": 9781 }, { "epoch": 0.94, "grad_norm": 0.3009884592875106, "learning_rate": 0.00011875304895507562, "loss": 1.1165, "step": 9782 }, { "epoch": 0.94, "grad_norm": 0.27565694136640945, "learning_rate": 0.00011873750988683712, "loss": 1.0688, "step": 9783 }, { "epoch": 0.94, "grad_norm": 0.3273050375332514, "learning_rate": 0.00011872197034968067, "loss": 1.1118, "step": 9784 }, { "epoch": 0.94, "grad_norm": 0.29960996186748606, "learning_rate": 0.00011870643034399514, "loss": 1.2125, "step": 9785 }, { "epoch": 0.94, "grad_norm": 0.2964779408152491, "learning_rate": 0.00011869088987016943, "loss": 1.1393, "step": 9786 }, { "epoch": 0.94, "grad_norm": 0.2858838476728597, "learning_rate": 0.00011867534892859244, "loss": 1.0267, "step": 9787 }, { "epoch": 0.94, "grad_norm": 0.2852488900654838, "learning_rate": 0.00011865980751965313, "loss": 1.1121, "step": 9788 }, { "epoch": 0.94, "grad_norm": 0.29119397522264384, "learning_rate": 0.00011864426564374043, "loss": 1.0825, "step": 9789 }, { "epoch": 0.94, "grad_norm": 0.2594508280784169, "learning_rate": 0.00011862872330124324, "loss": 1.154, "step": 9790 }, { "epoch": 0.94, "grad_norm": 0.2975677876895368, "learning_rate": 0.00011861318049255052, "loss": 1.0173, "step": 9791 }, { "epoch": 0.94, "grad_norm": 0.2648318009564742, "learning_rate": 0.00011859763721805128, "loss": 1.0818, "step": 9792 }, { "epoch": 0.94, "grad_norm": 0.2475611229418435, "learning_rate": 0.00011858209347813449, "loss": 0.9974, "step": 9793 }, { "epoch": 0.94, "grad_norm": 0.24368772705519215, "learning_rate": 0.00011856654927318914, "loss": 1.0076, "step": 9794 }, { "epoch": 0.94, "grad_norm": 0.2589222900696365, "learning_rate": 0.0001185510046036042, "loss": 1.0072, "step": 9795 }, { "epoch": 0.94, "grad_norm": 0.2518421468766372, "learning_rate": 0.00011853545946976874, "loss": 1.1606, "step": 9796 }, { "epoch": 0.94, "grad_norm": 0.295292430219631, "learning_rate": 0.00011851991387207171, "loss": 0.98, "step": 9797 }, { "epoch": 0.94, "grad_norm": 0.29245516327817817, "learning_rate": 0.00011850436781090223, "loss": 1.0599, "step": 9798 }, { "epoch": 0.94, "grad_norm": 0.2914565208305866, "learning_rate": 0.00011848882128664933, "loss": 0.9911, "step": 9799 }, { "epoch": 0.94, "grad_norm": 0.3078689389856212, "learning_rate": 0.00011847327429970203, "loss": 0.9396, "step": 9800 }, { "epoch": 0.94, "grad_norm": 0.3127304428982239, "learning_rate": 0.00011845772685044945, "loss": 1.0645, "step": 9801 }, { "epoch": 0.94, "grad_norm": 0.2873883647576085, "learning_rate": 0.00011844217893928064, "loss": 0.9597, "step": 9802 }, { "epoch": 0.94, "grad_norm": 0.29524076369164753, "learning_rate": 0.00011842663056658471, "loss": 1.054, "step": 9803 }, { "epoch": 0.94, "grad_norm": 0.30216797407479606, "learning_rate": 0.00011841108173275078, "loss": 1.0366, "step": 9804 }, { "epoch": 0.94, "grad_norm": 0.2683352824632881, "learning_rate": 0.00011839553243816794, "loss": 1.1457, "step": 9805 }, { "epoch": 0.94, "grad_norm": 0.3088671649045264, "learning_rate": 0.00011837998268322535, "loss": 0.9811, "step": 9806 }, { "epoch": 0.94, "grad_norm": 0.3127798071067639, "learning_rate": 0.00011836443246831215, "loss": 1.0054, "step": 9807 }, { "epoch": 0.94, "grad_norm": 0.2958330910482311, "learning_rate": 0.00011834888179381746, "loss": 0.9717, "step": 9808 }, { "epoch": 0.94, "grad_norm": 0.24632414148720827, "learning_rate": 0.00011833333066013051, "loss": 1.0858, "step": 9809 }, { "epoch": 0.94, "grad_norm": 0.2723921866158821, "learning_rate": 0.00011831777906764044, "loss": 1.0244, "step": 9810 }, { "epoch": 0.94, "grad_norm": 0.2975337869324926, "learning_rate": 0.00011830222701673639, "loss": 1.0093, "step": 9811 }, { "epoch": 0.94, "grad_norm": 0.28650604212889696, "learning_rate": 0.00011828667450780764, "loss": 1.069, "step": 9812 }, { "epoch": 0.94, "grad_norm": 0.27777382822296665, "learning_rate": 0.00011827112154124338, "loss": 1.1207, "step": 9813 }, { "epoch": 0.94, "grad_norm": 0.2870567816741802, "learning_rate": 0.00011825556811743279, "loss": 1.1142, "step": 9814 }, { "epoch": 0.94, "grad_norm": 0.30386027251939157, "learning_rate": 0.00011824001423676513, "loss": 1.1055, "step": 9815 }, { "epoch": 0.94, "grad_norm": 0.2705336512672914, "learning_rate": 0.00011822445989962969, "loss": 1.1277, "step": 9816 }, { "epoch": 0.94, "grad_norm": 0.29969456459847577, "learning_rate": 0.00011820890510641567, "loss": 1.0124, "step": 9817 }, { "epoch": 0.94, "grad_norm": 0.30298340488413317, "learning_rate": 0.00011819334985751233, "loss": 1.0302, "step": 9818 }, { "epoch": 0.94, "grad_norm": 0.27778928676842507, "learning_rate": 0.00011817779415330901, "loss": 1.0885, "step": 9819 }, { "epoch": 0.94, "grad_norm": 0.300820511952422, "learning_rate": 0.00011816223799419497, "loss": 1.0825, "step": 9820 }, { "epoch": 0.94, "grad_norm": 0.3444116599675863, "learning_rate": 0.00011814668138055947, "loss": 1.098, "step": 9821 }, { "epoch": 0.94, "grad_norm": 0.27136608383916344, "learning_rate": 0.0001181311243127919, "loss": 1.1083, "step": 9822 }, { "epoch": 0.94, "grad_norm": 0.29143092481382066, "learning_rate": 0.00011811556679128153, "loss": 1.0637, "step": 9823 }, { "epoch": 0.94, "grad_norm": 0.25973448401962235, "learning_rate": 0.00011810000881641771, "loss": 1.0963, "step": 9824 }, { "epoch": 0.94, "grad_norm": 0.2774775599714097, "learning_rate": 0.00011808445038858982, "loss": 1.0984, "step": 9825 }, { "epoch": 0.94, "grad_norm": 0.263532587397457, "learning_rate": 0.00011806889150818716, "loss": 0.9681, "step": 9826 }, { "epoch": 0.94, "grad_norm": 0.3141288079300534, "learning_rate": 0.00011805333217559918, "loss": 1.0527, "step": 9827 }, { "epoch": 0.94, "grad_norm": 0.31147150343587526, "learning_rate": 0.00011803777239121516, "loss": 1.0198, "step": 9828 }, { "epoch": 0.94, "grad_norm": 0.2732581866158368, "learning_rate": 0.00011802221215542459, "loss": 1.0903, "step": 9829 }, { "epoch": 0.94, "grad_norm": 0.28505766654611703, "learning_rate": 0.00011800665146861683, "loss": 1.0171, "step": 9830 }, { "epoch": 0.94, "grad_norm": 0.28825531298858104, "learning_rate": 0.00011799109033118127, "loss": 1.1042, "step": 9831 }, { "epoch": 0.94, "grad_norm": 0.28681592008334156, "learning_rate": 0.00011797552874350739, "loss": 1.0476, "step": 9832 }, { "epoch": 0.94, "grad_norm": 0.3123668239526817, "learning_rate": 0.00011795996670598462, "loss": 1.0743, "step": 9833 }, { "epoch": 0.94, "grad_norm": 0.2418704079745117, "learning_rate": 0.0001179444042190024, "loss": 1.0915, "step": 9834 }, { "epoch": 0.94, "grad_norm": 0.3233001919313393, "learning_rate": 0.00011792884128295014, "loss": 1.1112, "step": 9835 }, { "epoch": 0.94, "grad_norm": 0.23543282622618317, "learning_rate": 0.0001179132778982174, "loss": 0.9408, "step": 9836 }, { "epoch": 0.94, "grad_norm": 0.27953787864369156, "learning_rate": 0.00011789771406519361, "loss": 1.0951, "step": 9837 }, { "epoch": 0.94, "grad_norm": 0.26888014225579154, "learning_rate": 0.00011788214978426827, "loss": 1.0421, "step": 9838 }, { "epoch": 0.94, "grad_norm": 0.28757566533635587, "learning_rate": 0.0001178665850558309, "loss": 1.1287, "step": 9839 }, { "epoch": 0.94, "grad_norm": 0.3053049659715436, "learning_rate": 0.00011785101988027103, "loss": 1.0709, "step": 9840 }, { "epoch": 0.94, "grad_norm": 0.2693832593847435, "learning_rate": 0.00011783545425797813, "loss": 1.178, "step": 9841 }, { "epoch": 0.94, "grad_norm": 0.29150202296705147, "learning_rate": 0.0001178198881893418, "loss": 1.1276, "step": 9842 }, { "epoch": 0.94, "grad_norm": 0.28026413298185754, "learning_rate": 0.00011780432167475157, "loss": 1.1026, "step": 9843 }, { "epoch": 0.94, "grad_norm": 0.2883237105659363, "learning_rate": 0.00011778875471459703, "loss": 1.029, "step": 9844 }, { "epoch": 0.94, "grad_norm": 0.2882413609766784, "learning_rate": 0.00011777318730926768, "loss": 1.1348, "step": 9845 }, { "epoch": 0.94, "grad_norm": 0.3024147348726725, "learning_rate": 0.00011775761945915315, "loss": 1.1326, "step": 9846 }, { "epoch": 0.94, "grad_norm": 0.27230068738152696, "learning_rate": 0.00011774205116464304, "loss": 1.1274, "step": 9847 }, { "epoch": 0.94, "grad_norm": 0.2671829447086581, "learning_rate": 0.00011772648242612694, "loss": 0.999, "step": 9848 }, { "epoch": 0.94, "grad_norm": 0.27183496557565345, "learning_rate": 0.00011771091324399447, "loss": 0.9818, "step": 9849 }, { "epoch": 0.94, "grad_norm": 0.30565783107867467, "learning_rate": 0.0001176953436186353, "loss": 1.0334, "step": 9850 }, { "epoch": 0.94, "grad_norm": 0.2672952617830373, "learning_rate": 0.00011767977355043902, "loss": 1.014, "step": 9851 }, { "epoch": 0.94, "grad_norm": 0.25977834030458224, "learning_rate": 0.00011766420303979528, "loss": 1.0847, "step": 9852 }, { "epoch": 0.94, "grad_norm": 0.29226617360753704, "learning_rate": 0.00011764863208709378, "loss": 1.1198, "step": 9853 }, { "epoch": 0.94, "grad_norm": 0.2710445438667288, "learning_rate": 0.00011763306069272415, "loss": 1.1502, "step": 9854 }, { "epoch": 0.94, "grad_norm": 0.2683767291753003, "learning_rate": 0.00011761748885707611, "loss": 1.0218, "step": 9855 }, { "epoch": 0.94, "grad_norm": 0.30996151508397074, "learning_rate": 0.00011760191658053933, "loss": 1.0196, "step": 9856 }, { "epoch": 0.94, "grad_norm": 0.2836262926909511, "learning_rate": 0.00011758634386350353, "loss": 0.9859, "step": 9857 }, { "epoch": 0.94, "grad_norm": 0.2835130303902405, "learning_rate": 0.00011757077070635842, "loss": 0.9518, "step": 9858 }, { "epoch": 0.94, "grad_norm": 0.29179997921864015, "learning_rate": 0.00011755519710949375, "loss": 1.0851, "step": 9859 }, { "epoch": 0.94, "grad_norm": 0.2580784420137259, "learning_rate": 0.0001175396230732992, "loss": 1.0792, "step": 9860 }, { "epoch": 0.94, "grad_norm": 0.2980701381253143, "learning_rate": 0.00011752404859816459, "loss": 1.1523, "step": 9861 }, { "epoch": 0.94, "grad_norm": 0.30650598237537346, "learning_rate": 0.00011750847368447963, "loss": 1.0509, "step": 9862 }, { "epoch": 0.94, "grad_norm": 0.2829225311103543, "learning_rate": 0.00011749289833263413, "loss": 1.0698, "step": 9863 }, { "epoch": 0.94, "grad_norm": 0.3150147271464283, "learning_rate": 0.00011747732254301786, "loss": 1.0618, "step": 9864 }, { "epoch": 0.94, "grad_norm": 0.26617589803804353, "learning_rate": 0.00011746174631602059, "loss": 0.9886, "step": 9865 }, { "epoch": 0.94, "grad_norm": 0.28821178558985444, "learning_rate": 0.00011744616965203214, "loss": 0.9826, "step": 9866 }, { "epoch": 0.94, "grad_norm": 0.264781657352801, "learning_rate": 0.00011743059255144233, "loss": 0.8746, "step": 9867 }, { "epoch": 0.94, "grad_norm": 0.29886418377952234, "learning_rate": 0.000117415015014641, "loss": 1.0563, "step": 9868 }, { "epoch": 0.94, "grad_norm": 0.3002368289446441, "learning_rate": 0.00011739943704201796, "loss": 1.0341, "step": 9869 }, { "epoch": 0.94, "grad_norm": 0.28512524718120663, "learning_rate": 0.00011738385863396311, "loss": 0.9591, "step": 9870 }, { "epoch": 0.94, "grad_norm": 0.2900458256329998, "learning_rate": 0.00011736827979086625, "loss": 1.0893, "step": 9871 }, { "epoch": 0.94, "grad_norm": 0.28440813574018564, "learning_rate": 0.00011735270051311724, "loss": 1.052, "step": 9872 }, { "epoch": 0.94, "grad_norm": 0.2469565537916454, "learning_rate": 0.00011733712080110603, "loss": 0.857, "step": 9873 }, { "epoch": 0.94, "grad_norm": 0.2501421742546667, "learning_rate": 0.00011732154065522247, "loss": 1.1517, "step": 9874 }, { "epoch": 0.94, "grad_norm": 0.2943711294105304, "learning_rate": 0.00011730596007585646, "loss": 1.0131, "step": 9875 }, { "epoch": 0.94, "grad_norm": 0.2759019579633036, "learning_rate": 0.00011729037906339795, "loss": 1.0827, "step": 9876 }, { "epoch": 0.94, "grad_norm": 0.2734166501679395, "learning_rate": 0.00011727479761823683, "loss": 1.017, "step": 9877 }, { "epoch": 0.95, "grad_norm": 0.2945759953073775, "learning_rate": 0.00011725921574076305, "loss": 1.048, "step": 9878 }, { "epoch": 0.95, "grad_norm": 0.2847423179857022, "learning_rate": 0.00011724363343136651, "loss": 0.9387, "step": 9879 }, { "epoch": 0.95, "grad_norm": 0.2731998133242859, "learning_rate": 0.00011722805069043724, "loss": 1.1269, "step": 9880 }, { "epoch": 0.95, "grad_norm": 0.31097755473044353, "learning_rate": 0.00011721246751836514, "loss": 1.1183, "step": 9881 }, { "epoch": 0.95, "grad_norm": 0.24077282326706342, "learning_rate": 0.00011719688391554024, "loss": 1.1315, "step": 9882 }, { "epoch": 0.95, "grad_norm": 0.28290762006495423, "learning_rate": 0.00011718129988235251, "loss": 1.0301, "step": 9883 }, { "epoch": 0.95, "grad_norm": 0.28799605220582924, "learning_rate": 0.00011716571541919197, "loss": 1.1275, "step": 9884 }, { "epoch": 0.95, "grad_norm": 0.2506151300260542, "learning_rate": 0.00011715013052644859, "loss": 1.0488, "step": 9885 }, { "epoch": 0.95, "grad_norm": 0.3003785975679057, "learning_rate": 0.00011713454520451243, "loss": 1.1445, "step": 9886 }, { "epoch": 0.95, "grad_norm": 0.2871681991986448, "learning_rate": 0.00011711895945377351, "loss": 1.1341, "step": 9887 }, { "epoch": 0.95, "grad_norm": 0.263469877916956, "learning_rate": 0.00011710337327462186, "loss": 1.0684, "step": 9888 }, { "epoch": 0.95, "grad_norm": 0.25031317343613296, "learning_rate": 0.00011708778666744756, "loss": 1.0692, "step": 9889 }, { "epoch": 0.95, "grad_norm": 0.27978745157205426, "learning_rate": 0.00011707219963264063, "loss": 0.9031, "step": 9890 }, { "epoch": 0.95, "grad_norm": 0.2771417917801048, "learning_rate": 0.00011705661217059121, "loss": 1.0672, "step": 9891 }, { "epoch": 0.95, "grad_norm": 0.27046659772554993, "learning_rate": 0.00011704102428168931, "loss": 1.1261, "step": 9892 }, { "epoch": 0.95, "grad_norm": 0.2627203026681411, "learning_rate": 0.00011702543596632512, "loss": 1.0424, "step": 9893 }, { "epoch": 0.95, "grad_norm": 0.24842638162917163, "learning_rate": 0.00011700984722488865, "loss": 1.0639, "step": 9894 }, { "epoch": 0.95, "grad_norm": 0.297596295779575, "learning_rate": 0.00011699425805777008, "loss": 1.1003, "step": 9895 }, { "epoch": 0.95, "grad_norm": 0.27759923733902636, "learning_rate": 0.00011697866846535953, "loss": 1.1192, "step": 9896 }, { "epoch": 0.95, "grad_norm": 0.30285981266810275, "learning_rate": 0.00011696307844804713, "loss": 1.1918, "step": 9897 }, { "epoch": 0.95, "grad_norm": 0.31450365765502614, "learning_rate": 0.00011694748800622301, "loss": 1.0818, "step": 9898 }, { "epoch": 0.95, "grad_norm": 0.3141057225598429, "learning_rate": 0.00011693189714027737, "loss": 0.9055, "step": 9899 }, { "epoch": 0.95, "grad_norm": 0.2718320654630044, "learning_rate": 0.00011691630585060036, "loss": 1.182, "step": 9900 }, { "epoch": 0.95, "grad_norm": 0.2642413261096164, "learning_rate": 0.00011690071413758217, "loss": 1.0622, "step": 9901 }, { "epoch": 0.95, "grad_norm": 0.2781872909054998, "learning_rate": 0.00011688512200161297, "loss": 1.0917, "step": 9902 }, { "epoch": 0.95, "grad_norm": 0.2933665399467418, "learning_rate": 0.00011686952944308298, "loss": 1.0172, "step": 9903 }, { "epoch": 0.95, "grad_norm": 0.2986798343224596, "learning_rate": 0.00011685393646238243, "loss": 1.0515, "step": 9904 }, { "epoch": 0.95, "grad_norm": 0.27008347335950034, "learning_rate": 0.00011683834305990154, "loss": 1.0519, "step": 9905 }, { "epoch": 0.95, "grad_norm": 0.27146126457125797, "learning_rate": 0.00011682274923603049, "loss": 1.0363, "step": 9906 }, { "epoch": 0.95, "grad_norm": 0.24730678500742276, "learning_rate": 0.00011680715499115959, "loss": 1.0349, "step": 9907 }, { "epoch": 0.95, "grad_norm": 0.274923895199804, "learning_rate": 0.00011679156032567911, "loss": 1.0628, "step": 9908 }, { "epoch": 0.95, "grad_norm": 0.28568564007828456, "learning_rate": 0.00011677596523997922, "loss": 1.0989, "step": 9909 }, { "epoch": 0.95, "grad_norm": 0.26786733880651786, "learning_rate": 0.00011676036973445028, "loss": 0.9306, "step": 9910 }, { "epoch": 0.95, "grad_norm": 0.30754840140433903, "learning_rate": 0.00011674477380948255, "loss": 1.0696, "step": 9911 }, { "epoch": 0.95, "grad_norm": 0.25852789773425133, "learning_rate": 0.00011672917746546634, "loss": 0.8838, "step": 9912 }, { "epoch": 0.95, "grad_norm": 0.2558247705235392, "learning_rate": 0.00011671358070279193, "loss": 0.99, "step": 9913 }, { "epoch": 0.95, "grad_norm": 0.29138329913400457, "learning_rate": 0.00011669798352184968, "loss": 0.9954, "step": 9914 }, { "epoch": 0.95, "grad_norm": 0.30260586220267804, "learning_rate": 0.0001166823859230299, "loss": 1.0312, "step": 9915 }, { "epoch": 0.95, "grad_norm": 0.2827165548781267, "learning_rate": 0.0001166667879067229, "loss": 1.131, "step": 9916 }, { "epoch": 0.95, "grad_norm": 0.29392517958616593, "learning_rate": 0.0001166511894733191, "loss": 1.0896, "step": 9917 }, { "epoch": 0.95, "grad_norm": 0.3260991950351522, "learning_rate": 0.00011663559062320878, "loss": 1.0871, "step": 9918 }, { "epoch": 0.95, "grad_norm": 0.2561723519059954, "learning_rate": 0.00011661999135678237, "loss": 1.1483, "step": 9919 }, { "epoch": 0.95, "grad_norm": 0.309722846552505, "learning_rate": 0.00011660439167443022, "loss": 1.0889, "step": 9920 }, { "epoch": 0.95, "grad_norm": 0.25190002272893053, "learning_rate": 0.00011658879157654276, "loss": 1.0626, "step": 9921 }, { "epoch": 0.95, "grad_norm": 0.3259479008437906, "learning_rate": 0.00011657319106351035, "loss": 1.0441, "step": 9922 }, { "epoch": 0.95, "grad_norm": 0.26095427143336863, "learning_rate": 0.0001165575901357234, "loss": 1.108, "step": 9923 }, { "epoch": 0.95, "grad_norm": 0.3334009293399356, "learning_rate": 0.00011654198879357236, "loss": 0.9941, "step": 9924 }, { "epoch": 0.95, "grad_norm": 0.2479075214464415, "learning_rate": 0.00011652638703744769, "loss": 0.9997, "step": 9925 }, { "epoch": 0.95, "grad_norm": 0.2699529303379684, "learning_rate": 0.00011651078486773974, "loss": 1.0008, "step": 9926 }, { "epoch": 0.95, "grad_norm": 0.2793875562565832, "learning_rate": 0.00011649518228483907, "loss": 1.1141, "step": 9927 }, { "epoch": 0.95, "grad_norm": 0.30640720124070897, "learning_rate": 0.00011647957928913606, "loss": 1.088, "step": 9928 }, { "epoch": 0.95, "grad_norm": 0.29450119862900453, "learning_rate": 0.00011646397588102123, "loss": 1.0208, "step": 9929 }, { "epoch": 0.95, "grad_norm": 0.2986882197988017, "learning_rate": 0.00011644837206088508, "loss": 1.1405, "step": 9930 }, { "epoch": 0.95, "grad_norm": 0.28289114316471997, "learning_rate": 0.00011643276782911805, "loss": 1.0624, "step": 9931 }, { "epoch": 0.95, "grad_norm": 0.25290685088339804, "learning_rate": 0.0001164171631861107, "loss": 0.9819, "step": 9932 }, { "epoch": 0.95, "grad_norm": 0.29613853648276806, "learning_rate": 0.00011640155813225348, "loss": 0.9768, "step": 9933 }, { "epoch": 0.95, "grad_norm": 0.30470164680746314, "learning_rate": 0.00011638595266793701, "loss": 1.0121, "step": 9934 }, { "epoch": 0.95, "grad_norm": 0.26617140467629113, "learning_rate": 0.00011637034679355176, "loss": 1.0911, "step": 9935 }, { "epoch": 0.95, "grad_norm": 0.29756347865505667, "learning_rate": 0.00011635474050948829, "loss": 1.0299, "step": 9936 }, { "epoch": 0.95, "grad_norm": 0.2496000722355499, "learning_rate": 0.00011633913381613717, "loss": 0.9958, "step": 9937 }, { "epoch": 0.95, "grad_norm": 0.28395690278817887, "learning_rate": 0.00011632352671388898, "loss": 1.0131, "step": 9938 }, { "epoch": 0.95, "grad_norm": 0.2922841640955166, "learning_rate": 0.00011630791920313425, "loss": 1.0615, "step": 9939 }, { "epoch": 0.95, "grad_norm": 0.3033319129166104, "learning_rate": 0.00011629231128426356, "loss": 1.1185, "step": 9940 }, { "epoch": 0.95, "grad_norm": 0.2696093697556044, "learning_rate": 0.00011627670295766759, "loss": 0.9108, "step": 9941 }, { "epoch": 0.95, "grad_norm": 0.275903786765533, "learning_rate": 0.00011626109422373688, "loss": 0.9584, "step": 9942 }, { "epoch": 0.95, "grad_norm": 0.2593587234240088, "learning_rate": 0.00011624548508286206, "loss": 1.0215, "step": 9943 }, { "epoch": 0.95, "grad_norm": 0.25466792159457646, "learning_rate": 0.00011622987553543376, "loss": 1.0309, "step": 9944 }, { "epoch": 0.95, "grad_norm": 0.28160232777966615, "learning_rate": 0.00011621426558184265, "loss": 1.036, "step": 9945 }, { "epoch": 0.95, "grad_norm": 0.2665942842206432, "learning_rate": 0.00011619865522247933, "loss": 1.0767, "step": 9946 }, { "epoch": 0.95, "grad_norm": 0.29079511088912224, "learning_rate": 0.00011618304445773451, "loss": 1.0276, "step": 9947 }, { "epoch": 0.95, "grad_norm": 0.2803873259698023, "learning_rate": 0.00011616743328799881, "loss": 1.0198, "step": 9948 }, { "epoch": 0.95, "grad_norm": 0.2555819038764869, "learning_rate": 0.00011615182171366297, "loss": 1.0222, "step": 9949 }, { "epoch": 0.95, "grad_norm": 0.31734739800334083, "learning_rate": 0.00011613620973511758, "loss": 1.1547, "step": 9950 }, { "epoch": 0.95, "grad_norm": 0.28917459887517855, "learning_rate": 0.00011612059735275342, "loss": 1.1102, "step": 9951 }, { "epoch": 0.95, "grad_norm": 0.24841835752111638, "learning_rate": 0.00011610498456696119, "loss": 1.1191, "step": 9952 }, { "epoch": 0.95, "grad_norm": 0.29654024864058454, "learning_rate": 0.00011608937137813161, "loss": 0.9583, "step": 9953 }, { "epoch": 0.95, "grad_norm": 0.3147918475594126, "learning_rate": 0.00011607375778665536, "loss": 1.1377, "step": 9954 }, { "epoch": 0.95, "grad_norm": 0.2772820949372226, "learning_rate": 0.00011605814379292325, "loss": 1.0474, "step": 9955 }, { "epoch": 0.95, "grad_norm": 0.30109169932300395, "learning_rate": 0.00011604252939732601, "loss": 1.0939, "step": 9956 }, { "epoch": 0.95, "grad_norm": 0.2868717018893174, "learning_rate": 0.00011602691460025437, "loss": 0.9908, "step": 9957 }, { "epoch": 0.95, "grad_norm": 0.2675493227671909, "learning_rate": 0.00011601129940209911, "loss": 1.0693, "step": 9958 }, { "epoch": 0.95, "grad_norm": 0.31375505859098296, "learning_rate": 0.00011599568380325106, "loss": 1.1187, "step": 9959 }, { "epoch": 0.95, "grad_norm": 0.295148232115398, "learning_rate": 0.00011598006780410091, "loss": 1.0427, "step": 9960 }, { "epoch": 0.95, "grad_norm": 0.28936388619727343, "learning_rate": 0.00011596445140503957, "loss": 0.9698, "step": 9961 }, { "epoch": 0.95, "grad_norm": 0.28216120171352943, "learning_rate": 0.0001159488346064578, "loss": 1.0421, "step": 9962 }, { "epoch": 0.95, "grad_norm": 0.28268688737805364, "learning_rate": 0.00011593321740874639, "loss": 1.0307, "step": 9963 }, { "epoch": 0.95, "grad_norm": 0.27612823655418073, "learning_rate": 0.00011591759981229622, "loss": 0.9663, "step": 9964 }, { "epoch": 0.95, "grad_norm": 0.2740428689355272, "learning_rate": 0.00011590198181749811, "loss": 1.0961, "step": 9965 }, { "epoch": 0.95, "grad_norm": 0.2499724504263894, "learning_rate": 0.0001158863634247429, "loss": 1.0651, "step": 9966 }, { "epoch": 0.95, "grad_norm": 0.2757709129720417, "learning_rate": 0.00011587074463442147, "loss": 1.0643, "step": 9967 }, { "epoch": 0.95, "grad_norm": 0.2985207172650015, "learning_rate": 0.00011585512544692467, "loss": 1.1078, "step": 9968 }, { "epoch": 0.95, "grad_norm": 0.3045737397007342, "learning_rate": 0.00011583950586264343, "loss": 1.1594, "step": 9969 }, { "epoch": 0.95, "grad_norm": 0.27637988509985273, "learning_rate": 0.00011582388588196855, "loss": 1.0231, "step": 9970 }, { "epoch": 0.95, "grad_norm": 0.2821414694181937, "learning_rate": 0.000115808265505291, "loss": 1.1147, "step": 9971 }, { "epoch": 0.95, "grad_norm": 0.29155806162009856, "learning_rate": 0.00011579264473300167, "loss": 1.0431, "step": 9972 }, { "epoch": 0.95, "grad_norm": 0.27636257246936596, "learning_rate": 0.00011577702356549149, "loss": 1.0565, "step": 9973 }, { "epoch": 0.95, "grad_norm": 0.31102867461907796, "learning_rate": 0.00011576140200315135, "loss": 1.1162, "step": 9974 }, { "epoch": 0.95, "grad_norm": 0.28686875083828667, "learning_rate": 0.00011574578004637226, "loss": 0.9938, "step": 9975 }, { "epoch": 0.95, "grad_norm": 0.2829922774553789, "learning_rate": 0.00011573015769554512, "loss": 1.043, "step": 9976 }, { "epoch": 0.95, "grad_norm": 0.30378521566991484, "learning_rate": 0.00011571453495106086, "loss": 1.125, "step": 9977 }, { "epoch": 0.95, "grad_norm": 0.2882399502995974, "learning_rate": 0.00011569891181331054, "loss": 1.0316, "step": 9978 }, { "epoch": 0.95, "grad_norm": 0.2546929646781789, "learning_rate": 0.00011568328828268506, "loss": 1.0347, "step": 9979 }, { "epoch": 0.95, "grad_norm": 0.27885757954603946, "learning_rate": 0.00011566766435957541, "loss": 1.0919, "step": 9980 }, { "epoch": 0.95, "grad_norm": 0.26559169992251663, "learning_rate": 0.00011565204004437267, "loss": 1.0969, "step": 9981 }, { "epoch": 0.95, "grad_norm": 0.33065813751668216, "learning_rate": 0.00011563641533746774, "loss": 1.0411, "step": 9982 }, { "epoch": 0.96, "grad_norm": 0.33089614226385505, "learning_rate": 0.00011562079023925172, "loss": 1.0129, "step": 9983 }, { "epoch": 0.96, "grad_norm": 0.29760832729104647, "learning_rate": 0.00011560516475011558, "loss": 1.0865, "step": 9984 }, { "epoch": 0.96, "grad_norm": 0.2982860380062189, "learning_rate": 0.00011558953887045041, "loss": 1.0879, "step": 9985 }, { "epoch": 0.96, "grad_norm": 0.3089962049085526, "learning_rate": 0.00011557391260064723, "loss": 1.062, "step": 9986 }, { "epoch": 0.96, "grad_norm": 0.2907036025220388, "learning_rate": 0.00011555828594109707, "loss": 1.0863, "step": 9987 }, { "epoch": 0.96, "grad_norm": 0.31013815730431993, "learning_rate": 0.00011554265889219106, "loss": 1.0049, "step": 9988 }, { "epoch": 0.96, "grad_norm": 0.29883777425014585, "learning_rate": 0.00011552703145432025, "loss": 1.0613, "step": 9989 }, { "epoch": 0.96, "grad_norm": 0.29975907234168436, "learning_rate": 0.0001155114036278757, "loss": 1.0005, "step": 9990 }, { "epoch": 0.96, "grad_norm": 0.30684771167447394, "learning_rate": 0.0001154957754132485, "loss": 1.1158, "step": 9991 }, { "epoch": 0.96, "grad_norm": 0.26335079161415853, "learning_rate": 0.00011548014681082981, "loss": 1.021, "step": 9992 }, { "epoch": 0.96, "grad_norm": 0.27648270125908575, "learning_rate": 0.00011546451782101071, "loss": 1.1223, "step": 9993 }, { "epoch": 0.96, "grad_norm": 0.3475752040212275, "learning_rate": 0.00011544888844418233, "loss": 1.1233, "step": 9994 }, { "epoch": 0.96, "grad_norm": 0.30111765274598085, "learning_rate": 0.0001154332586807358, "loss": 1.0963, "step": 9995 }, { "epoch": 0.96, "grad_norm": 0.2970605582481724, "learning_rate": 0.0001154176285310623, "loss": 0.99, "step": 9996 }, { "epoch": 0.96, "grad_norm": 0.305235597238056, "learning_rate": 0.00011540199799555294, "loss": 1.132, "step": 9997 }, { "epoch": 0.96, "grad_norm": 0.2670713296870493, "learning_rate": 0.00011538636707459889, "loss": 1.0476, "step": 9998 }, { "epoch": 0.96, "grad_norm": 0.2677367396231706, "learning_rate": 0.00011537073576859136, "loss": 1.069, "step": 9999 }, { "epoch": 0.96, "grad_norm": 0.2525746570305202, "learning_rate": 0.00011535510407792149, "loss": 0.9385, "step": 10000 }, { "epoch": 0.96, "grad_norm": 0.2758414503350628, "learning_rate": 0.0001153394720029805, "loss": 1.0631, "step": 10001 }, { "epoch": 0.96, "grad_norm": 0.3047038095467878, "learning_rate": 0.00011532383954415957, "loss": 1.0311, "step": 10002 }, { "epoch": 0.96, "grad_norm": 0.28111478399862794, "learning_rate": 0.00011530820670184995, "loss": 1.0573, "step": 10003 }, { "epoch": 0.96, "grad_norm": 0.28036170451809095, "learning_rate": 0.0001152925734764428, "loss": 1.1742, "step": 10004 }, { "epoch": 0.96, "grad_norm": 0.2963156258676252, "learning_rate": 0.00011527693986832942, "loss": 1.016, "step": 10005 }, { "epoch": 0.96, "grad_norm": 0.2815524009206448, "learning_rate": 0.000115261305877901, "loss": 1.0306, "step": 10006 }, { "epoch": 0.96, "grad_norm": 0.29474398852141115, "learning_rate": 0.00011524567150554881, "loss": 1.0829, "step": 10007 }, { "epoch": 0.96, "grad_norm": 0.29358155011217263, "learning_rate": 0.00011523003675166411, "loss": 1.0714, "step": 10008 }, { "epoch": 0.96, "grad_norm": 0.2844867269668011, "learning_rate": 0.00011521440161663819, "loss": 1.0348, "step": 10009 }, { "epoch": 0.96, "grad_norm": 0.255280580277318, "learning_rate": 0.00011519876610086229, "loss": 1.1012, "step": 10010 }, { "epoch": 0.96, "grad_norm": 0.31413925882198807, "learning_rate": 0.00011518313020472768, "loss": 1.0744, "step": 10011 }, { "epoch": 0.96, "grad_norm": 0.31994016396189484, "learning_rate": 0.00011516749392862576, "loss": 0.9883, "step": 10012 }, { "epoch": 0.96, "grad_norm": 0.27390552772942023, "learning_rate": 0.00011515185727294771, "loss": 0.9983, "step": 10013 }, { "epoch": 0.96, "grad_norm": 0.2853534170678965, "learning_rate": 0.00011513622023808495, "loss": 1.0904, "step": 10014 }, { "epoch": 0.96, "grad_norm": 0.25465697705481827, "learning_rate": 0.00011512058282442874, "loss": 0.922, "step": 10015 }, { "epoch": 0.96, "grad_norm": 0.26802643575831, "learning_rate": 0.00011510494503237046, "loss": 1.0313, "step": 10016 }, { "epoch": 0.96, "grad_norm": 0.3193729552335535, "learning_rate": 0.00011508930686230146, "loss": 0.9854, "step": 10017 }, { "epoch": 0.96, "grad_norm": 0.2879367749732901, "learning_rate": 0.00011507366831461302, "loss": 1.1051, "step": 10018 }, { "epoch": 0.96, "grad_norm": 0.30097969407853326, "learning_rate": 0.0001150580293896966, "loss": 1.0425, "step": 10019 }, { "epoch": 0.96, "grad_norm": 0.30624544517915264, "learning_rate": 0.0001150423900879435, "loss": 1.0311, "step": 10020 }, { "epoch": 0.96, "grad_norm": 0.2767941099814116, "learning_rate": 0.00011502675040974516, "loss": 0.9427, "step": 10021 }, { "epoch": 0.96, "grad_norm": 0.28490801229190077, "learning_rate": 0.00011501111035549295, "loss": 1.1947, "step": 10022 }, { "epoch": 0.96, "grad_norm": 0.2784277522206461, "learning_rate": 0.00011499546992557826, "loss": 0.9624, "step": 10023 }, { "epoch": 0.96, "grad_norm": 0.2666766109775799, "learning_rate": 0.00011497982912039249, "loss": 0.9757, "step": 10024 }, { "epoch": 0.96, "grad_norm": 0.32891260560151236, "learning_rate": 0.00011496418794032711, "loss": 1.1256, "step": 10025 }, { "epoch": 0.96, "grad_norm": 0.3086769971151652, "learning_rate": 0.0001149485463857735, "loss": 0.9429, "step": 10026 }, { "epoch": 0.96, "grad_norm": 0.25897740279910364, "learning_rate": 0.00011493290445712315, "loss": 0.8941, "step": 10027 }, { "epoch": 0.96, "grad_norm": 0.27367523231686985, "learning_rate": 0.00011491726215476746, "loss": 0.961, "step": 10028 }, { "epoch": 0.96, "grad_norm": 0.2902717009917645, "learning_rate": 0.0001149016194790979, "loss": 1.0828, "step": 10029 }, { "epoch": 0.96, "grad_norm": 0.2767681614076228, "learning_rate": 0.00011488597643050598, "loss": 1.1453, "step": 10030 }, { "epoch": 0.96, "grad_norm": 0.2857144403267616, "learning_rate": 0.0001148703330093831, "loss": 1.1737, "step": 10031 }, { "epoch": 0.96, "grad_norm": 0.28150370452068746, "learning_rate": 0.00011485468921612084, "loss": 1.1734, "step": 10032 }, { "epoch": 0.96, "grad_norm": 0.3290209532225155, "learning_rate": 0.00011483904505111063, "loss": 1.1331, "step": 10033 }, { "epoch": 0.96, "grad_norm": 0.3138396953516879, "learning_rate": 0.00011482340051474396, "loss": 1.0148, "step": 10034 }, { "epoch": 0.96, "grad_norm": 0.30110096644908, "learning_rate": 0.00011480775560741239, "loss": 1.0134, "step": 10035 }, { "epoch": 0.96, "grad_norm": 0.3203655127160484, "learning_rate": 0.00011479211032950743, "loss": 1.0475, "step": 10036 }, { "epoch": 0.96, "grad_norm": 0.2772142324547969, "learning_rate": 0.00011477646468142062, "loss": 1.0685, "step": 10037 }, { "epoch": 0.96, "grad_norm": 0.2917042525821615, "learning_rate": 0.0001147608186635435, "loss": 1.1421, "step": 10038 }, { "epoch": 0.96, "grad_norm": 0.26698256397886955, "learning_rate": 0.00011474517227626762, "loss": 1.1066, "step": 10039 }, { "epoch": 0.96, "grad_norm": 0.2675194389510324, "learning_rate": 0.00011472952551998452, "loss": 0.9672, "step": 10040 }, { "epoch": 0.96, "grad_norm": 0.2925787442554594, "learning_rate": 0.0001147138783950858, "loss": 1.0619, "step": 10041 }, { "epoch": 0.96, "grad_norm": 0.2925652633706256, "learning_rate": 0.00011469823090196303, "loss": 1.067, "step": 10042 }, { "epoch": 0.96, "grad_norm": 0.26146199790712404, "learning_rate": 0.00011468258304100779, "loss": 1.0669, "step": 10043 }, { "epoch": 0.96, "grad_norm": 0.29387442287441945, "learning_rate": 0.00011466693481261168, "loss": 1.1529, "step": 10044 }, { "epoch": 0.96, "grad_norm": 0.30566339364004746, "learning_rate": 0.0001146512862171663, "loss": 1.0711, "step": 10045 }, { "epoch": 0.96, "grad_norm": 0.2623345882172563, "learning_rate": 0.00011463563725506328, "loss": 1.1218, "step": 10046 }, { "epoch": 0.96, "grad_norm": 0.3090594432747929, "learning_rate": 0.00011461998792669426, "loss": 1.0833, "step": 10047 }, { "epoch": 0.96, "grad_norm": 0.3168446002514091, "learning_rate": 0.0001146043382324508, "loss": 1.0041, "step": 10048 }, { "epoch": 0.96, "grad_norm": 0.26118984316659555, "learning_rate": 0.00011458868817272465, "loss": 1.075, "step": 10049 }, { "epoch": 0.96, "grad_norm": 0.3084194238337441, "learning_rate": 0.0001145730377479074, "loss": 0.9865, "step": 10050 }, { "epoch": 0.96, "grad_norm": 0.2916812390686762, "learning_rate": 0.00011455738695839071, "loss": 1.1019, "step": 10051 }, { "epoch": 0.96, "grad_norm": 0.2853685064498807, "learning_rate": 0.00011454173580456627, "loss": 1.1053, "step": 10052 }, { "epoch": 0.96, "grad_norm": 0.28381729559022867, "learning_rate": 0.00011452608428682574, "loss": 1.0872, "step": 10053 }, { "epoch": 0.96, "grad_norm": 0.35555285136726095, "learning_rate": 0.0001145104324055608, "loss": 1.0321, "step": 10054 }, { "epoch": 0.96, "grad_norm": 0.29725262158547283, "learning_rate": 0.00011449478016116322, "loss": 1.0607, "step": 10055 }, { "epoch": 0.96, "grad_norm": 0.32988647361187395, "learning_rate": 0.00011447912755402463, "loss": 1.0194, "step": 10056 }, { "epoch": 0.96, "grad_norm": 0.2986055841309832, "learning_rate": 0.00011446347458453677, "loss": 1.0673, "step": 10057 }, { "epoch": 0.96, "grad_norm": 0.2696942404700563, "learning_rate": 0.00011444782125309137, "loss": 1.0442, "step": 10058 }, { "epoch": 0.96, "grad_norm": 0.3064172789572965, "learning_rate": 0.00011443216756008017, "loss": 0.9705, "step": 10059 }, { "epoch": 0.96, "grad_norm": 0.29986340116379595, "learning_rate": 0.00011441651350589493, "loss": 1.042, "step": 10060 }, { "epoch": 0.96, "grad_norm": 0.29196996307249695, "learning_rate": 0.00011440085909092735, "loss": 1.1514, "step": 10061 }, { "epoch": 0.96, "grad_norm": 0.26305531388271913, "learning_rate": 0.00011438520431556923, "loss": 1.0125, "step": 10062 }, { "epoch": 0.96, "grad_norm": 0.28626327759234815, "learning_rate": 0.00011436954918021232, "loss": 1.1407, "step": 10063 }, { "epoch": 0.96, "grad_norm": 0.27422258071461836, "learning_rate": 0.00011435389368524842, "loss": 1.055, "step": 10064 }, { "epoch": 0.96, "grad_norm": 0.29139299897756993, "learning_rate": 0.0001143382378310693, "loss": 1.015, "step": 10065 }, { "epoch": 0.96, "grad_norm": 0.2590303991609533, "learning_rate": 0.0001143225816180668, "loss": 0.9926, "step": 10066 }, { "epoch": 0.96, "grad_norm": 0.28910079655703913, "learning_rate": 0.00011430692504663265, "loss": 0.9727, "step": 10067 }, { "epoch": 0.96, "grad_norm": 0.3312654664415843, "learning_rate": 0.00011429126811715872, "loss": 1.101, "step": 10068 }, { "epoch": 0.96, "grad_norm": 0.30000869026615357, "learning_rate": 0.00011427561083003683, "loss": 1.0738, "step": 10069 }, { "epoch": 0.96, "grad_norm": 0.28159858071588617, "learning_rate": 0.00011425995318565883, "loss": 1.0995, "step": 10070 }, { "epoch": 0.96, "grad_norm": 0.3123705444016819, "learning_rate": 0.00011424429518441653, "loss": 0.9907, "step": 10071 }, { "epoch": 0.96, "grad_norm": 0.25560829274875024, "learning_rate": 0.00011422863682670176, "loss": 1.0387, "step": 10072 }, { "epoch": 0.96, "grad_norm": 0.3047604879614061, "learning_rate": 0.00011421297811290643, "loss": 1.0803, "step": 10073 }, { "epoch": 0.96, "grad_norm": 0.27464921456414265, "learning_rate": 0.0001141973190434224, "loss": 1.1457, "step": 10074 }, { "epoch": 0.96, "grad_norm": 0.3015661166736289, "learning_rate": 0.00011418165961864151, "loss": 0.9435, "step": 10075 }, { "epoch": 0.96, "grad_norm": 0.3288482467287445, "learning_rate": 0.0001141659998389557, "loss": 1.0268, "step": 10076 }, { "epoch": 0.96, "grad_norm": 0.2880879802413768, "learning_rate": 0.00011415033970475682, "loss": 1.1155, "step": 10077 }, { "epoch": 0.96, "grad_norm": 0.3046710592105044, "learning_rate": 0.00011413467921643681, "loss": 1.0854, "step": 10078 }, { "epoch": 0.96, "grad_norm": 0.23189332771051346, "learning_rate": 0.00011411901837438757, "loss": 0.8959, "step": 10079 }, { "epoch": 0.96, "grad_norm": 0.2860860545736034, "learning_rate": 0.00011410335717900102, "loss": 1.004, "step": 10080 }, { "epoch": 0.96, "grad_norm": 0.29819011560467884, "learning_rate": 0.0001140876956306691, "loss": 1.0271, "step": 10081 }, { "epoch": 0.96, "grad_norm": 0.2961548882133075, "learning_rate": 0.00011407203372978372, "loss": 1.0964, "step": 10082 }, { "epoch": 0.96, "grad_norm": 0.27643172524881576, "learning_rate": 0.00011405637147673688, "loss": 1.0848, "step": 10083 }, { "epoch": 0.96, "grad_norm": 0.26601937387043634, "learning_rate": 0.00011404070887192051, "loss": 1.0771, "step": 10084 }, { "epoch": 0.96, "grad_norm": 0.29423767837815973, "learning_rate": 0.00011402504591572656, "loss": 1.1087, "step": 10085 }, { "epoch": 0.96, "grad_norm": 0.31957243246751704, "learning_rate": 0.00011400938260854703, "loss": 1.1154, "step": 10086 }, { "epoch": 0.97, "grad_norm": 0.34871076842626053, "learning_rate": 0.00011399371895077389, "loss": 1.0691, "step": 10087 }, { "epoch": 0.97, "grad_norm": 0.30378790346074774, "learning_rate": 0.00011397805494279916, "loss": 1.096, "step": 10088 }, { "epoch": 0.97, "grad_norm": 0.25772589878682645, "learning_rate": 0.00011396239058501476, "loss": 1.0342, "step": 10089 }, { "epoch": 0.97, "grad_norm": 0.2823080995120186, "learning_rate": 0.00011394672587781284, "loss": 1.1017, "step": 10090 }, { "epoch": 0.97, "grad_norm": 0.312629120636968, "learning_rate": 0.0001139310608215853, "loss": 1.0579, "step": 10091 }, { "epoch": 0.97, "grad_norm": 0.3054424461560633, "learning_rate": 0.00011391539541672418, "loss": 1.0553, "step": 10092 }, { "epoch": 0.97, "grad_norm": 0.28281060096224714, "learning_rate": 0.00011389972966362159, "loss": 1.0612, "step": 10093 }, { "epoch": 0.97, "grad_norm": 0.27166297516714194, "learning_rate": 0.00011388406356266951, "loss": 1.0524, "step": 10094 }, { "epoch": 0.97, "grad_norm": 0.31743027731547635, "learning_rate": 0.00011386839711426003, "loss": 1.0024, "step": 10095 }, { "epoch": 0.97, "grad_norm": 0.32534625159202174, "learning_rate": 0.00011385273031878516, "loss": 1.0885, "step": 10096 }, { "epoch": 0.97, "grad_norm": 0.30049574798069184, "learning_rate": 0.00011383706317663705, "loss": 1.0135, "step": 10097 }, { "epoch": 0.97, "grad_norm": 0.3266379229812977, "learning_rate": 0.00011382139568820771, "loss": 1.0434, "step": 10098 }, { "epoch": 0.97, "grad_norm": 0.3039958500677989, "learning_rate": 0.00011380572785388923, "loss": 1.1338, "step": 10099 }, { "epoch": 0.97, "grad_norm": 0.2820873927102382, "learning_rate": 0.0001137900596740738, "loss": 0.9901, "step": 10100 }, { "epoch": 0.97, "grad_norm": 0.25895906852181655, "learning_rate": 0.00011377439114915343, "loss": 1.0197, "step": 10101 }, { "epoch": 0.97, "grad_norm": 0.2736675525144609, "learning_rate": 0.00011375872227952024, "loss": 1.0666, "step": 10102 }, { "epoch": 0.97, "grad_norm": 0.27812994654877704, "learning_rate": 0.00011374305306556641, "loss": 0.9667, "step": 10103 }, { "epoch": 0.97, "grad_norm": 0.24944483327099637, "learning_rate": 0.00011372738350768404, "loss": 1.1377, "step": 10104 }, { "epoch": 0.97, "grad_norm": 0.3052298238193706, "learning_rate": 0.00011371171360626528, "loss": 1.0432, "step": 10105 }, { "epoch": 0.97, "grad_norm": 0.27306031709563633, "learning_rate": 0.00011369604336170221, "loss": 1.0682, "step": 10106 }, { "epoch": 0.97, "grad_norm": 0.3167325272734246, "learning_rate": 0.0001136803727743871, "loss": 1.1381, "step": 10107 }, { "epoch": 0.97, "grad_norm": 0.3310696902768376, "learning_rate": 0.00011366470184471206, "loss": 0.9888, "step": 10108 }, { "epoch": 0.97, "grad_norm": 0.2920470605038157, "learning_rate": 0.00011364903057306923, "loss": 1.0723, "step": 10109 }, { "epoch": 0.97, "grad_norm": 0.28721162596130884, "learning_rate": 0.00011363335895985087, "loss": 1.0795, "step": 10110 }, { "epoch": 0.97, "grad_norm": 0.3123155431147055, "learning_rate": 0.00011361768700544915, "loss": 1.0195, "step": 10111 }, { "epoch": 0.97, "grad_norm": 0.31432924829965664, "learning_rate": 0.00011360201471025625, "loss": 1.0262, "step": 10112 }, { "epoch": 0.97, "grad_norm": 0.30371743385151373, "learning_rate": 0.00011358634207466434, "loss": 1.1198, "step": 10113 }, { "epoch": 0.97, "grad_norm": 0.30399866249564966, "learning_rate": 0.0001135706690990657, "loss": 1.0732, "step": 10114 }, { "epoch": 0.97, "grad_norm": 0.32184020587696033, "learning_rate": 0.00011355499578385256, "loss": 1.1721, "step": 10115 }, { "epoch": 0.97, "grad_norm": 0.30947844684370757, "learning_rate": 0.00011353932212941709, "loss": 0.9335, "step": 10116 }, { "epoch": 0.97, "grad_norm": 0.30689326685860674, "learning_rate": 0.00011352364813615159, "loss": 1.154, "step": 10117 }, { "epoch": 0.97, "grad_norm": 0.35179570677545263, "learning_rate": 0.0001135079738044483, "loss": 1.1623, "step": 10118 }, { "epoch": 0.97, "grad_norm": 0.2895264616854555, "learning_rate": 0.00011349229913469948, "loss": 1.1446, "step": 10119 }, { "epoch": 0.97, "grad_norm": 0.31453418960821145, "learning_rate": 0.00011347662412729738, "loss": 1.1001, "step": 10120 }, { "epoch": 0.97, "grad_norm": 0.2761763344790832, "learning_rate": 0.00011346094878263431, "loss": 0.9588, "step": 10121 }, { "epoch": 0.97, "grad_norm": 0.30740807652927366, "learning_rate": 0.00011344527310110256, "loss": 1.0145, "step": 10122 }, { "epoch": 0.97, "grad_norm": 0.2852359412921041, "learning_rate": 0.00011342959708309435, "loss": 0.9789, "step": 10123 }, { "epoch": 0.97, "grad_norm": 0.2870738764698151, "learning_rate": 0.00011341392072900205, "loss": 1.0349, "step": 10124 }, { "epoch": 0.97, "grad_norm": 0.3079179973519736, "learning_rate": 0.00011339824403921797, "loss": 1.0653, "step": 10125 }, { "epoch": 0.97, "grad_norm": 0.256710403425938, "learning_rate": 0.0001133825670141344, "loss": 0.9875, "step": 10126 }, { "epoch": 0.97, "grad_norm": 0.3171962319601306, "learning_rate": 0.00011336688965414369, "loss": 1.1135, "step": 10127 }, { "epoch": 0.97, "grad_norm": 0.2964396038679801, "learning_rate": 0.00011335121195963813, "loss": 1.0385, "step": 10128 }, { "epoch": 0.97, "grad_norm": 0.2750463815164073, "learning_rate": 0.00011333553393101013, "loss": 0.9907, "step": 10129 }, { "epoch": 0.97, "grad_norm": 0.3245742501930826, "learning_rate": 0.00011331985556865201, "loss": 1.0089, "step": 10130 }, { "epoch": 0.97, "grad_norm": 0.2976235078728115, "learning_rate": 0.00011330417687295614, "loss": 1.0739, "step": 10131 }, { "epoch": 0.97, "grad_norm": 0.30953794415640545, "learning_rate": 0.00011328849784431488, "loss": 1.1504, "step": 10132 }, { "epoch": 0.97, "grad_norm": 0.2779345999485772, "learning_rate": 0.00011327281848312059, "loss": 1.1356, "step": 10133 }, { "epoch": 0.97, "grad_norm": 0.2802541009558856, "learning_rate": 0.0001132571387897657, "loss": 1.0917, "step": 10134 }, { "epoch": 0.97, "grad_norm": 0.27849384342581196, "learning_rate": 0.00011324145876464259, "loss": 1.0923, "step": 10135 }, { "epoch": 0.97, "grad_norm": 0.3318350576277318, "learning_rate": 0.00011322577840814361, "loss": 0.9506, "step": 10136 }, { "epoch": 0.97, "grad_norm": 0.3459815664705523, "learning_rate": 0.00011321009772066124, "loss": 1.0129, "step": 10137 }, { "epoch": 0.97, "grad_norm": 0.3037533943998662, "learning_rate": 0.00011319441670258788, "loss": 1.0418, "step": 10138 }, { "epoch": 0.97, "grad_norm": 0.2520746414367993, "learning_rate": 0.00011317873535431591, "loss": 1.0216, "step": 10139 }, { "epoch": 0.97, "grad_norm": 0.2661292266112255, "learning_rate": 0.00011316305367623785, "loss": 1.1706, "step": 10140 }, { "epoch": 0.97, "grad_norm": 0.3063821264146261, "learning_rate": 0.00011314737166874607, "loss": 1.0417, "step": 10141 }, { "epoch": 0.97, "grad_norm": 0.26323311620303635, "learning_rate": 0.00011313168933223306, "loss": 1.0204, "step": 10142 }, { "epoch": 0.97, "grad_norm": 0.27238653921733935, "learning_rate": 0.00011311600666709126, "loss": 1.0871, "step": 10143 }, { "epoch": 0.97, "grad_norm": 0.3013746324023429, "learning_rate": 0.00011310032367371317, "loss": 1.0356, "step": 10144 }, { "epoch": 0.97, "grad_norm": 0.2574291995601369, "learning_rate": 0.00011308464035249125, "loss": 1.0812, "step": 10145 }, { "epoch": 0.97, "grad_norm": 0.2869306171965447, "learning_rate": 0.00011306895670381797, "loss": 1.0173, "step": 10146 }, { "epoch": 0.97, "grad_norm": 0.23922368061607324, "learning_rate": 0.00011305327272808583, "loss": 1.0289, "step": 10147 }, { "epoch": 0.97, "grad_norm": 0.25159221628276673, "learning_rate": 0.00011303758842568735, "loss": 0.999, "step": 10148 }, { "epoch": 0.97, "grad_norm": 0.2868191888678923, "learning_rate": 0.00011302190379701503, "loss": 1.0263, "step": 10149 }, { "epoch": 0.97, "grad_norm": 0.361397853429607, "learning_rate": 0.00011300621884246136, "loss": 1.1683, "step": 10150 }, { "epoch": 0.97, "grad_norm": 0.2826280133952878, "learning_rate": 0.00011299053356241891, "loss": 1.0963, "step": 10151 }, { "epoch": 0.97, "grad_norm": 0.28972955414613405, "learning_rate": 0.00011297484795728019, "loss": 1.0244, "step": 10152 }, { "epoch": 0.97, "grad_norm": 0.3320877897903177, "learning_rate": 0.00011295916202743773, "loss": 1.0985, "step": 10153 }, { "epoch": 0.97, "grad_norm": 0.2928253376533602, "learning_rate": 0.00011294347577328412, "loss": 1.0557, "step": 10154 }, { "epoch": 0.97, "grad_norm": 0.33688760633249853, "learning_rate": 0.00011292778919521189, "loss": 1.149, "step": 10155 }, { "epoch": 0.97, "grad_norm": 0.27676103323848317, "learning_rate": 0.00011291210229361362, "loss": 1.1117, "step": 10156 }, { "epoch": 0.97, "grad_norm": 0.29026256750352997, "learning_rate": 0.00011289641506888182, "loss": 1.0704, "step": 10157 }, { "epoch": 0.97, "grad_norm": 0.29334540208045223, "learning_rate": 0.0001128807275214092, "loss": 0.9988, "step": 10158 }, { "epoch": 0.97, "grad_norm": 0.2818605763391759, "learning_rate": 0.00011286503965158822, "loss": 1.0227, "step": 10159 }, { "epoch": 0.97, "grad_norm": 0.27801097090219445, "learning_rate": 0.00011284935145981157, "loss": 0.9915, "step": 10160 }, { "epoch": 0.97, "grad_norm": 0.2805850444759864, "learning_rate": 0.0001128336629464718, "loss": 1.0897, "step": 10161 }, { "epoch": 0.97, "grad_norm": 0.3032491771748229, "learning_rate": 0.00011281797411196156, "loss": 1.0666, "step": 10162 }, { "epoch": 0.97, "grad_norm": 0.300948028153539, "learning_rate": 0.00011280228495667346, "loss": 1.1215, "step": 10163 }, { "epoch": 0.97, "grad_norm": 0.32505655549938, "learning_rate": 0.00011278659548100015, "loss": 1.0754, "step": 10164 }, { "epoch": 0.97, "grad_norm": 0.2830441409159092, "learning_rate": 0.00011277090568533424, "loss": 1.1184, "step": 10165 }, { "epoch": 0.97, "grad_norm": 0.27235511959421765, "learning_rate": 0.0001127552155700684, "loss": 1.0373, "step": 10166 }, { "epoch": 0.97, "grad_norm": 0.3112142074976755, "learning_rate": 0.00011273952513559525, "loss": 1.0028, "step": 10167 }, { "epoch": 0.97, "grad_norm": 0.27216215210937883, "learning_rate": 0.0001127238343823075, "loss": 1.0513, "step": 10168 }, { "epoch": 0.97, "grad_norm": 0.2953527746729821, "learning_rate": 0.0001127081433105978, "loss": 1.0803, "step": 10169 }, { "epoch": 0.97, "grad_norm": 0.2897815242186968, "learning_rate": 0.0001126924519208588, "loss": 1.0248, "step": 10170 }, { "epoch": 0.97, "grad_norm": 0.28105812749428083, "learning_rate": 0.00011267676021348323, "loss": 1.013, "step": 10171 }, { "epoch": 0.97, "grad_norm": 0.30722992978249025, "learning_rate": 0.00011266106818886377, "loss": 1.0101, "step": 10172 }, { "epoch": 0.97, "grad_norm": 0.3033918428966205, "learning_rate": 0.00011264537584739314, "loss": 1.0618, "step": 10173 }, { "epoch": 0.97, "grad_norm": 0.28043324409698095, "learning_rate": 0.00011262968318946398, "loss": 0.9475, "step": 10174 }, { "epoch": 0.97, "grad_norm": 0.2609838079910845, "learning_rate": 0.00011261399021546912, "loss": 0.9683, "step": 10175 }, { "epoch": 0.97, "grad_norm": 0.3252941464289039, "learning_rate": 0.00011259829692580119, "loss": 1.0946, "step": 10176 }, { "epoch": 0.97, "grad_norm": 0.3102405322061509, "learning_rate": 0.00011258260332085298, "loss": 1.0548, "step": 10177 }, { "epoch": 0.97, "grad_norm": 0.27928723327298144, "learning_rate": 0.0001125669094010172, "loss": 1.0014, "step": 10178 }, { "epoch": 0.97, "grad_norm": 0.3043342368498864, "learning_rate": 0.00011255121516668663, "loss": 1.0738, "step": 10179 }, { "epoch": 0.97, "grad_norm": 0.28720036803685933, "learning_rate": 0.00011253552061825398, "loss": 1.0355, "step": 10180 }, { "epoch": 0.97, "grad_norm": 0.284815493784025, "learning_rate": 0.00011251982575611209, "loss": 1.0894, "step": 10181 }, { "epoch": 0.97, "grad_norm": 0.2556934414454461, "learning_rate": 0.00011250413058065365, "loss": 0.9576, "step": 10182 }, { "epoch": 0.97, "grad_norm": 0.26083285136088175, "learning_rate": 0.00011248843509227152, "loss": 0.945, "step": 10183 }, { "epoch": 0.97, "grad_norm": 0.28387550518261473, "learning_rate": 0.00011247273929135841, "loss": 1.1312, "step": 10184 }, { "epoch": 0.97, "grad_norm": 0.33228217976195246, "learning_rate": 0.00011245704317830721, "loss": 1.0897, "step": 10185 }, { "epoch": 0.97, "grad_norm": 0.31237530100881616, "learning_rate": 0.00011244134675351066, "loss": 1.1166, "step": 10186 }, { "epoch": 0.97, "grad_norm": 0.30031596382794623, "learning_rate": 0.00011242565001736159, "loss": 1.0411, "step": 10187 }, { "epoch": 0.97, "grad_norm": 0.3052672362577054, "learning_rate": 0.00011240995297025281, "loss": 0.9902, "step": 10188 }, { "epoch": 0.97, "grad_norm": 0.306885450267344, "learning_rate": 0.00011239425561257717, "loss": 1.0137, "step": 10189 }, { "epoch": 0.97, "grad_norm": 0.29453938744454133, "learning_rate": 0.00011237855794472748, "loss": 1.0604, "step": 10190 }, { "epoch": 0.97, "grad_norm": 0.24569112604498705, "learning_rate": 0.00011236285996709659, "loss": 1.128, "step": 10191 }, { "epoch": 0.98, "grad_norm": 0.30552330478731954, "learning_rate": 0.00011234716168007737, "loss": 1.0256, "step": 10192 }, { "epoch": 0.98, "grad_norm": 0.29514963250865506, "learning_rate": 0.00011233146308406268, "loss": 1.1758, "step": 10193 }, { "epoch": 0.98, "grad_norm": 0.2895811885576239, "learning_rate": 0.00011231576417944536, "loss": 1.0462, "step": 10194 }, { "epoch": 0.98, "grad_norm": 0.2674722074074362, "learning_rate": 0.00011230006496661831, "loss": 1.2296, "step": 10195 }, { "epoch": 0.98, "grad_norm": 0.31631409210970807, "learning_rate": 0.00011228436544597442, "loss": 1.0523, "step": 10196 }, { "epoch": 0.98, "grad_norm": 0.2383251012221512, "learning_rate": 0.00011226866561790653, "loss": 0.9241, "step": 10197 }, { "epoch": 0.98, "grad_norm": 0.3245079733703714, "learning_rate": 0.00011225296548280759, "loss": 1.0727, "step": 10198 }, { "epoch": 0.98, "grad_norm": 0.28495612155493694, "learning_rate": 0.0001122372650410705, "loss": 0.9841, "step": 10199 }, { "epoch": 0.98, "grad_norm": 0.30013506380723926, "learning_rate": 0.00011222156429308812, "loss": 0.9932, "step": 10200 }, { "epoch": 0.98, "grad_norm": 0.2786075519347718, "learning_rate": 0.00011220586323925346, "loss": 1.0858, "step": 10201 }, { "epoch": 0.98, "grad_norm": 0.3138110381506769, "learning_rate": 0.00011219016187995937, "loss": 1.1049, "step": 10202 }, { "epoch": 0.98, "grad_norm": 0.27581534819480813, "learning_rate": 0.00011217446021559883, "loss": 0.9373, "step": 10203 }, { "epoch": 0.98, "grad_norm": 0.3292494819868315, "learning_rate": 0.00011215875824656477, "loss": 0.9851, "step": 10204 }, { "epoch": 0.98, "grad_norm": 0.2655547214014843, "learning_rate": 0.00011214305597325015, "loss": 0.9125, "step": 10205 }, { "epoch": 0.98, "grad_norm": 0.2747966640064946, "learning_rate": 0.00011212735339604792, "loss": 0.9829, "step": 10206 }, { "epoch": 0.98, "grad_norm": 0.26757047246677396, "learning_rate": 0.00011211165051535104, "loss": 0.9451, "step": 10207 }, { "epoch": 0.98, "grad_norm": 0.27347785909550587, "learning_rate": 0.00011209594733155251, "loss": 1.0511, "step": 10208 }, { "epoch": 0.98, "grad_norm": 0.250251681777428, "learning_rate": 0.00011208024384504527, "loss": 1.0769, "step": 10209 }, { "epoch": 0.98, "grad_norm": 0.2977263384929927, "learning_rate": 0.00011206454005622237, "loss": 1.0488, "step": 10210 }, { "epoch": 0.98, "grad_norm": 0.26128674737229673, "learning_rate": 0.00011204883596547676, "loss": 0.9721, "step": 10211 }, { "epoch": 0.98, "grad_norm": 0.2864777874901107, "learning_rate": 0.00011203313157320146, "loss": 0.9903, "step": 10212 }, { "epoch": 0.98, "grad_norm": 0.2930793218779365, "learning_rate": 0.00011201742687978946, "loss": 1.0835, "step": 10213 }, { "epoch": 0.98, "grad_norm": 0.2726110420231643, "learning_rate": 0.0001120017218856338, "loss": 1.0418, "step": 10214 }, { "epoch": 0.98, "grad_norm": 0.26500863939608205, "learning_rate": 0.00011198601659112753, "loss": 0.9941, "step": 10215 }, { "epoch": 0.98, "grad_norm": 0.35922537571617147, "learning_rate": 0.00011197031099666366, "loss": 1.1, "step": 10216 }, { "epoch": 0.98, "grad_norm": 0.26403707584086666, "learning_rate": 0.00011195460510263523, "loss": 1.0841, "step": 10217 }, { "epoch": 0.98, "grad_norm": 0.30153285229911025, "learning_rate": 0.00011193889890943528, "loss": 0.9745, "step": 10218 }, { "epoch": 0.98, "grad_norm": 0.27058298329981373, "learning_rate": 0.0001119231924174569, "loss": 1.0014, "step": 10219 }, { "epoch": 0.98, "grad_norm": 0.2890597865454572, "learning_rate": 0.00011190748562709314, "loss": 1.1932, "step": 10220 }, { "epoch": 0.98, "grad_norm": 0.2559758601582075, "learning_rate": 0.00011189177853873705, "loss": 1.035, "step": 10221 }, { "epoch": 0.98, "grad_norm": 0.2675198324495206, "learning_rate": 0.00011187607115278173, "loss": 1.0852, "step": 10222 }, { "epoch": 0.98, "grad_norm": 0.2984560752244984, "learning_rate": 0.00011186036346962025, "loss": 1.0197, "step": 10223 }, { "epoch": 0.98, "grad_norm": 0.2746333508360344, "learning_rate": 0.00011184465548964575, "loss": 1.1096, "step": 10224 }, { "epoch": 0.98, "grad_norm": 0.2911661232884761, "learning_rate": 0.00011182894721325128, "loss": 1.0876, "step": 10225 }, { "epoch": 0.98, "grad_norm": 0.2545901819238632, "learning_rate": 0.00011181323864082999, "loss": 1.008, "step": 10226 }, { "epoch": 0.98, "grad_norm": 0.27438906716939765, "learning_rate": 0.00011179752977277498, "loss": 0.9276, "step": 10227 }, { "epoch": 0.98, "grad_norm": 0.30116616901446114, "learning_rate": 0.00011178182060947935, "loss": 0.9543, "step": 10228 }, { "epoch": 0.98, "grad_norm": 0.2752406915494225, "learning_rate": 0.00011176611115133628, "loss": 1.0355, "step": 10229 }, { "epoch": 0.98, "grad_norm": 0.27687088339188193, "learning_rate": 0.00011175040139873889, "loss": 0.9992, "step": 10230 }, { "epoch": 0.98, "grad_norm": 0.2650331819262323, "learning_rate": 0.00011173469135208028, "loss": 0.9954, "step": 10231 }, { "epoch": 0.98, "grad_norm": 0.29487040103252415, "learning_rate": 0.00011171898101175369, "loss": 0.9526, "step": 10232 }, { "epoch": 0.98, "grad_norm": 0.2784410390172419, "learning_rate": 0.0001117032703781522, "loss": 1.1314, "step": 10233 }, { "epoch": 0.98, "grad_norm": 0.34321818169181995, "learning_rate": 0.00011168755945166905, "loss": 1.0451, "step": 10234 }, { "epoch": 0.98, "grad_norm": 0.2934212795606079, "learning_rate": 0.00011167184823269735, "loss": 0.9916, "step": 10235 }, { "epoch": 0.98, "grad_norm": 0.27720714282535736, "learning_rate": 0.00011165613672163032, "loss": 1.052, "step": 10236 }, { "epoch": 0.98, "grad_norm": 0.2827387556355499, "learning_rate": 0.00011164042491886115, "loss": 1.0031, "step": 10237 }, { "epoch": 0.98, "grad_norm": 0.3020361414751645, "learning_rate": 0.00011162471282478299, "loss": 1.1295, "step": 10238 }, { "epoch": 0.98, "grad_norm": 0.2926727806203999, "learning_rate": 0.00011160900043978915, "loss": 1.0201, "step": 10239 }, { "epoch": 0.98, "grad_norm": 0.29327346670931664, "learning_rate": 0.00011159328776427274, "loss": 1.0045, "step": 10240 }, { "epoch": 0.98, "grad_norm": 0.3197321134655832, "learning_rate": 0.00011157757479862701, "loss": 0.9832, "step": 10241 }, { "epoch": 0.98, "grad_norm": 0.2834245023692689, "learning_rate": 0.00011156186154324522, "loss": 0.9674, "step": 10242 }, { "epoch": 0.98, "grad_norm": 0.2925004499767554, "learning_rate": 0.00011154614799852055, "loss": 1.0646, "step": 10243 }, { "epoch": 0.98, "grad_norm": 0.25673872599009095, "learning_rate": 0.0001115304341648463, "loss": 1.04, "step": 10244 }, { "epoch": 0.98, "grad_norm": 0.2808493957291415, "learning_rate": 0.00011151472004261565, "loss": 1.0743, "step": 10245 }, { "epoch": 0.98, "grad_norm": 0.2908844238513358, "learning_rate": 0.00011149900563222193, "loss": 1.0376, "step": 10246 }, { "epoch": 0.98, "grad_norm": 0.2887693925794027, "learning_rate": 0.00011148329093405836, "loss": 1.018, "step": 10247 }, { "epoch": 0.98, "grad_norm": 0.29234813333297677, "learning_rate": 0.0001114675759485182, "loss": 1.0377, "step": 10248 }, { "epoch": 0.98, "grad_norm": 0.28841549577391123, "learning_rate": 0.00011145186067599478, "loss": 1.1507, "step": 10249 }, { "epoch": 0.98, "grad_norm": 0.31377105859709736, "learning_rate": 0.00011143614511688132, "loss": 0.9281, "step": 10250 }, { "epoch": 0.98, "grad_norm": 0.27520050582114036, "learning_rate": 0.00011142042927157114, "loss": 1.0736, "step": 10251 }, { "epoch": 0.98, "grad_norm": 0.2836489124077, "learning_rate": 0.00011140471314045755, "loss": 1.1397, "step": 10252 }, { "epoch": 0.98, "grad_norm": 0.3196462341558277, "learning_rate": 0.00011138899672393386, "loss": 1.1109, "step": 10253 }, { "epoch": 0.98, "grad_norm": 0.2919022639490787, "learning_rate": 0.00011137328002239335, "loss": 1.0623, "step": 10254 }, { "epoch": 0.98, "grad_norm": 0.2668993177956516, "learning_rate": 0.00011135756303622937, "loss": 0.9756, "step": 10255 }, { "epoch": 0.98, "grad_norm": 0.29098900383647164, "learning_rate": 0.00011134184576583525, "loss": 0.981, "step": 10256 }, { "epoch": 0.98, "grad_norm": 0.2741127564871344, "learning_rate": 0.00011132612821160428, "loss": 1.0042, "step": 10257 }, { "epoch": 0.98, "grad_norm": 0.3179337291974993, "learning_rate": 0.00011131041037392984, "loss": 0.9453, "step": 10258 }, { "epoch": 0.98, "grad_norm": 0.2903023541952086, "learning_rate": 0.00011129469225320527, "loss": 1.1199, "step": 10259 }, { "epoch": 0.98, "grad_norm": 0.2762878923470942, "learning_rate": 0.00011127897384982396, "loss": 1.0547, "step": 10260 }, { "epoch": 0.98, "grad_norm": 0.29633017173721615, "learning_rate": 0.00011126325516417921, "loss": 1.0946, "step": 10261 }, { "epoch": 0.98, "grad_norm": 0.30069391214448493, "learning_rate": 0.00011124753619666441, "loss": 1.1047, "step": 10262 }, { "epoch": 0.98, "grad_norm": 0.30197537398471846, "learning_rate": 0.000111231816947673, "loss": 1.0716, "step": 10263 }, { "epoch": 0.98, "grad_norm": 0.22716259808522002, "learning_rate": 0.00011121609741759824, "loss": 1.1495, "step": 10264 }, { "epoch": 0.98, "grad_norm": 0.33447762653923324, "learning_rate": 0.00011120037760683364, "loss": 1.0615, "step": 10265 }, { "epoch": 0.98, "grad_norm": 0.2677195592128131, "learning_rate": 0.00011118465751577254, "loss": 1.0661, "step": 10266 }, { "epoch": 0.98, "grad_norm": 0.25916514691347986, "learning_rate": 0.00011116893714480836, "loss": 1.0618, "step": 10267 }, { "epoch": 0.98, "grad_norm": 0.2735392836561538, "learning_rate": 0.0001111532164943345, "loss": 1.1209, "step": 10268 }, { "epoch": 0.98, "grad_norm": 0.306277634485208, "learning_rate": 0.0001111374955647444, "loss": 1.0481, "step": 10269 }, { "epoch": 0.98, "grad_norm": 0.24999171813245338, "learning_rate": 0.00011112177435643147, "loss": 0.9234, "step": 10270 }, { "epoch": 0.98, "grad_norm": 0.26942078573492634, "learning_rate": 0.00011110605286978914, "loss": 1.0864, "step": 10271 }, { "epoch": 0.98, "grad_norm": 0.26245411051479745, "learning_rate": 0.00011109033110521086, "loss": 1.1104, "step": 10272 }, { "epoch": 0.98, "grad_norm": 0.31836586363546904, "learning_rate": 0.00011107460906309008, "loss": 0.9678, "step": 10273 }, { "epoch": 0.98, "grad_norm": 0.3164102794109734, "learning_rate": 0.00011105888674382025, "loss": 1.063, "step": 10274 }, { "epoch": 0.98, "grad_norm": 0.3168927687531968, "learning_rate": 0.0001110431641477948, "loss": 1.0617, "step": 10275 }, { "epoch": 0.98, "grad_norm": 0.2901612455077041, "learning_rate": 0.00011102744127540728, "loss": 1.0798, "step": 10276 }, { "epoch": 0.98, "grad_norm": 0.2808843978397793, "learning_rate": 0.0001110117181270511, "loss": 1.083, "step": 10277 }, { "epoch": 0.98, "grad_norm": 0.28149992419883424, "learning_rate": 0.00011099599470311972, "loss": 1.0127, "step": 10278 }, { "epoch": 0.98, "grad_norm": 0.31449595403015373, "learning_rate": 0.00011098027100400667, "loss": 1.1207, "step": 10279 }, { "epoch": 0.98, "grad_norm": 0.243827419121298, "learning_rate": 0.00011096454703010546, "loss": 0.9526, "step": 10280 }, { "epoch": 0.98, "grad_norm": 0.27945107442928213, "learning_rate": 0.00011094882278180956, "loss": 0.9607, "step": 10281 }, { "epoch": 0.98, "grad_norm": 0.27291199800336474, "learning_rate": 0.00011093309825951245, "loss": 1.0538, "step": 10282 }, { "epoch": 0.98, "grad_norm": 0.30765998442794057, "learning_rate": 0.00011091737346360773, "loss": 1.1567, "step": 10283 }, { "epoch": 0.98, "grad_norm": 0.28547060490679954, "learning_rate": 0.00011090164839448887, "loss": 0.9414, "step": 10284 }, { "epoch": 0.98, "grad_norm": 0.28516651031906826, "learning_rate": 0.00011088592305254939, "loss": 1.1272, "step": 10285 }, { "epoch": 0.98, "grad_norm": 0.28976084124588714, "learning_rate": 0.00011087019743818288, "loss": 1.067, "step": 10286 }, { "epoch": 0.98, "grad_norm": 0.30715911284891306, "learning_rate": 0.00011085447155178279, "loss": 1.0638, "step": 10287 }, { "epoch": 0.98, "grad_norm": 0.26407524082018785, "learning_rate": 0.00011083874539374277, "loss": 1.0787, "step": 10288 }, { "epoch": 0.98, "grad_norm": 0.2694386589995852, "learning_rate": 0.00011082301896445633, "loss": 1.0979, "step": 10289 }, { "epoch": 0.98, "grad_norm": 0.27217656363948856, "learning_rate": 0.00011080729226431703, "loss": 1.141, "step": 10290 }, { "epoch": 0.98, "grad_norm": 0.2794911223689666, "learning_rate": 0.00011079156529371846, "loss": 1.049, "step": 10291 }, { "epoch": 0.98, "grad_norm": 0.27195359614242276, "learning_rate": 0.00011077583805305418, "loss": 0.9813, "step": 10292 }, { "epoch": 0.98, "grad_norm": 0.29038357957473354, "learning_rate": 0.00011076011054271778, "loss": 1.1996, "step": 10293 }, { "epoch": 0.98, "grad_norm": 0.28199425499385034, "learning_rate": 0.00011074438276310287, "loss": 1.1444, "step": 10294 }, { "epoch": 0.98, "grad_norm": 0.30433530645019496, "learning_rate": 0.00011072865471460301, "loss": 1.0179, "step": 10295 }, { "epoch": 0.99, "grad_norm": 0.26891004746639385, "learning_rate": 0.00011071292639761181, "loss": 1.0334, "step": 10296 }, { "epoch": 0.99, "grad_norm": 0.29402614889194606, "learning_rate": 0.0001106971978125229, "loss": 1.0908, "step": 10297 }, { "epoch": 0.99, "grad_norm": 0.2559539105537088, "learning_rate": 0.00011068146895972993, "loss": 1.1747, "step": 10298 }, { "epoch": 0.99, "grad_norm": 0.2845273763898639, "learning_rate": 0.00011066573983962642, "loss": 1.0114, "step": 10299 }, { "epoch": 0.99, "grad_norm": 0.3025475977583579, "learning_rate": 0.00011065001045260613, "loss": 1.0435, "step": 10300 }, { "epoch": 0.99, "grad_norm": 0.2747465345583122, "learning_rate": 0.00011063428079906259, "loss": 1.1137, "step": 10301 }, { "epoch": 0.99, "grad_norm": 0.2990549222362699, "learning_rate": 0.0001106185508793895, "loss": 1.0761, "step": 10302 }, { "epoch": 0.99, "grad_norm": 0.3060379979713365, "learning_rate": 0.00011060282069398052, "loss": 1.0701, "step": 10303 }, { "epoch": 0.99, "grad_norm": 0.25935605894516695, "learning_rate": 0.00011058709024322929, "loss": 1.0854, "step": 10304 }, { "epoch": 0.99, "grad_norm": 0.284568963561047, "learning_rate": 0.00011057135952752943, "loss": 0.9767, "step": 10305 }, { "epoch": 0.99, "grad_norm": 0.268579961216263, "learning_rate": 0.00011055562854727471, "loss": 1.0459, "step": 10306 }, { "epoch": 0.99, "grad_norm": 0.26453950568162055, "learning_rate": 0.00011053989730285869, "loss": 1.0633, "step": 10307 }, { "epoch": 0.99, "grad_norm": 0.2840144675918696, "learning_rate": 0.00011052416579467518, "loss": 0.8992, "step": 10308 }, { "epoch": 0.99, "grad_norm": 0.2526273337620793, "learning_rate": 0.00011050843402311777, "loss": 1.0391, "step": 10309 }, { "epoch": 0.99, "grad_norm": 0.2907758381732847, "learning_rate": 0.00011049270198858019, "loss": 1.1091, "step": 10310 }, { "epoch": 0.99, "grad_norm": 0.29330977370977224, "learning_rate": 0.00011047696969145618, "loss": 1.1539, "step": 10311 }, { "epoch": 0.99, "grad_norm": 0.28027168248307766, "learning_rate": 0.00011046123713213939, "loss": 1.0466, "step": 10312 }, { "epoch": 0.99, "grad_norm": 0.29433174543880003, "learning_rate": 0.00011044550431102358, "loss": 1.0868, "step": 10313 }, { "epoch": 0.99, "grad_norm": 0.28747079837458267, "learning_rate": 0.00011042977122850247, "loss": 0.9917, "step": 10314 }, { "epoch": 0.99, "grad_norm": 0.2841412064889096, "learning_rate": 0.00011041403788496976, "loss": 1.23, "step": 10315 }, { "epoch": 0.99, "grad_norm": 0.26187375710340965, "learning_rate": 0.0001103983042808192, "loss": 1.0055, "step": 10316 }, { "epoch": 0.99, "grad_norm": 0.32286275918136254, "learning_rate": 0.00011038257041644455, "loss": 0.993, "step": 10317 }, { "epoch": 0.99, "grad_norm": 0.28197212224874324, "learning_rate": 0.00011036683629223958, "loss": 1.003, "step": 10318 }, { "epoch": 0.99, "grad_norm": 0.25905993814852957, "learning_rate": 0.00011035110190859796, "loss": 1.0416, "step": 10319 }, { "epoch": 0.99, "grad_norm": 0.2894375367829976, "learning_rate": 0.00011033536726591356, "loss": 1.0795, "step": 10320 }, { "epoch": 0.99, "grad_norm": 0.32587938767421387, "learning_rate": 0.00011031963236458008, "loss": 1.0801, "step": 10321 }, { "epoch": 0.99, "grad_norm": 0.2678020975589778, "learning_rate": 0.00011030389720499132, "loss": 1.1145, "step": 10322 }, { "epoch": 0.99, "grad_norm": 0.30583316214893913, "learning_rate": 0.00011028816178754104, "loss": 1.0778, "step": 10323 }, { "epoch": 0.99, "grad_norm": 0.32483933693639894, "learning_rate": 0.00011027242611262306, "loss": 1.0419, "step": 10324 }, { "epoch": 0.99, "grad_norm": 0.31586042610524007, "learning_rate": 0.00011025669018063116, "loss": 1.0958, "step": 10325 }, { "epoch": 0.99, "grad_norm": 0.2878776556420424, "learning_rate": 0.00011024095399195913, "loss": 1.0656, "step": 10326 }, { "epoch": 0.99, "grad_norm": 0.2830198312625606, "learning_rate": 0.0001102252175470008, "loss": 1.0493, "step": 10327 }, { "epoch": 0.99, "grad_norm": 0.2803658012096297, "learning_rate": 0.00011020948084614995, "loss": 0.9544, "step": 10328 }, { "epoch": 0.99, "grad_norm": 0.3160224185691885, "learning_rate": 0.00011019374388980046, "loss": 0.9955, "step": 10329 }, { "epoch": 0.99, "grad_norm": 0.28845814066617365, "learning_rate": 0.0001101780066783461, "loss": 1.1675, "step": 10330 }, { "epoch": 0.99, "grad_norm": 0.2892793784279133, "learning_rate": 0.00011016226921218074, "loss": 0.9273, "step": 10331 }, { "epoch": 0.99, "grad_norm": 0.27909437771191264, "learning_rate": 0.0001101465314916982, "loss": 1.1146, "step": 10332 }, { "epoch": 0.99, "grad_norm": 0.3021556255362156, "learning_rate": 0.00011013079351729232, "loss": 1.0174, "step": 10333 }, { "epoch": 0.99, "grad_norm": 0.3088606738990712, "learning_rate": 0.000110115055289357, "loss": 1.0696, "step": 10334 }, { "epoch": 0.99, "grad_norm": 0.2532028338885159, "learning_rate": 0.00011009931680828604, "loss": 1.0509, "step": 10335 }, { "epoch": 0.99, "grad_norm": 0.2644967377127452, "learning_rate": 0.00011008357807447334, "loss": 1.011, "step": 10336 }, { "epoch": 0.99, "grad_norm": 0.25911644503390924, "learning_rate": 0.00011006783908831275, "loss": 0.9568, "step": 10337 }, { "epoch": 0.99, "grad_norm": 0.3093805850346706, "learning_rate": 0.00011005209985019817, "loss": 1.0442, "step": 10338 }, { "epoch": 0.99, "grad_norm": 0.26605780506457216, "learning_rate": 0.00011003636036052347, "loss": 1.12, "step": 10339 }, { "epoch": 0.99, "grad_norm": 0.2738908065467255, "learning_rate": 0.00011002062061968255, "loss": 1.0423, "step": 10340 }, { "epoch": 0.99, "grad_norm": 0.38868786111630993, "learning_rate": 0.00011000488062806929, "loss": 1.0834, "step": 10341 }, { "epoch": 0.99, "grad_norm": 0.3172034502101059, "learning_rate": 0.00010998914038607762, "loss": 1.0222, "step": 10342 }, { "epoch": 0.99, "grad_norm": 0.2858843799078492, "learning_rate": 0.0001099733998941014, "loss": 1.0284, "step": 10343 }, { "epoch": 0.99, "grad_norm": 0.2646838577150544, "learning_rate": 0.00010995765915253462, "loss": 1.1413, "step": 10344 }, { "epoch": 0.99, "grad_norm": 0.27314529183764996, "learning_rate": 0.00010994191816177115, "loss": 1.0144, "step": 10345 }, { "epoch": 0.99, "grad_norm": 0.25795307029613257, "learning_rate": 0.0001099261769222049, "loss": 1.1011, "step": 10346 }, { "epoch": 0.99, "grad_norm": 0.269654266329036, "learning_rate": 0.00010991043543422987, "loss": 1.093, "step": 10347 }, { "epoch": 0.99, "grad_norm": 0.2873513143757092, "learning_rate": 0.00010989469369823993, "loss": 1.0575, "step": 10348 }, { "epoch": 0.99, "grad_norm": 0.2966705516629044, "learning_rate": 0.0001098789517146291, "loss": 0.9566, "step": 10349 }, { "epoch": 0.99, "grad_norm": 0.30235296838928694, "learning_rate": 0.00010986320948379125, "loss": 1.0973, "step": 10350 }, { "epoch": 0.99, "grad_norm": 0.22856559446563995, "learning_rate": 0.00010984746700612043, "loss": 1.0409, "step": 10351 }, { "epoch": 0.99, "grad_norm": 0.29679007545691055, "learning_rate": 0.00010983172428201055, "loss": 1.0134, "step": 10352 }, { "epoch": 0.99, "grad_norm": 0.3089775230936333, "learning_rate": 0.00010981598131185558, "loss": 1.0447, "step": 10353 }, { "epoch": 0.99, "grad_norm": 0.29725469954817196, "learning_rate": 0.00010980023809604951, "loss": 1.0729, "step": 10354 }, { "epoch": 0.99, "grad_norm": 0.2626951615117108, "learning_rate": 0.00010978449463498632, "loss": 0.9726, "step": 10355 }, { "epoch": 0.99, "grad_norm": 0.27242407027742316, "learning_rate": 0.00010976875092906003, "loss": 1.0434, "step": 10356 }, { "epoch": 0.99, "grad_norm": 0.2947561469672809, "learning_rate": 0.00010975300697866456, "loss": 1.0845, "step": 10357 }, { "epoch": 0.99, "grad_norm": 0.29370486489779185, "learning_rate": 0.00010973726278419398, "loss": 0.9322, "step": 10358 }, { "epoch": 0.99, "grad_norm": 0.2928377780175839, "learning_rate": 0.00010972151834604229, "loss": 1.0208, "step": 10359 }, { "epoch": 0.99, "grad_norm": 0.32099727413336887, "learning_rate": 0.00010970577366460349, "loss": 1.0144, "step": 10360 }, { "epoch": 0.99, "grad_norm": 0.2763946202818681, "learning_rate": 0.00010969002874027161, "loss": 1.206, "step": 10361 }, { "epoch": 0.99, "grad_norm": 0.29031856568736375, "learning_rate": 0.00010967428357344067, "loss": 1.0013, "step": 10362 }, { "epoch": 0.99, "grad_norm": 0.2600423194599826, "learning_rate": 0.00010965853816450469, "loss": 1.1104, "step": 10363 }, { "epoch": 0.99, "grad_norm": 0.262125842100789, "learning_rate": 0.00010964279251385774, "loss": 1.0854, "step": 10364 }, { "epoch": 0.99, "grad_norm": 0.2983375163299008, "learning_rate": 0.00010962704662189383, "loss": 1.0486, "step": 10365 }, { "epoch": 0.99, "grad_norm": 0.27574617002830204, "learning_rate": 0.00010961130048900705, "loss": 1.0465, "step": 10366 }, { "epoch": 0.99, "grad_norm": 0.2474828198501618, "learning_rate": 0.0001095955541155914, "loss": 1.0645, "step": 10367 }, { "epoch": 0.99, "grad_norm": 0.33229191383287654, "learning_rate": 0.000109579807502041, "loss": 0.9869, "step": 10368 }, { "epoch": 0.99, "grad_norm": 0.2982566291542284, "learning_rate": 0.0001095640606487499, "loss": 1.0616, "step": 10369 }, { "epoch": 0.99, "grad_norm": 0.3011080575254373, "learning_rate": 0.00010954831355611215, "loss": 1.0475, "step": 10370 }, { "epoch": 0.99, "grad_norm": 0.2954042572701708, "learning_rate": 0.00010953256622452185, "loss": 1.0212, "step": 10371 }, { "epoch": 0.99, "grad_norm": 0.3015044088263099, "learning_rate": 0.0001095168186543731, "loss": 1.0839, "step": 10372 }, { "epoch": 0.99, "grad_norm": 0.36161978315568827, "learning_rate": 0.00010950107084605998, "loss": 0.9966, "step": 10373 }, { "epoch": 0.99, "grad_norm": 0.2947245497967197, "learning_rate": 0.00010948532279997664, "loss": 1.0052, "step": 10374 }, { "epoch": 0.99, "grad_norm": 0.278629231797063, "learning_rate": 0.00010946957451651709, "loss": 0.974, "step": 10375 }, { "epoch": 0.99, "grad_norm": 0.3081240007766056, "learning_rate": 0.0001094538259960755, "loss": 0.9513, "step": 10376 }, { "epoch": 0.99, "grad_norm": 0.37939549753517904, "learning_rate": 0.00010943807723904593, "loss": 1.1191, "step": 10377 }, { "epoch": 0.99, "grad_norm": 0.24633288069760065, "learning_rate": 0.0001094223282458226, "loss": 1.0712, "step": 10378 }, { "epoch": 0.99, "grad_norm": 0.3096687909717489, "learning_rate": 0.00010940657901679956, "loss": 0.9914, "step": 10379 }, { "epoch": 0.99, "grad_norm": 0.3063298337056405, "learning_rate": 0.00010939082955237096, "loss": 1.1432, "step": 10380 }, { "epoch": 0.99, "grad_norm": 0.2745100518027808, "learning_rate": 0.00010937507985293098, "loss": 1.0335, "step": 10381 }, { "epoch": 0.99, "grad_norm": 0.25605757624283976, "learning_rate": 0.00010935932991887372, "loss": 0.9888, "step": 10382 }, { "epoch": 0.99, "grad_norm": 0.27108913673944884, "learning_rate": 0.00010934357975059334, "loss": 1.1554, "step": 10383 }, { "epoch": 0.99, "grad_norm": 0.3465731349344641, "learning_rate": 0.000109327829348484, "loss": 1.0209, "step": 10384 }, { "epoch": 0.99, "grad_norm": 0.2661351715791654, "learning_rate": 0.00010931207871293987, "loss": 1.1225, "step": 10385 }, { "epoch": 0.99, "grad_norm": 0.2992239551094853, "learning_rate": 0.00010929632784435513, "loss": 1.1448, "step": 10386 }, { "epoch": 0.99, "grad_norm": 0.2714660858795361, "learning_rate": 0.00010928057674312393, "loss": 1.1303, "step": 10387 }, { "epoch": 0.99, "grad_norm": 0.3560152461135453, "learning_rate": 0.00010926482540964047, "loss": 1.0939, "step": 10388 }, { "epoch": 0.99, "grad_norm": 0.33230188942732725, "learning_rate": 0.00010924907384429892, "loss": 1.0654, "step": 10389 }, { "epoch": 0.99, "grad_norm": 0.2648343149387062, "learning_rate": 0.0001092333220474935, "loss": 1.0357, "step": 10390 }, { "epoch": 0.99, "grad_norm": 0.2959171922667274, "learning_rate": 0.00010921757001961839, "loss": 1.0695, "step": 10391 }, { "epoch": 0.99, "grad_norm": 0.26506456061345024, "learning_rate": 0.00010920181776106779, "loss": 1.1593, "step": 10392 }, { "epoch": 0.99, "grad_norm": 0.3204886263895637, "learning_rate": 0.00010918606527223593, "loss": 1.0391, "step": 10393 }, { "epoch": 0.99, "grad_norm": 0.24450093898680367, "learning_rate": 0.00010917031255351699, "loss": 0.9267, "step": 10394 }, { "epoch": 0.99, "grad_norm": 0.28085674386349635, "learning_rate": 0.00010915455960530526, "loss": 1.0893, "step": 10395 }, { "epoch": 0.99, "grad_norm": 0.28257173644304456, "learning_rate": 0.00010913880642799488, "loss": 1.0546, "step": 10396 }, { "epoch": 0.99, "grad_norm": 0.3084129119694214, "learning_rate": 0.00010912305302198014, "loss": 1.0221, "step": 10397 }, { "epoch": 0.99, "grad_norm": 0.2778700782623621, "learning_rate": 0.00010910729938765528, "loss": 1.0197, "step": 10398 }, { "epoch": 0.99, "grad_norm": 0.24322367397269376, "learning_rate": 0.00010909154552541449, "loss": 0.9353, "step": 10399 }, { "epoch": 0.99, "grad_norm": 0.27278544222892476, "learning_rate": 0.0001090757914356521, "loss": 1.076, "step": 10400 }, { "epoch": 1.0, "grad_norm": 0.2869093663218051, "learning_rate": 0.00010906003711876229, "loss": 0.9683, "step": 10401 }, { "epoch": 1.0, "grad_norm": 0.2941519770990777, "learning_rate": 0.00010904428257513939, "loss": 1.0158, "step": 10402 }, { "epoch": 1.0, "grad_norm": 0.27338008253801777, "learning_rate": 0.00010902852780517763, "loss": 1.0386, "step": 10403 }, { "epoch": 1.0, "grad_norm": 0.28389255664388, "learning_rate": 0.00010901277280927124, "loss": 1.1183, "step": 10404 }, { "epoch": 1.0, "grad_norm": 0.3443690879581628, "learning_rate": 0.0001089970175878146, "loss": 1.098, "step": 10405 }, { "epoch": 1.0, "grad_norm": 0.2551799172999775, "learning_rate": 0.00010898126214120194, "loss": 1.0581, "step": 10406 }, { "epoch": 1.0, "grad_norm": 0.282716317579218, "learning_rate": 0.0001089655064698275, "loss": 0.9715, "step": 10407 }, { "epoch": 1.0, "grad_norm": 0.31282993584728686, "learning_rate": 0.00010894975057408568, "loss": 1.047, "step": 10408 }, { "epoch": 1.0, "grad_norm": 0.30036322798930976, "learning_rate": 0.00010893399445437071, "loss": 1.188, "step": 10409 }, { "epoch": 1.0, "grad_norm": 0.3325931928877523, "learning_rate": 0.0001089182381110769, "loss": 0.9747, "step": 10410 }, { "epoch": 1.0, "grad_norm": 0.3117101250717752, "learning_rate": 0.00010890248154459858, "loss": 1.0603, "step": 10411 }, { "epoch": 1.0, "grad_norm": 0.28934935878323786, "learning_rate": 0.00010888672475533006, "loss": 1.093, "step": 10412 }, { "epoch": 1.0, "grad_norm": 0.2716220894787029, "learning_rate": 0.0001088709677436657, "loss": 1.1221, "step": 10413 }, { "epoch": 1.0, "grad_norm": 0.30194803612986443, "learning_rate": 0.00010885521050999976, "loss": 1.0063, "step": 10414 }, { "epoch": 1.0, "grad_norm": 0.28931680238920227, "learning_rate": 0.00010883945305472662, "loss": 0.9928, "step": 10415 }, { "epoch": 1.0, "grad_norm": 0.3229463397228268, "learning_rate": 0.00010882369537824064, "loss": 1.0712, "step": 10416 }, { "epoch": 1.0, "grad_norm": 0.2954911003697874, "learning_rate": 0.00010880793748093615, "loss": 1.0046, "step": 10417 }, { "epoch": 1.0, "grad_norm": 0.2872966086169036, "learning_rate": 0.00010879217936320743, "loss": 1.0814, "step": 10418 }, { "epoch": 1.0, "grad_norm": 0.3310289766979793, "learning_rate": 0.00010877642102544891, "loss": 0.9988, "step": 10419 }, { "epoch": 1.0, "grad_norm": 0.2661925275942424, "learning_rate": 0.00010876066246805496, "loss": 1.0533, "step": 10420 }, { "epoch": 1.0, "grad_norm": 0.2868056408701368, "learning_rate": 0.00010874490369141991, "loss": 1.0454, "step": 10421 }, { "epoch": 1.0, "grad_norm": 0.25382291752633346, "learning_rate": 0.00010872914469593816, "loss": 1.0787, "step": 10422 }, { "epoch": 1.0, "grad_norm": 0.32950064922007866, "learning_rate": 0.00010871338548200409, "loss": 1.0575, "step": 10423 }, { "epoch": 1.0, "grad_norm": 0.28542084992905475, "learning_rate": 0.00010869762605001204, "loss": 1.0273, "step": 10424 }, { "epoch": 1.0, "grad_norm": 0.274404919205005, "learning_rate": 0.00010868186640035645, "loss": 1.1522, "step": 10425 }, { "epoch": 1.0, "grad_norm": 0.27910073576406763, "learning_rate": 0.0001086661065334317, "loss": 0.9747, "step": 10426 }, { "epoch": 1.0, "grad_norm": 0.2754135797678086, "learning_rate": 0.00010865034644963219, "loss": 1.0593, "step": 10427 }, { "epoch": 1.0, "grad_norm": 0.3036570067584713, "learning_rate": 0.00010863458614935228, "loss": 0.9586, "step": 10428 }, { "epoch": 1.0, "grad_norm": 0.25912219351365356, "learning_rate": 0.00010861882563298648, "loss": 1.1329, "step": 10429 }, { "epoch": 1.0, "grad_norm": 0.30796423013527807, "learning_rate": 0.00010860306490092916, "loss": 0.9919, "step": 10430 }, { "epoch": 1.0, "grad_norm": 0.2939383687791828, "learning_rate": 0.00010858730395357468, "loss": 0.9745, "step": 10431 }, { "epoch": 1.0, "grad_norm": 0.27337663742366286, "learning_rate": 0.00010857154279131754, "loss": 0.9214, "step": 10432 }, { "epoch": 1.0, "grad_norm": 0.26204823094059426, "learning_rate": 0.00010855578141455216, "loss": 1.0411, "step": 10433 }, { "epoch": 1.0, "grad_norm": 0.26527110818824695, "learning_rate": 0.00010854001982367296, "loss": 1.0424, "step": 10434 }, { "epoch": 1.0, "grad_norm": 0.25265488574920947, "learning_rate": 0.00010852425801907442, "loss": 0.9679, "step": 10435 }, { "epoch": 1.0, "grad_norm": 0.26557773202249946, "learning_rate": 0.00010850849600115096, "loss": 1.0498, "step": 10436 }, { "epoch": 1.0, "grad_norm": 0.3095199638697367, "learning_rate": 0.00010849273377029705, "loss": 0.9924, "step": 10437 }, { "epoch": 1.0, "grad_norm": 0.25086882514656306, "learning_rate": 0.00010847697132690713, "loss": 1.0061, "step": 10438 }, { "epoch": 1.0, "grad_norm": 0.26227634701423524, "learning_rate": 0.00010846120867137567, "loss": 1.022, "step": 10439 }, { "epoch": 1.0, "grad_norm": 0.272747535197043, "learning_rate": 0.00010844544580409717, "loss": 1.049, "step": 10440 }, { "epoch": 1.0, "grad_norm": 0.30486635134117596, "learning_rate": 0.00010842968272546603, "loss": 0.9928, "step": 10441 }, { "epoch": 1.0, "grad_norm": 0.2911807248936414, "learning_rate": 0.00010841391943587682, "loss": 1.0824, "step": 10442 }, { "epoch": 1.0, "grad_norm": 0.24115347100619006, "learning_rate": 0.00010839815593572398, "loss": 1.0671, "step": 10443 }, { "epoch": 1.0, "grad_norm": 0.2749883873988357, "learning_rate": 0.00010838239222540203, "loss": 1.1308, "step": 10444 }, { "epoch": 1.0, "grad_norm": 0.25592035706779787, "learning_rate": 0.00010836662830530539, "loss": 1.0291, "step": 10445 }, { "epoch": 1.0, "grad_norm": 0.27534119534699625, "learning_rate": 0.00010835086417582867, "loss": 1.075, "step": 10446 }, { "epoch": 1.0, "grad_norm": 0.29613831949975483, "learning_rate": 0.00010833509983736632, "loss": 1.0167, "step": 10447 }, { "epoch": 1.0, "grad_norm": 0.2971382111384855, "learning_rate": 0.00010831933529031284, "loss": 1.0102, "step": 10448 }, { "epoch": 1.0, "grad_norm": 0.2833249451857255, "learning_rate": 0.00010830357053506277, "loss": 1.1205, "step": 10449 }, { "epoch": 1.0, "grad_norm": 0.28978501988105787, "learning_rate": 0.00010828780557201066, "loss": 1.0154, "step": 10450 }, { "epoch": 1.0, "grad_norm": 0.32646478206285645, "learning_rate": 0.00010827204040155094, "loss": 0.9872, "step": 10451 }, { "epoch": 1.0, "grad_norm": 0.28028521065676737, "learning_rate": 0.00010825627502407827, "loss": 0.9816, "step": 10452 }, { "epoch": 1.0, "eval_loss": 1.123805046081543, "eval_runtime": 4227.9159, "eval_samples_per_second": 19.778, "eval_steps_per_second": 2.472, "step": 10452 } ], "logging_steps": 1, "max_steps": 20904, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10452, "total_flos": 2.304536495731507e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }