{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999652161814324, "eval_steps": 500, "global_step": 14374, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.956763713520471e-05, "grad_norm": 16.375, "learning_rate": 4.6296296296296296e-06, "loss": 4.1097, "step": 1 }, { "epoch": 0.00013913527427040942, "grad_norm": 30.125, "learning_rate": 9.259259259259259e-06, "loss": 4.3742, "step": 2 }, { "epoch": 0.0002087029114056141, "grad_norm": 20.625, "learning_rate": 1.3888888888888888e-05, "loss": 3.9231, "step": 3 }, { "epoch": 0.00027827054854081884, "grad_norm": 14.3125, "learning_rate": 1.8518518518518518e-05, "loss": 3.7877, "step": 4 }, { "epoch": 0.0003478381856760235, "grad_norm": 20.0, "learning_rate": 2.3148148148148147e-05, "loss": 3.9694, "step": 5 }, { "epoch": 0.0004174058228112282, "grad_norm": 13.75, "learning_rate": 2.7777777777777776e-05, "loss": 4.0547, "step": 6 }, { "epoch": 0.0004869734599464329, "grad_norm": 17.875, "learning_rate": 3.240740740740741e-05, "loss": 3.9384, "step": 7 }, { "epoch": 0.0005565410970816377, "grad_norm": 15.8125, "learning_rate": 3.7037037037037037e-05, "loss": 3.8156, "step": 8 }, { "epoch": 0.0006261087342168423, "grad_norm": 11.625, "learning_rate": 4.1666666666666665e-05, "loss": 4.0875, "step": 9 }, { "epoch": 0.000695676371352047, "grad_norm": 14.125, "learning_rate": 4.6296296296296294e-05, "loss": 3.9151, "step": 10 }, { "epoch": 0.0007652440084872517, "grad_norm": 5.65625, "learning_rate": 5.092592592592592e-05, "loss": 3.8153, "step": 11 }, { "epoch": 0.0008348116456224564, "grad_norm": 5.09375, "learning_rate": 5.555555555555555e-05, "loss": 3.9969, "step": 12 }, { "epoch": 0.0009043792827576611, "grad_norm": 7.65625, "learning_rate": 6.018518518518518e-05, "loss": 3.6755, "step": 13 }, { "epoch": 0.0009739469198928658, "grad_norm": 5.6875, "learning_rate": 6.481481481481482e-05, "loss": 3.8728, "step": 14 }, { "epoch": 0.0010435145570280705, "grad_norm": 3.03125, "learning_rate": 6.944444444444444e-05, "loss": 3.8949, "step": 15 }, { "epoch": 0.0011130821941632753, "grad_norm": 2.65625, "learning_rate": 7.407407407407407e-05, "loss": 4.2955, "step": 16 }, { "epoch": 0.00118264983129848, "grad_norm": 3.546875, "learning_rate": 7.87037037037037e-05, "loss": 3.6512, "step": 17 }, { "epoch": 0.0012522174684336846, "grad_norm": 2.765625, "learning_rate": 8.333333333333333e-05, "loss": 4.3409, "step": 18 }, { "epoch": 0.0013217851055688894, "grad_norm": 2.546875, "learning_rate": 8.796296296296297e-05, "loss": 3.5914, "step": 19 }, { "epoch": 0.001391352742704094, "grad_norm": 2.21875, "learning_rate": 9.259259259259259e-05, "loss": 4.1952, "step": 20 }, { "epoch": 0.0014609203798392988, "grad_norm": 2.234375, "learning_rate": 9.722222222222223e-05, "loss": 3.7605, "step": 21 }, { "epoch": 0.0015304880169745034, "grad_norm": 2.703125, "learning_rate": 0.00010185185185185185, "loss": 3.8877, "step": 22 }, { "epoch": 0.0016000556541097082, "grad_norm": 1.6953125, "learning_rate": 0.00010648148148148149, "loss": 3.738, "step": 23 }, { "epoch": 0.0016696232912449128, "grad_norm": 1.75, "learning_rate": 0.0001111111111111111, "loss": 3.7867, "step": 24 }, { "epoch": 0.0017391909283801176, "grad_norm": 2.0, "learning_rate": 0.00011574074074074075, "loss": 3.6365, "step": 25 }, { "epoch": 0.0018087585655153222, "grad_norm": 1.8984375, "learning_rate": 0.00012037037037037036, "loss": 4.0938, "step": 26 }, { "epoch": 0.001878326202650527, "grad_norm": 2.25, "learning_rate": 0.000125, "loss": 3.5465, "step": 27 }, { "epoch": 0.0019478938397857316, "grad_norm": 1.7578125, "learning_rate": 0.00012962962962962963, "loss": 4.0124, "step": 28 }, { "epoch": 0.0020174614769209362, "grad_norm": 1.6015625, "learning_rate": 0.0001342592592592593, "loss": 3.7071, "step": 29 }, { "epoch": 0.002087029114056141, "grad_norm": 2.140625, "learning_rate": 0.0001388888888888889, "loss": 4.0923, "step": 30 }, { "epoch": 0.002156596751191346, "grad_norm": 1.9453125, "learning_rate": 0.00014351851851851852, "loss": 3.6059, "step": 31 }, { "epoch": 0.0022261643883265507, "grad_norm": 1.9765625, "learning_rate": 0.00014814814814814815, "loss": 3.5063, "step": 32 }, { "epoch": 0.002295732025461755, "grad_norm": 2.59375, "learning_rate": 0.0001527777777777778, "loss": 3.846, "step": 33 }, { "epoch": 0.00236529966259696, "grad_norm": 1.9921875, "learning_rate": 0.0001574074074074074, "loss": 3.8888, "step": 34 }, { "epoch": 0.0024348672997321647, "grad_norm": 3.046875, "learning_rate": 0.00016203703703703703, "loss": 3.7714, "step": 35 }, { "epoch": 0.002504434936867369, "grad_norm": 2.1875, "learning_rate": 0.00016666666666666666, "loss": 3.7296, "step": 36 }, { "epoch": 0.002574002574002574, "grad_norm": 2.484375, "learning_rate": 0.00017129629629629632, "loss": 3.9198, "step": 37 }, { "epoch": 0.0026435702111377787, "grad_norm": 2.125, "learning_rate": 0.00017592592592592595, "loss": 3.6842, "step": 38 }, { "epoch": 0.0027131378482729836, "grad_norm": 2.828125, "learning_rate": 0.00018055555555555555, "loss": 3.5604, "step": 39 }, { "epoch": 0.002782705485408188, "grad_norm": 2.203125, "learning_rate": 0.00018518518518518518, "loss": 3.8453, "step": 40 }, { "epoch": 0.0028522731225433928, "grad_norm": 2.515625, "learning_rate": 0.00018981481481481483, "loss": 3.5141, "step": 41 }, { "epoch": 0.0029218407596785976, "grad_norm": 2.8125, "learning_rate": 0.00019444444444444446, "loss": 3.477, "step": 42 }, { "epoch": 0.0029914083968138024, "grad_norm": 4.0, "learning_rate": 0.00019907407407407406, "loss": 3.822, "step": 43 }, { "epoch": 0.0030609760339490068, "grad_norm": 2.09375, "learning_rate": 0.0002037037037037037, "loss": 3.9645, "step": 44 }, { "epoch": 0.0031305436710842116, "grad_norm": 4.375, "learning_rate": 0.00020833333333333335, "loss": 3.3629, "step": 45 }, { "epoch": 0.0032001113082194164, "grad_norm": 3.578125, "learning_rate": 0.00021296296296296298, "loss": 3.4721, "step": 46 }, { "epoch": 0.0032696789453546212, "grad_norm": 4.03125, "learning_rate": 0.0002175925925925926, "loss": 3.2992, "step": 47 }, { "epoch": 0.0033392465824898256, "grad_norm": 3.234375, "learning_rate": 0.0002222222222222222, "loss": 3.648, "step": 48 }, { "epoch": 0.0034088142196250304, "grad_norm": 3.203125, "learning_rate": 0.00022685185185185186, "loss": 3.3534, "step": 49 }, { "epoch": 0.0034783818567602352, "grad_norm": 3.203125, "learning_rate": 0.0002314814814814815, "loss": 3.8404, "step": 50 }, { "epoch": 0.0035479494938954396, "grad_norm": 2.859375, "learning_rate": 0.00023611111111111112, "loss": 3.6755, "step": 51 }, { "epoch": 0.0036175171310306444, "grad_norm": 3.25, "learning_rate": 0.00024074074074074072, "loss": 3.5372, "step": 52 }, { "epoch": 0.0036870847681658493, "grad_norm": 2.953125, "learning_rate": 0.0002453703703703704, "loss": 3.6597, "step": 53 }, { "epoch": 0.003756652405301054, "grad_norm": 3.53125, "learning_rate": 0.00025, "loss": 3.7115, "step": 54 }, { "epoch": 0.0038262200424362585, "grad_norm": 3.9375, "learning_rate": 0.00025462962962962966, "loss": 3.8284, "step": 55 }, { "epoch": 0.0038957876795714633, "grad_norm": 3.234375, "learning_rate": 0.00025925925925925926, "loss": 3.8952, "step": 56 }, { "epoch": 0.003965355316706668, "grad_norm": 3.28125, "learning_rate": 0.0002638888888888889, "loss": 3.6997, "step": 57 }, { "epoch": 0.0040349229538418725, "grad_norm": 7.03125, "learning_rate": 0.0002685185185185186, "loss": 3.8363, "step": 58 }, { "epoch": 0.004104490590977078, "grad_norm": 1.9921875, "learning_rate": 0.0002731481481481481, "loss": 3.6792, "step": 59 }, { "epoch": 0.004174058228112282, "grad_norm": 3.0, "learning_rate": 0.0002777777777777778, "loss": 3.6028, "step": 60 }, { "epoch": 0.0042436258652474865, "grad_norm": 2.765625, "learning_rate": 0.0002824074074074074, "loss": 3.6588, "step": 61 }, { "epoch": 0.004313193502382692, "grad_norm": 2.734375, "learning_rate": 0.00028703703703703703, "loss": 3.3777, "step": 62 }, { "epoch": 0.004382761139517896, "grad_norm": 4.5, "learning_rate": 0.0002916666666666667, "loss": 3.3868, "step": 63 }, { "epoch": 0.004452328776653101, "grad_norm": 9.5, "learning_rate": 0.0002962962962962963, "loss": 3.8549, "step": 64 }, { "epoch": 0.004521896413788306, "grad_norm": 3.328125, "learning_rate": 0.00030092592592592595, "loss": 3.5282, "step": 65 }, { "epoch": 0.00459146405092351, "grad_norm": 3.203125, "learning_rate": 0.0003055555555555556, "loss": 3.5236, "step": 66 }, { "epoch": 0.004661031688058715, "grad_norm": 4.59375, "learning_rate": 0.0003101851851851852, "loss": 3.8009, "step": 67 }, { "epoch": 0.00473059932519392, "grad_norm": 3.25, "learning_rate": 0.0003148148148148148, "loss": 3.4033, "step": 68 }, { "epoch": 0.004800166962329124, "grad_norm": 4.96875, "learning_rate": 0.0003194444444444444, "loss": 3.2774, "step": 69 }, { "epoch": 0.004869734599464329, "grad_norm": 3.265625, "learning_rate": 0.00032407407407407406, "loss": 3.7126, "step": 70 }, { "epoch": 0.004939302236599534, "grad_norm": 3.625, "learning_rate": 0.0003287037037037037, "loss": 3.7453, "step": 71 }, { "epoch": 0.005008869873734738, "grad_norm": 2.96875, "learning_rate": 0.0003333333333333333, "loss": 3.2229, "step": 72 }, { "epoch": 0.0050784375108699435, "grad_norm": 4.875, "learning_rate": 0.000337962962962963, "loss": 3.7234, "step": 73 }, { "epoch": 0.005148005148005148, "grad_norm": 3.71875, "learning_rate": 0.00034259259259259263, "loss": 3.7748, "step": 74 }, { "epoch": 0.005217572785140353, "grad_norm": 2.25, "learning_rate": 0.00034722222222222224, "loss": 3.6813, "step": 75 }, { "epoch": 0.0052871404222755575, "grad_norm": 3.3125, "learning_rate": 0.0003518518518518519, "loss": 3.793, "step": 76 }, { "epoch": 0.005356708059410762, "grad_norm": 3.1875, "learning_rate": 0.00035648148148148144, "loss": 3.2565, "step": 77 }, { "epoch": 0.005426275696545967, "grad_norm": 3.5, "learning_rate": 0.0003611111111111111, "loss": 3.6943, "step": 78 }, { "epoch": 0.0054958433336811715, "grad_norm": 3.453125, "learning_rate": 0.00036574074074074075, "loss": 3.3554, "step": 79 }, { "epoch": 0.005565410970816376, "grad_norm": 3.171875, "learning_rate": 0.00037037037037037035, "loss": 3.6748, "step": 80 }, { "epoch": 0.005634978607951581, "grad_norm": 3.25, "learning_rate": 0.000375, "loss": 3.5064, "step": 81 }, { "epoch": 0.0057045462450867855, "grad_norm": 2.65625, "learning_rate": 0.00037962962962962966, "loss": 3.9164, "step": 82 }, { "epoch": 0.005774113882221991, "grad_norm": 3.84375, "learning_rate": 0.00038425925925925927, "loss": 3.4874, "step": 83 }, { "epoch": 0.005843681519357195, "grad_norm": 3.40625, "learning_rate": 0.0003888888888888889, "loss": 3.6889, "step": 84 }, { "epoch": 0.0059132491564923995, "grad_norm": 3.140625, "learning_rate": 0.0003935185185185186, "loss": 3.3944, "step": 85 }, { "epoch": 0.005982816793627605, "grad_norm": 2.34375, "learning_rate": 0.0003981481481481481, "loss": 3.3187, "step": 86 }, { "epoch": 0.006052384430762809, "grad_norm": 6.5, "learning_rate": 0.0004027777777777778, "loss": 3.6085, "step": 87 }, { "epoch": 0.0061219520678980135, "grad_norm": 6.375, "learning_rate": 0.0004074074074074074, "loss": 3.3279, "step": 88 }, { "epoch": 0.006191519705033219, "grad_norm": 4.28125, "learning_rate": 0.00041203703703703704, "loss": 3.6968, "step": 89 }, { "epoch": 0.006261087342168423, "grad_norm": 5.0, "learning_rate": 0.0004166666666666667, "loss": 3.162, "step": 90 }, { "epoch": 0.006330654979303628, "grad_norm": 3.296875, "learning_rate": 0.0004212962962962963, "loss": 3.106, "step": 91 }, { "epoch": 0.006400222616438833, "grad_norm": 3.0625, "learning_rate": 0.00042592592592592595, "loss": 3.2511, "step": 92 }, { "epoch": 0.006469790253574037, "grad_norm": 3.734375, "learning_rate": 0.0004305555555555556, "loss": 3.3395, "step": 93 }, { "epoch": 0.0065393578907092425, "grad_norm": 3.5625, "learning_rate": 0.0004351851851851852, "loss": 3.9849, "step": 94 }, { "epoch": 0.006608925527844447, "grad_norm": 3.34375, "learning_rate": 0.0004398148148148148, "loss": 3.7063, "step": 95 }, { "epoch": 0.006678493164979651, "grad_norm": 3.671875, "learning_rate": 0.0004444444444444444, "loss": 3.643, "step": 96 }, { "epoch": 0.0067480608021148565, "grad_norm": 3.671875, "learning_rate": 0.00044907407407407407, "loss": 3.7839, "step": 97 }, { "epoch": 0.006817628439250061, "grad_norm": 2.9375, "learning_rate": 0.0004537037037037037, "loss": 3.3117, "step": 98 }, { "epoch": 0.006887196076385265, "grad_norm": 4.0625, "learning_rate": 0.0004583333333333333, "loss": 3.2695, "step": 99 }, { "epoch": 0.0069567637135204705, "grad_norm": 3.1875, "learning_rate": 0.000462962962962963, "loss": 4.1127, "step": 100 }, { "epoch": 0.007026331350655675, "grad_norm": 7.0, "learning_rate": 0.00046759259259259264, "loss": 3.3675, "step": 101 }, { "epoch": 0.007095898987790879, "grad_norm": 4.0625, "learning_rate": 0.00047222222222222224, "loss": 3.4207, "step": 102 }, { "epoch": 0.0071654666249260845, "grad_norm": 4.125, "learning_rate": 0.0004768518518518519, "loss": 3.3211, "step": 103 }, { "epoch": 0.007235034262061289, "grad_norm": 3.84375, "learning_rate": 0.00048148148148148144, "loss": 3.0541, "step": 104 }, { "epoch": 0.007304601899196494, "grad_norm": 6.46875, "learning_rate": 0.0004861111111111111, "loss": 3.7767, "step": 105 }, { "epoch": 0.0073741695363316985, "grad_norm": 6.21875, "learning_rate": 0.0004907407407407408, "loss": 3.6529, "step": 106 }, { "epoch": 0.007443737173466903, "grad_norm": 5.03125, "learning_rate": 0.0004953703703703704, "loss": 3.3748, "step": 107 }, { "epoch": 0.007513304810602108, "grad_norm": 3.3125, "learning_rate": 0.0005, "loss": 3.1601, "step": 108 }, { "epoch": 0.0075828724477373126, "grad_norm": 4.53125, "learning_rate": 0.0005046296296296296, "loss": 3.583, "step": 109 }, { "epoch": 0.007652440084872517, "grad_norm": 4.46875, "learning_rate": 0.0005092592592592593, "loss": 3.576, "step": 110 }, { "epoch": 0.007722007722007722, "grad_norm": 3.140625, "learning_rate": 0.0005138888888888888, "loss": 3.2224, "step": 111 }, { "epoch": 0.007791575359142927, "grad_norm": 3.84375, "learning_rate": 0.0005185185185185185, "loss": 3.5271, "step": 112 }, { "epoch": 0.007861142996278131, "grad_norm": 5.6875, "learning_rate": 0.0005231481481481481, "loss": 3.5304, "step": 113 }, { "epoch": 0.007930710633413336, "grad_norm": 3.609375, "learning_rate": 0.0005277777777777778, "loss": 3.4794, "step": 114 }, { "epoch": 0.008000278270548541, "grad_norm": 2.921875, "learning_rate": 0.0005324074074074074, "loss": 3.7191, "step": 115 }, { "epoch": 0.008069845907683745, "grad_norm": 3.328125, "learning_rate": 0.0005370370370370371, "loss": 3.5999, "step": 116 }, { "epoch": 0.00813941354481895, "grad_norm": 3.828125, "learning_rate": 0.0005416666666666666, "loss": 3.5959, "step": 117 }, { "epoch": 0.008208981181954155, "grad_norm": 2.484375, "learning_rate": 0.0005462962962962962, "loss": 3.5367, "step": 118 }, { "epoch": 0.008278548819089359, "grad_norm": 2.96875, "learning_rate": 0.000550925925925926, "loss": 3.5518, "step": 119 }, { "epoch": 0.008348116456224564, "grad_norm": 3.546875, "learning_rate": 0.0005555555555555556, "loss": 3.5799, "step": 120 }, { "epoch": 0.00841768409335977, "grad_norm": 3.25, "learning_rate": 0.0005601851851851853, "loss": 3.4259, "step": 121 }, { "epoch": 0.008487251730494973, "grad_norm": 3.078125, "learning_rate": 0.0005648148148148148, "loss": 3.1868, "step": 122 }, { "epoch": 0.008556819367630178, "grad_norm": 2.609375, "learning_rate": 0.0005694444444444445, "loss": 3.6312, "step": 123 }, { "epoch": 0.008626387004765384, "grad_norm": 3.421875, "learning_rate": 0.0005740740740740741, "loss": 3.5673, "step": 124 }, { "epoch": 0.008695954641900587, "grad_norm": 3.5, "learning_rate": 0.0005787037037037038, "loss": 3.3845, "step": 125 }, { "epoch": 0.008765522279035792, "grad_norm": 3.46875, "learning_rate": 0.0005833333333333334, "loss": 3.5458, "step": 126 }, { "epoch": 0.008835089916170998, "grad_norm": 3.0, "learning_rate": 0.0005879629629629629, "loss": 3.3844, "step": 127 }, { "epoch": 0.008904657553306203, "grad_norm": 2.921875, "learning_rate": 0.0005925925925925926, "loss": 3.4753, "step": 128 }, { "epoch": 0.008974225190441406, "grad_norm": 2.75, "learning_rate": 0.0005972222222222222, "loss": 3.7892, "step": 129 }, { "epoch": 0.009043792827576612, "grad_norm": 3.234375, "learning_rate": 0.0006018518518518519, "loss": 3.3437, "step": 130 }, { "epoch": 0.009113360464711817, "grad_norm": 3.171875, "learning_rate": 0.0006064814814814815, "loss": 3.3362, "step": 131 }, { "epoch": 0.00918292810184702, "grad_norm": 5.875, "learning_rate": 0.0006111111111111112, "loss": 3.806, "step": 132 }, { "epoch": 0.009252495738982226, "grad_norm": 4.65625, "learning_rate": 0.0006157407407407407, "loss": 3.3642, "step": 133 }, { "epoch": 0.00932206337611743, "grad_norm": 4.8125, "learning_rate": 0.0006203703703703704, "loss": 3.4821, "step": 134 }, { "epoch": 0.009391631013252634, "grad_norm": 5.03125, "learning_rate": 0.000625, "loss": 3.5422, "step": 135 }, { "epoch": 0.00946119865038784, "grad_norm": 3.796875, "learning_rate": 0.0006296296296296296, "loss": 3.7212, "step": 136 }, { "epoch": 0.009530766287523045, "grad_norm": 3.234375, "learning_rate": 0.0006342592592592593, "loss": 3.7358, "step": 137 }, { "epoch": 0.009600333924658248, "grad_norm": 3.125, "learning_rate": 0.0006388888888888888, "loss": 3.2124, "step": 138 }, { "epoch": 0.009669901561793454, "grad_norm": 2.34375, "learning_rate": 0.0006435185185185185, "loss": 3.5355, "step": 139 }, { "epoch": 0.009739469198928659, "grad_norm": 3.828125, "learning_rate": 0.0006481481481481481, "loss": 2.8285, "step": 140 }, { "epoch": 0.009809036836063862, "grad_norm": 2.265625, "learning_rate": 0.0006527777777777778, "loss": 3.2107, "step": 141 }, { "epoch": 0.009878604473199068, "grad_norm": 2.671875, "learning_rate": 0.0006574074074074074, "loss": 3.6405, "step": 142 }, { "epoch": 0.009948172110334273, "grad_norm": 2.734375, "learning_rate": 0.0006620370370370372, "loss": 3.77, "step": 143 }, { "epoch": 0.010017739747469476, "grad_norm": 5.5625, "learning_rate": 0.0006666666666666666, "loss": 3.5891, "step": 144 }, { "epoch": 0.010087307384604682, "grad_norm": 4.1875, "learning_rate": 0.0006712962962962962, "loss": 3.6008, "step": 145 }, { "epoch": 0.010156875021739887, "grad_norm": 2.4375, "learning_rate": 0.000675925925925926, "loss": 3.3318, "step": 146 }, { "epoch": 0.010226442658875092, "grad_norm": 3.75, "learning_rate": 0.0006805555555555556, "loss": 3.635, "step": 147 }, { "epoch": 0.010296010296010296, "grad_norm": 3.59375, "learning_rate": 0.0006851851851851853, "loss": 3.6206, "step": 148 }, { "epoch": 0.010365577933145501, "grad_norm": 3.5625, "learning_rate": 0.0006898148148148148, "loss": 3.5804, "step": 149 }, { "epoch": 0.010435145570280706, "grad_norm": 2.46875, "learning_rate": 0.0006944444444444445, "loss": 3.4559, "step": 150 }, { "epoch": 0.01050471320741591, "grad_norm": 2.859375, "learning_rate": 0.0006990740740740741, "loss": 3.2063, "step": 151 }, { "epoch": 0.010574280844551115, "grad_norm": 2.96875, "learning_rate": 0.0007037037037037038, "loss": 3.746, "step": 152 }, { "epoch": 0.01064384848168632, "grad_norm": 5.75, "learning_rate": 0.0007083333333333334, "loss": 2.9674, "step": 153 }, { "epoch": 0.010713416118821524, "grad_norm": 2.65625, "learning_rate": 0.0007129629629629629, "loss": 3.7912, "step": 154 }, { "epoch": 0.010782983755956729, "grad_norm": 2.90625, "learning_rate": 0.0007175925925925926, "loss": 3.2516, "step": 155 }, { "epoch": 0.010852551393091934, "grad_norm": 3.1875, "learning_rate": 0.0007222222222222222, "loss": 3.3805, "step": 156 }, { "epoch": 0.010922119030227138, "grad_norm": 3.96875, "learning_rate": 0.0007268518518518519, "loss": 3.542, "step": 157 }, { "epoch": 0.010991686667362343, "grad_norm": 3.328125, "learning_rate": 0.0007314814814814815, "loss": 3.4749, "step": 158 }, { "epoch": 0.011061254304497548, "grad_norm": 2.28125, "learning_rate": 0.0007361111111111112, "loss": 3.5944, "step": 159 }, { "epoch": 0.011130821941632752, "grad_norm": 3.25, "learning_rate": 0.0007407407407407407, "loss": 3.2803, "step": 160 }, { "epoch": 0.011200389578767957, "grad_norm": 3.453125, "learning_rate": 0.0007453703703703704, "loss": 3.3337, "step": 161 }, { "epoch": 0.011269957215903162, "grad_norm": 2.515625, "learning_rate": 0.00075, "loss": 3.5694, "step": 162 }, { "epoch": 0.011339524853038366, "grad_norm": 3.921875, "learning_rate": 0.0007546296296296296, "loss": 3.3504, "step": 163 }, { "epoch": 0.011409092490173571, "grad_norm": 4.65625, "learning_rate": 0.0007592592592592593, "loss": 3.3871, "step": 164 }, { "epoch": 0.011478660127308776, "grad_norm": 3.703125, "learning_rate": 0.0007638888888888888, "loss": 3.5259, "step": 165 }, { "epoch": 0.011548227764443982, "grad_norm": 2.796875, "learning_rate": 0.0007685185185185185, "loss": 3.2406, "step": 166 }, { "epoch": 0.011617795401579185, "grad_norm": 4.03125, "learning_rate": 0.0007731481481481481, "loss": 3.4967, "step": 167 }, { "epoch": 0.01168736303871439, "grad_norm": 2.90625, "learning_rate": 0.0007777777777777778, "loss": 3.6256, "step": 168 }, { "epoch": 0.011756930675849596, "grad_norm": 2.21875, "learning_rate": 0.0007824074074074074, "loss": 3.4215, "step": 169 }, { "epoch": 0.011826498312984799, "grad_norm": 2.796875, "learning_rate": 0.0007870370370370372, "loss": 3.2696, "step": 170 }, { "epoch": 0.011896065950120004, "grad_norm": 3.109375, "learning_rate": 0.0007916666666666666, "loss": 3.34, "step": 171 }, { "epoch": 0.01196563358725521, "grad_norm": 2.25, "learning_rate": 0.0007962962962962962, "loss": 3.4738, "step": 172 }, { "epoch": 0.012035201224390413, "grad_norm": 2.484375, "learning_rate": 0.000800925925925926, "loss": 3.5333, "step": 173 }, { "epoch": 0.012104768861525618, "grad_norm": 3.453125, "learning_rate": 0.0008055555555555556, "loss": 3.6269, "step": 174 }, { "epoch": 0.012174336498660824, "grad_norm": 2.546875, "learning_rate": 0.0008101851851851853, "loss": 3.3796, "step": 175 }, { "epoch": 0.012243904135796027, "grad_norm": 4.71875, "learning_rate": 0.0008148148148148148, "loss": 3.7743, "step": 176 }, { "epoch": 0.012313471772931232, "grad_norm": 3.078125, "learning_rate": 0.0008194444444444445, "loss": 3.494, "step": 177 }, { "epoch": 0.012383039410066438, "grad_norm": 3.328125, "learning_rate": 0.0008240740740740741, "loss": 3.4347, "step": 178 }, { "epoch": 0.012452607047201641, "grad_norm": 3.015625, "learning_rate": 0.0008287037037037038, "loss": 3.0524, "step": 179 }, { "epoch": 0.012522174684336846, "grad_norm": 2.046875, "learning_rate": 0.0008333333333333334, "loss": 3.4803, "step": 180 }, { "epoch": 0.012591742321472052, "grad_norm": 3.234375, "learning_rate": 0.0008379629629629629, "loss": 3.4848, "step": 181 }, { "epoch": 0.012661309958607255, "grad_norm": 3.28125, "learning_rate": 0.0008425925925925926, "loss": 3.5803, "step": 182 }, { "epoch": 0.01273087759574246, "grad_norm": 3.015625, "learning_rate": 0.0008472222222222222, "loss": 3.1998, "step": 183 }, { "epoch": 0.012800445232877666, "grad_norm": 2.78125, "learning_rate": 0.0008518518518518519, "loss": 4.02, "step": 184 }, { "epoch": 0.01287001287001287, "grad_norm": 3.46875, "learning_rate": 0.0008564814814814815, "loss": 3.5389, "step": 185 }, { "epoch": 0.012939580507148074, "grad_norm": 2.3125, "learning_rate": 0.0008611111111111112, "loss": 3.6485, "step": 186 }, { "epoch": 0.01300914814428328, "grad_norm": 2.390625, "learning_rate": 0.0008657407407407407, "loss": 3.6143, "step": 187 }, { "epoch": 0.013078715781418485, "grad_norm": 3.03125, "learning_rate": 0.0008703703703703704, "loss": 3.2083, "step": 188 }, { "epoch": 0.013148283418553688, "grad_norm": 3.0625, "learning_rate": 0.000875, "loss": 3.7757, "step": 189 }, { "epoch": 0.013217851055688894, "grad_norm": 3.0, "learning_rate": 0.0008796296296296296, "loss": 3.3781, "step": 190 }, { "epoch": 0.013287418692824099, "grad_norm": 2.8125, "learning_rate": 0.0008842592592592593, "loss": 3.5871, "step": 191 }, { "epoch": 0.013356986329959302, "grad_norm": 2.484375, "learning_rate": 0.0008888888888888888, "loss": 3.4771, "step": 192 }, { "epoch": 0.013426553967094508, "grad_norm": 2.875, "learning_rate": 0.0008935185185185185, "loss": 3.2491, "step": 193 }, { "epoch": 0.013496121604229713, "grad_norm": 7.21875, "learning_rate": 0.0008981481481481481, "loss": 3.3524, "step": 194 }, { "epoch": 0.013565689241364916, "grad_norm": 3.15625, "learning_rate": 0.0009027777777777778, "loss": 3.4881, "step": 195 }, { "epoch": 0.013635256878500122, "grad_norm": 2.921875, "learning_rate": 0.0009074074074074074, "loss": 3.3662, "step": 196 }, { "epoch": 0.013704824515635327, "grad_norm": 3.015625, "learning_rate": 0.0009120370370370372, "loss": 3.647, "step": 197 }, { "epoch": 0.01377439215277053, "grad_norm": 2.65625, "learning_rate": 0.0009166666666666666, "loss": 3.2356, "step": 198 }, { "epoch": 0.013843959789905736, "grad_norm": 2.28125, "learning_rate": 0.0009212962962962963, "loss": 3.5328, "step": 199 }, { "epoch": 0.013913527427040941, "grad_norm": 2.859375, "learning_rate": 0.000925925925925926, "loss": 3.576, "step": 200 }, { "epoch": 0.013983095064176145, "grad_norm": 3.0625, "learning_rate": 0.0009305555555555556, "loss": 3.3183, "step": 201 }, { "epoch": 0.01405266270131135, "grad_norm": 2.046875, "learning_rate": 0.0009351851851851853, "loss": 3.4818, "step": 202 }, { "epoch": 0.014122230338446555, "grad_norm": 2.1875, "learning_rate": 0.0009398148148148148, "loss": 3.5219, "step": 203 }, { "epoch": 0.014191797975581759, "grad_norm": 2.6875, "learning_rate": 0.0009444444444444445, "loss": 3.775, "step": 204 }, { "epoch": 0.014261365612716964, "grad_norm": 2.296875, "learning_rate": 0.0009490740740740741, "loss": 3.4462, "step": 205 }, { "epoch": 0.014330933249852169, "grad_norm": 2.359375, "learning_rate": 0.0009537037037037038, "loss": 3.4381, "step": 206 }, { "epoch": 0.014400500886987374, "grad_norm": 2.875, "learning_rate": 0.0009583333333333334, "loss": 3.0672, "step": 207 }, { "epoch": 0.014470068524122578, "grad_norm": 1.953125, "learning_rate": 0.0009629629629629629, "loss": 3.1428, "step": 208 }, { "epoch": 0.014539636161257783, "grad_norm": 2.078125, "learning_rate": 0.0009675925925925926, "loss": 3.4844, "step": 209 }, { "epoch": 0.014609203798392988, "grad_norm": 3.078125, "learning_rate": 0.0009722222222222222, "loss": 3.3521, "step": 210 }, { "epoch": 0.014678771435528192, "grad_norm": 3.109375, "learning_rate": 0.0009768518518518518, "loss": 3.4093, "step": 211 }, { "epoch": 0.014748339072663397, "grad_norm": 2.390625, "learning_rate": 0.0009814814814814816, "loss": 3.8329, "step": 212 }, { "epoch": 0.014817906709798602, "grad_norm": 3.28125, "learning_rate": 0.0009861111111111112, "loss": 3.2119, "step": 213 }, { "epoch": 0.014887474346933806, "grad_norm": 2.8125, "learning_rate": 0.0009907407407407408, "loss": 3.1916, "step": 214 }, { "epoch": 0.014957041984069011, "grad_norm": 5.3125, "learning_rate": 0.0009953703703703704, "loss": 3.7224, "step": 215 }, { "epoch": 0.015026609621204216, "grad_norm": 2.8125, "learning_rate": 0.001, "loss": 3.3947, "step": 216 }, { "epoch": 0.01509617725833942, "grad_norm": 4.40625, "learning_rate": 0.0010046296296296296, "loss": 3.8063, "step": 217 }, { "epoch": 0.015165744895474625, "grad_norm": 3.34375, "learning_rate": 0.0010092592592592592, "loss": 3.4035, "step": 218 }, { "epoch": 0.01523531253260983, "grad_norm": 2.921875, "learning_rate": 0.0010138888888888888, "loss": 3.2933, "step": 219 }, { "epoch": 0.015304880169745034, "grad_norm": 2.0625, "learning_rate": 0.0010185185185185186, "loss": 3.5955, "step": 220 }, { "epoch": 0.015374447806880239, "grad_norm": 3.265625, "learning_rate": 0.0010231481481481482, "loss": 3.4307, "step": 221 }, { "epoch": 0.015444015444015444, "grad_norm": 4.0625, "learning_rate": 0.0010277777777777776, "loss": 3.0483, "step": 222 }, { "epoch": 0.015513583081150648, "grad_norm": 2.3125, "learning_rate": 0.0010324074074074074, "loss": 3.3634, "step": 223 }, { "epoch": 0.015583150718285853, "grad_norm": 3.78125, "learning_rate": 0.001037037037037037, "loss": 3.6414, "step": 224 }, { "epoch": 0.015652718355421057, "grad_norm": 2.6875, "learning_rate": 0.0010416666666666669, "loss": 3.4195, "step": 225 }, { "epoch": 0.015722285992556262, "grad_norm": 1.75, "learning_rate": 0.0010462962962962963, "loss": 3.9841, "step": 226 }, { "epoch": 0.015791853629691467, "grad_norm": 2.625, "learning_rate": 0.0010509259259259259, "loss": 3.6758, "step": 227 }, { "epoch": 0.015861421266826672, "grad_norm": 2.53125, "learning_rate": 0.0010555555555555557, "loss": 3.6038, "step": 228 }, { "epoch": 0.015930988903961878, "grad_norm": 2.1875, "learning_rate": 0.001060185185185185, "loss": 3.7388, "step": 229 }, { "epoch": 0.016000556541097083, "grad_norm": 3.9375, "learning_rate": 0.0010648148148148149, "loss": 3.3876, "step": 230 }, { "epoch": 0.016070124178232285, "grad_norm": 4.1875, "learning_rate": 0.0010694444444444445, "loss": 3.3911, "step": 231 }, { "epoch": 0.01613969181536749, "grad_norm": 3.0, "learning_rate": 0.0010740740740740743, "loss": 3.7106, "step": 232 }, { "epoch": 0.016209259452502695, "grad_norm": 4.28125, "learning_rate": 0.0010787037037037037, "loss": 3.7648, "step": 233 }, { "epoch": 0.0162788270896379, "grad_norm": 2.28125, "learning_rate": 0.0010833333333333333, "loss": 3.1461, "step": 234 }, { "epoch": 0.016348394726773106, "grad_norm": 2.359375, "learning_rate": 0.001087962962962963, "loss": 3.7154, "step": 235 }, { "epoch": 0.01641796236390831, "grad_norm": 3.453125, "learning_rate": 0.0010925925925925925, "loss": 3.5562, "step": 236 }, { "epoch": 0.016487530001043516, "grad_norm": 3.296875, "learning_rate": 0.0010972222222222223, "loss": 3.2213, "step": 237 }, { "epoch": 0.016557097638178718, "grad_norm": 4.4375, "learning_rate": 0.001101851851851852, "loss": 3.7439, "step": 238 }, { "epoch": 0.016626665275313923, "grad_norm": 2.265625, "learning_rate": 0.0011064814814814815, "loss": 3.4636, "step": 239 }, { "epoch": 0.01669623291244913, "grad_norm": 2.84375, "learning_rate": 0.0011111111111111111, "loss": 3.4433, "step": 240 }, { "epoch": 0.016765800549584334, "grad_norm": 3.59375, "learning_rate": 0.0011157407407407407, "loss": 3.5306, "step": 241 }, { "epoch": 0.01683536818671954, "grad_norm": 4.6875, "learning_rate": 0.0011203703703703705, "loss": 3.5676, "step": 242 }, { "epoch": 0.016904935823854744, "grad_norm": 3.296875, "learning_rate": 0.0011250000000000001, "loss": 2.8527, "step": 243 }, { "epoch": 0.016974503460989946, "grad_norm": 1.78125, "learning_rate": 0.0011296296296296295, "loss": 3.7269, "step": 244 }, { "epoch": 0.01704407109812515, "grad_norm": 2.21875, "learning_rate": 0.0011342592592592593, "loss": 3.1405, "step": 245 }, { "epoch": 0.017113638735260357, "grad_norm": 2.09375, "learning_rate": 0.001138888888888889, "loss": 3.7365, "step": 246 }, { "epoch": 0.017183206372395562, "grad_norm": 2.796875, "learning_rate": 0.0011435185185185185, "loss": 2.9634, "step": 247 }, { "epoch": 0.017252774009530767, "grad_norm": 2.890625, "learning_rate": 0.0011481481481481481, "loss": 3.4298, "step": 248 }, { "epoch": 0.017322341646665972, "grad_norm": 3.09375, "learning_rate": 0.0011527777777777777, "loss": 3.5184, "step": 249 }, { "epoch": 0.017391909283801174, "grad_norm": 2.671875, "learning_rate": 0.0011574074074074076, "loss": 3.6976, "step": 250 }, { "epoch": 0.01746147692093638, "grad_norm": 2.609375, "learning_rate": 0.001162037037037037, "loss": 3.2592, "step": 251 }, { "epoch": 0.017531044558071585, "grad_norm": 2.328125, "learning_rate": 0.0011666666666666668, "loss": 3.491, "step": 252 }, { "epoch": 0.01760061219520679, "grad_norm": 2.328125, "learning_rate": 0.0011712962962962964, "loss": 3.6034, "step": 253 }, { "epoch": 0.017670179832341995, "grad_norm": 2.28125, "learning_rate": 0.0011759259259259257, "loss": 3.5464, "step": 254 }, { "epoch": 0.0177397474694772, "grad_norm": 2.84375, "learning_rate": 0.0011805555555555556, "loss": 3.6768, "step": 255 }, { "epoch": 0.017809315106612406, "grad_norm": 1.8359375, "learning_rate": 0.0011851851851851852, "loss": 3.2789, "step": 256 }, { "epoch": 0.017878882743747607, "grad_norm": 2.703125, "learning_rate": 0.001189814814814815, "loss": 3.4833, "step": 257 }, { "epoch": 0.017948450380882813, "grad_norm": 2.875, "learning_rate": 0.0011944444444444444, "loss": 3.3873, "step": 258 }, { "epoch": 0.018018018018018018, "grad_norm": 3.28125, "learning_rate": 0.0011990740740740742, "loss": 3.7072, "step": 259 }, { "epoch": 0.018087585655153223, "grad_norm": 2.765625, "learning_rate": 0.0012037037037037038, "loss": 2.9247, "step": 260 }, { "epoch": 0.01815715329228843, "grad_norm": 2.609375, "learning_rate": 0.0012083333333333332, "loss": 3.7827, "step": 261 }, { "epoch": 0.018226720929423634, "grad_norm": 2.875, "learning_rate": 0.001212962962962963, "loss": 3.6445, "step": 262 }, { "epoch": 0.018296288566558835, "grad_norm": 3.1875, "learning_rate": 0.0012175925925925926, "loss": 3.3478, "step": 263 }, { "epoch": 0.01836585620369404, "grad_norm": 228.0, "learning_rate": 0.0012222222222222224, "loss": 4.2626, "step": 264 }, { "epoch": 0.018435423840829246, "grad_norm": 2.9375, "learning_rate": 0.0012268518518518518, "loss": 3.646, "step": 265 }, { "epoch": 0.01850499147796445, "grad_norm": 2.8125, "learning_rate": 0.0012314814814814814, "loss": 3.4842, "step": 266 }, { "epoch": 0.018574559115099656, "grad_norm": 2.765625, "learning_rate": 0.0012361111111111112, "loss": 3.6565, "step": 267 }, { "epoch": 0.01864412675223486, "grad_norm": 3.109375, "learning_rate": 0.0012407407407407408, "loss": 3.2464, "step": 268 }, { "epoch": 0.018713694389370063, "grad_norm": 2.203125, "learning_rate": 0.0012453703703703704, "loss": 3.9116, "step": 269 }, { "epoch": 0.01878326202650527, "grad_norm": 4.15625, "learning_rate": 0.00125, "loss": 3.1034, "step": 270 }, { "epoch": 0.018852829663640474, "grad_norm": 3.53125, "learning_rate": 0.0012546296296296296, "loss": 3.4661, "step": 271 }, { "epoch": 0.01892239730077568, "grad_norm": 2.765625, "learning_rate": 0.0012592592592592592, "loss": 3.4787, "step": 272 }, { "epoch": 0.018991964937910884, "grad_norm": 2.8125, "learning_rate": 0.0012638888888888888, "loss": 3.4316, "step": 273 }, { "epoch": 0.01906153257504609, "grad_norm": 2.703125, "learning_rate": 0.0012685185185185186, "loss": 3.4963, "step": 274 }, { "epoch": 0.019131100212181295, "grad_norm": 2.46875, "learning_rate": 0.0012731481481481483, "loss": 3.8052, "step": 275 }, { "epoch": 0.019200667849316497, "grad_norm": 2.515625, "learning_rate": 0.0012777777777777776, "loss": 3.6617, "step": 276 }, { "epoch": 0.019270235486451702, "grad_norm": 3.3125, "learning_rate": 0.0012824074074074075, "loss": 3.0172, "step": 277 }, { "epoch": 0.019339803123586907, "grad_norm": 2.828125, "learning_rate": 0.001287037037037037, "loss": 3.6914, "step": 278 }, { "epoch": 0.019409370760722112, "grad_norm": 2.484375, "learning_rate": 0.0012916666666666669, "loss": 3.7064, "step": 279 }, { "epoch": 0.019478938397857318, "grad_norm": 2.53125, "learning_rate": 0.0012962962962962963, "loss": 3.1845, "step": 280 }, { "epoch": 0.019548506034992523, "grad_norm": 2.375, "learning_rate": 0.0013009259259259259, "loss": 3.29, "step": 281 }, { "epoch": 0.019618073672127725, "grad_norm": 2.421875, "learning_rate": 0.0013055555555555557, "loss": 3.5425, "step": 282 }, { "epoch": 0.01968764130926293, "grad_norm": 2.703125, "learning_rate": 0.001310185185185185, "loss": 3.0504, "step": 283 }, { "epoch": 0.019757208946398135, "grad_norm": 2.0, "learning_rate": 0.0013148148148148149, "loss": 3.6795, "step": 284 }, { "epoch": 0.01982677658353334, "grad_norm": 3.3125, "learning_rate": 0.0013194444444444445, "loss": 3.4279, "step": 285 }, { "epoch": 0.019896344220668546, "grad_norm": 2.53125, "learning_rate": 0.0013240740740740743, "loss": 3.3526, "step": 286 }, { "epoch": 0.01996591185780375, "grad_norm": 3.21875, "learning_rate": 0.0013287037037037037, "loss": 3.5018, "step": 287 }, { "epoch": 0.020035479494938953, "grad_norm": 2.453125, "learning_rate": 0.0013333333333333333, "loss": 3.1581, "step": 288 }, { "epoch": 0.020105047132074158, "grad_norm": 2.84375, "learning_rate": 0.001337962962962963, "loss": 3.0851, "step": 289 }, { "epoch": 0.020174614769209363, "grad_norm": 2.65625, "learning_rate": 0.0013425925925925925, "loss": 3.6954, "step": 290 }, { "epoch": 0.02024418240634457, "grad_norm": 2.40625, "learning_rate": 0.0013472222222222223, "loss": 3.5156, "step": 291 }, { "epoch": 0.020313750043479774, "grad_norm": 1.765625, "learning_rate": 0.001351851851851852, "loss": 3.3342, "step": 292 }, { "epoch": 0.02038331768061498, "grad_norm": 2.015625, "learning_rate": 0.0013564814814814815, "loss": 3.4297, "step": 293 }, { "epoch": 0.020452885317750184, "grad_norm": 2.59375, "learning_rate": 0.0013611111111111111, "loss": 3.461, "step": 294 }, { "epoch": 0.020522452954885386, "grad_norm": 2.4375, "learning_rate": 0.0013657407407407407, "loss": 3.9659, "step": 295 }, { "epoch": 0.02059202059202059, "grad_norm": 2.359375, "learning_rate": 0.0013703703703703705, "loss": 3.7741, "step": 296 }, { "epoch": 0.020661588229155797, "grad_norm": 1.75, "learning_rate": 0.001375, "loss": 3.4807, "step": 297 }, { "epoch": 0.020731155866291002, "grad_norm": 1.8125, "learning_rate": 0.0013796296296296295, "loss": 3.4547, "step": 298 }, { "epoch": 0.020800723503426207, "grad_norm": 3.125, "learning_rate": 0.0013842592592592593, "loss": 3.5839, "step": 299 }, { "epoch": 0.020870291140561412, "grad_norm": 2.328125, "learning_rate": 0.001388888888888889, "loss": 3.4602, "step": 300 }, { "epoch": 0.020939858777696614, "grad_norm": 2.71875, "learning_rate": 0.0013935185185185185, "loss": 3.0079, "step": 301 }, { "epoch": 0.02100942641483182, "grad_norm": 3.0, "learning_rate": 0.0013981481481481481, "loss": 3.4029, "step": 302 }, { "epoch": 0.021078994051967025, "grad_norm": 2.890625, "learning_rate": 0.0014027777777777777, "loss": 3.3146, "step": 303 }, { "epoch": 0.02114856168910223, "grad_norm": 2.46875, "learning_rate": 0.0014074074074074076, "loss": 3.468, "step": 304 }, { "epoch": 0.021218129326237435, "grad_norm": 2.46875, "learning_rate": 0.001412037037037037, "loss": 3.3304, "step": 305 }, { "epoch": 0.02128769696337264, "grad_norm": 1.640625, "learning_rate": 0.0014166666666666668, "loss": 3.6421, "step": 306 }, { "epoch": 0.021357264600507842, "grad_norm": 3.0, "learning_rate": 0.0014212962962962964, "loss": 2.9303, "step": 307 }, { "epoch": 0.021426832237643047, "grad_norm": 2.171875, "learning_rate": 0.0014259259259259258, "loss": 3.2054, "step": 308 }, { "epoch": 0.021496399874778253, "grad_norm": 1.90625, "learning_rate": 0.0014305555555555556, "loss": 3.7044, "step": 309 }, { "epoch": 0.021565967511913458, "grad_norm": 2.046875, "learning_rate": 0.0014351851851851852, "loss": 3.7959, "step": 310 }, { "epoch": 0.021635535149048663, "grad_norm": 1.890625, "learning_rate": 0.001439814814814815, "loss": 3.2365, "step": 311 }, { "epoch": 0.02170510278618387, "grad_norm": 2.328125, "learning_rate": 0.0014444444444444444, "loss": 3.5516, "step": 312 }, { "epoch": 0.021774670423319074, "grad_norm": 2.421875, "learning_rate": 0.0014490740740740742, "loss": 3.3478, "step": 313 }, { "epoch": 0.021844238060454275, "grad_norm": 3.625, "learning_rate": 0.0014537037037037038, "loss": 2.91, "step": 314 }, { "epoch": 0.02191380569758948, "grad_norm": 2.8125, "learning_rate": 0.0014583333333333332, "loss": 3.3708, "step": 315 }, { "epoch": 0.021983373334724686, "grad_norm": 3.265625, "learning_rate": 0.001462962962962963, "loss": 3.5365, "step": 316 }, { "epoch": 0.02205294097185989, "grad_norm": 2.09375, "learning_rate": 0.0014675925925925926, "loss": 3.3751, "step": 317 }, { "epoch": 0.022122508608995096, "grad_norm": 2.515625, "learning_rate": 0.0014722222222222224, "loss": 3.3161, "step": 318 }, { "epoch": 0.0221920762461303, "grad_norm": 3.265625, "learning_rate": 0.0014768518518518518, "loss": 3.3045, "step": 319 }, { "epoch": 0.022261643883265504, "grad_norm": 1.953125, "learning_rate": 0.0014814814814814814, "loss": 3.5489, "step": 320 }, { "epoch": 0.02233121152040071, "grad_norm": 3.015625, "learning_rate": 0.0014861111111111112, "loss": 3.5659, "step": 321 }, { "epoch": 0.022400779157535914, "grad_norm": 3.34375, "learning_rate": 0.0014907407407407408, "loss": 3.2205, "step": 322 }, { "epoch": 0.02247034679467112, "grad_norm": 2.640625, "learning_rate": 0.0014953703703703704, "loss": 3.3073, "step": 323 }, { "epoch": 0.022539914431806325, "grad_norm": 2.171875, "learning_rate": 0.0015, "loss": 3.6127, "step": 324 }, { "epoch": 0.02260948206894153, "grad_norm": 2.703125, "learning_rate": 0.0015046296296296296, "loss": 3.2646, "step": 325 }, { "epoch": 0.02267904970607673, "grad_norm": 2.203125, "learning_rate": 0.0015092592592592592, "loss": 3.4497, "step": 326 }, { "epoch": 0.022748617343211937, "grad_norm": 2.796875, "learning_rate": 0.0015138888888888888, "loss": 3.2017, "step": 327 }, { "epoch": 0.022818184980347142, "grad_norm": 2.5, "learning_rate": 0.0015185185185185187, "loss": 3.5135, "step": 328 }, { "epoch": 0.022887752617482347, "grad_norm": 2.4375, "learning_rate": 0.0015231481481481483, "loss": 3.2238, "step": 329 }, { "epoch": 0.022957320254617553, "grad_norm": 2.0, "learning_rate": 0.0015277777777777776, "loss": 2.8799, "step": 330 }, { "epoch": 0.023026887891752758, "grad_norm": 2.703125, "learning_rate": 0.0015324074074074075, "loss": 3.4803, "step": 331 }, { "epoch": 0.023096455528887963, "grad_norm": 2.4375, "learning_rate": 0.001537037037037037, "loss": 3.3901, "step": 332 }, { "epoch": 0.023166023166023165, "grad_norm": 2.25, "learning_rate": 0.0015416666666666669, "loss": 3.2674, "step": 333 }, { "epoch": 0.02323559080315837, "grad_norm": 2.09375, "learning_rate": 0.0015462962962962963, "loss": 3.6683, "step": 334 }, { "epoch": 0.023305158440293575, "grad_norm": 3.0625, "learning_rate": 0.0015509259259259259, "loss": 3.4703, "step": 335 }, { "epoch": 0.02337472607742878, "grad_norm": 1.65625, "learning_rate": 0.0015555555555555557, "loss": 3.2555, "step": 336 }, { "epoch": 0.023444293714563986, "grad_norm": 2.21875, "learning_rate": 0.001560185185185185, "loss": 3.6211, "step": 337 }, { "epoch": 0.02351386135169919, "grad_norm": 2.421875, "learning_rate": 0.0015648148148148149, "loss": 3.5455, "step": 338 }, { "epoch": 0.023583428988834393, "grad_norm": 2.25, "learning_rate": 0.0015694444444444445, "loss": 3.6748, "step": 339 }, { "epoch": 0.023652996625969598, "grad_norm": 1.546875, "learning_rate": 0.0015740740740740743, "loss": 3.453, "step": 340 }, { "epoch": 0.023722564263104803, "grad_norm": 1.9921875, "learning_rate": 0.0015787037037037037, "loss": 3.3692, "step": 341 }, { "epoch": 0.02379213190024001, "grad_norm": 2.53125, "learning_rate": 0.0015833333333333333, "loss": 2.9392, "step": 342 }, { "epoch": 0.023861699537375214, "grad_norm": 1.96875, "learning_rate": 0.0015879629629629631, "loss": 3.5069, "step": 343 }, { "epoch": 0.02393126717451042, "grad_norm": 1.59375, "learning_rate": 0.0015925925925925925, "loss": 3.5041, "step": 344 }, { "epoch": 0.02400083481164562, "grad_norm": 2.25, "learning_rate": 0.0015972222222222223, "loss": 3.8914, "step": 345 }, { "epoch": 0.024070402448780826, "grad_norm": 2.328125, "learning_rate": 0.001601851851851852, "loss": 3.3932, "step": 346 }, { "epoch": 0.02413997008591603, "grad_norm": 2.609375, "learning_rate": 0.0016064814814814815, "loss": 3.2436, "step": 347 }, { "epoch": 0.024209537723051237, "grad_norm": 2.5625, "learning_rate": 0.0016111111111111111, "loss": 3.4229, "step": 348 }, { "epoch": 0.024279105360186442, "grad_norm": 2.109375, "learning_rate": 0.0016157407407407407, "loss": 3.5363, "step": 349 }, { "epoch": 0.024348672997321647, "grad_norm": 3.71875, "learning_rate": 0.0016203703703703705, "loss": 3.1235, "step": 350 }, { "epoch": 0.02441824063445685, "grad_norm": 3.140625, "learning_rate": 0.0016250000000000001, "loss": 3.0492, "step": 351 }, { "epoch": 0.024487808271592054, "grad_norm": 2.09375, "learning_rate": 0.0016296296296296295, "loss": 3.3601, "step": 352 }, { "epoch": 0.02455737590872726, "grad_norm": 2.03125, "learning_rate": 0.0016342592592592593, "loss": 4.0213, "step": 353 }, { "epoch": 0.024626943545862465, "grad_norm": 6.1875, "learning_rate": 0.001638888888888889, "loss": 3.2648, "step": 354 }, { "epoch": 0.02469651118299767, "grad_norm": 2.875, "learning_rate": 0.0016435185185185185, "loss": 3.5466, "step": 355 }, { "epoch": 0.024766078820132875, "grad_norm": 2.125, "learning_rate": 0.0016481481481481482, "loss": 3.611, "step": 356 }, { "epoch": 0.02483564645726808, "grad_norm": 2.515625, "learning_rate": 0.0016527777777777778, "loss": 3.8875, "step": 357 }, { "epoch": 0.024905214094403282, "grad_norm": 3.03125, "learning_rate": 0.0016574074074074076, "loss": 3.0971, "step": 358 }, { "epoch": 0.024974781731538487, "grad_norm": 2.078125, "learning_rate": 0.001662037037037037, "loss": 3.4832, "step": 359 }, { "epoch": 0.025044349368673693, "grad_norm": 1.890625, "learning_rate": 0.0016666666666666668, "loss": 3.4558, "step": 360 }, { "epoch": 0.025113917005808898, "grad_norm": 2.109375, "learning_rate": 0.0016712962962962964, "loss": 3.7369, "step": 361 }, { "epoch": 0.025183484642944103, "grad_norm": 3.15625, "learning_rate": 0.0016759259259259258, "loss": 3.5278, "step": 362 }, { "epoch": 0.02525305228007931, "grad_norm": 2.875, "learning_rate": 0.0016805555555555556, "loss": 3.575, "step": 363 }, { "epoch": 0.02532261991721451, "grad_norm": 2.34375, "learning_rate": 0.0016851851851851852, "loss": 3.7479, "step": 364 }, { "epoch": 0.025392187554349716, "grad_norm": 3.015625, "learning_rate": 0.001689814814814815, "loss": 3.2374, "step": 365 }, { "epoch": 0.02546175519148492, "grad_norm": 2.765625, "learning_rate": 0.0016944444444444444, "loss": 3.479, "step": 366 }, { "epoch": 0.025531322828620126, "grad_norm": 2.5625, "learning_rate": 0.0016990740740740742, "loss": 3.6505, "step": 367 }, { "epoch": 0.02560089046575533, "grad_norm": 1.6953125, "learning_rate": 0.0017037037037037038, "loss": 3.5518, "step": 368 }, { "epoch": 0.025670458102890537, "grad_norm": 1.8203125, "learning_rate": 0.0017083333333333332, "loss": 3.5959, "step": 369 }, { "epoch": 0.02574002574002574, "grad_norm": 1.9140625, "learning_rate": 0.001712962962962963, "loss": 3.6952, "step": 370 }, { "epoch": 0.025809593377160944, "grad_norm": 2.09375, "learning_rate": 0.0017175925925925926, "loss": 3.4196, "step": 371 }, { "epoch": 0.02587916101429615, "grad_norm": 2.109375, "learning_rate": 0.0017222222222222224, "loss": 3.3174, "step": 372 }, { "epoch": 0.025948728651431354, "grad_norm": 1.7734375, "learning_rate": 0.0017268518518518518, "loss": 3.5334, "step": 373 }, { "epoch": 0.02601829628856656, "grad_norm": 2.515625, "learning_rate": 0.0017314814814814814, "loss": 3.6951, "step": 374 }, { "epoch": 0.026087863925701765, "grad_norm": 2.125, "learning_rate": 0.0017361111111111112, "loss": 3.0378, "step": 375 }, { "epoch": 0.02615743156283697, "grad_norm": 1.9921875, "learning_rate": 0.0017407407407407408, "loss": 3.1484, "step": 376 }, { "epoch": 0.02622699919997217, "grad_norm": 2.640625, "learning_rate": 0.0017453703703703704, "loss": 3.3457, "step": 377 }, { "epoch": 0.026296566837107377, "grad_norm": 1.96875, "learning_rate": 0.00175, "loss": 3.4347, "step": 378 }, { "epoch": 0.026366134474242582, "grad_norm": 1.6328125, "learning_rate": 0.0017546296296296296, "loss": 3.3564, "step": 379 }, { "epoch": 0.026435702111377787, "grad_norm": 3.046875, "learning_rate": 0.0017592592592592592, "loss": 3.223, "step": 380 }, { "epoch": 0.026505269748512993, "grad_norm": 1.53125, "learning_rate": 0.0017638888888888888, "loss": 3.593, "step": 381 }, { "epoch": 0.026574837385648198, "grad_norm": 1.5234375, "learning_rate": 0.0017685185185185187, "loss": 3.6302, "step": 382 }, { "epoch": 0.0266444050227834, "grad_norm": 1.875, "learning_rate": 0.0017731481481481483, "loss": 3.5909, "step": 383 }, { "epoch": 0.026713972659918605, "grad_norm": 2.03125, "learning_rate": 0.0017777777777777776, "loss": 3.5074, "step": 384 }, { "epoch": 0.02678354029705381, "grad_norm": 1.390625, "learning_rate": 0.0017824074074074075, "loss": 3.8452, "step": 385 }, { "epoch": 0.026853107934189015, "grad_norm": 1.640625, "learning_rate": 0.001787037037037037, "loss": 3.5292, "step": 386 }, { "epoch": 0.02692267557132422, "grad_norm": 1.5546875, "learning_rate": 0.0017916666666666669, "loss": 3.3985, "step": 387 }, { "epoch": 0.026992243208459426, "grad_norm": 1.453125, "learning_rate": 0.0017962962962962963, "loss": 3.6987, "step": 388 }, { "epoch": 0.027061810845594628, "grad_norm": 2.9375, "learning_rate": 0.0018009259259259259, "loss": 3.6334, "step": 389 }, { "epoch": 0.027131378482729833, "grad_norm": 1.6015625, "learning_rate": 0.0018055555555555557, "loss": 3.3575, "step": 390 }, { "epoch": 0.027200946119865038, "grad_norm": 1.953125, "learning_rate": 0.001810185185185185, "loss": 3.7009, "step": 391 }, { "epoch": 0.027270513757000243, "grad_norm": 1.5078125, "learning_rate": 0.001814814814814815, "loss": 3.602, "step": 392 }, { "epoch": 0.02734008139413545, "grad_norm": 1.6953125, "learning_rate": 0.0018194444444444445, "loss": 3.6099, "step": 393 }, { "epoch": 0.027409649031270654, "grad_norm": 2.0625, "learning_rate": 0.0018240740740740743, "loss": 3.3514, "step": 394 }, { "epoch": 0.02747921666840586, "grad_norm": 1.5078125, "learning_rate": 0.0018287037037037037, "loss": 3.6098, "step": 395 }, { "epoch": 0.02754878430554106, "grad_norm": 2.03125, "learning_rate": 0.0018333333333333333, "loss": 3.5332, "step": 396 }, { "epoch": 0.027618351942676266, "grad_norm": 2.875, "learning_rate": 0.0018379629629629631, "loss": 3.3918, "step": 397 }, { "epoch": 0.02768791957981147, "grad_norm": 1.828125, "learning_rate": 0.0018425925925925925, "loss": 3.2549, "step": 398 }, { "epoch": 0.027757487216946677, "grad_norm": 2.921875, "learning_rate": 0.0018472222222222223, "loss": 3.378, "step": 399 }, { "epoch": 0.027827054854081882, "grad_norm": 1.7421875, "learning_rate": 0.001851851851851852, "loss": 3.3514, "step": 400 }, { "epoch": 0.027896622491217087, "grad_norm": 1.875, "learning_rate": 0.0018564814814814815, "loss": 3.163, "step": 401 }, { "epoch": 0.02796619012835229, "grad_norm": 1.5859375, "learning_rate": 0.0018611111111111111, "loss": 3.3832, "step": 402 }, { "epoch": 0.028035757765487494, "grad_norm": 1.296875, "learning_rate": 0.0018657407407407407, "loss": 3.3657, "step": 403 }, { "epoch": 0.0281053254026227, "grad_norm": 2.15625, "learning_rate": 0.0018703703703703705, "loss": 3.7129, "step": 404 }, { "epoch": 0.028174893039757905, "grad_norm": 1.609375, "learning_rate": 0.001875, "loss": 3.3413, "step": 405 }, { "epoch": 0.02824446067689311, "grad_norm": 2.796875, "learning_rate": 0.0018796296296296295, "loss": 3.5818, "step": 406 }, { "epoch": 0.028314028314028315, "grad_norm": 2.015625, "learning_rate": 0.0018842592592592594, "loss": 3.2641, "step": 407 }, { "epoch": 0.028383595951163517, "grad_norm": 2.3125, "learning_rate": 0.001888888888888889, "loss": 3.4429, "step": 408 }, { "epoch": 0.028453163588298722, "grad_norm": 2.0, "learning_rate": 0.0018935185185185186, "loss": 3.2141, "step": 409 }, { "epoch": 0.028522731225433928, "grad_norm": 1.8515625, "learning_rate": 0.0018981481481481482, "loss": 3.4574, "step": 410 }, { "epoch": 0.028592298862569133, "grad_norm": 1.734375, "learning_rate": 0.0019027777777777778, "loss": 3.148, "step": 411 }, { "epoch": 0.028661866499704338, "grad_norm": 1.9609375, "learning_rate": 0.0019074074074074076, "loss": 3.7982, "step": 412 }, { "epoch": 0.028731434136839543, "grad_norm": 1.6171875, "learning_rate": 0.001912037037037037, "loss": 3.3567, "step": 413 }, { "epoch": 0.02880100177397475, "grad_norm": 3.046875, "learning_rate": 0.0019166666666666668, "loss": 3.6453, "step": 414 }, { "epoch": 0.02887056941110995, "grad_norm": 2.1875, "learning_rate": 0.0019212962962962964, "loss": 3.4031, "step": 415 }, { "epoch": 0.028940137048245156, "grad_norm": 1.6796875, "learning_rate": 0.0019259259259259258, "loss": 3.6185, "step": 416 }, { "epoch": 0.02900970468538036, "grad_norm": 1.8828125, "learning_rate": 0.0019305555555555556, "loss": 3.0648, "step": 417 }, { "epoch": 0.029079272322515566, "grad_norm": 1.7734375, "learning_rate": 0.0019351851851851852, "loss": 3.3076, "step": 418 }, { "epoch": 0.02914883995965077, "grad_norm": 2.0, "learning_rate": 0.001939814814814815, "loss": 3.5246, "step": 419 }, { "epoch": 0.029218407596785977, "grad_norm": 2.921875, "learning_rate": 0.0019444444444444444, "loss": 3.6498, "step": 420 }, { "epoch": 0.02928797523392118, "grad_norm": 2.828125, "learning_rate": 0.0019490740740740742, "loss": 3.2355, "step": 421 }, { "epoch": 0.029357542871056384, "grad_norm": 2.8125, "learning_rate": 0.0019537037037037036, "loss": 3.9044, "step": 422 }, { "epoch": 0.02942711050819159, "grad_norm": 2.390625, "learning_rate": 0.001958333333333333, "loss": 3.5943, "step": 423 }, { "epoch": 0.029496678145326794, "grad_norm": 1.6640625, "learning_rate": 0.0019629629629629632, "loss": 3.7883, "step": 424 }, { "epoch": 0.029566245782462, "grad_norm": 1.8203125, "learning_rate": 0.0019675925925925924, "loss": 3.5543, "step": 425 }, { "epoch": 0.029635813419597205, "grad_norm": 1.5, "learning_rate": 0.0019722222222222224, "loss": 3.6355, "step": 426 }, { "epoch": 0.029705381056732406, "grad_norm": 1.6796875, "learning_rate": 0.001976851851851852, "loss": 3.1634, "step": 427 }, { "epoch": 0.02977494869386761, "grad_norm": 2.0625, "learning_rate": 0.0019814814814814816, "loss": 3.2555, "step": 428 }, { "epoch": 0.029844516331002817, "grad_norm": 2.421875, "learning_rate": 0.0019861111111111112, "loss": 3.1424, "step": 429 }, { "epoch": 0.029914083968138022, "grad_norm": 2.15625, "learning_rate": 0.001990740740740741, "loss": 3.5279, "step": 430 }, { "epoch": 0.029983651605273227, "grad_norm": 3.203125, "learning_rate": 0.0019953703703703704, "loss": 3.0176, "step": 431 }, { "epoch": 0.030053219242408433, "grad_norm": 3.03125, "learning_rate": 0.002, "loss": 3.1193, "step": 432 }, { "epoch": 0.030122786879543638, "grad_norm": 2.828125, "learning_rate": 0.00199999997461252, "loss": 3.2754, "step": 433 }, { "epoch": 0.03019235451667884, "grad_norm": 2.125, "learning_rate": 0.0019999998984500823, "loss": 3.5648, "step": 434 }, { "epoch": 0.030261922153814045, "grad_norm": 2.125, "learning_rate": 0.0019999997715126894, "loss": 3.3541, "step": 435 }, { "epoch": 0.03033148979094925, "grad_norm": 2.078125, "learning_rate": 0.001999999593800349, "loss": 3.3309, "step": 436 }, { "epoch": 0.030401057428084455, "grad_norm": 2.890625, "learning_rate": 0.001999999365313069, "loss": 3.3605, "step": 437 }, { "epoch": 0.03047062506521966, "grad_norm": 1.921875, "learning_rate": 0.0019999990860508623, "loss": 3.3191, "step": 438 }, { "epoch": 0.030540192702354866, "grad_norm": 2.25, "learning_rate": 0.001999998756013742, "loss": 3.1285, "step": 439 }, { "epoch": 0.030609760339490068, "grad_norm": 2.984375, "learning_rate": 0.001999998375201725, "loss": 3.0919, "step": 440 }, { "epoch": 0.030679327976625273, "grad_norm": 1.6328125, "learning_rate": 0.001999997943614831, "loss": 3.4453, "step": 441 }, { "epoch": 0.030748895613760478, "grad_norm": 1.78125, "learning_rate": 0.001999997461253082, "loss": 3.4458, "step": 442 }, { "epoch": 0.030818463250895684, "grad_norm": 2.3125, "learning_rate": 0.001999996928116502, "loss": 3.7504, "step": 443 }, { "epoch": 0.03088803088803089, "grad_norm": 1.6171875, "learning_rate": 0.001999996344205119, "loss": 3.333, "step": 444 }, { "epoch": 0.030957598525166094, "grad_norm": 1.9921875, "learning_rate": 0.0019999957095189615, "loss": 3.4064, "step": 445 }, { "epoch": 0.031027166162301296, "grad_norm": 1.875, "learning_rate": 0.001999995024058062, "loss": 3.095, "step": 446 }, { "epoch": 0.0310967337994365, "grad_norm": 1.9609375, "learning_rate": 0.001999994287822456, "loss": 3.4324, "step": 447 }, { "epoch": 0.031166301436571706, "grad_norm": 2.34375, "learning_rate": 0.00199999350081218, "loss": 2.9063, "step": 448 }, { "epoch": 0.03123586907370691, "grad_norm": 1.6875, "learning_rate": 0.001999992663027275, "loss": 3.5043, "step": 449 }, { "epoch": 0.03130543671084211, "grad_norm": 1.9140625, "learning_rate": 0.0019999917744677824, "loss": 3.4375, "step": 450 }, { "epoch": 0.03137500434797732, "grad_norm": 2.5625, "learning_rate": 0.001999990835133748, "loss": 3.3257, "step": 451 }, { "epoch": 0.031444571985112524, "grad_norm": 1.8515625, "learning_rate": 0.001999989845025219, "loss": 3.6367, "step": 452 }, { "epoch": 0.03151413962224773, "grad_norm": 1.4765625, "learning_rate": 0.001999988804142246, "loss": 3.3612, "step": 453 }, { "epoch": 0.031583707259382934, "grad_norm": 1.859375, "learning_rate": 0.0019999877124848822, "loss": 3.1522, "step": 454 }, { "epoch": 0.03165327489651814, "grad_norm": 2.15625, "learning_rate": 0.0019999865700531826, "loss": 3.3392, "step": 455 }, { "epoch": 0.031722842533653345, "grad_norm": 1.671875, "learning_rate": 0.001999985376847205, "loss": 3.2132, "step": 456 }, { "epoch": 0.03179241017078855, "grad_norm": 1.4375, "learning_rate": 0.0019999841328670106, "loss": 2.9862, "step": 457 }, { "epoch": 0.031861977807923755, "grad_norm": 1.6171875, "learning_rate": 0.001999982838112662, "loss": 3.2235, "step": 458 }, { "epoch": 0.03193154544505896, "grad_norm": 2.1875, "learning_rate": 0.0019999814925842256, "loss": 3.1465, "step": 459 }, { "epoch": 0.032001113082194166, "grad_norm": 2.03125, "learning_rate": 0.0019999800962817687, "loss": 3.6032, "step": 460 }, { "epoch": 0.03207068071932937, "grad_norm": 1.7109375, "learning_rate": 0.0019999786492053634, "loss": 3.2446, "step": 461 }, { "epoch": 0.03214024835646457, "grad_norm": 2.140625, "learning_rate": 0.001999977151355082, "loss": 3.5569, "step": 462 }, { "epoch": 0.032209815993599775, "grad_norm": 1.796875, "learning_rate": 0.001999975602731001, "loss": 3.2789, "step": 463 }, { "epoch": 0.03227938363073498, "grad_norm": 1.7109375, "learning_rate": 0.0019999740033332, "loss": 3.1285, "step": 464 }, { "epoch": 0.032348951267870185, "grad_norm": 1.453125, "learning_rate": 0.0019999723531617586, "loss": 3.2197, "step": 465 }, { "epoch": 0.03241851890500539, "grad_norm": 1.9375, "learning_rate": 0.0019999706522167617, "loss": 3.6819, "step": 466 }, { "epoch": 0.032488086542140596, "grad_norm": 1.9296875, "learning_rate": 0.0019999689004982953, "loss": 3.6977, "step": 467 }, { "epoch": 0.0325576541792758, "grad_norm": 1.6484375, "learning_rate": 0.001999967098006448, "loss": 3.4413, "step": 468 }, { "epoch": 0.032627221816411006, "grad_norm": 1.953125, "learning_rate": 0.0019999652447413117, "loss": 3.381, "step": 469 }, { "epoch": 0.03269678945354621, "grad_norm": 1.6875, "learning_rate": 0.0019999633407029806, "loss": 3.3125, "step": 470 }, { "epoch": 0.03276635709068142, "grad_norm": 2.125, "learning_rate": 0.0019999613858915515, "loss": 3.4642, "step": 471 }, { "epoch": 0.03283592472781662, "grad_norm": 1.4140625, "learning_rate": 0.0019999593803071234, "loss": 3.5275, "step": 472 }, { "epoch": 0.03290549236495183, "grad_norm": 1.6484375, "learning_rate": 0.0019999573239497977, "loss": 3.3881, "step": 473 }, { "epoch": 0.03297506000208703, "grad_norm": 2.0625, "learning_rate": 0.0019999552168196797, "loss": 3.446, "step": 474 }, { "epoch": 0.03304462763922223, "grad_norm": 1.8359375, "learning_rate": 0.0019999530589168753, "loss": 3.4311, "step": 475 }, { "epoch": 0.033114195276357436, "grad_norm": 2.484375, "learning_rate": 0.001999950850241495, "loss": 3.626, "step": 476 }, { "epoch": 0.03318376291349264, "grad_norm": 1.890625, "learning_rate": 0.001999948590793651, "loss": 3.486, "step": 477 }, { "epoch": 0.033253330550627846, "grad_norm": 1.40625, "learning_rate": 0.0019999462805734575, "loss": 3.8674, "step": 478 }, { "epoch": 0.03332289818776305, "grad_norm": 2.0, "learning_rate": 0.0019999439195810317, "loss": 3.5158, "step": 479 }, { "epoch": 0.03339246582489826, "grad_norm": 2.015625, "learning_rate": 0.0019999415078164945, "loss": 3.1666, "step": 480 }, { "epoch": 0.03346203346203346, "grad_norm": 1.46875, "learning_rate": 0.001999939045279967, "loss": 3.6701, "step": 481 }, { "epoch": 0.03353160109916867, "grad_norm": 1.390625, "learning_rate": 0.0019999365319715748, "loss": 3.4038, "step": 482 }, { "epoch": 0.03360116873630387, "grad_norm": 2.109375, "learning_rate": 0.0019999339678914456, "loss": 3.3869, "step": 483 }, { "epoch": 0.03367073637343908, "grad_norm": 1.8671875, "learning_rate": 0.00199993135303971, "loss": 3.4404, "step": 484 }, { "epoch": 0.03374030401057428, "grad_norm": 1.65625, "learning_rate": 0.0019999286874165, "loss": 3.2973, "step": 485 }, { "epoch": 0.03380987164770949, "grad_norm": 2.171875, "learning_rate": 0.0019999259710219513, "loss": 3.4502, "step": 486 }, { "epoch": 0.03387943928484469, "grad_norm": 1.484375, "learning_rate": 0.0019999232038562013, "loss": 3.3338, "step": 487 }, { "epoch": 0.03394900692197989, "grad_norm": 1.25, "learning_rate": 0.0019999203859193916, "loss": 3.5657, "step": 488 }, { "epoch": 0.0340185745591151, "grad_norm": 1.7578125, "learning_rate": 0.0019999175172116645, "loss": 3.6222, "step": 489 }, { "epoch": 0.0340881421962503, "grad_norm": 2.203125, "learning_rate": 0.0019999145977331657, "loss": 3.5801, "step": 490 }, { "epoch": 0.03415770983338551, "grad_norm": 2.203125, "learning_rate": 0.001999911627484044, "loss": 3.0592, "step": 491 }, { "epoch": 0.03422727747052071, "grad_norm": 2.1875, "learning_rate": 0.0019999086064644493, "loss": 3.1636, "step": 492 }, { "epoch": 0.03429684510765592, "grad_norm": 1.4140625, "learning_rate": 0.0019999055346745357, "loss": 3.2533, "step": 493 }, { "epoch": 0.034366412744791124, "grad_norm": 1.859375, "learning_rate": 0.0019999024121144585, "loss": 2.8686, "step": 494 }, { "epoch": 0.03443598038192633, "grad_norm": 2.203125, "learning_rate": 0.001999899238784377, "loss": 3.1926, "step": 495 }, { "epoch": 0.034505548019061534, "grad_norm": 1.8359375, "learning_rate": 0.0019998960146844526, "loss": 3.454, "step": 496 }, { "epoch": 0.03457511565619674, "grad_norm": 1.6875, "learning_rate": 0.001999892739814848, "loss": 3.9289, "step": 497 }, { "epoch": 0.034644683293331945, "grad_norm": 1.46875, "learning_rate": 0.0019998894141757297, "loss": 3.6297, "step": 498 }, { "epoch": 0.03471425093046715, "grad_norm": 1.640625, "learning_rate": 0.001999886037767267, "loss": 3.2615, "step": 499 }, { "epoch": 0.03478381856760235, "grad_norm": 1.3515625, "learning_rate": 0.0019998826105896306, "loss": 3.6395, "step": 500 }, { "epoch": 0.03485338620473755, "grad_norm": 1.6484375, "learning_rate": 0.0019998791326429955, "loss": 2.9141, "step": 501 }, { "epoch": 0.03492295384187276, "grad_norm": 1.8828125, "learning_rate": 0.001999875603927538, "loss": 3.3799, "step": 502 }, { "epoch": 0.034992521479007964, "grad_norm": 2.015625, "learning_rate": 0.001999872024443437, "loss": 3.7696, "step": 503 }, { "epoch": 0.03506208911614317, "grad_norm": 1.7421875, "learning_rate": 0.001999868394190874, "loss": 3.5759, "step": 504 }, { "epoch": 0.035131656753278374, "grad_norm": 1.8359375, "learning_rate": 0.001999864713170034, "loss": 3.5347, "step": 505 }, { "epoch": 0.03520122439041358, "grad_norm": 1.3359375, "learning_rate": 0.001999860981381103, "loss": 3.4105, "step": 506 }, { "epoch": 0.035270792027548785, "grad_norm": 1.6484375, "learning_rate": 0.0019998571988242716, "loss": 3.516, "step": 507 }, { "epoch": 0.03534035966468399, "grad_norm": 1.34375, "learning_rate": 0.001999853365499731, "loss": 3.7225, "step": 508 }, { "epoch": 0.035409927301819195, "grad_norm": 1.296875, "learning_rate": 0.001999849481407676, "loss": 3.3798, "step": 509 }, { "epoch": 0.0354794949389544, "grad_norm": 1.734375, "learning_rate": 0.0019998455465483045, "loss": 3.2524, "step": 510 }, { "epoch": 0.035549062576089606, "grad_norm": 1.4609375, "learning_rate": 0.0019998415609218155, "loss": 3.4873, "step": 511 }, { "epoch": 0.03561863021322481, "grad_norm": 1.515625, "learning_rate": 0.0019998375245284116, "loss": 3.3883, "step": 512 }, { "epoch": 0.03568819785036001, "grad_norm": 1.2734375, "learning_rate": 0.0019998334373682977, "loss": 3.401, "step": 513 }, { "epoch": 0.035757765487495215, "grad_norm": 1.5703125, "learning_rate": 0.0019998292994416814, "loss": 2.9872, "step": 514 }, { "epoch": 0.03582733312463042, "grad_norm": 1.8828125, "learning_rate": 0.0019998251107487728, "loss": 3.3022, "step": 515 }, { "epoch": 0.035896900761765625, "grad_norm": 1.953125, "learning_rate": 0.0019998208712897845, "loss": 3.3426, "step": 516 }, { "epoch": 0.03596646839890083, "grad_norm": 2.265625, "learning_rate": 0.0019998165810649316, "loss": 3.2612, "step": 517 }, { "epoch": 0.036036036036036036, "grad_norm": 1.8046875, "learning_rate": 0.0019998122400744327, "loss": 3.2155, "step": 518 }, { "epoch": 0.03610560367317124, "grad_norm": 1.328125, "learning_rate": 0.001999807848318507, "loss": 3.3988, "step": 519 }, { "epoch": 0.036175171310306446, "grad_norm": 1.21875, "learning_rate": 0.001999803405797379, "loss": 3.5409, "step": 520 }, { "epoch": 0.03624473894744165, "grad_norm": 1.609375, "learning_rate": 0.001999798912511273, "loss": 3.8011, "step": 521 }, { "epoch": 0.03631430658457686, "grad_norm": 1.3671875, "learning_rate": 0.0019997943684604176, "loss": 3.0568, "step": 522 }, { "epoch": 0.03638387422171206, "grad_norm": 1.6640625, "learning_rate": 0.001999789773645043, "loss": 3.2267, "step": 523 }, { "epoch": 0.03645344185884727, "grad_norm": 1.5859375, "learning_rate": 0.001999785128065384, "loss": 3.0883, "step": 524 }, { "epoch": 0.036523009495982466, "grad_norm": 1.546875, "learning_rate": 0.001999780431721675, "loss": 3.5484, "step": 525 }, { "epoch": 0.03659257713311767, "grad_norm": 1.8203125, "learning_rate": 0.0019997756846141545, "loss": 3.5082, "step": 526 }, { "epoch": 0.036662144770252876, "grad_norm": 1.359375, "learning_rate": 0.0019997708867430645, "loss": 3.5143, "step": 527 }, { "epoch": 0.03673171240738808, "grad_norm": 1.4921875, "learning_rate": 0.001999766038108648, "loss": 3.2694, "step": 528 }, { "epoch": 0.03680128004452329, "grad_norm": 1.9453125, "learning_rate": 0.0019997611387111516, "loss": 3.274, "step": 529 }, { "epoch": 0.03687084768165849, "grad_norm": 1.53125, "learning_rate": 0.001999756188550823, "loss": 3.415, "step": 530 }, { "epoch": 0.0369404153187937, "grad_norm": 2.0625, "learning_rate": 0.001999751187627915, "loss": 3.8245, "step": 531 }, { "epoch": 0.0370099829559289, "grad_norm": 2.203125, "learning_rate": 0.0019997461359426805, "loss": 3.3096, "step": 532 }, { "epoch": 0.03707955059306411, "grad_norm": 2.625, "learning_rate": 0.001999741033495376, "loss": 3.1154, "step": 533 }, { "epoch": 0.03714911823019931, "grad_norm": 2.46875, "learning_rate": 0.0019997358802862617, "loss": 3.028, "step": 534 }, { "epoch": 0.03721868586733452, "grad_norm": 2.375, "learning_rate": 0.0019997306763155976, "loss": 3.3044, "step": 535 }, { "epoch": 0.03728825350446972, "grad_norm": 1.7109375, "learning_rate": 0.001999725421583649, "loss": 3.5187, "step": 536 }, { "epoch": 0.03735782114160493, "grad_norm": 1.75, "learning_rate": 0.001999720116090683, "loss": 3.1256, "step": 537 }, { "epoch": 0.03742738877874013, "grad_norm": 1.828125, "learning_rate": 0.001999714759836968, "loss": 3.4639, "step": 538 }, { "epoch": 0.03749695641587533, "grad_norm": 1.2578125, "learning_rate": 0.0019997093528227768, "loss": 3.4131, "step": 539 }, { "epoch": 0.03756652405301054, "grad_norm": 1.609375, "learning_rate": 0.001999703895048383, "loss": 3.2755, "step": 540 }, { "epoch": 0.03763609169014574, "grad_norm": 1.703125, "learning_rate": 0.0019996983865140645, "loss": 3.0651, "step": 541 }, { "epoch": 0.03770565932728095, "grad_norm": 1.2734375, "learning_rate": 0.001999692827220101, "loss": 3.7271, "step": 542 }, { "epoch": 0.03777522696441615, "grad_norm": 1.4765625, "learning_rate": 0.001999687217166774, "loss": 2.8666, "step": 543 }, { "epoch": 0.03784479460155136, "grad_norm": 2.609375, "learning_rate": 0.0019996815563543694, "loss": 3.3484, "step": 544 }, { "epoch": 0.037914362238686564, "grad_norm": 1.9140625, "learning_rate": 0.0019996758447831746, "loss": 2.9785, "step": 545 }, { "epoch": 0.03798392987582177, "grad_norm": 2.09375, "learning_rate": 0.0019996700824534783, "loss": 3.1998, "step": 546 }, { "epoch": 0.038053497512956974, "grad_norm": 1.7734375, "learning_rate": 0.001999664269365574, "loss": 2.9826, "step": 547 }, { "epoch": 0.03812306515009218, "grad_norm": 1.3515625, "learning_rate": 0.001999658405519757, "loss": 3.3673, "step": 548 }, { "epoch": 0.038192632787227385, "grad_norm": 1.640625, "learning_rate": 0.001999652490916325, "loss": 3.7924, "step": 549 }, { "epoch": 0.03826220042436259, "grad_norm": 1.578125, "learning_rate": 0.001999646525555578, "loss": 3.3943, "step": 550 }, { "epoch": 0.03833176806149779, "grad_norm": 1.921875, "learning_rate": 0.0019996405094378188, "loss": 2.9449, "step": 551 }, { "epoch": 0.03840133569863299, "grad_norm": 2.53125, "learning_rate": 0.0019996344425633533, "loss": 3.5443, "step": 552 }, { "epoch": 0.0384709033357682, "grad_norm": 1.9765625, "learning_rate": 0.0019996283249324896, "loss": 3.2403, "step": 553 }, { "epoch": 0.038540470972903404, "grad_norm": 1.515625, "learning_rate": 0.0019996221565455378, "loss": 3.493, "step": 554 }, { "epoch": 0.03861003861003861, "grad_norm": 1.4921875, "learning_rate": 0.0019996159374028113, "loss": 3.4755, "step": 555 }, { "epoch": 0.038679606247173814, "grad_norm": 1.484375, "learning_rate": 0.0019996096675046256, "loss": 3.5693, "step": 556 }, { "epoch": 0.03874917388430902, "grad_norm": 1.53125, "learning_rate": 0.0019996033468513003, "loss": 3.2506, "step": 557 }, { "epoch": 0.038818741521444225, "grad_norm": 1.75, "learning_rate": 0.001999596975443155, "loss": 3.5411, "step": 558 }, { "epoch": 0.03888830915857943, "grad_norm": 1.7265625, "learning_rate": 0.0019995905532805133, "loss": 3.6029, "step": 559 }, { "epoch": 0.038957876795714635, "grad_norm": 2.03125, "learning_rate": 0.001999584080363702, "loss": 3.3463, "step": 560 }, { "epoch": 0.03902744443284984, "grad_norm": 3.0625, "learning_rate": 0.001999577556693049, "loss": 3.5366, "step": 561 }, { "epoch": 0.039097012069985046, "grad_norm": 1.84375, "learning_rate": 0.001999570982268886, "loss": 3.3199, "step": 562 }, { "epoch": 0.039166579707120244, "grad_norm": 1.46875, "learning_rate": 0.001999564357091547, "loss": 3.5079, "step": 563 }, { "epoch": 0.03923614734425545, "grad_norm": 1.328125, "learning_rate": 0.001999557681161368, "loss": 3.5224, "step": 564 }, { "epoch": 0.039305714981390655, "grad_norm": 1.59375, "learning_rate": 0.001999550954478688, "loss": 3.674, "step": 565 }, { "epoch": 0.03937528261852586, "grad_norm": 1.234375, "learning_rate": 0.0019995441770438486, "loss": 3.3461, "step": 566 }, { "epoch": 0.039444850255661065, "grad_norm": 1.3125, "learning_rate": 0.001999537348857194, "loss": 3.3256, "step": 567 }, { "epoch": 0.03951441789279627, "grad_norm": 2.078125, "learning_rate": 0.0019995304699190713, "loss": 3.0955, "step": 568 }, { "epoch": 0.039583985529931476, "grad_norm": 1.96875, "learning_rate": 0.0019995235402298288, "loss": 3.6332, "step": 569 }, { "epoch": 0.03965355316706668, "grad_norm": 1.71875, "learning_rate": 0.0019995165597898193, "loss": 3.2571, "step": 570 }, { "epoch": 0.039723120804201886, "grad_norm": 1.640625, "learning_rate": 0.0019995095285993965, "loss": 2.8634, "step": 571 }, { "epoch": 0.03979268844133709, "grad_norm": 1.6796875, "learning_rate": 0.001999502446658918, "loss": 3.444, "step": 572 }, { "epoch": 0.0398622560784723, "grad_norm": 1.265625, "learning_rate": 0.001999495313968743, "loss": 3.4595, "step": 573 }, { "epoch": 0.0399318237156075, "grad_norm": 1.375, "learning_rate": 0.0019994881305292335, "loss": 3.588, "step": 574 }, { "epoch": 0.04000139135274271, "grad_norm": 2.1875, "learning_rate": 0.0019994808963407548, "loss": 3.1029, "step": 575 }, { "epoch": 0.040070958989877906, "grad_norm": 1.6796875, "learning_rate": 0.001999473611403674, "loss": 3.4616, "step": 576 }, { "epoch": 0.04014052662701311, "grad_norm": 2.40625, "learning_rate": 0.001999466275718361, "loss": 3.1576, "step": 577 }, { "epoch": 0.040210094264148316, "grad_norm": 2.453125, "learning_rate": 0.001999458889285188, "loss": 3.1757, "step": 578 }, { "epoch": 0.04027966190128352, "grad_norm": 1.6796875, "learning_rate": 0.00199945145210453, "loss": 2.8828, "step": 579 }, { "epoch": 0.04034922953841873, "grad_norm": 1.578125, "learning_rate": 0.0019994439641767654, "loss": 3.1672, "step": 580 }, { "epoch": 0.04041879717555393, "grad_norm": 1.6796875, "learning_rate": 0.001999436425502274, "loss": 3.35, "step": 581 }, { "epoch": 0.04048836481268914, "grad_norm": 1.671875, "learning_rate": 0.0019994288360814377, "loss": 3.3505, "step": 582 }, { "epoch": 0.04055793244982434, "grad_norm": 1.5, "learning_rate": 0.001999421195914643, "loss": 3.2588, "step": 583 }, { "epoch": 0.04062750008695955, "grad_norm": 2.0, "learning_rate": 0.0019994135050022776, "loss": 3.6955, "step": 584 }, { "epoch": 0.04069706772409475, "grad_norm": 1.453125, "learning_rate": 0.0019994057633447317, "loss": 3.1572, "step": 585 }, { "epoch": 0.04076663536122996, "grad_norm": 1.671875, "learning_rate": 0.0019993979709423985, "loss": 3.1665, "step": 586 }, { "epoch": 0.04083620299836516, "grad_norm": 1.6796875, "learning_rate": 0.0019993901277956735, "loss": 3.7347, "step": 587 }, { "epoch": 0.04090577063550037, "grad_norm": 1.1640625, "learning_rate": 0.0019993822339049554, "loss": 3.4667, "step": 588 }, { "epoch": 0.04097533827263557, "grad_norm": 1.5390625, "learning_rate": 0.0019993742892706447, "loss": 3.3394, "step": 589 }, { "epoch": 0.04104490590977077, "grad_norm": 1.4296875, "learning_rate": 0.001999366293893145, "loss": 3.34, "step": 590 }, { "epoch": 0.04111447354690598, "grad_norm": 1.171875, "learning_rate": 0.0019993582477728614, "loss": 3.2674, "step": 591 }, { "epoch": 0.04118404118404118, "grad_norm": 1.640625, "learning_rate": 0.0019993501509102036, "loss": 3.2031, "step": 592 }, { "epoch": 0.04125360882117639, "grad_norm": 1.1640625, "learning_rate": 0.001999342003305582, "loss": 3.6727, "step": 593 }, { "epoch": 0.04132317645831159, "grad_norm": 1.5625, "learning_rate": 0.0019993338049594106, "loss": 2.8559, "step": 594 }, { "epoch": 0.0413927440954468, "grad_norm": 1.5, "learning_rate": 0.001999325555872106, "loss": 3.2697, "step": 595 }, { "epoch": 0.041462311732582004, "grad_norm": 1.4375, "learning_rate": 0.001999317256044086, "loss": 3.6079, "step": 596 }, { "epoch": 0.04153187936971721, "grad_norm": 1.5, "learning_rate": 0.0019993089054757733, "loss": 3.4023, "step": 597 }, { "epoch": 0.041601447006852414, "grad_norm": 2.265625, "learning_rate": 0.001999300504167591, "loss": 3.3421, "step": 598 }, { "epoch": 0.04167101464398762, "grad_norm": 1.6484375, "learning_rate": 0.0019992920521199656, "loss": 3.1932, "step": 599 }, { "epoch": 0.041740582281122825, "grad_norm": 1.9453125, "learning_rate": 0.001999283549333327, "loss": 3.1098, "step": 600 }, { "epoch": 0.04181014991825802, "grad_norm": 2.390625, "learning_rate": 0.001999274995808106, "loss": 3.0167, "step": 601 }, { "epoch": 0.04187971755539323, "grad_norm": 1.40625, "learning_rate": 0.001999266391544738, "loss": 3.2582, "step": 602 }, { "epoch": 0.041949285192528434, "grad_norm": 1.9453125, "learning_rate": 0.0019992577365436593, "loss": 3.6975, "step": 603 }, { "epoch": 0.04201885282966364, "grad_norm": 1.5703125, "learning_rate": 0.001999249030805309, "loss": 3.2861, "step": 604 }, { "epoch": 0.042088420466798844, "grad_norm": 1.6796875, "learning_rate": 0.00199924027433013, "loss": 3.6903, "step": 605 }, { "epoch": 0.04215798810393405, "grad_norm": 1.4921875, "learning_rate": 0.0019992314671185662, "loss": 3.3531, "step": 606 }, { "epoch": 0.042227555741069255, "grad_norm": 1.28125, "learning_rate": 0.001999222609171065, "loss": 3.5518, "step": 607 }, { "epoch": 0.04229712337820446, "grad_norm": 1.0546875, "learning_rate": 0.001999213700488076, "loss": 3.0014, "step": 608 }, { "epoch": 0.042366691015339665, "grad_norm": 1.6328125, "learning_rate": 0.0019992047410700518, "loss": 2.9034, "step": 609 }, { "epoch": 0.04243625865247487, "grad_norm": 1.34375, "learning_rate": 0.0019991957309174473, "loss": 3.6726, "step": 610 }, { "epoch": 0.042505826289610076, "grad_norm": 1.015625, "learning_rate": 0.0019991866700307197, "loss": 3.7199, "step": 611 }, { "epoch": 0.04257539392674528, "grad_norm": 0.99609375, "learning_rate": 0.0019991775584103297, "loss": 3.639, "step": 612 }, { "epoch": 0.042644961563880486, "grad_norm": 1.546875, "learning_rate": 0.001999168396056739, "loss": 3.2102, "step": 613 }, { "epoch": 0.042714529201015684, "grad_norm": 1.796875, "learning_rate": 0.0019991591829704135, "loss": 3.319, "step": 614 }, { "epoch": 0.04278409683815089, "grad_norm": 1.5546875, "learning_rate": 0.0019991499191518206, "loss": 3.1985, "step": 615 }, { "epoch": 0.042853664475286095, "grad_norm": 1.5234375, "learning_rate": 0.0019991406046014314, "loss": 3.4867, "step": 616 }, { "epoch": 0.0429232321124213, "grad_norm": 1.4140625, "learning_rate": 0.001999131239319718, "loss": 3.304, "step": 617 }, { "epoch": 0.042992799749556505, "grad_norm": 1.609375, "learning_rate": 0.0019991218233071564, "loss": 3.4094, "step": 618 }, { "epoch": 0.04306236738669171, "grad_norm": 1.2578125, "learning_rate": 0.0019991123565642247, "loss": 3.1638, "step": 619 }, { "epoch": 0.043131935023826916, "grad_norm": 1.234375, "learning_rate": 0.001999102839091403, "loss": 3.3121, "step": 620 }, { "epoch": 0.04320150266096212, "grad_norm": 1.1953125, "learning_rate": 0.001999093270889175, "loss": 3.2227, "step": 621 }, { "epoch": 0.043271070298097326, "grad_norm": 2.0625, "learning_rate": 0.001999083651958027, "loss": 3.4165, "step": 622 }, { "epoch": 0.04334063793523253, "grad_norm": 1.3515625, "learning_rate": 0.0019990739822984467, "loss": 3.5862, "step": 623 }, { "epoch": 0.04341020557236774, "grad_norm": 1.390625, "learning_rate": 0.0019990642619109253, "loss": 3.0626, "step": 624 }, { "epoch": 0.04347977320950294, "grad_norm": 1.3046875, "learning_rate": 0.001999054490795956, "loss": 3.7579, "step": 625 }, { "epoch": 0.04354934084663815, "grad_norm": 1.265625, "learning_rate": 0.001999044668954036, "loss": 3.245, "step": 626 }, { "epoch": 0.043618908483773346, "grad_norm": 1.984375, "learning_rate": 0.0019990347963856625, "loss": 3.6678, "step": 627 }, { "epoch": 0.04368847612090855, "grad_norm": 1.3828125, "learning_rate": 0.001999024873091338, "loss": 3.2047, "step": 628 }, { "epoch": 0.043758043758043756, "grad_norm": 1.84375, "learning_rate": 0.0019990148990715654, "loss": 3.3547, "step": 629 }, { "epoch": 0.04382761139517896, "grad_norm": 1.671875, "learning_rate": 0.001999004874326852, "loss": 3.2467, "step": 630 }, { "epoch": 0.04389717903231417, "grad_norm": 1.5703125, "learning_rate": 0.001998994798857707, "loss": 3.5803, "step": 631 }, { "epoch": 0.04396674666944937, "grad_norm": 2.09375, "learning_rate": 0.0019989846726646407, "loss": 3.2881, "step": 632 }, { "epoch": 0.04403631430658458, "grad_norm": 1.7578125, "learning_rate": 0.001998974495748168, "loss": 3.0706, "step": 633 }, { "epoch": 0.04410588194371978, "grad_norm": 1.6484375, "learning_rate": 0.0019989642681088058, "loss": 3.4599, "step": 634 }, { "epoch": 0.04417544958085499, "grad_norm": 1.6015625, "learning_rate": 0.001998953989747073, "loss": 3.3248, "step": 635 }, { "epoch": 0.04424501721799019, "grad_norm": 1.09375, "learning_rate": 0.001998943660663492, "loss": 3.4659, "step": 636 }, { "epoch": 0.0443145848551254, "grad_norm": 1.3671875, "learning_rate": 0.001998933280858587, "loss": 3.6053, "step": 637 }, { "epoch": 0.0443841524922606, "grad_norm": 1.1328125, "learning_rate": 0.0019989228503328846, "loss": 3.4567, "step": 638 }, { "epoch": 0.0444537201293958, "grad_norm": 1.5078125, "learning_rate": 0.001998912369086915, "loss": 2.8121, "step": 639 }, { "epoch": 0.04452328776653101, "grad_norm": 1.5, "learning_rate": 0.00199890183712121, "loss": 3.4138, "step": 640 }, { "epoch": 0.04459285540366621, "grad_norm": 1.3515625, "learning_rate": 0.001998891254436305, "loss": 3.1913, "step": 641 }, { "epoch": 0.04466242304080142, "grad_norm": 1.15625, "learning_rate": 0.0019988806210327367, "loss": 3.3442, "step": 642 }, { "epoch": 0.04473199067793662, "grad_norm": 1.3984375, "learning_rate": 0.001998869936911045, "loss": 3.4197, "step": 643 }, { "epoch": 0.04480155831507183, "grad_norm": 1.765625, "learning_rate": 0.0019988592020717725, "loss": 3.5198, "step": 644 }, { "epoch": 0.04487112595220703, "grad_norm": 1.3984375, "learning_rate": 0.001998848416515465, "loss": 3.3542, "step": 645 }, { "epoch": 0.04494069358934224, "grad_norm": 1.59375, "learning_rate": 0.001998837580242669, "loss": 3.1037, "step": 646 }, { "epoch": 0.045010261226477444, "grad_norm": 1.203125, "learning_rate": 0.001998826693253935, "loss": 3.4104, "step": 647 }, { "epoch": 0.04507982886361265, "grad_norm": 1.28125, "learning_rate": 0.0019988157555498164, "loss": 3.5321, "step": 648 }, { "epoch": 0.045149396500747854, "grad_norm": 1.3359375, "learning_rate": 0.001998804767130868, "loss": 3.0773, "step": 649 }, { "epoch": 0.04521896413788306, "grad_norm": 1.359375, "learning_rate": 0.0019987937279976474, "loss": 3.6503, "step": 650 }, { "epoch": 0.045288531775018265, "grad_norm": 1.5625, "learning_rate": 0.001998782638150716, "loss": 3.2508, "step": 651 }, { "epoch": 0.04535809941215346, "grad_norm": 1.671875, "learning_rate": 0.001998771497590637, "loss": 3.3105, "step": 652 }, { "epoch": 0.04542766704928867, "grad_norm": 1.2265625, "learning_rate": 0.001998760306317975, "loss": 3.1075, "step": 653 }, { "epoch": 0.045497234686423874, "grad_norm": 2.40625, "learning_rate": 0.0019987490643332984, "loss": 3.257, "step": 654 }, { "epoch": 0.04556680232355908, "grad_norm": 1.421875, "learning_rate": 0.001998737771637179, "loss": 3.4907, "step": 655 }, { "epoch": 0.045636369960694284, "grad_norm": 1.5234375, "learning_rate": 0.0019987264282301893, "loss": 3.7234, "step": 656 }, { "epoch": 0.04570593759782949, "grad_norm": 1.3359375, "learning_rate": 0.0019987150341129055, "loss": 2.9309, "step": 657 }, { "epoch": 0.045775505234964695, "grad_norm": 1.359375, "learning_rate": 0.001998703589285906, "loss": 3.348, "step": 658 }, { "epoch": 0.0458450728720999, "grad_norm": 1.5859375, "learning_rate": 0.0019986920937497725, "loss": 3.3921, "step": 659 }, { "epoch": 0.045914640509235105, "grad_norm": 1.328125, "learning_rate": 0.001998680547505088, "loss": 3.0439, "step": 660 }, { "epoch": 0.04598420814637031, "grad_norm": 1.2578125, "learning_rate": 0.001998668950552439, "loss": 3.1959, "step": 661 }, { "epoch": 0.046053775783505516, "grad_norm": 2.125, "learning_rate": 0.0019986573028924143, "loss": 3.4075, "step": 662 }, { "epoch": 0.04612334342064072, "grad_norm": 1.421875, "learning_rate": 0.0019986456045256056, "loss": 3.1542, "step": 663 }, { "epoch": 0.046192911057775926, "grad_norm": 1.5, "learning_rate": 0.001998633855452607, "loss": 3.1908, "step": 664 }, { "epoch": 0.046262478694911124, "grad_norm": 1.578125, "learning_rate": 0.001998622055674014, "loss": 3.5424, "step": 665 }, { "epoch": 0.04633204633204633, "grad_norm": 1.28125, "learning_rate": 0.0019986102051904268, "loss": 3.1491, "step": 666 }, { "epoch": 0.046401613969181535, "grad_norm": 1.5625, "learning_rate": 0.0019985983040024468, "loss": 3.4403, "step": 667 }, { "epoch": 0.04647118160631674, "grad_norm": 1.3828125, "learning_rate": 0.001998586352110678, "loss": 3.3588, "step": 668 }, { "epoch": 0.046540749243451945, "grad_norm": 1.234375, "learning_rate": 0.0019985743495157275, "loss": 3.7794, "step": 669 }, { "epoch": 0.04661031688058715, "grad_norm": 1.28125, "learning_rate": 0.0019985622962182046, "loss": 3.3294, "step": 670 }, { "epoch": 0.046679884517722356, "grad_norm": 1.3046875, "learning_rate": 0.001998550192218722, "loss": 3.1803, "step": 671 }, { "epoch": 0.04674945215485756, "grad_norm": 1.6953125, "learning_rate": 0.001998538037517893, "loss": 3.0911, "step": 672 }, { "epoch": 0.046819019791992766, "grad_norm": 1.2421875, "learning_rate": 0.0019985258321163356, "loss": 2.7282, "step": 673 }, { "epoch": 0.04688858742912797, "grad_norm": 1.453125, "learning_rate": 0.00199851357601467, "loss": 2.9728, "step": 674 }, { "epoch": 0.04695815506626318, "grad_norm": 1.0546875, "learning_rate": 0.001998501269213517, "loss": 3.3479, "step": 675 }, { "epoch": 0.04702772270339838, "grad_norm": 1.515625, "learning_rate": 0.001998488911713503, "loss": 3.2668, "step": 676 }, { "epoch": 0.04709729034053358, "grad_norm": 1.7109375, "learning_rate": 0.0019984765035152546, "loss": 3.5445, "step": 677 }, { "epoch": 0.047166857977668786, "grad_norm": 1.2578125, "learning_rate": 0.001998464044619402, "loss": 3.2349, "step": 678 }, { "epoch": 0.04723642561480399, "grad_norm": 2.515625, "learning_rate": 0.0019984515350265778, "loss": 3.1874, "step": 679 }, { "epoch": 0.047305993251939196, "grad_norm": 1.0234375, "learning_rate": 0.0019984389747374175, "loss": 3.4472, "step": 680 }, { "epoch": 0.0473755608890744, "grad_norm": 1.15625, "learning_rate": 0.0019984263637525587, "loss": 3.7444, "step": 681 }, { "epoch": 0.04744512852620961, "grad_norm": 1.4296875, "learning_rate": 0.001998413702072641, "loss": 3.2759, "step": 682 }, { "epoch": 0.04751469616334481, "grad_norm": 1.6015625, "learning_rate": 0.0019984009896983086, "loss": 3.2497, "step": 683 }, { "epoch": 0.04758426380048002, "grad_norm": 1.21875, "learning_rate": 0.0019983882266302057, "loss": 3.6491, "step": 684 }, { "epoch": 0.04765383143761522, "grad_norm": 1.25, "learning_rate": 0.001998375412868981, "loss": 3.5994, "step": 685 }, { "epoch": 0.04772339907475043, "grad_norm": 1.2265625, "learning_rate": 0.001998362548415285, "loss": 3.3485, "step": 686 }, { "epoch": 0.04779296671188563, "grad_norm": 1.7578125, "learning_rate": 0.001998349633269771, "loss": 3.5639, "step": 687 }, { "epoch": 0.04786253434902084, "grad_norm": 1.7265625, "learning_rate": 0.0019983366674330948, "loss": 3.4652, "step": 688 }, { "epoch": 0.047932101986156044, "grad_norm": 1.3828125, "learning_rate": 0.0019983236509059144, "loss": 3.4984, "step": 689 }, { "epoch": 0.04800166962329124, "grad_norm": 1.3828125, "learning_rate": 0.001998310583688891, "loss": 3.1597, "step": 690 }, { "epoch": 0.04807123726042645, "grad_norm": 1.1640625, "learning_rate": 0.001998297465782688, "loss": 3.3416, "step": 691 }, { "epoch": 0.04814080489756165, "grad_norm": 1.40625, "learning_rate": 0.0019982842971879716, "loss": 3.7584, "step": 692 }, { "epoch": 0.04821037253469686, "grad_norm": 1.328125, "learning_rate": 0.0019982710779054102, "loss": 3.1876, "step": 693 }, { "epoch": 0.04827994017183206, "grad_norm": 2.140625, "learning_rate": 0.001998257807935675, "loss": 3.5906, "step": 694 }, { "epoch": 0.04834950780896727, "grad_norm": 1.21875, "learning_rate": 0.00199824448727944, "loss": 3.512, "step": 695 }, { "epoch": 0.04841907544610247, "grad_norm": 1.1015625, "learning_rate": 0.0019982311159373817, "loss": 3.5001, "step": 696 }, { "epoch": 0.04848864308323768, "grad_norm": 1.3515625, "learning_rate": 0.0019982176939101785, "loss": 3.4267, "step": 697 }, { "epoch": 0.048558210720372884, "grad_norm": 1.1171875, "learning_rate": 0.001998204221198512, "loss": 3.5476, "step": 698 }, { "epoch": 0.04862777835750809, "grad_norm": 1.125, "learning_rate": 0.001998190697803067, "loss": 3.7046, "step": 699 }, { "epoch": 0.048697345994643294, "grad_norm": 1.1640625, "learning_rate": 0.0019981771237245296, "loss": 3.4709, "step": 700 }, { "epoch": 0.0487669136317785, "grad_norm": 1.421875, "learning_rate": 0.0019981634989635886, "loss": 3.6087, "step": 701 }, { "epoch": 0.0488364812689137, "grad_norm": 1.0625, "learning_rate": 0.0019981498235209366, "loss": 3.3568, "step": 702 }, { "epoch": 0.0489060489060489, "grad_norm": 2.09375, "learning_rate": 0.0019981360973972675, "loss": 3.1876, "step": 703 }, { "epoch": 0.04897561654318411, "grad_norm": 1.484375, "learning_rate": 0.0019981223205932782, "loss": 3.2481, "step": 704 }, { "epoch": 0.049045184180319314, "grad_norm": 1.1796875, "learning_rate": 0.0019981084931096687, "loss": 3.5898, "step": 705 }, { "epoch": 0.04911475181745452, "grad_norm": 1.8828125, "learning_rate": 0.0019980946149471403, "loss": 3.5733, "step": 706 }, { "epoch": 0.049184319454589724, "grad_norm": 1.4296875, "learning_rate": 0.001998080686106399, "loss": 3.2213, "step": 707 }, { "epoch": 0.04925388709172493, "grad_norm": 1.7734375, "learning_rate": 0.00199806670658815, "loss": 2.8544, "step": 708 }, { "epoch": 0.049323454728860135, "grad_norm": 1.421875, "learning_rate": 0.001998052676393105, "loss": 3.8152, "step": 709 }, { "epoch": 0.04939302236599534, "grad_norm": 1.6796875, "learning_rate": 0.0019980385955219756, "loss": 3.4783, "step": 710 }, { "epoch": 0.049462590003130545, "grad_norm": 1.921875, "learning_rate": 0.0019980244639754767, "loss": 3.3807, "step": 711 }, { "epoch": 0.04953215764026575, "grad_norm": 1.296875, "learning_rate": 0.0019980102817543258, "loss": 3.7659, "step": 712 }, { "epoch": 0.049601725277400956, "grad_norm": 1.6953125, "learning_rate": 0.001997996048859243, "loss": 3.4794, "step": 713 }, { "epoch": 0.04967129291453616, "grad_norm": 1.6015625, "learning_rate": 0.0019979817652909515, "loss": 3.496, "step": 714 }, { "epoch": 0.04974086055167136, "grad_norm": 1.0546875, "learning_rate": 0.0019979674310501763, "loss": 3.189, "step": 715 }, { "epoch": 0.049810428188806564, "grad_norm": 1.171875, "learning_rate": 0.0019979530461376447, "loss": 3.2305, "step": 716 }, { "epoch": 0.04987999582594177, "grad_norm": 1.2890625, "learning_rate": 0.001997938610554087, "loss": 3.175, "step": 717 }, { "epoch": 0.049949563463076975, "grad_norm": 0.96875, "learning_rate": 0.0019979241243002375, "loss": 3.3976, "step": 718 }, { "epoch": 0.05001913110021218, "grad_norm": 1.1953125, "learning_rate": 0.0019979095873768307, "loss": 3.2898, "step": 719 }, { "epoch": 0.050088698737347385, "grad_norm": 1.53125, "learning_rate": 0.0019978949997846046, "loss": 3.3068, "step": 720 }, { "epoch": 0.05015826637448259, "grad_norm": 1.6484375, "learning_rate": 0.0019978803615243006, "loss": 2.9773, "step": 721 }, { "epoch": 0.050227834011617796, "grad_norm": 1.21875, "learning_rate": 0.0019978656725966612, "loss": 3.5996, "step": 722 }, { "epoch": 0.050297401648753, "grad_norm": 1.6875, "learning_rate": 0.0019978509330024325, "loss": 2.9751, "step": 723 }, { "epoch": 0.050366969285888206, "grad_norm": 1.6015625, "learning_rate": 0.0019978361427423633, "loss": 3.2896, "step": 724 }, { "epoch": 0.05043653692302341, "grad_norm": 1.6875, "learning_rate": 0.001997821301817204, "loss": 3.4558, "step": 725 }, { "epoch": 0.05050610456015862, "grad_norm": 1.6796875, "learning_rate": 0.0019978064102277085, "loss": 3.5634, "step": 726 }, { "epoch": 0.05057567219729382, "grad_norm": 1.296875, "learning_rate": 0.0019977914679746326, "loss": 3.643, "step": 727 }, { "epoch": 0.05064523983442902, "grad_norm": 1.265625, "learning_rate": 0.0019977764750587356, "loss": 3.5794, "step": 728 }, { "epoch": 0.050714807471564226, "grad_norm": 1.6875, "learning_rate": 0.001997761431480778, "loss": 3.5215, "step": 729 }, { "epoch": 0.05078437510869943, "grad_norm": 1.0234375, "learning_rate": 0.0019977463372415237, "loss": 3.1712, "step": 730 }, { "epoch": 0.050853942745834636, "grad_norm": 1.125, "learning_rate": 0.00199773119234174, "loss": 3.6297, "step": 731 }, { "epoch": 0.05092351038296984, "grad_norm": 1.265625, "learning_rate": 0.001997715996782195, "loss": 3.3124, "step": 732 }, { "epoch": 0.05099307802010505, "grad_norm": 1.5390625, "learning_rate": 0.0019977007505636605, "loss": 3.1167, "step": 733 }, { "epoch": 0.05106264565724025, "grad_norm": 1.2890625, "learning_rate": 0.0019976854536869113, "loss": 3.2124, "step": 734 }, { "epoch": 0.05113221329437546, "grad_norm": 1.5625, "learning_rate": 0.001997670106152723, "loss": 3.2263, "step": 735 }, { "epoch": 0.05120178093151066, "grad_norm": 1.4375, "learning_rate": 0.001997654707961875, "loss": 3.3927, "step": 736 }, { "epoch": 0.05127134856864587, "grad_norm": 1.375, "learning_rate": 0.0019976392591151497, "loss": 3.4537, "step": 737 }, { "epoch": 0.05134091620578107, "grad_norm": 1.1328125, "learning_rate": 0.0019976237596133315, "loss": 3.4768, "step": 738 }, { "epoch": 0.05141048384291628, "grad_norm": 1.21875, "learning_rate": 0.0019976082094572073, "loss": 3.4063, "step": 739 }, { "epoch": 0.05148005148005148, "grad_norm": 1.46875, "learning_rate": 0.0019975926086475662, "loss": 3.5262, "step": 740 }, { "epoch": 0.05154961911718668, "grad_norm": 1.2890625, "learning_rate": 0.001997576957185201, "loss": 3.1927, "step": 741 }, { "epoch": 0.05161918675432189, "grad_norm": 1.9453125, "learning_rate": 0.0019975612550709055, "loss": 3.4512, "step": 742 }, { "epoch": 0.05168875439145709, "grad_norm": 1.1640625, "learning_rate": 0.001997545502305478, "loss": 3.3019, "step": 743 }, { "epoch": 0.0517583220285923, "grad_norm": 1.2890625, "learning_rate": 0.001997529698889718, "loss": 2.9856, "step": 744 }, { "epoch": 0.0518278896657275, "grad_norm": 1.171875, "learning_rate": 0.0019975138448244272, "loss": 2.9967, "step": 745 }, { "epoch": 0.05189745730286271, "grad_norm": 1.3359375, "learning_rate": 0.001997497940110412, "loss": 3.3576, "step": 746 }, { "epoch": 0.05196702493999791, "grad_norm": 0.921875, "learning_rate": 0.0019974819847484787, "loss": 3.4691, "step": 747 }, { "epoch": 0.05203659257713312, "grad_norm": 1.1015625, "learning_rate": 0.001997465978739438, "loss": 3.7629, "step": 748 }, { "epoch": 0.052106160214268324, "grad_norm": 1.359375, "learning_rate": 0.0019974499220841023, "loss": 3.1283, "step": 749 }, { "epoch": 0.05217572785140353, "grad_norm": 1.453125, "learning_rate": 0.0019974338147832876, "loss": 3.7169, "step": 750 }, { "epoch": 0.052245295488538734, "grad_norm": 1.1875, "learning_rate": 0.0019974176568378107, "loss": 3.3305, "step": 751 }, { "epoch": 0.05231486312567394, "grad_norm": 1.1796875, "learning_rate": 0.001997401448248493, "loss": 2.918, "step": 752 }, { "epoch": 0.05238443076280914, "grad_norm": 1.4921875, "learning_rate": 0.0019973851890161564, "loss": 3.1841, "step": 753 }, { "epoch": 0.05245399839994434, "grad_norm": 1.3671875, "learning_rate": 0.001997368879141628, "loss": 3.4353, "step": 754 }, { "epoch": 0.05252356603707955, "grad_norm": 0.98828125, "learning_rate": 0.0019973525186257344, "loss": 3.779, "step": 755 }, { "epoch": 0.052593133674214754, "grad_norm": 1.2265625, "learning_rate": 0.0019973361074693066, "loss": 3.8655, "step": 756 }, { "epoch": 0.05266270131134996, "grad_norm": 1.03125, "learning_rate": 0.001997319645673179, "loss": 3.0859, "step": 757 }, { "epoch": 0.052732268948485164, "grad_norm": 1.203125, "learning_rate": 0.001997303133238186, "loss": 3.3803, "step": 758 }, { "epoch": 0.05280183658562037, "grad_norm": 1.25, "learning_rate": 0.001997286570165167, "loss": 3.1753, "step": 759 }, { "epoch": 0.052871404222755575, "grad_norm": 1.515625, "learning_rate": 0.0019972699564549624, "loss": 3.5117, "step": 760 }, { "epoch": 0.05294097185989078, "grad_norm": 1.234375, "learning_rate": 0.0019972532921084165, "loss": 3.3453, "step": 761 }, { "epoch": 0.053010539497025985, "grad_norm": 1.6953125, "learning_rate": 0.001997236577126375, "loss": 3.1444, "step": 762 }, { "epoch": 0.05308010713416119, "grad_norm": 1.2578125, "learning_rate": 0.001997219811509686, "loss": 2.9711, "step": 763 }, { "epoch": 0.053149674771296396, "grad_norm": 1.28125, "learning_rate": 0.0019972029952592014, "loss": 3.7106, "step": 764 }, { "epoch": 0.0532192424084316, "grad_norm": 1.5234375, "learning_rate": 0.0019971861283757755, "loss": 3.5073, "step": 765 }, { "epoch": 0.0532888100455668, "grad_norm": 1.34375, "learning_rate": 0.0019971692108602637, "loss": 3.1031, "step": 766 }, { "epoch": 0.053358377682702005, "grad_norm": 1.4921875, "learning_rate": 0.001997152242713526, "loss": 3.2373, "step": 767 }, { "epoch": 0.05342794531983721, "grad_norm": 1.203125, "learning_rate": 0.0019971352239364225, "loss": 3.1924, "step": 768 }, { "epoch": 0.053497512956972415, "grad_norm": 1.125, "learning_rate": 0.001997118154529819, "loss": 3.5255, "step": 769 }, { "epoch": 0.05356708059410762, "grad_norm": 1.28125, "learning_rate": 0.001997101034494581, "loss": 3.5165, "step": 770 }, { "epoch": 0.053636648231242826, "grad_norm": 0.96875, "learning_rate": 0.001997083863831579, "loss": 3.2297, "step": 771 }, { "epoch": 0.05370621586837803, "grad_norm": 1.4140625, "learning_rate": 0.0019970666425416835, "loss": 3.3447, "step": 772 }, { "epoch": 0.053775783505513236, "grad_norm": 1.3046875, "learning_rate": 0.0019970493706257695, "loss": 3.6247, "step": 773 }, { "epoch": 0.05384535114264844, "grad_norm": 1.53125, "learning_rate": 0.001997032048084714, "loss": 3.2659, "step": 774 }, { "epoch": 0.05391491877978365, "grad_norm": 1.0546875, "learning_rate": 0.0019970146749193965, "loss": 3.3362, "step": 775 }, { "epoch": 0.05398448641691885, "grad_norm": 1.3671875, "learning_rate": 0.0019969972511306995, "loss": 3.8919, "step": 776 }, { "epoch": 0.05405405405405406, "grad_norm": 1.6640625, "learning_rate": 0.0019969797767195067, "loss": 3.1955, "step": 777 }, { "epoch": 0.054123621691189255, "grad_norm": 1.53125, "learning_rate": 0.0019969622516867063, "loss": 3.4623, "step": 778 }, { "epoch": 0.05419318932832446, "grad_norm": 0.93359375, "learning_rate": 0.001996944676033188, "loss": 3.5068, "step": 779 }, { "epoch": 0.054262756965459666, "grad_norm": 1.1171875, "learning_rate": 0.0019969270497598437, "loss": 3.8145, "step": 780 }, { "epoch": 0.05433232460259487, "grad_norm": 1.296875, "learning_rate": 0.001996909372867569, "loss": 3.3767, "step": 781 }, { "epoch": 0.054401892239730076, "grad_norm": 1.21875, "learning_rate": 0.001996891645357261, "loss": 3.5851, "step": 782 }, { "epoch": 0.05447145987686528, "grad_norm": 1.3046875, "learning_rate": 0.0019968738672298198, "loss": 3.4255, "step": 783 }, { "epoch": 0.05454102751400049, "grad_norm": 0.9921875, "learning_rate": 0.0019968560384861487, "loss": 3.5207, "step": 784 }, { "epoch": 0.05461059515113569, "grad_norm": 1.3671875, "learning_rate": 0.001996838159127152, "loss": 3.5369, "step": 785 }, { "epoch": 0.0546801627882709, "grad_norm": 1.5703125, "learning_rate": 0.0019968202291537384, "loss": 3.1211, "step": 786 }, { "epoch": 0.0547497304254061, "grad_norm": 1.65625, "learning_rate": 0.0019968022485668175, "loss": 3.6356, "step": 787 }, { "epoch": 0.05481929806254131, "grad_norm": 2.125, "learning_rate": 0.0019967842173673027, "loss": 3.6714, "step": 788 }, { "epoch": 0.05488886569967651, "grad_norm": 1.5625, "learning_rate": 0.00199676613555611, "loss": 3.1509, "step": 789 }, { "epoch": 0.05495843333681172, "grad_norm": 1.421875, "learning_rate": 0.0019967480031341566, "loss": 3.2795, "step": 790 }, { "epoch": 0.05502800097394692, "grad_norm": 1.171875, "learning_rate": 0.0019967298201023637, "loss": 3.2036, "step": 791 }, { "epoch": 0.05509756861108212, "grad_norm": 1.0625, "learning_rate": 0.0019967115864616544, "loss": 3.2689, "step": 792 }, { "epoch": 0.05516713624821733, "grad_norm": 1.0234375, "learning_rate": 0.0019966933022129542, "loss": 3.3734, "step": 793 }, { "epoch": 0.05523670388535253, "grad_norm": 1.40625, "learning_rate": 0.001996674967357192, "loss": 2.7514, "step": 794 }, { "epoch": 0.05530627152248774, "grad_norm": 1.3359375, "learning_rate": 0.001996656581895299, "loss": 3.4004, "step": 795 }, { "epoch": 0.05537583915962294, "grad_norm": 1.2890625, "learning_rate": 0.0019966381458282082, "loss": 3.0403, "step": 796 }, { "epoch": 0.05544540679675815, "grad_norm": 1.0859375, "learning_rate": 0.0019966196591568557, "loss": 3.2993, "step": 797 }, { "epoch": 0.05551497443389335, "grad_norm": 1.4921875, "learning_rate": 0.00199660112188218, "loss": 3.3328, "step": 798 }, { "epoch": 0.05558454207102856, "grad_norm": 0.98828125, "learning_rate": 0.001996582534005123, "loss": 3.5094, "step": 799 }, { "epoch": 0.055654109708163764, "grad_norm": 1.234375, "learning_rate": 0.0019965638955266275, "loss": 3.4173, "step": 800 }, { "epoch": 0.05572367734529897, "grad_norm": 1.75, "learning_rate": 0.0019965452064476404, "loss": 3.6744, "step": 801 }, { "epoch": 0.055793244982434174, "grad_norm": 1.7109375, "learning_rate": 0.0019965264667691114, "loss": 3.2192, "step": 802 }, { "epoch": 0.05586281261956938, "grad_norm": 1.40625, "learning_rate": 0.0019965076764919907, "loss": 3.1377, "step": 803 }, { "epoch": 0.05593238025670458, "grad_norm": 1.4375, "learning_rate": 0.001996488835617233, "loss": 3.2694, "step": 804 }, { "epoch": 0.05600194789383978, "grad_norm": 1.1015625, "learning_rate": 0.0019964699441457952, "loss": 3.5418, "step": 805 }, { "epoch": 0.05607151553097499, "grad_norm": 1.25, "learning_rate": 0.001996451002078636, "loss": 3.1088, "step": 806 }, { "epoch": 0.056141083168110194, "grad_norm": 1.109375, "learning_rate": 0.0019964320094167176, "loss": 3.3395, "step": 807 }, { "epoch": 0.0562106508052454, "grad_norm": 1.9296875, "learning_rate": 0.001996412966161004, "loss": 3.2299, "step": 808 }, { "epoch": 0.056280218442380604, "grad_norm": 1.3046875, "learning_rate": 0.0019963938723124622, "loss": 3.4256, "step": 809 }, { "epoch": 0.05634978607951581, "grad_norm": 1.25, "learning_rate": 0.001996374727872062, "loss": 3.3638, "step": 810 }, { "epoch": 0.056419353716651015, "grad_norm": 1.2109375, "learning_rate": 0.001996355532840775, "loss": 3.8451, "step": 811 }, { "epoch": 0.05648892135378622, "grad_norm": 1.09375, "learning_rate": 0.001996336287219576, "loss": 3.2477, "step": 812 }, { "epoch": 0.056558488990921425, "grad_norm": 1.4921875, "learning_rate": 0.0019963169910094426, "loss": 3.607, "step": 813 }, { "epoch": 0.05662805662805663, "grad_norm": 1.359375, "learning_rate": 0.0019962976442113537, "loss": 3.2908, "step": 814 }, { "epoch": 0.056697624265191836, "grad_norm": 1.09375, "learning_rate": 0.0019962782468262927, "loss": 3.3164, "step": 815 }, { "epoch": 0.056767191902327034, "grad_norm": 1.0859375, "learning_rate": 0.0019962587988552436, "loss": 3.2608, "step": 816 }, { "epoch": 0.05683675953946224, "grad_norm": 1.625, "learning_rate": 0.0019962393002991943, "loss": 2.8271, "step": 817 }, { "epoch": 0.056906327176597445, "grad_norm": 1.7109375, "learning_rate": 0.0019962197511591345, "loss": 3.2975, "step": 818 }, { "epoch": 0.05697589481373265, "grad_norm": 1.4921875, "learning_rate": 0.0019962001514360573, "loss": 3.4618, "step": 819 }, { "epoch": 0.057045462450867855, "grad_norm": 1.3359375, "learning_rate": 0.0019961805011309577, "loss": 3.2017, "step": 820 }, { "epoch": 0.05711503008800306, "grad_norm": 1.453125, "learning_rate": 0.0019961608002448334, "loss": 3.4054, "step": 821 }, { "epoch": 0.057184597725138266, "grad_norm": 1.203125, "learning_rate": 0.0019961410487786845, "loss": 3.5605, "step": 822 }, { "epoch": 0.05725416536227347, "grad_norm": 0.921875, "learning_rate": 0.0019961212467335143, "loss": 3.4814, "step": 823 }, { "epoch": 0.057323732999408676, "grad_norm": 1.203125, "learning_rate": 0.0019961013941103274, "loss": 3.1413, "step": 824 }, { "epoch": 0.05739330063654388, "grad_norm": 1.9921875, "learning_rate": 0.001996081490910133, "loss": 3.4585, "step": 825 }, { "epoch": 0.05746286827367909, "grad_norm": 2.84375, "learning_rate": 0.001996061537133941, "loss": 3.4995, "step": 826 }, { "epoch": 0.05753243591081429, "grad_norm": 1.5703125, "learning_rate": 0.0019960415327827646, "loss": 3.4805, "step": 827 }, { "epoch": 0.0576020035479495, "grad_norm": 1.078125, "learning_rate": 0.0019960214778576195, "loss": 3.0566, "step": 828 }, { "epoch": 0.057671571185084695, "grad_norm": 1.2265625, "learning_rate": 0.001996001372359524, "loss": 3.0971, "step": 829 }, { "epoch": 0.0577411388222199, "grad_norm": 1.265625, "learning_rate": 0.0019959812162894997, "loss": 3.596, "step": 830 }, { "epoch": 0.057810706459355106, "grad_norm": 0.9609375, "learning_rate": 0.001995961009648569, "loss": 3.5438, "step": 831 }, { "epoch": 0.05788027409649031, "grad_norm": 0.75, "learning_rate": 0.001995940752437758, "loss": 3.7194, "step": 832 }, { "epoch": 0.057949841733625516, "grad_norm": 1.2578125, "learning_rate": 0.0019959204446580955, "loss": 3.187, "step": 833 }, { "epoch": 0.05801940937076072, "grad_norm": 1.4609375, "learning_rate": 0.001995900086310613, "loss": 3.2581, "step": 834 }, { "epoch": 0.05808897700789593, "grad_norm": 1.375, "learning_rate": 0.0019958796773963433, "loss": 3.3424, "step": 835 }, { "epoch": 0.05815854464503113, "grad_norm": 1.125, "learning_rate": 0.0019958592179163234, "loss": 3.5114, "step": 836 }, { "epoch": 0.05822811228216634, "grad_norm": 1.1953125, "learning_rate": 0.0019958387078715923, "loss": 3.2554, "step": 837 }, { "epoch": 0.05829767991930154, "grad_norm": 1.6796875, "learning_rate": 0.0019958181472631907, "loss": 3.3679, "step": 838 }, { "epoch": 0.05836724755643675, "grad_norm": 1.015625, "learning_rate": 0.001995797536092163, "loss": 3.3486, "step": 839 }, { "epoch": 0.05843681519357195, "grad_norm": 1.2421875, "learning_rate": 0.001995776874359555, "loss": 3.0417, "step": 840 }, { "epoch": 0.05850638283070716, "grad_norm": 1.5546875, "learning_rate": 0.001995756162066417, "loss": 3.4033, "step": 841 }, { "epoch": 0.05857595046784236, "grad_norm": 1.3203125, "learning_rate": 0.0019957353992138003, "loss": 3.1399, "step": 842 }, { "epoch": 0.05864551810497756, "grad_norm": 1.0234375, "learning_rate": 0.0019957145858027587, "loss": 3.3802, "step": 843 }, { "epoch": 0.05871508574211277, "grad_norm": 1.1328125, "learning_rate": 0.001995693721834349, "loss": 3.0986, "step": 844 }, { "epoch": 0.05878465337924797, "grad_norm": 0.98828125, "learning_rate": 0.001995672807309631, "loss": 3.2547, "step": 845 }, { "epoch": 0.05885422101638318, "grad_norm": 0.95703125, "learning_rate": 0.001995651842229666, "loss": 3.1011, "step": 846 }, { "epoch": 0.05892378865351838, "grad_norm": 1.3046875, "learning_rate": 0.0019956308265955194, "loss": 3.4335, "step": 847 }, { "epoch": 0.05899335629065359, "grad_norm": 1.3203125, "learning_rate": 0.0019956097604082574, "loss": 2.7745, "step": 848 }, { "epoch": 0.059062923927788794, "grad_norm": 1.2578125, "learning_rate": 0.00199558864366895, "loss": 3.4019, "step": 849 }, { "epoch": 0.059132491564924, "grad_norm": 1.375, "learning_rate": 0.0019955674763786698, "loss": 3.3095, "step": 850 }, { "epoch": 0.059202059202059204, "grad_norm": 1.3203125, "learning_rate": 0.001995546258538491, "loss": 3.3396, "step": 851 }, { "epoch": 0.05927162683919441, "grad_norm": 1.1875, "learning_rate": 0.001995524990149491, "loss": 3.4062, "step": 852 }, { "epoch": 0.059341194476329615, "grad_norm": 1.4453125, "learning_rate": 0.00199550367121275, "loss": 3.2541, "step": 853 }, { "epoch": 0.05941076211346481, "grad_norm": 2.4375, "learning_rate": 0.00199548230172935, "loss": 3.0319, "step": 854 }, { "epoch": 0.05948032975060002, "grad_norm": 1.8359375, "learning_rate": 0.001995460881700377, "loss": 3.1415, "step": 855 }, { "epoch": 0.05954989738773522, "grad_norm": 1.375, "learning_rate": 0.001995439411126917, "loss": 3.4585, "step": 856 }, { "epoch": 0.05961946502487043, "grad_norm": 1.0703125, "learning_rate": 0.0019954178900100615, "loss": 3.2141, "step": 857 }, { "epoch": 0.059689032662005634, "grad_norm": 1.1484375, "learning_rate": 0.001995396318350903, "loss": 3.3658, "step": 858 }, { "epoch": 0.05975860029914084, "grad_norm": 1.1875, "learning_rate": 0.0019953746961505364, "loss": 3.5806, "step": 859 }, { "epoch": 0.059828167936276044, "grad_norm": 1.296875, "learning_rate": 0.00199535302341006, "loss": 3.4283, "step": 860 }, { "epoch": 0.05989773557341125, "grad_norm": 1.1953125, "learning_rate": 0.0019953313001305735, "loss": 3.2957, "step": 861 }, { "epoch": 0.059967303210546455, "grad_norm": 1.1953125, "learning_rate": 0.001995309526313181, "loss": 3.1868, "step": 862 }, { "epoch": 0.06003687084768166, "grad_norm": 1.296875, "learning_rate": 0.001995287701958987, "loss": 3.5114, "step": 863 }, { "epoch": 0.060106438484816865, "grad_norm": 1.453125, "learning_rate": 0.0019952658270691007, "loss": 3.2758, "step": 864 }, { "epoch": 0.06017600612195207, "grad_norm": 1.0625, "learning_rate": 0.001995243901644632, "loss": 3.0768, "step": 865 }, { "epoch": 0.060245573759087276, "grad_norm": 2.046875, "learning_rate": 0.0019952219256866945, "loss": 3.4132, "step": 866 }, { "epoch": 0.060315141396222474, "grad_norm": 1.0625, "learning_rate": 0.0019951998991964036, "loss": 3.3159, "step": 867 }, { "epoch": 0.06038470903335768, "grad_norm": 1.1015625, "learning_rate": 0.001995177822174878, "loss": 3.1059, "step": 868 }, { "epoch": 0.060454276670492885, "grad_norm": 1.09375, "learning_rate": 0.0019951556946232385, "loss": 3.3609, "step": 869 }, { "epoch": 0.06052384430762809, "grad_norm": 1.015625, "learning_rate": 0.001995133516542609, "loss": 3.1245, "step": 870 }, { "epoch": 0.060593411944763295, "grad_norm": 2.46875, "learning_rate": 0.0019951112879341157, "loss": 3.4607, "step": 871 }, { "epoch": 0.0606629795818985, "grad_norm": 1.1171875, "learning_rate": 0.0019950890087988868, "loss": 3.9611, "step": 872 }, { "epoch": 0.060732547219033706, "grad_norm": 1.1015625, "learning_rate": 0.0019950666791380533, "loss": 3.0612, "step": 873 }, { "epoch": 0.06080211485616891, "grad_norm": 0.86328125, "learning_rate": 0.0019950442989527493, "loss": 3.4439, "step": 874 }, { "epoch": 0.060871682493304116, "grad_norm": 1.1796875, "learning_rate": 0.0019950218682441116, "loss": 3.4221, "step": 875 }, { "epoch": 0.06094125013043932, "grad_norm": 1.078125, "learning_rate": 0.0019949993870132785, "loss": 3.5011, "step": 876 }, { "epoch": 0.06101081776757453, "grad_norm": 1.2890625, "learning_rate": 0.001994976855261392, "loss": 3.1799, "step": 877 }, { "epoch": 0.06108038540470973, "grad_norm": 1.0234375, "learning_rate": 0.0019949542729895955, "loss": 3.4673, "step": 878 }, { "epoch": 0.06114995304184494, "grad_norm": 1.6015625, "learning_rate": 0.001994931640199036, "loss": 3.3861, "step": 879 }, { "epoch": 0.061219520678980135, "grad_norm": 1.0078125, "learning_rate": 0.0019949089568908627, "loss": 3.7525, "step": 880 }, { "epoch": 0.06128908831611534, "grad_norm": 1.2421875, "learning_rate": 0.001994886223066227, "loss": 3.5964, "step": 881 }, { "epoch": 0.061358655953250546, "grad_norm": 1.078125, "learning_rate": 0.001994863438726284, "loss": 3.6425, "step": 882 }, { "epoch": 0.06142822359038575, "grad_norm": 1.3671875, "learning_rate": 0.0019948406038721896, "loss": 3.5512, "step": 883 }, { "epoch": 0.061497791227520957, "grad_norm": 1.359375, "learning_rate": 0.001994817718505104, "loss": 3.445, "step": 884 }, { "epoch": 0.06156735886465616, "grad_norm": 1.6875, "learning_rate": 0.001994794782626189, "loss": 2.9935, "step": 885 }, { "epoch": 0.06163692650179137, "grad_norm": 1.1953125, "learning_rate": 0.0019947717962366085, "loss": 3.5828, "step": 886 }, { "epoch": 0.06170649413892657, "grad_norm": 1.0703125, "learning_rate": 0.001994748759337531, "loss": 3.1945, "step": 887 }, { "epoch": 0.06177606177606178, "grad_norm": 1.296875, "learning_rate": 0.001994725671930125, "loss": 3.4912, "step": 888 }, { "epoch": 0.06184562941319698, "grad_norm": 1.3125, "learning_rate": 0.001994702534015563, "loss": 3.0837, "step": 889 }, { "epoch": 0.06191519705033219, "grad_norm": 1.0703125, "learning_rate": 0.00199467934559502, "loss": 3.4557, "step": 890 }, { "epoch": 0.06198476468746739, "grad_norm": 1.0703125, "learning_rate": 0.001994656106669674, "loss": 3.282, "step": 891 }, { "epoch": 0.06205433232460259, "grad_norm": 1.2421875, "learning_rate": 0.0019946328172407036, "loss": 3.8112, "step": 892 }, { "epoch": 0.0621238999617378, "grad_norm": 1.1796875, "learning_rate": 0.0019946094773092924, "loss": 3.429, "step": 893 }, { "epoch": 0.062193467598873, "grad_norm": 1.078125, "learning_rate": 0.001994586086876625, "loss": 3.5757, "step": 894 }, { "epoch": 0.06226303523600821, "grad_norm": 1.125, "learning_rate": 0.0019945626459438896, "loss": 3.4571, "step": 895 }, { "epoch": 0.06233260287314341, "grad_norm": 1.1015625, "learning_rate": 0.0019945391545122754, "loss": 2.874, "step": 896 }, { "epoch": 0.06240217051027862, "grad_norm": 1.1640625, "learning_rate": 0.001994515612582976, "loss": 2.7974, "step": 897 }, { "epoch": 0.06247173814741382, "grad_norm": 1.3671875, "learning_rate": 0.0019944920201571867, "loss": 3.2491, "step": 898 }, { "epoch": 0.06254130578454903, "grad_norm": 1.1953125, "learning_rate": 0.0019944683772361053, "loss": 3.3027, "step": 899 }, { "epoch": 0.06261087342168423, "grad_norm": 1.359375, "learning_rate": 0.001994444683820932, "loss": 3.3587, "step": 900 }, { "epoch": 0.06268044105881944, "grad_norm": 1.171875, "learning_rate": 0.00199442093991287, "loss": 3.7572, "step": 901 }, { "epoch": 0.06275000869595464, "grad_norm": 0.97265625, "learning_rate": 0.001994397145513125, "loss": 3.4524, "step": 902 }, { "epoch": 0.06281957633308985, "grad_norm": 1.3984375, "learning_rate": 0.0019943733006229053, "loss": 3.2251, "step": 903 }, { "epoch": 0.06288914397022505, "grad_norm": 1.0546875, "learning_rate": 0.001994349405243421, "loss": 3.4437, "step": 904 }, { "epoch": 0.06295871160736026, "grad_norm": 1.2578125, "learning_rate": 0.001994325459375886, "loss": 3.3404, "step": 905 }, { "epoch": 0.06302827924449546, "grad_norm": 1.1875, "learning_rate": 0.001994301463021516, "loss": 3.4911, "step": 906 }, { "epoch": 0.06309784688163067, "grad_norm": 1.0234375, "learning_rate": 0.001994277416181529, "loss": 3.2754, "step": 907 }, { "epoch": 0.06316741451876587, "grad_norm": 1.5625, "learning_rate": 0.001994253318857147, "loss": 3.1639, "step": 908 }, { "epoch": 0.06323698215590108, "grad_norm": 1.6015625, "learning_rate": 0.001994229171049592, "loss": 3.0842, "step": 909 }, { "epoch": 0.06330654979303628, "grad_norm": 1.109375, "learning_rate": 0.001994204972760092, "loss": 3.7311, "step": 910 }, { "epoch": 0.06337611743017148, "grad_norm": 1.171875, "learning_rate": 0.001994180723989874, "loss": 3.3897, "step": 911 }, { "epoch": 0.06344568506730669, "grad_norm": 1.2734375, "learning_rate": 0.00199415642474017, "loss": 3.3411, "step": 912 }, { "epoch": 0.06351525270444189, "grad_norm": 1.2265625, "learning_rate": 0.0019941320750122135, "loss": 3.3479, "step": 913 }, { "epoch": 0.0635848203415771, "grad_norm": 0.796875, "learning_rate": 0.0019941076748072415, "loss": 3.6582, "step": 914 }, { "epoch": 0.0636543879787123, "grad_norm": 1.1171875, "learning_rate": 0.001994083224126492, "loss": 3.0862, "step": 915 }, { "epoch": 0.06372395561584751, "grad_norm": 1.2265625, "learning_rate": 0.001994058722971207, "loss": 3.3082, "step": 916 }, { "epoch": 0.06379352325298271, "grad_norm": 0.9921875, "learning_rate": 0.0019940341713426306, "loss": 3.7296, "step": 917 }, { "epoch": 0.06386309089011792, "grad_norm": 1.1640625, "learning_rate": 0.001994009569242009, "loss": 3.6124, "step": 918 }, { "epoch": 0.06393265852725312, "grad_norm": 1.4375, "learning_rate": 0.001993984916670592, "loss": 3.1775, "step": 919 }, { "epoch": 0.06400222616438833, "grad_norm": 1.2734375, "learning_rate": 0.001993960213629631, "loss": 2.9769, "step": 920 }, { "epoch": 0.06407179380152353, "grad_norm": 1.1953125, "learning_rate": 0.0019939354601203802, "loss": 3.117, "step": 921 }, { "epoch": 0.06414136143865874, "grad_norm": 0.99609375, "learning_rate": 0.0019939106561440963, "loss": 3.3155, "step": 922 }, { "epoch": 0.06421092907579394, "grad_norm": 0.98828125, "learning_rate": 0.0019938858017020393, "loss": 3.7916, "step": 923 }, { "epoch": 0.06428049671292914, "grad_norm": 1.140625, "learning_rate": 0.0019938608967954704, "loss": 3.5828, "step": 924 }, { "epoch": 0.06435006435006435, "grad_norm": 1.3125, "learning_rate": 0.001993835941425655, "loss": 3.4896, "step": 925 }, { "epoch": 0.06441963198719955, "grad_norm": 1.0234375, "learning_rate": 0.00199381093559386, "loss": 3.454, "step": 926 }, { "epoch": 0.06448919962433476, "grad_norm": 0.94921875, "learning_rate": 0.0019937858793013545, "loss": 3.5434, "step": 927 }, { "epoch": 0.06455876726146996, "grad_norm": 0.796875, "learning_rate": 0.001993760772549411, "loss": 3.5979, "step": 928 }, { "epoch": 0.06462833489860517, "grad_norm": 1.0078125, "learning_rate": 0.0019937356153393046, "loss": 3.6209, "step": 929 }, { "epoch": 0.06469790253574037, "grad_norm": 1.1328125, "learning_rate": 0.0019937104076723127, "loss": 3.1411, "step": 930 }, { "epoch": 0.06476747017287558, "grad_norm": 1.296875, "learning_rate": 0.0019936851495497144, "loss": 3.1043, "step": 931 }, { "epoch": 0.06483703781001078, "grad_norm": 1.0859375, "learning_rate": 0.001993659840972793, "loss": 3.3624, "step": 932 }, { "epoch": 0.06490660544714599, "grad_norm": 1.0859375, "learning_rate": 0.0019936344819428335, "loss": 3.3843, "step": 933 }, { "epoch": 0.06497617308428119, "grad_norm": 1.1015625, "learning_rate": 0.001993609072461123, "loss": 3.1549, "step": 934 }, { "epoch": 0.0650457407214164, "grad_norm": 1.25, "learning_rate": 0.001993583612528952, "loss": 3.5149, "step": 935 }, { "epoch": 0.0651153083585516, "grad_norm": 1.0078125, "learning_rate": 0.0019935581021476136, "loss": 3.5026, "step": 936 }, { "epoch": 0.0651848759956868, "grad_norm": 1.171875, "learning_rate": 0.001993532541318402, "loss": 3.2724, "step": 937 }, { "epoch": 0.06525444363282201, "grad_norm": 1.0859375, "learning_rate": 0.001993506930042616, "loss": 3.2721, "step": 938 }, { "epoch": 0.06532401126995721, "grad_norm": 0.8984375, "learning_rate": 0.001993481268321556, "loss": 3.0339, "step": 939 }, { "epoch": 0.06539357890709242, "grad_norm": 1.0625, "learning_rate": 0.0019934555561565244, "loss": 3.6624, "step": 940 }, { "epoch": 0.06546314654422762, "grad_norm": 1.1015625, "learning_rate": 0.0019934297935488275, "loss": 3.2563, "step": 941 }, { "epoch": 0.06553271418136283, "grad_norm": 1.1953125, "learning_rate": 0.0019934039804997724, "loss": 3.1389, "step": 942 }, { "epoch": 0.06560228181849803, "grad_norm": 1.484375, "learning_rate": 0.0019933781170106703, "loss": 3.7684, "step": 943 }, { "epoch": 0.06567184945563324, "grad_norm": 1.25, "learning_rate": 0.0019933522030828347, "loss": 3.2817, "step": 944 }, { "epoch": 0.06574141709276844, "grad_norm": 0.82421875, "learning_rate": 0.0019933262387175814, "loss": 3.4684, "step": 945 }, { "epoch": 0.06581098472990365, "grad_norm": 1.203125, "learning_rate": 0.001993300223916228, "loss": 3.2135, "step": 946 }, { "epoch": 0.06588055236703885, "grad_norm": 1.125, "learning_rate": 0.0019932741586800957, "loss": 3.4436, "step": 947 }, { "epoch": 0.06595012000417406, "grad_norm": 1.1875, "learning_rate": 0.0019932480430105083, "loss": 3.4002, "step": 948 }, { "epoch": 0.06601968764130926, "grad_norm": 0.921875, "learning_rate": 0.0019932218769087916, "loss": 3.8864, "step": 949 }, { "epoch": 0.06608925527844446, "grad_norm": 1.34375, "learning_rate": 0.001993195660376274, "loss": 2.9565, "step": 950 }, { "epoch": 0.06615882291557967, "grad_norm": 1.28125, "learning_rate": 0.001993169393414287, "loss": 3.2403, "step": 951 }, { "epoch": 0.06622839055271487, "grad_norm": 1.1171875, "learning_rate": 0.001993143076024164, "loss": 3.5741, "step": 952 }, { "epoch": 0.06629795818985008, "grad_norm": 1.1875, "learning_rate": 0.001993116708207242, "loss": 3.2992, "step": 953 }, { "epoch": 0.06636752582698528, "grad_norm": 1.3046875, "learning_rate": 0.001993090289964859, "loss": 3.3092, "step": 954 }, { "epoch": 0.0664370934641205, "grad_norm": 1.859375, "learning_rate": 0.0019930638212983564, "loss": 3.2502, "step": 955 }, { "epoch": 0.06650666110125569, "grad_norm": 0.91796875, "learning_rate": 0.0019930373022090785, "loss": 3.2321, "step": 956 }, { "epoch": 0.0665762287383909, "grad_norm": 1.1171875, "learning_rate": 0.0019930107326983715, "loss": 3.2501, "step": 957 }, { "epoch": 0.0666457963755261, "grad_norm": 0.9921875, "learning_rate": 0.0019929841127675845, "loss": 3.5014, "step": 958 }, { "epoch": 0.06671536401266132, "grad_norm": 0.875, "learning_rate": 0.0019929574424180697, "loss": 3.1929, "step": 959 }, { "epoch": 0.06678493164979651, "grad_norm": 1.625, "learning_rate": 0.0019929307216511806, "loss": 3.5224, "step": 960 }, { "epoch": 0.06685449928693173, "grad_norm": 1.0234375, "learning_rate": 0.0019929039504682743, "loss": 3.7231, "step": 961 }, { "epoch": 0.06692406692406692, "grad_norm": 1.4765625, "learning_rate": 0.0019928771288707098, "loss": 3.4419, "step": 962 }, { "epoch": 0.06699363456120212, "grad_norm": 1.1796875, "learning_rate": 0.0019928502568598494, "loss": 3.5337, "step": 963 }, { "epoch": 0.06706320219833733, "grad_norm": 1.6875, "learning_rate": 0.0019928233344370574, "loss": 3.2093, "step": 964 }, { "epoch": 0.06713276983547253, "grad_norm": 1.09375, "learning_rate": 0.0019927963616037003, "loss": 3.0913, "step": 965 }, { "epoch": 0.06720233747260775, "grad_norm": 1.265625, "learning_rate": 0.001992769338361148, "loss": 3.4042, "step": 966 }, { "epoch": 0.06727190510974294, "grad_norm": 0.89453125, "learning_rate": 0.001992742264710773, "loss": 3.6867, "step": 967 }, { "epoch": 0.06734147274687816, "grad_norm": 1.453125, "learning_rate": 0.0019927151406539494, "loss": 3.4302, "step": 968 }, { "epoch": 0.06741104038401335, "grad_norm": 0.9609375, "learning_rate": 0.0019926879661920547, "loss": 3.4974, "step": 969 }, { "epoch": 0.06748060802114857, "grad_norm": 1.0703125, "learning_rate": 0.0019926607413264684, "loss": 3.2897, "step": 970 }, { "epoch": 0.06755017565828376, "grad_norm": 1.0625, "learning_rate": 0.0019926334660585734, "loss": 3.2806, "step": 971 }, { "epoch": 0.06761974329541898, "grad_norm": 1.125, "learning_rate": 0.0019926061403897537, "loss": 3.3603, "step": 972 }, { "epoch": 0.06768931093255418, "grad_norm": 1.125, "learning_rate": 0.001992578764321398, "loss": 3.0188, "step": 973 }, { "epoch": 0.06775887856968937, "grad_norm": 0.89453125, "learning_rate": 0.001992551337854895, "loss": 3.3528, "step": 974 }, { "epoch": 0.06782844620682459, "grad_norm": 1.015625, "learning_rate": 0.0019925238609916377, "loss": 3.1065, "step": 975 }, { "epoch": 0.06789801384395978, "grad_norm": 1.0078125, "learning_rate": 0.0019924963337330224, "loss": 3.5713, "step": 976 }, { "epoch": 0.067967581481095, "grad_norm": 1.1171875, "learning_rate": 0.001992468756080445, "loss": 3.1457, "step": 977 }, { "epoch": 0.0680371491182302, "grad_norm": 0.984375, "learning_rate": 0.001992441128035307, "loss": 3.346, "step": 978 }, { "epoch": 0.0681067167553654, "grad_norm": 0.95703125, "learning_rate": 0.0019924134495990105, "loss": 3.6706, "step": 979 }, { "epoch": 0.0681762843925006, "grad_norm": 0.9453125, "learning_rate": 0.0019923857207729614, "loss": 3.5294, "step": 980 }, { "epoch": 0.06824585202963582, "grad_norm": 1.171875, "learning_rate": 0.0019923579415585674, "loss": 3.4409, "step": 981 }, { "epoch": 0.06831541966677102, "grad_norm": 1.09375, "learning_rate": 0.0019923301119572387, "loss": 3.3162, "step": 982 }, { "epoch": 0.06838498730390623, "grad_norm": 1.140625, "learning_rate": 0.0019923022319703887, "loss": 3.1688, "step": 983 }, { "epoch": 0.06845455494104143, "grad_norm": 0.828125, "learning_rate": 0.001992274301599433, "loss": 3.2179, "step": 984 }, { "epoch": 0.06852412257817664, "grad_norm": 0.87109375, "learning_rate": 0.0019922463208457896, "loss": 3.6096, "step": 985 }, { "epoch": 0.06859369021531184, "grad_norm": 1.4609375, "learning_rate": 0.0019922182897108794, "loss": 3.4342, "step": 986 }, { "epoch": 0.06866325785244703, "grad_norm": 1.09375, "learning_rate": 0.001992190208196126, "loss": 2.9741, "step": 987 }, { "epoch": 0.06873282548958225, "grad_norm": 1.1171875, "learning_rate": 0.0019921620763029544, "loss": 3.58, "step": 988 }, { "epoch": 0.06880239312671745, "grad_norm": 0.953125, "learning_rate": 0.0019921338940327936, "loss": 3.4243, "step": 989 }, { "epoch": 0.06887196076385266, "grad_norm": 0.95703125, "learning_rate": 0.001992105661387074, "loss": 3.3534, "step": 990 }, { "epoch": 0.06894152840098786, "grad_norm": 1.1171875, "learning_rate": 0.00199207737836723, "loss": 3.1215, "step": 991 }, { "epoch": 0.06901109603812307, "grad_norm": 1.0546875, "learning_rate": 0.0019920490449746972, "loss": 3.4135, "step": 992 }, { "epoch": 0.06908066367525827, "grad_norm": 1.3671875, "learning_rate": 0.001992020661210914, "loss": 3.516, "step": 993 }, { "epoch": 0.06915023131239348, "grad_norm": 1.3515625, "learning_rate": 0.0019919922270773215, "loss": 2.9574, "step": 994 }, { "epoch": 0.06921979894952868, "grad_norm": 1.4453125, "learning_rate": 0.001991963742575364, "loss": 3.4767, "step": 995 }, { "epoch": 0.06928936658666389, "grad_norm": 1.5625, "learning_rate": 0.0019919352077064872, "loss": 3.7818, "step": 996 }, { "epoch": 0.06935893422379909, "grad_norm": 1.4765625, "learning_rate": 0.0019919066224721406, "loss": 3.3807, "step": 997 }, { "epoch": 0.0694285018609343, "grad_norm": 1.1796875, "learning_rate": 0.0019918779868737754, "loss": 3.2762, "step": 998 }, { "epoch": 0.0694980694980695, "grad_norm": 1.2109375, "learning_rate": 0.0019918493009128454, "loss": 3.0363, "step": 999 }, { "epoch": 0.0695676371352047, "grad_norm": 0.96875, "learning_rate": 0.0019918205645908073, "loss": 3.3782, "step": 1000 }, { "epoch": 0.06963720477233991, "grad_norm": 1.25, "learning_rate": 0.0019917917779091196, "loss": 3.4582, "step": 1001 }, { "epoch": 0.0697067724094751, "grad_norm": 0.99609375, "learning_rate": 0.0019917629408692447, "loss": 3.3803, "step": 1002 }, { "epoch": 0.06977634004661032, "grad_norm": 1.1640625, "learning_rate": 0.0019917340534726467, "loss": 3.3236, "step": 1003 }, { "epoch": 0.06984590768374552, "grad_norm": 1.0546875, "learning_rate": 0.0019917051157207918, "loss": 3.3535, "step": 1004 }, { "epoch": 0.06991547532088073, "grad_norm": 0.77734375, "learning_rate": 0.00199167612761515, "loss": 3.5928, "step": 1005 }, { "epoch": 0.06998504295801593, "grad_norm": 1.2734375, "learning_rate": 0.0019916470891571925, "loss": 3.3844, "step": 1006 }, { "epoch": 0.07005461059515114, "grad_norm": 0.94921875, "learning_rate": 0.0019916180003483946, "loss": 3.1268, "step": 1007 }, { "epoch": 0.07012417823228634, "grad_norm": 1.3203125, "learning_rate": 0.0019915888611902323, "loss": 3.4423, "step": 1008 }, { "epoch": 0.07019374586942155, "grad_norm": 1.03125, "learning_rate": 0.001991559671684186, "loss": 2.9498, "step": 1009 }, { "epoch": 0.07026331350655675, "grad_norm": 0.8984375, "learning_rate": 0.001991530431831737, "loss": 3.3612, "step": 1010 }, { "epoch": 0.07033288114369196, "grad_norm": 1.0703125, "learning_rate": 0.0019915011416343706, "loss": 3.0226, "step": 1011 }, { "epoch": 0.07040244878082716, "grad_norm": 1.3984375, "learning_rate": 0.001991471801093574, "loss": 3.5298, "step": 1012 }, { "epoch": 0.07047201641796236, "grad_norm": 0.9921875, "learning_rate": 0.001991442410210836, "loss": 3.2802, "step": 1013 }, { "epoch": 0.07054158405509757, "grad_norm": 0.9921875, "learning_rate": 0.0019914129689876502, "loss": 3.4393, "step": 1014 }, { "epoch": 0.07061115169223277, "grad_norm": 1.109375, "learning_rate": 0.0019913834774255112, "loss": 3.0369, "step": 1015 }, { "epoch": 0.07068071932936798, "grad_norm": 1.28125, "learning_rate": 0.001991353935525916, "loss": 3.4006, "step": 1016 }, { "epoch": 0.07075028696650318, "grad_norm": 1.234375, "learning_rate": 0.001991324343290364, "loss": 3.2334, "step": 1017 }, { "epoch": 0.07081985460363839, "grad_norm": 1.0625, "learning_rate": 0.001991294700720359, "loss": 3.5597, "step": 1018 }, { "epoch": 0.07088942224077359, "grad_norm": 0.99609375, "learning_rate": 0.001991265007817406, "loss": 3.2408, "step": 1019 }, { "epoch": 0.0709589898779088, "grad_norm": 1.546875, "learning_rate": 0.001991235264583012, "loss": 3.2134, "step": 1020 }, { "epoch": 0.071028557515044, "grad_norm": 2.0625, "learning_rate": 0.001991205471018687, "loss": 3.423, "step": 1021 }, { "epoch": 0.07109812515217921, "grad_norm": 0.921875, "learning_rate": 0.0019911756271259445, "loss": 3.1471, "step": 1022 }, { "epoch": 0.07116769278931441, "grad_norm": 1.15625, "learning_rate": 0.0019911457329062996, "loss": 3.37, "step": 1023 }, { "epoch": 0.07123726042644962, "grad_norm": 1.2421875, "learning_rate": 0.0019911157883612703, "loss": 3.5813, "step": 1024 }, { "epoch": 0.07130682806358482, "grad_norm": 1.0390625, "learning_rate": 0.0019910857934923765, "loss": 3.6288, "step": 1025 }, { "epoch": 0.07137639570072002, "grad_norm": 1.703125, "learning_rate": 0.001991055748301142, "loss": 3.3881, "step": 1026 }, { "epoch": 0.07144596333785523, "grad_norm": 0.87890625, "learning_rate": 0.0019910256527890914, "loss": 3.1126, "step": 1027 }, { "epoch": 0.07151553097499043, "grad_norm": 1.15625, "learning_rate": 0.0019909955069577533, "loss": 3.278, "step": 1028 }, { "epoch": 0.07158509861212564, "grad_norm": 0.88671875, "learning_rate": 0.001990965310808659, "loss": 3.2712, "step": 1029 }, { "epoch": 0.07165466624926084, "grad_norm": 1.3203125, "learning_rate": 0.0019909350643433402, "loss": 3.1583, "step": 1030 }, { "epoch": 0.07172423388639605, "grad_norm": 1.0, "learning_rate": 0.0019909047675633344, "loss": 3.6229, "step": 1031 }, { "epoch": 0.07179380152353125, "grad_norm": 1.234375, "learning_rate": 0.0019908744204701783, "loss": 3.1285, "step": 1032 }, { "epoch": 0.07186336916066646, "grad_norm": 0.8359375, "learning_rate": 0.0019908440230654136, "loss": 3.3274, "step": 1033 }, { "epoch": 0.07193293679780166, "grad_norm": 0.9765625, "learning_rate": 0.001990813575350584, "loss": 3.6442, "step": 1034 }, { "epoch": 0.07200250443493687, "grad_norm": 0.91796875, "learning_rate": 0.0019907830773272348, "loss": 3.8308, "step": 1035 }, { "epoch": 0.07207207207207207, "grad_norm": 0.91015625, "learning_rate": 0.001990752528996915, "loss": 3.1517, "step": 1036 }, { "epoch": 0.07214163970920728, "grad_norm": 1.015625, "learning_rate": 0.0019907219303611757, "loss": 3.2052, "step": 1037 }, { "epoch": 0.07221120734634248, "grad_norm": 1.53125, "learning_rate": 0.0019906912814215702, "loss": 3.0487, "step": 1038 }, { "epoch": 0.07228077498347768, "grad_norm": 1.0234375, "learning_rate": 0.0019906605821796547, "loss": 3.6735, "step": 1039 }, { "epoch": 0.07235034262061289, "grad_norm": 1.0390625, "learning_rate": 0.0019906298326369887, "loss": 3.3629, "step": 1040 }, { "epoch": 0.07241991025774809, "grad_norm": 1.0625, "learning_rate": 0.001990599032795132, "loss": 3.1491, "step": 1041 }, { "epoch": 0.0724894778948833, "grad_norm": 0.96875, "learning_rate": 0.0019905681826556504, "loss": 3.3664, "step": 1042 }, { "epoch": 0.0725590455320185, "grad_norm": 0.7421875, "learning_rate": 0.001990537282220109, "loss": 3.2537, "step": 1043 }, { "epoch": 0.07262861316915371, "grad_norm": 0.984375, "learning_rate": 0.0019905063314900767, "loss": 3.6151, "step": 1044 }, { "epoch": 0.07269818080628891, "grad_norm": 0.9296875, "learning_rate": 0.0019904753304671257, "loss": 3.6813, "step": 1045 }, { "epoch": 0.07276774844342412, "grad_norm": 1.078125, "learning_rate": 0.00199044427915283, "loss": 3.1794, "step": 1046 }, { "epoch": 0.07283731608055932, "grad_norm": 0.8359375, "learning_rate": 0.0019904131775487655, "loss": 3.7046, "step": 1047 }, { "epoch": 0.07290688371769453, "grad_norm": 1.0703125, "learning_rate": 0.0019903820256565122, "loss": 3.285, "step": 1048 }, { "epoch": 0.07297645135482973, "grad_norm": 1.3203125, "learning_rate": 0.0019903508234776516, "loss": 3.1065, "step": 1049 }, { "epoch": 0.07304601899196493, "grad_norm": 0.94140625, "learning_rate": 0.001990319571013768, "loss": 3.3196, "step": 1050 }, { "epoch": 0.07311558662910014, "grad_norm": 1.0859375, "learning_rate": 0.001990288268266448, "loss": 3.2582, "step": 1051 }, { "epoch": 0.07318515426623534, "grad_norm": 1.0390625, "learning_rate": 0.0019902569152372806, "loss": 3.3076, "step": 1052 }, { "epoch": 0.07325472190337055, "grad_norm": 1.09375, "learning_rate": 0.001990225511927859, "loss": 2.9895, "step": 1053 }, { "epoch": 0.07332428954050575, "grad_norm": 1.0546875, "learning_rate": 0.001990194058339777, "loss": 2.9849, "step": 1054 }, { "epoch": 0.07339385717764096, "grad_norm": 1.125, "learning_rate": 0.0019901625544746313, "loss": 3.5619, "step": 1055 }, { "epoch": 0.07346342481477616, "grad_norm": 0.875, "learning_rate": 0.0019901310003340223, "loss": 3.2445, "step": 1056 }, { "epoch": 0.07353299245191137, "grad_norm": 0.90625, "learning_rate": 0.001990099395919552, "loss": 3.4955, "step": 1057 }, { "epoch": 0.07360256008904657, "grad_norm": 1.0390625, "learning_rate": 0.0019900677412328237, "loss": 3.0674, "step": 1058 }, { "epoch": 0.07367212772618179, "grad_norm": 0.91796875, "learning_rate": 0.0019900360362754468, "loss": 3.2239, "step": 1059 }, { "epoch": 0.07374169536331698, "grad_norm": 0.95703125, "learning_rate": 0.0019900042810490296, "loss": 3.0941, "step": 1060 }, { "epoch": 0.0738112630004522, "grad_norm": 1.1015625, "learning_rate": 0.0019899724755551855, "loss": 3.154, "step": 1061 }, { "epoch": 0.0738808306375874, "grad_norm": 1.1171875, "learning_rate": 0.0019899406197955286, "loss": 3.3571, "step": 1062 }, { "epoch": 0.07395039827472259, "grad_norm": 1.65625, "learning_rate": 0.0019899087137716766, "loss": 3.3969, "step": 1063 }, { "epoch": 0.0740199659118578, "grad_norm": 1.484375, "learning_rate": 0.0019898767574852497, "loss": 3.2255, "step": 1064 }, { "epoch": 0.074089533548993, "grad_norm": 0.8828125, "learning_rate": 0.0019898447509378706, "loss": 3.3027, "step": 1065 }, { "epoch": 0.07415910118612822, "grad_norm": 1.0859375, "learning_rate": 0.001989812694131164, "loss": 3.6268, "step": 1066 }, { "epoch": 0.07422866882326341, "grad_norm": 0.921875, "learning_rate": 0.001989780587066758, "loss": 3.3741, "step": 1067 }, { "epoch": 0.07429823646039863, "grad_norm": 0.98828125, "learning_rate": 0.0019897484297462828, "loss": 2.8676, "step": 1068 }, { "epoch": 0.07436780409753382, "grad_norm": 0.9375, "learning_rate": 0.0019897162221713706, "loss": 3.2567, "step": 1069 }, { "epoch": 0.07443737173466904, "grad_norm": 1.109375, "learning_rate": 0.0019896839643436573, "loss": 3.2005, "step": 1070 }, { "epoch": 0.07450693937180423, "grad_norm": 1.1640625, "learning_rate": 0.001989651656264781, "loss": 3.1738, "step": 1071 }, { "epoch": 0.07457650700893945, "grad_norm": 1.078125, "learning_rate": 0.0019896192979363815, "loss": 3.3275, "step": 1072 }, { "epoch": 0.07464607464607464, "grad_norm": 0.9765625, "learning_rate": 0.0019895868893601023, "loss": 3.5421, "step": 1073 }, { "epoch": 0.07471564228320986, "grad_norm": 1.2109375, "learning_rate": 0.0019895544305375884, "loss": 3.0984, "step": 1074 }, { "epoch": 0.07478520992034506, "grad_norm": 1.15625, "learning_rate": 0.0019895219214704886, "loss": 3.1449, "step": 1075 }, { "epoch": 0.07485477755748025, "grad_norm": 1.234375, "learning_rate": 0.001989489362160453, "loss": 3.1699, "step": 1076 }, { "epoch": 0.07492434519461547, "grad_norm": 0.9765625, "learning_rate": 0.0019894567526091353, "loss": 3.2374, "step": 1077 }, { "epoch": 0.07499391283175066, "grad_norm": 0.75390625, "learning_rate": 0.0019894240928181907, "loss": 3.3657, "step": 1078 }, { "epoch": 0.07506348046888588, "grad_norm": 1.390625, "learning_rate": 0.0019893913827892773, "loss": 3.5596, "step": 1079 }, { "epoch": 0.07513304810602107, "grad_norm": 0.95703125, "learning_rate": 0.001989358622524057, "loss": 3.4357, "step": 1080 }, { "epoch": 0.07520261574315629, "grad_norm": 1.1484375, "learning_rate": 0.0019893258120241924, "loss": 3.1769, "step": 1081 }, { "epoch": 0.07527218338029149, "grad_norm": 1.2109375, "learning_rate": 0.0019892929512913497, "loss": 3.4845, "step": 1082 }, { "epoch": 0.0753417510174267, "grad_norm": 1.4296875, "learning_rate": 0.001989260040327197, "loss": 3.3399, "step": 1083 }, { "epoch": 0.0754113186545619, "grad_norm": 1.2265625, "learning_rate": 0.001989227079133406, "loss": 3.1975, "step": 1084 }, { "epoch": 0.07548088629169711, "grad_norm": 1.140625, "learning_rate": 0.00198919406771165, "loss": 2.9935, "step": 1085 }, { "epoch": 0.0755504539288323, "grad_norm": 1.2578125, "learning_rate": 0.001989161006063605, "loss": 3.0267, "step": 1086 }, { "epoch": 0.07562002156596752, "grad_norm": 1.4375, "learning_rate": 0.0019891278941909503, "loss": 3.2466, "step": 1087 }, { "epoch": 0.07568958920310272, "grad_norm": 1.0, "learning_rate": 0.001989094732095366, "loss": 3.3007, "step": 1088 }, { "epoch": 0.07575915684023792, "grad_norm": 1.1484375, "learning_rate": 0.001989061519778537, "loss": 3.4514, "step": 1089 }, { "epoch": 0.07582872447737313, "grad_norm": 0.93359375, "learning_rate": 0.0019890282572421493, "loss": 3.48, "step": 1090 }, { "epoch": 0.07589829211450833, "grad_norm": 1.09375, "learning_rate": 0.0019889949444878915, "loss": 3.2318, "step": 1091 }, { "epoch": 0.07596785975164354, "grad_norm": 1.53125, "learning_rate": 0.0019889615815174557, "loss": 3.0235, "step": 1092 }, { "epoch": 0.07603742738877874, "grad_norm": 1.171875, "learning_rate": 0.001988928168332535, "loss": 3.2355, "step": 1093 }, { "epoch": 0.07610699502591395, "grad_norm": 0.890625, "learning_rate": 0.0019888947049348273, "loss": 3.3776, "step": 1094 }, { "epoch": 0.07617656266304915, "grad_norm": 1.1875, "learning_rate": 0.0019888611913260303, "loss": 3.1374, "step": 1095 }, { "epoch": 0.07624613030018436, "grad_norm": 1.3125, "learning_rate": 0.0019888276275078463, "loss": 3.3041, "step": 1096 }, { "epoch": 0.07631569793731956, "grad_norm": 1.0078125, "learning_rate": 0.0019887940134819793, "loss": 3.3709, "step": 1097 }, { "epoch": 0.07638526557445477, "grad_norm": 0.98828125, "learning_rate": 0.0019887603492501366, "loss": 3.4368, "step": 1098 }, { "epoch": 0.07645483321158997, "grad_norm": 0.87890625, "learning_rate": 0.0019887266348140266, "loss": 3.4639, "step": 1099 }, { "epoch": 0.07652440084872518, "grad_norm": 1.0625, "learning_rate": 0.001988692870175362, "loss": 3.2643, "step": 1100 }, { "epoch": 0.07659396848586038, "grad_norm": 1.171875, "learning_rate": 0.0019886590553358564, "loss": 3.0054, "step": 1101 }, { "epoch": 0.07666353612299558, "grad_norm": 1.3671875, "learning_rate": 0.0019886251902972276, "loss": 2.6731, "step": 1102 }, { "epoch": 0.07673310376013079, "grad_norm": 1.1328125, "learning_rate": 0.001988591275061195, "loss": 3.034, "step": 1103 }, { "epoch": 0.07680267139726599, "grad_norm": 1.0859375, "learning_rate": 0.0019885573096294793, "loss": 3.5486, "step": 1104 }, { "epoch": 0.0768722390344012, "grad_norm": 1.9375, "learning_rate": 0.001988523294003807, "loss": 3.0527, "step": 1105 }, { "epoch": 0.0769418066715364, "grad_norm": 0.92578125, "learning_rate": 0.001988489228185904, "loss": 3.0049, "step": 1106 }, { "epoch": 0.07701137430867161, "grad_norm": 0.98828125, "learning_rate": 0.0019884551121775004, "loss": 3.3528, "step": 1107 }, { "epoch": 0.07708094194580681, "grad_norm": 0.734375, "learning_rate": 0.001988420945980328, "loss": 3.3525, "step": 1108 }, { "epoch": 0.07715050958294202, "grad_norm": 0.984375, "learning_rate": 0.001988386729596123, "loss": 3.2421, "step": 1109 }, { "epoch": 0.07722007722007722, "grad_norm": 0.84765625, "learning_rate": 0.001988352463026621, "loss": 3.6422, "step": 1110 }, { "epoch": 0.07728964485721243, "grad_norm": 0.90234375, "learning_rate": 0.0019883181462735625, "loss": 3.4884, "step": 1111 }, { "epoch": 0.07735921249434763, "grad_norm": 0.99609375, "learning_rate": 0.0019882837793386903, "loss": 3.4869, "step": 1112 }, { "epoch": 0.07742878013148284, "grad_norm": 1.2265625, "learning_rate": 0.001988249362223749, "loss": 3.354, "step": 1113 }, { "epoch": 0.07749834776861804, "grad_norm": 1.0078125, "learning_rate": 0.0019882148949304864, "loss": 3.2511, "step": 1114 }, { "epoch": 0.07756791540575324, "grad_norm": 0.9296875, "learning_rate": 0.001988180377460652, "loss": 3.2186, "step": 1115 }, { "epoch": 0.07763748304288845, "grad_norm": 1.0546875, "learning_rate": 0.001988145809815999, "loss": 3.2695, "step": 1116 }, { "epoch": 0.07770705068002365, "grad_norm": 0.9296875, "learning_rate": 0.0019881111919982826, "loss": 3.7394, "step": 1117 }, { "epoch": 0.07777661831715886, "grad_norm": 1.0, "learning_rate": 0.0019880765240092605, "loss": 3.0813, "step": 1118 }, { "epoch": 0.07784618595429406, "grad_norm": 1.328125, "learning_rate": 0.0019880418058506925, "loss": 3.2941, "step": 1119 }, { "epoch": 0.07791575359142927, "grad_norm": 0.95703125, "learning_rate": 0.0019880070375243417, "loss": 3.671, "step": 1120 }, { "epoch": 0.07798532122856447, "grad_norm": 1.1015625, "learning_rate": 0.0019879722190319733, "loss": 3.3528, "step": 1121 }, { "epoch": 0.07805488886569968, "grad_norm": 1.046875, "learning_rate": 0.0019879373503753554, "loss": 3.386, "step": 1122 }, { "epoch": 0.07812445650283488, "grad_norm": 0.9296875, "learning_rate": 0.0019879024315562583, "loss": 2.9778, "step": 1123 }, { "epoch": 0.07819402413997009, "grad_norm": 0.90234375, "learning_rate": 0.0019878674625764554, "loss": 3.3305, "step": 1124 }, { "epoch": 0.07826359177710529, "grad_norm": 1.1171875, "learning_rate": 0.001987832443437722, "loss": 2.9773, "step": 1125 }, { "epoch": 0.07833315941424049, "grad_norm": 0.9140625, "learning_rate": 0.0019877973741418364, "loss": 3.394, "step": 1126 }, { "epoch": 0.0784027270513757, "grad_norm": 0.984375, "learning_rate": 0.0019877622546905786, "loss": 3.2623, "step": 1127 }, { "epoch": 0.0784722946885109, "grad_norm": 1.1640625, "learning_rate": 0.001987727085085732, "loss": 3.1564, "step": 1128 }, { "epoch": 0.07854186232564611, "grad_norm": 1.171875, "learning_rate": 0.001987691865329083, "loss": 3.6401, "step": 1129 }, { "epoch": 0.07861142996278131, "grad_norm": 1.140625, "learning_rate": 0.0019876565954224192, "loss": 3.0965, "step": 1130 }, { "epoch": 0.07868099759991652, "grad_norm": 1.15625, "learning_rate": 0.001987621275367532, "loss": 3.0531, "step": 1131 }, { "epoch": 0.07875056523705172, "grad_norm": 1.2578125, "learning_rate": 0.0019875859051662137, "loss": 2.9751, "step": 1132 }, { "epoch": 0.07882013287418693, "grad_norm": 0.82421875, "learning_rate": 0.0019875504848202614, "loss": 3.5153, "step": 1133 }, { "epoch": 0.07888970051132213, "grad_norm": 1.53125, "learning_rate": 0.001987515014331473, "loss": 2.8853, "step": 1134 }, { "epoch": 0.07895926814845734, "grad_norm": 1.28125, "learning_rate": 0.0019874794937016498, "loss": 3.1095, "step": 1135 }, { "epoch": 0.07902883578559254, "grad_norm": 2.21875, "learning_rate": 0.001987443922932595, "loss": 3.3813, "step": 1136 }, { "epoch": 0.07909840342272775, "grad_norm": 0.98046875, "learning_rate": 0.001987408302026115, "loss": 3.7459, "step": 1137 }, { "epoch": 0.07916797105986295, "grad_norm": 1.109375, "learning_rate": 0.001987372630984018, "loss": 3.0523, "step": 1138 }, { "epoch": 0.07923753869699815, "grad_norm": 1.53125, "learning_rate": 0.001987336909808116, "loss": 2.879, "step": 1139 }, { "epoch": 0.07930710633413336, "grad_norm": 0.7890625, "learning_rate": 0.001987301138500222, "loss": 3.192, "step": 1140 }, { "epoch": 0.07937667397126856, "grad_norm": 1.0859375, "learning_rate": 0.001987265317062153, "loss": 2.8798, "step": 1141 }, { "epoch": 0.07944624160840377, "grad_norm": 1.2109375, "learning_rate": 0.0019872294454957268, "loss": 2.8866, "step": 1142 }, { "epoch": 0.07951580924553897, "grad_norm": 0.921875, "learning_rate": 0.001987193523802765, "loss": 3.1609, "step": 1143 }, { "epoch": 0.07958537688267418, "grad_norm": 0.83984375, "learning_rate": 0.0019871575519850924, "loss": 3.6688, "step": 1144 }, { "epoch": 0.07965494451980938, "grad_norm": 2.453125, "learning_rate": 0.0019871215300445353, "loss": 3.64, "step": 1145 }, { "epoch": 0.0797245121569446, "grad_norm": 2.296875, "learning_rate": 0.001987085457982922, "loss": 3.4658, "step": 1146 }, { "epoch": 0.07979407979407979, "grad_norm": 1.0703125, "learning_rate": 0.0019870493358020843, "loss": 3.5664, "step": 1147 }, { "epoch": 0.079863647431215, "grad_norm": 0.6953125, "learning_rate": 0.001987013163503857, "loss": 3.882, "step": 1148 }, { "epoch": 0.0799332150683502, "grad_norm": 1.34375, "learning_rate": 0.0019869769410900753, "loss": 3.1736, "step": 1149 }, { "epoch": 0.08000278270548541, "grad_norm": 1.0546875, "learning_rate": 0.00198694066856258, "loss": 2.935, "step": 1150 }, { "epoch": 0.08007235034262061, "grad_norm": 1.0625, "learning_rate": 0.001986904345923212, "loss": 3.3874, "step": 1151 }, { "epoch": 0.08014191797975581, "grad_norm": 0.80859375, "learning_rate": 0.001986867973173815, "loss": 3.2382, "step": 1152 }, { "epoch": 0.08021148561689102, "grad_norm": 0.96484375, "learning_rate": 0.001986831550316237, "loss": 3.1287, "step": 1153 }, { "epoch": 0.08028105325402622, "grad_norm": 1.0, "learning_rate": 0.001986795077352327, "loss": 3.1296, "step": 1154 }, { "epoch": 0.08035062089116143, "grad_norm": 1.15625, "learning_rate": 0.0019867585542839373, "loss": 3.1815, "step": 1155 }, { "epoch": 0.08042018852829663, "grad_norm": 0.8515625, "learning_rate": 0.001986721981112921, "loss": 3.7003, "step": 1156 }, { "epoch": 0.08048975616543184, "grad_norm": 0.9765625, "learning_rate": 0.001986685357841136, "loss": 3.2178, "step": 1157 }, { "epoch": 0.08055932380256704, "grad_norm": 1.0, "learning_rate": 0.001986648684470442, "loss": 3.4522, "step": 1158 }, { "epoch": 0.08062889143970225, "grad_norm": 0.81640625, "learning_rate": 0.001986611961002701, "loss": 3.4755, "step": 1159 }, { "epoch": 0.08069845907683745, "grad_norm": 0.953125, "learning_rate": 0.001986575187439777, "loss": 3.3797, "step": 1160 }, { "epoch": 0.08076802671397267, "grad_norm": 0.7890625, "learning_rate": 0.001986538363783538, "loss": 3.1683, "step": 1161 }, { "epoch": 0.08083759435110786, "grad_norm": 1.0234375, "learning_rate": 0.0019865014900358534, "loss": 3.1062, "step": 1162 }, { "epoch": 0.08090716198824308, "grad_norm": 0.70703125, "learning_rate": 0.0019864645661985957, "loss": 3.574, "step": 1163 }, { "epoch": 0.08097672962537827, "grad_norm": 0.79296875, "learning_rate": 0.0019864275922736397, "loss": 3.4354, "step": 1164 }, { "epoch": 0.08104629726251347, "grad_norm": 0.73828125, "learning_rate": 0.001986390568262862, "loss": 3.3539, "step": 1165 }, { "epoch": 0.08111586489964868, "grad_norm": 1.171875, "learning_rate": 0.0019863534941681428, "loss": 3.5538, "step": 1166 }, { "epoch": 0.08118543253678388, "grad_norm": 1.046875, "learning_rate": 0.001986316369991365, "loss": 3.5096, "step": 1167 }, { "epoch": 0.0812550001739191, "grad_norm": 0.80078125, "learning_rate": 0.0019862791957344136, "loss": 2.8434, "step": 1168 }, { "epoch": 0.0813245678110543, "grad_norm": 1.2109375, "learning_rate": 0.0019862419713991756, "loss": 3.2823, "step": 1169 }, { "epoch": 0.0813941354481895, "grad_norm": 1.25, "learning_rate": 0.0019862046969875416, "loss": 3.4693, "step": 1170 }, { "epoch": 0.0814637030853247, "grad_norm": 0.875, "learning_rate": 0.0019861673725014035, "loss": 3.1002, "step": 1171 }, { "epoch": 0.08153327072245992, "grad_norm": 0.96875, "learning_rate": 0.0019861299979426574, "loss": 3.5625, "step": 1172 }, { "epoch": 0.08160283835959511, "grad_norm": 0.80078125, "learning_rate": 0.0019860925733132004, "loss": 3.1363, "step": 1173 }, { "epoch": 0.08167240599673033, "grad_norm": 0.6796875, "learning_rate": 0.0019860550986149322, "loss": 3.7165, "step": 1174 }, { "epoch": 0.08174197363386553, "grad_norm": 0.921875, "learning_rate": 0.0019860175738497564, "loss": 3.2552, "step": 1175 }, { "epoch": 0.08181154127100074, "grad_norm": 1.0234375, "learning_rate": 0.0019859799990195786, "loss": 3.5978, "step": 1176 }, { "epoch": 0.08188110890813594, "grad_norm": 0.97265625, "learning_rate": 0.0019859423741263055, "loss": 3.0328, "step": 1177 }, { "epoch": 0.08195067654527113, "grad_norm": 0.98828125, "learning_rate": 0.0019859046991718486, "loss": 3.2697, "step": 1178 }, { "epoch": 0.08202024418240635, "grad_norm": 1.046875, "learning_rate": 0.0019858669741581207, "loss": 3.0997, "step": 1179 }, { "epoch": 0.08208981181954154, "grad_norm": 0.98046875, "learning_rate": 0.0019858291990870365, "loss": 3.2616, "step": 1180 }, { "epoch": 0.08215937945667676, "grad_norm": 1.1484375, "learning_rate": 0.0019857913739605147, "loss": 3.0133, "step": 1181 }, { "epoch": 0.08222894709381195, "grad_norm": 1.0078125, "learning_rate": 0.001985753498780475, "loss": 3.3974, "step": 1182 }, { "epoch": 0.08229851473094717, "grad_norm": 0.921875, "learning_rate": 0.001985715573548842, "loss": 3.466, "step": 1183 }, { "epoch": 0.08236808236808237, "grad_norm": 1.2109375, "learning_rate": 0.0019856775982675405, "loss": 2.859, "step": 1184 }, { "epoch": 0.08243765000521758, "grad_norm": 0.8359375, "learning_rate": 0.0019856395729384983, "loss": 3.4976, "step": 1185 }, { "epoch": 0.08250721764235278, "grad_norm": 1.1015625, "learning_rate": 0.001985601497563647, "loss": 3.2808, "step": 1186 }, { "epoch": 0.08257678527948799, "grad_norm": 6.3125, "learning_rate": 0.001985563372144919, "loss": 3.4329, "step": 1187 }, { "epoch": 0.08264635291662319, "grad_norm": 1.0703125, "learning_rate": 0.001985525196684251, "loss": 3.2684, "step": 1188 }, { "epoch": 0.08271592055375838, "grad_norm": 1.28125, "learning_rate": 0.001985486971183581, "loss": 3.5413, "step": 1189 }, { "epoch": 0.0827854881908936, "grad_norm": 1.0, "learning_rate": 0.00198544869564485, "loss": 3.0702, "step": 1190 }, { "epoch": 0.0828550558280288, "grad_norm": 1.1328125, "learning_rate": 0.00198541037007, "loss": 2.9937, "step": 1191 }, { "epoch": 0.08292462346516401, "grad_norm": 0.9921875, "learning_rate": 0.00198537199446098, "loss": 3.5534, "step": 1192 }, { "epoch": 0.0829941911022992, "grad_norm": 0.984375, "learning_rate": 0.0019853335688197354, "loss": 3.4087, "step": 1193 }, { "epoch": 0.08306375873943442, "grad_norm": 1.2109375, "learning_rate": 0.0019852950931482194, "loss": 3.1602, "step": 1194 }, { "epoch": 0.08313332637656962, "grad_norm": 0.83203125, "learning_rate": 0.0019852565674483846, "loss": 3.3389, "step": 1195 }, { "epoch": 0.08320289401370483, "grad_norm": 0.9609375, "learning_rate": 0.001985217991722187, "loss": 2.9335, "step": 1196 }, { "epoch": 0.08327246165084003, "grad_norm": 0.93359375, "learning_rate": 0.001985179365971586, "loss": 3.0408, "step": 1197 }, { "epoch": 0.08334202928797524, "grad_norm": 0.82421875, "learning_rate": 0.0019851406901985427, "loss": 3.4378, "step": 1198 }, { "epoch": 0.08341159692511044, "grad_norm": 0.734375, "learning_rate": 0.0019851019644050202, "loss": 3.5636, "step": 1199 }, { "epoch": 0.08348116456224565, "grad_norm": 0.921875, "learning_rate": 0.0019850631885929854, "loss": 2.891, "step": 1200 }, { "epoch": 0.08355073219938085, "grad_norm": 0.953125, "learning_rate": 0.001985024362764407, "loss": 3.2612, "step": 1201 }, { "epoch": 0.08362029983651605, "grad_norm": 1.0546875, "learning_rate": 0.0019849854869212562, "loss": 3.5886, "step": 1202 }, { "epoch": 0.08368986747365126, "grad_norm": 1.1171875, "learning_rate": 0.0019849465610655074, "loss": 3.4887, "step": 1203 }, { "epoch": 0.08375943511078646, "grad_norm": 1.3125, "learning_rate": 0.0019849075851991363, "loss": 3.4052, "step": 1204 }, { "epoch": 0.08382900274792167, "grad_norm": 0.75, "learning_rate": 0.0019848685593241225, "loss": 3.3308, "step": 1205 }, { "epoch": 0.08389857038505687, "grad_norm": 0.92578125, "learning_rate": 0.0019848294834424476, "loss": 3.395, "step": 1206 }, { "epoch": 0.08396813802219208, "grad_norm": 0.97265625, "learning_rate": 0.001984790357556095, "loss": 3.2642, "step": 1207 }, { "epoch": 0.08403770565932728, "grad_norm": 0.90625, "learning_rate": 0.001984751181667052, "loss": 3.7784, "step": 1208 }, { "epoch": 0.08410727329646249, "grad_norm": 1.078125, "learning_rate": 0.0019847119557773072, "loss": 3.5094, "step": 1209 }, { "epoch": 0.08417684093359769, "grad_norm": 0.859375, "learning_rate": 0.001984672679888853, "loss": 3.0859, "step": 1210 }, { "epoch": 0.0842464085707329, "grad_norm": 1.1796875, "learning_rate": 0.0019846333540036835, "loss": 3.2304, "step": 1211 }, { "epoch": 0.0843159762078681, "grad_norm": 1.0234375, "learning_rate": 0.001984593978123795, "loss": 3.5656, "step": 1212 }, { "epoch": 0.08438554384500331, "grad_norm": 1.1953125, "learning_rate": 0.001984554552251186, "loss": 3.2698, "step": 1213 }, { "epoch": 0.08445511148213851, "grad_norm": 0.81640625, "learning_rate": 0.0019845150763878605, "loss": 3.5456, "step": 1214 }, { "epoch": 0.08452467911927371, "grad_norm": 0.9140625, "learning_rate": 0.0019844755505358217, "loss": 3.2847, "step": 1215 }, { "epoch": 0.08459424675640892, "grad_norm": 1.3125, "learning_rate": 0.001984435974697076, "loss": 3.2534, "step": 1216 }, { "epoch": 0.08466381439354412, "grad_norm": 1.28125, "learning_rate": 0.001984396348873634, "loss": 3.2247, "step": 1217 }, { "epoch": 0.08473338203067933, "grad_norm": 1.0859375, "learning_rate": 0.0019843566730675067, "loss": 3.5911, "step": 1218 }, { "epoch": 0.08480294966781453, "grad_norm": 1.2265625, "learning_rate": 0.0019843169472807095, "loss": 3.4861, "step": 1219 }, { "epoch": 0.08487251730494974, "grad_norm": 1.3046875, "learning_rate": 0.0019842771715152586, "loss": 3.0054, "step": 1220 }, { "epoch": 0.08494208494208494, "grad_norm": 1.1796875, "learning_rate": 0.0019842373457731742, "loss": 3.276, "step": 1221 }, { "epoch": 0.08501165257922015, "grad_norm": 1.203125, "learning_rate": 0.0019841974700564786, "loss": 3.45, "step": 1222 }, { "epoch": 0.08508122021635535, "grad_norm": 0.88671875, "learning_rate": 0.0019841575443671957, "loss": 3.2529, "step": 1223 }, { "epoch": 0.08515078785349056, "grad_norm": 0.9140625, "learning_rate": 0.0019841175687073534, "loss": 3.2271, "step": 1224 }, { "epoch": 0.08522035549062576, "grad_norm": 0.984375, "learning_rate": 0.0019840775430789814, "loss": 3.2621, "step": 1225 }, { "epoch": 0.08528992312776097, "grad_norm": 0.8125, "learning_rate": 0.001984037467484112, "loss": 3.589, "step": 1226 }, { "epoch": 0.08535949076489617, "grad_norm": 1.1171875, "learning_rate": 0.0019839973419247797, "loss": 3.0352, "step": 1227 }, { "epoch": 0.08542905840203137, "grad_norm": 1.203125, "learning_rate": 0.001983957166403022, "loss": 3.4417, "step": 1228 }, { "epoch": 0.08549862603916658, "grad_norm": 1.109375, "learning_rate": 0.001983916940920879, "loss": 3.7552, "step": 1229 }, { "epoch": 0.08556819367630178, "grad_norm": 0.92578125, "learning_rate": 0.001983876665480393, "loss": 3.4003, "step": 1230 }, { "epoch": 0.08563776131343699, "grad_norm": 0.85546875, "learning_rate": 0.0019838363400836094, "loss": 3.5857, "step": 1231 }, { "epoch": 0.08570732895057219, "grad_norm": 1.4921875, "learning_rate": 0.001983795964732575, "loss": 3.5616, "step": 1232 }, { "epoch": 0.0857768965877074, "grad_norm": 1.125, "learning_rate": 0.0019837555394293404, "loss": 3.2636, "step": 1233 }, { "epoch": 0.0858464642248426, "grad_norm": 0.97265625, "learning_rate": 0.0019837150641759576, "loss": 3.4016, "step": 1234 }, { "epoch": 0.08591603186197781, "grad_norm": 1.0078125, "learning_rate": 0.0019836745389744826, "loss": 2.9563, "step": 1235 }, { "epoch": 0.08598559949911301, "grad_norm": 1.1328125, "learning_rate": 0.001983633963826972, "loss": 3.4908, "step": 1236 }, { "epoch": 0.08605516713624822, "grad_norm": 1.1171875, "learning_rate": 0.0019835933387354872, "loss": 3.0906, "step": 1237 }, { "epoch": 0.08612473477338342, "grad_norm": 0.8125, "learning_rate": 0.0019835526637020902, "loss": 3.4301, "step": 1238 }, { "epoch": 0.08619430241051863, "grad_norm": 0.87890625, "learning_rate": 0.0019835119387288463, "loss": 3.3323, "step": 1239 }, { "epoch": 0.08626387004765383, "grad_norm": 1.6484375, "learning_rate": 0.0019834711638178236, "loss": 3.3489, "step": 1240 }, { "epoch": 0.08633343768478903, "grad_norm": 0.80078125, "learning_rate": 0.001983430338971092, "loss": 3.6983, "step": 1241 }, { "epoch": 0.08640300532192424, "grad_norm": 0.96875, "learning_rate": 0.001983389464190725, "loss": 3.4093, "step": 1242 }, { "epoch": 0.08647257295905944, "grad_norm": 0.76953125, "learning_rate": 0.0019833485394787974, "loss": 3.1098, "step": 1243 }, { "epoch": 0.08654214059619465, "grad_norm": 0.88671875, "learning_rate": 0.0019833075648373875, "loss": 3.2846, "step": 1244 }, { "epoch": 0.08661170823332985, "grad_norm": 0.73828125, "learning_rate": 0.0019832665402685756, "loss": 3.5138, "step": 1245 }, { "epoch": 0.08668127587046506, "grad_norm": 0.7578125, "learning_rate": 0.001983225465774445, "loss": 3.6069, "step": 1246 }, { "epoch": 0.08675084350760026, "grad_norm": 0.828125, "learning_rate": 0.001983184341357081, "loss": 3.4257, "step": 1247 }, { "epoch": 0.08682041114473547, "grad_norm": 1.0234375, "learning_rate": 0.0019831431670185714, "loss": 3.2438, "step": 1248 }, { "epoch": 0.08688997878187067, "grad_norm": 0.85546875, "learning_rate": 0.0019831019427610074, "loss": 3.291, "step": 1249 }, { "epoch": 0.08695954641900588, "grad_norm": 1.25, "learning_rate": 0.001983060668586482, "loss": 3.2301, "step": 1250 }, { "epoch": 0.08702911405614108, "grad_norm": 1.234375, "learning_rate": 0.001983019344497091, "loss": 3.4895, "step": 1251 }, { "epoch": 0.0870986816932763, "grad_norm": 1.265625, "learning_rate": 0.0019829779704949326, "loss": 3.1253, "step": 1252 }, { "epoch": 0.08716824933041149, "grad_norm": 1.265625, "learning_rate": 0.0019829365465821066, "loss": 3.1541, "step": 1253 }, { "epoch": 0.08723781696754669, "grad_norm": 1.0859375, "learning_rate": 0.001982895072760718, "loss": 3.1458, "step": 1254 }, { "epoch": 0.0873073846046819, "grad_norm": 0.8515625, "learning_rate": 0.0019828535490328714, "loss": 3.3, "step": 1255 }, { "epoch": 0.0873769522418171, "grad_norm": 0.87109375, "learning_rate": 0.0019828119754006757, "loss": 3.5455, "step": 1256 }, { "epoch": 0.08744651987895231, "grad_norm": 1.046875, "learning_rate": 0.0019827703518662415, "loss": 3.2653, "step": 1257 }, { "epoch": 0.08751608751608751, "grad_norm": 1.265625, "learning_rate": 0.0019827286784316824, "loss": 3.3314, "step": 1258 }, { "epoch": 0.08758565515322272, "grad_norm": 0.94140625, "learning_rate": 0.0019826869550991144, "loss": 3.4117, "step": 1259 }, { "epoch": 0.08765522279035792, "grad_norm": 1.03125, "learning_rate": 0.0019826451818706556, "loss": 3.286, "step": 1260 }, { "epoch": 0.08772479042749314, "grad_norm": 0.94921875, "learning_rate": 0.001982603358748428, "loss": 3.2271, "step": 1261 }, { "epoch": 0.08779435806462833, "grad_norm": 0.890625, "learning_rate": 0.001982561485734554, "loss": 3.4454, "step": 1262 }, { "epoch": 0.08786392570176355, "grad_norm": 1.109375, "learning_rate": 0.0019825195628311604, "loss": 3.2706, "step": 1263 }, { "epoch": 0.08793349333889874, "grad_norm": 1.1796875, "learning_rate": 0.0019824775900403754, "loss": 3.1977, "step": 1264 }, { "epoch": 0.08800306097603394, "grad_norm": 0.89453125, "learning_rate": 0.0019824355673643307, "loss": 3.0425, "step": 1265 }, { "epoch": 0.08807262861316915, "grad_norm": 0.6640625, "learning_rate": 0.0019823934948051598, "loss": 3.3867, "step": 1266 }, { "epoch": 0.08814219625030435, "grad_norm": 0.828125, "learning_rate": 0.001982351372364999, "loss": 3.6068, "step": 1267 }, { "epoch": 0.08821176388743956, "grad_norm": 1.015625, "learning_rate": 0.0019823092000459865, "loss": 3.5567, "step": 1268 }, { "epoch": 0.08828133152457476, "grad_norm": 1.1015625, "learning_rate": 0.001982266977850264, "loss": 3.138, "step": 1269 }, { "epoch": 0.08835089916170998, "grad_norm": 1.0078125, "learning_rate": 0.001982224705779976, "loss": 2.9568, "step": 1270 }, { "epoch": 0.08842046679884517, "grad_norm": 1.2265625, "learning_rate": 0.0019821823838372674, "loss": 2.9139, "step": 1271 }, { "epoch": 0.08849003443598039, "grad_norm": 1.2109375, "learning_rate": 0.0019821400120242885, "loss": 3.6534, "step": 1272 }, { "epoch": 0.08855960207311558, "grad_norm": 1.0390625, "learning_rate": 0.00198209759034319, "loss": 3.1228, "step": 1273 }, { "epoch": 0.0886291697102508, "grad_norm": 1.171875, "learning_rate": 0.0019820551187961256, "loss": 3.2362, "step": 1274 }, { "epoch": 0.088698737347386, "grad_norm": 0.94921875, "learning_rate": 0.001982012597385253, "loss": 3.4493, "step": 1275 }, { "epoch": 0.0887683049845212, "grad_norm": 1.0625, "learning_rate": 0.0019819700261127296, "loss": 3.139, "step": 1276 }, { "epoch": 0.0888378726216564, "grad_norm": 0.95703125, "learning_rate": 0.0019819274049807183, "loss": 3.2712, "step": 1277 }, { "epoch": 0.0889074402587916, "grad_norm": 0.953125, "learning_rate": 0.001981884733991382, "loss": 3.0725, "step": 1278 }, { "epoch": 0.08897700789592682, "grad_norm": 1.03125, "learning_rate": 0.0019818420131468887, "loss": 3.3549, "step": 1279 }, { "epoch": 0.08904657553306201, "grad_norm": 1.15625, "learning_rate": 0.0019817992424494067, "loss": 3.5994, "step": 1280 }, { "epoch": 0.08911614317019723, "grad_norm": 0.8515625, "learning_rate": 0.0019817564219011077, "loss": 3.1747, "step": 1281 }, { "epoch": 0.08918571080733242, "grad_norm": 3.953125, "learning_rate": 0.001981713551504166, "loss": 3.5693, "step": 1282 }, { "epoch": 0.08925527844446764, "grad_norm": 0.91796875, "learning_rate": 0.0019816706312607586, "loss": 3.6042, "step": 1283 }, { "epoch": 0.08932484608160284, "grad_norm": 1.25, "learning_rate": 0.0019816276611730643, "loss": 3.2726, "step": 1284 }, { "epoch": 0.08939441371873805, "grad_norm": 1.1171875, "learning_rate": 0.001981584641243265, "loss": 3.1582, "step": 1285 }, { "epoch": 0.08946398135587325, "grad_norm": 1.0078125, "learning_rate": 0.001981541571473545, "loss": 3.5227, "step": 1286 }, { "epoch": 0.08953354899300846, "grad_norm": 1.1953125, "learning_rate": 0.001981498451866092, "loss": 3.4966, "step": 1287 }, { "epoch": 0.08960311663014366, "grad_norm": 0.9375, "learning_rate": 0.001981455282423094, "loss": 2.9587, "step": 1288 }, { "epoch": 0.08967268426727887, "grad_norm": 0.88671875, "learning_rate": 0.0019814120631467444, "loss": 3.4397, "step": 1289 }, { "epoch": 0.08974225190441407, "grad_norm": 0.953125, "learning_rate": 0.0019813687940392366, "loss": 3.5678, "step": 1290 }, { "epoch": 0.08981181954154926, "grad_norm": 0.9921875, "learning_rate": 0.001981325475102768, "loss": 3.3464, "step": 1291 }, { "epoch": 0.08988138717868448, "grad_norm": 1.109375, "learning_rate": 0.0019812821063395374, "loss": 3.3282, "step": 1292 }, { "epoch": 0.08995095481581968, "grad_norm": 1.1953125, "learning_rate": 0.001981238687751748, "loss": 3.6958, "step": 1293 }, { "epoch": 0.09002052245295489, "grad_norm": 0.98828125, "learning_rate": 0.0019811952193416037, "loss": 3.4398, "step": 1294 }, { "epoch": 0.09009009009009009, "grad_norm": 1.21875, "learning_rate": 0.001981151701111312, "loss": 3.1065, "step": 1295 }, { "epoch": 0.0901596577272253, "grad_norm": 1.0625, "learning_rate": 0.0019811081330630823, "loss": 3.4621, "step": 1296 }, { "epoch": 0.0902292253643605, "grad_norm": 1.1484375, "learning_rate": 0.0019810645151991262, "loss": 3.3886, "step": 1297 }, { "epoch": 0.09029879300149571, "grad_norm": 0.92578125, "learning_rate": 0.0019810208475216596, "loss": 3.4419, "step": 1298 }, { "epoch": 0.0903683606386309, "grad_norm": 1.0546875, "learning_rate": 0.0019809771300328986, "loss": 3.3693, "step": 1299 }, { "epoch": 0.09043792827576612, "grad_norm": 1.2265625, "learning_rate": 0.0019809333627350636, "loss": 3.2262, "step": 1300 }, { "epoch": 0.09050749591290132, "grad_norm": 1.1015625, "learning_rate": 0.001980889545630377, "loss": 3.2679, "step": 1301 }, { "epoch": 0.09057706355003653, "grad_norm": 0.8125, "learning_rate": 0.001980845678721063, "loss": 3.2695, "step": 1302 }, { "epoch": 0.09064663118717173, "grad_norm": 1.0, "learning_rate": 0.001980801762009349, "loss": 3.4142, "step": 1303 }, { "epoch": 0.09071619882430693, "grad_norm": 1.0, "learning_rate": 0.0019807577954974657, "loss": 3.063, "step": 1304 }, { "epoch": 0.09078576646144214, "grad_norm": 0.640625, "learning_rate": 0.001980713779187645, "loss": 3.5843, "step": 1305 }, { "epoch": 0.09085533409857734, "grad_norm": 1.0703125, "learning_rate": 0.001980669713082121, "loss": 3.2546, "step": 1306 }, { "epoch": 0.09092490173571255, "grad_norm": 0.84375, "learning_rate": 0.0019806255971831326, "loss": 3.5231, "step": 1307 }, { "epoch": 0.09099446937284775, "grad_norm": 0.85546875, "learning_rate": 0.0019805814314929186, "loss": 3.2505, "step": 1308 }, { "epoch": 0.09106403700998296, "grad_norm": 0.91796875, "learning_rate": 0.0019805372160137226, "loss": 3.1583, "step": 1309 }, { "epoch": 0.09113360464711816, "grad_norm": 1.3125, "learning_rate": 0.0019804929507477886, "loss": 3.1289, "step": 1310 }, { "epoch": 0.09120317228425337, "grad_norm": 0.9921875, "learning_rate": 0.0019804486356973646, "loss": 3.1288, "step": 1311 }, { "epoch": 0.09127273992138857, "grad_norm": 0.74609375, "learning_rate": 0.001980404270864701, "loss": 3.6134, "step": 1312 }, { "epoch": 0.09134230755852378, "grad_norm": 1.453125, "learning_rate": 0.00198035985625205, "loss": 3.2821, "step": 1313 }, { "epoch": 0.09141187519565898, "grad_norm": 1.0625, "learning_rate": 0.0019803153918616667, "loss": 3.1245, "step": 1314 }, { "epoch": 0.09148144283279419, "grad_norm": 1.34375, "learning_rate": 0.001980270877695809, "loss": 2.9684, "step": 1315 }, { "epoch": 0.09155101046992939, "grad_norm": 0.81640625, "learning_rate": 0.001980226313756737, "loss": 3.5429, "step": 1316 }, { "epoch": 0.09162057810706459, "grad_norm": 0.9140625, "learning_rate": 0.001980181700046714, "loss": 3.2961, "step": 1317 }, { "epoch": 0.0916901457441998, "grad_norm": 1.09375, "learning_rate": 0.001980137036568004, "loss": 3.4082, "step": 1318 }, { "epoch": 0.091759713381335, "grad_norm": 0.9453125, "learning_rate": 0.001980092323322876, "loss": 3.447, "step": 1319 }, { "epoch": 0.09182928101847021, "grad_norm": 0.8828125, "learning_rate": 0.0019800475603135997, "loss": 3.417, "step": 1320 }, { "epoch": 0.09189884865560541, "grad_norm": 0.81640625, "learning_rate": 0.0019800027475424483, "loss": 3.7718, "step": 1321 }, { "epoch": 0.09196841629274062, "grad_norm": 1.1484375, "learning_rate": 0.0019799578850116972, "loss": 2.9843, "step": 1322 }, { "epoch": 0.09203798392987582, "grad_norm": 0.79296875, "learning_rate": 0.0019799129727236233, "loss": 3.3471, "step": 1323 }, { "epoch": 0.09210755156701103, "grad_norm": 0.94140625, "learning_rate": 0.001979868010680508, "loss": 3.2991, "step": 1324 }, { "epoch": 0.09217711920414623, "grad_norm": 0.984375, "learning_rate": 0.0019798229988846347, "loss": 3.3589, "step": 1325 }, { "epoch": 0.09224668684128144, "grad_norm": 1.1015625, "learning_rate": 0.0019797779373382876, "loss": 3.2222, "step": 1326 }, { "epoch": 0.09231625447841664, "grad_norm": 0.95703125, "learning_rate": 0.001979732826043755, "loss": 3.3215, "step": 1327 }, { "epoch": 0.09238582211555185, "grad_norm": 0.83203125, "learning_rate": 0.0019796876650033284, "loss": 3.4502, "step": 1328 }, { "epoch": 0.09245538975268705, "grad_norm": 0.953125, "learning_rate": 0.0019796424542192995, "loss": 3.2011, "step": 1329 }, { "epoch": 0.09252495738982225, "grad_norm": 1.171875, "learning_rate": 0.001979597193693965, "loss": 3.5101, "step": 1330 }, { "epoch": 0.09259452502695746, "grad_norm": 0.93359375, "learning_rate": 0.001979551883429623, "loss": 3.1729, "step": 1331 }, { "epoch": 0.09266409266409266, "grad_norm": 0.82421875, "learning_rate": 0.001979506523428573, "loss": 3.4822, "step": 1332 }, { "epoch": 0.09273366030122787, "grad_norm": 0.6875, "learning_rate": 0.001979461113693119, "loss": 3.6057, "step": 1333 }, { "epoch": 0.09280322793836307, "grad_norm": 1.171875, "learning_rate": 0.001979415654225566, "loss": 3.4683, "step": 1334 }, { "epoch": 0.09287279557549828, "grad_norm": 0.890625, "learning_rate": 0.001979370145028223, "loss": 3.489, "step": 1335 }, { "epoch": 0.09294236321263348, "grad_norm": 1.140625, "learning_rate": 0.001979324586103401, "loss": 3.2464, "step": 1336 }, { "epoch": 0.09301193084976869, "grad_norm": 1.0625, "learning_rate": 0.0019792789774534117, "loss": 3.1344, "step": 1337 }, { "epoch": 0.09308149848690389, "grad_norm": 1.1796875, "learning_rate": 0.0019792333190805727, "loss": 3.333, "step": 1338 }, { "epoch": 0.0931510661240391, "grad_norm": 1.03125, "learning_rate": 0.001979187610987201, "loss": 3.5017, "step": 1339 }, { "epoch": 0.0932206337611743, "grad_norm": 0.87109375, "learning_rate": 0.0019791418531756176, "loss": 3.2691, "step": 1340 }, { "epoch": 0.0932902013983095, "grad_norm": 0.81640625, "learning_rate": 0.001979096045648147, "loss": 3.5223, "step": 1341 }, { "epoch": 0.09335976903544471, "grad_norm": 0.83203125, "learning_rate": 0.0019790501884071137, "loss": 3.3475, "step": 1342 }, { "epoch": 0.09342933667257991, "grad_norm": 0.75390625, "learning_rate": 0.0019790042814548463, "loss": 3.3571, "step": 1343 }, { "epoch": 0.09349890430971512, "grad_norm": 1.078125, "learning_rate": 0.0019789583247936766, "loss": 3.123, "step": 1344 }, { "epoch": 0.09356847194685032, "grad_norm": 1.078125, "learning_rate": 0.0019789123184259373, "loss": 3.2013, "step": 1345 }, { "epoch": 0.09363803958398553, "grad_norm": 0.8359375, "learning_rate": 0.001978866262353964, "loss": 3.4691, "step": 1346 }, { "epoch": 0.09370760722112073, "grad_norm": 0.9375, "learning_rate": 0.0019788201565800966, "loss": 3.2226, "step": 1347 }, { "epoch": 0.09377717485825594, "grad_norm": 0.93359375, "learning_rate": 0.001978774001106675, "loss": 3.3479, "step": 1348 }, { "epoch": 0.09384674249539114, "grad_norm": 1.2265625, "learning_rate": 0.001978727795936043, "loss": 3.2993, "step": 1349 }, { "epoch": 0.09391631013252635, "grad_norm": 0.93359375, "learning_rate": 0.0019786815410705464, "loss": 3.4622, "step": 1350 }, { "epoch": 0.09398587776966155, "grad_norm": 0.97265625, "learning_rate": 0.0019786352365125347, "loss": 3.5396, "step": 1351 }, { "epoch": 0.09405544540679676, "grad_norm": 1.109375, "learning_rate": 0.001978588882264358, "loss": 3.6279, "step": 1352 }, { "epoch": 0.09412501304393196, "grad_norm": 1.1796875, "learning_rate": 0.00197854247832837, "loss": 3.3464, "step": 1353 }, { "epoch": 0.09419458068106716, "grad_norm": 1.1875, "learning_rate": 0.0019784960247069276, "loss": 3.1044, "step": 1354 }, { "epoch": 0.09426414831820237, "grad_norm": 0.8515625, "learning_rate": 0.001978449521402389, "loss": 3.1176, "step": 1355 }, { "epoch": 0.09433371595533757, "grad_norm": 1.0234375, "learning_rate": 0.0019784029684171154, "loss": 3.3721, "step": 1356 }, { "epoch": 0.09440328359247278, "grad_norm": 1.0390625, "learning_rate": 0.0019783563657534706, "loss": 3.4134, "step": 1357 }, { "epoch": 0.09447285122960798, "grad_norm": 0.95703125, "learning_rate": 0.001978309713413821, "loss": 3.091, "step": 1358 }, { "epoch": 0.0945424188667432, "grad_norm": 0.9921875, "learning_rate": 0.0019782630114005347, "loss": 3.1234, "step": 1359 }, { "epoch": 0.09461198650387839, "grad_norm": 1.0, "learning_rate": 0.0019782162597159836, "loss": 3.0553, "step": 1360 }, { "epoch": 0.0946815541410136, "grad_norm": 1.078125, "learning_rate": 0.0019781694583625416, "loss": 3.4159, "step": 1361 }, { "epoch": 0.0947511217781488, "grad_norm": 0.7421875, "learning_rate": 0.0019781226073425848, "loss": 3.199, "step": 1362 }, { "epoch": 0.09482068941528402, "grad_norm": 1.0703125, "learning_rate": 0.0019780757066584923, "loss": 3.4261, "step": 1363 }, { "epoch": 0.09489025705241921, "grad_norm": 0.90625, "learning_rate": 0.001978028756312645, "loss": 3.0732, "step": 1364 }, { "epoch": 0.09495982468955443, "grad_norm": 0.88671875, "learning_rate": 0.001977981756307427, "loss": 3.3938, "step": 1365 }, { "epoch": 0.09502939232668962, "grad_norm": 1.0546875, "learning_rate": 0.001977934706645225, "loss": 3.3882, "step": 1366 }, { "epoch": 0.09509895996382482, "grad_norm": 1.015625, "learning_rate": 0.001977887607328428, "loss": 2.9706, "step": 1367 }, { "epoch": 0.09516852760096003, "grad_norm": 0.98046875, "learning_rate": 0.001977840458359427, "loss": 2.8586, "step": 1368 }, { "epoch": 0.09523809523809523, "grad_norm": 1.0078125, "learning_rate": 0.001977793259740616, "loss": 3.1019, "step": 1369 }, { "epoch": 0.09530766287523044, "grad_norm": 1.2265625, "learning_rate": 0.001977746011474392, "loss": 3.264, "step": 1370 }, { "epoch": 0.09537723051236564, "grad_norm": 1.234375, "learning_rate": 0.001977698713563154, "loss": 3.151, "step": 1371 }, { "epoch": 0.09544679814950086, "grad_norm": 1.265625, "learning_rate": 0.0019776513660093027, "loss": 3.145, "step": 1372 }, { "epoch": 0.09551636578663605, "grad_norm": 0.875, "learning_rate": 0.001977603968815243, "loss": 3.6223, "step": 1373 }, { "epoch": 0.09558593342377127, "grad_norm": 0.8125, "learning_rate": 0.001977556521983381, "loss": 3.2582, "step": 1374 }, { "epoch": 0.09565550106090646, "grad_norm": 1.546875, "learning_rate": 0.0019775090255161262, "loss": 3.3363, "step": 1375 }, { "epoch": 0.09572506869804168, "grad_norm": 0.86328125, "learning_rate": 0.0019774614794158905, "loss": 3.116, "step": 1376 }, { "epoch": 0.09579463633517687, "grad_norm": 0.8671875, "learning_rate": 0.0019774138836850873, "loss": 3.6931, "step": 1377 }, { "epoch": 0.09586420397231209, "grad_norm": 1.015625, "learning_rate": 0.0019773662383261335, "loss": 3.1336, "step": 1378 }, { "epoch": 0.09593377160944729, "grad_norm": 1.1484375, "learning_rate": 0.0019773185433414487, "loss": 3.2009, "step": 1379 }, { "epoch": 0.09600333924658248, "grad_norm": 0.8984375, "learning_rate": 0.001977270798733454, "loss": 3.4753, "step": 1380 }, { "epoch": 0.0960729068837177, "grad_norm": 1.0234375, "learning_rate": 0.0019772230045045744, "loss": 3.1052, "step": 1381 }, { "epoch": 0.0961424745208529, "grad_norm": 0.82421875, "learning_rate": 0.001977175160657236, "loss": 3.0842, "step": 1382 }, { "epoch": 0.0962120421579881, "grad_norm": 0.890625, "learning_rate": 0.0019771272671938677, "loss": 3.3699, "step": 1383 }, { "epoch": 0.0962816097951233, "grad_norm": 0.87109375, "learning_rate": 0.0019770793241169027, "loss": 3.4106, "step": 1384 }, { "epoch": 0.09635117743225852, "grad_norm": 0.828125, "learning_rate": 0.001977031331428774, "loss": 3.2359, "step": 1385 }, { "epoch": 0.09642074506939372, "grad_norm": 1.0078125, "learning_rate": 0.0019769832891319192, "loss": 3.3751, "step": 1386 }, { "epoch": 0.09649031270652893, "grad_norm": 0.87109375, "learning_rate": 0.001976935197228777, "loss": 3.5038, "step": 1387 }, { "epoch": 0.09655988034366413, "grad_norm": 1.140625, "learning_rate": 0.00197688705572179, "loss": 3.3873, "step": 1388 }, { "epoch": 0.09662944798079934, "grad_norm": 0.70703125, "learning_rate": 0.0019768388646134016, "loss": 3.3603, "step": 1389 }, { "epoch": 0.09669901561793454, "grad_norm": 1.2890625, "learning_rate": 0.0019767906239060596, "loss": 3.106, "step": 1390 }, { "epoch": 0.09676858325506975, "grad_norm": 0.890625, "learning_rate": 0.001976742333602213, "loss": 3.4672, "step": 1391 }, { "epoch": 0.09683815089220495, "grad_norm": 1.328125, "learning_rate": 0.0019766939937043144, "loss": 3.1086, "step": 1392 }, { "epoch": 0.09690771852934014, "grad_norm": 1.046875, "learning_rate": 0.0019766456042148175, "loss": 3.3279, "step": 1393 }, { "epoch": 0.09697728616647536, "grad_norm": 0.6875, "learning_rate": 0.001976597165136179, "loss": 3.3536, "step": 1394 }, { "epoch": 0.09704685380361056, "grad_norm": 1.0625, "learning_rate": 0.001976548676470859, "loss": 3.0432, "step": 1395 }, { "epoch": 0.09711642144074577, "grad_norm": 0.70703125, "learning_rate": 0.0019765001382213198, "loss": 3.5107, "step": 1396 }, { "epoch": 0.09718598907788097, "grad_norm": 0.87109375, "learning_rate": 0.001976451550390025, "loss": 2.7966, "step": 1397 }, { "epoch": 0.09725555671501618, "grad_norm": 1.078125, "learning_rate": 0.0019764029129794424, "loss": 3.3384, "step": 1398 }, { "epoch": 0.09732512435215138, "grad_norm": 0.87109375, "learning_rate": 0.001976354225992041, "loss": 3.1992, "step": 1399 }, { "epoch": 0.09739469198928659, "grad_norm": 0.98828125, "learning_rate": 0.001976305489430294, "loss": 3.2581, "step": 1400 }, { "epoch": 0.09746425962642179, "grad_norm": 0.90234375, "learning_rate": 0.0019762567032966744, "loss": 3.0971, "step": 1401 }, { "epoch": 0.097533827263557, "grad_norm": 1.0703125, "learning_rate": 0.0019762078675936608, "loss": 2.7413, "step": 1402 }, { "epoch": 0.0976033949006922, "grad_norm": 0.80859375, "learning_rate": 0.0019761589823237315, "loss": 3.189, "step": 1403 }, { "epoch": 0.0976729625378274, "grad_norm": 0.76953125, "learning_rate": 0.0019761100474893693, "loss": 3.2528, "step": 1404 }, { "epoch": 0.09774253017496261, "grad_norm": 1.0, "learning_rate": 0.0019760610630930593, "loss": 2.9884, "step": 1405 }, { "epoch": 0.0978120978120978, "grad_norm": 0.859375, "learning_rate": 0.0019760120291372877, "loss": 3.2232, "step": 1406 }, { "epoch": 0.09788166544923302, "grad_norm": 0.875, "learning_rate": 0.001975962945624545, "loss": 3.3636, "step": 1407 }, { "epoch": 0.09795123308636822, "grad_norm": 1.1171875, "learning_rate": 0.0019759138125573232, "loss": 3.3866, "step": 1408 }, { "epoch": 0.09802080072350343, "grad_norm": 1.1640625, "learning_rate": 0.0019758646299381168, "loss": 3.3013, "step": 1409 }, { "epoch": 0.09809036836063863, "grad_norm": 0.984375, "learning_rate": 0.0019758153977694234, "loss": 3.3126, "step": 1410 }, { "epoch": 0.09815993599777384, "grad_norm": 1.1171875, "learning_rate": 0.001975766116053743, "loss": 3.2412, "step": 1411 }, { "epoch": 0.09822950363490904, "grad_norm": 0.61328125, "learning_rate": 0.0019757167847935767, "loss": 3.6484, "step": 1412 }, { "epoch": 0.09829907127204425, "grad_norm": 0.9921875, "learning_rate": 0.00197566740399143, "loss": 3.5801, "step": 1413 }, { "epoch": 0.09836863890917945, "grad_norm": 1.015625, "learning_rate": 0.0019756179736498108, "loss": 3.2148, "step": 1414 }, { "epoch": 0.09843820654631466, "grad_norm": 0.8203125, "learning_rate": 0.001975568493771228, "loss": 3.4016, "step": 1415 }, { "epoch": 0.09850777418344986, "grad_norm": 0.91015625, "learning_rate": 0.0019755189643581943, "loss": 3.5892, "step": 1416 }, { "epoch": 0.09857734182058506, "grad_norm": 0.8203125, "learning_rate": 0.001975469385413225, "loss": 3.1497, "step": 1417 }, { "epoch": 0.09864690945772027, "grad_norm": 0.921875, "learning_rate": 0.0019754197569388367, "loss": 3.2365, "step": 1418 }, { "epoch": 0.09871647709485547, "grad_norm": 1.09375, "learning_rate": 0.00197537007893755, "loss": 3.446, "step": 1419 }, { "epoch": 0.09878604473199068, "grad_norm": 0.7421875, "learning_rate": 0.001975320351411886, "loss": 3.2635, "step": 1420 }, { "epoch": 0.09885561236912588, "grad_norm": 0.96484375, "learning_rate": 0.0019752705743643715, "loss": 3.2567, "step": 1421 }, { "epoch": 0.09892518000626109, "grad_norm": 0.734375, "learning_rate": 0.0019752207477975324, "loss": 3.5977, "step": 1422 }, { "epoch": 0.09899474764339629, "grad_norm": 0.875, "learning_rate": 0.0019751708717138995, "loss": 3.2144, "step": 1423 }, { "epoch": 0.0990643152805315, "grad_norm": 0.84765625, "learning_rate": 0.0019751209461160045, "loss": 3.4039, "step": 1424 }, { "epoch": 0.0991338829176667, "grad_norm": 0.84765625, "learning_rate": 0.0019750709710063836, "loss": 3.4512, "step": 1425 }, { "epoch": 0.09920345055480191, "grad_norm": 0.92578125, "learning_rate": 0.001975020946387573, "loss": 3.3055, "step": 1426 }, { "epoch": 0.09927301819193711, "grad_norm": 1.0625, "learning_rate": 0.001974970872262113, "loss": 3.291, "step": 1427 }, { "epoch": 0.09934258582907232, "grad_norm": 1.2421875, "learning_rate": 0.001974920748632547, "loss": 3.3701, "step": 1428 }, { "epoch": 0.09941215346620752, "grad_norm": 1.109375, "learning_rate": 0.0019748705755014188, "loss": 3.3214, "step": 1429 }, { "epoch": 0.09948172110334272, "grad_norm": 0.80859375, "learning_rate": 0.001974820352871277, "loss": 3.5112, "step": 1430 }, { "epoch": 0.09955128874047793, "grad_norm": 0.78125, "learning_rate": 0.0019747700807446703, "loss": 3.3854, "step": 1431 }, { "epoch": 0.09962085637761313, "grad_norm": 0.9375, "learning_rate": 0.0019747197591241526, "loss": 3.2159, "step": 1432 }, { "epoch": 0.09969042401474834, "grad_norm": 0.85546875, "learning_rate": 0.0019746693880122786, "loss": 3.06, "step": 1433 }, { "epoch": 0.09975999165188354, "grad_norm": 0.8671875, "learning_rate": 0.001974618967411606, "loss": 3.1673, "step": 1434 }, { "epoch": 0.09982955928901875, "grad_norm": 0.83984375, "learning_rate": 0.0019745684973246943, "loss": 3.449, "step": 1435 }, { "epoch": 0.09989912692615395, "grad_norm": 0.8359375, "learning_rate": 0.0019745179777541063, "loss": 3.5406, "step": 1436 }, { "epoch": 0.09996869456328916, "grad_norm": 1.046875, "learning_rate": 0.0019744674087024076, "loss": 3.5287, "step": 1437 }, { "epoch": 0.10003826220042436, "grad_norm": 1.09375, "learning_rate": 0.0019744167901721657, "loss": 3.2567, "step": 1438 }, { "epoch": 0.10010782983755957, "grad_norm": 0.89453125, "learning_rate": 0.0019743661221659505, "loss": 3.0954, "step": 1439 }, { "epoch": 0.10017739747469477, "grad_norm": 1.0078125, "learning_rate": 0.0019743154046863347, "loss": 3.3116, "step": 1440 }, { "epoch": 0.10024696511182998, "grad_norm": 0.77734375, "learning_rate": 0.001974264637735894, "loss": 3.5275, "step": 1441 }, { "epoch": 0.10031653274896518, "grad_norm": 0.9296875, "learning_rate": 0.0019742138213172046, "loss": 3.3459, "step": 1442 }, { "epoch": 0.10038610038610038, "grad_norm": 1.171875, "learning_rate": 0.0019741629554328485, "loss": 3.5695, "step": 1443 }, { "epoch": 0.10045566802323559, "grad_norm": 0.70703125, "learning_rate": 0.0019741120400854077, "loss": 3.4048, "step": 1444 }, { "epoch": 0.10052523566037079, "grad_norm": 1.0703125, "learning_rate": 0.0019740610752774675, "loss": 2.9705, "step": 1445 }, { "epoch": 0.100594803297506, "grad_norm": 0.765625, "learning_rate": 0.001974010061011615, "loss": 3.5704, "step": 1446 }, { "epoch": 0.1006643709346412, "grad_norm": 0.83203125, "learning_rate": 0.0019739589972904417, "loss": 3.2439, "step": 1447 }, { "epoch": 0.10073393857177641, "grad_norm": 0.76953125, "learning_rate": 0.001973907884116539, "loss": 3.3124, "step": 1448 }, { "epoch": 0.10080350620891161, "grad_norm": 0.92578125, "learning_rate": 0.001973856721492503, "loss": 3.5505, "step": 1449 }, { "epoch": 0.10087307384604682, "grad_norm": 1.171875, "learning_rate": 0.001973805509420931, "loss": 3.4526, "step": 1450 }, { "epoch": 0.10094264148318202, "grad_norm": 1.0390625, "learning_rate": 0.0019737542479044243, "loss": 3.2931, "step": 1451 }, { "epoch": 0.10101220912031723, "grad_norm": 0.921875, "learning_rate": 0.0019737029369455844, "loss": 3.1317, "step": 1452 }, { "epoch": 0.10108177675745243, "grad_norm": 1.046875, "learning_rate": 0.0019736515765470175, "loss": 3.0454, "step": 1453 }, { "epoch": 0.10115134439458764, "grad_norm": 0.921875, "learning_rate": 0.0019736001667113308, "loss": 3.2824, "step": 1454 }, { "epoch": 0.10122091203172284, "grad_norm": 0.7265625, "learning_rate": 0.001973548707441135, "loss": 3.1641, "step": 1455 }, { "epoch": 0.10129047966885804, "grad_norm": 1.0546875, "learning_rate": 0.0019734971987390433, "loss": 3.6603, "step": 1456 }, { "epoch": 0.10136004730599325, "grad_norm": 0.83984375, "learning_rate": 0.00197344564060767, "loss": 3.3129, "step": 1457 }, { "epoch": 0.10142961494312845, "grad_norm": 0.87109375, "learning_rate": 0.001973394033049634, "loss": 3.5636, "step": 1458 }, { "epoch": 0.10149918258026366, "grad_norm": 0.80859375, "learning_rate": 0.001973342376067555, "loss": 3.5552, "step": 1459 }, { "epoch": 0.10156875021739886, "grad_norm": 0.8359375, "learning_rate": 0.001973290669664057, "loss": 3.288, "step": 1460 }, { "epoch": 0.10163831785453407, "grad_norm": 0.8515625, "learning_rate": 0.0019732389138417635, "loss": 3.0768, "step": 1461 }, { "epoch": 0.10170788549166927, "grad_norm": 1.09375, "learning_rate": 0.001973187108603304, "loss": 3.5058, "step": 1462 }, { "epoch": 0.10177745312880448, "grad_norm": 1.1796875, "learning_rate": 0.001973135253951308, "loss": 3.5485, "step": 1463 }, { "epoch": 0.10184702076593968, "grad_norm": 0.87890625, "learning_rate": 0.001973083349888409, "loss": 3.6063, "step": 1464 }, { "epoch": 0.1019165884030749, "grad_norm": 1.2890625, "learning_rate": 0.001973031396417242, "loss": 3.6008, "step": 1465 }, { "epoch": 0.1019861560402101, "grad_norm": 1.0625, "learning_rate": 0.001972979393540445, "loss": 3.136, "step": 1466 }, { "epoch": 0.1020557236773453, "grad_norm": 1.0234375, "learning_rate": 0.0019729273412606592, "loss": 3.3279, "step": 1467 }, { "epoch": 0.1021252913144805, "grad_norm": 1.0078125, "learning_rate": 0.0019728752395805267, "loss": 3.2092, "step": 1468 }, { "epoch": 0.1021948589516157, "grad_norm": 0.95703125, "learning_rate": 0.001972823088502693, "loss": 3.56, "step": 1469 }, { "epoch": 0.10226442658875091, "grad_norm": 0.859375, "learning_rate": 0.0019727708880298064, "loss": 3.3485, "step": 1470 }, { "epoch": 0.10233399422588611, "grad_norm": 0.89453125, "learning_rate": 0.001972718638164517, "loss": 3.2539, "step": 1471 }, { "epoch": 0.10240356186302133, "grad_norm": 1.6328125, "learning_rate": 0.0019726663389094783, "loss": 3.4803, "step": 1472 }, { "epoch": 0.10247312950015652, "grad_norm": 0.75, "learning_rate": 0.0019726139902673454, "loss": 3.5746, "step": 1473 }, { "epoch": 0.10254269713729174, "grad_norm": 0.84765625, "learning_rate": 0.0019725615922407762, "loss": 3.2785, "step": 1474 }, { "epoch": 0.10261226477442693, "grad_norm": 0.87890625, "learning_rate": 0.0019725091448324315, "loss": 3.046, "step": 1475 }, { "epoch": 0.10268183241156215, "grad_norm": 0.84375, "learning_rate": 0.0019724566480449745, "loss": 3.5367, "step": 1476 }, { "epoch": 0.10275140004869734, "grad_norm": 0.9765625, "learning_rate": 0.0019724041018810705, "loss": 3.6516, "step": 1477 }, { "epoch": 0.10282096768583256, "grad_norm": 0.765625, "learning_rate": 0.001972351506343387, "loss": 3.448, "step": 1478 }, { "epoch": 0.10289053532296775, "grad_norm": 0.95703125, "learning_rate": 0.0019722988614345955, "loss": 2.9195, "step": 1479 }, { "epoch": 0.10296010296010295, "grad_norm": 0.734375, "learning_rate": 0.0019722461671573682, "loss": 3.3375, "step": 1480 }, { "epoch": 0.10302967059723817, "grad_norm": 0.80078125, "learning_rate": 0.0019721934235143817, "loss": 3.1518, "step": 1481 }, { "epoch": 0.10309923823437336, "grad_norm": 0.91796875, "learning_rate": 0.001972140630508313, "loss": 3.3298, "step": 1482 }, { "epoch": 0.10316880587150858, "grad_norm": 0.8984375, "learning_rate": 0.0019720877881418426, "loss": 3.4051, "step": 1483 }, { "epoch": 0.10323837350864377, "grad_norm": 1.125, "learning_rate": 0.001972034896417654, "loss": 3.2962, "step": 1484 }, { "epoch": 0.10330794114577899, "grad_norm": 0.75, "learning_rate": 0.001971981955338433, "loss": 3.2481, "step": 1485 }, { "epoch": 0.10337750878291418, "grad_norm": 0.87890625, "learning_rate": 0.001971928964906868, "loss": 3.1045, "step": 1486 }, { "epoch": 0.1034470764200494, "grad_norm": 0.8046875, "learning_rate": 0.0019718759251256485, "loss": 3.4065, "step": 1487 }, { "epoch": 0.1035166440571846, "grad_norm": 0.95703125, "learning_rate": 0.001971822835997468, "loss": 3.0315, "step": 1488 }, { "epoch": 0.10358621169431981, "grad_norm": 0.9453125, "learning_rate": 0.001971769697525023, "loss": 3.1924, "step": 1489 }, { "epoch": 0.103655779331455, "grad_norm": 0.921875, "learning_rate": 0.00197171650971101, "loss": 3.1543, "step": 1490 }, { "epoch": 0.10372534696859022, "grad_norm": 0.86328125, "learning_rate": 0.001971663272558131, "loss": 3.4982, "step": 1491 }, { "epoch": 0.10379491460572542, "grad_norm": 0.8671875, "learning_rate": 0.001971609986069088, "loss": 3.4726, "step": 1492 }, { "epoch": 0.10386448224286061, "grad_norm": 1.0078125, "learning_rate": 0.0019715566502465877, "loss": 3.1342, "step": 1493 }, { "epoch": 0.10393404987999583, "grad_norm": 0.8671875, "learning_rate": 0.0019715032650933374, "loss": 3.8033, "step": 1494 }, { "epoch": 0.10400361751713103, "grad_norm": 0.91015625, "learning_rate": 0.0019714498306120484, "loss": 3.3207, "step": 1495 }, { "epoch": 0.10407318515426624, "grad_norm": 0.65234375, "learning_rate": 0.001971396346805433, "loss": 3.5685, "step": 1496 }, { "epoch": 0.10414275279140144, "grad_norm": 0.609375, "learning_rate": 0.0019713428136762076, "loss": 3.7468, "step": 1497 }, { "epoch": 0.10421232042853665, "grad_norm": 0.9609375, "learning_rate": 0.00197128923122709, "loss": 3.2513, "step": 1498 }, { "epoch": 0.10428188806567185, "grad_norm": 0.921875, "learning_rate": 0.0019712355994608013, "loss": 3.2436, "step": 1499 }, { "epoch": 0.10435145570280706, "grad_norm": 0.7734375, "learning_rate": 0.0019711819183800636, "loss": 3.3017, "step": 1500 }, { "epoch": 0.10442102333994226, "grad_norm": 0.8671875, "learning_rate": 0.0019711281879876037, "loss": 2.9926, "step": 1501 }, { "epoch": 0.10449059097707747, "grad_norm": 1.1484375, "learning_rate": 0.0019710744082861486, "loss": 3.0992, "step": 1502 }, { "epoch": 0.10456015861421267, "grad_norm": 0.921875, "learning_rate": 0.0019710205792784303, "loss": 3.4089, "step": 1503 }, { "epoch": 0.10462972625134788, "grad_norm": 1.0078125, "learning_rate": 0.001970966700967181, "loss": 2.9906, "step": 1504 }, { "epoch": 0.10469929388848308, "grad_norm": 0.99609375, "learning_rate": 0.0019709127733551365, "loss": 3.3956, "step": 1505 }, { "epoch": 0.10476886152561828, "grad_norm": 0.9453125, "learning_rate": 0.0019708587964450356, "loss": 3.6315, "step": 1506 }, { "epoch": 0.10483842916275349, "grad_norm": 0.91796875, "learning_rate": 0.0019708047702396182, "loss": 3.4508, "step": 1507 }, { "epoch": 0.10490799679988869, "grad_norm": 0.82421875, "learning_rate": 0.001970750694741628, "loss": 3.3902, "step": 1508 }, { "epoch": 0.1049775644370239, "grad_norm": 0.86328125, "learning_rate": 0.00197069656995381, "loss": 3.3237, "step": 1509 }, { "epoch": 0.1050471320741591, "grad_norm": 0.8671875, "learning_rate": 0.001970642395878913, "loss": 3.215, "step": 1510 }, { "epoch": 0.10511669971129431, "grad_norm": 1.0078125, "learning_rate": 0.001970588172519688, "loss": 2.8556, "step": 1511 }, { "epoch": 0.10518626734842951, "grad_norm": 1.03125, "learning_rate": 0.001970533899878887, "loss": 3.1504, "step": 1512 }, { "epoch": 0.10525583498556472, "grad_norm": 0.828125, "learning_rate": 0.0019704795779592666, "loss": 3.3045, "step": 1513 }, { "epoch": 0.10532540262269992, "grad_norm": 0.9765625, "learning_rate": 0.0019704252067635855, "loss": 3.2279, "step": 1514 }, { "epoch": 0.10539497025983513, "grad_norm": 1.0703125, "learning_rate": 0.001970370786294603, "loss": 2.9838, "step": 1515 }, { "epoch": 0.10546453789697033, "grad_norm": 0.8046875, "learning_rate": 0.0019703163165550835, "loss": 3.3872, "step": 1516 }, { "epoch": 0.10553410553410554, "grad_norm": 0.6796875, "learning_rate": 0.0019702617975477918, "loss": 3.2918, "step": 1517 }, { "epoch": 0.10560367317124074, "grad_norm": 1.2578125, "learning_rate": 0.001970207229275497, "loss": 2.9289, "step": 1518 }, { "epoch": 0.10567324080837594, "grad_norm": 0.984375, "learning_rate": 0.001970152611740969, "loss": 3.4421, "step": 1519 }, { "epoch": 0.10574280844551115, "grad_norm": 0.9609375, "learning_rate": 0.0019700979449469806, "loss": 3.3153, "step": 1520 }, { "epoch": 0.10581237608264635, "grad_norm": 0.58203125, "learning_rate": 0.001970043228896309, "loss": 3.7921, "step": 1521 }, { "epoch": 0.10588194371978156, "grad_norm": 0.96484375, "learning_rate": 0.0019699884635917316, "loss": 3.2177, "step": 1522 }, { "epoch": 0.10595151135691676, "grad_norm": 1.0546875, "learning_rate": 0.001969933649036029, "loss": 3.3786, "step": 1523 }, { "epoch": 0.10602107899405197, "grad_norm": 0.80078125, "learning_rate": 0.0019698787852319845, "loss": 3.679, "step": 1524 }, { "epoch": 0.10609064663118717, "grad_norm": 0.89453125, "learning_rate": 0.001969823872182384, "loss": 2.9693, "step": 1525 }, { "epoch": 0.10616021426832238, "grad_norm": 0.74609375, "learning_rate": 0.0019697689098900155, "loss": 3.2078, "step": 1526 }, { "epoch": 0.10622978190545758, "grad_norm": 0.94140625, "learning_rate": 0.0019697138983576696, "loss": 3.2731, "step": 1527 }, { "epoch": 0.10629934954259279, "grad_norm": 0.8515625, "learning_rate": 0.0019696588375881395, "loss": 3.4905, "step": 1528 }, { "epoch": 0.10636891717972799, "grad_norm": 0.78515625, "learning_rate": 0.0019696037275842215, "loss": 3.7366, "step": 1529 }, { "epoch": 0.1064384848168632, "grad_norm": 0.9296875, "learning_rate": 0.001969548568348713, "loss": 3.1914, "step": 1530 }, { "epoch": 0.1065080524539984, "grad_norm": 0.79296875, "learning_rate": 0.0019694933598844153, "loss": 3.5288, "step": 1531 }, { "epoch": 0.1065776200911336, "grad_norm": 0.7265625, "learning_rate": 0.0019694381021941316, "loss": 3.1728, "step": 1532 }, { "epoch": 0.10664718772826881, "grad_norm": 0.7265625, "learning_rate": 0.0019693827952806673, "loss": 3.5763, "step": 1533 }, { "epoch": 0.10671675536540401, "grad_norm": 1.1328125, "learning_rate": 0.0019693274391468303, "loss": 3.5042, "step": 1534 }, { "epoch": 0.10678632300253922, "grad_norm": 0.8984375, "learning_rate": 0.001969272033795432, "loss": 2.8814, "step": 1535 }, { "epoch": 0.10685589063967442, "grad_norm": 0.70703125, "learning_rate": 0.0019692165792292854, "loss": 3.2389, "step": 1536 }, { "epoch": 0.10692545827680963, "grad_norm": 0.7109375, "learning_rate": 0.001969161075451206, "loss": 3.2213, "step": 1537 }, { "epoch": 0.10699502591394483, "grad_norm": 0.80859375, "learning_rate": 0.001969105522464012, "loss": 3.6244, "step": 1538 }, { "epoch": 0.10706459355108004, "grad_norm": 0.8125, "learning_rate": 0.0019690499202705243, "loss": 3.2784, "step": 1539 }, { "epoch": 0.10713416118821524, "grad_norm": 1.0546875, "learning_rate": 0.001968994268873566, "loss": 3.08, "step": 1540 }, { "epoch": 0.10720372882535045, "grad_norm": 0.8203125, "learning_rate": 0.001968938568275963, "loss": 2.9797, "step": 1541 }, { "epoch": 0.10727329646248565, "grad_norm": 0.90234375, "learning_rate": 0.0019688828184805432, "loss": 3.0292, "step": 1542 }, { "epoch": 0.10734286409962086, "grad_norm": 0.8984375, "learning_rate": 0.0019688270194901376, "loss": 2.9543, "step": 1543 }, { "epoch": 0.10741243173675606, "grad_norm": 0.9375, "learning_rate": 0.001968771171307579, "loss": 2.8975, "step": 1544 }, { "epoch": 0.10748199937389126, "grad_norm": 0.83984375, "learning_rate": 0.0019687152739357033, "loss": 3.6984, "step": 1545 }, { "epoch": 0.10755156701102647, "grad_norm": 0.859375, "learning_rate": 0.0019686593273773485, "loss": 3.5366, "step": 1546 }, { "epoch": 0.10762113464816167, "grad_norm": 0.765625, "learning_rate": 0.0019686033316353557, "loss": 3.4072, "step": 1547 }, { "epoch": 0.10769070228529688, "grad_norm": 0.8359375, "learning_rate": 0.001968547286712568, "loss": 3.4246, "step": 1548 }, { "epoch": 0.10776026992243208, "grad_norm": 0.87109375, "learning_rate": 0.0019684911926118307, "loss": 3.1723, "step": 1549 }, { "epoch": 0.1078298375595673, "grad_norm": 0.8828125, "learning_rate": 0.001968435049335992, "loss": 3.4788, "step": 1550 }, { "epoch": 0.10789940519670249, "grad_norm": 0.75390625, "learning_rate": 0.001968378856887903, "loss": 3.4059, "step": 1551 }, { "epoch": 0.1079689728338377, "grad_norm": 0.96875, "learning_rate": 0.0019683226152704164, "loss": 3.2771, "step": 1552 }, { "epoch": 0.1080385404709729, "grad_norm": 1.125, "learning_rate": 0.001968266324486389, "loss": 3.1777, "step": 1553 }, { "epoch": 0.10810810810810811, "grad_norm": 1.0546875, "learning_rate": 0.001968209984538677, "loss": 3.3148, "step": 1554 }, { "epoch": 0.10817767574524331, "grad_norm": 0.95703125, "learning_rate": 0.0019681535954301425, "loss": 3.3594, "step": 1555 }, { "epoch": 0.10824724338237851, "grad_norm": 0.90625, "learning_rate": 0.0019680971571636482, "loss": 3.2512, "step": 1556 }, { "epoch": 0.10831681101951372, "grad_norm": 0.62109375, "learning_rate": 0.00196804066974206, "loss": 3.4698, "step": 1557 }, { "epoch": 0.10838637865664892, "grad_norm": 0.83984375, "learning_rate": 0.001967984133168246, "loss": 3.5944, "step": 1558 }, { "epoch": 0.10845594629378413, "grad_norm": 0.6640625, "learning_rate": 0.001967927547445076, "loss": 3.4931, "step": 1559 }, { "epoch": 0.10852551393091933, "grad_norm": 0.9375, "learning_rate": 0.001967870912575425, "loss": 3.1851, "step": 1560 }, { "epoch": 0.10859508156805454, "grad_norm": 0.71875, "learning_rate": 0.0019678142285621666, "loss": 3.4562, "step": 1561 }, { "epoch": 0.10866464920518974, "grad_norm": 0.90234375, "learning_rate": 0.00196775749540818, "loss": 3.0386, "step": 1562 }, { "epoch": 0.10873421684232495, "grad_norm": 1.75, "learning_rate": 0.0019677007131163457, "loss": 3.611, "step": 1563 }, { "epoch": 0.10880378447946015, "grad_norm": 1.0234375, "learning_rate": 0.001967643881689547, "loss": 3.2333, "step": 1564 }, { "epoch": 0.10887335211659536, "grad_norm": 0.96875, "learning_rate": 0.0019675870011306687, "loss": 3.495, "step": 1565 }, { "epoch": 0.10894291975373056, "grad_norm": 1.0859375, "learning_rate": 0.0019675300714426004, "loss": 3.0353, "step": 1566 }, { "epoch": 0.10901248739086578, "grad_norm": 0.7421875, "learning_rate": 0.001967473092628231, "loss": 3.3026, "step": 1567 }, { "epoch": 0.10908205502800097, "grad_norm": 1.0078125, "learning_rate": 0.001967416064690455, "loss": 3.4083, "step": 1568 }, { "epoch": 0.10915162266513617, "grad_norm": 1.15625, "learning_rate": 0.001967358987632167, "loss": 3.1005, "step": 1569 }, { "epoch": 0.10922119030227138, "grad_norm": 0.8125, "learning_rate": 0.001967301861456265, "loss": 3.5104, "step": 1570 }, { "epoch": 0.10929075793940658, "grad_norm": 0.78515625, "learning_rate": 0.0019672446861656507, "loss": 3.2699, "step": 1571 }, { "epoch": 0.1093603255765418, "grad_norm": 0.6328125, "learning_rate": 0.001967187461763226, "loss": 3.2808, "step": 1572 }, { "epoch": 0.109429893213677, "grad_norm": 0.79296875, "learning_rate": 0.0019671301882518977, "loss": 3.6146, "step": 1573 }, { "epoch": 0.1094994608508122, "grad_norm": 0.73828125, "learning_rate": 0.0019670728656345725, "loss": 3.3131, "step": 1574 }, { "epoch": 0.1095690284879474, "grad_norm": 1.2734375, "learning_rate": 0.0019670154939141616, "loss": 3.522, "step": 1575 }, { "epoch": 0.10963859612508262, "grad_norm": 0.84765625, "learning_rate": 0.0019669580730935785, "loss": 3.5155, "step": 1576 }, { "epoch": 0.10970816376221781, "grad_norm": 0.859375, "learning_rate": 0.001966900603175738, "loss": 3.1026, "step": 1577 }, { "epoch": 0.10977773139935303, "grad_norm": 0.72265625, "learning_rate": 0.0019668430841635583, "loss": 3.4919, "step": 1578 }, { "epoch": 0.10984729903648822, "grad_norm": 0.94921875, "learning_rate": 0.0019667855160599604, "loss": 3.2533, "step": 1579 }, { "epoch": 0.10991686667362344, "grad_norm": 0.97265625, "learning_rate": 0.0019667278988678666, "loss": 3.6415, "step": 1580 }, { "epoch": 0.10998643431075864, "grad_norm": 1.0390625, "learning_rate": 0.001966670232590203, "loss": 3.059, "step": 1581 }, { "epoch": 0.11005600194789383, "grad_norm": 0.7421875, "learning_rate": 0.0019666125172298973, "loss": 2.7372, "step": 1582 }, { "epoch": 0.11012556958502905, "grad_norm": 0.73046875, "learning_rate": 0.0019665547527898796, "loss": 3.3197, "step": 1583 }, { "epoch": 0.11019513722216424, "grad_norm": 0.6953125, "learning_rate": 0.001966496939273084, "loss": 2.9155, "step": 1584 }, { "epoch": 0.11026470485929946, "grad_norm": 0.96875, "learning_rate": 0.001966439076682445, "loss": 2.9173, "step": 1585 }, { "epoch": 0.11033427249643465, "grad_norm": 0.640625, "learning_rate": 0.001966381165020901, "loss": 3.2683, "step": 1586 }, { "epoch": 0.11040384013356987, "grad_norm": 1.09375, "learning_rate": 0.0019663232042913923, "loss": 3.6652, "step": 1587 }, { "epoch": 0.11047340777070506, "grad_norm": 0.87890625, "learning_rate": 0.0019662651944968622, "loss": 3.0684, "step": 1588 }, { "epoch": 0.11054297540784028, "grad_norm": 0.73046875, "learning_rate": 0.0019662071356402557, "loss": 3.464, "step": 1589 }, { "epoch": 0.11061254304497548, "grad_norm": 0.98828125, "learning_rate": 0.0019661490277245205, "loss": 3.1648, "step": 1590 }, { "epoch": 0.11068211068211069, "grad_norm": 0.7578125, "learning_rate": 0.001966090870752608, "loss": 3.8514, "step": 1591 }, { "epoch": 0.11075167831924589, "grad_norm": 1.0859375, "learning_rate": 0.00196603266472747, "loss": 3.5695, "step": 1592 }, { "epoch": 0.1108212459563811, "grad_norm": 0.8359375, "learning_rate": 0.0019659744096520632, "loss": 3.2381, "step": 1593 }, { "epoch": 0.1108908135935163, "grad_norm": 0.7265625, "learning_rate": 0.0019659161055293442, "loss": 3.2566, "step": 1594 }, { "epoch": 0.1109603812306515, "grad_norm": 0.8046875, "learning_rate": 0.001965857752362274, "loss": 3.4792, "step": 1595 }, { "epoch": 0.1110299488677867, "grad_norm": 0.78125, "learning_rate": 0.0019657993501538155, "loss": 3.3937, "step": 1596 }, { "epoch": 0.1110995165049219, "grad_norm": 0.7421875, "learning_rate": 0.001965740898906934, "loss": 3.25, "step": 1597 }, { "epoch": 0.11116908414205712, "grad_norm": 0.80078125, "learning_rate": 0.001965682398624597, "loss": 3.3554, "step": 1598 }, { "epoch": 0.11123865177919232, "grad_norm": 0.765625, "learning_rate": 0.001965623849309776, "loss": 3.114, "step": 1599 }, { "epoch": 0.11130821941632753, "grad_norm": 0.9375, "learning_rate": 0.0019655652509654423, "loss": 3.368, "step": 1600 }, { "epoch": 0.11137778705346273, "grad_norm": 0.7578125, "learning_rate": 0.001965506603594572, "loss": 3.2518, "step": 1601 }, { "epoch": 0.11144735469059794, "grad_norm": 0.84375, "learning_rate": 0.001965447907200143, "loss": 3.5058, "step": 1602 }, { "epoch": 0.11151692232773314, "grad_norm": 0.8984375, "learning_rate": 0.0019653891617851357, "loss": 3.2876, "step": 1603 }, { "epoch": 0.11158648996486835, "grad_norm": 1.09375, "learning_rate": 0.001965330367352533, "loss": 3.3141, "step": 1604 }, { "epoch": 0.11165605760200355, "grad_norm": 0.8203125, "learning_rate": 0.001965271523905319, "loss": 3.6069, "step": 1605 }, { "epoch": 0.11172562523913876, "grad_norm": 0.73828125, "learning_rate": 0.001965212631446483, "loss": 3.1281, "step": 1606 }, { "epoch": 0.11179519287627396, "grad_norm": 0.97265625, "learning_rate": 0.0019651536899790143, "loss": 2.982, "step": 1607 }, { "epoch": 0.11186476051340916, "grad_norm": 0.79296875, "learning_rate": 0.001965094699505906, "loss": 3.2783, "step": 1608 }, { "epoch": 0.11193432815054437, "grad_norm": 0.84765625, "learning_rate": 0.0019650356600301533, "loss": 2.854, "step": 1609 }, { "epoch": 0.11200389578767957, "grad_norm": 0.84375, "learning_rate": 0.001964976571554754, "loss": 3.1584, "step": 1610 }, { "epoch": 0.11207346342481478, "grad_norm": 0.765625, "learning_rate": 0.001964917434082708, "loss": 2.9245, "step": 1611 }, { "epoch": 0.11214303106194998, "grad_norm": 0.80078125, "learning_rate": 0.0019648582476170184, "loss": 3.347, "step": 1612 }, { "epoch": 0.11221259869908519, "grad_norm": 0.71484375, "learning_rate": 0.0019647990121606906, "loss": 3.3861, "step": 1613 }, { "epoch": 0.11228216633622039, "grad_norm": 0.765625, "learning_rate": 0.0019647397277167316, "loss": 3.1979, "step": 1614 }, { "epoch": 0.1123517339733556, "grad_norm": 0.8671875, "learning_rate": 0.0019646803942881515, "loss": 3.4472, "step": 1615 }, { "epoch": 0.1124213016104908, "grad_norm": 1.0234375, "learning_rate": 0.0019646210118779636, "loss": 3.1445, "step": 1616 }, { "epoch": 0.11249086924762601, "grad_norm": 0.88671875, "learning_rate": 0.0019645615804891833, "loss": 3.1762, "step": 1617 }, { "epoch": 0.11256043688476121, "grad_norm": 0.94140625, "learning_rate": 0.001964502100124827, "loss": 3.0012, "step": 1618 }, { "epoch": 0.1126300045218964, "grad_norm": 0.90234375, "learning_rate": 0.001964442570787916, "loss": 3.1677, "step": 1619 }, { "epoch": 0.11269957215903162, "grad_norm": 0.71875, "learning_rate": 0.001964382992481472, "loss": 3.3573, "step": 1620 }, { "epoch": 0.11276913979616682, "grad_norm": 0.734375, "learning_rate": 0.0019643233652085206, "loss": 3.5359, "step": 1621 }, { "epoch": 0.11283870743330203, "grad_norm": 1.015625, "learning_rate": 0.0019642636889720894, "loss": 2.8434, "step": 1622 }, { "epoch": 0.11290827507043723, "grad_norm": 0.87109375, "learning_rate": 0.001964203963775208, "loss": 3.3141, "step": 1623 }, { "epoch": 0.11297784270757244, "grad_norm": 0.765625, "learning_rate": 0.00196414418962091, "loss": 3.6836, "step": 1624 }, { "epoch": 0.11304741034470764, "grad_norm": 1.0390625, "learning_rate": 0.0019640843665122286, "loss": 3.5512, "step": 1625 }, { "epoch": 0.11311697798184285, "grad_norm": 0.90625, "learning_rate": 0.0019640244944522036, "loss": 2.9838, "step": 1626 }, { "epoch": 0.11318654561897805, "grad_norm": 0.86328125, "learning_rate": 0.001963964573443873, "loss": 3.2729, "step": 1627 }, { "epoch": 0.11325611325611326, "grad_norm": 0.890625, "learning_rate": 0.00196390460349028, "loss": 3.4352, "step": 1628 }, { "epoch": 0.11332568089324846, "grad_norm": 0.95703125, "learning_rate": 0.0019638445845944702, "loss": 3.5427, "step": 1629 }, { "epoch": 0.11339524853038367, "grad_norm": 0.96484375, "learning_rate": 0.0019637845167594903, "loss": 3.2987, "step": 1630 }, { "epoch": 0.11346481616751887, "grad_norm": 0.8984375, "learning_rate": 0.0019637243999883905, "loss": 3.2223, "step": 1631 }, { "epoch": 0.11353438380465407, "grad_norm": 1.09375, "learning_rate": 0.001963664234284223, "loss": 3.177, "step": 1632 }, { "epoch": 0.11360395144178928, "grad_norm": 0.80859375, "learning_rate": 0.0019636040196500436, "loss": 3.2046, "step": 1633 }, { "epoch": 0.11367351907892448, "grad_norm": 0.83984375, "learning_rate": 0.0019635437560889084, "loss": 3.163, "step": 1634 }, { "epoch": 0.11374308671605969, "grad_norm": 1.015625, "learning_rate": 0.001963483443603878, "loss": 3.1626, "step": 1635 }, { "epoch": 0.11381265435319489, "grad_norm": 1.1015625, "learning_rate": 0.0019634230821980146, "loss": 3.3732, "step": 1636 }, { "epoch": 0.1138822219903301, "grad_norm": 0.91796875, "learning_rate": 0.001963362671874383, "loss": 3.3927, "step": 1637 }, { "epoch": 0.1139517896274653, "grad_norm": 0.87890625, "learning_rate": 0.0019633022126360512, "loss": 3.0315, "step": 1638 }, { "epoch": 0.11402135726460051, "grad_norm": 0.9375, "learning_rate": 0.0019632417044860876, "loss": 3.236, "step": 1639 }, { "epoch": 0.11409092490173571, "grad_norm": 0.74609375, "learning_rate": 0.001963181147427566, "loss": 3.0346, "step": 1640 }, { "epoch": 0.11416049253887092, "grad_norm": 1.015625, "learning_rate": 0.0019631205414635602, "loss": 3.0258, "step": 1641 }, { "epoch": 0.11423006017600612, "grad_norm": 1.046875, "learning_rate": 0.001963059886597148, "loss": 3.1226, "step": 1642 }, { "epoch": 0.11429962781314133, "grad_norm": 1.3203125, "learning_rate": 0.001962999182831409, "loss": 3.1423, "step": 1643 }, { "epoch": 0.11436919545027653, "grad_norm": 0.734375, "learning_rate": 0.0019629384301694253, "loss": 3.4149, "step": 1644 }, { "epoch": 0.11443876308741173, "grad_norm": 0.95703125, "learning_rate": 0.0019628776286142813, "loss": 3.0182, "step": 1645 }, { "epoch": 0.11450833072454694, "grad_norm": 0.91796875, "learning_rate": 0.001962816778169065, "loss": 3.2926, "step": 1646 }, { "epoch": 0.11457789836168214, "grad_norm": 0.99609375, "learning_rate": 0.0019627558788368657, "loss": 3.243, "step": 1647 }, { "epoch": 0.11464746599881735, "grad_norm": 0.8359375, "learning_rate": 0.001962694930620775, "loss": 3.2997, "step": 1648 }, { "epoch": 0.11471703363595255, "grad_norm": 0.90625, "learning_rate": 0.001962633933523889, "loss": 2.6841, "step": 1649 }, { "epoch": 0.11478660127308776, "grad_norm": 0.5078125, "learning_rate": 0.001962572887549303, "loss": 3.6762, "step": 1650 }, { "epoch": 0.11485616891022296, "grad_norm": 0.8828125, "learning_rate": 0.001962511792700118, "loss": 3.12, "step": 1651 }, { "epoch": 0.11492573654735817, "grad_norm": 0.99609375, "learning_rate": 0.001962450648979435, "loss": 3.3223, "step": 1652 }, { "epoch": 0.11499530418449337, "grad_norm": 0.734375, "learning_rate": 0.0019623894563903597, "loss": 3.5976, "step": 1653 }, { "epoch": 0.11506487182162858, "grad_norm": 1.2578125, "learning_rate": 0.0019623282149359984, "loss": 2.9595, "step": 1654 }, { "epoch": 0.11513443945876378, "grad_norm": 0.703125, "learning_rate": 0.001962266924619461, "loss": 3.4546, "step": 1655 }, { "epoch": 0.115204007095899, "grad_norm": 0.98046875, "learning_rate": 0.001962205585443859, "loss": 3.3012, "step": 1656 }, { "epoch": 0.11527357473303419, "grad_norm": 1.1015625, "learning_rate": 0.0019621441974123077, "loss": 3.1979, "step": 1657 }, { "epoch": 0.11534314237016939, "grad_norm": 0.88671875, "learning_rate": 0.0019620827605279236, "loss": 3.0767, "step": 1658 }, { "epoch": 0.1154127100073046, "grad_norm": 0.78125, "learning_rate": 0.001962021274793826, "loss": 3.2852, "step": 1659 }, { "epoch": 0.1154822776444398, "grad_norm": 1.03125, "learning_rate": 0.001961959740213137, "loss": 3.336, "step": 1660 }, { "epoch": 0.11555184528157501, "grad_norm": 1.0390625, "learning_rate": 0.001961898156788981, "loss": 2.9833, "step": 1661 }, { "epoch": 0.11562141291871021, "grad_norm": 0.73828125, "learning_rate": 0.001961836524524485, "loss": 3.4473, "step": 1662 }, { "epoch": 0.11569098055584542, "grad_norm": 0.6953125, "learning_rate": 0.001961774843422778, "loss": 3.2512, "step": 1663 }, { "epoch": 0.11576054819298062, "grad_norm": 0.94140625, "learning_rate": 0.0019617131134869927, "loss": 3.3328, "step": 1664 }, { "epoch": 0.11583011583011583, "grad_norm": 0.97265625, "learning_rate": 0.0019616513347202624, "loss": 3.4149, "step": 1665 }, { "epoch": 0.11589968346725103, "grad_norm": 0.8359375, "learning_rate": 0.001961589507125725, "loss": 3.1448, "step": 1666 }, { "epoch": 0.11596925110438625, "grad_norm": 0.765625, "learning_rate": 0.0019615276307065185, "loss": 3.1483, "step": 1667 }, { "epoch": 0.11603881874152144, "grad_norm": 0.80078125, "learning_rate": 0.0019614657054657855, "loss": 3.2344, "step": 1668 }, { "epoch": 0.11610838637865666, "grad_norm": 0.67578125, "learning_rate": 0.0019614037314066705, "loss": 3.5726, "step": 1669 }, { "epoch": 0.11617795401579185, "grad_norm": 0.8046875, "learning_rate": 0.0019613417085323193, "loss": 3.5865, "step": 1670 }, { "epoch": 0.11624752165292705, "grad_norm": 0.91796875, "learning_rate": 0.0019612796368458827, "loss": 3.2813, "step": 1671 }, { "epoch": 0.11631708929006226, "grad_norm": 0.89453125, "learning_rate": 0.0019612175163505104, "loss": 3.1217, "step": 1672 }, { "epoch": 0.11638665692719746, "grad_norm": 0.83203125, "learning_rate": 0.0019611553470493576, "loss": 2.907, "step": 1673 }, { "epoch": 0.11645622456433267, "grad_norm": 0.8203125, "learning_rate": 0.0019610931289455813, "loss": 3.2636, "step": 1674 }, { "epoch": 0.11652579220146787, "grad_norm": 0.83984375, "learning_rate": 0.0019610308620423397, "loss": 3.4404, "step": 1675 }, { "epoch": 0.11659535983860309, "grad_norm": 1.015625, "learning_rate": 0.0019609685463427952, "loss": 3.5559, "step": 1676 }, { "epoch": 0.11666492747573828, "grad_norm": 0.984375, "learning_rate": 0.0019609061818501115, "loss": 3.477, "step": 1677 }, { "epoch": 0.1167344951128735, "grad_norm": 0.84375, "learning_rate": 0.001960843768567455, "loss": 3.286, "step": 1678 }, { "epoch": 0.1168040627500087, "grad_norm": 0.70703125, "learning_rate": 0.0019607813064979954, "loss": 3.4537, "step": 1679 }, { "epoch": 0.1168736303871439, "grad_norm": 1.0234375, "learning_rate": 0.0019607187956449034, "loss": 3.4741, "step": 1680 }, { "epoch": 0.1169431980242791, "grad_norm": 0.8984375, "learning_rate": 0.0019606562360113535, "loss": 3.4188, "step": 1681 }, { "epoch": 0.11701276566141432, "grad_norm": 0.83203125, "learning_rate": 0.0019605936276005215, "loss": 3.1084, "step": 1682 }, { "epoch": 0.11708233329854952, "grad_norm": 0.60546875, "learning_rate": 0.0019605309704155876, "loss": 3.5886, "step": 1683 }, { "epoch": 0.11715190093568471, "grad_norm": 0.80859375, "learning_rate": 0.001960468264459732, "loss": 3.1211, "step": 1684 }, { "epoch": 0.11722146857281993, "grad_norm": 0.6796875, "learning_rate": 0.0019604055097361393, "loss": 3.3281, "step": 1685 }, { "epoch": 0.11729103620995512, "grad_norm": 0.8984375, "learning_rate": 0.0019603427062479953, "loss": 3.4789, "step": 1686 }, { "epoch": 0.11736060384709034, "grad_norm": 0.98046875, "learning_rate": 0.001960279853998489, "loss": 3.2314, "step": 1687 }, { "epoch": 0.11743017148422553, "grad_norm": 0.6953125, "learning_rate": 0.0019602169529908124, "loss": 3.8185, "step": 1688 }, { "epoch": 0.11749973912136075, "grad_norm": 0.796875, "learning_rate": 0.001960154003228159, "loss": 3.1382, "step": 1689 }, { "epoch": 0.11756930675849595, "grad_norm": 0.79296875, "learning_rate": 0.0019600910047137244, "loss": 3.1299, "step": 1690 }, { "epoch": 0.11763887439563116, "grad_norm": 0.71484375, "learning_rate": 0.0019600279574507077, "loss": 3.1588, "step": 1691 }, { "epoch": 0.11770844203276636, "grad_norm": 0.828125, "learning_rate": 0.00195996486144231, "loss": 3.1371, "step": 1692 }, { "epoch": 0.11777800966990157, "grad_norm": 0.74609375, "learning_rate": 0.001959901716691736, "loss": 3.4681, "step": 1693 }, { "epoch": 0.11784757730703677, "grad_norm": 0.67578125, "learning_rate": 0.0019598385232021905, "loss": 3.3351, "step": 1694 }, { "epoch": 0.11791714494417196, "grad_norm": 0.69921875, "learning_rate": 0.0019597752809768832, "loss": 3.4628, "step": 1695 }, { "epoch": 0.11798671258130718, "grad_norm": 0.81640625, "learning_rate": 0.0019597119900190245, "loss": 3.2477, "step": 1696 }, { "epoch": 0.11805628021844237, "grad_norm": 0.82421875, "learning_rate": 0.001959648650331828, "loss": 3.3238, "step": 1697 }, { "epoch": 0.11812584785557759, "grad_norm": 0.75, "learning_rate": 0.00195958526191851, "loss": 3.4188, "step": 1698 }, { "epoch": 0.11819541549271279, "grad_norm": 0.9921875, "learning_rate": 0.00195952182478229, "loss": 3.3663, "step": 1699 }, { "epoch": 0.118264983129848, "grad_norm": 0.8984375, "learning_rate": 0.0019594583389263872, "loss": 3.1903, "step": 1700 }, { "epoch": 0.1183345507669832, "grad_norm": 0.87109375, "learning_rate": 0.001959394804354026, "loss": 3.4655, "step": 1701 }, { "epoch": 0.11840411840411841, "grad_norm": 0.84765625, "learning_rate": 0.0019593312210684326, "loss": 3.4838, "step": 1702 }, { "epoch": 0.1184736860412536, "grad_norm": 1.0078125, "learning_rate": 0.001959267589072835, "loss": 3.4469, "step": 1703 }, { "epoch": 0.11854325367838882, "grad_norm": 0.81640625, "learning_rate": 0.0019592039083704644, "loss": 3.0336, "step": 1704 }, { "epoch": 0.11861282131552402, "grad_norm": 0.734375, "learning_rate": 0.001959140178964554, "loss": 3.3108, "step": 1705 }, { "epoch": 0.11868238895265923, "grad_norm": 1.09375, "learning_rate": 0.00195907640085834, "loss": 3.0878, "step": 1706 }, { "epoch": 0.11875195658979443, "grad_norm": 0.9765625, "learning_rate": 0.00195901257405506, "loss": 3.0738, "step": 1707 }, { "epoch": 0.11882152422692963, "grad_norm": 0.75390625, "learning_rate": 0.0019589486985579557, "loss": 3.4438, "step": 1708 }, { "epoch": 0.11889109186406484, "grad_norm": 0.56640625, "learning_rate": 0.00195888477437027, "loss": 3.465, "step": 1709 }, { "epoch": 0.11896065950120004, "grad_norm": 0.5703125, "learning_rate": 0.001958820801495248, "loss": 3.4594, "step": 1710 }, { "epoch": 0.11903022713833525, "grad_norm": 0.8828125, "learning_rate": 0.001958756779936139, "loss": 3.4865, "step": 1711 }, { "epoch": 0.11909979477547045, "grad_norm": 0.75, "learning_rate": 0.0019586927096961935, "loss": 3.2511, "step": 1712 }, { "epoch": 0.11916936241260566, "grad_norm": 0.6875, "learning_rate": 0.001958628590778664, "loss": 3.3198, "step": 1713 }, { "epoch": 0.11923893004974086, "grad_norm": 1.2265625, "learning_rate": 0.0019585644231868062, "loss": 3.6062, "step": 1714 }, { "epoch": 0.11930849768687607, "grad_norm": 0.7421875, "learning_rate": 0.001958500206923879, "loss": 3.5153, "step": 1715 }, { "epoch": 0.11937806532401127, "grad_norm": 0.796875, "learning_rate": 0.001958435941993142, "loss": 3.3921, "step": 1716 }, { "epoch": 0.11944763296114648, "grad_norm": 0.7734375, "learning_rate": 0.0019583716283978593, "loss": 3.3137, "step": 1717 }, { "epoch": 0.11951720059828168, "grad_norm": 0.8203125, "learning_rate": 0.0019583072661412955, "loss": 3.4524, "step": 1718 }, { "epoch": 0.11958676823541689, "grad_norm": 0.62109375, "learning_rate": 0.001958242855226719, "loss": 3.3938, "step": 1719 }, { "epoch": 0.11965633587255209, "grad_norm": 0.70703125, "learning_rate": 0.0019581783956574006, "loss": 3.168, "step": 1720 }, { "epoch": 0.11972590350968729, "grad_norm": 0.55859375, "learning_rate": 0.001958113887436612, "loss": 3.6354, "step": 1721 }, { "epoch": 0.1197954711468225, "grad_norm": 0.734375, "learning_rate": 0.00195804933056763, "loss": 3.1484, "step": 1722 }, { "epoch": 0.1198650387839577, "grad_norm": 1.015625, "learning_rate": 0.0019579847250537318, "loss": 2.7735, "step": 1723 }, { "epoch": 0.11993460642109291, "grad_norm": 0.80859375, "learning_rate": 0.001957920070898198, "loss": 2.9981, "step": 1724 }, { "epoch": 0.12000417405822811, "grad_norm": 0.953125, "learning_rate": 0.0019578553681043115, "loss": 2.9309, "step": 1725 }, { "epoch": 0.12007374169536332, "grad_norm": 0.71875, "learning_rate": 0.001957790616675357, "loss": 3.4824, "step": 1726 }, { "epoch": 0.12014330933249852, "grad_norm": 0.7890625, "learning_rate": 0.0019577258166146227, "loss": 3.1838, "step": 1727 }, { "epoch": 0.12021287696963373, "grad_norm": 0.82421875, "learning_rate": 0.0019576609679253986, "loss": 2.8742, "step": 1728 }, { "epoch": 0.12028244460676893, "grad_norm": 0.9375, "learning_rate": 0.001957596070610978, "loss": 3.5897, "step": 1729 }, { "epoch": 0.12035201224390414, "grad_norm": 0.62109375, "learning_rate": 0.001957531124674655, "loss": 3.6366, "step": 1730 }, { "epoch": 0.12042157988103934, "grad_norm": 0.74609375, "learning_rate": 0.001957466130119728, "loss": 3.0287, "step": 1731 }, { "epoch": 0.12049114751817455, "grad_norm": 0.890625, "learning_rate": 0.0019574010869494968, "loss": 3.2798, "step": 1732 }, { "epoch": 0.12056071515530975, "grad_norm": 0.82421875, "learning_rate": 0.0019573359951672643, "loss": 3.4167, "step": 1733 }, { "epoch": 0.12063028279244495, "grad_norm": 2.75, "learning_rate": 0.001957270854776335, "loss": 3.3749, "step": 1734 }, { "epoch": 0.12069985042958016, "grad_norm": 0.7265625, "learning_rate": 0.001957205665780017, "loss": 3.7976, "step": 1735 }, { "epoch": 0.12076941806671536, "grad_norm": 1.015625, "learning_rate": 0.00195714042818162, "loss": 3.2681, "step": 1736 }, { "epoch": 0.12083898570385057, "grad_norm": 0.828125, "learning_rate": 0.001957075141984456, "loss": 3.0821, "step": 1737 }, { "epoch": 0.12090855334098577, "grad_norm": 1.0546875, "learning_rate": 0.0019570098071918407, "loss": 3.477, "step": 1738 }, { "epoch": 0.12097812097812098, "grad_norm": 0.88671875, "learning_rate": 0.001956944423807091, "loss": 3.3121, "step": 1739 }, { "epoch": 0.12104768861525618, "grad_norm": 0.9140625, "learning_rate": 0.0019568789918335268, "loss": 3.0542, "step": 1740 }, { "epoch": 0.12111725625239139, "grad_norm": 0.95703125, "learning_rate": 0.0019568135112744698, "loss": 3.432, "step": 1741 }, { "epoch": 0.12118682388952659, "grad_norm": 0.85546875, "learning_rate": 0.0019567479821332463, "loss": 2.9629, "step": 1742 }, { "epoch": 0.1212563915266618, "grad_norm": 0.99609375, "learning_rate": 0.001956682404413182, "loss": 3.4069, "step": 1743 }, { "epoch": 0.121325959163797, "grad_norm": 0.84375, "learning_rate": 0.0019566167781176077, "loss": 3.2643, "step": 1744 }, { "epoch": 0.12139552680093221, "grad_norm": 0.84765625, "learning_rate": 0.001956551103249855, "loss": 3.8109, "step": 1745 }, { "epoch": 0.12146509443806741, "grad_norm": 0.70703125, "learning_rate": 0.0019564853798132585, "loss": 3.5165, "step": 1746 }, { "epoch": 0.12153466207520261, "grad_norm": 0.9765625, "learning_rate": 0.0019564196078111556, "loss": 3.4767, "step": 1747 }, { "epoch": 0.12160422971233782, "grad_norm": 0.9609375, "learning_rate": 0.0019563537872468854, "loss": 3.2814, "step": 1748 }, { "epoch": 0.12167379734947302, "grad_norm": 1.0390625, "learning_rate": 0.0019562879181237907, "loss": 3.188, "step": 1749 }, { "epoch": 0.12174336498660823, "grad_norm": 0.86328125, "learning_rate": 0.0019562220004452156, "loss": 3.3306, "step": 1750 }, { "epoch": 0.12181293262374343, "grad_norm": 1.0234375, "learning_rate": 0.001956156034214507, "loss": 3.3211, "step": 1751 }, { "epoch": 0.12188250026087864, "grad_norm": 1.109375, "learning_rate": 0.0019560900194350137, "loss": 3.0687, "step": 1752 }, { "epoch": 0.12195206789801384, "grad_norm": 0.671875, "learning_rate": 0.001956023956110089, "loss": 3.1071, "step": 1753 }, { "epoch": 0.12202163553514905, "grad_norm": 0.796875, "learning_rate": 0.0019559578442430864, "loss": 3.1057, "step": 1754 }, { "epoch": 0.12209120317228425, "grad_norm": 0.84375, "learning_rate": 0.0019558916838373626, "loss": 3.3788, "step": 1755 }, { "epoch": 0.12216077080941946, "grad_norm": 0.8359375, "learning_rate": 0.0019558254748962773, "loss": 3.0311, "step": 1756 }, { "epoch": 0.12223033844655466, "grad_norm": 0.94921875, "learning_rate": 0.001955759217423192, "loss": 3.4973, "step": 1757 }, { "epoch": 0.12229990608368987, "grad_norm": 1.0234375, "learning_rate": 0.001955692911421471, "loss": 3.0044, "step": 1758 }, { "epoch": 0.12236947372082507, "grad_norm": 0.7578125, "learning_rate": 0.0019556265568944813, "loss": 2.8627, "step": 1759 }, { "epoch": 0.12243904135796027, "grad_norm": 0.70703125, "learning_rate": 0.0019555601538455915, "loss": 3.5029, "step": 1760 }, { "epoch": 0.12250860899509548, "grad_norm": 0.953125, "learning_rate": 0.0019554937022781735, "loss": 3.1952, "step": 1761 }, { "epoch": 0.12257817663223068, "grad_norm": 0.86328125, "learning_rate": 0.0019554272021956014, "loss": 3.7242, "step": 1762 }, { "epoch": 0.1226477442693659, "grad_norm": 0.94921875, "learning_rate": 0.001955360653601252, "loss": 3.31, "step": 1763 }, { "epoch": 0.12271731190650109, "grad_norm": 0.82421875, "learning_rate": 0.0019552940564985036, "loss": 3.3173, "step": 1764 }, { "epoch": 0.1227868795436363, "grad_norm": 0.79296875, "learning_rate": 0.001955227410890738, "loss": 3.2139, "step": 1765 }, { "epoch": 0.1228564471807715, "grad_norm": 0.796875, "learning_rate": 0.001955160716781339, "loss": 3.0229, "step": 1766 }, { "epoch": 0.12292601481790671, "grad_norm": 0.71484375, "learning_rate": 0.0019550939741736937, "loss": 3.0964, "step": 1767 }, { "epoch": 0.12299558245504191, "grad_norm": 0.890625, "learning_rate": 0.00195502718307119, "loss": 3.3713, "step": 1768 }, { "epoch": 0.12306515009217713, "grad_norm": 1.03125, "learning_rate": 0.0019549603434772197, "loss": 3.3651, "step": 1769 }, { "epoch": 0.12313471772931232, "grad_norm": 0.89453125, "learning_rate": 0.001954893455395177, "loss": 2.9908, "step": 1770 }, { "epoch": 0.12320428536644752, "grad_norm": 0.921875, "learning_rate": 0.0019548265188284574, "loss": 2.9317, "step": 1771 }, { "epoch": 0.12327385300358273, "grad_norm": 0.9375, "learning_rate": 0.0019547595337804594, "loss": 2.8074, "step": 1772 }, { "epoch": 0.12334342064071793, "grad_norm": 0.921875, "learning_rate": 0.001954692500254585, "loss": 3.339, "step": 1773 }, { "epoch": 0.12341298827785314, "grad_norm": 0.9453125, "learning_rate": 0.0019546254182542374, "loss": 3.0529, "step": 1774 }, { "epoch": 0.12348255591498834, "grad_norm": 0.78125, "learning_rate": 0.001954558287782823, "loss": 3.3201, "step": 1775 }, { "epoch": 0.12355212355212356, "grad_norm": 1.0234375, "learning_rate": 0.0019544911088437496, "loss": 3.211, "step": 1776 }, { "epoch": 0.12362169118925875, "grad_norm": 0.84765625, "learning_rate": 0.0019544238814404287, "loss": 3.4603, "step": 1777 }, { "epoch": 0.12369125882639397, "grad_norm": 0.73828125, "learning_rate": 0.0019543566055762744, "loss": 3.3202, "step": 1778 }, { "epoch": 0.12376082646352916, "grad_norm": 1.09375, "learning_rate": 0.0019542892812547015, "loss": 3.4099, "step": 1779 }, { "epoch": 0.12383039410066438, "grad_norm": 0.88671875, "learning_rate": 0.0019542219084791286, "loss": 3.6499, "step": 1780 }, { "epoch": 0.12389996173779957, "grad_norm": 0.84765625, "learning_rate": 0.001954154487252977, "loss": 3.2814, "step": 1781 }, { "epoch": 0.12396952937493479, "grad_norm": 0.796875, "learning_rate": 0.00195408701757967, "loss": 3.1144, "step": 1782 }, { "epoch": 0.12403909701206998, "grad_norm": 0.90625, "learning_rate": 0.001954019499462633, "loss": 3.2145, "step": 1783 }, { "epoch": 0.12410866464920518, "grad_norm": 1.1796875, "learning_rate": 0.001953951932905295, "loss": 2.9224, "step": 1784 }, { "epoch": 0.1241782322863404, "grad_norm": 0.62890625, "learning_rate": 0.0019538843179110854, "loss": 3.5563, "step": 1785 }, { "epoch": 0.1242477999234756, "grad_norm": 0.73828125, "learning_rate": 0.0019538166544834385, "loss": 3.7433, "step": 1786 }, { "epoch": 0.1243173675606108, "grad_norm": 0.66796875, "learning_rate": 0.0019537489426257894, "loss": 3.5424, "step": 1787 }, { "epoch": 0.124386935197746, "grad_norm": 0.81640625, "learning_rate": 0.001953681182341576, "loss": 3.1177, "step": 1788 }, { "epoch": 0.12445650283488122, "grad_norm": 0.94140625, "learning_rate": 0.0019536133736342393, "loss": 3.1545, "step": 1789 }, { "epoch": 0.12452607047201641, "grad_norm": 0.92578125, "learning_rate": 0.001953545516507222, "loss": 3.2966, "step": 1790 }, { "epoch": 0.12459563810915163, "grad_norm": 0.828125, "learning_rate": 0.00195347761096397, "loss": 3.0389, "step": 1791 }, { "epoch": 0.12466520574628683, "grad_norm": 0.8984375, "learning_rate": 0.0019534096570079304, "loss": 3.2605, "step": 1792 }, { "epoch": 0.12473477338342204, "grad_norm": 0.734375, "learning_rate": 0.001953341654642554, "loss": 3.5102, "step": 1793 }, { "epoch": 0.12480434102055724, "grad_norm": 0.6875, "learning_rate": 0.0019532736038712934, "loss": 3.6672, "step": 1794 }, { "epoch": 0.12487390865769245, "grad_norm": 0.953125, "learning_rate": 0.0019532055046976044, "loss": 3.2388, "step": 1795 }, { "epoch": 0.12494347629482765, "grad_norm": 1.0703125, "learning_rate": 0.001953137357124944, "loss": 3.2163, "step": 1796 }, { "epoch": 0.12501304393196286, "grad_norm": 0.78515625, "learning_rate": 0.001953069161156773, "loss": 3.3399, "step": 1797 }, { "epoch": 0.12508261156909806, "grad_norm": 1.0078125, "learning_rate": 0.0019530009167965537, "loss": 3.4525, "step": 1798 }, { "epoch": 0.12515217920623325, "grad_norm": 0.8515625, "learning_rate": 0.0019529326240477513, "loss": 2.9544, "step": 1799 }, { "epoch": 0.12522174684336845, "grad_norm": 1.125, "learning_rate": 0.0019528642829138338, "loss": 3.4445, "step": 1800 }, { "epoch": 0.12529131448050368, "grad_norm": 0.74609375, "learning_rate": 0.0019527958933982703, "loss": 3.1022, "step": 1801 }, { "epoch": 0.12536088211763888, "grad_norm": 0.8828125, "learning_rate": 0.001952727455504534, "loss": 3.1783, "step": 1802 }, { "epoch": 0.12543044975477408, "grad_norm": 0.8671875, "learning_rate": 0.0019526589692360997, "loss": 3.3324, "step": 1803 }, { "epoch": 0.12550001739190927, "grad_norm": 1.0078125, "learning_rate": 0.0019525904345964445, "loss": 3.2979, "step": 1804 }, { "epoch": 0.1255695850290445, "grad_norm": 0.84375, "learning_rate": 0.0019525218515890487, "loss": 3.2694, "step": 1805 }, { "epoch": 0.1256391526661797, "grad_norm": 0.703125, "learning_rate": 0.0019524532202173938, "loss": 3.6337, "step": 1806 }, { "epoch": 0.1257087203033149, "grad_norm": 1.03125, "learning_rate": 0.0019523845404849655, "loss": 3.0394, "step": 1807 }, { "epoch": 0.1257782879404501, "grad_norm": 0.80859375, "learning_rate": 0.0019523158123952507, "loss": 3.2475, "step": 1808 }, { "epoch": 0.1258478555775853, "grad_norm": 0.97265625, "learning_rate": 0.001952247035951739, "loss": 3.5361, "step": 1809 }, { "epoch": 0.12591742321472052, "grad_norm": 0.75, "learning_rate": 0.0019521782111579223, "loss": 3.2208, "step": 1810 }, { "epoch": 0.12598699085185572, "grad_norm": 0.74609375, "learning_rate": 0.0019521093380172954, "loss": 3.0137, "step": 1811 }, { "epoch": 0.12605655848899092, "grad_norm": 0.92578125, "learning_rate": 0.0019520404165333555, "loss": 3.4475, "step": 1812 }, { "epoch": 0.12612612612612611, "grad_norm": 0.85546875, "learning_rate": 0.0019519714467096016, "loss": 3.3839, "step": 1813 }, { "epoch": 0.12619569376326134, "grad_norm": 0.65625, "learning_rate": 0.0019519024285495359, "loss": 3.4224, "step": 1814 }, { "epoch": 0.12626526140039654, "grad_norm": 0.796875, "learning_rate": 0.0019518333620566631, "loss": 3.4469, "step": 1815 }, { "epoch": 0.12633482903753174, "grad_norm": 0.91796875, "learning_rate": 0.0019517642472344895, "loss": 3.385, "step": 1816 }, { "epoch": 0.12640439667466694, "grad_norm": 0.77734375, "learning_rate": 0.0019516950840865249, "loss": 3.0788, "step": 1817 }, { "epoch": 0.12647396431180216, "grad_norm": 1.109375, "learning_rate": 0.0019516258726162807, "loss": 2.922, "step": 1818 }, { "epoch": 0.12654353194893736, "grad_norm": 0.94140625, "learning_rate": 0.0019515566128272713, "loss": 3.7069, "step": 1819 }, { "epoch": 0.12661309958607256, "grad_norm": 0.87890625, "learning_rate": 0.0019514873047230133, "loss": 3.5041, "step": 1820 }, { "epoch": 0.12668266722320776, "grad_norm": 0.98828125, "learning_rate": 0.0019514179483070258, "loss": 3.3119, "step": 1821 }, { "epoch": 0.12675223486034295, "grad_norm": 0.76171875, "learning_rate": 0.0019513485435828303, "loss": 3.5025, "step": 1822 }, { "epoch": 0.12682180249747818, "grad_norm": 0.765625, "learning_rate": 0.001951279090553951, "loss": 3.1403, "step": 1823 }, { "epoch": 0.12689137013461338, "grad_norm": 0.796875, "learning_rate": 0.0019512095892239144, "loss": 3.0786, "step": 1824 }, { "epoch": 0.12696093777174858, "grad_norm": 0.7421875, "learning_rate": 0.001951140039596249, "loss": 3.2466, "step": 1825 }, { "epoch": 0.12703050540888378, "grad_norm": 0.86328125, "learning_rate": 0.001951070441674487, "loss": 3.1296, "step": 1826 }, { "epoch": 0.127100073046019, "grad_norm": 0.89453125, "learning_rate": 0.0019510007954621612, "loss": 3.5657, "step": 1827 }, { "epoch": 0.1271696406831542, "grad_norm": 0.77734375, "learning_rate": 0.0019509311009628086, "loss": 3.0529, "step": 1828 }, { "epoch": 0.1272392083202894, "grad_norm": 0.82421875, "learning_rate": 0.0019508613581799676, "loss": 3.0305, "step": 1829 }, { "epoch": 0.1273087759574246, "grad_norm": 0.9609375, "learning_rate": 0.0019507915671171797, "loss": 3.1983, "step": 1830 }, { "epoch": 0.12737834359455982, "grad_norm": 0.78515625, "learning_rate": 0.0019507217277779884, "loss": 3.1657, "step": 1831 }, { "epoch": 0.12744791123169502, "grad_norm": 0.66015625, "learning_rate": 0.0019506518401659397, "loss": 3.5715, "step": 1832 }, { "epoch": 0.12751747886883022, "grad_norm": 1.015625, "learning_rate": 0.0019505819042845822, "loss": 3.2889, "step": 1833 }, { "epoch": 0.12758704650596542, "grad_norm": 0.9140625, "learning_rate": 0.001950511920137467, "loss": 3.3527, "step": 1834 }, { "epoch": 0.12765661414310062, "grad_norm": 0.859375, "learning_rate": 0.001950441887728147, "loss": 3.7539, "step": 1835 }, { "epoch": 0.12772618178023584, "grad_norm": 0.78125, "learning_rate": 0.0019503718070601791, "loss": 3.2217, "step": 1836 }, { "epoch": 0.12779574941737104, "grad_norm": 0.78125, "learning_rate": 0.0019503016781371209, "loss": 3.275, "step": 1837 }, { "epoch": 0.12786531705450624, "grad_norm": 0.85546875, "learning_rate": 0.0019502315009625331, "loss": 3.4357, "step": 1838 }, { "epoch": 0.12793488469164144, "grad_norm": 0.82421875, "learning_rate": 0.0019501612755399795, "loss": 3.3004, "step": 1839 }, { "epoch": 0.12800445232877666, "grad_norm": 1.0546875, "learning_rate": 0.0019500910018730253, "loss": 3.1225, "step": 1840 }, { "epoch": 0.12807401996591186, "grad_norm": 0.86328125, "learning_rate": 0.0019500206799652386, "loss": 3.3034, "step": 1841 }, { "epoch": 0.12814358760304706, "grad_norm": 1.0703125, "learning_rate": 0.0019499503098201908, "loss": 3.1774, "step": 1842 }, { "epoch": 0.12821315524018226, "grad_norm": 0.79296875, "learning_rate": 0.001949879891441454, "loss": 2.9553, "step": 1843 }, { "epoch": 0.12828272287731748, "grad_norm": 0.73046875, "learning_rate": 0.0019498094248326043, "loss": 3.0137, "step": 1844 }, { "epoch": 0.12835229051445268, "grad_norm": 0.87109375, "learning_rate": 0.0019497389099972192, "loss": 3.063, "step": 1845 }, { "epoch": 0.12842185815158788, "grad_norm": 1.078125, "learning_rate": 0.0019496683469388794, "loss": 3.5581, "step": 1846 }, { "epoch": 0.12849142578872308, "grad_norm": 0.92578125, "learning_rate": 0.0019495977356611674, "loss": 3.3502, "step": 1847 }, { "epoch": 0.12856099342585828, "grad_norm": 0.80859375, "learning_rate": 0.001949527076167669, "loss": 3.3218, "step": 1848 }, { "epoch": 0.1286305610629935, "grad_norm": 0.8515625, "learning_rate": 0.0019494563684619715, "loss": 3.4957, "step": 1849 }, { "epoch": 0.1287001287001287, "grad_norm": 1.09375, "learning_rate": 0.0019493856125476652, "loss": 3.7629, "step": 1850 }, { "epoch": 0.1287696963372639, "grad_norm": 1.3828125, "learning_rate": 0.0019493148084283427, "loss": 3.0125, "step": 1851 }, { "epoch": 0.1288392639743991, "grad_norm": 1.09375, "learning_rate": 0.0019492439561075994, "loss": 3.1882, "step": 1852 }, { "epoch": 0.12890883161153432, "grad_norm": 0.74609375, "learning_rate": 0.0019491730555890323, "loss": 3.1547, "step": 1853 }, { "epoch": 0.12897839924866952, "grad_norm": 0.8125, "learning_rate": 0.0019491021068762417, "loss": 3.2827, "step": 1854 }, { "epoch": 0.12904796688580472, "grad_norm": 1.0703125, "learning_rate": 0.00194903110997283, "loss": 3.4886, "step": 1855 }, { "epoch": 0.12911753452293992, "grad_norm": 0.73828125, "learning_rate": 0.001948960064882402, "loss": 3.4038, "step": 1856 }, { "epoch": 0.12918710216007515, "grad_norm": 0.76953125, "learning_rate": 0.0019488889716085648, "loss": 3.119, "step": 1857 }, { "epoch": 0.12925666979721034, "grad_norm": 0.83984375, "learning_rate": 0.0019488178301549286, "loss": 3.3197, "step": 1858 }, { "epoch": 0.12932623743434554, "grad_norm": 0.73828125, "learning_rate": 0.0019487466405251053, "loss": 3.5417, "step": 1859 }, { "epoch": 0.12939580507148074, "grad_norm": 1.1796875, "learning_rate": 0.0019486754027227098, "loss": 3.174, "step": 1860 }, { "epoch": 0.12946537270861594, "grad_norm": 0.76953125, "learning_rate": 0.0019486041167513588, "loss": 3.2131, "step": 1861 }, { "epoch": 0.12953494034575117, "grad_norm": 0.72265625, "learning_rate": 0.0019485327826146723, "loss": 3.4978, "step": 1862 }, { "epoch": 0.12960450798288636, "grad_norm": 0.67578125, "learning_rate": 0.0019484614003162717, "loss": 3.408, "step": 1863 }, { "epoch": 0.12967407562002156, "grad_norm": 0.6953125, "learning_rate": 0.0019483899698597821, "loss": 3.3175, "step": 1864 }, { "epoch": 0.12974364325715676, "grad_norm": 0.9296875, "learning_rate": 0.0019483184912488301, "loss": 2.8108, "step": 1865 }, { "epoch": 0.12981321089429199, "grad_norm": 0.6875, "learning_rate": 0.001948246964487045, "loss": 3.226, "step": 1866 }, { "epoch": 0.12988277853142718, "grad_norm": 0.796875, "learning_rate": 0.0019481753895780583, "loss": 3.2306, "step": 1867 }, { "epoch": 0.12995234616856238, "grad_norm": 0.7265625, "learning_rate": 0.0019481037665255046, "loss": 3.292, "step": 1868 }, { "epoch": 0.13002191380569758, "grad_norm": 0.7265625, "learning_rate": 0.0019480320953330205, "loss": 3.6482, "step": 1869 }, { "epoch": 0.1300914814428328, "grad_norm": 0.8515625, "learning_rate": 0.0019479603760042448, "loss": 3.6202, "step": 1870 }, { "epoch": 0.130161049079968, "grad_norm": 0.9296875, "learning_rate": 0.0019478886085428195, "loss": 2.8485, "step": 1871 }, { "epoch": 0.1302306167171032, "grad_norm": 0.95703125, "learning_rate": 0.0019478167929523884, "loss": 3.5329, "step": 1872 }, { "epoch": 0.1303001843542384, "grad_norm": 0.83984375, "learning_rate": 0.0019477449292365978, "loss": 3.2885, "step": 1873 }, { "epoch": 0.1303697519913736, "grad_norm": 1.1796875, "learning_rate": 0.001947673017399097, "loss": 3.0415, "step": 1874 }, { "epoch": 0.13043931962850883, "grad_norm": 0.890625, "learning_rate": 0.0019476010574435364, "loss": 3.4042, "step": 1875 }, { "epoch": 0.13050888726564402, "grad_norm": 0.66796875, "learning_rate": 0.001947529049373571, "loss": 3.4087, "step": 1876 }, { "epoch": 0.13057845490277922, "grad_norm": 0.7265625, "learning_rate": 0.0019474569931928558, "loss": 3.4739, "step": 1877 }, { "epoch": 0.13064802253991442, "grad_norm": 0.859375, "learning_rate": 0.0019473848889050504, "loss": 3.1786, "step": 1878 }, { "epoch": 0.13071759017704965, "grad_norm": 0.8515625, "learning_rate": 0.0019473127365138155, "loss": 3.2386, "step": 1879 }, { "epoch": 0.13078715781418485, "grad_norm": 0.890625, "learning_rate": 0.0019472405360228145, "loss": 3.5112, "step": 1880 }, { "epoch": 0.13085672545132004, "grad_norm": 0.98828125, "learning_rate": 0.0019471682874357135, "loss": 3.318, "step": 1881 }, { "epoch": 0.13092629308845524, "grad_norm": 0.80078125, "learning_rate": 0.0019470959907561811, "loss": 3.6096, "step": 1882 }, { "epoch": 0.13099586072559047, "grad_norm": 0.875, "learning_rate": 0.0019470236459878877, "loss": 2.9816, "step": 1883 }, { "epoch": 0.13106542836272567, "grad_norm": 0.78125, "learning_rate": 0.0019469512531345072, "loss": 2.9925, "step": 1884 }, { "epoch": 0.13113499599986086, "grad_norm": 0.9765625, "learning_rate": 0.001946878812199715, "loss": 3.3357, "step": 1885 }, { "epoch": 0.13120456363699606, "grad_norm": 0.82421875, "learning_rate": 0.0019468063231871896, "loss": 3.3221, "step": 1886 }, { "epoch": 0.13127413127413126, "grad_norm": 0.9375, "learning_rate": 0.001946733786100611, "loss": 3.0446, "step": 1887 }, { "epoch": 0.1313436989112665, "grad_norm": 0.94140625, "learning_rate": 0.0019466612009436627, "loss": 3.4744, "step": 1888 }, { "epoch": 0.13141326654840169, "grad_norm": 0.7890625, "learning_rate": 0.00194658856772003, "loss": 3.3944, "step": 1889 }, { "epoch": 0.13148283418553688, "grad_norm": 1.0078125, "learning_rate": 0.001946515886433401, "loss": 3.1389, "step": 1890 }, { "epoch": 0.13155240182267208, "grad_norm": 1.0390625, "learning_rate": 0.0019464431570874665, "loss": 3.3595, "step": 1891 }, { "epoch": 0.1316219694598073, "grad_norm": 0.76171875, "learning_rate": 0.0019463703796859188, "loss": 3.3208, "step": 1892 }, { "epoch": 0.1316915370969425, "grad_norm": 0.69921875, "learning_rate": 0.001946297554232453, "loss": 3.5098, "step": 1893 }, { "epoch": 0.1317611047340777, "grad_norm": 0.8125, "learning_rate": 0.0019462246807307672, "loss": 3.2446, "step": 1894 }, { "epoch": 0.1318306723712129, "grad_norm": 0.6640625, "learning_rate": 0.0019461517591845615, "loss": 3.3563, "step": 1895 }, { "epoch": 0.13190024000834813, "grad_norm": 0.87109375, "learning_rate": 0.0019460787895975386, "loss": 3.5702, "step": 1896 }, { "epoch": 0.13196980764548333, "grad_norm": 0.83984375, "learning_rate": 0.001946005771973403, "loss": 3.3963, "step": 1897 }, { "epoch": 0.13203937528261853, "grad_norm": 0.9140625, "learning_rate": 0.0019459327063158628, "loss": 3.2952, "step": 1898 }, { "epoch": 0.13210894291975372, "grad_norm": 0.95703125, "learning_rate": 0.0019458595926286272, "loss": 3.0189, "step": 1899 }, { "epoch": 0.13217851055688892, "grad_norm": 0.59375, "learning_rate": 0.0019457864309154094, "loss": 3.6055, "step": 1900 }, { "epoch": 0.13224807819402415, "grad_norm": 0.83984375, "learning_rate": 0.0019457132211799235, "loss": 2.9506, "step": 1901 }, { "epoch": 0.13231764583115935, "grad_norm": 0.9453125, "learning_rate": 0.0019456399634258871, "loss": 3.2754, "step": 1902 }, { "epoch": 0.13238721346829455, "grad_norm": 0.8125, "learning_rate": 0.00194556665765702, "loss": 3.2409, "step": 1903 }, { "epoch": 0.13245678110542974, "grad_norm": 0.94921875, "learning_rate": 0.0019454933038770435, "loss": 3.1072, "step": 1904 }, { "epoch": 0.13252634874256497, "grad_norm": 0.9296875, "learning_rate": 0.001945419902089683, "loss": 2.9871, "step": 1905 }, { "epoch": 0.13259591637970017, "grad_norm": 0.72265625, "learning_rate": 0.001945346452298665, "loss": 3.0467, "step": 1906 }, { "epoch": 0.13266548401683537, "grad_norm": 0.80859375, "learning_rate": 0.0019452729545077192, "loss": 3.4325, "step": 1907 }, { "epoch": 0.13273505165397056, "grad_norm": 0.83984375, "learning_rate": 0.001945199408720577, "loss": 3.1134, "step": 1908 }, { "epoch": 0.1328046192911058, "grad_norm": 1.0546875, "learning_rate": 0.0019451258149409735, "loss": 3.4843, "step": 1909 }, { "epoch": 0.132874186928241, "grad_norm": 0.78515625, "learning_rate": 0.0019450521731726447, "loss": 3.384, "step": 1910 }, { "epoch": 0.1329437545653762, "grad_norm": 0.96875, "learning_rate": 0.0019449784834193297, "loss": 3.2625, "step": 1911 }, { "epoch": 0.13301332220251139, "grad_norm": 0.796875, "learning_rate": 0.0019449047456847706, "loss": 3.519, "step": 1912 }, { "epoch": 0.13308288983964658, "grad_norm": 0.94921875, "learning_rate": 0.0019448309599727112, "loss": 3.3985, "step": 1913 }, { "epoch": 0.1331524574767818, "grad_norm": 0.72265625, "learning_rate": 0.001944757126286898, "loss": 3.4797, "step": 1914 }, { "epoch": 0.133222025113917, "grad_norm": 0.9375, "learning_rate": 0.0019446832446310793, "loss": 3.2945, "step": 1915 }, { "epoch": 0.1332915927510522, "grad_norm": 0.859375, "learning_rate": 0.0019446093150090075, "loss": 3.3423, "step": 1916 }, { "epoch": 0.1333611603881874, "grad_norm": 0.828125, "learning_rate": 0.0019445353374244359, "loss": 3.6269, "step": 1917 }, { "epoch": 0.13343072802532263, "grad_norm": 0.7421875, "learning_rate": 0.0019444613118811205, "loss": 3.362, "step": 1918 }, { "epoch": 0.13350029566245783, "grad_norm": 0.9453125, "learning_rate": 0.0019443872383828203, "loss": 3.3338, "step": 1919 }, { "epoch": 0.13356986329959303, "grad_norm": 0.89453125, "learning_rate": 0.0019443131169332962, "loss": 3.3511, "step": 1920 }, { "epoch": 0.13363943093672823, "grad_norm": 0.953125, "learning_rate": 0.0019442389475363116, "loss": 3.5065, "step": 1921 }, { "epoch": 0.13370899857386345, "grad_norm": 0.9453125, "learning_rate": 0.0019441647301956324, "loss": 3.3673, "step": 1922 }, { "epoch": 0.13377856621099865, "grad_norm": 0.90234375, "learning_rate": 0.0019440904649150276, "loss": 3.0948, "step": 1923 }, { "epoch": 0.13384813384813385, "grad_norm": 0.8984375, "learning_rate": 0.0019440161516982668, "loss": 3.6924, "step": 1924 }, { "epoch": 0.13391770148526905, "grad_norm": 0.78515625, "learning_rate": 0.0019439417905491247, "loss": 3.0385, "step": 1925 }, { "epoch": 0.13398726912240425, "grad_norm": 0.9140625, "learning_rate": 0.0019438673814713761, "loss": 3.1771, "step": 1926 }, { "epoch": 0.13405683675953947, "grad_norm": 1.109375, "learning_rate": 0.001943792924468799, "loss": 3.7504, "step": 1927 }, { "epoch": 0.13412640439667467, "grad_norm": 0.875, "learning_rate": 0.0019437184195451747, "loss": 3.4519, "step": 1928 }, { "epoch": 0.13419597203380987, "grad_norm": 0.78125, "learning_rate": 0.0019436438667042855, "loss": 3.4553, "step": 1929 }, { "epoch": 0.13426553967094507, "grad_norm": 0.7734375, "learning_rate": 0.0019435692659499173, "loss": 3.4122, "step": 1930 }, { "epoch": 0.1343351073080803, "grad_norm": 0.8046875, "learning_rate": 0.0019434946172858577, "loss": 3.4642, "step": 1931 }, { "epoch": 0.1344046749452155, "grad_norm": 0.69140625, "learning_rate": 0.0019434199207158968, "loss": 3.3712, "step": 1932 }, { "epoch": 0.1344742425823507, "grad_norm": 0.8046875, "learning_rate": 0.0019433451762438274, "loss": 3.2983, "step": 1933 }, { "epoch": 0.1345438102194859, "grad_norm": 0.77734375, "learning_rate": 0.0019432703838734452, "loss": 3.5689, "step": 1934 }, { "epoch": 0.1346133778566211, "grad_norm": 0.6875, "learning_rate": 0.0019431955436085468, "loss": 3.0071, "step": 1935 }, { "epoch": 0.1346829454937563, "grad_norm": 0.78515625, "learning_rate": 0.0019431206554529333, "loss": 3.5376, "step": 1936 }, { "epoch": 0.1347525131308915, "grad_norm": 0.7265625, "learning_rate": 0.0019430457194104063, "loss": 3.5035, "step": 1937 }, { "epoch": 0.1348220807680267, "grad_norm": 0.64453125, "learning_rate": 0.0019429707354847712, "loss": 3.1914, "step": 1938 }, { "epoch": 0.1348916484051619, "grad_norm": 0.6484375, "learning_rate": 0.0019428957036798347, "loss": 3.9197, "step": 1939 }, { "epoch": 0.13496121604229713, "grad_norm": 0.84375, "learning_rate": 0.001942820623999407, "loss": 3.2672, "step": 1940 }, { "epoch": 0.13503078367943233, "grad_norm": 0.8125, "learning_rate": 0.0019427454964473006, "loss": 3.0891, "step": 1941 }, { "epoch": 0.13510035131656753, "grad_norm": 1.171875, "learning_rate": 0.0019426703210273294, "loss": 2.8527, "step": 1942 }, { "epoch": 0.13516991895370273, "grad_norm": 0.78125, "learning_rate": 0.0019425950977433105, "loss": 2.8761, "step": 1943 }, { "epoch": 0.13523948659083795, "grad_norm": 0.73828125, "learning_rate": 0.0019425198265990637, "loss": 3.564, "step": 1944 }, { "epoch": 0.13530905422797315, "grad_norm": 0.80859375, "learning_rate": 0.001942444507598411, "loss": 3.5785, "step": 1945 }, { "epoch": 0.13537862186510835, "grad_norm": 0.890625, "learning_rate": 0.0019423691407451761, "loss": 3.4398, "step": 1946 }, { "epoch": 0.13544818950224355, "grad_norm": 0.8125, "learning_rate": 0.0019422937260431864, "loss": 3.2611, "step": 1947 }, { "epoch": 0.13551775713937875, "grad_norm": 0.65234375, "learning_rate": 0.0019422182634962707, "loss": 3.4193, "step": 1948 }, { "epoch": 0.13558732477651397, "grad_norm": 0.76171875, "learning_rate": 0.0019421427531082606, "loss": 3.2257, "step": 1949 }, { "epoch": 0.13565689241364917, "grad_norm": 0.71484375, "learning_rate": 0.0019420671948829904, "loss": 3.2713, "step": 1950 }, { "epoch": 0.13572646005078437, "grad_norm": 0.7421875, "learning_rate": 0.0019419915888242963, "loss": 3.2238, "step": 1951 }, { "epoch": 0.13579602768791957, "grad_norm": 1.0234375, "learning_rate": 0.0019419159349360173, "loss": 3.5063, "step": 1952 }, { "epoch": 0.1358655953250548, "grad_norm": 0.62109375, "learning_rate": 0.0019418402332219951, "loss": 3.765, "step": 1953 }, { "epoch": 0.13593516296219, "grad_norm": 0.64453125, "learning_rate": 0.0019417644836860727, "loss": 3.1648, "step": 1954 }, { "epoch": 0.1360047305993252, "grad_norm": 0.89453125, "learning_rate": 0.0019416886863320968, "loss": 3.2006, "step": 1955 }, { "epoch": 0.1360742982364604, "grad_norm": 0.703125, "learning_rate": 0.001941612841163916, "loss": 3.1752, "step": 1956 }, { "epoch": 0.13614386587359562, "grad_norm": 0.92578125, "learning_rate": 0.0019415369481853811, "loss": 3.1891, "step": 1957 }, { "epoch": 0.1362134335107308, "grad_norm": 0.85546875, "learning_rate": 0.0019414610074003455, "loss": 3.3473, "step": 1958 }, { "epoch": 0.136283001147866, "grad_norm": 0.8515625, "learning_rate": 0.001941385018812665, "loss": 3.1638, "step": 1959 }, { "epoch": 0.1363525687850012, "grad_norm": 1.046875, "learning_rate": 0.0019413089824261989, "loss": 3.2753, "step": 1960 }, { "epoch": 0.1364221364221364, "grad_norm": 0.94921875, "learning_rate": 0.0019412328982448069, "loss": 3.0721, "step": 1961 }, { "epoch": 0.13649170405927163, "grad_norm": 0.8984375, "learning_rate": 0.0019411567662723523, "loss": 3.5696, "step": 1962 }, { "epoch": 0.13656127169640683, "grad_norm": 0.83984375, "learning_rate": 0.001941080586512701, "loss": 3.1816, "step": 1963 }, { "epoch": 0.13663083933354203, "grad_norm": 0.83203125, "learning_rate": 0.001941004358969721, "loss": 3.2473, "step": 1964 }, { "epoch": 0.13670040697067723, "grad_norm": 0.77734375, "learning_rate": 0.0019409280836472829, "loss": 3.4571, "step": 1965 }, { "epoch": 0.13676997460781246, "grad_norm": 0.828125, "learning_rate": 0.0019408517605492592, "loss": 3.5023, "step": 1966 }, { "epoch": 0.13683954224494765, "grad_norm": 0.8515625, "learning_rate": 0.001940775389679525, "loss": 3.7084, "step": 1967 }, { "epoch": 0.13690910988208285, "grad_norm": 0.90234375, "learning_rate": 0.0019406989710419587, "loss": 3.5293, "step": 1968 }, { "epoch": 0.13697867751921805, "grad_norm": 0.89453125, "learning_rate": 0.00194062250464044, "loss": 3.2683, "step": 1969 }, { "epoch": 0.13704824515635328, "grad_norm": 0.84375, "learning_rate": 0.0019405459904788516, "loss": 3.4587, "step": 1970 }, { "epoch": 0.13711781279348847, "grad_norm": 1.0703125, "learning_rate": 0.0019404694285610783, "loss": 2.7896, "step": 1971 }, { "epoch": 0.13718738043062367, "grad_norm": 0.82421875, "learning_rate": 0.0019403928188910082, "loss": 3.2769, "step": 1972 }, { "epoch": 0.13725694806775887, "grad_norm": 1.2890625, "learning_rate": 0.00194031616147253, "loss": 3.0715, "step": 1973 }, { "epoch": 0.13732651570489407, "grad_norm": 1.0546875, "learning_rate": 0.0019402394563095373, "loss": 3.2322, "step": 1974 }, { "epoch": 0.1373960833420293, "grad_norm": 0.8828125, "learning_rate": 0.001940162703405924, "loss": 3.543, "step": 1975 }, { "epoch": 0.1374656509791645, "grad_norm": 0.79296875, "learning_rate": 0.0019400859027655876, "loss": 3.3874, "step": 1976 }, { "epoch": 0.1375352186162997, "grad_norm": 0.73046875, "learning_rate": 0.0019400090543924271, "loss": 3.2165, "step": 1977 }, { "epoch": 0.1376047862534349, "grad_norm": 0.80078125, "learning_rate": 0.0019399321582903451, "loss": 2.9563, "step": 1978 }, { "epoch": 0.13767435389057012, "grad_norm": 0.71484375, "learning_rate": 0.0019398552144632454, "loss": 3.2973, "step": 1979 }, { "epoch": 0.13774392152770532, "grad_norm": 0.7265625, "learning_rate": 0.0019397782229150355, "loss": 3.3801, "step": 1980 }, { "epoch": 0.1378134891648405, "grad_norm": 0.94140625, "learning_rate": 0.001939701183649624, "loss": 3.2854, "step": 1981 }, { "epoch": 0.1378830568019757, "grad_norm": 0.75390625, "learning_rate": 0.0019396240966709226, "loss": 3.5824, "step": 1982 }, { "epoch": 0.13795262443911094, "grad_norm": 0.84375, "learning_rate": 0.001939546961982846, "loss": 3.2963, "step": 1983 }, { "epoch": 0.13802219207624614, "grad_norm": 0.828125, "learning_rate": 0.0019394697795893103, "loss": 3.2933, "step": 1984 }, { "epoch": 0.13809175971338133, "grad_norm": 0.9765625, "learning_rate": 0.0019393925494942345, "loss": 3.187, "step": 1985 }, { "epoch": 0.13816132735051653, "grad_norm": 0.68359375, "learning_rate": 0.0019393152717015396, "loss": 3.6, "step": 1986 }, { "epoch": 0.13823089498765173, "grad_norm": 0.76171875, "learning_rate": 0.0019392379462151502, "loss": 3.9195, "step": 1987 }, { "epoch": 0.13830046262478696, "grad_norm": 0.8046875, "learning_rate": 0.0019391605730389916, "loss": 3.3988, "step": 1988 }, { "epoch": 0.13837003026192216, "grad_norm": 0.859375, "learning_rate": 0.0019390831521769929, "loss": 3.3932, "step": 1989 }, { "epoch": 0.13843959789905735, "grad_norm": 0.7734375, "learning_rate": 0.0019390056836330852, "loss": 3.1819, "step": 1990 }, { "epoch": 0.13850916553619255, "grad_norm": 0.77734375, "learning_rate": 0.0019389281674112018, "loss": 3.409, "step": 1991 }, { "epoch": 0.13857873317332778, "grad_norm": 0.75390625, "learning_rate": 0.0019388506035152785, "loss": 3.4284, "step": 1992 }, { "epoch": 0.13864830081046298, "grad_norm": 0.91796875, "learning_rate": 0.0019387729919492541, "loss": 3.3189, "step": 1993 }, { "epoch": 0.13871786844759817, "grad_norm": 0.99609375, "learning_rate": 0.0019386953327170684, "loss": 3.356, "step": 1994 }, { "epoch": 0.13878743608473337, "grad_norm": 0.765625, "learning_rate": 0.0019386176258226653, "loss": 3.1132, "step": 1995 }, { "epoch": 0.1388570037218686, "grad_norm": 0.93359375, "learning_rate": 0.00193853987126999, "loss": 3.3558, "step": 1996 }, { "epoch": 0.1389265713590038, "grad_norm": 0.7578125, "learning_rate": 0.0019384620690629907, "loss": 3.2451, "step": 1997 }, { "epoch": 0.138996138996139, "grad_norm": 0.60546875, "learning_rate": 0.001938384219205618, "loss": 3.4747, "step": 1998 }, { "epoch": 0.1390657066332742, "grad_norm": 0.80078125, "learning_rate": 0.0019383063217018241, "loss": 2.8982, "step": 1999 }, { "epoch": 0.1391352742704094, "grad_norm": 0.74609375, "learning_rate": 0.0019382283765555651, "loss": 2.9246, "step": 2000 }, { "epoch": 0.13920484190754462, "grad_norm": 0.79296875, "learning_rate": 0.0019381503837707977, "loss": 3.1143, "step": 2001 }, { "epoch": 0.13927440954467982, "grad_norm": 0.77734375, "learning_rate": 0.0019380723433514823, "loss": 3.2632, "step": 2002 }, { "epoch": 0.13934397718181502, "grad_norm": 0.87109375, "learning_rate": 0.001937994255301582, "loss": 3.4156, "step": 2003 }, { "epoch": 0.1394135448189502, "grad_norm": 0.59375, "learning_rate": 0.001937916119625061, "loss": 3.0436, "step": 2004 }, { "epoch": 0.13948311245608544, "grad_norm": 0.953125, "learning_rate": 0.001937837936325887, "loss": 3.1214, "step": 2005 }, { "epoch": 0.13955268009322064, "grad_norm": 0.8515625, "learning_rate": 0.0019377597054080296, "loss": 3.0121, "step": 2006 }, { "epoch": 0.13962224773035584, "grad_norm": 0.85546875, "learning_rate": 0.0019376814268754609, "loss": 3.0581, "step": 2007 }, { "epoch": 0.13969181536749103, "grad_norm": 0.94140625, "learning_rate": 0.0019376031007321557, "loss": 3.3915, "step": 2008 }, { "epoch": 0.13976138300462626, "grad_norm": 0.77734375, "learning_rate": 0.001937524726982091, "loss": 3.26, "step": 2009 }, { "epoch": 0.13983095064176146, "grad_norm": 1.140625, "learning_rate": 0.0019374463056292459, "loss": 3.6236, "step": 2010 }, { "epoch": 0.13990051827889666, "grad_norm": 0.71875, "learning_rate": 0.0019373678366776028, "loss": 3.1375, "step": 2011 }, { "epoch": 0.13997008591603186, "grad_norm": 0.70703125, "learning_rate": 0.0019372893201311454, "loss": 3.4221, "step": 2012 }, { "epoch": 0.14003965355316705, "grad_norm": 1.0859375, "learning_rate": 0.0019372107559938608, "loss": 3.3759, "step": 2013 }, { "epoch": 0.14010922119030228, "grad_norm": 0.875, "learning_rate": 0.001937132144269738, "loss": 3.4701, "step": 2014 }, { "epoch": 0.14017878882743748, "grad_norm": 1.1484375, "learning_rate": 0.0019370534849627679, "loss": 3.1729, "step": 2015 }, { "epoch": 0.14024835646457268, "grad_norm": 0.84765625, "learning_rate": 0.0019369747780769453, "loss": 2.7963, "step": 2016 }, { "epoch": 0.14031792410170787, "grad_norm": 0.80078125, "learning_rate": 0.0019368960236162663, "loss": 3.4503, "step": 2017 }, { "epoch": 0.1403874917388431, "grad_norm": 0.9765625, "learning_rate": 0.0019368172215847293, "loss": 3.3126, "step": 2018 }, { "epoch": 0.1404570593759783, "grad_norm": 0.80859375, "learning_rate": 0.0019367383719863355, "loss": 3.8492, "step": 2019 }, { "epoch": 0.1405266270131135, "grad_norm": 0.69921875, "learning_rate": 0.0019366594748250893, "loss": 3.3776, "step": 2020 }, { "epoch": 0.1405961946502487, "grad_norm": 0.69921875, "learning_rate": 0.0019365805301049955, "loss": 3.2232, "step": 2021 }, { "epoch": 0.14066576228738392, "grad_norm": 0.73046875, "learning_rate": 0.0019365015378300632, "loss": 3.2879, "step": 2022 }, { "epoch": 0.14073532992451912, "grad_norm": 0.85546875, "learning_rate": 0.0019364224980043033, "loss": 3.2998, "step": 2023 }, { "epoch": 0.14080489756165432, "grad_norm": 0.98828125, "learning_rate": 0.0019363434106317288, "loss": 3.2886, "step": 2024 }, { "epoch": 0.14087446519878952, "grad_norm": 0.9921875, "learning_rate": 0.0019362642757163556, "loss": 3.2717, "step": 2025 }, { "epoch": 0.14094403283592472, "grad_norm": 1.0390625, "learning_rate": 0.0019361850932622011, "loss": 3.119, "step": 2026 }, { "epoch": 0.14101360047305994, "grad_norm": 0.7421875, "learning_rate": 0.0019361058632732867, "loss": 3.2256, "step": 2027 }, { "epoch": 0.14108316811019514, "grad_norm": 1.0234375, "learning_rate": 0.001936026585753635, "loss": 2.9657, "step": 2028 }, { "epoch": 0.14115273574733034, "grad_norm": 1.1015625, "learning_rate": 0.001935947260707271, "loss": 2.9658, "step": 2029 }, { "epoch": 0.14122230338446554, "grad_norm": 0.82421875, "learning_rate": 0.0019358678881382227, "loss": 3.5397, "step": 2030 }, { "epoch": 0.14129187102160076, "grad_norm": 1.03125, "learning_rate": 0.0019357884680505197, "loss": 3.3371, "step": 2031 }, { "epoch": 0.14136143865873596, "grad_norm": 0.7109375, "learning_rate": 0.0019357090004481954, "loss": 3.7608, "step": 2032 }, { "epoch": 0.14143100629587116, "grad_norm": 0.87109375, "learning_rate": 0.0019356294853352845, "loss": 3.2105, "step": 2033 }, { "epoch": 0.14150057393300636, "grad_norm": 0.73046875, "learning_rate": 0.0019355499227158243, "loss": 3.3079, "step": 2034 }, { "epoch": 0.14157014157014158, "grad_norm": 0.66015625, "learning_rate": 0.0019354703125938543, "loss": 3.2313, "step": 2035 }, { "epoch": 0.14163970920727678, "grad_norm": 0.7578125, "learning_rate": 0.001935390654973417, "loss": 3.4517, "step": 2036 }, { "epoch": 0.14170927684441198, "grad_norm": 0.7578125, "learning_rate": 0.001935310949858557, "loss": 2.969, "step": 2037 }, { "epoch": 0.14177884448154718, "grad_norm": 0.72265625, "learning_rate": 0.0019352311972533212, "loss": 3.5515, "step": 2038 }, { "epoch": 0.14184841211868238, "grad_norm": 0.81640625, "learning_rate": 0.0019351513971617594, "loss": 3.2527, "step": 2039 }, { "epoch": 0.1419179797558176, "grad_norm": 0.94140625, "learning_rate": 0.001935071549587923, "loss": 3.3396, "step": 2040 }, { "epoch": 0.1419875473929528, "grad_norm": 0.87890625, "learning_rate": 0.001934991654535866, "loss": 3.0555, "step": 2041 }, { "epoch": 0.142057115030088, "grad_norm": 0.76171875, "learning_rate": 0.001934911712009646, "loss": 3.5917, "step": 2042 }, { "epoch": 0.1421266826672232, "grad_norm": 0.859375, "learning_rate": 0.0019348317220133217, "loss": 3.4887, "step": 2043 }, { "epoch": 0.14219625030435842, "grad_norm": 0.93359375, "learning_rate": 0.001934751684550954, "loss": 3.6032, "step": 2044 }, { "epoch": 0.14226581794149362, "grad_norm": 0.84765625, "learning_rate": 0.0019346715996266073, "loss": 3.1755, "step": 2045 }, { "epoch": 0.14233538557862882, "grad_norm": 0.90234375, "learning_rate": 0.0019345914672443483, "loss": 3.165, "step": 2046 }, { "epoch": 0.14240495321576402, "grad_norm": 0.93359375, "learning_rate": 0.0019345112874082449, "loss": 2.8039, "step": 2047 }, { "epoch": 0.14247452085289924, "grad_norm": 0.828125, "learning_rate": 0.0019344310601223686, "loss": 2.9524, "step": 2048 }, { "epoch": 0.14254408849003444, "grad_norm": 0.69921875, "learning_rate": 0.001934350785390793, "loss": 3.5818, "step": 2049 }, { "epoch": 0.14261365612716964, "grad_norm": 0.91015625, "learning_rate": 0.0019342704632175944, "loss": 3.5404, "step": 2050 }, { "epoch": 0.14268322376430484, "grad_norm": 0.76171875, "learning_rate": 0.0019341900936068503, "loss": 3.0195, "step": 2051 }, { "epoch": 0.14275279140144004, "grad_norm": 0.78515625, "learning_rate": 0.001934109676562642, "loss": 3.166, "step": 2052 }, { "epoch": 0.14282235903857526, "grad_norm": 0.99609375, "learning_rate": 0.0019340292120890524, "loss": 3.1962, "step": 2053 }, { "epoch": 0.14289192667571046, "grad_norm": 1.359375, "learning_rate": 0.0019339487001901676, "loss": 3.3675, "step": 2054 }, { "epoch": 0.14296149431284566, "grad_norm": 0.8359375, "learning_rate": 0.0019338681408700752, "loss": 3.3433, "step": 2055 }, { "epoch": 0.14303106194998086, "grad_norm": 0.953125, "learning_rate": 0.0019337875341328655, "loss": 3.031, "step": 2056 }, { "epoch": 0.14310062958711608, "grad_norm": 0.98046875, "learning_rate": 0.0019337068799826316, "loss": 3.0238, "step": 2057 }, { "epoch": 0.14317019722425128, "grad_norm": 0.7421875, "learning_rate": 0.0019336261784234684, "loss": 3.3712, "step": 2058 }, { "epoch": 0.14323976486138648, "grad_norm": 0.765625, "learning_rate": 0.001933545429459474, "loss": 3.4989, "step": 2059 }, { "epoch": 0.14330933249852168, "grad_norm": 0.74609375, "learning_rate": 0.0019334646330947476, "loss": 3.2556, "step": 2060 }, { "epoch": 0.1433789001356569, "grad_norm": 0.875, "learning_rate": 0.0019333837893333926, "loss": 3.0214, "step": 2061 }, { "epoch": 0.1434484677727921, "grad_norm": 1.1015625, "learning_rate": 0.0019333028981795132, "loss": 3.1768, "step": 2062 }, { "epoch": 0.1435180354099273, "grad_norm": 0.765625, "learning_rate": 0.0019332219596372166, "loss": 3.4896, "step": 2063 }, { "epoch": 0.1435876030470625, "grad_norm": 0.86328125, "learning_rate": 0.0019331409737106129, "loss": 2.6882, "step": 2064 }, { "epoch": 0.1436571706841977, "grad_norm": 0.89453125, "learning_rate": 0.001933059940403814, "loss": 3.2502, "step": 2065 }, { "epoch": 0.14372673832133293, "grad_norm": 1.125, "learning_rate": 0.0019329788597209343, "loss": 3.2372, "step": 2066 }, { "epoch": 0.14379630595846812, "grad_norm": 0.8125, "learning_rate": 0.0019328977316660906, "loss": 3.2938, "step": 2067 }, { "epoch": 0.14386587359560332, "grad_norm": 0.7734375, "learning_rate": 0.0019328165562434024, "loss": 3.1811, "step": 2068 }, { "epoch": 0.14393544123273852, "grad_norm": 0.84375, "learning_rate": 0.001932735333456991, "loss": 3.4409, "step": 2069 }, { "epoch": 0.14400500886987375, "grad_norm": 0.859375, "learning_rate": 0.0019326540633109808, "loss": 3.1112, "step": 2070 }, { "epoch": 0.14407457650700894, "grad_norm": 1.1640625, "learning_rate": 0.0019325727458094982, "loss": 3.1347, "step": 2071 }, { "epoch": 0.14414414414414414, "grad_norm": 0.89453125, "learning_rate": 0.0019324913809566717, "loss": 3.1182, "step": 2072 }, { "epoch": 0.14421371178127934, "grad_norm": 0.96875, "learning_rate": 0.0019324099687566335, "loss": 3.2554, "step": 2073 }, { "epoch": 0.14428327941841457, "grad_norm": 1.078125, "learning_rate": 0.0019323285092135167, "loss": 2.8964, "step": 2074 }, { "epoch": 0.14435284705554977, "grad_norm": 0.98828125, "learning_rate": 0.0019322470023314573, "loss": 3.049, "step": 2075 }, { "epoch": 0.14442241469268496, "grad_norm": 0.74609375, "learning_rate": 0.001932165448114594, "loss": 2.8512, "step": 2076 }, { "epoch": 0.14449198232982016, "grad_norm": 0.828125, "learning_rate": 0.0019320838465670678, "loss": 3.2504, "step": 2077 }, { "epoch": 0.14456154996695536, "grad_norm": 0.67578125, "learning_rate": 0.001932002197693022, "loss": 3.0084, "step": 2078 }, { "epoch": 0.1446311176040906, "grad_norm": 0.9296875, "learning_rate": 0.0019319205014966022, "loss": 3.415, "step": 2079 }, { "epoch": 0.14470068524122578, "grad_norm": 0.66015625, "learning_rate": 0.0019318387579819562, "loss": 3.2434, "step": 2080 }, { "epoch": 0.14477025287836098, "grad_norm": 0.97265625, "learning_rate": 0.0019317569671532353, "loss": 3.4969, "step": 2081 }, { "epoch": 0.14483982051549618, "grad_norm": 0.9140625, "learning_rate": 0.0019316751290145923, "loss": 2.9212, "step": 2082 }, { "epoch": 0.1449093881526314, "grad_norm": 0.8828125, "learning_rate": 0.0019315932435701817, "loss": 3.4919, "step": 2083 }, { "epoch": 0.1449789557897666, "grad_norm": 0.8046875, "learning_rate": 0.0019315113108241617, "loss": 3.3674, "step": 2084 }, { "epoch": 0.1450485234269018, "grad_norm": 0.7421875, "learning_rate": 0.0019314293307806927, "loss": 3.6306, "step": 2085 }, { "epoch": 0.145118091064037, "grad_norm": 0.73828125, "learning_rate": 0.0019313473034439372, "loss": 3.1649, "step": 2086 }, { "epoch": 0.1451876587011722, "grad_norm": 1.0234375, "learning_rate": 0.00193126522881806, "loss": 3.0952, "step": 2087 }, { "epoch": 0.14525722633830743, "grad_norm": 0.63671875, "learning_rate": 0.0019311831069072278, "loss": 3.3009, "step": 2088 }, { "epoch": 0.14532679397544263, "grad_norm": 0.65625, "learning_rate": 0.0019311009377156116, "loss": 2.86, "step": 2089 }, { "epoch": 0.14539636161257782, "grad_norm": 0.57421875, "learning_rate": 0.0019310187212473826, "loss": 3.2769, "step": 2090 }, { "epoch": 0.14546592924971302, "grad_norm": 0.68359375, "learning_rate": 0.0019309364575067157, "loss": 3.2831, "step": 2091 }, { "epoch": 0.14553549688684825, "grad_norm": 0.7109375, "learning_rate": 0.0019308541464977877, "loss": 3.105, "step": 2092 }, { "epoch": 0.14560506452398345, "grad_norm": 0.74609375, "learning_rate": 0.001930771788224778, "loss": 3.2699, "step": 2093 }, { "epoch": 0.14567463216111864, "grad_norm": 0.83984375, "learning_rate": 0.0019306893826918684, "loss": 3.2145, "step": 2094 }, { "epoch": 0.14574419979825384, "grad_norm": 0.56640625, "learning_rate": 0.001930606929903243, "loss": 3.4146, "step": 2095 }, { "epoch": 0.14581376743538907, "grad_norm": 0.75, "learning_rate": 0.001930524429863088, "loss": 3.2052, "step": 2096 }, { "epoch": 0.14588333507252427, "grad_norm": 0.65234375, "learning_rate": 0.0019304418825755929, "loss": 3.7529, "step": 2097 }, { "epoch": 0.14595290270965947, "grad_norm": 0.73046875, "learning_rate": 0.0019303592880449488, "loss": 3.1949, "step": 2098 }, { "epoch": 0.14602247034679466, "grad_norm": 0.9140625, "learning_rate": 0.0019302766462753493, "loss": 2.8965, "step": 2099 }, { "epoch": 0.14609203798392986, "grad_norm": 0.796875, "learning_rate": 0.0019301939572709907, "loss": 3.0809, "step": 2100 }, { "epoch": 0.1461616056210651, "grad_norm": 0.81640625, "learning_rate": 0.0019301112210360714, "loss": 3.1178, "step": 2101 }, { "epoch": 0.1462311732582003, "grad_norm": 0.8203125, "learning_rate": 0.0019300284375747925, "loss": 3.6246, "step": 2102 }, { "epoch": 0.14630074089533548, "grad_norm": 0.8671875, "learning_rate": 0.0019299456068913572, "loss": 3.0863, "step": 2103 }, { "epoch": 0.14637030853247068, "grad_norm": 0.77734375, "learning_rate": 0.0019298627289899715, "loss": 3.1766, "step": 2104 }, { "epoch": 0.1464398761696059, "grad_norm": 0.96875, "learning_rate": 0.001929779803874843, "loss": 3.4152, "step": 2105 }, { "epoch": 0.1465094438067411, "grad_norm": 0.75, "learning_rate": 0.0019296968315501823, "loss": 3.3496, "step": 2106 }, { "epoch": 0.1465790114438763, "grad_norm": 0.7734375, "learning_rate": 0.001929613812020203, "loss": 3.267, "step": 2107 }, { "epoch": 0.1466485790810115, "grad_norm": 0.8203125, "learning_rate": 0.0019295307452891195, "loss": 3.1405, "step": 2108 }, { "epoch": 0.14671814671814673, "grad_norm": 0.73828125, "learning_rate": 0.0019294476313611501, "loss": 3.1015, "step": 2109 }, { "epoch": 0.14678771435528193, "grad_norm": 0.734375, "learning_rate": 0.0019293644702405147, "loss": 2.8852, "step": 2110 }, { "epoch": 0.14685728199241713, "grad_norm": 0.80859375, "learning_rate": 0.001929281261931436, "loss": 3.3098, "step": 2111 }, { "epoch": 0.14692684962955233, "grad_norm": 0.72265625, "learning_rate": 0.0019291980064381385, "loss": 3.1474, "step": 2112 }, { "epoch": 0.14699641726668752, "grad_norm": 0.72265625, "learning_rate": 0.00192911470376485, "loss": 3.6916, "step": 2113 }, { "epoch": 0.14706598490382275, "grad_norm": 0.8046875, "learning_rate": 0.0019290313539158, "loss": 3.4483, "step": 2114 }, { "epoch": 0.14713555254095795, "grad_norm": 1.09375, "learning_rate": 0.0019289479568952203, "loss": 3.3741, "step": 2115 }, { "epoch": 0.14720512017809315, "grad_norm": 0.75390625, "learning_rate": 0.0019288645127073455, "loss": 2.9652, "step": 2116 }, { "epoch": 0.14727468781522834, "grad_norm": 0.8828125, "learning_rate": 0.0019287810213564126, "loss": 2.9294, "step": 2117 }, { "epoch": 0.14734425545236357, "grad_norm": 0.70703125, "learning_rate": 0.001928697482846661, "loss": 3.3925, "step": 2118 }, { "epoch": 0.14741382308949877, "grad_norm": 0.8046875, "learning_rate": 0.001928613897182332, "loss": 3.0946, "step": 2119 }, { "epoch": 0.14748339072663397, "grad_norm": 0.59765625, "learning_rate": 0.00192853026436767, "loss": 3.5871, "step": 2120 }, { "epoch": 0.14755295836376917, "grad_norm": 0.64453125, "learning_rate": 0.0019284465844069212, "loss": 3.4433, "step": 2121 }, { "epoch": 0.1476225260009044, "grad_norm": 0.78125, "learning_rate": 0.0019283628573043348, "loss": 3.4745, "step": 2122 }, { "epoch": 0.1476920936380396, "grad_norm": 0.6484375, "learning_rate": 0.0019282790830641616, "loss": 3.5395, "step": 2123 }, { "epoch": 0.1477616612751748, "grad_norm": 0.68359375, "learning_rate": 0.0019281952616906554, "loss": 2.7807, "step": 2124 }, { "epoch": 0.14783122891231, "grad_norm": 0.6015625, "learning_rate": 0.0019281113931880727, "loss": 3.6789, "step": 2125 }, { "epoch": 0.14790079654944518, "grad_norm": 0.64453125, "learning_rate": 0.001928027477560671, "loss": 3.056, "step": 2126 }, { "epoch": 0.1479703641865804, "grad_norm": 0.796875, "learning_rate": 0.0019279435148127117, "loss": 3.503, "step": 2127 }, { "epoch": 0.1480399318237156, "grad_norm": 0.703125, "learning_rate": 0.001927859504948458, "loss": 3.5267, "step": 2128 }, { "epoch": 0.1481094994608508, "grad_norm": 0.8046875, "learning_rate": 0.0019277754479721755, "loss": 3.436, "step": 2129 }, { "epoch": 0.148179067097986, "grad_norm": 0.78515625, "learning_rate": 0.0019276913438881316, "loss": 3.2678, "step": 2130 }, { "epoch": 0.14824863473512123, "grad_norm": 0.71484375, "learning_rate": 0.0019276071927005977, "loss": 3.2497, "step": 2131 }, { "epoch": 0.14831820237225643, "grad_norm": 0.765625, "learning_rate": 0.0019275229944138456, "loss": 3.058, "step": 2132 }, { "epoch": 0.14838777000939163, "grad_norm": 1.015625, "learning_rate": 0.0019274387490321515, "loss": 3.205, "step": 2133 }, { "epoch": 0.14845733764652683, "grad_norm": 0.81640625, "learning_rate": 0.0019273544565597918, "loss": 2.9491, "step": 2134 }, { "epoch": 0.14852690528366205, "grad_norm": 1.1484375, "learning_rate": 0.0019272701170010471, "loss": 3.448, "step": 2135 }, { "epoch": 0.14859647292079725, "grad_norm": 0.7578125, "learning_rate": 0.0019271857303602, "loss": 3.424, "step": 2136 }, { "epoch": 0.14866604055793245, "grad_norm": 0.875, "learning_rate": 0.0019271012966415345, "loss": 3.2642, "step": 2137 }, { "epoch": 0.14873560819506765, "grad_norm": 0.921875, "learning_rate": 0.001927016815849338, "loss": 3.8212, "step": 2138 }, { "epoch": 0.14880517583220285, "grad_norm": 0.875, "learning_rate": 0.0019269322879879006, "loss": 3.1188, "step": 2139 }, { "epoch": 0.14887474346933807, "grad_norm": 0.98046875, "learning_rate": 0.0019268477130615135, "loss": 3.5071, "step": 2140 }, { "epoch": 0.14894431110647327, "grad_norm": 0.74609375, "learning_rate": 0.0019267630910744708, "loss": 3.1713, "step": 2141 }, { "epoch": 0.14901387874360847, "grad_norm": 0.7109375, "learning_rate": 0.00192667842203107, "loss": 3.2658, "step": 2142 }, { "epoch": 0.14908344638074367, "grad_norm": 0.625, "learning_rate": 0.0019265937059356095, "loss": 3.1708, "step": 2143 }, { "epoch": 0.1491530140178789, "grad_norm": 0.6484375, "learning_rate": 0.0019265089427923914, "loss": 3.24, "step": 2144 }, { "epoch": 0.1492225816550141, "grad_norm": 0.67578125, "learning_rate": 0.0019264241326057189, "loss": 3.2901, "step": 2145 }, { "epoch": 0.1492921492921493, "grad_norm": 0.74609375, "learning_rate": 0.0019263392753798981, "loss": 3.1596, "step": 2146 }, { "epoch": 0.1493617169292845, "grad_norm": 1.1171875, "learning_rate": 0.0019262543711192385, "loss": 2.9588, "step": 2147 }, { "epoch": 0.14943128456641971, "grad_norm": 0.796875, "learning_rate": 0.0019261694198280503, "loss": 2.9698, "step": 2148 }, { "epoch": 0.1495008522035549, "grad_norm": 0.7890625, "learning_rate": 0.0019260844215106471, "loss": 3.4912, "step": 2149 }, { "epoch": 0.1495704198406901, "grad_norm": 0.67578125, "learning_rate": 0.0019259993761713452, "loss": 3.1958, "step": 2150 }, { "epoch": 0.1496399874778253, "grad_norm": 0.8515625, "learning_rate": 0.0019259142838144623, "loss": 2.9556, "step": 2151 }, { "epoch": 0.1497095551149605, "grad_norm": 0.7890625, "learning_rate": 0.0019258291444443187, "loss": 3.0389, "step": 2152 }, { "epoch": 0.14977912275209573, "grad_norm": 0.9296875, "learning_rate": 0.0019257439580652378, "loss": 3.3051, "step": 2153 }, { "epoch": 0.14984869038923093, "grad_norm": 0.78515625, "learning_rate": 0.0019256587246815448, "loss": 3.0117, "step": 2154 }, { "epoch": 0.14991825802636613, "grad_norm": 0.6328125, "learning_rate": 0.0019255734442975676, "loss": 3.5847, "step": 2155 }, { "epoch": 0.14998782566350133, "grad_norm": 0.84765625, "learning_rate": 0.001925488116917636, "loss": 3.4234, "step": 2156 }, { "epoch": 0.15005739330063655, "grad_norm": 0.80078125, "learning_rate": 0.0019254027425460827, "loss": 3.4137, "step": 2157 }, { "epoch": 0.15012696093777175, "grad_norm": 0.78515625, "learning_rate": 0.0019253173211872423, "loss": 3.4854, "step": 2158 }, { "epoch": 0.15019652857490695, "grad_norm": 0.65625, "learning_rate": 0.0019252318528454526, "loss": 3.1512, "step": 2159 }, { "epoch": 0.15026609621204215, "grad_norm": 0.72265625, "learning_rate": 0.0019251463375250526, "loss": 3.7507, "step": 2160 }, { "epoch": 0.15033566384917738, "grad_norm": 0.71875, "learning_rate": 0.001925060775230385, "loss": 3.488, "step": 2161 }, { "epoch": 0.15040523148631257, "grad_norm": 0.6953125, "learning_rate": 0.0019249751659657934, "loss": 3.3587, "step": 2162 }, { "epoch": 0.15047479912344777, "grad_norm": 0.8203125, "learning_rate": 0.0019248895097356256, "loss": 3.428, "step": 2163 }, { "epoch": 0.15054436676058297, "grad_norm": 0.76953125, "learning_rate": 0.00192480380654423, "loss": 3.1657, "step": 2164 }, { "epoch": 0.15061393439771817, "grad_norm": 0.80078125, "learning_rate": 0.0019247180563959586, "loss": 3.5087, "step": 2165 }, { "epoch": 0.1506835020348534, "grad_norm": 0.7734375, "learning_rate": 0.0019246322592951653, "loss": 3.457, "step": 2166 }, { "epoch": 0.1507530696719886, "grad_norm": 0.75390625, "learning_rate": 0.0019245464152462062, "loss": 3.4768, "step": 2167 }, { "epoch": 0.1508226373091238, "grad_norm": 0.90234375, "learning_rate": 0.0019244605242534402, "loss": 3.0841, "step": 2168 }, { "epoch": 0.150892204946259, "grad_norm": 0.76171875, "learning_rate": 0.0019243745863212283, "loss": 3.1466, "step": 2169 }, { "epoch": 0.15096177258339422, "grad_norm": 0.8828125, "learning_rate": 0.0019242886014539343, "loss": 2.7856, "step": 2170 }, { "epoch": 0.15103134022052941, "grad_norm": 0.78515625, "learning_rate": 0.0019242025696559239, "loss": 3.379, "step": 2171 }, { "epoch": 0.1511009078576646, "grad_norm": 0.68359375, "learning_rate": 0.0019241164909315652, "loss": 3.4753, "step": 2172 }, { "epoch": 0.1511704754947998, "grad_norm": 0.8359375, "learning_rate": 0.001924030365285229, "loss": 2.9882, "step": 2173 }, { "epoch": 0.15124004313193504, "grad_norm": 0.71875, "learning_rate": 0.0019239441927212885, "loss": 3.2017, "step": 2174 }, { "epoch": 0.15130961076907024, "grad_norm": 0.8125, "learning_rate": 0.0019238579732441185, "loss": 2.9532, "step": 2175 }, { "epoch": 0.15137917840620543, "grad_norm": 0.75, "learning_rate": 0.0019237717068580973, "loss": 3.4523, "step": 2176 }, { "epoch": 0.15144874604334063, "grad_norm": 0.6171875, "learning_rate": 0.0019236853935676052, "loss": 3.2862, "step": 2177 }, { "epoch": 0.15151831368047583, "grad_norm": 0.62890625, "learning_rate": 0.0019235990333770247, "loss": 3.4693, "step": 2178 }, { "epoch": 0.15158788131761106, "grad_norm": 0.8515625, "learning_rate": 0.0019235126262907402, "loss": 3.1924, "step": 2179 }, { "epoch": 0.15165744895474625, "grad_norm": 0.86328125, "learning_rate": 0.0019234261723131395, "loss": 3.4629, "step": 2180 }, { "epoch": 0.15172701659188145, "grad_norm": 1.0546875, "learning_rate": 0.0019233396714486122, "loss": 3.4659, "step": 2181 }, { "epoch": 0.15179658422901665, "grad_norm": 0.71484375, "learning_rate": 0.0019232531237015503, "loss": 3.5634, "step": 2182 }, { "epoch": 0.15186615186615188, "grad_norm": 0.8203125, "learning_rate": 0.0019231665290763485, "loss": 3.2782, "step": 2183 }, { "epoch": 0.15193571950328708, "grad_norm": 0.91015625, "learning_rate": 0.0019230798875774031, "loss": 2.896, "step": 2184 }, { "epoch": 0.15200528714042227, "grad_norm": 1.078125, "learning_rate": 0.001922993199209114, "loss": 3.9471, "step": 2185 }, { "epoch": 0.15207485477755747, "grad_norm": 0.765625, "learning_rate": 0.0019229064639758825, "loss": 3.2744, "step": 2186 }, { "epoch": 0.1521444224146927, "grad_norm": 0.90625, "learning_rate": 0.0019228196818821127, "loss": 3.12, "step": 2187 }, { "epoch": 0.1522139900518279, "grad_norm": 0.80078125, "learning_rate": 0.0019227328529322102, "loss": 3.2227, "step": 2188 }, { "epoch": 0.1522835576889631, "grad_norm": 0.734375, "learning_rate": 0.001922645977130585, "loss": 3.2465, "step": 2189 }, { "epoch": 0.1523531253260983, "grad_norm": 0.78125, "learning_rate": 0.0019225590544816472, "loss": 2.951, "step": 2190 }, { "epoch": 0.1524226929632335, "grad_norm": 0.796875, "learning_rate": 0.0019224720849898107, "loss": 3.3373, "step": 2191 }, { "epoch": 0.15249226060036872, "grad_norm": 0.72265625, "learning_rate": 0.0019223850686594913, "loss": 3.667, "step": 2192 }, { "epoch": 0.15256182823750392, "grad_norm": 1.0078125, "learning_rate": 0.0019222980054951072, "loss": 3.4597, "step": 2193 }, { "epoch": 0.15263139587463911, "grad_norm": 0.7421875, "learning_rate": 0.0019222108955010793, "loss": 3.0378, "step": 2194 }, { "epoch": 0.1527009635117743, "grad_norm": 0.828125, "learning_rate": 0.0019221237386818305, "loss": 3.2428, "step": 2195 }, { "epoch": 0.15277053114890954, "grad_norm": 0.72265625, "learning_rate": 0.0019220365350417858, "loss": 3.5437, "step": 2196 }, { "epoch": 0.15284009878604474, "grad_norm": 0.93359375, "learning_rate": 0.0019219492845853733, "loss": 3.1585, "step": 2197 }, { "epoch": 0.15290966642317994, "grad_norm": 0.80859375, "learning_rate": 0.0019218619873170232, "loss": 3.2917, "step": 2198 }, { "epoch": 0.15297923406031513, "grad_norm": 0.73046875, "learning_rate": 0.001921774643241168, "loss": 3.4092, "step": 2199 }, { "epoch": 0.15304880169745036, "grad_norm": 0.83203125, "learning_rate": 0.0019216872523622427, "loss": 3.2245, "step": 2200 }, { "epoch": 0.15311836933458556, "grad_norm": 0.88671875, "learning_rate": 0.0019215998146846838, "loss": 2.9539, "step": 2201 }, { "epoch": 0.15318793697172076, "grad_norm": 0.77734375, "learning_rate": 0.001921512330212932, "loss": 3.0401, "step": 2202 }, { "epoch": 0.15325750460885595, "grad_norm": 0.9296875, "learning_rate": 0.0019214247989514286, "loss": 3.1097, "step": 2203 }, { "epoch": 0.15332707224599115, "grad_norm": 0.91015625, "learning_rate": 0.0019213372209046183, "loss": 3.3438, "step": 2204 }, { "epoch": 0.15339663988312638, "grad_norm": 0.7109375, "learning_rate": 0.0019212495960769479, "loss": 3.4192, "step": 2205 }, { "epoch": 0.15346620752026158, "grad_norm": 0.7734375, "learning_rate": 0.001921161924472866, "loss": 3.0318, "step": 2206 }, { "epoch": 0.15353577515739678, "grad_norm": 0.74609375, "learning_rate": 0.001921074206096825, "loss": 3.2601, "step": 2207 }, { "epoch": 0.15360534279453197, "grad_norm": 0.85546875, "learning_rate": 0.0019209864409532784, "loss": 3.0052, "step": 2208 }, { "epoch": 0.1536749104316672, "grad_norm": 0.6953125, "learning_rate": 0.0019208986290466822, "loss": 3.2566, "step": 2209 }, { "epoch": 0.1537444780688024, "grad_norm": 0.81640625, "learning_rate": 0.0019208107703814954, "loss": 3.2904, "step": 2210 }, { "epoch": 0.1538140457059376, "grad_norm": 0.859375, "learning_rate": 0.001920722864962179, "loss": 3.2598, "step": 2211 }, { "epoch": 0.1538836133430728, "grad_norm": 0.88671875, "learning_rate": 0.0019206349127931963, "loss": 2.8552, "step": 2212 }, { "epoch": 0.15395318098020802, "grad_norm": 0.734375, "learning_rate": 0.001920546913879013, "loss": 3.4522, "step": 2213 }, { "epoch": 0.15402274861734322, "grad_norm": 0.86328125, "learning_rate": 0.0019204588682240973, "loss": 3.4462, "step": 2214 }, { "epoch": 0.15409231625447842, "grad_norm": 0.80859375, "learning_rate": 0.0019203707758329198, "loss": 3.5985, "step": 2215 }, { "epoch": 0.15416188389161362, "grad_norm": 0.7734375, "learning_rate": 0.0019202826367099534, "loss": 3.1834, "step": 2216 }, { "epoch": 0.15423145152874881, "grad_norm": 0.83984375, "learning_rate": 0.0019201944508596732, "loss": 3.5128, "step": 2217 }, { "epoch": 0.15430101916588404, "grad_norm": 0.87109375, "learning_rate": 0.0019201062182865566, "loss": 3.4152, "step": 2218 }, { "epoch": 0.15437058680301924, "grad_norm": 0.7421875, "learning_rate": 0.0019200179389950842, "loss": 3.2574, "step": 2219 }, { "epoch": 0.15444015444015444, "grad_norm": 0.62109375, "learning_rate": 0.001919929612989738, "loss": 3.3873, "step": 2220 }, { "epoch": 0.15450972207728964, "grad_norm": 0.81640625, "learning_rate": 0.001919841240275003, "loss": 3.3195, "step": 2221 }, { "epoch": 0.15457928971442486, "grad_norm": 0.7421875, "learning_rate": 0.0019197528208553661, "loss": 3.0571, "step": 2222 }, { "epoch": 0.15464885735156006, "grad_norm": 0.7109375, "learning_rate": 0.0019196643547353168, "loss": 3.526, "step": 2223 }, { "epoch": 0.15471842498869526, "grad_norm": 0.73828125, "learning_rate": 0.001919575841919347, "loss": 3.4711, "step": 2224 }, { "epoch": 0.15478799262583046, "grad_norm": 0.8359375, "learning_rate": 0.001919487282411951, "loss": 3.3619, "step": 2225 }, { "epoch": 0.15485756026296568, "grad_norm": 0.640625, "learning_rate": 0.0019193986762176252, "loss": 3.0812, "step": 2226 }, { "epoch": 0.15492712790010088, "grad_norm": 0.6953125, "learning_rate": 0.0019193100233408692, "loss": 3.1839, "step": 2227 }, { "epoch": 0.15499669553723608, "grad_norm": 0.76171875, "learning_rate": 0.0019192213237861834, "loss": 3.0777, "step": 2228 }, { "epoch": 0.15506626317437128, "grad_norm": 0.87890625, "learning_rate": 0.0019191325775580722, "loss": 2.9409, "step": 2229 }, { "epoch": 0.15513583081150648, "grad_norm": 0.90625, "learning_rate": 0.0019190437846610413, "loss": 3.1332, "step": 2230 }, { "epoch": 0.1552053984486417, "grad_norm": 1.0390625, "learning_rate": 0.0019189549450995996, "loss": 3.3082, "step": 2231 }, { "epoch": 0.1552749660857769, "grad_norm": 0.86328125, "learning_rate": 0.0019188660588782573, "loss": 3.2494, "step": 2232 }, { "epoch": 0.1553445337229121, "grad_norm": 1.078125, "learning_rate": 0.0019187771260015284, "loss": 3.215, "step": 2233 }, { "epoch": 0.1554141013600473, "grad_norm": 0.80859375, "learning_rate": 0.0019186881464739278, "loss": 3.6842, "step": 2234 }, { "epoch": 0.15548366899718252, "grad_norm": 0.72265625, "learning_rate": 0.0019185991202999738, "loss": 3.232, "step": 2235 }, { "epoch": 0.15555323663431772, "grad_norm": 0.91796875, "learning_rate": 0.0019185100474841863, "loss": 3.0475, "step": 2236 }, { "epoch": 0.15562280427145292, "grad_norm": 0.734375, "learning_rate": 0.0019184209280310883, "loss": 3.2183, "step": 2237 }, { "epoch": 0.15569237190858812, "grad_norm": 0.91796875, "learning_rate": 0.001918331761945205, "loss": 3.4003, "step": 2238 }, { "epoch": 0.15576193954572332, "grad_norm": 0.83203125, "learning_rate": 0.0019182425492310633, "loss": 3.3352, "step": 2239 }, { "epoch": 0.15583150718285854, "grad_norm": 0.9375, "learning_rate": 0.0019181532898931934, "loss": 3.2472, "step": 2240 }, { "epoch": 0.15590107481999374, "grad_norm": 0.9140625, "learning_rate": 0.001918063983936127, "loss": 3.2951, "step": 2241 }, { "epoch": 0.15597064245712894, "grad_norm": 1.1015625, "learning_rate": 0.0019179746313643992, "loss": 3.3719, "step": 2242 }, { "epoch": 0.15604021009426414, "grad_norm": 0.78125, "learning_rate": 0.0019178852321825464, "loss": 3.3772, "step": 2243 }, { "epoch": 0.15610977773139936, "grad_norm": 0.7578125, "learning_rate": 0.001917795786395108, "loss": 3.1354, "step": 2244 }, { "epoch": 0.15617934536853456, "grad_norm": 0.6953125, "learning_rate": 0.0019177062940066256, "loss": 3.3926, "step": 2245 }, { "epoch": 0.15624891300566976, "grad_norm": 0.87890625, "learning_rate": 0.0019176167550216433, "loss": 3.2926, "step": 2246 }, { "epoch": 0.15631848064280496, "grad_norm": 0.82421875, "learning_rate": 0.0019175271694447072, "loss": 3.3812, "step": 2247 }, { "epoch": 0.15638804827994018, "grad_norm": 0.9296875, "learning_rate": 0.0019174375372803662, "loss": 3.2116, "step": 2248 }, { "epoch": 0.15645761591707538, "grad_norm": 0.82421875, "learning_rate": 0.0019173478585331712, "loss": 2.9926, "step": 2249 }, { "epoch": 0.15652718355421058, "grad_norm": 0.9921875, "learning_rate": 0.0019172581332076756, "loss": 3.1369, "step": 2250 }, { "epoch": 0.15659675119134578, "grad_norm": 0.79296875, "learning_rate": 0.0019171683613084353, "loss": 3.4026, "step": 2251 }, { "epoch": 0.15666631882848098, "grad_norm": 0.73046875, "learning_rate": 0.0019170785428400086, "loss": 3.0608, "step": 2252 }, { "epoch": 0.1567358864656162, "grad_norm": 1.03125, "learning_rate": 0.001916988677806956, "loss": 3.22, "step": 2253 }, { "epoch": 0.1568054541027514, "grad_norm": 0.65625, "learning_rate": 0.0019168987662138402, "loss": 3.0833, "step": 2254 }, { "epoch": 0.1568750217398866, "grad_norm": 0.83984375, "learning_rate": 0.0019168088080652268, "loss": 2.9146, "step": 2255 }, { "epoch": 0.1569445893770218, "grad_norm": 0.86328125, "learning_rate": 0.0019167188033656828, "loss": 3.3521, "step": 2256 }, { "epoch": 0.15701415701415702, "grad_norm": 0.84375, "learning_rate": 0.0019166287521197786, "loss": 3.179, "step": 2257 }, { "epoch": 0.15708372465129222, "grad_norm": 0.6328125, "learning_rate": 0.0019165386543320867, "loss": 3.3823, "step": 2258 }, { "epoch": 0.15715329228842742, "grad_norm": 0.83984375, "learning_rate": 0.0019164485100071817, "loss": 3.5961, "step": 2259 }, { "epoch": 0.15722285992556262, "grad_norm": 0.828125, "learning_rate": 0.0019163583191496407, "loss": 3.0851, "step": 2260 }, { "epoch": 0.15729242756269785, "grad_norm": 0.77734375, "learning_rate": 0.0019162680817640429, "loss": 3.232, "step": 2261 }, { "epoch": 0.15736199519983304, "grad_norm": 0.796875, "learning_rate": 0.00191617779785497, "loss": 3.3365, "step": 2262 }, { "epoch": 0.15743156283696824, "grad_norm": 0.6640625, "learning_rate": 0.0019160874674270067, "loss": 3.7762, "step": 2263 }, { "epoch": 0.15750113047410344, "grad_norm": 0.80859375, "learning_rate": 0.0019159970904847393, "loss": 3.0307, "step": 2264 }, { "epoch": 0.15757069811123864, "grad_norm": 0.76171875, "learning_rate": 0.0019159066670327563, "loss": 3.2305, "step": 2265 }, { "epoch": 0.15764026574837386, "grad_norm": 0.7109375, "learning_rate": 0.0019158161970756493, "loss": 3.0286, "step": 2266 }, { "epoch": 0.15770983338550906, "grad_norm": 0.94921875, "learning_rate": 0.001915725680618012, "loss": 3.1607, "step": 2267 }, { "epoch": 0.15777940102264426, "grad_norm": 0.859375, "learning_rate": 0.0019156351176644404, "loss": 3.2419, "step": 2268 }, { "epoch": 0.15784896865977946, "grad_norm": 0.65625, "learning_rate": 0.0019155445082195324, "loss": 3.6515, "step": 2269 }, { "epoch": 0.15791853629691469, "grad_norm": 0.87109375, "learning_rate": 0.001915453852287889, "loss": 3.5183, "step": 2270 }, { "epoch": 0.15798810393404988, "grad_norm": 0.7421875, "learning_rate": 0.0019153631498741133, "loss": 3.5219, "step": 2271 }, { "epoch": 0.15805767157118508, "grad_norm": 0.78125, "learning_rate": 0.0019152724009828105, "loss": 3.2711, "step": 2272 }, { "epoch": 0.15812723920832028, "grad_norm": 0.73046875, "learning_rate": 0.0019151816056185887, "loss": 3.2095, "step": 2273 }, { "epoch": 0.1581968068454555, "grad_norm": 0.734375, "learning_rate": 0.0019150907637860576, "loss": 3.4705, "step": 2274 }, { "epoch": 0.1582663744825907, "grad_norm": 0.75, "learning_rate": 0.0019149998754898298, "loss": 3.5706, "step": 2275 }, { "epoch": 0.1583359421197259, "grad_norm": 0.92578125, "learning_rate": 0.0019149089407345206, "loss": 3.1592, "step": 2276 }, { "epoch": 0.1584055097568611, "grad_norm": 0.8203125, "learning_rate": 0.0019148179595247468, "loss": 3.2951, "step": 2277 }, { "epoch": 0.1584750773939963, "grad_norm": 0.9140625, "learning_rate": 0.0019147269318651279, "loss": 3.4067, "step": 2278 }, { "epoch": 0.15854464503113153, "grad_norm": 0.87109375, "learning_rate": 0.001914635857760286, "loss": 3.2653, "step": 2279 }, { "epoch": 0.15861421266826672, "grad_norm": 0.80859375, "learning_rate": 0.0019145447372148454, "loss": 3.1743, "step": 2280 }, { "epoch": 0.15868378030540192, "grad_norm": 0.671875, "learning_rate": 0.0019144535702334327, "loss": 3.3068, "step": 2281 }, { "epoch": 0.15875334794253712, "grad_norm": 0.83984375, "learning_rate": 0.001914362356820677, "loss": 3.6477, "step": 2282 }, { "epoch": 0.15882291557967235, "grad_norm": 0.87109375, "learning_rate": 0.0019142710969812092, "loss": 3.1305, "step": 2283 }, { "epoch": 0.15889248321680755, "grad_norm": 0.88671875, "learning_rate": 0.0019141797907196638, "loss": 2.9882, "step": 2284 }, { "epoch": 0.15896205085394274, "grad_norm": 0.6796875, "learning_rate": 0.0019140884380406762, "loss": 3.4047, "step": 2285 }, { "epoch": 0.15903161849107794, "grad_norm": 0.71484375, "learning_rate": 0.001913997038948885, "loss": 3.4563, "step": 2286 }, { "epoch": 0.15910118612821317, "grad_norm": 0.71484375, "learning_rate": 0.001913905593448931, "loss": 3.0906, "step": 2287 }, { "epoch": 0.15917075376534837, "grad_norm": 0.69140625, "learning_rate": 0.0019138141015454578, "loss": 3.483, "step": 2288 }, { "epoch": 0.15924032140248356, "grad_norm": 0.8125, "learning_rate": 0.00191372256324311, "loss": 3.0954, "step": 2289 }, { "epoch": 0.15930988903961876, "grad_norm": 0.92578125, "learning_rate": 0.0019136309785465363, "loss": 3.2783, "step": 2290 }, { "epoch": 0.15937945667675396, "grad_norm": 0.671875, "learning_rate": 0.0019135393474603863, "loss": 3.4251, "step": 2291 }, { "epoch": 0.1594490243138892, "grad_norm": 1.15625, "learning_rate": 0.0019134476699893131, "loss": 3.1533, "step": 2292 }, { "epoch": 0.15951859195102439, "grad_norm": 0.76171875, "learning_rate": 0.0019133559461379708, "loss": 3.1875, "step": 2293 }, { "epoch": 0.15958815958815958, "grad_norm": 0.7578125, "learning_rate": 0.0019132641759110175, "loss": 3.1194, "step": 2294 }, { "epoch": 0.15965772722529478, "grad_norm": 0.78515625, "learning_rate": 0.001913172359313113, "loss": 3.3485, "step": 2295 }, { "epoch": 0.15972729486243, "grad_norm": 0.91796875, "learning_rate": 0.0019130804963489183, "loss": 3.5899, "step": 2296 }, { "epoch": 0.1597968624995652, "grad_norm": 0.96484375, "learning_rate": 0.0019129885870230983, "loss": 2.8317, "step": 2297 }, { "epoch": 0.1598664301367004, "grad_norm": 0.6953125, "learning_rate": 0.0019128966313403197, "loss": 3.3308, "step": 2298 }, { "epoch": 0.1599359977738356, "grad_norm": 0.828125, "learning_rate": 0.0019128046293052515, "loss": 3.3317, "step": 2299 }, { "epoch": 0.16000556541097083, "grad_norm": 0.8515625, "learning_rate": 0.0019127125809225653, "loss": 3.7295, "step": 2300 }, { "epoch": 0.16007513304810603, "grad_norm": 0.84765625, "learning_rate": 0.0019126204861969344, "loss": 3.281, "step": 2301 }, { "epoch": 0.16014470068524123, "grad_norm": 0.8828125, "learning_rate": 0.0019125283451330354, "loss": 3.1455, "step": 2302 }, { "epoch": 0.16021426832237642, "grad_norm": 0.953125, "learning_rate": 0.0019124361577355462, "loss": 2.7583, "step": 2303 }, { "epoch": 0.16028383595951162, "grad_norm": 0.9609375, "learning_rate": 0.0019123439240091482, "loss": 3.3957, "step": 2304 }, { "epoch": 0.16035340359664685, "grad_norm": 0.83203125, "learning_rate": 0.0019122516439585243, "loss": 3.1651, "step": 2305 }, { "epoch": 0.16042297123378205, "grad_norm": 0.91796875, "learning_rate": 0.0019121593175883596, "loss": 3.4217, "step": 2306 }, { "epoch": 0.16049253887091725, "grad_norm": 0.77734375, "learning_rate": 0.0019120669449033429, "loss": 3.1833, "step": 2307 }, { "epoch": 0.16056210650805244, "grad_norm": 0.77734375, "learning_rate": 0.0019119745259081635, "loss": 3.5411, "step": 2308 }, { "epoch": 0.16063167414518767, "grad_norm": 0.7265625, "learning_rate": 0.0019118820606075146, "loss": 3.6817, "step": 2309 }, { "epoch": 0.16070124178232287, "grad_norm": 0.70703125, "learning_rate": 0.001911789549006091, "loss": 3.2903, "step": 2310 }, { "epoch": 0.16077080941945807, "grad_norm": 0.82421875, "learning_rate": 0.0019116969911085896, "loss": 2.9979, "step": 2311 }, { "epoch": 0.16084037705659326, "grad_norm": 0.921875, "learning_rate": 0.0019116043869197102, "loss": 3.6423, "step": 2312 }, { "epoch": 0.1609099446937285, "grad_norm": 0.78515625, "learning_rate": 0.0019115117364441553, "loss": 3.4296, "step": 2313 }, { "epoch": 0.1609795123308637, "grad_norm": 0.70703125, "learning_rate": 0.0019114190396866283, "loss": 3.6313, "step": 2314 }, { "epoch": 0.1610490799679989, "grad_norm": 0.8671875, "learning_rate": 0.0019113262966518369, "loss": 3.1692, "step": 2315 }, { "epoch": 0.16111864760513409, "grad_norm": 0.875, "learning_rate": 0.0019112335073444891, "loss": 3.4536, "step": 2316 }, { "epoch": 0.16118821524226928, "grad_norm": 1.0, "learning_rate": 0.0019111406717692966, "loss": 2.7128, "step": 2317 }, { "epoch": 0.1612577828794045, "grad_norm": 0.859375, "learning_rate": 0.0019110477899309739, "loss": 3.1569, "step": 2318 }, { "epoch": 0.1613273505165397, "grad_norm": 0.84765625, "learning_rate": 0.001910954861834236, "loss": 3.2842, "step": 2319 }, { "epoch": 0.1613969181536749, "grad_norm": 0.8046875, "learning_rate": 0.001910861887483802, "loss": 3.1674, "step": 2320 }, { "epoch": 0.1614664857908101, "grad_norm": 0.6953125, "learning_rate": 0.0019107688668843924, "loss": 3.3832, "step": 2321 }, { "epoch": 0.16153605342794533, "grad_norm": 0.97265625, "learning_rate": 0.00191067580004073, "loss": 2.9273, "step": 2322 }, { "epoch": 0.16160562106508053, "grad_norm": 0.81640625, "learning_rate": 0.001910582686957541, "loss": 3.4529, "step": 2323 }, { "epoch": 0.16167518870221573, "grad_norm": 1.0078125, "learning_rate": 0.001910489527639553, "loss": 3.2073, "step": 2324 }, { "epoch": 0.16174475633935093, "grad_norm": 0.75390625, "learning_rate": 0.0019103963220914958, "loss": 3.587, "step": 2325 }, { "epoch": 0.16181432397648615, "grad_norm": 0.8203125, "learning_rate": 0.001910303070318102, "loss": 2.9228, "step": 2326 }, { "epoch": 0.16188389161362135, "grad_norm": 1.0546875, "learning_rate": 0.0019102097723241065, "loss": 3.3639, "step": 2327 }, { "epoch": 0.16195345925075655, "grad_norm": 0.89453125, "learning_rate": 0.0019101164281142466, "loss": 3.4723, "step": 2328 }, { "epoch": 0.16202302688789175, "grad_norm": 0.87109375, "learning_rate": 0.0019100230376932618, "loss": 3.236, "step": 2329 }, { "epoch": 0.16209259452502695, "grad_norm": 1.0546875, "learning_rate": 0.001909929601065894, "loss": 3.2439, "step": 2330 }, { "epoch": 0.16216216216216217, "grad_norm": 0.8515625, "learning_rate": 0.0019098361182368878, "loss": 3.2361, "step": 2331 }, { "epoch": 0.16223172979929737, "grad_norm": 0.859375, "learning_rate": 0.0019097425892109889, "loss": 3.3246, "step": 2332 }, { "epoch": 0.16230129743643257, "grad_norm": 0.84375, "learning_rate": 0.0019096490139929472, "loss": 2.9247, "step": 2333 }, { "epoch": 0.16237086507356777, "grad_norm": 1.0234375, "learning_rate": 0.0019095553925875133, "loss": 3.1494, "step": 2334 }, { "epoch": 0.162440432710703, "grad_norm": 0.98046875, "learning_rate": 0.001909461724999441, "loss": 3.2959, "step": 2335 }, { "epoch": 0.1625100003478382, "grad_norm": 0.97265625, "learning_rate": 0.0019093680112334864, "loss": 3.0625, "step": 2336 }, { "epoch": 0.1625795679849734, "grad_norm": 1.0546875, "learning_rate": 0.001909274251294408, "loss": 3.179, "step": 2337 }, { "epoch": 0.1626491356221086, "grad_norm": 0.85546875, "learning_rate": 0.001909180445186966, "loss": 3.1252, "step": 2338 }, { "epoch": 0.1627187032592438, "grad_norm": 0.71484375, "learning_rate": 0.0019090865929159233, "loss": 3.6039, "step": 2339 }, { "epoch": 0.162788270896379, "grad_norm": 0.77734375, "learning_rate": 0.0019089926944860461, "loss": 3.4938, "step": 2340 }, { "epoch": 0.1628578385335142, "grad_norm": 0.59375, "learning_rate": 0.0019088987499021012, "loss": 3.6166, "step": 2341 }, { "epoch": 0.1629274061706494, "grad_norm": 1.0234375, "learning_rate": 0.001908804759168859, "loss": 3.4761, "step": 2342 }, { "epoch": 0.1629969738077846, "grad_norm": 0.90234375, "learning_rate": 0.001908710722291092, "loss": 3.2296, "step": 2343 }, { "epoch": 0.16306654144491983, "grad_norm": 0.81640625, "learning_rate": 0.0019086166392735745, "loss": 3.1014, "step": 2344 }, { "epoch": 0.16313610908205503, "grad_norm": 0.68359375, "learning_rate": 0.001908522510121084, "loss": 3.6301, "step": 2345 }, { "epoch": 0.16320567671919023, "grad_norm": 0.87109375, "learning_rate": 0.0019084283348383994, "loss": 2.8503, "step": 2346 }, { "epoch": 0.16327524435632543, "grad_norm": 0.796875, "learning_rate": 0.0019083341134303034, "loss": 3.1469, "step": 2347 }, { "epoch": 0.16334481199346065, "grad_norm": 0.890625, "learning_rate": 0.001908239845901579, "loss": 3.2092, "step": 2348 }, { "epoch": 0.16341437963059585, "grad_norm": 0.6953125, "learning_rate": 0.0019081455322570134, "loss": 3.4614, "step": 2349 }, { "epoch": 0.16348394726773105, "grad_norm": 0.7109375, "learning_rate": 0.001908051172501395, "loss": 2.7877, "step": 2350 }, { "epoch": 0.16355351490486625, "grad_norm": 0.703125, "learning_rate": 0.0019079567666395146, "loss": 3.1249, "step": 2351 }, { "epoch": 0.16362308254200147, "grad_norm": 0.80078125, "learning_rate": 0.0019078623146761662, "loss": 3.3158, "step": 2352 }, { "epoch": 0.16369265017913667, "grad_norm": 0.828125, "learning_rate": 0.0019077678166161457, "loss": 3.1093, "step": 2353 }, { "epoch": 0.16376221781627187, "grad_norm": 0.84765625, "learning_rate": 0.0019076732724642507, "loss": 3.0221, "step": 2354 }, { "epoch": 0.16383178545340707, "grad_norm": 0.90625, "learning_rate": 0.0019075786822252822, "loss": 3.3588, "step": 2355 }, { "epoch": 0.16390135309054227, "grad_norm": 0.5546875, "learning_rate": 0.0019074840459040426, "loss": 3.5697, "step": 2356 }, { "epoch": 0.1639709207276775, "grad_norm": 0.84765625, "learning_rate": 0.0019073893635053372, "loss": 3.0052, "step": 2357 }, { "epoch": 0.1640404883648127, "grad_norm": 1.1015625, "learning_rate": 0.0019072946350339732, "loss": 2.9758, "step": 2358 }, { "epoch": 0.1641100560019479, "grad_norm": 0.88671875, "learning_rate": 0.0019071998604947612, "loss": 2.8526, "step": 2359 }, { "epoch": 0.1641796236390831, "grad_norm": 0.76953125, "learning_rate": 0.0019071050398925128, "loss": 3.6262, "step": 2360 }, { "epoch": 0.16424919127621831, "grad_norm": 0.85546875, "learning_rate": 0.0019070101732320426, "loss": 3.246, "step": 2361 }, { "epoch": 0.1643187589133535, "grad_norm": 0.6484375, "learning_rate": 0.0019069152605181673, "loss": 3.2814, "step": 2362 }, { "epoch": 0.1643883265504887, "grad_norm": 0.8046875, "learning_rate": 0.0019068203017557064, "loss": 3.2555, "step": 2363 }, { "epoch": 0.1644578941876239, "grad_norm": 0.73046875, "learning_rate": 0.0019067252969494812, "loss": 2.951, "step": 2364 }, { "epoch": 0.16452746182475914, "grad_norm": 0.81640625, "learning_rate": 0.0019066302461043158, "loss": 3.3416, "step": 2365 }, { "epoch": 0.16459702946189433, "grad_norm": 0.7265625, "learning_rate": 0.0019065351492250362, "loss": 3.3133, "step": 2366 }, { "epoch": 0.16466659709902953, "grad_norm": 0.6796875, "learning_rate": 0.0019064400063164711, "loss": 3.5457, "step": 2367 }, { "epoch": 0.16473616473616473, "grad_norm": 0.8046875, "learning_rate": 0.001906344817383451, "loss": 3.1784, "step": 2368 }, { "epoch": 0.16480573237329993, "grad_norm": 0.8046875, "learning_rate": 0.0019062495824308098, "loss": 3.2771, "step": 2369 }, { "epoch": 0.16487530001043516, "grad_norm": 0.92578125, "learning_rate": 0.0019061543014633822, "loss": 2.851, "step": 2370 }, { "epoch": 0.16494486764757035, "grad_norm": 1.25, "learning_rate": 0.0019060589744860068, "loss": 3.1726, "step": 2371 }, { "epoch": 0.16501443528470555, "grad_norm": 0.77734375, "learning_rate": 0.0019059636015035235, "loss": 3.5959, "step": 2372 }, { "epoch": 0.16508400292184075, "grad_norm": 0.93359375, "learning_rate": 0.0019058681825207748, "loss": 3.3155, "step": 2373 }, { "epoch": 0.16515357055897598, "grad_norm": 0.890625, "learning_rate": 0.001905772717542606, "loss": 3.2452, "step": 2374 }, { "epoch": 0.16522313819611117, "grad_norm": 0.77734375, "learning_rate": 0.0019056772065738636, "loss": 3.0752, "step": 2375 }, { "epoch": 0.16529270583324637, "grad_norm": 0.70703125, "learning_rate": 0.0019055816496193981, "loss": 3.1546, "step": 2376 }, { "epoch": 0.16536227347038157, "grad_norm": 0.8984375, "learning_rate": 0.0019054860466840606, "loss": 3.1118, "step": 2377 }, { "epoch": 0.16543184110751677, "grad_norm": 0.859375, "learning_rate": 0.0019053903977727057, "loss": 3.4787, "step": 2378 }, { "epoch": 0.165501408744652, "grad_norm": 0.72265625, "learning_rate": 0.0019052947028901897, "loss": 3.3528, "step": 2379 }, { "epoch": 0.1655709763817872, "grad_norm": 0.83203125, "learning_rate": 0.0019051989620413718, "loss": 3.4341, "step": 2380 }, { "epoch": 0.1656405440189224, "grad_norm": 0.87109375, "learning_rate": 0.0019051031752311135, "loss": 3.4025, "step": 2381 }, { "epoch": 0.1657101116560576, "grad_norm": 0.85546875, "learning_rate": 0.0019050073424642779, "loss": 3.6356, "step": 2382 }, { "epoch": 0.16577967929319282, "grad_norm": 0.82421875, "learning_rate": 0.0019049114637457306, "loss": 3.1271, "step": 2383 }, { "epoch": 0.16584924693032801, "grad_norm": 0.75390625, "learning_rate": 0.0019048155390803405, "loss": 3.3048, "step": 2384 }, { "epoch": 0.1659188145674632, "grad_norm": 0.91796875, "learning_rate": 0.0019047195684729781, "loss": 3.3535, "step": 2385 }, { "epoch": 0.1659883822045984, "grad_norm": 0.83984375, "learning_rate": 0.001904623551928516, "loss": 3.0647, "step": 2386 }, { "epoch": 0.16605794984173364, "grad_norm": 0.85546875, "learning_rate": 0.0019045274894518296, "loss": 3.1102, "step": 2387 }, { "epoch": 0.16612751747886884, "grad_norm": 0.71484375, "learning_rate": 0.0019044313810477964, "loss": 3.2833, "step": 2388 }, { "epoch": 0.16619708511600403, "grad_norm": 0.6875, "learning_rate": 0.0019043352267212965, "loss": 3.1556, "step": 2389 }, { "epoch": 0.16626665275313923, "grad_norm": 0.6953125, "learning_rate": 0.0019042390264772118, "loss": 3.3388, "step": 2390 }, { "epoch": 0.16633622039027443, "grad_norm": 0.9609375, "learning_rate": 0.001904142780320427, "loss": 2.995, "step": 2391 }, { "epoch": 0.16640578802740966, "grad_norm": 0.63671875, "learning_rate": 0.0019040464882558292, "loss": 3.6169, "step": 2392 }, { "epoch": 0.16647535566454486, "grad_norm": 0.77734375, "learning_rate": 0.0019039501502883071, "loss": 2.7984, "step": 2393 }, { "epoch": 0.16654492330168005, "grad_norm": 0.80859375, "learning_rate": 0.001903853766422753, "loss": 3.3614, "step": 2394 }, { "epoch": 0.16661449093881525, "grad_norm": 0.90625, "learning_rate": 0.0019037573366640604, "loss": 3.0099, "step": 2395 }, { "epoch": 0.16668405857595048, "grad_norm": 0.65234375, "learning_rate": 0.0019036608610171256, "loss": 3.6974, "step": 2396 }, { "epoch": 0.16675362621308568, "grad_norm": 0.72265625, "learning_rate": 0.0019035643394868468, "loss": 3.5691, "step": 2397 }, { "epoch": 0.16682319385022087, "grad_norm": 0.91796875, "learning_rate": 0.001903467772078125, "loss": 3.4569, "step": 2398 }, { "epoch": 0.16689276148735607, "grad_norm": 0.80078125, "learning_rate": 0.0019033711587958639, "loss": 2.9937, "step": 2399 }, { "epoch": 0.1669623291244913, "grad_norm": 0.69140625, "learning_rate": 0.0019032744996449688, "loss": 3.3972, "step": 2400 }, { "epoch": 0.1670318967616265, "grad_norm": 0.76171875, "learning_rate": 0.001903177794630347, "loss": 3.1435, "step": 2401 }, { "epoch": 0.1671014643987617, "grad_norm": 0.8125, "learning_rate": 0.0019030810437569096, "loss": 3.33, "step": 2402 }, { "epoch": 0.1671710320358969, "grad_norm": 1.0546875, "learning_rate": 0.0019029842470295682, "loss": 3.0786, "step": 2403 }, { "epoch": 0.1672405996730321, "grad_norm": 0.8515625, "learning_rate": 0.0019028874044532383, "loss": 3.3754, "step": 2404 }, { "epoch": 0.16731016731016732, "grad_norm": 1.0390625, "learning_rate": 0.001902790516032837, "loss": 3.0439, "step": 2405 }, { "epoch": 0.16737973494730252, "grad_norm": 0.69921875, "learning_rate": 0.0019026935817732836, "loss": 3.9029, "step": 2406 }, { "epoch": 0.16744930258443771, "grad_norm": 1.0078125, "learning_rate": 0.0019025966016795, "loss": 3.1789, "step": 2407 }, { "epoch": 0.1675188702215729, "grad_norm": 1.03125, "learning_rate": 0.0019024995757564102, "loss": 3.7364, "step": 2408 }, { "epoch": 0.16758843785870814, "grad_norm": 0.73828125, "learning_rate": 0.0019024025040089412, "loss": 3.3665, "step": 2409 }, { "epoch": 0.16765800549584334, "grad_norm": 0.8203125, "learning_rate": 0.0019023053864420216, "loss": 3.232, "step": 2410 }, { "epoch": 0.16772757313297854, "grad_norm": 0.81640625, "learning_rate": 0.0019022082230605822, "loss": 3.2489, "step": 2411 }, { "epoch": 0.16779714077011373, "grad_norm": 0.84375, "learning_rate": 0.0019021110138695567, "loss": 3.1511, "step": 2412 }, { "epoch": 0.16786670840724896, "grad_norm": 0.78125, "learning_rate": 0.0019020137588738808, "loss": 3.2653, "step": 2413 }, { "epoch": 0.16793627604438416, "grad_norm": 0.76953125, "learning_rate": 0.001901916458078493, "loss": 3.3701, "step": 2414 }, { "epoch": 0.16800584368151936, "grad_norm": 0.74609375, "learning_rate": 0.0019018191114883332, "loss": 3.3645, "step": 2415 }, { "epoch": 0.16807541131865456, "grad_norm": 0.99609375, "learning_rate": 0.0019017217191083446, "loss": 2.9964, "step": 2416 }, { "epoch": 0.16814497895578975, "grad_norm": 0.98046875, "learning_rate": 0.001901624280943472, "loss": 3.108, "step": 2417 }, { "epoch": 0.16821454659292498, "grad_norm": 0.78515625, "learning_rate": 0.001901526796998663, "loss": 3.2885, "step": 2418 }, { "epoch": 0.16828411423006018, "grad_norm": 0.84765625, "learning_rate": 0.0019014292672788673, "loss": 3.3568, "step": 2419 }, { "epoch": 0.16835368186719538, "grad_norm": 0.85546875, "learning_rate": 0.0019013316917890369, "loss": 3.131, "step": 2420 }, { "epoch": 0.16842324950433057, "grad_norm": 0.72265625, "learning_rate": 0.0019012340705341262, "loss": 3.459, "step": 2421 }, { "epoch": 0.1684928171414658, "grad_norm": 0.859375, "learning_rate": 0.001901136403519092, "loss": 3.1376, "step": 2422 }, { "epoch": 0.168562384778601, "grad_norm": 0.890625, "learning_rate": 0.0019010386907488933, "loss": 2.9253, "step": 2423 }, { "epoch": 0.1686319524157362, "grad_norm": 0.74609375, "learning_rate": 0.0019009409322284915, "loss": 3.3112, "step": 2424 }, { "epoch": 0.1687015200528714, "grad_norm": 0.76171875, "learning_rate": 0.00190084312796285, "loss": 3.2567, "step": 2425 }, { "epoch": 0.16877108769000662, "grad_norm": 0.74609375, "learning_rate": 0.0019007452779569354, "loss": 3.2253, "step": 2426 }, { "epoch": 0.16884065532714182, "grad_norm": 0.671875, "learning_rate": 0.0019006473822157153, "loss": 3.2455, "step": 2427 }, { "epoch": 0.16891022296427702, "grad_norm": 0.59765625, "learning_rate": 0.001900549440744161, "loss": 3.3872, "step": 2428 }, { "epoch": 0.16897979060141222, "grad_norm": 0.80859375, "learning_rate": 0.001900451453547245, "loss": 3.3497, "step": 2429 }, { "epoch": 0.16904935823854741, "grad_norm": 0.92578125, "learning_rate": 0.001900353420629943, "loss": 3.0166, "step": 2430 }, { "epoch": 0.16911892587568264, "grad_norm": 0.72265625, "learning_rate": 0.0019002553419972324, "loss": 3.2642, "step": 2431 }, { "epoch": 0.16918849351281784, "grad_norm": 0.79296875, "learning_rate": 0.001900157217654093, "loss": 3.4829, "step": 2432 }, { "epoch": 0.16925806114995304, "grad_norm": 0.98046875, "learning_rate": 0.0019000590476055076, "loss": 3.4229, "step": 2433 }, { "epoch": 0.16932762878708824, "grad_norm": 0.9140625, "learning_rate": 0.00189996083185646, "loss": 3.0214, "step": 2434 }, { "epoch": 0.16939719642422346, "grad_norm": 0.85546875, "learning_rate": 0.0018998625704119377, "loss": 3.3381, "step": 2435 }, { "epoch": 0.16946676406135866, "grad_norm": 0.75390625, "learning_rate": 0.0018997642632769297, "loss": 3.724, "step": 2436 }, { "epoch": 0.16953633169849386, "grad_norm": 0.6953125, "learning_rate": 0.0018996659104564273, "loss": 3.2562, "step": 2437 }, { "epoch": 0.16960589933562906, "grad_norm": 0.890625, "learning_rate": 0.001899567511955425, "loss": 2.8605, "step": 2438 }, { "epoch": 0.16967546697276428, "grad_norm": 0.80859375, "learning_rate": 0.0018994690677789183, "loss": 3.4274, "step": 2439 }, { "epoch": 0.16974503460989948, "grad_norm": 0.87109375, "learning_rate": 0.0018993705779319062, "loss": 3.4065, "step": 2440 }, { "epoch": 0.16981460224703468, "grad_norm": 0.7265625, "learning_rate": 0.0018992720424193892, "loss": 3.3679, "step": 2441 }, { "epoch": 0.16988416988416988, "grad_norm": 1.046875, "learning_rate": 0.0018991734612463706, "loss": 3.1656, "step": 2442 }, { "epoch": 0.16995373752130508, "grad_norm": 0.9765625, "learning_rate": 0.001899074834417856, "loss": 2.9411, "step": 2443 }, { "epoch": 0.1700233051584403, "grad_norm": 0.8046875, "learning_rate": 0.0018989761619388527, "loss": 3.1267, "step": 2444 }, { "epoch": 0.1700928727955755, "grad_norm": 1.140625, "learning_rate": 0.0018988774438143713, "loss": 3.4626, "step": 2445 }, { "epoch": 0.1701624404327107, "grad_norm": 0.6875, "learning_rate": 0.0018987786800494235, "loss": 2.9381, "step": 2446 }, { "epoch": 0.1702320080698459, "grad_norm": 0.82421875, "learning_rate": 0.001898679870649025, "loss": 3.3637, "step": 2447 }, { "epoch": 0.17030157570698112, "grad_norm": 1.03125, "learning_rate": 0.0018985810156181922, "loss": 3.1177, "step": 2448 }, { "epoch": 0.17037114334411632, "grad_norm": 0.71484375, "learning_rate": 0.0018984821149619444, "loss": 3.3731, "step": 2449 }, { "epoch": 0.17044071098125152, "grad_norm": 0.69921875, "learning_rate": 0.001898383168685304, "loss": 3.191, "step": 2450 }, { "epoch": 0.17051027861838672, "grad_norm": 0.59375, "learning_rate": 0.001898284176793294, "loss": 3.696, "step": 2451 }, { "epoch": 0.17057984625552194, "grad_norm": 0.6796875, "learning_rate": 0.0018981851392909413, "loss": 3.4107, "step": 2452 }, { "epoch": 0.17064941389265714, "grad_norm": 0.97265625, "learning_rate": 0.0018980860561832746, "loss": 3.6169, "step": 2453 }, { "epoch": 0.17071898152979234, "grad_norm": 0.76171875, "learning_rate": 0.0018979869274753246, "loss": 3.0298, "step": 2454 }, { "epoch": 0.17078854916692754, "grad_norm": 0.765625, "learning_rate": 0.0018978877531721245, "loss": 3.2814, "step": 2455 }, { "epoch": 0.17085811680406274, "grad_norm": 0.9609375, "learning_rate": 0.00189778853327871, "loss": 2.9714, "step": 2456 }, { "epoch": 0.17092768444119796, "grad_norm": 1.140625, "learning_rate": 0.001897689267800119, "loss": 3.0664, "step": 2457 }, { "epoch": 0.17099725207833316, "grad_norm": 1.0, "learning_rate": 0.0018975899567413915, "loss": 2.9897, "step": 2458 }, { "epoch": 0.17106681971546836, "grad_norm": 1.0859375, "learning_rate": 0.0018974906001075706, "loss": 3.3436, "step": 2459 }, { "epoch": 0.17113638735260356, "grad_norm": 0.734375, "learning_rate": 0.0018973911979037004, "loss": 3.2716, "step": 2460 }, { "epoch": 0.17120595498973878, "grad_norm": 0.8671875, "learning_rate": 0.0018972917501348283, "loss": 3.4302, "step": 2461 }, { "epoch": 0.17127552262687398, "grad_norm": 0.765625, "learning_rate": 0.001897192256806004, "loss": 3.034, "step": 2462 }, { "epoch": 0.17134509026400918, "grad_norm": 0.8359375, "learning_rate": 0.001897092717922279, "loss": 3.1561, "step": 2463 }, { "epoch": 0.17141465790114438, "grad_norm": 0.93359375, "learning_rate": 0.0018969931334887073, "loss": 3.3995, "step": 2464 }, { "epoch": 0.1714842255382796, "grad_norm": 1.0234375, "learning_rate": 0.0018968935035103458, "loss": 3.1383, "step": 2465 }, { "epoch": 0.1715537931754148, "grad_norm": 0.86328125, "learning_rate": 0.0018967938279922528, "loss": 3.1904, "step": 2466 }, { "epoch": 0.17162336081255, "grad_norm": 0.8828125, "learning_rate": 0.0018966941069394894, "loss": 3.2457, "step": 2467 }, { "epoch": 0.1716929284496852, "grad_norm": 1.046875, "learning_rate": 0.001896594340357119, "loss": 3.4366, "step": 2468 }, { "epoch": 0.1717624960868204, "grad_norm": 0.75390625, "learning_rate": 0.001896494528250207, "loss": 3.1295, "step": 2469 }, { "epoch": 0.17183206372395562, "grad_norm": 0.7109375, "learning_rate": 0.0018963946706238213, "loss": 3.4161, "step": 2470 }, { "epoch": 0.17190163136109082, "grad_norm": 0.71484375, "learning_rate": 0.0018962947674830324, "loss": 3.6559, "step": 2471 }, { "epoch": 0.17197119899822602, "grad_norm": 0.75, "learning_rate": 0.0018961948188329133, "loss": 3.0453, "step": 2472 }, { "epoch": 0.17204076663536122, "grad_norm": 0.7890625, "learning_rate": 0.0018960948246785382, "loss": 3.4069, "step": 2473 }, { "epoch": 0.17211033427249645, "grad_norm": 0.8359375, "learning_rate": 0.0018959947850249845, "loss": 3.2656, "step": 2474 }, { "epoch": 0.17217990190963164, "grad_norm": 0.7265625, "learning_rate": 0.0018958946998773318, "loss": 3.3958, "step": 2475 }, { "epoch": 0.17224946954676684, "grad_norm": 0.69140625, "learning_rate": 0.0018957945692406621, "loss": 3.2935, "step": 2476 }, { "epoch": 0.17231903718390204, "grad_norm": 0.59375, "learning_rate": 0.0018956943931200591, "loss": 3.5589, "step": 2477 }, { "epoch": 0.17238860482103727, "grad_norm": 0.78125, "learning_rate": 0.0018955941715206096, "loss": 2.9786, "step": 2478 }, { "epoch": 0.17245817245817247, "grad_norm": 0.76171875, "learning_rate": 0.001895493904447402, "loss": 3.2443, "step": 2479 }, { "epoch": 0.17252774009530766, "grad_norm": 1.0, "learning_rate": 0.0018953935919055276, "loss": 3.1005, "step": 2480 }, { "epoch": 0.17259730773244286, "grad_norm": 0.90625, "learning_rate": 0.00189529323390008, "loss": 3.1678, "step": 2481 }, { "epoch": 0.17266687536957806, "grad_norm": 0.828125, "learning_rate": 0.0018951928304361543, "loss": 2.9976, "step": 2482 }, { "epoch": 0.17273644300671329, "grad_norm": 1.15625, "learning_rate": 0.001895092381518849, "loss": 3.3298, "step": 2483 }, { "epoch": 0.17280601064384848, "grad_norm": 1.0390625, "learning_rate": 0.0018949918871532638, "loss": 3.0909, "step": 2484 }, { "epoch": 0.17287557828098368, "grad_norm": 0.609375, "learning_rate": 0.001894891347344502, "loss": 3.2494, "step": 2485 }, { "epoch": 0.17294514591811888, "grad_norm": 1.0078125, "learning_rate": 0.001894790762097668, "loss": 3.1686, "step": 2486 }, { "epoch": 0.1730147135552541, "grad_norm": 0.65625, "learning_rate": 0.0018946901314178693, "loss": 3.6562, "step": 2487 }, { "epoch": 0.1730842811923893, "grad_norm": 0.82421875, "learning_rate": 0.0018945894553102152, "loss": 3.07, "step": 2488 }, { "epoch": 0.1731538488295245, "grad_norm": 0.8203125, "learning_rate": 0.0018944887337798177, "loss": 2.9494, "step": 2489 }, { "epoch": 0.1732234164666597, "grad_norm": 0.76953125, "learning_rate": 0.0018943879668317906, "loss": 3.03, "step": 2490 }, { "epoch": 0.17329298410379493, "grad_norm": 0.78515625, "learning_rate": 0.0018942871544712508, "loss": 3.5412, "step": 2491 }, { "epoch": 0.17336255174093013, "grad_norm": 0.8515625, "learning_rate": 0.001894186296703317, "loss": 3.2025, "step": 2492 }, { "epoch": 0.17343211937806532, "grad_norm": 0.91796875, "learning_rate": 0.00189408539353311, "loss": 2.7529, "step": 2493 }, { "epoch": 0.17350168701520052, "grad_norm": 0.78125, "learning_rate": 0.001893984444965753, "loss": 3.3874, "step": 2494 }, { "epoch": 0.17357125465233572, "grad_norm": 0.7265625, "learning_rate": 0.001893883451006372, "loss": 3.3101, "step": 2495 }, { "epoch": 0.17364082228947095, "grad_norm": 0.65234375, "learning_rate": 0.001893782411660095, "loss": 3.3576, "step": 2496 }, { "epoch": 0.17371038992660615, "grad_norm": 1.0078125, "learning_rate": 0.001893681326932052, "loss": 3.1649, "step": 2497 }, { "epoch": 0.17377995756374134, "grad_norm": 0.7578125, "learning_rate": 0.0018935801968273758, "loss": 2.8563, "step": 2498 }, { "epoch": 0.17384952520087654, "grad_norm": 0.671875, "learning_rate": 0.0018934790213512014, "loss": 3.3319, "step": 2499 }, { "epoch": 0.17391909283801177, "grad_norm": 0.765625, "learning_rate": 0.0018933778005086653, "loss": 3.5107, "step": 2500 }, { "epoch": 0.17398866047514697, "grad_norm": 0.78515625, "learning_rate": 0.0018932765343049076, "loss": 3.4601, "step": 2501 }, { "epoch": 0.17405822811228217, "grad_norm": 0.78125, "learning_rate": 0.0018931752227450702, "loss": 3.3166, "step": 2502 }, { "epoch": 0.17412779574941736, "grad_norm": 0.83984375, "learning_rate": 0.0018930738658342965, "loss": 3.2444, "step": 2503 }, { "epoch": 0.1741973633865526, "grad_norm": 0.61328125, "learning_rate": 0.0018929724635777336, "loss": 3.1286, "step": 2504 }, { "epoch": 0.1742669310236878, "grad_norm": 0.79296875, "learning_rate": 0.00189287101598053, "loss": 3.2518, "step": 2505 }, { "epoch": 0.17433649866082299, "grad_norm": 0.7265625, "learning_rate": 0.0018927695230478365, "loss": 3.1074, "step": 2506 }, { "epoch": 0.17440606629795818, "grad_norm": 0.81640625, "learning_rate": 0.0018926679847848064, "loss": 3.2652, "step": 2507 }, { "epoch": 0.17447563393509338, "grad_norm": 0.66796875, "learning_rate": 0.0018925664011965955, "loss": 3.2207, "step": 2508 }, { "epoch": 0.1745452015722286, "grad_norm": 0.9296875, "learning_rate": 0.0018924647722883617, "loss": 2.8783, "step": 2509 }, { "epoch": 0.1746147692093638, "grad_norm": 1.03125, "learning_rate": 0.0018923630980652649, "loss": 3.0064, "step": 2510 }, { "epoch": 0.174684336846499, "grad_norm": 0.9140625, "learning_rate": 0.001892261378532468, "loss": 3.5639, "step": 2511 }, { "epoch": 0.1747539044836342, "grad_norm": 0.8359375, "learning_rate": 0.0018921596136951355, "loss": 3.1965, "step": 2512 }, { "epoch": 0.17482347212076943, "grad_norm": 0.71875, "learning_rate": 0.0018920578035584348, "loss": 2.857, "step": 2513 }, { "epoch": 0.17489303975790463, "grad_norm": 0.86328125, "learning_rate": 0.001891955948127535, "loss": 2.7816, "step": 2514 }, { "epoch": 0.17496260739503983, "grad_norm": 0.8125, "learning_rate": 0.0018918540474076081, "loss": 3.2822, "step": 2515 }, { "epoch": 0.17503217503217502, "grad_norm": 0.94921875, "learning_rate": 0.0018917521014038278, "loss": 3.0852, "step": 2516 }, { "epoch": 0.17510174266931022, "grad_norm": 0.91796875, "learning_rate": 0.0018916501101213705, "loss": 3.4133, "step": 2517 }, { "epoch": 0.17517131030644545, "grad_norm": 0.921875, "learning_rate": 0.001891548073565415, "loss": 3.0457, "step": 2518 }, { "epoch": 0.17524087794358065, "grad_norm": 0.81640625, "learning_rate": 0.0018914459917411422, "loss": 2.9106, "step": 2519 }, { "epoch": 0.17531044558071585, "grad_norm": 0.89453125, "learning_rate": 0.0018913438646537349, "loss": 3.2036, "step": 2520 }, { "epoch": 0.17538001321785104, "grad_norm": 0.890625, "learning_rate": 0.0018912416923083791, "loss": 2.8513, "step": 2521 }, { "epoch": 0.17544958085498627, "grad_norm": 0.88671875, "learning_rate": 0.0018911394747102622, "loss": 3.2675, "step": 2522 }, { "epoch": 0.17551914849212147, "grad_norm": 0.77734375, "learning_rate": 0.0018910372118645742, "loss": 3.3492, "step": 2523 }, { "epoch": 0.17558871612925667, "grad_norm": 0.81640625, "learning_rate": 0.001890934903776508, "loss": 3.0853, "step": 2524 }, { "epoch": 0.17565828376639187, "grad_norm": 0.8046875, "learning_rate": 0.001890832550451258, "loss": 3.0819, "step": 2525 }, { "epoch": 0.1757278514035271, "grad_norm": 0.8125, "learning_rate": 0.0018907301518940214, "loss": 3.1941, "step": 2526 }, { "epoch": 0.1757974190406623, "grad_norm": 0.81640625, "learning_rate": 0.0018906277081099973, "loss": 3.2225, "step": 2527 }, { "epoch": 0.1758669866777975, "grad_norm": 0.86328125, "learning_rate": 0.0018905252191043869, "loss": 3.2995, "step": 2528 }, { "epoch": 0.17593655431493269, "grad_norm": 0.8203125, "learning_rate": 0.0018904226848823948, "loss": 2.9885, "step": 2529 }, { "epoch": 0.17600612195206788, "grad_norm": 1.4609375, "learning_rate": 0.0018903201054492266, "loss": 3.4344, "step": 2530 }, { "epoch": 0.1760756895892031, "grad_norm": 0.79296875, "learning_rate": 0.0018902174808100912, "loss": 3.1054, "step": 2531 }, { "epoch": 0.1761452572263383, "grad_norm": 0.68359375, "learning_rate": 0.0018901148109701988, "loss": 3.2626, "step": 2532 }, { "epoch": 0.1762148248634735, "grad_norm": 0.83203125, "learning_rate": 0.0018900120959347633, "loss": 3.1817, "step": 2533 }, { "epoch": 0.1762843925006087, "grad_norm": 0.97265625, "learning_rate": 0.0018899093357089992, "loss": 3.0, "step": 2534 }, { "epoch": 0.17635396013774393, "grad_norm": 0.71875, "learning_rate": 0.0018898065302981246, "loss": 3.1899, "step": 2535 }, { "epoch": 0.17642352777487913, "grad_norm": 0.84375, "learning_rate": 0.0018897036797073594, "loss": 2.9696, "step": 2536 }, { "epoch": 0.17649309541201433, "grad_norm": 0.81640625, "learning_rate": 0.0018896007839419259, "loss": 2.9419, "step": 2537 }, { "epoch": 0.17656266304914953, "grad_norm": 0.70703125, "learning_rate": 0.0018894978430070482, "loss": 3.2079, "step": 2538 }, { "epoch": 0.17663223068628475, "grad_norm": 0.953125, "learning_rate": 0.0018893948569079536, "loss": 3.162, "step": 2539 }, { "epoch": 0.17670179832341995, "grad_norm": 0.9296875, "learning_rate": 0.001889291825649871, "loss": 3.4439, "step": 2540 }, { "epoch": 0.17677136596055515, "grad_norm": 0.87109375, "learning_rate": 0.001889188749238032, "loss": 3.2802, "step": 2541 }, { "epoch": 0.17684093359769035, "grad_norm": 0.609375, "learning_rate": 0.00188908562767767, "loss": 3.414, "step": 2542 }, { "epoch": 0.17691050123482555, "grad_norm": 0.9453125, "learning_rate": 0.001888982460974021, "loss": 2.7324, "step": 2543 }, { "epoch": 0.17698006887196077, "grad_norm": 0.8046875, "learning_rate": 0.001888879249132324, "loss": 3.433, "step": 2544 }, { "epoch": 0.17704963650909597, "grad_norm": 0.79296875, "learning_rate": 0.0018887759921578184, "loss": 3.1548, "step": 2545 }, { "epoch": 0.17711920414623117, "grad_norm": 0.90234375, "learning_rate": 0.001888672690055748, "loss": 3.1417, "step": 2546 }, { "epoch": 0.17718877178336637, "grad_norm": 0.796875, "learning_rate": 0.0018885693428313576, "loss": 3.1294, "step": 2547 }, { "epoch": 0.1772583394205016, "grad_norm": 0.734375, "learning_rate": 0.0018884659504898947, "loss": 3.7066, "step": 2548 }, { "epoch": 0.1773279070576368, "grad_norm": 0.9296875, "learning_rate": 0.001888362513036609, "loss": 3.0523, "step": 2549 }, { "epoch": 0.177397474694772, "grad_norm": 0.90234375, "learning_rate": 0.0018882590304767526, "loss": 2.8411, "step": 2550 }, { "epoch": 0.1774670423319072, "grad_norm": 0.96875, "learning_rate": 0.0018881555028155796, "loss": 3.0223, "step": 2551 }, { "epoch": 0.1775366099690424, "grad_norm": 0.7421875, "learning_rate": 0.0018880519300583471, "loss": 3.3749, "step": 2552 }, { "epoch": 0.1776061776061776, "grad_norm": 0.8359375, "learning_rate": 0.0018879483122103136, "loss": 2.9869, "step": 2553 }, { "epoch": 0.1776757452433128, "grad_norm": 0.77734375, "learning_rate": 0.0018878446492767403, "loss": 3.4166, "step": 2554 }, { "epoch": 0.177745312880448, "grad_norm": 0.80859375, "learning_rate": 0.0018877409412628907, "loss": 3.3083, "step": 2555 }, { "epoch": 0.1778148805175832, "grad_norm": 0.83984375, "learning_rate": 0.0018876371881740308, "loss": 2.9763, "step": 2556 }, { "epoch": 0.17788444815471843, "grad_norm": 0.67578125, "learning_rate": 0.0018875333900154289, "loss": 3.5804, "step": 2557 }, { "epoch": 0.17795401579185363, "grad_norm": 0.9375, "learning_rate": 0.0018874295467923544, "loss": 2.897, "step": 2558 }, { "epoch": 0.17802358342898883, "grad_norm": 0.7265625, "learning_rate": 0.0018873256585100807, "loss": 3.2606, "step": 2559 }, { "epoch": 0.17809315106612403, "grad_norm": 0.8984375, "learning_rate": 0.0018872217251738824, "loss": 3.6405, "step": 2560 }, { "epoch": 0.17816271870325925, "grad_norm": 0.7734375, "learning_rate": 0.0018871177467890369, "loss": 3.1806, "step": 2561 }, { "epoch": 0.17823228634039445, "grad_norm": 0.75, "learning_rate": 0.0018870137233608236, "loss": 3.3116, "step": 2562 }, { "epoch": 0.17830185397752965, "grad_norm": 0.765625, "learning_rate": 0.0018869096548945242, "loss": 3.1411, "step": 2563 }, { "epoch": 0.17837142161466485, "grad_norm": 0.703125, "learning_rate": 0.0018868055413954231, "loss": 3.2375, "step": 2564 }, { "epoch": 0.17844098925180008, "grad_norm": 0.72265625, "learning_rate": 0.0018867013828688065, "loss": 3.5075, "step": 2565 }, { "epoch": 0.17851055688893527, "grad_norm": 0.7265625, "learning_rate": 0.0018865971793199626, "loss": 3.1211, "step": 2566 }, { "epoch": 0.17858012452607047, "grad_norm": 0.95703125, "learning_rate": 0.001886492930754183, "loss": 3.4119, "step": 2567 }, { "epoch": 0.17864969216320567, "grad_norm": 0.82421875, "learning_rate": 0.0018863886371767605, "loss": 3.0101, "step": 2568 }, { "epoch": 0.17871925980034087, "grad_norm": 0.828125, "learning_rate": 0.0018862842985929906, "loss": 3.0612, "step": 2569 }, { "epoch": 0.1787888274374761, "grad_norm": 0.9140625, "learning_rate": 0.0018861799150081719, "loss": 3.1752, "step": 2570 }, { "epoch": 0.1788583950746113, "grad_norm": 0.98046875, "learning_rate": 0.0018860754864276031, "loss": 3.3085, "step": 2571 }, { "epoch": 0.1789279627117465, "grad_norm": 0.9453125, "learning_rate": 0.0018859710128565875, "loss": 3.4382, "step": 2572 }, { "epoch": 0.1789975303488817, "grad_norm": 0.93359375, "learning_rate": 0.0018858664943004295, "loss": 3.635, "step": 2573 }, { "epoch": 0.17906709798601692, "grad_norm": 0.72265625, "learning_rate": 0.001885761930764436, "loss": 3.3867, "step": 2574 }, { "epoch": 0.1791366656231521, "grad_norm": 0.81640625, "learning_rate": 0.0018856573222539163, "loss": 3.0955, "step": 2575 }, { "epoch": 0.1792062332602873, "grad_norm": 1.0234375, "learning_rate": 0.0018855526687741816, "loss": 3.4234, "step": 2576 }, { "epoch": 0.1792758008974225, "grad_norm": 0.75, "learning_rate": 0.001885447970330546, "loss": 2.7572, "step": 2577 }, { "epoch": 0.17934536853455774, "grad_norm": 1.03125, "learning_rate": 0.0018853432269283254, "loss": 3.193, "step": 2578 }, { "epoch": 0.17941493617169293, "grad_norm": 0.8359375, "learning_rate": 0.0018852384385728382, "loss": 3.1121, "step": 2579 }, { "epoch": 0.17948450380882813, "grad_norm": 0.8671875, "learning_rate": 0.0018851336052694051, "loss": 3.2991, "step": 2580 }, { "epoch": 0.17955407144596333, "grad_norm": 0.7578125, "learning_rate": 0.0018850287270233488, "loss": 3.3188, "step": 2581 }, { "epoch": 0.17962363908309853, "grad_norm": 0.921875, "learning_rate": 0.001884923803839995, "loss": 3.432, "step": 2582 }, { "epoch": 0.17969320672023376, "grad_norm": 1.046875, "learning_rate": 0.0018848188357246706, "loss": 3.5215, "step": 2583 }, { "epoch": 0.17976277435736895, "grad_norm": 0.953125, "learning_rate": 0.0018847138226827053, "loss": 3.0869, "step": 2584 }, { "epoch": 0.17983234199450415, "grad_norm": 0.9296875, "learning_rate": 0.0018846087647194315, "loss": 3.4616, "step": 2585 }, { "epoch": 0.17990190963163935, "grad_norm": 0.70703125, "learning_rate": 0.0018845036618401834, "loss": 3.6504, "step": 2586 }, { "epoch": 0.17997147726877458, "grad_norm": 0.77734375, "learning_rate": 0.0018843985140502976, "loss": 2.9132, "step": 2587 }, { "epoch": 0.18004104490590978, "grad_norm": 0.7578125, "learning_rate": 0.001884293321355113, "loss": 3.5805, "step": 2588 }, { "epoch": 0.18011061254304497, "grad_norm": 1.0859375, "learning_rate": 0.0018841880837599705, "loss": 3.0303, "step": 2589 }, { "epoch": 0.18018018018018017, "grad_norm": 0.9296875, "learning_rate": 0.001884082801270214, "loss": 3.5147, "step": 2590 }, { "epoch": 0.1802497478173154, "grad_norm": 0.88671875, "learning_rate": 0.0018839774738911889, "loss": 3.0765, "step": 2591 }, { "epoch": 0.1803193154544506, "grad_norm": 0.890625, "learning_rate": 0.0018838721016282433, "loss": 3.494, "step": 2592 }, { "epoch": 0.1803888830915858, "grad_norm": 0.7578125, "learning_rate": 0.0018837666844867273, "loss": 3.1105, "step": 2593 }, { "epoch": 0.180458450728721, "grad_norm": 0.7734375, "learning_rate": 0.0018836612224719938, "loss": 3.0279, "step": 2594 }, { "epoch": 0.1805280183658562, "grad_norm": 0.99609375, "learning_rate": 0.001883555715589397, "loss": 3.5961, "step": 2595 }, { "epoch": 0.18059758600299142, "grad_norm": 0.80078125, "learning_rate": 0.0018834501638442947, "loss": 3.3071, "step": 2596 }, { "epoch": 0.18066715364012662, "grad_norm": 0.734375, "learning_rate": 0.001883344567242046, "loss": 3.5241, "step": 2597 }, { "epoch": 0.1807367212772618, "grad_norm": 1.015625, "learning_rate": 0.0018832389257880124, "loss": 3.1847, "step": 2598 }, { "epoch": 0.180806288914397, "grad_norm": 0.82421875, "learning_rate": 0.0018831332394875582, "loss": 3.5169, "step": 2599 }, { "epoch": 0.18087585655153224, "grad_norm": 0.96875, "learning_rate": 0.0018830275083460493, "loss": 3.0003, "step": 2600 }, { "epoch": 0.18094542418866744, "grad_norm": 1.125, "learning_rate": 0.0018829217323688544, "loss": 3.1013, "step": 2601 }, { "epoch": 0.18101499182580263, "grad_norm": 0.9296875, "learning_rate": 0.0018828159115613441, "loss": 3.0551, "step": 2602 }, { "epoch": 0.18108455946293783, "grad_norm": 0.77734375, "learning_rate": 0.0018827100459288914, "loss": 3.3201, "step": 2603 }, { "epoch": 0.18115412710007306, "grad_norm": 0.91796875, "learning_rate": 0.001882604135476872, "loss": 3.3826, "step": 2604 }, { "epoch": 0.18122369473720826, "grad_norm": 0.8671875, "learning_rate": 0.0018824981802106633, "loss": 3.7203, "step": 2605 }, { "epoch": 0.18129326237434346, "grad_norm": 0.73046875, "learning_rate": 0.001882392180135645, "loss": 3.1752, "step": 2606 }, { "epoch": 0.18136283001147865, "grad_norm": 0.8203125, "learning_rate": 0.0018822861352571995, "loss": 3.4479, "step": 2607 }, { "epoch": 0.18143239764861385, "grad_norm": 0.7421875, "learning_rate": 0.0018821800455807109, "loss": 3.347, "step": 2608 }, { "epoch": 0.18150196528574908, "grad_norm": 0.81640625, "learning_rate": 0.001882073911111566, "loss": 3.342, "step": 2609 }, { "epoch": 0.18157153292288428, "grad_norm": 0.81640625, "learning_rate": 0.0018819677318551542, "loss": 3.4264, "step": 2610 }, { "epoch": 0.18164110056001948, "grad_norm": 0.83984375, "learning_rate": 0.0018818615078168661, "loss": 3.2509, "step": 2611 }, { "epoch": 0.18171066819715467, "grad_norm": 0.69140625, "learning_rate": 0.0018817552390020958, "loss": 3.0668, "step": 2612 }, { "epoch": 0.1817802358342899, "grad_norm": 0.90234375, "learning_rate": 0.0018816489254162387, "loss": 3.3121, "step": 2613 }, { "epoch": 0.1818498034714251, "grad_norm": 0.83984375, "learning_rate": 0.001881542567064693, "loss": 3.1481, "step": 2614 }, { "epoch": 0.1819193711085603, "grad_norm": 0.80078125, "learning_rate": 0.0018814361639528593, "loss": 3.3161, "step": 2615 }, { "epoch": 0.1819889387456955, "grad_norm": 0.65625, "learning_rate": 0.0018813297160861398, "loss": 2.8992, "step": 2616 }, { "epoch": 0.18205850638283072, "grad_norm": 0.98046875, "learning_rate": 0.0018812232234699394, "loss": 3.1757, "step": 2617 }, { "epoch": 0.18212807401996592, "grad_norm": 1.0390625, "learning_rate": 0.0018811166861096656, "loss": 2.7238, "step": 2618 }, { "epoch": 0.18219764165710112, "grad_norm": 1.0, "learning_rate": 0.0018810101040107276, "loss": 3.3991, "step": 2619 }, { "epoch": 0.18226720929423632, "grad_norm": 0.90234375, "learning_rate": 0.001880903477178537, "loss": 2.9947, "step": 2620 }, { "epoch": 0.1823367769313715, "grad_norm": 0.75390625, "learning_rate": 0.001880796805618508, "loss": 3.1391, "step": 2621 }, { "epoch": 0.18240634456850674, "grad_norm": 0.70703125, "learning_rate": 0.0018806900893360567, "loss": 3.1868, "step": 2622 }, { "epoch": 0.18247591220564194, "grad_norm": 1.2265625, "learning_rate": 0.001880583328336602, "loss": 3.2925, "step": 2623 }, { "epoch": 0.18254547984277714, "grad_norm": 0.75, "learning_rate": 0.001880476522625564, "loss": 3.3639, "step": 2624 }, { "epoch": 0.18261504747991233, "grad_norm": 0.8203125, "learning_rate": 0.0018803696722083662, "loss": 2.7638, "step": 2625 }, { "epoch": 0.18268461511704756, "grad_norm": 0.72265625, "learning_rate": 0.0018802627770904338, "loss": 3.1189, "step": 2626 }, { "epoch": 0.18275418275418276, "grad_norm": 0.63671875, "learning_rate": 0.0018801558372771945, "loss": 3.4456, "step": 2627 }, { "epoch": 0.18282375039131796, "grad_norm": 0.734375, "learning_rate": 0.0018800488527740782, "loss": 3.0031, "step": 2628 }, { "epoch": 0.18289331802845316, "grad_norm": 0.8046875, "learning_rate": 0.001879941823586517, "loss": 3.2647, "step": 2629 }, { "epoch": 0.18296288566558838, "grad_norm": 0.71484375, "learning_rate": 0.001879834749719945, "loss": 3.4376, "step": 2630 }, { "epoch": 0.18303245330272358, "grad_norm": 0.6796875, "learning_rate": 0.001879727631179799, "loss": 2.9715, "step": 2631 }, { "epoch": 0.18310202093985878, "grad_norm": 0.87890625, "learning_rate": 0.0018796204679715183, "loss": 3.1558, "step": 2632 }, { "epoch": 0.18317158857699398, "grad_norm": 0.8515625, "learning_rate": 0.0018795132601005435, "loss": 3.399, "step": 2633 }, { "epoch": 0.18324115621412917, "grad_norm": 0.71875, "learning_rate": 0.0018794060075723188, "loss": 3.0927, "step": 2634 }, { "epoch": 0.1833107238512644, "grad_norm": 0.81640625, "learning_rate": 0.0018792987103922894, "loss": 3.4046, "step": 2635 }, { "epoch": 0.1833802914883996, "grad_norm": 0.72265625, "learning_rate": 0.0018791913685659036, "loss": 3.0692, "step": 2636 }, { "epoch": 0.1834498591255348, "grad_norm": 0.8984375, "learning_rate": 0.0018790839820986113, "loss": 3.1099, "step": 2637 }, { "epoch": 0.18351942676267, "grad_norm": 0.83203125, "learning_rate": 0.0018789765509958656, "loss": 2.9657, "step": 2638 }, { "epoch": 0.18358899439980522, "grad_norm": 0.734375, "learning_rate": 0.001878869075263121, "loss": 3.4141, "step": 2639 }, { "epoch": 0.18365856203694042, "grad_norm": 0.8515625, "learning_rate": 0.0018787615549058347, "loss": 2.8735, "step": 2640 }, { "epoch": 0.18372812967407562, "grad_norm": 0.91796875, "learning_rate": 0.0018786539899294655, "loss": 3.509, "step": 2641 }, { "epoch": 0.18379769731121082, "grad_norm": 0.87890625, "learning_rate": 0.0018785463803394757, "loss": 3.4174, "step": 2642 }, { "epoch": 0.18386726494834604, "grad_norm": 1.015625, "learning_rate": 0.001878438726141329, "loss": 2.6337, "step": 2643 }, { "epoch": 0.18393683258548124, "grad_norm": 0.74609375, "learning_rate": 0.001878331027340491, "loss": 3.4049, "step": 2644 }, { "epoch": 0.18400640022261644, "grad_norm": 0.91796875, "learning_rate": 0.0018782232839424308, "loss": 3.0697, "step": 2645 }, { "epoch": 0.18407596785975164, "grad_norm": 0.93359375, "learning_rate": 0.001878115495952619, "loss": 3.241, "step": 2646 }, { "epoch": 0.18414553549688684, "grad_norm": 0.70703125, "learning_rate": 0.001878007663376528, "loss": 3.2673, "step": 2647 }, { "epoch": 0.18421510313402206, "grad_norm": 0.84375, "learning_rate": 0.0018778997862196338, "loss": 3.3684, "step": 2648 }, { "epoch": 0.18428467077115726, "grad_norm": 0.8046875, "learning_rate": 0.001877791864487413, "loss": 2.8385, "step": 2649 }, { "epoch": 0.18435423840829246, "grad_norm": 0.91015625, "learning_rate": 0.001877683898185346, "loss": 3.0731, "step": 2650 }, { "epoch": 0.18442380604542766, "grad_norm": 0.87109375, "learning_rate": 0.0018775758873189143, "loss": 3.0177, "step": 2651 }, { "epoch": 0.18449337368256288, "grad_norm": 0.875, "learning_rate": 0.0018774678318936025, "loss": 3.1539, "step": 2652 }, { "epoch": 0.18456294131969808, "grad_norm": 0.7421875, "learning_rate": 0.0018773597319148968, "loss": 2.9013, "step": 2653 }, { "epoch": 0.18463250895683328, "grad_norm": 0.9765625, "learning_rate": 0.0018772515873882864, "loss": 3.1443, "step": 2654 }, { "epoch": 0.18470207659396848, "grad_norm": 0.84765625, "learning_rate": 0.0018771433983192619, "loss": 3.1884, "step": 2655 }, { "epoch": 0.1847716442311037, "grad_norm": 0.890625, "learning_rate": 0.0018770351647133165, "loss": 3.4777, "step": 2656 }, { "epoch": 0.1848412118682389, "grad_norm": 1.078125, "learning_rate": 0.0018769268865759467, "loss": 3.2733, "step": 2657 }, { "epoch": 0.1849107795053741, "grad_norm": 1.0703125, "learning_rate": 0.001876818563912649, "loss": 3.135, "step": 2658 }, { "epoch": 0.1849803471425093, "grad_norm": 0.86328125, "learning_rate": 0.0018767101967289244, "loss": 3.3096, "step": 2659 }, { "epoch": 0.1850499147796445, "grad_norm": 0.95703125, "learning_rate": 0.0018766017850302748, "loss": 3.4159, "step": 2660 }, { "epoch": 0.18511948241677972, "grad_norm": 0.71875, "learning_rate": 0.001876493328822205, "loss": 3.3289, "step": 2661 }, { "epoch": 0.18518905005391492, "grad_norm": 0.828125, "learning_rate": 0.0018763848281102221, "loss": 3.3328, "step": 2662 }, { "epoch": 0.18525861769105012, "grad_norm": 0.8828125, "learning_rate": 0.0018762762828998345, "loss": 3.0924, "step": 2663 }, { "epoch": 0.18532818532818532, "grad_norm": 0.8671875, "learning_rate": 0.0018761676931965542, "loss": 2.9847, "step": 2664 }, { "epoch": 0.18539775296532054, "grad_norm": 0.640625, "learning_rate": 0.0018760590590058946, "loss": 3.0087, "step": 2665 }, { "epoch": 0.18546732060245574, "grad_norm": 0.96484375, "learning_rate": 0.0018759503803333717, "loss": 2.853, "step": 2666 }, { "epoch": 0.18553688823959094, "grad_norm": 0.9609375, "learning_rate": 0.0018758416571845037, "loss": 2.8915, "step": 2667 }, { "epoch": 0.18560645587672614, "grad_norm": 0.73828125, "learning_rate": 0.0018757328895648109, "loss": 3.0704, "step": 2668 }, { "epoch": 0.18567602351386134, "grad_norm": 1.15625, "learning_rate": 0.0018756240774798157, "loss": 3.1353, "step": 2669 }, { "epoch": 0.18574559115099656, "grad_norm": 0.84765625, "learning_rate": 0.0018755152209350436, "loss": 3.1826, "step": 2670 }, { "epoch": 0.18581515878813176, "grad_norm": 1.078125, "learning_rate": 0.0018754063199360217, "loss": 3.1387, "step": 2671 }, { "epoch": 0.18588472642526696, "grad_norm": 1.0234375, "learning_rate": 0.0018752973744882789, "loss": 2.8912, "step": 2672 }, { "epoch": 0.18595429406240216, "grad_norm": 0.80078125, "learning_rate": 0.001875188384597347, "loss": 3.3143, "step": 2673 }, { "epoch": 0.18602386169953739, "grad_norm": 1.0078125, "learning_rate": 0.0018750793502687606, "loss": 3.4538, "step": 2674 }, { "epoch": 0.18609342933667258, "grad_norm": 0.74609375, "learning_rate": 0.0018749702715080557, "loss": 3.6592, "step": 2675 }, { "epoch": 0.18616299697380778, "grad_norm": 0.890625, "learning_rate": 0.0018748611483207704, "loss": 3.3338, "step": 2676 }, { "epoch": 0.18623256461094298, "grad_norm": 0.76171875, "learning_rate": 0.0018747519807124453, "loss": 2.9657, "step": 2677 }, { "epoch": 0.1863021322480782, "grad_norm": 0.84375, "learning_rate": 0.001874642768688624, "loss": 3.3772, "step": 2678 }, { "epoch": 0.1863716998852134, "grad_norm": 1.109375, "learning_rate": 0.0018745335122548514, "loss": 2.5748, "step": 2679 }, { "epoch": 0.1864412675223486, "grad_norm": 0.84765625, "learning_rate": 0.0018744242114166752, "loss": 3.4695, "step": 2680 }, { "epoch": 0.1865108351594838, "grad_norm": 1.015625, "learning_rate": 0.0018743148661796447, "loss": 3.6006, "step": 2681 }, { "epoch": 0.186580402796619, "grad_norm": 1.0390625, "learning_rate": 0.0018742054765493125, "loss": 2.9741, "step": 2682 }, { "epoch": 0.18664997043375423, "grad_norm": 0.7578125, "learning_rate": 0.001874096042531232, "loss": 3.1929, "step": 2683 }, { "epoch": 0.18671953807088942, "grad_norm": 0.671875, "learning_rate": 0.0018739865641309605, "loss": 3.0261, "step": 2684 }, { "epoch": 0.18678910570802462, "grad_norm": 0.73046875, "learning_rate": 0.0018738770413540566, "loss": 3.3793, "step": 2685 }, { "epoch": 0.18685867334515982, "grad_norm": 1.0078125, "learning_rate": 0.001873767474206081, "loss": 2.8978, "step": 2686 }, { "epoch": 0.18692824098229505, "grad_norm": 0.76953125, "learning_rate": 0.0018736578626925976, "loss": 3.3339, "step": 2687 }, { "epoch": 0.18699780861943024, "grad_norm": 0.94921875, "learning_rate": 0.0018735482068191712, "loss": 2.9924, "step": 2688 }, { "epoch": 0.18706737625656544, "grad_norm": 0.75, "learning_rate": 0.0018734385065913698, "loss": 3.4697, "step": 2689 }, { "epoch": 0.18713694389370064, "grad_norm": 0.8046875, "learning_rate": 0.0018733287620147634, "loss": 3.0865, "step": 2690 }, { "epoch": 0.18720651153083587, "grad_norm": 0.83984375, "learning_rate": 0.0018732189730949246, "loss": 3.5094, "step": 2691 }, { "epoch": 0.18727607916797107, "grad_norm": 0.71875, "learning_rate": 0.0018731091398374276, "loss": 3.3612, "step": 2692 }, { "epoch": 0.18734564680510626, "grad_norm": 0.84375, "learning_rate": 0.0018729992622478493, "loss": 3.5381, "step": 2693 }, { "epoch": 0.18741521444224146, "grad_norm": 0.73828125, "learning_rate": 0.0018728893403317686, "loss": 3.3153, "step": 2694 }, { "epoch": 0.18748478207937666, "grad_norm": 0.80078125, "learning_rate": 0.0018727793740947669, "loss": 3.1899, "step": 2695 }, { "epoch": 0.1875543497165119, "grad_norm": 0.796875, "learning_rate": 0.001872669363542428, "loss": 3.0942, "step": 2696 }, { "epoch": 0.18762391735364709, "grad_norm": 0.8828125, "learning_rate": 0.0018725593086803371, "loss": 3.0085, "step": 2697 }, { "epoch": 0.18769348499078228, "grad_norm": 0.9296875, "learning_rate": 0.0018724492095140825, "loss": 3.2013, "step": 2698 }, { "epoch": 0.18776305262791748, "grad_norm": 0.98828125, "learning_rate": 0.0018723390660492548, "loss": 2.9254, "step": 2699 }, { "epoch": 0.1878326202650527, "grad_norm": 1.9921875, "learning_rate": 0.001872228878291446, "loss": 3.2803, "step": 2700 }, { "epoch": 0.1879021879021879, "grad_norm": 0.96484375, "learning_rate": 0.0018721186462462513, "loss": 2.8896, "step": 2701 }, { "epoch": 0.1879717555393231, "grad_norm": 0.80859375, "learning_rate": 0.0018720083699192674, "loss": 3.1164, "step": 2702 }, { "epoch": 0.1880413231764583, "grad_norm": 0.71875, "learning_rate": 0.0018718980493160938, "loss": 3.4225, "step": 2703 }, { "epoch": 0.18811089081359353, "grad_norm": 0.8515625, "learning_rate": 0.0018717876844423318, "loss": 3.3561, "step": 2704 }, { "epoch": 0.18818045845072873, "grad_norm": 0.99609375, "learning_rate": 0.0018716772753035852, "loss": 2.6811, "step": 2705 }, { "epoch": 0.18825002608786393, "grad_norm": 0.6953125, "learning_rate": 0.0018715668219054606, "loss": 3.4476, "step": 2706 }, { "epoch": 0.18831959372499912, "grad_norm": 1.0859375, "learning_rate": 0.0018714563242535657, "loss": 3.7711, "step": 2707 }, { "epoch": 0.18838916136213432, "grad_norm": 0.74609375, "learning_rate": 0.0018713457823535107, "loss": 3.125, "step": 2708 }, { "epoch": 0.18845872899926955, "grad_norm": 0.8828125, "learning_rate": 0.001871235196210909, "loss": 3.4208, "step": 2709 }, { "epoch": 0.18852829663640475, "grad_norm": 0.8828125, "learning_rate": 0.0018711245658313755, "loss": 2.6746, "step": 2710 }, { "epoch": 0.18859786427353994, "grad_norm": 0.96484375, "learning_rate": 0.0018710138912205274, "loss": 3.2908, "step": 2711 }, { "epoch": 0.18866743191067514, "grad_norm": 0.89453125, "learning_rate": 0.0018709031723839842, "loss": 3.5754, "step": 2712 }, { "epoch": 0.18873699954781037, "grad_norm": 0.92578125, "learning_rate": 0.0018707924093273674, "loss": 3.3013, "step": 2713 }, { "epoch": 0.18880656718494557, "grad_norm": 0.9453125, "learning_rate": 0.0018706816020563012, "loss": 3.2028, "step": 2714 }, { "epoch": 0.18887613482208077, "grad_norm": 0.9296875, "learning_rate": 0.0018705707505764116, "loss": 3.3403, "step": 2715 }, { "epoch": 0.18894570245921596, "grad_norm": 0.72265625, "learning_rate": 0.0018704598548933277, "loss": 3.3962, "step": 2716 }, { "epoch": 0.1890152700963512, "grad_norm": 0.91796875, "learning_rate": 0.0018703489150126793, "loss": 2.7822, "step": 2717 }, { "epoch": 0.1890848377334864, "grad_norm": 1.015625, "learning_rate": 0.0018702379309401005, "loss": 3.3276, "step": 2718 }, { "epoch": 0.1891544053706216, "grad_norm": 1.109375, "learning_rate": 0.0018701269026812253, "loss": 3.452, "step": 2719 }, { "epoch": 0.18922397300775678, "grad_norm": 1.0, "learning_rate": 0.0018700158302416923, "loss": 3.1536, "step": 2720 }, { "epoch": 0.18929354064489198, "grad_norm": 0.82421875, "learning_rate": 0.0018699047136271402, "loss": 2.8866, "step": 2721 }, { "epoch": 0.1893631082820272, "grad_norm": 0.97265625, "learning_rate": 0.0018697935528432118, "loss": 3.1771, "step": 2722 }, { "epoch": 0.1894326759191624, "grad_norm": 0.79296875, "learning_rate": 0.0018696823478955502, "loss": 3.6915, "step": 2723 }, { "epoch": 0.1895022435562976, "grad_norm": 1.078125, "learning_rate": 0.0018695710987898032, "loss": 3.3638, "step": 2724 }, { "epoch": 0.1895718111934328, "grad_norm": 0.95703125, "learning_rate": 0.0018694598055316184, "loss": 3.0669, "step": 2725 }, { "epoch": 0.18964137883056803, "grad_norm": 1.0703125, "learning_rate": 0.0018693484681266473, "loss": 3.0863, "step": 2726 }, { "epoch": 0.18971094646770323, "grad_norm": 0.703125, "learning_rate": 0.0018692370865805426, "loss": 3.2591, "step": 2727 }, { "epoch": 0.18978051410483843, "grad_norm": 0.97265625, "learning_rate": 0.00186912566089896, "loss": 3.1086, "step": 2728 }, { "epoch": 0.18985008174197363, "grad_norm": 1.2109375, "learning_rate": 0.001869014191087557, "loss": 2.9723, "step": 2729 }, { "epoch": 0.18991964937910885, "grad_norm": 0.8046875, "learning_rate": 0.0018689026771519937, "loss": 3.1802, "step": 2730 }, { "epoch": 0.18998921701624405, "grad_norm": 0.890625, "learning_rate": 0.001868791119097932, "loss": 3.3697, "step": 2731 }, { "epoch": 0.19005878465337925, "grad_norm": 0.84765625, "learning_rate": 0.001868679516931036, "loss": 3.5163, "step": 2732 }, { "epoch": 0.19012835229051445, "grad_norm": 0.83203125, "learning_rate": 0.001868567870656973, "loss": 3.3499, "step": 2733 }, { "epoch": 0.19019791992764964, "grad_norm": 0.8203125, "learning_rate": 0.0018684561802814112, "loss": 3.3363, "step": 2734 }, { "epoch": 0.19026748756478487, "grad_norm": 0.984375, "learning_rate": 0.0018683444458100222, "loss": 2.8722, "step": 2735 }, { "epoch": 0.19033705520192007, "grad_norm": 0.90625, "learning_rate": 0.0018682326672484785, "loss": 3.327, "step": 2736 }, { "epoch": 0.19040662283905527, "grad_norm": 0.88671875, "learning_rate": 0.0018681208446024566, "loss": 3.1799, "step": 2737 }, { "epoch": 0.19047619047619047, "grad_norm": 1.09375, "learning_rate": 0.001868008977877634, "loss": 3.1104, "step": 2738 }, { "epoch": 0.1905457581133257, "grad_norm": 0.890625, "learning_rate": 0.0018678970670796902, "loss": 3.0156, "step": 2739 }, { "epoch": 0.1906153257504609, "grad_norm": 0.7734375, "learning_rate": 0.0018677851122143082, "loss": 3.1659, "step": 2740 }, { "epoch": 0.1906848933875961, "grad_norm": 0.7890625, "learning_rate": 0.0018676731132871718, "loss": 3.354, "step": 2741 }, { "epoch": 0.1907544610247313, "grad_norm": 0.95703125, "learning_rate": 0.0018675610703039682, "loss": 3.3231, "step": 2742 }, { "epoch": 0.1908240286618665, "grad_norm": 0.875, "learning_rate": 0.0018674489832703864, "loss": 3.2926, "step": 2743 }, { "epoch": 0.1908935962990017, "grad_norm": 0.92578125, "learning_rate": 0.0018673368521921177, "loss": 3.3228, "step": 2744 }, { "epoch": 0.1909631639361369, "grad_norm": 0.90625, "learning_rate": 0.001867224677074855, "loss": 2.9053, "step": 2745 }, { "epoch": 0.1910327315732721, "grad_norm": 0.90234375, "learning_rate": 0.0018671124579242944, "loss": 3.0507, "step": 2746 }, { "epoch": 0.1911022992104073, "grad_norm": 0.99609375, "learning_rate": 0.0018670001947461339, "loss": 3.5, "step": 2747 }, { "epoch": 0.19117186684754253, "grad_norm": 1.2265625, "learning_rate": 0.0018668878875460733, "loss": 3.6022, "step": 2748 }, { "epoch": 0.19124143448467773, "grad_norm": 0.8984375, "learning_rate": 0.0018667755363298154, "loss": 3.3031, "step": 2749 }, { "epoch": 0.19131100212181293, "grad_norm": 0.92578125, "learning_rate": 0.0018666631411030645, "loss": 3.531, "step": 2750 }, { "epoch": 0.19138056975894813, "grad_norm": 0.91796875, "learning_rate": 0.0018665507018715277, "loss": 3.1135, "step": 2751 }, { "epoch": 0.19145013739608335, "grad_norm": 0.86328125, "learning_rate": 0.001866438218640914, "loss": 3.5832, "step": 2752 }, { "epoch": 0.19151970503321855, "grad_norm": 1.1875, "learning_rate": 0.0018663256914169346, "loss": 2.8053, "step": 2753 }, { "epoch": 0.19158927267035375, "grad_norm": 0.7890625, "learning_rate": 0.0018662131202053032, "loss": 3.1533, "step": 2754 }, { "epoch": 0.19165884030748895, "grad_norm": 1.0, "learning_rate": 0.0018661005050117359, "loss": 3.1857, "step": 2755 }, { "epoch": 0.19172840794462417, "grad_norm": 0.9296875, "learning_rate": 0.0018659878458419498, "loss": 3.2337, "step": 2756 }, { "epoch": 0.19179797558175937, "grad_norm": 1.0, "learning_rate": 0.0018658751427016664, "loss": 2.8843, "step": 2757 }, { "epoch": 0.19186754321889457, "grad_norm": 1.2421875, "learning_rate": 0.0018657623955966075, "loss": 3.1811, "step": 2758 }, { "epoch": 0.19193711085602977, "grad_norm": 0.99609375, "learning_rate": 0.0018656496045324977, "loss": 3.4058, "step": 2759 }, { "epoch": 0.19200667849316497, "grad_norm": 0.95703125, "learning_rate": 0.0018655367695150642, "loss": 3.3727, "step": 2760 }, { "epoch": 0.1920762461303002, "grad_norm": 0.94140625, "learning_rate": 0.0018654238905500362, "loss": 3.2887, "step": 2761 }, { "epoch": 0.1921458137674354, "grad_norm": 1.0859375, "learning_rate": 0.0018653109676431453, "loss": 3.2733, "step": 2762 }, { "epoch": 0.1922153814045706, "grad_norm": 1.0859375, "learning_rate": 0.0018651980008001247, "loss": 3.2227, "step": 2763 }, { "epoch": 0.1922849490417058, "grad_norm": 0.9921875, "learning_rate": 0.001865084990026711, "loss": 2.544, "step": 2764 }, { "epoch": 0.19235451667884101, "grad_norm": 0.8359375, "learning_rate": 0.0018649719353286411, "loss": 2.9591, "step": 2765 }, { "epoch": 0.1924240843159762, "grad_norm": 0.78125, "learning_rate": 0.0018648588367116568, "loss": 3.5255, "step": 2766 }, { "epoch": 0.1924936519531114, "grad_norm": 1.015625, "learning_rate": 0.0018647456941814995, "loss": 3.4794, "step": 2767 }, { "epoch": 0.1925632195902466, "grad_norm": 1.1640625, "learning_rate": 0.0018646325077439148, "loss": 3.0202, "step": 2768 }, { "epoch": 0.19263278722738184, "grad_norm": 0.78125, "learning_rate": 0.0018645192774046492, "loss": 3.567, "step": 2769 }, { "epoch": 0.19270235486451703, "grad_norm": 0.8671875, "learning_rate": 0.0018644060031694522, "loss": 3.051, "step": 2770 }, { "epoch": 0.19277192250165223, "grad_norm": 0.875, "learning_rate": 0.0018642926850440755, "loss": 2.9566, "step": 2771 }, { "epoch": 0.19284149013878743, "grad_norm": 0.91796875, "learning_rate": 0.0018641793230342726, "loss": 3.2604, "step": 2772 }, { "epoch": 0.19291105777592263, "grad_norm": 0.87890625, "learning_rate": 0.0018640659171457992, "loss": 3.246, "step": 2773 }, { "epoch": 0.19298062541305785, "grad_norm": 1.140625, "learning_rate": 0.0018639524673844143, "loss": 3.4788, "step": 2774 }, { "epoch": 0.19305019305019305, "grad_norm": 1.0078125, "learning_rate": 0.001863838973755877, "loss": 2.9149, "step": 2775 }, { "epoch": 0.19311976068732825, "grad_norm": 1.1328125, "learning_rate": 0.001863725436265951, "loss": 2.9041, "step": 2776 }, { "epoch": 0.19318932832446345, "grad_norm": 1.0390625, "learning_rate": 0.0018636118549204008, "loss": 3.3209, "step": 2777 }, { "epoch": 0.19325889596159868, "grad_norm": 0.85546875, "learning_rate": 0.0018634982297249937, "loss": 3.6583, "step": 2778 }, { "epoch": 0.19332846359873387, "grad_norm": 1.1328125, "learning_rate": 0.001863384560685499, "loss": 3.1681, "step": 2779 }, { "epoch": 0.19339803123586907, "grad_norm": 0.96875, "learning_rate": 0.0018632708478076875, "loss": 3.3337, "step": 2780 }, { "epoch": 0.19346759887300427, "grad_norm": 1.0859375, "learning_rate": 0.0018631570910973342, "loss": 3.3942, "step": 2781 }, { "epoch": 0.1935371665101395, "grad_norm": 0.95703125, "learning_rate": 0.001863043290560214, "loss": 3.2764, "step": 2782 }, { "epoch": 0.1936067341472747, "grad_norm": 1.078125, "learning_rate": 0.0018629294462021058, "loss": 3.1924, "step": 2783 }, { "epoch": 0.1936763017844099, "grad_norm": 1.109375, "learning_rate": 0.0018628155580287897, "loss": 3.2434, "step": 2784 }, { "epoch": 0.1937458694215451, "grad_norm": 1.15625, "learning_rate": 0.0018627016260460486, "loss": 3.2823, "step": 2785 }, { "epoch": 0.1938154370586803, "grad_norm": 1.0546875, "learning_rate": 0.001862587650259667, "loss": 3.1277, "step": 2786 }, { "epoch": 0.19388500469581552, "grad_norm": 0.90625, "learning_rate": 0.0018624736306754324, "loss": 3.0137, "step": 2787 }, { "epoch": 0.19395457233295071, "grad_norm": 1.3125, "learning_rate": 0.0018623595672991342, "loss": 3.1154, "step": 2788 }, { "epoch": 0.1940241399700859, "grad_norm": 0.96875, "learning_rate": 0.0018622454601365636, "loss": 3.1293, "step": 2789 }, { "epoch": 0.1940937076072211, "grad_norm": 1.1015625, "learning_rate": 0.0018621313091935145, "loss": 2.8241, "step": 2790 }, { "epoch": 0.19416327524435634, "grad_norm": 1.1171875, "learning_rate": 0.0018620171144757833, "loss": 3.5697, "step": 2791 }, { "epoch": 0.19423284288149154, "grad_norm": 1.1484375, "learning_rate": 0.0018619028759891676, "loss": 2.8754, "step": 2792 }, { "epoch": 0.19430241051862673, "grad_norm": 1.359375, "learning_rate": 0.0018617885937394685, "loss": 2.8006, "step": 2793 }, { "epoch": 0.19437197815576193, "grad_norm": 1.0546875, "learning_rate": 0.001861674267732488, "loss": 2.903, "step": 2794 }, { "epoch": 0.19444154579289716, "grad_norm": 1.234375, "learning_rate": 0.0018615598979740318, "loss": 3.1794, "step": 2795 }, { "epoch": 0.19451111343003236, "grad_norm": 1.1328125, "learning_rate": 0.0018614454844699062, "loss": 3.3262, "step": 2796 }, { "epoch": 0.19458068106716755, "grad_norm": 1.2734375, "learning_rate": 0.0018613310272259209, "loss": 2.9255, "step": 2797 }, { "epoch": 0.19465024870430275, "grad_norm": 1.234375, "learning_rate": 0.0018612165262478875, "loss": 3.1538, "step": 2798 }, { "epoch": 0.19471981634143795, "grad_norm": 1.375, "learning_rate": 0.0018611019815416197, "loss": 2.8641, "step": 2799 }, { "epoch": 0.19478938397857318, "grad_norm": 1.125, "learning_rate": 0.0018609873931129338, "loss": 2.8544, "step": 2800 }, { "epoch": 0.19485895161570838, "grad_norm": 1.3671875, "learning_rate": 0.0018608727609676476, "loss": 3.2558, "step": 2801 }, { "epoch": 0.19492851925284357, "grad_norm": 1.4140625, "learning_rate": 0.0018607580851115817, "loss": 3.1328, "step": 2802 }, { "epoch": 0.19499808688997877, "grad_norm": 1.484375, "learning_rate": 0.0018606433655505587, "loss": 2.9437, "step": 2803 }, { "epoch": 0.195067654527114, "grad_norm": 1.46875, "learning_rate": 0.0018605286022904037, "loss": 2.9838, "step": 2804 }, { "epoch": 0.1951372221642492, "grad_norm": 2.203125, "learning_rate": 0.0018604137953369439, "loss": 3.0965, "step": 2805 }, { "epoch": 0.1952067898013844, "grad_norm": 1.3359375, "learning_rate": 0.0018602989446960079, "loss": 3.2953, "step": 2806 }, { "epoch": 0.1952763574385196, "grad_norm": 1.46875, "learning_rate": 0.001860184050373428, "loss": 3.4891, "step": 2807 }, { "epoch": 0.1953459250756548, "grad_norm": 1.15625, "learning_rate": 0.0018600691123750374, "loss": 3.2762, "step": 2808 }, { "epoch": 0.19541549271279002, "grad_norm": 1.328125, "learning_rate": 0.0018599541307066727, "loss": 2.7164, "step": 2809 }, { "epoch": 0.19548506034992522, "grad_norm": 1.03125, "learning_rate": 0.0018598391053741717, "loss": 3.2189, "step": 2810 }, { "epoch": 0.19555462798706041, "grad_norm": 1.1640625, "learning_rate": 0.0018597240363833745, "loss": 3.0099, "step": 2811 }, { "epoch": 0.1956241956241956, "grad_norm": 1.1953125, "learning_rate": 0.0018596089237401245, "loss": 2.7148, "step": 2812 }, { "epoch": 0.19569376326133084, "grad_norm": 1.125, "learning_rate": 0.0018594937674502657, "loss": 3.1438, "step": 2813 }, { "epoch": 0.19576333089846604, "grad_norm": 1.359375, "learning_rate": 0.001859378567519646, "loss": 3.3735, "step": 2814 }, { "epoch": 0.19583289853560124, "grad_norm": 1.3828125, "learning_rate": 0.0018592633239541136, "loss": 3.291, "step": 2815 }, { "epoch": 0.19590246617273643, "grad_norm": 1.1875, "learning_rate": 0.0018591480367595213, "loss": 3.312, "step": 2816 }, { "epoch": 0.19597203380987166, "grad_norm": 1.5234375, "learning_rate": 0.0018590327059417216, "loss": 3.276, "step": 2817 }, { "epoch": 0.19604160144700686, "grad_norm": 1.359375, "learning_rate": 0.0018589173315065712, "loss": 2.7598, "step": 2818 }, { "epoch": 0.19611116908414206, "grad_norm": 1.6796875, "learning_rate": 0.001858801913459928, "loss": 3.0982, "step": 2819 }, { "epoch": 0.19618073672127725, "grad_norm": 1.6796875, "learning_rate": 0.0018586864518076523, "loss": 3.0716, "step": 2820 }, { "epoch": 0.19625030435841245, "grad_norm": 1.4921875, "learning_rate": 0.0018585709465556066, "loss": 3.7368, "step": 2821 }, { "epoch": 0.19631987199554768, "grad_norm": 1.6484375, "learning_rate": 0.0018584553977096557, "loss": 3.1684, "step": 2822 }, { "epoch": 0.19638943963268288, "grad_norm": 1.828125, "learning_rate": 0.0018583398052756665, "loss": 2.9681, "step": 2823 }, { "epoch": 0.19645900726981808, "grad_norm": 1.5078125, "learning_rate": 0.0018582241692595089, "loss": 3.0652, "step": 2824 }, { "epoch": 0.19652857490695327, "grad_norm": 1.7265625, "learning_rate": 0.0018581084896670532, "loss": 2.9162, "step": 2825 }, { "epoch": 0.1965981425440885, "grad_norm": 1.9765625, "learning_rate": 0.0018579927665041739, "loss": 3.1073, "step": 2826 }, { "epoch": 0.1966677101812237, "grad_norm": 1.390625, "learning_rate": 0.0018578769997767465, "loss": 2.6965, "step": 2827 }, { "epoch": 0.1967372778183589, "grad_norm": 1.484375, "learning_rate": 0.001857761189490649, "loss": 2.9426, "step": 2828 }, { "epoch": 0.1968068454554941, "grad_norm": 1.7109375, "learning_rate": 0.0018576453356517618, "loss": 3.1776, "step": 2829 }, { "epoch": 0.19687641309262932, "grad_norm": 1.484375, "learning_rate": 0.001857529438265967, "loss": 2.8095, "step": 2830 }, { "epoch": 0.19694598072976452, "grad_norm": 1.8046875, "learning_rate": 0.0018574134973391497, "loss": 3.233, "step": 2831 }, { "epoch": 0.19701554836689972, "grad_norm": 1.6796875, "learning_rate": 0.001857297512877197, "loss": 3.2599, "step": 2832 }, { "epoch": 0.19708511600403492, "grad_norm": 1.5234375, "learning_rate": 0.0018571814848859973, "loss": 2.7753, "step": 2833 }, { "epoch": 0.19715468364117011, "grad_norm": 1.53125, "learning_rate": 0.0018570654133714425, "loss": 2.755, "step": 2834 }, { "epoch": 0.19722425127830534, "grad_norm": 2.15625, "learning_rate": 0.001856949298339426, "loss": 2.9832, "step": 2835 }, { "epoch": 0.19729381891544054, "grad_norm": 2.078125, "learning_rate": 0.0018568331397958435, "loss": 3.0149, "step": 2836 }, { "epoch": 0.19736338655257574, "grad_norm": 1.765625, "learning_rate": 0.0018567169377465928, "loss": 3.0255, "step": 2837 }, { "epoch": 0.19743295418971094, "grad_norm": 1.578125, "learning_rate": 0.0018566006921975741, "loss": 2.6039, "step": 2838 }, { "epoch": 0.19750252182684616, "grad_norm": 1.71875, "learning_rate": 0.0018564844031546902, "loss": 2.7991, "step": 2839 }, { "epoch": 0.19757208946398136, "grad_norm": 1.7734375, "learning_rate": 0.001856368070623845, "loss": 2.8238, "step": 2840 }, { "epoch": 0.19764165710111656, "grad_norm": 2.25, "learning_rate": 0.0018562516946109455, "loss": 2.7847, "step": 2841 }, { "epoch": 0.19771122473825176, "grad_norm": 1.625, "learning_rate": 0.001856135275121901, "loss": 2.5704, "step": 2842 }, { "epoch": 0.19778079237538698, "grad_norm": 4.5625, "learning_rate": 0.0018560188121626224, "loss": 3.1919, "step": 2843 }, { "epoch": 0.19785036001252218, "grad_norm": 2.078125, "learning_rate": 0.0018559023057390235, "loss": 2.3178, "step": 2844 }, { "epoch": 0.19791992764965738, "grad_norm": 2.25, "learning_rate": 0.001855785755857019, "loss": 2.9565, "step": 2845 }, { "epoch": 0.19798949528679258, "grad_norm": 2.125, "learning_rate": 0.0018556691625225277, "loss": 2.532, "step": 2846 }, { "epoch": 0.19805906292392778, "grad_norm": 3.484375, "learning_rate": 0.001855552525741469, "loss": 2.7136, "step": 2847 }, { "epoch": 0.198128630561063, "grad_norm": 2.09375, "learning_rate": 0.0018554358455197652, "loss": 2.5548, "step": 2848 }, { "epoch": 0.1981981981981982, "grad_norm": 2.015625, "learning_rate": 0.0018553191218633415, "loss": 2.5284, "step": 2849 }, { "epoch": 0.1982677658353334, "grad_norm": 2.34375, "learning_rate": 0.0018552023547781231, "loss": 2.6736, "step": 2850 }, { "epoch": 0.1983373334724686, "grad_norm": 2.390625, "learning_rate": 0.0018550855442700403, "loss": 2.8147, "step": 2851 }, { "epoch": 0.19840690110960382, "grad_norm": 2.65625, "learning_rate": 0.0018549686903450234, "loss": 2.7503, "step": 2852 }, { "epoch": 0.19847646874673902, "grad_norm": 2.484375, "learning_rate": 0.0018548517930090057, "loss": 2.8713, "step": 2853 }, { "epoch": 0.19854603638387422, "grad_norm": 2.4375, "learning_rate": 0.0018547348522679225, "loss": 2.7856, "step": 2854 }, { "epoch": 0.19861560402100942, "grad_norm": 2.40625, "learning_rate": 0.0018546178681277119, "loss": 2.6505, "step": 2855 }, { "epoch": 0.19868517165814464, "grad_norm": 2.34375, "learning_rate": 0.0018545008405943136, "loss": 2.6597, "step": 2856 }, { "epoch": 0.19875473929527984, "grad_norm": 2.78125, "learning_rate": 0.0018543837696736694, "loss": 2.4495, "step": 2857 }, { "epoch": 0.19882430693241504, "grad_norm": 2.390625, "learning_rate": 0.001854266655371724, "loss": 2.5478, "step": 2858 }, { "epoch": 0.19889387456955024, "grad_norm": 2.9375, "learning_rate": 0.0018541494976944235, "loss": 2.8368, "step": 2859 }, { "epoch": 0.19896344220668544, "grad_norm": 2.8125, "learning_rate": 0.0018540322966477168, "loss": 2.3955, "step": 2860 }, { "epoch": 0.19903300984382066, "grad_norm": 3.015625, "learning_rate": 0.001853915052237555, "loss": 2.4895, "step": 2861 }, { "epoch": 0.19910257748095586, "grad_norm": 2.640625, "learning_rate": 0.0018537977644698907, "loss": 2.7793, "step": 2862 }, { "epoch": 0.19917214511809106, "grad_norm": 2.34375, "learning_rate": 0.0018536804333506793, "loss": 2.3713, "step": 2863 }, { "epoch": 0.19924171275522626, "grad_norm": 2.578125, "learning_rate": 0.0018535630588858783, "loss": 2.6354, "step": 2864 }, { "epoch": 0.19931128039236148, "grad_norm": 3.390625, "learning_rate": 0.0018534456410814473, "loss": 2.3463, "step": 2865 }, { "epoch": 0.19938084802949668, "grad_norm": 2.859375, "learning_rate": 0.0018533281799433489, "loss": 2.2692, "step": 2866 }, { "epoch": 0.19945041566663188, "grad_norm": 2.90625, "learning_rate": 0.0018532106754775462, "loss": 2.1026, "step": 2867 }, { "epoch": 0.19951998330376708, "grad_norm": 2.140625, "learning_rate": 0.001853093127690006, "loss": 2.2183, "step": 2868 }, { "epoch": 0.1995895509409023, "grad_norm": 2.921875, "learning_rate": 0.0018529755365866967, "loss": 2.5848, "step": 2869 }, { "epoch": 0.1996591185780375, "grad_norm": 3.203125, "learning_rate": 0.001852857902173589, "loss": 2.3433, "step": 2870 }, { "epoch": 0.1997286862151727, "grad_norm": 2.5625, "learning_rate": 0.0018527402244566554, "loss": 2.0217, "step": 2871 }, { "epoch": 0.1997982538523079, "grad_norm": 2.578125, "learning_rate": 0.0018526225034418715, "loss": 2.2611, "step": 2872 }, { "epoch": 0.1998678214894431, "grad_norm": 2.78125, "learning_rate": 0.0018525047391352144, "loss": 2.3769, "step": 2873 }, { "epoch": 0.19993738912657832, "grad_norm": 2.3125, "learning_rate": 0.001852386931542664, "loss": 2.2041, "step": 2874 }, { "epoch": 0.20000695676371352, "grad_norm": 2.90625, "learning_rate": 0.0018522690806702013, "loss": 2.1939, "step": 2875 }, { "epoch": 0.20007652440084872, "grad_norm": 2.671875, "learning_rate": 0.0018521511865238103, "loss": 2.1469, "step": 2876 }, { "epoch": 0.20014609203798392, "grad_norm": 2.515625, "learning_rate": 0.0018520332491094775, "loss": 2.3859, "step": 2877 }, { "epoch": 0.20021565967511915, "grad_norm": 2.609375, "learning_rate": 0.0018519152684331906, "loss": 2.2566, "step": 2878 }, { "epoch": 0.20028522731225434, "grad_norm": 2.46875, "learning_rate": 0.0018517972445009404, "loss": 2.1774, "step": 2879 }, { "epoch": 0.20035479494938954, "grad_norm": 2.328125, "learning_rate": 0.0018516791773187196, "loss": 1.9746, "step": 2880 }, { "epoch": 0.20042436258652474, "grad_norm": 2.375, "learning_rate": 0.0018515610668925228, "loss": 2.3208, "step": 2881 }, { "epoch": 0.20049393022365997, "grad_norm": 2.6875, "learning_rate": 0.0018514429132283476, "loss": 2.1074, "step": 2882 }, { "epoch": 0.20056349786079516, "grad_norm": 2.3125, "learning_rate": 0.0018513247163321925, "loss": 2.1219, "step": 2883 }, { "epoch": 0.20063306549793036, "grad_norm": 2.953125, "learning_rate": 0.0018512064762100594, "loss": 2.3607, "step": 2884 }, { "epoch": 0.20070263313506556, "grad_norm": 2.203125, "learning_rate": 0.0018510881928679517, "loss": 2.1561, "step": 2885 }, { "epoch": 0.20077220077220076, "grad_norm": 2.140625, "learning_rate": 0.0018509698663118754, "loss": 1.8336, "step": 2886 }, { "epoch": 0.20084176840933599, "grad_norm": 2.359375, "learning_rate": 0.0018508514965478384, "loss": 2.1067, "step": 2887 }, { "epoch": 0.20091133604647118, "grad_norm": 2.3125, "learning_rate": 0.0018507330835818513, "loss": 2.0895, "step": 2888 }, { "epoch": 0.20098090368360638, "grad_norm": 2.953125, "learning_rate": 0.0018506146274199261, "loss": 1.8984, "step": 2889 }, { "epoch": 0.20105047132074158, "grad_norm": 2.40625, "learning_rate": 0.0018504961280680777, "loss": 1.9448, "step": 2890 }, { "epoch": 0.2011200389578768, "grad_norm": 2.25, "learning_rate": 0.0018503775855323226, "loss": 1.7361, "step": 2891 }, { "epoch": 0.201189606595012, "grad_norm": 2.40625, "learning_rate": 0.00185025899981868, "loss": 1.8027, "step": 2892 }, { "epoch": 0.2012591742321472, "grad_norm": 3.03125, "learning_rate": 0.0018501403709331706, "loss": 2.1839, "step": 2893 }, { "epoch": 0.2013287418692824, "grad_norm": 2.671875, "learning_rate": 0.0018500216988818186, "loss": 1.9038, "step": 2894 }, { "epoch": 0.20139830950641763, "grad_norm": 2.96875, "learning_rate": 0.0018499029836706491, "loss": 1.8661, "step": 2895 }, { "epoch": 0.20146787714355283, "grad_norm": 2.421875, "learning_rate": 0.0018497842253056898, "loss": 1.9367, "step": 2896 }, { "epoch": 0.20153744478068802, "grad_norm": 3.34375, "learning_rate": 0.0018496654237929709, "loss": 2.0319, "step": 2897 }, { "epoch": 0.20160701241782322, "grad_norm": 2.8125, "learning_rate": 0.0018495465791385243, "loss": 1.7812, "step": 2898 }, { "epoch": 0.20167658005495842, "grad_norm": 2.625, "learning_rate": 0.0018494276913483846, "loss": 2.0751, "step": 2899 }, { "epoch": 0.20174614769209365, "grad_norm": 4.34375, "learning_rate": 0.0018493087604285882, "loss": 2.0154, "step": 2900 }, { "epoch": 0.20181571532922885, "grad_norm": 3.125, "learning_rate": 0.0018491897863851733, "loss": 2.1483, "step": 2901 }, { "epoch": 0.20188528296636404, "grad_norm": 2.59375, "learning_rate": 0.001849070769224182, "loss": 1.8735, "step": 2902 }, { "epoch": 0.20195485060349924, "grad_norm": 2.4375, "learning_rate": 0.001848951708951656, "loss": 1.9557, "step": 2903 }, { "epoch": 0.20202441824063447, "grad_norm": 2.765625, "learning_rate": 0.0018488326055736417, "loss": 1.9589, "step": 2904 }, { "epoch": 0.20209398587776967, "grad_norm": 2.4375, "learning_rate": 0.001848713459096186, "loss": 1.814, "step": 2905 }, { "epoch": 0.20216355351490486, "grad_norm": 2.140625, "learning_rate": 0.0018485942695253387, "loss": 2.0086, "step": 2906 }, { "epoch": 0.20223312115204006, "grad_norm": 2.125, "learning_rate": 0.0018484750368671515, "loss": 1.9076, "step": 2907 }, { "epoch": 0.2023026887891753, "grad_norm": 2.15625, "learning_rate": 0.0018483557611276788, "loss": 1.9679, "step": 2908 }, { "epoch": 0.2023722564263105, "grad_norm": 6.5625, "learning_rate": 0.0018482364423129762, "loss": 1.9789, "step": 2909 }, { "epoch": 0.20244182406344569, "grad_norm": 2.1875, "learning_rate": 0.0018481170804291029, "loss": 1.7068, "step": 2910 }, { "epoch": 0.20251139170058088, "grad_norm": 2.90625, "learning_rate": 0.0018479976754821187, "loss": 1.8457, "step": 2911 }, { "epoch": 0.20258095933771608, "grad_norm": 1.9921875, "learning_rate": 0.001847878227478087, "loss": 1.8104, "step": 2912 }, { "epoch": 0.2026505269748513, "grad_norm": 2.484375, "learning_rate": 0.0018477587364230726, "loss": 2.1387, "step": 2913 }, { "epoch": 0.2027200946119865, "grad_norm": 2.328125, "learning_rate": 0.0018476392023231423, "loss": 1.9466, "step": 2914 }, { "epoch": 0.2027896622491217, "grad_norm": 2.015625, "learning_rate": 0.001847519625184366, "loss": 1.7601, "step": 2915 }, { "epoch": 0.2028592298862569, "grad_norm": 2.0625, "learning_rate": 0.0018474000050128147, "loss": 1.6999, "step": 2916 }, { "epoch": 0.20292879752339213, "grad_norm": 3.03125, "learning_rate": 0.0018472803418145625, "loss": 1.6827, "step": 2917 }, { "epoch": 0.20299836516052733, "grad_norm": 2.015625, "learning_rate": 0.001847160635595685, "loss": 1.7657, "step": 2918 }, { "epoch": 0.20306793279766253, "grad_norm": 2.265625, "learning_rate": 0.0018470408863622608, "loss": 1.7335, "step": 2919 }, { "epoch": 0.20313750043479772, "grad_norm": 2.078125, "learning_rate": 0.0018469210941203698, "loss": 1.7775, "step": 2920 }, { "epoch": 0.20320706807193295, "grad_norm": 2.765625, "learning_rate": 0.001846801258876094, "loss": 1.6758, "step": 2921 }, { "epoch": 0.20327663570906815, "grad_norm": 2.296875, "learning_rate": 0.0018466813806355187, "loss": 1.8042, "step": 2922 }, { "epoch": 0.20334620334620335, "grad_norm": 2.0625, "learning_rate": 0.0018465614594047307, "loss": 1.8612, "step": 2923 }, { "epoch": 0.20341577098333855, "grad_norm": 2.0625, "learning_rate": 0.0018464414951898185, "loss": 1.7777, "step": 2924 }, { "epoch": 0.20348533862047374, "grad_norm": 2.53125, "learning_rate": 0.0018463214879968735, "loss": 1.7603, "step": 2925 }, { "epoch": 0.20355490625760897, "grad_norm": 2.0625, "learning_rate": 0.0018462014378319892, "loss": 1.7786, "step": 2926 }, { "epoch": 0.20362447389474417, "grad_norm": 2.09375, "learning_rate": 0.0018460813447012613, "loss": 1.666, "step": 2927 }, { "epoch": 0.20369404153187937, "grad_norm": 2.46875, "learning_rate": 0.0018459612086107868, "loss": 1.8724, "step": 2928 }, { "epoch": 0.20376360916901456, "grad_norm": 4.3125, "learning_rate": 0.0018458410295666664, "loss": 2.3202, "step": 2929 }, { "epoch": 0.2038331768061498, "grad_norm": 2.484375, "learning_rate": 0.0018457208075750018, "loss": 1.7222, "step": 2930 }, { "epoch": 0.203902744443285, "grad_norm": 2.078125, "learning_rate": 0.0018456005426418973, "loss": 1.6514, "step": 2931 }, { "epoch": 0.2039723120804202, "grad_norm": 2.078125, "learning_rate": 0.0018454802347734596, "loss": 1.9026, "step": 2932 }, { "epoch": 0.20404187971755539, "grad_norm": 2.4375, "learning_rate": 0.0018453598839757968, "loss": 1.6522, "step": 2933 }, { "epoch": 0.2041114473546906, "grad_norm": 2.03125, "learning_rate": 0.0018452394902550202, "loss": 1.6696, "step": 2934 }, { "epoch": 0.2041810149918258, "grad_norm": 2.046875, "learning_rate": 0.0018451190536172427, "loss": 1.6172, "step": 2935 }, { "epoch": 0.204250582628961, "grad_norm": 4.5, "learning_rate": 0.0018449985740685794, "loss": 1.675, "step": 2936 }, { "epoch": 0.2043201502660962, "grad_norm": 2.125, "learning_rate": 0.0018448780516151474, "loss": 1.7131, "step": 2937 }, { "epoch": 0.2043897179032314, "grad_norm": 2.28125, "learning_rate": 0.0018447574862630663, "loss": 1.7751, "step": 2938 }, { "epoch": 0.20445928554036663, "grad_norm": 1.9453125, "learning_rate": 0.0018446368780184583, "loss": 1.5138, "step": 2939 }, { "epoch": 0.20452885317750183, "grad_norm": 2.140625, "learning_rate": 0.0018445162268874466, "loss": 1.7987, "step": 2940 }, { "epoch": 0.20459842081463703, "grad_norm": 2.0625, "learning_rate": 0.0018443955328761579, "loss": 1.6765, "step": 2941 }, { "epoch": 0.20466798845177223, "grad_norm": 2.0, "learning_rate": 0.00184427479599072, "loss": 1.8651, "step": 2942 }, { "epoch": 0.20473755608890745, "grad_norm": 3.046875, "learning_rate": 0.0018441540162372632, "loss": 1.6984, "step": 2943 }, { "epoch": 0.20480712372604265, "grad_norm": 2.65625, "learning_rate": 0.0018440331936219207, "loss": 1.554, "step": 2944 }, { "epoch": 0.20487669136317785, "grad_norm": 2.625, "learning_rate": 0.0018439123281508265, "loss": 1.8372, "step": 2945 }, { "epoch": 0.20494625900031305, "grad_norm": 1.875, "learning_rate": 0.0018437914198301182, "loss": 1.6942, "step": 2946 }, { "epoch": 0.20501582663744825, "grad_norm": 1.890625, "learning_rate": 0.0018436704686659346, "loss": 1.6305, "step": 2947 }, { "epoch": 0.20508539427458347, "grad_norm": 2.375, "learning_rate": 0.0018435494746644168, "loss": 1.6892, "step": 2948 }, { "epoch": 0.20515496191171867, "grad_norm": 2.03125, "learning_rate": 0.0018434284378317086, "loss": 1.6875, "step": 2949 }, { "epoch": 0.20522452954885387, "grad_norm": 2.28125, "learning_rate": 0.0018433073581739555, "loss": 1.799, "step": 2950 }, { "epoch": 0.20529409718598907, "grad_norm": 2.09375, "learning_rate": 0.0018431862356973056, "loss": 1.9487, "step": 2951 }, { "epoch": 0.2053636648231243, "grad_norm": 1.9609375, "learning_rate": 0.001843065070407908, "loss": 1.5475, "step": 2952 }, { "epoch": 0.2054332324602595, "grad_norm": 2.0, "learning_rate": 0.0018429438623119162, "loss": 1.4665, "step": 2953 }, { "epoch": 0.2055028000973947, "grad_norm": 1.9296875, "learning_rate": 0.0018428226114154832, "loss": 1.5239, "step": 2954 }, { "epoch": 0.2055723677345299, "grad_norm": 2.015625, "learning_rate": 0.0018427013177247664, "loss": 1.602, "step": 2955 }, { "epoch": 0.2056419353716651, "grad_norm": 1.7109375, "learning_rate": 0.0018425799812459244, "loss": 1.4334, "step": 2956 }, { "epoch": 0.2057115030088003, "grad_norm": 2.40625, "learning_rate": 0.0018424586019851175, "loss": 1.825, "step": 2957 }, { "epoch": 0.2057810706459355, "grad_norm": 2.796875, "learning_rate": 0.0018423371799485095, "loss": 1.6815, "step": 2958 }, { "epoch": 0.2058506382830707, "grad_norm": 1.828125, "learning_rate": 0.001842215715142265, "loss": 1.6816, "step": 2959 }, { "epoch": 0.2059202059202059, "grad_norm": 1.9609375, "learning_rate": 0.0018420942075725514, "loss": 1.4627, "step": 2960 }, { "epoch": 0.20598977355734113, "grad_norm": 2.375, "learning_rate": 0.0018419726572455387, "loss": 1.7165, "step": 2961 }, { "epoch": 0.20605934119447633, "grad_norm": 2.21875, "learning_rate": 0.0018418510641673982, "loss": 1.6336, "step": 2962 }, { "epoch": 0.20612890883161153, "grad_norm": 1.640625, "learning_rate": 0.001841729428344304, "loss": 1.2457, "step": 2963 }, { "epoch": 0.20619847646874673, "grad_norm": 1.6796875, "learning_rate": 0.001841607749782432, "loss": 1.6383, "step": 2964 }, { "epoch": 0.20626804410588195, "grad_norm": 1.703125, "learning_rate": 0.0018414860284879603, "loss": 1.4405, "step": 2965 }, { "epoch": 0.20633761174301715, "grad_norm": 2.0, "learning_rate": 0.0018413642644670696, "loss": 1.8695, "step": 2966 }, { "epoch": 0.20640717938015235, "grad_norm": 1.5703125, "learning_rate": 0.0018412424577259423, "loss": 1.6998, "step": 2967 }, { "epoch": 0.20647674701728755, "grad_norm": 1.7578125, "learning_rate": 0.0018411206082707633, "loss": 1.4037, "step": 2968 }, { "epoch": 0.20654631465442277, "grad_norm": 2.3125, "learning_rate": 0.0018409987161077193, "loss": 1.4348, "step": 2969 }, { "epoch": 0.20661588229155797, "grad_norm": 1.8828125, "learning_rate": 0.0018408767812429993, "loss": 1.6154, "step": 2970 }, { "epoch": 0.20668544992869317, "grad_norm": 1.8046875, "learning_rate": 0.001840754803682795, "loss": 1.5229, "step": 2971 }, { "epoch": 0.20675501756582837, "grad_norm": 1.9296875, "learning_rate": 0.0018406327834332994, "loss": 1.7966, "step": 2972 }, { "epoch": 0.20682458520296357, "grad_norm": 1.984375, "learning_rate": 0.0018405107205007082, "loss": 1.7795, "step": 2973 }, { "epoch": 0.2068941528400988, "grad_norm": 1.7109375, "learning_rate": 0.0018403886148912188, "loss": 1.6348, "step": 2974 }, { "epoch": 0.206963720477234, "grad_norm": 2.03125, "learning_rate": 0.0018402664666110316, "loss": 1.429, "step": 2975 }, { "epoch": 0.2070332881143692, "grad_norm": 1.8046875, "learning_rate": 0.0018401442756663484, "loss": 1.4652, "step": 2976 }, { "epoch": 0.2071028557515044, "grad_norm": 1.7578125, "learning_rate": 0.0018400220420633736, "loss": 1.3845, "step": 2977 }, { "epoch": 0.20717242338863961, "grad_norm": 1.9609375, "learning_rate": 0.0018398997658083136, "loss": 1.7564, "step": 2978 }, { "epoch": 0.2072419910257748, "grad_norm": 2.046875, "learning_rate": 0.0018397774469073767, "loss": 1.7363, "step": 2979 }, { "epoch": 0.20731155866291, "grad_norm": 1.640625, "learning_rate": 0.0018396550853667741, "loss": 1.3616, "step": 2980 }, { "epoch": 0.2073811263000452, "grad_norm": 1.6953125, "learning_rate": 0.0018395326811927182, "loss": 1.2341, "step": 2981 }, { "epoch": 0.20745069393718044, "grad_norm": 1.6015625, "learning_rate": 0.0018394102343914245, "loss": 1.4868, "step": 2982 }, { "epoch": 0.20752026157431563, "grad_norm": 1.921875, "learning_rate": 0.0018392877449691098, "loss": 1.5404, "step": 2983 }, { "epoch": 0.20758982921145083, "grad_norm": 2.96875, "learning_rate": 0.0018391652129319941, "loss": 1.5061, "step": 2984 }, { "epoch": 0.20765939684858603, "grad_norm": 1.671875, "learning_rate": 0.001839042638286298, "loss": 1.4491, "step": 2985 }, { "epoch": 0.20772896448572123, "grad_norm": 2.046875, "learning_rate": 0.0018389200210382464, "loss": 1.4995, "step": 2986 }, { "epoch": 0.20779853212285646, "grad_norm": 1.8828125, "learning_rate": 0.0018387973611940645, "loss": 1.7329, "step": 2987 }, { "epoch": 0.20786809975999165, "grad_norm": 2.3125, "learning_rate": 0.0018386746587599804, "loss": 1.326, "step": 2988 }, { "epoch": 0.20793766739712685, "grad_norm": 1.7421875, "learning_rate": 0.001838551913742224, "loss": 1.3855, "step": 2989 }, { "epoch": 0.20800723503426205, "grad_norm": 1.6484375, "learning_rate": 0.0018384291261470285, "loss": 1.4359, "step": 2990 }, { "epoch": 0.20807680267139728, "grad_norm": 2.015625, "learning_rate": 0.0018383062959806279, "loss": 1.6186, "step": 2991 }, { "epoch": 0.20814637030853247, "grad_norm": 1.78125, "learning_rate": 0.0018381834232492587, "loss": 1.6195, "step": 2992 }, { "epoch": 0.20821593794566767, "grad_norm": 1.4296875, "learning_rate": 0.0018380605079591603, "loss": 1.2774, "step": 2993 }, { "epoch": 0.20828550558280287, "grad_norm": 1.875, "learning_rate": 0.0018379375501165734, "loss": 1.4691, "step": 2994 }, { "epoch": 0.2083550732199381, "grad_norm": 1.6796875, "learning_rate": 0.0018378145497277409, "loss": 1.3312, "step": 2995 }, { "epoch": 0.2084246408570733, "grad_norm": 1.640625, "learning_rate": 0.001837691506798909, "loss": 1.5309, "step": 2996 }, { "epoch": 0.2084942084942085, "grad_norm": 1.8671875, "learning_rate": 0.0018375684213363243, "loss": 1.5467, "step": 2997 }, { "epoch": 0.2085637761313437, "grad_norm": 1.7265625, "learning_rate": 0.001837445293346237, "loss": 1.6222, "step": 2998 }, { "epoch": 0.2086333437684789, "grad_norm": 1.59375, "learning_rate": 0.0018373221228348987, "loss": 1.4877, "step": 2999 }, { "epoch": 0.20870291140561412, "grad_norm": 2.421875, "learning_rate": 0.0018371989098085633, "loss": 1.4715, "step": 3000 }, { "epoch": 0.20877247904274931, "grad_norm": 1.9609375, "learning_rate": 0.0018370756542734872, "loss": 1.6579, "step": 3001 }, { "epoch": 0.2088420466798845, "grad_norm": 2.203125, "learning_rate": 0.0018369523562359285, "loss": 1.6065, "step": 3002 }, { "epoch": 0.2089116143170197, "grad_norm": 1.75, "learning_rate": 0.0018368290157021474, "loss": 1.3236, "step": 3003 }, { "epoch": 0.20898118195415494, "grad_norm": 1.640625, "learning_rate": 0.0018367056326784074, "loss": 1.462, "step": 3004 }, { "epoch": 0.20905074959129014, "grad_norm": 1.796875, "learning_rate": 0.0018365822071709724, "loss": 1.4751, "step": 3005 }, { "epoch": 0.20912031722842533, "grad_norm": 1.8125, "learning_rate": 0.0018364587391861095, "loss": 1.385, "step": 3006 }, { "epoch": 0.20918988486556053, "grad_norm": 1.71875, "learning_rate": 0.0018363352287300877, "loss": 1.3311, "step": 3007 }, { "epoch": 0.20925945250269576, "grad_norm": 2.15625, "learning_rate": 0.001836211675809179, "loss": 1.6793, "step": 3008 }, { "epoch": 0.20932902013983096, "grad_norm": 1.65625, "learning_rate": 0.001836088080429656, "loss": 1.4785, "step": 3009 }, { "epoch": 0.20939858777696616, "grad_norm": 1.796875, "learning_rate": 0.0018359644425977942, "loss": 1.5472, "step": 3010 }, { "epoch": 0.20946815541410135, "grad_norm": 1.96875, "learning_rate": 0.0018358407623198718, "loss": 1.3917, "step": 3011 }, { "epoch": 0.20953772305123655, "grad_norm": 1.796875, "learning_rate": 0.0018357170396021685, "loss": 1.3381, "step": 3012 }, { "epoch": 0.20960729068837178, "grad_norm": 1.6640625, "learning_rate": 0.0018355932744509662, "loss": 1.3268, "step": 3013 }, { "epoch": 0.20967685832550698, "grad_norm": 1.4296875, "learning_rate": 0.0018354694668725488, "loss": 1.5767, "step": 3014 }, { "epoch": 0.20974642596264217, "grad_norm": 1.546875, "learning_rate": 0.0018353456168732035, "loss": 1.5286, "step": 3015 }, { "epoch": 0.20981599359977737, "grad_norm": 1.359375, "learning_rate": 0.001835221724459218, "loss": 1.3449, "step": 3016 }, { "epoch": 0.2098855612369126, "grad_norm": 2.03125, "learning_rate": 0.0018350977896368832, "loss": 1.4017, "step": 3017 }, { "epoch": 0.2099551288740478, "grad_norm": 1.875, "learning_rate": 0.0018349738124124918, "loss": 1.4264, "step": 3018 }, { "epoch": 0.210024696511183, "grad_norm": 1.84375, "learning_rate": 0.0018348497927923387, "loss": 1.4162, "step": 3019 }, { "epoch": 0.2100942641483182, "grad_norm": 1.96875, "learning_rate": 0.0018347257307827212, "loss": 1.407, "step": 3020 }, { "epoch": 0.21016383178545342, "grad_norm": 1.8984375, "learning_rate": 0.0018346016263899383, "loss": 1.4882, "step": 3021 }, { "epoch": 0.21023339942258862, "grad_norm": 1.7734375, "learning_rate": 0.0018344774796202916, "loss": 1.4429, "step": 3022 }, { "epoch": 0.21030296705972382, "grad_norm": 1.3671875, "learning_rate": 0.0018343532904800846, "loss": 1.3768, "step": 3023 }, { "epoch": 0.21037253469685901, "grad_norm": 1.4921875, "learning_rate": 0.0018342290589756227, "loss": 1.2807, "step": 3024 }, { "epoch": 0.2104421023339942, "grad_norm": 1.890625, "learning_rate": 0.0018341047851132143, "loss": 1.4446, "step": 3025 }, { "epoch": 0.21051166997112944, "grad_norm": 2.28125, "learning_rate": 0.001833980468899169, "loss": 1.412, "step": 3026 }, { "epoch": 0.21058123760826464, "grad_norm": 1.6015625, "learning_rate": 0.001833856110339799, "loss": 1.3339, "step": 3027 }, { "epoch": 0.21065080524539984, "grad_norm": 1.59375, "learning_rate": 0.0018337317094414187, "loss": 1.5386, "step": 3028 }, { "epoch": 0.21072037288253503, "grad_norm": 1.375, "learning_rate": 0.0018336072662103447, "loss": 1.5191, "step": 3029 }, { "epoch": 0.21078994051967026, "grad_norm": 1.4609375, "learning_rate": 0.0018334827806528954, "loss": 1.5986, "step": 3030 }, { "epoch": 0.21085950815680546, "grad_norm": 1.4921875, "learning_rate": 0.0018333582527753913, "loss": 1.4021, "step": 3031 }, { "epoch": 0.21092907579394066, "grad_norm": 1.6953125, "learning_rate": 0.0018332336825841557, "loss": 1.5484, "step": 3032 }, { "epoch": 0.21099864343107586, "grad_norm": 1.5390625, "learning_rate": 0.0018331090700855134, "loss": 1.4599, "step": 3033 }, { "epoch": 0.21106821106821108, "grad_norm": 1.6796875, "learning_rate": 0.001832984415285792, "loss": 1.5886, "step": 3034 }, { "epoch": 0.21113777870534628, "grad_norm": 1.6171875, "learning_rate": 0.0018328597181913203, "loss": 1.5175, "step": 3035 }, { "epoch": 0.21120734634248148, "grad_norm": 1.859375, "learning_rate": 0.0018327349788084303, "loss": 1.4476, "step": 3036 }, { "epoch": 0.21127691397961668, "grad_norm": 1.71875, "learning_rate": 0.001832610197143455, "loss": 1.539, "step": 3037 }, { "epoch": 0.21134648161675187, "grad_norm": 1.5546875, "learning_rate": 0.0018324853732027307, "loss": 1.6018, "step": 3038 }, { "epoch": 0.2114160492538871, "grad_norm": 1.546875, "learning_rate": 0.0018323605069925954, "loss": 1.7061, "step": 3039 }, { "epoch": 0.2114856168910223, "grad_norm": 1.78125, "learning_rate": 0.0018322355985193885, "loss": 1.5131, "step": 3040 }, { "epoch": 0.2115551845281575, "grad_norm": 1.5, "learning_rate": 0.001832110647789453, "loss": 1.5458, "step": 3041 }, { "epoch": 0.2116247521652927, "grad_norm": 1.453125, "learning_rate": 0.0018319856548091329, "loss": 1.3315, "step": 3042 }, { "epoch": 0.21169431980242792, "grad_norm": 1.40625, "learning_rate": 0.0018318606195847748, "loss": 1.2364, "step": 3043 }, { "epoch": 0.21176388743956312, "grad_norm": 1.546875, "learning_rate": 0.0018317355421227273, "loss": 1.3256, "step": 3044 }, { "epoch": 0.21183345507669832, "grad_norm": 1.7265625, "learning_rate": 0.0018316104224293413, "loss": 1.2375, "step": 3045 }, { "epoch": 0.21190302271383352, "grad_norm": 1.75, "learning_rate": 0.0018314852605109695, "loss": 1.2515, "step": 3046 }, { "epoch": 0.21197259035096874, "grad_norm": 1.53125, "learning_rate": 0.0018313600563739673, "loss": 1.3807, "step": 3047 }, { "epoch": 0.21204215798810394, "grad_norm": 1.6171875, "learning_rate": 0.0018312348100246918, "loss": 1.3903, "step": 3048 }, { "epoch": 0.21211172562523914, "grad_norm": 2.1875, "learning_rate": 0.0018311095214695024, "loss": 1.9212, "step": 3049 }, { "epoch": 0.21218129326237434, "grad_norm": 1.78125, "learning_rate": 0.0018309841907147605, "loss": 1.1514, "step": 3050 }, { "epoch": 0.21225086089950954, "grad_norm": 1.5, "learning_rate": 0.00183085881776683, "loss": 1.3666, "step": 3051 }, { "epoch": 0.21232042853664476, "grad_norm": 1.8125, "learning_rate": 0.0018307334026320765, "loss": 1.6806, "step": 3052 }, { "epoch": 0.21238999617377996, "grad_norm": 1.53125, "learning_rate": 0.0018306079453168681, "loss": 1.5053, "step": 3053 }, { "epoch": 0.21245956381091516, "grad_norm": 1.734375, "learning_rate": 0.0018304824458275745, "loss": 1.2886, "step": 3054 }, { "epoch": 0.21252913144805036, "grad_norm": 1.3359375, "learning_rate": 0.0018303569041705685, "loss": 1.4875, "step": 3055 }, { "epoch": 0.21259869908518558, "grad_norm": 1.3671875, "learning_rate": 0.0018302313203522242, "loss": 1.5887, "step": 3056 }, { "epoch": 0.21266826672232078, "grad_norm": 1.59375, "learning_rate": 0.0018301056943789181, "loss": 1.4743, "step": 3057 }, { "epoch": 0.21273783435945598, "grad_norm": 2.03125, "learning_rate": 0.001829980026257029, "loss": 1.2388, "step": 3058 }, { "epoch": 0.21280740199659118, "grad_norm": 1.78125, "learning_rate": 0.0018298543159929372, "loss": 1.6247, "step": 3059 }, { "epoch": 0.2128769696337264, "grad_norm": 1.7734375, "learning_rate": 0.0018297285635930265, "loss": 1.4666, "step": 3060 }, { "epoch": 0.2129465372708616, "grad_norm": 1.5390625, "learning_rate": 0.0018296027690636813, "loss": 1.4244, "step": 3061 }, { "epoch": 0.2130161049079968, "grad_norm": 1.4296875, "learning_rate": 0.001829476932411289, "loss": 1.3429, "step": 3062 }, { "epoch": 0.213085672545132, "grad_norm": 1.96875, "learning_rate": 0.0018293510536422388, "loss": 1.1101, "step": 3063 }, { "epoch": 0.2131552401822672, "grad_norm": 1.2734375, "learning_rate": 0.0018292251327629225, "loss": 1.2017, "step": 3064 }, { "epoch": 0.21322480781940242, "grad_norm": 1.328125, "learning_rate": 0.0018290991697797335, "loss": 1.3601, "step": 3065 }, { "epoch": 0.21329437545653762, "grad_norm": 2.015625, "learning_rate": 0.0018289731646990678, "loss": 1.2416, "step": 3066 }, { "epoch": 0.21336394309367282, "grad_norm": 1.8125, "learning_rate": 0.0018288471175273227, "loss": 1.4516, "step": 3067 }, { "epoch": 0.21343351073080802, "grad_norm": 1.8359375, "learning_rate": 0.001828721028270899, "loss": 1.245, "step": 3068 }, { "epoch": 0.21350307836794324, "grad_norm": 2.1875, "learning_rate": 0.0018285948969361985, "loss": 1.4108, "step": 3069 }, { "epoch": 0.21357264600507844, "grad_norm": 1.5703125, "learning_rate": 0.0018284687235296255, "loss": 1.1665, "step": 3070 }, { "epoch": 0.21364221364221364, "grad_norm": 1.9296875, "learning_rate": 0.0018283425080575866, "loss": 1.4434, "step": 3071 }, { "epoch": 0.21371178127934884, "grad_norm": 1.5390625, "learning_rate": 0.00182821625052649, "loss": 1.3012, "step": 3072 }, { "epoch": 0.21378134891648407, "grad_norm": 2.046875, "learning_rate": 0.001828089950942747, "loss": 1.5307, "step": 3073 }, { "epoch": 0.21385091655361926, "grad_norm": 1.4375, "learning_rate": 0.0018279636093127705, "loss": 1.18, "step": 3074 }, { "epoch": 0.21392048419075446, "grad_norm": 1.5234375, "learning_rate": 0.0018278372256429747, "loss": 1.4393, "step": 3075 }, { "epoch": 0.21399005182788966, "grad_norm": 1.625, "learning_rate": 0.0018277107999397774, "loss": 1.3494, "step": 3076 }, { "epoch": 0.21405961946502486, "grad_norm": 1.46875, "learning_rate": 0.0018275843322095974, "loss": 1.3145, "step": 3077 }, { "epoch": 0.21412918710216008, "grad_norm": 1.578125, "learning_rate": 0.0018274578224588564, "loss": 1.6444, "step": 3078 }, { "epoch": 0.21419875473929528, "grad_norm": 1.4453125, "learning_rate": 0.001827331270693978, "loss": 1.2429, "step": 3079 }, { "epoch": 0.21426832237643048, "grad_norm": 1.6640625, "learning_rate": 0.0018272046769213879, "loss": 1.5588, "step": 3080 }, { "epoch": 0.21433789001356568, "grad_norm": 1.8203125, "learning_rate": 0.0018270780411475133, "loss": 1.5714, "step": 3081 }, { "epoch": 0.2144074576507009, "grad_norm": 1.6875, "learning_rate": 0.0018269513633787848, "loss": 1.2007, "step": 3082 }, { "epoch": 0.2144770252878361, "grad_norm": 1.5078125, "learning_rate": 0.0018268246436216342, "loss": 1.2613, "step": 3083 }, { "epoch": 0.2145465929249713, "grad_norm": 1.5, "learning_rate": 0.0018266978818824958, "loss": 1.6095, "step": 3084 }, { "epoch": 0.2146161605621065, "grad_norm": 1.5859375, "learning_rate": 0.0018265710781678055, "loss": 1.3277, "step": 3085 }, { "epoch": 0.21468572819924173, "grad_norm": 1.65625, "learning_rate": 0.0018264442324840025, "loss": 1.4692, "step": 3086 }, { "epoch": 0.21475529583637692, "grad_norm": 1.4296875, "learning_rate": 0.0018263173448375266, "loss": 1.3238, "step": 3087 }, { "epoch": 0.21482486347351212, "grad_norm": 1.5390625, "learning_rate": 0.0018261904152348212, "loss": 1.2368, "step": 3088 }, { "epoch": 0.21489443111064732, "grad_norm": 1.71875, "learning_rate": 0.0018260634436823304, "loss": 1.4299, "step": 3089 }, { "epoch": 0.21496399874778252, "grad_norm": 1.2734375, "learning_rate": 0.001825936430186502, "loss": 1.3339, "step": 3090 }, { "epoch": 0.21503356638491775, "grad_norm": 1.3828125, "learning_rate": 0.0018258093747537845, "loss": 1.3982, "step": 3091 }, { "epoch": 0.21510313402205294, "grad_norm": 2.25, "learning_rate": 0.0018256822773906297, "loss": 1.451, "step": 3092 }, { "epoch": 0.21517270165918814, "grad_norm": 1.8203125, "learning_rate": 0.00182555513810349, "loss": 1.56, "step": 3093 }, { "epoch": 0.21524226929632334, "grad_norm": 1.703125, "learning_rate": 0.0018254279568988218, "loss": 1.1239, "step": 3094 }, { "epoch": 0.21531183693345857, "grad_norm": 1.5, "learning_rate": 0.0018253007337830824, "loss": 1.2984, "step": 3095 }, { "epoch": 0.21538140457059377, "grad_norm": 1.75, "learning_rate": 0.0018251734687627318, "loss": 1.0488, "step": 3096 }, { "epoch": 0.21545097220772896, "grad_norm": 1.765625, "learning_rate": 0.0018250461618442312, "loss": 1.0971, "step": 3097 }, { "epoch": 0.21552053984486416, "grad_norm": 1.484375, "learning_rate": 0.0018249188130340453, "loss": 1.3812, "step": 3098 }, { "epoch": 0.21559010748199936, "grad_norm": 1.3359375, "learning_rate": 0.0018247914223386398, "loss": 1.4449, "step": 3099 }, { "epoch": 0.2156596751191346, "grad_norm": 1.5, "learning_rate": 0.0018246639897644835, "loss": 1.3833, "step": 3100 }, { "epoch": 0.21572924275626978, "grad_norm": 1.265625, "learning_rate": 0.001824536515318046, "loss": 1.4897, "step": 3101 }, { "epoch": 0.21579881039340498, "grad_norm": 2.21875, "learning_rate": 0.0018244089990058004, "loss": 1.369, "step": 3102 }, { "epoch": 0.21586837803054018, "grad_norm": 2.15625, "learning_rate": 0.0018242814408342212, "loss": 1.5356, "step": 3103 }, { "epoch": 0.2159379456676754, "grad_norm": 1.734375, "learning_rate": 0.0018241538408097849, "loss": 1.2144, "step": 3104 }, { "epoch": 0.2160075133048106, "grad_norm": 1.375, "learning_rate": 0.001824026198938971, "loss": 1.2871, "step": 3105 }, { "epoch": 0.2160770809419458, "grad_norm": 1.359375, "learning_rate": 0.0018238985152282598, "loss": 1.4326, "step": 3106 }, { "epoch": 0.216146648579081, "grad_norm": 1.4609375, "learning_rate": 0.0018237707896841347, "loss": 1.3786, "step": 3107 }, { "epoch": 0.21621621621621623, "grad_norm": 2.015625, "learning_rate": 0.0018236430223130813, "loss": 1.4567, "step": 3108 }, { "epoch": 0.21628578385335143, "grad_norm": 1.5703125, "learning_rate": 0.0018235152131215867, "loss": 1.397, "step": 3109 }, { "epoch": 0.21635535149048662, "grad_norm": 2.03125, "learning_rate": 0.0018233873621161401, "loss": 1.3467, "step": 3110 }, { "epoch": 0.21642491912762182, "grad_norm": 2.0625, "learning_rate": 0.0018232594693032337, "loss": 1.4307, "step": 3111 }, { "epoch": 0.21649448676475702, "grad_norm": 1.3125, "learning_rate": 0.001823131534689361, "loss": 1.3591, "step": 3112 }, { "epoch": 0.21656405440189225, "grad_norm": 1.5546875, "learning_rate": 0.0018230035582810175, "loss": 1.3185, "step": 3113 }, { "epoch": 0.21663362203902745, "grad_norm": 1.5703125, "learning_rate": 0.0018228755400847016, "loss": 1.3026, "step": 3114 }, { "epoch": 0.21670318967616264, "grad_norm": 1.3125, "learning_rate": 0.0018227474801069136, "loss": 1.2597, "step": 3115 }, { "epoch": 0.21677275731329784, "grad_norm": 1.6484375, "learning_rate": 0.0018226193783541557, "loss": 1.2826, "step": 3116 }, { "epoch": 0.21684232495043307, "grad_norm": 1.765625, "learning_rate": 0.0018224912348329316, "loss": 1.2702, "step": 3117 }, { "epoch": 0.21691189258756827, "grad_norm": 1.46875, "learning_rate": 0.0018223630495497484, "loss": 1.1387, "step": 3118 }, { "epoch": 0.21698146022470347, "grad_norm": 1.4765625, "learning_rate": 0.001822234822511115, "loss": 1.3643, "step": 3119 }, { "epoch": 0.21705102786183866, "grad_norm": 1.3828125, "learning_rate": 0.0018221065537235412, "loss": 1.0964, "step": 3120 }, { "epoch": 0.2171205954989739, "grad_norm": 1.3984375, "learning_rate": 0.0018219782431935405, "loss": 1.242, "step": 3121 }, { "epoch": 0.2171901631361091, "grad_norm": 1.4921875, "learning_rate": 0.0018218498909276276, "loss": 1.3961, "step": 3122 }, { "epoch": 0.2172597307732443, "grad_norm": 1.5078125, "learning_rate": 0.0018217214969323198, "loss": 1.3259, "step": 3123 }, { "epoch": 0.21732929841037948, "grad_norm": 1.6796875, "learning_rate": 0.001821593061214136, "loss": 1.1908, "step": 3124 }, { "epoch": 0.21739886604751468, "grad_norm": 1.875, "learning_rate": 0.0018214645837795979, "loss": 1.5525, "step": 3125 }, { "epoch": 0.2174684336846499, "grad_norm": 1.5078125, "learning_rate": 0.0018213360646352286, "loss": 1.1041, "step": 3126 }, { "epoch": 0.2175380013217851, "grad_norm": 1.53125, "learning_rate": 0.0018212075037875538, "loss": 1.4062, "step": 3127 }, { "epoch": 0.2176075689589203, "grad_norm": 1.421875, "learning_rate": 0.001821078901243101, "loss": 1.3016, "step": 3128 }, { "epoch": 0.2176771365960555, "grad_norm": 1.4140625, "learning_rate": 0.0018209502570084005, "loss": 1.2827, "step": 3129 }, { "epoch": 0.21774670423319073, "grad_norm": 1.4453125, "learning_rate": 0.0018208215710899833, "loss": 1.5377, "step": 3130 }, { "epoch": 0.21781627187032593, "grad_norm": 1.5703125, "learning_rate": 0.0018206928434943846, "loss": 1.3466, "step": 3131 }, { "epoch": 0.21788583950746113, "grad_norm": 1.6484375, "learning_rate": 0.0018205640742281397, "loss": 1.2992, "step": 3132 }, { "epoch": 0.21795540714459632, "grad_norm": 1.3515625, "learning_rate": 0.001820435263297787, "loss": 1.181, "step": 3133 }, { "epoch": 0.21802497478173155, "grad_norm": 1.6328125, "learning_rate": 0.0018203064107098666, "loss": 1.4374, "step": 3134 }, { "epoch": 0.21809454241886675, "grad_norm": 1.4296875, "learning_rate": 0.0018201775164709219, "loss": 1.1472, "step": 3135 }, { "epoch": 0.21816411005600195, "grad_norm": 1.625, "learning_rate": 0.0018200485805874962, "loss": 1.2548, "step": 3136 }, { "epoch": 0.21823367769313715, "grad_norm": 1.515625, "learning_rate": 0.0018199196030661375, "loss": 1.1976, "step": 3137 }, { "epoch": 0.21830324533027234, "grad_norm": 1.3359375, "learning_rate": 0.001819790583913394, "loss": 1.3096, "step": 3138 }, { "epoch": 0.21837281296740757, "grad_norm": 1.609375, "learning_rate": 0.0018196615231358165, "loss": 1.3699, "step": 3139 }, { "epoch": 0.21844238060454277, "grad_norm": 1.1796875, "learning_rate": 0.0018195324207399587, "loss": 1.2893, "step": 3140 }, { "epoch": 0.21851194824167797, "grad_norm": 1.3828125, "learning_rate": 0.0018194032767323747, "loss": 1.2761, "step": 3141 }, { "epoch": 0.21858151587881317, "grad_norm": 1.515625, "learning_rate": 0.0018192740911196225, "loss": 1.1881, "step": 3142 }, { "epoch": 0.2186510835159484, "grad_norm": 1.375, "learning_rate": 0.001819144863908262, "loss": 1.2316, "step": 3143 }, { "epoch": 0.2187206511530836, "grad_norm": 1.4453125, "learning_rate": 0.0018190155951048534, "loss": 1.3229, "step": 3144 }, { "epoch": 0.2187902187902188, "grad_norm": 1.328125, "learning_rate": 0.0018188862847159616, "loss": 1.2341, "step": 3145 }, { "epoch": 0.218859786427354, "grad_norm": 1.3359375, "learning_rate": 0.0018187569327481512, "loss": 1.1094, "step": 3146 }, { "epoch": 0.2189293540644892, "grad_norm": 1.7578125, "learning_rate": 0.001818627539207991, "loss": 1.2033, "step": 3147 }, { "epoch": 0.2189989217016244, "grad_norm": 1.4609375, "learning_rate": 0.0018184981041020505, "loss": 1.0848, "step": 3148 }, { "epoch": 0.2190684893387596, "grad_norm": 2.265625, "learning_rate": 0.0018183686274369016, "loss": 1.4267, "step": 3149 }, { "epoch": 0.2191380569758948, "grad_norm": 1.5859375, "learning_rate": 0.001818239109219119, "loss": 1.2773, "step": 3150 }, { "epoch": 0.21920762461303, "grad_norm": 1.34375, "learning_rate": 0.0018181095494552784, "loss": 1.3228, "step": 3151 }, { "epoch": 0.21927719225016523, "grad_norm": 1.515625, "learning_rate": 0.0018179799481519586, "loss": 1.2327, "step": 3152 }, { "epoch": 0.21934675988730043, "grad_norm": 1.3359375, "learning_rate": 0.00181785030531574, "loss": 1.3893, "step": 3153 }, { "epoch": 0.21941632752443563, "grad_norm": 1.5078125, "learning_rate": 0.001817720620953205, "loss": 1.3026, "step": 3154 }, { "epoch": 0.21948589516157083, "grad_norm": 1.515625, "learning_rate": 0.0018175908950709384, "loss": 1.1513, "step": 3155 }, { "epoch": 0.21955546279870605, "grad_norm": 1.5, "learning_rate": 0.0018174611276755273, "loss": 1.239, "step": 3156 }, { "epoch": 0.21962503043584125, "grad_norm": 1.6640625, "learning_rate": 0.0018173313187735602, "loss": 1.1086, "step": 3157 }, { "epoch": 0.21969459807297645, "grad_norm": 1.4453125, "learning_rate": 0.0018172014683716287, "loss": 1.2177, "step": 3158 }, { "epoch": 0.21976416571011165, "grad_norm": 1.59375, "learning_rate": 0.0018170715764763254, "loss": 1.1821, "step": 3159 }, { "epoch": 0.21983373334724687, "grad_norm": 1.4921875, "learning_rate": 0.0018169416430942461, "loss": 1.7046, "step": 3160 }, { "epoch": 0.21990330098438207, "grad_norm": 1.40625, "learning_rate": 0.0018168116682319875, "loss": 1.3001, "step": 3161 }, { "epoch": 0.21997286862151727, "grad_norm": 1.171875, "learning_rate": 0.0018166816518961498, "loss": 1.2037, "step": 3162 }, { "epoch": 0.22004243625865247, "grad_norm": 1.5078125, "learning_rate": 0.001816551594093334, "loss": 1.4909, "step": 3163 }, { "epoch": 0.22011200389578767, "grad_norm": 1.2734375, "learning_rate": 0.001816421494830144, "loss": 1.2482, "step": 3164 }, { "epoch": 0.2201815715329229, "grad_norm": 1.4453125, "learning_rate": 0.0018162913541131856, "loss": 1.5315, "step": 3165 }, { "epoch": 0.2202511391700581, "grad_norm": 1.625, "learning_rate": 0.0018161611719490663, "loss": 1.2521, "step": 3166 }, { "epoch": 0.2203207068071933, "grad_norm": 1.4375, "learning_rate": 0.0018160309483443969, "loss": 1.2491, "step": 3167 }, { "epoch": 0.2203902744443285, "grad_norm": 1.4453125, "learning_rate": 0.001815900683305789, "loss": 1.3487, "step": 3168 }, { "epoch": 0.2204598420814637, "grad_norm": 1.453125, "learning_rate": 0.0018157703768398566, "loss": 1.2102, "step": 3169 }, { "epoch": 0.2205294097185989, "grad_norm": 1.5703125, "learning_rate": 0.0018156400289532164, "loss": 1.2524, "step": 3170 }, { "epoch": 0.2205989773557341, "grad_norm": 1.140625, "learning_rate": 0.0018155096396524867, "loss": 1.3116, "step": 3171 }, { "epoch": 0.2206685449928693, "grad_norm": 1.2734375, "learning_rate": 0.0018153792089442879, "loss": 1.3005, "step": 3172 }, { "epoch": 0.22073811263000453, "grad_norm": 1.5859375, "learning_rate": 0.0018152487368352426, "loss": 1.3617, "step": 3173 }, { "epoch": 0.22080768026713973, "grad_norm": 1.296875, "learning_rate": 0.0018151182233319756, "loss": 1.3319, "step": 3174 }, { "epoch": 0.22087724790427493, "grad_norm": 1.25, "learning_rate": 0.001814987668441114, "loss": 1.1319, "step": 3175 }, { "epoch": 0.22094681554141013, "grad_norm": 1.546875, "learning_rate": 0.0018148570721692862, "loss": 1.1426, "step": 3176 }, { "epoch": 0.22101638317854533, "grad_norm": 1.328125, "learning_rate": 0.0018147264345231234, "loss": 1.5089, "step": 3177 }, { "epoch": 0.22108595081568055, "grad_norm": 1.3359375, "learning_rate": 0.001814595755509259, "loss": 1.2042, "step": 3178 }, { "epoch": 0.22115551845281575, "grad_norm": 1.3046875, "learning_rate": 0.0018144650351343277, "loss": 1.3103, "step": 3179 }, { "epoch": 0.22122508608995095, "grad_norm": 1.2578125, "learning_rate": 0.0018143342734049672, "loss": 1.256, "step": 3180 }, { "epoch": 0.22129465372708615, "grad_norm": 1.390625, "learning_rate": 0.0018142034703278172, "loss": 1.4062, "step": 3181 }, { "epoch": 0.22136422136422138, "grad_norm": 1.4453125, "learning_rate": 0.0018140726259095186, "loss": 1.5177, "step": 3182 }, { "epoch": 0.22143378900135657, "grad_norm": 1.53125, "learning_rate": 0.0018139417401567153, "loss": 1.3101, "step": 3183 }, { "epoch": 0.22150335663849177, "grad_norm": 1.2109375, "learning_rate": 0.0018138108130760528, "loss": 1.2181, "step": 3184 }, { "epoch": 0.22157292427562697, "grad_norm": 1.4453125, "learning_rate": 0.0018136798446741797, "loss": 1.1762, "step": 3185 }, { "epoch": 0.2216424919127622, "grad_norm": 1.34375, "learning_rate": 0.001813548834957745, "loss": 1.3214, "step": 3186 }, { "epoch": 0.2217120595498974, "grad_norm": 1.2578125, "learning_rate": 0.0018134177839334007, "loss": 1.332, "step": 3187 }, { "epoch": 0.2217816271870326, "grad_norm": 1.671875, "learning_rate": 0.0018132866916078017, "loss": 1.5935, "step": 3188 }, { "epoch": 0.2218511948241678, "grad_norm": 1.046875, "learning_rate": 0.0018131555579876037, "loss": 1.1784, "step": 3189 }, { "epoch": 0.221920762461303, "grad_norm": 1.328125, "learning_rate": 0.001813024383079465, "loss": 1.1413, "step": 3190 }, { "epoch": 0.22199033009843822, "grad_norm": 1.1484375, "learning_rate": 0.0018128931668900462, "loss": 1.2292, "step": 3191 }, { "epoch": 0.2220598977355734, "grad_norm": 1.21875, "learning_rate": 0.0018127619094260095, "loss": 1.0415, "step": 3192 }, { "epoch": 0.2221294653727086, "grad_norm": 1.5078125, "learning_rate": 0.0018126306106940198, "loss": 1.2261, "step": 3193 }, { "epoch": 0.2221990330098438, "grad_norm": 0.9453125, "learning_rate": 0.0018124992707007435, "loss": 0.8691, "step": 3194 }, { "epoch": 0.22226860064697904, "grad_norm": 1.2578125, "learning_rate": 0.0018123678894528498, "loss": 1.3883, "step": 3195 }, { "epoch": 0.22233816828411423, "grad_norm": 1.4609375, "learning_rate": 0.0018122364669570091, "loss": 1.3116, "step": 3196 }, { "epoch": 0.22240773592124943, "grad_norm": 1.3828125, "learning_rate": 0.0018121050032198945, "loss": 1.032, "step": 3197 }, { "epoch": 0.22247730355838463, "grad_norm": 1.4375, "learning_rate": 0.0018119734982481814, "loss": 1.2981, "step": 3198 }, { "epoch": 0.22254687119551986, "grad_norm": 1.5625, "learning_rate": 0.0018118419520485466, "loss": 1.3269, "step": 3199 }, { "epoch": 0.22261643883265506, "grad_norm": 1.71875, "learning_rate": 0.0018117103646276692, "loss": 1.2838, "step": 3200 }, { "epoch": 0.22268600646979025, "grad_norm": 1.140625, "learning_rate": 0.001811578735992231, "loss": 1.3026, "step": 3201 }, { "epoch": 0.22275557410692545, "grad_norm": 1.265625, "learning_rate": 0.0018114470661489154, "loss": 1.264, "step": 3202 }, { "epoch": 0.22282514174406065, "grad_norm": 1.4453125, "learning_rate": 0.0018113153551044077, "loss": 1.1016, "step": 3203 }, { "epoch": 0.22289470938119588, "grad_norm": 1.4921875, "learning_rate": 0.0018111836028653957, "loss": 1.1228, "step": 3204 }, { "epoch": 0.22296427701833108, "grad_norm": 1.1875, "learning_rate": 0.0018110518094385686, "loss": 1.4613, "step": 3205 }, { "epoch": 0.22303384465546627, "grad_norm": 1.6484375, "learning_rate": 0.001810919974830619, "loss": 1.4146, "step": 3206 }, { "epoch": 0.22310341229260147, "grad_norm": 1.5, "learning_rate": 0.0018107880990482403, "loss": 1.2999, "step": 3207 }, { "epoch": 0.2231729799297367, "grad_norm": 1.359375, "learning_rate": 0.0018106561820981286, "loss": 1.2888, "step": 3208 }, { "epoch": 0.2232425475668719, "grad_norm": 1.25, "learning_rate": 0.0018105242239869822, "loss": 1.3011, "step": 3209 }, { "epoch": 0.2233121152040071, "grad_norm": 1.890625, "learning_rate": 0.0018103922247215008, "loss": 1.2069, "step": 3210 }, { "epoch": 0.2233816828411423, "grad_norm": 1.1875, "learning_rate": 0.0018102601843083869, "loss": 1.0903, "step": 3211 }, { "epoch": 0.22345125047827752, "grad_norm": 1.28125, "learning_rate": 0.0018101281027543448, "loss": 1.044, "step": 3212 }, { "epoch": 0.22352081811541272, "grad_norm": 1.3359375, "learning_rate": 0.0018099959800660812, "loss": 1.3568, "step": 3213 }, { "epoch": 0.22359038575254792, "grad_norm": 1.578125, "learning_rate": 0.0018098638162503042, "loss": 1.4231, "step": 3214 }, { "epoch": 0.2236599533896831, "grad_norm": 1.3984375, "learning_rate": 0.001809731611313725, "loss": 1.0716, "step": 3215 }, { "epoch": 0.2237295210268183, "grad_norm": 1.578125, "learning_rate": 0.0018095993652630555, "loss": 1.3694, "step": 3216 }, { "epoch": 0.22379908866395354, "grad_norm": 1.1484375, "learning_rate": 0.001809467078105011, "loss": 1.1203, "step": 3217 }, { "epoch": 0.22386865630108874, "grad_norm": 1.3671875, "learning_rate": 0.0018093347498463086, "loss": 1.2607, "step": 3218 }, { "epoch": 0.22393822393822393, "grad_norm": 1.2890625, "learning_rate": 0.0018092023804936667, "loss": 1.3865, "step": 3219 }, { "epoch": 0.22400779157535913, "grad_norm": 2.28125, "learning_rate": 0.0018090699700538068, "loss": 1.1439, "step": 3220 }, { "epoch": 0.22407735921249436, "grad_norm": 1.6328125, "learning_rate": 0.0018089375185334515, "loss": 1.3804, "step": 3221 }, { "epoch": 0.22414692684962956, "grad_norm": 1.3515625, "learning_rate": 0.0018088050259393268, "loss": 1.1861, "step": 3222 }, { "epoch": 0.22421649448676476, "grad_norm": 1.171875, "learning_rate": 0.0018086724922781593, "loss": 1.2895, "step": 3223 }, { "epoch": 0.22428606212389995, "grad_norm": 1.1484375, "learning_rate": 0.0018085399175566783, "loss": 0.9787, "step": 3224 }, { "epoch": 0.22435562976103518, "grad_norm": 1.3984375, "learning_rate": 0.0018084073017816161, "loss": 1.5001, "step": 3225 }, { "epoch": 0.22442519739817038, "grad_norm": 1.2734375, "learning_rate": 0.0018082746449597056, "loss": 1.0478, "step": 3226 }, { "epoch": 0.22449476503530558, "grad_norm": 1.3984375, "learning_rate": 0.0018081419470976827, "loss": 1.1353, "step": 3227 }, { "epoch": 0.22456433267244078, "grad_norm": 1.4921875, "learning_rate": 0.001808009208202285, "loss": 1.3408, "step": 3228 }, { "epoch": 0.22463390030957597, "grad_norm": 1.8671875, "learning_rate": 0.0018078764282802526, "loss": 1.3925, "step": 3229 }, { "epoch": 0.2247034679467112, "grad_norm": 1.53125, "learning_rate": 0.001807743607338327, "loss": 1.2309, "step": 3230 }, { "epoch": 0.2247730355838464, "grad_norm": 1.484375, "learning_rate": 0.0018076107453832524, "loss": 1.3426, "step": 3231 }, { "epoch": 0.2248426032209816, "grad_norm": 1.484375, "learning_rate": 0.0018074778424217745, "loss": 1.1903, "step": 3232 }, { "epoch": 0.2249121708581168, "grad_norm": 1.296875, "learning_rate": 0.0018073448984606423, "loss": 1.2461, "step": 3233 }, { "epoch": 0.22498173849525202, "grad_norm": 1.6875, "learning_rate": 0.0018072119135066052, "loss": 1.4229, "step": 3234 }, { "epoch": 0.22505130613238722, "grad_norm": 1.3671875, "learning_rate": 0.0018070788875664157, "loss": 1.1639, "step": 3235 }, { "epoch": 0.22512087376952242, "grad_norm": 1.2421875, "learning_rate": 0.0018069458206468284, "loss": 1.0116, "step": 3236 }, { "epoch": 0.22519044140665762, "grad_norm": 1.296875, "learning_rate": 0.0018068127127545998, "loss": 1.3706, "step": 3237 }, { "epoch": 0.2252600090437928, "grad_norm": 1.4453125, "learning_rate": 0.0018066795638964877, "loss": 1.1156, "step": 3238 }, { "epoch": 0.22532957668092804, "grad_norm": 1.4765625, "learning_rate": 0.001806546374079254, "loss": 1.3318, "step": 3239 }, { "epoch": 0.22539914431806324, "grad_norm": 1.578125, "learning_rate": 0.0018064131433096601, "loss": 1.2423, "step": 3240 }, { "epoch": 0.22546871195519844, "grad_norm": 1.2890625, "learning_rate": 0.0018062798715944718, "loss": 1.0989, "step": 3241 }, { "epoch": 0.22553827959233363, "grad_norm": 1.28125, "learning_rate": 0.0018061465589404556, "loss": 1.5471, "step": 3242 }, { "epoch": 0.22560784722946886, "grad_norm": 1.40625, "learning_rate": 0.0018060132053543804, "loss": 1.1482, "step": 3243 }, { "epoch": 0.22567741486660406, "grad_norm": 1.515625, "learning_rate": 0.0018058798108430167, "loss": 1.229, "step": 3244 }, { "epoch": 0.22574698250373926, "grad_norm": 1.4140625, "learning_rate": 0.001805746375413139, "loss": 1.4308, "step": 3245 }, { "epoch": 0.22581655014087446, "grad_norm": 1.1953125, "learning_rate": 0.001805612899071521, "loss": 1.007, "step": 3246 }, { "epoch": 0.22588611777800968, "grad_norm": 1.25, "learning_rate": 0.0018054793818249406, "loss": 1.3335, "step": 3247 }, { "epoch": 0.22595568541514488, "grad_norm": 1.078125, "learning_rate": 0.0018053458236801773, "loss": 1.1016, "step": 3248 }, { "epoch": 0.22602525305228008, "grad_norm": 1.5078125, "learning_rate": 0.0018052122246440124, "loss": 1.1453, "step": 3249 }, { "epoch": 0.22609482068941528, "grad_norm": 1.328125, "learning_rate": 0.0018050785847232294, "loss": 1.2315, "step": 3250 }, { "epoch": 0.22616438832655048, "grad_norm": 1.3125, "learning_rate": 0.0018049449039246133, "loss": 1.1245, "step": 3251 }, { "epoch": 0.2262339559636857, "grad_norm": 1.1875, "learning_rate": 0.0018048111822549524, "loss": 1.0022, "step": 3252 }, { "epoch": 0.2263035236008209, "grad_norm": 1.4375, "learning_rate": 0.0018046774197210365, "loss": 1.274, "step": 3253 }, { "epoch": 0.2263730912379561, "grad_norm": 1.4375, "learning_rate": 0.0018045436163296566, "loss": 1.3615, "step": 3254 }, { "epoch": 0.2264426588750913, "grad_norm": 1.3046875, "learning_rate": 0.0018044097720876077, "loss": 1.3341, "step": 3255 }, { "epoch": 0.22651222651222652, "grad_norm": 1.3828125, "learning_rate": 0.0018042758870016847, "loss": 1.013, "step": 3256 }, { "epoch": 0.22658179414936172, "grad_norm": 1.8359375, "learning_rate": 0.001804141961078686, "loss": 1.3983, "step": 3257 }, { "epoch": 0.22665136178649692, "grad_norm": 1.21875, "learning_rate": 0.0018040079943254118, "loss": 1.1396, "step": 3258 }, { "epoch": 0.22672092942363212, "grad_norm": 1.6953125, "learning_rate": 0.001803873986748664, "loss": 1.4228, "step": 3259 }, { "epoch": 0.22679049706076734, "grad_norm": 1.296875, "learning_rate": 0.0018037399383552472, "loss": 1.3769, "step": 3260 }, { "epoch": 0.22686006469790254, "grad_norm": 1.484375, "learning_rate": 0.001803605849151967, "loss": 1.754, "step": 3261 }, { "epoch": 0.22692963233503774, "grad_norm": 1.1953125, "learning_rate": 0.0018034717191456327, "loss": 1.2549, "step": 3262 }, { "epoch": 0.22699919997217294, "grad_norm": 1.2734375, "learning_rate": 0.0018033375483430542, "loss": 1.3371, "step": 3263 }, { "epoch": 0.22706876760930814, "grad_norm": 1.453125, "learning_rate": 0.0018032033367510443, "loss": 1.3409, "step": 3264 }, { "epoch": 0.22713833524644336, "grad_norm": 1.15625, "learning_rate": 0.0018030690843764173, "loss": 1.0501, "step": 3265 }, { "epoch": 0.22720790288357856, "grad_norm": 1.203125, "learning_rate": 0.0018029347912259896, "loss": 1.2673, "step": 3266 }, { "epoch": 0.22727747052071376, "grad_norm": 1.1875, "learning_rate": 0.0018028004573065806, "loss": 1.1292, "step": 3267 }, { "epoch": 0.22734703815784896, "grad_norm": 2.4375, "learning_rate": 0.0018026660826250106, "loss": 1.181, "step": 3268 }, { "epoch": 0.22741660579498418, "grad_norm": 1.2421875, "learning_rate": 0.0018025316671881032, "loss": 1.2582, "step": 3269 }, { "epoch": 0.22748617343211938, "grad_norm": 1.1171875, "learning_rate": 0.0018023972110026822, "loss": 1.1066, "step": 3270 }, { "epoch": 0.22755574106925458, "grad_norm": 1.3515625, "learning_rate": 0.0018022627140755754, "loss": 1.1172, "step": 3271 }, { "epoch": 0.22762530870638978, "grad_norm": 1.1796875, "learning_rate": 0.0018021281764136119, "loss": 1.0191, "step": 3272 }, { "epoch": 0.227694876343525, "grad_norm": 1.0625, "learning_rate": 0.0018019935980236224, "loss": 1.1673, "step": 3273 }, { "epoch": 0.2277644439806602, "grad_norm": 1.4296875, "learning_rate": 0.0018018589789124404, "loss": 1.2633, "step": 3274 }, { "epoch": 0.2278340116177954, "grad_norm": 1.109375, "learning_rate": 0.001801724319086901, "loss": 1.1363, "step": 3275 }, { "epoch": 0.2279035792549306, "grad_norm": 1.2421875, "learning_rate": 0.0018015896185538418, "loss": 1.1201, "step": 3276 }, { "epoch": 0.2279731468920658, "grad_norm": 1.09375, "learning_rate": 0.001801454877320102, "loss": 1.094, "step": 3277 }, { "epoch": 0.22804271452920102, "grad_norm": 1.3515625, "learning_rate": 0.0018013200953925232, "loss": 1.1318, "step": 3278 }, { "epoch": 0.22811228216633622, "grad_norm": 1.2265625, "learning_rate": 0.001801185272777949, "loss": 1.079, "step": 3279 }, { "epoch": 0.22818184980347142, "grad_norm": 1.6953125, "learning_rate": 0.001801050409483225, "loss": 1.4306, "step": 3280 }, { "epoch": 0.22825141744060662, "grad_norm": 1.5, "learning_rate": 0.0018009155055151984, "loss": 1.4664, "step": 3281 }, { "epoch": 0.22832098507774184, "grad_norm": 1.296875, "learning_rate": 0.0018007805608807198, "loss": 1.2103, "step": 3282 }, { "epoch": 0.22839055271487704, "grad_norm": 1.3203125, "learning_rate": 0.0018006455755866404, "loss": 1.1655, "step": 3283 }, { "epoch": 0.22846012035201224, "grad_norm": 1.1953125, "learning_rate": 0.0018005105496398139, "loss": 1.2991, "step": 3284 }, { "epoch": 0.22852968798914744, "grad_norm": 1.6640625, "learning_rate": 0.0018003754830470968, "loss": 1.04, "step": 3285 }, { "epoch": 0.22859925562628267, "grad_norm": 1.3203125, "learning_rate": 0.001800240375815347, "loss": 1.297, "step": 3286 }, { "epoch": 0.22866882326341786, "grad_norm": 1.1171875, "learning_rate": 0.0018001052279514242, "loss": 1.0963, "step": 3287 }, { "epoch": 0.22873839090055306, "grad_norm": 1.3359375, "learning_rate": 0.001799970039462191, "loss": 0.9655, "step": 3288 }, { "epoch": 0.22880795853768826, "grad_norm": 1.7109375, "learning_rate": 0.0017998348103545113, "loss": 1.0901, "step": 3289 }, { "epoch": 0.22887752617482346, "grad_norm": 1.2109375, "learning_rate": 0.0017996995406352513, "loss": 1.0801, "step": 3290 }, { "epoch": 0.22894709381195869, "grad_norm": 1.2890625, "learning_rate": 0.0017995642303112794, "loss": 1.3627, "step": 3291 }, { "epoch": 0.22901666144909388, "grad_norm": 1.1953125, "learning_rate": 0.001799428879389466, "loss": 1.0702, "step": 3292 }, { "epoch": 0.22908622908622908, "grad_norm": 1.2734375, "learning_rate": 0.0017992934878766835, "loss": 1.3215, "step": 3293 }, { "epoch": 0.22915579672336428, "grad_norm": 1.2421875, "learning_rate": 0.0017991580557798065, "loss": 1.1234, "step": 3294 }, { "epoch": 0.2292253643604995, "grad_norm": 1.1328125, "learning_rate": 0.0017990225831057114, "loss": 1.1273, "step": 3295 }, { "epoch": 0.2292949319976347, "grad_norm": 1.1796875, "learning_rate": 0.001798887069861277, "loss": 1.0268, "step": 3296 }, { "epoch": 0.2293644996347699, "grad_norm": 1.0234375, "learning_rate": 0.0017987515160533837, "loss": 1.0267, "step": 3297 }, { "epoch": 0.2294340672719051, "grad_norm": 1.0859375, "learning_rate": 0.0017986159216889146, "loss": 0.9197, "step": 3298 }, { "epoch": 0.22950363490904033, "grad_norm": 1.453125, "learning_rate": 0.0017984802867747542, "loss": 1.3159, "step": 3299 }, { "epoch": 0.22957320254617553, "grad_norm": 1.390625, "learning_rate": 0.0017983446113177895, "loss": 1.6719, "step": 3300 }, { "epoch": 0.22964277018331072, "grad_norm": 1.1875, "learning_rate": 0.0017982088953249096, "loss": 1.0058, "step": 3301 }, { "epoch": 0.22971233782044592, "grad_norm": 1.1640625, "learning_rate": 0.0017980731388030052, "loss": 1.0851, "step": 3302 }, { "epoch": 0.22978190545758112, "grad_norm": 1.15625, "learning_rate": 0.0017979373417589693, "loss": 0.8804, "step": 3303 }, { "epoch": 0.22985147309471635, "grad_norm": 1.140625, "learning_rate": 0.0017978015041996969, "loss": 1.0491, "step": 3304 }, { "epoch": 0.22992104073185154, "grad_norm": 1.5234375, "learning_rate": 0.0017976656261320856, "loss": 1.2712, "step": 3305 }, { "epoch": 0.22999060836898674, "grad_norm": 1.125, "learning_rate": 0.0017975297075630342, "loss": 1.0895, "step": 3306 }, { "epoch": 0.23006017600612194, "grad_norm": 1.40625, "learning_rate": 0.0017973937484994443, "loss": 1.0422, "step": 3307 }, { "epoch": 0.23012974364325717, "grad_norm": 1.5703125, "learning_rate": 0.0017972577489482188, "loss": 0.9921, "step": 3308 }, { "epoch": 0.23019931128039237, "grad_norm": 1.2578125, "learning_rate": 0.0017971217089162632, "loss": 0.9485, "step": 3309 }, { "epoch": 0.23026887891752756, "grad_norm": 1.171875, "learning_rate": 0.0017969856284104854, "loss": 1.1615, "step": 3310 }, { "epoch": 0.23033844655466276, "grad_norm": 1.25, "learning_rate": 0.001796849507437794, "loss": 1.1171, "step": 3311 }, { "epoch": 0.230408014191798, "grad_norm": 1.2421875, "learning_rate": 0.0017967133460051014, "loss": 1.1948, "step": 3312 }, { "epoch": 0.2304775818289332, "grad_norm": 1.4453125, "learning_rate": 0.0017965771441193206, "loss": 1.2799, "step": 3313 }, { "epoch": 0.23054714946606839, "grad_norm": 1.1796875, "learning_rate": 0.0017964409017873675, "loss": 1.0338, "step": 3314 }, { "epoch": 0.23061671710320358, "grad_norm": 1.1171875, "learning_rate": 0.0017963046190161598, "loss": 1.2925, "step": 3315 }, { "epoch": 0.23068628474033878, "grad_norm": 1.5625, "learning_rate": 0.0017961682958126174, "loss": 1.0702, "step": 3316 }, { "epoch": 0.230755852377474, "grad_norm": 1.1015625, "learning_rate": 0.0017960319321836619, "loss": 1.0485, "step": 3317 }, { "epoch": 0.2308254200146092, "grad_norm": 1.375, "learning_rate": 0.001795895528136217, "loss": 1.0255, "step": 3318 }, { "epoch": 0.2308949876517444, "grad_norm": 1.171875, "learning_rate": 0.0017957590836772091, "loss": 1.1028, "step": 3319 }, { "epoch": 0.2309645552888796, "grad_norm": 1.359375, "learning_rate": 0.0017956225988135653, "loss": 1.2513, "step": 3320 }, { "epoch": 0.23103412292601483, "grad_norm": 1.3671875, "learning_rate": 0.0017954860735522166, "loss": 1.1436, "step": 3321 }, { "epoch": 0.23110369056315003, "grad_norm": 1.2734375, "learning_rate": 0.0017953495079000945, "loss": 1.1775, "step": 3322 }, { "epoch": 0.23117325820028523, "grad_norm": 1.1953125, "learning_rate": 0.0017952129018641333, "loss": 1.1371, "step": 3323 }, { "epoch": 0.23124282583742042, "grad_norm": 1.4609375, "learning_rate": 0.001795076255451269, "loss": 1.3008, "step": 3324 }, { "epoch": 0.23131239347455565, "grad_norm": 1.2734375, "learning_rate": 0.00179493956866844, "loss": 1.0581, "step": 3325 }, { "epoch": 0.23138196111169085, "grad_norm": 1.1875, "learning_rate": 0.0017948028415225865, "loss": 1.0475, "step": 3326 }, { "epoch": 0.23145152874882605, "grad_norm": 1.3046875, "learning_rate": 0.0017946660740206508, "loss": 1.3079, "step": 3327 }, { "epoch": 0.23152109638596124, "grad_norm": 1.234375, "learning_rate": 0.0017945292661695773, "loss": 1.2221, "step": 3328 }, { "epoch": 0.23159066402309644, "grad_norm": 1.3046875, "learning_rate": 0.0017943924179763125, "loss": 1.0878, "step": 3329 }, { "epoch": 0.23166023166023167, "grad_norm": 1.5234375, "learning_rate": 0.0017942555294478044, "loss": 1.4133, "step": 3330 }, { "epoch": 0.23172979929736687, "grad_norm": 1.1953125, "learning_rate": 0.0017941186005910042, "loss": 1.1608, "step": 3331 }, { "epoch": 0.23179936693450207, "grad_norm": 1.25, "learning_rate": 0.001793981631412864, "loss": 1.3104, "step": 3332 }, { "epoch": 0.23186893457163726, "grad_norm": 1.2421875, "learning_rate": 0.0017938446219203385, "loss": 1.2514, "step": 3333 }, { "epoch": 0.2319385022087725, "grad_norm": 1.125, "learning_rate": 0.0017937075721203843, "loss": 1.1533, "step": 3334 }, { "epoch": 0.2320080698459077, "grad_norm": 1.15625, "learning_rate": 0.0017935704820199604, "loss": 1.4404, "step": 3335 }, { "epoch": 0.2320776374830429, "grad_norm": 1.109375, "learning_rate": 0.0017934333516260272, "loss": 1.0626, "step": 3336 }, { "epoch": 0.23214720512017809, "grad_norm": 1.3671875, "learning_rate": 0.0017932961809455476, "loss": 1.2449, "step": 3337 }, { "epoch": 0.2322167727573133, "grad_norm": 1.234375, "learning_rate": 0.0017931589699854865, "loss": 1.0743, "step": 3338 }, { "epoch": 0.2322863403944485, "grad_norm": 1.15625, "learning_rate": 0.0017930217187528106, "loss": 1.2008, "step": 3339 }, { "epoch": 0.2323559080315837, "grad_norm": 1.34375, "learning_rate": 0.0017928844272544892, "loss": 1.1208, "step": 3340 }, { "epoch": 0.2324254756687189, "grad_norm": 1.203125, "learning_rate": 0.0017927470954974924, "loss": 1.2209, "step": 3341 }, { "epoch": 0.2324950433058541, "grad_norm": 1.21875, "learning_rate": 0.0017926097234887944, "loss": 1.5304, "step": 3342 }, { "epoch": 0.23256461094298933, "grad_norm": 0.9921875, "learning_rate": 0.0017924723112353695, "loss": 0.9315, "step": 3343 }, { "epoch": 0.23263417858012453, "grad_norm": 1.3046875, "learning_rate": 0.0017923348587441951, "loss": 1.0834, "step": 3344 }, { "epoch": 0.23270374621725973, "grad_norm": 1.2578125, "learning_rate": 0.0017921973660222502, "loss": 1.0801, "step": 3345 }, { "epoch": 0.23277331385439493, "grad_norm": 1.2109375, "learning_rate": 0.001792059833076516, "loss": 0.9739, "step": 3346 }, { "epoch": 0.23284288149153015, "grad_norm": 1.5546875, "learning_rate": 0.0017919222599139758, "loss": 1.2927, "step": 3347 }, { "epoch": 0.23291244912866535, "grad_norm": 1.2578125, "learning_rate": 0.0017917846465416148, "loss": 0.9077, "step": 3348 }, { "epoch": 0.23298201676580055, "grad_norm": 1.03125, "learning_rate": 0.00179164699296642, "loss": 0.7696, "step": 3349 }, { "epoch": 0.23305158440293575, "grad_norm": 1.296875, "learning_rate": 0.0017915092991953815, "loss": 1.3586, "step": 3350 }, { "epoch": 0.23312115204007097, "grad_norm": 1.28125, "learning_rate": 0.0017913715652354903, "loss": 1.1076, "step": 3351 }, { "epoch": 0.23319071967720617, "grad_norm": 1.046875, "learning_rate": 0.0017912337910937395, "loss": 1.0509, "step": 3352 }, { "epoch": 0.23326028731434137, "grad_norm": 1.2421875, "learning_rate": 0.0017910959767771253, "loss": 1.5079, "step": 3353 }, { "epoch": 0.23332985495147657, "grad_norm": 1.078125, "learning_rate": 0.0017909581222926446, "loss": 0.9356, "step": 3354 }, { "epoch": 0.23339942258861177, "grad_norm": 1.0625, "learning_rate": 0.001790820227647297, "loss": 1.0451, "step": 3355 }, { "epoch": 0.233468990225747, "grad_norm": 1.0546875, "learning_rate": 0.0017906822928480848, "loss": 1.0699, "step": 3356 }, { "epoch": 0.2335385578628822, "grad_norm": 1.1953125, "learning_rate": 0.0017905443179020107, "loss": 1.1171, "step": 3357 }, { "epoch": 0.2336081255000174, "grad_norm": 1.3203125, "learning_rate": 0.0017904063028160806, "loss": 1.2825, "step": 3358 }, { "epoch": 0.2336776931371526, "grad_norm": 1.25, "learning_rate": 0.001790268247597303, "loss": 1.0923, "step": 3359 }, { "epoch": 0.2337472607742878, "grad_norm": 1.2890625, "learning_rate": 0.0017901301522526864, "loss": 1.2474, "step": 3360 }, { "epoch": 0.233816828411423, "grad_norm": 1.1328125, "learning_rate": 0.0017899920167892436, "loss": 1.1623, "step": 3361 }, { "epoch": 0.2338863960485582, "grad_norm": 1.890625, "learning_rate": 0.001789853841213988, "loss": 1.3545, "step": 3362 }, { "epoch": 0.2339559636856934, "grad_norm": 1.3984375, "learning_rate": 0.0017897156255339355, "loss": 1.0498, "step": 3363 }, { "epoch": 0.23402553132282863, "grad_norm": 1.2578125, "learning_rate": 0.0017895773697561039, "loss": 1.158, "step": 3364 }, { "epoch": 0.23409509895996383, "grad_norm": 0.99609375, "learning_rate": 0.0017894390738875132, "loss": 0.9316, "step": 3365 }, { "epoch": 0.23416466659709903, "grad_norm": 0.984375, "learning_rate": 0.0017893007379351854, "loss": 0.9714, "step": 3366 }, { "epoch": 0.23423423423423423, "grad_norm": 1.3984375, "learning_rate": 0.0017891623619061445, "loss": 1.3723, "step": 3367 }, { "epoch": 0.23430380187136943, "grad_norm": 1.2734375, "learning_rate": 0.0017890239458074166, "loss": 1.2839, "step": 3368 }, { "epoch": 0.23437336950850465, "grad_norm": 1.3359375, "learning_rate": 0.0017888854896460297, "loss": 1.397, "step": 3369 }, { "epoch": 0.23444293714563985, "grad_norm": 1.28125, "learning_rate": 0.0017887469934290139, "loss": 1.3389, "step": 3370 }, { "epoch": 0.23451250478277505, "grad_norm": 1.2578125, "learning_rate": 0.0017886084571634014, "loss": 1.0145, "step": 3371 }, { "epoch": 0.23458207241991025, "grad_norm": 1.34375, "learning_rate": 0.0017884698808562263, "loss": 1.1883, "step": 3372 }, { "epoch": 0.23465164005704547, "grad_norm": 1.0078125, "learning_rate": 0.0017883312645145249, "loss": 1.115, "step": 3373 }, { "epoch": 0.23472120769418067, "grad_norm": 1.2265625, "learning_rate": 0.0017881926081453354, "loss": 1.1842, "step": 3374 }, { "epoch": 0.23479077533131587, "grad_norm": 1.1171875, "learning_rate": 0.0017880539117556978, "loss": 0.9997, "step": 3375 }, { "epoch": 0.23486034296845107, "grad_norm": 1.2734375, "learning_rate": 0.0017879151753526549, "loss": 0.9747, "step": 3376 }, { "epoch": 0.2349299106055863, "grad_norm": 1.140625, "learning_rate": 0.0017877763989432504, "loss": 1.0838, "step": 3377 }, { "epoch": 0.2349994782427215, "grad_norm": 1.2109375, "learning_rate": 0.0017876375825345314, "loss": 1.1765, "step": 3378 }, { "epoch": 0.2350690458798567, "grad_norm": 1.2578125, "learning_rate": 0.001787498726133546, "loss": 1.1738, "step": 3379 }, { "epoch": 0.2351386135169919, "grad_norm": 1.1328125, "learning_rate": 0.0017873598297473445, "loss": 1.1137, "step": 3380 }, { "epoch": 0.2352081811541271, "grad_norm": 1.2421875, "learning_rate": 0.0017872208933829793, "loss": 1.1858, "step": 3381 }, { "epoch": 0.23527774879126231, "grad_norm": 1.25, "learning_rate": 0.0017870819170475053, "loss": 1.1267, "step": 3382 }, { "epoch": 0.2353473164283975, "grad_norm": 1.21875, "learning_rate": 0.0017869429007479783, "loss": 1.1509, "step": 3383 }, { "epoch": 0.2354168840655327, "grad_norm": 1.1328125, "learning_rate": 0.0017868038444914577, "loss": 1.2707, "step": 3384 }, { "epoch": 0.2354864517026679, "grad_norm": 1.1015625, "learning_rate": 0.0017866647482850033, "loss": 1.1556, "step": 3385 }, { "epoch": 0.23555601933980314, "grad_norm": 1.5546875, "learning_rate": 0.0017865256121356783, "loss": 1.5673, "step": 3386 }, { "epoch": 0.23562558697693833, "grad_norm": 1.2109375, "learning_rate": 0.001786386436050547, "loss": 1.1818, "step": 3387 }, { "epoch": 0.23569515461407353, "grad_norm": 1.2421875, "learning_rate": 0.0017862472200366763, "loss": 1.1394, "step": 3388 }, { "epoch": 0.23576472225120873, "grad_norm": 1.3046875, "learning_rate": 0.0017861079641011345, "loss": 0.9994, "step": 3389 }, { "epoch": 0.23583428988834393, "grad_norm": 1.203125, "learning_rate": 0.0017859686682509927, "loss": 1.1313, "step": 3390 }, { "epoch": 0.23590385752547915, "grad_norm": 1.453125, "learning_rate": 0.0017858293324933237, "loss": 1.2673, "step": 3391 }, { "epoch": 0.23597342516261435, "grad_norm": 1.5078125, "learning_rate": 0.0017856899568352018, "loss": 1.2771, "step": 3392 }, { "epoch": 0.23604299279974955, "grad_norm": 1.046875, "learning_rate": 0.0017855505412837044, "loss": 1.0559, "step": 3393 }, { "epoch": 0.23611256043688475, "grad_norm": 1.1171875, "learning_rate": 0.0017854110858459094, "loss": 1.2803, "step": 3394 }, { "epoch": 0.23618212807401998, "grad_norm": 1.21875, "learning_rate": 0.0017852715905288985, "loss": 1.2212, "step": 3395 }, { "epoch": 0.23625169571115517, "grad_norm": 1.28125, "learning_rate": 0.0017851320553397545, "loss": 1.2095, "step": 3396 }, { "epoch": 0.23632126334829037, "grad_norm": 1.2265625, "learning_rate": 0.001784992480285562, "loss": 1.1702, "step": 3397 }, { "epoch": 0.23639083098542557, "grad_norm": 1.4296875, "learning_rate": 0.0017848528653734079, "loss": 1.1098, "step": 3398 }, { "epoch": 0.2364603986225608, "grad_norm": 1.3671875, "learning_rate": 0.0017847132106103812, "loss": 1.2043, "step": 3399 }, { "epoch": 0.236529966259696, "grad_norm": 1.28125, "learning_rate": 0.0017845735160035732, "loss": 1.3687, "step": 3400 }, { "epoch": 0.2365995338968312, "grad_norm": 1.3359375, "learning_rate": 0.0017844337815600762, "loss": 1.0551, "step": 3401 }, { "epoch": 0.2366691015339664, "grad_norm": 1.109375, "learning_rate": 0.0017842940072869858, "loss": 1.0612, "step": 3402 }, { "epoch": 0.2367386691711016, "grad_norm": 1.3046875, "learning_rate": 0.001784154193191399, "loss": 1.1619, "step": 3403 }, { "epoch": 0.23680823680823682, "grad_norm": 1.140625, "learning_rate": 0.0017840143392804145, "loss": 1.0387, "step": 3404 }, { "epoch": 0.23687780444537201, "grad_norm": 1.6171875, "learning_rate": 0.0017838744455611337, "loss": 1.0793, "step": 3405 }, { "epoch": 0.2369473720825072, "grad_norm": 1.3828125, "learning_rate": 0.0017837345120406596, "loss": 1.1494, "step": 3406 }, { "epoch": 0.2370169397196424, "grad_norm": 1.3359375, "learning_rate": 0.001783594538726097, "loss": 0.9968, "step": 3407 }, { "epoch": 0.23708650735677764, "grad_norm": 1.125, "learning_rate": 0.0017834545256245535, "loss": 0.9704, "step": 3408 }, { "epoch": 0.23715607499391284, "grad_norm": 1.1953125, "learning_rate": 0.0017833144727431383, "loss": 1.1628, "step": 3409 }, { "epoch": 0.23722564263104803, "grad_norm": 1.140625, "learning_rate": 0.0017831743800889623, "loss": 1.0879, "step": 3410 }, { "epoch": 0.23729521026818323, "grad_norm": 1.0546875, "learning_rate": 0.0017830342476691386, "loss": 1.3391, "step": 3411 }, { "epoch": 0.23736477790531846, "grad_norm": 1.15625, "learning_rate": 0.0017828940754907828, "loss": 1.1479, "step": 3412 }, { "epoch": 0.23743434554245366, "grad_norm": 1.234375, "learning_rate": 0.0017827538635610117, "loss": 1.2333, "step": 3413 }, { "epoch": 0.23750391317958885, "grad_norm": 1.0234375, "learning_rate": 0.0017826136118869447, "loss": 1.0714, "step": 3414 }, { "epoch": 0.23757348081672405, "grad_norm": 1.3203125, "learning_rate": 0.0017824733204757034, "loss": 1.0966, "step": 3415 }, { "epoch": 0.23764304845385925, "grad_norm": 1.0, "learning_rate": 0.0017823329893344106, "loss": 0.9634, "step": 3416 }, { "epoch": 0.23771261609099448, "grad_norm": 0.8984375, "learning_rate": 0.0017821926184701923, "loss": 0.8942, "step": 3417 }, { "epoch": 0.23778218372812968, "grad_norm": 1.2109375, "learning_rate": 0.001782052207890175, "loss": 1.1385, "step": 3418 }, { "epoch": 0.23785175136526487, "grad_norm": 0.9453125, "learning_rate": 0.0017819117576014884, "loss": 0.9763, "step": 3419 }, { "epoch": 0.23792131900240007, "grad_norm": 1.125, "learning_rate": 0.001781771267611264, "loss": 1.2327, "step": 3420 }, { "epoch": 0.2379908866395353, "grad_norm": 1.5078125, "learning_rate": 0.0017816307379266351, "loss": 1.1842, "step": 3421 }, { "epoch": 0.2380604542766705, "grad_norm": 1.1171875, "learning_rate": 0.0017814901685547372, "loss": 1.172, "step": 3422 }, { "epoch": 0.2381300219138057, "grad_norm": 1.3984375, "learning_rate": 0.0017813495595027072, "loss": 1.2356, "step": 3423 }, { "epoch": 0.2381995895509409, "grad_norm": 1.640625, "learning_rate": 0.0017812089107776847, "loss": 1.1543, "step": 3424 }, { "epoch": 0.23826915718807612, "grad_norm": 1.015625, "learning_rate": 0.0017810682223868117, "loss": 1.3024, "step": 3425 }, { "epoch": 0.23833872482521132, "grad_norm": 1.265625, "learning_rate": 0.0017809274943372312, "loss": 1.1413, "step": 3426 }, { "epoch": 0.23840829246234652, "grad_norm": 1.1015625, "learning_rate": 0.0017807867266360887, "loss": 1.1717, "step": 3427 }, { "epoch": 0.23847786009948171, "grad_norm": 1.40625, "learning_rate": 0.0017806459192905315, "loss": 1.2329, "step": 3428 }, { "epoch": 0.2385474277366169, "grad_norm": 1.4140625, "learning_rate": 0.0017805050723077095, "loss": 1.3393, "step": 3429 }, { "epoch": 0.23861699537375214, "grad_norm": 1.125, "learning_rate": 0.0017803641856947738, "loss": 1.1232, "step": 3430 }, { "epoch": 0.23868656301088734, "grad_norm": 1.2734375, "learning_rate": 0.0017802232594588778, "loss": 1.2795, "step": 3431 }, { "epoch": 0.23875613064802254, "grad_norm": 1.390625, "learning_rate": 0.001780082293607178, "loss": 1.2462, "step": 3432 }, { "epoch": 0.23882569828515773, "grad_norm": 0.890625, "learning_rate": 0.0017799412881468306, "loss": 1.0268, "step": 3433 }, { "epoch": 0.23889526592229296, "grad_norm": 1.09375, "learning_rate": 0.001779800243084996, "loss": 1.2895, "step": 3434 }, { "epoch": 0.23896483355942816, "grad_norm": 1.1171875, "learning_rate": 0.0017796591584288356, "loss": 1.1936, "step": 3435 }, { "epoch": 0.23903440119656336, "grad_norm": 1.34375, "learning_rate": 0.001779518034185513, "loss": 1.4595, "step": 3436 }, { "epoch": 0.23910396883369855, "grad_norm": 1.265625, "learning_rate": 0.0017793768703621936, "loss": 1.2653, "step": 3437 }, { "epoch": 0.23917353647083378, "grad_norm": 1.109375, "learning_rate": 0.001779235666966045, "loss": 0.8807, "step": 3438 }, { "epoch": 0.23924310410796898, "grad_norm": 1.4375, "learning_rate": 0.0017790944240042368, "loss": 1.3037, "step": 3439 }, { "epoch": 0.23931267174510418, "grad_norm": 1.2109375, "learning_rate": 0.0017789531414839409, "loss": 1.1275, "step": 3440 }, { "epoch": 0.23938223938223938, "grad_norm": 1.1484375, "learning_rate": 0.0017788118194123307, "loss": 1.1571, "step": 3441 }, { "epoch": 0.23945180701937457, "grad_norm": 1.390625, "learning_rate": 0.0017786704577965814, "loss": 1.0926, "step": 3442 }, { "epoch": 0.2395213746565098, "grad_norm": 1.265625, "learning_rate": 0.0017785290566438717, "loss": 1.2125, "step": 3443 }, { "epoch": 0.239590942293645, "grad_norm": 1.15625, "learning_rate": 0.0017783876159613802, "loss": 1.1799, "step": 3444 }, { "epoch": 0.2396605099307802, "grad_norm": 1.0859375, "learning_rate": 0.0017782461357562886, "loss": 1.2677, "step": 3445 }, { "epoch": 0.2397300775679154, "grad_norm": 1.3125, "learning_rate": 0.0017781046160357814, "loss": 1.1287, "step": 3446 }, { "epoch": 0.23979964520505062, "grad_norm": 1.3125, "learning_rate": 0.0017779630568070435, "loss": 1.292, "step": 3447 }, { "epoch": 0.23986921284218582, "grad_norm": 1.140625, "learning_rate": 0.0017778214580772627, "loss": 1.0825, "step": 3448 }, { "epoch": 0.23993878047932102, "grad_norm": 1.3515625, "learning_rate": 0.001777679819853629, "loss": 1.2443, "step": 3449 }, { "epoch": 0.24000834811645622, "grad_norm": 1.3203125, "learning_rate": 0.001777538142143334, "loss": 1.2154, "step": 3450 }, { "epoch": 0.24007791575359144, "grad_norm": 1.4453125, "learning_rate": 0.001777396424953571, "loss": 1.3522, "step": 3451 }, { "epoch": 0.24014748339072664, "grad_norm": 1.4296875, "learning_rate": 0.0017772546682915359, "loss": 1.2646, "step": 3452 }, { "epoch": 0.24021705102786184, "grad_norm": 1.1640625, "learning_rate": 0.0017771128721644264, "loss": 0.9994, "step": 3453 }, { "epoch": 0.24028661866499704, "grad_norm": 1.375, "learning_rate": 0.001776971036579442, "loss": 0.911, "step": 3454 }, { "epoch": 0.24035618630213224, "grad_norm": 1.2265625, "learning_rate": 0.0017768291615437848, "loss": 1.0556, "step": 3455 }, { "epoch": 0.24042575393926746, "grad_norm": 1.5546875, "learning_rate": 0.0017766872470646583, "loss": 1.2429, "step": 3456 }, { "epoch": 0.24049532157640266, "grad_norm": 1.5546875, "learning_rate": 0.0017765452931492681, "loss": 1.3328, "step": 3457 }, { "epoch": 0.24056488921353786, "grad_norm": 1.2734375, "learning_rate": 0.001776403299804822, "loss": 1.1474, "step": 3458 }, { "epoch": 0.24063445685067306, "grad_norm": 1.6015625, "learning_rate": 0.0017762612670385299, "loss": 1.0391, "step": 3459 }, { "epoch": 0.24070402448780828, "grad_norm": 1.34375, "learning_rate": 0.001776119194857603, "loss": 1.2632, "step": 3460 }, { "epoch": 0.24077359212494348, "grad_norm": 1.3359375, "learning_rate": 0.0017759770832692556, "loss": 1.2716, "step": 3461 }, { "epoch": 0.24084315976207868, "grad_norm": 1.1328125, "learning_rate": 0.001775834932280703, "loss": 0.9257, "step": 3462 }, { "epoch": 0.24091272739921388, "grad_norm": 1.296875, "learning_rate": 0.001775692741899163, "loss": 1.1569, "step": 3463 }, { "epoch": 0.2409822950363491, "grad_norm": 1.3203125, "learning_rate": 0.0017755505121318552, "loss": 1.3196, "step": 3464 }, { "epoch": 0.2410518626734843, "grad_norm": 1.34375, "learning_rate": 0.0017754082429860018, "loss": 1.1923, "step": 3465 }, { "epoch": 0.2411214303106195, "grad_norm": 1.140625, "learning_rate": 0.001775265934468826, "loss": 1.182, "step": 3466 }, { "epoch": 0.2411909979477547, "grad_norm": 1.2421875, "learning_rate": 0.0017751235865875537, "loss": 1.1103, "step": 3467 }, { "epoch": 0.2412605655848899, "grad_norm": 1.1328125, "learning_rate": 0.0017749811993494125, "loss": 1.133, "step": 3468 }, { "epoch": 0.24133013322202512, "grad_norm": 1.15625, "learning_rate": 0.0017748387727616322, "loss": 1.1061, "step": 3469 }, { "epoch": 0.24139970085916032, "grad_norm": 1.28125, "learning_rate": 0.0017746963068314447, "loss": 1.079, "step": 3470 }, { "epoch": 0.24146926849629552, "grad_norm": 1.3203125, "learning_rate": 0.0017745538015660834, "loss": 1.24, "step": 3471 }, { "epoch": 0.24153883613343072, "grad_norm": 1.0625, "learning_rate": 0.0017744112569727838, "loss": 1.0469, "step": 3472 }, { "epoch": 0.24160840377056594, "grad_norm": 1.1796875, "learning_rate": 0.0017742686730587841, "loss": 1.1172, "step": 3473 }, { "epoch": 0.24167797140770114, "grad_norm": 1.2578125, "learning_rate": 0.001774126049831324, "loss": 1.0453, "step": 3474 }, { "epoch": 0.24174753904483634, "grad_norm": 1.3125, "learning_rate": 0.0017739833872976447, "loss": 0.9883, "step": 3475 }, { "epoch": 0.24181710668197154, "grad_norm": 1.46875, "learning_rate": 0.0017738406854649902, "loss": 1.3942, "step": 3476 }, { "epoch": 0.24188667431910676, "grad_norm": 1.28125, "learning_rate": 0.0017736979443406062, "loss": 0.8944, "step": 3477 }, { "epoch": 0.24195624195624196, "grad_norm": 1.28125, "learning_rate": 0.0017735551639317402, "loss": 1.2261, "step": 3478 }, { "epoch": 0.24202580959337716, "grad_norm": 1.078125, "learning_rate": 0.0017734123442456422, "loss": 1.1733, "step": 3479 }, { "epoch": 0.24209537723051236, "grad_norm": 1.2109375, "learning_rate": 0.0017732694852895636, "loss": 1.0871, "step": 3480 }, { "epoch": 0.24216494486764756, "grad_norm": 1.0078125, "learning_rate": 0.001773126587070758, "loss": 1.0592, "step": 3481 }, { "epoch": 0.24223451250478278, "grad_norm": 1.5625, "learning_rate": 0.001772983649596481, "loss": 1.2381, "step": 3482 }, { "epoch": 0.24230408014191798, "grad_norm": 1.53125, "learning_rate": 0.0017728406728739908, "loss": 0.8463, "step": 3483 }, { "epoch": 0.24237364777905318, "grad_norm": 1.0234375, "learning_rate": 0.0017726976569105463, "loss": 1.07, "step": 3484 }, { "epoch": 0.24244321541618838, "grad_norm": 0.93359375, "learning_rate": 0.0017725546017134098, "loss": 1.1426, "step": 3485 }, { "epoch": 0.2425127830533236, "grad_norm": 1.140625, "learning_rate": 0.0017724115072898442, "loss": 1.222, "step": 3486 }, { "epoch": 0.2425823506904588, "grad_norm": 1.4765625, "learning_rate": 0.0017722683736471159, "loss": 1.0897, "step": 3487 }, { "epoch": 0.242651918327594, "grad_norm": 1.515625, "learning_rate": 0.001772125200792492, "loss": 0.8916, "step": 3488 }, { "epoch": 0.2427214859647292, "grad_norm": 1.2578125, "learning_rate": 0.0017719819887332417, "loss": 1.1342, "step": 3489 }, { "epoch": 0.24279105360186443, "grad_norm": 1.125, "learning_rate": 0.0017718387374766379, "loss": 0.9353, "step": 3490 }, { "epoch": 0.24286062123899962, "grad_norm": 1.1171875, "learning_rate": 0.001771695447029953, "loss": 1.2812, "step": 3491 }, { "epoch": 0.24293018887613482, "grad_norm": 1.125, "learning_rate": 0.0017715521174004624, "loss": 1.2196, "step": 3492 }, { "epoch": 0.24299975651327002, "grad_norm": 1.1484375, "learning_rate": 0.0017714087485954449, "loss": 1.0235, "step": 3493 }, { "epoch": 0.24306932415040522, "grad_norm": 0.9921875, "learning_rate": 0.001771265340622179, "loss": 0.9502, "step": 3494 }, { "epoch": 0.24313889178754045, "grad_norm": 1.09375, "learning_rate": 0.0017711218934879467, "loss": 0.9577, "step": 3495 }, { "epoch": 0.24320845942467564, "grad_norm": 1.3125, "learning_rate": 0.0017709784072000314, "loss": 1.001, "step": 3496 }, { "epoch": 0.24327802706181084, "grad_norm": 1.265625, "learning_rate": 0.0017708348817657188, "loss": 1.1289, "step": 3497 }, { "epoch": 0.24334759469894604, "grad_norm": 1.296875, "learning_rate": 0.0017706913171922959, "loss": 1.1366, "step": 3498 }, { "epoch": 0.24341716233608127, "grad_norm": 1.0390625, "learning_rate": 0.0017705477134870526, "loss": 1.0603, "step": 3499 }, { "epoch": 0.24348672997321646, "grad_norm": 1.171875, "learning_rate": 0.00177040407065728, "loss": 1.1888, "step": 3500 }, { "epoch": 0.24355629761035166, "grad_norm": 0.9609375, "learning_rate": 0.0017702603887102721, "loss": 0.9626, "step": 3501 }, { "epoch": 0.24362586524748686, "grad_norm": 1.3046875, "learning_rate": 0.001770116667653324, "loss": 1.2676, "step": 3502 }, { "epoch": 0.2436954328846221, "grad_norm": 1.28125, "learning_rate": 0.0017699729074937332, "loss": 1.0256, "step": 3503 }, { "epoch": 0.24376500052175729, "grad_norm": 1.3125, "learning_rate": 0.001769829108238799, "loss": 1.3075, "step": 3504 }, { "epoch": 0.24383456815889248, "grad_norm": 1.4375, "learning_rate": 0.001769685269895823, "loss": 1.1977, "step": 3505 }, { "epoch": 0.24390413579602768, "grad_norm": 1.3046875, "learning_rate": 0.0017695413924721088, "loss": 1.1401, "step": 3506 }, { "epoch": 0.24397370343316288, "grad_norm": 1.1171875, "learning_rate": 0.0017693974759749609, "loss": 1.0515, "step": 3507 }, { "epoch": 0.2440432710702981, "grad_norm": 1.0, "learning_rate": 0.0017692535204116876, "loss": 1.1446, "step": 3508 }, { "epoch": 0.2441128387074333, "grad_norm": 1.03125, "learning_rate": 0.0017691095257895977, "loss": 1.0242, "step": 3509 }, { "epoch": 0.2441824063445685, "grad_norm": 1.265625, "learning_rate": 0.0017689654921160028, "loss": 1.2903, "step": 3510 }, { "epoch": 0.2442519739817037, "grad_norm": 1.1328125, "learning_rate": 0.0017688214193982159, "loss": 0.9139, "step": 3511 }, { "epoch": 0.24432154161883893, "grad_norm": 1.265625, "learning_rate": 0.0017686773076435527, "loss": 0.9588, "step": 3512 }, { "epoch": 0.24439110925597413, "grad_norm": 1.2578125, "learning_rate": 0.00176853315685933, "loss": 1.2142, "step": 3513 }, { "epoch": 0.24446067689310932, "grad_norm": 1.078125, "learning_rate": 0.0017683889670528675, "loss": 1.0694, "step": 3514 }, { "epoch": 0.24453024453024452, "grad_norm": 1.1953125, "learning_rate": 0.0017682447382314861, "loss": 1.073, "step": 3515 }, { "epoch": 0.24459981216737975, "grad_norm": 1.234375, "learning_rate": 0.0017681004704025091, "loss": 0.9615, "step": 3516 }, { "epoch": 0.24466937980451495, "grad_norm": 1.2265625, "learning_rate": 0.001767956163573262, "loss": 0.8559, "step": 3517 }, { "epoch": 0.24473894744165015, "grad_norm": 1.0390625, "learning_rate": 0.0017678118177510713, "loss": 1.0005, "step": 3518 }, { "epoch": 0.24480851507878534, "grad_norm": 1.2578125, "learning_rate": 0.0017676674329432669, "loss": 1.0934, "step": 3519 }, { "epoch": 0.24487808271592054, "grad_norm": 1.046875, "learning_rate": 0.0017675230091571791, "loss": 1.0709, "step": 3520 }, { "epoch": 0.24494765035305577, "grad_norm": 1.046875, "learning_rate": 0.001767378546400142, "loss": 1.1684, "step": 3521 }, { "epoch": 0.24501721799019097, "grad_norm": 1.0859375, "learning_rate": 0.00176723404467949, "loss": 1.1248, "step": 3522 }, { "epoch": 0.24508678562732616, "grad_norm": 1.03125, "learning_rate": 0.0017670895040025605, "loss": 1.1665, "step": 3523 }, { "epoch": 0.24515635326446136, "grad_norm": 1.6484375, "learning_rate": 0.0017669449243766923, "loss": 1.1104, "step": 3524 }, { "epoch": 0.2452259209015966, "grad_norm": 1.234375, "learning_rate": 0.0017668003058092263, "loss": 0.9688, "step": 3525 }, { "epoch": 0.2452954885387318, "grad_norm": 1.265625, "learning_rate": 0.001766655648307506, "loss": 1.1997, "step": 3526 }, { "epoch": 0.24536505617586699, "grad_norm": 1.203125, "learning_rate": 0.001766510951878876, "loss": 0.9125, "step": 3527 }, { "epoch": 0.24543462381300218, "grad_norm": 1.1875, "learning_rate": 0.0017663662165306833, "loss": 1.3179, "step": 3528 }, { "epoch": 0.24550419145013738, "grad_norm": 1.46875, "learning_rate": 0.0017662214422702772, "loss": 1.1226, "step": 3529 }, { "epoch": 0.2455737590872726, "grad_norm": 0.9765625, "learning_rate": 0.0017660766291050082, "loss": 0.8704, "step": 3530 }, { "epoch": 0.2456433267244078, "grad_norm": 1.359375, "learning_rate": 0.001765931777042229, "loss": 1.2727, "step": 3531 }, { "epoch": 0.245712894361543, "grad_norm": 1.046875, "learning_rate": 0.001765786886089295, "loss": 0.854, "step": 3532 }, { "epoch": 0.2457824619986782, "grad_norm": 1.3828125, "learning_rate": 0.0017656419562535625, "loss": 1.1171, "step": 3533 }, { "epoch": 0.24585202963581343, "grad_norm": 1.1484375, "learning_rate": 0.001765496987542391, "loss": 0.929, "step": 3534 }, { "epoch": 0.24592159727294863, "grad_norm": 1.1171875, "learning_rate": 0.0017653519799631407, "loss": 1.2473, "step": 3535 }, { "epoch": 0.24599116491008383, "grad_norm": 1.34375, "learning_rate": 0.0017652069335231744, "loss": 1.2321, "step": 3536 }, { "epoch": 0.24606073254721902, "grad_norm": 1.21875, "learning_rate": 0.001765061848229857, "loss": 1.0668, "step": 3537 }, { "epoch": 0.24613030018435425, "grad_norm": 1.1484375, "learning_rate": 0.0017649167240905554, "loss": 1.1731, "step": 3538 }, { "epoch": 0.24619986782148945, "grad_norm": 1.3125, "learning_rate": 0.0017647715611126375, "loss": 0.8925, "step": 3539 }, { "epoch": 0.24626943545862465, "grad_norm": 1.3515625, "learning_rate": 0.0017646263593034748, "loss": 1.1732, "step": 3540 }, { "epoch": 0.24633900309575985, "grad_norm": 1.3359375, "learning_rate": 0.0017644811186704396, "loss": 1.1054, "step": 3541 }, { "epoch": 0.24640857073289504, "grad_norm": 1.265625, "learning_rate": 0.0017643358392209062, "loss": 1.2514, "step": 3542 }, { "epoch": 0.24647813837003027, "grad_norm": 1.15625, "learning_rate": 0.0017641905209622518, "loss": 0.8752, "step": 3543 }, { "epoch": 0.24654770600716547, "grad_norm": 1.140625, "learning_rate": 0.0017640451639018542, "loss": 1.1269, "step": 3544 }, { "epoch": 0.24661727364430067, "grad_norm": 0.96875, "learning_rate": 0.0017638997680470944, "loss": 1.1035, "step": 3545 }, { "epoch": 0.24668684128143586, "grad_norm": 1.1875, "learning_rate": 0.001763754333405355, "loss": 1.3187, "step": 3546 }, { "epoch": 0.2467564089185711, "grad_norm": 1.2578125, "learning_rate": 0.0017636088599840196, "loss": 1.2283, "step": 3547 }, { "epoch": 0.2468259765557063, "grad_norm": 1.1796875, "learning_rate": 0.0017634633477904755, "loss": 1.1916, "step": 3548 }, { "epoch": 0.2468955441928415, "grad_norm": 1.0546875, "learning_rate": 0.0017633177968321109, "loss": 0.9575, "step": 3549 }, { "epoch": 0.24696511182997669, "grad_norm": 1.046875, "learning_rate": 0.0017631722071163156, "loss": 0.8847, "step": 3550 }, { "epoch": 0.2470346794671119, "grad_norm": 1.0625, "learning_rate": 0.0017630265786504824, "loss": 0.888, "step": 3551 }, { "epoch": 0.2471042471042471, "grad_norm": 1.03125, "learning_rate": 0.0017628809114420057, "loss": 0.951, "step": 3552 }, { "epoch": 0.2471738147413823, "grad_norm": 1.3125, "learning_rate": 0.0017627352054982812, "loss": 1.0284, "step": 3553 }, { "epoch": 0.2472433823785175, "grad_norm": 1.2734375, "learning_rate": 0.0017625894608267077, "loss": 1.2441, "step": 3554 }, { "epoch": 0.2473129500156527, "grad_norm": 1.390625, "learning_rate": 0.001762443677434685, "loss": 1.1451, "step": 3555 }, { "epoch": 0.24738251765278793, "grad_norm": 1.2109375, "learning_rate": 0.0017622978553296154, "loss": 1.1002, "step": 3556 }, { "epoch": 0.24745208528992313, "grad_norm": 1.25, "learning_rate": 0.0017621519945189028, "loss": 1.0644, "step": 3557 }, { "epoch": 0.24752165292705833, "grad_norm": 1.171875, "learning_rate": 0.0017620060950099537, "loss": 0.9999, "step": 3558 }, { "epoch": 0.24759122056419353, "grad_norm": 1.2578125, "learning_rate": 0.0017618601568101758, "loss": 1.3499, "step": 3559 }, { "epoch": 0.24766078820132875, "grad_norm": 1.1328125, "learning_rate": 0.001761714179926979, "loss": 0.9083, "step": 3560 }, { "epoch": 0.24773035583846395, "grad_norm": 1.2421875, "learning_rate": 0.001761568164367776, "loss": 0.9532, "step": 3561 }, { "epoch": 0.24779992347559915, "grad_norm": 1.390625, "learning_rate": 0.0017614221101399797, "loss": 1.064, "step": 3562 }, { "epoch": 0.24786949111273435, "grad_norm": 1.4375, "learning_rate": 0.0017612760172510066, "loss": 1.0452, "step": 3563 }, { "epoch": 0.24793905874986957, "grad_norm": 1.4140625, "learning_rate": 0.0017611298857082745, "loss": 0.9763, "step": 3564 }, { "epoch": 0.24800862638700477, "grad_norm": 1.0703125, "learning_rate": 0.0017609837155192032, "loss": 0.8372, "step": 3565 }, { "epoch": 0.24807819402413997, "grad_norm": 1.125, "learning_rate": 0.0017608375066912143, "loss": 1.0637, "step": 3566 }, { "epoch": 0.24814776166127517, "grad_norm": 0.9765625, "learning_rate": 0.0017606912592317322, "loss": 1.0154, "step": 3567 }, { "epoch": 0.24821732929841037, "grad_norm": 1.3125, "learning_rate": 0.0017605449731481816, "loss": 1.1124, "step": 3568 }, { "epoch": 0.2482868969355456, "grad_norm": 1.2578125, "learning_rate": 0.001760398648447991, "loss": 1.1471, "step": 3569 }, { "epoch": 0.2483564645726808, "grad_norm": 1.296875, "learning_rate": 0.0017602522851385895, "loss": 1.1284, "step": 3570 }, { "epoch": 0.248426032209816, "grad_norm": 1.0859375, "learning_rate": 0.001760105883227409, "loss": 1.1458, "step": 3571 }, { "epoch": 0.2484955998469512, "grad_norm": 1.0234375, "learning_rate": 0.001759959442721883, "loss": 0.9884, "step": 3572 }, { "epoch": 0.2485651674840864, "grad_norm": 1.2734375, "learning_rate": 0.001759812963629447, "loss": 1.043, "step": 3573 }, { "epoch": 0.2486347351212216, "grad_norm": 1.125, "learning_rate": 0.0017596664459575385, "loss": 1.0459, "step": 3574 }, { "epoch": 0.2487043027583568, "grad_norm": 1.0546875, "learning_rate": 0.0017595198897135968, "loss": 1.1725, "step": 3575 }, { "epoch": 0.248773870395492, "grad_norm": 1.140625, "learning_rate": 0.0017593732949050633, "loss": 1.0612, "step": 3576 }, { "epoch": 0.24884343803262723, "grad_norm": 1.0859375, "learning_rate": 0.0017592266615393815, "loss": 0.7711, "step": 3577 }, { "epoch": 0.24891300566976243, "grad_norm": 1.2734375, "learning_rate": 0.0017590799896239969, "loss": 1.2447, "step": 3578 }, { "epoch": 0.24898257330689763, "grad_norm": 0.984375, "learning_rate": 0.001758933279166356, "loss": 0.9262, "step": 3579 }, { "epoch": 0.24905214094403283, "grad_norm": 1.0625, "learning_rate": 0.0017587865301739085, "loss": 1.075, "step": 3580 }, { "epoch": 0.24912170858116803, "grad_norm": 1.0859375, "learning_rate": 0.001758639742654106, "loss": 1.109, "step": 3581 }, { "epoch": 0.24919127621830325, "grad_norm": 1.1015625, "learning_rate": 0.0017584929166144009, "loss": 1.0506, "step": 3582 }, { "epoch": 0.24926084385543845, "grad_norm": 1.09375, "learning_rate": 0.0017583460520622482, "loss": 1.1858, "step": 3583 }, { "epoch": 0.24933041149257365, "grad_norm": 1.046875, "learning_rate": 0.001758199149005106, "loss": 1.0795, "step": 3584 }, { "epoch": 0.24939997912970885, "grad_norm": 0.9296875, "learning_rate": 0.0017580522074504324, "loss": 1.1447, "step": 3585 }, { "epoch": 0.24946954676684407, "grad_norm": 1.4921875, "learning_rate": 0.0017579052274056884, "loss": 0.9276, "step": 3586 }, { "epoch": 0.24953911440397927, "grad_norm": 1.265625, "learning_rate": 0.0017577582088783373, "loss": 1.0903, "step": 3587 }, { "epoch": 0.24960868204111447, "grad_norm": 0.9296875, "learning_rate": 0.0017576111518758436, "loss": 0.9308, "step": 3588 }, { "epoch": 0.24967824967824967, "grad_norm": 0.890625, "learning_rate": 0.001757464056405674, "loss": 0.8891, "step": 3589 }, { "epoch": 0.2497478173153849, "grad_norm": 1.484375, "learning_rate": 0.0017573169224752978, "loss": 1.0226, "step": 3590 }, { "epoch": 0.2498173849525201, "grad_norm": 1.078125, "learning_rate": 0.0017571697500921857, "loss": 1.2338, "step": 3591 }, { "epoch": 0.2498869525896553, "grad_norm": 1.3125, "learning_rate": 0.0017570225392638098, "loss": 0.9897, "step": 3592 }, { "epoch": 0.2499565202267905, "grad_norm": 1.34375, "learning_rate": 0.001756875289997645, "loss": 1.0784, "step": 3593 }, { "epoch": 0.2500260878639257, "grad_norm": 1.609375, "learning_rate": 0.001756728002301168, "loss": 1.2579, "step": 3594 }, { "epoch": 0.2500956555010609, "grad_norm": 1.171875, "learning_rate": 0.0017565806761818572, "loss": 1.1706, "step": 3595 }, { "epoch": 0.2501652231381961, "grad_norm": 1.203125, "learning_rate": 0.001756433311647193, "loss": 1.167, "step": 3596 }, { "epoch": 0.25023479077533134, "grad_norm": 1.078125, "learning_rate": 0.0017562859087046584, "loss": 1.1437, "step": 3597 }, { "epoch": 0.2503043584124665, "grad_norm": 1.109375, "learning_rate": 0.001756138467361737, "loss": 0.9237, "step": 3598 }, { "epoch": 0.25037392604960174, "grad_norm": 1.3046875, "learning_rate": 0.0017559909876259155, "loss": 1.0883, "step": 3599 }, { "epoch": 0.2504434936867369, "grad_norm": 1.0390625, "learning_rate": 0.0017558434695046819, "loss": 1.121, "step": 3600 }, { "epoch": 0.25051306132387213, "grad_norm": 1.265625, "learning_rate": 0.0017556959130055267, "loss": 1.0369, "step": 3601 }, { "epoch": 0.25058262896100736, "grad_norm": 1.1328125, "learning_rate": 0.001755548318135942, "loss": 1.0228, "step": 3602 }, { "epoch": 0.25065219659814253, "grad_norm": 1.171875, "learning_rate": 0.0017554006849034222, "loss": 0.9694, "step": 3603 }, { "epoch": 0.25072176423527776, "grad_norm": 1.3125, "learning_rate": 0.0017552530133154631, "loss": 1.0473, "step": 3604 }, { "epoch": 0.2507913318724129, "grad_norm": 1.21875, "learning_rate": 0.0017551053033795627, "loss": 1.1891, "step": 3605 }, { "epoch": 0.25086089950954815, "grad_norm": 1.1875, "learning_rate": 0.001754957555103221, "loss": 0.9631, "step": 3606 }, { "epoch": 0.2509304671466834, "grad_norm": 1.078125, "learning_rate": 0.00175480976849394, "loss": 0.9455, "step": 3607 }, { "epoch": 0.25100003478381855, "grad_norm": 1.046875, "learning_rate": 0.0017546619435592232, "loss": 1.0606, "step": 3608 }, { "epoch": 0.2510696024209538, "grad_norm": 1.03125, "learning_rate": 0.001754514080306577, "loss": 1.0355, "step": 3609 }, { "epoch": 0.251139170058089, "grad_norm": 1.4609375, "learning_rate": 0.0017543661787435085, "loss": 0.9412, "step": 3610 }, { "epoch": 0.25120873769522417, "grad_norm": 1.28125, "learning_rate": 0.0017542182388775279, "loss": 1.322, "step": 3611 }, { "epoch": 0.2512783053323594, "grad_norm": 1.2890625, "learning_rate": 0.0017540702607161467, "loss": 0.9471, "step": 3612 }, { "epoch": 0.25134787296949457, "grad_norm": 1.546875, "learning_rate": 0.0017539222442668784, "loss": 1.0245, "step": 3613 }, { "epoch": 0.2514174406066298, "grad_norm": 1.1171875, "learning_rate": 0.0017537741895372388, "loss": 1.2477, "step": 3614 }, { "epoch": 0.251487008243765, "grad_norm": 1.6953125, "learning_rate": 0.0017536260965347447, "loss": 1.1399, "step": 3615 }, { "epoch": 0.2515565758809002, "grad_norm": 1.265625, "learning_rate": 0.0017534779652669163, "loss": 1.2683, "step": 3616 }, { "epoch": 0.2516261435180354, "grad_norm": 1.0625, "learning_rate": 0.0017533297957412746, "loss": 0.993, "step": 3617 }, { "epoch": 0.2516957111551706, "grad_norm": 0.99609375, "learning_rate": 0.0017531815879653432, "loss": 1.0794, "step": 3618 }, { "epoch": 0.2517652787923058, "grad_norm": 1.2421875, "learning_rate": 0.0017530333419466468, "loss": 1.1167, "step": 3619 }, { "epoch": 0.25183484642944104, "grad_norm": 1.1640625, "learning_rate": 0.0017528850576927128, "loss": 1.032, "step": 3620 }, { "epoch": 0.2519044140665762, "grad_norm": 1.4140625, "learning_rate": 0.0017527367352110704, "loss": 1.1043, "step": 3621 }, { "epoch": 0.25197398170371144, "grad_norm": 1.1875, "learning_rate": 0.0017525883745092509, "loss": 1.0413, "step": 3622 }, { "epoch": 0.25204354934084666, "grad_norm": 1.1171875, "learning_rate": 0.0017524399755947865, "loss": 1.0989, "step": 3623 }, { "epoch": 0.25211311697798183, "grad_norm": 1.296875, "learning_rate": 0.0017522915384752134, "loss": 0.9765, "step": 3624 }, { "epoch": 0.25218268461511706, "grad_norm": 1.6640625, "learning_rate": 0.0017521430631580674, "loss": 1.2161, "step": 3625 }, { "epoch": 0.25225225225225223, "grad_norm": 1.140625, "learning_rate": 0.001751994549650888, "loss": 1.4329, "step": 3626 }, { "epoch": 0.25232181988938746, "grad_norm": 1.1953125, "learning_rate": 0.0017518459979612155, "loss": 1.2085, "step": 3627 }, { "epoch": 0.2523913875265227, "grad_norm": 1.1953125, "learning_rate": 0.001751697408096593, "loss": 1.2017, "step": 3628 }, { "epoch": 0.25246095516365785, "grad_norm": 1.15625, "learning_rate": 0.0017515487800645647, "loss": 0.9696, "step": 3629 }, { "epoch": 0.2525305228007931, "grad_norm": 0.94140625, "learning_rate": 0.0017514001138726775, "loss": 1.0484, "step": 3630 }, { "epoch": 0.25260009043792825, "grad_norm": 1.0859375, "learning_rate": 0.00175125140952848, "loss": 1.0938, "step": 3631 }, { "epoch": 0.2526696580750635, "grad_norm": 1.046875, "learning_rate": 0.0017511026670395222, "loss": 1.158, "step": 3632 }, { "epoch": 0.2527392257121987, "grad_norm": 1.1484375, "learning_rate": 0.0017509538864133574, "loss": 0.862, "step": 3633 }, { "epoch": 0.25280879334933387, "grad_norm": 1.3984375, "learning_rate": 0.0017508050676575389, "loss": 0.8049, "step": 3634 }, { "epoch": 0.2528783609864691, "grad_norm": 0.9140625, "learning_rate": 0.0017506562107796233, "loss": 0.9967, "step": 3635 }, { "epoch": 0.2529479286236043, "grad_norm": 1.265625, "learning_rate": 0.001750507315787169, "loss": 1.1872, "step": 3636 }, { "epoch": 0.2530174962607395, "grad_norm": 0.91015625, "learning_rate": 0.0017503583826877364, "loss": 1.0083, "step": 3637 }, { "epoch": 0.2530870638978747, "grad_norm": 1.203125, "learning_rate": 0.001750209411488887, "loss": 1.1361, "step": 3638 }, { "epoch": 0.2531566315350099, "grad_norm": 0.9609375, "learning_rate": 0.0017500604021981848, "loss": 1.0634, "step": 3639 }, { "epoch": 0.2532261991721451, "grad_norm": 1.109375, "learning_rate": 0.0017499113548231963, "loss": 0.7385, "step": 3640 }, { "epoch": 0.25329576680928034, "grad_norm": 1.140625, "learning_rate": 0.0017497622693714886, "loss": 1.0432, "step": 3641 }, { "epoch": 0.2533653344464155, "grad_norm": 1.2421875, "learning_rate": 0.001749613145850632, "loss": 0.9483, "step": 3642 }, { "epoch": 0.25343490208355074, "grad_norm": 1.2109375, "learning_rate": 0.0017494639842681986, "loss": 1.1768, "step": 3643 }, { "epoch": 0.2535044697206859, "grad_norm": 1.109375, "learning_rate": 0.0017493147846317613, "loss": 1.2575, "step": 3644 }, { "epoch": 0.25357403735782114, "grad_norm": 1.1640625, "learning_rate": 0.0017491655469488963, "loss": 1.0326, "step": 3645 }, { "epoch": 0.25364360499495636, "grad_norm": 1.4609375, "learning_rate": 0.0017490162712271808, "loss": 1.0368, "step": 3646 }, { "epoch": 0.25371317263209153, "grad_norm": 1.3359375, "learning_rate": 0.0017488669574741943, "loss": 0.9942, "step": 3647 }, { "epoch": 0.25378274026922676, "grad_norm": 1.1640625, "learning_rate": 0.0017487176056975185, "loss": 1.1022, "step": 3648 }, { "epoch": 0.253852307906362, "grad_norm": 1.2265625, "learning_rate": 0.001748568215904736, "loss": 1.1188, "step": 3649 }, { "epoch": 0.25392187554349716, "grad_norm": 1.0703125, "learning_rate": 0.001748418788103433, "loss": 1.158, "step": 3650 }, { "epoch": 0.2539914431806324, "grad_norm": 0.98046875, "learning_rate": 0.0017482693223011961, "loss": 1.0782, "step": 3651 }, { "epoch": 0.25406101081776755, "grad_norm": 1.09375, "learning_rate": 0.0017481198185056146, "loss": 0.964, "step": 3652 }, { "epoch": 0.2541305784549028, "grad_norm": 1.140625, "learning_rate": 0.0017479702767242795, "loss": 1.0249, "step": 3653 }, { "epoch": 0.254200146092038, "grad_norm": 1.09375, "learning_rate": 0.001747820696964784, "loss": 1.0348, "step": 3654 }, { "epoch": 0.2542697137291732, "grad_norm": 1.265625, "learning_rate": 0.0017476710792347226, "loss": 1.1565, "step": 3655 }, { "epoch": 0.2543392813663084, "grad_norm": 1.109375, "learning_rate": 0.0017475214235416923, "loss": 1.1294, "step": 3656 }, { "epoch": 0.25440884900344357, "grad_norm": 1.0703125, "learning_rate": 0.0017473717298932918, "loss": 0.8729, "step": 3657 }, { "epoch": 0.2544784166405788, "grad_norm": 1.1796875, "learning_rate": 0.0017472219982971222, "loss": 0.9156, "step": 3658 }, { "epoch": 0.254547984277714, "grad_norm": 1.1171875, "learning_rate": 0.0017470722287607856, "loss": 1.1243, "step": 3659 }, { "epoch": 0.2546175519148492, "grad_norm": 1.375, "learning_rate": 0.001746922421291887, "loss": 1.0525, "step": 3660 }, { "epoch": 0.2546871195519844, "grad_norm": 1.171875, "learning_rate": 0.0017467725758980323, "loss": 1.0949, "step": 3661 }, { "epoch": 0.25475668718911965, "grad_norm": 1.0078125, "learning_rate": 0.0017466226925868305, "loss": 0.918, "step": 3662 }, { "epoch": 0.2548262548262548, "grad_norm": 1.234375, "learning_rate": 0.0017464727713658915, "loss": 1.2199, "step": 3663 }, { "epoch": 0.25489582246339004, "grad_norm": 1.3046875, "learning_rate": 0.0017463228122428275, "loss": 1.2338, "step": 3664 }, { "epoch": 0.2549653901005252, "grad_norm": 1.1171875, "learning_rate": 0.0017461728152252528, "loss": 0.93, "step": 3665 }, { "epoch": 0.25503495773766044, "grad_norm": 1.078125, "learning_rate": 0.0017460227803207838, "loss": 1.1836, "step": 3666 }, { "epoch": 0.25510452537479567, "grad_norm": 1.2109375, "learning_rate": 0.0017458727075370382, "loss": 0.9921, "step": 3667 }, { "epoch": 0.25517409301193084, "grad_norm": 1.0859375, "learning_rate": 0.001745722596881636, "loss": 1.1594, "step": 3668 }, { "epoch": 0.25524366064906606, "grad_norm": 1.0390625, "learning_rate": 0.0017455724483621989, "loss": 1.2506, "step": 3669 }, { "epoch": 0.25531322828620123, "grad_norm": 1.4140625, "learning_rate": 0.001745422261986351, "loss": 1.1569, "step": 3670 }, { "epoch": 0.25538279592333646, "grad_norm": 1.03125, "learning_rate": 0.0017452720377617178, "loss": 0.8423, "step": 3671 }, { "epoch": 0.2554523635604717, "grad_norm": 1.359375, "learning_rate": 0.0017451217756959268, "loss": 1.1653, "step": 3672 }, { "epoch": 0.25552193119760686, "grad_norm": 1.0703125, "learning_rate": 0.001744971475796608, "loss": 0.7858, "step": 3673 }, { "epoch": 0.2555914988347421, "grad_norm": 1.0, "learning_rate": 0.0017448211380713923, "loss": 1.1199, "step": 3674 }, { "epoch": 0.2556610664718773, "grad_norm": 1.0390625, "learning_rate": 0.0017446707625279135, "loss": 0.9626, "step": 3675 }, { "epoch": 0.2557306341090125, "grad_norm": 1.03125, "learning_rate": 0.001744520349173807, "loss": 0.9203, "step": 3676 }, { "epoch": 0.2558002017461477, "grad_norm": 1.0234375, "learning_rate": 0.0017443698980167096, "loss": 0.8413, "step": 3677 }, { "epoch": 0.2558697693832829, "grad_norm": 1.671875, "learning_rate": 0.0017442194090642607, "loss": 1.3561, "step": 3678 }, { "epoch": 0.2559393370204181, "grad_norm": 1.3046875, "learning_rate": 0.0017440688823241012, "loss": 0.9417, "step": 3679 }, { "epoch": 0.2560089046575533, "grad_norm": 1.2578125, "learning_rate": 0.0017439183178038747, "loss": 0.8635, "step": 3680 }, { "epoch": 0.2560784722946885, "grad_norm": 1.1875, "learning_rate": 0.0017437677155112252, "loss": 1.0922, "step": 3681 }, { "epoch": 0.2561480399318237, "grad_norm": 1.171875, "learning_rate": 0.0017436170754538001, "loss": 1.0156, "step": 3682 }, { "epoch": 0.2562176075689589, "grad_norm": 1.1953125, "learning_rate": 0.0017434663976392483, "loss": 1.2135, "step": 3683 }, { "epoch": 0.2562871752060941, "grad_norm": 1.2734375, "learning_rate": 0.00174331568207522, "loss": 1.1355, "step": 3684 }, { "epoch": 0.25635674284322935, "grad_norm": 1.5390625, "learning_rate": 0.0017431649287693678, "loss": 1.1804, "step": 3685 }, { "epoch": 0.2564263104803645, "grad_norm": 0.98828125, "learning_rate": 0.0017430141377293466, "loss": 1.0658, "step": 3686 }, { "epoch": 0.25649587811749974, "grad_norm": 1.21875, "learning_rate": 0.0017428633089628122, "loss": 1.0869, "step": 3687 }, { "epoch": 0.25656544575463497, "grad_norm": 0.96484375, "learning_rate": 0.0017427124424774236, "loss": 0.7553, "step": 3688 }, { "epoch": 0.25663501339177014, "grad_norm": 1.1953125, "learning_rate": 0.0017425615382808406, "loss": 1.0089, "step": 3689 }, { "epoch": 0.25670458102890537, "grad_norm": 1.015625, "learning_rate": 0.0017424105963807252, "loss": 1.1103, "step": 3690 }, { "epoch": 0.25677414866604054, "grad_norm": 1.09375, "learning_rate": 0.0017422596167847421, "loss": 0.9985, "step": 3691 }, { "epoch": 0.25684371630317576, "grad_norm": 1.453125, "learning_rate": 0.001742108599500557, "loss": 1.1166, "step": 3692 }, { "epoch": 0.256913283940311, "grad_norm": 0.90625, "learning_rate": 0.0017419575445358376, "loss": 0.9415, "step": 3693 }, { "epoch": 0.25698285157744616, "grad_norm": 1.1328125, "learning_rate": 0.0017418064518982539, "loss": 0.9779, "step": 3694 }, { "epoch": 0.2570524192145814, "grad_norm": 0.94921875, "learning_rate": 0.0017416553215954774, "loss": 1.0644, "step": 3695 }, { "epoch": 0.25712198685171656, "grad_norm": 0.94921875, "learning_rate": 0.0017415041536351819, "loss": 0.8074, "step": 3696 }, { "epoch": 0.2571915544888518, "grad_norm": 1.046875, "learning_rate": 0.001741352948025043, "loss": 1.1636, "step": 3697 }, { "epoch": 0.257261122125987, "grad_norm": 1.171875, "learning_rate": 0.001741201704772738, "loss": 1.1348, "step": 3698 }, { "epoch": 0.2573306897631222, "grad_norm": 1.1953125, "learning_rate": 0.0017410504238859464, "loss": 0.9444, "step": 3699 }, { "epoch": 0.2574002574002574, "grad_norm": 0.984375, "learning_rate": 0.0017408991053723495, "loss": 0.9938, "step": 3700 }, { "epoch": 0.25746982503739263, "grad_norm": 1.1640625, "learning_rate": 0.0017407477492396306, "loss": 1.0754, "step": 3701 }, { "epoch": 0.2575393926745278, "grad_norm": 1.1796875, "learning_rate": 0.0017405963554954745, "loss": 0.981, "step": 3702 }, { "epoch": 0.257608960311663, "grad_norm": 1.2265625, "learning_rate": 0.0017404449241475682, "loss": 1.3793, "step": 3703 }, { "epoch": 0.2576785279487982, "grad_norm": 1.0625, "learning_rate": 0.0017402934552036007, "loss": 1.051, "step": 3704 }, { "epoch": 0.2577480955859334, "grad_norm": 1.359375, "learning_rate": 0.0017401419486712632, "loss": 1.3719, "step": 3705 }, { "epoch": 0.25781766322306865, "grad_norm": 1.59375, "learning_rate": 0.001739990404558248, "loss": 1.1657, "step": 3706 }, { "epoch": 0.2578872308602038, "grad_norm": 1.0859375, "learning_rate": 0.00173983882287225, "loss": 1.097, "step": 3707 }, { "epoch": 0.25795679849733905, "grad_norm": 1.2578125, "learning_rate": 0.0017396872036209655, "loss": 0.9704, "step": 3708 }, { "epoch": 0.2580263661344742, "grad_norm": 1.125, "learning_rate": 0.001739535546812093, "loss": 1.1431, "step": 3709 }, { "epoch": 0.25809593377160944, "grad_norm": 1.234375, "learning_rate": 0.0017393838524533333, "loss": 0.9777, "step": 3710 }, { "epoch": 0.25816550140874467, "grad_norm": 1.328125, "learning_rate": 0.001739232120552388, "loss": 1.1802, "step": 3711 }, { "epoch": 0.25823506904587984, "grad_norm": 1.3046875, "learning_rate": 0.0017390803511169617, "loss": 1.0882, "step": 3712 }, { "epoch": 0.25830463668301507, "grad_norm": 1.421875, "learning_rate": 0.0017389285441547606, "loss": 1.0451, "step": 3713 }, { "epoch": 0.2583742043201503, "grad_norm": 1.09375, "learning_rate": 0.0017387766996734924, "loss": 1.0731, "step": 3714 }, { "epoch": 0.25844377195728546, "grad_norm": 1.0859375, "learning_rate": 0.0017386248176808673, "loss": 1.101, "step": 3715 }, { "epoch": 0.2585133395944207, "grad_norm": 1.125, "learning_rate": 0.0017384728981845966, "loss": 1.0954, "step": 3716 }, { "epoch": 0.25858290723155586, "grad_norm": 1.046875, "learning_rate": 0.0017383209411923944, "loss": 1.0016, "step": 3717 }, { "epoch": 0.2586524748686911, "grad_norm": 0.953125, "learning_rate": 0.0017381689467119764, "loss": 1.1121, "step": 3718 }, { "epoch": 0.2587220425058263, "grad_norm": 1.3359375, "learning_rate": 0.0017380169147510594, "loss": 1.2915, "step": 3719 }, { "epoch": 0.2587916101429615, "grad_norm": 1.21875, "learning_rate": 0.0017378648453173638, "loss": 1.2362, "step": 3720 }, { "epoch": 0.2588611777800967, "grad_norm": 1.1328125, "learning_rate": 0.0017377127384186105, "loss": 1.1448, "step": 3721 }, { "epoch": 0.2589307454172319, "grad_norm": 1.328125, "learning_rate": 0.0017375605940625225, "loss": 1.2008, "step": 3722 }, { "epoch": 0.2590003130543671, "grad_norm": 1.4296875, "learning_rate": 0.001737408412256825, "loss": 1.0659, "step": 3723 }, { "epoch": 0.25906988069150233, "grad_norm": 1.1484375, "learning_rate": 0.0017372561930092455, "loss": 1.1304, "step": 3724 }, { "epoch": 0.2591394483286375, "grad_norm": 1.1484375, "learning_rate": 0.0017371039363275123, "loss": 0.945, "step": 3725 }, { "epoch": 0.2592090159657727, "grad_norm": 1.1015625, "learning_rate": 0.0017369516422193567, "loss": 1.0949, "step": 3726 }, { "epoch": 0.25927858360290795, "grad_norm": 1.1171875, "learning_rate": 0.001736799310692511, "loss": 1.1905, "step": 3727 }, { "epoch": 0.2593481512400431, "grad_norm": 1.1171875, "learning_rate": 0.0017366469417547101, "loss": 1.1927, "step": 3728 }, { "epoch": 0.25941771887717835, "grad_norm": 1.4375, "learning_rate": 0.0017364945354136907, "loss": 1.1854, "step": 3729 }, { "epoch": 0.2594872865143135, "grad_norm": 1.1640625, "learning_rate": 0.0017363420916771909, "loss": 0.9458, "step": 3730 }, { "epoch": 0.25955685415144875, "grad_norm": 1.359375, "learning_rate": 0.0017361896105529508, "loss": 1.3304, "step": 3731 }, { "epoch": 0.25962642178858397, "grad_norm": 1.0078125, "learning_rate": 0.0017360370920487134, "loss": 0.9451, "step": 3732 }, { "epoch": 0.25969598942571914, "grad_norm": 1.15625, "learning_rate": 0.0017358845361722221, "loss": 0.6876, "step": 3733 }, { "epoch": 0.25976555706285437, "grad_norm": 1.15625, "learning_rate": 0.0017357319429312232, "loss": 1.0501, "step": 3734 }, { "epoch": 0.25983512469998954, "grad_norm": 0.95703125, "learning_rate": 0.0017355793123334648, "loss": 0.8675, "step": 3735 }, { "epoch": 0.25990469233712477, "grad_norm": 1.09375, "learning_rate": 0.0017354266443866961, "loss": 1.1924, "step": 3736 }, { "epoch": 0.25997425997426, "grad_norm": 1.1171875, "learning_rate": 0.0017352739390986696, "loss": 0.9601, "step": 3737 }, { "epoch": 0.26004382761139516, "grad_norm": 1.3984375, "learning_rate": 0.0017351211964771384, "loss": 1.0359, "step": 3738 }, { "epoch": 0.2601133952485304, "grad_norm": 1.15625, "learning_rate": 0.0017349684165298583, "loss": 1.1129, "step": 3739 }, { "epoch": 0.2601829628856656, "grad_norm": 0.97265625, "learning_rate": 0.0017348155992645863, "loss": 1.0371, "step": 3740 }, { "epoch": 0.2602525305228008, "grad_norm": 1.3203125, "learning_rate": 0.001734662744689082, "loss": 1.1541, "step": 3741 }, { "epoch": 0.260322098159936, "grad_norm": 1.375, "learning_rate": 0.0017345098528111062, "loss": 1.1502, "step": 3742 }, { "epoch": 0.2603916657970712, "grad_norm": 1.296875, "learning_rate": 0.0017343569236384227, "loss": 1.1974, "step": 3743 }, { "epoch": 0.2604612334342064, "grad_norm": 1.3984375, "learning_rate": 0.001734203957178796, "loss": 0.9742, "step": 3744 }, { "epoch": 0.26053080107134163, "grad_norm": 1.078125, "learning_rate": 0.0017340509534399928, "loss": 1.1656, "step": 3745 }, { "epoch": 0.2606003687084768, "grad_norm": 0.99609375, "learning_rate": 0.0017338979124297822, "loss": 1.1089, "step": 3746 }, { "epoch": 0.26066993634561203, "grad_norm": 1.0859375, "learning_rate": 0.0017337448341559348, "loss": 0.8266, "step": 3747 }, { "epoch": 0.2607395039827472, "grad_norm": 1.0390625, "learning_rate": 0.001733591718626223, "loss": 0.8495, "step": 3748 }, { "epoch": 0.2608090716198824, "grad_norm": 1.234375, "learning_rate": 0.0017334385658484212, "loss": 1.2051, "step": 3749 }, { "epoch": 0.26087863925701765, "grad_norm": 1.4765625, "learning_rate": 0.0017332853758303059, "loss": 1.307, "step": 3750 }, { "epoch": 0.2609482068941528, "grad_norm": 1.203125, "learning_rate": 0.0017331321485796554, "loss": 1.1944, "step": 3751 }, { "epoch": 0.26101777453128805, "grad_norm": 1.2265625, "learning_rate": 0.0017329788841042495, "loss": 0.9252, "step": 3752 }, { "epoch": 0.2610873421684233, "grad_norm": 1.265625, "learning_rate": 0.0017328255824118704, "loss": 0.9735, "step": 3753 }, { "epoch": 0.26115690980555845, "grad_norm": 1.3359375, "learning_rate": 0.001732672243510302, "loss": 1.2298, "step": 3754 }, { "epoch": 0.26122647744269367, "grad_norm": 1.1484375, "learning_rate": 0.00173251886740733, "loss": 1.1572, "step": 3755 }, { "epoch": 0.26129604507982884, "grad_norm": 0.92578125, "learning_rate": 0.0017323654541107419, "loss": 0.9327, "step": 3756 }, { "epoch": 0.26136561271696407, "grad_norm": 1.265625, "learning_rate": 0.0017322120036283276, "loss": 0.8942, "step": 3757 }, { "epoch": 0.2614351803540993, "grad_norm": 1.1796875, "learning_rate": 0.0017320585159678783, "loss": 0.9664, "step": 3758 }, { "epoch": 0.26150474799123447, "grad_norm": 0.9765625, "learning_rate": 0.0017319049911371876, "loss": 1.1331, "step": 3759 }, { "epoch": 0.2615743156283697, "grad_norm": 1.0, "learning_rate": 0.00173175142914405, "loss": 0.9512, "step": 3760 }, { "epoch": 0.26164388326550486, "grad_norm": 1.0703125, "learning_rate": 0.0017315978299962636, "loss": 0.7905, "step": 3761 }, { "epoch": 0.2617134509026401, "grad_norm": 1.46875, "learning_rate": 0.001731444193701627, "loss": 1.2855, "step": 3762 }, { "epoch": 0.2617830185397753, "grad_norm": 1.3046875, "learning_rate": 0.0017312905202679408, "loss": 1.0728, "step": 3763 }, { "epoch": 0.2618525861769105, "grad_norm": 1.2578125, "learning_rate": 0.001731136809703008, "loss": 1.2534, "step": 3764 }, { "epoch": 0.2619221538140457, "grad_norm": 0.94921875, "learning_rate": 0.0017309830620146332, "loss": 0.7704, "step": 3765 }, { "epoch": 0.26199172145118094, "grad_norm": 1.296875, "learning_rate": 0.0017308292772106229, "loss": 1.0189, "step": 3766 }, { "epoch": 0.2620612890883161, "grad_norm": 1.1484375, "learning_rate": 0.0017306754552987855, "loss": 1.1517, "step": 3767 }, { "epoch": 0.26213085672545133, "grad_norm": 1.015625, "learning_rate": 0.0017305215962869313, "loss": 1.0454, "step": 3768 }, { "epoch": 0.2622004243625865, "grad_norm": 1.03125, "learning_rate": 0.0017303677001828729, "loss": 0.8192, "step": 3769 }, { "epoch": 0.26226999199972173, "grad_norm": 1.4296875, "learning_rate": 0.0017302137669944235, "loss": 1.2484, "step": 3770 }, { "epoch": 0.26233955963685696, "grad_norm": 1.0703125, "learning_rate": 0.0017300597967294, "loss": 0.9947, "step": 3771 }, { "epoch": 0.2624091272739921, "grad_norm": 0.90234375, "learning_rate": 0.0017299057893956195, "loss": 1.0141, "step": 3772 }, { "epoch": 0.26247869491112735, "grad_norm": 1.328125, "learning_rate": 0.0017297517450009022, "loss": 1.3139, "step": 3773 }, { "epoch": 0.2625482625482625, "grad_norm": 1.171875, "learning_rate": 0.0017295976635530695, "loss": 1.2733, "step": 3774 }, { "epoch": 0.26261783018539775, "grad_norm": 1.1328125, "learning_rate": 0.001729443545059945, "loss": 0.9708, "step": 3775 }, { "epoch": 0.262687397822533, "grad_norm": 1.0625, "learning_rate": 0.0017292893895293538, "loss": 1.1274, "step": 3776 }, { "epoch": 0.26275696545966815, "grad_norm": 0.88671875, "learning_rate": 0.0017291351969691232, "loss": 0.9228, "step": 3777 }, { "epoch": 0.26282653309680337, "grad_norm": 1.25, "learning_rate": 0.0017289809673870825, "loss": 1.2935, "step": 3778 }, { "epoch": 0.2628961007339386, "grad_norm": 1.1640625, "learning_rate": 0.0017288267007910627, "loss": 0.8132, "step": 3779 }, { "epoch": 0.26296566837107377, "grad_norm": 1.203125, "learning_rate": 0.0017286723971888965, "loss": 0.9086, "step": 3780 }, { "epoch": 0.263035236008209, "grad_norm": 1.3359375, "learning_rate": 0.0017285180565884187, "loss": 1.0525, "step": 3781 }, { "epoch": 0.26310480364534417, "grad_norm": 1.2109375, "learning_rate": 0.0017283636789974662, "loss": 0.8169, "step": 3782 }, { "epoch": 0.2631743712824794, "grad_norm": 0.84765625, "learning_rate": 0.001728209264423877, "loss": 0.8526, "step": 3783 }, { "epoch": 0.2632439389196146, "grad_norm": 0.921875, "learning_rate": 0.001728054812875492, "loss": 0.965, "step": 3784 }, { "epoch": 0.2633135065567498, "grad_norm": 1.046875, "learning_rate": 0.0017279003243601532, "loss": 0.8445, "step": 3785 }, { "epoch": 0.263383074193885, "grad_norm": 1.2578125, "learning_rate": 0.001727745798885705, "loss": 1.0094, "step": 3786 }, { "epoch": 0.2634526418310202, "grad_norm": 0.96875, "learning_rate": 0.0017275912364599928, "loss": 1.0176, "step": 3787 }, { "epoch": 0.2635222094681554, "grad_norm": 1.296875, "learning_rate": 0.0017274366370908655, "loss": 0.941, "step": 3788 }, { "epoch": 0.26359177710529064, "grad_norm": 1.046875, "learning_rate": 0.0017272820007861718, "loss": 1.2155, "step": 3789 }, { "epoch": 0.2636613447424258, "grad_norm": 1.0859375, "learning_rate": 0.0017271273275537642, "loss": 0.8125, "step": 3790 }, { "epoch": 0.26373091237956103, "grad_norm": 1.515625, "learning_rate": 0.0017269726174014956, "loss": 0.8297, "step": 3791 }, { "epoch": 0.26380048001669626, "grad_norm": 1.609375, "learning_rate": 0.001726817870337222, "loss": 1.0904, "step": 3792 }, { "epoch": 0.26387004765383143, "grad_norm": 1.46875, "learning_rate": 0.0017266630863688004, "loss": 1.1033, "step": 3793 }, { "epoch": 0.26393961529096666, "grad_norm": 0.9453125, "learning_rate": 0.0017265082655040897, "loss": 0.8511, "step": 3794 }, { "epoch": 0.2640091829281018, "grad_norm": 1.3984375, "learning_rate": 0.0017263534077509514, "loss": 1.1242, "step": 3795 }, { "epoch": 0.26407875056523705, "grad_norm": 1.0390625, "learning_rate": 0.0017261985131172479, "loss": 0.952, "step": 3796 }, { "epoch": 0.2641483182023723, "grad_norm": 1.0078125, "learning_rate": 0.0017260435816108446, "loss": 0.9841, "step": 3797 }, { "epoch": 0.26421788583950745, "grad_norm": 1.0078125, "learning_rate": 0.0017258886132396074, "loss": 0.8861, "step": 3798 }, { "epoch": 0.2642874534766427, "grad_norm": 1.28125, "learning_rate": 0.0017257336080114052, "loss": 1.0207, "step": 3799 }, { "epoch": 0.26435702111377785, "grad_norm": 1.015625, "learning_rate": 0.0017255785659341086, "loss": 0.9993, "step": 3800 }, { "epoch": 0.26442658875091307, "grad_norm": 1.0625, "learning_rate": 0.0017254234870155893, "loss": 0.749, "step": 3801 }, { "epoch": 0.2644961563880483, "grad_norm": 1.1015625, "learning_rate": 0.0017252683712637219, "loss": 1.1282, "step": 3802 }, { "epoch": 0.26456572402518347, "grad_norm": 1.09375, "learning_rate": 0.0017251132186863823, "loss": 1.0138, "step": 3803 }, { "epoch": 0.2646352916623187, "grad_norm": 1.1796875, "learning_rate": 0.001724958029291448, "loss": 0.9922, "step": 3804 }, { "epoch": 0.2647048592994539, "grad_norm": 1.2421875, "learning_rate": 0.0017248028030867992, "loss": 1.4224, "step": 3805 }, { "epoch": 0.2647744269365891, "grad_norm": 0.97265625, "learning_rate": 0.0017246475400803174, "loss": 0.8931, "step": 3806 }, { "epoch": 0.2648439945737243, "grad_norm": 1.15625, "learning_rate": 0.001724492240279886, "loss": 0.9911, "step": 3807 }, { "epoch": 0.2649135622108595, "grad_norm": 1.15625, "learning_rate": 0.0017243369036933904, "loss": 1.0472, "step": 3808 }, { "epoch": 0.2649831298479947, "grad_norm": 1.0546875, "learning_rate": 0.0017241815303287176, "loss": 0.9658, "step": 3809 }, { "epoch": 0.26505269748512994, "grad_norm": 1.25, "learning_rate": 0.001724026120193757, "loss": 1.0667, "step": 3810 }, { "epoch": 0.2651222651222651, "grad_norm": 1.0390625, "learning_rate": 0.0017238706732963993, "loss": 0.9033, "step": 3811 }, { "epoch": 0.26519183275940034, "grad_norm": 1.0625, "learning_rate": 0.0017237151896445373, "loss": 1.1824, "step": 3812 }, { "epoch": 0.2652614003965355, "grad_norm": 0.91015625, "learning_rate": 0.001723559669246066, "loss": 0.9525, "step": 3813 }, { "epoch": 0.26533096803367073, "grad_norm": 1.109375, "learning_rate": 0.0017234041121088814, "loss": 0.8767, "step": 3814 }, { "epoch": 0.26540053567080596, "grad_norm": 1.2734375, "learning_rate": 0.0017232485182408824, "loss": 1.0737, "step": 3815 }, { "epoch": 0.26547010330794113, "grad_norm": 1.109375, "learning_rate": 0.001723092887649969, "loss": 1.0869, "step": 3816 }, { "epoch": 0.26553967094507636, "grad_norm": 1.3046875, "learning_rate": 0.0017229372203440435, "loss": 1.1493, "step": 3817 }, { "epoch": 0.2656092385822116, "grad_norm": 1.3984375, "learning_rate": 0.00172278151633101, "loss": 1.3631, "step": 3818 }, { "epoch": 0.26567880621934675, "grad_norm": 1.1484375, "learning_rate": 0.001722625775618774, "loss": 1.1792, "step": 3819 }, { "epoch": 0.265748373856482, "grad_norm": 1.25, "learning_rate": 0.0017224699982152432, "loss": 0.8632, "step": 3820 }, { "epoch": 0.26581794149361715, "grad_norm": 1.1875, "learning_rate": 0.0017223141841283276, "loss": 1.177, "step": 3821 }, { "epoch": 0.2658875091307524, "grad_norm": 1.1875, "learning_rate": 0.0017221583333659385, "loss": 0.9559, "step": 3822 }, { "epoch": 0.2659570767678876, "grad_norm": 1.3359375, "learning_rate": 0.0017220024459359893, "loss": 1.1445, "step": 3823 }, { "epoch": 0.26602664440502277, "grad_norm": 1.234375, "learning_rate": 0.0017218465218463948, "loss": 1.3609, "step": 3824 }, { "epoch": 0.266096212042158, "grad_norm": 1.2578125, "learning_rate": 0.0017216905611050725, "loss": 1.3015, "step": 3825 }, { "epoch": 0.26616577967929317, "grad_norm": 1.203125, "learning_rate": 0.0017215345637199412, "loss": 1.1312, "step": 3826 }, { "epoch": 0.2662353473164284, "grad_norm": 1.265625, "learning_rate": 0.0017213785296989212, "loss": 1.1181, "step": 3827 }, { "epoch": 0.2663049149535636, "grad_norm": 1.0546875, "learning_rate": 0.0017212224590499358, "loss": 1.0499, "step": 3828 }, { "epoch": 0.2663744825906988, "grad_norm": 1.203125, "learning_rate": 0.001721066351780909, "loss": 1.0541, "step": 3829 }, { "epoch": 0.266444050227834, "grad_norm": 1.03125, "learning_rate": 0.0017209102078997673, "loss": 0.9934, "step": 3830 }, { "epoch": 0.26651361786496924, "grad_norm": 1.015625, "learning_rate": 0.0017207540274144387, "loss": 1.0731, "step": 3831 }, { "epoch": 0.2665831855021044, "grad_norm": 1.28125, "learning_rate": 0.0017205978103328537, "loss": 0.9135, "step": 3832 }, { "epoch": 0.26665275313923964, "grad_norm": 1.0625, "learning_rate": 0.001720441556662944, "loss": 1.1729, "step": 3833 }, { "epoch": 0.2667223207763748, "grad_norm": 1.09375, "learning_rate": 0.0017202852664126432, "loss": 0.9803, "step": 3834 }, { "epoch": 0.26679188841351004, "grad_norm": 0.9765625, "learning_rate": 0.001720128939589887, "loss": 0.8769, "step": 3835 }, { "epoch": 0.26686145605064526, "grad_norm": 1.0546875, "learning_rate": 0.0017199725762026136, "loss": 0.8438, "step": 3836 }, { "epoch": 0.26693102368778043, "grad_norm": 1.25, "learning_rate": 0.001719816176258761, "loss": 1.1329, "step": 3837 }, { "epoch": 0.26700059132491566, "grad_norm": 1.1328125, "learning_rate": 0.0017196597397662714, "loss": 1.0036, "step": 3838 }, { "epoch": 0.26707015896205083, "grad_norm": 1.203125, "learning_rate": 0.0017195032667330875, "loss": 0.8832, "step": 3839 }, { "epoch": 0.26713972659918606, "grad_norm": 1.078125, "learning_rate": 0.0017193467571671541, "loss": 0.9158, "step": 3840 }, { "epoch": 0.2672092942363213, "grad_norm": 1.1328125, "learning_rate": 0.0017191902110764183, "loss": 0.8351, "step": 3841 }, { "epoch": 0.26727886187345645, "grad_norm": 1.0859375, "learning_rate": 0.0017190336284688289, "loss": 1.0573, "step": 3842 }, { "epoch": 0.2673484295105917, "grad_norm": 1.328125, "learning_rate": 0.001718877009352336, "loss": 1.0967, "step": 3843 }, { "epoch": 0.2674179971477269, "grad_norm": 1.0, "learning_rate": 0.0017187203537348914, "loss": 0.8831, "step": 3844 }, { "epoch": 0.2674875647848621, "grad_norm": 1.1328125, "learning_rate": 0.0017185636616244503, "loss": 0.8737, "step": 3845 }, { "epoch": 0.2675571324219973, "grad_norm": 1.5390625, "learning_rate": 0.0017184069330289681, "loss": 1.2432, "step": 3846 }, { "epoch": 0.26762670005913247, "grad_norm": 1.0234375, "learning_rate": 0.0017182501679564029, "loss": 1.2132, "step": 3847 }, { "epoch": 0.2676962676962677, "grad_norm": 0.91015625, "learning_rate": 0.0017180933664147147, "loss": 0.7472, "step": 3848 }, { "epoch": 0.2677658353334029, "grad_norm": 1.3125, "learning_rate": 0.0017179365284118644, "loss": 0.9909, "step": 3849 }, { "epoch": 0.2678354029705381, "grad_norm": 1.2109375, "learning_rate": 0.0017177796539558162, "loss": 1.064, "step": 3850 }, { "epoch": 0.2679049706076733, "grad_norm": 0.91015625, "learning_rate": 0.0017176227430545348, "loss": 0.9202, "step": 3851 }, { "epoch": 0.2679745382448085, "grad_norm": 1.078125, "learning_rate": 0.0017174657957159875, "loss": 0.865, "step": 3852 }, { "epoch": 0.2680441058819437, "grad_norm": 1.0625, "learning_rate": 0.001717308811948144, "loss": 1.0918, "step": 3853 }, { "epoch": 0.26811367351907894, "grad_norm": 1.0859375, "learning_rate": 0.0017171517917589738, "loss": 1.0774, "step": 3854 }, { "epoch": 0.2681832411562141, "grad_norm": 1.171875, "learning_rate": 0.0017169947351564508, "loss": 1.165, "step": 3855 }, { "epoch": 0.26825280879334934, "grad_norm": 1.28125, "learning_rate": 0.0017168376421485489, "loss": 1.0177, "step": 3856 }, { "epoch": 0.26832237643048457, "grad_norm": 0.8359375, "learning_rate": 0.0017166805127432447, "loss": 0.879, "step": 3857 }, { "epoch": 0.26839194406761974, "grad_norm": 1.3359375, "learning_rate": 0.0017165233469485163, "loss": 1.1177, "step": 3858 }, { "epoch": 0.26846151170475496, "grad_norm": 1.2578125, "learning_rate": 0.001716366144772344, "loss": 1.1613, "step": 3859 }, { "epoch": 0.26853107934189013, "grad_norm": 1.0390625, "learning_rate": 0.0017162089062227096, "loss": 0.8865, "step": 3860 }, { "epoch": 0.26860064697902536, "grad_norm": 1.140625, "learning_rate": 0.0017160516313075968, "loss": 1.176, "step": 3861 }, { "epoch": 0.2686702146161606, "grad_norm": 1.2421875, "learning_rate": 0.0017158943200349915, "loss": 1.0508, "step": 3862 }, { "epoch": 0.26873978225329576, "grad_norm": 1.15625, "learning_rate": 0.0017157369724128812, "loss": 0.884, "step": 3863 }, { "epoch": 0.268809349890431, "grad_norm": 1.3125, "learning_rate": 0.0017155795884492547, "loss": 1.3607, "step": 3864 }, { "epoch": 0.26887891752756615, "grad_norm": 1.0234375, "learning_rate": 0.0017154221681521034, "loss": 0.9544, "step": 3865 }, { "epoch": 0.2689484851647014, "grad_norm": 1.109375, "learning_rate": 0.0017152647115294204, "loss": 1.084, "step": 3866 }, { "epoch": 0.2690180528018366, "grad_norm": 1.0390625, "learning_rate": 0.0017151072185892008, "loss": 1.0318, "step": 3867 }, { "epoch": 0.2690876204389718, "grad_norm": 1.1171875, "learning_rate": 0.001714949689339441, "loss": 1.0706, "step": 3868 }, { "epoch": 0.269157188076107, "grad_norm": 1.28125, "learning_rate": 0.0017147921237881394, "loss": 0.9735, "step": 3869 }, { "epoch": 0.2692267557132422, "grad_norm": 1.0625, "learning_rate": 0.0017146345219432966, "loss": 0.8209, "step": 3870 }, { "epoch": 0.2692963233503774, "grad_norm": 1.109375, "learning_rate": 0.0017144768838129147, "loss": 0.8533, "step": 3871 }, { "epoch": 0.2693658909875126, "grad_norm": 0.96875, "learning_rate": 0.0017143192094049985, "loss": 1.0087, "step": 3872 }, { "epoch": 0.2694354586246478, "grad_norm": 1.2734375, "learning_rate": 0.0017141614987275526, "loss": 0.8013, "step": 3873 }, { "epoch": 0.269505026261783, "grad_norm": 1.5859375, "learning_rate": 0.0017140037517885856, "loss": 1.1604, "step": 3874 }, { "epoch": 0.26957459389891825, "grad_norm": 1.171875, "learning_rate": 0.001713845968596107, "loss": 1.0919, "step": 3875 }, { "epoch": 0.2696441615360534, "grad_norm": 1.4140625, "learning_rate": 0.0017136881491581284, "loss": 0.9652, "step": 3876 }, { "epoch": 0.26971372917318864, "grad_norm": 0.99609375, "learning_rate": 0.0017135302934826627, "loss": 0.9007, "step": 3877 }, { "epoch": 0.2697832968103238, "grad_norm": 1.1875, "learning_rate": 0.001713372401577725, "loss": 1.0811, "step": 3878 }, { "epoch": 0.26985286444745904, "grad_norm": 1.046875, "learning_rate": 0.0017132144734513324, "loss": 1.2171, "step": 3879 }, { "epoch": 0.26992243208459427, "grad_norm": 1.0703125, "learning_rate": 0.0017130565091115037, "loss": 0.9081, "step": 3880 }, { "epoch": 0.26999199972172944, "grad_norm": 1.0078125, "learning_rate": 0.0017128985085662599, "loss": 1.0543, "step": 3881 }, { "epoch": 0.27006156735886466, "grad_norm": 1.1953125, "learning_rate": 0.0017127404718236226, "loss": 0.9564, "step": 3882 }, { "epoch": 0.27013113499599983, "grad_norm": 1.125, "learning_rate": 0.001712582398891617, "loss": 0.9315, "step": 3883 }, { "epoch": 0.27020070263313506, "grad_norm": 1.484375, "learning_rate": 0.0017124242897782684, "loss": 1.186, "step": 3884 }, { "epoch": 0.2702702702702703, "grad_norm": 1.0390625, "learning_rate": 0.0017122661444916058, "loss": 0.9827, "step": 3885 }, { "epoch": 0.27033983790740546, "grad_norm": 0.9765625, "learning_rate": 0.0017121079630396583, "loss": 0.95, "step": 3886 }, { "epoch": 0.2704094055445407, "grad_norm": 1.015625, "learning_rate": 0.0017119497454304575, "loss": 1.0094, "step": 3887 }, { "epoch": 0.2704789731816759, "grad_norm": 1.3984375, "learning_rate": 0.0017117914916720373, "loss": 1.2051, "step": 3888 }, { "epoch": 0.2705485408188111, "grad_norm": 1.0078125, "learning_rate": 0.001711633201772433, "loss": 1.0079, "step": 3889 }, { "epoch": 0.2706181084559463, "grad_norm": 1.1953125, "learning_rate": 0.0017114748757396812, "loss": 1.0215, "step": 3890 }, { "epoch": 0.2706876760930815, "grad_norm": 1.15625, "learning_rate": 0.0017113165135818217, "loss": 0.8982, "step": 3891 }, { "epoch": 0.2707572437302167, "grad_norm": 1.0390625, "learning_rate": 0.0017111581153068948, "loss": 0.8377, "step": 3892 }, { "epoch": 0.2708268113673519, "grad_norm": 1.1875, "learning_rate": 0.0017109996809229434, "loss": 0.9026, "step": 3893 }, { "epoch": 0.2708963790044871, "grad_norm": 1.125, "learning_rate": 0.0017108412104380117, "loss": 1.0206, "step": 3894 }, { "epoch": 0.2709659466416223, "grad_norm": 1.1796875, "learning_rate": 0.0017106827038601464, "loss": 1.1209, "step": 3895 }, { "epoch": 0.2710355142787575, "grad_norm": 0.9375, "learning_rate": 0.0017105241611973954, "loss": 1.0785, "step": 3896 }, { "epoch": 0.2711050819158927, "grad_norm": 1.21875, "learning_rate": 0.001710365582457809, "loss": 1.0814, "step": 3897 }, { "epoch": 0.27117464955302795, "grad_norm": 1.8828125, "learning_rate": 0.0017102069676494386, "loss": 1.086, "step": 3898 }, { "epoch": 0.2712442171901631, "grad_norm": 1.28125, "learning_rate": 0.0017100483167803381, "loss": 0.8824, "step": 3899 }, { "epoch": 0.27131378482729834, "grad_norm": 1.0703125, "learning_rate": 0.0017098896298585631, "loss": 0.927, "step": 3900 }, { "epoch": 0.27138335246443357, "grad_norm": 1.109375, "learning_rate": 0.0017097309068921708, "loss": 1.0531, "step": 3901 }, { "epoch": 0.27145292010156874, "grad_norm": 1.1875, "learning_rate": 0.00170957214788922, "loss": 1.1599, "step": 3902 }, { "epoch": 0.27152248773870397, "grad_norm": 1.046875, "learning_rate": 0.0017094133528577724, "loss": 1.1276, "step": 3903 }, { "epoch": 0.27159205537583914, "grad_norm": 1.0078125, "learning_rate": 0.0017092545218058905, "loss": 0.8651, "step": 3904 }, { "epoch": 0.27166162301297436, "grad_norm": 1.1328125, "learning_rate": 0.0017090956547416388, "loss": 1.0644, "step": 3905 }, { "epoch": 0.2717311906501096, "grad_norm": 1.0859375, "learning_rate": 0.001708936751673084, "loss": 1.1555, "step": 3906 }, { "epoch": 0.27180075828724476, "grad_norm": 1.078125, "learning_rate": 0.001708777812608294, "loss": 1.024, "step": 3907 }, { "epoch": 0.27187032592438, "grad_norm": 1.3671875, "learning_rate": 0.0017086188375553394, "loss": 1.188, "step": 3908 }, { "epoch": 0.27193989356151516, "grad_norm": 1.1484375, "learning_rate": 0.0017084598265222919, "loss": 0.8403, "step": 3909 }, { "epoch": 0.2720094611986504, "grad_norm": 1.484375, "learning_rate": 0.0017083007795172251, "loss": 1.0971, "step": 3910 }, { "epoch": 0.2720790288357856, "grad_norm": 1.1328125, "learning_rate": 0.001708141696548215, "loss": 1.1551, "step": 3911 }, { "epoch": 0.2721485964729208, "grad_norm": 1.3515625, "learning_rate": 0.001707982577623339, "loss": 1.1604, "step": 3912 }, { "epoch": 0.272218164110056, "grad_norm": 1.234375, "learning_rate": 0.0017078234227506756, "loss": 0.9916, "step": 3913 }, { "epoch": 0.27228773174719123, "grad_norm": 0.88671875, "learning_rate": 0.0017076642319383071, "loss": 0.8347, "step": 3914 }, { "epoch": 0.2723572993843264, "grad_norm": 1.1953125, "learning_rate": 0.0017075050051943155, "loss": 0.8493, "step": 3915 }, { "epoch": 0.2724268670214616, "grad_norm": 1.2421875, "learning_rate": 0.001707345742526786, "loss": 1.1102, "step": 3916 }, { "epoch": 0.2724964346585968, "grad_norm": 1.1640625, "learning_rate": 0.001707186443943805, "loss": 0.7921, "step": 3917 }, { "epoch": 0.272566002295732, "grad_norm": 0.96875, "learning_rate": 0.0017070271094534607, "loss": 0.9222, "step": 3918 }, { "epoch": 0.27263556993286725, "grad_norm": 1.078125, "learning_rate": 0.0017068677390638435, "loss": 1.2166, "step": 3919 }, { "epoch": 0.2727051375700024, "grad_norm": 0.9375, "learning_rate": 0.0017067083327830454, "loss": 0.8154, "step": 3920 }, { "epoch": 0.27277470520713765, "grad_norm": 1.609375, "learning_rate": 0.0017065488906191602, "loss": 1.3034, "step": 3921 }, { "epoch": 0.2728442728442728, "grad_norm": 1.1484375, "learning_rate": 0.0017063894125802835, "loss": 1.0304, "step": 3922 }, { "epoch": 0.27291384048140804, "grad_norm": 1.0859375, "learning_rate": 0.0017062298986745131, "loss": 0.8899, "step": 3923 }, { "epoch": 0.27298340811854327, "grad_norm": 1.0390625, "learning_rate": 0.001706070348909948, "loss": 0.951, "step": 3924 }, { "epoch": 0.27305297575567844, "grad_norm": 1.140625, "learning_rate": 0.0017059107632946895, "loss": 1.087, "step": 3925 }, { "epoch": 0.27312254339281367, "grad_norm": 1.7265625, "learning_rate": 0.0017057511418368408, "loss": 1.0523, "step": 3926 }, { "epoch": 0.2731921110299489, "grad_norm": 1.2265625, "learning_rate": 0.0017055914845445059, "loss": 0.9484, "step": 3927 }, { "epoch": 0.27326167866708406, "grad_norm": 1.4140625, "learning_rate": 0.001705431791425792, "loss": 1.2164, "step": 3928 }, { "epoch": 0.2733312463042193, "grad_norm": 1.0078125, "learning_rate": 0.0017052720624888074, "loss": 0.9607, "step": 3929 }, { "epoch": 0.27340081394135446, "grad_norm": 1.1875, "learning_rate": 0.0017051122977416622, "loss": 0.9486, "step": 3930 }, { "epoch": 0.2734703815784897, "grad_norm": 1.4296875, "learning_rate": 0.0017049524971924686, "loss": 0.8929, "step": 3931 }, { "epoch": 0.2735399492156249, "grad_norm": 1.3671875, "learning_rate": 0.0017047926608493404, "loss": 0.9276, "step": 3932 }, { "epoch": 0.2736095168527601, "grad_norm": 1.328125, "learning_rate": 0.0017046327887203937, "loss": 1.1083, "step": 3933 }, { "epoch": 0.2736790844898953, "grad_norm": 1.125, "learning_rate": 0.0017044728808137451, "loss": 1.0824, "step": 3934 }, { "epoch": 0.2737486521270305, "grad_norm": 1.6875, "learning_rate": 0.0017043129371375147, "loss": 1.3346, "step": 3935 }, { "epoch": 0.2738182197641657, "grad_norm": 1.2421875, "learning_rate": 0.0017041529576998229, "loss": 0.9817, "step": 3936 }, { "epoch": 0.27388778740130093, "grad_norm": 1.140625, "learning_rate": 0.0017039929425087938, "loss": 0.8586, "step": 3937 }, { "epoch": 0.2739573550384361, "grad_norm": 1.2421875, "learning_rate": 0.0017038328915725508, "loss": 1.3273, "step": 3938 }, { "epoch": 0.2740269226755713, "grad_norm": 0.96484375, "learning_rate": 0.0017036728048992215, "loss": 1.0559, "step": 3939 }, { "epoch": 0.27409649031270655, "grad_norm": 1.1953125, "learning_rate": 0.0017035126824969339, "loss": 1.0056, "step": 3940 }, { "epoch": 0.2741660579498417, "grad_norm": 1.0546875, "learning_rate": 0.0017033525243738182, "loss": 1.1017, "step": 3941 }, { "epoch": 0.27423562558697695, "grad_norm": 1.1640625, "learning_rate": 0.0017031923305380063, "loss": 1.0774, "step": 3942 }, { "epoch": 0.2743051932241121, "grad_norm": 1.203125, "learning_rate": 0.001703032100997633, "loss": 1.1158, "step": 3943 }, { "epoch": 0.27437476086124735, "grad_norm": 1.0078125, "learning_rate": 0.0017028718357608322, "loss": 0.9334, "step": 3944 }, { "epoch": 0.2744443284983826, "grad_norm": 1.03125, "learning_rate": 0.0017027115348357427, "loss": 1.0335, "step": 3945 }, { "epoch": 0.27451389613551774, "grad_norm": 1.3359375, "learning_rate": 0.0017025511982305033, "loss": 0.9501, "step": 3946 }, { "epoch": 0.27458346377265297, "grad_norm": 1.53125, "learning_rate": 0.0017023908259532552, "loss": 1.0681, "step": 3947 }, { "epoch": 0.27465303140978814, "grad_norm": 1.0078125, "learning_rate": 0.0017022304180121415, "loss": 1.0523, "step": 3948 }, { "epoch": 0.27472259904692337, "grad_norm": 1.140625, "learning_rate": 0.0017020699744153065, "loss": 1.0081, "step": 3949 }, { "epoch": 0.2747921666840586, "grad_norm": 1.2890625, "learning_rate": 0.0017019094951708968, "loss": 0.8158, "step": 3950 }, { "epoch": 0.27486173432119376, "grad_norm": 1.125, "learning_rate": 0.0017017489802870606, "loss": 1.0486, "step": 3951 }, { "epoch": 0.274931301958329, "grad_norm": 1.265625, "learning_rate": 0.001701588429771949, "loss": 1.0314, "step": 3952 }, { "epoch": 0.2750008695954642, "grad_norm": 1.0, "learning_rate": 0.0017014278436337125, "loss": 0.8683, "step": 3953 }, { "epoch": 0.2750704372325994, "grad_norm": 1.109375, "learning_rate": 0.001701267221880506, "loss": 0.7564, "step": 3954 }, { "epoch": 0.2751400048697346, "grad_norm": 1.2734375, "learning_rate": 0.0017011065645204844, "loss": 1.0051, "step": 3955 }, { "epoch": 0.2752095725068698, "grad_norm": 0.9453125, "learning_rate": 0.0017009458715618053, "loss": 0.9184, "step": 3956 }, { "epoch": 0.275279140144005, "grad_norm": 1.03125, "learning_rate": 0.0017007851430126278, "loss": 0.9399, "step": 3957 }, { "epoch": 0.27534870778114023, "grad_norm": 1.1171875, "learning_rate": 0.001700624378881113, "loss": 0.9812, "step": 3958 }, { "epoch": 0.2754182754182754, "grad_norm": 1.0625, "learning_rate": 0.0017004635791754237, "loss": 0.985, "step": 3959 }, { "epoch": 0.27548784305541063, "grad_norm": 1.1171875, "learning_rate": 0.0017003027439037245, "loss": 0.8774, "step": 3960 }, { "epoch": 0.2755574106925458, "grad_norm": 1.171875, "learning_rate": 0.0017001418730741818, "loss": 0.94, "step": 3961 }, { "epoch": 0.275626978329681, "grad_norm": 1.015625, "learning_rate": 0.0016999809666949637, "loss": 0.8023, "step": 3962 }, { "epoch": 0.27569654596681625, "grad_norm": 0.9921875, "learning_rate": 0.0016998200247742403, "loss": 1.077, "step": 3963 }, { "epoch": 0.2757661136039514, "grad_norm": 1.3671875, "learning_rate": 0.0016996590473201834, "loss": 0.8863, "step": 3964 }, { "epoch": 0.27583568124108665, "grad_norm": 1.0703125, "learning_rate": 0.0016994980343409665, "loss": 1.0414, "step": 3965 }, { "epoch": 0.2759052488782219, "grad_norm": 1.140625, "learning_rate": 0.001699336985844765, "loss": 0.7603, "step": 3966 }, { "epoch": 0.27597481651535705, "grad_norm": 1.0078125, "learning_rate": 0.0016991759018397568, "loss": 0.9134, "step": 3967 }, { "epoch": 0.2760443841524923, "grad_norm": 1.3125, "learning_rate": 0.00169901478233412, "loss": 1.108, "step": 3968 }, { "epoch": 0.27611395178962744, "grad_norm": 1.28125, "learning_rate": 0.001698853627336036, "loss": 1.099, "step": 3969 }, { "epoch": 0.27618351942676267, "grad_norm": 1.1171875, "learning_rate": 0.0016986924368536872, "loss": 0.981, "step": 3970 }, { "epoch": 0.2762530870638979, "grad_norm": 1.296875, "learning_rate": 0.0016985312108952582, "loss": 0.9792, "step": 3971 }, { "epoch": 0.27632265470103307, "grad_norm": 1.0546875, "learning_rate": 0.001698369949468935, "loss": 0.9086, "step": 3972 }, { "epoch": 0.2763922223381683, "grad_norm": 1.2265625, "learning_rate": 0.0016982086525829062, "loss": 1.0202, "step": 3973 }, { "epoch": 0.27646178997530346, "grad_norm": 1.2890625, "learning_rate": 0.0016980473202453609, "loss": 1.1561, "step": 3974 }, { "epoch": 0.2765313576124387, "grad_norm": 1.1015625, "learning_rate": 0.0016978859524644913, "loss": 0.9192, "step": 3975 }, { "epoch": 0.2766009252495739, "grad_norm": 1.3203125, "learning_rate": 0.0016977245492484905, "loss": 1.0597, "step": 3976 }, { "epoch": 0.2766704928867091, "grad_norm": 0.9609375, "learning_rate": 0.0016975631106055538, "loss": 0.8171, "step": 3977 }, { "epoch": 0.2767400605238443, "grad_norm": 1.140625, "learning_rate": 0.0016974016365438787, "loss": 0.9362, "step": 3978 }, { "epoch": 0.27680962816097954, "grad_norm": 1.2578125, "learning_rate": 0.0016972401270716633, "loss": 1.1577, "step": 3979 }, { "epoch": 0.2768791957981147, "grad_norm": 1.1875, "learning_rate": 0.0016970785821971087, "loss": 0.908, "step": 3980 }, { "epoch": 0.27694876343524993, "grad_norm": 1.1015625, "learning_rate": 0.0016969170019284173, "loss": 0.9814, "step": 3981 }, { "epoch": 0.2770183310723851, "grad_norm": 1.0390625, "learning_rate": 0.001696755386273793, "loss": 0.9795, "step": 3982 }, { "epoch": 0.27708789870952033, "grad_norm": 1.1328125, "learning_rate": 0.0016965937352414425, "loss": 1.0025, "step": 3983 }, { "epoch": 0.27715746634665556, "grad_norm": 0.9765625, "learning_rate": 0.001696432048839573, "loss": 1.045, "step": 3984 }, { "epoch": 0.2772270339837907, "grad_norm": 1.0703125, "learning_rate": 0.0016962703270763941, "loss": 0.9926, "step": 3985 }, { "epoch": 0.27729660162092595, "grad_norm": 1.15625, "learning_rate": 0.001696108569960118, "loss": 0.9154, "step": 3986 }, { "epoch": 0.2773661692580611, "grad_norm": 1.140625, "learning_rate": 0.001695946777498957, "loss": 0.8187, "step": 3987 }, { "epoch": 0.27743573689519635, "grad_norm": 1.65625, "learning_rate": 0.0016957849497011264, "loss": 1.3706, "step": 3988 }, { "epoch": 0.2775053045323316, "grad_norm": 1.078125, "learning_rate": 0.0016956230865748433, "loss": 1.0324, "step": 3989 }, { "epoch": 0.27757487216946675, "grad_norm": 1.0234375, "learning_rate": 0.001695461188128326, "loss": 0.7268, "step": 3990 }, { "epoch": 0.277644439806602, "grad_norm": 1.1875, "learning_rate": 0.001695299254369795, "loss": 0.9714, "step": 3991 }, { "epoch": 0.2777140074437372, "grad_norm": 1.0625, "learning_rate": 0.0016951372853074723, "loss": 0.8341, "step": 3992 }, { "epoch": 0.27778357508087237, "grad_norm": 0.890625, "learning_rate": 0.001694975280949582, "loss": 1.0248, "step": 3993 }, { "epoch": 0.2778531427180076, "grad_norm": 1.03125, "learning_rate": 0.00169481324130435, "loss": 1.0205, "step": 3994 }, { "epoch": 0.27792271035514277, "grad_norm": 1.046875, "learning_rate": 0.0016946511663800035, "loss": 0.965, "step": 3995 }, { "epoch": 0.277992277992278, "grad_norm": 1.125, "learning_rate": 0.0016944890561847723, "loss": 0.9839, "step": 3996 }, { "epoch": 0.2780618456294132, "grad_norm": 1.109375, "learning_rate": 0.0016943269107268873, "loss": 0.8865, "step": 3997 }, { "epoch": 0.2781314132665484, "grad_norm": 0.98046875, "learning_rate": 0.0016941647300145813, "loss": 0.9313, "step": 3998 }, { "epoch": 0.2782009809036836, "grad_norm": 1.2578125, "learning_rate": 0.0016940025140560894, "loss": 0.9325, "step": 3999 }, { "epoch": 0.2782705485408188, "grad_norm": 1.046875, "learning_rate": 0.0016938402628596477, "loss": 0.8353, "step": 4000 }, { "epoch": 0.278340116177954, "grad_norm": 0.984375, "learning_rate": 0.0016936779764334946, "loss": 0.9596, "step": 4001 }, { "epoch": 0.27840968381508924, "grad_norm": 1.1484375, "learning_rate": 0.00169351565478587, "loss": 1.1228, "step": 4002 }, { "epoch": 0.2784792514522244, "grad_norm": 1.2265625, "learning_rate": 0.0016933532979250166, "loss": 1.0892, "step": 4003 }, { "epoch": 0.27854881908935963, "grad_norm": 1.2734375, "learning_rate": 0.0016931909058591772, "loss": 0.811, "step": 4004 }, { "epoch": 0.27861838672649486, "grad_norm": 1.21875, "learning_rate": 0.0016930284785965975, "loss": 0.9714, "step": 4005 }, { "epoch": 0.27868795436363003, "grad_norm": 1.2421875, "learning_rate": 0.001692866016145525, "loss": 0.9037, "step": 4006 }, { "epoch": 0.27875752200076526, "grad_norm": 1.359375, "learning_rate": 0.0016927035185142084, "loss": 1.0398, "step": 4007 }, { "epoch": 0.2788270896379004, "grad_norm": 1.203125, "learning_rate": 0.0016925409857108985, "loss": 0.9304, "step": 4008 }, { "epoch": 0.27889665727503565, "grad_norm": 1.0, "learning_rate": 0.0016923784177438482, "loss": 0.962, "step": 4009 }, { "epoch": 0.2789662249121709, "grad_norm": 1.109375, "learning_rate": 0.0016922158146213113, "loss": 0.9493, "step": 4010 }, { "epoch": 0.27903579254930605, "grad_norm": 0.98828125, "learning_rate": 0.0016920531763515447, "loss": 0.8456, "step": 4011 }, { "epoch": 0.2791053601864413, "grad_norm": 1.0546875, "learning_rate": 0.001691890502942806, "loss": 0.7361, "step": 4012 }, { "epoch": 0.27917492782357645, "grad_norm": 1.234375, "learning_rate": 0.0016917277944033548, "loss": 1.0424, "step": 4013 }, { "epoch": 0.2792444954607117, "grad_norm": 1.1796875, "learning_rate": 0.001691565050741453, "loss": 0.9785, "step": 4014 }, { "epoch": 0.2793140630978469, "grad_norm": 1.1328125, "learning_rate": 0.0016914022719653637, "loss": 0.8496, "step": 4015 }, { "epoch": 0.27938363073498207, "grad_norm": 1.2578125, "learning_rate": 0.0016912394580833516, "loss": 0.9109, "step": 4016 }, { "epoch": 0.2794531983721173, "grad_norm": 1.40625, "learning_rate": 0.0016910766091036843, "loss": 1.1295, "step": 4017 }, { "epoch": 0.2795227660092525, "grad_norm": 1.2421875, "learning_rate": 0.0016909137250346298, "loss": 0.9687, "step": 4018 }, { "epoch": 0.2795923336463877, "grad_norm": 0.921875, "learning_rate": 0.0016907508058844588, "loss": 0.9258, "step": 4019 }, { "epoch": 0.2796619012835229, "grad_norm": 1.2421875, "learning_rate": 0.0016905878516614437, "loss": 1.2742, "step": 4020 }, { "epoch": 0.2797314689206581, "grad_norm": 0.9921875, "learning_rate": 0.0016904248623738584, "loss": 0.7806, "step": 4021 }, { "epoch": 0.2798010365577933, "grad_norm": 1.25, "learning_rate": 0.0016902618380299783, "loss": 1.0143, "step": 4022 }, { "epoch": 0.27987060419492854, "grad_norm": 1.4296875, "learning_rate": 0.0016900987786380812, "loss": 0.8619, "step": 4023 }, { "epoch": 0.2799401718320637, "grad_norm": 1.046875, "learning_rate": 0.0016899356842064468, "loss": 0.9301, "step": 4024 }, { "epoch": 0.28000973946919894, "grad_norm": 1.140625, "learning_rate": 0.0016897725547433556, "loss": 0.9563, "step": 4025 }, { "epoch": 0.2800793071063341, "grad_norm": 1.03125, "learning_rate": 0.001689609390257091, "loss": 1.0522, "step": 4026 }, { "epoch": 0.28014887474346933, "grad_norm": 1.09375, "learning_rate": 0.0016894461907559374, "loss": 1.0169, "step": 4027 }, { "epoch": 0.28021844238060456, "grad_norm": 1.2421875, "learning_rate": 0.001689282956248181, "loss": 1.1033, "step": 4028 }, { "epoch": 0.28028801001773973, "grad_norm": 0.9453125, "learning_rate": 0.0016891196867421109, "loss": 0.791, "step": 4029 }, { "epoch": 0.28035757765487496, "grad_norm": 1.1328125, "learning_rate": 0.0016889563822460158, "loss": 1.0634, "step": 4030 }, { "epoch": 0.2804271452920102, "grad_norm": 1.1328125, "learning_rate": 0.001688793042768189, "loss": 1.2211, "step": 4031 }, { "epoch": 0.28049671292914535, "grad_norm": 1.109375, "learning_rate": 0.0016886296683169227, "loss": 1.0032, "step": 4032 }, { "epoch": 0.2805662805662806, "grad_norm": 1.1640625, "learning_rate": 0.001688466258900513, "loss": 1.1016, "step": 4033 }, { "epoch": 0.28063584820341575, "grad_norm": 1.2265625, "learning_rate": 0.0016883028145272567, "loss": 0.8212, "step": 4034 }, { "epoch": 0.280705415840551, "grad_norm": 1.3984375, "learning_rate": 0.0016881393352054528, "loss": 1.1214, "step": 4035 }, { "epoch": 0.2807749834776862, "grad_norm": 1.078125, "learning_rate": 0.0016879758209434022, "loss": 0.9552, "step": 4036 }, { "epoch": 0.2808445511148214, "grad_norm": 1.2109375, "learning_rate": 0.0016878122717494067, "loss": 0.7728, "step": 4037 }, { "epoch": 0.2809141187519566, "grad_norm": 1.234375, "learning_rate": 0.0016876486876317711, "loss": 1.2132, "step": 4038 }, { "epoch": 0.28098368638909177, "grad_norm": 1.2890625, "learning_rate": 0.001687485068598801, "loss": 1.1734, "step": 4039 }, { "epoch": 0.281053254026227, "grad_norm": 1.34375, "learning_rate": 0.0016873214146588046, "loss": 0.7552, "step": 4040 }, { "epoch": 0.2811228216633622, "grad_norm": 1.1015625, "learning_rate": 0.0016871577258200908, "loss": 0.9213, "step": 4041 }, { "epoch": 0.2811923893004974, "grad_norm": 1.1640625, "learning_rate": 0.0016869940020909713, "loss": 0.8805, "step": 4042 }, { "epoch": 0.2812619569376326, "grad_norm": 0.9609375, "learning_rate": 0.0016868302434797592, "loss": 0.9129, "step": 4043 }, { "epoch": 0.28133152457476784, "grad_norm": 1.125, "learning_rate": 0.0016866664499947687, "loss": 0.9635, "step": 4044 }, { "epoch": 0.281401092211903, "grad_norm": 1.3671875, "learning_rate": 0.0016865026216443177, "loss": 1.1198, "step": 4045 }, { "epoch": 0.28147065984903824, "grad_norm": 0.953125, "learning_rate": 0.0016863387584367233, "loss": 0.8736, "step": 4046 }, { "epoch": 0.2815402274861734, "grad_norm": 1.265625, "learning_rate": 0.0016861748603803062, "loss": 1.2039, "step": 4047 }, { "epoch": 0.28160979512330864, "grad_norm": 1.328125, "learning_rate": 0.0016860109274833884, "loss": 1.3537, "step": 4048 }, { "epoch": 0.28167936276044386, "grad_norm": 1.1953125, "learning_rate": 0.0016858469597542936, "loss": 0.9092, "step": 4049 }, { "epoch": 0.28174893039757903, "grad_norm": 1.0546875, "learning_rate": 0.0016856829572013468, "loss": 1.0306, "step": 4050 }, { "epoch": 0.28181849803471426, "grad_norm": 1.046875, "learning_rate": 0.0016855189198328757, "loss": 0.9224, "step": 4051 }, { "epoch": 0.28188806567184943, "grad_norm": 1.234375, "learning_rate": 0.0016853548476572092, "loss": 0.9718, "step": 4052 }, { "epoch": 0.28195763330898466, "grad_norm": 1.2421875, "learning_rate": 0.0016851907406826776, "loss": 1.1605, "step": 4053 }, { "epoch": 0.2820272009461199, "grad_norm": 1.140625, "learning_rate": 0.001685026598917614, "loss": 1.1018, "step": 4054 }, { "epoch": 0.28209676858325505, "grad_norm": 1.1640625, "learning_rate": 0.0016848624223703527, "loss": 0.8536, "step": 4055 }, { "epoch": 0.2821663362203903, "grad_norm": 0.9609375, "learning_rate": 0.0016846982110492292, "loss": 0.7116, "step": 4056 }, { "epoch": 0.2822359038575255, "grad_norm": 0.96484375, "learning_rate": 0.0016845339649625818, "loss": 0.7503, "step": 4057 }, { "epoch": 0.2823054714946607, "grad_norm": 1.015625, "learning_rate": 0.0016843696841187504, "loss": 0.9039, "step": 4058 }, { "epoch": 0.2823750391317959, "grad_norm": 1.125, "learning_rate": 0.0016842053685260754, "loss": 0.9546, "step": 4059 }, { "epoch": 0.2824446067689311, "grad_norm": 1.2734375, "learning_rate": 0.0016840410181929006, "loss": 1.0785, "step": 4060 }, { "epoch": 0.2825141744060663, "grad_norm": 1.1328125, "learning_rate": 0.001683876633127571, "loss": 0.7847, "step": 4061 }, { "epoch": 0.2825837420432015, "grad_norm": 1.1953125, "learning_rate": 0.0016837122133384326, "loss": 1.0703, "step": 4062 }, { "epoch": 0.2826533096803367, "grad_norm": 1.1015625, "learning_rate": 0.001683547758833834, "loss": 0.9288, "step": 4063 }, { "epoch": 0.2827228773174719, "grad_norm": 0.99609375, "learning_rate": 0.0016833832696221262, "loss": 1.0972, "step": 4064 }, { "epoch": 0.2827924449546071, "grad_norm": 1.28125, "learning_rate": 0.00168321874571166, "loss": 1.1096, "step": 4065 }, { "epoch": 0.2828620125917423, "grad_norm": 1.234375, "learning_rate": 0.0016830541871107893, "loss": 1.0317, "step": 4066 }, { "epoch": 0.28293158022887754, "grad_norm": 0.9609375, "learning_rate": 0.0016828895938278703, "loss": 0.7199, "step": 4067 }, { "epoch": 0.2830011478660127, "grad_norm": 1.0859375, "learning_rate": 0.0016827249658712597, "loss": 0.9799, "step": 4068 }, { "epoch": 0.28307071550314794, "grad_norm": 1.3125, "learning_rate": 0.0016825603032493163, "loss": 1.0341, "step": 4069 }, { "epoch": 0.28314028314028317, "grad_norm": 1.5546875, "learning_rate": 0.0016823956059704012, "loss": 0.9213, "step": 4070 }, { "epoch": 0.28320985077741834, "grad_norm": 1.2109375, "learning_rate": 0.001682230874042877, "loss": 1.188, "step": 4071 }, { "epoch": 0.28327941841455356, "grad_norm": 0.88671875, "learning_rate": 0.0016820661074751074, "loss": 1.047, "step": 4072 }, { "epoch": 0.28334898605168873, "grad_norm": 0.98046875, "learning_rate": 0.0016819013062754587, "loss": 0.8298, "step": 4073 }, { "epoch": 0.28341855368882396, "grad_norm": 1.125, "learning_rate": 0.0016817364704522987, "loss": 0.9393, "step": 4074 }, { "epoch": 0.2834881213259592, "grad_norm": 1.21875, "learning_rate": 0.0016815716000139972, "loss": 1.2058, "step": 4075 }, { "epoch": 0.28355768896309436, "grad_norm": 1.1171875, "learning_rate": 0.0016814066949689252, "loss": 1.0323, "step": 4076 }, { "epoch": 0.2836272566002296, "grad_norm": 1.0625, "learning_rate": 0.0016812417553254556, "loss": 1.2022, "step": 4077 }, { "epoch": 0.28369682423736475, "grad_norm": 1.1640625, "learning_rate": 0.0016810767810919633, "loss": 0.9194, "step": 4078 }, { "epoch": 0.2837663918745, "grad_norm": 1.125, "learning_rate": 0.001680911772276825, "loss": 0.8541, "step": 4079 }, { "epoch": 0.2838359595116352, "grad_norm": 1.0234375, "learning_rate": 0.0016807467288884191, "loss": 0.9392, "step": 4080 }, { "epoch": 0.2839055271487704, "grad_norm": 1.0390625, "learning_rate": 0.0016805816509351255, "loss": 1.1768, "step": 4081 }, { "epoch": 0.2839750947859056, "grad_norm": 1.2265625, "learning_rate": 0.001680416538425326, "loss": 1.1473, "step": 4082 }, { "epoch": 0.28404466242304083, "grad_norm": 1.21875, "learning_rate": 0.0016802513913674042, "loss": 0.9821, "step": 4083 }, { "epoch": 0.284114230060176, "grad_norm": 1.2421875, "learning_rate": 0.0016800862097697453, "loss": 0.9646, "step": 4084 }, { "epoch": 0.2841837976973112, "grad_norm": 1.140625, "learning_rate": 0.0016799209936407369, "loss": 1.1557, "step": 4085 }, { "epoch": 0.2842533653344464, "grad_norm": 1.015625, "learning_rate": 0.0016797557429887673, "loss": 0.8796, "step": 4086 }, { "epoch": 0.2843229329715816, "grad_norm": 1.1015625, "learning_rate": 0.0016795904578222275, "loss": 0.7061, "step": 4087 }, { "epoch": 0.28439250060871685, "grad_norm": 1.1875, "learning_rate": 0.0016794251381495094, "loss": 1.0073, "step": 4088 }, { "epoch": 0.284462068245852, "grad_norm": 1.203125, "learning_rate": 0.0016792597839790074, "loss": 0.8033, "step": 4089 }, { "epoch": 0.28453163588298724, "grad_norm": 1.1640625, "learning_rate": 0.0016790943953191174, "loss": 1.1437, "step": 4090 }, { "epoch": 0.2846012035201224, "grad_norm": 1.03125, "learning_rate": 0.0016789289721782367, "loss": 0.9835, "step": 4091 }, { "epoch": 0.28467077115725764, "grad_norm": 1.0546875, "learning_rate": 0.0016787635145647651, "loss": 1.0082, "step": 4092 }, { "epoch": 0.28474033879439287, "grad_norm": 1.453125, "learning_rate": 0.0016785980224871032, "loss": 0.8653, "step": 4093 }, { "epoch": 0.28480990643152804, "grad_norm": 0.90625, "learning_rate": 0.0016784324959536541, "loss": 0.6891, "step": 4094 }, { "epoch": 0.28487947406866326, "grad_norm": 1.3984375, "learning_rate": 0.0016782669349728226, "loss": 1.1953, "step": 4095 }, { "epoch": 0.2849490417057985, "grad_norm": 1.234375, "learning_rate": 0.0016781013395530148, "loss": 0.8658, "step": 4096 }, { "epoch": 0.28501860934293366, "grad_norm": 1.1328125, "learning_rate": 0.0016779357097026389, "loss": 0.8288, "step": 4097 }, { "epoch": 0.2850881769800689, "grad_norm": 1.015625, "learning_rate": 0.0016777700454301046, "loss": 0.7104, "step": 4098 }, { "epoch": 0.28515774461720406, "grad_norm": 1.3359375, "learning_rate": 0.0016776043467438236, "loss": 0.9844, "step": 4099 }, { "epoch": 0.2852273122543393, "grad_norm": 1.3046875, "learning_rate": 0.0016774386136522092, "loss": 0.9912, "step": 4100 }, { "epoch": 0.2852968798914745, "grad_norm": 1.328125, "learning_rate": 0.0016772728461636767, "loss": 0.9511, "step": 4101 }, { "epoch": 0.2853664475286097, "grad_norm": 1.046875, "learning_rate": 0.0016771070442866427, "loss": 0.8666, "step": 4102 }, { "epoch": 0.2854360151657449, "grad_norm": 1.0703125, "learning_rate": 0.001676941208029526, "loss": 1.2521, "step": 4103 }, { "epoch": 0.2855055828028801, "grad_norm": 0.96484375, "learning_rate": 0.0016767753374007466, "loss": 1.1681, "step": 4104 }, { "epoch": 0.2855751504400153, "grad_norm": 1.1015625, "learning_rate": 0.001676609432408727, "loss": 0.8885, "step": 4105 }, { "epoch": 0.28564471807715053, "grad_norm": 0.9765625, "learning_rate": 0.001676443493061891, "loss": 0.9408, "step": 4106 }, { "epoch": 0.2857142857142857, "grad_norm": 1.0234375, "learning_rate": 0.0016762775193686632, "loss": 1.078, "step": 4107 }, { "epoch": 0.2857838533514209, "grad_norm": 1.328125, "learning_rate": 0.001676111511337472, "loss": 1.1927, "step": 4108 }, { "epoch": 0.28585342098855615, "grad_norm": 1.3984375, "learning_rate": 0.0016759454689767464, "loss": 1.0382, "step": 4109 }, { "epoch": 0.2859229886256913, "grad_norm": 1.421875, "learning_rate": 0.0016757793922949165, "loss": 0.9997, "step": 4110 }, { "epoch": 0.28599255626282655, "grad_norm": 1.4296875, "learning_rate": 0.0016756132813004153, "loss": 1.2973, "step": 4111 }, { "epoch": 0.2860621238999617, "grad_norm": 1.328125, "learning_rate": 0.0016754471360016772, "loss": 1.0318, "step": 4112 }, { "epoch": 0.28613169153709694, "grad_norm": 0.828125, "learning_rate": 0.001675280956407138, "loss": 0.8628, "step": 4113 }, { "epoch": 0.28620125917423217, "grad_norm": 1.0546875, "learning_rate": 0.0016751147425252354, "loss": 0.9316, "step": 4114 }, { "epoch": 0.28627082681136734, "grad_norm": 1.1953125, "learning_rate": 0.001674948494364409, "loss": 0.772, "step": 4115 }, { "epoch": 0.28634039444850257, "grad_norm": 1.453125, "learning_rate": 0.0016747822119331003, "loss": 1.4195, "step": 4116 }, { "epoch": 0.28640996208563774, "grad_norm": 1.3125, "learning_rate": 0.0016746158952397519, "loss": 1.1103, "step": 4117 }, { "epoch": 0.28647952972277296, "grad_norm": 1.171875, "learning_rate": 0.0016744495442928085, "loss": 0.9768, "step": 4118 }, { "epoch": 0.2865490973599082, "grad_norm": 1.0625, "learning_rate": 0.0016742831591007171, "loss": 1.1117, "step": 4119 }, { "epoch": 0.28661866499704336, "grad_norm": 1.453125, "learning_rate": 0.001674116739671925, "loss": 0.9725, "step": 4120 }, { "epoch": 0.2866882326341786, "grad_norm": 1.3515625, "learning_rate": 0.001673950286014883, "loss": 1.0254, "step": 4121 }, { "epoch": 0.2867578002713138, "grad_norm": 1.1328125, "learning_rate": 0.001673783798138042, "loss": 1.1598, "step": 4122 }, { "epoch": 0.286827367908449, "grad_norm": 1.15625, "learning_rate": 0.0016736172760498564, "loss": 1.0397, "step": 4123 }, { "epoch": 0.2868969355455842, "grad_norm": 1.265625, "learning_rate": 0.0016734507197587807, "loss": 1.1071, "step": 4124 }, { "epoch": 0.2869665031827194, "grad_norm": 1.484375, "learning_rate": 0.001673284129273272, "loss": 1.0425, "step": 4125 }, { "epoch": 0.2870360708198546, "grad_norm": 1.203125, "learning_rate": 0.0016731175046017883, "loss": 0.944, "step": 4126 }, { "epoch": 0.28710563845698983, "grad_norm": 1.0859375, "learning_rate": 0.0016729508457527908, "loss": 0.878, "step": 4127 }, { "epoch": 0.287175206094125, "grad_norm": 0.953125, "learning_rate": 0.0016727841527347414, "loss": 0.9369, "step": 4128 }, { "epoch": 0.28724477373126023, "grad_norm": 1.140625, "learning_rate": 0.0016726174255561035, "loss": 0.896, "step": 4129 }, { "epoch": 0.2873143413683954, "grad_norm": 1.0078125, "learning_rate": 0.0016724506642253432, "loss": 0.885, "step": 4130 }, { "epoch": 0.2873839090055306, "grad_norm": 1.1875, "learning_rate": 0.0016722838687509276, "loss": 1.1057, "step": 4131 }, { "epoch": 0.28745347664266585, "grad_norm": 1.0078125, "learning_rate": 0.0016721170391413257, "loss": 0.7977, "step": 4132 }, { "epoch": 0.287523044279801, "grad_norm": 0.9375, "learning_rate": 0.0016719501754050082, "loss": 0.8088, "step": 4133 }, { "epoch": 0.28759261191693625, "grad_norm": 1.078125, "learning_rate": 0.0016717832775504475, "loss": 0.9313, "step": 4134 }, { "epoch": 0.2876621795540715, "grad_norm": 1.15625, "learning_rate": 0.0016716163455861182, "loss": 0.9587, "step": 4135 }, { "epoch": 0.28773174719120664, "grad_norm": 1.1015625, "learning_rate": 0.0016714493795204962, "loss": 1.2193, "step": 4136 }, { "epoch": 0.28780131482834187, "grad_norm": 1.125, "learning_rate": 0.0016712823793620588, "loss": 1.0766, "step": 4137 }, { "epoch": 0.28787088246547704, "grad_norm": 1.2265625, "learning_rate": 0.001671115345119286, "loss": 0.92, "step": 4138 }, { "epoch": 0.28794045010261227, "grad_norm": 0.859375, "learning_rate": 0.0016709482768006584, "loss": 0.821, "step": 4139 }, { "epoch": 0.2880100177397475, "grad_norm": 1.125, "learning_rate": 0.001670781174414659, "loss": 1.1517, "step": 4140 }, { "epoch": 0.28807958537688266, "grad_norm": 1.1171875, "learning_rate": 0.0016706140379697727, "loss": 0.9332, "step": 4141 }, { "epoch": 0.2881491530140179, "grad_norm": 1.28125, "learning_rate": 0.001670446867474486, "loss": 0.9166, "step": 4142 }, { "epoch": 0.28821872065115306, "grad_norm": 1.0546875, "learning_rate": 0.0016702796629372862, "loss": 0.9084, "step": 4143 }, { "epoch": 0.2882882882882883, "grad_norm": 1.09375, "learning_rate": 0.0016701124243666636, "loss": 0.6126, "step": 4144 }, { "epoch": 0.2883578559254235, "grad_norm": 1.15625, "learning_rate": 0.0016699451517711102, "loss": 0.9703, "step": 4145 }, { "epoch": 0.2884274235625587, "grad_norm": 1.359375, "learning_rate": 0.0016697778451591184, "loss": 0.8664, "step": 4146 }, { "epoch": 0.2884969911996939, "grad_norm": 1.0390625, "learning_rate": 0.0016696105045391836, "loss": 0.614, "step": 4147 }, { "epoch": 0.28856655883682913, "grad_norm": 1.2109375, "learning_rate": 0.0016694431299198024, "loss": 1.1663, "step": 4148 }, { "epoch": 0.2886361264739643, "grad_norm": 1.2109375, "learning_rate": 0.0016692757213094733, "loss": 1.2911, "step": 4149 }, { "epoch": 0.28870569411109953, "grad_norm": 1.0546875, "learning_rate": 0.0016691082787166967, "loss": 0.8398, "step": 4150 }, { "epoch": 0.2887752617482347, "grad_norm": 1.0234375, "learning_rate": 0.001668940802149974, "loss": 0.9779, "step": 4151 }, { "epoch": 0.28884482938536993, "grad_norm": 1.1484375, "learning_rate": 0.0016687732916178092, "loss": 1.0343, "step": 4152 }, { "epoch": 0.28891439702250515, "grad_norm": 1.1484375, "learning_rate": 0.0016686057471287077, "loss": 1.0199, "step": 4153 }, { "epoch": 0.2889839646596403, "grad_norm": 0.90625, "learning_rate": 0.0016684381686911762, "loss": 0.8642, "step": 4154 }, { "epoch": 0.28905353229677555, "grad_norm": 1.2421875, "learning_rate": 0.0016682705563137237, "loss": 0.954, "step": 4155 }, { "epoch": 0.2891230999339107, "grad_norm": 1.3671875, "learning_rate": 0.0016681029100048606, "loss": 1.064, "step": 4156 }, { "epoch": 0.28919266757104595, "grad_norm": 1.3046875, "learning_rate": 0.0016679352297730991, "loss": 1.0502, "step": 4157 }, { "epoch": 0.2892622352081812, "grad_norm": 1.0703125, "learning_rate": 0.0016677675156269536, "loss": 1.0124, "step": 4158 }, { "epoch": 0.28933180284531634, "grad_norm": 1.1875, "learning_rate": 0.0016675997675749392, "loss": 1.1415, "step": 4159 }, { "epoch": 0.28940137048245157, "grad_norm": 1.140625, "learning_rate": 0.0016674319856255738, "loss": 0.9647, "step": 4160 }, { "epoch": 0.2894709381195868, "grad_norm": 1.2578125, "learning_rate": 0.0016672641697873761, "loss": 0.9417, "step": 4161 }, { "epoch": 0.28954050575672197, "grad_norm": 1.21875, "learning_rate": 0.0016670963200688669, "loss": 0.8899, "step": 4162 }, { "epoch": 0.2896100733938572, "grad_norm": 1.140625, "learning_rate": 0.0016669284364785692, "loss": 1.1638, "step": 4163 }, { "epoch": 0.28967964103099236, "grad_norm": 1.21875, "learning_rate": 0.0016667605190250072, "loss": 1.2574, "step": 4164 }, { "epoch": 0.2897492086681276, "grad_norm": 1.375, "learning_rate": 0.0016665925677167067, "loss": 0.9816, "step": 4165 }, { "epoch": 0.2898187763052628, "grad_norm": 1.3125, "learning_rate": 0.0016664245825621954, "loss": 0.9461, "step": 4166 }, { "epoch": 0.289888343942398, "grad_norm": 1.03125, "learning_rate": 0.0016662565635700028, "loss": 0.8763, "step": 4167 }, { "epoch": 0.2899579115795332, "grad_norm": 1.171875, "learning_rate": 0.0016660885107486606, "loss": 0.8769, "step": 4168 }, { "epoch": 0.2900274792166684, "grad_norm": 1.0625, "learning_rate": 0.0016659204241067003, "loss": 1.0802, "step": 4169 }, { "epoch": 0.2900970468538036, "grad_norm": 1.1640625, "learning_rate": 0.001665752303652658, "loss": 0.9512, "step": 4170 }, { "epoch": 0.29016661449093883, "grad_norm": 1.203125, "learning_rate": 0.001665584149395069, "loss": 0.9257, "step": 4171 }, { "epoch": 0.290236182128074, "grad_norm": 1.0078125, "learning_rate": 0.0016654159613424717, "loss": 0.9806, "step": 4172 }, { "epoch": 0.29030574976520923, "grad_norm": 1.328125, "learning_rate": 0.0016652477395034061, "loss": 1.2123, "step": 4173 }, { "epoch": 0.2903753174023444, "grad_norm": 1.1328125, "learning_rate": 0.0016650794838864132, "loss": 1.0177, "step": 4174 }, { "epoch": 0.29044488503947963, "grad_norm": 1.0703125, "learning_rate": 0.0016649111945000363, "loss": 0.8756, "step": 4175 }, { "epoch": 0.29051445267661485, "grad_norm": 1.21875, "learning_rate": 0.0016647428713528205, "loss": 0.9949, "step": 4176 }, { "epoch": 0.29058402031375, "grad_norm": 1.125, "learning_rate": 0.0016645745144533122, "loss": 1.2037, "step": 4177 }, { "epoch": 0.29065358795088525, "grad_norm": 1.03125, "learning_rate": 0.0016644061238100596, "loss": 0.8776, "step": 4178 }, { "epoch": 0.2907231555880205, "grad_norm": 0.9453125, "learning_rate": 0.0016642376994316132, "loss": 1.0536, "step": 4179 }, { "epoch": 0.29079272322515565, "grad_norm": 1.1171875, "learning_rate": 0.001664069241326524, "loss": 1.0989, "step": 4180 }, { "epoch": 0.2908622908622909, "grad_norm": 1.421875, "learning_rate": 0.0016639007495033462, "loss": 1.1989, "step": 4181 }, { "epoch": 0.29093185849942604, "grad_norm": 1.109375, "learning_rate": 0.0016637322239706348, "loss": 0.9185, "step": 4182 }, { "epoch": 0.29100142613656127, "grad_norm": 1.03125, "learning_rate": 0.0016635636647369463, "loss": 0.8248, "step": 4183 }, { "epoch": 0.2910709937736965, "grad_norm": 0.9609375, "learning_rate": 0.0016633950718108394, "loss": 0.8318, "step": 4184 }, { "epoch": 0.29114056141083167, "grad_norm": 1.3125, "learning_rate": 0.0016632264452008747, "loss": 0.8321, "step": 4185 }, { "epoch": 0.2912101290479669, "grad_norm": 1.203125, "learning_rate": 0.0016630577849156142, "loss": 1.1485, "step": 4186 }, { "epoch": 0.29127969668510206, "grad_norm": 1.28125, "learning_rate": 0.001662889090963621, "loss": 1.0078, "step": 4187 }, { "epoch": 0.2913492643222373, "grad_norm": 1.125, "learning_rate": 0.001662720363353461, "loss": 1.0449, "step": 4188 }, { "epoch": 0.2914188319593725, "grad_norm": 1.109375, "learning_rate": 0.0016625516020937015, "loss": 0.8293, "step": 4189 }, { "epoch": 0.2914883995965077, "grad_norm": 1.140625, "learning_rate": 0.0016623828071929113, "loss": 0.7138, "step": 4190 }, { "epoch": 0.2915579672336429, "grad_norm": 1.2421875, "learning_rate": 0.0016622139786596603, "loss": 0.9308, "step": 4191 }, { "epoch": 0.29162753487077814, "grad_norm": 1.3828125, "learning_rate": 0.0016620451165025218, "loss": 1.4084, "step": 4192 }, { "epoch": 0.2916971025079133, "grad_norm": 0.94921875, "learning_rate": 0.001661876220730069, "loss": 0.8537, "step": 4193 }, { "epoch": 0.29176667014504853, "grad_norm": 1.2109375, "learning_rate": 0.001661707291350878, "loss": 0.8307, "step": 4194 }, { "epoch": 0.2918362377821837, "grad_norm": 0.91015625, "learning_rate": 0.0016615383283735256, "loss": 0.797, "step": 4195 }, { "epoch": 0.29190580541931893, "grad_norm": 0.9296875, "learning_rate": 0.0016613693318065917, "loss": 0.8068, "step": 4196 }, { "epoch": 0.29197537305645416, "grad_norm": 1.265625, "learning_rate": 0.0016612003016586562, "loss": 1.0044, "step": 4197 }, { "epoch": 0.2920449406935893, "grad_norm": 1.1796875, "learning_rate": 0.0016610312379383028, "loss": 0.9487, "step": 4198 }, { "epoch": 0.29211450833072455, "grad_norm": 1.125, "learning_rate": 0.0016608621406541144, "loss": 0.7799, "step": 4199 }, { "epoch": 0.2921840759678597, "grad_norm": 1.140625, "learning_rate": 0.0016606930098146777, "loss": 1.0704, "step": 4200 }, { "epoch": 0.29225364360499495, "grad_norm": 1.0546875, "learning_rate": 0.0016605238454285801, "loss": 1.0219, "step": 4201 }, { "epoch": 0.2923232112421302, "grad_norm": 1.15625, "learning_rate": 0.001660354647504411, "loss": 1.1202, "step": 4202 }, { "epoch": 0.29239277887926535, "grad_norm": 1.0, "learning_rate": 0.0016601854160507613, "loss": 0.8611, "step": 4203 }, { "epoch": 0.2924623465164006, "grad_norm": 1.1640625, "learning_rate": 0.0016600161510762232, "loss": 0.8448, "step": 4204 }, { "epoch": 0.2925319141535358, "grad_norm": 1.2109375, "learning_rate": 0.0016598468525893923, "loss": 0.7808, "step": 4205 }, { "epoch": 0.29260148179067097, "grad_norm": 1.140625, "learning_rate": 0.001659677520598864, "loss": 1.0908, "step": 4206 }, { "epoch": 0.2926710494278062, "grad_norm": 1.0234375, "learning_rate": 0.0016595081551132364, "loss": 0.6729, "step": 4207 }, { "epoch": 0.29274061706494137, "grad_norm": 1.1484375, "learning_rate": 0.0016593387561411085, "loss": 1.0274, "step": 4208 }, { "epoch": 0.2928101847020766, "grad_norm": 1.2578125, "learning_rate": 0.0016591693236910818, "loss": 1.0735, "step": 4209 }, { "epoch": 0.2928797523392118, "grad_norm": 1.0078125, "learning_rate": 0.0016589998577717596, "loss": 0.8966, "step": 4210 }, { "epoch": 0.292949319976347, "grad_norm": 1.046875, "learning_rate": 0.0016588303583917462, "loss": 1.036, "step": 4211 }, { "epoch": 0.2930188876134822, "grad_norm": 1.03125, "learning_rate": 0.0016586608255596477, "loss": 1.0781, "step": 4212 }, { "epoch": 0.2930884552506174, "grad_norm": 1.0859375, "learning_rate": 0.0016584912592840727, "loss": 1.0266, "step": 4213 }, { "epoch": 0.2931580228877526, "grad_norm": 0.953125, "learning_rate": 0.0016583216595736304, "loss": 0.8677, "step": 4214 }, { "epoch": 0.29322759052488784, "grad_norm": 1.046875, "learning_rate": 0.0016581520264369325, "loss": 0.9807, "step": 4215 }, { "epoch": 0.293297158162023, "grad_norm": 0.9921875, "learning_rate": 0.001657982359882592, "loss": 0.7129, "step": 4216 }, { "epoch": 0.29336672579915823, "grad_norm": 0.89453125, "learning_rate": 0.0016578126599192237, "loss": 1.0546, "step": 4217 }, { "epoch": 0.29343629343629346, "grad_norm": 1.3046875, "learning_rate": 0.001657642926555444, "loss": 0.8812, "step": 4218 }, { "epoch": 0.29350586107342863, "grad_norm": 1.1640625, "learning_rate": 0.0016574731597998715, "loss": 0.9671, "step": 4219 }, { "epoch": 0.29357542871056386, "grad_norm": 1.359375, "learning_rate": 0.001657303359661126, "loss": 1.1712, "step": 4220 }, { "epoch": 0.293644996347699, "grad_norm": 0.89453125, "learning_rate": 0.001657133526147829, "loss": 1.011, "step": 4221 }, { "epoch": 0.29371456398483425, "grad_norm": 1.328125, "learning_rate": 0.0016569636592686033, "loss": 1.0849, "step": 4222 }, { "epoch": 0.2937841316219695, "grad_norm": 1.0546875, "learning_rate": 0.0016567937590320745, "loss": 1.1929, "step": 4223 }, { "epoch": 0.29385369925910465, "grad_norm": 1.125, "learning_rate": 0.0016566238254468691, "loss": 0.9215, "step": 4224 }, { "epoch": 0.2939232668962399, "grad_norm": 1.5703125, "learning_rate": 0.0016564538585216153, "loss": 1.2911, "step": 4225 }, { "epoch": 0.29399283453337505, "grad_norm": 1.1796875, "learning_rate": 0.0016562838582649439, "loss": 0.9647, "step": 4226 }, { "epoch": 0.2940624021705103, "grad_norm": 1.1796875, "learning_rate": 0.0016561138246854853, "loss": 1.0962, "step": 4227 }, { "epoch": 0.2941319698076455, "grad_norm": 1.015625, "learning_rate": 0.0016559437577918744, "loss": 1.0047, "step": 4228 }, { "epoch": 0.29420153744478067, "grad_norm": 0.9609375, "learning_rate": 0.0016557736575927454, "loss": 0.8579, "step": 4229 }, { "epoch": 0.2942711050819159, "grad_norm": 1.375, "learning_rate": 0.0016556035240967355, "loss": 0.9159, "step": 4230 }, { "epoch": 0.2943406727190511, "grad_norm": 1.0703125, "learning_rate": 0.0016554333573124832, "loss": 0.8801, "step": 4231 }, { "epoch": 0.2944102403561863, "grad_norm": 1.046875, "learning_rate": 0.0016552631572486283, "loss": 0.8854, "step": 4232 }, { "epoch": 0.2944798079933215, "grad_norm": 0.9609375, "learning_rate": 0.0016550929239138132, "loss": 0.8029, "step": 4233 }, { "epoch": 0.2945493756304567, "grad_norm": 1.0546875, "learning_rate": 0.0016549226573166816, "loss": 0.7103, "step": 4234 }, { "epoch": 0.2946189432675919, "grad_norm": 1.2578125, "learning_rate": 0.0016547523574658783, "loss": 0.858, "step": 4235 }, { "epoch": 0.29468851090472714, "grad_norm": 1.078125, "learning_rate": 0.0016545820243700504, "loss": 0.8784, "step": 4236 }, { "epoch": 0.2947580785418623, "grad_norm": 1.0625, "learning_rate": 0.001654411658037847, "loss": 0.8463, "step": 4237 }, { "epoch": 0.29482764617899754, "grad_norm": 1.109375, "learning_rate": 0.0016542412584779175, "loss": 0.9727, "step": 4238 }, { "epoch": 0.2948972138161327, "grad_norm": 1.1328125, "learning_rate": 0.001654070825698915, "loss": 0.9683, "step": 4239 }, { "epoch": 0.29496678145326793, "grad_norm": 1.234375, "learning_rate": 0.0016539003597094927, "loss": 1.2157, "step": 4240 }, { "epoch": 0.29503634909040316, "grad_norm": 1.1953125, "learning_rate": 0.0016537298605183058, "loss": 1.0825, "step": 4241 }, { "epoch": 0.29510591672753833, "grad_norm": 1.328125, "learning_rate": 0.0016535593281340117, "loss": 1.0039, "step": 4242 }, { "epoch": 0.29517548436467356, "grad_norm": 1.21875, "learning_rate": 0.0016533887625652692, "loss": 0.8681, "step": 4243 }, { "epoch": 0.2952450520018088, "grad_norm": 0.984375, "learning_rate": 0.0016532181638207386, "loss": 0.7514, "step": 4244 }, { "epoch": 0.29531461963894395, "grad_norm": 1.203125, "learning_rate": 0.001653047531909082, "loss": 1.2113, "step": 4245 }, { "epoch": 0.2953841872760792, "grad_norm": 1.171875, "learning_rate": 0.0016528768668389636, "loss": 1.1541, "step": 4246 }, { "epoch": 0.29545375491321435, "grad_norm": 0.98828125, "learning_rate": 0.0016527061686190485, "loss": 1.0614, "step": 4247 }, { "epoch": 0.2955233225503496, "grad_norm": 1.0234375, "learning_rate": 0.001652535437258004, "loss": 0.9572, "step": 4248 }, { "epoch": 0.2955928901874848, "grad_norm": 0.91015625, "learning_rate": 0.0016523646727644992, "loss": 0.9534, "step": 4249 }, { "epoch": 0.29566245782462, "grad_norm": 1.1484375, "learning_rate": 0.001652193875147204, "loss": 1.1222, "step": 4250 }, { "epoch": 0.2957320254617552, "grad_norm": 1.2734375, "learning_rate": 0.0016520230444147916, "loss": 1.1133, "step": 4251 }, { "epoch": 0.29580159309889037, "grad_norm": 1.078125, "learning_rate": 0.0016518521805759352, "loss": 1.1543, "step": 4252 }, { "epoch": 0.2958711607360256, "grad_norm": 1.234375, "learning_rate": 0.001651681283639311, "loss": 0.9325, "step": 4253 }, { "epoch": 0.2959407283731608, "grad_norm": 1.2421875, "learning_rate": 0.0016515103536135956, "loss": 1.1222, "step": 4254 }, { "epoch": 0.296010296010296, "grad_norm": 1.2578125, "learning_rate": 0.0016513393905074683, "loss": 0.9288, "step": 4255 }, { "epoch": 0.2960798636474312, "grad_norm": 1.0, "learning_rate": 0.00165116839432961, "loss": 0.8688, "step": 4256 }, { "epoch": 0.29614943128456644, "grad_norm": 1.0703125, "learning_rate": 0.0016509973650887023, "loss": 0.9634, "step": 4257 }, { "epoch": 0.2962189989217016, "grad_norm": 1.0, "learning_rate": 0.0016508263027934303, "loss": 0.9483, "step": 4258 }, { "epoch": 0.29628856655883684, "grad_norm": 1.125, "learning_rate": 0.0016506552074524784, "loss": 1.0677, "step": 4259 }, { "epoch": 0.296358134195972, "grad_norm": 0.95703125, "learning_rate": 0.001650484079074535, "loss": 0.8843, "step": 4260 }, { "epoch": 0.29642770183310724, "grad_norm": 0.921875, "learning_rate": 0.0016503129176682887, "loss": 1.0493, "step": 4261 }, { "epoch": 0.29649726947024246, "grad_norm": 1.3203125, "learning_rate": 0.00165014172324243, "loss": 1.1126, "step": 4262 }, { "epoch": 0.29656683710737763, "grad_norm": 1.3359375, "learning_rate": 0.0016499704958056521, "loss": 1.1817, "step": 4263 }, { "epoch": 0.29663640474451286, "grad_norm": 1.1015625, "learning_rate": 0.001649799235366648, "loss": 0.7771, "step": 4264 }, { "epoch": 0.29670597238164803, "grad_norm": 1.3125, "learning_rate": 0.0016496279419341143, "loss": 1.1077, "step": 4265 }, { "epoch": 0.29677554001878326, "grad_norm": 1.53125, "learning_rate": 0.001649456615516748, "loss": 1.017, "step": 4266 }, { "epoch": 0.2968451076559185, "grad_norm": 1.375, "learning_rate": 0.0016492852561232482, "loss": 1.0378, "step": 4267 }, { "epoch": 0.29691467529305365, "grad_norm": 1.0, "learning_rate": 0.0016491138637623156, "loss": 0.8971, "step": 4268 }, { "epoch": 0.2969842429301889, "grad_norm": 1.328125, "learning_rate": 0.0016489424384426529, "loss": 1.1585, "step": 4269 }, { "epoch": 0.2970538105673241, "grad_norm": 1.015625, "learning_rate": 0.001648770980172964, "loss": 0.9772, "step": 4270 }, { "epoch": 0.2971233782044593, "grad_norm": 1.2109375, "learning_rate": 0.0016485994889619549, "loss": 0.9343, "step": 4271 }, { "epoch": 0.2971929458415945, "grad_norm": 1.109375, "learning_rate": 0.0016484279648183331, "loss": 0.8485, "step": 4272 }, { "epoch": 0.2972625134787297, "grad_norm": 1.296875, "learning_rate": 0.0016482564077508074, "loss": 0.9716, "step": 4273 }, { "epoch": 0.2973320811158649, "grad_norm": 1.125, "learning_rate": 0.0016480848177680887, "loss": 0.9989, "step": 4274 }, { "epoch": 0.2974016487530001, "grad_norm": 1.09375, "learning_rate": 0.0016479131948788895, "loss": 0.735, "step": 4275 }, { "epoch": 0.2974712163901353, "grad_norm": 1.1796875, "learning_rate": 0.001647741539091924, "loss": 1.0061, "step": 4276 }, { "epoch": 0.2975407840272705, "grad_norm": 1.0078125, "learning_rate": 0.0016475698504159083, "loss": 0.7155, "step": 4277 }, { "epoch": 0.2976103516644057, "grad_norm": 1.421875, "learning_rate": 0.0016473981288595589, "loss": 1.0526, "step": 4278 }, { "epoch": 0.2976799193015409, "grad_norm": 1.15625, "learning_rate": 0.0016472263744315963, "loss": 0.819, "step": 4279 }, { "epoch": 0.29774948693867614, "grad_norm": 1.359375, "learning_rate": 0.0016470545871407405, "loss": 1.3974, "step": 4280 }, { "epoch": 0.2978190545758113, "grad_norm": 1.1328125, "learning_rate": 0.0016468827669957142, "loss": 0.959, "step": 4281 }, { "epoch": 0.29788862221294654, "grad_norm": 1.1640625, "learning_rate": 0.0016467109140052415, "loss": 1.0633, "step": 4282 }, { "epoch": 0.29795818985008177, "grad_norm": 1.03125, "learning_rate": 0.001646539028178048, "loss": 0.8363, "step": 4283 }, { "epoch": 0.29802775748721694, "grad_norm": 1.0546875, "learning_rate": 0.0016463671095228618, "loss": 0.9048, "step": 4284 }, { "epoch": 0.29809732512435216, "grad_norm": 1.3046875, "learning_rate": 0.0016461951580484116, "loss": 1.3229, "step": 4285 }, { "epoch": 0.29816689276148733, "grad_norm": 1.234375, "learning_rate": 0.0016460231737634283, "loss": 1.0129, "step": 4286 }, { "epoch": 0.29823646039862256, "grad_norm": 0.98046875, "learning_rate": 0.0016458511566766446, "loss": 0.7665, "step": 4287 }, { "epoch": 0.2983060280357578, "grad_norm": 1.6015625, "learning_rate": 0.0016456791067967942, "loss": 1.1054, "step": 4288 }, { "epoch": 0.29837559567289296, "grad_norm": 1.296875, "learning_rate": 0.0016455070241326133, "loss": 1.0423, "step": 4289 }, { "epoch": 0.2984451633100282, "grad_norm": 1.0859375, "learning_rate": 0.0016453349086928395, "loss": 0.8518, "step": 4290 }, { "epoch": 0.29851473094716335, "grad_norm": 1.0625, "learning_rate": 0.0016451627604862115, "loss": 0.7496, "step": 4291 }, { "epoch": 0.2985842985842986, "grad_norm": 1.2890625, "learning_rate": 0.0016449905795214706, "loss": 1.0012, "step": 4292 }, { "epoch": 0.2986538662214338, "grad_norm": 1.078125, "learning_rate": 0.001644818365807359, "loss": 1.0817, "step": 4293 }, { "epoch": 0.298723433858569, "grad_norm": 1.546875, "learning_rate": 0.001644646119352621, "loss": 1.3537, "step": 4294 }, { "epoch": 0.2987930014957042, "grad_norm": 1.078125, "learning_rate": 0.0016444738401660021, "loss": 0.8799, "step": 4295 }, { "epoch": 0.29886256913283943, "grad_norm": 1.0234375, "learning_rate": 0.0016443015282562499, "loss": 0.9406, "step": 4296 }, { "epoch": 0.2989321367699746, "grad_norm": 1.015625, "learning_rate": 0.0016441291836321139, "loss": 0.9568, "step": 4297 }, { "epoch": 0.2990017044071098, "grad_norm": 1.0390625, "learning_rate": 0.0016439568063023446, "loss": 0.8902, "step": 4298 }, { "epoch": 0.299071272044245, "grad_norm": 0.9453125, "learning_rate": 0.0016437843962756942, "loss": 0.9022, "step": 4299 }, { "epoch": 0.2991408396813802, "grad_norm": 1.0078125, "learning_rate": 0.0016436119535609176, "loss": 0.96, "step": 4300 }, { "epoch": 0.29921040731851545, "grad_norm": 1.1015625, "learning_rate": 0.0016434394781667696, "loss": 1.0838, "step": 4301 }, { "epoch": 0.2992799749556506, "grad_norm": 1.109375, "learning_rate": 0.0016432669701020083, "loss": 1.1138, "step": 4302 }, { "epoch": 0.29934954259278584, "grad_norm": 1.1015625, "learning_rate": 0.0016430944293753921, "loss": 0.8477, "step": 4303 }, { "epoch": 0.299419110229921, "grad_norm": 0.98046875, "learning_rate": 0.0016429218559956826, "loss": 0.8323, "step": 4304 }, { "epoch": 0.29948867786705624, "grad_norm": 1.2890625, "learning_rate": 0.001642749249971642, "loss": 0.9095, "step": 4305 }, { "epoch": 0.29955824550419147, "grad_norm": 1.1015625, "learning_rate": 0.0016425766113120337, "loss": 0.7988, "step": 4306 }, { "epoch": 0.29962781314132664, "grad_norm": 0.9375, "learning_rate": 0.0016424039400256244, "loss": 0.8935, "step": 4307 }, { "epoch": 0.29969738077846186, "grad_norm": 0.98828125, "learning_rate": 0.0016422312361211806, "loss": 1.1378, "step": 4308 }, { "epoch": 0.2997669484155971, "grad_norm": 1.0546875, "learning_rate": 0.001642058499607472, "loss": 1.0826, "step": 4309 }, { "epoch": 0.29983651605273226, "grad_norm": 0.80859375, "learning_rate": 0.0016418857304932686, "loss": 0.682, "step": 4310 }, { "epoch": 0.2999060836898675, "grad_norm": 1.0078125, "learning_rate": 0.0016417129287873435, "loss": 0.8288, "step": 4311 }, { "epoch": 0.29997565132700266, "grad_norm": 1.1328125, "learning_rate": 0.0016415400944984702, "loss": 1.1236, "step": 4312 }, { "epoch": 0.3000452189641379, "grad_norm": 1.09375, "learning_rate": 0.0016413672276354245, "loss": 0.8868, "step": 4313 }, { "epoch": 0.3001147866012731, "grad_norm": 0.859375, "learning_rate": 0.0016411943282069838, "loss": 0.6118, "step": 4314 }, { "epoch": 0.3001843542384083, "grad_norm": 1.03125, "learning_rate": 0.001641021396221927, "loss": 0.8889, "step": 4315 }, { "epoch": 0.3002539218755435, "grad_norm": 0.87109375, "learning_rate": 0.0016408484316890347, "loss": 0.7158, "step": 4316 }, { "epoch": 0.3003234895126787, "grad_norm": 1.0546875, "learning_rate": 0.001640675434617089, "loss": 1.0108, "step": 4317 }, { "epoch": 0.3003930571498139, "grad_norm": 1.1171875, "learning_rate": 0.001640502405014874, "loss": 0.9027, "step": 4318 }, { "epoch": 0.30046262478694913, "grad_norm": 0.96484375, "learning_rate": 0.0016403293428911754, "loss": 0.9381, "step": 4319 }, { "epoch": 0.3005321924240843, "grad_norm": 1.2265625, "learning_rate": 0.00164015624825478, "loss": 1.1626, "step": 4320 }, { "epoch": 0.3006017600612195, "grad_norm": 1.140625, "learning_rate": 0.0016399831211144772, "loss": 1.0997, "step": 4321 }, { "epoch": 0.30067132769835475, "grad_norm": 1.1953125, "learning_rate": 0.001639809961479057, "loss": 1.1818, "step": 4322 }, { "epoch": 0.3007408953354899, "grad_norm": 1.1640625, "learning_rate": 0.0016396367693573119, "loss": 1.1089, "step": 4323 }, { "epoch": 0.30081046297262515, "grad_norm": 1.2578125, "learning_rate": 0.0016394635447580358, "loss": 1.216, "step": 4324 }, { "epoch": 0.3008800306097603, "grad_norm": 1.0859375, "learning_rate": 0.0016392902876900242, "loss": 1.1119, "step": 4325 }, { "epoch": 0.30094959824689554, "grad_norm": 1.2109375, "learning_rate": 0.001639116998162074, "loss": 1.2958, "step": 4326 }, { "epoch": 0.30101916588403077, "grad_norm": 1.03125, "learning_rate": 0.0016389436761829836, "loss": 0.8656, "step": 4327 }, { "epoch": 0.30108873352116594, "grad_norm": 1.125, "learning_rate": 0.0016387703217615541, "loss": 0.9134, "step": 4328 }, { "epoch": 0.30115830115830117, "grad_norm": 1.0234375, "learning_rate": 0.0016385969349065875, "loss": 0.7285, "step": 4329 }, { "epoch": 0.30122786879543634, "grad_norm": 1.1796875, "learning_rate": 0.001638423515626887, "loss": 1.0429, "step": 4330 }, { "epoch": 0.30129743643257156, "grad_norm": 1.2421875, "learning_rate": 0.0016382500639312582, "loss": 1.1032, "step": 4331 }, { "epoch": 0.3013670040697068, "grad_norm": 1.140625, "learning_rate": 0.0016380765798285086, "loss": 0.9027, "step": 4332 }, { "epoch": 0.30143657170684196, "grad_norm": 1.1796875, "learning_rate": 0.0016379030633274462, "loss": 0.8908, "step": 4333 }, { "epoch": 0.3015061393439772, "grad_norm": 1.1953125, "learning_rate": 0.0016377295144368816, "loss": 0.9926, "step": 4334 }, { "epoch": 0.3015757069811124, "grad_norm": 1.21875, "learning_rate": 0.0016375559331656265, "loss": 0.8325, "step": 4335 }, { "epoch": 0.3016452746182476, "grad_norm": 1.6796875, "learning_rate": 0.0016373823195224943, "loss": 0.8746, "step": 4336 }, { "epoch": 0.3017148422553828, "grad_norm": 0.94140625, "learning_rate": 0.0016372086735163011, "loss": 0.9151, "step": 4337 }, { "epoch": 0.301784409892518, "grad_norm": 0.96484375, "learning_rate": 0.0016370349951558632, "loss": 0.8559, "step": 4338 }, { "epoch": 0.3018539775296532, "grad_norm": 1.21875, "learning_rate": 0.001636861284449999, "loss": 1.004, "step": 4339 }, { "epoch": 0.30192354516678843, "grad_norm": 1.2109375, "learning_rate": 0.0016366875414075288, "loss": 0.8747, "step": 4340 }, { "epoch": 0.3019931128039236, "grad_norm": 1.40625, "learning_rate": 0.0016365137660372744, "loss": 0.9914, "step": 4341 }, { "epoch": 0.30206268044105883, "grad_norm": 1.046875, "learning_rate": 0.0016363399583480592, "loss": 0.9239, "step": 4342 }, { "epoch": 0.302132248078194, "grad_norm": 0.98828125, "learning_rate": 0.0016361661183487085, "loss": 0.8221, "step": 4343 }, { "epoch": 0.3022018157153292, "grad_norm": 0.9140625, "learning_rate": 0.0016359922460480484, "loss": 0.7647, "step": 4344 }, { "epoch": 0.30227138335246445, "grad_norm": 1.203125, "learning_rate": 0.001635818341454908, "loss": 1.116, "step": 4345 }, { "epoch": 0.3023409509895996, "grad_norm": 1.2265625, "learning_rate": 0.001635644404578117, "loss": 1.0775, "step": 4346 }, { "epoch": 0.30241051862673485, "grad_norm": 0.67578125, "learning_rate": 0.0016354704354265071, "loss": 0.7777, "step": 4347 }, { "epoch": 0.3024800862638701, "grad_norm": 0.9765625, "learning_rate": 0.0016352964340089113, "loss": 0.9641, "step": 4348 }, { "epoch": 0.30254965390100524, "grad_norm": 1.09375, "learning_rate": 0.0016351224003341644, "loss": 1.0605, "step": 4349 }, { "epoch": 0.30261922153814047, "grad_norm": 1.0859375, "learning_rate": 0.0016349483344111038, "loss": 0.6386, "step": 4350 }, { "epoch": 0.30268878917527564, "grad_norm": 1.03125, "learning_rate": 0.0016347742362485672, "loss": 0.9836, "step": 4351 }, { "epoch": 0.30275835681241087, "grad_norm": 1.2109375, "learning_rate": 0.0016346001058553938, "loss": 0.9887, "step": 4352 }, { "epoch": 0.3028279244495461, "grad_norm": 1.171875, "learning_rate": 0.001634425943240426, "loss": 1.0901, "step": 4353 }, { "epoch": 0.30289749208668126, "grad_norm": 0.9453125, "learning_rate": 0.0016342517484125069, "loss": 1.0028, "step": 4354 }, { "epoch": 0.3029670597238165, "grad_norm": 1.1484375, "learning_rate": 0.0016340775213804803, "loss": 0.976, "step": 4355 }, { "epoch": 0.30303662736095166, "grad_norm": 1.0234375, "learning_rate": 0.0016339032621531936, "loss": 0.8511, "step": 4356 }, { "epoch": 0.3031061949980869, "grad_norm": 0.9140625, "learning_rate": 0.0016337289707394939, "loss": 0.7611, "step": 4357 }, { "epoch": 0.3031757626352221, "grad_norm": 1.0, "learning_rate": 0.0016335546471482317, "loss": 0.7739, "step": 4358 }, { "epoch": 0.3032453302723573, "grad_norm": 1.046875, "learning_rate": 0.0016333802913882573, "loss": 0.8467, "step": 4359 }, { "epoch": 0.3033148979094925, "grad_norm": 0.859375, "learning_rate": 0.0016332059034684248, "loss": 0.9224, "step": 4360 }, { "epoch": 0.30338446554662774, "grad_norm": 1.46875, "learning_rate": 0.001633031483397588, "loss": 0.8146, "step": 4361 }, { "epoch": 0.3034540331837629, "grad_norm": 1.3671875, "learning_rate": 0.0016328570311846032, "loss": 1.1802, "step": 4362 }, { "epoch": 0.30352360082089813, "grad_norm": 1.0625, "learning_rate": 0.001632682546838328, "loss": 1.055, "step": 4363 }, { "epoch": 0.3035931684580333, "grad_norm": 1.1875, "learning_rate": 0.0016325080303676218, "loss": 1.0319, "step": 4364 }, { "epoch": 0.30366273609516853, "grad_norm": 1.203125, "learning_rate": 0.0016323334817813465, "loss": 0.8394, "step": 4365 }, { "epoch": 0.30373230373230375, "grad_norm": 1.125, "learning_rate": 0.001632158901088364, "loss": 1.1423, "step": 4366 }, { "epoch": 0.3038018713694389, "grad_norm": 1.0703125, "learning_rate": 0.0016319842882975386, "loss": 1.1017, "step": 4367 }, { "epoch": 0.30387143900657415, "grad_norm": 1.1796875, "learning_rate": 0.0016318096434177365, "loss": 1.069, "step": 4368 }, { "epoch": 0.3039410066437093, "grad_norm": 1.0625, "learning_rate": 0.0016316349664578253, "loss": 1.0585, "step": 4369 }, { "epoch": 0.30401057428084455, "grad_norm": 0.9296875, "learning_rate": 0.0016314602574266743, "loss": 0.893, "step": 4370 }, { "epoch": 0.3040801419179798, "grad_norm": 1.1796875, "learning_rate": 0.0016312855163331543, "loss": 1.1369, "step": 4371 }, { "epoch": 0.30414970955511494, "grad_norm": 1.15625, "learning_rate": 0.0016311107431861377, "loss": 0.8239, "step": 4372 }, { "epoch": 0.30421927719225017, "grad_norm": 1.21875, "learning_rate": 0.001630935937994498, "loss": 0.9099, "step": 4373 }, { "epoch": 0.3042888448293854, "grad_norm": 1.0546875, "learning_rate": 0.0016307611007671122, "loss": 0.8812, "step": 4374 }, { "epoch": 0.30435841246652057, "grad_norm": 1.09375, "learning_rate": 0.0016305862315128565, "loss": 1.1323, "step": 4375 }, { "epoch": 0.3044279801036558, "grad_norm": 0.95703125, "learning_rate": 0.001630411330240611, "loss": 0.9536, "step": 4376 }, { "epoch": 0.30449754774079096, "grad_norm": 1.421875, "learning_rate": 0.0016302363969592551, "loss": 1.147, "step": 4377 }, { "epoch": 0.3045671153779262, "grad_norm": 1.1015625, "learning_rate": 0.0016300614316776718, "loss": 1.0402, "step": 4378 }, { "epoch": 0.3046366830150614, "grad_norm": 1.21875, "learning_rate": 0.0016298864344047447, "loss": 1.0486, "step": 4379 }, { "epoch": 0.3047062506521966, "grad_norm": 1.1328125, "learning_rate": 0.0016297114051493592, "loss": 0.9422, "step": 4380 }, { "epoch": 0.3047758182893318, "grad_norm": 1.234375, "learning_rate": 0.0016295363439204028, "loss": 1.076, "step": 4381 }, { "epoch": 0.304845385926467, "grad_norm": 1.1484375, "learning_rate": 0.0016293612507267637, "loss": 0.9279, "step": 4382 }, { "epoch": 0.3049149535636022, "grad_norm": 1.1328125, "learning_rate": 0.0016291861255773325, "loss": 0.9118, "step": 4383 }, { "epoch": 0.30498452120073744, "grad_norm": 1.0546875, "learning_rate": 0.0016290109684810013, "loss": 0.8204, "step": 4384 }, { "epoch": 0.3050540888378726, "grad_norm": 1.1328125, "learning_rate": 0.0016288357794466638, "loss": 1.0852, "step": 4385 }, { "epoch": 0.30512365647500783, "grad_norm": 0.9921875, "learning_rate": 0.0016286605584832144, "loss": 0.9775, "step": 4386 }, { "epoch": 0.30519322411214306, "grad_norm": 1.0078125, "learning_rate": 0.001628485305599551, "loss": 0.8048, "step": 4387 }, { "epoch": 0.30526279174927823, "grad_norm": 0.94140625, "learning_rate": 0.0016283100208045714, "loss": 0.9148, "step": 4388 }, { "epoch": 0.30533235938641345, "grad_norm": 1.203125, "learning_rate": 0.0016281347041071758, "loss": 1.0056, "step": 4389 }, { "epoch": 0.3054019270235486, "grad_norm": 0.89453125, "learning_rate": 0.0016279593555162662, "loss": 0.9933, "step": 4390 }, { "epoch": 0.30547149466068385, "grad_norm": 0.984375, "learning_rate": 0.0016277839750407455, "loss": 0.9661, "step": 4391 }, { "epoch": 0.3055410622978191, "grad_norm": 1.2109375, "learning_rate": 0.001627608562689519, "loss": 1.0881, "step": 4392 }, { "epoch": 0.30561062993495425, "grad_norm": 0.94140625, "learning_rate": 0.0016274331184714928, "loss": 0.8765, "step": 4393 }, { "epoch": 0.3056801975720895, "grad_norm": 1.046875, "learning_rate": 0.0016272576423955753, "loss": 0.859, "step": 4394 }, { "epoch": 0.30574976520922464, "grad_norm": 1.0390625, "learning_rate": 0.0016270821344706765, "loss": 0.8187, "step": 4395 }, { "epoch": 0.30581933284635987, "grad_norm": 1.0234375, "learning_rate": 0.0016269065947057079, "loss": 1.2265, "step": 4396 }, { "epoch": 0.3058889004834951, "grad_norm": 1.28125, "learning_rate": 0.0016267310231095817, "loss": 1.0202, "step": 4397 }, { "epoch": 0.30595846812063027, "grad_norm": 1.140625, "learning_rate": 0.0016265554196912137, "loss": 0.7479, "step": 4398 }, { "epoch": 0.3060280357577655, "grad_norm": 1.1171875, "learning_rate": 0.001626379784459519, "loss": 0.9621, "step": 4399 }, { "epoch": 0.3060976033949007, "grad_norm": 1.3203125, "learning_rate": 0.0016262041174234163, "loss": 0.9615, "step": 4400 }, { "epoch": 0.3061671710320359, "grad_norm": 1.2734375, "learning_rate": 0.001626028418591825, "loss": 1.0641, "step": 4401 }, { "epoch": 0.3062367386691711, "grad_norm": 1.09375, "learning_rate": 0.0016258526879736658, "loss": 0.8778, "step": 4402 }, { "epoch": 0.3063063063063063, "grad_norm": 1.234375, "learning_rate": 0.0016256769255778615, "loss": 0.8062, "step": 4403 }, { "epoch": 0.3063758739434415, "grad_norm": 1.1796875, "learning_rate": 0.001625501131413337, "loss": 0.783, "step": 4404 }, { "epoch": 0.30644544158057674, "grad_norm": 1.140625, "learning_rate": 0.0016253253054890173, "loss": 1.1104, "step": 4405 }, { "epoch": 0.3065150092177119, "grad_norm": 1.5390625, "learning_rate": 0.001625149447813831, "loss": 0.8862, "step": 4406 }, { "epoch": 0.30658457685484714, "grad_norm": 1.0859375, "learning_rate": 0.001624973558396706, "loss": 0.992, "step": 4407 }, { "epoch": 0.3066541444919823, "grad_norm": 1.3359375, "learning_rate": 0.0016247976372465744, "loss": 0.7775, "step": 4408 }, { "epoch": 0.30672371212911753, "grad_norm": 1.0703125, "learning_rate": 0.001624621684372368, "loss": 0.8106, "step": 4409 }, { "epoch": 0.30679327976625276, "grad_norm": 0.93359375, "learning_rate": 0.0016244456997830203, "loss": 1.0442, "step": 4410 }, { "epoch": 0.30686284740338793, "grad_norm": 1.078125, "learning_rate": 0.001624269683487468, "loss": 1.1166, "step": 4411 }, { "epoch": 0.30693241504052315, "grad_norm": 1.078125, "learning_rate": 0.0016240936354946474, "loss": 0.896, "step": 4412 }, { "epoch": 0.3070019826776584, "grad_norm": 0.90625, "learning_rate": 0.0016239175558134976, "loss": 1.0908, "step": 4413 }, { "epoch": 0.30707155031479355, "grad_norm": 1.1484375, "learning_rate": 0.0016237414444529592, "loss": 1.1792, "step": 4414 }, { "epoch": 0.3071411179519288, "grad_norm": 1.21875, "learning_rate": 0.0016235653014219742, "loss": 0.9716, "step": 4415 }, { "epoch": 0.30721068558906395, "grad_norm": 1.8125, "learning_rate": 0.001623389126729486, "loss": 0.9999, "step": 4416 }, { "epoch": 0.3072802532261992, "grad_norm": 0.88671875, "learning_rate": 0.0016232129203844403, "loss": 1.0203, "step": 4417 }, { "epoch": 0.3073498208633344, "grad_norm": 0.94140625, "learning_rate": 0.0016230366823957836, "loss": 1.0839, "step": 4418 }, { "epoch": 0.30741938850046957, "grad_norm": 0.890625, "learning_rate": 0.0016228604127724645, "loss": 0.8569, "step": 4419 }, { "epoch": 0.3074889561376048, "grad_norm": 1.3359375, "learning_rate": 0.0016226841115234332, "loss": 1.134, "step": 4420 }, { "epoch": 0.30755852377473997, "grad_norm": 0.953125, "learning_rate": 0.0016225077786576412, "loss": 0.795, "step": 4421 }, { "epoch": 0.3076280914118752, "grad_norm": 1.234375, "learning_rate": 0.0016223314141840417, "loss": 1.0879, "step": 4422 }, { "epoch": 0.3076976590490104, "grad_norm": 1.1015625, "learning_rate": 0.0016221550181115898, "loss": 0.8837, "step": 4423 }, { "epoch": 0.3077672266861456, "grad_norm": 1.203125, "learning_rate": 0.0016219785904492423, "loss": 1.3526, "step": 4424 }, { "epoch": 0.3078367943232808, "grad_norm": 1.2890625, "learning_rate": 0.001621802131205957, "loss": 0.8536, "step": 4425 }, { "epoch": 0.30790636196041604, "grad_norm": 1.0234375, "learning_rate": 0.0016216256403906932, "loss": 0.9965, "step": 4426 }, { "epoch": 0.3079759295975512, "grad_norm": 1.34375, "learning_rate": 0.0016214491180124128, "loss": 1.1338, "step": 4427 }, { "epoch": 0.30804549723468644, "grad_norm": 0.84375, "learning_rate": 0.0016212725640800784, "loss": 0.9712, "step": 4428 }, { "epoch": 0.3081150648718216, "grad_norm": 1.25, "learning_rate": 0.001621095978602655, "loss": 1.1477, "step": 4429 }, { "epoch": 0.30818463250895684, "grad_norm": 1.0625, "learning_rate": 0.0016209193615891078, "loss": 0.8307, "step": 4430 }, { "epoch": 0.30825420014609206, "grad_norm": 1.15625, "learning_rate": 0.0016207427130484056, "loss": 1.0563, "step": 4431 }, { "epoch": 0.30832376778322723, "grad_norm": 1.1015625, "learning_rate": 0.001620566032989517, "loss": 1.1016, "step": 4432 }, { "epoch": 0.30839333542036246, "grad_norm": 1.2734375, "learning_rate": 0.001620389321421413, "loss": 1.1264, "step": 4433 }, { "epoch": 0.30846290305749763, "grad_norm": 1.171875, "learning_rate": 0.0016202125783530666, "loss": 1.1547, "step": 4434 }, { "epoch": 0.30853247069463285, "grad_norm": 1.203125, "learning_rate": 0.0016200358037934512, "loss": 1.0316, "step": 4435 }, { "epoch": 0.3086020383317681, "grad_norm": 1.015625, "learning_rate": 0.0016198589977515431, "loss": 0.9454, "step": 4436 }, { "epoch": 0.30867160596890325, "grad_norm": 1.0859375, "learning_rate": 0.0016196821602363193, "loss": 0.9787, "step": 4437 }, { "epoch": 0.3087411736060385, "grad_norm": 0.96484375, "learning_rate": 0.001619505291256759, "loss": 0.9956, "step": 4438 }, { "epoch": 0.3088107412431737, "grad_norm": 1.2578125, "learning_rate": 0.0016193283908218423, "loss": 1.0377, "step": 4439 }, { "epoch": 0.3088803088803089, "grad_norm": 1.2265625, "learning_rate": 0.001619151458940552, "loss": 1.0214, "step": 4440 }, { "epoch": 0.3089498765174441, "grad_norm": 0.96875, "learning_rate": 0.001618974495621871, "loss": 0.7111, "step": 4441 }, { "epoch": 0.30901944415457927, "grad_norm": 1.171875, "learning_rate": 0.001618797500874785, "loss": 1.1319, "step": 4442 }, { "epoch": 0.3090890117917145, "grad_norm": 0.921875, "learning_rate": 0.001618620474708281, "loss": 1.0498, "step": 4443 }, { "epoch": 0.3091585794288497, "grad_norm": 1.21875, "learning_rate": 0.0016184434171313473, "loss": 1.1306, "step": 4444 }, { "epoch": 0.3092281470659849, "grad_norm": 1.0546875, "learning_rate": 0.001618266328152974, "loss": 0.8588, "step": 4445 }, { "epoch": 0.3092977147031201, "grad_norm": 1.375, "learning_rate": 0.0016180892077821529, "loss": 0.9119, "step": 4446 }, { "epoch": 0.3093672823402553, "grad_norm": 0.953125, "learning_rate": 0.0016179120560278772, "loss": 0.8275, "step": 4447 }, { "epoch": 0.3094368499773905, "grad_norm": 1.0234375, "learning_rate": 0.0016177348728991419, "loss": 1.0409, "step": 4448 }, { "epoch": 0.30950641761452574, "grad_norm": 1.265625, "learning_rate": 0.0016175576584049431, "loss": 0.9388, "step": 4449 }, { "epoch": 0.3095759852516609, "grad_norm": 1.0, "learning_rate": 0.0016173804125542797, "loss": 0.8171, "step": 4450 }, { "epoch": 0.30964555288879614, "grad_norm": 1.625, "learning_rate": 0.0016172031353561503, "loss": 1.3405, "step": 4451 }, { "epoch": 0.30971512052593136, "grad_norm": 1.21875, "learning_rate": 0.0016170258268195568, "loss": 1.105, "step": 4452 }, { "epoch": 0.30978468816306653, "grad_norm": 1.046875, "learning_rate": 0.0016168484869535015, "loss": 0.5778, "step": 4453 }, { "epoch": 0.30985425580020176, "grad_norm": 1.046875, "learning_rate": 0.0016166711157669898, "loss": 0.752, "step": 4454 }, { "epoch": 0.30992382343733693, "grad_norm": 1.1015625, "learning_rate": 0.0016164937132690266, "loss": 1.0001, "step": 4455 }, { "epoch": 0.30999339107447216, "grad_norm": 1.0390625, "learning_rate": 0.0016163162794686201, "loss": 1.0588, "step": 4456 }, { "epoch": 0.3100629587116074, "grad_norm": 1.0703125, "learning_rate": 0.0016161388143747797, "loss": 0.8943, "step": 4457 }, { "epoch": 0.31013252634874255, "grad_norm": 0.875, "learning_rate": 0.0016159613179965156, "loss": 0.9216, "step": 4458 }, { "epoch": 0.3102020939858778, "grad_norm": 0.9609375, "learning_rate": 0.0016157837903428404, "loss": 0.8055, "step": 4459 }, { "epoch": 0.31027166162301295, "grad_norm": 0.90234375, "learning_rate": 0.0016156062314227682, "loss": 0.8656, "step": 4460 }, { "epoch": 0.3103412292601482, "grad_norm": 1.2578125, "learning_rate": 0.0016154286412453144, "loss": 1.1291, "step": 4461 }, { "epoch": 0.3104107968972834, "grad_norm": 1.1875, "learning_rate": 0.0016152510198194966, "loss": 0.8579, "step": 4462 }, { "epoch": 0.3104803645344186, "grad_norm": 1.1015625, "learning_rate": 0.0016150733671543324, "loss": 0.821, "step": 4463 }, { "epoch": 0.3105499321715538, "grad_norm": 0.875, "learning_rate": 0.0016148956832588435, "loss": 0.8403, "step": 4464 }, { "epoch": 0.31061949980868897, "grad_norm": 1.0859375, "learning_rate": 0.0016147179681420506, "loss": 0.9634, "step": 4465 }, { "epoch": 0.3106890674458242, "grad_norm": 1.2734375, "learning_rate": 0.001614540221812978, "loss": 1.0434, "step": 4466 }, { "epoch": 0.3107586350829594, "grad_norm": 1.03125, "learning_rate": 0.00161436244428065, "loss": 0.9125, "step": 4467 }, { "epoch": 0.3108282027200946, "grad_norm": 1.046875, "learning_rate": 0.0016141846355540942, "loss": 1.0275, "step": 4468 }, { "epoch": 0.3108977703572298, "grad_norm": 1.2578125, "learning_rate": 0.0016140067956423381, "loss": 0.7801, "step": 4469 }, { "epoch": 0.31096733799436505, "grad_norm": 1.1640625, "learning_rate": 0.0016138289245544116, "loss": 0.8483, "step": 4470 }, { "epoch": 0.3110369056315002, "grad_norm": 1.2421875, "learning_rate": 0.0016136510222993464, "loss": 1.0084, "step": 4471 }, { "epoch": 0.31110647326863544, "grad_norm": 1.2265625, "learning_rate": 0.0016134730888861754, "loss": 0.9062, "step": 4472 }, { "epoch": 0.3111760409057706, "grad_norm": 1.140625, "learning_rate": 0.0016132951243239331, "loss": 0.9388, "step": 4473 }, { "epoch": 0.31124560854290584, "grad_norm": 0.8984375, "learning_rate": 0.0016131171286216555, "loss": 0.7988, "step": 4474 }, { "epoch": 0.31131517618004106, "grad_norm": 1.2109375, "learning_rate": 0.0016129391017883803, "loss": 0.9827, "step": 4475 }, { "epoch": 0.31138474381717623, "grad_norm": 1.203125, "learning_rate": 0.0016127610438331473, "loss": 0.7951, "step": 4476 }, { "epoch": 0.31145431145431146, "grad_norm": 1.1328125, "learning_rate": 0.0016125829547649967, "loss": 0.9847, "step": 4477 }, { "epoch": 0.31152387909144663, "grad_norm": 1.1015625, "learning_rate": 0.0016124048345929716, "loss": 0.7996, "step": 4478 }, { "epoch": 0.31159344672858186, "grad_norm": 0.95703125, "learning_rate": 0.0016122266833261158, "loss": 1.0155, "step": 4479 }, { "epoch": 0.3116630143657171, "grad_norm": 1.078125, "learning_rate": 0.0016120485009734743, "loss": 0.9282, "step": 4480 }, { "epoch": 0.31173258200285225, "grad_norm": 1.0390625, "learning_rate": 0.0016118702875440954, "loss": 0.9679, "step": 4481 }, { "epoch": 0.3118021496399875, "grad_norm": 0.98828125, "learning_rate": 0.0016116920430470272, "loss": 0.7549, "step": 4482 }, { "epoch": 0.3118717172771227, "grad_norm": 1.25, "learning_rate": 0.0016115137674913202, "loss": 1.0342, "step": 4483 }, { "epoch": 0.3119412849142579, "grad_norm": 1.21875, "learning_rate": 0.0016113354608860264, "loss": 0.7318, "step": 4484 }, { "epoch": 0.3120108525513931, "grad_norm": 1.2578125, "learning_rate": 0.0016111571232401993, "loss": 0.868, "step": 4485 }, { "epoch": 0.3120804201885283, "grad_norm": 1.09375, "learning_rate": 0.0016109787545628938, "loss": 0.9422, "step": 4486 }, { "epoch": 0.3121499878256635, "grad_norm": 1.03125, "learning_rate": 0.001610800354863167, "loss": 0.8755, "step": 4487 }, { "epoch": 0.3122195554627987, "grad_norm": 1.140625, "learning_rate": 0.0016106219241500766, "loss": 1.0344, "step": 4488 }, { "epoch": 0.3122891230999339, "grad_norm": 0.984375, "learning_rate": 0.0016104434624326825, "loss": 0.8669, "step": 4489 }, { "epoch": 0.3123586907370691, "grad_norm": 0.953125, "learning_rate": 0.0016102649697200464, "loss": 0.961, "step": 4490 }, { "epoch": 0.3124282583742043, "grad_norm": 1.2109375, "learning_rate": 0.001610086446021231, "loss": 0.9783, "step": 4491 }, { "epoch": 0.3124978260113395, "grad_norm": 1.1875, "learning_rate": 0.0016099078913453014, "loss": 0.6967, "step": 4492 }, { "epoch": 0.31256739364847475, "grad_norm": 1.171875, "learning_rate": 0.0016097293057013226, "loss": 0.9887, "step": 4493 }, { "epoch": 0.3126369612856099, "grad_norm": 1.25, "learning_rate": 0.0016095506890983634, "loss": 1.0331, "step": 4494 }, { "epoch": 0.31270652892274514, "grad_norm": 1.1484375, "learning_rate": 0.0016093720415454925, "loss": 0.8642, "step": 4495 }, { "epoch": 0.31277609655988037, "grad_norm": 1.1171875, "learning_rate": 0.0016091933630517806, "loss": 0.8122, "step": 4496 }, { "epoch": 0.31284566419701554, "grad_norm": 1.0703125, "learning_rate": 0.0016090146536263002, "loss": 0.8992, "step": 4497 }, { "epoch": 0.31291523183415076, "grad_norm": 1.2265625, "learning_rate": 0.0016088359132781253, "loss": 0.8872, "step": 4498 }, { "epoch": 0.31298479947128593, "grad_norm": 1.53125, "learning_rate": 0.0016086571420163322, "loss": 1.1528, "step": 4499 }, { "epoch": 0.31305436710842116, "grad_norm": 1.078125, "learning_rate": 0.0016084783398499964, "loss": 0.897, "step": 4500 }, { "epoch": 0.3131239347455564, "grad_norm": 2.578125, "learning_rate": 0.0016082995067881979, "loss": 1.0002, "step": 4501 }, { "epoch": 0.31319350238269156, "grad_norm": 1.2265625, "learning_rate": 0.0016081206428400165, "loss": 1.1553, "step": 4502 }, { "epoch": 0.3132630700198268, "grad_norm": 1.0, "learning_rate": 0.0016079417480145339, "loss": 0.8074, "step": 4503 }, { "epoch": 0.31333263765696195, "grad_norm": 1.25, "learning_rate": 0.0016077628223208338, "loss": 1.249, "step": 4504 }, { "epoch": 0.3134022052940972, "grad_norm": 1.015625, "learning_rate": 0.0016075838657680004, "loss": 1.1904, "step": 4505 }, { "epoch": 0.3134717729312324, "grad_norm": 1.109375, "learning_rate": 0.0016074048783651213, "loss": 0.8828, "step": 4506 }, { "epoch": 0.3135413405683676, "grad_norm": 1.5078125, "learning_rate": 0.0016072258601212838, "loss": 0.8629, "step": 4507 }, { "epoch": 0.3136109082055028, "grad_norm": 1.0390625, "learning_rate": 0.001607046811045578, "loss": 0.9527, "step": 4508 }, { "epoch": 0.31368047584263803, "grad_norm": 1.0859375, "learning_rate": 0.0016068677311470948, "loss": 0.9314, "step": 4509 }, { "epoch": 0.3137500434797732, "grad_norm": 0.953125, "learning_rate": 0.0016066886204349267, "loss": 0.7579, "step": 4510 }, { "epoch": 0.3138196111169084, "grad_norm": 0.90234375, "learning_rate": 0.0016065094789181687, "loss": 0.7607, "step": 4511 }, { "epoch": 0.3138891787540436, "grad_norm": 0.9140625, "learning_rate": 0.0016063303066059162, "loss": 0.7219, "step": 4512 }, { "epoch": 0.3139587463911788, "grad_norm": 1.34375, "learning_rate": 0.001606151103507267, "loss": 1.1225, "step": 4513 }, { "epoch": 0.31402831402831405, "grad_norm": 1.1796875, "learning_rate": 0.0016059718696313202, "loss": 0.7852, "step": 4514 }, { "epoch": 0.3140978816654492, "grad_norm": 1.28125, "learning_rate": 0.001605792604987176, "loss": 1.0759, "step": 4515 }, { "epoch": 0.31416744930258445, "grad_norm": 1.1640625, "learning_rate": 0.0016056133095839365, "loss": 1.0242, "step": 4516 }, { "epoch": 0.3142370169397196, "grad_norm": 1.125, "learning_rate": 0.0016054339834307059, "loss": 1.0011, "step": 4517 }, { "epoch": 0.31430658457685484, "grad_norm": 1.0625, "learning_rate": 0.0016052546265365893, "loss": 1.2239, "step": 4518 }, { "epoch": 0.31437615221399007, "grad_norm": 1.046875, "learning_rate": 0.0016050752389106934, "loss": 1.11, "step": 4519 }, { "epoch": 0.31444571985112524, "grad_norm": 1.09375, "learning_rate": 0.0016048958205621268, "loss": 0.8507, "step": 4520 }, { "epoch": 0.31451528748826046, "grad_norm": 1.34375, "learning_rate": 0.0016047163714999991, "loss": 0.9852, "step": 4521 }, { "epoch": 0.3145848551253957, "grad_norm": 1.0703125, "learning_rate": 0.001604536891733422, "loss": 0.9424, "step": 4522 }, { "epoch": 0.31465442276253086, "grad_norm": 1.3203125, "learning_rate": 0.0016043573812715086, "loss": 1.0454, "step": 4523 }, { "epoch": 0.3147239903996661, "grad_norm": 1.390625, "learning_rate": 0.001604177840123374, "loss": 1.1769, "step": 4524 }, { "epoch": 0.31479355803680126, "grad_norm": 1.2578125, "learning_rate": 0.0016039982682981336, "loss": 0.9743, "step": 4525 }, { "epoch": 0.3148631256739365, "grad_norm": 1.03125, "learning_rate": 0.0016038186658049055, "loss": 0.9401, "step": 4526 }, { "epoch": 0.3149326933110717, "grad_norm": 1.0546875, "learning_rate": 0.0016036390326528093, "loss": 0.9003, "step": 4527 }, { "epoch": 0.3150022609482069, "grad_norm": 1.1015625, "learning_rate": 0.0016034593688509654, "loss": 0.9977, "step": 4528 }, { "epoch": 0.3150718285853421, "grad_norm": 1.1953125, "learning_rate": 0.0016032796744084963, "loss": 0.9259, "step": 4529 }, { "epoch": 0.3151413962224773, "grad_norm": 1.390625, "learning_rate": 0.0016030999493345261, "loss": 0.7179, "step": 4530 }, { "epoch": 0.3152109638596125, "grad_norm": 1.359375, "learning_rate": 0.0016029201936381804, "loss": 0.9177, "step": 4531 }, { "epoch": 0.31528053149674773, "grad_norm": 1.296875, "learning_rate": 0.0016027404073285863, "loss": 0.7988, "step": 4532 }, { "epoch": 0.3153500991338829, "grad_norm": 1.1640625, "learning_rate": 0.0016025605904148726, "loss": 0.8419, "step": 4533 }, { "epoch": 0.3154196667710181, "grad_norm": 1.0859375, "learning_rate": 0.0016023807429061687, "loss": 1.1772, "step": 4534 }, { "epoch": 0.31548923440815335, "grad_norm": 1.0859375, "learning_rate": 0.0016022008648116071, "loss": 1.1462, "step": 4535 }, { "epoch": 0.3155588020452885, "grad_norm": 1.109375, "learning_rate": 0.0016020209561403212, "loss": 0.8713, "step": 4536 }, { "epoch": 0.31562836968242375, "grad_norm": 1.2578125, "learning_rate": 0.001601841016901445, "loss": 0.9731, "step": 4537 }, { "epoch": 0.3156979373195589, "grad_norm": 1.4140625, "learning_rate": 0.001601661047104116, "loss": 1.0064, "step": 4538 }, { "epoch": 0.31576750495669414, "grad_norm": 0.95703125, "learning_rate": 0.0016014810467574712, "loss": 0.7273, "step": 4539 }, { "epoch": 0.31583707259382937, "grad_norm": 1.0859375, "learning_rate": 0.001601301015870651, "loss": 0.9267, "step": 4540 }, { "epoch": 0.31590664023096454, "grad_norm": 1.4453125, "learning_rate": 0.0016011209544527956, "loss": 1.2038, "step": 4541 }, { "epoch": 0.31597620786809977, "grad_norm": 0.96484375, "learning_rate": 0.001600940862513048, "loss": 0.8266, "step": 4542 }, { "epoch": 0.31604577550523494, "grad_norm": 1.0, "learning_rate": 0.0016007607400605527, "loss": 0.8463, "step": 4543 }, { "epoch": 0.31611534314237016, "grad_norm": 1.140625, "learning_rate": 0.0016005805871044548, "loss": 0.9369, "step": 4544 }, { "epoch": 0.3161849107795054, "grad_norm": 1.0078125, "learning_rate": 0.0016004004036539018, "loss": 0.9409, "step": 4545 }, { "epoch": 0.31625447841664056, "grad_norm": 1.0625, "learning_rate": 0.0016002201897180426, "loss": 0.8067, "step": 4546 }, { "epoch": 0.3163240460537758, "grad_norm": 1.1015625, "learning_rate": 0.0016000399453060276, "loss": 1.0236, "step": 4547 }, { "epoch": 0.316393613690911, "grad_norm": 1.0078125, "learning_rate": 0.0015998596704270085, "loss": 0.8579, "step": 4548 }, { "epoch": 0.3164631813280462, "grad_norm": 1.1015625, "learning_rate": 0.001599679365090139, "loss": 1.0777, "step": 4549 }, { "epoch": 0.3165327489651814, "grad_norm": 1.1875, "learning_rate": 0.0015994990293045738, "loss": 0.911, "step": 4550 }, { "epoch": 0.3166023166023166, "grad_norm": 1.0078125, "learning_rate": 0.0015993186630794698, "loss": 0.8788, "step": 4551 }, { "epoch": 0.3166718842394518, "grad_norm": 1.0390625, "learning_rate": 0.0015991382664239846, "loss": 1.1761, "step": 4552 }, { "epoch": 0.31674145187658703, "grad_norm": 1.171875, "learning_rate": 0.0015989578393472783, "loss": 1.0325, "step": 4553 }, { "epoch": 0.3168110195137222, "grad_norm": 0.92578125, "learning_rate": 0.0015987773818585118, "loss": 0.8483, "step": 4554 }, { "epoch": 0.31688058715085743, "grad_norm": 1.0390625, "learning_rate": 0.001598596893966848, "loss": 0.7744, "step": 4555 }, { "epoch": 0.3169501547879926, "grad_norm": 1.34375, "learning_rate": 0.0015984163756814509, "loss": 0.9896, "step": 4556 }, { "epoch": 0.3170197224251278, "grad_norm": 0.87890625, "learning_rate": 0.0015982358270114868, "loss": 0.804, "step": 4557 }, { "epoch": 0.31708929006226305, "grad_norm": 1.171875, "learning_rate": 0.0015980552479661224, "loss": 0.8124, "step": 4558 }, { "epoch": 0.3171588576993982, "grad_norm": 1.1484375, "learning_rate": 0.0015978746385545272, "loss": 1.0928, "step": 4559 }, { "epoch": 0.31722842533653345, "grad_norm": 1.0, "learning_rate": 0.001597693998785871, "loss": 0.744, "step": 4560 }, { "epoch": 0.3172979929736687, "grad_norm": 1.0390625, "learning_rate": 0.0015975133286693266, "loss": 0.7445, "step": 4561 }, { "epoch": 0.31736756061080384, "grad_norm": 1.1640625, "learning_rate": 0.0015973326282140668, "loss": 0.9578, "step": 4562 }, { "epoch": 0.31743712824793907, "grad_norm": 1.2421875, "learning_rate": 0.001597151897429267, "loss": 0.9887, "step": 4563 }, { "epoch": 0.31750669588507424, "grad_norm": 0.93359375, "learning_rate": 0.0015969711363241035, "loss": 0.7945, "step": 4564 }, { "epoch": 0.31757626352220947, "grad_norm": 0.96484375, "learning_rate": 0.0015967903449077548, "loss": 0.9663, "step": 4565 }, { "epoch": 0.3176458311593447, "grad_norm": 1.2578125, "learning_rate": 0.0015966095231894006, "loss": 0.9744, "step": 4566 }, { "epoch": 0.31771539879647986, "grad_norm": 1.40625, "learning_rate": 0.001596428671178222, "loss": 0.8752, "step": 4567 }, { "epoch": 0.3177849664336151, "grad_norm": 0.9609375, "learning_rate": 0.0015962477888834012, "loss": 0.8157, "step": 4568 }, { "epoch": 0.31785453407075026, "grad_norm": 0.99609375, "learning_rate": 0.0015960668763141234, "loss": 1.0015, "step": 4569 }, { "epoch": 0.3179241017078855, "grad_norm": 1.0625, "learning_rate": 0.001595885933479574, "loss": 0.7865, "step": 4570 }, { "epoch": 0.3179936693450207, "grad_norm": 0.92578125, "learning_rate": 0.0015957049603889401, "loss": 0.8637, "step": 4571 }, { "epoch": 0.3180632369821559, "grad_norm": 1.21875, "learning_rate": 0.0015955239570514112, "loss": 0.9391, "step": 4572 }, { "epoch": 0.3181328046192911, "grad_norm": 1.0234375, "learning_rate": 0.0015953429234761773, "loss": 1.0095, "step": 4573 }, { "epoch": 0.31820237225642634, "grad_norm": 0.9765625, "learning_rate": 0.0015951618596724306, "loss": 0.8901, "step": 4574 }, { "epoch": 0.3182719398935615, "grad_norm": 1.2265625, "learning_rate": 0.0015949807656493644, "loss": 0.9012, "step": 4575 }, { "epoch": 0.31834150753069673, "grad_norm": 1.109375, "learning_rate": 0.001594799641416174, "loss": 1.0944, "step": 4576 }, { "epoch": 0.3184110751678319, "grad_norm": 1.1484375, "learning_rate": 0.0015946184869820557, "loss": 0.9183, "step": 4577 }, { "epoch": 0.31848064280496713, "grad_norm": 1.203125, "learning_rate": 0.0015944373023562075, "loss": 0.808, "step": 4578 }, { "epoch": 0.31855021044210236, "grad_norm": 1.265625, "learning_rate": 0.0015942560875478295, "loss": 0.9312, "step": 4579 }, { "epoch": 0.3186197780792375, "grad_norm": 0.92578125, "learning_rate": 0.0015940748425661226, "loss": 0.7663, "step": 4580 }, { "epoch": 0.31868934571637275, "grad_norm": 1.015625, "learning_rate": 0.0015938935674202897, "loss": 0.8847, "step": 4581 }, { "epoch": 0.3187589133535079, "grad_norm": 1.25, "learning_rate": 0.0015937122621195348, "loss": 0.9319, "step": 4582 }, { "epoch": 0.31882848099064315, "grad_norm": 1.0625, "learning_rate": 0.0015935309266730635, "loss": 0.9495, "step": 4583 }, { "epoch": 0.3188980486277784, "grad_norm": 1.125, "learning_rate": 0.0015933495610900839, "loss": 0.8488, "step": 4584 }, { "epoch": 0.31896761626491354, "grad_norm": 1.1953125, "learning_rate": 0.001593168165379804, "loss": 0.9934, "step": 4585 }, { "epoch": 0.31903718390204877, "grad_norm": 1.0703125, "learning_rate": 0.0015929867395514344, "loss": 1.0, "step": 4586 }, { "epoch": 0.319106751539184, "grad_norm": 0.9453125, "learning_rate": 0.0015928052836141871, "loss": 0.8157, "step": 4587 }, { "epoch": 0.31917631917631917, "grad_norm": 0.9375, "learning_rate": 0.0015926237975772755, "loss": 0.7274, "step": 4588 }, { "epoch": 0.3192458868134544, "grad_norm": 1.2265625, "learning_rate": 0.0015924422814499145, "loss": 0.8182, "step": 4589 }, { "epoch": 0.31931545445058956, "grad_norm": 1.015625, "learning_rate": 0.0015922607352413204, "loss": 0.8247, "step": 4590 }, { "epoch": 0.3193850220877248, "grad_norm": 1.046875, "learning_rate": 0.0015920791589607115, "loss": 0.8285, "step": 4591 }, { "epoch": 0.31945458972486, "grad_norm": 1.0859375, "learning_rate": 0.0015918975526173073, "loss": 1.0798, "step": 4592 }, { "epoch": 0.3195241573619952, "grad_norm": 1.1328125, "learning_rate": 0.0015917159162203284, "loss": 0.912, "step": 4593 }, { "epoch": 0.3195937249991304, "grad_norm": 1.0625, "learning_rate": 0.0015915342497789982, "loss": 0.8307, "step": 4594 }, { "epoch": 0.3196632926362656, "grad_norm": 0.859375, "learning_rate": 0.0015913525533025402, "loss": 0.8298, "step": 4595 }, { "epoch": 0.3197328602734008, "grad_norm": 1.03125, "learning_rate": 0.0015911708268001802, "loss": 0.8014, "step": 4596 }, { "epoch": 0.31980242791053604, "grad_norm": 1.15625, "learning_rate": 0.0015909890702811452, "loss": 0.8576, "step": 4597 }, { "epoch": 0.3198719955476712, "grad_norm": 1.2734375, "learning_rate": 0.0015908072837546642, "loss": 1.0694, "step": 4598 }, { "epoch": 0.31994156318480643, "grad_norm": 1.09375, "learning_rate": 0.001590625467229967, "loss": 1.1575, "step": 4599 }, { "epoch": 0.32001113082194166, "grad_norm": 0.95703125, "learning_rate": 0.0015904436207162856, "loss": 0.6767, "step": 4600 }, { "epoch": 0.32008069845907683, "grad_norm": 1.046875, "learning_rate": 0.0015902617442228532, "loss": 1.1852, "step": 4601 }, { "epoch": 0.32015026609621206, "grad_norm": 1.1640625, "learning_rate": 0.0015900798377589047, "loss": 1.0463, "step": 4602 }, { "epoch": 0.3202198337333472, "grad_norm": 1.0703125, "learning_rate": 0.0015898979013336764, "loss": 1.0375, "step": 4603 }, { "epoch": 0.32028940137048245, "grad_norm": 1.0390625, "learning_rate": 0.0015897159349564057, "loss": 1.0322, "step": 4604 }, { "epoch": 0.3203589690076177, "grad_norm": 1.1015625, "learning_rate": 0.0015895339386363322, "loss": 1.1247, "step": 4605 }, { "epoch": 0.32042853664475285, "grad_norm": 1.1484375, "learning_rate": 0.0015893519123826969, "loss": 0.9166, "step": 4606 }, { "epoch": 0.3204981042818881, "grad_norm": 0.95703125, "learning_rate": 0.0015891698562047422, "loss": 1.0436, "step": 4607 }, { "epoch": 0.32056767191902324, "grad_norm": 1.03125, "learning_rate": 0.0015889877701117114, "loss": 0.8429, "step": 4608 }, { "epoch": 0.32063723955615847, "grad_norm": 1.203125, "learning_rate": 0.0015888056541128505, "loss": 0.9104, "step": 4609 }, { "epoch": 0.3207068071932937, "grad_norm": 1.03125, "learning_rate": 0.0015886235082174065, "loss": 1.0487, "step": 4610 }, { "epoch": 0.32077637483042887, "grad_norm": 1.171875, "learning_rate": 0.0015884413324346275, "loss": 0.9555, "step": 4611 }, { "epoch": 0.3208459424675641, "grad_norm": 1.078125, "learning_rate": 0.0015882591267737639, "loss": 0.7592, "step": 4612 }, { "epoch": 0.3209155101046993, "grad_norm": 1.09375, "learning_rate": 0.001588076891244066, "loss": 0.7827, "step": 4613 }, { "epoch": 0.3209850777418345, "grad_norm": 1.0546875, "learning_rate": 0.0015878946258547889, "loss": 0.8938, "step": 4614 }, { "epoch": 0.3210546453789697, "grad_norm": 0.96875, "learning_rate": 0.0015877123306151848, "loss": 0.8164, "step": 4615 }, { "epoch": 0.3211242130161049, "grad_norm": 1.09375, "learning_rate": 0.0015875300055345114, "loss": 0.929, "step": 4616 }, { "epoch": 0.3211937806532401, "grad_norm": 1.1015625, "learning_rate": 0.001587347650622026, "loss": 1.0975, "step": 4617 }, { "epoch": 0.32126334829037534, "grad_norm": 1.0078125, "learning_rate": 0.0015871652658869869, "loss": 0.9612, "step": 4618 }, { "epoch": 0.3213329159275105, "grad_norm": 1.046875, "learning_rate": 0.001586982851338655, "loss": 0.818, "step": 4619 }, { "epoch": 0.32140248356464574, "grad_norm": 1.140625, "learning_rate": 0.001586800406986293, "loss": 0.8031, "step": 4620 }, { "epoch": 0.3214720512017809, "grad_norm": 1.0859375, "learning_rate": 0.0015866179328391636, "loss": 1.0245, "step": 4621 }, { "epoch": 0.32154161883891613, "grad_norm": 0.92578125, "learning_rate": 0.0015864354289065324, "loss": 0.7114, "step": 4622 }, { "epoch": 0.32161118647605136, "grad_norm": 1.0078125, "learning_rate": 0.001586252895197666, "loss": 0.848, "step": 4623 }, { "epoch": 0.32168075411318653, "grad_norm": 1.2109375, "learning_rate": 0.0015860703317218325, "loss": 0.9736, "step": 4624 }, { "epoch": 0.32175032175032175, "grad_norm": 1.265625, "learning_rate": 0.0015858877384883018, "loss": 0.899, "step": 4625 }, { "epoch": 0.321819889387457, "grad_norm": 0.99609375, "learning_rate": 0.001585705115506345, "loss": 0.912, "step": 4626 }, { "epoch": 0.32188945702459215, "grad_norm": 1.03125, "learning_rate": 0.001585522462785234, "loss": 1.0388, "step": 4627 }, { "epoch": 0.3219590246617274, "grad_norm": 1.0390625, "learning_rate": 0.001585339780334244, "loss": 0.8447, "step": 4628 }, { "epoch": 0.32202859229886255, "grad_norm": 1.03125, "learning_rate": 0.0015851570681626502, "loss": 0.9812, "step": 4629 }, { "epoch": 0.3220981599359978, "grad_norm": 0.98046875, "learning_rate": 0.0015849743262797299, "loss": 0.8619, "step": 4630 }, { "epoch": 0.322167727573133, "grad_norm": 0.95703125, "learning_rate": 0.0015847915546947618, "loss": 0.8297, "step": 4631 }, { "epoch": 0.32223729521026817, "grad_norm": 1.421875, "learning_rate": 0.001584608753417026, "loss": 1.0828, "step": 4632 }, { "epoch": 0.3223068628474034, "grad_norm": 1.203125, "learning_rate": 0.0015844259224558044, "loss": 0.9393, "step": 4633 }, { "epoch": 0.32237643048453857, "grad_norm": 1.203125, "learning_rate": 0.0015842430618203803, "loss": 1.045, "step": 4634 }, { "epoch": 0.3224459981216738, "grad_norm": 1.1015625, "learning_rate": 0.0015840601715200382, "loss": 0.9415, "step": 4635 }, { "epoch": 0.322515565758809, "grad_norm": 1.203125, "learning_rate": 0.0015838772515640645, "loss": 1.0985, "step": 4636 }, { "epoch": 0.3225851333959442, "grad_norm": 0.91015625, "learning_rate": 0.0015836943019617467, "loss": 0.8123, "step": 4637 }, { "epoch": 0.3226547010330794, "grad_norm": 1.0546875, "learning_rate": 0.0015835113227223748, "loss": 0.5864, "step": 4638 }, { "epoch": 0.32272426867021464, "grad_norm": 1.234375, "learning_rate": 0.0015833283138552386, "loss": 1.0993, "step": 4639 }, { "epoch": 0.3227938363073498, "grad_norm": 1.3046875, "learning_rate": 0.0015831452753696312, "loss": 1.0382, "step": 4640 }, { "epoch": 0.32286340394448504, "grad_norm": 1.34375, "learning_rate": 0.0015829622072748455, "loss": 0.9563, "step": 4641 }, { "epoch": 0.3229329715816202, "grad_norm": 0.92578125, "learning_rate": 0.0015827791095801777, "loss": 0.7008, "step": 4642 }, { "epoch": 0.32300253921875544, "grad_norm": 1.2265625, "learning_rate": 0.001582595982294924, "loss": 0.9963, "step": 4643 }, { "epoch": 0.32307210685589066, "grad_norm": 1.15625, "learning_rate": 0.0015824128254283828, "loss": 1.0364, "step": 4644 }, { "epoch": 0.32314167449302583, "grad_norm": 1.109375, "learning_rate": 0.0015822296389898538, "loss": 1.0407, "step": 4645 }, { "epoch": 0.32321124213016106, "grad_norm": 0.91796875, "learning_rate": 0.0015820464229886384, "loss": 0.7409, "step": 4646 }, { "epoch": 0.32328080976729623, "grad_norm": 1.03125, "learning_rate": 0.0015818631774340394, "loss": 0.9008, "step": 4647 }, { "epoch": 0.32335037740443145, "grad_norm": 1.125, "learning_rate": 0.0015816799023353613, "loss": 0.9936, "step": 4648 }, { "epoch": 0.3234199450415667, "grad_norm": 1.359375, "learning_rate": 0.0015814965977019094, "loss": 1.1239, "step": 4649 }, { "epoch": 0.32348951267870185, "grad_norm": 1.2734375, "learning_rate": 0.0015813132635429912, "loss": 1.0262, "step": 4650 }, { "epoch": 0.3235590803158371, "grad_norm": 0.92578125, "learning_rate": 0.0015811298998679156, "loss": 0.6669, "step": 4651 }, { "epoch": 0.3236286479529723, "grad_norm": 0.96484375, "learning_rate": 0.0015809465066859928, "loss": 1.0144, "step": 4652 }, { "epoch": 0.3236982155901075, "grad_norm": 0.828125, "learning_rate": 0.0015807630840065346, "loss": 0.7395, "step": 4653 }, { "epoch": 0.3237677832272427, "grad_norm": 0.98046875, "learning_rate": 0.0015805796318388544, "loss": 0.9497, "step": 4654 }, { "epoch": 0.32383735086437787, "grad_norm": 0.953125, "learning_rate": 0.0015803961501922666, "loss": 1.0169, "step": 4655 }, { "epoch": 0.3239069185015131, "grad_norm": 1.1171875, "learning_rate": 0.0015802126390760875, "loss": 1.2319, "step": 4656 }, { "epoch": 0.3239764861386483, "grad_norm": 1.1640625, "learning_rate": 0.0015800290984996355, "loss": 0.7076, "step": 4657 }, { "epoch": 0.3240460537757835, "grad_norm": 0.984375, "learning_rate": 0.0015798455284722294, "loss": 1.0573, "step": 4658 }, { "epoch": 0.3241156214129187, "grad_norm": 1.421875, "learning_rate": 0.0015796619290031897, "loss": 1.17, "step": 4659 }, { "epoch": 0.3241851890500539, "grad_norm": 1.3828125, "learning_rate": 0.001579478300101839, "loss": 1.0127, "step": 4660 }, { "epoch": 0.3242547566871891, "grad_norm": 0.94921875, "learning_rate": 0.0015792946417775013, "loss": 0.927, "step": 4661 }, { "epoch": 0.32432432432432434, "grad_norm": 1.0390625, "learning_rate": 0.0015791109540395014, "loss": 1.0321, "step": 4662 }, { "epoch": 0.3243938919614595, "grad_norm": 1.15625, "learning_rate": 0.0015789272368971663, "loss": 0.758, "step": 4663 }, { "epoch": 0.32446345959859474, "grad_norm": 1.328125, "learning_rate": 0.001578743490359824, "loss": 1.1223, "step": 4664 }, { "epoch": 0.32453302723572997, "grad_norm": 1.1640625, "learning_rate": 0.0015785597144368042, "loss": 0.9884, "step": 4665 }, { "epoch": 0.32460259487286514, "grad_norm": 1.1015625, "learning_rate": 0.0015783759091374386, "loss": 1.0554, "step": 4666 }, { "epoch": 0.32467216251000036, "grad_norm": 1.109375, "learning_rate": 0.0015781920744710593, "loss": 0.7594, "step": 4667 }, { "epoch": 0.32474173014713553, "grad_norm": 1.0390625, "learning_rate": 0.0015780082104470009, "loss": 0.9857, "step": 4668 }, { "epoch": 0.32481129778427076, "grad_norm": 1.203125, "learning_rate": 0.0015778243170745988, "loss": 0.815, "step": 4669 }, { "epoch": 0.324880865421406, "grad_norm": 1.3359375, "learning_rate": 0.0015776403943631905, "loss": 1.0044, "step": 4670 }, { "epoch": 0.32495043305854115, "grad_norm": 1.2578125, "learning_rate": 0.0015774564423221143, "loss": 1.1304, "step": 4671 }, { "epoch": 0.3250200006956764, "grad_norm": 1.46875, "learning_rate": 0.0015772724609607108, "loss": 0.9644, "step": 4672 }, { "epoch": 0.32508956833281155, "grad_norm": 0.96484375, "learning_rate": 0.001577088450288321, "loss": 0.7194, "step": 4673 }, { "epoch": 0.3251591359699468, "grad_norm": 0.9609375, "learning_rate": 0.001576904410314289, "loss": 0.8332, "step": 4674 }, { "epoch": 0.325228703607082, "grad_norm": 0.95703125, "learning_rate": 0.0015767203410479587, "loss": 0.9187, "step": 4675 }, { "epoch": 0.3252982712442172, "grad_norm": 1.265625, "learning_rate": 0.001576536242498676, "loss": 1.1336, "step": 4676 }, { "epoch": 0.3253678388813524, "grad_norm": 0.83203125, "learning_rate": 0.0015763521146757893, "loss": 0.6083, "step": 4677 }, { "epoch": 0.3254374065184876, "grad_norm": 1.1953125, "learning_rate": 0.001576167957588647, "loss": 0.9975, "step": 4678 }, { "epoch": 0.3255069741556228, "grad_norm": 1.421875, "learning_rate": 0.0015759837712465998, "loss": 1.0539, "step": 4679 }, { "epoch": 0.325576541792758, "grad_norm": 1.5625, "learning_rate": 0.001575799555659, "loss": 1.2816, "step": 4680 }, { "epoch": 0.3256461094298932, "grad_norm": 1.1796875, "learning_rate": 0.0015756153108352012, "loss": 1.0234, "step": 4681 }, { "epoch": 0.3257156770670284, "grad_norm": 1.0625, "learning_rate": 0.0015754310367845582, "loss": 1.026, "step": 4682 }, { "epoch": 0.32578524470416365, "grad_norm": 1.609375, "learning_rate": 0.001575246733516427, "loss": 1.1533, "step": 4683 }, { "epoch": 0.3258548123412988, "grad_norm": 1.078125, "learning_rate": 0.001575062401040167, "loss": 0.6291, "step": 4684 }, { "epoch": 0.32592437997843404, "grad_norm": 1.21875, "learning_rate": 0.001574878039365136, "loss": 0.7976, "step": 4685 }, { "epoch": 0.3259939476155692, "grad_norm": 0.9765625, "learning_rate": 0.0015746936485006961, "loss": 0.9032, "step": 4686 }, { "epoch": 0.32606351525270444, "grad_norm": 1.0078125, "learning_rate": 0.0015745092284562094, "loss": 0.8683, "step": 4687 }, { "epoch": 0.32613308288983966, "grad_norm": 1.078125, "learning_rate": 0.00157432477924104, "loss": 0.9426, "step": 4688 }, { "epoch": 0.32620265052697484, "grad_norm": 1.234375, "learning_rate": 0.001574140300864553, "loss": 0.8228, "step": 4689 }, { "epoch": 0.32627221816411006, "grad_norm": 0.92578125, "learning_rate": 0.0015739557933361153, "loss": 0.9683, "step": 4690 }, { "epoch": 0.3263417858012453, "grad_norm": 1.109375, "learning_rate": 0.0015737712566650955, "loss": 1.0398, "step": 4691 }, { "epoch": 0.32641135343838046, "grad_norm": 1.296875, "learning_rate": 0.0015735866908608632, "loss": 1.0982, "step": 4692 }, { "epoch": 0.3264809210755157, "grad_norm": 1.0625, "learning_rate": 0.00157340209593279, "loss": 0.8874, "step": 4693 }, { "epoch": 0.32655048871265085, "grad_norm": 1.1484375, "learning_rate": 0.001573217471890248, "loss": 1.1152, "step": 4694 }, { "epoch": 0.3266200563497861, "grad_norm": 1.4609375, "learning_rate": 0.0015730328187426126, "loss": 1.0359, "step": 4695 }, { "epoch": 0.3266896239869213, "grad_norm": 1.0625, "learning_rate": 0.0015728481364992587, "loss": 0.8886, "step": 4696 }, { "epoch": 0.3267591916240565, "grad_norm": 1.0625, "learning_rate": 0.001572663425169564, "loss": 0.7187, "step": 4697 }, { "epoch": 0.3268287592611917, "grad_norm": 1.2578125, "learning_rate": 0.0015724786847629067, "loss": 0.9773, "step": 4698 }, { "epoch": 0.3268983268983269, "grad_norm": 1.0078125, "learning_rate": 0.0015722939152886676, "loss": 0.7418, "step": 4699 }, { "epoch": 0.3269678945354621, "grad_norm": 1.3203125, "learning_rate": 0.0015721091167562279, "loss": 0.9311, "step": 4700 }, { "epoch": 0.3270374621725973, "grad_norm": 1.0625, "learning_rate": 0.0015719242891749708, "loss": 0.8655, "step": 4701 }, { "epoch": 0.3271070298097325, "grad_norm": 1.0625, "learning_rate": 0.0015717394325542814, "loss": 0.5923, "step": 4702 }, { "epoch": 0.3271765974468677, "grad_norm": 0.984375, "learning_rate": 0.0015715545469035448, "loss": 0.7129, "step": 4703 }, { "epoch": 0.32724616508400295, "grad_norm": 1.3515625, "learning_rate": 0.0015713696322321496, "loss": 1.1562, "step": 4704 }, { "epoch": 0.3273157327211381, "grad_norm": 1.3125, "learning_rate": 0.0015711846885494843, "loss": 1.1207, "step": 4705 }, { "epoch": 0.32738530035827335, "grad_norm": 1.2578125, "learning_rate": 0.0015709997158649394, "loss": 0.846, "step": 4706 }, { "epoch": 0.3274548679954085, "grad_norm": 1.28125, "learning_rate": 0.001570814714187907, "loss": 0.8207, "step": 4707 }, { "epoch": 0.32752443563254374, "grad_norm": 0.859375, "learning_rate": 0.0015706296835277804, "loss": 0.7765, "step": 4708 }, { "epoch": 0.32759400326967897, "grad_norm": 1.1328125, "learning_rate": 0.001570444623893955, "loss": 0.7479, "step": 4709 }, { "epoch": 0.32766357090681414, "grad_norm": 1.078125, "learning_rate": 0.0015702595352958266, "loss": 1.097, "step": 4710 }, { "epoch": 0.32773313854394936, "grad_norm": 1.0078125, "learning_rate": 0.0015700744177427933, "loss": 0.7712, "step": 4711 }, { "epoch": 0.32780270618108454, "grad_norm": 0.9921875, "learning_rate": 0.0015698892712442546, "loss": 0.8461, "step": 4712 }, { "epoch": 0.32787227381821976, "grad_norm": 1.4609375, "learning_rate": 0.0015697040958096112, "loss": 1.0414, "step": 4713 }, { "epoch": 0.327941841455355, "grad_norm": 1.015625, "learning_rate": 0.0015695188914482655, "loss": 0.8578, "step": 4714 }, { "epoch": 0.32801140909249016, "grad_norm": 1.171875, "learning_rate": 0.0015693336581696204, "loss": 0.996, "step": 4715 }, { "epoch": 0.3280809767296254, "grad_norm": 1.046875, "learning_rate": 0.0015691483959830825, "loss": 0.8574, "step": 4716 }, { "epoch": 0.3281505443667606, "grad_norm": 1.171875, "learning_rate": 0.0015689631048980575, "loss": 0.9643, "step": 4717 }, { "epoch": 0.3282201120038958, "grad_norm": 1.1015625, "learning_rate": 0.0015687777849239537, "loss": 0.6624, "step": 4718 }, { "epoch": 0.328289679641031, "grad_norm": 1.03125, "learning_rate": 0.001568592436070181, "loss": 0.7277, "step": 4719 }, { "epoch": 0.3283592472781662, "grad_norm": 1.0859375, "learning_rate": 0.0015684070583461504, "loss": 0.8696, "step": 4720 }, { "epoch": 0.3284288149153014, "grad_norm": 1.265625, "learning_rate": 0.0015682216517612741, "loss": 0.8684, "step": 4721 }, { "epoch": 0.32849838255243663, "grad_norm": 0.98046875, "learning_rate": 0.0015680362163249665, "loss": 0.8038, "step": 4722 }, { "epoch": 0.3285679501895718, "grad_norm": 0.9921875, "learning_rate": 0.001567850752046643, "loss": 0.8145, "step": 4723 }, { "epoch": 0.328637517826707, "grad_norm": 1.1875, "learning_rate": 0.0015676652589357203, "loss": 1.1258, "step": 4724 }, { "epoch": 0.3287070854638422, "grad_norm": 1.0234375, "learning_rate": 0.0015674797370016172, "loss": 0.9553, "step": 4725 }, { "epoch": 0.3287766531009774, "grad_norm": 1.21875, "learning_rate": 0.0015672941862537534, "loss": 1.1712, "step": 4726 }, { "epoch": 0.32884622073811265, "grad_norm": 1.15625, "learning_rate": 0.0015671086067015501, "loss": 1.0842, "step": 4727 }, { "epoch": 0.3289157883752478, "grad_norm": 0.9765625, "learning_rate": 0.0015669229983544303, "loss": 0.854, "step": 4728 }, { "epoch": 0.32898535601238305, "grad_norm": 1.21875, "learning_rate": 0.0015667373612218176, "loss": 0.9932, "step": 4729 }, { "epoch": 0.32905492364951827, "grad_norm": 1.1328125, "learning_rate": 0.001566551695313139, "loss": 1.0276, "step": 4730 }, { "epoch": 0.32912449128665344, "grad_norm": 1.0546875, "learning_rate": 0.0015663660006378203, "loss": 0.9543, "step": 4731 }, { "epoch": 0.32919405892378867, "grad_norm": 1.3671875, "learning_rate": 0.0015661802772052914, "loss": 0.8605, "step": 4732 }, { "epoch": 0.32926362656092384, "grad_norm": 1.2109375, "learning_rate": 0.0015659945250249814, "loss": 1.184, "step": 4733 }, { "epoch": 0.32933319419805906, "grad_norm": 0.86328125, "learning_rate": 0.0015658087441063225, "loss": 0.7605, "step": 4734 }, { "epoch": 0.3294027618351943, "grad_norm": 1.3203125, "learning_rate": 0.0015656229344587472, "loss": 0.9397, "step": 4735 }, { "epoch": 0.32947232947232946, "grad_norm": 1.0546875, "learning_rate": 0.0015654370960916904, "loss": 0.6376, "step": 4736 }, { "epoch": 0.3295418971094647, "grad_norm": 1.6015625, "learning_rate": 0.001565251229014588, "loss": 0.9573, "step": 4737 }, { "epoch": 0.32961146474659986, "grad_norm": 1.171875, "learning_rate": 0.001565065333236877, "loss": 0.8613, "step": 4738 }, { "epoch": 0.3296810323837351, "grad_norm": 0.859375, "learning_rate": 0.0015648794087679968, "loss": 0.7105, "step": 4739 }, { "epoch": 0.3297506000208703, "grad_norm": 1.2734375, "learning_rate": 0.0015646934556173872, "loss": 1.0448, "step": 4740 }, { "epoch": 0.3298201676580055, "grad_norm": 0.98828125, "learning_rate": 0.0015645074737944897, "loss": 0.9482, "step": 4741 }, { "epoch": 0.3298897352951407, "grad_norm": 1.109375, "learning_rate": 0.0015643214633087488, "loss": 0.9157, "step": 4742 }, { "epoch": 0.32995930293227593, "grad_norm": 1.140625, "learning_rate": 0.0015641354241696082, "loss": 0.8553, "step": 4743 }, { "epoch": 0.3300288705694111, "grad_norm": 1.5625, "learning_rate": 0.001563949356386514, "loss": 0.8695, "step": 4744 }, { "epoch": 0.33009843820654633, "grad_norm": 1.1328125, "learning_rate": 0.0015637632599689141, "loss": 1.2751, "step": 4745 }, { "epoch": 0.3301680058436815, "grad_norm": 1.0703125, "learning_rate": 0.0015635771349262577, "loss": 1.0886, "step": 4746 }, { "epoch": 0.3302375734808167, "grad_norm": 1.1640625, "learning_rate": 0.0015633909812679948, "loss": 0.7407, "step": 4747 }, { "epoch": 0.33030714111795195, "grad_norm": 1.125, "learning_rate": 0.0015632047990035774, "loss": 1.0269, "step": 4748 }, { "epoch": 0.3303767087550871, "grad_norm": 1.28125, "learning_rate": 0.0015630185881424592, "loss": 1.027, "step": 4749 }, { "epoch": 0.33044627639222235, "grad_norm": 0.86328125, "learning_rate": 0.0015628323486940952, "loss": 0.6675, "step": 4750 }, { "epoch": 0.3305158440293575, "grad_norm": 0.9453125, "learning_rate": 0.0015626460806679413, "loss": 0.764, "step": 4751 }, { "epoch": 0.33058541166649275, "grad_norm": 1.109375, "learning_rate": 0.0015624597840734552, "loss": 1.0752, "step": 4752 }, { "epoch": 0.33065497930362797, "grad_norm": 1.1796875, "learning_rate": 0.0015622734589200962, "loss": 0.9472, "step": 4753 }, { "epoch": 0.33072454694076314, "grad_norm": 1.0625, "learning_rate": 0.001562087105217325, "loss": 1.0138, "step": 4754 }, { "epoch": 0.33079411457789837, "grad_norm": 1.1484375, "learning_rate": 0.0015619007229746038, "loss": 0.9164, "step": 4755 }, { "epoch": 0.33086368221503354, "grad_norm": 1.203125, "learning_rate": 0.0015617143122013963, "loss": 0.8213, "step": 4756 }, { "epoch": 0.33093324985216876, "grad_norm": 1.109375, "learning_rate": 0.001561527872907167, "loss": 0.8078, "step": 4757 }, { "epoch": 0.331002817489304, "grad_norm": 1.125, "learning_rate": 0.0015613414051013827, "loss": 1.1892, "step": 4758 }, { "epoch": 0.33107238512643916, "grad_norm": 0.98046875, "learning_rate": 0.0015611549087935115, "loss": 0.8594, "step": 4759 }, { "epoch": 0.3311419527635744, "grad_norm": 0.98828125, "learning_rate": 0.0015609683839930223, "loss": 0.8934, "step": 4760 }, { "epoch": 0.3312115204007096, "grad_norm": 1.140625, "learning_rate": 0.0015607818307093856, "loss": 0.7721, "step": 4761 }, { "epoch": 0.3312810880378448, "grad_norm": 1.15625, "learning_rate": 0.0015605952489520748, "loss": 0.9527, "step": 4762 }, { "epoch": 0.33135065567498, "grad_norm": 1.109375, "learning_rate": 0.0015604086387305625, "loss": 0.9401, "step": 4763 }, { "epoch": 0.3314202233121152, "grad_norm": 0.99609375, "learning_rate": 0.0015602220000543242, "loss": 0.8029, "step": 4764 }, { "epoch": 0.3314897909492504, "grad_norm": 0.84765625, "learning_rate": 0.0015600353329328364, "loss": 0.8822, "step": 4765 }, { "epoch": 0.33155935858638563, "grad_norm": 1.125, "learning_rate": 0.0015598486373755774, "loss": 1.0305, "step": 4766 }, { "epoch": 0.3316289262235208, "grad_norm": 1.0390625, "learning_rate": 0.0015596619133920262, "loss": 0.9862, "step": 4767 }, { "epoch": 0.33169849386065603, "grad_norm": 1.4140625, "learning_rate": 0.0015594751609916643, "loss": 0.9982, "step": 4768 }, { "epoch": 0.3317680614977912, "grad_norm": 0.984375, "learning_rate": 0.0015592883801839733, "loss": 0.7449, "step": 4769 }, { "epoch": 0.3318376291349264, "grad_norm": 1.71875, "learning_rate": 0.0015591015709784375, "loss": 0.6865, "step": 4770 }, { "epoch": 0.33190719677206165, "grad_norm": 1.21875, "learning_rate": 0.001558914733384542, "loss": 0.9245, "step": 4771 }, { "epoch": 0.3319767644091968, "grad_norm": 0.98828125, "learning_rate": 0.0015587278674117735, "loss": 0.7876, "step": 4772 }, { "epoch": 0.33204633204633205, "grad_norm": 0.98828125, "learning_rate": 0.00155854097306962, "loss": 0.8075, "step": 4773 }, { "epoch": 0.3321158996834673, "grad_norm": 1.015625, "learning_rate": 0.0015583540503675715, "loss": 1.0019, "step": 4774 }, { "epoch": 0.33218546732060245, "grad_norm": 1.1640625, "learning_rate": 0.0015581670993151183, "loss": 0.9934, "step": 4775 }, { "epoch": 0.33225503495773767, "grad_norm": 1.3984375, "learning_rate": 0.0015579801199217533, "loss": 0.9749, "step": 4776 }, { "epoch": 0.33232460259487284, "grad_norm": 1.109375, "learning_rate": 0.0015577931121969703, "loss": 0.7169, "step": 4777 }, { "epoch": 0.33239417023200807, "grad_norm": 1.0859375, "learning_rate": 0.0015576060761502643, "loss": 0.9887, "step": 4778 }, { "epoch": 0.3324637378691433, "grad_norm": 1.078125, "learning_rate": 0.0015574190117911325, "loss": 1.1363, "step": 4779 }, { "epoch": 0.33253330550627846, "grad_norm": 1.0625, "learning_rate": 0.0015572319191290726, "loss": 0.9078, "step": 4780 }, { "epoch": 0.3326028731434137, "grad_norm": 1.0546875, "learning_rate": 0.001557044798173585, "loss": 0.8433, "step": 4781 }, { "epoch": 0.33267244078054886, "grad_norm": 1.0078125, "learning_rate": 0.0015568576489341699, "loss": 0.9377, "step": 4782 }, { "epoch": 0.3327420084176841, "grad_norm": 0.875, "learning_rate": 0.00155667047142033, "loss": 0.775, "step": 4783 }, { "epoch": 0.3328115760548193, "grad_norm": 1.375, "learning_rate": 0.0015564832656415697, "loss": 1.1054, "step": 4784 }, { "epoch": 0.3328811436919545, "grad_norm": 1.015625, "learning_rate": 0.0015562960316073938, "loss": 0.8957, "step": 4785 }, { "epoch": 0.3329507113290897, "grad_norm": 0.9453125, "learning_rate": 0.0015561087693273098, "loss": 0.6607, "step": 4786 }, { "epoch": 0.33302027896622494, "grad_norm": 1.046875, "learning_rate": 0.001555921478810825, "loss": 0.8963, "step": 4787 }, { "epoch": 0.3330898466033601, "grad_norm": 0.97265625, "learning_rate": 0.00155573416006745, "loss": 0.9921, "step": 4788 }, { "epoch": 0.33315941424049533, "grad_norm": 1.390625, "learning_rate": 0.001555546813106695, "loss": 1.1758, "step": 4789 }, { "epoch": 0.3332289818776305, "grad_norm": 1.40625, "learning_rate": 0.0015553594379380733, "loss": 1.159, "step": 4790 }, { "epoch": 0.33329854951476573, "grad_norm": 1.03125, "learning_rate": 0.0015551720345710987, "loss": 1.0012, "step": 4791 }, { "epoch": 0.33336811715190096, "grad_norm": 1.359375, "learning_rate": 0.0015549846030152858, "loss": 0.9196, "step": 4792 }, { "epoch": 0.3334376847890361, "grad_norm": 0.8984375, "learning_rate": 0.0015547971432801528, "loss": 0.9451, "step": 4793 }, { "epoch": 0.33350725242617135, "grad_norm": 1.1796875, "learning_rate": 0.001554609655375217, "loss": 0.8466, "step": 4794 }, { "epoch": 0.3335768200633065, "grad_norm": 1.0078125, "learning_rate": 0.0015544221393099984, "loss": 0.7053, "step": 4795 }, { "epoch": 0.33364638770044175, "grad_norm": 1.1796875, "learning_rate": 0.0015542345950940177, "loss": 0.8763, "step": 4796 }, { "epoch": 0.333715955337577, "grad_norm": 1.25, "learning_rate": 0.0015540470227367984, "loss": 1.0129, "step": 4797 }, { "epoch": 0.33378552297471215, "grad_norm": 1.1484375, "learning_rate": 0.0015538594222478635, "loss": 0.9119, "step": 4798 }, { "epoch": 0.33385509061184737, "grad_norm": 1.328125, "learning_rate": 0.001553671793636739, "loss": 0.9252, "step": 4799 }, { "epoch": 0.3339246582489826, "grad_norm": 1.0859375, "learning_rate": 0.0015534841369129514, "loss": 0.9074, "step": 4800 }, { "epoch": 0.33399422588611777, "grad_norm": 1.3203125, "learning_rate": 0.001553296452086029, "loss": 0.8542, "step": 4801 }, { "epoch": 0.334063793523253, "grad_norm": 1.3203125, "learning_rate": 0.0015531087391655017, "loss": 1.1449, "step": 4802 }, { "epoch": 0.33413336116038816, "grad_norm": 1.328125, "learning_rate": 0.0015529209981609005, "loss": 0.9562, "step": 4803 }, { "epoch": 0.3342029287975234, "grad_norm": 1.234375, "learning_rate": 0.001552733229081758, "loss": 0.9129, "step": 4804 }, { "epoch": 0.3342724964346586, "grad_norm": 0.9921875, "learning_rate": 0.0015525454319376079, "loss": 1.0567, "step": 4805 }, { "epoch": 0.3343420640717938, "grad_norm": 1.4296875, "learning_rate": 0.0015523576067379861, "loss": 1.0974, "step": 4806 }, { "epoch": 0.334411631708929, "grad_norm": 0.99609375, "learning_rate": 0.001552169753492429, "loss": 0.9468, "step": 4807 }, { "epoch": 0.3344811993460642, "grad_norm": 0.9921875, "learning_rate": 0.0015519818722104747, "loss": 0.7054, "step": 4808 }, { "epoch": 0.3345507669831994, "grad_norm": 1.3203125, "learning_rate": 0.0015517939629016634, "loss": 1.3383, "step": 4809 }, { "epoch": 0.33462033462033464, "grad_norm": 1.203125, "learning_rate": 0.001551606025575536, "loss": 0.8769, "step": 4810 }, { "epoch": 0.3346899022574698, "grad_norm": 1.109375, "learning_rate": 0.0015514180602416348, "loss": 0.8681, "step": 4811 }, { "epoch": 0.33475946989460503, "grad_norm": 1.1484375, "learning_rate": 0.0015512300669095036, "loss": 0.9219, "step": 4812 }, { "epoch": 0.33482903753174026, "grad_norm": 1.25, "learning_rate": 0.0015510420455886885, "loss": 1.2348, "step": 4813 }, { "epoch": 0.33489860516887543, "grad_norm": 1.0703125, "learning_rate": 0.0015508539962887356, "loss": 0.6768, "step": 4814 }, { "epoch": 0.33496817280601066, "grad_norm": 1.171875, "learning_rate": 0.001550665919019193, "loss": 0.8907, "step": 4815 }, { "epoch": 0.3350377404431458, "grad_norm": 1.015625, "learning_rate": 0.0015504778137896108, "loss": 0.93, "step": 4816 }, { "epoch": 0.33510730808028105, "grad_norm": 1.3125, "learning_rate": 0.0015502896806095397, "loss": 1.2813, "step": 4817 }, { "epoch": 0.3351768757174163, "grad_norm": 0.984375, "learning_rate": 0.0015501015194885326, "loss": 1.2485, "step": 4818 }, { "epoch": 0.33524644335455145, "grad_norm": 1.1015625, "learning_rate": 0.0015499133304361426, "loss": 0.9347, "step": 4819 }, { "epoch": 0.3353160109916867, "grad_norm": 1.1484375, "learning_rate": 0.001549725113461926, "loss": 0.8417, "step": 4820 }, { "epoch": 0.33538557862882185, "grad_norm": 1.0234375, "learning_rate": 0.0015495368685754386, "loss": 0.765, "step": 4821 }, { "epoch": 0.33545514626595707, "grad_norm": 1.0546875, "learning_rate": 0.0015493485957862388, "loss": 1.0378, "step": 4822 }, { "epoch": 0.3355247139030923, "grad_norm": 0.9453125, "learning_rate": 0.0015491602951038866, "loss": 0.891, "step": 4823 }, { "epoch": 0.33559428154022747, "grad_norm": 1.1796875, "learning_rate": 0.0015489719665379422, "loss": 1.2746, "step": 4824 }, { "epoch": 0.3356638491773627, "grad_norm": 1.1015625, "learning_rate": 0.0015487836100979686, "loss": 1.024, "step": 4825 }, { "epoch": 0.3357334168144979, "grad_norm": 1.09375, "learning_rate": 0.0015485952257935293, "loss": 0.8937, "step": 4826 }, { "epoch": 0.3358029844516331, "grad_norm": 1.265625, "learning_rate": 0.0015484068136341898, "loss": 1.1165, "step": 4827 }, { "epoch": 0.3358725520887683, "grad_norm": 0.96875, "learning_rate": 0.001548218373629516, "loss": 0.8166, "step": 4828 }, { "epoch": 0.3359421197259035, "grad_norm": 1.1015625, "learning_rate": 0.0015480299057890768, "loss": 0.7577, "step": 4829 }, { "epoch": 0.3360116873630387, "grad_norm": 1.3046875, "learning_rate": 0.0015478414101224409, "loss": 1.0613, "step": 4830 }, { "epoch": 0.33608125500017394, "grad_norm": 1.2421875, "learning_rate": 0.0015476528866391797, "loss": 0.8719, "step": 4831 }, { "epoch": 0.3361508226373091, "grad_norm": 1.1171875, "learning_rate": 0.0015474643353488653, "loss": 0.876, "step": 4832 }, { "epoch": 0.33622039027444434, "grad_norm": 1.03125, "learning_rate": 0.0015472757562610714, "loss": 0.6715, "step": 4833 }, { "epoch": 0.3362899579115795, "grad_norm": 1.1328125, "learning_rate": 0.0015470871493853734, "loss": 0.7938, "step": 4834 }, { "epoch": 0.33635952554871473, "grad_norm": 0.74609375, "learning_rate": 0.0015468985147313468, "loss": 0.7811, "step": 4835 }, { "epoch": 0.33642909318584996, "grad_norm": 1.2421875, "learning_rate": 0.0015467098523085706, "loss": 0.9007, "step": 4836 }, { "epoch": 0.33649866082298513, "grad_norm": 0.93359375, "learning_rate": 0.0015465211621266237, "loss": 0.8734, "step": 4837 }, { "epoch": 0.33656822846012036, "grad_norm": 0.9609375, "learning_rate": 0.0015463324441950868, "loss": 0.8147, "step": 4838 }, { "epoch": 0.3366377960972556, "grad_norm": 1.171875, "learning_rate": 0.0015461436985235422, "loss": 1.0236, "step": 4839 }, { "epoch": 0.33670736373439075, "grad_norm": 0.984375, "learning_rate": 0.0015459549251215733, "loss": 0.9286, "step": 4840 }, { "epoch": 0.336776931371526, "grad_norm": 0.94921875, "learning_rate": 0.001545766123998765, "loss": 0.6887, "step": 4841 }, { "epoch": 0.33684649900866115, "grad_norm": 1.328125, "learning_rate": 0.001545577295164704, "loss": 0.8163, "step": 4842 }, { "epoch": 0.3369160666457964, "grad_norm": 1.1640625, "learning_rate": 0.0015453884386289775, "loss": 0.9204, "step": 4843 }, { "epoch": 0.3369856342829316, "grad_norm": 1.109375, "learning_rate": 0.0015451995544011755, "loss": 0.9626, "step": 4844 }, { "epoch": 0.33705520192006677, "grad_norm": 1.140625, "learning_rate": 0.0015450106424908876, "loss": 0.8108, "step": 4845 }, { "epoch": 0.337124769557202, "grad_norm": 1.125, "learning_rate": 0.001544821702907707, "loss": 1.0259, "step": 4846 }, { "epoch": 0.33719433719433717, "grad_norm": 1.265625, "learning_rate": 0.001544632735661226, "loss": 0.906, "step": 4847 }, { "epoch": 0.3372639048314724, "grad_norm": 1.1640625, "learning_rate": 0.00154444374076104, "loss": 1.0432, "step": 4848 }, { "epoch": 0.3373334724686076, "grad_norm": 0.93359375, "learning_rate": 0.0015442547182167449, "loss": 0.8352, "step": 4849 }, { "epoch": 0.3374030401057428, "grad_norm": 1.0078125, "learning_rate": 0.0015440656680379386, "loss": 0.8683, "step": 4850 }, { "epoch": 0.337472607742878, "grad_norm": 1.078125, "learning_rate": 0.0015438765902342198, "loss": 0.7643, "step": 4851 }, { "epoch": 0.33754217538001324, "grad_norm": 1.1015625, "learning_rate": 0.0015436874848151893, "loss": 0.9901, "step": 4852 }, { "epoch": 0.3376117430171484, "grad_norm": 1.1484375, "learning_rate": 0.0015434983517904485, "loss": 0.7876, "step": 4853 }, { "epoch": 0.33768131065428364, "grad_norm": 1.3203125, "learning_rate": 0.0015433091911696009, "loss": 1.1143, "step": 4854 }, { "epoch": 0.3377508782914188, "grad_norm": 1.265625, "learning_rate": 0.0015431200029622511, "loss": 1.0496, "step": 4855 }, { "epoch": 0.33782044592855404, "grad_norm": 1.21875, "learning_rate": 0.001542930787178005, "loss": 0.9421, "step": 4856 }, { "epoch": 0.33789001356568926, "grad_norm": 0.875, "learning_rate": 0.0015427415438264702, "loss": 0.6855, "step": 4857 }, { "epoch": 0.33795958120282443, "grad_norm": 1.109375, "learning_rate": 0.0015425522729172552, "loss": 0.639, "step": 4858 }, { "epoch": 0.33802914883995966, "grad_norm": 1.015625, "learning_rate": 0.0015423629744599709, "loss": 0.854, "step": 4859 }, { "epoch": 0.33809871647709483, "grad_norm": 1.2578125, "learning_rate": 0.001542173648464228, "loss": 0.9457, "step": 4860 }, { "epoch": 0.33816828411423006, "grad_norm": 1.25, "learning_rate": 0.0015419842949396404, "loss": 1.1196, "step": 4861 }, { "epoch": 0.3382378517513653, "grad_norm": 1.4453125, "learning_rate": 0.0015417949138958218, "loss": 0.901, "step": 4862 }, { "epoch": 0.33830741938850045, "grad_norm": 1.1484375, "learning_rate": 0.0015416055053423885, "loss": 1.0023, "step": 4863 }, { "epoch": 0.3383769870256357, "grad_norm": 1.15625, "learning_rate": 0.0015414160692889575, "loss": 0.8722, "step": 4864 }, { "epoch": 0.3384465546627709, "grad_norm": 1.1796875, "learning_rate": 0.0015412266057451471, "loss": 0.8777, "step": 4865 }, { "epoch": 0.3385161222999061, "grad_norm": 0.97265625, "learning_rate": 0.001541037114720578, "loss": 0.7736, "step": 4866 }, { "epoch": 0.3385856899370413, "grad_norm": 1.03125, "learning_rate": 0.001540847596224871, "loss": 0.8343, "step": 4867 }, { "epoch": 0.33865525757417647, "grad_norm": 0.98828125, "learning_rate": 0.0015406580502676497, "loss": 0.7841, "step": 4868 }, { "epoch": 0.3387248252113117, "grad_norm": 1.2109375, "learning_rate": 0.0015404684768585374, "loss": 0.9431, "step": 4869 }, { "epoch": 0.3387943928484469, "grad_norm": 0.97265625, "learning_rate": 0.0015402788760071598, "loss": 0.9309, "step": 4870 }, { "epoch": 0.3388639604855821, "grad_norm": 1.1640625, "learning_rate": 0.0015400892477231442, "loss": 1.0093, "step": 4871 }, { "epoch": 0.3389335281227173, "grad_norm": 1.046875, "learning_rate": 0.001539899592016119, "loss": 0.8758, "step": 4872 }, { "epoch": 0.3390030957598525, "grad_norm": 1.5, "learning_rate": 0.0015397099088957137, "loss": 1.0624, "step": 4873 }, { "epoch": 0.3390726633969877, "grad_norm": 0.86328125, "learning_rate": 0.0015395201983715594, "loss": 0.6798, "step": 4874 }, { "epoch": 0.33914223103412294, "grad_norm": 1.171875, "learning_rate": 0.001539330460453289, "loss": 1.0312, "step": 4875 }, { "epoch": 0.3392117986712581, "grad_norm": 1.03125, "learning_rate": 0.0015391406951505361, "loss": 0.9884, "step": 4876 }, { "epoch": 0.33928136630839334, "grad_norm": 1.1171875, "learning_rate": 0.0015389509024729365, "loss": 0.9795, "step": 4877 }, { "epoch": 0.33935093394552857, "grad_norm": 1.3125, "learning_rate": 0.0015387610824301263, "loss": 0.9417, "step": 4878 }, { "epoch": 0.33942050158266374, "grad_norm": 0.94921875, "learning_rate": 0.001538571235031744, "loss": 0.7121, "step": 4879 }, { "epoch": 0.33949006921979896, "grad_norm": 1.1171875, "learning_rate": 0.0015383813602874291, "loss": 0.8359, "step": 4880 }, { "epoch": 0.33955963685693413, "grad_norm": 1.1640625, "learning_rate": 0.0015381914582068223, "loss": 1.0214, "step": 4881 }, { "epoch": 0.33962920449406936, "grad_norm": 1.25, "learning_rate": 0.0015380015287995655, "loss": 0.8943, "step": 4882 }, { "epoch": 0.3396987721312046, "grad_norm": 1.0390625, "learning_rate": 0.0015378115720753032, "loss": 0.7631, "step": 4883 }, { "epoch": 0.33976833976833976, "grad_norm": 1.1171875, "learning_rate": 0.00153762158804368, "loss": 0.9479, "step": 4884 }, { "epoch": 0.339837907405475, "grad_norm": 1.1484375, "learning_rate": 0.0015374315767143422, "loss": 1.1355, "step": 4885 }, { "epoch": 0.33990747504261015, "grad_norm": 1.1953125, "learning_rate": 0.001537241538096938, "loss": 0.9332, "step": 4886 }, { "epoch": 0.3399770426797454, "grad_norm": 0.9765625, "learning_rate": 0.0015370514722011163, "loss": 0.7488, "step": 4887 }, { "epoch": 0.3400466103168806, "grad_norm": 1.1640625, "learning_rate": 0.001536861379036528, "loss": 0.9931, "step": 4888 }, { "epoch": 0.3401161779540158, "grad_norm": 0.99609375, "learning_rate": 0.0015366712586128246, "loss": 0.9932, "step": 4889 }, { "epoch": 0.340185745591151, "grad_norm": 1.0234375, "learning_rate": 0.00153648111093966, "loss": 0.7979, "step": 4890 }, { "epoch": 0.3402553132282862, "grad_norm": 1.078125, "learning_rate": 0.0015362909360266883, "loss": 1.0964, "step": 4891 }, { "epoch": 0.3403248808654214, "grad_norm": 1.28125, "learning_rate": 0.0015361007338835662, "loss": 0.938, "step": 4892 }, { "epoch": 0.3403944485025566, "grad_norm": 1.125, "learning_rate": 0.0015359105045199511, "loss": 0.8411, "step": 4893 }, { "epoch": 0.3404640161396918, "grad_norm": 1.109375, "learning_rate": 0.0015357202479455016, "loss": 1.1284, "step": 4894 }, { "epoch": 0.340533583776827, "grad_norm": 0.9140625, "learning_rate": 0.001535529964169878, "loss": 1.0382, "step": 4895 }, { "epoch": 0.34060315141396225, "grad_norm": 0.91796875, "learning_rate": 0.0015353396532027423, "loss": 0.8875, "step": 4896 }, { "epoch": 0.3406727190510974, "grad_norm": 1.2265625, "learning_rate": 0.001535149315053757, "loss": 1.2211, "step": 4897 }, { "epoch": 0.34074228668823264, "grad_norm": 1.359375, "learning_rate": 0.0015349589497325872, "loss": 0.9812, "step": 4898 }, { "epoch": 0.3408118543253678, "grad_norm": 1.375, "learning_rate": 0.001534768557248898, "loss": 0.8877, "step": 4899 }, { "epoch": 0.34088142196250304, "grad_norm": 1.1875, "learning_rate": 0.0015345781376123573, "loss": 0.8944, "step": 4900 }, { "epoch": 0.34095098959963827, "grad_norm": 1.09375, "learning_rate": 0.001534387690832633, "loss": 0.8149, "step": 4901 }, { "epoch": 0.34102055723677344, "grad_norm": 1.015625, "learning_rate": 0.0015341972169193952, "loss": 0.6687, "step": 4902 }, { "epoch": 0.34109012487390866, "grad_norm": 0.96484375, "learning_rate": 0.0015340067158823155, "loss": 0.8705, "step": 4903 }, { "epoch": 0.3411596925110439, "grad_norm": 1.0625, "learning_rate": 0.001533816187731066, "loss": 0.8809, "step": 4904 }, { "epoch": 0.34122926014817906, "grad_norm": 0.96875, "learning_rate": 0.0015336256324753215, "loss": 0.868, "step": 4905 }, { "epoch": 0.3412988277853143, "grad_norm": 1.140625, "learning_rate": 0.0015334350501247569, "loss": 0.7088, "step": 4906 }, { "epoch": 0.34136839542244946, "grad_norm": 1.203125, "learning_rate": 0.001533244440689049, "loss": 0.904, "step": 4907 }, { "epoch": 0.3414379630595847, "grad_norm": 1.171875, "learning_rate": 0.0015330538041778766, "loss": 1.0991, "step": 4908 }, { "epoch": 0.3415075306967199, "grad_norm": 0.9921875, "learning_rate": 0.0015328631406009183, "loss": 0.9877, "step": 4909 }, { "epoch": 0.3415770983338551, "grad_norm": 1.0859375, "learning_rate": 0.001532672449967856, "loss": 0.7401, "step": 4910 }, { "epoch": 0.3416466659709903, "grad_norm": 0.984375, "learning_rate": 0.0015324817322883715, "loss": 0.6729, "step": 4911 }, { "epoch": 0.3417162336081255, "grad_norm": 1.1875, "learning_rate": 0.0015322909875721481, "loss": 0.925, "step": 4912 }, { "epoch": 0.3417858012452607, "grad_norm": 1.2421875, "learning_rate": 0.001532100215828872, "loss": 0.9261, "step": 4913 }, { "epoch": 0.3418553688823959, "grad_norm": 0.984375, "learning_rate": 0.0015319094170682282, "loss": 0.7656, "step": 4914 }, { "epoch": 0.3419249365195311, "grad_norm": 1.2265625, "learning_rate": 0.0015317185912999056, "loss": 0.8597, "step": 4915 }, { "epoch": 0.3419945041566663, "grad_norm": 0.98046875, "learning_rate": 0.001531527738533593, "loss": 0.6822, "step": 4916 }, { "epoch": 0.34206407179380155, "grad_norm": 0.9921875, "learning_rate": 0.001531336858778981, "loss": 1.1433, "step": 4917 }, { "epoch": 0.3421336394309367, "grad_norm": 1.09375, "learning_rate": 0.0015311459520457613, "loss": 0.9465, "step": 4918 }, { "epoch": 0.34220320706807195, "grad_norm": 0.9296875, "learning_rate": 0.0015309550183436273, "loss": 0.5912, "step": 4919 }, { "epoch": 0.3422727747052071, "grad_norm": 1.171875, "learning_rate": 0.0015307640576822737, "loss": 0.9429, "step": 4920 }, { "epoch": 0.34234234234234234, "grad_norm": 1.0, "learning_rate": 0.0015305730700713965, "loss": 0.7966, "step": 4921 }, { "epoch": 0.34241190997947757, "grad_norm": 1.1328125, "learning_rate": 0.0015303820555206931, "loss": 0.9875, "step": 4922 }, { "epoch": 0.34248147761661274, "grad_norm": 1.21875, "learning_rate": 0.0015301910140398623, "loss": 1.0872, "step": 4923 }, { "epoch": 0.34255104525374797, "grad_norm": 1.3046875, "learning_rate": 0.001529999945638604, "loss": 0.9393, "step": 4924 }, { "epoch": 0.34262061289088314, "grad_norm": 1.140625, "learning_rate": 0.00152980885032662, "loss": 0.9258, "step": 4925 }, { "epoch": 0.34269018052801836, "grad_norm": 1.0390625, "learning_rate": 0.001529617728113613, "loss": 0.9441, "step": 4926 }, { "epoch": 0.3427597481651536, "grad_norm": 0.98828125, "learning_rate": 0.0015294265790092873, "loss": 0.8605, "step": 4927 }, { "epoch": 0.34282931580228876, "grad_norm": 0.859375, "learning_rate": 0.0015292354030233483, "loss": 0.7356, "step": 4928 }, { "epoch": 0.342898883439424, "grad_norm": 1.0390625, "learning_rate": 0.0015290442001655031, "loss": 1.0271, "step": 4929 }, { "epoch": 0.3429684510765592, "grad_norm": 1.3828125, "learning_rate": 0.0015288529704454601, "loss": 1.0625, "step": 4930 }, { "epoch": 0.3430380187136944, "grad_norm": 0.9609375, "learning_rate": 0.0015286617138729288, "loss": 0.9363, "step": 4931 }, { "epoch": 0.3431075863508296, "grad_norm": 1.0390625, "learning_rate": 0.0015284704304576204, "loss": 0.9087, "step": 4932 }, { "epoch": 0.3431771539879648, "grad_norm": 1.359375, "learning_rate": 0.0015282791202092475, "loss": 1.0606, "step": 4933 }, { "epoch": 0.3432467216251, "grad_norm": 0.9765625, "learning_rate": 0.001528087783137523, "loss": 0.9597, "step": 4934 }, { "epoch": 0.34331628926223523, "grad_norm": 1.2734375, "learning_rate": 0.0015278964192521629, "loss": 1.043, "step": 4935 }, { "epoch": 0.3433858568993704, "grad_norm": 1.046875, "learning_rate": 0.0015277050285628835, "loss": 0.7973, "step": 4936 }, { "epoch": 0.3434554245365056, "grad_norm": 0.96875, "learning_rate": 0.0015275136110794027, "loss": 0.9741, "step": 4937 }, { "epoch": 0.3435249921736408, "grad_norm": 1.0390625, "learning_rate": 0.0015273221668114392, "loss": 0.8344, "step": 4938 }, { "epoch": 0.343594559810776, "grad_norm": 1.3828125, "learning_rate": 0.0015271306957687142, "loss": 0.991, "step": 4939 }, { "epoch": 0.34366412744791125, "grad_norm": 1.171875, "learning_rate": 0.001526939197960949, "loss": 0.6336, "step": 4940 }, { "epoch": 0.3437336950850464, "grad_norm": 1.0859375, "learning_rate": 0.001526747673397868, "loss": 0.8836, "step": 4941 }, { "epoch": 0.34380326272218165, "grad_norm": 1.4921875, "learning_rate": 0.0015265561220891948, "loss": 0.9956, "step": 4942 }, { "epoch": 0.3438728303593169, "grad_norm": 0.86328125, "learning_rate": 0.0015263645440446558, "loss": 0.7178, "step": 4943 }, { "epoch": 0.34394239799645204, "grad_norm": 1.0703125, "learning_rate": 0.0015261729392739786, "loss": 0.7417, "step": 4944 }, { "epoch": 0.34401196563358727, "grad_norm": 1.5078125, "learning_rate": 0.001525981307786891, "loss": 0.6659, "step": 4945 }, { "epoch": 0.34408153327072244, "grad_norm": 1.2109375, "learning_rate": 0.0015257896495931244, "loss": 1.1262, "step": 4946 }, { "epoch": 0.34415110090785767, "grad_norm": 0.9921875, "learning_rate": 0.001525597964702409, "loss": 1.0506, "step": 4947 }, { "epoch": 0.3442206685449929, "grad_norm": 1.421875, "learning_rate": 0.0015254062531244786, "loss": 1.0611, "step": 4948 }, { "epoch": 0.34429023618212806, "grad_norm": 1.0546875, "learning_rate": 0.0015252145148690666, "loss": 0.8728, "step": 4949 }, { "epoch": 0.3443598038192633, "grad_norm": 0.91015625, "learning_rate": 0.0015250227499459088, "loss": 0.8899, "step": 4950 }, { "epoch": 0.34442937145639846, "grad_norm": 0.9453125, "learning_rate": 0.0015248309583647424, "loss": 0.9292, "step": 4951 }, { "epoch": 0.3444989390935337, "grad_norm": 0.9609375, "learning_rate": 0.0015246391401353052, "loss": 0.7855, "step": 4952 }, { "epoch": 0.3445685067306689, "grad_norm": 0.7890625, "learning_rate": 0.0015244472952673368, "loss": 0.8608, "step": 4953 }, { "epoch": 0.3446380743678041, "grad_norm": 1.03125, "learning_rate": 0.0015242554237705778, "loss": 0.6868, "step": 4954 }, { "epoch": 0.3447076420049393, "grad_norm": 1.109375, "learning_rate": 0.0015240635256547712, "loss": 0.8707, "step": 4955 }, { "epoch": 0.34477720964207453, "grad_norm": 1.03125, "learning_rate": 0.00152387160092966, "loss": 0.968, "step": 4956 }, { "epoch": 0.3448467772792097, "grad_norm": 1.1875, "learning_rate": 0.0015236796496049898, "loss": 0.8383, "step": 4957 }, { "epoch": 0.34491634491634493, "grad_norm": 1.0625, "learning_rate": 0.0015234876716905062, "loss": 0.8117, "step": 4958 }, { "epoch": 0.3449859125534801, "grad_norm": 0.94140625, "learning_rate": 0.0015232956671959574, "loss": 0.9149, "step": 4959 }, { "epoch": 0.3450554801906153, "grad_norm": 1.34375, "learning_rate": 0.001523103636131092, "loss": 0.8582, "step": 4960 }, { "epoch": 0.34512504782775055, "grad_norm": 1.09375, "learning_rate": 0.001522911578505661, "loss": 0.7958, "step": 4961 }, { "epoch": 0.3451946154648857, "grad_norm": 0.9609375, "learning_rate": 0.0015227194943294154, "loss": 0.6385, "step": 4962 }, { "epoch": 0.34526418310202095, "grad_norm": 1.0625, "learning_rate": 0.0015225273836121085, "loss": 0.8825, "step": 4963 }, { "epoch": 0.3453337507391561, "grad_norm": 1.140625, "learning_rate": 0.001522335246363495, "loss": 0.8911, "step": 4964 }, { "epoch": 0.34540331837629135, "grad_norm": 1.15625, "learning_rate": 0.0015221430825933305, "loss": 0.7903, "step": 4965 }, { "epoch": 0.34547288601342657, "grad_norm": 0.953125, "learning_rate": 0.001521950892311372, "loss": 0.844, "step": 4966 }, { "epoch": 0.34554245365056174, "grad_norm": 1.265625, "learning_rate": 0.0015217586755273778, "loss": 0.9461, "step": 4967 }, { "epoch": 0.34561202128769697, "grad_norm": 1.109375, "learning_rate": 0.001521566432251108, "loss": 0.8895, "step": 4968 }, { "epoch": 0.3456815889248322, "grad_norm": 0.99609375, "learning_rate": 0.0015213741624923239, "loss": 0.6103, "step": 4969 }, { "epoch": 0.34575115656196737, "grad_norm": 1.5234375, "learning_rate": 0.0015211818662607872, "loss": 0.8747, "step": 4970 }, { "epoch": 0.3458207241991026, "grad_norm": 1.28125, "learning_rate": 0.001520989543566263, "loss": 0.7802, "step": 4971 }, { "epoch": 0.34589029183623776, "grad_norm": 0.953125, "learning_rate": 0.0015207971944185155, "loss": 0.8913, "step": 4972 }, { "epoch": 0.345959859473373, "grad_norm": 1.1171875, "learning_rate": 0.0015206048188273113, "loss": 0.9981, "step": 4973 }, { "epoch": 0.3460294271105082, "grad_norm": 1.265625, "learning_rate": 0.0015204124168024184, "loss": 0.9906, "step": 4974 }, { "epoch": 0.3460989947476434, "grad_norm": 1.2890625, "learning_rate": 0.0015202199883536064, "loss": 0.807, "step": 4975 }, { "epoch": 0.3461685623847786, "grad_norm": 1.4921875, "learning_rate": 0.0015200275334906453, "loss": 1.1194, "step": 4976 }, { "epoch": 0.3462381300219138, "grad_norm": 1.1171875, "learning_rate": 0.0015198350522233068, "loss": 0.9465, "step": 4977 }, { "epoch": 0.346307697659049, "grad_norm": 1.25, "learning_rate": 0.001519642544561365, "loss": 1.0371, "step": 4978 }, { "epoch": 0.34637726529618423, "grad_norm": 1.015625, "learning_rate": 0.0015194500105145936, "loss": 0.8513, "step": 4979 }, { "epoch": 0.3464468329333194, "grad_norm": 1.4609375, "learning_rate": 0.0015192574500927695, "loss": 0.8837, "step": 4980 }, { "epoch": 0.34651640057045463, "grad_norm": 1.234375, "learning_rate": 0.001519064863305669, "loss": 0.7, "step": 4981 }, { "epoch": 0.34658596820758986, "grad_norm": 1.1328125, "learning_rate": 0.0015188722501630711, "loss": 0.8776, "step": 4982 }, { "epoch": 0.346655535844725, "grad_norm": 0.890625, "learning_rate": 0.0015186796106747553, "loss": 0.7257, "step": 4983 }, { "epoch": 0.34672510348186025, "grad_norm": 1.015625, "learning_rate": 0.0015184869448505035, "loss": 0.911, "step": 4984 }, { "epoch": 0.3467946711189954, "grad_norm": 1.203125, "learning_rate": 0.0015182942527000982, "loss": 0.5819, "step": 4985 }, { "epoch": 0.34686423875613065, "grad_norm": 1.109375, "learning_rate": 0.0015181015342333227, "loss": 0.7151, "step": 4986 }, { "epoch": 0.3469338063932659, "grad_norm": 1.0859375, "learning_rate": 0.001517908789459963, "loss": 0.9317, "step": 4987 }, { "epoch": 0.34700337403040105, "grad_norm": 0.87890625, "learning_rate": 0.0015177160183898054, "loss": 0.7153, "step": 4988 }, { "epoch": 0.34707294166753627, "grad_norm": 1.171875, "learning_rate": 0.0015175232210326377, "loss": 0.8403, "step": 4989 }, { "epoch": 0.34714250930467144, "grad_norm": 1.140625, "learning_rate": 0.0015173303973982498, "loss": 0.8124, "step": 4990 }, { "epoch": 0.34721207694180667, "grad_norm": 1.640625, "learning_rate": 0.0015171375474964312, "loss": 0.7931, "step": 4991 }, { "epoch": 0.3472816445789419, "grad_norm": 1.25, "learning_rate": 0.001516944671336975, "loss": 1.0845, "step": 4992 }, { "epoch": 0.34735121221607707, "grad_norm": 0.9375, "learning_rate": 0.0015167517689296734, "loss": 0.8126, "step": 4993 }, { "epoch": 0.3474207798532123, "grad_norm": 1.1171875, "learning_rate": 0.0015165588402843225, "loss": 1.018, "step": 4994 }, { "epoch": 0.3474903474903475, "grad_norm": 0.8515625, "learning_rate": 0.0015163658854107165, "loss": 0.8798, "step": 4995 }, { "epoch": 0.3475599151274827, "grad_norm": 1.109375, "learning_rate": 0.0015161729043186541, "loss": 0.988, "step": 4996 }, { "epoch": 0.3476294827646179, "grad_norm": 0.9765625, "learning_rate": 0.001515979897017933, "loss": 0.8558, "step": 4997 }, { "epoch": 0.3476990504017531, "grad_norm": 1.15625, "learning_rate": 0.0015157868635183537, "loss": 1.0063, "step": 4998 }, { "epoch": 0.3477686180388883, "grad_norm": 1.40625, "learning_rate": 0.001515593803829717, "loss": 1.202, "step": 4999 }, { "epoch": 0.34783818567602354, "grad_norm": 1.390625, "learning_rate": 0.0015154007179618257, "loss": 1.1614, "step": 5000 }, { "epoch": 0.3479077533131587, "grad_norm": 1.15625, "learning_rate": 0.0015152076059244842, "loss": 0.6836, "step": 5001 }, { "epoch": 0.34797732095029393, "grad_norm": 1.0546875, "learning_rate": 0.0015150144677274966, "loss": 0.8049, "step": 5002 }, { "epoch": 0.3480468885874291, "grad_norm": 1.0234375, "learning_rate": 0.0015148213033806708, "loss": 1.0787, "step": 5003 }, { "epoch": 0.34811645622456433, "grad_norm": 1.0859375, "learning_rate": 0.001514628112893814, "loss": 0.781, "step": 5004 }, { "epoch": 0.34818602386169956, "grad_norm": 1.203125, "learning_rate": 0.0015144348962767352, "loss": 0.9506, "step": 5005 }, { "epoch": 0.3482555914988347, "grad_norm": 1.0234375, "learning_rate": 0.0015142416535392457, "loss": 0.8873, "step": 5006 }, { "epoch": 0.34832515913596995, "grad_norm": 1.0390625, "learning_rate": 0.0015140483846911566, "loss": 0.879, "step": 5007 }, { "epoch": 0.3483947267731052, "grad_norm": 1.2890625, "learning_rate": 0.001513855089742282, "loss": 0.969, "step": 5008 }, { "epoch": 0.34846429441024035, "grad_norm": 1.15625, "learning_rate": 0.0015136617687024354, "loss": 0.9758, "step": 5009 }, { "epoch": 0.3485338620473756, "grad_norm": 0.984375, "learning_rate": 0.0015134684215814338, "loss": 0.7726, "step": 5010 }, { "epoch": 0.34860342968451075, "grad_norm": 0.9921875, "learning_rate": 0.0015132750483890934, "loss": 0.853, "step": 5011 }, { "epoch": 0.34867299732164597, "grad_norm": 1.0625, "learning_rate": 0.0015130816491352333, "loss": 0.879, "step": 5012 }, { "epoch": 0.3487425649587812, "grad_norm": 1.1328125, "learning_rate": 0.0015128882238296733, "loss": 0.9025, "step": 5013 }, { "epoch": 0.34881213259591637, "grad_norm": 0.9921875, "learning_rate": 0.0015126947724822342, "loss": 0.7474, "step": 5014 }, { "epoch": 0.3488817002330516, "grad_norm": 1.0546875, "learning_rate": 0.001512501295102739, "loss": 0.5989, "step": 5015 }, { "epoch": 0.34895126787018677, "grad_norm": 1.2890625, "learning_rate": 0.0015123077917010108, "loss": 1.0787, "step": 5016 }, { "epoch": 0.349020835507322, "grad_norm": 1.2265625, "learning_rate": 0.0015121142622868758, "loss": 0.8695, "step": 5017 }, { "epoch": 0.3490904031444572, "grad_norm": 1.1953125, "learning_rate": 0.0015119207068701593, "loss": 0.9261, "step": 5018 }, { "epoch": 0.3491599707815924, "grad_norm": 0.96484375, "learning_rate": 0.0015117271254606898, "loss": 0.8961, "step": 5019 }, { "epoch": 0.3492295384187276, "grad_norm": 1.1796875, "learning_rate": 0.0015115335180682964, "loss": 0.6799, "step": 5020 }, { "epoch": 0.34929910605586284, "grad_norm": 1.046875, "learning_rate": 0.0015113398847028086, "loss": 0.7744, "step": 5021 }, { "epoch": 0.349368673692998, "grad_norm": 1.0, "learning_rate": 0.0015111462253740594, "loss": 1.0222, "step": 5022 }, { "epoch": 0.34943824133013324, "grad_norm": 1.0703125, "learning_rate": 0.0015109525400918806, "loss": 0.8946, "step": 5023 }, { "epoch": 0.3495078089672684, "grad_norm": 1.203125, "learning_rate": 0.0015107588288661078, "loss": 0.9303, "step": 5024 }, { "epoch": 0.34957737660440363, "grad_norm": 1.171875, "learning_rate": 0.0015105650917065759, "loss": 0.7738, "step": 5025 }, { "epoch": 0.34964694424153886, "grad_norm": 1.0546875, "learning_rate": 0.0015103713286231221, "loss": 0.9991, "step": 5026 }, { "epoch": 0.34971651187867403, "grad_norm": 0.94921875, "learning_rate": 0.0015101775396255848, "loss": 0.7473, "step": 5027 }, { "epoch": 0.34978607951580926, "grad_norm": 1.03125, "learning_rate": 0.0015099837247238032, "loss": 0.908, "step": 5028 }, { "epoch": 0.3498556471529444, "grad_norm": 1.375, "learning_rate": 0.0015097898839276191, "loss": 1.0346, "step": 5029 }, { "epoch": 0.34992521479007965, "grad_norm": 1.0703125, "learning_rate": 0.0015095960172468736, "loss": 0.7678, "step": 5030 }, { "epoch": 0.3499947824272149, "grad_norm": 0.9140625, "learning_rate": 0.0015094021246914117, "loss": 0.6541, "step": 5031 }, { "epoch": 0.35006435006435005, "grad_norm": 1.1640625, "learning_rate": 0.0015092082062710766, "loss": 0.829, "step": 5032 }, { "epoch": 0.3501339177014853, "grad_norm": 1.1171875, "learning_rate": 0.0015090142619957158, "loss": 1.0688, "step": 5033 }, { "epoch": 0.35020348533862045, "grad_norm": 1.0625, "learning_rate": 0.0015088202918751763, "loss": 1.0056, "step": 5034 }, { "epoch": 0.35027305297575567, "grad_norm": 1.2265625, "learning_rate": 0.0015086262959193074, "loss": 1.0583, "step": 5035 }, { "epoch": 0.3503426206128909, "grad_norm": 1.03125, "learning_rate": 0.0015084322741379585, "loss": 0.9359, "step": 5036 }, { "epoch": 0.35041218825002607, "grad_norm": 1.015625, "learning_rate": 0.0015082382265409811, "loss": 0.8333, "step": 5037 }, { "epoch": 0.3504817558871613, "grad_norm": 0.88671875, "learning_rate": 0.001508044153138229, "loss": 0.7506, "step": 5038 }, { "epoch": 0.3505513235242965, "grad_norm": 0.99609375, "learning_rate": 0.001507850053939555, "loss": 0.7149, "step": 5039 }, { "epoch": 0.3506208911614317, "grad_norm": 1.1484375, "learning_rate": 0.0015076559289548153, "loss": 0.9226, "step": 5040 }, { "epoch": 0.3506904587985669, "grad_norm": 1.0625, "learning_rate": 0.001507461778193866, "loss": 0.8957, "step": 5041 }, { "epoch": 0.3507600264357021, "grad_norm": 1.1171875, "learning_rate": 0.0015072676016665656, "loss": 0.8803, "step": 5042 }, { "epoch": 0.3508295940728373, "grad_norm": 0.86328125, "learning_rate": 0.0015070733993827732, "loss": 0.781, "step": 5043 }, { "epoch": 0.35089916170997254, "grad_norm": 1.171875, "learning_rate": 0.0015068791713523492, "loss": 1.0205, "step": 5044 }, { "epoch": 0.3509687293471077, "grad_norm": 0.98828125, "learning_rate": 0.0015066849175851562, "loss": 0.8004, "step": 5045 }, { "epoch": 0.35103829698424294, "grad_norm": 1.2265625, "learning_rate": 0.0015064906380910566, "loss": 0.9252, "step": 5046 }, { "epoch": 0.3511078646213781, "grad_norm": 1.0390625, "learning_rate": 0.0015062963328799155, "loss": 0.9361, "step": 5047 }, { "epoch": 0.35117743225851333, "grad_norm": 0.97265625, "learning_rate": 0.0015061020019615982, "loss": 0.7734, "step": 5048 }, { "epoch": 0.35124699989564856, "grad_norm": 1.1328125, "learning_rate": 0.0015059076453459727, "loss": 0.9989, "step": 5049 }, { "epoch": 0.35131656753278373, "grad_norm": 1.3046875, "learning_rate": 0.0015057132630429066, "loss": 0.9318, "step": 5050 }, { "epoch": 0.35138613516991896, "grad_norm": 1.0234375, "learning_rate": 0.00150551885506227, "loss": 0.9111, "step": 5051 }, { "epoch": 0.3514557028070542, "grad_norm": 0.953125, "learning_rate": 0.0015053244214139343, "loss": 0.8417, "step": 5052 }, { "epoch": 0.35152527044418935, "grad_norm": 1.2890625, "learning_rate": 0.001505129962107771, "loss": 1.3842, "step": 5053 }, { "epoch": 0.3515948380813246, "grad_norm": 1.0625, "learning_rate": 0.0015049354771536545, "loss": 0.8825, "step": 5054 }, { "epoch": 0.35166440571845975, "grad_norm": 1.1796875, "learning_rate": 0.0015047409665614594, "loss": 0.9776, "step": 5055 }, { "epoch": 0.351733973355595, "grad_norm": 1.2265625, "learning_rate": 0.0015045464303410623, "loss": 0.9288, "step": 5056 }, { "epoch": 0.3518035409927302, "grad_norm": 0.87109375, "learning_rate": 0.0015043518685023403, "loss": 0.6324, "step": 5057 }, { "epoch": 0.35187310862986537, "grad_norm": 0.80078125, "learning_rate": 0.0015041572810551727, "loss": 0.7409, "step": 5058 }, { "epoch": 0.3519426762670006, "grad_norm": 1.1328125, "learning_rate": 0.0015039626680094398, "loss": 0.8132, "step": 5059 }, { "epoch": 0.35201224390413577, "grad_norm": 1.09375, "learning_rate": 0.0015037680293750223, "loss": 1.0056, "step": 5060 }, { "epoch": 0.352081811541271, "grad_norm": 1.1875, "learning_rate": 0.0015035733651618038, "loss": 0.8673, "step": 5061 }, { "epoch": 0.3521513791784062, "grad_norm": 1.2890625, "learning_rate": 0.0015033786753796676, "loss": 0.8713, "step": 5062 }, { "epoch": 0.3522209468155414, "grad_norm": 1.265625, "learning_rate": 0.0015031839600385, "loss": 0.89, "step": 5063 }, { "epoch": 0.3522905144526766, "grad_norm": 1.1640625, "learning_rate": 0.0015029892191481867, "loss": 0.9799, "step": 5064 }, { "epoch": 0.35236008208981184, "grad_norm": 0.90625, "learning_rate": 0.001502794452718616, "loss": 0.58, "step": 5065 }, { "epoch": 0.352429649726947, "grad_norm": 1.1171875, "learning_rate": 0.0015025996607596777, "loss": 0.9915, "step": 5066 }, { "epoch": 0.35249921736408224, "grad_norm": 1.3828125, "learning_rate": 0.001502404843281262, "loss": 1.0942, "step": 5067 }, { "epoch": 0.3525687850012174, "grad_norm": 0.953125, "learning_rate": 0.0015022100002932606, "loss": 0.8471, "step": 5068 }, { "epoch": 0.35263835263835264, "grad_norm": 0.92578125, "learning_rate": 0.0015020151318055662, "loss": 0.7711, "step": 5069 }, { "epoch": 0.35270792027548786, "grad_norm": 1.0078125, "learning_rate": 0.0015018202378280746, "loss": 0.8061, "step": 5070 }, { "epoch": 0.35277748791262303, "grad_norm": 0.95703125, "learning_rate": 0.0015016253183706798, "loss": 0.8691, "step": 5071 }, { "epoch": 0.35284705554975826, "grad_norm": 1.265625, "learning_rate": 0.00150143037344328, "loss": 1.1209, "step": 5072 }, { "epoch": 0.35291662318689343, "grad_norm": 1.1796875, "learning_rate": 0.0015012354030557735, "loss": 1.1614, "step": 5073 }, { "epoch": 0.35298619082402866, "grad_norm": 0.84765625, "learning_rate": 0.0015010404072180595, "loss": 0.9, "step": 5074 }, { "epoch": 0.3530557584611639, "grad_norm": 1.140625, "learning_rate": 0.001500845385940039, "loss": 0.8916, "step": 5075 }, { "epoch": 0.35312532609829905, "grad_norm": 1.0546875, "learning_rate": 0.0015006503392316142, "loss": 0.8861, "step": 5076 }, { "epoch": 0.3531948937354343, "grad_norm": 1.3125, "learning_rate": 0.001500455267102689, "loss": 0.9147, "step": 5077 }, { "epoch": 0.3532644613725695, "grad_norm": 0.9609375, "learning_rate": 0.0015002601695631673, "loss": 0.9116, "step": 5078 }, { "epoch": 0.3533340290097047, "grad_norm": 1.1796875, "learning_rate": 0.001500065046622956, "loss": 0.7114, "step": 5079 }, { "epoch": 0.3534035966468399, "grad_norm": 1.25, "learning_rate": 0.0014998698982919621, "loss": 1.2384, "step": 5080 }, { "epoch": 0.35347316428397507, "grad_norm": 1.125, "learning_rate": 0.0014996747245800942, "loss": 0.9718, "step": 5081 }, { "epoch": 0.3535427319211103, "grad_norm": 0.953125, "learning_rate": 0.0014994795254972622, "loss": 0.9054, "step": 5082 }, { "epoch": 0.3536122995582455, "grad_norm": 1.328125, "learning_rate": 0.0014992843010533776, "loss": 0.7544, "step": 5083 }, { "epoch": 0.3536818671953807, "grad_norm": 1.171875, "learning_rate": 0.0014990890512583534, "loss": 1.0095, "step": 5084 }, { "epoch": 0.3537514348325159, "grad_norm": 0.97265625, "learning_rate": 0.0014988937761221018, "loss": 0.9885, "step": 5085 }, { "epoch": 0.3538210024696511, "grad_norm": 1.265625, "learning_rate": 0.0014986984756545393, "loss": 1.0343, "step": 5086 }, { "epoch": 0.3538905701067863, "grad_norm": 0.8828125, "learning_rate": 0.0014985031498655817, "loss": 0.7228, "step": 5087 }, { "epoch": 0.35396013774392154, "grad_norm": 1.28125, "learning_rate": 0.001498307798765147, "loss": 1.2578, "step": 5088 }, { "epoch": 0.3540297053810567, "grad_norm": 1.3671875, "learning_rate": 0.0014981124223631538, "loss": 0.9989, "step": 5089 }, { "epoch": 0.35409927301819194, "grad_norm": 1.2265625, "learning_rate": 0.0014979170206695226, "loss": 0.8049, "step": 5090 }, { "epoch": 0.35416884065532717, "grad_norm": 1.1015625, "learning_rate": 0.0014977215936941746, "loss": 0.9737, "step": 5091 }, { "epoch": 0.35423840829246234, "grad_norm": 1.015625, "learning_rate": 0.0014975261414470328, "loss": 0.8074, "step": 5092 }, { "epoch": 0.35430797592959756, "grad_norm": 0.84765625, "learning_rate": 0.0014973306639380214, "loss": 0.7584, "step": 5093 }, { "epoch": 0.35437754356673273, "grad_norm": 1.1484375, "learning_rate": 0.0014971351611770653, "loss": 0.9073, "step": 5094 }, { "epoch": 0.35444711120386796, "grad_norm": 1.3125, "learning_rate": 0.0014969396331740916, "loss": 0.9608, "step": 5095 }, { "epoch": 0.3545166788410032, "grad_norm": 1.234375, "learning_rate": 0.0014967440799390284, "loss": 1.0383, "step": 5096 }, { "epoch": 0.35458624647813836, "grad_norm": 1.0703125, "learning_rate": 0.0014965485014818043, "loss": 0.8309, "step": 5097 }, { "epoch": 0.3546558141152736, "grad_norm": 0.98046875, "learning_rate": 0.0014963528978123501, "loss": 0.6906, "step": 5098 }, { "epoch": 0.35472538175240875, "grad_norm": 1.015625, "learning_rate": 0.0014961572689405976, "loss": 0.7935, "step": 5099 }, { "epoch": 0.354794949389544, "grad_norm": 1.0234375, "learning_rate": 0.0014959616148764799, "loss": 0.7032, "step": 5100 }, { "epoch": 0.3548645170266792, "grad_norm": 1.2578125, "learning_rate": 0.001495765935629931, "loss": 0.8586, "step": 5101 }, { "epoch": 0.3549340846638144, "grad_norm": 1.3515625, "learning_rate": 0.0014955702312108867, "loss": 0.8868, "step": 5102 }, { "epoch": 0.3550036523009496, "grad_norm": 1.34375, "learning_rate": 0.0014953745016292844, "loss": 0.9855, "step": 5103 }, { "epoch": 0.3550732199380848, "grad_norm": 0.99609375, "learning_rate": 0.0014951787468950612, "loss": 0.9177, "step": 5104 }, { "epoch": 0.35514278757522, "grad_norm": 0.9921875, "learning_rate": 0.0014949829670181573, "loss": 0.6573, "step": 5105 }, { "epoch": 0.3552123552123552, "grad_norm": 0.90625, "learning_rate": 0.0014947871620085134, "loss": 0.8794, "step": 5106 }, { "epoch": 0.3552819228494904, "grad_norm": 1.0859375, "learning_rate": 0.0014945913318760715, "loss": 0.8961, "step": 5107 }, { "epoch": 0.3553514904866256, "grad_norm": 1.0234375, "learning_rate": 0.0014943954766307743, "loss": 0.8715, "step": 5108 }, { "epoch": 0.35542105812376085, "grad_norm": 1.2578125, "learning_rate": 0.0014941995962825668, "loss": 0.9822, "step": 5109 }, { "epoch": 0.355490625760896, "grad_norm": 1.4140625, "learning_rate": 0.0014940036908413948, "loss": 0.9308, "step": 5110 }, { "epoch": 0.35556019339803124, "grad_norm": 0.94140625, "learning_rate": 0.0014938077603172052, "loss": 0.6205, "step": 5111 }, { "epoch": 0.3556297610351664, "grad_norm": 1.2421875, "learning_rate": 0.0014936118047199467, "loss": 0.8679, "step": 5112 }, { "epoch": 0.35569932867230164, "grad_norm": 1.1328125, "learning_rate": 0.0014934158240595687, "loss": 0.913, "step": 5113 }, { "epoch": 0.35576889630943687, "grad_norm": 1.1484375, "learning_rate": 0.0014932198183460223, "loss": 0.7827, "step": 5114 }, { "epoch": 0.35583846394657204, "grad_norm": 1.015625, "learning_rate": 0.0014930237875892594, "loss": 0.7733, "step": 5115 }, { "epoch": 0.35590803158370726, "grad_norm": 0.9140625, "learning_rate": 0.0014928277317992338, "loss": 0.686, "step": 5116 }, { "epoch": 0.3559775992208425, "grad_norm": 1.1796875, "learning_rate": 0.0014926316509858996, "loss": 0.8916, "step": 5117 }, { "epoch": 0.35604716685797766, "grad_norm": 0.9140625, "learning_rate": 0.0014924355451592134, "loss": 0.9492, "step": 5118 }, { "epoch": 0.3561167344951129, "grad_norm": 0.8828125, "learning_rate": 0.0014922394143291322, "loss": 0.9, "step": 5119 }, { "epoch": 0.35618630213224806, "grad_norm": 1.21875, "learning_rate": 0.0014920432585056147, "loss": 0.9832, "step": 5120 }, { "epoch": 0.3562558697693833, "grad_norm": 0.9921875, "learning_rate": 0.001491847077698621, "loss": 0.7843, "step": 5121 }, { "epoch": 0.3563254374065185, "grad_norm": 1.0078125, "learning_rate": 0.001491650871918111, "loss": 0.829, "step": 5122 }, { "epoch": 0.3563950050436537, "grad_norm": 1.140625, "learning_rate": 0.0014914546411740487, "loss": 0.9153, "step": 5123 }, { "epoch": 0.3564645726807889, "grad_norm": 1.0078125, "learning_rate": 0.001491258385476396, "loss": 0.8968, "step": 5124 }, { "epoch": 0.3565341403179241, "grad_norm": 1.234375, "learning_rate": 0.001491062104835119, "loss": 0.9404, "step": 5125 }, { "epoch": 0.3566037079550593, "grad_norm": 1.421875, "learning_rate": 0.0014908657992601833, "loss": 1.2725, "step": 5126 }, { "epoch": 0.3566732755921945, "grad_norm": 1.0546875, "learning_rate": 0.0014906694687615567, "loss": 0.7339, "step": 5127 }, { "epoch": 0.3567428432293297, "grad_norm": 1.453125, "learning_rate": 0.0014904731133492076, "loss": 0.813, "step": 5128 }, { "epoch": 0.3568124108664649, "grad_norm": 1.0234375, "learning_rate": 0.001490276733033106, "loss": 0.8772, "step": 5129 }, { "epoch": 0.35688197850360015, "grad_norm": 1.046875, "learning_rate": 0.0014900803278232227, "loss": 0.7159, "step": 5130 }, { "epoch": 0.3569515461407353, "grad_norm": 0.94921875, "learning_rate": 0.0014898838977295311, "loss": 0.8305, "step": 5131 }, { "epoch": 0.35702111377787055, "grad_norm": 0.9296875, "learning_rate": 0.0014896874427620039, "loss": 0.8337, "step": 5132 }, { "epoch": 0.3570906814150057, "grad_norm": 1.0078125, "learning_rate": 0.0014894909629306168, "loss": 0.759, "step": 5133 }, { "epoch": 0.35716024905214094, "grad_norm": 1.296875, "learning_rate": 0.001489294458245346, "loss": 1.0684, "step": 5134 }, { "epoch": 0.35722981668927617, "grad_norm": 1.1484375, "learning_rate": 0.0014890979287161684, "loss": 0.8084, "step": 5135 }, { "epoch": 0.35729938432641134, "grad_norm": 0.9609375, "learning_rate": 0.0014889013743530632, "loss": 0.8792, "step": 5136 }, { "epoch": 0.35736895196354657, "grad_norm": 1.0625, "learning_rate": 0.001488704795166011, "loss": 0.8146, "step": 5137 }, { "epoch": 0.35743851960068174, "grad_norm": 1.140625, "learning_rate": 0.001488508191164992, "loss": 0.7602, "step": 5138 }, { "epoch": 0.35750808723781696, "grad_norm": 1.2578125, "learning_rate": 0.0014883115623599897, "loss": 1.1143, "step": 5139 }, { "epoch": 0.3575776548749522, "grad_norm": 0.92578125, "learning_rate": 0.0014881149087609873, "loss": 0.8189, "step": 5140 }, { "epoch": 0.35764722251208736, "grad_norm": 1.0859375, "learning_rate": 0.0014879182303779701, "loss": 0.8648, "step": 5141 }, { "epoch": 0.3577167901492226, "grad_norm": 1.546875, "learning_rate": 0.0014877215272209245, "loss": 1.0972, "step": 5142 }, { "epoch": 0.3577863577863578, "grad_norm": 1.109375, "learning_rate": 0.0014875247992998382, "loss": 0.8679, "step": 5143 }, { "epoch": 0.357855925423493, "grad_norm": 1.3046875, "learning_rate": 0.0014873280466247, "loss": 0.907, "step": 5144 }, { "epoch": 0.3579254930606282, "grad_norm": 0.859375, "learning_rate": 0.0014871312692054995, "loss": 0.8558, "step": 5145 }, { "epoch": 0.3579950606977634, "grad_norm": 0.78125, "learning_rate": 0.0014869344670522286, "loss": 0.607, "step": 5146 }, { "epoch": 0.3580646283348986, "grad_norm": 1.0, "learning_rate": 0.00148673764017488, "loss": 0.8488, "step": 5147 }, { "epoch": 0.35813419597203383, "grad_norm": 1.015625, "learning_rate": 0.0014865407885834472, "loss": 0.8936, "step": 5148 }, { "epoch": 0.358203763609169, "grad_norm": 0.98046875, "learning_rate": 0.0014863439122879253, "loss": 0.8855, "step": 5149 }, { "epoch": 0.3582733312463042, "grad_norm": 1.0234375, "learning_rate": 0.0014861470112983116, "loss": 0.8521, "step": 5150 }, { "epoch": 0.3583428988834394, "grad_norm": 1.125, "learning_rate": 0.0014859500856246024, "loss": 1.1338, "step": 5151 }, { "epoch": 0.3584124665205746, "grad_norm": 1.3046875, "learning_rate": 0.0014857531352767972, "loss": 1.0693, "step": 5152 }, { "epoch": 0.35848203415770985, "grad_norm": 0.97265625, "learning_rate": 0.0014855561602648965, "loss": 0.9105, "step": 5153 }, { "epoch": 0.358551601794845, "grad_norm": 1.125, "learning_rate": 0.0014853591605989013, "loss": 0.9043, "step": 5154 }, { "epoch": 0.35862116943198025, "grad_norm": 1.03125, "learning_rate": 0.0014851621362888142, "loss": 0.9153, "step": 5155 }, { "epoch": 0.3586907370691155, "grad_norm": 0.98046875, "learning_rate": 0.001484965087344639, "loss": 1.0623, "step": 5156 }, { "epoch": 0.35876030470625064, "grad_norm": 1.1796875, "learning_rate": 0.0014847680137763815, "loss": 1.1121, "step": 5157 }, { "epoch": 0.35882987234338587, "grad_norm": 1.0, "learning_rate": 0.0014845709155940474, "loss": 0.8297, "step": 5158 }, { "epoch": 0.35889943998052104, "grad_norm": 0.828125, "learning_rate": 0.0014843737928076448, "loss": 0.9511, "step": 5159 }, { "epoch": 0.35896900761765627, "grad_norm": 1.34375, "learning_rate": 0.0014841766454271824, "loss": 1.129, "step": 5160 }, { "epoch": 0.3590385752547915, "grad_norm": 1.1796875, "learning_rate": 0.0014839794734626704, "loss": 0.8025, "step": 5161 }, { "epoch": 0.35910814289192666, "grad_norm": 1.3984375, "learning_rate": 0.00148378227692412, "loss": 0.8423, "step": 5162 }, { "epoch": 0.3591777105290619, "grad_norm": 1.125, "learning_rate": 0.001483585055821544, "loss": 1.051, "step": 5163 }, { "epoch": 0.35924727816619706, "grad_norm": 1.3828125, "learning_rate": 0.0014833878101649565, "loss": 0.9794, "step": 5164 }, { "epoch": 0.3593168458033323, "grad_norm": 1.0703125, "learning_rate": 0.0014831905399643724, "loss": 0.9215, "step": 5165 }, { "epoch": 0.3593864134404675, "grad_norm": 1.0625, "learning_rate": 0.001482993245229808, "loss": 0.7982, "step": 5166 }, { "epoch": 0.3594559810776027, "grad_norm": 1.2421875, "learning_rate": 0.0014827959259712813, "loss": 0.9635, "step": 5167 }, { "epoch": 0.3595255487147379, "grad_norm": 1.2421875, "learning_rate": 0.0014825985821988108, "loss": 1.0276, "step": 5168 }, { "epoch": 0.35959511635187313, "grad_norm": 1.078125, "learning_rate": 0.001482401213922417, "loss": 0.9149, "step": 5169 }, { "epoch": 0.3596646839890083, "grad_norm": 1.265625, "learning_rate": 0.0014822038211521208, "loss": 1.3285, "step": 5170 }, { "epoch": 0.35973425162614353, "grad_norm": 0.88671875, "learning_rate": 0.0014820064038979452, "loss": 0.7815, "step": 5171 }, { "epoch": 0.3598038192632787, "grad_norm": 1.015625, "learning_rate": 0.0014818089621699139, "loss": 0.799, "step": 5172 }, { "epoch": 0.3598733869004139, "grad_norm": 1.1328125, "learning_rate": 0.0014816114959780517, "loss": 1.0051, "step": 5173 }, { "epoch": 0.35994295453754915, "grad_norm": 1.046875, "learning_rate": 0.0014814140053323855, "loss": 0.687, "step": 5174 }, { "epoch": 0.3600125221746843, "grad_norm": 1.125, "learning_rate": 0.0014812164902429426, "loss": 0.8509, "step": 5175 }, { "epoch": 0.36008208981181955, "grad_norm": 0.83203125, "learning_rate": 0.0014810189507197518, "loss": 0.561, "step": 5176 }, { "epoch": 0.3601516574489547, "grad_norm": 1.0859375, "learning_rate": 0.0014808213867728434, "loss": 0.8071, "step": 5177 }, { "epoch": 0.36022122508608995, "grad_norm": 0.92578125, "learning_rate": 0.0014806237984122481, "loss": 0.6641, "step": 5178 }, { "epoch": 0.3602907927232252, "grad_norm": 0.99609375, "learning_rate": 0.001480426185647999, "loss": 0.7432, "step": 5179 }, { "epoch": 0.36036036036036034, "grad_norm": 1.0234375, "learning_rate": 0.0014802285484901297, "loss": 0.8742, "step": 5180 }, { "epoch": 0.36042992799749557, "grad_norm": 1.265625, "learning_rate": 0.0014800308869486753, "loss": 0.886, "step": 5181 }, { "epoch": 0.3604994956346308, "grad_norm": 1.21875, "learning_rate": 0.0014798332010336722, "loss": 0.9504, "step": 5182 }, { "epoch": 0.36056906327176597, "grad_norm": 1.140625, "learning_rate": 0.0014796354907551574, "loss": 0.8872, "step": 5183 }, { "epoch": 0.3606386309089012, "grad_norm": 1.40625, "learning_rate": 0.00147943775612317, "loss": 1.0139, "step": 5184 }, { "epoch": 0.36070819854603636, "grad_norm": 1.015625, "learning_rate": 0.00147923999714775, "loss": 0.8477, "step": 5185 }, { "epoch": 0.3607777661831716, "grad_norm": 1.28125, "learning_rate": 0.0014790422138389384, "loss": 0.9271, "step": 5186 }, { "epoch": 0.3608473338203068, "grad_norm": 0.953125, "learning_rate": 0.0014788444062067776, "loss": 0.8271, "step": 5187 }, { "epoch": 0.360916901457442, "grad_norm": 1.2578125, "learning_rate": 0.0014786465742613116, "loss": 1.1401, "step": 5188 }, { "epoch": 0.3609864690945772, "grad_norm": 1.046875, "learning_rate": 0.001478448718012585, "loss": 0.8429, "step": 5189 }, { "epoch": 0.3610560367317124, "grad_norm": 1.15625, "learning_rate": 0.001478250837470644, "loss": 0.9735, "step": 5190 }, { "epoch": 0.3611256043688476, "grad_norm": 1.1171875, "learning_rate": 0.0014780529326455362, "loss": 0.8389, "step": 5191 }, { "epoch": 0.36119517200598283, "grad_norm": 1.2109375, "learning_rate": 0.00147785500354731, "loss": 0.831, "step": 5192 }, { "epoch": 0.361264739643118, "grad_norm": 1.140625, "learning_rate": 0.0014776570501860153, "loss": 1.0879, "step": 5193 }, { "epoch": 0.36133430728025323, "grad_norm": 0.89453125, "learning_rate": 0.0014774590725717032, "loss": 0.8871, "step": 5194 }, { "epoch": 0.36140387491738846, "grad_norm": 0.90234375, "learning_rate": 0.0014772610707144257, "loss": 0.9803, "step": 5195 }, { "epoch": 0.3614734425545236, "grad_norm": 1.0234375, "learning_rate": 0.001477063044624237, "loss": 0.8462, "step": 5196 }, { "epoch": 0.36154301019165885, "grad_norm": 1.546875, "learning_rate": 0.0014768649943111911, "loss": 1.0829, "step": 5197 }, { "epoch": 0.361612577828794, "grad_norm": 1.0859375, "learning_rate": 0.0014766669197853446, "loss": 0.7897, "step": 5198 }, { "epoch": 0.36168214546592925, "grad_norm": 1.03125, "learning_rate": 0.0014764688210567546, "loss": 0.7425, "step": 5199 }, { "epoch": 0.3617517131030645, "grad_norm": 1.03125, "learning_rate": 0.0014762706981354791, "loss": 0.9892, "step": 5200 }, { "epoch": 0.36182128074019965, "grad_norm": 1.0859375, "learning_rate": 0.0014760725510315784, "loss": 0.8421, "step": 5201 }, { "epoch": 0.3618908483773349, "grad_norm": 1.140625, "learning_rate": 0.001475874379755113, "loss": 0.8271, "step": 5202 }, { "epoch": 0.36196041601447004, "grad_norm": 1.1328125, "learning_rate": 0.0014756761843161452, "loss": 1.192, "step": 5203 }, { "epoch": 0.36202998365160527, "grad_norm": 0.83203125, "learning_rate": 0.0014754779647247385, "loss": 0.6577, "step": 5204 }, { "epoch": 0.3620995512887405, "grad_norm": 1.28125, "learning_rate": 0.0014752797209909572, "loss": 0.7588, "step": 5205 }, { "epoch": 0.36216911892587567, "grad_norm": 1.0625, "learning_rate": 0.0014750814531248673, "loss": 0.7133, "step": 5206 }, { "epoch": 0.3622386865630109, "grad_norm": 0.984375, "learning_rate": 0.001474883161136536, "loss": 0.9246, "step": 5207 }, { "epoch": 0.3623082542001461, "grad_norm": 0.8515625, "learning_rate": 0.001474684845036031, "loss": 0.6941, "step": 5208 }, { "epoch": 0.3623778218372813, "grad_norm": 1.0234375, "learning_rate": 0.0014744865048334221, "loss": 0.777, "step": 5209 }, { "epoch": 0.3624473894744165, "grad_norm": 1.15625, "learning_rate": 0.0014742881405387803, "loss": 0.7638, "step": 5210 }, { "epoch": 0.3625169571115517, "grad_norm": 1.078125, "learning_rate": 0.0014740897521621772, "loss": 0.8489, "step": 5211 }, { "epoch": 0.3625865247486869, "grad_norm": 1.28125, "learning_rate": 0.0014738913397136862, "loss": 0.8281, "step": 5212 }, { "epoch": 0.36265609238582214, "grad_norm": 1.0859375, "learning_rate": 0.0014736929032033816, "loss": 0.8747, "step": 5213 }, { "epoch": 0.3627256600229573, "grad_norm": 1.0234375, "learning_rate": 0.0014734944426413388, "loss": 0.7183, "step": 5214 }, { "epoch": 0.36279522766009253, "grad_norm": 1.03125, "learning_rate": 0.001473295958037635, "loss": 0.9431, "step": 5215 }, { "epoch": 0.3628647952972277, "grad_norm": 1.1484375, "learning_rate": 0.0014730974494023478, "loss": 0.9157, "step": 5216 }, { "epoch": 0.36293436293436293, "grad_norm": 1.078125, "learning_rate": 0.001472898916745557, "loss": 0.9453, "step": 5217 }, { "epoch": 0.36300393057149816, "grad_norm": 1.0234375, "learning_rate": 0.0014727003600773425, "loss": 0.6536, "step": 5218 }, { "epoch": 0.3630734982086333, "grad_norm": 1.0546875, "learning_rate": 0.0014725017794077863, "loss": 0.9153, "step": 5219 }, { "epoch": 0.36314306584576855, "grad_norm": 1.1484375, "learning_rate": 0.0014723031747469713, "loss": 0.8485, "step": 5220 }, { "epoch": 0.3632126334829038, "grad_norm": 1.0859375, "learning_rate": 0.001472104546104982, "loss": 0.7691, "step": 5221 }, { "epoch": 0.36328220112003895, "grad_norm": 1.1875, "learning_rate": 0.0014719058934919034, "loss": 0.9719, "step": 5222 }, { "epoch": 0.3633517687571742, "grad_norm": 1.28125, "learning_rate": 0.0014717072169178219, "loss": 0.9064, "step": 5223 }, { "epoch": 0.36342133639430935, "grad_norm": 1.1875, "learning_rate": 0.0014715085163928255, "loss": 0.9613, "step": 5224 }, { "epoch": 0.3634909040314446, "grad_norm": 1.0625, "learning_rate": 0.0014713097919270032, "loss": 1.0228, "step": 5225 }, { "epoch": 0.3635604716685798, "grad_norm": 1.1796875, "learning_rate": 0.0014711110435304455, "loss": 0.9179, "step": 5226 }, { "epoch": 0.36363003930571497, "grad_norm": 1.0625, "learning_rate": 0.0014709122712132433, "loss": 0.7914, "step": 5227 }, { "epoch": 0.3636996069428502, "grad_norm": 1.1328125, "learning_rate": 0.0014707134749854898, "loss": 0.7456, "step": 5228 }, { "epoch": 0.36376917457998537, "grad_norm": 1.15625, "learning_rate": 0.0014705146548572782, "loss": 1.0304, "step": 5229 }, { "epoch": 0.3638387422171206, "grad_norm": 1.0625, "learning_rate": 0.0014703158108387044, "loss": 1.0316, "step": 5230 }, { "epoch": 0.3639083098542558, "grad_norm": 1.078125, "learning_rate": 0.0014701169429398643, "loss": 0.8354, "step": 5231 }, { "epoch": 0.363977877491391, "grad_norm": 1.0625, "learning_rate": 0.0014699180511708553, "loss": 0.7559, "step": 5232 }, { "epoch": 0.3640474451285262, "grad_norm": 1.4140625, "learning_rate": 0.0014697191355417761, "loss": 1.1107, "step": 5233 }, { "epoch": 0.36411701276566144, "grad_norm": 1.203125, "learning_rate": 0.0014695201960627266, "loss": 0.8169, "step": 5234 }, { "epoch": 0.3641865804027966, "grad_norm": 1.0390625, "learning_rate": 0.0014693212327438086, "loss": 0.6953, "step": 5235 }, { "epoch": 0.36425614803993184, "grad_norm": 1.0859375, "learning_rate": 0.0014691222455951235, "loss": 0.9619, "step": 5236 }, { "epoch": 0.364325715677067, "grad_norm": 1.03125, "learning_rate": 0.0014689232346267755, "loss": 0.9999, "step": 5237 }, { "epoch": 0.36439528331420223, "grad_norm": 1.296875, "learning_rate": 0.0014687241998488695, "loss": 1.0531, "step": 5238 }, { "epoch": 0.36446485095133746, "grad_norm": 0.96484375, "learning_rate": 0.0014685251412715106, "loss": 0.7005, "step": 5239 }, { "epoch": 0.36453441858847263, "grad_norm": 1.078125, "learning_rate": 0.0014683260589048069, "loss": 0.7436, "step": 5240 }, { "epoch": 0.36460398622560786, "grad_norm": 1.3515625, "learning_rate": 0.0014681269527588663, "loss": 1.1294, "step": 5241 }, { "epoch": 0.364673553862743, "grad_norm": 0.9296875, "learning_rate": 0.001467927822843799, "loss": 0.7699, "step": 5242 }, { "epoch": 0.36474312149987825, "grad_norm": 1.109375, "learning_rate": 0.0014677286691697146, "loss": 0.827, "step": 5243 }, { "epoch": 0.3648126891370135, "grad_norm": 1.09375, "learning_rate": 0.0014675294917467269, "loss": 0.8862, "step": 5244 }, { "epoch": 0.36488225677414865, "grad_norm": 1.2890625, "learning_rate": 0.0014673302905849476, "loss": 0.9959, "step": 5245 }, { "epoch": 0.3649518244112839, "grad_norm": 0.96484375, "learning_rate": 0.0014671310656944915, "loss": 0.7926, "step": 5246 }, { "epoch": 0.3650213920484191, "grad_norm": 0.9609375, "learning_rate": 0.0014669318170854747, "loss": 0.8058, "step": 5247 }, { "epoch": 0.3650909596855543, "grad_norm": 0.96484375, "learning_rate": 0.0014667325447680136, "loss": 0.6047, "step": 5248 }, { "epoch": 0.3651605273226895, "grad_norm": 1.671875, "learning_rate": 0.0014665332487522262, "loss": 1.2125, "step": 5249 }, { "epoch": 0.36523009495982467, "grad_norm": 1.4296875, "learning_rate": 0.001466333929048232, "loss": 1.0838, "step": 5250 }, { "epoch": 0.3652996625969599, "grad_norm": 0.921875, "learning_rate": 0.0014661345856661517, "loss": 0.7864, "step": 5251 }, { "epoch": 0.3653692302340951, "grad_norm": 1.0390625, "learning_rate": 0.0014659352186161064, "loss": 1.1664, "step": 5252 }, { "epoch": 0.3654387978712303, "grad_norm": 1.3828125, "learning_rate": 0.0014657358279082193, "loss": 0.9814, "step": 5253 }, { "epoch": 0.3655083655083655, "grad_norm": 1.2734375, "learning_rate": 0.0014655364135526142, "loss": 0.9766, "step": 5254 }, { "epoch": 0.3655779331455007, "grad_norm": 1.0625, "learning_rate": 0.0014653369755594165, "loss": 0.8501, "step": 5255 }, { "epoch": 0.3656475007826359, "grad_norm": 1.15625, "learning_rate": 0.001465137513938753, "loss": 0.9529, "step": 5256 }, { "epoch": 0.36571706841977114, "grad_norm": 1.015625, "learning_rate": 0.0014649380287007504, "loss": 0.8705, "step": 5257 }, { "epoch": 0.3657866360569063, "grad_norm": 1.078125, "learning_rate": 0.0014647385198555388, "loss": 0.909, "step": 5258 }, { "epoch": 0.36585620369404154, "grad_norm": 1.1328125, "learning_rate": 0.001464538987413247, "loss": 1.0303, "step": 5259 }, { "epoch": 0.36592577133117676, "grad_norm": 1.328125, "learning_rate": 0.0014643394313840076, "loss": 1.0606, "step": 5260 }, { "epoch": 0.36599533896831193, "grad_norm": 1.125, "learning_rate": 0.0014641398517779517, "loss": 0.8019, "step": 5261 }, { "epoch": 0.36606490660544716, "grad_norm": 1.4140625, "learning_rate": 0.0014639402486052138, "loss": 0.9951, "step": 5262 }, { "epoch": 0.36613447424258233, "grad_norm": 1.234375, "learning_rate": 0.0014637406218759284, "loss": 0.9101, "step": 5263 }, { "epoch": 0.36620404187971756, "grad_norm": 1.0625, "learning_rate": 0.0014635409716002314, "loss": 0.8821, "step": 5264 }, { "epoch": 0.3662736095168528, "grad_norm": 1.0234375, "learning_rate": 0.001463341297788261, "loss": 0.753, "step": 5265 }, { "epoch": 0.36634317715398795, "grad_norm": 1.34375, "learning_rate": 0.0014631416004501543, "loss": 0.7014, "step": 5266 }, { "epoch": 0.3664127447911232, "grad_norm": 1.1953125, "learning_rate": 0.0014629418795960517, "loss": 1.1205, "step": 5267 }, { "epoch": 0.36648231242825835, "grad_norm": 1.140625, "learning_rate": 0.001462742135236094, "loss": 0.8285, "step": 5268 }, { "epoch": 0.3665518800653936, "grad_norm": 1.1875, "learning_rate": 0.001462542367380423, "loss": 0.9696, "step": 5269 }, { "epoch": 0.3666214477025288, "grad_norm": 1.203125, "learning_rate": 0.001462342576039182, "loss": 1.135, "step": 5270 }, { "epoch": 0.366691015339664, "grad_norm": 1.125, "learning_rate": 0.0014621427612225154, "loss": 0.8164, "step": 5271 }, { "epoch": 0.3667605829767992, "grad_norm": 1.078125, "learning_rate": 0.0014619429229405685, "loss": 0.8362, "step": 5272 }, { "epoch": 0.3668301506139344, "grad_norm": 1.0625, "learning_rate": 0.0014617430612034884, "loss": 0.6728, "step": 5273 }, { "epoch": 0.3668997182510696, "grad_norm": 1.171875, "learning_rate": 0.0014615431760214232, "loss": 0.7946, "step": 5274 }, { "epoch": 0.3669692858882048, "grad_norm": 0.97265625, "learning_rate": 0.0014613432674045216, "loss": 0.9709, "step": 5275 }, { "epoch": 0.36703885352534, "grad_norm": 1.1484375, "learning_rate": 0.0014611433353629347, "loss": 0.8764, "step": 5276 }, { "epoch": 0.3671084211624752, "grad_norm": 1.0390625, "learning_rate": 0.0014609433799068132, "loss": 0.7207, "step": 5277 }, { "epoch": 0.36717798879961044, "grad_norm": 1.15625, "learning_rate": 0.0014607434010463103, "loss": 0.8309, "step": 5278 }, { "epoch": 0.3672475564367456, "grad_norm": 1.1796875, "learning_rate": 0.0014605433987915797, "loss": 0.7943, "step": 5279 }, { "epoch": 0.36731712407388084, "grad_norm": 1.40625, "learning_rate": 0.0014603433731527767, "loss": 1.1244, "step": 5280 }, { "epoch": 0.367386691711016, "grad_norm": 1.15625, "learning_rate": 0.0014601433241400576, "loss": 0.9917, "step": 5281 }, { "epoch": 0.36745625934815124, "grad_norm": 0.92578125, "learning_rate": 0.0014599432517635796, "loss": 0.9159, "step": 5282 }, { "epoch": 0.36752582698528646, "grad_norm": 1.03125, "learning_rate": 0.0014597431560335018, "loss": 0.8739, "step": 5283 }, { "epoch": 0.36759539462242163, "grad_norm": 1.3203125, "learning_rate": 0.0014595430369599837, "loss": 1.2074, "step": 5284 }, { "epoch": 0.36766496225955686, "grad_norm": 1.21875, "learning_rate": 0.0014593428945531863, "loss": 0.9927, "step": 5285 }, { "epoch": 0.3677345298966921, "grad_norm": 1.2421875, "learning_rate": 0.0014591427288232722, "loss": 0.8677, "step": 5286 }, { "epoch": 0.36780409753382726, "grad_norm": 1.0390625, "learning_rate": 0.0014589425397804044, "loss": 1.0091, "step": 5287 }, { "epoch": 0.3678736651709625, "grad_norm": 0.97265625, "learning_rate": 0.0014587423274347478, "loss": 1.0127, "step": 5288 }, { "epoch": 0.36794323280809765, "grad_norm": 1.0859375, "learning_rate": 0.0014585420917964677, "loss": 1.1267, "step": 5289 }, { "epoch": 0.3680128004452329, "grad_norm": 1.1171875, "learning_rate": 0.001458341832875732, "loss": 0.7491, "step": 5290 }, { "epoch": 0.3680823680823681, "grad_norm": 1.0390625, "learning_rate": 0.0014581415506827078, "loss": 0.7425, "step": 5291 }, { "epoch": 0.3681519357195033, "grad_norm": 1.0, "learning_rate": 0.0014579412452275654, "loss": 0.721, "step": 5292 }, { "epoch": 0.3682215033566385, "grad_norm": 1.296875, "learning_rate": 0.0014577409165204742, "loss": 0.6941, "step": 5293 }, { "epoch": 0.3682910709937737, "grad_norm": 1.265625, "learning_rate": 0.0014575405645716065, "loss": 1.0086, "step": 5294 }, { "epoch": 0.3683606386309089, "grad_norm": 0.953125, "learning_rate": 0.0014573401893911353, "loss": 0.7428, "step": 5295 }, { "epoch": 0.3684302062680441, "grad_norm": 0.9296875, "learning_rate": 0.0014571397909892343, "loss": 0.7044, "step": 5296 }, { "epoch": 0.3684997739051793, "grad_norm": 1.3671875, "learning_rate": 0.001456939369376079, "loss": 0.9907, "step": 5297 }, { "epoch": 0.3685693415423145, "grad_norm": 0.9609375, "learning_rate": 0.0014567389245618454, "loss": 0.6828, "step": 5298 }, { "epoch": 0.36863890917944975, "grad_norm": 1.09375, "learning_rate": 0.001456538456556712, "loss": 1.0632, "step": 5299 }, { "epoch": 0.3687084768165849, "grad_norm": 1.09375, "learning_rate": 0.0014563379653708562, "loss": 0.7712, "step": 5300 }, { "epoch": 0.36877804445372014, "grad_norm": 1.28125, "learning_rate": 0.0014561374510144588, "loss": 1.0056, "step": 5301 }, { "epoch": 0.3688476120908553, "grad_norm": 1.09375, "learning_rate": 0.001455936913497701, "loss": 0.8905, "step": 5302 }, { "epoch": 0.36891717972799054, "grad_norm": 1.203125, "learning_rate": 0.0014557363528307646, "loss": 0.9535, "step": 5303 }, { "epoch": 0.36898674736512577, "grad_norm": 1.21875, "learning_rate": 0.0014555357690238333, "loss": 0.8259, "step": 5304 }, { "epoch": 0.36905631500226094, "grad_norm": 1.125, "learning_rate": 0.0014553351620870917, "loss": 0.7287, "step": 5305 }, { "epoch": 0.36912588263939616, "grad_norm": 1.421875, "learning_rate": 0.001455134532030726, "loss": 1.0635, "step": 5306 }, { "epoch": 0.36919545027653133, "grad_norm": 1.359375, "learning_rate": 0.0014549338788649223, "loss": 1.118, "step": 5307 }, { "epoch": 0.36926501791366656, "grad_norm": 1.3046875, "learning_rate": 0.0014547332025998693, "loss": 0.7552, "step": 5308 }, { "epoch": 0.3693345855508018, "grad_norm": 1.3203125, "learning_rate": 0.0014545325032457566, "loss": 0.8597, "step": 5309 }, { "epoch": 0.36940415318793696, "grad_norm": 1.0859375, "learning_rate": 0.0014543317808127741, "loss": 0.9539, "step": 5310 }, { "epoch": 0.3694737208250722, "grad_norm": 1.125, "learning_rate": 0.001454131035311114, "loss": 0.8004, "step": 5311 }, { "epoch": 0.3695432884622074, "grad_norm": 0.93359375, "learning_rate": 0.001453930266750969, "loss": 0.9075, "step": 5312 }, { "epoch": 0.3696128560993426, "grad_norm": 1.03125, "learning_rate": 0.001453729475142533, "loss": 0.7649, "step": 5313 }, { "epoch": 0.3696824237364778, "grad_norm": 1.0625, "learning_rate": 0.0014535286604960007, "loss": 0.8814, "step": 5314 }, { "epoch": 0.369751991373613, "grad_norm": 0.9921875, "learning_rate": 0.0014533278228215697, "loss": 0.6539, "step": 5315 }, { "epoch": 0.3698215590107482, "grad_norm": 1.1875, "learning_rate": 0.0014531269621294366, "loss": 0.967, "step": 5316 }, { "epoch": 0.36989112664788343, "grad_norm": 0.9375, "learning_rate": 0.0014529260784297998, "loss": 0.7468, "step": 5317 }, { "epoch": 0.3699606942850186, "grad_norm": 0.97265625, "learning_rate": 0.0014527251717328603, "loss": 0.7324, "step": 5318 }, { "epoch": 0.3700302619221538, "grad_norm": 1.1015625, "learning_rate": 0.0014525242420488178, "loss": 0.7423, "step": 5319 }, { "epoch": 0.370099829559289, "grad_norm": 0.85546875, "learning_rate": 0.001452323289387876, "loss": 0.6566, "step": 5320 }, { "epoch": 0.3701693971964242, "grad_norm": 1.2734375, "learning_rate": 0.0014521223137602367, "loss": 0.976, "step": 5321 }, { "epoch": 0.37023896483355945, "grad_norm": 0.7890625, "learning_rate": 0.0014519213151761056, "loss": 0.8218, "step": 5322 }, { "epoch": 0.3703085324706946, "grad_norm": 1.2265625, "learning_rate": 0.0014517202936456877, "loss": 0.9544, "step": 5323 }, { "epoch": 0.37037810010782984, "grad_norm": 1.125, "learning_rate": 0.0014515192491791904, "loss": 0.987, "step": 5324 }, { "epoch": 0.370447667744965, "grad_norm": 1.09375, "learning_rate": 0.0014513181817868215, "loss": 1.1131, "step": 5325 }, { "epoch": 0.37051723538210024, "grad_norm": 1.0390625, "learning_rate": 0.0014511170914787899, "loss": 0.7968, "step": 5326 }, { "epoch": 0.37058680301923547, "grad_norm": 1.2109375, "learning_rate": 0.0014509159782653063, "loss": 1.0672, "step": 5327 }, { "epoch": 0.37065637065637064, "grad_norm": 1.1796875, "learning_rate": 0.001450714842156582, "loss": 0.7135, "step": 5328 }, { "epoch": 0.37072593829350586, "grad_norm": 1.2109375, "learning_rate": 0.00145051368316283, "loss": 0.7866, "step": 5329 }, { "epoch": 0.3707955059306411, "grad_norm": 1.0234375, "learning_rate": 0.0014503125012942637, "loss": 0.9565, "step": 5330 }, { "epoch": 0.37086507356777626, "grad_norm": 1.140625, "learning_rate": 0.0014501112965610986, "loss": 0.9084, "step": 5331 }, { "epoch": 0.3709346412049115, "grad_norm": 1.0234375, "learning_rate": 0.0014499100689735504, "loss": 1.0043, "step": 5332 }, { "epoch": 0.37100420884204666, "grad_norm": 0.99609375, "learning_rate": 0.0014497088185418364, "loss": 0.8889, "step": 5333 }, { "epoch": 0.3710737764791819, "grad_norm": 0.96484375, "learning_rate": 0.0014495075452761758, "loss": 0.9213, "step": 5334 }, { "epoch": 0.3711433441163171, "grad_norm": 1.1015625, "learning_rate": 0.0014493062491867871, "loss": 0.8642, "step": 5335 }, { "epoch": 0.3712129117534523, "grad_norm": 1.2109375, "learning_rate": 0.0014491049302838923, "loss": 0.7673, "step": 5336 }, { "epoch": 0.3712824793905875, "grad_norm": 0.921875, "learning_rate": 0.0014489035885777125, "loss": 0.9089, "step": 5337 }, { "epoch": 0.3713520470277227, "grad_norm": 1.0390625, "learning_rate": 0.0014487022240784713, "loss": 0.8892, "step": 5338 }, { "epoch": 0.3714216146648579, "grad_norm": 1.1484375, "learning_rate": 0.0014485008367963927, "loss": 0.8305, "step": 5339 }, { "epoch": 0.37149118230199313, "grad_norm": 1.296875, "learning_rate": 0.0014482994267417022, "loss": 1.1563, "step": 5340 }, { "epoch": 0.3715607499391283, "grad_norm": 1.03125, "learning_rate": 0.0014480979939246266, "loss": 0.7855, "step": 5341 }, { "epoch": 0.3716303175762635, "grad_norm": 1.0234375, "learning_rate": 0.001447896538355393, "loss": 0.8052, "step": 5342 }, { "epoch": 0.37169988521339875, "grad_norm": 1.0234375, "learning_rate": 0.0014476950600442315, "loss": 0.8226, "step": 5343 }, { "epoch": 0.3717694528505339, "grad_norm": 1.078125, "learning_rate": 0.0014474935590013704, "loss": 0.7776, "step": 5344 }, { "epoch": 0.37183902048766915, "grad_norm": 1.0078125, "learning_rate": 0.0014472920352370426, "loss": 0.7737, "step": 5345 }, { "epoch": 0.3719085881248043, "grad_norm": 1.1484375, "learning_rate": 0.0014470904887614795, "loss": 0.8102, "step": 5346 }, { "epoch": 0.37197815576193954, "grad_norm": 1.0078125, "learning_rate": 0.001446888919584915, "loss": 0.8435, "step": 5347 }, { "epoch": 0.37204772339907477, "grad_norm": 0.98828125, "learning_rate": 0.0014466873277175839, "loss": 0.832, "step": 5348 }, { "epoch": 0.37211729103620994, "grad_norm": 1.234375, "learning_rate": 0.0014464857131697214, "loss": 0.8213, "step": 5349 }, { "epoch": 0.37218685867334517, "grad_norm": 1.1171875, "learning_rate": 0.001446284075951565, "loss": 0.6133, "step": 5350 }, { "epoch": 0.37225642631048034, "grad_norm": 1.234375, "learning_rate": 0.0014460824160733524, "loss": 0.9, "step": 5351 }, { "epoch": 0.37232599394761556, "grad_norm": 1.1484375, "learning_rate": 0.0014458807335453235, "loss": 0.6688, "step": 5352 }, { "epoch": 0.3723955615847508, "grad_norm": 0.84765625, "learning_rate": 0.0014456790283777182, "loss": 0.7935, "step": 5353 }, { "epoch": 0.37246512922188596, "grad_norm": 1.046875, "learning_rate": 0.001445477300580778, "loss": 0.8331, "step": 5354 }, { "epoch": 0.3725346968590212, "grad_norm": 1.0703125, "learning_rate": 0.001445275550164746, "loss": 1.0071, "step": 5355 }, { "epoch": 0.3726042644961564, "grad_norm": 1.15625, "learning_rate": 0.0014450737771398662, "loss": 0.6313, "step": 5356 }, { "epoch": 0.3726738321332916, "grad_norm": 1.328125, "learning_rate": 0.0014448719815163833, "loss": 0.9666, "step": 5357 }, { "epoch": 0.3727433997704268, "grad_norm": 1.21875, "learning_rate": 0.0014446701633045432, "loss": 0.989, "step": 5358 }, { "epoch": 0.372812967407562, "grad_norm": 1.109375, "learning_rate": 0.0014444683225145938, "loss": 0.795, "step": 5359 }, { "epoch": 0.3728825350446972, "grad_norm": 1.171875, "learning_rate": 0.001444266459156783, "loss": 0.9144, "step": 5360 }, { "epoch": 0.37295210268183243, "grad_norm": 1.0390625, "learning_rate": 0.0014440645732413607, "loss": 1.0063, "step": 5361 }, { "epoch": 0.3730216703189676, "grad_norm": 1.0234375, "learning_rate": 0.0014438626647785779, "loss": 0.8353, "step": 5362 }, { "epoch": 0.37309123795610283, "grad_norm": 1.0859375, "learning_rate": 0.0014436607337786859, "loss": 0.707, "step": 5363 }, { "epoch": 0.373160805593238, "grad_norm": 1.59375, "learning_rate": 0.0014434587802519383, "loss": 1.0309, "step": 5364 }, { "epoch": 0.3732303732303732, "grad_norm": 1.015625, "learning_rate": 0.0014432568042085886, "loss": 0.9463, "step": 5365 }, { "epoch": 0.37329994086750845, "grad_norm": 0.98828125, "learning_rate": 0.001443054805658893, "loss": 0.8642, "step": 5366 }, { "epoch": 0.3733695085046436, "grad_norm": 0.98828125, "learning_rate": 0.0014428527846131072, "loss": 0.9723, "step": 5367 }, { "epoch": 0.37343907614177885, "grad_norm": 0.89453125, "learning_rate": 0.0014426507410814895, "loss": 0.9318, "step": 5368 }, { "epoch": 0.3735086437789141, "grad_norm": 1.265625, "learning_rate": 0.001442448675074298, "loss": 0.8876, "step": 5369 }, { "epoch": 0.37357821141604924, "grad_norm": 1.078125, "learning_rate": 0.001442246586601793, "loss": 0.8524, "step": 5370 }, { "epoch": 0.37364777905318447, "grad_norm": 0.890625, "learning_rate": 0.0014420444756742354, "loss": 0.6487, "step": 5371 }, { "epoch": 0.37371734669031964, "grad_norm": 1.0859375, "learning_rate": 0.0014418423423018876, "loss": 0.7525, "step": 5372 }, { "epoch": 0.37378691432745487, "grad_norm": 1.3125, "learning_rate": 0.001441640186495013, "loss": 1.0137, "step": 5373 }, { "epoch": 0.3738564819645901, "grad_norm": 1.21875, "learning_rate": 0.0014414380082638748, "loss": 0.9516, "step": 5374 }, { "epoch": 0.37392604960172526, "grad_norm": 0.97265625, "learning_rate": 0.0014412358076187402, "loss": 0.7149, "step": 5375 }, { "epoch": 0.3739956172388605, "grad_norm": 0.984375, "learning_rate": 0.001441033584569875, "loss": 0.9754, "step": 5376 }, { "epoch": 0.37406518487599566, "grad_norm": 1.0234375, "learning_rate": 0.0014408313391275475, "loss": 0.6876, "step": 5377 }, { "epoch": 0.3741347525131309, "grad_norm": 1.0703125, "learning_rate": 0.0014406290713020265, "loss": 0.8897, "step": 5378 }, { "epoch": 0.3742043201502661, "grad_norm": 1.1875, "learning_rate": 0.0014404267811035823, "loss": 1.1495, "step": 5379 }, { "epoch": 0.3742738877874013, "grad_norm": 1.2734375, "learning_rate": 0.0014402244685424862, "loss": 0.9556, "step": 5380 }, { "epoch": 0.3743434554245365, "grad_norm": 0.9765625, "learning_rate": 0.00144002213362901, "loss": 0.975, "step": 5381 }, { "epoch": 0.37441302306167173, "grad_norm": 1.234375, "learning_rate": 0.0014398197763734282, "loss": 0.9633, "step": 5382 }, { "epoch": 0.3744825906988069, "grad_norm": 1.140625, "learning_rate": 0.0014396173967860149, "loss": 0.9289, "step": 5383 }, { "epoch": 0.37455215833594213, "grad_norm": 1.0703125, "learning_rate": 0.001439414994877046, "loss": 0.7651, "step": 5384 }, { "epoch": 0.3746217259730773, "grad_norm": 0.94140625, "learning_rate": 0.0014392125706567981, "loss": 0.7602, "step": 5385 }, { "epoch": 0.37469129361021253, "grad_norm": 0.921875, "learning_rate": 0.0014390101241355503, "loss": 0.5503, "step": 5386 }, { "epoch": 0.37476086124734775, "grad_norm": 1.015625, "learning_rate": 0.0014388076553235808, "loss": 0.9548, "step": 5387 }, { "epoch": 0.3748304288844829, "grad_norm": 0.86328125, "learning_rate": 0.0014386051642311705, "loss": 0.8298, "step": 5388 }, { "epoch": 0.37489999652161815, "grad_norm": 1.2265625, "learning_rate": 0.0014384026508686006, "loss": 0.9172, "step": 5389 }, { "epoch": 0.3749695641587533, "grad_norm": 1.0546875, "learning_rate": 0.0014382001152461537, "loss": 0.8015, "step": 5390 }, { "epoch": 0.37503913179588855, "grad_norm": 1.3125, "learning_rate": 0.0014379975573741135, "loss": 0.8311, "step": 5391 }, { "epoch": 0.3751086994330238, "grad_norm": 0.82421875, "learning_rate": 0.0014377949772627651, "loss": 0.703, "step": 5392 }, { "epoch": 0.37517826707015894, "grad_norm": 1.0859375, "learning_rate": 0.0014375923749223947, "loss": 0.8893, "step": 5393 }, { "epoch": 0.37524783470729417, "grad_norm": 1.03125, "learning_rate": 0.001437389750363289, "loss": 0.7522, "step": 5394 }, { "epoch": 0.3753174023444294, "grad_norm": 1.0703125, "learning_rate": 0.0014371871035957363, "loss": 0.9608, "step": 5395 }, { "epoch": 0.37538696998156457, "grad_norm": 1.3359375, "learning_rate": 0.0014369844346300265, "loss": 0.9194, "step": 5396 }, { "epoch": 0.3754565376186998, "grad_norm": 1.1484375, "learning_rate": 0.001436781743476449, "loss": 0.9093, "step": 5397 }, { "epoch": 0.37552610525583496, "grad_norm": 1.0703125, "learning_rate": 0.0014365790301452963, "loss": 0.8586, "step": 5398 }, { "epoch": 0.3755956728929702, "grad_norm": 1.265625, "learning_rate": 0.001436376294646861, "loss": 0.9752, "step": 5399 }, { "epoch": 0.3756652405301054, "grad_norm": 1.0625, "learning_rate": 0.001436173536991437, "loss": 0.8677, "step": 5400 }, { "epoch": 0.3757348081672406, "grad_norm": 1.0234375, "learning_rate": 0.0014359707571893194, "loss": 0.8918, "step": 5401 }, { "epoch": 0.3758043758043758, "grad_norm": 1.15625, "learning_rate": 0.0014357679552508041, "loss": 0.7776, "step": 5402 }, { "epoch": 0.375873943441511, "grad_norm": 1.078125, "learning_rate": 0.0014355651311861886, "loss": 0.8775, "step": 5403 }, { "epoch": 0.3759435110786462, "grad_norm": 1.21875, "learning_rate": 0.0014353622850057709, "loss": 1.008, "step": 5404 }, { "epoch": 0.37601307871578143, "grad_norm": 1.1484375, "learning_rate": 0.0014351594167198508, "loss": 0.8674, "step": 5405 }, { "epoch": 0.3760826463529166, "grad_norm": 1.078125, "learning_rate": 0.001434956526338729, "loss": 0.831, "step": 5406 }, { "epoch": 0.37615221399005183, "grad_norm": 1.15625, "learning_rate": 0.001434753613872707, "loss": 0.9915, "step": 5407 }, { "epoch": 0.37622178162718706, "grad_norm": 1.59375, "learning_rate": 0.001434550679332088, "loss": 1.0004, "step": 5408 }, { "epoch": 0.37629134926432223, "grad_norm": 1.046875, "learning_rate": 0.0014343477227271757, "loss": 0.7761, "step": 5409 }, { "epoch": 0.37636091690145745, "grad_norm": 1.2109375, "learning_rate": 0.0014341447440682754, "loss": 0.9584, "step": 5410 }, { "epoch": 0.3764304845385926, "grad_norm": 1.15625, "learning_rate": 0.001433941743365693, "loss": 0.7237, "step": 5411 }, { "epoch": 0.37650005217572785, "grad_norm": 1.2421875, "learning_rate": 0.0014337387206297364, "loss": 0.6604, "step": 5412 }, { "epoch": 0.3765696198128631, "grad_norm": 1.078125, "learning_rate": 0.0014335356758707137, "loss": 0.8516, "step": 5413 }, { "epoch": 0.37663918744999825, "grad_norm": 0.8984375, "learning_rate": 0.0014333326090989345, "loss": 0.85, "step": 5414 }, { "epoch": 0.3767087550871335, "grad_norm": 1.234375, "learning_rate": 0.0014331295203247095, "loss": 0.8663, "step": 5415 }, { "epoch": 0.37677832272426864, "grad_norm": 1.0234375, "learning_rate": 0.0014329264095583505, "loss": 0.7427, "step": 5416 }, { "epoch": 0.37684789036140387, "grad_norm": 1.1171875, "learning_rate": 0.0014327232768101708, "loss": 0.9801, "step": 5417 }, { "epoch": 0.3769174579985391, "grad_norm": 0.93359375, "learning_rate": 0.001432520122090484, "loss": 1.0844, "step": 5418 }, { "epoch": 0.37698702563567427, "grad_norm": 1.296875, "learning_rate": 0.0014323169454096057, "loss": 1.1017, "step": 5419 }, { "epoch": 0.3770565932728095, "grad_norm": 1.3984375, "learning_rate": 0.0014321137467778518, "loss": 0.9933, "step": 5420 }, { "epoch": 0.3771261609099447, "grad_norm": 1.03125, "learning_rate": 0.0014319105262055399, "loss": 0.974, "step": 5421 }, { "epoch": 0.3771957285470799, "grad_norm": 1.203125, "learning_rate": 0.0014317072837029883, "loss": 0.8098, "step": 5422 }, { "epoch": 0.3772652961842151, "grad_norm": 1.03125, "learning_rate": 0.001431504019280517, "loss": 0.7295, "step": 5423 }, { "epoch": 0.3773348638213503, "grad_norm": 0.8984375, "learning_rate": 0.0014313007329484462, "loss": 0.6766, "step": 5424 }, { "epoch": 0.3774044314584855, "grad_norm": 0.90625, "learning_rate": 0.0014310974247170984, "loss": 0.9285, "step": 5425 }, { "epoch": 0.37747399909562074, "grad_norm": 1.203125, "learning_rate": 0.0014308940945967964, "loss": 0.9051, "step": 5426 }, { "epoch": 0.3775435667327559, "grad_norm": 1.21875, "learning_rate": 0.001430690742597864, "loss": 0.9952, "step": 5427 }, { "epoch": 0.37761313436989113, "grad_norm": 1.046875, "learning_rate": 0.0014304873687306264, "loss": 1.0094, "step": 5428 }, { "epoch": 0.3776827020070263, "grad_norm": 1.28125, "learning_rate": 0.00143028397300541, "loss": 0.8287, "step": 5429 }, { "epoch": 0.37775226964416153, "grad_norm": 1.1015625, "learning_rate": 0.0014300805554325424, "loss": 0.6493, "step": 5430 }, { "epoch": 0.37782183728129676, "grad_norm": 1.1640625, "learning_rate": 0.001429877116022352, "loss": 0.8438, "step": 5431 }, { "epoch": 0.37789140491843193, "grad_norm": 1.0, "learning_rate": 0.0014296736547851684, "loss": 0.8506, "step": 5432 }, { "epoch": 0.37796097255556715, "grad_norm": 1.2109375, "learning_rate": 0.001429470171731322, "loss": 0.889, "step": 5433 }, { "epoch": 0.3780305401927024, "grad_norm": 1.109375, "learning_rate": 0.0014292666668711453, "loss": 0.8362, "step": 5434 }, { "epoch": 0.37810010782983755, "grad_norm": 1.1875, "learning_rate": 0.0014290631402149709, "loss": 0.7322, "step": 5435 }, { "epoch": 0.3781696754669728, "grad_norm": 1.0703125, "learning_rate": 0.0014288595917731329, "loss": 0.9391, "step": 5436 }, { "epoch": 0.37823924310410795, "grad_norm": 1.1328125, "learning_rate": 0.0014286560215559664, "loss": 0.7884, "step": 5437 }, { "epoch": 0.3783088107412432, "grad_norm": 1.21875, "learning_rate": 0.0014284524295738075, "loss": 0.8923, "step": 5438 }, { "epoch": 0.3783783783783784, "grad_norm": 0.94921875, "learning_rate": 0.001428248815836994, "loss": 0.8666, "step": 5439 }, { "epoch": 0.37844794601551357, "grad_norm": 1.2890625, "learning_rate": 0.001428045180355864, "loss": 0.9711, "step": 5440 }, { "epoch": 0.3785175136526488, "grad_norm": 1.0078125, "learning_rate": 0.0014278415231407575, "loss": 0.5071, "step": 5441 }, { "epoch": 0.37858708128978397, "grad_norm": 1.015625, "learning_rate": 0.0014276378442020148, "loss": 0.8519, "step": 5442 }, { "epoch": 0.3786566489269192, "grad_norm": 1.25, "learning_rate": 0.0014274341435499779, "loss": 1.1103, "step": 5443 }, { "epoch": 0.3787262165640544, "grad_norm": 1.0625, "learning_rate": 0.0014272304211949895, "loss": 1.0963, "step": 5444 }, { "epoch": 0.3787957842011896, "grad_norm": 1.203125, "learning_rate": 0.0014270266771473938, "loss": 0.914, "step": 5445 }, { "epoch": 0.3788653518383248, "grad_norm": 1.2890625, "learning_rate": 0.0014268229114175357, "loss": 0.9931, "step": 5446 }, { "epoch": 0.37893491947546004, "grad_norm": 1.375, "learning_rate": 0.0014266191240157617, "loss": 0.775, "step": 5447 }, { "epoch": 0.3790044871125952, "grad_norm": 1.03125, "learning_rate": 0.0014264153149524189, "loss": 0.9089, "step": 5448 }, { "epoch": 0.37907405474973044, "grad_norm": 1.0, "learning_rate": 0.0014262114842378555, "loss": 0.7882, "step": 5449 }, { "epoch": 0.3791436223868656, "grad_norm": 1.34375, "learning_rate": 0.0014260076318824211, "loss": 0.9675, "step": 5450 }, { "epoch": 0.37921319002400083, "grad_norm": 1.3828125, "learning_rate": 0.0014258037578964667, "loss": 0.8656, "step": 5451 }, { "epoch": 0.37928275766113606, "grad_norm": 0.9296875, "learning_rate": 0.0014255998622903433, "loss": 0.8968, "step": 5452 }, { "epoch": 0.37935232529827123, "grad_norm": 0.9921875, "learning_rate": 0.0014253959450744045, "loss": 0.8417, "step": 5453 }, { "epoch": 0.37942189293540646, "grad_norm": 1.2265625, "learning_rate": 0.0014251920062590036, "loss": 1.1655, "step": 5454 }, { "epoch": 0.37949146057254163, "grad_norm": 1.265625, "learning_rate": 0.0014249880458544956, "loss": 0.9787, "step": 5455 }, { "epoch": 0.37956102820967685, "grad_norm": 0.8125, "learning_rate": 0.001424784063871237, "loss": 0.4948, "step": 5456 }, { "epoch": 0.3796305958468121, "grad_norm": 1.0234375, "learning_rate": 0.0014245800603195846, "loss": 0.6736, "step": 5457 }, { "epoch": 0.37970016348394725, "grad_norm": 0.99609375, "learning_rate": 0.0014243760352098968, "loss": 0.9282, "step": 5458 }, { "epoch": 0.3797697311210825, "grad_norm": 1.40625, "learning_rate": 0.001424171988552533, "loss": 0.7448, "step": 5459 }, { "epoch": 0.3798392987582177, "grad_norm": 1.0625, "learning_rate": 0.0014239679203578532, "loss": 0.9497, "step": 5460 }, { "epoch": 0.3799088663953529, "grad_norm": 1.1328125, "learning_rate": 0.00142376383063622, "loss": 1.0226, "step": 5461 }, { "epoch": 0.3799784340324881, "grad_norm": 0.75390625, "learning_rate": 0.001423559719397995, "loss": 1.1281, "step": 5462 }, { "epoch": 0.38004800166962327, "grad_norm": 0.828125, "learning_rate": 0.0014233555866535424, "loss": 0.8493, "step": 5463 }, { "epoch": 0.3801175693067585, "grad_norm": 1.359375, "learning_rate": 0.0014231514324132269, "loss": 0.9384, "step": 5464 }, { "epoch": 0.3801871369438937, "grad_norm": 1.0625, "learning_rate": 0.0014229472566874147, "loss": 0.6505, "step": 5465 }, { "epoch": 0.3802567045810289, "grad_norm": 0.89453125, "learning_rate": 0.0014227430594864726, "loss": 0.7257, "step": 5466 }, { "epoch": 0.3803262722181641, "grad_norm": 1.359375, "learning_rate": 0.0014225388408207684, "loss": 1.0635, "step": 5467 }, { "epoch": 0.3803958398552993, "grad_norm": 1.09375, "learning_rate": 0.001422334600700672, "loss": 0.6463, "step": 5468 }, { "epoch": 0.3804654074924345, "grad_norm": 1.296875, "learning_rate": 0.0014221303391365532, "loss": 1.1023, "step": 5469 }, { "epoch": 0.38053497512956974, "grad_norm": 1.1640625, "learning_rate": 0.0014219260561387835, "loss": 0.7375, "step": 5470 }, { "epoch": 0.3806045427667049, "grad_norm": 1.2578125, "learning_rate": 0.0014217217517177353, "loss": 0.8715, "step": 5471 }, { "epoch": 0.38067411040384014, "grad_norm": 0.87109375, "learning_rate": 0.001421517425883782, "loss": 0.6879, "step": 5472 }, { "epoch": 0.38074367804097536, "grad_norm": 1.1484375, "learning_rate": 0.0014213130786472985, "loss": 0.891, "step": 5473 }, { "epoch": 0.38081324567811053, "grad_norm": 1.0625, "learning_rate": 0.0014211087100186605, "loss": 0.9219, "step": 5474 }, { "epoch": 0.38088281331524576, "grad_norm": 1.0390625, "learning_rate": 0.001420904320008245, "loss": 0.9867, "step": 5475 }, { "epoch": 0.38095238095238093, "grad_norm": 1.21875, "learning_rate": 0.0014206999086264292, "loss": 1.0281, "step": 5476 }, { "epoch": 0.38102194858951616, "grad_norm": 1.09375, "learning_rate": 0.0014204954758835929, "loss": 0.8644, "step": 5477 }, { "epoch": 0.3810915162266514, "grad_norm": 1.4140625, "learning_rate": 0.0014202910217901155, "loss": 0.914, "step": 5478 }, { "epoch": 0.38116108386378655, "grad_norm": 1.09375, "learning_rate": 0.0014200865463563786, "loss": 0.8536, "step": 5479 }, { "epoch": 0.3812306515009218, "grad_norm": 1.1640625, "learning_rate": 0.0014198820495927643, "loss": 1.0379, "step": 5480 }, { "epoch": 0.38130021913805695, "grad_norm": 1.1875, "learning_rate": 0.0014196775315096558, "loss": 0.879, "step": 5481 }, { "epoch": 0.3813697867751922, "grad_norm": 1.1484375, "learning_rate": 0.0014194729921174374, "loss": 0.9843, "step": 5482 }, { "epoch": 0.3814393544123274, "grad_norm": 1.2109375, "learning_rate": 0.0014192684314264952, "loss": 0.9935, "step": 5483 }, { "epoch": 0.3815089220494626, "grad_norm": 1.109375, "learning_rate": 0.001419063849447215, "loss": 0.9397, "step": 5484 }, { "epoch": 0.3815784896865978, "grad_norm": 1.53125, "learning_rate": 0.0014188592461899848, "loss": 0.958, "step": 5485 }, { "epoch": 0.381648057323733, "grad_norm": 1.1796875, "learning_rate": 0.0014186546216651932, "loss": 0.9617, "step": 5486 }, { "epoch": 0.3817176249608682, "grad_norm": 0.94140625, "learning_rate": 0.0014184499758832304, "loss": 0.8839, "step": 5487 }, { "epoch": 0.3817871925980034, "grad_norm": 1.1796875, "learning_rate": 0.0014182453088544867, "loss": 1.0621, "step": 5488 }, { "epoch": 0.3818567602351386, "grad_norm": 1.2109375, "learning_rate": 0.0014180406205893546, "loss": 1.0318, "step": 5489 }, { "epoch": 0.3819263278722738, "grad_norm": 1.03125, "learning_rate": 0.0014178359110982265, "loss": 0.7853, "step": 5490 }, { "epoch": 0.38199589550940904, "grad_norm": 1.015625, "learning_rate": 0.0014176311803914972, "loss": 0.9092, "step": 5491 }, { "epoch": 0.3820654631465442, "grad_norm": 1.1953125, "learning_rate": 0.0014174264284795614, "loss": 0.8226, "step": 5492 }, { "epoch": 0.38213503078367944, "grad_norm": 0.96484375, "learning_rate": 0.0014172216553728152, "loss": 0.8707, "step": 5493 }, { "epoch": 0.3822045984208146, "grad_norm": 1.078125, "learning_rate": 0.001417016861081657, "loss": 0.9307, "step": 5494 }, { "epoch": 0.38227416605794984, "grad_norm": 1.0859375, "learning_rate": 0.001416812045616484, "loss": 0.5553, "step": 5495 }, { "epoch": 0.38234373369508506, "grad_norm": 1.0703125, "learning_rate": 0.0014166072089876968, "loss": 0.9959, "step": 5496 }, { "epoch": 0.38241330133222023, "grad_norm": 1.03125, "learning_rate": 0.001416402351205695, "loss": 0.7066, "step": 5497 }, { "epoch": 0.38248286896935546, "grad_norm": 1.0234375, "learning_rate": 0.0014161974722808803, "loss": 0.7988, "step": 5498 }, { "epoch": 0.3825524366064907, "grad_norm": 1.3125, "learning_rate": 0.001415992572223656, "loss": 1.0347, "step": 5499 }, { "epoch": 0.38262200424362586, "grad_norm": 1.3515625, "learning_rate": 0.0014157876510444256, "loss": 0.923, "step": 5500 }, { "epoch": 0.3826915718807611, "grad_norm": 0.984375, "learning_rate": 0.0014155827087535943, "loss": 0.6956, "step": 5501 }, { "epoch": 0.38276113951789625, "grad_norm": 1.3125, "learning_rate": 0.0014153777453615678, "loss": 0.7743, "step": 5502 }, { "epoch": 0.3828307071550315, "grad_norm": 1.8046875, "learning_rate": 0.0014151727608787525, "loss": 1.0726, "step": 5503 }, { "epoch": 0.3829002747921667, "grad_norm": 1.1640625, "learning_rate": 0.0014149677553155575, "loss": 0.8967, "step": 5504 }, { "epoch": 0.3829698424293019, "grad_norm": 1.078125, "learning_rate": 0.0014147627286823915, "loss": 0.9015, "step": 5505 }, { "epoch": 0.3830394100664371, "grad_norm": 1.3828125, "learning_rate": 0.0014145576809896643, "loss": 0.7625, "step": 5506 }, { "epoch": 0.3831089777035723, "grad_norm": 1.15625, "learning_rate": 0.0014143526122477879, "loss": 0.8154, "step": 5507 }, { "epoch": 0.3831785453407075, "grad_norm": 1.0703125, "learning_rate": 0.0014141475224671743, "loss": 0.9021, "step": 5508 }, { "epoch": 0.3832481129778427, "grad_norm": 1.140625, "learning_rate": 0.0014139424116582364, "loss": 1.1319, "step": 5509 }, { "epoch": 0.3833176806149779, "grad_norm": 1.1640625, "learning_rate": 0.00141373727983139, "loss": 1.0978, "step": 5510 }, { "epoch": 0.3833872482521131, "grad_norm": 1.0625, "learning_rate": 0.0014135321269970497, "loss": 0.9746, "step": 5511 }, { "epoch": 0.38345681588924835, "grad_norm": 1.0625, "learning_rate": 0.0014133269531656323, "loss": 0.7934, "step": 5512 }, { "epoch": 0.3835263835263835, "grad_norm": 1.0625, "learning_rate": 0.0014131217583475558, "loss": 0.7595, "step": 5513 }, { "epoch": 0.38359595116351874, "grad_norm": 1.3046875, "learning_rate": 0.0014129165425532384, "loss": 1.0819, "step": 5514 }, { "epoch": 0.3836655188006539, "grad_norm": 1.2109375, "learning_rate": 0.0014127113057931003, "loss": 0.8422, "step": 5515 }, { "epoch": 0.38373508643778914, "grad_norm": 1.3203125, "learning_rate": 0.001412506048077562, "loss": 1.0966, "step": 5516 }, { "epoch": 0.38380465407492437, "grad_norm": 1.4609375, "learning_rate": 0.0014123007694170461, "loss": 1.1284, "step": 5517 }, { "epoch": 0.38387422171205954, "grad_norm": 1.09375, "learning_rate": 0.0014120954698219755, "loss": 0.7266, "step": 5518 }, { "epoch": 0.38394378934919476, "grad_norm": 0.7578125, "learning_rate": 0.0014118901493027738, "loss": 0.5555, "step": 5519 }, { "epoch": 0.38401335698632993, "grad_norm": 1.21875, "learning_rate": 0.0014116848078698663, "loss": 0.8231, "step": 5520 }, { "epoch": 0.38408292462346516, "grad_norm": 1.0, "learning_rate": 0.0014114794455336794, "loss": 0.8969, "step": 5521 }, { "epoch": 0.3841524922606004, "grad_norm": 1.4765625, "learning_rate": 0.0014112740623046403, "loss": 1.1414, "step": 5522 }, { "epoch": 0.38422205989773556, "grad_norm": 0.984375, "learning_rate": 0.0014110686581931772, "loss": 0.823, "step": 5523 }, { "epoch": 0.3842916275348708, "grad_norm": 1.0234375, "learning_rate": 0.0014108632332097198, "loss": 0.8634, "step": 5524 }, { "epoch": 0.384361195172006, "grad_norm": 0.96875, "learning_rate": 0.0014106577873646982, "loss": 0.9133, "step": 5525 }, { "epoch": 0.3844307628091412, "grad_norm": 1.2421875, "learning_rate": 0.001410452320668544, "loss": 0.9964, "step": 5526 }, { "epoch": 0.3845003304462764, "grad_norm": 1.234375, "learning_rate": 0.0014102468331316897, "loss": 1.0287, "step": 5527 }, { "epoch": 0.3845698980834116, "grad_norm": 0.98828125, "learning_rate": 0.001410041324764569, "loss": 0.8902, "step": 5528 }, { "epoch": 0.3846394657205468, "grad_norm": 1.265625, "learning_rate": 0.0014098357955776167, "loss": 1.0256, "step": 5529 }, { "epoch": 0.38470903335768203, "grad_norm": 1.046875, "learning_rate": 0.0014096302455812683, "loss": 0.858, "step": 5530 }, { "epoch": 0.3847786009948172, "grad_norm": 1.125, "learning_rate": 0.0014094246747859609, "loss": 0.997, "step": 5531 }, { "epoch": 0.3848481686319524, "grad_norm": 1.0546875, "learning_rate": 0.0014092190832021318, "loss": 0.7877, "step": 5532 }, { "epoch": 0.3849177362690876, "grad_norm": 1.2265625, "learning_rate": 0.001409013470840221, "loss": 1.1309, "step": 5533 }, { "epoch": 0.3849873039062228, "grad_norm": 1.078125, "learning_rate": 0.0014088078377106673, "loss": 0.7741, "step": 5534 }, { "epoch": 0.38505687154335805, "grad_norm": 1.34375, "learning_rate": 0.001408602183823912, "loss": 1.0019, "step": 5535 }, { "epoch": 0.3851264391804932, "grad_norm": 0.83984375, "learning_rate": 0.0014083965091903974, "loss": 0.8954, "step": 5536 }, { "epoch": 0.38519600681762844, "grad_norm": 1.1328125, "learning_rate": 0.0014081908138205664, "loss": 0.8182, "step": 5537 }, { "epoch": 0.38526557445476367, "grad_norm": 1.125, "learning_rate": 0.0014079850977248638, "loss": 1.0547, "step": 5538 }, { "epoch": 0.38533514209189884, "grad_norm": 1.140625, "learning_rate": 0.0014077793609137336, "loss": 1.1989, "step": 5539 }, { "epoch": 0.38540470972903407, "grad_norm": 0.7734375, "learning_rate": 0.0014075736033976236, "loss": 0.7847, "step": 5540 }, { "epoch": 0.38547427736616924, "grad_norm": 1.4140625, "learning_rate": 0.00140736782518698, "loss": 1.0591, "step": 5541 }, { "epoch": 0.38554384500330446, "grad_norm": 0.98046875, "learning_rate": 0.0014071620262922516, "loss": 0.8005, "step": 5542 }, { "epoch": 0.3856134126404397, "grad_norm": 1.3671875, "learning_rate": 0.0014069562067238874, "loss": 0.8348, "step": 5543 }, { "epoch": 0.38568298027757486, "grad_norm": 1.2265625, "learning_rate": 0.0014067503664923387, "loss": 0.655, "step": 5544 }, { "epoch": 0.3857525479147101, "grad_norm": 1.25, "learning_rate": 0.0014065445056080563, "loss": 0.8986, "step": 5545 }, { "epoch": 0.38582211555184526, "grad_norm": 1.0546875, "learning_rate": 0.001406338624081493, "loss": 0.7869, "step": 5546 }, { "epoch": 0.3858916831889805, "grad_norm": 1.1640625, "learning_rate": 0.0014061327219231025, "loss": 0.9504, "step": 5547 }, { "epoch": 0.3859612508261157, "grad_norm": 1.3515625, "learning_rate": 0.0014059267991433394, "loss": 0.9495, "step": 5548 }, { "epoch": 0.3860308184632509, "grad_norm": 0.93359375, "learning_rate": 0.00140572085575266, "loss": 0.7159, "step": 5549 }, { "epoch": 0.3861003861003861, "grad_norm": 1.1328125, "learning_rate": 0.00140551489176152, "loss": 0.8652, "step": 5550 }, { "epoch": 0.38616995373752133, "grad_norm": 1.15625, "learning_rate": 0.0014053089071803778, "loss": 0.919, "step": 5551 }, { "epoch": 0.3862395213746565, "grad_norm": 1.5546875, "learning_rate": 0.001405102902019692, "loss": 0.7285, "step": 5552 }, { "epoch": 0.38630908901179173, "grad_norm": 1.0859375, "learning_rate": 0.001404896876289923, "loss": 1.0848, "step": 5553 }, { "epoch": 0.3863786566489269, "grad_norm": 0.9921875, "learning_rate": 0.0014046908300015316, "loss": 0.8936, "step": 5554 }, { "epoch": 0.3864482242860621, "grad_norm": 1.0625, "learning_rate": 0.0014044847631649792, "loss": 0.9112, "step": 5555 }, { "epoch": 0.38651779192319735, "grad_norm": 1.140625, "learning_rate": 0.0014042786757907297, "loss": 0.8496, "step": 5556 }, { "epoch": 0.3865873595603325, "grad_norm": 1.1171875, "learning_rate": 0.0014040725678892466, "loss": 0.8048, "step": 5557 }, { "epoch": 0.38665692719746775, "grad_norm": 1.3828125, "learning_rate": 0.0014038664394709953, "loss": 1.0822, "step": 5558 }, { "epoch": 0.3867264948346029, "grad_norm": 1.421875, "learning_rate": 0.0014036602905464414, "loss": 0.8533, "step": 5559 }, { "epoch": 0.38679606247173814, "grad_norm": 0.953125, "learning_rate": 0.0014034541211260527, "loss": 0.7703, "step": 5560 }, { "epoch": 0.38686563010887337, "grad_norm": 1.0078125, "learning_rate": 0.0014032479312202977, "loss": 0.6636, "step": 5561 }, { "epoch": 0.38693519774600854, "grad_norm": 1.3046875, "learning_rate": 0.001403041720839645, "loss": 0.8638, "step": 5562 }, { "epoch": 0.38700476538314377, "grad_norm": 1.0078125, "learning_rate": 0.0014028354899945652, "loss": 0.8446, "step": 5563 }, { "epoch": 0.387074333020279, "grad_norm": 0.96484375, "learning_rate": 0.0014026292386955296, "loss": 0.724, "step": 5564 }, { "epoch": 0.38714390065741416, "grad_norm": 1.2265625, "learning_rate": 0.0014024229669530109, "loss": 1.009, "step": 5565 }, { "epoch": 0.3872134682945494, "grad_norm": 0.88671875, "learning_rate": 0.0014022166747774821, "loss": 0.6755, "step": 5566 }, { "epoch": 0.38728303593168456, "grad_norm": 1.2578125, "learning_rate": 0.0014020103621794177, "loss": 0.6224, "step": 5567 }, { "epoch": 0.3873526035688198, "grad_norm": 1.03125, "learning_rate": 0.001401804029169294, "loss": 0.9592, "step": 5568 }, { "epoch": 0.387422171205955, "grad_norm": 1.03125, "learning_rate": 0.001401597675757586, "loss": 0.6962, "step": 5569 }, { "epoch": 0.3874917388430902, "grad_norm": 1.0625, "learning_rate": 0.0014013913019547731, "loss": 0.9164, "step": 5570 }, { "epoch": 0.3875613064802254, "grad_norm": 1.09375, "learning_rate": 0.0014011849077713325, "loss": 0.8144, "step": 5571 }, { "epoch": 0.3876308741173606, "grad_norm": 1.453125, "learning_rate": 0.0014009784932177446, "loss": 1.1767, "step": 5572 }, { "epoch": 0.3877004417544958, "grad_norm": 1.1796875, "learning_rate": 0.0014007720583044901, "loss": 0.8022, "step": 5573 }, { "epoch": 0.38777000939163103, "grad_norm": 1.234375, "learning_rate": 0.0014005656030420502, "loss": 0.9998, "step": 5574 }, { "epoch": 0.3878395770287662, "grad_norm": 1.1328125, "learning_rate": 0.0014003591274409084, "loss": 0.8166, "step": 5575 }, { "epoch": 0.38790914466590143, "grad_norm": 1.46875, "learning_rate": 0.0014001526315115475, "loss": 0.8817, "step": 5576 }, { "epoch": 0.38797871230303665, "grad_norm": 0.99609375, "learning_rate": 0.0013999461152644536, "loss": 0.8195, "step": 5577 }, { "epoch": 0.3880482799401718, "grad_norm": 1.140625, "learning_rate": 0.001399739578710111, "loss": 1.1256, "step": 5578 }, { "epoch": 0.38811784757730705, "grad_norm": 1.109375, "learning_rate": 0.0013995330218590082, "loss": 0.744, "step": 5579 }, { "epoch": 0.3881874152144422, "grad_norm": 1.5703125, "learning_rate": 0.0013993264447216317, "loss": 1.0807, "step": 5580 }, { "epoch": 0.38825698285157745, "grad_norm": 1.328125, "learning_rate": 0.001399119847308471, "loss": 1.1585, "step": 5581 }, { "epoch": 0.3883265504887127, "grad_norm": 1.2578125, "learning_rate": 0.0013989132296300172, "loss": 0.9526, "step": 5582 }, { "epoch": 0.38839611812584784, "grad_norm": 1.1328125, "learning_rate": 0.0013987065916967595, "loss": 0.7696, "step": 5583 }, { "epoch": 0.38846568576298307, "grad_norm": 1.0234375, "learning_rate": 0.0013984999335191909, "loss": 1.1156, "step": 5584 }, { "epoch": 0.38853525340011824, "grad_norm": 1.0078125, "learning_rate": 0.0013982932551078041, "loss": 0.9445, "step": 5585 }, { "epoch": 0.38860482103725347, "grad_norm": 1.171875, "learning_rate": 0.0013980865564730935, "loss": 0.777, "step": 5586 }, { "epoch": 0.3886743886743887, "grad_norm": 0.859375, "learning_rate": 0.0013978798376255536, "loss": 0.9426, "step": 5587 }, { "epoch": 0.38874395631152386, "grad_norm": 0.95703125, "learning_rate": 0.0013976730985756818, "loss": 0.8967, "step": 5588 }, { "epoch": 0.3888135239486591, "grad_norm": 1.109375, "learning_rate": 0.0013974663393339739, "loss": 1.0173, "step": 5589 }, { "epoch": 0.3888830915857943, "grad_norm": 1.2734375, "learning_rate": 0.0013972595599109287, "loss": 0.864, "step": 5590 }, { "epoch": 0.3889526592229295, "grad_norm": 1.421875, "learning_rate": 0.0013970527603170458, "loss": 0.7937, "step": 5591 }, { "epoch": 0.3890222268600647, "grad_norm": 0.98828125, "learning_rate": 0.0013968459405628247, "loss": 0.7681, "step": 5592 }, { "epoch": 0.3890917944971999, "grad_norm": 1.03125, "learning_rate": 0.001396639100658767, "loss": 0.7632, "step": 5593 }, { "epoch": 0.3891613621343351, "grad_norm": 1.015625, "learning_rate": 0.001396432240615375, "loss": 0.955, "step": 5594 }, { "epoch": 0.38923092977147034, "grad_norm": 1.078125, "learning_rate": 0.0013962253604431524, "loss": 0.7488, "step": 5595 }, { "epoch": 0.3893004974086055, "grad_norm": 1.015625, "learning_rate": 0.0013960184601526024, "loss": 0.8588, "step": 5596 }, { "epoch": 0.38937006504574073, "grad_norm": 1.1015625, "learning_rate": 0.0013958115397542314, "loss": 0.7897, "step": 5597 }, { "epoch": 0.3894396326828759, "grad_norm": 1.2734375, "learning_rate": 0.0013956045992585457, "loss": 0.7896, "step": 5598 }, { "epoch": 0.38950920032001113, "grad_norm": 1.1953125, "learning_rate": 0.001395397638676052, "loss": 0.8627, "step": 5599 }, { "epoch": 0.38957876795714635, "grad_norm": 1.3046875, "learning_rate": 0.0013951906580172595, "loss": 0.908, "step": 5600 }, { "epoch": 0.3896483355942815, "grad_norm": 1.046875, "learning_rate": 0.0013949836572926771, "loss": 1.0089, "step": 5601 }, { "epoch": 0.38971790323141675, "grad_norm": 0.94921875, "learning_rate": 0.0013947766365128157, "loss": 0.8768, "step": 5602 }, { "epoch": 0.389787470868552, "grad_norm": 1.34375, "learning_rate": 0.001394569595688186, "loss": 1.0241, "step": 5603 }, { "epoch": 0.38985703850568715, "grad_norm": 1.1328125, "learning_rate": 0.0013943625348293014, "loss": 0.9728, "step": 5604 }, { "epoch": 0.3899266061428224, "grad_norm": 1.0546875, "learning_rate": 0.0013941554539466752, "loss": 0.8221, "step": 5605 }, { "epoch": 0.38999617377995754, "grad_norm": 1.1953125, "learning_rate": 0.0013939483530508213, "loss": 1.0314, "step": 5606 }, { "epoch": 0.39006574141709277, "grad_norm": 1.78125, "learning_rate": 0.001393741232152256, "loss": 1.1567, "step": 5607 }, { "epoch": 0.390135309054228, "grad_norm": 1.1171875, "learning_rate": 0.0013935340912614954, "loss": 0.7966, "step": 5608 }, { "epoch": 0.39020487669136317, "grad_norm": 1.296875, "learning_rate": 0.0013933269303890575, "loss": 1.0969, "step": 5609 }, { "epoch": 0.3902744443284984, "grad_norm": 1.1796875, "learning_rate": 0.00139311974954546, "loss": 0.8874, "step": 5610 }, { "epoch": 0.39034401196563356, "grad_norm": 1.234375, "learning_rate": 0.0013929125487412233, "loss": 0.7099, "step": 5611 }, { "epoch": 0.3904135796027688, "grad_norm": 1.109375, "learning_rate": 0.0013927053279868683, "loss": 0.9192, "step": 5612 }, { "epoch": 0.390483147239904, "grad_norm": 1.078125, "learning_rate": 0.0013924980872929153, "loss": 1.0022, "step": 5613 }, { "epoch": 0.3905527148770392, "grad_norm": 1.2421875, "learning_rate": 0.0013922908266698884, "loss": 0.9358, "step": 5614 }, { "epoch": 0.3906222825141744, "grad_norm": 1.109375, "learning_rate": 0.00139208354612831, "loss": 0.8857, "step": 5615 }, { "epoch": 0.3906918501513096, "grad_norm": 1.0625, "learning_rate": 0.0013918762456787061, "loss": 0.7615, "step": 5616 }, { "epoch": 0.3907614177884448, "grad_norm": 1.15625, "learning_rate": 0.0013916689253316013, "loss": 0.769, "step": 5617 }, { "epoch": 0.39083098542558004, "grad_norm": 1.0625, "learning_rate": 0.0013914615850975226, "loss": 0.9046, "step": 5618 }, { "epoch": 0.3909005530627152, "grad_norm": 1.4296875, "learning_rate": 0.0013912542249869978, "loss": 0.9263, "step": 5619 }, { "epoch": 0.39097012069985043, "grad_norm": 1.25, "learning_rate": 0.0013910468450105556, "loss": 1.1956, "step": 5620 }, { "epoch": 0.39103968833698566, "grad_norm": 0.91796875, "learning_rate": 0.0013908394451787255, "loss": 0.8908, "step": 5621 }, { "epoch": 0.39110925597412083, "grad_norm": 1.1875, "learning_rate": 0.0013906320255020384, "loss": 0.7833, "step": 5622 }, { "epoch": 0.39117882361125605, "grad_norm": 1.234375, "learning_rate": 0.001390424585991026, "loss": 1.0066, "step": 5623 }, { "epoch": 0.3912483912483912, "grad_norm": 1.109375, "learning_rate": 0.001390217126656221, "loss": 0.7541, "step": 5624 }, { "epoch": 0.39131795888552645, "grad_norm": 1.3046875, "learning_rate": 0.0013900096475081571, "loss": 0.8519, "step": 5625 }, { "epoch": 0.3913875265226617, "grad_norm": 0.99609375, "learning_rate": 0.0013898021485573688, "loss": 0.6573, "step": 5626 }, { "epoch": 0.39145709415979685, "grad_norm": 1.1640625, "learning_rate": 0.0013895946298143923, "loss": 0.8274, "step": 5627 }, { "epoch": 0.3915266617969321, "grad_norm": 1.2109375, "learning_rate": 0.0013893870912897648, "loss": 0.9457, "step": 5628 }, { "epoch": 0.39159622943406724, "grad_norm": 1.09375, "learning_rate": 0.001389179532994023, "loss": 0.8788, "step": 5629 }, { "epoch": 0.39166579707120247, "grad_norm": 0.9765625, "learning_rate": 0.0013889719549377063, "loss": 0.9339, "step": 5630 }, { "epoch": 0.3917353647083377, "grad_norm": 1.1171875, "learning_rate": 0.0013887643571313538, "loss": 0.8774, "step": 5631 }, { "epoch": 0.39180493234547287, "grad_norm": 0.90234375, "learning_rate": 0.0013885567395855072, "loss": 0.7744, "step": 5632 }, { "epoch": 0.3918744999826081, "grad_norm": 0.875, "learning_rate": 0.0013883491023107075, "loss": 0.846, "step": 5633 }, { "epoch": 0.3919440676197433, "grad_norm": 1.2890625, "learning_rate": 0.001388141445317498, "loss": 0.775, "step": 5634 }, { "epoch": 0.3920136352568785, "grad_norm": 1.015625, "learning_rate": 0.0013879337686164223, "loss": 0.7387, "step": 5635 }, { "epoch": 0.3920832028940137, "grad_norm": 1.015625, "learning_rate": 0.0013877260722180253, "loss": 0.8283, "step": 5636 }, { "epoch": 0.3921527705311489, "grad_norm": 1.015625, "learning_rate": 0.0013875183561328527, "loss": 0.9993, "step": 5637 }, { "epoch": 0.3922223381682841, "grad_norm": 1.25, "learning_rate": 0.001387310620371451, "loss": 1.0272, "step": 5638 }, { "epoch": 0.39229190580541934, "grad_norm": 1.2265625, "learning_rate": 0.0013871028649443682, "loss": 0.8058, "step": 5639 }, { "epoch": 0.3923614734425545, "grad_norm": 0.90234375, "learning_rate": 0.001386895089862153, "loss": 0.7725, "step": 5640 }, { "epoch": 0.39243104107968974, "grad_norm": 1.390625, "learning_rate": 0.0013866872951353553, "loss": 0.952, "step": 5641 }, { "epoch": 0.3925006087168249, "grad_norm": 1.0234375, "learning_rate": 0.0013864794807745258, "loss": 0.7073, "step": 5642 }, { "epoch": 0.39257017635396013, "grad_norm": 0.890625, "learning_rate": 0.0013862716467902163, "loss": 0.7186, "step": 5643 }, { "epoch": 0.39263974399109536, "grad_norm": 1.203125, "learning_rate": 0.0013860637931929797, "loss": 1.0442, "step": 5644 }, { "epoch": 0.39270931162823053, "grad_norm": 0.953125, "learning_rate": 0.0013858559199933693, "loss": 0.9521, "step": 5645 }, { "epoch": 0.39277887926536575, "grad_norm": 1.4609375, "learning_rate": 0.0013856480272019405, "loss": 0.933, "step": 5646 }, { "epoch": 0.392848446902501, "grad_norm": 1.4765625, "learning_rate": 0.001385440114829248, "loss": 0.6352, "step": 5647 }, { "epoch": 0.39291801453963615, "grad_norm": 1.1484375, "learning_rate": 0.0013852321828858498, "loss": 1.0762, "step": 5648 }, { "epoch": 0.3929875821767714, "grad_norm": 0.8828125, "learning_rate": 0.001385024231382303, "loss": 0.7916, "step": 5649 }, { "epoch": 0.39305714981390655, "grad_norm": 0.890625, "learning_rate": 0.001384816260329166, "loss": 0.8968, "step": 5650 }, { "epoch": 0.3931267174510418, "grad_norm": 1.125, "learning_rate": 0.0013846082697369995, "loss": 0.7204, "step": 5651 }, { "epoch": 0.393196285088177, "grad_norm": 1.3203125, "learning_rate": 0.0013844002596163634, "loss": 1.3161, "step": 5652 }, { "epoch": 0.39326585272531217, "grad_norm": 1.2265625, "learning_rate": 0.0013841922299778198, "loss": 0.7805, "step": 5653 }, { "epoch": 0.3933354203624474, "grad_norm": 1.0703125, "learning_rate": 0.0013839841808319306, "loss": 0.6772, "step": 5654 }, { "epoch": 0.39340498799958257, "grad_norm": 0.9375, "learning_rate": 0.0013837761121892607, "loss": 0.6694, "step": 5655 }, { "epoch": 0.3934745556367178, "grad_norm": 1.1640625, "learning_rate": 0.001383568024060374, "loss": 0.795, "step": 5656 }, { "epoch": 0.393544123273853, "grad_norm": 0.83203125, "learning_rate": 0.0013833599164558366, "loss": 0.8358, "step": 5657 }, { "epoch": 0.3936136909109882, "grad_norm": 0.88671875, "learning_rate": 0.0013831517893862146, "loss": 0.7724, "step": 5658 }, { "epoch": 0.3936832585481234, "grad_norm": 0.84375, "learning_rate": 0.001382943642862076, "loss": 0.7712, "step": 5659 }, { "epoch": 0.39375282618525864, "grad_norm": 1.078125, "learning_rate": 0.00138273547689399, "loss": 0.7206, "step": 5660 }, { "epoch": 0.3938223938223938, "grad_norm": 0.984375, "learning_rate": 0.001382527291492525, "loss": 0.7366, "step": 5661 }, { "epoch": 0.39389196145952904, "grad_norm": 0.90234375, "learning_rate": 0.0013823190866682526, "loss": 0.7126, "step": 5662 }, { "epoch": 0.3939615290966642, "grad_norm": 1.2109375, "learning_rate": 0.0013821108624317434, "loss": 0.8909, "step": 5663 }, { "epoch": 0.39403109673379944, "grad_norm": 0.8359375, "learning_rate": 0.0013819026187935708, "loss": 0.7864, "step": 5664 }, { "epoch": 0.39410066437093466, "grad_norm": 1.1953125, "learning_rate": 0.0013816943557643081, "loss": 0.8352, "step": 5665 }, { "epoch": 0.39417023200806983, "grad_norm": 1.0859375, "learning_rate": 0.0013814860733545303, "loss": 0.9868, "step": 5666 }, { "epoch": 0.39423979964520506, "grad_norm": 1.53125, "learning_rate": 0.0013812777715748125, "loss": 0.8523, "step": 5667 }, { "epoch": 0.39430936728234023, "grad_norm": 0.98828125, "learning_rate": 0.0013810694504357308, "loss": 0.7038, "step": 5668 }, { "epoch": 0.39437893491947545, "grad_norm": 1.546875, "learning_rate": 0.0013808611099478637, "loss": 1.205, "step": 5669 }, { "epoch": 0.3944485025566107, "grad_norm": 0.9140625, "learning_rate": 0.0013806527501217885, "loss": 0.7109, "step": 5670 }, { "epoch": 0.39451807019374585, "grad_norm": 1.03125, "learning_rate": 0.0013804443709680857, "loss": 0.6902, "step": 5671 }, { "epoch": 0.3945876378308811, "grad_norm": 0.9765625, "learning_rate": 0.001380235972497335, "loss": 0.7888, "step": 5672 }, { "epoch": 0.3946572054680163, "grad_norm": 1.109375, "learning_rate": 0.0013800275547201184, "loss": 0.8962, "step": 5673 }, { "epoch": 0.3947267731051515, "grad_norm": 1.125, "learning_rate": 0.001379819117647018, "loss": 0.8865, "step": 5674 }, { "epoch": 0.3947963407422867, "grad_norm": 1.046875, "learning_rate": 0.0013796106612886173, "loss": 0.7484, "step": 5675 }, { "epoch": 0.39486590837942187, "grad_norm": 0.921875, "learning_rate": 0.0013794021856555008, "loss": 0.735, "step": 5676 }, { "epoch": 0.3949354760165571, "grad_norm": 1.0546875, "learning_rate": 0.0013791936907582532, "loss": 0.7644, "step": 5677 }, { "epoch": 0.3950050436536923, "grad_norm": 1.2265625, "learning_rate": 0.0013789851766074614, "loss": 0.8093, "step": 5678 }, { "epoch": 0.3950746112908275, "grad_norm": 0.953125, "learning_rate": 0.0013787766432137127, "loss": 0.8287, "step": 5679 }, { "epoch": 0.3951441789279627, "grad_norm": 1.2265625, "learning_rate": 0.001378568090587595, "loss": 0.8146, "step": 5680 }, { "epoch": 0.3952137465650979, "grad_norm": 1.0546875, "learning_rate": 0.001378359518739698, "loss": 0.6935, "step": 5681 }, { "epoch": 0.3952833142022331, "grad_norm": 1.0390625, "learning_rate": 0.0013781509276806117, "loss": 1.1332, "step": 5682 }, { "epoch": 0.39535288183936834, "grad_norm": 1.140625, "learning_rate": 0.001377942317420927, "loss": 0.907, "step": 5683 }, { "epoch": 0.3954224494765035, "grad_norm": 1.078125, "learning_rate": 0.0013777336879712367, "loss": 0.8557, "step": 5684 }, { "epoch": 0.39549201711363874, "grad_norm": 1.15625, "learning_rate": 0.0013775250393421336, "loss": 0.7406, "step": 5685 }, { "epoch": 0.39556158475077396, "grad_norm": 1.0390625, "learning_rate": 0.0013773163715442118, "loss": 0.7507, "step": 5686 }, { "epoch": 0.39563115238790914, "grad_norm": 1.1484375, "learning_rate": 0.0013771076845880668, "loss": 0.9068, "step": 5687 }, { "epoch": 0.39570072002504436, "grad_norm": 1.1015625, "learning_rate": 0.0013768989784842941, "loss": 0.8867, "step": 5688 }, { "epoch": 0.39577028766217953, "grad_norm": 1.1875, "learning_rate": 0.001376690253243491, "loss": 0.9804, "step": 5689 }, { "epoch": 0.39583985529931476, "grad_norm": 1.328125, "learning_rate": 0.0013764815088762553, "loss": 1.0954, "step": 5690 }, { "epoch": 0.39590942293645, "grad_norm": 0.9921875, "learning_rate": 0.0013762727453931862, "loss": 0.7848, "step": 5691 }, { "epoch": 0.39597899057358515, "grad_norm": 1.5234375, "learning_rate": 0.0013760639628048838, "loss": 0.6177, "step": 5692 }, { "epoch": 0.3960485582107204, "grad_norm": 1.1484375, "learning_rate": 0.001375855161121949, "loss": 0.8519, "step": 5693 }, { "epoch": 0.39611812584785555, "grad_norm": 1.0625, "learning_rate": 0.0013756463403549835, "loss": 0.671, "step": 5694 }, { "epoch": 0.3961876934849908, "grad_norm": 0.96484375, "learning_rate": 0.00137543750051459, "loss": 0.8336, "step": 5695 }, { "epoch": 0.396257261122126, "grad_norm": 1.046875, "learning_rate": 0.0013752286416113728, "loss": 0.9169, "step": 5696 }, { "epoch": 0.3963268287592612, "grad_norm": 1.015625, "learning_rate": 0.0013750197636559363, "loss": 0.8185, "step": 5697 }, { "epoch": 0.3963963963963964, "grad_norm": 1.2109375, "learning_rate": 0.0013748108666588865, "loss": 1.1148, "step": 5698 }, { "epoch": 0.3964659640335316, "grad_norm": 0.953125, "learning_rate": 0.0013746019506308302, "loss": 0.903, "step": 5699 }, { "epoch": 0.3965355316706668, "grad_norm": 1.0625, "learning_rate": 0.001374393015582375, "loss": 0.9774, "step": 5700 }, { "epoch": 0.396605099307802, "grad_norm": 1.4921875, "learning_rate": 0.0013741840615241294, "loss": 1.2874, "step": 5701 }, { "epoch": 0.3966746669449372, "grad_norm": 1.4765625, "learning_rate": 0.001373975088466703, "loss": 0.7905, "step": 5702 }, { "epoch": 0.3967442345820724, "grad_norm": 1.21875, "learning_rate": 0.0013737660964207071, "loss": 0.8377, "step": 5703 }, { "epoch": 0.39681380221920765, "grad_norm": 1.1328125, "learning_rate": 0.0013735570853967522, "loss": 0.9901, "step": 5704 }, { "epoch": 0.3968833698563428, "grad_norm": 1.2421875, "learning_rate": 0.0013733480554054519, "loss": 0.8395, "step": 5705 }, { "epoch": 0.39695293749347804, "grad_norm": 1.53125, "learning_rate": 0.0013731390064574188, "loss": 0.8949, "step": 5706 }, { "epoch": 0.3970225051306132, "grad_norm": 1.0078125, "learning_rate": 0.0013729299385632676, "loss": 0.6607, "step": 5707 }, { "epoch": 0.39709207276774844, "grad_norm": 0.9453125, "learning_rate": 0.001372720851733614, "loss": 0.9082, "step": 5708 }, { "epoch": 0.39716164040488366, "grad_norm": 1.203125, "learning_rate": 0.0013725117459790744, "loss": 1.0577, "step": 5709 }, { "epoch": 0.39723120804201884, "grad_norm": 1.1953125, "learning_rate": 0.0013723026213102658, "loss": 0.6586, "step": 5710 }, { "epoch": 0.39730077567915406, "grad_norm": 1.40625, "learning_rate": 0.0013720934777378064, "loss": 0.8832, "step": 5711 }, { "epoch": 0.3973703433162893, "grad_norm": 1.25, "learning_rate": 0.001371884315272316, "loss": 0.9176, "step": 5712 }, { "epoch": 0.39743991095342446, "grad_norm": 1.234375, "learning_rate": 0.0013716751339244145, "loss": 0.9529, "step": 5713 }, { "epoch": 0.3975094785905597, "grad_norm": 1.171875, "learning_rate": 0.0013714659337047228, "loss": 0.9422, "step": 5714 }, { "epoch": 0.39757904622769485, "grad_norm": 1.28125, "learning_rate": 0.0013712567146238635, "loss": 0.8914, "step": 5715 }, { "epoch": 0.3976486138648301, "grad_norm": 1.2265625, "learning_rate": 0.0013710474766924596, "loss": 0.9733, "step": 5716 }, { "epoch": 0.3977181815019653, "grad_norm": 1.1875, "learning_rate": 0.0013708382199211348, "loss": 0.9387, "step": 5717 }, { "epoch": 0.3977877491391005, "grad_norm": 1.3359375, "learning_rate": 0.0013706289443205146, "loss": 0.9794, "step": 5718 }, { "epoch": 0.3978573167762357, "grad_norm": 1.1171875, "learning_rate": 0.0013704196499012247, "loss": 0.8667, "step": 5719 }, { "epoch": 0.3979268844133709, "grad_norm": 1.078125, "learning_rate": 0.0013702103366738919, "loss": 0.8185, "step": 5720 }, { "epoch": 0.3979964520505061, "grad_norm": 1.125, "learning_rate": 0.0013700010046491442, "loss": 1.0267, "step": 5721 }, { "epoch": 0.3980660196876413, "grad_norm": 1.0546875, "learning_rate": 0.0013697916538376106, "loss": 0.9751, "step": 5722 }, { "epoch": 0.3981355873247765, "grad_norm": 0.98828125, "learning_rate": 0.0013695822842499203, "loss": 0.6295, "step": 5723 }, { "epoch": 0.3982051549619117, "grad_norm": 1.2265625, "learning_rate": 0.001369372895896705, "loss": 1.0598, "step": 5724 }, { "epoch": 0.39827472259904695, "grad_norm": 1.1640625, "learning_rate": 0.0013691634887885954, "loss": 0.8897, "step": 5725 }, { "epoch": 0.3983442902361821, "grad_norm": 0.9296875, "learning_rate": 0.0013689540629362247, "loss": 0.6013, "step": 5726 }, { "epoch": 0.39841385787331735, "grad_norm": 1.2265625, "learning_rate": 0.0013687446183502264, "loss": 0.8765, "step": 5727 }, { "epoch": 0.3984834255104525, "grad_norm": 1.3515625, "learning_rate": 0.001368535155041235, "loss": 0.8201, "step": 5728 }, { "epoch": 0.39855299314758774, "grad_norm": 1.0, "learning_rate": 0.0013683256730198858, "loss": 0.6924, "step": 5729 }, { "epoch": 0.39862256078472297, "grad_norm": 1.3359375, "learning_rate": 0.0013681161722968157, "loss": 0.9522, "step": 5730 }, { "epoch": 0.39869212842185814, "grad_norm": 0.8515625, "learning_rate": 0.0013679066528826617, "loss": 0.7778, "step": 5731 }, { "epoch": 0.39876169605899336, "grad_norm": 1.3203125, "learning_rate": 0.001367697114788062, "loss": 1.1964, "step": 5732 }, { "epoch": 0.39883126369612854, "grad_norm": 1.078125, "learning_rate": 0.0013674875580236563, "loss": 0.7515, "step": 5733 }, { "epoch": 0.39890083133326376, "grad_norm": 0.984375, "learning_rate": 0.001367277982600085, "loss": 0.6181, "step": 5734 }, { "epoch": 0.398970398970399, "grad_norm": 1.34375, "learning_rate": 0.0013670683885279886, "loss": 0.9628, "step": 5735 }, { "epoch": 0.39903996660753416, "grad_norm": 1.2890625, "learning_rate": 0.0013668587758180095, "loss": 1.1417, "step": 5736 }, { "epoch": 0.3991095342446694, "grad_norm": 1.0703125, "learning_rate": 0.0013666491444807912, "loss": 0.9542, "step": 5737 }, { "epoch": 0.3991791018818046, "grad_norm": 1.046875, "learning_rate": 0.0013664394945269774, "loss": 0.8635, "step": 5738 }, { "epoch": 0.3992486695189398, "grad_norm": 1.4609375, "learning_rate": 0.0013662298259672129, "loss": 0.8736, "step": 5739 }, { "epoch": 0.399318237156075, "grad_norm": 1.25, "learning_rate": 0.0013660201388121438, "loss": 0.8686, "step": 5740 }, { "epoch": 0.3993878047932102, "grad_norm": 1.4140625, "learning_rate": 0.0013658104330724168, "loss": 1.0662, "step": 5741 }, { "epoch": 0.3994573724303454, "grad_norm": 0.9296875, "learning_rate": 0.00136560070875868, "loss": 0.6878, "step": 5742 }, { "epoch": 0.39952694006748063, "grad_norm": 1.296875, "learning_rate": 0.001365390965881582, "loss": 0.9694, "step": 5743 }, { "epoch": 0.3995965077046158, "grad_norm": 1.0234375, "learning_rate": 0.0013651812044517722, "loss": 0.8046, "step": 5744 }, { "epoch": 0.399666075341751, "grad_norm": 1.0859375, "learning_rate": 0.0013649714244799017, "loss": 0.8767, "step": 5745 }, { "epoch": 0.3997356429788862, "grad_norm": 1.171875, "learning_rate": 0.0013647616259766218, "loss": 0.809, "step": 5746 }, { "epoch": 0.3998052106160214, "grad_norm": 1.09375, "learning_rate": 0.001364551808952585, "loss": 0.96, "step": 5747 }, { "epoch": 0.39987477825315665, "grad_norm": 0.78515625, "learning_rate": 0.001364341973418445, "loss": 0.6902, "step": 5748 }, { "epoch": 0.3999443458902918, "grad_norm": 1.0859375, "learning_rate": 0.0013641321193848558, "loss": 0.8762, "step": 5749 }, { "epoch": 0.40001391352742705, "grad_norm": 1.1328125, "learning_rate": 0.0013639222468624732, "loss": 0.7761, "step": 5750 }, { "epoch": 0.40008348116456227, "grad_norm": 0.99609375, "learning_rate": 0.0013637123558619532, "loss": 0.6942, "step": 5751 }, { "epoch": 0.40015304880169744, "grad_norm": 1.0078125, "learning_rate": 0.0013635024463939528, "loss": 0.7257, "step": 5752 }, { "epoch": 0.40022261643883267, "grad_norm": 1.0234375, "learning_rate": 0.0013632925184691304, "loss": 0.9273, "step": 5753 }, { "epoch": 0.40029218407596784, "grad_norm": 1.421875, "learning_rate": 0.001363082572098145, "loss": 1.032, "step": 5754 }, { "epoch": 0.40036175171310306, "grad_norm": 1.3203125, "learning_rate": 0.0013628726072916568, "loss": 0.7682, "step": 5755 }, { "epoch": 0.4004313193502383, "grad_norm": 0.99609375, "learning_rate": 0.0013626626240603266, "loss": 0.8368, "step": 5756 }, { "epoch": 0.40050088698737346, "grad_norm": 1.0234375, "learning_rate": 0.0013624526224148162, "loss": 0.9481, "step": 5757 }, { "epoch": 0.4005704546245087, "grad_norm": 1.2109375, "learning_rate": 0.0013622426023657886, "loss": 0.9786, "step": 5758 }, { "epoch": 0.40064002226164386, "grad_norm": 1.40625, "learning_rate": 0.0013620325639239076, "loss": 0.852, "step": 5759 }, { "epoch": 0.4007095898987791, "grad_norm": 1.0546875, "learning_rate": 0.0013618225070998375, "loss": 0.9142, "step": 5760 }, { "epoch": 0.4007791575359143, "grad_norm": 1.015625, "learning_rate": 0.0013616124319042445, "loss": 0.8978, "step": 5761 }, { "epoch": 0.4008487251730495, "grad_norm": 1.28125, "learning_rate": 0.0013614023383477947, "loss": 1.0515, "step": 5762 }, { "epoch": 0.4009182928101847, "grad_norm": 1.1015625, "learning_rate": 0.0013611922264411558, "loss": 0.9575, "step": 5763 }, { "epoch": 0.40098786044731993, "grad_norm": 1.609375, "learning_rate": 0.0013609820961949961, "loss": 1.222, "step": 5764 }, { "epoch": 0.4010574280844551, "grad_norm": 1.125, "learning_rate": 0.0013607719476199853, "loss": 0.9202, "step": 5765 }, { "epoch": 0.40112699572159033, "grad_norm": 1.0625, "learning_rate": 0.0013605617807267933, "loss": 1.0188, "step": 5766 }, { "epoch": 0.4011965633587255, "grad_norm": 1.1484375, "learning_rate": 0.0013603515955260912, "loss": 0.8969, "step": 5767 }, { "epoch": 0.4012661309958607, "grad_norm": 1.0625, "learning_rate": 0.0013601413920285516, "loss": 0.6961, "step": 5768 }, { "epoch": 0.40133569863299595, "grad_norm": 1.2421875, "learning_rate": 0.0013599311702448473, "loss": 0.7371, "step": 5769 }, { "epoch": 0.4014052662701311, "grad_norm": 1.1328125, "learning_rate": 0.0013597209301856525, "loss": 0.7546, "step": 5770 }, { "epoch": 0.40147483390726635, "grad_norm": 1.28125, "learning_rate": 0.0013595106718616418, "loss": 0.9842, "step": 5771 }, { "epoch": 0.4015444015444015, "grad_norm": 1.1015625, "learning_rate": 0.0013593003952834914, "loss": 0.9441, "step": 5772 }, { "epoch": 0.40161396918153675, "grad_norm": 1.5859375, "learning_rate": 0.0013590901004618776, "loss": 0.6869, "step": 5773 }, { "epoch": 0.40168353681867197, "grad_norm": 1.234375, "learning_rate": 0.0013588797874074792, "loss": 0.751, "step": 5774 }, { "epoch": 0.40175310445580714, "grad_norm": 1.3828125, "learning_rate": 0.0013586694561309736, "loss": 1.1005, "step": 5775 }, { "epoch": 0.40182267209294237, "grad_norm": 1.1484375, "learning_rate": 0.0013584591066430408, "loss": 1.0155, "step": 5776 }, { "epoch": 0.4018922397300776, "grad_norm": 1.25, "learning_rate": 0.0013582487389543615, "loss": 1.0699, "step": 5777 }, { "epoch": 0.40196180736721276, "grad_norm": 0.828125, "learning_rate": 0.001358038353075617, "loss": 0.9256, "step": 5778 }, { "epoch": 0.402031375004348, "grad_norm": 1.3046875, "learning_rate": 0.0013578279490174892, "loss": 0.9979, "step": 5779 }, { "epoch": 0.40210094264148316, "grad_norm": 1.0703125, "learning_rate": 0.0013576175267906619, "loss": 0.8082, "step": 5780 }, { "epoch": 0.4021705102786184, "grad_norm": 1.0546875, "learning_rate": 0.0013574070864058193, "loss": 0.7615, "step": 5781 }, { "epoch": 0.4022400779157536, "grad_norm": 1.3359375, "learning_rate": 0.001357196627873646, "loss": 1.0908, "step": 5782 }, { "epoch": 0.4023096455528888, "grad_norm": 1.0, "learning_rate": 0.0013569861512048285, "loss": 0.712, "step": 5783 }, { "epoch": 0.402379213190024, "grad_norm": 1.2578125, "learning_rate": 0.0013567756564100537, "loss": 0.9485, "step": 5784 }, { "epoch": 0.4024487808271592, "grad_norm": 1.140625, "learning_rate": 0.0013565651435000093, "loss": 0.986, "step": 5785 }, { "epoch": 0.4025183484642944, "grad_norm": 0.96484375, "learning_rate": 0.001356354612485384, "loss": 0.8165, "step": 5786 }, { "epoch": 0.40258791610142963, "grad_norm": 1.203125, "learning_rate": 0.0013561440633768679, "loss": 1.107, "step": 5787 }, { "epoch": 0.4026574837385648, "grad_norm": 1.359375, "learning_rate": 0.001355933496185151, "loss": 0.9454, "step": 5788 }, { "epoch": 0.40272705137570003, "grad_norm": 1.3359375, "learning_rate": 0.0013557229109209252, "loss": 0.877, "step": 5789 }, { "epoch": 0.40279661901283526, "grad_norm": 0.99609375, "learning_rate": 0.0013555123075948835, "loss": 0.8719, "step": 5790 }, { "epoch": 0.4028661866499704, "grad_norm": 0.97265625, "learning_rate": 0.0013553016862177182, "loss": 0.871, "step": 5791 }, { "epoch": 0.40293575428710565, "grad_norm": 1.1796875, "learning_rate": 0.0013550910468001244, "loss": 0.9059, "step": 5792 }, { "epoch": 0.4030053219242408, "grad_norm": 1.1328125, "learning_rate": 0.0013548803893527971, "loss": 0.8285, "step": 5793 }, { "epoch": 0.40307488956137605, "grad_norm": 1.390625, "learning_rate": 0.0013546697138864321, "loss": 1.0612, "step": 5794 }, { "epoch": 0.4031444571985113, "grad_norm": 1.3671875, "learning_rate": 0.001354459020411727, "loss": 0.7523, "step": 5795 }, { "epoch": 0.40321402483564645, "grad_norm": 1.3984375, "learning_rate": 0.0013542483089393788, "loss": 0.9797, "step": 5796 }, { "epoch": 0.40328359247278167, "grad_norm": 1.0546875, "learning_rate": 0.0013540375794800876, "loss": 0.8473, "step": 5797 }, { "epoch": 0.40335316010991684, "grad_norm": 1.1875, "learning_rate": 0.0013538268320445526, "loss": 0.9444, "step": 5798 }, { "epoch": 0.40342272774705207, "grad_norm": 1.234375, "learning_rate": 0.0013536160666434746, "loss": 0.7539, "step": 5799 }, { "epoch": 0.4034922953841873, "grad_norm": 1.140625, "learning_rate": 0.0013534052832875547, "loss": 0.7358, "step": 5800 }, { "epoch": 0.40356186302132246, "grad_norm": 0.92578125, "learning_rate": 0.001353194481987496, "loss": 0.6306, "step": 5801 }, { "epoch": 0.4036314306584577, "grad_norm": 0.88671875, "learning_rate": 0.0013529836627540015, "loss": 0.7535, "step": 5802 }, { "epoch": 0.4037009982955929, "grad_norm": 1.40625, "learning_rate": 0.0013527728255977758, "loss": 1.0439, "step": 5803 }, { "epoch": 0.4037705659327281, "grad_norm": 1.0078125, "learning_rate": 0.0013525619705295245, "loss": 0.6658, "step": 5804 }, { "epoch": 0.4038401335698633, "grad_norm": 0.96484375, "learning_rate": 0.001352351097559953, "loss": 0.8662, "step": 5805 }, { "epoch": 0.4039097012069985, "grad_norm": 1.1640625, "learning_rate": 0.0013521402066997692, "loss": 0.8351, "step": 5806 }, { "epoch": 0.4039792688441337, "grad_norm": 1.34375, "learning_rate": 0.0013519292979596801, "loss": 0.9935, "step": 5807 }, { "epoch": 0.40404883648126894, "grad_norm": 1.015625, "learning_rate": 0.0013517183713503955, "loss": 0.7469, "step": 5808 }, { "epoch": 0.4041184041184041, "grad_norm": 1.3671875, "learning_rate": 0.0013515074268826246, "loss": 1.154, "step": 5809 }, { "epoch": 0.40418797175553933, "grad_norm": 0.9296875, "learning_rate": 0.0013512964645670783, "loss": 0.5069, "step": 5810 }, { "epoch": 0.4042575393926745, "grad_norm": 1.0703125, "learning_rate": 0.0013510854844144685, "loss": 0.8358, "step": 5811 }, { "epoch": 0.40432710702980973, "grad_norm": 1.09375, "learning_rate": 0.0013508744864355066, "loss": 0.8857, "step": 5812 }, { "epoch": 0.40439667466694496, "grad_norm": 1.1953125, "learning_rate": 0.0013506634706409078, "loss": 1.0108, "step": 5813 }, { "epoch": 0.4044662423040801, "grad_norm": 1.1796875, "learning_rate": 0.0013504524370413849, "loss": 0.8618, "step": 5814 }, { "epoch": 0.40453580994121535, "grad_norm": 1.2109375, "learning_rate": 0.0013502413856476539, "loss": 0.8774, "step": 5815 }, { "epoch": 0.4046053775783506, "grad_norm": 1.109375, "learning_rate": 0.0013500303164704305, "loss": 0.8373, "step": 5816 }, { "epoch": 0.40467494521548575, "grad_norm": 1.390625, "learning_rate": 0.0013498192295204317, "loss": 1.0593, "step": 5817 }, { "epoch": 0.404744512852621, "grad_norm": 1.46875, "learning_rate": 0.001349608124808376, "loss": 0.7007, "step": 5818 }, { "epoch": 0.40481408048975615, "grad_norm": 1.2578125, "learning_rate": 0.0013493970023449814, "loss": 0.7079, "step": 5819 }, { "epoch": 0.40488364812689137, "grad_norm": 1.078125, "learning_rate": 0.0013491858621409688, "loss": 0.8407, "step": 5820 }, { "epoch": 0.4049532157640266, "grad_norm": 0.890625, "learning_rate": 0.0013489747042070576, "loss": 0.6917, "step": 5821 }, { "epoch": 0.40502278340116177, "grad_norm": 0.98046875, "learning_rate": 0.0013487635285539703, "loss": 0.9607, "step": 5822 }, { "epoch": 0.405092351038297, "grad_norm": 1.03125, "learning_rate": 0.0013485523351924288, "loss": 0.9759, "step": 5823 }, { "epoch": 0.40516191867543216, "grad_norm": 1.0234375, "learning_rate": 0.0013483411241331565, "loss": 1.0282, "step": 5824 }, { "epoch": 0.4052314863125674, "grad_norm": 1.078125, "learning_rate": 0.0013481298953868777, "loss": 0.9879, "step": 5825 }, { "epoch": 0.4053010539497026, "grad_norm": 1.2421875, "learning_rate": 0.0013479186489643172, "loss": 0.962, "step": 5826 }, { "epoch": 0.4053706215868378, "grad_norm": 1.1953125, "learning_rate": 0.0013477073848762017, "loss": 1.0321, "step": 5827 }, { "epoch": 0.405440189223973, "grad_norm": 1.1640625, "learning_rate": 0.0013474961031332575, "loss": 0.972, "step": 5828 }, { "epoch": 0.40550975686110824, "grad_norm": 1.0703125, "learning_rate": 0.0013472848037462133, "loss": 0.8074, "step": 5829 }, { "epoch": 0.4055793244982434, "grad_norm": 1.2265625, "learning_rate": 0.0013470734867257967, "loss": 0.9583, "step": 5830 }, { "epoch": 0.40564889213537864, "grad_norm": 1.03125, "learning_rate": 0.001346862152082738, "loss": 0.72, "step": 5831 }, { "epoch": 0.4057184597725138, "grad_norm": 1.3515625, "learning_rate": 0.0013466507998277674, "loss": 0.8751, "step": 5832 }, { "epoch": 0.40578802740964903, "grad_norm": 1.015625, "learning_rate": 0.0013464394299716163, "loss": 0.647, "step": 5833 }, { "epoch": 0.40585759504678426, "grad_norm": 1.0546875, "learning_rate": 0.0013462280425250175, "loss": 0.8034, "step": 5834 }, { "epoch": 0.40592716268391943, "grad_norm": 0.94921875, "learning_rate": 0.0013460166374987036, "loss": 0.7786, "step": 5835 }, { "epoch": 0.40599673032105466, "grad_norm": 1.046875, "learning_rate": 0.001345805214903409, "loss": 0.6817, "step": 5836 }, { "epoch": 0.4060662979581898, "grad_norm": 1.53125, "learning_rate": 0.0013455937747498686, "loss": 0.8713, "step": 5837 }, { "epoch": 0.40613586559532505, "grad_norm": 1.140625, "learning_rate": 0.0013453823170488182, "loss": 0.8209, "step": 5838 }, { "epoch": 0.4062054332324603, "grad_norm": 0.8828125, "learning_rate": 0.0013451708418109945, "loss": 0.7567, "step": 5839 }, { "epoch": 0.40627500086959545, "grad_norm": 1.5078125, "learning_rate": 0.0013449593490471351, "loss": 1.2558, "step": 5840 }, { "epoch": 0.4063445685067307, "grad_norm": 1.15625, "learning_rate": 0.001344747838767979, "loss": 0.9353, "step": 5841 }, { "epoch": 0.4064141361438659, "grad_norm": 1.078125, "learning_rate": 0.001344536310984265, "loss": 0.8507, "step": 5842 }, { "epoch": 0.40648370378100107, "grad_norm": 0.875, "learning_rate": 0.0013443247657067342, "loss": 0.828, "step": 5843 }, { "epoch": 0.4065532714181363, "grad_norm": 1.0, "learning_rate": 0.0013441132029461268, "loss": 0.844, "step": 5844 }, { "epoch": 0.40662283905527147, "grad_norm": 0.85546875, "learning_rate": 0.0013439016227131857, "loss": 0.7064, "step": 5845 }, { "epoch": 0.4066924066924067, "grad_norm": 0.99609375, "learning_rate": 0.0013436900250186536, "loss": 0.6783, "step": 5846 }, { "epoch": 0.4067619743295419, "grad_norm": 1.0390625, "learning_rate": 0.0013434784098732742, "loss": 0.8343, "step": 5847 }, { "epoch": 0.4068315419666771, "grad_norm": 1.1953125, "learning_rate": 0.0013432667772877926, "loss": 0.8044, "step": 5848 }, { "epoch": 0.4069011096038123, "grad_norm": 0.98828125, "learning_rate": 0.0013430551272729538, "loss": 0.8037, "step": 5849 }, { "epoch": 0.4069706772409475, "grad_norm": 0.9765625, "learning_rate": 0.0013428434598395055, "loss": 0.7308, "step": 5850 }, { "epoch": 0.4070402448780827, "grad_norm": 1.421875, "learning_rate": 0.0013426317749981936, "loss": 1.059, "step": 5851 }, { "epoch": 0.40710981251521794, "grad_norm": 1.078125, "learning_rate": 0.0013424200727597678, "loss": 0.7579, "step": 5852 }, { "epoch": 0.4071793801523531, "grad_norm": 1.015625, "learning_rate": 0.0013422083531349762, "loss": 0.7347, "step": 5853 }, { "epoch": 0.40724894778948834, "grad_norm": 1.140625, "learning_rate": 0.0013419966161345694, "loss": 0.9378, "step": 5854 }, { "epoch": 0.40731851542662356, "grad_norm": 0.96875, "learning_rate": 0.0013417848617692984, "loss": 0.7056, "step": 5855 }, { "epoch": 0.40738808306375873, "grad_norm": 1.1640625, "learning_rate": 0.0013415730900499146, "loss": 0.6704, "step": 5856 }, { "epoch": 0.40745765070089396, "grad_norm": 1.125, "learning_rate": 0.0013413613009871713, "loss": 0.8853, "step": 5857 }, { "epoch": 0.40752721833802913, "grad_norm": 1.0390625, "learning_rate": 0.001341149494591821, "loss": 0.7417, "step": 5858 }, { "epoch": 0.40759678597516436, "grad_norm": 1.3046875, "learning_rate": 0.0013409376708746197, "loss": 0.7576, "step": 5859 }, { "epoch": 0.4076663536122996, "grad_norm": 1.078125, "learning_rate": 0.0013407258298463215, "loss": 0.8548, "step": 5860 }, { "epoch": 0.40773592124943475, "grad_norm": 1.203125, "learning_rate": 0.0013405139715176833, "loss": 0.9098, "step": 5861 }, { "epoch": 0.40780548888657, "grad_norm": 1.2109375, "learning_rate": 0.0013403020958994616, "loss": 0.7366, "step": 5862 }, { "epoch": 0.40787505652370515, "grad_norm": 0.95703125, "learning_rate": 0.0013400902030024147, "loss": 0.7845, "step": 5863 }, { "epoch": 0.4079446241608404, "grad_norm": 1.515625, "learning_rate": 0.0013398782928373018, "loss": 1.0852, "step": 5864 }, { "epoch": 0.4080141917979756, "grad_norm": 1.15625, "learning_rate": 0.0013396663654148822, "loss": 1.0065, "step": 5865 }, { "epoch": 0.40808375943511077, "grad_norm": 1.0390625, "learning_rate": 0.0013394544207459167, "loss": 0.7465, "step": 5866 }, { "epoch": 0.408153327072246, "grad_norm": 1.1484375, "learning_rate": 0.0013392424588411665, "loss": 0.871, "step": 5867 }, { "epoch": 0.4082228947093812, "grad_norm": 1.40625, "learning_rate": 0.0013390304797113943, "loss": 0.708, "step": 5868 }, { "epoch": 0.4082924623465164, "grad_norm": 1.0234375, "learning_rate": 0.0013388184833673631, "loss": 0.6567, "step": 5869 }, { "epoch": 0.4083620299836516, "grad_norm": 1.046875, "learning_rate": 0.001338606469819837, "loss": 0.7794, "step": 5870 }, { "epoch": 0.4084315976207868, "grad_norm": 0.890625, "learning_rate": 0.0013383944390795812, "loss": 0.899, "step": 5871 }, { "epoch": 0.408501165257922, "grad_norm": 1.1640625, "learning_rate": 0.001338182391157361, "loss": 0.8562, "step": 5872 }, { "epoch": 0.40857073289505724, "grad_norm": 1.140625, "learning_rate": 0.0013379703260639442, "loss": 0.9008, "step": 5873 }, { "epoch": 0.4086403005321924, "grad_norm": 1.0078125, "learning_rate": 0.0013377582438100972, "loss": 0.8773, "step": 5874 }, { "epoch": 0.40870986816932764, "grad_norm": 0.953125, "learning_rate": 0.0013375461444065896, "loss": 0.7937, "step": 5875 }, { "epoch": 0.4087794358064628, "grad_norm": 1.4140625, "learning_rate": 0.0013373340278641894, "loss": 0.7907, "step": 5876 }, { "epoch": 0.40884900344359804, "grad_norm": 1.1015625, "learning_rate": 0.0013371218941936683, "loss": 0.8613, "step": 5877 }, { "epoch": 0.40891857108073326, "grad_norm": 1.0546875, "learning_rate": 0.0013369097434057964, "loss": 0.7615, "step": 5878 }, { "epoch": 0.40898813871786843, "grad_norm": 0.98828125, "learning_rate": 0.0013366975755113456, "loss": 0.9024, "step": 5879 }, { "epoch": 0.40905770635500366, "grad_norm": 1.125, "learning_rate": 0.0013364853905210893, "loss": 0.7378, "step": 5880 }, { "epoch": 0.4091272739921389, "grad_norm": 1.140625, "learning_rate": 0.0013362731884458006, "loss": 0.7813, "step": 5881 }, { "epoch": 0.40919684162927406, "grad_norm": 1.2421875, "learning_rate": 0.0013360609692962546, "loss": 1.0286, "step": 5882 }, { "epoch": 0.4092664092664093, "grad_norm": 1.1484375, "learning_rate": 0.001335848733083226, "loss": 0.7492, "step": 5883 }, { "epoch": 0.40933597690354445, "grad_norm": 1.0703125, "learning_rate": 0.001335636479817492, "loss": 0.8455, "step": 5884 }, { "epoch": 0.4094055445406797, "grad_norm": 1.0234375, "learning_rate": 0.0013354242095098294, "loss": 0.9679, "step": 5885 }, { "epoch": 0.4094751121778149, "grad_norm": 1.046875, "learning_rate": 0.0013352119221710158, "loss": 0.9595, "step": 5886 }, { "epoch": 0.4095446798149501, "grad_norm": 1.0078125, "learning_rate": 0.0013349996178118305, "loss": 0.8342, "step": 5887 }, { "epoch": 0.4096142474520853, "grad_norm": 1.1484375, "learning_rate": 0.0013347872964430527, "loss": 0.7592, "step": 5888 }, { "epoch": 0.40968381508922047, "grad_norm": 1.125, "learning_rate": 0.0013345749580754643, "loss": 0.9402, "step": 5889 }, { "epoch": 0.4097533827263557, "grad_norm": 1.046875, "learning_rate": 0.0013343626027198451, "loss": 0.8261, "step": 5890 }, { "epoch": 0.4098229503634909, "grad_norm": 1.359375, "learning_rate": 0.0013341502303869787, "loss": 0.8737, "step": 5891 }, { "epoch": 0.4098925180006261, "grad_norm": 1.09375, "learning_rate": 0.0013339378410876478, "loss": 0.8406, "step": 5892 }, { "epoch": 0.4099620856377613, "grad_norm": 1.078125, "learning_rate": 0.0013337254348326363, "loss": 0.8002, "step": 5893 }, { "epoch": 0.4100316532748965, "grad_norm": 1.3125, "learning_rate": 0.0013335130116327296, "loss": 0.7979, "step": 5894 }, { "epoch": 0.4101012209120317, "grad_norm": 1.3515625, "learning_rate": 0.0013333005714987127, "loss": 0.9424, "step": 5895 }, { "epoch": 0.41017078854916694, "grad_norm": 1.3046875, "learning_rate": 0.0013330881144413733, "loss": 0.9167, "step": 5896 }, { "epoch": 0.4102403561863021, "grad_norm": 1.2734375, "learning_rate": 0.0013328756404714982, "loss": 1.0018, "step": 5897 }, { "epoch": 0.41030992382343734, "grad_norm": 1.0234375, "learning_rate": 0.0013326631495998759, "loss": 0.8047, "step": 5898 }, { "epoch": 0.41037949146057257, "grad_norm": 1.0078125, "learning_rate": 0.0013324506418372953, "loss": 0.918, "step": 5899 }, { "epoch": 0.41044905909770774, "grad_norm": 1.0859375, "learning_rate": 0.001332238117194547, "loss": 0.9236, "step": 5900 }, { "epoch": 0.41051862673484296, "grad_norm": 1.0546875, "learning_rate": 0.001332025575682422, "loss": 0.9684, "step": 5901 }, { "epoch": 0.41058819437197813, "grad_norm": 1.6484375, "learning_rate": 0.0013318130173117111, "loss": 1.0373, "step": 5902 }, { "epoch": 0.41065776200911336, "grad_norm": 1.359375, "learning_rate": 0.0013316004420932085, "loss": 0.8822, "step": 5903 }, { "epoch": 0.4107273296462486, "grad_norm": 0.890625, "learning_rate": 0.001331387850037706, "loss": 0.7408, "step": 5904 }, { "epoch": 0.41079689728338376, "grad_norm": 1.0859375, "learning_rate": 0.0013311752411559994, "loss": 0.8438, "step": 5905 }, { "epoch": 0.410866464920519, "grad_norm": 1.1171875, "learning_rate": 0.001330962615458883, "loss": 0.8473, "step": 5906 }, { "epoch": 0.41093603255765415, "grad_norm": 1.171875, "learning_rate": 0.0013307499729571532, "loss": 0.784, "step": 5907 }, { "epoch": 0.4110056001947894, "grad_norm": 1.3984375, "learning_rate": 0.001330537313661607, "loss": 1.2242, "step": 5908 }, { "epoch": 0.4110751678319246, "grad_norm": 1.0390625, "learning_rate": 0.001330324637583042, "loss": 0.7399, "step": 5909 }, { "epoch": 0.4111447354690598, "grad_norm": 1.21875, "learning_rate": 0.001330111944732257, "loss": 0.7732, "step": 5910 }, { "epoch": 0.411214303106195, "grad_norm": 1.3671875, "learning_rate": 0.0013298992351200509, "loss": 1.0661, "step": 5911 }, { "epoch": 0.4112838707433302, "grad_norm": 1.2421875, "learning_rate": 0.001329686508757225, "loss": 0.6823, "step": 5912 }, { "epoch": 0.4113534383804654, "grad_norm": 1.0390625, "learning_rate": 0.0013294737656545795, "loss": 0.8745, "step": 5913 }, { "epoch": 0.4114230060176006, "grad_norm": 1.015625, "learning_rate": 0.0013292610058229168, "loss": 0.7367, "step": 5914 }, { "epoch": 0.4114925736547358, "grad_norm": 0.87890625, "learning_rate": 0.0013290482292730402, "loss": 0.9285, "step": 5915 }, { "epoch": 0.411562141291871, "grad_norm": 1.2578125, "learning_rate": 0.0013288354360157528, "loss": 0.7864, "step": 5916 }, { "epoch": 0.41163170892900625, "grad_norm": 1.1015625, "learning_rate": 0.0013286226260618597, "loss": 0.8476, "step": 5917 }, { "epoch": 0.4117012765661414, "grad_norm": 1.34375, "learning_rate": 0.0013284097994221656, "loss": 0.7926, "step": 5918 }, { "epoch": 0.41177084420327664, "grad_norm": 1.03125, "learning_rate": 0.0013281969561074775, "loss": 1.0299, "step": 5919 }, { "epoch": 0.4118404118404118, "grad_norm": 1.1328125, "learning_rate": 0.001327984096128602, "loss": 0.9616, "step": 5920 }, { "epoch": 0.41190997947754704, "grad_norm": 1.2265625, "learning_rate": 0.0013277712194963475, "loss": 0.9856, "step": 5921 }, { "epoch": 0.41197954711468227, "grad_norm": 1.265625, "learning_rate": 0.0013275583262215224, "loss": 1.2096, "step": 5922 }, { "epoch": 0.41204911475181744, "grad_norm": 1.15625, "learning_rate": 0.0013273454163149365, "loss": 0.8468, "step": 5923 }, { "epoch": 0.41211868238895266, "grad_norm": 1.5625, "learning_rate": 0.0013271324897874007, "loss": 1.0125, "step": 5924 }, { "epoch": 0.4121882500260879, "grad_norm": 1.015625, "learning_rate": 0.0013269195466497252, "loss": 0.728, "step": 5925 }, { "epoch": 0.41225781766322306, "grad_norm": 1.1796875, "learning_rate": 0.0013267065869127235, "loss": 1.0771, "step": 5926 }, { "epoch": 0.4123273853003583, "grad_norm": 1.0, "learning_rate": 0.0013264936105872077, "loss": 0.6939, "step": 5927 }, { "epoch": 0.41239695293749346, "grad_norm": 1.1171875, "learning_rate": 0.001326280617683992, "loss": 0.8318, "step": 5928 }, { "epoch": 0.4124665205746287, "grad_norm": 0.9296875, "learning_rate": 0.0013260676082138914, "loss": 0.8941, "step": 5929 }, { "epoch": 0.4125360882117639, "grad_norm": 1.234375, "learning_rate": 0.001325854582187721, "loss": 0.8441, "step": 5930 }, { "epoch": 0.4126056558488991, "grad_norm": 0.9296875, "learning_rate": 0.0013256415396162976, "loss": 0.9476, "step": 5931 }, { "epoch": 0.4126752234860343, "grad_norm": 0.96875, "learning_rate": 0.0013254284805104377, "loss": 0.8693, "step": 5932 }, { "epoch": 0.4127447911231695, "grad_norm": 1.46875, "learning_rate": 0.0013252154048809604, "loss": 1.0098, "step": 5933 }, { "epoch": 0.4128143587603047, "grad_norm": 0.85546875, "learning_rate": 0.0013250023127386835, "loss": 0.5385, "step": 5934 }, { "epoch": 0.4128839263974399, "grad_norm": 0.95703125, "learning_rate": 0.0013247892040944276, "loss": 0.7623, "step": 5935 }, { "epoch": 0.4129534940345751, "grad_norm": 0.90234375, "learning_rate": 0.001324576078959013, "loss": 0.6963, "step": 5936 }, { "epoch": 0.4130230616717103, "grad_norm": 1.1875, "learning_rate": 0.0013243629373432609, "loss": 0.827, "step": 5937 }, { "epoch": 0.41309262930884555, "grad_norm": 1.1171875, "learning_rate": 0.0013241497792579938, "loss": 0.8957, "step": 5938 }, { "epoch": 0.4131621969459807, "grad_norm": 1.1796875, "learning_rate": 0.0013239366047140347, "loss": 0.8064, "step": 5939 }, { "epoch": 0.41323176458311595, "grad_norm": 1.2734375, "learning_rate": 0.001323723413722208, "loss": 0.8238, "step": 5940 }, { "epoch": 0.4133013322202511, "grad_norm": 0.9296875, "learning_rate": 0.0013235102062933372, "loss": 0.8791, "step": 5941 }, { "epoch": 0.41337089985738634, "grad_norm": 1.4765625, "learning_rate": 0.0013232969824382497, "loss": 0.7921, "step": 5942 }, { "epoch": 0.41344046749452157, "grad_norm": 1.078125, "learning_rate": 0.0013230837421677702, "loss": 0.8456, "step": 5943 }, { "epoch": 0.41351003513165674, "grad_norm": 1.1171875, "learning_rate": 0.0013228704854927268, "loss": 0.8593, "step": 5944 }, { "epoch": 0.41357960276879197, "grad_norm": 0.85546875, "learning_rate": 0.001322657212423948, "loss": 0.8399, "step": 5945 }, { "epoch": 0.41364917040592714, "grad_norm": 0.96875, "learning_rate": 0.001322443922972262, "loss": 0.9466, "step": 5946 }, { "epoch": 0.41371873804306236, "grad_norm": 0.97265625, "learning_rate": 0.001322230617148499, "loss": 0.8536, "step": 5947 }, { "epoch": 0.4137883056801976, "grad_norm": 0.93359375, "learning_rate": 0.0013220172949634892, "loss": 0.8135, "step": 5948 }, { "epoch": 0.41385787331733276, "grad_norm": 1.015625, "learning_rate": 0.0013218039564280647, "loss": 0.7584, "step": 5949 }, { "epoch": 0.413927440954468, "grad_norm": 1.0390625, "learning_rate": 0.0013215906015530568, "loss": 0.7815, "step": 5950 }, { "epoch": 0.4139970085916032, "grad_norm": 1.484375, "learning_rate": 0.001321377230349299, "loss": 1.0563, "step": 5951 }, { "epoch": 0.4140665762287384, "grad_norm": 1.2421875, "learning_rate": 0.0013211638428276256, "loss": 0.962, "step": 5952 }, { "epoch": 0.4141361438658736, "grad_norm": 1.140625, "learning_rate": 0.0013209504389988709, "loss": 0.7863, "step": 5953 }, { "epoch": 0.4142057115030088, "grad_norm": 1.2578125, "learning_rate": 0.0013207370188738708, "loss": 0.9184, "step": 5954 }, { "epoch": 0.414275279140144, "grad_norm": 0.96875, "learning_rate": 0.0013205235824634615, "loss": 0.8869, "step": 5955 }, { "epoch": 0.41434484677727923, "grad_norm": 1.3515625, "learning_rate": 0.0013203101297784804, "loss": 0.786, "step": 5956 }, { "epoch": 0.4144144144144144, "grad_norm": 1.1171875, "learning_rate": 0.0013200966608297648, "loss": 0.9513, "step": 5957 }, { "epoch": 0.4144839820515496, "grad_norm": 1.109375, "learning_rate": 0.0013198831756281546, "loss": 1.0198, "step": 5958 }, { "epoch": 0.4145535496886848, "grad_norm": 1.0234375, "learning_rate": 0.001319669674184489, "loss": 0.6842, "step": 5959 }, { "epoch": 0.41462311732582, "grad_norm": 1.5, "learning_rate": 0.0013194561565096085, "loss": 1.0767, "step": 5960 }, { "epoch": 0.41469268496295525, "grad_norm": 1.4140625, "learning_rate": 0.0013192426226143548, "loss": 0.9905, "step": 5961 }, { "epoch": 0.4147622526000904, "grad_norm": 1.2109375, "learning_rate": 0.0013190290725095695, "loss": 0.8643, "step": 5962 }, { "epoch": 0.41483182023722565, "grad_norm": 1.1171875, "learning_rate": 0.0013188155062060962, "loss": 0.6039, "step": 5963 }, { "epoch": 0.41490138787436087, "grad_norm": 1.0625, "learning_rate": 0.0013186019237147785, "loss": 0.8251, "step": 5964 }, { "epoch": 0.41497095551149604, "grad_norm": 0.94140625, "learning_rate": 0.0013183883250464606, "loss": 0.8622, "step": 5965 }, { "epoch": 0.41504052314863127, "grad_norm": 1.1015625, "learning_rate": 0.0013181747102119887, "loss": 0.8189, "step": 5966 }, { "epoch": 0.41511009078576644, "grad_norm": 1.4765625, "learning_rate": 0.0013179610792222085, "loss": 0.936, "step": 5967 }, { "epoch": 0.41517965842290167, "grad_norm": 0.9609375, "learning_rate": 0.0013177474320879674, "loss": 0.857, "step": 5968 }, { "epoch": 0.4152492260600369, "grad_norm": 1.0234375, "learning_rate": 0.0013175337688201135, "loss": 0.9093, "step": 5969 }, { "epoch": 0.41531879369717206, "grad_norm": 0.94921875, "learning_rate": 0.001317320089429495, "loss": 0.8407, "step": 5970 }, { "epoch": 0.4153883613343073, "grad_norm": 1.03125, "learning_rate": 0.001317106393926962, "loss": 0.7999, "step": 5971 }, { "epoch": 0.41545792897144246, "grad_norm": 0.9921875, "learning_rate": 0.0013168926823233645, "loss": 0.9324, "step": 5972 }, { "epoch": 0.4155274966085777, "grad_norm": 1.4609375, "learning_rate": 0.001316678954629554, "loss": 1.0212, "step": 5973 }, { "epoch": 0.4155970642457129, "grad_norm": 1.3828125, "learning_rate": 0.0013164652108563822, "loss": 0.9504, "step": 5974 }, { "epoch": 0.4156666318828481, "grad_norm": 1.3671875, "learning_rate": 0.0013162514510147022, "loss": 0.8767, "step": 5975 }, { "epoch": 0.4157361995199833, "grad_norm": 1.359375, "learning_rate": 0.0013160376751153674, "loss": 0.7471, "step": 5976 }, { "epoch": 0.41580576715711853, "grad_norm": 1.1015625, "learning_rate": 0.0013158238831692324, "loss": 0.9097, "step": 5977 }, { "epoch": 0.4158753347942537, "grad_norm": 1.359375, "learning_rate": 0.0013156100751871528, "loss": 1.1735, "step": 5978 }, { "epoch": 0.41594490243138893, "grad_norm": 1.1640625, "learning_rate": 0.0013153962511799843, "loss": 0.8713, "step": 5979 }, { "epoch": 0.4160144700685241, "grad_norm": 1.3984375, "learning_rate": 0.0013151824111585836, "loss": 1.0516, "step": 5980 }, { "epoch": 0.4160840377056593, "grad_norm": 1.0703125, "learning_rate": 0.0013149685551338086, "loss": 0.796, "step": 5981 }, { "epoch": 0.41615360534279455, "grad_norm": 0.9296875, "learning_rate": 0.0013147546831165182, "loss": 0.9837, "step": 5982 }, { "epoch": 0.4162231729799297, "grad_norm": 1.03125, "learning_rate": 0.0013145407951175717, "loss": 0.751, "step": 5983 }, { "epoch": 0.41629274061706495, "grad_norm": 0.99609375, "learning_rate": 0.0013143268911478287, "loss": 0.7369, "step": 5984 }, { "epoch": 0.4163623082542001, "grad_norm": 1.1875, "learning_rate": 0.0013141129712181505, "loss": 0.9013, "step": 5985 }, { "epoch": 0.41643187589133535, "grad_norm": 0.92578125, "learning_rate": 0.0013138990353393988, "loss": 1.1253, "step": 5986 }, { "epoch": 0.41650144352847057, "grad_norm": 1.078125, "learning_rate": 0.0013136850835224366, "loss": 0.6227, "step": 5987 }, { "epoch": 0.41657101116560574, "grad_norm": 0.98046875, "learning_rate": 0.0013134711157781268, "loss": 0.6232, "step": 5988 }, { "epoch": 0.41664057880274097, "grad_norm": 1.25, "learning_rate": 0.0013132571321173337, "loss": 0.7733, "step": 5989 }, { "epoch": 0.4167101464398762, "grad_norm": 1.375, "learning_rate": 0.0013130431325509221, "loss": 0.9784, "step": 5990 }, { "epoch": 0.41677971407701137, "grad_norm": 0.91796875, "learning_rate": 0.0013128291170897584, "loss": 0.8261, "step": 5991 }, { "epoch": 0.4168492817141466, "grad_norm": 1.078125, "learning_rate": 0.0013126150857447087, "loss": 0.8843, "step": 5992 }, { "epoch": 0.41691884935128176, "grad_norm": 1.2109375, "learning_rate": 0.001312401038526641, "loss": 0.7726, "step": 5993 }, { "epoch": 0.416988416988417, "grad_norm": 1.25, "learning_rate": 0.0013121869754464228, "loss": 0.9126, "step": 5994 }, { "epoch": 0.4170579846255522, "grad_norm": 1.3671875, "learning_rate": 0.0013119728965149237, "loss": 1.1141, "step": 5995 }, { "epoch": 0.4171275522626874, "grad_norm": 1.28125, "learning_rate": 0.0013117588017430134, "loss": 0.9249, "step": 5996 }, { "epoch": 0.4171971198998226, "grad_norm": 1.046875, "learning_rate": 0.0013115446911415626, "loss": 0.773, "step": 5997 }, { "epoch": 0.4172666875369578, "grad_norm": 1.1015625, "learning_rate": 0.0013113305647214424, "loss": 0.7567, "step": 5998 }, { "epoch": 0.417336255174093, "grad_norm": 1.1640625, "learning_rate": 0.0013111164224935256, "loss": 0.8103, "step": 5999 }, { "epoch": 0.41740582281122823, "grad_norm": 0.96484375, "learning_rate": 0.001310902264468685, "loss": 0.6928, "step": 6000 }, { "epoch": 0.4174753904483634, "grad_norm": 0.90234375, "learning_rate": 0.0013106880906577944, "loss": 0.6852, "step": 6001 }, { "epoch": 0.41754495808549863, "grad_norm": 1.125, "learning_rate": 0.0013104739010717287, "loss": 0.8544, "step": 6002 }, { "epoch": 0.41761452572263386, "grad_norm": 1.296875, "learning_rate": 0.0013102596957213631, "loss": 0.8348, "step": 6003 }, { "epoch": 0.417684093359769, "grad_norm": 1.1015625, "learning_rate": 0.0013100454746175739, "loss": 0.8061, "step": 6004 }, { "epoch": 0.41775366099690425, "grad_norm": 1.109375, "learning_rate": 0.0013098312377712383, "loss": 0.7722, "step": 6005 }, { "epoch": 0.4178232286340394, "grad_norm": 0.98828125, "learning_rate": 0.0013096169851932338, "loss": 1.0377, "step": 6006 }, { "epoch": 0.41789279627117465, "grad_norm": 0.9921875, "learning_rate": 0.0013094027168944397, "loss": 0.8373, "step": 6007 }, { "epoch": 0.4179623639083099, "grad_norm": 1.140625, "learning_rate": 0.001309188432885735, "loss": 1.1225, "step": 6008 }, { "epoch": 0.41803193154544505, "grad_norm": 1.2578125, "learning_rate": 0.0013089741331780004, "loss": 0.9004, "step": 6009 }, { "epoch": 0.41810149918258027, "grad_norm": 1.109375, "learning_rate": 0.0013087598177821166, "loss": 0.8053, "step": 6010 }, { "epoch": 0.41817106681971544, "grad_norm": 1.5, "learning_rate": 0.0013085454867089652, "loss": 0.8599, "step": 6011 }, { "epoch": 0.41824063445685067, "grad_norm": 1.125, "learning_rate": 0.0013083311399694293, "loss": 0.7888, "step": 6012 }, { "epoch": 0.4183102020939859, "grad_norm": 1.09375, "learning_rate": 0.0013081167775743925, "loss": 0.814, "step": 6013 }, { "epoch": 0.41837976973112107, "grad_norm": 1.2421875, "learning_rate": 0.0013079023995347385, "loss": 0.9164, "step": 6014 }, { "epoch": 0.4184493373682563, "grad_norm": 1.265625, "learning_rate": 0.0013076880058613524, "loss": 0.8386, "step": 6015 }, { "epoch": 0.4185189050053915, "grad_norm": 1.203125, "learning_rate": 0.0013074735965651206, "loss": 0.7152, "step": 6016 }, { "epoch": 0.4185884726425267, "grad_norm": 0.93359375, "learning_rate": 0.0013072591716569294, "loss": 0.6224, "step": 6017 }, { "epoch": 0.4186580402796619, "grad_norm": 0.96484375, "learning_rate": 0.001307044731147666, "loss": 0.902, "step": 6018 }, { "epoch": 0.4187276079167971, "grad_norm": 1.375, "learning_rate": 0.0013068302750482185, "loss": 0.7997, "step": 6019 }, { "epoch": 0.4187971755539323, "grad_norm": 1.1640625, "learning_rate": 0.0013066158033694763, "loss": 0.9071, "step": 6020 }, { "epoch": 0.41886674319106754, "grad_norm": 1.0703125, "learning_rate": 0.0013064013161223293, "loss": 0.913, "step": 6021 }, { "epoch": 0.4189363108282027, "grad_norm": 1.1171875, "learning_rate": 0.0013061868133176678, "loss": 0.9266, "step": 6022 }, { "epoch": 0.41900587846533793, "grad_norm": 0.87109375, "learning_rate": 0.001305972294966383, "loss": 0.9229, "step": 6023 }, { "epoch": 0.4190754461024731, "grad_norm": 0.96484375, "learning_rate": 0.0013057577610793673, "loss": 0.8808, "step": 6024 }, { "epoch": 0.41914501373960833, "grad_norm": 1.1171875, "learning_rate": 0.001305543211667514, "loss": 0.8094, "step": 6025 }, { "epoch": 0.41921458137674356, "grad_norm": 1.3515625, "learning_rate": 0.001305328646741716, "loss": 0.8917, "step": 6026 }, { "epoch": 0.4192841490138787, "grad_norm": 1.1796875, "learning_rate": 0.0013051140663128686, "loss": 0.8215, "step": 6027 }, { "epoch": 0.41935371665101395, "grad_norm": 1.1640625, "learning_rate": 0.0013048994703918667, "loss": 0.869, "step": 6028 }, { "epoch": 0.4194232842881492, "grad_norm": 1.1953125, "learning_rate": 0.0013046848589896066, "loss": 0.9581, "step": 6029 }, { "epoch": 0.41949285192528435, "grad_norm": 1.0703125, "learning_rate": 0.0013044702321169848, "loss": 0.7993, "step": 6030 }, { "epoch": 0.4195624195624196, "grad_norm": 1.265625, "learning_rate": 0.0013042555897848996, "loss": 0.9372, "step": 6031 }, { "epoch": 0.41963198719955475, "grad_norm": 1.21875, "learning_rate": 0.0013040409320042488, "loss": 0.7678, "step": 6032 }, { "epoch": 0.41970155483668997, "grad_norm": 1.0078125, "learning_rate": 0.0013038262587859323, "loss": 0.9445, "step": 6033 }, { "epoch": 0.4197711224738252, "grad_norm": 1.53125, "learning_rate": 0.0013036115701408493, "loss": 0.631, "step": 6034 }, { "epoch": 0.41984069011096037, "grad_norm": 1.5234375, "learning_rate": 0.0013033968660799014, "loss": 1.0625, "step": 6035 }, { "epoch": 0.4199102577480956, "grad_norm": 1.09375, "learning_rate": 0.00130318214661399, "loss": 0.6111, "step": 6036 }, { "epoch": 0.41997982538523077, "grad_norm": 0.921875, "learning_rate": 0.001302967411754017, "loss": 0.8909, "step": 6037 }, { "epoch": 0.420049393022366, "grad_norm": 1.1015625, "learning_rate": 0.0013027526615108863, "loss": 0.8946, "step": 6038 }, { "epoch": 0.4201189606595012, "grad_norm": 0.9296875, "learning_rate": 0.001302537895895501, "loss": 0.8895, "step": 6039 }, { "epoch": 0.4201885282966364, "grad_norm": 1.28125, "learning_rate": 0.0013023231149187663, "loss": 0.9964, "step": 6040 }, { "epoch": 0.4202580959337716, "grad_norm": 1.4609375, "learning_rate": 0.0013021083185915882, "loss": 1.0044, "step": 6041 }, { "epoch": 0.42032766357090684, "grad_norm": 0.95703125, "learning_rate": 0.0013018935069248718, "loss": 0.7365, "step": 6042 }, { "epoch": 0.420397231208042, "grad_norm": 0.87890625, "learning_rate": 0.0013016786799295251, "loss": 0.7353, "step": 6043 }, { "epoch": 0.42046679884517724, "grad_norm": 1.03125, "learning_rate": 0.0013014638376164555, "loss": 0.7773, "step": 6044 }, { "epoch": 0.4205363664823124, "grad_norm": 1.171875, "learning_rate": 0.0013012489799965716, "loss": 1.0401, "step": 6045 }, { "epoch": 0.42060593411944763, "grad_norm": 1.0234375, "learning_rate": 0.001301034107080783, "loss": 0.9157, "step": 6046 }, { "epoch": 0.42067550175658286, "grad_norm": 0.9921875, "learning_rate": 0.00130081921888, "loss": 0.7776, "step": 6047 }, { "epoch": 0.42074506939371803, "grad_norm": 1.203125, "learning_rate": 0.0013006043154051331, "loss": 0.9943, "step": 6048 }, { "epoch": 0.42081463703085326, "grad_norm": 1.1640625, "learning_rate": 0.0013003893966670942, "loss": 1.0388, "step": 6049 }, { "epoch": 0.4208842046679884, "grad_norm": 1.265625, "learning_rate": 0.0013001744626767958, "loss": 0.7578, "step": 6050 }, { "epoch": 0.42095377230512365, "grad_norm": 1.3203125, "learning_rate": 0.0012999595134451512, "loss": 0.9979, "step": 6051 }, { "epoch": 0.4210233399422589, "grad_norm": 1.84375, "learning_rate": 0.0012997445489830745, "loss": 0.855, "step": 6052 }, { "epoch": 0.42109290757939405, "grad_norm": 1.1875, "learning_rate": 0.0012995295693014803, "loss": 1.2088, "step": 6053 }, { "epoch": 0.4211624752165293, "grad_norm": 1.1953125, "learning_rate": 0.0012993145744112844, "loss": 0.7431, "step": 6054 }, { "epoch": 0.4212320428536645, "grad_norm": 0.92578125, "learning_rate": 0.001299099564323403, "loss": 1.0207, "step": 6055 }, { "epoch": 0.42130161049079967, "grad_norm": 1.1953125, "learning_rate": 0.0012988845390487533, "loss": 0.898, "step": 6056 }, { "epoch": 0.4213711781279349, "grad_norm": 1.3671875, "learning_rate": 0.0012986694985982533, "loss": 1.0087, "step": 6057 }, { "epoch": 0.42144074576507007, "grad_norm": 1.2265625, "learning_rate": 0.0012984544429828215, "loss": 1.1273, "step": 6058 }, { "epoch": 0.4215103134022053, "grad_norm": 1.0625, "learning_rate": 0.0012982393722133774, "loss": 0.7767, "step": 6059 }, { "epoch": 0.4215798810393405, "grad_norm": 0.9375, "learning_rate": 0.0012980242863008412, "loss": 0.6377, "step": 6060 }, { "epoch": 0.4216494486764757, "grad_norm": 1.1484375, "learning_rate": 0.001297809185256134, "loss": 1.0128, "step": 6061 }, { "epoch": 0.4217190163136109, "grad_norm": 0.93359375, "learning_rate": 0.0012975940690901772, "loss": 0.7818, "step": 6062 }, { "epoch": 0.4217885839507461, "grad_norm": 0.96484375, "learning_rate": 0.0012973789378138939, "loss": 0.6688, "step": 6063 }, { "epoch": 0.4218581515878813, "grad_norm": 1.4296875, "learning_rate": 0.001297163791438207, "loss": 0.8949, "step": 6064 }, { "epoch": 0.42192771922501654, "grad_norm": 1.3515625, "learning_rate": 0.0012969486299740402, "loss": 0.7757, "step": 6065 }, { "epoch": 0.4219972868621517, "grad_norm": 0.97265625, "learning_rate": 0.001296733453432319, "loss": 0.871, "step": 6066 }, { "epoch": 0.42206685449928694, "grad_norm": 1.0390625, "learning_rate": 0.0012965182618239685, "loss": 0.8648, "step": 6067 }, { "epoch": 0.42213642213642216, "grad_norm": 1.1953125, "learning_rate": 0.0012963030551599154, "loss": 0.9459, "step": 6068 }, { "epoch": 0.42220598977355733, "grad_norm": 0.84765625, "learning_rate": 0.0012960878334510864, "loss": 0.5526, "step": 6069 }, { "epoch": 0.42227555741069256, "grad_norm": 0.9921875, "learning_rate": 0.00129587259670841, "loss": 0.9363, "step": 6070 }, { "epoch": 0.42234512504782773, "grad_norm": 1.3125, "learning_rate": 0.001295657344942814, "loss": 1.0081, "step": 6071 }, { "epoch": 0.42241469268496296, "grad_norm": 1.2265625, "learning_rate": 0.0012954420781652288, "loss": 1.0101, "step": 6072 }, { "epoch": 0.4224842603220982, "grad_norm": 1.0625, "learning_rate": 0.0012952267963865839, "loss": 1.1445, "step": 6073 }, { "epoch": 0.42255382795923335, "grad_norm": 1.1171875, "learning_rate": 0.00129501149961781, "loss": 0.9124, "step": 6074 }, { "epoch": 0.4226233955963686, "grad_norm": 1.25, "learning_rate": 0.001294796187869839, "loss": 0.8919, "step": 6075 }, { "epoch": 0.42269296323350375, "grad_norm": 1.046875, "learning_rate": 0.0012945808611536038, "loss": 0.5887, "step": 6076 }, { "epoch": 0.422762530870639, "grad_norm": 1.46875, "learning_rate": 0.0012943655194800371, "loss": 0.8486, "step": 6077 }, { "epoch": 0.4228320985077742, "grad_norm": 1.1640625, "learning_rate": 0.0012941501628600733, "loss": 0.9304, "step": 6078 }, { "epoch": 0.42290166614490937, "grad_norm": 1.09375, "learning_rate": 0.0012939347913046466, "loss": 0.6868, "step": 6079 }, { "epoch": 0.4229712337820446, "grad_norm": 1.03125, "learning_rate": 0.001293719404824693, "loss": 0.7462, "step": 6080 }, { "epoch": 0.4230408014191798, "grad_norm": 0.99609375, "learning_rate": 0.0012935040034311482, "loss": 0.8727, "step": 6081 }, { "epoch": 0.423110369056315, "grad_norm": 0.98046875, "learning_rate": 0.0012932885871349497, "loss": 1.006, "step": 6082 }, { "epoch": 0.4231799366934502, "grad_norm": 1.0234375, "learning_rate": 0.0012930731559470346, "loss": 0.6387, "step": 6083 }, { "epoch": 0.4232495043305854, "grad_norm": 1.1875, "learning_rate": 0.0012928577098783422, "loss": 0.9473, "step": 6084 }, { "epoch": 0.4233190719677206, "grad_norm": 1.296875, "learning_rate": 0.0012926422489398114, "loss": 0.8836, "step": 6085 }, { "epoch": 0.42338863960485584, "grad_norm": 1.2890625, "learning_rate": 0.0012924267731423823, "loss": 0.8869, "step": 6086 }, { "epoch": 0.423458207241991, "grad_norm": 1.109375, "learning_rate": 0.0012922112824969953, "loss": 0.7571, "step": 6087 }, { "epoch": 0.42352777487912624, "grad_norm": 0.96484375, "learning_rate": 0.0012919957770145924, "loss": 0.7119, "step": 6088 }, { "epoch": 0.4235973425162614, "grad_norm": 0.98046875, "learning_rate": 0.001291780256706116, "loss": 0.6646, "step": 6089 }, { "epoch": 0.42366691015339664, "grad_norm": 1.0625, "learning_rate": 0.0012915647215825082, "loss": 0.7336, "step": 6090 }, { "epoch": 0.42373647779053186, "grad_norm": 1.1875, "learning_rate": 0.001291349171654714, "loss": 0.8615, "step": 6091 }, { "epoch": 0.42380604542766703, "grad_norm": 1.34375, "learning_rate": 0.001291133606933677, "loss": 0.8484, "step": 6092 }, { "epoch": 0.42387561306480226, "grad_norm": 1.0703125, "learning_rate": 0.0012909180274303432, "loss": 0.8087, "step": 6093 }, { "epoch": 0.4239451807019375, "grad_norm": 1.078125, "learning_rate": 0.001290702433155658, "loss": 0.6936, "step": 6094 }, { "epoch": 0.42401474833907266, "grad_norm": 1.0859375, "learning_rate": 0.0012904868241205686, "loss": 0.8051, "step": 6095 }, { "epoch": 0.4240843159762079, "grad_norm": 0.9453125, "learning_rate": 0.0012902712003360227, "loss": 0.7881, "step": 6096 }, { "epoch": 0.42415388361334305, "grad_norm": 1.0859375, "learning_rate": 0.001290055561812968, "loss": 0.8346, "step": 6097 }, { "epoch": 0.4242234512504783, "grad_norm": 1.15625, "learning_rate": 0.0012898399085623537, "loss": 0.9084, "step": 6098 }, { "epoch": 0.4242930188876135, "grad_norm": 0.97265625, "learning_rate": 0.00128962424059513, "loss": 0.6617, "step": 6099 }, { "epoch": 0.4243625865247487, "grad_norm": 1.0625, "learning_rate": 0.0012894085579222472, "loss": 0.8488, "step": 6100 }, { "epoch": 0.4244321541618839, "grad_norm": 0.98046875, "learning_rate": 0.0012891928605546564, "loss": 0.7682, "step": 6101 }, { "epoch": 0.42450172179901907, "grad_norm": 1.0390625, "learning_rate": 0.00128897714850331, "loss": 0.8503, "step": 6102 }, { "epoch": 0.4245712894361543, "grad_norm": 3.171875, "learning_rate": 0.0012887614217791605, "loss": 0.905, "step": 6103 }, { "epoch": 0.4246408570732895, "grad_norm": 1.2421875, "learning_rate": 0.0012885456803931614, "loss": 0.9455, "step": 6104 }, { "epoch": 0.4247104247104247, "grad_norm": 1.234375, "learning_rate": 0.0012883299243562673, "loss": 1.0008, "step": 6105 }, { "epoch": 0.4247799923475599, "grad_norm": 1.25, "learning_rate": 0.0012881141536794322, "loss": 0.8103, "step": 6106 }, { "epoch": 0.42484955998469515, "grad_norm": 1.2265625, "learning_rate": 0.001287898368373613, "loss": 0.9041, "step": 6107 }, { "epoch": 0.4249191276218303, "grad_norm": 1.4296875, "learning_rate": 0.0012876825684497658, "loss": 0.8348, "step": 6108 }, { "epoch": 0.42498869525896554, "grad_norm": 1.2265625, "learning_rate": 0.001287466753918848, "loss": 1.0223, "step": 6109 }, { "epoch": 0.4250582628961007, "grad_norm": 1.0859375, "learning_rate": 0.0012872509247918173, "loss": 0.8618, "step": 6110 }, { "epoch": 0.42512783053323594, "grad_norm": 1.0078125, "learning_rate": 0.0012870350810796323, "loss": 0.8407, "step": 6111 }, { "epoch": 0.42519739817037117, "grad_norm": 1.1875, "learning_rate": 0.0012868192227932526, "loss": 0.864, "step": 6112 }, { "epoch": 0.42526696580750634, "grad_norm": 0.9296875, "learning_rate": 0.0012866033499436384, "loss": 0.7074, "step": 6113 }, { "epoch": 0.42533653344464156, "grad_norm": 1.3046875, "learning_rate": 0.0012863874625417514, "loss": 0.919, "step": 6114 }, { "epoch": 0.42540610108177673, "grad_norm": 1.0234375, "learning_rate": 0.0012861715605985515, "loss": 0.8278, "step": 6115 }, { "epoch": 0.42547566871891196, "grad_norm": 1.1796875, "learning_rate": 0.0012859556441250032, "loss": 1.0292, "step": 6116 }, { "epoch": 0.4255452363560472, "grad_norm": 1.0859375, "learning_rate": 0.0012857397131320677, "loss": 0.803, "step": 6117 }, { "epoch": 0.42561480399318236, "grad_norm": 1.1171875, "learning_rate": 0.0012855237676307103, "loss": 0.8109, "step": 6118 }, { "epoch": 0.4256843716303176, "grad_norm": 1.125, "learning_rate": 0.0012853078076318952, "loss": 1.0028, "step": 6119 }, { "epoch": 0.4257539392674528, "grad_norm": 1.125, "learning_rate": 0.0012850918331465872, "loss": 0.9196, "step": 6120 }, { "epoch": 0.425823506904588, "grad_norm": 1.3125, "learning_rate": 0.0012848758441857534, "loss": 0.9724, "step": 6121 }, { "epoch": 0.4258930745417232, "grad_norm": 0.890625, "learning_rate": 0.0012846598407603596, "loss": 0.7952, "step": 6122 }, { "epoch": 0.4259626421788584, "grad_norm": 1.09375, "learning_rate": 0.0012844438228813745, "loss": 0.8355, "step": 6123 }, { "epoch": 0.4260322098159936, "grad_norm": 1.0546875, "learning_rate": 0.0012842277905597652, "loss": 0.6901, "step": 6124 }, { "epoch": 0.4261017774531288, "grad_norm": 1.203125, "learning_rate": 0.0012840117438065017, "loss": 0.9057, "step": 6125 }, { "epoch": 0.426171345090264, "grad_norm": 0.92578125, "learning_rate": 0.0012837956826325532, "loss": 0.7786, "step": 6126 }, { "epoch": 0.4262409127273992, "grad_norm": 1.515625, "learning_rate": 0.0012835796070488903, "loss": 1.1199, "step": 6127 }, { "epoch": 0.4263104803645344, "grad_norm": 1.296875, "learning_rate": 0.0012833635170664845, "loss": 0.9589, "step": 6128 }, { "epoch": 0.4263800480016696, "grad_norm": 1.1640625, "learning_rate": 0.0012831474126963074, "loss": 0.7652, "step": 6129 }, { "epoch": 0.42644961563880485, "grad_norm": 1.171875, "learning_rate": 0.001282931293949332, "loss": 0.8077, "step": 6130 }, { "epoch": 0.42651918327594, "grad_norm": 1.015625, "learning_rate": 0.0012827151608365312, "loss": 0.9046, "step": 6131 }, { "epoch": 0.42658875091307524, "grad_norm": 1.046875, "learning_rate": 0.0012824990133688803, "loss": 0.9335, "step": 6132 }, { "epoch": 0.42665831855021047, "grad_norm": 0.78515625, "learning_rate": 0.0012822828515573527, "loss": 0.6665, "step": 6133 }, { "epoch": 0.42672788618734564, "grad_norm": 1.0703125, "learning_rate": 0.0012820666754129251, "loss": 0.791, "step": 6134 }, { "epoch": 0.42679745382448087, "grad_norm": 0.96875, "learning_rate": 0.001281850484946573, "loss": 0.7337, "step": 6135 }, { "epoch": 0.42686702146161604, "grad_norm": 1.0, "learning_rate": 0.001281634280169274, "loss": 0.867, "step": 6136 }, { "epoch": 0.42693658909875126, "grad_norm": 1.453125, "learning_rate": 0.0012814180610920063, "loss": 0.7578, "step": 6137 }, { "epoch": 0.4270061567358865, "grad_norm": 1.2421875, "learning_rate": 0.0012812018277257474, "loss": 0.8261, "step": 6138 }, { "epoch": 0.42707572437302166, "grad_norm": 1.1171875, "learning_rate": 0.0012809855800814773, "loss": 0.9987, "step": 6139 }, { "epoch": 0.4271452920101569, "grad_norm": 1.5390625, "learning_rate": 0.0012807693181701757, "loss": 1.1341, "step": 6140 }, { "epoch": 0.42721485964729206, "grad_norm": 1.0703125, "learning_rate": 0.0012805530420028233, "loss": 1.0794, "step": 6141 }, { "epoch": 0.4272844272844273, "grad_norm": 1.0390625, "learning_rate": 0.0012803367515904017, "loss": 0.838, "step": 6142 }, { "epoch": 0.4273539949215625, "grad_norm": 1.4140625, "learning_rate": 0.0012801204469438923, "loss": 1.0831, "step": 6143 }, { "epoch": 0.4274235625586977, "grad_norm": 1.15625, "learning_rate": 0.001279904128074279, "loss": 1.0262, "step": 6144 }, { "epoch": 0.4274931301958329, "grad_norm": 1.1484375, "learning_rate": 0.0012796877949925445, "loss": 0.8847, "step": 6145 }, { "epoch": 0.42756269783296813, "grad_norm": 1.0546875, "learning_rate": 0.0012794714477096741, "loss": 0.96, "step": 6146 }, { "epoch": 0.4276322654701033, "grad_norm": 1.4609375, "learning_rate": 0.0012792550862366517, "loss": 1.0317, "step": 6147 }, { "epoch": 0.4277018331072385, "grad_norm": 1.1171875, "learning_rate": 0.0012790387105844638, "loss": 0.8646, "step": 6148 }, { "epoch": 0.4277714007443737, "grad_norm": 1.0078125, "learning_rate": 0.0012788223207640963, "loss": 0.8444, "step": 6149 }, { "epoch": 0.4278409683815089, "grad_norm": 0.9453125, "learning_rate": 0.0012786059167865372, "loss": 0.7417, "step": 6150 }, { "epoch": 0.42791053601864415, "grad_norm": 0.83984375, "learning_rate": 0.0012783894986627738, "loss": 0.686, "step": 6151 }, { "epoch": 0.4279801036557793, "grad_norm": 0.9609375, "learning_rate": 0.0012781730664037944, "loss": 0.7532, "step": 6152 }, { "epoch": 0.42804967129291455, "grad_norm": 1.046875, "learning_rate": 0.0012779566200205894, "loss": 0.8931, "step": 6153 }, { "epoch": 0.4281192389300497, "grad_norm": 1.6171875, "learning_rate": 0.0012777401595241479, "loss": 0.7948, "step": 6154 }, { "epoch": 0.42818880656718494, "grad_norm": 1.2265625, "learning_rate": 0.0012775236849254612, "loss": 0.9641, "step": 6155 }, { "epoch": 0.42825837420432017, "grad_norm": 1.109375, "learning_rate": 0.0012773071962355203, "loss": 0.8638, "step": 6156 }, { "epoch": 0.42832794184145534, "grad_norm": 1.1484375, "learning_rate": 0.001277090693465318, "loss": 0.8303, "step": 6157 }, { "epoch": 0.42839750947859057, "grad_norm": 1.21875, "learning_rate": 0.001276874176625847, "loss": 1.1343, "step": 6158 }, { "epoch": 0.4284670771157258, "grad_norm": 1.3046875, "learning_rate": 0.0012766576457281006, "loss": 0.7364, "step": 6159 }, { "epoch": 0.42853664475286096, "grad_norm": 1.0546875, "learning_rate": 0.0012764411007830736, "loss": 0.8541, "step": 6160 }, { "epoch": 0.4286062123899962, "grad_norm": 1.203125, "learning_rate": 0.0012762245418017606, "loss": 1.0099, "step": 6161 }, { "epoch": 0.42867578002713136, "grad_norm": 1.0390625, "learning_rate": 0.001276007968795158, "loss": 0.5945, "step": 6162 }, { "epoch": 0.4287453476642666, "grad_norm": 1.109375, "learning_rate": 0.0012757913817742614, "loss": 0.8322, "step": 6163 }, { "epoch": 0.4288149153014018, "grad_norm": 1.1015625, "learning_rate": 0.001275574780750069, "loss": 0.9469, "step": 6164 }, { "epoch": 0.428884482938537, "grad_norm": 0.9296875, "learning_rate": 0.0012753581657335782, "loss": 0.9341, "step": 6165 }, { "epoch": 0.4289540505756722, "grad_norm": 0.95703125, "learning_rate": 0.0012751415367357876, "loss": 0.7707, "step": 6166 }, { "epoch": 0.4290236182128074, "grad_norm": 1.09375, "learning_rate": 0.0012749248937676968, "loss": 0.7381, "step": 6167 }, { "epoch": 0.4290931858499426, "grad_norm": 1.09375, "learning_rate": 0.0012747082368403048, "loss": 0.8526, "step": 6168 }, { "epoch": 0.42916275348707783, "grad_norm": 1.2578125, "learning_rate": 0.0012744915659646141, "loss": 1.0997, "step": 6169 }, { "epoch": 0.429232321124213, "grad_norm": 1.2109375, "learning_rate": 0.0012742748811516247, "loss": 0.9804, "step": 6170 }, { "epoch": 0.4293018887613482, "grad_norm": 1.171875, "learning_rate": 0.0012740581824123396, "loss": 0.8845, "step": 6171 }, { "epoch": 0.42937145639848345, "grad_norm": 1.1796875, "learning_rate": 0.0012738414697577609, "loss": 0.9485, "step": 6172 }, { "epoch": 0.4294410240356186, "grad_norm": 1.0859375, "learning_rate": 0.001273624743198893, "loss": 0.6952, "step": 6173 }, { "epoch": 0.42951059167275385, "grad_norm": 1.03125, "learning_rate": 0.0012734080027467399, "loss": 0.7642, "step": 6174 }, { "epoch": 0.429580159309889, "grad_norm": 0.953125, "learning_rate": 0.001273191248412306, "loss": 0.6815, "step": 6175 }, { "epoch": 0.42964972694702425, "grad_norm": 1.0, "learning_rate": 0.001272974480206598, "loss": 0.8678, "step": 6176 }, { "epoch": 0.4297192945841595, "grad_norm": 1.265625, "learning_rate": 0.0012727576981406215, "loss": 1.0667, "step": 6177 }, { "epoch": 0.42978886222129464, "grad_norm": 0.99609375, "learning_rate": 0.0012725409022253842, "loss": 0.9059, "step": 6178 }, { "epoch": 0.42985842985842987, "grad_norm": 1.3984375, "learning_rate": 0.001272324092471893, "loss": 0.77, "step": 6179 }, { "epoch": 0.42992799749556504, "grad_norm": 1.125, "learning_rate": 0.0012721072688911576, "loss": 0.8317, "step": 6180 }, { "epoch": 0.42999756513270027, "grad_norm": 0.9921875, "learning_rate": 0.0012718904314941866, "loss": 0.5463, "step": 6181 }, { "epoch": 0.4300671327698355, "grad_norm": 1.359375, "learning_rate": 0.0012716735802919894, "loss": 0.7145, "step": 6182 }, { "epoch": 0.43013670040697066, "grad_norm": 1.1875, "learning_rate": 0.0012714567152955776, "loss": 0.9652, "step": 6183 }, { "epoch": 0.4302062680441059, "grad_norm": 1.15625, "learning_rate": 0.0012712398365159617, "loss": 0.9703, "step": 6184 }, { "epoch": 0.43027583568124106, "grad_norm": 1.078125, "learning_rate": 0.0012710229439641544, "loss": 0.8048, "step": 6185 }, { "epoch": 0.4303454033183763, "grad_norm": 1.1171875, "learning_rate": 0.0012708060376511677, "loss": 0.8256, "step": 6186 }, { "epoch": 0.4304149709555115, "grad_norm": 1.203125, "learning_rate": 0.0012705891175880156, "loss": 1.0206, "step": 6187 }, { "epoch": 0.4304845385926467, "grad_norm": 1.328125, "learning_rate": 0.0012703721837857118, "loss": 0.6895, "step": 6188 }, { "epoch": 0.4305541062297819, "grad_norm": 1.1640625, "learning_rate": 0.0012701552362552714, "loss": 0.7009, "step": 6189 }, { "epoch": 0.43062367386691713, "grad_norm": 1.109375, "learning_rate": 0.0012699382750077102, "loss": 0.7006, "step": 6190 }, { "epoch": 0.4306932415040523, "grad_norm": 1.296875, "learning_rate": 0.0012697213000540434, "loss": 0.9176, "step": 6191 }, { "epoch": 0.43076280914118753, "grad_norm": 0.99609375, "learning_rate": 0.0012695043114052886, "loss": 0.7925, "step": 6192 }, { "epoch": 0.4308323767783227, "grad_norm": 1.2109375, "learning_rate": 0.0012692873090724632, "loss": 0.7769, "step": 6193 }, { "epoch": 0.4309019444154579, "grad_norm": 1.3125, "learning_rate": 0.001269070293066586, "loss": 1.0246, "step": 6194 }, { "epoch": 0.43097151205259315, "grad_norm": 1.0078125, "learning_rate": 0.001268853263398675, "loss": 0.9114, "step": 6195 }, { "epoch": 0.4310410796897283, "grad_norm": 1.109375, "learning_rate": 0.0012686362200797507, "loss": 0.6292, "step": 6196 }, { "epoch": 0.43111064732686355, "grad_norm": 1.0546875, "learning_rate": 0.0012684191631208333, "loss": 0.8646, "step": 6197 }, { "epoch": 0.4311802149639987, "grad_norm": 1.3671875, "learning_rate": 0.0012682020925329433, "loss": 0.9575, "step": 6198 }, { "epoch": 0.43124978260113395, "grad_norm": 1.078125, "learning_rate": 0.0012679850083271034, "loss": 0.9149, "step": 6199 }, { "epoch": 0.4313193502382692, "grad_norm": 1.046875, "learning_rate": 0.0012677679105143349, "loss": 0.9711, "step": 6200 }, { "epoch": 0.43138891787540434, "grad_norm": 0.8046875, "learning_rate": 0.0012675507991056622, "loss": 0.644, "step": 6201 }, { "epoch": 0.43145848551253957, "grad_norm": 1.2265625, "learning_rate": 0.001267333674112108, "loss": 0.706, "step": 6202 }, { "epoch": 0.4315280531496748, "grad_norm": 1.171875, "learning_rate": 0.0012671165355446973, "loss": 0.7567, "step": 6203 }, { "epoch": 0.43159762078680997, "grad_norm": 1.0859375, "learning_rate": 0.0012668993834144555, "loss": 0.9832, "step": 6204 }, { "epoch": 0.4316671884239452, "grad_norm": 1.2734375, "learning_rate": 0.0012666822177324082, "loss": 0.8723, "step": 6205 }, { "epoch": 0.43173675606108036, "grad_norm": 0.98828125, "learning_rate": 0.0012664650385095825, "loss": 0.8979, "step": 6206 }, { "epoch": 0.4318063236982156, "grad_norm": 0.8359375, "learning_rate": 0.0012662478457570044, "loss": 0.7803, "step": 6207 }, { "epoch": 0.4318758913353508, "grad_norm": 1.765625, "learning_rate": 0.0012660306394857033, "loss": 1.0101, "step": 6208 }, { "epoch": 0.431945458972486, "grad_norm": 1.4140625, "learning_rate": 0.0012658134197067069, "loss": 0.778, "step": 6209 }, { "epoch": 0.4320150266096212, "grad_norm": 0.953125, "learning_rate": 0.001265596186431045, "loss": 0.8064, "step": 6210 }, { "epoch": 0.4320845942467564, "grad_norm": 1.1015625, "learning_rate": 0.0012653789396697476, "loss": 0.9607, "step": 6211 }, { "epoch": 0.4321541618838916, "grad_norm": 1.0625, "learning_rate": 0.0012651616794338448, "loss": 0.9648, "step": 6212 }, { "epoch": 0.43222372952102683, "grad_norm": 1.1484375, "learning_rate": 0.0012649444057343691, "loss": 0.7897, "step": 6213 }, { "epoch": 0.432293297158162, "grad_norm": 1.0703125, "learning_rate": 0.0012647271185823512, "loss": 0.9157, "step": 6214 }, { "epoch": 0.43236286479529723, "grad_norm": 0.86328125, "learning_rate": 0.001264509817988825, "loss": 0.5547, "step": 6215 }, { "epoch": 0.43243243243243246, "grad_norm": 0.9375, "learning_rate": 0.0012642925039648232, "loss": 0.7265, "step": 6216 }, { "epoch": 0.4325020000695676, "grad_norm": 1.203125, "learning_rate": 0.0012640751765213803, "loss": 1.0178, "step": 6217 }, { "epoch": 0.43257156770670285, "grad_norm": 0.95703125, "learning_rate": 0.001263857835669531, "loss": 0.9097, "step": 6218 }, { "epoch": 0.432641135343838, "grad_norm": 0.9609375, "learning_rate": 0.0012636404814203106, "loss": 0.6847, "step": 6219 }, { "epoch": 0.43271070298097325, "grad_norm": 1.0390625, "learning_rate": 0.0012634231137847556, "loss": 1.0189, "step": 6220 }, { "epoch": 0.4327802706181085, "grad_norm": 0.8671875, "learning_rate": 0.0012632057327739026, "loss": 0.8217, "step": 6221 }, { "epoch": 0.43284983825524365, "grad_norm": 1.234375, "learning_rate": 0.0012629883383987893, "loss": 0.7257, "step": 6222 }, { "epoch": 0.4329194058923789, "grad_norm": 1.1328125, "learning_rate": 0.0012627709306704533, "loss": 0.8535, "step": 6223 }, { "epoch": 0.43298897352951404, "grad_norm": 1.3203125, "learning_rate": 0.0012625535095999341, "loss": 1.0538, "step": 6224 }, { "epoch": 0.43305854116664927, "grad_norm": 1.3984375, "learning_rate": 0.0012623360751982712, "loss": 1.0261, "step": 6225 }, { "epoch": 0.4331281088037845, "grad_norm": 1.390625, "learning_rate": 0.0012621186274765044, "loss": 0.8567, "step": 6226 }, { "epoch": 0.43319767644091967, "grad_norm": 1.6328125, "learning_rate": 0.001261901166445675, "loss": 1.0199, "step": 6227 }, { "epoch": 0.4332672440780549, "grad_norm": 1.203125, "learning_rate": 0.0012616836921168243, "loss": 0.8066, "step": 6228 }, { "epoch": 0.4333368117151901, "grad_norm": 1.015625, "learning_rate": 0.0012614662045009953, "loss": 0.7862, "step": 6229 }, { "epoch": 0.4334063793523253, "grad_norm": 1.15625, "learning_rate": 0.0012612487036092297, "loss": 0.8326, "step": 6230 }, { "epoch": 0.4334759469894605, "grad_norm": 0.95703125, "learning_rate": 0.0012610311894525718, "loss": 0.8385, "step": 6231 }, { "epoch": 0.4335455146265957, "grad_norm": 1.25, "learning_rate": 0.001260813662042066, "loss": 0.9319, "step": 6232 }, { "epoch": 0.4336150822637309, "grad_norm": 1.1640625, "learning_rate": 0.001260596121388757, "loss": 0.7254, "step": 6233 }, { "epoch": 0.43368464990086614, "grad_norm": 1.109375, "learning_rate": 0.0012603785675036905, "loss": 0.8268, "step": 6234 }, { "epoch": 0.4337542175380013, "grad_norm": 1.0859375, "learning_rate": 0.0012601610003979125, "loss": 0.9525, "step": 6235 }, { "epoch": 0.43382378517513653, "grad_norm": 0.9453125, "learning_rate": 0.0012599434200824705, "loss": 0.7402, "step": 6236 }, { "epoch": 0.4338933528122717, "grad_norm": 1.2421875, "learning_rate": 0.0012597258265684118, "loss": 0.8009, "step": 6237 }, { "epoch": 0.43396292044940693, "grad_norm": 1.140625, "learning_rate": 0.0012595082198667846, "loss": 0.8976, "step": 6238 }, { "epoch": 0.43403248808654216, "grad_norm": 1.0625, "learning_rate": 0.001259290599988638, "loss": 0.8552, "step": 6239 }, { "epoch": 0.4341020557236773, "grad_norm": 1.3984375, "learning_rate": 0.0012590729669450219, "loss": 1.1349, "step": 6240 }, { "epoch": 0.43417162336081255, "grad_norm": 1.640625, "learning_rate": 0.001258855320746986, "loss": 0.9319, "step": 6241 }, { "epoch": 0.4342411909979478, "grad_norm": 1.3046875, "learning_rate": 0.001258637661405582, "loss": 1.2469, "step": 6242 }, { "epoch": 0.43431075863508295, "grad_norm": 0.8828125, "learning_rate": 0.0012584199889318609, "loss": 0.7951, "step": 6243 }, { "epoch": 0.4343803262722182, "grad_norm": 1.015625, "learning_rate": 0.0012582023033368755, "loss": 0.5938, "step": 6244 }, { "epoch": 0.43444989390935335, "grad_norm": 1.0078125, "learning_rate": 0.0012579846046316782, "loss": 0.9024, "step": 6245 }, { "epoch": 0.4345194615464886, "grad_norm": 1.078125, "learning_rate": 0.0012577668928273234, "loss": 1.0139, "step": 6246 }, { "epoch": 0.4345890291836238, "grad_norm": 1.3203125, "learning_rate": 0.001257549167934865, "loss": 0.9458, "step": 6247 }, { "epoch": 0.43465859682075897, "grad_norm": 0.9921875, "learning_rate": 0.0012573314299653578, "loss": 0.8125, "step": 6248 }, { "epoch": 0.4347281644578942, "grad_norm": 1.03125, "learning_rate": 0.0012571136789298579, "loss": 0.8901, "step": 6249 }, { "epoch": 0.43479773209502937, "grad_norm": 1.03125, "learning_rate": 0.0012568959148394213, "loss": 0.6321, "step": 6250 }, { "epoch": 0.4348672997321646, "grad_norm": 1.2578125, "learning_rate": 0.0012566781377051047, "loss": 0.934, "step": 6251 }, { "epoch": 0.4349368673692998, "grad_norm": 1.0078125, "learning_rate": 0.0012564603475379663, "loss": 0.6629, "step": 6252 }, { "epoch": 0.435006435006435, "grad_norm": 1.09375, "learning_rate": 0.001256242544349064, "loss": 0.9645, "step": 6253 }, { "epoch": 0.4350760026435702, "grad_norm": 1.1796875, "learning_rate": 0.0012560247281494569, "loss": 0.8213, "step": 6254 }, { "epoch": 0.43514557028070544, "grad_norm": 1.234375, "learning_rate": 0.0012558068989502044, "loss": 0.9691, "step": 6255 }, { "epoch": 0.4352151379178406, "grad_norm": 1.296875, "learning_rate": 0.0012555890567623668, "loss": 0.9168, "step": 6256 }, { "epoch": 0.43528470555497584, "grad_norm": 1.6484375, "learning_rate": 0.0012553712015970055, "loss": 0.9164, "step": 6257 }, { "epoch": 0.435354273192111, "grad_norm": 1.0625, "learning_rate": 0.0012551533334651816, "loss": 0.7233, "step": 6258 }, { "epoch": 0.43542384082924623, "grad_norm": 1.125, "learning_rate": 0.0012549354523779578, "loss": 0.6932, "step": 6259 }, { "epoch": 0.43549340846638146, "grad_norm": 1.75, "learning_rate": 0.0012547175583463963, "loss": 1.0742, "step": 6260 }, { "epoch": 0.43556297610351663, "grad_norm": 1.3125, "learning_rate": 0.0012544996513815614, "loss": 0.9574, "step": 6261 }, { "epoch": 0.43563254374065186, "grad_norm": 0.9375, "learning_rate": 0.0012542817314945168, "loss": 0.8548, "step": 6262 }, { "epoch": 0.435702111377787, "grad_norm": 1.234375, "learning_rate": 0.0012540637986963275, "loss": 0.8449, "step": 6263 }, { "epoch": 0.43577167901492225, "grad_norm": 1.203125, "learning_rate": 0.001253845852998059, "loss": 0.9364, "step": 6264 }, { "epoch": 0.4358412466520575, "grad_norm": 1.203125, "learning_rate": 0.0012536278944107776, "loss": 0.8772, "step": 6265 }, { "epoch": 0.43591081428919265, "grad_norm": 1.2578125, "learning_rate": 0.0012534099229455505, "loss": 1.127, "step": 6266 }, { "epoch": 0.4359803819263279, "grad_norm": 0.94140625, "learning_rate": 0.0012531919386134444, "loss": 0.7243, "step": 6267 }, { "epoch": 0.4360499495634631, "grad_norm": 1.1875, "learning_rate": 0.001252973941425528, "loss": 1.2287, "step": 6268 }, { "epoch": 0.4361195172005983, "grad_norm": 0.98046875, "learning_rate": 0.0012527559313928699, "loss": 0.7078, "step": 6269 }, { "epoch": 0.4361890848377335, "grad_norm": 1.0546875, "learning_rate": 0.0012525379085265393, "loss": 0.8298, "step": 6270 }, { "epoch": 0.43625865247486867, "grad_norm": 1.0234375, "learning_rate": 0.0012523198728376069, "loss": 0.9078, "step": 6271 }, { "epoch": 0.4363282201120039, "grad_norm": 1.0, "learning_rate": 0.001252101824337143, "loss": 0.898, "step": 6272 }, { "epoch": 0.4363977877491391, "grad_norm": 1.1171875, "learning_rate": 0.0012518837630362194, "loss": 1.1462, "step": 6273 }, { "epoch": 0.4364673553862743, "grad_norm": 1.1171875, "learning_rate": 0.0012516656889459078, "loss": 0.8964, "step": 6274 }, { "epoch": 0.4365369230234095, "grad_norm": 1.0625, "learning_rate": 0.0012514476020772808, "loss": 0.9142, "step": 6275 }, { "epoch": 0.4366064906605447, "grad_norm": 1.109375, "learning_rate": 0.001251229502441412, "loss": 1.0183, "step": 6276 }, { "epoch": 0.4366760582976799, "grad_norm": 1.0625, "learning_rate": 0.0012510113900493756, "loss": 0.7949, "step": 6277 }, { "epoch": 0.43674562593481514, "grad_norm": 1.0390625, "learning_rate": 0.0012507932649122458, "loss": 0.9381, "step": 6278 }, { "epoch": 0.4368151935719503, "grad_norm": 1.2421875, "learning_rate": 0.0012505751270410982, "loss": 0.9689, "step": 6279 }, { "epoch": 0.43688476120908554, "grad_norm": 0.859375, "learning_rate": 0.0012503569764470085, "loss": 0.8015, "step": 6280 }, { "epoch": 0.43695432884622076, "grad_norm": 0.984375, "learning_rate": 0.0012501388131410537, "loss": 0.6995, "step": 6281 }, { "epoch": 0.43702389648335593, "grad_norm": 1.0390625, "learning_rate": 0.0012499206371343104, "loss": 0.7622, "step": 6282 }, { "epoch": 0.43709346412049116, "grad_norm": 0.984375, "learning_rate": 0.001249702448437857, "loss": 0.8139, "step": 6283 }, { "epoch": 0.43716303175762633, "grad_norm": 1.1640625, "learning_rate": 0.0012494842470627719, "loss": 0.914, "step": 6284 }, { "epoch": 0.43723259939476156, "grad_norm": 0.96875, "learning_rate": 0.0012492660330201341, "loss": 0.7262, "step": 6285 }, { "epoch": 0.4373021670318968, "grad_norm": 1.109375, "learning_rate": 0.0012490478063210237, "loss": 1.0207, "step": 6286 }, { "epoch": 0.43737173466903195, "grad_norm": 0.86328125, "learning_rate": 0.001248829566976521, "loss": 0.8092, "step": 6287 }, { "epoch": 0.4374413023061672, "grad_norm": 1.0625, "learning_rate": 0.001248611314997707, "loss": 0.6535, "step": 6288 }, { "epoch": 0.43751086994330235, "grad_norm": 1.1171875, "learning_rate": 0.0012483930503956635, "loss": 0.9891, "step": 6289 }, { "epoch": 0.4375804375804376, "grad_norm": 1.140625, "learning_rate": 0.001248174773181473, "loss": 0.9172, "step": 6290 }, { "epoch": 0.4376500052175728, "grad_norm": 1.0546875, "learning_rate": 0.0012479564833662185, "loss": 0.6461, "step": 6291 }, { "epoch": 0.437719572854708, "grad_norm": 1.140625, "learning_rate": 0.0012477381809609834, "loss": 0.7959, "step": 6292 }, { "epoch": 0.4377891404918432, "grad_norm": 1.140625, "learning_rate": 0.0012475198659768522, "loss": 0.8386, "step": 6293 }, { "epoch": 0.4378587081289784, "grad_norm": 1.2421875, "learning_rate": 0.0012473015384249096, "loss": 0.8491, "step": 6294 }, { "epoch": 0.4379282757661136, "grad_norm": 1.1875, "learning_rate": 0.0012470831983162416, "loss": 0.9507, "step": 6295 }, { "epoch": 0.4379978434032488, "grad_norm": 1.6328125, "learning_rate": 0.001246864845661934, "loss": 1.2016, "step": 6296 }, { "epoch": 0.438067411040384, "grad_norm": 1.1796875, "learning_rate": 0.001246646480473074, "loss": 0.8521, "step": 6297 }, { "epoch": 0.4381369786775192, "grad_norm": 1.1171875, "learning_rate": 0.0012464281027607489, "loss": 0.7937, "step": 6298 }, { "epoch": 0.43820654631465444, "grad_norm": 1.0859375, "learning_rate": 0.0012462097125360467, "loss": 0.7912, "step": 6299 }, { "epoch": 0.4382761139517896, "grad_norm": 1.15625, "learning_rate": 0.0012459913098100566, "loss": 0.8754, "step": 6300 }, { "epoch": 0.43834568158892484, "grad_norm": 1.046875, "learning_rate": 0.0012457728945938673, "loss": 0.6109, "step": 6301 }, { "epoch": 0.43841524922606, "grad_norm": 1.21875, "learning_rate": 0.0012455544668985693, "loss": 0.8394, "step": 6302 }, { "epoch": 0.43848481686319524, "grad_norm": 1.3359375, "learning_rate": 0.0012453360267352534, "loss": 1.0656, "step": 6303 }, { "epoch": 0.43855438450033046, "grad_norm": 1.1796875, "learning_rate": 0.0012451175741150105, "loss": 0.8723, "step": 6304 }, { "epoch": 0.43862395213746563, "grad_norm": 1.1015625, "learning_rate": 0.0012448991090489325, "loss": 0.6628, "step": 6305 }, { "epoch": 0.43869351977460086, "grad_norm": 1.2265625, "learning_rate": 0.0012446806315481124, "loss": 0.7501, "step": 6306 }, { "epoch": 0.4387630874117361, "grad_norm": 1.078125, "learning_rate": 0.0012444621416236427, "loss": 1.0503, "step": 6307 }, { "epoch": 0.43883265504887126, "grad_norm": 1.1484375, "learning_rate": 0.0012442436392866181, "loss": 0.6441, "step": 6308 }, { "epoch": 0.4389022226860065, "grad_norm": 1.1796875, "learning_rate": 0.0012440251245481324, "loss": 1.0388, "step": 6309 }, { "epoch": 0.43897179032314165, "grad_norm": 0.8984375, "learning_rate": 0.0012438065974192808, "loss": 0.7253, "step": 6310 }, { "epoch": 0.4390413579602769, "grad_norm": 1.296875, "learning_rate": 0.001243588057911159, "loss": 0.8736, "step": 6311 }, { "epoch": 0.4391109255974121, "grad_norm": 0.96484375, "learning_rate": 0.0012433695060348636, "loss": 0.9115, "step": 6312 }, { "epoch": 0.4391804932345473, "grad_norm": 0.8828125, "learning_rate": 0.0012431509418014913, "loss": 0.8458, "step": 6313 }, { "epoch": 0.4392500608716825, "grad_norm": 1.1953125, "learning_rate": 0.0012429323652221396, "loss": 0.9502, "step": 6314 }, { "epoch": 0.43931962850881767, "grad_norm": 1.2734375, "learning_rate": 0.001242713776307907, "loss": 1.1474, "step": 6315 }, { "epoch": 0.4393891961459529, "grad_norm": 1.203125, "learning_rate": 0.001242495175069892, "loss": 0.8861, "step": 6316 }, { "epoch": 0.4394587637830881, "grad_norm": 0.9375, "learning_rate": 0.0012422765615191947, "loss": 0.7672, "step": 6317 }, { "epoch": 0.4395283314202233, "grad_norm": 1.171875, "learning_rate": 0.0012420579356669144, "loss": 0.7132, "step": 6318 }, { "epoch": 0.4395978990573585, "grad_norm": 1.2109375, "learning_rate": 0.0012418392975241522, "loss": 1.0153, "step": 6319 }, { "epoch": 0.43966746669449375, "grad_norm": 1.1640625, "learning_rate": 0.0012416206471020095, "loss": 0.7832, "step": 6320 }, { "epoch": 0.4397370343316289, "grad_norm": 1.1484375, "learning_rate": 0.0012414019844115883, "loss": 1.0355, "step": 6321 }, { "epoch": 0.43980660196876414, "grad_norm": 1.0546875, "learning_rate": 0.001241183309463991, "loss": 0.8978, "step": 6322 }, { "epoch": 0.4398761696058993, "grad_norm": 1.15625, "learning_rate": 0.001240964622270321, "loss": 0.8164, "step": 6323 }, { "epoch": 0.43994573724303454, "grad_norm": 1.109375, "learning_rate": 0.0012407459228416819, "loss": 1.0299, "step": 6324 }, { "epoch": 0.44001530488016977, "grad_norm": 0.9765625, "learning_rate": 0.0012405272111891783, "loss": 0.7, "step": 6325 }, { "epoch": 0.44008487251730494, "grad_norm": 1.015625, "learning_rate": 0.0012403084873239152, "loss": 0.7141, "step": 6326 }, { "epoch": 0.44015444015444016, "grad_norm": 1.234375, "learning_rate": 0.0012400897512569987, "loss": 1.0925, "step": 6327 }, { "epoch": 0.44022400779157533, "grad_norm": 1.2890625, "learning_rate": 0.0012398710029995345, "loss": 1.0427, "step": 6328 }, { "epoch": 0.44029357542871056, "grad_norm": 1.390625, "learning_rate": 0.0012396522425626299, "loss": 0.8871, "step": 6329 }, { "epoch": 0.4403631430658458, "grad_norm": 1.1640625, "learning_rate": 0.001239433469957392, "loss": 0.9091, "step": 6330 }, { "epoch": 0.44043271070298096, "grad_norm": 1.1796875, "learning_rate": 0.0012392146851949296, "loss": 0.6989, "step": 6331 }, { "epoch": 0.4405022783401162, "grad_norm": 1.2265625, "learning_rate": 0.0012389958882863515, "loss": 0.5548, "step": 6332 }, { "epoch": 0.4405718459772514, "grad_norm": 0.90625, "learning_rate": 0.0012387770792427664, "loss": 0.8693, "step": 6333 }, { "epoch": 0.4406414136143866, "grad_norm": 1.1328125, "learning_rate": 0.001238558258075285, "loss": 0.7976, "step": 6334 }, { "epoch": 0.4407109812515218, "grad_norm": 1.03125, "learning_rate": 0.0012383394247950175, "loss": 0.7941, "step": 6335 }, { "epoch": 0.440780548888657, "grad_norm": 1.078125, "learning_rate": 0.0012381205794130754, "loss": 0.7462, "step": 6336 }, { "epoch": 0.4408501165257922, "grad_norm": 0.9140625, "learning_rate": 0.0012379017219405705, "loss": 0.7067, "step": 6337 }, { "epoch": 0.4409196841629274, "grad_norm": 1.0859375, "learning_rate": 0.0012376828523886151, "loss": 0.948, "step": 6338 }, { "epoch": 0.4409892518000626, "grad_norm": 0.81640625, "learning_rate": 0.0012374639707683228, "loss": 0.7899, "step": 6339 }, { "epoch": 0.4410588194371978, "grad_norm": 1.0625, "learning_rate": 0.0012372450770908067, "loss": 0.8701, "step": 6340 }, { "epoch": 0.441128387074333, "grad_norm": 1.0703125, "learning_rate": 0.0012370261713671817, "loss": 0.8288, "step": 6341 }, { "epoch": 0.4411979547114682, "grad_norm": 1.1171875, "learning_rate": 0.001236807253608562, "loss": 0.8467, "step": 6342 }, { "epoch": 0.44126752234860345, "grad_norm": 1.0859375, "learning_rate": 0.001236588323826064, "loss": 0.934, "step": 6343 }, { "epoch": 0.4413370899857386, "grad_norm": 1.078125, "learning_rate": 0.0012363693820308032, "loss": 0.7287, "step": 6344 }, { "epoch": 0.44140665762287384, "grad_norm": 1.0390625, "learning_rate": 0.0012361504282338964, "loss": 0.7829, "step": 6345 }, { "epoch": 0.44147622526000907, "grad_norm": 1.1796875, "learning_rate": 0.0012359314624464616, "loss": 0.9451, "step": 6346 }, { "epoch": 0.44154579289714424, "grad_norm": 1.3203125, "learning_rate": 0.001235712484679616, "loss": 0.7119, "step": 6347 }, { "epoch": 0.44161536053427947, "grad_norm": 1.4765625, "learning_rate": 0.0012354934949444785, "loss": 1.2297, "step": 6348 }, { "epoch": 0.44168492817141464, "grad_norm": 1.0234375, "learning_rate": 0.001235274493252168, "loss": 0.8412, "step": 6349 }, { "epoch": 0.44175449580854986, "grad_norm": 0.88671875, "learning_rate": 0.0012350554796138051, "loss": 0.7168, "step": 6350 }, { "epoch": 0.4418240634456851, "grad_norm": 1.1484375, "learning_rate": 0.0012348364540405096, "loss": 0.8528, "step": 6351 }, { "epoch": 0.44189363108282026, "grad_norm": 1.09375, "learning_rate": 0.0012346174165434026, "loss": 0.7164, "step": 6352 }, { "epoch": 0.4419631987199555, "grad_norm": 1.28125, "learning_rate": 0.0012343983671336057, "loss": 1.1093, "step": 6353 }, { "epoch": 0.44203276635709066, "grad_norm": 0.96875, "learning_rate": 0.0012341793058222412, "loss": 0.6447, "step": 6354 }, { "epoch": 0.4421023339942259, "grad_norm": 0.9765625, "learning_rate": 0.001233960232620432, "loss": 0.7405, "step": 6355 }, { "epoch": 0.4421719016313611, "grad_norm": 1.0, "learning_rate": 0.001233741147539301, "loss": 0.7226, "step": 6356 }, { "epoch": 0.4422414692684963, "grad_norm": 0.96484375, "learning_rate": 0.001233522050589973, "loss": 0.7298, "step": 6357 }, { "epoch": 0.4423110369056315, "grad_norm": 1.0859375, "learning_rate": 0.0012333029417835725, "loss": 0.8299, "step": 6358 }, { "epoch": 0.44238060454276673, "grad_norm": 0.87109375, "learning_rate": 0.0012330838211312243, "loss": 0.6317, "step": 6359 }, { "epoch": 0.4424501721799019, "grad_norm": 1.125, "learning_rate": 0.0012328646886440547, "loss": 0.9076, "step": 6360 }, { "epoch": 0.4425197398170371, "grad_norm": 1.265625, "learning_rate": 0.0012326455443331897, "loss": 0.98, "step": 6361 }, { "epoch": 0.4425893074541723, "grad_norm": 1.0625, "learning_rate": 0.0012324263882097567, "loss": 0.8287, "step": 6362 }, { "epoch": 0.4426588750913075, "grad_norm": 1.3046875, "learning_rate": 0.0012322072202848831, "loss": 0.9169, "step": 6363 }, { "epoch": 0.44272844272844275, "grad_norm": 1.15625, "learning_rate": 0.0012319880405696974, "loss": 1.0304, "step": 6364 }, { "epoch": 0.4427980103655779, "grad_norm": 0.921875, "learning_rate": 0.0012317688490753281, "loss": 0.7497, "step": 6365 }, { "epoch": 0.44286757800271315, "grad_norm": 1.078125, "learning_rate": 0.0012315496458129053, "loss": 0.8016, "step": 6366 }, { "epoch": 0.4429371456398483, "grad_norm": 1.375, "learning_rate": 0.0012313304307935583, "loss": 1.0096, "step": 6367 }, { "epoch": 0.44300671327698354, "grad_norm": 1.25, "learning_rate": 0.001231111204028418, "loss": 0.7129, "step": 6368 }, { "epoch": 0.44307628091411877, "grad_norm": 0.9609375, "learning_rate": 0.0012308919655286154, "loss": 0.6975, "step": 6369 }, { "epoch": 0.44314584855125394, "grad_norm": 0.97265625, "learning_rate": 0.001230672715305283, "loss": 0.9218, "step": 6370 }, { "epoch": 0.44321541618838917, "grad_norm": 1.1796875, "learning_rate": 0.0012304534533695527, "loss": 0.9669, "step": 6371 }, { "epoch": 0.4432849838255244, "grad_norm": 1.1953125, "learning_rate": 0.0012302341797325572, "loss": 0.7856, "step": 6372 }, { "epoch": 0.44335455146265956, "grad_norm": 1.203125, "learning_rate": 0.001230014894405431, "loss": 0.7618, "step": 6373 }, { "epoch": 0.4434241190997948, "grad_norm": 1.140625, "learning_rate": 0.0012297955973993076, "loss": 0.8446, "step": 6374 }, { "epoch": 0.44349368673692996, "grad_norm": 0.9453125, "learning_rate": 0.001229576288725322, "loss": 0.8408, "step": 6375 }, { "epoch": 0.4435632543740652, "grad_norm": 1.75, "learning_rate": 0.00122935696839461, "loss": 0.9296, "step": 6376 }, { "epoch": 0.4436328220112004, "grad_norm": 1.0078125, "learning_rate": 0.0012291376364183069, "loss": 0.6171, "step": 6377 }, { "epoch": 0.4437023896483356, "grad_norm": 1.1796875, "learning_rate": 0.0012289182928075495, "loss": 0.7637, "step": 6378 }, { "epoch": 0.4437719572854708, "grad_norm": 1.15625, "learning_rate": 0.0012286989375734749, "loss": 0.697, "step": 6379 }, { "epoch": 0.443841524922606, "grad_norm": 0.94921875, "learning_rate": 0.0012284795707272213, "loss": 0.7791, "step": 6380 }, { "epoch": 0.4439110925597412, "grad_norm": 1.3046875, "learning_rate": 0.0012282601922799263, "loss": 0.9094, "step": 6381 }, { "epoch": 0.44398066019687643, "grad_norm": 0.94140625, "learning_rate": 0.0012280408022427298, "loss": 0.7983, "step": 6382 }, { "epoch": 0.4440502278340116, "grad_norm": 1.0546875, "learning_rate": 0.0012278214006267705, "loss": 0.872, "step": 6383 }, { "epoch": 0.4441197954711468, "grad_norm": 0.93359375, "learning_rate": 0.0012276019874431887, "loss": 0.7685, "step": 6384 }, { "epoch": 0.44418936310828205, "grad_norm": 1.09375, "learning_rate": 0.0012273825627031254, "loss": 0.8077, "step": 6385 }, { "epoch": 0.4442589307454172, "grad_norm": 1.0625, "learning_rate": 0.0012271631264177212, "loss": 0.7381, "step": 6386 }, { "epoch": 0.44432849838255245, "grad_norm": 0.94921875, "learning_rate": 0.001226943678598119, "loss": 0.7848, "step": 6387 }, { "epoch": 0.4443980660196876, "grad_norm": 1.046875, "learning_rate": 0.0012267242192554601, "loss": 0.9076, "step": 6388 }, { "epoch": 0.44446763365682285, "grad_norm": 1.0078125, "learning_rate": 0.0012265047484008886, "loss": 0.9831, "step": 6389 }, { "epoch": 0.4445372012939581, "grad_norm": 1.53125, "learning_rate": 0.0012262852660455477, "loss": 1.0663, "step": 6390 }, { "epoch": 0.44460676893109324, "grad_norm": 1.1171875, "learning_rate": 0.0012260657722005812, "loss": 0.8143, "step": 6391 }, { "epoch": 0.44467633656822847, "grad_norm": 0.9921875, "learning_rate": 0.0012258462668771344, "loss": 0.6318, "step": 6392 }, { "epoch": 0.44474590420536364, "grad_norm": 1.0078125, "learning_rate": 0.0012256267500863522, "loss": 0.7823, "step": 6393 }, { "epoch": 0.44481547184249887, "grad_norm": 1.125, "learning_rate": 0.0012254072218393815, "loss": 0.8409, "step": 6394 }, { "epoch": 0.4448850394796341, "grad_norm": 1.3984375, "learning_rate": 0.0012251876821473676, "loss": 0.9326, "step": 6395 }, { "epoch": 0.44495460711676926, "grad_norm": 1.1796875, "learning_rate": 0.001224968131021459, "loss": 0.8727, "step": 6396 }, { "epoch": 0.4450241747539045, "grad_norm": 0.94140625, "learning_rate": 0.0012247485684728017, "loss": 0.6485, "step": 6397 }, { "epoch": 0.4450937423910397, "grad_norm": 0.98828125, "learning_rate": 0.0012245289945125458, "loss": 0.8116, "step": 6398 }, { "epoch": 0.4451633100281749, "grad_norm": 0.98046875, "learning_rate": 0.0012243094091518387, "loss": 0.5843, "step": 6399 }, { "epoch": 0.4452328776653101, "grad_norm": 1.421875, "learning_rate": 0.0012240898124018303, "loss": 0.6947, "step": 6400 }, { "epoch": 0.4453024453024453, "grad_norm": 1.2890625, "learning_rate": 0.001223870204273671, "loss": 0.9957, "step": 6401 }, { "epoch": 0.4453720129395805, "grad_norm": 1.2265625, "learning_rate": 0.0012236505847785112, "loss": 0.902, "step": 6402 }, { "epoch": 0.44544158057671573, "grad_norm": 1.2578125, "learning_rate": 0.0012234309539275018, "loss": 0.857, "step": 6403 }, { "epoch": 0.4455111482138509, "grad_norm": 1.40625, "learning_rate": 0.0012232113117317948, "loss": 0.7336, "step": 6404 }, { "epoch": 0.44558071585098613, "grad_norm": 1.2421875, "learning_rate": 0.0012229916582025427, "loss": 1.2174, "step": 6405 }, { "epoch": 0.4456502834881213, "grad_norm": 1.1328125, "learning_rate": 0.0012227719933508977, "loss": 0.8288, "step": 6406 }, { "epoch": 0.4457198511252565, "grad_norm": 1.203125, "learning_rate": 0.001222552317188014, "loss": 0.8039, "step": 6407 }, { "epoch": 0.44578941876239175, "grad_norm": 1.359375, "learning_rate": 0.0012223326297250453, "loss": 1.055, "step": 6408 }, { "epoch": 0.4458589863995269, "grad_norm": 1.09375, "learning_rate": 0.0012221129309731463, "loss": 0.8263, "step": 6409 }, { "epoch": 0.44592855403666215, "grad_norm": 1.140625, "learning_rate": 0.0012218932209434722, "loss": 0.8756, "step": 6410 }, { "epoch": 0.4459981216737974, "grad_norm": 0.94921875, "learning_rate": 0.0012216734996471788, "loss": 0.8042, "step": 6411 }, { "epoch": 0.44606768931093255, "grad_norm": 0.94921875, "learning_rate": 0.0012214537670954225, "loss": 0.8315, "step": 6412 }, { "epoch": 0.4461372569480678, "grad_norm": 1.359375, "learning_rate": 0.0012212340232993597, "loss": 0.8429, "step": 6413 }, { "epoch": 0.44620682458520294, "grad_norm": 1.0703125, "learning_rate": 0.0012210142682701488, "loss": 0.9472, "step": 6414 }, { "epoch": 0.44627639222233817, "grad_norm": 0.90234375, "learning_rate": 0.0012207945020189473, "loss": 0.8691, "step": 6415 }, { "epoch": 0.4463459598594734, "grad_norm": 1.125, "learning_rate": 0.0012205747245569135, "loss": 0.7448, "step": 6416 }, { "epoch": 0.44641552749660857, "grad_norm": 0.9296875, "learning_rate": 0.0012203549358952076, "loss": 1.11, "step": 6417 }, { "epoch": 0.4464850951337438, "grad_norm": 1.140625, "learning_rate": 0.001220135136044988, "loss": 0.8819, "step": 6418 }, { "epoch": 0.44655466277087896, "grad_norm": 0.95703125, "learning_rate": 0.0012199153250174162, "loss": 0.8179, "step": 6419 }, { "epoch": 0.4466242304080142, "grad_norm": 1.015625, "learning_rate": 0.0012196955028236523, "loss": 0.9889, "step": 6420 }, { "epoch": 0.4466937980451494, "grad_norm": 1.0625, "learning_rate": 0.0012194756694748586, "loss": 0.8049, "step": 6421 }, { "epoch": 0.4467633656822846, "grad_norm": 1.03125, "learning_rate": 0.0012192558249821963, "loss": 0.9424, "step": 6422 }, { "epoch": 0.4468329333194198, "grad_norm": 1.15625, "learning_rate": 0.0012190359693568284, "loss": 0.9192, "step": 6423 }, { "epoch": 0.44690250095655504, "grad_norm": 1.1015625, "learning_rate": 0.0012188161026099183, "loss": 0.9867, "step": 6424 }, { "epoch": 0.4469720685936902, "grad_norm": 1.3125, "learning_rate": 0.0012185962247526288, "loss": 0.8561, "step": 6425 }, { "epoch": 0.44704163623082543, "grad_norm": 1.078125, "learning_rate": 0.0012183763357961252, "loss": 0.8188, "step": 6426 }, { "epoch": 0.4471112038679606, "grad_norm": 1.25, "learning_rate": 0.001218156435751572, "loss": 0.7588, "step": 6427 }, { "epoch": 0.44718077150509583, "grad_norm": 1.046875, "learning_rate": 0.0012179365246301347, "loss": 0.8457, "step": 6428 }, { "epoch": 0.44725033914223106, "grad_norm": 1.3203125, "learning_rate": 0.0012177166024429787, "loss": 0.9289, "step": 6429 }, { "epoch": 0.4473199067793662, "grad_norm": 0.9453125, "learning_rate": 0.0012174966692012712, "loss": 0.6494, "step": 6430 }, { "epoch": 0.44738947441650145, "grad_norm": 1.25, "learning_rate": 0.0012172767249161796, "loss": 0.8693, "step": 6431 }, { "epoch": 0.4474590420536366, "grad_norm": 1.1640625, "learning_rate": 0.0012170567695988703, "loss": 0.8021, "step": 6432 }, { "epoch": 0.44752860969077185, "grad_norm": 1.109375, "learning_rate": 0.0012168368032605128, "loss": 0.7772, "step": 6433 }, { "epoch": 0.4475981773279071, "grad_norm": 1.171875, "learning_rate": 0.001216616825912275, "loss": 0.9562, "step": 6434 }, { "epoch": 0.44766774496504225, "grad_norm": 1.1484375, "learning_rate": 0.001216396837565327, "loss": 0.9181, "step": 6435 }, { "epoch": 0.4477373126021775, "grad_norm": 1.046875, "learning_rate": 0.001216176838230838, "loss": 0.7663, "step": 6436 }, { "epoch": 0.4478068802393127, "grad_norm": 1.078125, "learning_rate": 0.001215956827919979, "loss": 0.7702, "step": 6437 }, { "epoch": 0.44787644787644787, "grad_norm": 1.09375, "learning_rate": 0.0012157368066439207, "loss": 0.7952, "step": 6438 }, { "epoch": 0.4479460155135831, "grad_norm": 1.171875, "learning_rate": 0.0012155167744138345, "loss": 0.5708, "step": 6439 }, { "epoch": 0.44801558315071827, "grad_norm": 1.0078125, "learning_rate": 0.0012152967312408932, "loss": 1.1361, "step": 6440 }, { "epoch": 0.4480851507878535, "grad_norm": 1.25, "learning_rate": 0.0012150766771362688, "loss": 0.9889, "step": 6441 }, { "epoch": 0.4481547184249887, "grad_norm": 1.0234375, "learning_rate": 0.0012148566121111348, "loss": 0.617, "step": 6442 }, { "epoch": 0.4482242860621239, "grad_norm": 1.09375, "learning_rate": 0.001214636536176665, "loss": 1.0036, "step": 6443 }, { "epoch": 0.4482938536992591, "grad_norm": 1.359375, "learning_rate": 0.001214416449344034, "loss": 0.8777, "step": 6444 }, { "epoch": 0.4483634213363943, "grad_norm": 1.1328125, "learning_rate": 0.001214196351624416, "loss": 0.9089, "step": 6445 }, { "epoch": 0.4484329889735295, "grad_norm": 1.90625, "learning_rate": 0.0012139762430289872, "loss": 1.0769, "step": 6446 }, { "epoch": 0.44850255661066474, "grad_norm": 1.0859375, "learning_rate": 0.0012137561235689234, "loss": 0.8059, "step": 6447 }, { "epoch": 0.4485721242477999, "grad_norm": 0.90625, "learning_rate": 0.0012135359932554006, "loss": 0.6083, "step": 6448 }, { "epoch": 0.44864169188493513, "grad_norm": 1.078125, "learning_rate": 0.001213315852099597, "loss": 0.6459, "step": 6449 }, { "epoch": 0.44871125952207036, "grad_norm": 1.21875, "learning_rate": 0.001213095700112689, "loss": 1.041, "step": 6450 }, { "epoch": 0.44878082715920553, "grad_norm": 0.9765625, "learning_rate": 0.001212875537305856, "loss": 0.8277, "step": 6451 }, { "epoch": 0.44885039479634076, "grad_norm": 1.21875, "learning_rate": 0.0012126553636902758, "loss": 0.7094, "step": 6452 }, { "epoch": 0.4489199624334759, "grad_norm": 1.3515625, "learning_rate": 0.001212435179277128, "loss": 0.9776, "step": 6453 }, { "epoch": 0.44898953007061115, "grad_norm": 1.390625, "learning_rate": 0.0012122149840775932, "loss": 0.9729, "step": 6454 }, { "epoch": 0.4490590977077464, "grad_norm": 1.2265625, "learning_rate": 0.0012119947781028503, "loss": 0.7491, "step": 6455 }, { "epoch": 0.44912866534488155, "grad_norm": 1.2265625, "learning_rate": 0.0012117745613640816, "loss": 0.8354, "step": 6456 }, { "epoch": 0.4491982329820168, "grad_norm": 1.1796875, "learning_rate": 0.001211554333872468, "loss": 0.9233, "step": 6457 }, { "epoch": 0.44926780061915195, "grad_norm": 0.9609375, "learning_rate": 0.0012113340956391916, "loss": 0.6918, "step": 6458 }, { "epoch": 0.4493373682562872, "grad_norm": 0.98046875, "learning_rate": 0.001211113846675435, "loss": 0.7891, "step": 6459 }, { "epoch": 0.4494069358934224, "grad_norm": 1.125, "learning_rate": 0.0012108935869923813, "loss": 0.679, "step": 6460 }, { "epoch": 0.44947650353055757, "grad_norm": 0.890625, "learning_rate": 0.0012106733166012144, "loss": 0.6628, "step": 6461 }, { "epoch": 0.4495460711676928, "grad_norm": 1.234375, "learning_rate": 0.0012104530355131183, "loss": 0.7719, "step": 6462 }, { "epoch": 0.449615638804828, "grad_norm": 0.9765625, "learning_rate": 0.001210232743739278, "loss": 0.8797, "step": 6463 }, { "epoch": 0.4496852064419632, "grad_norm": 1.078125, "learning_rate": 0.001210012441290878, "loss": 1.2471, "step": 6464 }, { "epoch": 0.4497547740790984, "grad_norm": 0.9453125, "learning_rate": 0.0012097921281791057, "loss": 0.57, "step": 6465 }, { "epoch": 0.4498243417162336, "grad_norm": 1.15625, "learning_rate": 0.0012095718044151458, "loss": 0.7033, "step": 6466 }, { "epoch": 0.4498939093533688, "grad_norm": 1.1796875, "learning_rate": 0.0012093514700101864, "loss": 0.9363, "step": 6467 }, { "epoch": 0.44996347699050404, "grad_norm": 1.1640625, "learning_rate": 0.0012091311249754144, "loss": 0.9462, "step": 6468 }, { "epoch": 0.4500330446276392, "grad_norm": 1.03125, "learning_rate": 0.001208910769322018, "loss": 1.047, "step": 6469 }, { "epoch": 0.45010261226477444, "grad_norm": 1.1640625, "learning_rate": 0.0012086904030611859, "loss": 0.7955, "step": 6470 }, { "epoch": 0.4501721799019096, "grad_norm": 1.0703125, "learning_rate": 0.0012084700262041067, "loss": 0.7809, "step": 6471 }, { "epoch": 0.45024174753904483, "grad_norm": 1.296875, "learning_rate": 0.0012082496387619706, "loss": 0.9051, "step": 6472 }, { "epoch": 0.45031131517618006, "grad_norm": 0.94140625, "learning_rate": 0.0012080292407459672, "loss": 0.6128, "step": 6473 }, { "epoch": 0.45038088281331523, "grad_norm": 0.84375, "learning_rate": 0.0012078088321672874, "loss": 0.6237, "step": 6474 }, { "epoch": 0.45045045045045046, "grad_norm": 1.2109375, "learning_rate": 0.001207588413037123, "loss": 0.9595, "step": 6475 }, { "epoch": 0.4505200180875856, "grad_norm": 1.3359375, "learning_rate": 0.001207367983366665, "loss": 0.9554, "step": 6476 }, { "epoch": 0.45058958572472085, "grad_norm": 1.1796875, "learning_rate": 0.0012071475431671066, "loss": 0.8974, "step": 6477 }, { "epoch": 0.4506591533618561, "grad_norm": 1.078125, "learning_rate": 0.0012069270924496393, "loss": 0.7054, "step": 6478 }, { "epoch": 0.45072872099899125, "grad_norm": 1.015625, "learning_rate": 0.0012067066312254579, "loss": 0.6205, "step": 6479 }, { "epoch": 0.4507982886361265, "grad_norm": 1.0859375, "learning_rate": 0.0012064861595057548, "loss": 1.0292, "step": 6480 }, { "epoch": 0.4508678562732617, "grad_norm": 1.046875, "learning_rate": 0.001206265677301726, "loss": 0.7173, "step": 6481 }, { "epoch": 0.4509374239103969, "grad_norm": 1.0234375, "learning_rate": 0.0012060451846245654, "loss": 0.8283, "step": 6482 }, { "epoch": 0.4510069915475321, "grad_norm": 1.1328125, "learning_rate": 0.001205824681485469, "loss": 0.9758, "step": 6483 }, { "epoch": 0.45107655918466727, "grad_norm": 1.0, "learning_rate": 0.0012056041678956326, "loss": 0.7103, "step": 6484 }, { "epoch": 0.4511461268218025, "grad_norm": 1.25, "learning_rate": 0.001205383643866253, "loss": 0.904, "step": 6485 }, { "epoch": 0.4512156944589377, "grad_norm": 0.8203125, "learning_rate": 0.0012051631094085274, "loss": 0.7688, "step": 6486 }, { "epoch": 0.4512852620960729, "grad_norm": 0.8515625, "learning_rate": 0.0012049425645336528, "loss": 0.7455, "step": 6487 }, { "epoch": 0.4513548297332081, "grad_norm": 0.94921875, "learning_rate": 0.0012047220092528282, "loss": 0.7169, "step": 6488 }, { "epoch": 0.4514243973703433, "grad_norm": 1.046875, "learning_rate": 0.0012045014435772513, "loss": 0.9323, "step": 6489 }, { "epoch": 0.4514939650074785, "grad_norm": 1.046875, "learning_rate": 0.001204280867518122, "loss": 0.9188, "step": 6490 }, { "epoch": 0.45156353264461374, "grad_norm": 1.421875, "learning_rate": 0.0012040602810866401, "loss": 0.9239, "step": 6491 }, { "epoch": 0.4516331002817489, "grad_norm": 0.99609375, "learning_rate": 0.0012038396842940055, "loss": 0.9611, "step": 6492 }, { "epoch": 0.45170266791888414, "grad_norm": 1.3828125, "learning_rate": 0.0012036190771514195, "loss": 0.9009, "step": 6493 }, { "epoch": 0.45177223555601936, "grad_norm": 1.09375, "learning_rate": 0.0012033984596700827, "loss": 0.6225, "step": 6494 }, { "epoch": 0.45184180319315453, "grad_norm": 0.98046875, "learning_rate": 0.0012031778318611977, "loss": 0.8611, "step": 6495 }, { "epoch": 0.45191137083028976, "grad_norm": 0.77734375, "learning_rate": 0.001202957193735966, "loss": 0.7015, "step": 6496 }, { "epoch": 0.45198093846742493, "grad_norm": 0.9765625, "learning_rate": 0.001202736545305591, "loss": 0.8262, "step": 6497 }, { "epoch": 0.45205050610456016, "grad_norm": 1.2265625, "learning_rate": 0.0012025158865812764, "loss": 0.8527, "step": 6498 }, { "epoch": 0.4521200737416954, "grad_norm": 1.1015625, "learning_rate": 0.001202295217574226, "loss": 0.7397, "step": 6499 }, { "epoch": 0.45218964137883055, "grad_norm": 1.1640625, "learning_rate": 0.0012020745382956438, "loss": 0.9292, "step": 6500 }, { "epoch": 0.4522592090159658, "grad_norm": 1.421875, "learning_rate": 0.001201853848756735, "loss": 0.7857, "step": 6501 }, { "epoch": 0.45232877665310095, "grad_norm": 1.09375, "learning_rate": 0.0012016331489687056, "loss": 0.8355, "step": 6502 }, { "epoch": 0.4523983442902362, "grad_norm": 1.1640625, "learning_rate": 0.0012014124389427606, "loss": 0.9161, "step": 6503 }, { "epoch": 0.4524679119273714, "grad_norm": 1.046875, "learning_rate": 0.0012011917186901075, "loss": 0.9408, "step": 6504 }, { "epoch": 0.4525374795645066, "grad_norm": 1.1015625, "learning_rate": 0.0012009709882219528, "loss": 0.9124, "step": 6505 }, { "epoch": 0.4526070472016418, "grad_norm": 1.0390625, "learning_rate": 0.0012007502475495048, "loss": 0.8368, "step": 6506 }, { "epoch": 0.452676614838777, "grad_norm": 1.578125, "learning_rate": 0.0012005294966839703, "loss": 0.9683, "step": 6507 }, { "epoch": 0.4527461824759122, "grad_norm": 1.4296875, "learning_rate": 0.0012003087356365595, "loss": 0.7981, "step": 6508 }, { "epoch": 0.4528157501130474, "grad_norm": 1.0703125, "learning_rate": 0.0012000879644184803, "loss": 0.6341, "step": 6509 }, { "epoch": 0.4528853177501826, "grad_norm": 1.1875, "learning_rate": 0.0011998671830409427, "loss": 0.6034, "step": 6510 }, { "epoch": 0.4529548853873178, "grad_norm": 1.2578125, "learning_rate": 0.0011996463915151573, "loss": 0.6165, "step": 6511 }, { "epoch": 0.45302445302445304, "grad_norm": 1.09375, "learning_rate": 0.0011994255898523341, "loss": 0.9816, "step": 6512 }, { "epoch": 0.4530940206615882, "grad_norm": 1.234375, "learning_rate": 0.0011992047780636848, "loss": 0.7594, "step": 6513 }, { "epoch": 0.45316358829872344, "grad_norm": 1.0078125, "learning_rate": 0.0011989839561604208, "loss": 0.7464, "step": 6514 }, { "epoch": 0.4532331559358586, "grad_norm": 0.8671875, "learning_rate": 0.0011987631241537546, "loss": 0.883, "step": 6515 }, { "epoch": 0.45330272357299384, "grad_norm": 1.265625, "learning_rate": 0.0011985422820548989, "loss": 0.776, "step": 6516 }, { "epoch": 0.45337229121012906, "grad_norm": 1.4609375, "learning_rate": 0.0011983214298750663, "loss": 0.9282, "step": 6517 }, { "epoch": 0.45344185884726423, "grad_norm": 0.92578125, "learning_rate": 0.0011981005676254717, "loss": 0.7438, "step": 6518 }, { "epoch": 0.45351142648439946, "grad_norm": 1.1328125, "learning_rate": 0.0011978796953173285, "loss": 0.9153, "step": 6519 }, { "epoch": 0.4535809941215347, "grad_norm": 0.95703125, "learning_rate": 0.001197658812961852, "loss": 0.7996, "step": 6520 }, { "epoch": 0.45365056175866986, "grad_norm": 0.8984375, "learning_rate": 0.001197437920570257, "loss": 0.7393, "step": 6521 }, { "epoch": 0.4537201293958051, "grad_norm": 1.3046875, "learning_rate": 0.0011972170181537595, "loss": 0.8687, "step": 6522 }, { "epoch": 0.45378969703294025, "grad_norm": 1.4296875, "learning_rate": 0.001196996105723576, "loss": 0.7682, "step": 6523 }, { "epoch": 0.4538592646700755, "grad_norm": 1.0703125, "learning_rate": 0.0011967751832909232, "loss": 0.7974, "step": 6524 }, { "epoch": 0.4539288323072107, "grad_norm": 1.3125, "learning_rate": 0.0011965542508670188, "loss": 1.0709, "step": 6525 }, { "epoch": 0.4539983999443459, "grad_norm": 0.921875, "learning_rate": 0.0011963333084630797, "loss": 0.8347, "step": 6526 }, { "epoch": 0.4540679675814811, "grad_norm": 1.109375, "learning_rate": 0.0011961123560903248, "loss": 0.9493, "step": 6527 }, { "epoch": 0.4541375352186163, "grad_norm": 1.1484375, "learning_rate": 0.0011958913937599731, "loss": 0.9302, "step": 6528 }, { "epoch": 0.4542071028557515, "grad_norm": 1.0703125, "learning_rate": 0.001195670421483244, "loss": 0.8061, "step": 6529 }, { "epoch": 0.4542766704928867, "grad_norm": 0.9453125, "learning_rate": 0.0011954494392713566, "loss": 0.8252, "step": 6530 }, { "epoch": 0.4543462381300219, "grad_norm": 1.09375, "learning_rate": 0.0011952284471355324, "loss": 0.9535, "step": 6531 }, { "epoch": 0.4544158057671571, "grad_norm": 1.1796875, "learning_rate": 0.0011950074450869912, "loss": 0.7242, "step": 6532 }, { "epoch": 0.45448537340429235, "grad_norm": 1.1484375, "learning_rate": 0.001194786433136955, "loss": 1.0569, "step": 6533 }, { "epoch": 0.4545549410414275, "grad_norm": 1.078125, "learning_rate": 0.0011945654112966457, "loss": 0.9092, "step": 6534 }, { "epoch": 0.45462450867856274, "grad_norm": 0.98828125, "learning_rate": 0.0011943443795772854, "loss": 0.7104, "step": 6535 }, { "epoch": 0.4546940763156979, "grad_norm": 1.0625, "learning_rate": 0.0011941233379900971, "loss": 0.8083, "step": 6536 }, { "epoch": 0.45476364395283314, "grad_norm": 1.015625, "learning_rate": 0.001193902286546304, "loss": 0.7365, "step": 6537 }, { "epoch": 0.45483321158996837, "grad_norm": 0.921875, "learning_rate": 0.0011936812252571303, "loss": 0.8173, "step": 6538 }, { "epoch": 0.45490277922710354, "grad_norm": 1.0703125, "learning_rate": 0.0011934601541338003, "loss": 0.8049, "step": 6539 }, { "epoch": 0.45497234686423876, "grad_norm": 1.4375, "learning_rate": 0.0011932390731875385, "loss": 0.983, "step": 6540 }, { "epoch": 0.45504191450137393, "grad_norm": 1.2265625, "learning_rate": 0.0011930179824295706, "loss": 0.9033, "step": 6541 }, { "epoch": 0.45511148213850916, "grad_norm": 0.93359375, "learning_rate": 0.0011927968818711227, "loss": 0.9472, "step": 6542 }, { "epoch": 0.4551810497756444, "grad_norm": 0.98828125, "learning_rate": 0.0011925757715234204, "loss": 0.5104, "step": 6543 }, { "epoch": 0.45525061741277956, "grad_norm": 1.0, "learning_rate": 0.0011923546513976915, "loss": 1.0062, "step": 6544 }, { "epoch": 0.4553201850499148, "grad_norm": 1.109375, "learning_rate": 0.001192133521505163, "loss": 0.8641, "step": 6545 }, { "epoch": 0.45538975268705, "grad_norm": 1.4140625, "learning_rate": 0.0011919123818570625, "loss": 0.911, "step": 6546 }, { "epoch": 0.4554593203241852, "grad_norm": 1.234375, "learning_rate": 0.0011916912324646184, "loss": 0.7561, "step": 6547 }, { "epoch": 0.4555288879613204, "grad_norm": 1.3046875, "learning_rate": 0.00119147007333906, "loss": 0.9323, "step": 6548 }, { "epoch": 0.4555984555984556, "grad_norm": 1.1328125, "learning_rate": 0.0011912489044916164, "loss": 0.6791, "step": 6549 }, { "epoch": 0.4556680232355908, "grad_norm": 1.0859375, "learning_rate": 0.0011910277259335172, "loss": 1.0677, "step": 6550 }, { "epoch": 0.45573759087272603, "grad_norm": 0.99609375, "learning_rate": 0.001190806537675993, "loss": 0.9121, "step": 6551 }, { "epoch": 0.4558071585098612, "grad_norm": 1.0859375, "learning_rate": 0.0011905853397302746, "loss": 0.9033, "step": 6552 }, { "epoch": 0.4558767261469964, "grad_norm": 1.375, "learning_rate": 0.001190364132107593, "loss": 0.9323, "step": 6553 }, { "epoch": 0.4559462937841316, "grad_norm": 1.0625, "learning_rate": 0.0011901429148191806, "loss": 0.6688, "step": 6554 }, { "epoch": 0.4560158614212668, "grad_norm": 1.1015625, "learning_rate": 0.0011899216878762692, "loss": 0.7977, "step": 6555 }, { "epoch": 0.45608542905840205, "grad_norm": 0.94921875, "learning_rate": 0.001189700451290092, "loss": 0.8965, "step": 6556 }, { "epoch": 0.4561549966955372, "grad_norm": 1.0078125, "learning_rate": 0.0011894792050718818, "loss": 0.7538, "step": 6557 }, { "epoch": 0.45622456433267244, "grad_norm": 1.453125, "learning_rate": 0.0011892579492328728, "loss": 0.8227, "step": 6558 }, { "epoch": 0.45629413196980767, "grad_norm": 1.265625, "learning_rate": 0.001189036683784299, "loss": 1.0174, "step": 6559 }, { "epoch": 0.45636369960694284, "grad_norm": 1.078125, "learning_rate": 0.001188815408737395, "loss": 0.7564, "step": 6560 }, { "epoch": 0.45643326724407807, "grad_norm": 1.109375, "learning_rate": 0.0011885941241033967, "loss": 0.72, "step": 6561 }, { "epoch": 0.45650283488121324, "grad_norm": 1.109375, "learning_rate": 0.001188372829893539, "loss": 0.7294, "step": 6562 }, { "epoch": 0.45657240251834846, "grad_norm": 1.09375, "learning_rate": 0.0011881515261190586, "loss": 0.8049, "step": 6563 }, { "epoch": 0.4566419701554837, "grad_norm": 1.421875, "learning_rate": 0.001187930212791192, "loss": 0.8503, "step": 6564 }, { "epoch": 0.45671153779261886, "grad_norm": 1.1953125, "learning_rate": 0.0011877088899211762, "loss": 0.7304, "step": 6565 }, { "epoch": 0.4567811054297541, "grad_norm": 1.0625, "learning_rate": 0.0011874875575202495, "loss": 0.6712, "step": 6566 }, { "epoch": 0.45685067306688926, "grad_norm": 1.2734375, "learning_rate": 0.0011872662155996494, "loss": 0.7354, "step": 6567 }, { "epoch": 0.4569202407040245, "grad_norm": 1.0703125, "learning_rate": 0.0011870448641706148, "loss": 0.8842, "step": 6568 }, { "epoch": 0.4569898083411597, "grad_norm": 1.1484375, "learning_rate": 0.0011868235032443848, "loss": 0.8922, "step": 6569 }, { "epoch": 0.4570593759782949, "grad_norm": 1.09375, "learning_rate": 0.001186602132832199, "loss": 0.9433, "step": 6570 }, { "epoch": 0.4571289436154301, "grad_norm": 0.9765625, "learning_rate": 0.0011863807529452974, "loss": 1.005, "step": 6571 }, { "epoch": 0.45719851125256533, "grad_norm": 0.9375, "learning_rate": 0.0011861593635949207, "loss": 0.7681, "step": 6572 }, { "epoch": 0.4572680788897005, "grad_norm": 1.125, "learning_rate": 0.0011859379647923096, "loss": 0.8305, "step": 6573 }, { "epoch": 0.45733764652683573, "grad_norm": 1.2890625, "learning_rate": 0.001185716556548706, "loss": 1.1047, "step": 6574 }, { "epoch": 0.4574072141639709, "grad_norm": 0.81640625, "learning_rate": 0.001185495138875352, "loss": 0.7204, "step": 6575 }, { "epoch": 0.4574767818011061, "grad_norm": 1.3359375, "learning_rate": 0.0011852737117834893, "loss": 0.8898, "step": 6576 }, { "epoch": 0.45754634943824135, "grad_norm": 1.1640625, "learning_rate": 0.0011850522752843615, "loss": 0.945, "step": 6577 }, { "epoch": 0.4576159170753765, "grad_norm": 1.15625, "learning_rate": 0.001184830829389212, "loss": 0.9251, "step": 6578 }, { "epoch": 0.45768548471251175, "grad_norm": 1.0546875, "learning_rate": 0.0011846093741092847, "loss": 0.874, "step": 6579 }, { "epoch": 0.4577550523496469, "grad_norm": 1.1171875, "learning_rate": 0.0011843879094558239, "loss": 0.9574, "step": 6580 }, { "epoch": 0.45782461998678214, "grad_norm": 1.15625, "learning_rate": 0.0011841664354400741, "loss": 0.9693, "step": 6581 }, { "epoch": 0.45789418762391737, "grad_norm": 1.15625, "learning_rate": 0.0011839449520732812, "loss": 0.9989, "step": 6582 }, { "epoch": 0.45796375526105254, "grad_norm": 1.1171875, "learning_rate": 0.0011837234593666908, "loss": 0.7398, "step": 6583 }, { "epoch": 0.45803332289818777, "grad_norm": 1.0234375, "learning_rate": 0.0011835019573315493, "loss": 0.7772, "step": 6584 }, { "epoch": 0.458102890535323, "grad_norm": 1.3671875, "learning_rate": 0.0011832804459791031, "loss": 0.7726, "step": 6585 }, { "epoch": 0.45817245817245816, "grad_norm": 1.1875, "learning_rate": 0.0011830589253205997, "loss": 0.9433, "step": 6586 }, { "epoch": 0.4582420258095934, "grad_norm": 1.1953125, "learning_rate": 0.0011828373953672868, "loss": 0.765, "step": 6587 }, { "epoch": 0.45831159344672856, "grad_norm": 1.1484375, "learning_rate": 0.0011826158561304126, "loss": 0.7674, "step": 6588 }, { "epoch": 0.4583811610838638, "grad_norm": 1.3515625, "learning_rate": 0.0011823943076212256, "loss": 1.1343, "step": 6589 }, { "epoch": 0.458450728720999, "grad_norm": 1.2734375, "learning_rate": 0.001182172749850975, "loss": 1.2282, "step": 6590 }, { "epoch": 0.4585202963581342, "grad_norm": 1.1875, "learning_rate": 0.0011819511828309102, "loss": 0.9132, "step": 6591 }, { "epoch": 0.4585898639952694, "grad_norm": 1.171875, "learning_rate": 0.0011817296065722816, "loss": 0.9758, "step": 6592 }, { "epoch": 0.4586594316324046, "grad_norm": 0.921875, "learning_rate": 0.0011815080210863397, "loss": 0.8898, "step": 6593 }, { "epoch": 0.4587289992695398, "grad_norm": 0.88671875, "learning_rate": 0.0011812864263843353, "loss": 0.6673, "step": 6594 }, { "epoch": 0.45879856690667503, "grad_norm": 0.91796875, "learning_rate": 0.0011810648224775198, "loss": 0.727, "step": 6595 }, { "epoch": 0.4588681345438102, "grad_norm": 0.8203125, "learning_rate": 0.0011808432093771454, "loss": 0.7471, "step": 6596 }, { "epoch": 0.45893770218094543, "grad_norm": 1.296875, "learning_rate": 0.0011806215870944642, "loss": 0.7002, "step": 6597 }, { "epoch": 0.45900726981808065, "grad_norm": 1.078125, "learning_rate": 0.0011803999556407293, "loss": 0.8018, "step": 6598 }, { "epoch": 0.4590768374552158, "grad_norm": 0.87109375, "learning_rate": 0.0011801783150271934, "loss": 0.6078, "step": 6599 }, { "epoch": 0.45914640509235105, "grad_norm": 1.234375, "learning_rate": 0.0011799566652651117, "loss": 0.8592, "step": 6600 }, { "epoch": 0.4592159727294862, "grad_norm": 1.0703125, "learning_rate": 0.001179735006365737, "loss": 0.8635, "step": 6601 }, { "epoch": 0.45928554036662145, "grad_norm": 1.359375, "learning_rate": 0.001179513338340325, "loss": 0.7542, "step": 6602 }, { "epoch": 0.4593551080037567, "grad_norm": 0.9375, "learning_rate": 0.0011792916612001303, "loss": 0.7602, "step": 6603 }, { "epoch": 0.45942467564089184, "grad_norm": 1.0859375, "learning_rate": 0.0011790699749564086, "loss": 0.7673, "step": 6604 }, { "epoch": 0.45949424327802707, "grad_norm": 0.84765625, "learning_rate": 0.0011788482796204164, "loss": 0.6943, "step": 6605 }, { "epoch": 0.45956381091516224, "grad_norm": 1.2109375, "learning_rate": 0.0011786265752034098, "loss": 0.7012, "step": 6606 }, { "epoch": 0.45963337855229747, "grad_norm": 1.3515625, "learning_rate": 0.0011784048617166463, "loss": 0.9853, "step": 6607 }, { "epoch": 0.4597029461894327, "grad_norm": 1.1953125, "learning_rate": 0.001178183139171383, "loss": 0.7788, "step": 6608 }, { "epoch": 0.45977251382656786, "grad_norm": 1.3828125, "learning_rate": 0.0011779614075788781, "loss": 0.5682, "step": 6609 }, { "epoch": 0.4598420814637031, "grad_norm": 1.0390625, "learning_rate": 0.0011777396669503898, "loss": 0.6482, "step": 6610 }, { "epoch": 0.4599116491008383, "grad_norm": 1.046875, "learning_rate": 0.0011775179172971771, "loss": 0.7693, "step": 6611 }, { "epoch": 0.4599812167379735, "grad_norm": 1.2734375, "learning_rate": 0.0011772961586304993, "loss": 0.7675, "step": 6612 }, { "epoch": 0.4600507843751087, "grad_norm": 0.984375, "learning_rate": 0.0011770743909616161, "loss": 0.7296, "step": 6613 }, { "epoch": 0.4601203520122439, "grad_norm": 1.1796875, "learning_rate": 0.0011768526143017882, "loss": 0.9234, "step": 6614 }, { "epoch": 0.4601899196493791, "grad_norm": 0.97265625, "learning_rate": 0.0011766308286622756, "loss": 0.8889, "step": 6615 }, { "epoch": 0.46025948728651433, "grad_norm": 0.85546875, "learning_rate": 0.00117640903405434, "loss": 0.756, "step": 6616 }, { "epoch": 0.4603290549236495, "grad_norm": 0.9921875, "learning_rate": 0.0011761872304892427, "loss": 0.6366, "step": 6617 }, { "epoch": 0.46039862256078473, "grad_norm": 1.34375, "learning_rate": 0.001175965417978246, "loss": 1.0019, "step": 6618 }, { "epoch": 0.4604681901979199, "grad_norm": 1.03125, "learning_rate": 0.0011757435965326123, "loss": 0.7696, "step": 6619 }, { "epoch": 0.46053775783505513, "grad_norm": 1.0078125, "learning_rate": 0.0011755217661636047, "loss": 0.7882, "step": 6620 }, { "epoch": 0.46060732547219035, "grad_norm": 1.1171875, "learning_rate": 0.0011752999268824862, "loss": 0.8832, "step": 6621 }, { "epoch": 0.4606768931093255, "grad_norm": 1.2734375, "learning_rate": 0.001175078078700521, "loss": 1.199, "step": 6622 }, { "epoch": 0.46074646074646075, "grad_norm": 1.046875, "learning_rate": 0.0011748562216289738, "loss": 0.9617, "step": 6623 }, { "epoch": 0.460816028383596, "grad_norm": 1.2421875, "learning_rate": 0.0011746343556791085, "loss": 0.8008, "step": 6624 }, { "epoch": 0.46088559602073115, "grad_norm": 1.2578125, "learning_rate": 0.001174412480862191, "loss": 0.6703, "step": 6625 }, { "epoch": 0.4609551636578664, "grad_norm": 1.265625, "learning_rate": 0.0011741905971894872, "loss": 0.9244, "step": 6626 }, { "epoch": 0.46102473129500154, "grad_norm": 1.0390625, "learning_rate": 0.0011739687046722627, "loss": 0.6949, "step": 6627 }, { "epoch": 0.46109429893213677, "grad_norm": 1.2734375, "learning_rate": 0.001173746803321784, "loss": 0.998, "step": 6628 }, { "epoch": 0.461163866569272, "grad_norm": 1.2734375, "learning_rate": 0.0011735248931493184, "loss": 0.9241, "step": 6629 }, { "epoch": 0.46123343420640717, "grad_norm": 1.3203125, "learning_rate": 0.0011733029741661336, "loss": 1.089, "step": 6630 }, { "epoch": 0.4613030018435424, "grad_norm": 1.515625, "learning_rate": 0.0011730810463834972, "loss": 0.7909, "step": 6631 }, { "epoch": 0.46137256948067756, "grad_norm": 0.96875, "learning_rate": 0.0011728591098126775, "loss": 0.6703, "step": 6632 }, { "epoch": 0.4614421371178128, "grad_norm": 1.125, "learning_rate": 0.0011726371644649436, "loss": 0.8211, "step": 6633 }, { "epoch": 0.461511704754948, "grad_norm": 1.046875, "learning_rate": 0.0011724152103515647, "loss": 0.9068, "step": 6634 }, { "epoch": 0.4615812723920832, "grad_norm": 1.1640625, "learning_rate": 0.0011721932474838103, "loss": 0.813, "step": 6635 }, { "epoch": 0.4616508400292184, "grad_norm": 1.234375, "learning_rate": 0.0011719712758729505, "loss": 0.7287, "step": 6636 }, { "epoch": 0.46172040766635364, "grad_norm": 1.234375, "learning_rate": 0.0011717492955302569, "loss": 0.9651, "step": 6637 }, { "epoch": 0.4617899753034888, "grad_norm": 0.96484375, "learning_rate": 0.0011715273064669988, "loss": 0.7242, "step": 6638 }, { "epoch": 0.46185954294062403, "grad_norm": 1.1328125, "learning_rate": 0.0011713053086944494, "loss": 1.2099, "step": 6639 }, { "epoch": 0.4619291105777592, "grad_norm": 1.109375, "learning_rate": 0.0011710833022238797, "loss": 0.6752, "step": 6640 }, { "epoch": 0.46199867821489443, "grad_norm": 1.1328125, "learning_rate": 0.001170861287066562, "loss": 0.9003, "step": 6641 }, { "epoch": 0.46206824585202966, "grad_norm": 1.125, "learning_rate": 0.0011706392632337694, "loss": 0.8056, "step": 6642 }, { "epoch": 0.46213781348916483, "grad_norm": 0.9609375, "learning_rate": 0.0011704172307367754, "loss": 0.697, "step": 6643 }, { "epoch": 0.46220738112630005, "grad_norm": 1.1015625, "learning_rate": 0.001170195189586853, "loss": 0.7523, "step": 6644 }, { "epoch": 0.4622769487634352, "grad_norm": 1.1015625, "learning_rate": 0.0011699731397952766, "loss": 1.0126, "step": 6645 }, { "epoch": 0.46234651640057045, "grad_norm": 1.28125, "learning_rate": 0.0011697510813733214, "loss": 0.869, "step": 6646 }, { "epoch": 0.4624160840377057, "grad_norm": 1.171875, "learning_rate": 0.0011695290143322616, "loss": 0.8879, "step": 6647 }, { "epoch": 0.46248565167484085, "grad_norm": 1.234375, "learning_rate": 0.001169306938683373, "loss": 0.8482, "step": 6648 }, { "epoch": 0.4625552193119761, "grad_norm": 1.1796875, "learning_rate": 0.0011690848544379316, "loss": 0.6539, "step": 6649 }, { "epoch": 0.4626247869491113, "grad_norm": 0.85546875, "learning_rate": 0.0011688627616072132, "loss": 0.732, "step": 6650 }, { "epoch": 0.46269435458624647, "grad_norm": 1.4765625, "learning_rate": 0.001168640660202495, "loss": 1.0946, "step": 6651 }, { "epoch": 0.4627639222233817, "grad_norm": 1.0078125, "learning_rate": 0.001168418550235054, "loss": 0.8689, "step": 6652 }, { "epoch": 0.46283348986051687, "grad_norm": 1.15625, "learning_rate": 0.0011681964317161685, "loss": 0.9217, "step": 6653 }, { "epoch": 0.4629030574976521, "grad_norm": 1.78125, "learning_rate": 0.001167974304657115, "loss": 0.93, "step": 6654 }, { "epoch": 0.4629726251347873, "grad_norm": 1.140625, "learning_rate": 0.001167752169069174, "loss": 0.8417, "step": 6655 }, { "epoch": 0.4630421927719225, "grad_norm": 1.15625, "learning_rate": 0.0011675300249636227, "loss": 0.8598, "step": 6656 }, { "epoch": 0.4631117604090577, "grad_norm": 1.171875, "learning_rate": 0.0011673078723517414, "loss": 0.9799, "step": 6657 }, { "epoch": 0.4631813280461929, "grad_norm": 1.1953125, "learning_rate": 0.0011670857112448094, "loss": 0.8178, "step": 6658 }, { "epoch": 0.4632508956833281, "grad_norm": 1.0546875, "learning_rate": 0.0011668635416541072, "loss": 0.9696, "step": 6659 }, { "epoch": 0.46332046332046334, "grad_norm": 1.109375, "learning_rate": 0.0011666413635909156, "loss": 0.8456, "step": 6660 }, { "epoch": 0.4633900309575985, "grad_norm": 0.89453125, "learning_rate": 0.0011664191770665154, "loss": 0.6578, "step": 6661 }, { "epoch": 0.46345959859473373, "grad_norm": 1.1171875, "learning_rate": 0.0011661969820921884, "loss": 0.742, "step": 6662 }, { "epoch": 0.46352916623186896, "grad_norm": 1.0859375, "learning_rate": 0.0011659747786792161, "loss": 0.8278, "step": 6663 }, { "epoch": 0.46359873386900413, "grad_norm": 0.91796875, "learning_rate": 0.0011657525668388813, "loss": 0.7629, "step": 6664 }, { "epoch": 0.46366830150613936, "grad_norm": 1.0546875, "learning_rate": 0.0011655303465824664, "loss": 0.637, "step": 6665 }, { "epoch": 0.46373786914327453, "grad_norm": 1.28125, "learning_rate": 0.0011653081179212549, "loss": 0.7785, "step": 6666 }, { "epoch": 0.46380743678040975, "grad_norm": 1.203125, "learning_rate": 0.0011650858808665303, "loss": 0.9898, "step": 6667 }, { "epoch": 0.463877004417545, "grad_norm": 1.0, "learning_rate": 0.0011648636354295767, "loss": 0.7137, "step": 6668 }, { "epoch": 0.46394657205468015, "grad_norm": 0.890625, "learning_rate": 0.0011646413816216792, "loss": 0.5219, "step": 6669 }, { "epoch": 0.4640161396918154, "grad_norm": 0.8359375, "learning_rate": 0.0011644191194541216, "loss": 0.6708, "step": 6670 }, { "epoch": 0.46408570732895055, "grad_norm": 1.0390625, "learning_rate": 0.0011641968489381903, "loss": 0.7185, "step": 6671 }, { "epoch": 0.4641552749660858, "grad_norm": 1.265625, "learning_rate": 0.0011639745700851702, "loss": 0.755, "step": 6672 }, { "epoch": 0.464224842603221, "grad_norm": 1.5703125, "learning_rate": 0.001163752282906348, "loss": 0.7932, "step": 6673 }, { "epoch": 0.46429441024035617, "grad_norm": 1.390625, "learning_rate": 0.0011635299874130107, "loss": 0.9895, "step": 6674 }, { "epoch": 0.4643639778774914, "grad_norm": 1.2890625, "learning_rate": 0.0011633076836164444, "loss": 1.0151, "step": 6675 }, { "epoch": 0.4644335455146266, "grad_norm": 1.25, "learning_rate": 0.0011630853715279374, "loss": 1.0777, "step": 6676 }, { "epoch": 0.4645031131517618, "grad_norm": 1.2890625, "learning_rate": 0.0011628630511587767, "loss": 0.8239, "step": 6677 }, { "epoch": 0.464572680788897, "grad_norm": 1.3046875, "learning_rate": 0.001162640722520252, "loss": 0.9758, "step": 6678 }, { "epoch": 0.4646422484260322, "grad_norm": 1.1796875, "learning_rate": 0.0011624183856236505, "loss": 1.0418, "step": 6679 }, { "epoch": 0.4647118160631674, "grad_norm": 0.984375, "learning_rate": 0.0011621960404802623, "loss": 0.8334, "step": 6680 }, { "epoch": 0.46478138370030264, "grad_norm": 0.9375, "learning_rate": 0.0011619736871013766, "loss": 0.764, "step": 6681 }, { "epoch": 0.4648509513374378, "grad_norm": 1.4453125, "learning_rate": 0.0011617513254982834, "loss": 1.0208, "step": 6682 }, { "epoch": 0.46492051897457304, "grad_norm": 1.0703125, "learning_rate": 0.0011615289556822735, "loss": 0.8085, "step": 6683 }, { "epoch": 0.4649900866117082, "grad_norm": 1.1484375, "learning_rate": 0.001161306577664637, "loss": 0.8879, "step": 6684 }, { "epoch": 0.46505965424884343, "grad_norm": 1.0859375, "learning_rate": 0.0011610841914566658, "loss": 0.9558, "step": 6685 }, { "epoch": 0.46512922188597866, "grad_norm": 0.89453125, "learning_rate": 0.0011608617970696512, "loss": 0.6572, "step": 6686 }, { "epoch": 0.46519878952311383, "grad_norm": 1.2734375, "learning_rate": 0.0011606393945148854, "loss": 1.0288, "step": 6687 }, { "epoch": 0.46526835716024906, "grad_norm": 1.21875, "learning_rate": 0.0011604169838036608, "loss": 0.9335, "step": 6688 }, { "epoch": 0.4653379247973843, "grad_norm": 1.109375, "learning_rate": 0.00116019456494727, "loss": 0.8959, "step": 6689 }, { "epoch": 0.46540749243451945, "grad_norm": 0.83984375, "learning_rate": 0.0011599721379570071, "loss": 0.6395, "step": 6690 }, { "epoch": 0.4654770600716547, "grad_norm": 1.109375, "learning_rate": 0.001159749702844165, "loss": 1.0481, "step": 6691 }, { "epoch": 0.46554662770878985, "grad_norm": 1.0859375, "learning_rate": 0.0011595272596200386, "loss": 0.792, "step": 6692 }, { "epoch": 0.4656161953459251, "grad_norm": 1.0625, "learning_rate": 0.0011593048082959216, "loss": 0.714, "step": 6693 }, { "epoch": 0.4656857629830603, "grad_norm": 1.1796875, "learning_rate": 0.00115908234888311, "loss": 0.845, "step": 6694 }, { "epoch": 0.4657553306201955, "grad_norm": 1.125, "learning_rate": 0.0011588598813928978, "loss": 0.6841, "step": 6695 }, { "epoch": 0.4658248982573307, "grad_norm": 1.0859375, "learning_rate": 0.001158637405836582, "loss": 0.7866, "step": 6696 }, { "epoch": 0.46589446589446587, "grad_norm": 1.0078125, "learning_rate": 0.0011584149222254583, "loss": 0.7629, "step": 6697 }, { "epoch": 0.4659640335316011, "grad_norm": 0.86328125, "learning_rate": 0.0011581924305708229, "loss": 0.6811, "step": 6698 }, { "epoch": 0.4660336011687363, "grad_norm": 1.125, "learning_rate": 0.0011579699308839739, "loss": 0.9732, "step": 6699 }, { "epoch": 0.4661031688058715, "grad_norm": 1.2421875, "learning_rate": 0.0011577474231762076, "loss": 0.7477, "step": 6700 }, { "epoch": 0.4661727364430067, "grad_norm": 0.93359375, "learning_rate": 0.0011575249074588223, "loss": 0.8375, "step": 6701 }, { "epoch": 0.46624230408014194, "grad_norm": 1.3203125, "learning_rate": 0.0011573023837431163, "loss": 0.6758, "step": 6702 }, { "epoch": 0.4663118717172771, "grad_norm": 1.09375, "learning_rate": 0.0011570798520403878, "loss": 0.6657, "step": 6703 }, { "epoch": 0.46638143935441234, "grad_norm": 1.046875, "learning_rate": 0.0011568573123619367, "loss": 0.7631, "step": 6704 }, { "epoch": 0.4664510069915475, "grad_norm": 0.90625, "learning_rate": 0.0011566347647190614, "loss": 0.5423, "step": 6705 }, { "epoch": 0.46652057462868274, "grad_norm": 0.97265625, "learning_rate": 0.0011564122091230627, "loss": 0.7526, "step": 6706 }, { "epoch": 0.46659014226581796, "grad_norm": 1.1875, "learning_rate": 0.00115618964558524, "loss": 1.0364, "step": 6707 }, { "epoch": 0.46665970990295313, "grad_norm": 1.3203125, "learning_rate": 0.0011559670741168946, "loss": 0.9639, "step": 6708 }, { "epoch": 0.46672927754008836, "grad_norm": 1.203125, "learning_rate": 0.001155744494729327, "loss": 0.8695, "step": 6709 }, { "epoch": 0.46679884517722353, "grad_norm": 1.328125, "learning_rate": 0.0011555219074338393, "loss": 0.9251, "step": 6710 }, { "epoch": 0.46686841281435876, "grad_norm": 1.0234375, "learning_rate": 0.001155299312241733, "loss": 0.8074, "step": 6711 }, { "epoch": 0.466937980451494, "grad_norm": 1.1875, "learning_rate": 0.00115507670916431, "loss": 0.8851, "step": 6712 }, { "epoch": 0.46700754808862915, "grad_norm": 0.96875, "learning_rate": 0.001154854098212874, "loss": 0.7742, "step": 6713 }, { "epoch": 0.4670771157257644, "grad_norm": 1.265625, "learning_rate": 0.0011546314793987268, "loss": 0.7902, "step": 6714 }, { "epoch": 0.4671466833628996, "grad_norm": 1.1796875, "learning_rate": 0.001154408852733173, "loss": 1.073, "step": 6715 }, { "epoch": 0.4672162510000348, "grad_norm": 0.9296875, "learning_rate": 0.0011541862182275155, "loss": 0.844, "step": 6716 }, { "epoch": 0.46728581863717, "grad_norm": 1.015625, "learning_rate": 0.0011539635758930592, "loss": 0.6413, "step": 6717 }, { "epoch": 0.4673553862743052, "grad_norm": 1.1875, "learning_rate": 0.0011537409257411084, "loss": 0.8827, "step": 6718 }, { "epoch": 0.4674249539114404, "grad_norm": 1.234375, "learning_rate": 0.0011535182677829684, "loss": 0.7699, "step": 6719 }, { "epoch": 0.4674945215485756, "grad_norm": 1.0078125, "learning_rate": 0.0011532956020299447, "loss": 0.8396, "step": 6720 }, { "epoch": 0.4675640891857108, "grad_norm": 1.0390625, "learning_rate": 0.0011530729284933428, "loss": 0.8124, "step": 6721 }, { "epoch": 0.467633656822846, "grad_norm": 1.2265625, "learning_rate": 0.0011528502471844693, "loss": 0.6586, "step": 6722 }, { "epoch": 0.4677032244599812, "grad_norm": 1.2421875, "learning_rate": 0.0011526275581146303, "loss": 1.1657, "step": 6723 }, { "epoch": 0.4677727920971164, "grad_norm": 0.96484375, "learning_rate": 0.0011524048612951336, "loss": 0.7377, "step": 6724 }, { "epoch": 0.46784235973425164, "grad_norm": 1.0859375, "learning_rate": 0.0011521821567372862, "loss": 0.7954, "step": 6725 }, { "epoch": 0.4679119273713868, "grad_norm": 1.2265625, "learning_rate": 0.0011519594444523956, "loss": 0.8808, "step": 6726 }, { "epoch": 0.46798149500852204, "grad_norm": 1.09375, "learning_rate": 0.001151736724451771, "loss": 0.5918, "step": 6727 }, { "epoch": 0.46805106264565727, "grad_norm": 1.265625, "learning_rate": 0.0011515139967467195, "loss": 0.9816, "step": 6728 }, { "epoch": 0.46812063028279244, "grad_norm": 1.046875, "learning_rate": 0.0011512912613485516, "loss": 0.96, "step": 6729 }, { "epoch": 0.46819019791992766, "grad_norm": 1.265625, "learning_rate": 0.0011510685182685755, "loss": 1.0691, "step": 6730 }, { "epoch": 0.46825976555706283, "grad_norm": 0.89453125, "learning_rate": 0.001150845767518102, "loss": 0.7501, "step": 6731 }, { "epoch": 0.46832933319419806, "grad_norm": 0.83203125, "learning_rate": 0.0011506230091084403, "loss": 0.6844, "step": 6732 }, { "epoch": 0.4683989008313333, "grad_norm": 0.89453125, "learning_rate": 0.0011504002430509014, "loss": 0.7496, "step": 6733 }, { "epoch": 0.46846846846846846, "grad_norm": 1.25, "learning_rate": 0.0011501774693567968, "loss": 0.9753, "step": 6734 }, { "epoch": 0.4685380361056037, "grad_norm": 1.1640625, "learning_rate": 0.0011499546880374366, "loss": 0.9054, "step": 6735 }, { "epoch": 0.46860760374273885, "grad_norm": 1.265625, "learning_rate": 0.0011497318991041336, "loss": 0.7376, "step": 6736 }, { "epoch": 0.4686771713798741, "grad_norm": 0.953125, "learning_rate": 0.001149509102568199, "loss": 0.6246, "step": 6737 }, { "epoch": 0.4687467390170093, "grad_norm": 1.1953125, "learning_rate": 0.0011492862984409464, "loss": 0.8417, "step": 6738 }, { "epoch": 0.4688163066541445, "grad_norm": 1.15625, "learning_rate": 0.0011490634867336875, "loss": 1.0912, "step": 6739 }, { "epoch": 0.4688858742912797, "grad_norm": 1.171875, "learning_rate": 0.0011488406674577364, "loss": 0.8998, "step": 6740 }, { "epoch": 0.46895544192841493, "grad_norm": 1.140625, "learning_rate": 0.001148617840624406, "loss": 1.0196, "step": 6741 }, { "epoch": 0.4690250095655501, "grad_norm": 1.328125, "learning_rate": 0.0011483950062450112, "loss": 0.981, "step": 6742 }, { "epoch": 0.4690945772026853, "grad_norm": 0.96875, "learning_rate": 0.001148172164330866, "loss": 0.7669, "step": 6743 }, { "epoch": 0.4691641448398205, "grad_norm": 1.3671875, "learning_rate": 0.0011479493148932847, "loss": 0.8624, "step": 6744 }, { "epoch": 0.4692337124769557, "grad_norm": 0.953125, "learning_rate": 0.0011477264579435834, "loss": 0.6625, "step": 6745 }, { "epoch": 0.46930328011409095, "grad_norm": 1.2421875, "learning_rate": 0.0011475035934930768, "loss": 0.9669, "step": 6746 }, { "epoch": 0.4693728477512261, "grad_norm": 0.96484375, "learning_rate": 0.0011472807215530813, "loss": 0.7936, "step": 6747 }, { "epoch": 0.46944241538836134, "grad_norm": 1.0, "learning_rate": 0.001147057842134913, "loss": 0.7275, "step": 6748 }, { "epoch": 0.4695119830254965, "grad_norm": 1.140625, "learning_rate": 0.0011468349552498887, "loss": 0.7721, "step": 6749 }, { "epoch": 0.46958155066263174, "grad_norm": 1.296875, "learning_rate": 0.0011466120609093257, "loss": 0.9905, "step": 6750 }, { "epoch": 0.46965111829976697, "grad_norm": 1.4609375, "learning_rate": 0.001146389159124541, "loss": 0.9612, "step": 6751 }, { "epoch": 0.46972068593690214, "grad_norm": 1.1171875, "learning_rate": 0.0011461662499068527, "loss": 0.8758, "step": 6752 }, { "epoch": 0.46979025357403736, "grad_norm": 0.96484375, "learning_rate": 0.001145943333267579, "loss": 0.7701, "step": 6753 }, { "epoch": 0.4698598212111726, "grad_norm": 1.0859375, "learning_rate": 0.0011457204092180384, "loss": 0.8275, "step": 6754 }, { "epoch": 0.46992938884830776, "grad_norm": 0.9453125, "learning_rate": 0.00114549747776955, "loss": 0.6869, "step": 6755 }, { "epoch": 0.469998956485443, "grad_norm": 0.984375, "learning_rate": 0.001145274538933433, "loss": 0.848, "step": 6756 }, { "epoch": 0.47006852412257816, "grad_norm": 1.03125, "learning_rate": 0.0011450515927210073, "loss": 0.8047, "step": 6757 }, { "epoch": 0.4701380917597134, "grad_norm": 1.03125, "learning_rate": 0.0011448286391435925, "loss": 0.7052, "step": 6758 }, { "epoch": 0.4702076593968486, "grad_norm": 1.015625, "learning_rate": 0.0011446056782125097, "loss": 0.7012, "step": 6759 }, { "epoch": 0.4702772270339838, "grad_norm": 1.5, "learning_rate": 0.0011443827099390793, "loss": 0.8022, "step": 6760 }, { "epoch": 0.470346794671119, "grad_norm": 1.3359375, "learning_rate": 0.001144159734334623, "loss": 0.8481, "step": 6761 }, { "epoch": 0.4704163623082542, "grad_norm": 1.1015625, "learning_rate": 0.0011439367514104613, "loss": 0.6478, "step": 6762 }, { "epoch": 0.4704859299453894, "grad_norm": 1.0546875, "learning_rate": 0.0011437137611779171, "loss": 0.6574, "step": 6763 }, { "epoch": 0.47055549758252463, "grad_norm": 1.0, "learning_rate": 0.0011434907636483126, "loss": 0.8813, "step": 6764 }, { "epoch": 0.4706250652196598, "grad_norm": 1.3515625, "learning_rate": 0.0011432677588329703, "loss": 0.8534, "step": 6765 }, { "epoch": 0.470694632856795, "grad_norm": 1.375, "learning_rate": 0.0011430447467432137, "loss": 1.1378, "step": 6766 }, { "epoch": 0.4707642004939302, "grad_norm": 1.21875, "learning_rate": 0.0011428217273903654, "loss": 0.7197, "step": 6767 }, { "epoch": 0.4708337681310654, "grad_norm": 1.28125, "learning_rate": 0.0011425987007857498, "loss": 1.1085, "step": 6768 }, { "epoch": 0.47090333576820065, "grad_norm": 0.9453125, "learning_rate": 0.0011423756669406908, "loss": 0.6024, "step": 6769 }, { "epoch": 0.4709729034053358, "grad_norm": 1.1796875, "learning_rate": 0.0011421526258665131, "loss": 0.6622, "step": 6770 }, { "epoch": 0.47104247104247104, "grad_norm": 1.0, "learning_rate": 0.0011419295775745417, "loss": 0.8794, "step": 6771 }, { "epoch": 0.47111203867960627, "grad_norm": 1.1796875, "learning_rate": 0.001141706522076102, "loss": 0.9826, "step": 6772 }, { "epoch": 0.47118160631674144, "grad_norm": 1.1640625, "learning_rate": 0.0011414834593825188, "loss": 0.9709, "step": 6773 }, { "epoch": 0.47125117395387667, "grad_norm": 1.078125, "learning_rate": 0.001141260389505119, "loss": 0.937, "step": 6774 }, { "epoch": 0.47132074159101184, "grad_norm": 1.25, "learning_rate": 0.0011410373124552287, "loss": 0.962, "step": 6775 }, { "epoch": 0.47139030922814706, "grad_norm": 1.2734375, "learning_rate": 0.001140814228244174, "loss": 0.797, "step": 6776 }, { "epoch": 0.4714598768652823, "grad_norm": 1.3671875, "learning_rate": 0.0011405911368832832, "loss": 0.8643, "step": 6777 }, { "epoch": 0.47152944450241746, "grad_norm": 1.28125, "learning_rate": 0.0011403680383838828, "loss": 0.7165, "step": 6778 }, { "epoch": 0.4715990121395527, "grad_norm": 1.0546875, "learning_rate": 0.0011401449327573007, "loss": 0.9037, "step": 6779 }, { "epoch": 0.47166857977668786, "grad_norm": 0.9296875, "learning_rate": 0.0011399218200148658, "loss": 0.726, "step": 6780 }, { "epoch": 0.4717381474138231, "grad_norm": 1.421875, "learning_rate": 0.0011396987001679058, "loss": 1.0062, "step": 6781 }, { "epoch": 0.4718077150509583, "grad_norm": 1.2109375, "learning_rate": 0.0011394755732277502, "loss": 0.9328, "step": 6782 }, { "epoch": 0.4718772826880935, "grad_norm": 0.91796875, "learning_rate": 0.0011392524392057277, "loss": 0.6841, "step": 6783 }, { "epoch": 0.4719468503252287, "grad_norm": 1.3203125, "learning_rate": 0.0011390292981131682, "loss": 0.7211, "step": 6784 }, { "epoch": 0.47201641796236393, "grad_norm": 1.3828125, "learning_rate": 0.001138806149961402, "loss": 0.8626, "step": 6785 }, { "epoch": 0.4720859855994991, "grad_norm": 1.3671875, "learning_rate": 0.001138582994761759, "loss": 0.5986, "step": 6786 }, { "epoch": 0.47215555323663433, "grad_norm": 1.3203125, "learning_rate": 0.00113835983252557, "loss": 1.2783, "step": 6787 }, { "epoch": 0.4722251208737695, "grad_norm": 1.5546875, "learning_rate": 0.0011381366632641661, "loss": 1.1105, "step": 6788 }, { "epoch": 0.4722946885109047, "grad_norm": 1.1484375, "learning_rate": 0.0011379134869888789, "loss": 0.8647, "step": 6789 }, { "epoch": 0.47236425614803995, "grad_norm": 1.0625, "learning_rate": 0.0011376903037110396, "loss": 0.8945, "step": 6790 }, { "epoch": 0.4724338237851751, "grad_norm": 0.9140625, "learning_rate": 0.0011374671134419807, "loss": 0.6122, "step": 6791 }, { "epoch": 0.47250339142231035, "grad_norm": 1.0390625, "learning_rate": 0.001137243916193035, "loss": 0.8256, "step": 6792 }, { "epoch": 0.4725729590594455, "grad_norm": 1.2890625, "learning_rate": 0.0011370207119755346, "loss": 0.7419, "step": 6793 }, { "epoch": 0.47264252669658074, "grad_norm": 0.98046875, "learning_rate": 0.0011367975008008133, "loss": 0.6636, "step": 6794 }, { "epoch": 0.47271209433371597, "grad_norm": 0.8515625, "learning_rate": 0.0011365742826802046, "loss": 0.5432, "step": 6795 }, { "epoch": 0.47278166197085114, "grad_norm": 1.2578125, "learning_rate": 0.001136351057625042, "loss": 0.9745, "step": 6796 }, { "epoch": 0.47285122960798637, "grad_norm": 1.2421875, "learning_rate": 0.00113612782564666, "loss": 0.9833, "step": 6797 }, { "epoch": 0.4729207972451216, "grad_norm": 1.1796875, "learning_rate": 0.0011359045867563933, "loss": 0.8484, "step": 6798 }, { "epoch": 0.47299036488225676, "grad_norm": 1.0859375, "learning_rate": 0.0011356813409655764, "loss": 0.7557, "step": 6799 }, { "epoch": 0.473059932519392, "grad_norm": 1.0546875, "learning_rate": 0.0011354580882855449, "loss": 0.949, "step": 6800 }, { "epoch": 0.47312950015652716, "grad_norm": 1.015625, "learning_rate": 0.0011352348287276346, "loss": 0.8126, "step": 6801 }, { "epoch": 0.4731990677936624, "grad_norm": 0.9765625, "learning_rate": 0.0011350115623031815, "loss": 0.6654, "step": 6802 }, { "epoch": 0.4732686354307976, "grad_norm": 1.40625, "learning_rate": 0.0011347882890235216, "loss": 0.7928, "step": 6803 }, { "epoch": 0.4733382030679328, "grad_norm": 1.3046875, "learning_rate": 0.0011345650088999918, "loss": 0.8891, "step": 6804 }, { "epoch": 0.473407770705068, "grad_norm": 0.95703125, "learning_rate": 0.0011343417219439292, "loss": 0.6199, "step": 6805 }, { "epoch": 0.4734773383422032, "grad_norm": 0.85546875, "learning_rate": 0.0011341184281666705, "loss": 0.6123, "step": 6806 }, { "epoch": 0.4735469059793384, "grad_norm": 1.125, "learning_rate": 0.0011338951275795546, "loss": 0.9712, "step": 6807 }, { "epoch": 0.47361647361647363, "grad_norm": 1.03125, "learning_rate": 0.0011336718201939186, "loss": 0.5731, "step": 6808 }, { "epoch": 0.4736860412536088, "grad_norm": 1.296875, "learning_rate": 0.0011334485060211018, "loss": 0.932, "step": 6809 }, { "epoch": 0.47375560889074403, "grad_norm": 1.3125, "learning_rate": 0.0011332251850724423, "loss": 1.0456, "step": 6810 }, { "epoch": 0.47382517652787925, "grad_norm": 1.09375, "learning_rate": 0.0011330018573592793, "loss": 0.8988, "step": 6811 }, { "epoch": 0.4738947441650144, "grad_norm": 1.4296875, "learning_rate": 0.0011327785228929525, "loss": 0.8061, "step": 6812 }, { "epoch": 0.47396431180214965, "grad_norm": 1.1328125, "learning_rate": 0.0011325551816848015, "loss": 0.7523, "step": 6813 }, { "epoch": 0.4740338794392848, "grad_norm": 1.3046875, "learning_rate": 0.0011323318337461666, "loss": 0.9292, "step": 6814 }, { "epoch": 0.47410344707642005, "grad_norm": 1.1328125, "learning_rate": 0.001132108479088388, "loss": 0.6064, "step": 6815 }, { "epoch": 0.4741730147135553, "grad_norm": 1.2578125, "learning_rate": 0.001131885117722807, "loss": 0.7639, "step": 6816 }, { "epoch": 0.47424258235069044, "grad_norm": 1.0703125, "learning_rate": 0.0011316617496607642, "loss": 0.783, "step": 6817 }, { "epoch": 0.47431214998782567, "grad_norm": 0.89453125, "learning_rate": 0.0011314383749136015, "loss": 0.6997, "step": 6818 }, { "epoch": 0.47438171762496084, "grad_norm": 0.9453125, "learning_rate": 0.0011312149934926605, "loss": 0.6055, "step": 6819 }, { "epoch": 0.47445128526209607, "grad_norm": 1.2734375, "learning_rate": 0.0011309916054092835, "loss": 0.9023, "step": 6820 }, { "epoch": 0.4745208528992313, "grad_norm": 0.953125, "learning_rate": 0.0011307682106748132, "loss": 0.8706, "step": 6821 }, { "epoch": 0.47459042053636646, "grad_norm": 1.125, "learning_rate": 0.001130544809300592, "loss": 0.8186, "step": 6822 }, { "epoch": 0.4746599881735017, "grad_norm": 1.0546875, "learning_rate": 0.0011303214012979637, "loss": 0.9062, "step": 6823 }, { "epoch": 0.4747295558106369, "grad_norm": 1.1015625, "learning_rate": 0.0011300979866782715, "loss": 0.8621, "step": 6824 }, { "epoch": 0.4747991234477721, "grad_norm": 1.1875, "learning_rate": 0.0011298745654528591, "loss": 0.9045, "step": 6825 }, { "epoch": 0.4748686910849073, "grad_norm": 1.2421875, "learning_rate": 0.001129651137633071, "loss": 0.9506, "step": 6826 }, { "epoch": 0.4749382587220425, "grad_norm": 1.15625, "learning_rate": 0.0011294277032302513, "loss": 0.8287, "step": 6827 }, { "epoch": 0.4750078263591777, "grad_norm": 1.328125, "learning_rate": 0.0011292042622557457, "loss": 1.0259, "step": 6828 }, { "epoch": 0.47507739399631294, "grad_norm": 1.359375, "learning_rate": 0.0011289808147208987, "loss": 0.895, "step": 6829 }, { "epoch": 0.4751469616334481, "grad_norm": 1.234375, "learning_rate": 0.0011287573606370558, "loss": 0.9821, "step": 6830 }, { "epoch": 0.47521652927058333, "grad_norm": 0.90625, "learning_rate": 0.0011285339000155635, "loss": 0.8931, "step": 6831 }, { "epoch": 0.4752860969077185, "grad_norm": 1.2890625, "learning_rate": 0.0011283104328677674, "loss": 0.815, "step": 6832 }, { "epoch": 0.47535566454485373, "grad_norm": 0.92578125, "learning_rate": 0.001128086959205014, "loss": 0.7367, "step": 6833 }, { "epoch": 0.47542523218198895, "grad_norm": 1.0234375, "learning_rate": 0.0011278634790386508, "loss": 0.8959, "step": 6834 }, { "epoch": 0.4754947998191241, "grad_norm": 0.984375, "learning_rate": 0.0011276399923800245, "loss": 0.9691, "step": 6835 }, { "epoch": 0.47556436745625935, "grad_norm": 0.97265625, "learning_rate": 0.0011274164992404827, "loss": 0.734, "step": 6836 }, { "epoch": 0.4756339350933946, "grad_norm": 1.015625, "learning_rate": 0.0011271929996313735, "loss": 1.028, "step": 6837 }, { "epoch": 0.47570350273052975, "grad_norm": 1.125, "learning_rate": 0.0011269694935640447, "loss": 0.7971, "step": 6838 }, { "epoch": 0.475773070367665, "grad_norm": 0.9609375, "learning_rate": 0.0011267459810498448, "loss": 0.7243, "step": 6839 }, { "epoch": 0.47584263800480014, "grad_norm": 1.34375, "learning_rate": 0.0011265224621001232, "loss": 0.8573, "step": 6840 }, { "epoch": 0.47591220564193537, "grad_norm": 1.2109375, "learning_rate": 0.0011262989367262285, "loss": 0.782, "step": 6841 }, { "epoch": 0.4759817732790706, "grad_norm": 1.109375, "learning_rate": 0.0011260754049395103, "loss": 0.9528, "step": 6842 }, { "epoch": 0.47605134091620577, "grad_norm": 1.5, "learning_rate": 0.0011258518667513187, "loss": 1.1033, "step": 6843 }, { "epoch": 0.476120908553341, "grad_norm": 1.140625, "learning_rate": 0.0011256283221730036, "loss": 0.894, "step": 6844 }, { "epoch": 0.47619047619047616, "grad_norm": 1.109375, "learning_rate": 0.0011254047712159156, "loss": 0.7271, "step": 6845 }, { "epoch": 0.4762600438276114, "grad_norm": 1.1796875, "learning_rate": 0.0011251812138914053, "loss": 0.905, "step": 6846 }, { "epoch": 0.4763296114647466, "grad_norm": 0.9140625, "learning_rate": 0.0011249576502108238, "loss": 0.7876, "step": 6847 }, { "epoch": 0.4763991791018818, "grad_norm": 1.1328125, "learning_rate": 0.0011247340801855228, "loss": 0.838, "step": 6848 }, { "epoch": 0.476468746739017, "grad_norm": 1.03125, "learning_rate": 0.001124510503826854, "loss": 0.8328, "step": 6849 }, { "epoch": 0.47653831437615224, "grad_norm": 1.0234375, "learning_rate": 0.001124286921146169, "loss": 0.9428, "step": 6850 }, { "epoch": 0.4766078820132874, "grad_norm": 1.25, "learning_rate": 0.001124063332154821, "loss": 1.0431, "step": 6851 }, { "epoch": 0.47667744965042264, "grad_norm": 0.99609375, "learning_rate": 0.001123839736864162, "loss": 0.607, "step": 6852 }, { "epoch": 0.4767470172875578, "grad_norm": 1.140625, "learning_rate": 0.0011236161352855456, "loss": 0.7733, "step": 6853 }, { "epoch": 0.47681658492469303, "grad_norm": 0.94140625, "learning_rate": 0.0011233925274303249, "loss": 0.7836, "step": 6854 }, { "epoch": 0.47688615256182826, "grad_norm": 1.0, "learning_rate": 0.0011231689133098537, "loss": 0.9309, "step": 6855 }, { "epoch": 0.47695572019896343, "grad_norm": 0.953125, "learning_rate": 0.0011229452929354857, "loss": 0.9256, "step": 6856 }, { "epoch": 0.47702528783609865, "grad_norm": 0.88671875, "learning_rate": 0.0011227216663185755, "loss": 0.7528, "step": 6857 }, { "epoch": 0.4770948554732338, "grad_norm": 0.9453125, "learning_rate": 0.0011224980334704777, "loss": 0.586, "step": 6858 }, { "epoch": 0.47716442311036905, "grad_norm": 0.921875, "learning_rate": 0.001122274394402547, "loss": 0.821, "step": 6859 }, { "epoch": 0.4772339907475043, "grad_norm": 1.0703125, "learning_rate": 0.001122050749126139, "loss": 1.0315, "step": 6860 }, { "epoch": 0.47730355838463945, "grad_norm": 1.15625, "learning_rate": 0.0011218270976526092, "loss": 0.8908, "step": 6861 }, { "epoch": 0.4773731260217747, "grad_norm": 1.1171875, "learning_rate": 0.0011216034399933134, "loss": 0.8264, "step": 6862 }, { "epoch": 0.4774426936589099, "grad_norm": 1.015625, "learning_rate": 0.0011213797761596078, "loss": 0.8753, "step": 6863 }, { "epoch": 0.47751226129604507, "grad_norm": 1.1953125, "learning_rate": 0.001121156106162849, "loss": 1.1162, "step": 6864 }, { "epoch": 0.4775818289331803, "grad_norm": 0.9375, "learning_rate": 0.0011209324300143937, "loss": 0.6004, "step": 6865 }, { "epoch": 0.47765139657031547, "grad_norm": 1.125, "learning_rate": 0.0011207087477255993, "loss": 0.7431, "step": 6866 }, { "epoch": 0.4777209642074507, "grad_norm": 1.3125, "learning_rate": 0.001120485059307823, "loss": 1.0819, "step": 6867 }, { "epoch": 0.4777905318445859, "grad_norm": 1.1875, "learning_rate": 0.0011202613647724228, "loss": 1.0026, "step": 6868 }, { "epoch": 0.4778600994817211, "grad_norm": 1.2421875, "learning_rate": 0.0011200376641307564, "loss": 0.8103, "step": 6869 }, { "epoch": 0.4779296671188563, "grad_norm": 1.0234375, "learning_rate": 0.0011198139573941827, "loss": 0.8203, "step": 6870 }, { "epoch": 0.4779992347559915, "grad_norm": 1.3671875, "learning_rate": 0.00111959024457406, "loss": 0.9181, "step": 6871 }, { "epoch": 0.4780688023931267, "grad_norm": 1.0, "learning_rate": 0.0011193665256817476, "loss": 0.8363, "step": 6872 }, { "epoch": 0.47813837003026194, "grad_norm": 1.390625, "learning_rate": 0.0011191428007286046, "loss": 0.7834, "step": 6873 }, { "epoch": 0.4782079376673971, "grad_norm": 1.125, "learning_rate": 0.0011189190697259907, "loss": 0.9514, "step": 6874 }, { "epoch": 0.47827750530453234, "grad_norm": 1.2265625, "learning_rate": 0.001118695332685266, "loss": 1.0461, "step": 6875 }, { "epoch": 0.47834707294166756, "grad_norm": 1.25, "learning_rate": 0.0011184715896177901, "loss": 0.9652, "step": 6876 }, { "epoch": 0.47841664057880273, "grad_norm": 0.828125, "learning_rate": 0.0011182478405349246, "loss": 0.688, "step": 6877 }, { "epoch": 0.47848620821593796, "grad_norm": 1.078125, "learning_rate": 0.0011180240854480295, "loss": 0.5792, "step": 6878 }, { "epoch": 0.47855577585307313, "grad_norm": 1.0078125, "learning_rate": 0.0011178003243684663, "loss": 0.8728, "step": 6879 }, { "epoch": 0.47862534349020835, "grad_norm": 1.21875, "learning_rate": 0.0011175765573075962, "loss": 0.7156, "step": 6880 }, { "epoch": 0.4786949111273436, "grad_norm": 0.79296875, "learning_rate": 0.0011173527842767812, "loss": 0.7903, "step": 6881 }, { "epoch": 0.47876447876447875, "grad_norm": 1.0546875, "learning_rate": 0.0011171290052873835, "loss": 0.734, "step": 6882 }, { "epoch": 0.478834046401614, "grad_norm": 1.0, "learning_rate": 0.0011169052203507653, "loss": 0.8546, "step": 6883 }, { "epoch": 0.47890361403874915, "grad_norm": 1.53125, "learning_rate": 0.001116681429478289, "loss": 0.7843, "step": 6884 }, { "epoch": 0.4789731816758844, "grad_norm": 1.171875, "learning_rate": 0.001116457632681318, "loss": 0.8304, "step": 6885 }, { "epoch": 0.4790427493130196, "grad_norm": 1.015625, "learning_rate": 0.0011162338299712153, "loss": 0.6309, "step": 6886 }, { "epoch": 0.47911231695015477, "grad_norm": 1.2265625, "learning_rate": 0.0011160100213593448, "loss": 1.0741, "step": 6887 }, { "epoch": 0.47918188458729, "grad_norm": 1.140625, "learning_rate": 0.0011157862068570698, "loss": 0.9332, "step": 6888 }, { "epoch": 0.4792514522244252, "grad_norm": 1.0859375, "learning_rate": 0.0011155623864757551, "loss": 0.8916, "step": 6889 }, { "epoch": 0.4793210198615604, "grad_norm": 1.09375, "learning_rate": 0.0011153385602267647, "loss": 0.98, "step": 6890 }, { "epoch": 0.4793905874986956, "grad_norm": 1.4609375, "learning_rate": 0.0011151147281214637, "loss": 1.0965, "step": 6891 }, { "epoch": 0.4794601551358308, "grad_norm": 0.85546875, "learning_rate": 0.0011148908901712172, "loss": 0.7172, "step": 6892 }, { "epoch": 0.479529722772966, "grad_norm": 1.25, "learning_rate": 0.00111466704638739, "loss": 0.6087, "step": 6893 }, { "epoch": 0.47959929041010124, "grad_norm": 1.0234375, "learning_rate": 0.0011144431967813485, "loss": 0.8725, "step": 6894 }, { "epoch": 0.4796688580472364, "grad_norm": 1.0703125, "learning_rate": 0.0011142193413644576, "loss": 0.6584, "step": 6895 }, { "epoch": 0.47973842568437164, "grad_norm": 1.3828125, "learning_rate": 0.0011139954801480851, "loss": 1.0956, "step": 6896 }, { "epoch": 0.4798079933215068, "grad_norm": 0.84765625, "learning_rate": 0.0011137716131435964, "loss": 0.6688, "step": 6897 }, { "epoch": 0.47987756095864204, "grad_norm": 1.1875, "learning_rate": 0.0011135477403623585, "loss": 0.9466, "step": 6898 }, { "epoch": 0.47994712859577726, "grad_norm": 1.25, "learning_rate": 0.001113323861815739, "loss": 0.6891, "step": 6899 }, { "epoch": 0.48001669623291243, "grad_norm": 0.921875, "learning_rate": 0.0011130999775151047, "loss": 0.705, "step": 6900 }, { "epoch": 0.48008626387004766, "grad_norm": 0.953125, "learning_rate": 0.0011128760874718237, "loss": 0.8579, "step": 6901 }, { "epoch": 0.4801558315071829, "grad_norm": 1.2734375, "learning_rate": 0.0011126521916972637, "loss": 0.8625, "step": 6902 }, { "epoch": 0.48022539914431805, "grad_norm": 1.3125, "learning_rate": 0.0011124282902027938, "loss": 0.9706, "step": 6903 }, { "epoch": 0.4802949667814533, "grad_norm": 1.09375, "learning_rate": 0.0011122043829997815, "loss": 0.706, "step": 6904 }, { "epoch": 0.48036453441858845, "grad_norm": 1.3203125, "learning_rate": 0.0011119804700995964, "loss": 1.1041, "step": 6905 }, { "epoch": 0.4804341020557237, "grad_norm": 1.1328125, "learning_rate": 0.0011117565515136071, "loss": 0.6875, "step": 6906 }, { "epoch": 0.4805036696928589, "grad_norm": 1.140625, "learning_rate": 0.0011115326272531838, "loss": 0.8057, "step": 6907 }, { "epoch": 0.4805732373299941, "grad_norm": 1.2265625, "learning_rate": 0.0011113086973296958, "loss": 0.8843, "step": 6908 }, { "epoch": 0.4806428049671293, "grad_norm": 0.99609375, "learning_rate": 0.0011110847617545128, "loss": 0.6403, "step": 6909 }, { "epoch": 0.48071237260426447, "grad_norm": 0.91796875, "learning_rate": 0.001110860820539006, "loss": 0.7008, "step": 6910 }, { "epoch": 0.4807819402413997, "grad_norm": 0.9375, "learning_rate": 0.0011106368736945452, "loss": 0.6692, "step": 6911 }, { "epoch": 0.4808515078785349, "grad_norm": 1.1953125, "learning_rate": 0.001110412921232502, "loss": 1.0763, "step": 6912 }, { "epoch": 0.4809210755156701, "grad_norm": 0.92578125, "learning_rate": 0.001110188963164247, "loss": 0.75, "step": 6913 }, { "epoch": 0.4809906431528053, "grad_norm": 1.0859375, "learning_rate": 0.0011099649995011515, "loss": 0.8156, "step": 6914 }, { "epoch": 0.48106021078994055, "grad_norm": 1.0625, "learning_rate": 0.0011097410302545881, "loss": 0.8426, "step": 6915 }, { "epoch": 0.4811297784270757, "grad_norm": 0.875, "learning_rate": 0.001109517055435928, "loss": 0.7825, "step": 6916 }, { "epoch": 0.48119934606421094, "grad_norm": 1.28125, "learning_rate": 0.001109293075056544, "loss": 0.8922, "step": 6917 }, { "epoch": 0.4812689137013461, "grad_norm": 1.2265625, "learning_rate": 0.001109069089127808, "loss": 0.9127, "step": 6918 }, { "epoch": 0.48133848133848134, "grad_norm": 0.90625, "learning_rate": 0.0011088450976610943, "loss": 0.7225, "step": 6919 }, { "epoch": 0.48140804897561656, "grad_norm": 1.4375, "learning_rate": 0.0011086211006677744, "loss": 0.8415, "step": 6920 }, { "epoch": 0.48147761661275174, "grad_norm": 1.421875, "learning_rate": 0.0011083970981592228, "loss": 1.0372, "step": 6921 }, { "epoch": 0.48154718424988696, "grad_norm": 1.0625, "learning_rate": 0.001108173090146813, "loss": 0.9047, "step": 6922 }, { "epoch": 0.48161675188702213, "grad_norm": 1.3046875, "learning_rate": 0.001107949076641919, "loss": 0.791, "step": 6923 }, { "epoch": 0.48168631952415736, "grad_norm": 1.2734375, "learning_rate": 0.0011077250576559145, "loss": 0.8391, "step": 6924 }, { "epoch": 0.4817558871612926, "grad_norm": 0.98828125, "learning_rate": 0.001107501033200175, "loss": 0.5883, "step": 6925 }, { "epoch": 0.48182545479842775, "grad_norm": 1.1953125, "learning_rate": 0.0011072770032860748, "loss": 0.8007, "step": 6926 }, { "epoch": 0.481895022435563, "grad_norm": 0.890625, "learning_rate": 0.0011070529679249887, "loss": 0.6742, "step": 6927 }, { "epoch": 0.4819645900726982, "grad_norm": 1.2578125, "learning_rate": 0.0011068289271282932, "loss": 0.8524, "step": 6928 }, { "epoch": 0.4820341577098334, "grad_norm": 1.3828125, "learning_rate": 0.0011066048809073629, "loss": 0.9279, "step": 6929 }, { "epoch": 0.4821037253469686, "grad_norm": 0.95703125, "learning_rate": 0.001106380829273574, "loss": 0.7533, "step": 6930 }, { "epoch": 0.4821732929841038, "grad_norm": 1.09375, "learning_rate": 0.0011061567722383029, "loss": 1.0112, "step": 6931 }, { "epoch": 0.482242860621239, "grad_norm": 1.015625, "learning_rate": 0.0011059327098129255, "loss": 0.7589, "step": 6932 }, { "epoch": 0.4823124282583742, "grad_norm": 1.1015625, "learning_rate": 0.0011057086420088195, "loss": 0.7081, "step": 6933 }, { "epoch": 0.4823819958955094, "grad_norm": 1.21875, "learning_rate": 0.0011054845688373614, "loss": 0.7104, "step": 6934 }, { "epoch": 0.4824515635326446, "grad_norm": 1.2890625, "learning_rate": 0.0011052604903099286, "loss": 0.9116, "step": 6935 }, { "epoch": 0.4825211311697798, "grad_norm": 1.171875, "learning_rate": 0.0011050364064378985, "loss": 1.0161, "step": 6936 }, { "epoch": 0.482590698806915, "grad_norm": 1.0234375, "learning_rate": 0.0011048123172326494, "loss": 0.6631, "step": 6937 }, { "epoch": 0.48266026644405025, "grad_norm": 1.0078125, "learning_rate": 0.001104588222705559, "loss": 0.8084, "step": 6938 }, { "epoch": 0.4827298340811854, "grad_norm": 1.09375, "learning_rate": 0.0011043641228680055, "loss": 0.7227, "step": 6939 }, { "epoch": 0.48279940171832064, "grad_norm": 1.078125, "learning_rate": 0.0011041400177313682, "loss": 0.9966, "step": 6940 }, { "epoch": 0.48286896935545587, "grad_norm": 1.3671875, "learning_rate": 0.0011039159073070258, "loss": 0.9344, "step": 6941 }, { "epoch": 0.48293853699259104, "grad_norm": 1.1171875, "learning_rate": 0.0011036917916063572, "loss": 0.6069, "step": 6942 }, { "epoch": 0.48300810462972626, "grad_norm": 1.078125, "learning_rate": 0.0011034676706407423, "loss": 1.0173, "step": 6943 }, { "epoch": 0.48307767226686144, "grad_norm": 0.8984375, "learning_rate": 0.0011032435444215602, "loss": 0.8079, "step": 6944 }, { "epoch": 0.48314723990399666, "grad_norm": 1.1171875, "learning_rate": 0.0011030194129601917, "loss": 0.899, "step": 6945 }, { "epoch": 0.4832168075411319, "grad_norm": 1.0078125, "learning_rate": 0.0011027952762680162, "loss": 0.8863, "step": 6946 }, { "epoch": 0.48328637517826706, "grad_norm": 1.0859375, "learning_rate": 0.001102571134356415, "loss": 0.8912, "step": 6947 }, { "epoch": 0.4833559428154023, "grad_norm": 0.8203125, "learning_rate": 0.0011023469872367686, "loss": 0.6625, "step": 6948 }, { "epoch": 0.48342551045253745, "grad_norm": 0.9453125, "learning_rate": 0.0011021228349204582, "loss": 0.9037, "step": 6949 }, { "epoch": 0.4834950780896727, "grad_norm": 1.1875, "learning_rate": 0.0011018986774188645, "loss": 0.8848, "step": 6950 }, { "epoch": 0.4835646457268079, "grad_norm": 1.09375, "learning_rate": 0.0011016745147433703, "loss": 0.7961, "step": 6951 }, { "epoch": 0.4836342133639431, "grad_norm": 1.109375, "learning_rate": 0.0011014503469053563, "loss": 0.9383, "step": 6952 }, { "epoch": 0.4837037810010783, "grad_norm": 1.1640625, "learning_rate": 0.0011012261739162049, "loss": 0.7179, "step": 6953 }, { "epoch": 0.48377334863821353, "grad_norm": 1.0546875, "learning_rate": 0.0011010019957872989, "loss": 1.0111, "step": 6954 }, { "epoch": 0.4838429162753487, "grad_norm": 1.0625, "learning_rate": 0.00110077781253002, "loss": 0.8226, "step": 6955 }, { "epoch": 0.4839124839124839, "grad_norm": 1.203125, "learning_rate": 0.0011005536241557525, "loss": 0.8878, "step": 6956 }, { "epoch": 0.4839820515496191, "grad_norm": 1.09375, "learning_rate": 0.0011003294306758781, "loss": 1.0296, "step": 6957 }, { "epoch": 0.4840516191867543, "grad_norm": 1.15625, "learning_rate": 0.0011001052321017817, "loss": 0.8059, "step": 6958 }, { "epoch": 0.48412118682388955, "grad_norm": 1.09375, "learning_rate": 0.001099881028444846, "loss": 0.8771, "step": 6959 }, { "epoch": 0.4841907544610247, "grad_norm": 1.1953125, "learning_rate": 0.0010996568197164547, "loss": 1.0343, "step": 6960 }, { "epoch": 0.48426032209815995, "grad_norm": 1.21875, "learning_rate": 0.0010994326059279927, "loss": 0.6113, "step": 6961 }, { "epoch": 0.4843298897352951, "grad_norm": 1.1015625, "learning_rate": 0.0010992083870908437, "loss": 0.8849, "step": 6962 }, { "epoch": 0.48439945737243034, "grad_norm": 1.046875, "learning_rate": 0.0010989841632163934, "loss": 0.8569, "step": 6963 }, { "epoch": 0.48446902500956557, "grad_norm": 1.0234375, "learning_rate": 0.001098759934316026, "loss": 0.8583, "step": 6964 }, { "epoch": 0.48453859264670074, "grad_norm": 1.1796875, "learning_rate": 0.0010985357004011272, "loss": 0.8247, "step": 6965 }, { "epoch": 0.48460816028383596, "grad_norm": 1.046875, "learning_rate": 0.0010983114614830816, "loss": 0.636, "step": 6966 }, { "epoch": 0.4846777279209712, "grad_norm": 1.0234375, "learning_rate": 0.0010980872175732762, "loss": 0.7528, "step": 6967 }, { "epoch": 0.48474729555810636, "grad_norm": 1.4140625, "learning_rate": 0.0010978629686830958, "loss": 0.874, "step": 6968 }, { "epoch": 0.4848168631952416, "grad_norm": 1.21875, "learning_rate": 0.001097638714823927, "loss": 1.0633, "step": 6969 }, { "epoch": 0.48488643083237676, "grad_norm": 1.09375, "learning_rate": 0.0010974144560071568, "loss": 0.7056, "step": 6970 }, { "epoch": 0.484955998469512, "grad_norm": 1.1171875, "learning_rate": 0.0010971901922441712, "loss": 0.8113, "step": 6971 }, { "epoch": 0.4850255661066472, "grad_norm": 1.3046875, "learning_rate": 0.001096965923546358, "loss": 0.9971, "step": 6972 }, { "epoch": 0.4850951337437824, "grad_norm": 1.0546875, "learning_rate": 0.0010967416499251034, "loss": 0.9382, "step": 6973 }, { "epoch": 0.4851647013809176, "grad_norm": 1.0859375, "learning_rate": 0.0010965173713917958, "loss": 0.6524, "step": 6974 }, { "epoch": 0.4852342690180528, "grad_norm": 1.28125, "learning_rate": 0.0010962930879578226, "loss": 1.0498, "step": 6975 }, { "epoch": 0.485303836655188, "grad_norm": 1.171875, "learning_rate": 0.0010960687996345712, "loss": 1.0655, "step": 6976 }, { "epoch": 0.48537340429232323, "grad_norm": 0.83984375, "learning_rate": 0.0010958445064334311, "loss": 0.8055, "step": 6977 }, { "epoch": 0.4854429719294584, "grad_norm": 1.4296875, "learning_rate": 0.0010956202083657893, "loss": 0.9072, "step": 6978 }, { "epoch": 0.4855125395665936, "grad_norm": 1.3046875, "learning_rate": 0.001095395905443036, "loss": 0.8098, "step": 6979 }, { "epoch": 0.48558210720372885, "grad_norm": 1.0859375, "learning_rate": 0.0010951715976765589, "loss": 0.7313, "step": 6980 }, { "epoch": 0.485651674840864, "grad_norm": 1.359375, "learning_rate": 0.0010949472850777483, "loss": 0.8856, "step": 6981 }, { "epoch": 0.48572124247799925, "grad_norm": 0.96484375, "learning_rate": 0.0010947229676579926, "loss": 0.7672, "step": 6982 }, { "epoch": 0.4857908101151344, "grad_norm": 1.21875, "learning_rate": 0.0010944986454286822, "loss": 0.69, "step": 6983 }, { "epoch": 0.48586037775226965, "grad_norm": 1.171875, "learning_rate": 0.0010942743184012072, "loss": 0.9117, "step": 6984 }, { "epoch": 0.48592994538940487, "grad_norm": 1.2578125, "learning_rate": 0.001094049986586957, "loss": 0.9458, "step": 6985 }, { "epoch": 0.48599951302654004, "grad_norm": 1.0078125, "learning_rate": 0.0010938256499973232, "loss": 0.9272, "step": 6986 }, { "epoch": 0.48606908066367527, "grad_norm": 1.3125, "learning_rate": 0.001093601308643695, "loss": 0.8044, "step": 6987 }, { "epoch": 0.48613864830081044, "grad_norm": 1.140625, "learning_rate": 0.001093376962537465, "loss": 0.6457, "step": 6988 }, { "epoch": 0.48620821593794566, "grad_norm": 0.99609375, "learning_rate": 0.0010931526116900229, "loss": 0.827, "step": 6989 }, { "epoch": 0.4862777835750809, "grad_norm": 1.1171875, "learning_rate": 0.0010929282561127607, "loss": 0.9153, "step": 6990 }, { "epoch": 0.48634735121221606, "grad_norm": 1.171875, "learning_rate": 0.0010927038958170703, "loss": 0.8547, "step": 6991 }, { "epoch": 0.4864169188493513, "grad_norm": 1.1875, "learning_rate": 0.0010924795308143432, "loss": 0.9355, "step": 6992 }, { "epoch": 0.4864864864864865, "grad_norm": 0.83203125, "learning_rate": 0.0010922551611159716, "loss": 0.6463, "step": 6993 }, { "epoch": 0.4865560541236217, "grad_norm": 0.99609375, "learning_rate": 0.0010920307867333479, "loss": 0.8102, "step": 6994 }, { "epoch": 0.4866256217607569, "grad_norm": 1.1171875, "learning_rate": 0.001091806407677865, "loss": 0.5896, "step": 6995 }, { "epoch": 0.4866951893978921, "grad_norm": 1.125, "learning_rate": 0.001091582023960915, "loss": 0.8311, "step": 6996 }, { "epoch": 0.4867647570350273, "grad_norm": 1.25, "learning_rate": 0.001091357635593892, "loss": 1.1586, "step": 6997 }, { "epoch": 0.48683432467216253, "grad_norm": 1.15625, "learning_rate": 0.0010911332425881885, "loss": 0.8111, "step": 6998 }, { "epoch": 0.4869038923092977, "grad_norm": 1.0625, "learning_rate": 0.001090908844955198, "loss": 0.9535, "step": 6999 }, { "epoch": 0.48697345994643293, "grad_norm": 0.90625, "learning_rate": 0.001090684442706315, "loss": 0.806, "step": 7000 }, { "epoch": 0.4870430275835681, "grad_norm": 1.1484375, "learning_rate": 0.0010904600358529327, "loss": 0.7193, "step": 7001 }, { "epoch": 0.4871125952207033, "grad_norm": 1.296875, "learning_rate": 0.0010902356244064462, "loss": 1.1411, "step": 7002 }, { "epoch": 0.48718216285783855, "grad_norm": 1.296875, "learning_rate": 0.001090011208378249, "loss": 0.7308, "step": 7003 }, { "epoch": 0.4872517304949737, "grad_norm": 1.0546875, "learning_rate": 0.0010897867877797368, "loss": 0.6471, "step": 7004 }, { "epoch": 0.48732129813210895, "grad_norm": 0.953125, "learning_rate": 0.0010895623626223034, "loss": 0.7874, "step": 7005 }, { "epoch": 0.4873908657692442, "grad_norm": 1.0859375, "learning_rate": 0.0010893379329173453, "loss": 0.7763, "step": 7006 }, { "epoch": 0.48746043340637935, "grad_norm": 1.1328125, "learning_rate": 0.0010891134986762572, "loss": 0.9469, "step": 7007 }, { "epoch": 0.48753000104351457, "grad_norm": 0.99609375, "learning_rate": 0.0010888890599104345, "loss": 0.896, "step": 7008 }, { "epoch": 0.48759956868064974, "grad_norm": 1.359375, "learning_rate": 0.0010886646166312736, "loss": 0.9828, "step": 7009 }, { "epoch": 0.48766913631778497, "grad_norm": 1.09375, "learning_rate": 0.0010884401688501702, "loss": 0.6623, "step": 7010 }, { "epoch": 0.4877387039549202, "grad_norm": 0.86328125, "learning_rate": 0.001088215716578521, "loss": 0.6458, "step": 7011 }, { "epoch": 0.48780827159205536, "grad_norm": 1.3125, "learning_rate": 0.001087991259827722, "loss": 0.9172, "step": 7012 }, { "epoch": 0.4878778392291906, "grad_norm": 1.3203125, "learning_rate": 0.0010877667986091705, "loss": 0.8783, "step": 7013 }, { "epoch": 0.48794740686632576, "grad_norm": 1.0234375, "learning_rate": 0.0010875423329342634, "loss": 0.5569, "step": 7014 }, { "epoch": 0.488016974503461, "grad_norm": 0.93359375, "learning_rate": 0.001087317862814398, "loss": 0.7734, "step": 7015 }, { "epoch": 0.4880865421405962, "grad_norm": 1.2578125, "learning_rate": 0.0010870933882609717, "loss": 0.9205, "step": 7016 }, { "epoch": 0.4881561097777314, "grad_norm": 0.99609375, "learning_rate": 0.0010868689092853817, "loss": 0.9377, "step": 7017 }, { "epoch": 0.4882256774148666, "grad_norm": 1.234375, "learning_rate": 0.0010866444258990269, "loss": 0.9297, "step": 7018 }, { "epoch": 0.48829524505200184, "grad_norm": 1.7265625, "learning_rate": 0.0010864199381133044, "loss": 0.9733, "step": 7019 }, { "epoch": 0.488364812689137, "grad_norm": 1.15625, "learning_rate": 0.0010861954459396132, "loss": 0.798, "step": 7020 }, { "epoch": 0.48843438032627223, "grad_norm": 0.87109375, "learning_rate": 0.0010859709493893518, "loss": 0.9854, "step": 7021 }, { "epoch": 0.4885039479634074, "grad_norm": 1.109375, "learning_rate": 0.0010857464484739189, "loss": 0.8518, "step": 7022 }, { "epoch": 0.48857351560054263, "grad_norm": 1.125, "learning_rate": 0.0010855219432047137, "loss": 0.9385, "step": 7023 }, { "epoch": 0.48864308323767786, "grad_norm": 1.1875, "learning_rate": 0.0010852974335931347, "loss": 0.8966, "step": 7024 }, { "epoch": 0.488712650874813, "grad_norm": 1.1953125, "learning_rate": 0.0010850729196505825, "loss": 0.7525, "step": 7025 }, { "epoch": 0.48878221851194825, "grad_norm": 1.2578125, "learning_rate": 0.001084848401388456, "loss": 0.7952, "step": 7026 }, { "epoch": 0.4888517861490834, "grad_norm": 0.97265625, "learning_rate": 0.0010846238788181552, "loss": 0.7759, "step": 7027 }, { "epoch": 0.48892135378621865, "grad_norm": 0.97265625, "learning_rate": 0.0010843993519510807, "loss": 0.9408, "step": 7028 }, { "epoch": 0.4889909214233539, "grad_norm": 0.9296875, "learning_rate": 0.0010841748207986324, "loss": 0.6235, "step": 7029 }, { "epoch": 0.48906048906048905, "grad_norm": 1.1484375, "learning_rate": 0.001083950285372211, "loss": 1.0157, "step": 7030 }, { "epoch": 0.48913005669762427, "grad_norm": 1.4453125, "learning_rate": 0.0010837257456832172, "loss": 0.9633, "step": 7031 }, { "epoch": 0.4891996243347595, "grad_norm": 0.91796875, "learning_rate": 0.0010835012017430521, "loss": 0.7513, "step": 7032 }, { "epoch": 0.48926919197189467, "grad_norm": 1.0703125, "learning_rate": 0.0010832766535631166, "loss": 0.9007, "step": 7033 }, { "epoch": 0.4893387596090299, "grad_norm": 1.140625, "learning_rate": 0.001083052101154813, "loss": 0.7177, "step": 7034 }, { "epoch": 0.48940832724616506, "grad_norm": 1.15625, "learning_rate": 0.0010828275445295414, "loss": 0.8104, "step": 7035 }, { "epoch": 0.4894778948833003, "grad_norm": 1.0, "learning_rate": 0.0010826029836987052, "loss": 0.8287, "step": 7036 }, { "epoch": 0.4895474625204355, "grad_norm": 1.4296875, "learning_rate": 0.0010823784186737059, "loss": 1.0064, "step": 7037 }, { "epoch": 0.4896170301575707, "grad_norm": 0.8671875, "learning_rate": 0.0010821538494659453, "loss": 0.6387, "step": 7038 }, { "epoch": 0.4896865977947059, "grad_norm": 1.03125, "learning_rate": 0.001081929276086827, "loss": 0.8335, "step": 7039 }, { "epoch": 0.4897561654318411, "grad_norm": 1.0703125, "learning_rate": 0.0010817046985477522, "loss": 0.8973, "step": 7040 }, { "epoch": 0.4898257330689763, "grad_norm": 1.1953125, "learning_rate": 0.0010814801168601252, "loss": 0.957, "step": 7041 }, { "epoch": 0.48989530070611154, "grad_norm": 1.03125, "learning_rate": 0.001081255531035348, "loss": 0.7339, "step": 7042 }, { "epoch": 0.4899648683432467, "grad_norm": 1.0625, "learning_rate": 0.0010810309410848248, "loss": 0.8312, "step": 7043 }, { "epoch": 0.49003443598038193, "grad_norm": 0.93359375, "learning_rate": 0.001080806347019959, "loss": 0.7359, "step": 7044 }, { "epoch": 0.4901040036175171, "grad_norm": 1.2109375, "learning_rate": 0.001080581748852154, "loss": 0.9715, "step": 7045 }, { "epoch": 0.49017357125465233, "grad_norm": 1.0703125, "learning_rate": 0.0010803571465928142, "loss": 0.7558, "step": 7046 }, { "epoch": 0.49024313889178756, "grad_norm": 1.03125, "learning_rate": 0.0010801325402533433, "loss": 0.8256, "step": 7047 }, { "epoch": 0.4903127065289227, "grad_norm": 1.0625, "learning_rate": 0.001079907929845146, "loss": 0.9817, "step": 7048 }, { "epoch": 0.49038227416605795, "grad_norm": 1.0703125, "learning_rate": 0.0010796833153796266, "loss": 1.0321, "step": 7049 }, { "epoch": 0.4904518418031932, "grad_norm": 1.1484375, "learning_rate": 0.00107945869686819, "loss": 0.6911, "step": 7050 }, { "epoch": 0.49052140944032835, "grad_norm": 1.3359375, "learning_rate": 0.0010792340743222418, "loss": 1.0358, "step": 7051 }, { "epoch": 0.4905909770774636, "grad_norm": 1.375, "learning_rate": 0.0010790094477531862, "loss": 0.7645, "step": 7052 }, { "epoch": 0.49066054471459875, "grad_norm": 1.3125, "learning_rate": 0.0010787848171724293, "loss": 0.8814, "step": 7053 }, { "epoch": 0.49073011235173397, "grad_norm": 1.3828125, "learning_rate": 0.0010785601825913764, "loss": 0.9904, "step": 7054 }, { "epoch": 0.4907996799888692, "grad_norm": 1.21875, "learning_rate": 0.0010783355440214335, "loss": 0.9773, "step": 7055 }, { "epoch": 0.49086924762600437, "grad_norm": 0.96875, "learning_rate": 0.0010781109014740063, "loss": 0.7507, "step": 7056 }, { "epoch": 0.4909388152631396, "grad_norm": 1.0, "learning_rate": 0.0010778862549605016, "loss": 0.8323, "step": 7057 }, { "epoch": 0.49100838290027476, "grad_norm": 1.203125, "learning_rate": 0.001077661604492325, "loss": 0.9502, "step": 7058 }, { "epoch": 0.49107795053741, "grad_norm": 1.109375, "learning_rate": 0.0010774369500808837, "loss": 0.8934, "step": 7059 }, { "epoch": 0.4911475181745452, "grad_norm": 1.2109375, "learning_rate": 0.0010772122917375845, "loss": 0.6439, "step": 7060 }, { "epoch": 0.4912170858116804, "grad_norm": 1.1171875, "learning_rate": 0.0010769876294738343, "loss": 0.9425, "step": 7061 }, { "epoch": 0.4912866534488156, "grad_norm": 1.109375, "learning_rate": 0.0010767629633010407, "loss": 0.7707, "step": 7062 }, { "epoch": 0.49135622108595084, "grad_norm": 1.1640625, "learning_rate": 0.00107653829323061, "loss": 0.6929, "step": 7063 }, { "epoch": 0.491425788723086, "grad_norm": 1.265625, "learning_rate": 0.0010763136192739509, "loss": 0.8058, "step": 7064 }, { "epoch": 0.49149535636022124, "grad_norm": 0.96484375, "learning_rate": 0.0010760889414424709, "loss": 0.8957, "step": 7065 }, { "epoch": 0.4915649239973564, "grad_norm": 0.8984375, "learning_rate": 0.0010758642597475778, "loss": 0.738, "step": 7066 }, { "epoch": 0.49163449163449163, "grad_norm": 0.90234375, "learning_rate": 0.0010756395742006803, "loss": 0.6443, "step": 7067 }, { "epoch": 0.49170405927162686, "grad_norm": 1.03125, "learning_rate": 0.0010754148848131863, "loss": 0.7924, "step": 7068 }, { "epoch": 0.49177362690876203, "grad_norm": 1.0078125, "learning_rate": 0.0010751901915965045, "loss": 0.7784, "step": 7069 }, { "epoch": 0.49184319454589726, "grad_norm": 1.2890625, "learning_rate": 0.0010749654945620437, "loss": 0.95, "step": 7070 }, { "epoch": 0.4919127621830324, "grad_norm": 1.1015625, "learning_rate": 0.0010747407937212133, "loss": 0.7802, "step": 7071 }, { "epoch": 0.49198232982016765, "grad_norm": 1.109375, "learning_rate": 0.0010745160890854215, "loss": 0.7512, "step": 7072 }, { "epoch": 0.4920518974573029, "grad_norm": 1.0703125, "learning_rate": 0.0010742913806660787, "loss": 0.9372, "step": 7073 }, { "epoch": 0.49212146509443805, "grad_norm": 0.71484375, "learning_rate": 0.001074066668474594, "loss": 0.677, "step": 7074 }, { "epoch": 0.4921910327315733, "grad_norm": 1.2578125, "learning_rate": 0.0010738419525223772, "loss": 0.9019, "step": 7075 }, { "epoch": 0.4922606003687085, "grad_norm": 1.421875, "learning_rate": 0.0010736172328208381, "loss": 0.8827, "step": 7076 }, { "epoch": 0.49233016800584367, "grad_norm": 1.1015625, "learning_rate": 0.0010733925093813872, "loss": 0.7767, "step": 7077 }, { "epoch": 0.4923997356429789, "grad_norm": 1.1796875, "learning_rate": 0.0010731677822154349, "loss": 0.7247, "step": 7078 }, { "epoch": 0.49246930328011407, "grad_norm": 1.0546875, "learning_rate": 0.0010729430513343908, "loss": 0.8941, "step": 7079 }, { "epoch": 0.4925388709172493, "grad_norm": 1.1953125, "learning_rate": 0.0010727183167496663, "loss": 0.899, "step": 7080 }, { "epoch": 0.4926084385543845, "grad_norm": 0.90625, "learning_rate": 0.0010724935784726724, "loss": 0.5359, "step": 7081 }, { "epoch": 0.4926780061915197, "grad_norm": 1.2578125, "learning_rate": 0.0010722688365148198, "loss": 0.9204, "step": 7082 }, { "epoch": 0.4927475738286549, "grad_norm": 1.046875, "learning_rate": 0.0010720440908875202, "loss": 1.1598, "step": 7083 }, { "epoch": 0.4928171414657901, "grad_norm": 1.1640625, "learning_rate": 0.0010718193416021846, "loss": 0.7563, "step": 7084 }, { "epoch": 0.4928867091029253, "grad_norm": 1.1875, "learning_rate": 0.0010715945886702247, "loss": 0.9402, "step": 7085 }, { "epoch": 0.49295627674006054, "grad_norm": 1.0703125, "learning_rate": 0.0010713698321030527, "loss": 0.8206, "step": 7086 }, { "epoch": 0.4930258443771957, "grad_norm": 1.09375, "learning_rate": 0.0010711450719120804, "loss": 0.8456, "step": 7087 }, { "epoch": 0.49309541201433094, "grad_norm": 1.203125, "learning_rate": 0.0010709203081087197, "loss": 0.6072, "step": 7088 }, { "epoch": 0.49316497965146616, "grad_norm": 1.1171875, "learning_rate": 0.0010706955407043834, "loss": 0.8227, "step": 7089 }, { "epoch": 0.49323454728860133, "grad_norm": 1.21875, "learning_rate": 0.001070470769710484, "loss": 0.9223, "step": 7090 }, { "epoch": 0.49330411492573656, "grad_norm": 0.97265625, "learning_rate": 0.0010702459951384337, "loss": 0.8665, "step": 7091 }, { "epoch": 0.49337368256287173, "grad_norm": 1.171875, "learning_rate": 0.0010700212169996461, "loss": 0.7868, "step": 7092 }, { "epoch": 0.49344325020000696, "grad_norm": 1.0703125, "learning_rate": 0.001069796435305534, "loss": 0.9867, "step": 7093 }, { "epoch": 0.4935128178371422, "grad_norm": 1.1171875, "learning_rate": 0.0010695716500675107, "loss": 0.8527, "step": 7094 }, { "epoch": 0.49358238547427735, "grad_norm": 1.0625, "learning_rate": 0.0010693468612969898, "loss": 0.9523, "step": 7095 }, { "epoch": 0.4936519531114126, "grad_norm": 0.984375, "learning_rate": 0.0010691220690053846, "loss": 0.8617, "step": 7096 }, { "epoch": 0.49372152074854775, "grad_norm": 1.1015625, "learning_rate": 0.0010688972732041094, "loss": 0.5449, "step": 7097 }, { "epoch": 0.493791088385683, "grad_norm": 1.203125, "learning_rate": 0.0010686724739045776, "loss": 0.5827, "step": 7098 }, { "epoch": 0.4938606560228182, "grad_norm": 0.96875, "learning_rate": 0.0010684476711182041, "loss": 0.8263, "step": 7099 }, { "epoch": 0.49393022365995337, "grad_norm": 1.265625, "learning_rate": 0.0010682228648564026, "loss": 0.7761, "step": 7100 }, { "epoch": 0.4939997912970886, "grad_norm": 1.1484375, "learning_rate": 0.001067998055130588, "loss": 0.8542, "step": 7101 }, { "epoch": 0.4940693589342238, "grad_norm": 1.0546875, "learning_rate": 0.0010677732419521748, "loss": 0.8381, "step": 7102 }, { "epoch": 0.494138926571359, "grad_norm": 1.015625, "learning_rate": 0.001067548425332578, "loss": 0.7612, "step": 7103 }, { "epoch": 0.4942084942084942, "grad_norm": 1.3671875, "learning_rate": 0.0010673236052832127, "loss": 1.0282, "step": 7104 }, { "epoch": 0.4942780618456294, "grad_norm": 1.125, "learning_rate": 0.0010670987818154941, "loss": 0.9268, "step": 7105 }, { "epoch": 0.4943476294827646, "grad_norm": 0.9921875, "learning_rate": 0.0010668739549408372, "loss": 0.8165, "step": 7106 }, { "epoch": 0.49441719711989984, "grad_norm": 1.1796875, "learning_rate": 0.0010666491246706584, "loss": 0.8969, "step": 7107 }, { "epoch": 0.494486764757035, "grad_norm": 1.1328125, "learning_rate": 0.0010664242910163727, "loss": 0.8662, "step": 7108 }, { "epoch": 0.49455633239417024, "grad_norm": 1.0078125, "learning_rate": 0.0010661994539893965, "loss": 0.8012, "step": 7109 }, { "epoch": 0.4946259000313054, "grad_norm": 1.765625, "learning_rate": 0.0010659746136011457, "loss": 1.2093, "step": 7110 }, { "epoch": 0.49469546766844064, "grad_norm": 1.1171875, "learning_rate": 0.0010657497698630363, "loss": 0.8276, "step": 7111 }, { "epoch": 0.49476503530557586, "grad_norm": 1.2265625, "learning_rate": 0.0010655249227864852, "loss": 0.7684, "step": 7112 }, { "epoch": 0.49483460294271103, "grad_norm": 1.09375, "learning_rate": 0.0010653000723829086, "loss": 0.9557, "step": 7113 }, { "epoch": 0.49490417057984626, "grad_norm": 0.921875, "learning_rate": 0.0010650752186637238, "loss": 0.6632, "step": 7114 }, { "epoch": 0.4949737382169815, "grad_norm": 1.1171875, "learning_rate": 0.001064850361640347, "loss": 0.9204, "step": 7115 }, { "epoch": 0.49504330585411666, "grad_norm": 1.6796875, "learning_rate": 0.0010646255013241962, "loss": 0.8105, "step": 7116 }, { "epoch": 0.4951128734912519, "grad_norm": 1.3359375, "learning_rate": 0.0010644006377266877, "loss": 0.9422, "step": 7117 }, { "epoch": 0.49518244112838705, "grad_norm": 1.4609375, "learning_rate": 0.0010641757708592396, "loss": 0.9256, "step": 7118 }, { "epoch": 0.4952520087655223, "grad_norm": 1.421875, "learning_rate": 0.0010639509007332694, "loss": 0.9179, "step": 7119 }, { "epoch": 0.4953215764026575, "grad_norm": 1.078125, "learning_rate": 0.0010637260273601947, "loss": 0.7678, "step": 7120 }, { "epoch": 0.4953911440397927, "grad_norm": 1.0078125, "learning_rate": 0.0010635011507514336, "loss": 0.824, "step": 7121 }, { "epoch": 0.4954607116769279, "grad_norm": 1.4765625, "learning_rate": 0.001063276270918404, "loss": 0.8793, "step": 7122 }, { "epoch": 0.49553027931406307, "grad_norm": 0.96484375, "learning_rate": 0.0010630513878725244, "loss": 0.8169, "step": 7123 }, { "epoch": 0.4955998469511983, "grad_norm": 1.1640625, "learning_rate": 0.0010628265016252132, "loss": 0.7137, "step": 7124 }, { "epoch": 0.4956694145883335, "grad_norm": 1.46875, "learning_rate": 0.0010626016121878887, "loss": 1.0796, "step": 7125 }, { "epoch": 0.4957389822254687, "grad_norm": 0.98046875, "learning_rate": 0.00106237671957197, "loss": 0.7397, "step": 7126 }, { "epoch": 0.4958085498626039, "grad_norm": 0.84375, "learning_rate": 0.0010621518237888762, "loss": 0.7724, "step": 7127 }, { "epoch": 0.49587811749973915, "grad_norm": 0.94921875, "learning_rate": 0.0010619269248500257, "loss": 0.8617, "step": 7128 }, { "epoch": 0.4959476851368743, "grad_norm": 1.0625, "learning_rate": 0.001061702022766838, "loss": 0.9586, "step": 7129 }, { "epoch": 0.49601725277400954, "grad_norm": 1.1328125, "learning_rate": 0.0010614771175507327, "loss": 0.8047, "step": 7130 }, { "epoch": 0.4960868204111447, "grad_norm": 1.078125, "learning_rate": 0.0010612522092131294, "loss": 0.9814, "step": 7131 }, { "epoch": 0.49615638804827994, "grad_norm": 0.81640625, "learning_rate": 0.0010610272977654475, "loss": 0.7223, "step": 7132 }, { "epoch": 0.49622595568541517, "grad_norm": 1.2734375, "learning_rate": 0.0010608023832191069, "loss": 0.82, "step": 7133 }, { "epoch": 0.49629552332255034, "grad_norm": 1.796875, "learning_rate": 0.0010605774655855279, "loss": 1.0853, "step": 7134 }, { "epoch": 0.49636509095968556, "grad_norm": 1.171875, "learning_rate": 0.0010603525448761304, "loss": 0.7723, "step": 7135 }, { "epoch": 0.49643465859682073, "grad_norm": 1.09375, "learning_rate": 0.001060127621102335, "loss": 0.8135, "step": 7136 }, { "epoch": 0.49650422623395596, "grad_norm": 1.0859375, "learning_rate": 0.001059902694275562, "loss": 0.8008, "step": 7137 }, { "epoch": 0.4965737938710912, "grad_norm": 1.203125, "learning_rate": 0.0010596777644072321, "loss": 0.7991, "step": 7138 }, { "epoch": 0.49664336150822636, "grad_norm": 0.94921875, "learning_rate": 0.0010594528315087664, "loss": 0.7725, "step": 7139 }, { "epoch": 0.4967129291453616, "grad_norm": 1.046875, "learning_rate": 0.0010592278955915853, "loss": 0.9814, "step": 7140 }, { "epoch": 0.4967824967824968, "grad_norm": 1.5546875, "learning_rate": 0.0010590029566671102, "loss": 0.994, "step": 7141 }, { "epoch": 0.496852064419632, "grad_norm": 1.0546875, "learning_rate": 0.0010587780147467624, "loss": 0.8298, "step": 7142 }, { "epoch": 0.4969216320567672, "grad_norm": 1.0546875, "learning_rate": 0.0010585530698419634, "loss": 0.8363, "step": 7143 }, { "epoch": 0.4969911996939024, "grad_norm": 1.140625, "learning_rate": 0.0010583281219641346, "loss": 0.807, "step": 7144 }, { "epoch": 0.4970607673310376, "grad_norm": 1.109375, "learning_rate": 0.0010581031711246977, "loss": 0.9414, "step": 7145 }, { "epoch": 0.4971303349681728, "grad_norm": 1.3828125, "learning_rate": 0.0010578782173350746, "loss": 0.6897, "step": 7146 }, { "epoch": 0.497199902605308, "grad_norm": 1.09375, "learning_rate": 0.0010576532606066873, "loss": 0.9442, "step": 7147 }, { "epoch": 0.4972694702424432, "grad_norm": 1.1015625, "learning_rate": 0.001057428300950958, "loss": 0.8061, "step": 7148 }, { "epoch": 0.4973390378795784, "grad_norm": 1.2578125, "learning_rate": 0.0010572033383793092, "loss": 0.794, "step": 7149 }, { "epoch": 0.4974086055167136, "grad_norm": 1.0, "learning_rate": 0.0010569783729031633, "loss": 0.8179, "step": 7150 }, { "epoch": 0.49747817315384885, "grad_norm": 1.640625, "learning_rate": 0.0010567534045339425, "loss": 0.9908, "step": 7151 }, { "epoch": 0.497547740790984, "grad_norm": 1.0546875, "learning_rate": 0.00105652843328307, "loss": 0.8828, "step": 7152 }, { "epoch": 0.49761730842811924, "grad_norm": 1.75, "learning_rate": 0.0010563034591619686, "loss": 0.8516, "step": 7153 }, { "epoch": 0.49768687606525447, "grad_norm": 1.3984375, "learning_rate": 0.0010560784821820614, "loss": 0.816, "step": 7154 }, { "epoch": 0.49775644370238964, "grad_norm": 1.2578125, "learning_rate": 0.0010558535023547715, "loss": 0.7183, "step": 7155 }, { "epoch": 0.49782601133952487, "grad_norm": 0.99609375, "learning_rate": 0.0010556285196915223, "loss": 0.7265, "step": 7156 }, { "epoch": 0.49789557897666004, "grad_norm": 0.96875, "learning_rate": 0.0010554035342037371, "loss": 0.7026, "step": 7157 }, { "epoch": 0.49796514661379526, "grad_norm": 0.9453125, "learning_rate": 0.0010551785459028398, "loss": 0.7944, "step": 7158 }, { "epoch": 0.4980347142509305, "grad_norm": 0.92578125, "learning_rate": 0.001054953554800254, "loss": 0.7208, "step": 7159 }, { "epoch": 0.49810428188806566, "grad_norm": 1.0234375, "learning_rate": 0.0010547285609074039, "loss": 0.9822, "step": 7160 }, { "epoch": 0.4981738495252009, "grad_norm": 1.1328125, "learning_rate": 0.001054503564235713, "loss": 0.7046, "step": 7161 }, { "epoch": 0.49824341716233606, "grad_norm": 1.21875, "learning_rate": 0.001054278564796606, "loss": 0.8731, "step": 7162 }, { "epoch": 0.4983129847994713, "grad_norm": 1.109375, "learning_rate": 0.0010540535626015072, "loss": 0.7237, "step": 7163 }, { "epoch": 0.4983825524366065, "grad_norm": 0.9609375, "learning_rate": 0.0010538285576618407, "loss": 0.7564, "step": 7164 }, { "epoch": 0.4984521200737417, "grad_norm": 0.75, "learning_rate": 0.0010536035499890315, "loss": 0.5736, "step": 7165 }, { "epoch": 0.4985216877108769, "grad_norm": 1.0078125, "learning_rate": 0.001053378539594504, "loss": 0.864, "step": 7166 }, { "epoch": 0.49859125534801213, "grad_norm": 0.9140625, "learning_rate": 0.0010531535264896837, "loss": 0.8084, "step": 7167 }, { "epoch": 0.4986608229851473, "grad_norm": 0.88671875, "learning_rate": 0.0010529285106859949, "loss": 0.6282, "step": 7168 }, { "epoch": 0.4987303906222825, "grad_norm": 1.15625, "learning_rate": 0.0010527034921948633, "loss": 0.7866, "step": 7169 }, { "epoch": 0.4987999582594177, "grad_norm": 1.0859375, "learning_rate": 0.0010524784710277137, "loss": 0.7971, "step": 7170 }, { "epoch": 0.4988695258965529, "grad_norm": 1.3984375, "learning_rate": 0.0010522534471959723, "loss": 0.8345, "step": 7171 }, { "epoch": 0.49893909353368815, "grad_norm": 1.3125, "learning_rate": 0.001052028420711064, "loss": 0.8096, "step": 7172 }, { "epoch": 0.4990086611708233, "grad_norm": 1.2421875, "learning_rate": 0.0010518033915844147, "loss": 0.7644, "step": 7173 }, { "epoch": 0.49907822880795855, "grad_norm": 1.09375, "learning_rate": 0.0010515783598274502, "loss": 0.9254, "step": 7174 }, { "epoch": 0.4991477964450937, "grad_norm": 1.1171875, "learning_rate": 0.0010513533254515965, "loss": 0.8688, "step": 7175 }, { "epoch": 0.49921736408222894, "grad_norm": 1.1796875, "learning_rate": 0.0010511282884682802, "loss": 0.8079, "step": 7176 }, { "epoch": 0.49928693171936417, "grad_norm": 0.9765625, "learning_rate": 0.001050903248888927, "loss": 0.695, "step": 7177 }, { "epoch": 0.49935649935649934, "grad_norm": 1.2265625, "learning_rate": 0.001050678206724963, "loss": 0.7909, "step": 7178 }, { "epoch": 0.49942606699363457, "grad_norm": 1.2421875, "learning_rate": 0.0010504531619878155, "loss": 0.9214, "step": 7179 }, { "epoch": 0.4994956346307698, "grad_norm": 1.1796875, "learning_rate": 0.0010502281146889108, "loss": 0.9654, "step": 7180 }, { "epoch": 0.49956520226790496, "grad_norm": 1.09375, "learning_rate": 0.0010500030648396752, "loss": 0.8288, "step": 7181 }, { "epoch": 0.4996347699050402, "grad_norm": 1.609375, "learning_rate": 0.0010497780124515362, "loss": 0.875, "step": 7182 }, { "epoch": 0.49970433754217536, "grad_norm": 1.15625, "learning_rate": 0.0010495529575359208, "loss": 0.8785, "step": 7183 }, { "epoch": 0.4997739051793106, "grad_norm": 1.03125, "learning_rate": 0.0010493279001042557, "loss": 0.7318, "step": 7184 }, { "epoch": 0.4998434728164458, "grad_norm": 0.94140625, "learning_rate": 0.0010491028401679687, "loss": 0.5975, "step": 7185 }, { "epoch": 0.499913040453581, "grad_norm": 0.890625, "learning_rate": 0.0010488777777384868, "loss": 0.9336, "step": 7186 }, { "epoch": 0.4999826080907162, "grad_norm": 1.03125, "learning_rate": 0.0010486527128272377, "loss": 0.8408, "step": 7187 }, { "epoch": 0.5000521757278514, "grad_norm": 1.1953125, "learning_rate": 0.0010484276454456492, "loss": 0.938, "step": 7188 }, { "epoch": 0.5001217433649866, "grad_norm": 1.1171875, "learning_rate": 0.001048202575605149, "loss": 0.7174, "step": 7189 }, { "epoch": 0.5001913110021218, "grad_norm": 1.2265625, "learning_rate": 0.0010479775033171647, "loss": 0.8057, "step": 7190 }, { "epoch": 0.5002608786392571, "grad_norm": 1.046875, "learning_rate": 0.0010477524285931246, "loss": 0.7395, "step": 7191 }, { "epoch": 0.5003304462763922, "grad_norm": 1.015625, "learning_rate": 0.0010475273514444574, "loss": 0.8219, "step": 7192 }, { "epoch": 0.5004000139135274, "grad_norm": 0.9921875, "learning_rate": 0.0010473022718825904, "loss": 0.9962, "step": 7193 }, { "epoch": 0.5004695815506627, "grad_norm": 1.171875, "learning_rate": 0.0010470771899189525, "loss": 0.9283, "step": 7194 }, { "epoch": 0.5005391491877978, "grad_norm": 1.375, "learning_rate": 0.0010468521055649722, "loss": 1.09, "step": 7195 }, { "epoch": 0.500608716824933, "grad_norm": 1.015625, "learning_rate": 0.0010466270188320783, "loss": 0.6323, "step": 7196 }, { "epoch": 0.5006782844620682, "grad_norm": 1.1015625, "learning_rate": 0.0010464019297316992, "loss": 0.9214, "step": 7197 }, { "epoch": 0.5007478520992035, "grad_norm": 1.59375, "learning_rate": 0.0010461768382752639, "loss": 1.1907, "step": 7198 }, { "epoch": 0.5008174197363386, "grad_norm": 1.0078125, "learning_rate": 0.001045951744474202, "loss": 0.9154, "step": 7199 }, { "epoch": 0.5008869873734738, "grad_norm": 1.15625, "learning_rate": 0.0010457266483399417, "loss": 0.623, "step": 7200 }, { "epoch": 0.5009565550106091, "grad_norm": 1.2421875, "learning_rate": 0.0010455015498839126, "loss": 0.9534, "step": 7201 }, { "epoch": 0.5010261226477443, "grad_norm": 0.97265625, "learning_rate": 0.0010452764491175443, "loss": 0.7288, "step": 7202 }, { "epoch": 0.5010956902848794, "grad_norm": 0.96484375, "learning_rate": 0.0010450513460522662, "loss": 0.8695, "step": 7203 }, { "epoch": 0.5011652579220147, "grad_norm": 1.3125, "learning_rate": 0.0010448262406995076, "loss": 1.0398, "step": 7204 }, { "epoch": 0.5012348255591499, "grad_norm": 1.1171875, "learning_rate": 0.0010446011330706986, "loss": 0.6585, "step": 7205 }, { "epoch": 0.5013043931962851, "grad_norm": 1.1953125, "learning_rate": 0.001044376023177269, "loss": 0.9344, "step": 7206 }, { "epoch": 0.5013739608334203, "grad_norm": 0.9765625, "learning_rate": 0.0010441509110306483, "loss": 0.8191, "step": 7207 }, { "epoch": 0.5014435284705555, "grad_norm": 1.015625, "learning_rate": 0.0010439257966422674, "loss": 0.8409, "step": 7208 }, { "epoch": 0.5015130961076907, "grad_norm": 1.1953125, "learning_rate": 0.0010437006800235553, "loss": 0.6652, "step": 7209 }, { "epoch": 0.5015826637448259, "grad_norm": 1.265625, "learning_rate": 0.0010434755611859435, "loss": 1.1023, "step": 7210 }, { "epoch": 0.5016522313819611, "grad_norm": 1.1328125, "learning_rate": 0.0010432504401408614, "loss": 1.0829, "step": 7211 }, { "epoch": 0.5017217990190963, "grad_norm": 1.171875, "learning_rate": 0.00104302531689974, "loss": 0.69, "step": 7212 }, { "epoch": 0.5017913666562315, "grad_norm": 0.92578125, "learning_rate": 0.0010428001914740102, "loss": 0.6944, "step": 7213 }, { "epoch": 0.5018609342933668, "grad_norm": 1.03125, "learning_rate": 0.0010425750638751018, "loss": 0.6269, "step": 7214 }, { "epoch": 0.5019305019305019, "grad_norm": 1.15625, "learning_rate": 0.001042349934114447, "loss": 0.8376, "step": 7215 }, { "epoch": 0.5020000695676371, "grad_norm": 0.9921875, "learning_rate": 0.0010421248022034755, "loss": 0.688, "step": 7216 }, { "epoch": 0.5020696372047724, "grad_norm": 1.0546875, "learning_rate": 0.001041899668153619, "loss": 0.9963, "step": 7217 }, { "epoch": 0.5021392048419075, "grad_norm": 1.0703125, "learning_rate": 0.0010416745319763085, "loss": 0.8619, "step": 7218 }, { "epoch": 0.5022087724790427, "grad_norm": 1.03125, "learning_rate": 0.0010414493936829754, "loss": 0.8277, "step": 7219 }, { "epoch": 0.502278340116178, "grad_norm": 1.09375, "learning_rate": 0.001041224253285051, "loss": 0.698, "step": 7220 }, { "epoch": 0.5023479077533132, "grad_norm": 1.0078125, "learning_rate": 0.0010409991107939668, "loss": 0.9984, "step": 7221 }, { "epoch": 0.5024174753904483, "grad_norm": 1.1484375, "learning_rate": 0.0010407739662211546, "loss": 0.9493, "step": 7222 }, { "epoch": 0.5024870430275835, "grad_norm": 1.28125, "learning_rate": 0.0010405488195780455, "loss": 1.0066, "step": 7223 }, { "epoch": 0.5025566106647188, "grad_norm": 1.2109375, "learning_rate": 0.0010403236708760723, "loss": 0.8073, "step": 7224 }, { "epoch": 0.502626178301854, "grad_norm": 1.1328125, "learning_rate": 0.0010400985201266656, "loss": 1.0504, "step": 7225 }, { "epoch": 0.5026957459389891, "grad_norm": 1.3125, "learning_rate": 0.0010398733673412583, "loss": 0.8966, "step": 7226 }, { "epoch": 0.5027653135761244, "grad_norm": 1.25, "learning_rate": 0.001039648212531283, "loss": 0.8191, "step": 7227 }, { "epoch": 0.5028348812132596, "grad_norm": 1.21875, "learning_rate": 0.0010394230557081708, "loss": 0.8162, "step": 7228 }, { "epoch": 0.5029044488503948, "grad_norm": 1.0703125, "learning_rate": 0.0010391978968833549, "loss": 0.8613, "step": 7229 }, { "epoch": 0.50297401648753, "grad_norm": 0.796875, "learning_rate": 0.0010389727360682669, "loss": 0.4959, "step": 7230 }, { "epoch": 0.5030435841246652, "grad_norm": 0.9765625, "learning_rate": 0.0010387475732743401, "loss": 0.8968, "step": 7231 }, { "epoch": 0.5031131517618004, "grad_norm": 1.3046875, "learning_rate": 0.0010385224085130067, "loss": 0.7869, "step": 7232 }, { "epoch": 0.5031827193989357, "grad_norm": 1.265625, "learning_rate": 0.0010382972417956997, "loss": 0.9447, "step": 7233 }, { "epoch": 0.5032522870360708, "grad_norm": 0.9765625, "learning_rate": 0.0010380720731338517, "loss": 0.6995, "step": 7234 }, { "epoch": 0.503321854673206, "grad_norm": 1.03125, "learning_rate": 0.0010378469025388954, "loss": 1.0611, "step": 7235 }, { "epoch": 0.5033914223103412, "grad_norm": 1.1328125, "learning_rate": 0.0010376217300222647, "loss": 0.8676, "step": 7236 }, { "epoch": 0.5034609899474765, "grad_norm": 1.1953125, "learning_rate": 0.0010373965555953919, "loss": 0.751, "step": 7237 }, { "epoch": 0.5035305575846116, "grad_norm": 0.90234375, "learning_rate": 0.0010371713792697108, "loss": 0.7965, "step": 7238 }, { "epoch": 0.5036001252217468, "grad_norm": 1.1015625, "learning_rate": 0.001036946201056654, "loss": 0.8837, "step": 7239 }, { "epoch": 0.5036696928588821, "grad_norm": 1.125, "learning_rate": 0.0010367210209676556, "loss": 0.9095, "step": 7240 }, { "epoch": 0.5037392604960172, "grad_norm": 2.171875, "learning_rate": 0.0010364958390141489, "loss": 0.6597, "step": 7241 }, { "epoch": 0.5038088281331524, "grad_norm": 1.1328125, "learning_rate": 0.0010362706552075672, "loss": 0.645, "step": 7242 }, { "epoch": 0.5038783957702877, "grad_norm": 0.97265625, "learning_rate": 0.0010360454695593447, "loss": 0.9205, "step": 7243 }, { "epoch": 0.5039479634074229, "grad_norm": 1.2109375, "learning_rate": 0.0010358202820809146, "loss": 1.0243, "step": 7244 }, { "epoch": 0.504017531044558, "grad_norm": 1.0546875, "learning_rate": 0.0010355950927837115, "loss": 0.6335, "step": 7245 }, { "epoch": 0.5040870986816933, "grad_norm": 1.1875, "learning_rate": 0.0010353699016791684, "loss": 0.7884, "step": 7246 }, { "epoch": 0.5041566663188285, "grad_norm": 1.1328125, "learning_rate": 0.0010351447087787206, "loss": 0.9097, "step": 7247 }, { "epoch": 0.5042262339559637, "grad_norm": 1.171875, "learning_rate": 0.0010349195140938016, "loss": 0.7768, "step": 7248 }, { "epoch": 0.5042958015930988, "grad_norm": 1.3046875, "learning_rate": 0.0010346943176358452, "loss": 1.0664, "step": 7249 }, { "epoch": 0.5043653692302341, "grad_norm": 1.421875, "learning_rate": 0.0010344691194162866, "loss": 0.9543, "step": 7250 }, { "epoch": 0.5044349368673693, "grad_norm": 1.1171875, "learning_rate": 0.00103424391944656, "loss": 0.9053, "step": 7251 }, { "epoch": 0.5045045045045045, "grad_norm": 1.140625, "learning_rate": 0.0010340187177380995, "loss": 0.8547, "step": 7252 }, { "epoch": 0.5045740721416397, "grad_norm": 1.015625, "learning_rate": 0.0010337935143023397, "loss": 0.558, "step": 7253 }, { "epoch": 0.5046436397787749, "grad_norm": 1.1484375, "learning_rate": 0.0010335683091507162, "loss": 0.7452, "step": 7254 }, { "epoch": 0.5047132074159101, "grad_norm": 0.859375, "learning_rate": 0.001033343102294663, "loss": 0.5924, "step": 7255 }, { "epoch": 0.5047827750530454, "grad_norm": 1.171875, "learning_rate": 0.0010331178937456147, "loss": 0.9584, "step": 7256 }, { "epoch": 0.5048523426901805, "grad_norm": 1.0234375, "learning_rate": 0.0010328926835150073, "loss": 0.9278, "step": 7257 }, { "epoch": 0.5049219103273157, "grad_norm": 1.15625, "learning_rate": 0.0010326674716142745, "loss": 0.9773, "step": 7258 }, { "epoch": 0.504991477964451, "grad_norm": 1.1953125, "learning_rate": 0.0010324422580548528, "loss": 1.0611, "step": 7259 }, { "epoch": 0.5050610456015862, "grad_norm": 1.1171875, "learning_rate": 0.0010322170428481764, "loss": 0.7346, "step": 7260 }, { "epoch": 0.5051306132387213, "grad_norm": 1.375, "learning_rate": 0.0010319918260056813, "loss": 0.993, "step": 7261 }, { "epoch": 0.5052001808758565, "grad_norm": 1.1015625, "learning_rate": 0.001031766607538802, "loss": 0.9258, "step": 7262 }, { "epoch": 0.5052697485129918, "grad_norm": 1.125, "learning_rate": 0.0010315413874589748, "loss": 0.6496, "step": 7263 }, { "epoch": 0.505339316150127, "grad_norm": 1.0078125, "learning_rate": 0.0010313161657776351, "loss": 0.669, "step": 7264 }, { "epoch": 0.5054088837872621, "grad_norm": 1.2734375, "learning_rate": 0.0010310909425062177, "loss": 0.9657, "step": 7265 }, { "epoch": 0.5054784514243974, "grad_norm": 1.53125, "learning_rate": 0.0010308657176561597, "loss": 1.0466, "step": 7266 }, { "epoch": 0.5055480190615326, "grad_norm": 0.97265625, "learning_rate": 0.0010306404912388957, "loss": 0.9254, "step": 7267 }, { "epoch": 0.5056175866986677, "grad_norm": 0.8125, "learning_rate": 0.0010304152632658623, "loss": 0.5554, "step": 7268 }, { "epoch": 0.505687154335803, "grad_norm": 1.09375, "learning_rate": 0.0010301900337484947, "loss": 1.0306, "step": 7269 }, { "epoch": 0.5057567219729382, "grad_norm": 1.3359375, "learning_rate": 0.0010299648026982297, "loss": 0.9134, "step": 7270 }, { "epoch": 0.5058262896100734, "grad_norm": 1.015625, "learning_rate": 0.001029739570126503, "loss": 1.012, "step": 7271 }, { "epoch": 0.5058958572472086, "grad_norm": 1.2265625, "learning_rate": 0.0010295143360447507, "loss": 0.717, "step": 7272 }, { "epoch": 0.5059654248843438, "grad_norm": 1.25, "learning_rate": 0.0010292891004644094, "loss": 0.9266, "step": 7273 }, { "epoch": 0.506034992521479, "grad_norm": 0.92578125, "learning_rate": 0.001029063863396915, "loss": 0.4913, "step": 7274 }, { "epoch": 0.5061045601586142, "grad_norm": 0.796875, "learning_rate": 0.0010288386248537042, "loss": 0.5788, "step": 7275 }, { "epoch": 0.5061741277957494, "grad_norm": 1.15625, "learning_rate": 0.0010286133848462131, "loss": 0.8754, "step": 7276 }, { "epoch": 0.5062436954328846, "grad_norm": 0.9921875, "learning_rate": 0.0010283881433858792, "loss": 0.7639, "step": 7277 }, { "epoch": 0.5063132630700198, "grad_norm": 1.1015625, "learning_rate": 0.0010281629004841378, "loss": 0.7923, "step": 7278 }, { "epoch": 0.5063828307071551, "grad_norm": 1.0390625, "learning_rate": 0.0010279376561524265, "loss": 0.8669, "step": 7279 }, { "epoch": 0.5064523983442902, "grad_norm": 1.1953125, "learning_rate": 0.0010277124104021821, "loss": 0.8788, "step": 7280 }, { "epoch": 0.5065219659814254, "grad_norm": 1.0, "learning_rate": 0.0010274871632448407, "loss": 0.7445, "step": 7281 }, { "epoch": 0.5065915336185607, "grad_norm": 1.09375, "learning_rate": 0.0010272619146918403, "loss": 0.9489, "step": 7282 }, { "epoch": 0.5066611012556959, "grad_norm": 1.1875, "learning_rate": 0.0010270366647546166, "loss": 0.8435, "step": 7283 }, { "epoch": 0.506730668892831, "grad_norm": 1.4921875, "learning_rate": 0.001026811413444608, "loss": 1.0238, "step": 7284 }, { "epoch": 0.5068002365299663, "grad_norm": 1.1171875, "learning_rate": 0.0010265861607732503, "loss": 0.7611, "step": 7285 }, { "epoch": 0.5068698041671015, "grad_norm": 1.171875, "learning_rate": 0.0010263609067519817, "loss": 0.8724, "step": 7286 }, { "epoch": 0.5069393718042366, "grad_norm": 1.0234375, "learning_rate": 0.0010261356513922393, "loss": 0.9458, "step": 7287 }, { "epoch": 0.5070089394413718, "grad_norm": 0.984375, "learning_rate": 0.00102591039470546, "loss": 0.7484, "step": 7288 }, { "epoch": 0.5070785070785071, "grad_norm": 1.6015625, "learning_rate": 0.0010256851367030817, "loss": 1.2545, "step": 7289 }, { "epoch": 0.5071480747156423, "grad_norm": 1.265625, "learning_rate": 0.001025459877396541, "loss": 0.9903, "step": 7290 }, { "epoch": 0.5072176423527774, "grad_norm": 1.078125, "learning_rate": 0.001025234616797277, "loss": 0.8205, "step": 7291 }, { "epoch": 0.5072872099899127, "grad_norm": 1.0234375, "learning_rate": 0.0010250093549167257, "loss": 0.8465, "step": 7292 }, { "epoch": 0.5073567776270479, "grad_norm": 1.0859375, "learning_rate": 0.0010247840917663254, "loss": 0.8993, "step": 7293 }, { "epoch": 0.5074263452641831, "grad_norm": 1.25, "learning_rate": 0.0010245588273575142, "loss": 0.9857, "step": 7294 }, { "epoch": 0.5074959129013183, "grad_norm": 0.9765625, "learning_rate": 0.001024333561701729, "loss": 0.6849, "step": 7295 }, { "epoch": 0.5075654805384535, "grad_norm": 1.3984375, "learning_rate": 0.001024108294810409, "loss": 0.9671, "step": 7296 }, { "epoch": 0.5076350481755887, "grad_norm": 1.109375, "learning_rate": 0.0010238830266949906, "loss": 0.9284, "step": 7297 }, { "epoch": 0.507704615812724, "grad_norm": 0.83203125, "learning_rate": 0.0010236577573669128, "loss": 0.6884, "step": 7298 }, { "epoch": 0.5077741834498591, "grad_norm": 1.328125, "learning_rate": 0.001023432486837613, "loss": 0.7001, "step": 7299 }, { "epoch": 0.5078437510869943, "grad_norm": 0.99609375, "learning_rate": 0.00102320721511853, "loss": 0.9619, "step": 7300 }, { "epoch": 0.5079133187241295, "grad_norm": 1.3515625, "learning_rate": 0.0010229819422211016, "loss": 0.9597, "step": 7301 }, { "epoch": 0.5079828863612648, "grad_norm": 1.6796875, "learning_rate": 0.0010227566681567657, "loss": 1.0313, "step": 7302 }, { "epoch": 0.5080524539983999, "grad_norm": 0.96875, "learning_rate": 0.0010225313929369613, "loss": 0.7011, "step": 7303 }, { "epoch": 0.5081220216355351, "grad_norm": 1.3046875, "learning_rate": 0.0010223061165731257, "loss": 0.8618, "step": 7304 }, { "epoch": 0.5081915892726704, "grad_norm": 1.0390625, "learning_rate": 0.0010220808390766986, "loss": 0.7311, "step": 7305 }, { "epoch": 0.5082611569098056, "grad_norm": 1.171875, "learning_rate": 0.0010218555604591174, "loss": 0.6457, "step": 7306 }, { "epoch": 0.5083307245469407, "grad_norm": 1.109375, "learning_rate": 0.0010216302807318214, "loss": 1.0473, "step": 7307 }, { "epoch": 0.508400292184076, "grad_norm": 1.1015625, "learning_rate": 0.0010214049999062481, "loss": 0.7673, "step": 7308 }, { "epoch": 0.5084698598212112, "grad_norm": 1.0, "learning_rate": 0.0010211797179938374, "loss": 0.6966, "step": 7309 }, { "epoch": 0.5085394274583463, "grad_norm": 0.83203125, "learning_rate": 0.0010209544350060272, "loss": 0.6101, "step": 7310 }, { "epoch": 0.5086089950954816, "grad_norm": 1.03125, "learning_rate": 0.0010207291509542562, "loss": 0.6792, "step": 7311 }, { "epoch": 0.5086785627326168, "grad_norm": 1.0078125, "learning_rate": 0.0010205038658499636, "loss": 0.6202, "step": 7312 }, { "epoch": 0.508748130369752, "grad_norm": 1.0703125, "learning_rate": 0.0010202785797045878, "loss": 0.5167, "step": 7313 }, { "epoch": 0.5088176980068871, "grad_norm": 1.1328125, "learning_rate": 0.0010200532925295684, "loss": 0.8961, "step": 7314 }, { "epoch": 0.5088872656440224, "grad_norm": 1.421875, "learning_rate": 0.0010198280043363435, "loss": 0.8841, "step": 7315 }, { "epoch": 0.5089568332811576, "grad_norm": 0.9140625, "learning_rate": 0.0010196027151363526, "loss": 0.8483, "step": 7316 }, { "epoch": 0.5090264009182928, "grad_norm": 1.0546875, "learning_rate": 0.0010193774249410345, "loss": 0.6354, "step": 7317 }, { "epoch": 0.509095968555428, "grad_norm": 1.1796875, "learning_rate": 0.0010191521337618286, "loss": 0.8367, "step": 7318 }, { "epoch": 0.5091655361925632, "grad_norm": 1.0703125, "learning_rate": 0.001018926841610174, "loss": 0.6624, "step": 7319 }, { "epoch": 0.5092351038296984, "grad_norm": 1.3359375, "learning_rate": 0.0010187015484975095, "loss": 0.7719, "step": 7320 }, { "epoch": 0.5093046714668337, "grad_norm": 0.97265625, "learning_rate": 0.001018476254435275, "loss": 0.6867, "step": 7321 }, { "epoch": 0.5093742391039688, "grad_norm": 1.265625, "learning_rate": 0.001018250959434909, "loss": 1.0151, "step": 7322 }, { "epoch": 0.509443806741104, "grad_norm": 1.0078125, "learning_rate": 0.0010180256635078514, "loss": 0.6282, "step": 7323 }, { "epoch": 0.5095133743782393, "grad_norm": 1.3828125, "learning_rate": 0.0010178003666655416, "loss": 0.895, "step": 7324 }, { "epoch": 0.5095829420153745, "grad_norm": 1.1171875, "learning_rate": 0.0010175750689194187, "loss": 0.7366, "step": 7325 }, { "epoch": 0.5096525096525096, "grad_norm": 1.2265625, "learning_rate": 0.0010173497702809225, "loss": 0.8923, "step": 7326 }, { "epoch": 0.5097220772896448, "grad_norm": 1.1953125, "learning_rate": 0.0010171244707614924, "loss": 0.9015, "step": 7327 }, { "epoch": 0.5097916449267801, "grad_norm": 1.03125, "learning_rate": 0.0010168991703725682, "loss": 0.8422, "step": 7328 }, { "epoch": 0.5098612125639153, "grad_norm": 1.3828125, "learning_rate": 0.001016673869125589, "loss": 0.8664, "step": 7329 }, { "epoch": 0.5099307802010504, "grad_norm": 1.2421875, "learning_rate": 0.0010164485670319948, "loss": 0.854, "step": 7330 }, { "epoch": 0.5100003478381857, "grad_norm": 1.1328125, "learning_rate": 0.0010162232641032253, "loss": 0.6463, "step": 7331 }, { "epoch": 0.5100699154753209, "grad_norm": 1.375, "learning_rate": 0.0010159979603507204, "loss": 0.9051, "step": 7332 }, { "epoch": 0.510139483112456, "grad_norm": 0.91015625, "learning_rate": 0.0010157726557859196, "loss": 0.7682, "step": 7333 }, { "epoch": 0.5102090507495913, "grad_norm": 1.109375, "learning_rate": 0.0010155473504202626, "loss": 0.7717, "step": 7334 }, { "epoch": 0.5102786183867265, "grad_norm": 1.2109375, "learning_rate": 0.00101532204426519, "loss": 0.63, "step": 7335 }, { "epoch": 0.5103481860238617, "grad_norm": 1.375, "learning_rate": 0.0010150967373321407, "loss": 0.8076, "step": 7336 }, { "epoch": 0.510417753660997, "grad_norm": 0.87890625, "learning_rate": 0.0010148714296325553, "loss": 0.7188, "step": 7337 }, { "epoch": 0.5104873212981321, "grad_norm": 1.2109375, "learning_rate": 0.0010146461211778738, "loss": 0.9532, "step": 7338 }, { "epoch": 0.5105568889352673, "grad_norm": 1.3515625, "learning_rate": 0.0010144208119795362, "loss": 0.9941, "step": 7339 }, { "epoch": 0.5106264565724025, "grad_norm": 0.8515625, "learning_rate": 0.0010141955020489823, "loss": 0.6594, "step": 7340 }, { "epoch": 0.5106960242095377, "grad_norm": 1.125, "learning_rate": 0.0010139701913976524, "loss": 0.893, "step": 7341 }, { "epoch": 0.5107655918466729, "grad_norm": 1.578125, "learning_rate": 0.0010137448800369869, "loss": 0.6618, "step": 7342 }, { "epoch": 0.5108351594838081, "grad_norm": 1.0390625, "learning_rate": 0.001013519567978425, "loss": 0.6087, "step": 7343 }, { "epoch": 0.5109047271209434, "grad_norm": 1.4921875, "learning_rate": 0.0010132942552334078, "loss": 0.9438, "step": 7344 }, { "epoch": 0.5109742947580785, "grad_norm": 1.2265625, "learning_rate": 0.0010130689418133755, "loss": 0.6784, "step": 7345 }, { "epoch": 0.5110438623952137, "grad_norm": 0.9453125, "learning_rate": 0.0010128436277297684, "loss": 0.6899, "step": 7346 }, { "epoch": 0.511113430032349, "grad_norm": 1.296875, "learning_rate": 0.0010126183129940264, "loss": 1.0467, "step": 7347 }, { "epoch": 0.5111829976694842, "grad_norm": 1.1328125, "learning_rate": 0.0010123929976175899, "loss": 0.8388, "step": 7348 }, { "epoch": 0.5112525653066193, "grad_norm": 0.95703125, "learning_rate": 0.0010121676816118997, "loss": 0.7295, "step": 7349 }, { "epoch": 0.5113221329437546, "grad_norm": 0.89453125, "learning_rate": 0.0010119423649883957, "loss": 0.6243, "step": 7350 }, { "epoch": 0.5113917005808898, "grad_norm": 1.2109375, "learning_rate": 0.001011717047758519, "loss": 0.7465, "step": 7351 }, { "epoch": 0.511461268218025, "grad_norm": 0.75390625, "learning_rate": 0.0010114917299337092, "loss": 0.7375, "step": 7352 }, { "epoch": 0.5115308358551601, "grad_norm": 1.296875, "learning_rate": 0.0010112664115254075, "loss": 0.7938, "step": 7353 }, { "epoch": 0.5116004034922954, "grad_norm": 1.25, "learning_rate": 0.0010110410925450542, "loss": 0.8754, "step": 7354 }, { "epoch": 0.5116699711294306, "grad_norm": 0.98828125, "learning_rate": 0.00101081577300409, "loss": 1.0421, "step": 7355 }, { "epoch": 0.5117395387665657, "grad_norm": 1.265625, "learning_rate": 0.001010590452913955, "loss": 0.9681, "step": 7356 }, { "epoch": 0.511809106403701, "grad_norm": 1.1640625, "learning_rate": 0.0010103651322860905, "loss": 0.8657, "step": 7357 }, { "epoch": 0.5118786740408362, "grad_norm": 1.46875, "learning_rate": 0.0010101398111319372, "loss": 0.9783, "step": 7358 }, { "epoch": 0.5119482416779714, "grad_norm": 1.2421875, "learning_rate": 0.0010099144894629346, "loss": 0.6622, "step": 7359 }, { "epoch": 0.5120178093151067, "grad_norm": 1.296875, "learning_rate": 0.0010096891672905246, "loss": 0.9833, "step": 7360 }, { "epoch": 0.5120873769522418, "grad_norm": 1.484375, "learning_rate": 0.0010094638446261474, "loss": 0.8881, "step": 7361 }, { "epoch": 0.512156944589377, "grad_norm": 1.125, "learning_rate": 0.0010092385214812438, "loss": 0.5359, "step": 7362 }, { "epoch": 0.5122265122265123, "grad_norm": 1.4140625, "learning_rate": 0.001009013197867255, "loss": 0.9023, "step": 7363 }, { "epoch": 0.5122960798636474, "grad_norm": 0.91796875, "learning_rate": 0.001008787873795621, "loss": 0.4285, "step": 7364 }, { "epoch": 0.5123656475007826, "grad_norm": 1.015625, "learning_rate": 0.0010085625492777834, "loss": 0.8085, "step": 7365 }, { "epoch": 0.5124352151379178, "grad_norm": 1.2890625, "learning_rate": 0.0010083372243251828, "loss": 1.0129, "step": 7366 }, { "epoch": 0.5125047827750531, "grad_norm": 1.0625, "learning_rate": 0.0010081118989492598, "loss": 0.6876, "step": 7367 }, { "epoch": 0.5125743504121882, "grad_norm": 1.4140625, "learning_rate": 0.0010078865731614553, "loss": 1.2099, "step": 7368 }, { "epoch": 0.5126439180493234, "grad_norm": 1.25, "learning_rate": 0.0010076612469732105, "loss": 1.0681, "step": 7369 }, { "epoch": 0.5127134856864587, "grad_norm": 1.4609375, "learning_rate": 0.0010074359203959661, "loss": 0.7904, "step": 7370 }, { "epoch": 0.5127830533235939, "grad_norm": 1.5703125, "learning_rate": 0.0010072105934411633, "loss": 0.9934, "step": 7371 }, { "epoch": 0.512852620960729, "grad_norm": 0.95703125, "learning_rate": 0.0010069852661202428, "loss": 0.8781, "step": 7372 }, { "epoch": 0.5129221885978643, "grad_norm": 1.1015625, "learning_rate": 0.0010067599384446456, "loss": 0.884, "step": 7373 }, { "epoch": 0.5129917562349995, "grad_norm": 1.046875, "learning_rate": 0.001006534610425813, "loss": 0.6635, "step": 7374 }, { "epoch": 0.5130613238721347, "grad_norm": 1.0625, "learning_rate": 0.0010063092820751858, "loss": 0.9075, "step": 7375 }, { "epoch": 0.5131308915092699, "grad_norm": 1.15625, "learning_rate": 0.001006083953404205, "loss": 0.6741, "step": 7376 }, { "epoch": 0.5132004591464051, "grad_norm": 0.96484375, "learning_rate": 0.0010058586244243118, "loss": 0.6751, "step": 7377 }, { "epoch": 0.5132700267835403, "grad_norm": 0.7890625, "learning_rate": 0.0010056332951469472, "loss": 0.6612, "step": 7378 }, { "epoch": 0.5133395944206754, "grad_norm": 1.0, "learning_rate": 0.001005407965583552, "loss": 0.9126, "step": 7379 }, { "epoch": 0.5134091620578107, "grad_norm": 0.953125, "learning_rate": 0.0010051826357455678, "loss": 0.676, "step": 7380 }, { "epoch": 0.5134787296949459, "grad_norm": 1.25, "learning_rate": 0.0010049573056444354, "loss": 0.6661, "step": 7381 }, { "epoch": 0.5135482973320811, "grad_norm": 1.3671875, "learning_rate": 0.001004731975291596, "loss": 0.7886, "step": 7382 }, { "epoch": 0.5136178649692164, "grad_norm": 1.265625, "learning_rate": 0.0010045066446984908, "loss": 0.8526, "step": 7383 }, { "epoch": 0.5136874326063515, "grad_norm": 1.46875, "learning_rate": 0.0010042813138765607, "loss": 1.2588, "step": 7384 }, { "epoch": 0.5137570002434867, "grad_norm": 1.4296875, "learning_rate": 0.0010040559828372474, "loss": 0.7593, "step": 7385 }, { "epoch": 0.513826567880622, "grad_norm": 0.92578125, "learning_rate": 0.0010038306515919916, "loss": 0.6358, "step": 7386 }, { "epoch": 0.5138961355177571, "grad_norm": 1.375, "learning_rate": 0.0010036053201522347, "loss": 0.7824, "step": 7387 }, { "epoch": 0.5139657031548923, "grad_norm": 1.2109375, "learning_rate": 0.0010033799885294174, "loss": 1.0388, "step": 7388 }, { "epoch": 0.5140352707920276, "grad_norm": 1.03125, "learning_rate": 0.0010031546567349815, "loss": 0.8488, "step": 7389 }, { "epoch": 0.5141048384291628, "grad_norm": 0.97265625, "learning_rate": 0.0010029293247803685, "loss": 0.6219, "step": 7390 }, { "epoch": 0.5141744060662979, "grad_norm": 1.0625, "learning_rate": 0.0010027039926770187, "loss": 0.5945, "step": 7391 }, { "epoch": 0.5142439737034331, "grad_norm": 1.015625, "learning_rate": 0.0010024786604363738, "loss": 0.8943, "step": 7392 }, { "epoch": 0.5143135413405684, "grad_norm": 1.3359375, "learning_rate": 0.0010022533280698751, "loss": 0.8981, "step": 7393 }, { "epoch": 0.5143831089777036, "grad_norm": 1.3203125, "learning_rate": 0.0010020279955889637, "loss": 0.7547, "step": 7394 }, { "epoch": 0.5144526766148387, "grad_norm": 1.125, "learning_rate": 0.001001802663005081, "loss": 0.6436, "step": 7395 }, { "epoch": 0.514522244251974, "grad_norm": 0.9921875, "learning_rate": 0.0010015773303296682, "loss": 0.6235, "step": 7396 }, { "epoch": 0.5145918118891092, "grad_norm": 0.9375, "learning_rate": 0.0010013519975741662, "loss": 0.7985, "step": 7397 }, { "epoch": 0.5146613795262444, "grad_norm": 1.2578125, "learning_rate": 0.001001126664750017, "loss": 0.9846, "step": 7398 }, { "epoch": 0.5147309471633796, "grad_norm": 1.0390625, "learning_rate": 0.0010009013318686612, "loss": 0.8467, "step": 7399 }, { "epoch": 0.5148005148005148, "grad_norm": 1.2265625, "learning_rate": 0.0010006759989415403, "loss": 0.8558, "step": 7400 }, { "epoch": 0.51487008243765, "grad_norm": 1.015625, "learning_rate": 0.0010004506659800959, "loss": 0.7214, "step": 7401 }, { "epoch": 0.5149396500747853, "grad_norm": 1.2578125, "learning_rate": 0.0010002253329957685, "loss": 1.0984, "step": 7402 }, { "epoch": 0.5150092177119204, "grad_norm": 1.203125, "learning_rate": 0.001, "loss": 0.95, "step": 7403 }, { "epoch": 0.5150787853490556, "grad_norm": 1.1875, "learning_rate": 0.0009997746670042315, "loss": 0.7794, "step": 7404 }, { "epoch": 0.5151483529861908, "grad_norm": 0.9921875, "learning_rate": 0.0009995493340199042, "loss": 0.8167, "step": 7405 }, { "epoch": 0.515217920623326, "grad_norm": 1.109375, "learning_rate": 0.0009993240010584597, "loss": 0.7491, "step": 7406 }, { "epoch": 0.5152874882604612, "grad_norm": 1.1328125, "learning_rate": 0.0009990986681313388, "loss": 1.0262, "step": 7407 }, { "epoch": 0.5153570558975964, "grad_norm": 0.9453125, "learning_rate": 0.0009988733352499833, "loss": 0.6585, "step": 7408 }, { "epoch": 0.5154266235347317, "grad_norm": 1.296875, "learning_rate": 0.0009986480024258338, "loss": 0.925, "step": 7409 }, { "epoch": 0.5154961911718668, "grad_norm": 1.2421875, "learning_rate": 0.000998422669670332, "loss": 0.8088, "step": 7410 }, { "epoch": 0.515565758809002, "grad_norm": 0.953125, "learning_rate": 0.000998197336994919, "loss": 0.565, "step": 7411 }, { "epoch": 0.5156353264461373, "grad_norm": 1.4296875, "learning_rate": 0.0009979720044110362, "loss": 0.9264, "step": 7412 }, { "epoch": 0.5157048940832725, "grad_norm": 1.0390625, "learning_rate": 0.0009977466719301251, "loss": 0.8953, "step": 7413 }, { "epoch": 0.5157744617204076, "grad_norm": 1.0390625, "learning_rate": 0.0009975213395636263, "loss": 0.769, "step": 7414 }, { "epoch": 0.5158440293575429, "grad_norm": 1.140625, "learning_rate": 0.0009972960073229818, "loss": 0.8064, "step": 7415 }, { "epoch": 0.5159135969946781, "grad_norm": 1.171875, "learning_rate": 0.0009970706752196316, "loss": 0.9126, "step": 7416 }, { "epoch": 0.5159831646318133, "grad_norm": 1.265625, "learning_rate": 0.0009968453432650185, "loss": 0.9174, "step": 7417 }, { "epoch": 0.5160527322689484, "grad_norm": 1.25, "learning_rate": 0.0009966200114705827, "loss": 0.8217, "step": 7418 }, { "epoch": 0.5161222999060837, "grad_norm": 1.046875, "learning_rate": 0.0009963946798477654, "loss": 0.7123, "step": 7419 }, { "epoch": 0.5161918675432189, "grad_norm": 1.21875, "learning_rate": 0.0009961693484080087, "loss": 1.0308, "step": 7420 }, { "epoch": 0.5162614351803541, "grad_norm": 0.9609375, "learning_rate": 0.000995944017162753, "loss": 0.8344, "step": 7421 }, { "epoch": 0.5163310028174893, "grad_norm": 1.0625, "learning_rate": 0.0009957186861234396, "loss": 0.6491, "step": 7422 }, { "epoch": 0.5164005704546245, "grad_norm": 1.203125, "learning_rate": 0.0009954933553015092, "loss": 0.8135, "step": 7423 }, { "epoch": 0.5164701380917597, "grad_norm": 1.2578125, "learning_rate": 0.0009952680247084043, "loss": 0.8808, "step": 7424 }, { "epoch": 0.516539705728895, "grad_norm": 1.0078125, "learning_rate": 0.0009950426943555648, "loss": 0.6941, "step": 7425 }, { "epoch": 0.5166092733660301, "grad_norm": 1.2421875, "learning_rate": 0.0009948173642544322, "loss": 0.7446, "step": 7426 }, { "epoch": 0.5166788410031653, "grad_norm": 1.1015625, "learning_rate": 0.000994592034416448, "loss": 0.6119, "step": 7427 }, { "epoch": 0.5167484086403006, "grad_norm": 1.140625, "learning_rate": 0.000994366704853053, "loss": 0.7891, "step": 7428 }, { "epoch": 0.5168179762774358, "grad_norm": 0.921875, "learning_rate": 0.0009941413755756886, "loss": 0.6071, "step": 7429 }, { "epoch": 0.5168875439145709, "grad_norm": 1.09375, "learning_rate": 0.000993916046595795, "loss": 0.751, "step": 7430 }, { "epoch": 0.5169571115517061, "grad_norm": 0.8515625, "learning_rate": 0.0009936907179248144, "loss": 0.7383, "step": 7431 }, { "epoch": 0.5170266791888414, "grad_norm": 1.2109375, "learning_rate": 0.0009934653895741872, "loss": 0.7809, "step": 7432 }, { "epoch": 0.5170962468259765, "grad_norm": 0.96875, "learning_rate": 0.0009932400615553542, "loss": 0.9368, "step": 7433 }, { "epoch": 0.5171658144631117, "grad_norm": 0.80859375, "learning_rate": 0.0009930147338797573, "loss": 0.6948, "step": 7434 }, { "epoch": 0.517235382100247, "grad_norm": 1.0234375, "learning_rate": 0.000992789406558837, "loss": 0.8302, "step": 7435 }, { "epoch": 0.5173049497373822, "grad_norm": 1.15625, "learning_rate": 0.0009925640796040341, "loss": 0.8594, "step": 7436 }, { "epoch": 0.5173745173745173, "grad_norm": 1.1640625, "learning_rate": 0.0009923387530267895, "loss": 0.8291, "step": 7437 }, { "epoch": 0.5174440850116526, "grad_norm": 1.3515625, "learning_rate": 0.000992113426838545, "loss": 0.9406, "step": 7438 }, { "epoch": 0.5175136526487878, "grad_norm": 0.9609375, "learning_rate": 0.0009918881010507405, "loss": 0.8503, "step": 7439 }, { "epoch": 0.517583220285923, "grad_norm": 0.9296875, "learning_rate": 0.0009916627756748173, "loss": 0.7722, "step": 7440 }, { "epoch": 0.5176527879230582, "grad_norm": 1.1328125, "learning_rate": 0.0009914374507222167, "loss": 1.0453, "step": 7441 }, { "epoch": 0.5177223555601934, "grad_norm": 1.0078125, "learning_rate": 0.000991212126204379, "loss": 0.8305, "step": 7442 }, { "epoch": 0.5177919231973286, "grad_norm": 1.0234375, "learning_rate": 0.0009909868021327451, "loss": 0.9021, "step": 7443 }, { "epoch": 0.5178614908344638, "grad_norm": 1.0625, "learning_rate": 0.000990761478518756, "loss": 0.8837, "step": 7444 }, { "epoch": 0.517931058471599, "grad_norm": 1.1875, "learning_rate": 0.0009905361553738529, "loss": 0.6102, "step": 7445 }, { "epoch": 0.5180006261087342, "grad_norm": 1.1875, "learning_rate": 0.0009903108327094757, "loss": 0.8095, "step": 7446 }, { "epoch": 0.5180701937458694, "grad_norm": 1.09375, "learning_rate": 0.0009900855105370657, "loss": 0.8938, "step": 7447 }, { "epoch": 0.5181397613830047, "grad_norm": 1.1328125, "learning_rate": 0.000989860188868063, "loss": 0.9159, "step": 7448 }, { "epoch": 0.5182093290201398, "grad_norm": 1.140625, "learning_rate": 0.0009896348677139095, "loss": 0.6995, "step": 7449 }, { "epoch": 0.518278896657275, "grad_norm": 0.9453125, "learning_rate": 0.000989409547086045, "loss": 0.779, "step": 7450 }, { "epoch": 0.5183484642944103, "grad_norm": 1.21875, "learning_rate": 0.00098918422699591, "loss": 0.9001, "step": 7451 }, { "epoch": 0.5184180319315455, "grad_norm": 1.0703125, "learning_rate": 0.0009889589074549459, "loss": 1.0165, "step": 7452 }, { "epoch": 0.5184875995686806, "grad_norm": 1.34375, "learning_rate": 0.0009887335884745925, "loss": 0.8979, "step": 7453 }, { "epoch": 0.5185571672058159, "grad_norm": 1.2578125, "learning_rate": 0.000988508270066291, "loss": 0.9494, "step": 7454 }, { "epoch": 0.5186267348429511, "grad_norm": 1.125, "learning_rate": 0.000988282952241481, "loss": 0.9349, "step": 7455 }, { "epoch": 0.5186963024800862, "grad_norm": 1.6171875, "learning_rate": 0.0009880576350116044, "loss": 0.9685, "step": 7456 }, { "epoch": 0.5187658701172214, "grad_norm": 1.3828125, "learning_rate": 0.0009878323183881005, "loss": 1.3639, "step": 7457 }, { "epoch": 0.5188354377543567, "grad_norm": 1.03125, "learning_rate": 0.0009876070023824102, "loss": 0.8812, "step": 7458 }, { "epoch": 0.5189050053914919, "grad_norm": 1.109375, "learning_rate": 0.0009873816870059739, "loss": 0.9826, "step": 7459 }, { "epoch": 0.518974573028627, "grad_norm": 1.5390625, "learning_rate": 0.0009871563722702319, "loss": 0.8891, "step": 7460 }, { "epoch": 0.5190441406657623, "grad_norm": 1.046875, "learning_rate": 0.0009869310581866247, "loss": 0.8919, "step": 7461 }, { "epoch": 0.5191137083028975, "grad_norm": 1.125, "learning_rate": 0.000986705744766592, "loss": 0.8724, "step": 7462 }, { "epoch": 0.5191832759400327, "grad_norm": 0.91796875, "learning_rate": 0.000986480432021575, "loss": 0.6715, "step": 7463 }, { "epoch": 0.5192528435771679, "grad_norm": 1.234375, "learning_rate": 0.0009862551199630136, "loss": 0.8011, "step": 7464 }, { "epoch": 0.5193224112143031, "grad_norm": 1.1015625, "learning_rate": 0.0009860298086023474, "loss": 0.8478, "step": 7465 }, { "epoch": 0.5193919788514383, "grad_norm": 1.265625, "learning_rate": 0.0009858044979510177, "loss": 0.7981, "step": 7466 }, { "epoch": 0.5194615464885736, "grad_norm": 1.2421875, "learning_rate": 0.0009855791880204639, "loss": 0.9811, "step": 7467 }, { "epoch": 0.5195311141257087, "grad_norm": 1.296875, "learning_rate": 0.0009853538788221262, "loss": 0.9317, "step": 7468 }, { "epoch": 0.5196006817628439, "grad_norm": 1.9609375, "learning_rate": 0.0009851285703674445, "loss": 1.1164, "step": 7469 }, { "epoch": 0.5196702493999791, "grad_norm": 1.109375, "learning_rate": 0.0009849032626678595, "loss": 0.7334, "step": 7470 }, { "epoch": 0.5197398170371144, "grad_norm": 0.87890625, "learning_rate": 0.0009846779557348103, "loss": 0.5433, "step": 7471 }, { "epoch": 0.5198093846742495, "grad_norm": 1.140625, "learning_rate": 0.0009844526495797372, "loss": 0.8945, "step": 7472 }, { "epoch": 0.5198789523113847, "grad_norm": 1.1796875, "learning_rate": 0.0009842273442140807, "loss": 0.7181, "step": 7473 }, { "epoch": 0.51994851994852, "grad_norm": 1.3203125, "learning_rate": 0.0009840020396492798, "loss": 0.9351, "step": 7474 }, { "epoch": 0.5200180875856552, "grad_norm": 1.484375, "learning_rate": 0.000983776735896775, "loss": 0.648, "step": 7475 }, { "epoch": 0.5200876552227903, "grad_norm": 0.984375, "learning_rate": 0.0009835514329680052, "loss": 0.8321, "step": 7476 }, { "epoch": 0.5201572228599256, "grad_norm": 1.09375, "learning_rate": 0.0009833261308744112, "loss": 0.8325, "step": 7477 }, { "epoch": 0.5202267904970608, "grad_norm": 1.3203125, "learning_rate": 0.0009831008296274323, "loss": 0.8579, "step": 7478 }, { "epoch": 0.520296358134196, "grad_norm": 1.3515625, "learning_rate": 0.0009828755292385076, "loss": 1.0932, "step": 7479 }, { "epoch": 0.5203659257713312, "grad_norm": 0.97265625, "learning_rate": 0.0009826502297190776, "loss": 0.8288, "step": 7480 }, { "epoch": 0.5204354934084664, "grad_norm": 1.1796875, "learning_rate": 0.0009824249310805815, "loss": 0.9406, "step": 7481 }, { "epoch": 0.5205050610456016, "grad_norm": 1.203125, "learning_rate": 0.0009821996333344587, "loss": 1.028, "step": 7482 }, { "epoch": 0.5205746286827367, "grad_norm": 1.0234375, "learning_rate": 0.0009819743364921484, "loss": 0.7697, "step": 7483 }, { "epoch": 0.520644196319872, "grad_norm": 1.2421875, "learning_rate": 0.000981749040565091, "loss": 0.7308, "step": 7484 }, { "epoch": 0.5207137639570072, "grad_norm": 1.015625, "learning_rate": 0.0009815237455647254, "loss": 0.8124, "step": 7485 }, { "epoch": 0.5207833315941424, "grad_norm": 1.1171875, "learning_rate": 0.0009812984515024904, "loss": 0.5755, "step": 7486 }, { "epoch": 0.5208528992312776, "grad_norm": 1.046875, "learning_rate": 0.000981073158389826, "loss": 0.9276, "step": 7487 }, { "epoch": 0.5209224668684128, "grad_norm": 0.78515625, "learning_rate": 0.0009808478662381714, "loss": 0.6658, "step": 7488 }, { "epoch": 0.520992034505548, "grad_norm": 1.0703125, "learning_rate": 0.0009806225750589655, "loss": 0.8713, "step": 7489 }, { "epoch": 0.5210616021426833, "grad_norm": 0.84375, "learning_rate": 0.0009803972848636473, "loss": 0.7039, "step": 7490 }, { "epoch": 0.5211311697798184, "grad_norm": 1.234375, "learning_rate": 0.0009801719956636567, "loss": 0.9872, "step": 7491 }, { "epoch": 0.5212007374169536, "grad_norm": 1.1328125, "learning_rate": 0.0009799467074704318, "loss": 0.6726, "step": 7492 }, { "epoch": 0.5212703050540889, "grad_norm": 1.0390625, "learning_rate": 0.000979721420295412, "loss": 0.7117, "step": 7493 }, { "epoch": 0.5213398726912241, "grad_norm": 1.4375, "learning_rate": 0.0009794961341500364, "loss": 0.7386, "step": 7494 }, { "epoch": 0.5214094403283592, "grad_norm": 1.0859375, "learning_rate": 0.0009792708490457438, "loss": 1.0354, "step": 7495 }, { "epoch": 0.5214790079654944, "grad_norm": 0.875, "learning_rate": 0.000979045564993973, "loss": 0.6982, "step": 7496 }, { "epoch": 0.5215485756026297, "grad_norm": 0.92578125, "learning_rate": 0.0009788202820061626, "loss": 0.8027, "step": 7497 }, { "epoch": 0.5216181432397649, "grad_norm": 0.86328125, "learning_rate": 0.000978595000093752, "loss": 0.8026, "step": 7498 }, { "epoch": 0.5216877108769, "grad_norm": 0.95703125, "learning_rate": 0.000978369719268179, "loss": 0.7331, "step": 7499 }, { "epoch": 0.5217572785140353, "grad_norm": 1.1015625, "learning_rate": 0.0009781444395408824, "loss": 1.0517, "step": 7500 }, { "epoch": 0.5218268461511705, "grad_norm": 1.046875, "learning_rate": 0.0009779191609233014, "loss": 0.7587, "step": 7501 }, { "epoch": 0.5218964137883056, "grad_norm": 1.34375, "learning_rate": 0.0009776938834268744, "loss": 1.1129, "step": 7502 }, { "epoch": 0.5219659814254409, "grad_norm": 1.078125, "learning_rate": 0.0009774686070630392, "loss": 0.605, "step": 7503 }, { "epoch": 0.5220355490625761, "grad_norm": 0.9453125, "learning_rate": 0.0009772433318432341, "loss": 0.6579, "step": 7504 }, { "epoch": 0.5221051166997113, "grad_norm": 1.125, "learning_rate": 0.0009770180577788987, "loss": 0.7113, "step": 7505 }, { "epoch": 0.5221746843368466, "grad_norm": 1.140625, "learning_rate": 0.0009767927848814701, "loss": 1.2031, "step": 7506 }, { "epoch": 0.5222442519739817, "grad_norm": 0.9765625, "learning_rate": 0.0009765675131623867, "loss": 0.8527, "step": 7507 }, { "epoch": 0.5223138196111169, "grad_norm": 0.98046875, "learning_rate": 0.0009763422426330873, "loss": 0.9367, "step": 7508 }, { "epoch": 0.5223833872482521, "grad_norm": 1.0703125, "learning_rate": 0.0009761169733050096, "loss": 0.8686, "step": 7509 }, { "epoch": 0.5224529548853873, "grad_norm": 0.85546875, "learning_rate": 0.0009758917051895915, "loss": 0.5321, "step": 7510 }, { "epoch": 0.5225225225225225, "grad_norm": 0.96875, "learning_rate": 0.0009756664382982708, "loss": 0.8484, "step": 7511 }, { "epoch": 0.5225920901596577, "grad_norm": 1.1328125, "learning_rate": 0.0009754411726424861, "loss": 0.7063, "step": 7512 }, { "epoch": 0.522661657796793, "grad_norm": 1.1171875, "learning_rate": 0.0009752159082336747, "loss": 0.825, "step": 7513 }, { "epoch": 0.5227312254339281, "grad_norm": 0.8203125, "learning_rate": 0.0009749906450832744, "loss": 0.6291, "step": 7514 }, { "epoch": 0.5228007930710633, "grad_norm": 1.21875, "learning_rate": 0.0009747653832027232, "loss": 0.7437, "step": 7515 }, { "epoch": 0.5228703607081986, "grad_norm": 0.88671875, "learning_rate": 0.0009745401226034589, "loss": 0.7518, "step": 7516 }, { "epoch": 0.5229399283453338, "grad_norm": 1.3046875, "learning_rate": 0.0009743148632969186, "loss": 0.8273, "step": 7517 }, { "epoch": 0.5230094959824689, "grad_norm": 1.21875, "learning_rate": 0.00097408960529454, "loss": 0.7889, "step": 7518 }, { "epoch": 0.5230790636196042, "grad_norm": 1.109375, "learning_rate": 0.0009738643486077608, "loss": 0.7051, "step": 7519 }, { "epoch": 0.5231486312567394, "grad_norm": 1.25, "learning_rate": 0.0009736390932480183, "loss": 0.7936, "step": 7520 }, { "epoch": 0.5232181988938746, "grad_norm": 1.25, "learning_rate": 0.0009734138392267497, "loss": 0.9106, "step": 7521 }, { "epoch": 0.5232877665310097, "grad_norm": 1.046875, "learning_rate": 0.0009731885865553922, "loss": 0.7028, "step": 7522 }, { "epoch": 0.523357334168145, "grad_norm": 1.09375, "learning_rate": 0.0009729633352453835, "loss": 0.7533, "step": 7523 }, { "epoch": 0.5234269018052802, "grad_norm": 1.3125, "learning_rate": 0.0009727380853081601, "loss": 0.8453, "step": 7524 }, { "epoch": 0.5234964694424153, "grad_norm": 0.91796875, "learning_rate": 0.0009725128367551592, "loss": 0.6497, "step": 7525 }, { "epoch": 0.5235660370795506, "grad_norm": 1.140625, "learning_rate": 0.000972287589597818, "loss": 0.9773, "step": 7526 }, { "epoch": 0.5236356047166858, "grad_norm": 1.21875, "learning_rate": 0.0009720623438475737, "loss": 0.7903, "step": 7527 }, { "epoch": 0.523705172353821, "grad_norm": 0.95703125, "learning_rate": 0.0009718370995158623, "loss": 0.5697, "step": 7528 }, { "epoch": 0.5237747399909563, "grad_norm": 1.0390625, "learning_rate": 0.000971611856614121, "loss": 0.7219, "step": 7529 }, { "epoch": 0.5238443076280914, "grad_norm": 0.82421875, "learning_rate": 0.0009713866151537869, "loss": 0.7004, "step": 7530 }, { "epoch": 0.5239138752652266, "grad_norm": 1.1328125, "learning_rate": 0.0009711613751462961, "loss": 0.7109, "step": 7531 }, { "epoch": 0.5239834429023619, "grad_norm": 0.96484375, "learning_rate": 0.000970936136603085, "loss": 0.6683, "step": 7532 }, { "epoch": 0.524053010539497, "grad_norm": 1.1328125, "learning_rate": 0.0009707108995355907, "loss": 0.8626, "step": 7533 }, { "epoch": 0.5241225781766322, "grad_norm": 1.2265625, "learning_rate": 0.0009704856639552495, "loss": 0.7987, "step": 7534 }, { "epoch": 0.5241921458137674, "grad_norm": 1.171875, "learning_rate": 0.0009702604298734973, "loss": 0.8395, "step": 7535 }, { "epoch": 0.5242617134509027, "grad_norm": 0.9296875, "learning_rate": 0.0009700351973017704, "loss": 0.7056, "step": 7536 }, { "epoch": 0.5243312810880378, "grad_norm": 0.95703125, "learning_rate": 0.0009698099662515054, "loss": 0.7448, "step": 7537 }, { "epoch": 0.524400848725173, "grad_norm": 1.40625, "learning_rate": 0.000969584736734138, "loss": 0.8765, "step": 7538 }, { "epoch": 0.5244704163623083, "grad_norm": 1.1015625, "learning_rate": 0.0009693595087611042, "loss": 0.6362, "step": 7539 }, { "epoch": 0.5245399839994435, "grad_norm": 1.3203125, "learning_rate": 0.0009691342823438403, "loss": 0.8538, "step": 7540 }, { "epoch": 0.5246095516365786, "grad_norm": 1.125, "learning_rate": 0.0009689090574937823, "loss": 0.6046, "step": 7541 }, { "epoch": 0.5246791192737139, "grad_norm": 0.9921875, "learning_rate": 0.0009686838342223654, "loss": 0.8277, "step": 7542 }, { "epoch": 0.5247486869108491, "grad_norm": 1.234375, "learning_rate": 0.0009684586125410252, "loss": 0.9067, "step": 7543 }, { "epoch": 0.5248182545479843, "grad_norm": 1.0234375, "learning_rate": 0.0009682333924611983, "loss": 0.5895, "step": 7544 }, { "epoch": 0.5248878221851195, "grad_norm": 0.70703125, "learning_rate": 0.0009680081739943192, "loss": 0.6634, "step": 7545 }, { "epoch": 0.5249573898222547, "grad_norm": 1.0625, "learning_rate": 0.0009677829571518237, "loss": 0.684, "step": 7546 }, { "epoch": 0.5250269574593899, "grad_norm": 1.0, "learning_rate": 0.0009675577419451473, "loss": 0.8417, "step": 7547 }, { "epoch": 0.525096525096525, "grad_norm": 1.6171875, "learning_rate": 0.0009673325283857256, "loss": 0.9912, "step": 7548 }, { "epoch": 0.5251660927336603, "grad_norm": 1.171875, "learning_rate": 0.0009671073164849932, "loss": 0.9259, "step": 7549 }, { "epoch": 0.5252356603707955, "grad_norm": 1.125, "learning_rate": 0.0009668821062543852, "loss": 0.6655, "step": 7550 }, { "epoch": 0.5253052280079307, "grad_norm": 0.95703125, "learning_rate": 0.0009666568977053371, "loss": 1.0478, "step": 7551 }, { "epoch": 0.525374795645066, "grad_norm": 1.4140625, "learning_rate": 0.000966431690849284, "loss": 0.6806, "step": 7552 }, { "epoch": 0.5254443632822011, "grad_norm": 1.21875, "learning_rate": 0.0009662064856976601, "loss": 0.8352, "step": 7553 }, { "epoch": 0.5255139309193363, "grad_norm": 1.1875, "learning_rate": 0.0009659812822619007, "loss": 0.7596, "step": 7554 }, { "epoch": 0.5255834985564716, "grad_norm": 1.34375, "learning_rate": 0.0009657560805534405, "loss": 0.9609, "step": 7555 }, { "epoch": 0.5256530661936067, "grad_norm": 1.4375, "learning_rate": 0.0009655308805837135, "loss": 1.0609, "step": 7556 }, { "epoch": 0.5257226338307419, "grad_norm": 0.87109375, "learning_rate": 0.0009653056823641546, "loss": 0.7534, "step": 7557 }, { "epoch": 0.5257922014678772, "grad_norm": 1.0703125, "learning_rate": 0.0009650804859061985, "loss": 0.8217, "step": 7558 }, { "epoch": 0.5258617691050124, "grad_norm": 1.234375, "learning_rate": 0.0009648552912212795, "loss": 0.8723, "step": 7559 }, { "epoch": 0.5259313367421475, "grad_norm": 1.0546875, "learning_rate": 0.0009646300983208314, "loss": 0.7671, "step": 7560 }, { "epoch": 0.5260009043792827, "grad_norm": 1.109375, "learning_rate": 0.0009644049072162887, "loss": 0.8247, "step": 7561 }, { "epoch": 0.526070472016418, "grad_norm": 1.1953125, "learning_rate": 0.0009641797179190856, "loss": 0.937, "step": 7562 }, { "epoch": 0.5261400396535532, "grad_norm": 1.359375, "learning_rate": 0.0009639545304406557, "loss": 0.9648, "step": 7563 }, { "epoch": 0.5262096072906883, "grad_norm": 1.1640625, "learning_rate": 0.0009637293447924329, "loss": 0.9806, "step": 7564 }, { "epoch": 0.5262791749278236, "grad_norm": 1.265625, "learning_rate": 0.0009635041609858513, "loss": 1.0049, "step": 7565 }, { "epoch": 0.5263487425649588, "grad_norm": 0.984375, "learning_rate": 0.0009632789790323446, "loss": 0.6768, "step": 7566 }, { "epoch": 0.526418310202094, "grad_norm": 1.078125, "learning_rate": 0.000963053798943346, "loss": 0.7095, "step": 7567 }, { "epoch": 0.5264878778392292, "grad_norm": 1.421875, "learning_rate": 0.0009628286207302893, "loss": 0.8472, "step": 7568 }, { "epoch": 0.5265574454763644, "grad_norm": 1.2578125, "learning_rate": 0.0009626034444046082, "loss": 1.0541, "step": 7569 }, { "epoch": 0.5266270131134996, "grad_norm": 1.0, "learning_rate": 0.0009623782699777354, "loss": 0.7968, "step": 7570 }, { "epoch": 0.5266965807506349, "grad_norm": 1.15625, "learning_rate": 0.0009621530974611044, "loss": 0.6698, "step": 7571 }, { "epoch": 0.52676614838777, "grad_norm": 1.078125, "learning_rate": 0.0009619279268661484, "loss": 1.0106, "step": 7572 }, { "epoch": 0.5268357160249052, "grad_norm": 1.1796875, "learning_rate": 0.0009617027582043006, "loss": 1.1062, "step": 7573 }, { "epoch": 0.5269052836620404, "grad_norm": 1.1171875, "learning_rate": 0.0009614775914869934, "loss": 0.7848, "step": 7574 }, { "epoch": 0.5269748512991757, "grad_norm": 1.15625, "learning_rate": 0.00096125242672566, "loss": 0.9292, "step": 7575 }, { "epoch": 0.5270444189363108, "grad_norm": 1.1171875, "learning_rate": 0.0009610272639317334, "loss": 0.8652, "step": 7576 }, { "epoch": 0.527113986573446, "grad_norm": 1.171875, "learning_rate": 0.0009608021031166456, "loss": 1.0807, "step": 7577 }, { "epoch": 0.5271835542105813, "grad_norm": 1.140625, "learning_rate": 0.0009605769442918293, "loss": 0.9035, "step": 7578 }, { "epoch": 0.5272531218477164, "grad_norm": 1.0703125, "learning_rate": 0.0009603517874687172, "loss": 0.6871, "step": 7579 }, { "epoch": 0.5273226894848516, "grad_norm": 1.265625, "learning_rate": 0.0009601266326587416, "loss": 0.9676, "step": 7580 }, { "epoch": 0.5273922571219869, "grad_norm": 1.28125, "learning_rate": 0.0009599014798733344, "loss": 0.8892, "step": 7581 }, { "epoch": 0.5274618247591221, "grad_norm": 0.89453125, "learning_rate": 0.0009596763291239281, "loss": 0.6882, "step": 7582 }, { "epoch": 0.5275313923962572, "grad_norm": 1.1484375, "learning_rate": 0.0009594511804219548, "loss": 0.8788, "step": 7583 }, { "epoch": 0.5276009600333925, "grad_norm": 1.0859375, "learning_rate": 0.0009592260337788459, "loss": 1.0275, "step": 7584 }, { "epoch": 0.5276705276705277, "grad_norm": 1.1953125, "learning_rate": 0.0009590008892060332, "loss": 0.7101, "step": 7585 }, { "epoch": 0.5277400953076629, "grad_norm": 1.53125, "learning_rate": 0.000958775746714949, "loss": 0.7943, "step": 7586 }, { "epoch": 0.527809662944798, "grad_norm": 1.265625, "learning_rate": 0.0009585506063170249, "loss": 0.9239, "step": 7587 }, { "epoch": 0.5278792305819333, "grad_norm": 0.88671875, "learning_rate": 0.0009583254680236915, "loss": 0.6241, "step": 7588 }, { "epoch": 0.5279487982190685, "grad_norm": 1.0234375, "learning_rate": 0.000958100331846381, "loss": 0.8665, "step": 7589 }, { "epoch": 0.5280183658562037, "grad_norm": 1.078125, "learning_rate": 0.0009578751977965246, "loss": 0.6914, "step": 7590 }, { "epoch": 0.5280879334933389, "grad_norm": 1.1640625, "learning_rate": 0.0009576500658855535, "loss": 0.7779, "step": 7591 }, { "epoch": 0.5281575011304741, "grad_norm": 1.1796875, "learning_rate": 0.0009574249361248981, "loss": 0.9939, "step": 7592 }, { "epoch": 0.5282270687676093, "grad_norm": 1.234375, "learning_rate": 0.0009571998085259901, "loss": 0.7421, "step": 7593 }, { "epoch": 0.5282966364047446, "grad_norm": 0.9765625, "learning_rate": 0.0009569746831002603, "loss": 0.7919, "step": 7594 }, { "epoch": 0.5283662040418797, "grad_norm": 0.9296875, "learning_rate": 0.0009567495598591387, "loss": 0.5853, "step": 7595 }, { "epoch": 0.5284357716790149, "grad_norm": 1.1875, "learning_rate": 0.0009565244388140569, "loss": 0.7668, "step": 7596 }, { "epoch": 0.5285053393161502, "grad_norm": 1.171875, "learning_rate": 0.0009562993199764447, "loss": 0.7021, "step": 7597 }, { "epoch": 0.5285749069532854, "grad_norm": 0.9296875, "learning_rate": 0.0009560742033577332, "loss": 0.7556, "step": 7598 }, { "epoch": 0.5286444745904205, "grad_norm": 1.1640625, "learning_rate": 0.0009558490889693518, "loss": 0.8799, "step": 7599 }, { "epoch": 0.5287140422275557, "grad_norm": 1.34375, "learning_rate": 0.0009556239768227312, "loss": 0.9766, "step": 7600 }, { "epoch": 0.528783609864691, "grad_norm": 1.0703125, "learning_rate": 0.0009553988669293017, "loss": 0.7397, "step": 7601 }, { "epoch": 0.5288531775018261, "grad_norm": 0.9375, "learning_rate": 0.0009551737593004926, "loss": 0.6558, "step": 7602 }, { "epoch": 0.5289227451389613, "grad_norm": 0.96875, "learning_rate": 0.000954948653947734, "loss": 0.8443, "step": 7603 }, { "epoch": 0.5289923127760966, "grad_norm": 0.953125, "learning_rate": 0.0009547235508824557, "loss": 0.6416, "step": 7604 }, { "epoch": 0.5290618804132318, "grad_norm": 1.1171875, "learning_rate": 0.0009544984501160878, "loss": 0.9741, "step": 7605 }, { "epoch": 0.5291314480503669, "grad_norm": 0.97265625, "learning_rate": 0.0009542733516600586, "loss": 0.9167, "step": 7606 }, { "epoch": 0.5292010156875022, "grad_norm": 1.140625, "learning_rate": 0.0009540482555257983, "loss": 1.1045, "step": 7607 }, { "epoch": 0.5292705833246374, "grad_norm": 1.140625, "learning_rate": 0.0009538231617247363, "loss": 1.1215, "step": 7608 }, { "epoch": 0.5293401509617726, "grad_norm": 1.1796875, "learning_rate": 0.0009535980702683011, "loss": 0.7904, "step": 7609 }, { "epoch": 0.5294097185989078, "grad_norm": 0.74609375, "learning_rate": 0.0009533729811679219, "loss": 0.5266, "step": 7610 }, { "epoch": 0.529479286236043, "grad_norm": 1.21875, "learning_rate": 0.0009531478944350278, "loss": 0.9714, "step": 7611 }, { "epoch": 0.5295488538731782, "grad_norm": 1.0, "learning_rate": 0.0009529228100810479, "loss": 0.7552, "step": 7612 }, { "epoch": 0.5296184215103134, "grad_norm": 1.140625, "learning_rate": 0.0009526977281174098, "loss": 0.7659, "step": 7613 }, { "epoch": 0.5296879891474486, "grad_norm": 1.0, "learning_rate": 0.0009524726485555428, "loss": 0.7575, "step": 7614 }, { "epoch": 0.5297575567845838, "grad_norm": 1.3125, "learning_rate": 0.0009522475714068754, "loss": 0.9411, "step": 7615 }, { "epoch": 0.529827124421719, "grad_norm": 1.265625, "learning_rate": 0.0009520224966828356, "loss": 0.9846, "step": 7616 }, { "epoch": 0.5298966920588543, "grad_norm": 1.0078125, "learning_rate": 0.0009517974243948512, "loss": 0.7321, "step": 7617 }, { "epoch": 0.5299662596959894, "grad_norm": 1.03125, "learning_rate": 0.0009515723545543509, "loss": 0.7617, "step": 7618 }, { "epoch": 0.5300358273331246, "grad_norm": 1.046875, "learning_rate": 0.0009513472871727625, "loss": 0.8585, "step": 7619 }, { "epoch": 0.5301053949702599, "grad_norm": 1.1171875, "learning_rate": 0.0009511222222615133, "loss": 0.772, "step": 7620 }, { "epoch": 0.530174962607395, "grad_norm": 1.359375, "learning_rate": 0.0009508971598320315, "loss": 1.0156, "step": 7621 }, { "epoch": 0.5302445302445302, "grad_norm": 1.0703125, "learning_rate": 0.0009506720998957443, "loss": 0.8706, "step": 7622 }, { "epoch": 0.5303140978816655, "grad_norm": 1.0390625, "learning_rate": 0.0009504470424640797, "loss": 0.7963, "step": 7623 }, { "epoch": 0.5303836655188007, "grad_norm": 1.1171875, "learning_rate": 0.0009502219875484639, "loss": 0.7009, "step": 7624 }, { "epoch": 0.5304532331559358, "grad_norm": 1.46875, "learning_rate": 0.0009499969351603248, "loss": 0.8547, "step": 7625 }, { "epoch": 0.530522800793071, "grad_norm": 1.4375, "learning_rate": 0.0009497718853110897, "loss": 1.025, "step": 7626 }, { "epoch": 0.5305923684302063, "grad_norm": 1.109375, "learning_rate": 0.0009495468380121846, "loss": 0.8212, "step": 7627 }, { "epoch": 0.5306619360673415, "grad_norm": 1.2890625, "learning_rate": 0.000949321793275037, "loss": 1.0669, "step": 7628 }, { "epoch": 0.5307315037044766, "grad_norm": 1.046875, "learning_rate": 0.0009490967511110733, "loss": 1.0157, "step": 7629 }, { "epoch": 0.5308010713416119, "grad_norm": 1.234375, "learning_rate": 0.0009488717115317202, "loss": 0.9015, "step": 7630 }, { "epoch": 0.5308706389787471, "grad_norm": 1.296875, "learning_rate": 0.0009486466745484034, "loss": 0.859, "step": 7631 }, { "epoch": 0.5309402066158823, "grad_norm": 1.15625, "learning_rate": 0.0009484216401725498, "loss": 0.6554, "step": 7632 }, { "epoch": 0.5310097742530175, "grad_norm": 1.15625, "learning_rate": 0.0009481966084155857, "loss": 0.8968, "step": 7633 }, { "epoch": 0.5310793418901527, "grad_norm": 1.453125, "learning_rate": 0.0009479715792889363, "loss": 1.0967, "step": 7634 }, { "epoch": 0.5311489095272879, "grad_norm": 1.1171875, "learning_rate": 0.000947746552804028, "loss": 0.7383, "step": 7635 }, { "epoch": 0.5312184771644232, "grad_norm": 1.0625, "learning_rate": 0.0009475215289722864, "loss": 0.7721, "step": 7636 }, { "epoch": 0.5312880448015583, "grad_norm": 0.9609375, "learning_rate": 0.0009472965078051372, "loss": 0.7587, "step": 7637 }, { "epoch": 0.5313576124386935, "grad_norm": 1.015625, "learning_rate": 0.0009470714893140053, "loss": 0.7739, "step": 7638 }, { "epoch": 0.5314271800758287, "grad_norm": 1.1953125, "learning_rate": 0.0009468464735103166, "loss": 0.949, "step": 7639 }, { "epoch": 0.531496747712964, "grad_norm": 1.4765625, "learning_rate": 0.0009466214604054962, "loss": 0.8655, "step": 7640 }, { "epoch": 0.5315663153500991, "grad_norm": 1.2734375, "learning_rate": 0.0009463964500109685, "loss": 0.8699, "step": 7641 }, { "epoch": 0.5316358829872343, "grad_norm": 1.125, "learning_rate": 0.0009461714423381595, "loss": 0.931, "step": 7642 }, { "epoch": 0.5317054506243696, "grad_norm": 1.265625, "learning_rate": 0.0009459464373984931, "loss": 1.0457, "step": 7643 }, { "epoch": 0.5317750182615048, "grad_norm": 1.296875, "learning_rate": 0.0009457214352033943, "loss": 0.8442, "step": 7644 }, { "epoch": 0.5318445858986399, "grad_norm": 1.1640625, "learning_rate": 0.0009454964357642872, "loss": 0.699, "step": 7645 }, { "epoch": 0.5319141535357752, "grad_norm": 0.88671875, "learning_rate": 0.0009452714390925964, "loss": 0.8539, "step": 7646 }, { "epoch": 0.5319837211729104, "grad_norm": 1.09375, "learning_rate": 0.0009450464451997463, "loss": 0.8525, "step": 7647 }, { "epoch": 0.5320532888100455, "grad_norm": 1.125, "learning_rate": 0.0009448214540971601, "loss": 1.0032, "step": 7648 }, { "epoch": 0.5321228564471808, "grad_norm": 0.9921875, "learning_rate": 0.000944596465796263, "loss": 0.8292, "step": 7649 }, { "epoch": 0.532192424084316, "grad_norm": 0.8671875, "learning_rate": 0.0009443714803084779, "loss": 0.5984, "step": 7650 }, { "epoch": 0.5322619917214512, "grad_norm": 1.0703125, "learning_rate": 0.0009441464976452288, "loss": 0.7322, "step": 7651 }, { "epoch": 0.5323315593585863, "grad_norm": 1.140625, "learning_rate": 0.0009439215178179388, "loss": 0.8294, "step": 7652 }, { "epoch": 0.5324011269957216, "grad_norm": 1.140625, "learning_rate": 0.0009436965408380314, "loss": 0.8577, "step": 7653 }, { "epoch": 0.5324706946328568, "grad_norm": 0.84375, "learning_rate": 0.0009434715667169303, "loss": 0.6898, "step": 7654 }, { "epoch": 0.532540262269992, "grad_norm": 1.1171875, "learning_rate": 0.0009432465954660574, "loss": 0.9053, "step": 7655 }, { "epoch": 0.5326098299071272, "grad_norm": 1.09375, "learning_rate": 0.0009430216270968371, "loss": 0.673, "step": 7656 }, { "epoch": 0.5326793975442624, "grad_norm": 0.87890625, "learning_rate": 0.0009427966616206909, "loss": 0.8517, "step": 7657 }, { "epoch": 0.5327489651813976, "grad_norm": 1.1171875, "learning_rate": 0.0009425716990490423, "loss": 0.8912, "step": 7658 }, { "epoch": 0.5328185328185329, "grad_norm": 0.83984375, "learning_rate": 0.0009423467393933128, "loss": 0.7184, "step": 7659 }, { "epoch": 0.532888100455668, "grad_norm": 1.203125, "learning_rate": 0.0009421217826649257, "loss": 0.9315, "step": 7660 }, { "epoch": 0.5329576680928032, "grad_norm": 0.85546875, "learning_rate": 0.0009418968288753026, "loss": 0.7263, "step": 7661 }, { "epoch": 0.5330272357299385, "grad_norm": 0.98046875, "learning_rate": 0.0009416718780358654, "loss": 0.7388, "step": 7662 }, { "epoch": 0.5330968033670737, "grad_norm": 1.265625, "learning_rate": 0.0009414469301580368, "loss": 0.9644, "step": 7663 }, { "epoch": 0.5331663710042088, "grad_norm": 1.4375, "learning_rate": 0.0009412219852532376, "loss": 0.9148, "step": 7664 }, { "epoch": 0.533235938641344, "grad_norm": 1.125, "learning_rate": 0.0009409970433328902, "loss": 0.777, "step": 7665 }, { "epoch": 0.5333055062784793, "grad_norm": 1.1640625, "learning_rate": 0.0009407721044084148, "loss": 0.8419, "step": 7666 }, { "epoch": 0.5333750739156145, "grad_norm": 1.1015625, "learning_rate": 0.0009405471684912338, "loss": 0.9105, "step": 7667 }, { "epoch": 0.5334446415527496, "grad_norm": 1.1171875, "learning_rate": 0.0009403222355927679, "loss": 0.8591, "step": 7668 }, { "epoch": 0.5335142091898849, "grad_norm": 1.015625, "learning_rate": 0.0009400973057244378, "loss": 0.7017, "step": 7669 }, { "epoch": 0.5335837768270201, "grad_norm": 1.078125, "learning_rate": 0.0009398723788976651, "loss": 0.9882, "step": 7670 }, { "epoch": 0.5336533444641552, "grad_norm": 0.890625, "learning_rate": 0.0009396474551238696, "loss": 0.6953, "step": 7671 }, { "epoch": 0.5337229121012905, "grad_norm": 1.0390625, "learning_rate": 0.0009394225344144725, "loss": 0.9536, "step": 7672 }, { "epoch": 0.5337924797384257, "grad_norm": 1.1328125, "learning_rate": 0.000939197616780893, "loss": 0.6909, "step": 7673 }, { "epoch": 0.5338620473755609, "grad_norm": 1.2265625, "learning_rate": 0.0009389727022345528, "loss": 0.9209, "step": 7674 }, { "epoch": 0.5339316150126961, "grad_norm": 1.1953125, "learning_rate": 0.0009387477907868709, "loss": 0.8242, "step": 7675 }, { "epoch": 0.5340011826498313, "grad_norm": 1.25, "learning_rate": 0.0009385228824492672, "loss": 1.1437, "step": 7676 }, { "epoch": 0.5340707502869665, "grad_norm": 0.91015625, "learning_rate": 0.0009382979772331622, "loss": 0.8774, "step": 7677 }, { "epoch": 0.5341403179241017, "grad_norm": 1.234375, "learning_rate": 0.0009380730751499747, "loss": 0.8611, "step": 7678 }, { "epoch": 0.5342098855612369, "grad_norm": 1.4140625, "learning_rate": 0.0009378481762111244, "loss": 0.8717, "step": 7679 }, { "epoch": 0.5342794531983721, "grad_norm": 0.99609375, "learning_rate": 0.0009376232804280298, "loss": 0.8216, "step": 7680 }, { "epoch": 0.5343490208355073, "grad_norm": 1.125, "learning_rate": 0.0009373983878121113, "loss": 0.8085, "step": 7681 }, { "epoch": 0.5344185884726426, "grad_norm": 1.1953125, "learning_rate": 0.000937173498374787, "loss": 0.8056, "step": 7682 }, { "epoch": 0.5344881561097777, "grad_norm": 1.1640625, "learning_rate": 0.0009369486121274759, "loss": 0.8503, "step": 7683 }, { "epoch": 0.5345577237469129, "grad_norm": 1.09375, "learning_rate": 0.0009367237290815961, "loss": 0.8109, "step": 7684 }, { "epoch": 0.5346272913840482, "grad_norm": 0.78125, "learning_rate": 0.0009364988492485667, "loss": 0.5855, "step": 7685 }, { "epoch": 0.5346968590211834, "grad_norm": 1.296875, "learning_rate": 0.0009362739726398058, "loss": 1.041, "step": 7686 }, { "epoch": 0.5347664266583185, "grad_norm": 1.0703125, "learning_rate": 0.0009360490992667306, "loss": 0.8213, "step": 7687 }, { "epoch": 0.5348359942954538, "grad_norm": 1.234375, "learning_rate": 0.0009358242291407604, "loss": 1.0044, "step": 7688 }, { "epoch": 0.534905561932589, "grad_norm": 0.97265625, "learning_rate": 0.0009355993622733124, "loss": 0.6749, "step": 7689 }, { "epoch": 0.5349751295697242, "grad_norm": 1.0859375, "learning_rate": 0.0009353744986758044, "loss": 0.7504, "step": 7690 }, { "epoch": 0.5350446972068593, "grad_norm": 1.1640625, "learning_rate": 0.000935149638359653, "loss": 0.9699, "step": 7691 }, { "epoch": 0.5351142648439946, "grad_norm": 0.8828125, "learning_rate": 0.0009349247813362764, "loss": 0.7765, "step": 7692 }, { "epoch": 0.5351838324811298, "grad_norm": 1.2109375, "learning_rate": 0.0009346999276170914, "loss": 1.1291, "step": 7693 }, { "epoch": 0.5352534001182649, "grad_norm": 1.3671875, "learning_rate": 0.0009344750772135148, "loss": 0.8869, "step": 7694 }, { "epoch": 0.5353229677554002, "grad_norm": 1.2109375, "learning_rate": 0.0009342502301369637, "loss": 0.8624, "step": 7695 }, { "epoch": 0.5353925353925354, "grad_norm": 0.765625, "learning_rate": 0.0009340253863988545, "loss": 0.6484, "step": 7696 }, { "epoch": 0.5354621030296706, "grad_norm": 1.109375, "learning_rate": 0.000933800546010604, "loss": 0.7179, "step": 7697 }, { "epoch": 0.5355316706668058, "grad_norm": 1.1796875, "learning_rate": 0.0009335757089836274, "loss": 0.6628, "step": 7698 }, { "epoch": 0.535601238303941, "grad_norm": 1.265625, "learning_rate": 0.0009333508753293418, "loss": 0.9152, "step": 7699 }, { "epoch": 0.5356708059410762, "grad_norm": 1.328125, "learning_rate": 0.0009331260450591627, "loss": 1.0213, "step": 7700 }, { "epoch": 0.5357403735782115, "grad_norm": 1.1640625, "learning_rate": 0.0009329012181845059, "loss": 0.7104, "step": 7701 }, { "epoch": 0.5358099412153466, "grad_norm": 1.1171875, "learning_rate": 0.0009326763947167875, "loss": 0.9936, "step": 7702 }, { "epoch": 0.5358795088524818, "grad_norm": 1.375, "learning_rate": 0.0009324515746674221, "loss": 0.8168, "step": 7703 }, { "epoch": 0.535949076489617, "grad_norm": 0.98046875, "learning_rate": 0.0009322267580478255, "loss": 0.7997, "step": 7704 }, { "epoch": 0.5360186441267523, "grad_norm": 1.359375, "learning_rate": 0.0009320019448694121, "loss": 0.8153, "step": 7705 }, { "epoch": 0.5360882117638874, "grad_norm": 0.8828125, "learning_rate": 0.0009317771351435975, "loss": 1.037, "step": 7706 }, { "epoch": 0.5361577794010226, "grad_norm": 1.2265625, "learning_rate": 0.0009315523288817961, "loss": 0.7164, "step": 7707 }, { "epoch": 0.5362273470381579, "grad_norm": 1.2109375, "learning_rate": 0.0009313275260954221, "loss": 0.8784, "step": 7708 }, { "epoch": 0.5362969146752931, "grad_norm": 1.109375, "learning_rate": 0.0009311027267958908, "loss": 0.6885, "step": 7709 }, { "epoch": 0.5363664823124282, "grad_norm": 0.91015625, "learning_rate": 0.0009308779309946155, "loss": 0.5003, "step": 7710 }, { "epoch": 0.5364360499495635, "grad_norm": 1.015625, "learning_rate": 0.0009306531387030106, "loss": 0.8354, "step": 7711 }, { "epoch": 0.5365056175866987, "grad_norm": 1.1015625, "learning_rate": 0.0009304283499324892, "loss": 0.869, "step": 7712 }, { "epoch": 0.5365751852238339, "grad_norm": 0.98828125, "learning_rate": 0.0009302035646944661, "loss": 0.8773, "step": 7713 }, { "epoch": 0.5366447528609691, "grad_norm": 1.0859375, "learning_rate": 0.000929978783000354, "loss": 0.6557, "step": 7714 }, { "epoch": 0.5367143204981043, "grad_norm": 0.9140625, "learning_rate": 0.0009297540048615661, "loss": 0.6123, "step": 7715 }, { "epoch": 0.5367838881352395, "grad_norm": 1.03125, "learning_rate": 0.0009295292302895163, "loss": 0.9011, "step": 7716 }, { "epoch": 0.5368534557723746, "grad_norm": 1.1640625, "learning_rate": 0.0009293044592956167, "loss": 0.7213, "step": 7717 }, { "epoch": 0.5369230234095099, "grad_norm": 1.2109375, "learning_rate": 0.0009290796918912806, "loss": 0.7583, "step": 7718 }, { "epoch": 0.5369925910466451, "grad_norm": 1.1953125, "learning_rate": 0.0009288549280879196, "loss": 1.0099, "step": 7719 }, { "epoch": 0.5370621586837803, "grad_norm": 1.046875, "learning_rate": 0.0009286301678969474, "loss": 0.8666, "step": 7720 }, { "epoch": 0.5371317263209155, "grad_norm": 1.3046875, "learning_rate": 0.0009284054113297753, "loss": 0.6667, "step": 7721 }, { "epoch": 0.5372012939580507, "grad_norm": 1.1796875, "learning_rate": 0.0009281806583978155, "loss": 0.8413, "step": 7722 }, { "epoch": 0.5372708615951859, "grad_norm": 1.0546875, "learning_rate": 0.00092795590911248, "loss": 0.7475, "step": 7723 }, { "epoch": 0.5373404292323212, "grad_norm": 1.1484375, "learning_rate": 0.0009277311634851803, "loss": 1.0075, "step": 7724 }, { "epoch": 0.5374099968694563, "grad_norm": 1.125, "learning_rate": 0.0009275064215273278, "loss": 0.7693, "step": 7725 }, { "epoch": 0.5374795645065915, "grad_norm": 1.140625, "learning_rate": 0.0009272816832503335, "loss": 0.8396, "step": 7726 }, { "epoch": 0.5375491321437268, "grad_norm": 1.28125, "learning_rate": 0.0009270569486656095, "loss": 0.9725, "step": 7727 }, { "epoch": 0.537618699780862, "grad_norm": 1.3203125, "learning_rate": 0.0009268322177845656, "loss": 0.9665, "step": 7728 }, { "epoch": 0.5376882674179971, "grad_norm": 1.40625, "learning_rate": 0.0009266074906186125, "loss": 0.9539, "step": 7729 }, { "epoch": 0.5377578350551323, "grad_norm": 1.1953125, "learning_rate": 0.0009263827671791619, "loss": 0.9783, "step": 7730 }, { "epoch": 0.5378274026922676, "grad_norm": 1.171875, "learning_rate": 0.0009261580474776229, "loss": 0.9241, "step": 7731 }, { "epoch": 0.5378969703294028, "grad_norm": 1.1171875, "learning_rate": 0.0009259333315254062, "loss": 0.6221, "step": 7732 }, { "epoch": 0.5379665379665379, "grad_norm": 0.9765625, "learning_rate": 0.0009257086193339212, "loss": 0.9105, "step": 7733 }, { "epoch": 0.5380361056036732, "grad_norm": 1.1328125, "learning_rate": 0.0009254839109145785, "loss": 0.8122, "step": 7734 }, { "epoch": 0.5381056732408084, "grad_norm": 0.9765625, "learning_rate": 0.0009252592062787871, "loss": 0.9319, "step": 7735 }, { "epoch": 0.5381752408779436, "grad_norm": 1.015625, "learning_rate": 0.0009250345054379562, "loss": 0.6494, "step": 7736 }, { "epoch": 0.5382448085150788, "grad_norm": 1.2734375, "learning_rate": 0.0009248098084034957, "loss": 0.9644, "step": 7737 }, { "epoch": 0.538314376152214, "grad_norm": 1.078125, "learning_rate": 0.000924585115186814, "loss": 0.87, "step": 7738 }, { "epoch": 0.5383839437893492, "grad_norm": 1.078125, "learning_rate": 0.0009243604257993199, "loss": 0.8682, "step": 7739 }, { "epoch": 0.5384535114264845, "grad_norm": 1.1953125, "learning_rate": 0.0009241357402524219, "loss": 0.822, "step": 7740 }, { "epoch": 0.5385230790636196, "grad_norm": 1.3203125, "learning_rate": 0.0009239110585575292, "loss": 0.666, "step": 7741 }, { "epoch": 0.5385926467007548, "grad_norm": 0.859375, "learning_rate": 0.0009236863807260493, "loss": 0.8514, "step": 7742 }, { "epoch": 0.53866221433789, "grad_norm": 0.9375, "learning_rate": 0.0009234617067693899, "loss": 0.7054, "step": 7743 }, { "epoch": 0.5387317819750252, "grad_norm": 0.890625, "learning_rate": 0.0009232370366989596, "loss": 0.7784, "step": 7744 }, { "epoch": 0.5388013496121604, "grad_norm": 0.9296875, "learning_rate": 0.0009230123705261657, "loss": 0.8373, "step": 7745 }, { "epoch": 0.5388709172492956, "grad_norm": 1.390625, "learning_rate": 0.0009227877082624155, "loss": 0.9933, "step": 7746 }, { "epoch": 0.5389404848864309, "grad_norm": 1.1171875, "learning_rate": 0.0009225630499191161, "loss": 1.0417, "step": 7747 }, { "epoch": 0.539010052523566, "grad_norm": 0.9375, "learning_rate": 0.0009223383955076752, "loss": 0.7612, "step": 7748 }, { "epoch": 0.5390796201607012, "grad_norm": 1.2578125, "learning_rate": 0.0009221137450394987, "loss": 0.7888, "step": 7749 }, { "epoch": 0.5391491877978365, "grad_norm": 1.171875, "learning_rate": 0.0009218890985259935, "loss": 0.9712, "step": 7750 }, { "epoch": 0.5392187554349717, "grad_norm": 1.125, "learning_rate": 0.0009216644559785665, "loss": 0.9912, "step": 7751 }, { "epoch": 0.5392883230721068, "grad_norm": 0.92578125, "learning_rate": 0.0009214398174086238, "loss": 0.6542, "step": 7752 }, { "epoch": 0.5393578907092421, "grad_norm": 1.078125, "learning_rate": 0.0009212151828275709, "loss": 0.8197, "step": 7753 }, { "epoch": 0.5394274583463773, "grad_norm": 1.0703125, "learning_rate": 0.0009209905522468137, "loss": 0.9404, "step": 7754 }, { "epoch": 0.5394970259835125, "grad_norm": 0.87109375, "learning_rate": 0.0009207659256777586, "loss": 0.64, "step": 7755 }, { "epoch": 0.5395665936206476, "grad_norm": 1.09375, "learning_rate": 0.00092054130313181, "loss": 0.6587, "step": 7756 }, { "epoch": 0.5396361612577829, "grad_norm": 1.09375, "learning_rate": 0.0009203166846203739, "loss": 0.8482, "step": 7757 }, { "epoch": 0.5397057288949181, "grad_norm": 1.1640625, "learning_rate": 0.0009200920701548541, "loss": 0.8016, "step": 7758 }, { "epoch": 0.5397752965320533, "grad_norm": 0.94140625, "learning_rate": 0.000919867459746657, "loss": 0.7776, "step": 7759 }, { "epoch": 0.5398448641691885, "grad_norm": 1.5546875, "learning_rate": 0.0009196428534071861, "loss": 1.1175, "step": 7760 }, { "epoch": 0.5399144318063237, "grad_norm": 1.2265625, "learning_rate": 0.000919418251147846, "loss": 0.7893, "step": 7761 }, { "epoch": 0.5399839994434589, "grad_norm": 1.2109375, "learning_rate": 0.0009191936529800412, "loss": 0.6078, "step": 7762 }, { "epoch": 0.5400535670805942, "grad_norm": 1.5078125, "learning_rate": 0.0009189690589151752, "loss": 0.689, "step": 7763 }, { "epoch": 0.5401231347177293, "grad_norm": 0.88671875, "learning_rate": 0.0009187444689646521, "loss": 0.5132, "step": 7764 }, { "epoch": 0.5401927023548645, "grad_norm": 1.21875, "learning_rate": 0.000918519883139875, "loss": 0.778, "step": 7765 }, { "epoch": 0.5402622699919997, "grad_norm": 1.59375, "learning_rate": 0.000918295301452248, "loss": 1.479, "step": 7766 }, { "epoch": 0.540331837629135, "grad_norm": 1.171875, "learning_rate": 0.0009180707239131735, "loss": 0.9314, "step": 7767 }, { "epoch": 0.5404014052662701, "grad_norm": 1.046875, "learning_rate": 0.0009178461505340546, "loss": 0.8695, "step": 7768 }, { "epoch": 0.5404709729034053, "grad_norm": 1.28125, "learning_rate": 0.0009176215813262944, "loss": 0.8224, "step": 7769 }, { "epoch": 0.5405405405405406, "grad_norm": 1.25, "learning_rate": 0.0009173970163012949, "loss": 0.6287, "step": 7770 }, { "epoch": 0.5406101081776757, "grad_norm": 1.296875, "learning_rate": 0.0009171724554704586, "loss": 1.0836, "step": 7771 }, { "epoch": 0.5406796758148109, "grad_norm": 1.15625, "learning_rate": 0.0009169478988451873, "loss": 0.8674, "step": 7772 }, { "epoch": 0.5407492434519462, "grad_norm": 1.171875, "learning_rate": 0.0009167233464368835, "loss": 0.6791, "step": 7773 }, { "epoch": 0.5408188110890814, "grad_norm": 1.046875, "learning_rate": 0.0009164987982569481, "loss": 0.7213, "step": 7774 }, { "epoch": 0.5408883787262165, "grad_norm": 1.03125, "learning_rate": 0.0009162742543167828, "loss": 0.6782, "step": 7775 }, { "epoch": 0.5409579463633518, "grad_norm": 0.9296875, "learning_rate": 0.000916049714627789, "loss": 0.7157, "step": 7776 }, { "epoch": 0.541027514000487, "grad_norm": 0.97265625, "learning_rate": 0.0009158251792013677, "loss": 0.8124, "step": 7777 }, { "epoch": 0.5410970816376222, "grad_norm": 0.96875, "learning_rate": 0.0009156006480489196, "loss": 0.642, "step": 7778 }, { "epoch": 0.5411666492747573, "grad_norm": 1.0234375, "learning_rate": 0.0009153761211818447, "loss": 0.5892, "step": 7779 }, { "epoch": 0.5412362169118926, "grad_norm": 0.9765625, "learning_rate": 0.0009151515986115442, "loss": 0.6871, "step": 7780 }, { "epoch": 0.5413057845490278, "grad_norm": 1.359375, "learning_rate": 0.0009149270803494178, "loss": 0.9627, "step": 7781 }, { "epoch": 0.541375352186163, "grad_norm": 1.140625, "learning_rate": 0.0009147025664068652, "loss": 0.8162, "step": 7782 }, { "epoch": 0.5414449198232982, "grad_norm": 1.0, "learning_rate": 0.0009144780567952866, "loss": 0.6913, "step": 7783 }, { "epoch": 0.5415144874604334, "grad_norm": 1.265625, "learning_rate": 0.0009142535515260814, "loss": 0.9963, "step": 7784 }, { "epoch": 0.5415840550975686, "grad_norm": 1.3984375, "learning_rate": 0.0009140290506106485, "loss": 1.0329, "step": 7785 }, { "epoch": 0.5416536227347039, "grad_norm": 1.25, "learning_rate": 0.0009138045540603868, "loss": 0.8808, "step": 7786 }, { "epoch": 0.541723190371839, "grad_norm": 0.7578125, "learning_rate": 0.0009135800618866957, "loss": 0.6811, "step": 7787 }, { "epoch": 0.5417927580089742, "grad_norm": 1.046875, "learning_rate": 0.0009133555741009735, "loss": 0.8626, "step": 7788 }, { "epoch": 0.5418623256461095, "grad_norm": 1.0, "learning_rate": 0.0009131310907146181, "loss": 0.8456, "step": 7789 }, { "epoch": 0.5419318932832446, "grad_norm": 1.0, "learning_rate": 0.0009129066117390284, "loss": 0.7679, "step": 7790 }, { "epoch": 0.5420014609203798, "grad_norm": 0.9609375, "learning_rate": 0.0009126821371856021, "loss": 0.7074, "step": 7791 }, { "epoch": 0.542071028557515, "grad_norm": 1.046875, "learning_rate": 0.0009124576670657366, "loss": 0.8705, "step": 7792 }, { "epoch": 0.5421405961946503, "grad_norm": 1.265625, "learning_rate": 0.0009122332013908293, "loss": 0.8058, "step": 7793 }, { "epoch": 0.5422101638317854, "grad_norm": 0.96875, "learning_rate": 0.0009120087401722782, "loss": 0.7583, "step": 7794 }, { "epoch": 0.5422797314689206, "grad_norm": 1.2109375, "learning_rate": 0.0009117842834214793, "loss": 0.781, "step": 7795 }, { "epoch": 0.5423492991060559, "grad_norm": 1.265625, "learning_rate": 0.0009115598311498299, "loss": 0.8502, "step": 7796 }, { "epoch": 0.5424188667431911, "grad_norm": 1.1796875, "learning_rate": 0.0009113353833687266, "loss": 0.7159, "step": 7797 }, { "epoch": 0.5424884343803262, "grad_norm": 0.90625, "learning_rate": 0.0009111109400895659, "loss": 0.5067, "step": 7798 }, { "epoch": 0.5425580020174615, "grad_norm": 1.375, "learning_rate": 0.0009108865013237433, "loss": 0.7159, "step": 7799 }, { "epoch": 0.5426275696545967, "grad_norm": 0.85546875, "learning_rate": 0.0009106620670826548, "loss": 0.6929, "step": 7800 }, { "epoch": 0.5426971372917319, "grad_norm": 1.90625, "learning_rate": 0.0009104376373776967, "loss": 0.7217, "step": 7801 }, { "epoch": 0.5427667049288671, "grad_norm": 1.109375, "learning_rate": 0.0009102132122202638, "loss": 0.8091, "step": 7802 }, { "epoch": 0.5428362725660023, "grad_norm": 1.6171875, "learning_rate": 0.000909988791621751, "loss": 0.9184, "step": 7803 }, { "epoch": 0.5429058402031375, "grad_norm": 1.1015625, "learning_rate": 0.0009097643755935541, "loss": 0.9267, "step": 7804 }, { "epoch": 0.5429754078402727, "grad_norm": 1.1328125, "learning_rate": 0.0009095399641470675, "loss": 0.8922, "step": 7805 }, { "epoch": 0.5430449754774079, "grad_norm": 0.98828125, "learning_rate": 0.0009093155572936854, "loss": 0.886, "step": 7806 }, { "epoch": 0.5431145431145431, "grad_norm": 1.0390625, "learning_rate": 0.000909091155044802, "loss": 0.8428, "step": 7807 }, { "epoch": 0.5431841107516783, "grad_norm": 0.94140625, "learning_rate": 0.0009088667574118119, "loss": 0.8639, "step": 7808 }, { "epoch": 0.5432536783888136, "grad_norm": 1.1484375, "learning_rate": 0.0009086423644061083, "loss": 0.7444, "step": 7809 }, { "epoch": 0.5433232460259487, "grad_norm": 1.015625, "learning_rate": 0.0009084179760390849, "loss": 0.7876, "step": 7810 }, { "epoch": 0.5433928136630839, "grad_norm": 1.0078125, "learning_rate": 0.0009081935923221352, "loss": 0.4759, "step": 7811 }, { "epoch": 0.5434623813002192, "grad_norm": 1.1953125, "learning_rate": 0.0009079692132666523, "loss": 0.6489, "step": 7812 }, { "epoch": 0.5435319489373543, "grad_norm": 0.94140625, "learning_rate": 0.0009077448388840286, "loss": 0.6377, "step": 7813 }, { "epoch": 0.5436015165744895, "grad_norm": 1.1015625, "learning_rate": 0.0009075204691856569, "loss": 0.9348, "step": 7814 }, { "epoch": 0.5436710842116248, "grad_norm": 0.921875, "learning_rate": 0.0009072961041829299, "loss": 0.7295, "step": 7815 }, { "epoch": 0.54374065184876, "grad_norm": 0.93359375, "learning_rate": 0.0009070717438872395, "loss": 0.6259, "step": 7816 }, { "epoch": 0.5438102194858951, "grad_norm": 1.1875, "learning_rate": 0.0009068473883099773, "loss": 0.7253, "step": 7817 }, { "epoch": 0.5438797871230303, "grad_norm": 1.203125, "learning_rate": 0.0009066230374625353, "loss": 0.6697, "step": 7818 }, { "epoch": 0.5439493547601656, "grad_norm": 1.03125, "learning_rate": 0.000906398691356305, "loss": 0.6115, "step": 7819 }, { "epoch": 0.5440189223973008, "grad_norm": 1.2734375, "learning_rate": 0.0009061743500026773, "loss": 0.9162, "step": 7820 }, { "epoch": 0.5440884900344359, "grad_norm": 1.1484375, "learning_rate": 0.0009059500134130428, "loss": 1.1168, "step": 7821 }, { "epoch": 0.5441580576715712, "grad_norm": 1.296875, "learning_rate": 0.0009057256815987928, "loss": 0.9183, "step": 7822 }, { "epoch": 0.5442276253087064, "grad_norm": 1.1015625, "learning_rate": 0.0009055013545713179, "loss": 0.9001, "step": 7823 }, { "epoch": 0.5442971929458416, "grad_norm": 1.3828125, "learning_rate": 0.0009052770323420074, "loss": 0.9196, "step": 7824 }, { "epoch": 0.5443667605829768, "grad_norm": 1.0859375, "learning_rate": 0.000905052714922252, "loss": 0.8726, "step": 7825 }, { "epoch": 0.544436328220112, "grad_norm": 1.03125, "learning_rate": 0.0009048284023234413, "loss": 0.558, "step": 7826 }, { "epoch": 0.5445058958572472, "grad_norm": 1.1484375, "learning_rate": 0.0009046040945569644, "loss": 0.915, "step": 7827 }, { "epoch": 0.5445754634943825, "grad_norm": 1.28125, "learning_rate": 0.0009043797916342106, "loss": 0.9056, "step": 7828 }, { "epoch": 0.5446450311315176, "grad_norm": 1.2890625, "learning_rate": 0.0009041554935665691, "loss": 0.8979, "step": 7829 }, { "epoch": 0.5447145987686528, "grad_norm": 1.15625, "learning_rate": 0.000903931200365429, "loss": 0.8202, "step": 7830 }, { "epoch": 0.544784166405788, "grad_norm": 1.1796875, "learning_rate": 0.0009037069120421777, "loss": 0.8047, "step": 7831 }, { "epoch": 0.5448537340429233, "grad_norm": 1.0546875, "learning_rate": 0.0009034826286082043, "loss": 0.6387, "step": 7832 }, { "epoch": 0.5449233016800584, "grad_norm": 1.2890625, "learning_rate": 0.0009032583500748968, "loss": 0.8866, "step": 7833 }, { "epoch": 0.5449928693171936, "grad_norm": 1.171875, "learning_rate": 0.0009030340764536424, "loss": 0.995, "step": 7834 }, { "epoch": 0.5450624369543289, "grad_norm": 1.2421875, "learning_rate": 0.0009028098077558287, "loss": 0.76, "step": 7835 }, { "epoch": 0.545132004591464, "grad_norm": 1.1796875, "learning_rate": 0.0009025855439928433, "loss": 0.7401, "step": 7836 }, { "epoch": 0.5452015722285992, "grad_norm": 1.0625, "learning_rate": 0.0009023612851760731, "loss": 0.8346, "step": 7837 }, { "epoch": 0.5452711398657345, "grad_norm": 0.85546875, "learning_rate": 0.0009021370313169046, "loss": 0.596, "step": 7838 }, { "epoch": 0.5453407075028697, "grad_norm": 0.84765625, "learning_rate": 0.0009019127824267242, "loss": 0.6525, "step": 7839 }, { "epoch": 0.5454102751400048, "grad_norm": 1.046875, "learning_rate": 0.0009016885385169185, "loss": 1.0252, "step": 7840 }, { "epoch": 0.5454798427771401, "grad_norm": 1.234375, "learning_rate": 0.0009014642995988733, "loss": 0.801, "step": 7841 }, { "epoch": 0.5455494104142753, "grad_norm": 0.88671875, "learning_rate": 0.000901240065683974, "loss": 0.5813, "step": 7842 }, { "epoch": 0.5456189780514105, "grad_norm": 1.0703125, "learning_rate": 0.0009010158367836066, "loss": 0.8475, "step": 7843 }, { "epoch": 0.5456885456885456, "grad_norm": 1.2578125, "learning_rate": 0.0009007916129091563, "loss": 0.8293, "step": 7844 }, { "epoch": 0.5457581133256809, "grad_norm": 0.86328125, "learning_rate": 0.0009005673940720077, "loss": 0.765, "step": 7845 }, { "epoch": 0.5458276809628161, "grad_norm": 1.0546875, "learning_rate": 0.0009003431802835454, "loss": 0.6574, "step": 7846 }, { "epoch": 0.5458972485999513, "grad_norm": 1.28125, "learning_rate": 0.0009001189715551544, "loss": 1.1601, "step": 7847 }, { "epoch": 0.5459668162370865, "grad_norm": 1.2890625, "learning_rate": 0.0008998947678982187, "loss": 0.5843, "step": 7848 }, { "epoch": 0.5460363838742217, "grad_norm": 1.046875, "learning_rate": 0.0008996705693241216, "loss": 0.723, "step": 7849 }, { "epoch": 0.5461059515113569, "grad_norm": 1.3046875, "learning_rate": 0.0008994463758442476, "loss": 0.848, "step": 7850 }, { "epoch": 0.5461755191484922, "grad_norm": 1.359375, "learning_rate": 0.0008992221874699801, "loss": 1.0163, "step": 7851 }, { "epoch": 0.5462450867856273, "grad_norm": 1.09375, "learning_rate": 0.0008989980042127016, "loss": 0.6162, "step": 7852 }, { "epoch": 0.5463146544227625, "grad_norm": 1.234375, "learning_rate": 0.0008987738260837952, "loss": 0.8456, "step": 7853 }, { "epoch": 0.5463842220598978, "grad_norm": 1.203125, "learning_rate": 0.000898549653094644, "loss": 0.8359, "step": 7854 }, { "epoch": 0.546453789697033, "grad_norm": 1.2421875, "learning_rate": 0.0008983254852566303, "loss": 0.8284, "step": 7855 }, { "epoch": 0.5465233573341681, "grad_norm": 0.91015625, "learning_rate": 0.0008981013225811354, "loss": 0.6926, "step": 7856 }, { "epoch": 0.5465929249713033, "grad_norm": 1.4765625, "learning_rate": 0.000897877165079542, "loss": 0.8208, "step": 7857 }, { "epoch": 0.5466624926084386, "grad_norm": 1.015625, "learning_rate": 0.0008976530127632317, "loss": 0.7933, "step": 7858 }, { "epoch": 0.5467320602455737, "grad_norm": 1.046875, "learning_rate": 0.0008974288656435852, "loss": 0.7539, "step": 7859 }, { "epoch": 0.5468016278827089, "grad_norm": 1.2265625, "learning_rate": 0.0008972047237319838, "loss": 0.7873, "step": 7860 }, { "epoch": 0.5468711955198442, "grad_norm": 1.140625, "learning_rate": 0.0008969805870398086, "loss": 0.8811, "step": 7861 }, { "epoch": 0.5469407631569794, "grad_norm": 1.3359375, "learning_rate": 0.0008967564555784401, "loss": 0.9331, "step": 7862 }, { "epoch": 0.5470103307941145, "grad_norm": 1.234375, "learning_rate": 0.000896532329359258, "loss": 0.8192, "step": 7863 }, { "epoch": 0.5470798984312498, "grad_norm": 1.203125, "learning_rate": 0.0008963082083936429, "loss": 0.9081, "step": 7864 }, { "epoch": 0.547149466068385, "grad_norm": 1.375, "learning_rate": 0.0008960840926929745, "loss": 1.0103, "step": 7865 }, { "epoch": 0.5472190337055202, "grad_norm": 1.1328125, "learning_rate": 0.0008958599822686319, "loss": 0.8191, "step": 7866 }, { "epoch": 0.5472886013426554, "grad_norm": 1.1484375, "learning_rate": 0.0008956358771319943, "loss": 0.755, "step": 7867 }, { "epoch": 0.5473581689797906, "grad_norm": 1.0, "learning_rate": 0.0008954117772944412, "loss": 0.8025, "step": 7868 }, { "epoch": 0.5474277366169258, "grad_norm": 0.80078125, "learning_rate": 0.000895187682767351, "loss": 0.7683, "step": 7869 }, { "epoch": 0.547497304254061, "grad_norm": 1.2890625, "learning_rate": 0.0008949635935621014, "loss": 0.9424, "step": 7870 }, { "epoch": 0.5475668718911962, "grad_norm": 1.015625, "learning_rate": 0.0008947395096900715, "loss": 0.9196, "step": 7871 }, { "epoch": 0.5476364395283314, "grad_norm": 0.95703125, "learning_rate": 0.0008945154311626389, "loss": 0.7907, "step": 7872 }, { "epoch": 0.5477060071654666, "grad_norm": 1.234375, "learning_rate": 0.0008942913579911808, "loss": 0.8883, "step": 7873 }, { "epoch": 0.5477755748026019, "grad_norm": 1.15625, "learning_rate": 0.0008940672901870745, "loss": 0.7396, "step": 7874 }, { "epoch": 0.547845142439737, "grad_norm": 1.15625, "learning_rate": 0.0008938432277616975, "loss": 0.7292, "step": 7875 }, { "epoch": 0.5479147100768722, "grad_norm": 1.15625, "learning_rate": 0.0008936191707264265, "loss": 0.814, "step": 7876 }, { "epoch": 0.5479842777140075, "grad_norm": 1.0234375, "learning_rate": 0.0008933951190926374, "loss": 0.8076, "step": 7877 }, { "epoch": 0.5480538453511427, "grad_norm": 1.328125, "learning_rate": 0.000893171072871707, "loss": 0.8851, "step": 7878 }, { "epoch": 0.5481234129882778, "grad_norm": 1.0625, "learning_rate": 0.0008929470320750114, "loss": 1.0926, "step": 7879 }, { "epoch": 0.5481929806254131, "grad_norm": 1.515625, "learning_rate": 0.0008927229967139256, "loss": 1.0434, "step": 7880 }, { "epoch": 0.5482625482625483, "grad_norm": 1.2734375, "learning_rate": 0.0008924989667998251, "loss": 0.9311, "step": 7881 }, { "epoch": 0.5483321158996834, "grad_norm": 1.015625, "learning_rate": 0.0008922749423440854, "loss": 0.8518, "step": 7882 }, { "epoch": 0.5484016835368186, "grad_norm": 1.3828125, "learning_rate": 0.0008920509233580814, "loss": 0.8492, "step": 7883 }, { "epoch": 0.5484712511739539, "grad_norm": 1.03125, "learning_rate": 0.0008918269098531871, "loss": 0.8881, "step": 7884 }, { "epoch": 0.5485408188110891, "grad_norm": 1.1015625, "learning_rate": 0.0008916029018407772, "loss": 0.8287, "step": 7885 }, { "epoch": 0.5486103864482242, "grad_norm": 1.2265625, "learning_rate": 0.0008913788993322256, "loss": 0.7321, "step": 7886 }, { "epoch": 0.5486799540853595, "grad_norm": 1.1328125, "learning_rate": 0.0008911549023389063, "loss": 0.9308, "step": 7887 }, { "epoch": 0.5487495217224947, "grad_norm": 0.98828125, "learning_rate": 0.0008909309108721918, "loss": 0.6023, "step": 7888 }, { "epoch": 0.5488190893596299, "grad_norm": 1.46875, "learning_rate": 0.0008907069249434563, "loss": 0.9637, "step": 7889 }, { "epoch": 0.5488886569967651, "grad_norm": 1.0234375, "learning_rate": 0.0008904829445640724, "loss": 0.7305, "step": 7890 }, { "epoch": 0.5489582246339003, "grad_norm": 1.2421875, "learning_rate": 0.0008902589697454122, "loss": 0.9317, "step": 7891 }, { "epoch": 0.5490277922710355, "grad_norm": 0.875, "learning_rate": 0.0008900350004988484, "loss": 0.66, "step": 7892 }, { "epoch": 0.5490973599081708, "grad_norm": 1.359375, "learning_rate": 0.0008898110368357533, "loss": 0.9249, "step": 7893 }, { "epoch": 0.5491669275453059, "grad_norm": 0.73828125, "learning_rate": 0.0008895870787674984, "loss": 0.6496, "step": 7894 }, { "epoch": 0.5492364951824411, "grad_norm": 1.2578125, "learning_rate": 0.0008893631263054547, "loss": 0.9521, "step": 7895 }, { "epoch": 0.5493060628195763, "grad_norm": 1.171875, "learning_rate": 0.0008891391794609941, "loss": 0.9686, "step": 7896 }, { "epoch": 0.5493756304567116, "grad_norm": 0.9375, "learning_rate": 0.0008889152382454872, "loss": 0.7132, "step": 7897 }, { "epoch": 0.5494451980938467, "grad_norm": 1.25, "learning_rate": 0.0008886913026703042, "loss": 0.5568, "step": 7898 }, { "epoch": 0.5495147657309819, "grad_norm": 1.0390625, "learning_rate": 0.0008884673727468164, "loss": 0.8192, "step": 7899 }, { "epoch": 0.5495843333681172, "grad_norm": 1.203125, "learning_rate": 0.0008882434484863928, "loss": 0.7144, "step": 7900 }, { "epoch": 0.5496539010052524, "grad_norm": 1.296875, "learning_rate": 0.000888019529900404, "loss": 0.9487, "step": 7901 }, { "epoch": 0.5497234686423875, "grad_norm": 0.953125, "learning_rate": 0.0008877956170002186, "loss": 0.5608, "step": 7902 }, { "epoch": 0.5497930362795228, "grad_norm": 1.25, "learning_rate": 0.0008875717097972064, "loss": 0.7444, "step": 7903 }, { "epoch": 0.549862603916658, "grad_norm": 1.0703125, "learning_rate": 0.0008873478083027364, "loss": 0.5599, "step": 7904 }, { "epoch": 0.5499321715537931, "grad_norm": 0.921875, "learning_rate": 0.0008871239125281761, "loss": 0.7114, "step": 7905 }, { "epoch": 0.5500017391909284, "grad_norm": 1.1953125, "learning_rate": 0.0008869000224848954, "loss": 0.9034, "step": 7906 }, { "epoch": 0.5500713068280636, "grad_norm": 0.8828125, "learning_rate": 0.0008866761381842612, "loss": 0.7924, "step": 7907 }, { "epoch": 0.5501408744651988, "grad_norm": 1.1328125, "learning_rate": 0.0008864522596376416, "loss": 0.585, "step": 7908 }, { "epoch": 0.5502104421023339, "grad_norm": 1.3203125, "learning_rate": 0.0008862283868564038, "loss": 0.9106, "step": 7909 }, { "epoch": 0.5502800097394692, "grad_norm": 0.91796875, "learning_rate": 0.000886004519851915, "loss": 0.8078, "step": 7910 }, { "epoch": 0.5503495773766044, "grad_norm": 1.0625, "learning_rate": 0.0008857806586355423, "loss": 0.7807, "step": 7911 }, { "epoch": 0.5504191450137396, "grad_norm": 1.125, "learning_rate": 0.0008855568032186517, "loss": 0.6597, "step": 7912 }, { "epoch": 0.5504887126508748, "grad_norm": 1.3515625, "learning_rate": 0.0008853329536126102, "loss": 1.0829, "step": 7913 }, { "epoch": 0.55055828028801, "grad_norm": 1.3046875, "learning_rate": 0.0008851091098287831, "loss": 0.8936, "step": 7914 }, { "epoch": 0.5506278479251452, "grad_norm": 1.046875, "learning_rate": 0.0008848852718785366, "loss": 0.7169, "step": 7915 }, { "epoch": 0.5506974155622805, "grad_norm": 0.87890625, "learning_rate": 0.0008846614397732354, "loss": 0.6306, "step": 7916 }, { "epoch": 0.5507669831994156, "grad_norm": 1.1171875, "learning_rate": 0.0008844376135242451, "loss": 0.7502, "step": 7917 }, { "epoch": 0.5508365508365508, "grad_norm": 1.1953125, "learning_rate": 0.0008842137931429303, "loss": 0.8823, "step": 7918 }, { "epoch": 0.5509061184736861, "grad_norm": 1.2421875, "learning_rate": 0.0008839899786406558, "loss": 0.8811, "step": 7919 }, { "epoch": 0.5509756861108213, "grad_norm": 1.0234375, "learning_rate": 0.0008837661700287849, "loss": 0.9098, "step": 7920 }, { "epoch": 0.5510452537479564, "grad_norm": 1.390625, "learning_rate": 0.0008835423673186822, "loss": 0.805, "step": 7921 }, { "epoch": 0.5511148213850916, "grad_norm": 1.15625, "learning_rate": 0.0008833185705217114, "loss": 0.8195, "step": 7922 }, { "epoch": 0.5511843890222269, "grad_norm": 1.171875, "learning_rate": 0.000883094779649235, "loss": 0.9827, "step": 7923 }, { "epoch": 0.551253956659362, "grad_norm": 1.1171875, "learning_rate": 0.0008828709947126166, "loss": 0.648, "step": 7924 }, { "epoch": 0.5513235242964972, "grad_norm": 1.203125, "learning_rate": 0.0008826472157232188, "loss": 0.8791, "step": 7925 }, { "epoch": 0.5513930919336325, "grad_norm": 1.2265625, "learning_rate": 0.0008824234426924041, "loss": 0.9768, "step": 7926 }, { "epoch": 0.5514626595707677, "grad_norm": 1.15625, "learning_rate": 0.0008821996756315341, "loss": 0.6314, "step": 7927 }, { "epoch": 0.5515322272079028, "grad_norm": 1.0546875, "learning_rate": 0.0008819759145519707, "loss": 0.7367, "step": 7928 }, { "epoch": 0.5516017948450381, "grad_norm": 1.0234375, "learning_rate": 0.0008817521594650759, "loss": 0.8871, "step": 7929 }, { "epoch": 0.5516713624821733, "grad_norm": 1.3828125, "learning_rate": 0.0008815284103822097, "loss": 0.9596, "step": 7930 }, { "epoch": 0.5517409301193085, "grad_norm": 1.5234375, "learning_rate": 0.0008813046673147344, "loss": 0.9696, "step": 7931 }, { "epoch": 0.5518104977564438, "grad_norm": 0.9921875, "learning_rate": 0.0008810809302740095, "loss": 0.9666, "step": 7932 }, { "epoch": 0.5518800653935789, "grad_norm": 0.94140625, "learning_rate": 0.0008808571992713958, "loss": 0.8094, "step": 7933 }, { "epoch": 0.5519496330307141, "grad_norm": 1.203125, "learning_rate": 0.0008806334743182526, "loss": 0.8582, "step": 7934 }, { "epoch": 0.5520192006678493, "grad_norm": 1.109375, "learning_rate": 0.0008804097554259402, "loss": 1.0506, "step": 7935 }, { "epoch": 0.5520887683049845, "grad_norm": 1.1640625, "learning_rate": 0.0008801860426058177, "loss": 0.9523, "step": 7936 }, { "epoch": 0.5521583359421197, "grad_norm": 1.2890625, "learning_rate": 0.0008799623358692434, "loss": 1.1695, "step": 7937 }, { "epoch": 0.5522279035792549, "grad_norm": 1.25, "learning_rate": 0.0008797386352275775, "loss": 0.9633, "step": 7938 }, { "epoch": 0.5522974712163902, "grad_norm": 1.140625, "learning_rate": 0.0008795149406921772, "loss": 0.7446, "step": 7939 }, { "epoch": 0.5523670388535253, "grad_norm": 1.0625, "learning_rate": 0.0008792912522744011, "loss": 0.9321, "step": 7940 }, { "epoch": 0.5524366064906605, "grad_norm": 1.4921875, "learning_rate": 0.0008790675699856064, "loss": 0.8671, "step": 7941 }, { "epoch": 0.5525061741277958, "grad_norm": 1.0625, "learning_rate": 0.0008788438938371512, "loss": 0.7517, "step": 7942 }, { "epoch": 0.552575741764931, "grad_norm": 0.93359375, "learning_rate": 0.0008786202238403926, "loss": 0.6515, "step": 7943 }, { "epoch": 0.5526453094020661, "grad_norm": 1.0546875, "learning_rate": 0.0008783965600066866, "loss": 0.7545, "step": 7944 }, { "epoch": 0.5527148770392014, "grad_norm": 1.125, "learning_rate": 0.000878172902347391, "loss": 0.7001, "step": 7945 }, { "epoch": 0.5527844446763366, "grad_norm": 1.109375, "learning_rate": 0.0008779492508738611, "loss": 0.9313, "step": 7946 }, { "epoch": 0.5528540123134718, "grad_norm": 1.125, "learning_rate": 0.0008777256055974533, "loss": 0.82, "step": 7947 }, { "epoch": 0.5529235799506069, "grad_norm": 1.1171875, "learning_rate": 0.0008775019665295225, "loss": 0.8616, "step": 7948 }, { "epoch": 0.5529931475877422, "grad_norm": 1.171875, "learning_rate": 0.0008772783336814246, "loss": 0.8757, "step": 7949 }, { "epoch": 0.5530627152248774, "grad_norm": 1.1015625, "learning_rate": 0.0008770547070645145, "loss": 0.9859, "step": 7950 }, { "epoch": 0.5531322828620125, "grad_norm": 0.8671875, "learning_rate": 0.0008768310866901463, "loss": 0.7146, "step": 7951 }, { "epoch": 0.5532018504991478, "grad_norm": 0.875, "learning_rate": 0.0008766074725696752, "loss": 0.7746, "step": 7952 }, { "epoch": 0.553271418136283, "grad_norm": 1.2578125, "learning_rate": 0.0008763838647144544, "loss": 0.822, "step": 7953 }, { "epoch": 0.5533409857734182, "grad_norm": 1.046875, "learning_rate": 0.0008761602631358382, "loss": 0.692, "step": 7954 }, { "epoch": 0.5534105534105535, "grad_norm": 1.015625, "learning_rate": 0.0008759366678451792, "loss": 0.6741, "step": 7955 }, { "epoch": 0.5534801210476886, "grad_norm": 1.0234375, "learning_rate": 0.0008757130788538311, "loss": 0.6768, "step": 7956 }, { "epoch": 0.5535496886848238, "grad_norm": 0.9609375, "learning_rate": 0.0008754894961731463, "loss": 0.63, "step": 7957 }, { "epoch": 0.5536192563219591, "grad_norm": 0.9296875, "learning_rate": 0.0008752659198144773, "loss": 0.7849, "step": 7958 }, { "epoch": 0.5536888239590942, "grad_norm": 1.0078125, "learning_rate": 0.0008750423497891764, "loss": 0.8418, "step": 7959 }, { "epoch": 0.5537583915962294, "grad_norm": 0.91015625, "learning_rate": 0.000874818786108595, "loss": 0.6265, "step": 7960 }, { "epoch": 0.5538279592333646, "grad_norm": 1.1875, "learning_rate": 0.0008745952287840849, "loss": 0.9863, "step": 7961 }, { "epoch": 0.5538975268704999, "grad_norm": 1.1875, "learning_rate": 0.0008743716778269966, "loss": 0.7077, "step": 7962 }, { "epoch": 0.553967094507635, "grad_norm": 1.046875, "learning_rate": 0.0008741481332486813, "loss": 0.9039, "step": 7963 }, { "epoch": 0.5540366621447702, "grad_norm": 0.93359375, "learning_rate": 0.0008739245950604897, "loss": 0.6563, "step": 7964 }, { "epoch": 0.5541062297819055, "grad_norm": 1.1015625, "learning_rate": 0.0008737010632737714, "loss": 0.9993, "step": 7965 }, { "epoch": 0.5541757974190407, "grad_norm": 1.0234375, "learning_rate": 0.0008734775378998771, "loss": 0.6714, "step": 7966 }, { "epoch": 0.5542453650561758, "grad_norm": 1.4296875, "learning_rate": 0.0008732540189501552, "loss": 1.1504, "step": 7967 }, { "epoch": 0.5543149326933111, "grad_norm": 1.046875, "learning_rate": 0.0008730305064359558, "loss": 0.7834, "step": 7968 }, { "epoch": 0.5543845003304463, "grad_norm": 1.0390625, "learning_rate": 0.0008728070003686266, "loss": 0.7097, "step": 7969 }, { "epoch": 0.5544540679675815, "grad_norm": 1.0546875, "learning_rate": 0.0008725835007595174, "loss": 0.7942, "step": 7970 }, { "epoch": 0.5545236356047167, "grad_norm": 1.3203125, "learning_rate": 0.0008723600076199757, "loss": 0.8154, "step": 7971 }, { "epoch": 0.5545932032418519, "grad_norm": 1.0390625, "learning_rate": 0.0008721365209613491, "loss": 0.8025, "step": 7972 }, { "epoch": 0.5546627708789871, "grad_norm": 0.984375, "learning_rate": 0.000871913040794986, "loss": 0.6833, "step": 7973 }, { "epoch": 0.5547323385161222, "grad_norm": 1.1484375, "learning_rate": 0.0008716895671322329, "loss": 0.9714, "step": 7974 }, { "epoch": 0.5548019061532575, "grad_norm": 1.328125, "learning_rate": 0.0008714660999844371, "loss": 1.101, "step": 7975 }, { "epoch": 0.5548714737903927, "grad_norm": 1.03125, "learning_rate": 0.0008712426393629441, "loss": 0.8226, "step": 7976 }, { "epoch": 0.5549410414275279, "grad_norm": 1.1015625, "learning_rate": 0.0008710191852791016, "loss": 0.8727, "step": 7977 }, { "epoch": 0.5550106090646632, "grad_norm": 1.359375, "learning_rate": 0.0008707957377442546, "loss": 0.8996, "step": 7978 }, { "epoch": 0.5550801767017983, "grad_norm": 0.96875, "learning_rate": 0.0008705722967697484, "loss": 0.8693, "step": 7979 }, { "epoch": 0.5551497443389335, "grad_norm": 1.1875, "learning_rate": 0.0008703488623669293, "loss": 0.768, "step": 7980 }, { "epoch": 0.5552193119760688, "grad_norm": 1.0859375, "learning_rate": 0.0008701254345471411, "loss": 0.9451, "step": 7981 }, { "epoch": 0.555288879613204, "grad_norm": 1.2265625, "learning_rate": 0.000869902013321729, "loss": 0.6398, "step": 7982 }, { "epoch": 0.5553584472503391, "grad_norm": 1.171875, "learning_rate": 0.0008696785987020362, "loss": 0.932, "step": 7983 }, { "epoch": 0.5554280148874744, "grad_norm": 0.97265625, "learning_rate": 0.0008694551906994081, "loss": 0.7843, "step": 7984 }, { "epoch": 0.5554975825246096, "grad_norm": 1.21875, "learning_rate": 0.000869231789325187, "loss": 0.8724, "step": 7985 }, { "epoch": 0.5555671501617447, "grad_norm": 0.97265625, "learning_rate": 0.0008690083945907163, "loss": 0.8195, "step": 7986 }, { "epoch": 0.5556367177988799, "grad_norm": 1.6484375, "learning_rate": 0.0008687850065073398, "loss": 0.9577, "step": 7987 }, { "epoch": 0.5557062854360152, "grad_norm": 0.82421875, "learning_rate": 0.0008685616250863988, "loss": 0.5295, "step": 7988 }, { "epoch": 0.5557758530731504, "grad_norm": 1.09375, "learning_rate": 0.0008683382503392361, "loss": 0.613, "step": 7989 }, { "epoch": 0.5558454207102855, "grad_norm": 1.09375, "learning_rate": 0.0008681148822771932, "loss": 0.8027, "step": 7990 }, { "epoch": 0.5559149883474208, "grad_norm": 1.1015625, "learning_rate": 0.0008678915209116121, "loss": 0.7651, "step": 7991 }, { "epoch": 0.555984555984556, "grad_norm": 0.95703125, "learning_rate": 0.0008676681662538335, "loss": 0.9081, "step": 7992 }, { "epoch": 0.5560541236216912, "grad_norm": 1.1171875, "learning_rate": 0.0008674448183151988, "loss": 0.9049, "step": 7993 }, { "epoch": 0.5561236912588264, "grad_norm": 0.90234375, "learning_rate": 0.0008672214771070477, "loss": 0.6433, "step": 7994 }, { "epoch": 0.5561932588959616, "grad_norm": 0.984375, "learning_rate": 0.0008669981426407208, "loss": 0.6013, "step": 7995 }, { "epoch": 0.5562628265330968, "grad_norm": 1.125, "learning_rate": 0.0008667748149275578, "loss": 0.7064, "step": 7996 }, { "epoch": 0.5563323941702321, "grad_norm": 0.9921875, "learning_rate": 0.0008665514939788981, "loss": 0.799, "step": 7997 }, { "epoch": 0.5564019618073672, "grad_norm": 1.375, "learning_rate": 0.0008663281798060814, "loss": 0.9057, "step": 7998 }, { "epoch": 0.5564715294445024, "grad_norm": 1.0703125, "learning_rate": 0.0008661048724204457, "loss": 0.8069, "step": 7999 }, { "epoch": 0.5565410970816376, "grad_norm": 1.0390625, "learning_rate": 0.0008658815718333298, "loss": 0.9088, "step": 8000 }, { "epoch": 0.5566106647187729, "grad_norm": 1.0, "learning_rate": 0.0008656582780560712, "loss": 0.7117, "step": 8001 }, { "epoch": 0.556680232355908, "grad_norm": 0.953125, "learning_rate": 0.0008654349911000086, "loss": 0.6986, "step": 8002 }, { "epoch": 0.5567497999930432, "grad_norm": 0.9296875, "learning_rate": 0.0008652117109764787, "loss": 0.7315, "step": 8003 }, { "epoch": 0.5568193676301785, "grad_norm": 0.91015625, "learning_rate": 0.0008649884376968186, "loss": 0.6983, "step": 8004 }, { "epoch": 0.5568889352673136, "grad_norm": 1.0078125, "learning_rate": 0.0008647651712723654, "loss": 0.5391, "step": 8005 }, { "epoch": 0.5569585029044488, "grad_norm": 1.4453125, "learning_rate": 0.000864541911714455, "loss": 0.9969, "step": 8006 }, { "epoch": 0.5570280705415841, "grad_norm": 1.0546875, "learning_rate": 0.0008643186590344239, "loss": 0.6933, "step": 8007 }, { "epoch": 0.5570976381787193, "grad_norm": 1.453125, "learning_rate": 0.0008640954132436067, "loss": 0.7267, "step": 8008 }, { "epoch": 0.5571672058158544, "grad_norm": 1.0625, "learning_rate": 0.0008638721743533402, "loss": 0.7014, "step": 8009 }, { "epoch": 0.5572367734529897, "grad_norm": 0.9296875, "learning_rate": 0.0008636489423749581, "loss": 0.8283, "step": 8010 }, { "epoch": 0.5573063410901249, "grad_norm": 1.1328125, "learning_rate": 0.0008634257173197954, "loss": 1.0623, "step": 8011 }, { "epoch": 0.5573759087272601, "grad_norm": 1.3671875, "learning_rate": 0.0008632024991991867, "loss": 0.8609, "step": 8012 }, { "epoch": 0.5574454763643952, "grad_norm": 0.98828125, "learning_rate": 0.0008629792880244653, "loss": 0.8745, "step": 8013 }, { "epoch": 0.5575150440015305, "grad_norm": 1.28125, "learning_rate": 0.0008627560838069655, "loss": 0.7606, "step": 8014 }, { "epoch": 0.5575846116386657, "grad_norm": 0.91796875, "learning_rate": 0.0008625328865580191, "loss": 0.7748, "step": 8015 }, { "epoch": 0.5576541792758009, "grad_norm": 1.125, "learning_rate": 0.0008623096962889606, "loss": 0.9064, "step": 8016 }, { "epoch": 0.5577237469129361, "grad_norm": 1.0390625, "learning_rate": 0.0008620865130111215, "loss": 0.8291, "step": 8017 }, { "epoch": 0.5577933145500713, "grad_norm": 1.6953125, "learning_rate": 0.0008618633367358339, "loss": 1.0929, "step": 8018 }, { "epoch": 0.5578628821872065, "grad_norm": 1.125, "learning_rate": 0.0008616401674744303, "loss": 0.7828, "step": 8019 }, { "epoch": 0.5579324498243418, "grad_norm": 1.15625, "learning_rate": 0.0008614170052382413, "loss": 0.9277, "step": 8020 }, { "epoch": 0.5580020174614769, "grad_norm": 1.2578125, "learning_rate": 0.0008611938500385983, "loss": 0.9666, "step": 8021 }, { "epoch": 0.5580715850986121, "grad_norm": 0.9140625, "learning_rate": 0.0008609707018868317, "loss": 0.4326, "step": 8022 }, { "epoch": 0.5581411527357474, "grad_norm": 1.015625, "learning_rate": 0.0008607475607942725, "loss": 0.5866, "step": 8023 }, { "epoch": 0.5582107203728826, "grad_norm": 1.640625, "learning_rate": 0.0008605244267722502, "loss": 0.9919, "step": 8024 }, { "epoch": 0.5582802880100177, "grad_norm": 0.96484375, "learning_rate": 0.0008603012998320941, "loss": 0.8603, "step": 8025 }, { "epoch": 0.5583498556471529, "grad_norm": 1.125, "learning_rate": 0.0008600781799851344, "loss": 0.8032, "step": 8026 }, { "epoch": 0.5584194232842882, "grad_norm": 1.1875, "learning_rate": 0.0008598550672426993, "loss": 1.0323, "step": 8027 }, { "epoch": 0.5584889909214233, "grad_norm": 0.94140625, "learning_rate": 0.0008596319616161175, "loss": 0.6604, "step": 8028 }, { "epoch": 0.5585585585585585, "grad_norm": 1.1015625, "learning_rate": 0.0008594088631167169, "loss": 0.8073, "step": 8029 }, { "epoch": 0.5586281261956938, "grad_norm": 1.15625, "learning_rate": 0.0008591857717558261, "loss": 0.8021, "step": 8030 }, { "epoch": 0.558697693832829, "grad_norm": 1.0078125, "learning_rate": 0.0008589626875447717, "loss": 0.8974, "step": 8031 }, { "epoch": 0.5587672614699641, "grad_norm": 1.0, "learning_rate": 0.0008587396104948811, "loss": 0.8931, "step": 8032 }, { "epoch": 0.5588368291070994, "grad_norm": 1.3515625, "learning_rate": 0.0008585165406174813, "loss": 0.894, "step": 8033 }, { "epoch": 0.5589063967442346, "grad_norm": 1.203125, "learning_rate": 0.0008582934779238985, "loss": 0.6545, "step": 8034 }, { "epoch": 0.5589759643813698, "grad_norm": 1.0546875, "learning_rate": 0.0008580704224254583, "loss": 0.7657, "step": 8035 }, { "epoch": 0.559045532018505, "grad_norm": 1.1328125, "learning_rate": 0.0008578473741334867, "loss": 0.7614, "step": 8036 }, { "epoch": 0.5591150996556402, "grad_norm": 1.015625, "learning_rate": 0.0008576243330593093, "loss": 0.8003, "step": 8037 }, { "epoch": 0.5591846672927754, "grad_norm": 1.046875, "learning_rate": 0.0008574012992142504, "loss": 1.0983, "step": 8038 }, { "epoch": 0.5592542349299106, "grad_norm": 1.21875, "learning_rate": 0.0008571782726096346, "loss": 1.0023, "step": 8039 }, { "epoch": 0.5593238025670458, "grad_norm": 1.125, "learning_rate": 0.0008569552532567865, "loss": 0.8936, "step": 8040 }, { "epoch": 0.559393370204181, "grad_norm": 1.109375, "learning_rate": 0.0008567322411670297, "loss": 0.7366, "step": 8041 }, { "epoch": 0.5594629378413162, "grad_norm": 1.046875, "learning_rate": 0.0008565092363516876, "loss": 0.7664, "step": 8042 }, { "epoch": 0.5595325054784515, "grad_norm": 1.1640625, "learning_rate": 0.0008562862388220828, "loss": 0.7319, "step": 8043 }, { "epoch": 0.5596020731155866, "grad_norm": 1.1328125, "learning_rate": 0.000856063248589539, "loss": 0.8718, "step": 8044 }, { "epoch": 0.5596716407527218, "grad_norm": 1.3671875, "learning_rate": 0.0008558402656653777, "loss": 0.8213, "step": 8045 }, { "epoch": 0.5597412083898571, "grad_norm": 1.53125, "learning_rate": 0.0008556172900609207, "loss": 0.7882, "step": 8046 }, { "epoch": 0.5598107760269923, "grad_norm": 0.90234375, "learning_rate": 0.0008553943217874903, "loss": 0.768, "step": 8047 }, { "epoch": 0.5598803436641274, "grad_norm": 1.2578125, "learning_rate": 0.0008551713608564075, "loss": 1.0058, "step": 8048 }, { "epoch": 0.5599499113012627, "grad_norm": 1.0, "learning_rate": 0.000854948407278993, "loss": 0.8091, "step": 8049 }, { "epoch": 0.5600194789383979, "grad_norm": 1.125, "learning_rate": 0.000854725461066567, "loss": 0.9949, "step": 8050 }, { "epoch": 0.560089046575533, "grad_norm": 1.046875, "learning_rate": 0.0008545025222304501, "loss": 0.7258, "step": 8051 }, { "epoch": 0.5601586142126682, "grad_norm": 0.97265625, "learning_rate": 0.0008542795907819618, "loss": 0.6841, "step": 8052 }, { "epoch": 0.5602281818498035, "grad_norm": 1.125, "learning_rate": 0.000854056666732421, "loss": 0.6304, "step": 8053 }, { "epoch": 0.5602977494869387, "grad_norm": 0.81640625, "learning_rate": 0.0008538337500931472, "loss": 0.6101, "step": 8054 }, { "epoch": 0.5603673171240738, "grad_norm": 1.34375, "learning_rate": 0.0008536108408754593, "loss": 1.0405, "step": 8055 }, { "epoch": 0.5604368847612091, "grad_norm": 1.3359375, "learning_rate": 0.0008533879390906747, "loss": 0.9695, "step": 8056 }, { "epoch": 0.5605064523983443, "grad_norm": 1.3984375, "learning_rate": 0.0008531650447501114, "loss": 1.005, "step": 8057 }, { "epoch": 0.5605760200354795, "grad_norm": 1.1328125, "learning_rate": 0.0008529421578650873, "loss": 0.8173, "step": 8058 }, { "epoch": 0.5606455876726147, "grad_norm": 1.25, "learning_rate": 0.0008527192784469191, "loss": 0.9021, "step": 8059 }, { "epoch": 0.5607151553097499, "grad_norm": 1.2265625, "learning_rate": 0.0008524964065069234, "loss": 0.756, "step": 8060 }, { "epoch": 0.5607847229468851, "grad_norm": 0.92578125, "learning_rate": 0.0008522735420564169, "loss": 0.8285, "step": 8061 }, { "epoch": 0.5608542905840204, "grad_norm": 1.1796875, "learning_rate": 0.0008520506851067154, "loss": 0.9753, "step": 8062 }, { "epoch": 0.5609238582211555, "grad_norm": 0.99609375, "learning_rate": 0.0008518278356691344, "loss": 0.8214, "step": 8063 }, { "epoch": 0.5609934258582907, "grad_norm": 1.59375, "learning_rate": 0.0008516049937549888, "loss": 0.8445, "step": 8064 }, { "epoch": 0.5610629934954259, "grad_norm": 1.1796875, "learning_rate": 0.0008513821593755939, "loss": 1.078, "step": 8065 }, { "epoch": 0.5611325611325612, "grad_norm": 1.109375, "learning_rate": 0.0008511593325422639, "loss": 0.7691, "step": 8066 }, { "epoch": 0.5612021287696963, "grad_norm": 1.0234375, "learning_rate": 0.0008509365132663124, "loss": 1.0448, "step": 8067 }, { "epoch": 0.5612716964068315, "grad_norm": 0.91796875, "learning_rate": 0.0008507137015590537, "loss": 0.5083, "step": 8068 }, { "epoch": 0.5613412640439668, "grad_norm": 0.98828125, "learning_rate": 0.0008504908974318009, "loss": 0.884, "step": 8069 }, { "epoch": 0.561410831681102, "grad_norm": 1.21875, "learning_rate": 0.0008502681008958667, "loss": 0.9151, "step": 8070 }, { "epoch": 0.5614803993182371, "grad_norm": 0.9609375, "learning_rate": 0.0008500453119625633, "loss": 0.807, "step": 8071 }, { "epoch": 0.5615499669553724, "grad_norm": 1.234375, "learning_rate": 0.0008498225306432034, "loss": 0.8662, "step": 8072 }, { "epoch": 0.5616195345925076, "grad_norm": 1.0, "learning_rate": 0.0008495997569490986, "loss": 0.872, "step": 8073 }, { "epoch": 0.5616891022296427, "grad_norm": 1.0234375, "learning_rate": 0.0008493769908915599, "loss": 0.8244, "step": 8074 }, { "epoch": 0.561758669866778, "grad_norm": 1.140625, "learning_rate": 0.0008491542324818982, "loss": 0.9894, "step": 8075 }, { "epoch": 0.5618282375039132, "grad_norm": 1.0390625, "learning_rate": 0.0008489314817314246, "loss": 0.8056, "step": 8076 }, { "epoch": 0.5618978051410484, "grad_norm": 1.15625, "learning_rate": 0.0008487087386514488, "loss": 0.7569, "step": 8077 }, { "epoch": 0.5619673727781835, "grad_norm": 1.1953125, "learning_rate": 0.0008484860032532804, "loss": 0.9542, "step": 8078 }, { "epoch": 0.5620369404153188, "grad_norm": 1.0078125, "learning_rate": 0.0008482632755482293, "loss": 0.8073, "step": 8079 }, { "epoch": 0.562106508052454, "grad_norm": 1.140625, "learning_rate": 0.0008480405555476045, "loss": 0.8613, "step": 8080 }, { "epoch": 0.5621760756895892, "grad_norm": 1.296875, "learning_rate": 0.0008478178432627142, "loss": 0.9116, "step": 8081 }, { "epoch": 0.5622456433267244, "grad_norm": 1.3125, "learning_rate": 0.0008475951387048664, "loss": 0.7866, "step": 8082 }, { "epoch": 0.5623152109638596, "grad_norm": 0.93359375, "learning_rate": 0.0008473724418853698, "loss": 0.8013, "step": 8083 }, { "epoch": 0.5623847786009948, "grad_norm": 1.1171875, "learning_rate": 0.0008471497528155311, "loss": 0.8295, "step": 8084 }, { "epoch": 0.5624543462381301, "grad_norm": 1.109375, "learning_rate": 0.0008469270715066573, "loss": 0.9016, "step": 8085 }, { "epoch": 0.5625239138752652, "grad_norm": 0.96484375, "learning_rate": 0.0008467043979700554, "loss": 0.6481, "step": 8086 }, { "epoch": 0.5625934815124004, "grad_norm": 1.0859375, "learning_rate": 0.0008464817322170319, "loss": 0.8493, "step": 8087 }, { "epoch": 0.5626630491495357, "grad_norm": 1.328125, "learning_rate": 0.0008462590742588918, "loss": 0.7085, "step": 8088 }, { "epoch": 0.5627326167866709, "grad_norm": 1.125, "learning_rate": 0.000846036424106941, "loss": 0.8964, "step": 8089 }, { "epoch": 0.562802184423806, "grad_norm": 1.28125, "learning_rate": 0.0008458137817724848, "loss": 0.9103, "step": 8090 }, { "epoch": 0.5628717520609412, "grad_norm": 1.1796875, "learning_rate": 0.0008455911472668276, "loss": 0.8857, "step": 8091 }, { "epoch": 0.5629413196980765, "grad_norm": 1.2578125, "learning_rate": 0.0008453685206012732, "loss": 0.8084, "step": 8092 }, { "epoch": 0.5630108873352117, "grad_norm": 1.078125, "learning_rate": 0.0008451459017871263, "loss": 0.7816, "step": 8093 }, { "epoch": 0.5630804549723468, "grad_norm": 0.890625, "learning_rate": 0.0008449232908356901, "loss": 0.6622, "step": 8094 }, { "epoch": 0.5631500226094821, "grad_norm": 1.046875, "learning_rate": 0.0008447006877582674, "loss": 0.8106, "step": 8095 }, { "epoch": 0.5632195902466173, "grad_norm": 1.25, "learning_rate": 0.0008444780925661609, "loss": 0.9192, "step": 8096 }, { "epoch": 0.5632891578837524, "grad_norm": 1.1640625, "learning_rate": 0.0008442555052706732, "loss": 0.8015, "step": 8097 }, { "epoch": 0.5633587255208877, "grad_norm": 1.1796875, "learning_rate": 0.0008440329258831057, "loss": 0.557, "step": 8098 }, { "epoch": 0.5634282931580229, "grad_norm": 1.0703125, "learning_rate": 0.0008438103544147601, "loss": 0.8574, "step": 8099 }, { "epoch": 0.5634978607951581, "grad_norm": 1.171875, "learning_rate": 0.0008435877908769375, "loss": 0.6947, "step": 8100 }, { "epoch": 0.5635674284322933, "grad_norm": 1.4609375, "learning_rate": 0.0008433652352809388, "loss": 1.0825, "step": 8101 }, { "epoch": 0.5636369960694285, "grad_norm": 1.2421875, "learning_rate": 0.0008431426876380636, "loss": 0.9564, "step": 8102 }, { "epoch": 0.5637065637065637, "grad_norm": 1.5078125, "learning_rate": 0.000842920147959612, "loss": 1.0568, "step": 8103 }, { "epoch": 0.5637761313436989, "grad_norm": 1.140625, "learning_rate": 0.0008426976162568837, "loss": 0.7129, "step": 8104 }, { "epoch": 0.5638456989808341, "grad_norm": 1.1328125, "learning_rate": 0.0008424750925411779, "loss": 1.0349, "step": 8105 }, { "epoch": 0.5639152666179693, "grad_norm": 1.296875, "learning_rate": 0.0008422525768237925, "loss": 0.6118, "step": 8106 }, { "epoch": 0.5639848342551045, "grad_norm": 0.93359375, "learning_rate": 0.0008420300691160263, "loss": 0.7699, "step": 8107 }, { "epoch": 0.5640544018922398, "grad_norm": 1.171875, "learning_rate": 0.0008418075694291772, "loss": 1.049, "step": 8108 }, { "epoch": 0.5641239695293749, "grad_norm": 1.375, "learning_rate": 0.0008415850777745421, "loss": 0.7753, "step": 8109 }, { "epoch": 0.5641935371665101, "grad_norm": 1.453125, "learning_rate": 0.0008413625941634181, "loss": 0.7612, "step": 8110 }, { "epoch": 0.5642631048036454, "grad_norm": 1.21875, "learning_rate": 0.0008411401186071022, "loss": 0.7208, "step": 8111 }, { "epoch": 0.5643326724407806, "grad_norm": 0.921875, "learning_rate": 0.0008409176511168906, "loss": 0.76, "step": 8112 }, { "epoch": 0.5644022400779157, "grad_norm": 0.9765625, "learning_rate": 0.0008406951917040784, "loss": 0.6868, "step": 8113 }, { "epoch": 0.564471807715051, "grad_norm": 0.98828125, "learning_rate": 0.0008404727403799614, "loss": 0.8213, "step": 8114 }, { "epoch": 0.5645413753521862, "grad_norm": 0.9921875, "learning_rate": 0.0008402502971558352, "loss": 0.7647, "step": 8115 }, { "epoch": 0.5646109429893214, "grad_norm": 1.3125, "learning_rate": 0.0008400278620429932, "loss": 0.7873, "step": 8116 }, { "epoch": 0.5646805106264565, "grad_norm": 1.2421875, "learning_rate": 0.0008398054350527298, "loss": 0.9852, "step": 8117 }, { "epoch": 0.5647500782635918, "grad_norm": 1.0625, "learning_rate": 0.0008395830161963394, "loss": 0.7469, "step": 8118 }, { "epoch": 0.564819645900727, "grad_norm": 1.2890625, "learning_rate": 0.000839360605485115, "loss": 1.0719, "step": 8119 }, { "epoch": 0.5648892135378621, "grad_norm": 0.953125, "learning_rate": 0.000839138202930349, "loss": 0.7244, "step": 8120 }, { "epoch": 0.5649587811749974, "grad_norm": 1.28125, "learning_rate": 0.0008389158085433343, "loss": 1.0414, "step": 8121 }, { "epoch": 0.5650283488121326, "grad_norm": 1.171875, "learning_rate": 0.0008386934223353632, "loss": 0.9417, "step": 8122 }, { "epoch": 0.5650979164492678, "grad_norm": 1.2421875, "learning_rate": 0.0008384710443177269, "loss": 0.8437, "step": 8123 }, { "epoch": 0.565167484086403, "grad_norm": 1.1328125, "learning_rate": 0.0008382486745017166, "loss": 0.8869, "step": 8124 }, { "epoch": 0.5652370517235382, "grad_norm": 1.2109375, "learning_rate": 0.0008380263128986235, "loss": 1.0772, "step": 8125 }, { "epoch": 0.5653066193606734, "grad_norm": 1.1328125, "learning_rate": 0.000837803959519738, "loss": 0.5437, "step": 8126 }, { "epoch": 0.5653761869978087, "grad_norm": 0.93359375, "learning_rate": 0.0008375816143763495, "loss": 0.6332, "step": 8127 }, { "epoch": 0.5654457546349438, "grad_norm": 1.1796875, "learning_rate": 0.0008373592774797482, "loss": 0.8213, "step": 8128 }, { "epoch": 0.565515322272079, "grad_norm": 1.3046875, "learning_rate": 0.0008371369488412233, "loss": 0.8402, "step": 8129 }, { "epoch": 0.5655848899092142, "grad_norm": 0.81640625, "learning_rate": 0.000836914628472063, "loss": 0.6205, "step": 8130 }, { "epoch": 0.5656544575463495, "grad_norm": 1.5625, "learning_rate": 0.0008366923163835556, "loss": 0.9671, "step": 8131 }, { "epoch": 0.5657240251834846, "grad_norm": 1.0234375, "learning_rate": 0.0008364700125869895, "loss": 0.5901, "step": 8132 }, { "epoch": 0.5657935928206198, "grad_norm": 1.0703125, "learning_rate": 0.000836247717093652, "loss": 0.9082, "step": 8133 }, { "epoch": 0.5658631604577551, "grad_norm": 1.421875, "learning_rate": 0.0008360254299148298, "loss": 0.98, "step": 8134 }, { "epoch": 0.5659327280948903, "grad_norm": 1.25, "learning_rate": 0.0008358031510618099, "loss": 0.8122, "step": 8135 }, { "epoch": 0.5660022957320254, "grad_norm": 1.0234375, "learning_rate": 0.0008355808805458786, "loss": 0.7831, "step": 8136 }, { "epoch": 0.5660718633691607, "grad_norm": 0.9609375, "learning_rate": 0.0008353586183783212, "loss": 0.8124, "step": 8137 }, { "epoch": 0.5661414310062959, "grad_norm": 1.0, "learning_rate": 0.0008351363645704231, "loss": 0.7187, "step": 8138 }, { "epoch": 0.566210998643431, "grad_norm": 1.890625, "learning_rate": 0.0008349141191334697, "loss": 0.5892, "step": 8139 }, { "epoch": 0.5662805662805663, "grad_norm": 1.09375, "learning_rate": 0.0008346918820787455, "loss": 0.7405, "step": 8140 }, { "epoch": 0.5663501339177015, "grad_norm": 1.5546875, "learning_rate": 0.0008344696534175337, "loss": 0.7435, "step": 8141 }, { "epoch": 0.5664197015548367, "grad_norm": 0.98828125, "learning_rate": 0.0008342474331611189, "loss": 0.726, "step": 8142 }, { "epoch": 0.5664892691919718, "grad_norm": 1.0078125, "learning_rate": 0.0008340252213207839, "loss": 0.6987, "step": 8143 }, { "epoch": 0.5665588368291071, "grad_norm": 0.734375, "learning_rate": 0.000833803017907812, "loss": 0.6203, "step": 8144 }, { "epoch": 0.5666284044662423, "grad_norm": 0.92578125, "learning_rate": 0.0008335808229334846, "loss": 0.7581, "step": 8145 }, { "epoch": 0.5666979721033775, "grad_norm": 1.1171875, "learning_rate": 0.0008333586364090844, "loss": 0.9403, "step": 8146 }, { "epoch": 0.5667675397405127, "grad_norm": 1.015625, "learning_rate": 0.0008331364583458929, "loss": 0.7753, "step": 8147 }, { "epoch": 0.5668371073776479, "grad_norm": 1.0, "learning_rate": 0.0008329142887551908, "loss": 0.9515, "step": 8148 }, { "epoch": 0.5669066750147831, "grad_norm": 1.28125, "learning_rate": 0.0008326921276482588, "loss": 1.0615, "step": 8149 }, { "epoch": 0.5669762426519184, "grad_norm": 1.1328125, "learning_rate": 0.0008324699750363774, "loss": 0.702, "step": 8150 }, { "epoch": 0.5670458102890535, "grad_norm": 1.609375, "learning_rate": 0.0008322478309308266, "loss": 0.8821, "step": 8151 }, { "epoch": 0.5671153779261887, "grad_norm": 1.2109375, "learning_rate": 0.0008320256953428849, "loss": 0.7465, "step": 8152 }, { "epoch": 0.567184945563324, "grad_norm": 1.015625, "learning_rate": 0.0008318035682838319, "loss": 0.9299, "step": 8153 }, { "epoch": 0.5672545132004592, "grad_norm": 1.21875, "learning_rate": 0.0008315814497649461, "loss": 0.8122, "step": 8154 }, { "epoch": 0.5673240808375943, "grad_norm": 1.0625, "learning_rate": 0.0008313593397975052, "loss": 0.9976, "step": 8155 }, { "epoch": 0.5673936484747295, "grad_norm": 1.0625, "learning_rate": 0.0008311372383927869, "loss": 0.743, "step": 8156 }, { "epoch": 0.5674632161118648, "grad_norm": 1.328125, "learning_rate": 0.0008309151455620687, "loss": 0.9827, "step": 8157 }, { "epoch": 0.567532783749, "grad_norm": 1.03125, "learning_rate": 0.0008306930613166272, "loss": 0.9007, "step": 8158 }, { "epoch": 0.5676023513861351, "grad_norm": 0.97265625, "learning_rate": 0.0008304709856677384, "loss": 0.952, "step": 8159 }, { "epoch": 0.5676719190232704, "grad_norm": 0.87890625, "learning_rate": 0.0008302489186266788, "loss": 0.575, "step": 8160 }, { "epoch": 0.5677414866604056, "grad_norm": 1.1640625, "learning_rate": 0.0008300268602047235, "loss": 0.7995, "step": 8161 }, { "epoch": 0.5678110542975408, "grad_norm": 0.98828125, "learning_rate": 0.0008298048104131474, "loss": 0.5987, "step": 8162 }, { "epoch": 0.567880621934676, "grad_norm": 0.8671875, "learning_rate": 0.0008295827692632249, "loss": 0.6065, "step": 8163 }, { "epoch": 0.5679501895718112, "grad_norm": 0.84375, "learning_rate": 0.0008293607367662306, "loss": 0.7019, "step": 8164 }, { "epoch": 0.5680197572089464, "grad_norm": 0.921875, "learning_rate": 0.0008291387129334383, "loss": 0.4507, "step": 8165 }, { "epoch": 0.5680893248460817, "grad_norm": 1.1484375, "learning_rate": 0.0008289166977761205, "loss": 0.8238, "step": 8166 }, { "epoch": 0.5681588924832168, "grad_norm": 1.0625, "learning_rate": 0.0008286946913055506, "loss": 0.7219, "step": 8167 }, { "epoch": 0.568228460120352, "grad_norm": 1.7265625, "learning_rate": 0.0008284726935330011, "loss": 0.7893, "step": 8168 }, { "epoch": 0.5682980277574872, "grad_norm": 1.0703125, "learning_rate": 0.0008282507044697436, "loss": 0.5606, "step": 8169 }, { "epoch": 0.5683675953946224, "grad_norm": 1.1640625, "learning_rate": 0.0008280287241270492, "loss": 0.9674, "step": 8170 }, { "epoch": 0.5684371630317576, "grad_norm": 1.1875, "learning_rate": 0.0008278067525161897, "loss": 0.8512, "step": 8171 }, { "epoch": 0.5685067306688928, "grad_norm": 1.125, "learning_rate": 0.0008275847896484356, "loss": 0.8297, "step": 8172 }, { "epoch": 0.5685762983060281, "grad_norm": 1.265625, "learning_rate": 0.0008273628355350564, "loss": 0.8624, "step": 8173 }, { "epoch": 0.5686458659431632, "grad_norm": 1.2109375, "learning_rate": 0.0008271408901873225, "loss": 0.7673, "step": 8174 }, { "epoch": 0.5687154335802984, "grad_norm": 0.984375, "learning_rate": 0.000826918953616503, "loss": 0.7336, "step": 8175 }, { "epoch": 0.5687850012174337, "grad_norm": 1.109375, "learning_rate": 0.0008266970258338668, "loss": 0.6666, "step": 8176 }, { "epoch": 0.5688545688545689, "grad_norm": 1.3125, "learning_rate": 0.0008264751068506816, "loss": 0.9051, "step": 8177 }, { "epoch": 0.568924136491704, "grad_norm": 1.0, "learning_rate": 0.0008262531966782161, "loss": 0.6647, "step": 8178 }, { "epoch": 0.5689937041288393, "grad_norm": 1.0625, "learning_rate": 0.0008260312953277378, "loss": 0.7353, "step": 8179 }, { "epoch": 0.5690632717659745, "grad_norm": 1.0703125, "learning_rate": 0.000825809402810513, "loss": 0.9339, "step": 8180 }, { "epoch": 0.5691328394031097, "grad_norm": 1.1953125, "learning_rate": 0.0008255875191378089, "loss": 0.7516, "step": 8181 }, { "epoch": 0.5692024070402448, "grad_norm": 1.0703125, "learning_rate": 0.0008253656443208915, "loss": 0.8792, "step": 8182 }, { "epoch": 0.5692719746773801, "grad_norm": 1.4140625, "learning_rate": 0.0008251437783710267, "loss": 0.9131, "step": 8183 }, { "epoch": 0.5693415423145153, "grad_norm": 1.3046875, "learning_rate": 0.000824921921299479, "loss": 0.8456, "step": 8184 }, { "epoch": 0.5694111099516505, "grad_norm": 0.859375, "learning_rate": 0.0008247000731175139, "loss": 0.668, "step": 8185 }, { "epoch": 0.5694806775887857, "grad_norm": 1.2890625, "learning_rate": 0.0008244782338363959, "loss": 1.0285, "step": 8186 }, { "epoch": 0.5695502452259209, "grad_norm": 1.25, "learning_rate": 0.0008242564034673879, "loss": 0.876, "step": 8187 }, { "epoch": 0.5696198128630561, "grad_norm": 0.81640625, "learning_rate": 0.0008240345820217541, "loss": 0.5813, "step": 8188 }, { "epoch": 0.5696893805001914, "grad_norm": 1.3203125, "learning_rate": 0.0008238127695107574, "loss": 0.7834, "step": 8189 }, { "epoch": 0.5697589481373265, "grad_norm": 1.0625, "learning_rate": 0.0008235909659456604, "loss": 0.8144, "step": 8190 }, { "epoch": 0.5698285157744617, "grad_norm": 0.9453125, "learning_rate": 0.0008233691713377245, "loss": 0.6035, "step": 8191 }, { "epoch": 0.569898083411597, "grad_norm": 1.296875, "learning_rate": 0.0008231473856982121, "loss": 1.0587, "step": 8192 }, { "epoch": 0.5699676510487321, "grad_norm": 1.125, "learning_rate": 0.0008229256090383841, "loss": 0.7681, "step": 8193 }, { "epoch": 0.5700372186858673, "grad_norm": 1.390625, "learning_rate": 0.0008227038413695007, "loss": 0.9053, "step": 8194 }, { "epoch": 0.5701067863230025, "grad_norm": 1.21875, "learning_rate": 0.0008224820827028231, "loss": 0.9224, "step": 8195 }, { "epoch": 0.5701763539601378, "grad_norm": 1.1015625, "learning_rate": 0.0008222603330496105, "loss": 0.7537, "step": 8196 }, { "epoch": 0.5702459215972729, "grad_norm": 1.0703125, "learning_rate": 0.0008220385924211224, "loss": 0.6989, "step": 8197 }, { "epoch": 0.5703154892344081, "grad_norm": 1.2578125, "learning_rate": 0.0008218168608286172, "loss": 0.7965, "step": 8198 }, { "epoch": 0.5703850568715434, "grad_norm": 1.34375, "learning_rate": 0.000821595138283354, "loss": 0.7993, "step": 8199 }, { "epoch": 0.5704546245086786, "grad_norm": 1.03125, "learning_rate": 0.0008213734247965905, "loss": 0.8572, "step": 8200 }, { "epoch": 0.5705241921458137, "grad_norm": 1.1640625, "learning_rate": 0.0008211517203795837, "loss": 0.7204, "step": 8201 }, { "epoch": 0.570593759782949, "grad_norm": 0.87890625, "learning_rate": 0.0008209300250435915, "loss": 0.7224, "step": 8202 }, { "epoch": 0.5706633274200842, "grad_norm": 0.98046875, "learning_rate": 0.00082070833879987, "loss": 0.7471, "step": 8203 }, { "epoch": 0.5707328950572194, "grad_norm": 1.3125, "learning_rate": 0.0008204866616596754, "loss": 0.9107, "step": 8204 }, { "epoch": 0.5708024626943546, "grad_norm": 0.93359375, "learning_rate": 0.0008202649936342631, "loss": 0.8132, "step": 8205 }, { "epoch": 0.5708720303314898, "grad_norm": 1.0859375, "learning_rate": 0.0008200433347348886, "loss": 0.8231, "step": 8206 }, { "epoch": 0.570941597968625, "grad_norm": 0.8984375, "learning_rate": 0.0008198216849728068, "loss": 0.7626, "step": 8207 }, { "epoch": 0.5710111656057602, "grad_norm": 1.1171875, "learning_rate": 0.0008196000443592708, "loss": 0.8453, "step": 8208 }, { "epoch": 0.5710807332428954, "grad_norm": 1.1640625, "learning_rate": 0.0008193784129055362, "loss": 0.9886, "step": 8209 }, { "epoch": 0.5711503008800306, "grad_norm": 1.0, "learning_rate": 0.000819156790622855, "loss": 0.7544, "step": 8210 }, { "epoch": 0.5712198685171658, "grad_norm": 1.21875, "learning_rate": 0.0008189351775224807, "loss": 1.0287, "step": 8211 }, { "epoch": 0.5712894361543011, "grad_norm": 1.234375, "learning_rate": 0.000818713573615665, "loss": 0.9659, "step": 8212 }, { "epoch": 0.5713590037914362, "grad_norm": 0.9296875, "learning_rate": 0.0008184919789136606, "loss": 0.7124, "step": 8213 }, { "epoch": 0.5714285714285714, "grad_norm": 1.0625, "learning_rate": 0.0008182703934277184, "loss": 0.831, "step": 8214 }, { "epoch": 0.5714981390657067, "grad_norm": 1.1171875, "learning_rate": 0.0008180488171690896, "loss": 0.7193, "step": 8215 }, { "epoch": 0.5715677067028418, "grad_norm": 1.2265625, "learning_rate": 0.0008178272501490252, "loss": 0.838, "step": 8216 }, { "epoch": 0.571637274339977, "grad_norm": 1.5078125, "learning_rate": 0.0008176056923787747, "loss": 0.7169, "step": 8217 }, { "epoch": 0.5717068419771123, "grad_norm": 0.90625, "learning_rate": 0.0008173841438695879, "loss": 0.7327, "step": 8218 }, { "epoch": 0.5717764096142475, "grad_norm": 1.0625, "learning_rate": 0.0008171626046327134, "loss": 0.7963, "step": 8219 }, { "epoch": 0.5718459772513826, "grad_norm": 1.109375, "learning_rate": 0.0008169410746794005, "loss": 0.9955, "step": 8220 }, { "epoch": 0.5719155448885178, "grad_norm": 1.171875, "learning_rate": 0.000816719554020897, "loss": 0.8668, "step": 8221 }, { "epoch": 0.5719851125256531, "grad_norm": 1.09375, "learning_rate": 0.0008164980426684507, "loss": 1.036, "step": 8222 }, { "epoch": 0.5720546801627883, "grad_norm": 1.0078125, "learning_rate": 0.0008162765406333093, "loss": 0.9154, "step": 8223 }, { "epoch": 0.5721242477999234, "grad_norm": 1.078125, "learning_rate": 0.0008160550479267188, "loss": 0.6411, "step": 8224 }, { "epoch": 0.5721938154370587, "grad_norm": 1.078125, "learning_rate": 0.0008158335645599262, "loss": 0.7755, "step": 8225 }, { "epoch": 0.5722633830741939, "grad_norm": 1.125, "learning_rate": 0.0008156120905441762, "loss": 0.941, "step": 8226 }, { "epoch": 0.5723329507113291, "grad_norm": 0.95703125, "learning_rate": 0.0008153906258907155, "loss": 0.8045, "step": 8227 }, { "epoch": 0.5724025183484643, "grad_norm": 1.09375, "learning_rate": 0.000815169170610788, "loss": 0.7237, "step": 8228 }, { "epoch": 0.5724720859855995, "grad_norm": 1.09375, "learning_rate": 0.0008149477247156387, "loss": 0.8402, "step": 8229 }, { "epoch": 0.5725416536227347, "grad_norm": 0.97265625, "learning_rate": 0.0008147262882165109, "loss": 0.7556, "step": 8230 }, { "epoch": 0.57261122125987, "grad_norm": 0.984375, "learning_rate": 0.0008145048611246484, "loss": 0.8533, "step": 8231 }, { "epoch": 0.5726807888970051, "grad_norm": 1.03125, "learning_rate": 0.0008142834434512943, "loss": 0.7094, "step": 8232 }, { "epoch": 0.5727503565341403, "grad_norm": 1.0, "learning_rate": 0.0008140620352076903, "loss": 0.6124, "step": 8233 }, { "epoch": 0.5728199241712755, "grad_norm": 1.3984375, "learning_rate": 0.0008138406364050796, "loss": 0.805, "step": 8234 }, { "epoch": 0.5728894918084108, "grad_norm": 0.96484375, "learning_rate": 0.0008136192470547027, "loss": 0.6793, "step": 8235 }, { "epoch": 0.5729590594455459, "grad_norm": 1.25, "learning_rate": 0.0008133978671678013, "loss": 1.1407, "step": 8236 }, { "epoch": 0.5730286270826811, "grad_norm": 0.94921875, "learning_rate": 0.0008131764967556154, "loss": 0.6932, "step": 8237 }, { "epoch": 0.5730981947198164, "grad_norm": 1.1484375, "learning_rate": 0.0008129551358293853, "loss": 0.6433, "step": 8238 }, { "epoch": 0.5731677623569515, "grad_norm": 1.09375, "learning_rate": 0.0008127337844003509, "loss": 0.9065, "step": 8239 }, { "epoch": 0.5732373299940867, "grad_norm": 1.40625, "learning_rate": 0.0008125124424797506, "loss": 0.8267, "step": 8240 }, { "epoch": 0.573306897631222, "grad_norm": 1.3515625, "learning_rate": 0.0008122911100788238, "loss": 0.9824, "step": 8241 }, { "epoch": 0.5733764652683572, "grad_norm": 1.1953125, "learning_rate": 0.0008120697872088083, "loss": 0.729, "step": 8242 }, { "epoch": 0.5734460329054923, "grad_norm": 1.171875, "learning_rate": 0.000811848473880942, "loss": 0.7309, "step": 8243 }, { "epoch": 0.5735156005426276, "grad_norm": 1.0703125, "learning_rate": 0.0008116271701064612, "loss": 0.8049, "step": 8244 }, { "epoch": 0.5735851681797628, "grad_norm": 1.2421875, "learning_rate": 0.0008114058758966037, "loss": 1.1414, "step": 8245 }, { "epoch": 0.573654735816898, "grad_norm": 1.4296875, "learning_rate": 0.000811184591262605, "loss": 0.9099, "step": 8246 }, { "epoch": 0.5737243034540331, "grad_norm": 1.2578125, "learning_rate": 0.000810963316215701, "loss": 0.9187, "step": 8247 }, { "epoch": 0.5737938710911684, "grad_norm": 0.86328125, "learning_rate": 0.0008107420507671275, "loss": 0.707, "step": 8248 }, { "epoch": 0.5738634387283036, "grad_norm": 1.4296875, "learning_rate": 0.0008105207949281184, "loss": 0.9479, "step": 8249 }, { "epoch": 0.5739330063654388, "grad_norm": 1.125, "learning_rate": 0.0008102995487099085, "loss": 0.6448, "step": 8250 }, { "epoch": 0.574002574002574, "grad_norm": 1.859375, "learning_rate": 0.0008100783121237308, "loss": 0.7715, "step": 8251 }, { "epoch": 0.5740721416397092, "grad_norm": 1.4453125, "learning_rate": 0.0008098570851808194, "loss": 1.0675, "step": 8252 }, { "epoch": 0.5741417092768444, "grad_norm": 0.87890625, "learning_rate": 0.000809635867892407, "loss": 0.5275, "step": 8253 }, { "epoch": 0.5742112769139797, "grad_norm": 1.1328125, "learning_rate": 0.0008094146602697254, "loss": 0.8431, "step": 8254 }, { "epoch": 0.5742808445511148, "grad_norm": 0.8984375, "learning_rate": 0.0008091934623240071, "loss": 0.5103, "step": 8255 }, { "epoch": 0.57435041218825, "grad_norm": 1.09375, "learning_rate": 0.000808972274066483, "loss": 0.8274, "step": 8256 }, { "epoch": 0.5744199798253853, "grad_norm": 1.078125, "learning_rate": 0.0008087510955083841, "loss": 0.7134, "step": 8257 }, { "epoch": 0.5744895474625205, "grad_norm": 1.1015625, "learning_rate": 0.00080852992666094, "loss": 0.9269, "step": 8258 }, { "epoch": 0.5745591150996556, "grad_norm": 0.9921875, "learning_rate": 0.0008083087675353816, "loss": 0.7844, "step": 8259 }, { "epoch": 0.5746286827367908, "grad_norm": 0.765625, "learning_rate": 0.0008080876181429377, "loss": 0.4759, "step": 8260 }, { "epoch": 0.5746982503739261, "grad_norm": 0.8984375, "learning_rate": 0.000807866478494837, "loss": 0.9786, "step": 8261 }, { "epoch": 0.5747678180110612, "grad_norm": 1.1953125, "learning_rate": 0.0008076453486023087, "loss": 0.7311, "step": 8262 }, { "epoch": 0.5748373856481964, "grad_norm": 1.0859375, "learning_rate": 0.0008074242284765796, "loss": 0.6925, "step": 8263 }, { "epoch": 0.5749069532853317, "grad_norm": 0.828125, "learning_rate": 0.0008072031181288779, "loss": 0.7691, "step": 8264 }, { "epoch": 0.5749765209224669, "grad_norm": 1.265625, "learning_rate": 0.0008069820175704293, "loss": 0.8411, "step": 8265 }, { "epoch": 0.575046088559602, "grad_norm": 0.98828125, "learning_rate": 0.0008067609268124617, "loss": 0.5944, "step": 8266 }, { "epoch": 0.5751156561967373, "grad_norm": 1.171875, "learning_rate": 0.0008065398458662001, "loss": 1.019, "step": 8267 }, { "epoch": 0.5751852238338725, "grad_norm": 1.2109375, "learning_rate": 0.0008063187747428698, "loss": 0.8377, "step": 8268 }, { "epoch": 0.5752547914710077, "grad_norm": 1.0703125, "learning_rate": 0.0008060977134536961, "loss": 0.6803, "step": 8269 }, { "epoch": 0.575324359108143, "grad_norm": 1.0078125, "learning_rate": 0.0008058766620099031, "loss": 0.7064, "step": 8270 }, { "epoch": 0.5753939267452781, "grad_norm": 1.1328125, "learning_rate": 0.000805655620422715, "loss": 0.9263, "step": 8271 }, { "epoch": 0.5754634943824133, "grad_norm": 1.390625, "learning_rate": 0.0008054345887033542, "loss": 0.8418, "step": 8272 }, { "epoch": 0.5755330620195485, "grad_norm": 1.078125, "learning_rate": 0.000805213566863045, "loss": 0.7247, "step": 8273 }, { "epoch": 0.5756026296566837, "grad_norm": 1.0859375, "learning_rate": 0.0008049925549130089, "loss": 0.8379, "step": 8274 }, { "epoch": 0.5756721972938189, "grad_norm": 1.09375, "learning_rate": 0.0008047715528644677, "loss": 0.8124, "step": 8275 }, { "epoch": 0.5757417649309541, "grad_norm": 1.0859375, "learning_rate": 0.0008045505607286434, "loss": 0.7437, "step": 8276 }, { "epoch": 0.5758113325680894, "grad_norm": 1.0703125, "learning_rate": 0.0008043295785167563, "loss": 0.7772, "step": 8277 }, { "epoch": 0.5758809002052245, "grad_norm": 0.84375, "learning_rate": 0.000804108606240027, "loss": 0.5885, "step": 8278 }, { "epoch": 0.5759504678423597, "grad_norm": 1.0625, "learning_rate": 0.000803887643909675, "loss": 0.7219, "step": 8279 }, { "epoch": 0.576020035479495, "grad_norm": 1.03125, "learning_rate": 0.0008036666915369205, "loss": 0.6723, "step": 8280 }, { "epoch": 0.5760896031166302, "grad_norm": 1.03125, "learning_rate": 0.0008034457491329816, "loss": 0.5599, "step": 8281 }, { "epoch": 0.5761591707537653, "grad_norm": 1.140625, "learning_rate": 0.0008032248167090765, "loss": 1.0868, "step": 8282 }, { "epoch": 0.5762287383909006, "grad_norm": 1.078125, "learning_rate": 0.0008030038942764239, "loss": 0.9291, "step": 8283 }, { "epoch": 0.5762983060280358, "grad_norm": 1.46875, "learning_rate": 0.0008027829818462405, "loss": 0.8722, "step": 8284 }, { "epoch": 0.576367873665171, "grad_norm": 1.140625, "learning_rate": 0.0008025620794297431, "loss": 0.8945, "step": 8285 }, { "epoch": 0.5764374413023061, "grad_norm": 1.3125, "learning_rate": 0.000802341187038148, "loss": 0.783, "step": 8286 }, { "epoch": 0.5765070089394414, "grad_norm": 1.15625, "learning_rate": 0.0008021203046826716, "loss": 1.0315, "step": 8287 }, { "epoch": 0.5765765765765766, "grad_norm": 1.078125, "learning_rate": 0.0008018994323745284, "loss": 0.8479, "step": 8288 }, { "epoch": 0.5766461442137117, "grad_norm": 1.046875, "learning_rate": 0.0008016785701249334, "loss": 0.8675, "step": 8289 }, { "epoch": 0.576715711850847, "grad_norm": 1.234375, "learning_rate": 0.0008014577179451015, "loss": 0.718, "step": 8290 }, { "epoch": 0.5767852794879822, "grad_norm": 1.046875, "learning_rate": 0.0008012368758462456, "loss": 0.8693, "step": 8291 }, { "epoch": 0.5768548471251174, "grad_norm": 0.99609375, "learning_rate": 0.0008010160438395794, "loss": 0.7722, "step": 8292 }, { "epoch": 0.5769244147622526, "grad_norm": 1.03125, "learning_rate": 0.0008007952219363152, "loss": 0.6732, "step": 8293 }, { "epoch": 0.5769939823993878, "grad_norm": 1.2265625, "learning_rate": 0.0008005744101476661, "loss": 0.6754, "step": 8294 }, { "epoch": 0.577063550036523, "grad_norm": 1.25, "learning_rate": 0.0008003536084848431, "loss": 0.8421, "step": 8295 }, { "epoch": 0.5771331176736583, "grad_norm": 1.234375, "learning_rate": 0.0008001328169590571, "loss": 0.9898, "step": 8296 }, { "epoch": 0.5772026853107934, "grad_norm": 1.109375, "learning_rate": 0.0007999120355815197, "loss": 0.9763, "step": 8297 }, { "epoch": 0.5772722529479286, "grad_norm": 1.171875, "learning_rate": 0.0007996912643634409, "loss": 0.7011, "step": 8298 }, { "epoch": 0.5773418205850638, "grad_norm": 1.3046875, "learning_rate": 0.0007994705033160296, "loss": 0.9451, "step": 8299 }, { "epoch": 0.5774113882221991, "grad_norm": 1.21875, "learning_rate": 0.0007992497524504954, "loss": 1.1057, "step": 8300 }, { "epoch": 0.5774809558593342, "grad_norm": 1.1796875, "learning_rate": 0.0007990290117780472, "loss": 0.9615, "step": 8301 }, { "epoch": 0.5775505234964694, "grad_norm": 1.3984375, "learning_rate": 0.0007988082813098927, "loss": 1.1864, "step": 8302 }, { "epoch": 0.5776200911336047, "grad_norm": 0.828125, "learning_rate": 0.0007985875610572393, "loss": 0.6633, "step": 8303 }, { "epoch": 0.5776896587707399, "grad_norm": 0.92578125, "learning_rate": 0.0007983668510312947, "loss": 0.543, "step": 8304 }, { "epoch": 0.577759226407875, "grad_norm": 0.9609375, "learning_rate": 0.0007981461512432652, "loss": 0.4702, "step": 8305 }, { "epoch": 0.5778287940450103, "grad_norm": 1.2578125, "learning_rate": 0.0007979254617043565, "loss": 1.0051, "step": 8306 }, { "epoch": 0.5778983616821455, "grad_norm": 1.1640625, "learning_rate": 0.0007977047824257741, "loss": 0.9822, "step": 8307 }, { "epoch": 0.5779679293192806, "grad_norm": 1.0390625, "learning_rate": 0.0007974841134187236, "loss": 0.8998, "step": 8308 }, { "epoch": 0.5780374969564159, "grad_norm": 1.109375, "learning_rate": 0.000797263454694409, "loss": 0.7201, "step": 8309 }, { "epoch": 0.5781070645935511, "grad_norm": 1.0390625, "learning_rate": 0.0007970428062640345, "loss": 0.6195, "step": 8310 }, { "epoch": 0.5781766322306863, "grad_norm": 1.1640625, "learning_rate": 0.0007968221681388026, "loss": 0.9949, "step": 8311 }, { "epoch": 0.5782461998678214, "grad_norm": 1.1640625, "learning_rate": 0.0007966015403299175, "loss": 0.9154, "step": 8312 }, { "epoch": 0.5783157675049567, "grad_norm": 1.3046875, "learning_rate": 0.0007963809228485807, "loss": 0.7892, "step": 8313 }, { "epoch": 0.5783853351420919, "grad_norm": 1.21875, "learning_rate": 0.0007961603157059943, "loss": 0.6663, "step": 8314 }, { "epoch": 0.5784549027792271, "grad_norm": 0.984375, "learning_rate": 0.00079593971891336, "loss": 0.5892, "step": 8315 }, { "epoch": 0.5785244704163623, "grad_norm": 1.125, "learning_rate": 0.0007957191324818781, "loss": 0.8504, "step": 8316 }, { "epoch": 0.5785940380534975, "grad_norm": 1.0234375, "learning_rate": 0.0007954985564227489, "loss": 0.7952, "step": 8317 }, { "epoch": 0.5786636056906327, "grad_norm": 1.3203125, "learning_rate": 0.000795277990747172, "loss": 0.8688, "step": 8318 }, { "epoch": 0.578733173327768, "grad_norm": 1.4609375, "learning_rate": 0.0007950574354663474, "loss": 1.0818, "step": 8319 }, { "epoch": 0.5788027409649031, "grad_norm": 1.1171875, "learning_rate": 0.0007948368905914729, "loss": 0.8581, "step": 8320 }, { "epoch": 0.5788723086020383, "grad_norm": 1.1875, "learning_rate": 0.0007946163561337468, "loss": 0.6678, "step": 8321 }, { "epoch": 0.5789418762391736, "grad_norm": 1.171875, "learning_rate": 0.0007943958321043674, "loss": 0.8144, "step": 8322 }, { "epoch": 0.5790114438763088, "grad_norm": 1.234375, "learning_rate": 0.0007941753185145312, "loss": 0.7094, "step": 8323 }, { "epoch": 0.5790810115134439, "grad_norm": 0.94921875, "learning_rate": 0.0007939548153754347, "loss": 0.7981, "step": 8324 }, { "epoch": 0.5791505791505791, "grad_norm": 1.0078125, "learning_rate": 0.0007937343226982741, "loss": 0.8511, "step": 8325 }, { "epoch": 0.5792201467877144, "grad_norm": 1.34375, "learning_rate": 0.0007935138404942452, "loss": 0.8673, "step": 8326 }, { "epoch": 0.5792897144248496, "grad_norm": 1.28125, "learning_rate": 0.0007932933687745426, "loss": 0.8772, "step": 8327 }, { "epoch": 0.5793592820619847, "grad_norm": 1.0078125, "learning_rate": 0.0007930729075503606, "loss": 0.6199, "step": 8328 }, { "epoch": 0.57942884969912, "grad_norm": 1.171875, "learning_rate": 0.0007928524568328936, "loss": 0.8623, "step": 8329 }, { "epoch": 0.5794984173362552, "grad_norm": 1.203125, "learning_rate": 0.0007926320166333349, "loss": 1.0028, "step": 8330 }, { "epoch": 0.5795679849733903, "grad_norm": 1.1484375, "learning_rate": 0.0007924115869628771, "loss": 0.7935, "step": 8331 }, { "epoch": 0.5796375526105256, "grad_norm": 1.234375, "learning_rate": 0.0007921911678327123, "loss": 0.9247, "step": 8332 }, { "epoch": 0.5797071202476608, "grad_norm": 1.2109375, "learning_rate": 0.0007919707592540329, "loss": 0.8797, "step": 8333 }, { "epoch": 0.579776687884796, "grad_norm": 1.2109375, "learning_rate": 0.0007917503612380298, "loss": 0.8182, "step": 8334 }, { "epoch": 0.5798462555219313, "grad_norm": 1.046875, "learning_rate": 0.0007915299737958933, "loss": 0.8666, "step": 8335 }, { "epoch": 0.5799158231590664, "grad_norm": 1.6953125, "learning_rate": 0.0007913095969388143, "loss": 0.9228, "step": 8336 }, { "epoch": 0.5799853907962016, "grad_norm": 1.53125, "learning_rate": 0.0007910892306779822, "loss": 0.9564, "step": 8337 }, { "epoch": 0.5800549584333368, "grad_norm": 1.0703125, "learning_rate": 0.0007908688750245858, "loss": 0.8895, "step": 8338 }, { "epoch": 0.580124526070472, "grad_norm": 1.390625, "learning_rate": 0.0007906485299898137, "loss": 0.8285, "step": 8339 }, { "epoch": 0.5801940937076072, "grad_norm": 1.3046875, "learning_rate": 0.0007904281955848543, "loss": 0.8271, "step": 8340 }, { "epoch": 0.5802636613447424, "grad_norm": 1.1328125, "learning_rate": 0.0007902078718208947, "loss": 0.6401, "step": 8341 }, { "epoch": 0.5803332289818777, "grad_norm": 1.0078125, "learning_rate": 0.0007899875587091216, "loss": 0.8646, "step": 8342 }, { "epoch": 0.5804027966190128, "grad_norm": 1.3515625, "learning_rate": 0.0007897672562607221, "loss": 0.8125, "step": 8343 }, { "epoch": 0.580472364256148, "grad_norm": 1.0859375, "learning_rate": 0.0007895469644868819, "loss": 0.7378, "step": 8344 }, { "epoch": 0.5805419318932833, "grad_norm": 1.328125, "learning_rate": 0.0007893266833987857, "loss": 1.0015, "step": 8345 }, { "epoch": 0.5806114995304185, "grad_norm": 1.0625, "learning_rate": 0.0007891064130076187, "loss": 0.6859, "step": 8346 }, { "epoch": 0.5806810671675536, "grad_norm": 1.4375, "learning_rate": 0.0007888861533245652, "loss": 0.9137, "step": 8347 }, { "epoch": 0.5807506348046888, "grad_norm": 1.265625, "learning_rate": 0.0007886659043608086, "loss": 0.912, "step": 8348 }, { "epoch": 0.5808202024418241, "grad_norm": 1.09375, "learning_rate": 0.0007884456661275321, "loss": 0.8555, "step": 8349 }, { "epoch": 0.5808897700789593, "grad_norm": 1.234375, "learning_rate": 0.0007882254386359184, "loss": 1.044, "step": 8350 }, { "epoch": 0.5809593377160944, "grad_norm": 1.0, "learning_rate": 0.0007880052218971499, "loss": 0.5766, "step": 8351 }, { "epoch": 0.5810289053532297, "grad_norm": 1.0546875, "learning_rate": 0.0007877850159224073, "loss": 0.6393, "step": 8352 }, { "epoch": 0.5810984729903649, "grad_norm": 1.0859375, "learning_rate": 0.0007875648207228719, "loss": 0.8741, "step": 8353 }, { "epoch": 0.5811680406275, "grad_norm": 0.83984375, "learning_rate": 0.0007873446363097246, "loss": 0.7753, "step": 8354 }, { "epoch": 0.5812376082646353, "grad_norm": 1.03125, "learning_rate": 0.0007871244626941444, "loss": 0.9385, "step": 8355 }, { "epoch": 0.5813071759017705, "grad_norm": 1.0859375, "learning_rate": 0.0007869042998873108, "loss": 0.9722, "step": 8356 }, { "epoch": 0.5813767435389057, "grad_norm": 1.0859375, "learning_rate": 0.0007866841479004032, "loss": 1.0156, "step": 8357 }, { "epoch": 0.581446311176041, "grad_norm": 1.0078125, "learning_rate": 0.0007864640067445994, "loss": 1.0329, "step": 8358 }, { "epoch": 0.5815158788131761, "grad_norm": 1.4296875, "learning_rate": 0.0007862438764310769, "loss": 0.8019, "step": 8359 }, { "epoch": 0.5815854464503113, "grad_norm": 1.1796875, "learning_rate": 0.0007860237569710127, "loss": 0.8743, "step": 8360 }, { "epoch": 0.5816550140874465, "grad_norm": 0.859375, "learning_rate": 0.0007858036483755842, "loss": 0.5349, "step": 8361 }, { "epoch": 0.5817245817245817, "grad_norm": 0.98046875, "learning_rate": 0.0007855835506559663, "loss": 0.9931, "step": 8362 }, { "epoch": 0.5817941493617169, "grad_norm": 0.96484375, "learning_rate": 0.0007853634638233349, "loss": 0.8545, "step": 8363 }, { "epoch": 0.5818637169988521, "grad_norm": 1.2265625, "learning_rate": 0.0007851433878888652, "loss": 0.899, "step": 8364 }, { "epoch": 0.5819332846359874, "grad_norm": 0.92578125, "learning_rate": 0.0007849233228637315, "loss": 0.7024, "step": 8365 }, { "epoch": 0.5820028522731225, "grad_norm": 1.03125, "learning_rate": 0.0007847032687591072, "loss": 0.8722, "step": 8366 }, { "epoch": 0.5820724199102577, "grad_norm": 1.09375, "learning_rate": 0.0007844832255861654, "loss": 1.013, "step": 8367 }, { "epoch": 0.582141987547393, "grad_norm": 0.80859375, "learning_rate": 0.0007842631933560794, "loss": 0.7286, "step": 8368 }, { "epoch": 0.5822115551845282, "grad_norm": 1.140625, "learning_rate": 0.0007840431720800212, "loss": 0.8038, "step": 8369 }, { "epoch": 0.5822811228216633, "grad_norm": 1.34375, "learning_rate": 0.000783823161769162, "loss": 0.8539, "step": 8370 }, { "epoch": 0.5823506904587986, "grad_norm": 1.4140625, "learning_rate": 0.0007836031624346731, "loss": 0.7812, "step": 8371 }, { "epoch": 0.5824202580959338, "grad_norm": 1.2109375, "learning_rate": 0.000783383174087725, "loss": 0.901, "step": 8372 }, { "epoch": 0.582489825733069, "grad_norm": 1.265625, "learning_rate": 0.0007831631967394876, "loss": 0.7741, "step": 8373 }, { "epoch": 0.5825593933702041, "grad_norm": 1.09375, "learning_rate": 0.0007829432304011297, "loss": 0.886, "step": 8374 }, { "epoch": 0.5826289610073394, "grad_norm": 1.203125, "learning_rate": 0.0007827232750838207, "loss": 0.8501, "step": 8375 }, { "epoch": 0.5826985286444746, "grad_norm": 1.1484375, "learning_rate": 0.0007825033307987289, "loss": 0.9736, "step": 8376 }, { "epoch": 0.5827680962816097, "grad_norm": 1.1171875, "learning_rate": 0.0007822833975570213, "loss": 0.9093, "step": 8377 }, { "epoch": 0.582837663918745, "grad_norm": 1.0234375, "learning_rate": 0.0007820634753698656, "loss": 1.0385, "step": 8378 }, { "epoch": 0.5829072315558802, "grad_norm": 0.99609375, "learning_rate": 0.0007818435642484283, "loss": 0.8872, "step": 8379 }, { "epoch": 0.5829767991930154, "grad_norm": 0.9765625, "learning_rate": 0.000781623664203875, "loss": 0.7607, "step": 8380 }, { "epoch": 0.5830463668301507, "grad_norm": 0.9140625, "learning_rate": 0.0007814037752473711, "loss": 0.6788, "step": 8381 }, { "epoch": 0.5831159344672858, "grad_norm": 1.0078125, "learning_rate": 0.000781183897390082, "loss": 0.7802, "step": 8382 }, { "epoch": 0.583185502104421, "grad_norm": 1.1015625, "learning_rate": 0.0007809640306431718, "loss": 0.699, "step": 8383 }, { "epoch": 0.5832550697415563, "grad_norm": 0.93359375, "learning_rate": 0.000780744175017804, "loss": 0.688, "step": 8384 }, { "epoch": 0.5833246373786914, "grad_norm": 1.0859375, "learning_rate": 0.0007805243305251415, "loss": 0.8424, "step": 8385 }, { "epoch": 0.5833942050158266, "grad_norm": 1.453125, "learning_rate": 0.0007803044971763477, "loss": 0.9309, "step": 8386 }, { "epoch": 0.5834637726529618, "grad_norm": 1.0625, "learning_rate": 0.0007800846749825842, "loss": 0.7958, "step": 8387 }, { "epoch": 0.5835333402900971, "grad_norm": 1.03125, "learning_rate": 0.000779864863955012, "loss": 0.7098, "step": 8388 }, { "epoch": 0.5836029079272322, "grad_norm": 1.0625, "learning_rate": 0.0007796450641047928, "loss": 0.9335, "step": 8389 }, { "epoch": 0.5836724755643674, "grad_norm": 1.078125, "learning_rate": 0.0007794252754430866, "loss": 1.0127, "step": 8390 }, { "epoch": 0.5837420432015027, "grad_norm": 1.078125, "learning_rate": 0.0007792054979810531, "loss": 0.8231, "step": 8391 }, { "epoch": 0.5838116108386379, "grad_norm": 1.359375, "learning_rate": 0.0007789857317298512, "loss": 0.829, "step": 8392 }, { "epoch": 0.583881178475773, "grad_norm": 1.171875, "learning_rate": 0.0007787659767006403, "loss": 0.9883, "step": 8393 }, { "epoch": 0.5839507461129083, "grad_norm": 0.9921875, "learning_rate": 0.0007785462329045779, "loss": 0.8823, "step": 8394 }, { "epoch": 0.5840203137500435, "grad_norm": 0.859375, "learning_rate": 0.0007783265003528212, "loss": 0.6567, "step": 8395 }, { "epoch": 0.5840898813871787, "grad_norm": 1.015625, "learning_rate": 0.0007781067790565278, "loss": 0.6483, "step": 8396 }, { "epoch": 0.5841594490243139, "grad_norm": 0.9921875, "learning_rate": 0.000777887069026854, "loss": 0.8813, "step": 8397 }, { "epoch": 0.5842290166614491, "grad_norm": 1.0703125, "learning_rate": 0.000777667370274955, "loss": 0.9875, "step": 8398 }, { "epoch": 0.5842985842985843, "grad_norm": 0.9453125, "learning_rate": 0.0007774476828119861, "loss": 0.554, "step": 8399 }, { "epoch": 0.5843681519357194, "grad_norm": 1.1640625, "learning_rate": 0.0007772280066491024, "loss": 1.1132, "step": 8400 }, { "epoch": 0.5844377195728547, "grad_norm": 1.3359375, "learning_rate": 0.0007770083417974578, "loss": 0.9085, "step": 8401 }, { "epoch": 0.5845072872099899, "grad_norm": 0.93359375, "learning_rate": 0.0007767886882682053, "loss": 0.6959, "step": 8402 }, { "epoch": 0.5845768548471251, "grad_norm": 1.0703125, "learning_rate": 0.0007765690460724982, "loss": 0.5727, "step": 8403 }, { "epoch": 0.5846464224842604, "grad_norm": 1.2421875, "learning_rate": 0.0007763494152214892, "loss": 0.8546, "step": 8404 }, { "epoch": 0.5847159901213955, "grad_norm": 0.9140625, "learning_rate": 0.0007761297957263291, "loss": 0.8025, "step": 8405 }, { "epoch": 0.5847855577585307, "grad_norm": 1.640625, "learning_rate": 0.0007759101875981695, "loss": 1.1985, "step": 8406 }, { "epoch": 0.584855125395666, "grad_norm": 1.4140625, "learning_rate": 0.0007756905908481615, "loss": 0.8312, "step": 8407 }, { "epoch": 0.5849246930328011, "grad_norm": 1.2734375, "learning_rate": 0.0007754710054874548, "loss": 0.8647, "step": 8408 }, { "epoch": 0.5849942606699363, "grad_norm": 1.0546875, "learning_rate": 0.0007752514315271981, "loss": 0.8611, "step": 8409 }, { "epoch": 0.5850638283070716, "grad_norm": 1.1484375, "learning_rate": 0.0007750318689785413, "loss": 0.7379, "step": 8410 }, { "epoch": 0.5851333959442068, "grad_norm": 1.2421875, "learning_rate": 0.0007748123178526324, "loss": 0.6662, "step": 8411 }, { "epoch": 0.5852029635813419, "grad_norm": 1.390625, "learning_rate": 0.0007745927781606188, "loss": 1.269, "step": 8412 }, { "epoch": 0.5852725312184771, "grad_norm": 0.99609375, "learning_rate": 0.0007743732499136476, "loss": 0.7875, "step": 8413 }, { "epoch": 0.5853420988556124, "grad_norm": 1.015625, "learning_rate": 0.0007741537331228657, "loss": 0.6835, "step": 8414 }, { "epoch": 0.5854116664927476, "grad_norm": 1.25, "learning_rate": 0.000773934227799419, "loss": 0.877, "step": 8415 }, { "epoch": 0.5854812341298827, "grad_norm": 1.140625, "learning_rate": 0.0007737147339544526, "loss": 0.9002, "step": 8416 }, { "epoch": 0.585550801767018, "grad_norm": 0.9765625, "learning_rate": 0.0007734952515991114, "loss": 0.918, "step": 8417 }, { "epoch": 0.5856203694041532, "grad_norm": 1.203125, "learning_rate": 0.00077327578074454, "loss": 0.9253, "step": 8418 }, { "epoch": 0.5856899370412884, "grad_norm": 1.2421875, "learning_rate": 0.0007730563214018814, "loss": 1.0449, "step": 8419 }, { "epoch": 0.5857595046784236, "grad_norm": 1.5, "learning_rate": 0.0007728368735822787, "loss": 0.6915, "step": 8420 }, { "epoch": 0.5858290723155588, "grad_norm": 1.328125, "learning_rate": 0.0007726174372968748, "loss": 1.0598, "step": 8421 }, { "epoch": 0.585898639952694, "grad_norm": 0.91015625, "learning_rate": 0.0007723980125568116, "loss": 0.7353, "step": 8422 }, { "epoch": 0.5859682075898293, "grad_norm": 1.0390625, "learning_rate": 0.0007721785993732296, "loss": 0.8202, "step": 8423 }, { "epoch": 0.5860377752269644, "grad_norm": 1.109375, "learning_rate": 0.0007719591977572704, "loss": 0.7018, "step": 8424 }, { "epoch": 0.5861073428640996, "grad_norm": 1.0, "learning_rate": 0.0007717398077200738, "loss": 0.6593, "step": 8425 }, { "epoch": 0.5861769105012348, "grad_norm": 1.1171875, "learning_rate": 0.0007715204292727791, "loss": 0.962, "step": 8426 }, { "epoch": 0.58624647813837, "grad_norm": 1.125, "learning_rate": 0.0007713010624265251, "loss": 0.7747, "step": 8427 }, { "epoch": 0.5863160457755052, "grad_norm": 1.203125, "learning_rate": 0.0007710817071924507, "loss": 0.7734, "step": 8428 }, { "epoch": 0.5863856134126404, "grad_norm": 1.109375, "learning_rate": 0.0007708623635816936, "loss": 0.7227, "step": 8429 }, { "epoch": 0.5864551810497757, "grad_norm": 1.0390625, "learning_rate": 0.0007706430316053903, "loss": 0.841, "step": 8430 }, { "epoch": 0.5865247486869108, "grad_norm": 0.90625, "learning_rate": 0.0007704237112746779, "loss": 0.5351, "step": 8431 }, { "epoch": 0.586594316324046, "grad_norm": 1.171875, "learning_rate": 0.0007702044026006927, "loss": 0.6932, "step": 8432 }, { "epoch": 0.5866638839611813, "grad_norm": 1.1171875, "learning_rate": 0.0007699851055945693, "loss": 0.8046, "step": 8433 }, { "epoch": 0.5867334515983165, "grad_norm": 0.96875, "learning_rate": 0.0007697658202674427, "loss": 0.8083, "step": 8434 }, { "epoch": 0.5868030192354516, "grad_norm": 0.84765625, "learning_rate": 0.0007695465466304476, "loss": 0.7808, "step": 8435 }, { "epoch": 0.5868725868725869, "grad_norm": 1.2890625, "learning_rate": 0.0007693272846947173, "loss": 1.0373, "step": 8436 }, { "epoch": 0.5869421545097221, "grad_norm": 1.1640625, "learning_rate": 0.0007691080344713845, "loss": 0.923, "step": 8437 }, { "epoch": 0.5870117221468573, "grad_norm": 1.2578125, "learning_rate": 0.0007688887959715823, "loss": 0.8264, "step": 8438 }, { "epoch": 0.5870812897839924, "grad_norm": 1.0, "learning_rate": 0.0007686695692064419, "loss": 0.9924, "step": 8439 }, { "epoch": 0.5871508574211277, "grad_norm": 1.03125, "learning_rate": 0.0007684503541870952, "loss": 0.7213, "step": 8440 }, { "epoch": 0.5872204250582629, "grad_norm": 1.625, "learning_rate": 0.0007682311509246719, "loss": 0.9749, "step": 8441 }, { "epoch": 0.587289992695398, "grad_norm": 1.453125, "learning_rate": 0.0007680119594303028, "loss": 0.9688, "step": 8442 }, { "epoch": 0.5873595603325333, "grad_norm": 1.0859375, "learning_rate": 0.0007677927797151172, "loss": 0.7979, "step": 8443 }, { "epoch": 0.5874291279696685, "grad_norm": 1.3515625, "learning_rate": 0.0007675736117902435, "loss": 0.8144, "step": 8444 }, { "epoch": 0.5874986956068037, "grad_norm": 1.109375, "learning_rate": 0.0007673544556668104, "loss": 0.937, "step": 8445 }, { "epoch": 0.587568263243939, "grad_norm": 1.0546875, "learning_rate": 0.0007671353113559455, "loss": 0.8635, "step": 8446 }, { "epoch": 0.5876378308810741, "grad_norm": 1.203125, "learning_rate": 0.000766916178868776, "loss": 0.7995, "step": 8447 }, { "epoch": 0.5877073985182093, "grad_norm": 1.15625, "learning_rate": 0.0007666970582164277, "loss": 1.0202, "step": 8448 }, { "epoch": 0.5877769661553446, "grad_norm": 0.984375, "learning_rate": 0.0007664779494100269, "loss": 0.8607, "step": 8449 }, { "epoch": 0.5878465337924798, "grad_norm": 1.0, "learning_rate": 0.0007662588524606992, "loss": 0.8812, "step": 8450 }, { "epoch": 0.5879161014296149, "grad_norm": 1.5390625, "learning_rate": 0.000766039767379568, "loss": 0.9578, "step": 8451 }, { "epoch": 0.5879856690667501, "grad_norm": 1.234375, "learning_rate": 0.0007658206941777591, "loss": 0.663, "step": 8452 }, { "epoch": 0.5880552367038854, "grad_norm": 1.296875, "learning_rate": 0.0007656016328663944, "loss": 0.7829, "step": 8453 }, { "epoch": 0.5881248043410205, "grad_norm": 1.1328125, "learning_rate": 0.0007653825834565977, "loss": 0.6719, "step": 8454 }, { "epoch": 0.5881943719781557, "grad_norm": 0.7734375, "learning_rate": 0.0007651635459594905, "loss": 0.6089, "step": 8455 }, { "epoch": 0.588263939615291, "grad_norm": 1.1484375, "learning_rate": 0.000764944520386195, "loss": 0.8261, "step": 8456 }, { "epoch": 0.5883335072524262, "grad_norm": 1.203125, "learning_rate": 0.0007647255067478321, "loss": 0.7335, "step": 8457 }, { "epoch": 0.5884030748895613, "grad_norm": 1.21875, "learning_rate": 0.0007645065050555216, "loss": 0.6148, "step": 8458 }, { "epoch": 0.5884726425266966, "grad_norm": 1.3046875, "learning_rate": 0.0007642875153203843, "loss": 1.1661, "step": 8459 }, { "epoch": 0.5885422101638318, "grad_norm": 1.21875, "learning_rate": 0.0007640685375535388, "loss": 0.7649, "step": 8460 }, { "epoch": 0.588611777800967, "grad_norm": 1.0234375, "learning_rate": 0.0007638495717661038, "loss": 0.7276, "step": 8461 }, { "epoch": 0.5886813454381022, "grad_norm": 1.0546875, "learning_rate": 0.0007636306179691969, "loss": 0.8225, "step": 8462 }, { "epoch": 0.5887509130752374, "grad_norm": 0.94921875, "learning_rate": 0.0007634116761739362, "loss": 0.6969, "step": 8463 }, { "epoch": 0.5888204807123726, "grad_norm": 1.109375, "learning_rate": 0.0007631927463914382, "loss": 0.7609, "step": 8464 }, { "epoch": 0.5888900483495078, "grad_norm": 1.234375, "learning_rate": 0.0007629738286328187, "loss": 0.6378, "step": 8465 }, { "epoch": 0.588959615986643, "grad_norm": 1.0390625, "learning_rate": 0.0007627549229091932, "loss": 0.9368, "step": 8466 }, { "epoch": 0.5890291836237782, "grad_norm": 1.0234375, "learning_rate": 0.0007625360292316773, "loss": 0.7146, "step": 8467 }, { "epoch": 0.5890987512609134, "grad_norm": 1.1640625, "learning_rate": 0.000762317147611385, "loss": 0.8827, "step": 8468 }, { "epoch": 0.5891683188980487, "grad_norm": 0.9609375, "learning_rate": 0.0007620982780594297, "loss": 0.7071, "step": 8469 }, { "epoch": 0.5892378865351838, "grad_norm": 1.140625, "learning_rate": 0.0007618794205869247, "loss": 0.8249, "step": 8470 }, { "epoch": 0.589307454172319, "grad_norm": 1.015625, "learning_rate": 0.0007616605752049827, "loss": 0.5886, "step": 8471 }, { "epoch": 0.5893770218094543, "grad_norm": 0.8984375, "learning_rate": 0.0007614417419247155, "loss": 0.6516, "step": 8472 }, { "epoch": 0.5894465894465895, "grad_norm": 1.0546875, "learning_rate": 0.0007612229207572337, "loss": 0.7519, "step": 8473 }, { "epoch": 0.5895161570837246, "grad_norm": 1.0, "learning_rate": 0.0007610041117136488, "loss": 0.6568, "step": 8474 }, { "epoch": 0.5895857247208599, "grad_norm": 1.046875, "learning_rate": 0.0007607853148050706, "loss": 0.7215, "step": 8475 }, { "epoch": 0.5896552923579951, "grad_norm": 0.95703125, "learning_rate": 0.000760566530042608, "loss": 0.7886, "step": 8476 }, { "epoch": 0.5897248599951302, "grad_norm": 1.0390625, "learning_rate": 0.0007603477574373705, "loss": 0.7763, "step": 8477 }, { "epoch": 0.5897944276322654, "grad_norm": 0.85546875, "learning_rate": 0.0007601289970004658, "loss": 0.641, "step": 8478 }, { "epoch": 0.5898639952694007, "grad_norm": 1.421875, "learning_rate": 0.0007599102487430018, "loss": 0.6329, "step": 8479 }, { "epoch": 0.5899335629065359, "grad_norm": 1.3828125, "learning_rate": 0.0007596915126760848, "loss": 0.9425, "step": 8480 }, { "epoch": 0.590003130543671, "grad_norm": 1.1015625, "learning_rate": 0.0007594727888108219, "loss": 1.0997, "step": 8481 }, { "epoch": 0.5900726981808063, "grad_norm": 0.98828125, "learning_rate": 0.0007592540771583185, "loss": 0.8175, "step": 8482 }, { "epoch": 0.5901422658179415, "grad_norm": 1.171875, "learning_rate": 0.0007590353777296793, "loss": 0.8869, "step": 8483 }, { "epoch": 0.5902118334550767, "grad_norm": 0.92578125, "learning_rate": 0.0007588166905360091, "loss": 0.7391, "step": 8484 }, { "epoch": 0.5902814010922119, "grad_norm": 1.2578125, "learning_rate": 0.0007585980155884118, "loss": 0.9174, "step": 8485 }, { "epoch": 0.5903509687293471, "grad_norm": 1.171875, "learning_rate": 0.0007583793528979908, "loss": 0.8217, "step": 8486 }, { "epoch": 0.5904205363664823, "grad_norm": 1.328125, "learning_rate": 0.0007581607024758479, "loss": 0.8131, "step": 8487 }, { "epoch": 0.5904901040036176, "grad_norm": 1.2734375, "learning_rate": 0.0007579420643330858, "loss": 0.6882, "step": 8488 }, { "epoch": 0.5905596716407527, "grad_norm": 1.2890625, "learning_rate": 0.0007577234384808058, "loss": 0.7639, "step": 8489 }, { "epoch": 0.5906292392778879, "grad_norm": 1.015625, "learning_rate": 0.0007575048249301078, "loss": 0.5981, "step": 8490 }, { "epoch": 0.5906988069150231, "grad_norm": 1.0703125, "learning_rate": 0.0007572862236920932, "loss": 0.677, "step": 8491 }, { "epoch": 0.5907683745521584, "grad_norm": 0.8984375, "learning_rate": 0.0007570676347778605, "loss": 0.6852, "step": 8492 }, { "epoch": 0.5908379421892935, "grad_norm": 1.0234375, "learning_rate": 0.0007568490581985091, "loss": 0.8176, "step": 8493 }, { "epoch": 0.5909075098264287, "grad_norm": 1.0625, "learning_rate": 0.0007566304939651366, "loss": 1.033, "step": 8494 }, { "epoch": 0.590977077463564, "grad_norm": 1.0703125, "learning_rate": 0.0007564119420888411, "loss": 0.9066, "step": 8495 }, { "epoch": 0.5910466451006992, "grad_norm": 1.265625, "learning_rate": 0.0007561934025807196, "loss": 0.5497, "step": 8496 }, { "epoch": 0.5911162127378343, "grad_norm": 0.796875, "learning_rate": 0.0007559748754518677, "loss": 0.6161, "step": 8497 }, { "epoch": 0.5911857803749696, "grad_norm": 1.125, "learning_rate": 0.000755756360713382, "loss": 0.746, "step": 8498 }, { "epoch": 0.5912553480121048, "grad_norm": 1.03125, "learning_rate": 0.0007555378583763572, "loss": 1.0243, "step": 8499 }, { "epoch": 0.59132491564924, "grad_norm": 1.0625, "learning_rate": 0.0007553193684518881, "loss": 0.9379, "step": 8500 }, { "epoch": 0.5913944832863752, "grad_norm": 1.1953125, "learning_rate": 0.0007551008909510676, "loss": 0.7344, "step": 8501 }, { "epoch": 0.5914640509235104, "grad_norm": 0.9375, "learning_rate": 0.0007548824258849898, "loss": 0.6471, "step": 8502 }, { "epoch": 0.5915336185606456, "grad_norm": 1.3359375, "learning_rate": 0.0007546639732647468, "loss": 1.029, "step": 8503 }, { "epoch": 0.5916031861977807, "grad_norm": 0.9609375, "learning_rate": 0.0007544455331014305, "loss": 0.6613, "step": 8504 }, { "epoch": 0.591672753834916, "grad_norm": 1.2421875, "learning_rate": 0.0007542271054061328, "loss": 0.9418, "step": 8505 }, { "epoch": 0.5917423214720512, "grad_norm": 1.0625, "learning_rate": 0.0007540086901899436, "loss": 0.8629, "step": 8506 }, { "epoch": 0.5918118891091864, "grad_norm": 1.15625, "learning_rate": 0.0007537902874639535, "loss": 0.8829, "step": 8507 }, { "epoch": 0.5918814567463216, "grad_norm": 1.8515625, "learning_rate": 0.0007535718972392512, "loss": 0.9109, "step": 8508 }, { "epoch": 0.5919510243834568, "grad_norm": 1.453125, "learning_rate": 0.0007533535195269262, "loss": 0.847, "step": 8509 }, { "epoch": 0.592020592020592, "grad_norm": 0.890625, "learning_rate": 0.000753135154338066, "loss": 0.7576, "step": 8510 }, { "epoch": 0.5920901596577273, "grad_norm": 0.96875, "learning_rate": 0.0007529168016837584, "loss": 0.5772, "step": 8511 }, { "epoch": 0.5921597272948624, "grad_norm": 1.296875, "learning_rate": 0.0007526984615750904, "loss": 1.1715, "step": 8512 }, { "epoch": 0.5922292949319976, "grad_norm": 1.21875, "learning_rate": 0.0007524801340231481, "loss": 1.0781, "step": 8513 }, { "epoch": 0.5922988625691329, "grad_norm": 1.2421875, "learning_rate": 0.0007522618190390171, "loss": 0.8278, "step": 8514 }, { "epoch": 0.5923684302062681, "grad_norm": 1.1640625, "learning_rate": 0.0007520435166337817, "loss": 0.6436, "step": 8515 }, { "epoch": 0.5924379978434032, "grad_norm": 1.0234375, "learning_rate": 0.0007518252268185272, "loss": 0.7351, "step": 8516 }, { "epoch": 0.5925075654805384, "grad_norm": 1.2109375, "learning_rate": 0.0007516069496043365, "loss": 0.913, "step": 8517 }, { "epoch": 0.5925771331176737, "grad_norm": 1.1640625, "learning_rate": 0.0007513886850022928, "loss": 1.0798, "step": 8518 }, { "epoch": 0.5926467007548089, "grad_norm": 0.8828125, "learning_rate": 0.0007511704330234791, "loss": 0.6147, "step": 8519 }, { "epoch": 0.592716268391944, "grad_norm": 1.0078125, "learning_rate": 0.0007509521936789763, "loss": 0.6515, "step": 8520 }, { "epoch": 0.5927858360290793, "grad_norm": 1.328125, "learning_rate": 0.000750733966979866, "loss": 0.8566, "step": 8521 }, { "epoch": 0.5928554036662145, "grad_norm": 1.3671875, "learning_rate": 0.000750515752937228, "loss": 1.1151, "step": 8522 }, { "epoch": 0.5929249713033496, "grad_norm": 1.3203125, "learning_rate": 0.0007502975515621431, "loss": 0.803, "step": 8523 }, { "epoch": 0.5929945389404849, "grad_norm": 1.125, "learning_rate": 0.0007500793628656897, "loss": 0.9002, "step": 8524 }, { "epoch": 0.5930641065776201, "grad_norm": 1.0234375, "learning_rate": 0.0007498611868589464, "loss": 0.8477, "step": 8525 }, { "epoch": 0.5931336742147553, "grad_norm": 1.1796875, "learning_rate": 0.0007496430235529916, "loss": 0.8164, "step": 8526 }, { "epoch": 0.5932032418518906, "grad_norm": 0.8671875, "learning_rate": 0.000749424872958902, "loss": 0.6571, "step": 8527 }, { "epoch": 0.5932728094890257, "grad_norm": 1.1171875, "learning_rate": 0.0007492067350877546, "loss": 0.7534, "step": 8528 }, { "epoch": 0.5933423771261609, "grad_norm": 1.3046875, "learning_rate": 0.0007489886099506244, "loss": 0.8796, "step": 8529 }, { "epoch": 0.5934119447632961, "grad_norm": 0.85546875, "learning_rate": 0.000748770497558588, "loss": 0.8716, "step": 8530 }, { "epoch": 0.5934815124004313, "grad_norm": 1.265625, "learning_rate": 0.0007485523979227194, "loss": 0.6666, "step": 8531 }, { "epoch": 0.5935510800375665, "grad_norm": 0.9453125, "learning_rate": 0.0007483343110540923, "loss": 0.6986, "step": 8532 }, { "epoch": 0.5936206476747017, "grad_norm": 1.0234375, "learning_rate": 0.0007481162369637808, "loss": 0.6915, "step": 8533 }, { "epoch": 0.593690215311837, "grad_norm": 1.234375, "learning_rate": 0.0007478981756628571, "loss": 1.1911, "step": 8534 }, { "epoch": 0.5937597829489721, "grad_norm": 1.1171875, "learning_rate": 0.0007476801271623934, "loss": 0.7592, "step": 8535 }, { "epoch": 0.5938293505861073, "grad_norm": 1.0625, "learning_rate": 0.0007474620914734606, "loss": 0.7574, "step": 8536 }, { "epoch": 0.5938989182232426, "grad_norm": 1.0546875, "learning_rate": 0.0007472440686071305, "loss": 0.6431, "step": 8537 }, { "epoch": 0.5939684858603778, "grad_norm": 1.3046875, "learning_rate": 0.0007470260585744722, "loss": 0.8533, "step": 8538 }, { "epoch": 0.5940380534975129, "grad_norm": 1.0625, "learning_rate": 0.000746808061386556, "loss": 0.9223, "step": 8539 }, { "epoch": 0.5941076211346482, "grad_norm": 1.09375, "learning_rate": 0.0007465900770544498, "loss": 0.7645, "step": 8540 }, { "epoch": 0.5941771887717834, "grad_norm": 0.9296875, "learning_rate": 0.0007463721055892223, "loss": 0.7114, "step": 8541 }, { "epoch": 0.5942467564089186, "grad_norm": 1.171875, "learning_rate": 0.0007461541470019411, "loss": 0.9832, "step": 8542 }, { "epoch": 0.5943163240460537, "grad_norm": 1.1328125, "learning_rate": 0.0007459362013036725, "loss": 0.7882, "step": 8543 }, { "epoch": 0.594385891683189, "grad_norm": 0.9609375, "learning_rate": 0.0007457182685054834, "loss": 0.7626, "step": 8544 }, { "epoch": 0.5944554593203242, "grad_norm": 0.98828125, "learning_rate": 0.0007455003486184389, "loss": 0.5522, "step": 8545 }, { "epoch": 0.5945250269574593, "grad_norm": 1.4140625, "learning_rate": 0.0007452824416536039, "loss": 1.0521, "step": 8546 }, { "epoch": 0.5945945945945946, "grad_norm": 1.171875, "learning_rate": 0.0007450645476220424, "loss": 0.904, "step": 8547 }, { "epoch": 0.5946641622317298, "grad_norm": 1.109375, "learning_rate": 0.0007448466665348184, "loss": 0.9891, "step": 8548 }, { "epoch": 0.594733729868865, "grad_norm": 1.078125, "learning_rate": 0.0007446287984029944, "loss": 0.6378, "step": 8549 }, { "epoch": 0.5948032975060003, "grad_norm": 1.21875, "learning_rate": 0.0007444109432376329, "loss": 1.0443, "step": 8550 }, { "epoch": 0.5948728651431354, "grad_norm": 0.9140625, "learning_rate": 0.0007441931010497958, "loss": 0.768, "step": 8551 }, { "epoch": 0.5949424327802706, "grad_norm": 1.1015625, "learning_rate": 0.0007439752718505435, "loss": 0.7619, "step": 8552 }, { "epoch": 0.5950120004174059, "grad_norm": 1.21875, "learning_rate": 0.0007437574556509365, "loss": 0.9104, "step": 8553 }, { "epoch": 0.595081568054541, "grad_norm": 1.0546875, "learning_rate": 0.0007435396524620338, "loss": 0.7708, "step": 8554 }, { "epoch": 0.5951511356916762, "grad_norm": 1.0859375, "learning_rate": 0.0007433218622948956, "loss": 0.7666, "step": 8555 }, { "epoch": 0.5952207033288114, "grad_norm": 1.125, "learning_rate": 0.0007431040851605791, "loss": 0.8577, "step": 8556 }, { "epoch": 0.5952902709659467, "grad_norm": 1.1640625, "learning_rate": 0.0007428863210701422, "loss": 0.7772, "step": 8557 }, { "epoch": 0.5953598386030818, "grad_norm": 1.2421875, "learning_rate": 0.0007426685700346422, "loss": 0.6987, "step": 8558 }, { "epoch": 0.595429406240217, "grad_norm": 0.97265625, "learning_rate": 0.0007424508320651352, "loss": 0.7198, "step": 8559 }, { "epoch": 0.5954989738773523, "grad_norm": 0.95703125, "learning_rate": 0.0007422331071726769, "loss": 0.5827, "step": 8560 }, { "epoch": 0.5955685415144875, "grad_norm": 0.953125, "learning_rate": 0.0007420153953683215, "loss": 0.7368, "step": 8561 }, { "epoch": 0.5956381091516226, "grad_norm": 1.171875, "learning_rate": 0.0007417976966631249, "loss": 0.7991, "step": 8562 }, { "epoch": 0.5957076767887579, "grad_norm": 1.1953125, "learning_rate": 0.0007415800110681392, "loss": 0.87, "step": 8563 }, { "epoch": 0.5957772444258931, "grad_norm": 1.3671875, "learning_rate": 0.0007413623385944182, "loss": 0.7575, "step": 8564 }, { "epoch": 0.5958468120630283, "grad_norm": 1.0234375, "learning_rate": 0.0007411446792530141, "loss": 0.8132, "step": 8565 }, { "epoch": 0.5959163797001635, "grad_norm": 1.4140625, "learning_rate": 0.0007409270330549784, "loss": 0.8191, "step": 8566 }, { "epoch": 0.5959859473372987, "grad_norm": 1.296875, "learning_rate": 0.0007407094000113623, "loss": 0.7915, "step": 8567 }, { "epoch": 0.5960555149744339, "grad_norm": 1.3125, "learning_rate": 0.0007404917801332154, "loss": 0.8721, "step": 8568 }, { "epoch": 0.596125082611569, "grad_norm": 0.9453125, "learning_rate": 0.0007402741734315885, "loss": 0.7433, "step": 8569 }, { "epoch": 0.5961946502487043, "grad_norm": 0.96875, "learning_rate": 0.0007400565799175296, "loss": 0.517, "step": 8570 }, { "epoch": 0.5962642178858395, "grad_norm": 1.09375, "learning_rate": 0.0007398389996020873, "loss": 0.7589, "step": 8571 }, { "epoch": 0.5963337855229747, "grad_norm": 1.046875, "learning_rate": 0.0007396214324963098, "loss": 0.8574, "step": 8572 }, { "epoch": 0.59640335316011, "grad_norm": 0.96484375, "learning_rate": 0.0007394038786112431, "loss": 0.6718, "step": 8573 }, { "epoch": 0.5964729207972451, "grad_norm": 1.15625, "learning_rate": 0.000739186337957934, "loss": 0.796, "step": 8574 }, { "epoch": 0.5965424884343803, "grad_norm": 1.0234375, "learning_rate": 0.0007389688105474279, "loss": 0.9065, "step": 8575 }, { "epoch": 0.5966120560715156, "grad_norm": 1.1875, "learning_rate": 0.0007387512963907704, "loss": 0.7815, "step": 8576 }, { "epoch": 0.5966816237086507, "grad_norm": 0.84765625, "learning_rate": 0.000738533795499005, "loss": 0.7542, "step": 8577 }, { "epoch": 0.5967511913457859, "grad_norm": 1.2578125, "learning_rate": 0.0007383163078831754, "loss": 0.832, "step": 8578 }, { "epoch": 0.5968207589829212, "grad_norm": 1.4765625, "learning_rate": 0.000738098833554325, "loss": 0.9228, "step": 8579 }, { "epoch": 0.5968903266200564, "grad_norm": 1.1171875, "learning_rate": 0.0007378813725234958, "loss": 0.784, "step": 8580 }, { "epoch": 0.5969598942571915, "grad_norm": 0.9296875, "learning_rate": 0.0007376639248017291, "loss": 0.5069, "step": 8581 }, { "epoch": 0.5970294618943267, "grad_norm": 1.2109375, "learning_rate": 0.0007374464904000658, "loss": 0.6602, "step": 8582 }, { "epoch": 0.597099029531462, "grad_norm": 1.234375, "learning_rate": 0.0007372290693295469, "loss": 0.8191, "step": 8583 }, { "epoch": 0.5971685971685972, "grad_norm": 1.0703125, "learning_rate": 0.0007370116616012112, "loss": 0.8624, "step": 8584 }, { "epoch": 0.5972381648057323, "grad_norm": 1.078125, "learning_rate": 0.0007367942672260974, "loss": 0.9326, "step": 8585 }, { "epoch": 0.5973077324428676, "grad_norm": 1.1328125, "learning_rate": 0.0007365768862152447, "loss": 0.9081, "step": 8586 }, { "epoch": 0.5973773000800028, "grad_norm": 0.92578125, "learning_rate": 0.0007363595185796895, "loss": 0.837, "step": 8587 }, { "epoch": 0.597446867717138, "grad_norm": 1.3828125, "learning_rate": 0.0007361421643304692, "loss": 1.1247, "step": 8588 }, { "epoch": 0.5975164353542732, "grad_norm": 0.97265625, "learning_rate": 0.0007359248234786198, "loss": 0.7298, "step": 8589 }, { "epoch": 0.5975860029914084, "grad_norm": 1.3515625, "learning_rate": 0.0007357074960351771, "loss": 0.8085, "step": 8590 }, { "epoch": 0.5976555706285436, "grad_norm": 1.0546875, "learning_rate": 0.0007354901820111753, "loss": 0.9944, "step": 8591 }, { "epoch": 0.5977251382656789, "grad_norm": 1.25, "learning_rate": 0.0007352728814176489, "loss": 0.8763, "step": 8592 }, { "epoch": 0.597794705902814, "grad_norm": 1.1015625, "learning_rate": 0.0007350555942656311, "loss": 0.9391, "step": 8593 }, { "epoch": 0.5978642735399492, "grad_norm": 1.1953125, "learning_rate": 0.0007348383205661552, "loss": 0.8523, "step": 8594 }, { "epoch": 0.5979338411770844, "grad_norm": 1.0859375, "learning_rate": 0.0007346210603302528, "loss": 0.8748, "step": 8595 }, { "epoch": 0.5980034088142197, "grad_norm": 1.1171875, "learning_rate": 0.000734403813568955, "loss": 0.8543, "step": 8596 }, { "epoch": 0.5980729764513548, "grad_norm": 1.0390625, "learning_rate": 0.0007341865802932932, "loss": 0.812, "step": 8597 }, { "epoch": 0.59814254408849, "grad_norm": 1.15625, "learning_rate": 0.0007339693605142969, "loss": 0.7481, "step": 8598 }, { "epoch": 0.5982121117256253, "grad_norm": 0.9375, "learning_rate": 0.0007337521542429955, "loss": 0.6395, "step": 8599 }, { "epoch": 0.5982816793627604, "grad_norm": 1.078125, "learning_rate": 0.0007335349614904179, "loss": 0.7155, "step": 8600 }, { "epoch": 0.5983512469998956, "grad_norm": 1.09375, "learning_rate": 0.0007333177822675918, "loss": 0.8966, "step": 8601 }, { "epoch": 0.5984208146370309, "grad_norm": 0.84765625, "learning_rate": 0.0007331006165855448, "loss": 0.7555, "step": 8602 }, { "epoch": 0.5984903822741661, "grad_norm": 1.1328125, "learning_rate": 0.0007328834644553026, "loss": 0.8706, "step": 8603 }, { "epoch": 0.5985599499113012, "grad_norm": 0.96484375, "learning_rate": 0.0007326663258878923, "loss": 0.8422, "step": 8604 }, { "epoch": 0.5986295175484365, "grad_norm": 1.078125, "learning_rate": 0.0007324492008943382, "loss": 0.9008, "step": 8605 }, { "epoch": 0.5986990851855717, "grad_norm": 1.09375, "learning_rate": 0.000732232089485665, "loss": 0.7004, "step": 8606 }, { "epoch": 0.5987686528227069, "grad_norm": 0.92578125, "learning_rate": 0.0007320149916728969, "loss": 0.6937, "step": 8607 }, { "epoch": 0.598838220459842, "grad_norm": 1.1171875, "learning_rate": 0.0007317979074670569, "loss": 0.8367, "step": 8608 }, { "epoch": 0.5989077880969773, "grad_norm": 1.171875, "learning_rate": 0.0007315808368791671, "loss": 0.8127, "step": 8609 }, { "epoch": 0.5989773557341125, "grad_norm": 0.96875, "learning_rate": 0.0007313637799202493, "loss": 0.8375, "step": 8610 }, { "epoch": 0.5990469233712477, "grad_norm": 1.1953125, "learning_rate": 0.0007311467366013251, "loss": 0.7361, "step": 8611 }, { "epoch": 0.5991164910083829, "grad_norm": 1.03125, "learning_rate": 0.0007309297069334143, "loss": 0.7678, "step": 8612 }, { "epoch": 0.5991860586455181, "grad_norm": 1.3046875, "learning_rate": 0.0007307126909275365, "loss": 0.8631, "step": 8613 }, { "epoch": 0.5992556262826533, "grad_norm": 1.3359375, "learning_rate": 0.0007304956885947114, "loss": 1.013, "step": 8614 }, { "epoch": 0.5993251939197886, "grad_norm": 1.296875, "learning_rate": 0.0007302786999459569, "loss": 0.6568, "step": 8615 }, { "epoch": 0.5993947615569237, "grad_norm": 1.28125, "learning_rate": 0.0007300617249922903, "loss": 0.8126, "step": 8616 }, { "epoch": 0.5994643291940589, "grad_norm": 0.97265625, "learning_rate": 0.0007298447637447284, "loss": 0.5406, "step": 8617 }, { "epoch": 0.5995338968311942, "grad_norm": 1.171875, "learning_rate": 0.0007296278162142882, "loss": 0.6937, "step": 8618 }, { "epoch": 0.5996034644683294, "grad_norm": 1.1953125, "learning_rate": 0.0007294108824119846, "loss": 0.8836, "step": 8619 }, { "epoch": 0.5996730321054645, "grad_norm": 1.109375, "learning_rate": 0.0007291939623488324, "loss": 1.1188, "step": 8620 }, { "epoch": 0.5997425997425997, "grad_norm": 1.0390625, "learning_rate": 0.0007289770560358458, "loss": 0.8996, "step": 8621 }, { "epoch": 0.599812167379735, "grad_norm": 1.3046875, "learning_rate": 0.0007287601634840384, "loss": 0.8006, "step": 8622 }, { "epoch": 0.5998817350168701, "grad_norm": 0.984375, "learning_rate": 0.0007285432847044227, "loss": 0.7099, "step": 8623 }, { "epoch": 0.5999513026540053, "grad_norm": 1.203125, "learning_rate": 0.0007283264197080106, "loss": 0.9949, "step": 8624 }, { "epoch": 0.6000208702911406, "grad_norm": 1.0, "learning_rate": 0.0007281095685058137, "loss": 1.1107, "step": 8625 }, { "epoch": 0.6000904379282758, "grad_norm": 1.2890625, "learning_rate": 0.0007278927311088426, "loss": 0.8135, "step": 8626 }, { "epoch": 0.6001600055654109, "grad_norm": 1.25, "learning_rate": 0.0007276759075281069, "loss": 0.905, "step": 8627 }, { "epoch": 0.6002295732025462, "grad_norm": 1.3671875, "learning_rate": 0.0007274590977746161, "loss": 0.868, "step": 8628 }, { "epoch": 0.6002991408396814, "grad_norm": 1.109375, "learning_rate": 0.0007272423018593787, "loss": 0.7799, "step": 8629 }, { "epoch": 0.6003687084768166, "grad_norm": 0.984375, "learning_rate": 0.0007270255197934024, "loss": 0.8145, "step": 8630 }, { "epoch": 0.6004382761139518, "grad_norm": 1.1171875, "learning_rate": 0.0007268087515876939, "loss": 0.8468, "step": 8631 }, { "epoch": 0.600507843751087, "grad_norm": 0.9609375, "learning_rate": 0.0007265919972532603, "loss": 0.5068, "step": 8632 }, { "epoch": 0.6005774113882222, "grad_norm": 1.0390625, "learning_rate": 0.0007263752568011073, "loss": 0.8017, "step": 8633 }, { "epoch": 0.6006469790253574, "grad_norm": 1.125, "learning_rate": 0.0007261585302422392, "loss": 0.7295, "step": 8634 }, { "epoch": 0.6007165466624926, "grad_norm": 1.234375, "learning_rate": 0.0007259418175876607, "loss": 0.9748, "step": 8635 }, { "epoch": 0.6007861142996278, "grad_norm": 1.234375, "learning_rate": 0.0007257251188483756, "loss": 0.9806, "step": 8636 }, { "epoch": 0.600855681936763, "grad_norm": 0.9453125, "learning_rate": 0.0007255084340353862, "loss": 0.6462, "step": 8637 }, { "epoch": 0.6009252495738983, "grad_norm": 1.1015625, "learning_rate": 0.0007252917631596949, "loss": 0.8728, "step": 8638 }, { "epoch": 0.6009948172110334, "grad_norm": 1.453125, "learning_rate": 0.0007250751062323036, "loss": 1.0304, "step": 8639 }, { "epoch": 0.6010643848481686, "grad_norm": 1.171875, "learning_rate": 0.0007248584632642127, "loss": 0.8004, "step": 8640 }, { "epoch": 0.6011339524853039, "grad_norm": 0.94921875, "learning_rate": 0.000724641834266422, "loss": 0.6333, "step": 8641 }, { "epoch": 0.601203520122439, "grad_norm": 1.203125, "learning_rate": 0.000724425219249931, "loss": 0.8913, "step": 8642 }, { "epoch": 0.6012730877595742, "grad_norm": 0.9296875, "learning_rate": 0.0007242086182257386, "loss": 0.9301, "step": 8643 }, { "epoch": 0.6013426553967095, "grad_norm": 1.25, "learning_rate": 0.0007239920312048423, "loss": 0.7183, "step": 8644 }, { "epoch": 0.6014122230338447, "grad_norm": 0.95703125, "learning_rate": 0.0007237754581982394, "loss": 0.7616, "step": 8645 }, { "epoch": 0.6014817906709798, "grad_norm": 1.21875, "learning_rate": 0.0007235588992169265, "loss": 0.6698, "step": 8646 }, { "epoch": 0.601551358308115, "grad_norm": 1.484375, "learning_rate": 0.0007233423542718997, "loss": 0.8227, "step": 8647 }, { "epoch": 0.6016209259452503, "grad_norm": 1.078125, "learning_rate": 0.0007231258233741533, "loss": 0.8004, "step": 8648 }, { "epoch": 0.6016904935823855, "grad_norm": 1.2265625, "learning_rate": 0.0007229093065346818, "loss": 0.9062, "step": 8649 }, { "epoch": 0.6017600612195206, "grad_norm": 1.0703125, "learning_rate": 0.0007226928037644798, "loss": 0.9083, "step": 8650 }, { "epoch": 0.6018296288566559, "grad_norm": 1.125, "learning_rate": 0.000722476315074539, "loss": 0.6559, "step": 8651 }, { "epoch": 0.6018991964937911, "grad_norm": 1.1015625, "learning_rate": 0.000722259840475852, "loss": 0.7909, "step": 8652 }, { "epoch": 0.6019687641309263, "grad_norm": 1.234375, "learning_rate": 0.0007220433799794106, "loss": 0.6338, "step": 8653 }, { "epoch": 0.6020383317680615, "grad_norm": 0.91796875, "learning_rate": 0.0007218269335962055, "loss": 0.97, "step": 8654 }, { "epoch": 0.6021078994051967, "grad_norm": 1.0234375, "learning_rate": 0.0007216105013372266, "loss": 0.8516, "step": 8655 }, { "epoch": 0.6021774670423319, "grad_norm": 1.1953125, "learning_rate": 0.0007213940832134629, "loss": 0.913, "step": 8656 }, { "epoch": 0.6022470346794672, "grad_norm": 1.546875, "learning_rate": 0.0007211776792359038, "loss": 0.9543, "step": 8657 }, { "epoch": 0.6023166023166023, "grad_norm": 1.3046875, "learning_rate": 0.0007209612894155367, "loss": 0.8336, "step": 8658 }, { "epoch": 0.6023861699537375, "grad_norm": 1.21875, "learning_rate": 0.0007207449137633483, "loss": 0.9429, "step": 8659 }, { "epoch": 0.6024557375908727, "grad_norm": 1.0234375, "learning_rate": 0.0007205285522903262, "loss": 0.7073, "step": 8660 }, { "epoch": 0.602525305228008, "grad_norm": 0.796875, "learning_rate": 0.0007203122050074556, "loss": 0.468, "step": 8661 }, { "epoch": 0.6025948728651431, "grad_norm": 1.1796875, "learning_rate": 0.0007200958719257213, "loss": 0.9161, "step": 8662 }, { "epoch": 0.6026644405022783, "grad_norm": 1.140625, "learning_rate": 0.0007198795530561077, "loss": 0.9724, "step": 8663 }, { "epoch": 0.6027340081394136, "grad_norm": 1.6171875, "learning_rate": 0.0007196632484095986, "loss": 0.6824, "step": 8664 }, { "epoch": 0.6028035757765488, "grad_norm": 0.953125, "learning_rate": 0.0007194469579971769, "loss": 0.8068, "step": 8665 }, { "epoch": 0.6028731434136839, "grad_norm": 1.2265625, "learning_rate": 0.0007192306818298244, "loss": 0.8191, "step": 8666 }, { "epoch": 0.6029427110508192, "grad_norm": 1.1484375, "learning_rate": 0.0007190144199185227, "loss": 0.8178, "step": 8667 }, { "epoch": 0.6030122786879544, "grad_norm": 0.86328125, "learning_rate": 0.0007187981722742527, "loss": 0.6776, "step": 8668 }, { "epoch": 0.6030818463250895, "grad_norm": 1.046875, "learning_rate": 0.0007185819389079939, "loss": 0.8442, "step": 8669 }, { "epoch": 0.6031514139622248, "grad_norm": 1.203125, "learning_rate": 0.0007183657198307258, "loss": 0.6645, "step": 8670 }, { "epoch": 0.60322098159936, "grad_norm": 1.3359375, "learning_rate": 0.000718149515053427, "loss": 0.7999, "step": 8671 }, { "epoch": 0.6032905492364952, "grad_norm": 1.65625, "learning_rate": 0.0007179333245870753, "loss": 0.7067, "step": 8672 }, { "epoch": 0.6033601168736303, "grad_norm": 0.98046875, "learning_rate": 0.0007177171484426474, "loss": 0.6579, "step": 8673 }, { "epoch": 0.6034296845107656, "grad_norm": 1.015625, "learning_rate": 0.0007175009866311199, "loss": 0.7907, "step": 8674 }, { "epoch": 0.6034992521479008, "grad_norm": 1.0234375, "learning_rate": 0.0007172848391634687, "loss": 0.8546, "step": 8675 }, { "epoch": 0.603568819785036, "grad_norm": 1.3515625, "learning_rate": 0.0007170687060506682, "loss": 0.9074, "step": 8676 }, { "epoch": 0.6036383874221712, "grad_norm": 1.0234375, "learning_rate": 0.0007168525873036926, "loss": 0.8058, "step": 8677 }, { "epoch": 0.6037079550593064, "grad_norm": 1.171875, "learning_rate": 0.0007166364829335155, "loss": 0.8768, "step": 8678 }, { "epoch": 0.6037775226964416, "grad_norm": 0.83203125, "learning_rate": 0.00071642039295111, "loss": 0.8745, "step": 8679 }, { "epoch": 0.6038470903335769, "grad_norm": 1.328125, "learning_rate": 0.0007162043173674468, "loss": 0.7824, "step": 8680 }, { "epoch": 0.603916657970712, "grad_norm": 1.0234375, "learning_rate": 0.0007159882561934984, "loss": 0.9104, "step": 8681 }, { "epoch": 0.6039862256078472, "grad_norm": 1.3359375, "learning_rate": 0.0007157722094402351, "loss": 0.6902, "step": 8682 }, { "epoch": 0.6040557932449825, "grad_norm": 1.3671875, "learning_rate": 0.0007155561771186259, "loss": 0.839, "step": 8683 }, { "epoch": 0.6041253608821177, "grad_norm": 1.6875, "learning_rate": 0.0007153401592396402, "loss": 0.7838, "step": 8684 }, { "epoch": 0.6041949285192528, "grad_norm": 1.125, "learning_rate": 0.0007151241558142467, "loss": 0.8241, "step": 8685 }, { "epoch": 0.604264496156388, "grad_norm": 1.15625, "learning_rate": 0.000714908166853413, "loss": 0.745, "step": 8686 }, { "epoch": 0.6043340637935233, "grad_norm": 0.98046875, "learning_rate": 0.0007146921923681051, "loss": 0.7767, "step": 8687 }, { "epoch": 0.6044036314306585, "grad_norm": 1.109375, "learning_rate": 0.0007144762323692897, "loss": 0.7174, "step": 8688 }, { "epoch": 0.6044731990677936, "grad_norm": 0.83203125, "learning_rate": 0.0007142602868679324, "loss": 0.6602, "step": 8689 }, { "epoch": 0.6045427667049289, "grad_norm": 1.0390625, "learning_rate": 0.0007140443558749974, "loss": 0.6507, "step": 8690 }, { "epoch": 0.6046123343420641, "grad_norm": 1.21875, "learning_rate": 0.0007138284394014483, "loss": 0.79, "step": 8691 }, { "epoch": 0.6046819019791992, "grad_norm": 0.890625, "learning_rate": 0.000713612537458249, "loss": 0.7754, "step": 8692 }, { "epoch": 0.6047514696163345, "grad_norm": 1.1796875, "learning_rate": 0.0007133966500563615, "loss": 0.9146, "step": 8693 }, { "epoch": 0.6048210372534697, "grad_norm": 1.109375, "learning_rate": 0.0007131807772067473, "loss": 0.6198, "step": 8694 }, { "epoch": 0.6048906048906049, "grad_norm": 1.0625, "learning_rate": 0.0007129649189203677, "loss": 0.7704, "step": 8695 }, { "epoch": 0.6049601725277401, "grad_norm": 1.0703125, "learning_rate": 0.0007127490752081829, "loss": 0.7555, "step": 8696 }, { "epoch": 0.6050297401648753, "grad_norm": 1.203125, "learning_rate": 0.0007125332460811522, "loss": 0.9109, "step": 8697 }, { "epoch": 0.6050993078020105, "grad_norm": 1.375, "learning_rate": 0.0007123174315502341, "loss": 0.7943, "step": 8698 }, { "epoch": 0.6051688754391457, "grad_norm": 0.96875, "learning_rate": 0.0007121016316263869, "loss": 0.7806, "step": 8699 }, { "epoch": 0.6052384430762809, "grad_norm": 1.140625, "learning_rate": 0.000711885846320568, "loss": 0.7104, "step": 8700 }, { "epoch": 0.6053080107134161, "grad_norm": 1.1484375, "learning_rate": 0.0007116700756437333, "loss": 0.7789, "step": 8701 }, { "epoch": 0.6053775783505513, "grad_norm": 1.40625, "learning_rate": 0.0007114543196068389, "loss": 0.7917, "step": 8702 }, { "epoch": 0.6054471459876866, "grad_norm": 1.03125, "learning_rate": 0.0007112385782208397, "loss": 0.8465, "step": 8703 }, { "epoch": 0.6055167136248217, "grad_norm": 1.078125, "learning_rate": 0.0007110228514966903, "loss": 1.0022, "step": 8704 }, { "epoch": 0.6055862812619569, "grad_norm": 0.96875, "learning_rate": 0.0007108071394453436, "loss": 0.5816, "step": 8705 }, { "epoch": 0.6056558488990922, "grad_norm": 1.0625, "learning_rate": 0.0007105914420777529, "loss": 0.6953, "step": 8706 }, { "epoch": 0.6057254165362274, "grad_norm": 1.0234375, "learning_rate": 0.0007103757594048703, "loss": 0.6059, "step": 8707 }, { "epoch": 0.6057949841733625, "grad_norm": 1.765625, "learning_rate": 0.0007101600914376465, "loss": 0.8081, "step": 8708 }, { "epoch": 0.6058645518104978, "grad_norm": 1.125, "learning_rate": 0.0007099444381870322, "loss": 0.8674, "step": 8709 }, { "epoch": 0.605934119447633, "grad_norm": 0.9921875, "learning_rate": 0.0007097287996639776, "loss": 0.7671, "step": 8710 }, { "epoch": 0.6060036870847682, "grad_norm": 1.1796875, "learning_rate": 0.0007095131758794317, "loss": 0.6164, "step": 8711 }, { "epoch": 0.6060732547219033, "grad_norm": 1.1171875, "learning_rate": 0.0007092975668443421, "loss": 0.9779, "step": 8712 }, { "epoch": 0.6061428223590386, "grad_norm": 0.9609375, "learning_rate": 0.000709081972569657, "loss": 0.7917, "step": 8713 }, { "epoch": 0.6062123899961738, "grad_norm": 1.0546875, "learning_rate": 0.0007088663930663232, "loss": 0.967, "step": 8714 }, { "epoch": 0.6062819576333089, "grad_norm": 1.015625, "learning_rate": 0.0007086508283452864, "loss": 0.6636, "step": 8715 }, { "epoch": 0.6063515252704442, "grad_norm": 1.0859375, "learning_rate": 0.0007084352784174917, "loss": 0.8809, "step": 8716 }, { "epoch": 0.6064210929075794, "grad_norm": 1.1171875, "learning_rate": 0.0007082197432938844, "loss": 0.8034, "step": 8717 }, { "epoch": 0.6064906605447146, "grad_norm": 1.125, "learning_rate": 0.0007080042229854077, "loss": 0.5958, "step": 8718 }, { "epoch": 0.6065602281818498, "grad_norm": 0.97265625, "learning_rate": 0.0007077887175030047, "loss": 0.7012, "step": 8719 }, { "epoch": 0.606629795818985, "grad_norm": 1.1015625, "learning_rate": 0.000707573226857618, "loss": 0.7884, "step": 8720 }, { "epoch": 0.6066993634561202, "grad_norm": 1.140625, "learning_rate": 0.0007073577510601889, "loss": 0.6782, "step": 8721 }, { "epoch": 0.6067689310932555, "grad_norm": 2.328125, "learning_rate": 0.0007071422901216579, "loss": 0.8939, "step": 8722 }, { "epoch": 0.6068384987303906, "grad_norm": 1.03125, "learning_rate": 0.0007069268440529654, "loss": 0.5865, "step": 8723 }, { "epoch": 0.6069080663675258, "grad_norm": 1.0234375, "learning_rate": 0.0007067114128650506, "loss": 0.6065, "step": 8724 }, { "epoch": 0.606977634004661, "grad_norm": 1.015625, "learning_rate": 0.0007064959965688522, "loss": 0.6358, "step": 8725 }, { "epoch": 0.6070472016417963, "grad_norm": 1.4765625, "learning_rate": 0.0007062805951753073, "loss": 1.11, "step": 8726 }, { "epoch": 0.6071167692789314, "grad_norm": 1.125, "learning_rate": 0.0007060652086953534, "loss": 0.9128, "step": 8727 }, { "epoch": 0.6071863369160666, "grad_norm": 0.78125, "learning_rate": 0.0007058498371399269, "loss": 0.5899, "step": 8728 }, { "epoch": 0.6072559045532019, "grad_norm": 0.99609375, "learning_rate": 0.000705634480519963, "loss": 0.6378, "step": 8729 }, { "epoch": 0.6073254721903371, "grad_norm": 1.140625, "learning_rate": 0.0007054191388463962, "loss": 0.958, "step": 8730 }, { "epoch": 0.6073950398274722, "grad_norm": 1.171875, "learning_rate": 0.0007052038121301609, "loss": 0.9299, "step": 8731 }, { "epoch": 0.6074646074646075, "grad_norm": 1.5703125, "learning_rate": 0.0007049885003821905, "loss": 1.0552, "step": 8732 }, { "epoch": 0.6075341751017427, "grad_norm": 0.9140625, "learning_rate": 0.0007047732036134165, "loss": 0.6015, "step": 8733 }, { "epoch": 0.6076037427388779, "grad_norm": 0.9140625, "learning_rate": 0.0007045579218347712, "loss": 0.7952, "step": 8734 }, { "epoch": 0.6076733103760131, "grad_norm": 1.1953125, "learning_rate": 0.0007043426550571858, "loss": 1.0644, "step": 8735 }, { "epoch": 0.6077428780131483, "grad_norm": 1.28125, "learning_rate": 0.0007041274032915903, "loss": 0.7886, "step": 8736 }, { "epoch": 0.6078124456502835, "grad_norm": 1.1796875, "learning_rate": 0.0007039121665489134, "loss": 1.0829, "step": 8737 }, { "epoch": 0.6078820132874186, "grad_norm": 1.3984375, "learning_rate": 0.0007036969448400847, "loss": 0.9535, "step": 8738 }, { "epoch": 0.6079515809245539, "grad_norm": 1.2421875, "learning_rate": 0.0007034817381760317, "loss": 0.8826, "step": 8739 }, { "epoch": 0.6080211485616891, "grad_norm": 1.2265625, "learning_rate": 0.0007032665465676812, "loss": 0.6847, "step": 8740 }, { "epoch": 0.6080907161988243, "grad_norm": 1.1015625, "learning_rate": 0.00070305137002596, "loss": 0.9334, "step": 8741 }, { "epoch": 0.6081602838359595, "grad_norm": 0.8359375, "learning_rate": 0.0007028362085617935, "loss": 0.7175, "step": 8742 }, { "epoch": 0.6082298514730947, "grad_norm": 1.0625, "learning_rate": 0.0007026210621861066, "loss": 0.8694, "step": 8743 }, { "epoch": 0.6082994191102299, "grad_norm": 1.2421875, "learning_rate": 0.0007024059309098229, "loss": 0.9104, "step": 8744 }, { "epoch": 0.6083689867473652, "grad_norm": 1.046875, "learning_rate": 0.0007021908147438662, "loss": 0.8699, "step": 8745 }, { "epoch": 0.6084385543845003, "grad_norm": 1.5234375, "learning_rate": 0.0007019757136991591, "loss": 0.6986, "step": 8746 }, { "epoch": 0.6085081220216355, "grad_norm": 1.0390625, "learning_rate": 0.0007017606277866225, "loss": 0.7712, "step": 8747 }, { "epoch": 0.6085776896587708, "grad_norm": 1.1953125, "learning_rate": 0.0007015455570171787, "loss": 1.0519, "step": 8748 }, { "epoch": 0.608647257295906, "grad_norm": 1.2734375, "learning_rate": 0.0007013305014017468, "loss": 0.8171, "step": 8749 }, { "epoch": 0.6087168249330411, "grad_norm": 1.0546875, "learning_rate": 0.000701115460951247, "loss": 0.8163, "step": 8750 }, { "epoch": 0.6087863925701763, "grad_norm": 1.5859375, "learning_rate": 0.0007009004356765971, "loss": 0.8739, "step": 8751 }, { "epoch": 0.6088559602073116, "grad_norm": 1.03125, "learning_rate": 0.0007006854255887157, "loss": 0.7692, "step": 8752 }, { "epoch": 0.6089255278444468, "grad_norm": 1.2109375, "learning_rate": 0.0007004704306985201, "loss": 0.9266, "step": 8753 }, { "epoch": 0.6089950954815819, "grad_norm": 0.96484375, "learning_rate": 0.0007002554510169254, "loss": 0.7674, "step": 8754 }, { "epoch": 0.6090646631187172, "grad_norm": 1.109375, "learning_rate": 0.0007000404865548489, "loss": 0.8297, "step": 8755 }, { "epoch": 0.6091342307558524, "grad_norm": 0.96875, "learning_rate": 0.0006998255373232043, "loss": 0.7031, "step": 8756 }, { "epoch": 0.6092037983929875, "grad_norm": 1.375, "learning_rate": 0.0006996106033329061, "loss": 0.7895, "step": 8757 }, { "epoch": 0.6092733660301228, "grad_norm": 1.078125, "learning_rate": 0.000699395684594867, "loss": 0.7787, "step": 8758 }, { "epoch": 0.609342933667258, "grad_norm": 1.078125, "learning_rate": 0.0006991807811200002, "loss": 0.8288, "step": 8759 }, { "epoch": 0.6094125013043932, "grad_norm": 1.328125, "learning_rate": 0.0006989658929192171, "loss": 0.9346, "step": 8760 }, { "epoch": 0.6094820689415285, "grad_norm": 1.03125, "learning_rate": 0.0006987510200034281, "loss": 0.8066, "step": 8761 }, { "epoch": 0.6095516365786636, "grad_norm": 0.9453125, "learning_rate": 0.0006985361623835447, "loss": 0.6906, "step": 8762 }, { "epoch": 0.6096212042157988, "grad_norm": 0.8984375, "learning_rate": 0.000698321320070475, "loss": 0.7455, "step": 8763 }, { "epoch": 0.609690771852934, "grad_norm": 1.296875, "learning_rate": 0.0006981064930751285, "loss": 0.8661, "step": 8764 }, { "epoch": 0.6097603394900692, "grad_norm": 1.0703125, "learning_rate": 0.0006978916814084121, "loss": 0.7087, "step": 8765 }, { "epoch": 0.6098299071272044, "grad_norm": 0.83203125, "learning_rate": 0.0006976768850812336, "loss": 0.6889, "step": 8766 }, { "epoch": 0.6098994747643396, "grad_norm": 1.09375, "learning_rate": 0.000697462104104499, "loss": 0.8307, "step": 8767 }, { "epoch": 0.6099690424014749, "grad_norm": 1.515625, "learning_rate": 0.0006972473384891138, "loss": 0.9023, "step": 8768 }, { "epoch": 0.61003861003861, "grad_norm": 0.92578125, "learning_rate": 0.0006970325882459832, "loss": 0.7498, "step": 8769 }, { "epoch": 0.6101081776757452, "grad_norm": 0.984375, "learning_rate": 0.0006968178533860103, "loss": 0.4831, "step": 8770 }, { "epoch": 0.6101777453128805, "grad_norm": 1.0859375, "learning_rate": 0.0006966031339200989, "loss": 0.664, "step": 8771 }, { "epoch": 0.6102473129500157, "grad_norm": 0.9609375, "learning_rate": 0.0006963884298591507, "loss": 0.8983, "step": 8772 }, { "epoch": 0.6103168805871508, "grad_norm": 0.9140625, "learning_rate": 0.0006961737412140681, "loss": 0.8764, "step": 8773 }, { "epoch": 0.6103864482242861, "grad_norm": 1.3671875, "learning_rate": 0.0006959590679957513, "loss": 0.8993, "step": 8774 }, { "epoch": 0.6104560158614213, "grad_norm": 0.7890625, "learning_rate": 0.0006957444102151009, "loss": 0.7429, "step": 8775 }, { "epoch": 0.6105255834985565, "grad_norm": 0.93359375, "learning_rate": 0.0006955297678830153, "loss": 0.8008, "step": 8776 }, { "epoch": 0.6105951511356916, "grad_norm": 1.0703125, "learning_rate": 0.0006953151410103937, "loss": 0.8046, "step": 8777 }, { "epoch": 0.6106647187728269, "grad_norm": 0.97265625, "learning_rate": 0.0006951005296081336, "loss": 0.7889, "step": 8778 }, { "epoch": 0.6107342864099621, "grad_norm": 1.1796875, "learning_rate": 0.0006948859336871314, "loss": 0.8505, "step": 8779 }, { "epoch": 0.6108038540470972, "grad_norm": 1.0, "learning_rate": 0.0006946713532582841, "loss": 0.6689, "step": 8780 }, { "epoch": 0.6108734216842325, "grad_norm": 1.125, "learning_rate": 0.0006944567883324863, "loss": 0.8853, "step": 8781 }, { "epoch": 0.6109429893213677, "grad_norm": 1.15625, "learning_rate": 0.0006942422389206329, "loss": 0.8171, "step": 8782 }, { "epoch": 0.6110125569585029, "grad_norm": 1.09375, "learning_rate": 0.0006940277050336172, "loss": 1.0236, "step": 8783 }, { "epoch": 0.6110821245956382, "grad_norm": 1.140625, "learning_rate": 0.0006938131866823324, "loss": 0.8976, "step": 8784 }, { "epoch": 0.6111516922327733, "grad_norm": 1.2734375, "learning_rate": 0.0006935986838776711, "loss": 0.8862, "step": 8785 }, { "epoch": 0.6112212598699085, "grad_norm": 1.859375, "learning_rate": 0.0006933841966305234, "loss": 1.2325, "step": 8786 }, { "epoch": 0.6112908275070438, "grad_norm": 0.95703125, "learning_rate": 0.0006931697249517816, "loss": 0.8163, "step": 8787 }, { "epoch": 0.611360395144179, "grad_norm": 0.98828125, "learning_rate": 0.0006929552688523344, "loss": 0.9309, "step": 8788 }, { "epoch": 0.6114299627813141, "grad_norm": 1.0390625, "learning_rate": 0.0006927408283430712, "loss": 0.8401, "step": 8789 }, { "epoch": 0.6114995304184493, "grad_norm": 1.15625, "learning_rate": 0.0006925264034348795, "loss": 0.7555, "step": 8790 }, { "epoch": 0.6115690980555846, "grad_norm": 0.953125, "learning_rate": 0.0006923119941386475, "loss": 0.7396, "step": 8791 }, { "epoch": 0.6116386656927197, "grad_norm": 1.1484375, "learning_rate": 0.000692097600465262, "loss": 0.6304, "step": 8792 }, { "epoch": 0.6117082333298549, "grad_norm": 1.1171875, "learning_rate": 0.0006918832224256076, "loss": 0.962, "step": 8793 }, { "epoch": 0.6117778009669902, "grad_norm": 1.1015625, "learning_rate": 0.0006916688600305707, "loss": 0.9029, "step": 8794 }, { "epoch": 0.6118473686041254, "grad_norm": 1.015625, "learning_rate": 0.0006914545132910348, "loss": 0.8357, "step": 8795 }, { "epoch": 0.6119169362412605, "grad_norm": 1.359375, "learning_rate": 0.0006912401822178839, "loss": 0.945, "step": 8796 }, { "epoch": 0.6119865038783958, "grad_norm": 1.015625, "learning_rate": 0.0006910258668219998, "loss": 0.7049, "step": 8797 }, { "epoch": 0.612056071515531, "grad_norm": 0.91796875, "learning_rate": 0.000690811567114265, "loss": 0.715, "step": 8798 }, { "epoch": 0.6121256391526662, "grad_norm": 1.046875, "learning_rate": 0.0006905972831055604, "loss": 0.7373, "step": 8799 }, { "epoch": 0.6121952067898014, "grad_norm": 1.015625, "learning_rate": 0.000690383014806766, "loss": 0.9297, "step": 8800 }, { "epoch": 0.6122647744269366, "grad_norm": 1.078125, "learning_rate": 0.000690168762228762, "loss": 0.8317, "step": 8801 }, { "epoch": 0.6123343420640718, "grad_norm": 1.1640625, "learning_rate": 0.0006899545253824265, "loss": 0.6979, "step": 8802 }, { "epoch": 0.612403909701207, "grad_norm": 1.15625, "learning_rate": 0.0006897403042786374, "loss": 0.9181, "step": 8803 }, { "epoch": 0.6124734773383422, "grad_norm": 1.421875, "learning_rate": 0.0006895260989282717, "loss": 0.7358, "step": 8804 }, { "epoch": 0.6125430449754774, "grad_norm": 1.5234375, "learning_rate": 0.0006893119093422058, "loss": 0.7538, "step": 8805 }, { "epoch": 0.6126126126126126, "grad_norm": 1.375, "learning_rate": 0.0006890977355313152, "loss": 0.8676, "step": 8806 }, { "epoch": 0.6126821802497479, "grad_norm": 1.15625, "learning_rate": 0.0006888835775064743, "loss": 0.5304, "step": 8807 }, { "epoch": 0.612751747886883, "grad_norm": 0.92578125, "learning_rate": 0.0006886694352785576, "loss": 0.5638, "step": 8808 }, { "epoch": 0.6128213155240182, "grad_norm": 0.9453125, "learning_rate": 0.0006884553088584376, "loss": 0.6907, "step": 8809 }, { "epoch": 0.6128908831611535, "grad_norm": 1.421875, "learning_rate": 0.0006882411982569869, "loss": 1.1476, "step": 8810 }, { "epoch": 0.6129604507982886, "grad_norm": 0.92578125, "learning_rate": 0.0006880271034850763, "loss": 0.7789, "step": 8811 }, { "epoch": 0.6130300184354238, "grad_norm": 1.203125, "learning_rate": 0.0006878130245535772, "loss": 0.9118, "step": 8812 }, { "epoch": 0.6130995860725591, "grad_norm": 1.2109375, "learning_rate": 0.0006875989614733592, "loss": 0.9038, "step": 8813 }, { "epoch": 0.6131691537096943, "grad_norm": 1.1640625, "learning_rate": 0.000687384914255291, "loss": 0.7975, "step": 8814 }, { "epoch": 0.6132387213468294, "grad_norm": 1.28125, "learning_rate": 0.0006871708829102417, "loss": 0.9838, "step": 8815 }, { "epoch": 0.6133082889839646, "grad_norm": 1.0234375, "learning_rate": 0.000686956867449078, "loss": 0.7255, "step": 8816 }, { "epoch": 0.6133778566210999, "grad_norm": 1.0, "learning_rate": 0.0006867428678826668, "loss": 0.8206, "step": 8817 }, { "epoch": 0.6134474242582351, "grad_norm": 1.03125, "learning_rate": 0.0006865288842218733, "loss": 0.6303, "step": 8818 }, { "epoch": 0.6135169918953702, "grad_norm": 1.0546875, "learning_rate": 0.0006863149164775637, "loss": 0.5099, "step": 8819 }, { "epoch": 0.6135865595325055, "grad_norm": 0.859375, "learning_rate": 0.0006861009646606012, "loss": 0.706, "step": 8820 }, { "epoch": 0.6136561271696407, "grad_norm": 1.0390625, "learning_rate": 0.0006858870287818494, "loss": 0.7001, "step": 8821 }, { "epoch": 0.6137256948067759, "grad_norm": 1.34375, "learning_rate": 0.0006856731088521715, "loss": 0.6741, "step": 8822 }, { "epoch": 0.6137952624439111, "grad_norm": 0.93359375, "learning_rate": 0.0006854592048824286, "loss": 0.8331, "step": 8823 }, { "epoch": 0.6138648300810463, "grad_norm": 1.1875, "learning_rate": 0.000685245316883482, "loss": 0.7475, "step": 8824 }, { "epoch": 0.6139343977181815, "grad_norm": 1.1484375, "learning_rate": 0.0006850314448661912, "loss": 0.6212, "step": 8825 }, { "epoch": 0.6140039653553168, "grad_norm": 1.3671875, "learning_rate": 0.0006848175888414166, "loss": 0.9171, "step": 8826 }, { "epoch": 0.6140735329924519, "grad_norm": 1.28125, "learning_rate": 0.0006846037488200161, "loss": 0.9713, "step": 8827 }, { "epoch": 0.6141431006295871, "grad_norm": 1.078125, "learning_rate": 0.0006843899248128473, "loss": 0.7993, "step": 8828 }, { "epoch": 0.6142126682667223, "grad_norm": 1.2421875, "learning_rate": 0.0006841761168307676, "loss": 0.8893, "step": 8829 }, { "epoch": 0.6142822359038576, "grad_norm": 1.234375, "learning_rate": 0.0006839623248846327, "loss": 0.8157, "step": 8830 }, { "epoch": 0.6143518035409927, "grad_norm": 1.078125, "learning_rate": 0.0006837485489852983, "loss": 0.8176, "step": 8831 }, { "epoch": 0.6144213711781279, "grad_norm": 1.1796875, "learning_rate": 0.0006835347891436178, "loss": 1.0037, "step": 8832 }, { "epoch": 0.6144909388152632, "grad_norm": 1.15625, "learning_rate": 0.0006833210453704463, "loss": 0.9301, "step": 8833 }, { "epoch": 0.6145605064523983, "grad_norm": 0.953125, "learning_rate": 0.0006831073176766356, "loss": 0.9012, "step": 8834 }, { "epoch": 0.6146300740895335, "grad_norm": 1.1484375, "learning_rate": 0.000682893606073038, "loss": 0.8061, "step": 8835 }, { "epoch": 0.6146996417266688, "grad_norm": 1.0859375, "learning_rate": 0.000682679910570505, "loss": 0.9522, "step": 8836 }, { "epoch": 0.614769209363804, "grad_norm": 1.2265625, "learning_rate": 0.0006824662311798867, "loss": 0.7811, "step": 8837 }, { "epoch": 0.6148387770009391, "grad_norm": 1.296875, "learning_rate": 0.0006822525679120326, "loss": 1.0802, "step": 8838 }, { "epoch": 0.6149083446380744, "grad_norm": 1.2421875, "learning_rate": 0.0006820389207777914, "loss": 1.1608, "step": 8839 }, { "epoch": 0.6149779122752096, "grad_norm": 0.921875, "learning_rate": 0.0006818252897880115, "loss": 0.5822, "step": 8840 }, { "epoch": 0.6150474799123448, "grad_norm": 0.94140625, "learning_rate": 0.0006816116749535395, "loss": 0.684, "step": 8841 }, { "epoch": 0.6151170475494799, "grad_norm": 1.1328125, "learning_rate": 0.0006813980762852217, "loss": 0.6828, "step": 8842 }, { "epoch": 0.6151866151866152, "grad_norm": 1.1328125, "learning_rate": 0.000681184493793904, "loss": 0.9205, "step": 8843 }, { "epoch": 0.6152561828237504, "grad_norm": 1.0078125, "learning_rate": 0.0006809709274904305, "loss": 0.7127, "step": 8844 }, { "epoch": 0.6153257504608856, "grad_norm": 1.578125, "learning_rate": 0.0006807573773856455, "loss": 0.9358, "step": 8845 }, { "epoch": 0.6153953180980208, "grad_norm": 0.78515625, "learning_rate": 0.0006805438434903915, "loss": 0.8105, "step": 8846 }, { "epoch": 0.615464885735156, "grad_norm": 1.375, "learning_rate": 0.0006803303258155111, "loss": 0.9114, "step": 8847 }, { "epoch": 0.6155344533722912, "grad_norm": 1.390625, "learning_rate": 0.0006801168243718457, "loss": 0.9992, "step": 8848 }, { "epoch": 0.6156040210094265, "grad_norm": 1.2109375, "learning_rate": 0.0006799033391702351, "loss": 0.8461, "step": 8849 }, { "epoch": 0.6156735886465616, "grad_norm": 1.296875, "learning_rate": 0.0006796898702215199, "loss": 0.6119, "step": 8850 }, { "epoch": 0.6157431562836968, "grad_norm": 1.1875, "learning_rate": 0.0006794764175365387, "loss": 0.8069, "step": 8851 }, { "epoch": 0.6158127239208321, "grad_norm": 0.92578125, "learning_rate": 0.0006792629811261293, "loss": 0.7707, "step": 8852 }, { "epoch": 0.6158822915579673, "grad_norm": 1.3125, "learning_rate": 0.0006790495610011289, "loss": 0.5123, "step": 8853 }, { "epoch": 0.6159518591951024, "grad_norm": 1.0625, "learning_rate": 0.0006788361571723744, "loss": 0.7792, "step": 8854 }, { "epoch": 0.6160214268322376, "grad_norm": 1.03125, "learning_rate": 0.0006786227696507011, "loss": 1.1437, "step": 8855 }, { "epoch": 0.6160909944693729, "grad_norm": 0.8984375, "learning_rate": 0.0006784093984469437, "loss": 0.7059, "step": 8856 }, { "epoch": 0.616160562106508, "grad_norm": 0.92578125, "learning_rate": 0.0006781960435719355, "loss": 0.6805, "step": 8857 }, { "epoch": 0.6162301297436432, "grad_norm": 0.7578125, "learning_rate": 0.0006779827050365109, "loss": 0.5224, "step": 8858 }, { "epoch": 0.6162996973807785, "grad_norm": 1.0390625, "learning_rate": 0.0006777693828515012, "loss": 0.715, "step": 8859 }, { "epoch": 0.6163692650179137, "grad_norm": 1.6171875, "learning_rate": 0.0006775560770277378, "loss": 0.6267, "step": 8860 }, { "epoch": 0.6164388326550488, "grad_norm": 1.125, "learning_rate": 0.0006773427875760521, "loss": 0.6734, "step": 8861 }, { "epoch": 0.6165084002921841, "grad_norm": 1.0625, "learning_rate": 0.0006771295145072731, "loss": 0.7742, "step": 8862 }, { "epoch": 0.6165779679293193, "grad_norm": 1.3203125, "learning_rate": 0.0006769162578322301, "loss": 0.9444, "step": 8863 }, { "epoch": 0.6166475355664545, "grad_norm": 1.203125, "learning_rate": 0.0006767030175617505, "loss": 0.7557, "step": 8864 }, { "epoch": 0.6167171032035897, "grad_norm": 1.015625, "learning_rate": 0.0006764897937066627, "loss": 0.7259, "step": 8865 }, { "epoch": 0.6167866708407249, "grad_norm": 1.2421875, "learning_rate": 0.0006762765862777924, "loss": 0.9274, "step": 8866 }, { "epoch": 0.6168562384778601, "grad_norm": 1.1640625, "learning_rate": 0.0006760633952859652, "loss": 0.86, "step": 8867 }, { "epoch": 0.6169258061149953, "grad_norm": 1.375, "learning_rate": 0.0006758502207420065, "loss": 0.939, "step": 8868 }, { "epoch": 0.6169953737521305, "grad_norm": 1.4296875, "learning_rate": 0.0006756370626567394, "loss": 0.8592, "step": 8869 }, { "epoch": 0.6170649413892657, "grad_norm": 1.171875, "learning_rate": 0.0006754239210409874, "loss": 0.803, "step": 8870 }, { "epoch": 0.6171345090264009, "grad_norm": 1.1953125, "learning_rate": 0.0006752107959055724, "loss": 0.7991, "step": 8871 }, { "epoch": 0.6172040766635362, "grad_norm": 1.03125, "learning_rate": 0.0006749976872613166, "loss": 0.8255, "step": 8872 }, { "epoch": 0.6172736443006713, "grad_norm": 1.3203125, "learning_rate": 0.00067478459511904, "loss": 0.8896, "step": 8873 }, { "epoch": 0.6173432119378065, "grad_norm": 1.0078125, "learning_rate": 0.0006745715194895622, "loss": 0.7888, "step": 8874 }, { "epoch": 0.6174127795749418, "grad_norm": 0.8984375, "learning_rate": 0.0006743584603837027, "loss": 0.7649, "step": 8875 }, { "epoch": 0.617482347212077, "grad_norm": 1.03125, "learning_rate": 0.000674145417812279, "loss": 0.8674, "step": 8876 }, { "epoch": 0.6175519148492121, "grad_norm": 0.98828125, "learning_rate": 0.0006739323917861087, "loss": 0.7114, "step": 8877 }, { "epoch": 0.6176214824863474, "grad_norm": 1.0625, "learning_rate": 0.0006737193823160077, "loss": 0.8836, "step": 8878 }, { "epoch": 0.6176910501234826, "grad_norm": 1.1796875, "learning_rate": 0.0006735063894127924, "loss": 0.6029, "step": 8879 }, { "epoch": 0.6177606177606177, "grad_norm": 1.1328125, "learning_rate": 0.0006732934130872768, "loss": 0.7293, "step": 8880 }, { "epoch": 0.6178301853977529, "grad_norm": 1.1171875, "learning_rate": 0.0006730804533502747, "loss": 0.7565, "step": 8881 }, { "epoch": 0.6178997530348882, "grad_norm": 0.984375, "learning_rate": 0.0006728675102125997, "loss": 0.8098, "step": 8882 }, { "epoch": 0.6179693206720234, "grad_norm": 0.90625, "learning_rate": 0.0006726545836850636, "loss": 0.7885, "step": 8883 }, { "epoch": 0.6180388883091585, "grad_norm": 1.265625, "learning_rate": 0.0006724416737784777, "loss": 0.8904, "step": 8884 }, { "epoch": 0.6181084559462938, "grad_norm": 1.078125, "learning_rate": 0.0006722287805036525, "loss": 0.9974, "step": 8885 }, { "epoch": 0.618178023583429, "grad_norm": 1.3359375, "learning_rate": 0.0006720159038713981, "loss": 0.681, "step": 8886 }, { "epoch": 0.6182475912205642, "grad_norm": 1.015625, "learning_rate": 0.0006718030438925227, "loss": 0.6932, "step": 8887 }, { "epoch": 0.6183171588576994, "grad_norm": 0.93359375, "learning_rate": 0.0006715902005778343, "loss": 0.7525, "step": 8888 }, { "epoch": 0.6183867264948346, "grad_norm": 1.1953125, "learning_rate": 0.0006713773739381403, "loss": 0.6183, "step": 8889 }, { "epoch": 0.6184562941319698, "grad_norm": 1.15625, "learning_rate": 0.0006711645639842474, "loss": 0.8117, "step": 8890 }, { "epoch": 0.6185258617691051, "grad_norm": 1.1484375, "learning_rate": 0.00067095177072696, "loss": 0.736, "step": 8891 }, { "epoch": 0.6185954294062402, "grad_norm": 1.453125, "learning_rate": 0.0006707389941770829, "loss": 0.9317, "step": 8892 }, { "epoch": 0.6186649970433754, "grad_norm": 1.3203125, "learning_rate": 0.0006705262343454208, "loss": 0.8924, "step": 8893 }, { "epoch": 0.6187345646805106, "grad_norm": 1.0546875, "learning_rate": 0.0006703134912427754, "loss": 0.6364, "step": 8894 }, { "epoch": 0.6188041323176459, "grad_norm": 1.0078125, "learning_rate": 0.0006701007648799491, "loss": 0.779, "step": 8895 }, { "epoch": 0.618873699954781, "grad_norm": 0.9296875, "learning_rate": 0.0006698880552677432, "loss": 0.6551, "step": 8896 }, { "epoch": 0.6189432675919162, "grad_norm": 1.0625, "learning_rate": 0.0006696753624169582, "loss": 0.9548, "step": 8897 }, { "epoch": 0.6190128352290515, "grad_norm": 1.03125, "learning_rate": 0.0006694626863383932, "loss": 0.7592, "step": 8898 }, { "epoch": 0.6190824028661867, "grad_norm": 1.296875, "learning_rate": 0.0006692500270428467, "loss": 1.0177, "step": 8899 }, { "epoch": 0.6191519705033218, "grad_norm": 1.2265625, "learning_rate": 0.0006690373845411173, "loss": 0.8578, "step": 8900 }, { "epoch": 0.6192215381404571, "grad_norm": 1.015625, "learning_rate": 0.0006688247588440008, "loss": 0.7239, "step": 8901 }, { "epoch": 0.6192911057775923, "grad_norm": 1.046875, "learning_rate": 0.000668612149962294, "loss": 0.6681, "step": 8902 }, { "epoch": 0.6193606734147274, "grad_norm": 1.2890625, "learning_rate": 0.0006683995579067918, "loss": 0.8568, "step": 8903 }, { "epoch": 0.6194302410518627, "grad_norm": 1.046875, "learning_rate": 0.0006681869826882889, "loss": 0.6597, "step": 8904 }, { "epoch": 0.6194998086889979, "grad_norm": 0.9453125, "learning_rate": 0.0006679744243175785, "loss": 0.76, "step": 8905 }, { "epoch": 0.6195693763261331, "grad_norm": 1.328125, "learning_rate": 0.000667761882805453, "loss": 1.2157, "step": 8906 }, { "epoch": 0.6196389439632682, "grad_norm": 1.0859375, "learning_rate": 0.0006675493581627049, "loss": 0.7298, "step": 8907 }, { "epoch": 0.6197085116004035, "grad_norm": 1.296875, "learning_rate": 0.0006673368504001245, "loss": 0.8582, "step": 8908 }, { "epoch": 0.6197780792375387, "grad_norm": 1.234375, "learning_rate": 0.000667124359528502, "loss": 1.0202, "step": 8909 }, { "epoch": 0.6198476468746739, "grad_norm": 1.0234375, "learning_rate": 0.0006669118855586267, "loss": 0.8024, "step": 8910 }, { "epoch": 0.6199172145118091, "grad_norm": 1.140625, "learning_rate": 0.0006666994285012873, "loss": 0.8253, "step": 8911 }, { "epoch": 0.6199867821489443, "grad_norm": 1.3125, "learning_rate": 0.0006664869883672708, "loss": 0.8428, "step": 8912 }, { "epoch": 0.6200563497860795, "grad_norm": 0.83984375, "learning_rate": 0.0006662745651673638, "loss": 0.8329, "step": 8913 }, { "epoch": 0.6201259174232148, "grad_norm": 1.0, "learning_rate": 0.0006660621589123526, "loss": 0.7488, "step": 8914 }, { "epoch": 0.6201954850603499, "grad_norm": 1.1328125, "learning_rate": 0.0006658497696130216, "loss": 0.7186, "step": 8915 }, { "epoch": 0.6202650526974851, "grad_norm": 1.390625, "learning_rate": 0.0006656373972801548, "loss": 0.81, "step": 8916 }, { "epoch": 0.6203346203346203, "grad_norm": 0.6484375, "learning_rate": 0.000665425041924536, "loss": 0.5148, "step": 8917 }, { "epoch": 0.6204041879717556, "grad_norm": 0.9375, "learning_rate": 0.0006652127035569473, "loss": 0.8071, "step": 8918 }, { "epoch": 0.6204737556088907, "grad_norm": 1.0625, "learning_rate": 0.0006650003821881698, "loss": 0.6649, "step": 8919 }, { "epoch": 0.6205433232460259, "grad_norm": 1.328125, "learning_rate": 0.0006647880778289843, "loss": 0.9216, "step": 8920 }, { "epoch": 0.6206128908831612, "grad_norm": 1.2734375, "learning_rate": 0.0006645757904901708, "loss": 0.7633, "step": 8921 }, { "epoch": 0.6206824585202964, "grad_norm": 1.0859375, "learning_rate": 0.0006643635201825081, "loss": 0.5849, "step": 8922 }, { "epoch": 0.6207520261574315, "grad_norm": 1.0390625, "learning_rate": 0.0006641512669167737, "loss": 0.8264, "step": 8923 }, { "epoch": 0.6208215937945668, "grad_norm": 1.1875, "learning_rate": 0.0006639390307037456, "loss": 0.8133, "step": 8924 }, { "epoch": 0.620891161431702, "grad_norm": 1.046875, "learning_rate": 0.0006637268115541997, "loss": 0.6556, "step": 8925 }, { "epoch": 0.6209607290688371, "grad_norm": 1.0546875, "learning_rate": 0.0006635146094789111, "loss": 0.7816, "step": 8926 }, { "epoch": 0.6210302967059724, "grad_norm": 1.0625, "learning_rate": 0.0006633024244886546, "loss": 0.9437, "step": 8927 }, { "epoch": 0.6210998643431076, "grad_norm": 0.89453125, "learning_rate": 0.0006630902565942039, "loss": 0.6375, "step": 8928 }, { "epoch": 0.6211694319802428, "grad_norm": 1.3359375, "learning_rate": 0.000662878105806332, "loss": 0.8066, "step": 8929 }, { "epoch": 0.6212389996173779, "grad_norm": 1.296875, "learning_rate": 0.0006626659721358103, "loss": 0.7373, "step": 8930 }, { "epoch": 0.6213085672545132, "grad_norm": 1.2265625, "learning_rate": 0.0006624538555934105, "loss": 0.8738, "step": 8931 }, { "epoch": 0.6213781348916484, "grad_norm": 1.3125, "learning_rate": 0.0006622417561899028, "loss": 0.9945, "step": 8932 }, { "epoch": 0.6214477025287836, "grad_norm": 1.4375, "learning_rate": 0.0006620296739360561, "loss": 0.6965, "step": 8933 }, { "epoch": 0.6215172701659188, "grad_norm": 0.87109375, "learning_rate": 0.0006618176088426388, "loss": 0.6123, "step": 8934 }, { "epoch": 0.621586837803054, "grad_norm": 1.09375, "learning_rate": 0.0006616055609204191, "loss": 0.6154, "step": 8935 }, { "epoch": 0.6216564054401892, "grad_norm": 1.0703125, "learning_rate": 0.0006613935301801633, "loss": 0.6419, "step": 8936 }, { "epoch": 0.6217259730773245, "grad_norm": 0.85546875, "learning_rate": 0.0006611815166326373, "loss": 0.6474, "step": 8937 }, { "epoch": 0.6217955407144596, "grad_norm": 1.6796875, "learning_rate": 0.0006609695202886059, "loss": 0.8672, "step": 8938 }, { "epoch": 0.6218651083515948, "grad_norm": 0.9609375, "learning_rate": 0.0006607575411588338, "loss": 0.7448, "step": 8939 }, { "epoch": 0.6219346759887301, "grad_norm": 0.80078125, "learning_rate": 0.0006605455792540837, "loss": 0.6578, "step": 8940 }, { "epoch": 0.6220042436258653, "grad_norm": 1.140625, "learning_rate": 0.0006603336345851179, "loss": 0.9665, "step": 8941 }, { "epoch": 0.6220738112630004, "grad_norm": 1.0546875, "learning_rate": 0.0006601217071626981, "loss": 0.7588, "step": 8942 }, { "epoch": 0.6221433789001356, "grad_norm": 1.4140625, "learning_rate": 0.0006599097969975853, "loss": 0.9968, "step": 8943 }, { "epoch": 0.6222129465372709, "grad_norm": 1.28125, "learning_rate": 0.0006596979041005387, "loss": 0.8707, "step": 8944 }, { "epoch": 0.622282514174406, "grad_norm": 1.234375, "learning_rate": 0.000659486028482317, "loss": 1.1097, "step": 8945 }, { "epoch": 0.6223520818115412, "grad_norm": 0.984375, "learning_rate": 0.0006592741701536789, "loss": 0.6772, "step": 8946 }, { "epoch": 0.6224216494486765, "grad_norm": 1.296875, "learning_rate": 0.0006590623291253807, "loss": 0.8832, "step": 8947 }, { "epoch": 0.6224912170858117, "grad_norm": 1.1015625, "learning_rate": 0.0006588505054081788, "loss": 0.826, "step": 8948 }, { "epoch": 0.6225607847229468, "grad_norm": 1.0390625, "learning_rate": 0.000658638699012829, "loss": 0.6682, "step": 8949 }, { "epoch": 0.6226303523600821, "grad_norm": 1.1875, "learning_rate": 0.0006584269099500857, "loss": 0.7514, "step": 8950 }, { "epoch": 0.6226999199972173, "grad_norm": 1.0703125, "learning_rate": 0.000658215138230702, "loss": 0.7864, "step": 8951 }, { "epoch": 0.6227694876343525, "grad_norm": 0.95703125, "learning_rate": 0.0006580033838654305, "loss": 0.585, "step": 8952 }, { "epoch": 0.6228390552714878, "grad_norm": 1.203125, "learning_rate": 0.0006577916468650238, "loss": 0.9401, "step": 8953 }, { "epoch": 0.6229086229086229, "grad_norm": 1.4765625, "learning_rate": 0.0006575799272402326, "loss": 1.1381, "step": 8954 }, { "epoch": 0.6229781905457581, "grad_norm": 1.1796875, "learning_rate": 0.0006573682250018062, "loss": 0.8025, "step": 8955 }, { "epoch": 0.6230477581828933, "grad_norm": 1.3203125, "learning_rate": 0.0006571565401604948, "loss": 0.8385, "step": 8956 }, { "epoch": 0.6231173258200285, "grad_norm": 1.078125, "learning_rate": 0.0006569448727270462, "loss": 0.7887, "step": 8957 }, { "epoch": 0.6231868934571637, "grad_norm": 1.15625, "learning_rate": 0.0006567332227122078, "loss": 1.0143, "step": 8958 }, { "epoch": 0.6232564610942989, "grad_norm": 1.0, "learning_rate": 0.0006565215901267259, "loss": 0.6021, "step": 8959 }, { "epoch": 0.6233260287314342, "grad_norm": 1.1328125, "learning_rate": 0.0006563099749813466, "loss": 0.7746, "step": 8960 }, { "epoch": 0.6233955963685693, "grad_norm": 1.2734375, "learning_rate": 0.0006560983772868146, "loss": 0.9201, "step": 8961 }, { "epoch": 0.6234651640057045, "grad_norm": 0.92578125, "learning_rate": 0.0006558867970538733, "loss": 0.7459, "step": 8962 }, { "epoch": 0.6235347316428398, "grad_norm": 1.125, "learning_rate": 0.000655675234293266, "loss": 0.7632, "step": 8963 }, { "epoch": 0.623604299279975, "grad_norm": 1.5, "learning_rate": 0.0006554636890157352, "loss": 0.6866, "step": 8964 }, { "epoch": 0.6236738669171101, "grad_norm": 0.90234375, "learning_rate": 0.0006552521612320214, "loss": 0.6747, "step": 8965 }, { "epoch": 0.6237434345542454, "grad_norm": 1.0859375, "learning_rate": 0.0006550406509528649, "loss": 0.8112, "step": 8966 }, { "epoch": 0.6238130021913806, "grad_norm": 0.99609375, "learning_rate": 0.0006548291581890057, "loss": 0.8227, "step": 8967 }, { "epoch": 0.6238825698285158, "grad_norm": 1.0546875, "learning_rate": 0.0006546176829511823, "loss": 0.6668, "step": 8968 }, { "epoch": 0.6239521374656509, "grad_norm": 0.9921875, "learning_rate": 0.0006544062252501317, "loss": 0.7372, "step": 8969 }, { "epoch": 0.6240217051027862, "grad_norm": 1.1328125, "learning_rate": 0.0006541947850965911, "loss": 0.9286, "step": 8970 }, { "epoch": 0.6240912727399214, "grad_norm": 1.3125, "learning_rate": 0.0006539833625012968, "loss": 1.1129, "step": 8971 }, { "epoch": 0.6241608403770565, "grad_norm": 1.0625, "learning_rate": 0.0006537719574749828, "loss": 0.6248, "step": 8972 }, { "epoch": 0.6242304080141918, "grad_norm": 1.046875, "learning_rate": 0.0006535605700283836, "loss": 0.6675, "step": 8973 }, { "epoch": 0.624299975651327, "grad_norm": 1.328125, "learning_rate": 0.0006533492001722327, "loss": 0.8368, "step": 8974 }, { "epoch": 0.6243695432884622, "grad_norm": 1.3359375, "learning_rate": 0.0006531378479172624, "loss": 0.6613, "step": 8975 }, { "epoch": 0.6244391109255975, "grad_norm": 1.046875, "learning_rate": 0.0006529265132742035, "loss": 0.78, "step": 8976 }, { "epoch": 0.6245086785627326, "grad_norm": 1.203125, "learning_rate": 0.000652715196253787, "loss": 0.8276, "step": 8977 }, { "epoch": 0.6245782461998678, "grad_norm": 1.171875, "learning_rate": 0.0006525038968667425, "loss": 0.8103, "step": 8978 }, { "epoch": 0.6246478138370031, "grad_norm": 1.5625, "learning_rate": 0.0006522926151237984, "loss": 0.7788, "step": 8979 }, { "epoch": 0.6247173814741382, "grad_norm": 1.125, "learning_rate": 0.0006520813510356827, "loss": 0.6742, "step": 8980 }, { "epoch": 0.6247869491112734, "grad_norm": 1.2578125, "learning_rate": 0.0006518701046131226, "loss": 1.0233, "step": 8981 }, { "epoch": 0.6248565167484086, "grad_norm": 1.1171875, "learning_rate": 0.000651658875866844, "loss": 0.7299, "step": 8982 }, { "epoch": 0.6249260843855439, "grad_norm": 1.4453125, "learning_rate": 0.0006514476648075714, "loss": 0.906, "step": 8983 }, { "epoch": 0.624995652022679, "grad_norm": 1.0625, "learning_rate": 0.0006512364714460297, "loss": 0.8938, "step": 8984 }, { "epoch": 0.6250652196598142, "grad_norm": 1.1015625, "learning_rate": 0.0006510252957929426, "loss": 1.0569, "step": 8985 }, { "epoch": 0.6251347872969495, "grad_norm": 1.046875, "learning_rate": 0.0006508141378590316, "loss": 0.7806, "step": 8986 }, { "epoch": 0.6252043549340847, "grad_norm": 1.3125, "learning_rate": 0.0006506029976550184, "loss": 0.7813, "step": 8987 }, { "epoch": 0.6252739225712198, "grad_norm": 1.578125, "learning_rate": 0.0006503918751916241, "loss": 0.8226, "step": 8988 }, { "epoch": 0.6253434902083551, "grad_norm": 1.2109375, "learning_rate": 0.0006501807704795686, "loss": 0.8797, "step": 8989 }, { "epoch": 0.6254130578454903, "grad_norm": 1.0703125, "learning_rate": 0.0006499696835295698, "loss": 0.8448, "step": 8990 }, { "epoch": 0.6254826254826255, "grad_norm": 1.4453125, "learning_rate": 0.0006497586143523464, "loss": 0.8622, "step": 8991 }, { "epoch": 0.6255521931197607, "grad_norm": 1.3203125, "learning_rate": 0.0006495475629586153, "loss": 0.8413, "step": 8992 }, { "epoch": 0.6256217607568959, "grad_norm": 1.1640625, "learning_rate": 0.0006493365293590927, "loss": 0.9827, "step": 8993 }, { "epoch": 0.6256913283940311, "grad_norm": 1.2109375, "learning_rate": 0.0006491255135644931, "loss": 0.6953, "step": 8994 }, { "epoch": 0.6257608960311662, "grad_norm": 1.015625, "learning_rate": 0.0006489145155855318, "loss": 0.5838, "step": 8995 }, { "epoch": 0.6258304636683015, "grad_norm": 0.96875, "learning_rate": 0.000648703535432922, "loss": 0.783, "step": 8996 }, { "epoch": 0.6259000313054367, "grad_norm": 1.125, "learning_rate": 0.0006484925731173755, "loss": 0.8399, "step": 8997 }, { "epoch": 0.6259695989425719, "grad_norm": 1.2734375, "learning_rate": 0.0006482816286496046, "loss": 0.8525, "step": 8998 }, { "epoch": 0.6260391665797072, "grad_norm": 1.2265625, "learning_rate": 0.0006480707020403198, "loss": 0.8645, "step": 8999 }, { "epoch": 0.6261087342168423, "grad_norm": 0.828125, "learning_rate": 0.0006478597933002313, "loss": 0.7281, "step": 9000 }, { "epoch": 0.6261783018539775, "grad_norm": 0.859375, "learning_rate": 0.000647648902440047, "loss": 0.7644, "step": 9001 }, { "epoch": 0.6262478694911128, "grad_norm": 1.34375, "learning_rate": 0.0006474380294704756, "loss": 0.7728, "step": 9002 }, { "epoch": 0.626317437128248, "grad_norm": 0.94921875, "learning_rate": 0.0006472271744022243, "loss": 0.5117, "step": 9003 }, { "epoch": 0.6263870047653831, "grad_norm": 0.890625, "learning_rate": 0.0006470163372459984, "loss": 0.6666, "step": 9004 }, { "epoch": 0.6264565724025184, "grad_norm": 0.94921875, "learning_rate": 0.0006468055180125043, "loss": 0.8523, "step": 9005 }, { "epoch": 0.6265261400396536, "grad_norm": 0.90234375, "learning_rate": 0.0006465947167124455, "loss": 0.7207, "step": 9006 }, { "epoch": 0.6265957076767887, "grad_norm": 1.0625, "learning_rate": 0.000646383933356526, "loss": 0.5216, "step": 9007 }, { "epoch": 0.6266652753139239, "grad_norm": 1.1875, "learning_rate": 0.0006461731679554476, "loss": 0.8078, "step": 9008 }, { "epoch": 0.6267348429510592, "grad_norm": 1.0625, "learning_rate": 0.0006459624205199124, "loss": 0.7409, "step": 9009 }, { "epoch": 0.6268044105881944, "grad_norm": 1.21875, "learning_rate": 0.0006457516910606213, "loss": 1.0532, "step": 9010 }, { "epoch": 0.6268739782253295, "grad_norm": 1.03125, "learning_rate": 0.0006455409795882737, "loss": 0.6999, "step": 9011 }, { "epoch": 0.6269435458624648, "grad_norm": 1.5, "learning_rate": 0.0006453302861135681, "loss": 1.0903, "step": 9012 }, { "epoch": 0.6270131134996, "grad_norm": 0.9921875, "learning_rate": 0.0006451196106472031, "loss": 0.9208, "step": 9013 }, { "epoch": 0.6270826811367352, "grad_norm": 1.0234375, "learning_rate": 0.0006449089531998759, "loss": 0.5971, "step": 9014 }, { "epoch": 0.6271522487738704, "grad_norm": 1.3671875, "learning_rate": 0.0006446983137822818, "loss": 1.0601, "step": 9015 }, { "epoch": 0.6272218164110056, "grad_norm": 1.3671875, "learning_rate": 0.0006444876924051168, "loss": 0.8429, "step": 9016 }, { "epoch": 0.6272913840481408, "grad_norm": 1.3515625, "learning_rate": 0.0006442770890790749, "loss": 0.7286, "step": 9017 }, { "epoch": 0.6273609516852761, "grad_norm": 1.03125, "learning_rate": 0.0006440665038148493, "loss": 0.9192, "step": 9018 }, { "epoch": 0.6274305193224112, "grad_norm": 1.203125, "learning_rate": 0.0006438559366231325, "loss": 1.0337, "step": 9019 }, { "epoch": 0.6275000869595464, "grad_norm": 0.99609375, "learning_rate": 0.0006436453875146161, "loss": 0.8791, "step": 9020 }, { "epoch": 0.6275696545966816, "grad_norm": 1.109375, "learning_rate": 0.0006434348564999911, "loss": 0.5871, "step": 9021 }, { "epoch": 0.6276392222338169, "grad_norm": 1.6796875, "learning_rate": 0.0006432243435899465, "loss": 0.9247, "step": 9022 }, { "epoch": 0.627708789870952, "grad_norm": 1.3671875, "learning_rate": 0.0006430138487951715, "loss": 0.7651, "step": 9023 }, { "epoch": 0.6277783575080872, "grad_norm": 1.0546875, "learning_rate": 0.0006428033721263541, "loss": 0.9575, "step": 9024 }, { "epoch": 0.6278479251452225, "grad_norm": 1.28125, "learning_rate": 0.0006425929135941813, "loss": 0.7282, "step": 9025 }, { "epoch": 0.6279174927823576, "grad_norm": 1.390625, "learning_rate": 0.0006423824732093383, "loss": 1.0565, "step": 9026 }, { "epoch": 0.6279870604194928, "grad_norm": 1.0859375, "learning_rate": 0.0006421720509825111, "loss": 0.8532, "step": 9027 }, { "epoch": 0.6280566280566281, "grad_norm": 1.140625, "learning_rate": 0.0006419616469243837, "loss": 0.81, "step": 9028 }, { "epoch": 0.6281261956937633, "grad_norm": 1.234375, "learning_rate": 0.0006417512610456389, "loss": 0.9244, "step": 9029 }, { "epoch": 0.6281957633308984, "grad_norm": 1.0625, "learning_rate": 0.0006415408933569593, "loss": 0.8321, "step": 9030 }, { "epoch": 0.6282653309680337, "grad_norm": 1.125, "learning_rate": 0.0006413305438690267, "loss": 0.8649, "step": 9031 }, { "epoch": 0.6283348986051689, "grad_norm": 1.2265625, "learning_rate": 0.0006411202125925213, "loss": 1.0341, "step": 9032 }, { "epoch": 0.6284044662423041, "grad_norm": 1.390625, "learning_rate": 0.0006409098995381222, "loss": 0.9063, "step": 9033 }, { "epoch": 0.6284740338794392, "grad_norm": 1.4375, "learning_rate": 0.0006406996047165086, "loss": 0.7885, "step": 9034 }, { "epoch": 0.6285436015165745, "grad_norm": 1.5625, "learning_rate": 0.0006404893281383583, "loss": 1.0488, "step": 9035 }, { "epoch": 0.6286131691537097, "grad_norm": 1.0546875, "learning_rate": 0.0006402790698143477, "loss": 0.8029, "step": 9036 }, { "epoch": 0.6286827367908449, "grad_norm": 1.0546875, "learning_rate": 0.0006400688297551526, "loss": 0.7888, "step": 9037 }, { "epoch": 0.6287523044279801, "grad_norm": 1.3828125, "learning_rate": 0.0006398586079714485, "loss": 0.903, "step": 9038 }, { "epoch": 0.6288218720651153, "grad_norm": 1.171875, "learning_rate": 0.000639648404473909, "loss": 0.8528, "step": 9039 }, { "epoch": 0.6288914397022505, "grad_norm": 1.3203125, "learning_rate": 0.0006394382192732069, "loss": 0.8498, "step": 9040 }, { "epoch": 0.6289610073393858, "grad_norm": 1.1171875, "learning_rate": 0.0006392280523800149, "loss": 0.7965, "step": 9041 }, { "epoch": 0.6290305749765209, "grad_norm": 1.265625, "learning_rate": 0.0006390179038050041, "loss": 0.688, "step": 9042 }, { "epoch": 0.6291001426136561, "grad_norm": 0.8046875, "learning_rate": 0.0006388077735588441, "loss": 0.6369, "step": 9043 }, { "epoch": 0.6291697102507914, "grad_norm": 1.09375, "learning_rate": 0.0006385976616522054, "loss": 0.9437, "step": 9044 }, { "epoch": 0.6292392778879266, "grad_norm": 1.171875, "learning_rate": 0.0006383875680957557, "loss": 0.8621, "step": 9045 }, { "epoch": 0.6293088455250617, "grad_norm": 1.109375, "learning_rate": 0.0006381774929001628, "loss": 0.683, "step": 9046 }, { "epoch": 0.6293784131621969, "grad_norm": 0.984375, "learning_rate": 0.0006379674360760927, "loss": 0.7988, "step": 9047 }, { "epoch": 0.6294479807993322, "grad_norm": 1.046875, "learning_rate": 0.0006377573976342114, "loss": 0.7488, "step": 9048 }, { "epoch": 0.6295175484364673, "grad_norm": 1.1328125, "learning_rate": 0.0006375473775851841, "loss": 0.7922, "step": 9049 }, { "epoch": 0.6295871160736025, "grad_norm": 1.1328125, "learning_rate": 0.0006373373759396735, "loss": 0.8314, "step": 9050 }, { "epoch": 0.6296566837107378, "grad_norm": 1.0625, "learning_rate": 0.0006371273927083434, "loss": 0.765, "step": 9051 }, { "epoch": 0.629726251347873, "grad_norm": 1.3515625, "learning_rate": 0.000636917427901855, "loss": 0.9509, "step": 9052 }, { "epoch": 0.6297958189850081, "grad_norm": 1.15625, "learning_rate": 0.00063670748153087, "loss": 0.6709, "step": 9053 }, { "epoch": 0.6298653866221434, "grad_norm": 0.96484375, "learning_rate": 0.0006364975536060475, "loss": 0.8492, "step": 9054 }, { "epoch": 0.6299349542592786, "grad_norm": 1.15625, "learning_rate": 0.0006362876441380471, "loss": 0.787, "step": 9055 }, { "epoch": 0.6300045218964138, "grad_norm": 0.86328125, "learning_rate": 0.000636077753137527, "loss": 0.5293, "step": 9056 }, { "epoch": 0.630074089533549, "grad_norm": 0.98828125, "learning_rate": 0.000635867880615144, "loss": 0.4938, "step": 9057 }, { "epoch": 0.6301436571706842, "grad_norm": 1.359375, "learning_rate": 0.0006356580265815551, "loss": 0.9754, "step": 9058 }, { "epoch": 0.6302132248078194, "grad_norm": 1.125, "learning_rate": 0.000635448191047415, "loss": 0.6962, "step": 9059 }, { "epoch": 0.6302827924449546, "grad_norm": 1.171875, "learning_rate": 0.0006352383740233784, "loss": 0.5832, "step": 9060 }, { "epoch": 0.6303523600820898, "grad_norm": 1.515625, "learning_rate": 0.0006350285755200984, "loss": 0.6581, "step": 9061 }, { "epoch": 0.630421927719225, "grad_norm": 0.9140625, "learning_rate": 0.0006348187955482279, "loss": 0.7414, "step": 9062 }, { "epoch": 0.6304914953563602, "grad_norm": 0.9765625, "learning_rate": 0.0006346090341184183, "loss": 0.7638, "step": 9063 }, { "epoch": 0.6305610629934955, "grad_norm": 1.421875, "learning_rate": 0.00063439929124132, "loss": 0.9622, "step": 9064 }, { "epoch": 0.6306306306306306, "grad_norm": 1.265625, "learning_rate": 0.0006341895669275834, "loss": 0.7825, "step": 9065 }, { "epoch": 0.6307001982677658, "grad_norm": 1.0390625, "learning_rate": 0.0006339798611878565, "loss": 0.6645, "step": 9066 }, { "epoch": 0.6307697659049011, "grad_norm": 0.85546875, "learning_rate": 0.0006337701740327876, "loss": 0.5933, "step": 9067 }, { "epoch": 0.6308393335420363, "grad_norm": 1.1640625, "learning_rate": 0.000633560505473023, "loss": 0.6615, "step": 9068 }, { "epoch": 0.6309089011791714, "grad_norm": 0.86328125, "learning_rate": 0.0006333508555192089, "loss": 0.6311, "step": 9069 }, { "epoch": 0.6309784688163067, "grad_norm": 1.1015625, "learning_rate": 0.0006331412241819905, "loss": 0.7053, "step": 9070 }, { "epoch": 0.6310480364534419, "grad_norm": 1.3828125, "learning_rate": 0.0006329316114720114, "loss": 0.9294, "step": 9071 }, { "epoch": 0.631117604090577, "grad_norm": 1.3125, "learning_rate": 0.0006327220173999153, "loss": 1.0703, "step": 9072 }, { "epoch": 0.6311871717277122, "grad_norm": 0.953125, "learning_rate": 0.0006325124419763438, "loss": 0.8098, "step": 9073 }, { "epoch": 0.6312567393648475, "grad_norm": 1.1640625, "learning_rate": 0.0006323028852119383, "loss": 0.8282, "step": 9074 }, { "epoch": 0.6313263070019827, "grad_norm": 1.25, "learning_rate": 0.0006320933471173385, "loss": 0.8094, "step": 9075 }, { "epoch": 0.6313958746391178, "grad_norm": 1.1484375, "learning_rate": 0.0006318838277031845, "loss": 0.8589, "step": 9076 }, { "epoch": 0.6314654422762531, "grad_norm": 1.203125, "learning_rate": 0.0006316743269801142, "loss": 0.907, "step": 9077 }, { "epoch": 0.6315350099133883, "grad_norm": 0.8984375, "learning_rate": 0.0006314648449587649, "loss": 0.7442, "step": 9078 }, { "epoch": 0.6316045775505235, "grad_norm": 1.1015625, "learning_rate": 0.0006312553816497737, "loss": 0.8458, "step": 9079 }, { "epoch": 0.6316741451876587, "grad_norm": 1.2109375, "learning_rate": 0.0006310459370637754, "loss": 0.9374, "step": 9080 }, { "epoch": 0.6317437128247939, "grad_norm": 1.0859375, "learning_rate": 0.000630836511211405, "loss": 0.828, "step": 9081 }, { "epoch": 0.6318132804619291, "grad_norm": 1.140625, "learning_rate": 0.000630627104103295, "loss": 0.879, "step": 9082 }, { "epoch": 0.6318828480990644, "grad_norm": 0.89453125, "learning_rate": 0.0006304177157500796, "loss": 0.6049, "step": 9083 }, { "epoch": 0.6319524157361995, "grad_norm": 1.28125, "learning_rate": 0.0006302083461623896, "loss": 0.6668, "step": 9084 }, { "epoch": 0.6320219833733347, "grad_norm": 1.3125, "learning_rate": 0.0006299989953508558, "loss": 0.8314, "step": 9085 }, { "epoch": 0.6320915510104699, "grad_norm": 0.92578125, "learning_rate": 0.0006297896633261083, "loss": 0.6113, "step": 9086 }, { "epoch": 0.6321611186476052, "grad_norm": 1.1484375, "learning_rate": 0.0006295803500987755, "loss": 0.7645, "step": 9087 }, { "epoch": 0.6322306862847403, "grad_norm": 1.296875, "learning_rate": 0.0006293710556794859, "loss": 0.9322, "step": 9088 }, { "epoch": 0.6323002539218755, "grad_norm": 1.109375, "learning_rate": 0.000629161780078865, "loss": 0.8756, "step": 9089 }, { "epoch": 0.6323698215590108, "grad_norm": 0.98046875, "learning_rate": 0.0006289525233075406, "loss": 0.8356, "step": 9090 }, { "epoch": 0.632439389196146, "grad_norm": 1.328125, "learning_rate": 0.0006287432853761365, "loss": 0.6584, "step": 9091 }, { "epoch": 0.6325089568332811, "grad_norm": 1.25, "learning_rate": 0.0006285340662952775, "loss": 0.9389, "step": 9092 }, { "epoch": 0.6325785244704164, "grad_norm": 1.140625, "learning_rate": 0.0006283248660755858, "loss": 1.0067, "step": 9093 }, { "epoch": 0.6326480921075516, "grad_norm": 1.0078125, "learning_rate": 0.0006281156847276841, "loss": 0.8184, "step": 9094 }, { "epoch": 0.6327176597446867, "grad_norm": 1.2890625, "learning_rate": 0.0006279065222621936, "loss": 0.982, "step": 9095 }, { "epoch": 0.632787227381822, "grad_norm": 1.25, "learning_rate": 0.0006276973786897342, "loss": 1.0089, "step": 9096 }, { "epoch": 0.6328567950189572, "grad_norm": 0.87109375, "learning_rate": 0.0006274882540209258, "loss": 0.598, "step": 9097 }, { "epoch": 0.6329263626560924, "grad_norm": 1.140625, "learning_rate": 0.0006272791482663859, "loss": 0.6582, "step": 9098 }, { "epoch": 0.6329959302932275, "grad_norm": 1.34375, "learning_rate": 0.0006270700614367326, "loss": 1.0057, "step": 9099 }, { "epoch": 0.6330654979303628, "grad_norm": 1.34375, "learning_rate": 0.0006268609935425815, "loss": 1.0524, "step": 9100 }, { "epoch": 0.633135065567498, "grad_norm": 1.171875, "learning_rate": 0.0006266519445945484, "loss": 0.8258, "step": 9101 }, { "epoch": 0.6332046332046332, "grad_norm": 1.046875, "learning_rate": 0.0006264429146032478, "loss": 0.8512, "step": 9102 }, { "epoch": 0.6332742008417684, "grad_norm": 1.15625, "learning_rate": 0.000626233903579293, "loss": 0.8977, "step": 9103 }, { "epoch": 0.6333437684789036, "grad_norm": 1.0546875, "learning_rate": 0.000626024911533297, "loss": 0.678, "step": 9104 }, { "epoch": 0.6334133361160388, "grad_norm": 1.0234375, "learning_rate": 0.0006258159384758709, "loss": 0.6439, "step": 9105 }, { "epoch": 0.6334829037531741, "grad_norm": 1.1484375, "learning_rate": 0.0006256069844176256, "loss": 0.702, "step": 9106 }, { "epoch": 0.6335524713903092, "grad_norm": 1.0703125, "learning_rate": 0.0006253980493691698, "loss": 0.9091, "step": 9107 }, { "epoch": 0.6336220390274444, "grad_norm": 0.9453125, "learning_rate": 0.0006251891333411136, "loss": 0.9381, "step": 9108 }, { "epoch": 0.6336916066645797, "grad_norm": 1.125, "learning_rate": 0.0006249802363440638, "loss": 0.816, "step": 9109 }, { "epoch": 0.6337611743017149, "grad_norm": 1.109375, "learning_rate": 0.0006247713583886272, "loss": 0.9566, "step": 9110 }, { "epoch": 0.63383074193885, "grad_norm": 1.1953125, "learning_rate": 0.0006245624994854102, "loss": 0.8885, "step": 9111 }, { "epoch": 0.6339003095759852, "grad_norm": 0.94140625, "learning_rate": 0.0006243536596450168, "loss": 0.7782, "step": 9112 }, { "epoch": 0.6339698772131205, "grad_norm": 1.4296875, "learning_rate": 0.0006241448388780514, "loss": 0.9671, "step": 9113 }, { "epoch": 0.6340394448502557, "grad_norm": 1.0546875, "learning_rate": 0.0006239360371951161, "loss": 0.7471, "step": 9114 }, { "epoch": 0.6341090124873908, "grad_norm": 1.453125, "learning_rate": 0.0006237272546068137, "loss": 0.8383, "step": 9115 }, { "epoch": 0.6341785801245261, "grad_norm": 1.0703125, "learning_rate": 0.0006235184911237449, "loss": 0.7559, "step": 9116 }, { "epoch": 0.6342481477616613, "grad_norm": 1.4140625, "learning_rate": 0.0006233097467565092, "loss": 0.8646, "step": 9117 }, { "epoch": 0.6343177153987964, "grad_norm": 1.171875, "learning_rate": 0.0006231010215157062, "loss": 0.8285, "step": 9118 }, { "epoch": 0.6343872830359317, "grad_norm": 0.78125, "learning_rate": 0.0006228923154119334, "loss": 0.6399, "step": 9119 }, { "epoch": 0.6344568506730669, "grad_norm": 1.078125, "learning_rate": 0.0006226836284557885, "loss": 0.8933, "step": 9120 }, { "epoch": 0.6345264183102021, "grad_norm": 0.90625, "learning_rate": 0.0006224749606578662, "loss": 0.64, "step": 9121 }, { "epoch": 0.6345959859473373, "grad_norm": 1.5234375, "learning_rate": 0.0006222663120287633, "loss": 0.8596, "step": 9122 }, { "epoch": 0.6346655535844725, "grad_norm": 1.390625, "learning_rate": 0.0006220576825790729, "loss": 0.9377, "step": 9123 }, { "epoch": 0.6347351212216077, "grad_norm": 1.3984375, "learning_rate": 0.0006218490723193884, "loss": 1.0686, "step": 9124 }, { "epoch": 0.6348046888587429, "grad_norm": 0.87109375, "learning_rate": 0.0006216404812603021, "loss": 0.7806, "step": 9125 }, { "epoch": 0.6348742564958781, "grad_norm": 0.94140625, "learning_rate": 0.0006214319094124051, "loss": 0.7259, "step": 9126 }, { "epoch": 0.6349438241330133, "grad_norm": 0.9921875, "learning_rate": 0.0006212233567862875, "loss": 1.2013, "step": 9127 }, { "epoch": 0.6350133917701485, "grad_norm": 0.953125, "learning_rate": 0.0006210148233925385, "loss": 0.6091, "step": 9128 }, { "epoch": 0.6350829594072838, "grad_norm": 1.125, "learning_rate": 0.000620806309241747, "loss": 0.8251, "step": 9129 }, { "epoch": 0.6351525270444189, "grad_norm": 1.03125, "learning_rate": 0.0006205978143444996, "loss": 0.7176, "step": 9130 }, { "epoch": 0.6352220946815541, "grad_norm": 1.09375, "learning_rate": 0.0006203893387113826, "loss": 0.8539, "step": 9131 }, { "epoch": 0.6352916623186894, "grad_norm": 1.2734375, "learning_rate": 0.0006201808823529819, "loss": 0.7645, "step": 9132 }, { "epoch": 0.6353612299558246, "grad_norm": 1.234375, "learning_rate": 0.0006199724452798816, "loss": 0.8681, "step": 9133 }, { "epoch": 0.6354307975929597, "grad_norm": 1.3515625, "learning_rate": 0.000619764027502665, "loss": 0.9698, "step": 9134 }, { "epoch": 0.635500365230095, "grad_norm": 1.046875, "learning_rate": 0.0006195556290319143, "loss": 0.9378, "step": 9135 }, { "epoch": 0.6355699328672302, "grad_norm": 1.046875, "learning_rate": 0.0006193472498782116, "loss": 0.7968, "step": 9136 }, { "epoch": 0.6356395005043654, "grad_norm": 1.40625, "learning_rate": 0.0006191388900521368, "loss": 0.8968, "step": 9137 }, { "epoch": 0.6357090681415005, "grad_norm": 1.2109375, "learning_rate": 0.000618930549564269, "loss": 0.8122, "step": 9138 }, { "epoch": 0.6357786357786358, "grad_norm": 0.8671875, "learning_rate": 0.0006187222284251879, "loss": 0.7596, "step": 9139 }, { "epoch": 0.635848203415771, "grad_norm": 1.0546875, "learning_rate": 0.0006185139266454698, "loss": 0.5965, "step": 9140 }, { "epoch": 0.6359177710529061, "grad_norm": 1.3203125, "learning_rate": 0.0006183056442356918, "loss": 1.0042, "step": 9141 }, { "epoch": 0.6359873386900414, "grad_norm": 1.078125, "learning_rate": 0.0006180973812064291, "loss": 0.8028, "step": 9142 }, { "epoch": 0.6360569063271766, "grad_norm": 1.2265625, "learning_rate": 0.000617889137568257, "loss": 0.9763, "step": 9143 }, { "epoch": 0.6361264739643118, "grad_norm": 1.015625, "learning_rate": 0.000617680913331748, "loss": 0.5821, "step": 9144 }, { "epoch": 0.636196041601447, "grad_norm": 1.09375, "learning_rate": 0.0006174727085074751, "loss": 0.637, "step": 9145 }, { "epoch": 0.6362656092385822, "grad_norm": 1.2265625, "learning_rate": 0.0006172645231060103, "loss": 0.9573, "step": 9146 }, { "epoch": 0.6363351768757174, "grad_norm": 1.5234375, "learning_rate": 0.000617056357137924, "loss": 0.8373, "step": 9147 }, { "epoch": 0.6364047445128527, "grad_norm": 1.203125, "learning_rate": 0.0006168482106137854, "loss": 0.7554, "step": 9148 }, { "epoch": 0.6364743121499878, "grad_norm": 1.0078125, "learning_rate": 0.0006166400835441635, "loss": 0.6937, "step": 9149 }, { "epoch": 0.636543879787123, "grad_norm": 1.078125, "learning_rate": 0.0006164319759396261, "loss": 0.6661, "step": 9150 }, { "epoch": 0.6366134474242582, "grad_norm": 1.265625, "learning_rate": 0.0006162238878107394, "loss": 0.7754, "step": 9151 }, { "epoch": 0.6366830150613935, "grad_norm": 1.34375, "learning_rate": 0.0006160158191680691, "loss": 0.79, "step": 9152 }, { "epoch": 0.6367525826985286, "grad_norm": 0.85546875, "learning_rate": 0.0006158077700221805, "loss": 0.6313, "step": 9153 }, { "epoch": 0.6368221503356638, "grad_norm": 1.1171875, "learning_rate": 0.0006155997403836369, "loss": 0.8359, "step": 9154 }, { "epoch": 0.6368917179727991, "grad_norm": 0.9765625, "learning_rate": 0.0006153917302630007, "loss": 0.6287, "step": 9155 }, { "epoch": 0.6369612856099343, "grad_norm": 1.1328125, "learning_rate": 0.0006151837396708337, "loss": 1.0975, "step": 9156 }, { "epoch": 0.6370308532470694, "grad_norm": 1.078125, "learning_rate": 0.0006149757686176973, "loss": 0.6502, "step": 9157 }, { "epoch": 0.6371004208842047, "grad_norm": 1.203125, "learning_rate": 0.0006147678171141504, "loss": 0.8921, "step": 9158 }, { "epoch": 0.6371699885213399, "grad_norm": 1.1484375, "learning_rate": 0.0006145598851707519, "loss": 0.9472, "step": 9159 }, { "epoch": 0.637239556158475, "grad_norm": 1.140625, "learning_rate": 0.0006143519727980597, "loss": 0.8791, "step": 9160 }, { "epoch": 0.6373091237956103, "grad_norm": 1.3359375, "learning_rate": 0.0006141440800066309, "loss": 0.9028, "step": 9161 }, { "epoch": 0.6373786914327455, "grad_norm": 1.2109375, "learning_rate": 0.0006139362068070207, "loss": 0.7902, "step": 9162 }, { "epoch": 0.6374482590698807, "grad_norm": 1.078125, "learning_rate": 0.0006137283532097837, "loss": 0.7436, "step": 9163 }, { "epoch": 0.6375178267070158, "grad_norm": 1.375, "learning_rate": 0.0006135205192254742, "loss": 0.8525, "step": 9164 }, { "epoch": 0.6375873943441511, "grad_norm": 1.2578125, "learning_rate": 0.0006133127048646448, "loss": 0.8239, "step": 9165 }, { "epoch": 0.6376569619812863, "grad_norm": 1.1796875, "learning_rate": 0.0006131049101378472, "loss": 0.7516, "step": 9166 }, { "epoch": 0.6377265296184215, "grad_norm": 1.171875, "learning_rate": 0.0006128971350556319, "loss": 0.7965, "step": 9167 }, { "epoch": 0.6377960972555567, "grad_norm": 1.1640625, "learning_rate": 0.0006126893796285493, "loss": 0.7398, "step": 9168 }, { "epoch": 0.6378656648926919, "grad_norm": 1.0703125, "learning_rate": 0.0006124816438671476, "loss": 0.7929, "step": 9169 }, { "epoch": 0.6379352325298271, "grad_norm": 1.03125, "learning_rate": 0.0006122739277819747, "loss": 0.7309, "step": 9170 }, { "epoch": 0.6380048001669624, "grad_norm": 1.3203125, "learning_rate": 0.0006120662313835776, "loss": 1.0432, "step": 9171 }, { "epoch": 0.6380743678040975, "grad_norm": 1.234375, "learning_rate": 0.0006118585546825019, "loss": 0.7969, "step": 9172 }, { "epoch": 0.6381439354412327, "grad_norm": 1.1484375, "learning_rate": 0.0006116508976892925, "loss": 0.6666, "step": 9173 }, { "epoch": 0.638213503078368, "grad_norm": 0.88671875, "learning_rate": 0.0006114432604144928, "loss": 0.6517, "step": 9174 }, { "epoch": 0.6382830707155032, "grad_norm": 1.0390625, "learning_rate": 0.0006112356428686463, "loss": 0.7091, "step": 9175 }, { "epoch": 0.6383526383526383, "grad_norm": 1.2734375, "learning_rate": 0.0006110280450622943, "loss": 0.9462, "step": 9176 }, { "epoch": 0.6384222059897735, "grad_norm": 0.90625, "learning_rate": 0.0006108204670059772, "loss": 0.9103, "step": 9177 }, { "epoch": 0.6384917736269088, "grad_norm": 1.03125, "learning_rate": 0.0006106129087102354, "loss": 0.827, "step": 9178 }, { "epoch": 0.638561341264044, "grad_norm": 1.2734375, "learning_rate": 0.0006104053701856076, "loss": 0.9415, "step": 9179 }, { "epoch": 0.6386309089011791, "grad_norm": 1.1328125, "learning_rate": 0.0006101978514426312, "loss": 0.7695, "step": 9180 }, { "epoch": 0.6387004765383144, "grad_norm": 1.375, "learning_rate": 0.000609990352491843, "loss": 0.6326, "step": 9181 }, { "epoch": 0.6387700441754496, "grad_norm": 1.0234375, "learning_rate": 0.0006097828733437794, "loss": 0.6975, "step": 9182 }, { "epoch": 0.6388396118125848, "grad_norm": 1.1796875, "learning_rate": 0.0006095754140089744, "loss": 0.9786, "step": 9183 }, { "epoch": 0.63890917944972, "grad_norm": 1.125, "learning_rate": 0.0006093679744979617, "loss": 0.9109, "step": 9184 }, { "epoch": 0.6389787470868552, "grad_norm": 0.79296875, "learning_rate": 0.0006091605548212746, "loss": 0.7764, "step": 9185 }, { "epoch": 0.6390483147239904, "grad_norm": 1.140625, "learning_rate": 0.0006089531549894447, "loss": 0.6927, "step": 9186 }, { "epoch": 0.6391178823611257, "grad_norm": 1.28125, "learning_rate": 0.0006087457750130023, "loss": 0.9123, "step": 9187 }, { "epoch": 0.6391874499982608, "grad_norm": 1.0546875, "learning_rate": 0.0006085384149024773, "loss": 0.7084, "step": 9188 }, { "epoch": 0.639257017635396, "grad_norm": 1.078125, "learning_rate": 0.000608331074668399, "loss": 1.138, "step": 9189 }, { "epoch": 0.6393265852725312, "grad_norm": 1.4296875, "learning_rate": 0.000608123754321294, "loss": 0.9994, "step": 9190 }, { "epoch": 0.6393961529096664, "grad_norm": 1.3203125, "learning_rate": 0.0006079164538716897, "loss": 0.98, "step": 9191 }, { "epoch": 0.6394657205468016, "grad_norm": 1.015625, "learning_rate": 0.0006077091733301117, "loss": 0.6393, "step": 9192 }, { "epoch": 0.6395352881839368, "grad_norm": 1.4921875, "learning_rate": 0.0006075019127070849, "loss": 1.0215, "step": 9193 }, { "epoch": 0.6396048558210721, "grad_norm": 1.03125, "learning_rate": 0.0006072946720131323, "loss": 0.9159, "step": 9194 }, { "epoch": 0.6396744234582072, "grad_norm": 0.96484375, "learning_rate": 0.0006070874512587766, "loss": 0.5661, "step": 9195 }, { "epoch": 0.6397439910953424, "grad_norm": 1.3359375, "learning_rate": 0.0006068802504545402, "loss": 0.8845, "step": 9196 }, { "epoch": 0.6398135587324777, "grad_norm": 1.1484375, "learning_rate": 0.000606673069610943, "loss": 0.6966, "step": 9197 }, { "epoch": 0.6398831263696129, "grad_norm": 1.25, "learning_rate": 0.0006064659087385047, "loss": 0.691, "step": 9198 }, { "epoch": 0.639952694006748, "grad_norm": 1.2109375, "learning_rate": 0.0006062587678477441, "loss": 0.8337, "step": 9199 }, { "epoch": 0.6400222616438833, "grad_norm": 1.0859375, "learning_rate": 0.0006060516469491788, "loss": 0.7723, "step": 9200 }, { "epoch": 0.6400918292810185, "grad_norm": 1.171875, "learning_rate": 0.0006058445460533251, "loss": 0.7276, "step": 9201 }, { "epoch": 0.6401613969181537, "grad_norm": 1.515625, "learning_rate": 0.0006056374651706985, "loss": 0.9775, "step": 9202 }, { "epoch": 0.6402309645552888, "grad_norm": 1.2265625, "learning_rate": 0.0006054304043118141, "loss": 0.8342, "step": 9203 }, { "epoch": 0.6403005321924241, "grad_norm": 0.9921875, "learning_rate": 0.0006052233634871847, "loss": 0.741, "step": 9204 }, { "epoch": 0.6403700998295593, "grad_norm": 1.3828125, "learning_rate": 0.000605016342707323, "loss": 0.9725, "step": 9205 }, { "epoch": 0.6404396674666945, "grad_norm": 1.15625, "learning_rate": 0.0006048093419827405, "loss": 0.6572, "step": 9206 }, { "epoch": 0.6405092351038297, "grad_norm": 1.125, "learning_rate": 0.0006046023613239482, "loss": 0.7487, "step": 9207 }, { "epoch": 0.6405788027409649, "grad_norm": 1.4140625, "learning_rate": 0.0006043954007414548, "loss": 1.0782, "step": 9208 }, { "epoch": 0.6406483703781001, "grad_norm": 1.078125, "learning_rate": 0.0006041884602457685, "loss": 0.9721, "step": 9209 }, { "epoch": 0.6407179380152354, "grad_norm": 1.2578125, "learning_rate": 0.0006039815398473978, "loss": 1.022, "step": 9210 }, { "epoch": 0.6407875056523705, "grad_norm": 1.1015625, "learning_rate": 0.0006037746395568481, "loss": 0.9364, "step": 9211 }, { "epoch": 0.6408570732895057, "grad_norm": 1.0078125, "learning_rate": 0.0006035677593846249, "loss": 0.8639, "step": 9212 }, { "epoch": 0.640926640926641, "grad_norm": 1.28125, "learning_rate": 0.0006033608993412329, "loss": 0.8383, "step": 9213 }, { "epoch": 0.6409962085637761, "grad_norm": 1.125, "learning_rate": 0.0006031540594371755, "loss": 0.7661, "step": 9214 }, { "epoch": 0.6410657762009113, "grad_norm": 0.90234375, "learning_rate": 0.0006029472396829545, "loss": 0.514, "step": 9215 }, { "epoch": 0.6411353438380465, "grad_norm": 1.09375, "learning_rate": 0.0006027404400890711, "loss": 0.9713, "step": 9216 }, { "epoch": 0.6412049114751818, "grad_norm": 1.0859375, "learning_rate": 0.0006025336606660262, "loss": 0.6745, "step": 9217 }, { "epoch": 0.6412744791123169, "grad_norm": 1.0, "learning_rate": 0.0006023269014243186, "loss": 0.6754, "step": 9218 }, { "epoch": 0.6413440467494521, "grad_norm": 1.1171875, "learning_rate": 0.0006021201623744462, "loss": 0.7241, "step": 9219 }, { "epoch": 0.6414136143865874, "grad_norm": 1.0078125, "learning_rate": 0.0006019134435269066, "loss": 0.8975, "step": 9220 }, { "epoch": 0.6414831820237226, "grad_norm": 1.015625, "learning_rate": 0.0006017067448921962, "loss": 0.732, "step": 9221 }, { "epoch": 0.6415527496608577, "grad_norm": 0.71484375, "learning_rate": 0.0006015000664808096, "loss": 0.4636, "step": 9222 }, { "epoch": 0.641622317297993, "grad_norm": 1.140625, "learning_rate": 0.0006012934083032406, "loss": 0.7586, "step": 9223 }, { "epoch": 0.6416918849351282, "grad_norm": 1.15625, "learning_rate": 0.0006010867703699831, "loss": 0.8372, "step": 9224 }, { "epoch": 0.6417614525722634, "grad_norm": 0.9453125, "learning_rate": 0.0006008801526915288, "loss": 0.7143, "step": 9225 }, { "epoch": 0.6418310202093986, "grad_norm": 1.140625, "learning_rate": 0.0006006735552783683, "loss": 0.8462, "step": 9226 }, { "epoch": 0.6419005878465338, "grad_norm": 1.140625, "learning_rate": 0.0006004669781409922, "loss": 0.6911, "step": 9227 }, { "epoch": 0.641970155483669, "grad_norm": 1.2578125, "learning_rate": 0.0006002604212898892, "loss": 0.8371, "step": 9228 }, { "epoch": 0.6420397231208042, "grad_norm": 1.1953125, "learning_rate": 0.000600053884735547, "loss": 0.7316, "step": 9229 }, { "epoch": 0.6421092907579394, "grad_norm": 1.921875, "learning_rate": 0.0005998473684884525, "loss": 0.8465, "step": 9230 }, { "epoch": 0.6421788583950746, "grad_norm": 0.96875, "learning_rate": 0.0005996408725590918, "loss": 0.7709, "step": 9231 }, { "epoch": 0.6422484260322098, "grad_norm": 1.1484375, "learning_rate": 0.0005994343969579498, "loss": 0.5967, "step": 9232 }, { "epoch": 0.642317993669345, "grad_norm": 0.96875, "learning_rate": 0.00059922794169551, "loss": 0.9269, "step": 9233 }, { "epoch": 0.6423875613064802, "grad_norm": 1.0390625, "learning_rate": 0.0005990215067822553, "loss": 0.979, "step": 9234 }, { "epoch": 0.6424571289436154, "grad_norm": 1.2109375, "learning_rate": 0.0005988150922286676, "loss": 0.8963, "step": 9235 }, { "epoch": 0.6425266965807507, "grad_norm": 0.9296875, "learning_rate": 0.0005986086980452272, "loss": 0.7806, "step": 9236 }, { "epoch": 0.6425962642178858, "grad_norm": 1.25, "learning_rate": 0.0005984023242424138, "loss": 0.8006, "step": 9237 }, { "epoch": 0.642665831855021, "grad_norm": 1.1171875, "learning_rate": 0.0005981959708307063, "loss": 0.7691, "step": 9238 }, { "epoch": 0.6427353994921563, "grad_norm": 1.1171875, "learning_rate": 0.0005979896378205824, "loss": 0.477, "step": 9239 }, { "epoch": 0.6428049671292915, "grad_norm": 1.125, "learning_rate": 0.000597783325222518, "loss": 0.8926, "step": 9240 }, { "epoch": 0.6428745347664266, "grad_norm": 1.1015625, "learning_rate": 0.0005975770330469892, "loss": 0.6737, "step": 9241 }, { "epoch": 0.6429441024035618, "grad_norm": 1.53125, "learning_rate": 0.0005973707613044706, "loss": 0.9849, "step": 9242 }, { "epoch": 0.6430136700406971, "grad_norm": 1.359375, "learning_rate": 0.000597164510005435, "loss": 0.8287, "step": 9243 }, { "epoch": 0.6430832376778323, "grad_norm": 0.98046875, "learning_rate": 0.0005969582791603551, "loss": 0.6986, "step": 9244 }, { "epoch": 0.6431528053149674, "grad_norm": 1.3359375, "learning_rate": 0.0005967520687797023, "loss": 0.9721, "step": 9245 }, { "epoch": 0.6432223729521027, "grad_norm": 1.46875, "learning_rate": 0.0005965458788739473, "loss": 0.8994, "step": 9246 }, { "epoch": 0.6432919405892379, "grad_norm": 0.88671875, "learning_rate": 0.0005963397094535587, "loss": 0.6767, "step": 9247 }, { "epoch": 0.6433615082263731, "grad_norm": 1.1875, "learning_rate": 0.000596133560529005, "loss": 0.9858, "step": 9248 }, { "epoch": 0.6434310758635083, "grad_norm": 1.2734375, "learning_rate": 0.0005959274321107535, "loss": 0.8106, "step": 9249 }, { "epoch": 0.6435006435006435, "grad_norm": 1.3125, "learning_rate": 0.0005957213242092707, "loss": 0.7467, "step": 9250 }, { "epoch": 0.6435702111377787, "grad_norm": 1.203125, "learning_rate": 0.0005955152368350207, "loss": 0.7088, "step": 9251 }, { "epoch": 0.643639778774914, "grad_norm": 0.98046875, "learning_rate": 0.0005953091699984687, "loss": 0.7812, "step": 9252 }, { "epoch": 0.6437093464120491, "grad_norm": 0.890625, "learning_rate": 0.0005951031237100773, "loss": 0.4536, "step": 9253 }, { "epoch": 0.6437789140491843, "grad_norm": 1.359375, "learning_rate": 0.0005948970979803082, "loss": 1.0513, "step": 9254 }, { "epoch": 0.6438484816863195, "grad_norm": 1.0703125, "learning_rate": 0.0005946910928196224, "loss": 0.7437, "step": 9255 }, { "epoch": 0.6439180493234548, "grad_norm": 0.8671875, "learning_rate": 0.0005944851082384802, "loss": 0.7235, "step": 9256 }, { "epoch": 0.6439876169605899, "grad_norm": 1.0234375, "learning_rate": 0.0005942791442473405, "loss": 0.9745, "step": 9257 }, { "epoch": 0.6440571845977251, "grad_norm": 1.2734375, "learning_rate": 0.0005940732008566605, "loss": 0.753, "step": 9258 }, { "epoch": 0.6441267522348604, "grad_norm": 1.25, "learning_rate": 0.0005938672780768974, "loss": 1.0134, "step": 9259 }, { "epoch": 0.6441963198719955, "grad_norm": 1.3125, "learning_rate": 0.0005936613759185073, "loss": 0.9824, "step": 9260 }, { "epoch": 0.6442658875091307, "grad_norm": 1.234375, "learning_rate": 0.0005934554943919442, "loss": 0.8076, "step": 9261 }, { "epoch": 0.644335455146266, "grad_norm": 1.1640625, "learning_rate": 0.0005932496335076616, "loss": 0.7829, "step": 9262 }, { "epoch": 0.6444050227834012, "grad_norm": 1.2890625, "learning_rate": 0.0005930437932761126, "loss": 0.7276, "step": 9263 }, { "epoch": 0.6444745904205363, "grad_norm": 1.0859375, "learning_rate": 0.0005928379737077489, "loss": 0.7165, "step": 9264 }, { "epoch": 0.6445441580576716, "grad_norm": 1.0546875, "learning_rate": 0.0005926321748130201, "loss": 0.7109, "step": 9265 }, { "epoch": 0.6446137256948068, "grad_norm": 1.1953125, "learning_rate": 0.0005924263966023767, "loss": 0.9391, "step": 9266 }, { "epoch": 0.644683293331942, "grad_norm": 1.0078125, "learning_rate": 0.0005922206390862663, "loss": 0.8388, "step": 9267 }, { "epoch": 0.6447528609690771, "grad_norm": 1.15625, "learning_rate": 0.0005920149022751366, "loss": 0.9156, "step": 9268 }, { "epoch": 0.6448224286062124, "grad_norm": 1.125, "learning_rate": 0.0005918091861794334, "loss": 0.7867, "step": 9269 }, { "epoch": 0.6448919962433476, "grad_norm": 1.1015625, "learning_rate": 0.0005916034908096026, "loss": 0.8344, "step": 9270 }, { "epoch": 0.6449615638804828, "grad_norm": 1.1640625, "learning_rate": 0.0005913978161760883, "loss": 0.697, "step": 9271 }, { "epoch": 0.645031131517618, "grad_norm": 1.265625, "learning_rate": 0.0005911921622893331, "loss": 0.7987, "step": 9272 }, { "epoch": 0.6451006991547532, "grad_norm": 1.1171875, "learning_rate": 0.0005909865291597792, "loss": 0.7588, "step": 9273 }, { "epoch": 0.6451702667918884, "grad_norm": 0.8828125, "learning_rate": 0.0005907809167978682, "loss": 0.7214, "step": 9274 }, { "epoch": 0.6452398344290237, "grad_norm": 0.94140625, "learning_rate": 0.0005905753252140394, "loss": 0.8408, "step": 9275 }, { "epoch": 0.6453094020661588, "grad_norm": 1.078125, "learning_rate": 0.0005903697544187318, "loss": 1.061, "step": 9276 }, { "epoch": 0.645378969703294, "grad_norm": 0.9921875, "learning_rate": 0.0005901642044223834, "loss": 0.6878, "step": 9277 }, { "epoch": 0.6454485373404293, "grad_norm": 1.0546875, "learning_rate": 0.0005899586752354314, "loss": 0.7811, "step": 9278 }, { "epoch": 0.6455181049775645, "grad_norm": 1.1171875, "learning_rate": 0.0005897531668683104, "loss": 0.7057, "step": 9279 }, { "epoch": 0.6455876726146996, "grad_norm": 1.234375, "learning_rate": 0.0005895476793314563, "loss": 0.8484, "step": 9280 }, { "epoch": 0.6456572402518348, "grad_norm": 1.265625, "learning_rate": 0.0005893422126353021, "loss": 0.8589, "step": 9281 }, { "epoch": 0.6457268078889701, "grad_norm": 1.0546875, "learning_rate": 0.0005891367667902807, "loss": 0.9069, "step": 9282 }, { "epoch": 0.6457963755261052, "grad_norm": 1.65625, "learning_rate": 0.0005889313418068229, "loss": 0.9748, "step": 9283 }, { "epoch": 0.6458659431632404, "grad_norm": 1.125, "learning_rate": 0.0005887259376953597, "loss": 0.804, "step": 9284 }, { "epoch": 0.6459355108003757, "grad_norm": 1.015625, "learning_rate": 0.0005885205544663208, "loss": 0.7784, "step": 9285 }, { "epoch": 0.6460050784375109, "grad_norm": 1.0234375, "learning_rate": 0.0005883151921301337, "loss": 0.6842, "step": 9286 }, { "epoch": 0.646074646074646, "grad_norm": 1.1015625, "learning_rate": 0.0005881098506972265, "loss": 0.709, "step": 9287 }, { "epoch": 0.6461442137117813, "grad_norm": 0.9921875, "learning_rate": 0.0005879045301780247, "loss": 0.8058, "step": 9288 }, { "epoch": 0.6462137813489165, "grad_norm": 1.015625, "learning_rate": 0.000587699230582954, "loss": 0.6286, "step": 9289 }, { "epoch": 0.6462833489860517, "grad_norm": 1.1640625, "learning_rate": 0.0005874939519224378, "loss": 1.0014, "step": 9290 }, { "epoch": 0.646352916623187, "grad_norm": 1.2421875, "learning_rate": 0.0005872886942068999, "loss": 0.8455, "step": 9291 }, { "epoch": 0.6464224842603221, "grad_norm": 1.0859375, "learning_rate": 0.0005870834574467621, "loss": 0.7493, "step": 9292 }, { "epoch": 0.6464920518974573, "grad_norm": 2.328125, "learning_rate": 0.0005868782416524446, "loss": 0.8493, "step": 9293 }, { "epoch": 0.6465616195345925, "grad_norm": 1.0546875, "learning_rate": 0.0005866730468343678, "loss": 0.5316, "step": 9294 }, { "epoch": 0.6466311871717277, "grad_norm": 1.0078125, "learning_rate": 0.0005864678730029503, "loss": 0.6703, "step": 9295 }, { "epoch": 0.6467007548088629, "grad_norm": 1.171875, "learning_rate": 0.0005862627201686102, "loss": 1.0093, "step": 9296 }, { "epoch": 0.6467703224459981, "grad_norm": 0.9609375, "learning_rate": 0.0005860575883417634, "loss": 0.7401, "step": 9297 }, { "epoch": 0.6468398900831334, "grad_norm": 1.0625, "learning_rate": 0.000585852477532826, "loss": 0.6057, "step": 9298 }, { "epoch": 0.6469094577202685, "grad_norm": 0.9921875, "learning_rate": 0.0005856473877522126, "loss": 0.7363, "step": 9299 }, { "epoch": 0.6469790253574037, "grad_norm": 1.09375, "learning_rate": 0.0005854423190103357, "loss": 0.7201, "step": 9300 }, { "epoch": 0.647048592994539, "grad_norm": 1.1015625, "learning_rate": 0.0005852372713176088, "loss": 0.6854, "step": 9301 }, { "epoch": 0.6471181606316742, "grad_norm": 1.015625, "learning_rate": 0.0005850322446844427, "loss": 0.8433, "step": 9302 }, { "epoch": 0.6471877282688093, "grad_norm": 0.96484375, "learning_rate": 0.0005848272391212477, "loss": 0.6544, "step": 9303 }, { "epoch": 0.6472572959059446, "grad_norm": 1.2421875, "learning_rate": 0.0005846222546384325, "loss": 1.06, "step": 9304 }, { "epoch": 0.6473268635430798, "grad_norm": 0.921875, "learning_rate": 0.0005844172912464057, "loss": 0.6242, "step": 9305 }, { "epoch": 0.647396431180215, "grad_norm": 1.1484375, "learning_rate": 0.0005842123489555744, "loss": 0.7195, "step": 9306 }, { "epoch": 0.6474659988173501, "grad_norm": 1.046875, "learning_rate": 0.0005840074277763437, "loss": 0.7005, "step": 9307 }, { "epoch": 0.6475355664544854, "grad_norm": 1.1640625, "learning_rate": 0.0005838025277191197, "loss": 0.7961, "step": 9308 }, { "epoch": 0.6476051340916206, "grad_norm": 0.8828125, "learning_rate": 0.0005835976487943055, "loss": 0.9478, "step": 9309 }, { "epoch": 0.6476747017287557, "grad_norm": 0.96875, "learning_rate": 0.0005833927910123036, "loss": 0.6468, "step": 9310 }, { "epoch": 0.647744269365891, "grad_norm": 1.109375, "learning_rate": 0.0005831879543835157, "loss": 0.595, "step": 9311 }, { "epoch": 0.6478138370030262, "grad_norm": 1.5078125, "learning_rate": 0.0005829831389183431, "loss": 1.113, "step": 9312 }, { "epoch": 0.6478834046401614, "grad_norm": 1.125, "learning_rate": 0.0005827783446271848, "loss": 0.9451, "step": 9313 }, { "epoch": 0.6479529722772966, "grad_norm": 1.21875, "learning_rate": 0.0005825735715204388, "loss": 1.0168, "step": 9314 }, { "epoch": 0.6480225399144318, "grad_norm": 0.98046875, "learning_rate": 0.0005823688196085028, "loss": 0.735, "step": 9315 }, { "epoch": 0.648092107551567, "grad_norm": 0.9375, "learning_rate": 0.0005821640889017737, "loss": 0.6886, "step": 9316 }, { "epoch": 0.6481616751887023, "grad_norm": 1.4140625, "learning_rate": 0.000581959379410646, "loss": 0.7337, "step": 9317 }, { "epoch": 0.6482312428258374, "grad_norm": 0.8984375, "learning_rate": 0.0005817546911455134, "loss": 0.7089, "step": 9318 }, { "epoch": 0.6483008104629726, "grad_norm": 1.015625, "learning_rate": 0.0005815500241167699, "loss": 0.7163, "step": 9319 }, { "epoch": 0.6483703781001078, "grad_norm": 1.328125, "learning_rate": 0.0005813453783348069, "loss": 1.0089, "step": 9320 }, { "epoch": 0.6484399457372431, "grad_norm": 0.9609375, "learning_rate": 0.0005811407538100151, "loss": 0.6632, "step": 9321 }, { "epoch": 0.6485095133743782, "grad_norm": 1.0625, "learning_rate": 0.0005809361505527852, "loss": 0.6799, "step": 9322 }, { "epoch": 0.6485790810115134, "grad_norm": 1.015625, "learning_rate": 0.0005807315685735052, "loss": 0.7795, "step": 9323 }, { "epoch": 0.6486486486486487, "grad_norm": 1.03125, "learning_rate": 0.0005805270078825626, "loss": 0.808, "step": 9324 }, { "epoch": 0.6487182162857839, "grad_norm": 1.046875, "learning_rate": 0.0005803224684903442, "loss": 0.793, "step": 9325 }, { "epoch": 0.648787783922919, "grad_norm": 1.109375, "learning_rate": 0.0005801179504072359, "loss": 0.8611, "step": 9326 }, { "epoch": 0.6488573515600543, "grad_norm": 1.3203125, "learning_rate": 0.0005799134536436217, "loss": 0.9363, "step": 9327 }, { "epoch": 0.6489269191971895, "grad_norm": 1.046875, "learning_rate": 0.0005797089782098846, "loss": 0.7961, "step": 9328 }, { "epoch": 0.6489964868343246, "grad_norm": 0.97265625, "learning_rate": 0.0005795045241164072, "loss": 0.648, "step": 9329 }, { "epoch": 0.6490660544714599, "grad_norm": 1.3828125, "learning_rate": 0.0005793000913735709, "loss": 0.9337, "step": 9330 }, { "epoch": 0.6491356221085951, "grad_norm": 1.1640625, "learning_rate": 0.0005790956799917555, "loss": 0.8339, "step": 9331 }, { "epoch": 0.6492051897457303, "grad_norm": 1.21875, "learning_rate": 0.0005788912899813395, "loss": 0.809, "step": 9332 }, { "epoch": 0.6492747573828654, "grad_norm": 1.1875, "learning_rate": 0.0005786869213527013, "loss": 0.8671, "step": 9333 }, { "epoch": 0.6493443250200007, "grad_norm": 1.15625, "learning_rate": 0.0005784825741162181, "loss": 0.8602, "step": 9334 }, { "epoch": 0.6494138926571359, "grad_norm": 1.390625, "learning_rate": 0.0005782782482822653, "loss": 0.6793, "step": 9335 }, { "epoch": 0.6494834602942711, "grad_norm": 1.5546875, "learning_rate": 0.0005780739438612169, "loss": 0.775, "step": 9336 }, { "epoch": 0.6495530279314063, "grad_norm": 1.453125, "learning_rate": 0.0005778696608634473, "loss": 0.7829, "step": 9337 }, { "epoch": 0.6496225955685415, "grad_norm": 1.1484375, "learning_rate": 0.0005776653992993282, "loss": 0.6999, "step": 9338 }, { "epoch": 0.6496921632056767, "grad_norm": 1.0078125, "learning_rate": 0.0005774611591792314, "loss": 0.7163, "step": 9339 }, { "epoch": 0.649761730842812, "grad_norm": 1.0234375, "learning_rate": 0.0005772569405135277, "loss": 0.6897, "step": 9340 }, { "epoch": 0.6498312984799471, "grad_norm": 1.2265625, "learning_rate": 0.0005770527433125857, "loss": 0.8467, "step": 9341 }, { "epoch": 0.6499008661170823, "grad_norm": 1.1171875, "learning_rate": 0.0005768485675867732, "loss": 0.8483, "step": 9342 }, { "epoch": 0.6499704337542176, "grad_norm": 1.0546875, "learning_rate": 0.0005766444133464577, "loss": 0.8846, "step": 9343 }, { "epoch": 0.6500400013913528, "grad_norm": 1.171875, "learning_rate": 0.0005764402806020053, "loss": 0.7636, "step": 9344 }, { "epoch": 0.6501095690284879, "grad_norm": 1.0546875, "learning_rate": 0.0005762361693637805, "loss": 0.8441, "step": 9345 }, { "epoch": 0.6501791366656231, "grad_norm": 1.0, "learning_rate": 0.0005760320796421468, "loss": 0.7062, "step": 9346 }, { "epoch": 0.6502487043027584, "grad_norm": 1.015625, "learning_rate": 0.0005758280114474671, "loss": 0.7896, "step": 9347 }, { "epoch": 0.6503182719398936, "grad_norm": 1.0703125, "learning_rate": 0.0005756239647901033, "loss": 0.8114, "step": 9348 }, { "epoch": 0.6503878395770287, "grad_norm": 1.0546875, "learning_rate": 0.0005754199396804157, "loss": 0.7884, "step": 9349 }, { "epoch": 0.650457407214164, "grad_norm": 1.2734375, "learning_rate": 0.0005752159361287631, "loss": 1.0018, "step": 9350 }, { "epoch": 0.6505269748512992, "grad_norm": 1.234375, "learning_rate": 0.0005750119541455045, "loss": 0.8045, "step": 9351 }, { "epoch": 0.6505965424884343, "grad_norm": 1.015625, "learning_rate": 0.0005748079937409965, "loss": 0.8519, "step": 9352 }, { "epoch": 0.6506661101255696, "grad_norm": 1.234375, "learning_rate": 0.0005746040549255955, "loss": 0.9646, "step": 9353 }, { "epoch": 0.6507356777627048, "grad_norm": 1.234375, "learning_rate": 0.0005744001377096566, "loss": 0.8221, "step": 9354 }, { "epoch": 0.65080524539984, "grad_norm": 1.1953125, "learning_rate": 0.0005741962421035337, "loss": 1.0693, "step": 9355 }, { "epoch": 0.6508748130369753, "grad_norm": 1.4296875, "learning_rate": 0.0005739923681175789, "loss": 0.7727, "step": 9356 }, { "epoch": 0.6509443806741104, "grad_norm": 1.0546875, "learning_rate": 0.0005737885157621446, "loss": 0.6614, "step": 9357 }, { "epoch": 0.6510139483112456, "grad_norm": 1.25, "learning_rate": 0.0005735846850475814, "loss": 0.8921, "step": 9358 }, { "epoch": 0.6510835159483808, "grad_norm": 1.5234375, "learning_rate": 0.0005733808759842387, "loss": 0.9109, "step": 9359 }, { "epoch": 0.651153083585516, "grad_norm": 1.34375, "learning_rate": 0.0005731770885824643, "loss": 0.916, "step": 9360 }, { "epoch": 0.6512226512226512, "grad_norm": 1.1328125, "learning_rate": 0.0005729733228526061, "loss": 1.0362, "step": 9361 }, { "epoch": 0.6512922188597864, "grad_norm": 1.21875, "learning_rate": 0.0005727695788050106, "loss": 0.8676, "step": 9362 }, { "epoch": 0.6513617864969217, "grad_norm": 1.09375, "learning_rate": 0.0005725658564500225, "loss": 0.7026, "step": 9363 }, { "epoch": 0.6514313541340568, "grad_norm": 1.1484375, "learning_rate": 0.0005723621557979854, "loss": 0.8282, "step": 9364 }, { "epoch": 0.651500921771192, "grad_norm": 1.2890625, "learning_rate": 0.0005721584768592425, "loss": 0.9341, "step": 9365 }, { "epoch": 0.6515704894083273, "grad_norm": 1.5234375, "learning_rate": 0.0005719548196441359, "loss": 0.9879, "step": 9366 }, { "epoch": 0.6516400570454625, "grad_norm": 1.1328125, "learning_rate": 0.0005717511841630058, "loss": 0.6812, "step": 9367 }, { "epoch": 0.6517096246825976, "grad_norm": 1.0546875, "learning_rate": 0.0005715475704261925, "loss": 0.6552, "step": 9368 }, { "epoch": 0.6517791923197329, "grad_norm": 1.0234375, "learning_rate": 0.0005713439784440341, "loss": 0.6276, "step": 9369 }, { "epoch": 0.6518487599568681, "grad_norm": 1.3125, "learning_rate": 0.0005711404082268673, "loss": 0.856, "step": 9370 }, { "epoch": 0.6519183275940033, "grad_norm": 0.94921875, "learning_rate": 0.0005709368597850291, "loss": 0.9023, "step": 9371 }, { "epoch": 0.6519878952311384, "grad_norm": 1.0234375, "learning_rate": 0.0005707333331288548, "loss": 1.0215, "step": 9372 }, { "epoch": 0.6520574628682737, "grad_norm": 1.3203125, "learning_rate": 0.0005705298282686782, "loss": 0.8265, "step": 9373 }, { "epoch": 0.6521270305054089, "grad_norm": 1.0703125, "learning_rate": 0.0005703263452148319, "loss": 0.772, "step": 9374 }, { "epoch": 0.652196598142544, "grad_norm": 1.1484375, "learning_rate": 0.000570122883977648, "loss": 0.7103, "step": 9375 }, { "epoch": 0.6522661657796793, "grad_norm": 1.1640625, "learning_rate": 0.0005699194445674577, "loss": 0.9021, "step": 9376 }, { "epoch": 0.6523357334168145, "grad_norm": 1.1328125, "learning_rate": 0.0005697160269945902, "loss": 0.971, "step": 9377 }, { "epoch": 0.6524053010539497, "grad_norm": 1.03125, "learning_rate": 0.0005695126312693738, "loss": 0.8955, "step": 9378 }, { "epoch": 0.652474868691085, "grad_norm": 0.9375, "learning_rate": 0.0005693092574021361, "loss": 0.8563, "step": 9379 }, { "epoch": 0.6525444363282201, "grad_norm": 1.1640625, "learning_rate": 0.0005691059054032039, "loss": 0.721, "step": 9380 }, { "epoch": 0.6526140039653553, "grad_norm": 1.34375, "learning_rate": 0.0005689025752829014, "loss": 1.0648, "step": 9381 }, { "epoch": 0.6526835716024906, "grad_norm": 0.984375, "learning_rate": 0.0005686992670515538, "loss": 0.7004, "step": 9382 }, { "epoch": 0.6527531392396257, "grad_norm": 0.96875, "learning_rate": 0.0005684959807194835, "loss": 0.8682, "step": 9383 }, { "epoch": 0.6528227068767609, "grad_norm": 1.2734375, "learning_rate": 0.0005682927162970119, "loss": 1.0182, "step": 9384 }, { "epoch": 0.6528922745138961, "grad_norm": 1.1953125, "learning_rate": 0.0005680894737944602, "loss": 0.9278, "step": 9385 }, { "epoch": 0.6529618421510314, "grad_norm": 1.0546875, "learning_rate": 0.0005678862532221485, "loss": 0.6509, "step": 9386 }, { "epoch": 0.6530314097881665, "grad_norm": 1.2421875, "learning_rate": 0.0005676830545903948, "loss": 0.7036, "step": 9387 }, { "epoch": 0.6531009774253017, "grad_norm": 0.88671875, "learning_rate": 0.0005674798779095161, "loss": 0.6861, "step": 9388 }, { "epoch": 0.653170545062437, "grad_norm": 1.046875, "learning_rate": 0.0005672767231898292, "loss": 0.7598, "step": 9389 }, { "epoch": 0.6532401126995722, "grad_norm": 1.203125, "learning_rate": 0.0005670735904416495, "loss": 0.7604, "step": 9390 }, { "epoch": 0.6533096803367073, "grad_norm": 1.0234375, "learning_rate": 0.0005668704796752909, "loss": 0.7848, "step": 9391 }, { "epoch": 0.6533792479738426, "grad_norm": 1.71875, "learning_rate": 0.0005666673909010658, "loss": 0.9454, "step": 9392 }, { "epoch": 0.6534488156109778, "grad_norm": 1.3203125, "learning_rate": 0.0005664643241292864, "loss": 0.9623, "step": 9393 }, { "epoch": 0.653518383248113, "grad_norm": 1.2421875, "learning_rate": 0.0005662612793702639, "loss": 0.8303, "step": 9394 }, { "epoch": 0.6535879508852482, "grad_norm": 1.140625, "learning_rate": 0.0005660582566343068, "loss": 0.7962, "step": 9395 }, { "epoch": 0.6536575185223834, "grad_norm": 1.359375, "learning_rate": 0.0005658552559317248, "loss": 0.9712, "step": 9396 }, { "epoch": 0.6537270861595186, "grad_norm": 1.1328125, "learning_rate": 0.0005656522772728243, "loss": 0.9124, "step": 9397 }, { "epoch": 0.6537966537966537, "grad_norm": 1.078125, "learning_rate": 0.0005654493206679121, "loss": 0.968, "step": 9398 }, { "epoch": 0.653866221433789, "grad_norm": 1.0234375, "learning_rate": 0.0005652463861272928, "loss": 0.6466, "step": 9399 }, { "epoch": 0.6539357890709242, "grad_norm": 1.078125, "learning_rate": 0.0005650434736612711, "loss": 0.8994, "step": 9400 }, { "epoch": 0.6540053567080594, "grad_norm": 0.91015625, "learning_rate": 0.0005648405832801495, "loss": 0.6486, "step": 9401 }, { "epoch": 0.6540749243451947, "grad_norm": 1.421875, "learning_rate": 0.0005646377149942292, "loss": 1.1225, "step": 9402 }, { "epoch": 0.6541444919823298, "grad_norm": 1.046875, "learning_rate": 0.0005644348688138114, "loss": 0.7517, "step": 9403 }, { "epoch": 0.654214059619465, "grad_norm": 0.9765625, "learning_rate": 0.000564232044749196, "loss": 0.7078, "step": 9404 }, { "epoch": 0.6542836272566003, "grad_norm": 1.6484375, "learning_rate": 0.000564029242810681, "loss": 0.8726, "step": 9405 }, { "epoch": 0.6543531948937354, "grad_norm": 0.92578125, "learning_rate": 0.000563826463008563, "loss": 0.7171, "step": 9406 }, { "epoch": 0.6544227625308706, "grad_norm": 0.87890625, "learning_rate": 0.0005636237053531388, "loss": 0.5968, "step": 9407 }, { "epoch": 0.6544923301680059, "grad_norm": 0.96484375, "learning_rate": 0.0005634209698547038, "loss": 0.7417, "step": 9408 }, { "epoch": 0.6545618978051411, "grad_norm": 1.234375, "learning_rate": 0.0005632182565235514, "loss": 0.8136, "step": 9409 }, { "epoch": 0.6546314654422762, "grad_norm": 0.95703125, "learning_rate": 0.000563015565369974, "loss": 0.7378, "step": 9410 }, { "epoch": 0.6547010330794114, "grad_norm": 1.59375, "learning_rate": 0.0005628128964042636, "loss": 0.919, "step": 9411 }, { "epoch": 0.6547706007165467, "grad_norm": 1.2578125, "learning_rate": 0.0005626102496367111, "loss": 0.9547, "step": 9412 }, { "epoch": 0.6548401683536819, "grad_norm": 0.9765625, "learning_rate": 0.0005624076250776052, "loss": 0.7509, "step": 9413 }, { "epoch": 0.654909735990817, "grad_norm": 1.109375, "learning_rate": 0.0005622050227372348, "loss": 0.7757, "step": 9414 }, { "epoch": 0.6549793036279523, "grad_norm": 1.046875, "learning_rate": 0.0005620024426258867, "loss": 0.6383, "step": 9415 }, { "epoch": 0.6550488712650875, "grad_norm": 1.0625, "learning_rate": 0.0005617998847538466, "loss": 0.648, "step": 9416 }, { "epoch": 0.6551184389022227, "grad_norm": 1.25, "learning_rate": 0.0005615973491313996, "loss": 0.723, "step": 9417 }, { "epoch": 0.6551880065393579, "grad_norm": 1.1796875, "learning_rate": 0.0005613948357688299, "loss": 0.7752, "step": 9418 }, { "epoch": 0.6552575741764931, "grad_norm": 0.96875, "learning_rate": 0.0005611923446764196, "loss": 0.8143, "step": 9419 }, { "epoch": 0.6553271418136283, "grad_norm": 1.1953125, "learning_rate": 0.00056098987586445, "loss": 0.8424, "step": 9420 }, { "epoch": 0.6553967094507636, "grad_norm": 1.3203125, "learning_rate": 0.0005607874293432017, "loss": 0.987, "step": 9421 }, { "epoch": 0.6554662770878987, "grad_norm": 0.87890625, "learning_rate": 0.0005605850051229544, "loss": 0.5593, "step": 9422 }, { "epoch": 0.6555358447250339, "grad_norm": 1.140625, "learning_rate": 0.0005603826032139856, "loss": 0.7599, "step": 9423 }, { "epoch": 0.6556054123621691, "grad_norm": 1.21875, "learning_rate": 0.0005601802236265721, "loss": 0.8143, "step": 9424 }, { "epoch": 0.6556749799993044, "grad_norm": 1.171875, "learning_rate": 0.0005599778663709898, "loss": 0.5966, "step": 9425 }, { "epoch": 0.6557445476364395, "grad_norm": 1.2265625, "learning_rate": 0.0005597755314575142, "loss": 0.8998, "step": 9426 }, { "epoch": 0.6558141152735747, "grad_norm": 1.21875, "learning_rate": 0.0005595732188964177, "loss": 1.1193, "step": 9427 }, { "epoch": 0.65588368291071, "grad_norm": 1.5625, "learning_rate": 0.0005593709286979736, "loss": 0.8141, "step": 9428 }, { "epoch": 0.6559532505478451, "grad_norm": 1.1015625, "learning_rate": 0.0005591686608724524, "loss": 0.7236, "step": 9429 }, { "epoch": 0.6560228181849803, "grad_norm": 0.98046875, "learning_rate": 0.000558966415430125, "loss": 0.8101, "step": 9430 }, { "epoch": 0.6560923858221156, "grad_norm": 1.046875, "learning_rate": 0.0005587641923812599, "loss": 0.9927, "step": 9431 }, { "epoch": 0.6561619534592508, "grad_norm": 1.03125, "learning_rate": 0.0005585619917361254, "loss": 0.7156, "step": 9432 }, { "epoch": 0.6562315210963859, "grad_norm": 1.328125, "learning_rate": 0.0005583598135049879, "loss": 0.9143, "step": 9433 }, { "epoch": 0.6563010887335212, "grad_norm": 1.171875, "learning_rate": 0.0005581576576981125, "loss": 0.8323, "step": 9434 }, { "epoch": 0.6563706563706564, "grad_norm": 1.2421875, "learning_rate": 0.0005579555243257644, "loss": 0.8342, "step": 9435 }, { "epoch": 0.6564402240077916, "grad_norm": 1.125, "learning_rate": 0.0005577534133982071, "loss": 0.8752, "step": 9436 }, { "epoch": 0.6565097916449267, "grad_norm": 1.2109375, "learning_rate": 0.0005575513249257022, "loss": 1.2009, "step": 9437 }, { "epoch": 0.656579359282062, "grad_norm": 1.1015625, "learning_rate": 0.0005573492589185107, "loss": 0.8155, "step": 9438 }, { "epoch": 0.6566489269191972, "grad_norm": 1.1875, "learning_rate": 0.0005571472153868926, "loss": 1.0064, "step": 9439 }, { "epoch": 0.6567184945563324, "grad_norm": 1.0, "learning_rate": 0.0005569451943411072, "loss": 0.7553, "step": 9440 }, { "epoch": 0.6567880621934676, "grad_norm": 0.8671875, "learning_rate": 0.0005567431957914114, "loss": 0.7241, "step": 9441 }, { "epoch": 0.6568576298306028, "grad_norm": 1.015625, "learning_rate": 0.0005565412197480621, "loss": 0.8892, "step": 9442 }, { "epoch": 0.656927197467738, "grad_norm": 1.53125, "learning_rate": 0.0005563392662213143, "loss": 0.9475, "step": 9443 }, { "epoch": 0.6569967651048733, "grad_norm": 1.09375, "learning_rate": 0.0005561373352214225, "loss": 0.8963, "step": 9444 }, { "epoch": 0.6570663327420084, "grad_norm": 1.1875, "learning_rate": 0.0005559354267586394, "loss": 0.9154, "step": 9445 }, { "epoch": 0.6571359003791436, "grad_norm": 1.046875, "learning_rate": 0.0005557335408432174, "loss": 0.7852, "step": 9446 }, { "epoch": 0.6572054680162789, "grad_norm": 1.140625, "learning_rate": 0.0005555316774854068, "loss": 0.8167, "step": 9447 }, { "epoch": 0.657275035653414, "grad_norm": 1.0546875, "learning_rate": 0.0005553298366954566, "loss": 0.7987, "step": 9448 }, { "epoch": 0.6573446032905492, "grad_norm": 0.984375, "learning_rate": 0.000555128018483617, "loss": 0.8527, "step": 9449 }, { "epoch": 0.6574141709276844, "grad_norm": 1.1953125, "learning_rate": 0.000554926222860134, "loss": 0.7769, "step": 9450 }, { "epoch": 0.6574837385648197, "grad_norm": 1.0234375, "learning_rate": 0.0005547244498352542, "loss": 0.7784, "step": 9451 }, { "epoch": 0.6575533062019548, "grad_norm": 1.265625, "learning_rate": 0.0005545226994192221, "loss": 0.8866, "step": 9452 }, { "epoch": 0.65762287383909, "grad_norm": 1.09375, "learning_rate": 0.0005543209716222819, "loss": 0.7285, "step": 9453 }, { "epoch": 0.6576924414762253, "grad_norm": 1.203125, "learning_rate": 0.0005541192664546768, "loss": 0.8682, "step": 9454 }, { "epoch": 0.6577620091133605, "grad_norm": 0.9609375, "learning_rate": 0.0005539175839266475, "loss": 0.9855, "step": 9455 }, { "epoch": 0.6578315767504956, "grad_norm": 1.3203125, "learning_rate": 0.0005537159240484353, "loss": 1.0906, "step": 9456 }, { "epoch": 0.6579011443876309, "grad_norm": 1.109375, "learning_rate": 0.0005535142868302787, "loss": 0.7366, "step": 9457 }, { "epoch": 0.6579707120247661, "grad_norm": 0.921875, "learning_rate": 0.0005533126722824164, "loss": 0.7251, "step": 9458 }, { "epoch": 0.6580402796619013, "grad_norm": 1.0859375, "learning_rate": 0.000553111080415085, "loss": 0.6313, "step": 9459 }, { "epoch": 0.6581098472990365, "grad_norm": 1.1953125, "learning_rate": 0.0005529095112385207, "loss": 0.8287, "step": 9460 }, { "epoch": 0.6581794149361717, "grad_norm": 1.1015625, "learning_rate": 0.0005527079647629578, "loss": 0.9603, "step": 9461 }, { "epoch": 0.6582489825733069, "grad_norm": 1.078125, "learning_rate": 0.0005525064409986292, "loss": 0.7685, "step": 9462 }, { "epoch": 0.658318550210442, "grad_norm": 0.99609375, "learning_rate": 0.0005523049399557689, "loss": 0.7197, "step": 9463 }, { "epoch": 0.6583881178475773, "grad_norm": 1.203125, "learning_rate": 0.0005521034616446071, "loss": 0.7905, "step": 9464 }, { "epoch": 0.6584576854847125, "grad_norm": 0.984375, "learning_rate": 0.0005519020060753739, "loss": 0.9, "step": 9465 }, { "epoch": 0.6585272531218477, "grad_norm": 1.0234375, "learning_rate": 0.0005517005732582981, "loss": 0.7885, "step": 9466 }, { "epoch": 0.658596820758983, "grad_norm": 1.3046875, "learning_rate": 0.0005514991632036073, "loss": 1.0088, "step": 9467 }, { "epoch": 0.6586663883961181, "grad_norm": 1.2578125, "learning_rate": 0.0005512977759215289, "loss": 0.738, "step": 9468 }, { "epoch": 0.6587359560332533, "grad_norm": 1.0625, "learning_rate": 0.0005510964114222873, "loss": 0.8165, "step": 9469 }, { "epoch": 0.6588055236703886, "grad_norm": 0.94921875, "learning_rate": 0.0005508950697161079, "loss": 0.7834, "step": 9470 }, { "epoch": 0.6588750913075238, "grad_norm": 1.0234375, "learning_rate": 0.0005506937508132127, "loss": 0.7316, "step": 9471 }, { "epoch": 0.6589446589446589, "grad_norm": 1.1171875, "learning_rate": 0.0005504924547238245, "loss": 0.6777, "step": 9472 }, { "epoch": 0.6590142265817942, "grad_norm": 1.2265625, "learning_rate": 0.0005502911814581634, "loss": 0.8826, "step": 9473 }, { "epoch": 0.6590837942189294, "grad_norm": 0.8671875, "learning_rate": 0.00055008993102645, "loss": 0.6996, "step": 9474 }, { "epoch": 0.6591533618560645, "grad_norm": 0.91796875, "learning_rate": 0.0005498887034389015, "loss": 0.7108, "step": 9475 }, { "epoch": 0.6592229294931997, "grad_norm": 0.9296875, "learning_rate": 0.0005496874987057361, "loss": 0.6236, "step": 9476 }, { "epoch": 0.659292497130335, "grad_norm": 1.0, "learning_rate": 0.0005494863168371701, "loss": 0.7545, "step": 9477 }, { "epoch": 0.6593620647674702, "grad_norm": 0.87890625, "learning_rate": 0.0005492851578434182, "loss": 0.6097, "step": 9478 }, { "epoch": 0.6594316324046053, "grad_norm": 1.140625, "learning_rate": 0.0005490840217346942, "loss": 0.665, "step": 9479 }, { "epoch": 0.6595012000417406, "grad_norm": 1.1328125, "learning_rate": 0.00054888290852121, "loss": 0.8723, "step": 9480 }, { "epoch": 0.6595707676788758, "grad_norm": 1.0859375, "learning_rate": 0.0005486818182131785, "loss": 0.8145, "step": 9481 }, { "epoch": 0.659640335316011, "grad_norm": 1.0546875, "learning_rate": 0.0005484807508208098, "loss": 0.7581, "step": 9482 }, { "epoch": 0.6597099029531462, "grad_norm": 1.2734375, "learning_rate": 0.0005482797063543125, "loss": 0.6727, "step": 9483 }, { "epoch": 0.6597794705902814, "grad_norm": 0.89453125, "learning_rate": 0.0005480786848238946, "loss": 0.6584, "step": 9484 }, { "epoch": 0.6598490382274166, "grad_norm": 0.8359375, "learning_rate": 0.0005478776862397631, "loss": 0.7583, "step": 9485 }, { "epoch": 0.6599186058645519, "grad_norm": 0.94140625, "learning_rate": 0.0005476767106121245, "loss": 0.6691, "step": 9486 }, { "epoch": 0.659988173501687, "grad_norm": 1.234375, "learning_rate": 0.000547475757951182, "loss": 0.8247, "step": 9487 }, { "epoch": 0.6600577411388222, "grad_norm": 1.4609375, "learning_rate": 0.0005472748282671401, "loss": 0.9761, "step": 9488 }, { "epoch": 0.6601273087759574, "grad_norm": 1.0234375, "learning_rate": 0.0005470739215702001, "loss": 0.8019, "step": 9489 }, { "epoch": 0.6601968764130927, "grad_norm": 1.2265625, "learning_rate": 0.000546873037870564, "loss": 0.7796, "step": 9490 }, { "epoch": 0.6602664440502278, "grad_norm": 1.4453125, "learning_rate": 0.0005466721771784305, "loss": 0.8948, "step": 9491 }, { "epoch": 0.660336011687363, "grad_norm": 1.2578125, "learning_rate": 0.0005464713395039993, "loss": 1.0112, "step": 9492 }, { "epoch": 0.6604055793244983, "grad_norm": 1.140625, "learning_rate": 0.0005462705248574677, "loss": 0.852, "step": 9493 }, { "epoch": 0.6604751469616335, "grad_norm": 1.0859375, "learning_rate": 0.000546069733249031, "loss": 0.9642, "step": 9494 }, { "epoch": 0.6605447145987686, "grad_norm": 1.0390625, "learning_rate": 0.0005458689646888859, "loss": 0.8078, "step": 9495 }, { "epoch": 0.6606142822359039, "grad_norm": 1.3671875, "learning_rate": 0.000545668219187226, "loss": 0.9276, "step": 9496 }, { "epoch": 0.6606838498730391, "grad_norm": 1.25, "learning_rate": 0.0005454674967542439, "loss": 0.8876, "step": 9497 }, { "epoch": 0.6607534175101742, "grad_norm": 0.96484375, "learning_rate": 0.0005452667974001308, "loss": 0.7041, "step": 9498 }, { "epoch": 0.6608229851473094, "grad_norm": 1.2578125, "learning_rate": 0.0005450661211350779, "loss": 0.6879, "step": 9499 }, { "epoch": 0.6608925527844447, "grad_norm": 0.94921875, "learning_rate": 0.0005448654679692745, "loss": 0.8165, "step": 9500 }, { "epoch": 0.6609621204215799, "grad_norm": 1.0078125, "learning_rate": 0.0005446648379129083, "loss": 0.7812, "step": 9501 }, { "epoch": 0.661031688058715, "grad_norm": 1.171875, "learning_rate": 0.0005444642309761669, "loss": 0.8959, "step": 9502 }, { "epoch": 0.6611012556958503, "grad_norm": 0.93359375, "learning_rate": 0.0005442636471692355, "loss": 0.9082, "step": 9503 }, { "epoch": 0.6611708233329855, "grad_norm": 0.80859375, "learning_rate": 0.0005440630865022993, "loss": 0.5763, "step": 9504 }, { "epoch": 0.6612403909701207, "grad_norm": 0.890625, "learning_rate": 0.0005438625489855412, "loss": 0.844, "step": 9505 }, { "epoch": 0.6613099586072559, "grad_norm": 1.3125, "learning_rate": 0.000543662034629144, "loss": 1.0799, "step": 9506 }, { "epoch": 0.6613795262443911, "grad_norm": 1.0078125, "learning_rate": 0.0005434615434432884, "loss": 0.7713, "step": 9507 }, { "epoch": 0.6614490938815263, "grad_norm": 1.03125, "learning_rate": 0.0005432610754381543, "loss": 0.7808, "step": 9508 }, { "epoch": 0.6615186615186616, "grad_norm": 1.3515625, "learning_rate": 0.0005430606306239211, "loss": 0.8285, "step": 9509 }, { "epoch": 0.6615882291557967, "grad_norm": 1.3046875, "learning_rate": 0.000542860209010766, "loss": 0.6139, "step": 9510 }, { "epoch": 0.6616577967929319, "grad_norm": 1.1171875, "learning_rate": 0.0005426598106088651, "loss": 0.7686, "step": 9511 }, { "epoch": 0.6617273644300671, "grad_norm": 0.91796875, "learning_rate": 0.0005424594354283937, "loss": 0.8478, "step": 9512 }, { "epoch": 0.6617969320672024, "grad_norm": 1.1171875, "learning_rate": 0.0005422590834795259, "loss": 0.7465, "step": 9513 }, { "epoch": 0.6618664997043375, "grad_norm": 1.3046875, "learning_rate": 0.0005420587547724352, "loss": 1.0016, "step": 9514 }, { "epoch": 0.6619360673414727, "grad_norm": 1.0625, "learning_rate": 0.0005418584493172921, "loss": 0.6856, "step": 9515 }, { "epoch": 0.662005634978608, "grad_norm": 1.5390625, "learning_rate": 0.0005416581671242682, "loss": 1.1134, "step": 9516 }, { "epoch": 0.6620752026157432, "grad_norm": 0.9453125, "learning_rate": 0.000541457908203532, "loss": 0.5706, "step": 9517 }, { "epoch": 0.6621447702528783, "grad_norm": 0.97265625, "learning_rate": 0.0005412576725652525, "loss": 0.8443, "step": 9518 }, { "epoch": 0.6622143378900136, "grad_norm": 1.484375, "learning_rate": 0.0005410574602195957, "loss": 0.9196, "step": 9519 }, { "epoch": 0.6622839055271488, "grad_norm": 1.2109375, "learning_rate": 0.0005408572711767282, "loss": 0.9009, "step": 9520 }, { "epoch": 0.662353473164284, "grad_norm": 1.2109375, "learning_rate": 0.0005406571054468137, "loss": 0.9215, "step": 9521 }, { "epoch": 0.6624230408014192, "grad_norm": 1.203125, "learning_rate": 0.0005404569630400163, "loss": 0.9282, "step": 9522 }, { "epoch": 0.6624926084385544, "grad_norm": 0.94140625, "learning_rate": 0.0005402568439664983, "loss": 0.7831, "step": 9523 }, { "epoch": 0.6625621760756896, "grad_norm": 0.84765625, "learning_rate": 0.0005400567482364207, "loss": 0.5068, "step": 9524 }, { "epoch": 0.6626317437128247, "grad_norm": 0.96484375, "learning_rate": 0.0005398566758599429, "loss": 0.6971, "step": 9525 }, { "epoch": 0.66270131134996, "grad_norm": 1.1796875, "learning_rate": 0.0005396566268472231, "loss": 0.9397, "step": 9526 }, { "epoch": 0.6627708789870952, "grad_norm": 0.8984375, "learning_rate": 0.0005394566012084203, "loss": 0.5619, "step": 9527 }, { "epoch": 0.6628404466242304, "grad_norm": 1.046875, "learning_rate": 0.00053925659895369, "loss": 0.9043, "step": 9528 }, { "epoch": 0.6629100142613656, "grad_norm": 1.234375, "learning_rate": 0.0005390566200931869, "loss": 0.9458, "step": 9529 }, { "epoch": 0.6629795818985008, "grad_norm": 1.4453125, "learning_rate": 0.0005388566646370656, "loss": 0.7127, "step": 9530 }, { "epoch": 0.663049149535636, "grad_norm": 0.96484375, "learning_rate": 0.0005386567325954783, "loss": 0.7, "step": 9531 }, { "epoch": 0.6631187171727713, "grad_norm": 0.90234375, "learning_rate": 0.0005384568239785771, "loss": 0.7347, "step": 9532 }, { "epoch": 0.6631882848099064, "grad_norm": 1.265625, "learning_rate": 0.0005382569387965115, "loss": 0.8503, "step": 9533 }, { "epoch": 0.6632578524470416, "grad_norm": 1.2265625, "learning_rate": 0.0005380570770594317, "loss": 0.8793, "step": 9534 }, { "epoch": 0.6633274200841769, "grad_norm": 1.1171875, "learning_rate": 0.0005378572387774849, "loss": 0.8123, "step": 9535 }, { "epoch": 0.6633969877213121, "grad_norm": 1.0390625, "learning_rate": 0.0005376574239608179, "loss": 0.9453, "step": 9536 }, { "epoch": 0.6634665553584472, "grad_norm": 1.484375, "learning_rate": 0.000537457632619577, "loss": 0.8145, "step": 9537 }, { "epoch": 0.6635361229955824, "grad_norm": 1.15625, "learning_rate": 0.0005372578647639063, "loss": 0.9752, "step": 9538 }, { "epoch": 0.6636056906327177, "grad_norm": 1.1640625, "learning_rate": 0.0005370581204039482, "loss": 0.7784, "step": 9539 }, { "epoch": 0.6636752582698529, "grad_norm": 0.94140625, "learning_rate": 0.0005368583995498455, "loss": 0.7265, "step": 9540 }, { "epoch": 0.663744825906988, "grad_norm": 0.90234375, "learning_rate": 0.0005366587022117392, "loss": 0.5821, "step": 9541 }, { "epoch": 0.6638143935441233, "grad_norm": 1.3671875, "learning_rate": 0.0005364590283997685, "loss": 1.0459, "step": 9542 }, { "epoch": 0.6638839611812585, "grad_norm": 1.1015625, "learning_rate": 0.0005362593781240716, "loss": 0.8329, "step": 9543 }, { "epoch": 0.6639535288183936, "grad_norm": 0.9921875, "learning_rate": 0.0005360597513947866, "loss": 0.5252, "step": 9544 }, { "epoch": 0.6640230964555289, "grad_norm": 1.1953125, "learning_rate": 0.0005358601482220484, "loss": 0.6583, "step": 9545 }, { "epoch": 0.6640926640926641, "grad_norm": 1.1953125, "learning_rate": 0.000535660568615993, "loss": 0.8504, "step": 9546 }, { "epoch": 0.6641622317297993, "grad_norm": 1.046875, "learning_rate": 0.0005354610125867529, "loss": 0.8956, "step": 9547 }, { "epoch": 0.6642317993669345, "grad_norm": 1.15625, "learning_rate": 0.0005352614801444617, "loss": 1.0768, "step": 9548 }, { "epoch": 0.6643013670040697, "grad_norm": 1.0390625, "learning_rate": 0.0005350619712992495, "loss": 0.8248, "step": 9549 }, { "epoch": 0.6643709346412049, "grad_norm": 1.234375, "learning_rate": 0.0005348624860612471, "loss": 0.7256, "step": 9550 }, { "epoch": 0.6644405022783401, "grad_norm": 0.8828125, "learning_rate": 0.0005346630244405835, "loss": 0.7343, "step": 9551 }, { "epoch": 0.6645100699154753, "grad_norm": 0.875, "learning_rate": 0.0005344635864473861, "loss": 0.6724, "step": 9552 }, { "epoch": 0.6645796375526105, "grad_norm": 0.7890625, "learning_rate": 0.0005342641720917809, "loss": 0.6969, "step": 9553 }, { "epoch": 0.6646492051897457, "grad_norm": 1.40625, "learning_rate": 0.0005340647813838935, "loss": 1.1734, "step": 9554 }, { "epoch": 0.664718772826881, "grad_norm": 1.0625, "learning_rate": 0.0005338654143338484, "loss": 0.9715, "step": 9555 }, { "epoch": 0.6647883404640161, "grad_norm": 1.21875, "learning_rate": 0.0005336660709517681, "loss": 0.6627, "step": 9556 }, { "epoch": 0.6648579081011513, "grad_norm": 1.1875, "learning_rate": 0.0005334667512477742, "loss": 0.8215, "step": 9557 }, { "epoch": 0.6649274757382866, "grad_norm": 1.09375, "learning_rate": 0.0005332674552319865, "loss": 0.7588, "step": 9558 }, { "epoch": 0.6649970433754218, "grad_norm": 1.109375, "learning_rate": 0.0005330681829145257, "loss": 0.8211, "step": 9559 }, { "epoch": 0.6650666110125569, "grad_norm": 1.140625, "learning_rate": 0.0005328689343055089, "loss": 0.7802, "step": 9560 }, { "epoch": 0.6651361786496922, "grad_norm": 0.953125, "learning_rate": 0.0005326697094150528, "loss": 0.699, "step": 9561 }, { "epoch": 0.6652057462868274, "grad_norm": 0.9453125, "learning_rate": 0.0005324705082532737, "loss": 0.6794, "step": 9562 }, { "epoch": 0.6652753139239626, "grad_norm": 1.09375, "learning_rate": 0.0005322713308302852, "loss": 0.6506, "step": 9563 }, { "epoch": 0.6653448815610977, "grad_norm": 0.96875, "learning_rate": 0.0005320721771562015, "loss": 0.8187, "step": 9564 }, { "epoch": 0.665414449198233, "grad_norm": 1.1796875, "learning_rate": 0.0005318730472411337, "loss": 0.9967, "step": 9565 }, { "epoch": 0.6654840168353682, "grad_norm": 1.2265625, "learning_rate": 0.0005316739410951934, "loss": 0.7501, "step": 9566 }, { "epoch": 0.6655535844725033, "grad_norm": 1.0234375, "learning_rate": 0.0005314748587284895, "loss": 0.7076, "step": 9567 }, { "epoch": 0.6656231521096386, "grad_norm": 0.99609375, "learning_rate": 0.0005312758001511307, "loss": 1.0002, "step": 9568 }, { "epoch": 0.6656927197467738, "grad_norm": 1.109375, "learning_rate": 0.0005310767653732246, "loss": 0.8335, "step": 9569 }, { "epoch": 0.665762287383909, "grad_norm": 1.0859375, "learning_rate": 0.0005308777544048767, "loss": 0.8125, "step": 9570 }, { "epoch": 0.6658318550210442, "grad_norm": 1.0234375, "learning_rate": 0.0005306787672561917, "loss": 0.7869, "step": 9571 }, { "epoch": 0.6659014226581794, "grad_norm": 1.046875, "learning_rate": 0.0005304798039372731, "loss": 0.7647, "step": 9572 }, { "epoch": 0.6659709902953146, "grad_norm": 1.1796875, "learning_rate": 0.0005302808644582241, "loss": 0.8244, "step": 9573 }, { "epoch": 0.6660405579324499, "grad_norm": 0.9375, "learning_rate": 0.0005300819488291452, "loss": 0.6193, "step": 9574 }, { "epoch": 0.666110125569585, "grad_norm": 1.1015625, "learning_rate": 0.000529883057060136, "loss": 0.8622, "step": 9575 }, { "epoch": 0.6661796932067202, "grad_norm": 0.8203125, "learning_rate": 0.0005296841891612959, "loss": 0.8218, "step": 9576 }, { "epoch": 0.6662492608438554, "grad_norm": 1.2265625, "learning_rate": 0.0005294853451427217, "loss": 0.8445, "step": 9577 }, { "epoch": 0.6663188284809907, "grad_norm": 1.0078125, "learning_rate": 0.0005292865250145107, "loss": 0.6962, "step": 9578 }, { "epoch": 0.6663883961181258, "grad_norm": 1.15625, "learning_rate": 0.0005290877287867568, "loss": 0.9622, "step": 9579 }, { "epoch": 0.666457963755261, "grad_norm": 1.1484375, "learning_rate": 0.000528888956469555, "loss": 0.8845, "step": 9580 }, { "epoch": 0.6665275313923963, "grad_norm": 1.1953125, "learning_rate": 0.0005286902080729967, "loss": 0.9223, "step": 9581 }, { "epoch": 0.6665970990295315, "grad_norm": 1.2109375, "learning_rate": 0.0005284914836071743, "loss": 0.8549, "step": 9582 }, { "epoch": 0.6666666666666666, "grad_norm": 1.328125, "learning_rate": 0.0005282927830821782, "loss": 0.9951, "step": 9583 }, { "epoch": 0.6667362343038019, "grad_norm": 0.8359375, "learning_rate": 0.000528094106508097, "loss": 0.6767, "step": 9584 }, { "epoch": 0.6668058019409371, "grad_norm": 1.1171875, "learning_rate": 0.000527895453895018, "loss": 0.9949, "step": 9585 }, { "epoch": 0.6668753695780723, "grad_norm": 0.96484375, "learning_rate": 0.0005276968252530283, "loss": 0.7045, "step": 9586 }, { "epoch": 0.6669449372152075, "grad_norm": 1.1796875, "learning_rate": 0.0005274982205922136, "loss": 0.8576, "step": 9587 }, { "epoch": 0.6670145048523427, "grad_norm": 1.1640625, "learning_rate": 0.0005272996399226578, "loss": 0.8649, "step": 9588 }, { "epoch": 0.6670840724894779, "grad_norm": 1.3515625, "learning_rate": 0.0005271010832544431, "loss": 0.7696, "step": 9589 }, { "epoch": 0.667153640126613, "grad_norm": 1.25, "learning_rate": 0.0005269025505976521, "loss": 0.8932, "step": 9590 }, { "epoch": 0.6672232077637483, "grad_norm": 1.0546875, "learning_rate": 0.0005267040419623652, "loss": 0.9085, "step": 9591 }, { "epoch": 0.6672927754008835, "grad_norm": 1.2890625, "learning_rate": 0.0005265055573586614, "loss": 0.7903, "step": 9592 }, { "epoch": 0.6673623430380187, "grad_norm": 1.0546875, "learning_rate": 0.0005263070967966186, "loss": 0.7596, "step": 9593 }, { "epoch": 0.667431910675154, "grad_norm": 1.15625, "learning_rate": 0.0005261086602863141, "loss": 0.8148, "step": 9594 }, { "epoch": 0.6675014783122891, "grad_norm": 1.0234375, "learning_rate": 0.0005259102478378228, "loss": 0.6692, "step": 9595 }, { "epoch": 0.6675710459494243, "grad_norm": 1.1953125, "learning_rate": 0.0005257118594612195, "loss": 0.916, "step": 9596 }, { "epoch": 0.6676406135865596, "grad_norm": 0.87109375, "learning_rate": 0.000525513495166578, "loss": 0.5399, "step": 9597 }, { "epoch": 0.6677101812236947, "grad_norm": 0.99609375, "learning_rate": 0.0005253151549639694, "loss": 0.7878, "step": 9598 }, { "epoch": 0.6677797488608299, "grad_norm": 1.203125, "learning_rate": 0.0005251168388634644, "loss": 0.8096, "step": 9599 }, { "epoch": 0.6678493164979652, "grad_norm": 0.9921875, "learning_rate": 0.0005249185468751327, "loss": 0.762, "step": 9600 }, { "epoch": 0.6679188841351004, "grad_norm": 1.3828125, "learning_rate": 0.000524720279009043, "loss": 0.7958, "step": 9601 }, { "epoch": 0.6679884517722355, "grad_norm": 1.2578125, "learning_rate": 0.0005245220352752619, "loss": 0.9365, "step": 9602 }, { "epoch": 0.6680580194093707, "grad_norm": 1.046875, "learning_rate": 0.0005243238156838548, "loss": 0.8501, "step": 9603 }, { "epoch": 0.668127587046506, "grad_norm": 1.4375, "learning_rate": 0.000524125620244887, "loss": 0.9144, "step": 9604 }, { "epoch": 0.6681971546836412, "grad_norm": 1.1953125, "learning_rate": 0.0005239274489684218, "loss": 0.8119, "step": 9605 }, { "epoch": 0.6682667223207763, "grad_norm": 1.3984375, "learning_rate": 0.0005237293018645211, "loss": 0.9269, "step": 9606 }, { "epoch": 0.6683362899579116, "grad_norm": 1.34375, "learning_rate": 0.0005235311789432457, "loss": 0.9449, "step": 9607 }, { "epoch": 0.6684058575950468, "grad_norm": 1.1171875, "learning_rate": 0.0005233330802146556, "loss": 0.9373, "step": 9608 }, { "epoch": 0.668475425232182, "grad_norm": 0.92578125, "learning_rate": 0.0005231350056888089, "loss": 0.823, "step": 9609 }, { "epoch": 0.6685449928693172, "grad_norm": 1.484375, "learning_rate": 0.000522936955375763, "loss": 0.9763, "step": 9610 }, { "epoch": 0.6686145605064524, "grad_norm": 1.25, "learning_rate": 0.0005227389292855743, "loss": 0.9294, "step": 9611 }, { "epoch": 0.6686841281435876, "grad_norm": 1.0, "learning_rate": 0.0005225409274282973, "loss": 0.8357, "step": 9612 }, { "epoch": 0.6687536957807229, "grad_norm": 1.5625, "learning_rate": 0.0005223429498139849, "loss": 0.8657, "step": 9613 }, { "epoch": 0.668823263417858, "grad_norm": 1.4921875, "learning_rate": 0.0005221449964526899, "loss": 1.0415, "step": 9614 }, { "epoch": 0.6688928310549932, "grad_norm": 1.2109375, "learning_rate": 0.000521947067354464, "loss": 0.8141, "step": 9615 }, { "epoch": 0.6689623986921284, "grad_norm": 1.1171875, "learning_rate": 0.0005217491625293562, "loss": 0.8964, "step": 9616 }, { "epoch": 0.6690319663292636, "grad_norm": 1.21875, "learning_rate": 0.0005215512819874152, "loss": 0.8618, "step": 9617 }, { "epoch": 0.6691015339663988, "grad_norm": 1.15625, "learning_rate": 0.0005213534257386885, "loss": 0.9095, "step": 9618 }, { "epoch": 0.669171101603534, "grad_norm": 1.03125, "learning_rate": 0.0005211555937932225, "loss": 0.58, "step": 9619 }, { "epoch": 0.6692406692406693, "grad_norm": 1.109375, "learning_rate": 0.0005209577861610621, "loss": 0.7973, "step": 9620 }, { "epoch": 0.6693102368778044, "grad_norm": 0.859375, "learning_rate": 0.0005207600028522503, "loss": 0.6226, "step": 9621 }, { "epoch": 0.6693798045149396, "grad_norm": 1.1953125, "learning_rate": 0.00052056224387683, "loss": 0.8321, "step": 9622 }, { "epoch": 0.6694493721520749, "grad_norm": 1.0390625, "learning_rate": 0.0005203645092448428, "loss": 0.7101, "step": 9623 }, { "epoch": 0.6695189397892101, "grad_norm": 0.9296875, "learning_rate": 0.0005201667989663279, "loss": 0.7034, "step": 9624 }, { "epoch": 0.6695885074263452, "grad_norm": 1.515625, "learning_rate": 0.0005199691130513248, "loss": 0.9162, "step": 9625 }, { "epoch": 0.6696580750634805, "grad_norm": 0.89453125, "learning_rate": 0.0005197714515098705, "loss": 0.7831, "step": 9626 }, { "epoch": 0.6697276427006157, "grad_norm": 0.953125, "learning_rate": 0.0005195738143520012, "loss": 0.8744, "step": 9627 }, { "epoch": 0.6697972103377509, "grad_norm": 0.984375, "learning_rate": 0.0005193762015877519, "loss": 0.7083, "step": 9628 }, { "epoch": 0.669866777974886, "grad_norm": 1.0390625, "learning_rate": 0.000519178613227157, "loss": 0.6718, "step": 9629 }, { "epoch": 0.6699363456120213, "grad_norm": 1.0390625, "learning_rate": 0.0005189810492802485, "loss": 0.915, "step": 9630 }, { "epoch": 0.6700059132491565, "grad_norm": 1.1953125, "learning_rate": 0.0005187835097570576, "loss": 0.7503, "step": 9631 }, { "epoch": 0.6700754808862917, "grad_norm": 1.1015625, "learning_rate": 0.0005185859946676143, "loss": 0.8699, "step": 9632 }, { "epoch": 0.6701450485234269, "grad_norm": 1.234375, "learning_rate": 0.0005183885040219484, "loss": 0.9738, "step": 9633 }, { "epoch": 0.6702146161605621, "grad_norm": 1.03125, "learning_rate": 0.0005181910378300866, "loss": 0.6297, "step": 9634 }, { "epoch": 0.6702841837976973, "grad_norm": 1.140625, "learning_rate": 0.000517993596102055, "loss": 0.779, "step": 9635 }, { "epoch": 0.6703537514348326, "grad_norm": 1.359375, "learning_rate": 0.000517796178847879, "loss": 0.8163, "step": 9636 }, { "epoch": 0.6704233190719677, "grad_norm": 0.9140625, "learning_rate": 0.0005175987860775832, "loss": 0.7354, "step": 9637 }, { "epoch": 0.6704928867091029, "grad_norm": 1.0, "learning_rate": 0.0005174014178011894, "loss": 0.8463, "step": 9638 }, { "epoch": 0.6705624543462382, "grad_norm": 1.28125, "learning_rate": 0.0005172040740287188, "loss": 0.8223, "step": 9639 }, { "epoch": 0.6706320219833733, "grad_norm": 0.96875, "learning_rate": 0.0005170067547701922, "loss": 0.7575, "step": 9640 }, { "epoch": 0.6707015896205085, "grad_norm": 1.1953125, "learning_rate": 0.0005168094600356277, "loss": 0.5398, "step": 9641 }, { "epoch": 0.6707711572576437, "grad_norm": 0.90234375, "learning_rate": 0.0005166121898350434, "loss": 0.681, "step": 9642 }, { "epoch": 0.670840724894779, "grad_norm": 1.3125, "learning_rate": 0.000516414944178456, "loss": 0.9722, "step": 9643 }, { "epoch": 0.6709102925319141, "grad_norm": 1.203125, "learning_rate": 0.0005162177230758803, "loss": 0.8177, "step": 9644 }, { "epoch": 0.6709798601690493, "grad_norm": 1.0703125, "learning_rate": 0.0005160205265373299, "loss": 0.8753, "step": 9645 }, { "epoch": 0.6710494278061846, "grad_norm": 1.3203125, "learning_rate": 0.0005158233545728175, "loss": 1.0807, "step": 9646 }, { "epoch": 0.6711189954433198, "grad_norm": 1.3984375, "learning_rate": 0.0005156262071923553, "loss": 0.8875, "step": 9647 }, { "epoch": 0.6711885630804549, "grad_norm": 1.375, "learning_rate": 0.0005154290844059528, "loss": 1.0051, "step": 9648 }, { "epoch": 0.6712581307175902, "grad_norm": 1.4921875, "learning_rate": 0.0005152319862236185, "loss": 1.0798, "step": 9649 }, { "epoch": 0.6713276983547254, "grad_norm": 1.171875, "learning_rate": 0.0005150349126553607, "loss": 0.8117, "step": 9650 }, { "epoch": 0.6713972659918606, "grad_norm": 1.03125, "learning_rate": 0.000514837863711186, "loss": 0.7735, "step": 9651 }, { "epoch": 0.6714668336289958, "grad_norm": 1.0625, "learning_rate": 0.0005146408394010991, "loss": 0.8665, "step": 9652 }, { "epoch": 0.671536401266131, "grad_norm": 1.5625, "learning_rate": 0.0005144438397351037, "loss": 1.0766, "step": 9653 }, { "epoch": 0.6716059689032662, "grad_norm": 1.03125, "learning_rate": 0.0005142468647232025, "loss": 0.843, "step": 9654 }, { "epoch": 0.6716755365404014, "grad_norm": 1.0390625, "learning_rate": 0.0005140499143753978, "loss": 0.746, "step": 9655 }, { "epoch": 0.6717451041775366, "grad_norm": 1.1796875, "learning_rate": 0.0005138529887016885, "loss": 0.844, "step": 9656 }, { "epoch": 0.6718146718146718, "grad_norm": 1.1875, "learning_rate": 0.0005136560877120746, "loss": 0.7019, "step": 9657 }, { "epoch": 0.671884239451807, "grad_norm": 1.171875, "learning_rate": 0.0005134592114165531, "loss": 0.7212, "step": 9658 }, { "epoch": 0.6719538070889423, "grad_norm": 1.28125, "learning_rate": 0.0005132623598251201, "loss": 0.9283, "step": 9659 }, { "epoch": 0.6720233747260774, "grad_norm": 1.1171875, "learning_rate": 0.0005130655329477712, "loss": 0.7341, "step": 9660 }, { "epoch": 0.6720929423632126, "grad_norm": 1.0546875, "learning_rate": 0.0005128687307945006, "loss": 0.5998, "step": 9661 }, { "epoch": 0.6721625100003479, "grad_norm": 1.3046875, "learning_rate": 0.0005126719533753006, "loss": 0.8028, "step": 9662 }, { "epoch": 0.672232077637483, "grad_norm": 1.3515625, "learning_rate": 0.0005124752007001619, "loss": 0.9841, "step": 9663 }, { "epoch": 0.6723016452746182, "grad_norm": 1.53125, "learning_rate": 0.0005122784727790752, "loss": 1.1907, "step": 9664 }, { "epoch": 0.6723712129117535, "grad_norm": 1.1796875, "learning_rate": 0.0005120817696220299, "loss": 1.0263, "step": 9665 }, { "epoch": 0.6724407805488887, "grad_norm": 0.96875, "learning_rate": 0.0005118850912390131, "loss": 0.827, "step": 9666 }, { "epoch": 0.6725103481860238, "grad_norm": 1.0859375, "learning_rate": 0.0005116884376400107, "loss": 0.6834, "step": 9667 }, { "epoch": 0.672579915823159, "grad_norm": 1.015625, "learning_rate": 0.0005114918088350079, "loss": 0.9123, "step": 9668 }, { "epoch": 0.6726494834602943, "grad_norm": 1.453125, "learning_rate": 0.0005112952048339894, "loss": 0.8906, "step": 9669 }, { "epoch": 0.6727190510974295, "grad_norm": 1.234375, "learning_rate": 0.0005110986256469366, "loss": 0.7647, "step": 9670 }, { "epoch": 0.6727886187345646, "grad_norm": 0.86328125, "learning_rate": 0.0005109020712838318, "loss": 0.5316, "step": 9671 }, { "epoch": 0.6728581863716999, "grad_norm": 0.92578125, "learning_rate": 0.0005107055417546547, "loss": 0.6323, "step": 9672 }, { "epoch": 0.6729277540088351, "grad_norm": 1.2109375, "learning_rate": 0.0005105090370693835, "loss": 0.7915, "step": 9673 }, { "epoch": 0.6729973216459703, "grad_norm": 1.40625, "learning_rate": 0.000510312557237996, "loss": 1.0018, "step": 9674 }, { "epoch": 0.6730668892831055, "grad_norm": 0.91796875, "learning_rate": 0.0005101161022704692, "loss": 0.6877, "step": 9675 }, { "epoch": 0.6731364569202407, "grad_norm": 0.91796875, "learning_rate": 0.0005099196721767776, "loss": 0.7115, "step": 9676 }, { "epoch": 0.6732060245573759, "grad_norm": 1.140625, "learning_rate": 0.0005097232669668943, "loss": 0.8713, "step": 9677 }, { "epoch": 0.6732755921945112, "grad_norm": 0.875, "learning_rate": 0.0005095268866507924, "loss": 0.4574, "step": 9678 }, { "epoch": 0.6733451598316463, "grad_norm": 1.015625, "learning_rate": 0.0005093305312384434, "loss": 0.7687, "step": 9679 }, { "epoch": 0.6734147274687815, "grad_norm": 1.0078125, "learning_rate": 0.000509134200739817, "loss": 0.7457, "step": 9680 }, { "epoch": 0.6734842951059167, "grad_norm": 1.0, "learning_rate": 0.0005089378951648811, "loss": 0.5863, "step": 9681 }, { "epoch": 0.673553862743052, "grad_norm": 1.0, "learning_rate": 0.0005087416145236039, "loss": 0.7629, "step": 9682 }, { "epoch": 0.6736234303801871, "grad_norm": 1.0390625, "learning_rate": 0.0005085453588259519, "loss": 0.6333, "step": 9683 }, { "epoch": 0.6736929980173223, "grad_norm": 1.0234375, "learning_rate": 0.0005083491280818888, "loss": 0.9301, "step": 9684 }, { "epoch": 0.6737625656544576, "grad_norm": 1.046875, "learning_rate": 0.0005081529223013795, "loss": 0.7265, "step": 9685 }, { "epoch": 0.6738321332915927, "grad_norm": 0.8671875, "learning_rate": 0.0005079567414943856, "loss": 0.5622, "step": 9686 }, { "epoch": 0.6739017009287279, "grad_norm": 1.0390625, "learning_rate": 0.0005077605856708678, "loss": 0.9549, "step": 9687 }, { "epoch": 0.6739712685658632, "grad_norm": 1.0390625, "learning_rate": 0.0005075644548407865, "loss": 0.6197, "step": 9688 }, { "epoch": 0.6740408362029984, "grad_norm": 1.28125, "learning_rate": 0.0005073683490141005, "loss": 0.8045, "step": 9689 }, { "epoch": 0.6741104038401335, "grad_norm": 0.80859375, "learning_rate": 0.0005071722682007667, "loss": 0.7151, "step": 9690 }, { "epoch": 0.6741799714772688, "grad_norm": 1.1328125, "learning_rate": 0.0005069762124107408, "loss": 0.726, "step": 9691 }, { "epoch": 0.674249539114404, "grad_norm": 1.3046875, "learning_rate": 0.0005067801816539776, "loss": 1.2405, "step": 9692 }, { "epoch": 0.6743191067515392, "grad_norm": 1.09375, "learning_rate": 0.0005065841759404313, "loss": 0.7895, "step": 9693 }, { "epoch": 0.6743886743886743, "grad_norm": 1.078125, "learning_rate": 0.0005063881952800535, "loss": 1.0126, "step": 9694 }, { "epoch": 0.6744582420258096, "grad_norm": 1.015625, "learning_rate": 0.0005061922396827947, "loss": 0.7312, "step": 9695 }, { "epoch": 0.6745278096629448, "grad_norm": 1.015625, "learning_rate": 0.0005059963091586051, "loss": 0.8364, "step": 9696 }, { "epoch": 0.67459737730008, "grad_norm": 1.265625, "learning_rate": 0.0005058004037174333, "loss": 0.7215, "step": 9697 }, { "epoch": 0.6746669449372152, "grad_norm": 1.1875, "learning_rate": 0.0005056045233692257, "loss": 0.9513, "step": 9698 }, { "epoch": 0.6747365125743504, "grad_norm": 1.1328125, "learning_rate": 0.0005054086681239288, "loss": 1.0549, "step": 9699 }, { "epoch": 0.6748060802114856, "grad_norm": 1.3671875, "learning_rate": 0.0005052128379914864, "loss": 0.5822, "step": 9700 }, { "epoch": 0.6748756478486209, "grad_norm": 1.140625, "learning_rate": 0.0005050170329818427, "loss": 0.7373, "step": 9701 }, { "epoch": 0.674945215485756, "grad_norm": 1.1875, "learning_rate": 0.0005048212531049386, "loss": 0.8542, "step": 9702 }, { "epoch": 0.6750147831228912, "grad_norm": 1.015625, "learning_rate": 0.0005046254983707159, "loss": 0.8557, "step": 9703 }, { "epoch": 0.6750843507600265, "grad_norm": 1.2109375, "learning_rate": 0.0005044297687891135, "loss": 0.9148, "step": 9704 }, { "epoch": 0.6751539183971617, "grad_norm": 0.90234375, "learning_rate": 0.0005042340643700687, "loss": 0.7164, "step": 9705 }, { "epoch": 0.6752234860342968, "grad_norm": 1.125, "learning_rate": 0.0005040383851235202, "loss": 0.764, "step": 9706 }, { "epoch": 0.675293053671432, "grad_norm": 0.94921875, "learning_rate": 0.0005038427310594026, "loss": 0.6977, "step": 9707 }, { "epoch": 0.6753626213085673, "grad_norm": 1.0390625, "learning_rate": 0.0005036471021876503, "loss": 1.0362, "step": 9708 }, { "epoch": 0.6754321889457024, "grad_norm": 0.890625, "learning_rate": 0.0005034514985181959, "loss": 0.6269, "step": 9709 }, { "epoch": 0.6755017565828376, "grad_norm": 1.1484375, "learning_rate": 0.0005032559200609716, "loss": 0.8695, "step": 9710 }, { "epoch": 0.6755713242199729, "grad_norm": 1.015625, "learning_rate": 0.0005030603668259084, "loss": 0.8633, "step": 9711 }, { "epoch": 0.6756408918571081, "grad_norm": 0.90625, "learning_rate": 0.0005028648388229346, "loss": 0.5972, "step": 9712 }, { "epoch": 0.6757104594942432, "grad_norm": 0.97265625, "learning_rate": 0.000502669336061979, "loss": 0.7126, "step": 9713 }, { "epoch": 0.6757800271313785, "grad_norm": 1.1171875, "learning_rate": 0.0005024738585529672, "loss": 0.9349, "step": 9714 }, { "epoch": 0.6758495947685137, "grad_norm": 1.203125, "learning_rate": 0.0005022784063058257, "loss": 0.8628, "step": 9715 }, { "epoch": 0.6759191624056489, "grad_norm": 0.80859375, "learning_rate": 0.0005020829793304775, "loss": 0.6641, "step": 9716 }, { "epoch": 0.6759887300427841, "grad_norm": 0.8046875, "learning_rate": 0.0005018875776368464, "loss": 0.5537, "step": 9717 }, { "epoch": 0.6760582976799193, "grad_norm": 0.98828125, "learning_rate": 0.0005016922012348535, "loss": 0.6479, "step": 9718 }, { "epoch": 0.6761278653170545, "grad_norm": 1.0859375, "learning_rate": 0.0005014968501344184, "loss": 0.949, "step": 9719 }, { "epoch": 0.6761974329541897, "grad_norm": 1.2734375, "learning_rate": 0.0005013015243454607, "loss": 0.9195, "step": 9720 }, { "epoch": 0.6762670005913249, "grad_norm": 1.4921875, "learning_rate": 0.0005011062238778983, "loss": 0.8893, "step": 9721 }, { "epoch": 0.6763365682284601, "grad_norm": 0.91796875, "learning_rate": 0.0005009109487416473, "loss": 0.6517, "step": 9722 }, { "epoch": 0.6764061358655953, "grad_norm": 1.125, "learning_rate": 0.0005007156989466224, "loss": 0.8944, "step": 9723 }, { "epoch": 0.6764757035027306, "grad_norm": 0.953125, "learning_rate": 0.0005005204745027376, "loss": 0.7622, "step": 9724 }, { "epoch": 0.6765452711398657, "grad_norm": 1.0859375, "learning_rate": 0.000500325275419906, "loss": 1.125, "step": 9725 }, { "epoch": 0.6766148387770009, "grad_norm": 1.1171875, "learning_rate": 0.0005001301017080384, "loss": 0.7286, "step": 9726 }, { "epoch": 0.6766844064141362, "grad_norm": 1.578125, "learning_rate": 0.0004999349533770444, "loss": 0.8135, "step": 9727 }, { "epoch": 0.6767539740512714, "grad_norm": 1.234375, "learning_rate": 0.0004997398304368327, "loss": 0.7637, "step": 9728 }, { "epoch": 0.6768235416884065, "grad_norm": 1.015625, "learning_rate": 0.0004995447328973114, "loss": 0.6227, "step": 9729 }, { "epoch": 0.6768931093255418, "grad_norm": 1.34375, "learning_rate": 0.0004993496607683857, "loss": 0.9152, "step": 9730 }, { "epoch": 0.676962676962677, "grad_norm": 1.0078125, "learning_rate": 0.0004991546140599612, "loss": 0.767, "step": 9731 }, { "epoch": 0.6770322445998121, "grad_norm": 1.4765625, "learning_rate": 0.0004989595927819406, "loss": 0.9917, "step": 9732 }, { "epoch": 0.6771018122369473, "grad_norm": 1.1015625, "learning_rate": 0.0004987645969442268, "loss": 0.9896, "step": 9733 }, { "epoch": 0.6771713798740826, "grad_norm": 0.96875, "learning_rate": 0.0004985696265567198, "loss": 0.6176, "step": 9734 }, { "epoch": 0.6772409475112178, "grad_norm": 1.3359375, "learning_rate": 0.0004983746816293204, "loss": 0.8158, "step": 9735 }, { "epoch": 0.6773105151483529, "grad_norm": 1.046875, "learning_rate": 0.0004981797621719262, "loss": 0.7196, "step": 9736 }, { "epoch": 0.6773800827854882, "grad_norm": 1.171875, "learning_rate": 0.0004979848681944338, "loss": 0.9503, "step": 9737 }, { "epoch": 0.6774496504226234, "grad_norm": 1.6875, "learning_rate": 0.0004977899997067396, "loss": 0.9683, "step": 9738 }, { "epoch": 0.6775192180597586, "grad_norm": 1.6328125, "learning_rate": 0.0004975951567187382, "loss": 0.7924, "step": 9739 }, { "epoch": 0.6775887856968938, "grad_norm": 1.1953125, "learning_rate": 0.0004974003392403224, "loss": 0.8315, "step": 9740 }, { "epoch": 0.677658353334029, "grad_norm": 1.453125, "learning_rate": 0.0004972055472813839, "loss": 1.0786, "step": 9741 }, { "epoch": 0.6777279209711642, "grad_norm": 1.0546875, "learning_rate": 0.0004970107808518133, "loss": 0.8551, "step": 9742 }, { "epoch": 0.6777974886082995, "grad_norm": 0.9140625, "learning_rate": 0.0004968160399615003, "loss": 0.6797, "step": 9743 }, { "epoch": 0.6778670562454346, "grad_norm": 0.8984375, "learning_rate": 0.0004966213246203323, "loss": 0.7715, "step": 9744 }, { "epoch": 0.6779366238825698, "grad_norm": 0.921875, "learning_rate": 0.0004964266348381965, "loss": 0.5859, "step": 9745 }, { "epoch": 0.678006191519705, "grad_norm": 1.171875, "learning_rate": 0.0004962319706249777, "loss": 0.7948, "step": 9746 }, { "epoch": 0.6780757591568403, "grad_norm": 1.0546875, "learning_rate": 0.0004960373319905605, "loss": 0.6442, "step": 9747 }, { "epoch": 0.6781453267939754, "grad_norm": 1.046875, "learning_rate": 0.0004958427189448272, "loss": 0.5826, "step": 9748 }, { "epoch": 0.6782148944311106, "grad_norm": 1.0546875, "learning_rate": 0.0004956481314976599, "loss": 0.8098, "step": 9749 }, { "epoch": 0.6782844620682459, "grad_norm": 1.078125, "learning_rate": 0.0004954535696589382, "loss": 0.9073, "step": 9750 }, { "epoch": 0.6783540297053811, "grad_norm": 1.046875, "learning_rate": 0.0004952590334385404, "loss": 0.8885, "step": 9751 }, { "epoch": 0.6784235973425162, "grad_norm": 1.34375, "learning_rate": 0.0004950645228463457, "loss": 0.9079, "step": 9752 }, { "epoch": 0.6784931649796515, "grad_norm": 1.546875, "learning_rate": 0.0004948700378922293, "loss": 0.8953, "step": 9753 }, { "epoch": 0.6785627326167867, "grad_norm": 1.15625, "learning_rate": 0.0004946755785860664, "loss": 0.839, "step": 9754 }, { "epoch": 0.6786323002539218, "grad_norm": 1.1328125, "learning_rate": 0.0004944811449377301, "loss": 0.8367, "step": 9755 }, { "epoch": 0.6787018678910571, "grad_norm": 1.046875, "learning_rate": 0.0004942867369570934, "loss": 0.6971, "step": 9756 }, { "epoch": 0.6787714355281923, "grad_norm": 1.1640625, "learning_rate": 0.0004940923546540276, "loss": 0.9824, "step": 9757 }, { "epoch": 0.6788410031653275, "grad_norm": 1.28125, "learning_rate": 0.0004938979980384017, "loss": 0.8169, "step": 9758 }, { "epoch": 0.6789105708024626, "grad_norm": 1.125, "learning_rate": 0.0004937036671200847, "loss": 0.8172, "step": 9759 }, { "epoch": 0.6789801384395979, "grad_norm": 1.2109375, "learning_rate": 0.0004935093619089434, "loss": 0.8487, "step": 9760 }, { "epoch": 0.6790497060767331, "grad_norm": 1.234375, "learning_rate": 0.0004933150824148441, "loss": 0.7046, "step": 9761 }, { "epoch": 0.6791192737138683, "grad_norm": 0.84765625, "learning_rate": 0.0004931208286476506, "loss": 0.5163, "step": 9762 }, { "epoch": 0.6791888413510035, "grad_norm": 1.0390625, "learning_rate": 0.000492926600617227, "loss": 0.7764, "step": 9763 }, { "epoch": 0.6792584089881387, "grad_norm": 1.1640625, "learning_rate": 0.0004927323983334344, "loss": 0.8477, "step": 9764 }, { "epoch": 0.6793279766252739, "grad_norm": 1.140625, "learning_rate": 0.0004925382218061338, "loss": 0.8992, "step": 9765 }, { "epoch": 0.6793975442624092, "grad_norm": 1.0234375, "learning_rate": 0.0004923440710451848, "loss": 0.958, "step": 9766 }, { "epoch": 0.6794671118995443, "grad_norm": 1.15625, "learning_rate": 0.0004921499460604453, "loss": 0.8821, "step": 9767 }, { "epoch": 0.6795366795366795, "grad_norm": 1.6171875, "learning_rate": 0.0004919558468617717, "loss": 1.3672, "step": 9768 }, { "epoch": 0.6796062471738148, "grad_norm": 1.171875, "learning_rate": 0.000491761773459019, "loss": 0.6571, "step": 9769 }, { "epoch": 0.67967581481095, "grad_norm": 0.921875, "learning_rate": 0.0004915677258620416, "loss": 0.8382, "step": 9770 }, { "epoch": 0.6797453824480851, "grad_norm": 1.7265625, "learning_rate": 0.0004913737040806931, "loss": 1.2732, "step": 9771 }, { "epoch": 0.6798149500852203, "grad_norm": 1.1640625, "learning_rate": 0.0004911797081248238, "loss": 1.0633, "step": 9772 }, { "epoch": 0.6798845177223556, "grad_norm": 1.1953125, "learning_rate": 0.0004909857380042845, "loss": 0.8228, "step": 9773 }, { "epoch": 0.6799540853594908, "grad_norm": 1.296875, "learning_rate": 0.0004907917937289235, "loss": 0.8447, "step": 9774 }, { "epoch": 0.6800236529966259, "grad_norm": 0.96875, "learning_rate": 0.0004905978753085889, "loss": 0.8315, "step": 9775 }, { "epoch": 0.6800932206337612, "grad_norm": 1.1796875, "learning_rate": 0.0004904039827531262, "loss": 1.024, "step": 9776 }, { "epoch": 0.6801627882708964, "grad_norm": 1.53125, "learning_rate": 0.0004902101160723813, "loss": 1.0698, "step": 9777 }, { "epoch": 0.6802323559080315, "grad_norm": 0.9921875, "learning_rate": 0.0004900162752761966, "loss": 0.9307, "step": 9778 }, { "epoch": 0.6803019235451668, "grad_norm": 1.25, "learning_rate": 0.0004898224603744151, "loss": 1.0074, "step": 9779 }, { "epoch": 0.680371491182302, "grad_norm": 1.1328125, "learning_rate": 0.0004896286713768778, "loss": 0.7614, "step": 9780 }, { "epoch": 0.6804410588194372, "grad_norm": 1.078125, "learning_rate": 0.0004894349082934243, "loss": 0.6902, "step": 9781 }, { "epoch": 0.6805106264565725, "grad_norm": 1.34375, "learning_rate": 0.0004892411711338925, "loss": 0.786, "step": 9782 }, { "epoch": 0.6805801940937076, "grad_norm": 1.140625, "learning_rate": 0.000489047459908119, "loss": 0.7574, "step": 9783 }, { "epoch": 0.6806497617308428, "grad_norm": 0.90625, "learning_rate": 0.0004888537746259408, "loss": 0.9947, "step": 9784 }, { "epoch": 0.680719329367978, "grad_norm": 1.03125, "learning_rate": 0.0004886601152971915, "loss": 0.5515, "step": 9785 }, { "epoch": 0.6807888970051132, "grad_norm": 1.2734375, "learning_rate": 0.000488466481931704, "loss": 0.7297, "step": 9786 }, { "epoch": 0.6808584646422484, "grad_norm": 0.875, "learning_rate": 0.0004882728745393105, "loss": 0.6101, "step": 9787 }, { "epoch": 0.6809280322793836, "grad_norm": 1.0390625, "learning_rate": 0.0004880792931298408, "loss": 1.0144, "step": 9788 }, { "epoch": 0.6809975999165189, "grad_norm": 1.0703125, "learning_rate": 0.0004878857377131246, "loss": 0.7814, "step": 9789 }, { "epoch": 0.681067167553654, "grad_norm": 1.1640625, "learning_rate": 0.0004876922082989891, "loss": 0.7869, "step": 9790 }, { "epoch": 0.6811367351907892, "grad_norm": 1.375, "learning_rate": 0.00048749870489726133, "loss": 0.883, "step": 9791 }, { "epoch": 0.6812063028279245, "grad_norm": 1.828125, "learning_rate": 0.00048730522751776586, "loss": 0.9427, "step": 9792 }, { "epoch": 0.6812758704650597, "grad_norm": 1.0390625, "learning_rate": 0.0004871117761703271, "loss": 0.6713, "step": 9793 }, { "epoch": 0.6813454381021948, "grad_norm": 1.328125, "learning_rate": 0.0004869183508647668, "loss": 0.8737, "step": 9794 }, { "epoch": 0.6814150057393301, "grad_norm": 1.1171875, "learning_rate": 0.0004867249516109069, "loss": 0.76, "step": 9795 }, { "epoch": 0.6814845733764653, "grad_norm": 1.140625, "learning_rate": 0.0004865315784185664, "loss": 0.7439, "step": 9796 }, { "epoch": 0.6815541410136005, "grad_norm": 1.1015625, "learning_rate": 0.0004863382312975644, "loss": 0.8266, "step": 9797 }, { "epoch": 0.6816237086507356, "grad_norm": 1.375, "learning_rate": 0.00048614491025771836, "loss": 0.9495, "step": 9798 }, { "epoch": 0.6816932762878709, "grad_norm": 1.0, "learning_rate": 0.0004859516153088437, "loss": 1.083, "step": 9799 }, { "epoch": 0.6817628439250061, "grad_norm": 0.88671875, "learning_rate": 0.00048575834646075503, "loss": 0.7001, "step": 9800 }, { "epoch": 0.6818324115621412, "grad_norm": 1.109375, "learning_rate": 0.00048556510372326514, "loss": 0.6385, "step": 9801 }, { "epoch": 0.6819019791992765, "grad_norm": 1.265625, "learning_rate": 0.0004853718871061863, "loss": 1.0354, "step": 9802 }, { "epoch": 0.6819715468364117, "grad_norm": 1.2421875, "learning_rate": 0.00048517869661932956, "loss": 0.7948, "step": 9803 }, { "epoch": 0.6820411144735469, "grad_norm": 1.3671875, "learning_rate": 0.0004849855322725034, "loss": 0.7419, "step": 9804 }, { "epoch": 0.6821106821106822, "grad_norm": 0.90234375, "learning_rate": 0.00048479239407551636, "loss": 0.7721, "step": 9805 }, { "epoch": 0.6821802497478173, "grad_norm": 1.1640625, "learning_rate": 0.0004845992820381743, "loss": 0.9139, "step": 9806 }, { "epoch": 0.6822498173849525, "grad_norm": 1.21875, "learning_rate": 0.00048440619617028325, "loss": 0.8945, "step": 9807 }, { "epoch": 0.6823193850220878, "grad_norm": 1.0546875, "learning_rate": 0.00048421313648164645, "loss": 0.8678, "step": 9808 }, { "epoch": 0.682388952659223, "grad_norm": 0.99609375, "learning_rate": 0.0004840201029820672, "loss": 0.6539, "step": 9809 }, { "epoch": 0.6824585202963581, "grad_norm": 0.8671875, "learning_rate": 0.0004838270956813461, "loss": 0.9426, "step": 9810 }, { "epoch": 0.6825280879334933, "grad_norm": 1.015625, "learning_rate": 0.0004836341145892832, "loss": 0.6187, "step": 9811 }, { "epoch": 0.6825976555706286, "grad_norm": 1.3046875, "learning_rate": 0.0004834411597156777, "loss": 0.8741, "step": 9812 }, { "epoch": 0.6826672232077637, "grad_norm": 1.0703125, "learning_rate": 0.00048324823107032653, "loss": 0.7333, "step": 9813 }, { "epoch": 0.6827367908448989, "grad_norm": 0.98046875, "learning_rate": 0.0004830553286630256, "loss": 0.8243, "step": 9814 }, { "epoch": 0.6828063584820342, "grad_norm": 1.0234375, "learning_rate": 0.00048286245250356866, "loss": 0.8961, "step": 9815 }, { "epoch": 0.6828759261191694, "grad_norm": 1.328125, "learning_rate": 0.00048266960260175053, "loss": 0.8247, "step": 9816 }, { "epoch": 0.6829454937563045, "grad_norm": 1.1484375, "learning_rate": 0.00048247677896736253, "loss": 0.9039, "step": 9817 }, { "epoch": 0.6830150613934398, "grad_norm": 1.078125, "learning_rate": 0.00048228398161019473, "loss": 0.7044, "step": 9818 }, { "epoch": 0.683084629030575, "grad_norm": 0.8828125, "learning_rate": 0.00048209121054003726, "loss": 0.7803, "step": 9819 }, { "epoch": 0.6831541966677102, "grad_norm": 1.0390625, "learning_rate": 0.00048189846576667726, "loss": 0.6082, "step": 9820 }, { "epoch": 0.6832237643048454, "grad_norm": 0.96875, "learning_rate": 0.00048170574729990227, "loss": 0.8898, "step": 9821 }, { "epoch": 0.6832933319419806, "grad_norm": 0.98828125, "learning_rate": 0.0004815130551494965, "loss": 1.072, "step": 9822 }, { "epoch": 0.6833628995791158, "grad_norm": 1.046875, "learning_rate": 0.00048132038932524493, "loss": 0.7886, "step": 9823 }, { "epoch": 0.683432467216251, "grad_norm": 0.84375, "learning_rate": 0.00048112774983692907, "loss": 0.6386, "step": 9824 }, { "epoch": 0.6835020348533862, "grad_norm": 1.0390625, "learning_rate": 0.000480935136694331, "loss": 0.7901, "step": 9825 }, { "epoch": 0.6835716024905214, "grad_norm": 1.0234375, "learning_rate": 0.00048074254990723063, "loss": 0.821, "step": 9826 }, { "epoch": 0.6836411701276566, "grad_norm": 1.078125, "learning_rate": 0.0004805499894854063, "loss": 0.8903, "step": 9827 }, { "epoch": 0.6837107377647919, "grad_norm": 1.4375, "learning_rate": 0.0004803574554386351, "loss": 0.8313, "step": 9828 }, { "epoch": 0.683780305401927, "grad_norm": 1.09375, "learning_rate": 0.00048016494777669295, "loss": 0.9615, "step": 9829 }, { "epoch": 0.6838498730390622, "grad_norm": 1.296875, "learning_rate": 0.000479972466509355, "loss": 0.7357, "step": 9830 }, { "epoch": 0.6839194406761975, "grad_norm": 0.921875, "learning_rate": 0.00047978001164639404, "loss": 0.6229, "step": 9831 }, { "epoch": 0.6839890083133326, "grad_norm": 0.99609375, "learning_rate": 0.00047958758319758166, "loss": 0.688, "step": 9832 }, { "epoch": 0.6840585759504678, "grad_norm": 1.359375, "learning_rate": 0.0004793951811726891, "loss": 0.8784, "step": 9833 }, { "epoch": 0.6841281435876031, "grad_norm": 1.03125, "learning_rate": 0.0004792028055814848, "loss": 0.7363, "step": 9834 }, { "epoch": 0.6841977112247383, "grad_norm": 1.125, "learning_rate": 0.0004790104564337374, "loss": 0.8188, "step": 9835 }, { "epoch": 0.6842672788618734, "grad_norm": 1.0234375, "learning_rate": 0.0004788181337392127, "loss": 0.6419, "step": 9836 }, { "epoch": 0.6843368464990086, "grad_norm": 1.484375, "learning_rate": 0.00047862583750767654, "loss": 0.7032, "step": 9837 }, { "epoch": 0.6844064141361439, "grad_norm": 1.109375, "learning_rate": 0.0004784335677488921, "loss": 0.6312, "step": 9838 }, { "epoch": 0.6844759817732791, "grad_norm": 1.109375, "learning_rate": 0.00047824132447262213, "loss": 0.6002, "step": 9839 }, { "epoch": 0.6845455494104142, "grad_norm": 1.1171875, "learning_rate": 0.0004780491076886283, "loss": 0.8293, "step": 9840 }, { "epoch": 0.6846151170475495, "grad_norm": 1.5703125, "learning_rate": 0.0004778569174066699, "loss": 1.0433, "step": 9841 }, { "epoch": 0.6846846846846847, "grad_norm": 1.390625, "learning_rate": 0.0004776647536365051, "loss": 1.086, "step": 9842 }, { "epoch": 0.6847542523218199, "grad_norm": 1.0234375, "learning_rate": 0.0004774726163878914, "loss": 0.5927, "step": 9843 }, { "epoch": 0.6848238199589551, "grad_norm": 1.03125, "learning_rate": 0.0004772805056705848, "loss": 0.7805, "step": 9844 }, { "epoch": 0.6848933875960903, "grad_norm": 0.95703125, "learning_rate": 0.0004770884214943394, "loss": 0.7865, "step": 9845 }, { "epoch": 0.6849629552332255, "grad_norm": 1.0390625, "learning_rate": 0.000476896363868908, "loss": 0.7678, "step": 9846 }, { "epoch": 0.6850325228703608, "grad_norm": 1.109375, "learning_rate": 0.00047670433280404257, "loss": 0.952, "step": 9847 }, { "epoch": 0.6851020905074959, "grad_norm": 1.2265625, "learning_rate": 0.00047651232830949386, "loss": 0.8057, "step": 9848 }, { "epoch": 0.6851716581446311, "grad_norm": 1.109375, "learning_rate": 0.00047632035039501055, "loss": 0.7233, "step": 9849 }, { "epoch": 0.6852412257817663, "grad_norm": 1.1875, "learning_rate": 0.0004761283990703399, "loss": 0.5269, "step": 9850 }, { "epoch": 0.6853107934189016, "grad_norm": 0.9375, "learning_rate": 0.000475936474345229, "loss": 0.5662, "step": 9851 }, { "epoch": 0.6853803610560367, "grad_norm": 1.2890625, "learning_rate": 0.00047574457622942225, "loss": 0.7139, "step": 9852 }, { "epoch": 0.6854499286931719, "grad_norm": 1.0859375, "learning_rate": 0.0004755527047326633, "loss": 0.7737, "step": 9853 }, { "epoch": 0.6855194963303072, "grad_norm": 1.0546875, "learning_rate": 0.000475360859864695, "loss": 0.7234, "step": 9854 }, { "epoch": 0.6855890639674423, "grad_norm": 1.0859375, "learning_rate": 0.00047516904163525796, "loss": 0.6907, "step": 9855 }, { "epoch": 0.6856586316045775, "grad_norm": 1.4921875, "learning_rate": 0.0004749772500540912, "loss": 0.8297, "step": 9856 }, { "epoch": 0.6857281992417128, "grad_norm": 1.03125, "learning_rate": 0.00047478548513093334, "loss": 0.6738, "step": 9857 }, { "epoch": 0.685797766878848, "grad_norm": 1.296875, "learning_rate": 0.0004745937468755217, "loss": 1.108, "step": 9858 }, { "epoch": 0.6858673345159831, "grad_norm": 0.80078125, "learning_rate": 0.0004744020352975913, "loss": 0.7054, "step": 9859 }, { "epoch": 0.6859369021531184, "grad_norm": 1.0390625, "learning_rate": 0.000474210350406876, "loss": 0.7712, "step": 9860 }, { "epoch": 0.6860064697902536, "grad_norm": 1.0703125, "learning_rate": 0.00047401869221310887, "loss": 0.867, "step": 9861 }, { "epoch": 0.6860760374273888, "grad_norm": 1.0390625, "learning_rate": 0.0004738270607260218, "loss": 0.8149, "step": 9862 }, { "epoch": 0.6861456050645239, "grad_norm": 1.0859375, "learning_rate": 0.0004736354559553445, "loss": 0.74, "step": 9863 }, { "epoch": 0.6862151727016592, "grad_norm": 0.7421875, "learning_rate": 0.00047344387791080535, "loss": 0.4866, "step": 9864 }, { "epoch": 0.6862847403387944, "grad_norm": 1.5078125, "learning_rate": 0.00047325232660213234, "loss": 1.0872, "step": 9865 }, { "epoch": 0.6863543079759296, "grad_norm": 0.9140625, "learning_rate": 0.00047306080203905076, "loss": 0.7222, "step": 9866 }, { "epoch": 0.6864238756130648, "grad_norm": 0.9921875, "learning_rate": 0.00047286930423128584, "loss": 0.6516, "step": 9867 }, { "epoch": 0.6864934432502, "grad_norm": 0.91015625, "learning_rate": 0.00047267783318856097, "loss": 0.9393, "step": 9868 }, { "epoch": 0.6865630108873352, "grad_norm": 1.09375, "learning_rate": 0.0004724863889205978, "loss": 0.673, "step": 9869 }, { "epoch": 0.6866325785244705, "grad_norm": 1.0625, "learning_rate": 0.0004722949714371166, "loss": 0.7548, "step": 9870 }, { "epoch": 0.6867021461616056, "grad_norm": 1.203125, "learning_rate": 0.000472103580747837, "loss": 0.899, "step": 9871 }, { "epoch": 0.6867717137987408, "grad_norm": 1.140625, "learning_rate": 0.0004719122168624771, "loss": 0.892, "step": 9872 }, { "epoch": 0.6868412814358761, "grad_norm": 1.203125, "learning_rate": 0.00047172087979075307, "loss": 1.0007, "step": 9873 }, { "epoch": 0.6869108490730113, "grad_norm": 1.5546875, "learning_rate": 0.00047152956954237967, "loss": 0.9274, "step": 9874 }, { "epoch": 0.6869804167101464, "grad_norm": 1.0234375, "learning_rate": 0.00047133828612707095, "loss": 0.8581, "step": 9875 }, { "epoch": 0.6870499843472816, "grad_norm": 1.0234375, "learning_rate": 0.0004711470295545399, "loss": 0.8484, "step": 9876 }, { "epoch": 0.6871195519844169, "grad_norm": 1.078125, "learning_rate": 0.0004709557998344971, "loss": 0.7848, "step": 9877 }, { "epoch": 0.687189119621552, "grad_norm": 1.2578125, "learning_rate": 0.00047076459697665174, "loss": 0.9097, "step": 9878 }, { "epoch": 0.6872586872586872, "grad_norm": 1.140625, "learning_rate": 0.00047057342099071257, "loss": 0.9504, "step": 9879 }, { "epoch": 0.6873282548958225, "grad_norm": 1.375, "learning_rate": 0.00047038227188638703, "loss": 0.9776, "step": 9880 }, { "epoch": 0.6873978225329577, "grad_norm": 1.25, "learning_rate": 0.00047019114967338015, "loss": 0.7916, "step": 9881 }, { "epoch": 0.6874673901700928, "grad_norm": 0.9375, "learning_rate": 0.000470000054361396, "loss": 0.5856, "step": 9882 }, { "epoch": 0.6875369578072281, "grad_norm": 1.0546875, "learning_rate": 0.00046980898596013797, "loss": 0.6789, "step": 9883 }, { "epoch": 0.6876065254443633, "grad_norm": 1.0234375, "learning_rate": 0.0004696179444793071, "loss": 0.83, "step": 9884 }, { "epoch": 0.6876760930814985, "grad_norm": 1.234375, "learning_rate": 0.00046942692992860347, "loss": 1.0817, "step": 9885 }, { "epoch": 0.6877456607186337, "grad_norm": 0.86328125, "learning_rate": 0.0004692359423177265, "loss": 0.8211, "step": 9886 }, { "epoch": 0.6878152283557689, "grad_norm": 0.87109375, "learning_rate": 0.0004690449816563731, "loss": 0.6253, "step": 9887 }, { "epoch": 0.6878847959929041, "grad_norm": 1.1875, "learning_rate": 0.00046885404795423894, "loss": 0.749, "step": 9888 }, { "epoch": 0.6879543636300393, "grad_norm": 1.09375, "learning_rate": 0.00046866314122101906, "loss": 1.0462, "step": 9889 }, { "epoch": 0.6880239312671745, "grad_norm": 1.03125, "learning_rate": 0.0004684722614664072, "loss": 0.7023, "step": 9890 }, { "epoch": 0.6880934989043097, "grad_norm": 0.94140625, "learning_rate": 0.00046828140870009473, "loss": 0.7339, "step": 9891 }, { "epoch": 0.6881630665414449, "grad_norm": 1.09375, "learning_rate": 0.00046809058293177186, "loss": 0.7907, "step": 9892 }, { "epoch": 0.6882326341785802, "grad_norm": 1.03125, "learning_rate": 0.00046789978417112823, "loss": 0.8627, "step": 9893 }, { "epoch": 0.6883022018157153, "grad_norm": 1.171875, "learning_rate": 0.0004677090124278519, "loss": 0.7997, "step": 9894 }, { "epoch": 0.6883717694528505, "grad_norm": 1.0703125, "learning_rate": 0.00046751826771162895, "loss": 0.8273, "step": 9895 }, { "epoch": 0.6884413370899858, "grad_norm": 1.203125, "learning_rate": 0.0004673275500321441, "loss": 0.875, "step": 9896 }, { "epoch": 0.688510904727121, "grad_norm": 1.53125, "learning_rate": 0.0004671368593990818, "loss": 0.7543, "step": 9897 }, { "epoch": 0.6885804723642561, "grad_norm": 1.0625, "learning_rate": 0.0004669461958221236, "loss": 0.983, "step": 9898 }, { "epoch": 0.6886500400013914, "grad_norm": 1.203125, "learning_rate": 0.0004667555593109507, "loss": 0.843, "step": 9899 }, { "epoch": 0.6887196076385266, "grad_norm": 1.2109375, "learning_rate": 0.0004665649498752432, "loss": 0.6497, "step": 9900 }, { "epoch": 0.6887891752756617, "grad_norm": 1.0234375, "learning_rate": 0.00046637436752467874, "loss": 0.8861, "step": 9901 }, { "epoch": 0.6888587429127969, "grad_norm": 1.3515625, "learning_rate": 0.00046618381226893403, "loss": 0.8833, "step": 9902 }, { "epoch": 0.6889283105499322, "grad_norm": 0.91015625, "learning_rate": 0.0004659932841176845, "loss": 0.7966, "step": 9903 }, { "epoch": 0.6889978781870674, "grad_norm": 1.09375, "learning_rate": 0.0004658027830806049, "loss": 0.6317, "step": 9904 }, { "epoch": 0.6890674458242025, "grad_norm": 1.234375, "learning_rate": 0.0004656123091673674, "loss": 1.1123, "step": 9905 }, { "epoch": 0.6891370134613378, "grad_norm": 1.3359375, "learning_rate": 0.00046542186238764295, "loss": 0.8116, "step": 9906 }, { "epoch": 0.689206581098473, "grad_norm": 1.1953125, "learning_rate": 0.00046523144275110187, "loss": 0.9125, "step": 9907 }, { "epoch": 0.6892761487356082, "grad_norm": 1.1640625, "learning_rate": 0.0004650410502674131, "loss": 0.9166, "step": 9908 }, { "epoch": 0.6893457163727434, "grad_norm": 1.0390625, "learning_rate": 0.0004648506849462433, "loss": 0.8827, "step": 9909 }, { "epoch": 0.6894152840098786, "grad_norm": 1.5078125, "learning_rate": 0.000464660346797258, "loss": 0.645, "step": 9910 }, { "epoch": 0.6894848516470138, "grad_norm": 1.15625, "learning_rate": 0.0004644700358301224, "loss": 0.9024, "step": 9911 }, { "epoch": 0.6895544192841491, "grad_norm": 1.1171875, "learning_rate": 0.0004642797520544987, "loss": 0.8633, "step": 9912 }, { "epoch": 0.6896239869212842, "grad_norm": 0.9921875, "learning_rate": 0.00046408949548004897, "loss": 0.6726, "step": 9913 }, { "epoch": 0.6896935545584194, "grad_norm": 2.15625, "learning_rate": 0.00046389926611643394, "loss": 1.0634, "step": 9914 }, { "epoch": 0.6897631221955546, "grad_norm": 1.125, "learning_rate": 0.0004637090639733119, "loss": 0.9568, "step": 9915 }, { "epoch": 0.6898326898326899, "grad_norm": 1.3125, "learning_rate": 0.0004635188890603402, "loss": 0.7871, "step": 9916 }, { "epoch": 0.689902257469825, "grad_norm": 1.015625, "learning_rate": 0.00046332874138717517, "loss": 0.731, "step": 9917 }, { "epoch": 0.6899718251069602, "grad_norm": 1.2734375, "learning_rate": 0.00046313862096347203, "loss": 0.9366, "step": 9918 }, { "epoch": 0.6900413927440955, "grad_norm": 1.6796875, "learning_rate": 0.00046294852779888384, "loss": 1.1027, "step": 9919 }, { "epoch": 0.6901109603812307, "grad_norm": 0.9921875, "learning_rate": 0.00046275846190306193, "loss": 0.6191, "step": 9920 }, { "epoch": 0.6901805280183658, "grad_norm": 0.92578125, "learning_rate": 0.0004625684232856575, "loss": 0.7207, "step": 9921 }, { "epoch": 0.6902500956555011, "grad_norm": 1.1328125, "learning_rate": 0.00046237841195632013, "loss": 0.797, "step": 9922 }, { "epoch": 0.6903196632926363, "grad_norm": 0.9609375, "learning_rate": 0.0004621884279246971, "loss": 0.6195, "step": 9923 }, { "epoch": 0.6903892309297714, "grad_norm": 0.953125, "learning_rate": 0.0004619984712004346, "loss": 0.5835, "step": 9924 }, { "epoch": 0.6904587985669067, "grad_norm": 0.9140625, "learning_rate": 0.0004618085417931779, "loss": 0.696, "step": 9925 }, { "epoch": 0.6905283662040419, "grad_norm": 1.0078125, "learning_rate": 0.00046161863971257123, "loss": 0.8957, "step": 9926 }, { "epoch": 0.6905979338411771, "grad_norm": 0.875, "learning_rate": 0.00046142876496825606, "loss": 0.7213, "step": 9927 }, { "epoch": 0.6906675014783122, "grad_norm": 1.0, "learning_rate": 0.0004612389175698739, "loss": 0.855, "step": 9928 }, { "epoch": 0.6907370691154475, "grad_norm": 1.2734375, "learning_rate": 0.0004610490975270639, "loss": 0.7702, "step": 9929 }, { "epoch": 0.6908066367525827, "grad_norm": 1.453125, "learning_rate": 0.0004608593048494639, "loss": 1.1194, "step": 9930 }, { "epoch": 0.6908762043897179, "grad_norm": 1.03125, "learning_rate": 0.000460669539546711, "loss": 0.4911, "step": 9931 }, { "epoch": 0.6909457720268531, "grad_norm": 1.0625, "learning_rate": 0.00046047980162844073, "loss": 0.8049, "step": 9932 }, { "epoch": 0.6910153396639883, "grad_norm": 1.0859375, "learning_rate": 0.0004602900911042868, "loss": 0.8622, "step": 9933 }, { "epoch": 0.6910849073011235, "grad_norm": 1.125, "learning_rate": 0.0004601004079838813, "loss": 0.7903, "step": 9934 }, { "epoch": 0.6911544749382588, "grad_norm": 1.3046875, "learning_rate": 0.0004599107522768557, "loss": 0.9072, "step": 9935 }, { "epoch": 0.6912240425753939, "grad_norm": 1.21875, "learning_rate": 0.00045972112399284037, "loss": 0.7728, "step": 9936 }, { "epoch": 0.6912936102125291, "grad_norm": 1.0625, "learning_rate": 0.0004595315231414632, "loss": 1.007, "step": 9937 }, { "epoch": 0.6913631778496644, "grad_norm": 1.0, "learning_rate": 0.00045934194973235054, "loss": 0.7445, "step": 9938 }, { "epoch": 0.6914327454867996, "grad_norm": 1.15625, "learning_rate": 0.00045915240377512867, "loss": 0.963, "step": 9939 }, { "epoch": 0.6915023131239347, "grad_norm": 0.95703125, "learning_rate": 0.0004589628852794221, "loss": 0.7394, "step": 9940 }, { "epoch": 0.6915718807610699, "grad_norm": 1.0078125, "learning_rate": 0.00045877339425485277, "loss": 0.778, "step": 9941 }, { "epoch": 0.6916414483982052, "grad_norm": 1.234375, "learning_rate": 0.0004585839307110428, "loss": 0.7646, "step": 9942 }, { "epoch": 0.6917110160353404, "grad_norm": 1.234375, "learning_rate": 0.00045839449465761195, "loss": 0.9427, "step": 9943 }, { "epoch": 0.6917805836724755, "grad_norm": 1.0859375, "learning_rate": 0.0004582050861041783, "loss": 0.5645, "step": 9944 }, { "epoch": 0.6918501513096108, "grad_norm": 0.93359375, "learning_rate": 0.00045801570506035974, "loss": 0.7495, "step": 9945 }, { "epoch": 0.691919718946746, "grad_norm": 1.5078125, "learning_rate": 0.000457826351535772, "loss": 0.8523, "step": 9946 }, { "epoch": 0.6919892865838811, "grad_norm": 1.0, "learning_rate": 0.0004576370255400295, "loss": 1.0741, "step": 9947 }, { "epoch": 0.6920588542210164, "grad_norm": 1.296875, "learning_rate": 0.00045744772708274485, "loss": 0.8472, "step": 9948 }, { "epoch": 0.6921284218581516, "grad_norm": 1.3515625, "learning_rate": 0.00045725845617352977, "loss": 0.7895, "step": 9949 }, { "epoch": 0.6921979894952868, "grad_norm": 1.078125, "learning_rate": 0.0004570692128219951, "loss": 0.7883, "step": 9950 }, { "epoch": 0.692267557132422, "grad_norm": 1.0546875, "learning_rate": 0.0004568799970377493, "loss": 0.8317, "step": 9951 }, { "epoch": 0.6923371247695572, "grad_norm": 1.125, "learning_rate": 0.00045669080883039924, "loss": 0.9023, "step": 9952 }, { "epoch": 0.6924066924066924, "grad_norm": 1.0703125, "learning_rate": 0.0004565016482095515, "loss": 0.7524, "step": 9953 }, { "epoch": 0.6924762600438276, "grad_norm": 1.2421875, "learning_rate": 0.000456312515184811, "loss": 0.7842, "step": 9954 }, { "epoch": 0.6925458276809628, "grad_norm": 1.0234375, "learning_rate": 0.0004561234097657806, "loss": 0.8211, "step": 9955 }, { "epoch": 0.692615395318098, "grad_norm": 1.1796875, "learning_rate": 0.0004559343319620617, "loss": 0.6618, "step": 9956 }, { "epoch": 0.6926849629552332, "grad_norm": 1.28125, "learning_rate": 0.0004557452817832551, "loss": 0.8867, "step": 9957 }, { "epoch": 0.6927545305923685, "grad_norm": 1.03125, "learning_rate": 0.0004555562592389603, "loss": 0.7492, "step": 9958 }, { "epoch": 0.6928240982295036, "grad_norm": 1.359375, "learning_rate": 0.00045536726433877405, "loss": 0.9273, "step": 9959 }, { "epoch": 0.6928936658666388, "grad_norm": 1.328125, "learning_rate": 0.0004551782970922933, "loss": 1.032, "step": 9960 }, { "epoch": 0.6929632335037741, "grad_norm": 1.046875, "learning_rate": 0.00045498935750911253, "loss": 0.7288, "step": 9961 }, { "epoch": 0.6930328011409093, "grad_norm": 1.171875, "learning_rate": 0.0004548004455988248, "loss": 0.9221, "step": 9962 }, { "epoch": 0.6931023687780444, "grad_norm": 0.9921875, "learning_rate": 0.0004546115613710224, "loss": 0.6233, "step": 9963 }, { "epoch": 0.6931719364151797, "grad_norm": 1.078125, "learning_rate": 0.00045442270483529636, "loss": 0.7356, "step": 9964 }, { "epoch": 0.6932415040523149, "grad_norm": 1.2734375, "learning_rate": 0.00045423387600123543, "loss": 1.0036, "step": 9965 }, { "epoch": 0.69331107168945, "grad_norm": 1.171875, "learning_rate": 0.00045404507487842706, "loss": 1.0622, "step": 9966 }, { "epoch": 0.6933806393265852, "grad_norm": 1.0234375, "learning_rate": 0.00045385630147645793, "loss": 0.5868, "step": 9967 }, { "epoch": 0.6934502069637205, "grad_norm": 1.1484375, "learning_rate": 0.00045366755580491337, "loss": 0.9475, "step": 9968 }, { "epoch": 0.6935197746008557, "grad_norm": 1.078125, "learning_rate": 0.00045347883787337674, "loss": 1.0282, "step": 9969 }, { "epoch": 0.6935893422379908, "grad_norm": 0.94921875, "learning_rate": 0.00045329014769142963, "loss": 0.9051, "step": 9970 }, { "epoch": 0.6936589098751261, "grad_norm": 1.0390625, "learning_rate": 0.00045310148526865314, "loss": 0.6954, "step": 9971 }, { "epoch": 0.6937284775122613, "grad_norm": 1.0078125, "learning_rate": 0.00045291285061462705, "loss": 0.8558, "step": 9972 }, { "epoch": 0.6937980451493965, "grad_norm": 0.8671875, "learning_rate": 0.0004527242437389285, "loss": 0.6456, "step": 9973 }, { "epoch": 0.6938676127865318, "grad_norm": 1.015625, "learning_rate": 0.0004525356646511348, "loss": 0.754, "step": 9974 }, { "epoch": 0.6939371804236669, "grad_norm": 1.203125, "learning_rate": 0.0004523471133608206, "loss": 0.8408, "step": 9975 }, { "epoch": 0.6940067480608021, "grad_norm": 0.953125, "learning_rate": 0.0004521585898775592, "loss": 0.629, "step": 9976 }, { "epoch": 0.6940763156979374, "grad_norm": 0.95703125, "learning_rate": 0.0004519700942109234, "loss": 0.861, "step": 9977 }, { "epoch": 0.6941458833350725, "grad_norm": 1.1640625, "learning_rate": 0.00045178162637048413, "loss": 0.7202, "step": 9978 }, { "epoch": 0.6942154509722077, "grad_norm": 1.1015625, "learning_rate": 0.00045159318636581083, "loss": 0.8389, "step": 9979 }, { "epoch": 0.6942850186093429, "grad_norm": 1.25, "learning_rate": 0.0004514047742064709, "loss": 0.689, "step": 9980 }, { "epoch": 0.6943545862464782, "grad_norm": 1.03125, "learning_rate": 0.0004512163899020314, "loss": 0.6303, "step": 9981 }, { "epoch": 0.6944241538836133, "grad_norm": 1.25, "learning_rate": 0.0004510280334620579, "loss": 0.7624, "step": 9982 }, { "epoch": 0.6944937215207485, "grad_norm": 1.078125, "learning_rate": 0.00045083970489611383, "loss": 0.6146, "step": 9983 }, { "epoch": 0.6945632891578838, "grad_norm": 1.15625, "learning_rate": 0.00045065140421376125, "loss": 0.9113, "step": 9984 }, { "epoch": 0.694632856795019, "grad_norm": 1.265625, "learning_rate": 0.0004504631314245614, "loss": 0.7849, "step": 9985 }, { "epoch": 0.6947024244321541, "grad_norm": 1.390625, "learning_rate": 0.00045027488653807425, "loss": 0.7079, "step": 9986 }, { "epoch": 0.6947719920692894, "grad_norm": 1.484375, "learning_rate": 0.00045008666956385725, "loss": 0.8342, "step": 9987 }, { "epoch": 0.6948415597064246, "grad_norm": 1.2421875, "learning_rate": 0.00044989848051146765, "loss": 0.7096, "step": 9988 }, { "epoch": 0.6949111273435598, "grad_norm": 1.1640625, "learning_rate": 0.0004497103193904601, "loss": 0.8621, "step": 9989 }, { "epoch": 0.694980694980695, "grad_norm": 0.97265625, "learning_rate": 0.00044952218621038944, "loss": 0.7878, "step": 9990 }, { "epoch": 0.6950502626178302, "grad_norm": 1.109375, "learning_rate": 0.000449334080980807, "loss": 0.925, "step": 9991 }, { "epoch": 0.6951198302549654, "grad_norm": 1.234375, "learning_rate": 0.0004491460037112648, "loss": 0.8893, "step": 9992 }, { "epoch": 0.6951893978921005, "grad_norm": 1.09375, "learning_rate": 0.000448957954411312, "loss": 0.9219, "step": 9993 }, { "epoch": 0.6952589655292358, "grad_norm": 1.3203125, "learning_rate": 0.00044876993309049654, "loss": 1.1209, "step": 9994 }, { "epoch": 0.695328533166371, "grad_norm": 0.87109375, "learning_rate": 0.00044858193975836534, "loss": 0.7067, "step": 9995 }, { "epoch": 0.6953981008035062, "grad_norm": 0.921875, "learning_rate": 0.0004483939744244643, "loss": 0.5887, "step": 9996 }, { "epoch": 0.6954676684406415, "grad_norm": 1.2265625, "learning_rate": 0.000448206037098337, "loss": 0.9967, "step": 9997 }, { "epoch": 0.6955372360777766, "grad_norm": 1.0, "learning_rate": 0.00044801812778952544, "loss": 0.8292, "step": 9998 }, { "epoch": 0.6956068037149118, "grad_norm": 1.1171875, "learning_rate": 0.0004478302465075711, "loss": 0.8827, "step": 9999 }, { "epoch": 0.6956763713520471, "grad_norm": 1.1015625, "learning_rate": 0.00044764239326201415, "loss": 0.809, "step": 10000 }, { "epoch": 0.6957459389891822, "grad_norm": 0.90625, "learning_rate": 0.00044745456806239206, "loss": 0.6268, "step": 10001 }, { "epoch": 0.6958155066263174, "grad_norm": 1.0859375, "learning_rate": 0.0004472667709182423, "loss": 0.8195, "step": 10002 }, { "epoch": 0.6958850742634527, "grad_norm": 1.09375, "learning_rate": 0.00044707900183909953, "loss": 0.6104, "step": 10003 }, { "epoch": 0.6959546419005879, "grad_norm": 1.0, "learning_rate": 0.0004468912608344985, "loss": 0.6953, "step": 10004 }, { "epoch": 0.696024209537723, "grad_norm": 0.9375, "learning_rate": 0.00044670354791397104, "loss": 0.547, "step": 10005 }, { "epoch": 0.6960937771748582, "grad_norm": 1.03125, "learning_rate": 0.00044651586308704896, "loss": 0.7972, "step": 10006 }, { "epoch": 0.6961633448119935, "grad_norm": 1.1875, "learning_rate": 0.00044632820636326156, "loss": 0.7603, "step": 10007 }, { "epoch": 0.6962329124491287, "grad_norm": 1.4140625, "learning_rate": 0.00044614057775213637, "loss": 0.9619, "step": 10008 }, { "epoch": 0.6963024800862638, "grad_norm": 2.859375, "learning_rate": 0.00044595297726320173, "loss": 1.0059, "step": 10009 }, { "epoch": 0.6963720477233991, "grad_norm": 1.1640625, "learning_rate": 0.00044576540490598226, "loss": 0.8679, "step": 10010 }, { "epoch": 0.6964416153605343, "grad_norm": 1.0, "learning_rate": 0.0004455778606900021, "loss": 0.5389, "step": 10011 }, { "epoch": 0.6965111829976695, "grad_norm": 1.234375, "learning_rate": 0.00044539034462478324, "loss": 0.7035, "step": 10012 }, { "epoch": 0.6965807506348047, "grad_norm": 0.9609375, "learning_rate": 0.00044520285671984715, "loss": 0.6121, "step": 10013 }, { "epoch": 0.6966503182719399, "grad_norm": 1.078125, "learning_rate": 0.00044501539698471414, "loss": 0.9757, "step": 10014 }, { "epoch": 0.6967198859090751, "grad_norm": 1.1953125, "learning_rate": 0.0004448279654289015, "loss": 0.7322, "step": 10015 }, { "epoch": 0.6967894535462104, "grad_norm": 0.9921875, "learning_rate": 0.00044464056206192684, "loss": 0.6639, "step": 10016 }, { "epoch": 0.6968590211833455, "grad_norm": 0.86328125, "learning_rate": 0.00044445318689330496, "loss": 0.6743, "step": 10017 }, { "epoch": 0.6969285888204807, "grad_norm": 1.125, "learning_rate": 0.0004442658399325503, "loss": 1.0035, "step": 10018 }, { "epoch": 0.6969981564576159, "grad_norm": 1.484375, "learning_rate": 0.0004440785211891749, "loss": 0.9152, "step": 10019 }, { "epoch": 0.6970677240947512, "grad_norm": 0.96484375, "learning_rate": 0.00044389123067269055, "loss": 0.7669, "step": 10020 }, { "epoch": 0.6971372917318863, "grad_norm": 1.0390625, "learning_rate": 0.00044370396839260606, "loss": 0.9853, "step": 10021 }, { "epoch": 0.6972068593690215, "grad_norm": 1.0703125, "learning_rate": 0.0004435167343584302, "loss": 0.827, "step": 10022 }, { "epoch": 0.6972764270061568, "grad_norm": 1.3125, "learning_rate": 0.0004433295285796699, "loss": 0.8992, "step": 10023 }, { "epoch": 0.6973459946432919, "grad_norm": 0.8515625, "learning_rate": 0.0004431423510658304, "loss": 0.6252, "step": 10024 }, { "epoch": 0.6974155622804271, "grad_norm": 1.1015625, "learning_rate": 0.0004429552018264157, "loss": 0.955, "step": 10025 }, { "epoch": 0.6974851299175624, "grad_norm": 1.28125, "learning_rate": 0.0004427680808709276, "loss": 1.0229, "step": 10026 }, { "epoch": 0.6975546975546976, "grad_norm": 1.15625, "learning_rate": 0.00044258098820886774, "loss": 0.8838, "step": 10027 }, { "epoch": 0.6976242651918327, "grad_norm": 1.15625, "learning_rate": 0.000442393923849736, "loss": 0.9667, "step": 10028 }, { "epoch": 0.697693832828968, "grad_norm": 1.1015625, "learning_rate": 0.0004422068878030303, "loss": 0.8528, "step": 10029 }, { "epoch": 0.6977634004661032, "grad_norm": 1.2578125, "learning_rate": 0.000442019880078247, "loss": 0.851, "step": 10030 }, { "epoch": 0.6978329681032384, "grad_norm": 1.359375, "learning_rate": 0.0004418329006848818, "loss": 0.7651, "step": 10031 }, { "epoch": 0.6979025357403735, "grad_norm": 1.140625, "learning_rate": 0.0004416459496324289, "loss": 0.8588, "step": 10032 }, { "epoch": 0.6979721033775088, "grad_norm": 1.5078125, "learning_rate": 0.00044145902693037986, "loss": 1.0708, "step": 10033 }, { "epoch": 0.698041671014644, "grad_norm": 0.83203125, "learning_rate": 0.0004412721325882266, "loss": 0.701, "step": 10034 }, { "epoch": 0.6981112386517792, "grad_norm": 0.953125, "learning_rate": 0.000441085266615458, "loss": 0.7535, "step": 10035 }, { "epoch": 0.6981808062889144, "grad_norm": 1.25, "learning_rate": 0.00044089842902156275, "loss": 1.0042, "step": 10036 }, { "epoch": 0.6982503739260496, "grad_norm": 1.109375, "learning_rate": 0.00044071161981602667, "loss": 0.9096, "step": 10037 }, { "epoch": 0.6983199415631848, "grad_norm": 1.28125, "learning_rate": 0.0004405248390083361, "loss": 1.0399, "step": 10038 }, { "epoch": 0.6983895092003201, "grad_norm": 1.4453125, "learning_rate": 0.0004403380866079741, "loss": 1.0811, "step": 10039 }, { "epoch": 0.6984590768374552, "grad_norm": 0.96484375, "learning_rate": 0.00044015136262442247, "loss": 0.4406, "step": 10040 }, { "epoch": 0.6985286444745904, "grad_norm": 1.109375, "learning_rate": 0.00043996466706716354, "loss": 0.8179, "step": 10041 }, { "epoch": 0.6985982121117257, "grad_norm": 1.1953125, "learning_rate": 0.00043977799994567604, "loss": 0.7871, "step": 10042 }, { "epoch": 0.6986677797488609, "grad_norm": 1.4296875, "learning_rate": 0.0004395913612694379, "loss": 1.0078, "step": 10043 }, { "epoch": 0.698737347385996, "grad_norm": 1.203125, "learning_rate": 0.0004394047510479254, "loss": 0.721, "step": 10044 }, { "epoch": 0.6988069150231312, "grad_norm": 1.4140625, "learning_rate": 0.0004392181692906142, "loss": 0.8101, "step": 10045 }, { "epoch": 0.6988764826602665, "grad_norm": 1.0390625, "learning_rate": 0.00043903161600697806, "loss": 0.7656, "step": 10046 }, { "epoch": 0.6989460502974016, "grad_norm": 0.98046875, "learning_rate": 0.00043884509120648864, "loss": 0.5622, "step": 10047 }, { "epoch": 0.6990156179345368, "grad_norm": 0.9921875, "learning_rate": 0.0004386585948986174, "loss": 0.7001, "step": 10048 }, { "epoch": 0.6990851855716721, "grad_norm": 0.99609375, "learning_rate": 0.0004384721270928329, "loss": 0.8348, "step": 10049 }, { "epoch": 0.6991547532088073, "grad_norm": 1.015625, "learning_rate": 0.0004382856877986039, "loss": 0.7234, "step": 10050 }, { "epoch": 0.6992243208459424, "grad_norm": 0.9296875, "learning_rate": 0.00043809927702539607, "loss": 0.5557, "step": 10051 }, { "epoch": 0.6992938884830777, "grad_norm": 1.609375, "learning_rate": 0.00043791289478267514, "loss": 1.1759, "step": 10052 }, { "epoch": 0.6993634561202129, "grad_norm": 0.9296875, "learning_rate": 0.00043772654107990385, "loss": 0.8746, "step": 10053 }, { "epoch": 0.6994330237573481, "grad_norm": 0.8125, "learning_rate": 0.0004375402159265448, "loss": 0.8024, "step": 10054 }, { "epoch": 0.6995025913944833, "grad_norm": 1.0390625, "learning_rate": 0.0004373539193320589, "loss": 0.7398, "step": 10055 }, { "epoch": 0.6995721590316185, "grad_norm": 1.125, "learning_rate": 0.00043716765130590507, "loss": 0.6992, "step": 10056 }, { "epoch": 0.6996417266687537, "grad_norm": 1.4296875, "learning_rate": 0.00043698141185754104, "loss": 0.8592, "step": 10057 }, { "epoch": 0.6997112943058889, "grad_norm": 1.0390625, "learning_rate": 0.00043679520099642276, "loss": 0.7887, "step": 10058 }, { "epoch": 0.6997808619430241, "grad_norm": 1.03125, "learning_rate": 0.00043660901873200533, "loss": 0.7324, "step": 10059 }, { "epoch": 0.6998504295801593, "grad_norm": 1.2890625, "learning_rate": 0.0004364228650737426, "loss": 0.7056, "step": 10060 }, { "epoch": 0.6999199972172945, "grad_norm": 0.8671875, "learning_rate": 0.00043623674003108584, "loss": 0.8171, "step": 10061 }, { "epoch": 0.6999895648544298, "grad_norm": 1.078125, "learning_rate": 0.00043605064361348613, "loss": 0.7432, "step": 10062 }, { "epoch": 0.7000591324915649, "grad_norm": 1.125, "learning_rate": 0.00043586457583039183, "loss": 0.6791, "step": 10063 }, { "epoch": 0.7001287001287001, "grad_norm": 1.3671875, "learning_rate": 0.00043567853669125133, "loss": 0.8353, "step": 10064 }, { "epoch": 0.7001982677658354, "grad_norm": 1.015625, "learning_rate": 0.00043549252620551004, "loss": 0.7989, "step": 10065 }, { "epoch": 0.7002678354029706, "grad_norm": 1.1875, "learning_rate": 0.0004353065443826133, "loss": 0.6995, "step": 10066 }, { "epoch": 0.7003374030401057, "grad_norm": 0.99609375, "learning_rate": 0.00043512059123200356, "loss": 0.6129, "step": 10067 }, { "epoch": 0.7004069706772409, "grad_norm": 1.1015625, "learning_rate": 0.000434934666763123, "loss": 0.8444, "step": 10068 }, { "epoch": 0.7004765383143762, "grad_norm": 1.0078125, "learning_rate": 0.0004347487709854122, "loss": 0.8488, "step": 10069 }, { "epoch": 0.7005461059515113, "grad_norm": 1.234375, "learning_rate": 0.0004345629039083099, "loss": 0.8437, "step": 10070 }, { "epoch": 0.7006156735886465, "grad_norm": 1.0078125, "learning_rate": 0.0004343770655412532, "loss": 1.0469, "step": 10071 }, { "epoch": 0.7006852412257818, "grad_norm": 0.875, "learning_rate": 0.00043419125589367745, "loss": 0.7606, "step": 10072 }, { "epoch": 0.700754808862917, "grad_norm": 0.890625, "learning_rate": 0.00043400547497501863, "loss": 0.6727, "step": 10073 }, { "epoch": 0.7008243765000521, "grad_norm": 1.078125, "learning_rate": 0.00043381972279470896, "loss": 0.7686, "step": 10074 }, { "epoch": 0.7008939441371874, "grad_norm": 0.7890625, "learning_rate": 0.0004336339993621795, "loss": 0.6855, "step": 10075 }, { "epoch": 0.7009635117743226, "grad_norm": 0.90625, "learning_rate": 0.00043344830468686137, "loss": 0.7121, "step": 10076 }, { "epoch": 0.7010330794114578, "grad_norm": 1.3125, "learning_rate": 0.00043326263877818227, "loss": 0.8287, "step": 10077 }, { "epoch": 0.701102647048593, "grad_norm": 1.1328125, "learning_rate": 0.00043307700164557016, "loss": 0.7769, "step": 10078 }, { "epoch": 0.7011722146857282, "grad_norm": 1.0859375, "learning_rate": 0.00043289139329845004, "loss": 0.8616, "step": 10079 }, { "epoch": 0.7012417823228634, "grad_norm": 0.828125, "learning_rate": 0.00043270581374624695, "loss": 0.8422, "step": 10080 }, { "epoch": 0.7013113499599986, "grad_norm": 1.296875, "learning_rate": 0.0004325202629983829, "loss": 0.9342, "step": 10081 }, { "epoch": 0.7013809175971338, "grad_norm": 1.078125, "learning_rate": 0.0004323347410642795, "loss": 0.7916, "step": 10082 }, { "epoch": 0.701450485234269, "grad_norm": 1.203125, "learning_rate": 0.00043214924795335717, "loss": 0.7212, "step": 10083 }, { "epoch": 0.7015200528714042, "grad_norm": 1.1796875, "learning_rate": 0.00043196378367503377, "loss": 0.7458, "step": 10084 }, { "epoch": 0.7015896205085395, "grad_norm": 1.2265625, "learning_rate": 0.00043177834823872644, "loss": 0.8478, "step": 10085 }, { "epoch": 0.7016591881456746, "grad_norm": 1.0078125, "learning_rate": 0.00043159294165384963, "loss": 0.8785, "step": 10086 }, { "epoch": 0.7017287557828098, "grad_norm": 0.8828125, "learning_rate": 0.00043140756392981905, "loss": 0.6397, "step": 10087 }, { "epoch": 0.7017983234199451, "grad_norm": 1.0234375, "learning_rate": 0.00043122221507604653, "loss": 0.8018, "step": 10088 }, { "epoch": 0.7018678910570803, "grad_norm": 1.0, "learning_rate": 0.00043103689510194264, "loss": 0.6511, "step": 10089 }, { "epoch": 0.7019374586942154, "grad_norm": 1.4140625, "learning_rate": 0.0004308516040169178, "loss": 0.7782, "step": 10090 }, { "epoch": 0.7020070263313507, "grad_norm": 1.0078125, "learning_rate": 0.00043066634183037945, "loss": 0.7878, "step": 10091 }, { "epoch": 0.7020765939684859, "grad_norm": 0.921875, "learning_rate": 0.00043048110855173507, "loss": 0.688, "step": 10092 }, { "epoch": 0.702146161605621, "grad_norm": 0.89453125, "learning_rate": 0.0004302959041903889, "loss": 0.619, "step": 10093 }, { "epoch": 0.7022157292427562, "grad_norm": 0.984375, "learning_rate": 0.0004301107287557455, "loss": 0.8383, "step": 10094 }, { "epoch": 0.7022852968798915, "grad_norm": 1.0625, "learning_rate": 0.0004299255822572067, "loss": 0.7158, "step": 10095 }, { "epoch": 0.7023548645170267, "grad_norm": 1.265625, "learning_rate": 0.00042974046470417327, "loss": 0.8948, "step": 10096 }, { "epoch": 0.7024244321541618, "grad_norm": 1.0859375, "learning_rate": 0.0004295553761060451, "loss": 0.9329, "step": 10097 }, { "epoch": 0.7024939997912971, "grad_norm": 1.0078125, "learning_rate": 0.0004293703164722197, "loss": 0.7611, "step": 10098 }, { "epoch": 0.7025635674284323, "grad_norm": 1.0546875, "learning_rate": 0.00042918528581209313, "loss": 0.6111, "step": 10099 }, { "epoch": 0.7026331350655675, "grad_norm": 1.0546875, "learning_rate": 0.00042900028413506055, "loss": 0.6119, "step": 10100 }, { "epoch": 0.7027027027027027, "grad_norm": 1.171875, "learning_rate": 0.0004288153114505159, "loss": 0.7833, "step": 10101 }, { "epoch": 0.7027722703398379, "grad_norm": 1.0, "learning_rate": 0.0004286303677678508, "loss": 0.6972, "step": 10102 }, { "epoch": 0.7028418379769731, "grad_norm": 1.0546875, "learning_rate": 0.0004284454530964552, "loss": 0.7726, "step": 10103 }, { "epoch": 0.7029114056141084, "grad_norm": 1.140625, "learning_rate": 0.00042826056744571875, "loss": 0.7007, "step": 10104 }, { "epoch": 0.7029809732512435, "grad_norm": 1.28125, "learning_rate": 0.0004280757108250293, "loss": 0.838, "step": 10105 }, { "epoch": 0.7030505408883787, "grad_norm": 1.296875, "learning_rate": 0.00042789088324377244, "loss": 0.9073, "step": 10106 }, { "epoch": 0.7031201085255139, "grad_norm": 0.9921875, "learning_rate": 0.00042770608471133254, "loss": 0.642, "step": 10107 }, { "epoch": 0.7031896761626492, "grad_norm": 1.5, "learning_rate": 0.00042752131523709347, "loss": 1.1096, "step": 10108 }, { "epoch": 0.7032592437997843, "grad_norm": 1.0390625, "learning_rate": 0.0004273365748304362, "loss": 0.7165, "step": 10109 }, { "epoch": 0.7033288114369195, "grad_norm": 1.03125, "learning_rate": 0.0004271518635007415, "loss": 0.7117, "step": 10110 }, { "epoch": 0.7033983790740548, "grad_norm": 1.1953125, "learning_rate": 0.00042696718125738756, "loss": 0.8286, "step": 10111 }, { "epoch": 0.70346794671119, "grad_norm": 1.21875, "learning_rate": 0.00042678252810975206, "loss": 0.8924, "step": 10112 }, { "epoch": 0.7035375143483251, "grad_norm": 1.578125, "learning_rate": 0.00042659790406721033, "loss": 0.6813, "step": 10113 }, { "epoch": 0.7036070819854604, "grad_norm": 1.28125, "learning_rate": 0.00042641330913913676, "loss": 1.0315, "step": 10114 }, { "epoch": 0.7036766496225956, "grad_norm": 1.078125, "learning_rate": 0.0004262287433349047, "loss": 0.8003, "step": 10115 }, { "epoch": 0.7037462172597307, "grad_norm": 1.296875, "learning_rate": 0.000426044206663885, "loss": 0.9367, "step": 10116 }, { "epoch": 0.703815784896866, "grad_norm": 1.0078125, "learning_rate": 0.0004258596991354475, "loss": 0.7856, "step": 10117 }, { "epoch": 0.7038853525340012, "grad_norm": 0.82421875, "learning_rate": 0.0004256752207589599, "loss": 0.776, "step": 10118 }, { "epoch": 0.7039549201711364, "grad_norm": 1.953125, "learning_rate": 0.00042549077154379055, "loss": 0.6142, "step": 10119 }, { "epoch": 0.7040244878082715, "grad_norm": 1.03125, "learning_rate": 0.00042530635149930397, "loss": 0.5824, "step": 10120 }, { "epoch": 0.7040940554454068, "grad_norm": 1.40625, "learning_rate": 0.00042512196063486396, "loss": 0.8986, "step": 10121 }, { "epoch": 0.704163623082542, "grad_norm": 1.2109375, "learning_rate": 0.0004249375989598335, "loss": 0.9061, "step": 10122 }, { "epoch": 0.7042331907196772, "grad_norm": 0.99609375, "learning_rate": 0.00042475326648357283, "loss": 0.6117, "step": 10123 }, { "epoch": 0.7043027583568124, "grad_norm": 1.0078125, "learning_rate": 0.00042456896321544225, "loss": 0.7902, "step": 10124 }, { "epoch": 0.7043723259939476, "grad_norm": 1.25, "learning_rate": 0.0004243846891647989, "loss": 0.8703, "step": 10125 }, { "epoch": 0.7044418936310828, "grad_norm": 0.98828125, "learning_rate": 0.00042420044434100015, "loss": 0.9079, "step": 10126 }, { "epoch": 0.7045114612682181, "grad_norm": 1.0390625, "learning_rate": 0.00042401622875340016, "loss": 0.7512, "step": 10127 }, { "epoch": 0.7045810289053532, "grad_norm": 1.0546875, "learning_rate": 0.000423832042411353, "loss": 0.7617, "step": 10128 }, { "epoch": 0.7046505965424884, "grad_norm": 0.796875, "learning_rate": 0.00042364788532421095, "loss": 0.6617, "step": 10129 }, { "epoch": 0.7047201641796237, "grad_norm": 0.859375, "learning_rate": 0.00042346375750132415, "loss": 0.5288, "step": 10130 }, { "epoch": 0.7047897318167589, "grad_norm": 1.1953125, "learning_rate": 0.00042327965895204157, "loss": 0.9053, "step": 10131 }, { "epoch": 0.704859299453894, "grad_norm": 0.9296875, "learning_rate": 0.0004230955896857109, "loss": 0.7674, "step": 10132 }, { "epoch": 0.7049288670910292, "grad_norm": 0.89453125, "learning_rate": 0.0004229115497116788, "loss": 0.5979, "step": 10133 }, { "epoch": 0.7049984347281645, "grad_norm": 1.125, "learning_rate": 0.0004227275390392895, "loss": 0.9274, "step": 10134 }, { "epoch": 0.7050680023652997, "grad_norm": 1.0703125, "learning_rate": 0.00042254355767788564, "loss": 0.8571, "step": 10135 }, { "epoch": 0.7051375700024348, "grad_norm": 0.91796875, "learning_rate": 0.0004223596056368094, "loss": 0.9398, "step": 10136 }, { "epoch": 0.7052071376395701, "grad_norm": 1.359375, "learning_rate": 0.0004221756829254012, "loss": 0.9064, "step": 10137 }, { "epoch": 0.7052767052767053, "grad_norm": 0.94140625, "learning_rate": 0.00042199178955299946, "loss": 0.6491, "step": 10138 }, { "epoch": 0.7053462729138404, "grad_norm": 1.046875, "learning_rate": 0.00042180792552894077, "loss": 0.7337, "step": 10139 }, { "epoch": 0.7054158405509757, "grad_norm": 1.1953125, "learning_rate": 0.0004216240908625617, "loss": 0.7206, "step": 10140 }, { "epoch": 0.7054854081881109, "grad_norm": 0.99609375, "learning_rate": 0.0004214402855631958, "loss": 0.6149, "step": 10141 }, { "epoch": 0.7055549758252461, "grad_norm": 1.140625, "learning_rate": 0.00042125650964017593, "loss": 0.8573, "step": 10142 }, { "epoch": 0.7056245434623813, "grad_norm": 0.90625, "learning_rate": 0.00042107276310283384, "loss": 0.7419, "step": 10143 }, { "epoch": 0.7056941110995165, "grad_norm": 1.046875, "learning_rate": 0.00042088904596049884, "loss": 0.8879, "step": 10144 }, { "epoch": 0.7057636787366517, "grad_norm": 1.1171875, "learning_rate": 0.00042070535822249865, "loss": 0.8415, "step": 10145 }, { "epoch": 0.7058332463737869, "grad_norm": 1.046875, "learning_rate": 0.0004205216998981607, "loss": 0.7848, "step": 10146 }, { "epoch": 0.7059028140109221, "grad_norm": 1.09375, "learning_rate": 0.0004203380709968103, "loss": 0.7432, "step": 10147 }, { "epoch": 0.7059723816480573, "grad_norm": 1.28125, "learning_rate": 0.000420154471527771, "loss": 0.7737, "step": 10148 }, { "epoch": 0.7060419492851925, "grad_norm": 0.8046875, "learning_rate": 0.0004199709015003645, "loss": 0.7522, "step": 10149 }, { "epoch": 0.7061115169223278, "grad_norm": 0.89453125, "learning_rate": 0.00041978736092391226, "loss": 0.5649, "step": 10150 }, { "epoch": 0.7061810845594629, "grad_norm": 1.046875, "learning_rate": 0.00041960384980773357, "loss": 0.7011, "step": 10151 }, { "epoch": 0.7062506521965981, "grad_norm": 1.1484375, "learning_rate": 0.00041942036816114604, "loss": 0.6468, "step": 10152 }, { "epoch": 0.7063202198337334, "grad_norm": 1.21875, "learning_rate": 0.00041923691599346545, "loss": 0.7579, "step": 10153 }, { "epoch": 0.7063897874708686, "grad_norm": 1.1484375, "learning_rate": 0.00041905349331400744, "loss": 0.7975, "step": 10154 }, { "epoch": 0.7064593551080037, "grad_norm": 1.3828125, "learning_rate": 0.0004188701001320845, "loss": 1.0643, "step": 10155 }, { "epoch": 0.706528922745139, "grad_norm": 1.59375, "learning_rate": 0.0004186867364570087, "loss": 0.741, "step": 10156 }, { "epoch": 0.7065984903822742, "grad_norm": 1.5703125, "learning_rate": 0.0004185034022980907, "loss": 1.1413, "step": 10157 }, { "epoch": 0.7066680580194094, "grad_norm": 0.890625, "learning_rate": 0.0004183200976646391, "loss": 0.5771, "step": 10158 }, { "epoch": 0.7067376256565445, "grad_norm": 1.125, "learning_rate": 0.00041813682256596065, "loss": 0.9539, "step": 10159 }, { "epoch": 0.7068071932936798, "grad_norm": 1.0546875, "learning_rate": 0.0004179535770113615, "loss": 0.6908, "step": 10160 }, { "epoch": 0.706876760930815, "grad_norm": 0.94140625, "learning_rate": 0.0004177703610101463, "loss": 0.6224, "step": 10161 }, { "epoch": 0.7069463285679501, "grad_norm": 0.9921875, "learning_rate": 0.00041758717457161766, "loss": 0.8689, "step": 10162 }, { "epoch": 0.7070158962050854, "grad_norm": 1.0390625, "learning_rate": 0.0004174040177050762, "loss": 0.7937, "step": 10163 }, { "epoch": 0.7070854638422206, "grad_norm": 0.921875, "learning_rate": 0.00041722089041982234, "loss": 0.8885, "step": 10164 }, { "epoch": 0.7071550314793558, "grad_norm": 1.1953125, "learning_rate": 0.0004170377927251545, "loss": 0.9376, "step": 10165 }, { "epoch": 0.707224599116491, "grad_norm": 1.140625, "learning_rate": 0.00041685472463036936, "loss": 0.6855, "step": 10166 }, { "epoch": 0.7072941667536262, "grad_norm": 1.0625, "learning_rate": 0.0004166716861447615, "loss": 0.7672, "step": 10167 }, { "epoch": 0.7073637343907614, "grad_norm": 1.046875, "learning_rate": 0.00041648867727762565, "loss": 0.8199, "step": 10168 }, { "epoch": 0.7074333020278967, "grad_norm": 1.1796875, "learning_rate": 0.00041630569803825324, "loss": 0.8799, "step": 10169 }, { "epoch": 0.7075028696650318, "grad_norm": 1.2421875, "learning_rate": 0.00041612274843593557, "loss": 0.6829, "step": 10170 }, { "epoch": 0.707572437302167, "grad_norm": 0.99609375, "learning_rate": 0.00041593982847996203, "loss": 0.6841, "step": 10171 }, { "epoch": 0.7076420049393022, "grad_norm": 1.2109375, "learning_rate": 0.00041575693817962013, "loss": 0.8985, "step": 10172 }, { "epoch": 0.7077115725764375, "grad_norm": 1.0546875, "learning_rate": 0.0004155740775441957, "loss": 0.8377, "step": 10173 }, { "epoch": 0.7077811402135726, "grad_norm": 1.1875, "learning_rate": 0.000415391246582974, "loss": 0.8208, "step": 10174 }, { "epoch": 0.7078507078507078, "grad_norm": 1.1640625, "learning_rate": 0.0004152084453052385, "loss": 0.8361, "step": 10175 }, { "epoch": 0.7079202754878431, "grad_norm": 1.3671875, "learning_rate": 0.00041502567372027056, "loss": 1.1543, "step": 10176 }, { "epoch": 0.7079898431249783, "grad_norm": 1.40625, "learning_rate": 0.00041484293183735, "loss": 1.0145, "step": 10177 }, { "epoch": 0.7080594107621134, "grad_norm": 1.171875, "learning_rate": 0.0004146602196657561, "loss": 0.8072, "step": 10178 }, { "epoch": 0.7081289783992487, "grad_norm": 1.203125, "learning_rate": 0.0004144775372147661, "loss": 0.8695, "step": 10179 }, { "epoch": 0.7081985460363839, "grad_norm": 1.09375, "learning_rate": 0.0004142948844936556, "loss": 0.8563, "step": 10180 }, { "epoch": 0.708268113673519, "grad_norm": 1.2109375, "learning_rate": 0.0004141122615116982, "loss": 0.7231, "step": 10181 }, { "epoch": 0.7083376813106543, "grad_norm": 1.140625, "learning_rate": 0.00041392966827816723, "loss": 0.677, "step": 10182 }, { "epoch": 0.7084072489477895, "grad_norm": 1.203125, "learning_rate": 0.00041374710480233403, "loss": 1.0409, "step": 10183 }, { "epoch": 0.7084768165849247, "grad_norm": 1.0859375, "learning_rate": 0.0004135645710934679, "loss": 1.1684, "step": 10184 }, { "epoch": 0.7085463842220598, "grad_norm": 1.0390625, "learning_rate": 0.0004133820671608366, "loss": 0.6784, "step": 10185 }, { "epoch": 0.7086159518591951, "grad_norm": 1.0859375, "learning_rate": 0.0004131995930137076, "loss": 0.8895, "step": 10186 }, { "epoch": 0.7086855194963303, "grad_norm": 1.3671875, "learning_rate": 0.0004130171486613451, "loss": 1.0721, "step": 10187 }, { "epoch": 0.7087550871334655, "grad_norm": 0.90625, "learning_rate": 0.0004128347341130132, "loss": 0.7085, "step": 10188 }, { "epoch": 0.7088246547706007, "grad_norm": 1.140625, "learning_rate": 0.00041265234937797437, "loss": 0.6015, "step": 10189 }, { "epoch": 0.7088942224077359, "grad_norm": 1.0546875, "learning_rate": 0.00041246999446548885, "loss": 0.859, "step": 10190 }, { "epoch": 0.7089637900448711, "grad_norm": 0.94921875, "learning_rate": 0.0004122876693848151, "loss": 0.8156, "step": 10191 }, { "epoch": 0.7090333576820064, "grad_norm": 1.5078125, "learning_rate": 0.0004121053741452113, "loss": 0.5851, "step": 10192 }, { "epoch": 0.7091029253191415, "grad_norm": 0.9609375, "learning_rate": 0.00041192310875593386, "loss": 0.7188, "step": 10193 }, { "epoch": 0.7091724929562767, "grad_norm": 1.09375, "learning_rate": 0.00041174087322623667, "loss": 0.5221, "step": 10194 }, { "epoch": 0.709242060593412, "grad_norm": 1.234375, "learning_rate": 0.00041155866756537263, "loss": 0.8736, "step": 10195 }, { "epoch": 0.7093116282305472, "grad_norm": 1.0078125, "learning_rate": 0.0004113764917825935, "loss": 0.6578, "step": 10196 }, { "epoch": 0.7093811958676823, "grad_norm": 1.0703125, "learning_rate": 0.0004111943458871495, "loss": 0.7692, "step": 10197 }, { "epoch": 0.7094507635048175, "grad_norm": 1.21875, "learning_rate": 0.0004110122298882889, "loss": 1.0225, "step": 10198 }, { "epoch": 0.7095203311419528, "grad_norm": 1.21875, "learning_rate": 0.0004108301437952582, "loss": 0.9021, "step": 10199 }, { "epoch": 0.709589898779088, "grad_norm": 1.09375, "learning_rate": 0.00041064808761730344, "loss": 0.7685, "step": 10200 }, { "epoch": 0.7096594664162231, "grad_norm": 1.0078125, "learning_rate": 0.00041046606136366795, "loss": 0.843, "step": 10201 }, { "epoch": 0.7097290340533584, "grad_norm": 1.078125, "learning_rate": 0.0004102840650435943, "loss": 0.6716, "step": 10202 }, { "epoch": 0.7097986016904936, "grad_norm": 1.25, "learning_rate": 0.0004101020986663239, "loss": 0.9474, "step": 10203 }, { "epoch": 0.7098681693276288, "grad_norm": 1.2734375, "learning_rate": 0.00040992016224109554, "loss": 0.6837, "step": 10204 }, { "epoch": 0.709937736964764, "grad_norm": 0.85546875, "learning_rate": 0.00040973825577714674, "loss": 0.5875, "step": 10205 }, { "epoch": 0.7100073046018992, "grad_norm": 1.0703125, "learning_rate": 0.00040955637928371424, "loss": 0.7265, "step": 10206 }, { "epoch": 0.7100768722390344, "grad_norm": 1.2109375, "learning_rate": 0.0004093745327700331, "loss": 0.9111, "step": 10207 }, { "epoch": 0.7101464398761697, "grad_norm": 1.3359375, "learning_rate": 0.00040919271624533627, "loss": 0.8341, "step": 10208 }, { "epoch": 0.7102160075133048, "grad_norm": 1.0859375, "learning_rate": 0.00040901092971885503, "loss": 0.6712, "step": 10209 }, { "epoch": 0.71028557515044, "grad_norm": 1.0546875, "learning_rate": 0.0004088291731998198, "loss": 0.7024, "step": 10210 }, { "epoch": 0.7103551427875752, "grad_norm": 1.1953125, "learning_rate": 0.00040864744669746, "loss": 0.6244, "step": 10211 }, { "epoch": 0.7104247104247104, "grad_norm": 0.921875, "learning_rate": 0.0004084657502210022, "loss": 0.6959, "step": 10212 }, { "epoch": 0.7104942780618456, "grad_norm": 1.1875, "learning_rate": 0.00040828408377967165, "loss": 0.5662, "step": 10213 }, { "epoch": 0.7105638456989808, "grad_norm": 1.25, "learning_rate": 0.00040810244738269277, "loss": 0.7056, "step": 10214 }, { "epoch": 0.7106334133361161, "grad_norm": 1.3203125, "learning_rate": 0.0004079208410392887, "loss": 0.9715, "step": 10215 }, { "epoch": 0.7107029809732512, "grad_norm": 0.9921875, "learning_rate": 0.0004077392647586796, "loss": 0.7252, "step": 10216 }, { "epoch": 0.7107725486103864, "grad_norm": 1.1015625, "learning_rate": 0.0004075577185500858, "loss": 0.768, "step": 10217 }, { "epoch": 0.7108421162475217, "grad_norm": 0.98828125, "learning_rate": 0.000407376202422725, "loss": 0.789, "step": 10218 }, { "epoch": 0.7109116838846569, "grad_norm": 1.328125, "learning_rate": 0.0004071947163858131, "loss": 0.8223, "step": 10219 }, { "epoch": 0.710981251521792, "grad_norm": 0.88671875, "learning_rate": 0.00040701326044856556, "loss": 0.6058, "step": 10220 }, { "epoch": 0.7110508191589273, "grad_norm": 1.3671875, "learning_rate": 0.0004068318346201962, "loss": 0.8007, "step": 10221 }, { "epoch": 0.7111203867960625, "grad_norm": 1.0703125, "learning_rate": 0.0004066504389099165, "loss": 0.7994, "step": 10222 }, { "epoch": 0.7111899544331977, "grad_norm": 1.40625, "learning_rate": 0.0004064690733269365, "loss": 0.7742, "step": 10223 }, { "epoch": 0.7112595220703328, "grad_norm": 1.0390625, "learning_rate": 0.00040628773788046525, "loss": 0.7676, "step": 10224 }, { "epoch": 0.7113290897074681, "grad_norm": 1.3046875, "learning_rate": 0.0004061064325797105, "loss": 0.7144, "step": 10225 }, { "epoch": 0.7113986573446033, "grad_norm": 1.2578125, "learning_rate": 0.0004059251574338776, "loss": 0.8438, "step": 10226 }, { "epoch": 0.7114682249817385, "grad_norm": 1.515625, "learning_rate": 0.0004057439124521706, "loss": 1.0629, "step": 10227 }, { "epoch": 0.7115377926188737, "grad_norm": 1.25, "learning_rate": 0.0004055626976437924, "loss": 0.816, "step": 10228 }, { "epoch": 0.7116073602560089, "grad_norm": 1.03125, "learning_rate": 0.00040538151301794455, "loss": 0.7275, "step": 10229 }, { "epoch": 0.7116769278931441, "grad_norm": 1.21875, "learning_rate": 0.0004052003585838261, "loss": 0.7616, "step": 10230 }, { "epoch": 0.7117464955302794, "grad_norm": 1.1640625, "learning_rate": 0.0004050192343506358, "loss": 0.9566, "step": 10231 }, { "epoch": 0.7118160631674145, "grad_norm": 1.390625, "learning_rate": 0.0004048381403275697, "loss": 0.7487, "step": 10232 }, { "epoch": 0.7118856308045497, "grad_norm": 0.99609375, "learning_rate": 0.00040465707652382276, "loss": 0.855, "step": 10233 }, { "epoch": 0.711955198441685, "grad_norm": 0.7734375, "learning_rate": 0.00040447604294858877, "loss": 0.6897, "step": 10234 }, { "epoch": 0.7120247660788201, "grad_norm": 1.0859375, "learning_rate": 0.00040429503961106, "loss": 0.9849, "step": 10235 }, { "epoch": 0.7120943337159553, "grad_norm": 1.1484375, "learning_rate": 0.0004041140665204264, "loss": 0.6458, "step": 10236 }, { "epoch": 0.7121639013530905, "grad_norm": 0.9765625, "learning_rate": 0.00040393312368587674, "loss": 0.8279, "step": 10237 }, { "epoch": 0.7122334689902258, "grad_norm": 1.140625, "learning_rate": 0.00040375221111659866, "loss": 0.8865, "step": 10238 }, { "epoch": 0.7123030366273609, "grad_norm": 1.046875, "learning_rate": 0.0004035713288217784, "loss": 0.9888, "step": 10239 }, { "epoch": 0.7123726042644961, "grad_norm": 1.0859375, "learning_rate": 0.0004033904768105997, "loss": 0.8738, "step": 10240 }, { "epoch": 0.7124421719016314, "grad_norm": 0.91796875, "learning_rate": 0.0004032096550922453, "loss": 0.8892, "step": 10241 }, { "epoch": 0.7125117395387666, "grad_norm": 1.234375, "learning_rate": 0.0004030288636758964, "loss": 0.7223, "step": 10242 }, { "epoch": 0.7125813071759017, "grad_norm": 1.140625, "learning_rate": 0.00040284810257073324, "loss": 0.6951, "step": 10243 }, { "epoch": 0.712650874813037, "grad_norm": 1.015625, "learning_rate": 0.00040266737178593326, "loss": 0.7866, "step": 10244 }, { "epoch": 0.7127204424501722, "grad_norm": 0.90234375, "learning_rate": 0.00040248667133067364, "loss": 0.693, "step": 10245 }, { "epoch": 0.7127900100873074, "grad_norm": 1.171875, "learning_rate": 0.00040230600121412885, "loss": 0.9007, "step": 10246 }, { "epoch": 0.7128595777244426, "grad_norm": 1.046875, "learning_rate": 0.0004021253614454731, "loss": 0.6981, "step": 10247 }, { "epoch": 0.7129291453615778, "grad_norm": 1.1015625, "learning_rate": 0.0004019447520338776, "loss": 0.7544, "step": 10248 }, { "epoch": 0.712998712998713, "grad_norm": 1.1796875, "learning_rate": 0.0004017641729885134, "loss": 0.8259, "step": 10249 }, { "epoch": 0.7130682806358482, "grad_norm": 1.140625, "learning_rate": 0.00040158362431854934, "loss": 0.9236, "step": 10250 }, { "epoch": 0.7131378482729834, "grad_norm": 1.140625, "learning_rate": 0.0004014031060331522, "loss": 0.7259, "step": 10251 }, { "epoch": 0.7132074159101186, "grad_norm": 1.328125, "learning_rate": 0.0004012226181414882, "loss": 0.8314, "step": 10252 }, { "epoch": 0.7132769835472538, "grad_norm": 1.1796875, "learning_rate": 0.0004010421606527218, "loss": 0.7988, "step": 10253 }, { "epoch": 0.713346551184389, "grad_norm": 1.3203125, "learning_rate": 0.00040086173357601566, "loss": 0.8601, "step": 10254 }, { "epoch": 0.7134161188215242, "grad_norm": 1.046875, "learning_rate": 0.00040068133692053044, "loss": 0.9581, "step": 10255 }, { "epoch": 0.7134856864586594, "grad_norm": 1.3046875, "learning_rate": 0.00040050097069542614, "loss": 0.9045, "step": 10256 }, { "epoch": 0.7135552540957947, "grad_norm": 1.2109375, "learning_rate": 0.00040032063490986114, "loss": 0.9228, "step": 10257 }, { "epoch": 0.7136248217329298, "grad_norm": 0.96484375, "learning_rate": 0.0004001403295729914, "loss": 0.7246, "step": 10258 }, { "epoch": 0.713694389370065, "grad_norm": 0.94140625, "learning_rate": 0.0003999600546939726, "loss": 0.9472, "step": 10259 }, { "epoch": 0.7137639570072003, "grad_norm": 1.5625, "learning_rate": 0.0003997798102819573, "loss": 0.6811, "step": 10260 }, { "epoch": 0.7138335246443355, "grad_norm": 1.3125, "learning_rate": 0.0003995995963460983, "loss": 0.8605, "step": 10261 }, { "epoch": 0.7139030922814706, "grad_norm": 0.91015625, "learning_rate": 0.00039941941289554526, "loss": 0.5264, "step": 10262 }, { "epoch": 0.7139726599186058, "grad_norm": 1.203125, "learning_rate": 0.00039923925993944764, "loss": 0.945, "step": 10263 }, { "epoch": 0.7140422275557411, "grad_norm": 1.109375, "learning_rate": 0.0003990591374869523, "loss": 0.958, "step": 10264 }, { "epoch": 0.7141117951928763, "grad_norm": 1.125, "learning_rate": 0.00039887904554720467, "loss": 1.0233, "step": 10265 }, { "epoch": 0.7141813628300114, "grad_norm": 1.390625, "learning_rate": 0.00039869898412934926, "loss": 0.8459, "step": 10266 }, { "epoch": 0.7142509304671467, "grad_norm": 1.1484375, "learning_rate": 0.00039851895324252896, "loss": 0.757, "step": 10267 }, { "epoch": 0.7143204981042819, "grad_norm": 1.2734375, "learning_rate": 0.0003983389528958845, "loss": 0.9271, "step": 10268 }, { "epoch": 0.7143900657414171, "grad_norm": 1.5078125, "learning_rate": 0.000398158983098555, "loss": 0.8022, "step": 10269 }, { "epoch": 0.7144596333785523, "grad_norm": 1.125, "learning_rate": 0.000397979043859679, "loss": 0.7929, "step": 10270 }, { "epoch": 0.7145292010156875, "grad_norm": 1.015625, "learning_rate": 0.00039779913518839304, "loss": 0.6102, "step": 10271 }, { "epoch": 0.7145987686528227, "grad_norm": 0.984375, "learning_rate": 0.0003976192570938316, "loss": 0.7635, "step": 10272 }, { "epoch": 0.714668336289958, "grad_norm": 1.0390625, "learning_rate": 0.00039743940958512783, "loss": 0.8293, "step": 10273 }, { "epoch": 0.7147379039270931, "grad_norm": 0.90625, "learning_rate": 0.00039725959267141364, "loss": 0.8435, "step": 10274 }, { "epoch": 0.7148074715642283, "grad_norm": 1.3671875, "learning_rate": 0.0003970798063618196, "loss": 0.832, "step": 10275 }, { "epoch": 0.7148770392013635, "grad_norm": 1.4140625, "learning_rate": 0.00039690005066547377, "loss": 0.8953, "step": 10276 }, { "epoch": 0.7149466068384988, "grad_norm": 1.3046875, "learning_rate": 0.00039672032559150383, "loss": 0.8819, "step": 10277 }, { "epoch": 0.7150161744756339, "grad_norm": 1.0859375, "learning_rate": 0.0003965406311490347, "loss": 0.7694, "step": 10278 }, { "epoch": 0.7150857421127691, "grad_norm": 1.1328125, "learning_rate": 0.00039636096734719096, "loss": 0.7337, "step": 10279 }, { "epoch": 0.7151553097499044, "grad_norm": 1.328125, "learning_rate": 0.0003961813341950945, "loss": 0.9547, "step": 10280 }, { "epoch": 0.7152248773870395, "grad_norm": 1.1875, "learning_rate": 0.0003960017317018666, "loss": 0.815, "step": 10281 }, { "epoch": 0.7152944450241747, "grad_norm": 1.3984375, "learning_rate": 0.0003958221598766265, "loss": 0.9948, "step": 10282 }, { "epoch": 0.71536401266131, "grad_norm": 0.99609375, "learning_rate": 0.0003956426187284915, "loss": 0.6415, "step": 10283 }, { "epoch": 0.7154335802984452, "grad_norm": 1.4765625, "learning_rate": 0.000395463108266578, "loss": 0.8901, "step": 10284 }, { "epoch": 0.7155031479355803, "grad_norm": 1.2578125, "learning_rate": 0.0003952836285000012, "loss": 0.8202, "step": 10285 }, { "epoch": 0.7155727155727156, "grad_norm": 1.3125, "learning_rate": 0.0003951041794378738, "loss": 0.6531, "step": 10286 }, { "epoch": 0.7156422832098508, "grad_norm": 1.0625, "learning_rate": 0.00039492476108930687, "loss": 0.8524, "step": 10287 }, { "epoch": 0.715711850846986, "grad_norm": 1.125, "learning_rate": 0.00039474537346341075, "loss": 0.7424, "step": 10288 }, { "epoch": 0.7157814184841211, "grad_norm": 1.078125, "learning_rate": 0.0003945660165692942, "loss": 0.8893, "step": 10289 }, { "epoch": 0.7158509861212564, "grad_norm": 1.203125, "learning_rate": 0.00039438669041606345, "loss": 0.738, "step": 10290 }, { "epoch": 0.7159205537583916, "grad_norm": 1.0234375, "learning_rate": 0.0003942073950128243, "loss": 0.7486, "step": 10291 }, { "epoch": 0.7159901213955268, "grad_norm": 1.2890625, "learning_rate": 0.0003940281303686799, "loss": 0.8488, "step": 10292 }, { "epoch": 0.716059689032662, "grad_norm": 1.0, "learning_rate": 0.00039384889649273305, "loss": 0.9268, "step": 10293 }, { "epoch": 0.7161292566697972, "grad_norm": 1.0234375, "learning_rate": 0.00039366969339408366, "loss": 0.6976, "step": 10294 }, { "epoch": 0.7161988243069324, "grad_norm": 0.98046875, "learning_rate": 0.00039349052108183147, "loss": 0.8058, "step": 10295 }, { "epoch": 0.7162683919440677, "grad_norm": 1.0078125, "learning_rate": 0.0003933113795650737, "loss": 0.7992, "step": 10296 }, { "epoch": 0.7163379595812028, "grad_norm": 1.1484375, "learning_rate": 0.0003931322688529052, "loss": 0.7471, "step": 10297 }, { "epoch": 0.716407527218338, "grad_norm": 1.2421875, "learning_rate": 0.0003929531889544221, "loss": 0.8424, "step": 10298 }, { "epoch": 0.7164770948554733, "grad_norm": 1.03125, "learning_rate": 0.00039277413987871633, "loss": 0.805, "step": 10299 }, { "epoch": 0.7165466624926085, "grad_norm": 1.0859375, "learning_rate": 0.00039259512163487896, "loss": 0.6088, "step": 10300 }, { "epoch": 0.7166162301297436, "grad_norm": 1.15625, "learning_rate": 0.0003924161342319996, "loss": 0.8697, "step": 10301 }, { "epoch": 0.7166857977668788, "grad_norm": 1.5390625, "learning_rate": 0.00039223717767916633, "loss": 0.8839, "step": 10302 }, { "epoch": 0.7167553654040141, "grad_norm": 1.3359375, "learning_rate": 0.00039205825198546627, "loss": 0.8275, "step": 10303 }, { "epoch": 0.7168249330411492, "grad_norm": 0.9140625, "learning_rate": 0.0003918793571599836, "loss": 0.5849, "step": 10304 }, { "epoch": 0.7168945006782844, "grad_norm": 1.2109375, "learning_rate": 0.0003917004932118023, "loss": 1.1259, "step": 10305 }, { "epoch": 0.7169640683154197, "grad_norm": 1.0, "learning_rate": 0.00039152166015000354, "loss": 0.7061, "step": 10306 }, { "epoch": 0.7170336359525549, "grad_norm": 1.3984375, "learning_rate": 0.0003913428579836683, "loss": 0.9957, "step": 10307 }, { "epoch": 0.71710320358969, "grad_norm": 1.09375, "learning_rate": 0.0003911640867218745, "loss": 0.837, "step": 10308 }, { "epoch": 0.7171727712268253, "grad_norm": 1.2890625, "learning_rate": 0.00039098534637369996, "loss": 0.8902, "step": 10309 }, { "epoch": 0.7172423388639605, "grad_norm": 1.0546875, "learning_rate": 0.0003908066369482196, "loss": 0.7183, "step": 10310 }, { "epoch": 0.7173119065010957, "grad_norm": 1.0234375, "learning_rate": 0.0003906279584545076, "loss": 0.6909, "step": 10311 }, { "epoch": 0.717381474138231, "grad_norm": 0.90625, "learning_rate": 0.0003904493109016367, "loss": 0.6833, "step": 10312 }, { "epoch": 0.7174510417753661, "grad_norm": 1.15625, "learning_rate": 0.00039027069429867754, "loss": 0.8421, "step": 10313 }, { "epoch": 0.7175206094125013, "grad_norm": 1.171875, "learning_rate": 0.00039009210865469926, "loss": 0.759, "step": 10314 }, { "epoch": 0.7175901770496365, "grad_norm": 1.09375, "learning_rate": 0.00038991355397876903, "loss": 0.9539, "step": 10315 }, { "epoch": 0.7176597446867717, "grad_norm": 1.015625, "learning_rate": 0.0003897350302799536, "loss": 0.8893, "step": 10316 }, { "epoch": 0.7177293123239069, "grad_norm": 1.1953125, "learning_rate": 0.0003895565375673177, "loss": 1.0996, "step": 10317 }, { "epoch": 0.7177988799610421, "grad_norm": 1.1953125, "learning_rate": 0.0003893780758499236, "loss": 0.701, "step": 10318 }, { "epoch": 0.7178684475981774, "grad_norm": 0.9921875, "learning_rate": 0.00038919964513683334, "loss": 0.6515, "step": 10319 }, { "epoch": 0.7179380152353125, "grad_norm": 1.015625, "learning_rate": 0.00038902124543710616, "loss": 0.7534, "step": 10320 }, { "epoch": 0.7180075828724477, "grad_norm": 1.2109375, "learning_rate": 0.0003888428767598009, "loss": 0.8148, "step": 10321 }, { "epoch": 0.718077150509583, "grad_norm": 1.1640625, "learning_rate": 0.0003886645391139736, "loss": 0.938, "step": 10322 }, { "epoch": 0.7181467181467182, "grad_norm": 1.265625, "learning_rate": 0.00038848623250867985, "loss": 1.1074, "step": 10323 }, { "epoch": 0.7182162857838533, "grad_norm": 1.1875, "learning_rate": 0.0003883079569529727, "loss": 0.8166, "step": 10324 }, { "epoch": 0.7182858534209886, "grad_norm": 1.046875, "learning_rate": 0.00038812971245590446, "loss": 0.7357, "step": 10325 }, { "epoch": 0.7183554210581238, "grad_norm": 1.109375, "learning_rate": 0.0003879514990265255, "loss": 0.8994, "step": 10326 }, { "epoch": 0.718424988695259, "grad_norm": 0.95703125, "learning_rate": 0.0003877733166738846, "loss": 0.8689, "step": 10327 }, { "epoch": 0.7184945563323941, "grad_norm": 1.1875, "learning_rate": 0.00038759516540702875, "loss": 0.8483, "step": 10328 }, { "epoch": 0.7185641239695294, "grad_norm": 1.203125, "learning_rate": 0.0003874170452350031, "loss": 0.8876, "step": 10329 }, { "epoch": 0.7186336916066646, "grad_norm": 1.3203125, "learning_rate": 0.00038723895616685276, "loss": 0.7773, "step": 10330 }, { "epoch": 0.7187032592437997, "grad_norm": 1.328125, "learning_rate": 0.0003870608982116198, "loss": 0.7464, "step": 10331 }, { "epoch": 0.718772826880935, "grad_norm": 1.3125, "learning_rate": 0.00038688287137834455, "loss": 0.8565, "step": 10332 }, { "epoch": 0.7188423945180702, "grad_norm": 1.2109375, "learning_rate": 0.00038670487567606717, "loss": 0.7739, "step": 10333 }, { "epoch": 0.7189119621552054, "grad_norm": 1.4140625, "learning_rate": 0.0003865269111138247, "loss": 1.0526, "step": 10334 }, { "epoch": 0.7189815297923406, "grad_norm": 1.0859375, "learning_rate": 0.0003863489777006537, "loss": 0.8619, "step": 10335 }, { "epoch": 0.7190510974294758, "grad_norm": 1.1171875, "learning_rate": 0.0003861710754455884, "loss": 0.6509, "step": 10336 }, { "epoch": 0.719120665066611, "grad_norm": 1.21875, "learning_rate": 0.00038599320435766214, "loss": 0.7926, "step": 10337 }, { "epoch": 0.7191902327037463, "grad_norm": 1.25, "learning_rate": 0.0003858153644459059, "loss": 0.804, "step": 10338 }, { "epoch": 0.7192598003408814, "grad_norm": 0.9296875, "learning_rate": 0.00038563755571934975, "loss": 0.8925, "step": 10339 }, { "epoch": 0.7193293679780166, "grad_norm": 1.09375, "learning_rate": 0.00038545977818702225, "loss": 0.5914, "step": 10340 }, { "epoch": 0.7193989356151518, "grad_norm": 1.09375, "learning_rate": 0.00038528203185794963, "loss": 0.8468, "step": 10341 }, { "epoch": 0.7194685032522871, "grad_norm": 1.0078125, "learning_rate": 0.0003851043167411571, "loss": 0.8216, "step": 10342 }, { "epoch": 0.7195380708894222, "grad_norm": 1.078125, "learning_rate": 0.0003849266328456673, "loss": 0.8859, "step": 10343 }, { "epoch": 0.7196076385265574, "grad_norm": 0.921875, "learning_rate": 0.0003847489801805036, "loss": 0.7897, "step": 10344 }, { "epoch": 0.7196772061636927, "grad_norm": 1.203125, "learning_rate": 0.00038457135875468574, "loss": 0.7999, "step": 10345 }, { "epoch": 0.7197467738008279, "grad_norm": 0.8828125, "learning_rate": 0.0003843937685772321, "loss": 0.8396, "step": 10346 }, { "epoch": 0.719816341437963, "grad_norm": 1.03125, "learning_rate": 0.00038421620965715974, "loss": 0.6124, "step": 10347 }, { "epoch": 0.7198859090750983, "grad_norm": 0.8515625, "learning_rate": 0.00038403868200348446, "loss": 0.6664, "step": 10348 }, { "epoch": 0.7199554767122335, "grad_norm": 1.2265625, "learning_rate": 0.00038386118562522053, "loss": 0.9508, "step": 10349 }, { "epoch": 0.7200250443493686, "grad_norm": 1.3515625, "learning_rate": 0.0003836837205313798, "loss": 0.9475, "step": 10350 }, { "epoch": 0.7200946119865039, "grad_norm": 1.03125, "learning_rate": 0.00038350628673097353, "loss": 0.6598, "step": 10351 }, { "epoch": 0.7201641796236391, "grad_norm": 0.81640625, "learning_rate": 0.00038332888423301027, "loss": 0.7691, "step": 10352 }, { "epoch": 0.7202337472607743, "grad_norm": 1.0078125, "learning_rate": 0.00038315151304649844, "loss": 0.6976, "step": 10353 }, { "epoch": 0.7203033148979094, "grad_norm": 1.0, "learning_rate": 0.00038297417318044325, "loss": 0.8059, "step": 10354 }, { "epoch": 0.7203728825350447, "grad_norm": 1.0625, "learning_rate": 0.00038279686464384987, "loss": 0.9312, "step": 10355 }, { "epoch": 0.7204424501721799, "grad_norm": 1.09375, "learning_rate": 0.00038261958744572044, "loss": 0.7603, "step": 10356 }, { "epoch": 0.7205120178093151, "grad_norm": 0.9921875, "learning_rate": 0.00038244234159505653, "loss": 0.803, "step": 10357 }, { "epoch": 0.7205815854464503, "grad_norm": 1.0234375, "learning_rate": 0.00038226512710085817, "loss": 0.745, "step": 10358 }, { "epoch": 0.7206511530835855, "grad_norm": 1.0234375, "learning_rate": 0.000382087943972123, "loss": 0.7745, "step": 10359 }, { "epoch": 0.7207207207207207, "grad_norm": 0.9765625, "learning_rate": 0.00038191079221784754, "loss": 0.8289, "step": 10360 }, { "epoch": 0.720790288357856, "grad_norm": 1.1796875, "learning_rate": 0.0003817336718470259, "loss": 1.0168, "step": 10361 }, { "epoch": 0.7208598559949911, "grad_norm": 1.046875, "learning_rate": 0.0003815565828686528, "loss": 0.7349, "step": 10362 }, { "epoch": 0.7209294236321263, "grad_norm": 1.1796875, "learning_rate": 0.00038137952529171924, "loss": 0.8569, "step": 10363 }, { "epoch": 0.7209989912692616, "grad_norm": 1.28125, "learning_rate": 0.0003812024991252151, "loss": 0.9164, "step": 10364 }, { "epoch": 0.7210685589063968, "grad_norm": 1.2421875, "learning_rate": 0.00038102550437812933, "loss": 0.8319, "step": 10365 }, { "epoch": 0.7211381265435319, "grad_norm": 1.171875, "learning_rate": 0.0003808485410594482, "loss": 0.676, "step": 10366 }, { "epoch": 0.7212076941806671, "grad_norm": 0.91796875, "learning_rate": 0.0003806716091781578, "loss": 0.7555, "step": 10367 }, { "epoch": 0.7212772618178024, "grad_norm": 1.0078125, "learning_rate": 0.0003804947087432411, "loss": 0.5666, "step": 10368 }, { "epoch": 0.7213468294549376, "grad_norm": 1.1953125, "learning_rate": 0.0003803178397636808, "loss": 0.9253, "step": 10369 }, { "epoch": 0.7214163970920727, "grad_norm": 1.3515625, "learning_rate": 0.0003801410022484569, "loss": 0.8512, "step": 10370 }, { "epoch": 0.721485964729208, "grad_norm": 1.15625, "learning_rate": 0.00037996419620654867, "loss": 0.7916, "step": 10371 }, { "epoch": 0.7215555323663432, "grad_norm": 1.0703125, "learning_rate": 0.0003797874216469336, "loss": 0.8352, "step": 10372 }, { "epoch": 0.7216251000034783, "grad_norm": 1.1796875, "learning_rate": 0.0003796106785785871, "loss": 0.7665, "step": 10373 }, { "epoch": 0.7216946676406136, "grad_norm": 1.53125, "learning_rate": 0.0003794339670104835, "loss": 0.8361, "step": 10374 }, { "epoch": 0.7217642352777488, "grad_norm": 0.859375, "learning_rate": 0.00037925728695159435, "loss": 0.6771, "step": 10375 }, { "epoch": 0.721833802914884, "grad_norm": 0.9609375, "learning_rate": 0.00037908063841089214, "loss": 0.6722, "step": 10376 }, { "epoch": 0.7219033705520193, "grad_norm": 1.4453125, "learning_rate": 0.0003789040213973454, "loss": 0.87, "step": 10377 }, { "epoch": 0.7219729381891544, "grad_norm": 1.1328125, "learning_rate": 0.00037872743591992156, "loss": 0.804, "step": 10378 }, { "epoch": 0.7220425058262896, "grad_norm": 1.09375, "learning_rate": 0.00037855088198758747, "loss": 1.0457, "step": 10379 }, { "epoch": 0.7221120734634248, "grad_norm": 1.203125, "learning_rate": 0.00037837435960930686, "loss": 0.8591, "step": 10380 }, { "epoch": 0.72218164110056, "grad_norm": 1.046875, "learning_rate": 0.00037819786879404336, "loss": 0.8595, "step": 10381 }, { "epoch": 0.7222512087376952, "grad_norm": 1.0234375, "learning_rate": 0.0003780214095507577, "loss": 0.8191, "step": 10382 }, { "epoch": 0.7223207763748304, "grad_norm": 1.171875, "learning_rate": 0.0003778449818884102, "loss": 0.8937, "step": 10383 }, { "epoch": 0.7223903440119657, "grad_norm": 1.4765625, "learning_rate": 0.0003776685858159583, "loss": 0.6803, "step": 10384 }, { "epoch": 0.7224599116491008, "grad_norm": 1.0390625, "learning_rate": 0.0003774922213423588, "loss": 0.9583, "step": 10385 }, { "epoch": 0.722529479286236, "grad_norm": 1.1953125, "learning_rate": 0.0003773158884765669, "loss": 0.9292, "step": 10386 }, { "epoch": 0.7225990469233713, "grad_norm": 1.046875, "learning_rate": 0.0003771395872275357, "loss": 0.5646, "step": 10387 }, { "epoch": 0.7226686145605065, "grad_norm": 1.1171875, "learning_rate": 0.00037696331760421654, "loss": 0.8099, "step": 10388 }, { "epoch": 0.7227381821976416, "grad_norm": 1.0859375, "learning_rate": 0.0003767870796155597, "loss": 0.7322, "step": 10389 }, { "epoch": 0.7228077498347769, "grad_norm": 1.3515625, "learning_rate": 0.000376610873270514, "loss": 0.8178, "step": 10390 }, { "epoch": 0.7228773174719121, "grad_norm": 0.94921875, "learning_rate": 0.00037643469857802614, "loss": 0.7163, "step": 10391 }, { "epoch": 0.7229468851090473, "grad_norm": 0.99609375, "learning_rate": 0.0003762585555470409, "loss": 0.7489, "step": 10392 }, { "epoch": 0.7230164527461824, "grad_norm": 1.0234375, "learning_rate": 0.0003760824441865026, "loss": 0.4969, "step": 10393 }, { "epoch": 0.7230860203833177, "grad_norm": 1.0625, "learning_rate": 0.0003759063645053528, "loss": 0.983, "step": 10394 }, { "epoch": 0.7231555880204529, "grad_norm": 1.0859375, "learning_rate": 0.00037573031651253245, "loss": 0.98, "step": 10395 }, { "epoch": 0.723225155657588, "grad_norm": 1.53125, "learning_rate": 0.0003755543002169797, "loss": 1.1609, "step": 10396 }, { "epoch": 0.7232947232947233, "grad_norm": 1.1328125, "learning_rate": 0.0003753783156276325, "loss": 0.8617, "step": 10397 }, { "epoch": 0.7233642909318585, "grad_norm": 1.046875, "learning_rate": 0.00037520236275342565, "loss": 0.8373, "step": 10398 }, { "epoch": 0.7234338585689937, "grad_norm": 0.953125, "learning_rate": 0.0003750264416032938, "loss": 0.8973, "step": 10399 }, { "epoch": 0.723503426206129, "grad_norm": 1.234375, "learning_rate": 0.0003748505521861694, "loss": 0.6331, "step": 10400 }, { "epoch": 0.7235729938432641, "grad_norm": 0.98828125, "learning_rate": 0.00037467469451098293, "loss": 0.8331, "step": 10401 }, { "epoch": 0.7236425614803993, "grad_norm": 1.4453125, "learning_rate": 0.0003744988685866633, "loss": 0.9041, "step": 10402 }, { "epoch": 0.7237121291175346, "grad_norm": 1.265625, "learning_rate": 0.0003743230744221383, "loss": 1.0202, "step": 10403 }, { "epoch": 0.7237816967546697, "grad_norm": 1.09375, "learning_rate": 0.0003741473120263345, "loss": 0.8418, "step": 10404 }, { "epoch": 0.7238512643918049, "grad_norm": 1.0546875, "learning_rate": 0.0003739715814081754, "loss": 0.7421, "step": 10405 }, { "epoch": 0.7239208320289401, "grad_norm": 1.2578125, "learning_rate": 0.0003737958825765837, "loss": 0.9736, "step": 10406 }, { "epoch": 0.7239903996660754, "grad_norm": 1.0078125, "learning_rate": 0.0003736202155404809, "loss": 0.6861, "step": 10407 }, { "epoch": 0.7240599673032105, "grad_norm": 1.0859375, "learning_rate": 0.0003734445803087866, "loss": 1.0322, "step": 10408 }, { "epoch": 0.7241295349403457, "grad_norm": 1.0625, "learning_rate": 0.0003732689768904185, "loss": 0.7579, "step": 10409 }, { "epoch": 0.724199102577481, "grad_norm": 1.1796875, "learning_rate": 0.0003730934052942924, "loss": 0.7262, "step": 10410 }, { "epoch": 0.7242686702146162, "grad_norm": 1.015625, "learning_rate": 0.00037291786552932373, "loss": 0.6312, "step": 10411 }, { "epoch": 0.7243382378517513, "grad_norm": 0.96484375, "learning_rate": 0.00037274235760442466, "loss": 0.7394, "step": 10412 }, { "epoch": 0.7244078054888866, "grad_norm": 1.1171875, "learning_rate": 0.00037256688152850716, "loss": 1.1719, "step": 10413 }, { "epoch": 0.7244773731260218, "grad_norm": 0.8671875, "learning_rate": 0.0003723914373104813, "loss": 0.5643, "step": 10414 }, { "epoch": 0.724546940763157, "grad_norm": 0.9453125, "learning_rate": 0.0003722160249592548, "loss": 0.6835, "step": 10415 }, { "epoch": 0.7246165084002922, "grad_norm": 1.0078125, "learning_rate": 0.000372040644483734, "loss": 0.7814, "step": 10416 }, { "epoch": 0.7246860760374274, "grad_norm": 1.171875, "learning_rate": 0.00037186529589282405, "loss": 0.7209, "step": 10417 }, { "epoch": 0.7247556436745626, "grad_norm": 1.1171875, "learning_rate": 0.0003716899791954287, "loss": 0.8136, "step": 10418 }, { "epoch": 0.7248252113116977, "grad_norm": 1.125, "learning_rate": 0.0003715146944004494, "loss": 0.941, "step": 10419 }, { "epoch": 0.724894778948833, "grad_norm": 0.9921875, "learning_rate": 0.0003713394415167856, "loss": 0.6571, "step": 10420 }, { "epoch": 0.7249643465859682, "grad_norm": 1.3515625, "learning_rate": 0.00037116422055333634, "loss": 1.0111, "step": 10421 }, { "epoch": 0.7250339142231034, "grad_norm": 1.1015625, "learning_rate": 0.0003709890315189988, "loss": 0.8752, "step": 10422 }, { "epoch": 0.7251034818602387, "grad_norm": 1.4453125, "learning_rate": 0.0003708138744226678, "loss": 1.1282, "step": 10423 }, { "epoch": 0.7251730494973738, "grad_norm": 1.265625, "learning_rate": 0.0003706387492732365, "loss": 0.925, "step": 10424 }, { "epoch": 0.725242617134509, "grad_norm": 1.0703125, "learning_rate": 0.0003704636560795976, "loss": 0.9034, "step": 10425 }, { "epoch": 0.7253121847716443, "grad_norm": 1.0703125, "learning_rate": 0.00037028859485064094, "loss": 0.7064, "step": 10426 }, { "epoch": 0.7253817524087794, "grad_norm": 1.03125, "learning_rate": 0.0003701135655952557, "loss": 0.7784, "step": 10427 }, { "epoch": 0.7254513200459146, "grad_norm": 1.4375, "learning_rate": 0.00036993856832232843, "loss": 0.9509, "step": 10428 }, { "epoch": 0.7255208876830499, "grad_norm": 0.95703125, "learning_rate": 0.00036976360304074516, "loss": 0.8873, "step": 10429 }, { "epoch": 0.7255904553201851, "grad_norm": 1.484375, "learning_rate": 0.0003695886697593893, "loss": 0.8801, "step": 10430 }, { "epoch": 0.7256600229573202, "grad_norm": 0.85546875, "learning_rate": 0.00036941376848714325, "loss": 0.5566, "step": 10431 }, { "epoch": 0.7257295905944554, "grad_norm": 1.21875, "learning_rate": 0.0003692388992328879, "loss": 0.6438, "step": 10432 }, { "epoch": 0.7257991582315907, "grad_norm": 0.99609375, "learning_rate": 0.00036906406200550213, "loss": 0.9409, "step": 10433 }, { "epoch": 0.7258687258687259, "grad_norm": 1.4453125, "learning_rate": 0.00036888925681386267, "loss": 0.8923, "step": 10434 }, { "epoch": 0.725938293505861, "grad_norm": 1.0546875, "learning_rate": 0.0003687144836668457, "loss": 0.9672, "step": 10435 }, { "epoch": 0.7260078611429963, "grad_norm": 1.21875, "learning_rate": 0.0003685397425733258, "loss": 0.9184, "step": 10436 }, { "epoch": 0.7260774287801315, "grad_norm": 1.21875, "learning_rate": 0.0003683650335421749, "loss": 0.8752, "step": 10437 }, { "epoch": 0.7261469964172667, "grad_norm": 1.1953125, "learning_rate": 0.0003681903565822635, "loss": 0.9065, "step": 10438 }, { "epoch": 0.7262165640544019, "grad_norm": 1.0, "learning_rate": 0.0003680157117024614, "loss": 0.8314, "step": 10439 }, { "epoch": 0.7262861316915371, "grad_norm": 1.0390625, "learning_rate": 0.0003678410989116362, "loss": 0.7061, "step": 10440 }, { "epoch": 0.7263556993286723, "grad_norm": 0.95703125, "learning_rate": 0.0003676665182186538, "loss": 0.702, "step": 10441 }, { "epoch": 0.7264252669658076, "grad_norm": 0.8359375, "learning_rate": 0.0003674919696323781, "loss": 0.7032, "step": 10442 }, { "epoch": 0.7264948346029427, "grad_norm": 1.3359375, "learning_rate": 0.0003673174531616723, "loss": 1.001, "step": 10443 }, { "epoch": 0.7265644022400779, "grad_norm": 1.21875, "learning_rate": 0.000367142968815397, "loss": 0.6741, "step": 10444 }, { "epoch": 0.7266339698772131, "grad_norm": 1.28125, "learning_rate": 0.0003669685166024119, "loss": 0.7309, "step": 10445 }, { "epoch": 0.7267035375143484, "grad_norm": 1.359375, "learning_rate": 0.00036679409653157525, "loss": 0.7796, "step": 10446 }, { "epoch": 0.7267731051514835, "grad_norm": 1.0390625, "learning_rate": 0.00036661970861174263, "loss": 0.832, "step": 10447 }, { "epoch": 0.7268426727886187, "grad_norm": 1.328125, "learning_rate": 0.0003664453528517685, "loss": 0.9177, "step": 10448 }, { "epoch": 0.726912240425754, "grad_norm": 1.2734375, "learning_rate": 0.00036627102926050596, "loss": 1.0279, "step": 10449 }, { "epoch": 0.7269818080628891, "grad_norm": 1.1640625, "learning_rate": 0.00036609673784680666, "loss": 0.7577, "step": 10450 }, { "epoch": 0.7270513757000243, "grad_norm": 1.015625, "learning_rate": 0.0003659224786195199, "loss": 0.6044, "step": 10451 }, { "epoch": 0.7271209433371596, "grad_norm": 0.9453125, "learning_rate": 0.00036574825158749335, "loss": 0.5833, "step": 10452 }, { "epoch": 0.7271905109742948, "grad_norm": 1.0546875, "learning_rate": 0.0003655740567595738, "loss": 0.8016, "step": 10453 }, { "epoch": 0.7272600786114299, "grad_norm": 1.109375, "learning_rate": 0.00036539989414460615, "loss": 0.7016, "step": 10454 }, { "epoch": 0.7273296462485652, "grad_norm": 1.0234375, "learning_rate": 0.00036522576375143325, "loss": 0.7156, "step": 10455 }, { "epoch": 0.7273992138857004, "grad_norm": 1.2890625, "learning_rate": 0.00036505166558889625, "loss": 0.8444, "step": 10456 }, { "epoch": 0.7274687815228356, "grad_norm": 1.1796875, "learning_rate": 0.00036487759966583565, "loss": 0.9313, "step": 10457 }, { "epoch": 0.7275383491599707, "grad_norm": 1.0859375, "learning_rate": 0.00036470356599108887, "loss": 0.8356, "step": 10458 }, { "epoch": 0.727607916797106, "grad_norm": 1.71875, "learning_rate": 0.0003645295645734931, "loss": 1.0616, "step": 10459 }, { "epoch": 0.7276774844342412, "grad_norm": 1.2109375, "learning_rate": 0.00036435559542188315, "loss": 0.7141, "step": 10460 }, { "epoch": 0.7277470520713764, "grad_norm": 1.0234375, "learning_rate": 0.0003641816585450922, "loss": 0.7129, "step": 10461 }, { "epoch": 0.7278166197085116, "grad_norm": 1.125, "learning_rate": 0.0003640077539519516, "loss": 0.9076, "step": 10462 }, { "epoch": 0.7278861873456468, "grad_norm": 1.09375, "learning_rate": 0.0003638338816512916, "loss": 0.7787, "step": 10463 }, { "epoch": 0.727955754982782, "grad_norm": 1.1953125, "learning_rate": 0.0003636600416519409, "loss": 0.9157, "step": 10464 }, { "epoch": 0.7280253226199173, "grad_norm": 1.078125, "learning_rate": 0.0003634862339627258, "loss": 0.7722, "step": 10465 }, { "epoch": 0.7280948902570524, "grad_norm": 1.3984375, "learning_rate": 0.0003633124585924713, "loss": 0.914, "step": 10466 }, { "epoch": 0.7281644578941876, "grad_norm": 1.21875, "learning_rate": 0.00036313871555000086, "loss": 0.6732, "step": 10467 }, { "epoch": 0.7282340255313229, "grad_norm": 1.046875, "learning_rate": 0.00036296500484413695, "loss": 0.7468, "step": 10468 }, { "epoch": 0.728303593168458, "grad_norm": 1.5, "learning_rate": 0.0003627913264836991, "loss": 0.7366, "step": 10469 }, { "epoch": 0.7283731608055932, "grad_norm": 1.203125, "learning_rate": 0.00036261768047750554, "loss": 1.0082, "step": 10470 }, { "epoch": 0.7284427284427284, "grad_norm": 1.1484375, "learning_rate": 0.0003624440668343736, "loss": 0.7463, "step": 10471 }, { "epoch": 0.7285122960798637, "grad_norm": 1.3359375, "learning_rate": 0.0003622704855631187, "loss": 0.6717, "step": 10472 }, { "epoch": 0.7285818637169988, "grad_norm": 1.265625, "learning_rate": 0.0003620969366725538, "loss": 0.9143, "step": 10473 }, { "epoch": 0.728651431354134, "grad_norm": 0.9296875, "learning_rate": 0.0003619234201714916, "loss": 0.9855, "step": 10474 }, { "epoch": 0.7287209989912693, "grad_norm": 1.3359375, "learning_rate": 0.00036174993606874186, "loss": 1.0841, "step": 10475 }, { "epoch": 0.7287905666284045, "grad_norm": 1.046875, "learning_rate": 0.0003615764843731131, "loss": 1.0146, "step": 10476 }, { "epoch": 0.7288601342655396, "grad_norm": 1.0078125, "learning_rate": 0.0003614030650934126, "loss": 0.6484, "step": 10477 }, { "epoch": 0.7289297019026749, "grad_norm": 1.0546875, "learning_rate": 0.000361229678238446, "loss": 0.7777, "step": 10478 }, { "epoch": 0.7289992695398101, "grad_norm": 1.265625, "learning_rate": 0.0003610563238170166, "loss": 0.7467, "step": 10479 }, { "epoch": 0.7290688371769453, "grad_norm": 0.8984375, "learning_rate": 0.00036088300183792634, "loss": 0.6488, "step": 10480 }, { "epoch": 0.7291384048140805, "grad_norm": 1.234375, "learning_rate": 0.00036070971230997583, "loss": 0.9898, "step": 10481 }, { "epoch": 0.7292079724512157, "grad_norm": 1.078125, "learning_rate": 0.0003605364552419642, "loss": 0.7607, "step": 10482 }, { "epoch": 0.7292775400883509, "grad_norm": 1.3203125, "learning_rate": 0.00036036323064268815, "loss": 0.862, "step": 10483 }, { "epoch": 0.729347107725486, "grad_norm": 1.0703125, "learning_rate": 0.00036019003852094303, "loss": 0.9068, "step": 10484 }, { "epoch": 0.7294166753626213, "grad_norm": 1.03125, "learning_rate": 0.0003600168788855228, "loss": 0.6621, "step": 10485 }, { "epoch": 0.7294862429997565, "grad_norm": 1.1484375, "learning_rate": 0.0003598437517452201, "loss": 0.8121, "step": 10486 }, { "epoch": 0.7295558106368917, "grad_norm": 1.1328125, "learning_rate": 0.00035967065710882474, "loss": 0.9737, "step": 10487 }, { "epoch": 0.729625378274027, "grad_norm": 1.3359375, "learning_rate": 0.0003594975949851261, "loss": 0.7987, "step": 10488 }, { "epoch": 0.7296949459111621, "grad_norm": 1.1015625, "learning_rate": 0.00035932456538291134, "loss": 0.7368, "step": 10489 }, { "epoch": 0.7297645135482973, "grad_norm": 1.2421875, "learning_rate": 0.0003591515683109656, "loss": 0.8261, "step": 10490 }, { "epoch": 0.7298340811854326, "grad_norm": 1.40625, "learning_rate": 0.00035897860377807303, "loss": 1.0825, "step": 10491 }, { "epoch": 0.7299036488225678, "grad_norm": 1.40625, "learning_rate": 0.00035880567179301636, "loss": 1.1469, "step": 10492 }, { "epoch": 0.7299732164597029, "grad_norm": 1.015625, "learning_rate": 0.0003586327723645758, "loss": 0.6994, "step": 10493 }, { "epoch": 0.7300427840968382, "grad_norm": 1.0859375, "learning_rate": 0.00035845990550153, "loss": 0.9453, "step": 10494 }, { "epoch": 0.7301123517339734, "grad_norm": 0.953125, "learning_rate": 0.0003582870712126566, "loss": 0.8885, "step": 10495 }, { "epoch": 0.7301819193711085, "grad_norm": 1.03125, "learning_rate": 0.00035811426950673153, "loss": 0.6837, "step": 10496 }, { "epoch": 0.7302514870082437, "grad_norm": 1.390625, "learning_rate": 0.0003579415003925285, "loss": 0.8565, "step": 10497 }, { "epoch": 0.730321054645379, "grad_norm": 1.2109375, "learning_rate": 0.00035776876387881964, "loss": 0.8585, "step": 10498 }, { "epoch": 0.7303906222825142, "grad_norm": 1.171875, "learning_rate": 0.00035759605997437574, "loss": 0.7695, "step": 10499 }, { "epoch": 0.7304601899196493, "grad_norm": 1.078125, "learning_rate": 0.0003574233886879664, "loss": 1.0199, "step": 10500 }, { "epoch": 0.7305297575567846, "grad_norm": 1.1875, "learning_rate": 0.0003572507500283585, "loss": 0.9845, "step": 10501 }, { "epoch": 0.7305993251939198, "grad_norm": 1.203125, "learning_rate": 0.00035707814400431746, "loss": 0.7833, "step": 10502 }, { "epoch": 0.730668892831055, "grad_norm": 1.4453125, "learning_rate": 0.0003569055706246077, "loss": 1.0962, "step": 10503 }, { "epoch": 0.7307384604681902, "grad_norm": 1.375, "learning_rate": 0.00035673302989799204, "loss": 0.8713, "step": 10504 }, { "epoch": 0.7308080281053254, "grad_norm": 1.1484375, "learning_rate": 0.0003565605218332304, "loss": 0.7375, "step": 10505 }, { "epoch": 0.7308775957424606, "grad_norm": 1.0859375, "learning_rate": 0.00035638804643908274, "loss": 0.7821, "step": 10506 }, { "epoch": 0.7309471633795959, "grad_norm": 1.1171875, "learning_rate": 0.00035621560372430596, "loss": 0.6925, "step": 10507 }, { "epoch": 0.731016731016731, "grad_norm": 1.03125, "learning_rate": 0.0003560431936976556, "loss": 0.9956, "step": 10508 }, { "epoch": 0.7310862986538662, "grad_norm": 1.6015625, "learning_rate": 0.000355870816367886, "loss": 0.8902, "step": 10509 }, { "epoch": 0.7311558662910014, "grad_norm": 1.4375, "learning_rate": 0.00035569847174375, "loss": 0.8974, "step": 10510 }, { "epoch": 0.7312254339281367, "grad_norm": 1.09375, "learning_rate": 0.0003555261598339983, "loss": 0.7936, "step": 10511 }, { "epoch": 0.7312950015652718, "grad_norm": 1.109375, "learning_rate": 0.0003553538806473793, "loss": 0.8037, "step": 10512 }, { "epoch": 0.731364569202407, "grad_norm": 1.3046875, "learning_rate": 0.00035518163419264104, "loss": 0.6703, "step": 10513 }, { "epoch": 0.7314341368395423, "grad_norm": 1.1640625, "learning_rate": 0.0003550094204785296, "loss": 0.7503, "step": 10514 }, { "epoch": 0.7315037044766775, "grad_norm": 1.0546875, "learning_rate": 0.0003548372395137888, "loss": 0.7974, "step": 10515 }, { "epoch": 0.7315732721138126, "grad_norm": 1.1953125, "learning_rate": 0.0003546650913071607, "loss": 0.8388, "step": 10516 }, { "epoch": 0.7316428397509479, "grad_norm": 1.09375, "learning_rate": 0.0003544929758673866, "loss": 0.7568, "step": 10517 }, { "epoch": 0.7317124073880831, "grad_norm": 1.0859375, "learning_rate": 0.00035432089320320593, "loss": 1.0404, "step": 10518 }, { "epoch": 0.7317819750252182, "grad_norm": 0.8984375, "learning_rate": 0.0003541488433233555, "loss": 0.5822, "step": 10519 }, { "epoch": 0.7318515426623535, "grad_norm": 1.078125, "learning_rate": 0.0003539768262365719, "loss": 0.9355, "step": 10520 }, { "epoch": 0.7319211102994887, "grad_norm": 1.796875, "learning_rate": 0.0003538048419515887, "loss": 1.4892, "step": 10521 }, { "epoch": 0.7319906779366239, "grad_norm": 1.2578125, "learning_rate": 0.0003536328904771383, "loss": 0.7739, "step": 10522 }, { "epoch": 0.732060245573759, "grad_norm": 1.140625, "learning_rate": 0.0003534609718219518, "loss": 0.9225, "step": 10523 }, { "epoch": 0.7321298132108943, "grad_norm": 1.15625, "learning_rate": 0.00035328908599475874, "loss": 0.712, "step": 10524 }, { "epoch": 0.7321993808480295, "grad_norm": 1.03125, "learning_rate": 0.0003531172330042861, "loss": 0.865, "step": 10525 }, { "epoch": 0.7322689484851647, "grad_norm": 1.0390625, "learning_rate": 0.00035294541285925965, "loss": 0.5902, "step": 10526 }, { "epoch": 0.7323385161222999, "grad_norm": 1.0, "learning_rate": 0.00035277362556840363, "loss": 0.6576, "step": 10527 }, { "epoch": 0.7324080837594351, "grad_norm": 1.5859375, "learning_rate": 0.00035260187114044095, "loss": 0.8863, "step": 10528 }, { "epoch": 0.7324776513965703, "grad_norm": 0.91796875, "learning_rate": 0.0003524301495840923, "loss": 0.5281, "step": 10529 }, { "epoch": 0.7325472190337056, "grad_norm": 1.09375, "learning_rate": 0.0003522584609080761, "loss": 0.7786, "step": 10530 }, { "epoch": 0.7326167866708407, "grad_norm": 1.0, "learning_rate": 0.00035208680512111056, "loss": 0.6755, "step": 10531 }, { "epoch": 0.7326863543079759, "grad_norm": 1.1328125, "learning_rate": 0.00035191518223191153, "loss": 0.6931, "step": 10532 }, { "epoch": 0.7327559219451112, "grad_norm": 1.1953125, "learning_rate": 0.00035174359224919273, "loss": 0.9638, "step": 10533 }, { "epoch": 0.7328254895822464, "grad_norm": 1.0625, "learning_rate": 0.00035157203518166723, "loss": 0.6878, "step": 10534 }, { "epoch": 0.7328950572193815, "grad_norm": 1.0859375, "learning_rate": 0.00035140051103804503, "loss": 0.9544, "step": 10535 }, { "epoch": 0.7329646248565167, "grad_norm": 1.0078125, "learning_rate": 0.00035122901982703606, "loss": 0.6548, "step": 10536 }, { "epoch": 0.733034192493652, "grad_norm": 0.8671875, "learning_rate": 0.0003510575615573471, "loss": 0.6057, "step": 10537 }, { "epoch": 0.7331037601307872, "grad_norm": 1.1171875, "learning_rate": 0.0003508861362376846, "loss": 0.8301, "step": 10538 }, { "epoch": 0.7331733277679223, "grad_norm": 1.203125, "learning_rate": 0.00035071474387675226, "loss": 0.8204, "step": 10539 }, { "epoch": 0.7332428954050576, "grad_norm": 1.0546875, "learning_rate": 0.0003505433844832523, "loss": 0.7581, "step": 10540 }, { "epoch": 0.7333124630421928, "grad_norm": 1.2265625, "learning_rate": 0.0003503720580658858, "loss": 0.9967, "step": 10541 }, { "epoch": 0.733382030679328, "grad_norm": 1.03125, "learning_rate": 0.00035020076463335213, "loss": 0.8009, "step": 10542 }, { "epoch": 0.7334515983164632, "grad_norm": 0.99609375, "learning_rate": 0.0003500295041943484, "loss": 0.7618, "step": 10543 }, { "epoch": 0.7335211659535984, "grad_norm": 1.109375, "learning_rate": 0.00034985827675756997, "loss": 0.772, "step": 10544 }, { "epoch": 0.7335907335907336, "grad_norm": 1.2734375, "learning_rate": 0.00034968708233171133, "loss": 0.9681, "step": 10545 }, { "epoch": 0.7336603012278688, "grad_norm": 1.1640625, "learning_rate": 0.0003495159209254651, "loss": 0.6452, "step": 10546 }, { "epoch": 0.733729868865004, "grad_norm": 0.84375, "learning_rate": 0.0003493447925475215, "loss": 0.7094, "step": 10547 }, { "epoch": 0.7337994365021392, "grad_norm": 1.1171875, "learning_rate": 0.00034917369720657013, "loss": 0.6876, "step": 10548 }, { "epoch": 0.7338690041392744, "grad_norm": 1.2421875, "learning_rate": 0.0003490026349112976, "loss": 0.7527, "step": 10549 }, { "epoch": 0.7339385717764096, "grad_norm": 1.3359375, "learning_rate": 0.0003488316056703904, "loss": 0.8257, "step": 10550 }, { "epoch": 0.7340081394135448, "grad_norm": 1.0078125, "learning_rate": 0.00034866060949253173, "loss": 0.6408, "step": 10551 }, { "epoch": 0.73407770705068, "grad_norm": 0.91015625, "learning_rate": 0.0003484896463864047, "loss": 0.6556, "step": 10552 }, { "epoch": 0.7341472746878153, "grad_norm": 1.203125, "learning_rate": 0.0003483187163606895, "loss": 0.8944, "step": 10553 }, { "epoch": 0.7342168423249504, "grad_norm": 1.265625, "learning_rate": 0.0003481478194240645, "loss": 0.8914, "step": 10554 }, { "epoch": 0.7342864099620856, "grad_norm": 1.1171875, "learning_rate": 0.00034797695558520835, "loss": 0.756, "step": 10555 }, { "epoch": 0.7343559775992209, "grad_norm": 1.3671875, "learning_rate": 0.00034780612485279605, "loss": 1.0581, "step": 10556 }, { "epoch": 0.7344255452363561, "grad_norm": 1.234375, "learning_rate": 0.0003476353272355013, "loss": 1.0234, "step": 10557 }, { "epoch": 0.7344951128734912, "grad_norm": 0.91796875, "learning_rate": 0.00034746456274199625, "loss": 0.6046, "step": 10558 }, { "epoch": 0.7345646805106265, "grad_norm": 1.015625, "learning_rate": 0.0003472938313809515, "loss": 0.7405, "step": 10559 }, { "epoch": 0.7346342481477617, "grad_norm": 1.1796875, "learning_rate": 0.00034712313316103663, "loss": 0.9239, "step": 10560 }, { "epoch": 0.7347038157848969, "grad_norm": 1.0625, "learning_rate": 0.00034695246809091784, "loss": 0.6487, "step": 10561 }, { "epoch": 0.734773383422032, "grad_norm": 1.015625, "learning_rate": 0.0003467818361792615, "loss": 0.791, "step": 10562 }, { "epoch": 0.7348429510591673, "grad_norm": 0.80078125, "learning_rate": 0.00034661123743473076, "loss": 0.4346, "step": 10563 }, { "epoch": 0.7349125186963025, "grad_norm": 1.015625, "learning_rate": 0.00034644067186598835, "loss": 0.7441, "step": 10564 }, { "epoch": 0.7349820863334376, "grad_norm": 1.25, "learning_rate": 0.0003462701394816942, "loss": 0.607, "step": 10565 }, { "epoch": 0.7350516539705729, "grad_norm": 1.1484375, "learning_rate": 0.00034609964029050757, "loss": 0.8549, "step": 10566 }, { "epoch": 0.7351212216077081, "grad_norm": 0.828125, "learning_rate": 0.00034592917430108537, "loss": 0.5786, "step": 10567 }, { "epoch": 0.7351907892448433, "grad_norm": 1.1796875, "learning_rate": 0.0003457587415220822, "loss": 0.7486, "step": 10568 }, { "epoch": 0.7352603568819785, "grad_norm": 1.1875, "learning_rate": 0.0003455883419621532, "loss": 0.8182, "step": 10569 }, { "epoch": 0.7353299245191137, "grad_norm": 1.0859375, "learning_rate": 0.0003454179756299497, "loss": 0.8274, "step": 10570 }, { "epoch": 0.7353994921562489, "grad_norm": 0.765625, "learning_rate": 0.0003452476425341221, "loss": 0.732, "step": 10571 }, { "epoch": 0.7354690597933842, "grad_norm": 0.97265625, "learning_rate": 0.0003450773426833187, "loss": 0.7192, "step": 10572 }, { "epoch": 0.7355386274305193, "grad_norm": 1.25, "learning_rate": 0.00034490707608618676, "loss": 1.0045, "step": 10573 }, { "epoch": 0.7356081950676545, "grad_norm": 1.328125, "learning_rate": 0.00034473684275137184, "loss": 1.0613, "step": 10574 }, { "epoch": 0.7356777627047897, "grad_norm": 1.2421875, "learning_rate": 0.000344566642687517, "loss": 1.0541, "step": 10575 }, { "epoch": 0.735747330341925, "grad_norm": 1.0234375, "learning_rate": 0.0003443964759032647, "loss": 0.9216, "step": 10576 }, { "epoch": 0.7358168979790601, "grad_norm": 1.4453125, "learning_rate": 0.0003442263424072547, "loss": 0.7047, "step": 10577 }, { "epoch": 0.7358864656161953, "grad_norm": 1.109375, "learning_rate": 0.0003440562422081259, "loss": 0.8835, "step": 10578 }, { "epoch": 0.7359560332533306, "grad_norm": 1.2734375, "learning_rate": 0.0003438861753145146, "loss": 0.8249, "step": 10579 }, { "epoch": 0.7360256008904658, "grad_norm": 1.0, "learning_rate": 0.0003437161417350565, "loss": 0.9425, "step": 10580 }, { "epoch": 0.7360951685276009, "grad_norm": 1.546875, "learning_rate": 0.0003435461414783846, "loss": 0.9638, "step": 10581 }, { "epoch": 0.7361647361647362, "grad_norm": 1.078125, "learning_rate": 0.00034337617455313117, "loss": 0.7829, "step": 10582 }, { "epoch": 0.7362343038018714, "grad_norm": 0.96484375, "learning_rate": 0.0003432062409679256, "loss": 0.5947, "step": 10583 }, { "epoch": 0.7363038714390066, "grad_norm": 0.9609375, "learning_rate": 0.000343036340731397, "loss": 0.7484, "step": 10584 }, { "epoch": 0.7363734390761418, "grad_norm": 1.6015625, "learning_rate": 0.00034286647385217163, "loss": 1.0335, "step": 10585 }, { "epoch": 0.736443006713277, "grad_norm": 1.3046875, "learning_rate": 0.0003426966403388739, "loss": 0.9315, "step": 10586 }, { "epoch": 0.7365125743504122, "grad_norm": 1.0546875, "learning_rate": 0.0003425268402001284, "loss": 0.5161, "step": 10587 }, { "epoch": 0.7365821419875473, "grad_norm": 1.3046875, "learning_rate": 0.00034235707344455605, "loss": 0.8376, "step": 10588 }, { "epoch": 0.7366517096246826, "grad_norm": 1.375, "learning_rate": 0.00034218734008077667, "loss": 0.9266, "step": 10589 }, { "epoch": 0.7367212772618178, "grad_norm": 1.0390625, "learning_rate": 0.0003420176401174082, "loss": 0.6244, "step": 10590 }, { "epoch": 0.736790844898953, "grad_norm": 1.1328125, "learning_rate": 0.0003418479735630675, "loss": 0.8644, "step": 10591 }, { "epoch": 0.7368604125360882, "grad_norm": 1.09375, "learning_rate": 0.0003416783404263698, "loss": 1.0935, "step": 10592 }, { "epoch": 0.7369299801732234, "grad_norm": 1.359375, "learning_rate": 0.0003415087407159273, "loss": 0.9523, "step": 10593 }, { "epoch": 0.7369995478103586, "grad_norm": 1.328125, "learning_rate": 0.0003413391744403523, "loss": 0.8776, "step": 10594 }, { "epoch": 0.7370691154474939, "grad_norm": 1.03125, "learning_rate": 0.00034116964160825394, "loss": 0.7411, "step": 10595 }, { "epoch": 0.737138683084629, "grad_norm": 1.140625, "learning_rate": 0.0003410001422282406, "loss": 0.9877, "step": 10596 }, { "epoch": 0.7372082507217642, "grad_norm": 1.2265625, "learning_rate": 0.0003408306763089182, "loss": 0.9689, "step": 10597 }, { "epoch": 0.7372778183588995, "grad_norm": 1.2578125, "learning_rate": 0.00034066124385889176, "loss": 0.7874, "step": 10598 }, { "epoch": 0.7373473859960347, "grad_norm": 1.3515625, "learning_rate": 0.00034049184488676423, "loss": 0.8638, "step": 10599 }, { "epoch": 0.7374169536331698, "grad_norm": 1.03125, "learning_rate": 0.0003403224794011358, "loss": 0.7705, "step": 10600 }, { "epoch": 0.737486521270305, "grad_norm": 0.890625, "learning_rate": 0.00034015314741060764, "loss": 0.5618, "step": 10601 }, { "epoch": 0.7375560889074403, "grad_norm": 1.234375, "learning_rate": 0.00033998384892377673, "loss": 1.0214, "step": 10602 }, { "epoch": 0.7376256565445755, "grad_norm": 1.1171875, "learning_rate": 0.00033981458394923936, "loss": 0.7092, "step": 10603 }, { "epoch": 0.7376952241817106, "grad_norm": 1.5390625, "learning_rate": 0.0003396453524955894, "loss": 0.8239, "step": 10604 }, { "epoch": 0.7377647918188459, "grad_norm": 1.1171875, "learning_rate": 0.00033947615457142, "loss": 0.8572, "step": 10605 }, { "epoch": 0.7378343594559811, "grad_norm": 1.09375, "learning_rate": 0.0003393069901853225, "loss": 0.905, "step": 10606 }, { "epoch": 0.7379039270931163, "grad_norm": 0.84375, "learning_rate": 0.00033913785934588556, "loss": 0.6206, "step": 10607 }, { "epoch": 0.7379734947302515, "grad_norm": 1.0703125, "learning_rate": 0.0003389687620616976, "loss": 0.7289, "step": 10608 }, { "epoch": 0.7380430623673867, "grad_norm": 1.1796875, "learning_rate": 0.0003387996983413436, "loss": 0.8671, "step": 10609 }, { "epoch": 0.7381126300045219, "grad_norm": 1.0, "learning_rate": 0.0003386306681934086, "loss": 0.6568, "step": 10610 }, { "epoch": 0.7381821976416572, "grad_norm": 0.82421875, "learning_rate": 0.00033846167162647435, "loss": 0.6951, "step": 10611 }, { "epoch": 0.7382517652787923, "grad_norm": 0.98046875, "learning_rate": 0.0003382927086491223, "loss": 0.5366, "step": 10612 }, { "epoch": 0.7383213329159275, "grad_norm": 1.2890625, "learning_rate": 0.000338123779269931, "loss": 0.9697, "step": 10613 }, { "epoch": 0.7383909005530627, "grad_norm": 1.28125, "learning_rate": 0.00033795488349747815, "loss": 0.9811, "step": 10614 }, { "epoch": 0.738460468190198, "grad_norm": 0.9921875, "learning_rate": 0.0003377860213403395, "loss": 0.965, "step": 10615 }, { "epoch": 0.7385300358273331, "grad_norm": 1.1015625, "learning_rate": 0.00033761719280708905, "loss": 0.7906, "step": 10616 }, { "epoch": 0.7385996034644683, "grad_norm": 1.296875, "learning_rate": 0.0003374483979062989, "loss": 0.8549, "step": 10617 }, { "epoch": 0.7386691711016036, "grad_norm": 1.203125, "learning_rate": 0.00033727963664653915, "loss": 1.0295, "step": 10618 }, { "epoch": 0.7387387387387387, "grad_norm": 1.0625, "learning_rate": 0.0003371109090363792, "loss": 0.6678, "step": 10619 }, { "epoch": 0.7388083063758739, "grad_norm": 1.2890625, "learning_rate": 0.0003369422150843863, "loss": 0.8005, "step": 10620 }, { "epoch": 0.7388778740130092, "grad_norm": 1.2265625, "learning_rate": 0.00033677355479912543, "loss": 0.6075, "step": 10621 }, { "epoch": 0.7389474416501444, "grad_norm": 1.6796875, "learning_rate": 0.0003366049281891608, "loss": 1.0783, "step": 10622 }, { "epoch": 0.7390170092872795, "grad_norm": 1.2890625, "learning_rate": 0.0003364363352630538, "loss": 0.8589, "step": 10623 }, { "epoch": 0.7390865769244148, "grad_norm": 1.1484375, "learning_rate": 0.00033626777602936556, "loss": 0.8846, "step": 10624 }, { "epoch": 0.73915614456155, "grad_norm": 1.109375, "learning_rate": 0.00033609925049665377, "loss": 0.7845, "step": 10625 }, { "epoch": 0.7392257121986852, "grad_norm": 1.1953125, "learning_rate": 0.000335930758673476, "loss": 0.6639, "step": 10626 }, { "epoch": 0.7392952798358203, "grad_norm": 1.2265625, "learning_rate": 0.000335762300568387, "loss": 0.8502, "step": 10627 }, { "epoch": 0.7393648474729556, "grad_norm": 1.1171875, "learning_rate": 0.0003355938761899402, "loss": 0.7402, "step": 10628 }, { "epoch": 0.7394344151100908, "grad_norm": 1.0234375, "learning_rate": 0.00033542548554668785, "loss": 0.852, "step": 10629 }, { "epoch": 0.739503982747226, "grad_norm": 1.3359375, "learning_rate": 0.0003352571286471797, "loss": 0.8183, "step": 10630 }, { "epoch": 0.7395735503843612, "grad_norm": 1.0703125, "learning_rate": 0.000335088805499964, "loss": 0.8635, "step": 10631 }, { "epoch": 0.7396431180214964, "grad_norm": 1.140625, "learning_rate": 0.00033492051611358665, "loss": 0.9351, "step": 10632 }, { "epoch": 0.7397126856586316, "grad_norm": 1.1796875, "learning_rate": 0.00033475226049659403, "loss": 0.9912, "step": 10633 }, { "epoch": 0.7397822532957669, "grad_norm": 0.95703125, "learning_rate": 0.0003345840386575284, "loss": 0.6397, "step": 10634 }, { "epoch": 0.739851820932902, "grad_norm": 1.015625, "learning_rate": 0.00033441585060493107, "loss": 0.9123, "step": 10635 }, { "epoch": 0.7399213885700372, "grad_norm": 1.15625, "learning_rate": 0.00033424769634734234, "loss": 0.6297, "step": 10636 }, { "epoch": 0.7399909562071725, "grad_norm": 1.0390625, "learning_rate": 0.0003340795758932996, "loss": 0.8262, "step": 10637 }, { "epoch": 0.7400605238443076, "grad_norm": 1.28125, "learning_rate": 0.00033391148925133996, "loss": 0.6155, "step": 10638 }, { "epoch": 0.7401300914814428, "grad_norm": 1.09375, "learning_rate": 0.0003337434364299972, "loss": 0.7819, "step": 10639 }, { "epoch": 0.740199659118578, "grad_norm": 1.15625, "learning_rate": 0.0003335754174378047, "loss": 0.8932, "step": 10640 }, { "epoch": 0.7402692267557133, "grad_norm": 0.99609375, "learning_rate": 0.0003334074322832933, "loss": 0.6714, "step": 10641 }, { "epoch": 0.7403387943928484, "grad_norm": 1.25, "learning_rate": 0.0003332394809749927, "loss": 0.8553, "step": 10642 }, { "epoch": 0.7404083620299836, "grad_norm": 1.4375, "learning_rate": 0.00033307156352143063, "loss": 1.0901, "step": 10643 }, { "epoch": 0.7404779296671189, "grad_norm": 0.94921875, "learning_rate": 0.0003329036799311331, "loss": 0.6526, "step": 10644 }, { "epoch": 0.7405474973042541, "grad_norm": 1.1015625, "learning_rate": 0.0003327358302126241, "loss": 0.9371, "step": 10645 }, { "epoch": 0.7406170649413892, "grad_norm": 1.046875, "learning_rate": 0.0003325680143744262, "loss": 0.6818, "step": 10646 }, { "epoch": 0.7406866325785245, "grad_norm": 1.171875, "learning_rate": 0.0003324002324250609, "loss": 0.7781, "step": 10647 }, { "epoch": 0.7407562002156597, "grad_norm": 0.84765625, "learning_rate": 0.0003322324843730468, "loss": 0.6444, "step": 10648 }, { "epoch": 0.7408257678527949, "grad_norm": 1.1015625, "learning_rate": 0.00033206477022690084, "loss": 0.6224, "step": 10649 }, { "epoch": 0.74089533548993, "grad_norm": 1.0625, "learning_rate": 0.0003318970899951397, "loss": 0.6883, "step": 10650 }, { "epoch": 0.7409649031270653, "grad_norm": 1.2734375, "learning_rate": 0.00033172944368627653, "loss": 0.8715, "step": 10651 }, { "epoch": 0.7410344707642005, "grad_norm": 0.93359375, "learning_rate": 0.0003315618313088241, "loss": 0.7715, "step": 10652 }, { "epoch": 0.7411040384013357, "grad_norm": 1.0390625, "learning_rate": 0.0003313942528712924, "loss": 0.659, "step": 10653 }, { "epoch": 0.7411736060384709, "grad_norm": 0.84765625, "learning_rate": 0.0003312267083821909, "loss": 0.602, "step": 10654 }, { "epoch": 0.7412431736756061, "grad_norm": 1.3046875, "learning_rate": 0.00033105919785002594, "loss": 1.0, "step": 10655 }, { "epoch": 0.7413127413127413, "grad_norm": 1.2421875, "learning_rate": 0.0003308917212833036, "loss": 0.7299, "step": 10656 }, { "epoch": 0.7413823089498766, "grad_norm": 1.0234375, "learning_rate": 0.00033072427869052667, "loss": 0.6468, "step": 10657 }, { "epoch": 0.7414518765870117, "grad_norm": 1.265625, "learning_rate": 0.00033055687008019775, "loss": 0.7774, "step": 10658 }, { "epoch": 0.7415214442241469, "grad_norm": 1.171875, "learning_rate": 0.0003303894954608165, "loss": 0.7469, "step": 10659 }, { "epoch": 0.7415910118612822, "grad_norm": 1.3515625, "learning_rate": 0.00033022215484088157, "loss": 0.9953, "step": 10660 }, { "epoch": 0.7416605794984173, "grad_norm": 1.0703125, "learning_rate": 0.00033005484822889, "loss": 0.6639, "step": 10661 }, { "epoch": 0.7417301471355525, "grad_norm": 0.8984375, "learning_rate": 0.00032988757563333636, "loss": 0.689, "step": 10662 }, { "epoch": 0.7417997147726877, "grad_norm": 1.234375, "learning_rate": 0.0003297203370627142, "loss": 0.7924, "step": 10663 }, { "epoch": 0.741869282409823, "grad_norm": 1.125, "learning_rate": 0.0003295531325255141, "loss": 0.845, "step": 10664 }, { "epoch": 0.7419388500469581, "grad_norm": 0.88671875, "learning_rate": 0.0003293859620302273, "loss": 0.8241, "step": 10665 }, { "epoch": 0.7420084176840933, "grad_norm": 1.2734375, "learning_rate": 0.00032921882558534113, "loss": 1.2517, "step": 10666 }, { "epoch": 0.7420779853212286, "grad_norm": 1.203125, "learning_rate": 0.00032905172319934174, "loss": 0.7807, "step": 10667 }, { "epoch": 0.7421475529583638, "grad_norm": 0.96875, "learning_rate": 0.00032888465488071437, "loss": 0.6846, "step": 10668 }, { "epoch": 0.7422171205954989, "grad_norm": 0.8359375, "learning_rate": 0.0003287176206379412, "loss": 0.5882, "step": 10669 }, { "epoch": 0.7422866882326342, "grad_norm": 1.2109375, "learning_rate": 0.00032855062047950414, "loss": 0.9032, "step": 10670 }, { "epoch": 0.7423562558697694, "grad_norm": 1.28125, "learning_rate": 0.0003283836544138818, "loss": 0.8563, "step": 10671 }, { "epoch": 0.7424258235069046, "grad_norm": 1.6484375, "learning_rate": 0.0003282167224495527, "loss": 0.6975, "step": 10672 }, { "epoch": 0.7424953911440398, "grad_norm": 1.1328125, "learning_rate": 0.000328049824594992, "loss": 0.8196, "step": 10673 }, { "epoch": 0.742564958781175, "grad_norm": 1.0078125, "learning_rate": 0.0003278829608586743, "loss": 0.8175, "step": 10674 }, { "epoch": 0.7426345264183102, "grad_norm": 1.0625, "learning_rate": 0.0003277161312490725, "loss": 0.72, "step": 10675 }, { "epoch": 0.7427040940554454, "grad_norm": 0.921875, "learning_rate": 0.00032754933577465694, "loss": 0.8003, "step": 10676 }, { "epoch": 0.7427736616925806, "grad_norm": 0.94921875, "learning_rate": 0.0003273825744438965, "loss": 0.6127, "step": 10677 }, { "epoch": 0.7428432293297158, "grad_norm": 1.0625, "learning_rate": 0.00032721584726525855, "loss": 0.8807, "step": 10678 }, { "epoch": 0.742912796966851, "grad_norm": 0.9375, "learning_rate": 0.0003270491542472092, "loss": 0.9, "step": 10679 }, { "epoch": 0.7429823646039863, "grad_norm": 0.99609375, "learning_rate": 0.0003268824953982119, "loss": 0.75, "step": 10680 }, { "epoch": 0.7430519322411214, "grad_norm": 1.109375, "learning_rate": 0.0003267158707267284, "loss": 0.835, "step": 10681 }, { "epoch": 0.7431214998782566, "grad_norm": 1.1953125, "learning_rate": 0.00032654928024121953, "loss": 0.9836, "step": 10682 }, { "epoch": 0.7431910675153919, "grad_norm": 1.109375, "learning_rate": 0.00032638272395014355, "loss": 0.6932, "step": 10683 }, { "epoch": 0.743260635152527, "grad_norm": 0.953125, "learning_rate": 0.00032621620186195797, "loss": 0.6945, "step": 10684 }, { "epoch": 0.7433302027896622, "grad_norm": 1.078125, "learning_rate": 0.0003260497139851172, "loss": 0.7294, "step": 10685 }, { "epoch": 0.7433997704267975, "grad_norm": 1.3515625, "learning_rate": 0.00032588326032807524, "loss": 0.8689, "step": 10686 }, { "epoch": 0.7434693380639327, "grad_norm": 1.3359375, "learning_rate": 0.00032571684089928324, "loss": 0.7692, "step": 10687 }, { "epoch": 0.7435389057010678, "grad_norm": 1.0859375, "learning_rate": 0.00032555045570719135, "loss": 0.8178, "step": 10688 }, { "epoch": 0.743608473338203, "grad_norm": 1.3046875, "learning_rate": 0.0003253841047602483, "loss": 0.9112, "step": 10689 }, { "epoch": 0.7436780409753383, "grad_norm": 1.046875, "learning_rate": 0.0003252177880668999, "loss": 0.7473, "step": 10690 }, { "epoch": 0.7437476086124735, "grad_norm": 1.203125, "learning_rate": 0.00032505150563559094, "loss": 0.8301, "step": 10691 }, { "epoch": 0.7438171762496086, "grad_norm": 1.0859375, "learning_rate": 0.0003248852574747644, "loss": 0.6411, "step": 10692 }, { "epoch": 0.7438867438867439, "grad_norm": 1.03125, "learning_rate": 0.0003247190435928621, "loss": 0.7741, "step": 10693 }, { "epoch": 0.7439563115238791, "grad_norm": 1.28125, "learning_rate": 0.00032455286399832295, "loss": 0.8128, "step": 10694 }, { "epoch": 0.7440258791610143, "grad_norm": 1.15625, "learning_rate": 0.0003243867186995847, "loss": 0.757, "step": 10695 }, { "epoch": 0.7440954467981495, "grad_norm": 0.98828125, "learning_rate": 0.0003242206077050834, "loss": 0.7755, "step": 10696 }, { "epoch": 0.7441650144352847, "grad_norm": 1.0078125, "learning_rate": 0.0003240545310232538, "loss": 0.7966, "step": 10697 }, { "epoch": 0.7442345820724199, "grad_norm": 1.1015625, "learning_rate": 0.0003238884886625282, "loss": 0.5772, "step": 10698 }, { "epoch": 0.7443041497095552, "grad_norm": 0.8984375, "learning_rate": 0.0003237224806313368, "loss": 0.832, "step": 10699 }, { "epoch": 0.7443737173466903, "grad_norm": 1.28125, "learning_rate": 0.00032355650693810956, "loss": 1.0497, "step": 10700 }, { "epoch": 0.7444432849838255, "grad_norm": 1.0703125, "learning_rate": 0.00032339056759127303, "loss": 0.7867, "step": 10701 }, { "epoch": 0.7445128526209607, "grad_norm": 0.9921875, "learning_rate": 0.0003232246625992532, "loss": 0.6914, "step": 10702 }, { "epoch": 0.744582420258096, "grad_norm": 1.3046875, "learning_rate": 0.00032305879197047405, "loss": 0.7868, "step": 10703 }, { "epoch": 0.7446519878952311, "grad_norm": 1.3046875, "learning_rate": 0.00032289295571335744, "loss": 0.9864, "step": 10704 }, { "epoch": 0.7447215555323663, "grad_norm": 1.0625, "learning_rate": 0.0003227271538363232, "loss": 0.74, "step": 10705 }, { "epoch": 0.7447911231695016, "grad_norm": 1.3046875, "learning_rate": 0.00032256138634779053, "loss": 1.1954, "step": 10706 }, { "epoch": 0.7448606908066367, "grad_norm": 1.125, "learning_rate": 0.0003223956532561765, "loss": 0.7271, "step": 10707 }, { "epoch": 0.7449302584437719, "grad_norm": 0.9375, "learning_rate": 0.00032222995456989567, "loss": 0.7618, "step": 10708 }, { "epoch": 0.7449998260809072, "grad_norm": 0.99609375, "learning_rate": 0.0003220642902973613, "loss": 0.9101, "step": 10709 }, { "epoch": 0.7450693937180424, "grad_norm": 0.984375, "learning_rate": 0.0003218986604469851, "loss": 0.808, "step": 10710 }, { "epoch": 0.7451389613551775, "grad_norm": 1.2734375, "learning_rate": 0.0003217330650271775, "loss": 0.6827, "step": 10711 }, { "epoch": 0.7452085289923128, "grad_norm": 1.296875, "learning_rate": 0.00032156750404634604, "loss": 0.7297, "step": 10712 }, { "epoch": 0.745278096629448, "grad_norm": 1.0703125, "learning_rate": 0.00032140197751289693, "loss": 0.8698, "step": 10713 }, { "epoch": 0.7453476642665832, "grad_norm": 1.6015625, "learning_rate": 0.00032123648543523533, "loss": 0.9402, "step": 10714 }, { "epoch": 0.7454172319037183, "grad_norm": 1.3671875, "learning_rate": 0.0003210710278217634, "loss": 0.7883, "step": 10715 }, { "epoch": 0.7454867995408536, "grad_norm": 1.15625, "learning_rate": 0.0003209056046808827, "loss": 0.6464, "step": 10716 }, { "epoch": 0.7455563671779888, "grad_norm": 1.359375, "learning_rate": 0.0003207402160209927, "loss": 0.7639, "step": 10717 }, { "epoch": 0.745625934815124, "grad_norm": 1.203125, "learning_rate": 0.0003205748618504909, "loss": 0.8756, "step": 10718 }, { "epoch": 0.7456955024522592, "grad_norm": 0.98046875, "learning_rate": 0.00032040954217777274, "loss": 0.7125, "step": 10719 }, { "epoch": 0.7457650700893944, "grad_norm": 1.3046875, "learning_rate": 0.00032024425701123263, "loss": 0.9542, "step": 10720 }, { "epoch": 0.7458346377265296, "grad_norm": 0.8828125, "learning_rate": 0.00032007900635926324, "loss": 0.6046, "step": 10721 }, { "epoch": 0.7459042053636649, "grad_norm": 0.9765625, "learning_rate": 0.0003199137902302548, "loss": 0.751, "step": 10722 }, { "epoch": 0.7459737730008, "grad_norm": 1.40625, "learning_rate": 0.0003197486086325959, "loss": 0.806, "step": 10723 }, { "epoch": 0.7460433406379352, "grad_norm": 1.0859375, "learning_rate": 0.000319583461574674, "loss": 0.7304, "step": 10724 }, { "epoch": 0.7461129082750705, "grad_norm": 1.5078125, "learning_rate": 0.00031941834906487463, "loss": 0.7119, "step": 10725 }, { "epoch": 0.7461824759122057, "grad_norm": 0.94140625, "learning_rate": 0.0003192532711115812, "loss": 0.6333, "step": 10726 }, { "epoch": 0.7462520435493408, "grad_norm": 1.0625, "learning_rate": 0.00031908822772317504, "loss": 0.7809, "step": 10727 }, { "epoch": 0.746321611186476, "grad_norm": 0.94921875, "learning_rate": 0.00031892321890803654, "loss": 0.8851, "step": 10728 }, { "epoch": 0.7463911788236113, "grad_norm": 0.98828125, "learning_rate": 0.0003187582446745446, "loss": 0.9008, "step": 10729 }, { "epoch": 0.7464607464607464, "grad_norm": 1.1171875, "learning_rate": 0.0003185933050310749, "loss": 0.6785, "step": 10730 }, { "epoch": 0.7465303140978816, "grad_norm": 1.015625, "learning_rate": 0.0003184283999860029, "loss": 0.8088, "step": 10731 }, { "epoch": 0.7465998817350169, "grad_norm": 1.3125, "learning_rate": 0.0003182635295477014, "loss": 0.7669, "step": 10732 }, { "epoch": 0.7466694493721521, "grad_norm": 0.9921875, "learning_rate": 0.00031809869372454136, "loss": 0.6425, "step": 10733 }, { "epoch": 0.7467390170092872, "grad_norm": 1.1171875, "learning_rate": 0.0003179338925248926, "loss": 0.7079, "step": 10734 }, { "epoch": 0.7468085846464225, "grad_norm": 0.8828125, "learning_rate": 0.0003177691259571233, "loss": 0.782, "step": 10735 }, { "epoch": 0.7468781522835577, "grad_norm": 1.0546875, "learning_rate": 0.00031760439402959896, "loss": 0.6856, "step": 10736 }, { "epoch": 0.7469477199206929, "grad_norm": 1.046875, "learning_rate": 0.0003174396967506837, "loss": 0.6101, "step": 10737 }, { "epoch": 0.7470172875578281, "grad_norm": 1.109375, "learning_rate": 0.00031727503412874025, "loss": 0.9982, "step": 10738 }, { "epoch": 0.7470868551949633, "grad_norm": 1.0546875, "learning_rate": 0.00031711040617212973, "loss": 0.8929, "step": 10739 }, { "epoch": 0.7471564228320985, "grad_norm": 1.40625, "learning_rate": 0.00031694581288921076, "loss": 0.9524, "step": 10740 }, { "epoch": 0.7472259904692337, "grad_norm": 1.2578125, "learning_rate": 0.00031678125428834025, "loss": 0.8278, "step": 10741 }, { "epoch": 0.7472955581063689, "grad_norm": 1.09375, "learning_rate": 0.000316616730377874, "loss": 0.8842, "step": 10742 }, { "epoch": 0.7473651257435041, "grad_norm": 1.4140625, "learning_rate": 0.000316452241166166, "loss": 0.8872, "step": 10743 }, { "epoch": 0.7474346933806393, "grad_norm": 1.609375, "learning_rate": 0.00031628778666156776, "loss": 0.7571, "step": 10744 }, { "epoch": 0.7475042610177746, "grad_norm": 1.578125, "learning_rate": 0.00031612336687242927, "loss": 0.7759, "step": 10745 }, { "epoch": 0.7475738286549097, "grad_norm": 0.99609375, "learning_rate": 0.00031595898180709957, "loss": 0.7668, "step": 10746 }, { "epoch": 0.7476433962920449, "grad_norm": 1.15625, "learning_rate": 0.00031579463147392463, "loss": 0.771, "step": 10747 }, { "epoch": 0.7477129639291802, "grad_norm": 1.296875, "learning_rate": 0.00031563031588124966, "loss": 0.8609, "step": 10748 }, { "epoch": 0.7477825315663154, "grad_norm": 1.1796875, "learning_rate": 0.0003154660350374181, "loss": 0.7597, "step": 10749 }, { "epoch": 0.7478520992034505, "grad_norm": 1.296875, "learning_rate": 0.0003153017889507709, "loss": 0.9094, "step": 10750 }, { "epoch": 0.7479216668405858, "grad_norm": 1.2578125, "learning_rate": 0.00031513757762964746, "loss": 0.8527, "step": 10751 }, { "epoch": 0.747991234477721, "grad_norm": 0.92578125, "learning_rate": 0.0003149734010823858, "loss": 0.6744, "step": 10752 }, { "epoch": 0.7480608021148561, "grad_norm": 1.1796875, "learning_rate": 0.00031480925931732254, "loss": 0.8425, "step": 10753 }, { "epoch": 0.7481303697519913, "grad_norm": 1.0859375, "learning_rate": 0.0003146451523427912, "loss": 0.8637, "step": 10754 }, { "epoch": 0.7481999373891266, "grad_norm": 1.109375, "learning_rate": 0.0003144810801671245, "loss": 0.7141, "step": 10755 }, { "epoch": 0.7482695050262618, "grad_norm": 1.25, "learning_rate": 0.0003143170427986531, "loss": 0.932, "step": 10756 }, { "epoch": 0.7483390726633969, "grad_norm": 0.72265625, "learning_rate": 0.0003141530402457067, "loss": 0.571, "step": 10757 }, { "epoch": 0.7484086403005322, "grad_norm": 1.34375, "learning_rate": 0.0003139890725166118, "loss": 1.0473, "step": 10758 }, { "epoch": 0.7484782079376674, "grad_norm": 0.828125, "learning_rate": 0.00031382513961969384, "loss": 0.5942, "step": 10759 }, { "epoch": 0.7485477755748026, "grad_norm": 1.0625, "learning_rate": 0.00031366124156327667, "loss": 0.7604, "step": 10760 }, { "epoch": 0.7486173432119378, "grad_norm": 1.3671875, "learning_rate": 0.0003134973783556825, "loss": 0.8232, "step": 10761 }, { "epoch": 0.748686910849073, "grad_norm": 1.15625, "learning_rate": 0.000313333550005231, "loss": 0.9719, "step": 10762 }, { "epoch": 0.7487564784862082, "grad_norm": 1.03125, "learning_rate": 0.00031316975652024106, "loss": 0.8764, "step": 10763 }, { "epoch": 0.7488260461233435, "grad_norm": 1.1953125, "learning_rate": 0.00031300599790902905, "loss": 0.6826, "step": 10764 }, { "epoch": 0.7488956137604786, "grad_norm": 0.97265625, "learning_rate": 0.0003128422741799094, "loss": 0.7145, "step": 10765 }, { "epoch": 0.7489651813976138, "grad_norm": 1.390625, "learning_rate": 0.00031267858534119553, "loss": 0.9347, "step": 10766 }, { "epoch": 0.749034749034749, "grad_norm": 0.9375, "learning_rate": 0.000312514931401199, "loss": 0.6208, "step": 10767 }, { "epoch": 0.7491043166718843, "grad_norm": 1.21875, "learning_rate": 0.0003123513123682292, "loss": 0.7783, "step": 10768 }, { "epoch": 0.7491738843090194, "grad_norm": 0.9140625, "learning_rate": 0.00031218772825059336, "loss": 0.8459, "step": 10769 }, { "epoch": 0.7492434519461546, "grad_norm": 1.15625, "learning_rate": 0.0003120241790565979, "loss": 0.7067, "step": 10770 }, { "epoch": 0.7493130195832899, "grad_norm": 0.87890625, "learning_rate": 0.0003118606647945472, "loss": 0.7017, "step": 10771 }, { "epoch": 0.7493825872204251, "grad_norm": 1.1484375, "learning_rate": 0.0003116971854727435, "loss": 0.7706, "step": 10772 }, { "epoch": 0.7494521548575602, "grad_norm": 1.015625, "learning_rate": 0.0003115337410994872, "loss": 0.7049, "step": 10773 }, { "epoch": 0.7495217224946955, "grad_norm": 1.1796875, "learning_rate": 0.00031137033168307727, "loss": 0.8327, "step": 10774 }, { "epoch": 0.7495912901318307, "grad_norm": 1.140625, "learning_rate": 0.00031120695723181125, "loss": 0.5652, "step": 10775 }, { "epoch": 0.7496608577689658, "grad_norm": 1.0703125, "learning_rate": 0.0003110436177539839, "loss": 0.9325, "step": 10776 }, { "epoch": 0.7497304254061011, "grad_norm": 0.98046875, "learning_rate": 0.00031088031325788944, "loss": 0.7637, "step": 10777 }, { "epoch": 0.7497999930432363, "grad_norm": 0.9921875, "learning_rate": 0.0003107170437518192, "loss": 0.5993, "step": 10778 }, { "epoch": 0.7498695606803715, "grad_norm": 1.1796875, "learning_rate": 0.00031055380924406285, "loss": 1.0458, "step": 10779 }, { "epoch": 0.7499391283175066, "grad_norm": 0.87109375, "learning_rate": 0.0003103906097429091, "loss": 0.6709, "step": 10780 }, { "epoch": 0.7500086959546419, "grad_norm": 0.94921875, "learning_rate": 0.0003102274452566445, "loss": 0.5583, "step": 10781 }, { "epoch": 0.7500782635917771, "grad_norm": 1.3125, "learning_rate": 0.00031006431579355367, "loss": 0.8395, "step": 10782 }, { "epoch": 0.7501478312289123, "grad_norm": 1.1015625, "learning_rate": 0.0003099012213619189, "loss": 0.8713, "step": 10783 }, { "epoch": 0.7502173988660475, "grad_norm": 1.28125, "learning_rate": 0.0003097381619700218, "loss": 0.7511, "step": 10784 }, { "epoch": 0.7502869665031827, "grad_norm": 1.265625, "learning_rate": 0.00030957513762614196, "loss": 0.6935, "step": 10785 }, { "epoch": 0.7503565341403179, "grad_norm": 0.93359375, "learning_rate": 0.0003094121483385567, "loss": 0.4937, "step": 10786 }, { "epoch": 0.7504261017774532, "grad_norm": 1.375, "learning_rate": 0.0003092491941155413, "loss": 0.9161, "step": 10787 }, { "epoch": 0.7504956694145883, "grad_norm": 1.1953125, "learning_rate": 0.0003090862749653702, "loss": 1.0645, "step": 10788 }, { "epoch": 0.7505652370517235, "grad_norm": 1.140625, "learning_rate": 0.00030892339089631603, "loss": 0.5419, "step": 10789 }, { "epoch": 0.7506348046888588, "grad_norm": 0.83984375, "learning_rate": 0.0003087605419166484, "loss": 0.7095, "step": 10790 }, { "epoch": 0.750704372325994, "grad_norm": 1.3984375, "learning_rate": 0.0003085977280346366, "loss": 0.9747, "step": 10791 }, { "epoch": 0.7507739399631291, "grad_norm": 1.109375, "learning_rate": 0.0003084349492585473, "loss": 1.0787, "step": 10792 }, { "epoch": 0.7508435076002643, "grad_norm": 1.125, "learning_rate": 0.00030827220559664524, "loss": 0.7317, "step": 10793 }, { "epoch": 0.7509130752373996, "grad_norm": 1.1171875, "learning_rate": 0.00030810949705719395, "loss": 0.7491, "step": 10794 }, { "epoch": 0.7509826428745348, "grad_norm": 1.03125, "learning_rate": 0.0003079468236484554, "loss": 0.8051, "step": 10795 }, { "epoch": 0.7510522105116699, "grad_norm": 1.140625, "learning_rate": 0.00030778418537868893, "loss": 0.6746, "step": 10796 }, { "epoch": 0.7511217781488052, "grad_norm": 1.03125, "learning_rate": 0.0003076215822561521, "loss": 0.8839, "step": 10797 }, { "epoch": 0.7511913457859404, "grad_norm": 0.90234375, "learning_rate": 0.0003074590142891015, "loss": 0.8582, "step": 10798 }, { "epoch": 0.7512609134230755, "grad_norm": 1.3203125, "learning_rate": 0.0003072964814857918, "loss": 1.0314, "step": 10799 }, { "epoch": 0.7513304810602108, "grad_norm": 0.86328125, "learning_rate": 0.00030713398385447534, "loss": 0.5999, "step": 10800 }, { "epoch": 0.751400048697346, "grad_norm": 1.1328125, "learning_rate": 0.00030697152140340256, "loss": 0.761, "step": 10801 }, { "epoch": 0.7514696163344812, "grad_norm": 1.15625, "learning_rate": 0.0003068090941408228, "loss": 0.906, "step": 10802 }, { "epoch": 0.7515391839716165, "grad_norm": 1.078125, "learning_rate": 0.0003066467020749836, "loss": 0.7689, "step": 10803 }, { "epoch": 0.7516087516087516, "grad_norm": 1.015625, "learning_rate": 0.00030648434521412984, "loss": 0.8183, "step": 10804 }, { "epoch": 0.7516783192458868, "grad_norm": 1.609375, "learning_rate": 0.0003063220235665056, "loss": 0.7437, "step": 10805 }, { "epoch": 0.751747886883022, "grad_norm": 0.890625, "learning_rate": 0.0003061597371403525, "loss": 0.4423, "step": 10806 }, { "epoch": 0.7518174545201572, "grad_norm": 1.0390625, "learning_rate": 0.00030599748594391094, "loss": 0.5525, "step": 10807 }, { "epoch": 0.7518870221572924, "grad_norm": 1.1875, "learning_rate": 0.00030583526998541875, "loss": 1.2907, "step": 10808 }, { "epoch": 0.7519565897944276, "grad_norm": 1.0625, "learning_rate": 0.000305673089273113, "loss": 0.757, "step": 10809 }, { "epoch": 0.7520261574315629, "grad_norm": 1.2421875, "learning_rate": 0.00030551094381522806, "loss": 0.7258, "step": 10810 }, { "epoch": 0.752095725068698, "grad_norm": 1.0546875, "learning_rate": 0.00030534883361999664, "loss": 0.8237, "step": 10811 }, { "epoch": 0.7521652927058332, "grad_norm": 1.0625, "learning_rate": 0.0003051867586956502, "loss": 0.5544, "step": 10812 }, { "epoch": 0.7522348603429685, "grad_norm": 1.0703125, "learning_rate": 0.00030502471905041815, "loss": 0.7884, "step": 10813 }, { "epoch": 0.7523044279801037, "grad_norm": 0.8984375, "learning_rate": 0.0003048627146925281, "loss": 0.4507, "step": 10814 }, { "epoch": 0.7523739956172388, "grad_norm": 1.078125, "learning_rate": 0.00030470074563020534, "loss": 0.5509, "step": 10815 }, { "epoch": 0.7524435632543741, "grad_norm": 1.2734375, "learning_rate": 0.0003045388118716741, "loss": 0.9134, "step": 10816 }, { "epoch": 0.7525131308915093, "grad_norm": 1.2109375, "learning_rate": 0.00030437691342515694, "loss": 0.5909, "step": 10817 }, { "epoch": 0.7525826985286445, "grad_norm": 1.28125, "learning_rate": 0.0003042150502988739, "loss": 0.9357, "step": 10818 }, { "epoch": 0.7526522661657796, "grad_norm": 1.265625, "learning_rate": 0.0003040532225010433, "loss": 0.7893, "step": 10819 }, { "epoch": 0.7527218338029149, "grad_norm": 1.3046875, "learning_rate": 0.00030389143003988216, "loss": 0.9734, "step": 10820 }, { "epoch": 0.7527914014400501, "grad_norm": 1.0625, "learning_rate": 0.00030372967292360587, "loss": 0.8374, "step": 10821 }, { "epoch": 0.7528609690771852, "grad_norm": 1.046875, "learning_rate": 0.00030356795116042714, "loss": 0.8235, "step": 10822 }, { "epoch": 0.7529305367143205, "grad_norm": 1.1484375, "learning_rate": 0.00030340626475855784, "loss": 1.0226, "step": 10823 }, { "epoch": 0.7530001043514557, "grad_norm": 1.6328125, "learning_rate": 0.00030324461372620726, "loss": 0.727, "step": 10824 }, { "epoch": 0.7530696719885909, "grad_norm": 0.98046875, "learning_rate": 0.000303082998071583, "loss": 0.7792, "step": 10825 }, { "epoch": 0.7531392396257262, "grad_norm": 1.0078125, "learning_rate": 0.0003029214178028914, "loss": 0.5346, "step": 10826 }, { "epoch": 0.7532088072628613, "grad_norm": 1.15625, "learning_rate": 0.000302759872928337, "loss": 0.797, "step": 10827 }, { "epoch": 0.7532783748999965, "grad_norm": 1.609375, "learning_rate": 0.0003025983634561218, "loss": 1.0819, "step": 10828 }, { "epoch": 0.7533479425371318, "grad_norm": 1.03125, "learning_rate": 0.0003024368893944462, "loss": 0.6368, "step": 10829 }, { "epoch": 0.753417510174267, "grad_norm": 1.3203125, "learning_rate": 0.00030227545075150954, "loss": 1.2654, "step": 10830 }, { "epoch": 0.7534870778114021, "grad_norm": 1.1328125, "learning_rate": 0.000302114047535509, "loss": 0.7587, "step": 10831 }, { "epoch": 0.7535566454485373, "grad_norm": 1.0390625, "learning_rate": 0.0003019526797546395, "loss": 0.5841, "step": 10832 }, { "epoch": 0.7536262130856726, "grad_norm": 1.1640625, "learning_rate": 0.00030179134741709405, "loss": 0.8582, "step": 10833 }, { "epoch": 0.7536957807228077, "grad_norm": 1.109375, "learning_rate": 0.00030163005053106484, "loss": 0.6291, "step": 10834 }, { "epoch": 0.7537653483599429, "grad_norm": 0.98046875, "learning_rate": 0.00030146878910474194, "loss": 0.6024, "step": 10835 }, { "epoch": 0.7538349159970782, "grad_norm": 0.9140625, "learning_rate": 0.0003013075631463128, "loss": 0.5758, "step": 10836 }, { "epoch": 0.7539044836342134, "grad_norm": 1.140625, "learning_rate": 0.00030114637266396416, "loss": 0.7231, "step": 10837 }, { "epoch": 0.7539740512713485, "grad_norm": 1.3046875, "learning_rate": 0.00030098521766587993, "loss": 0.8559, "step": 10838 }, { "epoch": 0.7540436189084838, "grad_norm": 1.0859375, "learning_rate": 0.0003008240981602435, "loss": 0.8338, "step": 10839 }, { "epoch": 0.754113186545619, "grad_norm": 1.1875, "learning_rate": 0.00030066301415523477, "loss": 0.732, "step": 10840 }, { "epoch": 0.7541827541827542, "grad_norm": 1.1953125, "learning_rate": 0.00030050196565903364, "loss": 0.814, "step": 10841 }, { "epoch": 0.7542523218198894, "grad_norm": 1.28125, "learning_rate": 0.000300340952679817, "loss": 0.8623, "step": 10842 }, { "epoch": 0.7543218894570246, "grad_norm": 1.3984375, "learning_rate": 0.00030017997522575993, "loss": 0.7784, "step": 10843 }, { "epoch": 0.7543914570941598, "grad_norm": 1.1484375, "learning_rate": 0.0003000190333050363, "loss": 0.8666, "step": 10844 }, { "epoch": 0.754461024731295, "grad_norm": 1.1484375, "learning_rate": 0.0002998581269258183, "loss": 0.8736, "step": 10845 }, { "epoch": 0.7545305923684302, "grad_norm": 1.0234375, "learning_rate": 0.0002996972560962757, "loss": 0.7556, "step": 10846 }, { "epoch": 0.7546001600055654, "grad_norm": 1.2734375, "learning_rate": 0.00029953642082457634, "loss": 0.9385, "step": 10847 }, { "epoch": 0.7546697276427006, "grad_norm": 1.2109375, "learning_rate": 0.00029937562111888685, "loss": 0.6932, "step": 10848 }, { "epoch": 0.7547392952798359, "grad_norm": 1.171875, "learning_rate": 0.0002992148569873723, "loss": 0.9168, "step": 10849 }, { "epoch": 0.754808862916971, "grad_norm": 0.96484375, "learning_rate": 0.0002990541284381947, "loss": 0.799, "step": 10850 }, { "epoch": 0.7548784305541062, "grad_norm": 1.2265625, "learning_rate": 0.00029889343547951584, "loss": 0.957, "step": 10851 }, { "epoch": 0.7549479981912415, "grad_norm": 1.0234375, "learning_rate": 0.0002987327781194942, "loss": 0.8664, "step": 10852 }, { "epoch": 0.7550175658283766, "grad_norm": 1.0859375, "learning_rate": 0.00029857215636628763, "loss": 1.0017, "step": 10853 }, { "epoch": 0.7550871334655118, "grad_norm": 0.80078125, "learning_rate": 0.0002984115702280512, "loss": 0.7072, "step": 10854 }, { "epoch": 0.7551567011026471, "grad_norm": 1.09375, "learning_rate": 0.0002982510197129393, "loss": 0.7619, "step": 10855 }, { "epoch": 0.7552262687397823, "grad_norm": 0.9765625, "learning_rate": 0.0002980905048291036, "loss": 0.7234, "step": 10856 }, { "epoch": 0.7552958363769174, "grad_norm": 1.03125, "learning_rate": 0.0002979300255846935, "loss": 0.6323, "step": 10857 }, { "epoch": 0.7553654040140526, "grad_norm": 1.1640625, "learning_rate": 0.00029776958198785865, "loss": 0.7847, "step": 10858 }, { "epoch": 0.7554349716511879, "grad_norm": 1.328125, "learning_rate": 0.0002976091740467449, "loss": 0.8902, "step": 10859 }, { "epoch": 0.7555045392883231, "grad_norm": 1.1015625, "learning_rate": 0.00029744880176949706, "loss": 0.934, "step": 10860 }, { "epoch": 0.7555741069254582, "grad_norm": 1.046875, "learning_rate": 0.0002972884651642576, "loss": 0.6618, "step": 10861 }, { "epoch": 0.7556436745625935, "grad_norm": 1.0078125, "learning_rate": 0.0002971281642391679, "loss": 0.9202, "step": 10862 }, { "epoch": 0.7557132421997287, "grad_norm": 1.171875, "learning_rate": 0.00029696789900236754, "loss": 0.8987, "step": 10863 }, { "epoch": 0.7557828098368639, "grad_norm": 0.9921875, "learning_rate": 0.00029680766946199355, "loss": 0.7069, "step": 10864 }, { "epoch": 0.7558523774739991, "grad_norm": 1.03125, "learning_rate": 0.000296647475626182, "loss": 0.8637, "step": 10865 }, { "epoch": 0.7559219451111343, "grad_norm": 1.1328125, "learning_rate": 0.0002964873175030661, "loss": 0.8416, "step": 10866 }, { "epoch": 0.7559915127482695, "grad_norm": 0.8828125, "learning_rate": 0.00029632719510077867, "loss": 0.6538, "step": 10867 }, { "epoch": 0.7560610803854048, "grad_norm": 1.03125, "learning_rate": 0.0002961671084274492, "loss": 0.639, "step": 10868 }, { "epoch": 0.7561306480225399, "grad_norm": 1.0546875, "learning_rate": 0.0002960070574912066, "loss": 0.6999, "step": 10869 }, { "epoch": 0.7562002156596751, "grad_norm": 1.0703125, "learning_rate": 0.000295847042300177, "loss": 0.8679, "step": 10870 }, { "epoch": 0.7562697832968103, "grad_norm": 1.0390625, "learning_rate": 0.0002956870628624854, "loss": 0.6761, "step": 10871 }, { "epoch": 0.7563393509339456, "grad_norm": 0.91015625, "learning_rate": 0.00029552711918625496, "loss": 0.7618, "step": 10872 }, { "epoch": 0.7564089185710807, "grad_norm": 1.265625, "learning_rate": 0.00029536721127960676, "loss": 0.9856, "step": 10873 }, { "epoch": 0.7564784862082159, "grad_norm": 0.97265625, "learning_rate": 0.0002952073391506598, "loss": 0.6998, "step": 10874 }, { "epoch": 0.7565480538453512, "grad_norm": 1.0078125, "learning_rate": 0.00029504750280753145, "loss": 0.8895, "step": 10875 }, { "epoch": 0.7566176214824863, "grad_norm": 1.140625, "learning_rate": 0.0002948877022583378, "loss": 0.6585, "step": 10876 }, { "epoch": 0.7566871891196215, "grad_norm": 1.046875, "learning_rate": 0.00029472793751119286, "loss": 0.5516, "step": 10877 }, { "epoch": 0.7567567567567568, "grad_norm": 1.265625, "learning_rate": 0.0002945682085742081, "loss": 0.8468, "step": 10878 }, { "epoch": 0.756826324393892, "grad_norm": 1.1953125, "learning_rate": 0.0002944085154554943, "loss": 0.9927, "step": 10879 }, { "epoch": 0.7568958920310271, "grad_norm": 1.2421875, "learning_rate": 0.0002942488581631594, "loss": 0.776, "step": 10880 }, { "epoch": 0.7569654596681624, "grad_norm": 1.171875, "learning_rate": 0.0002940892367053105, "loss": 0.7683, "step": 10881 }, { "epoch": 0.7570350273052976, "grad_norm": 1.1640625, "learning_rate": 0.0002939296510900519, "loss": 0.6813, "step": 10882 }, { "epoch": 0.7571045949424328, "grad_norm": 1.046875, "learning_rate": 0.00029377010132548696, "loss": 0.6979, "step": 10883 }, { "epoch": 0.7571741625795679, "grad_norm": 1.078125, "learning_rate": 0.00029361058741971636, "loss": 0.9223, "step": 10884 }, { "epoch": 0.7572437302167032, "grad_norm": 0.97265625, "learning_rate": 0.00029345110938083964, "loss": 0.5794, "step": 10885 }, { "epoch": 0.7573132978538384, "grad_norm": 1.203125, "learning_rate": 0.00029329166721695464, "loss": 0.7523, "step": 10886 }, { "epoch": 0.7573828654909736, "grad_norm": 1.1015625, "learning_rate": 0.0002931322609361567, "loss": 0.7918, "step": 10887 }, { "epoch": 0.7574524331281088, "grad_norm": 1.203125, "learning_rate": 0.00029297289054653974, "loss": 0.9891, "step": 10888 }, { "epoch": 0.757522000765244, "grad_norm": 1.5546875, "learning_rate": 0.00029281355605619496, "loss": 0.9407, "step": 10889 }, { "epoch": 0.7575915684023792, "grad_norm": 1.2578125, "learning_rate": 0.0002926542574732141, "loss": 0.7065, "step": 10890 }, { "epoch": 0.7576611360395145, "grad_norm": 1.125, "learning_rate": 0.00029249499480568463, "loss": 0.7619, "step": 10891 }, { "epoch": 0.7577307036766496, "grad_norm": 0.96875, "learning_rate": 0.00029233576806169325, "loss": 0.7175, "step": 10892 }, { "epoch": 0.7578002713137848, "grad_norm": 1.03125, "learning_rate": 0.00029217657724932446, "loss": 0.7108, "step": 10893 }, { "epoch": 0.7578698389509201, "grad_norm": 0.96875, "learning_rate": 0.0002920174223766613, "loss": 0.7984, "step": 10894 }, { "epoch": 0.7579394065880553, "grad_norm": 1.046875, "learning_rate": 0.0002918583034517852, "loss": 0.7702, "step": 10895 }, { "epoch": 0.7580089742251904, "grad_norm": 1.3046875, "learning_rate": 0.00029169922048277486, "loss": 0.8288, "step": 10896 }, { "epoch": 0.7580785418623256, "grad_norm": 0.9765625, "learning_rate": 0.00029154017347770845, "loss": 0.7305, "step": 10897 }, { "epoch": 0.7581481094994609, "grad_norm": 1.1328125, "learning_rate": 0.0002913811624446606, "loss": 0.7453, "step": 10898 }, { "epoch": 0.758217677136596, "grad_norm": 0.984375, "learning_rate": 0.00029122218739170615, "loss": 0.5697, "step": 10899 }, { "epoch": 0.7582872447737312, "grad_norm": 1.2578125, "learning_rate": 0.0002910632483269161, "loss": 0.5748, "step": 10900 }, { "epoch": 0.7583568124108665, "grad_norm": 0.94140625, "learning_rate": 0.00029090434525836127, "loss": 0.6577, "step": 10901 }, { "epoch": 0.7584263800480017, "grad_norm": 0.9921875, "learning_rate": 0.00029074547819410944, "loss": 0.596, "step": 10902 }, { "epoch": 0.7584959476851368, "grad_norm": 0.9921875, "learning_rate": 0.00029058664714222724, "loss": 0.7455, "step": 10903 }, { "epoch": 0.7585655153222721, "grad_norm": 1.3125, "learning_rate": 0.00029042785211077983, "loss": 0.8898, "step": 10904 }, { "epoch": 0.7586350829594073, "grad_norm": 1.0234375, "learning_rate": 0.00029026909310782945, "loss": 0.8803, "step": 10905 }, { "epoch": 0.7587046505965425, "grad_norm": 0.87109375, "learning_rate": 0.00029011037014143725, "loss": 0.8165, "step": 10906 }, { "epoch": 0.7587742182336777, "grad_norm": 0.90625, "learning_rate": 0.00028995168321966215, "loss": 0.5155, "step": 10907 }, { "epoch": 0.7588437858708129, "grad_norm": 1.28125, "learning_rate": 0.0002897930323505615, "loss": 1.0407, "step": 10908 }, { "epoch": 0.7589133535079481, "grad_norm": 1.09375, "learning_rate": 0.00028963441754219135, "loss": 0.9402, "step": 10909 }, { "epoch": 0.7589829211450833, "grad_norm": 1.1328125, "learning_rate": 0.00028947583880260466, "loss": 0.8225, "step": 10910 }, { "epoch": 0.7590524887822185, "grad_norm": 1.09375, "learning_rate": 0.00028931729613985394, "loss": 0.7145, "step": 10911 }, { "epoch": 0.7591220564193537, "grad_norm": 1.1640625, "learning_rate": 0.00028915878956198835, "loss": 0.733, "step": 10912 }, { "epoch": 0.7591916240564889, "grad_norm": 1.1953125, "learning_rate": 0.0002890003190770569, "loss": 0.8173, "step": 10913 }, { "epoch": 0.7592611916936242, "grad_norm": 1.484375, "learning_rate": 0.00028884188469310525, "loss": 0.8284, "step": 10914 }, { "epoch": 0.7593307593307593, "grad_norm": 1.4375, "learning_rate": 0.00028868348641817855, "loss": 0.7452, "step": 10915 }, { "epoch": 0.7594003269678945, "grad_norm": 0.82421875, "learning_rate": 0.00028852512426031876, "loss": 0.4362, "step": 10916 }, { "epoch": 0.7594698946050298, "grad_norm": 1.203125, "learning_rate": 0.0002883667982275671, "loss": 0.8837, "step": 10917 }, { "epoch": 0.759539462242165, "grad_norm": 0.984375, "learning_rate": 0.00028820850832796276, "loss": 0.9928, "step": 10918 }, { "epoch": 0.7596090298793001, "grad_norm": 0.7421875, "learning_rate": 0.00028805025456954256, "loss": 0.6872, "step": 10919 }, { "epoch": 0.7596785975164354, "grad_norm": 1.4140625, "learning_rate": 0.00028789203696034216, "loss": 1.023, "step": 10920 }, { "epoch": 0.7597481651535706, "grad_norm": 1.59375, "learning_rate": 0.00028773385550839414, "loss": 0.7345, "step": 10921 }, { "epoch": 0.7598177327907057, "grad_norm": 1.0859375, "learning_rate": 0.00028757571022173145, "loss": 0.8374, "step": 10922 }, { "epoch": 0.7598873004278409, "grad_norm": 1.234375, "learning_rate": 0.00028741760110838333, "loss": 0.7209, "step": 10923 }, { "epoch": 0.7599568680649762, "grad_norm": 1.140625, "learning_rate": 0.00028725952817637747, "loss": 0.69, "step": 10924 }, { "epoch": 0.7600264357021114, "grad_norm": 1.4140625, "learning_rate": 0.00028710149143374055, "loss": 0.9986, "step": 10925 }, { "epoch": 0.7600960033392465, "grad_norm": 0.79296875, "learning_rate": 0.00028694349088849625, "loss": 0.4994, "step": 10926 }, { "epoch": 0.7601655709763818, "grad_norm": 1.0546875, "learning_rate": 0.00028678552654866785, "loss": 0.8706, "step": 10927 }, { "epoch": 0.760235138613517, "grad_norm": 0.89453125, "learning_rate": 0.00028662759842227513, "loss": 0.6004, "step": 10928 }, { "epoch": 0.7603047062506522, "grad_norm": 1.1015625, "learning_rate": 0.0002864697065173377, "loss": 0.878, "step": 10929 }, { "epoch": 0.7603742738877874, "grad_norm": 0.90234375, "learning_rate": 0.0002863118508418717, "loss": 0.7029, "step": 10930 }, { "epoch": 0.7604438415249226, "grad_norm": 1.0, "learning_rate": 0.0002861540314038927, "loss": 0.9266, "step": 10931 }, { "epoch": 0.7605134091620578, "grad_norm": 1.2421875, "learning_rate": 0.00028599624821141437, "loss": 0.8503, "step": 10932 }, { "epoch": 0.7605829767991931, "grad_norm": 1.2421875, "learning_rate": 0.0002858385012724476, "loss": 0.6257, "step": 10933 }, { "epoch": 0.7606525444363282, "grad_norm": 1.09375, "learning_rate": 0.00028568079059500175, "loss": 0.7435, "step": 10934 }, { "epoch": 0.7607221120734634, "grad_norm": 1.2734375, "learning_rate": 0.00028552311618708495, "loss": 0.8444, "step": 10935 }, { "epoch": 0.7607916797105986, "grad_norm": 1.078125, "learning_rate": 0.0002853654780567034, "loss": 0.7446, "step": 10936 }, { "epoch": 0.7608612473477339, "grad_norm": 1.234375, "learning_rate": 0.0002852078762118608, "loss": 0.8777, "step": 10937 }, { "epoch": 0.760930814984869, "grad_norm": 1.046875, "learning_rate": 0.0002850503106605592, "loss": 0.7718, "step": 10938 }, { "epoch": 0.7610003826220042, "grad_norm": 1.0625, "learning_rate": 0.0002848927814107994, "loss": 0.6905, "step": 10939 }, { "epoch": 0.7610699502591395, "grad_norm": 1.34375, "learning_rate": 0.0002847352884705796, "loss": 0.9064, "step": 10940 }, { "epoch": 0.7611395178962747, "grad_norm": 1.1015625, "learning_rate": 0.0002845778318478969, "loss": 0.6481, "step": 10941 }, { "epoch": 0.7612090855334098, "grad_norm": 1.09375, "learning_rate": 0.0002844204115507456, "loss": 0.7602, "step": 10942 }, { "epoch": 0.7612786531705451, "grad_norm": 1.3828125, "learning_rate": 0.0002842630275871193, "loss": 0.7536, "step": 10943 }, { "epoch": 0.7613482208076803, "grad_norm": 1.015625, "learning_rate": 0.00028410567996500855, "loss": 0.9728, "step": 10944 }, { "epoch": 0.7614177884448154, "grad_norm": 0.9375, "learning_rate": 0.000283948368692403, "loss": 0.5581, "step": 10945 }, { "epoch": 0.7614873560819507, "grad_norm": 1.140625, "learning_rate": 0.0002837910937772905, "loss": 0.9369, "step": 10946 }, { "epoch": 0.7615569237190859, "grad_norm": 1.0859375, "learning_rate": 0.00028363385522765615, "loss": 0.7163, "step": 10947 }, { "epoch": 0.7616264913562211, "grad_norm": 1.4453125, "learning_rate": 0.0002834766530514837, "loss": 0.9173, "step": 10948 }, { "epoch": 0.7616960589933562, "grad_norm": 1.0390625, "learning_rate": 0.00028331948725675526, "loss": 0.9139, "step": 10949 }, { "epoch": 0.7617656266304915, "grad_norm": 1.359375, "learning_rate": 0.00028316235785145116, "loss": 0.8441, "step": 10950 }, { "epoch": 0.7618351942676267, "grad_norm": 1.21875, "learning_rate": 0.0002830052648435495, "loss": 0.9639, "step": 10951 }, { "epoch": 0.7619047619047619, "grad_norm": 1.046875, "learning_rate": 0.0002828482082410262, "loss": 0.6816, "step": 10952 }, { "epoch": 0.7619743295418971, "grad_norm": 1.078125, "learning_rate": 0.0002826911880518561, "loss": 0.7788, "step": 10953 }, { "epoch": 0.7620438971790323, "grad_norm": 1.046875, "learning_rate": 0.0002825342042840123, "loss": 0.6936, "step": 10954 }, { "epoch": 0.7621134648161675, "grad_norm": 1.15625, "learning_rate": 0.00028237725694546544, "loss": 0.7557, "step": 10955 }, { "epoch": 0.7621830324533028, "grad_norm": 1.046875, "learning_rate": 0.000282220346044184, "loss": 0.7433, "step": 10956 }, { "epoch": 0.7622526000904379, "grad_norm": 0.78515625, "learning_rate": 0.0002820634715881358, "loss": 0.6299, "step": 10957 }, { "epoch": 0.7623221677275731, "grad_norm": 1.0, "learning_rate": 0.0002819066335852856, "loss": 0.8281, "step": 10958 }, { "epoch": 0.7623917353647084, "grad_norm": 0.86328125, "learning_rate": 0.0002817498320435969, "loss": 0.4377, "step": 10959 }, { "epoch": 0.7624613030018436, "grad_norm": 1.1640625, "learning_rate": 0.0002815930669710319, "loss": 0.9633, "step": 10960 }, { "epoch": 0.7625308706389787, "grad_norm": 1.2421875, "learning_rate": 0.00028143633837555005, "loss": 0.9027, "step": 10961 }, { "epoch": 0.7626004382761139, "grad_norm": 1.015625, "learning_rate": 0.0002812796462651087, "loss": 0.7701, "step": 10962 }, { "epoch": 0.7626700059132492, "grad_norm": 1.0078125, "learning_rate": 0.00028112299064766424, "loss": 0.7056, "step": 10963 }, { "epoch": 0.7627395735503844, "grad_norm": 1.1484375, "learning_rate": 0.00028096637153117123, "loss": 0.8179, "step": 10964 }, { "epoch": 0.7628091411875195, "grad_norm": 1.0703125, "learning_rate": 0.00028080978892358176, "loss": 0.9911, "step": 10965 }, { "epoch": 0.7628787088246548, "grad_norm": 1.2265625, "learning_rate": 0.00028065324283284586, "loss": 0.755, "step": 10966 }, { "epoch": 0.76294827646179, "grad_norm": 1.0234375, "learning_rate": 0.0002804967332669125, "loss": 0.6879, "step": 10967 }, { "epoch": 0.7630178440989251, "grad_norm": 1.265625, "learning_rate": 0.00028034026023372873, "loss": 0.8447, "step": 10968 }, { "epoch": 0.7630874117360604, "grad_norm": 0.9609375, "learning_rate": 0.0002801838237412393, "loss": 0.6667, "step": 10969 }, { "epoch": 0.7631569793731956, "grad_norm": 0.921875, "learning_rate": 0.00028002742379738674, "loss": 0.6053, "step": 10970 }, { "epoch": 0.7632265470103308, "grad_norm": 1.03125, "learning_rate": 0.000279871060410113, "loss": 0.9168, "step": 10971 }, { "epoch": 0.763296114647466, "grad_norm": 1.0703125, "learning_rate": 0.0002797147335873569, "loss": 0.7791, "step": 10972 }, { "epoch": 0.7633656822846012, "grad_norm": 1.5625, "learning_rate": 0.00027955844333705626, "loss": 1.2102, "step": 10973 }, { "epoch": 0.7634352499217364, "grad_norm": 1.171875, "learning_rate": 0.00027940218966714635, "loss": 0.7311, "step": 10974 }, { "epoch": 0.7635048175588716, "grad_norm": 0.859375, "learning_rate": 0.0002792459725855615, "loss": 0.6264, "step": 10975 }, { "epoch": 0.7635743851960068, "grad_norm": 1.171875, "learning_rate": 0.000279089792100233, "loss": 0.868, "step": 10976 }, { "epoch": 0.763643952833142, "grad_norm": 1.2109375, "learning_rate": 0.0002789336482190912, "loss": 0.7842, "step": 10977 }, { "epoch": 0.7637135204702772, "grad_norm": 1.15625, "learning_rate": 0.0002787775409500645, "loss": 0.8368, "step": 10978 }, { "epoch": 0.7637830881074125, "grad_norm": 1.4375, "learning_rate": 0.0002786214703010791, "loss": 0.9723, "step": 10979 }, { "epoch": 0.7638526557445476, "grad_norm": 1.0546875, "learning_rate": 0.00027846543628005916, "loss": 0.7424, "step": 10980 }, { "epoch": 0.7639222233816828, "grad_norm": 1.015625, "learning_rate": 0.0002783094388949274, "loss": 0.6343, "step": 10981 }, { "epoch": 0.7639917910188181, "grad_norm": 1.2421875, "learning_rate": 0.00027815347815360526, "loss": 0.7459, "step": 10982 }, { "epoch": 0.7640613586559533, "grad_norm": 1.234375, "learning_rate": 0.0002779975540640111, "loss": 0.7888, "step": 10983 }, { "epoch": 0.7641309262930884, "grad_norm": 1.2421875, "learning_rate": 0.0002778416666340615, "loss": 0.8225, "step": 10984 }, { "epoch": 0.7642004939302237, "grad_norm": 1.1484375, "learning_rate": 0.0002776858158716723, "loss": 0.8207, "step": 10985 }, { "epoch": 0.7642700615673589, "grad_norm": 1.1953125, "learning_rate": 0.00027753000178475687, "loss": 0.9062, "step": 10986 }, { "epoch": 0.764339629204494, "grad_norm": 1.25, "learning_rate": 0.00027737422438122637, "loss": 0.902, "step": 10987 }, { "epoch": 0.7644091968416292, "grad_norm": 1.09375, "learning_rate": 0.00027721848366899025, "loss": 0.9645, "step": 10988 }, { "epoch": 0.7644787644787645, "grad_norm": 1.1328125, "learning_rate": 0.0002770627796559567, "loss": 1.067, "step": 10989 }, { "epoch": 0.7645483321158997, "grad_norm": 1.140625, "learning_rate": 0.000276907112350031, "loss": 0.7567, "step": 10990 }, { "epoch": 0.7646178997530348, "grad_norm": 0.9375, "learning_rate": 0.00027675148175911746, "loss": 0.6529, "step": 10991 }, { "epoch": 0.7646874673901701, "grad_norm": 1.140625, "learning_rate": 0.0002765958878911187, "loss": 0.9196, "step": 10992 }, { "epoch": 0.7647570350273053, "grad_norm": 1.0, "learning_rate": 0.00027644033075393436, "loss": 0.6483, "step": 10993 }, { "epoch": 0.7648266026644405, "grad_norm": 1.3515625, "learning_rate": 0.0002762848103554627, "loss": 0.7832, "step": 10994 }, { "epoch": 0.7648961703015758, "grad_norm": 1.1484375, "learning_rate": 0.0002761293267036007, "loss": 0.7285, "step": 10995 }, { "epoch": 0.7649657379387109, "grad_norm": 1.1953125, "learning_rate": 0.0002759738798062431, "loss": 1.087, "step": 10996 }, { "epoch": 0.7650353055758461, "grad_norm": 1.2421875, "learning_rate": 0.00027581846967128255, "loss": 0.7953, "step": 10997 }, { "epoch": 0.7651048732129814, "grad_norm": 1.484375, "learning_rate": 0.0002756630963066097, "loss": 0.9286, "step": 10998 }, { "epoch": 0.7651744408501165, "grad_norm": 1.0859375, "learning_rate": 0.0002755077597201139, "loss": 0.7367, "step": 10999 }, { "epoch": 0.7652440084872517, "grad_norm": 0.82421875, "learning_rate": 0.0002753524599196826, "loss": 0.8037, "step": 11000 }, { "epoch": 0.7653135761243869, "grad_norm": 1.25, "learning_rate": 0.0002751971969132009, "loss": 0.7853, "step": 11001 }, { "epoch": 0.7653831437615222, "grad_norm": 1.2265625, "learning_rate": 0.00027504197070855196, "loss": 0.8712, "step": 11002 }, { "epoch": 0.7654527113986573, "grad_norm": 1.0703125, "learning_rate": 0.000274886781313618, "loss": 0.7074, "step": 11003 }, { "epoch": 0.7655222790357925, "grad_norm": 0.91015625, "learning_rate": 0.0002747316287362782, "loss": 0.8645, "step": 11004 }, { "epoch": 0.7655918466729278, "grad_norm": 1.1328125, "learning_rate": 0.00027457651298441055, "loss": 0.7833, "step": 11005 }, { "epoch": 0.765661414310063, "grad_norm": 0.98046875, "learning_rate": 0.0002744214340658916, "loss": 0.788, "step": 11006 }, { "epoch": 0.7657309819471981, "grad_norm": 1.2734375, "learning_rate": 0.0002742663919885949, "loss": 1.0066, "step": 11007 }, { "epoch": 0.7658005495843334, "grad_norm": 1.0234375, "learning_rate": 0.0002741113867603927, "loss": 0.7631, "step": 11008 }, { "epoch": 0.7658701172214686, "grad_norm": 1.109375, "learning_rate": 0.0002739564183891554, "loss": 0.7219, "step": 11009 }, { "epoch": 0.7659396848586038, "grad_norm": 0.9765625, "learning_rate": 0.0002738014868827521, "loss": 0.7114, "step": 11010 }, { "epoch": 0.766009252495739, "grad_norm": 1.1015625, "learning_rate": 0.00027364659224904885, "loss": 0.7598, "step": 11011 }, { "epoch": 0.7660788201328742, "grad_norm": 0.9765625, "learning_rate": 0.0002734917344959103, "loss": 0.8081, "step": 11012 }, { "epoch": 0.7661483877700094, "grad_norm": 1.2421875, "learning_rate": 0.0002733369136311995, "loss": 0.6663, "step": 11013 }, { "epoch": 0.7662179554071445, "grad_norm": 1.171875, "learning_rate": 0.000273182129662778, "loss": 0.8881, "step": 11014 }, { "epoch": 0.7662875230442798, "grad_norm": 1.109375, "learning_rate": 0.00027302738259850443, "loss": 0.8484, "step": 11015 }, { "epoch": 0.766357090681415, "grad_norm": 1.5234375, "learning_rate": 0.0002728726724462359, "loss": 0.8909, "step": 11016 }, { "epoch": 0.7664266583185502, "grad_norm": 1.109375, "learning_rate": 0.00027271799921382844, "loss": 0.5954, "step": 11017 }, { "epoch": 0.7664962259556855, "grad_norm": 1.0390625, "learning_rate": 0.00027256336290913484, "loss": 0.7347, "step": 11018 }, { "epoch": 0.7665657935928206, "grad_norm": 1.25, "learning_rate": 0.0002724087635400071, "loss": 0.8684, "step": 11019 }, { "epoch": 0.7666353612299558, "grad_norm": 1.3125, "learning_rate": 0.00027225420111429534, "loss": 0.8686, "step": 11020 }, { "epoch": 0.7667049288670911, "grad_norm": 1.3671875, "learning_rate": 0.00027209967563984717, "loss": 0.963, "step": 11021 }, { "epoch": 0.7667744965042262, "grad_norm": 0.82421875, "learning_rate": 0.0002719451871245082, "loss": 0.7989, "step": 11022 }, { "epoch": 0.7668440641413614, "grad_norm": 1.234375, "learning_rate": 0.000271790735576123, "loss": 0.6866, "step": 11023 }, { "epoch": 0.7669136317784967, "grad_norm": 0.94140625, "learning_rate": 0.0002716363210025341, "loss": 0.8577, "step": 11024 }, { "epoch": 0.7669831994156319, "grad_norm": 0.83203125, "learning_rate": 0.0002714819434115816, "loss": 0.5434, "step": 11025 }, { "epoch": 0.767052767052767, "grad_norm": 1.1328125, "learning_rate": 0.0002713276028111037, "loss": 0.8487, "step": 11026 }, { "epoch": 0.7671223346899022, "grad_norm": 1.125, "learning_rate": 0.0002711732992089374, "loss": 0.8545, "step": 11027 }, { "epoch": 0.7671919023270375, "grad_norm": 1.1015625, "learning_rate": 0.00027101903261291763, "loss": 0.9335, "step": 11028 }, { "epoch": 0.7672614699641727, "grad_norm": 1.015625, "learning_rate": 0.00027086480303087715, "loss": 0.6288, "step": 11029 }, { "epoch": 0.7673310376013078, "grad_norm": 1.1328125, "learning_rate": 0.0002707106104706464, "loss": 0.8595, "step": 11030 }, { "epoch": 0.7674006052384431, "grad_norm": 3.515625, "learning_rate": 0.0002705564549400551, "loss": 0.6332, "step": 11031 }, { "epoch": 0.7674701728755783, "grad_norm": 1.109375, "learning_rate": 0.0002704023364469306, "loss": 0.8574, "step": 11032 }, { "epoch": 0.7675397405127135, "grad_norm": 0.90234375, "learning_rate": 0.0002702482549990977, "loss": 0.6316, "step": 11033 }, { "epoch": 0.7676093081498487, "grad_norm": 0.90234375, "learning_rate": 0.0002700942106043804, "loss": 0.6021, "step": 11034 }, { "epoch": 0.7676788757869839, "grad_norm": 1.015625, "learning_rate": 0.0002699402032706003, "loss": 0.5737, "step": 11035 }, { "epoch": 0.7677484434241191, "grad_norm": 1.4140625, "learning_rate": 0.00026978623300557647, "loss": 0.9823, "step": 11036 }, { "epoch": 0.7678180110612544, "grad_norm": 1.078125, "learning_rate": 0.00026963229981712724, "loss": 0.9057, "step": 11037 }, { "epoch": 0.7678875786983895, "grad_norm": 1.0625, "learning_rate": 0.00026947840371306875, "loss": 0.6561, "step": 11038 }, { "epoch": 0.7679571463355247, "grad_norm": 1.3046875, "learning_rate": 0.00026932454470121484, "loss": 0.8715, "step": 11039 }, { "epoch": 0.7680267139726599, "grad_norm": 0.95703125, "learning_rate": 0.0002691707227893774, "loss": 0.7328, "step": 11040 }, { "epoch": 0.7680962816097952, "grad_norm": 1.03125, "learning_rate": 0.00026901693798536686, "loss": 0.7496, "step": 11041 }, { "epoch": 0.7681658492469303, "grad_norm": 1.2421875, "learning_rate": 0.00026886319029699224, "loss": 0.6002, "step": 11042 }, { "epoch": 0.7682354168840655, "grad_norm": 1.0546875, "learning_rate": 0.00026870947973205953, "loss": 0.7732, "step": 11043 }, { "epoch": 0.7683049845212008, "grad_norm": 1.0, "learning_rate": 0.0002685558062983732, "loss": 0.8224, "step": 11044 }, { "epoch": 0.7683745521583359, "grad_norm": 0.96875, "learning_rate": 0.00026840217000373624, "loss": 0.8444, "step": 11045 }, { "epoch": 0.7684441197954711, "grad_norm": 1.078125, "learning_rate": 0.00026824857085594987, "loss": 0.7547, "step": 11046 }, { "epoch": 0.7685136874326064, "grad_norm": 1.1328125, "learning_rate": 0.0002680950088628128, "loss": 0.6605, "step": 11047 }, { "epoch": 0.7685832550697416, "grad_norm": 1.265625, "learning_rate": 0.00026794148403212184, "loss": 0.6999, "step": 11048 }, { "epoch": 0.7686528227068767, "grad_norm": 1.2890625, "learning_rate": 0.00026778799637167274, "loss": 0.7683, "step": 11049 }, { "epoch": 0.768722390344012, "grad_norm": 1.2421875, "learning_rate": 0.00026763454588925816, "loss": 0.9527, "step": 11050 }, { "epoch": 0.7687919579811472, "grad_norm": 1.34375, "learning_rate": 0.00026748113259267005, "loss": 0.7597, "step": 11051 }, { "epoch": 0.7688615256182824, "grad_norm": 0.99609375, "learning_rate": 0.0002673277564896982, "loss": 0.6541, "step": 11052 }, { "epoch": 0.7689310932554175, "grad_norm": 0.96484375, "learning_rate": 0.0002671744175881299, "loss": 0.8399, "step": 11053 }, { "epoch": 0.7690006608925528, "grad_norm": 1.125, "learning_rate": 0.0002670211158957506, "loss": 0.832, "step": 11054 }, { "epoch": 0.769070228529688, "grad_norm": 1.59375, "learning_rate": 0.00026686785142034455, "loss": 0.9521, "step": 11055 }, { "epoch": 0.7691397961668232, "grad_norm": 1.1796875, "learning_rate": 0.00026671462416969416, "loss": 0.8878, "step": 11056 }, { "epoch": 0.7692093638039584, "grad_norm": 1.203125, "learning_rate": 0.00026656143415157896, "loss": 0.7894, "step": 11057 }, { "epoch": 0.7692789314410936, "grad_norm": 1.1796875, "learning_rate": 0.00026640828137377713, "loss": 0.8664, "step": 11058 }, { "epoch": 0.7693484990782288, "grad_norm": 1.2265625, "learning_rate": 0.00026625516584406517, "loss": 0.9427, "step": 11059 }, { "epoch": 0.7694180667153641, "grad_norm": 1.078125, "learning_rate": 0.00026610208757021784, "loss": 0.7319, "step": 11060 }, { "epoch": 0.7694876343524992, "grad_norm": 1.0234375, "learning_rate": 0.0002659490465600074, "loss": 0.7949, "step": 11061 }, { "epoch": 0.7695572019896344, "grad_norm": 1.3359375, "learning_rate": 0.00026579604282120416, "loss": 0.9307, "step": 11062 }, { "epoch": 0.7696267696267697, "grad_norm": 1.1015625, "learning_rate": 0.00026564307636157725, "loss": 0.8738, "step": 11063 }, { "epoch": 0.7696963372639049, "grad_norm": 1.03125, "learning_rate": 0.00026549014718889373, "loss": 0.7137, "step": 11064 }, { "epoch": 0.76976590490104, "grad_norm": 1.234375, "learning_rate": 0.0002653372553109181, "loss": 0.8228, "step": 11065 }, { "epoch": 0.7698354725381752, "grad_norm": 1.5078125, "learning_rate": 0.00026518440073541394, "loss": 0.87, "step": 11066 }, { "epoch": 0.7699050401753105, "grad_norm": 1.1640625, "learning_rate": 0.0002650315834701421, "loss": 0.8271, "step": 11067 }, { "epoch": 0.7699746078124456, "grad_norm": 0.875, "learning_rate": 0.00026487880352286177, "loss": 0.5988, "step": 11068 }, { "epoch": 0.7700441754495808, "grad_norm": 1.015625, "learning_rate": 0.0002647260609013303, "loss": 0.8066, "step": 11069 }, { "epoch": 0.7701137430867161, "grad_norm": 1.15625, "learning_rate": 0.0002645733556133039, "loss": 0.7901, "step": 11070 }, { "epoch": 0.7701833107238513, "grad_norm": 1.3125, "learning_rate": 0.0002644206876665356, "loss": 0.7836, "step": 11071 }, { "epoch": 0.7702528783609864, "grad_norm": 1.1171875, "learning_rate": 0.00026426805706877685, "loss": 0.9634, "step": 11072 }, { "epoch": 0.7703224459981217, "grad_norm": 1.3828125, "learning_rate": 0.00026411546382777793, "loss": 1.0034, "step": 11073 }, { "epoch": 0.7703920136352569, "grad_norm": 1.1015625, "learning_rate": 0.00026396290795128687, "loss": 0.6641, "step": 11074 }, { "epoch": 0.7704615812723921, "grad_norm": 0.765625, "learning_rate": 0.0002638103894470494, "loss": 0.7064, "step": 11075 }, { "epoch": 0.7705311489095273, "grad_norm": 1.03125, "learning_rate": 0.0002636579083228093, "loss": 0.6517, "step": 11076 }, { "epoch": 0.7706007165466625, "grad_norm": 0.96484375, "learning_rate": 0.0002635054645863093, "loss": 0.7051, "step": 11077 }, { "epoch": 0.7706702841837977, "grad_norm": 1.2578125, "learning_rate": 0.00026335305824528985, "loss": 1.0038, "step": 11078 }, { "epoch": 0.7707398518209329, "grad_norm": 1.5625, "learning_rate": 0.00026320068930748896, "loss": 0.7383, "step": 11079 }, { "epoch": 0.7708094194580681, "grad_norm": 1.1875, "learning_rate": 0.0002630483577806435, "loss": 0.8704, "step": 11080 }, { "epoch": 0.7708789870952033, "grad_norm": 0.90234375, "learning_rate": 0.00026289606367248784, "loss": 0.6069, "step": 11081 }, { "epoch": 0.7709485547323385, "grad_norm": 0.9140625, "learning_rate": 0.0002627438069907546, "loss": 0.6408, "step": 11082 }, { "epoch": 0.7710181223694738, "grad_norm": 0.8125, "learning_rate": 0.00026259158774317483, "loss": 0.6818, "step": 11083 }, { "epoch": 0.7710876900066089, "grad_norm": 1.15625, "learning_rate": 0.00026243940593747764, "loss": 0.9507, "step": 11084 }, { "epoch": 0.7711572576437441, "grad_norm": 1.09375, "learning_rate": 0.00026228726158138984, "loss": 0.6779, "step": 11085 }, { "epoch": 0.7712268252808794, "grad_norm": 1.015625, "learning_rate": 0.00026213515468263626, "loss": 0.6792, "step": 11086 }, { "epoch": 0.7712963929180146, "grad_norm": 1.03125, "learning_rate": 0.0002619830852489404, "loss": 0.8555, "step": 11087 }, { "epoch": 0.7713659605551497, "grad_norm": 0.921875, "learning_rate": 0.000261831053288024, "loss": 0.8009, "step": 11088 }, { "epoch": 0.771435528192285, "grad_norm": 1.015625, "learning_rate": 0.000261679058807606, "loss": 0.861, "step": 11089 }, { "epoch": 0.7715050958294202, "grad_norm": 1.125, "learning_rate": 0.0002615271018154036, "loss": 0.8647, "step": 11090 }, { "epoch": 0.7715746634665553, "grad_norm": 1.3046875, "learning_rate": 0.0002613751823191328, "loss": 0.8819, "step": 11091 }, { "epoch": 0.7716442311036905, "grad_norm": 1.09375, "learning_rate": 0.00026122330032650774, "loss": 0.6314, "step": 11092 }, { "epoch": 0.7717137987408258, "grad_norm": 0.92578125, "learning_rate": 0.0002610714558452394, "loss": 0.7506, "step": 11093 }, { "epoch": 0.771783366377961, "grad_norm": 0.9765625, "learning_rate": 0.0002609196488830383, "loss": 0.6546, "step": 11094 }, { "epoch": 0.7718529340150961, "grad_norm": 1.3671875, "learning_rate": 0.0002607678794476119, "loss": 1.1312, "step": 11095 }, { "epoch": 0.7719225016522314, "grad_norm": 1.0546875, "learning_rate": 0.00026061614754666697, "loss": 0.766, "step": 11096 }, { "epoch": 0.7719920692893666, "grad_norm": 1.40625, "learning_rate": 0.0002604644531879069, "loss": 0.8864, "step": 11097 }, { "epoch": 0.7720616369265018, "grad_norm": 1.21875, "learning_rate": 0.0002603127963790347, "loss": 0.7961, "step": 11098 }, { "epoch": 0.772131204563637, "grad_norm": 1.125, "learning_rate": 0.0002601611771277505, "loss": 0.6875, "step": 11099 }, { "epoch": 0.7722007722007722, "grad_norm": 0.85546875, "learning_rate": 0.0002600095954417522, "loss": 0.8267, "step": 11100 }, { "epoch": 0.7722703398379074, "grad_norm": 1.125, "learning_rate": 0.00025985805132873685, "loss": 0.9563, "step": 11101 }, { "epoch": 0.7723399074750427, "grad_norm": 1.328125, "learning_rate": 0.0002597065447963993, "loss": 0.7816, "step": 11102 }, { "epoch": 0.7724094751121778, "grad_norm": 0.9453125, "learning_rate": 0.0002595550758524322, "loss": 0.72, "step": 11103 }, { "epoch": 0.772479042749313, "grad_norm": 1.2734375, "learning_rate": 0.0002594036445045258, "loss": 0.9023, "step": 11104 }, { "epoch": 0.7725486103864482, "grad_norm": 1.3828125, "learning_rate": 0.0002592522507603695, "loss": 0.8009, "step": 11105 }, { "epoch": 0.7726181780235835, "grad_norm": 3.078125, "learning_rate": 0.0002591008946276506, "loss": 0.6459, "step": 11106 }, { "epoch": 0.7726877456607186, "grad_norm": 1.0078125, "learning_rate": 0.00025894957611405356, "loss": 0.6435, "step": 11107 }, { "epoch": 0.7727573132978538, "grad_norm": 0.94921875, "learning_rate": 0.00025879829522726215, "loss": 0.8786, "step": 11108 }, { "epoch": 0.7728268809349891, "grad_norm": 0.890625, "learning_rate": 0.0002586470519749571, "loss": 0.8265, "step": 11109 }, { "epoch": 0.7728964485721243, "grad_norm": 1.25, "learning_rate": 0.00025849584636481826, "loss": 1.0323, "step": 11110 }, { "epoch": 0.7729660162092594, "grad_norm": 1.015625, "learning_rate": 0.0002583446784045227, "loss": 0.7944, "step": 11111 }, { "epoch": 0.7730355838463947, "grad_norm": 0.83984375, "learning_rate": 0.00025819354810174643, "loss": 0.4472, "step": 11112 }, { "epoch": 0.7731051514835299, "grad_norm": 1.03125, "learning_rate": 0.00025804245546416274, "loss": 0.9385, "step": 11113 }, { "epoch": 0.773174719120665, "grad_norm": 1.1171875, "learning_rate": 0.0002578914004994429, "loss": 1.0097, "step": 11114 }, { "epoch": 0.7732442867578003, "grad_norm": 1.109375, "learning_rate": 0.0002577403832152578, "loss": 0.9413, "step": 11115 }, { "epoch": 0.7733138543949355, "grad_norm": 1.140625, "learning_rate": 0.00025758940361927474, "loss": 0.7039, "step": 11116 }, { "epoch": 0.7733834220320707, "grad_norm": 1.34375, "learning_rate": 0.00025743846171915973, "loss": 0.913, "step": 11117 }, { "epoch": 0.7734529896692058, "grad_norm": 0.97265625, "learning_rate": 0.0002572875575225766, "loss": 0.6947, "step": 11118 }, { "epoch": 0.7735225573063411, "grad_norm": 1.125, "learning_rate": 0.00025713669103718774, "loss": 0.8442, "step": 11119 }, { "epoch": 0.7735921249434763, "grad_norm": 1.0859375, "learning_rate": 0.0002569858622706537, "loss": 0.8794, "step": 11120 }, { "epoch": 0.7736616925806115, "grad_norm": 1.234375, "learning_rate": 0.0002568350712306322, "loss": 0.9511, "step": 11121 }, { "epoch": 0.7737312602177467, "grad_norm": 1.109375, "learning_rate": 0.00025668431792478033, "loss": 0.8524, "step": 11122 }, { "epoch": 0.7738008278548819, "grad_norm": 0.9296875, "learning_rate": 0.00025653360236075186, "loss": 0.7128, "step": 11123 }, { "epoch": 0.7738703954920171, "grad_norm": 1.3203125, "learning_rate": 0.00025638292454619995, "loss": 1.1436, "step": 11124 }, { "epoch": 0.7739399631291524, "grad_norm": 1.3828125, "learning_rate": 0.0002562322844887748, "loss": 0.7928, "step": 11125 }, { "epoch": 0.7740095307662875, "grad_norm": 0.82421875, "learning_rate": 0.0002560816821961256, "loss": 0.5738, "step": 11126 }, { "epoch": 0.7740790984034227, "grad_norm": 1.3203125, "learning_rate": 0.0002559311176758986, "loss": 0.8763, "step": 11127 }, { "epoch": 0.774148666040558, "grad_norm": 1.1953125, "learning_rate": 0.00025578059093573946, "loss": 0.836, "step": 11128 }, { "epoch": 0.7742182336776932, "grad_norm": 1.2578125, "learning_rate": 0.0002556301019832905, "loss": 0.962, "step": 11129 }, { "epoch": 0.7742878013148283, "grad_norm": 1.1328125, "learning_rate": 0.0002554796508261933, "loss": 0.8005, "step": 11130 }, { "epoch": 0.7743573689519635, "grad_norm": 1.1953125, "learning_rate": 0.0002553292374720868, "loss": 0.7667, "step": 11131 }, { "epoch": 0.7744269365890988, "grad_norm": 1.1953125, "learning_rate": 0.00025517886192860786, "loss": 0.7471, "step": 11132 }, { "epoch": 0.774496504226234, "grad_norm": 1.1953125, "learning_rate": 0.0002550285242033922, "loss": 0.9119, "step": 11133 }, { "epoch": 0.7745660718633691, "grad_norm": 1.1484375, "learning_rate": 0.00025487822430407336, "loss": 0.709, "step": 11134 }, { "epoch": 0.7746356395005044, "grad_norm": 1.3359375, "learning_rate": 0.00025472796223828265, "loss": 0.9223, "step": 11135 }, { "epoch": 0.7747052071376396, "grad_norm": 1.140625, "learning_rate": 0.00025457773801364935, "loss": 0.8418, "step": 11136 }, { "epoch": 0.7747747747747747, "grad_norm": 1.3359375, "learning_rate": 0.0002544275516378012, "loss": 0.933, "step": 11137 }, { "epoch": 0.77484434241191, "grad_norm": 1.0859375, "learning_rate": 0.00025427740311836434, "loss": 0.7165, "step": 11138 }, { "epoch": 0.7749139100490452, "grad_norm": 1.1484375, "learning_rate": 0.00025412729246296193, "loss": 0.6515, "step": 11139 }, { "epoch": 0.7749834776861804, "grad_norm": 1.34375, "learning_rate": 0.0002539772196792164, "loss": 0.8536, "step": 11140 }, { "epoch": 0.7750530453233156, "grad_norm": 1.375, "learning_rate": 0.0002538271847747472, "loss": 0.9593, "step": 11141 }, { "epoch": 0.7751226129604508, "grad_norm": 1.2421875, "learning_rate": 0.00025367718775717277, "loss": 0.6178, "step": 11142 }, { "epoch": 0.775192180597586, "grad_norm": 1.4375, "learning_rate": 0.0002535272286341087, "loss": 1.0278, "step": 11143 }, { "epoch": 0.7752617482347212, "grad_norm": 1.0859375, "learning_rate": 0.0002533773074131699, "loss": 0.6262, "step": 11144 }, { "epoch": 0.7753313158718564, "grad_norm": 1.296875, "learning_rate": 0.0002532274241019681, "loss": 0.9228, "step": 11145 }, { "epoch": 0.7754008835089916, "grad_norm": 0.96484375, "learning_rate": 0.000253077578708113, "loss": 0.7004, "step": 11146 }, { "epoch": 0.7754704511461268, "grad_norm": 1.390625, "learning_rate": 0.0002529277712392144, "loss": 1.1014, "step": 11147 }, { "epoch": 0.7755400187832621, "grad_norm": 0.89453125, "learning_rate": 0.000252778001702878, "loss": 0.6228, "step": 11148 }, { "epoch": 0.7756095864203972, "grad_norm": 1.34375, "learning_rate": 0.0002526282701067084, "loss": 0.7733, "step": 11149 }, { "epoch": 0.7756791540575324, "grad_norm": 1.2109375, "learning_rate": 0.00025247857645830784, "loss": 0.9438, "step": 11150 }, { "epoch": 0.7757487216946677, "grad_norm": 0.94140625, "learning_rate": 0.00025232892076527746, "loss": 0.8475, "step": 11151 }, { "epoch": 0.7758182893318029, "grad_norm": 0.984375, "learning_rate": 0.0002521793030352163, "loss": 0.9927, "step": 11152 }, { "epoch": 0.775887856968938, "grad_norm": 1.046875, "learning_rate": 0.0002520297232757205, "loss": 0.6633, "step": 11153 }, { "epoch": 0.7759574246060733, "grad_norm": 1.2265625, "learning_rate": 0.0002518801814943855, "loss": 0.8391, "step": 11154 }, { "epoch": 0.7760269922432085, "grad_norm": 0.97265625, "learning_rate": 0.00025173067769880384, "loss": 0.7216, "step": 11155 }, { "epoch": 0.7760965598803437, "grad_norm": 1.3203125, "learning_rate": 0.00025158121189656715, "loss": 0.9152, "step": 11156 }, { "epoch": 0.7761661275174788, "grad_norm": 1.3203125, "learning_rate": 0.0002514317840952639, "loss": 0.7242, "step": 11157 }, { "epoch": 0.7762356951546141, "grad_norm": 1.2421875, "learning_rate": 0.0002512823943024819, "loss": 0.9426, "step": 11158 }, { "epoch": 0.7763052627917493, "grad_norm": 1.15625, "learning_rate": 0.0002511330425258057, "loss": 0.9526, "step": 11159 }, { "epoch": 0.7763748304288844, "grad_norm": 0.9375, "learning_rate": 0.00025098372877281914, "loss": 0.7022, "step": 11160 }, { "epoch": 0.7764443980660197, "grad_norm": 2.15625, "learning_rate": 0.00025083445305110387, "loss": 0.53, "step": 11161 }, { "epoch": 0.7765139657031549, "grad_norm": 1.25, "learning_rate": 0.00025068521536823887, "loss": 0.7777, "step": 11162 }, { "epoch": 0.7765835333402901, "grad_norm": 1.2421875, "learning_rate": 0.00025053601573180186, "loss": 0.9637, "step": 11163 }, { "epoch": 0.7766531009774253, "grad_norm": 0.90625, "learning_rate": 0.000250386854149368, "loss": 0.8286, "step": 11164 }, { "epoch": 0.7767226686145605, "grad_norm": 1.0859375, "learning_rate": 0.0002502377306285115, "loss": 0.919, "step": 11165 }, { "epoch": 0.7767922362516957, "grad_norm": 1.0078125, "learning_rate": 0.00025008864517680416, "loss": 0.8048, "step": 11166 }, { "epoch": 0.776861803888831, "grad_norm": 1.0625, "learning_rate": 0.0002499395978018153, "loss": 0.8079, "step": 11167 }, { "epoch": 0.7769313715259661, "grad_norm": 1.0703125, "learning_rate": 0.0002497905885111135, "loss": 0.8159, "step": 11168 }, { "epoch": 0.7770009391631013, "grad_norm": 1.046875, "learning_rate": 0.00024964161731226374, "loss": 0.6467, "step": 11169 }, { "epoch": 0.7770705068002365, "grad_norm": 1.1328125, "learning_rate": 0.0002494926842128311, "loss": 0.8966, "step": 11170 }, { "epoch": 0.7771400744373718, "grad_norm": 1.1796875, "learning_rate": 0.00024934378922037673, "loss": 0.8229, "step": 11171 }, { "epoch": 0.7772096420745069, "grad_norm": 0.98828125, "learning_rate": 0.00024919493234246137, "loss": 0.7604, "step": 11172 }, { "epoch": 0.7772792097116421, "grad_norm": 1.046875, "learning_rate": 0.00024904611358664286, "loss": 0.7434, "step": 11173 }, { "epoch": 0.7773487773487774, "grad_norm": 1.453125, "learning_rate": 0.0002488973329604774, "loss": 0.9076, "step": 11174 }, { "epoch": 0.7774183449859126, "grad_norm": 0.8984375, "learning_rate": 0.0002487485904715201, "loss": 0.6852, "step": 11175 }, { "epoch": 0.7774879126230477, "grad_norm": 1.0390625, "learning_rate": 0.0002485998861273226, "loss": 0.9038, "step": 11176 }, { "epoch": 0.777557480260183, "grad_norm": 1.625, "learning_rate": 0.00024845121993543565, "loss": 0.9135, "step": 11177 }, { "epoch": 0.7776270478973182, "grad_norm": 1.015625, "learning_rate": 0.000248302591903407, "loss": 0.7006, "step": 11178 }, { "epoch": 0.7776966155344534, "grad_norm": 0.91796875, "learning_rate": 0.00024815400203878445, "loss": 0.6716, "step": 11179 }, { "epoch": 0.7777661831715886, "grad_norm": 1.1875, "learning_rate": 0.00024800545034911226, "loss": 0.8515, "step": 11180 }, { "epoch": 0.7778357508087238, "grad_norm": 1.09375, "learning_rate": 0.00024785693684193256, "loss": 0.8551, "step": 11181 }, { "epoch": 0.777905318445859, "grad_norm": 1.09375, "learning_rate": 0.0002477084615247868, "loss": 1.0675, "step": 11182 }, { "epoch": 0.7779748860829941, "grad_norm": 1.3359375, "learning_rate": 0.0002475600244052133, "loss": 0.8185, "step": 11183 }, { "epoch": 0.7780444537201294, "grad_norm": 1.0, "learning_rate": 0.0002474116254907495, "loss": 0.7336, "step": 11184 }, { "epoch": 0.7781140213572646, "grad_norm": 1.1875, "learning_rate": 0.00024726326478892956, "loss": 0.8614, "step": 11185 }, { "epoch": 0.7781835889943998, "grad_norm": 1.296875, "learning_rate": 0.00024711494230728737, "loss": 0.8187, "step": 11186 }, { "epoch": 0.778253156631535, "grad_norm": 0.91015625, "learning_rate": 0.0002469666580533534, "loss": 0.6623, "step": 11187 }, { "epoch": 0.7783227242686702, "grad_norm": 0.94921875, "learning_rate": 0.0002468184120346568, "loss": 0.7252, "step": 11188 }, { "epoch": 0.7783922919058054, "grad_norm": 1.0703125, "learning_rate": 0.0002466702042587253, "loss": 0.6136, "step": 11189 }, { "epoch": 0.7784618595429407, "grad_norm": 1.2734375, "learning_rate": 0.00024652203473308375, "loss": 1.0125, "step": 11190 }, { "epoch": 0.7785314271800758, "grad_norm": 1.171875, "learning_rate": 0.00024637390346525544, "loss": 0.8981, "step": 11191 }, { "epoch": 0.778600994817211, "grad_norm": 1.5078125, "learning_rate": 0.0002462258104627612, "loss": 1.1634, "step": 11192 }, { "epoch": 0.7786705624543463, "grad_norm": 1.1484375, "learning_rate": 0.0002460777557331215, "loss": 0.8263, "step": 11193 }, { "epoch": 0.7787401300914815, "grad_norm": 1.0078125, "learning_rate": 0.0002459297392838534, "loss": 0.7874, "step": 11194 }, { "epoch": 0.7788096977286166, "grad_norm": 1.359375, "learning_rate": 0.0002457817611224721, "loss": 0.9767, "step": 11195 }, { "epoch": 0.7788792653657518, "grad_norm": 1.1328125, "learning_rate": 0.00024563382125649167, "loss": 0.7474, "step": 11196 }, { "epoch": 0.7789488330028871, "grad_norm": 1.0859375, "learning_rate": 0.00024548591969342313, "loss": 0.8021, "step": 11197 }, { "epoch": 0.7790184006400223, "grad_norm": 1.125, "learning_rate": 0.000245338056440777, "loss": 0.8787, "step": 11198 }, { "epoch": 0.7790879682771574, "grad_norm": 1.7265625, "learning_rate": 0.00024519023150606026, "loss": 0.7636, "step": 11199 }, { "epoch": 0.7791575359142927, "grad_norm": 1.0390625, "learning_rate": 0.0002450424448967793, "loss": 0.6921, "step": 11200 }, { "epoch": 0.7792271035514279, "grad_norm": 0.9296875, "learning_rate": 0.0002448946966204374, "loss": 0.548, "step": 11201 }, { "epoch": 0.779296671188563, "grad_norm": 1.2890625, "learning_rate": 0.0002447469866845371, "loss": 1.0118, "step": 11202 }, { "epoch": 0.7793662388256983, "grad_norm": 1.109375, "learning_rate": 0.00024459931509657776, "loss": 0.8319, "step": 11203 }, { "epoch": 0.7794358064628335, "grad_norm": 1.1171875, "learning_rate": 0.00024445168186405797, "loss": 0.794, "step": 11204 }, { "epoch": 0.7795053740999687, "grad_norm": 0.95703125, "learning_rate": 0.00024430408699447324, "loss": 0.6468, "step": 11205 }, { "epoch": 0.779574941737104, "grad_norm": 1.0625, "learning_rate": 0.00024415653049531807, "loss": 0.6303, "step": 11206 }, { "epoch": 0.7796445093742391, "grad_norm": 1.2421875, "learning_rate": 0.0002440090123740848, "loss": 0.7152, "step": 11207 }, { "epoch": 0.7797140770113743, "grad_norm": 1.1484375, "learning_rate": 0.00024386153263826339, "loss": 0.8969, "step": 11208 }, { "epoch": 0.7797836446485095, "grad_norm": 1.234375, "learning_rate": 0.00024371409129534205, "loss": 0.9184, "step": 11209 }, { "epoch": 0.7798532122856447, "grad_norm": 1.0546875, "learning_rate": 0.0002435666883528067, "loss": 0.9784, "step": 11210 }, { "epoch": 0.7799227799227799, "grad_norm": 0.9453125, "learning_rate": 0.0002434193238181428, "loss": 0.7509, "step": 11211 }, { "epoch": 0.7799923475599151, "grad_norm": 1.265625, "learning_rate": 0.00024327199769883222, "loss": 0.6518, "step": 11212 }, { "epoch": 0.7800619151970504, "grad_norm": 1.25, "learning_rate": 0.00024312471000235503, "loss": 0.8756, "step": 11213 }, { "epoch": 0.7801314828341855, "grad_norm": 0.98046875, "learning_rate": 0.00024297746073619043, "loss": 1.0232, "step": 11214 }, { "epoch": 0.7802010504713207, "grad_norm": 1.09375, "learning_rate": 0.00024283024990781444, "loss": 0.7334, "step": 11215 }, { "epoch": 0.780270618108456, "grad_norm": 1.2109375, "learning_rate": 0.0002426830775247022, "loss": 0.8859, "step": 11216 }, { "epoch": 0.7803401857455912, "grad_norm": 1.203125, "learning_rate": 0.00024253594359432585, "loss": 0.7879, "step": 11217 }, { "epoch": 0.7804097533827263, "grad_norm": 1.4609375, "learning_rate": 0.00024238884812415674, "loss": 0.9066, "step": 11218 }, { "epoch": 0.7804793210198615, "grad_norm": 1.1015625, "learning_rate": 0.0002422417911216629, "loss": 0.7788, "step": 11219 }, { "epoch": 0.7805488886569968, "grad_norm": 0.9453125, "learning_rate": 0.00024209477259431157, "loss": 0.5782, "step": 11220 }, { "epoch": 0.780618456294132, "grad_norm": 0.97265625, "learning_rate": 0.00024194779254956778, "loss": 0.6061, "step": 11221 }, { "epoch": 0.7806880239312671, "grad_norm": 1.5625, "learning_rate": 0.00024180085099489423, "loss": 1.0929, "step": 11222 }, { "epoch": 0.7807575915684024, "grad_norm": 0.85546875, "learning_rate": 0.00024165394793775196, "loss": 0.5921, "step": 11223 }, { "epoch": 0.7808271592055376, "grad_norm": 1.1953125, "learning_rate": 0.00024150708338559922, "loss": 0.6992, "step": 11224 }, { "epoch": 0.7808967268426728, "grad_norm": 1.0, "learning_rate": 0.00024136025734589428, "loss": 0.7722, "step": 11225 }, { "epoch": 0.780966294479808, "grad_norm": 1.1015625, "learning_rate": 0.0002412134698260916, "loss": 0.8283, "step": 11226 }, { "epoch": 0.7810358621169432, "grad_norm": 1.109375, "learning_rate": 0.00024106672083364412, "loss": 0.8134, "step": 11227 }, { "epoch": 0.7811054297540784, "grad_norm": 1.5546875, "learning_rate": 0.00024092001037600354, "loss": 0.8106, "step": 11228 }, { "epoch": 0.7811749973912137, "grad_norm": 1.1875, "learning_rate": 0.00024077333846061856, "loss": 0.5998, "step": 11229 }, { "epoch": 0.7812445650283488, "grad_norm": 1.15625, "learning_rate": 0.0002406267050949369, "loss": 0.7085, "step": 11230 }, { "epoch": 0.781314132665484, "grad_norm": 1.203125, "learning_rate": 0.00024048011028640328, "loss": 0.9104, "step": 11231 }, { "epoch": 0.7813837003026192, "grad_norm": 1.15625, "learning_rate": 0.00024033355404246172, "loss": 0.8226, "step": 11232 }, { "epoch": 0.7814532679397544, "grad_norm": 1.2265625, "learning_rate": 0.00024018703637055305, "loss": 0.809, "step": 11233 }, { "epoch": 0.7815228355768896, "grad_norm": 1.0078125, "learning_rate": 0.0002400405572781168, "loss": 0.7225, "step": 11234 }, { "epoch": 0.7815924032140248, "grad_norm": 0.9921875, "learning_rate": 0.000239894116772591, "loss": 0.7899, "step": 11235 }, { "epoch": 0.7816619708511601, "grad_norm": 1.1328125, "learning_rate": 0.00023974771486141066, "loss": 0.8799, "step": 11236 }, { "epoch": 0.7817315384882952, "grad_norm": 1.1015625, "learning_rate": 0.00023960135155200914, "loss": 0.8214, "step": 11237 }, { "epoch": 0.7818011061254304, "grad_norm": 1.09375, "learning_rate": 0.0002394550268518183, "loss": 0.7401, "step": 11238 }, { "epoch": 0.7818706737625657, "grad_norm": 1.234375, "learning_rate": 0.00023930874076826802, "loss": 0.823, "step": 11239 }, { "epoch": 0.7819402413997009, "grad_norm": 0.984375, "learning_rate": 0.00023916249330878581, "loss": 0.5478, "step": 11240 }, { "epoch": 0.782009809036836, "grad_norm": 1.3671875, "learning_rate": 0.00023901628448079693, "loss": 0.804, "step": 11241 }, { "epoch": 0.7820793766739713, "grad_norm": 0.9921875, "learning_rate": 0.00023887011429172568, "loss": 0.8168, "step": 11242 }, { "epoch": 0.7821489443111065, "grad_norm": 1.15625, "learning_rate": 0.00023872398274899344, "loss": 0.8006, "step": 11243 }, { "epoch": 0.7822185119482417, "grad_norm": 0.83984375, "learning_rate": 0.0002385778898600206, "loss": 0.5458, "step": 11244 }, { "epoch": 0.7822880795853768, "grad_norm": 0.96484375, "learning_rate": 0.00023843183563222425, "loss": 0.9043, "step": 11245 }, { "epoch": 0.7823576472225121, "grad_norm": 1.015625, "learning_rate": 0.00023828582007302102, "loss": 0.9319, "step": 11246 }, { "epoch": 0.7824272148596473, "grad_norm": 0.9296875, "learning_rate": 0.00023813984318982428, "loss": 0.6755, "step": 11247 }, { "epoch": 0.7824967824967825, "grad_norm": 1.09375, "learning_rate": 0.00023799390499004626, "loss": 0.7779, "step": 11248 }, { "epoch": 0.7825663501339177, "grad_norm": 1.1171875, "learning_rate": 0.0002378480054810972, "loss": 0.7601, "step": 11249 }, { "epoch": 0.7826359177710529, "grad_norm": 1.0, "learning_rate": 0.00023770214467038487, "loss": 0.6702, "step": 11250 }, { "epoch": 0.7827054854081881, "grad_norm": 0.94921875, "learning_rate": 0.00023755632256531513, "loss": 0.6623, "step": 11251 }, { "epoch": 0.7827750530453234, "grad_norm": 1.203125, "learning_rate": 0.00023741053917329224, "loss": 0.868, "step": 11252 }, { "epoch": 0.7828446206824585, "grad_norm": 1.0234375, "learning_rate": 0.00023726479450171878, "loss": 0.6187, "step": 11253 }, { "epoch": 0.7829141883195937, "grad_norm": 1.109375, "learning_rate": 0.0002371190885579946, "loss": 1.0516, "step": 11254 }, { "epoch": 0.782983755956729, "grad_norm": 1.4296875, "learning_rate": 0.0002369734213495176, "loss": 1.0133, "step": 11255 }, { "epoch": 0.7830533235938641, "grad_norm": 1.109375, "learning_rate": 0.00023682779288368438, "loss": 0.8297, "step": 11256 }, { "epoch": 0.7831228912309993, "grad_norm": 0.80859375, "learning_rate": 0.00023668220316788935, "loss": 0.5686, "step": 11257 }, { "epoch": 0.7831924588681345, "grad_norm": 1.0859375, "learning_rate": 0.0002365366522095247, "loss": 0.82, "step": 11258 }, { "epoch": 0.7832620265052698, "grad_norm": 1.03125, "learning_rate": 0.00023639114001598038, "loss": 0.6162, "step": 11259 }, { "epoch": 0.7833315941424049, "grad_norm": 1.234375, "learning_rate": 0.00023624566659464542, "loss": 0.7459, "step": 11260 }, { "epoch": 0.7834011617795401, "grad_norm": 1.109375, "learning_rate": 0.00023610023195290563, "loss": 0.6846, "step": 11261 }, { "epoch": 0.7834707294166754, "grad_norm": 1.515625, "learning_rate": 0.0002359548360981457, "loss": 0.9254, "step": 11262 }, { "epoch": 0.7835402970538106, "grad_norm": 1.1171875, "learning_rate": 0.0002358094790377484, "loss": 0.8209, "step": 11263 }, { "epoch": 0.7836098646909457, "grad_norm": 1.46875, "learning_rate": 0.0002356641607790939, "loss": 1.0074, "step": 11264 }, { "epoch": 0.783679432328081, "grad_norm": 1.1640625, "learning_rate": 0.00023551888132956056, "loss": 0.7177, "step": 11265 }, { "epoch": 0.7837489999652162, "grad_norm": 0.76953125, "learning_rate": 0.00023537364069652511, "loss": 0.4447, "step": 11266 }, { "epoch": 0.7838185676023514, "grad_norm": 0.953125, "learning_rate": 0.00023522843888736257, "loss": 0.7159, "step": 11267 }, { "epoch": 0.7838881352394866, "grad_norm": 1.046875, "learning_rate": 0.000235083275909445, "loss": 0.7927, "step": 11268 }, { "epoch": 0.7839577028766218, "grad_norm": 1.0859375, "learning_rate": 0.0002349381517701431, "loss": 0.7813, "step": 11269 }, { "epoch": 0.784027270513757, "grad_norm": 1.3046875, "learning_rate": 0.00023479306647682552, "loss": 0.8981, "step": 11270 }, { "epoch": 0.7840968381508922, "grad_norm": 1.34375, "learning_rate": 0.00023464802003685947, "loss": 0.9437, "step": 11271 }, { "epoch": 0.7841664057880274, "grad_norm": 0.9765625, "learning_rate": 0.0002345030124576093, "loss": 0.7387, "step": 11272 }, { "epoch": 0.7842359734251626, "grad_norm": 1.1640625, "learning_rate": 0.00023435804374643743, "loss": 0.9306, "step": 11273 }, { "epoch": 0.7843055410622978, "grad_norm": 0.9296875, "learning_rate": 0.00023421311391070532, "loss": 0.6672, "step": 11274 }, { "epoch": 0.784375108699433, "grad_norm": 1.109375, "learning_rate": 0.00023406822295777107, "loss": 1.0116, "step": 11275 }, { "epoch": 0.7844446763365682, "grad_norm": 1.3203125, "learning_rate": 0.00023392337089499194, "loss": 0.7879, "step": 11276 }, { "epoch": 0.7845142439737034, "grad_norm": 1.171875, "learning_rate": 0.000233778557729723, "loss": 0.7684, "step": 11277 }, { "epoch": 0.7845838116108387, "grad_norm": 1.21875, "learning_rate": 0.00023363378346931684, "loss": 0.7108, "step": 11278 }, { "epoch": 0.7846533792479738, "grad_norm": 1.1796875, "learning_rate": 0.00023348904812112403, "loss": 0.7429, "step": 11279 }, { "epoch": 0.784722946885109, "grad_norm": 1.265625, "learning_rate": 0.00023334435169249402, "loss": 0.7404, "step": 11280 }, { "epoch": 0.7847925145222443, "grad_norm": 1.34375, "learning_rate": 0.0002331996941907738, "loss": 0.842, "step": 11281 }, { "epoch": 0.7848620821593795, "grad_norm": 1.0859375, "learning_rate": 0.00023305507562330807, "loss": 0.8211, "step": 11282 }, { "epoch": 0.7849316497965146, "grad_norm": 1.375, "learning_rate": 0.00023291049599743975, "loss": 0.8249, "step": 11283 }, { "epoch": 0.7850012174336498, "grad_norm": 0.98828125, "learning_rate": 0.0002327659553205099, "loss": 0.6402, "step": 11284 }, { "epoch": 0.7850707850707851, "grad_norm": 1.5234375, "learning_rate": 0.00023262145359985808, "loss": 0.6879, "step": 11285 }, { "epoch": 0.7851403527079203, "grad_norm": 1.125, "learning_rate": 0.00023247699084282092, "loss": 0.69, "step": 11286 }, { "epoch": 0.7852099203450554, "grad_norm": 0.984375, "learning_rate": 0.00023233256705673333, "loss": 0.6935, "step": 11287 }, { "epoch": 0.7852794879821907, "grad_norm": 1.0078125, "learning_rate": 0.00023218818224892868, "loss": 0.6691, "step": 11288 }, { "epoch": 0.7853490556193259, "grad_norm": 0.95703125, "learning_rate": 0.0002320438364267383, "loss": 0.9906, "step": 11289 }, { "epoch": 0.7854186232564611, "grad_norm": 1.078125, "learning_rate": 0.00023189952959749106, "loss": 0.7326, "step": 11290 }, { "epoch": 0.7854881908935963, "grad_norm": 1.15625, "learning_rate": 0.00023175526176851403, "loss": 0.7043, "step": 11291 }, { "epoch": 0.7855577585307315, "grad_norm": 1.0546875, "learning_rate": 0.00023161103294713282, "loss": 0.7164, "step": 11292 }, { "epoch": 0.7856273261678667, "grad_norm": 1.0234375, "learning_rate": 0.00023146684314067002, "loss": 1.0191, "step": 11293 }, { "epoch": 0.785696893805002, "grad_norm": 1.234375, "learning_rate": 0.00023132269235644733, "loss": 0.9472, "step": 11294 }, { "epoch": 0.7857664614421371, "grad_norm": 0.99609375, "learning_rate": 0.0002311785806017842, "loss": 0.6708, "step": 11295 }, { "epoch": 0.7858360290792723, "grad_norm": 1.1328125, "learning_rate": 0.0002310345078839975, "loss": 0.707, "step": 11296 }, { "epoch": 0.7859055967164075, "grad_norm": 0.8984375, "learning_rate": 0.00023089047421040243, "loss": 0.5383, "step": 11297 }, { "epoch": 0.7859751643535428, "grad_norm": 1.296875, "learning_rate": 0.0002307464795883124, "loss": 0.7086, "step": 11298 }, { "epoch": 0.7860447319906779, "grad_norm": 1.03125, "learning_rate": 0.00023060252402503913, "loss": 0.6749, "step": 11299 }, { "epoch": 0.7861142996278131, "grad_norm": 1.1640625, "learning_rate": 0.0002304586075278916, "loss": 0.8607, "step": 11300 }, { "epoch": 0.7861838672649484, "grad_norm": 1.125, "learning_rate": 0.00023031473010417703, "loss": 0.8593, "step": 11301 }, { "epoch": 0.7862534349020835, "grad_norm": 0.8828125, "learning_rate": 0.00023017089176120088, "loss": 0.6794, "step": 11302 }, { "epoch": 0.7863230025392187, "grad_norm": 1.3828125, "learning_rate": 0.00023002709250626686, "loss": 0.9, "step": 11303 }, { "epoch": 0.786392570176354, "grad_norm": 1.3828125, "learning_rate": 0.00022988333234667626, "loss": 0.8108, "step": 11304 }, { "epoch": 0.7864621378134892, "grad_norm": 1.34375, "learning_rate": 0.00022973961128972797, "loss": 0.6458, "step": 11305 }, { "epoch": 0.7865317054506243, "grad_norm": 1.046875, "learning_rate": 0.0002295959293427201, "loss": 0.7002, "step": 11306 }, { "epoch": 0.7866012730877596, "grad_norm": 1.046875, "learning_rate": 0.0002294522865129476, "loss": 0.8288, "step": 11307 }, { "epoch": 0.7866708407248948, "grad_norm": 1.140625, "learning_rate": 0.00022930868280770413, "loss": 0.841, "step": 11308 }, { "epoch": 0.78674040836203, "grad_norm": 0.99609375, "learning_rate": 0.00022916511823428142, "loss": 0.6988, "step": 11309 }, { "epoch": 0.7868099759991651, "grad_norm": 0.93359375, "learning_rate": 0.00022902159279996871, "loss": 0.6887, "step": 11310 }, { "epoch": 0.7868795436363004, "grad_norm": 1.09375, "learning_rate": 0.00022887810651205331, "loss": 0.9695, "step": 11311 }, { "epoch": 0.7869491112734356, "grad_norm": 1.2109375, "learning_rate": 0.00022873465937782079, "loss": 0.7344, "step": 11312 }, { "epoch": 0.7870186789105708, "grad_norm": 1.09375, "learning_rate": 0.00022859125140455515, "loss": 0.9219, "step": 11313 }, { "epoch": 0.787088246547706, "grad_norm": 1.25, "learning_rate": 0.00022844788259953765, "loss": 0.923, "step": 11314 }, { "epoch": 0.7871578141848412, "grad_norm": 1.2578125, "learning_rate": 0.00022830455297004738, "loss": 0.7885, "step": 11315 }, { "epoch": 0.7872273818219764, "grad_norm": 1.1953125, "learning_rate": 0.00022816126252336223, "loss": 0.9752, "step": 11316 }, { "epoch": 0.7872969494591117, "grad_norm": 0.8828125, "learning_rate": 0.00022801801126675814, "loss": 0.7138, "step": 11317 }, { "epoch": 0.7873665170962468, "grad_norm": 1.1796875, "learning_rate": 0.00022787479920750842, "loss": 0.9382, "step": 11318 }, { "epoch": 0.787436084733382, "grad_norm": 1.0390625, "learning_rate": 0.00022773162635288425, "loss": 0.7596, "step": 11319 }, { "epoch": 0.7875056523705173, "grad_norm": 1.1015625, "learning_rate": 0.0002275884927101557, "loss": 0.6886, "step": 11320 }, { "epoch": 0.7875752200076525, "grad_norm": 1.234375, "learning_rate": 0.0002274453982865904, "loss": 0.6172, "step": 11321 }, { "epoch": 0.7876447876447876, "grad_norm": 1.09375, "learning_rate": 0.00022730234308945352, "loss": 0.5757, "step": 11322 }, { "epoch": 0.7877143552819228, "grad_norm": 1.015625, "learning_rate": 0.00022715932712600928, "loss": 0.8766, "step": 11323 }, { "epoch": 0.7877839229190581, "grad_norm": 1.296875, "learning_rate": 0.00022701635040351897, "loss": 0.6226, "step": 11324 }, { "epoch": 0.7878534905561932, "grad_norm": 0.8984375, "learning_rate": 0.00022687341292924212, "loss": 0.5474, "step": 11325 }, { "epoch": 0.7879230581933284, "grad_norm": 1.3515625, "learning_rate": 0.00022673051471043637, "loss": 0.6295, "step": 11326 }, { "epoch": 0.7879926258304637, "grad_norm": 1.1640625, "learning_rate": 0.00022658765575435792, "loss": 0.7875, "step": 11327 }, { "epoch": 0.7880621934675989, "grad_norm": 1.1484375, "learning_rate": 0.00022644483606825994, "loss": 0.7761, "step": 11328 }, { "epoch": 0.788131761104734, "grad_norm": 1.203125, "learning_rate": 0.00022630205565939387, "loss": 0.8934, "step": 11329 }, { "epoch": 0.7882013287418693, "grad_norm": 1.078125, "learning_rate": 0.00022615931453500972, "loss": 0.9277, "step": 11330 }, { "epoch": 0.7882708963790045, "grad_norm": 1.1171875, "learning_rate": 0.0002260166127023554, "loss": 0.5723, "step": 11331 }, { "epoch": 0.7883404640161397, "grad_norm": 1.0703125, "learning_rate": 0.0002258739501686763, "loss": 0.7372, "step": 11332 }, { "epoch": 0.788410031653275, "grad_norm": 1.1328125, "learning_rate": 0.0002257313269412159, "loss": 0.7885, "step": 11333 }, { "epoch": 0.7884795992904101, "grad_norm": 1.2109375, "learning_rate": 0.0002255887430272161, "loss": 0.7335, "step": 11334 }, { "epoch": 0.7885491669275453, "grad_norm": 1.390625, "learning_rate": 0.0002254461984339169, "loss": 1.1112, "step": 11335 }, { "epoch": 0.7886187345646805, "grad_norm": 0.98828125, "learning_rate": 0.00022530369316855537, "loss": 0.7953, "step": 11336 }, { "epoch": 0.7886883022018157, "grad_norm": 1.140625, "learning_rate": 0.00022516122723836786, "loss": 0.5744, "step": 11337 }, { "epoch": 0.7887578698389509, "grad_norm": 1.046875, "learning_rate": 0.00022501880065058777, "loss": 0.859, "step": 11338 }, { "epoch": 0.7888274374760861, "grad_norm": 0.8984375, "learning_rate": 0.00022487641341244647, "loss": 0.7815, "step": 11339 }, { "epoch": 0.7888970051132214, "grad_norm": 1.0859375, "learning_rate": 0.00022473406553117403, "loss": 0.7874, "step": 11340 }, { "epoch": 0.7889665727503565, "grad_norm": 0.95703125, "learning_rate": 0.00022459175701399837, "loss": 0.8343, "step": 11341 }, { "epoch": 0.7890361403874917, "grad_norm": 1.0, "learning_rate": 0.00022444948786814502, "loss": 0.7663, "step": 11342 }, { "epoch": 0.789105708024627, "grad_norm": 1.203125, "learning_rate": 0.00022430725810083718, "loss": 0.7282, "step": 11343 }, { "epoch": 0.7891752756617622, "grad_norm": 1.1640625, "learning_rate": 0.00022416506771929712, "loss": 0.7486, "step": 11344 }, { "epoch": 0.7892448432988973, "grad_norm": 1.203125, "learning_rate": 0.0002240229167307446, "loss": 0.8693, "step": 11345 }, { "epoch": 0.7893144109360326, "grad_norm": 0.8984375, "learning_rate": 0.00022388080514239718, "loss": 0.8566, "step": 11346 }, { "epoch": 0.7893839785731678, "grad_norm": 1.140625, "learning_rate": 0.0002237387329614703, "loss": 0.5645, "step": 11347 }, { "epoch": 0.789453546210303, "grad_norm": 1.0703125, "learning_rate": 0.00022359670019517797, "loss": 0.8409, "step": 11348 }, { "epoch": 0.7895231138474381, "grad_norm": 1.0390625, "learning_rate": 0.000223454706850732, "loss": 0.8261, "step": 11349 }, { "epoch": 0.7895926814845734, "grad_norm": 0.75390625, "learning_rate": 0.0002233127529353417, "loss": 0.5983, "step": 11350 }, { "epoch": 0.7896622491217086, "grad_norm": 0.9453125, "learning_rate": 0.00022317083845621534, "loss": 0.8145, "step": 11351 }, { "epoch": 0.7897318167588437, "grad_norm": 1.5546875, "learning_rate": 0.00022302896342055802, "loss": 0.9216, "step": 11352 }, { "epoch": 0.789801384395979, "grad_norm": 1.046875, "learning_rate": 0.00022288712783557387, "loss": 0.7054, "step": 11353 }, { "epoch": 0.7898709520331142, "grad_norm": 0.89453125, "learning_rate": 0.00022274533170846424, "loss": 0.6841, "step": 11354 }, { "epoch": 0.7899405196702494, "grad_norm": 0.9453125, "learning_rate": 0.00022260357504642924, "loss": 0.811, "step": 11355 }, { "epoch": 0.7900100873073846, "grad_norm": 1.015625, "learning_rate": 0.0002224618578566664, "loss": 0.7281, "step": 11356 }, { "epoch": 0.7900796549445198, "grad_norm": 1.171875, "learning_rate": 0.00022232018014637102, "loss": 0.8313, "step": 11357 }, { "epoch": 0.790149222581655, "grad_norm": 1.0703125, "learning_rate": 0.0002221785419227371, "loss": 0.7241, "step": 11358 }, { "epoch": 0.7902187902187903, "grad_norm": 0.984375, "learning_rate": 0.00022203694319295665, "loss": 0.9019, "step": 11359 }, { "epoch": 0.7902883578559254, "grad_norm": 1.0625, "learning_rate": 0.00022189538396421893, "loss": 0.867, "step": 11360 }, { "epoch": 0.7903579254930606, "grad_norm": 1.1015625, "learning_rate": 0.00022175386424371136, "loss": 0.8799, "step": 11361 }, { "epoch": 0.7904274931301958, "grad_norm": 1.15625, "learning_rate": 0.00022161238403861993, "loss": 0.991, "step": 11362 }, { "epoch": 0.7904970607673311, "grad_norm": 1.21875, "learning_rate": 0.0002214709433561286, "loss": 1.1083, "step": 11363 }, { "epoch": 0.7905666284044662, "grad_norm": 1.1875, "learning_rate": 0.00022132954220341873, "loss": 0.6552, "step": 11364 }, { "epoch": 0.7906361960416014, "grad_norm": 1.1484375, "learning_rate": 0.00022118818058766953, "loss": 0.6947, "step": 11365 }, { "epoch": 0.7907057636787367, "grad_norm": 1.2578125, "learning_rate": 0.0002210468585160591, "loss": 0.8956, "step": 11366 }, { "epoch": 0.7907753313158719, "grad_norm": 1.140625, "learning_rate": 0.0002209055759957632, "loss": 0.762, "step": 11367 }, { "epoch": 0.790844898953007, "grad_norm": 1.21875, "learning_rate": 0.00022076433303395504, "loss": 0.9421, "step": 11368 }, { "epoch": 0.7909144665901423, "grad_norm": 0.9609375, "learning_rate": 0.00022062312963780663, "loss": 0.5986, "step": 11369 }, { "epoch": 0.7909840342272775, "grad_norm": 0.97265625, "learning_rate": 0.00022048196581448732, "loss": 0.7251, "step": 11370 }, { "epoch": 0.7910536018644126, "grad_norm": 1.0234375, "learning_rate": 0.0002203408415711644, "loss": 0.6971, "step": 11371 }, { "epoch": 0.7911231695015479, "grad_norm": 1.453125, "learning_rate": 0.00022019975691500382, "loss": 0.9502, "step": 11372 }, { "epoch": 0.7911927371386831, "grad_norm": 0.78125, "learning_rate": 0.0002200587118531694, "loss": 0.6688, "step": 11373 }, { "epoch": 0.7912623047758183, "grad_norm": 1.0078125, "learning_rate": 0.00021991770639282238, "loss": 0.7167, "step": 11374 }, { "epoch": 0.7913318724129534, "grad_norm": 1.09375, "learning_rate": 0.00021977674054112205, "loss": 0.8944, "step": 11375 }, { "epoch": 0.7914014400500887, "grad_norm": 1.2109375, "learning_rate": 0.00021963581430522628, "loss": 0.5945, "step": 11376 }, { "epoch": 0.7914710076872239, "grad_norm": 0.91015625, "learning_rate": 0.00021949492769229073, "loss": 0.5457, "step": 11377 }, { "epoch": 0.7915405753243591, "grad_norm": 1.3359375, "learning_rate": 0.0002193540807094687, "loss": 0.9095, "step": 11378 }, { "epoch": 0.7916101429614943, "grad_norm": 1.0234375, "learning_rate": 0.0002192132733639115, "loss": 0.8057, "step": 11379 }, { "epoch": 0.7916797105986295, "grad_norm": 1.03125, "learning_rate": 0.00021907250566276882, "loss": 0.6894, "step": 11380 }, { "epoch": 0.7917492782357647, "grad_norm": 0.921875, "learning_rate": 0.0002189317776131884, "loss": 0.586, "step": 11381 }, { "epoch": 0.7918188458729, "grad_norm": 1.1484375, "learning_rate": 0.00021879108922231516, "loss": 0.8967, "step": 11382 }, { "epoch": 0.7918884135100351, "grad_norm": 1.0390625, "learning_rate": 0.000218650440497293, "loss": 0.7873, "step": 11383 }, { "epoch": 0.7919579811471703, "grad_norm": 1.15625, "learning_rate": 0.00021850983144526304, "loss": 0.8728, "step": 11384 }, { "epoch": 0.7920275487843056, "grad_norm": 1.28125, "learning_rate": 0.00021836926207336504, "loss": 0.9168, "step": 11385 }, { "epoch": 0.7920971164214408, "grad_norm": 1.3046875, "learning_rate": 0.00021822873238873597, "loss": 0.7407, "step": 11386 }, { "epoch": 0.7921666840585759, "grad_norm": 1.1171875, "learning_rate": 0.00021808824239851165, "loss": 0.9536, "step": 11387 }, { "epoch": 0.7922362516957111, "grad_norm": 1.2265625, "learning_rate": 0.0002179477921098253, "loss": 0.9262, "step": 11388 }, { "epoch": 0.7923058193328464, "grad_norm": 1.09375, "learning_rate": 0.00021780738152980795, "loss": 0.7183, "step": 11389 }, { "epoch": 0.7923753869699816, "grad_norm": 1.140625, "learning_rate": 0.00021766701066558924, "loss": 0.7785, "step": 11390 }, { "epoch": 0.7924449546071167, "grad_norm": 1.109375, "learning_rate": 0.00021752667952429673, "loss": 0.7651, "step": 11391 }, { "epoch": 0.792514522244252, "grad_norm": 1.1171875, "learning_rate": 0.00021738638811305555, "loss": 0.6941, "step": 11392 }, { "epoch": 0.7925840898813872, "grad_norm": 1.3984375, "learning_rate": 0.00021724613643898848, "loss": 0.7646, "step": 11393 }, { "epoch": 0.7926536575185223, "grad_norm": 1.2109375, "learning_rate": 0.0002171059245092174, "loss": 0.9519, "step": 11394 }, { "epoch": 0.7927232251556576, "grad_norm": 1.046875, "learning_rate": 0.00021696575233086157, "loss": 0.8139, "step": 11395 }, { "epoch": 0.7927927927927928, "grad_norm": 1.0859375, "learning_rate": 0.0002168256199110379, "loss": 1.0322, "step": 11396 }, { "epoch": 0.792862360429928, "grad_norm": 1.0390625, "learning_rate": 0.00021668552725686186, "loss": 0.8298, "step": 11397 }, { "epoch": 0.7929319280670633, "grad_norm": 1.078125, "learning_rate": 0.00021654547437544635, "loss": 0.6793, "step": 11398 }, { "epoch": 0.7930014957041984, "grad_norm": 1.5859375, "learning_rate": 0.00021640546127390302, "loss": 0.9698, "step": 11399 }, { "epoch": 0.7930710633413336, "grad_norm": 1.2578125, "learning_rate": 0.00021626548795934054, "loss": 0.8393, "step": 11400 }, { "epoch": 0.7931406309784688, "grad_norm": 0.828125, "learning_rate": 0.0002161255544388665, "loss": 0.6756, "step": 11401 }, { "epoch": 0.793210198615604, "grad_norm": 0.91796875, "learning_rate": 0.0002159856607195857, "loss": 0.4974, "step": 11402 }, { "epoch": 0.7932797662527392, "grad_norm": 1.4296875, "learning_rate": 0.00021584580680860088, "loss": 1.0398, "step": 11403 }, { "epoch": 0.7933493338898744, "grad_norm": 1.5078125, "learning_rate": 0.00021570599271301404, "loss": 0.9352, "step": 11404 }, { "epoch": 0.7934189015270097, "grad_norm": 1.234375, "learning_rate": 0.00021556621843992385, "loss": 0.6301, "step": 11405 }, { "epoch": 0.7934884691641448, "grad_norm": 0.96484375, "learning_rate": 0.00021542648399642717, "loss": 0.6083, "step": 11406 }, { "epoch": 0.79355803680128, "grad_norm": 1.2578125, "learning_rate": 0.00021528678938961888, "loss": 0.6556, "step": 11407 }, { "epoch": 0.7936276044384153, "grad_norm": 1.4140625, "learning_rate": 0.00021514713462659208, "loss": 0.9146, "step": 11408 }, { "epoch": 0.7936971720755505, "grad_norm": 1.2578125, "learning_rate": 0.0002150075197144382, "loss": 0.8798, "step": 11409 }, { "epoch": 0.7937667397126856, "grad_norm": 1.015625, "learning_rate": 0.0002148679446602455, "loss": 0.7407, "step": 11410 }, { "epoch": 0.7938363073498209, "grad_norm": 1.21875, "learning_rate": 0.0002147284094711015, "loss": 0.8417, "step": 11411 }, { "epoch": 0.7939058749869561, "grad_norm": 1.0703125, "learning_rate": 0.00021458891415409055, "loss": 0.8481, "step": 11412 }, { "epoch": 0.7939754426240913, "grad_norm": 1.234375, "learning_rate": 0.00021444945871629595, "loss": 0.724, "step": 11413 }, { "epoch": 0.7940450102612264, "grad_norm": 1.3515625, "learning_rate": 0.00021431004316479818, "loss": 1.0432, "step": 11414 }, { "epoch": 0.7941145778983617, "grad_norm": 1.0546875, "learning_rate": 0.00021417066750667658, "loss": 0.9333, "step": 11415 }, { "epoch": 0.7941841455354969, "grad_norm": 0.96875, "learning_rate": 0.00021403133174900747, "loss": 0.4688, "step": 11416 }, { "epoch": 0.794253713172632, "grad_norm": 1.1640625, "learning_rate": 0.0002138920358988653, "loss": 0.8709, "step": 11417 }, { "epoch": 0.7943232808097673, "grad_norm": 1.078125, "learning_rate": 0.00021375277996332377, "loss": 0.6321, "step": 11418 }, { "epoch": 0.7943928484469025, "grad_norm": 1.4296875, "learning_rate": 0.00021361356394945308, "loss": 1.1456, "step": 11419 }, { "epoch": 0.7944624160840377, "grad_norm": 1.1328125, "learning_rate": 0.00021347438786432205, "loss": 0.6778, "step": 11420 }, { "epoch": 0.794531983721173, "grad_norm": 1.53125, "learning_rate": 0.0002133352517149968, "loss": 0.7393, "step": 11421 }, { "epoch": 0.7946015513583081, "grad_norm": 1.1015625, "learning_rate": 0.00021319615550854243, "loss": 0.5918, "step": 11422 }, { "epoch": 0.7946711189954433, "grad_norm": 0.8984375, "learning_rate": 0.0002130570992520219, "loss": 0.5849, "step": 11423 }, { "epoch": 0.7947406866325786, "grad_norm": 1.4609375, "learning_rate": 0.00021291808295249493, "loss": 0.7043, "step": 11424 }, { "epoch": 0.7948102542697137, "grad_norm": 0.98046875, "learning_rate": 0.0002127791066170208, "loss": 0.5975, "step": 11425 }, { "epoch": 0.7948798219068489, "grad_norm": 0.93359375, "learning_rate": 0.00021264017025265558, "loss": 0.5729, "step": 11426 }, { "epoch": 0.7949493895439841, "grad_norm": 1.0625, "learning_rate": 0.00021250127386645412, "loss": 0.7633, "step": 11427 }, { "epoch": 0.7950189571811194, "grad_norm": 1.046875, "learning_rate": 0.00021236241746546848, "loss": 0.8501, "step": 11428 }, { "epoch": 0.7950885248182545, "grad_norm": 1.09375, "learning_rate": 0.00021222360105674953, "loss": 0.7237, "step": 11429 }, { "epoch": 0.7951580924553897, "grad_norm": 0.94140625, "learning_rate": 0.00021208482464734525, "loss": 0.7258, "step": 11430 }, { "epoch": 0.795227660092525, "grad_norm": 1.4453125, "learning_rate": 0.00021194608824430205, "loss": 0.781, "step": 11431 }, { "epoch": 0.7952972277296602, "grad_norm": 1.2734375, "learning_rate": 0.00021180739185466468, "loss": 1.0382, "step": 11432 }, { "epoch": 0.7953667953667953, "grad_norm": 1.0859375, "learning_rate": 0.00021166873548547526, "loss": 0.7659, "step": 11433 }, { "epoch": 0.7954363630039306, "grad_norm": 1.140625, "learning_rate": 0.00021153011914377395, "loss": 0.8325, "step": 11434 }, { "epoch": 0.7955059306410658, "grad_norm": 1.1796875, "learning_rate": 0.00021139154283659846, "loss": 0.8347, "step": 11435 }, { "epoch": 0.795575498278201, "grad_norm": 1.0703125, "learning_rate": 0.000211253006570986, "loss": 0.7483, "step": 11436 }, { "epoch": 0.7956450659153362, "grad_norm": 1.2890625, "learning_rate": 0.00021111451035397033, "loss": 0.7951, "step": 11437 }, { "epoch": 0.7957146335524714, "grad_norm": 0.98046875, "learning_rate": 0.0002109760541925836, "loss": 0.8256, "step": 11438 }, { "epoch": 0.7957842011896066, "grad_norm": 3.0, "learning_rate": 0.0002108376380938556, "loss": 0.8697, "step": 11439 }, { "epoch": 0.7958537688267417, "grad_norm": 1.4609375, "learning_rate": 0.0002106992620648146, "loss": 1.0132, "step": 11440 }, { "epoch": 0.795923336463877, "grad_norm": 1.3203125, "learning_rate": 0.000210560926112487, "loss": 0.8459, "step": 11441 }, { "epoch": 0.7959929041010122, "grad_norm": 1.15625, "learning_rate": 0.00021042263024389617, "loss": 0.6435, "step": 11442 }, { "epoch": 0.7960624717381474, "grad_norm": 1.2890625, "learning_rate": 0.00021028437446606475, "loss": 0.9039, "step": 11443 }, { "epoch": 0.7961320393752827, "grad_norm": 0.94140625, "learning_rate": 0.00021014615878601207, "loss": 0.7823, "step": 11444 }, { "epoch": 0.7962016070124178, "grad_norm": 1.15625, "learning_rate": 0.00021000798321075653, "loss": 0.7463, "step": 11445 }, { "epoch": 0.796271174649553, "grad_norm": 0.97265625, "learning_rate": 0.00020986984774731354, "loss": 0.6783, "step": 11446 }, { "epoch": 0.7963407422866883, "grad_norm": 0.93359375, "learning_rate": 0.00020973175240269739, "loss": 0.7749, "step": 11447 }, { "epoch": 0.7964103099238234, "grad_norm": 1.0546875, "learning_rate": 0.0002095936971839195, "loss": 0.691, "step": 11448 }, { "epoch": 0.7964798775609586, "grad_norm": 1.453125, "learning_rate": 0.00020945568209798928, "loss": 0.886, "step": 11449 }, { "epoch": 0.7965494451980939, "grad_norm": 1.203125, "learning_rate": 0.00020931770715191533, "loss": 1.0378, "step": 11450 }, { "epoch": 0.7966190128352291, "grad_norm": 0.9765625, "learning_rate": 0.00020917977235270302, "loss": 0.8532, "step": 11451 }, { "epoch": 0.7966885804723642, "grad_norm": 1.203125, "learning_rate": 0.00020904187770735572, "loss": 0.838, "step": 11452 }, { "epoch": 0.7967581481094994, "grad_norm": 1.3125, "learning_rate": 0.00020890402322287495, "loss": 0.6772, "step": 11453 }, { "epoch": 0.7968277157466347, "grad_norm": 1.1171875, "learning_rate": 0.00020876620890626041, "loss": 0.8102, "step": 11454 }, { "epoch": 0.7968972833837699, "grad_norm": 1.0703125, "learning_rate": 0.00020862843476451, "loss": 0.8577, "step": 11455 }, { "epoch": 0.796966851020905, "grad_norm": 1.125, "learning_rate": 0.00020849070080461852, "loss": 0.9275, "step": 11456 }, { "epoch": 0.7970364186580403, "grad_norm": 1.28125, "learning_rate": 0.00020835300703358006, "loss": 0.7921, "step": 11457 }, { "epoch": 0.7971059862951755, "grad_norm": 1.0703125, "learning_rate": 0.00020821535345838537, "loss": 1.0517, "step": 11458 }, { "epoch": 0.7971755539323107, "grad_norm": 1.0625, "learning_rate": 0.00020807774008602454, "loss": 0.6087, "step": 11459 }, { "epoch": 0.7972451215694459, "grad_norm": 1.0859375, "learning_rate": 0.00020794016692348417, "loss": 0.8276, "step": 11460 }, { "epoch": 0.7973146892065811, "grad_norm": 0.86328125, "learning_rate": 0.00020780263397775, "loss": 0.7144, "step": 11461 }, { "epoch": 0.7973842568437163, "grad_norm": 0.953125, "learning_rate": 0.00020766514125580493, "loss": 0.6881, "step": 11462 }, { "epoch": 0.7974538244808516, "grad_norm": 1.0703125, "learning_rate": 0.00020752768876463034, "loss": 0.8766, "step": 11463 }, { "epoch": 0.7975233921179867, "grad_norm": 0.9453125, "learning_rate": 0.00020739027651120567, "loss": 0.6938, "step": 11464 }, { "epoch": 0.7975929597551219, "grad_norm": 0.98046875, "learning_rate": 0.00020725290450250767, "loss": 0.7493, "step": 11465 }, { "epoch": 0.7976625273922571, "grad_norm": 1.2578125, "learning_rate": 0.0002071155727455114, "loss": 0.8856, "step": 11466 }, { "epoch": 0.7977320950293924, "grad_norm": 1.2734375, "learning_rate": 0.00020697828124718965, "loss": 0.8574, "step": 11467 }, { "epoch": 0.7978016626665275, "grad_norm": 1.25, "learning_rate": 0.0002068410300145136, "loss": 0.9403, "step": 11468 }, { "epoch": 0.7978712303036627, "grad_norm": 0.9921875, "learning_rate": 0.00020670381905445257, "loss": 0.9696, "step": 11469 }, { "epoch": 0.797940797940798, "grad_norm": 1.2890625, "learning_rate": 0.00020656664837397288, "loss": 0.8581, "step": 11470 }, { "epoch": 0.7980103655779331, "grad_norm": 1.53125, "learning_rate": 0.00020642951798003972, "loss": 0.88, "step": 11471 }, { "epoch": 0.7980799332150683, "grad_norm": 1.1171875, "learning_rate": 0.00020629242787961556, "loss": 0.8249, "step": 11472 }, { "epoch": 0.7981495008522036, "grad_norm": 1.1796875, "learning_rate": 0.00020615537807966167, "loss": 0.8592, "step": 11473 }, { "epoch": 0.7982190684893388, "grad_norm": 1.015625, "learning_rate": 0.00020601836858713597, "loss": 0.7575, "step": 11474 }, { "epoch": 0.7982886361264739, "grad_norm": 1.125, "learning_rate": 0.00020588139940899597, "loss": 0.5698, "step": 11475 }, { "epoch": 0.7983582037636092, "grad_norm": 1.125, "learning_rate": 0.00020574447055219546, "loss": 0.9127, "step": 11476 }, { "epoch": 0.7984277714007444, "grad_norm": 1.0859375, "learning_rate": 0.00020560758202368745, "loss": 0.7286, "step": 11477 }, { "epoch": 0.7984973390378796, "grad_norm": 1.1328125, "learning_rate": 0.0002054707338304227, "loss": 0.9412, "step": 11478 }, { "epoch": 0.7985669066750147, "grad_norm": 1.3125, "learning_rate": 0.0002053339259793493, "loss": 0.7786, "step": 11479 }, { "epoch": 0.79863647431215, "grad_norm": 1.0703125, "learning_rate": 0.0002051971584774137, "loss": 0.6275, "step": 11480 }, { "epoch": 0.7987060419492852, "grad_norm": 0.79296875, "learning_rate": 0.00020506043133155982, "loss": 0.801, "step": 11481 }, { "epoch": 0.7987756095864204, "grad_norm": 1.09375, "learning_rate": 0.00020492374454873097, "loss": 0.8623, "step": 11482 }, { "epoch": 0.7988451772235556, "grad_norm": 1.140625, "learning_rate": 0.00020478709813586692, "loss": 0.7663, "step": 11483 }, { "epoch": 0.7989147448606908, "grad_norm": 1.796875, "learning_rate": 0.0002046504920999056, "loss": 1.0197, "step": 11484 }, { "epoch": 0.798984312497826, "grad_norm": 0.953125, "learning_rate": 0.00020451392644778356, "loss": 0.7304, "step": 11485 }, { "epoch": 0.7990538801349613, "grad_norm": 0.97265625, "learning_rate": 0.00020437740118643466, "loss": 0.6665, "step": 11486 }, { "epoch": 0.7991234477720964, "grad_norm": 1.0859375, "learning_rate": 0.00020424091632279128, "loss": 0.8603, "step": 11487 }, { "epoch": 0.7991930154092316, "grad_norm": 1.3671875, "learning_rate": 0.000204104471863783, "loss": 1.0289, "step": 11488 }, { "epoch": 0.7992625830463669, "grad_norm": 1.2421875, "learning_rate": 0.00020396806781633836, "loss": 0.8675, "step": 11489 }, { "epoch": 0.799332150683502, "grad_norm": 1.125, "learning_rate": 0.0002038317041873826, "loss": 0.7787, "step": 11490 }, { "epoch": 0.7994017183206372, "grad_norm": 1.6875, "learning_rate": 0.00020369538098383987, "loss": 0.9907, "step": 11491 }, { "epoch": 0.7994712859577724, "grad_norm": 1.0234375, "learning_rate": 0.0002035590982126324, "loss": 0.8315, "step": 11492 }, { "epoch": 0.7995408535949077, "grad_norm": 1.21875, "learning_rate": 0.00020342285588067954, "loss": 0.6062, "step": 11493 }, { "epoch": 0.7996104212320428, "grad_norm": 1.1875, "learning_rate": 0.00020328665399489866, "loss": 0.7618, "step": 11494 }, { "epoch": 0.799679988869178, "grad_norm": 1.1796875, "learning_rate": 0.00020315049256220584, "loss": 0.9091, "step": 11495 }, { "epoch": 0.7997495565063133, "grad_norm": 1.125, "learning_rate": 0.00020301437158951486, "loss": 0.9664, "step": 11496 }, { "epoch": 0.7998191241434485, "grad_norm": 1.109375, "learning_rate": 0.0002028782910837369, "loss": 0.9051, "step": 11497 }, { "epoch": 0.7998886917805836, "grad_norm": 1.2421875, "learning_rate": 0.00020274225105178134, "loss": 0.9678, "step": 11498 }, { "epoch": 0.7999582594177189, "grad_norm": 1.25, "learning_rate": 0.00020260625150055612, "loss": 0.7862, "step": 11499 }, { "epoch": 0.8000278270548541, "grad_norm": 1.1484375, "learning_rate": 0.0002024702924369659, "loss": 0.8813, "step": 11500 }, { "epoch": 0.8000973946919893, "grad_norm": 1.2109375, "learning_rate": 0.00020233437386791463, "loss": 0.6225, "step": 11501 }, { "epoch": 0.8001669623291245, "grad_norm": 1.2109375, "learning_rate": 0.00020219849580030313, "loss": 0.7075, "step": 11502 }, { "epoch": 0.8002365299662597, "grad_norm": 0.859375, "learning_rate": 0.0002020626582410311, "loss": 0.6389, "step": 11503 }, { "epoch": 0.8003060976033949, "grad_norm": 1.03125, "learning_rate": 0.000201926861196995, "loss": 0.8935, "step": 11504 }, { "epoch": 0.80037566524053, "grad_norm": 1.1171875, "learning_rate": 0.00020179110467509042, "loss": 0.8508, "step": 11505 }, { "epoch": 0.8004452328776653, "grad_norm": 1.0625, "learning_rate": 0.00020165538868221046, "loss": 0.8624, "step": 11506 }, { "epoch": 0.8005148005148005, "grad_norm": 1.09375, "learning_rate": 0.00020151971322524597, "loss": 0.7857, "step": 11507 }, { "epoch": 0.8005843681519357, "grad_norm": 1.3046875, "learning_rate": 0.0002013840783110854, "loss": 0.8685, "step": 11508 }, { "epoch": 0.800653935789071, "grad_norm": 1.0078125, "learning_rate": 0.00020124848394661622, "loss": 0.8577, "step": 11509 }, { "epoch": 0.8007235034262061, "grad_norm": 1.15625, "learning_rate": 0.0002011129301387231, "loss": 0.6494, "step": 11510 }, { "epoch": 0.8007930710633413, "grad_norm": 1.0703125, "learning_rate": 0.00020097741689428884, "loss": 0.7739, "step": 11511 }, { "epoch": 0.8008626387004766, "grad_norm": 1.296875, "learning_rate": 0.00020084194422019365, "loss": 0.7364, "step": 11512 }, { "epoch": 0.8009322063376118, "grad_norm": 1.171875, "learning_rate": 0.00020070651212331648, "loss": 0.7561, "step": 11513 }, { "epoch": 0.8010017739747469, "grad_norm": 1.25, "learning_rate": 0.00020057112061053407, "loss": 0.8435, "step": 11514 }, { "epoch": 0.8010713416118822, "grad_norm": 1.1796875, "learning_rate": 0.0002004357696887208, "loss": 0.7197, "step": 11515 }, { "epoch": 0.8011409092490174, "grad_norm": 1.296875, "learning_rate": 0.00020030045936474884, "loss": 0.9916, "step": 11516 }, { "epoch": 0.8012104768861525, "grad_norm": 1.0703125, "learning_rate": 0.0002001651896454889, "loss": 0.8623, "step": 11517 }, { "epoch": 0.8012800445232877, "grad_norm": 1.1015625, "learning_rate": 0.00020002996053780907, "loss": 0.875, "step": 11518 }, { "epoch": 0.801349612160423, "grad_norm": 1.2421875, "learning_rate": 0.00019989477204857586, "loss": 0.8547, "step": 11519 }, { "epoch": 0.8014191797975582, "grad_norm": 1.21875, "learning_rate": 0.00019975962418465298, "loss": 0.6164, "step": 11520 }, { "epoch": 0.8014887474346933, "grad_norm": 1.1640625, "learning_rate": 0.00019962451695290328, "loss": 0.8538, "step": 11521 }, { "epoch": 0.8015583150718286, "grad_norm": 1.46875, "learning_rate": 0.00019948945036018606, "loss": 0.6978, "step": 11522 }, { "epoch": 0.8016278827089638, "grad_norm": 1.0546875, "learning_rate": 0.0001993544244133597, "loss": 0.5762, "step": 11523 }, { "epoch": 0.801697450346099, "grad_norm": 1.046875, "learning_rate": 0.00019921943911928032, "loss": 0.742, "step": 11524 }, { "epoch": 0.8017670179832342, "grad_norm": 1.125, "learning_rate": 0.0001990844944848017, "loss": 0.6807, "step": 11525 }, { "epoch": 0.8018365856203694, "grad_norm": 1.296875, "learning_rate": 0.0001989495905167752, "loss": 0.8195, "step": 11526 }, { "epoch": 0.8019061532575046, "grad_norm": 1.2890625, "learning_rate": 0.00019881472722205085, "loss": 0.9336, "step": 11527 }, { "epoch": 0.8019757208946399, "grad_norm": 0.87109375, "learning_rate": 0.00019867990460747676, "loss": 0.6865, "step": 11528 }, { "epoch": 0.802045288531775, "grad_norm": 1.421875, "learning_rate": 0.00019854512267989812, "loss": 0.8677, "step": 11529 }, { "epoch": 0.8021148561689102, "grad_norm": 0.91796875, "learning_rate": 0.0001984103814461582, "loss": 0.721, "step": 11530 }, { "epoch": 0.8021844238060454, "grad_norm": 0.8515625, "learning_rate": 0.0001982756809130991, "loss": 0.7316, "step": 11531 }, { "epoch": 0.8022539914431807, "grad_norm": 1.0859375, "learning_rate": 0.00019814102108755972, "loss": 0.7935, "step": 11532 }, { "epoch": 0.8023235590803158, "grad_norm": 1.234375, "learning_rate": 0.00019800640197637786, "loss": 1.0023, "step": 11533 }, { "epoch": 0.802393126717451, "grad_norm": 0.8359375, "learning_rate": 0.00019787182358638823, "loss": 0.5747, "step": 11534 }, { "epoch": 0.8024626943545863, "grad_norm": 1.5703125, "learning_rate": 0.00019773728592442465, "loss": 0.7026, "step": 11535 }, { "epoch": 0.8025322619917215, "grad_norm": 1.234375, "learning_rate": 0.00019760278899731777, "loss": 0.8311, "step": 11536 }, { "epoch": 0.8026018296288566, "grad_norm": 1.09375, "learning_rate": 0.0001974683328118969, "loss": 0.8812, "step": 11537 }, { "epoch": 0.8026713972659919, "grad_norm": 0.90234375, "learning_rate": 0.0001973339173749893, "loss": 0.5614, "step": 11538 }, { "epoch": 0.8027409649031271, "grad_norm": 1.1171875, "learning_rate": 0.00019719954269341956, "loss": 0.8661, "step": 11539 }, { "epoch": 0.8028105325402622, "grad_norm": 1.2109375, "learning_rate": 0.00019706520877401035, "loss": 0.9264, "step": 11540 }, { "epoch": 0.8028801001773975, "grad_norm": 0.86328125, "learning_rate": 0.0001969309156235829, "loss": 0.7194, "step": 11541 }, { "epoch": 0.8029496678145327, "grad_norm": 0.89453125, "learning_rate": 0.00019679666324895595, "loss": 0.5175, "step": 11542 }, { "epoch": 0.8030192354516679, "grad_norm": 1.1484375, "learning_rate": 0.00019666245165694596, "loss": 0.6996, "step": 11543 }, { "epoch": 0.803088803088803, "grad_norm": 0.9375, "learning_rate": 0.00019652828085436736, "loss": 0.6839, "step": 11544 }, { "epoch": 0.8031583707259383, "grad_norm": 1.2890625, "learning_rate": 0.0001963941508480328, "loss": 0.914, "step": 11545 }, { "epoch": 0.8032279383630735, "grad_norm": 1.1875, "learning_rate": 0.00019626006164475307, "loss": 0.8159, "step": 11546 }, { "epoch": 0.8032975060002087, "grad_norm": 1.0234375, "learning_rate": 0.00019612601325133628, "loss": 0.6157, "step": 11547 }, { "epoch": 0.8033670736373439, "grad_norm": 1.0078125, "learning_rate": 0.0001959920056745884, "loss": 0.7854, "step": 11548 }, { "epoch": 0.8034366412744791, "grad_norm": 1.203125, "learning_rate": 0.00019585803892131426, "loss": 0.8643, "step": 11549 }, { "epoch": 0.8035062089116143, "grad_norm": 1.0859375, "learning_rate": 0.0001957241129983155, "loss": 1.024, "step": 11550 }, { "epoch": 0.8035757765487496, "grad_norm": 0.90625, "learning_rate": 0.00019559022791239245, "loss": 0.6395, "step": 11551 }, { "epoch": 0.8036453441858847, "grad_norm": 1.0703125, "learning_rate": 0.00019545638367034335, "loss": 0.6166, "step": 11552 }, { "epoch": 0.8037149118230199, "grad_norm": 1.0390625, "learning_rate": 0.00019532258027896377, "loss": 0.5566, "step": 11553 }, { "epoch": 0.8037844794601552, "grad_norm": 0.9609375, "learning_rate": 0.0001951888177450476, "loss": 0.947, "step": 11554 }, { "epoch": 0.8038540470972904, "grad_norm": 1.0390625, "learning_rate": 0.00019505509607538663, "loss": 0.8042, "step": 11555 }, { "epoch": 0.8039236147344255, "grad_norm": 1.1015625, "learning_rate": 0.00019492141527677087, "loss": 0.8172, "step": 11556 }, { "epoch": 0.8039931823715607, "grad_norm": 1.375, "learning_rate": 0.0001947877753559878, "loss": 0.7079, "step": 11557 }, { "epoch": 0.804062750008696, "grad_norm": 1.1796875, "learning_rate": 0.00019465417631982262, "loss": 0.8163, "step": 11558 }, { "epoch": 0.8041323176458312, "grad_norm": 1.109375, "learning_rate": 0.00019452061817505918, "loss": 0.9206, "step": 11559 }, { "epoch": 0.8042018852829663, "grad_norm": 1.0, "learning_rate": 0.0001943871009284791, "loss": 0.7406, "step": 11560 }, { "epoch": 0.8042714529201016, "grad_norm": 1.296875, "learning_rate": 0.00019425362458686148, "loss": 0.8649, "step": 11561 }, { "epoch": 0.8043410205572368, "grad_norm": 1.0546875, "learning_rate": 0.00019412018915698315, "loss": 0.8075, "step": 11562 }, { "epoch": 0.804410588194372, "grad_norm": 2.25, "learning_rate": 0.00019398679464562008, "loss": 1.022, "step": 11563 }, { "epoch": 0.8044801558315072, "grad_norm": 1.5703125, "learning_rate": 0.00019385344105954462, "loss": 1.0943, "step": 11564 }, { "epoch": 0.8045497234686424, "grad_norm": 1.2421875, "learning_rate": 0.00019372012840552822, "loss": 0.769, "step": 11565 }, { "epoch": 0.8046192911057776, "grad_norm": 1.53125, "learning_rate": 0.00019358685669033994, "loss": 1.0729, "step": 11566 }, { "epoch": 0.8046888587429128, "grad_norm": 1.296875, "learning_rate": 0.00019345362592074645, "loss": 0.7369, "step": 11567 }, { "epoch": 0.804758426380048, "grad_norm": 1.4453125, "learning_rate": 0.00019332043610351224, "loss": 0.7023, "step": 11568 }, { "epoch": 0.8048279940171832, "grad_norm": 1.6015625, "learning_rate": 0.00019318728724540047, "loss": 0.8439, "step": 11569 }, { "epoch": 0.8048975616543184, "grad_norm": 1.1640625, "learning_rate": 0.0001930541793531717, "loss": 0.9113, "step": 11570 }, { "epoch": 0.8049671292914536, "grad_norm": 1.1640625, "learning_rate": 0.00019292111243358445, "loss": 0.9856, "step": 11571 }, { "epoch": 0.8050366969285888, "grad_norm": 0.9140625, "learning_rate": 0.00019278808649339496, "loss": 0.688, "step": 11572 }, { "epoch": 0.805106264565724, "grad_norm": 1.078125, "learning_rate": 0.00019265510153935772, "loss": 0.7357, "step": 11573 }, { "epoch": 0.8051758322028593, "grad_norm": 0.9921875, "learning_rate": 0.00019252215757822533, "loss": 0.6238, "step": 11574 }, { "epoch": 0.8052453998399944, "grad_norm": 1.140625, "learning_rate": 0.00019238925461674783, "loss": 0.6776, "step": 11575 }, { "epoch": 0.8053149674771296, "grad_norm": 1.140625, "learning_rate": 0.00019225639266167317, "loss": 0.8295, "step": 11576 }, { "epoch": 0.8053845351142649, "grad_norm": 1.2890625, "learning_rate": 0.00019212357171974738, "loss": 0.8657, "step": 11577 }, { "epoch": 0.8054541027514001, "grad_norm": 0.96875, "learning_rate": 0.00019199079179771494, "loss": 0.6886, "step": 11578 }, { "epoch": 0.8055236703885352, "grad_norm": 0.96875, "learning_rate": 0.00019185805290231718, "loss": 0.8363, "step": 11579 }, { "epoch": 0.8055932380256705, "grad_norm": 1.3515625, "learning_rate": 0.00019172535504029443, "loss": 0.8789, "step": 11580 }, { "epoch": 0.8056628056628057, "grad_norm": 1.6015625, "learning_rate": 0.0001915926982183841, "loss": 1.0713, "step": 11581 }, { "epoch": 0.8057323732999409, "grad_norm": 1.1015625, "learning_rate": 0.0001914600824433217, "loss": 0.7367, "step": 11582 }, { "epoch": 0.805801940937076, "grad_norm": 1.2734375, "learning_rate": 0.00019132750772184092, "loss": 0.7935, "step": 11583 }, { "epoch": 0.8058715085742113, "grad_norm": 1.125, "learning_rate": 0.00019119497406067354, "loss": 0.7253, "step": 11584 }, { "epoch": 0.8059410762113465, "grad_norm": 1.0390625, "learning_rate": 0.00019106248146654869, "loss": 0.6743, "step": 11585 }, { "epoch": 0.8060106438484816, "grad_norm": 1.21875, "learning_rate": 0.00019093002994619346, "loss": 0.9133, "step": 11586 }, { "epoch": 0.8060802114856169, "grad_norm": 1.0546875, "learning_rate": 0.00019079761950633323, "loss": 0.763, "step": 11587 }, { "epoch": 0.8061497791227521, "grad_norm": 0.8125, "learning_rate": 0.0001906652501536915, "loss": 0.5673, "step": 11588 }, { "epoch": 0.8062193467598873, "grad_norm": 1.015625, "learning_rate": 0.00019053292189498904, "loss": 0.6943, "step": 11589 }, { "epoch": 0.8062889143970225, "grad_norm": 1.28125, "learning_rate": 0.00019040063473694448, "loss": 0.8947, "step": 11590 }, { "epoch": 0.8063584820341577, "grad_norm": 1.0703125, "learning_rate": 0.00019026838868627506, "loss": 0.7626, "step": 11591 }, { "epoch": 0.8064280496712929, "grad_norm": 0.8671875, "learning_rate": 0.00019013618374969578, "loss": 0.8147, "step": 11592 }, { "epoch": 0.8064976173084282, "grad_norm": 0.890625, "learning_rate": 0.00019000401993391868, "loss": 0.4795, "step": 11593 }, { "epoch": 0.8065671849455633, "grad_norm": 1.359375, "learning_rate": 0.00018987189724565512, "loss": 0.7489, "step": 11594 }, { "epoch": 0.8066367525826985, "grad_norm": 0.83203125, "learning_rate": 0.00018973981569161337, "loss": 0.6915, "step": 11595 }, { "epoch": 0.8067063202198337, "grad_norm": 1.3125, "learning_rate": 0.00018960777527849936, "loss": 0.8171, "step": 11596 }, { "epoch": 0.806775887856969, "grad_norm": 1.15625, "learning_rate": 0.0001894757760130179, "loss": 1.0254, "step": 11597 }, { "epoch": 0.8068454554941041, "grad_norm": 1.0234375, "learning_rate": 0.00018934381790187139, "loss": 0.8368, "step": 11598 }, { "epoch": 0.8069150231312393, "grad_norm": 1.34375, "learning_rate": 0.0001892119009517599, "loss": 0.91, "step": 11599 }, { "epoch": 0.8069845907683746, "grad_norm": 1.15625, "learning_rate": 0.00018908002516938106, "loss": 0.8141, "step": 11600 }, { "epoch": 0.8070541584055098, "grad_norm": 1.015625, "learning_rate": 0.0001889481905614313, "loss": 0.947, "step": 11601 }, { "epoch": 0.8071237260426449, "grad_norm": 1.5625, "learning_rate": 0.00018881639713460452, "loss": 0.9287, "step": 11602 }, { "epoch": 0.8071932936797802, "grad_norm": 0.8359375, "learning_rate": 0.00018868464489559257, "loss": 0.5424, "step": 11603 }, { "epoch": 0.8072628613169154, "grad_norm": 1.1640625, "learning_rate": 0.00018855293385108474, "loss": 0.6286, "step": 11604 }, { "epoch": 0.8073324289540506, "grad_norm": 1.1328125, "learning_rate": 0.00018842126400776883, "loss": 0.6904, "step": 11605 }, { "epoch": 0.8074019965911858, "grad_norm": 1.2421875, "learning_rate": 0.0001882896353723308, "loss": 0.667, "step": 11606 }, { "epoch": 0.807471564228321, "grad_norm": 1.0625, "learning_rate": 0.00018815804795145385, "loss": 0.9498, "step": 11607 }, { "epoch": 0.8075411318654562, "grad_norm": 1.15625, "learning_rate": 0.0001880265017518189, "loss": 0.6469, "step": 11608 }, { "epoch": 0.8076106995025913, "grad_norm": 1.234375, "learning_rate": 0.00018789499678010548, "loss": 1.0743, "step": 11609 }, { "epoch": 0.8076802671397266, "grad_norm": 0.95703125, "learning_rate": 0.0001877635330429911, "loss": 0.6298, "step": 11610 }, { "epoch": 0.8077498347768618, "grad_norm": 1.1953125, "learning_rate": 0.00018763211054715034, "loss": 0.7808, "step": 11611 }, { "epoch": 0.807819402413997, "grad_norm": 1.0234375, "learning_rate": 0.00018750072929925654, "loss": 0.7542, "step": 11612 }, { "epoch": 0.8078889700511322, "grad_norm": 1.3671875, "learning_rate": 0.00018736938930598047, "loss": 0.8713, "step": 11613 }, { "epoch": 0.8079585376882674, "grad_norm": 1.0390625, "learning_rate": 0.00018723809057399066, "loss": 0.8394, "step": 11614 }, { "epoch": 0.8080281053254026, "grad_norm": 0.8046875, "learning_rate": 0.00018710683310995392, "loss": 0.5079, "step": 11615 }, { "epoch": 0.8080976729625379, "grad_norm": 1.125, "learning_rate": 0.00018697561692053512, "loss": 0.762, "step": 11616 }, { "epoch": 0.808167240599673, "grad_norm": 1.109375, "learning_rate": 0.00018684444201239658, "loss": 0.9277, "step": 11617 }, { "epoch": 0.8082368082368082, "grad_norm": 1.0, "learning_rate": 0.00018671330839219836, "loss": 0.8453, "step": 11618 }, { "epoch": 0.8083063758739435, "grad_norm": 0.8984375, "learning_rate": 0.0001865822160665992, "loss": 0.8398, "step": 11619 }, { "epoch": 0.8083759435110787, "grad_norm": 1.1953125, "learning_rate": 0.00018645116504225536, "loss": 0.8521, "step": 11620 }, { "epoch": 0.8084455111482138, "grad_norm": 1.171875, "learning_rate": 0.0001863201553258207, "loss": 0.8618, "step": 11621 }, { "epoch": 0.808515078785349, "grad_norm": 1.03125, "learning_rate": 0.00018618918692394715, "loss": 0.7516, "step": 11622 }, { "epoch": 0.8085846464224843, "grad_norm": 1.2109375, "learning_rate": 0.00018605825984328473, "loss": 0.8396, "step": 11623 }, { "epoch": 0.8086542140596195, "grad_norm": 1.0703125, "learning_rate": 0.00018592737409048156, "loss": 0.7484, "step": 11624 }, { "epoch": 0.8087237816967546, "grad_norm": 1.1953125, "learning_rate": 0.00018579652967218286, "loss": 0.7865, "step": 11625 }, { "epoch": 0.8087933493338899, "grad_norm": 0.9375, "learning_rate": 0.0001856657265950328, "loss": 0.7438, "step": 11626 }, { "epoch": 0.8088629169710251, "grad_norm": 1.109375, "learning_rate": 0.00018553496486567244, "loss": 0.7413, "step": 11627 }, { "epoch": 0.8089324846081603, "grad_norm": 1.28125, "learning_rate": 0.00018540424449074123, "loss": 0.7804, "step": 11628 }, { "epoch": 0.8090020522452955, "grad_norm": 1.25, "learning_rate": 0.00018527356547687657, "loss": 0.8049, "step": 11629 }, { "epoch": 0.8090716198824307, "grad_norm": 1.109375, "learning_rate": 0.00018514292783071407, "loss": 0.5443, "step": 11630 }, { "epoch": 0.8091411875195659, "grad_norm": 1.296875, "learning_rate": 0.0001850123315588864, "loss": 1.0762, "step": 11631 }, { "epoch": 0.8092107551567012, "grad_norm": 1.125, "learning_rate": 0.00018488177666802454, "loss": 1.1306, "step": 11632 }, { "epoch": 0.8092803227938363, "grad_norm": 1.421875, "learning_rate": 0.00018475126316475744, "loss": 0.8331, "step": 11633 }, { "epoch": 0.8093498904309715, "grad_norm": 1.109375, "learning_rate": 0.0001846207910557124, "loss": 0.815, "step": 11634 }, { "epoch": 0.8094194580681067, "grad_norm": 1.09375, "learning_rate": 0.00018449036034751375, "loss": 0.7388, "step": 11635 }, { "epoch": 0.809489025705242, "grad_norm": 0.984375, "learning_rate": 0.00018435997104678382, "loss": 0.784, "step": 11636 }, { "epoch": 0.8095585933423771, "grad_norm": 1.390625, "learning_rate": 0.00018422962316014347, "loss": 0.7113, "step": 11637 }, { "epoch": 0.8096281609795123, "grad_norm": 1.0546875, "learning_rate": 0.00018409931669421132, "loss": 0.7346, "step": 11638 }, { "epoch": 0.8096977286166476, "grad_norm": 0.91015625, "learning_rate": 0.0001839690516556032, "loss": 0.6103, "step": 11639 }, { "epoch": 0.8097672962537827, "grad_norm": 1.2421875, "learning_rate": 0.00018383882805093367, "loss": 0.841, "step": 11640 }, { "epoch": 0.8098368638909179, "grad_norm": 1.09375, "learning_rate": 0.0001837086458868148, "loss": 0.8778, "step": 11641 }, { "epoch": 0.8099064315280532, "grad_norm": 0.80859375, "learning_rate": 0.0001835785051698562, "loss": 0.6279, "step": 11642 }, { "epoch": 0.8099759991651884, "grad_norm": 1.3671875, "learning_rate": 0.00018344840590666612, "loss": 1.0347, "step": 11643 }, { "epoch": 0.8100455668023235, "grad_norm": 1.375, "learning_rate": 0.0001833183481038504, "loss": 0.8542, "step": 11644 }, { "epoch": 0.8101151344394588, "grad_norm": 1.1328125, "learning_rate": 0.00018318833176801265, "loss": 0.6418, "step": 11645 }, { "epoch": 0.810184702076594, "grad_norm": 0.984375, "learning_rate": 0.00018305835690575413, "loss": 0.7551, "step": 11646 }, { "epoch": 0.8102542697137292, "grad_norm": 1.1953125, "learning_rate": 0.00018292842352367444, "loss": 0.5889, "step": 11647 }, { "epoch": 0.8103238373508643, "grad_norm": 1.3203125, "learning_rate": 0.00018279853162837145, "loss": 1.0018, "step": 11648 }, { "epoch": 0.8103934049879996, "grad_norm": 1.5234375, "learning_rate": 0.00018266868122643998, "loss": 0.9089, "step": 11649 }, { "epoch": 0.8104629726251348, "grad_norm": 1.046875, "learning_rate": 0.00018253887232447285, "loss": 0.8482, "step": 11650 }, { "epoch": 0.81053254026227, "grad_norm": 1.109375, "learning_rate": 0.0001824091049290616, "loss": 0.8073, "step": 11651 }, { "epoch": 0.8106021078994052, "grad_norm": 1.21875, "learning_rate": 0.00018227937904679526, "loss": 0.7902, "step": 11652 }, { "epoch": 0.8106716755365404, "grad_norm": 1.125, "learning_rate": 0.00018214969468426022, "loss": 0.8469, "step": 11653 }, { "epoch": 0.8107412431736756, "grad_norm": 1.2109375, "learning_rate": 0.00018202005184804172, "loss": 0.9903, "step": 11654 }, { "epoch": 0.8108108108108109, "grad_norm": 1.1640625, "learning_rate": 0.00018189045054472163, "loss": 0.8441, "step": 11655 }, { "epoch": 0.810880378447946, "grad_norm": 1.1796875, "learning_rate": 0.00018176089078088132, "loss": 0.7733, "step": 11656 }, { "epoch": 0.8109499460850812, "grad_norm": 0.9765625, "learning_rate": 0.00018163137256309837, "loss": 0.5121, "step": 11657 }, { "epoch": 0.8110195137222165, "grad_norm": 1.3515625, "learning_rate": 0.00018150189589794975, "loss": 0.7866, "step": 11658 }, { "epoch": 0.8110890813593516, "grad_norm": 0.9765625, "learning_rate": 0.0001813724607920093, "loss": 0.7702, "step": 11659 }, { "epoch": 0.8111586489964868, "grad_norm": 1.0625, "learning_rate": 0.00018124306725184858, "loss": 0.703, "step": 11660 }, { "epoch": 0.811228216633622, "grad_norm": 1.1640625, "learning_rate": 0.00018111371528403851, "loss": 0.7809, "step": 11661 }, { "epoch": 0.8112977842707573, "grad_norm": 0.91015625, "learning_rate": 0.00018098440489514668, "loss": 0.6439, "step": 11662 }, { "epoch": 0.8113673519078924, "grad_norm": 1.046875, "learning_rate": 0.0001808551360917384, "loss": 0.7882, "step": 11663 }, { "epoch": 0.8114369195450276, "grad_norm": 1.1875, "learning_rate": 0.00018072590888037744, "loss": 0.9247, "step": 11664 }, { "epoch": 0.8115064871821629, "grad_norm": 1.1796875, "learning_rate": 0.00018059672326762533, "loss": 0.7383, "step": 11665 }, { "epoch": 0.8115760548192981, "grad_norm": 1.4140625, "learning_rate": 0.00018046757926004164, "loss": 0.8242, "step": 11666 }, { "epoch": 0.8116456224564332, "grad_norm": 0.9609375, "learning_rate": 0.00018033847686418347, "loss": 0.91, "step": 11667 }, { "epoch": 0.8117151900935685, "grad_norm": 1.046875, "learning_rate": 0.00018020941608660614, "loss": 0.7519, "step": 11668 }, { "epoch": 0.8117847577307037, "grad_norm": 1.28125, "learning_rate": 0.00018008039693386246, "loss": 0.7026, "step": 11669 }, { "epoch": 0.8118543253678389, "grad_norm": 1.3828125, "learning_rate": 0.0001799514194125037, "loss": 0.749, "step": 11670 }, { "epoch": 0.8119238930049741, "grad_norm": 1.0390625, "learning_rate": 0.00017982248352907827, "loss": 0.8047, "step": 11671 }, { "epoch": 0.8119934606421093, "grad_norm": 1.2109375, "learning_rate": 0.00017969358929013346, "loss": 0.8779, "step": 11672 }, { "epoch": 0.8120630282792445, "grad_norm": 1.046875, "learning_rate": 0.0001795647367022135, "loss": 0.8413, "step": 11673 }, { "epoch": 0.8121325959163797, "grad_norm": 1.125, "learning_rate": 0.00017943592577186063, "loss": 0.8145, "step": 11674 }, { "epoch": 0.8122021635535149, "grad_norm": 1.0859375, "learning_rate": 0.00017930715650561546, "loss": 0.9092, "step": 11675 }, { "epoch": 0.8122717311906501, "grad_norm": 0.94140625, "learning_rate": 0.00017917842891001658, "loss": 0.871, "step": 11676 }, { "epoch": 0.8123412988277853, "grad_norm": 1.046875, "learning_rate": 0.00017904974299159983, "loss": 0.7144, "step": 11677 }, { "epoch": 0.8124108664649206, "grad_norm": 1.2109375, "learning_rate": 0.000178921098756899, "loss": 0.7592, "step": 11678 }, { "epoch": 0.8124804341020557, "grad_norm": 0.9296875, "learning_rate": 0.0001787924962124462, "loss": 0.7098, "step": 11679 }, { "epoch": 0.8125500017391909, "grad_norm": 1.0703125, "learning_rate": 0.00017866393536477155, "loss": 0.8082, "step": 11680 }, { "epoch": 0.8126195693763262, "grad_norm": 1.1796875, "learning_rate": 0.00017853541622040237, "loss": 0.5663, "step": 11681 }, { "epoch": 0.8126891370134613, "grad_norm": 1.0078125, "learning_rate": 0.000178406938785864, "loss": 0.9504, "step": 11682 }, { "epoch": 0.8127587046505965, "grad_norm": 1.34375, "learning_rate": 0.00017827850306768024, "loss": 0.9961, "step": 11683 }, { "epoch": 0.8128282722877318, "grad_norm": 1.0625, "learning_rate": 0.0001781501090723725, "loss": 0.8448, "step": 11684 }, { "epoch": 0.812897839924867, "grad_norm": 1.125, "learning_rate": 0.00017802175680645948, "loss": 0.8075, "step": 11685 }, { "epoch": 0.8129674075620021, "grad_norm": 1.1640625, "learning_rate": 0.00017789344627645897, "loss": 0.9241, "step": 11686 }, { "epoch": 0.8130369751991373, "grad_norm": 0.8984375, "learning_rate": 0.0001777651774888851, "loss": 0.7554, "step": 11687 }, { "epoch": 0.8131065428362726, "grad_norm": 1.1875, "learning_rate": 0.00017763695045025152, "loss": 0.8555, "step": 11688 }, { "epoch": 0.8131761104734078, "grad_norm": 1.0234375, "learning_rate": 0.00017750876516706837, "loss": 0.7459, "step": 11689 }, { "epoch": 0.8132456781105429, "grad_norm": 1.375, "learning_rate": 0.00017738062164584457, "loss": 0.5829, "step": 11690 }, { "epoch": 0.8133152457476782, "grad_norm": 1.3125, "learning_rate": 0.00017725251989308654, "loss": 0.7101, "step": 11691 }, { "epoch": 0.8133848133848134, "grad_norm": 1.1640625, "learning_rate": 0.00017712445991529814, "loss": 0.7135, "step": 11692 }, { "epoch": 0.8134543810219486, "grad_norm": 1.0390625, "learning_rate": 0.00017699644171898256, "loss": 0.6507, "step": 11693 }, { "epoch": 0.8135239486590838, "grad_norm": 0.98828125, "learning_rate": 0.0001768684653106395, "loss": 0.8359, "step": 11694 }, { "epoch": 0.813593516296219, "grad_norm": 1.3828125, "learning_rate": 0.00017674053069676677, "loss": 0.9942, "step": 11695 }, { "epoch": 0.8136630839333542, "grad_norm": 1.1328125, "learning_rate": 0.00017661263788386005, "loss": 0.7005, "step": 11696 }, { "epoch": 0.8137326515704895, "grad_norm": 1.015625, "learning_rate": 0.00017648478687841353, "loss": 0.706, "step": 11697 }, { "epoch": 0.8138022192076246, "grad_norm": 1.265625, "learning_rate": 0.00017635697768691894, "loss": 0.918, "step": 11698 }, { "epoch": 0.8138717868447598, "grad_norm": 1.0078125, "learning_rate": 0.00017622921031586525, "loss": 0.7621, "step": 11699 }, { "epoch": 0.813941354481895, "grad_norm": 1.0234375, "learning_rate": 0.00017610148477174037, "loss": 0.7783, "step": 11700 }, { "epoch": 0.8140109221190303, "grad_norm": 0.89453125, "learning_rate": 0.00017597380106102923, "loss": 0.7429, "step": 11701 }, { "epoch": 0.8140804897561654, "grad_norm": 1.0546875, "learning_rate": 0.0001758461591902152, "loss": 0.9648, "step": 11702 }, { "epoch": 0.8141500573933006, "grad_norm": 1.1484375, "learning_rate": 0.00017571855916577895, "loss": 0.9206, "step": 11703 }, { "epoch": 0.8142196250304359, "grad_norm": 1.0703125, "learning_rate": 0.0001755910009941998, "loss": 0.9077, "step": 11704 }, { "epoch": 0.814289192667571, "grad_norm": 1.2578125, "learning_rate": 0.0001754634846819543, "loss": 0.772, "step": 11705 }, { "epoch": 0.8143587603047062, "grad_norm": 1.15625, "learning_rate": 0.0001753360102355166, "loss": 1.0154, "step": 11706 }, { "epoch": 0.8144283279418415, "grad_norm": 1.015625, "learning_rate": 0.00017520857766136012, "loss": 0.7274, "step": 11707 }, { "epoch": 0.8144978955789767, "grad_norm": 1.296875, "learning_rate": 0.00017508118696595487, "loss": 0.9585, "step": 11708 }, { "epoch": 0.8145674632161118, "grad_norm": 1.1796875, "learning_rate": 0.00017495383815576904, "loss": 0.7493, "step": 11709 }, { "epoch": 0.8146370308532471, "grad_norm": 1.03125, "learning_rate": 0.00017482653123726855, "loss": 0.7809, "step": 11710 }, { "epoch": 0.8147065984903823, "grad_norm": 1.046875, "learning_rate": 0.00017469926621691757, "loss": 0.694, "step": 11711 }, { "epoch": 0.8147761661275175, "grad_norm": 0.984375, "learning_rate": 0.00017457204310117837, "loss": 0.7324, "step": 11712 }, { "epoch": 0.8148457337646526, "grad_norm": 1.0234375, "learning_rate": 0.00017444486189651, "loss": 0.9471, "step": 11713 }, { "epoch": 0.8149153014017879, "grad_norm": 1.2109375, "learning_rate": 0.00017431772260937073, "loss": 0.8573, "step": 11714 }, { "epoch": 0.8149848690389231, "grad_norm": 1.484375, "learning_rate": 0.00017419062524621544, "loss": 0.904, "step": 11715 }, { "epoch": 0.8150544366760583, "grad_norm": 1.4453125, "learning_rate": 0.00017406356981349813, "loss": 0.8472, "step": 11716 }, { "epoch": 0.8151240043131935, "grad_norm": 0.87109375, "learning_rate": 0.00017393655631766947, "loss": 0.5351, "step": 11717 }, { "epoch": 0.8151935719503287, "grad_norm": 1.1015625, "learning_rate": 0.00017380958476517904, "loss": 0.8369, "step": 11718 }, { "epoch": 0.8152631395874639, "grad_norm": 1.2265625, "learning_rate": 0.00017368265516247338, "loss": 0.7326, "step": 11719 }, { "epoch": 0.8153327072245992, "grad_norm": 1.3359375, "learning_rate": 0.00017355576751599744, "loss": 0.8288, "step": 11720 }, { "epoch": 0.8154022748617343, "grad_norm": 1.03125, "learning_rate": 0.0001734289218321944, "loss": 0.7971, "step": 11721 }, { "epoch": 0.8154718424988695, "grad_norm": 1.3984375, "learning_rate": 0.0001733021181175044, "loss": 1.0176, "step": 11722 }, { "epoch": 0.8155414101360048, "grad_norm": 1.2421875, "learning_rate": 0.00017317535637836602, "loss": 0.7279, "step": 11723 }, { "epoch": 0.81561097777314, "grad_norm": 1.3828125, "learning_rate": 0.00017304863662121527, "loss": 1.0087, "step": 11724 }, { "epoch": 0.8156805454102751, "grad_norm": 1.0234375, "learning_rate": 0.00017292195885248662, "loss": 0.5907, "step": 11725 }, { "epoch": 0.8157501130474103, "grad_norm": 1.3203125, "learning_rate": 0.00017279532307861245, "loss": 0.9269, "step": 11726 }, { "epoch": 0.8158196806845456, "grad_norm": 0.98046875, "learning_rate": 0.00017266872930602197, "loss": 0.7344, "step": 11727 }, { "epoch": 0.8158892483216807, "grad_norm": 1.0390625, "learning_rate": 0.00017254217754114365, "loss": 0.7148, "step": 11728 }, { "epoch": 0.8159588159588159, "grad_norm": 1.125, "learning_rate": 0.00017241566779040263, "loss": 0.7071, "step": 11729 }, { "epoch": 0.8160283835959512, "grad_norm": 1.1015625, "learning_rate": 0.00017228920006022287, "loss": 1.045, "step": 11730 }, { "epoch": 0.8160979512330864, "grad_norm": 1.234375, "learning_rate": 0.00017216277435702542, "loss": 0.9198, "step": 11731 }, { "epoch": 0.8161675188702215, "grad_norm": 1.0390625, "learning_rate": 0.00017203639068722975, "loss": 0.7394, "step": 11732 }, { "epoch": 0.8162370865073568, "grad_norm": 1.125, "learning_rate": 0.00017191004905725283, "loss": 0.8574, "step": 11733 }, { "epoch": 0.816306654144492, "grad_norm": 0.8203125, "learning_rate": 0.0001717837494735097, "loss": 0.6116, "step": 11734 }, { "epoch": 0.8163762217816272, "grad_norm": 1.0703125, "learning_rate": 0.00017165749194241343, "loss": 0.6934, "step": 11735 }, { "epoch": 0.8164457894187624, "grad_norm": 1.1171875, "learning_rate": 0.00017153127647037458, "loss": 0.7483, "step": 11736 }, { "epoch": 0.8165153570558976, "grad_norm": 1.125, "learning_rate": 0.00017140510306380176, "loss": 0.8049, "step": 11737 }, { "epoch": 0.8165849246930328, "grad_norm": 1.1328125, "learning_rate": 0.0001712789717291009, "loss": 0.9112, "step": 11738 }, { "epoch": 0.816654492330168, "grad_norm": 1.0234375, "learning_rate": 0.00017115288247267725, "loss": 0.868, "step": 11739 }, { "epoch": 0.8167240599673032, "grad_norm": 1.3984375, "learning_rate": 0.00017102683530093255, "loss": 0.7093, "step": 11740 }, { "epoch": 0.8167936276044384, "grad_norm": 0.96484375, "learning_rate": 0.0001709008302202666, "loss": 0.8052, "step": 11741 }, { "epoch": 0.8168631952415736, "grad_norm": 1.109375, "learning_rate": 0.0001707748672370777, "loss": 0.9135, "step": 11742 }, { "epoch": 0.8169327628787089, "grad_norm": 1.3671875, "learning_rate": 0.00017064894635776117, "loss": 0.7799, "step": 11743 }, { "epoch": 0.817002330515844, "grad_norm": 1.21875, "learning_rate": 0.00017052306758871127, "loss": 0.9484, "step": 11744 }, { "epoch": 0.8170718981529792, "grad_norm": 1.125, "learning_rate": 0.00017039723093631876, "loss": 0.8796, "step": 11745 }, { "epoch": 0.8171414657901145, "grad_norm": 1.0703125, "learning_rate": 0.00017027143640697362, "loss": 0.6473, "step": 11746 }, { "epoch": 0.8172110334272497, "grad_norm": 1.25, "learning_rate": 0.00017014568400706265, "loss": 0.7669, "step": 11747 }, { "epoch": 0.8172806010643848, "grad_norm": 1.4296875, "learning_rate": 0.00017001997374297095, "loss": 0.9723, "step": 11748 }, { "epoch": 0.8173501687015201, "grad_norm": 0.953125, "learning_rate": 0.00016989430562108188, "loss": 0.7161, "step": 11749 }, { "epoch": 0.8174197363386553, "grad_norm": 1.078125, "learning_rate": 0.00016976867964777598, "loss": 0.894, "step": 11750 }, { "epoch": 0.8174893039757904, "grad_norm": 1.2109375, "learning_rate": 0.0001696430958294315, "loss": 0.7189, "step": 11751 }, { "epoch": 0.8175588716129256, "grad_norm": 1.46875, "learning_rate": 0.0001695175541724253, "loss": 0.8627, "step": 11752 }, { "epoch": 0.8176284392500609, "grad_norm": 1.265625, "learning_rate": 0.00016939205468313213, "loss": 0.9201, "step": 11753 }, { "epoch": 0.8176980068871961, "grad_norm": 0.76953125, "learning_rate": 0.0001692665973679237, "loss": 0.4185, "step": 11754 }, { "epoch": 0.8177675745243312, "grad_norm": 1.0546875, "learning_rate": 0.00016914118223317033, "loss": 0.7814, "step": 11755 }, { "epoch": 0.8178371421614665, "grad_norm": 1.328125, "learning_rate": 0.00016901580928523963, "loss": 0.5873, "step": 11756 }, { "epoch": 0.8179067097986017, "grad_norm": 1.5546875, "learning_rate": 0.00016889047853049766, "loss": 0.9774, "step": 11757 }, { "epoch": 0.8179762774357369, "grad_norm": 0.81640625, "learning_rate": 0.00016876518997530843, "loss": 0.6748, "step": 11758 }, { "epoch": 0.8180458450728721, "grad_norm": 1.1328125, "learning_rate": 0.00016863994362603275, "loss": 0.7434, "step": 11759 }, { "epoch": 0.8181154127100073, "grad_norm": 0.984375, "learning_rate": 0.00016851473948903062, "loss": 0.9599, "step": 11760 }, { "epoch": 0.8181849803471425, "grad_norm": 0.94140625, "learning_rate": 0.00016838957757065877, "loss": 0.6192, "step": 11761 }, { "epoch": 0.8182545479842778, "grad_norm": 1.1796875, "learning_rate": 0.00016826445787727285, "loss": 0.7509, "step": 11762 }, { "epoch": 0.8183241156214129, "grad_norm": 1.125, "learning_rate": 0.00016813938041522526, "loss": 0.7313, "step": 11763 }, { "epoch": 0.8183936832585481, "grad_norm": 1.1171875, "learning_rate": 0.00016801434519086723, "loss": 0.8128, "step": 11764 }, { "epoch": 0.8184632508956833, "grad_norm": 1.203125, "learning_rate": 0.00016788935221054703, "loss": 0.9444, "step": 11765 }, { "epoch": 0.8185328185328186, "grad_norm": 1.140625, "learning_rate": 0.00016776440148061133, "loss": 0.9276, "step": 11766 }, { "epoch": 0.8186023861699537, "grad_norm": 1.4453125, "learning_rate": 0.0001676394930074049, "loss": 0.6666, "step": 11767 }, { "epoch": 0.8186719538070889, "grad_norm": 1.2265625, "learning_rate": 0.00016751462679726948, "loss": 0.7016, "step": 11768 }, { "epoch": 0.8187415214442242, "grad_norm": 1.328125, "learning_rate": 0.00016738980285654537, "loss": 0.8631, "step": 11769 }, { "epoch": 0.8188110890813594, "grad_norm": 1.078125, "learning_rate": 0.00016726502119156984, "loss": 0.7953, "step": 11770 }, { "epoch": 0.8188806567184945, "grad_norm": 1.046875, "learning_rate": 0.0001671402818086797, "loss": 0.8171, "step": 11771 }, { "epoch": 0.8189502243556298, "grad_norm": 1.0703125, "learning_rate": 0.0001670155847142082, "loss": 0.5757, "step": 11772 }, { "epoch": 0.819019791992765, "grad_norm": 1.1640625, "learning_rate": 0.0001668909299144865, "loss": 0.9231, "step": 11773 }, { "epoch": 0.8190893596299001, "grad_norm": 1.2734375, "learning_rate": 0.00016676631741584447, "loss": 0.7563, "step": 11774 }, { "epoch": 0.8191589272670354, "grad_norm": 0.94921875, "learning_rate": 0.00016664174722460866, "loss": 0.6916, "step": 11775 }, { "epoch": 0.8192284949041706, "grad_norm": 1.1796875, "learning_rate": 0.00016651721934710483, "loss": 0.7551, "step": 11776 }, { "epoch": 0.8192980625413058, "grad_norm": 1.21875, "learning_rate": 0.00016639273378965536, "loss": 0.697, "step": 11777 }, { "epoch": 0.8193676301784409, "grad_norm": 1.3203125, "learning_rate": 0.00016626829055858128, "loss": 0.9492, "step": 11778 }, { "epoch": 0.8194371978155762, "grad_norm": 1.046875, "learning_rate": 0.000166143889660201, "loss": 0.6445, "step": 11779 }, { "epoch": 0.8195067654527114, "grad_norm": 1.34375, "learning_rate": 0.0001660195311008309, "loss": 0.8859, "step": 11780 }, { "epoch": 0.8195763330898466, "grad_norm": 1.1953125, "learning_rate": 0.00016589521488678582, "loss": 0.8174, "step": 11781 }, { "epoch": 0.8196459007269818, "grad_norm": 0.8828125, "learning_rate": 0.0001657709410243774, "loss": 0.6393, "step": 11782 }, { "epoch": 0.819715468364117, "grad_norm": 1.28125, "learning_rate": 0.00016564670951991556, "loss": 0.9107, "step": 11783 }, { "epoch": 0.8197850360012522, "grad_norm": 1.2890625, "learning_rate": 0.00016552252037970838, "loss": 0.965, "step": 11784 }, { "epoch": 0.8198546036383875, "grad_norm": 1.2734375, "learning_rate": 0.00016539837361006184, "loss": 0.6285, "step": 11785 }, { "epoch": 0.8199241712755226, "grad_norm": 1.125, "learning_rate": 0.00016527426921727917, "loss": 0.5821, "step": 11786 }, { "epoch": 0.8199937389126578, "grad_norm": 1.3125, "learning_rate": 0.00016515020720766149, "loss": 0.7352, "step": 11787 }, { "epoch": 0.820063306549793, "grad_norm": 1.1796875, "learning_rate": 0.00016502618758750854, "loss": 0.9191, "step": 11788 }, { "epoch": 0.8201328741869283, "grad_norm": 1.0703125, "learning_rate": 0.00016490221036311704, "loss": 0.8733, "step": 11789 }, { "epoch": 0.8202024418240634, "grad_norm": 1.234375, "learning_rate": 0.00016477827554078228, "loss": 0.9191, "step": 11790 }, { "epoch": 0.8202720094611986, "grad_norm": 1.1875, "learning_rate": 0.0001646543831267966, "loss": 0.7753, "step": 11791 }, { "epoch": 0.8203415770983339, "grad_norm": 1.3046875, "learning_rate": 0.00016453053312745115, "loss": 0.689, "step": 11792 }, { "epoch": 0.8204111447354691, "grad_norm": 1.3046875, "learning_rate": 0.0001644067255490339, "loss": 0.8691, "step": 11793 }, { "epoch": 0.8204807123726042, "grad_norm": 1.234375, "learning_rate": 0.00016428296039783152, "loss": 0.6144, "step": 11794 }, { "epoch": 0.8205502800097395, "grad_norm": 1.1484375, "learning_rate": 0.0001641592376801282, "loss": 1.0364, "step": 11795 }, { "epoch": 0.8206198476468747, "grad_norm": 0.8671875, "learning_rate": 0.0001640355574022059, "loss": 0.694, "step": 11796 }, { "epoch": 0.8206894152840098, "grad_norm": 1.5703125, "learning_rate": 0.00016391191957034422, "loss": 0.8732, "step": 11797 }, { "epoch": 0.8207589829211451, "grad_norm": 1.515625, "learning_rate": 0.00016378832419082102, "loss": 0.9643, "step": 11798 }, { "epoch": 0.8208285505582803, "grad_norm": 1.234375, "learning_rate": 0.00016366477126991208, "loss": 0.8182, "step": 11799 }, { "epoch": 0.8208981181954155, "grad_norm": 0.98828125, "learning_rate": 0.00016354126081389076, "loss": 0.705, "step": 11800 }, { "epoch": 0.8209676858325506, "grad_norm": 1.1796875, "learning_rate": 0.0001634177928290278, "loss": 0.7874, "step": 11801 }, { "epoch": 0.8210372534696859, "grad_norm": 0.99609375, "learning_rate": 0.00016329436732159263, "loss": 0.5235, "step": 11802 }, { "epoch": 0.8211068211068211, "grad_norm": 0.87109375, "learning_rate": 0.00016317098429785248, "loss": 0.6098, "step": 11803 }, { "epoch": 0.8211763887439563, "grad_norm": 1.296875, "learning_rate": 0.00016304764376407177, "loss": 0.7253, "step": 11804 }, { "epoch": 0.8212459563810915, "grad_norm": 1.015625, "learning_rate": 0.00016292434572651293, "loss": 0.6622, "step": 11805 }, { "epoch": 0.8213155240182267, "grad_norm": 1.0546875, "learning_rate": 0.00016280109019143685, "loss": 0.5838, "step": 11806 }, { "epoch": 0.8213850916553619, "grad_norm": 1.0859375, "learning_rate": 0.00016267787716510142, "loss": 0.7913, "step": 11807 }, { "epoch": 0.8214546592924972, "grad_norm": 0.91796875, "learning_rate": 0.00016255470665376304, "loss": 0.6731, "step": 11808 }, { "epoch": 0.8215242269296323, "grad_norm": 1.546875, "learning_rate": 0.00016243157866367575, "loss": 1.1183, "step": 11809 }, { "epoch": 0.8215937945667675, "grad_norm": 1.0859375, "learning_rate": 0.0001623084932010912, "loss": 0.8298, "step": 11810 }, { "epoch": 0.8216633622039028, "grad_norm": 1.1875, "learning_rate": 0.00016218545027225895, "loss": 0.7191, "step": 11811 }, { "epoch": 0.821732929841038, "grad_norm": 1.1484375, "learning_rate": 0.00016206244988342666, "loss": 0.7012, "step": 11812 }, { "epoch": 0.8218024974781731, "grad_norm": 1.03125, "learning_rate": 0.0001619394920408398, "loss": 0.8181, "step": 11813 }, { "epoch": 0.8218720651153083, "grad_norm": 1.1875, "learning_rate": 0.00016181657675074147, "loss": 0.6448, "step": 11814 }, { "epoch": 0.8219416327524436, "grad_norm": 1.2734375, "learning_rate": 0.00016169370401937223, "loss": 0.7898, "step": 11815 }, { "epoch": 0.8220112003895788, "grad_norm": 1.1484375, "learning_rate": 0.00016157087385297142, "loss": 0.797, "step": 11816 }, { "epoch": 0.8220807680267139, "grad_norm": 0.9296875, "learning_rate": 0.00016144808625777595, "loss": 0.7012, "step": 11817 }, { "epoch": 0.8221503356638492, "grad_norm": 1.2578125, "learning_rate": 0.00016132534124001997, "loss": 0.7456, "step": 11818 }, { "epoch": 0.8222199033009844, "grad_norm": 1.2265625, "learning_rate": 0.00016120263880593566, "loss": 0.7268, "step": 11819 }, { "epoch": 0.8222894709381195, "grad_norm": 1.0703125, "learning_rate": 0.00016107997896175374, "loss": 0.7633, "step": 11820 }, { "epoch": 0.8223590385752548, "grad_norm": 1.21875, "learning_rate": 0.0001609573617137019, "loss": 0.8158, "step": 11821 }, { "epoch": 0.82242860621239, "grad_norm": 0.94140625, "learning_rate": 0.00016083478706800604, "loss": 0.6253, "step": 11822 }, { "epoch": 0.8224981738495252, "grad_norm": 1.1171875, "learning_rate": 0.00016071225503089026, "loss": 0.5911, "step": 11823 }, { "epoch": 0.8225677414866605, "grad_norm": 0.921875, "learning_rate": 0.00016058976560857574, "loss": 0.5473, "step": 11824 }, { "epoch": 0.8226373091237956, "grad_norm": 0.9375, "learning_rate": 0.00016046731880728184, "loss": 0.6328, "step": 11825 }, { "epoch": 0.8227068767609308, "grad_norm": 1.21875, "learning_rate": 0.000160344914633226, "loss": 0.8415, "step": 11826 }, { "epoch": 0.822776444398066, "grad_norm": 0.96484375, "learning_rate": 0.00016022255309262334, "loss": 0.8935, "step": 11827 }, { "epoch": 0.8228460120352012, "grad_norm": 1.0390625, "learning_rate": 0.00016010023419168673, "loss": 0.7528, "step": 11828 }, { "epoch": 0.8229155796723364, "grad_norm": 0.96875, "learning_rate": 0.0001599779579366265, "loss": 0.7167, "step": 11829 }, { "epoch": 0.8229851473094716, "grad_norm": 1.53125, "learning_rate": 0.00015985572433365158, "loss": 0.9432, "step": 11830 }, { "epoch": 0.8230547149466069, "grad_norm": 1.4765625, "learning_rate": 0.00015973353338896856, "loss": 0.9314, "step": 11831 }, { "epoch": 0.823124282583742, "grad_norm": 1.015625, "learning_rate": 0.0001596113851087815, "loss": 0.818, "step": 11832 }, { "epoch": 0.8231938502208772, "grad_norm": 1.28125, "learning_rate": 0.00015948927949929216, "loss": 0.7425, "step": 11833 }, { "epoch": 0.8232634178580125, "grad_norm": 1.0546875, "learning_rate": 0.0001593672165667007, "loss": 0.6268, "step": 11834 }, { "epoch": 0.8233329854951477, "grad_norm": 1.0234375, "learning_rate": 0.00015924519631720514, "loss": 0.8678, "step": 11835 }, { "epoch": 0.8234025531322828, "grad_norm": 1.5078125, "learning_rate": 0.00015912321875700074, "loss": 1.029, "step": 11836 }, { "epoch": 0.8234721207694181, "grad_norm": 0.953125, "learning_rate": 0.00015900128389228086, "loss": 0.8239, "step": 11837 }, { "epoch": 0.8235416884065533, "grad_norm": 1.28125, "learning_rate": 0.00015887939172923692, "loss": 0.6994, "step": 11838 }, { "epoch": 0.8236112560436885, "grad_norm": 0.98828125, "learning_rate": 0.0001587575422740578, "loss": 0.5914, "step": 11839 }, { "epoch": 0.8236808236808236, "grad_norm": 0.8671875, "learning_rate": 0.00015863573553293042, "loss": 0.7486, "step": 11840 }, { "epoch": 0.8237503913179589, "grad_norm": 0.99609375, "learning_rate": 0.00015851397151203983, "loss": 0.7262, "step": 11841 }, { "epoch": 0.8238199589550941, "grad_norm": 0.83984375, "learning_rate": 0.0001583922502175684, "loss": 0.7706, "step": 11842 }, { "epoch": 0.8238895265922292, "grad_norm": 1.0703125, "learning_rate": 0.00015827057165569624, "loss": 0.7025, "step": 11843 }, { "epoch": 0.8239590942293645, "grad_norm": 1.03125, "learning_rate": 0.0001581489358326018, "loss": 0.7226, "step": 11844 }, { "epoch": 0.8240286618664997, "grad_norm": 0.82421875, "learning_rate": 0.0001580273427544614, "loss": 0.5513, "step": 11845 }, { "epoch": 0.8240982295036349, "grad_norm": 1.03125, "learning_rate": 0.00015790579242744873, "loss": 0.8411, "step": 11846 }, { "epoch": 0.8241677971407702, "grad_norm": 1.046875, "learning_rate": 0.00015778428485773522, "loss": 0.8452, "step": 11847 }, { "epoch": 0.8242373647779053, "grad_norm": 1.09375, "learning_rate": 0.00015766282005149056, "loss": 0.6987, "step": 11848 }, { "epoch": 0.8243069324150405, "grad_norm": 1.3203125, "learning_rate": 0.00015754139801488256, "loss": 0.9424, "step": 11849 }, { "epoch": 0.8243765000521758, "grad_norm": 1.03125, "learning_rate": 0.00015742001875407598, "loss": 0.6831, "step": 11850 }, { "epoch": 0.824446067689311, "grad_norm": 0.7109375, "learning_rate": 0.0001572986822752336, "loss": 0.6033, "step": 11851 }, { "epoch": 0.8245156353264461, "grad_norm": 1.1953125, "learning_rate": 0.000157177388584517, "loss": 0.9047, "step": 11852 }, { "epoch": 0.8245852029635813, "grad_norm": 1.125, "learning_rate": 0.00015705613768808414, "loss": 0.8724, "step": 11853 }, { "epoch": 0.8246547706007166, "grad_norm": 0.99609375, "learning_rate": 0.00015693492959209187, "loss": 0.8429, "step": 11854 }, { "epoch": 0.8247243382378517, "grad_norm": 0.8359375, "learning_rate": 0.0001568137643026948, "loss": 0.6627, "step": 11855 }, { "epoch": 0.8247939058749869, "grad_norm": 1.1171875, "learning_rate": 0.0001566926418260447, "loss": 0.8518, "step": 11856 }, { "epoch": 0.8248634735121222, "grad_norm": 1.1015625, "learning_rate": 0.00015657156216829148, "loss": 0.938, "step": 11857 }, { "epoch": 0.8249330411492574, "grad_norm": 1.2578125, "learning_rate": 0.00015645052533558323, "loss": 0.7418, "step": 11858 }, { "epoch": 0.8250026087863925, "grad_norm": 1.015625, "learning_rate": 0.0001563295313340657, "loss": 0.7214, "step": 11859 }, { "epoch": 0.8250721764235278, "grad_norm": 1.0625, "learning_rate": 0.00015620858016988205, "loss": 0.857, "step": 11860 }, { "epoch": 0.825141744060663, "grad_norm": 0.89453125, "learning_rate": 0.0001560876718491735, "loss": 0.5527, "step": 11861 }, { "epoch": 0.8252113116977982, "grad_norm": 1.3125, "learning_rate": 0.00015596680637807936, "loss": 0.8925, "step": 11862 }, { "epoch": 0.8252808793349334, "grad_norm": 1.2421875, "learning_rate": 0.00015584598376273674, "loss": 0.6308, "step": 11863 }, { "epoch": 0.8253504469720686, "grad_norm": 0.98046875, "learning_rate": 0.00015572520400928026, "loss": 0.7314, "step": 11864 }, { "epoch": 0.8254200146092038, "grad_norm": 1.15625, "learning_rate": 0.00015560446712384223, "loss": 0.7527, "step": 11865 }, { "epoch": 0.825489582246339, "grad_norm": 1.03125, "learning_rate": 0.00015548377311255324, "loss": 0.6309, "step": 11866 }, { "epoch": 0.8255591498834742, "grad_norm": 0.80859375, "learning_rate": 0.0001553631219815419, "loss": 0.4498, "step": 11867 }, { "epoch": 0.8256287175206094, "grad_norm": 1.1328125, "learning_rate": 0.00015524251373693354, "loss": 0.7638, "step": 11868 }, { "epoch": 0.8256982851577446, "grad_norm": 1.296875, "learning_rate": 0.00015512194838485284, "loss": 0.846, "step": 11869 }, { "epoch": 0.8257678527948799, "grad_norm": 0.9765625, "learning_rate": 0.0001550014259314211, "loss": 0.983, "step": 11870 }, { "epoch": 0.825837420432015, "grad_norm": 1.2109375, "learning_rate": 0.00015488094638275751, "loss": 0.7911, "step": 11871 }, { "epoch": 0.8259069880691502, "grad_norm": 1.3671875, "learning_rate": 0.0001547605097449798, "loss": 0.9909, "step": 11872 }, { "epoch": 0.8259765557062855, "grad_norm": 1.0703125, "learning_rate": 0.00015464011602420324, "loss": 0.8125, "step": 11873 }, { "epoch": 0.8260461233434206, "grad_norm": 1.078125, "learning_rate": 0.00015451976522654076, "loss": 0.9632, "step": 11874 }, { "epoch": 0.8261156909805558, "grad_norm": 0.98046875, "learning_rate": 0.0001543994573581028, "loss": 0.7242, "step": 11875 }, { "epoch": 0.8261852586176911, "grad_norm": 1.140625, "learning_rate": 0.00015427919242499822, "loss": 0.5407, "step": 11876 }, { "epoch": 0.8262548262548263, "grad_norm": 1.2421875, "learning_rate": 0.0001541589704333337, "loss": 0.798, "step": 11877 }, { "epoch": 0.8263243938919614, "grad_norm": 1.203125, "learning_rate": 0.0001540387913892134, "loss": 0.8822, "step": 11878 }, { "epoch": 0.8263939615290966, "grad_norm": 1.15625, "learning_rate": 0.00015391865529873906, "loss": 0.7547, "step": 11879 }, { "epoch": 0.8264635291662319, "grad_norm": 1.21875, "learning_rate": 0.0001537985621680108, "loss": 0.7576, "step": 11880 }, { "epoch": 0.8265330968033671, "grad_norm": 1.1484375, "learning_rate": 0.00015367851200312666, "loss": 0.6437, "step": 11881 }, { "epoch": 0.8266026644405022, "grad_norm": 1.484375, "learning_rate": 0.00015355850481018162, "loss": 0.859, "step": 11882 }, { "epoch": 0.8266722320776375, "grad_norm": 1.0625, "learning_rate": 0.00015343854059526952, "loss": 0.703, "step": 11883 }, { "epoch": 0.8267417997147727, "grad_norm": 1.09375, "learning_rate": 0.00015331861936448144, "loss": 0.7281, "step": 11884 }, { "epoch": 0.8268113673519079, "grad_norm": 0.94921875, "learning_rate": 0.00015319874112390598, "loss": 0.8862, "step": 11885 }, { "epoch": 0.8268809349890431, "grad_norm": 1.2890625, "learning_rate": 0.00015307890587963036, "loss": 0.8087, "step": 11886 }, { "epoch": 0.8269505026261783, "grad_norm": 1.1796875, "learning_rate": 0.00015295911363773918, "loss": 0.7998, "step": 11887 }, { "epoch": 0.8270200702633135, "grad_norm": 1.21875, "learning_rate": 0.0001528393644043149, "loss": 0.9382, "step": 11888 }, { "epoch": 0.8270896379004488, "grad_norm": 1.203125, "learning_rate": 0.00015271965818543744, "loss": 0.8733, "step": 11889 }, { "epoch": 0.8271592055375839, "grad_norm": 1.28125, "learning_rate": 0.00015259999498718513, "loss": 0.8205, "step": 11890 }, { "epoch": 0.8272287731747191, "grad_norm": 1.1328125, "learning_rate": 0.00015248037481563415, "loss": 0.7617, "step": 11891 }, { "epoch": 0.8272983408118543, "grad_norm": 1.1484375, "learning_rate": 0.00015236079767685785, "loss": 0.6772, "step": 11892 }, { "epoch": 0.8273679084489896, "grad_norm": 0.77734375, "learning_rate": 0.00015224126357692757, "loss": 0.4945, "step": 11893 }, { "epoch": 0.8274374760861247, "grad_norm": 1.0, "learning_rate": 0.00015212177252191294, "loss": 0.8874, "step": 11894 }, { "epoch": 0.8275070437232599, "grad_norm": 1.2890625, "learning_rate": 0.00015200232451788133, "loss": 0.8229, "step": 11895 }, { "epoch": 0.8275766113603952, "grad_norm": 0.9453125, "learning_rate": 0.00015188291957089718, "loss": 0.6975, "step": 11896 }, { "epoch": 0.8276461789975303, "grad_norm": 1.0390625, "learning_rate": 0.00015176355768702388, "loss": 0.6228, "step": 11897 }, { "epoch": 0.8277157466346655, "grad_norm": 1.140625, "learning_rate": 0.0001516442388723216, "loss": 0.8128, "step": 11898 }, { "epoch": 0.8277853142718008, "grad_norm": 1.3515625, "learning_rate": 0.0001515249631328486, "loss": 1.1627, "step": 11899 }, { "epoch": 0.827854881908936, "grad_norm": 1.2265625, "learning_rate": 0.00015140573047466133, "loss": 0.6686, "step": 11900 }, { "epoch": 0.8279244495460711, "grad_norm": 1.25, "learning_rate": 0.0001512865409038141, "loss": 1.0271, "step": 11901 }, { "epoch": 0.8279940171832064, "grad_norm": 0.96875, "learning_rate": 0.00015116739442635853, "loss": 0.8546, "step": 11902 }, { "epoch": 0.8280635848203416, "grad_norm": 1.3046875, "learning_rate": 0.00015104829104834394, "loss": 0.7779, "step": 11903 }, { "epoch": 0.8281331524574768, "grad_norm": 0.9921875, "learning_rate": 0.0001509292307758181, "loss": 0.7168, "step": 11904 }, { "epoch": 0.8282027200946119, "grad_norm": 1.328125, "learning_rate": 0.00015081021361482662, "loss": 0.8175, "step": 11905 }, { "epoch": 0.8282722877317472, "grad_norm": 1.09375, "learning_rate": 0.00015069123957141219, "loss": 0.8798, "step": 11906 }, { "epoch": 0.8283418553688824, "grad_norm": 1.171875, "learning_rate": 0.00015057230865161552, "loss": 1.0727, "step": 11907 }, { "epoch": 0.8284114230060176, "grad_norm": 1.203125, "learning_rate": 0.00015045342086147562, "loss": 1.0258, "step": 11908 }, { "epoch": 0.8284809906431528, "grad_norm": 1.0, "learning_rate": 0.00015033457620702918, "loss": 0.9374, "step": 11909 }, { "epoch": 0.828550558280288, "grad_norm": 1.2890625, "learning_rate": 0.00015021577469431037, "loss": 0.7597, "step": 11910 }, { "epoch": 0.8286201259174232, "grad_norm": 1.46875, "learning_rate": 0.00015009701632935103, "loss": 0.9634, "step": 11911 }, { "epoch": 0.8286896935545585, "grad_norm": 1.0390625, "learning_rate": 0.00014997830111818133, "loss": 0.9563, "step": 11912 }, { "epoch": 0.8287592611916936, "grad_norm": 1.5859375, "learning_rate": 0.00014985962906682938, "loss": 0.5603, "step": 11913 }, { "epoch": 0.8288288288288288, "grad_norm": 1.140625, "learning_rate": 0.00014974100018132018, "loss": 0.818, "step": 11914 }, { "epoch": 0.8288983964659641, "grad_norm": 1.203125, "learning_rate": 0.00014962241446767765, "loss": 0.9945, "step": 11915 }, { "epoch": 0.8289679641030993, "grad_norm": 1.1796875, "learning_rate": 0.0001495038719319226, "loss": 0.8931, "step": 11916 }, { "epoch": 0.8290375317402344, "grad_norm": 1.2109375, "learning_rate": 0.0001493853725800739, "loss": 0.7958, "step": 11917 }, { "epoch": 0.8291070993773696, "grad_norm": 1.0234375, "learning_rate": 0.0001492669164181486, "loss": 0.9674, "step": 11918 }, { "epoch": 0.8291766670145049, "grad_norm": 1.0859375, "learning_rate": 0.00014914850345216146, "loss": 0.8706, "step": 11919 }, { "epoch": 0.82924623465164, "grad_norm": 1.2109375, "learning_rate": 0.00014903013368812478, "loss": 0.7766, "step": 11920 }, { "epoch": 0.8293158022887752, "grad_norm": 1.171875, "learning_rate": 0.00014891180713204845, "loss": 1.0476, "step": 11921 }, { "epoch": 0.8293853699259105, "grad_norm": 1.34375, "learning_rate": 0.0001487935237899407, "loss": 0.9776, "step": 11922 }, { "epoch": 0.8294549375630457, "grad_norm": 1.375, "learning_rate": 0.0001486752836678077, "loss": 0.7633, "step": 11923 }, { "epoch": 0.8295245052001808, "grad_norm": 1.015625, "learning_rate": 0.0001485570867716528, "loss": 0.9992, "step": 11924 }, { "epoch": 0.8295940728373161, "grad_norm": 0.921875, "learning_rate": 0.00014843893310747714, "loss": 0.8413, "step": 11925 }, { "epoch": 0.8296636404744513, "grad_norm": 1.015625, "learning_rate": 0.00014832082268128032, "loss": 0.8898, "step": 11926 }, { "epoch": 0.8297332081115865, "grad_norm": 1.2578125, "learning_rate": 0.00014820275549905958, "loss": 0.7373, "step": 11927 }, { "epoch": 0.8298027757487217, "grad_norm": 1.1484375, "learning_rate": 0.00014808473156680934, "loss": 0.8684, "step": 11928 }, { "epoch": 0.8298723433858569, "grad_norm": 1.546875, "learning_rate": 0.0001479667508905227, "loss": 1.0323, "step": 11929 }, { "epoch": 0.8299419110229921, "grad_norm": 1.1015625, "learning_rate": 0.00014784881347618985, "loss": 1.0172, "step": 11930 }, { "epoch": 0.8300114786601273, "grad_norm": 0.94140625, "learning_rate": 0.00014773091932979886, "loss": 0.8575, "step": 11931 }, { "epoch": 0.8300810462972625, "grad_norm": 1.1171875, "learning_rate": 0.00014761306845733602, "loss": 0.7712, "step": 11932 }, { "epoch": 0.8301506139343977, "grad_norm": 1.6328125, "learning_rate": 0.00014749526086478538, "loss": 1.0773, "step": 11933 }, { "epoch": 0.8302201815715329, "grad_norm": 1.3046875, "learning_rate": 0.0001473774965581286, "loss": 0.836, "step": 11934 }, { "epoch": 0.8302897492086682, "grad_norm": 1.125, "learning_rate": 0.0001472597755433447, "loss": 0.9085, "step": 11935 }, { "epoch": 0.8303593168458033, "grad_norm": 1.171875, "learning_rate": 0.0001471420978264112, "loss": 1.0149, "step": 11936 }, { "epoch": 0.8304288844829385, "grad_norm": 1.0390625, "learning_rate": 0.00014702446341330355, "loss": 0.6521, "step": 11937 }, { "epoch": 0.8304984521200738, "grad_norm": 0.875, "learning_rate": 0.00014690687230999434, "loss": 0.6956, "step": 11938 }, { "epoch": 0.830568019757209, "grad_norm": 1.0859375, "learning_rate": 0.00014678932452245397, "loss": 0.8258, "step": 11939 }, { "epoch": 0.8306375873943441, "grad_norm": 1.109375, "learning_rate": 0.00014667182005665124, "loss": 0.6902, "step": 11940 }, { "epoch": 0.8307071550314794, "grad_norm": 1.3046875, "learning_rate": 0.00014655435891855261, "loss": 0.9419, "step": 11941 }, { "epoch": 0.8307767226686146, "grad_norm": 1.0546875, "learning_rate": 0.00014643694111412175, "loss": 0.5936, "step": 11942 }, { "epoch": 0.8308462903057497, "grad_norm": 1.2890625, "learning_rate": 0.000146319566649321, "loss": 1.076, "step": 11943 }, { "epoch": 0.8309158579428849, "grad_norm": 1.1484375, "learning_rate": 0.00014620223553010947, "loss": 0.9286, "step": 11944 }, { "epoch": 0.8309854255800202, "grad_norm": 1.140625, "learning_rate": 0.00014608494776244529, "loss": 0.7935, "step": 11945 }, { "epoch": 0.8310549932171554, "grad_norm": 1.0859375, "learning_rate": 0.00014596770335228315, "loss": 0.9173, "step": 11946 }, { "epoch": 0.8311245608542905, "grad_norm": 1.1328125, "learning_rate": 0.0001458505023055765, "loss": 0.8054, "step": 11947 }, { "epoch": 0.8311941284914258, "grad_norm": 1.1796875, "learning_rate": 0.00014573334462827624, "loss": 0.8516, "step": 11948 }, { "epoch": 0.831263696128561, "grad_norm": 1.1796875, "learning_rate": 0.00014561623032633065, "loss": 1.006, "step": 11949 }, { "epoch": 0.8313332637656962, "grad_norm": 0.8828125, "learning_rate": 0.00014549915940568648, "loss": 0.5378, "step": 11950 }, { "epoch": 0.8314028314028314, "grad_norm": 1.1640625, "learning_rate": 0.0001453821318722882, "loss": 0.8296, "step": 11951 }, { "epoch": 0.8314723990399666, "grad_norm": 1.1640625, "learning_rate": 0.00014526514773207776, "loss": 0.8133, "step": 11952 }, { "epoch": 0.8315419666771018, "grad_norm": 0.9296875, "learning_rate": 0.00014514820699099463, "loss": 0.7953, "step": 11953 }, { "epoch": 0.8316115343142371, "grad_norm": 1.171875, "learning_rate": 0.0001450313096549768, "loss": 0.8305, "step": 11954 }, { "epoch": 0.8316811019513722, "grad_norm": 1.25, "learning_rate": 0.00014491445572995988, "loss": 0.968, "step": 11955 }, { "epoch": 0.8317506695885074, "grad_norm": 1.0, "learning_rate": 0.00014479764522187677, "loss": 0.6758, "step": 11956 }, { "epoch": 0.8318202372256426, "grad_norm": 1.3125, "learning_rate": 0.00014468087813665888, "loss": 0.7468, "step": 11957 }, { "epoch": 0.8318898048627779, "grad_norm": 1.1953125, "learning_rate": 0.00014456415448023464, "loss": 0.9035, "step": 11958 }, { "epoch": 0.831959372499913, "grad_norm": 1.09375, "learning_rate": 0.00014444747425853123, "loss": 0.6326, "step": 11959 }, { "epoch": 0.8320289401370482, "grad_norm": 0.69921875, "learning_rate": 0.00014433083747747243, "loss": 0.4847, "step": 11960 }, { "epoch": 0.8320985077741835, "grad_norm": 1.15625, "learning_rate": 0.00014421424414298113, "loss": 0.8069, "step": 11961 }, { "epoch": 0.8321680754113187, "grad_norm": 1.078125, "learning_rate": 0.00014409769426097695, "loss": 0.7904, "step": 11962 }, { "epoch": 0.8322376430484538, "grad_norm": 1.125, "learning_rate": 0.00014398118783737746, "loss": 0.8436, "step": 11963 }, { "epoch": 0.8323072106855891, "grad_norm": 1.1953125, "learning_rate": 0.00014386472487809898, "loss": 0.6022, "step": 11964 }, { "epoch": 0.8323767783227243, "grad_norm": 1.2109375, "learning_rate": 0.00014374830538905448, "loss": 0.9282, "step": 11965 }, { "epoch": 0.8324463459598594, "grad_norm": 1.453125, "learning_rate": 0.0001436319293761552, "loss": 0.7943, "step": 11966 }, { "epoch": 0.8325159135969947, "grad_norm": 0.8515625, "learning_rate": 0.00014351559684531, "loss": 0.5992, "step": 11967 }, { "epoch": 0.8325854812341299, "grad_norm": 1.1484375, "learning_rate": 0.00014339930780242572, "loss": 0.7814, "step": 11968 }, { "epoch": 0.8326550488712651, "grad_norm": 1.03125, "learning_rate": 0.00014328306225340725, "loss": 0.823, "step": 11969 }, { "epoch": 0.8327246165084002, "grad_norm": 1.1796875, "learning_rate": 0.00014316686020415649, "loss": 0.8234, "step": 11970 }, { "epoch": 0.8327941841455355, "grad_norm": 1.0, "learning_rate": 0.0001430507016605741, "loss": 0.554, "step": 11971 }, { "epoch": 0.8328637517826707, "grad_norm": 0.88671875, "learning_rate": 0.00014293458662855741, "loss": 0.6621, "step": 11972 }, { "epoch": 0.8329333194198059, "grad_norm": 0.97265625, "learning_rate": 0.0001428185151140028, "loss": 0.6714, "step": 11973 }, { "epoch": 0.8330028870569411, "grad_norm": 0.84765625, "learning_rate": 0.0001427024871228031, "loss": 0.5449, "step": 11974 }, { "epoch": 0.8330724546940763, "grad_norm": 1.1953125, "learning_rate": 0.00014258650266085038, "loss": 1.0034, "step": 11975 }, { "epoch": 0.8331420223312115, "grad_norm": 1.203125, "learning_rate": 0.00014247056173403305, "loss": 0.8965, "step": 11976 }, { "epoch": 0.8332115899683468, "grad_norm": 1.171875, "learning_rate": 0.0001423546643482384, "loss": 0.8702, "step": 11977 }, { "epoch": 0.8332811576054819, "grad_norm": 1.1015625, "learning_rate": 0.00014223881050935117, "loss": 0.7934, "step": 11978 }, { "epoch": 0.8333507252426171, "grad_norm": 1.0859375, "learning_rate": 0.00014212300022325376, "loss": 0.9499, "step": 11979 }, { "epoch": 0.8334202928797524, "grad_norm": 1.1484375, "learning_rate": 0.00014200723349582644, "loss": 1.0192, "step": 11980 }, { "epoch": 0.8334898605168876, "grad_norm": 1.0390625, "learning_rate": 0.00014189151033294688, "loss": 0.7489, "step": 11981 }, { "epoch": 0.8335594281540227, "grad_norm": 0.84765625, "learning_rate": 0.00014177583074049128, "loss": 0.6674, "step": 11982 }, { "epoch": 0.8336289957911579, "grad_norm": 1.0546875, "learning_rate": 0.00014166019472433344, "loss": 0.7599, "step": 11983 }, { "epoch": 0.8336985634282932, "grad_norm": 1.1640625, "learning_rate": 0.00014154460229034427, "loss": 0.7389, "step": 11984 }, { "epoch": 0.8337681310654284, "grad_norm": 1.4296875, "learning_rate": 0.0001414290534443936, "loss": 0.8186, "step": 11985 }, { "epoch": 0.8338376987025635, "grad_norm": 0.88671875, "learning_rate": 0.00014131354819234775, "loss": 0.7043, "step": 11986 }, { "epoch": 0.8339072663396988, "grad_norm": 1.1015625, "learning_rate": 0.00014119808654007216, "loss": 0.965, "step": 11987 }, { "epoch": 0.833976833976834, "grad_norm": 1.0703125, "learning_rate": 0.00014108266849342877, "loss": 0.7556, "step": 11988 }, { "epoch": 0.8340464016139691, "grad_norm": 1.2734375, "learning_rate": 0.00014096729405827847, "loss": 0.8747, "step": 11989 }, { "epoch": 0.8341159692511044, "grad_norm": 1.234375, "learning_rate": 0.00014085196324047878, "loss": 0.7994, "step": 11990 }, { "epoch": 0.8341855368882396, "grad_norm": 1.0625, "learning_rate": 0.00014073667604588635, "loss": 0.645, "step": 11991 }, { "epoch": 0.8342551045253748, "grad_norm": 1.1171875, "learning_rate": 0.0001406214324803542, "loss": 0.8727, "step": 11992 }, { "epoch": 0.83432467216251, "grad_norm": 0.96484375, "learning_rate": 0.0001405062325497344, "loss": 0.6552, "step": 11993 }, { "epoch": 0.8343942397996452, "grad_norm": 0.8671875, "learning_rate": 0.0001403910762598758, "loss": 0.5523, "step": 11994 }, { "epoch": 0.8344638074367804, "grad_norm": 0.99609375, "learning_rate": 0.0001402759636166253, "loss": 0.7422, "step": 11995 }, { "epoch": 0.8345333750739156, "grad_norm": 1.0546875, "learning_rate": 0.00014016089462582837, "loss": 0.9296, "step": 11996 }, { "epoch": 0.8346029427110508, "grad_norm": 1.2890625, "learning_rate": 0.00014004586929332742, "loss": 0.887, "step": 11997 }, { "epoch": 0.834672510348186, "grad_norm": 1.0703125, "learning_rate": 0.00013993088762496265, "loss": 0.8131, "step": 11998 }, { "epoch": 0.8347420779853212, "grad_norm": 0.78515625, "learning_rate": 0.00013981594962657218, "loss": 0.4077, "step": 11999 }, { "epoch": 0.8348116456224565, "grad_norm": 1.4453125, "learning_rate": 0.00013970105530399212, "loss": 1.0036, "step": 12000 }, { "epoch": 0.8348812132595916, "grad_norm": 1.140625, "learning_rate": 0.0001395862046630564, "loss": 0.7612, "step": 12001 }, { "epoch": 0.8349507808967268, "grad_norm": 1.203125, "learning_rate": 0.00013947139770959627, "loss": 0.5829, "step": 12002 }, { "epoch": 0.8350203485338621, "grad_norm": 1.1640625, "learning_rate": 0.00013935663444944135, "loss": 0.8279, "step": 12003 }, { "epoch": 0.8350899161709973, "grad_norm": 1.203125, "learning_rate": 0.0001392419148884183, "loss": 0.9097, "step": 12004 }, { "epoch": 0.8351594838081324, "grad_norm": 1.15625, "learning_rate": 0.00013912723903235257, "loss": 0.65, "step": 12005 }, { "epoch": 0.8352290514452677, "grad_norm": 1.0859375, "learning_rate": 0.0001390126068870663, "loss": 0.7578, "step": 12006 }, { "epoch": 0.8352986190824029, "grad_norm": 0.97265625, "learning_rate": 0.00013889801845838034, "loss": 0.7598, "step": 12007 }, { "epoch": 0.835368186719538, "grad_norm": 1.25, "learning_rate": 0.00013878347375211253, "loss": 0.863, "step": 12008 }, { "epoch": 0.8354377543566732, "grad_norm": 0.7890625, "learning_rate": 0.00013866897277407908, "loss": 0.5281, "step": 12009 }, { "epoch": 0.8355073219938085, "grad_norm": 1.4296875, "learning_rate": 0.00013855451553009392, "loss": 1.1497, "step": 12010 }, { "epoch": 0.8355768896309437, "grad_norm": 0.98828125, "learning_rate": 0.00013844010202596847, "loss": 0.7283, "step": 12011 }, { "epoch": 0.8356464572680788, "grad_norm": 0.9921875, "learning_rate": 0.0001383257322675121, "loss": 0.8684, "step": 12012 }, { "epoch": 0.8357160249052141, "grad_norm": 1.46875, "learning_rate": 0.00013821140626053163, "loss": 0.9184, "step": 12013 }, { "epoch": 0.8357855925423493, "grad_norm": 1.03125, "learning_rate": 0.00013809712401083229, "loss": 0.9563, "step": 12014 }, { "epoch": 0.8358551601794845, "grad_norm": 1.2734375, "learning_rate": 0.0001379828855242168, "loss": 1.0051, "step": 12015 }, { "epoch": 0.8359247278166198, "grad_norm": 0.80859375, "learning_rate": 0.00013786869080648534, "loss": 0.5833, "step": 12016 }, { "epoch": 0.8359942954537549, "grad_norm": 1.09375, "learning_rate": 0.00013775453986343645, "loss": 0.7164, "step": 12017 }, { "epoch": 0.8360638630908901, "grad_norm": 1.0625, "learning_rate": 0.0001376404327008659, "loss": 0.8782, "step": 12018 }, { "epoch": 0.8361334307280254, "grad_norm": 0.8671875, "learning_rate": 0.00013752636932456763, "loss": 0.6167, "step": 12019 }, { "epoch": 0.8362029983651605, "grad_norm": 1.1484375, "learning_rate": 0.000137412349740333, "loss": 0.9512, "step": 12020 }, { "epoch": 0.8362725660022957, "grad_norm": 0.94140625, "learning_rate": 0.00013729837395395173, "loss": 0.8319, "step": 12021 }, { "epoch": 0.8363421336394309, "grad_norm": 1.0390625, "learning_rate": 0.00013718444197121038, "loss": 0.8411, "step": 12022 }, { "epoch": 0.8364117012765662, "grad_norm": 1.234375, "learning_rate": 0.00013707055379789425, "loss": 1.0388, "step": 12023 }, { "epoch": 0.8364812689137013, "grad_norm": 1.140625, "learning_rate": 0.000136956709439786, "loss": 0.7488, "step": 12024 }, { "epoch": 0.8365508365508365, "grad_norm": 1.0234375, "learning_rate": 0.00013684290890266605, "loss": 0.9733, "step": 12025 }, { "epoch": 0.8366204041879718, "grad_norm": 1.140625, "learning_rate": 0.00013672915219231264, "loss": 0.8369, "step": 12026 }, { "epoch": 0.836689971825107, "grad_norm": 1.0859375, "learning_rate": 0.00013661543931450115, "loss": 0.8344, "step": 12027 }, { "epoch": 0.8367595394622421, "grad_norm": 0.97265625, "learning_rate": 0.00013650177027500632, "loss": 0.9055, "step": 12028 }, { "epoch": 0.8368291070993774, "grad_norm": 1.0625, "learning_rate": 0.0001363881450795993, "loss": 0.6351, "step": 12029 }, { "epoch": 0.8368986747365126, "grad_norm": 1.1484375, "learning_rate": 0.000136274563734049, "loss": 0.8663, "step": 12030 }, { "epoch": 0.8369682423736478, "grad_norm": 0.9375, "learning_rate": 0.00013616102624412318, "loss": 0.6302, "step": 12031 }, { "epoch": 0.837037810010783, "grad_norm": 1.5, "learning_rate": 0.0001360475326155861, "loss": 0.8401, "step": 12032 }, { "epoch": 0.8371073776479182, "grad_norm": 0.9140625, "learning_rate": 0.00013593408285420095, "loss": 0.7076, "step": 12033 }, { "epoch": 0.8371769452850534, "grad_norm": 1.328125, "learning_rate": 0.00013582067696572752, "loss": 1.0298, "step": 12034 }, { "epoch": 0.8372465129221885, "grad_norm": 0.96875, "learning_rate": 0.0001357073149559247, "loss": 0.6113, "step": 12035 }, { "epoch": 0.8373160805593238, "grad_norm": 1.03125, "learning_rate": 0.00013559399683054773, "loss": 0.8717, "step": 12036 }, { "epoch": 0.837385648196459, "grad_norm": 1.125, "learning_rate": 0.0001354807225953507, "loss": 0.7921, "step": 12037 }, { "epoch": 0.8374552158335942, "grad_norm": 1.1875, "learning_rate": 0.00013536749225608535, "loss": 0.7053, "step": 12038 }, { "epoch": 0.8375247834707295, "grad_norm": 0.98828125, "learning_rate": 0.0001352543058185006, "loss": 0.8521, "step": 12039 }, { "epoch": 0.8375943511078646, "grad_norm": 1.359375, "learning_rate": 0.00013514116328834348, "loss": 0.7449, "step": 12040 }, { "epoch": 0.8376639187449998, "grad_norm": 1.1953125, "learning_rate": 0.00013502806467135874, "loss": 0.9089, "step": 12041 }, { "epoch": 0.8377334863821351, "grad_norm": 1.109375, "learning_rate": 0.0001349150099732893, "loss": 0.6579, "step": 12042 }, { "epoch": 0.8378030540192702, "grad_norm": 1.4921875, "learning_rate": 0.00013480199919987536, "loss": 0.9044, "step": 12043 }, { "epoch": 0.8378726216564054, "grad_norm": 1.2109375, "learning_rate": 0.0001346890323568548, "loss": 0.7812, "step": 12044 }, { "epoch": 0.8379421892935407, "grad_norm": 1.515625, "learning_rate": 0.0001345761094499638, "loss": 0.8827, "step": 12045 }, { "epoch": 0.8380117569306759, "grad_norm": 1.0234375, "learning_rate": 0.0001344632304849358, "loss": 1.0183, "step": 12046 }, { "epoch": 0.838081324567811, "grad_norm": 1.1484375, "learning_rate": 0.0001343503954675025, "loss": 0.9414, "step": 12047 }, { "epoch": 0.8381508922049462, "grad_norm": 1.1640625, "learning_rate": 0.00013423760440339262, "loss": 0.6596, "step": 12048 }, { "epoch": 0.8382204598420815, "grad_norm": 1.03125, "learning_rate": 0.00013412485729833367, "loss": 0.6656, "step": 12049 }, { "epoch": 0.8382900274792167, "grad_norm": 1.0703125, "learning_rate": 0.00013401215415805002, "loss": 0.8514, "step": 12050 }, { "epoch": 0.8383595951163518, "grad_norm": 1.2265625, "learning_rate": 0.00013389949498826415, "loss": 0.8535, "step": 12051 }, { "epoch": 0.8384291627534871, "grad_norm": 1.1171875, "learning_rate": 0.00013378687979469684, "loss": 0.5699, "step": 12052 }, { "epoch": 0.8384987303906223, "grad_norm": 1.4765625, "learning_rate": 0.00013367430858306562, "loss": 0.9435, "step": 12053 }, { "epoch": 0.8385682980277575, "grad_norm": 0.9140625, "learning_rate": 0.00013356178135908613, "loss": 0.5726, "step": 12054 }, { "epoch": 0.8386378656648927, "grad_norm": 1.078125, "learning_rate": 0.0001334492981284723, "loss": 0.7919, "step": 12055 }, { "epoch": 0.8387074333020279, "grad_norm": 1.015625, "learning_rate": 0.00013333685889693557, "loss": 0.5164, "step": 12056 }, { "epoch": 0.8387770009391631, "grad_norm": 1.078125, "learning_rate": 0.0001332244636701848, "loss": 0.5733, "step": 12057 }, { "epoch": 0.8388465685762984, "grad_norm": 1.015625, "learning_rate": 0.00013311211245392674, "loss": 0.6505, "step": 12058 }, { "epoch": 0.8389161362134335, "grad_norm": 1.5, "learning_rate": 0.00013299980525386613, "loss": 0.8734, "step": 12059 }, { "epoch": 0.8389857038505687, "grad_norm": 1.2734375, "learning_rate": 0.00013288754207570563, "loss": 0.9035, "step": 12060 }, { "epoch": 0.8390552714877039, "grad_norm": 0.93359375, "learning_rate": 0.00013277532292514527, "loss": 0.685, "step": 12061 }, { "epoch": 0.8391248391248392, "grad_norm": 1.1484375, "learning_rate": 0.00013266314780788246, "loss": 0.7693, "step": 12062 }, { "epoch": 0.8391944067619743, "grad_norm": 1.28125, "learning_rate": 0.00013255101672961366, "loss": 0.8275, "step": 12063 }, { "epoch": 0.8392639743991095, "grad_norm": 1.5, "learning_rate": 0.00013243892969603177, "loss": 0.8141, "step": 12064 }, { "epoch": 0.8393335420362448, "grad_norm": 1.171875, "learning_rate": 0.00013232688671282832, "loss": 0.8422, "step": 12065 }, { "epoch": 0.8394031096733799, "grad_norm": 1.171875, "learning_rate": 0.000132214887785692, "loss": 0.7412, "step": 12066 }, { "epoch": 0.8394726773105151, "grad_norm": 1.0859375, "learning_rate": 0.00013210293292030995, "loss": 0.9879, "step": 12067 }, { "epoch": 0.8395422449476504, "grad_norm": 1.1953125, "learning_rate": 0.00013199102212236614, "loss": 0.6968, "step": 12068 }, { "epoch": 0.8396118125847856, "grad_norm": 1.0234375, "learning_rate": 0.00013187915539754325, "loss": 0.6176, "step": 12069 }, { "epoch": 0.8396813802219207, "grad_norm": 1.171875, "learning_rate": 0.0001317673327515213, "loss": 0.7005, "step": 12070 }, { "epoch": 0.839750947859056, "grad_norm": 1.28125, "learning_rate": 0.0001316555541899781, "loss": 0.9586, "step": 12071 }, { "epoch": 0.8398205154961912, "grad_norm": 1.234375, "learning_rate": 0.00013154381971858898, "loss": 0.8076, "step": 12072 }, { "epoch": 0.8398900831333264, "grad_norm": 1.3359375, "learning_rate": 0.00013143212934302694, "loss": 0.8819, "step": 12073 }, { "epoch": 0.8399596507704615, "grad_norm": 1.1015625, "learning_rate": 0.00013132048306896394, "loss": 0.6674, "step": 12074 }, { "epoch": 0.8400292184075968, "grad_norm": 1.125, "learning_rate": 0.00013120888090206828, "loss": 0.8355, "step": 12075 }, { "epoch": 0.840098786044732, "grad_norm": 1.1328125, "learning_rate": 0.00013109732284800646, "loss": 0.712, "step": 12076 }, { "epoch": 0.8401683536818672, "grad_norm": 1.2734375, "learning_rate": 0.00013098580891244315, "loss": 0.6425, "step": 12077 }, { "epoch": 0.8402379213190024, "grad_norm": 0.89453125, "learning_rate": 0.00013087433910104006, "loss": 0.6536, "step": 12078 }, { "epoch": 0.8403074889561376, "grad_norm": 1.0390625, "learning_rate": 0.00013076291341945756, "loss": 0.7956, "step": 12079 }, { "epoch": 0.8403770565932728, "grad_norm": 1.3671875, "learning_rate": 0.0001306515318733529, "loss": 0.9083, "step": 12080 }, { "epoch": 0.8404466242304081, "grad_norm": 1.234375, "learning_rate": 0.00013054019446838173, "loss": 1.0065, "step": 12081 }, { "epoch": 0.8405161918675432, "grad_norm": 1.25, "learning_rate": 0.00013042890121019691, "loss": 1.097, "step": 12082 }, { "epoch": 0.8405857595046784, "grad_norm": 1.3828125, "learning_rate": 0.00013031765210444956, "loss": 0.8163, "step": 12083 }, { "epoch": 0.8406553271418137, "grad_norm": 1.09375, "learning_rate": 0.00013020644715678855, "loss": 0.7708, "step": 12084 }, { "epoch": 0.8407248947789489, "grad_norm": 1.171875, "learning_rate": 0.00013009528637285994, "loss": 0.6088, "step": 12085 }, { "epoch": 0.840794462416084, "grad_norm": 1.03125, "learning_rate": 0.00012998416975830795, "loss": 0.8125, "step": 12086 }, { "epoch": 0.8408640300532192, "grad_norm": 1.0078125, "learning_rate": 0.0001298730973187745, "loss": 0.7017, "step": 12087 }, { "epoch": 0.8409335976903545, "grad_norm": 1.109375, "learning_rate": 0.00012976206905989973, "loss": 0.7749, "step": 12088 }, { "epoch": 0.8410031653274896, "grad_norm": 1.5234375, "learning_rate": 0.0001296510849873207, "loss": 1.0097, "step": 12089 }, { "epoch": 0.8410727329646248, "grad_norm": 1.09375, "learning_rate": 0.00012954014510667246, "loss": 0.7875, "step": 12090 }, { "epoch": 0.8411423006017601, "grad_norm": 1.0703125, "learning_rate": 0.00012942924942358825, "loss": 0.811, "step": 12091 }, { "epoch": 0.8412118682388953, "grad_norm": 1.15625, "learning_rate": 0.00012931839794369892, "loss": 0.7561, "step": 12092 }, { "epoch": 0.8412814358760304, "grad_norm": 1.109375, "learning_rate": 0.00012920759067263287, "loss": 0.7486, "step": 12093 }, { "epoch": 0.8413510035131657, "grad_norm": 1.4375, "learning_rate": 0.00012909682761601604, "loss": 0.9831, "step": 12094 }, { "epoch": 0.8414205711503009, "grad_norm": 1.1328125, "learning_rate": 0.0001289861087794727, "loss": 0.9107, "step": 12095 }, { "epoch": 0.8414901387874361, "grad_norm": 1.1875, "learning_rate": 0.00012887543416862445, "loss": 0.8535, "step": 12096 }, { "epoch": 0.8415597064245713, "grad_norm": 1.03125, "learning_rate": 0.00012876480378909083, "loss": 0.8429, "step": 12097 }, { "epoch": 0.8416292740617065, "grad_norm": 1.078125, "learning_rate": 0.0001286542176464892, "loss": 0.56, "step": 12098 }, { "epoch": 0.8416988416988417, "grad_norm": 0.98046875, "learning_rate": 0.00012854367574643467, "loss": 0.8433, "step": 12099 }, { "epoch": 0.8417684093359769, "grad_norm": 1.0, "learning_rate": 0.00012843317809453959, "loss": 0.958, "step": 12100 }, { "epoch": 0.8418379769731121, "grad_norm": 0.9921875, "learning_rate": 0.00012832272469641458, "loss": 0.8084, "step": 12101 }, { "epoch": 0.8419075446102473, "grad_norm": 0.94921875, "learning_rate": 0.00012821231555766832, "loss": 0.6842, "step": 12102 }, { "epoch": 0.8419771122473825, "grad_norm": 1.546875, "learning_rate": 0.0001281019506839065, "loss": 1.1108, "step": 12103 }, { "epoch": 0.8420466798845178, "grad_norm": 1.3515625, "learning_rate": 0.00012799163008073278, "loss": 0.794, "step": 12104 }, { "epoch": 0.8421162475216529, "grad_norm": 1.0703125, "learning_rate": 0.0001278813537537489, "loss": 0.732, "step": 12105 }, { "epoch": 0.8421858151587881, "grad_norm": 0.87890625, "learning_rate": 0.0001277711217085541, "loss": 0.544, "step": 12106 }, { "epoch": 0.8422553827959234, "grad_norm": 1.015625, "learning_rate": 0.00012766093395074552, "loss": 0.7447, "step": 12107 }, { "epoch": 0.8423249504330586, "grad_norm": 1.015625, "learning_rate": 0.00012755079048591756, "loss": 0.7633, "step": 12108 }, { "epoch": 0.8423945180701937, "grad_norm": 1.15625, "learning_rate": 0.00012744069131966318, "loss": 0.934, "step": 12109 }, { "epoch": 0.842464085707329, "grad_norm": 1.2734375, "learning_rate": 0.00012733063645757226, "loss": 0.8014, "step": 12110 }, { "epoch": 0.8425336533444642, "grad_norm": 1.203125, "learning_rate": 0.000127220625905233, "loss": 0.7666, "step": 12111 }, { "epoch": 0.8426032209815993, "grad_norm": 1.203125, "learning_rate": 0.00012711065966823155, "loss": 1.0765, "step": 12112 }, { "epoch": 0.8426727886187345, "grad_norm": 1.28125, "learning_rate": 0.00012700073775215093, "loss": 0.6896, "step": 12113 }, { "epoch": 0.8427423562558698, "grad_norm": 1.046875, "learning_rate": 0.00012689086016257257, "loss": 0.8598, "step": 12114 }, { "epoch": 0.842811923893005, "grad_norm": 1.453125, "learning_rate": 0.00012678102690507544, "loss": 0.9347, "step": 12115 }, { "epoch": 0.8428814915301401, "grad_norm": 0.97265625, "learning_rate": 0.0001266712379852367, "loss": 0.8, "step": 12116 }, { "epoch": 0.8429510591672754, "grad_norm": 1.40625, "learning_rate": 0.00012656149340863055, "loss": 0.9972, "step": 12117 }, { "epoch": 0.8430206268044106, "grad_norm": 0.98046875, "learning_rate": 0.00012645179318082912, "loss": 0.768, "step": 12118 }, { "epoch": 0.8430901944415458, "grad_norm": 1.203125, "learning_rate": 0.00012634213730740253, "loss": 0.8108, "step": 12119 }, { "epoch": 0.843159762078681, "grad_norm": 1.171875, "learning_rate": 0.00012623252579391898, "loss": 0.8261, "step": 12120 }, { "epoch": 0.8432293297158162, "grad_norm": 1.1171875, "learning_rate": 0.00012612295864594358, "loss": 0.6399, "step": 12121 }, { "epoch": 0.8432988973529514, "grad_norm": 1.109375, "learning_rate": 0.00012601343586903947, "loss": 0.9218, "step": 12122 }, { "epoch": 0.8433684649900867, "grad_norm": 1.1328125, "learning_rate": 0.00012590395746876802, "loss": 0.6682, "step": 12123 }, { "epoch": 0.8434380326272218, "grad_norm": 1.1953125, "learning_rate": 0.00012579452345068775, "loss": 0.7739, "step": 12124 }, { "epoch": 0.843507600264357, "grad_norm": 1.40625, "learning_rate": 0.0001256851338203552, "loss": 0.966, "step": 12125 }, { "epoch": 0.8435771679014922, "grad_norm": 1.140625, "learning_rate": 0.00012557578858332486, "loss": 0.6529, "step": 12126 }, { "epoch": 0.8436467355386275, "grad_norm": 1.046875, "learning_rate": 0.00012546648774514868, "loss": 0.7381, "step": 12127 }, { "epoch": 0.8437163031757626, "grad_norm": 1.2890625, "learning_rate": 0.00012535723131137588, "loss": 0.7895, "step": 12128 }, { "epoch": 0.8437858708128978, "grad_norm": 1.3671875, "learning_rate": 0.00012524801928755447, "loss": 0.9043, "step": 12129 }, { "epoch": 0.8438554384500331, "grad_norm": 1.140625, "learning_rate": 0.00012513885167922978, "loss": 0.8401, "step": 12130 }, { "epoch": 0.8439250060871683, "grad_norm": 1.046875, "learning_rate": 0.0001250297284919445, "loss": 0.6916, "step": 12131 }, { "epoch": 0.8439945737243034, "grad_norm": 0.9296875, "learning_rate": 0.0001249206497312393, "loss": 0.808, "step": 12132 }, { "epoch": 0.8440641413614387, "grad_norm": 1.1796875, "learning_rate": 0.00012481161540265273, "loss": 0.8094, "step": 12133 }, { "epoch": 0.8441337089985739, "grad_norm": 0.89453125, "learning_rate": 0.0001247026255117213, "loss": 0.8593, "step": 12134 }, { "epoch": 0.844203276635709, "grad_norm": 0.97265625, "learning_rate": 0.00012459368006397865, "loss": 0.4995, "step": 12135 }, { "epoch": 0.8442728442728443, "grad_norm": 1.28125, "learning_rate": 0.0001244847790649565, "loss": 0.8877, "step": 12136 }, { "epoch": 0.8443424119099795, "grad_norm": 1.0546875, "learning_rate": 0.00012437592252018416, "loss": 0.8176, "step": 12137 }, { "epoch": 0.8444119795471147, "grad_norm": 1.0859375, "learning_rate": 0.00012426711043518924, "loss": 0.6735, "step": 12138 }, { "epoch": 0.8444815471842498, "grad_norm": 1.15625, "learning_rate": 0.0001241583428154963, "loss": 0.8597, "step": 12139 }, { "epoch": 0.8445511148213851, "grad_norm": 0.87109375, "learning_rate": 0.0001240496196666283, "loss": 0.8347, "step": 12140 }, { "epoch": 0.8446206824585203, "grad_norm": 1.0, "learning_rate": 0.0001239409409941056, "loss": 0.7573, "step": 12141 }, { "epoch": 0.8446902500956555, "grad_norm": 1.3984375, "learning_rate": 0.00012383230680344592, "loss": 0.886, "step": 12142 }, { "epoch": 0.8447598177327907, "grad_norm": 1.359375, "learning_rate": 0.0001237237171001655, "loss": 0.9381, "step": 12143 }, { "epoch": 0.8448293853699259, "grad_norm": 1.4765625, "learning_rate": 0.00012361517188977822, "loss": 0.957, "step": 12144 }, { "epoch": 0.8448989530070611, "grad_norm": 1.0078125, "learning_rate": 0.00012350667117779512, "loss": 0.8213, "step": 12145 }, { "epoch": 0.8449685206441964, "grad_norm": 1.609375, "learning_rate": 0.00012339821496972536, "loss": 0.7806, "step": 12146 }, { "epoch": 0.8450380882813315, "grad_norm": 1.203125, "learning_rate": 0.00012328980327107575, "loss": 0.972, "step": 12147 }, { "epoch": 0.8451076559184667, "grad_norm": 0.8984375, "learning_rate": 0.0001231814360873511, "loss": 0.7905, "step": 12148 }, { "epoch": 0.845177223555602, "grad_norm": 0.80078125, "learning_rate": 0.0001230731134240538, "loss": 0.7074, "step": 12149 }, { "epoch": 0.8452467911927372, "grad_norm": 1.28125, "learning_rate": 0.00012296483528668345, "loss": 0.8852, "step": 12150 }, { "epoch": 0.8453163588298723, "grad_norm": 1.1953125, "learning_rate": 0.0001228566016807382, "loss": 0.7658, "step": 12151 }, { "epoch": 0.8453859264670075, "grad_norm": 0.91015625, "learning_rate": 0.00012274841261171376, "loss": 0.588, "step": 12152 }, { "epoch": 0.8454554941041428, "grad_norm": 1.2890625, "learning_rate": 0.0001226402680851033, "loss": 0.9275, "step": 12153 }, { "epoch": 0.845525061741278, "grad_norm": 1.171875, "learning_rate": 0.00012253216810639755, "loss": 0.8288, "step": 12154 }, { "epoch": 0.8455946293784131, "grad_norm": 1.421875, "learning_rate": 0.00012242411268108578, "loss": 0.9221, "step": 12155 }, { "epoch": 0.8456641970155484, "grad_norm": 1.03125, "learning_rate": 0.00012231610181465415, "loss": 0.8036, "step": 12156 }, { "epoch": 0.8457337646526836, "grad_norm": 1.125, "learning_rate": 0.0001222081355125868, "loss": 0.5976, "step": 12157 }, { "epoch": 0.8458033322898187, "grad_norm": 1.1484375, "learning_rate": 0.00012210021378036628, "loss": 0.7004, "step": 12158 }, { "epoch": 0.845872899926954, "grad_norm": 1.140625, "learning_rate": 0.00012199233662347198, "loss": 0.7648, "step": 12159 }, { "epoch": 0.8459424675640892, "grad_norm": 1.0859375, "learning_rate": 0.00012188450404738105, "loss": 0.7067, "step": 12160 }, { "epoch": 0.8460120352012244, "grad_norm": 1.1328125, "learning_rate": 0.00012177671605756901, "loss": 0.7604, "step": 12161 }, { "epoch": 0.8460816028383596, "grad_norm": 1.1171875, "learning_rate": 0.00012166897265950894, "loss": 0.886, "step": 12162 }, { "epoch": 0.8461511704754948, "grad_norm": 0.86328125, "learning_rate": 0.00012156127385867144, "loss": 0.542, "step": 12163 }, { "epoch": 0.84622073811263, "grad_norm": 1.0703125, "learning_rate": 0.00012145361966052449, "loss": 0.6837, "step": 12164 }, { "epoch": 0.8462903057497652, "grad_norm": 1.15625, "learning_rate": 0.00012134601007053447, "loss": 0.7801, "step": 12165 }, { "epoch": 0.8463598733869004, "grad_norm": 0.7890625, "learning_rate": 0.00012123844509416559, "loss": 0.4521, "step": 12166 }, { "epoch": 0.8464294410240356, "grad_norm": 1.4921875, "learning_rate": 0.00012113092473687914, "loss": 1.0481, "step": 12167 }, { "epoch": 0.8464990086611708, "grad_norm": 1.140625, "learning_rate": 0.00012102344900413442, "loss": 1.0554, "step": 12168 }, { "epoch": 0.8465685762983061, "grad_norm": 0.9140625, "learning_rate": 0.00012091601790138851, "loss": 0.7972, "step": 12169 }, { "epoch": 0.8466381439354412, "grad_norm": 1.125, "learning_rate": 0.00012080863143409648, "loss": 0.7867, "step": 12170 }, { "epoch": 0.8467077115725764, "grad_norm": 1.03125, "learning_rate": 0.00012070128960771043, "loss": 0.7568, "step": 12171 }, { "epoch": 0.8467772792097117, "grad_norm": 1.140625, "learning_rate": 0.00012059399242768122, "loss": 0.6988, "step": 12172 }, { "epoch": 0.8468468468468469, "grad_norm": 0.9921875, "learning_rate": 0.00012048673989945657, "loss": 0.6091, "step": 12173 }, { "epoch": 0.846916414483982, "grad_norm": 1.203125, "learning_rate": 0.00012037953202848184, "loss": 0.5821, "step": 12174 }, { "epoch": 0.8469859821211173, "grad_norm": 0.9765625, "learning_rate": 0.00012027236882020099, "loss": 0.829, "step": 12175 }, { "epoch": 0.8470555497582525, "grad_norm": 1.0, "learning_rate": 0.00012016525028005521, "loss": 0.5804, "step": 12176 }, { "epoch": 0.8471251173953877, "grad_norm": 1.078125, "learning_rate": 0.00012005817641348337, "loss": 0.6997, "step": 12177 }, { "epoch": 0.8471946850325228, "grad_norm": 1.2578125, "learning_rate": 0.00011995114722592193, "loss": 0.8979, "step": 12178 }, { "epoch": 0.8472642526696581, "grad_norm": 1.4609375, "learning_rate": 0.0001198441627228054, "loss": 1.0183, "step": 12179 }, { "epoch": 0.8473338203067933, "grad_norm": 1.2578125, "learning_rate": 0.00011973722290956613, "loss": 0.8141, "step": 12180 }, { "epoch": 0.8474033879439284, "grad_norm": 1.046875, "learning_rate": 0.00011963032779163397, "loss": 0.7746, "step": 12181 }, { "epoch": 0.8474729555810637, "grad_norm": 1.1015625, "learning_rate": 0.00011952347737443603, "loss": 0.8633, "step": 12182 }, { "epoch": 0.8475425232181989, "grad_norm": 0.84375, "learning_rate": 0.00011941667166339809, "loss": 0.6313, "step": 12183 }, { "epoch": 0.8476120908553341, "grad_norm": 1.3203125, "learning_rate": 0.00011930991066394315, "loss": 0.8317, "step": 12184 }, { "epoch": 0.8476816584924693, "grad_norm": 0.97265625, "learning_rate": 0.00011920319438149185, "loss": 0.7958, "step": 12185 }, { "epoch": 0.8477512261296045, "grad_norm": 1.140625, "learning_rate": 0.00011909652282146299, "loss": 0.8036, "step": 12186 }, { "epoch": 0.8478207937667397, "grad_norm": 1.3125, "learning_rate": 0.00011898989598927257, "loss": 0.9245, "step": 12187 }, { "epoch": 0.847890361403875, "grad_norm": 1.0, "learning_rate": 0.00011888331389033447, "loss": 0.9106, "step": 12188 }, { "epoch": 0.8479599290410101, "grad_norm": 0.875, "learning_rate": 0.00011877677653006058, "loss": 0.5841, "step": 12189 }, { "epoch": 0.8480294966781453, "grad_norm": 0.7890625, "learning_rate": 0.00011867028391386037, "loss": 0.4885, "step": 12190 }, { "epoch": 0.8480990643152805, "grad_norm": 1.3046875, "learning_rate": 0.00011856383604714094, "loss": 1.0532, "step": 12191 }, { "epoch": 0.8481686319524158, "grad_norm": 1.0703125, "learning_rate": 0.00011845743293530697, "loss": 0.8783, "step": 12192 }, { "epoch": 0.8482381995895509, "grad_norm": 1.0625, "learning_rate": 0.00011835107458376126, "loss": 0.8023, "step": 12193 }, { "epoch": 0.8483077672266861, "grad_norm": 1.4140625, "learning_rate": 0.00011824476099790426, "loss": 0.784, "step": 12194 }, { "epoch": 0.8483773348638214, "grad_norm": 0.796875, "learning_rate": 0.000118138492183134, "loss": 0.7627, "step": 12195 }, { "epoch": 0.8484469025009566, "grad_norm": 1.09375, "learning_rate": 0.00011803226814484602, "loss": 0.8796, "step": 12196 }, { "epoch": 0.8485164701380917, "grad_norm": 1.1484375, "learning_rate": 0.00011792608888843392, "loss": 1.01, "step": 12197 }, { "epoch": 0.848586037775227, "grad_norm": 0.97265625, "learning_rate": 0.00011781995441928939, "loss": 0.7642, "step": 12198 }, { "epoch": 0.8486556054123622, "grad_norm": 1.1328125, "learning_rate": 0.00011771386474280077, "loss": 0.9497, "step": 12199 }, { "epoch": 0.8487251730494974, "grad_norm": 1.3046875, "learning_rate": 0.0001176078198643552, "loss": 0.8968, "step": 12200 }, { "epoch": 0.8487947406866326, "grad_norm": 1.1171875, "learning_rate": 0.00011750181978933682, "loss": 0.8285, "step": 12201 }, { "epoch": 0.8488643083237678, "grad_norm": 1.265625, "learning_rate": 0.00011739586452312812, "loss": 1.0651, "step": 12202 }, { "epoch": 0.848933875960903, "grad_norm": 1.1171875, "learning_rate": 0.00011728995407110854, "loss": 0.757, "step": 12203 }, { "epoch": 0.8490034435980381, "grad_norm": 1.15625, "learning_rate": 0.00011718408843865602, "loss": 0.727, "step": 12204 }, { "epoch": 0.8490730112351734, "grad_norm": 1.4140625, "learning_rate": 0.00011707826763114593, "loss": 0.773, "step": 12205 }, { "epoch": 0.8491425788723086, "grad_norm": 1.03125, "learning_rate": 0.00011697249165395085, "loss": 0.7744, "step": 12206 }, { "epoch": 0.8492121465094438, "grad_norm": 1.203125, "learning_rate": 0.00011686676051244183, "loss": 0.8447, "step": 12207 }, { "epoch": 0.849281714146579, "grad_norm": 0.76953125, "learning_rate": 0.00011676107421198767, "loss": 0.4904, "step": 12208 }, { "epoch": 0.8493512817837142, "grad_norm": 1.1171875, "learning_rate": 0.00011665543275795432, "loss": 0.9249, "step": 12209 }, { "epoch": 0.8494208494208494, "grad_norm": 0.98046875, "learning_rate": 0.00011654983615570546, "loss": 0.5941, "step": 12210 }, { "epoch": 0.8494904170579847, "grad_norm": 0.90625, "learning_rate": 0.00011644428441060295, "loss": 0.7498, "step": 12211 }, { "epoch": 0.8495599846951198, "grad_norm": 1.171875, "learning_rate": 0.00011633877752800648, "loss": 0.8989, "step": 12212 }, { "epoch": 0.849629552332255, "grad_norm": 1.078125, "learning_rate": 0.00011623331551327276, "loss": 0.8272, "step": 12213 }, { "epoch": 0.8496991199693903, "grad_norm": 0.87109375, "learning_rate": 0.00011612789837175686, "loss": 0.5662, "step": 12214 }, { "epoch": 0.8497686876065255, "grad_norm": 1.1640625, "learning_rate": 0.00011602252610881115, "loss": 0.7696, "step": 12215 }, { "epoch": 0.8498382552436606, "grad_norm": 1.0234375, "learning_rate": 0.00011591719872978601, "loss": 0.7914, "step": 12216 }, { "epoch": 0.8499078228807958, "grad_norm": 1.5234375, "learning_rate": 0.0001158119162400294, "loss": 1.0314, "step": 12217 }, { "epoch": 0.8499773905179311, "grad_norm": 0.74609375, "learning_rate": 0.00011570667864488716, "loss": 0.6842, "step": 12218 }, { "epoch": 0.8500469581550663, "grad_norm": 1.2109375, "learning_rate": 0.00011560148594970266, "loss": 0.8372, "step": 12219 }, { "epoch": 0.8501165257922014, "grad_norm": 1.1171875, "learning_rate": 0.00011549633815981652, "loss": 0.7289, "step": 12220 }, { "epoch": 0.8501860934293367, "grad_norm": 0.921875, "learning_rate": 0.0001153912352805685, "loss": 0.746, "step": 12221 }, { "epoch": 0.8502556610664719, "grad_norm": 1.1171875, "learning_rate": 0.00011528617731729485, "loss": 0.7059, "step": 12222 }, { "epoch": 0.850325228703607, "grad_norm": 1.1875, "learning_rate": 0.00011518116427532988, "loss": 0.6602, "step": 12223 }, { "epoch": 0.8503947963407423, "grad_norm": 1.2265625, "learning_rate": 0.0001150761961600052, "loss": 0.7342, "step": 12224 }, { "epoch": 0.8504643639778775, "grad_norm": 1.125, "learning_rate": 0.00011497127297665111, "loss": 0.836, "step": 12225 }, { "epoch": 0.8505339316150127, "grad_norm": 1.2890625, "learning_rate": 0.00011486639473059502, "loss": 0.7602, "step": 12226 }, { "epoch": 0.850603499252148, "grad_norm": 0.99609375, "learning_rate": 0.00011476156142716198, "loss": 0.6743, "step": 12227 }, { "epoch": 0.8506730668892831, "grad_norm": 1.3515625, "learning_rate": 0.00011465677307167477, "loss": 1.1329, "step": 12228 }, { "epoch": 0.8507426345264183, "grad_norm": 1.4921875, "learning_rate": 0.0001145520296694541, "loss": 0.999, "step": 12229 }, { "epoch": 0.8508122021635535, "grad_norm": 0.9375, "learning_rate": 0.00011444733122581863, "loss": 0.7615, "step": 12230 }, { "epoch": 0.8508817698006887, "grad_norm": 1.046875, "learning_rate": 0.00011434267774608398, "loss": 0.7004, "step": 12231 }, { "epoch": 0.8509513374378239, "grad_norm": 1.109375, "learning_rate": 0.00011423806923556424, "loss": 0.9571, "step": 12232 }, { "epoch": 0.8510209050749591, "grad_norm": 1.1484375, "learning_rate": 0.0001141335056995706, "loss": 0.8939, "step": 12233 }, { "epoch": 0.8510904727120944, "grad_norm": 0.9453125, "learning_rate": 0.00011402898714341269, "loss": 0.7862, "step": 12234 }, { "epoch": 0.8511600403492295, "grad_norm": 1.03125, "learning_rate": 0.00011392451357239697, "loss": 0.7262, "step": 12235 }, { "epoch": 0.8512296079863647, "grad_norm": 1.1953125, "learning_rate": 0.0001138200849918285, "loss": 0.7603, "step": 12236 }, { "epoch": 0.8512991756235, "grad_norm": 1.2578125, "learning_rate": 0.00011371570140700937, "loss": 0.7464, "step": 12237 }, { "epoch": 0.8513687432606352, "grad_norm": 1.875, "learning_rate": 0.00011361136282323959, "loss": 0.8486, "step": 12238 }, { "epoch": 0.8514383108977703, "grad_norm": 0.96484375, "learning_rate": 0.00011350706924581711, "loss": 0.6902, "step": 12239 }, { "epoch": 0.8515078785349056, "grad_norm": 1.34375, "learning_rate": 0.00011340282068003749, "loss": 0.9536, "step": 12240 }, { "epoch": 0.8515774461720408, "grad_norm": 1.0703125, "learning_rate": 0.00011329861713119394, "loss": 0.6185, "step": 12241 }, { "epoch": 0.851647013809176, "grad_norm": 1.3125, "learning_rate": 0.00011319445860457711, "loss": 0.9331, "step": 12242 }, { "epoch": 0.8517165814463111, "grad_norm": 1.171875, "learning_rate": 0.00011309034510547578, "loss": 0.9381, "step": 12243 }, { "epoch": 0.8517861490834464, "grad_norm": 1.0859375, "learning_rate": 0.0001129862766391766, "loss": 0.8453, "step": 12244 }, { "epoch": 0.8518557167205816, "grad_norm": 1.359375, "learning_rate": 0.00011288225321096323, "loss": 0.8247, "step": 12245 }, { "epoch": 0.8519252843577167, "grad_norm": 0.84765625, "learning_rate": 0.0001127782748261178, "loss": 0.7709, "step": 12246 }, { "epoch": 0.851994851994852, "grad_norm": 1.21875, "learning_rate": 0.0001126743414899194, "loss": 0.7957, "step": 12247 }, { "epoch": 0.8520644196319872, "grad_norm": 1.28125, "learning_rate": 0.00011257045320764581, "loss": 0.975, "step": 12248 }, { "epoch": 0.8521339872691224, "grad_norm": 1.2265625, "learning_rate": 0.00011246660998457136, "loss": 1.0589, "step": 12249 }, { "epoch": 0.8522035549062577, "grad_norm": 1.140625, "learning_rate": 0.0001123628118259692, "loss": 0.8049, "step": 12250 }, { "epoch": 0.8522731225433928, "grad_norm": 1.4765625, "learning_rate": 0.00011225905873710929, "loss": 1.0467, "step": 12251 }, { "epoch": 0.852342690180528, "grad_norm": 1.1171875, "learning_rate": 0.00011215535072325956, "loss": 0.5724, "step": 12252 }, { "epoch": 0.8524122578176633, "grad_norm": 1.1171875, "learning_rate": 0.00011205168778968644, "loss": 0.9426, "step": 12253 }, { "epoch": 0.8524818254547984, "grad_norm": 1.0, "learning_rate": 0.00011194806994165297, "loss": 0.6056, "step": 12254 }, { "epoch": 0.8525513930919336, "grad_norm": 0.859375, "learning_rate": 0.00011184449718442047, "loss": 0.711, "step": 12255 }, { "epoch": 0.8526209607290688, "grad_norm": 1.015625, "learning_rate": 0.00011174096952324753, "loss": 0.9735, "step": 12256 }, { "epoch": 0.8526905283662041, "grad_norm": 1.15625, "learning_rate": 0.00011163748696339104, "loss": 0.7256, "step": 12257 }, { "epoch": 0.8527600960033392, "grad_norm": 1.375, "learning_rate": 0.00011153404951010537, "loss": 0.6559, "step": 12258 }, { "epoch": 0.8528296636404744, "grad_norm": 0.9609375, "learning_rate": 0.00011143065716864243, "loss": 0.8445, "step": 12259 }, { "epoch": 0.8528992312776097, "grad_norm": 1.015625, "learning_rate": 0.00011132730994425211, "loss": 0.6826, "step": 12260 }, { "epoch": 0.8529687989147449, "grad_norm": 1.1953125, "learning_rate": 0.00011122400784218157, "loss": 0.8554, "step": 12261 }, { "epoch": 0.85303836655188, "grad_norm": 0.984375, "learning_rate": 0.00011112075086767626, "loss": 0.8118, "step": 12262 }, { "epoch": 0.8531079341890153, "grad_norm": 1.109375, "learning_rate": 0.00011101753902597877, "loss": 0.693, "step": 12263 }, { "epoch": 0.8531775018261505, "grad_norm": 1.2421875, "learning_rate": 0.00011091437232233015, "loss": 0.8587, "step": 12264 }, { "epoch": 0.8532470694632857, "grad_norm": 1.09375, "learning_rate": 0.00011081125076196807, "loss": 0.7478, "step": 12265 }, { "epoch": 0.8533166371004209, "grad_norm": 1.0390625, "learning_rate": 0.00011070817435012892, "loss": 0.8519, "step": 12266 }, { "epoch": 0.8533862047375561, "grad_norm": 0.96875, "learning_rate": 0.00011060514309204639, "loss": 0.5889, "step": 12267 }, { "epoch": 0.8534557723746913, "grad_norm": 1.484375, "learning_rate": 0.00011050215699295196, "loss": 1.0257, "step": 12268 }, { "epoch": 0.8535253400118264, "grad_norm": 1.34375, "learning_rate": 0.00011039921605807446, "loss": 0.7956, "step": 12269 }, { "epoch": 0.8535949076489617, "grad_norm": 2.1875, "learning_rate": 0.00011029632029264069, "loss": 1.1196, "step": 12270 }, { "epoch": 0.8536644752860969, "grad_norm": 1.328125, "learning_rate": 0.00011019346970187538, "loss": 0.9829, "step": 12271 }, { "epoch": 0.8537340429232321, "grad_norm": 1.1640625, "learning_rate": 0.0001100906642910009, "loss": 0.9512, "step": 12272 }, { "epoch": 0.8538036105603674, "grad_norm": 1.046875, "learning_rate": 0.00010998790406523685, "loss": 0.6774, "step": 12273 }, { "epoch": 0.8538731781975025, "grad_norm": 1.0625, "learning_rate": 0.00010988518902980115, "loss": 0.8759, "step": 12274 }, { "epoch": 0.8539427458346377, "grad_norm": 1.28125, "learning_rate": 0.00010978251918990889, "loss": 0.6949, "step": 12275 }, { "epoch": 0.854012313471773, "grad_norm": 1.1796875, "learning_rate": 0.00010967989455077353, "loss": 0.8158, "step": 12276 }, { "epoch": 0.8540818811089081, "grad_norm": 1.109375, "learning_rate": 0.00010957731511760527, "loss": 0.7616, "step": 12277 }, { "epoch": 0.8541514487460433, "grad_norm": 1.03125, "learning_rate": 0.00010947478089561314, "loss": 0.6717, "step": 12278 }, { "epoch": 0.8542210163831786, "grad_norm": 1.1953125, "learning_rate": 0.00010937229189000286, "loss": 0.8055, "step": 12279 }, { "epoch": 0.8542905840203138, "grad_norm": 1.171875, "learning_rate": 0.00010926984810597851, "loss": 0.8088, "step": 12280 }, { "epoch": 0.8543601516574489, "grad_norm": 1.046875, "learning_rate": 0.00010916744954874192, "loss": 0.8147, "step": 12281 }, { "epoch": 0.8544297192945841, "grad_norm": 0.92578125, "learning_rate": 0.00010906509622349204, "loss": 0.8183, "step": 12282 }, { "epoch": 0.8544992869317194, "grad_norm": 1.296875, "learning_rate": 0.00010896278813542593, "loss": 0.7092, "step": 12283 }, { "epoch": 0.8545688545688546, "grad_norm": 0.9140625, "learning_rate": 0.00010886052528973789, "loss": 0.9953, "step": 12284 }, { "epoch": 0.8546384222059897, "grad_norm": 0.89453125, "learning_rate": 0.00010875830769162109, "loss": 0.9157, "step": 12285 }, { "epoch": 0.854707989843125, "grad_norm": 0.9375, "learning_rate": 0.00010865613534626517, "loss": 0.6253, "step": 12286 }, { "epoch": 0.8547775574802602, "grad_norm": 1.3125, "learning_rate": 0.00010855400825885786, "loss": 0.9757, "step": 12287 }, { "epoch": 0.8548471251173954, "grad_norm": 1.046875, "learning_rate": 0.00010845192643458501, "loss": 0.7175, "step": 12288 }, { "epoch": 0.8549166927545306, "grad_norm": 1.21875, "learning_rate": 0.00010834988987862936, "loss": 0.6697, "step": 12289 }, { "epoch": 0.8549862603916658, "grad_norm": 1.078125, "learning_rate": 0.00010824789859617224, "loss": 0.9938, "step": 12290 }, { "epoch": 0.855055828028801, "grad_norm": 1.1796875, "learning_rate": 0.0001081459525923919, "loss": 0.766, "step": 12291 }, { "epoch": 0.8551253956659363, "grad_norm": 1.0625, "learning_rate": 0.00010804405187246502, "loss": 0.6445, "step": 12292 }, { "epoch": 0.8551949633030714, "grad_norm": 1.1484375, "learning_rate": 0.00010794219644156522, "loss": 0.7632, "step": 12293 }, { "epoch": 0.8552645309402066, "grad_norm": 1.0546875, "learning_rate": 0.00010784038630486437, "loss": 0.8305, "step": 12294 }, { "epoch": 0.8553340985773418, "grad_norm": 1.4609375, "learning_rate": 0.000107738621467532, "loss": 0.765, "step": 12295 }, { "epoch": 0.855403666214477, "grad_norm": 0.9296875, "learning_rate": 0.00010763690193473519, "loss": 0.7739, "step": 12296 }, { "epoch": 0.8554732338516122, "grad_norm": 1.140625, "learning_rate": 0.0001075352277116386, "loss": 0.8307, "step": 12297 }, { "epoch": 0.8555428014887474, "grad_norm": 1.0859375, "learning_rate": 0.00010743359880340442, "loss": 0.648, "step": 12298 }, { "epoch": 0.8556123691258827, "grad_norm": 1.234375, "learning_rate": 0.00010733201521519364, "loss": 0.8163, "step": 12299 }, { "epoch": 0.8556819367630178, "grad_norm": 0.84765625, "learning_rate": 0.0001072304769521637, "loss": 0.5939, "step": 12300 }, { "epoch": 0.855751504400153, "grad_norm": 1.359375, "learning_rate": 0.00010712898401947024, "loss": 0.8583, "step": 12301 }, { "epoch": 0.8558210720372883, "grad_norm": 0.8828125, "learning_rate": 0.00010702753642226649, "loss": 0.7241, "step": 12302 }, { "epoch": 0.8558906396744235, "grad_norm": 1.0546875, "learning_rate": 0.00010692613416570341, "loss": 0.7889, "step": 12303 }, { "epoch": 0.8559602073115586, "grad_norm": 0.9765625, "learning_rate": 0.00010682477725493, "loss": 0.7806, "step": 12304 }, { "epoch": 0.8560297749486939, "grad_norm": 0.91015625, "learning_rate": 0.00010672346569509229, "loss": 0.7109, "step": 12305 }, { "epoch": 0.8560993425858291, "grad_norm": 0.77734375, "learning_rate": 0.00010662219949133478, "loss": 0.5379, "step": 12306 }, { "epoch": 0.8561689102229643, "grad_norm": 1.0078125, "learning_rate": 0.00010652097864879884, "loss": 0.8115, "step": 12307 }, { "epoch": 0.8562384778600994, "grad_norm": 1.1171875, "learning_rate": 0.00010641980317262423, "loss": 0.7521, "step": 12308 }, { "epoch": 0.8563080454972347, "grad_norm": 1.0625, "learning_rate": 0.00010631867306794795, "loss": 0.7504, "step": 12309 }, { "epoch": 0.8563776131343699, "grad_norm": 0.83203125, "learning_rate": 0.00010621758833990513, "loss": 0.701, "step": 12310 }, { "epoch": 0.8564471807715051, "grad_norm": 1.015625, "learning_rate": 0.00010611654899362789, "loss": 0.8729, "step": 12311 }, { "epoch": 0.8565167484086403, "grad_norm": 0.9765625, "learning_rate": 0.00010601555503424687, "loss": 0.7054, "step": 12312 }, { "epoch": 0.8565863160457755, "grad_norm": 1.0546875, "learning_rate": 0.00010591460646689022, "loss": 0.78, "step": 12313 }, { "epoch": 0.8566558836829107, "grad_norm": 0.96875, "learning_rate": 0.00010581370329668316, "loss": 0.7408, "step": 12314 }, { "epoch": 0.856725451320046, "grad_norm": 1.0625, "learning_rate": 0.00010571284552874939, "loss": 0.7002, "step": 12315 }, { "epoch": 0.8567950189571811, "grad_norm": 1.09375, "learning_rate": 0.00010561203316820922, "loss": 0.7092, "step": 12316 }, { "epoch": 0.8568645865943163, "grad_norm": 0.9921875, "learning_rate": 0.00010551126622018248, "loss": 0.858, "step": 12317 }, { "epoch": 0.8569341542314516, "grad_norm": 1.28125, "learning_rate": 0.00010541054468978507, "loss": 1.0974, "step": 12318 }, { "epoch": 0.8570037218685868, "grad_norm": 1.2578125, "learning_rate": 0.00010530986858213088, "loss": 0.9695, "step": 12319 }, { "epoch": 0.8570732895057219, "grad_norm": 1.03125, "learning_rate": 0.00010520923790233217, "loss": 0.8819, "step": 12320 }, { "epoch": 0.8571428571428571, "grad_norm": 1.359375, "learning_rate": 0.00010510865265549818, "loss": 0.8206, "step": 12321 }, { "epoch": 0.8572124247799924, "grad_norm": 1.0078125, "learning_rate": 0.00010500811284673628, "loss": 0.7581, "step": 12322 }, { "epoch": 0.8572819924171275, "grad_norm": 1.0625, "learning_rate": 0.00010490761848115127, "loss": 1.0358, "step": 12323 }, { "epoch": 0.8573515600542627, "grad_norm": 1.1953125, "learning_rate": 0.00010480716956384584, "loss": 0.8605, "step": 12324 }, { "epoch": 0.857421127691398, "grad_norm": 1.2265625, "learning_rate": 0.00010470676609992014, "loss": 0.928, "step": 12325 }, { "epoch": 0.8574906953285332, "grad_norm": 1.4140625, "learning_rate": 0.0001046064080944723, "loss": 0.8935, "step": 12326 }, { "epoch": 0.8575602629656683, "grad_norm": 1.2421875, "learning_rate": 0.00010450609555259805, "loss": 0.9788, "step": 12327 }, { "epoch": 0.8576298306028036, "grad_norm": 1.0859375, "learning_rate": 0.00010440582847939061, "loss": 0.6736, "step": 12328 }, { "epoch": 0.8576993982399388, "grad_norm": 0.953125, "learning_rate": 0.00010430560687994117, "loss": 0.6788, "step": 12329 }, { "epoch": 0.857768965877074, "grad_norm": 1.4140625, "learning_rate": 0.00010420543075933786, "loss": 0.7944, "step": 12330 }, { "epoch": 0.8578385335142092, "grad_norm": 1.3515625, "learning_rate": 0.00010410530012266817, "loss": 0.8764, "step": 12331 }, { "epoch": 0.8579081011513444, "grad_norm": 0.99609375, "learning_rate": 0.00010400521497501558, "loss": 0.6809, "step": 12332 }, { "epoch": 0.8579776687884796, "grad_norm": 1.3828125, "learning_rate": 0.00010390517532146182, "loss": 0.8418, "step": 12333 }, { "epoch": 0.8580472364256148, "grad_norm": 0.86328125, "learning_rate": 0.00010380518116708692, "loss": 0.6544, "step": 12334 }, { "epoch": 0.85811680406275, "grad_norm": 1.34375, "learning_rate": 0.00010370523251696751, "loss": 0.891, "step": 12335 }, { "epoch": 0.8581863716998852, "grad_norm": 1.4140625, "learning_rate": 0.00010360532937617894, "loss": 1.1266, "step": 12336 }, { "epoch": 0.8582559393370204, "grad_norm": 0.90625, "learning_rate": 0.0001035054717497933, "loss": 0.5519, "step": 12337 }, { "epoch": 0.8583255069741557, "grad_norm": 1.1875, "learning_rate": 0.0001034056596428814, "loss": 0.9505, "step": 12338 }, { "epoch": 0.8583950746112908, "grad_norm": 1.1875, "learning_rate": 0.00010330589306051074, "loss": 1.0161, "step": 12339 }, { "epoch": 0.858464642248426, "grad_norm": 1.3828125, "learning_rate": 0.00010320617200774718, "loss": 0.7717, "step": 12340 }, { "epoch": 0.8585342098855613, "grad_norm": 1.40625, "learning_rate": 0.0001031064964896542, "loss": 0.9509, "step": 12341 }, { "epoch": 0.8586037775226965, "grad_norm": 1.375, "learning_rate": 0.00010300686651129265, "loss": 1.0319, "step": 12342 }, { "epoch": 0.8586733451598316, "grad_norm": 0.96484375, "learning_rate": 0.00010290728207772104, "loss": 0.734, "step": 12343 }, { "epoch": 0.8587429127969669, "grad_norm": 0.890625, "learning_rate": 0.00010280774319399599, "loss": 0.7712, "step": 12344 }, { "epoch": 0.8588124804341021, "grad_norm": 0.953125, "learning_rate": 0.00010270824986517169, "loss": 0.87, "step": 12345 }, { "epoch": 0.8588820480712372, "grad_norm": 1.4921875, "learning_rate": 0.00010260880209629985, "loss": 0.8685, "step": 12346 }, { "epoch": 0.8589516157083724, "grad_norm": 0.99609375, "learning_rate": 0.00010250939989242957, "loss": 0.662, "step": 12347 }, { "epoch": 0.8590211833455077, "grad_norm": 1.0859375, "learning_rate": 0.00010241004325860859, "loss": 0.6317, "step": 12348 }, { "epoch": 0.8590907509826429, "grad_norm": 1.015625, "learning_rate": 0.00010231073219988108, "loss": 0.663, "step": 12349 }, { "epoch": 0.859160318619778, "grad_norm": 1.265625, "learning_rate": 0.00010221146672129022, "loss": 1.0907, "step": 12350 }, { "epoch": 0.8592298862569133, "grad_norm": 1.1171875, "learning_rate": 0.00010211224682787567, "loss": 0.7271, "step": 12351 }, { "epoch": 0.8592994538940485, "grad_norm": 1.03125, "learning_rate": 0.00010201307252467573, "loss": 0.7229, "step": 12352 }, { "epoch": 0.8593690215311837, "grad_norm": 1.0703125, "learning_rate": 0.00010191394381672547, "loss": 0.848, "step": 12353 }, { "epoch": 0.859438589168319, "grad_norm": 1.0234375, "learning_rate": 0.00010181486070905855, "loss": 0.9485, "step": 12354 }, { "epoch": 0.8595081568054541, "grad_norm": 0.92578125, "learning_rate": 0.00010171582320670602, "loss": 0.6691, "step": 12355 }, { "epoch": 0.8595777244425893, "grad_norm": 0.9609375, "learning_rate": 0.00010161683131469635, "loss": 0.9354, "step": 12356 }, { "epoch": 0.8596472920797246, "grad_norm": 1.203125, "learning_rate": 0.00010151788503805548, "loss": 0.8412, "step": 12357 }, { "epoch": 0.8597168597168597, "grad_norm": 1.046875, "learning_rate": 0.00010141898438180785, "loss": 0.8609, "step": 12358 }, { "epoch": 0.8597864273539949, "grad_norm": 1.0546875, "learning_rate": 0.00010132012935097512, "loss": 1.051, "step": 12359 }, { "epoch": 0.8598559949911301, "grad_norm": 0.97265625, "learning_rate": 0.0001012213199505766, "loss": 0.9528, "step": 12360 }, { "epoch": 0.8599255626282654, "grad_norm": 1.1953125, "learning_rate": 0.00010112255618562894, "loss": 0.9181, "step": 12361 }, { "epoch": 0.8599951302654005, "grad_norm": 1.25, "learning_rate": 0.00010102383806114735, "loss": 0.9213, "step": 12362 }, { "epoch": 0.8600646979025357, "grad_norm": 1.09375, "learning_rate": 0.00010092516558214427, "loss": 0.7589, "step": 12363 }, { "epoch": 0.860134265539671, "grad_norm": 1.0703125, "learning_rate": 0.00010082653875362946, "loss": 0.8638, "step": 12364 }, { "epoch": 0.8602038331768062, "grad_norm": 1.40625, "learning_rate": 0.00010072795758061082, "loss": 0.7913, "step": 12365 }, { "epoch": 0.8602734008139413, "grad_norm": 1.1875, "learning_rate": 0.0001006294220680939, "loss": 0.7034, "step": 12366 }, { "epoch": 0.8603429684510766, "grad_norm": 1.5234375, "learning_rate": 0.00010053093222108168, "loss": 0.9527, "step": 12367 }, { "epoch": 0.8604125360882118, "grad_norm": 1.21875, "learning_rate": 0.00010043248804457494, "loss": 0.7554, "step": 12368 }, { "epoch": 0.860482103725347, "grad_norm": 1.1875, "learning_rate": 0.0001003340895435726, "loss": 0.9477, "step": 12369 }, { "epoch": 0.8605516713624821, "grad_norm": 1.125, "learning_rate": 0.00010023573672307052, "loss": 0.8501, "step": 12370 }, { "epoch": 0.8606212389996174, "grad_norm": 1.2890625, "learning_rate": 0.00010013742958806238, "loss": 0.6945, "step": 12371 }, { "epoch": 0.8606908066367526, "grad_norm": 1.1796875, "learning_rate": 0.00010003916814353986, "loss": 0.9602, "step": 12372 }, { "epoch": 0.8607603742738877, "grad_norm": 1.140625, "learning_rate": 9.994095239449253e-05, "loss": 0.6929, "step": 12373 }, { "epoch": 0.860829941911023, "grad_norm": 0.828125, "learning_rate": 9.984278234590694e-05, "loss": 0.6558, "step": 12374 }, { "epoch": 0.8608995095481582, "grad_norm": 1.15625, "learning_rate": 9.974465800276755e-05, "loss": 0.8409, "step": 12375 }, { "epoch": 0.8609690771852934, "grad_norm": 1.1875, "learning_rate": 9.964657937005683e-05, "loss": 0.8428, "step": 12376 }, { "epoch": 0.8610386448224286, "grad_norm": 0.78125, "learning_rate": 9.95485464527549e-05, "loss": 0.9456, "step": 12377 }, { "epoch": 0.8611082124595638, "grad_norm": 1.265625, "learning_rate": 9.945055925583913e-05, "loss": 0.7942, "step": 12378 }, { "epoch": 0.861177780096699, "grad_norm": 0.88671875, "learning_rate": 9.935261778428473e-05, "loss": 0.6238, "step": 12379 }, { "epoch": 0.8612473477338343, "grad_norm": 1.1015625, "learning_rate": 9.925472204306485e-05, "loss": 0.8494, "step": 12380 }, { "epoch": 0.8613169153709694, "grad_norm": 1.140625, "learning_rate": 9.915687203715007e-05, "loss": 0.7037, "step": 12381 }, { "epoch": 0.8613864830081046, "grad_norm": 1.5546875, "learning_rate": 9.905906777150874e-05, "loss": 0.9521, "step": 12382 }, { "epoch": 0.8614560506452398, "grad_norm": 1.3359375, "learning_rate": 9.89613092511068e-05, "loss": 0.9395, "step": 12383 }, { "epoch": 0.8615256182823751, "grad_norm": 1.046875, "learning_rate": 9.886359648090826e-05, "loss": 0.7269, "step": 12384 }, { "epoch": 0.8615951859195102, "grad_norm": 1.0390625, "learning_rate": 9.876592946587393e-05, "loss": 0.7753, "step": 12385 }, { "epoch": 0.8616647535566454, "grad_norm": 0.98828125, "learning_rate": 9.866830821096318e-05, "loss": 0.7973, "step": 12386 }, { "epoch": 0.8617343211937807, "grad_norm": 1.546875, "learning_rate": 9.857073272113282e-05, "loss": 0.6929, "step": 12387 }, { "epoch": 0.8618038888309159, "grad_norm": 0.94140625, "learning_rate": 9.847320300133722e-05, "loss": 0.8213, "step": 12388 }, { "epoch": 0.861873456468051, "grad_norm": 1.1015625, "learning_rate": 9.837571905652808e-05, "loss": 0.8065, "step": 12389 }, { "epoch": 0.8619430241051863, "grad_norm": 1.21875, "learning_rate": 9.827828089165547e-05, "loss": 0.7282, "step": 12390 }, { "epoch": 0.8620125917423215, "grad_norm": 0.83984375, "learning_rate": 9.818088851166684e-05, "loss": 0.6526, "step": 12391 }, { "epoch": 0.8620821593794566, "grad_norm": 1.03125, "learning_rate": 9.808354192150725e-05, "loss": 0.918, "step": 12392 }, { "epoch": 0.8621517270165919, "grad_norm": 1.1171875, "learning_rate": 9.79862411261192e-05, "loss": 1.1219, "step": 12393 }, { "epoch": 0.8622212946537271, "grad_norm": 1.296875, "learning_rate": 9.788898613044328e-05, "loss": 0.8244, "step": 12394 }, { "epoch": 0.8622908622908623, "grad_norm": 1.0546875, "learning_rate": 9.779177693941799e-05, "loss": 0.7914, "step": 12395 }, { "epoch": 0.8623604299279974, "grad_norm": 0.91015625, "learning_rate": 9.76946135579787e-05, "loss": 0.6921, "step": 12396 }, { "epoch": 0.8624299975651327, "grad_norm": 1.15625, "learning_rate": 9.759749599105883e-05, "loss": 0.9035, "step": 12397 }, { "epoch": 0.8624995652022679, "grad_norm": 0.7578125, "learning_rate": 9.750042424358984e-05, "loss": 0.6005, "step": 12398 }, { "epoch": 0.8625691328394031, "grad_norm": 1.2109375, "learning_rate": 9.740339832050016e-05, "loss": 0.8299, "step": 12399 }, { "epoch": 0.8626387004765383, "grad_norm": 0.94140625, "learning_rate": 9.730641822671649e-05, "loss": 0.8842, "step": 12400 }, { "epoch": 0.8627082681136735, "grad_norm": 1.1171875, "learning_rate": 9.720948396716323e-05, "loss": 1.0405, "step": 12401 }, { "epoch": 0.8627778357508087, "grad_norm": 1.4609375, "learning_rate": 9.711259554676188e-05, "loss": 0.8272, "step": 12402 }, { "epoch": 0.862847403387944, "grad_norm": 1.109375, "learning_rate": 9.701575297043197e-05, "loss": 0.9613, "step": 12403 }, { "epoch": 0.8629169710250791, "grad_norm": 0.96484375, "learning_rate": 9.691895624309066e-05, "loss": 0.6554, "step": 12404 }, { "epoch": 0.8629865386622143, "grad_norm": 0.79296875, "learning_rate": 9.682220536965314e-05, "loss": 0.6862, "step": 12405 }, { "epoch": 0.8630561062993496, "grad_norm": 1.2890625, "learning_rate": 9.672550035503158e-05, "loss": 0.6961, "step": 12406 }, { "epoch": 0.8631256739364848, "grad_norm": 1.203125, "learning_rate": 9.662884120413617e-05, "loss": 0.9276, "step": 12407 }, { "epoch": 0.8631952415736199, "grad_norm": 0.890625, "learning_rate": 9.653222792187489e-05, "loss": 0.584, "step": 12408 }, { "epoch": 0.8632648092107551, "grad_norm": 1.015625, "learning_rate": 9.643566051315334e-05, "loss": 0.6404, "step": 12409 }, { "epoch": 0.8633343768478904, "grad_norm": 1.1484375, "learning_rate": 9.633913898287472e-05, "loss": 0.7392, "step": 12410 }, { "epoch": 0.8634039444850256, "grad_norm": 1.0078125, "learning_rate": 9.624266333593968e-05, "loss": 0.6214, "step": 12411 }, { "epoch": 0.8634735121221607, "grad_norm": 1.21875, "learning_rate": 9.614623357724706e-05, "loss": 0.9785, "step": 12412 }, { "epoch": 0.863543079759296, "grad_norm": 1.0078125, "learning_rate": 9.604984971169273e-05, "loss": 0.7385, "step": 12413 }, { "epoch": 0.8636126473964312, "grad_norm": 0.796875, "learning_rate": 9.595351174417089e-05, "loss": 0.6108, "step": 12414 }, { "epoch": 0.8636822150335663, "grad_norm": 1.1640625, "learning_rate": 9.585721967957306e-05, "loss": 0.8732, "step": 12415 }, { "epoch": 0.8637517826707016, "grad_norm": 1.4609375, "learning_rate": 9.576097352278846e-05, "loss": 0.8376, "step": 12416 }, { "epoch": 0.8638213503078368, "grad_norm": 0.9765625, "learning_rate": 9.566477327870371e-05, "loss": 0.883, "step": 12417 }, { "epoch": 0.863890917944972, "grad_norm": 1.34375, "learning_rate": 9.55686189522036e-05, "loss": 0.9439, "step": 12418 }, { "epoch": 0.8639604855821073, "grad_norm": 1.1875, "learning_rate": 9.547251054817052e-05, "loss": 0.9326, "step": 12419 }, { "epoch": 0.8640300532192424, "grad_norm": 1.1484375, "learning_rate": 9.537644807148416e-05, "loss": 0.6504, "step": 12420 }, { "epoch": 0.8640996208563776, "grad_norm": 1.2421875, "learning_rate": 9.528043152702204e-05, "loss": 0.9633, "step": 12421 }, { "epoch": 0.8641691884935128, "grad_norm": 0.9453125, "learning_rate": 9.518446091965938e-05, "loss": 0.7757, "step": 12422 }, { "epoch": 0.864238756130648, "grad_norm": 1.203125, "learning_rate": 9.50885362542695e-05, "loss": 0.8814, "step": 12423 }, { "epoch": 0.8643083237677832, "grad_norm": 1.109375, "learning_rate": 9.49926575357225e-05, "loss": 0.6348, "step": 12424 }, { "epoch": 0.8643778914049184, "grad_norm": 1.15625, "learning_rate": 9.489682476888673e-05, "loss": 0.8008, "step": 12425 }, { "epoch": 0.8644474590420537, "grad_norm": 1.1796875, "learning_rate": 9.480103795862805e-05, "loss": 0.8026, "step": 12426 }, { "epoch": 0.8645170266791888, "grad_norm": 1.15625, "learning_rate": 9.470529710981036e-05, "loss": 0.8007, "step": 12427 }, { "epoch": 0.864586594316324, "grad_norm": 1.15625, "learning_rate": 9.460960222729443e-05, "loss": 0.9158, "step": 12428 }, { "epoch": 0.8646561619534593, "grad_norm": 1.4375, "learning_rate": 9.45139533159396e-05, "loss": 0.6803, "step": 12429 }, { "epoch": 0.8647257295905945, "grad_norm": 1.203125, "learning_rate": 9.441835038060221e-05, "loss": 1.0275, "step": 12430 }, { "epoch": 0.8647952972277296, "grad_norm": 1.234375, "learning_rate": 9.432279342613637e-05, "loss": 0.9827, "step": 12431 }, { "epoch": 0.8648648648648649, "grad_norm": 0.984375, "learning_rate": 9.42272824573941e-05, "loss": 0.469, "step": 12432 }, { "epoch": 0.8649344325020001, "grad_norm": 1.28125, "learning_rate": 9.413181747922517e-05, "loss": 0.8856, "step": 12433 }, { "epoch": 0.8650040001391353, "grad_norm": 1.265625, "learning_rate": 9.403639849647672e-05, "loss": 0.8125, "step": 12434 }, { "epoch": 0.8650735677762704, "grad_norm": 1.3984375, "learning_rate": 9.39410255139933e-05, "loss": 0.6766, "step": 12435 }, { "epoch": 0.8651431354134057, "grad_norm": 0.99609375, "learning_rate": 9.384569853661773e-05, "loss": 0.8128, "step": 12436 }, { "epoch": 0.8652127030505409, "grad_norm": 1.2421875, "learning_rate": 9.375041756919045e-05, "loss": 0.9982, "step": 12437 }, { "epoch": 0.865282270687676, "grad_norm": 0.8828125, "learning_rate": 9.365518261654904e-05, "loss": 0.5813, "step": 12438 }, { "epoch": 0.8653518383248113, "grad_norm": 1.0546875, "learning_rate": 9.355999368352907e-05, "loss": 0.8297, "step": 12439 }, { "epoch": 0.8654214059619465, "grad_norm": 1.1015625, "learning_rate": 9.346485077496369e-05, "loss": 0.8446, "step": 12440 }, { "epoch": 0.8654909735990817, "grad_norm": 1.0390625, "learning_rate": 9.336975389568425e-05, "loss": 0.7497, "step": 12441 }, { "epoch": 0.865560541236217, "grad_norm": 0.94140625, "learning_rate": 9.327470305051866e-05, "loss": 0.7889, "step": 12442 }, { "epoch": 0.8656301088733521, "grad_norm": 1.0859375, "learning_rate": 9.317969824429363e-05, "loss": 0.6662, "step": 12443 }, { "epoch": 0.8656996765104873, "grad_norm": 1.015625, "learning_rate": 9.308473948183283e-05, "loss": 0.7784, "step": 12444 }, { "epoch": 0.8657692441476226, "grad_norm": 1.0859375, "learning_rate": 9.298982676795764e-05, "loss": 0.8745, "step": 12445 }, { "epoch": 0.8658388117847577, "grad_norm": 1.1015625, "learning_rate": 9.289496010748722e-05, "loss": 0.7813, "step": 12446 }, { "epoch": 0.8659083794218929, "grad_norm": 1.109375, "learning_rate": 9.280013950523891e-05, "loss": 0.9556, "step": 12447 }, { "epoch": 0.8659779470590281, "grad_norm": 0.98046875, "learning_rate": 9.270536496602678e-05, "loss": 0.8412, "step": 12448 }, { "epoch": 0.8660475146961634, "grad_norm": 1.0625, "learning_rate": 9.261063649466306e-05, "loss": 0.7001, "step": 12449 }, { "epoch": 0.8661170823332985, "grad_norm": 1.171875, "learning_rate": 9.251595409595748e-05, "loss": 0.8509, "step": 12450 }, { "epoch": 0.8661866499704337, "grad_norm": 1.234375, "learning_rate": 9.242131777471796e-05, "loss": 0.6475, "step": 12451 }, { "epoch": 0.866256217607569, "grad_norm": 1.21875, "learning_rate": 9.232672753574944e-05, "loss": 0.7832, "step": 12452 }, { "epoch": 0.8663257852447042, "grad_norm": 0.9140625, "learning_rate": 9.223218338385441e-05, "loss": 0.7964, "step": 12453 }, { "epoch": 0.8663953528818393, "grad_norm": 1.0234375, "learning_rate": 9.21376853238336e-05, "loss": 0.6256, "step": 12454 }, { "epoch": 0.8664649205189746, "grad_norm": 1.2421875, "learning_rate": 9.204323336048548e-05, "loss": 0.8907, "step": 12455 }, { "epoch": 0.8665344881561098, "grad_norm": 1.3203125, "learning_rate": 9.194882749860545e-05, "loss": 0.839, "step": 12456 }, { "epoch": 0.866604055793245, "grad_norm": 1.109375, "learning_rate": 9.185446774298678e-05, "loss": 0.9559, "step": 12457 }, { "epoch": 0.8666736234303802, "grad_norm": 1.28125, "learning_rate": 9.176015409842098e-05, "loss": 0.9003, "step": 12458 }, { "epoch": 0.8667431910675154, "grad_norm": 1.1328125, "learning_rate": 9.166588656969676e-05, "loss": 1.0415, "step": 12459 }, { "epoch": 0.8668127587046506, "grad_norm": 1.2734375, "learning_rate": 9.157166516160031e-05, "loss": 0.8276, "step": 12460 }, { "epoch": 0.8668823263417857, "grad_norm": 1.25, "learning_rate": 9.147748987891614e-05, "loss": 0.9967, "step": 12461 }, { "epoch": 0.866951893978921, "grad_norm": 1.59375, "learning_rate": 9.138336072642573e-05, "loss": 0.4936, "step": 12462 }, { "epoch": 0.8670214616160562, "grad_norm": 1.015625, "learning_rate": 9.128927770890826e-05, "loss": 0.682, "step": 12463 }, { "epoch": 0.8670910292531914, "grad_norm": 0.98828125, "learning_rate": 9.119524083114106e-05, "loss": 0.5948, "step": 12464 }, { "epoch": 0.8671605968903267, "grad_norm": 1.203125, "learning_rate": 9.110125009789905e-05, "loss": 0.8522, "step": 12465 }, { "epoch": 0.8672301645274618, "grad_norm": 0.9765625, "learning_rate": 9.100730551395431e-05, "loss": 0.9638, "step": 12466 }, { "epoch": 0.867299732164597, "grad_norm": 1.109375, "learning_rate": 9.09134070840767e-05, "loss": 0.8597, "step": 12467 }, { "epoch": 0.8673692998017323, "grad_norm": 1.0234375, "learning_rate": 9.081955481303416e-05, "loss": 0.6316, "step": 12468 }, { "epoch": 0.8674388674388674, "grad_norm": 1.328125, "learning_rate": 9.072574870559224e-05, "loss": 1.031, "step": 12469 }, { "epoch": 0.8675084350760026, "grad_norm": 1.09375, "learning_rate": 9.06319887665138e-05, "loss": 0.7982, "step": 12470 }, { "epoch": 0.8675780027131379, "grad_norm": 0.953125, "learning_rate": 9.053827500055911e-05, "loss": 0.7964, "step": 12471 }, { "epoch": 0.8676475703502731, "grad_norm": 1.1171875, "learning_rate": 9.044460741248683e-05, "loss": 0.8397, "step": 12472 }, { "epoch": 0.8677171379874082, "grad_norm": 0.890625, "learning_rate": 9.035098600705305e-05, "loss": 0.7533, "step": 12473 }, { "epoch": 0.8677867056245434, "grad_norm": 1.046875, "learning_rate": 9.025741078901106e-05, "loss": 0.6866, "step": 12474 }, { "epoch": 0.8678562732616787, "grad_norm": 0.953125, "learning_rate": 9.016388176311251e-05, "loss": 0.5942, "step": 12475 }, { "epoch": 0.8679258408988139, "grad_norm": 1.03125, "learning_rate": 9.007039893410607e-05, "loss": 0.7647, "step": 12476 }, { "epoch": 0.867995408535949, "grad_norm": 1.2890625, "learning_rate": 8.997696230673824e-05, "loss": 0.8657, "step": 12477 }, { "epoch": 0.8680649761730843, "grad_norm": 1.109375, "learning_rate": 8.988357188575347e-05, "loss": 0.6619, "step": 12478 }, { "epoch": 0.8681345438102195, "grad_norm": 1.0859375, "learning_rate": 8.979022767589373e-05, "loss": 0.6795, "step": 12479 }, { "epoch": 0.8682041114473547, "grad_norm": 1.0703125, "learning_rate": 8.969692968189835e-05, "loss": 0.7326, "step": 12480 }, { "epoch": 0.8682736790844899, "grad_norm": 0.9921875, "learning_rate": 8.960367790850455e-05, "loss": 0.7366, "step": 12481 }, { "epoch": 0.8683432467216251, "grad_norm": 1.296875, "learning_rate": 8.951047236044719e-05, "loss": 0.8505, "step": 12482 }, { "epoch": 0.8684128143587603, "grad_norm": 0.97265625, "learning_rate": 8.941731304245903e-05, "loss": 1.0211, "step": 12483 }, { "epoch": 0.8684823819958956, "grad_norm": 1.1328125, "learning_rate": 8.932419995927e-05, "loss": 0.8623, "step": 12484 }, { "epoch": 0.8685519496330307, "grad_norm": 1.15625, "learning_rate": 8.923113311560782e-05, "loss": 0.746, "step": 12485 }, { "epoch": 0.8686215172701659, "grad_norm": 1.296875, "learning_rate": 8.913811251619807e-05, "loss": 0.9333, "step": 12486 }, { "epoch": 0.8686910849073011, "grad_norm": 1.1328125, "learning_rate": 8.90451381657641e-05, "loss": 0.9065, "step": 12487 }, { "epoch": 0.8687606525444364, "grad_norm": 1.1796875, "learning_rate": 8.89522100690262e-05, "loss": 1.2835, "step": 12488 }, { "epoch": 0.8688302201815715, "grad_norm": 1.1484375, "learning_rate": 8.88593282307033e-05, "loss": 0.8051, "step": 12489 }, { "epoch": 0.8688997878187067, "grad_norm": 1.0, "learning_rate": 8.876649265551107e-05, "loss": 0.7495, "step": 12490 }, { "epoch": 0.868969355455842, "grad_norm": 1.1484375, "learning_rate": 8.86737033481635e-05, "loss": 0.869, "step": 12491 }, { "epoch": 0.8690389230929771, "grad_norm": 1.34375, "learning_rate": 8.85809603133716e-05, "loss": 0.8236, "step": 12492 }, { "epoch": 0.8691084907301123, "grad_norm": 0.98828125, "learning_rate": 8.848826355584494e-05, "loss": 0.8449, "step": 12493 }, { "epoch": 0.8691780583672476, "grad_norm": 1.046875, "learning_rate": 8.839561308028987e-05, "loss": 0.6904, "step": 12494 }, { "epoch": 0.8692476260043828, "grad_norm": 0.88671875, "learning_rate": 8.830300889141051e-05, "loss": 0.5591, "step": 12495 }, { "epoch": 0.8693171936415179, "grad_norm": 1.140625, "learning_rate": 8.821045099390911e-05, "loss": 0.8662, "step": 12496 }, { "epoch": 0.8693867612786532, "grad_norm": 1.4296875, "learning_rate": 8.811793939248547e-05, "loss": 0.7743, "step": 12497 }, { "epoch": 0.8694563289157884, "grad_norm": 1.140625, "learning_rate": 8.802547409183659e-05, "loss": 0.7568, "step": 12498 }, { "epoch": 0.8695258965529236, "grad_norm": 1.0078125, "learning_rate": 8.793305509665727e-05, "loss": 0.7029, "step": 12499 }, { "epoch": 0.8695954641900587, "grad_norm": 1.015625, "learning_rate": 8.784068241164023e-05, "loss": 0.7975, "step": 12500 }, { "epoch": 0.869665031827194, "grad_norm": 1.109375, "learning_rate": 8.774835604147602e-05, "loss": 0.8389, "step": 12501 }, { "epoch": 0.8697345994643292, "grad_norm": 0.9921875, "learning_rate": 8.76560759908519e-05, "loss": 0.9547, "step": 12502 }, { "epoch": 0.8698041671014644, "grad_norm": 0.96484375, "learning_rate": 8.75638422644539e-05, "loss": 0.6406, "step": 12503 }, { "epoch": 0.8698737347385996, "grad_norm": 1.078125, "learning_rate": 8.747165486696474e-05, "loss": 0.75, "step": 12504 }, { "epoch": 0.8699433023757348, "grad_norm": 1.046875, "learning_rate": 8.737951380306564e-05, "loss": 0.7778, "step": 12505 }, { "epoch": 0.87001287001287, "grad_norm": 0.95703125, "learning_rate": 8.728741907743476e-05, "loss": 0.7914, "step": 12506 }, { "epoch": 0.8700824376500053, "grad_norm": 1.171875, "learning_rate": 8.719537069474848e-05, "loss": 0.8327, "step": 12507 }, { "epoch": 0.8701520052871404, "grad_norm": 1.3515625, "learning_rate": 8.71033686596805e-05, "loss": 1.1135, "step": 12508 }, { "epoch": 0.8702215729242756, "grad_norm": 0.92578125, "learning_rate": 8.701141297690163e-05, "loss": 0.8336, "step": 12509 }, { "epoch": 0.8702911405614109, "grad_norm": 1.0703125, "learning_rate": 8.69195036510818e-05, "loss": 0.6165, "step": 12510 }, { "epoch": 0.870360708198546, "grad_norm": 1.140625, "learning_rate": 8.68276406868873e-05, "loss": 0.895, "step": 12511 }, { "epoch": 0.8704302758356812, "grad_norm": 1.421875, "learning_rate": 8.673582408898251e-05, "loss": 0.8865, "step": 12512 }, { "epoch": 0.8704998434728164, "grad_norm": 1.0546875, "learning_rate": 8.664405386202911e-05, "loss": 0.735, "step": 12513 }, { "epoch": 0.8705694111099517, "grad_norm": 1.3984375, "learning_rate": 8.655233001068708e-05, "loss": 0.9591, "step": 12514 }, { "epoch": 0.8706389787470868, "grad_norm": 0.97265625, "learning_rate": 8.646065253961377e-05, "loss": 0.8405, "step": 12515 }, { "epoch": 0.870708546384222, "grad_norm": 0.94921875, "learning_rate": 8.636902145346381e-05, "loss": 0.7767, "step": 12516 }, { "epoch": 0.8707781140213573, "grad_norm": 1.1875, "learning_rate": 8.627743675689004e-05, "loss": 0.8149, "step": 12517 }, { "epoch": 0.8708476816584925, "grad_norm": 0.85546875, "learning_rate": 8.618589845454239e-05, "loss": 0.7053, "step": 12518 }, { "epoch": 0.8709172492956276, "grad_norm": 1.0234375, "learning_rate": 8.609440655106903e-05, "loss": 0.5503, "step": 12519 }, { "epoch": 0.8709868169327629, "grad_norm": 1.125, "learning_rate": 8.600296105111505e-05, "loss": 0.6353, "step": 12520 }, { "epoch": 0.8710563845698981, "grad_norm": 1.0234375, "learning_rate": 8.591156195932403e-05, "loss": 0.6243, "step": 12521 }, { "epoch": 0.8711259522070333, "grad_norm": 1.3046875, "learning_rate": 8.582020928033651e-05, "loss": 0.9454, "step": 12522 }, { "epoch": 0.8711955198441685, "grad_norm": 0.9296875, "learning_rate": 8.572890301879066e-05, "loss": 0.7238, "step": 12523 }, { "epoch": 0.8712650874813037, "grad_norm": 1.3359375, "learning_rate": 8.56376431793231e-05, "loss": 0.849, "step": 12524 }, { "epoch": 0.8713346551184389, "grad_norm": 1.234375, "learning_rate": 8.554642976656734e-05, "loss": 0.5616, "step": 12525 }, { "epoch": 0.871404222755574, "grad_norm": 1.21875, "learning_rate": 8.54552627851548e-05, "loss": 0.7973, "step": 12526 }, { "epoch": 0.8714737903927093, "grad_norm": 1.0703125, "learning_rate": 8.5364142239714e-05, "loss": 0.7257, "step": 12527 }, { "epoch": 0.8715433580298445, "grad_norm": 0.97265625, "learning_rate": 8.527306813487213e-05, "loss": 0.6967, "step": 12528 }, { "epoch": 0.8716129256669797, "grad_norm": 1.1796875, "learning_rate": 8.518204047525336e-05, "loss": 0.8197, "step": 12529 }, { "epoch": 0.871682493304115, "grad_norm": 0.9140625, "learning_rate": 8.509105926547945e-05, "loss": 0.4892, "step": 12530 }, { "epoch": 0.8717520609412501, "grad_norm": 1.40625, "learning_rate": 8.500012451017014e-05, "loss": 1.0708, "step": 12531 }, { "epoch": 0.8718216285783853, "grad_norm": 0.9765625, "learning_rate": 8.490923621394242e-05, "loss": 0.7588, "step": 12532 }, { "epoch": 0.8718911962155206, "grad_norm": 1.078125, "learning_rate": 8.481839438141159e-05, "loss": 0.7692, "step": 12533 }, { "epoch": 0.8719607638526558, "grad_norm": 1.1015625, "learning_rate": 8.472759901718952e-05, "loss": 0.9075, "step": 12534 }, { "epoch": 0.8720303314897909, "grad_norm": 0.6640625, "learning_rate": 8.463685012588685e-05, "loss": 0.5494, "step": 12535 }, { "epoch": 0.8720998991269262, "grad_norm": 1.1015625, "learning_rate": 8.4546147712111e-05, "loss": 0.7523, "step": 12536 }, { "epoch": 0.8721694667640614, "grad_norm": 1.0859375, "learning_rate": 8.445549178046774e-05, "loss": 0.7254, "step": 12537 }, { "epoch": 0.8722390344011965, "grad_norm": 0.8515625, "learning_rate": 8.436488233555973e-05, "loss": 0.5229, "step": 12538 }, { "epoch": 0.8723086020383317, "grad_norm": 0.984375, "learning_rate": 8.427431938198805e-05, "loss": 0.5742, "step": 12539 }, { "epoch": 0.872378169675467, "grad_norm": 1.1015625, "learning_rate": 8.418380292435079e-05, "loss": 0.8908, "step": 12540 }, { "epoch": 0.8724477373126022, "grad_norm": 1.1171875, "learning_rate": 8.409333296724364e-05, "loss": 0.7893, "step": 12541 }, { "epoch": 0.8725173049497373, "grad_norm": 1.1015625, "learning_rate": 8.40029095152609e-05, "loss": 0.546, "step": 12542 }, { "epoch": 0.8725868725868726, "grad_norm": 1.2890625, "learning_rate": 8.391253257299336e-05, "loss": 0.6896, "step": 12543 }, { "epoch": 0.8726564402240078, "grad_norm": 1.0859375, "learning_rate": 8.382220214503011e-05, "loss": 0.6414, "step": 12544 }, { "epoch": 0.872726007861143, "grad_norm": 1.1875, "learning_rate": 8.373191823595727e-05, "loss": 0.7615, "step": 12545 }, { "epoch": 0.8727955754982782, "grad_norm": 1.2421875, "learning_rate": 8.364168085035939e-05, "loss": 0.8378, "step": 12546 }, { "epoch": 0.8728651431354134, "grad_norm": 0.95703125, "learning_rate": 8.355148999281825e-05, "loss": 0.7706, "step": 12547 }, { "epoch": 0.8729347107725486, "grad_norm": 1.15625, "learning_rate": 8.346134566791308e-05, "loss": 0.863, "step": 12548 }, { "epoch": 0.8730042784096839, "grad_norm": 1.1171875, "learning_rate": 8.337124788022122e-05, "loss": 0.632, "step": 12549 }, { "epoch": 0.873073846046819, "grad_norm": 1.0390625, "learning_rate": 8.32811966343171e-05, "loss": 0.8922, "step": 12550 }, { "epoch": 0.8731434136839542, "grad_norm": 1.2109375, "learning_rate": 8.319119193477342e-05, "loss": 0.8238, "step": 12551 }, { "epoch": 0.8732129813210894, "grad_norm": 1.3203125, "learning_rate": 8.310123378615975e-05, "loss": 1.0244, "step": 12552 }, { "epoch": 0.8732825489582247, "grad_norm": 1.3046875, "learning_rate": 8.301132219304408e-05, "loss": 0.9827, "step": 12553 }, { "epoch": 0.8733521165953598, "grad_norm": 1.21875, "learning_rate": 8.292145715999144e-05, "loss": 0.6882, "step": 12554 }, { "epoch": 0.873421684232495, "grad_norm": 0.99609375, "learning_rate": 8.283163869156451e-05, "loss": 0.6744, "step": 12555 }, { "epoch": 0.8734912518696303, "grad_norm": 1.09375, "learning_rate": 8.274186679232443e-05, "loss": 0.7423, "step": 12556 }, { "epoch": 0.8735608195067655, "grad_norm": 1.484375, "learning_rate": 8.265214146682909e-05, "loss": 0.9127, "step": 12557 }, { "epoch": 0.8736303871439006, "grad_norm": 1.34375, "learning_rate": 8.256246271963419e-05, "loss": 0.7658, "step": 12558 }, { "epoch": 0.8736999547810359, "grad_norm": 1.1328125, "learning_rate": 8.247283055529298e-05, "loss": 0.9081, "step": 12559 }, { "epoch": 0.8737695224181711, "grad_norm": 1.578125, "learning_rate": 8.238324497835681e-05, "loss": 0.7911, "step": 12560 }, { "epoch": 0.8738390900553062, "grad_norm": 1.0625, "learning_rate": 8.229370599337449e-05, "loss": 0.7267, "step": 12561 }, { "epoch": 0.8739086576924415, "grad_norm": 1.375, "learning_rate": 8.220421360489205e-05, "loss": 0.823, "step": 12562 }, { "epoch": 0.8739782253295767, "grad_norm": 1.1015625, "learning_rate": 8.211476781745375e-05, "loss": 0.8053, "step": 12563 }, { "epoch": 0.8740477929667119, "grad_norm": 1.0078125, "learning_rate": 8.202536863560083e-05, "loss": 0.6133, "step": 12564 }, { "epoch": 0.874117360603847, "grad_norm": 1.140625, "learning_rate": 8.193601606387302e-05, "loss": 0.8156, "step": 12565 }, { "epoch": 0.8741869282409823, "grad_norm": 1.359375, "learning_rate": 8.184671010680677e-05, "loss": 1.0027, "step": 12566 }, { "epoch": 0.8742564958781175, "grad_norm": 1.0546875, "learning_rate": 8.175745076893681e-05, "loss": 0.8141, "step": 12567 }, { "epoch": 0.8743260635152527, "grad_norm": 1.4140625, "learning_rate": 8.166823805479507e-05, "loss": 0.8223, "step": 12568 }, { "epoch": 0.8743956311523879, "grad_norm": 1.0234375, "learning_rate": 8.157907196891157e-05, "loss": 0.6991, "step": 12569 }, { "epoch": 0.8744651987895231, "grad_norm": 0.83984375, "learning_rate": 8.14899525158137e-05, "loss": 0.6608, "step": 12570 }, { "epoch": 0.8745347664266583, "grad_norm": 0.94140625, "learning_rate": 8.14008797000264e-05, "loss": 0.9501, "step": 12571 }, { "epoch": 0.8746043340637936, "grad_norm": 1.046875, "learning_rate": 8.13118535260724e-05, "loss": 0.6796, "step": 12572 }, { "epoch": 0.8746739017009287, "grad_norm": 2.09375, "learning_rate": 8.122287399847173e-05, "loss": 1.0462, "step": 12573 }, { "epoch": 0.8747434693380639, "grad_norm": 1.203125, "learning_rate": 8.113394112174255e-05, "loss": 0.7808, "step": 12574 }, { "epoch": 0.8748130369751992, "grad_norm": 1.203125, "learning_rate": 8.10450549004006e-05, "loss": 0.829, "step": 12575 }, { "epoch": 0.8748826046123344, "grad_norm": 0.8828125, "learning_rate": 8.095621533895869e-05, "loss": 0.7831, "step": 12576 }, { "epoch": 0.8749521722494695, "grad_norm": 1.21875, "learning_rate": 8.086742244192802e-05, "loss": 0.7498, "step": 12577 }, { "epoch": 0.8750217398866047, "grad_norm": 1.2578125, "learning_rate": 8.077867621381662e-05, "loss": 0.8737, "step": 12578 }, { "epoch": 0.87509130752374, "grad_norm": 0.8984375, "learning_rate": 8.068997665913113e-05, "loss": 0.7675, "step": 12579 }, { "epoch": 0.8751608751608752, "grad_norm": 1.1953125, "learning_rate": 8.060132378237473e-05, "loss": 1.011, "step": 12580 }, { "epoch": 0.8752304427980103, "grad_norm": 1.140625, "learning_rate": 8.051271758804913e-05, "loss": 0.7039, "step": 12581 }, { "epoch": 0.8753000104351456, "grad_norm": 1.1171875, "learning_rate": 8.042415808065306e-05, "loss": 0.7051, "step": 12582 }, { "epoch": 0.8753695780722808, "grad_norm": 1.046875, "learning_rate": 8.033564526468318e-05, "loss": 0.7209, "step": 12583 }, { "epoch": 0.875439145709416, "grad_norm": 1.4375, "learning_rate": 8.024717914463397e-05, "loss": 0.9931, "step": 12584 }, { "epoch": 0.8755087133465512, "grad_norm": 1.21875, "learning_rate": 8.01587597249972e-05, "loss": 1.0059, "step": 12585 }, { "epoch": 0.8755782809836864, "grad_norm": 1.0, "learning_rate": 8.007038701026215e-05, "loss": 0.8795, "step": 12586 }, { "epoch": 0.8756478486208216, "grad_norm": 1.0390625, "learning_rate": 7.998206100491578e-05, "loss": 0.7554, "step": 12587 }, { "epoch": 0.8757174162579568, "grad_norm": 1.0234375, "learning_rate": 7.989378171344341e-05, "loss": 0.7503, "step": 12588 }, { "epoch": 0.875786983895092, "grad_norm": 1.171875, "learning_rate": 7.980554914032712e-05, "loss": 0.9079, "step": 12589 }, { "epoch": 0.8758565515322272, "grad_norm": 0.6953125, "learning_rate": 7.971736329004675e-05, "loss": 0.5777, "step": 12590 }, { "epoch": 0.8759261191693624, "grad_norm": 2.09375, "learning_rate": 7.962922416708029e-05, "loss": 0.9667, "step": 12591 }, { "epoch": 0.8759956868064976, "grad_norm": 1.1875, "learning_rate": 7.954113177590272e-05, "loss": 0.9835, "step": 12592 }, { "epoch": 0.8760652544436328, "grad_norm": 1.28125, "learning_rate": 7.945308612098712e-05, "loss": 0.9883, "step": 12593 }, { "epoch": 0.876134822080768, "grad_norm": 1.21875, "learning_rate": 7.93650872068038e-05, "loss": 0.8747, "step": 12594 }, { "epoch": 0.8762043897179033, "grad_norm": 1.046875, "learning_rate": 7.927713503782107e-05, "loss": 0.8781, "step": 12595 }, { "epoch": 0.8762739573550384, "grad_norm": 1.1328125, "learning_rate": 7.91892296185045e-05, "loss": 0.7986, "step": 12596 }, { "epoch": 0.8763435249921736, "grad_norm": 1.1015625, "learning_rate": 7.91013709533177e-05, "loss": 0.7469, "step": 12597 }, { "epoch": 0.8764130926293089, "grad_norm": 1.0, "learning_rate": 7.90135590467217e-05, "loss": 0.9099, "step": 12598 }, { "epoch": 0.8764826602664441, "grad_norm": 1.1484375, "learning_rate": 7.892579390317511e-05, "loss": 0.7349, "step": 12599 }, { "epoch": 0.8765522279035792, "grad_norm": 0.9765625, "learning_rate": 7.883807552713384e-05, "loss": 0.6099, "step": 12600 }, { "epoch": 0.8766217955407145, "grad_norm": 1.046875, "learning_rate": 7.875040392305222e-05, "loss": 0.9179, "step": 12601 }, { "epoch": 0.8766913631778497, "grad_norm": 1.03125, "learning_rate": 7.866277909538177e-05, "loss": 0.8175, "step": 12602 }, { "epoch": 0.8767609308149849, "grad_norm": 1.4609375, "learning_rate": 7.857520104857163e-05, "loss": 1.1682, "step": 12603 }, { "epoch": 0.87683049845212, "grad_norm": 1.0703125, "learning_rate": 7.848766978706812e-05, "loss": 0.6443, "step": 12604 }, { "epoch": 0.8769000660892553, "grad_norm": 1.296875, "learning_rate": 7.840018531531623e-05, "loss": 0.7135, "step": 12605 }, { "epoch": 0.8769696337263905, "grad_norm": 1.2421875, "learning_rate": 7.831274763775754e-05, "loss": 0.9032, "step": 12606 }, { "epoch": 0.8770392013635256, "grad_norm": 1.453125, "learning_rate": 7.822535675883202e-05, "loss": 0.734, "step": 12607 }, { "epoch": 0.8771087690006609, "grad_norm": 0.875, "learning_rate": 7.813801268297672e-05, "loss": 0.726, "step": 12608 }, { "epoch": 0.8771783366377961, "grad_norm": 1.109375, "learning_rate": 7.805071541462672e-05, "loss": 0.7206, "step": 12609 }, { "epoch": 0.8772479042749313, "grad_norm": 1.2734375, "learning_rate": 7.796346495821415e-05, "loss": 0.7493, "step": 12610 }, { "epoch": 0.8773174719120665, "grad_norm": 0.83984375, "learning_rate": 7.78762613181696e-05, "loss": 0.8072, "step": 12611 }, { "epoch": 0.8773870395492017, "grad_norm": 0.84765625, "learning_rate": 7.778910449892074e-05, "loss": 0.6677, "step": 12612 }, { "epoch": 0.8774566071863369, "grad_norm": 1.296875, "learning_rate": 7.770199450489279e-05, "loss": 0.7072, "step": 12613 }, { "epoch": 0.8775261748234722, "grad_norm": 1.4453125, "learning_rate": 7.761493134050879e-05, "loss": 0.9677, "step": 12614 }, { "epoch": 0.8775957424606073, "grad_norm": 1.078125, "learning_rate": 7.75279150101893e-05, "loss": 1.0538, "step": 12615 }, { "epoch": 0.8776653100977425, "grad_norm": 1.15625, "learning_rate": 7.744094551835291e-05, "loss": 0.9044, "step": 12616 }, { "epoch": 0.8777348777348777, "grad_norm": 1.40625, "learning_rate": 7.735402286941528e-05, "loss": 1.0882, "step": 12617 }, { "epoch": 0.877804445372013, "grad_norm": 1.484375, "learning_rate": 7.726714706778992e-05, "loss": 0.9486, "step": 12618 }, { "epoch": 0.8778740130091481, "grad_norm": 1.0703125, "learning_rate": 7.71803181178875e-05, "loss": 0.8556, "step": 12619 }, { "epoch": 0.8779435806462833, "grad_norm": 1.09375, "learning_rate": 7.709353602411751e-05, "loss": 0.6466, "step": 12620 }, { "epoch": 0.8780131482834186, "grad_norm": 1.1953125, "learning_rate": 7.700680079088595e-05, "loss": 0.9507, "step": 12621 }, { "epoch": 0.8780827159205538, "grad_norm": 1.046875, "learning_rate": 7.692011242259677e-05, "loss": 0.7419, "step": 12622 }, { "epoch": 0.8781522835576889, "grad_norm": 1.2578125, "learning_rate": 7.683347092365166e-05, "loss": 0.9697, "step": 12623 }, { "epoch": 0.8782218511948242, "grad_norm": 0.98046875, "learning_rate": 7.674687629844967e-05, "loss": 0.7267, "step": 12624 }, { "epoch": 0.8782914188319594, "grad_norm": 1.0078125, "learning_rate": 7.666032855138793e-05, "loss": 0.6941, "step": 12625 }, { "epoch": 0.8783609864690946, "grad_norm": 0.890625, "learning_rate": 7.65738276868605e-05, "loss": 0.6775, "step": 12626 }, { "epoch": 0.8784305541062298, "grad_norm": 1.1484375, "learning_rate": 7.648737370925995e-05, "loss": 0.8829, "step": 12627 }, { "epoch": 0.878500121743365, "grad_norm": 1.140625, "learning_rate": 7.640096662297547e-05, "loss": 1.0224, "step": 12628 }, { "epoch": 0.8785696893805002, "grad_norm": 1.078125, "learning_rate": 7.631460643239463e-05, "loss": 0.9203, "step": 12629 }, { "epoch": 0.8786392570176353, "grad_norm": 1.140625, "learning_rate": 7.62282931419026e-05, "loss": 0.7338, "step": 12630 }, { "epoch": 0.8787088246547706, "grad_norm": 1.1171875, "learning_rate": 7.614202675588167e-05, "loss": 0.7872, "step": 12631 }, { "epoch": 0.8787783922919058, "grad_norm": 0.91796875, "learning_rate": 7.605580727871175e-05, "loss": 0.8034, "step": 12632 }, { "epoch": 0.878847959929041, "grad_norm": 0.94921875, "learning_rate": 7.596963471477103e-05, "loss": 0.4924, "step": 12633 }, { "epoch": 0.8789175275661762, "grad_norm": 1.0546875, "learning_rate": 7.5883509068435e-05, "loss": 0.9425, "step": 12634 }, { "epoch": 0.8789870952033114, "grad_norm": 1.203125, "learning_rate": 7.579743034407638e-05, "loss": 1.0256, "step": 12635 }, { "epoch": 0.8790566628404466, "grad_norm": 1.1640625, "learning_rate": 7.571139854606579e-05, "loss": 0.8888, "step": 12636 }, { "epoch": 0.8791262304775819, "grad_norm": 0.9453125, "learning_rate": 7.562541367877184e-05, "loss": 0.8218, "step": 12637 }, { "epoch": 0.879195798114717, "grad_norm": 0.9375, "learning_rate": 7.553947574655995e-05, "loss": 0.5736, "step": 12638 }, { "epoch": 0.8792653657518522, "grad_norm": 0.90625, "learning_rate": 7.545358475379405e-05, "loss": 0.7649, "step": 12639 }, { "epoch": 0.8793349333889875, "grad_norm": 0.984375, "learning_rate": 7.536774070483488e-05, "loss": 0.7031, "step": 12640 }, { "epoch": 0.8794045010261227, "grad_norm": 1.0703125, "learning_rate": 7.52819436040415e-05, "loss": 0.8432, "step": 12641 }, { "epoch": 0.8794740686632578, "grad_norm": 1.3203125, "learning_rate": 7.519619345577e-05, "loss": 1.072, "step": 12642 }, { "epoch": 0.879543636300393, "grad_norm": 1.234375, "learning_rate": 7.511049026437434e-05, "loss": 0.7029, "step": 12643 }, { "epoch": 0.8796132039375283, "grad_norm": 0.92578125, "learning_rate": 7.502483403420646e-05, "loss": 0.656, "step": 12644 }, { "epoch": 0.8796827715746635, "grad_norm": 0.99609375, "learning_rate": 7.493922476961523e-05, "loss": 0.7228, "step": 12645 }, { "epoch": 0.8797523392117986, "grad_norm": 1.203125, "learning_rate": 7.48536624749474e-05, "loss": 0.7988, "step": 12646 }, { "epoch": 0.8798219068489339, "grad_norm": 1.15625, "learning_rate": 7.476814715454738e-05, "loss": 0.9107, "step": 12647 }, { "epoch": 0.8798914744860691, "grad_norm": 0.8984375, "learning_rate": 7.46826788127577e-05, "loss": 0.5806, "step": 12648 }, { "epoch": 0.8799610421232043, "grad_norm": 1.0234375, "learning_rate": 7.459725745391743e-05, "loss": 0.7292, "step": 12649 }, { "epoch": 0.8800306097603395, "grad_norm": 0.94921875, "learning_rate": 7.451188308236401e-05, "loss": 0.7109, "step": 12650 }, { "epoch": 0.8801001773974747, "grad_norm": 1.03125, "learning_rate": 7.44265557024324e-05, "loss": 0.7169, "step": 12651 }, { "epoch": 0.8801697450346099, "grad_norm": 1.4140625, "learning_rate": 7.434127531845514e-05, "loss": 0.8128, "step": 12652 }, { "epoch": 0.8802393126717452, "grad_norm": 1.5859375, "learning_rate": 7.425604193476232e-05, "loss": 1.0139, "step": 12653 }, { "epoch": 0.8803088803088803, "grad_norm": 1.1484375, "learning_rate": 7.417085555568137e-05, "loss": 0.8128, "step": 12654 }, { "epoch": 0.8803784479460155, "grad_norm": 0.83984375, "learning_rate": 7.408571618553794e-05, "loss": 0.8119, "step": 12655 }, { "epoch": 0.8804480155831507, "grad_norm": 0.91015625, "learning_rate": 7.400062382865491e-05, "loss": 0.6313, "step": 12656 }, { "epoch": 0.880517583220286, "grad_norm": 1.1953125, "learning_rate": 7.39155784893527e-05, "loss": 0.7273, "step": 12657 }, { "epoch": 0.8805871508574211, "grad_norm": 1.3671875, "learning_rate": 7.383058017194976e-05, "loss": 0.977, "step": 12658 }, { "epoch": 0.8806567184945563, "grad_norm": 0.84765625, "learning_rate": 7.374562888076175e-05, "loss": 0.4533, "step": 12659 }, { "epoch": 0.8807262861316916, "grad_norm": 1.0546875, "learning_rate": 7.366072462010187e-05, "loss": 0.6656, "step": 12660 }, { "epoch": 0.8807958537688267, "grad_norm": 0.9921875, "learning_rate": 7.357586739428135e-05, "loss": 0.7439, "step": 12661 }, { "epoch": 0.8808654214059619, "grad_norm": 1.21875, "learning_rate": 7.349105720760884e-05, "loss": 0.7649, "step": 12662 }, { "epoch": 0.8809349890430972, "grad_norm": 0.9296875, "learning_rate": 7.340629406439048e-05, "loss": 0.7316, "step": 12663 }, { "epoch": 0.8810045566802324, "grad_norm": 1.0390625, "learning_rate": 7.332157796893002e-05, "loss": 0.6436, "step": 12664 }, { "epoch": 0.8810741243173675, "grad_norm": 1.2421875, "learning_rate": 7.323690892552903e-05, "loss": 0.7449, "step": 12665 }, { "epoch": 0.8811436919545028, "grad_norm": 1.1171875, "learning_rate": 7.315228693848674e-05, "loss": 0.7347, "step": 12666 }, { "epoch": 0.881213259591638, "grad_norm": 0.9140625, "learning_rate": 7.306771201209961e-05, "loss": 0.604, "step": 12667 }, { "epoch": 0.8812828272287732, "grad_norm": 1.3046875, "learning_rate": 7.298318415066186e-05, "loss": 1.0214, "step": 12668 }, { "epoch": 0.8813523948659083, "grad_norm": 1.03125, "learning_rate": 7.289870335846571e-05, "loss": 0.683, "step": 12669 }, { "epoch": 0.8814219625030436, "grad_norm": 0.87890625, "learning_rate": 7.28142696398002e-05, "loss": 0.4949, "step": 12670 }, { "epoch": 0.8814915301401788, "grad_norm": 1.125, "learning_rate": 7.272988299895278e-05, "loss": 0.9083, "step": 12671 }, { "epoch": 0.881561097777314, "grad_norm": 1.5078125, "learning_rate": 7.264554344020835e-05, "loss": 0.964, "step": 12672 }, { "epoch": 0.8816306654144492, "grad_norm": 1.28125, "learning_rate": 7.256125096784893e-05, "loss": 0.7362, "step": 12673 }, { "epoch": 0.8817002330515844, "grad_norm": 1.2109375, "learning_rate": 7.247700558615433e-05, "loss": 0.9041, "step": 12674 }, { "epoch": 0.8817698006887196, "grad_norm": 0.96875, "learning_rate": 7.239280729940234e-05, "loss": 0.6304, "step": 12675 }, { "epoch": 0.8818393683258549, "grad_norm": 1.328125, "learning_rate": 7.230865611186833e-05, "loss": 0.8177, "step": 12676 }, { "epoch": 0.88190893596299, "grad_norm": 0.96875, "learning_rate": 7.222455202782485e-05, "loss": 0.6874, "step": 12677 }, { "epoch": 0.8819785036001252, "grad_norm": 1.125, "learning_rate": 7.214049505154207e-05, "loss": 0.7348, "step": 12678 }, { "epoch": 0.8820480712372605, "grad_norm": 0.91796875, "learning_rate": 7.205648518728824e-05, "loss": 0.6891, "step": 12679 }, { "epoch": 0.8821176388743956, "grad_norm": 1.125, "learning_rate": 7.197252243932906e-05, "loss": 0.6281, "step": 12680 }, { "epoch": 0.8821872065115308, "grad_norm": 1.2265625, "learning_rate": 7.188860681192766e-05, "loss": 0.8182, "step": 12681 }, { "epoch": 0.882256774148666, "grad_norm": 1.34375, "learning_rate": 7.180473830934453e-05, "loss": 0.6742, "step": 12682 }, { "epoch": 0.8823263417858013, "grad_norm": 1.1484375, "learning_rate": 7.172091693583826e-05, "loss": 0.7446, "step": 12683 }, { "epoch": 0.8823959094229364, "grad_norm": 1.21875, "learning_rate": 7.163714269566524e-05, "loss": 0.6961, "step": 12684 }, { "epoch": 0.8824654770600716, "grad_norm": 1.40625, "learning_rate": 7.15534155930786e-05, "loss": 0.7753, "step": 12685 }, { "epoch": 0.8825350446972069, "grad_norm": 1.296875, "learning_rate": 7.146973563233005e-05, "loss": 0.8588, "step": 12686 }, { "epoch": 0.8826046123343421, "grad_norm": 1.4375, "learning_rate": 7.138610281766811e-05, "loss": 0.6299, "step": 12687 }, { "epoch": 0.8826741799714772, "grad_norm": 1.15625, "learning_rate": 7.130251715333913e-05, "loss": 0.6606, "step": 12688 }, { "epoch": 0.8827437476086125, "grad_norm": 1.0859375, "learning_rate": 7.12189786435874e-05, "loss": 0.7995, "step": 12689 }, { "epoch": 0.8828133152457477, "grad_norm": 1.078125, "learning_rate": 7.113548729265462e-05, "loss": 0.7172, "step": 12690 }, { "epoch": 0.8828828828828829, "grad_norm": 0.9453125, "learning_rate": 7.105204310478009e-05, "loss": 0.7823, "step": 12691 }, { "epoch": 0.8829524505200181, "grad_norm": 0.921875, "learning_rate": 7.096864608420029e-05, "loss": 0.6294, "step": 12692 }, { "epoch": 0.8830220181571533, "grad_norm": 1.1015625, "learning_rate": 7.088529623514995e-05, "loss": 0.6797, "step": 12693 }, { "epoch": 0.8830915857942885, "grad_norm": 1.3359375, "learning_rate": 7.080199356186146e-05, "loss": 0.814, "step": 12694 }, { "epoch": 0.8831611534314237, "grad_norm": 1.2265625, "learning_rate": 7.071873806856422e-05, "loss": 0.8889, "step": 12695 }, { "epoch": 0.8832307210685589, "grad_norm": 1.3515625, "learning_rate": 7.063552975948528e-05, "loss": 0.9012, "step": 12696 }, { "epoch": 0.8833002887056941, "grad_norm": 1.4453125, "learning_rate": 7.055236863884984e-05, "loss": 1.0218, "step": 12697 }, { "epoch": 0.8833698563428293, "grad_norm": 1.5546875, "learning_rate": 7.04692547108805e-05, "loss": 0.6138, "step": 12698 }, { "epoch": 0.8834394239799646, "grad_norm": 1.3671875, "learning_rate": 7.038618797979735e-05, "loss": 0.8291, "step": 12699 }, { "epoch": 0.8835089916170997, "grad_norm": 1.1484375, "learning_rate": 7.030316844981766e-05, "loss": 0.87, "step": 12700 }, { "epoch": 0.8835785592542349, "grad_norm": 1.3359375, "learning_rate": 7.022019612515728e-05, "loss": 0.8387, "step": 12701 }, { "epoch": 0.8836481268913702, "grad_norm": 1.046875, "learning_rate": 7.013727101002876e-05, "loss": 0.5919, "step": 12702 }, { "epoch": 0.8837176945285053, "grad_norm": 1.203125, "learning_rate": 7.00543931086427e-05, "loss": 0.8006, "step": 12703 }, { "epoch": 0.8837872621656405, "grad_norm": 1.3046875, "learning_rate": 6.997156242520752e-05, "loss": 1.0003, "step": 12704 }, { "epoch": 0.8838568298027758, "grad_norm": 1.109375, "learning_rate": 6.988877896392864e-05, "loss": 0.7739, "step": 12705 }, { "epoch": 0.883926397439911, "grad_norm": 1.1171875, "learning_rate": 6.980604272900937e-05, "loss": 0.9019, "step": 12706 }, { "epoch": 0.8839959650770461, "grad_norm": 0.91015625, "learning_rate": 6.972335372465067e-05, "loss": 0.7214, "step": 12707 }, { "epoch": 0.8840655327141813, "grad_norm": 1.15625, "learning_rate": 6.964071195505129e-05, "loss": 0.8192, "step": 12708 }, { "epoch": 0.8841351003513166, "grad_norm": 0.9375, "learning_rate": 6.955811742440721e-05, "loss": 0.5364, "step": 12709 }, { "epoch": 0.8842046679884518, "grad_norm": 0.91015625, "learning_rate": 6.947557013691197e-05, "loss": 0.6884, "step": 12710 }, { "epoch": 0.8842742356255869, "grad_norm": 1.3515625, "learning_rate": 6.939307009675711e-05, "loss": 1.0167, "step": 12711 }, { "epoch": 0.8843438032627222, "grad_norm": 1.0625, "learning_rate": 6.931061730813171e-05, "loss": 0.9485, "step": 12712 }, { "epoch": 0.8844133708998574, "grad_norm": 1.0703125, "learning_rate": 6.92282117752221e-05, "loss": 0.7769, "step": 12713 }, { "epoch": 0.8844829385369926, "grad_norm": 1.0, "learning_rate": 6.914585350221236e-05, "loss": 0.938, "step": 12714 }, { "epoch": 0.8845525061741278, "grad_norm": 1.046875, "learning_rate": 6.906354249328428e-05, "loss": 0.7045, "step": 12715 }, { "epoch": 0.884622073811263, "grad_norm": 1.171875, "learning_rate": 6.89812787526175e-05, "loss": 0.8892, "step": 12716 }, { "epoch": 0.8846916414483982, "grad_norm": 1.0859375, "learning_rate": 6.889906228438847e-05, "loss": 1.0516, "step": 12717 }, { "epoch": 0.8847612090855335, "grad_norm": 0.859375, "learning_rate": 6.881689309277206e-05, "loss": 0.5978, "step": 12718 }, { "epoch": 0.8848307767226686, "grad_norm": 0.984375, "learning_rate": 6.873477118194038e-05, "loss": 0.8943, "step": 12719 }, { "epoch": 0.8849003443598038, "grad_norm": 1.1328125, "learning_rate": 6.865269655606288e-05, "loss": 0.8368, "step": 12720 }, { "epoch": 0.884969911996939, "grad_norm": 1.2265625, "learning_rate": 6.857066921930721e-05, "loss": 0.7237, "step": 12721 }, { "epoch": 0.8850394796340743, "grad_norm": 1.046875, "learning_rate": 6.848868917583828e-05, "loss": 0.8268, "step": 12722 }, { "epoch": 0.8851090472712094, "grad_norm": 1.1484375, "learning_rate": 6.840675642981864e-05, "loss": 0.9917, "step": 12723 }, { "epoch": 0.8851786149083446, "grad_norm": 1.1484375, "learning_rate": 6.832487098540807e-05, "loss": 0.7869, "step": 12724 }, { "epoch": 0.8852481825454799, "grad_norm": 1.09375, "learning_rate": 6.824303284676459e-05, "loss": 0.7963, "step": 12725 }, { "epoch": 0.885317750182615, "grad_norm": 1.0625, "learning_rate": 6.816124201804364e-05, "loss": 0.5991, "step": 12726 }, { "epoch": 0.8853873178197502, "grad_norm": 1.3671875, "learning_rate": 6.807949850339801e-05, "loss": 0.8378, "step": 12727 }, { "epoch": 0.8854568854568855, "grad_norm": 1.1015625, "learning_rate": 6.799780230697816e-05, "loss": 0.6915, "step": 12728 }, { "epoch": 0.8855264530940207, "grad_norm": 1.0078125, "learning_rate": 6.791615343293211e-05, "loss": 0.8808, "step": 12729 }, { "epoch": 0.8855960207311558, "grad_norm": 1.0078125, "learning_rate": 6.783455188540599e-05, "loss": 0.7991, "step": 12730 }, { "epoch": 0.8856655883682911, "grad_norm": 1.0, "learning_rate": 6.775299766854271e-05, "loss": 0.8685, "step": 12731 }, { "epoch": 0.8857351560054263, "grad_norm": 1.1796875, "learning_rate": 6.767149078648348e-05, "loss": 1.0224, "step": 12732 }, { "epoch": 0.8858047236425615, "grad_norm": 1.0859375, "learning_rate": 6.759003124336671e-05, "loss": 0.695, "step": 12733 }, { "epoch": 0.8858742912796966, "grad_norm": 1.5, "learning_rate": 6.750861904332817e-05, "loss": 1.0809, "step": 12734 }, { "epoch": 0.8859438589168319, "grad_norm": 1.1640625, "learning_rate": 6.7427254190502e-05, "loss": 0.6709, "step": 12735 }, { "epoch": 0.8860134265539671, "grad_norm": 1.09375, "learning_rate": 6.734593668901945e-05, "loss": 0.7581, "step": 12736 }, { "epoch": 0.8860829941911023, "grad_norm": 1.1875, "learning_rate": 6.726466654300922e-05, "loss": 0.7828, "step": 12737 }, { "epoch": 0.8861525618282375, "grad_norm": 1.1171875, "learning_rate": 6.718344375659779e-05, "loss": 0.7232, "step": 12738 }, { "epoch": 0.8862221294653727, "grad_norm": 1.0390625, "learning_rate": 6.710226833390942e-05, "loss": 0.7902, "step": 12739 }, { "epoch": 0.8862916971025079, "grad_norm": 1.0703125, "learning_rate": 6.702114027906581e-05, "loss": 0.8745, "step": 12740 }, { "epoch": 0.8863612647396432, "grad_norm": 1.1640625, "learning_rate": 6.694005959618609e-05, "loss": 0.8363, "step": 12741 }, { "epoch": 0.8864308323767783, "grad_norm": 1.4765625, "learning_rate": 6.685902628938711e-05, "loss": 0.8588, "step": 12742 }, { "epoch": 0.8865004000139135, "grad_norm": 1.2421875, "learning_rate": 6.677804036278334e-05, "loss": 1.0298, "step": 12743 }, { "epoch": 0.8865699676510488, "grad_norm": 1.2265625, "learning_rate": 6.669710182048705e-05, "loss": 0.7713, "step": 12744 }, { "epoch": 0.886639535288184, "grad_norm": 0.8046875, "learning_rate": 6.66162106666075e-05, "loss": 0.6444, "step": 12745 }, { "epoch": 0.8867091029253191, "grad_norm": 1.2265625, "learning_rate": 6.653536690525241e-05, "loss": 0.8326, "step": 12746 }, { "epoch": 0.8867786705624543, "grad_norm": 0.984375, "learning_rate": 6.645457054052639e-05, "loss": 0.7872, "step": 12747 }, { "epoch": 0.8868482381995896, "grad_norm": 0.89453125, "learning_rate": 6.637382157653171e-05, "loss": 0.7572, "step": 12748 }, { "epoch": 0.8869178058367247, "grad_norm": 0.94140625, "learning_rate": 6.629312001736853e-05, "loss": 0.5386, "step": 12749 }, { "epoch": 0.8869873734738599, "grad_norm": 1.0546875, "learning_rate": 6.62124658671347e-05, "loss": 0.9461, "step": 12750 }, { "epoch": 0.8870569411109952, "grad_norm": 1.03125, "learning_rate": 6.613185912992514e-05, "loss": 0.843, "step": 12751 }, { "epoch": 0.8871265087481304, "grad_norm": 1.0390625, "learning_rate": 6.605129980983249e-05, "loss": 0.9026, "step": 12752 }, { "epoch": 0.8871960763852655, "grad_norm": 1.109375, "learning_rate": 6.597078791094757e-05, "loss": 0.8575, "step": 12753 }, { "epoch": 0.8872656440224008, "grad_norm": 1.515625, "learning_rate": 6.589032343735823e-05, "loss": 0.8761, "step": 12754 }, { "epoch": 0.887335211659536, "grad_norm": 1.21875, "learning_rate": 6.580990639314998e-05, "loss": 1.0263, "step": 12755 }, { "epoch": 0.8874047792966712, "grad_norm": 1.046875, "learning_rate": 6.57295367824059e-05, "loss": 0.653, "step": 12756 }, { "epoch": 0.8874743469338064, "grad_norm": 1.1328125, "learning_rate": 6.564921460920692e-05, "loss": 0.8159, "step": 12757 }, { "epoch": 0.8875439145709416, "grad_norm": 1.15625, "learning_rate": 6.556893987763146e-05, "loss": 0.8262, "step": 12758 }, { "epoch": 0.8876134822080768, "grad_norm": 1.0078125, "learning_rate": 6.548871259175516e-05, "loss": 0.7602, "step": 12759 }, { "epoch": 0.887683049845212, "grad_norm": 1.21875, "learning_rate": 6.540853275565195e-05, "loss": 0.876, "step": 12760 }, { "epoch": 0.8877526174823472, "grad_norm": 1.140625, "learning_rate": 6.532840037339261e-05, "loss": 0.8405, "step": 12761 }, { "epoch": 0.8878221851194824, "grad_norm": 1.1171875, "learning_rate": 6.524831544904609e-05, "loss": 0.9209, "step": 12762 }, { "epoch": 0.8878917527566176, "grad_norm": 1.1171875, "learning_rate": 6.516827798667857e-05, "loss": 0.6923, "step": 12763 }, { "epoch": 0.8879613203937529, "grad_norm": 1.4296875, "learning_rate": 6.508828799035404e-05, "loss": 0.8913, "step": 12764 }, { "epoch": 0.888030888030888, "grad_norm": 1.328125, "learning_rate": 6.500834546413404e-05, "loss": 0.9831, "step": 12765 }, { "epoch": 0.8881004556680232, "grad_norm": 1.1328125, "learning_rate": 6.492845041207707e-05, "loss": 0.8282, "step": 12766 }, { "epoch": 0.8881700233051585, "grad_norm": 1.65625, "learning_rate": 6.484860283824079e-05, "loss": 0.9055, "step": 12767 }, { "epoch": 0.8882395909422937, "grad_norm": 1.3515625, "learning_rate": 6.476880274667885e-05, "loss": 0.8114, "step": 12768 }, { "epoch": 0.8883091585794288, "grad_norm": 0.796875, "learning_rate": 6.468905014144322e-05, "loss": 0.6349, "step": 12769 }, { "epoch": 0.8883787262165641, "grad_norm": 1.328125, "learning_rate": 6.460934502658311e-05, "loss": 1.0425, "step": 12770 }, { "epoch": 0.8884482938536993, "grad_norm": 0.828125, "learning_rate": 6.452968740614574e-05, "loss": 0.7217, "step": 12771 }, { "epoch": 0.8885178614908344, "grad_norm": 1.0234375, "learning_rate": 6.445007728417596e-05, "loss": 0.9206, "step": 12772 }, { "epoch": 0.8885874291279696, "grad_norm": 1.15625, "learning_rate": 6.437051466471567e-05, "loss": 0.8422, "step": 12773 }, { "epoch": 0.8886569967651049, "grad_norm": 1.15625, "learning_rate": 6.429099955180451e-05, "loss": 0.9084, "step": 12774 }, { "epoch": 0.8887265644022401, "grad_norm": 1.203125, "learning_rate": 6.421153194948015e-05, "loss": 0.86, "step": 12775 }, { "epoch": 0.8887961320393752, "grad_norm": 0.89453125, "learning_rate": 6.413211186177759e-05, "loss": 0.8385, "step": 12776 }, { "epoch": 0.8888656996765105, "grad_norm": 1.0078125, "learning_rate": 6.405273929272914e-05, "loss": 0.8658, "step": 12777 }, { "epoch": 0.8889352673136457, "grad_norm": 1.25, "learning_rate": 6.397341424636527e-05, "loss": 0.789, "step": 12778 }, { "epoch": 0.8890048349507809, "grad_norm": 1.453125, "learning_rate": 6.38941367267134e-05, "loss": 0.9334, "step": 12779 }, { "epoch": 0.8890744025879161, "grad_norm": 1.2734375, "learning_rate": 6.381490673779888e-05, "loss": 1.0498, "step": 12780 }, { "epoch": 0.8891439702250513, "grad_norm": 1.0703125, "learning_rate": 6.37357242836446e-05, "loss": 0.787, "step": 12781 }, { "epoch": 0.8892135378621865, "grad_norm": 1.2734375, "learning_rate": 6.365658936827135e-05, "loss": 0.8056, "step": 12782 }, { "epoch": 0.8892831054993218, "grad_norm": 1.0546875, "learning_rate": 6.35775019956969e-05, "loss": 0.7835, "step": 12783 }, { "epoch": 0.8893526731364569, "grad_norm": 1.2890625, "learning_rate": 6.349846216993682e-05, "loss": 0.6149, "step": 12784 }, { "epoch": 0.8894222407735921, "grad_norm": 1.09375, "learning_rate": 6.341946989500458e-05, "loss": 0.8973, "step": 12785 }, { "epoch": 0.8894918084107273, "grad_norm": 1.1328125, "learning_rate": 6.334052517491107e-05, "loss": 0.7343, "step": 12786 }, { "epoch": 0.8895613760478626, "grad_norm": 1.265625, "learning_rate": 6.326162801366453e-05, "loss": 0.9022, "step": 12787 }, { "epoch": 0.8896309436849977, "grad_norm": 1.1484375, "learning_rate": 6.318277841527087e-05, "loss": 0.9184, "step": 12788 }, { "epoch": 0.8897005113221329, "grad_norm": 1.140625, "learning_rate": 6.310397638373388e-05, "loss": 0.9703, "step": 12789 }, { "epoch": 0.8897700789592682, "grad_norm": 1.1328125, "learning_rate": 6.302522192305471e-05, "loss": 0.8512, "step": 12790 }, { "epoch": 0.8898396465964034, "grad_norm": 1.2578125, "learning_rate": 6.294651503723204e-05, "loss": 0.7479, "step": 12791 }, { "epoch": 0.8899092142335385, "grad_norm": 1.0390625, "learning_rate": 6.286785573026232e-05, "loss": 0.6878, "step": 12792 }, { "epoch": 0.8899787818706738, "grad_norm": 0.9140625, "learning_rate": 6.278924400613928e-05, "loss": 0.4392, "step": 12793 }, { "epoch": 0.890048349507809, "grad_norm": 1.2890625, "learning_rate": 6.271067986885459e-05, "loss": 0.8874, "step": 12794 }, { "epoch": 0.8901179171449441, "grad_norm": 1.328125, "learning_rate": 6.263216332239718e-05, "loss": 0.9909, "step": 12795 }, { "epoch": 0.8901874847820794, "grad_norm": 1.046875, "learning_rate": 6.255369437075409e-05, "loss": 0.7922, "step": 12796 }, { "epoch": 0.8902570524192146, "grad_norm": 1.0234375, "learning_rate": 6.247527301790922e-05, "loss": 0.8277, "step": 12797 }, { "epoch": 0.8903266200563498, "grad_norm": 1.171875, "learning_rate": 6.23968992678443e-05, "loss": 0.8435, "step": 12798 }, { "epoch": 0.8903961876934849, "grad_norm": 1.078125, "learning_rate": 6.231857312453903e-05, "loss": 0.6398, "step": 12799 }, { "epoch": 0.8904657553306202, "grad_norm": 0.88671875, "learning_rate": 6.224029459197056e-05, "loss": 0.6042, "step": 12800 }, { "epoch": 0.8905353229677554, "grad_norm": 1.046875, "learning_rate": 6.216206367411326e-05, "loss": 0.7371, "step": 12801 }, { "epoch": 0.8906048906048906, "grad_norm": 1.0859375, "learning_rate": 6.208388037493906e-05, "loss": 0.7232, "step": 12802 }, { "epoch": 0.8906744582420258, "grad_norm": 1.0078125, "learning_rate": 6.200574469841813e-05, "loss": 0.8836, "step": 12803 }, { "epoch": 0.890744025879161, "grad_norm": 1.21875, "learning_rate": 6.192765664851763e-05, "loss": 1.2466, "step": 12804 }, { "epoch": 0.8908135935162962, "grad_norm": 0.91015625, "learning_rate": 6.184961622920237e-05, "loss": 0.766, "step": 12805 }, { "epoch": 0.8908831611534315, "grad_norm": 1.390625, "learning_rate": 6.177162344443521e-05, "loss": 0.7032, "step": 12806 }, { "epoch": 0.8909527287905666, "grad_norm": 1.3125, "learning_rate": 6.169367829817573e-05, "loss": 0.6213, "step": 12807 }, { "epoch": 0.8910222964277018, "grad_norm": 1.1640625, "learning_rate": 6.161578079438212e-05, "loss": 0.815, "step": 12808 }, { "epoch": 0.8910918640648371, "grad_norm": 0.87109375, "learning_rate": 6.15379309370091e-05, "loss": 0.6714, "step": 12809 }, { "epoch": 0.8911614317019723, "grad_norm": 1.09375, "learning_rate": 6.146012873000994e-05, "loss": 0.9554, "step": 12810 }, { "epoch": 0.8912309993391074, "grad_norm": 1.421875, "learning_rate": 6.138237417733494e-05, "loss": 0.9702, "step": 12811 }, { "epoch": 0.8913005669762426, "grad_norm": 1.3046875, "learning_rate": 6.130466728293161e-05, "loss": 1.0276, "step": 12812 }, { "epoch": 0.8913701346133779, "grad_norm": 1.2265625, "learning_rate": 6.122700805074622e-05, "loss": 0.9332, "step": 12813 }, { "epoch": 0.891439702250513, "grad_norm": 1.21875, "learning_rate": 6.114939648472151e-05, "loss": 0.8333, "step": 12814 }, { "epoch": 0.8915092698876482, "grad_norm": 1.1640625, "learning_rate": 6.107183258879833e-05, "loss": 0.9553, "step": 12815 }, { "epoch": 0.8915788375247835, "grad_norm": 1.4296875, "learning_rate": 6.099431636691488e-05, "loss": 0.9614, "step": 12816 }, { "epoch": 0.8916484051619187, "grad_norm": 1.109375, "learning_rate": 6.0916847823006994e-05, "loss": 0.6986, "step": 12817 }, { "epoch": 0.8917179727990538, "grad_norm": 1.25, "learning_rate": 6.083942696100842e-05, "loss": 0.8352, "step": 12818 }, { "epoch": 0.8917875404361891, "grad_norm": 1.46875, "learning_rate": 6.076205378484989e-05, "loss": 1.0811, "step": 12819 }, { "epoch": 0.8918571080733243, "grad_norm": 1.21875, "learning_rate": 6.068472829846039e-05, "loss": 0.7868, "step": 12820 }, { "epoch": 0.8919266757104595, "grad_norm": 0.9609375, "learning_rate": 6.060745050576566e-05, "loss": 0.7291, "step": 12821 }, { "epoch": 0.8919962433475948, "grad_norm": 1.1015625, "learning_rate": 6.0530220410689786e-05, "loss": 0.9385, "step": 12822 }, { "epoch": 0.8920658109847299, "grad_norm": 0.89453125, "learning_rate": 6.045303801715396e-05, "loss": 0.6088, "step": 12823 }, { "epoch": 0.8921353786218651, "grad_norm": 0.9609375, "learning_rate": 6.037590332907739e-05, "loss": 0.679, "step": 12824 }, { "epoch": 0.8922049462590003, "grad_norm": 0.9296875, "learning_rate": 6.029881635037615e-05, "loss": 0.628, "step": 12825 }, { "epoch": 0.8922745138961355, "grad_norm": 0.96484375, "learning_rate": 6.022177708496468e-05, "loss": 0.7714, "step": 12826 }, { "epoch": 0.8923440815332707, "grad_norm": 1.125, "learning_rate": 6.014478553675462e-05, "loss": 0.8259, "step": 12827 }, { "epoch": 0.8924136491704059, "grad_norm": 0.84765625, "learning_rate": 6.006784170965518e-05, "loss": 0.7613, "step": 12828 }, { "epoch": 0.8924832168075412, "grad_norm": 1.0078125, "learning_rate": 5.999094560757301e-05, "loss": 0.5784, "step": 12829 }, { "epoch": 0.8925527844446763, "grad_norm": 1.3203125, "learning_rate": 5.991409723441255e-05, "loss": 0.6955, "step": 12830 }, { "epoch": 0.8926223520818115, "grad_norm": 0.98828125, "learning_rate": 5.983729659407589e-05, "loss": 0.9643, "step": 12831 }, { "epoch": 0.8926919197189468, "grad_norm": 1.1640625, "learning_rate": 5.976054369046269e-05, "loss": 0.8414, "step": 12832 }, { "epoch": 0.892761487356082, "grad_norm": 1.25, "learning_rate": 5.968383852746973e-05, "loss": 0.825, "step": 12833 }, { "epoch": 0.8928310549932171, "grad_norm": 1.015625, "learning_rate": 5.9607181108991994e-05, "loss": 0.9173, "step": 12834 }, { "epoch": 0.8929006226303524, "grad_norm": 1.0859375, "learning_rate": 5.95305714389216e-05, "loss": 0.6716, "step": 12835 }, { "epoch": 0.8929701902674876, "grad_norm": 1.015625, "learning_rate": 5.945400952114866e-05, "loss": 0.7928, "step": 12836 }, { "epoch": 0.8930397579046228, "grad_norm": 1.265625, "learning_rate": 5.9377495359560165e-05, "loss": 0.9662, "step": 12837 }, { "epoch": 0.8931093255417579, "grad_norm": 1.375, "learning_rate": 5.930102895804157e-05, "loss": 0.9403, "step": 12838 }, { "epoch": 0.8931788931788932, "grad_norm": 1.4375, "learning_rate": 5.92246103204751e-05, "loss": 0.7703, "step": 12839 }, { "epoch": 0.8932484608160284, "grad_norm": 1.25, "learning_rate": 5.914823945074099e-05, "loss": 0.6388, "step": 12840 }, { "epoch": 0.8933180284531635, "grad_norm": 1.015625, "learning_rate": 5.907191635271725e-05, "loss": 0.8618, "step": 12841 }, { "epoch": 0.8933875960902988, "grad_norm": 0.91796875, "learning_rate": 5.899564103027899e-05, "loss": 0.6604, "step": 12842 }, { "epoch": 0.893457163727434, "grad_norm": 1.046875, "learning_rate": 5.891941348729901e-05, "loss": 0.7512, "step": 12843 }, { "epoch": 0.8935267313645692, "grad_norm": 1.09375, "learning_rate": 5.884323372764755e-05, "loss": 0.853, "step": 12844 }, { "epoch": 0.8935962990017045, "grad_norm": 1.078125, "learning_rate": 5.8767101755193174e-05, "loss": 0.6933, "step": 12845 }, { "epoch": 0.8936658666388396, "grad_norm": 0.86328125, "learning_rate": 5.8691017573801244e-05, "loss": 0.6461, "step": 12846 }, { "epoch": 0.8937354342759748, "grad_norm": 1.078125, "learning_rate": 5.8614981187334884e-05, "loss": 0.8107, "step": 12847 }, { "epoch": 0.8938050019131101, "grad_norm": 1.125, "learning_rate": 5.853899259965467e-05, "loss": 0.7591, "step": 12848 }, { "epoch": 0.8938745695502452, "grad_norm": 0.87109375, "learning_rate": 5.846305181461908e-05, "loss": 0.64, "step": 12849 }, { "epoch": 0.8939441371873804, "grad_norm": 1.0, "learning_rate": 5.8387158836084254e-05, "loss": 0.5869, "step": 12850 }, { "epoch": 0.8940137048245156, "grad_norm": 0.94921875, "learning_rate": 5.8311313667903206e-05, "loss": 0.6576, "step": 12851 }, { "epoch": 0.8940832724616509, "grad_norm": 1.015625, "learning_rate": 5.8235516313927316e-05, "loss": 0.7791, "step": 12852 }, { "epoch": 0.894152840098786, "grad_norm": 1.2734375, "learning_rate": 5.815976677800505e-05, "loss": 0.9427, "step": 12853 }, { "epoch": 0.8942224077359212, "grad_norm": 1.375, "learning_rate": 5.808406506398256e-05, "loss": 0.7885, "step": 12854 }, { "epoch": 0.8942919753730565, "grad_norm": 1.1640625, "learning_rate": 5.800841117570366e-05, "loss": 0.7102, "step": 12855 }, { "epoch": 0.8943615430101917, "grad_norm": 1.1328125, "learning_rate": 5.793280511700971e-05, "loss": 0.7416, "step": 12856 }, { "epoch": 0.8944311106473268, "grad_norm": 1.1171875, "learning_rate": 5.7857246891739324e-05, "loss": 0.8422, "step": 12857 }, { "epoch": 0.8945006782844621, "grad_norm": 1.21875, "learning_rate": 5.778173650372931e-05, "loss": 0.5853, "step": 12858 }, { "epoch": 0.8945702459215973, "grad_norm": 1.9140625, "learning_rate": 5.7706273956813716e-05, "loss": 1.1291, "step": 12859 }, { "epoch": 0.8946398135587325, "grad_norm": 1.03125, "learning_rate": 5.763085925482403e-05, "loss": 0.9255, "step": 12860 }, { "epoch": 0.8947093811958677, "grad_norm": 0.890625, "learning_rate": 5.7555492401589304e-05, "loss": 0.7343, "step": 12861 }, { "epoch": 0.8947789488330029, "grad_norm": 0.9765625, "learning_rate": 5.748017340093636e-05, "loss": 0.8176, "step": 12862 }, { "epoch": 0.8948485164701381, "grad_norm": 1.1171875, "learning_rate": 5.7404902256689596e-05, "loss": 0.834, "step": 12863 }, { "epoch": 0.8949180841072732, "grad_norm": 1.2265625, "learning_rate": 5.732967897267094e-05, "loss": 0.6891, "step": 12864 }, { "epoch": 0.8949876517444085, "grad_norm": 1.09375, "learning_rate": 5.725450355269957e-05, "loss": 0.6077, "step": 12865 }, { "epoch": 0.8950572193815437, "grad_norm": 1.2734375, "learning_rate": 5.7179376000592975e-05, "loss": 0.9541, "step": 12866 }, { "epoch": 0.8951267870186789, "grad_norm": 1.0546875, "learning_rate": 5.710429632016534e-05, "loss": 0.7923, "step": 12867 }, { "epoch": 0.8951963546558142, "grad_norm": 0.984375, "learning_rate": 5.702926451522905e-05, "loss": 0.4878, "step": 12868 }, { "epoch": 0.8952659222929493, "grad_norm": 1.1796875, "learning_rate": 5.695428058959373e-05, "loss": 1.0191, "step": 12869 }, { "epoch": 0.8953354899300845, "grad_norm": 1.0390625, "learning_rate": 5.687934454706689e-05, "loss": 0.7994, "step": 12870 }, { "epoch": 0.8954050575672198, "grad_norm": 1.3984375, "learning_rate": 5.680445639145304e-05, "loss": 0.8081, "step": 12871 }, { "epoch": 0.895474625204355, "grad_norm": 1.5859375, "learning_rate": 5.67296161265548e-05, "loss": 0.9523, "step": 12872 }, { "epoch": 0.8955441928414901, "grad_norm": 1.3203125, "learning_rate": 5.665482375617248e-05, "loss": 0.9154, "step": 12873 }, { "epoch": 0.8956137604786254, "grad_norm": 1.1796875, "learning_rate": 5.658007928410336e-05, "loss": 0.7976, "step": 12874 }, { "epoch": 0.8956833281157606, "grad_norm": 1.390625, "learning_rate": 5.6505382714142626e-05, "loss": 0.9391, "step": 12875 }, { "epoch": 0.8957528957528957, "grad_norm": 1.0234375, "learning_rate": 5.64307340500827e-05, "loss": 0.9901, "step": 12876 }, { "epoch": 0.8958224633900309, "grad_norm": 1.0078125, "learning_rate": 5.6356133295714426e-05, "loss": 0.759, "step": 12877 }, { "epoch": 0.8958920310271662, "grad_norm": 1.765625, "learning_rate": 5.6281580454825344e-05, "loss": 0.9905, "step": 12878 }, { "epoch": 0.8959615986643014, "grad_norm": 1.0703125, "learning_rate": 5.620707553120086e-05, "loss": 0.7871, "step": 12879 }, { "epoch": 0.8960311663014365, "grad_norm": 0.94921875, "learning_rate": 5.6132618528624055e-05, "loss": 0.6924, "step": 12880 }, { "epoch": 0.8961007339385718, "grad_norm": 1.046875, "learning_rate": 5.605820945087536e-05, "loss": 0.8007, "step": 12881 }, { "epoch": 0.896170301575707, "grad_norm": 0.95703125, "learning_rate": 5.598384830173309e-05, "loss": 0.7307, "step": 12882 }, { "epoch": 0.8962398692128422, "grad_norm": 0.91796875, "learning_rate": 5.590953508497276e-05, "loss": 0.8733, "step": 12883 }, { "epoch": 0.8963094368499774, "grad_norm": 1.09375, "learning_rate": 5.583526980436771e-05, "loss": 0.9888, "step": 12884 }, { "epoch": 0.8963790044871126, "grad_norm": 1.015625, "learning_rate": 5.576105246368857e-05, "loss": 0.7823, "step": 12885 }, { "epoch": 0.8964485721242478, "grad_norm": 1.2578125, "learning_rate": 5.568688306670389e-05, "loss": 0.8014, "step": 12886 }, { "epoch": 0.8965181397613831, "grad_norm": 1.2265625, "learning_rate": 5.5612761617179766e-05, "loss": 0.7285, "step": 12887 }, { "epoch": 0.8965877073985182, "grad_norm": 1.6171875, "learning_rate": 5.553868811887952e-05, "loss": 0.9137, "step": 12888 }, { "epoch": 0.8966572750356534, "grad_norm": 0.93359375, "learning_rate": 5.546466257556415e-05, "loss": 0.7395, "step": 12889 }, { "epoch": 0.8967268426727886, "grad_norm": 1.0546875, "learning_rate": 5.539068499099231e-05, "loss": 0.9236, "step": 12890 }, { "epoch": 0.8967964103099239, "grad_norm": 1.0703125, "learning_rate": 5.5316755368920554e-05, "loss": 0.7547, "step": 12891 }, { "epoch": 0.896865977947059, "grad_norm": 1.0078125, "learning_rate": 5.5242873713102326e-05, "loss": 0.8267, "step": 12892 }, { "epoch": 0.8969355455841942, "grad_norm": 1.515625, "learning_rate": 5.516904002728895e-05, "loss": 0.8644, "step": 12893 }, { "epoch": 0.8970051132213295, "grad_norm": 1.125, "learning_rate": 5.509525431522955e-05, "loss": 0.7402, "step": 12894 }, { "epoch": 0.8970746808584646, "grad_norm": 1.0390625, "learning_rate": 5.502151658067034e-05, "loss": 0.8537, "step": 12895 }, { "epoch": 0.8971442484955998, "grad_norm": 1.203125, "learning_rate": 5.494782682735555e-05, "loss": 0.6948, "step": 12896 }, { "epoch": 0.8972138161327351, "grad_norm": 1.0703125, "learning_rate": 5.487418505902664e-05, "loss": 0.8975, "step": 12897 }, { "epoch": 0.8972833837698703, "grad_norm": 1.2578125, "learning_rate": 5.480059127942283e-05, "loss": 0.8004, "step": 12898 }, { "epoch": 0.8973529514070054, "grad_norm": 1.0390625, "learning_rate": 5.47270454922808e-05, "loss": 0.7677, "step": 12899 }, { "epoch": 0.8974225190441407, "grad_norm": 1.1015625, "learning_rate": 5.465354770133491e-05, "loss": 1.0044, "step": 12900 }, { "epoch": 0.8974920866812759, "grad_norm": 0.984375, "learning_rate": 5.4580097910317036e-05, "loss": 0.6903, "step": 12901 }, { "epoch": 0.8975616543184111, "grad_norm": 1.0390625, "learning_rate": 5.4506696122956556e-05, "loss": 0.9146, "step": 12902 }, { "epoch": 0.8976312219555462, "grad_norm": 1.609375, "learning_rate": 5.443334234298025e-05, "loss": 0.8403, "step": 12903 }, { "epoch": 0.8977007895926815, "grad_norm": 1.28125, "learning_rate": 5.436003657411281e-05, "loss": 0.7782, "step": 12904 }, { "epoch": 0.8977703572298167, "grad_norm": 1.09375, "learning_rate": 5.4286778820076486e-05, "loss": 0.9536, "step": 12905 }, { "epoch": 0.8978399248669519, "grad_norm": 1.421875, "learning_rate": 5.421356908459074e-05, "loss": 0.9864, "step": 12906 }, { "epoch": 0.8979094925040871, "grad_norm": 1.0546875, "learning_rate": 5.414040737137271e-05, "loss": 0.636, "step": 12907 }, { "epoch": 0.8979790601412223, "grad_norm": 1.421875, "learning_rate": 5.406729368413743e-05, "loss": 0.9173, "step": 12908 }, { "epoch": 0.8980486277783575, "grad_norm": 1.3203125, "learning_rate": 5.399422802659715e-05, "loss": 0.8698, "step": 12909 }, { "epoch": 0.8981181954154928, "grad_norm": 0.9296875, "learning_rate": 5.3921210402461785e-05, "loss": 0.6691, "step": 12910 }, { "epoch": 0.8981877630526279, "grad_norm": 1.015625, "learning_rate": 5.38482408154386e-05, "loss": 0.7015, "step": 12911 }, { "epoch": 0.8982573306897631, "grad_norm": 1.6484375, "learning_rate": 5.377531926923285e-05, "loss": 0.8865, "step": 12912 }, { "epoch": 0.8983268983268984, "grad_norm": 1.1171875, "learning_rate": 5.3702445767547015e-05, "loss": 0.9282, "step": 12913 }, { "epoch": 0.8983964659640336, "grad_norm": 1.25, "learning_rate": 5.362962031408136e-05, "loss": 0.925, "step": 12914 }, { "epoch": 0.8984660336011687, "grad_norm": 1.296875, "learning_rate": 5.35568429125336e-05, "loss": 0.8217, "step": 12915 }, { "epoch": 0.8985356012383039, "grad_norm": 1.1171875, "learning_rate": 5.348411356659888e-05, "loss": 0.897, "step": 12916 }, { "epoch": 0.8986051688754392, "grad_norm": 1.4453125, "learning_rate": 5.341143227996992e-05, "loss": 0.8641, "step": 12917 }, { "epoch": 0.8986747365125743, "grad_norm": 1.0859375, "learning_rate": 5.3338799056337316e-05, "loss": 0.7648, "step": 12918 }, { "epoch": 0.8987443041497095, "grad_norm": 1.03125, "learning_rate": 5.326621389938913e-05, "loss": 0.7663, "step": 12919 }, { "epoch": 0.8988138717868448, "grad_norm": 1.0234375, "learning_rate": 5.319367681281073e-05, "loss": 1.0038, "step": 12920 }, { "epoch": 0.89888343942398, "grad_norm": 1.046875, "learning_rate": 5.312118780028496e-05, "loss": 0.8561, "step": 12921 }, { "epoch": 0.8989530070611151, "grad_norm": 1.078125, "learning_rate": 5.304874686549277e-05, "loss": 0.8276, "step": 12922 }, { "epoch": 0.8990225746982504, "grad_norm": 0.9609375, "learning_rate": 5.29763540121122e-05, "loss": 0.7428, "step": 12923 }, { "epoch": 0.8990921423353856, "grad_norm": 1.2421875, "learning_rate": 5.290400924381911e-05, "loss": 0.7342, "step": 12924 }, { "epoch": 0.8991617099725208, "grad_norm": 1.1640625, "learning_rate": 5.2831712564286536e-05, "loss": 1.0198, "step": 12925 }, { "epoch": 0.899231277609656, "grad_norm": 0.96484375, "learning_rate": 5.275946397718578e-05, "loss": 0.9774, "step": 12926 }, { "epoch": 0.8993008452467912, "grad_norm": 1.390625, "learning_rate": 5.2687263486184686e-05, "loss": 0.9127, "step": 12927 }, { "epoch": 0.8993704128839264, "grad_norm": 0.8046875, "learning_rate": 5.2615111094949765e-05, "loss": 0.6506, "step": 12928 }, { "epoch": 0.8994399805210616, "grad_norm": 1.0859375, "learning_rate": 5.254300680714419e-05, "loss": 0.9133, "step": 12929 }, { "epoch": 0.8995095481581968, "grad_norm": 0.9921875, "learning_rate": 5.247095062642937e-05, "loss": 0.8668, "step": 12930 }, { "epoch": 0.899579115795332, "grad_norm": 1.171875, "learning_rate": 5.23989425564636e-05, "loss": 0.7779, "step": 12931 }, { "epoch": 0.8996486834324672, "grad_norm": 1.1484375, "learning_rate": 5.2326982600903184e-05, "loss": 0.8159, "step": 12932 }, { "epoch": 0.8997182510696025, "grad_norm": 0.86328125, "learning_rate": 5.225507076340219e-05, "loss": 0.6071, "step": 12933 }, { "epoch": 0.8997878187067376, "grad_norm": 1.0390625, "learning_rate": 5.21832070476117e-05, "loss": 0.6333, "step": 12934 }, { "epoch": 0.8998573863438728, "grad_norm": 1.4453125, "learning_rate": 5.211139145718047e-05, "loss": 0.9271, "step": 12935 }, { "epoch": 0.8999269539810081, "grad_norm": 0.81640625, "learning_rate": 5.2039623995755126e-05, "loss": 0.6386, "step": 12936 }, { "epoch": 0.8999965216181433, "grad_norm": 1.0390625, "learning_rate": 5.196790466697965e-05, "loss": 0.7896, "step": 12937 }, { "epoch": 0.9000660892552784, "grad_norm": 1.484375, "learning_rate": 5.189623347449557e-05, "loss": 0.9823, "step": 12938 }, { "epoch": 0.9001356568924136, "grad_norm": 1.015625, "learning_rate": 5.182461042194175e-05, "loss": 0.727, "step": 12939 }, { "epoch": 0.9002052245295489, "grad_norm": 1.1640625, "learning_rate": 5.1753035512955184e-05, "loss": 0.9766, "step": 12940 }, { "epoch": 0.900274792166684, "grad_norm": 1.3125, "learning_rate": 5.168150875117006e-05, "loss": 0.7615, "step": 12941 }, { "epoch": 0.9003443598038192, "grad_norm": 0.8984375, "learning_rate": 5.161003014021792e-05, "loss": 0.7439, "step": 12942 }, { "epoch": 0.9004139274409545, "grad_norm": 1.3828125, "learning_rate": 5.1538599683728206e-05, "loss": 0.8239, "step": 12943 }, { "epoch": 0.9004834950780897, "grad_norm": 1.140625, "learning_rate": 5.146721738532789e-05, "loss": 0.6905, "step": 12944 }, { "epoch": 0.9005530627152248, "grad_norm": 1.2734375, "learning_rate": 5.1395883248641196e-05, "loss": 0.7003, "step": 12945 }, { "epoch": 0.9006226303523601, "grad_norm": 1.0625, "learning_rate": 5.132459727729022e-05, "loss": 1.021, "step": 12946 }, { "epoch": 0.9006921979894953, "grad_norm": 1.15625, "learning_rate": 5.125335947489462e-05, "loss": 0.7083, "step": 12947 }, { "epoch": 0.9007617656266305, "grad_norm": 1.015625, "learning_rate": 5.118216984507151e-05, "loss": 1.0288, "step": 12948 }, { "epoch": 0.9008313332637657, "grad_norm": 1.4375, "learning_rate": 5.111102839143511e-05, "loss": 0.9, "step": 12949 }, { "epoch": 0.9009009009009009, "grad_norm": 0.90625, "learning_rate": 5.103993511759808e-05, "loss": 0.6472, "step": 12950 }, { "epoch": 0.9009704685380361, "grad_norm": 1.359375, "learning_rate": 5.096889002717009e-05, "loss": 0.9621, "step": 12951 }, { "epoch": 0.9010400361751713, "grad_norm": 1.0546875, "learning_rate": 5.0897893123758365e-05, "loss": 0.7863, "step": 12952 }, { "epoch": 0.9011096038123065, "grad_norm": 1.28125, "learning_rate": 5.08269444109678e-05, "loss": 0.9322, "step": 12953 }, { "epoch": 0.9011791714494417, "grad_norm": 1.2890625, "learning_rate": 5.0756043892400626e-05, "loss": 0.8211, "step": 12954 }, { "epoch": 0.9012487390865769, "grad_norm": 1.0625, "learning_rate": 5.0685191571657294e-05, "loss": 0.9078, "step": 12955 }, { "epoch": 0.9013183067237122, "grad_norm": 0.98828125, "learning_rate": 5.061438745233493e-05, "loss": 0.6872, "step": 12956 }, { "epoch": 0.9013878743608473, "grad_norm": 1.125, "learning_rate": 5.054363153802865e-05, "loss": 0.8901, "step": 12957 }, { "epoch": 0.9014574419979825, "grad_norm": 1.046875, "learning_rate": 5.0472923832331266e-05, "loss": 0.8544, "step": 12958 }, { "epoch": 0.9015270096351178, "grad_norm": 1.125, "learning_rate": 5.040226433883266e-05, "loss": 0.9961, "step": 12959 }, { "epoch": 0.901596577272253, "grad_norm": 1.0234375, "learning_rate": 5.0331653061120755e-05, "loss": 0.7943, "step": 12960 }, { "epoch": 0.9016661449093881, "grad_norm": 1.15625, "learning_rate": 5.0261090002781004e-05, "loss": 0.716, "step": 12961 }, { "epoch": 0.9017357125465234, "grad_norm": 0.77734375, "learning_rate": 5.0190575167396e-05, "loss": 0.7278, "step": 12962 }, { "epoch": 0.9018052801836586, "grad_norm": 1.1015625, "learning_rate": 5.01201085585461e-05, "loss": 0.8286, "step": 12963 }, { "epoch": 0.9018748478207937, "grad_norm": 1.015625, "learning_rate": 5.0049690179809315e-05, "loss": 0.6693, "step": 12964 }, { "epoch": 0.9019444154579289, "grad_norm": 0.984375, "learning_rate": 4.997932003476124e-05, "loss": 0.7188, "step": 12965 }, { "epoch": 0.9020139830950642, "grad_norm": 1.0859375, "learning_rate": 4.9908998126974915e-05, "loss": 0.7914, "step": 12966 }, { "epoch": 0.9020835507321994, "grad_norm": 0.87109375, "learning_rate": 4.9838724460020693e-05, "loss": 0.7081, "step": 12967 }, { "epoch": 0.9021531183693345, "grad_norm": 0.96875, "learning_rate": 4.9768499037466944e-05, "loss": 0.7018, "step": 12968 }, { "epoch": 0.9022226860064698, "grad_norm": 1.0546875, "learning_rate": 4.969832186287937e-05, "loss": 0.7203, "step": 12969 }, { "epoch": 0.902292253643605, "grad_norm": 0.9765625, "learning_rate": 4.962819293982113e-05, "loss": 0.8163, "step": 12970 }, { "epoch": 0.9023618212807402, "grad_norm": 1.015625, "learning_rate": 4.9558112271852916e-05, "loss": 0.7653, "step": 12971 }, { "epoch": 0.9024313889178754, "grad_norm": 1.09375, "learning_rate": 4.948807986253323e-05, "loss": 0.8682, "step": 12972 }, { "epoch": 0.9025009565550106, "grad_norm": 1.3515625, "learning_rate": 4.9418095715417885e-05, "loss": 0.6972, "step": 12973 }, { "epoch": 0.9025705241921458, "grad_norm": 1.5390625, "learning_rate": 4.934815983406027e-05, "loss": 0.7371, "step": 12974 }, { "epoch": 0.9026400918292811, "grad_norm": 1.53125, "learning_rate": 4.927827222201165e-05, "loss": 0.9693, "step": 12975 }, { "epoch": 0.9027096594664162, "grad_norm": 1.296875, "learning_rate": 4.9208432882820396e-05, "loss": 0.691, "step": 12976 }, { "epoch": 0.9027792271035514, "grad_norm": 1.03125, "learning_rate": 4.913864182003236e-05, "loss": 0.7755, "step": 12977 }, { "epoch": 0.9028487947406866, "grad_norm": 1.3515625, "learning_rate": 4.9068899037191364e-05, "loss": 0.7264, "step": 12978 }, { "epoch": 0.9029183623778219, "grad_norm": 1.1171875, "learning_rate": 4.8999204537838906e-05, "loss": 0.7654, "step": 12979 }, { "epoch": 0.902987930014957, "grad_norm": 1.0078125, "learning_rate": 4.892955832551338e-05, "loss": 0.7029, "step": 12980 }, { "epoch": 0.9030574976520922, "grad_norm": 1.140625, "learning_rate": 4.885996040375096e-05, "loss": 1.0747, "step": 12981 }, { "epoch": 0.9031270652892275, "grad_norm": 1.2421875, "learning_rate": 4.8790410776085705e-05, "loss": 0.8239, "step": 12982 }, { "epoch": 0.9031966329263627, "grad_norm": 1.0234375, "learning_rate": 4.872090944604901e-05, "loss": 0.8294, "step": 12983 }, { "epoch": 0.9032662005634978, "grad_norm": 1.390625, "learning_rate": 4.865145641716972e-05, "loss": 1.057, "step": 12984 }, { "epoch": 0.9033357682006331, "grad_norm": 1.109375, "learning_rate": 4.858205169297425e-05, "loss": 0.7924, "step": 12985 }, { "epoch": 0.9034053358377683, "grad_norm": 1.2734375, "learning_rate": 4.851269527698665e-05, "loss": 0.6779, "step": 12986 }, { "epoch": 0.9034749034749034, "grad_norm": 1.3515625, "learning_rate": 4.8443387172728784e-05, "loss": 0.7835, "step": 12987 }, { "epoch": 0.9035444711120387, "grad_norm": 1.3671875, "learning_rate": 4.837412738371927e-05, "loss": 1.1405, "step": 12988 }, { "epoch": 0.9036140387491739, "grad_norm": 1.03125, "learning_rate": 4.830491591347519e-05, "loss": 0.7007, "step": 12989 }, { "epoch": 0.9036836063863091, "grad_norm": 1.1328125, "learning_rate": 4.823575276551051e-05, "loss": 0.8549, "step": 12990 }, { "epoch": 0.9037531740234442, "grad_norm": 1.0078125, "learning_rate": 4.816663794333698e-05, "loss": 0.673, "step": 12991 }, { "epoch": 0.9038227416605795, "grad_norm": 1.015625, "learning_rate": 4.8097571450464006e-05, "loss": 0.9113, "step": 12992 }, { "epoch": 0.9038923092977147, "grad_norm": 1.21875, "learning_rate": 4.802855329039846e-05, "loss": 0.6998, "step": 12993 }, { "epoch": 0.9039618769348499, "grad_norm": 1.0859375, "learning_rate": 4.795958346664475e-05, "loss": 0.7755, "step": 12994 }, { "epoch": 0.9040314445719851, "grad_norm": 1.6484375, "learning_rate": 4.789066198270464e-05, "loss": 0.6506, "step": 12995 }, { "epoch": 0.9041010122091203, "grad_norm": 1.1328125, "learning_rate": 4.782178884207766e-05, "loss": 0.8783, "step": 12996 }, { "epoch": 0.9041705798462555, "grad_norm": 1.203125, "learning_rate": 4.775296404826113e-05, "loss": 0.8482, "step": 12997 }, { "epoch": 0.9042401474833908, "grad_norm": 1.046875, "learning_rate": 4.768418760474935e-05, "loss": 0.721, "step": 12998 }, { "epoch": 0.9043097151205259, "grad_norm": 0.96875, "learning_rate": 4.761545951503432e-05, "loss": 0.9024, "step": 12999 }, { "epoch": 0.9043792827576611, "grad_norm": 0.90234375, "learning_rate": 4.7546779782605906e-05, "loss": 0.7128, "step": 13000 }, { "epoch": 0.9044488503947964, "grad_norm": 0.8671875, "learning_rate": 4.7478148410951546e-05, "loss": 0.7462, "step": 13001 }, { "epoch": 0.9045184180319316, "grad_norm": 1.265625, "learning_rate": 4.7409565403555456e-05, "loss": 0.6865, "step": 13002 }, { "epoch": 0.9045879856690667, "grad_norm": 1.1484375, "learning_rate": 4.734103076390039e-05, "loss": 0.8292, "step": 13003 }, { "epoch": 0.9046575533062019, "grad_norm": 1.6171875, "learning_rate": 4.727254449546614e-05, "loss": 0.6711, "step": 13004 }, { "epoch": 0.9047271209433372, "grad_norm": 0.8984375, "learning_rate": 4.720410660172969e-05, "loss": 0.878, "step": 13005 }, { "epoch": 0.9047966885804724, "grad_norm": 1.1796875, "learning_rate": 4.7135717086166375e-05, "loss": 0.882, "step": 13006 }, { "epoch": 0.9048662562176075, "grad_norm": 1.21875, "learning_rate": 4.7067375952248637e-05, "loss": 0.848, "step": 13007 }, { "epoch": 0.9049358238547428, "grad_norm": 1.4453125, "learning_rate": 4.6999083203446366e-05, "loss": 0.9239, "step": 13008 }, { "epoch": 0.905005391491878, "grad_norm": 0.96875, "learning_rate": 4.693083884322713e-05, "loss": 0.6508, "step": 13009 }, { "epoch": 0.9050749591290131, "grad_norm": 1.0859375, "learning_rate": 4.686264287505604e-05, "loss": 0.7265, "step": 13010 }, { "epoch": 0.9051445267661484, "grad_norm": 1.234375, "learning_rate": 4.679449530239588e-05, "loss": 0.8146, "step": 13011 }, { "epoch": 0.9052140944032836, "grad_norm": 1.171875, "learning_rate": 4.6726396128706774e-05, "loss": 0.8923, "step": 13012 }, { "epoch": 0.9052836620404188, "grad_norm": 1.734375, "learning_rate": 4.665834535744617e-05, "loss": 0.8934, "step": 13013 }, { "epoch": 0.905353229677554, "grad_norm": 0.8671875, "learning_rate": 4.659034299206977e-05, "loss": 0.6589, "step": 13014 }, { "epoch": 0.9054227973146892, "grad_norm": 1.1796875, "learning_rate": 4.652238903603023e-05, "loss": 0.9328, "step": 13015 }, { "epoch": 0.9054923649518244, "grad_norm": 1.171875, "learning_rate": 4.6454483492777925e-05, "loss": 0.9544, "step": 13016 }, { "epoch": 0.9055619325889596, "grad_norm": 1.0703125, "learning_rate": 4.638662636576052e-05, "loss": 0.9136, "step": 13017 }, { "epoch": 0.9056315002260948, "grad_norm": 1.125, "learning_rate": 4.6318817658423715e-05, "loss": 0.7097, "step": 13018 }, { "epoch": 0.90570106786323, "grad_norm": 1.09375, "learning_rate": 4.625105737421065e-05, "loss": 0.8913, "step": 13019 }, { "epoch": 0.9057706355003652, "grad_norm": 0.89453125, "learning_rate": 4.618334551656145e-05, "loss": 0.7505, "step": 13020 }, { "epoch": 0.9058402031375005, "grad_norm": 0.859375, "learning_rate": 4.611568208891448e-05, "loss": 0.7989, "step": 13021 }, { "epoch": 0.9059097707746356, "grad_norm": 0.9609375, "learning_rate": 4.6048067094705216e-05, "loss": 0.68, "step": 13022 }, { "epoch": 0.9059793384117708, "grad_norm": 1.296875, "learning_rate": 4.59805005373668e-05, "loss": 0.8832, "step": 13023 }, { "epoch": 0.9060489060489061, "grad_norm": 1.3671875, "learning_rate": 4.591298242032982e-05, "loss": 0.9539, "step": 13024 }, { "epoch": 0.9061184736860413, "grad_norm": 0.9609375, "learning_rate": 4.5845512747022865e-05, "loss": 0.6243, "step": 13025 }, { "epoch": 0.9061880413231764, "grad_norm": 0.94921875, "learning_rate": 4.577809152087142e-05, "loss": 0.7258, "step": 13026 }, { "epoch": 0.9062576089603117, "grad_norm": 1.1328125, "learning_rate": 4.571071874529886e-05, "loss": 0.6775, "step": 13027 }, { "epoch": 0.9063271765974469, "grad_norm": 1.1796875, "learning_rate": 4.5643394423725895e-05, "loss": 0.8348, "step": 13028 }, { "epoch": 0.906396744234582, "grad_norm": 0.9296875, "learning_rate": 4.5576118559571224e-05, "loss": 0.7729, "step": 13029 }, { "epoch": 0.9064663118717172, "grad_norm": 1.0859375, "learning_rate": 4.5508891156250565e-05, "loss": 0.7014, "step": 13030 }, { "epoch": 0.9065358795088525, "grad_norm": 1.140625, "learning_rate": 4.54417122171773e-05, "loss": 0.7075, "step": 13031 }, { "epoch": 0.9066054471459877, "grad_norm": 1.1015625, "learning_rate": 4.537458174576259e-05, "loss": 0.7201, "step": 13032 }, { "epoch": 0.9066750147831228, "grad_norm": 1.25, "learning_rate": 4.530749974541504e-05, "loss": 0.7543, "step": 13033 }, { "epoch": 0.9067445824202581, "grad_norm": 1.1796875, "learning_rate": 4.524046621954048e-05, "loss": 0.7822, "step": 13034 }, { "epoch": 0.9068141500573933, "grad_norm": 1.3125, "learning_rate": 4.517348117154296e-05, "loss": 0.8342, "step": 13035 }, { "epoch": 0.9068837176945285, "grad_norm": 0.9921875, "learning_rate": 4.510654460482322e-05, "loss": 0.8462, "step": 13036 }, { "epoch": 0.9069532853316637, "grad_norm": 1.1796875, "learning_rate": 4.503965652278008e-05, "loss": 0.6069, "step": 13037 }, { "epoch": 0.9070228529687989, "grad_norm": 1.4453125, "learning_rate": 4.497281692880983e-05, "loss": 0.9126, "step": 13038 }, { "epoch": 0.9070924206059341, "grad_norm": 1.078125, "learning_rate": 4.490602582630643e-05, "loss": 0.8152, "step": 13039 }, { "epoch": 0.9071619882430694, "grad_norm": 1.2109375, "learning_rate": 4.483928321866093e-05, "loss": 1.0337, "step": 13040 }, { "epoch": 0.9072315558802045, "grad_norm": 1.265625, "learning_rate": 4.4772589109262184e-05, "loss": 0.6577, "step": 13041 }, { "epoch": 0.9073011235173397, "grad_norm": 0.9296875, "learning_rate": 4.4705943501496596e-05, "loss": 1.0599, "step": 13042 }, { "epoch": 0.9073706911544749, "grad_norm": 0.99609375, "learning_rate": 4.463934639874834e-05, "loss": 0.7178, "step": 13043 }, { "epoch": 0.9074402587916102, "grad_norm": 1.1640625, "learning_rate": 4.45727978043986e-05, "loss": 0.7231, "step": 13044 }, { "epoch": 0.9075098264287453, "grad_norm": 1.046875, "learning_rate": 4.450629772182646e-05, "loss": 0.8416, "step": 13045 }, { "epoch": 0.9075793940658805, "grad_norm": 1.9296875, "learning_rate": 4.4439846154408435e-05, "loss": 1.1456, "step": 13046 }, { "epoch": 0.9076489617030158, "grad_norm": 0.99609375, "learning_rate": 4.4373443105518827e-05, "loss": 0.7196, "step": 13047 }, { "epoch": 0.907718529340151, "grad_norm": 1.046875, "learning_rate": 4.430708857852883e-05, "loss": 1.0032, "step": 13048 }, { "epoch": 0.9077880969772861, "grad_norm": 1.25, "learning_rate": 4.424078257680808e-05, "loss": 0.9284, "step": 13049 }, { "epoch": 0.9078576646144214, "grad_norm": 1.7578125, "learning_rate": 4.417452510372277e-05, "loss": 0.8973, "step": 13050 }, { "epoch": 0.9079272322515566, "grad_norm": 1.0859375, "learning_rate": 4.410831616263755e-05, "loss": 0.9603, "step": 13051 }, { "epoch": 0.9079967998886918, "grad_norm": 1.2890625, "learning_rate": 4.404215575691384e-05, "loss": 0.8146, "step": 13052 }, { "epoch": 0.908066367525827, "grad_norm": 1.3515625, "learning_rate": 4.397604388991116e-05, "loss": 0.7503, "step": 13053 }, { "epoch": 0.9081359351629622, "grad_norm": 0.953125, "learning_rate": 4.3909980564986294e-05, "loss": 0.6531, "step": 13054 }, { "epoch": 0.9082055028000974, "grad_norm": 1.0703125, "learning_rate": 4.3843965785493435e-05, "loss": 0.6746, "step": 13055 }, { "epoch": 0.9082750704372325, "grad_norm": 1.1328125, "learning_rate": 4.377799955478456e-05, "loss": 0.6423, "step": 13056 }, { "epoch": 0.9083446380743678, "grad_norm": 0.98046875, "learning_rate": 4.371208187620934e-05, "loss": 0.6185, "step": 13057 }, { "epoch": 0.908414205711503, "grad_norm": 1.25, "learning_rate": 4.364621275311453e-05, "loss": 0.846, "step": 13058 }, { "epoch": 0.9084837733486382, "grad_norm": 1.0859375, "learning_rate": 4.358039218884458e-05, "loss": 0.8656, "step": 13059 }, { "epoch": 0.9085533409857734, "grad_norm": 1.0, "learning_rate": 4.351462018674157e-05, "loss": 0.5444, "step": 13060 }, { "epoch": 0.9086229086229086, "grad_norm": 1.0859375, "learning_rate": 4.3448896750145184e-05, "loss": 0.8221, "step": 13061 }, { "epoch": 0.9086924762600438, "grad_norm": 0.95703125, "learning_rate": 4.338322188239241e-05, "loss": 0.6084, "step": 13062 }, { "epoch": 0.9087620438971791, "grad_norm": 0.86328125, "learning_rate": 4.331759558681803e-05, "loss": 0.5762, "step": 13063 }, { "epoch": 0.9088316115343142, "grad_norm": 1.59375, "learning_rate": 4.3252017866753926e-05, "loss": 1.0052, "step": 13064 }, { "epoch": 0.9089011791714494, "grad_norm": 1.3125, "learning_rate": 4.318648872553011e-05, "loss": 0.9105, "step": 13065 }, { "epoch": 0.9089707468085847, "grad_norm": 1.140625, "learning_rate": 4.3121008166473576e-05, "loss": 0.843, "step": 13066 }, { "epoch": 0.9090403144457199, "grad_norm": 1.3671875, "learning_rate": 4.305557619290934e-05, "loss": 0.968, "step": 13067 }, { "epoch": 0.909109882082855, "grad_norm": 1.4765625, "learning_rate": 4.2990192808159636e-05, "loss": 1.0298, "step": 13068 }, { "epoch": 0.9091794497199902, "grad_norm": 0.88671875, "learning_rate": 4.292485801554402e-05, "loss": 0.5187, "step": 13069 }, { "epoch": 0.9092490173571255, "grad_norm": 1.3984375, "learning_rate": 4.2859571818380295e-05, "loss": 1.0645, "step": 13070 }, { "epoch": 0.9093185849942607, "grad_norm": 1.1953125, "learning_rate": 4.279433421998324e-05, "loss": 1.0641, "step": 13071 }, { "epoch": 0.9093881526313958, "grad_norm": 0.890625, "learning_rate": 4.272914522366511e-05, "loss": 0.715, "step": 13072 }, { "epoch": 0.9094577202685311, "grad_norm": 1.203125, "learning_rate": 4.266400483273591e-05, "loss": 1.082, "step": 13073 }, { "epoch": 0.9095272879056663, "grad_norm": 1.0078125, "learning_rate": 4.259891305050323e-05, "loss": 0.4616, "step": 13074 }, { "epoch": 0.9095968555428015, "grad_norm": 1.046875, "learning_rate": 4.253386988027219e-05, "loss": 0.7929, "step": 13075 }, { "epoch": 0.9096664231799367, "grad_norm": 1.2265625, "learning_rate": 4.246887532534516e-05, "loss": 0.7197, "step": 13076 }, { "epoch": 0.9097359908170719, "grad_norm": 1.1796875, "learning_rate": 4.240392938902238e-05, "loss": 1.2147, "step": 13077 }, { "epoch": 0.9098055584542071, "grad_norm": 1.25, "learning_rate": 4.2339032074601326e-05, "loss": 1.0718, "step": 13078 }, { "epoch": 0.9098751260913424, "grad_norm": 1.046875, "learning_rate": 4.2274183385377476e-05, "loss": 0.5959, "step": 13079 }, { "epoch": 0.9099446937284775, "grad_norm": 1.0546875, "learning_rate": 4.220938332464308e-05, "loss": 0.9636, "step": 13080 }, { "epoch": 0.9100142613656127, "grad_norm": 1.1953125, "learning_rate": 4.214463189568874e-05, "loss": 0.9016, "step": 13081 }, { "epoch": 0.9100838290027479, "grad_norm": 1.203125, "learning_rate": 4.20799291018018e-05, "loss": 0.8073, "step": 13082 }, { "epoch": 0.9101533966398831, "grad_norm": 1.1171875, "learning_rate": 4.2015274946268115e-05, "loss": 0.9264, "step": 13083 }, { "epoch": 0.9102229642770183, "grad_norm": 1.3671875, "learning_rate": 4.195066943236991e-05, "loss": 0.7468, "step": 13084 }, { "epoch": 0.9102925319141535, "grad_norm": 0.96875, "learning_rate": 4.1886112563387924e-05, "loss": 0.8731, "step": 13085 }, { "epoch": 0.9103620995512888, "grad_norm": 1.25, "learning_rate": 4.1821604342599854e-05, "loss": 0.9215, "step": 13086 }, { "epoch": 0.9104316671884239, "grad_norm": 1.03125, "learning_rate": 4.175714477328108e-05, "loss": 0.8136, "step": 13087 }, { "epoch": 0.9105012348255591, "grad_norm": 0.9453125, "learning_rate": 4.169273385870454e-05, "loss": 0.7271, "step": 13088 }, { "epoch": 0.9105708024626944, "grad_norm": 1.2265625, "learning_rate": 4.162837160214095e-05, "loss": 0.6549, "step": 13089 }, { "epoch": 0.9106403700998296, "grad_norm": 1.2734375, "learning_rate": 4.156405800685803e-05, "loss": 1.0101, "step": 13090 }, { "epoch": 0.9107099377369647, "grad_norm": 1.5625, "learning_rate": 4.1499793076121285e-05, "loss": 0.7849, "step": 13091 }, { "epoch": 0.9107795053741, "grad_norm": 0.921875, "learning_rate": 4.1435576813193765e-05, "loss": 0.8024, "step": 13092 }, { "epoch": 0.9108490730112352, "grad_norm": 1.8671875, "learning_rate": 4.137140922133642e-05, "loss": 0.7898, "step": 13093 }, { "epoch": 0.9109186406483704, "grad_norm": 1.1875, "learning_rate": 4.130729030380675e-05, "loss": 0.732, "step": 13094 }, { "epoch": 0.9109882082855055, "grad_norm": 0.890625, "learning_rate": 4.1243220063860944e-05, "loss": 0.779, "step": 13095 }, { "epoch": 0.9110577759226408, "grad_norm": 1.0859375, "learning_rate": 4.117919850475183e-05, "loss": 0.6693, "step": 13096 }, { "epoch": 0.911127343559776, "grad_norm": 1.21875, "learning_rate": 4.111522562973025e-05, "loss": 1.0803, "step": 13097 }, { "epoch": 0.9111969111969112, "grad_norm": 1.046875, "learning_rate": 4.1051301442044276e-05, "loss": 0.9407, "step": 13098 }, { "epoch": 0.9112664788340464, "grad_norm": 1.4375, "learning_rate": 4.098742594493998e-05, "loss": 0.7143, "step": 13099 }, { "epoch": 0.9113360464711816, "grad_norm": 0.97265625, "learning_rate": 4.092359914166033e-05, "loss": 0.6816, "step": 13100 }, { "epoch": 0.9114056141083168, "grad_norm": 1.109375, "learning_rate": 4.0859821035445946e-05, "loss": 0.7882, "step": 13101 }, { "epoch": 0.9114751817454521, "grad_norm": 1.5703125, "learning_rate": 4.079609162953568e-05, "loss": 1.231, "step": 13102 }, { "epoch": 0.9115447493825872, "grad_norm": 1.3046875, "learning_rate": 4.0732410927165067e-05, "loss": 0.9799, "step": 13103 }, { "epoch": 0.9116143170197224, "grad_norm": 1.03125, "learning_rate": 4.066877893156762e-05, "loss": 0.7711, "step": 13104 }, { "epoch": 0.9116838846568577, "grad_norm": 1.0, "learning_rate": 4.0605195645974094e-05, "loss": 0.527, "step": 13105 }, { "epoch": 0.9117534522939928, "grad_norm": 0.87890625, "learning_rate": 4.054166107361301e-05, "loss": 0.6169, "step": 13106 }, { "epoch": 0.911823019931128, "grad_norm": 1.1484375, "learning_rate": 4.0478175217710466e-05, "loss": 0.8007, "step": 13107 }, { "epoch": 0.9118925875682632, "grad_norm": 1.125, "learning_rate": 4.041473808148977e-05, "loss": 0.6891, "step": 13108 }, { "epoch": 0.9119621552053985, "grad_norm": 1.25, "learning_rate": 4.035134966817211e-05, "loss": 0.8601, "step": 13109 }, { "epoch": 0.9120317228425336, "grad_norm": 1.359375, "learning_rate": 4.0288009980975706e-05, "loss": 1.1731, "step": 13110 }, { "epoch": 0.9121012904796688, "grad_norm": 1.09375, "learning_rate": 4.022471902311709e-05, "loss": 0.7185, "step": 13111 }, { "epoch": 0.9121708581168041, "grad_norm": 1.25, "learning_rate": 4.0161476797809456e-05, "loss": 0.9068, "step": 13112 }, { "epoch": 0.9122404257539393, "grad_norm": 1.046875, "learning_rate": 4.009828330826415e-05, "loss": 0.6831, "step": 13113 }, { "epoch": 0.9123099933910744, "grad_norm": 1.078125, "learning_rate": 4.00351385576897e-05, "loss": 0.6043, "step": 13114 }, { "epoch": 0.9123795610282097, "grad_norm": 0.8984375, "learning_rate": 3.997204254929232e-05, "loss": 0.8852, "step": 13115 }, { "epoch": 0.9124491286653449, "grad_norm": 1.046875, "learning_rate": 3.9908995286275784e-05, "loss": 0.8096, "step": 13116 }, { "epoch": 0.9125186963024801, "grad_norm": 1.1796875, "learning_rate": 3.984599677184131e-05, "loss": 0.6992, "step": 13117 }, { "epoch": 0.9125882639396153, "grad_norm": 0.859375, "learning_rate": 3.978304700918755e-05, "loss": 0.6913, "step": 13118 }, { "epoch": 0.9126578315767505, "grad_norm": 1.1171875, "learning_rate": 3.9720146001510746e-05, "loss": 0.8249, "step": 13119 }, { "epoch": 0.9127273992138857, "grad_norm": 1.0546875, "learning_rate": 3.965729375200477e-05, "loss": 0.7471, "step": 13120 }, { "epoch": 0.9127969668510209, "grad_norm": 1.234375, "learning_rate": 3.959449026386097e-05, "loss": 1.1734, "step": 13121 }, { "epoch": 0.9128665344881561, "grad_norm": 1.0546875, "learning_rate": 3.953173554026801e-05, "loss": 0.72, "step": 13122 }, { "epoch": 0.9129361021252913, "grad_norm": 1.03125, "learning_rate": 3.9469029584412676e-05, "loss": 0.7768, "step": 13123 }, { "epoch": 0.9130056697624265, "grad_norm": 1.1875, "learning_rate": 3.940637239947831e-05, "loss": 0.7127, "step": 13124 }, { "epoch": 0.9130752373995618, "grad_norm": 1.0703125, "learning_rate": 3.9343763988646807e-05, "loss": 0.8563, "step": 13125 }, { "epoch": 0.9131448050366969, "grad_norm": 1.4140625, "learning_rate": 3.928120435509675e-05, "loss": 1.2617, "step": 13126 }, { "epoch": 0.9132143726738321, "grad_norm": 1.09375, "learning_rate": 3.921869350200491e-05, "loss": 0.9248, "step": 13127 }, { "epoch": 0.9132839403109674, "grad_norm": 1.03125, "learning_rate": 3.915623143254488e-05, "loss": 0.7759, "step": 13128 }, { "epoch": 0.9133535079481025, "grad_norm": 1.03125, "learning_rate": 3.909381814988855e-05, "loss": 0.7245, "step": 13129 }, { "epoch": 0.9134230755852377, "grad_norm": 1.09375, "learning_rate": 3.903145365720484e-05, "loss": 0.8968, "step": 13130 }, { "epoch": 0.913492643222373, "grad_norm": 1.21875, "learning_rate": 3.896913795766033e-05, "loss": 0.9318, "step": 13131 }, { "epoch": 0.9135622108595082, "grad_norm": 0.94140625, "learning_rate": 3.8906871054419034e-05, "loss": 0.6763, "step": 13132 }, { "epoch": 0.9136317784966433, "grad_norm": 1.0234375, "learning_rate": 3.884465295064232e-05, "loss": 0.7571, "step": 13133 }, { "epoch": 0.9137013461337785, "grad_norm": 1.0390625, "learning_rate": 3.878248364948978e-05, "loss": 0.7489, "step": 13134 }, { "epoch": 0.9137709137709138, "grad_norm": 1.3203125, "learning_rate": 3.8720363154117755e-05, "loss": 0.8554, "step": 13135 }, { "epoch": 0.913840481408049, "grad_norm": 1.140625, "learning_rate": 3.865829146768041e-05, "loss": 0.7288, "step": 13136 }, { "epoch": 0.9139100490451841, "grad_norm": 1.1953125, "learning_rate": 3.859626859332965e-05, "loss": 0.8952, "step": 13137 }, { "epoch": 0.9139796166823194, "grad_norm": 1.1875, "learning_rate": 3.853429453421442e-05, "loss": 0.799, "step": 13138 }, { "epoch": 0.9140491843194546, "grad_norm": 1.2890625, "learning_rate": 3.847236929348163e-05, "loss": 0.5849, "step": 13139 }, { "epoch": 0.9141187519565898, "grad_norm": 1.4296875, "learning_rate": 3.8410492874275335e-05, "loss": 0.6744, "step": 13140 }, { "epoch": 0.914188319593725, "grad_norm": 1.140625, "learning_rate": 3.8348665279737684e-05, "loss": 0.833, "step": 13141 }, { "epoch": 0.9142578872308602, "grad_norm": 1.1015625, "learning_rate": 3.828688651300749e-05, "loss": 0.7386, "step": 13142 }, { "epoch": 0.9143274548679954, "grad_norm": 1.1875, "learning_rate": 3.822515657722181e-05, "loss": 0.8128, "step": 13143 }, { "epoch": 0.9143970225051307, "grad_norm": 1.109375, "learning_rate": 3.816347547551524e-05, "loss": 0.8064, "step": 13144 }, { "epoch": 0.9144665901422658, "grad_norm": 1.3359375, "learning_rate": 3.810184321101917e-05, "loss": 0.9626, "step": 13145 }, { "epoch": 0.914536157779401, "grad_norm": 1.125, "learning_rate": 3.8040259786863315e-05, "loss": 0.8708, "step": 13146 }, { "epoch": 0.9146057254165362, "grad_norm": 1.1796875, "learning_rate": 3.797872520617418e-05, "loss": 0.6174, "step": 13147 }, { "epoch": 0.9146752930536715, "grad_norm": 1.1875, "learning_rate": 3.791723947207659e-05, "loss": 0.8264, "step": 13148 }, { "epoch": 0.9147448606908066, "grad_norm": 1.1171875, "learning_rate": 3.785580258769239e-05, "loss": 0.5879, "step": 13149 }, { "epoch": 0.9148144283279418, "grad_norm": 0.91796875, "learning_rate": 3.779441455614086e-05, "loss": 0.5422, "step": 13150 }, { "epoch": 0.9148839959650771, "grad_norm": 1.0703125, "learning_rate": 3.773307538053916e-05, "loss": 0.5574, "step": 13151 }, { "epoch": 0.9149535636022122, "grad_norm": 1.1015625, "learning_rate": 3.76717850640016e-05, "loss": 0.6906, "step": 13152 }, { "epoch": 0.9150231312393474, "grad_norm": 1.125, "learning_rate": 3.7610543609640444e-05, "loss": 0.8463, "step": 13153 }, { "epoch": 0.9150926988764827, "grad_norm": 1.0859375, "learning_rate": 3.754935102056489e-05, "loss": 0.9383, "step": 13154 }, { "epoch": 0.9151622665136179, "grad_norm": 1.0546875, "learning_rate": 3.7488207299882336e-05, "loss": 0.7741, "step": 13155 }, { "epoch": 0.915231834150753, "grad_norm": 1.21875, "learning_rate": 3.7427112450697075e-05, "loss": 0.9069, "step": 13156 }, { "epoch": 0.9153014017878883, "grad_norm": 1.15625, "learning_rate": 3.736606647611141e-05, "loss": 0.6133, "step": 13157 }, { "epoch": 0.9153709694250235, "grad_norm": 1.1015625, "learning_rate": 3.730506937922484e-05, "loss": 0.7051, "step": 13158 }, { "epoch": 0.9154405370621587, "grad_norm": 1.015625, "learning_rate": 3.7244121163134584e-05, "loss": 0.6315, "step": 13159 }, { "epoch": 0.9155101046992938, "grad_norm": 0.953125, "learning_rate": 3.718322183093503e-05, "loss": 0.7364, "step": 13160 }, { "epoch": 0.9155796723364291, "grad_norm": 1.109375, "learning_rate": 3.7122371385718614e-05, "loss": 0.6312, "step": 13161 }, { "epoch": 0.9156492399735643, "grad_norm": 1.2265625, "learning_rate": 3.706156983057496e-05, "loss": 0.8725, "step": 13162 }, { "epoch": 0.9157188076106995, "grad_norm": 1.359375, "learning_rate": 3.700081716859116e-05, "loss": 1.1978, "step": 13163 }, { "epoch": 0.9157883752478347, "grad_norm": 1.203125, "learning_rate": 3.694011340285208e-05, "loss": 1.0957, "step": 13164 }, { "epoch": 0.9158579428849699, "grad_norm": 1.5546875, "learning_rate": 3.687945853643959e-05, "loss": 0.6028, "step": 13165 }, { "epoch": 0.9159275105221051, "grad_norm": 1.0078125, "learning_rate": 3.6818852572434e-05, "loss": 0.7117, "step": 13166 }, { "epoch": 0.9159970781592404, "grad_norm": 0.9375, "learning_rate": 3.6758295513912185e-05, "loss": 0.6572, "step": 13167 }, { "epoch": 0.9160666457963755, "grad_norm": 1.578125, "learning_rate": 3.669778736394902e-05, "loss": 0.7317, "step": 13168 }, { "epoch": 0.9161362134335107, "grad_norm": 1.046875, "learning_rate": 3.663732812561682e-05, "loss": 0.729, "step": 13169 }, { "epoch": 0.916205781070646, "grad_norm": 1.2109375, "learning_rate": 3.6576917801985355e-05, "loss": 0.8536, "step": 13170 }, { "epoch": 0.9162753487077812, "grad_norm": 1.3125, "learning_rate": 3.651655639612206e-05, "loss": 0.9006, "step": 13171 }, { "epoch": 0.9163449163449163, "grad_norm": 1.09375, "learning_rate": 3.64562439110917e-05, "loss": 0.6241, "step": 13172 }, { "epoch": 0.9164144839820515, "grad_norm": 1.59375, "learning_rate": 3.6395980349956616e-05, "loss": 0.7321, "step": 13173 }, { "epoch": 0.9164840516191868, "grad_norm": 1.2890625, "learning_rate": 3.6335765715776684e-05, "loss": 0.6549, "step": 13174 }, { "epoch": 0.916553619256322, "grad_norm": 1.140625, "learning_rate": 3.627560001160935e-05, "loss": 0.8101, "step": 13175 }, { "epoch": 0.9166231868934571, "grad_norm": 1.109375, "learning_rate": 3.6215483240509604e-05, "loss": 0.7974, "step": 13176 }, { "epoch": 0.9166927545305924, "grad_norm": 1.15625, "learning_rate": 3.61554154055298e-05, "loss": 0.7702, "step": 13177 }, { "epoch": 0.9167623221677276, "grad_norm": 1.59375, "learning_rate": 3.6095396509719934e-05, "loss": 0.8351, "step": 13178 }, { "epoch": 0.9168318898048627, "grad_norm": 0.7734375, "learning_rate": 3.603542655612702e-05, "loss": 0.5195, "step": 13179 }, { "epoch": 0.916901457441998, "grad_norm": 1.2578125, "learning_rate": 3.5975505547796714e-05, "loss": 0.8202, "step": 13180 }, { "epoch": 0.9169710250791332, "grad_norm": 1.1484375, "learning_rate": 3.591563348777127e-05, "loss": 0.7548, "step": 13181 }, { "epoch": 0.9170405927162684, "grad_norm": 1.0078125, "learning_rate": 3.585581037909036e-05, "loss": 0.615, "step": 13182 }, { "epoch": 0.9171101603534036, "grad_norm": 1.3671875, "learning_rate": 3.5796036224791884e-05, "loss": 1.1456, "step": 13183 }, { "epoch": 0.9171797279905388, "grad_norm": 0.9765625, "learning_rate": 3.573631102791075e-05, "loss": 0.747, "step": 13184 }, { "epoch": 0.917249295627674, "grad_norm": 1.171875, "learning_rate": 3.5676634791479535e-05, "loss": 0.8888, "step": 13185 }, { "epoch": 0.9173188632648092, "grad_norm": 1.1328125, "learning_rate": 3.561700751852803e-05, "loss": 0.8245, "step": 13186 }, { "epoch": 0.9173884309019444, "grad_norm": 1.0546875, "learning_rate": 3.555742921208427e-05, "loss": 0.8364, "step": 13187 }, { "epoch": 0.9174579985390796, "grad_norm": 0.9609375, "learning_rate": 3.5497899875172935e-05, "loss": 0.8607, "step": 13188 }, { "epoch": 0.9175275661762148, "grad_norm": 1.109375, "learning_rate": 3.5438419510816834e-05, "loss": 0.9017, "step": 13189 }, { "epoch": 0.9175971338133501, "grad_norm": 0.9765625, "learning_rate": 3.537898812203621e-05, "loss": 0.6674, "step": 13190 }, { "epoch": 0.9176667014504852, "grad_norm": 0.84765625, "learning_rate": 3.531960571184845e-05, "loss": 0.7925, "step": 13191 }, { "epoch": 0.9177362690876204, "grad_norm": 1.1171875, "learning_rate": 3.526027228326867e-05, "loss": 0.9025, "step": 13192 }, { "epoch": 0.9178058367247557, "grad_norm": 0.94921875, "learning_rate": 3.520098783930958e-05, "loss": 0.6693, "step": 13193 }, { "epoch": 0.9178754043618909, "grad_norm": 1.34375, "learning_rate": 3.514175238298145e-05, "loss": 0.8207, "step": 13194 }, { "epoch": 0.917944971999026, "grad_norm": 1.328125, "learning_rate": 3.508256591729198e-05, "loss": 1.0026, "step": 13195 }, { "epoch": 0.9180145396361613, "grad_norm": 1.1328125, "learning_rate": 3.5023428445246085e-05, "loss": 0.9401, "step": 13196 }, { "epoch": 0.9180841072732965, "grad_norm": 1.265625, "learning_rate": 3.496433996984682e-05, "loss": 0.8634, "step": 13197 }, { "epoch": 0.9181536749104316, "grad_norm": 1.265625, "learning_rate": 3.4905300494094125e-05, "loss": 0.6497, "step": 13198 }, { "epoch": 0.9182232425475668, "grad_norm": 1.3984375, "learning_rate": 3.4846310020985925e-05, "loss": 0.7236, "step": 13199 }, { "epoch": 0.9182928101847021, "grad_norm": 1.046875, "learning_rate": 3.478736855351727e-05, "loss": 0.8447, "step": 13200 }, { "epoch": 0.9183623778218373, "grad_norm": 1.1015625, "learning_rate": 3.4728476094681105e-05, "loss": 0.8562, "step": 13201 }, { "epoch": 0.9184319454589724, "grad_norm": 1.328125, "learning_rate": 3.466963264746748e-05, "loss": 0.723, "step": 13202 }, { "epoch": 0.9185015130961077, "grad_norm": 1.2890625, "learning_rate": 3.461083821486421e-05, "loss": 0.9176, "step": 13203 }, { "epoch": 0.9185710807332429, "grad_norm": 1.21875, "learning_rate": 3.4552092799856826e-05, "loss": 0.6648, "step": 13204 }, { "epoch": 0.9186406483703781, "grad_norm": 1.1484375, "learning_rate": 3.449339640542804e-05, "loss": 0.5927, "step": 13205 }, { "epoch": 0.9187102160075133, "grad_norm": 1.15625, "learning_rate": 3.44347490345579e-05, "loss": 1.0229, "step": 13206 }, { "epoch": 0.9187797836446485, "grad_norm": 1.1796875, "learning_rate": 3.4376150690224375e-05, "loss": 0.9107, "step": 13207 }, { "epoch": 0.9188493512817837, "grad_norm": 1.328125, "learning_rate": 3.431760137540285e-05, "loss": 0.7092, "step": 13208 }, { "epoch": 0.918918918918919, "grad_norm": 1.4453125, "learning_rate": 3.425910109306618e-05, "loss": 0.8211, "step": 13209 }, { "epoch": 0.9189884865560541, "grad_norm": 1.1796875, "learning_rate": 3.4200649846184654e-05, "loss": 0.9294, "step": 13210 }, { "epoch": 0.9190580541931893, "grad_norm": 1.296875, "learning_rate": 3.4142247637726e-05, "loss": 1.0419, "step": 13211 }, { "epoch": 0.9191276218303245, "grad_norm": 1.0546875, "learning_rate": 3.408389447065596e-05, "loss": 0.9011, "step": 13212 }, { "epoch": 0.9191971894674598, "grad_norm": 1.15625, "learning_rate": 3.4025590347937066e-05, "loss": 0.6634, "step": 13213 }, { "epoch": 0.9192667571045949, "grad_norm": 1.0859375, "learning_rate": 3.396733527252982e-05, "loss": 0.9252, "step": 13214 }, { "epoch": 0.9193363247417301, "grad_norm": 1.0703125, "learning_rate": 3.390912924739209e-05, "loss": 0.6012, "step": 13215 }, { "epoch": 0.9194058923788654, "grad_norm": 1.28125, "learning_rate": 3.385097227547929e-05, "loss": 0.7775, "step": 13216 }, { "epoch": 0.9194754600160006, "grad_norm": 1.1875, "learning_rate": 3.379286435974438e-05, "loss": 0.8743, "step": 13217 }, { "epoch": 0.9195450276531357, "grad_norm": 1.1640625, "learning_rate": 3.37348055031379e-05, "loss": 0.9789, "step": 13218 }, { "epoch": 0.919614595290271, "grad_norm": 1.0546875, "learning_rate": 3.36767957086076e-05, "loss": 0.7683, "step": 13219 }, { "epoch": 0.9196841629274062, "grad_norm": 0.86328125, "learning_rate": 3.361883497909901e-05, "loss": 0.7352, "step": 13220 }, { "epoch": 0.9197537305645413, "grad_norm": 1.375, "learning_rate": 3.356092331755489e-05, "loss": 0.8684, "step": 13221 }, { "epoch": 0.9198232982016766, "grad_norm": 0.96484375, "learning_rate": 3.350306072691611e-05, "loss": 0.798, "step": 13222 }, { "epoch": 0.9198928658388118, "grad_norm": 1.3984375, "learning_rate": 3.3445247210120324e-05, "loss": 0.9874, "step": 13223 }, { "epoch": 0.919962433475947, "grad_norm": 1.1875, "learning_rate": 3.338748277010295e-05, "loss": 0.6423, "step": 13224 }, { "epoch": 0.9200320011130821, "grad_norm": 1.1953125, "learning_rate": 3.33297674097971e-05, "loss": 0.7429, "step": 13225 }, { "epoch": 0.9201015687502174, "grad_norm": 1.171875, "learning_rate": 3.327210113213353e-05, "loss": 0.9039, "step": 13226 }, { "epoch": 0.9201711363873526, "grad_norm": 1.15625, "learning_rate": 3.3214483940039894e-05, "loss": 0.7173, "step": 13227 }, { "epoch": 0.9202407040244878, "grad_norm": 1.1953125, "learning_rate": 3.315691583644165e-05, "loss": 0.6867, "step": 13228 }, { "epoch": 0.920310271661623, "grad_norm": 1.078125, "learning_rate": 3.309939682426222e-05, "loss": 0.8921, "step": 13229 }, { "epoch": 0.9203798392987582, "grad_norm": 0.94140625, "learning_rate": 3.30419269064216e-05, "loss": 0.7802, "step": 13230 }, { "epoch": 0.9204494069358934, "grad_norm": 1.21875, "learning_rate": 3.298450608583825e-05, "loss": 0.9367, "step": 13231 }, { "epoch": 0.9205189745730287, "grad_norm": 1.109375, "learning_rate": 3.29271343654276e-05, "loss": 0.7717, "step": 13232 }, { "epoch": 0.9205885422101638, "grad_norm": 1.2421875, "learning_rate": 3.286981174810266e-05, "loss": 0.8901, "step": 13233 }, { "epoch": 0.920658109847299, "grad_norm": 1.1640625, "learning_rate": 3.281253823677388e-05, "loss": 0.7818, "step": 13234 }, { "epoch": 0.9207276774844343, "grad_norm": 1.15625, "learning_rate": 3.275531383434938e-05, "loss": 0.8075, "step": 13235 }, { "epoch": 0.9207972451215695, "grad_norm": 1.0703125, "learning_rate": 3.269813854373493e-05, "loss": 0.793, "step": 13236 }, { "epoch": 0.9208668127587046, "grad_norm": 1.1875, "learning_rate": 3.264101236783346e-05, "loss": 0.8248, "step": 13237 }, { "epoch": 0.9209363803958398, "grad_norm": 1.1015625, "learning_rate": 3.25839353095454e-05, "loss": 0.7525, "step": 13238 }, { "epoch": 0.9210059480329751, "grad_norm": 1.0859375, "learning_rate": 3.2526907371768996e-05, "loss": 0.5513, "step": 13239 }, { "epoch": 0.9210755156701103, "grad_norm": 0.890625, "learning_rate": 3.246992855739983e-05, "loss": 0.7061, "step": 13240 }, { "epoch": 0.9211450833072454, "grad_norm": 1.1171875, "learning_rate": 3.2412998869331134e-05, "loss": 0.7867, "step": 13241 }, { "epoch": 0.9212146509443807, "grad_norm": 1.0, "learning_rate": 3.235611831045304e-05, "loss": 0.6275, "step": 13242 }, { "epoch": 0.9212842185815159, "grad_norm": 0.9296875, "learning_rate": 3.229928688365413e-05, "loss": 0.5783, "step": 13243 }, { "epoch": 0.921353786218651, "grad_norm": 1.4609375, "learning_rate": 3.224250459181988e-05, "loss": 0.6496, "step": 13244 }, { "epoch": 0.9214233538557863, "grad_norm": 1.0078125, "learning_rate": 3.218577143783341e-05, "loss": 0.8002, "step": 13245 }, { "epoch": 0.9214929214929215, "grad_norm": 1.015625, "learning_rate": 3.212908742457532e-05, "loss": 0.7586, "step": 13246 }, { "epoch": 0.9215624891300567, "grad_norm": 1.5859375, "learning_rate": 3.2072452554923746e-05, "loss": 0.9243, "step": 13247 }, { "epoch": 0.921632056767192, "grad_norm": 0.90625, "learning_rate": 3.201586683175417e-05, "loss": 0.7317, "step": 13248 }, { "epoch": 0.9217016244043271, "grad_norm": 1.2734375, "learning_rate": 3.1959330257939957e-05, "loss": 0.9879, "step": 13249 }, { "epoch": 0.9217711920414623, "grad_norm": 1.703125, "learning_rate": 3.1902842836351696e-05, "loss": 0.7883, "step": 13250 }, { "epoch": 0.9218407596785975, "grad_norm": 1.171875, "learning_rate": 3.184640456985755e-05, "loss": 0.8505, "step": 13251 }, { "epoch": 0.9219103273157327, "grad_norm": 1.03125, "learning_rate": 3.179001546132298e-05, "loss": 0.6454, "step": 13252 }, { "epoch": 0.9219798949528679, "grad_norm": 1.0859375, "learning_rate": 3.173367551361139e-05, "loss": 0.7965, "step": 13253 }, { "epoch": 0.9220494625900031, "grad_norm": 0.9375, "learning_rate": 3.167738472958337e-05, "loss": 0.6902, "step": 13254 }, { "epoch": 0.9221190302271384, "grad_norm": 1.109375, "learning_rate": 3.162114311209707e-05, "loss": 0.9991, "step": 13255 }, { "epoch": 0.9221885978642735, "grad_norm": 0.84765625, "learning_rate": 3.1564950664007996e-05, "loss": 0.5433, "step": 13256 }, { "epoch": 0.9222581655014087, "grad_norm": 1.0546875, "learning_rate": 3.1508807388169414e-05, "loss": 0.781, "step": 13257 }, { "epoch": 0.922327733138544, "grad_norm": 0.91015625, "learning_rate": 3.1452713287432154e-05, "loss": 0.7424, "step": 13258 }, { "epoch": 0.9223973007756792, "grad_norm": 1.171875, "learning_rate": 3.139666836464439e-05, "loss": 0.8479, "step": 13259 }, { "epoch": 0.9224668684128143, "grad_norm": 0.76953125, "learning_rate": 3.13406726226515e-05, "loss": 0.6565, "step": 13260 }, { "epoch": 0.9225364360499496, "grad_norm": 1.515625, "learning_rate": 3.128472606429689e-05, "loss": 0.8064, "step": 13261 }, { "epoch": 0.9226060036870848, "grad_norm": 1.0390625, "learning_rate": 3.122882869242116e-05, "loss": 0.7445, "step": 13262 }, { "epoch": 0.92267557132422, "grad_norm": 1.0, "learning_rate": 3.1172980509862504e-05, "loss": 0.5144, "step": 13263 }, { "epoch": 0.9227451389613551, "grad_norm": 1.125, "learning_rate": 3.111718151945686e-05, "loss": 0.8032, "step": 13264 }, { "epoch": 0.9228147065984904, "grad_norm": 0.94921875, "learning_rate": 3.106143172403708e-05, "loss": 0.7622, "step": 13265 }, { "epoch": 0.9228842742356256, "grad_norm": 1.0546875, "learning_rate": 3.10057311264339e-05, "loss": 0.5968, "step": 13266 }, { "epoch": 0.9229538418727607, "grad_norm": 1.1484375, "learning_rate": 3.095007972947572e-05, "loss": 0.8642, "step": 13267 }, { "epoch": 0.923023409509896, "grad_norm": 0.96484375, "learning_rate": 3.089447753598806e-05, "loss": 0.7421, "step": 13268 }, { "epoch": 0.9230929771470312, "grad_norm": 1.09375, "learning_rate": 3.083892454879433e-05, "loss": 0.6603, "step": 13269 }, { "epoch": 0.9231625447841664, "grad_norm": 0.95703125, "learning_rate": 3.0783420770714834e-05, "loss": 0.9264, "step": 13270 }, { "epoch": 0.9232321124213017, "grad_norm": 1.2421875, "learning_rate": 3.072796620456808e-05, "loss": 0.7977, "step": 13271 }, { "epoch": 0.9233016800584368, "grad_norm": 1.078125, "learning_rate": 3.067256085316983e-05, "loss": 0.7678, "step": 13272 }, { "epoch": 0.923371247695572, "grad_norm": 0.90625, "learning_rate": 3.0617204719333155e-05, "loss": 0.7053, "step": 13273 }, { "epoch": 0.9234408153327073, "grad_norm": 1.0859375, "learning_rate": 3.056189780586871e-05, "loss": 0.6626, "step": 13274 }, { "epoch": 0.9235103829698424, "grad_norm": 2.109375, "learning_rate": 3.0506640115584682e-05, "loss": 1.0136, "step": 13275 }, { "epoch": 0.9235799506069776, "grad_norm": 0.98046875, "learning_rate": 3.0451431651286943e-05, "loss": 0.6453, "step": 13276 }, { "epoch": 0.9236495182441128, "grad_norm": 1.53125, "learning_rate": 3.039627241577858e-05, "loss": 0.9795, "step": 13277 }, { "epoch": 0.9237190858812481, "grad_norm": 0.97265625, "learning_rate": 3.0341162411860466e-05, "loss": 0.7024, "step": 13278 }, { "epoch": 0.9237886535183832, "grad_norm": 1.1328125, "learning_rate": 3.02861016423307e-05, "loss": 0.8726, "step": 13279 }, { "epoch": 0.9238582211555184, "grad_norm": 0.98828125, "learning_rate": 3.0231090109984814e-05, "loss": 0.5164, "step": 13280 }, { "epoch": 0.9239277887926537, "grad_norm": 0.8359375, "learning_rate": 3.0176127817616138e-05, "loss": 0.6693, "step": 13281 }, { "epoch": 0.9239973564297889, "grad_norm": 1.390625, "learning_rate": 3.0121214768015548e-05, "loss": 0.7757, "step": 13282 }, { "epoch": 0.924066924066924, "grad_norm": 1.0, "learning_rate": 3.0066350963971145e-05, "loss": 0.6186, "step": 13283 }, { "epoch": 0.9241364917040593, "grad_norm": 1.0625, "learning_rate": 3.0011536408268482e-05, "loss": 0.8729, "step": 13284 }, { "epoch": 0.9242060593411945, "grad_norm": 1.328125, "learning_rate": 2.995677110369088e-05, "loss": 1.0522, "step": 13285 }, { "epoch": 0.9242756269783297, "grad_norm": 1.3828125, "learning_rate": 2.9902055053019238e-05, "loss": 0.9778, "step": 13286 }, { "epoch": 0.9243451946154649, "grad_norm": 1.0, "learning_rate": 2.984738825903155e-05, "loss": 0.628, "step": 13287 }, { "epoch": 0.9244147622526001, "grad_norm": 1.421875, "learning_rate": 2.979277072450348e-05, "loss": 0.7561, "step": 13288 }, { "epoch": 0.9244843298897353, "grad_norm": 1.203125, "learning_rate": 2.9738202452208263e-05, "loss": 0.8658, "step": 13289 }, { "epoch": 0.9245538975268704, "grad_norm": 1.15625, "learning_rate": 2.9683683444916787e-05, "loss": 0.6984, "step": 13290 }, { "epoch": 0.9246234651640057, "grad_norm": 1.421875, "learning_rate": 2.9629213705396953e-05, "loss": 0.6508, "step": 13291 }, { "epoch": 0.9246930328011409, "grad_norm": 0.96875, "learning_rate": 2.9574793236414764e-05, "loss": 0.6579, "step": 13292 }, { "epoch": 0.9247626004382761, "grad_norm": 1.4453125, "learning_rate": 2.952042204073324e-05, "loss": 0.9784, "step": 13293 }, { "epoch": 0.9248321680754114, "grad_norm": 1.2734375, "learning_rate": 2.9466100121112947e-05, "loss": 0.8942, "step": 13294 }, { "epoch": 0.9249017357125465, "grad_norm": 1.3671875, "learning_rate": 2.941182748031235e-05, "loss": 0.9168, "step": 13295 }, { "epoch": 0.9249713033496817, "grad_norm": 1.15625, "learning_rate": 2.935760412108701e-05, "loss": 1.0138, "step": 13296 }, { "epoch": 0.925040870986817, "grad_norm": 1.515625, "learning_rate": 2.9303430046190184e-05, "loss": 0.614, "step": 13297 }, { "epoch": 0.9251104386239521, "grad_norm": 1.0703125, "learning_rate": 2.9249305258372437e-05, "loss": 0.535, "step": 13298 }, { "epoch": 0.9251800062610873, "grad_norm": 1.3671875, "learning_rate": 2.9195229760382026e-05, "loss": 0.9334, "step": 13299 }, { "epoch": 0.9252495738982226, "grad_norm": 0.953125, "learning_rate": 2.9141203554964745e-05, "loss": 0.8297, "step": 13300 }, { "epoch": 0.9253191415353578, "grad_norm": 1.21875, "learning_rate": 2.9087226644863628e-05, "loss": 0.8797, "step": 13301 }, { "epoch": 0.9253887091724929, "grad_norm": 1.3359375, "learning_rate": 2.903329903281926e-05, "loss": 0.893, "step": 13302 }, { "epoch": 0.9254582768096281, "grad_norm": 0.86328125, "learning_rate": 2.8979420721569892e-05, "loss": 0.7535, "step": 13303 }, { "epoch": 0.9255278444467634, "grad_norm": 0.9375, "learning_rate": 2.892559171385145e-05, "loss": 0.6196, "step": 13304 }, { "epoch": 0.9255974120838986, "grad_norm": 0.9140625, "learning_rate": 2.8871812012396635e-05, "loss": 0.6602, "step": 13305 }, { "epoch": 0.9256669797210337, "grad_norm": 1.0546875, "learning_rate": 2.88180816199366e-05, "loss": 0.8892, "step": 13306 }, { "epoch": 0.925736547358169, "grad_norm": 0.96484375, "learning_rate": 2.876440053919904e-05, "loss": 0.7827, "step": 13307 }, { "epoch": 0.9258061149953042, "grad_norm": 0.8828125, "learning_rate": 2.871076877291001e-05, "loss": 0.6778, "step": 13308 }, { "epoch": 0.9258756826324394, "grad_norm": 0.91796875, "learning_rate": 2.8657186323792438e-05, "loss": 0.6027, "step": 13309 }, { "epoch": 0.9259452502695746, "grad_norm": 1.03125, "learning_rate": 2.8603653194567036e-05, "loss": 0.9155, "step": 13310 }, { "epoch": 0.9260148179067098, "grad_norm": 1.0234375, "learning_rate": 2.8550169387951852e-05, "loss": 0.6383, "step": 13311 }, { "epoch": 0.926084385543845, "grad_norm": 1.1171875, "learning_rate": 2.8496734906662604e-05, "loss": 0.7247, "step": 13312 }, { "epoch": 0.9261539531809803, "grad_norm": 1.203125, "learning_rate": 2.844334975341234e-05, "loss": 0.9816, "step": 13313 }, { "epoch": 0.9262235208181154, "grad_norm": 1.1875, "learning_rate": 2.8390013930912008e-05, "loss": 0.8588, "step": 13314 }, { "epoch": 0.9262930884552506, "grad_norm": 1.265625, "learning_rate": 2.8336727441869326e-05, "loss": 0.7097, "step": 13315 }, { "epoch": 0.9263626560923858, "grad_norm": 1.2890625, "learning_rate": 2.828349028899002e-05, "loss": 0.7123, "step": 13316 }, { "epoch": 0.926432223729521, "grad_norm": 1.109375, "learning_rate": 2.8230302474977376e-05, "loss": 0.7025, "step": 13317 }, { "epoch": 0.9265017913666562, "grad_norm": 1.1015625, "learning_rate": 2.8177164002531897e-05, "loss": 0.9708, "step": 13318 }, { "epoch": 0.9265713590037914, "grad_norm": 1.6015625, "learning_rate": 2.8124074874351646e-05, "loss": 0.7133, "step": 13319 }, { "epoch": 0.9266409266409267, "grad_norm": 1.1953125, "learning_rate": 2.8071035093132247e-05, "loss": 0.7549, "step": 13320 }, { "epoch": 0.9267104942780618, "grad_norm": 0.89453125, "learning_rate": 2.8018044661566768e-05, "loss": 0.7371, "step": 13321 }, { "epoch": 0.926780061915197, "grad_norm": 1.0, "learning_rate": 2.796510358234583e-05, "loss": 0.8252, "step": 13322 }, { "epoch": 0.9268496295523323, "grad_norm": 1.0390625, "learning_rate": 2.791221185815751e-05, "loss": 0.6271, "step": 13323 }, { "epoch": 0.9269191971894675, "grad_norm": 1.1640625, "learning_rate": 2.7859369491687547e-05, "loss": 0.7798, "step": 13324 }, { "epoch": 0.9269887648266026, "grad_norm": 0.984375, "learning_rate": 2.7806576485618683e-05, "loss": 0.7248, "step": 13325 }, { "epoch": 0.9270583324637379, "grad_norm": 1.109375, "learning_rate": 2.7753832842631665e-05, "loss": 0.6875, "step": 13326 }, { "epoch": 0.9271279001008731, "grad_norm": 1.09375, "learning_rate": 2.770113856540457e-05, "loss": 0.8432, "step": 13327 }, { "epoch": 0.9271974677380083, "grad_norm": 1.203125, "learning_rate": 2.7648493656612926e-05, "loss": 0.8636, "step": 13328 }, { "epoch": 0.9272670353751434, "grad_norm": 1.4453125, "learning_rate": 2.7595898118929706e-05, "loss": 1.0768, "step": 13329 }, { "epoch": 0.9273366030122787, "grad_norm": 1.109375, "learning_rate": 2.7543351955025552e-05, "loss": 0.8567, "step": 13330 }, { "epoch": 0.9274061706494139, "grad_norm": 1.2578125, "learning_rate": 2.749085516756833e-05, "loss": 0.801, "step": 13331 }, { "epoch": 0.9274757382865491, "grad_norm": 1.0625, "learning_rate": 2.7438407759223793e-05, "loss": 0.8172, "step": 13332 }, { "epoch": 0.9275453059236843, "grad_norm": 0.859375, "learning_rate": 2.7386009732654815e-05, "loss": 0.6904, "step": 13333 }, { "epoch": 0.9276148735608195, "grad_norm": 1.09375, "learning_rate": 2.7333661090521932e-05, "loss": 0.8203, "step": 13334 }, { "epoch": 0.9276844411979547, "grad_norm": 0.97265625, "learning_rate": 2.7281361835483022e-05, "loss": 0.7293, "step": 13335 }, { "epoch": 0.92775400883509, "grad_norm": 1.0703125, "learning_rate": 2.7229111970193842e-05, "loss": 0.8736, "step": 13336 }, { "epoch": 0.9278235764722251, "grad_norm": 1.34375, "learning_rate": 2.7176911497307166e-05, "loss": 0.9797, "step": 13337 }, { "epoch": 0.9278931441093603, "grad_norm": 1.4375, "learning_rate": 2.7124760419473537e-05, "loss": 0.8142, "step": 13338 }, { "epoch": 0.9279627117464956, "grad_norm": 0.96484375, "learning_rate": 2.7072658739340837e-05, "loss": 0.6258, "step": 13339 }, { "epoch": 0.9280322793836308, "grad_norm": 0.92578125, "learning_rate": 2.702060645955473e-05, "loss": 0.8679, "step": 13340 }, { "epoch": 0.9281018470207659, "grad_norm": 1.234375, "learning_rate": 2.696860358275799e-05, "loss": 0.946, "step": 13341 }, { "epoch": 0.9281714146579011, "grad_norm": 0.87109375, "learning_rate": 2.691665011159117e-05, "loss": 0.6146, "step": 13342 }, { "epoch": 0.9282409822950364, "grad_norm": 1.5234375, "learning_rate": 2.6864746048692156e-05, "loss": 1.1254, "step": 13343 }, { "epoch": 0.9283105499321715, "grad_norm": 1.2265625, "learning_rate": 2.6812891396696294e-05, "loss": 0.9415, "step": 13344 }, { "epoch": 0.9283801175693067, "grad_norm": 1.015625, "learning_rate": 2.676108615823658e-05, "loss": 0.6591, "step": 13345 }, { "epoch": 0.928449685206442, "grad_norm": 1.6015625, "learning_rate": 2.670933033594358e-05, "loss": 0.712, "step": 13346 }, { "epoch": 0.9285192528435772, "grad_norm": 1.21875, "learning_rate": 2.6657623932444975e-05, "loss": 0.6956, "step": 13347 }, { "epoch": 0.9285888204807123, "grad_norm": 1.203125, "learning_rate": 2.66059669503661e-05, "loss": 0.8038, "step": 13348 }, { "epoch": 0.9286583881178476, "grad_norm": 1.109375, "learning_rate": 2.6554359392329973e-05, "loss": 0.7031, "step": 13349 }, { "epoch": 0.9287279557549828, "grad_norm": 1.3046875, "learning_rate": 2.6502801260957054e-05, "loss": 0.8144, "step": 13350 }, { "epoch": 0.928797523392118, "grad_norm": 1.125, "learning_rate": 2.6451292558864915e-05, "loss": 0.8768, "step": 13351 }, { "epoch": 0.9288670910292532, "grad_norm": 1.046875, "learning_rate": 2.639983328866935e-05, "loss": 0.7542, "step": 13352 }, { "epoch": 0.9289366586663884, "grad_norm": 0.98828125, "learning_rate": 2.6348423452982717e-05, "loss": 0.9279, "step": 13353 }, { "epoch": 0.9290062263035236, "grad_norm": 1.2734375, "learning_rate": 2.6297063054415705e-05, "loss": 0.8012, "step": 13354 }, { "epoch": 0.9290757939406588, "grad_norm": 1.2734375, "learning_rate": 2.624575209557589e-05, "loss": 0.9576, "step": 13355 }, { "epoch": 0.929145361577794, "grad_norm": 1.34375, "learning_rate": 2.6194490579068864e-05, "loss": 0.7498, "step": 13356 }, { "epoch": 0.9292149292149292, "grad_norm": 1.1875, "learning_rate": 2.6143278507497203e-05, "loss": 0.7359, "step": 13357 }, { "epoch": 0.9292844968520644, "grad_norm": 1.3046875, "learning_rate": 2.6092115883461054e-05, "loss": 0.9154, "step": 13358 }, { "epoch": 0.9293540644891997, "grad_norm": 1.1484375, "learning_rate": 2.604100270955867e-05, "loss": 1.0482, "step": 13359 }, { "epoch": 0.9294236321263348, "grad_norm": 1.1484375, "learning_rate": 2.5989938988384976e-05, "loss": 0.7849, "step": 13360 }, { "epoch": 0.92949319976347, "grad_norm": 1.3046875, "learning_rate": 2.5938924722532788e-05, "loss": 0.9157, "step": 13361 }, { "epoch": 0.9295627674006053, "grad_norm": 1.09375, "learning_rate": 2.5887959914592364e-05, "loss": 0.8902, "step": 13362 }, { "epoch": 0.9296323350377405, "grad_norm": 1.3671875, "learning_rate": 2.5837044567151412e-05, "loss": 0.8757, "step": 13363 }, { "epoch": 0.9297019026748756, "grad_norm": 0.9140625, "learning_rate": 2.5786178682795204e-05, "loss": 0.6357, "step": 13364 }, { "epoch": 0.9297714703120109, "grad_norm": 1.1875, "learning_rate": 2.5735362264106442e-05, "loss": 0.7472, "step": 13365 }, { "epoch": 0.9298410379491461, "grad_norm": 1.015625, "learning_rate": 2.5684595313665405e-05, "loss": 0.7725, "step": 13366 }, { "epoch": 0.9299106055862812, "grad_norm": 1.21875, "learning_rate": 2.5633877834049578e-05, "loss": 0.9519, "step": 13367 }, { "epoch": 0.9299801732234164, "grad_norm": 1.34375, "learning_rate": 2.5583209827834353e-05, "loss": 0.8959, "step": 13368 }, { "epoch": 0.9300497408605517, "grad_norm": 0.95703125, "learning_rate": 2.5532591297592333e-05, "loss": 0.807, "step": 13369 }, { "epoch": 0.9301193084976869, "grad_norm": 1.2421875, "learning_rate": 2.5482022245893578e-05, "loss": 0.9299, "step": 13370 }, { "epoch": 0.930188876134822, "grad_norm": 0.80859375, "learning_rate": 2.543150267530592e-05, "loss": 0.661, "step": 13371 }, { "epoch": 0.9302584437719573, "grad_norm": 1.0234375, "learning_rate": 2.538103258839408e-05, "loss": 0.862, "step": 13372 }, { "epoch": 0.9303280114090925, "grad_norm": 0.96875, "learning_rate": 2.533061198772124e-05, "loss": 0.7233, "step": 13373 }, { "epoch": 0.9303975790462277, "grad_norm": 1.4921875, "learning_rate": 2.5280240875847126e-05, "loss": 0.6801, "step": 13374 }, { "epoch": 0.930467146683363, "grad_norm": 1.0390625, "learning_rate": 2.522991925532958e-05, "loss": 0.9739, "step": 13375 }, { "epoch": 0.9305367143204981, "grad_norm": 1.4453125, "learning_rate": 2.5179647128723337e-05, "loss": 0.9727, "step": 13376 }, { "epoch": 0.9306062819576333, "grad_norm": 1.2109375, "learning_rate": 2.5129424498581132e-05, "loss": 0.6165, "step": 13377 }, { "epoch": 0.9306758495947686, "grad_norm": 1.2109375, "learning_rate": 2.507925136745315e-05, "loss": 0.6481, "step": 13378 }, { "epoch": 0.9307454172319037, "grad_norm": 1.0234375, "learning_rate": 2.5029127737886793e-05, "loss": 0.8426, "step": 13379 }, { "epoch": 0.9308149848690389, "grad_norm": 1.25, "learning_rate": 2.497905361242714e-05, "loss": 0.7517, "step": 13380 }, { "epoch": 0.9308845525061741, "grad_norm": 1.2421875, "learning_rate": 2.4929028993616598e-05, "loss": 0.7481, "step": 13381 }, { "epoch": 0.9309541201433094, "grad_norm": 1.015625, "learning_rate": 2.487905388399525e-05, "loss": 0.612, "step": 13382 }, { "epoch": 0.9310236877804445, "grad_norm": 1.484375, "learning_rate": 2.482912828610062e-05, "loss": 0.7068, "step": 13383 }, { "epoch": 0.9310932554175797, "grad_norm": 1.390625, "learning_rate": 2.4779252202467685e-05, "loss": 0.823, "step": 13384 }, { "epoch": 0.931162823054715, "grad_norm": 1.046875, "learning_rate": 2.4729425635628634e-05, "loss": 0.6847, "step": 13385 }, { "epoch": 0.9312323906918502, "grad_norm": 1.234375, "learning_rate": 2.4679648588113777e-05, "loss": 0.8414, "step": 13386 }, { "epoch": 0.9313019583289853, "grad_norm": 1.0078125, "learning_rate": 2.462992106245043e-05, "loss": 0.9564, "step": 13387 }, { "epoch": 0.9313715259661206, "grad_norm": 1.375, "learning_rate": 2.4580243061163466e-05, "loss": 0.709, "step": 13388 }, { "epoch": 0.9314410936032558, "grad_norm": 0.92578125, "learning_rate": 2.453061458677519e-05, "loss": 0.7071, "step": 13389 }, { "epoch": 0.931510661240391, "grad_norm": 0.8984375, "learning_rate": 2.448103564180548e-05, "loss": 0.6645, "step": 13390 }, { "epoch": 0.9315802288775262, "grad_norm": 1.828125, "learning_rate": 2.4431506228771993e-05, "loss": 0.8114, "step": 13391 }, { "epoch": 0.9316497965146614, "grad_norm": 1.0078125, "learning_rate": 2.438202635018938e-05, "loss": 0.735, "step": 13392 }, { "epoch": 0.9317193641517966, "grad_norm": 1.046875, "learning_rate": 2.4332596008569853e-05, "loss": 0.9012, "step": 13393 }, { "epoch": 0.9317889317889317, "grad_norm": 1.25, "learning_rate": 2.4283215206423514e-05, "loss": 0.8712, "step": 13394 }, { "epoch": 0.931858499426067, "grad_norm": 1.1171875, "learning_rate": 2.4233883946257364e-05, "loss": 0.8293, "step": 13395 }, { "epoch": 0.9319280670632022, "grad_norm": 1.15625, "learning_rate": 2.4184602230576613e-05, "loss": 0.698, "step": 13396 }, { "epoch": 0.9319976347003374, "grad_norm": 1.125, "learning_rate": 2.4135370061883045e-05, "loss": 0.7936, "step": 13397 }, { "epoch": 0.9320672023374726, "grad_norm": 1.21875, "learning_rate": 2.4086187442676766e-05, "loss": 0.9763, "step": 13398 }, { "epoch": 0.9321367699746078, "grad_norm": 0.91015625, "learning_rate": 2.403705437545489e-05, "loss": 0.7281, "step": 13399 }, { "epoch": 0.932206337611743, "grad_norm": 1.3046875, "learning_rate": 2.3987970862712204e-05, "loss": 0.7393, "step": 13400 }, { "epoch": 0.9322759052488783, "grad_norm": 1.390625, "learning_rate": 2.3938936906940824e-05, "loss": 0.9169, "step": 13401 }, { "epoch": 0.9323454728860134, "grad_norm": 1.3203125, "learning_rate": 2.3889952510630643e-05, "loss": 0.7412, "step": 13402 }, { "epoch": 0.9324150405231486, "grad_norm": 1.0234375, "learning_rate": 2.3841017676268673e-05, "loss": 0.6227, "step": 13403 }, { "epoch": 0.9324846081602839, "grad_norm": 1.1328125, "learning_rate": 2.3792132406339485e-05, "loss": 1.0202, "step": 13404 }, { "epoch": 0.9325541757974191, "grad_norm": 1.2265625, "learning_rate": 2.3743296703325533e-05, "loss": 0.7722, "step": 13405 }, { "epoch": 0.9326237434345542, "grad_norm": 1.390625, "learning_rate": 2.3694510569706285e-05, "loss": 1.0787, "step": 13406 }, { "epoch": 0.9326933110716894, "grad_norm": 0.8671875, "learning_rate": 2.3645774007958754e-05, "loss": 0.6709, "step": 13407 }, { "epoch": 0.9327628787088247, "grad_norm": 0.9453125, "learning_rate": 2.3597087020557628e-05, "loss": 0.6799, "step": 13408 }, { "epoch": 0.9328324463459599, "grad_norm": 1.1015625, "learning_rate": 2.354844960997493e-05, "loss": 0.726, "step": 13409 }, { "epoch": 0.932902013983095, "grad_norm": 0.9375, "learning_rate": 2.3499861778680463e-05, "loss": 1.0161, "step": 13410 }, { "epoch": 0.9329715816202303, "grad_norm": 1.265625, "learning_rate": 2.3451323529140923e-05, "loss": 0.8431, "step": 13411 }, { "epoch": 0.9330411492573655, "grad_norm": 1.0625, "learning_rate": 2.340283486382111e-05, "loss": 0.8835, "step": 13412 }, { "epoch": 0.9331107168945006, "grad_norm": 1.046875, "learning_rate": 2.3354395785182836e-05, "loss": 0.6826, "step": 13413 }, { "epoch": 0.9331802845316359, "grad_norm": 1.1015625, "learning_rate": 2.330600629568569e-05, "loss": 0.8793, "step": 13414 }, { "epoch": 0.9332498521687711, "grad_norm": 1.0546875, "learning_rate": 2.3257666397786702e-05, "loss": 0.9135, "step": 13415 }, { "epoch": 0.9333194198059063, "grad_norm": 0.9765625, "learning_rate": 2.320937609394025e-05, "loss": 0.6617, "step": 13416 }, { "epoch": 0.9333889874430416, "grad_norm": 1.3515625, "learning_rate": 2.3161135386598255e-05, "loss": 0.8328, "step": 13417 }, { "epoch": 0.9334585550801767, "grad_norm": 1.5859375, "learning_rate": 2.31129442782102e-05, "loss": 0.822, "step": 13418 }, { "epoch": 0.9335281227173119, "grad_norm": 1.1796875, "learning_rate": 2.3064802771223026e-05, "loss": 0.5763, "step": 13419 }, { "epoch": 0.9335976903544471, "grad_norm": 1.0234375, "learning_rate": 2.301671086808099e-05, "loss": 0.7431, "step": 13420 }, { "epoch": 0.9336672579915823, "grad_norm": 0.94140625, "learning_rate": 2.2968668571226038e-05, "loss": 0.7305, "step": 13421 }, { "epoch": 0.9337368256287175, "grad_norm": 1.125, "learning_rate": 2.292067588309732e-05, "loss": 0.6679, "step": 13422 }, { "epoch": 0.9338063932658527, "grad_norm": 1.359375, "learning_rate": 2.287273280613211e-05, "loss": 0.9048, "step": 13423 }, { "epoch": 0.933875960902988, "grad_norm": 1.0390625, "learning_rate": 2.282483934276436e-05, "loss": 0.873, "step": 13424 }, { "epoch": 0.9339455285401231, "grad_norm": 1.1640625, "learning_rate": 2.2776995495425778e-05, "loss": 0.6686, "step": 13425 }, { "epoch": 0.9340150961772583, "grad_norm": 1.1328125, "learning_rate": 2.2729201266545983e-05, "loss": 0.7767, "step": 13426 }, { "epoch": 0.9340846638143936, "grad_norm": 1.265625, "learning_rate": 2.268145665855148e-05, "loss": 0.8113, "step": 13427 }, { "epoch": 0.9341542314515288, "grad_norm": 1.4296875, "learning_rate": 2.2633761673866548e-05, "loss": 0.8365, "step": 13428 }, { "epoch": 0.9342237990886639, "grad_norm": 0.859375, "learning_rate": 2.2586116314912807e-05, "loss": 0.7165, "step": 13429 }, { "epoch": 0.9342933667257992, "grad_norm": 1.3515625, "learning_rate": 2.2538520584109766e-05, "loss": 0.7445, "step": 13430 }, { "epoch": 0.9343629343629344, "grad_norm": 1.0078125, "learning_rate": 2.2490974483873715e-05, "loss": 0.788, "step": 13431 }, { "epoch": 0.9344325020000696, "grad_norm": 1.1640625, "learning_rate": 2.2443478016618945e-05, "loss": 0.8022, "step": 13432 }, { "epoch": 0.9345020696372047, "grad_norm": 1.1171875, "learning_rate": 2.2396031184757193e-05, "loss": 0.8723, "step": 13433 }, { "epoch": 0.93457163727434, "grad_norm": 1.015625, "learning_rate": 2.234863399069753e-05, "loss": 0.7293, "step": 13434 }, { "epoch": 0.9346412049114752, "grad_norm": 1.1953125, "learning_rate": 2.230128643684648e-05, "loss": 0.659, "step": 13435 }, { "epoch": 0.9347107725486103, "grad_norm": 1.1171875, "learning_rate": 2.2253988525608004e-05, "loss": 0.6691, "step": 13436 }, { "epoch": 0.9347803401857456, "grad_norm": 1.390625, "learning_rate": 2.2206740259383963e-05, "loss": 1.1001, "step": 13437 }, { "epoch": 0.9348499078228808, "grad_norm": 1.03125, "learning_rate": 2.2159541640573212e-05, "loss": 0.7743, "step": 13438 }, { "epoch": 0.934919475460016, "grad_norm": 1.1640625, "learning_rate": 2.2112392671572058e-05, "loss": 0.9172, "step": 13439 }, { "epoch": 0.9349890430971513, "grad_norm": 1.1640625, "learning_rate": 2.2065293354774916e-05, "loss": 0.8179, "step": 13440 }, { "epoch": 0.9350586107342864, "grad_norm": 0.8515625, "learning_rate": 2.201824369257288e-05, "loss": 0.6874, "step": 13441 }, { "epoch": 0.9351281783714216, "grad_norm": 0.9765625, "learning_rate": 2.1971243687355034e-05, "loss": 0.7957, "step": 13442 }, { "epoch": 0.9351977460085569, "grad_norm": 1.2109375, "learning_rate": 2.1924293341507804e-05, "loss": 0.9496, "step": 13443 }, { "epoch": 0.935267313645692, "grad_norm": 1.25, "learning_rate": 2.1877392657415172e-05, "loss": 0.8483, "step": 13444 }, { "epoch": 0.9353368812828272, "grad_norm": 0.875, "learning_rate": 2.1830541637458347e-05, "loss": 0.7571, "step": 13445 }, { "epoch": 0.9354064489199624, "grad_norm": 1.1328125, "learning_rate": 2.1783740284016306e-05, "loss": 0.9877, "step": 13446 }, { "epoch": 0.9354760165570977, "grad_norm": 1.375, "learning_rate": 2.173698859946538e-05, "loss": 1.0809, "step": 13447 }, { "epoch": 0.9355455841942328, "grad_norm": 1.875, "learning_rate": 2.169028658617944e-05, "loss": 0.8189, "step": 13448 }, { "epoch": 0.935615151831368, "grad_norm": 1.1171875, "learning_rate": 2.1643634246529597e-05, "loss": 0.7207, "step": 13449 }, { "epoch": 0.9356847194685033, "grad_norm": 1.125, "learning_rate": 2.159703158288462e-05, "loss": 0.608, "step": 13450 }, { "epoch": 0.9357542871056385, "grad_norm": 1.1328125, "learning_rate": 2.1550478597611055e-05, "loss": 0.6458, "step": 13451 }, { "epoch": 0.9358238547427736, "grad_norm": 1.171875, "learning_rate": 2.1503975293072466e-05, "loss": 0.7931, "step": 13452 }, { "epoch": 0.9358934223799089, "grad_norm": 1.1171875, "learning_rate": 2.1457521671629842e-05, "loss": 0.8415, "step": 13453 }, { "epoch": 0.9359629900170441, "grad_norm": 1.0390625, "learning_rate": 2.1411117735642194e-05, "loss": 0.7379, "step": 13454 }, { "epoch": 0.9360325576541793, "grad_norm": 1.2734375, "learning_rate": 2.136476348746541e-05, "loss": 0.7582, "step": 13455 }, { "epoch": 0.9361021252913145, "grad_norm": 1.1875, "learning_rate": 2.1318458929453388e-05, "loss": 0.8989, "step": 13456 }, { "epoch": 0.9361716929284497, "grad_norm": 1.2578125, "learning_rate": 2.1272204063957022e-05, "loss": 0.571, "step": 13457 }, { "epoch": 0.9362412605655849, "grad_norm": 1.109375, "learning_rate": 2.1225998893324993e-05, "loss": 0.6005, "step": 13458 }, { "epoch": 0.93631082820272, "grad_norm": 1.46875, "learning_rate": 2.117984341990331e-05, "loss": 0.7056, "step": 13459 }, { "epoch": 0.9363803958398553, "grad_norm": 1.1953125, "learning_rate": 2.1133737646035544e-05, "loss": 0.7055, "step": 13460 }, { "epoch": 0.9364499634769905, "grad_norm": 0.984375, "learning_rate": 2.1087681574062824e-05, "loss": 0.6748, "step": 13461 }, { "epoch": 0.9365195311141257, "grad_norm": 0.984375, "learning_rate": 2.1041675206323498e-05, "loss": 0.7552, "step": 13462 }, { "epoch": 0.936589098751261, "grad_norm": 1.3828125, "learning_rate": 2.0995718545153585e-05, "loss": 0.9645, "step": 13463 }, { "epoch": 0.9366586663883961, "grad_norm": 1.4765625, "learning_rate": 2.094981159288656e-05, "loss": 0.7061, "step": 13464 }, { "epoch": 0.9367282340255313, "grad_norm": 1.546875, "learning_rate": 2.0903954351853328e-05, "loss": 0.7951, "step": 13465 }, { "epoch": 0.9367978016626666, "grad_norm": 1.0546875, "learning_rate": 2.085814682438225e-05, "loss": 0.8461, "step": 13466 }, { "epoch": 0.9368673692998017, "grad_norm": 1.015625, "learning_rate": 2.0812389012799248e-05, "loss": 0.7609, "step": 13467 }, { "epoch": 0.9369369369369369, "grad_norm": 1.078125, "learning_rate": 2.0766680919427682e-05, "loss": 0.7872, "step": 13468 }, { "epoch": 0.9370065045740722, "grad_norm": 1.6796875, "learning_rate": 2.0721022546588362e-05, "loss": 0.8846, "step": 13469 }, { "epoch": 0.9370760722112074, "grad_norm": 1.3125, "learning_rate": 2.0675413896599548e-05, "loss": 0.7828, "step": 13470 }, { "epoch": 0.9371456398483425, "grad_norm": 1.078125, "learning_rate": 2.0629854971777053e-05, "loss": 0.5559, "step": 13471 }, { "epoch": 0.9372152074854777, "grad_norm": 1.2734375, "learning_rate": 2.0584345774434243e-05, "loss": 0.8942, "step": 13472 }, { "epoch": 0.937284775122613, "grad_norm": 0.9296875, "learning_rate": 2.053888630688161e-05, "loss": 0.7171, "step": 13473 }, { "epoch": 0.9373543427597482, "grad_norm": 1.34375, "learning_rate": 2.0493476571427526e-05, "loss": 0.8903, "step": 13474 }, { "epoch": 0.9374239103968833, "grad_norm": 1.328125, "learning_rate": 2.0448116570377596e-05, "loss": 0.8405, "step": 13475 }, { "epoch": 0.9374934780340186, "grad_norm": 1.078125, "learning_rate": 2.0402806306034973e-05, "loss": 0.5641, "step": 13476 }, { "epoch": 0.9375630456711538, "grad_norm": 1.046875, "learning_rate": 2.035754578070037e-05, "loss": 0.7364, "step": 13477 }, { "epoch": 0.937632613308289, "grad_norm": 1.3046875, "learning_rate": 2.0312334996671734e-05, "loss": 0.9378, "step": 13478 }, { "epoch": 0.9377021809454242, "grad_norm": 1.03125, "learning_rate": 2.0267173956244887e-05, "loss": 0.7104, "step": 13479 }, { "epoch": 0.9377717485825594, "grad_norm": 1.484375, "learning_rate": 2.022206266171267e-05, "loss": 0.8865, "step": 13480 }, { "epoch": 0.9378413162196946, "grad_norm": 1.2890625, "learning_rate": 2.017700111536558e-05, "loss": 0.7749, "step": 13481 }, { "epoch": 0.9379108838568299, "grad_norm": 1.0859375, "learning_rate": 2.0131989319491784e-05, "loss": 0.9203, "step": 13482 }, { "epoch": 0.937980451493965, "grad_norm": 1.40625, "learning_rate": 2.008702727637668e-05, "loss": 1.0085, "step": 13483 }, { "epoch": 0.9380500191311002, "grad_norm": 1.0703125, "learning_rate": 2.0042114988303217e-05, "loss": 0.7835, "step": 13484 }, { "epoch": 0.9381195867682354, "grad_norm": 1.09375, "learning_rate": 1.9997252457551685e-05, "loss": 0.7453, "step": 13485 }, { "epoch": 0.9381891544053707, "grad_norm": 1.0546875, "learning_rate": 1.9952439686400148e-05, "loss": 0.5907, "step": 13486 }, { "epoch": 0.9382587220425058, "grad_norm": 0.9765625, "learning_rate": 1.9907676677123898e-05, "loss": 0.6764, "step": 13487 }, { "epoch": 0.938328289679641, "grad_norm": 1.3984375, "learning_rate": 1.9862963431995895e-05, "loss": 0.912, "step": 13488 }, { "epoch": 0.9383978573167763, "grad_norm": 1.0703125, "learning_rate": 1.981829995328621e-05, "loss": 0.8614, "step": 13489 }, { "epoch": 0.9384674249539114, "grad_norm": 1.328125, "learning_rate": 1.9773686243262924e-05, "loss": 1.0099, "step": 13490 }, { "epoch": 0.9385369925910466, "grad_norm": 1.0859375, "learning_rate": 1.9729122304191104e-05, "loss": 0.6065, "step": 13491 }, { "epoch": 0.9386065602281819, "grad_norm": 1.546875, "learning_rate": 1.9684608138333392e-05, "loss": 0.9515, "step": 13492 }, { "epoch": 0.9386761278653171, "grad_norm": 1.0390625, "learning_rate": 1.9640143747950312e-05, "loss": 0.6751, "step": 13493 }, { "epoch": 0.9387456955024522, "grad_norm": 1.0, "learning_rate": 1.959572913529928e-05, "loss": 0.6923, "step": 13494 }, { "epoch": 0.9388152631395875, "grad_norm": 1.2421875, "learning_rate": 1.9551364302635377e-05, "loss": 1.0487, "step": 13495 }, { "epoch": 0.9388848307767227, "grad_norm": 1.3359375, "learning_rate": 1.9507049252211472e-05, "loss": 0.9824, "step": 13496 }, { "epoch": 0.9389543984138579, "grad_norm": 1.15625, "learning_rate": 1.9462783986277655e-05, "loss": 0.9057, "step": 13497 }, { "epoch": 0.939023966050993, "grad_norm": 0.85546875, "learning_rate": 1.9418568507081346e-05, "loss": 0.6693, "step": 13498 }, { "epoch": 0.9390935336881283, "grad_norm": 1.0, "learning_rate": 1.937440281686753e-05, "loss": 0.5237, "step": 13499 }, { "epoch": 0.9391631013252635, "grad_norm": 0.87890625, "learning_rate": 1.933028691787886e-05, "loss": 0.6226, "step": 13500 }, { "epoch": 0.9392326689623987, "grad_norm": 1.2734375, "learning_rate": 1.9286220812355317e-05, "loss": 0.77, "step": 13501 }, { "epoch": 0.9393022365995339, "grad_norm": 0.9453125, "learning_rate": 1.9242204502534332e-05, "loss": 0.6295, "step": 13502 }, { "epoch": 0.9393718042366691, "grad_norm": 1.2578125, "learning_rate": 1.9198237990650792e-05, "loss": 0.6855, "step": 13503 }, { "epoch": 0.9394413718738043, "grad_norm": 1.0234375, "learning_rate": 1.9154321278937126e-05, "loss": 0.7297, "step": 13504 }, { "epoch": 0.9395109395109396, "grad_norm": 1.0390625, "learning_rate": 1.911045436962322e-05, "loss": 0.8111, "step": 13505 }, { "epoch": 0.9395805071480747, "grad_norm": 1.0703125, "learning_rate": 1.9066637264936293e-05, "loss": 0.7445, "step": 13506 }, { "epoch": 0.9396500747852099, "grad_norm": 1.1796875, "learning_rate": 1.902286996710134e-05, "loss": 0.8456, "step": 13507 }, { "epoch": 0.9397196424223452, "grad_norm": 1.203125, "learning_rate": 1.8979152478340588e-05, "loss": 0.8239, "step": 13508 }, { "epoch": 0.9397892100594804, "grad_norm": 1.1171875, "learning_rate": 1.8935484800873702e-05, "loss": 0.6998, "step": 13509 }, { "epoch": 0.9398587776966155, "grad_norm": 1.0546875, "learning_rate": 1.8891866936917913e-05, "loss": 0.8455, "step": 13510 }, { "epoch": 0.9399283453337507, "grad_norm": 1.140625, "learning_rate": 1.8848298888688108e-05, "loss": 0.7153, "step": 13511 }, { "epoch": 0.939997912970886, "grad_norm": 1.0546875, "learning_rate": 1.8804780658396303e-05, "loss": 0.9088, "step": 13512 }, { "epoch": 0.9400674806080211, "grad_norm": 1.0234375, "learning_rate": 1.876131224825195e-05, "loss": 0.6779, "step": 13513 }, { "epoch": 0.9401370482451563, "grad_norm": 1.1796875, "learning_rate": 1.8717893660462502e-05, "loss": 0.6937, "step": 13514 }, { "epoch": 0.9402066158822916, "grad_norm": 0.98046875, "learning_rate": 1.8674524897232427e-05, "loss": 0.6491, "step": 13515 }, { "epoch": 0.9402761835194268, "grad_norm": 0.9609375, "learning_rate": 1.863120596076373e-05, "loss": 0.7033, "step": 13516 }, { "epoch": 0.9403457511565619, "grad_norm": 1.15625, "learning_rate": 1.858793685325577e-05, "loss": 0.6249, "step": 13517 }, { "epoch": 0.9404153187936972, "grad_norm": 1.2265625, "learning_rate": 1.85447175769059e-05, "loss": 0.7647, "step": 13518 }, { "epoch": 0.9404848864308324, "grad_norm": 1.28125, "learning_rate": 1.850154813390814e-05, "loss": 0.8839, "step": 13519 }, { "epoch": 0.9405544540679676, "grad_norm": 1.8203125, "learning_rate": 1.845842852645474e-05, "loss": 1.343, "step": 13520 }, { "epoch": 0.9406240217051027, "grad_norm": 1.125, "learning_rate": 1.8415358756735168e-05, "loss": 0.6996, "step": 13521 }, { "epoch": 0.940693589342238, "grad_norm": 1.3515625, "learning_rate": 1.8372338826936007e-05, "loss": 0.9822, "step": 13522 }, { "epoch": 0.9407631569793732, "grad_norm": 1.0, "learning_rate": 1.8329368739241625e-05, "loss": 0.5543, "step": 13523 }, { "epoch": 0.9408327246165084, "grad_norm": 1.1015625, "learning_rate": 1.828644849583394e-05, "loss": 0.9282, "step": 13524 }, { "epoch": 0.9409022922536436, "grad_norm": 1.453125, "learning_rate": 1.8243578098892322e-05, "loss": 0.9974, "step": 13525 }, { "epoch": 0.9409718598907788, "grad_norm": 1.2265625, "learning_rate": 1.820075755059336e-05, "loss": 0.7867, "step": 13526 }, { "epoch": 0.941041427527914, "grad_norm": 1.1484375, "learning_rate": 1.8157986853111208e-05, "loss": 1.1717, "step": 13527 }, { "epoch": 0.9411109951650493, "grad_norm": 1.1015625, "learning_rate": 1.811526600861757e-05, "loss": 0.8979, "step": 13528 }, { "epoch": 0.9411805628021844, "grad_norm": 0.97265625, "learning_rate": 1.8072595019281824e-05, "loss": 0.5598, "step": 13529 }, { "epoch": 0.9412501304393196, "grad_norm": 1.5078125, "learning_rate": 1.8029973887270344e-05, "loss": 1.0097, "step": 13530 }, { "epoch": 0.9413196980764549, "grad_norm": 1.046875, "learning_rate": 1.7987402614747296e-05, "loss": 0.7811, "step": 13531 }, { "epoch": 0.94138926571359, "grad_norm": 1.09375, "learning_rate": 1.7944881203874162e-05, "loss": 0.7663, "step": 13532 }, { "epoch": 0.9414588333507252, "grad_norm": 1.421875, "learning_rate": 1.7902409656810226e-05, "loss": 0.9591, "step": 13533 }, { "epoch": 0.9415284009878604, "grad_norm": 1.171875, "learning_rate": 1.7859987975711644e-05, "loss": 0.8446, "step": 13534 }, { "epoch": 0.9415979686249957, "grad_norm": 1.2265625, "learning_rate": 1.7817616162732587e-05, "loss": 0.7937, "step": 13535 }, { "epoch": 0.9416675362621308, "grad_norm": 1.234375, "learning_rate": 1.777529422002444e-05, "loss": 1.1306, "step": 13536 }, { "epoch": 0.941737103899266, "grad_norm": 0.9921875, "learning_rate": 1.7733022149735934e-05, "loss": 0.5752, "step": 13537 }, { "epoch": 0.9418066715364013, "grad_norm": 1.0390625, "learning_rate": 1.769079995401357e-05, "loss": 0.7649, "step": 13538 }, { "epoch": 0.9418762391735365, "grad_norm": 1.515625, "learning_rate": 1.76486276350013e-05, "loss": 0.9554, "step": 13539 }, { "epoch": 0.9419458068106716, "grad_norm": 1.140625, "learning_rate": 1.7606505194840304e-05, "loss": 0.9893, "step": 13540 }, { "epoch": 0.9420153744478069, "grad_norm": 1.0859375, "learning_rate": 1.7564432635669314e-05, "loss": 0.7798, "step": 13541 }, { "epoch": 0.9420849420849421, "grad_norm": 1.21875, "learning_rate": 1.752240995962451e-05, "loss": 0.8572, "step": 13542 }, { "epoch": 0.9421545097220773, "grad_norm": 1.078125, "learning_rate": 1.7480437168839847e-05, "loss": 0.9295, "step": 13543 }, { "epoch": 0.9422240773592125, "grad_norm": 1.1953125, "learning_rate": 1.743851426544618e-05, "loss": 0.7993, "step": 13544 }, { "epoch": 0.9422936449963477, "grad_norm": 1.203125, "learning_rate": 1.7396641251572364e-05, "loss": 0.8911, "step": 13545 }, { "epoch": 0.9423632126334829, "grad_norm": 1.3046875, "learning_rate": 1.7354818129344253e-05, "loss": 0.9248, "step": 13546 }, { "epoch": 0.942432780270618, "grad_norm": 1.1484375, "learning_rate": 1.731304490088581e-05, "loss": 0.7356, "step": 13547 }, { "epoch": 0.9425023479077533, "grad_norm": 1.0078125, "learning_rate": 1.7271321568317677e-05, "loss": 0.5402, "step": 13548 }, { "epoch": 0.9425719155448885, "grad_norm": 1.0, "learning_rate": 1.72296481337586e-05, "loss": 0.5653, "step": 13549 }, { "epoch": 0.9426414831820237, "grad_norm": 1.0625, "learning_rate": 1.7188024599324448e-05, "loss": 0.7083, "step": 13550 }, { "epoch": 0.942711050819159, "grad_norm": 1.1328125, "learning_rate": 1.7146450967128635e-05, "loss": 0.8384, "step": 13551 }, { "epoch": 0.9427806184562941, "grad_norm": 1.15625, "learning_rate": 1.710492723928203e-05, "loss": 0.6302, "step": 13552 }, { "epoch": 0.9428501860934293, "grad_norm": 1.2890625, "learning_rate": 1.7063453417893173e-05, "loss": 0.7427, "step": 13553 }, { "epoch": 0.9429197537305646, "grad_norm": 1.078125, "learning_rate": 1.7022029505067816e-05, "loss": 0.7552, "step": 13554 }, { "epoch": 0.9429893213676998, "grad_norm": 1.0390625, "learning_rate": 1.698065550290906e-05, "loss": 0.6747, "step": 13555 }, { "epoch": 0.9430588890048349, "grad_norm": 1.1015625, "learning_rate": 1.693933141351789e-05, "loss": 0.7567, "step": 13556 }, { "epoch": 0.9431284566419702, "grad_norm": 1.0234375, "learning_rate": 1.6898057238992625e-05, "loss": 0.7553, "step": 13557 }, { "epoch": 0.9431980242791054, "grad_norm": 1.109375, "learning_rate": 1.6856832981428706e-05, "loss": 0.7016, "step": 13558 }, { "epoch": 0.9432675919162405, "grad_norm": 0.94140625, "learning_rate": 1.681565864291934e-05, "loss": 0.7851, "step": 13559 }, { "epoch": 0.9433371595533757, "grad_norm": 1.0859375, "learning_rate": 1.6774534225555194e-05, "loss": 0.9058, "step": 13560 }, { "epoch": 0.943406727190511, "grad_norm": 1.390625, "learning_rate": 1.6733459731424594e-05, "loss": 0.7591, "step": 13561 }, { "epoch": 0.9434762948276462, "grad_norm": 1.09375, "learning_rate": 1.6692435162612764e-05, "loss": 0.6447, "step": 13562 }, { "epoch": 0.9435458624647813, "grad_norm": 1.1796875, "learning_rate": 1.66514605212027e-05, "loss": 1.0391, "step": 13563 }, { "epoch": 0.9436154301019166, "grad_norm": 1.4296875, "learning_rate": 1.6610535809275185e-05, "loss": 0.7364, "step": 13564 }, { "epoch": 0.9436849977390518, "grad_norm": 1.0625, "learning_rate": 1.6569661028908e-05, "loss": 0.6518, "step": 13565 }, { "epoch": 0.943754565376187, "grad_norm": 1.2109375, "learning_rate": 1.6528836182176487e-05, "loss": 0.8753, "step": 13566 }, { "epoch": 0.9438241330133222, "grad_norm": 1.15625, "learning_rate": 1.6488061271153653e-05, "loss": 0.9546, "step": 13567 }, { "epoch": 0.9438937006504574, "grad_norm": 0.859375, "learning_rate": 1.6447336297909842e-05, "loss": 0.7661, "step": 13568 }, { "epoch": 0.9439632682875926, "grad_norm": 1.1484375, "learning_rate": 1.6406661264512733e-05, "loss": 0.8817, "step": 13569 }, { "epoch": 0.9440328359247279, "grad_norm": 0.9375, "learning_rate": 1.6366036173027676e-05, "loss": 0.7684, "step": 13570 }, { "epoch": 0.944102403561863, "grad_norm": 1.3671875, "learning_rate": 1.6325461025517574e-05, "loss": 0.9332, "step": 13571 }, { "epoch": 0.9441719711989982, "grad_norm": 1.3671875, "learning_rate": 1.6284935824042447e-05, "loss": 0.7933, "step": 13572 }, { "epoch": 0.9442415388361334, "grad_norm": 1.3671875, "learning_rate": 1.624446057065987e-05, "loss": 0.8663, "step": 13573 }, { "epoch": 0.9443111064732687, "grad_norm": 1.3203125, "learning_rate": 1.6204035267425088e-05, "loss": 0.9225, "step": 13574 }, { "epoch": 0.9443806741104038, "grad_norm": 1.4296875, "learning_rate": 1.6163659916390794e-05, "loss": 1.2551, "step": 13575 }, { "epoch": 0.944450241747539, "grad_norm": 0.9921875, "learning_rate": 1.61233345196069e-05, "loss": 0.6812, "step": 13576 }, { "epoch": 0.9445198093846743, "grad_norm": 1.0625, "learning_rate": 1.6083059079121e-05, "loss": 0.5759, "step": 13577 }, { "epoch": 0.9445893770218095, "grad_norm": 1.2890625, "learning_rate": 1.6042833596978e-05, "loss": 0.8327, "step": 13578 }, { "epoch": 0.9446589446589446, "grad_norm": 0.8984375, "learning_rate": 1.600265807522039e-05, "loss": 0.9751, "step": 13579 }, { "epoch": 0.9447285122960799, "grad_norm": 1.1640625, "learning_rate": 1.5962532515888086e-05, "loss": 0.964, "step": 13580 }, { "epoch": 0.9447980799332151, "grad_norm": 1.140625, "learning_rate": 1.592245692101857e-05, "loss": 0.7652, "step": 13581 }, { "epoch": 0.9448676475703502, "grad_norm": 1.265625, "learning_rate": 1.588243129264655e-05, "loss": 1.1237, "step": 13582 }, { "epoch": 0.9449372152074855, "grad_norm": 1.0390625, "learning_rate": 1.5842455632804288e-05, "loss": 0.6775, "step": 13583 }, { "epoch": 0.9450067828446207, "grad_norm": 1.0390625, "learning_rate": 1.5802529943521604e-05, "loss": 0.6229, "step": 13584 }, { "epoch": 0.9450763504817559, "grad_norm": 1.2265625, "learning_rate": 1.576265422682577e-05, "loss": 0.6356, "step": 13585 }, { "epoch": 0.945145918118891, "grad_norm": 1.2890625, "learning_rate": 1.5722828484741382e-05, "loss": 0.9438, "step": 13586 }, { "epoch": 0.9452154857560263, "grad_norm": 1.1796875, "learning_rate": 1.5683052719290714e-05, "loss": 0.9678, "step": 13587 }, { "epoch": 0.9452850533931615, "grad_norm": 1.109375, "learning_rate": 1.564332693249315e-05, "loss": 0.9181, "step": 13588 }, { "epoch": 0.9453546210302967, "grad_norm": 1.0390625, "learning_rate": 1.560365112636608e-05, "loss": 0.927, "step": 13589 }, { "epoch": 0.9454241886674319, "grad_norm": 0.91796875, "learning_rate": 1.556402530292389e-05, "loss": 0.9889, "step": 13590 }, { "epoch": 0.9454937563045671, "grad_norm": 1.2109375, "learning_rate": 1.5524449464178413e-05, "loss": 0.8351, "step": 13591 }, { "epoch": 0.9455633239417023, "grad_norm": 0.94140625, "learning_rate": 1.548492361213938e-05, "loss": 0.7599, "step": 13592 }, { "epoch": 0.9456328915788376, "grad_norm": 1.0859375, "learning_rate": 1.5445447748813624e-05, "loss": 0.7491, "step": 13593 }, { "epoch": 0.9457024592159727, "grad_norm": 1.25, "learning_rate": 1.5406021876205435e-05, "loss": 0.8565, "step": 13594 }, { "epoch": 0.9457720268531079, "grad_norm": 1.046875, "learning_rate": 1.5366645996316764e-05, "loss": 0.8743, "step": 13595 }, { "epoch": 0.9458415944902432, "grad_norm": 1.15625, "learning_rate": 1.5327320111146904e-05, "loss": 0.8134, "step": 13596 }, { "epoch": 0.9459111621273784, "grad_norm": 1.28125, "learning_rate": 1.528804422269259e-05, "loss": 0.8351, "step": 13597 }, { "epoch": 0.9459807297645135, "grad_norm": 1.359375, "learning_rate": 1.5248818332948e-05, "loss": 1.1409, "step": 13598 }, { "epoch": 0.9460502974016487, "grad_norm": 1.6171875, "learning_rate": 1.5209642443905103e-05, "loss": 0.7357, "step": 13599 }, { "epoch": 0.946119865038784, "grad_norm": 1.1796875, "learning_rate": 1.517051655755275e-05, "loss": 0.8461, "step": 13600 }, { "epoch": 0.9461894326759192, "grad_norm": 1.0390625, "learning_rate": 1.5131440675877572e-05, "loss": 0.7245, "step": 13601 }, { "epoch": 0.9462590003130543, "grad_norm": 1.21875, "learning_rate": 1.5092414800863763e-05, "loss": 0.8758, "step": 13602 }, { "epoch": 0.9463285679501896, "grad_norm": 1.03125, "learning_rate": 1.5053438934492958e-05, "loss": 0.8778, "step": 13603 }, { "epoch": 0.9463981355873248, "grad_norm": 1.2265625, "learning_rate": 1.5014513078743907e-05, "loss": 0.8159, "step": 13604 }, { "epoch": 0.9464677032244599, "grad_norm": 1.109375, "learning_rate": 1.4975637235593253e-05, "loss": 0.7748, "step": 13605 }, { "epoch": 0.9465372708615952, "grad_norm": 1.140625, "learning_rate": 1.493681140701475e-05, "loss": 0.7375, "step": 13606 }, { "epoch": 0.9466068384987304, "grad_norm": 1.0859375, "learning_rate": 1.4898035594979931e-05, "loss": 0.7438, "step": 13607 }, { "epoch": 0.9466764061358656, "grad_norm": 1.0625, "learning_rate": 1.4859309801457555e-05, "loss": 0.7884, "step": 13608 }, { "epoch": 0.9467459737730008, "grad_norm": 1.6796875, "learning_rate": 1.4820634028414049e-05, "loss": 0.7238, "step": 13609 }, { "epoch": 0.946815541410136, "grad_norm": 1.015625, "learning_rate": 1.4782008277812953e-05, "loss": 0.937, "step": 13610 }, { "epoch": 0.9468851090472712, "grad_norm": 0.96484375, "learning_rate": 1.4743432551615698e-05, "loss": 0.8037, "step": 13611 }, { "epoch": 0.9469546766844064, "grad_norm": 0.99609375, "learning_rate": 1.470490685178083e-05, "loss": 0.7272, "step": 13612 }, { "epoch": 0.9470242443215416, "grad_norm": 0.92578125, "learning_rate": 1.4666431180264561e-05, "loss": 0.777, "step": 13613 }, { "epoch": 0.9470938119586768, "grad_norm": 1.125, "learning_rate": 1.4628005539020551e-05, "loss": 0.8623, "step": 13614 }, { "epoch": 0.947163379595812, "grad_norm": 1.0859375, "learning_rate": 1.458962992999957e-05, "loss": 0.6876, "step": 13615 }, { "epoch": 0.9472329472329473, "grad_norm": 1.0703125, "learning_rate": 1.4551304355150396e-05, "loss": 1.0869, "step": 13616 }, { "epoch": 0.9473025148700824, "grad_norm": 1.28125, "learning_rate": 1.4513028816419138e-05, "loss": 0.9281, "step": 13617 }, { "epoch": 0.9473720825072176, "grad_norm": 1.1015625, "learning_rate": 1.4474803315748908e-05, "loss": 0.7179, "step": 13618 }, { "epoch": 0.9474416501443529, "grad_norm": 0.9765625, "learning_rate": 1.443662785508082e-05, "loss": 0.8745, "step": 13619 }, { "epoch": 0.9475112177814881, "grad_norm": 1.0859375, "learning_rate": 1.43985024363531e-05, "loss": 0.6506, "step": 13620 }, { "epoch": 0.9475807854186232, "grad_norm": 0.92578125, "learning_rate": 1.4360427061501646e-05, "loss": 0.6436, "step": 13621 }, { "epoch": 0.9476503530557585, "grad_norm": 1.1953125, "learning_rate": 1.432240173245969e-05, "loss": 0.8159, "step": 13622 }, { "epoch": 0.9477199206928937, "grad_norm": 1.0625, "learning_rate": 1.4284426451158018e-05, "loss": 0.9967, "step": 13623 }, { "epoch": 0.9477894883300289, "grad_norm": 1.21875, "learning_rate": 1.4246501219524754e-05, "loss": 0.7271, "step": 13624 }, { "epoch": 0.947859055967164, "grad_norm": 1.2109375, "learning_rate": 1.4208626039485695e-05, "loss": 0.832, "step": 13625 }, { "epoch": 0.9479286236042993, "grad_norm": 1.0703125, "learning_rate": 1.4170800912963744e-05, "loss": 0.7337, "step": 13626 }, { "epoch": 0.9479981912414345, "grad_norm": 1.15625, "learning_rate": 1.4133025841879699e-05, "loss": 0.7956, "step": 13627 }, { "epoch": 0.9480677588785696, "grad_norm": 1.546875, "learning_rate": 1.4095300828151358e-05, "loss": 0.7781, "step": 13628 }, { "epoch": 0.9481373265157049, "grad_norm": 1.4765625, "learning_rate": 1.4057625873694191e-05, "loss": 0.8593, "step": 13629 }, { "epoch": 0.9482068941528401, "grad_norm": 1.0703125, "learning_rate": 1.4020000980421554e-05, "loss": 0.7329, "step": 13630 }, { "epoch": 0.9482764617899753, "grad_norm": 1.21875, "learning_rate": 1.3982426150243366e-05, "loss": 0.8627, "step": 13631 }, { "epoch": 0.9483460294271105, "grad_norm": 1.2421875, "learning_rate": 1.3944901385067765e-05, "loss": 0.7527, "step": 13632 }, { "epoch": 0.9484155970642457, "grad_norm": 1.03125, "learning_rate": 1.3907426686800007e-05, "loss": 0.9456, "step": 13633 }, { "epoch": 0.9484851647013809, "grad_norm": 1.0546875, "learning_rate": 1.3870002057342679e-05, "loss": 0.6164, "step": 13634 }, { "epoch": 0.9485547323385162, "grad_norm": 1.109375, "learning_rate": 1.3832627498596372e-05, "loss": 0.8265, "step": 13635 }, { "epoch": 0.9486242999756513, "grad_norm": 0.9453125, "learning_rate": 1.379530301245857e-05, "loss": 0.7207, "step": 13636 }, { "epoch": 0.9486938676127865, "grad_norm": 1.4765625, "learning_rate": 1.3758028600824313e-05, "loss": 0.9727, "step": 13637 }, { "epoch": 0.9487634352499217, "grad_norm": 1.3984375, "learning_rate": 1.3720804265586417e-05, "loss": 0.8802, "step": 13638 }, { "epoch": 0.948833002887057, "grad_norm": 1.7109375, "learning_rate": 1.3683630008634817e-05, "loss": 1.1856, "step": 13639 }, { "epoch": 0.9489025705241921, "grad_norm": 1.1953125, "learning_rate": 1.3646505831857115e-05, "loss": 0.7854, "step": 13640 }, { "epoch": 0.9489721381613273, "grad_norm": 0.921875, "learning_rate": 1.3609431737138356e-05, "loss": 0.7218, "step": 13641 }, { "epoch": 0.9490417057984626, "grad_norm": 1.2734375, "learning_rate": 1.3572407726360703e-05, "loss": 0.9039, "step": 13642 }, { "epoch": 0.9491112734355978, "grad_norm": 1.0703125, "learning_rate": 1.3535433801404317e-05, "loss": 0.5143, "step": 13643 }, { "epoch": 0.9491808410727329, "grad_norm": 1.203125, "learning_rate": 1.3498509964146366e-05, "loss": 0.6741, "step": 13644 }, { "epoch": 0.9492504087098682, "grad_norm": 1.1328125, "learning_rate": 1.3461636216461904e-05, "loss": 0.7088, "step": 13645 }, { "epoch": 0.9493199763470034, "grad_norm": 1.265625, "learning_rate": 1.3424812560222987e-05, "loss": 0.877, "step": 13646 }, { "epoch": 0.9493895439841386, "grad_norm": 1.1796875, "learning_rate": 1.3388038997299235e-05, "loss": 0.8244, "step": 13647 }, { "epoch": 0.9494591116212738, "grad_norm": 1.1875, "learning_rate": 1.335131552955815e-05, "loss": 0.7707, "step": 13648 }, { "epoch": 0.949528679258409, "grad_norm": 0.9609375, "learning_rate": 1.3314642158864132e-05, "loss": 0.7041, "step": 13649 }, { "epoch": 0.9495982468955442, "grad_norm": 1.0859375, "learning_rate": 1.3278018887079247e-05, "loss": 0.751, "step": 13650 }, { "epoch": 0.9496678145326793, "grad_norm": 1.328125, "learning_rate": 1.3241445716063227e-05, "loss": 0.8735, "step": 13651 }, { "epoch": 0.9497373821698146, "grad_norm": 0.8828125, "learning_rate": 1.3204922647672813e-05, "loss": 0.6172, "step": 13652 }, { "epoch": 0.9498069498069498, "grad_norm": 1.1953125, "learning_rate": 1.316844968376274e-05, "loss": 0.9435, "step": 13653 }, { "epoch": 0.949876517444085, "grad_norm": 0.9296875, "learning_rate": 1.3132026826184751e-05, "loss": 0.6585, "step": 13654 }, { "epoch": 0.9499460850812202, "grad_norm": 1.375, "learning_rate": 1.3095654076788254e-05, "loss": 0.9507, "step": 13655 }, { "epoch": 0.9500156527183554, "grad_norm": 1.109375, "learning_rate": 1.3059331437420108e-05, "loss": 0.8002, "step": 13656 }, { "epoch": 0.9500852203554906, "grad_norm": 1.390625, "learning_rate": 1.30230589099245e-05, "loss": 0.8445, "step": 13657 }, { "epoch": 0.9501547879926259, "grad_norm": 1.171875, "learning_rate": 1.2986836496143295e-05, "loss": 0.8644, "step": 13658 }, { "epoch": 0.950224355629761, "grad_norm": 1.6015625, "learning_rate": 1.2950664197915573e-05, "loss": 0.6882, "step": 13659 }, { "epoch": 0.9502939232668962, "grad_norm": 0.96875, "learning_rate": 1.291454201707809e-05, "loss": 0.7682, "step": 13660 }, { "epoch": 0.9503634909040315, "grad_norm": 1.21875, "learning_rate": 1.2878469955464712e-05, "loss": 0.8046, "step": 13661 }, { "epoch": 0.9504330585411667, "grad_norm": 1.28125, "learning_rate": 1.2842448014907304e-05, "loss": 0.7754, "step": 13662 }, { "epoch": 0.9505026261783018, "grad_norm": 1.3359375, "learning_rate": 1.280647619723474e-05, "loss": 0.7069, "step": 13663 }, { "epoch": 0.950572193815437, "grad_norm": 1.1640625, "learning_rate": 1.2770554504273557e-05, "loss": 0.9939, "step": 13664 }, { "epoch": 0.9506417614525723, "grad_norm": 0.8671875, "learning_rate": 1.273468293784752e-05, "loss": 0.7638, "step": 13665 }, { "epoch": 0.9507113290897075, "grad_norm": 1.1484375, "learning_rate": 1.2698861499778058e-05, "loss": 0.8706, "step": 13666 }, { "epoch": 0.9507808967268426, "grad_norm": 1.2109375, "learning_rate": 1.2663090191884164e-05, "loss": 0.8316, "step": 13667 }, { "epoch": 0.9508504643639779, "grad_norm": 1.203125, "learning_rate": 1.2627369015981827e-05, "loss": 0.9694, "step": 13668 }, { "epoch": 0.9509200320011131, "grad_norm": 1.4140625, "learning_rate": 1.2591697973885152e-05, "loss": 0.9504, "step": 13669 }, { "epoch": 0.9509895996382483, "grad_norm": 1.265625, "learning_rate": 1.2556077067405026e-05, "loss": 0.8707, "step": 13670 }, { "epoch": 0.9510591672753835, "grad_norm": 1.375, "learning_rate": 1.2520506298350332e-05, "loss": 0.8861, "step": 13671 }, { "epoch": 0.9511287349125187, "grad_norm": 0.9296875, "learning_rate": 1.248498566852696e-05, "loss": 0.5927, "step": 13672 }, { "epoch": 0.9511983025496539, "grad_norm": 0.8515625, "learning_rate": 1.244951517973858e-05, "loss": 0.6532, "step": 13673 }, { "epoch": 0.9512678701867892, "grad_norm": 1.15625, "learning_rate": 1.2414094833786194e-05, "loss": 0.7398, "step": 13674 }, { "epoch": 0.9513374378239243, "grad_norm": 1.171875, "learning_rate": 1.2378724632468253e-05, "loss": 0.9259, "step": 13675 }, { "epoch": 0.9514070054610595, "grad_norm": 1.0859375, "learning_rate": 1.2343404577580764e-05, "loss": 0.9632, "step": 13676 }, { "epoch": 0.9514765730981947, "grad_norm": 1.03125, "learning_rate": 1.230813467091707e-05, "loss": 0.6046, "step": 13677 }, { "epoch": 0.95154614073533, "grad_norm": 1.046875, "learning_rate": 1.2272914914267963e-05, "loss": 0.6549, "step": 13678 }, { "epoch": 0.9516157083724651, "grad_norm": 1.1328125, "learning_rate": 1.2237745309421567e-05, "loss": 0.6816, "step": 13679 }, { "epoch": 0.9516852760096003, "grad_norm": 1.8046875, "learning_rate": 1.2202625858163896e-05, "loss": 0.8623, "step": 13680 }, { "epoch": 0.9517548436467356, "grad_norm": 1.09375, "learning_rate": 1.216755656227797e-05, "loss": 0.7623, "step": 13681 }, { "epoch": 0.9518244112838707, "grad_norm": 1.546875, "learning_rate": 1.2132537423544476e-05, "loss": 1.0027, "step": 13682 }, { "epoch": 0.9518939789210059, "grad_norm": 1.203125, "learning_rate": 1.2097568443741547e-05, "loss": 0.8778, "step": 13683 }, { "epoch": 0.9519635465581412, "grad_norm": 0.93359375, "learning_rate": 1.206264962464465e-05, "loss": 0.8399, "step": 13684 }, { "epoch": 0.9520331141952764, "grad_norm": 1.0546875, "learning_rate": 1.2027780968026925e-05, "loss": 0.8131, "step": 13685 }, { "epoch": 0.9521026818324115, "grad_norm": 1.1953125, "learning_rate": 1.199296247565862e-05, "loss": 0.719, "step": 13686 }, { "epoch": 0.9521722494695468, "grad_norm": 0.890625, "learning_rate": 1.1958194149307767e-05, "loss": 0.7673, "step": 13687 }, { "epoch": 0.952241817106682, "grad_norm": 0.9609375, "learning_rate": 1.1923475990739729e-05, "loss": 0.5885, "step": 13688 }, { "epoch": 0.9523113847438172, "grad_norm": 1.3203125, "learning_rate": 1.1888808001717321e-05, "loss": 0.7645, "step": 13689 }, { "epoch": 0.9523809523809523, "grad_norm": 0.96484375, "learning_rate": 1.1854190184000801e-05, "loss": 0.6647, "step": 13690 }, { "epoch": 0.9524505200180876, "grad_norm": 1.625, "learning_rate": 1.1819622539347985e-05, "loss": 0.9428, "step": 13691 }, { "epoch": 0.9525200876552228, "grad_norm": 1.0, "learning_rate": 1.1785105069513802e-05, "loss": 0.7621, "step": 13692 }, { "epoch": 0.952589655292358, "grad_norm": 0.81640625, "learning_rate": 1.1750637776250961e-05, "loss": 0.439, "step": 13693 }, { "epoch": 0.9526592229294932, "grad_norm": 1.2578125, "learning_rate": 1.171622066130973e-05, "loss": 0.8382, "step": 13694 }, { "epoch": 0.9527287905666284, "grad_norm": 1.1484375, "learning_rate": 1.1681853726437376e-05, "loss": 0.6706, "step": 13695 }, { "epoch": 0.9527983582037636, "grad_norm": 1.1875, "learning_rate": 1.1647536973379058e-05, "loss": 0.7817, "step": 13696 }, { "epoch": 0.9528679258408989, "grad_norm": 1.46875, "learning_rate": 1.1613270403877163e-05, "loss": 1.0401, "step": 13697 }, { "epoch": 0.952937493478034, "grad_norm": 1.109375, "learning_rate": 1.157905401967152e-05, "loss": 0.9601, "step": 13698 }, { "epoch": 0.9530070611151692, "grad_norm": 1.046875, "learning_rate": 1.1544887822499517e-05, "loss": 0.8719, "step": 13699 }, { "epoch": 0.9530766287523045, "grad_norm": 1.1171875, "learning_rate": 1.1510771814095989e-05, "loss": 0.6628, "step": 13700 }, { "epoch": 0.9531461963894396, "grad_norm": 0.95703125, "learning_rate": 1.1476705996192993e-05, "loss": 0.7796, "step": 13701 }, { "epoch": 0.9532157640265748, "grad_norm": 1.2578125, "learning_rate": 1.144269037052037e-05, "loss": 0.9382, "step": 13702 }, { "epoch": 0.95328533166371, "grad_norm": 1.046875, "learning_rate": 1.1408724938805293e-05, "loss": 0.927, "step": 13703 }, { "epoch": 0.9533548993008453, "grad_norm": 0.8671875, "learning_rate": 1.137480970277227e-05, "loss": 0.7409, "step": 13704 }, { "epoch": 0.9534244669379804, "grad_norm": 0.8515625, "learning_rate": 1.1340944664143371e-05, "loss": 0.5701, "step": 13705 }, { "epoch": 0.9534940345751156, "grad_norm": 1.234375, "learning_rate": 1.1307129824638108e-05, "loss": 0.8807, "step": 13706 }, { "epoch": 0.9535636022122509, "grad_norm": 0.8671875, "learning_rate": 1.1273365185973328e-05, "loss": 0.7222, "step": 13707 }, { "epoch": 0.9536331698493861, "grad_norm": 1.0078125, "learning_rate": 1.1239650749863662e-05, "loss": 0.7902, "step": 13708 }, { "epoch": 0.9537027374865212, "grad_norm": 1.1328125, "learning_rate": 1.1205986518020738e-05, "loss": 0.8234, "step": 13709 }, { "epoch": 0.9537723051236565, "grad_norm": 1.2109375, "learning_rate": 1.1172372492153859e-05, "loss": 0.7902, "step": 13710 }, { "epoch": 0.9538418727607917, "grad_norm": 0.8671875, "learning_rate": 1.113880867396988e-05, "loss": 0.6262, "step": 13711 }, { "epoch": 0.9539114403979269, "grad_norm": 1.1171875, "learning_rate": 1.1105295065172993e-05, "loss": 0.7055, "step": 13712 }, { "epoch": 0.9539810080350621, "grad_norm": 1.0859375, "learning_rate": 1.1071831667464838e-05, "loss": 0.7361, "step": 13713 }, { "epoch": 0.9540505756721973, "grad_norm": 1.421875, "learning_rate": 1.1038418482544387e-05, "loss": 0.9626, "step": 13714 }, { "epoch": 0.9541201433093325, "grad_norm": 1.2890625, "learning_rate": 1.1005055512108508e-05, "loss": 1.0645, "step": 13715 }, { "epoch": 0.9541897109464677, "grad_norm": 1.390625, "learning_rate": 1.0971742757850844e-05, "loss": 0.8561, "step": 13716 }, { "epoch": 0.9542592785836029, "grad_norm": 1.0859375, "learning_rate": 1.0938480221463155e-05, "loss": 0.8213, "step": 13717 }, { "epoch": 0.9543288462207381, "grad_norm": 1.078125, "learning_rate": 1.0905267904633975e-05, "loss": 0.7922, "step": 13718 }, { "epoch": 0.9543984138578733, "grad_norm": 1.03125, "learning_rate": 1.087210580905007e-05, "loss": 0.7726, "step": 13719 }, { "epoch": 0.9544679814950086, "grad_norm": 1.078125, "learning_rate": 1.083899393639498e-05, "loss": 0.6426, "step": 13720 }, { "epoch": 0.9545375491321437, "grad_norm": 1.59375, "learning_rate": 1.0805932288350029e-05, "loss": 0.9605, "step": 13721 }, { "epoch": 0.9546071167692789, "grad_norm": 1.578125, "learning_rate": 1.0772920866593983e-05, "loss": 0.7406, "step": 13722 }, { "epoch": 0.9546766844064142, "grad_norm": 1.4296875, "learning_rate": 1.0739959672803057e-05, "loss": 1.0407, "step": 13723 }, { "epoch": 0.9547462520435493, "grad_norm": 1.015625, "learning_rate": 1.070704870865058e-05, "loss": 0.8359, "step": 13724 }, { "epoch": 0.9548158196806845, "grad_norm": 1.1328125, "learning_rate": 1.0674187975807659e-05, "loss": 0.8006, "step": 13725 }, { "epoch": 0.9548853873178198, "grad_norm": 1.4140625, "learning_rate": 1.064137747594307e-05, "loss": 1.2597, "step": 13726 }, { "epoch": 0.954954954954955, "grad_norm": 1.015625, "learning_rate": 1.0608617210722594e-05, "loss": 0.8139, "step": 13727 }, { "epoch": 0.9550245225920901, "grad_norm": 1.0078125, "learning_rate": 1.0575907181809563e-05, "loss": 0.667, "step": 13728 }, { "epoch": 0.9550940902292253, "grad_norm": 1.09375, "learning_rate": 1.0543247390864984e-05, "loss": 0.7991, "step": 13729 }, { "epoch": 0.9551636578663606, "grad_norm": 1.09375, "learning_rate": 1.0510637839546977e-05, "loss": 0.9141, "step": 13730 }, { "epoch": 0.9552332255034958, "grad_norm": 1.265625, "learning_rate": 1.0478078529511436e-05, "loss": 0.9213, "step": 13731 }, { "epoch": 0.9553027931406309, "grad_norm": 1.1171875, "learning_rate": 1.0445569462411487e-05, "loss": 0.9326, "step": 13732 }, { "epoch": 0.9553723607777662, "grad_norm": 1.109375, "learning_rate": 1.0413110639897916e-05, "loss": 0.8153, "step": 13733 }, { "epoch": 0.9554419284149014, "grad_norm": 1.1875, "learning_rate": 1.038070206361852e-05, "loss": 0.7108, "step": 13734 }, { "epoch": 0.9555114960520366, "grad_norm": 0.8125, "learning_rate": 1.034834373521909e-05, "loss": 0.5579, "step": 13735 }, { "epoch": 0.9555810636891718, "grad_norm": 0.9453125, "learning_rate": 1.0316035656342537e-05, "loss": 0.8924, "step": 13736 }, { "epoch": 0.955650631326307, "grad_norm": 1.3125, "learning_rate": 1.0283777828629437e-05, "loss": 0.8648, "step": 13737 }, { "epoch": 0.9557201989634422, "grad_norm": 0.8515625, "learning_rate": 1.0251570253717369e-05, "loss": 0.6623, "step": 13738 }, { "epoch": 0.9557897666005775, "grad_norm": 0.90234375, "learning_rate": 1.0219412933241911e-05, "loss": 0.5868, "step": 13739 }, { "epoch": 0.9558593342377126, "grad_norm": 1.0546875, "learning_rate": 1.0187305868835872e-05, "loss": 0.7931, "step": 13740 }, { "epoch": 0.9559289018748478, "grad_norm": 1.046875, "learning_rate": 1.015524906212939e-05, "loss": 0.8052, "step": 13741 }, { "epoch": 0.955998469511983, "grad_norm": 1.0078125, "learning_rate": 1.0123242514750163e-05, "loss": 0.7964, "step": 13742 }, { "epoch": 0.9560680371491183, "grad_norm": 1.1484375, "learning_rate": 1.0091286228323338e-05, "loss": 0.9343, "step": 13743 }, { "epoch": 0.9561376047862534, "grad_norm": 1.09375, "learning_rate": 1.0059380204471503e-05, "loss": 0.816, "step": 13744 }, { "epoch": 0.9562071724233886, "grad_norm": 1.1953125, "learning_rate": 1.0027524444814694e-05, "loss": 0.917, "step": 13745 }, { "epoch": 0.9562767400605239, "grad_norm": 1.015625, "learning_rate": 9.995718950970289e-06, "loss": 0.5941, "step": 13746 }, { "epoch": 0.956346307697659, "grad_norm": 1.265625, "learning_rate": 9.963963724553327e-06, "loss": 0.8269, "step": 13747 }, { "epoch": 0.9564158753347942, "grad_norm": 2.015625, "learning_rate": 9.932258767176072e-06, "loss": 0.8375, "step": 13748 }, { "epoch": 0.9564854429719295, "grad_norm": 0.99609375, "learning_rate": 9.900604080448461e-06, "loss": 0.6939, "step": 13749 }, { "epoch": 0.9565550106090647, "grad_norm": 1.328125, "learning_rate": 9.868999665977763e-06, "loss": 0.8623, "step": 13750 }, { "epoch": 0.9566245782461998, "grad_norm": 1.0703125, "learning_rate": 9.837445525368582e-06, "loss": 1.0773, "step": 13751 }, { "epoch": 0.9566941458833351, "grad_norm": 1.09375, "learning_rate": 9.805941660223083e-06, "loss": 0.8229, "step": 13752 }, { "epoch": 0.9567637135204703, "grad_norm": 1.078125, "learning_rate": 9.774488072140874e-06, "loss": 0.7507, "step": 13753 }, { "epoch": 0.9568332811576055, "grad_norm": 1.4765625, "learning_rate": 9.743084762719235e-06, "loss": 1.0015, "step": 13754 }, { "epoch": 0.9569028487947406, "grad_norm": 1.3515625, "learning_rate": 9.711731733552442e-06, "loss": 0.9465, "step": 13755 }, { "epoch": 0.9569724164318759, "grad_norm": 1.1875, "learning_rate": 9.680428986232337e-06, "loss": 0.9657, "step": 13756 }, { "epoch": 0.9570419840690111, "grad_norm": 1.0625, "learning_rate": 9.649176522348535e-06, "loss": 0.9344, "step": 13757 }, { "epoch": 0.9571115517061463, "grad_norm": 1.1953125, "learning_rate": 9.617974343487878e-06, "loss": 0.7727, "step": 13758 }, { "epoch": 0.9571811193432815, "grad_norm": 1.2265625, "learning_rate": 9.586822451234546e-06, "loss": 0.7814, "step": 13759 }, { "epoch": 0.9572506869804167, "grad_norm": 1.0625, "learning_rate": 9.555720847170379e-06, "loss": 0.5553, "step": 13760 }, { "epoch": 0.9573202546175519, "grad_norm": 1.46875, "learning_rate": 9.524669532874452e-06, "loss": 0.8443, "step": 13761 }, { "epoch": 0.9573898222546872, "grad_norm": 1.109375, "learning_rate": 9.493668509923392e-06, "loss": 1.0941, "step": 13762 }, { "epoch": 0.9574593898918223, "grad_norm": 0.890625, "learning_rate": 9.462717779891273e-06, "loss": 0.6499, "step": 13763 }, { "epoch": 0.9575289575289575, "grad_norm": 1.1015625, "learning_rate": 9.431817344349835e-06, "loss": 0.7202, "step": 13764 }, { "epoch": 0.9575985251660928, "grad_norm": 1.015625, "learning_rate": 9.400967204867827e-06, "loss": 0.7296, "step": 13765 }, { "epoch": 0.957668092803228, "grad_norm": 0.8984375, "learning_rate": 9.370167363011662e-06, "loss": 0.7224, "step": 13766 }, { "epoch": 0.9577376604403631, "grad_norm": 1.4921875, "learning_rate": 9.339417820345198e-06, "loss": 0.8337, "step": 13767 }, { "epoch": 0.9578072280774983, "grad_norm": 1.1875, "learning_rate": 9.308718578429964e-06, "loss": 1.0526, "step": 13768 }, { "epoch": 0.9578767957146336, "grad_norm": 1.078125, "learning_rate": 9.278069638824494e-06, "loss": 0.7711, "step": 13769 }, { "epoch": 0.9579463633517687, "grad_norm": 1.015625, "learning_rate": 9.247471003084984e-06, "loss": 0.5816, "step": 13770 }, { "epoch": 0.9580159309889039, "grad_norm": 1.078125, "learning_rate": 9.216922672765082e-06, "loss": 0.752, "step": 13771 }, { "epoch": 0.9580854986260392, "grad_norm": 1.0859375, "learning_rate": 9.186424649416103e-06, "loss": 0.8951, "step": 13772 }, { "epoch": 0.9581550662631744, "grad_norm": 1.109375, "learning_rate": 9.155976934586251e-06, "loss": 0.7709, "step": 13773 }, { "epoch": 0.9582246339003095, "grad_norm": 1.0390625, "learning_rate": 9.125579529821736e-06, "loss": 0.7626, "step": 13774 }, { "epoch": 0.9582942015374448, "grad_norm": 1.390625, "learning_rate": 9.09523243666599e-06, "loss": 0.7321, "step": 13775 }, { "epoch": 0.95836376917458, "grad_norm": 1.0625, "learning_rate": 9.064935656659668e-06, "loss": 0.763, "step": 13776 }, { "epoch": 0.9584333368117152, "grad_norm": 1.0703125, "learning_rate": 9.034689191341206e-06, "loss": 0.59, "step": 13777 }, { "epoch": 0.9585029044488504, "grad_norm": 1.03125, "learning_rate": 9.004493042246487e-06, "loss": 0.7677, "step": 13778 }, { "epoch": 0.9585724720859856, "grad_norm": 1.09375, "learning_rate": 8.974347210908729e-06, "loss": 0.9117, "step": 13779 }, { "epoch": 0.9586420397231208, "grad_norm": 1.2109375, "learning_rate": 8.944251698858263e-06, "loss": 0.752, "step": 13780 }, { "epoch": 0.958711607360256, "grad_norm": 1.5703125, "learning_rate": 8.914206507623535e-06, "loss": 0.6105, "step": 13781 }, { "epoch": 0.9587811749973912, "grad_norm": 1.203125, "learning_rate": 8.884211638729877e-06, "loss": 0.7613, "step": 13782 }, { "epoch": 0.9588507426345264, "grad_norm": 1.1484375, "learning_rate": 8.854267093700518e-06, "loss": 0.9493, "step": 13783 }, { "epoch": 0.9589203102716616, "grad_norm": 1.1875, "learning_rate": 8.824372874055575e-06, "loss": 0.7822, "step": 13784 }, { "epoch": 0.9589898779087969, "grad_norm": 1.34375, "learning_rate": 8.794528981313055e-06, "loss": 1.0353, "step": 13785 }, { "epoch": 0.959059445545932, "grad_norm": 1.3515625, "learning_rate": 8.764735416988413e-06, "loss": 1.0167, "step": 13786 }, { "epoch": 0.9591290131830672, "grad_norm": 1.109375, "learning_rate": 8.734992182594325e-06, "loss": 0.9073, "step": 13787 }, { "epoch": 0.9591985808202025, "grad_norm": 1.328125, "learning_rate": 8.705299279640921e-06, "loss": 0.8753, "step": 13788 }, { "epoch": 0.9592681484573377, "grad_norm": 1.4453125, "learning_rate": 8.675656709635882e-06, "loss": 0.8206, "step": 13789 }, { "epoch": 0.9593377160944728, "grad_norm": 1.3671875, "learning_rate": 8.646064474084447e-06, "loss": 1.1265, "step": 13790 }, { "epoch": 0.9594072837316081, "grad_norm": 1.0390625, "learning_rate": 8.616522574489083e-06, "loss": 0.7658, "step": 13791 }, { "epoch": 0.9594768513687433, "grad_norm": 1.1875, "learning_rate": 8.5870310123497e-06, "loss": 0.8042, "step": 13792 }, { "epoch": 0.9595464190058784, "grad_norm": 0.9296875, "learning_rate": 8.557589789163767e-06, "loss": 0.6322, "step": 13793 }, { "epoch": 0.9596159866430136, "grad_norm": 0.93359375, "learning_rate": 8.528198906426198e-06, "loss": 0.9129, "step": 13794 }, { "epoch": 0.9596855542801489, "grad_norm": 0.87109375, "learning_rate": 8.498858365629359e-06, "loss": 0.5803, "step": 13795 }, { "epoch": 0.9597551219172841, "grad_norm": 0.98828125, "learning_rate": 8.469568168262943e-06, "loss": 0.7071, "step": 13796 }, { "epoch": 0.9598246895544192, "grad_norm": 1.0390625, "learning_rate": 8.440328315814094e-06, "loss": 0.928, "step": 13797 }, { "epoch": 0.9598942571915545, "grad_norm": 0.94921875, "learning_rate": 8.411138809767626e-06, "loss": 0.55, "step": 13798 }, { "epoch": 0.9599638248286897, "grad_norm": 1.0078125, "learning_rate": 8.381999651605466e-06, "loss": 0.8848, "step": 13799 }, { "epoch": 0.9600333924658249, "grad_norm": 1.171875, "learning_rate": 8.352910842807315e-06, "loss": 0.9132, "step": 13800 }, { "epoch": 0.9601029601029601, "grad_norm": 1.2109375, "learning_rate": 8.323872384850106e-06, "loss": 0.7374, "step": 13801 }, { "epoch": 0.9601725277400953, "grad_norm": 1.5234375, "learning_rate": 8.294884279208104e-06, "loss": 1.0615, "step": 13802 }, { "epoch": 0.9602420953772305, "grad_norm": 1.15625, "learning_rate": 8.265946527353462e-06, "loss": 0.7314, "step": 13803 }, { "epoch": 0.9603116630143658, "grad_norm": 1.203125, "learning_rate": 8.237059130755232e-06, "loss": 0.5477, "step": 13804 }, { "epoch": 0.9603812306515009, "grad_norm": 1.1796875, "learning_rate": 8.208222090880346e-06, "loss": 0.8335, "step": 13805 }, { "epoch": 0.9604507982886361, "grad_norm": 1.4609375, "learning_rate": 8.17943540919297e-06, "loss": 0.7856, "step": 13806 }, { "epoch": 0.9605203659257713, "grad_norm": 0.84765625, "learning_rate": 8.150699087154712e-06, "loss": 0.7394, "step": 13807 }, { "epoch": 0.9605899335629066, "grad_norm": 1.1640625, "learning_rate": 8.122013126224514e-06, "loss": 0.9102, "step": 13808 }, { "epoch": 0.9606595012000417, "grad_norm": 1.28125, "learning_rate": 8.093377527859213e-06, "loss": 0.7345, "step": 13809 }, { "epoch": 0.9607290688371769, "grad_norm": 1.21875, "learning_rate": 8.064792293512535e-06, "loss": 0.8206, "step": 13810 }, { "epoch": 0.9607986364743122, "grad_norm": 1.1796875, "learning_rate": 8.036257424636096e-06, "loss": 0.9497, "step": 13811 }, { "epoch": 0.9608682041114474, "grad_norm": 1.0625, "learning_rate": 8.007772922678514e-06, "loss": 0.6294, "step": 13812 }, { "epoch": 0.9609377717485825, "grad_norm": 1.0546875, "learning_rate": 7.979338789086299e-06, "loss": 0.5506, "step": 13813 }, { "epoch": 0.9610073393857178, "grad_norm": 0.828125, "learning_rate": 7.950955025303076e-06, "loss": 0.7495, "step": 13814 }, { "epoch": 0.961076907022853, "grad_norm": 1.125, "learning_rate": 7.922621632770022e-06, "loss": 0.6539, "step": 13815 }, { "epoch": 0.9611464746599881, "grad_norm": 1.1796875, "learning_rate": 7.894338612925877e-06, "loss": 0.9119, "step": 13816 }, { "epoch": 0.9612160422971234, "grad_norm": 1.0546875, "learning_rate": 7.866105967206493e-06, "loss": 0.6867, "step": 13817 }, { "epoch": 0.9612856099342586, "grad_norm": 1.0390625, "learning_rate": 7.837923697045613e-06, "loss": 0.9962, "step": 13818 }, { "epoch": 0.9613551775713938, "grad_norm": 1.1640625, "learning_rate": 7.8097918038742e-06, "loss": 0.8745, "step": 13819 }, { "epoch": 0.9614247452085289, "grad_norm": 0.90625, "learning_rate": 7.781710289120447e-06, "loss": 0.9529, "step": 13820 }, { "epoch": 0.9614943128456642, "grad_norm": 1.21875, "learning_rate": 7.753679154210214e-06, "loss": 0.8506, "step": 13821 }, { "epoch": 0.9615638804827994, "grad_norm": 0.9765625, "learning_rate": 7.725698400567026e-06, "loss": 0.7563, "step": 13822 }, { "epoch": 0.9616334481199346, "grad_norm": 1.5234375, "learning_rate": 7.697768029611308e-06, "loss": 1.1066, "step": 13823 }, { "epoch": 0.9617030157570698, "grad_norm": 0.87890625, "learning_rate": 7.669888042761475e-06, "loss": 0.6832, "step": 13824 }, { "epoch": 0.961772583394205, "grad_norm": 1.390625, "learning_rate": 7.642058441432953e-06, "loss": 0.8851, "step": 13825 }, { "epoch": 0.9618421510313402, "grad_norm": 0.8984375, "learning_rate": 7.614279227038834e-06, "loss": 0.7575, "step": 13826 }, { "epoch": 0.9619117186684755, "grad_norm": 0.921875, "learning_rate": 7.5865504009895445e-06, "loss": 0.6526, "step": 13827 }, { "epoch": 0.9619812863056106, "grad_norm": 1.1640625, "learning_rate": 7.558871964693181e-06, "loss": 0.741, "step": 13828 }, { "epoch": 0.9620508539427458, "grad_norm": 1.1484375, "learning_rate": 7.531243919555064e-06, "loss": 0.8455, "step": 13829 }, { "epoch": 0.9621204215798811, "grad_norm": 1.5390625, "learning_rate": 7.50366626697796e-06, "loss": 0.8973, "step": 13830 }, { "epoch": 0.9621899892170163, "grad_norm": 0.8984375, "learning_rate": 7.4761390083619706e-06, "loss": 0.6072, "step": 13831 }, { "epoch": 0.9622595568541514, "grad_norm": 1.1796875, "learning_rate": 7.4486621451052e-06, "loss": 0.6326, "step": 13832 }, { "epoch": 0.9623291244912866, "grad_norm": 1.0625, "learning_rate": 7.421235678602423e-06, "loss": 0.722, "step": 13833 }, { "epoch": 0.9623986921284219, "grad_norm": 1.2890625, "learning_rate": 7.3938596102463005e-06, "loss": 0.7974, "step": 13834 }, { "epoch": 0.962468259765557, "grad_norm": 1.2890625, "learning_rate": 7.366533941426834e-06, "loss": 1.1735, "step": 13835 }, { "epoch": 0.9625378274026922, "grad_norm": 1.265625, "learning_rate": 7.339258673531579e-06, "loss": 0.6646, "step": 13836 }, { "epoch": 0.9626073950398275, "grad_norm": 0.97265625, "learning_rate": 7.3120338079454285e-06, "loss": 0.8732, "step": 13837 }, { "epoch": 0.9626769626769627, "grad_norm": 1.3515625, "learning_rate": 7.28485934605072e-06, "loss": 0.9918, "step": 13838 }, { "epoch": 0.9627465303140978, "grad_norm": 1.0234375, "learning_rate": 7.25773528922713e-06, "loss": 0.54, "step": 13839 }, { "epoch": 0.9628160979512331, "grad_norm": 1.140625, "learning_rate": 7.230661638851887e-06, "loss": 0.891, "step": 13840 }, { "epoch": 0.9628856655883683, "grad_norm": 1.515625, "learning_rate": 7.2036383962997835e-06, "loss": 0.645, "step": 13841 }, { "epoch": 0.9629552332255035, "grad_norm": 1.296875, "learning_rate": 7.176665562942941e-06, "loss": 0.7792, "step": 13842 }, { "epoch": 0.9630248008626388, "grad_norm": 0.94921875, "learning_rate": 7.149743140150711e-06, "loss": 0.7466, "step": 13843 }, { "epoch": 0.9630943684997739, "grad_norm": 1.2421875, "learning_rate": 7.12287112929022e-06, "loss": 0.9052, "step": 13844 }, { "epoch": 0.9631639361369091, "grad_norm": 1.3671875, "learning_rate": 7.096049531725823e-06, "loss": 0.9388, "step": 13845 }, { "epoch": 0.9632335037740443, "grad_norm": 1.3046875, "learning_rate": 7.069278348819541e-06, "loss": 0.8991, "step": 13846 }, { "epoch": 0.9633030714111795, "grad_norm": 1.1875, "learning_rate": 7.042557581930508e-06, "loss": 0.9729, "step": 13847 }, { "epoch": 0.9633726390483147, "grad_norm": 0.8828125, "learning_rate": 7.015887232415419e-06, "loss": 0.775, "step": 13848 }, { "epoch": 0.9634422066854499, "grad_norm": 0.8828125, "learning_rate": 6.989267301628632e-06, "loss": 0.5252, "step": 13849 }, { "epoch": 0.9635117743225852, "grad_norm": 0.953125, "learning_rate": 6.9626977909217346e-06, "loss": 0.7728, "step": 13850 }, { "epoch": 0.9635813419597203, "grad_norm": 1.15625, "learning_rate": 6.936178701643758e-06, "loss": 0.7419, "step": 13851 }, { "epoch": 0.9636509095968555, "grad_norm": 1.1796875, "learning_rate": 6.909710035141292e-06, "loss": 1.1057, "step": 13852 }, { "epoch": 0.9637204772339908, "grad_norm": 1.296875, "learning_rate": 6.883291792758151e-06, "loss": 0.8733, "step": 13853 }, { "epoch": 0.963790044871126, "grad_norm": 0.9765625, "learning_rate": 6.856923975835705e-06, "loss": 0.9535, "step": 13854 }, { "epoch": 0.9638596125082611, "grad_norm": 1.3203125, "learning_rate": 6.830606585712884e-06, "loss": 0.9676, "step": 13855 }, { "epoch": 0.9639291801453964, "grad_norm": 1.0859375, "learning_rate": 6.804339623725842e-06, "loss": 0.7737, "step": 13856 }, { "epoch": 0.9639987477825316, "grad_norm": 1.0546875, "learning_rate": 6.77812309120851e-06, "loss": 1.0122, "step": 13857 }, { "epoch": 0.9640683154196668, "grad_norm": 1.109375, "learning_rate": 6.751956989491825e-06, "loss": 0.8278, "step": 13858 }, { "epoch": 0.9641378830568019, "grad_norm": 1.078125, "learning_rate": 6.72584131990428e-06, "loss": 0.5063, "step": 13859 }, { "epoch": 0.9642074506939372, "grad_norm": 1.1953125, "learning_rate": 6.699776083772257e-06, "loss": 0.7343, "step": 13860 }, { "epoch": 0.9642770183310724, "grad_norm": 1.5234375, "learning_rate": 6.673761282418922e-06, "loss": 0.8882, "step": 13861 }, { "epoch": 0.9643465859682075, "grad_norm": 1.09375, "learning_rate": 6.647796917165216e-06, "loss": 0.6325, "step": 13862 }, { "epoch": 0.9644161536053428, "grad_norm": 1.0546875, "learning_rate": 6.621882989329531e-06, "loss": 0.6688, "step": 13863 }, { "epoch": 0.964485721242478, "grad_norm": 1.15625, "learning_rate": 6.59601950022759e-06, "loss": 0.8961, "step": 13864 }, { "epoch": 0.9645552888796132, "grad_norm": 1.109375, "learning_rate": 6.570206451172789e-06, "loss": 1.0174, "step": 13865 }, { "epoch": 0.9646248565167485, "grad_norm": 1.28125, "learning_rate": 6.544443843475523e-06, "loss": 0.8975, "step": 13866 }, { "epoch": 0.9646944241538836, "grad_norm": 0.98046875, "learning_rate": 6.518731678443968e-06, "loss": 0.7626, "step": 13867 }, { "epoch": 0.9647639917910188, "grad_norm": 1.234375, "learning_rate": 6.493069957383857e-06, "loss": 0.808, "step": 13868 }, { "epoch": 0.9648335594281541, "grad_norm": 0.8125, "learning_rate": 6.467458681597926e-06, "loss": 0.6525, "step": 13869 }, { "epoch": 0.9649031270652892, "grad_norm": 1.15625, "learning_rate": 6.441897852386691e-06, "loss": 1.021, "step": 13870 }, { "epoch": 0.9649726947024244, "grad_norm": 1.0703125, "learning_rate": 6.416387471047891e-06, "loss": 0.7757, "step": 13871 }, { "epoch": 0.9650422623395596, "grad_norm": 1.0859375, "learning_rate": 6.390927538877045e-06, "loss": 0.7479, "step": 13872 }, { "epoch": 0.9651118299766949, "grad_norm": 1.03125, "learning_rate": 6.365518057166564e-06, "loss": 0.7607, "step": 13873 }, { "epoch": 0.96518139761383, "grad_norm": 1.0078125, "learning_rate": 6.340159027206971e-06, "loss": 0.9347, "step": 13874 }, { "epoch": 0.9652509652509652, "grad_norm": 1.46875, "learning_rate": 6.3148504502855695e-06, "loss": 1.1005, "step": 13875 }, { "epoch": 0.9653205328881005, "grad_norm": 1.3515625, "learning_rate": 6.289592327687554e-06, "loss": 1.03, "step": 13876 }, { "epoch": 0.9653901005252357, "grad_norm": 1.125, "learning_rate": 6.264384660695343e-06, "loss": 0.6541, "step": 13877 }, { "epoch": 0.9654596681623708, "grad_norm": 1.1171875, "learning_rate": 6.239227450588914e-06, "loss": 1.0166, "step": 13878 }, { "epoch": 0.9655292357995061, "grad_norm": 0.9140625, "learning_rate": 6.214120698645575e-06, "loss": 0.7993, "step": 13879 }, { "epoch": 0.9655988034366413, "grad_norm": 1.2421875, "learning_rate": 6.189064406140199e-06, "loss": 0.723, "step": 13880 }, { "epoch": 0.9656683710737765, "grad_norm": 1.2421875, "learning_rate": 6.164058574344766e-06, "loss": 0.5604, "step": 13881 }, { "epoch": 0.9657379387109117, "grad_norm": 1.25, "learning_rate": 6.139103204529372e-06, "loss": 0.8618, "step": 13882 }, { "epoch": 0.9658075063480469, "grad_norm": 1.2109375, "learning_rate": 6.114198297960672e-06, "loss": 0.741, "step": 13883 }, { "epoch": 0.9658770739851821, "grad_norm": 1.078125, "learning_rate": 6.089343855903651e-06, "loss": 0.8718, "step": 13884 }, { "epoch": 0.9659466416223172, "grad_norm": 1.09375, "learning_rate": 6.064539879619968e-06, "loss": 0.5796, "step": 13885 }, { "epoch": 0.9660162092594525, "grad_norm": 1.0546875, "learning_rate": 6.03978637036906e-06, "loss": 0.8759, "step": 13886 }, { "epoch": 0.9660857768965877, "grad_norm": 0.9296875, "learning_rate": 6.015083329407922e-06, "loss": 0.5381, "step": 13887 }, { "epoch": 0.9661553445337229, "grad_norm": 1.1640625, "learning_rate": 5.990430757990773e-06, "loss": 1.0053, "step": 13888 }, { "epoch": 0.9662249121708582, "grad_norm": 1.359375, "learning_rate": 5.9658286573694984e-06, "loss": 1.0386, "step": 13889 }, { "epoch": 0.9662944798079933, "grad_norm": 1.4296875, "learning_rate": 5.941277028792991e-06, "loss": 1.0889, "step": 13890 }, { "epoch": 0.9663640474451285, "grad_norm": 1.328125, "learning_rate": 5.916775873508029e-06, "loss": 0.8672, "step": 13891 }, { "epoch": 0.9664336150822638, "grad_norm": 1.1640625, "learning_rate": 5.89232519275873e-06, "loss": 0.7844, "step": 13892 }, { "epoch": 0.966503182719399, "grad_norm": 1.53125, "learning_rate": 5.867924987786432e-06, "loss": 1.0494, "step": 13893 }, { "epoch": 0.9665727503565341, "grad_norm": 1.09375, "learning_rate": 5.843575259830036e-06, "loss": 0.5348, "step": 13894 }, { "epoch": 0.9666423179936694, "grad_norm": 0.93359375, "learning_rate": 5.819276010126107e-06, "loss": 0.7969, "step": 13895 }, { "epoch": 0.9667118856308046, "grad_norm": 1.3359375, "learning_rate": 5.795027239908213e-06, "loss": 0.6913, "step": 13896 }, { "epoch": 0.9667814532679397, "grad_norm": 1.0703125, "learning_rate": 5.7708289504077024e-06, "loss": 0.6975, "step": 13897 }, { "epoch": 0.9668510209050749, "grad_norm": 1.015625, "learning_rate": 5.746681142853149e-06, "loss": 0.6556, "step": 13898 }, { "epoch": 0.9669205885422102, "grad_norm": 0.94921875, "learning_rate": 5.722583818470795e-06, "loss": 0.7655, "step": 13899 }, { "epoch": 0.9669901561793454, "grad_norm": 0.94921875, "learning_rate": 5.698536978484104e-06, "loss": 0.6782, "step": 13900 }, { "epoch": 0.9670597238164805, "grad_norm": 1.265625, "learning_rate": 5.674540624113988e-06, "loss": 0.7805, "step": 13901 }, { "epoch": 0.9671292914536158, "grad_norm": 1.046875, "learning_rate": 5.650594756579031e-06, "loss": 0.7414, "step": 13902 }, { "epoch": 0.967198859090751, "grad_norm": 0.99609375, "learning_rate": 5.626699377094924e-06, "loss": 0.8086, "step": 13903 }, { "epoch": 0.9672684267278862, "grad_norm": 0.98828125, "learning_rate": 5.6028544868749194e-06, "loss": 0.7736, "step": 13904 }, { "epoch": 0.9673379943650214, "grad_norm": 1.1484375, "learning_rate": 5.579060087129939e-06, "loss": 0.6967, "step": 13905 }, { "epoch": 0.9674075620021566, "grad_norm": 1.109375, "learning_rate": 5.555316179068015e-06, "loss": 0.9138, "step": 13906 }, { "epoch": 0.9674771296392918, "grad_norm": 1.1796875, "learning_rate": 5.531622763894739e-06, "loss": 0.837, "step": 13907 }, { "epoch": 0.9675466972764271, "grad_norm": 0.90234375, "learning_rate": 5.507979842813149e-06, "loss": 0.7755, "step": 13908 }, { "epoch": 0.9676162649135622, "grad_norm": 1.1953125, "learning_rate": 5.48438741702384e-06, "loss": 0.7594, "step": 13909 }, { "epoch": 0.9676858325506974, "grad_norm": 1.0859375, "learning_rate": 5.46084548772452e-06, "loss": 0.816, "step": 13910 }, { "epoch": 0.9677554001878326, "grad_norm": 1.0390625, "learning_rate": 5.437354056110566e-06, "loss": 0.5594, "step": 13911 }, { "epoch": 0.9678249678249679, "grad_norm": 1.1015625, "learning_rate": 5.413913123374914e-06, "loss": 0.8616, "step": 13912 }, { "epoch": 0.967894535462103, "grad_norm": 1.1640625, "learning_rate": 5.390522690707611e-06, "loss": 0.7617, "step": 13913 }, { "epoch": 0.9679641030992382, "grad_norm": 1.3359375, "learning_rate": 5.367182759296374e-06, "loss": 0.8175, "step": 13914 }, { "epoch": 0.9680336707363735, "grad_norm": 1.1640625, "learning_rate": 5.343893330326255e-06, "loss": 0.7259, "step": 13915 }, { "epoch": 0.9681032383735086, "grad_norm": 0.9296875, "learning_rate": 5.320654404979863e-06, "loss": 0.6797, "step": 13916 }, { "epoch": 0.9681728060106438, "grad_norm": 1.4375, "learning_rate": 5.297465984437033e-06, "loss": 0.8994, "step": 13917 }, { "epoch": 0.9682423736477791, "grad_norm": 1.25, "learning_rate": 5.274328069875156e-06, "loss": 0.8834, "step": 13918 }, { "epoch": 0.9683119412849143, "grad_norm": 1.1875, "learning_rate": 5.251240662469181e-06, "loss": 0.7321, "step": 13919 }, { "epoch": 0.9683815089220494, "grad_norm": 1.0625, "learning_rate": 5.228203763391392e-06, "loss": 0.6997, "step": 13920 }, { "epoch": 0.9684510765591847, "grad_norm": 1.2734375, "learning_rate": 5.2052173738113e-06, "loss": 0.7915, "step": 13921 }, { "epoch": 0.9685206441963199, "grad_norm": 1.1484375, "learning_rate": 5.18228149489608e-06, "loss": 0.8359, "step": 13922 }, { "epoch": 0.9685902118334551, "grad_norm": 1.1796875, "learning_rate": 5.1593961278103566e-06, "loss": 0.82, "step": 13923 }, { "epoch": 0.9686597794705902, "grad_norm": 1.0859375, "learning_rate": 5.136561273716201e-06, "loss": 0.8778, "step": 13924 }, { "epoch": 0.9687293471077255, "grad_norm": 1.1640625, "learning_rate": 5.113776933772907e-06, "loss": 0.8501, "step": 13925 }, { "epoch": 0.9687989147448607, "grad_norm": 0.91015625, "learning_rate": 5.09104310913755e-06, "loss": 0.5983, "step": 13926 }, { "epoch": 0.9688684823819959, "grad_norm": 1.2890625, "learning_rate": 5.068359800964206e-06, "loss": 0.8232, "step": 13927 }, { "epoch": 0.9689380500191311, "grad_norm": 0.91015625, "learning_rate": 5.045727010404733e-06, "loss": 0.6038, "step": 13928 }, { "epoch": 0.9690076176562663, "grad_norm": 1.484375, "learning_rate": 5.023144738608321e-06, "loss": 0.6608, "step": 13929 }, { "epoch": 0.9690771852934015, "grad_norm": 1.0078125, "learning_rate": 5.000612986721498e-06, "loss": 0.6612, "step": 13930 }, { "epoch": 0.9691467529305368, "grad_norm": 1.0703125, "learning_rate": 4.9781317558884596e-06, "loss": 0.6919, "step": 13931 }, { "epoch": 0.9692163205676719, "grad_norm": 1.1484375, "learning_rate": 4.955701047250516e-06, "loss": 0.9562, "step": 13932 }, { "epoch": 0.9692858882048071, "grad_norm": 1.421875, "learning_rate": 4.933320861946866e-06, "loss": 1.0431, "step": 13933 }, { "epoch": 0.9693554558419424, "grad_norm": 0.9609375, "learning_rate": 4.91099120111349e-06, "loss": 0.7166, "step": 13934 }, { "epoch": 0.9694250234790776, "grad_norm": 1.2265625, "learning_rate": 4.888712065884482e-06, "loss": 1.1322, "step": 13935 }, { "epoch": 0.9694945911162127, "grad_norm": 0.97265625, "learning_rate": 4.866483457390825e-06, "loss": 0.738, "step": 13936 }, { "epoch": 0.9695641587533479, "grad_norm": 1.046875, "learning_rate": 4.844305376761393e-06, "loss": 0.931, "step": 13937 }, { "epoch": 0.9696337263904832, "grad_norm": 1.0390625, "learning_rate": 4.822177825122176e-06, "loss": 0.8767, "step": 13938 }, { "epoch": 0.9697032940276183, "grad_norm": 1.125, "learning_rate": 4.800100803596607e-06, "loss": 0.6092, "step": 13939 }, { "epoch": 0.9697728616647535, "grad_norm": 1.1484375, "learning_rate": 4.778074313305791e-06, "loss": 0.7926, "step": 13940 }, { "epoch": 0.9698424293018888, "grad_norm": 1.21875, "learning_rate": 4.756098355368055e-06, "loss": 0.9556, "step": 13941 }, { "epoch": 0.969911996939024, "grad_norm": 1.265625, "learning_rate": 4.734172930899283e-06, "loss": 0.7866, "step": 13942 }, { "epoch": 0.9699815645761591, "grad_norm": 1.46875, "learning_rate": 4.712298041012697e-06, "loss": 0.9331, "step": 13943 }, { "epoch": 0.9700511322132944, "grad_norm": 1.1796875, "learning_rate": 4.690473686819075e-06, "loss": 0.9383, "step": 13944 }, { "epoch": 0.9701206998504296, "grad_norm": 1.0234375, "learning_rate": 4.668699869426308e-06, "loss": 0.6549, "step": 13945 }, { "epoch": 0.9701902674875648, "grad_norm": 1.078125, "learning_rate": 4.646976589940177e-06, "loss": 0.7424, "step": 13946 }, { "epoch": 0.9702598351247, "grad_norm": 1.1328125, "learning_rate": 4.625303849463581e-06, "loss": 0.8849, "step": 13947 }, { "epoch": 0.9703294027618352, "grad_norm": 1.2109375, "learning_rate": 4.6036816490970805e-06, "loss": 0.8294, "step": 13948 }, { "epoch": 0.9703989703989704, "grad_norm": 1.1484375, "learning_rate": 4.582109989938465e-06, "loss": 1.1237, "step": 13949 }, { "epoch": 0.9704685380361056, "grad_norm": 1.0546875, "learning_rate": 4.560588873082972e-06, "loss": 0.8116, "step": 13950 }, { "epoch": 0.9705381056732408, "grad_norm": 1.1015625, "learning_rate": 4.539118299623391e-06, "loss": 0.793, "step": 13951 }, { "epoch": 0.970607673310376, "grad_norm": 1.0390625, "learning_rate": 4.517698270649961e-06, "loss": 0.8075, "step": 13952 }, { "epoch": 0.9706772409475112, "grad_norm": 1.15625, "learning_rate": 4.496328787250148e-06, "loss": 0.7519, "step": 13953 }, { "epoch": 0.9707468085846465, "grad_norm": 1.078125, "learning_rate": 4.4750098505089705e-06, "loss": 0.7596, "step": 13954 }, { "epoch": 0.9708163762217816, "grad_norm": 1.390625, "learning_rate": 4.45374146150912e-06, "loss": 0.8662, "step": 13955 }, { "epoch": 0.9708859438589168, "grad_norm": 0.98828125, "learning_rate": 4.4325236213302865e-06, "loss": 0.7271, "step": 13956 }, { "epoch": 0.9709555114960521, "grad_norm": 1.1328125, "learning_rate": 4.411356331049832e-06, "loss": 1.0329, "step": 13957 }, { "epoch": 0.9710250791331873, "grad_norm": 1.4375, "learning_rate": 4.390239591742562e-06, "loss": 0.7974, "step": 13958 }, { "epoch": 0.9710946467703224, "grad_norm": 0.98828125, "learning_rate": 4.369173404480731e-06, "loss": 0.7028, "step": 13959 }, { "epoch": 0.9711642144074577, "grad_norm": 1.25, "learning_rate": 4.348157770333927e-06, "loss": 0.7796, "step": 13960 }, { "epoch": 0.9712337820445929, "grad_norm": 1.234375, "learning_rate": 4.327192690369186e-06, "loss": 0.7537, "step": 13961 }, { "epoch": 0.971303349681728, "grad_norm": 1.2109375, "learning_rate": 4.306278165651101e-06, "loss": 0.9896, "step": 13962 }, { "epoch": 0.9713729173188632, "grad_norm": 0.8828125, "learning_rate": 4.2854141972414885e-06, "loss": 0.9013, "step": 13963 }, { "epoch": 0.9714424849559985, "grad_norm": 1.1640625, "learning_rate": 4.2646007861997235e-06, "loss": 0.8468, "step": 13964 }, { "epoch": 0.9715120525931337, "grad_norm": 0.97265625, "learning_rate": 4.243837933582739e-06, "loss": 0.7459, "step": 13965 }, { "epoch": 0.9715816202302688, "grad_norm": 1.078125, "learning_rate": 4.2231256404446916e-06, "loss": 0.856, "step": 13966 }, { "epoch": 0.9716511878674041, "grad_norm": 1.046875, "learning_rate": 4.202463907837184e-06, "loss": 0.8179, "step": 13967 }, { "epoch": 0.9717207555045393, "grad_norm": 0.89453125, "learning_rate": 4.1818527368093775e-06, "loss": 0.6342, "step": 13968 }, { "epoch": 0.9717903231416745, "grad_norm": 1.171875, "learning_rate": 4.161292128407767e-06, "loss": 1.057, "step": 13969 }, { "epoch": 0.9718598907788097, "grad_norm": 1.3046875, "learning_rate": 4.140782083676409e-06, "loss": 0.8391, "step": 13970 }, { "epoch": 0.9719294584159449, "grad_norm": 1.0078125, "learning_rate": 4.1203226036565785e-06, "loss": 0.7712, "step": 13971 }, { "epoch": 0.9719990260530801, "grad_norm": 0.796875, "learning_rate": 4.099913689387114e-06, "loss": 0.5111, "step": 13972 }, { "epoch": 0.9720685936902154, "grad_norm": 1.4921875, "learning_rate": 4.079555341904406e-06, "loss": 0.9152, "step": 13973 }, { "epoch": 0.9721381613273505, "grad_norm": 1.2578125, "learning_rate": 4.059247562242074e-06, "loss": 0.758, "step": 13974 }, { "epoch": 0.9722077289644857, "grad_norm": 1.2109375, "learning_rate": 4.038990351431182e-06, "loss": 0.7742, "step": 13975 }, { "epoch": 0.9722772966016209, "grad_norm": 1.1328125, "learning_rate": 4.018783710500462e-06, "loss": 0.8875, "step": 13976 }, { "epoch": 0.9723468642387562, "grad_norm": 0.9765625, "learning_rate": 3.998627640475649e-06, "loss": 0.7069, "step": 13977 }, { "epoch": 0.9724164318758913, "grad_norm": 0.99609375, "learning_rate": 3.978522142380259e-06, "loss": 0.6547, "step": 13978 }, { "epoch": 0.9724859995130265, "grad_norm": 0.9609375, "learning_rate": 3.958467217235362e-06, "loss": 0.9462, "step": 13979 }, { "epoch": 0.9725555671501618, "grad_norm": 1.3359375, "learning_rate": 3.938462866059034e-06, "loss": 0.5854, "step": 13980 }, { "epoch": 0.972625134787297, "grad_norm": 1.0703125, "learning_rate": 3.918509089867017e-06, "loss": 1.0063, "step": 13981 }, { "epoch": 0.9726947024244321, "grad_norm": 1.1171875, "learning_rate": 3.898605889672391e-06, "loss": 0.7507, "step": 13982 }, { "epoch": 0.9727642700615674, "grad_norm": 0.97265625, "learning_rate": 3.878753266486013e-06, "loss": 0.7795, "step": 13983 }, { "epoch": 0.9728338376987026, "grad_norm": 1.0625, "learning_rate": 3.858951221315632e-06, "loss": 0.865, "step": 13984 }, { "epoch": 0.9729034053358377, "grad_norm": 0.96484375, "learning_rate": 3.839199755166778e-06, "loss": 0.7666, "step": 13985 }, { "epoch": 0.972972972972973, "grad_norm": 0.85546875, "learning_rate": 3.819498869042315e-06, "loss": 0.5805, "step": 13986 }, { "epoch": 0.9730425406101082, "grad_norm": 1.09375, "learning_rate": 3.7998485639426648e-06, "loss": 0.776, "step": 13987 }, { "epoch": 0.9731121082472434, "grad_norm": 1.1484375, "learning_rate": 3.7802488408653635e-06, "loss": 0.8188, "step": 13988 }, { "epoch": 0.9731816758843785, "grad_norm": 1.15625, "learning_rate": 3.7606997008058363e-06, "loss": 0.9584, "step": 13989 }, { "epoch": 0.9732512435215138, "grad_norm": 1.2890625, "learning_rate": 3.741201144756512e-06, "loss": 0.8802, "step": 13990 }, { "epoch": 0.973320811158649, "grad_norm": 1.125, "learning_rate": 3.7217531737073762e-06, "loss": 0.7809, "step": 13991 }, { "epoch": 0.9733903787957842, "grad_norm": 0.98046875, "learning_rate": 3.7023557886460833e-06, "loss": 0.7774, "step": 13992 }, { "epoch": 0.9734599464329194, "grad_norm": 0.90234375, "learning_rate": 3.6830089905575128e-06, "loss": 0.9329, "step": 13993 }, { "epoch": 0.9735295140700546, "grad_norm": 1.015625, "learning_rate": 3.663712780423878e-06, "loss": 0.582, "step": 13994 }, { "epoch": 0.9735990817071898, "grad_norm": 1.09375, "learning_rate": 3.644467159224951e-06, "loss": 0.8051, "step": 13995 }, { "epoch": 0.9736686493443251, "grad_norm": 0.87890625, "learning_rate": 3.625272127938062e-06, "loss": 0.745, "step": 13996 }, { "epoch": 0.9737382169814602, "grad_norm": 1.2109375, "learning_rate": 3.6061276875376527e-06, "loss": 0.7743, "step": 13997 }, { "epoch": 0.9738077846185954, "grad_norm": 1.4609375, "learning_rate": 3.5870338389959454e-06, "loss": 0.9294, "step": 13998 }, { "epoch": 0.9738773522557307, "grad_norm": 1.3984375, "learning_rate": 3.567990583282388e-06, "loss": 0.7511, "step": 13999 }, { "epoch": 0.9739469198928659, "grad_norm": 1.0234375, "learning_rate": 3.5489979213638724e-06, "loss": 0.8622, "step": 14000 }, { "epoch": 0.974016487530001, "grad_norm": 1.03125, "learning_rate": 3.530055854204739e-06, "loss": 0.5685, "step": 14001 }, { "epoch": 0.9740860551671362, "grad_norm": 1.078125, "learning_rate": 3.5111643827667737e-06, "loss": 0.6536, "step": 14002 }, { "epoch": 0.9741556228042715, "grad_norm": 1.2578125, "learning_rate": 3.4923235080092095e-06, "loss": 1.0551, "step": 14003 }, { "epoch": 0.9742251904414067, "grad_norm": 1.3828125, "learning_rate": 3.473533230888726e-06, "loss": 0.8098, "step": 14004 }, { "epoch": 0.9742947580785418, "grad_norm": 1.0625, "learning_rate": 3.4547935523593366e-06, "loss": 0.6146, "step": 14005 }, { "epoch": 0.9743643257156771, "grad_norm": 1.0390625, "learning_rate": 3.436104473372503e-06, "loss": 0.7175, "step": 14006 }, { "epoch": 0.9744338933528123, "grad_norm": 1.3984375, "learning_rate": 3.4174659948773554e-06, "loss": 0.8898, "step": 14007 }, { "epoch": 0.9745034609899474, "grad_norm": 1.203125, "learning_rate": 3.3988781178201366e-06, "loss": 0.7473, "step": 14008 }, { "epoch": 0.9745730286270827, "grad_norm": 1.2578125, "learning_rate": 3.380340843144536e-06, "loss": 0.9942, "step": 14009 }, { "epoch": 0.9746425962642179, "grad_norm": 1.015625, "learning_rate": 3.3618541717919117e-06, "loss": 0.7472, "step": 14010 }, { "epoch": 0.9747121639013531, "grad_norm": 1.1484375, "learning_rate": 3.343418104700957e-06, "loss": 0.7854, "step": 14011 }, { "epoch": 0.9747817315384883, "grad_norm": 1.015625, "learning_rate": 3.3250326428077014e-06, "loss": 0.7731, "step": 14012 }, { "epoch": 0.9748512991756235, "grad_norm": 1.0546875, "learning_rate": 3.3066977870456205e-06, "loss": 0.7759, "step": 14013 }, { "epoch": 0.9749208668127587, "grad_norm": 0.921875, "learning_rate": 3.288413538345747e-06, "loss": 0.6518, "step": 14014 }, { "epoch": 0.9749904344498939, "grad_norm": 1.1796875, "learning_rate": 3.2701798976364494e-06, "loss": 0.8985, "step": 14015 }, { "epoch": 0.9750600020870291, "grad_norm": 1.1015625, "learning_rate": 3.2519968658435427e-06, "loss": 0.7888, "step": 14016 }, { "epoch": 0.9751295697241643, "grad_norm": 0.9375, "learning_rate": 3.2338644438902887e-06, "loss": 0.6723, "step": 14017 }, { "epoch": 0.9751991373612995, "grad_norm": 1.0234375, "learning_rate": 3.2157826326972837e-06, "loss": 0.8776, "step": 14018 }, { "epoch": 0.9752687049984348, "grad_norm": 1.5625, "learning_rate": 3.197751433182572e-06, "loss": 0.848, "step": 14019 }, { "epoch": 0.9753382726355699, "grad_norm": 1.359375, "learning_rate": 3.179770846261865e-06, "loss": 1.0315, "step": 14020 }, { "epoch": 0.9754078402727051, "grad_norm": 1.234375, "learning_rate": 3.1618408728480985e-06, "loss": 0.8892, "step": 14021 }, { "epoch": 0.9754774079098404, "grad_norm": 1.171875, "learning_rate": 3.1439615138515454e-06, "loss": 1.0339, "step": 14022 }, { "epoch": 0.9755469755469756, "grad_norm": 1.3671875, "learning_rate": 3.126132770180146e-06, "loss": 0.9649, "step": 14023 }, { "epoch": 0.9756165431841107, "grad_norm": 0.984375, "learning_rate": 3.108354642739064e-06, "loss": 0.7231, "step": 14024 }, { "epoch": 0.975686110821246, "grad_norm": 0.9453125, "learning_rate": 3.090627132431023e-06, "loss": 0.8066, "step": 14025 }, { "epoch": 0.9757556784583812, "grad_norm": 0.96875, "learning_rate": 3.0729502401561916e-06, "loss": 0.5879, "step": 14026 }, { "epoch": 0.9758252460955164, "grad_norm": 0.7421875, "learning_rate": 3.0553239668120735e-06, "loss": 0.5491, "step": 14027 }, { "epoch": 0.9758948137326515, "grad_norm": 1.03125, "learning_rate": 3.037748313293509e-06, "loss": 0.7964, "step": 14028 }, { "epoch": 0.9759643813697868, "grad_norm": 1.2421875, "learning_rate": 3.020223280493228e-06, "loss": 0.7861, "step": 14029 }, { "epoch": 0.976033949006922, "grad_norm": 1.09375, "learning_rate": 3.0027488693007422e-06, "loss": 0.8609, "step": 14030 }, { "epoch": 0.9761035166440571, "grad_norm": 1.21875, "learning_rate": 2.9853250806033407e-06, "loss": 0.7567, "step": 14031 }, { "epoch": 0.9761730842811924, "grad_norm": 1.1796875, "learning_rate": 2.9679519152859824e-06, "loss": 0.8647, "step": 14032 }, { "epoch": 0.9762426519183276, "grad_norm": 1.125, "learning_rate": 2.950629374230518e-06, "loss": 0.8666, "step": 14033 }, { "epoch": 0.9763122195554628, "grad_norm": 1.2578125, "learning_rate": 2.9333574583165767e-06, "loss": 0.9188, "step": 14034 }, { "epoch": 0.976381787192598, "grad_norm": 1.5234375, "learning_rate": 2.916136168421124e-06, "loss": 0.9102, "step": 14035 }, { "epoch": 0.9764513548297332, "grad_norm": 1.1875, "learning_rate": 2.8989655054186827e-06, "loss": 0.6939, "step": 14036 }, { "epoch": 0.9765209224668684, "grad_norm": 0.92578125, "learning_rate": 2.881845470180999e-06, "loss": 0.6198, "step": 14037 }, { "epoch": 0.9765904901040037, "grad_norm": 1.0625, "learning_rate": 2.864776063577268e-06, "loss": 0.7109, "step": 14038 }, { "epoch": 0.9766600577411388, "grad_norm": 0.9375, "learning_rate": 2.8477572864744616e-06, "loss": 0.7561, "step": 14039 }, { "epoch": 0.976729625378274, "grad_norm": 1.0703125, "learning_rate": 2.830789139736334e-06, "loss": 0.6205, "step": 14040 }, { "epoch": 0.9767991930154092, "grad_norm": 1.1171875, "learning_rate": 2.8138716242247507e-06, "loss": 0.882, "step": 14041 }, { "epoch": 0.9768687606525445, "grad_norm": 0.99609375, "learning_rate": 2.79700474079847e-06, "loss": 0.9954, "step": 14042 }, { "epoch": 0.9769383282896796, "grad_norm": 0.99609375, "learning_rate": 2.7801884903141396e-06, "loss": 0.8026, "step": 14043 }, { "epoch": 0.9770078959268148, "grad_norm": 1.34375, "learning_rate": 2.7634228736254097e-06, "loss": 0.8356, "step": 14044 }, { "epoch": 0.9770774635639501, "grad_norm": 0.91015625, "learning_rate": 2.7467078915835996e-06, "loss": 0.5747, "step": 14045 }, { "epoch": 0.9771470312010853, "grad_norm": 0.9375, "learning_rate": 2.730043545037364e-06, "loss": 0.8995, "step": 14046 }, { "epoch": 0.9772165988382204, "grad_norm": 1.578125, "learning_rate": 2.7134298348330257e-06, "loss": 0.9942, "step": 14047 }, { "epoch": 0.9772861664753557, "grad_norm": 1.21875, "learning_rate": 2.6968667618140207e-06, "loss": 0.9477, "step": 14048 }, { "epoch": 0.9773557341124909, "grad_norm": 1.1171875, "learning_rate": 2.6803543268213436e-06, "loss": 0.725, "step": 14049 }, { "epoch": 0.977425301749626, "grad_norm": 1.0390625, "learning_rate": 2.663892530693324e-06, "loss": 0.843, "step": 14050 }, { "epoch": 0.9774948693867613, "grad_norm": 1.1484375, "learning_rate": 2.6474813742659587e-06, "loss": 0.7772, "step": 14051 }, { "epoch": 0.9775644370238965, "grad_norm": 1.1484375, "learning_rate": 2.631120858372471e-06, "loss": 0.8957, "step": 14052 }, { "epoch": 0.9776340046610317, "grad_norm": 1.328125, "learning_rate": 2.6148109838435297e-06, "loss": 0.8659, "step": 14053 }, { "epoch": 0.9777035722981668, "grad_norm": 1.0546875, "learning_rate": 2.59855175150725e-06, "loss": 0.8784, "step": 14054 }, { "epoch": 0.9777731399353021, "grad_norm": 1.2109375, "learning_rate": 2.5823431621893046e-06, "loss": 0.7079, "step": 14055 }, { "epoch": 0.9778427075724373, "grad_norm": 1.3828125, "learning_rate": 2.566185216712591e-06, "loss": 0.8376, "step": 14056 }, { "epoch": 0.9779122752095725, "grad_norm": 1.4140625, "learning_rate": 2.550077915897564e-06, "loss": 0.9035, "step": 14057 }, { "epoch": 0.9779818428467077, "grad_norm": 0.78125, "learning_rate": 2.534021260562014e-06, "loss": 0.597, "step": 14058 }, { "epoch": 0.9780514104838429, "grad_norm": 0.984375, "learning_rate": 2.5180152515212885e-06, "loss": 0.8717, "step": 14059 }, { "epoch": 0.9781209781209781, "grad_norm": 1.140625, "learning_rate": 2.5020598895880706e-06, "loss": 0.985, "step": 14060 }, { "epoch": 0.9781905457581134, "grad_norm": 1.3671875, "learning_rate": 2.486155175572491e-06, "loss": 0.7367, "step": 14061 }, { "epoch": 0.9782601133952485, "grad_norm": 1.296875, "learning_rate": 2.470301110282236e-06, "loss": 0.9985, "step": 14062 }, { "epoch": 0.9783296810323837, "grad_norm": 1.140625, "learning_rate": 2.4544976945219953e-06, "loss": 0.7257, "step": 14063 }, { "epoch": 0.978399248669519, "grad_norm": 0.90625, "learning_rate": 2.438744929094461e-06, "loss": 0.7439, "step": 14064 }, { "epoch": 0.9784688163066542, "grad_norm": 0.90234375, "learning_rate": 2.4230428147992146e-06, "loss": 0.66, "step": 14065 }, { "epoch": 0.9785383839437893, "grad_norm": 1.21875, "learning_rate": 2.407391352433841e-06, "loss": 0.7612, "step": 14066 }, { "epoch": 0.9786079515809245, "grad_norm": 1.0078125, "learning_rate": 2.3917905427929265e-06, "loss": 0.649, "step": 14067 }, { "epoch": 0.9786775192180598, "grad_norm": 0.9140625, "learning_rate": 2.3762403866685046e-06, "loss": 0.8349, "step": 14068 }, { "epoch": 0.978747086855195, "grad_norm": 1.390625, "learning_rate": 2.3607408848501655e-06, "loss": 0.7158, "step": 14069 }, { "epoch": 0.9788166544923301, "grad_norm": 1.1796875, "learning_rate": 2.3452920381249466e-06, "loss": 1.0078, "step": 14070 }, { "epoch": 0.9788862221294654, "grad_norm": 0.95703125, "learning_rate": 2.329893847277331e-06, "loss": 0.9462, "step": 14071 }, { "epoch": 0.9789557897666006, "grad_norm": 1.03125, "learning_rate": 2.3145463130890276e-06, "loss": 0.7862, "step": 14072 }, { "epoch": 0.9790253574037358, "grad_norm": 1.0546875, "learning_rate": 2.299249436339301e-06, "loss": 0.9804, "step": 14073 }, { "epoch": 0.979094925040871, "grad_norm": 1.3515625, "learning_rate": 2.284003217804864e-06, "loss": 0.9441, "step": 14074 }, { "epoch": 0.9791644926780062, "grad_norm": 1.2109375, "learning_rate": 2.268807658259986e-06, "loss": 0.9997, "step": 14075 }, { "epoch": 0.9792340603151414, "grad_norm": 1.296875, "learning_rate": 2.2536627584761603e-06, "loss": 1.0353, "step": 14076 }, { "epoch": 0.9793036279522767, "grad_norm": 1.296875, "learning_rate": 2.2385685192222173e-06, "loss": 0.9707, "step": 14077 }, { "epoch": 0.9793731955894118, "grad_norm": 1.2109375, "learning_rate": 2.2235249412647653e-06, "loss": 0.7933, "step": 14078 }, { "epoch": 0.979442763226547, "grad_norm": 1.453125, "learning_rate": 2.2085320253674155e-06, "loss": 0.9587, "step": 14079 }, { "epoch": 0.9795123308636822, "grad_norm": 0.92578125, "learning_rate": 2.193589772291671e-06, "loss": 0.6871, "step": 14080 }, { "epoch": 0.9795818985008174, "grad_norm": 1.5703125, "learning_rate": 2.178698182796146e-06, "loss": 1.0512, "step": 14081 }, { "epoch": 0.9796514661379526, "grad_norm": 1.0390625, "learning_rate": 2.163857257636903e-06, "loss": 0.6405, "step": 14082 }, { "epoch": 0.9797210337750878, "grad_norm": 0.7734375, "learning_rate": 2.1490669975674506e-06, "loss": 0.6477, "step": 14083 }, { "epoch": 0.9797906014122231, "grad_norm": 1.28125, "learning_rate": 2.134327403338854e-06, "loss": 0.9768, "step": 14084 }, { "epoch": 0.9798601690493582, "grad_norm": 1.234375, "learning_rate": 2.1196384756995145e-06, "loss": 0.6931, "step": 14085 }, { "epoch": 0.9799297366864934, "grad_norm": 1.2109375, "learning_rate": 2.105000215395281e-06, "loss": 0.8718, "step": 14086 }, { "epoch": 0.9799993043236287, "grad_norm": 1.09375, "learning_rate": 2.0904126231693355e-06, "loss": 0.7593, "step": 14087 }, { "epoch": 0.9800688719607639, "grad_norm": 1.125, "learning_rate": 2.0758756997624194e-06, "loss": 0.6853, "step": 14088 }, { "epoch": 0.980138439597899, "grad_norm": 0.90625, "learning_rate": 2.0613894459127204e-06, "loss": 0.7454, "step": 14089 }, { "epoch": 0.9802080072350342, "grad_norm": 0.96484375, "learning_rate": 2.0469538623555385e-06, "loss": 0.7557, "step": 14090 }, { "epoch": 0.9802775748721695, "grad_norm": 1.09375, "learning_rate": 2.032568949824065e-06, "loss": 0.5998, "step": 14091 }, { "epoch": 0.9803471425093047, "grad_norm": 1.0, "learning_rate": 2.0182347090484944e-06, "loss": 0.7196, "step": 14092 }, { "epoch": 0.9804167101464398, "grad_norm": 1.2734375, "learning_rate": 2.00395114075691e-06, "loss": 0.9062, "step": 14093 }, { "epoch": 0.9804862777835751, "grad_norm": 1.109375, "learning_rate": 1.989718245674288e-06, "loss": 0.8441, "step": 14094 }, { "epoch": 0.9805558454207103, "grad_norm": 1.203125, "learning_rate": 1.9755360245236055e-06, "loss": 0.7855, "step": 14095 }, { "epoch": 0.9806254130578455, "grad_norm": 1.015625, "learning_rate": 1.9614044780246198e-06, "loss": 0.9628, "step": 14096 }, { "epoch": 0.9806949806949807, "grad_norm": 1.0234375, "learning_rate": 1.9473236068950905e-06, "loss": 0.8135, "step": 14097 }, { "epoch": 0.9807645483321159, "grad_norm": 0.94140625, "learning_rate": 1.93329341184989e-06, "loss": 0.6462, "step": 14098 }, { "epoch": 0.9808341159692511, "grad_norm": 1.0625, "learning_rate": 1.9193138936014488e-06, "loss": 0.5737, "step": 14099 }, { "epoch": 0.9809036836063864, "grad_norm": 0.8828125, "learning_rate": 1.9053850528595318e-06, "loss": 0.6714, "step": 14100 }, { "epoch": 0.9809732512435215, "grad_norm": 1.1328125, "learning_rate": 1.8915068903313515e-06, "loss": 0.8413, "step": 14101 }, { "epoch": 0.9810428188806567, "grad_norm": 1.125, "learning_rate": 1.8776794067216774e-06, "loss": 0.6286, "step": 14102 }, { "epoch": 0.9811123865177919, "grad_norm": 1.1484375, "learning_rate": 1.8639026027325035e-06, "loss": 0.8265, "step": 14103 }, { "epoch": 0.9811819541549271, "grad_norm": 1.453125, "learning_rate": 1.8501764790633814e-06, "loss": 0.7367, "step": 14104 }, { "epoch": 0.9812515217920623, "grad_norm": 0.953125, "learning_rate": 1.8365010364113089e-06, "loss": 0.8278, "step": 14105 }, { "epoch": 0.9813210894291975, "grad_norm": 1.6875, "learning_rate": 1.8228762754705086e-06, "loss": 0.925, "step": 14106 }, { "epoch": 0.9813906570663328, "grad_norm": 1.2265625, "learning_rate": 1.8093021969328716e-06, "loss": 0.7409, "step": 14107 }, { "epoch": 0.9814602247034679, "grad_norm": 0.8671875, "learning_rate": 1.7957788014877352e-06, "loss": 0.7799, "step": 14108 }, { "epoch": 0.9815297923406031, "grad_norm": 0.9375, "learning_rate": 1.7823060898214395e-06, "loss": 0.8179, "step": 14109 }, { "epoch": 0.9815993599777384, "grad_norm": 1.21875, "learning_rate": 1.7688840626184367e-06, "loss": 0.7299, "step": 14110 }, { "epoch": 0.9816689276148736, "grad_norm": 1.3203125, "learning_rate": 1.7555127205598487e-06, "loss": 0.8655, "step": 14111 }, { "epoch": 0.9817384952520087, "grad_norm": 1.0546875, "learning_rate": 1.74219206432491e-06, "loss": 0.7925, "step": 14112 }, { "epoch": 0.981808062889144, "grad_norm": 0.99609375, "learning_rate": 1.7289220945898576e-06, "loss": 0.6584, "step": 14113 }, { "epoch": 0.9818776305262792, "grad_norm": 1.21875, "learning_rate": 1.7157028120284857e-06, "loss": 0.8806, "step": 14114 }, { "epoch": 0.9819471981634144, "grad_norm": 0.9609375, "learning_rate": 1.702534217312035e-06, "loss": 0.8664, "step": 14115 }, { "epoch": 0.9820167658005495, "grad_norm": 1.3125, "learning_rate": 1.689416311109082e-06, "loss": 0.7739, "step": 14116 }, { "epoch": 0.9820863334376848, "grad_norm": 1.1953125, "learning_rate": 1.6763490940856496e-06, "loss": 0.6885, "step": 14117 }, { "epoch": 0.98215590107482, "grad_norm": 0.890625, "learning_rate": 1.6633325669054289e-06, "loss": 0.6807, "step": 14118 }, { "epoch": 0.9822254687119552, "grad_norm": 1.125, "learning_rate": 1.6503667302290027e-06, "loss": 0.8408, "step": 14119 }, { "epoch": 0.9822950363490904, "grad_norm": 1.4140625, "learning_rate": 1.6374515847149552e-06, "loss": 0.8461, "step": 14120 }, { "epoch": 0.9823646039862256, "grad_norm": 1.1328125, "learning_rate": 1.6245871310190952e-06, "loss": 0.9569, "step": 14121 }, { "epoch": 0.9824341716233608, "grad_norm": 1.046875, "learning_rate": 1.611773369794456e-06, "loss": 0.8808, "step": 14122 }, { "epoch": 0.9825037392604961, "grad_norm": 1.046875, "learning_rate": 1.599010301691739e-06, "loss": 0.6954, "step": 14123 }, { "epoch": 0.9825733068976312, "grad_norm": 1.1015625, "learning_rate": 1.5862979273588707e-06, "loss": 1.0216, "step": 14124 }, { "epoch": 0.9826428745347664, "grad_norm": 0.82421875, "learning_rate": 1.5736362474415567e-06, "loss": 0.7222, "step": 14125 }, { "epoch": 0.9827124421719017, "grad_norm": 1.28125, "learning_rate": 1.561025262582394e-06, "loss": 0.8758, "step": 14126 }, { "epoch": 0.9827820098090368, "grad_norm": 1.4296875, "learning_rate": 1.5484649734219814e-06, "loss": 0.7369, "step": 14127 }, { "epoch": 0.982851577446172, "grad_norm": 1.0234375, "learning_rate": 1.5359553805979198e-06, "loss": 0.9374, "step": 14128 }, { "epoch": 0.9829211450833072, "grad_norm": 1.0546875, "learning_rate": 1.523496484745368e-06, "loss": 0.8218, "step": 14129 }, { "epoch": 0.9829907127204425, "grad_norm": 1.3359375, "learning_rate": 1.5110882864970422e-06, "loss": 0.7643, "step": 14130 }, { "epoch": 0.9830602803575776, "grad_norm": 1.1171875, "learning_rate": 1.4987307864828825e-06, "loss": 0.8093, "step": 14131 }, { "epoch": 0.9831298479947128, "grad_norm": 1.09375, "learning_rate": 1.4864239853303873e-06, "loss": 0.5871, "step": 14132 }, { "epoch": 0.9831994156318481, "grad_norm": 1.0703125, "learning_rate": 1.474167883664279e-06, "loss": 0.7188, "step": 14133 }, { "epoch": 0.9832689832689833, "grad_norm": 1.078125, "learning_rate": 1.4619624821070599e-06, "loss": 0.8958, "step": 14134 }, { "epoch": 0.9833385509061184, "grad_norm": 1.2578125, "learning_rate": 1.449807781278345e-06, "loss": 0.8396, "step": 14135 }, { "epoch": 0.9834081185432537, "grad_norm": 0.98046875, "learning_rate": 1.4377037817954186e-06, "loss": 0.7646, "step": 14136 }, { "epoch": 0.9834776861803889, "grad_norm": 1.0234375, "learning_rate": 1.425650484272678e-06, "loss": 0.7525, "step": 14137 }, { "epoch": 0.9835472538175241, "grad_norm": 1.265625, "learning_rate": 1.4136478893221894e-06, "loss": 0.7781, "step": 14138 }, { "epoch": 0.9836168214546593, "grad_norm": 1.09375, "learning_rate": 1.401695997553465e-06, "loss": 0.831, "step": 14139 }, { "epoch": 0.9836863890917945, "grad_norm": 1.03125, "learning_rate": 1.3897948095733525e-06, "loss": 0.9721, "step": 14140 }, { "epoch": 0.9837559567289297, "grad_norm": 1.2265625, "learning_rate": 1.3779443259860359e-06, "loss": 0.7669, "step": 14141 }, { "epoch": 0.9838255243660649, "grad_norm": 1.34375, "learning_rate": 1.3661445473933664e-06, "loss": 0.8089, "step": 14142 }, { "epoch": 0.9838950920032001, "grad_norm": 0.9921875, "learning_rate": 1.35439547439431e-06, "loss": 0.674, "step": 14143 }, { "epoch": 0.9839646596403353, "grad_norm": 0.7890625, "learning_rate": 1.3426971075855e-06, "loss": 0.5941, "step": 14144 }, { "epoch": 0.9840342272774705, "grad_norm": 0.9609375, "learning_rate": 1.3310494475609058e-06, "loss": 0.6877, "step": 14145 }, { "epoch": 0.9841037949146058, "grad_norm": 1.2578125, "learning_rate": 1.3194524949119435e-06, "loss": 0.6499, "step": 14146 }, { "epoch": 0.9841733625517409, "grad_norm": 1.2421875, "learning_rate": 1.3079062502275863e-06, "loss": 0.8235, "step": 14147 }, { "epoch": 0.9842429301888761, "grad_norm": 1.375, "learning_rate": 1.2964107140938096e-06, "loss": 0.8694, "step": 14148 }, { "epoch": 0.9843124978260114, "grad_norm": 1.1015625, "learning_rate": 1.2849658870945914e-06, "loss": 0.9599, "step": 14149 }, { "epoch": 0.9843820654631465, "grad_norm": 0.84765625, "learning_rate": 1.2735717698107996e-06, "loss": 0.5055, "step": 14150 }, { "epoch": 0.9844516331002817, "grad_norm": 1.171875, "learning_rate": 1.262228362821194e-06, "loss": 1.1511, "step": 14151 }, { "epoch": 0.984521200737417, "grad_norm": 1.0859375, "learning_rate": 1.250935666701536e-06, "loss": 0.7013, "step": 14152 }, { "epoch": 0.9845907683745522, "grad_norm": 1.1328125, "learning_rate": 1.2396936820252557e-06, "loss": 0.8599, "step": 14153 }, { "epoch": 0.9846603360116873, "grad_norm": 1.140625, "learning_rate": 1.2285024093632303e-06, "loss": 0.8528, "step": 14154 }, { "epoch": 0.9847299036488225, "grad_norm": 0.9609375, "learning_rate": 1.2173618492837823e-06, "loss": 0.8547, "step": 14155 }, { "epoch": 0.9847994712859578, "grad_norm": 1.109375, "learning_rate": 1.2062720023523488e-06, "loss": 1.0045, "step": 14156 }, { "epoch": 0.984869038923093, "grad_norm": 0.9765625, "learning_rate": 1.1952328691321457e-06, "loss": 0.5509, "step": 14157 }, { "epoch": 0.9849386065602281, "grad_norm": 1.40625, "learning_rate": 1.1842444501837245e-06, "loss": 0.9216, "step": 14158 }, { "epoch": 0.9850081741973634, "grad_norm": 1.09375, "learning_rate": 1.1733067460649727e-06, "loss": 0.8681, "step": 14159 }, { "epoch": 0.9850777418344986, "grad_norm": 0.91796875, "learning_rate": 1.1624197573312234e-06, "loss": 0.8702, "step": 14160 }, { "epoch": 0.9851473094716338, "grad_norm": 1.0390625, "learning_rate": 1.1515834845352568e-06, "loss": 0.859, "step": 14161 }, { "epoch": 0.985216877108769, "grad_norm": 1.2109375, "learning_rate": 1.1407979282272996e-06, "loss": 0.6349, "step": 14162 }, { "epoch": 0.9852864447459042, "grad_norm": 1.2109375, "learning_rate": 1.1300630889550245e-06, "loss": 0.6855, "step": 14163 }, { "epoch": 0.9853560123830394, "grad_norm": 1.0859375, "learning_rate": 1.1193789672634402e-06, "loss": 0.8024, "step": 14164 }, { "epoch": 0.9854255800201747, "grad_norm": 1.078125, "learning_rate": 1.1087455636951128e-06, "loss": 0.6758, "step": 14165 }, { "epoch": 0.9854951476573098, "grad_norm": 1.0234375, "learning_rate": 1.0981628787898323e-06, "loss": 0.612, "step": 14166 }, { "epoch": 0.985564715294445, "grad_norm": 1.015625, "learning_rate": 1.0876309130850582e-06, "loss": 0.7018, "step": 14167 }, { "epoch": 0.9856342829315802, "grad_norm": 1.1953125, "learning_rate": 1.0771496671154736e-06, "loss": 0.8467, "step": 14168 }, { "epoch": 0.9857038505687155, "grad_norm": 1.2265625, "learning_rate": 1.0667191414133192e-06, "loss": 0.8525, "step": 14169 }, { "epoch": 0.9857734182058506, "grad_norm": 1.4453125, "learning_rate": 1.0563393365080609e-06, "loss": 1.0241, "step": 14170 }, { "epoch": 0.9858429858429858, "grad_norm": 0.97265625, "learning_rate": 1.0460102529269432e-06, "loss": 0.7085, "step": 14171 }, { "epoch": 0.9859125534801211, "grad_norm": 1.546875, "learning_rate": 1.0357318911943247e-06, "loss": 0.9521, "step": 14172 }, { "epoch": 0.9859821211172562, "grad_norm": 0.98046875, "learning_rate": 1.0255042518320102e-06, "loss": 0.8574, "step": 14173 }, { "epoch": 0.9860516887543914, "grad_norm": 0.96875, "learning_rate": 1.0153273353594727e-06, "loss": 0.6841, "step": 14174 }, { "epoch": 0.9861212563915267, "grad_norm": 1.015625, "learning_rate": 1.0052011422932994e-06, "loss": 0.5727, "step": 14175 }, { "epoch": 0.9861908240286619, "grad_norm": 1.0703125, "learning_rate": 9.951256731477453e-07, "loss": 0.6811, "step": 14176 }, { "epoch": 0.986260391665797, "grad_norm": 1.1953125, "learning_rate": 9.851009284344016e-07, "loss": 0.8459, "step": 14177 }, { "epoch": 0.9863299593029323, "grad_norm": 1.0546875, "learning_rate": 9.751269086620829e-07, "loss": 0.642, "step": 14178 }, { "epoch": 0.9863995269400675, "grad_norm": 1.5546875, "learning_rate": 9.652036143374953e-07, "loss": 0.965, "step": 14179 }, { "epoch": 0.9864690945772027, "grad_norm": 1.2734375, "learning_rate": 9.55331045964236e-07, "loss": 0.9367, "step": 14180 }, { "epoch": 0.9865386622143378, "grad_norm": 1.1328125, "learning_rate": 9.455092040437929e-07, "loss": 0.6349, "step": 14181 }, { "epoch": 0.9866082298514731, "grad_norm": 0.98046875, "learning_rate": 9.357380890747668e-07, "loss": 0.5858, "step": 14182 }, { "epoch": 0.9866777974886083, "grad_norm": 0.9375, "learning_rate": 9.260177015533167e-07, "loss": 0.6042, "step": 14183 }, { "epoch": 0.9867473651257435, "grad_norm": 1.1015625, "learning_rate": 9.163480419729365e-07, "loss": 0.6278, "step": 14184 }, { "epoch": 0.9868169327628787, "grad_norm": 1.0859375, "learning_rate": 9.067291108246778e-07, "loss": 0.9643, "step": 14185 }, { "epoch": 0.9868865004000139, "grad_norm": 0.8671875, "learning_rate": 8.971609085969279e-07, "loss": 0.5668, "step": 14186 }, { "epoch": 0.9869560680371491, "grad_norm": 1.171875, "learning_rate": 8.876434357755203e-07, "loss": 0.8407, "step": 14187 }, { "epoch": 0.9870256356742844, "grad_norm": 1.1484375, "learning_rate": 8.781766928436241e-07, "loss": 0.8556, "step": 14188 }, { "epoch": 0.9870952033114195, "grad_norm": 1.0390625, "learning_rate": 8.687606802819659e-07, "loss": 0.6536, "step": 14189 }, { "epoch": 0.9871647709485547, "grad_norm": 1.453125, "learning_rate": 8.593953985687186e-07, "loss": 0.8054, "step": 14190 }, { "epoch": 0.98723433858569, "grad_norm": 1.078125, "learning_rate": 8.500808481792799e-07, "loss": 0.6906, "step": 14191 }, { "epoch": 0.9873039062228252, "grad_norm": 0.90625, "learning_rate": 8.408170295866046e-07, "loss": 0.6133, "step": 14192 }, { "epoch": 0.9873734738599603, "grad_norm": 0.8828125, "learning_rate": 8.316039432612055e-07, "loss": 0.5521, "step": 14193 }, { "epoch": 0.9874430414970955, "grad_norm": 1.359375, "learning_rate": 8.224415896705972e-07, "loss": 0.7895, "step": 14194 }, { "epoch": 0.9875126091342308, "grad_norm": 1.046875, "learning_rate": 8.133299692804075e-07, "loss": 0.9231, "step": 14195 }, { "epoch": 0.987582176771366, "grad_norm": 1.0234375, "learning_rate": 8.042690825529331e-07, "loss": 0.7716, "step": 14196 }, { "epoch": 0.9876517444085011, "grad_norm": 1.0625, "learning_rate": 7.952589299483615e-07, "loss": 0.75, "step": 14197 }, { "epoch": 0.9877213120456364, "grad_norm": 1.5, "learning_rate": 7.862995119241045e-07, "loss": 0.9462, "step": 14198 }, { "epoch": 0.9877908796827716, "grad_norm": 0.96484375, "learning_rate": 7.773908289352427e-07, "loss": 0.8243, "step": 14199 }, { "epoch": 0.9878604473199067, "grad_norm": 1.4921875, "learning_rate": 7.685328814339698e-07, "loss": 1.1487, "step": 14200 }, { "epoch": 0.987930014957042, "grad_norm": 1.1875, "learning_rate": 7.597256698701482e-07, "loss": 0.9426, "step": 14201 }, { "epoch": 0.9879995825941772, "grad_norm": 1.171875, "learning_rate": 7.509691946908648e-07, "loss": 0.785, "step": 14202 }, { "epoch": 0.9880691502313124, "grad_norm": 1.125, "learning_rate": 7.422634563407638e-07, "loss": 0.989, "step": 14203 }, { "epoch": 0.9881387178684476, "grad_norm": 1.0703125, "learning_rate": 7.33608455261936e-07, "loss": 0.9506, "step": 14204 }, { "epoch": 0.9882082855055828, "grad_norm": 0.875, "learning_rate": 7.250041918938077e-07, "loss": 0.5925, "step": 14205 }, { "epoch": 0.988277853142718, "grad_norm": 1.1640625, "learning_rate": 7.164506666732518e-07, "loss": 0.8583, "step": 14206 }, { "epoch": 0.9883474207798532, "grad_norm": 1.296875, "learning_rate": 7.079478800344763e-07, "loss": 0.835, "step": 14207 }, { "epoch": 0.9884169884169884, "grad_norm": 1.265625, "learning_rate": 6.994958324093581e-07, "loss": 0.7567, "step": 14208 }, { "epoch": 0.9884865560541236, "grad_norm": 1.1328125, "learning_rate": 6.910945242269983e-07, "loss": 0.8193, "step": 14209 }, { "epoch": 0.9885561236912588, "grad_norm": 1.25, "learning_rate": 6.827439559140558e-07, "loss": 0.8375, "step": 14210 }, { "epoch": 0.9886256913283941, "grad_norm": 0.9140625, "learning_rate": 6.744441278943025e-07, "loss": 0.7245, "step": 14211 }, { "epoch": 0.9886952589655292, "grad_norm": 1.34375, "learning_rate": 6.661950405894013e-07, "loss": 0.8492, "step": 14212 }, { "epoch": 0.9887648266026644, "grad_norm": 1.4765625, "learning_rate": 6.579966944180172e-07, "loss": 0.8967, "step": 14213 }, { "epoch": 0.9888343942397997, "grad_norm": 1.0703125, "learning_rate": 6.498490897965948e-07, "loss": 0.6543, "step": 14214 }, { "epoch": 0.9889039618769349, "grad_norm": 1.125, "learning_rate": 6.417522271386922e-07, "loss": 0.7294, "step": 14215 }, { "epoch": 0.98897352951407, "grad_norm": 1.25, "learning_rate": 6.33706106855425e-07, "loss": 0.7326, "step": 14216 }, { "epoch": 0.9890430971512053, "grad_norm": 1.203125, "learning_rate": 6.257107293554664e-07, "loss": 0.9212, "step": 14217 }, { "epoch": 0.9891126647883405, "grad_norm": 1.1875, "learning_rate": 6.177660950446029e-07, "loss": 0.9369, "step": 14218 }, { "epoch": 0.9891822324254756, "grad_norm": 1.2421875, "learning_rate": 6.098722043264005e-07, "loss": 0.7213, "step": 14219 }, { "epoch": 0.9892518000626108, "grad_norm": 1.125, "learning_rate": 6.020290576015386e-07, "loss": 0.8029, "step": 14220 }, { "epoch": 0.9893213676997461, "grad_norm": 2.359375, "learning_rate": 5.942366552683654e-07, "loss": 0.724, "step": 14221 }, { "epoch": 0.9893909353368813, "grad_norm": 1.203125, "learning_rate": 5.864949977224532e-07, "loss": 0.5431, "step": 14222 }, { "epoch": 0.9894605029740164, "grad_norm": 1.2578125, "learning_rate": 5.788040853568211e-07, "loss": 0.772, "step": 14223 }, { "epoch": 0.9895300706111517, "grad_norm": 1.0859375, "learning_rate": 5.711639185621564e-07, "loss": 0.7427, "step": 14224 }, { "epoch": 0.9895996382482869, "grad_norm": 1.0390625, "learning_rate": 5.635744977262603e-07, "loss": 0.7589, "step": 14225 }, { "epoch": 0.9896692058854221, "grad_norm": 0.98046875, "learning_rate": 5.560358232344909e-07, "loss": 0.716, "step": 14226 }, { "epoch": 0.9897387735225573, "grad_norm": 1.25, "learning_rate": 5.485478954697643e-07, "loss": 1.0223, "step": 14227 }, { "epoch": 0.9898083411596925, "grad_norm": 1.171875, "learning_rate": 5.411107148119987e-07, "loss": 0.9527, "step": 14228 }, { "epoch": 0.9898779087968277, "grad_norm": 1.09375, "learning_rate": 5.337242816391142e-07, "loss": 0.7207, "step": 14229 }, { "epoch": 0.989947476433963, "grad_norm": 1.21875, "learning_rate": 5.263885963260329e-07, "loss": 0.8742, "step": 14230 }, { "epoch": 0.9900170440710981, "grad_norm": 1.2109375, "learning_rate": 5.191036592451237e-07, "loss": 0.6883, "step": 14231 }, { "epoch": 0.9900866117082333, "grad_norm": 1.1484375, "learning_rate": 5.11869470766424e-07, "loss": 0.8554, "step": 14232 }, { "epoch": 0.9901561793453685, "grad_norm": 1.234375, "learning_rate": 5.046860312571955e-07, "loss": 0.8186, "step": 14233 }, { "epoch": 0.9902257469825038, "grad_norm": 1.2265625, "learning_rate": 4.975533410821465e-07, "loss": 1.1046, "step": 14234 }, { "epoch": 0.9902953146196389, "grad_norm": 1.21875, "learning_rate": 4.904714006035427e-07, "loss": 0.7421, "step": 14235 }, { "epoch": 0.9903648822567741, "grad_norm": 1.1171875, "learning_rate": 4.834402101808743e-07, "loss": 0.7434, "step": 14236 }, { "epoch": 0.9904344498939094, "grad_norm": 1.4609375, "learning_rate": 4.7645977017118926e-07, "loss": 1.1464, "step": 14237 }, { "epoch": 0.9905040175310446, "grad_norm": 1.0, "learning_rate": 4.695300809288705e-07, "loss": 0.9736, "step": 14238 }, { "epoch": 0.9905735851681797, "grad_norm": 1.0703125, "learning_rate": 4.626511428058588e-07, "loss": 0.8671, "step": 14239 }, { "epoch": 0.990643152805315, "grad_norm": 1.171875, "learning_rate": 4.558229561513194e-07, "loss": 0.6504, "step": 14240 }, { "epoch": 0.9907127204424502, "grad_norm": 1.2890625, "learning_rate": 4.4904552131197485e-07, "loss": 0.8912, "step": 14241 }, { "epoch": 0.9907822880795853, "grad_norm": 1.1484375, "learning_rate": 4.423188386321053e-07, "loss": 0.6163, "step": 14242 }, { "epoch": 0.9908518557167206, "grad_norm": 1.1484375, "learning_rate": 4.356429084531044e-07, "loss": 0.8492, "step": 14243 }, { "epoch": 0.9909214233538558, "grad_norm": 0.9921875, "learning_rate": 4.2901773111392317e-07, "loss": 0.8083, "step": 14244 }, { "epoch": 0.990990990990991, "grad_norm": 1.1484375, "learning_rate": 4.2244330695107024e-07, "loss": 0.7564, "step": 14245 }, { "epoch": 0.9910605586281261, "grad_norm": 1.1953125, "learning_rate": 4.1591963629827867e-07, "loss": 0.8627, "step": 14246 }, { "epoch": 0.9911301262652614, "grad_norm": 1.15625, "learning_rate": 4.09446719486839e-07, "loss": 0.8946, "step": 14247 }, { "epoch": 0.9911996939023966, "grad_norm": 1.0, "learning_rate": 4.030245568453772e-07, "loss": 0.7983, "step": 14248 }, { "epoch": 0.9912692615395318, "grad_norm": 1.203125, "learning_rate": 3.966531486998548e-07, "loss": 0.8036, "step": 14249 }, { "epoch": 0.991338829176667, "grad_norm": 1.0859375, "learning_rate": 3.9033249537412384e-07, "loss": 0.8449, "step": 14250 }, { "epoch": 0.9914083968138022, "grad_norm": 1.0703125, "learning_rate": 3.8406259718881673e-07, "loss": 0.8014, "step": 14251 }, { "epoch": 0.9914779644509374, "grad_norm": 0.96484375, "learning_rate": 3.7784345446234545e-07, "loss": 0.7806, "step": 14252 }, { "epoch": 0.9915475320880727, "grad_norm": 1.015625, "learning_rate": 3.716750675104574e-07, "loss": 0.6997, "step": 14253 }, { "epoch": 0.9916170997252078, "grad_norm": 1.234375, "learning_rate": 3.6555743664645757e-07, "loss": 0.707, "step": 14254 }, { "epoch": 0.991686667362343, "grad_norm": 1.4296875, "learning_rate": 3.594905621809863e-07, "loss": 0.9648, "step": 14255 }, { "epoch": 0.9917562349994783, "grad_norm": 1.1484375, "learning_rate": 3.534744444220195e-07, "loss": 0.6445, "step": 14256 }, { "epoch": 0.9918258026366135, "grad_norm": 1.046875, "learning_rate": 3.4750908367497946e-07, "loss": 0.9463, "step": 14257 }, { "epoch": 0.9918953702737486, "grad_norm": 1.2265625, "learning_rate": 3.415944802428461e-07, "loss": 0.9668, "step": 14258 }, { "epoch": 0.9919649379108838, "grad_norm": 1.25, "learning_rate": 3.3573063442582376e-07, "loss": 0.9442, "step": 14259 }, { "epoch": 0.9920345055480191, "grad_norm": 1.265625, "learning_rate": 3.299175465217852e-07, "loss": 0.9371, "step": 14260 }, { "epoch": 0.9921040731851543, "grad_norm": 1.0625, "learning_rate": 3.241552168257167e-07, "loss": 0.8297, "step": 14261 }, { "epoch": 0.9921736408222894, "grad_norm": 1.3515625, "learning_rate": 3.1844364563038407e-07, "loss": 0.9003, "step": 14262 }, { "epoch": 0.9922432084594247, "grad_norm": 0.96875, "learning_rate": 3.127828332257776e-07, "loss": 0.8385, "step": 14263 }, { "epoch": 0.9923127760965599, "grad_norm": 1.0859375, "learning_rate": 3.07172779899223e-07, "loss": 0.8694, "step": 14264 }, { "epoch": 0.992382343733695, "grad_norm": 1.09375, "learning_rate": 3.016134859354924e-07, "loss": 0.7328, "step": 14265 }, { "epoch": 0.9924519113708303, "grad_norm": 1.3203125, "learning_rate": 2.961049516171377e-07, "loss": 0.911, "step": 14266 }, { "epoch": 0.9925214790079655, "grad_norm": 1.6171875, "learning_rate": 2.906471772236019e-07, "loss": 0.773, "step": 14267 }, { "epoch": 0.9925910466451007, "grad_norm": 1.0859375, "learning_rate": 2.852401630321078e-07, "loss": 0.7979, "step": 14268 }, { "epoch": 0.992660614282236, "grad_norm": 1.1015625, "learning_rate": 2.798839093172134e-07, "loss": 0.7917, "step": 14269 }, { "epoch": 0.9927301819193711, "grad_norm": 1.3359375, "learning_rate": 2.745784163508125e-07, "loss": 0.6746, "step": 14270 }, { "epoch": 0.9927997495565063, "grad_norm": 1.3125, "learning_rate": 2.693236844023561e-07, "loss": 0.9603, "step": 14271 }, { "epoch": 0.9928693171936415, "grad_norm": 1.296875, "learning_rate": 2.6411971373863086e-07, "loss": 0.9019, "step": 14272 }, { "epoch": 0.9929388848307767, "grad_norm": 1.4375, "learning_rate": 2.5896650462386985e-07, "loss": 0.8084, "step": 14273 }, { "epoch": 0.9930084524679119, "grad_norm": 1.1328125, "learning_rate": 2.5386405731964157e-07, "loss": 0.8645, "step": 14274 }, { "epoch": 0.9930780201050471, "grad_norm": 0.9453125, "learning_rate": 2.4881237208518313e-07, "loss": 0.7938, "step": 14275 }, { "epoch": 0.9931475877421824, "grad_norm": 1.125, "learning_rate": 2.4381144917695606e-07, "loss": 0.8516, "step": 14276 }, { "epoch": 0.9932171553793175, "grad_norm": 0.99609375, "learning_rate": 2.3886128884875737e-07, "loss": 0.743, "step": 14277 }, { "epoch": 0.9932867230164527, "grad_norm": 0.80078125, "learning_rate": 2.3396189135205248e-07, "loss": 0.7701, "step": 14278 }, { "epoch": 0.993356290653588, "grad_norm": 1.046875, "learning_rate": 2.2911325693553142e-07, "loss": 0.8914, "step": 14279 }, { "epoch": 0.9934258582907232, "grad_norm": 1.296875, "learning_rate": 2.2431538584544164e-07, "loss": 0.5938, "step": 14280 }, { "epoch": 0.9934954259278583, "grad_norm": 1.2265625, "learning_rate": 2.1956827832536608e-07, "loss": 0.8855, "step": 14281 }, { "epoch": 0.9935649935649936, "grad_norm": 1.109375, "learning_rate": 2.1487193461633415e-07, "loss": 0.8047, "step": 14282 }, { "epoch": 0.9936345612021288, "grad_norm": 1.1875, "learning_rate": 2.1022635495682174e-07, "loss": 0.9119, "step": 14283 }, { "epoch": 0.993704128839264, "grad_norm": 0.82421875, "learning_rate": 2.0563153958275128e-07, "loss": 0.8005, "step": 14284 }, { "epoch": 0.9937736964763991, "grad_norm": 1.015625, "learning_rate": 2.0108748872726956e-07, "loss": 0.9094, "step": 14285 }, { "epoch": 0.9938432641135344, "grad_norm": 0.91015625, "learning_rate": 1.9659420262130302e-07, "loss": 0.8076, "step": 14286 }, { "epoch": 0.9939128317506696, "grad_norm": 1.296875, "learning_rate": 1.9215168149289143e-07, "loss": 0.953, "step": 14287 }, { "epoch": 0.9939823993878047, "grad_norm": 1.1328125, "learning_rate": 1.8775992556752108e-07, "loss": 0.8002, "step": 14288 }, { "epoch": 0.99405196702494, "grad_norm": 1.1015625, "learning_rate": 1.8341893506834684e-07, "loss": 0.8113, "step": 14289 }, { "epoch": 0.9941215346620752, "grad_norm": 1.15625, "learning_rate": 1.7912871021574794e-07, "loss": 0.7923, "step": 14290 }, { "epoch": 0.9941911022992104, "grad_norm": 0.90234375, "learning_rate": 1.7488925122743916e-07, "loss": 0.8136, "step": 14291 }, { "epoch": 0.9942606699363457, "grad_norm": 1.203125, "learning_rate": 1.7070055831880372e-07, "loss": 0.7789, "step": 14292 }, { "epoch": 0.9943302375734808, "grad_norm": 1.015625, "learning_rate": 1.6656263170244934e-07, "loss": 0.9456, "step": 14293 }, { "epoch": 0.994399805210616, "grad_norm": 1.140625, "learning_rate": 1.6247547158854125e-07, "loss": 0.9678, "step": 14294 }, { "epoch": 0.9944693728477513, "grad_norm": 1.109375, "learning_rate": 1.5843907818458015e-07, "loss": 0.8439, "step": 14295 }, { "epoch": 0.9945389404848864, "grad_norm": 1.1171875, "learning_rate": 1.5445345169551316e-07, "loss": 0.7758, "step": 14296 }, { "epoch": 0.9946085081220216, "grad_norm": 1.046875, "learning_rate": 1.5051859232373398e-07, "loss": 0.7314, "step": 14297 }, { "epoch": 0.9946780757591568, "grad_norm": 1.2890625, "learning_rate": 1.4663450026897174e-07, "loss": 1.0071, "step": 14298 }, { "epoch": 0.9947476433962921, "grad_norm": 0.96484375, "learning_rate": 1.4280117572840202e-07, "loss": 0.6489, "step": 14299 }, { "epoch": 0.9948172110334272, "grad_norm": 1.0390625, "learning_rate": 1.3901861889686895e-07, "loss": 0.7407, "step": 14300 }, { "epoch": 0.9948867786705624, "grad_norm": 0.83203125, "learning_rate": 1.352868299662191e-07, "loss": 0.6603, "step": 14301 }, { "epoch": 0.9949563463076977, "grad_norm": 0.9296875, "learning_rate": 1.3160580912596753e-07, "loss": 0.8934, "step": 14302 }, { "epoch": 0.9950259139448329, "grad_norm": 1.640625, "learning_rate": 1.2797555656318682e-07, "loss": 0.9611, "step": 14303 }, { "epoch": 0.995095481581968, "grad_norm": 1.21875, "learning_rate": 1.2439607246195194e-07, "loss": 0.6547, "step": 14304 }, { "epoch": 0.9951650492191033, "grad_norm": 1.0390625, "learning_rate": 1.2086735700422846e-07, "loss": 0.9057, "step": 14305 }, { "epoch": 0.9952346168562385, "grad_norm": 1.109375, "learning_rate": 1.1738941036909535e-07, "loss": 0.9191, "step": 14306 }, { "epoch": 0.9953041844933737, "grad_norm": 1.515625, "learning_rate": 1.1396223273307804e-07, "loss": 0.7479, "step": 14307 }, { "epoch": 0.9953737521305089, "grad_norm": 0.9609375, "learning_rate": 1.1058582427025954e-07, "loss": 0.7768, "step": 14308 }, { "epoch": 0.9954433197676441, "grad_norm": 1.296875, "learning_rate": 1.0726018515216929e-07, "loss": 0.6844, "step": 14309 }, { "epoch": 0.9955128874047793, "grad_norm": 1.2265625, "learning_rate": 1.0398531554745017e-07, "loss": 0.8366, "step": 14310 }, { "epoch": 0.9955824550419144, "grad_norm": 1.6640625, "learning_rate": 1.0076121562263563e-07, "loss": 0.8562, "step": 14311 }, { "epoch": 0.9956520226790497, "grad_norm": 1.359375, "learning_rate": 9.758788554126152e-08, "loss": 0.812, "step": 14312 }, { "epoch": 0.9957215903161849, "grad_norm": 1.1484375, "learning_rate": 9.446532546442121e-08, "loss": 0.9063, "step": 14313 }, { "epoch": 0.9957911579533201, "grad_norm": 1.015625, "learning_rate": 9.139353555076557e-08, "loss": 0.688, "step": 14314 }, { "epoch": 0.9958607255904554, "grad_norm": 1.3828125, "learning_rate": 8.837251595628093e-08, "loss": 0.8599, "step": 14315 }, { "epoch": 0.9959302932275905, "grad_norm": 1.1796875, "learning_rate": 8.540226683428908e-08, "loss": 0.8422, "step": 14316 }, { "epoch": 0.9959998608647257, "grad_norm": 1.1015625, "learning_rate": 8.248278833566936e-08, "loss": 0.7857, "step": 14317 }, { "epoch": 0.996069428501861, "grad_norm": 0.90625, "learning_rate": 7.961408060852549e-08, "loss": 0.6352, "step": 14318 }, { "epoch": 0.9961389961389961, "grad_norm": 1.171875, "learning_rate": 7.679614379862976e-08, "loss": 0.8828, "step": 14319 }, { "epoch": 0.9962085637761313, "grad_norm": 1.40625, "learning_rate": 7.402897804908992e-08, "loss": 0.9948, "step": 14320 }, { "epoch": 0.9962781314132666, "grad_norm": 1.1640625, "learning_rate": 7.13125835003492e-08, "loss": 0.9744, "step": 14321 }, { "epoch": 0.9963476990504018, "grad_norm": 1.078125, "learning_rate": 6.864696029029727e-08, "loss": 0.6612, "step": 14322 }, { "epoch": 0.9964172666875369, "grad_norm": 1.140625, "learning_rate": 6.603210855438136e-08, "loss": 0.705, "step": 14323 }, { "epoch": 0.9964868343246721, "grad_norm": 1.4140625, "learning_rate": 6.34680284252731e-08, "loss": 0.9975, "step": 14324 }, { "epoch": 0.9965564019618074, "grad_norm": 1.109375, "learning_rate": 6.095472003320169e-08, "loss": 0.5625, "step": 14325 }, { "epoch": 0.9966259695989426, "grad_norm": 1.140625, "learning_rate": 5.849218350573171e-08, "loss": 0.9337, "step": 14326 }, { "epoch": 0.9966955372360777, "grad_norm": 1.03125, "learning_rate": 5.6080418968096346e-08, "loss": 0.665, "step": 14327 }, { "epoch": 0.996765104873213, "grad_norm": 0.9609375, "learning_rate": 5.371942654242012e-08, "loss": 0.8357, "step": 14328 }, { "epoch": 0.9968346725103482, "grad_norm": 1.1328125, "learning_rate": 5.14092063489402e-08, "loss": 0.7825, "step": 14329 }, { "epoch": 0.9969042401474834, "grad_norm": 1.046875, "learning_rate": 4.914975850467407e-08, "loss": 0.9078, "step": 14330 }, { "epoch": 0.9969738077846186, "grad_norm": 1.2421875, "learning_rate": 4.6941083124529824e-08, "loss": 1.0158, "step": 14331 }, { "epoch": 0.9970433754217538, "grad_norm": 1.0078125, "learning_rate": 4.478318032052897e-08, "loss": 0.8284, "step": 14332 }, { "epoch": 0.997112943058889, "grad_norm": 1.3671875, "learning_rate": 4.267605020236154e-08, "loss": 1.2325, "step": 14333 }, { "epoch": 0.9971825106960243, "grad_norm": 1.28125, "learning_rate": 4.061969287683098e-08, "loss": 0.8964, "step": 14334 }, { "epoch": 0.9972520783331594, "grad_norm": 0.84375, "learning_rate": 3.8614108448520316e-08, "loss": 0.7729, "step": 14335 }, { "epoch": 0.9973216459702946, "grad_norm": 1.046875, "learning_rate": 3.665929701923698e-08, "loss": 0.966, "step": 14336 }, { "epoch": 0.9973912136074298, "grad_norm": 0.8984375, "learning_rate": 3.475525868823493e-08, "loss": 0.5503, "step": 14337 }, { "epoch": 0.997460781244565, "grad_norm": 0.95703125, "learning_rate": 3.2901993552103546e-08, "loss": 0.7011, "step": 14338 }, { "epoch": 0.9975303488817002, "grad_norm": 1.078125, "learning_rate": 3.109950170498976e-08, "loss": 0.9463, "step": 14339 }, { "epoch": 0.9975999165188354, "grad_norm": 1.09375, "learning_rate": 2.934778323848697e-08, "loss": 0.7466, "step": 14340 }, { "epoch": 0.9976694841559707, "grad_norm": 1.140625, "learning_rate": 2.764683824141301e-08, "loss": 0.9995, "step": 14341 }, { "epoch": 0.9977390517931058, "grad_norm": 0.91796875, "learning_rate": 2.5996666800254253e-08, "loss": 0.6633, "step": 14342 }, { "epoch": 0.997808619430241, "grad_norm": 2.296875, "learning_rate": 2.4397268998721523e-08, "loss": 1.3606, "step": 14343 }, { "epoch": 0.9978781870673763, "grad_norm": 1.0703125, "learning_rate": 2.2848644917972116e-08, "loss": 0.6953, "step": 14344 }, { "epoch": 0.9979477547045115, "grad_norm": 1.21875, "learning_rate": 2.1350794636831872e-08, "loss": 0.9079, "step": 14345 }, { "epoch": 0.9980173223416466, "grad_norm": 1.2421875, "learning_rate": 1.990371823112902e-08, "loss": 0.9467, "step": 14346 }, { "epoch": 0.9980868899787819, "grad_norm": 1.109375, "learning_rate": 1.850741577447135e-08, "loss": 0.6985, "step": 14347 }, { "epoch": 0.9981564576159171, "grad_norm": 1.0, "learning_rate": 1.7161887337802108e-08, "loss": 0.6278, "step": 14348 }, { "epoch": 0.9982260252530523, "grad_norm": 1.5703125, "learning_rate": 1.586713298928899e-08, "loss": 0.6759, "step": 14349 }, { "epoch": 0.9982955928901874, "grad_norm": 1.15625, "learning_rate": 1.4623152794768224e-08, "loss": 0.6557, "step": 14350 }, { "epoch": 0.9983651605273227, "grad_norm": 1.390625, "learning_rate": 1.3429946817300476e-08, "loss": 0.7722, "step": 14351 }, { "epoch": 0.9984347281644579, "grad_norm": 1.0390625, "learning_rate": 1.2287515117725968e-08, "loss": 0.8084, "step": 14352 }, { "epoch": 0.9985042958015931, "grad_norm": 0.98046875, "learning_rate": 1.1195857753776295e-08, "loss": 0.7004, "step": 14353 }, { "epoch": 0.9985738634387283, "grad_norm": 0.9609375, "learning_rate": 1.0154974780962611e-08, "loss": 0.6947, "step": 14354 }, { "epoch": 0.9986434310758635, "grad_norm": 1.0234375, "learning_rate": 9.164866252242554e-09, "loss": 0.674, "step": 14355 }, { "epoch": 0.9987129987129987, "grad_norm": 1.09375, "learning_rate": 8.225532217687181e-09, "loss": 0.8365, "step": 14356 }, { "epoch": 0.998782566350134, "grad_norm": 1.1015625, "learning_rate": 7.336972725147106e-09, "loss": 0.6817, "step": 14357 }, { "epoch": 0.9988521339872691, "grad_norm": 1.015625, "learning_rate": 6.499187819808405e-09, "loss": 0.6803, "step": 14358 }, { "epoch": 0.9989217016244043, "grad_norm": 1.0390625, "learning_rate": 5.712177543970576e-09, "loss": 0.8999, "step": 14359 }, { "epoch": 0.9989912692615396, "grad_norm": 1.015625, "learning_rate": 4.975941937823691e-09, "loss": 0.7464, "step": 14360 }, { "epoch": 0.9990608368986748, "grad_norm": 1.8359375, "learning_rate": 4.290481038560223e-09, "loss": 0.9455, "step": 14361 }, { "epoch": 0.9991304045358099, "grad_norm": 1.0, "learning_rate": 3.655794881152197e-09, "loss": 0.7925, "step": 14362 }, { "epoch": 0.9991999721729451, "grad_norm": 1.0546875, "learning_rate": 3.0718834976850575e-09, "loss": 0.656, "step": 14363 }, { "epoch": 0.9992695398100804, "grad_norm": 1.2109375, "learning_rate": 2.538746917912782e-09, "loss": 0.6469, "step": 14364 }, { "epoch": 0.9993391074472155, "grad_norm": 1.265625, "learning_rate": 2.0563851688137903e-09, "loss": 0.8579, "step": 14365 }, { "epoch": 0.9994086750843507, "grad_norm": 0.9609375, "learning_rate": 1.624798274924011e-09, "loss": 0.7825, "step": 14366 }, { "epoch": 0.999478242721486, "grad_norm": 1.03125, "learning_rate": 1.24398625822586e-09, "loss": 0.834, "step": 14367 }, { "epoch": 0.9995478103586212, "grad_norm": 1.34375, "learning_rate": 9.139491379261955e-10, "loss": 1.0088, "step": 14368 }, { "epoch": 0.9996173779957563, "grad_norm": 1.359375, "learning_rate": 6.346869309004078e-10, "loss": 0.7419, "step": 14369 }, { "epoch": 0.9996869456328916, "grad_norm": 1.015625, "learning_rate": 4.06199651248329e-10, "loss": 0.6799, "step": 14370 }, { "epoch": 0.9997565132700268, "grad_norm": 1.046875, "learning_rate": 2.284873106273011e-10, "loss": 0.9677, "step": 14371 }, { "epoch": 0.999826080907162, "grad_norm": 0.98046875, "learning_rate": 1.0154991791910816e-10, "loss": 0.6741, "step": 14372 }, { "epoch": 0.9998956485442972, "grad_norm": 1.4453125, "learning_rate": 2.5387479785088375e-11, "loss": 0.7453, "step": 14373 }, { "epoch": 0.9999652161814324, "grad_norm": 1.171875, "learning_rate": 0.0, "loss": 1.1312, "step": 14374 } ], "logging_steps": 1, "max_steps": 14374, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7122538767292826e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }