{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.2107882485150103, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00032107882485150104, "grad_norm": 15.157858848571777, "learning_rate": 2.0000000000000002e-07, "loss": 2.4811, "step": 1 }, { "epoch": 0.0006421576497030021, "grad_norm": 15.802922248840332, "learning_rate": 4.0000000000000003e-07, "loss": 2.3901, "step": 2 }, { "epoch": 0.0009632364745545031, "grad_norm": 23.659759521484375, "learning_rate": 6.000000000000001e-07, "loss": 2.3375, "step": 3 }, { "epoch": 0.0012843152994060042, "grad_norm": 27.097450256347656, "learning_rate": 8.000000000000001e-07, "loss": 2.3807, "step": 4 }, { "epoch": 0.0016053941242575051, "grad_norm": 29.78221321105957, "learning_rate": 1.0000000000000002e-06, "loss": 2.4191, "step": 5 }, { "epoch": 0.0019264729491090063, "grad_norm": 33.7828254699707, "learning_rate": 1.2000000000000002e-06, "loss": 2.5859, "step": 6 }, { "epoch": 0.002247551773960507, "grad_norm": 43.44171142578125, "learning_rate": 1.4000000000000001e-06, "loss": 2.8257, "step": 7 }, { "epoch": 0.0025686305988120084, "grad_norm": 51.39571762084961, "learning_rate": 1.6000000000000001e-06, "loss": 3.0337, "step": 8 }, { "epoch": 0.0028897094236635095, "grad_norm": 54.60009002685547, "learning_rate": 1.8e-06, "loss": 3.1402, "step": 9 }, { "epoch": 0.0032107882485150102, "grad_norm": 57.4810791015625, "learning_rate": 2.0000000000000003e-06, "loss": 3.4821, "step": 10 }, { "epoch": 0.0035318670733665114, "grad_norm": 61.211952209472656, "learning_rate": 2.2e-06, "loss": 3.4304, "step": 11 }, { "epoch": 0.0038529458982180125, "grad_norm": 61.09406280517578, "learning_rate": 2.4000000000000003e-06, "loss": 3.5862, "step": 12 }, { "epoch": 0.004174024723069513, "grad_norm": 61.53642654418945, "learning_rate": 2.6e-06, "loss": 3.6255, "step": 13 }, { "epoch": 0.004495103547921014, 
"grad_norm": 58.33311080932617, "learning_rate": 2.8000000000000003e-06, "loss": 3.7706, "step": 14 }, { "epoch": 0.0048161823727725156, "grad_norm": 57.1992301940918, "learning_rate": 3e-06, "loss": 3.7195, "step": 15 }, { "epoch": 0.005137261197624017, "grad_norm": 55.674320220947266, "learning_rate": 3.2000000000000003e-06, "loss": 3.6768, "step": 16 }, { "epoch": 0.005458340022475518, "grad_norm": 54.121376037597656, "learning_rate": 3.4000000000000005e-06, "loss": 3.9861, "step": 17 }, { "epoch": 0.005779418847327019, "grad_norm": 52.377349853515625, "learning_rate": 3.6e-06, "loss": 3.8352, "step": 18 }, { "epoch": 0.00610049767217852, "grad_norm": 50.22018051147461, "learning_rate": 3.8e-06, "loss": 3.9499, "step": 19 }, { "epoch": 0.0064215764970300205, "grad_norm": 47.37348556518555, "learning_rate": 4.000000000000001e-06, "loss": 3.8506, "step": 20 }, { "epoch": 0.006742655321881522, "grad_norm": 44.72926712036133, "learning_rate": 4.2000000000000004e-06, "loss": 4.0215, "step": 21 }, { "epoch": 0.007063734146733023, "grad_norm": 44.81752014160156, "learning_rate": 4.4e-06, "loss": 3.6998, "step": 22 }, { "epoch": 0.007384812971584524, "grad_norm": 43.61850357055664, "learning_rate": 4.6e-06, "loss": 3.7522, "step": 23 }, { "epoch": 0.007705891796436025, "grad_norm": 45.95828628540039, "learning_rate": 4.800000000000001e-06, "loss": 3.816, "step": 24 }, { "epoch": 0.008026970621287526, "grad_norm": 49.80207443237305, "learning_rate": 5e-06, "loss": 4.0823, "step": 25 }, { "epoch": 0.008348049446139027, "grad_norm": 51.24964141845703, "learning_rate": 5.2e-06, "loss": 3.9784, "step": 26 }, { "epoch": 0.008669128270990529, "grad_norm": 54.43299102783203, "learning_rate": 5.4e-06, "loss": 3.9404, "step": 27 }, { "epoch": 0.008990207095842029, "grad_norm": 55.02586364746094, "learning_rate": 5.600000000000001e-06, "loss": 3.917, "step": 28 }, { "epoch": 0.00931128592069353, "grad_norm": 55.936119079589844, "learning_rate": 5.8e-06, "loss": 3.852, "step": 29 
}, { "epoch": 0.009632364745545031, "grad_norm": 53.62590789794922, "learning_rate": 6e-06, "loss": 3.8488, "step": 30 }, { "epoch": 0.009953443570396533, "grad_norm": 50.38666534423828, "learning_rate": 6.2e-06, "loss": 3.3054, "step": 31 }, { "epoch": 0.010274522395248033, "grad_norm": 49.8444709777832, "learning_rate": 6.4000000000000006e-06, "loss": 3.6827, "step": 32 }, { "epoch": 0.010595601220099534, "grad_norm": 46.34492111206055, "learning_rate": 6.6e-06, "loss": 3.7251, "step": 33 }, { "epoch": 0.010916680044951036, "grad_norm": 45.03758239746094, "learning_rate": 6.800000000000001e-06, "loss": 3.6144, "step": 34 }, { "epoch": 0.011237758869802536, "grad_norm": 40.92512893676758, "learning_rate": 7.000000000000001e-06, "loss": 3.3677, "step": 35 }, { "epoch": 0.011558837694654038, "grad_norm": 36.829925537109375, "learning_rate": 7.2e-06, "loss": 3.1936, "step": 36 }, { "epoch": 0.011879916519505538, "grad_norm": 40.46910858154297, "learning_rate": 7.4e-06, "loss": 3.4061, "step": 37 }, { "epoch": 0.01220099534435704, "grad_norm": 54.956382751464844, "learning_rate": 7.6e-06, "loss": 3.5662, "step": 38 }, { "epoch": 0.01252207416920854, "grad_norm": 57.516902923583984, "learning_rate": 7.8e-06, "loss": 3.0622, "step": 39 }, { "epoch": 0.012843152994060041, "grad_norm": 61.08616256713867, "learning_rate": 8.000000000000001e-06, "loss": 3.5104, "step": 40 }, { "epoch": 0.013164231818911543, "grad_norm": 58.04061508178711, "learning_rate": 8.200000000000001e-06, "loss": 2.9037, "step": 41 }, { "epoch": 0.013485310643763043, "grad_norm": 56.112709045410156, "learning_rate": 8.400000000000001e-06, "loss": 2.8564, "step": 42 }, { "epoch": 0.013806389468614545, "grad_norm": 50.86652755737305, "learning_rate": 8.599999999999999e-06, "loss": 3.1863, "step": 43 }, { "epoch": 0.014127468293466046, "grad_norm": 41.73533630371094, "learning_rate": 8.8e-06, "loss": 3.0879, "step": 44 }, { "epoch": 0.014448547118317548, "grad_norm": 36.46621322631836, "learning_rate": 
9e-06, "loss": 3.1359, "step": 45 }, { "epoch": 0.014769625943169048, "grad_norm": 28.671815872192383, "learning_rate": 9.2e-06, "loss": 2.9814, "step": 46 }, { "epoch": 0.01509070476802055, "grad_norm": 24.950931549072266, "learning_rate": 9.4e-06, "loss": 2.8951, "step": 47 }, { "epoch": 0.01541178359287205, "grad_norm": 27.621028900146484, "learning_rate": 9.600000000000001e-06, "loss": 2.6941, "step": 48 }, { "epoch": 0.01573286241772355, "grad_norm": 27.553085327148438, "learning_rate": 9.800000000000001e-06, "loss": 2.5166, "step": 49 }, { "epoch": 0.016053941242575052, "grad_norm": 26.491987228393555, "learning_rate": 1e-05, "loss": 2.6277, "step": 50 }, { "epoch": 0.016375020067426554, "grad_norm": 5.4101104736328125, "learning_rate": 1.02e-05, "loss": 1.925, "step": 51 }, { "epoch": 0.016696098892278053, "grad_norm": 4.891691207885742, "learning_rate": 1.04e-05, "loss": 1.8656, "step": 52 }, { "epoch": 0.017017177717129555, "grad_norm": 5.655025482177734, "learning_rate": 1.06e-05, "loss": 1.66, "step": 53 }, { "epoch": 0.017338256541981057, "grad_norm": 6.683502674102783, "learning_rate": 1.08e-05, "loss": 1.6638, "step": 54 }, { "epoch": 0.01765933536683256, "grad_norm": 6.518195152282715, "learning_rate": 1.1000000000000001e-05, "loss": 1.6495, "step": 55 }, { "epoch": 0.017980414191684058, "grad_norm": 6.019643783569336, "learning_rate": 1.1200000000000001e-05, "loss": 1.7143, "step": 56 }, { "epoch": 0.01830149301653556, "grad_norm": 5.8600053787231445, "learning_rate": 1.1400000000000001e-05, "loss": 1.3576, "step": 57 }, { "epoch": 0.01862257184138706, "grad_norm": 6.502221584320068, "learning_rate": 1.16e-05, "loss": 1.4601, "step": 58 }, { "epoch": 0.01894365066623856, "grad_norm": 5.659090995788574, "learning_rate": 1.18e-05, "loss": 1.4309, "step": 59 }, { "epoch": 0.019264729491090062, "grad_norm": 5.396006107330322, "learning_rate": 1.2e-05, "loss": 1.6735, "step": 60 }, { "epoch": 0.019585808315941564, "grad_norm": 5.009734630584717, 
"learning_rate": 1.22e-05, "loss": 1.5251, "step": 61 }, { "epoch": 0.019906887140793066, "grad_norm": 5.51364278793335, "learning_rate": 1.24e-05, "loss": 1.8121, "step": 62 }, { "epoch": 0.020227965965644565, "grad_norm": 4.808631420135498, "learning_rate": 1.2600000000000001e-05, "loss": 1.5659, "step": 63 }, { "epoch": 0.020549044790496067, "grad_norm": 3.892801523208618, "learning_rate": 1.2800000000000001e-05, "loss": 1.6308, "step": 64 }, { "epoch": 0.02087012361534757, "grad_norm": 3.904027223587036, "learning_rate": 1.3000000000000001e-05, "loss": 1.5642, "step": 65 }, { "epoch": 0.021191202440199067, "grad_norm": 4.057150840759277, "learning_rate": 1.32e-05, "loss": 1.4334, "step": 66 }, { "epoch": 0.02151228126505057, "grad_norm": 4.412062168121338, "learning_rate": 1.3400000000000002e-05, "loss": 1.5825, "step": 67 }, { "epoch": 0.02183336008990207, "grad_norm": 3.494084358215332, "learning_rate": 1.3600000000000002e-05, "loss": 1.5724, "step": 68 }, { "epoch": 0.022154438914753573, "grad_norm": 4.483047008514404, "learning_rate": 1.3800000000000002e-05, "loss": 1.688, "step": 69 }, { "epoch": 0.022475517739605072, "grad_norm": 3.417116165161133, "learning_rate": 1.4000000000000001e-05, "loss": 1.5698, "step": 70 }, { "epoch": 0.022796596564456574, "grad_norm": 6.7289838790893555, "learning_rate": 1.42e-05, "loss": 1.6379, "step": 71 }, { "epoch": 0.023117675389308076, "grad_norm": 3.190251111984253, "learning_rate": 1.44e-05, "loss": 1.4405, "step": 72 }, { "epoch": 0.023438754214159575, "grad_norm": 3.8525373935699463, "learning_rate": 1.4599999999999999e-05, "loss": 1.5232, "step": 73 }, { "epoch": 0.023759833039011077, "grad_norm": 3.5374369621276855, "learning_rate": 1.48e-05, "loss": 1.7238, "step": 74 }, { "epoch": 0.02408091186386258, "grad_norm": 5.770689964294434, "learning_rate": 1.5e-05, "loss": 1.5226, "step": 75 }, { "epoch": 0.02440199068871408, "grad_norm": 4.280745506286621, "learning_rate": 1.52e-05, "loss": 1.6567, "step": 76 }, { 
"epoch": 0.02472306951356558, "grad_norm": 3.3552117347717285, "learning_rate": 1.54e-05, "loss": 1.4622, "step": 77 }, { "epoch": 0.02504414833841708, "grad_norm": 4.613933563232422, "learning_rate": 1.56e-05, "loss": 1.612, "step": 78 }, { "epoch": 0.025365227163268583, "grad_norm": 4.299830913543701, "learning_rate": 1.58e-05, "loss": 1.7098, "step": 79 }, { "epoch": 0.025686305988120082, "grad_norm": 3.547947406768799, "learning_rate": 1.6000000000000003e-05, "loss": 1.4593, "step": 80 }, { "epoch": 0.026007384812971584, "grad_norm": 3.5630974769592285, "learning_rate": 1.62e-05, "loss": 1.6422, "step": 81 }, { "epoch": 0.026328463637823086, "grad_norm": 3.984318256378174, "learning_rate": 1.6400000000000002e-05, "loss": 1.5161, "step": 82 }, { "epoch": 0.026649542462674588, "grad_norm": 4.502562046051025, "learning_rate": 1.66e-05, "loss": 1.7414, "step": 83 }, { "epoch": 0.026970621287526086, "grad_norm": 3.2806875705718994, "learning_rate": 1.6800000000000002e-05, "loss": 1.4668, "step": 84 }, { "epoch": 0.02729170011237759, "grad_norm": 3.762796401977539, "learning_rate": 1.7000000000000003e-05, "loss": 1.6335, "step": 85 }, { "epoch": 0.02761277893722909, "grad_norm": 3.4556496143341064, "learning_rate": 1.7199999999999998e-05, "loss": 1.6127, "step": 86 }, { "epoch": 0.027933857762080593, "grad_norm": 3.3840537071228027, "learning_rate": 1.74e-05, "loss": 1.4251, "step": 87 }, { "epoch": 0.02825493658693209, "grad_norm": 3.363680601119995, "learning_rate": 1.76e-05, "loss": 1.6259, "step": 88 }, { "epoch": 0.028576015411783593, "grad_norm": 3.2910609245300293, "learning_rate": 1.78e-05, "loss": 1.5546, "step": 89 }, { "epoch": 0.028897094236635095, "grad_norm": 2.9161438941955566, "learning_rate": 1.8e-05, "loss": 1.2032, "step": 90 }, { "epoch": 0.029218173061486594, "grad_norm": 3.4742963314056396, "learning_rate": 1.8200000000000002e-05, "loss": 1.5664, "step": 91 }, { "epoch": 0.029539251886338096, "grad_norm": 3.563717842102051, "learning_rate": 
1.84e-05, "loss": 1.4364, "step": 92 }, { "epoch": 0.029860330711189598, "grad_norm": 2.6174569129943848, "learning_rate": 1.86e-05, "loss": 1.4534, "step": 93 }, { "epoch": 0.0301814095360411, "grad_norm": 5.107251167297363, "learning_rate": 1.88e-05, "loss": 1.4267, "step": 94 }, { "epoch": 0.0305024883608926, "grad_norm": 3.532844066619873, "learning_rate": 1.9e-05, "loss": 1.5197, "step": 95 }, { "epoch": 0.0308235671857441, "grad_norm": 3.5595579147338867, "learning_rate": 1.9200000000000003e-05, "loss": 1.3465, "step": 96 }, { "epoch": 0.031144646010595602, "grad_norm": 3.7405896186828613, "learning_rate": 1.94e-05, "loss": 1.4418, "step": 97 }, { "epoch": 0.0314657248354471, "grad_norm": 3.423434257507324, "learning_rate": 1.9600000000000002e-05, "loss": 1.3589, "step": 98 }, { "epoch": 0.0317868036602986, "grad_norm": 7.2687602043151855, "learning_rate": 1.9800000000000004e-05, "loss": 1.5237, "step": 99 }, { "epoch": 0.032107882485150105, "grad_norm": 9.787822723388672, "learning_rate": 2e-05, "loss": 1.3776, "step": 100 }, { "epoch": 0.03242896131000161, "grad_norm": 3.462364435195923, "learning_rate": 2.0200000000000003e-05, "loss": 1.921, "step": 101 }, { "epoch": 0.03275004013485311, "grad_norm": 3.6257283687591553, "learning_rate": 2.04e-05, "loss": 1.7884, "step": 102 }, { "epoch": 0.033071118959704604, "grad_norm": 3.9720280170440674, "learning_rate": 2.06e-05, "loss": 1.5258, "step": 103 }, { "epoch": 0.033392197784556106, "grad_norm": 3.6847167015075684, "learning_rate": 2.08e-05, "loss": 1.2969, "step": 104 }, { "epoch": 0.03371327660940761, "grad_norm": 4.045772075653076, "learning_rate": 2.1e-05, "loss": 1.4464, "step": 105 }, { "epoch": 0.03403435543425911, "grad_norm": 3.264374017715454, "learning_rate": 2.12e-05, "loss": 1.2651, "step": 106 }, { "epoch": 0.03435543425911061, "grad_norm": 2.960892677307129, "learning_rate": 2.1400000000000002e-05, "loss": 1.1481, "step": 107 }, { "epoch": 0.034676513083962114, "grad_norm": 2.9885120391845703, 
"learning_rate": 2.16e-05, "loss": 1.367, "step": 108 }, { "epoch": 0.034997591908813616, "grad_norm": 3.1829378604888916, "learning_rate": 2.18e-05, "loss": 1.3112, "step": 109 }, { "epoch": 0.03531867073366512, "grad_norm": 2.956650733947754, "learning_rate": 2.2000000000000003e-05, "loss": 1.4058, "step": 110 }, { "epoch": 0.03563974955851661, "grad_norm": 2.93878436088562, "learning_rate": 2.22e-05, "loss": 1.6511, "step": 111 }, { "epoch": 0.035960828383368115, "grad_norm": 4.111947059631348, "learning_rate": 2.2400000000000002e-05, "loss": 1.2943, "step": 112 }, { "epoch": 0.03628190720821962, "grad_norm": 3.833181142807007, "learning_rate": 2.26e-05, "loss": 1.3733, "step": 113 }, { "epoch": 0.03660298603307112, "grad_norm": 2.5650758743286133, "learning_rate": 2.2800000000000002e-05, "loss": 1.4837, "step": 114 }, { "epoch": 0.03692406485792262, "grad_norm": 2.7790398597717285, "learning_rate": 2.3000000000000003e-05, "loss": 1.3758, "step": 115 }, { "epoch": 0.03724514368277412, "grad_norm": 2.0799965858459473, "learning_rate": 2.32e-05, "loss": 1.2325, "step": 116 }, { "epoch": 0.037566222507625625, "grad_norm": 2.8642420768737793, "learning_rate": 2.3400000000000003e-05, "loss": 1.422, "step": 117 }, { "epoch": 0.03788730133247712, "grad_norm": 2.840057373046875, "learning_rate": 2.36e-05, "loss": 1.3231, "step": 118 }, { "epoch": 0.03820838015732862, "grad_norm": 2.1735825538635254, "learning_rate": 2.38e-05, "loss": 1.441, "step": 119 }, { "epoch": 0.038529458982180124, "grad_norm": 2.365070343017578, "learning_rate": 2.4e-05, "loss": 1.3248, "step": 120 }, { "epoch": 0.038850537807031627, "grad_norm": 2.179810047149658, "learning_rate": 2.4200000000000002e-05, "loss": 1.4982, "step": 121 }, { "epoch": 0.03917161663188313, "grad_norm": 2.4042208194732666, "learning_rate": 2.44e-05, "loss": 1.6785, "step": 122 }, { "epoch": 0.03949269545673463, "grad_norm": 2.441134452819824, "learning_rate": 2.46e-05, "loss": 1.4426, "step": 123 }, { "epoch": 
0.03981377428158613, "grad_norm": 3.4666857719421387, "learning_rate": 2.48e-05, "loss": 1.4485, "step": 124 }, { "epoch": 0.04013485310643763, "grad_norm": 2.5980589389801025, "learning_rate": 2.5e-05, "loss": 1.3558, "step": 125 }, { "epoch": 0.04045593193128913, "grad_norm": 3.334627628326416, "learning_rate": 2.5200000000000003e-05, "loss": 1.5553, "step": 126 }, { "epoch": 0.04077701075614063, "grad_norm": 2.676223039627075, "learning_rate": 2.54e-05, "loss": 1.4527, "step": 127 }, { "epoch": 0.041098089580992134, "grad_norm": 2.7710494995117188, "learning_rate": 2.5600000000000002e-05, "loss": 1.5945, "step": 128 }, { "epoch": 0.041419168405843636, "grad_norm": 2.559156894683838, "learning_rate": 2.58e-05, "loss": 1.3244, "step": 129 }, { "epoch": 0.04174024723069514, "grad_norm": 2.5117619037628174, "learning_rate": 2.6000000000000002e-05, "loss": 1.5478, "step": 130 }, { "epoch": 0.04206132605554664, "grad_norm": 2.3930675983428955, "learning_rate": 2.6200000000000003e-05, "loss": 1.3329, "step": 131 }, { "epoch": 0.042382404880398135, "grad_norm": 2.4628591537475586, "learning_rate": 2.64e-05, "loss": 1.4671, "step": 132 }, { "epoch": 0.04270348370524964, "grad_norm": 1.9905048608779907, "learning_rate": 2.6600000000000003e-05, "loss": 1.482, "step": 133 }, { "epoch": 0.04302456253010114, "grad_norm": 2.3793320655822754, "learning_rate": 2.6800000000000004e-05, "loss": 1.4634, "step": 134 }, { "epoch": 0.04334564135495264, "grad_norm": 2.54632830619812, "learning_rate": 2.7000000000000002e-05, "loss": 1.2852, "step": 135 }, { "epoch": 0.04366672017980414, "grad_norm": 3.641115188598633, "learning_rate": 2.7200000000000004e-05, "loss": 1.369, "step": 136 }, { "epoch": 0.043987799004655645, "grad_norm": 2.443338394165039, "learning_rate": 2.7400000000000002e-05, "loss": 1.3057, "step": 137 }, { "epoch": 0.04430887782950715, "grad_norm": 3.577340602874756, "learning_rate": 2.7600000000000003e-05, "loss": 1.3757, "step": 138 }, { "epoch": 0.04462995665435864, 
"grad_norm": 2.4298670291900635, "learning_rate": 2.7800000000000005e-05, "loss": 1.2701, "step": 139 }, { "epoch": 0.044951035479210144, "grad_norm": 2.5725412368774414, "learning_rate": 2.8000000000000003e-05, "loss": 1.3694, "step": 140 }, { "epoch": 0.045272114304061646, "grad_norm": 2.3307549953460693, "learning_rate": 2.8199999999999998e-05, "loss": 1.3261, "step": 141 }, { "epoch": 0.04559319312891315, "grad_norm": 3.738875389099121, "learning_rate": 2.84e-05, "loss": 1.3514, "step": 142 }, { "epoch": 0.04591427195376465, "grad_norm": 2.905665397644043, "learning_rate": 2.86e-05, "loss": 1.0867, "step": 143 }, { "epoch": 0.04623535077861615, "grad_norm": 3.744802713394165, "learning_rate": 2.88e-05, "loss": 1.1365, "step": 144 }, { "epoch": 0.046556429603467654, "grad_norm": 3.559023141860962, "learning_rate": 2.9e-05, "loss": 1.2399, "step": 145 }, { "epoch": 0.04687750842831915, "grad_norm": 3.565185070037842, "learning_rate": 2.9199999999999998e-05, "loss": 1.2912, "step": 146 }, { "epoch": 0.04719858725317065, "grad_norm": 3.949876308441162, "learning_rate": 2.94e-05, "loss": 1.1523, "step": 147 }, { "epoch": 0.04751966607802215, "grad_norm": 3.7057857513427734, "learning_rate": 2.96e-05, "loss": 1.1759, "step": 148 }, { "epoch": 0.047840744902873655, "grad_norm": 2.8545663356781006, "learning_rate": 2.98e-05, "loss": 1.058, "step": 149 }, { "epoch": 0.04816182372772516, "grad_norm": 5.648036003112793, "learning_rate": 3e-05, "loss": 1.1387, "step": 150 }, { "epoch": 0.04848290255257666, "grad_norm": 2.1492836475372314, "learning_rate": 3.02e-05, "loss": 1.6236, "step": 151 }, { "epoch": 0.04880398137742816, "grad_norm": 2.80293607711792, "learning_rate": 3.04e-05, "loss": 1.7931, "step": 152 }, { "epoch": 0.049125060202279656, "grad_norm": 2.3928093910217285, "learning_rate": 3.06e-05, "loss": 1.2039, "step": 153 }, { "epoch": 0.04944613902713116, "grad_norm": 2.5905890464782715, "learning_rate": 3.08e-05, "loss": 1.1153, "step": 154 }, { "epoch": 
0.04976721785198266, "grad_norm": 3.102632761001587, "learning_rate": 3.1e-05, "loss": 1.2945, "step": 155 }, { "epoch": 0.05008829667683416, "grad_norm": 3.5045828819274902, "learning_rate": 3.12e-05, "loss": 1.15, "step": 156 }, { "epoch": 0.050409375501685665, "grad_norm": 3.210549831390381, "learning_rate": 3.1400000000000004e-05, "loss": 1.2008, "step": 157 }, { "epoch": 0.05073045432653717, "grad_norm": 2.5296566486358643, "learning_rate": 3.16e-05, "loss": 1.2581, "step": 158 }, { "epoch": 0.05105153315138867, "grad_norm": 3.071568727493286, "learning_rate": 3.18e-05, "loss": 0.9904, "step": 159 }, { "epoch": 0.051372611976240164, "grad_norm": 2.763911724090576, "learning_rate": 3.2000000000000005e-05, "loss": 1.12, "step": 160 }, { "epoch": 0.051693690801091666, "grad_norm": 2.2567617893218994, "learning_rate": 3.2200000000000003e-05, "loss": 1.2534, "step": 161 }, { "epoch": 0.05201476962594317, "grad_norm": 2.49045991897583, "learning_rate": 3.24e-05, "loss": 1.2713, "step": 162 }, { "epoch": 0.05233584845079467, "grad_norm": 3.3164381980895996, "learning_rate": 3.26e-05, "loss": 1.1137, "step": 163 }, { "epoch": 0.05265692727564617, "grad_norm": 2.6513657569885254, "learning_rate": 3.2800000000000004e-05, "loss": 1.3505, "step": 164 }, { "epoch": 0.052978006100497674, "grad_norm": 2.629395008087158, "learning_rate": 3.3e-05, "loss": 1.206, "step": 165 }, { "epoch": 0.053299084925349176, "grad_norm": 3.333925485610962, "learning_rate": 3.32e-05, "loss": 1.1088, "step": 166 }, { "epoch": 0.05362016375020067, "grad_norm": 2.59550142288208, "learning_rate": 3.3400000000000005e-05, "loss": 1.2533, "step": 167 }, { "epoch": 0.05394124257505217, "grad_norm": 2.676664352416992, "learning_rate": 3.3600000000000004e-05, "loss": 1.2916, "step": 168 }, { "epoch": 0.054262321399903675, "grad_norm": 2.5266637802124023, "learning_rate": 3.38e-05, "loss": 1.3359, "step": 169 }, { "epoch": 0.05458340022475518, "grad_norm": 2.394808292388916, "learning_rate": 
3.4000000000000007e-05, "loss": 1.0293, "step": 170 }, { "epoch": 0.05490447904960668, "grad_norm": 3.7004685401916504, "learning_rate": 3.4200000000000005e-05, "loss": 1.224, "step": 171 }, { "epoch": 0.05522555787445818, "grad_norm": 2.8753304481506348, "learning_rate": 3.4399999999999996e-05, "loss": 1.2212, "step": 172 }, { "epoch": 0.05554663669930968, "grad_norm": 2.939927101135254, "learning_rate": 3.46e-05, "loss": 1.4577, "step": 173 }, { "epoch": 0.055867715524161185, "grad_norm": 2.922861099243164, "learning_rate": 3.48e-05, "loss": 1.2349, "step": 174 }, { "epoch": 0.05618879434901268, "grad_norm": 2.545013427734375, "learning_rate": 3.5e-05, "loss": 1.0631, "step": 175 }, { "epoch": 0.05650987317386418, "grad_norm": 3.1497952938079834, "learning_rate": 3.52e-05, "loss": 1.2337, "step": 176 }, { "epoch": 0.056830951998715684, "grad_norm": 2.5342869758605957, "learning_rate": 3.54e-05, "loss": 1.2889, "step": 177 }, { "epoch": 0.057152030823567186, "grad_norm": 2.435774087905884, "learning_rate": 3.56e-05, "loss": 1.4616, "step": 178 }, { "epoch": 0.05747310964841869, "grad_norm": 1.7864798307418823, "learning_rate": 3.58e-05, "loss": 1.0572, "step": 179 }, { "epoch": 0.05779418847327019, "grad_norm": 1.8739286661148071, "learning_rate": 3.6e-05, "loss": 1.227, "step": 180 }, { "epoch": 0.05811526729812169, "grad_norm": 3.2398200035095215, "learning_rate": 3.62e-05, "loss": 1.2632, "step": 181 }, { "epoch": 0.05843634612297319, "grad_norm": 2.340625047683716, "learning_rate": 3.6400000000000004e-05, "loss": 1.2501, "step": 182 }, { "epoch": 0.05875742494782469, "grad_norm": 2.0247950553894043, "learning_rate": 3.66e-05, "loss": 1.3319, "step": 183 }, { "epoch": 0.05907850377267619, "grad_norm": 2.264277935028076, "learning_rate": 3.68e-05, "loss": 1.2761, "step": 184 }, { "epoch": 0.05939958259752769, "grad_norm": 1.971635341644287, "learning_rate": 3.7e-05, "loss": 0.9021, "step": 185 }, { "epoch": 0.059720661422379195, "grad_norm": 1.713057518005371, 
"learning_rate": 3.72e-05, "loss": 1.2372, "step": 186 }, { "epoch": 0.0600417402472307, "grad_norm": 3.2709195613861084, "learning_rate": 3.74e-05, "loss": 1.2062, "step": 187 }, { "epoch": 0.0603628190720822, "grad_norm": 1.7991886138916016, "learning_rate": 3.76e-05, "loss": 1.0789, "step": 188 }, { "epoch": 0.060683897896933695, "grad_norm": 3.217481851577759, "learning_rate": 3.7800000000000004e-05, "loss": 1.2653, "step": 189 }, { "epoch": 0.0610049767217852, "grad_norm": 1.5862120389938354, "learning_rate": 3.8e-05, "loss": 1.2394, "step": 190 }, { "epoch": 0.0613260555466367, "grad_norm": 1.8591636419296265, "learning_rate": 3.82e-05, "loss": 1.2675, "step": 191 }, { "epoch": 0.0616471343714882, "grad_norm": 1.6677289009094238, "learning_rate": 3.8400000000000005e-05, "loss": 1.0502, "step": 192 }, { "epoch": 0.0619682131963397, "grad_norm": 2.0882251262664795, "learning_rate": 3.86e-05, "loss": 0.9488, "step": 193 }, { "epoch": 0.062289292021191205, "grad_norm": 1.579268217086792, "learning_rate": 3.88e-05, "loss": 1.1667, "step": 194 }, { "epoch": 0.0626103708460427, "grad_norm": 1.7781903743743896, "learning_rate": 3.9000000000000006e-05, "loss": 1.238, "step": 195 }, { "epoch": 0.0629314496708942, "grad_norm": 2.556015729904175, "learning_rate": 3.9200000000000004e-05, "loss": 1.0398, "step": 196 }, { "epoch": 0.06325252849574571, "grad_norm": 1.9498616456985474, "learning_rate": 3.94e-05, "loss": 1.0116, "step": 197 }, { "epoch": 0.0635736073205972, "grad_norm": 3.5160417556762695, "learning_rate": 3.960000000000001e-05, "loss": 0.8621, "step": 198 }, { "epoch": 0.0638946861454487, "grad_norm": 3.2988150119781494, "learning_rate": 3.9800000000000005e-05, "loss": 0.836, "step": 199 }, { "epoch": 0.06421576497030021, "grad_norm": 3.2342076301574707, "learning_rate": 4e-05, "loss": 0.7853, "step": 200 }, { "epoch": 0.0645368437951517, "grad_norm": 3.1116089820861816, "learning_rate": 4.02e-05, "loss": 1.5181, "step": 201 }, { "epoch": 0.06485792262000321, 
"grad_norm": 4.22519063949585, "learning_rate": 4.0400000000000006e-05, "loss": 1.5198, "step": 202 }, { "epoch": 0.06517900144485471, "grad_norm": 3.6451752185821533, "learning_rate": 4.0600000000000004e-05, "loss": 1.1846, "step": 203 }, { "epoch": 0.06550008026970622, "grad_norm": 2.907416582107544, "learning_rate": 4.08e-05, "loss": 1.1008, "step": 204 }, { "epoch": 0.06582115909455771, "grad_norm": 3.7612717151641846, "learning_rate": 4.1e-05, "loss": 1.0514, "step": 205 }, { "epoch": 0.06614223791940921, "grad_norm": 2.8270351886749268, "learning_rate": 4.12e-05, "loss": 1.1469, "step": 206 }, { "epoch": 0.06646331674426072, "grad_norm": 2.9322104454040527, "learning_rate": 4.14e-05, "loss": 1.1255, "step": 207 }, { "epoch": 0.06678439556911221, "grad_norm": 2.4711625576019287, "learning_rate": 4.16e-05, "loss": 1.0622, "step": 208 }, { "epoch": 0.06710547439396372, "grad_norm": 1.7461209297180176, "learning_rate": 4.18e-05, "loss": 1.1028, "step": 209 }, { "epoch": 0.06742655321881522, "grad_norm": 2.2536096572875977, "learning_rate": 4.2e-05, "loss": 1.0743, "step": 210 }, { "epoch": 0.06774763204366673, "grad_norm": 2.037104606628418, "learning_rate": 4.22e-05, "loss": 1.2257, "step": 211 }, { "epoch": 0.06806871086851822, "grad_norm": 2.400221586227417, "learning_rate": 4.24e-05, "loss": 1.2784, "step": 212 }, { "epoch": 0.06838978969336973, "grad_norm": 2.0338759422302246, "learning_rate": 4.26e-05, "loss": 1.1648, "step": 213 }, { "epoch": 0.06871086851822122, "grad_norm": 3.6578612327575684, "learning_rate": 4.2800000000000004e-05, "loss": 1.1226, "step": 214 }, { "epoch": 0.06903194734307272, "grad_norm": 2.1461496353149414, "learning_rate": 4.3e-05, "loss": 1.0823, "step": 215 }, { "epoch": 0.06935302616792423, "grad_norm": 1.87822687625885, "learning_rate": 4.32e-05, "loss": 1.1413, "step": 216 }, { "epoch": 0.06967410499277572, "grad_norm": 1.7954866886138916, "learning_rate": 4.3400000000000005e-05, "loss": 1.2208, "step": 217 }, { "epoch": 
0.06999518381762723, "grad_norm": 2.6523921489715576, "learning_rate": 4.36e-05, "loss": 1.0858, "step": 218 }, { "epoch": 0.07031626264247873, "grad_norm": 2.4885449409484863, "learning_rate": 4.38e-05, "loss": 1.0251, "step": 219 }, { "epoch": 0.07063734146733024, "grad_norm": 2.9225237369537354, "learning_rate": 4.4000000000000006e-05, "loss": 1.2586, "step": 220 }, { "epoch": 0.07095842029218173, "grad_norm": 2.579859495162964, "learning_rate": 4.4200000000000004e-05, "loss": 1.2634, "step": 221 }, { "epoch": 0.07127949911703323, "grad_norm": 3.152012586593628, "learning_rate": 4.44e-05, "loss": 1.0579, "step": 222 }, { "epoch": 0.07160057794188474, "grad_norm": 1.843085765838623, "learning_rate": 4.46e-05, "loss": 1.0507, "step": 223 }, { "epoch": 0.07192165676673623, "grad_norm": 2.062282085418701, "learning_rate": 4.4800000000000005e-05, "loss": 1.3025, "step": 224 }, { "epoch": 0.07224273559158774, "grad_norm": 1.8540760278701782, "learning_rate": 4.5e-05, "loss": 1.0763, "step": 225 }, { "epoch": 0.07256381441643923, "grad_norm": 2.1458258628845215, "learning_rate": 4.52e-05, "loss": 1.1163, "step": 226 }, { "epoch": 0.07288489324129074, "grad_norm": 2.402344226837158, "learning_rate": 4.5400000000000006e-05, "loss": 1.2973, "step": 227 }, { "epoch": 0.07320597206614224, "grad_norm": 1.6383142471313477, "learning_rate": 4.5600000000000004e-05, "loss": 1.1735, "step": 228 }, { "epoch": 0.07352705089099373, "grad_norm": 1.9293252229690552, "learning_rate": 4.58e-05, "loss": 1.1113, "step": 229 }, { "epoch": 0.07384812971584524, "grad_norm": 2.625548839569092, "learning_rate": 4.600000000000001e-05, "loss": 1.1503, "step": 230 }, { "epoch": 0.07416920854069674, "grad_norm": 2.3177950382232666, "learning_rate": 4.6200000000000005e-05, "loss": 0.9863, "step": 231 }, { "epoch": 0.07449028736554825, "grad_norm": 3.1583642959594727, "learning_rate": 4.64e-05, "loss": 1.3059, "step": 232 }, { "epoch": 0.07481136619039974, "grad_norm": 1.7886139154434204, 
"learning_rate": 4.660000000000001e-05, "loss": 1.0193, "step": 233 }, { "epoch": 0.07513244501525125, "grad_norm": 3.0615925788879395, "learning_rate": 4.6800000000000006e-05, "loss": 1.4904, "step": 234 }, { "epoch": 0.07545352384010275, "grad_norm": 823.4281005859375, "learning_rate": 4.7e-05, "loss": 1.1034, "step": 235 }, { "epoch": 0.07577460266495424, "grad_norm": 3.0355687141418457, "learning_rate": 4.72e-05, "loss": 0.9429, "step": 236 }, { "epoch": 0.07609568148980575, "grad_norm": 556.7845458984375, "learning_rate": 4.74e-05, "loss": 1.1951, "step": 237 }, { "epoch": 0.07641676031465724, "grad_norm": 3.066845417022705, "learning_rate": 4.76e-05, "loss": 1.1543, "step": 238 }, { "epoch": 0.07673783913950875, "grad_norm": 2.495962142944336, "learning_rate": 4.78e-05, "loss": 1.1047, "step": 239 }, { "epoch": 0.07705891796436025, "grad_norm": 12.180785179138184, "learning_rate": 4.8e-05, "loss": 0.9588, "step": 240 }, { "epoch": 0.07737999678921176, "grad_norm": 3.484467029571533, "learning_rate": 4.82e-05, "loss": 1.1392, "step": 241 }, { "epoch": 0.07770107561406325, "grad_norm": 2.198939561843872, "learning_rate": 4.8400000000000004e-05, "loss": 1.3359, "step": 242 }, { "epoch": 0.07802215443891475, "grad_norm": 1.4292407035827637, "learning_rate": 4.86e-05, "loss": 0.9899, "step": 243 }, { "epoch": 0.07834323326376626, "grad_norm": 1.910438895225525, "learning_rate": 4.88e-05, "loss": 0.928, "step": 244 }, { "epoch": 0.07866431208861775, "grad_norm": 3.0100486278533936, "learning_rate": 4.9e-05, "loss": 1.4606, "step": 245 }, { "epoch": 0.07898539091346926, "grad_norm": 2.0493595600128174, "learning_rate": 4.92e-05, "loss": 1.0296, "step": 246 }, { "epoch": 0.07930646973832076, "grad_norm": 2.848602771759033, "learning_rate": 4.94e-05, "loss": 0.9189, "step": 247 }, { "epoch": 0.07962754856317227, "grad_norm": 3.749636650085449, "learning_rate": 4.96e-05, "loss": 0.9614, "step": 248 }, { "epoch": 0.07994862738802376, "grad_norm": 2.882443428039551, 
"learning_rate": 4.9800000000000004e-05, "loss": 0.872, "step": 249 }, { "epoch": 0.08026970621287526, "grad_norm": 1.6551190614700317, "learning_rate": 5e-05, "loss": 0.7895, "step": 250 }, { "epoch": 0.08059078503772676, "grad_norm": 2.3475422859191895, "learning_rate": 5.02e-05, "loss": 1.4449, "step": 251 }, { "epoch": 0.08091186386257826, "grad_norm": 2.298914670944214, "learning_rate": 5.0400000000000005e-05, "loss": 1.0738, "step": 252 }, { "epoch": 0.08123294268742977, "grad_norm": 2.8183484077453613, "learning_rate": 5.0600000000000003e-05, "loss": 1.0035, "step": 253 }, { "epoch": 0.08155402151228126, "grad_norm": 2.7477033138275146, "learning_rate": 5.08e-05, "loss": 1.0145, "step": 254 }, { "epoch": 0.08187510033713277, "grad_norm": 2.9522218704223633, "learning_rate": 5.1000000000000006e-05, "loss": 0.962, "step": 255 }, { "epoch": 0.08219617916198427, "grad_norm": 2.496779203414917, "learning_rate": 5.1200000000000004e-05, "loss": 0.899, "step": 256 }, { "epoch": 0.08251725798683576, "grad_norm": 3.338456392288208, "learning_rate": 5.14e-05, "loss": 1.0442, "step": 257 }, { "epoch": 0.08283833681168727, "grad_norm": 2.447253465652466, "learning_rate": 5.16e-05, "loss": 1.0634, "step": 258 }, { "epoch": 0.08315941563653877, "grad_norm": 2.5269699096679688, "learning_rate": 5.1800000000000005e-05, "loss": 1.216, "step": 259 }, { "epoch": 0.08348049446139028, "grad_norm": 1.807884693145752, "learning_rate": 5.2000000000000004e-05, "loss": 1.051, "step": 260 }, { "epoch": 0.08380157328624177, "grad_norm": 3.292743444442749, "learning_rate": 5.22e-05, "loss": 1.0427, "step": 261 }, { "epoch": 0.08412265211109328, "grad_norm": 1.926658034324646, "learning_rate": 5.2400000000000007e-05, "loss": 1.1237, "step": 262 }, { "epoch": 0.08444373093594477, "grad_norm": 2.5301294326782227, "learning_rate": 5.2600000000000005e-05, "loss": 1.1137, "step": 263 }, { "epoch": 0.08476480976079627, "grad_norm": 2.3400182723999023, "learning_rate": 5.28e-05, "loss": 0.9848, 
"step": 264 }, { "epoch": 0.08508588858564778, "grad_norm": 1.7750979661941528, "learning_rate": 5.300000000000001e-05, "loss": 1.1295, "step": 265 }, { "epoch": 0.08540696741049927, "grad_norm": 2.335904598236084, "learning_rate": 5.3200000000000006e-05, "loss": 1.2678, "step": 266 }, { "epoch": 0.08572804623535078, "grad_norm": 1.9911553859710693, "learning_rate": 5.3400000000000004e-05, "loss": 1.1416, "step": 267 }, { "epoch": 0.08604912506020228, "grad_norm": 1.7395027875900269, "learning_rate": 5.360000000000001e-05, "loss": 1.1398, "step": 268 }, { "epoch": 0.08637020388505379, "grad_norm": 1.7891736030578613, "learning_rate": 5.380000000000001e-05, "loss": 1.1321, "step": 269 }, { "epoch": 0.08669128270990528, "grad_norm": 1.8368785381317139, "learning_rate": 5.4000000000000005e-05, "loss": 1.0112, "step": 270 }, { "epoch": 0.08701236153475678, "grad_norm": 1.638079047203064, "learning_rate": 5.420000000000001e-05, "loss": 1.1376, "step": 271 }, { "epoch": 0.08733344035960829, "grad_norm": 1.9354631900787354, "learning_rate": 5.440000000000001e-05, "loss": 1.1693, "step": 272 }, { "epoch": 0.08765451918445978, "grad_norm": 2.017259359359741, "learning_rate": 5.4600000000000006e-05, "loss": 1.2163, "step": 273 }, { "epoch": 0.08797559800931129, "grad_norm": 1.6436361074447632, "learning_rate": 5.4800000000000004e-05, "loss": 0.9737, "step": 274 }, { "epoch": 0.08829667683416279, "grad_norm": 1.7359622716903687, "learning_rate": 5.500000000000001e-05, "loss": 1.2599, "step": 275 }, { "epoch": 0.0886177556590143, "grad_norm": 1.698771595954895, "learning_rate": 5.520000000000001e-05, "loss": 1.0724, "step": 276 }, { "epoch": 0.08893883448386579, "grad_norm": 3.8529412746429443, "learning_rate": 5.5400000000000005e-05, "loss": 1.4372, "step": 277 }, { "epoch": 0.08925991330871728, "grad_norm": 1.826474905014038, "learning_rate": 5.560000000000001e-05, "loss": 1.4767, "step": 278 }, { "epoch": 0.0895809921335688, "grad_norm": 1.7558326721191406, "learning_rate": 
5.580000000000001e-05, "loss": 1.1549, "step": 279 }, { "epoch": 0.08990207095842029, "grad_norm": 2.5463342666625977, "learning_rate": 5.6000000000000006e-05, "loss": 0.9819, "step": 280 }, { "epoch": 0.0902231497832718, "grad_norm": 1.6322652101516724, "learning_rate": 5.620000000000001e-05, "loss": 1.2071, "step": 281 }, { "epoch": 0.09054422860812329, "grad_norm": 1.6438385248184204, "learning_rate": 5.6399999999999995e-05, "loss": 0.991, "step": 282 }, { "epoch": 0.0908653074329748, "grad_norm": 1.8053836822509766, "learning_rate": 5.66e-05, "loss": 1.1238, "step": 283 }, { "epoch": 0.0911863862578263, "grad_norm": 2.149841070175171, "learning_rate": 5.68e-05, "loss": 1.1431, "step": 284 }, { "epoch": 0.09150746508267779, "grad_norm": 1.8680065870285034, "learning_rate": 5.6999999999999996e-05, "loss": 1.0299, "step": 285 }, { "epoch": 0.0918285439075293, "grad_norm": 1.402052879333496, "learning_rate": 5.72e-05, "loss": 1.0877, "step": 286 }, { "epoch": 0.0921496227323808, "grad_norm": 1.4989826679229736, "learning_rate": 5.74e-05, "loss": 0.9015, "step": 287 }, { "epoch": 0.0924707015572323, "grad_norm": 1.2958741188049316, "learning_rate": 5.76e-05, "loss": 1.0378, "step": 288 }, { "epoch": 0.0927917803820838, "grad_norm": 2.0742673873901367, "learning_rate": 5.7799999999999995e-05, "loss": 1.0337, "step": 289 }, { "epoch": 0.09311285920693531, "grad_norm": 1.4535075426101685, "learning_rate": 5.8e-05, "loss": 1.0506, "step": 290 }, { "epoch": 0.0934339380317868, "grad_norm": 1.7830564975738525, "learning_rate": 5.82e-05, "loss": 1.0116, "step": 291 }, { "epoch": 0.0937550168566383, "grad_norm": 1.6709142923355103, "learning_rate": 5.8399999999999997e-05, "loss": 0.7538, "step": 292 }, { "epoch": 0.09407609568148981, "grad_norm": 2.7168803215026855, "learning_rate": 5.86e-05, "loss": 1.1456, "step": 293 }, { "epoch": 0.0943971745063413, "grad_norm": 1.8454351425170898, "learning_rate": 5.88e-05, "loss": 0.7657, "step": 294 }, { "epoch": 0.09471825333119281, 
"grad_norm": 2.2031004428863525, "learning_rate": 5.9e-05, "loss": 0.8091, "step": 295 }, { "epoch": 0.0950393321560443, "grad_norm": 1.5919681787490845, "learning_rate": 5.92e-05, "loss": 0.9871, "step": 296 }, { "epoch": 0.09536041098089582, "grad_norm": 2.7647721767425537, "learning_rate": 5.94e-05, "loss": 0.9902, "step": 297 }, { "epoch": 0.09568148980574731, "grad_norm": 2.6411798000335693, "learning_rate": 5.96e-05, "loss": 0.9511, "step": 298 }, { "epoch": 0.0960025686305988, "grad_norm": 2.426961898803711, "learning_rate": 5.9800000000000003e-05, "loss": 1.06, "step": 299 }, { "epoch": 0.09632364745545031, "grad_norm": 2.5059926509857178, "learning_rate": 6e-05, "loss": 0.7106, "step": 300 }, { "epoch": 0.09664472628030181, "grad_norm": 3.1037328243255615, "learning_rate": 6.02e-05, "loss": 1.5007, "step": 301 }, { "epoch": 0.09696580510515332, "grad_norm": 2.7043604850769043, "learning_rate": 6.04e-05, "loss": 1.5066, "step": 302 }, { "epoch": 0.09728688393000481, "grad_norm": 3.1140081882476807, "learning_rate": 6.06e-05, "loss": 1.052, "step": 303 }, { "epoch": 0.09760796275485632, "grad_norm": 3.623964309692383, "learning_rate": 6.08e-05, "loss": 0.946, "step": 304 }, { "epoch": 0.09792904157970782, "grad_norm": 3.414036750793457, "learning_rate": 6.1e-05, "loss": 0.9647, "step": 305 }, { "epoch": 0.09825012040455931, "grad_norm": 3.4817802906036377, "learning_rate": 6.12e-05, "loss": 0.9753, "step": 306 }, { "epoch": 0.09857119922941082, "grad_norm": 3.067047119140625, "learning_rate": 6.14e-05, "loss": 1.1435, "step": 307 }, { "epoch": 0.09889227805426232, "grad_norm": 2.2207508087158203, "learning_rate": 6.16e-05, "loss": 0.998, "step": 308 }, { "epoch": 0.09921335687911383, "grad_norm": 2.4805872440338135, "learning_rate": 6.18e-05, "loss": 0.9144, "step": 309 }, { "epoch": 0.09953443570396532, "grad_norm": 1.8067208528518677, "learning_rate": 6.2e-05, "loss": 1.1963, "step": 310 }, { "epoch": 0.09985551452881683, "grad_norm": 1.950782299041748, 
"learning_rate": 6.220000000000001e-05, "loss": 1.1947, "step": 311 }, { "epoch": 0.10017659335366833, "grad_norm": 1.88652765750885, "learning_rate": 6.24e-05, "loss": 0.9983, "step": 312 }, { "epoch": 0.10049767217851982, "grad_norm": 1.9043608903884888, "learning_rate": 6.26e-05, "loss": 0.8638, "step": 313 }, { "epoch": 0.10081875100337133, "grad_norm": 1.84428071975708, "learning_rate": 6.280000000000001e-05, "loss": 1.0008, "step": 314 }, { "epoch": 0.10113982982822282, "grad_norm": 1.763142466545105, "learning_rate": 6.3e-05, "loss": 1.3726, "step": 315 }, { "epoch": 0.10146090865307433, "grad_norm": 1.6071946620941162, "learning_rate": 6.32e-05, "loss": 1.0704, "step": 316 }, { "epoch": 0.10178198747792583, "grad_norm": 2.2262184619903564, "learning_rate": 6.340000000000001e-05, "loss": 1.0787, "step": 317 }, { "epoch": 0.10210306630277734, "grad_norm": 2.342939615249634, "learning_rate": 6.36e-05, "loss": 1.096, "step": 318 }, { "epoch": 0.10242414512762883, "grad_norm": 2.372135639190674, "learning_rate": 6.38e-05, "loss": 1.1364, "step": 319 }, { "epoch": 0.10274522395248033, "grad_norm": 1.7545585632324219, "learning_rate": 6.400000000000001e-05, "loss": 0.9141, "step": 320 }, { "epoch": 0.10306630277733184, "grad_norm": 2.9137213230133057, "learning_rate": 6.42e-05, "loss": 0.9489, "step": 321 }, { "epoch": 0.10338738160218333, "grad_norm": 2.827456474304199, "learning_rate": 6.440000000000001e-05, "loss": 1.1006, "step": 322 }, { "epoch": 0.10370846042703484, "grad_norm": 3.5646371841430664, "learning_rate": 6.460000000000001e-05, "loss": 1.0534, "step": 323 }, { "epoch": 0.10402953925188634, "grad_norm": 1.827415943145752, "learning_rate": 6.48e-05, "loss": 1.0138, "step": 324 }, { "epoch": 0.10435061807673784, "grad_norm": 2.491384267807007, "learning_rate": 6.500000000000001e-05, "loss": 1.4315, "step": 325 }, { "epoch": 0.10467169690158934, "grad_norm": 1.7412528991699219, "learning_rate": 6.52e-05, "loss": 0.9961, "step": 326 }, { "epoch": 
0.10499277572644083, "grad_norm": 2.2214128971099854, "learning_rate": 6.54e-05, "loss": 1.2276, "step": 327 }, { "epoch": 0.10531385455129234, "grad_norm": 1.9566681385040283, "learning_rate": 6.560000000000001e-05, "loss": 1.3574, "step": 328 }, { "epoch": 0.10563493337614384, "grad_norm": 1.5677928924560547, "learning_rate": 6.58e-05, "loss": 1.1721, "step": 329 }, { "epoch": 0.10595601220099535, "grad_norm": 2.4960641860961914, "learning_rate": 6.6e-05, "loss": 1.4447, "step": 330 }, { "epoch": 0.10627709102584684, "grad_norm": 2.4556682109832764, "learning_rate": 6.620000000000001e-05, "loss": 1.0275, "step": 331 }, { "epoch": 0.10659816985069835, "grad_norm": 2.3573825359344482, "learning_rate": 6.64e-05, "loss": 1.2238, "step": 332 }, { "epoch": 0.10691924867554985, "grad_norm": 1.6296504735946655, "learning_rate": 6.66e-05, "loss": 1.0889, "step": 333 }, { "epoch": 0.10724032750040134, "grad_norm": 1.5856961011886597, "learning_rate": 6.680000000000001e-05, "loss": 1.1944, "step": 334 }, { "epoch": 0.10756140632525285, "grad_norm": 1.3460253477096558, "learning_rate": 6.7e-05, "loss": 1.2147, "step": 335 }, { "epoch": 0.10788248515010435, "grad_norm": 2.425680637359619, "learning_rate": 6.720000000000001e-05, "loss": 1.0764, "step": 336 }, { "epoch": 0.10820356397495585, "grad_norm": 1.4525935649871826, "learning_rate": 6.740000000000001e-05, "loss": 1.1013, "step": 337 }, { "epoch": 0.10852464279980735, "grad_norm": 2.8759169578552246, "learning_rate": 6.76e-05, "loss": 1.1745, "step": 338 }, { "epoch": 0.10884572162465886, "grad_norm": 2.58520770072937, "learning_rate": 6.780000000000001e-05, "loss": 0.8345, "step": 339 }, { "epoch": 0.10916680044951035, "grad_norm": 1.522647738456726, "learning_rate": 6.800000000000001e-05, "loss": 0.9961, "step": 340 }, { "epoch": 0.10948787927436185, "grad_norm": 2.1563315391540527, "learning_rate": 6.82e-05, "loss": 1.1935, "step": 341 }, { "epoch": 0.10980895809921336, "grad_norm": 2.3725993633270264, 
"learning_rate": 6.840000000000001e-05, "loss": 1.193, "step": 342 }, { "epoch": 0.11013003692406485, "grad_norm": 1.3734397888183594, "learning_rate": 6.860000000000001e-05, "loss": 1.0738, "step": 343 }, { "epoch": 0.11045111574891636, "grad_norm": 2.5987794399261475, "learning_rate": 6.879999999999999e-05, "loss": 1.3577, "step": 344 }, { "epoch": 0.11077219457376786, "grad_norm": 2.429164171218872, "learning_rate": 6.9e-05, "loss": 1.1118, "step": 345 }, { "epoch": 0.11109327339861937, "grad_norm": 2.4097049236297607, "learning_rate": 6.92e-05, "loss": 1.0425, "step": 346 }, { "epoch": 0.11141435222347086, "grad_norm": 2.2892768383026123, "learning_rate": 6.939999999999999e-05, "loss": 0.914, "step": 347 }, { "epoch": 0.11173543104832237, "grad_norm": 2.1729300022125244, "learning_rate": 6.96e-05, "loss": 1.0326, "step": 348 }, { "epoch": 0.11205650987317387, "grad_norm": 3.1933183670043945, "learning_rate": 6.98e-05, "loss": 0.7324, "step": 349 }, { "epoch": 0.11237758869802536, "grad_norm": 2.5278120040893555, "learning_rate": 7e-05, "loss": 0.5691, "step": 350 }, { "epoch": 0.11269866752287687, "grad_norm": 2.725175619125366, "learning_rate": 7.02e-05, "loss": 1.5229, "step": 351 }, { "epoch": 0.11301974634772836, "grad_norm": 2.0373573303222656, "learning_rate": 7.04e-05, "loss": 1.4782, "step": 352 }, { "epoch": 0.11334082517257987, "grad_norm": 2.7589669227600098, "learning_rate": 7.06e-05, "loss": 1.0219, "step": 353 }, { "epoch": 0.11366190399743137, "grad_norm": 3.1588339805603027, "learning_rate": 7.08e-05, "loss": 1.0494, "step": 354 }, { "epoch": 0.11398298282228288, "grad_norm": 2.983210563659668, "learning_rate": 7.1e-05, "loss": 0.9478, "step": 355 }, { "epoch": 0.11430406164713437, "grad_norm": 2.4293222427368164, "learning_rate": 7.12e-05, "loss": 0.9519, "step": 356 }, { "epoch": 0.11462514047198587, "grad_norm": 3.7218642234802246, "learning_rate": 7.14e-05, "loss": 0.8504, "step": 357 }, { "epoch": 0.11494621929683738, "grad_norm": 
2.7284634113311768, "learning_rate": 7.16e-05, "loss": 0.9837, "step": 358 }, { "epoch": 0.11526729812168887, "grad_norm": 2.264646291732788, "learning_rate": 7.18e-05, "loss": 0.8728, "step": 359 }, { "epoch": 0.11558837694654038, "grad_norm": 3.814307451248169, "learning_rate": 7.2e-05, "loss": 1.1014, "step": 360 }, { "epoch": 0.11590945577139188, "grad_norm": 2.3103249073028564, "learning_rate": 7.22e-05, "loss": 1.0027, "step": 361 }, { "epoch": 0.11623053459624338, "grad_norm": 2.059762716293335, "learning_rate": 7.24e-05, "loss": 1.0825, "step": 362 }, { "epoch": 0.11655161342109488, "grad_norm": 1.9121147394180298, "learning_rate": 7.26e-05, "loss": 1.0882, "step": 363 }, { "epoch": 0.11687269224594637, "grad_norm": 1.7068138122558594, "learning_rate": 7.280000000000001e-05, "loss": 1.0976, "step": 364 }, { "epoch": 0.11719377107079788, "grad_norm": 4.309755802154541, "learning_rate": 7.3e-05, "loss": 1.1305, "step": 365 }, { "epoch": 0.11751484989564938, "grad_norm": 1.5519227981567383, "learning_rate": 7.32e-05, "loss": 0.9828, "step": 366 }, { "epoch": 0.11783592872050089, "grad_norm": 2.859467029571533, "learning_rate": 7.340000000000001e-05, "loss": 1.0766, "step": 367 }, { "epoch": 0.11815700754535238, "grad_norm": 2.033249855041504, "learning_rate": 7.36e-05, "loss": 1.0528, "step": 368 }, { "epoch": 0.11847808637020389, "grad_norm": 3.5415592193603516, "learning_rate": 7.38e-05, "loss": 1.1399, "step": 369 }, { "epoch": 0.11879916519505539, "grad_norm": 2.895137071609497, "learning_rate": 7.4e-05, "loss": 1.1532, "step": 370 }, { "epoch": 0.11912024401990688, "grad_norm": 1.6931350231170654, "learning_rate": 7.42e-05, "loss": 1.1186, "step": 371 }, { "epoch": 0.11944132284475839, "grad_norm": 1.9246593713760376, "learning_rate": 7.44e-05, "loss": 1.0281, "step": 372 }, { "epoch": 0.11976240166960989, "grad_norm": 1.789298176765442, "learning_rate": 7.46e-05, "loss": 1.0957, "step": 373 }, { "epoch": 0.1200834804944614, "grad_norm": 
2.8938393592834473, "learning_rate": 7.48e-05, "loss": 0.9665, "step": 374 }, { "epoch": 0.12040455931931289, "grad_norm": 2.1179943084716797, "learning_rate": 7.500000000000001e-05, "loss": 1.1447, "step": 375 }, { "epoch": 0.1207256381441644, "grad_norm": 1.7801289558410645, "learning_rate": 7.52e-05, "loss": 1.2483, "step": 376 }, { "epoch": 0.1210467169690159, "grad_norm": 1.6875920295715332, "learning_rate": 7.54e-05, "loss": 1.009, "step": 377 }, { "epoch": 0.12136779579386739, "grad_norm": 3.599494695663452, "learning_rate": 7.560000000000001e-05, "loss": 1.2534, "step": 378 }, { "epoch": 0.1216888746187189, "grad_norm": 1.4294577836990356, "learning_rate": 7.58e-05, "loss": 1.1328, "step": 379 }, { "epoch": 0.1220099534435704, "grad_norm": 1.809747576713562, "learning_rate": 7.6e-05, "loss": 1.0365, "step": 380 }, { "epoch": 0.1223310322684219, "grad_norm": 1.7651673555374146, "learning_rate": 7.620000000000001e-05, "loss": 1.0842, "step": 381 }, { "epoch": 0.1226521110932734, "grad_norm": 2.022040843963623, "learning_rate": 7.64e-05, "loss": 1.1518, "step": 382 }, { "epoch": 0.1229731899181249, "grad_norm": 1.3994489908218384, "learning_rate": 7.66e-05, "loss": 0.9633, "step": 383 }, { "epoch": 0.1232942687429764, "grad_norm": 1.4892637729644775, "learning_rate": 7.680000000000001e-05, "loss": 1.1681, "step": 384 }, { "epoch": 0.1236153475678279, "grad_norm": 2.849607467651367, "learning_rate": 7.7e-05, "loss": 1.2844, "step": 385 }, { "epoch": 0.1239364263926794, "grad_norm": 3.940389394760132, "learning_rate": 7.72e-05, "loss": 1.1795, "step": 386 }, { "epoch": 0.1242575052175309, "grad_norm": 1.6070865392684937, "learning_rate": 7.740000000000001e-05, "loss": 1.1437, "step": 387 }, { "epoch": 0.12457858404238241, "grad_norm": 1.3367242813110352, "learning_rate": 7.76e-05, "loss": 0.9778, "step": 388 }, { "epoch": 0.1248996628672339, "grad_norm": 1.9852445125579834, "learning_rate": 7.780000000000001e-05, "loss": 1.0968, "step": 389 }, { "epoch": 
0.1252207416920854, "grad_norm": 2.6707146167755127, "learning_rate": 7.800000000000001e-05, "loss": 1.2846, "step": 390 }, { "epoch": 0.1255418205169369, "grad_norm": 2.2092552185058594, "learning_rate": 7.82e-05, "loss": 1.0746, "step": 391 }, { "epoch": 0.1258628993417884, "grad_norm": 2.5755155086517334, "learning_rate": 7.840000000000001e-05, "loss": 0.9431, "step": 392 }, { "epoch": 0.1261839781666399, "grad_norm": 1.506671667098999, "learning_rate": 7.860000000000001e-05, "loss": 1.0914, "step": 393 }, { "epoch": 0.12650505699149142, "grad_norm": 1.7302513122558594, "learning_rate": 7.88e-05, "loss": 0.9212, "step": 394 }, { "epoch": 0.12682613581634292, "grad_norm": 3.166785478591919, "learning_rate": 7.900000000000001e-05, "loss": 1.1882, "step": 395 }, { "epoch": 0.1271472146411944, "grad_norm": 2.0447499752044678, "learning_rate": 7.920000000000001e-05, "loss": 0.9331, "step": 396 }, { "epoch": 0.1274682934660459, "grad_norm": 2.1262240409851074, "learning_rate": 7.94e-05, "loss": 0.9565, "step": 397 }, { "epoch": 0.1277893722908974, "grad_norm": 3.0543501377105713, "learning_rate": 7.960000000000001e-05, "loss": 0.9219, "step": 398 }, { "epoch": 0.12811045111574892, "grad_norm": 4.1444597244262695, "learning_rate": 7.98e-05, "loss": 0.8325, "step": 399 }, { "epoch": 0.12843152994060042, "grad_norm": 1.8101000785827637, "learning_rate": 8e-05, "loss": 0.7651, "step": 400 }, { "epoch": 0.12875260876545191, "grad_norm": 4.419740676879883, "learning_rate": 8.020000000000001e-05, "loss": 1.6615, "step": 401 }, { "epoch": 0.1290736875903034, "grad_norm": 2.6006617546081543, "learning_rate": 8.04e-05, "loss": 1.6427, "step": 402 }, { "epoch": 0.12939476641515493, "grad_norm": 3.2348382472991943, "learning_rate": 8.060000000000001e-05, "loss": 1.0184, "step": 403 }, { "epoch": 0.12971584524000643, "grad_norm": 2.8556830883026123, "learning_rate": 8.080000000000001e-05, "loss": 1.0283, "step": 404 }, { "epoch": 0.13003692406485792, "grad_norm": 
3.4048428535461426, "learning_rate": 8.1e-05, "loss": 1.0695, "step": 405 }, { "epoch": 0.13035800288970942, "grad_norm": 2.2543652057647705, "learning_rate": 8.120000000000001e-05, "loss": 0.9112, "step": 406 }, { "epoch": 0.1306790817145609, "grad_norm": 4.042235851287842, "learning_rate": 8.14e-05, "loss": 0.9898, "step": 407 }, { "epoch": 0.13100016053941244, "grad_norm": 2.8973443508148193, "learning_rate": 8.16e-05, "loss": 0.8473, "step": 408 }, { "epoch": 0.13132123936426393, "grad_norm": 3.279799461364746, "learning_rate": 8.18e-05, "loss": 1.0893, "step": 409 }, { "epoch": 0.13164231818911543, "grad_norm": 3.183879852294922, "learning_rate": 8.2e-05, "loss": 1.0263, "step": 410 }, { "epoch": 0.13196339701396692, "grad_norm": 2.066544532775879, "learning_rate": 8.22e-05, "loss": 1.021, "step": 411 }, { "epoch": 0.13228447583881842, "grad_norm": 1.7702231407165527, "learning_rate": 8.24e-05, "loss": 1.1344, "step": 412 }, { "epoch": 0.13260555466366994, "grad_norm": 2.262660026550293, "learning_rate": 8.26e-05, "loss": 1.024, "step": 413 }, { "epoch": 0.13292663348852143, "grad_norm": 1.833029866218567, "learning_rate": 8.28e-05, "loss": 1.2207, "step": 414 }, { "epoch": 0.13324771231337293, "grad_norm": 2.144501209259033, "learning_rate": 8.3e-05, "loss": 1.1325, "step": 415 }, { "epoch": 0.13356879113822442, "grad_norm": 3.092042922973633, "learning_rate": 8.32e-05, "loss": 1.3365, "step": 416 }, { "epoch": 0.13388986996307595, "grad_norm": 2.136219024658203, "learning_rate": 8.34e-05, "loss": 1.0163, "step": 417 }, { "epoch": 0.13421094878792744, "grad_norm": 1.7004696130752563, "learning_rate": 8.36e-05, "loss": 0.9517, "step": 418 }, { "epoch": 0.13453202761277894, "grad_norm": 2.4267795085906982, "learning_rate": 8.38e-05, "loss": 1.0013, "step": 419 }, { "epoch": 0.13485310643763043, "grad_norm": 2.0210540294647217, "learning_rate": 8.4e-05, "loss": 1.1871, "step": 420 }, { "epoch": 0.13517418526248193, "grad_norm": 2.205508232116699, 
"learning_rate": 8.42e-05, "loss": 1.0592, "step": 421 }, { "epoch": 0.13549526408733345, "grad_norm": 2.3667571544647217, "learning_rate": 8.44e-05, "loss": 0.9743, "step": 422 }, { "epoch": 0.13581634291218495, "grad_norm": 2.3195512294769287, "learning_rate": 8.46e-05, "loss": 1.1209, "step": 423 }, { "epoch": 0.13613742173703644, "grad_norm": 2.4299728870391846, "learning_rate": 8.48e-05, "loss": 1.1707, "step": 424 }, { "epoch": 0.13645850056188794, "grad_norm": 2.341249465942383, "learning_rate": 8.5e-05, "loss": 1.0963, "step": 425 }, { "epoch": 0.13677957938673946, "grad_norm": 2.3623969554901123, "learning_rate": 8.52e-05, "loss": 1.2913, "step": 426 }, { "epoch": 0.13710065821159095, "grad_norm": 3.114179849624634, "learning_rate": 8.54e-05, "loss": 1.2193, "step": 427 }, { "epoch": 0.13742173703644245, "grad_norm": 2.372199058532715, "learning_rate": 8.560000000000001e-05, "loss": 1.1863, "step": 428 }, { "epoch": 0.13774281586129394, "grad_norm": 2.142880916595459, "learning_rate": 8.58e-05, "loss": 1.0837, "step": 429 }, { "epoch": 0.13806389468614544, "grad_norm": 2.024472951889038, "learning_rate": 8.6e-05, "loss": 1.036, "step": 430 }, { "epoch": 0.13838497351099696, "grad_norm": 2.7535400390625, "learning_rate": 8.620000000000001e-05, "loss": 0.9773, "step": 431 }, { "epoch": 0.13870605233584846, "grad_norm": 2.199688196182251, "learning_rate": 8.64e-05, "loss": 1.0129, "step": 432 }, { "epoch": 0.13902713116069995, "grad_norm": 2.8626081943511963, "learning_rate": 8.66e-05, "loss": 1.0404, "step": 433 }, { "epoch": 0.13934820998555145, "grad_norm": 2.4395930767059326, "learning_rate": 8.680000000000001e-05, "loss": 0.9112, "step": 434 }, { "epoch": 0.13966928881040294, "grad_norm": 2.2529709339141846, "learning_rate": 8.7e-05, "loss": 1.0701, "step": 435 }, { "epoch": 0.13999036763525446, "grad_norm": 2.0451371669769287, "learning_rate": 8.72e-05, "loss": 1.2465, "step": 436 }, { "epoch": 0.14031144646010596, "grad_norm": 1.9797896146774292, 
"learning_rate": 8.740000000000001e-05, "loss": 0.9705, "step": 437 }, { "epoch": 0.14063252528495745, "grad_norm": 2.4405622482299805, "learning_rate": 8.76e-05, "loss": 1.197, "step": 438 }, { "epoch": 0.14095360410980895, "grad_norm": 1.761278748512268, "learning_rate": 8.78e-05, "loss": 1.0646, "step": 439 }, { "epoch": 0.14127468293466047, "grad_norm": 1.8971152305603027, "learning_rate": 8.800000000000001e-05, "loss": 0.9596, "step": 440 }, { "epoch": 0.14159576175951197, "grad_norm": 2.072056531906128, "learning_rate": 8.82e-05, "loss": 0.9762, "step": 441 }, { "epoch": 0.14191684058436346, "grad_norm": 2.229867935180664, "learning_rate": 8.840000000000001e-05, "loss": 1.1814, "step": 442 }, { "epoch": 0.14223791940921496, "grad_norm": 1.3738412857055664, "learning_rate": 8.86e-05, "loss": 0.8249, "step": 443 }, { "epoch": 0.14255899823406645, "grad_norm": 1.5406107902526855, "learning_rate": 8.88e-05, "loss": 0.894, "step": 444 }, { "epoch": 0.14288007705891798, "grad_norm": 4.295513153076172, "learning_rate": 8.900000000000001e-05, "loss": 0.9362, "step": 445 }, { "epoch": 0.14320115588376947, "grad_norm": 2.7712790966033936, "learning_rate": 8.92e-05, "loss": 0.8543, "step": 446 }, { "epoch": 0.14352223470862097, "grad_norm": 2.536055088043213, "learning_rate": 8.94e-05, "loss": 0.9281, "step": 447 }, { "epoch": 0.14384331353347246, "grad_norm": 3.111720323562622, "learning_rate": 8.960000000000001e-05, "loss": 0.7778, "step": 448 }, { "epoch": 0.14416439235832396, "grad_norm": 2.919748306274414, "learning_rate": 8.98e-05, "loss": 0.8721, "step": 449 }, { "epoch": 0.14448547118317548, "grad_norm": 2.425325870513916, "learning_rate": 9e-05, "loss": 0.7305, "step": 450 }, { "epoch": 0.14480655000802697, "grad_norm": 2.294600009918213, "learning_rate": 9.020000000000001e-05, "loss": 1.6339, "step": 451 }, { "epoch": 0.14512762883287847, "grad_norm": 2.2173311710357666, "learning_rate": 9.04e-05, "loss": 1.4077, "step": 452 }, { "epoch": 0.14544870765772996, 
"grad_norm": 2.2760026454925537, "learning_rate": 9.06e-05, "loss": 1.0451, "step": 453 }, { "epoch": 0.1457697864825815, "grad_norm": 2.850890636444092, "learning_rate": 9.080000000000001e-05, "loss": 0.9108, "step": 454 }, { "epoch": 0.14609086530743298, "grad_norm": 2.1252541542053223, "learning_rate": 9.1e-05, "loss": 0.9681, "step": 455 }, { "epoch": 0.14641194413228448, "grad_norm": 2.2665953636169434, "learning_rate": 9.120000000000001e-05, "loss": 0.9184, "step": 456 }, { "epoch": 0.14673302295713597, "grad_norm": 3.0463030338287354, "learning_rate": 9.140000000000001e-05, "loss": 0.8551, "step": 457 }, { "epoch": 0.14705410178198747, "grad_norm": 4.869279384613037, "learning_rate": 9.16e-05, "loss": 1.1517, "step": 458 }, { "epoch": 0.147375180606839, "grad_norm": 2.495591878890991, "learning_rate": 9.180000000000001e-05, "loss": 1.264, "step": 459 }, { "epoch": 0.14769625943169049, "grad_norm": 1.5504367351531982, "learning_rate": 9.200000000000001e-05, "loss": 1.1832, "step": 460 }, { "epoch": 0.14801733825654198, "grad_norm": 1.7231016159057617, "learning_rate": 9.22e-05, "loss": 1.0143, "step": 461 }, { "epoch": 0.14833841708139348, "grad_norm": 2.187086343765259, "learning_rate": 9.240000000000001e-05, "loss": 1.2475, "step": 462 }, { "epoch": 0.14865949590624497, "grad_norm": 2.143453359603882, "learning_rate": 9.260000000000001e-05, "loss": 1.1353, "step": 463 }, { "epoch": 0.1489805747310965, "grad_norm": 2.101834774017334, "learning_rate": 9.28e-05, "loss": 1.2927, "step": 464 }, { "epoch": 0.149301653555948, "grad_norm": 2.4847493171691895, "learning_rate": 9.300000000000001e-05, "loss": 1.2384, "step": 465 }, { "epoch": 0.14962273238079948, "grad_norm": 2.28727650642395, "learning_rate": 9.320000000000002e-05, "loss": 1.0456, "step": 466 }, { "epoch": 0.14994381120565098, "grad_norm": 1.9797146320343018, "learning_rate": 9.340000000000001e-05, "loss": 1.1047, "step": 467 }, { "epoch": 0.1502648900305025, "grad_norm": 2.9211812019348145, 
"learning_rate": 9.360000000000001e-05, "loss": 1.0695, "step": 468 }, { "epoch": 0.150585968855354, "grad_norm": 1.7299671173095703, "learning_rate": 9.38e-05, "loss": 0.8681, "step": 469 }, { "epoch": 0.1509070476802055, "grad_norm": 2.1782050132751465, "learning_rate": 9.4e-05, "loss": 1.1974, "step": 470 }, { "epoch": 0.151228126505057, "grad_norm": 4.492698669433594, "learning_rate": 9.42e-05, "loss": 1.0819, "step": 471 }, { "epoch": 0.15154920532990848, "grad_norm": 2.4074699878692627, "learning_rate": 9.44e-05, "loss": 1.1869, "step": 472 }, { "epoch": 0.15187028415476, "grad_norm": 2.0078368186950684, "learning_rate": 9.46e-05, "loss": 1.0196, "step": 473 }, { "epoch": 0.1521913629796115, "grad_norm": 1.4234404563903809, "learning_rate": 9.48e-05, "loss": 1.1275, "step": 474 }, { "epoch": 0.152512441804463, "grad_norm": 1.73353111743927, "learning_rate": 9.5e-05, "loss": 1.103, "step": 475 }, { "epoch": 0.1528335206293145, "grad_norm": 2.3276383876800537, "learning_rate": 9.52e-05, "loss": 1.0127, "step": 476 }, { "epoch": 0.15315459945416598, "grad_norm": 2.4894511699676514, "learning_rate": 9.54e-05, "loss": 1.3882, "step": 477 }, { "epoch": 0.1534756782790175, "grad_norm": 2.496798038482666, "learning_rate": 9.56e-05, "loss": 0.9479, "step": 478 }, { "epoch": 0.153796757103869, "grad_norm": 1.701995611190796, "learning_rate": 9.58e-05, "loss": 1.0526, "step": 479 }, { "epoch": 0.1541178359287205, "grad_norm": 1.432371973991394, "learning_rate": 9.6e-05, "loss": 1.1382, "step": 480 }, { "epoch": 0.154438914753572, "grad_norm": 2.706610918045044, "learning_rate": 9.620000000000001e-05, "loss": 0.9633, "step": 481 }, { "epoch": 0.15475999357842352, "grad_norm": 2.06569504737854, "learning_rate": 9.64e-05, "loss": 0.9708, "step": 482 }, { "epoch": 0.155081072403275, "grad_norm": 2.1104109287261963, "learning_rate": 9.66e-05, "loss": 0.9835, "step": 483 }, { "epoch": 0.1554021512281265, "grad_norm": 1.5272530317306519, "learning_rate": 9.680000000000001e-05, 
"loss": 1.0398, "step": 484 }, { "epoch": 0.155723230052978, "grad_norm": 2.510329246520996, "learning_rate": 9.7e-05, "loss": 1.1728, "step": 485 }, { "epoch": 0.1560443088778295, "grad_norm": 1.6082532405853271, "learning_rate": 9.72e-05, "loss": 1.1114, "step": 486 }, { "epoch": 0.15636538770268102, "grad_norm": 1.8710626363754272, "learning_rate": 9.74e-05, "loss": 0.8267, "step": 487 }, { "epoch": 0.15668646652753251, "grad_norm": 1.3604836463928223, "learning_rate": 9.76e-05, "loss": 0.9311, "step": 488 }, { "epoch": 0.157007545352384, "grad_norm": 1.7233448028564453, "learning_rate": 9.78e-05, "loss": 1.0344, "step": 489 }, { "epoch": 0.1573286241772355, "grad_norm": 1.827020287513733, "learning_rate": 9.8e-05, "loss": 1.0538, "step": 490 }, { "epoch": 0.157649703002087, "grad_norm": 2.838731050491333, "learning_rate": 9.82e-05, "loss": 1.0937, "step": 491 }, { "epoch": 0.15797078182693852, "grad_norm": 2.2758519649505615, "learning_rate": 9.84e-05, "loss": 1.0409, "step": 492 }, { "epoch": 0.15829186065179002, "grad_norm": 3.4251341819763184, "learning_rate": 9.86e-05, "loss": 1.0816, "step": 493 }, { "epoch": 0.1586129394766415, "grad_norm": 2.235792398452759, "learning_rate": 9.88e-05, "loss": 0.9386, "step": 494 }, { "epoch": 0.158934018301493, "grad_norm": 1.5516377687454224, "learning_rate": 9.900000000000001e-05, "loss": 1.0178, "step": 495 }, { "epoch": 0.15925509712634453, "grad_norm": 2.0881407260894775, "learning_rate": 9.92e-05, "loss": 0.893, "step": 496 }, { "epoch": 0.15957617595119603, "grad_norm": 2.0493526458740234, "learning_rate": 9.94e-05, "loss": 0.9658, "step": 497 }, { "epoch": 0.15989725477604752, "grad_norm": 1.7837196588516235, "learning_rate": 9.960000000000001e-05, "loss": 0.7789, "step": 498 }, { "epoch": 0.16021833360089902, "grad_norm": 2.1364388465881348, "learning_rate": 9.98e-05, "loss": 0.586, "step": 499 }, { "epoch": 0.1605394124257505, "grad_norm": 2.1484198570251465, "learning_rate": 0.0001, "loss": 0.6172, "step": 500 
}, { "epoch": 0.16086049125060203, "grad_norm": 6.418078899383545, "learning_rate": 0.00010020000000000001, "loss": 1.499, "step": 501 }, { "epoch": 0.16118157007545353, "grad_norm": 3.4180362224578857, "learning_rate": 0.0001004, "loss": 1.1029, "step": 502 }, { "epoch": 0.16150264890030502, "grad_norm": 3.427628517150879, "learning_rate": 0.0001006, "loss": 1.1054, "step": 503 }, { "epoch": 0.16182372772515652, "grad_norm": 3.6065824031829834, "learning_rate": 0.00010080000000000001, "loss": 0.9196, "step": 504 }, { "epoch": 0.162144806550008, "grad_norm": 2.572456121444702, "learning_rate": 0.000101, "loss": 0.9429, "step": 505 }, { "epoch": 0.16246588537485954, "grad_norm": 2.5031585693359375, "learning_rate": 0.00010120000000000001, "loss": 0.8701, "step": 506 }, { "epoch": 0.16278696419971103, "grad_norm": 2.1001455783843994, "learning_rate": 0.00010140000000000001, "loss": 0.992, "step": 507 }, { "epoch": 0.16310804302456253, "grad_norm": 2.580418109893799, "learning_rate": 0.0001016, "loss": 1.0474, "step": 508 }, { "epoch": 0.16342912184941402, "grad_norm": 2.187901496887207, "learning_rate": 0.00010180000000000001, "loss": 1.1151, "step": 509 }, { "epoch": 0.16375020067426554, "grad_norm": 1.5737202167510986, "learning_rate": 0.00010200000000000001, "loss": 1.0749, "step": 510 }, { "epoch": 0.16407127949911704, "grad_norm": 2.0835020542144775, "learning_rate": 0.0001022, "loss": 0.7957, "step": 511 }, { "epoch": 0.16439235832396853, "grad_norm": 2.3619582653045654, "learning_rate": 0.00010240000000000001, "loss": 0.8308, "step": 512 }, { "epoch": 0.16471343714882003, "grad_norm": 1.7399780750274658, "learning_rate": 0.00010260000000000001, "loss": 1.1296, "step": 513 }, { "epoch": 0.16503451597367153, "grad_norm": 2.110400676727295, "learning_rate": 0.0001028, "loss": 0.8289, "step": 514 }, { "epoch": 0.16535559479852305, "grad_norm": 1.960696816444397, "learning_rate": 0.00010300000000000001, "loss": 1.1581, "step": 515 }, { "epoch": 0.16567667362337454, 
"grad_norm": 1.2459540367126465, "learning_rate": 0.0001032, "loss": 1.0465, "step": 516 }, { "epoch": 0.16599775244822604, "grad_norm": 1.4018001556396484, "learning_rate": 0.0001034, "loss": 0.9106, "step": 517 }, { "epoch": 0.16631883127307753, "grad_norm": 1.3802064657211304, "learning_rate": 0.00010360000000000001, "loss": 1.0696, "step": 518 }, { "epoch": 0.16663991009792903, "grad_norm": 5.539737224578857, "learning_rate": 0.0001038, "loss": 1.0406, "step": 519 }, { "epoch": 0.16696098892278055, "grad_norm": 2.785435676574707, "learning_rate": 0.00010400000000000001, "loss": 0.9252, "step": 520 }, { "epoch": 0.16728206774763205, "grad_norm": 1.6449223756790161, "learning_rate": 0.00010420000000000001, "loss": 0.9388, "step": 521 }, { "epoch": 0.16760314657248354, "grad_norm": 1.6841291189193726, "learning_rate": 0.0001044, "loss": 0.9676, "step": 522 }, { "epoch": 0.16792422539733504, "grad_norm": 2.328427314758301, "learning_rate": 0.00010460000000000001, "loss": 0.9967, "step": 523 }, { "epoch": 0.16824530422218656, "grad_norm": 1.5015443563461304, "learning_rate": 0.00010480000000000001, "loss": 0.9851, "step": 524 }, { "epoch": 0.16856638304703805, "grad_norm": 1.587814450263977, "learning_rate": 0.000105, "loss": 0.9411, "step": 525 }, { "epoch": 0.16888746187188955, "grad_norm": 2.9774792194366455, "learning_rate": 0.00010520000000000001, "loss": 0.9465, "step": 526 }, { "epoch": 0.16920854069674104, "grad_norm": 2.033804416656494, "learning_rate": 0.00010540000000000001, "loss": 0.991, "step": 527 }, { "epoch": 0.16952961952159254, "grad_norm": 2.3683605194091797, "learning_rate": 0.0001056, "loss": 1.0893, "step": 528 }, { "epoch": 0.16985069834644406, "grad_norm": 2.22643780708313, "learning_rate": 0.00010580000000000001, "loss": 1.1218, "step": 529 }, { "epoch": 0.17017177717129556, "grad_norm": 2.789557933807373, "learning_rate": 0.00010600000000000002, "loss": 1.0919, "step": 530 }, { "epoch": 0.17049285599614705, "grad_norm": 1.379966378211975, 
"learning_rate": 0.0001062, "loss": 1.1469, "step": 531 }, { "epoch": 0.17081393482099855, "grad_norm": 1.87637197971344, "learning_rate": 0.00010640000000000001, "loss": 1.0153, "step": 532 }, { "epoch": 0.17113501364585004, "grad_norm": 1.728216528892517, "learning_rate": 0.00010660000000000002, "loss": 1.0824, "step": 533 }, { "epoch": 0.17145609247070157, "grad_norm": 1.858970046043396, "learning_rate": 0.00010680000000000001, "loss": 1.0331, "step": 534 }, { "epoch": 0.17177717129555306, "grad_norm": 2.390653133392334, "learning_rate": 0.00010700000000000001, "loss": 1.2246, "step": 535 }, { "epoch": 0.17209825012040456, "grad_norm": 1.582828402519226, "learning_rate": 0.00010720000000000002, "loss": 1.0276, "step": 536 }, { "epoch": 0.17241932894525605, "grad_norm": 1.6667520999908447, "learning_rate": 0.00010740000000000001, "loss": 0.899, "step": 537 }, { "epoch": 0.17274040777010757, "grad_norm": 2.3515865802764893, "learning_rate": 0.00010760000000000001, "loss": 0.9804, "step": 538 }, { "epoch": 0.17306148659495907, "grad_norm": 2.0802536010742188, "learning_rate": 0.00010780000000000002, "loss": 1.0002, "step": 539 }, { "epoch": 0.17338256541981056, "grad_norm": 1.2941781282424927, "learning_rate": 0.00010800000000000001, "loss": 1.1026, "step": 540 }, { "epoch": 0.17370364424466206, "grad_norm": 1.7126529216766357, "learning_rate": 0.00010820000000000001, "loss": 1.1084, "step": 541 }, { "epoch": 0.17402472306951355, "grad_norm": 1.5774749517440796, "learning_rate": 0.00010840000000000002, "loss": 0.99, "step": 542 }, { "epoch": 0.17434580189436508, "grad_norm": 1.576080322265625, "learning_rate": 0.00010860000000000001, "loss": 0.9603, "step": 543 }, { "epoch": 0.17466688071921657, "grad_norm": 1.4444918632507324, "learning_rate": 0.00010880000000000002, "loss": 1.0226, "step": 544 }, { "epoch": 0.17498795954406807, "grad_norm": 2.149320363998413, "learning_rate": 0.000109, "loss": 0.8809, "step": 545 }, { "epoch": 0.17530903836891956, "grad_norm": 
2.1339616775512695, "learning_rate": 0.00010920000000000001, "loss": 0.9411, "step": 546 }, { "epoch": 0.17563011719377106, "grad_norm": 3.13826847076416, "learning_rate": 0.00010940000000000002, "loss": 0.9321, "step": 547 }, { "epoch": 0.17595119601862258, "grad_norm": 2.3245761394500732, "learning_rate": 0.00010960000000000001, "loss": 0.8781, "step": 548 }, { "epoch": 0.17627227484347407, "grad_norm": 1.5890634059906006, "learning_rate": 0.00010980000000000001, "loss": 0.8076, "step": 549 }, { "epoch": 0.17659335366832557, "grad_norm": 1.824544906616211, "learning_rate": 0.00011000000000000002, "loss": 0.5724, "step": 550 }, { "epoch": 0.17691443249317707, "grad_norm": 3.6526408195495605, "learning_rate": 0.00011020000000000001, "loss": 1.5191, "step": 551 }, { "epoch": 0.1772355113180286, "grad_norm": 3.4367077350616455, "learning_rate": 0.00011040000000000001, "loss": 1.2869, "step": 552 }, { "epoch": 0.17755659014288008, "grad_norm": 3.5046842098236084, "learning_rate": 0.00011060000000000002, "loss": 1.0015, "step": 553 }, { "epoch": 0.17787766896773158, "grad_norm": 3.2727510929107666, "learning_rate": 0.00011080000000000001, "loss": 0.9038, "step": 554 }, { "epoch": 0.17819874779258307, "grad_norm": 2.613293409347534, "learning_rate": 0.00011100000000000001, "loss": 0.9512, "step": 555 }, { "epoch": 0.17851982661743457, "grad_norm": 4.011002540588379, "learning_rate": 0.00011120000000000002, "loss": 0.8484, "step": 556 }, { "epoch": 0.1788409054422861, "grad_norm": 3.158597707748413, "learning_rate": 0.00011140000000000001, "loss": 1.0517, "step": 557 }, { "epoch": 0.1791619842671376, "grad_norm": 2.143503427505493, "learning_rate": 0.00011160000000000002, "loss": 1.0966, "step": 558 }, { "epoch": 0.17948306309198908, "grad_norm": 2.083723545074463, "learning_rate": 0.00011180000000000002, "loss": 1.0665, "step": 559 }, { "epoch": 0.17980414191684058, "grad_norm": 1.7016173601150513, "learning_rate": 0.00011200000000000001, "loss": 0.9647, "step": 560 }, 
{ "epoch": 0.1801252207416921, "grad_norm": 3.817215919494629, "learning_rate": 0.00011220000000000002, "loss": 0.7164, "step": 561 }, { "epoch": 0.1804462995665436, "grad_norm": 2.013831853866577, "learning_rate": 0.00011240000000000002, "loss": 1.135, "step": 562 }, { "epoch": 0.1807673783913951, "grad_norm": 2.0431296825408936, "learning_rate": 0.0001126, "loss": 1.1298, "step": 563 }, { "epoch": 0.18108845721624658, "grad_norm": 2.106182813644409, "learning_rate": 0.00011279999999999999, "loss": 1.1183, "step": 564 }, { "epoch": 0.18140953604109808, "grad_norm": 2.747995615005493, "learning_rate": 0.000113, "loss": 0.9181, "step": 565 }, { "epoch": 0.1817306148659496, "grad_norm": 2.186434268951416, "learning_rate": 0.0001132, "loss": 1.3199, "step": 566 }, { "epoch": 0.1820516936908011, "grad_norm": 2.072404146194458, "learning_rate": 0.00011339999999999999, "loss": 0.9275, "step": 567 }, { "epoch": 0.1823727725156526, "grad_norm": 2.051323890686035, "learning_rate": 0.0001136, "loss": 0.9817, "step": 568 }, { "epoch": 0.1826938513405041, "grad_norm": 2.325993061065674, "learning_rate": 0.0001138, "loss": 1.2054, "step": 569 }, { "epoch": 0.18301493016535558, "grad_norm": 2.8382861614227295, "learning_rate": 0.00011399999999999999, "loss": 1.0196, "step": 570 }, { "epoch": 0.1833360089902071, "grad_norm": 1.9472187757492065, "learning_rate": 0.0001142, "loss": 1.028, "step": 571 }, { "epoch": 0.1836570878150586, "grad_norm": 2.2961337566375732, "learning_rate": 0.0001144, "loss": 1.1601, "step": 572 }, { "epoch": 0.1839781666399101, "grad_norm": 1.6054937839508057, "learning_rate": 0.0001146, "loss": 0.9415, "step": 573 }, { "epoch": 0.1842992454647616, "grad_norm": 1.5658379793167114, "learning_rate": 0.0001148, "loss": 0.8921, "step": 574 }, { "epoch": 0.1846203242896131, "grad_norm": 2.6736388206481934, "learning_rate": 0.00011499999999999999, "loss": 1.036, "step": 575 }, { "epoch": 0.1849414031144646, "grad_norm": 2.179887056350708, "learning_rate": 
0.0001152, "loss": 1.3059, "step": 576 }, { "epoch": 0.1852624819393161, "grad_norm": 1.418059229850769, "learning_rate": 0.0001154, "loss": 1.0964, "step": 577 }, { "epoch": 0.1855835607641676, "grad_norm": 2.5086400508880615, "learning_rate": 0.00011559999999999999, "loss": 1.1934, "step": 578 }, { "epoch": 0.1859046395890191, "grad_norm": 2.1473209857940674, "learning_rate": 0.0001158, "loss": 1.0245, "step": 579 }, { "epoch": 0.18622571841387062, "grad_norm": 4.459160804748535, "learning_rate": 0.000116, "loss": 1.3467, "step": 580 }, { "epoch": 0.1865467972387221, "grad_norm": 2.073753833770752, "learning_rate": 0.00011619999999999999, "loss": 0.8753, "step": 581 }, { "epoch": 0.1868678760635736, "grad_norm": 1.9933395385742188, "learning_rate": 0.0001164, "loss": 0.7768, "step": 582 }, { "epoch": 0.1871889548884251, "grad_norm": 2.649278402328491, "learning_rate": 0.0001166, "loss": 1.2715, "step": 583 }, { "epoch": 0.1875100337132766, "grad_norm": 3.018387794494629, "learning_rate": 0.00011679999999999999, "loss": 1.139, "step": 584 }, { "epoch": 0.18783111253812812, "grad_norm": 2.405787467956543, "learning_rate": 0.000117, "loss": 1.1507, "step": 585 }, { "epoch": 0.18815219136297961, "grad_norm": 2.1480977535247803, "learning_rate": 0.0001172, "loss": 1.1337, "step": 586 }, { "epoch": 0.1884732701878311, "grad_norm": null, "learning_rate": 0.0001172, "loss": 1.233, "step": 587 }, { "epoch": 0.1887943490126826, "grad_norm": 1.5942261219024658, "learning_rate": 0.0001174, "loss": 0.874, "step": 588 }, { "epoch": 0.18911542783753413, "grad_norm": 3.029067039489746, "learning_rate": 0.0001176, "loss": 0.9061, "step": 589 }, { "epoch": 0.18943650666238562, "grad_norm": 1.6258342266082764, "learning_rate": 0.0001178, "loss": 0.8887, "step": 590 }, { "epoch": 0.18975758548723712, "grad_norm": 1.8686645030975342, "learning_rate": 0.000118, "loss": 0.887, "step": 591 }, { "epoch": 0.1900786643120886, "grad_norm": 2.315640687942505, "learning_rate": 0.0001182, 
"loss": 1.2327, "step": 592 }, { "epoch": 0.1903997431369401, "grad_norm": 2.111231565475464, "learning_rate": 0.0001184, "loss": 1.2629, "step": 593 }, { "epoch": 0.19072082196179163, "grad_norm": 2.422922372817993, "learning_rate": 0.0001186, "loss": 0.9423, "step": 594 }, { "epoch": 0.19104190078664313, "grad_norm": 2.0617730617523193, "learning_rate": 0.0001188, "loss": 1.0237, "step": 595 }, { "epoch": 0.19136297961149462, "grad_norm": 2.295034408569336, "learning_rate": 0.000119, "loss": 0.993, "step": 596 }, { "epoch": 0.19168405843634612, "grad_norm": 1.7605993747711182, "learning_rate": 0.0001192, "loss": 1.0313, "step": 597 }, { "epoch": 0.1920051372611976, "grad_norm": 1.9907429218292236, "learning_rate": 0.0001194, "loss": 0.9979, "step": 598 }, { "epoch": 0.19232621608604913, "grad_norm": 2.0522098541259766, "learning_rate": 0.00011960000000000001, "loss": 0.8497, "step": 599 }, { "epoch": 0.19264729491090063, "grad_norm": 1.6448557376861572, "learning_rate": 0.0001198, "loss": 0.6443, "step": 600 }, { "epoch": 0.19296837373575212, "grad_norm": 95.2552719116211, "learning_rate": 0.00012, "loss": 1.6446, "step": 601 }, { "epoch": 0.19328945256060362, "grad_norm": 50.20346450805664, "learning_rate": 0.00012020000000000001, "loss": 1.4505, "step": 602 }, { "epoch": 0.19361053138545514, "grad_norm": 3.067426919937134, "learning_rate": 0.0001204, "loss": 1.0486, "step": 603 }, { "epoch": 0.19393161021030664, "grad_norm": 3.5429553985595703, "learning_rate": 0.0001206, "loss": 0.9873, "step": 604 }, { "epoch": 0.19425268903515813, "grad_norm": 3.2231061458587646, "learning_rate": 0.0001208, "loss": 0.9301, "step": 605 }, { "epoch": 0.19457376786000963, "grad_norm": 2.5521445274353027, "learning_rate": 0.000121, "loss": 0.8936, "step": 606 }, { "epoch": 0.19489484668486112, "grad_norm": 2.4326229095458984, "learning_rate": 0.0001212, "loss": 0.9659, "step": 607 }, { "epoch": 0.19521592550971265, "grad_norm": 4.246782302856445, "learning_rate": 0.0001214, 
"loss": 1.0794, "step": 608 }, { "epoch": 0.19553700433456414, "grad_norm": 3.502556562423706, "learning_rate": 0.0001216, "loss": 1.0255, "step": 609 }, { "epoch": 0.19585808315941564, "grad_norm": 2.036647319793701, "learning_rate": 0.0001218, "loss": 0.8848, "step": 610 }, { "epoch": 0.19617916198426713, "grad_norm": 2.284869432449341, "learning_rate": 0.000122, "loss": 0.9834, "step": 611 }, { "epoch": 0.19650024080911863, "grad_norm": 1.4191306829452515, "learning_rate": 0.00012220000000000002, "loss": 0.945, "step": 612 }, { "epoch": 0.19682131963397015, "grad_norm": 2.529686450958252, "learning_rate": 0.0001224, "loss": 1.0871, "step": 613 }, { "epoch": 0.19714239845882164, "grad_norm": 1.5338515043258667, "learning_rate": 0.0001226, "loss": 0.9792, "step": 614 }, { "epoch": 0.19746347728367314, "grad_norm": 1.4318336248397827, "learning_rate": 0.0001228, "loss": 1.0498, "step": 615 }, { "epoch": 0.19778455610852463, "grad_norm": 1.7187212705612183, "learning_rate": 0.000123, "loss": 1.0418, "step": 616 }, { "epoch": 0.19810563493337616, "grad_norm": 1.5892479419708252, "learning_rate": 0.0001232, "loss": 1.1123, "step": 617 }, { "epoch": 0.19842671375822765, "grad_norm": 2.3069276809692383, "learning_rate": 0.00012340000000000002, "loss": 0.8825, "step": 618 }, { "epoch": 0.19874779258307915, "grad_norm": 2.5315756797790527, "learning_rate": 0.0001236, "loss": 0.9985, "step": 619 }, { "epoch": 0.19906887140793064, "grad_norm": 3.1710851192474365, "learning_rate": 0.0001238, "loss": 1.0591, "step": 620 }, { "epoch": 0.19938995023278214, "grad_norm": 3.0601823329925537, "learning_rate": 0.000124, "loss": 1.1456, "step": 621 }, { "epoch": 0.19971102905763366, "grad_norm": 4.170618057250977, "learning_rate": 0.0001242, "loss": 1.0819, "step": 622 }, { "epoch": 0.20003210788248516, "grad_norm": 6.973270416259766, "learning_rate": 0.00012440000000000002, "loss": 1.0412, "step": 623 }, { "epoch": 0.20035318670733665, "grad_norm": 1.518310546875, "learning_rate": 
0.0001246, "loss": 0.9291, "step": 624 }, { "epoch": 0.20067426553218815, "grad_norm": 1.8494837284088135, "learning_rate": 0.0001248, "loss": 0.8893, "step": 625 }, { "epoch": 0.20099534435703964, "grad_norm": 1.8757221698760986, "learning_rate": 0.000125, "loss": 0.9862, "step": 626 }, { "epoch": 0.20131642318189116, "grad_norm": 2.3286845684051514, "learning_rate": 0.0001252, "loss": 1.3974, "step": 627 }, { "epoch": 0.20163750200674266, "grad_norm": 2.6092865467071533, "learning_rate": 0.0001254, "loss": 1.3019, "step": 628 }, { "epoch": 0.20195858083159415, "grad_norm": 2.212606906890869, "learning_rate": 0.00012560000000000002, "loss": 1.1727, "step": 629 }, { "epoch": 0.20227965965644565, "grad_norm": 1.5337756872177124, "learning_rate": 0.0001258, "loss": 1.058, "step": 630 }, { "epoch": 0.20260073848129717, "grad_norm": 2.063206672668457, "learning_rate": 0.000126, "loss": 1.1522, "step": 631 }, { "epoch": 0.20292181730614867, "grad_norm": 2.371931552886963, "learning_rate": 0.0001262, "loss": 1.1871, "step": 632 }, { "epoch": 0.20324289613100016, "grad_norm": 1.9318181276321411, "learning_rate": 0.0001264, "loss": 0.8263, "step": 633 }, { "epoch": 0.20356397495585166, "grad_norm": 1.6861572265625, "learning_rate": 0.00012660000000000001, "loss": 0.9605, "step": 634 }, { "epoch": 0.20388505378070315, "grad_norm": 1.6631700992584229, "learning_rate": 0.00012680000000000002, "loss": 1.085, "step": 635 }, { "epoch": 0.20420613260555467, "grad_norm": 2.279987096786499, "learning_rate": 0.000127, "loss": 1.0187, "step": 636 }, { "epoch": 0.20452721143040617, "grad_norm": 1.2203277349472046, "learning_rate": 0.0001272, "loss": 0.8815, "step": 637 }, { "epoch": 0.20484829025525766, "grad_norm": 1.7548545598983765, "learning_rate": 0.0001274, "loss": 1.1048, "step": 638 }, { "epoch": 0.20516936908010916, "grad_norm": 2.2074527740478516, "learning_rate": 0.0001276, "loss": 0.9279, "step": 639 }, { "epoch": 0.20549044790496065, "grad_norm": 2.118504762649536, 
"learning_rate": 0.00012780000000000002, "loss": 1.3603, "step": 640 }, { "epoch": 0.20581152672981218, "grad_norm": 1.5209214687347412, "learning_rate": 0.00012800000000000002, "loss": 0.9429, "step": 641 }, { "epoch": 0.20613260555466367, "grad_norm": 1.7410812377929688, "learning_rate": 0.0001282, "loss": 0.7626, "step": 642 }, { "epoch": 0.20645368437951517, "grad_norm": 2.151503562927246, "learning_rate": 0.0001284, "loss": 1.1012, "step": 643 }, { "epoch": 0.20677476320436666, "grad_norm": 1.4617127180099487, "learning_rate": 0.0001286, "loss": 1.072, "step": 644 }, { "epoch": 0.20709584202921819, "grad_norm": 1.6642550230026245, "learning_rate": 0.00012880000000000001, "loss": 1.08, "step": 645 }, { "epoch": 0.20741692085406968, "grad_norm": 1.8791637420654297, "learning_rate": 0.00012900000000000002, "loss": 0.8933, "step": 646 }, { "epoch": 0.20773799967892118, "grad_norm": 1.970080018043518, "learning_rate": 0.00012920000000000002, "loss": 0.8507, "step": 647 }, { "epoch": 0.20805907850377267, "grad_norm": 1.6559851169586182, "learning_rate": 0.0001294, "loss": 0.8517, "step": 648 }, { "epoch": 0.20838015732862417, "grad_norm": 3.3205556869506836, "learning_rate": 0.0001296, "loss": 0.8304, "step": 649 }, { "epoch": 0.2087012361534757, "grad_norm": 2.0807406902313232, "learning_rate": 0.0001298, "loss": 0.6431, "step": 650 }, { "epoch": 0.20902231497832718, "grad_norm": 2.3025104999542236, "learning_rate": 0.00013000000000000002, "loss": 1.3972, "step": 651 }, { "epoch": 0.20934339380317868, "grad_norm": 2.2260947227478027, "learning_rate": 0.00013020000000000002, "loss": 1.3472, "step": 652 }, { "epoch": 0.20966447262803017, "grad_norm": 2.1171836853027344, "learning_rate": 0.0001304, "loss": 0.9882, "step": 653 }, { "epoch": 0.20998555145288167, "grad_norm": 2.8188836574554443, "learning_rate": 0.0001306, "loss": 0.9835, "step": 654 }, { "epoch": 0.2103066302777332, "grad_norm": 2.3081471920013428, "learning_rate": 0.0001308, "loss": 0.8868, "step": 655 
}, { "epoch": 0.2106277091025847, "grad_norm": 2.6961159706115723, "learning_rate": 0.000131, "loss": 0.8788, "step": 656 }, { "epoch": 0.21094878792743618, "grad_norm": 2.381976366043091, "learning_rate": 0.00013120000000000002, "loss": 0.8831, "step": 657 }, { "epoch": 0.21126986675228768, "grad_norm": 1.8818697929382324, "learning_rate": 0.00013140000000000002, "loss": 0.9654, "step": 658 }, { "epoch": 0.2115909455771392, "grad_norm": 1.7192848920822144, "learning_rate": 0.0001316, "loss": 0.9574, "step": 659 }, { "epoch": 0.2119120244019907, "grad_norm": 1.9481501579284668, "learning_rate": 0.0001318, "loss": 1.15, "step": 660 }, { "epoch": 0.2122331032268422, "grad_norm": 1.8775956630706787, "learning_rate": 0.000132, "loss": 0.9595, "step": 661 }, { "epoch": 0.21255418205169369, "grad_norm": 1.4788551330566406, "learning_rate": 0.00013220000000000001, "loss": 1.0547, "step": 662 }, { "epoch": 0.21287526087654518, "grad_norm": 1.339841604232788, "learning_rate": 0.00013240000000000002, "loss": 1.0089, "step": 663 }, { "epoch": 0.2131963397013967, "grad_norm": 2.0733635425567627, "learning_rate": 0.00013260000000000002, "loss": 0.8325, "step": 664 }, { "epoch": 0.2135174185262482, "grad_norm": 1.9117823839187622, "learning_rate": 0.0001328, "loss": 1.048, "step": 665 }, { "epoch": 0.2138384973510997, "grad_norm": 1.8078653812408447, "learning_rate": 0.000133, "loss": 0.9458, "step": 666 }, { "epoch": 0.2141595761759512, "grad_norm": 1.7276756763458252, "learning_rate": 0.0001332, "loss": 1.1302, "step": 667 }, { "epoch": 0.21448065500080268, "grad_norm": 2.459287166595459, "learning_rate": 0.00013340000000000002, "loss": 1.041, "step": 668 }, { "epoch": 0.2148017338256542, "grad_norm": 1.7712688446044922, "learning_rate": 0.00013360000000000002, "loss": 1.0259, "step": 669 }, { "epoch": 0.2151228126505057, "grad_norm": 2.60807728767395, "learning_rate": 0.00013380000000000003, "loss": 1.0492, "step": 670 }, { "epoch": 0.2154438914753572, "grad_norm": 
1.5164037942886353, "learning_rate": 0.000134, "loss": 1.06, "step": 671 }, { "epoch": 0.2157649703002087, "grad_norm": 1.355437994003296, "learning_rate": 0.0001342, "loss": 1.0805, "step": 672 }, { "epoch": 0.21608604912506021, "grad_norm": 1.6522624492645264, "learning_rate": 0.00013440000000000001, "loss": 1.1322, "step": 673 }, { "epoch": 0.2164071279499117, "grad_norm": 2.0744080543518066, "learning_rate": 0.00013460000000000002, "loss": 1.1511, "step": 674 }, { "epoch": 0.2167282067747632, "grad_norm": 3.5550551414489746, "learning_rate": 0.00013480000000000002, "loss": 1.1194, "step": 675 }, { "epoch": 0.2170492855996147, "grad_norm": 1.7500160932540894, "learning_rate": 0.00013500000000000003, "loss": 1.0948, "step": 676 }, { "epoch": 0.2173703644244662, "grad_norm": 2.251464605331421, "learning_rate": 0.0001352, "loss": 1.0829, "step": 677 }, { "epoch": 0.21769144324931772, "grad_norm": 1.911451816558838, "learning_rate": 0.0001354, "loss": 1.1959, "step": 678 }, { "epoch": 0.2180125220741692, "grad_norm": 2.199814796447754, "learning_rate": 0.00013560000000000002, "loss": 1.0434, "step": 679 }, { "epoch": 0.2183336008990207, "grad_norm": 2.2858433723449707, "learning_rate": 0.00013580000000000002, "loss": 0.9276, "step": 680 }, { "epoch": 0.2186546797238722, "grad_norm": 1.9057203531265259, "learning_rate": 0.00013600000000000003, "loss": 1.1686, "step": 681 }, { "epoch": 0.2189757585487237, "grad_norm": 1.6700472831726074, "learning_rate": 0.0001362, "loss": 1.0646, "step": 682 }, { "epoch": 0.21929683737357522, "grad_norm": 1.5589715242385864, "learning_rate": 0.0001364, "loss": 1.0016, "step": 683 }, { "epoch": 0.21961791619842672, "grad_norm": 1.5741453170776367, "learning_rate": 0.0001366, "loss": 1.0659, "step": 684 }, { "epoch": 0.2199389950232782, "grad_norm": 2.477905511856079, "learning_rate": 0.00013680000000000002, "loss": 1.0588, "step": 685 }, { "epoch": 0.2202600738481297, "grad_norm": 1.8854243755340576, "learning_rate": 
0.00013700000000000002, "loss": 0.7684, "step": 686 }, { "epoch": 0.22058115267298123, "grad_norm": 1.5644632577896118, "learning_rate": 0.00013720000000000003, "loss": 1.1827, "step": 687 }, { "epoch": 0.22090223149783272, "grad_norm": 2.956141948699951, "learning_rate": 0.0001374, "loss": 1.0493, "step": 688 }, { "epoch": 0.22122331032268422, "grad_norm": 1.7298507690429688, "learning_rate": 0.00013759999999999998, "loss": 1.1918, "step": 689 }, { "epoch": 0.22154438914753571, "grad_norm": 1.3661199808120728, "learning_rate": 0.0001378, "loss": 0.9168, "step": 690 }, { "epoch": 0.2218654679723872, "grad_norm": 3.3666417598724365, "learning_rate": 0.000138, "loss": 0.9071, "step": 691 }, { "epoch": 0.22218654679723873, "grad_norm": 1.8022873401641846, "learning_rate": 0.0001382, "loss": 1.0451, "step": 692 }, { "epoch": 0.22250762562209023, "grad_norm": 2.2194650173187256, "learning_rate": 0.0001384, "loss": 1.2171, "step": 693 }, { "epoch": 0.22282870444694172, "grad_norm": 2.7648520469665527, "learning_rate": 0.0001386, "loss": 0.7702, "step": 694 }, { "epoch": 0.22314978327179322, "grad_norm": 2.6405279636383057, "learning_rate": 0.00013879999999999999, "loss": 0.8529, "step": 695 }, { "epoch": 0.22347086209664474, "grad_norm": 2.8034744262695312, "learning_rate": 0.000139, "loss": 1.0367, "step": 696 }, { "epoch": 0.22379194092149624, "grad_norm": 2.957364320755005, "learning_rate": 0.0001392, "loss": 0.9444, "step": 697 }, { "epoch": 0.22411301974634773, "grad_norm": 1.5870848894119263, "learning_rate": 0.0001394, "loss": 0.9007, "step": 698 }, { "epoch": 0.22443409857119923, "grad_norm": 1.8650282621383667, "learning_rate": 0.0001396, "loss": 0.7505, "step": 699 }, { "epoch": 0.22475517739605072, "grad_norm": 3.5914523601531982, "learning_rate": 0.0001398, "loss": 0.8165, "step": 700 }, { "epoch": 0.22507625622090224, "grad_norm": 6.338365077972412, "learning_rate": 0.00014, "loss": 1.48, "step": 701 }, { "epoch": 0.22539733504575374, "grad_norm": 
2.300706148147583, "learning_rate": 0.0001402, "loss": 1.3409, "step": 702 }, { "epoch": 0.22571841387060523, "grad_norm": 2.9373714923858643, "learning_rate": 0.0001404, "loss": 1.3802, "step": 703 }, { "epoch": 0.22603949269545673, "grad_norm": 2.5629308223724365, "learning_rate": 0.0001406, "loss": 0.8298, "step": 704 }, { "epoch": 0.22636057152030822, "grad_norm": 2.515796184539795, "learning_rate": 0.0001408, "loss": 0.7154, "step": 705 }, { "epoch": 0.22668165034515975, "grad_norm": 2.616245985031128, "learning_rate": 0.000141, "loss": 0.8415, "step": 706 }, { "epoch": 0.22700272917001124, "grad_norm": 2.6605446338653564, "learning_rate": 0.0001412, "loss": 0.829, "step": 707 }, { "epoch": 0.22732380799486274, "grad_norm": 2.9635884761810303, "learning_rate": 0.0001414, "loss": 0.9828, "step": 708 }, { "epoch": 0.22764488681971423, "grad_norm": 3.6770541667938232, "learning_rate": 0.0001416, "loss": 1.0846, "step": 709 }, { "epoch": 0.22796596564456575, "grad_norm": 2.220874071121216, "learning_rate": 0.0001418, "loss": 0.9111, "step": 710 }, { "epoch": 0.22828704446941725, "grad_norm": 2.443786859512329, "learning_rate": 0.000142, "loss": 0.9152, "step": 711 }, { "epoch": 0.22860812329426874, "grad_norm": 2.4385950565338135, "learning_rate": 0.0001422, "loss": 0.9161, "step": 712 }, { "epoch": 0.22892920211912024, "grad_norm": 1.6144802570343018, "learning_rate": 0.0001424, "loss": 0.9943, "step": 713 }, { "epoch": 0.22925028094397173, "grad_norm": 2.6351311206817627, "learning_rate": 0.0001426, "loss": 0.9274, "step": 714 }, { "epoch": 0.22957135976882326, "grad_norm": 1.7359614372253418, "learning_rate": 0.0001428, "loss": 1.099, "step": 715 }, { "epoch": 0.22989243859367475, "grad_norm": 1.4439224004745483, "learning_rate": 0.000143, "loss": 1.0443, "step": 716 }, { "epoch": 0.23021351741852625, "grad_norm": 2.533806085586548, "learning_rate": 0.0001432, "loss": 1.0614, "step": 717 }, { "epoch": 0.23053459624337774, "grad_norm": 2.2040460109710693, 
"learning_rate": 0.0001434, "loss": 0.8822, "step": 718 }, { "epoch": 0.23085567506822924, "grad_norm": 1.4895590543746948, "learning_rate": 0.0001436, "loss": 1.244, "step": 719 }, { "epoch": 0.23117675389308076, "grad_norm": 2.5897271633148193, "learning_rate": 0.0001438, "loss": 1.2112, "step": 720 }, { "epoch": 0.23149783271793226, "grad_norm": 1.7311580181121826, "learning_rate": 0.000144, "loss": 0.9964, "step": 721 }, { "epoch": 0.23181891154278375, "grad_norm": 1.475502610206604, "learning_rate": 0.0001442, "loss": 1.0241, "step": 722 }, { "epoch": 0.23213999036763525, "grad_norm": 2.242170810699463, "learning_rate": 0.0001444, "loss": 1.2009, "step": 723 }, { "epoch": 0.23246106919248677, "grad_norm": 1.811926007270813, "learning_rate": 0.0001446, "loss": 0.7842, "step": 724 }, { "epoch": 0.23278214801733826, "grad_norm": 1.8713488578796387, "learning_rate": 0.0001448, "loss": 1.0161, "step": 725 }, { "epoch": 0.23310322684218976, "grad_norm": 1.8007389307022095, "learning_rate": 0.000145, "loss": 1.1193, "step": 726 }, { "epoch": 0.23342430566704125, "grad_norm": 1.432399868965149, "learning_rate": 0.0001452, "loss": 0.9496, "step": 727 }, { "epoch": 0.23374538449189275, "grad_norm": 1.667504072189331, "learning_rate": 0.0001454, "loss": 1.0116, "step": 728 }, { "epoch": 0.23406646331674427, "grad_norm": 1.476586103439331, "learning_rate": 0.00014560000000000002, "loss": 1.186, "step": 729 }, { "epoch": 0.23438754214159577, "grad_norm": 1.715766429901123, "learning_rate": 0.0001458, "loss": 0.9619, "step": 730 }, { "epoch": 0.23470862096644726, "grad_norm": 1.6196917295455933, "learning_rate": 0.000146, "loss": 0.7596, "step": 731 }, { "epoch": 0.23502969979129876, "grad_norm": 2.2061188220977783, "learning_rate": 0.0001462, "loss": 0.8178, "step": 732 }, { "epoch": 0.23535077861615025, "grad_norm": 1.9532783031463623, "learning_rate": 0.0001464, "loss": 1.0697, "step": 733 }, { "epoch": 0.23567185744100178, "grad_norm": 2.0365941524505615, 
"learning_rate": 0.0001466, "loss": 0.9422, "step": 734 }, { "epoch": 0.23599293626585327, "grad_norm": 2.11383318901062, "learning_rate": 0.00014680000000000002, "loss": 1.1384, "step": 735 }, { "epoch": 0.23631401509070477, "grad_norm": 1.7044955492019653, "learning_rate": 0.000147, "loss": 1.0329, "step": 736 }, { "epoch": 0.23663509391555626, "grad_norm": 1.1772584915161133, "learning_rate": 0.0001472, "loss": 0.7503, "step": 737 }, { "epoch": 0.23695617274040778, "grad_norm": 1.8224774599075317, "learning_rate": 0.0001474, "loss": 1.0196, "step": 738 }, { "epoch": 0.23727725156525928, "grad_norm": 1.4998708963394165, "learning_rate": 0.0001476, "loss": 0.8942, "step": 739 }, { "epoch": 0.23759833039011077, "grad_norm": 1.2025278806686401, "learning_rate": 0.00014780000000000001, "loss": 0.8372, "step": 740 }, { "epoch": 0.23791940921496227, "grad_norm": 1.9754714965820312, "learning_rate": 0.000148, "loss": 1.1058, "step": 741 }, { "epoch": 0.23824048803981376, "grad_norm": 1.4123296737670898, "learning_rate": 0.0001482, "loss": 0.897, "step": 742 }, { "epoch": 0.2385615668646653, "grad_norm": 1.8686493635177612, "learning_rate": 0.0001484, "loss": 1.1749, "step": 743 }, { "epoch": 0.23888264568951678, "grad_norm": 2.425736904144287, "learning_rate": 0.0001486, "loss": 0.9771, "step": 744 }, { "epoch": 0.23920372451436828, "grad_norm": 2.431823492050171, "learning_rate": 0.0001488, "loss": 0.8766, "step": 745 }, { "epoch": 0.23952480333921977, "grad_norm": 3.2059309482574463, "learning_rate": 0.00014900000000000002, "loss": 0.86, "step": 746 }, { "epoch": 0.23984588216407127, "grad_norm": 1.9675642251968384, "learning_rate": 0.0001492, "loss": 0.7826, "step": 747 }, { "epoch": 0.2401669609889228, "grad_norm": 2.1029534339904785, "learning_rate": 0.0001494, "loss": 0.9178, "step": 748 }, { "epoch": 0.24048803981377428, "grad_norm": 1.725165605545044, "learning_rate": 0.0001496, "loss": 0.7452, "step": 749 }, { "epoch": 0.24080911863862578, "grad_norm": 
2.5603771209716797, "learning_rate": 0.0001498, "loss": 0.7243, "step": 750 }, { "epoch": 0.24113019746347727, "grad_norm": 3.2743546962738037, "learning_rate": 0.00015000000000000001, "loss": 1.5645, "step": 751 }, { "epoch": 0.2414512762883288, "grad_norm": 2.220749616622925, "learning_rate": 0.00015020000000000002, "loss": 1.2963, "step": 752 }, { "epoch": 0.2417723551131803, "grad_norm": 2.2174606323242188, "learning_rate": 0.0001504, "loss": 0.9431, "step": 753 }, { "epoch": 0.2420934339380318, "grad_norm": 2.5543525218963623, "learning_rate": 0.0001506, "loss": 0.7803, "step": 754 }, { "epoch": 0.24241451276288328, "grad_norm": 2.6568639278411865, "learning_rate": 0.0001508, "loss": 0.7203, "step": 755 }, { "epoch": 0.24273559158773478, "grad_norm": 2.6093509197235107, "learning_rate": 0.000151, "loss": 0.7474, "step": 756 }, { "epoch": 0.2430566704125863, "grad_norm": 3.100468635559082, "learning_rate": 0.00015120000000000002, "loss": 1.1042, "step": 757 }, { "epoch": 0.2433777492374378, "grad_norm": 2.6489973068237305, "learning_rate": 0.00015140000000000002, "loss": 1.1115, "step": 758 }, { "epoch": 0.2436988280622893, "grad_norm": 1.9445077180862427, "learning_rate": 0.0001516, "loss": 1.2512, "step": 759 }, { "epoch": 0.2440199068871408, "grad_norm": 2.7067511081695557, "learning_rate": 0.0001518, "loss": 1.0337, "step": 760 }, { "epoch": 0.24434098571199228, "grad_norm": 1.793434977531433, "learning_rate": 0.000152, "loss": 1.2837, "step": 761 }, { "epoch": 0.2446620645368438, "grad_norm": 2.2494122982025146, "learning_rate": 0.0001522, "loss": 0.9891, "step": 762 }, { "epoch": 0.2449831433616953, "grad_norm": 2.235886812210083, "learning_rate": 0.00015240000000000002, "loss": 1.0547, "step": 763 }, { "epoch": 0.2453042221865468, "grad_norm": 1.8249210119247437, "learning_rate": 0.00015260000000000002, "loss": 0.9715, "step": 764 }, { "epoch": 0.2456253010113983, "grad_norm": 1.6558443307876587, "learning_rate": 0.0001528, "loss": 0.9652, "step": 765 }, 
{ "epoch": 0.2459463798362498, "grad_norm": 1.6599318981170654, "learning_rate": 0.000153, "loss": 1.2118, "step": 766 }, { "epoch": 0.2462674586611013, "grad_norm": 2.2288310527801514, "learning_rate": 0.0001532, "loss": 1.0511, "step": 767 }, { "epoch": 0.2465885374859528, "grad_norm": 2.151366949081421, "learning_rate": 0.00015340000000000002, "loss": 1.0277, "step": 768 }, { "epoch": 0.2469096163108043, "grad_norm": 1.8321627378463745, "learning_rate": 0.00015360000000000002, "loss": 1.0368, "step": 769 }, { "epoch": 0.2472306951356558, "grad_norm": 1.2890126705169678, "learning_rate": 0.0001538, "loss": 1.0081, "step": 770 }, { "epoch": 0.24755177396050732, "grad_norm": 2.4398739337921143, "learning_rate": 0.000154, "loss": 0.8832, "step": 771 }, { "epoch": 0.2478728527853588, "grad_norm": 2.993023157119751, "learning_rate": 0.0001542, "loss": 1.2266, "step": 772 }, { "epoch": 0.2481939316102103, "grad_norm": 1.544333577156067, "learning_rate": 0.0001544, "loss": 1.0772, "step": 773 }, { "epoch": 0.2485150104350618, "grad_norm": 1.9021294116973877, "learning_rate": 0.00015460000000000002, "loss": 1.1266, "step": 774 }, { "epoch": 0.2488360892599133, "grad_norm": 2.129915952682495, "learning_rate": 0.00015480000000000002, "loss": 1.0554, "step": 775 }, { "epoch": 0.24915716808476482, "grad_norm": 2.156653642654419, "learning_rate": 0.000155, "loss": 0.9094, "step": 776 }, { "epoch": 0.2494782469096163, "grad_norm": 2.2110507488250732, "learning_rate": 0.0001552, "loss": 1.1234, "step": 777 }, { "epoch": 0.2497993257344678, "grad_norm": 1.9623451232910156, "learning_rate": 0.0001554, "loss": 1.071, "step": 778 }, { "epoch": 0.25012040455931933, "grad_norm": 2.1549320220947266, "learning_rate": 0.00015560000000000001, "loss": 1.1625, "step": 779 }, { "epoch": 0.2504414833841708, "grad_norm": 1.2295719385147095, "learning_rate": 0.00015580000000000002, "loss": 0.8526, "step": 780 }, { "epoch": 0.2507625622090223, "grad_norm": 1.3640968799591064, "learning_rate": 
0.00015600000000000002, "loss": 0.9144, "step": 781 }, { "epoch": 0.2510836410338738, "grad_norm": 2.0186564922332764, "learning_rate": 0.0001562, "loss": 1.0645, "step": 782 }, { "epoch": 0.2514047198587253, "grad_norm": 2.253253698348999, "learning_rate": 0.0001564, "loss": 1.087, "step": 783 }, { "epoch": 0.2517257986835768, "grad_norm": 2.00610089302063, "learning_rate": 0.0001566, "loss": 1.0834, "step": 784 }, { "epoch": 0.2520468775084283, "grad_norm": 1.227858543395996, "learning_rate": 0.00015680000000000002, "loss": 0.8328, "step": 785 }, { "epoch": 0.2523679563332798, "grad_norm": 1.6620421409606934, "learning_rate": 0.00015700000000000002, "loss": 0.9007, "step": 786 }, { "epoch": 0.25268903515813135, "grad_norm": 1.969337821006775, "learning_rate": 0.00015720000000000003, "loss": 0.8842, "step": 787 }, { "epoch": 0.25301011398298284, "grad_norm": 1.8422551155090332, "learning_rate": 0.0001574, "loss": 1.0329, "step": 788 }, { "epoch": 0.25333119280783434, "grad_norm": 2.0123472213745117, "learning_rate": 0.0001576, "loss": 1.0015, "step": 789 }, { "epoch": 0.25365227163268583, "grad_norm": 1.7080883979797363, "learning_rate": 0.00015780000000000001, "loss": 1.0998, "step": 790 }, { "epoch": 0.25397335045753733, "grad_norm": 3.233981132507324, "learning_rate": 0.00015800000000000002, "loss": 1.132, "step": 791 }, { "epoch": 0.2542944292823888, "grad_norm": 1.4392627477645874, "learning_rate": 0.00015820000000000002, "loss": 0.8428, "step": 792 }, { "epoch": 0.2546155081072403, "grad_norm": 1.7838289737701416, "learning_rate": 0.00015840000000000003, "loss": 0.986, "step": 793 }, { "epoch": 0.2549365869320918, "grad_norm": 2.152930498123169, "learning_rate": 0.0001586, "loss": 0.7264, "step": 794 }, { "epoch": 0.2552576657569433, "grad_norm": 1.7008942365646362, "learning_rate": 0.0001588, "loss": 1.2359, "step": 795 }, { "epoch": 0.2555787445817948, "grad_norm": 1.2774722576141357, "learning_rate": 0.00015900000000000002, "loss": 0.8856, "step": 796 }, 
{ "epoch": 0.25589982340664635, "grad_norm": 1.5711169242858887, "learning_rate": 0.00015920000000000002, "loss": 0.8394, "step": 797 }, { "epoch": 0.25622090223149785, "grad_norm": 2.9855380058288574, "learning_rate": 0.00015940000000000003, "loss": 0.8879, "step": 798 }, { "epoch": 0.25654198105634934, "grad_norm": 2.1591596603393555, "learning_rate": 0.0001596, "loss": 0.8859, "step": 799 }, { "epoch": 0.25686305988120084, "grad_norm": 1.9048887491226196, "learning_rate": 0.0001598, "loss": 0.5577, "step": 800 }, { "epoch": 0.25718413870605233, "grad_norm": 18.338882446289062, "learning_rate": 0.00016, "loss": 1.7698, "step": 801 }, { "epoch": 0.25750521753090383, "grad_norm": 2.443437099456787, "learning_rate": 0.00016020000000000002, "loss": 1.5749, "step": 802 }, { "epoch": 0.2578262963557553, "grad_norm": 2.9275338649749756, "learning_rate": 0.00016040000000000002, "loss": 1.1417, "step": 803 }, { "epoch": 0.2581473751806068, "grad_norm": 2.1856069564819336, "learning_rate": 0.00016060000000000003, "loss": 0.919, "step": 804 }, { "epoch": 0.2584684540054583, "grad_norm": 2.806535005569458, "learning_rate": 0.0001608, "loss": 0.8787, "step": 805 }, { "epoch": 0.25878953283030987, "grad_norm": 2.342740774154663, "learning_rate": 0.000161, "loss": 0.8913, "step": 806 }, { "epoch": 0.25911061165516136, "grad_norm": 2.3539390563964844, "learning_rate": 0.00016120000000000002, "loss": 0.9415, "step": 807 }, { "epoch": 0.25943169048001286, "grad_norm": 2.2060728073120117, "learning_rate": 0.00016140000000000002, "loss": 1.1096, "step": 808 }, { "epoch": 0.25975276930486435, "grad_norm": 1.5557055473327637, "learning_rate": 0.00016160000000000002, "loss": 0.9594, "step": 809 }, { "epoch": 0.26007384812971585, "grad_norm": 1.8541979789733887, "learning_rate": 0.00016180000000000003, "loss": 0.9541, "step": 810 }, { "epoch": 0.26039492695456734, "grad_norm": 2.1980228424072266, "learning_rate": 0.000162, "loss": 1.0466, "step": 811 }, { "epoch": 0.26071600577941884, 
"grad_norm": 3.6660101413726807, "learning_rate": 0.0001622, "loss": 1.045, "step": 812 }, { "epoch": 0.26103708460427033, "grad_norm": 1.4750818014144897, "learning_rate": 0.00016240000000000002, "loss": 0.9617, "step": 813 }, { "epoch": 0.2613581634291218, "grad_norm": 1.7362655401229858, "learning_rate": 0.0001626, "loss": 1.0644, "step": 814 }, { "epoch": 0.2616792422539734, "grad_norm": 3.413910150527954, "learning_rate": 0.0001628, "loss": 0.9958, "step": 815 }, { "epoch": 0.26200032107882487, "grad_norm": 1.9933480024337769, "learning_rate": 0.000163, "loss": 1.0984, "step": 816 }, { "epoch": 0.26232139990367637, "grad_norm": 2.60626220703125, "learning_rate": 0.0001632, "loss": 0.9565, "step": 817 }, { "epoch": 0.26264247872852786, "grad_norm": 2.5874414443969727, "learning_rate": 0.0001634, "loss": 1.1555, "step": 818 }, { "epoch": 0.26296355755337936, "grad_norm": 1.991832971572876, "learning_rate": 0.0001636, "loss": 0.879, "step": 819 }, { "epoch": 0.26328463637823085, "grad_norm": 2.291994333267212, "learning_rate": 0.0001638, "loss": 1.1457, "step": 820 }, { "epoch": 0.26360571520308235, "grad_norm": 2.4672718048095703, "learning_rate": 0.000164, "loss": 1.0751, "step": 821 }, { "epoch": 0.26392679402793384, "grad_norm": 1.6756705045700073, "learning_rate": 0.0001642, "loss": 0.8653, "step": 822 }, { "epoch": 0.26424787285278534, "grad_norm": 1.8445497751235962, "learning_rate": 0.0001644, "loss": 0.9074, "step": 823 }, { "epoch": 0.26456895167763683, "grad_norm": 1.6544156074523926, "learning_rate": 0.0001646, "loss": 0.9691, "step": 824 }, { "epoch": 0.2648900305024884, "grad_norm": 1.8549728393554688, "learning_rate": 0.0001648, "loss": 1.1391, "step": 825 }, { "epoch": 0.2652111093273399, "grad_norm": 2.89859938621521, "learning_rate": 0.000165, "loss": 1.0288, "step": 826 }, { "epoch": 0.2655321881521914, "grad_norm": 3.0639214515686035, "learning_rate": 0.0001652, "loss": 1.1499, "step": 827 }, { "epoch": 0.26585326697704287, "grad_norm": 
1.4466873407363892, "learning_rate": 0.0001654, "loss": 0.9982, "step": 828 }, { "epoch": 0.26617434580189436, "grad_norm": 2.002682685852051, "learning_rate": 0.0001656, "loss": 0.9603, "step": 829 }, { "epoch": 0.26649542462674586, "grad_norm": 2.583599805831909, "learning_rate": 0.0001658, "loss": 1.1498, "step": 830 }, { "epoch": 0.26681650345159735, "grad_norm": 1.9900764226913452, "learning_rate": 0.000166, "loss": 1.1214, "step": 831 }, { "epoch": 0.26713758227644885, "grad_norm": 1.384462594985962, "learning_rate": 0.0001662, "loss": 0.7923, "step": 832 }, { "epoch": 0.26745866110130034, "grad_norm": 7.388343334197998, "learning_rate": 0.0001664, "loss": 1.2836, "step": 833 }, { "epoch": 0.2677797399261519, "grad_norm": 1.9020540714263916, "learning_rate": 0.0001666, "loss": 1.0163, "step": 834 }, { "epoch": 0.2681008187510034, "grad_norm": 1.6172999143600464, "learning_rate": 0.0001668, "loss": 1.1151, "step": 835 }, { "epoch": 0.2684218975758549, "grad_norm": 1.3895915746688843, "learning_rate": 0.000167, "loss": 1.2039, "step": 836 }, { "epoch": 0.2687429764007064, "grad_norm": 4.345794200897217, "learning_rate": 0.0001672, "loss": 1.4846, "step": 837 }, { "epoch": 0.2690640552255579, "grad_norm": 1.9014428853988647, "learning_rate": 0.0001674, "loss": 1.164, "step": 838 }, { "epoch": 0.26938513405040937, "grad_norm": 1.5048143863677979, "learning_rate": 0.0001676, "loss": 0.9134, "step": 839 }, { "epoch": 0.26970621287526086, "grad_norm": 2.302903890609741, "learning_rate": 0.0001678, "loss": 0.9193, "step": 840 }, { "epoch": 0.27002729170011236, "grad_norm": 2.238719940185547, "learning_rate": 0.000168, "loss": 0.9618, "step": 841 }, { "epoch": 0.27034837052496385, "grad_norm": 1.821960210800171, "learning_rate": 0.0001682, "loss": 0.9859, "step": 842 }, { "epoch": 0.2706694493498154, "grad_norm": 1.5913723707199097, "learning_rate": 0.0001684, "loss": 0.7387, "step": 843 }, { "epoch": 0.2709905281746669, "grad_norm": 2.4586009979248047, 
"learning_rate": 0.0001686, "loss": 1.1114, "step": 844 }, { "epoch": 0.2713116069995184, "grad_norm": 1.5896835327148438, "learning_rate": 0.0001688, "loss": 0.8628, "step": 845 }, { "epoch": 0.2716326858243699, "grad_norm": 1.7305841445922852, "learning_rate": 0.00016900000000000002, "loss": 0.952, "step": 846 }, { "epoch": 0.2719537646492214, "grad_norm": 2.2322258949279785, "learning_rate": 0.0001692, "loss": 0.8601, "step": 847 }, { "epoch": 0.2722748434740729, "grad_norm": 3.5011134147644043, "learning_rate": 0.0001694, "loss": 0.8119, "step": 848 }, { "epoch": 0.2725959222989244, "grad_norm": 1.6285743713378906, "learning_rate": 0.0001696, "loss": 0.8586, "step": 849 }, { "epoch": 0.27291700112377587, "grad_norm": 2.2089757919311523, "learning_rate": 0.0001698, "loss": 0.7086, "step": 850 }, { "epoch": 0.27323807994862737, "grad_norm": 3.487232208251953, "learning_rate": 0.00017, "loss": 1.52, "step": 851 }, { "epoch": 0.2735591587734789, "grad_norm": 2.836216688156128, "learning_rate": 0.00017020000000000002, "loss": 1.4958, "step": 852 }, { "epoch": 0.2738802375983304, "grad_norm": 2.933957099914551, "learning_rate": 0.0001704, "loss": 1.1824, "step": 853 }, { "epoch": 0.2742013164231819, "grad_norm": 2.5374979972839355, "learning_rate": 0.0001706, "loss": 1.0371, "step": 854 }, { "epoch": 0.2745223952480334, "grad_norm": 2.2211618423461914, "learning_rate": 0.0001708, "loss": 0.8578, "step": 855 }, { "epoch": 0.2748434740728849, "grad_norm": 2.6845898628234863, "learning_rate": 0.000171, "loss": 0.8998, "step": 856 }, { "epoch": 0.2751645528977364, "grad_norm": 2.2504897117614746, "learning_rate": 0.00017120000000000001, "loss": 0.8661, "step": 857 }, { "epoch": 0.2754856317225879, "grad_norm": 2.5308399200439453, "learning_rate": 0.0001714, "loss": 0.8642, "step": 858 }, { "epoch": 0.2758067105474394, "grad_norm": 2.6598222255706787, "learning_rate": 0.0001716, "loss": 0.8098, "step": 859 }, { "epoch": 0.2761277893722909, "grad_norm": 1.8385624885559082, 
"learning_rate": 0.0001718, "loss": 1.0242, "step": 860 }, { "epoch": 0.2764488681971424, "grad_norm": 2.0007526874542236, "learning_rate": 0.000172, "loss": 1.166, "step": 861 }, { "epoch": 0.2767699470219939, "grad_norm": 3.164480209350586, "learning_rate": 0.0001722, "loss": 0.9461, "step": 862 }, { "epoch": 0.2770910258468454, "grad_norm": 1.8709050416946411, "learning_rate": 0.00017240000000000002, "loss": 1.0128, "step": 863 }, { "epoch": 0.2774121046716969, "grad_norm": 1.6714998483657837, "learning_rate": 0.0001726, "loss": 0.8408, "step": 864 }, { "epoch": 0.2777331834965484, "grad_norm": 1.7060233354568481, "learning_rate": 0.0001728, "loss": 0.796, "step": 865 }, { "epoch": 0.2780542623213999, "grad_norm": 1.3818310499191284, "learning_rate": 0.000173, "loss": 1.0166, "step": 866 }, { "epoch": 0.2783753411462514, "grad_norm": 1.8317921161651611, "learning_rate": 0.0001732, "loss": 1.0847, "step": 867 }, { "epoch": 0.2786964199711029, "grad_norm": 2.0381312370300293, "learning_rate": 0.0001734, "loss": 0.9889, "step": 868 }, { "epoch": 0.2790174987959544, "grad_norm": 2.772416353225708, "learning_rate": 0.00017360000000000002, "loss": 1.1122, "step": 869 }, { "epoch": 0.2793385776208059, "grad_norm": 4.282870292663574, "learning_rate": 0.0001738, "loss": 1.2099, "step": 870 }, { "epoch": 0.27965965644565743, "grad_norm": 1.2967722415924072, "learning_rate": 0.000174, "loss": 0.9476, "step": 871 }, { "epoch": 0.27998073527050893, "grad_norm": 1.8298275470733643, "learning_rate": 0.0001742, "loss": 1.0377, "step": 872 }, { "epoch": 0.2803018140953604, "grad_norm": 2.2432875633239746, "learning_rate": 0.0001744, "loss": 1.0409, "step": 873 }, { "epoch": 0.2806228929202119, "grad_norm": 1.3698315620422363, "learning_rate": 0.00017460000000000002, "loss": 1.0456, "step": 874 }, { "epoch": 0.2809439717450634, "grad_norm": 1.9452381134033203, "learning_rate": 0.00017480000000000002, "loss": 0.8272, "step": 875 }, { "epoch": 0.2812650505699149, "grad_norm": 
1.601507306098938, "learning_rate": 0.000175, "loss": 1.0913, "step": 876 }, { "epoch": 0.2815861293947664, "grad_norm": 3.9005463123321533, "learning_rate": 0.0001752, "loss": 1.4486, "step": 877 }, { "epoch": 0.2819072082196179, "grad_norm": 2.154240608215332, "learning_rate": 0.0001754, "loss": 0.9732, "step": 878 }, { "epoch": 0.2822282870444694, "grad_norm": 1.2495157718658447, "learning_rate": 0.0001756, "loss": 1.0217, "step": 879 }, { "epoch": 0.28254936586932095, "grad_norm": 6.445877552032471, "learning_rate": 0.00017580000000000002, "loss": 1.1539, "step": 880 }, { "epoch": 0.28287044469417244, "grad_norm": 2.6896142959594727, "learning_rate": 0.00017600000000000002, "loss": 1.0758, "step": 881 }, { "epoch": 0.28319152351902394, "grad_norm": 1.3675345182418823, "learning_rate": 0.0001762, "loss": 1.0229, "step": 882 }, { "epoch": 0.28351260234387543, "grad_norm": 9.003292083740234, "learning_rate": 0.0001764, "loss": 1.1875, "step": 883 }, { "epoch": 0.2838336811687269, "grad_norm": 1.7090665102005005, "learning_rate": 0.0001766, "loss": 1.2299, "step": 884 }, { "epoch": 0.2841547599935784, "grad_norm": 2.320629119873047, "learning_rate": 0.00017680000000000001, "loss": 1.3493, "step": 885 }, { "epoch": 0.2844758388184299, "grad_norm": 3.2875213623046875, "learning_rate": 0.00017700000000000002, "loss": 1.0224, "step": 886 }, { "epoch": 0.2847969176432814, "grad_norm": 3.1284916400909424, "learning_rate": 0.0001772, "loss": 1.0231, "step": 887 }, { "epoch": 0.2851179964681329, "grad_norm": 1.9187723398208618, "learning_rate": 0.0001774, "loss": 1.0035, "step": 888 }, { "epoch": 0.2854390752929844, "grad_norm": 1.3869761228561401, "learning_rate": 0.0001776, "loss": 0.8738, "step": 889 }, { "epoch": 0.28576015411783595, "grad_norm": 1.281718134880066, "learning_rate": 0.0001778, "loss": 0.8464, "step": 890 }, { "epoch": 0.28608123294268745, "grad_norm": 1.203602910041809, "learning_rate": 0.00017800000000000002, "loss": 0.9197, "step": 891 }, { "epoch": 
0.28640231176753894, "grad_norm": 1.9265743494033813, "learning_rate": 0.00017820000000000002, "loss": 1.0422, "step": 892 }, { "epoch": 0.28672339059239044, "grad_norm": 1.4553351402282715, "learning_rate": 0.0001784, "loss": 1.1858, "step": 893 }, { "epoch": 0.28704446941724193, "grad_norm": 1.6324553489685059, "learning_rate": 0.0001786, "loss": 0.9635, "step": 894 }, { "epoch": 0.2873655482420934, "grad_norm": 4.445520401000977, "learning_rate": 0.0001788, "loss": 1.1842, "step": 895 }, { "epoch": 0.2876866270669449, "grad_norm": 2.625737428665161, "learning_rate": 0.00017900000000000001, "loss": 1.0714, "step": 896 }, { "epoch": 0.2880077058917964, "grad_norm": 1.3031972646713257, "learning_rate": 0.00017920000000000002, "loss": 0.9172, "step": 897 }, { "epoch": 0.2883287847166479, "grad_norm": 1.1816191673278809, "learning_rate": 0.00017940000000000002, "loss": 0.8658, "step": 898 }, { "epoch": 0.28864986354149946, "grad_norm": 2.7634835243225098, "learning_rate": 0.0001796, "loss": 0.8205, "step": 899 }, { "epoch": 0.28897094236635096, "grad_norm": 1.715277910232544, "learning_rate": 0.0001798, "loss": 0.6063, "step": 900 }, { "epoch": 0.28929202119120245, "grad_norm": 5.679165840148926, "learning_rate": 0.00018, "loss": 1.6148, "step": 901 }, { "epoch": 0.28961310001605395, "grad_norm": 8.520236015319824, "learning_rate": 0.00018020000000000002, "loss": 1.669, "step": 902 }, { "epoch": 0.28993417884090544, "grad_norm": 5.708471298217773, "learning_rate": 0.00018040000000000002, "loss": 1.0973, "step": 903 }, { "epoch": 0.29025525766575694, "grad_norm": 4.294043064117432, "learning_rate": 0.00018060000000000003, "loss": 0.9894, "step": 904 }, { "epoch": 0.29057633649060843, "grad_norm": 4.61492919921875, "learning_rate": 0.0001808, "loss": 0.9369, "step": 905 }, { "epoch": 0.29089741531545993, "grad_norm": 2.621717691421509, "learning_rate": 0.000181, "loss": 1.0031, "step": 906 }, { "epoch": 0.2912184941403114, "grad_norm": 2.668614149093628, 
"learning_rate": 0.0001812, "loss": 0.9567, "step": 907 }, { "epoch": 0.291539572965163, "grad_norm": 2.6157805919647217, "learning_rate": 0.00018140000000000002, "loss": 1.0925, "step": 908 }, { "epoch": 0.29186065179001447, "grad_norm": 2.2772955894470215, "learning_rate": 0.00018160000000000002, "loss": 1.0826, "step": 909 }, { "epoch": 0.29218173061486596, "grad_norm": 2.532787561416626, "learning_rate": 0.00018180000000000003, "loss": 1.0492, "step": 910 }, { "epoch": 0.29250280943971746, "grad_norm": 2.4008162021636963, "learning_rate": 0.000182, "loss": 0.9815, "step": 911 }, { "epoch": 0.29282388826456895, "grad_norm": 1.3333711624145508, "learning_rate": 0.0001822, "loss": 1.071, "step": 912 }, { "epoch": 0.29314496708942045, "grad_norm": 2.2058145999908447, "learning_rate": 0.00018240000000000002, "loss": 1.1503, "step": 913 }, { "epoch": 0.29346604591427194, "grad_norm": 1.4182907342910767, "learning_rate": 0.00018260000000000002, "loss": 1.1016, "step": 914 }, { "epoch": 0.29378712473912344, "grad_norm": 2.7944185733795166, "learning_rate": 0.00018280000000000003, "loss": 1.233, "step": 915 }, { "epoch": 0.29410820356397493, "grad_norm": 1.5195378065109253, "learning_rate": 0.000183, "loss": 1.1977, "step": 916 }, { "epoch": 0.29442928238882643, "grad_norm": 1.8296455144882202, "learning_rate": 0.0001832, "loss": 0.9808, "step": 917 }, { "epoch": 0.294750361213678, "grad_norm": 1.8149925470352173, "learning_rate": 0.0001834, "loss": 1.0705, "step": 918 }, { "epoch": 0.2950714400385295, "grad_norm": 1.741716980934143, "learning_rate": 0.00018360000000000002, "loss": 1.1261, "step": 919 }, { "epoch": 0.29539251886338097, "grad_norm": 2.0786545276641846, "learning_rate": 0.00018380000000000002, "loss": 1.1481, "step": 920 }, { "epoch": 0.29571359768823247, "grad_norm": 1.7828667163848877, "learning_rate": 0.00018400000000000003, "loss": 1.034, "step": 921 }, { "epoch": 0.29603467651308396, "grad_norm": 1.6644797325134277, "learning_rate": 0.0001842, 
"loss": 1.1322, "step": 922 }, { "epoch": 0.29635575533793546, "grad_norm": 1.3413432836532593, "learning_rate": 0.0001844, "loss": 1.1023, "step": 923 }, { "epoch": 0.29667683416278695, "grad_norm": 1.985334873199463, "learning_rate": 0.00018460000000000001, "loss": 0.9872, "step": 924 }, { "epoch": 0.29699791298763845, "grad_norm": 2.2249557971954346, "learning_rate": 0.00018480000000000002, "loss": 1.0467, "step": 925 }, { "epoch": 0.29731899181248994, "grad_norm": 1.843141794204712, "learning_rate": 0.00018500000000000002, "loss": 1.0913, "step": 926 }, { "epoch": 0.2976400706373415, "grad_norm": 1.8132327795028687, "learning_rate": 0.00018520000000000003, "loss": 0.9764, "step": 927 }, { "epoch": 0.297961149462193, "grad_norm": 2.068922996520996, "learning_rate": 0.0001854, "loss": 0.9522, "step": 928 }, { "epoch": 0.2982822282870445, "grad_norm": 1.975690245628357, "learning_rate": 0.0001856, "loss": 0.961, "step": 929 }, { "epoch": 0.298603307111896, "grad_norm": 2.761979103088379, "learning_rate": 0.00018580000000000002, "loss": 1.1271, "step": 930 }, { "epoch": 0.29892438593674747, "grad_norm": 2.052741527557373, "learning_rate": 0.00018600000000000002, "loss": 1.0824, "step": 931 }, { "epoch": 0.29924546476159897, "grad_norm": 2.1390645503997803, "learning_rate": 0.00018620000000000003, "loss": 1.2875, "step": 932 }, { "epoch": 0.29956654358645046, "grad_norm": 2.17155385017395, "learning_rate": 0.00018640000000000003, "loss": 1.0559, "step": 933 }, { "epoch": 0.29988762241130196, "grad_norm": 1.4446396827697754, "learning_rate": 0.0001866, "loss": 1.052, "step": 934 }, { "epoch": 0.30020870123615345, "grad_norm": 2.108684778213501, "learning_rate": 0.00018680000000000001, "loss": 0.9973, "step": 935 }, { "epoch": 0.300529780061005, "grad_norm": 1.7543820142745972, "learning_rate": 0.00018700000000000002, "loss": 1.0001, "step": 936 }, { "epoch": 0.3008508588858565, "grad_norm": 1.7651071548461914, "learning_rate": 0.00018720000000000002, "loss": 0.8726, 
"step": 937 }, { "epoch": 0.301171937710708, "grad_norm": 1.3931697607040405, "learning_rate": 0.00018740000000000003, "loss": 0.9752, "step": 938 }, { "epoch": 0.3014930165355595, "grad_norm": 1.4437761306762695, "learning_rate": 0.0001876, "loss": 0.8859, "step": 939 }, { "epoch": 0.301814095360411, "grad_norm": 2.0850377082824707, "learning_rate": 0.0001878, "loss": 1.0133, "step": 940 }, { "epoch": 0.3021351741852625, "grad_norm": 2.9318671226501465, "learning_rate": 0.000188, "loss": 1.0726, "step": 941 }, { "epoch": 0.302456253010114, "grad_norm": 1.589202880859375, "learning_rate": 0.0001882, "loss": 0.8896, "step": 942 }, { "epoch": 0.30277733183496547, "grad_norm": 1.5876160860061646, "learning_rate": 0.0001884, "loss": 0.8636, "step": 943 }, { "epoch": 0.30309841065981696, "grad_norm": 1.6258230209350586, "learning_rate": 0.0001886, "loss": 1.0084, "step": 944 }, { "epoch": 0.30341948948466846, "grad_norm": 2.1613311767578125, "learning_rate": 0.0001888, "loss": 0.8062, "step": 945 }, { "epoch": 0.30374056830952, "grad_norm": 1.6936204433441162, "learning_rate": 0.00018899999999999999, "loss": 1.1147, "step": 946 }, { "epoch": 0.3040616471343715, "grad_norm": 2.857898473739624, "learning_rate": 0.0001892, "loss": 0.8206, "step": 947 }, { "epoch": 0.304382725959223, "grad_norm": 1.1513748168945312, "learning_rate": 0.0001894, "loss": 0.7425, "step": 948 }, { "epoch": 0.3047038047840745, "grad_norm": 1.6653155088424683, "learning_rate": 0.0001896, "loss": 0.9059, "step": 949 }, { "epoch": 0.305024883608926, "grad_norm": 1.6235933303833008, "learning_rate": 0.0001898, "loss": 0.7386, "step": 950 }, { "epoch": 0.3053459624337775, "grad_norm": 3.6082000732421875, "learning_rate": 0.00019, "loss": 1.5358, "step": 951 }, { "epoch": 0.305667041258629, "grad_norm": 2.132099151611328, "learning_rate": 0.0001902, "loss": 1.4807, "step": 952 }, { "epoch": 0.3059881200834805, "grad_norm": 1.9843887090682983, "learning_rate": 0.0001904, "loss": 0.91, "step": 953 }, { 
"epoch": 0.30630919890833197, "grad_norm": 1.8752169609069824, "learning_rate": 0.0001906, "loss": 0.686, "step": 954 }, { "epoch": 0.3066302777331835, "grad_norm": 3.0422487258911133, "learning_rate": 0.0001908, "loss": 1.0079, "step": 955 }, { "epoch": 0.306951356558035, "grad_norm": 2.0845787525177, "learning_rate": 0.000191, "loss": 0.8389, "step": 956 }, { "epoch": 0.3072724353828865, "grad_norm": 2.3233184814453125, "learning_rate": 0.0001912, "loss": 0.9484, "step": 957 }, { "epoch": 0.307593514207738, "grad_norm": 2.147631883621216, "learning_rate": 0.0001914, "loss": 1.0776, "step": 958 }, { "epoch": 0.3079145930325895, "grad_norm": 1.7034640312194824, "learning_rate": 0.0001916, "loss": 0.9197, "step": 959 }, { "epoch": 0.308235671857441, "grad_norm": 1.8278881311416626, "learning_rate": 0.0001918, "loss": 1.1262, "step": 960 }, { "epoch": 0.3085567506822925, "grad_norm": 2.158137321472168, "learning_rate": 0.000192, "loss": 1.0464, "step": 961 }, { "epoch": 0.308877829507144, "grad_norm": 1.8429055213928223, "learning_rate": 0.0001922, "loss": 1.1075, "step": 962 }, { "epoch": 0.3091989083319955, "grad_norm": 1.6389920711517334, "learning_rate": 0.00019240000000000001, "loss": 1.1258, "step": 963 }, { "epoch": 0.30951998715684703, "grad_norm": 1.337121605873108, "learning_rate": 0.0001926, "loss": 0.789, "step": 964 }, { "epoch": 0.3098410659816985, "grad_norm": 1.542604684829712, "learning_rate": 0.0001928, "loss": 1.1683, "step": 965 }, { "epoch": 0.31016214480655, "grad_norm": 1.8985679149627686, "learning_rate": 0.000193, "loss": 0.8494, "step": 966 }, { "epoch": 0.3104832236314015, "grad_norm": 1.403977870941162, "learning_rate": 0.0001932, "loss": 1.2153, "step": 967 }, { "epoch": 0.310804302456253, "grad_norm": 1.4651598930358887, "learning_rate": 0.0001934, "loss": 1.0217, "step": 968 }, { "epoch": 0.3111253812811045, "grad_norm": 1.3526400327682495, "learning_rate": 0.00019360000000000002, "loss": 0.956, "step": 969 }, { "epoch": 
0.311446460105956, "grad_norm": 3.0191409587860107, "learning_rate": 0.0001938, "loss": 1.1278, "step": 970 }, { "epoch": 0.3117675389308075, "grad_norm": 1.9719985723495483, "learning_rate": 0.000194, "loss": 1.1472, "step": 971 }, { "epoch": 0.312088617755659, "grad_norm": 2.3695621490478516, "learning_rate": 0.0001942, "loss": 1.1406, "step": 972 }, { "epoch": 0.31240969658051054, "grad_norm": 1.8187494277954102, "learning_rate": 0.0001944, "loss": 1.0193, "step": 973 }, { "epoch": 0.31273077540536204, "grad_norm": 1.494602084159851, "learning_rate": 0.00019460000000000001, "loss": 1.0561, "step": 974 }, { "epoch": 0.31305185423021353, "grad_norm": 2.378328323364258, "learning_rate": 0.0001948, "loss": 0.9976, "step": 975 }, { "epoch": 0.31337293305506503, "grad_norm": 1.5211881399154663, "learning_rate": 0.000195, "loss": 1.1273, "step": 976 }, { "epoch": 0.3136940118799165, "grad_norm": 1.2796664237976074, "learning_rate": 0.0001952, "loss": 0.8582, "step": 977 }, { "epoch": 0.314015090704768, "grad_norm": 1.8555333614349365, "learning_rate": 0.0001954, "loss": 1.0359, "step": 978 }, { "epoch": 0.3143361695296195, "grad_norm": 1.1893086433410645, "learning_rate": 0.0001956, "loss": 0.8822, "step": 979 }, { "epoch": 0.314657248354471, "grad_norm": 2.2236552238464355, "learning_rate": 0.00019580000000000002, "loss": 1.0201, "step": 980 }, { "epoch": 0.3149783271793225, "grad_norm": 2.314167022705078, "learning_rate": 0.000196, "loss": 0.9865, "step": 981 }, { "epoch": 0.315299406004174, "grad_norm": 2.2062315940856934, "learning_rate": 0.0001962, "loss": 1.117, "step": 982 }, { "epoch": 0.31562048482902555, "grad_norm": 2.284191846847534, "learning_rate": 0.0001964, "loss": 1.1285, "step": 983 }, { "epoch": 0.31594156365387704, "grad_norm": 2.3977997303009033, "learning_rate": 0.0001966, "loss": 1.2463, "step": 984 }, { "epoch": 0.31626264247872854, "grad_norm": 2.204314708709717, "learning_rate": 0.0001968, "loss": 0.8849, "step": 985 }, { "epoch": 
0.31658372130358003, "grad_norm": 3.1310677528381348, "learning_rate": 0.00019700000000000002, "loss": 1.2409, "step": 986 }, { "epoch": 0.31690480012843153, "grad_norm": 1.4078696966171265, "learning_rate": 0.0001972, "loss": 0.8572, "step": 987 }, { "epoch": 0.317225878953283, "grad_norm": 1.9044156074523926, "learning_rate": 0.0001974, "loss": 0.9414, "step": 988 }, { "epoch": 0.3175469577781345, "grad_norm": 1.3988277912139893, "learning_rate": 0.0001976, "loss": 1.1101, "step": 989 }, { "epoch": 0.317868036602986, "grad_norm": 1.4820644855499268, "learning_rate": 0.0001978, "loss": 1.0266, "step": 990 }, { "epoch": 0.3181891154278375, "grad_norm": 1.5065770149230957, "learning_rate": 0.00019800000000000002, "loss": 0.8049, "step": 991 }, { "epoch": 0.31851019425268906, "grad_norm": 1.5369571447372437, "learning_rate": 0.00019820000000000002, "loss": 0.9029, "step": 992 }, { "epoch": 0.31883127307754056, "grad_norm": 4.79142951965332, "learning_rate": 0.0001984, "loss": 0.9041, "step": 993 }, { "epoch": 0.31915235190239205, "grad_norm": 2.016233205795288, "learning_rate": 0.0001986, "loss": 1.0123, "step": 994 }, { "epoch": 0.31947343072724355, "grad_norm": 1.5374374389648438, "learning_rate": 0.0001988, "loss": 0.8762, "step": 995 }, { "epoch": 0.31979450955209504, "grad_norm": 1.3565267324447632, "learning_rate": 0.000199, "loss": 0.7286, "step": 996 }, { "epoch": 0.32011558837694654, "grad_norm": 2.1161487102508545, "learning_rate": 0.00019920000000000002, "loss": 1.025, "step": 997 }, { "epoch": 0.32043666720179803, "grad_norm": 1.741025686264038, "learning_rate": 0.00019940000000000002, "loss": 0.7437, "step": 998 }, { "epoch": 0.3207577460266495, "grad_norm": 2.049365997314453, "learning_rate": 0.0001996, "loss": 0.7756, "step": 999 }, { "epoch": 0.321078824851501, "grad_norm": 1.8877449035644531, "learning_rate": 0.0001998, "loss": 0.695, "step": 1000 }, { "epoch": 0.32139990367635257, "grad_norm": 2.1886143684387207, "learning_rate": 0.0002, "loss": 
1.4415, "step": 1001 }, { "epoch": 0.32172098250120407, "grad_norm": 2.847805976867676, "learning_rate": 0.00019999999390765168, "loss": 1.2191, "step": 1002 }, { "epoch": 0.32204206132605556, "grad_norm": 2.2231152057647705, "learning_rate": 0.0001999999756306074, "loss": 1.0706, "step": 1003 }, { "epoch": 0.32236314015090706, "grad_norm": 2.219747304916382, "learning_rate": 0.00019999994516886946, "loss": 1.0274, "step": 1004 }, { "epoch": 0.32268421897575855, "grad_norm": 2.3217408657073975, "learning_rate": 0.00019999990252244151, "loss": 0.9952, "step": 1005 }, { "epoch": 0.32300529780061005, "grad_norm": 2.1023569107055664, "learning_rate": 0.00019999984769132877, "loss": 0.9075, "step": 1006 }, { "epoch": 0.32332637662546154, "grad_norm": 2.414299249649048, "learning_rate": 0.00019999978067553796, "loss": 0.916, "step": 1007 }, { "epoch": 0.32364745545031304, "grad_norm": 1.7825126647949219, "learning_rate": 0.00019999970147507713, "loss": 1.0684, "step": 1008 }, { "epoch": 0.32396853427516453, "grad_norm": 1.9296294450759888, "learning_rate": 0.00019999961008995606, "loss": 1.1201, "step": 1009 }, { "epoch": 0.324289613100016, "grad_norm": 1.7151492834091187, "learning_rate": 0.00019999950652018584, "loss": 0.947, "step": 1010 }, { "epoch": 0.3246106919248676, "grad_norm": 1.7226728200912476, "learning_rate": 0.00019999939076577905, "loss": 1.0448, "step": 1011 }, { "epoch": 0.3249317707497191, "grad_norm": 1.8700830936431885, "learning_rate": 0.00019999926282674983, "loss": 0.9668, "step": 1012 }, { "epoch": 0.32525284957457057, "grad_norm": 1.468245267868042, "learning_rate": 0.00019999912270311375, "loss": 0.8688, "step": 1013 }, { "epoch": 0.32557392839942206, "grad_norm": 2.5088555812835693, "learning_rate": 0.00019999897039488796, "loss": 1.0542, "step": 1014 }, { "epoch": 0.32589500722427356, "grad_norm": 1.5317341089248657, "learning_rate": 0.0001999988059020909, "loss": 1.1052, "step": 1015 }, { "epoch": 0.32621608604912505, "grad_norm": 
1.1775538921356201, "learning_rate": 0.00019999862922474268, "loss": 1.0154, "step": 1016 }, { "epoch": 0.32653716487397655, "grad_norm": 1.4249135255813599, "learning_rate": 0.00019999844036286483, "loss": 0.9882, "step": 1017 }, { "epoch": 0.32685824369882804, "grad_norm": 1.9009687900543213, "learning_rate": 0.00019999823931648035, "loss": 1.173, "step": 1018 }, { "epoch": 0.32717932252367954, "grad_norm": 2.4772071838378906, "learning_rate": 0.0001999980260856137, "loss": 1.2154, "step": 1019 }, { "epoch": 0.3275004013485311, "grad_norm": 2.5375864505767822, "learning_rate": 0.00019999780067029094, "loss": 1.1588, "step": 1020 }, { "epoch": 0.3278214801733826, "grad_norm": 2.0548741817474365, "learning_rate": 0.00019999756307053948, "loss": 1.17, "step": 1021 }, { "epoch": 0.3281425589982341, "grad_norm": 2.063422441482544, "learning_rate": 0.00019999731328638828, "loss": 0.9767, "step": 1022 }, { "epoch": 0.3284636378230856, "grad_norm": 1.4420465230941772, "learning_rate": 0.0001999970513178678, "loss": 0.9672, "step": 1023 }, { "epoch": 0.32878471664793707, "grad_norm": 8.236002922058105, "learning_rate": 0.00019999677716500992, "loss": 1.4308, "step": 1024 }, { "epoch": 0.32910579547278856, "grad_norm": 1.764096736907959, "learning_rate": 0.0001999964908278481, "loss": 0.889, "step": 1025 }, { "epoch": 0.32942687429764006, "grad_norm": 1.7221022844314575, "learning_rate": 0.00019999619230641713, "loss": 1.0662, "step": 1026 }, { "epoch": 0.32974795312249155, "grad_norm": 1.4062892198562622, "learning_rate": 0.00019999588160075348, "loss": 1.034, "step": 1027 }, { "epoch": 0.33006903194734305, "grad_norm": 1.5284225940704346, "learning_rate": 0.000199995558710895, "loss": 1.0265, "step": 1028 }, { "epoch": 0.3303901107721946, "grad_norm": 2.0174477100372314, "learning_rate": 0.000199995223636881, "loss": 1.0751, "step": 1029 }, { "epoch": 0.3307111895970461, "grad_norm": 1.3949508666992188, "learning_rate": 0.0001999948763787523, "loss": 1.0296, "step": 1030 
}, { "epoch": 0.3310322684218976, "grad_norm": 1.5616528987884521, "learning_rate": 0.00019999451693655123, "loss": 1.0017, "step": 1031 }, { "epoch": 0.3313533472467491, "grad_norm": 1.674304723739624, "learning_rate": 0.00019999414531032158, "loss": 1.1445, "step": 1032 }, { "epoch": 0.3316744260716006, "grad_norm": 2.715172290802002, "learning_rate": 0.00019999376150010866, "loss": 1.1429, "step": 1033 }, { "epoch": 0.3319955048964521, "grad_norm": 2.873642683029175, "learning_rate": 0.0001999933655059592, "loss": 0.6858, "step": 1034 }, { "epoch": 0.33231658372130357, "grad_norm": 1.6501632928848267, "learning_rate": 0.00019999295732792146, "loss": 1.1858, "step": 1035 }, { "epoch": 0.33263766254615507, "grad_norm": 1.3060718774795532, "learning_rate": 0.00019999253696604522, "loss": 1.0933, "step": 1036 }, { "epoch": 0.33295874137100656, "grad_norm": 0.9465512633323669, "learning_rate": 0.00019999210442038162, "loss": 0.8341, "step": 1037 }, { "epoch": 0.33327982019585806, "grad_norm": 1.2716128826141357, "learning_rate": 0.00019999165969098343, "loss": 0.8147, "step": 1038 }, { "epoch": 0.3336008990207096, "grad_norm": 1.762526035308838, "learning_rate": 0.00019999120277790477, "loss": 1.0783, "step": 1039 }, { "epoch": 0.3339219778455611, "grad_norm": 2.2095861434936523, "learning_rate": 0.0001999907336812014, "loss": 1.1156, "step": 1040 }, { "epoch": 0.3342430566704126, "grad_norm": 1.668186068534851, "learning_rate": 0.00019999025240093044, "loss": 0.8137, "step": 1041 }, { "epoch": 0.3345641354952641, "grad_norm": 1.4128243923187256, "learning_rate": 0.0001999897589371505, "loss": 1.0737, "step": 1042 }, { "epoch": 0.3348852143201156, "grad_norm": 2.0269153118133545, "learning_rate": 0.00019998925328992175, "loss": 1.0204, "step": 1043 }, { "epoch": 0.3352062931449671, "grad_norm": 1.2762573957443237, "learning_rate": 0.0001999887354593058, "loss": 1.1014, "step": 1044 }, { "epoch": 0.3355273719698186, "grad_norm": 1.0873875617980957, "learning_rate": 
0.0001999882054453657, "loss": 0.7875, "step": 1045 }, { "epoch": 0.3358484507946701, "grad_norm": 1.1676608324050903, "learning_rate": 0.00019998766324816607, "loss": 0.9086, "step": 1046 }, { "epoch": 0.33616952961952157, "grad_norm": 1.2658352851867676, "learning_rate": 0.000199987108867773, "loss": 0.7835, "step": 1047 }, { "epoch": 0.3364906084443731, "grad_norm": 3.1894469261169434, "learning_rate": 0.00019998654230425395, "loss": 1.0001, "step": 1048 }, { "epoch": 0.3368116872692246, "grad_norm": 2.7069225311279297, "learning_rate": 0.00019998596355767805, "loss": 0.6518, "step": 1049 }, { "epoch": 0.3371327660940761, "grad_norm": 1.401463508605957, "learning_rate": 0.00019998537262811577, "loss": 0.6609, "step": 1050 }, { "epoch": 0.3374538449189276, "grad_norm": 2.923952102661133, "learning_rate": 0.00019998476951563915, "loss": 1.4718, "step": 1051 }, { "epoch": 0.3377749237437791, "grad_norm": 4.130305767059326, "learning_rate": 0.00019998415422032163, "loss": 1.109, "step": 1052 }, { "epoch": 0.3380960025686306, "grad_norm": 2.697674036026001, "learning_rate": 0.00019998352674223816, "loss": 0.9685, "step": 1053 }, { "epoch": 0.3384170813934821, "grad_norm": 13.59544849395752, "learning_rate": 0.00019998288708146527, "loss": 1.4453, "step": 1054 }, { "epoch": 0.3387381602183336, "grad_norm": 2.960008144378662, "learning_rate": 0.0001999822352380809, "loss": 1.0084, "step": 1055 }, { "epoch": 0.3390592390431851, "grad_norm": 1.8568581342697144, "learning_rate": 0.00019998157121216444, "loss": 0.8141, "step": 1056 }, { "epoch": 0.33938031786803663, "grad_norm": 2.4931318759918213, "learning_rate": 0.00019998089500379676, "loss": 0.9077, "step": 1057 }, { "epoch": 0.3397013966928881, "grad_norm": 2.592142105102539, "learning_rate": 0.00019998020661306037, "loss": 0.967, "step": 1058 }, { "epoch": 0.3400224755177396, "grad_norm": 2.2816684246063232, "learning_rate": 0.000199979506040039, "loss": 1.0623, "step": 1059 }, { "epoch": 0.3403435543425911, 
"grad_norm": 1.5964871644973755, "learning_rate": 0.00019997879328481814, "loss": 0.9404, "step": 1060 }, { "epoch": 0.3406646331674426, "grad_norm": 1.8557053804397583, "learning_rate": 0.00019997806834748456, "loss": 0.9474, "step": 1061 }, { "epoch": 0.3409857119922941, "grad_norm": 1.6931266784667969, "learning_rate": 0.0001999773312281266, "loss": 1.216, "step": 1062 }, { "epoch": 0.3413067908171456, "grad_norm": 1.908123254776001, "learning_rate": 0.00019997658192683412, "loss": 1.0184, "step": 1063 }, { "epoch": 0.3416278696419971, "grad_norm": 1.5717891454696655, "learning_rate": 0.00019997582044369843, "loss": 1.022, "step": 1064 }, { "epoch": 0.3419489484668486, "grad_norm": 1.7411060333251953, "learning_rate": 0.00019997504677881224, "loss": 0.9467, "step": 1065 }, { "epoch": 0.3422700272917001, "grad_norm": 3.3722310066223145, "learning_rate": 0.00019997426093226986, "loss": 0.9437, "step": 1066 }, { "epoch": 0.34259110611655164, "grad_norm": 1.1751868724822998, "learning_rate": 0.000199973462904167, "loss": 0.9127, "step": 1067 }, { "epoch": 0.34291218494140313, "grad_norm": 1.7528998851776123, "learning_rate": 0.000199972652694601, "loss": 0.9023, "step": 1068 }, { "epoch": 0.3432332637662546, "grad_norm": 1.4955263137817383, "learning_rate": 0.00019997183030367048, "loss": 1.3134, "step": 1069 }, { "epoch": 0.3435543425911061, "grad_norm": 2.127793550491333, "learning_rate": 0.0001999709957314757, "loss": 1.0524, "step": 1070 }, { "epoch": 0.3438754214159576, "grad_norm": 1.1916999816894531, "learning_rate": 0.00019997014897811833, "loss": 0.8918, "step": 1071 }, { "epoch": 0.3441965002408091, "grad_norm": 2.142920970916748, "learning_rate": 0.00019996929004370152, "loss": 0.9653, "step": 1072 }, { "epoch": 0.3445175790656606, "grad_norm": 1.9465276002883911, "learning_rate": 0.00019996841892833, "loss": 1.1276, "step": 1073 }, { "epoch": 0.3448386578905121, "grad_norm": 4.488142490386963, "learning_rate": 0.00019996753563210985, "loss": 1.2271, 
"step": 1074 }, { "epoch": 0.3451597367153636, "grad_norm": 9.381378173828125, "learning_rate": 0.00019996664015514871, "loss": 0.894, "step": 1075 }, { "epoch": 0.34548081554021515, "grad_norm": 1.1667776107788086, "learning_rate": 0.00019996573249755572, "loss": 0.8693, "step": 1076 }, { "epoch": 0.34580189436506664, "grad_norm": 1.7447643280029297, "learning_rate": 0.00019996481265944146, "loss": 1.0464, "step": 1077 }, { "epoch": 0.34612297318991814, "grad_norm": 1.9767264127731323, "learning_rate": 0.000199963880640918, "loss": 1.1109, "step": 1078 }, { "epoch": 0.34644405201476963, "grad_norm": 1.110445499420166, "learning_rate": 0.00019996293644209887, "loss": 1.0815, "step": 1079 }, { "epoch": 0.3467651308396211, "grad_norm": 2.0246994495391846, "learning_rate": 0.0001999619800630992, "loss": 1.1254, "step": 1080 }, { "epoch": 0.3470862096644726, "grad_norm": 1.2259923219680786, "learning_rate": 0.00019996101150403543, "loss": 0.7593, "step": 1081 }, { "epoch": 0.3474072884893241, "grad_norm": 1.174506425857544, "learning_rate": 0.00019996003076502565, "loss": 1.0263, "step": 1082 }, { "epoch": 0.3477283673141756, "grad_norm": 1.8576600551605225, "learning_rate": 0.00019995903784618936, "loss": 1.0134, "step": 1083 }, { "epoch": 0.3480494461390271, "grad_norm": 1.9805476665496826, "learning_rate": 0.00019995803274764747, "loss": 1.0945, "step": 1084 }, { "epoch": 0.34837052496387866, "grad_norm": 1.4714640378952026, "learning_rate": 0.0001999570154695225, "loss": 1.0238, "step": 1085 }, { "epoch": 0.34869160378873015, "grad_norm": 2.3097331523895264, "learning_rate": 0.00019995598601193842, "loss": 0.972, "step": 1086 }, { "epoch": 0.34901268261358165, "grad_norm": 1.2940665483474731, "learning_rate": 0.00019995494437502064, "loss": 0.8954, "step": 1087 }, { "epoch": 0.34933376143843314, "grad_norm": 1.3209530115127563, "learning_rate": 0.00019995389055889605, "loss": 0.9124, "step": 1088 }, { "epoch": 0.34965484026328464, "grad_norm": 1.4666227102279663, 
"learning_rate": 0.0001999528245636931, "loss": 1.0218, "step": 1089 }, { "epoch": 0.34997591908813613, "grad_norm": 2.331610918045044, "learning_rate": 0.0001999517463895417, "loss": 0.9959, "step": 1090 }, { "epoch": 0.35029699791298763, "grad_norm": 1.3941978216171265, "learning_rate": 0.00019995065603657316, "loss": 0.9071, "step": 1091 }, { "epoch": 0.3506180767378391, "grad_norm": 1.3337680101394653, "learning_rate": 0.00019994955350492038, "loss": 1.08, "step": 1092 }, { "epoch": 0.3509391555626906, "grad_norm": 1.6207735538482666, "learning_rate": 0.0001999484387947177, "loss": 0.972, "step": 1093 }, { "epoch": 0.3512602343875421, "grad_norm": 1.4743515253067017, "learning_rate": 0.00019994731190610087, "loss": 1.1447, "step": 1094 }, { "epoch": 0.35158131321239366, "grad_norm": 1.3109478950500488, "learning_rate": 0.0001999461728392073, "loss": 0.9514, "step": 1095 }, { "epoch": 0.35190239203724516, "grad_norm": 1.4319483041763306, "learning_rate": 0.00019994502159417573, "loss": 0.9688, "step": 1096 }, { "epoch": 0.35222347086209665, "grad_norm": 1.4653232097625732, "learning_rate": 0.00019994385817114646, "loss": 0.9073, "step": 1097 }, { "epoch": 0.35254454968694815, "grad_norm": 1.3482818603515625, "learning_rate": 0.00019994268257026118, "loss": 0.7425, "step": 1098 }, { "epoch": 0.35286562851179964, "grad_norm": 2.7118563652038574, "learning_rate": 0.00019994149479166324, "loss": 0.8003, "step": 1099 }, { "epoch": 0.35318670733665114, "grad_norm": 1.2395905256271362, "learning_rate": 0.0001999402948354973, "loss": 0.6656, "step": 1100 }, { "epoch": 0.35350778616150264, "grad_norm": 2.8349075317382812, "learning_rate": 0.0001999390827019096, "loss": 1.369, "step": 1101 }, { "epoch": 0.35382886498635413, "grad_norm": 1.6994882822036743, "learning_rate": 0.0001999378583910478, "loss": 1.4933, "step": 1102 }, { "epoch": 0.3541499438112056, "grad_norm": 2.044842481613159, "learning_rate": 0.0001999366219030611, "loss": 1.0908, "step": 1103 }, { "epoch": 
0.3544710226360572, "grad_norm": 2.008758306503296, "learning_rate": 0.00019993537323810014, "loss": 0.963, "step": 1104 }, { "epoch": 0.35479210146090867, "grad_norm": 2.8811473846435547, "learning_rate": 0.0001999341123963171, "loss": 0.8424, "step": 1105 }, { "epoch": 0.35511318028576017, "grad_norm": 1.9336885213851929, "learning_rate": 0.00019993283937786563, "loss": 0.9013, "step": 1106 }, { "epoch": 0.35543425911061166, "grad_norm": 1.6170105934143066, "learning_rate": 0.0001999315541829008, "loss": 0.7265, "step": 1107 }, { "epoch": 0.35575533793546316, "grad_norm": 2.101649284362793, "learning_rate": 0.0001999302568115792, "loss": 1.1258, "step": 1108 }, { "epoch": 0.35607641676031465, "grad_norm": 2.104947328567505, "learning_rate": 0.00019992894726405893, "loss": 0.9501, "step": 1109 }, { "epoch": 0.35639749558516615, "grad_norm": 1.6322426795959473, "learning_rate": 0.00019992762554049955, "loss": 1.0367, "step": 1110 }, { "epoch": 0.35671857441001764, "grad_norm": 1.6271668672561646, "learning_rate": 0.0001999262916410621, "loss": 0.893, "step": 1111 }, { "epoch": 0.35703965323486914, "grad_norm": 1.3786333799362183, "learning_rate": 0.00019992494556590916, "loss": 0.9951, "step": 1112 }, { "epoch": 0.3573607320597207, "grad_norm": 1.2673497200012207, "learning_rate": 0.00019992358731520468, "loss": 0.9283, "step": 1113 }, { "epoch": 0.3576818108845722, "grad_norm": 1.338150143623352, "learning_rate": 0.0001999222168891142, "loss": 0.9409, "step": 1114 }, { "epoch": 0.3580028897094237, "grad_norm": 1.344650149345398, "learning_rate": 0.0001999208342878047, "loss": 1.0276, "step": 1115 }, { "epoch": 0.3583239685342752, "grad_norm": 1.805612325668335, "learning_rate": 0.0001999194395114446, "loss": 1.1324, "step": 1116 }, { "epoch": 0.35864504735912667, "grad_norm": 21.904685974121094, "learning_rate": 0.00019991803256020393, "loss": 1.1808, "step": 1117 }, { "epoch": 0.35896612618397816, "grad_norm": 1.6144940853118896, "learning_rate": 
0.000199916613434254, "loss": 1.1416, "step": 1118 }, { "epoch": 0.35928720500882966, "grad_norm": 2.7180378437042236, "learning_rate": 0.00019991518213376787, "loss": 1.0546, "step": 1119 }, { "epoch": 0.35960828383368115, "grad_norm": 1.8319542407989502, "learning_rate": 0.00019991373865891986, "loss": 0.991, "step": 1120 }, { "epoch": 0.35992936265853265, "grad_norm": 2.3465638160705566, "learning_rate": 0.00019991228300988585, "loss": 1.0318, "step": 1121 }, { "epoch": 0.3602504414833842, "grad_norm": 2.1191277503967285, "learning_rate": 0.00019991081518684321, "loss": 1.1896, "step": 1122 }, { "epoch": 0.3605715203082357, "grad_norm": 7.694565773010254, "learning_rate": 0.00019990933518997084, "loss": 1.3582, "step": 1123 }, { "epoch": 0.3608925991330872, "grad_norm": 1.5988242626190186, "learning_rate": 0.00019990784301944902, "loss": 1.225, "step": 1124 }, { "epoch": 0.3612136779579387, "grad_norm": 1.4618570804595947, "learning_rate": 0.00019990633867545955, "loss": 1.152, "step": 1125 }, { "epoch": 0.3615347567827902, "grad_norm": 1.4518593549728394, "learning_rate": 0.0001999048221581858, "loss": 0.7903, "step": 1126 }, { "epoch": 0.3618558356076417, "grad_norm": 2.108856439590454, "learning_rate": 0.0001999032934678125, "loss": 0.9205, "step": 1127 }, { "epoch": 0.36217691443249317, "grad_norm": 1.7413921356201172, "learning_rate": 0.0001999017526045259, "loss": 0.9283, "step": 1128 }, { "epoch": 0.36249799325734466, "grad_norm": 1.6088074445724487, "learning_rate": 0.00019990019956851382, "loss": 1.0622, "step": 1129 }, { "epoch": 0.36281907208219616, "grad_norm": 2.035529375076294, "learning_rate": 0.00019989863435996544, "loss": 1.0937, "step": 1130 }, { "epoch": 0.36314015090704765, "grad_norm": 1.6727184057235718, "learning_rate": 0.00019989705697907149, "loss": 1.133, "step": 1131 }, { "epoch": 0.3634612297318992, "grad_norm": 4.085322856903076, "learning_rate": 0.00019989546742602414, "loss": 0.8277, "step": 1132 }, { "epoch": 0.3637823085567507, 
"grad_norm": 1.4142351150512695, "learning_rate": 0.00019989386570101714, "loss": 1.0965, "step": 1133 }, { "epoch": 0.3641033873816022, "grad_norm": 2.0783348083496094, "learning_rate": 0.0001998922518042456, "loss": 1.4158, "step": 1134 }, { "epoch": 0.3644244662064537, "grad_norm": 1.3447226285934448, "learning_rate": 0.00019989062573590616, "loss": 0.8552, "step": 1135 }, { "epoch": 0.3647455450313052, "grad_norm": 1.5264992713928223, "learning_rate": 0.00019988898749619702, "loss": 0.9846, "step": 1136 }, { "epoch": 0.3650666238561567, "grad_norm": 1.8115450143814087, "learning_rate": 0.0001998873370853177, "loss": 1.0604, "step": 1137 }, { "epoch": 0.3653877026810082, "grad_norm": 2.269794225692749, "learning_rate": 0.00019988567450346937, "loss": 0.8845, "step": 1138 }, { "epoch": 0.36570878150585967, "grad_norm": 1.198961853981018, "learning_rate": 0.0001998839997508546, "loss": 0.9815, "step": 1139 }, { "epoch": 0.36602986033071117, "grad_norm": 2.3479223251342773, "learning_rate": 0.00019988231282767744, "loss": 0.9144, "step": 1140 }, { "epoch": 0.3663509391555627, "grad_norm": 1.9824615716934204, "learning_rate": 0.0001998806137341434, "loss": 1.0468, "step": 1141 }, { "epoch": 0.3666720179804142, "grad_norm": 1.780985713005066, "learning_rate": 0.00019987890247045957, "loss": 1.0552, "step": 1142 }, { "epoch": 0.3669930968052657, "grad_norm": 1.192861557006836, "learning_rate": 0.00019987717903683448, "loss": 0.8069, "step": 1143 }, { "epoch": 0.3673141756301172, "grad_norm": 2.568082094192505, "learning_rate": 0.00019987544343347803, "loss": 0.9455, "step": 1144 }, { "epoch": 0.3676352544549687, "grad_norm": 1.151548981666565, "learning_rate": 0.00019987369566060176, "loss": 0.6518, "step": 1145 }, { "epoch": 0.3679563332798202, "grad_norm": 2.208691358566284, "learning_rate": 0.0001998719357184186, "loss": 0.7998, "step": 1146 }, { "epoch": 0.3682774121046717, "grad_norm": 2.1074390411376953, "learning_rate": 0.00019987016360714307, "loss": 1.0609, 
"step": 1147 }, { "epoch": 0.3685984909295232, "grad_norm": 1.2993351221084595, "learning_rate": 0.00019986837932699103, "loss": 0.7665, "step": 1148 }, { "epoch": 0.3689195697543747, "grad_norm": 0.9701393246650696, "learning_rate": 0.00019986658287817987, "loss": 0.5932, "step": 1149 }, { "epoch": 0.3692406485792262, "grad_norm": 6.661820411682129, "learning_rate": 0.00019986477426092855, "loss": 0.6992, "step": 1150 }, { "epoch": 0.3695617274040777, "grad_norm": 3.1357054710388184, "learning_rate": 0.0001998629534754574, "loss": 1.5801, "step": 1151 }, { "epoch": 0.3698828062289292, "grad_norm": 3.9719338417053223, "learning_rate": 0.0001998611205219883, "loss": 1.5882, "step": 1152 }, { "epoch": 0.3702038850537807, "grad_norm": 3.1035382747650146, "learning_rate": 0.00019985927540074454, "loss": 1.0743, "step": 1153 }, { "epoch": 0.3705249638786322, "grad_norm": 1.9447718858718872, "learning_rate": 0.00019985741811195097, "loss": 0.8629, "step": 1154 }, { "epoch": 0.3708460427034837, "grad_norm": 1.993913173675537, "learning_rate": 0.00019985554865583393, "loss": 0.8233, "step": 1155 }, { "epoch": 0.3711671215283352, "grad_norm": 1.8630907535552979, "learning_rate": 0.0001998536670326212, "loss": 0.8642, "step": 1156 }, { "epoch": 0.3714882003531867, "grad_norm": 1.8467717170715332, "learning_rate": 0.000199851773242542, "loss": 0.8528, "step": 1157 }, { "epoch": 0.3718092791780382, "grad_norm": 2.4014041423797607, "learning_rate": 0.0001998498672858271, "loss": 1.0696, "step": 1158 }, { "epoch": 0.3721303580028897, "grad_norm": 1.915739893913269, "learning_rate": 0.00019984794916270876, "loss": 1.0595, "step": 1159 }, { "epoch": 0.37245143682774123, "grad_norm": 1.4160325527191162, "learning_rate": 0.00019984601887342073, "loss": 1.1056, "step": 1160 }, { "epoch": 0.37277251565259273, "grad_norm": 2.503619909286499, "learning_rate": 0.00019984407641819812, "loss": 0.6714, "step": 1161 }, { "epoch": 0.3730935944774442, "grad_norm": 1.4751476049423218, 
"learning_rate": 0.00019984212179727766, "loss": 0.9467, "step": 1162 }, { "epoch": 0.3734146733022957, "grad_norm": 1.4288772344589233, "learning_rate": 0.00019984015501089752, "loss": 1.0041, "step": 1163 }, { "epoch": 0.3737357521271472, "grad_norm": 1.389568567276001, "learning_rate": 0.00019983817605929733, "loss": 0.9213, "step": 1164 }, { "epoch": 0.3740568309519987, "grad_norm": 1.2409948110580444, "learning_rate": 0.00019983618494271826, "loss": 0.9687, "step": 1165 }, { "epoch": 0.3743779097768502, "grad_norm": 1.3233766555786133, "learning_rate": 0.00019983418166140285, "loss": 0.9057, "step": 1166 }, { "epoch": 0.3746989886017017, "grad_norm": 1.7424918413162231, "learning_rate": 0.00019983216621559524, "loss": 1.3071, "step": 1167 }, { "epoch": 0.3750200674265532, "grad_norm": 2.5900015830993652, "learning_rate": 0.00019983013860554101, "loss": 0.9437, "step": 1168 }, { "epoch": 0.37534114625140474, "grad_norm": 1.2561944723129272, "learning_rate": 0.00019982809883148722, "loss": 1.0481, "step": 1169 }, { "epoch": 0.37566222507625624, "grad_norm": 2.1015701293945312, "learning_rate": 0.00019982604689368239, "loss": 1.1775, "step": 1170 }, { "epoch": 0.37598330390110773, "grad_norm": 2.039940357208252, "learning_rate": 0.00019982398279237655, "loss": 1.2701, "step": 1171 }, { "epoch": 0.37630438272595923, "grad_norm": 1.330772042274475, "learning_rate": 0.0001998219065278212, "loss": 0.9725, "step": 1172 }, { "epoch": 0.3766254615508107, "grad_norm": 2.106564998626709, "learning_rate": 0.00019981981810026934, "loss": 1.0919, "step": 1173 }, { "epoch": 0.3769465403756622, "grad_norm": 1.5898199081420898, "learning_rate": 0.0001998177175099754, "loss": 0.9706, "step": 1174 }, { "epoch": 0.3772676192005137, "grad_norm": 1.8556996583938599, "learning_rate": 0.00019981560475719538, "loss": 1.025, "step": 1175 }, { "epoch": 0.3775886980253652, "grad_norm": 1.008709192276001, "learning_rate": 0.0001998134798421867, "loss": 0.8611, "step": 1176 }, { "epoch": 
0.3779097768502167, "grad_norm": 2.131082057952881, "learning_rate": 0.00019981134276520827, "loss": 1.2977, "step": 1177 }, { "epoch": 0.37823085567506826, "grad_norm": 1.7559832334518433, "learning_rate": 0.00019980919352652048, "loss": 0.9628, "step": 1178 }, { "epoch": 0.37855193449991975, "grad_norm": 1.290353536605835, "learning_rate": 0.00019980703212638522, "loss": 1.0724, "step": 1179 }, { "epoch": 0.37887301332477125, "grad_norm": 1.12660813331604, "learning_rate": 0.00019980485856506582, "loss": 0.9982, "step": 1180 }, { "epoch": 0.37919409214962274, "grad_norm": 1.5011529922485352, "learning_rate": 0.00019980267284282717, "loss": 0.9109, "step": 1181 }, { "epoch": 0.37951517097447424, "grad_norm": 1.762739896774292, "learning_rate": 0.00019980047495993554, "loss": 0.9277, "step": 1182 }, { "epoch": 0.37983624979932573, "grad_norm": 1.3952839374542236, "learning_rate": 0.00019979826491665881, "loss": 0.7895, "step": 1183 }, { "epoch": 0.3801573286241772, "grad_norm": 1.6314538717269897, "learning_rate": 0.00019979604271326616, "loss": 0.9361, "step": 1184 }, { "epoch": 0.3804784074490287, "grad_norm": 2.3080692291259766, "learning_rate": 0.00019979380835002846, "loss": 1.1475, "step": 1185 }, { "epoch": 0.3807994862738802, "grad_norm": 2.15315842628479, "learning_rate": 0.0001997915618272179, "loss": 1.1916, "step": 1186 }, { "epoch": 0.3811205650987317, "grad_norm": 1.3747003078460693, "learning_rate": 0.00019978930314510824, "loss": 0.8076, "step": 1187 }, { "epoch": 0.38144164392358326, "grad_norm": 1.4667130708694458, "learning_rate": 0.0001997870323039747, "loss": 0.95, "step": 1188 }, { "epoch": 0.38176272274843476, "grad_norm": 1.726277470588684, "learning_rate": 0.00019978474930409395, "loss": 0.9735, "step": 1189 }, { "epoch": 0.38208380157328625, "grad_norm": 2.355656385421753, "learning_rate": 0.00019978245414574417, "loss": 1.0268, "step": 1190 }, { "epoch": 0.38240488039813775, "grad_norm": 1.840656042098999, "learning_rate": 
0.000199780146829205, "loss": 0.9747, "step": 1191 }, { "epoch": 0.38272595922298924, "grad_norm": 1.4547884464263916, "learning_rate": 0.00019977782735475764, "loss": 1.0729, "step": 1192 }, { "epoch": 0.38304703804784074, "grad_norm": 1.2916489839553833, "learning_rate": 0.00019977549572268468, "loss": 0.875, "step": 1193 }, { "epoch": 0.38336811687269223, "grad_norm": 1.5631312131881714, "learning_rate": 0.00019977315193327018, "loss": 0.8071, "step": 1194 }, { "epoch": 0.38368919569754373, "grad_norm": 1.6755515336990356, "learning_rate": 0.00019977079598679977, "loss": 0.9683, "step": 1195 }, { "epoch": 0.3840102745223952, "grad_norm": 1.1934847831726074, "learning_rate": 0.00019976842788356055, "loss": 0.666, "step": 1196 }, { "epoch": 0.3843313533472468, "grad_norm": 1.6635997295379639, "learning_rate": 0.000199766047623841, "loss": 0.8521, "step": 1197 }, { "epoch": 0.38465243217209827, "grad_norm": 1.6572028398513794, "learning_rate": 0.00019976365520793114, "loss": 0.7134, "step": 1198 }, { "epoch": 0.38497351099694976, "grad_norm": 1.2580087184906006, "learning_rate": 0.00019976125063612252, "loss": 0.6517, "step": 1199 }, { "epoch": 0.38529458982180126, "grad_norm": 1.3600796461105347, "learning_rate": 0.00019975883390870817, "loss": 0.5476, "step": 1200 }, { "epoch": 0.38561566864665275, "grad_norm": 2.1080329418182373, "learning_rate": 0.00019975640502598244, "loss": 1.4574, "step": 1201 }, { "epoch": 0.38593674747150425, "grad_norm": 1.5729361772537231, "learning_rate": 0.0001997539639882414, "loss": 1.3152, "step": 1202 }, { "epoch": 0.38625782629635574, "grad_norm": 1.7094898223876953, "learning_rate": 0.00019975151079578237, "loss": 1.1812, "step": 1203 }, { "epoch": 0.38657890512120724, "grad_norm": 1.5421949625015259, "learning_rate": 0.0001997490454489044, "loss": 1.023, "step": 1204 }, { "epoch": 0.38689998394605873, "grad_norm": 2.4401865005493164, "learning_rate": 0.00019974656794790775, "loss": 0.866, "step": 1205 }, { "epoch": 
0.3872210627709103, "grad_norm": 1.9785239696502686, "learning_rate": 0.0001997440782930944, "loss": 0.8572, "step": 1206 }, { "epoch": 0.3875421415957618, "grad_norm": 1.7715507745742798, "learning_rate": 0.00019974157648476766, "loss": 0.7572, "step": 1207 }, { "epoch": 0.3878632204206133, "grad_norm": 1.8642398118972778, "learning_rate": 0.00019973906252323238, "loss": 0.8058, "step": 1208 }, { "epoch": 0.38818429924546477, "grad_norm": 1.5486159324645996, "learning_rate": 0.00019973653640879485, "loss": 1.0713, "step": 1209 }, { "epoch": 0.38850537807031627, "grad_norm": 2.690429449081421, "learning_rate": 0.00019973399814176293, "loss": 1.024, "step": 1210 }, { "epoch": 0.38882645689516776, "grad_norm": 1.7301387786865234, "learning_rate": 0.00019973144772244582, "loss": 0.9245, "step": 1211 }, { "epoch": 0.38914753572001926, "grad_norm": 1.1060234308242798, "learning_rate": 0.00019972888515115434, "loss": 0.9201, "step": 1212 }, { "epoch": 0.38946861454487075, "grad_norm": 2.06071138381958, "learning_rate": 0.0001997263104282007, "loss": 0.8777, "step": 1213 }, { "epoch": 0.38978969336972225, "grad_norm": 1.2055797576904297, "learning_rate": 0.00019972372355389867, "loss": 0.9946, "step": 1214 }, { "epoch": 0.39011077219457374, "grad_norm": 1.2430628538131714, "learning_rate": 0.00019972112452856339, "loss": 0.6915, "step": 1215 }, { "epoch": 0.3904318510194253, "grad_norm": 1.3444316387176514, "learning_rate": 0.00019971851335251158, "loss": 0.9794, "step": 1216 }, { "epoch": 0.3907529298442768, "grad_norm": 1.4662611484527588, "learning_rate": 0.0001997158900260614, "loss": 1.0779, "step": 1217 }, { "epoch": 0.3910740086691283, "grad_norm": 1.2781003713607788, "learning_rate": 0.00019971325454953248, "loss": 0.9051, "step": 1218 }, { "epoch": 0.3913950874939798, "grad_norm": 1.5118160247802734, "learning_rate": 0.00019971060692324598, "loss": 1.3483, "step": 1219 }, { "epoch": 0.39171616631883127, "grad_norm": 3.3005893230438232, "learning_rate": 
0.00019970794714752445, "loss": 1.0498, "step": 1220 }, { "epoch": 0.39203724514368277, "grad_norm": 2.5156784057617188, "learning_rate": 0.00019970527522269205, "loss": 1.0823, "step": 1221 }, { "epoch": 0.39235832396853426, "grad_norm": 2.2197351455688477, "learning_rate": 0.00019970259114907425, "loss": 0.9576, "step": 1222 }, { "epoch": 0.39267940279338576, "grad_norm": 1.4013001918792725, "learning_rate": 0.0001996998949269982, "loss": 1.0194, "step": 1223 }, { "epoch": 0.39300048161823725, "grad_norm": 1.1013851165771484, "learning_rate": 0.00019969718655679232, "loss": 0.9049, "step": 1224 }, { "epoch": 0.3933215604430888, "grad_norm": 1.4185377359390259, "learning_rate": 0.00019969446603878673, "loss": 0.8871, "step": 1225 }, { "epoch": 0.3936426392679403, "grad_norm": 1.4201257228851318, "learning_rate": 0.0001996917333733128, "loss": 0.8107, "step": 1226 }, { "epoch": 0.3939637180927918, "grad_norm": 1.788902997970581, "learning_rate": 0.0001996889885607036, "loss": 1.1504, "step": 1227 }, { "epoch": 0.3942847969176433, "grad_norm": 2.3521134853363037, "learning_rate": 0.0001996862316012935, "loss": 1.2274, "step": 1228 }, { "epoch": 0.3946058757424948, "grad_norm": 2.3193109035491943, "learning_rate": 0.00019968346249541846, "loss": 1.2382, "step": 1229 }, { "epoch": 0.3949269545673463, "grad_norm": 1.1716614961624146, "learning_rate": 0.0001996806812434159, "loss": 0.8529, "step": 1230 }, { "epoch": 0.3952480333921978, "grad_norm": 1.5683951377868652, "learning_rate": 0.00019967788784562473, "loss": 1.1172, "step": 1231 }, { "epoch": 0.39556911221704927, "grad_norm": 1.3435286283493042, "learning_rate": 0.00019967508230238522, "loss": 1.1469, "step": 1232 }, { "epoch": 0.39589019104190076, "grad_norm": 2.070988893508911, "learning_rate": 0.00019967226461403933, "loss": 1.1132, "step": 1233 }, { "epoch": 0.3962112698667523, "grad_norm": 1.4258763790130615, "learning_rate": 0.0001996694347809303, "loss": 1.0245, "step": 1234 }, { "epoch": 
0.3965323486916038, "grad_norm": 1.191821813583374, "learning_rate": 0.00019966659280340297, "loss": 1.0535, "step": 1235 }, { "epoch": 0.3968534275164553, "grad_norm": 1.3982123136520386, "learning_rate": 0.00019966373868180365, "loss": 0.8893, "step": 1236 }, { "epoch": 0.3971745063413068, "grad_norm": 1.734355092048645, "learning_rate": 0.0001996608724164801, "loss": 1.1121, "step": 1237 }, { "epoch": 0.3974955851661583, "grad_norm": 1.516452670097351, "learning_rate": 0.00019965799400778152, "loss": 1.0574, "step": 1238 }, { "epoch": 0.3978166639910098, "grad_norm": 2.239095449447632, "learning_rate": 0.00019965510345605866, "loss": 1.0554, "step": 1239 }, { "epoch": 0.3981377428158613, "grad_norm": 1.5699093341827393, "learning_rate": 0.00019965220076166376, "loss": 1.0202, "step": 1240 }, { "epoch": 0.3984588216407128, "grad_norm": 1.7507424354553223, "learning_rate": 0.00019964928592495045, "loss": 0.9151, "step": 1241 }, { "epoch": 0.3987799004655643, "grad_norm": 2.988863229751587, "learning_rate": 0.0001996463589462739, "loss": 0.8941, "step": 1242 }, { "epoch": 0.39910097929041577, "grad_norm": 2.8474647998809814, "learning_rate": 0.00019964341982599078, "loss": 0.9185, "step": 1243 }, { "epoch": 0.3994220581152673, "grad_norm": 1.7581831216812134, "learning_rate": 0.00019964046856445924, "loss": 0.9486, "step": 1244 }, { "epoch": 0.3997431369401188, "grad_norm": 1.2665711641311646, "learning_rate": 0.00019963750516203884, "loss": 0.816, "step": 1245 }, { "epoch": 0.4000642157649703, "grad_norm": 1.6899489164352417, "learning_rate": 0.00019963452961909063, "loss": 0.692, "step": 1246 }, { "epoch": 0.4003852945898218, "grad_norm": 1.0340851545333862, "learning_rate": 0.00019963154193597727, "loss": 0.7222, "step": 1247 }, { "epoch": 0.4007063734146733, "grad_norm": 2.3070790767669678, "learning_rate": 0.00019962854211306267, "loss": 0.9091, "step": 1248 }, { "epoch": 0.4010274522395248, "grad_norm": 1.4314565658569336, "learning_rate": 
0.0001996255301507125, "loss": 0.6755, "step": 1249 }, { "epoch": 0.4013485310643763, "grad_norm": 0.9915478825569153, "learning_rate": 0.0001996225060492936, "loss": 0.6343, "step": 1250 }, { "epoch": 0.4016696098892278, "grad_norm": 1.879836916923523, "learning_rate": 0.00019961946980917456, "loss": 1.3669, "step": 1251 }, { "epoch": 0.4019906887140793, "grad_norm": 2.172273874282837, "learning_rate": 0.00019961642143072529, "loss": 1.2117, "step": 1252 }, { "epoch": 0.40231176753893083, "grad_norm": 2.1239922046661377, "learning_rate": 0.00019961336091431727, "loss": 1.0067, "step": 1253 }, { "epoch": 0.4026328463637823, "grad_norm": 1.5531266927719116, "learning_rate": 0.00019961028826032332, "loss": 0.7723, "step": 1254 }, { "epoch": 0.4029539251886338, "grad_norm": 1.8615666627883911, "learning_rate": 0.00019960720346911797, "loss": 0.8307, "step": 1255 }, { "epoch": 0.4032750040134853, "grad_norm": 1.9920127391815186, "learning_rate": 0.00019960410654107697, "loss": 0.7862, "step": 1256 }, { "epoch": 0.4035960828383368, "grad_norm": 1.7379018068313599, "learning_rate": 0.00019960099747657774, "loss": 0.8312, "step": 1257 }, { "epoch": 0.4039171616631883, "grad_norm": 2.0180602073669434, "learning_rate": 0.00019959787627599906, "loss": 0.8846, "step": 1258 }, { "epoch": 0.4042382404880398, "grad_norm": 1.567842960357666, "learning_rate": 0.00019959474293972129, "loss": 1.0824, "step": 1259 }, { "epoch": 0.4045593193128913, "grad_norm": 2.6913554668426514, "learning_rate": 0.0001995915974681262, "loss": 1.0713, "step": 1260 }, { "epoch": 0.4048803981377428, "grad_norm": 2.3127224445343018, "learning_rate": 0.00019958843986159704, "loss": 0.8672, "step": 1261 }, { "epoch": 0.40520147696259434, "grad_norm": 2.383711338043213, "learning_rate": 0.00019958527012051857, "loss": 0.9304, "step": 1262 }, { "epoch": 0.40552255578744584, "grad_norm": 1.3288320302963257, "learning_rate": 0.000199582088245277, "loss": 1.0369, "step": 1263 }, { "epoch": 0.40584363461229733, 
"grad_norm": 1.3985912799835205, "learning_rate": 0.00019957889423626005, "loss": 1.0793, "step": 1264 }, { "epoch": 0.4061647134371488, "grad_norm": 1.3505330085754395, "learning_rate": 0.00019957568809385694, "loss": 0.8989, "step": 1265 }, { "epoch": 0.4064857922620003, "grad_norm": 1.663252353668213, "learning_rate": 0.00019957246981845822, "loss": 1.0797, "step": 1266 }, { "epoch": 0.4068068710868518, "grad_norm": 1.4397826194763184, "learning_rate": 0.0001995692394104561, "loss": 0.8712, "step": 1267 }, { "epoch": 0.4071279499117033, "grad_norm": 1.1795347929000854, "learning_rate": 0.0001995659968702442, "loss": 1.1528, "step": 1268 }, { "epoch": 0.4074490287365548, "grad_norm": 2.6851541996002197, "learning_rate": 0.00019956274219821757, "loss": 0.9325, "step": 1269 }, { "epoch": 0.4077701075614063, "grad_norm": 1.8018854856491089, "learning_rate": 0.00019955947539477284, "loss": 1.1969, "step": 1270 }, { "epoch": 0.40809118638625785, "grad_norm": 1.442039966583252, "learning_rate": 0.00019955619646030802, "loss": 0.9039, "step": 1271 }, { "epoch": 0.40841226521110935, "grad_norm": 1.8041740655899048, "learning_rate": 0.0001995529053952226, "loss": 1.0132, "step": 1272 }, { "epoch": 0.40873334403596084, "grad_norm": 1.6970267295837402, "learning_rate": 0.0001995496021999177, "loss": 1.2557, "step": 1273 }, { "epoch": 0.40905442286081234, "grad_norm": 2.2318642139434814, "learning_rate": 0.00019954628687479572, "loss": 0.9386, "step": 1274 }, { "epoch": 0.40937550168566383, "grad_norm": 1.3459020853042603, "learning_rate": 0.00019954295942026064, "loss": 0.979, "step": 1275 }, { "epoch": 0.40969658051051533, "grad_norm": 1.2683597803115845, "learning_rate": 0.00019953961983671788, "loss": 0.8684, "step": 1276 }, { "epoch": 0.4100176593353668, "grad_norm": 2.0490283966064453, "learning_rate": 0.0001995362681245744, "loss": 1.1168, "step": 1277 }, { "epoch": 0.4103387381602183, "grad_norm": 1.9572994709014893, "learning_rate": 0.00019953290428423857, "loss": 
0.8459, "step": 1278 }, { "epoch": 0.4106598169850698, "grad_norm": 1.6033639907836914, "learning_rate": 0.00019952952831612026, "loss": 1.1142, "step": 1279 }, { "epoch": 0.4109808958099213, "grad_norm": 1.5231640338897705, "learning_rate": 0.00019952614022063084, "loss": 1.1958, "step": 1280 }, { "epoch": 0.41130197463477286, "grad_norm": 1.5248790979385376, "learning_rate": 0.0001995227399981831, "loss": 1.2117, "step": 1281 }, { "epoch": 0.41162305345962436, "grad_norm": 1.1884125471115112, "learning_rate": 0.00019951932764919144, "loss": 0.8868, "step": 1282 }, { "epoch": 0.41194413228447585, "grad_norm": 1.509526014328003, "learning_rate": 0.00019951590317407152, "loss": 1.0724, "step": 1283 }, { "epoch": 0.41226521110932735, "grad_norm": 1.9606530666351318, "learning_rate": 0.0001995124665732407, "loss": 1.1933, "step": 1284 }, { "epoch": 0.41258628993417884, "grad_norm": 1.8705965280532837, "learning_rate": 0.00019950901784711764, "loss": 1.1115, "step": 1285 }, { "epoch": 0.41290736875903034, "grad_norm": 2.0867602825164795, "learning_rate": 0.00019950555699612267, "loss": 1.2534, "step": 1286 }, { "epoch": 0.41322844758388183, "grad_norm": 1.6749221086502075, "learning_rate": 0.00019950208402067733, "loss": 1.1313, "step": 1287 }, { "epoch": 0.4135495264087333, "grad_norm": 1.0996752977371216, "learning_rate": 0.00019949859892120491, "loss": 0.9663, "step": 1288 }, { "epoch": 0.4138706052335848, "grad_norm": 1.0107436180114746, "learning_rate": 0.00019949510169813003, "loss": 0.8468, "step": 1289 }, { "epoch": 0.41419168405843637, "grad_norm": 1.7090445756912231, "learning_rate": 0.0001994915923518788, "loss": 1.1206, "step": 1290 }, { "epoch": 0.41451276288328787, "grad_norm": 1.2257174253463745, "learning_rate": 0.00019948807088287883, "loss": 0.9886, "step": 1291 }, { "epoch": 0.41483384170813936, "grad_norm": 1.3304343223571777, "learning_rate": 0.00019948453729155922, "loss": 0.9679, "step": 1292 }, { "epoch": 0.41515492053299086, "grad_norm": 
3.28393816947937, "learning_rate": 0.00019948099157835047, "loss": 0.9752, "step": 1293 }, { "epoch": 0.41547599935784235, "grad_norm": 1.4946495294570923, "learning_rate": 0.00019947743374368467, "loss": 0.9856, "step": 1294 }, { "epoch": 0.41579707818269385, "grad_norm": 1.3553723096847534, "learning_rate": 0.00019947386378799532, "loss": 0.8419, "step": 1295 }, { "epoch": 0.41611815700754534, "grad_norm": 2.194031238555908, "learning_rate": 0.00019947028171171742, "loss": 0.8151, "step": 1296 }, { "epoch": 0.41643923583239684, "grad_norm": 1.5983561277389526, "learning_rate": 0.00019946668751528744, "loss": 0.8907, "step": 1297 }, { "epoch": 0.41676031465724833, "grad_norm": 1.9155287742614746, "learning_rate": 0.00019946308119914323, "loss": 0.9095, "step": 1298 }, { "epoch": 0.4170813934820999, "grad_norm": 1.078909993171692, "learning_rate": 0.00019945946276372434, "loss": 0.6413, "step": 1299 }, { "epoch": 0.4174024723069514, "grad_norm": 1.317728042602539, "learning_rate": 0.00019945583220947158, "loss": 0.7406, "step": 1300 }, { "epoch": 0.4177235511318029, "grad_norm": 1.7083852291107178, "learning_rate": 0.00019945218953682734, "loss": 1.3869, "step": 1301 }, { "epoch": 0.41804462995665437, "grad_norm": 2.715884208679199, "learning_rate": 0.00019944853474623548, "loss": 1.4385, "step": 1302 }, { "epoch": 0.41836570878150586, "grad_norm": 1.6352131366729736, "learning_rate": 0.00019944486783814134, "loss": 1.0713, "step": 1303 }, { "epoch": 0.41868678760635736, "grad_norm": 1.9549245834350586, "learning_rate": 0.00019944118881299168, "loss": 0.876, "step": 1304 }, { "epoch": 0.41900786643120885, "grad_norm": 3.7763001918792725, "learning_rate": 0.0001994374976712348, "loss": 0.7917, "step": 1305 }, { "epoch": 0.41932894525606035, "grad_norm": 1.8948286771774292, "learning_rate": 0.00019943379441332047, "loss": 0.8379, "step": 1306 }, { "epoch": 0.41965002408091184, "grad_norm": 5.399472713470459, "learning_rate": 0.0001994300790396999, "loss": 0.8858, 
"step": 1307 }, { "epoch": 0.41997110290576334, "grad_norm": 2.114410400390625, "learning_rate": 0.0001994263515508258, "loss": 0.8726, "step": 1308 }, { "epoch": 0.4202921817306149, "grad_norm": 2.5239293575286865, "learning_rate": 0.00019942261194715236, "loss": 1.225, "step": 1309 }, { "epoch": 0.4206132605554664, "grad_norm": 1.2384289503097534, "learning_rate": 0.00019941886022913522, "loss": 1.1042, "step": 1310 }, { "epoch": 0.4209343393803179, "grad_norm": 1.1283193826675415, "learning_rate": 0.00019941509639723155, "loss": 1.073, "step": 1311 }, { "epoch": 0.4212554182051694, "grad_norm": 1.4066673517227173, "learning_rate": 0.0001994113204518999, "loss": 1.0025, "step": 1312 }, { "epoch": 0.42157649703002087, "grad_norm": 1.927209734916687, "learning_rate": 0.00019940753239360047, "loss": 0.8864, "step": 1313 }, { "epoch": 0.42189757585487236, "grad_norm": 1.1925103664398193, "learning_rate": 0.00019940373222279473, "loss": 0.7443, "step": 1314 }, { "epoch": 0.42221865467972386, "grad_norm": 0.9499497413635254, "learning_rate": 0.0001993999199399457, "loss": 0.7643, "step": 1315 }, { "epoch": 0.42253973350457535, "grad_norm": 1.9734052419662476, "learning_rate": 0.000199396095545518, "loss": 0.9895, "step": 1316 }, { "epoch": 0.42286081232942685, "grad_norm": 1.0893967151641846, "learning_rate": 0.0001993922590399775, "loss": 0.9146, "step": 1317 }, { "epoch": 0.4231818911542784, "grad_norm": 2.039592742919922, "learning_rate": 0.00019938841042379174, "loss": 1.0294, "step": 1318 }, { "epoch": 0.4235029699791299, "grad_norm": 1.233694314956665, "learning_rate": 0.00019938454969742968, "loss": 0.8611, "step": 1319 }, { "epoch": 0.4238240488039814, "grad_norm": 2.1485254764556885, "learning_rate": 0.00019938067686136167, "loss": 1.0674, "step": 1320 }, { "epoch": 0.4241451276288329, "grad_norm": 1.7402235269546509, "learning_rate": 0.00019937679191605963, "loss": 1.1541, "step": 1321 }, { "epoch": 0.4244662064536844, "grad_norm": 1.3716280460357666, 
"learning_rate": 0.00019937289486199696, "loss": 1.1266, "step": 1322 }, { "epoch": 0.4247872852785359, "grad_norm": 1.6395798921585083, "learning_rate": 0.00019936898569964848, "loss": 0.9904, "step": 1323 }, { "epoch": 0.42510836410338737, "grad_norm": 1.3475819826126099, "learning_rate": 0.0001993650644294905, "loss": 1.0421, "step": 1324 }, { "epoch": 0.42542944292823887, "grad_norm": 1.9199656248092651, "learning_rate": 0.00019936113105200085, "loss": 1.1184, "step": 1325 }, { "epoch": 0.42575052175309036, "grad_norm": 1.6179684400558472, "learning_rate": 0.00019935718556765876, "loss": 1.2786, "step": 1326 }, { "epoch": 0.4260716005779419, "grad_norm": 1.5954012870788574, "learning_rate": 0.000199353227976945, "loss": 0.9325, "step": 1327 }, { "epoch": 0.4263926794027934, "grad_norm": 1.832868218421936, "learning_rate": 0.00019934925828034175, "loss": 1.114, "step": 1328 }, { "epoch": 0.4267137582276449, "grad_norm": 1.5652598142623901, "learning_rate": 0.00019934527647833276, "loss": 1.2399, "step": 1329 }, { "epoch": 0.4270348370524964, "grad_norm": 2.4210355281829834, "learning_rate": 0.0001993412825714032, "loss": 0.9815, "step": 1330 }, { "epoch": 0.4273559158773479, "grad_norm": 1.2339181900024414, "learning_rate": 0.00019933727656003963, "loss": 0.9054, "step": 1331 }, { "epoch": 0.4276769947021994, "grad_norm": 2.5136241912841797, "learning_rate": 0.0001993332584447303, "loss": 1.1086, "step": 1332 }, { "epoch": 0.4279980735270509, "grad_norm": 1.339241623878479, "learning_rate": 0.00019932922822596473, "loss": 1.2639, "step": 1333 }, { "epoch": 0.4283191523519024, "grad_norm": 1.0698463916778564, "learning_rate": 0.00019932518590423394, "loss": 0.8401, "step": 1334 }, { "epoch": 0.42864023117675387, "grad_norm": 2.2309930324554443, "learning_rate": 0.00019932113148003058, "loss": 0.8794, "step": 1335 }, { "epoch": 0.42896131000160537, "grad_norm": 2.3192408084869385, "learning_rate": 0.00019931706495384863, "loss": 0.9037, "step": 1336 }, { "epoch": 
0.4292823888264569, "grad_norm": 1.2625378370285034, "learning_rate": 0.00019931298632618356, "loss": 0.9458, "step": 1337 }, { "epoch": 0.4296034676513084, "grad_norm": 2.8485538959503174, "learning_rate": 0.00019930889559753234, "loss": 0.9646, "step": 1338 }, { "epoch": 0.4299245464761599, "grad_norm": 1.6909540891647339, "learning_rate": 0.00019930479276839344, "loss": 1.0197, "step": 1339 }, { "epoch": 0.4302456253010114, "grad_norm": 1.6980613470077515, "learning_rate": 0.00019930067783926675, "loss": 0.9211, "step": 1340 }, { "epoch": 0.4305667041258629, "grad_norm": 0.8947736024856567, "learning_rate": 0.0001992965508106537, "loss": 0.7911, "step": 1341 }, { "epoch": 0.4308877829507144, "grad_norm": 1.4755980968475342, "learning_rate": 0.00019929241168305714, "loss": 1.0548, "step": 1342 }, { "epoch": 0.4312088617755659, "grad_norm": 2.1290855407714844, "learning_rate": 0.00019928826045698136, "loss": 0.9762, "step": 1343 }, { "epoch": 0.4315299406004174, "grad_norm": 1.3842682838439941, "learning_rate": 0.00019928409713293227, "loss": 1.0557, "step": 1344 }, { "epoch": 0.4318510194252689, "grad_norm": 2.651461601257324, "learning_rate": 0.00019927992171141708, "loss": 0.8827, "step": 1345 }, { "epoch": 0.43217209825012043, "grad_norm": 3.1933929920196533, "learning_rate": 0.00019927573419294456, "loss": 0.9752, "step": 1346 }, { "epoch": 0.4324931770749719, "grad_norm": 2.6903903484344482, "learning_rate": 0.000199271534578025, "loss": 1.0742, "step": 1347 }, { "epoch": 0.4328142558998234, "grad_norm": 0.9604675769805908, "learning_rate": 0.00019926732286717003, "loss": 0.7134, "step": 1348 }, { "epoch": 0.4331353347246749, "grad_norm": 1.6569525003433228, "learning_rate": 0.0001992630990608929, "loss": 0.6801, "step": 1349 }, { "epoch": 0.4334564135495264, "grad_norm": 1.8039077520370483, "learning_rate": 0.00019925886315970824, "loss": 0.6657, "step": 1350 }, { "epoch": 0.4337774923743779, "grad_norm": 2.7572011947631836, "learning_rate": 
0.00019925461516413223, "loss": 1.5341, "step": 1351 }, { "epoch": 0.4340985711992294, "grad_norm": 1.5353821516036987, "learning_rate": 0.0001992503550746824, "loss": 1.2077, "step": 1352 }, { "epoch": 0.4344196500240809, "grad_norm": 1.8560153245925903, "learning_rate": 0.00019924608289187786, "loss": 1.043, "step": 1353 }, { "epoch": 0.4347407288489324, "grad_norm": 1.4403096437454224, "learning_rate": 0.00019924179861623915, "loss": 0.7871, "step": 1354 }, { "epoch": 0.43506180767378394, "grad_norm": 1.403806447982788, "learning_rate": 0.00019923750224828832, "loss": 0.7443, "step": 1355 }, { "epoch": 0.43538288649863544, "grad_norm": 1.7703312635421753, "learning_rate": 0.00019923319378854887, "loss": 0.9553, "step": 1356 }, { "epoch": 0.43570396532348693, "grad_norm": 1.7927194833755493, "learning_rate": 0.00019922887323754577, "loss": 0.6829, "step": 1357 }, { "epoch": 0.4360250441483384, "grad_norm": 3.560272693634033, "learning_rate": 0.00019922454059580544, "loss": 1.0068, "step": 1358 }, { "epoch": 0.4363461229731899, "grad_norm": 1.5825556516647339, "learning_rate": 0.00019922019586385585, "loss": 1.0626, "step": 1359 }, { "epoch": 0.4366672017980414, "grad_norm": 1.7079756259918213, "learning_rate": 0.00019921583904222633, "loss": 0.9531, "step": 1360 }, { "epoch": 0.4369882806228929, "grad_norm": 1.5126279592514038, "learning_rate": 0.0001992114701314478, "loss": 0.9648, "step": 1361 }, { "epoch": 0.4373093594477444, "grad_norm": 1.6111955642700195, "learning_rate": 0.00019920708913205256, "loss": 1.111, "step": 1362 }, { "epoch": 0.4376304382725959, "grad_norm": 1.1664777994155884, "learning_rate": 0.00019920269604457446, "loss": 1.0386, "step": 1363 }, { "epoch": 0.4379515170974474, "grad_norm": 1.0824946165084839, "learning_rate": 0.0001991982908695487, "loss": 0.9912, "step": 1364 }, { "epoch": 0.43827259592229895, "grad_norm": 1.131016492843628, "learning_rate": 0.00019919387360751217, "loss": 0.8238, "step": 1365 }, { "epoch": 
0.43859367474715044, "grad_norm": 1.307161808013916, "learning_rate": 0.000199189444259003, "loss": 0.8337, "step": 1366 }, { "epoch": 0.43891475357200194, "grad_norm": 1.598332166671753, "learning_rate": 0.0001991850028245609, "loss": 1.1024, "step": 1367 }, { "epoch": 0.43923583239685343, "grad_norm": 1.131783366203308, "learning_rate": 0.00019918054930472706, "loss": 0.9586, "step": 1368 }, { "epoch": 0.4395569112217049, "grad_norm": 1.2683618068695068, "learning_rate": 0.00019917608370004417, "loss": 1.02, "step": 1369 }, { "epoch": 0.4398779900465564, "grad_norm": 2.1559362411499023, "learning_rate": 0.0001991716060110563, "loss": 0.9757, "step": 1370 }, { "epoch": 0.4401990688714079, "grad_norm": 1.6462043523788452, "learning_rate": 0.00019916711623830903, "loss": 1.0838, "step": 1371 }, { "epoch": 0.4405201476962594, "grad_norm": 1.468853235244751, "learning_rate": 0.0001991626143823495, "loss": 1.1971, "step": 1372 }, { "epoch": 0.4408412265211109, "grad_norm": 1.2045490741729736, "learning_rate": 0.00019915810044372618, "loss": 1.2012, "step": 1373 }, { "epoch": 0.44116230534596246, "grad_norm": 1.699227213859558, "learning_rate": 0.0001991535744229891, "loss": 0.9933, "step": 1374 }, { "epoch": 0.44148338417081395, "grad_norm": 1.5638002157211304, "learning_rate": 0.00019914903632068973, "loss": 1.2959, "step": 1375 }, { "epoch": 0.44180446299566545, "grad_norm": 2.418407440185547, "learning_rate": 0.00019914448613738106, "loss": 1.1246, "step": 1376 }, { "epoch": 0.44212554182051694, "grad_norm": 2.6279420852661133, "learning_rate": 0.00019913992387361745, "loss": 1.069, "step": 1377 }, { "epoch": 0.44244662064536844, "grad_norm": 1.4538555145263672, "learning_rate": 0.00019913534952995486, "loss": 0.9135, "step": 1378 }, { "epoch": 0.44276769947021993, "grad_norm": 1.7238460779190063, "learning_rate": 0.00019913076310695068, "loss": 1.3682, "step": 1379 }, { "epoch": 0.44308877829507143, "grad_norm": 2.1236801147460938, "learning_rate": 
0.00019912616460516364, "loss": 1.0323, "step": 1380 }, { "epoch": 0.4434098571199229, "grad_norm": 1.527573585510254, "learning_rate": 0.00019912155402515417, "loss": 1.2224, "step": 1381 }, { "epoch": 0.4437309359447744, "grad_norm": 1.1867547035217285, "learning_rate": 0.00019911693136748403, "loss": 0.9621, "step": 1382 }, { "epoch": 0.44405201476962597, "grad_norm": 1.631277322769165, "learning_rate": 0.0001991122966327164, "loss": 1.1633, "step": 1383 }, { "epoch": 0.44437309359447746, "grad_norm": 2.110377550125122, "learning_rate": 0.0001991076498214161, "loss": 1.081, "step": 1384 }, { "epoch": 0.44469417241932896, "grad_norm": 1.2821978330612183, "learning_rate": 0.0001991029909341493, "loss": 0.8347, "step": 1385 }, { "epoch": 0.44501525124418045, "grad_norm": 1.3833321332931519, "learning_rate": 0.00019909831997148362, "loss": 1.0652, "step": 1386 }, { "epoch": 0.44533633006903195, "grad_norm": 1.2607406377792358, "learning_rate": 0.00019909363693398828, "loss": 0.9888, "step": 1387 }, { "epoch": 0.44565740889388344, "grad_norm": 1.2479708194732666, "learning_rate": 0.00019908894182223388, "loss": 0.9652, "step": 1388 }, { "epoch": 0.44597848771873494, "grad_norm": 1.9284359216690063, "learning_rate": 0.00019908423463679248, "loss": 1.0546, "step": 1389 }, { "epoch": 0.44629956654358643, "grad_norm": 1.1474945545196533, "learning_rate": 0.0001990795153782376, "loss": 0.8867, "step": 1390 }, { "epoch": 0.44662064536843793, "grad_norm": 1.8049343824386597, "learning_rate": 0.00019907478404714436, "loss": 0.921, "step": 1391 }, { "epoch": 0.4469417241932895, "grad_norm": 2.072420835494995, "learning_rate": 0.0001990700406440892, "loss": 1.0613, "step": 1392 }, { "epoch": 0.447262803018141, "grad_norm": 1.3974262475967407, "learning_rate": 0.00019906528516965008, "loss": 0.9483, "step": 1393 }, { "epoch": 0.44758388184299247, "grad_norm": 1.0429906845092773, "learning_rate": 0.0001990605176244065, "loss": 0.8286, "step": 1394 }, { "epoch": 
0.44790496066784397, "grad_norm": 1.2362806797027588, "learning_rate": 0.0001990557380089393, "loss": 1.0296, "step": 1395 }, { "epoch": 0.44822603949269546, "grad_norm": 1.1999589204788208, "learning_rate": 0.0001990509463238309, "loss": 0.7282, "step": 1396 }, { "epoch": 0.44854711831754696, "grad_norm": 1.3461198806762695, "learning_rate": 0.00019904614256966512, "loss": 0.9301, "step": 1397 }, { "epoch": 0.44886819714239845, "grad_norm": 2.5032989978790283, "learning_rate": 0.00019904132674702734, "loss": 0.5983, "step": 1398 }, { "epoch": 0.44918927596724995, "grad_norm": 1.029445767402649, "learning_rate": 0.0001990364988565043, "loss": 0.7837, "step": 1399 }, { "epoch": 0.44951035479210144, "grad_norm": 0.8912380337715149, "learning_rate": 0.0001990316588986843, "loss": 0.6475, "step": 1400 }, { "epoch": 0.44983143361695294, "grad_norm": 4.18993616104126, "learning_rate": 0.00019902680687415705, "loss": 1.7317, "step": 1401 }, { "epoch": 0.4501525124418045, "grad_norm": 6.351428031921387, "learning_rate": 0.00019902194278351374, "loss": 1.6514, "step": 1402 }, { "epoch": 0.450473591266656, "grad_norm": 2.5410821437835693, "learning_rate": 0.00019901706662734712, "loss": 1.1215, "step": 1403 }, { "epoch": 0.4507946700915075, "grad_norm": 1.9113538265228271, "learning_rate": 0.0001990121784062512, "loss": 0.9147, "step": 1404 }, { "epoch": 0.45111574891635897, "grad_norm": 3.196178913116455, "learning_rate": 0.00019900727812082177, "loss": 0.9308, "step": 1405 }, { "epoch": 0.45143682774121047, "grad_norm": 1.5448819398880005, "learning_rate": 0.00019900236577165576, "loss": 0.8038, "step": 1406 }, { "epoch": 0.45175790656606196, "grad_norm": 1.4571887254714966, "learning_rate": 0.0001989974413593518, "loss": 0.7484, "step": 1407 }, { "epoch": 0.45207898539091346, "grad_norm": 1.5297858715057373, "learning_rate": 0.0001989925048845099, "loss": 0.854, "step": 1408 }, { "epoch": 0.45240006421576495, "grad_norm": 1.8126001358032227, "learning_rate": 
0.00019898755634773158, "loss": 0.9785, "step": 1409 }, { "epoch": 0.45272114304061645, "grad_norm": 1.3122637271881104, "learning_rate": 0.00019898259574961978, "loss": 1.1906, "step": 1410 }, { "epoch": 0.453042221865468, "grad_norm": 1.2841436862945557, "learning_rate": 0.0001989776230907789, "loss": 0.7769, "step": 1411 }, { "epoch": 0.4533633006903195, "grad_norm": 1.6594353914260864, "learning_rate": 0.00019897263837181491, "loss": 0.8995, "step": 1412 }, { "epoch": 0.453684379515171, "grad_norm": 1.2056258916854858, "learning_rate": 0.0001989676415933351, "loss": 0.9341, "step": 1413 }, { "epoch": 0.4540054583400225, "grad_norm": 1.2220386266708374, "learning_rate": 0.00019896263275594842, "loss": 0.9202, "step": 1414 }, { "epoch": 0.454326537164874, "grad_norm": 1.083976149559021, "learning_rate": 0.0001989576118602651, "loss": 0.8357, "step": 1415 }, { "epoch": 0.4546476159897255, "grad_norm": 1.134037733078003, "learning_rate": 0.00019895257890689696, "loss": 1.0837, "step": 1416 }, { "epoch": 0.45496869481457697, "grad_norm": 1.6373850107192993, "learning_rate": 0.00019894753389645723, "loss": 1.0875, "step": 1417 }, { "epoch": 0.45528977363942846, "grad_norm": 1.8361527919769287, "learning_rate": 0.0001989424768295606, "loss": 0.9749, "step": 1418 }, { "epoch": 0.45561085246427996, "grad_norm": 2.0697522163391113, "learning_rate": 0.00019893740770682335, "loss": 0.9262, "step": 1419 }, { "epoch": 0.4559319312891315, "grad_norm": 1.6787400245666504, "learning_rate": 0.00019893232652886306, "loss": 1.028, "step": 1420 }, { "epoch": 0.456253010113983, "grad_norm": 1.6536740064620972, "learning_rate": 0.00019892723329629887, "loss": 1.138, "step": 1421 }, { "epoch": 0.4565740889388345, "grad_norm": 2.046743631362915, "learning_rate": 0.00019892212800975135, "loss": 0.9917, "step": 1422 }, { "epoch": 0.456895167763686, "grad_norm": 1.4609099626541138, "learning_rate": 0.00019891701066984262, "loss": 0.9998, "step": 1423 }, { "epoch": 0.4572162465885375, 
"grad_norm": 1.2826584577560425, "learning_rate": 0.00019891188127719618, "loss": 1.0006, "step": 1424 }, { "epoch": 0.457537325413389, "grad_norm": 1.3913053274154663, "learning_rate": 0.00019890673983243706, "loss": 1.098, "step": 1425 }, { "epoch": 0.4578584042382405, "grad_norm": 2.0255448818206787, "learning_rate": 0.0001989015863361917, "loss": 1.0888, "step": 1426 }, { "epoch": 0.458179483063092, "grad_norm": 2.3133769035339355, "learning_rate": 0.00019889642078908804, "loss": 1.2081, "step": 1427 }, { "epoch": 0.45850056188794347, "grad_norm": 1.3124561309814453, "learning_rate": 0.00019889124319175547, "loss": 1.125, "step": 1428 }, { "epoch": 0.45882164071279496, "grad_norm": 1.5743858814239502, "learning_rate": 0.0001988860535448249, "loss": 1.0983, "step": 1429 }, { "epoch": 0.4591427195376465, "grad_norm": 1.6619899272918701, "learning_rate": 0.00019888085184892868, "loss": 1.1139, "step": 1430 }, { "epoch": 0.459463798362498, "grad_norm": 1.2634574174880981, "learning_rate": 0.0001988756381047006, "loss": 1.1412, "step": 1431 }, { "epoch": 0.4597848771873495, "grad_norm": 1.1055835485458374, "learning_rate": 0.00019887041231277593, "loss": 0.7893, "step": 1432 }, { "epoch": 0.460105956012201, "grad_norm": 1.8112157583236694, "learning_rate": 0.0001988651744737914, "loss": 0.8947, "step": 1433 }, { "epoch": 0.4604270348370525, "grad_norm": 0.9498884081840515, "learning_rate": 0.00019885992458838528, "loss": 0.9129, "step": 1434 }, { "epoch": 0.460748113661904, "grad_norm": 1.6810704469680786, "learning_rate": 0.0001988546626571972, "loss": 1.0324, "step": 1435 }, { "epoch": 0.4610691924867555, "grad_norm": 1.1200361251831055, "learning_rate": 0.00019884938868086835, "loss": 1.1815, "step": 1436 }, { "epoch": 0.461390271311607, "grad_norm": 1.532668113708496, "learning_rate": 0.00019884410266004135, "loss": 0.9087, "step": 1437 }, { "epoch": 0.4617113501364585, "grad_norm": 3.874738931655884, "learning_rate": 0.00019883880459536024, "loss": 0.9593, 
"step": 1438 }, { "epoch": 0.46203242896131, "grad_norm": 2.262633800506592, "learning_rate": 0.00019883349448747062, "loss": 0.9921, "step": 1439 }, { "epoch": 0.4623535077861615, "grad_norm": 1.0738508701324463, "learning_rate": 0.00019882817233701948, "loss": 0.7347, "step": 1440 }, { "epoch": 0.462674586611013, "grad_norm": 1.7245491743087769, "learning_rate": 0.0001988228381446553, "loss": 0.9579, "step": 1441 }, { "epoch": 0.4629956654358645, "grad_norm": 1.3573378324508667, "learning_rate": 0.00019881749191102808, "loss": 0.9096, "step": 1442 }, { "epoch": 0.463316744260716, "grad_norm": 1.2465156316757202, "learning_rate": 0.0001988121336367892, "loss": 0.9165, "step": 1443 }, { "epoch": 0.4636378230855675, "grad_norm": 1.3146942853927612, "learning_rate": 0.00019880676332259154, "loss": 0.7766, "step": 1444 }, { "epoch": 0.463958901910419, "grad_norm": 1.2299082279205322, "learning_rate": 0.00019880138096908952, "loss": 0.7833, "step": 1445 }, { "epoch": 0.4642799807352705, "grad_norm": 1.622185230255127, "learning_rate": 0.00019879598657693891, "loss": 0.7363, "step": 1446 }, { "epoch": 0.464601059560122, "grad_norm": 1.7409615516662598, "learning_rate": 0.00019879058014679704, "loss": 1.0281, "step": 1447 }, { "epoch": 0.46492213838497354, "grad_norm": 1.8660942316055298, "learning_rate": 0.00019878516167932261, "loss": 0.8218, "step": 1448 }, { "epoch": 0.46524321720982503, "grad_norm": 1.445410132408142, "learning_rate": 0.0001987797311751759, "loss": 0.7792, "step": 1449 }, { "epoch": 0.46556429603467653, "grad_norm": 1.0770496129989624, "learning_rate": 0.00019877428863501856, "loss": 0.6364, "step": 1450 }, { "epoch": 0.465885374859528, "grad_norm": 1.7615505456924438, "learning_rate": 0.00019876883405951377, "loss": 1.367, "step": 1451 }, { "epoch": 0.4662064536843795, "grad_norm": 1.661313772201538, "learning_rate": 0.00019876336744932614, "loss": 1.1688, "step": 1452 }, { "epoch": 0.466527532509231, "grad_norm": 1.480622410774231, 
"learning_rate": 0.0001987578888051218, "loss": 1.0435, "step": 1453 }, { "epoch": 0.4668486113340825, "grad_norm": 1.6343483924865723, "learning_rate": 0.00019875239812756825, "loss": 0.9328, "step": 1454 }, { "epoch": 0.467169690158934, "grad_norm": 2.5799641609191895, "learning_rate": 0.00019874689541733457, "loss": 0.8994, "step": 1455 }, { "epoch": 0.4674907689837855, "grad_norm": 1.4654110670089722, "learning_rate": 0.00019874138067509117, "loss": 0.7419, "step": 1456 }, { "epoch": 0.467811847808637, "grad_norm": 1.7656830549240112, "learning_rate": 0.00019873585390151003, "loss": 1.0638, "step": 1457 }, { "epoch": 0.46813292663348854, "grad_norm": 1.733784794807434, "learning_rate": 0.00019873031509726462, "loss": 1.1878, "step": 1458 }, { "epoch": 0.46845400545834004, "grad_norm": 2.345583915710449, "learning_rate": 0.00019872476426302982, "loss": 0.98, "step": 1459 }, { "epoch": 0.46877508428319153, "grad_norm": 1.3415275812149048, "learning_rate": 0.00019871920139948192, "loss": 0.9307, "step": 1460 }, { "epoch": 0.46909616310804303, "grad_norm": 1.6265960931777954, "learning_rate": 0.0001987136265072988, "loss": 1.0417, "step": 1461 }, { "epoch": 0.4694172419328945, "grad_norm": 1.1737104654312134, "learning_rate": 0.00019870803958715972, "loss": 0.8448, "step": 1462 }, { "epoch": 0.469738320757746, "grad_norm": 1.189310908317566, "learning_rate": 0.0001987024406397454, "loss": 1.006, "step": 1463 }, { "epoch": 0.4700593995825975, "grad_norm": 1.5854594707489014, "learning_rate": 0.00019869682966573813, "loss": 0.9803, "step": 1464 }, { "epoch": 0.470380478407449, "grad_norm": 1.1072514057159424, "learning_rate": 0.00019869120666582153, "loss": 1.0124, "step": 1465 }, { "epoch": 0.4707015572323005, "grad_norm": 1.613070011138916, "learning_rate": 0.00019868557164068074, "loss": 1.2908, "step": 1466 }, { "epoch": 0.47102263605715206, "grad_norm": 1.2394685745239258, "learning_rate": 0.0001986799245910024, "loss": 1.1003, "step": 1467 }, { "epoch": 
0.47134371488200355, "grad_norm": 1.047046422958374, "learning_rate": 0.00019867426551747457, "loss": 1.0153, "step": 1468 }, { "epoch": 0.47166479370685505, "grad_norm": 1.3292659521102905, "learning_rate": 0.0001986685944207868, "loss": 0.9651, "step": 1469 }, { "epoch": 0.47198587253170654, "grad_norm": 1.4986034631729126, "learning_rate": 0.0001986629113016301, "loss": 0.9747, "step": 1470 }, { "epoch": 0.47230695135655804, "grad_norm": 2.4843838214874268, "learning_rate": 0.00019865721616069696, "loss": 1.0655, "step": 1471 }, { "epoch": 0.47262803018140953, "grad_norm": 1.3794037103652954, "learning_rate": 0.00019865150899868125, "loss": 1.1716, "step": 1472 }, { "epoch": 0.472949109006261, "grad_norm": 1.1681032180786133, "learning_rate": 0.00019864578981627844, "loss": 1.1593, "step": 1473 }, { "epoch": 0.4732701878311125, "grad_norm": 1.135563850402832, "learning_rate": 0.00019864005861418535, "loss": 0.9522, "step": 1474 }, { "epoch": 0.473591266655964, "grad_norm": 1.3052153587341309, "learning_rate": 0.0001986343153931003, "loss": 1.0636, "step": 1475 }, { "epoch": 0.47391234548081557, "grad_norm": 2.6790149211883545, "learning_rate": 0.00019862856015372317, "loss": 1.0276, "step": 1476 }, { "epoch": 0.47423342430566706, "grad_norm": 1.5842307806015015, "learning_rate": 0.00019862279289675509, "loss": 0.9003, "step": 1477 }, { "epoch": 0.47455450313051856, "grad_norm": 1.1842899322509766, "learning_rate": 0.0001986170136228989, "loss": 0.8584, "step": 1478 }, { "epoch": 0.47487558195537005, "grad_norm": 1.4418596029281616, "learning_rate": 0.0001986112223328587, "loss": 0.9339, "step": 1479 }, { "epoch": 0.47519666078022155, "grad_norm": 1.6683169603347778, "learning_rate": 0.00019860541902734022, "loss": 0.9629, "step": 1480 }, { "epoch": 0.47551773960507304, "grad_norm": 1.3546855449676514, "learning_rate": 0.0001985996037070505, "loss": 1.0198, "step": 1481 }, { "epoch": 0.47583881842992454, "grad_norm": 1.2630248069763184, "learning_rate": 
0.00019859377637269815, "loss": 0.9135, "step": 1482 }, { "epoch": 0.47615989725477603, "grad_norm": 1.2437412738800049, "learning_rate": 0.00019858793702499323, "loss": 0.8743, "step": 1483 }, { "epoch": 0.4764809760796275, "grad_norm": 2.26094388961792, "learning_rate": 0.00019858208566464724, "loss": 1.038, "step": 1484 }, { "epoch": 0.476802054904479, "grad_norm": 1.2740654945373535, "learning_rate": 0.00019857622229237313, "loss": 1.0045, "step": 1485 }, { "epoch": 0.4771231337293306, "grad_norm": 1.8857311010360718, "learning_rate": 0.00019857034690888537, "loss": 0.9932, "step": 1486 }, { "epoch": 0.47744421255418207, "grad_norm": 1.1165155172348022, "learning_rate": 0.00019856445951489982, "loss": 0.8839, "step": 1487 }, { "epoch": 0.47776529137903356, "grad_norm": 1.5677917003631592, "learning_rate": 0.00019855856011113384, "loss": 0.8075, "step": 1488 }, { "epoch": 0.47808637020388506, "grad_norm": 1.1825371980667114, "learning_rate": 0.00019855264869830629, "loss": 0.875, "step": 1489 }, { "epoch": 0.47840744902873655, "grad_norm": 1.572262167930603, "learning_rate": 0.00019854672527713744, "loss": 1.1585, "step": 1490 }, { "epoch": 0.47872852785358805, "grad_norm": 2.2230706214904785, "learning_rate": 0.00019854078984834903, "loss": 0.9478, "step": 1491 }, { "epoch": 0.47904960667843954, "grad_norm": 1.5785419940948486, "learning_rate": 0.00019853484241266428, "loss": 0.8647, "step": 1492 }, { "epoch": 0.47937068550329104, "grad_norm": 0.8901344537734985, "learning_rate": 0.00019852888297080786, "loss": 0.7951, "step": 1493 }, { "epoch": 0.47969176432814253, "grad_norm": 1.1752713918685913, "learning_rate": 0.00019852291152350592, "loss": 0.9361, "step": 1494 }, { "epoch": 0.4800128431529941, "grad_norm": 1.196556568145752, "learning_rate": 0.0001985169280714861, "loss": 0.9662, "step": 1495 }, { "epoch": 0.4803339219778456, "grad_norm": 1.6458885669708252, "learning_rate": 0.0001985109326154774, "loss": 0.932, "step": 1496 }, { "epoch": 
0.4806550008026971, "grad_norm": 1.7147083282470703, "learning_rate": 0.00019850492515621038, "loss": 0.8323, "step": 1497 }, { "epoch": 0.48097607962754857, "grad_norm": 1.5965648889541626, "learning_rate": 0.00019849890569441703, "loss": 0.6222, "step": 1498 }, { "epoch": 0.48129715845240006, "grad_norm": 0.8603938221931458, "learning_rate": 0.00019849287423083078, "loss": 0.4993, "step": 1499 }, { "epoch": 0.48161823727725156, "grad_norm": 1.216637134552002, "learning_rate": 0.00019848683076618658, "loss": 0.606, "step": 1500 }, { "epoch": 0.48193931610210305, "grad_norm": 1.512508511543274, "learning_rate": 0.00019848077530122083, "loss": 1.5987, "step": 1501 }, { "epoch": 0.48226039492695455, "grad_norm": 1.3921582698822021, "learning_rate": 0.00019847470783667127, "loss": 1.3467, "step": 1502 }, { "epoch": 0.48258147375180604, "grad_norm": 1.5864951610565186, "learning_rate": 0.0001984686283732773, "loss": 1.3512, "step": 1503 }, { "epoch": 0.4829025525766576, "grad_norm": 1.5356676578521729, "learning_rate": 0.00019846253691177966, "loss": 0.8669, "step": 1504 }, { "epoch": 0.4832236314015091, "grad_norm": 1.3852877616882324, "learning_rate": 0.00019845643345292054, "loss": 0.677, "step": 1505 }, { "epoch": 0.4835447102263606, "grad_norm": 1.4612033367156982, "learning_rate": 0.00019845031799744367, "loss": 0.8081, "step": 1506 }, { "epoch": 0.4838657890512121, "grad_norm": 1.5875556468963623, "learning_rate": 0.0001984441905460942, "loss": 0.783, "step": 1507 }, { "epoch": 0.4841868678760636, "grad_norm": 2.032498359680176, "learning_rate": 0.00019843805109961868, "loss": 1.2169, "step": 1508 }, { "epoch": 0.48450794670091507, "grad_norm": 1.468684196472168, "learning_rate": 0.00019843189965876526, "loss": 1.1707, "step": 1509 }, { "epoch": 0.48482902552576657, "grad_norm": 1.416133165359497, "learning_rate": 0.00019842573622428345, "loss": 1.0083, "step": 1510 }, { "epoch": 0.48515010435061806, "grad_norm": 1.3447717428207397, "learning_rate": 
0.0001984195607969242, "loss": 1.158, "step": 1511 }, { "epoch": 0.48547118317546956, "grad_norm": 0.9209278225898743, "learning_rate": 0.00019841337337744004, "loss": 0.8482, "step": 1512 }, { "epoch": 0.48579226200032105, "grad_norm": 1.1930347681045532, "learning_rate": 0.00019840717396658484, "loss": 0.9346, "step": 1513 }, { "epoch": 0.4861133408251726, "grad_norm": 2.087160587310791, "learning_rate": 0.00019840096256511398, "loss": 1.034, "step": 1514 }, { "epoch": 0.4864344196500241, "grad_norm": 2.3745105266571045, "learning_rate": 0.00019839473917378434, "loss": 0.9559, "step": 1515 }, { "epoch": 0.4867554984748756, "grad_norm": 1.2410439252853394, "learning_rate": 0.00019838850379335417, "loss": 1.1425, "step": 1516 }, { "epoch": 0.4870765772997271, "grad_norm": 1.2628313302993774, "learning_rate": 0.00019838225642458327, "loss": 0.8775, "step": 1517 }, { "epoch": 0.4873976561245786, "grad_norm": 1.2423824071884155, "learning_rate": 0.00019837599706823284, "loss": 1.1197, "step": 1518 }, { "epoch": 0.4877187349494301, "grad_norm": 1.6348954439163208, "learning_rate": 0.00019836972572506557, "loss": 1.1318, "step": 1519 }, { "epoch": 0.4880398137742816, "grad_norm": 1.3113266229629517, "learning_rate": 0.00019836344239584564, "loss": 0.8346, "step": 1520 }, { "epoch": 0.48836089259913307, "grad_norm": 2.537024974822998, "learning_rate": 0.00019835714708133862, "loss": 0.9601, "step": 1521 }, { "epoch": 0.48868197142398456, "grad_norm": 1.3241581916809082, "learning_rate": 0.00019835083978231156, "loss": 1.0883, "step": 1522 }, { "epoch": 0.4890030502488361, "grad_norm": 1.2066631317138672, "learning_rate": 0.00019834452049953297, "loss": 0.8249, "step": 1523 }, { "epoch": 0.4893241290736876, "grad_norm": 2.023279905319214, "learning_rate": 0.0001983381892337729, "loss": 1.1508, "step": 1524 }, { "epoch": 0.4896452078985391, "grad_norm": 1.0849757194519043, "learning_rate": 0.00019833184598580276, "loss": 0.8601, "step": 1525 }, { "epoch": 
0.4899662867233906, "grad_norm": 1.6499979496002197, "learning_rate": 0.0001983254907563955, "loss": 1.0404, "step": 1526 }, { "epoch": 0.4902873655482421, "grad_norm": 2.0686190128326416, "learning_rate": 0.00019831912354632535, "loss": 1.0121, "step": 1527 }, { "epoch": 0.4906084443730936, "grad_norm": 1.2880290746688843, "learning_rate": 0.0001983127443563683, "loss": 0.9684, "step": 1528 }, { "epoch": 0.4909295231979451, "grad_norm": 1.8285752534866333, "learning_rate": 0.00019830635318730154, "loss": 1.1573, "step": 1529 }, { "epoch": 0.4912506020227966, "grad_norm": 1.0872652530670166, "learning_rate": 0.00019829995003990388, "loss": 0.9672, "step": 1530 }, { "epoch": 0.4915716808476481, "grad_norm": 3.2847740650177, "learning_rate": 0.00019829353491495545, "loss": 1.0548, "step": 1531 }, { "epoch": 0.4918927596724996, "grad_norm": 1.274139165878296, "learning_rate": 0.00019828710781323792, "loss": 1.1498, "step": 1532 }, { "epoch": 0.4922138384973511, "grad_norm": 1.1564581394195557, "learning_rate": 0.00019828066873553448, "loss": 1.0376, "step": 1533 }, { "epoch": 0.4925349173222026, "grad_norm": 2.3864920139312744, "learning_rate": 0.00019827421768262967, "loss": 1.2347, "step": 1534 }, { "epoch": 0.4928559961470541, "grad_norm": 1.5429226160049438, "learning_rate": 0.0001982677546553095, "loss": 0.7993, "step": 1535 }, { "epoch": 0.4931770749719056, "grad_norm": 1.7663393020629883, "learning_rate": 0.00019826127965436152, "loss": 1.1139, "step": 1536 }, { "epoch": 0.4934981537967571, "grad_norm": 1.8667831420898438, "learning_rate": 0.00019825479268057467, "loss": 1.1442, "step": 1537 }, { "epoch": 0.4938192326216086, "grad_norm": 1.3998355865478516, "learning_rate": 0.0001982482937347394, "loss": 0.9002, "step": 1538 }, { "epoch": 0.4941403114464601, "grad_norm": 1.7938013076782227, "learning_rate": 0.00019824178281764753, "loss": 1.039, "step": 1539 }, { "epoch": 0.4944613902713116, "grad_norm": 2.098247528076172, "learning_rate": 
0.00019823525993009243, "loss": 0.729, "step": 1540 }, { "epoch": 0.49478246909616314, "grad_norm": 1.4042150974273682, "learning_rate": 0.0001982287250728689, "loss": 1.1048, "step": 1541 }, { "epoch": 0.49510354792101463, "grad_norm": 1.135709285736084, "learning_rate": 0.00019822217824677315, "loss": 0.8585, "step": 1542 }, { "epoch": 0.4954246267458661, "grad_norm": 2.2327728271484375, "learning_rate": 0.0001982156194526029, "loss": 1.0654, "step": 1543 }, { "epoch": 0.4957457055707176, "grad_norm": 1.6728448867797852, "learning_rate": 0.0001982090486911574, "loss": 0.8941, "step": 1544 }, { "epoch": 0.4960667843955691, "grad_norm": 1.843428373336792, "learning_rate": 0.0001982024659632372, "loss": 1.0319, "step": 1545 }, { "epoch": 0.4963878632204206, "grad_norm": 1.780278205871582, "learning_rate": 0.00019819587126964437, "loss": 0.9306, "step": 1546 }, { "epoch": 0.4967089420452721, "grad_norm": 1.5725250244140625, "learning_rate": 0.00019818926461118253, "loss": 0.8823, "step": 1547 }, { "epoch": 0.4970300208701236, "grad_norm": 2.787895679473877, "learning_rate": 0.0001981826459886566, "loss": 1.0951, "step": 1548 }, { "epoch": 0.4973510996949751, "grad_norm": 0.9631800651550293, "learning_rate": 0.00019817601540287306, "loss": 0.637, "step": 1549 }, { "epoch": 0.4976721785198266, "grad_norm": 1.3815464973449707, "learning_rate": 0.0001981693728546399, "loss": 0.7441, "step": 1550 }, { "epoch": 0.49799325734467814, "grad_norm": 1.8321810960769653, "learning_rate": 0.00019816271834476642, "loss": 1.4642, "step": 1551 }, { "epoch": 0.49831433616952964, "grad_norm": 1.4489206075668335, "learning_rate": 0.00019815605187406345, "loss": 1.2029, "step": 1552 }, { "epoch": 0.49863541499438113, "grad_norm": 1.4845582246780396, "learning_rate": 0.0001981493734433433, "loss": 0.823, "step": 1553 }, { "epoch": 0.4989564938192326, "grad_norm": 1.642909288406372, "learning_rate": 0.0001981426830534197, "loss": 0.8766, "step": 1554 }, { "epoch": 0.4992775726440841, 
"grad_norm": 1.8939411640167236, "learning_rate": 0.00019813598070510792, "loss": 0.9463, "step": 1555 }, { "epoch": 0.4995986514689356, "grad_norm": 1.7453148365020752, "learning_rate": 0.0001981292663992245, "loss": 0.8896, "step": 1556 }, { "epoch": 0.4999197302937871, "grad_norm": 1.5144426822662354, "learning_rate": 0.00019812254013658768, "loss": 0.8909, "step": 1557 }, { "epoch": 0.5002408091186387, "grad_norm": 1.3950881958007812, "learning_rate": 0.00019811580191801697, "loss": 0.9749, "step": 1558 }, { "epoch": 0.5005618879434901, "grad_norm": 1.4966586828231812, "learning_rate": 0.0001981090517443334, "loss": 0.9999, "step": 1559 }, { "epoch": 0.5008829667683417, "grad_norm": 1.356341004371643, "learning_rate": 0.0001981022896163595, "loss": 1.192, "step": 1560 }, { "epoch": 0.5012040455931931, "grad_norm": 3.053656816482544, "learning_rate": 0.00019809551553491916, "loss": 1.1255, "step": 1561 }, { "epoch": 0.5015251244180446, "grad_norm": 1.3365064859390259, "learning_rate": 0.00019808872950083782, "loss": 0.9588, "step": 1562 }, { "epoch": 0.5018462032428961, "grad_norm": 0.789279580116272, "learning_rate": 0.00019808193151494232, "loss": 0.747, "step": 1563 }, { "epoch": 0.5021672820677476, "grad_norm": 1.067087173461914, "learning_rate": 0.000198075121578061, "loss": 0.9783, "step": 1564 }, { "epoch": 0.5024883608925992, "grad_norm": 1.0413140058517456, "learning_rate": 0.00019806829969102357, "loss": 0.8484, "step": 1565 }, { "epoch": 0.5028094397174506, "grad_norm": 1.8259743452072144, "learning_rate": 0.0001980614658546613, "loss": 1.0746, "step": 1566 }, { "epoch": 0.5031305185423022, "grad_norm": 1.282426118850708, "learning_rate": 0.00019805462006980689, "loss": 1.1119, "step": 1567 }, { "epoch": 0.5034515973671536, "grad_norm": 1.075327754020691, "learning_rate": 0.00019804776233729444, "loss": 0.8866, "step": 1568 }, { "epoch": 0.5037726761920052, "grad_norm": 1.3174536228179932, "learning_rate": 0.0001980408926579596, "loss": 0.9901, 
"step": 1569 }, { "epoch": 0.5040937550168566, "grad_norm": 1.03889000415802, "learning_rate": 0.00019803401103263933, "loss": 0.9264, "step": 1570 }, { "epoch": 0.5044148338417082, "grad_norm": 1.4712145328521729, "learning_rate": 0.00019802711746217218, "loss": 1.113, "step": 1571 }, { "epoch": 0.5047359126665596, "grad_norm": 3.3879761695861816, "learning_rate": 0.00019802021194739814, "loss": 0.9667, "step": 1572 }, { "epoch": 0.5050569914914111, "grad_norm": 1.116847276687622, "learning_rate": 0.00019801329448915862, "loss": 1.0384, "step": 1573 }, { "epoch": 0.5053780703162627, "grad_norm": 1.0987874269485474, "learning_rate": 0.00019800636508829643, "loss": 1.0302, "step": 1574 }, { "epoch": 0.5056991491411141, "grad_norm": 1.6565288305282593, "learning_rate": 0.00019799942374565597, "loss": 1.0317, "step": 1575 }, { "epoch": 0.5060202279659657, "grad_norm": 1.2395991086959839, "learning_rate": 0.00019799247046208297, "loss": 1.1209, "step": 1576 }, { "epoch": 0.5063413067908171, "grad_norm": 1.0488249063491821, "learning_rate": 0.0001979855052384247, "loss": 0.9093, "step": 1577 }, { "epoch": 0.5066623856156687, "grad_norm": 1.3731625080108643, "learning_rate": 0.00019797852807552983, "loss": 0.9188, "step": 1578 }, { "epoch": 0.5069834644405201, "grad_norm": 2.111989974975586, "learning_rate": 0.00019797153897424852, "loss": 1.098, "step": 1579 }, { "epoch": 0.5073045432653717, "grad_norm": 1.2938238382339478, "learning_rate": 0.00019796453793543238, "loss": 0.8608, "step": 1580 }, { "epoch": 0.5076256220902231, "grad_norm": 1.4208569526672363, "learning_rate": 0.0001979575249599344, "loss": 1.0857, "step": 1581 }, { "epoch": 0.5079467009150747, "grad_norm": 1.5202668905258179, "learning_rate": 0.00019795050004860917, "loss": 1.2191, "step": 1582 }, { "epoch": 0.5082677797399262, "grad_norm": 0.9358740448951721, "learning_rate": 0.00019794346320231265, "loss": 0.9756, "step": 1583 }, { "epoch": 0.5085888585647776, "grad_norm": 1.4582595825195312, 
"learning_rate": 0.00019793641442190221, "loss": 1.0251, "step": 1584 }, { "epoch": 0.5089099373896292, "grad_norm": 1.3542490005493164, "learning_rate": 0.00019792935370823675, "loss": 1.0413, "step": 1585 }, { "epoch": 0.5092310162144806, "grad_norm": 0.8536126017570496, "learning_rate": 0.00019792228106217658, "loss": 0.7827, "step": 1586 }, { "epoch": 0.5095520950393322, "grad_norm": 1.294661521911621, "learning_rate": 0.00019791519648458352, "loss": 0.8979, "step": 1587 }, { "epoch": 0.5098731738641836, "grad_norm": 1.0872153043746948, "learning_rate": 0.00019790809997632076, "loss": 0.8247, "step": 1588 }, { "epoch": 0.5101942526890352, "grad_norm": 1.6541205644607544, "learning_rate": 0.000197900991538253, "loss": 1.0276, "step": 1589 }, { "epoch": 0.5105153315138866, "grad_norm": 1.2755903005599976, "learning_rate": 0.00019789387117124637, "loss": 0.9243, "step": 1590 }, { "epoch": 0.5108364103387382, "grad_norm": 1.2465382814407349, "learning_rate": 0.0001978867388761685, "loss": 0.9222, "step": 1591 }, { "epoch": 0.5111574891635896, "grad_norm": 0.9154629707336426, "learning_rate": 0.00019787959465388842, "loss": 0.8289, "step": 1592 }, { "epoch": 0.5114785679884412, "grad_norm": 1.7428443431854248, "learning_rate": 0.00019787243850527664, "loss": 1.0268, "step": 1593 }, { "epoch": 0.5117996468132927, "grad_norm": 1.0472004413604736, "learning_rate": 0.0001978652704312051, "loss": 0.8157, "step": 1594 }, { "epoch": 0.5121207256381441, "grad_norm": 1.6456936597824097, "learning_rate": 0.00019785809043254722, "loss": 0.9551, "step": 1595 }, { "epoch": 0.5124418044629957, "grad_norm": 1.1858004331588745, "learning_rate": 0.00019785089851017787, "loss": 0.9122, "step": 1596 }, { "epoch": 0.5127628832878471, "grad_norm": 1.2532726526260376, "learning_rate": 0.0001978436946649733, "loss": 0.8158, "step": 1597 }, { "epoch": 0.5130839621126987, "grad_norm": 0.9633054733276367, "learning_rate": 0.00019783647889781136, "loss": 0.7278, "step": 1598 }, { "epoch": 
0.5134050409375501, "grad_norm": 1.5852664709091187, "learning_rate": 0.00019782925120957124, "loss": 0.8357, "step": 1599 }, { "epoch": 0.5137261197624017, "grad_norm": 0.9574192762374878, "learning_rate": 0.0001978220116011336, "loss": 0.5688, "step": 1600 }, { "epoch": 0.5140471985872531, "grad_norm": 1.5146691799163818, "learning_rate": 0.00019781476007338058, "loss": 1.3743, "step": 1601 }, { "epoch": 0.5143682774121047, "grad_norm": 1.5694231986999512, "learning_rate": 0.00019780749662719573, "loss": 1.6016, "step": 1602 }, { "epoch": 0.5146893562369562, "grad_norm": 1.2235115766525269, "learning_rate": 0.0001978002212634641, "loss": 1.0506, "step": 1603 }, { "epoch": 0.5150104350618077, "grad_norm": 1.2383902072906494, "learning_rate": 0.0001977929339830722, "loss": 0.7889, "step": 1604 }, { "epoch": 0.5153315138866592, "grad_norm": 1.5607134103775024, "learning_rate": 0.0001977856347869079, "loss": 0.9399, "step": 1605 }, { "epoch": 0.5156525927115106, "grad_norm": 1.2659744024276733, "learning_rate": 0.00019777832367586063, "loss": 0.7502, "step": 1606 }, { "epoch": 0.5159736715363622, "grad_norm": 1.645847201347351, "learning_rate": 0.00019777100065082118, "loss": 1.0366, "step": 1607 }, { "epoch": 0.5162947503612136, "grad_norm": 1.4110909700393677, "learning_rate": 0.00019776366571268192, "loss": 1.1166, "step": 1608 }, { "epoch": 0.5166158291860652, "grad_norm": 1.3182811737060547, "learning_rate": 0.00019775631886233654, "loss": 1.0907, "step": 1609 }, { "epoch": 0.5169369080109166, "grad_norm": 1.0148051977157593, "learning_rate": 0.0001977489601006802, "loss": 0.7937, "step": 1610 }, { "epoch": 0.5172579868357682, "grad_norm": 1.499527096748352, "learning_rate": 0.0001977415894286096, "loss": 0.8182, "step": 1611 }, { "epoch": 0.5175790656606197, "grad_norm": 1.6975308656692505, "learning_rate": 0.0001977342068470228, "loss": 0.9804, "step": 1612 }, { "epoch": 0.5179001444854712, "grad_norm": 1.4909874200820923, "learning_rate": 
0.00019772681235681936, "loss": 1.088, "step": 1613 }, { "epoch": 0.5182212233103227, "grad_norm": 1.8054780960083008, "learning_rate": 0.00019771940595890027, "loss": 0.8292, "step": 1614 }, { "epoch": 0.5185423021351742, "grad_norm": 1.4584299325942993, "learning_rate": 0.000197711987654168, "loss": 1.0235, "step": 1615 }, { "epoch": 0.5188633809600257, "grad_norm": 1.4883002042770386, "learning_rate": 0.0001977045574435264, "loss": 0.9593, "step": 1616 }, { "epoch": 0.5191844597848772, "grad_norm": 1.0827285051345825, "learning_rate": 0.00019769711532788083, "loss": 0.9392, "step": 1617 }, { "epoch": 0.5195055386097287, "grad_norm": 1.358249306678772, "learning_rate": 0.0001976896613081381, "loss": 1.1373, "step": 1618 }, { "epoch": 0.5198266174345801, "grad_norm": 1.4473522901535034, "learning_rate": 0.0001976821953852065, "loss": 0.9981, "step": 1619 }, { "epoch": 0.5201476962594317, "grad_norm": 1.2284332513809204, "learning_rate": 0.0001976747175599957, "loss": 0.9899, "step": 1620 }, { "epoch": 0.5204687750842832, "grad_norm": 1.0846779346466064, "learning_rate": 0.0001976672278334168, "loss": 1.0516, "step": 1621 }, { "epoch": 0.5207898539091347, "grad_norm": 1.5218236446380615, "learning_rate": 0.00019765972620638248, "loss": 0.7433, "step": 1622 }, { "epoch": 0.5211109327339862, "grad_norm": 1.1767224073410034, "learning_rate": 0.00019765221267980675, "loss": 0.9726, "step": 1623 }, { "epoch": 0.5214320115588377, "grad_norm": 1.486958384513855, "learning_rate": 0.00019764468725460508, "loss": 0.8919, "step": 1624 }, { "epoch": 0.5217530903836892, "grad_norm": 1.7597538232803345, "learning_rate": 0.00019763714993169452, "loss": 0.8817, "step": 1625 }, { "epoch": 0.5220741692085407, "grad_norm": 1.96730637550354, "learning_rate": 0.00019762960071199333, "loss": 1.0347, "step": 1626 }, { "epoch": 0.5223952480333922, "grad_norm": 1.458665370941162, "learning_rate": 0.0001976220395964215, "loss": 0.9896, "step": 1627 }, { "epoch": 0.5227163268582437, 
"grad_norm": 1.2586889266967773, "learning_rate": 0.00019761446658590024, "loss": 0.9721, "step": 1628 }, { "epoch": 0.5230374056830952, "grad_norm": 1.374340295791626, "learning_rate": 0.00019760688168135232, "loss": 1.1315, "step": 1629 }, { "epoch": 0.5233584845079468, "grad_norm": 1.393900990486145, "learning_rate": 0.00019759928488370193, "loss": 1.0165, "step": 1630 }, { "epoch": 0.5236795633327982, "grad_norm": 1.0543795824050903, "learning_rate": 0.00019759167619387476, "loss": 0.8592, "step": 1631 }, { "epoch": 0.5240006421576497, "grad_norm": 0.9907365441322327, "learning_rate": 0.00019758405561279784, "loss": 1.095, "step": 1632 }, { "epoch": 0.5243217209825012, "grad_norm": 0.9849667549133301, "learning_rate": 0.00019757642314139977, "loss": 0.8316, "step": 1633 }, { "epoch": 0.5246427998073527, "grad_norm": 1.4609546661376953, "learning_rate": 0.00019756877878061052, "loss": 0.8168, "step": 1634 }, { "epoch": 0.5249638786322042, "grad_norm": 1.2530486583709717, "learning_rate": 0.0001975611225313615, "loss": 0.922, "step": 1635 }, { "epoch": 0.5252849574570557, "grad_norm": 1.1039314270019531, "learning_rate": 0.00019755345439458565, "loss": 0.9009, "step": 1636 }, { "epoch": 0.5256060362819072, "grad_norm": 1.1519142389297485, "learning_rate": 0.00019754577437121733, "loss": 1.0119, "step": 1637 }, { "epoch": 0.5259271151067587, "grad_norm": 1.89316725730896, "learning_rate": 0.00019753808246219224, "loss": 0.9892, "step": 1638 }, { "epoch": 0.5262481939316103, "grad_norm": 1.0566115379333496, "learning_rate": 0.00019753037866844771, "loss": 0.8139, "step": 1639 }, { "epoch": 0.5265692727564617, "grad_norm": 1.0403966903686523, "learning_rate": 0.00019752266299092236, "loss": 0.9116, "step": 1640 }, { "epoch": 0.5268903515813133, "grad_norm": 1.4061709642410278, "learning_rate": 0.00019751493543055632, "loss": 1.0763, "step": 1641 }, { "epoch": 0.5272114304061647, "grad_norm": 1.2321988344192505, "learning_rate": 0.00019750719598829122, "loss": 
0.8309, "step": 1642 }, { "epoch": 0.5275325092310162, "grad_norm": 1.538996696472168, "learning_rate": 0.00019749944466507008, "loss": 1.1677, "step": 1643 }, { "epoch": 0.5278535880558677, "grad_norm": 1.1386488676071167, "learning_rate": 0.00019749168146183731, "loss": 0.9062, "step": 1644 }, { "epoch": 0.5281746668807192, "grad_norm": 1.7029637098312378, "learning_rate": 0.0001974839063795389, "loss": 0.898, "step": 1645 }, { "epoch": 0.5284957457055707, "grad_norm": 1.1630817651748657, "learning_rate": 0.0001974761194191222, "loss": 0.9107, "step": 1646 }, { "epoch": 0.5288168245304222, "grad_norm": 0.9628080725669861, "learning_rate": 0.00019746832058153602, "loss": 0.6659, "step": 1647 }, { "epoch": 0.5291379033552737, "grad_norm": 0.8993632197380066, "learning_rate": 0.0001974605098677306, "loss": 0.8283, "step": 1648 }, { "epoch": 0.5294589821801252, "grad_norm": 3.56905198097229, "learning_rate": 0.00019745268727865774, "loss": 0.6945, "step": 1649 }, { "epoch": 0.5297800610049768, "grad_norm": 1.1273267269134521, "learning_rate": 0.00019744485281527049, "loss": 0.5489, "step": 1650 }, { "epoch": 0.5301011398298282, "grad_norm": 1.4737544059753418, "learning_rate": 0.00019743700647852354, "loss": 1.4845, "step": 1651 }, { "epoch": 0.5304222186546798, "grad_norm": 1.4742531776428223, "learning_rate": 0.00019742914826937288, "loss": 1.327, "step": 1652 }, { "epoch": 0.5307432974795312, "grad_norm": 1.2791469097137451, "learning_rate": 0.00019742127818877606, "loss": 1.2235, "step": 1653 }, { "epoch": 0.5310643763043827, "grad_norm": 1.511093258857727, "learning_rate": 0.000197413396237692, "loss": 0.8866, "step": 1654 }, { "epoch": 0.5313854551292342, "grad_norm": 2.150474786758423, "learning_rate": 0.00019740550241708108, "loss": 0.9557, "step": 1655 }, { "epoch": 0.5317065339540857, "grad_norm": 1.3363398313522339, "learning_rate": 0.0001973975967279052, "loss": 0.6729, "step": 1656 }, { "epoch": 0.5320276127789372, "grad_norm": 1.4328546524047852, 
"learning_rate": 0.0001973896791711275, "loss": 0.6497, "step": 1657 }, { "epoch": 0.5323486916037887, "grad_norm": 1.1948676109313965, "learning_rate": 0.0001973817497477129, "loss": 0.6963, "step": 1658 }, { "epoch": 0.5326697704286403, "grad_norm": 1.3898074626922607, "learning_rate": 0.00019737380845862745, "loss": 1.0177, "step": 1659 }, { "epoch": 0.5329908492534917, "grad_norm": 1.250869631767273, "learning_rate": 0.0001973658553048388, "loss": 0.9914, "step": 1660 }, { "epoch": 0.5333119280783433, "grad_norm": 1.9206534624099731, "learning_rate": 0.00019735789028731604, "loss": 0.9322, "step": 1661 }, { "epoch": 0.5336330069031947, "grad_norm": 1.8212529420852661, "learning_rate": 0.00019734991340702966, "loss": 1.1671, "step": 1662 }, { "epoch": 0.5339540857280463, "grad_norm": 1.4000660181045532, "learning_rate": 0.00019734192466495162, "loss": 1.0022, "step": 1663 }, { "epoch": 0.5342751645528977, "grad_norm": 0.9797595143318176, "learning_rate": 0.0001973339240620553, "loss": 0.8335, "step": 1664 }, { "epoch": 0.5345962433777492, "grad_norm": 1.2020220756530762, "learning_rate": 0.0001973259115993156, "loss": 0.9649, "step": 1665 }, { "epoch": 0.5349173222026007, "grad_norm": 1.1489737033843994, "learning_rate": 0.00019731788727770885, "loss": 0.9856, "step": 1666 }, { "epoch": 0.5352384010274522, "grad_norm": 1.017920970916748, "learning_rate": 0.00019730985109821266, "loss": 0.899, "step": 1667 }, { "epoch": 0.5355594798523038, "grad_norm": 0.8471775054931641, "learning_rate": 0.0001973018030618063, "loss": 0.88, "step": 1668 }, { "epoch": 0.5358805586771552, "grad_norm": 1.4469013214111328, "learning_rate": 0.0001972937431694704, "loss": 1.0509, "step": 1669 }, { "epoch": 0.5362016375020068, "grad_norm": 1.3785357475280762, "learning_rate": 0.00019728567142218703, "loss": 1.0469, "step": 1670 }, { "epoch": 0.5365227163268582, "grad_norm": 1.0268610715866089, "learning_rate": 0.00019727758782093967, "loss": 1.0102, "step": 1671 }, { "epoch": 
0.5368437951517098, "grad_norm": 1.2795389890670776, "learning_rate": 0.00019726949236671332, "loss": 1.0537, "step": 1672 }, { "epoch": 0.5371648739765612, "grad_norm": 3.1554412841796875, "learning_rate": 0.00019726138506049438, "loss": 0.9041, "step": 1673 }, { "epoch": 0.5374859528014128, "grad_norm": 1.8737775087356567, "learning_rate": 0.00019725326590327066, "loss": 1.0927, "step": 1674 }, { "epoch": 0.5378070316262642, "grad_norm": 1.8332324028015137, "learning_rate": 0.00019724513489603155, "loss": 1.0715, "step": 1675 }, { "epoch": 0.5381281104511157, "grad_norm": 1.0385593175888062, "learning_rate": 0.00019723699203976766, "loss": 0.8541, "step": 1676 }, { "epoch": 0.5384491892759673, "grad_norm": 1.0988675355911255, "learning_rate": 0.00019722883733547128, "loss": 0.7933, "step": 1677 }, { "epoch": 0.5387702681008187, "grad_norm": 1.472822666168213, "learning_rate": 0.00019722067078413599, "loss": 0.9275, "step": 1678 }, { "epoch": 0.5390913469256703, "grad_norm": 1.4651199579238892, "learning_rate": 0.00019721249238675688, "loss": 1.2088, "step": 1679 }, { "epoch": 0.5394124257505217, "grad_norm": 1.5309169292449951, "learning_rate": 0.00019720430214433042, "loss": 1.0605, "step": 1680 }, { "epoch": 0.5397335045753733, "grad_norm": 1.445311427116394, "learning_rate": 0.00019719610005785465, "loss": 1.0896, "step": 1681 }, { "epoch": 0.5400545834002247, "grad_norm": 1.6236193180084229, "learning_rate": 0.00019718788612832887, "loss": 1.0223, "step": 1682 }, { "epoch": 0.5403756622250763, "grad_norm": 1.451526165008545, "learning_rate": 0.00019717966035675397, "loss": 0.8866, "step": 1683 }, { "epoch": 0.5406967410499277, "grad_norm": 1.2759616374969482, "learning_rate": 0.00019717142274413223, "loss": 1.0889, "step": 1684 }, { "epoch": 0.5410178198747793, "grad_norm": 1.0535154342651367, "learning_rate": 0.0001971631732914674, "loss": 0.9059, "step": 1685 }, { "epoch": 0.5413388986996308, "grad_norm": 1.0445899963378906, "learning_rate": 
0.0001971549119997646, "loss": 0.9921, "step": 1686 }, { "epoch": 0.5416599775244823, "grad_norm": 1.2191705703735352, "learning_rate": 0.00019714663887003054, "loss": 1.0312, "step": 1687 }, { "epoch": 0.5419810563493338, "grad_norm": 1.2853853702545166, "learning_rate": 0.00019713835390327316, "loss": 1.0743, "step": 1688 }, { "epoch": 0.5423021351741852, "grad_norm": 1.3771874904632568, "learning_rate": 0.000197130057100502, "loss": 1.033, "step": 1689 }, { "epoch": 0.5426232139990368, "grad_norm": 1.2575607299804688, "learning_rate": 0.00019712174846272805, "loss": 0.94, "step": 1690 }, { "epoch": 0.5429442928238882, "grad_norm": 1.1774635314941406, "learning_rate": 0.00019711342799096361, "loss": 0.9817, "step": 1691 }, { "epoch": 0.5432653716487398, "grad_norm": 1.3301347494125366, "learning_rate": 0.00019710509568622258, "loss": 1.0584, "step": 1692 }, { "epoch": 0.5435864504735912, "grad_norm": 1.1408923864364624, "learning_rate": 0.00019709675154952017, "loss": 0.9083, "step": 1693 }, { "epoch": 0.5439075292984428, "grad_norm": 1.0724045038223267, "learning_rate": 0.0001970883955818731, "loss": 0.7918, "step": 1694 }, { "epoch": 0.5442286081232943, "grad_norm": 0.9542874693870544, "learning_rate": 0.00019708002778429955, "loss": 0.9377, "step": 1695 }, { "epoch": 0.5445496869481458, "grad_norm": 1.1520448923110962, "learning_rate": 0.00019707164815781908, "loss": 0.6503, "step": 1696 }, { "epoch": 0.5448707657729973, "grad_norm": 1.0709972381591797, "learning_rate": 0.00019706325670345275, "loss": 0.905, "step": 1697 }, { "epoch": 0.5451918445978488, "grad_norm": 0.7395738959312439, "learning_rate": 0.000197054853422223, "loss": 0.6726, "step": 1698 }, { "epoch": 0.5455129234227003, "grad_norm": 1.2998228073120117, "learning_rate": 0.00019704643831515374, "loss": 0.8171, "step": 1699 }, { "epoch": 0.5458340022475517, "grad_norm": 1.2186784744262695, "learning_rate": 0.00019703801138327038, "loss": 0.6297, "step": 1700 }, { "epoch": 0.5461550810724033, 
"grad_norm": 1.2121597528457642, "learning_rate": 0.00019702957262759965, "loss": 1.4739, "step": 1701 }, { "epoch": 0.5464761598972547, "grad_norm": 1.483771800994873, "learning_rate": 0.00019702112204916984, "loss": 1.3041, "step": 1702 }, { "epoch": 0.5467972387221063, "grad_norm": 1.4804165363311768, "learning_rate": 0.0001970126596490106, "loss": 0.8875, "step": 1703 }, { "epoch": 0.5471183175469578, "grad_norm": 1.211633324623108, "learning_rate": 0.00019700418542815306, "loss": 0.8732, "step": 1704 }, { "epoch": 0.5474393963718093, "grad_norm": 1.4509685039520264, "learning_rate": 0.00019699569938762973, "loss": 0.7624, "step": 1705 }, { "epoch": 0.5477604751966608, "grad_norm": 1.4379287958145142, "learning_rate": 0.00019698720152847468, "loss": 0.6996, "step": 1706 }, { "epoch": 0.5480815540215123, "grad_norm": 1.7644935846328735, "learning_rate": 0.00019697869185172331, "loss": 1.1261, "step": 1707 }, { "epoch": 0.5484026328463638, "grad_norm": 1.4107887744903564, "learning_rate": 0.00019697017035841252, "loss": 1.1591, "step": 1708 }, { "epoch": 0.5487237116712153, "grad_norm": 1.2489043474197388, "learning_rate": 0.0001969616370495806, "loss": 0.8075, "step": 1709 }, { "epoch": 0.5490447904960668, "grad_norm": 1.4318604469299316, "learning_rate": 0.00019695309192626734, "loss": 0.9552, "step": 1710 }, { "epoch": 0.5493658693209182, "grad_norm": 1.043524146080017, "learning_rate": 0.0001969445349895139, "loss": 0.7759, "step": 1711 }, { "epoch": 0.5496869481457698, "grad_norm": 1.1414694786071777, "learning_rate": 0.00019693596624036292, "loss": 1.0484, "step": 1712 }, { "epoch": 0.5500080269706212, "grad_norm": 1.1262508630752563, "learning_rate": 0.00019692738567985853, "loss": 0.924, "step": 1713 }, { "epoch": 0.5503291057954728, "grad_norm": 1.4722039699554443, "learning_rate": 0.0001969187933090462, "loss": 0.7755, "step": 1714 }, { "epoch": 0.5506501846203243, "grad_norm": 1.0937949419021606, "learning_rate": 0.00019691018912897286, "loss": 0.8581, 
"step": 1715 }, { "epoch": 0.5509712634451758, "grad_norm": 1.4399571418762207, "learning_rate": 0.00019690157314068696, "loss": 0.9408, "step": 1716 }, { "epoch": 0.5512923422700273, "grad_norm": 1.3758063316345215, "learning_rate": 0.0001968929453452383, "loss": 1.0267, "step": 1717 }, { "epoch": 0.5516134210948788, "grad_norm": 1.2351738214492798, "learning_rate": 0.00019688430574367819, "loss": 1.1119, "step": 1718 }, { "epoch": 0.5519344999197303, "grad_norm": 0.9020370244979858, "learning_rate": 0.00019687565433705926, "loss": 0.8939, "step": 1719 }, { "epoch": 0.5522555787445818, "grad_norm": 1.680139183998108, "learning_rate": 0.00019686699112643572, "loss": 1.1864, "step": 1720 }, { "epoch": 0.5525766575694333, "grad_norm": 1.5821527242660522, "learning_rate": 0.0001968583161128631, "loss": 1.125, "step": 1721 }, { "epoch": 0.5528977363942847, "grad_norm": 1.2951277494430542, "learning_rate": 0.00019684962929739853, "loss": 1.0259, "step": 1722 }, { "epoch": 0.5532188152191363, "grad_norm": 1.182592749595642, "learning_rate": 0.00019684093068110038, "loss": 1.1474, "step": 1723 }, { "epoch": 0.5535398940439878, "grad_norm": 1.3489532470703125, "learning_rate": 0.00019683222026502858, "loss": 0.774, "step": 1724 }, { "epoch": 0.5538609728688393, "grad_norm": 1.1987816095352173, "learning_rate": 0.00019682349805024446, "loss": 1.1013, "step": 1725 }, { "epoch": 0.5541820516936908, "grad_norm": 1.347397804260254, "learning_rate": 0.0001968147640378108, "loss": 1.1095, "step": 1726 }, { "epoch": 0.5545031305185423, "grad_norm": 1.4341198205947876, "learning_rate": 0.00019680601822879182, "loss": 1.2611, "step": 1727 }, { "epoch": 0.5548242093433938, "grad_norm": 1.5567268133163452, "learning_rate": 0.00019679726062425316, "loss": 1.0499, "step": 1728 }, { "epoch": 0.5551452881682453, "grad_norm": 0.9341502785682678, "learning_rate": 0.00019678849122526187, "loss": 0.9607, "step": 1729 }, { "epoch": 0.5554663669930968, "grad_norm": 1.2237082719802856, 
"learning_rate": 0.00019677971003288655, "loss": 1.0517, "step": 1730 }, { "epoch": 0.5557874458179483, "grad_norm": 0.9689716696739197, "learning_rate": 0.00019677091704819715, "loss": 1.0166, "step": 1731 }, { "epoch": 0.5561085246427998, "grad_norm": 1.32294762134552, "learning_rate": 0.000196762112272265, "loss": 0.9483, "step": 1732 }, { "epoch": 0.5564296034676514, "grad_norm": 1.5692658424377441, "learning_rate": 0.00019675329570616298, "loss": 0.8919, "step": 1733 }, { "epoch": 0.5567506822925028, "grad_norm": 1.022190809249878, "learning_rate": 0.0001967444673509654, "loss": 0.7758, "step": 1734 }, { "epoch": 0.5570717611173543, "grad_norm": 0.9672703742980957, "learning_rate": 0.00019673562720774792, "loss": 0.9181, "step": 1735 }, { "epoch": 0.5573928399422058, "grad_norm": 1.6587204933166504, "learning_rate": 0.0001967267752775877, "loss": 0.8566, "step": 1736 }, { "epoch": 0.5577139187670573, "grad_norm": 1.575669765472412, "learning_rate": 0.0001967179115615633, "loss": 0.9348, "step": 1737 }, { "epoch": 0.5580349975919088, "grad_norm": 1.2742935419082642, "learning_rate": 0.00019670903606075474, "loss": 0.6976, "step": 1738 }, { "epoch": 0.5583560764167603, "grad_norm": 2.927837610244751, "learning_rate": 0.00019670014877624353, "loss": 0.9914, "step": 1739 }, { "epoch": 0.5586771552416118, "grad_norm": 1.1376343965530396, "learning_rate": 0.00019669124970911247, "loss": 0.6877, "step": 1740 }, { "epoch": 0.5589982340664633, "grad_norm": 1.5874276161193848, "learning_rate": 0.00019668233886044597, "loss": 1.0347, "step": 1741 }, { "epoch": 0.5593193128913149, "grad_norm": 1.3938146829605103, "learning_rate": 0.0001966734162313297, "loss": 0.8869, "step": 1742 }, { "epoch": 0.5596403917161663, "grad_norm": 2.021955728530884, "learning_rate": 0.00019666448182285094, "loss": 0.9766, "step": 1743 }, { "epoch": 0.5599614705410179, "grad_norm": 1.3332651853561401, "learning_rate": 0.00019665553563609825, "loss": 0.9548, "step": 1744 }, { "epoch": 
0.5602825493658693, "grad_norm": 1.0142797231674194, "learning_rate": 0.00019664657767216176, "loss": 0.8287, "step": 1745 }, { "epoch": 0.5606036281907208, "grad_norm": 1.369992733001709, "learning_rate": 0.00019663760793213296, "loss": 1.0265, "step": 1746 }, { "epoch": 0.5609247070155723, "grad_norm": 1.5672804117202759, "learning_rate": 0.0001966286264171047, "loss": 0.7512, "step": 1747 }, { "epoch": 0.5612457858404238, "grad_norm": 1.7840815782546997, "learning_rate": 0.00019661963312817148, "loss": 0.985, "step": 1748 }, { "epoch": 0.5615668646652753, "grad_norm": 0.9597679972648621, "learning_rate": 0.00019661062806642903, "loss": 0.5063, "step": 1749 }, { "epoch": 0.5618879434901268, "grad_norm": 1.5060789585113525, "learning_rate": 0.00019660161123297458, "loss": 0.5703, "step": 1750 }, { "epoch": 0.5622090223149784, "grad_norm": 1.2468721866607666, "learning_rate": 0.00019659258262890683, "loss": 1.4691, "step": 1751 }, { "epoch": 0.5625301011398298, "grad_norm": 1.2596136331558228, "learning_rate": 0.00019658354225532589, "loss": 0.9999, "step": 1752 }, { "epoch": 0.5628511799646814, "grad_norm": 1.5096850395202637, "learning_rate": 0.00019657449011333328, "loss": 1.04, "step": 1753 }, { "epoch": 0.5631722587895328, "grad_norm": 1.6973052024841309, "learning_rate": 0.00019656542620403203, "loss": 0.8056, "step": 1754 }, { "epoch": 0.5634933376143844, "grad_norm": 1.484620213508606, "learning_rate": 0.00019655635052852647, "loss": 0.7742, "step": 1755 }, { "epoch": 0.5638144164392358, "grad_norm": 1.2450261116027832, "learning_rate": 0.0001965472630879225, "loss": 0.9274, "step": 1756 }, { "epoch": 0.5641354952640873, "grad_norm": 1.8217682838439941, "learning_rate": 0.0001965381638833274, "loss": 0.8686, "step": 1757 }, { "epoch": 0.5644565740889388, "grad_norm": 1.1573996543884277, "learning_rate": 0.00019652905291584984, "loss": 0.9089, "step": 1758 }, { "epoch": 0.5647776529137903, "grad_norm": 1.1910955905914307, "learning_rate": 0.0001965199301866, 
"loss": 1.1458, "step": 1759 }, { "epoch": 0.5650987317386419, "grad_norm": 1.2282229661941528, "learning_rate": 0.00019651079569668945, "loss": 0.9382, "step": 1760 }, { "epoch": 0.5654198105634933, "grad_norm": 1.6961151361465454, "learning_rate": 0.00019650164944723115, "loss": 1.1206, "step": 1761 }, { "epoch": 0.5657408893883449, "grad_norm": 1.1346982717514038, "learning_rate": 0.00019649249143933962, "loss": 0.9793, "step": 1762 }, { "epoch": 0.5660619682131963, "grad_norm": 1.3887813091278076, "learning_rate": 0.00019648332167413067, "loss": 0.9796, "step": 1763 }, { "epoch": 0.5663830470380479, "grad_norm": 0.7095258831977844, "learning_rate": 0.0001964741401527217, "loss": 0.7365, "step": 1764 }, { "epoch": 0.5667041258628993, "grad_norm": 1.4679083824157715, "learning_rate": 0.00019646494687623135, "loss": 0.8576, "step": 1765 }, { "epoch": 0.5670252046877509, "grad_norm": 1.2272289991378784, "learning_rate": 0.00019645574184577982, "loss": 0.9592, "step": 1766 }, { "epoch": 0.5673462835126023, "grad_norm": 0.9056873917579651, "learning_rate": 0.00019644652506248874, "loss": 0.9373, "step": 1767 }, { "epoch": 0.5676673623374539, "grad_norm": 1.2756919860839844, "learning_rate": 0.00019643729652748113, "loss": 0.9717, "step": 1768 }, { "epoch": 0.5679884411623053, "grad_norm": 1.2208895683288574, "learning_rate": 0.00019642805624188147, "loss": 1.0495, "step": 1769 }, { "epoch": 0.5683095199871568, "grad_norm": 1.9911493062973022, "learning_rate": 0.00019641880420681566, "loss": 1.0649, "step": 1770 }, { "epoch": 0.5686305988120084, "grad_norm": 1.385181188583374, "learning_rate": 0.00019640954042341103, "loss": 1.0059, "step": 1771 }, { "epoch": 0.5689516776368598, "grad_norm": 1.642540693283081, "learning_rate": 0.0001964002648927963, "loss": 0.924, "step": 1772 }, { "epoch": 0.5692727564617114, "grad_norm": 1.2365972995758057, "learning_rate": 0.00019639097761610174, "loss": 0.7404, "step": 1773 }, { "epoch": 0.5695938352865628, "grad_norm": 
1.8943426609039307, "learning_rate": 0.00019638167859445895, "loss": 0.9449, "step": 1774 }, { "epoch": 0.5699149141114144, "grad_norm": 1.3321115970611572, "learning_rate": 0.000196372367829001, "loss": 0.9601, "step": 1775 }, { "epoch": 0.5702359929362658, "grad_norm": 1.592492938041687, "learning_rate": 0.0001963630453208623, "loss": 0.9763, "step": 1776 }, { "epoch": 0.5705570717611174, "grad_norm": 1.3213098049163818, "learning_rate": 0.00019635371107117888, "loss": 1.3418, "step": 1777 }, { "epoch": 0.5708781505859688, "grad_norm": 1.128093957901001, "learning_rate": 0.000196344365081088, "loss": 0.8828, "step": 1778 }, { "epoch": 0.5711992294108204, "grad_norm": 1.2909680604934692, "learning_rate": 0.0001963350073517285, "loss": 1.1179, "step": 1779 }, { "epoch": 0.5715203082356719, "grad_norm": 1.7713595628738403, "learning_rate": 0.00019632563788424053, "loss": 1.0826, "step": 1780 }, { "epoch": 0.5718413870605233, "grad_norm": 1.069727897644043, "learning_rate": 0.00019631625667976583, "loss": 0.659, "step": 1781 }, { "epoch": 0.5721624658853749, "grad_norm": 1.3080742359161377, "learning_rate": 0.00019630686373944738, "loss": 0.8356, "step": 1782 }, { "epoch": 0.5724835447102263, "grad_norm": 1.5017932653427124, "learning_rate": 0.0001962974590644297, "loss": 0.9553, "step": 1783 }, { "epoch": 0.5728046235350779, "grad_norm": 2.8345601558685303, "learning_rate": 0.00019628804265585877, "loss": 0.984, "step": 1784 }, { "epoch": 0.5731257023599293, "grad_norm": 0.9238687753677368, "learning_rate": 0.00019627861451488189, "loss": 0.8865, "step": 1785 }, { "epoch": 0.5734467811847809, "grad_norm": 1.3167964220046997, "learning_rate": 0.0001962691746426479, "loss": 0.8951, "step": 1786 }, { "epoch": 0.5737678600096323, "grad_norm": 0.8535014986991882, "learning_rate": 0.00019625972304030697, "loss": 0.7118, "step": 1787 }, { "epoch": 0.5740889388344839, "grad_norm": 1.6030869483947754, "learning_rate": 0.00019625025970901078, "loss": 0.9116, "step": 1788 }, { 
"epoch": 0.5744100176593354, "grad_norm": 1.7325750589370728, "learning_rate": 0.0001962407846499124, "loss": 0.9291, "step": 1789 }, { "epoch": 0.5747310964841869, "grad_norm": 1.3394780158996582, "learning_rate": 0.00019623129786416635, "loss": 0.9792, "step": 1790 }, { "epoch": 0.5750521753090384, "grad_norm": 1.3643033504486084, "learning_rate": 0.00019622179935292855, "loss": 0.8923, "step": 1791 }, { "epoch": 0.5753732541338898, "grad_norm": 1.2005494832992554, "learning_rate": 0.00019621228911735636, "loss": 1.0839, "step": 1792 }, { "epoch": 0.5756943329587414, "grad_norm": 1.9481810331344604, "learning_rate": 0.0001962027671586086, "loss": 1.0381, "step": 1793 }, { "epoch": 0.5760154117835928, "grad_norm": 1.1878541707992554, "learning_rate": 0.00019619323347784548, "loss": 1.005, "step": 1794 }, { "epoch": 0.5763364906084444, "grad_norm": 1.2770967483520508, "learning_rate": 0.00019618368807622862, "loss": 1.008, "step": 1795 }, { "epoch": 0.5766575694332958, "grad_norm": 0.9496734738349915, "learning_rate": 0.00019617413095492114, "loss": 0.7313, "step": 1796 }, { "epoch": 0.5769786482581474, "grad_norm": 1.6152045726776123, "learning_rate": 0.00019616456211508752, "loss": 0.9678, "step": 1797 }, { "epoch": 0.5772997270829989, "grad_norm": 1.1719512939453125, "learning_rate": 0.0001961549815578937, "loss": 0.735, "step": 1798 }, { "epoch": 0.5776208059078504, "grad_norm": 0.9691303372383118, "learning_rate": 0.0001961453892845071, "loss": 0.6029, "step": 1799 }, { "epoch": 0.5779418847327019, "grad_norm": 1.4163908958435059, "learning_rate": 0.0001961357852960964, "loss": 0.6984, "step": 1800 }, { "epoch": 0.5782629635575534, "grad_norm": 2.1804752349853516, "learning_rate": 0.0001961261695938319, "loss": 1.5497, "step": 1801 }, { "epoch": 0.5785840423824049, "grad_norm": 1.3419125080108643, "learning_rate": 0.0001961165421788852, "loss": 1.3835, "step": 1802 }, { "epoch": 0.5789051212072563, "grad_norm": 1.8257540464401245, "learning_rate": 
0.0001961069030524294, "loss": 0.9603, "step": 1803 }, { "epoch": 0.5792262000321079, "grad_norm": 1.763144612312317, "learning_rate": 0.00019609725221563897, "loss": 0.9591, "step": 1804 }, { "epoch": 0.5795472788569593, "grad_norm": 1.1090623140335083, "learning_rate": 0.00019608758966968988, "loss": 0.8613, "step": 1805 }, { "epoch": 0.5798683576818109, "grad_norm": 1.6234127283096313, "learning_rate": 0.00019607791541575943, "loss": 0.8793, "step": 1806 }, { "epoch": 0.5801894365066624, "grad_norm": 1.1503936052322388, "learning_rate": 0.0001960682294550264, "loss": 0.6553, "step": 1807 }, { "epoch": 0.5805105153315139, "grad_norm": 1.3944035768508911, "learning_rate": 0.00019605853178867105, "loss": 0.682, "step": 1808 }, { "epoch": 0.5808315941563654, "grad_norm": 1.1830666065216064, "learning_rate": 0.00019604882241787498, "loss": 0.9909, "step": 1809 }, { "epoch": 0.5811526729812169, "grad_norm": 1.4636564254760742, "learning_rate": 0.00019603910134382123, "loss": 1.0726, "step": 1810 }, { "epoch": 0.5814737518060684, "grad_norm": 1.203645944595337, "learning_rate": 0.0001960293685676943, "loss": 0.9043, "step": 1811 }, { "epoch": 0.5817948306309199, "grad_norm": 1.30366051197052, "learning_rate": 0.00019601962409068012, "loss": 0.9162, "step": 1812 }, { "epoch": 0.5821159094557714, "grad_norm": 2.1224048137664795, "learning_rate": 0.000196009867913966, "loss": 1.1189, "step": 1813 }, { "epoch": 0.5824369882806228, "grad_norm": 0.9786831140518188, "learning_rate": 0.00019600010003874069, "loss": 0.8792, "step": 1814 }, { "epoch": 0.5827580671054744, "grad_norm": 1.0438469648361206, "learning_rate": 0.00019599032046619438, "loss": 1.1121, "step": 1815 }, { "epoch": 0.583079145930326, "grad_norm": 1.4787214994430542, "learning_rate": 0.0001959805291975187, "loss": 1.1414, "step": 1816 }, { "epoch": 0.5834002247551774, "grad_norm": 1.3728911876678467, "learning_rate": 0.00019597072623390668, "loss": 0.8714, "step": 1817 }, { "epoch": 0.5837213035800289, 
"grad_norm": 1.0536571741104126, "learning_rate": 0.00019596091157655278, "loss": 0.9525, "step": 1818 }, { "epoch": 0.5840423824048804, "grad_norm": 1.2520835399627686, "learning_rate": 0.0001959510852266529, "loss": 0.7777, "step": 1819 }, { "epoch": 0.5843634612297319, "grad_norm": 0.8757473826408386, "learning_rate": 0.0001959412471854043, "loss": 1.0104, "step": 1820 }, { "epoch": 0.5846845400545834, "grad_norm": 1.4845868349075317, "learning_rate": 0.00019593139745400576, "loss": 0.9476, "step": 1821 }, { "epoch": 0.5850056188794349, "grad_norm": 2.418626546859741, "learning_rate": 0.00019592153603365743, "loss": 1.0727, "step": 1822 }, { "epoch": 0.5853266977042864, "grad_norm": 1.0435357093811035, "learning_rate": 0.0001959116629255609, "loss": 1.0303, "step": 1823 }, { "epoch": 0.5856477765291379, "grad_norm": 1.3769514560699463, "learning_rate": 0.0001959017781309192, "loss": 1.0692, "step": 1824 }, { "epoch": 0.5859688553539895, "grad_norm": 1.0408692359924316, "learning_rate": 0.0001958918816509367, "loss": 0.903, "step": 1825 }, { "epoch": 0.5862899341788409, "grad_norm": 1.2975987195968628, "learning_rate": 0.0001958819734868193, "loss": 0.9121, "step": 1826 }, { "epoch": 0.5866110130036924, "grad_norm": 1.479382872581482, "learning_rate": 0.00019587205363977427, "loss": 0.9408, "step": 1827 }, { "epoch": 0.5869320918285439, "grad_norm": 1.200589656829834, "learning_rate": 0.00019586212211101037, "loss": 1.0283, "step": 1828 }, { "epoch": 0.5872531706533954, "grad_norm": 1.039357304573059, "learning_rate": 0.0001958521789017376, "loss": 0.8903, "step": 1829 }, { "epoch": 0.5875742494782469, "grad_norm": 1.5455893278121948, "learning_rate": 0.0001958422240131676, "loss": 1.0474, "step": 1830 }, { "epoch": 0.5878953283030984, "grad_norm": 1.092637538909912, "learning_rate": 0.00019583225744651333, "loss": 0.9873, "step": 1831 }, { "epoch": 0.5882164071279499, "grad_norm": 2.400148868560791, "learning_rate": 0.00019582227920298916, "loss": 1.2347, 
"step": 1832 }, { "epoch": 0.5885374859528014, "grad_norm": 1.1216742992401123, "learning_rate": 0.00019581228928381098, "loss": 1.0158, "step": 1833 }, { "epoch": 0.5888585647776529, "grad_norm": 1.167275309562683, "learning_rate": 0.00019580228769019593, "loss": 0.9061, "step": 1834 }, { "epoch": 0.5891796436025044, "grad_norm": 1.382562518119812, "learning_rate": 0.00019579227442336278, "loss": 1.216, "step": 1835 }, { "epoch": 0.589500722427356, "grad_norm": 1.2429423332214355, "learning_rate": 0.0001957822494845315, "loss": 0.9084, "step": 1836 }, { "epoch": 0.5898218012522074, "grad_norm": 1.1769795417785645, "learning_rate": 0.00019577221287492367, "loss": 0.9287, "step": 1837 }, { "epoch": 0.590142880077059, "grad_norm": 1.2153514623641968, "learning_rate": 0.00019576216459576222, "loss": 1.0391, "step": 1838 }, { "epoch": 0.5904639589019104, "grad_norm": 1.096490740776062, "learning_rate": 0.00019575210464827149, "loss": 0.9332, "step": 1839 }, { "epoch": 0.5907850377267619, "grad_norm": 1.9070253372192383, "learning_rate": 0.00019574203303367727, "loss": 0.859, "step": 1840 }, { "epoch": 0.5911061165516134, "grad_norm": 1.2111899852752686, "learning_rate": 0.00019573194975320673, "loss": 1.0296, "step": 1841 }, { "epoch": 0.5914271953764649, "grad_norm": 1.3868889808654785, "learning_rate": 0.00019572185480808848, "loss": 1.0413, "step": 1842 }, { "epoch": 0.5917482742013164, "grad_norm": 1.646368384361267, "learning_rate": 0.00019571174819955263, "loss": 0.9224, "step": 1843 }, { "epoch": 0.5920693530261679, "grad_norm": 1.1916215419769287, "learning_rate": 0.00019570162992883054, "loss": 0.9388, "step": 1844 }, { "epoch": 0.5923904318510195, "grad_norm": 1.1193400621414185, "learning_rate": 0.00019569149999715515, "loss": 0.8399, "step": 1845 }, { "epoch": 0.5927115106758709, "grad_norm": 1.161150574684143, "learning_rate": 0.00019568135840576076, "loss": 0.8907, "step": 1846 }, { "epoch": 0.5930325895007225, "grad_norm": 1.9916490316390991, 
"learning_rate": 0.00019567120515588308, "loss": 0.9989, "step": 1847 }, { "epoch": 0.5933536683255739, "grad_norm": 1.4825630187988281, "learning_rate": 0.0001956610402487592, "loss": 0.6702, "step": 1848 }, { "epoch": 0.5936747471504255, "grad_norm": 1.547219157218933, "learning_rate": 0.0001956508636856278, "loss": 0.6974, "step": 1849 }, { "epoch": 0.5939958259752769, "grad_norm": 0.9746522307395935, "learning_rate": 0.00019564067546772878, "loss": 0.499, "step": 1850 }, { "epoch": 0.5943169048001284, "grad_norm": 1.3846455812454224, "learning_rate": 0.00019563047559630357, "loss": 1.4075, "step": 1851 }, { "epoch": 0.5946379836249799, "grad_norm": 1.398285984992981, "learning_rate": 0.00019562026407259495, "loss": 0.8492, "step": 1852 }, { "epoch": 0.5949590624498314, "grad_norm": 3.702108860015869, "learning_rate": 0.00019561004089784723, "loss": 1.2278, "step": 1853 }, { "epoch": 0.595280141274683, "grad_norm": 1.2834221124649048, "learning_rate": 0.00019559980607330605, "loss": 0.7047, "step": 1854 }, { "epoch": 0.5956012200995344, "grad_norm": 1.3995305299758911, "learning_rate": 0.00019558955960021849, "loss": 0.7136, "step": 1855 }, { "epoch": 0.595922298924386, "grad_norm": 1.1465208530426025, "learning_rate": 0.00019557930147983302, "loss": 0.651, "step": 1856 }, { "epoch": 0.5962433777492374, "grad_norm": 1.3999861478805542, "learning_rate": 0.00019556903171339963, "loss": 0.7879, "step": 1857 }, { "epoch": 0.596564456574089, "grad_norm": 1.0851597785949707, "learning_rate": 0.00019555875030216954, "loss": 0.8221, "step": 1858 }, { "epoch": 0.5968855353989404, "grad_norm": 1.2108211517333984, "learning_rate": 0.00019554845724739566, "loss": 0.7746, "step": 1859 }, { "epoch": 0.597206614223792, "grad_norm": 1.5989631414413452, "learning_rate": 0.00019553815255033205, "loss": 1.028, "step": 1860 }, { "epoch": 0.5975276930486434, "grad_norm": 1.233514666557312, "learning_rate": 0.00019552783621223436, "loss": 0.6234, "step": 1861 }, { "epoch": 
0.5978487718734949, "grad_norm": 0.9018953442573547, "learning_rate": 0.0001955175082343596, "loss": 0.824, "step": 1862 }, { "epoch": 0.5981698506983465, "grad_norm": 1.028957486152649, "learning_rate": 0.00019550716861796623, "loss": 0.9231, "step": 1863 }, { "epoch": 0.5984909295231979, "grad_norm": 1.2545114755630493, "learning_rate": 0.00019549681736431404, "loss": 0.8434, "step": 1864 }, { "epoch": 0.5988120083480495, "grad_norm": 1.197217583656311, "learning_rate": 0.00019548645447466431, "loss": 0.9775, "step": 1865 }, { "epoch": 0.5991330871729009, "grad_norm": 1.3030047416687012, "learning_rate": 0.00019547607995027978, "loss": 0.8244, "step": 1866 }, { "epoch": 0.5994541659977525, "grad_norm": 1.4685194492340088, "learning_rate": 0.00019546569379242444, "loss": 0.9339, "step": 1867 }, { "epoch": 0.5997752448226039, "grad_norm": 1.9783920049667358, "learning_rate": 0.00019545529600236398, "loss": 0.9437, "step": 1868 }, { "epoch": 0.6000963236474555, "grad_norm": 1.3374611139297485, "learning_rate": 0.00019544488658136523, "loss": 1.0672, "step": 1869 }, { "epoch": 0.6004174024723069, "grad_norm": 1.4614479541778564, "learning_rate": 0.0001954344655306965, "loss": 0.9309, "step": 1870 }, { "epoch": 0.6007384812971585, "grad_norm": 1.359053373336792, "learning_rate": 0.0001954240328516277, "loss": 1.0011, "step": 1871 }, { "epoch": 0.60105956012201, "grad_norm": 1.4997376203536987, "learning_rate": 0.00019541358854542991, "loss": 1.0507, "step": 1872 }, { "epoch": 0.6013806389468614, "grad_norm": 1.5839393138885498, "learning_rate": 0.0001954031326133758, "loss": 1.1981, "step": 1873 }, { "epoch": 0.601701717771713, "grad_norm": 1.4018974304199219, "learning_rate": 0.00019539266505673938, "loss": 1.0872, "step": 1874 }, { "epoch": 0.6020227965965644, "grad_norm": 1.35053551197052, "learning_rate": 0.00019538218587679605, "loss": 0.8378, "step": 1875 }, { "epoch": 0.602343875421416, "grad_norm": 0.7986770272254944, "learning_rate": 0.0001953716950748227, 
"loss": 0.6296, "step": 1876 }, { "epoch": 0.6026649542462674, "grad_norm": 1.5917302370071411, "learning_rate": 0.0001953611926520976, "loss": 1.0359, "step": 1877 }, { "epoch": 0.602986033071119, "grad_norm": 1.197108507156372, "learning_rate": 0.00019535067860990046, "loss": 1.1222, "step": 1878 }, { "epoch": 0.6033071118959704, "grad_norm": 1.6555898189544678, "learning_rate": 0.00019534015294951233, "loss": 1.0129, "step": 1879 }, { "epoch": 0.603628190720822, "grad_norm": 0.971400797367096, "learning_rate": 0.00019532961567221576, "loss": 0.9535, "step": 1880 }, { "epoch": 0.6039492695456735, "grad_norm": 1.5631428956985474, "learning_rate": 0.0001953190667792947, "loss": 1.2075, "step": 1881 }, { "epoch": 0.604270348370525, "grad_norm": 1.193997859954834, "learning_rate": 0.0001953085062720345, "loss": 0.9112, "step": 1882 }, { "epoch": 0.6045914271953765, "grad_norm": 1.317486047744751, "learning_rate": 0.00019529793415172192, "loss": 1.02, "step": 1883 }, { "epoch": 0.604912506020228, "grad_norm": 0.9995319247245789, "learning_rate": 0.00019528735041964509, "loss": 0.8154, "step": 1884 }, { "epoch": 0.6052335848450795, "grad_norm": 1.0479660034179688, "learning_rate": 0.00019527675507709366, "loss": 0.8572, "step": 1885 }, { "epoch": 0.6055546636699309, "grad_norm": 1.4373043775558472, "learning_rate": 0.00019526614812535864, "loss": 0.9219, "step": 1886 }, { "epoch": 0.6058757424947825, "grad_norm": 1.3879246711730957, "learning_rate": 0.00019525552956573244, "loss": 0.9512, "step": 1887 }, { "epoch": 0.6061968213196339, "grad_norm": 1.2492396831512451, "learning_rate": 0.0001952448993995089, "loss": 1.0225, "step": 1888 }, { "epoch": 0.6065179001444855, "grad_norm": 1.0773907899856567, "learning_rate": 0.00019523425762798329, "loss": 0.7924, "step": 1889 }, { "epoch": 0.6068389789693369, "grad_norm": 1.1691362857818604, "learning_rate": 0.00019522360425245226, "loss": 0.8436, "step": 1890 }, { "epoch": 0.6071600577941885, "grad_norm": 1.218471646308899, 
"learning_rate": 0.00019521293927421388, "loss": 1.0175, "step": 1891 }, { "epoch": 0.60748113661904, "grad_norm": 0.9042808413505554, "learning_rate": 0.00019520226269456768, "loss": 0.9039, "step": 1892 }, { "epoch": 0.6078022154438915, "grad_norm": 1.3849420547485352, "learning_rate": 0.00019519157451481454, "loss": 1.1497, "step": 1893 }, { "epoch": 0.608123294268743, "grad_norm": 1.3135268688201904, "learning_rate": 0.0001951808747362568, "loss": 0.8948, "step": 1894 }, { "epoch": 0.6084443730935944, "grad_norm": 0.94450443983078, "learning_rate": 0.0001951701633601982, "loss": 0.9394, "step": 1895 }, { "epoch": 0.608765451918446, "grad_norm": 1.5512750148773193, "learning_rate": 0.00019515944038794384, "loss": 0.9455, "step": 1896 }, { "epoch": 0.6090865307432974, "grad_norm": 1.3148894309997559, "learning_rate": 0.00019514870582080032, "loss": 0.8146, "step": 1897 }, { "epoch": 0.609407609568149, "grad_norm": 1.836193323135376, "learning_rate": 0.00019513795966007562, "loss": 0.5957, "step": 1898 }, { "epoch": 0.6097286883930004, "grad_norm": 1.1311513185501099, "learning_rate": 0.00019512720190707913, "loss": 0.9068, "step": 1899 }, { "epoch": 0.610049767217852, "grad_norm": 0.6564053893089294, "learning_rate": 0.00019511643256312164, "loss": 0.4583, "step": 1900 }, { "epoch": 0.6103708460427035, "grad_norm": 1.3620964288711548, "learning_rate": 0.00019510565162951537, "loss": 1.3662, "step": 1901 }, { "epoch": 0.610691924867555, "grad_norm": 1.2974669933319092, "learning_rate": 0.0001950948591075739, "loss": 1.2901, "step": 1902 }, { "epoch": 0.6110130036924065, "grad_norm": 1.4451632499694824, "learning_rate": 0.00019508405499861232, "loss": 0.9189, "step": 1903 }, { "epoch": 0.611334082517258, "grad_norm": 1.583996057510376, "learning_rate": 0.00019507323930394708, "loss": 0.9118, "step": 1904 }, { "epoch": 0.6116551613421095, "grad_norm": 1.4750548601150513, "learning_rate": 0.00019506241202489602, "loss": 0.7096, "step": 1905 }, { "epoch": 
0.611976240166961, "grad_norm": 1.2632248401641846, "learning_rate": 0.00019505157316277837, "loss": 0.9424, "step": 1906 }, { "epoch": 0.6122973189918125, "grad_norm": 1.392075538635254, "learning_rate": 0.00019504072271891488, "loss": 0.9846, "step": 1907 }, { "epoch": 0.6126183978166639, "grad_norm": 1.0871883630752563, "learning_rate": 0.00019502986069462762, "loss": 0.9791, "step": 1908 }, { "epoch": 0.6129394766415155, "grad_norm": 1.2287681102752686, "learning_rate": 0.00019501898709124008, "loss": 1.0328, "step": 1909 }, { "epoch": 0.613260555466367, "grad_norm": 1.0907347202301025, "learning_rate": 0.00019500810191007718, "loss": 0.9497, "step": 1910 }, { "epoch": 0.6135816342912185, "grad_norm": 1.0309808254241943, "learning_rate": 0.00019499720515246525, "loss": 0.7238, "step": 1911 }, { "epoch": 0.61390271311607, "grad_norm": 1.0022937059402466, "learning_rate": 0.00019498629681973206, "loss": 0.8948, "step": 1912 }, { "epoch": 0.6142237919409215, "grad_norm": 1.440856695175171, "learning_rate": 0.00019497537691320668, "loss": 1.0733, "step": 1913 }, { "epoch": 0.614544870765773, "grad_norm": 1.5727375745773315, "learning_rate": 0.00019496444543421975, "loss": 1.0399, "step": 1914 }, { "epoch": 0.6148659495906245, "grad_norm": 1.1591044664382935, "learning_rate": 0.0001949535023841032, "loss": 0.9664, "step": 1915 }, { "epoch": 0.615187028415476, "grad_norm": 1.1390159130096436, "learning_rate": 0.0001949425477641904, "loss": 0.9239, "step": 1916 }, { "epoch": 0.6155081072403275, "grad_norm": 1.00704026222229, "learning_rate": 0.00019493158157581615, "loss": 0.9421, "step": 1917 }, { "epoch": 0.615829186065179, "grad_norm": 2.458767890930176, "learning_rate": 0.00019492060382031662, "loss": 1.1242, "step": 1918 }, { "epoch": 0.6161502648900306, "grad_norm": 1.4069280624389648, "learning_rate": 0.00019490961449902946, "loss": 1.0579, "step": 1919 }, { "epoch": 0.616471343714882, "grad_norm": 0.9519532322883606, "learning_rate": 0.00019489861361329366, 
"loss": 0.7231, "step": 1920 }, { "epoch": 0.6167924225397335, "grad_norm": 1.2751188278198242, "learning_rate": 0.00019488760116444966, "loss": 1.1183, "step": 1921 }, { "epoch": 0.617113501364585, "grad_norm": 1.6662013530731201, "learning_rate": 0.00019487657715383926, "loss": 1.1126, "step": 1922 }, { "epoch": 0.6174345801894365, "grad_norm": 1.7743055820465088, "learning_rate": 0.00019486554158280574, "loss": 0.945, "step": 1923 }, { "epoch": 0.617755659014288, "grad_norm": 1.3609472513198853, "learning_rate": 0.00019485449445269377, "loss": 0.9343, "step": 1924 }, { "epoch": 0.6180767378391395, "grad_norm": 2.255979061126709, "learning_rate": 0.00019484343576484933, "loss": 0.9, "step": 1925 }, { "epoch": 0.618397816663991, "grad_norm": 1.3037607669830322, "learning_rate": 0.00019483236552061994, "loss": 1.059, "step": 1926 }, { "epoch": 0.6187188954888425, "grad_norm": 1.691819190979004, "learning_rate": 0.00019482128372135446, "loss": 1.0162, "step": 1927 }, { "epoch": 0.6190399743136941, "grad_norm": 1.8937206268310547, "learning_rate": 0.0001948101903684032, "loss": 0.9981, "step": 1928 }, { "epoch": 0.6193610531385455, "grad_norm": 1.4838215112686157, "learning_rate": 0.00019479908546311781, "loss": 1.0377, "step": 1929 }, { "epoch": 0.619682131963397, "grad_norm": 1.2339526414871216, "learning_rate": 0.00019478796900685146, "loss": 0.911, "step": 1930 }, { "epoch": 0.6200032107882485, "grad_norm": 1.4416377544403076, "learning_rate": 0.0001947768410009586, "loss": 0.9814, "step": 1931 }, { "epoch": 0.6203242896131, "grad_norm": 1.8542779684066772, "learning_rate": 0.0001947657014467951, "loss": 1.0792, "step": 1932 }, { "epoch": 0.6206453684379515, "grad_norm": 1.3977333307266235, "learning_rate": 0.00019475455034571838, "loss": 0.9371, "step": 1933 }, { "epoch": 0.620966447262803, "grad_norm": 1.1695326566696167, "learning_rate": 0.0001947433876990871, "loss": 0.9352, "step": 1934 }, { "epoch": 0.6212875260876545, "grad_norm": 1.1949793100357056, 
"learning_rate": 0.00019473221350826142, "loss": 0.8381, "step": 1935 }, { "epoch": 0.621608604912506, "grad_norm": 1.197929859161377, "learning_rate": 0.0001947210277746029, "loss": 1.0588, "step": 1936 }, { "epoch": 0.6219296837373576, "grad_norm": 0.9528883695602417, "learning_rate": 0.00019470983049947444, "loss": 0.7636, "step": 1937 }, { "epoch": 0.622250762562209, "grad_norm": 0.9562763571739197, "learning_rate": 0.00019469862168424042, "loss": 0.9649, "step": 1938 }, { "epoch": 0.6225718413870606, "grad_norm": 1.5887722969055176, "learning_rate": 0.0001946874013302666, "loss": 0.9054, "step": 1939 }, { "epoch": 0.622892920211912, "grad_norm": 1.0492193698883057, "learning_rate": 0.0001946761694389202, "loss": 0.9068, "step": 1940 }, { "epoch": 0.6232139990367636, "grad_norm": 0.9907318353652954, "learning_rate": 0.00019466492601156966, "loss": 0.8565, "step": 1941 }, { "epoch": 0.623535077861615, "grad_norm": 1.7006665468215942, "learning_rate": 0.00019465367104958505, "loss": 1.0636, "step": 1942 }, { "epoch": 0.6238561566864665, "grad_norm": 1.3816338777542114, "learning_rate": 0.00019464240455433775, "loss": 0.9676, "step": 1943 }, { "epoch": 0.624177235511318, "grad_norm": 1.1604748964309692, "learning_rate": 0.00019463112652720054, "loss": 0.8379, "step": 1944 }, { "epoch": 0.6244983143361695, "grad_norm": 5.007503509521484, "learning_rate": 0.00019461983696954758, "loss": 1.1482, "step": 1945 }, { "epoch": 0.6248193931610211, "grad_norm": 1.0724717378616333, "learning_rate": 0.00019460853588275454, "loss": 0.7847, "step": 1946 }, { "epoch": 0.6251404719858725, "grad_norm": 1.2051833868026733, "learning_rate": 0.00019459722326819838, "loss": 0.9865, "step": 1947 }, { "epoch": 0.6254615508107241, "grad_norm": 1.1510910987854004, "learning_rate": 0.00019458589912725748, "loss": 0.689, "step": 1948 }, { "epoch": 0.6257826296355755, "grad_norm": 0.7677122354507446, "learning_rate": 0.0001945745634613117, "loss": 0.7075, "step": 1949 }, { "epoch": 
0.6261037084604271, "grad_norm": 1.6661738157272339, "learning_rate": 0.00019456321627174221, "loss": 0.6606, "step": 1950 }, { "epoch": 0.6264247872852785, "grad_norm": 1.3030189275741577, "learning_rate": 0.0001945518575599317, "loss": 1.3519, "step": 1951 }, { "epoch": 0.6267458661101301, "grad_norm": 1.27639901638031, "learning_rate": 0.00019454048732726412, "loss": 1.4878, "step": 1952 }, { "epoch": 0.6270669449349815, "grad_norm": 1.197721004486084, "learning_rate": 0.00019452910557512496, "loss": 1.0872, "step": 1953 }, { "epoch": 0.627388023759833, "grad_norm": 1.3855386972427368, "learning_rate": 0.000194517712304901, "loss": 0.8866, "step": 1954 }, { "epoch": 0.6277091025846845, "grad_norm": 1.5599814653396606, "learning_rate": 0.00019450630751798048, "loss": 0.8322, "step": 1955 }, { "epoch": 0.628030181409536, "grad_norm": 1.247623324394226, "learning_rate": 0.0001944948912157531, "loss": 0.4998, "step": 1956 }, { "epoch": 0.6283512602343876, "grad_norm": 1.6704400777816772, "learning_rate": 0.00019448346339960982, "loss": 0.739, "step": 1957 }, { "epoch": 0.628672339059239, "grad_norm": 1.241674542427063, "learning_rate": 0.00019447202407094316, "loss": 0.7236, "step": 1958 }, { "epoch": 0.6289934178840906, "grad_norm": 1.0023155212402344, "learning_rate": 0.0001944605732311469, "loss": 0.8875, "step": 1959 }, { "epoch": 0.629314496708942, "grad_norm": 1.710057020187378, "learning_rate": 0.00019444911088161636, "loss": 1.1843, "step": 1960 }, { "epoch": 0.6296355755337936, "grad_norm": 1.0903862714767456, "learning_rate": 0.00019443763702374812, "loss": 0.9194, "step": 1961 }, { "epoch": 0.629956654358645, "grad_norm": 0.8305079340934753, "learning_rate": 0.00019442615165894027, "loss": 0.9536, "step": 1962 }, { "epoch": 0.6302777331834966, "grad_norm": 13.161450386047363, "learning_rate": 0.00019441465478859228, "loss": 0.9715, "step": 1963 }, { "epoch": 0.630598812008348, "grad_norm": 1.4573770761489868, "learning_rate": 0.000194403146414105, "loss": 
1.056, "step": 1964 }, { "epoch": 0.6309198908331995, "grad_norm": 2.9237208366394043, "learning_rate": 0.00019439162653688065, "loss": 0.8055, "step": 1965 }, { "epoch": 0.6312409696580511, "grad_norm": 1.0383076667785645, "learning_rate": 0.00019438009515832297, "loss": 0.938, "step": 1966 }, { "epoch": 0.6315620484829025, "grad_norm": 1.4683743715286255, "learning_rate": 0.00019436855227983695, "loss": 1.0683, "step": 1967 }, { "epoch": 0.6318831273077541, "grad_norm": 1.3468014001846313, "learning_rate": 0.00019435699790282908, "loss": 1.1329, "step": 1968 }, { "epoch": 0.6322042061326055, "grad_norm": 1.5651507377624512, "learning_rate": 0.00019434543202870725, "loss": 0.8679, "step": 1969 }, { "epoch": 0.6325252849574571, "grad_norm": 1.5266989469528198, "learning_rate": 0.0001943338546588807, "loss": 1.2249, "step": 1970 }, { "epoch": 0.6328463637823085, "grad_norm": 3.1390273571014404, "learning_rate": 0.0001943222657947601, "loss": 1.2727, "step": 1971 }, { "epoch": 0.6331674426071601, "grad_norm": 1.983047366142273, "learning_rate": 0.00019431066543775752, "loss": 0.8392, "step": 1972 }, { "epoch": 0.6334885214320115, "grad_norm": 1.197851538658142, "learning_rate": 0.00019429905358928646, "loss": 1.0931, "step": 1973 }, { "epoch": 0.6338096002568631, "grad_norm": 1.3977620601654053, "learning_rate": 0.00019428743025076174, "loss": 1.1318, "step": 1974 }, { "epoch": 0.6341306790817146, "grad_norm": 1.217929720878601, "learning_rate": 0.00019427579542359965, "loss": 1.1208, "step": 1975 }, { "epoch": 0.634451757906566, "grad_norm": 1.4628900289535522, "learning_rate": 0.00019426414910921787, "loss": 1.0285, "step": 1976 }, { "epoch": 0.6347728367314176, "grad_norm": 1.353716254234314, "learning_rate": 0.00019425249130903543, "loss": 0.9842, "step": 1977 }, { "epoch": 0.635093915556269, "grad_norm": 1.2540056705474854, "learning_rate": 0.00019424082202447283, "loss": 0.9581, "step": 1978 }, { "epoch": 0.6354149943811206, "grad_norm": 1.8885056972503662, 
"learning_rate": 0.0001942291412569519, "loss": 0.9603, "step": 1979 }, { "epoch": 0.635736073205972, "grad_norm": 1.4361056089401245, "learning_rate": 0.00019421744900789597, "loss": 0.9598, "step": 1980 }, { "epoch": 0.6360571520308236, "grad_norm": 0.9510266780853271, "learning_rate": 0.00019420574527872968, "loss": 0.8859, "step": 1981 }, { "epoch": 0.636378230855675, "grad_norm": 1.5164238214492798, "learning_rate": 0.00019419403007087907, "loss": 1.045, "step": 1982 }, { "epoch": 0.6366993096805266, "grad_norm": 1.3843255043029785, "learning_rate": 0.0001941823033857716, "loss": 0.9721, "step": 1983 }, { "epoch": 0.6370203885053781, "grad_norm": 1.692832112312317, "learning_rate": 0.0001941705652248362, "loss": 0.8765, "step": 1984 }, { "epoch": 0.6373414673302296, "grad_norm": 1.3866074085235596, "learning_rate": 0.00019415881558950302, "loss": 1.0165, "step": 1985 }, { "epoch": 0.6376625461550811, "grad_norm": 0.9165050983428955, "learning_rate": 0.0001941470544812038, "loss": 1.0291, "step": 1986 }, { "epoch": 0.6379836249799326, "grad_norm": 0.8897827863693237, "learning_rate": 0.0001941352819013716, "loss": 0.7903, "step": 1987 }, { "epoch": 0.6383047038047841, "grad_norm": 1.3819725513458252, "learning_rate": 0.00019412349785144078, "loss": 1.0681, "step": 1988 }, { "epoch": 0.6386257826296355, "grad_norm": 1.1194961071014404, "learning_rate": 0.00019411170233284727, "loss": 0.8758, "step": 1989 }, { "epoch": 0.6389468614544871, "grad_norm": 0.9944826364517212, "learning_rate": 0.00019409989534702833, "loss": 0.8176, "step": 1990 }, { "epoch": 0.6392679402793385, "grad_norm": 1.1443473100662231, "learning_rate": 0.00019408807689542257, "loss": 1.0307, "step": 1991 }, { "epoch": 0.6395890191041901, "grad_norm": 0.8666215538978577, "learning_rate": 0.00019407624697947003, "loss": 0.8433, "step": 1992 }, { "epoch": 0.6399100979290416, "grad_norm": 1.3199023008346558, "learning_rate": 0.00019406440560061216, "loss": 1.0904, "step": 1993 }, { "epoch": 
0.6402311767538931, "grad_norm": 0.9692159295082092, "learning_rate": 0.0001940525527602918, "loss": 0.8195, "step": 1994 }, { "epoch": 0.6405522555787446, "grad_norm": 1.1354212760925293, "learning_rate": 0.00019404068845995317, "loss": 0.8017, "step": 1995 }, { "epoch": 0.6408733344035961, "grad_norm": 1.7543773651123047, "learning_rate": 0.0001940288127010419, "loss": 0.8184, "step": 1996 }, { "epoch": 0.6411944132284476, "grad_norm": 1.025387167930603, "learning_rate": 0.00019401692548500502, "loss": 0.7647, "step": 1997 }, { "epoch": 0.641515492053299, "grad_norm": 0.7941446304321289, "learning_rate": 0.00019400502681329098, "loss": 0.7321, "step": 1998 }, { "epoch": 0.6418365708781506, "grad_norm": 1.0787677764892578, "learning_rate": 0.00019399311668734956, "loss": 0.6183, "step": 1999 }, { "epoch": 0.642157649703002, "grad_norm": 0.9851073026657104, "learning_rate": 0.00019398119510863197, "loss": 0.6669, "step": 2000 }, { "epoch": 0.6424787285278536, "grad_norm": 1.7059520483016968, "learning_rate": 0.00019396926207859084, "loss": 1.4499, "step": 2001 }, { "epoch": 0.6427998073527051, "grad_norm": 1.7769484519958496, "learning_rate": 0.00019395731759868018, "loss": 1.2898, "step": 2002 }, { "epoch": 0.6431208861775566, "grad_norm": 1.4100226163864136, "learning_rate": 0.00019394536167035534, "loss": 0.8129, "step": 2003 }, { "epoch": 0.6434419650024081, "grad_norm": 1.3856101036071777, "learning_rate": 0.00019393339429507318, "loss": 0.6282, "step": 2004 }, { "epoch": 0.6437630438272596, "grad_norm": 1.2996238470077515, "learning_rate": 0.00019392141547429183, "loss": 0.7415, "step": 2005 }, { "epoch": 0.6440841226521111, "grad_norm": 1.237810730934143, "learning_rate": 0.0001939094252094709, "loss": 0.5929, "step": 2006 }, { "epoch": 0.6444052014769626, "grad_norm": 1.1693778038024902, "learning_rate": 0.00019389742350207141, "loss": 0.8008, "step": 2007 }, { "epoch": 0.6447262803018141, "grad_norm": 1.4258911609649658, "learning_rate": 
0.00019388541035355564, "loss": 1.2064, "step": 2008 }, { "epoch": 0.6450473591266656, "grad_norm": 1.4066739082336426, "learning_rate": 0.00019387338576538744, "loss": 1.0647, "step": 2009 }, { "epoch": 0.6453684379515171, "grad_norm": 1.1812559366226196, "learning_rate": 0.0001938613497390319, "loss": 0.9243, "step": 2010 }, { "epoch": 0.6456895167763685, "grad_norm": 5.421741962432861, "learning_rate": 0.0001938493022759556, "loss": 0.9313, "step": 2011 }, { "epoch": 0.6460105956012201, "grad_norm": 1.2386350631713867, "learning_rate": 0.0001938372433776265, "loss": 0.6525, "step": 2012 }, { "epoch": 0.6463316744260716, "grad_norm": 1.1228787899017334, "learning_rate": 0.00019382517304551396, "loss": 0.9062, "step": 2013 }, { "epoch": 0.6466527532509231, "grad_norm": 0.9291990399360657, "learning_rate": 0.00019381309128108865, "loss": 1.1112, "step": 2014 }, { "epoch": 0.6469738320757746, "grad_norm": 0.8409721851348877, "learning_rate": 0.00019380099808582278, "loss": 1.0773, "step": 2015 }, { "epoch": 0.6472949109006261, "grad_norm": 1.0756123065948486, "learning_rate": 0.0001937888934611898, "loss": 0.9227, "step": 2016 }, { "epoch": 0.6476159897254776, "grad_norm": 1.1562964916229248, "learning_rate": 0.0001937767774086646, "loss": 0.9476, "step": 2017 }, { "epoch": 0.6479370685503291, "grad_norm": 1.2570315599441528, "learning_rate": 0.00019376464992972356, "loss": 0.8856, "step": 2018 }, { "epoch": 0.6482581473751806, "grad_norm": 1.1748182773590088, "learning_rate": 0.0001937525110258444, "loss": 0.8097, "step": 2019 }, { "epoch": 0.648579226200032, "grad_norm": 0.8975158333778381, "learning_rate": 0.00019374036069850608, "loss": 0.9826, "step": 2020 }, { "epoch": 0.6489003050248836, "grad_norm": 1.5920442342758179, "learning_rate": 0.00019372819894918915, "loss": 1.1392, "step": 2021 }, { "epoch": 0.6492213838497352, "grad_norm": 0.9668254256248474, "learning_rate": 0.00019371602577937554, "loss": 0.8112, "step": 2022 }, { "epoch": 0.6495424626745866, 
"grad_norm": 1.265324592590332, "learning_rate": 0.0001937038411905484, "loss": 0.9334, "step": 2023 }, { "epoch": 0.6498635414994381, "grad_norm": 1.3954192399978638, "learning_rate": 0.0001936916451841925, "loss": 0.9908, "step": 2024 }, { "epoch": 0.6501846203242896, "grad_norm": 1.23148775100708, "learning_rate": 0.0001936794377617938, "loss": 1.0773, "step": 2025 }, { "epoch": 0.6505056991491411, "grad_norm": 1.0222960710525513, "learning_rate": 0.00019366721892483978, "loss": 0.8985, "step": 2026 }, { "epoch": 0.6508267779739926, "grad_norm": 0.9243746995925903, "learning_rate": 0.00019365498867481923, "loss": 0.6689, "step": 2027 }, { "epoch": 0.6511478567988441, "grad_norm": 0.9078730344772339, "learning_rate": 0.00019364274701322244, "loss": 0.7267, "step": 2028 }, { "epoch": 0.6514689356236956, "grad_norm": 1.5736430883407593, "learning_rate": 0.00019363049394154094, "loss": 0.994, "step": 2029 }, { "epoch": 0.6517900144485471, "grad_norm": 0.8982104659080505, "learning_rate": 0.0001936182294612678, "loss": 0.838, "step": 2030 }, { "epoch": 0.6521110932733987, "grad_norm": 1.2022709846496582, "learning_rate": 0.00019360595357389735, "loss": 1.2414, "step": 2031 }, { "epoch": 0.6524321720982501, "grad_norm": 0.9198827147483826, "learning_rate": 0.00019359366628092539, "loss": 0.7564, "step": 2032 }, { "epoch": 0.6527532509231017, "grad_norm": 1.3141475915908813, "learning_rate": 0.00019358136758384912, "loss": 1.3289, "step": 2033 }, { "epoch": 0.6530743297479531, "grad_norm": 1.4782156944274902, "learning_rate": 0.00019356905748416702, "loss": 0.9758, "step": 2034 }, { "epoch": 0.6533954085728046, "grad_norm": 1.2020496129989624, "learning_rate": 0.00019355673598337914, "loss": 0.8308, "step": 2035 }, { "epoch": 0.6537164873976561, "grad_norm": 1.2560274600982666, "learning_rate": 0.00019354440308298675, "loss": 0.7407, "step": 2036 }, { "epoch": 0.6540375662225076, "grad_norm": 0.8934011459350586, "learning_rate": 0.00019353205878449258, "loss": 0.865, 
"step": 2037 }, { "epoch": 0.6543586450473591, "grad_norm": 0.9289383888244629, "learning_rate": 0.0001935197030894008, "loss": 0.7145, "step": 2038 }, { "epoch": 0.6546797238722106, "grad_norm": 1.2418136596679688, "learning_rate": 0.00019350733599921683, "loss": 0.8111, "step": 2039 }, { "epoch": 0.6550008026970622, "grad_norm": 0.9828119277954102, "learning_rate": 0.00019349495751544763, "loss": 0.9877, "step": 2040 }, { "epoch": 0.6553218815219136, "grad_norm": 0.8384512662887573, "learning_rate": 0.00019348256763960145, "loss": 0.7265, "step": 2041 }, { "epoch": 0.6556429603467652, "grad_norm": 1.1183538436889648, "learning_rate": 0.000193470166373188, "loss": 0.8691, "step": 2042 }, { "epoch": 0.6559640391716166, "grad_norm": 1.0585153102874756, "learning_rate": 0.00019345775371771824, "loss": 0.9087, "step": 2043 }, { "epoch": 0.6562851179964682, "grad_norm": 1.3051196336746216, "learning_rate": 0.0001934453296747047, "loss": 0.8324, "step": 2044 }, { "epoch": 0.6566061968213196, "grad_norm": 1.0037448406219482, "learning_rate": 0.00019343289424566122, "loss": 0.7462, "step": 2045 }, { "epoch": 0.6569272756461711, "grad_norm": 1.1593952178955078, "learning_rate": 0.00019342044743210295, "loss": 0.8161, "step": 2046 }, { "epoch": 0.6572483544710226, "grad_norm": 0.9699090123176575, "learning_rate": 0.00019340798923554657, "loss": 0.8817, "step": 2047 }, { "epoch": 0.6575694332958741, "grad_norm": 2.443246841430664, "learning_rate": 0.00019339551965751002, "loss": 0.888, "step": 2048 }, { "epoch": 0.6578905121207257, "grad_norm": 0.9894551634788513, "learning_rate": 0.00019338303869951269, "loss": 0.6925, "step": 2049 }, { "epoch": 0.6582115909455771, "grad_norm": 1.5795120000839233, "learning_rate": 0.00019337054636307536, "loss": 0.6042, "step": 2050 }, { "epoch": 0.6585326697704287, "grad_norm": 1.1168063879013062, "learning_rate": 0.00019335804264972018, "loss": 1.449, "step": 2051 }, { "epoch": 0.6588537485952801, "grad_norm": 1.1965980529785156, 
"learning_rate": 0.0001933455275609707, "loss": 1.1259, "step": 2052 }, { "epoch": 0.6591748274201317, "grad_norm": 1.1799712181091309, "learning_rate": 0.0001933330010983518, "loss": 1.0329, "step": 2053 }, { "epoch": 0.6594959062449831, "grad_norm": 1.4881231784820557, "learning_rate": 0.00019332046326338986, "loss": 0.8752, "step": 2054 }, { "epoch": 0.6598169850698347, "grad_norm": 1.203697919845581, "learning_rate": 0.00019330791405761252, "loss": 0.7875, "step": 2055 }, { "epoch": 0.6601380638946861, "grad_norm": 1.563508152961731, "learning_rate": 0.00019329535348254893, "loss": 0.907, "step": 2056 }, { "epoch": 0.6604591427195377, "grad_norm": 1.3101892471313477, "learning_rate": 0.00019328278153972947, "loss": 0.9096, "step": 2057 }, { "epoch": 0.6607802215443892, "grad_norm": 1.4064226150512695, "learning_rate": 0.00019327019823068604, "loss": 1.0788, "step": 2058 }, { "epoch": 0.6611013003692406, "grad_norm": 1.2184844017028809, "learning_rate": 0.00019325760355695188, "loss": 0.9344, "step": 2059 }, { "epoch": 0.6614223791940922, "grad_norm": 1.2525115013122559, "learning_rate": 0.0001932449975200616, "loss": 0.9809, "step": 2060 }, { "epoch": 0.6617434580189436, "grad_norm": 1.2751420736312866, "learning_rate": 0.00019323238012155123, "loss": 0.8502, "step": 2061 }, { "epoch": 0.6620645368437952, "grad_norm": 1.271164059638977, "learning_rate": 0.00019321975136295813, "loss": 0.9252, "step": 2062 }, { "epoch": 0.6623856156686466, "grad_norm": 0.9167636632919312, "learning_rate": 0.0001932071112458211, "loss": 0.9382, "step": 2063 }, { "epoch": 0.6627066944934982, "grad_norm": 1.027632474899292, "learning_rate": 0.0001931944597716803, "loss": 0.941, "step": 2064 }, { "epoch": 0.6630277733183496, "grad_norm": 0.8215566873550415, "learning_rate": 0.00019318179694207725, "loss": 0.8984, "step": 2065 }, { "epoch": 0.6633488521432012, "grad_norm": 1.2999035120010376, "learning_rate": 0.0001931691227585549, "loss": 0.8938, "step": 2066 }, { "epoch": 
0.6636699309680526, "grad_norm": 0.8892626762390137, "learning_rate": 0.00019315643722265757, "loss": 0.711, "step": 2067 }, { "epoch": 0.6639910097929042, "grad_norm": 1.4477237462997437, "learning_rate": 0.0001931437403359309, "loss": 1.0521, "step": 2068 }, { "epoch": 0.6643120886177557, "grad_norm": 1.3851264715194702, "learning_rate": 0.00019313103209992204, "loss": 1.0395, "step": 2069 }, { "epoch": 0.6646331674426071, "grad_norm": 1.1687705516815186, "learning_rate": 0.0001931183125161794, "loss": 0.8819, "step": 2070 }, { "epoch": 0.6649542462674587, "grad_norm": 1.4307971000671387, "learning_rate": 0.00019310558158625285, "loss": 1.012, "step": 2071 }, { "epoch": 0.6652753250923101, "grad_norm": 1.1840988397598267, "learning_rate": 0.00019309283931169356, "loss": 1.034, "step": 2072 }, { "epoch": 0.6655964039171617, "grad_norm": 1.1295652389526367, "learning_rate": 0.00019308008569405422, "loss": 0.968, "step": 2073 }, { "epoch": 0.6659174827420131, "grad_norm": 1.238215684890747, "learning_rate": 0.0001930673207348888, "loss": 0.9026, "step": 2074 }, { "epoch": 0.6662385615668647, "grad_norm": 1.0732163190841675, "learning_rate": 0.00019305454443575262, "loss": 1.0993, "step": 2075 }, { "epoch": 0.6665596403917161, "grad_norm": 1.2432470321655273, "learning_rate": 0.00019304175679820247, "loss": 1.2107, "step": 2076 }, { "epoch": 0.6668807192165677, "grad_norm": 1.0395501852035522, "learning_rate": 0.0001930289578237965, "loss": 0.8751, "step": 2077 }, { "epoch": 0.6672017980414192, "grad_norm": 1.7850005626678467, "learning_rate": 0.00019301614751409416, "loss": 0.9662, "step": 2078 }, { "epoch": 0.6675228768662707, "grad_norm": 1.5475821495056152, "learning_rate": 0.0001930033258706564, "loss": 1.056, "step": 2079 }, { "epoch": 0.6678439556911222, "grad_norm": 0.9385414123535156, "learning_rate": 0.00019299049289504553, "loss": 0.8348, "step": 2080 }, { "epoch": 0.6681650345159736, "grad_norm": 1.2614946365356445, "learning_rate": 
0.00019297764858882514, "loss": 0.8467, "step": 2081 }, { "epoch": 0.6684861133408252, "grad_norm": 1.07332444190979, "learning_rate": 0.00019296479295356035, "loss": 0.9718, "step": 2082 }, { "epoch": 0.6688071921656766, "grad_norm": 2.2811384201049805, "learning_rate": 0.00019295192599081746, "loss": 1.1172, "step": 2083 }, { "epoch": 0.6691282709905282, "grad_norm": 0.9308247566223145, "learning_rate": 0.00019293904770216437, "loss": 0.878, "step": 2084 }, { "epoch": 0.6694493498153796, "grad_norm": 1.3048980236053467, "learning_rate": 0.00019292615808917026, "loss": 0.9988, "step": 2085 }, { "epoch": 0.6697704286402312, "grad_norm": 2.0807573795318604, "learning_rate": 0.00019291325715340563, "loss": 0.9486, "step": 2086 }, { "epoch": 0.6700915074650827, "grad_norm": 1.4993385076522827, "learning_rate": 0.00019290034489644246, "loss": 0.9338, "step": 2087 }, { "epoch": 0.6704125862899342, "grad_norm": 1.1639915704727173, "learning_rate": 0.00019288742131985407, "loss": 0.9569, "step": 2088 }, { "epoch": 0.6707336651147857, "grad_norm": 1.270796775817871, "learning_rate": 0.00019287448642521513, "loss": 0.904, "step": 2089 }, { "epoch": 0.6710547439396372, "grad_norm": 1.7701547145843506, "learning_rate": 0.00019286154021410173, "loss": 1.0951, "step": 2090 }, { "epoch": 0.6713758227644887, "grad_norm": 1.2145819664001465, "learning_rate": 0.00019284858268809137, "loss": 0.7358, "step": 2091 }, { "epoch": 0.6716969015893401, "grad_norm": 1.4250010251998901, "learning_rate": 0.00019283561384876284, "loss": 1.0274, "step": 2092 }, { "epoch": 0.6720179804141917, "grad_norm": 1.232686996459961, "learning_rate": 0.00019282263369769633, "loss": 1.0309, "step": 2093 }, { "epoch": 0.6723390592390431, "grad_norm": 1.1875118017196655, "learning_rate": 0.00019280964223647348, "loss": 0.789, "step": 2094 }, { "epoch": 0.6726601380638947, "grad_norm": 1.035929560661316, "learning_rate": 0.00019279663946667727, "loss": 0.8858, "step": 2095 }, { "epoch": 0.6729812168887462, 
"grad_norm": 1.323173999786377, "learning_rate": 0.000192783625389892, "loss": 1.0185, "step": 2096 }, { "epoch": 0.6733022957135977, "grad_norm": 1.4377621412277222, "learning_rate": 0.00019277060000770342, "loss": 0.6274, "step": 2097 }, { "epoch": 0.6736233745384492, "grad_norm": 0.6943148374557495, "learning_rate": 0.00019275756332169867, "loss": 0.5938, "step": 2098 }, { "epoch": 0.6739444533633007, "grad_norm": 1.0975568294525146, "learning_rate": 0.00019274451533346615, "loss": 0.5224, "step": 2099 }, { "epoch": 0.6742655321881522, "grad_norm": 0.7381674647331238, "learning_rate": 0.00019273145604459577, "loss": 0.5018, "step": 2100 }, { "epoch": 0.6745866110130037, "grad_norm": 1.5068893432617188, "learning_rate": 0.00019271838545667876, "loss": 1.3901, "step": 2101 }, { "epoch": 0.6749076898378552, "grad_norm": 4.2163472175598145, "learning_rate": 0.0001927053035713077, "loss": 0.9407, "step": 2102 }, { "epoch": 0.6752287686627066, "grad_norm": 1.432925820350647, "learning_rate": 0.00019269221039007665, "loss": 0.9272, "step": 2103 }, { "epoch": 0.6755498474875582, "grad_norm": 1.374459981918335, "learning_rate": 0.0001926791059145809, "loss": 0.7463, "step": 2104 }, { "epoch": 0.6758709263124097, "grad_norm": 1.4353713989257812, "learning_rate": 0.0001926659901464172, "loss": 0.8202, "step": 2105 }, { "epoch": 0.6761920051372612, "grad_norm": 1.1444305181503296, "learning_rate": 0.00019265286308718372, "loss": 0.7957, "step": 2106 }, { "epoch": 0.6765130839621127, "grad_norm": 1.4592642784118652, "learning_rate": 0.00019263972473847993, "loss": 0.6475, "step": 2107 }, { "epoch": 0.6768341627869642, "grad_norm": 1.0431547164916992, "learning_rate": 0.00019262657510190666, "loss": 0.8323, "step": 2108 }, { "epoch": 0.6771552416118157, "grad_norm": 1.0479865074157715, "learning_rate": 0.00019261341417906621, "loss": 0.9267, "step": 2109 }, { "epoch": 0.6774763204366672, "grad_norm": 1.3619253635406494, "learning_rate": 0.00019260024197156214, "loss": 1.0108, 
"step": 2110 }, { "epoch": 0.6777973992615187, "grad_norm": 1.088303565979004, "learning_rate": 0.0001925870584809995, "loss": 0.9458, "step": 2111 }, { "epoch": 0.6781184780863702, "grad_norm": 1.064591646194458, "learning_rate": 0.00019257386370898457, "loss": 0.8121, "step": 2112 }, { "epoch": 0.6784395569112217, "grad_norm": 0.9795963764190674, "learning_rate": 0.00019256065765712522, "loss": 1.006, "step": 2113 }, { "epoch": 0.6787606357360733, "grad_norm": 1.218254804611206, "learning_rate": 0.00019254744032703048, "loss": 0.866, "step": 2114 }, { "epoch": 0.6790817145609247, "grad_norm": 0.9186314940452576, "learning_rate": 0.00019253421172031086, "loss": 0.922, "step": 2115 }, { "epoch": 0.6794027933857762, "grad_norm": 1.187469244003296, "learning_rate": 0.00019252097183857823, "loss": 0.7671, "step": 2116 }, { "epoch": 0.6797238722106277, "grad_norm": 1.1949058771133423, "learning_rate": 0.0001925077206834458, "loss": 0.9525, "step": 2117 }, { "epoch": 0.6800449510354792, "grad_norm": 1.0139573812484741, "learning_rate": 0.00019249445825652824, "loss": 0.8317, "step": 2118 }, { "epoch": 0.6803660298603307, "grad_norm": 1.2550780773162842, "learning_rate": 0.0001924811845594415, "loss": 1.0096, "step": 2119 }, { "epoch": 0.6806871086851822, "grad_norm": 1.0419883728027344, "learning_rate": 0.00019246789959380295, "loss": 0.9382, "step": 2120 }, { "epoch": 0.6810081875100337, "grad_norm": 0.8842172622680664, "learning_rate": 0.00019245460336123134, "loss": 1.0352, "step": 2121 }, { "epoch": 0.6813292663348852, "grad_norm": 1.3533904552459717, "learning_rate": 0.00019244129586334672, "loss": 1.1023, "step": 2122 }, { "epoch": 0.6816503451597368, "grad_norm": 1.0904252529144287, "learning_rate": 0.0001924279771017706, "loss": 1.0112, "step": 2123 }, { "epoch": 0.6819714239845882, "grad_norm": 1.2503504753112793, "learning_rate": 0.00019241464707812585, "loss": 0.8125, "step": 2124 }, { "epoch": 0.6822925028094398, "grad_norm": 1.4106786251068115, 
"learning_rate": 0.0001924013057940367, "loss": 0.9185, "step": 2125 }, { "epoch": 0.6826135816342912, "grad_norm": 1.1263327598571777, "learning_rate": 0.0001923879532511287, "loss": 0.9063, "step": 2126 }, { "epoch": 0.6829346604591428, "grad_norm": 2.7052721977233887, "learning_rate": 0.00019237458945102882, "loss": 0.9509, "step": 2127 }, { "epoch": 0.6832557392839942, "grad_norm": 1.1038817167282104, "learning_rate": 0.00019236121439536542, "loss": 0.9669, "step": 2128 }, { "epoch": 0.6835768181088457, "grad_norm": 1.2589917182922363, "learning_rate": 0.00019234782808576824, "loss": 0.9313, "step": 2129 }, { "epoch": 0.6838978969336972, "grad_norm": 1.6602294445037842, "learning_rate": 0.0001923344305238683, "loss": 1.0642, "step": 2130 }, { "epoch": 0.6842189757585487, "grad_norm": 0.957186758518219, "learning_rate": 0.00019232102171129811, "loss": 0.7523, "step": 2131 }, { "epoch": 0.6845400545834002, "grad_norm": 0.8682358264923096, "learning_rate": 0.00019230760164969143, "loss": 0.8184, "step": 2132 }, { "epoch": 0.6848611334082517, "grad_norm": 1.2894452810287476, "learning_rate": 0.0001922941703406835, "loss": 0.7959, "step": 2133 }, { "epoch": 0.6851822122331033, "grad_norm": 0.9780199527740479, "learning_rate": 0.00019228072778591088, "loss": 0.865, "step": 2134 }, { "epoch": 0.6855032910579547, "grad_norm": 1.7117539644241333, "learning_rate": 0.0001922672739870115, "loss": 0.9489, "step": 2135 }, { "epoch": 0.6858243698828063, "grad_norm": 1.3718947172164917, "learning_rate": 0.00019225380894562463, "loss": 1.0358, "step": 2136 }, { "epoch": 0.6861454487076577, "grad_norm": 1.2108441591262817, "learning_rate": 0.00019224033266339102, "loss": 1.023, "step": 2137 }, { "epoch": 0.6864665275325093, "grad_norm": 1.9689425230026245, "learning_rate": 0.00019222684514195264, "loss": 0.8403, "step": 2138 }, { "epoch": 0.6867876063573607, "grad_norm": 1.626488208770752, "learning_rate": 0.00019221334638295294, "loss": 0.889, "step": 2139 }, { "epoch": 
0.6871086851822122, "grad_norm": 1.0145803689956665, "learning_rate": 0.0001921998363880367, "loss": 0.9659, "step": 2140 }, { "epoch": 0.6874297640070637, "grad_norm": 2.1822826862335205, "learning_rate": 0.00019218631515885006, "loss": 0.8542, "step": 2141 }, { "epoch": 0.6877508428319152, "grad_norm": 0.9943452477455139, "learning_rate": 0.00019217278269704056, "loss": 0.8499, "step": 2142 }, { "epoch": 0.6880719216567668, "grad_norm": 0.675469696521759, "learning_rate": 0.00019215923900425707, "loss": 0.6101, "step": 2143 }, { "epoch": 0.6883930004816182, "grad_norm": 1.052608609199524, "learning_rate": 0.00019214568408214985, "loss": 0.8342, "step": 2144 }, { "epoch": 0.6887140793064698, "grad_norm": 0.9476191997528076, "learning_rate": 0.00019213211793237057, "loss": 0.8063, "step": 2145 }, { "epoch": 0.6890351581313212, "grad_norm": 1.0476568937301636, "learning_rate": 0.00019211854055657215, "loss": 0.6778, "step": 2146 }, { "epoch": 0.6893562369561728, "grad_norm": 1.4577487707138062, "learning_rate": 0.00019210495195640895, "loss": 0.7882, "step": 2147 }, { "epoch": 0.6896773157810242, "grad_norm": 1.045127034187317, "learning_rate": 0.0001920913521335368, "loss": 0.6774, "step": 2148 }, { "epoch": 0.6899983946058758, "grad_norm": 0.8924066424369812, "learning_rate": 0.00019207774108961272, "loss": 0.764, "step": 2149 }, { "epoch": 0.6903194734307272, "grad_norm": 1.1722183227539062, "learning_rate": 0.00019206411882629517, "loss": 0.7897, "step": 2150 }, { "epoch": 0.6906405522555787, "grad_norm": 1.1900309324264526, "learning_rate": 0.00019205048534524406, "loss": 1.2467, "step": 2151 }, { "epoch": 0.6909616310804303, "grad_norm": 1.3887277841567993, "learning_rate": 0.00019203684064812045, "loss": 1.2293, "step": 2152 }, { "epoch": 0.6912827099052817, "grad_norm": 1.1636091470718384, "learning_rate": 0.00019202318473658705, "loss": 0.8659, "step": 2153 }, { "epoch": 0.6916037887301333, "grad_norm": 1.2683323621749878, "learning_rate": 
0.0001920095176123077, "loss": 0.7881, "step": 2154 }, { "epoch": 0.6919248675549847, "grad_norm": 1.227537989616394, "learning_rate": 0.00019199583927694772, "loss": 0.841, "step": 2155 }, { "epoch": 0.6922459463798363, "grad_norm": 1.9995452165603638, "learning_rate": 0.00019198214973217378, "loss": 0.8185, "step": 2156 }, { "epoch": 0.6925670252046877, "grad_norm": 1.2101178169250488, "learning_rate": 0.00019196844897965393, "loss": 0.5357, "step": 2157 }, { "epoch": 0.6928881040295393, "grad_norm": 0.990922749042511, "learning_rate": 0.00019195473702105748, "loss": 0.8727, "step": 2158 }, { "epoch": 0.6932091828543907, "grad_norm": 1.5191935300827026, "learning_rate": 0.0001919410138580553, "loss": 1.1395, "step": 2159 }, { "epoch": 0.6935302616792423, "grad_norm": 1.074205994606018, "learning_rate": 0.00019192727949231945, "loss": 1.1821, "step": 2160 }, { "epoch": 0.6938513405040938, "grad_norm": 1.0748862028121948, "learning_rate": 0.00019191353392552344, "loss": 0.9226, "step": 2161 }, { "epoch": 0.6941724193289452, "grad_norm": 1.0452769994735718, "learning_rate": 0.00019189977715934213, "loss": 0.8429, "step": 2162 }, { "epoch": 0.6944934981537968, "grad_norm": 1.0717366933822632, "learning_rate": 0.00019188600919545174, "loss": 0.8875, "step": 2163 }, { "epoch": 0.6948145769786482, "grad_norm": 0.7801916599273682, "learning_rate": 0.00019187223003552985, "loss": 0.8721, "step": 2164 }, { "epoch": 0.6951356558034998, "grad_norm": 1.0750560760498047, "learning_rate": 0.0001918584396812554, "loss": 1.103, "step": 2165 }, { "epoch": 0.6954567346283512, "grad_norm": 1.0144506692886353, "learning_rate": 0.00019184463813430873, "loss": 1.0441, "step": 2166 }, { "epoch": 0.6957778134532028, "grad_norm": 1.0422799587249756, "learning_rate": 0.00019183082539637146, "loss": 0.8684, "step": 2167 }, { "epoch": 0.6960988922780542, "grad_norm": 1.1190441846847534, "learning_rate": 0.0001918170014691267, "loss": 0.8669, "step": 2168 }, { "epoch": 0.6964199711029058, 
"grad_norm": 1.2634443044662476, "learning_rate": 0.0001918031663542588, "loss": 0.8319, "step": 2169 }, { "epoch": 0.6967410499277573, "grad_norm": 1.256981611251831, "learning_rate": 0.0001917893200534536, "loss": 0.9238, "step": 2170 }, { "epoch": 0.6970621287526088, "grad_norm": 1.5736911296844482, "learning_rate": 0.00019177546256839812, "loss": 1.1809, "step": 2171 }, { "epoch": 0.6973832075774603, "grad_norm": 0.890072762966156, "learning_rate": 0.00019176159390078094, "loss": 0.8873, "step": 2172 }, { "epoch": 0.6977042864023117, "grad_norm": 0.9746190309524536, "learning_rate": 0.00019174771405229186, "loss": 0.8685, "step": 2173 }, { "epoch": 0.6980253652271633, "grad_norm": 1.4396299123764038, "learning_rate": 0.00019173382302462214, "loss": 0.9388, "step": 2174 }, { "epoch": 0.6983464440520147, "grad_norm": 1.3588879108428955, "learning_rate": 0.00019171992081946435, "loss": 1.0749, "step": 2175 }, { "epoch": 0.6986675228768663, "grad_norm": 2.2067346572875977, "learning_rate": 0.0001917060074385124, "loss": 1.0014, "step": 2176 }, { "epoch": 0.6989886017017177, "grad_norm": 1.2330677509307861, "learning_rate": 0.00019169208288346166, "loss": 1.1379, "step": 2177 }, { "epoch": 0.6993096805265693, "grad_norm": 1.0044240951538086, "learning_rate": 0.0001916781471560087, "loss": 0.9738, "step": 2178 }, { "epoch": 0.6996307593514208, "grad_norm": 1.3933671712875366, "learning_rate": 0.00019166420025785164, "loss": 1.0522, "step": 2179 }, { "epoch": 0.6999518381762723, "grad_norm": 0.9893519282341003, "learning_rate": 0.0001916502421906898, "loss": 0.8907, "step": 2180 }, { "epoch": 0.7002729170011238, "grad_norm": 1.0318580865859985, "learning_rate": 0.00019163627295622397, "loss": 0.9829, "step": 2181 }, { "epoch": 0.7005939958259753, "grad_norm": 1.4720815420150757, "learning_rate": 0.00019162229255615624, "loss": 0.9374, "step": 2182 }, { "epoch": 0.7009150746508268, "grad_norm": 1.006734013557434, "learning_rate": 0.00019160830099219006, "loss": 1.04, 
"step": 2183 }, { "epoch": 0.7012361534756782, "grad_norm": 2.0803046226501465, "learning_rate": 0.00019159429826603032, "loss": 1.1813, "step": 2184 }, { "epoch": 0.7015572323005298, "grad_norm": 1.770952820777893, "learning_rate": 0.00019158028437938317, "loss": 1.0018, "step": 2185 }, { "epoch": 0.7018783111253812, "grad_norm": 1.0838485956192017, "learning_rate": 0.00019156625933395614, "loss": 1.0851, "step": 2186 }, { "epoch": 0.7021993899502328, "grad_norm": 1.384687066078186, "learning_rate": 0.00019155222313145816, "loss": 0.9151, "step": 2187 }, { "epoch": 0.7025204687750842, "grad_norm": 0.8908778429031372, "learning_rate": 0.0001915381757735995, "loss": 0.9169, "step": 2188 }, { "epoch": 0.7028415475999358, "grad_norm": 0.814580500125885, "learning_rate": 0.00019152411726209176, "loss": 0.5335, "step": 2189 }, { "epoch": 0.7031626264247873, "grad_norm": 1.491808533668518, "learning_rate": 0.000191510047598648, "loss": 0.9649, "step": 2190 }, { "epoch": 0.7034837052496388, "grad_norm": 1.308821678161621, "learning_rate": 0.0001914959667849825, "loss": 0.9609, "step": 2191 }, { "epoch": 0.7038047840744903, "grad_norm": 1.6235363483428955, "learning_rate": 0.00019148187482281097, "loss": 0.8551, "step": 2192 }, { "epoch": 0.7041258628993418, "grad_norm": 1.9721722602844238, "learning_rate": 0.0001914677717138505, "loss": 0.8588, "step": 2193 }, { "epoch": 0.7044469417241933, "grad_norm": 1.485297679901123, "learning_rate": 0.00019145365745981948, "loss": 0.7772, "step": 2194 }, { "epoch": 0.7047680205490447, "grad_norm": 1.1313917636871338, "learning_rate": 0.00019143953206243776, "loss": 0.8873, "step": 2195 }, { "epoch": 0.7050890993738963, "grad_norm": 1.1025351285934448, "learning_rate": 0.00019142539552342638, "loss": 0.8683, "step": 2196 }, { "epoch": 0.7054101781987477, "grad_norm": 1.7644602060317993, "learning_rate": 0.0001914112478445079, "loss": 0.6767, "step": 2197 }, { "epoch": 0.7057312570235993, "grad_norm": 0.9576209783554077, 
"learning_rate": 0.00019139708902740613, "loss": 0.6193, "step": 2198 }, { "epoch": 0.7060523358484508, "grad_norm": 1.4080398082733154, "learning_rate": 0.0001913829190738463, "loss": 0.7909, "step": 2199 }, { "epoch": 0.7063734146733023, "grad_norm": 1.04661226272583, "learning_rate": 0.000191368737985555, "loss": 0.6024, "step": 2200 }, { "epoch": 0.7066944934981538, "grad_norm": 1.448823094367981, "learning_rate": 0.0001913545457642601, "loss": 1.4655, "step": 2201 }, { "epoch": 0.7070155723230053, "grad_norm": 1.4213786125183105, "learning_rate": 0.0001913403424116909, "loss": 1.2999, "step": 2202 }, { "epoch": 0.7073366511478568, "grad_norm": 1.3486974239349365, "learning_rate": 0.00019132612792957808, "loss": 0.946, "step": 2203 }, { "epoch": 0.7076577299727083, "grad_norm": 1.5810649394989014, "learning_rate": 0.00019131190231965356, "loss": 0.8893, "step": 2204 }, { "epoch": 0.7079788087975598, "grad_norm": 1.3420324325561523, "learning_rate": 0.00019129766558365074, "loss": 0.7434, "step": 2205 }, { "epoch": 0.7082998876224113, "grad_norm": 1.3447771072387695, "learning_rate": 0.0001912834177233043, "loss": 0.8466, "step": 2206 }, { "epoch": 0.7086209664472628, "grad_norm": 1.1809029579162598, "learning_rate": 0.0001912691587403503, "loss": 0.6462, "step": 2207 }, { "epoch": 0.7089420452721144, "grad_norm": 1.2314188480377197, "learning_rate": 0.00019125488863652615, "loss": 0.5693, "step": 2208 }, { "epoch": 0.7092631240969658, "grad_norm": 1.04121994972229, "learning_rate": 0.00019124060741357063, "loss": 0.8847, "step": 2209 }, { "epoch": 0.7095842029218173, "grad_norm": 1.0673465728759766, "learning_rate": 0.00019122631507322387, "loss": 1.0044, "step": 2210 }, { "epoch": 0.7099052817466688, "grad_norm": 0.6472902894020081, "learning_rate": 0.0001912120116172273, "loss": 0.7363, "step": 2211 }, { "epoch": 0.7102263605715203, "grad_norm": 0.9594241380691528, "learning_rate": 0.00019119769704732382, "loss": 0.7698, "step": 2212 }, { "epoch": 
0.7105474393963718, "grad_norm": 1.1029056310653687, "learning_rate": 0.0001911833713652576, "loss": 1.025, "step": 2213 }, { "epoch": 0.7108685182212233, "grad_norm": 1.1992616653442383, "learning_rate": 0.00019116903457277413, "loss": 1.0412, "step": 2214 }, { "epoch": 0.7111895970460748, "grad_norm": 1.4310470819473267, "learning_rate": 0.00019115468667162038, "loss": 0.8216, "step": 2215 }, { "epoch": 0.7115106758709263, "grad_norm": 1.5409244298934937, "learning_rate": 0.00019114032766354453, "loss": 0.5844, "step": 2216 }, { "epoch": 0.7118317546957779, "grad_norm": 1.4199776649475098, "learning_rate": 0.00019112595755029624, "loss": 1.0082, "step": 2217 }, { "epoch": 0.7121528335206293, "grad_norm": 1.1927727460861206, "learning_rate": 0.0001911115763336264, "loss": 0.8184, "step": 2218 }, { "epoch": 0.7124739123454809, "grad_norm": 0.9337337017059326, "learning_rate": 0.0001910971840152874, "loss": 0.8842, "step": 2219 }, { "epoch": 0.7127949911703323, "grad_norm": 0.9035426378250122, "learning_rate": 0.0001910827805970328, "loss": 0.8288, "step": 2220 }, { "epoch": 0.7131160699951838, "grad_norm": 1.3903597593307495, "learning_rate": 0.00019106836608061772, "loss": 0.8678, "step": 2221 }, { "epoch": 0.7134371488200353, "grad_norm": 1.2221095561981201, "learning_rate": 0.00019105394046779845, "loss": 0.7565, "step": 2222 }, { "epoch": 0.7137582276448868, "grad_norm": 1.1277964115142822, "learning_rate": 0.00019103950376033276, "loss": 0.9622, "step": 2223 }, { "epoch": 0.7140793064697383, "grad_norm": 1.453009843826294, "learning_rate": 0.00019102505595997965, "loss": 0.9864, "step": 2224 }, { "epoch": 0.7144003852945898, "grad_norm": 1.2029697895050049, "learning_rate": 0.00019101059706849957, "loss": 1.1135, "step": 2225 }, { "epoch": 0.7147214641194414, "grad_norm": 1.6069939136505127, "learning_rate": 0.00019099612708765434, "loss": 1.078, "step": 2226 }, { "epoch": 0.7150425429442928, "grad_norm": 1.1098419427871704, "learning_rate": 
0.000190981646019207, "loss": 0.8924, "step": 2227 }, { "epoch": 0.7153636217691444, "grad_norm": 1.2886768579483032, "learning_rate": 0.0001909671538649221, "loss": 1.105, "step": 2228 }, { "epoch": 0.7156847005939958, "grad_norm": 0.9400178790092468, "learning_rate": 0.00019095265062656544, "loss": 1.1208, "step": 2229 }, { "epoch": 0.7160057794188474, "grad_norm": 1.2319140434265137, "learning_rate": 0.00019093813630590418, "loss": 0.8575, "step": 2230 }, { "epoch": 0.7163268582436988, "grad_norm": 1.095289945602417, "learning_rate": 0.00019092361090470688, "loss": 0.842, "step": 2231 }, { "epoch": 0.7166479370685503, "grad_norm": 1.421386957168579, "learning_rate": 0.00019090907442474334, "loss": 1.0803, "step": 2232 }, { "epoch": 0.7169690158934018, "grad_norm": 1.5871707201004028, "learning_rate": 0.00019089452686778488, "loss": 1.0677, "step": 2233 }, { "epoch": 0.7172900947182533, "grad_norm": 1.591778039932251, "learning_rate": 0.00019087996823560402, "loss": 1.0679, "step": 2234 }, { "epoch": 0.7176111735431049, "grad_norm": 2.42740797996521, "learning_rate": 0.0001908653985299747, "loss": 1.0548, "step": 2235 }, { "epoch": 0.7179322523679563, "grad_norm": 1.0320085287094116, "learning_rate": 0.0001908508177526722, "loss": 0.7861, "step": 2236 }, { "epoch": 0.7182533311928079, "grad_norm": 1.18021821975708, "learning_rate": 0.00019083622590547312, "loss": 0.8066, "step": 2237 }, { "epoch": 0.7185744100176593, "grad_norm": 1.0343506336212158, "learning_rate": 0.00019082162299015546, "loss": 0.8142, "step": 2238 }, { "epoch": 0.7188954888425109, "grad_norm": 1.4337462186813354, "learning_rate": 0.00019080700900849851, "loss": 0.9664, "step": 2239 }, { "epoch": 0.7192165676673623, "grad_norm": 1.5604829788208008, "learning_rate": 0.000190792383962283, "loss": 1.024, "step": 2240 }, { "epoch": 0.7195376464922139, "grad_norm": 1.152066707611084, "learning_rate": 0.00019077774785329087, "loss": 1.0221, "step": 2241 }, { "epoch": 0.7198587253170653, "grad_norm": 
1.9510457515716553, "learning_rate": 0.00019076310068330554, "loss": 1.0129, "step": 2242 }, { "epoch": 0.7201798041419168, "grad_norm": 1.1222991943359375, "learning_rate": 0.0001907484424541117, "loss": 0.8, "step": 2243 }, { "epoch": 0.7205008829667684, "grad_norm": 1.8768889904022217, "learning_rate": 0.00019073377316749542, "loss": 0.8479, "step": 2244 }, { "epoch": 0.7208219617916198, "grad_norm": 1.251123309135437, "learning_rate": 0.00019071909282524413, "loss": 1.0588, "step": 2245 }, { "epoch": 0.7211430406164714, "grad_norm": 1.2855134010314941, "learning_rate": 0.0001907044014291465, "loss": 0.9369, "step": 2246 }, { "epoch": 0.7214641194413228, "grad_norm": 0.9821279644966125, "learning_rate": 0.0001906896989809927, "loss": 0.6617, "step": 2247 }, { "epoch": 0.7217851982661744, "grad_norm": 1.674775242805481, "learning_rate": 0.00019067498548257423, "loss": 0.7747, "step": 2248 }, { "epoch": 0.7221062770910258, "grad_norm": 1.1248822212219238, "learning_rate": 0.00019066026093568378, "loss": 0.739, "step": 2249 }, { "epoch": 0.7224273559158774, "grad_norm": 0.8824935555458069, "learning_rate": 0.00019064552534211554, "loss": 0.5349, "step": 2250 }, { "epoch": 0.7227484347407288, "grad_norm": 1.197925329208374, "learning_rate": 0.000190630778703665, "loss": 1.3444, "step": 2251 }, { "epoch": 0.7230695135655804, "grad_norm": 0.9842731356620789, "learning_rate": 0.00019061602102212898, "loss": 1.3156, "step": 2252 }, { "epoch": 0.7233905923904318, "grad_norm": 1.0988041162490845, "learning_rate": 0.0001906012522993057, "loss": 1.1763, "step": 2253 }, { "epoch": 0.7237116712152833, "grad_norm": 1.067678451538086, "learning_rate": 0.0001905864725369946, "loss": 0.8682, "step": 2254 }, { "epoch": 0.7240327500401349, "grad_norm": 1.3331475257873535, "learning_rate": 0.00019057168173699664, "loss": 0.7538, "step": 2255 }, { "epoch": 0.7243538288649863, "grad_norm": 1.1442729234695435, "learning_rate": 0.00019055687990111398, "loss": 0.7896, "step": 2256 }, { 
"epoch": 0.7246749076898379, "grad_norm": 1.0878592729568481, "learning_rate": 0.0001905420670311502, "loss": 0.5776, "step": 2257 }, { "epoch": 0.7249959865146893, "grad_norm": 1.124906301498413, "learning_rate": 0.00019052724312891014, "loss": 0.8746, "step": 2258 }, { "epoch": 0.7253170653395409, "grad_norm": 1.1294090747833252, "learning_rate": 0.00019051240819620014, "loss": 1.0163, "step": 2259 }, { "epoch": 0.7256381441643923, "grad_norm": 1.1215919256210327, "learning_rate": 0.0001904975622348278, "loss": 1.042, "step": 2260 }, { "epoch": 0.7259592229892439, "grad_norm": 0.8851989507675171, "learning_rate": 0.00019048270524660196, "loss": 0.6864, "step": 2261 }, { "epoch": 0.7262803018140953, "grad_norm": 0.9922202229499817, "learning_rate": 0.00019046783723333297, "loss": 0.7434, "step": 2262 }, { "epoch": 0.7266013806389469, "grad_norm": 1.0879603624343872, "learning_rate": 0.00019045295819683242, "loss": 0.7117, "step": 2263 }, { "epoch": 0.7269224594637984, "grad_norm": 1.3650341033935547, "learning_rate": 0.0001904380681389133, "loss": 0.9001, "step": 2264 }, { "epoch": 0.7272435382886498, "grad_norm": 1.5964879989624023, "learning_rate": 0.00019042316706138987, "loss": 1.0102, "step": 2265 }, { "epoch": 0.7275646171135014, "grad_norm": 0.8338374495506287, "learning_rate": 0.00019040825496607786, "loss": 0.8816, "step": 2266 }, { "epoch": 0.7278856959383528, "grad_norm": 1.0702760219573975, "learning_rate": 0.00019039333185479418, "loss": 0.8459, "step": 2267 }, { "epoch": 0.7282067747632044, "grad_norm": 0.976639449596405, "learning_rate": 0.0001903783977293572, "loss": 0.8101, "step": 2268 }, { "epoch": 0.7285278535880558, "grad_norm": 1.2155683040618896, "learning_rate": 0.00019036345259158667, "loss": 1.0607, "step": 2269 }, { "epoch": 0.7288489324129074, "grad_norm": 1.2647230625152588, "learning_rate": 0.0001903484964433035, "loss": 1.1008, "step": 2270 }, { "epoch": 0.7291700112377588, "grad_norm": 0.9674025177955627, "learning_rate": 
0.0001903335292863301, "loss": 0.7245, "step": 2271 }, { "epoch": 0.7294910900626104, "grad_norm": 1.2419253587722778, "learning_rate": 0.00019031855112249015, "loss": 0.8682, "step": 2272 }, { "epoch": 0.7298121688874619, "grad_norm": 0.9692038297653198, "learning_rate": 0.00019030356195360874, "loss": 0.9075, "step": 2273 }, { "epoch": 0.7301332477123134, "grad_norm": 1.1425732374191284, "learning_rate": 0.0001902885617815122, "loss": 0.7757, "step": 2274 }, { "epoch": 0.7304543265371649, "grad_norm": 0.9863192439079285, "learning_rate": 0.0001902735506080283, "loss": 0.8154, "step": 2275 }, { "epoch": 0.7307754053620164, "grad_norm": 0.9807154536247253, "learning_rate": 0.00019025852843498607, "loss": 0.9582, "step": 2276 }, { "epoch": 0.7310964841868679, "grad_norm": 1.2582796812057495, "learning_rate": 0.00019024349526421594, "loss": 1.009, "step": 2277 }, { "epoch": 0.7314175630117193, "grad_norm": 1.0475612878799438, "learning_rate": 0.00019022845109754966, "loss": 0.997, "step": 2278 }, { "epoch": 0.7317386418365709, "grad_norm": 1.6267993450164795, "learning_rate": 0.00019021339593682028, "loss": 0.9519, "step": 2279 }, { "epoch": 0.7320597206614223, "grad_norm": 1.2485898733139038, "learning_rate": 0.00019019832978386228, "loss": 0.8931, "step": 2280 }, { "epoch": 0.7323807994862739, "grad_norm": 0.9349825382232666, "learning_rate": 0.0001901832526405114, "loss": 0.8948, "step": 2281 }, { "epoch": 0.7327018783111254, "grad_norm": 1.5776710510253906, "learning_rate": 0.00019016816450860474, "loss": 0.9504, "step": 2282 }, { "epoch": 0.7330229571359769, "grad_norm": 0.9056895971298218, "learning_rate": 0.0001901530653899807, "loss": 0.8712, "step": 2283 }, { "epoch": 0.7333440359608284, "grad_norm": 1.3263038396835327, "learning_rate": 0.00019013795528647912, "loss": 0.8974, "step": 2284 }, { "epoch": 0.7336651147856799, "grad_norm": 1.0501210689544678, "learning_rate": 0.00019012283419994115, "loss": 0.888, "step": 2285 }, { "epoch": 0.7339861936105314, 
"grad_norm": 1.9327582120895386, "learning_rate": 0.00019010770213220916, "loss": 1.0191, "step": 2286 }, { "epoch": 0.7343072724353829, "grad_norm": 0.9051433205604553, "learning_rate": 0.000190092559085127, "loss": 0.736, "step": 2287 }, { "epoch": 0.7346283512602344, "grad_norm": 2.308243989944458, "learning_rate": 0.00019007740506053983, "loss": 0.9449, "step": 2288 }, { "epoch": 0.7349494300850858, "grad_norm": 0.9226866960525513, "learning_rate": 0.00019006224006029406, "loss": 0.9529, "step": 2289 }, { "epoch": 0.7352705089099374, "grad_norm": 1.1376844644546509, "learning_rate": 0.0001900470640862375, "loss": 0.8272, "step": 2290 }, { "epoch": 0.7355915877347889, "grad_norm": 1.475791335105896, "learning_rate": 0.00019003187714021938, "loss": 1.1438, "step": 2291 }, { "epoch": 0.7359126665596404, "grad_norm": 0.9544022679328918, "learning_rate": 0.00019001667922409008, "loss": 0.8274, "step": 2292 }, { "epoch": 0.7362337453844919, "grad_norm": 1.031445860862732, "learning_rate": 0.00019000147033970148, "loss": 0.7776, "step": 2293 }, { "epoch": 0.7365548242093434, "grad_norm": 0.8652722835540771, "learning_rate": 0.00018998625048890672, "loss": 0.7621, "step": 2294 }, { "epoch": 0.7368759030341949, "grad_norm": 1.1719671487808228, "learning_rate": 0.0001899710196735603, "loss": 0.9207, "step": 2295 }, { "epoch": 0.7371969818590464, "grad_norm": 1.2041524648666382, "learning_rate": 0.00018995577789551803, "loss": 0.8044, "step": 2296 }, { "epoch": 0.7375180606838979, "grad_norm": 1.1736308336257935, "learning_rate": 0.0001899405251566371, "loss": 0.8288, "step": 2297 }, { "epoch": 0.7378391395087494, "grad_norm": 0.8900883793830872, "learning_rate": 0.000189925261458776, "loss": 0.7487, "step": 2298 }, { "epoch": 0.7381602183336009, "grad_norm": 0.7392171621322632, "learning_rate": 0.00018990998680379456, "loss": 0.6899, "step": 2299 }, { "epoch": 0.7384812971584525, "grad_norm": 1.4253010749816895, "learning_rate": 0.00018989470119355398, "loss": 0.6706, 
"step": 2300 }, { "epoch": 0.7388023759833039, "grad_norm": 0.8190407752990723, "learning_rate": 0.0001898794046299167, "loss": 1.2835, "step": 2301 }, { "epoch": 0.7391234548081554, "grad_norm": 1.0139261484146118, "learning_rate": 0.00018986409711474665, "loss": 0.9747, "step": 2302 }, { "epoch": 0.7394445336330069, "grad_norm": 1.0037243366241455, "learning_rate": 0.00018984877864990888, "loss": 0.7472, "step": 2303 }, { "epoch": 0.7397656124578584, "grad_norm": 1.3445838689804077, "learning_rate": 0.00018983344923727003, "loss": 0.8011, "step": 2304 }, { "epoch": 0.7400866912827099, "grad_norm": 1.1861538887023926, "learning_rate": 0.00018981810887869785, "loss": 0.5261, "step": 2305 }, { "epoch": 0.7404077701075614, "grad_norm": 1.1027861833572388, "learning_rate": 0.00018980275757606157, "loss": 0.6695, "step": 2306 }, { "epoch": 0.7407288489324129, "grad_norm": 1.280778169631958, "learning_rate": 0.0001897873953312317, "loss": 0.7354, "step": 2307 }, { "epoch": 0.7410499277572644, "grad_norm": 1.1856905221939087, "learning_rate": 0.00018977202214608, "loss": 1.0707, "step": 2308 }, { "epoch": 0.7413710065821159, "grad_norm": 0.8002036809921265, "learning_rate": 0.00018975663802247976, "loss": 0.763, "step": 2309 }, { "epoch": 0.7416920854069674, "grad_norm": 1.3091520071029663, "learning_rate": 0.0001897412429623054, "loss": 1.1745, "step": 2310 }, { "epoch": 0.742013164231819, "grad_norm": 1.0086097717285156, "learning_rate": 0.00018972583696743285, "loss": 1.039, "step": 2311 }, { "epoch": 0.7423342430566704, "grad_norm": 1.0016741752624512, "learning_rate": 0.00018971042003973924, "loss": 0.9677, "step": 2312 }, { "epoch": 0.742655321881522, "grad_norm": 1.2029709815979004, "learning_rate": 0.000189694992181103, "loss": 1.062, "step": 2313 }, { "epoch": 0.7429764007063734, "grad_norm": 0.854852020740509, "learning_rate": 0.00018967955339340407, "loss": 0.8209, "step": 2314 }, { "epoch": 0.7432974795312249, "grad_norm": 0.892218828201294, "learning_rate": 
0.00018966410367852362, "loss": 1.0757, "step": 2315 }, { "epoch": 0.7436185583560764, "grad_norm": 0.8785187005996704, "learning_rate": 0.00018964864303834406, "loss": 0.9317, "step": 2316 }, { "epoch": 0.7439396371809279, "grad_norm": 0.999880850315094, "learning_rate": 0.0001896331714747493, "loss": 0.8786, "step": 2317 }, { "epoch": 0.7442607160057794, "grad_norm": 1.373669147491455, "learning_rate": 0.0001896176889896245, "loss": 1.0008, "step": 2318 }, { "epoch": 0.7445817948306309, "grad_norm": 1.0708602666854858, "learning_rate": 0.0001896021955848561, "loss": 1.0163, "step": 2319 }, { "epoch": 0.7449028736554825, "grad_norm": 1.0174452066421509, "learning_rate": 0.00018958669126233199, "loss": 0.9382, "step": 2320 }, { "epoch": 0.7452239524803339, "grad_norm": 1.2005442380905151, "learning_rate": 0.0001895711760239413, "loss": 0.8355, "step": 2321 }, { "epoch": 0.7455450313051855, "grad_norm": 1.3386541604995728, "learning_rate": 0.0001895556498715745, "loss": 0.8852, "step": 2322 }, { "epoch": 0.7458661101300369, "grad_norm": 1.0639350414276123, "learning_rate": 0.0001895401128071234, "loss": 0.9128, "step": 2323 }, { "epoch": 0.7461871889548884, "grad_norm": 0.9678146243095398, "learning_rate": 0.00018952456483248119, "loss": 0.9178, "step": 2324 }, { "epoch": 0.7465082677797399, "grad_norm": 1.260991096496582, "learning_rate": 0.00018950900594954227, "loss": 0.9372, "step": 2325 }, { "epoch": 0.7468293466045914, "grad_norm": 1.2211194038391113, "learning_rate": 0.00018949343616020252, "loss": 0.8576, "step": 2326 }, { "epoch": 0.7471504254294429, "grad_norm": 1.1809611320495605, "learning_rate": 0.00018947785546635904, "loss": 0.9619, "step": 2327 }, { "epoch": 0.7474715042542944, "grad_norm": 2.355696678161621, "learning_rate": 0.00018946226386991027, "loss": 1.1663, "step": 2328 }, { "epoch": 0.747792583079146, "grad_norm": 1.0631635189056396, "learning_rate": 0.000189446661372756, "loss": 0.8316, "step": 2329 }, { "epoch": 0.7481136619039974, 
"grad_norm": 1.7369928359985352, "learning_rate": 0.0001894310479767974, "loss": 0.8755, "step": 2330 }, { "epoch": 0.748434740728849, "grad_norm": 2.2223386764526367, "learning_rate": 0.0001894154236839368, "loss": 1.1898, "step": 2331 }, { "epoch": 0.7487558195537004, "grad_norm": 1.2930457592010498, "learning_rate": 0.00018939978849607814, "loss": 0.8362, "step": 2332 }, { "epoch": 0.749076898378552, "grad_norm": 0.9054352045059204, "learning_rate": 0.0001893841424151264, "loss": 0.8712, "step": 2333 }, { "epoch": 0.7493979772034034, "grad_norm": 1.637449026107788, "learning_rate": 0.000189368485442988, "loss": 1.0388, "step": 2334 }, { "epoch": 0.749719056028255, "grad_norm": 1.306647777557373, "learning_rate": 0.00018935281758157078, "loss": 0.8911, "step": 2335 }, { "epoch": 0.7500401348531064, "grad_norm": 1.2520787715911865, "learning_rate": 0.00018933713883278376, "loss": 0.8615, "step": 2336 }, { "epoch": 0.7503612136779579, "grad_norm": 1.5709820985794067, "learning_rate": 0.0001893214491985374, "loss": 0.8583, "step": 2337 }, { "epoch": 0.7506822925028095, "grad_norm": 1.1838668584823608, "learning_rate": 0.00018930574868074334, "loss": 0.9393, "step": 2338 }, { "epoch": 0.7510033713276609, "grad_norm": 1.7159479856491089, "learning_rate": 0.0001892900372813147, "loss": 0.7978, "step": 2339 }, { "epoch": 0.7513244501525125, "grad_norm": 0.8659077882766724, "learning_rate": 0.00018927431500216586, "loss": 0.8576, "step": 2340 }, { "epoch": 0.7516455289773639, "grad_norm": 1.2940974235534668, "learning_rate": 0.00018925858184521256, "loss": 0.7817, "step": 2341 }, { "epoch": 0.7519666078022155, "grad_norm": 1.3620824813842773, "learning_rate": 0.0001892428378123718, "loss": 1.0294, "step": 2342 }, { "epoch": 0.7522876866270669, "grad_norm": 1.067583680152893, "learning_rate": 0.00018922708290556198, "loss": 0.6811, "step": 2343 }, { "epoch": 0.7526087654519185, "grad_norm": 1.2058768272399902, "learning_rate": 0.0001892113171267027, "loss": 0.8882, 
"step": 2344 }, { "epoch": 0.7529298442767699, "grad_norm": 1.3467442989349365, "learning_rate": 0.0001891955404777151, "loss": 0.6371, "step": 2345 }, { "epoch": 0.7532509231016215, "grad_norm": 0.8499478697776794, "learning_rate": 0.00018917975296052142, "loss": 0.7051, "step": 2346 }, { "epoch": 0.753572001926473, "grad_norm": 1.8910988569259644, "learning_rate": 0.00018916395457704534, "loss": 0.7791, "step": 2347 }, { "epoch": 0.7538930807513244, "grad_norm": 1.0912384986877441, "learning_rate": 0.00018914814532921187, "loss": 0.6608, "step": 2348 }, { "epoch": 0.754214159576176, "grad_norm": 0.564804196357727, "learning_rate": 0.00018913232521894732, "loss": 0.4543, "step": 2349 }, { "epoch": 0.7545352384010274, "grad_norm": 0.8918449878692627, "learning_rate": 0.00018911649424817933, "loss": 0.4786, "step": 2350 }, { "epoch": 0.754856317225879, "grad_norm": 1.0368508100509644, "learning_rate": 0.0001891006524188368, "loss": 1.2446, "step": 2351 }, { "epoch": 0.7551773960507304, "grad_norm": 1.0669269561767578, "learning_rate": 0.00018908479973285005, "loss": 1.1619, "step": 2352 }, { "epoch": 0.755498474875582, "grad_norm": 1.34662663936615, "learning_rate": 0.00018906893619215066, "loss": 0.9378, "step": 2353 }, { "epoch": 0.7558195537004334, "grad_norm": 1.1142168045043945, "learning_rate": 0.0001890530617986716, "loss": 0.6657, "step": 2354 }, { "epoch": 0.756140632525285, "grad_norm": 1.00096595287323, "learning_rate": 0.00018903717655434707, "loss": 0.6454, "step": 2355 }, { "epoch": 0.7564617113501365, "grad_norm": 1.4639570713043213, "learning_rate": 0.00018902128046111266, "loss": 0.6425, "step": 2356 }, { "epoch": 0.756782790174988, "grad_norm": 1.1114808320999146, "learning_rate": 0.00018900537352090524, "loss": 0.7002, "step": 2357 }, { "epoch": 0.7571038689998395, "grad_norm": 1.290031909942627, "learning_rate": 0.00018898945573566308, "loss": 0.8956, "step": 2358 }, { "epoch": 0.7574249478246909, "grad_norm": 1.2316958904266357, "learning_rate": 
0.00018897352710732564, "loss": 0.8771, "step": 2359 }, { "epoch": 0.7577460266495425, "grad_norm": 1.4026559591293335, "learning_rate": 0.00018895758763783383, "loss": 1.0492, "step": 2360 }, { "epoch": 0.7580671054743939, "grad_norm": 0.8219192624092102, "learning_rate": 0.00018894163732912977, "loss": 0.8172, "step": 2361 }, { "epoch": 0.7583881842992455, "grad_norm": 0.9561658501625061, "learning_rate": 0.000188925676183157, "loss": 0.8669, "step": 2362 }, { "epoch": 0.7587092631240969, "grad_norm": 0.9221644997596741, "learning_rate": 0.00018890970420186033, "loss": 0.701, "step": 2363 }, { "epoch": 0.7590303419489485, "grad_norm": 1.0757882595062256, "learning_rate": 0.0001888937213871859, "loss": 0.7451, "step": 2364 }, { "epoch": 0.7593514207738, "grad_norm": 1.2633696794509888, "learning_rate": 0.00018887772774108116, "loss": 0.9686, "step": 2365 }, { "epoch": 0.7596724995986515, "grad_norm": 1.1361249685287476, "learning_rate": 0.0001888617232654949, "loss": 1.032, "step": 2366 }, { "epoch": 0.759993578423503, "grad_norm": 1.2562459707260132, "learning_rate": 0.00018884570796237718, "loss": 0.9559, "step": 2367 }, { "epoch": 0.7603146572483545, "grad_norm": 0.9838060140609741, "learning_rate": 0.00018882968183367947, "loss": 0.9249, "step": 2368 }, { "epoch": 0.760635736073206, "grad_norm": 0.8353525996208191, "learning_rate": 0.00018881364488135448, "loss": 0.8962, "step": 2369 }, { "epoch": 0.7609568148980574, "grad_norm": 1.24613618850708, "learning_rate": 0.00018879759710735622, "loss": 0.927, "step": 2370 }, { "epoch": 0.761277893722909, "grad_norm": 1.1252332925796509, "learning_rate": 0.00018878153851364013, "loss": 0.9016, "step": 2371 }, { "epoch": 0.7615989725477604, "grad_norm": 0.9512665271759033, "learning_rate": 0.00018876546910216288, "loss": 0.9949, "step": 2372 }, { "epoch": 0.761920051372612, "grad_norm": 1.5389331579208374, "learning_rate": 0.00018874938887488248, "loss": 1.0535, "step": 2373 }, { "epoch": 0.7622411301974634, 
"grad_norm": 0.9745280146598816, "learning_rate": 0.00018873329783375824, "loss": 0.9594, "step": 2374 }, { "epoch": 0.762562209022315, "grad_norm": 1.1034287214279175, "learning_rate": 0.0001887171959807508, "loss": 0.9506, "step": 2375 }, { "epoch": 0.7628832878471665, "grad_norm": 1.1382795572280884, "learning_rate": 0.00018870108331782217, "loss": 0.771, "step": 2376 }, { "epoch": 0.763204366672018, "grad_norm": 1.042029619216919, "learning_rate": 0.0001886849598469356, "loss": 0.8624, "step": 2377 }, { "epoch": 0.7635254454968695, "grad_norm": 1.0116221904754639, "learning_rate": 0.00018866882557005567, "loss": 0.9996, "step": 2378 }, { "epoch": 0.763846524321721, "grad_norm": 0.6933526992797852, "learning_rate": 0.00018865268048914828, "loss": 0.657, "step": 2379 }, { "epoch": 0.7641676031465725, "grad_norm": 1.2314203977584839, "learning_rate": 0.0001886365246061807, "loss": 1.1146, "step": 2380 }, { "epoch": 0.764488681971424, "grad_norm": 1.3230317831039429, "learning_rate": 0.00018862035792312147, "loss": 0.8496, "step": 2381 }, { "epoch": 0.7648097607962755, "grad_norm": 0.9279382824897766, "learning_rate": 0.00018860418044194045, "loss": 0.8664, "step": 2382 }, { "epoch": 0.7651308396211269, "grad_norm": 1.3693788051605225, "learning_rate": 0.00018858799216460881, "loss": 0.935, "step": 2383 }, { "epoch": 0.7654519184459785, "grad_norm": 1.4639575481414795, "learning_rate": 0.00018857179309309901, "loss": 0.9289, "step": 2384 }, { "epoch": 0.76577299727083, "grad_norm": 1.192893147468567, "learning_rate": 0.00018855558322938493, "loss": 0.841, "step": 2385 }, { "epoch": 0.7660940760956815, "grad_norm": 2.4525952339172363, "learning_rate": 0.0001885393625754416, "loss": 0.9004, "step": 2386 }, { "epoch": 0.766415154920533, "grad_norm": 0.7626288533210754, "learning_rate": 0.00018852313113324552, "loss": 0.5966, "step": 2387 }, { "epoch": 0.7667362337453845, "grad_norm": 1.5377343893051147, "learning_rate": 0.00018850688890477445, "loss": 0.9392, "step": 
2388 }, { "epoch": 0.767057312570236, "grad_norm": 1.555207371711731, "learning_rate": 0.00018849063589200743, "loss": 1.0675, "step": 2389 }, { "epoch": 0.7673783913950875, "grad_norm": 1.0154179334640503, "learning_rate": 0.00018847437209692486, "loss": 0.6848, "step": 2390 }, { "epoch": 0.767699470219939, "grad_norm": 1.9315353631973267, "learning_rate": 0.0001884580975215084, "loss": 0.8662, "step": 2391 }, { "epoch": 0.7680205490447904, "grad_norm": 1.4244552850723267, "learning_rate": 0.0001884418121677411, "loss": 1.0924, "step": 2392 }, { "epoch": 0.768341627869642, "grad_norm": 1.190861701965332, "learning_rate": 0.00018842551603760724, "loss": 0.8864, "step": 2393 }, { "epoch": 0.7686627066944935, "grad_norm": 0.8999507427215576, "learning_rate": 0.0001884092091330925, "loss": 0.6656, "step": 2394 }, { "epoch": 0.768983785519345, "grad_norm": 1.2089276313781738, "learning_rate": 0.00018839289145618378, "loss": 0.7695, "step": 2395 }, { "epoch": 0.7693048643441965, "grad_norm": 0.7257319092750549, "learning_rate": 0.00018837656300886937, "loss": 0.5922, "step": 2396 }, { "epoch": 0.769625943169048, "grad_norm": 0.9687317609786987, "learning_rate": 0.00018836022379313883, "loss": 0.6401, "step": 2397 }, { "epoch": 0.7699470219938995, "grad_norm": 1.0825469493865967, "learning_rate": 0.000188343873810983, "loss": 0.8336, "step": 2398 }, { "epoch": 0.770268100818751, "grad_norm": 1.286629319190979, "learning_rate": 0.00018832751306439418, "loss": 0.9844, "step": 2399 }, { "epoch": 0.7705891796436025, "grad_norm": 0.8030791878700256, "learning_rate": 0.0001883111415553658, "loss": 0.5217, "step": 2400 }, { "epoch": 0.770910258468454, "grad_norm": 1.3023558855056763, "learning_rate": 0.00018829475928589271, "loss": 1.1508, "step": 2401 }, { "epoch": 0.7712313372933055, "grad_norm": 1.1620362997055054, "learning_rate": 0.00018827836625797103, "loss": 1.1917, "step": 2402 }, { "epoch": 0.7715524161181571, "grad_norm": 1.134852647781372, "learning_rate": 
0.00018826196247359817, "loss": 0.8599, "step": 2403 }, { "epoch": 0.7718734949430085, "grad_norm": 0.962922990322113, "learning_rate": 0.00018824554793477294, "loss": 0.708, "step": 2404 }, { "epoch": 0.77219457376786, "grad_norm": 1.1392818689346313, "learning_rate": 0.00018822912264349534, "loss": 0.6393, "step": 2405 }, { "epoch": 0.7725156525927115, "grad_norm": 1.5677722692489624, "learning_rate": 0.00018821268660176678, "loss": 0.7293, "step": 2406 }, { "epoch": 0.772836731417563, "grad_norm": 1.1908127069473267, "learning_rate": 0.00018819623981158995, "loss": 0.8353, "step": 2407 }, { "epoch": 0.7731578102424145, "grad_norm": 0.9969918727874756, "learning_rate": 0.00018817978227496883, "loss": 0.9109, "step": 2408 }, { "epoch": 0.773478889067266, "grad_norm": 0.8602403402328491, "learning_rate": 0.0001881633139939087, "loss": 0.9064, "step": 2409 }, { "epoch": 0.7737999678921175, "grad_norm": 0.8332744836807251, "learning_rate": 0.0001881468349704162, "loss": 0.8529, "step": 2410 }, { "epoch": 0.774121046716969, "grad_norm": 1.0467489957809448, "learning_rate": 0.0001881303452064992, "loss": 0.8339, "step": 2411 }, { "epoch": 0.7744421255418206, "grad_norm": 1.618408441543579, "learning_rate": 0.00018811384470416705, "loss": 0.7408, "step": 2412 }, { "epoch": 0.774763204366672, "grad_norm": 1.06851327419281, "learning_rate": 0.00018809733346543013, "loss": 0.857, "step": 2413 }, { "epoch": 0.7750842831915236, "grad_norm": 1.3492529392242432, "learning_rate": 0.00018808081149230036, "loss": 0.8873, "step": 2414 }, { "epoch": 0.775405362016375, "grad_norm": 1.1693357229232788, "learning_rate": 0.00018806427878679093, "loss": 1.1389, "step": 2415 }, { "epoch": 0.7757264408412265, "grad_norm": 1.2862006425857544, "learning_rate": 0.0001880477353509162, "loss": 0.9218, "step": 2416 }, { "epoch": 0.776047519666078, "grad_norm": 0.9942718744277954, "learning_rate": 0.00018803118118669202, "loss": 0.7227, "step": 2417 }, { "epoch": 0.7763685984909295, "grad_norm": 
1.2188372611999512, "learning_rate": 0.00018801461629613546, "loss": 0.9224, "step": 2418 }, { "epoch": 0.776689677315781, "grad_norm": 1.206196904182434, "learning_rate": 0.00018799804068126485, "loss": 0.9985, "step": 2419 }, { "epoch": 0.7770107561406325, "grad_norm": 1.327648639678955, "learning_rate": 0.0001879814543440999, "loss": 0.932, "step": 2420 }, { "epoch": 0.7773318349654841, "grad_norm": 1.5388193130493164, "learning_rate": 0.00018796485728666165, "loss": 0.9052, "step": 2421 }, { "epoch": 0.7776529137903355, "grad_norm": 1.7653731107711792, "learning_rate": 0.00018794824951097236, "loss": 1.032, "step": 2422 }, { "epoch": 0.7779739926151871, "grad_norm": 1.1087538003921509, "learning_rate": 0.00018793163101905563, "loss": 1.283, "step": 2423 }, { "epoch": 0.7782950714400385, "grad_norm": 1.3659240007400513, "learning_rate": 0.0001879150018129364, "loss": 1.0172, "step": 2424 }, { "epoch": 0.7786161502648901, "grad_norm": 0.9709173440933228, "learning_rate": 0.00018789836189464086, "loss": 0.9285, "step": 2425 }, { "epoch": 0.7789372290897415, "grad_norm": 1.2485631704330444, "learning_rate": 0.00018788171126619653, "loss": 0.9621, "step": 2426 }, { "epoch": 0.779258307914593, "grad_norm": 1.27434504032135, "learning_rate": 0.0001878650499296323, "loss": 0.984, "step": 2427 }, { "epoch": 0.7795793867394445, "grad_norm": 1.2106627225875854, "learning_rate": 0.00018784837788697823, "loss": 0.9109, "step": 2428 }, { "epoch": 0.779900465564296, "grad_norm": 0.9945119023323059, "learning_rate": 0.00018783169514026578, "loss": 1.0478, "step": 2429 }, { "epoch": 0.7802215443891475, "grad_norm": 2.2942955493927, "learning_rate": 0.00018781500169152773, "loss": 1.0132, "step": 2430 }, { "epoch": 0.780542623213999, "grad_norm": 1.0083764791488647, "learning_rate": 0.00018779829754279805, "loss": 1.0231, "step": 2431 }, { "epoch": 0.7808637020388506, "grad_norm": 1.2520384788513184, "learning_rate": 0.00018778158269611218, "loss": 0.8075, "step": 2432 }, { 
"epoch": 0.781184780863702, "grad_norm": 1.1380735635757446, "learning_rate": 0.00018776485715350671, "loss": 0.8545, "step": 2433 }, { "epoch": 0.7815058596885536, "grad_norm": 1.0106878280639648, "learning_rate": 0.00018774812091701962, "loss": 0.8459, "step": 2434 }, { "epoch": 0.781826938513405, "grad_norm": 1.134502649307251, "learning_rate": 0.00018773137398869015, "loss": 0.9232, "step": 2435 }, { "epoch": 0.7821480173382566, "grad_norm": 1.356572151184082, "learning_rate": 0.00018771461637055888, "loss": 0.9854, "step": 2436 }, { "epoch": 0.782469096163108, "grad_norm": 1.156056523323059, "learning_rate": 0.0001876978480646677, "loss": 0.9877, "step": 2437 }, { "epoch": 0.7827901749879596, "grad_norm": 1.1275618076324463, "learning_rate": 0.00018768106907305973, "loss": 0.7909, "step": 2438 }, { "epoch": 0.783111253812811, "grad_norm": 0.8512035012245178, "learning_rate": 0.00018766427939777945, "loss": 0.9474, "step": 2439 }, { "epoch": 0.7834323326376625, "grad_norm": 1.328540563583374, "learning_rate": 0.00018764747904087263, "loss": 0.9969, "step": 2440 }, { "epoch": 0.7837534114625141, "grad_norm": 0.8241159319877625, "learning_rate": 0.00018763066800438636, "loss": 0.5979, "step": 2441 }, { "epoch": 0.7840744902873655, "grad_norm": 1.58833646774292, "learning_rate": 0.00018761384629036902, "loss": 0.9713, "step": 2442 }, { "epoch": 0.7843955691122171, "grad_norm": 1.2584527730941772, "learning_rate": 0.00018759701390087027, "loss": 1.0281, "step": 2443 }, { "epoch": 0.7847166479370685, "grad_norm": 0.8919162154197693, "learning_rate": 0.0001875801708379411, "loss": 0.9197, "step": 2444 }, { "epoch": 0.7850377267619201, "grad_norm": 2.073561906814575, "learning_rate": 0.00018756331710363374, "loss": 0.9238, "step": 2445 }, { "epoch": 0.7853588055867715, "grad_norm": 0.9129263162612915, "learning_rate": 0.0001875464527000018, "loss": 0.8524, "step": 2446 }, { "epoch": 0.7856798844116231, "grad_norm": 1.1756608486175537, "learning_rate": 
0.00018752957762910018, "loss": 0.9372, "step": 2447 }, { "epoch": 0.7860009632364745, "grad_norm": 1.1006115674972534, "learning_rate": 0.000187512691892985, "loss": 0.8847, "step": 2448 }, { "epoch": 0.786322042061326, "grad_norm": 1.1314774751663208, "learning_rate": 0.0001874957954937138, "loss": 0.724, "step": 2449 }, { "epoch": 0.7866431208861776, "grad_norm": 0.5091024041175842, "learning_rate": 0.0001874788884333453, "loss": 0.4491, "step": 2450 }, { "epoch": 0.786964199711029, "grad_norm": 0.8892128467559814, "learning_rate": 0.00018746197071393958, "loss": 1.5905, "step": 2451 }, { "epoch": 0.7872852785358806, "grad_norm": 0.9708239436149597, "learning_rate": 0.00018744504233755805, "loss": 1.2197, "step": 2452 }, { "epoch": 0.787606357360732, "grad_norm": 1.2645493745803833, "learning_rate": 0.00018742810330626337, "loss": 0.8199, "step": 2453 }, { "epoch": 0.7879274361855836, "grad_norm": 0.9297351837158203, "learning_rate": 0.00018741115362211949, "loss": 0.6392, "step": 2454 }, { "epoch": 0.788248515010435, "grad_norm": 1.3193695545196533, "learning_rate": 0.0001873941932871917, "loss": 0.7132, "step": 2455 }, { "epoch": 0.7885695938352866, "grad_norm": 1.7561746835708618, "learning_rate": 0.00018737722230354655, "loss": 0.607, "step": 2456 }, { "epoch": 0.788890672660138, "grad_norm": 1.2228854894638062, "learning_rate": 0.00018736024067325188, "loss": 1.0434, "step": 2457 }, { "epoch": 0.7892117514849896, "grad_norm": 1.381565809249878, "learning_rate": 0.0001873432483983769, "loss": 1.0155, "step": 2458 }, { "epoch": 0.7895328303098411, "grad_norm": 1.2549446821212769, "learning_rate": 0.00018732624548099204, "loss": 0.6954, "step": 2459 }, { "epoch": 0.7898539091346926, "grad_norm": 1.154805064201355, "learning_rate": 0.00018730923192316902, "loss": 0.6555, "step": 2460 }, { "epoch": 0.7901749879595441, "grad_norm": 0.7833784818649292, "learning_rate": 0.00018729220772698097, "loss": 0.8146, "step": 2461 }, { "epoch": 0.7904960667843955, 
"grad_norm": 0.9661710858345032, "learning_rate": 0.0001872751728945022, "loss": 0.9448, "step": 2462 }, { "epoch": 0.7908171456092471, "grad_norm": 0.9815911054611206, "learning_rate": 0.00018725812742780834, "loss": 0.9413, "step": 2463 }, { "epoch": 0.7911382244340985, "grad_norm": 0.823572039604187, "learning_rate": 0.0001872410713289763, "loss": 0.8122, "step": 2464 }, { "epoch": 0.7914593032589501, "grad_norm": 0.8957955837249756, "learning_rate": 0.0001872240046000844, "loss": 0.8169, "step": 2465 }, { "epoch": 0.7917803820838015, "grad_norm": 1.1782604455947876, "learning_rate": 0.00018720692724321207, "loss": 0.8323, "step": 2466 }, { "epoch": 0.7921014609086531, "grad_norm": 0.887837827205658, "learning_rate": 0.0001871898392604402, "loss": 0.8662, "step": 2467 }, { "epoch": 0.7924225397335046, "grad_norm": 0.903867781162262, "learning_rate": 0.0001871727406538509, "loss": 1.0044, "step": 2468 }, { "epoch": 0.7927436185583561, "grad_norm": 0.8444812297821045, "learning_rate": 0.00018715563142552758, "loss": 0.9237, "step": 2469 }, { "epoch": 0.7930646973832076, "grad_norm": 1.5705596208572388, "learning_rate": 0.00018713851157755492, "loss": 0.9272, "step": 2470 }, { "epoch": 0.7933857762080591, "grad_norm": 0.9843676090240479, "learning_rate": 0.00018712138111201895, "loss": 0.9544, "step": 2471 }, { "epoch": 0.7937068550329106, "grad_norm": 1.2598280906677246, "learning_rate": 0.00018710424003100698, "loss": 0.8223, "step": 2472 }, { "epoch": 0.794027933857762, "grad_norm": 0.903620183467865, "learning_rate": 0.00018708708833660754, "loss": 0.6692, "step": 2473 }, { "epoch": 0.7943490126826136, "grad_norm": 1.244788408279419, "learning_rate": 0.00018706992603091058, "loss": 1.0025, "step": 2474 }, { "epoch": 0.794670091507465, "grad_norm": 1.1981257200241089, "learning_rate": 0.00018705275311600722, "loss": 0.8593, "step": 2475 }, { "epoch": 0.7949911703323166, "grad_norm": 0.8418868184089661, "learning_rate": 0.00018703556959398998, "loss": 0.7955, 
"step": 2476 }, { "epoch": 0.7953122491571681, "grad_norm": 0.8239766955375671, "learning_rate": 0.0001870183754669526, "loss": 0.796, "step": 2477 }, { "epoch": 0.7956333279820196, "grad_norm": 1.1803929805755615, "learning_rate": 0.0001870011707369901, "loss": 1.0684, "step": 2478 }, { "epoch": 0.7959544068068711, "grad_norm": 1.2590785026550293, "learning_rate": 0.0001869839554061988, "loss": 0.8635, "step": 2479 }, { "epoch": 0.7962754856317226, "grad_norm": 1.4843522310256958, "learning_rate": 0.00018696672947667646, "loss": 0.7823, "step": 2480 }, { "epoch": 0.7965965644565741, "grad_norm": 0.9807465076446533, "learning_rate": 0.0001869494929505219, "loss": 0.8197, "step": 2481 }, { "epoch": 0.7969176432814256, "grad_norm": 1.2640116214752197, "learning_rate": 0.0001869322458298354, "loss": 0.9056, "step": 2482 }, { "epoch": 0.7972387221062771, "grad_norm": 1.0154175758361816, "learning_rate": 0.0001869149881167184, "loss": 0.8573, "step": 2483 }, { "epoch": 0.7975598009311285, "grad_norm": 1.578016996383667, "learning_rate": 0.00018689771981327376, "loss": 0.9812, "step": 2484 }, { "epoch": 0.7978808797559801, "grad_norm": 0.9746622443199158, "learning_rate": 0.00018688044092160551, "loss": 0.8187, "step": 2485 }, { "epoch": 0.7982019585808315, "grad_norm": 1.5574668645858765, "learning_rate": 0.00018686315144381913, "loss": 0.7508, "step": 2486 }, { "epoch": 0.7985230374056831, "grad_norm": 0.9885238409042358, "learning_rate": 0.00018684585138202122, "loss": 0.8462, "step": 2487 }, { "epoch": 0.7988441162305346, "grad_norm": 1.220053791999817, "learning_rate": 0.00018682854073831973, "loss": 0.9497, "step": 2488 }, { "epoch": 0.7991651950553861, "grad_norm": 0.8417456746101379, "learning_rate": 0.00018681121951482393, "loss": 0.6397, "step": 2489 }, { "epoch": 0.7994862738802376, "grad_norm": 1.1695717573165894, "learning_rate": 0.00018679388771364436, "loss": 0.6754, "step": 2490 }, { "epoch": 0.7998073527050891, "grad_norm": 1.0518317222595215, 
"learning_rate": 0.00018677654533689287, "loss": 0.8917, "step": 2491 }, { "epoch": 0.8001284315299406, "grad_norm": 1.123487114906311, "learning_rate": 0.0001867591923866825, "loss": 0.977, "step": 2492 }, { "epoch": 0.8004495103547921, "grad_norm": 1.3612045049667358, "learning_rate": 0.00018674182886512774, "loss": 0.8039, "step": 2493 }, { "epoch": 0.8007705891796436, "grad_norm": 1.34379243850708, "learning_rate": 0.00018672445477434425, "loss": 0.8616, "step": 2494 }, { "epoch": 0.801091668004495, "grad_norm": 0.8489518761634827, "learning_rate": 0.000186707070116449, "loss": 0.7731, "step": 2495 }, { "epoch": 0.8014127468293466, "grad_norm": 1.2689493894577026, "learning_rate": 0.00018668967489356028, "loss": 0.7308, "step": 2496 }, { "epoch": 0.8017338256541982, "grad_norm": 1.7729278802871704, "learning_rate": 0.00018667226910779765, "loss": 0.6356, "step": 2497 }, { "epoch": 0.8020549044790496, "grad_norm": 0.9951494932174683, "learning_rate": 0.00018665485276128188, "loss": 0.9106, "step": 2498 }, { "epoch": 0.8023759833039011, "grad_norm": 1.5119075775146484, "learning_rate": 0.00018663742585613518, "loss": 0.7492, "step": 2499 }, { "epoch": 0.8026970621287526, "grad_norm": 1.1886110305786133, "learning_rate": 0.00018661998839448094, "loss": 0.5986, "step": 2500 }, { "epoch": 0.8030181409536041, "grad_norm": 1.4970862865447998, "learning_rate": 0.00018660254037844388, "loss": 1.4061, "step": 2501 }, { "epoch": 0.8033392197784556, "grad_norm": 1.2722395658493042, "learning_rate": 0.00018658508181014995, "loss": 1.2115, "step": 2502 }, { "epoch": 0.8036602986033071, "grad_norm": 1.3451300859451294, "learning_rate": 0.00018656761269172643, "loss": 0.9685, "step": 2503 }, { "epoch": 0.8039813774281586, "grad_norm": 1.2562284469604492, "learning_rate": 0.0001865501330253019, "loss": 0.7432, "step": 2504 }, { "epoch": 0.8043024562530101, "grad_norm": 1.0537221431732178, "learning_rate": 0.00018653264281300622, "loss": 0.7564, "step": 2505 }, { "epoch": 
0.8046235350778617, "grad_norm": 1.2182271480560303, "learning_rate": 0.00018651514205697046, "loss": 0.7227, "step": 2506 }, { "epoch": 0.8049446139027131, "grad_norm": 0.9667792320251465, "learning_rate": 0.00018649763075932708, "loss": 0.6023, "step": 2507 }, { "epoch": 0.8052656927275647, "grad_norm": 1.413209319114685, "learning_rate": 0.00018648010892220978, "loss": 0.9355, "step": 2508 }, { "epoch": 0.8055867715524161, "grad_norm": 1.3829725980758667, "learning_rate": 0.0001864625765477535, "loss": 0.7942, "step": 2509 }, { "epoch": 0.8059078503772676, "grad_norm": 0.8415265083312988, "learning_rate": 0.00018644503363809457, "loss": 0.7862, "step": 2510 }, { "epoch": 0.8062289292021191, "grad_norm": 1.514174222946167, "learning_rate": 0.0001864274801953705, "loss": 1.0991, "step": 2511 }, { "epoch": 0.8065500080269706, "grad_norm": 1.3786250352859497, "learning_rate": 0.0001864099162217201, "loss": 0.8292, "step": 2512 }, { "epoch": 0.8068710868518221, "grad_norm": 1.0749139785766602, "learning_rate": 0.00018639234171928353, "loss": 0.9804, "step": 2513 }, { "epoch": 0.8071921656766736, "grad_norm": 0.8056579828262329, "learning_rate": 0.0001863747566902022, "loss": 0.8092, "step": 2514 }, { "epoch": 0.8075132445015252, "grad_norm": 1.0548845529556274, "learning_rate": 0.00018635716113661873, "loss": 0.7606, "step": 2515 }, { "epoch": 0.8078343233263766, "grad_norm": 0.9919301271438599, "learning_rate": 0.00018633955506067718, "loss": 1.0425, "step": 2516 }, { "epoch": 0.8081554021512282, "grad_norm": 0.918125569820404, "learning_rate": 0.0001863219384645227, "loss": 0.8707, "step": 2517 }, { "epoch": 0.8084764809760796, "grad_norm": 1.2536126375198364, "learning_rate": 0.0001863043113503019, "loss": 1.1534, "step": 2518 }, { "epoch": 0.8087975598009312, "grad_norm": 1.1653470993041992, "learning_rate": 0.0001862866737201625, "loss": 0.8979, "step": 2519 }, { "epoch": 0.8091186386257826, "grad_norm": 1.016139030456543, "learning_rate": 
0.00018626902557625368, "loss": 0.7874, "step": 2520 }, { "epoch": 0.8094397174506341, "grad_norm": 1.2816565036773682, "learning_rate": 0.00018625136692072575, "loss": 0.8338, "step": 2521 }, { "epoch": 0.8097607962754856, "grad_norm": 1.5095349550247192, "learning_rate": 0.0001862336977557304, "loss": 1.0349, "step": 2522 }, { "epoch": 0.8100818751003371, "grad_norm": 1.753609299659729, "learning_rate": 0.00018621601808342056, "loss": 0.932, "step": 2523 }, { "epoch": 0.8104029539251887, "grad_norm": 1.1441911458969116, "learning_rate": 0.00018619832790595043, "loss": 1.0346, "step": 2524 }, { "epoch": 0.8107240327500401, "grad_norm": 1.1393861770629883, "learning_rate": 0.0001861806272254755, "loss": 0.8634, "step": 2525 }, { "epoch": 0.8110451115748917, "grad_norm": 0.9023036956787109, "learning_rate": 0.00018616291604415258, "loss": 0.7442, "step": 2526 }, { "epoch": 0.8113661903997431, "grad_norm": 0.9922143220901489, "learning_rate": 0.0001861451943641397, "loss": 0.893, "step": 2527 }, { "epoch": 0.8116872692245947, "grad_norm": 1.2461748123168945, "learning_rate": 0.00018612746218759618, "loss": 1.0521, "step": 2528 }, { "epoch": 0.8120083480494461, "grad_norm": 1.3164596557617188, "learning_rate": 0.00018610971951668265, "loss": 0.9787, "step": 2529 }, { "epoch": 0.8123294268742977, "grad_norm": 1.3357511758804321, "learning_rate": 0.000186091966353561, "loss": 0.8539, "step": 2530 }, { "epoch": 0.8126505056991491, "grad_norm": 0.9411560297012329, "learning_rate": 0.0001860742027003944, "loss": 1.0298, "step": 2531 }, { "epoch": 0.8129715845240006, "grad_norm": 0.8316774368286133, "learning_rate": 0.00018605642855934725, "loss": 0.8219, "step": 2532 }, { "epoch": 0.8132926633488522, "grad_norm": 1.3453702926635742, "learning_rate": 0.00018603864393258534, "loss": 0.9055, "step": 2533 }, { "epoch": 0.8136137421737036, "grad_norm": 0.9413175582885742, "learning_rate": 0.00018602084882227566, "loss": 0.9423, "step": 2534 }, { "epoch": 0.8139348209985552, 
"grad_norm": 0.9945281744003296, "learning_rate": 0.00018600304323058647, "loss": 0.9676, "step": 2535 }, { "epoch": 0.8142558998234066, "grad_norm": 1.2609598636627197, "learning_rate": 0.00018598522715968736, "loss": 0.877, "step": 2536 }, { "epoch": 0.8145769786482582, "grad_norm": 1.2044451236724854, "learning_rate": 0.0001859674006117491, "loss": 1.0152, "step": 2537 }, { "epoch": 0.8148980574731096, "grad_norm": 1.1789966821670532, "learning_rate": 0.00018594956358894388, "loss": 0.7836, "step": 2538 }, { "epoch": 0.8152191362979612, "grad_norm": 2.6686882972717285, "learning_rate": 0.00018593171609344503, "loss": 0.7848, "step": 2539 }, { "epoch": 0.8155402151228126, "grad_norm": 0.900510311126709, "learning_rate": 0.00018591385812742725, "loss": 0.8703, "step": 2540 }, { "epoch": 0.8158612939476642, "grad_norm": 0.9134002923965454, "learning_rate": 0.00018589598969306645, "loss": 0.7962, "step": 2541 }, { "epoch": 0.8161823727725157, "grad_norm": 1.1984633207321167, "learning_rate": 0.00018587811079253985, "loss": 0.984, "step": 2542 }, { "epoch": 0.8165034515973671, "grad_norm": 1.6189078092575073, "learning_rate": 0.00018586022142802597, "loss": 1.013, "step": 2543 }, { "epoch": 0.8168245304222187, "grad_norm": 1.4700372219085693, "learning_rate": 0.00018584232160170452, "loss": 1.1043, "step": 2544 }, { "epoch": 0.8171456092470701, "grad_norm": 1.035675287246704, "learning_rate": 0.0001858244113157566, "loss": 0.8351, "step": 2545 }, { "epoch": 0.8174666880719217, "grad_norm": 1.505395531654358, "learning_rate": 0.00018580649057236447, "loss": 0.8241, "step": 2546 }, { "epoch": 0.8177877668967731, "grad_norm": 1.6375924348831177, "learning_rate": 0.00018578855937371173, "loss": 0.6124, "step": 2547 }, { "epoch": 0.8181088457216247, "grad_norm": 0.5977188944816589, "learning_rate": 0.0001857706177219833, "loss": 0.5429, "step": 2548 }, { "epoch": 0.8184299245464761, "grad_norm": 0.6796979308128357, "learning_rate": 0.00018575266561936523, "loss": 0.5959, 
"step": 2549 }, { "epoch": 0.8187510033713277, "grad_norm": 0.6622598171234131, "learning_rate": 0.00018573470306804498, "loss": 0.5182, "step": 2550 }, { "epoch": 0.8190720821961791, "grad_norm": 0.9923622012138367, "learning_rate": 0.00018571673007021123, "loss": 1.2836, "step": 2551 }, { "epoch": 0.8193931610210307, "grad_norm": 1.1485919952392578, "learning_rate": 0.00018569874662805393, "loss": 1.3207, "step": 2552 }, { "epoch": 0.8197142398458822, "grad_norm": 0.9416031837463379, "learning_rate": 0.0001856807527437643, "loss": 0.9675, "step": 2553 }, { "epoch": 0.8200353186707336, "grad_norm": 1.0910120010375977, "learning_rate": 0.00018566274841953483, "loss": 0.8907, "step": 2554 }, { "epoch": 0.8203563974955852, "grad_norm": 1.061464786529541, "learning_rate": 0.00018564473365755935, "loss": 0.6902, "step": 2555 }, { "epoch": 0.8206774763204366, "grad_norm": 1.0407187938690186, "learning_rate": 0.00018562670846003284, "loss": 0.6008, "step": 2556 }, { "epoch": 0.8209985551452882, "grad_norm": 1.0993876457214355, "learning_rate": 0.0001856086728291516, "loss": 0.7588, "step": 2557 }, { "epoch": 0.8213196339701396, "grad_norm": 1.2485312223434448, "learning_rate": 0.00018559062676711332, "loss": 0.9553, "step": 2558 }, { "epoch": 0.8216407127949912, "grad_norm": 1.184083342552185, "learning_rate": 0.00018557257027611675, "loss": 1.0036, "step": 2559 }, { "epoch": 0.8219617916198426, "grad_norm": 1.219896674156189, "learning_rate": 0.00018555450335836206, "loss": 0.9131, "step": 2560 }, { "epoch": 0.8222828704446942, "grad_norm": 0.8921108841896057, "learning_rate": 0.00018553642601605068, "loss": 0.7521, "step": 2561 }, { "epoch": 0.8226039492695457, "grad_norm": 1.3684065341949463, "learning_rate": 0.0001855183382513852, "loss": 0.8931, "step": 2562 }, { "epoch": 0.8229250280943972, "grad_norm": 0.8778723478317261, "learning_rate": 0.00018550024006656966, "loss": 0.8721, "step": 2563 }, { "epoch": 0.8232461069192487, "grad_norm": 1.4915300607681274, 
"learning_rate": 0.00018548213146380918, "loss": 0.9267, "step": 2564 }, { "epoch": 0.8235671857441001, "grad_norm": 1.0653166770935059, "learning_rate": 0.0001854640124453103, "loss": 0.9316, "step": 2565 }, { "epoch": 0.8238882645689517, "grad_norm": 1.8792741298675537, "learning_rate": 0.00018544588301328075, "loss": 0.9906, "step": 2566 }, { "epoch": 0.8242093433938031, "grad_norm": 1.2481502294540405, "learning_rate": 0.0001854277431699295, "loss": 0.9631, "step": 2567 }, { "epoch": 0.8245304222186547, "grad_norm": 1.4939336776733398, "learning_rate": 0.00018540959291746693, "loss": 0.9405, "step": 2568 }, { "epoch": 0.8248515010435061, "grad_norm": 1.0083293914794922, "learning_rate": 0.0001853914322581045, "loss": 0.9924, "step": 2569 }, { "epoch": 0.8251725798683577, "grad_norm": 1.3089494705200195, "learning_rate": 0.00018537326119405506, "loss": 0.899, "step": 2570 }, { "epoch": 0.8254936586932092, "grad_norm": 1.0954056978225708, "learning_rate": 0.00018535507972753274, "loss": 0.977, "step": 2571 }, { "epoch": 0.8258147375180607, "grad_norm": 0.9227792620658875, "learning_rate": 0.00018533688786075288, "loss": 0.7727, "step": 2572 }, { "epoch": 0.8261358163429122, "grad_norm": 1.0734128952026367, "learning_rate": 0.00018531868559593204, "loss": 1.0625, "step": 2573 }, { "epoch": 0.8264568951677637, "grad_norm": 0.9797103404998779, "learning_rate": 0.00018530047293528819, "loss": 1.0345, "step": 2574 }, { "epoch": 0.8267779739926152, "grad_norm": 1.2002710103988647, "learning_rate": 0.00018528224988104044, "loss": 0.9635, "step": 2575 }, { "epoch": 0.8270990528174667, "grad_norm": 1.1027469635009766, "learning_rate": 0.00018526401643540922, "loss": 0.8992, "step": 2576 }, { "epoch": 0.8274201316423182, "grad_norm": 1.8506336212158203, "learning_rate": 0.00018524577260061627, "loss": 0.9184, "step": 2577 }, { "epoch": 0.8277412104671696, "grad_norm": 1.1233233213424683, "learning_rate": 0.0001852275183788845, "loss": 0.9805, "step": 2578 }, { "epoch": 
0.8280622892920212, "grad_norm": 1.0749657154083252, "learning_rate": 0.0001852092537724381, "loss": 0.999, "step": 2579 }, { "epoch": 0.8283833681168727, "grad_norm": 1.4672601222991943, "learning_rate": 0.00018519097878350263, "loss": 1.0324, "step": 2580 }, { "epoch": 0.8287044469417242, "grad_norm": 1.0896575450897217, "learning_rate": 0.00018517269341430476, "loss": 0.9614, "step": 2581 }, { "epoch": 0.8290255257665757, "grad_norm": 1.345746397972107, "learning_rate": 0.00018515439766707262, "loss": 0.9667, "step": 2582 }, { "epoch": 0.8293466045914272, "grad_norm": 0.8869307041168213, "learning_rate": 0.00018513609154403534, "loss": 0.8017, "step": 2583 }, { "epoch": 0.8296676834162787, "grad_norm": 1.2558448314666748, "learning_rate": 0.00018511777504742362, "loss": 0.9269, "step": 2584 }, { "epoch": 0.8299887622411302, "grad_norm": 1.5162900686264038, "learning_rate": 0.00018509944817946922, "loss": 0.9928, "step": 2585 }, { "epoch": 0.8303098410659817, "grad_norm": 1.0953887701034546, "learning_rate": 0.00018508111094240514, "loss": 0.7442, "step": 2586 }, { "epoch": 0.8306309198908332, "grad_norm": 0.9997647404670715, "learning_rate": 0.00018506276333846579, "loss": 0.8279, "step": 2587 }, { "epoch": 0.8309519987156847, "grad_norm": 0.8126912117004395, "learning_rate": 0.00018504440536988673, "loss": 0.6527, "step": 2588 }, { "epoch": 0.8312730775405363, "grad_norm": 0.7459484338760376, "learning_rate": 0.00018502603703890488, "loss": 0.7202, "step": 2589 }, { "epoch": 0.8315941563653877, "grad_norm": 1.0116833448410034, "learning_rate": 0.00018500765834775828, "loss": 0.8612, "step": 2590 }, { "epoch": 0.8319152351902392, "grad_norm": 1.7899558544158936, "learning_rate": 0.00018498926929868642, "loss": 1.0347, "step": 2591 }, { "epoch": 0.8322363140150907, "grad_norm": 1.1006872653961182, "learning_rate": 0.00018497086989392988, "loss": 0.9109, "step": 2592 }, { "epoch": 0.8325573928399422, "grad_norm": 2.0811855792999268, "learning_rate": 
0.00018495246013573054, "loss": 0.9743, "step": 2593 }, { "epoch": 0.8328784716647937, "grad_norm": 1.6364414691925049, "learning_rate": 0.00018493404002633166, "loss": 1.1582, "step": 2594 }, { "epoch": 0.8331995504896452, "grad_norm": 1.5922627449035645, "learning_rate": 0.00018491560956797765, "loss": 0.7103, "step": 2595 }, { "epoch": 0.8335206293144967, "grad_norm": 1.3164536952972412, "learning_rate": 0.00018489716876291415, "loss": 1.0022, "step": 2596 }, { "epoch": 0.8338417081393482, "grad_norm": 0.8407567739486694, "learning_rate": 0.0001848787176133882, "loss": 0.7196, "step": 2597 }, { "epoch": 0.8341627869641998, "grad_norm": 1.0273271799087524, "learning_rate": 0.00018486025612164794, "loss": 0.7724, "step": 2598 }, { "epoch": 0.8344838657890512, "grad_norm": 0.7496247291564941, "learning_rate": 0.0001848417842899429, "loss": 0.5834, "step": 2599 }, { "epoch": 0.8348049446139028, "grad_norm": 0.6890442371368408, "learning_rate": 0.00018482330212052378, "loss": 0.6603, "step": 2600 }, { "epoch": 0.8351260234387542, "grad_norm": 1.2235509157180786, "learning_rate": 0.0001848048096156426, "loss": 1.3789, "step": 2601 }, { "epoch": 0.8354471022636057, "grad_norm": 0.9681376218795776, "learning_rate": 0.00018478630677755262, "loss": 0.9582, "step": 2602 }, { "epoch": 0.8357681810884572, "grad_norm": 1.0608433485031128, "learning_rate": 0.00018476779360850832, "loss": 0.7719, "step": 2603 }, { "epoch": 0.8360892599133087, "grad_norm": 1.0130113363265991, "learning_rate": 0.00018474927011076552, "loss": 0.822, "step": 2604 }, { "epoch": 0.8364103387381602, "grad_norm": 1.0975375175476074, "learning_rate": 0.0001847307362865812, "loss": 0.7262, "step": 2605 }, { "epoch": 0.8367314175630117, "grad_norm": 0.8877381086349487, "learning_rate": 0.00018471219213821375, "loss": 0.5131, "step": 2606 }, { "epoch": 0.8370524963878632, "grad_norm": 1.0735324621200562, "learning_rate": 0.00018469363766792255, "loss": 0.6324, "step": 2607 }, { "epoch": 0.8373735752127147, 
"grad_norm": 0.9613997936248779, "learning_rate": 0.00018467507287796856, "loss": 0.8337, "step": 2608 }, { "epoch": 0.8376946540375663, "grad_norm": 1.0283273458480835, "learning_rate": 0.0001846564977706138, "loss": 0.8794, "step": 2609 }, { "epoch": 0.8380157328624177, "grad_norm": 1.1222115755081177, "learning_rate": 0.00018463791234812153, "loss": 0.9956, "step": 2610 }, { "epoch": 0.8383368116872693, "grad_norm": 0.8465763330459595, "learning_rate": 0.00018461931661275643, "loss": 0.8214, "step": 2611 }, { "epoch": 0.8386578905121207, "grad_norm": 1.5119593143463135, "learning_rate": 0.00018460071056678422, "loss": 1.0962, "step": 2612 }, { "epoch": 0.8389789693369722, "grad_norm": 0.7871680855751038, "learning_rate": 0.00018458209421247208, "loss": 0.6603, "step": 2613 }, { "epoch": 0.8393000481618237, "grad_norm": 0.7733088731765747, "learning_rate": 0.00018456346755208833, "loss": 0.8264, "step": 2614 }, { "epoch": 0.8396211269866752, "grad_norm": 0.8866434693336487, "learning_rate": 0.00018454483058790255, "loss": 0.7796, "step": 2615 }, { "epoch": 0.8399422058115267, "grad_norm": 0.9938840270042419, "learning_rate": 0.00018452618332218563, "loss": 1.0233, "step": 2616 }, { "epoch": 0.8402632846363782, "grad_norm": 1.1391087770462036, "learning_rate": 0.00018450752575720967, "loss": 0.8702, "step": 2617 }, { "epoch": 0.8405843634612298, "grad_norm": 1.0326181650161743, "learning_rate": 0.00018448885789524802, "loss": 1.1119, "step": 2618 }, { "epoch": 0.8409054422860812, "grad_norm": 1.0216528177261353, "learning_rate": 0.00018447017973857532, "loss": 0.9276, "step": 2619 }, { "epoch": 0.8412265211109328, "grad_norm": 1.6243035793304443, "learning_rate": 0.00018445149128946744, "loss": 0.7381, "step": 2620 }, { "epoch": 0.8415475999357842, "grad_norm": 1.0764888525009155, "learning_rate": 0.00018443279255020152, "loss": 1.0441, "step": 2621 }, { "epoch": 0.8418686787606358, "grad_norm": 0.9923635721206665, "learning_rate": 0.00018441408352305594, "loss": 
0.9418, "step": 2622 }, { "epoch": 0.8421897575854872, "grad_norm": 1.145235538482666, "learning_rate": 0.00018439536421031033, "loss": 0.8303, "step": 2623 }, { "epoch": 0.8425108364103387, "grad_norm": 1.0477502346038818, "learning_rate": 0.0001843766346142456, "loss": 0.9138, "step": 2624 }, { "epoch": 0.8428319152351902, "grad_norm": 0.9633687734603882, "learning_rate": 0.0001843578947371439, "loss": 0.7834, "step": 2625 }, { "epoch": 0.8431529940600417, "grad_norm": 0.8642808198928833, "learning_rate": 0.0001843391445812886, "loss": 0.9069, "step": 2626 }, { "epoch": 0.8434740728848933, "grad_norm": 1.472249984741211, "learning_rate": 0.00018432038414896434, "loss": 0.8867, "step": 2627 }, { "epoch": 0.8437951517097447, "grad_norm": 1.1954059600830078, "learning_rate": 0.00018430161344245707, "loss": 0.9881, "step": 2628 }, { "epoch": 0.8441162305345963, "grad_norm": 0.8830155730247498, "learning_rate": 0.0001842828324640539, "loss": 0.6729, "step": 2629 }, { "epoch": 0.8444373093594477, "grad_norm": 1.3963950872421265, "learning_rate": 0.00018426404121604323, "loss": 1.0219, "step": 2630 }, { "epoch": 0.8447583881842993, "grad_norm": 1.185807704925537, "learning_rate": 0.00018424523970071477, "loss": 0.9033, "step": 2631 }, { "epoch": 0.8450794670091507, "grad_norm": 0.9597305059432983, "learning_rate": 0.0001842264279203594, "loss": 1.0832, "step": 2632 }, { "epoch": 0.8454005458340023, "grad_norm": 0.8251401782035828, "learning_rate": 0.00018420760587726923, "loss": 0.7083, "step": 2633 }, { "epoch": 0.8457216246588537, "grad_norm": 1.246910572052002, "learning_rate": 0.00018418877357373776, "loss": 1.1274, "step": 2634 }, { "epoch": 0.8460427034837052, "grad_norm": 1.0847307443618774, "learning_rate": 0.00018416993101205958, "loss": 0.8948, "step": 2635 }, { "epoch": 0.8463637823085568, "grad_norm": 1.0887922048568726, "learning_rate": 0.00018415107819453062, "loss": 1.111, "step": 2636 }, { "epoch": 0.8466848611334082, "grad_norm": 0.9815394878387451, 
"learning_rate": 0.00018413221512344805, "loss": 0.8554, "step": 2637 }, { "epoch": 0.8470059399582598, "grad_norm": 1.085922122001648, "learning_rate": 0.00018411334180111027, "loss": 0.7271, "step": 2638 }, { "epoch": 0.8473270187831112, "grad_norm": 1.687097430229187, "learning_rate": 0.00018409445822981693, "loss": 0.9256, "step": 2639 }, { "epoch": 0.8476480976079628, "grad_norm": 1.0354297161102295, "learning_rate": 0.00018407556441186893, "loss": 0.7517, "step": 2640 }, { "epoch": 0.8479691764328142, "grad_norm": 1.799309492111206, "learning_rate": 0.00018405666034956844, "loss": 0.888, "step": 2641 }, { "epoch": 0.8482902552576658, "grad_norm": 1.172786831855774, "learning_rate": 0.00018403774604521886, "loss": 0.9754, "step": 2642 }, { "epoch": 0.8486113340825172, "grad_norm": 1.1656599044799805, "learning_rate": 0.00018401882150112484, "loss": 0.8011, "step": 2643 }, { "epoch": 0.8489324129073688, "grad_norm": 1.0657223463058472, "learning_rate": 0.00018399988671959227, "loss": 0.8496, "step": 2644 }, { "epoch": 0.8492534917322203, "grad_norm": 1.51372230052948, "learning_rate": 0.0001839809417029283, "loss": 0.9055, "step": 2645 }, { "epoch": 0.8495745705570718, "grad_norm": 1.423654556274414, "learning_rate": 0.00018396198645344135, "loss": 0.8467, "step": 2646 }, { "epoch": 0.8498956493819233, "grad_norm": 2.3389534950256348, "learning_rate": 0.000183943020973441, "loss": 0.8266, "step": 2647 }, { "epoch": 0.8502167282067747, "grad_norm": 0.9923876523971558, "learning_rate": 0.00018392404526523817, "loss": 0.7428, "step": 2648 }, { "epoch": 0.8505378070316263, "grad_norm": 0.8069606423377991, "learning_rate": 0.000183905059331145, "loss": 0.5436, "step": 2649 }, { "epoch": 0.8508588858564777, "grad_norm": 0.787869930267334, "learning_rate": 0.0001838860631734749, "loss": 0.4813, "step": 2650 }, { "epoch": 0.8511799646813293, "grad_norm": 1.2456567287445068, "learning_rate": 0.00018386705679454242, "loss": 1.2521, "step": 2651 }, { "epoch": 
0.8515010435061807, "grad_norm": 1.0350762605667114, "learning_rate": 0.00018384804019666345, "loss": 1.4132, "step": 2652 }, { "epoch": 0.8518221223310323, "grad_norm": 1.5614581108093262, "learning_rate": 0.00018382901338215516, "loss": 0.9286, "step": 2653 }, { "epoch": 0.8521432011558838, "grad_norm": 1.5894172191619873, "learning_rate": 0.00018380997635333585, "loss": 0.7567, "step": 2654 }, { "epoch": 0.8524642799807353, "grad_norm": 1.0441161394119263, "learning_rate": 0.00018379092911252514, "loss": 0.5728, "step": 2655 }, { "epoch": 0.8527853588055868, "grad_norm": 1.1320891380310059, "learning_rate": 0.0001837718716620439, "loss": 0.5784, "step": 2656 }, { "epoch": 0.8531064376304383, "grad_norm": 1.0418906211853027, "learning_rate": 0.0001837528040042142, "loss": 0.5328, "step": 2657 }, { "epoch": 0.8534275164552898, "grad_norm": 1.1594635248184204, "learning_rate": 0.00018373372614135936, "loss": 0.7899, "step": 2658 }, { "epoch": 0.8537485952801412, "grad_norm": 1.4470210075378418, "learning_rate": 0.000183714638075804, "loss": 1.0683, "step": 2659 }, { "epoch": 0.8540696741049928, "grad_norm": 0.8838327527046204, "learning_rate": 0.0001836955398098739, "loss": 0.8985, "step": 2660 }, { "epoch": 0.8543907529298442, "grad_norm": 0.9166834354400635, "learning_rate": 0.00018367643134589617, "loss": 0.9936, "step": 2661 }, { "epoch": 0.8547118317546958, "grad_norm": 0.998890221118927, "learning_rate": 0.0001836573126861991, "loss": 0.9257, "step": 2662 }, { "epoch": 0.8550329105795473, "grad_norm": 1.1718299388885498, "learning_rate": 0.00018363818383311225, "loss": 1.0218, "step": 2663 }, { "epoch": 0.8553539894043988, "grad_norm": 1.2179023027420044, "learning_rate": 0.0001836190447889664, "loss": 1.0098, "step": 2664 }, { "epoch": 0.8556750682292503, "grad_norm": 1.2525956630706787, "learning_rate": 0.00018359989555609353, "loss": 0.8362, "step": 2665 }, { "epoch": 0.8559961470541018, "grad_norm": 1.1447235345840454, "learning_rate": 
0.00018358073613682706, "loss": 0.825, "step": 2666 }, { "epoch": 0.8563172258789533, "grad_norm": 1.0088034868240356, "learning_rate": 0.00018356156653350137, "loss": 0.8964, "step": 2667 }, { "epoch": 0.8566383047038048, "grad_norm": 1.095577597618103, "learning_rate": 0.00018354238674845225, "loss": 0.8193, "step": 2668 }, { "epoch": 0.8569593835286563, "grad_norm": 1.3043547868728638, "learning_rate": 0.00018352319678401676, "loss": 1.0768, "step": 2669 }, { "epoch": 0.8572804623535077, "grad_norm": 1.7319622039794922, "learning_rate": 0.00018350399664253305, "loss": 0.9552, "step": 2670 }, { "epoch": 0.8576015411783593, "grad_norm": 1.5099067687988281, "learning_rate": 0.00018348478632634066, "loss": 1.0515, "step": 2671 }, { "epoch": 0.8579226200032107, "grad_norm": 1.069471001625061, "learning_rate": 0.0001834655658377803, "loss": 0.8308, "step": 2672 }, { "epoch": 0.8582436988280623, "grad_norm": 0.9602334499359131, "learning_rate": 0.00018344633517919392, "loss": 0.8164, "step": 2673 }, { "epoch": 0.8585647776529138, "grad_norm": 1.416114091873169, "learning_rate": 0.00018342709435292473, "loss": 0.9465, "step": 2674 }, { "epoch": 0.8588858564777653, "grad_norm": 1.6077988147735596, "learning_rate": 0.00018340784336131713, "loss": 0.907, "step": 2675 }, { "epoch": 0.8592069353026168, "grad_norm": 1.6769777536392212, "learning_rate": 0.00018338858220671682, "loss": 0.8495, "step": 2676 }, { "epoch": 0.8595280141274683, "grad_norm": 0.7927365899085999, "learning_rate": 0.00018336931089147073, "loss": 0.7901, "step": 2677 }, { "epoch": 0.8598490929523198, "grad_norm": 0.780259907245636, "learning_rate": 0.00018335002941792698, "loss": 0.714, "step": 2678 }, { "epoch": 0.8601701717771713, "grad_norm": 1.1689207553863525, "learning_rate": 0.000183330737788435, "loss": 0.9876, "step": 2679 }, { "epoch": 0.8604912506020228, "grad_norm": 1.2812306880950928, "learning_rate": 0.00018331143600534535, "loss": 1.1554, "step": 2680 }, { "epoch": 0.8608123294268742, 
"grad_norm": 1.0278218984603882, "learning_rate": 0.00018329212407100994, "loss": 0.75, "step": 2681 }, { "epoch": 0.8611334082517258, "grad_norm": 1.0668963193893433, "learning_rate": 0.0001832728019877819, "loss": 0.8591, "step": 2682 }, { "epoch": 0.8614544870765773, "grad_norm": 1.1060330867767334, "learning_rate": 0.0001832534697580155, "loss": 1.0402, "step": 2683 }, { "epoch": 0.8617755659014288, "grad_norm": 1.13728928565979, "learning_rate": 0.00018323412738406635, "loss": 0.8727, "step": 2684 }, { "epoch": 0.8620966447262803, "grad_norm": 1.4041433334350586, "learning_rate": 0.00018321477486829126, "loss": 0.949, "step": 2685 }, { "epoch": 0.8624177235511318, "grad_norm": 1.0087988376617432, "learning_rate": 0.00018319541221304827, "loss": 0.8199, "step": 2686 }, { "epoch": 0.8627388023759833, "grad_norm": 0.8733333945274353, "learning_rate": 0.00018317603942069664, "loss": 0.8297, "step": 2687 }, { "epoch": 0.8630598812008348, "grad_norm": 1.4244256019592285, "learning_rate": 0.00018315665649359692, "loss": 0.9084, "step": 2688 }, { "epoch": 0.8633809600256863, "grad_norm": 0.8898962736129761, "learning_rate": 0.00018313726343411086, "loss": 0.7508, "step": 2689 }, { "epoch": 0.8637020388505378, "grad_norm": 1.4125561714172363, "learning_rate": 0.0001831178602446014, "loss": 0.8825, "step": 2690 }, { "epoch": 0.8640231176753893, "grad_norm": 0.8810192346572876, "learning_rate": 0.00018309844692743283, "loss": 0.8294, "step": 2691 }, { "epoch": 0.8643441965002409, "grad_norm": 2.355106830596924, "learning_rate": 0.00018307902348497056, "loss": 0.6482, "step": 2692 }, { "epoch": 0.8646652753250923, "grad_norm": 0.9504086375236511, "learning_rate": 0.00018305958991958127, "loss": 0.7463, "step": 2693 }, { "epoch": 0.8649863541499438, "grad_norm": 1.0893597602844238, "learning_rate": 0.0001830401462336329, "loss": 0.8072, "step": 2694 }, { "epoch": 0.8653074329747953, "grad_norm": 0.7756816148757935, "learning_rate": 0.0001830206924294946, "loss": 0.6733, 
"step": 2695 }, { "epoch": 0.8656285117996468, "grad_norm": 0.8419047594070435, "learning_rate": 0.00018300122850953675, "loss": 0.6931, "step": 2696 }, { "epoch": 0.8659495906244983, "grad_norm": 0.8006436228752136, "learning_rate": 0.00018298175447613096, "loss": 0.6323, "step": 2697 }, { "epoch": 0.8662706694493498, "grad_norm": 0.9535905122756958, "learning_rate": 0.00018296227033165013, "loss": 0.6195, "step": 2698 }, { "epoch": 0.8665917482742013, "grad_norm": 0.7773939371109009, "learning_rate": 0.00018294277607846832, "loss": 0.5081, "step": 2699 }, { "epoch": 0.8669128270990528, "grad_norm": 2.4419236183166504, "learning_rate": 0.0001829232717189608, "loss": 0.7581, "step": 2700 }, { "epoch": 0.8672339059239044, "grad_norm": 0.919539213180542, "learning_rate": 0.00018290375725550417, "loss": 1.1346, "step": 2701 }, { "epoch": 0.8675549847487558, "grad_norm": 0.9994780421257019, "learning_rate": 0.0001828842326904762, "loss": 0.9724, "step": 2702 }, { "epoch": 0.8678760635736074, "grad_norm": 1.1944332122802734, "learning_rate": 0.00018286469802625589, "loss": 0.8469, "step": 2703 }, { "epoch": 0.8681971423984588, "grad_norm": 1.1087571382522583, "learning_rate": 0.00018284515326522346, "loss": 0.6316, "step": 2704 }, { "epoch": 0.8685182212233103, "grad_norm": 1.079188346862793, "learning_rate": 0.00018282559840976042, "loss": 0.5803, "step": 2705 }, { "epoch": 0.8688393000481618, "grad_norm": 0.9261389374732971, "learning_rate": 0.00018280603346224945, "loss": 0.5796, "step": 2706 }, { "epoch": 0.8691603788730133, "grad_norm": 1.1797443628311157, "learning_rate": 0.00018278645842507448, "loss": 0.7453, "step": 2707 }, { "epoch": 0.8694814576978648, "grad_norm": 1.0713974237442017, "learning_rate": 0.00018276687330062065, "loss": 0.994, "step": 2708 }, { "epoch": 0.8698025365227163, "grad_norm": 1.1797808408737183, "learning_rate": 0.00018274727809127438, "loss": 0.7688, "step": 2709 }, { "epoch": 0.8701236153475679, "grad_norm": 0.9432153105735779, 
"learning_rate": 0.00018272767279942328, "loss": 1.0629, "step": 2710 }, { "epoch": 0.8704446941724193, "grad_norm": 0.9762241840362549, "learning_rate": 0.00018270805742745617, "loss": 1.0634, "step": 2711 }, { "epoch": 0.8707657729972709, "grad_norm": 0.8841230273246765, "learning_rate": 0.00018268843197776318, "loss": 0.9472, "step": 2712 }, { "epoch": 0.8710868518221223, "grad_norm": 1.085910439491272, "learning_rate": 0.00018266879645273556, "loss": 0.9591, "step": 2713 }, { "epoch": 0.8714079306469739, "grad_norm": 0.8603227138519287, "learning_rate": 0.00018264915085476583, "loss": 0.8901, "step": 2714 }, { "epoch": 0.8717290094718253, "grad_norm": 1.1412354707717896, "learning_rate": 0.0001826294951862478, "loss": 0.8354, "step": 2715 }, { "epoch": 0.8720500882966769, "grad_norm": 0.9545773267745972, "learning_rate": 0.00018260982944957638, "loss": 1.0581, "step": 2716 }, { "epoch": 0.8723711671215283, "grad_norm": 1.4084380865097046, "learning_rate": 0.00018259015364714787, "loss": 0.9433, "step": 2717 }, { "epoch": 0.8726922459463798, "grad_norm": 1.183406949043274, "learning_rate": 0.00018257046778135964, "loss": 0.9878, "step": 2718 }, { "epoch": 0.8730133247712314, "grad_norm": 0.791094958782196, "learning_rate": 0.00018255077185461038, "loss": 0.8771, "step": 2719 }, { "epoch": 0.8733344035960828, "grad_norm": 1.0063689947128296, "learning_rate": 0.00018253106586929997, "loss": 0.8799, "step": 2720 }, { "epoch": 0.8736554824209344, "grad_norm": 1.1546752452850342, "learning_rate": 0.00018251134982782952, "loss": 1.0092, "step": 2721 }, { "epoch": 0.8739765612457858, "grad_norm": 1.525952696800232, "learning_rate": 0.00018249162373260141, "loss": 0.9339, "step": 2722 }, { "epoch": 0.8742976400706374, "grad_norm": 1.4072719812393188, "learning_rate": 0.0001824718875860191, "loss": 0.8635, "step": 2723 }, { "epoch": 0.8746187188954888, "grad_norm": 1.070801854133606, "learning_rate": 0.00018245214139048753, "loss": 1.107, "step": 2724 }, { "epoch": 
0.8749397977203404, "grad_norm": 0.9687029123306274, "learning_rate": 0.0001824323851484126, "loss": 0.6512, "step": 2725 }, { "epoch": 0.8752608765451918, "grad_norm": 1.0900896787643433, "learning_rate": 0.00018241261886220154, "loss": 1.0686, "step": 2726 }, { "epoch": 0.8755819553700434, "grad_norm": 1.1022065877914429, "learning_rate": 0.00018239284253426295, "loss": 0.9941, "step": 2727 }, { "epoch": 0.8759030341948948, "grad_norm": 0.7542319297790527, "learning_rate": 0.00018237305616700637, "loss": 0.783, "step": 2728 }, { "epoch": 0.8762241130197463, "grad_norm": 1.090649127960205, "learning_rate": 0.00018235325976284275, "loss": 0.8426, "step": 2729 }, { "epoch": 0.8765451918445979, "grad_norm": 0.8522968888282776, "learning_rate": 0.00018233345332418423, "loss": 0.689, "step": 2730 }, { "epoch": 0.8768662706694493, "grad_norm": 0.9584747552871704, "learning_rate": 0.0001823136368534442, "loss": 0.6896, "step": 2731 }, { "epoch": 0.8771873494943009, "grad_norm": 1.115478754043579, "learning_rate": 0.00018229381035303718, "loss": 0.72, "step": 2732 }, { "epoch": 0.8775084283191523, "grad_norm": 0.9085679650306702, "learning_rate": 0.000182273973825379, "loss": 0.6926, "step": 2733 }, { "epoch": 0.8778295071440039, "grad_norm": 1.5002387762069702, "learning_rate": 0.00018225412727288667, "loss": 1.0068, "step": 2734 }, { "epoch": 0.8781505859688553, "grad_norm": 1.0805754661560059, "learning_rate": 0.00018223427069797844, "loss": 0.8847, "step": 2735 }, { "epoch": 0.8784716647937069, "grad_norm": 1.1780469417572021, "learning_rate": 0.00018221440410307374, "loss": 0.798, "step": 2736 }, { "epoch": 0.8787927436185583, "grad_norm": 1.034372091293335, "learning_rate": 0.0001821945274905933, "loss": 1.0309, "step": 2737 }, { "epoch": 0.8791138224434099, "grad_norm": 1.680379867553711, "learning_rate": 0.00018217464086295904, "loss": 1.0186, "step": 2738 }, { "epoch": 0.8794349012682614, "grad_norm": 1.7660083770751953, "learning_rate": 0.00018215474422259402, 
"loss": 0.8119, "step": 2739 }, { "epoch": 0.8797559800931128, "grad_norm": 1.747609257698059, "learning_rate": 0.00018213483757192263, "loss": 0.5625, "step": 2740 }, { "epoch": 0.8800770589179644, "grad_norm": 1.156343936920166, "learning_rate": 0.00018211492091337042, "loss": 1.0188, "step": 2741 }, { "epoch": 0.8803981377428158, "grad_norm": 1.104516863822937, "learning_rate": 0.00018209499424936415, "loss": 0.6375, "step": 2742 }, { "epoch": 0.8807192165676674, "grad_norm": 3.744687080383301, "learning_rate": 0.0001820750575823319, "loss": 0.9079, "step": 2743 }, { "epoch": 0.8810402953925188, "grad_norm": 1.1239784955978394, "learning_rate": 0.00018205511091470283, "loss": 0.7289, "step": 2744 }, { "epoch": 0.8813613742173704, "grad_norm": 0.7483261823654175, "learning_rate": 0.0001820351542489074, "loss": 0.5682, "step": 2745 }, { "epoch": 0.8816824530422218, "grad_norm": 1.107564926147461, "learning_rate": 0.00018201518758737724, "loss": 0.6602, "step": 2746 }, { "epoch": 0.8820035318670734, "grad_norm": 4.356315612792969, "learning_rate": 0.00018199521093254523, "loss": 0.7124, "step": 2747 }, { "epoch": 0.8823246106919249, "grad_norm": 1.0629299879074097, "learning_rate": 0.00018197522428684552, "loss": 0.4317, "step": 2748 }, { "epoch": 0.8826456895167764, "grad_norm": 0.7678312063217163, "learning_rate": 0.0001819552276527134, "loss": 0.447, "step": 2749 }, { "epoch": 0.8829667683416279, "grad_norm": 1.091256856918335, "learning_rate": 0.00018193522103258537, "loss": 0.5988, "step": 2750 }, { "epoch": 0.8832878471664793, "grad_norm": 1.013898253440857, "learning_rate": 0.0001819152044288992, "loss": 1.3746, "step": 2751 }, { "epoch": 0.8836089259913309, "grad_norm": 0.9919261336326599, "learning_rate": 0.00018189517784409381, "loss": 1.3712, "step": 2752 }, { "epoch": 0.8839300048161823, "grad_norm": 1.0880647897720337, "learning_rate": 0.00018187514128060946, "loss": 0.9107, "step": 2753 }, { "epoch": 0.8842510836410339, "grad_norm": 
1.6532173156738281, "learning_rate": 0.0001818550947408875, "loss": 0.8713, "step": 2754 }, { "epoch": 0.8845721624658853, "grad_norm": 0.9895550608634949, "learning_rate": 0.0001818350382273705, "loss": 0.6474, "step": 2755 }, { "epoch": 0.8848932412907369, "grad_norm": 1.1515357494354248, "learning_rate": 0.00018181497174250236, "loss": 0.5816, "step": 2756 }, { "epoch": 0.8852143201155884, "grad_norm": 1.0654630661010742, "learning_rate": 0.00018179489528872807, "loss": 0.5709, "step": 2757 }, { "epoch": 0.8855353989404399, "grad_norm": 1.0934091806411743, "learning_rate": 0.00018177480886849388, "loss": 0.9673, "step": 2758 }, { "epoch": 0.8858564777652914, "grad_norm": 2.52614426612854, "learning_rate": 0.0001817547124842473, "loss": 0.9605, "step": 2759 }, { "epoch": 0.8861775565901429, "grad_norm": 0.8149087429046631, "learning_rate": 0.00018173460613843701, "loss": 0.981, "step": 2760 }, { "epoch": 0.8864986354149944, "grad_norm": 0.758175253868103, "learning_rate": 0.00018171448983351284, "loss": 0.7961, "step": 2761 }, { "epoch": 0.8868197142398458, "grad_norm": 0.8692430257797241, "learning_rate": 0.00018169436357192602, "loss": 0.782, "step": 2762 }, { "epoch": 0.8871407930646974, "grad_norm": 1.1529359817504883, "learning_rate": 0.00018167422735612877, "loss": 0.9836, "step": 2763 }, { "epoch": 0.8874618718895488, "grad_norm": 0.9792761206626892, "learning_rate": 0.00018165408118857464, "loss": 0.9161, "step": 2764 }, { "epoch": 0.8877829507144004, "grad_norm": 1.1091268062591553, "learning_rate": 0.00018163392507171842, "loss": 0.9312, "step": 2765 }, { "epoch": 0.8881040295392519, "grad_norm": 0.8531142473220825, "learning_rate": 0.00018161375900801604, "loss": 0.6937, "step": 2766 }, { "epoch": 0.8884251083641034, "grad_norm": 0.9302113652229309, "learning_rate": 0.00018159358299992467, "loss": 0.9284, "step": 2767 }, { "epoch": 0.8887461871889549, "grad_norm": 1.1668531894683838, "learning_rate": 0.00018157339704990275, "loss": 1.0318, "step": 2768 
}, { "epoch": 0.8890672660138064, "grad_norm": 0.8978874683380127, "learning_rate": 0.00018155320116040982, "loss": 0.9906, "step": 2769 }, { "epoch": 0.8893883448386579, "grad_norm": 1.0586960315704346, "learning_rate": 0.00018153299533390672, "loss": 0.8478, "step": 2770 }, { "epoch": 0.8897094236635094, "grad_norm": 1.5482786893844604, "learning_rate": 0.00018151277957285543, "loss": 0.8452, "step": 2771 }, { "epoch": 0.8900305024883609, "grad_norm": 1.1627875566482544, "learning_rate": 0.00018149255387971922, "loss": 1.1749, "step": 2772 }, { "epoch": 0.8903515813132123, "grad_norm": 1.5141947269439697, "learning_rate": 0.00018147231825696252, "loss": 0.9226, "step": 2773 }, { "epoch": 0.8906726601380639, "grad_norm": 0.9088945984840393, "learning_rate": 0.00018145207270705096, "loss": 0.8038, "step": 2774 }, { "epoch": 0.8909937389629154, "grad_norm": 1.2037333250045776, "learning_rate": 0.0001814318172324514, "loss": 0.926, "step": 2775 }, { "epoch": 0.8913148177877669, "grad_norm": 1.1055127382278442, "learning_rate": 0.00018141155183563193, "loss": 0.8784, "step": 2776 }, { "epoch": 0.8916358966126184, "grad_norm": 0.8244720101356506, "learning_rate": 0.00018139127651906184, "loss": 0.8521, "step": 2777 }, { "epoch": 0.8919569754374699, "grad_norm": 1.0560851097106934, "learning_rate": 0.00018137099128521156, "loss": 0.7876, "step": 2778 }, { "epoch": 0.8922780542623214, "grad_norm": 0.7370025515556335, "learning_rate": 0.0001813506961365528, "loss": 0.7918, "step": 2779 }, { "epoch": 0.8925991330871729, "grad_norm": 0.9496878385543823, "learning_rate": 0.00018133039107555852, "loss": 0.9127, "step": 2780 }, { "epoch": 0.8929202119120244, "grad_norm": 0.897685706615448, "learning_rate": 0.00018131007610470276, "loss": 0.8017, "step": 2781 }, { "epoch": 0.8932412907368759, "grad_norm": 1.8280360698699951, "learning_rate": 0.0001812897512264609, "loss": 1.2183, "step": 2782 }, { "epoch": 0.8935623695617274, "grad_norm": 1.164359450340271, "learning_rate": 
0.0001812694164433094, "loss": 0.8879, "step": 2783 }, { "epoch": 0.893883448386579, "grad_norm": 0.8923983573913574, "learning_rate": 0.00018124907175772604, "loss": 0.8414, "step": 2784 }, { "epoch": 0.8942045272114304, "grad_norm": 1.2509326934814453, "learning_rate": 0.0001812287171721897, "loss": 1.0194, "step": 2785 }, { "epoch": 0.894525606036282, "grad_norm": 0.9996488094329834, "learning_rate": 0.00018120835268918063, "loss": 0.9708, "step": 2786 }, { "epoch": 0.8948466848611334, "grad_norm": 0.8612101078033447, "learning_rate": 0.0001811879783111801, "loss": 0.7101, "step": 2787 }, { "epoch": 0.8951677636859849, "grad_norm": 1.1495945453643799, "learning_rate": 0.00018116759404067064, "loss": 0.7831, "step": 2788 }, { "epoch": 0.8954888425108364, "grad_norm": 0.9605495929718018, "learning_rate": 0.00018114719988013612, "loss": 0.6721, "step": 2789 }, { "epoch": 0.8958099213356879, "grad_norm": 1.1470704078674316, "learning_rate": 0.00018112679583206137, "loss": 0.6784, "step": 2790 }, { "epoch": 0.8961310001605394, "grad_norm": 1.1517149209976196, "learning_rate": 0.00018110638189893267, "loss": 0.9704, "step": 2791 }, { "epoch": 0.8964520789853909, "grad_norm": 1.7360931634902954, "learning_rate": 0.00018108595808323736, "loss": 0.7464, "step": 2792 }, { "epoch": 0.8967731578102424, "grad_norm": 0.8036109805107117, "learning_rate": 0.000181065524387464, "loss": 0.7334, "step": 2793 }, { "epoch": 0.8970942366350939, "grad_norm": 0.5651756525039673, "learning_rate": 0.0001810450808141024, "loss": 0.5383, "step": 2794 }, { "epoch": 0.8974153154599455, "grad_norm": 1.0314606428146362, "learning_rate": 0.00018102462736564355, "loss": 0.6901, "step": 2795 }, { "epoch": 0.8977363942847969, "grad_norm": 1.6273174285888672, "learning_rate": 0.00018100416404457961, "loss": 0.7406, "step": 2796 }, { "epoch": 0.8980574731096485, "grad_norm": 1.643372893333435, "learning_rate": 0.00018098369085340398, "loss": 0.6796, "step": 2797 }, { "epoch": 0.8983785519344999, 
"grad_norm": 1.2636449337005615, "learning_rate": 0.00018096320779461132, "loss": 0.6756, "step": 2798 }, { "epoch": 0.8986996307593514, "grad_norm": 1.1232649087905884, "learning_rate": 0.00018094271487069735, "loss": 0.5207, "step": 2799 }, { "epoch": 0.8990207095842029, "grad_norm": 0.5236806869506836, "learning_rate": 0.00018092221208415907, "loss": 0.4062, "step": 2800 }, { "epoch": 0.8993417884090544, "grad_norm": 0.7692060470581055, "learning_rate": 0.00018090169943749476, "loss": 0.9762, "step": 2801 }, { "epoch": 0.8996628672339059, "grad_norm": 0.8355540633201599, "learning_rate": 0.00018088117693320374, "loss": 1.2168, "step": 2802 }, { "epoch": 0.8999839460587574, "grad_norm": 0.8926533460617065, "learning_rate": 0.00018086064457378665, "loss": 0.7676, "step": 2803 }, { "epoch": 0.900305024883609, "grad_norm": 1.108173131942749, "learning_rate": 0.00018084010236174534, "loss": 0.552, "step": 2804 }, { "epoch": 0.9006261037084604, "grad_norm": 1.1268749237060547, "learning_rate": 0.00018081955029958274, "loss": 0.5069, "step": 2805 }, { "epoch": 0.900947182533312, "grad_norm": 1.0203336477279663, "learning_rate": 0.00018079898838980305, "loss": 0.6792, "step": 2806 }, { "epoch": 0.9012682613581634, "grad_norm": 1.2625064849853516, "learning_rate": 0.00018077841663491175, "loss": 0.6916, "step": 2807 }, { "epoch": 0.901589340183015, "grad_norm": 1.1094204187393188, "learning_rate": 0.0001807578350374154, "loss": 1.146, "step": 2808 }, { "epoch": 0.9019104190078664, "grad_norm": 1.1322005987167358, "learning_rate": 0.00018073724359982186, "loss": 0.8353, "step": 2809 }, { "epoch": 0.9022314978327179, "grad_norm": 0.9779659509658813, "learning_rate": 0.00018071664232464002, "loss": 0.7259, "step": 2810 }, { "epoch": 0.9025525766575694, "grad_norm": 1.0345367193222046, "learning_rate": 0.00018069603121438022, "loss": 0.7205, "step": 2811 }, { "epoch": 0.9028736554824209, "grad_norm": 1.5466442108154297, "learning_rate": 0.00018067541027155375, "loss": 
1.3155, "step": 2812 }, { "epoch": 0.9031947343072725, "grad_norm": 1.0183089971542358, "learning_rate": 0.00018065477949867327, "loss": 0.7655, "step": 2813 }, { "epoch": 0.9035158131321239, "grad_norm": 0.7953174114227295, "learning_rate": 0.00018063413889825254, "loss": 0.8727, "step": 2814 }, { "epoch": 0.9038368919569755, "grad_norm": 1.6211440563201904, "learning_rate": 0.0001806134884728066, "loss": 0.9523, "step": 2815 }, { "epoch": 0.9041579707818269, "grad_norm": 1.3157118558883667, "learning_rate": 0.00018059282822485158, "loss": 1.0292, "step": 2816 }, { "epoch": 0.9044790496066785, "grad_norm": 1.2418993711471558, "learning_rate": 0.00018057215815690494, "loss": 0.8633, "step": 2817 }, { "epoch": 0.9048001284315299, "grad_norm": 1.4473196268081665, "learning_rate": 0.00018055147827148523, "loss": 0.8806, "step": 2818 }, { "epoch": 0.9051212072563815, "grad_norm": 1.058814525604248, "learning_rate": 0.0001805307885711122, "loss": 0.8762, "step": 2819 }, { "epoch": 0.9054422860812329, "grad_norm": 0.8622485995292664, "learning_rate": 0.0001805100890583069, "loss": 0.8043, "step": 2820 }, { "epoch": 0.9057633649060844, "grad_norm": 1.5182201862335205, "learning_rate": 0.0001804893797355914, "loss": 1.0228, "step": 2821 }, { "epoch": 0.906084443730936, "grad_norm": 0.8368403315544128, "learning_rate": 0.00018046866060548918, "loss": 0.7839, "step": 2822 }, { "epoch": 0.9064055225557874, "grad_norm": 1.0724780559539795, "learning_rate": 0.00018044793167052477, "loss": 0.9187, "step": 2823 }, { "epoch": 0.906726601380639, "grad_norm": 1.226404070854187, "learning_rate": 0.00018042719293322388, "loss": 0.9023, "step": 2824 }, { "epoch": 0.9070476802054904, "grad_norm": 0.865028977394104, "learning_rate": 0.00018040644439611348, "loss": 0.6866, "step": 2825 }, { "epoch": 0.907368759030342, "grad_norm": 1.3817094564437866, "learning_rate": 0.00018038568606172173, "loss": 1.2747, "step": 2826 }, { "epoch": 0.9076898378551934, "grad_norm": 1.4625390768051147, 
"learning_rate": 0.00018036491793257798, "loss": 0.9156, "step": 2827 }, { "epoch": 0.908010916680045, "grad_norm": 1.0158188343048096, "learning_rate": 0.00018034414001121278, "loss": 0.9246, "step": 2828 }, { "epoch": 0.9083319955048964, "grad_norm": 0.98722243309021, "learning_rate": 0.0001803233523001578, "loss": 1.1086, "step": 2829 }, { "epoch": 0.908653074329748, "grad_norm": 0.8799543976783752, "learning_rate": 0.000180302554801946, "loss": 0.724, "step": 2830 }, { "epoch": 0.9089741531545995, "grad_norm": 1.1787347793579102, "learning_rate": 0.00018028174751911146, "loss": 1.0376, "step": 2831 }, { "epoch": 0.909295231979451, "grad_norm": 1.1940038204193115, "learning_rate": 0.00018026093045418954, "loss": 0.7911, "step": 2832 }, { "epoch": 0.9096163108043025, "grad_norm": 0.9036272168159485, "learning_rate": 0.0001802401036097167, "loss": 0.7765, "step": 2833 }, { "epoch": 0.9099373896291539, "grad_norm": 1.148139238357544, "learning_rate": 0.00018021926698823059, "loss": 0.8591, "step": 2834 }, { "epoch": 0.9102584684540055, "grad_norm": 0.8454437851905823, "learning_rate": 0.00018019842059227012, "loss": 0.7655, "step": 2835 }, { "epoch": 0.9105795472788569, "grad_norm": 1.5326919555664062, "learning_rate": 0.0001801775644243754, "loss": 0.9971, "step": 2836 }, { "epoch": 0.9109006261037085, "grad_norm": 1.6905158758163452, "learning_rate": 0.00018015669848708767, "loss": 1.1873, "step": 2837 }, { "epoch": 0.9112217049285599, "grad_norm": 0.8212069869041443, "learning_rate": 0.00018013582278294935, "loss": 0.7388, "step": 2838 }, { "epoch": 0.9115427837534115, "grad_norm": 1.243600606918335, "learning_rate": 0.00018011493731450413, "loss": 0.8089, "step": 2839 }, { "epoch": 0.911863862578263, "grad_norm": 1.1280215978622437, "learning_rate": 0.0001800940420842968, "loss": 0.8168, "step": 2840 }, { "epoch": 0.9121849414031145, "grad_norm": 1.2804031372070312, "learning_rate": 0.00018007313709487334, "loss": 1.0121, "step": 2841 }, { "epoch": 
0.912506020227966, "grad_norm": 1.406071662902832, "learning_rate": 0.0001800522223487811, "loss": 0.8801, "step": 2842 }, { "epoch": 0.9128270990528174, "grad_norm": 1.1531569957733154, "learning_rate": 0.0001800312978485683, "loss": 0.9636, "step": 2843 }, { "epoch": 0.913148177877669, "grad_norm": 2.095679521560669, "learning_rate": 0.00018001036359678469, "loss": 0.9504, "step": 2844 }, { "epoch": 0.9134692567025204, "grad_norm": 0.9581215381622314, "learning_rate": 0.00017998941959598095, "loss": 0.798, "step": 2845 }, { "epoch": 0.913790335527372, "grad_norm": 0.78875333070755, "learning_rate": 0.00017996846584870908, "loss": 0.7823, "step": 2846 }, { "epoch": 0.9141114143522234, "grad_norm": 1.2259639501571655, "learning_rate": 0.0001799475023575222, "loss": 0.8185, "step": 2847 }, { "epoch": 0.914432493177075, "grad_norm": 1.0644735097885132, "learning_rate": 0.00017992652912497464, "loss": 0.6927, "step": 2848 }, { "epoch": 0.9147535720019264, "grad_norm": 1.1323128938674927, "learning_rate": 0.00017990554615362198, "loss": 0.7886, "step": 2849 }, { "epoch": 0.915074650826778, "grad_norm": 0.6815614700317383, "learning_rate": 0.00017988455344602092, "loss": 0.5591, "step": 2850 }, { "epoch": 0.9153957296516295, "grad_norm": 0.9420286417007446, "learning_rate": 0.00017986355100472928, "loss": 1.0964, "step": 2851 }, { "epoch": 0.915716808476481, "grad_norm": 0.9961267113685608, "learning_rate": 0.00017984253883230627, "loss": 1.2273, "step": 2852 }, { "epoch": 0.9160378873013325, "grad_norm": 0.790002167224884, "learning_rate": 0.00017982151693131203, "loss": 0.8297, "step": 2853 }, { "epoch": 0.916358966126184, "grad_norm": 1.0446445941925049, "learning_rate": 0.0001798004853043081, "loss": 0.7856, "step": 2854 }, { "epoch": 0.9166800449510355, "grad_norm": 0.9418595433235168, "learning_rate": 0.0001797794439538571, "loss": 0.5708, "step": 2855 }, { "epoch": 0.9170011237758869, "grad_norm": 1.0176951885223389, "learning_rate": 0.00017975839288252287, 
"loss": 0.6077, "step": 2856 }, { "epoch": 0.9173222026007385, "grad_norm": 1.1099709272384644, "learning_rate": 0.00017973733209287036, "loss": 0.7867, "step": 2857 }, { "epoch": 0.9176432814255899, "grad_norm": 1.4851493835449219, "learning_rate": 0.00017971626158746584, "loss": 1.0428, "step": 2858 }, { "epoch": 0.9179643602504415, "grad_norm": 1.387786865234375, "learning_rate": 0.00017969518136887663, "loss": 0.9532, "step": 2859 }, { "epoch": 0.918285439075293, "grad_norm": 0.9932065010070801, "learning_rate": 0.00017967409143967132, "loss": 0.8475, "step": 2860 }, { "epoch": 0.9186065179001445, "grad_norm": 0.8978595733642578, "learning_rate": 0.00017965299180241963, "loss": 1.0245, "step": 2861 }, { "epoch": 0.918927596724996, "grad_norm": 1.0536555051803589, "learning_rate": 0.00017963188245969253, "loss": 0.814, "step": 2862 }, { "epoch": 0.9192486755498475, "grad_norm": 1.004948616027832, "learning_rate": 0.00017961076341406208, "loss": 0.7993, "step": 2863 }, { "epoch": 0.919569754374699, "grad_norm": 0.9587133526802063, "learning_rate": 0.0001795896346681016, "loss": 0.7497, "step": 2864 }, { "epoch": 0.9198908331995505, "grad_norm": 0.9380829930305481, "learning_rate": 0.00017956849622438554, "loss": 0.7871, "step": 2865 }, { "epoch": 0.920211912024402, "grad_norm": 0.9536609053611755, "learning_rate": 0.00017954734808548958, "loss": 0.8406, "step": 2866 }, { "epoch": 0.9205329908492534, "grad_norm": 0.9843786954879761, "learning_rate": 0.00017952619025399057, "loss": 0.9366, "step": 2867 }, { "epoch": 0.920854069674105, "grad_norm": 0.8176027536392212, "learning_rate": 0.00017950502273246649, "loss": 0.8689, "step": 2868 }, { "epoch": 0.9211751484989565, "grad_norm": 0.9969504475593567, "learning_rate": 0.00017948384552349657, "loss": 1.0414, "step": 2869 }, { "epoch": 0.921496227323808, "grad_norm": 1.0049349069595337, "learning_rate": 0.00017946265862966114, "loss": 0.8943, "step": 2870 }, { "epoch": 0.9218173061486595, "grad_norm": 
1.8080761432647705, "learning_rate": 0.00017944146205354182, "loss": 1.0632, "step": 2871 }, { "epoch": 0.922138384973511, "grad_norm": 0.8137750625610352, "learning_rate": 0.00017942025579772132, "loss": 0.7279, "step": 2872 }, { "epoch": 0.9224594637983625, "grad_norm": 0.9603304862976074, "learning_rate": 0.00017939903986478355, "loss": 0.9531, "step": 2873 }, { "epoch": 0.922780542623214, "grad_norm": 1.556550145149231, "learning_rate": 0.0001793778142573136, "loss": 0.8103, "step": 2874 }, { "epoch": 0.9231016214480655, "grad_norm": 1.2223728895187378, "learning_rate": 0.0001793565789778978, "loss": 0.9159, "step": 2875 }, { "epoch": 0.923422700272917, "grad_norm": 1.2416236400604248, "learning_rate": 0.00017933533402912354, "loss": 0.8169, "step": 2876 }, { "epoch": 0.9237437790977685, "grad_norm": 1.7050341367721558, "learning_rate": 0.00017931407941357947, "loss": 0.9111, "step": 2877 }, { "epoch": 0.92406485792262, "grad_norm": 1.7200475931167603, "learning_rate": 0.0001792928151338554, "loss": 1.0214, "step": 2878 }, { "epoch": 0.9243859367474715, "grad_norm": 1.3585445880889893, "learning_rate": 0.00017927154119254236, "loss": 1.0152, "step": 2879 }, { "epoch": 0.924707015572323, "grad_norm": 1.661501407623291, "learning_rate": 0.00017925025759223245, "loss": 0.7003, "step": 2880 }, { "epoch": 0.9250280943971745, "grad_norm": 0.8686938285827637, "learning_rate": 0.00017922896433551907, "loss": 0.8456, "step": 2881 }, { "epoch": 0.925349173222026, "grad_norm": 1.1402549743652344, "learning_rate": 0.00017920766142499672, "loss": 0.91, "step": 2882 }, { "epoch": 0.9256702520468775, "grad_norm": 1.2883620262145996, "learning_rate": 0.00017918634886326108, "loss": 0.834, "step": 2883 }, { "epoch": 0.925991330871729, "grad_norm": 0.8248382210731506, "learning_rate": 0.00017916502665290903, "loss": 0.7228, "step": 2884 }, { "epoch": 0.9263124096965805, "grad_norm": 1.0151740312576294, "learning_rate": 0.0001791436947965386, "loss": 0.8951, "step": 2885 }, { 
"epoch": 0.926633488521432, "grad_norm": 1.5327341556549072, "learning_rate": 0.00017912235329674902, "loss": 0.912, "step": 2886 }, { "epoch": 0.9269545673462836, "grad_norm": 1.1290961503982544, "learning_rate": 0.0001791010021561407, "loss": 0.8542, "step": 2887 }, { "epoch": 0.927275646171135, "grad_norm": 1.0894137620925903, "learning_rate": 0.0001790796413773152, "loss": 0.9589, "step": 2888 }, { "epoch": 0.9275967249959866, "grad_norm": 1.0757745504379272, "learning_rate": 0.0001790582709628753, "loss": 0.9288, "step": 2889 }, { "epoch": 0.927917803820838, "grad_norm": 0.8483561277389526, "learning_rate": 0.0001790368909154249, "loss": 0.852, "step": 2890 }, { "epoch": 0.9282388826456895, "grad_norm": 1.5198390483856201, "learning_rate": 0.00017901550123756906, "loss": 0.7621, "step": 2891 }, { "epoch": 0.928559961470541, "grad_norm": 1.2075666189193726, "learning_rate": 0.00017899410193191406, "loss": 0.9783, "step": 2892 }, { "epoch": 0.9288810402953925, "grad_norm": 1.1517733335494995, "learning_rate": 0.00017897269300106737, "loss": 0.9489, "step": 2893 }, { "epoch": 0.929202119120244, "grad_norm": 1.0166940689086914, "learning_rate": 0.0001789512744476376, "loss": 0.8871, "step": 2894 }, { "epoch": 0.9295231979450955, "grad_norm": 0.7956641912460327, "learning_rate": 0.0001789298462742345, "loss": 0.6786, "step": 2895 }, { "epoch": 0.9298442767699471, "grad_norm": 1.2172540426254272, "learning_rate": 0.00017890840848346908, "loss": 0.7532, "step": 2896 }, { "epoch": 0.9301653555947985, "grad_norm": 1.0997369289398193, "learning_rate": 0.00017888696107795342, "loss": 0.8759, "step": 2897 }, { "epoch": 0.9304864344196501, "grad_norm": 0.9977442026138306, "learning_rate": 0.00017886550406030085, "loss": 0.7112, "step": 2898 }, { "epoch": 0.9308075132445015, "grad_norm": 1.1181156635284424, "learning_rate": 0.00017884403743312582, "loss": 0.6298, "step": 2899 }, { "epoch": 0.9311285920693531, "grad_norm": 0.6041698455810547, "learning_rate": 
0.00017882256119904403, "loss": 0.4929, "step": 2900 }, { "epoch": 0.9314496708942045, "grad_norm": 0.8137570023536682, "learning_rate": 0.00017880107536067218, "loss": 0.9051, "step": 2901 }, { "epoch": 0.931770749719056, "grad_norm": 0.838915228843689, "learning_rate": 0.0001787795799206284, "loss": 1.2534, "step": 2902 }, { "epoch": 0.9320918285439075, "grad_norm": 0.7378472685813904, "learning_rate": 0.00017875807488153175, "loss": 0.6947, "step": 2903 }, { "epoch": 0.932412907368759, "grad_norm": 0.8197500705718994, "learning_rate": 0.00017873656024600254, "loss": 0.7199, "step": 2904 }, { "epoch": 0.9327339861936105, "grad_norm": 1.34345281124115, "learning_rate": 0.00017871503601666233, "loss": 0.737, "step": 2905 }, { "epoch": 0.933055065018462, "grad_norm": 1.0809118747711182, "learning_rate": 0.00017869350219613375, "loss": 0.5367, "step": 2906 }, { "epoch": 0.9333761438433136, "grad_norm": 0.9395135045051575, "learning_rate": 0.0001786719587870406, "loss": 0.4426, "step": 2907 }, { "epoch": 0.933697222668165, "grad_norm": 0.8438148498535156, "learning_rate": 0.00017865040579200794, "loss": 0.5694, "step": 2908 }, { "epoch": 0.9340183014930166, "grad_norm": 0.836493194103241, "learning_rate": 0.00017862884321366188, "loss": 0.8198, "step": 2909 }, { "epoch": 0.934339380317868, "grad_norm": 1.9980072975158691, "learning_rate": 0.0001786072710546298, "loss": 0.9351, "step": 2910 }, { "epoch": 0.9346604591427196, "grad_norm": 1.1404621601104736, "learning_rate": 0.0001785856893175402, "loss": 0.93, "step": 2911 }, { "epoch": 0.934981537967571, "grad_norm": 1.2216063737869263, "learning_rate": 0.00017856409800502272, "loss": 0.811, "step": 2912 }, { "epoch": 0.9353026167924225, "grad_norm": 0.7324502468109131, "learning_rate": 0.00017854249711970818, "loss": 0.6464, "step": 2913 }, { "epoch": 0.935623695617274, "grad_norm": 0.8018614053726196, "learning_rate": 0.00017852088666422863, "loss": 0.8989, "step": 2914 }, { "epoch": 0.9359447744421255, "grad_norm": 
0.8852442502975464, "learning_rate": 0.00017849926664121726, "loss": 0.7535, "step": 2915 }, { "epoch": 0.9362658532669771, "grad_norm": 1.2402286529541016, "learning_rate": 0.0001784776370533083, "loss": 0.9203, "step": 2916 }, { "epoch": 0.9365869320918285, "grad_norm": 0.9982452988624573, "learning_rate": 0.00017845599790313735, "loss": 0.8699, "step": 2917 }, { "epoch": 0.9369080109166801, "grad_norm": 1.3398115634918213, "learning_rate": 0.000178434349193341, "loss": 0.9541, "step": 2918 }, { "epoch": 0.9372290897415315, "grad_norm": 1.2586543560028076, "learning_rate": 0.00017841269092655715, "loss": 0.8738, "step": 2919 }, { "epoch": 0.9375501685663831, "grad_norm": 0.7111466526985168, "learning_rate": 0.00017839102310542477, "loss": 0.7364, "step": 2920 }, { "epoch": 0.9378712473912345, "grad_norm": 1.377344012260437, "learning_rate": 0.000178369345732584, "loss": 0.9916, "step": 2921 }, { "epoch": 0.9381923262160861, "grad_norm": 0.881392240524292, "learning_rate": 0.00017834765881067616, "loss": 0.8958, "step": 2922 }, { "epoch": 0.9385134050409375, "grad_norm": 0.8309992551803589, "learning_rate": 0.00017832596234234376, "loss": 0.6358, "step": 2923 }, { "epoch": 0.938834483865789, "grad_norm": 0.9630143046379089, "learning_rate": 0.00017830425633023043, "loss": 0.7877, "step": 2924 }, { "epoch": 0.9391555626906406, "grad_norm": 0.92414790391922, "learning_rate": 0.000178282540776981, "loss": 0.9282, "step": 2925 }, { "epoch": 0.939476641515492, "grad_norm": 1.1467437744140625, "learning_rate": 0.0001782608156852414, "loss": 0.8531, "step": 2926 }, { "epoch": 0.9397977203403436, "grad_norm": 1.1309285163879395, "learning_rate": 0.0001782390810576588, "loss": 0.969, "step": 2927 }, { "epoch": 0.940118799165195, "grad_norm": 1.0550163984298706, "learning_rate": 0.00017821733689688153, "loss": 0.8446, "step": 2928 }, { "epoch": 0.9404398779900466, "grad_norm": 0.9224303364753723, "learning_rate": 0.000178195583205559, "loss": 0.9563, "step": 2929 }, { 
"epoch": 0.940760956814898, "grad_norm": 1.0767320394515991, "learning_rate": 0.00017817381998634185, "loss": 0.8241, "step": 2930 }, { "epoch": 0.9410820356397496, "grad_norm": 0.7739982604980469, "learning_rate": 0.00017815204724188187, "loss": 0.5874, "step": 2931 }, { "epoch": 0.941403114464601, "grad_norm": 1.8265200853347778, "learning_rate": 0.000178130264974832, "loss": 1.0106, "step": 2932 }, { "epoch": 0.9417241932894526, "grad_norm": 0.7935619950294495, "learning_rate": 0.0001781084731878463, "loss": 0.7087, "step": 2933 }, { "epoch": 0.9420452721143041, "grad_norm": 1.314613938331604, "learning_rate": 0.00017808667188358012, "loss": 1.1337, "step": 2934 }, { "epoch": 0.9423663509391556, "grad_norm": 1.5377808809280396, "learning_rate": 0.00017806486106468981, "loss": 1.074, "step": 2935 }, { "epoch": 0.9426874297640071, "grad_norm": 0.8148975968360901, "learning_rate": 0.000178043040733833, "loss": 0.6838, "step": 2936 }, { "epoch": 0.9430085085888585, "grad_norm": 1.7382673025131226, "learning_rate": 0.00017802121089366836, "loss": 0.731, "step": 2937 }, { "epoch": 0.9433295874137101, "grad_norm": 1.164584994316101, "learning_rate": 0.00017799937154685586, "loss": 1.0691, "step": 2938 }, { "epoch": 0.9436506662385615, "grad_norm": 1.2241766452789307, "learning_rate": 0.00017797752269605653, "loss": 0.9289, "step": 2939 }, { "epoch": 0.9439717450634131, "grad_norm": 1.3293641805648804, "learning_rate": 0.00017795566434393258, "loss": 0.7479, "step": 2940 }, { "epoch": 0.9442928238882645, "grad_norm": 1.1661382913589478, "learning_rate": 0.00017793379649314744, "loss": 0.9162, "step": 2941 }, { "epoch": 0.9446139027131161, "grad_norm": 1.2080588340759277, "learning_rate": 0.00017791191914636554, "loss": 0.7161, "step": 2942 }, { "epoch": 0.9449349815379676, "grad_norm": 1.0982539653778076, "learning_rate": 0.00017789003230625266, "loss": 0.8409, "step": 2943 }, { "epoch": 0.9452560603628191, "grad_norm": 1.0143113136291504, "learning_rate": 
0.00017786813597547562, "loss": 0.7521, "step": 2944 }, { "epoch": 0.9455771391876706, "grad_norm": 1.105790615081787, "learning_rate": 0.00017784623015670238, "loss": 0.9074, "step": 2945 }, { "epoch": 0.945898218012522, "grad_norm": 0.9993615746498108, "learning_rate": 0.00017782431485260212, "loss": 0.8263, "step": 2946 }, { "epoch": 0.9462192968373736, "grad_norm": 0.969996452331543, "learning_rate": 0.00017780239006584515, "loss": 0.6984, "step": 2947 }, { "epoch": 0.946540375662225, "grad_norm": 0.934131383895874, "learning_rate": 0.00017778045579910302, "loss": 0.7838, "step": 2948 }, { "epoch": 0.9468614544870766, "grad_norm": 0.7031441330909729, "learning_rate": 0.0001777585120550482, "loss": 0.4672, "step": 2949 }, { "epoch": 0.947182533311928, "grad_norm": 1.3736737966537476, "learning_rate": 0.0001777365588363546, "loss": 0.6343, "step": 2950 }, { "epoch": 0.9475036121367796, "grad_norm": 1.2793693542480469, "learning_rate": 0.0001777145961456971, "loss": 1.0671, "step": 2951 }, { "epoch": 0.9478246909616311, "grad_norm": 1.0697038173675537, "learning_rate": 0.0001776926239857518, "loss": 0.9461, "step": 2952 }, { "epoch": 0.9481457697864826, "grad_norm": 1.163883090019226, "learning_rate": 0.00017767064235919592, "loss": 0.8222, "step": 2953 }, { "epoch": 0.9484668486113341, "grad_norm": 1.1357632875442505, "learning_rate": 0.00017764865126870786, "loss": 0.6185, "step": 2954 }, { "epoch": 0.9487879274361856, "grad_norm": 1.044090986251831, "learning_rate": 0.0001776266507169672, "loss": 0.5034, "step": 2955 }, { "epoch": 0.9491090062610371, "grad_norm": 1.0913854837417603, "learning_rate": 0.0001776046407066546, "loss": 0.4259, "step": 2956 }, { "epoch": 0.9494300850858886, "grad_norm": 0.9846095442771912, "learning_rate": 0.00017758262124045195, "loss": 0.464, "step": 2957 }, { "epoch": 0.9497511639107401, "grad_norm": 1.2047371864318848, "learning_rate": 0.0001775605923210422, "loss": 0.9376, "step": 2958 }, { "epoch": 0.9500722427355915, 
"grad_norm": 2.1599411964416504, "learning_rate": 0.0001775385539511096, "loss": 1.0841, "step": 2959 }, { "epoch": 0.9503933215604431, "grad_norm": 1.1163110733032227, "learning_rate": 0.00017751650613333935, "loss": 0.8837, "step": 2960 }, { "epoch": 0.9507144003852946, "grad_norm": 1.0459928512573242, "learning_rate": 0.00017749444887041799, "loss": 0.9614, "step": 2961 }, { "epoch": 0.9510354792101461, "grad_norm": 0.897926390171051, "learning_rate": 0.00017747238216503307, "loss": 0.8908, "step": 2962 }, { "epoch": 0.9513565580349976, "grad_norm": 0.9355558753013611, "learning_rate": 0.00017745030601987337, "loss": 0.8321, "step": 2963 }, { "epoch": 0.9516776368598491, "grad_norm": 0.7442376613616943, "learning_rate": 0.00017742822043762888, "loss": 0.627, "step": 2964 }, { "epoch": 0.9519987156847006, "grad_norm": 0.7983213067054749, "learning_rate": 0.00017740612542099053, "loss": 0.8255, "step": 2965 }, { "epoch": 0.9523197945095521, "grad_norm": 1.141676425933838, "learning_rate": 0.00017738402097265064, "loss": 0.9653, "step": 2966 }, { "epoch": 0.9526408733344036, "grad_norm": 0.9923720955848694, "learning_rate": 0.0001773619070953025, "loss": 0.7411, "step": 2967 }, { "epoch": 0.952961952159255, "grad_norm": 1.1766963005065918, "learning_rate": 0.00017733978379164066, "loss": 1.008, "step": 2968 }, { "epoch": 0.9532830309841066, "grad_norm": 1.0631762742996216, "learning_rate": 0.00017731765106436073, "loss": 0.8621, "step": 2969 }, { "epoch": 0.953604109808958, "grad_norm": 0.9990087747573853, "learning_rate": 0.00017729550891615957, "loss": 0.8143, "step": 2970 }, { "epoch": 0.9539251886338096, "grad_norm": 0.9863772988319397, "learning_rate": 0.00017727335734973512, "loss": 0.8346, "step": 2971 }, { "epoch": 0.9542462674586611, "grad_norm": 1.3948047161102295, "learning_rate": 0.00017725119636778644, "loss": 0.7758, "step": 2972 }, { "epoch": 0.9545673462835126, "grad_norm": 1.3944754600524902, "learning_rate": 0.00017722902597301383, "loss": 0.9348, 
"step": 2973 }, { "epoch": 0.9548884251083641, "grad_norm": 1.196573257446289, "learning_rate": 0.00017720684616811866, "loss": 0.9216, "step": 2974 }, { "epoch": 0.9552095039332156, "grad_norm": 1.4773893356323242, "learning_rate": 0.0001771846569558035, "loss": 1.0486, "step": 2975 }, { "epoch": 0.9555305827580671, "grad_norm": 1.0879054069519043, "learning_rate": 0.00017716245833877201, "loss": 0.7509, "step": 2976 }, { "epoch": 0.9558516615829186, "grad_norm": 0.9991624355316162, "learning_rate": 0.00017714025031972903, "loss": 0.7418, "step": 2977 }, { "epoch": 0.9561727404077701, "grad_norm": 1.0229377746582031, "learning_rate": 0.00017711803290138052, "loss": 0.8292, "step": 2978 }, { "epoch": 0.9564938192326216, "grad_norm": 0.9216927289962769, "learning_rate": 0.00017709580608643363, "loss": 0.7256, "step": 2979 }, { "epoch": 0.9568148980574731, "grad_norm": 1.1415516138076782, "learning_rate": 0.0001770735698775966, "loss": 0.9657, "step": 2980 }, { "epoch": 0.9571359768823247, "grad_norm": 1.4062246084213257, "learning_rate": 0.00017705132427757895, "loss": 1.0777, "step": 2981 }, { "epoch": 0.9574570557071761, "grad_norm": 1.2299398183822632, "learning_rate": 0.00017702906928909108, "loss": 1.1786, "step": 2982 }, { "epoch": 0.9577781345320276, "grad_norm": 1.3134931325912476, "learning_rate": 0.0001770068049148448, "loss": 1.0092, "step": 2983 }, { "epoch": 0.9580992133568791, "grad_norm": 1.0825444459915161, "learning_rate": 0.00017698453115755293, "loss": 0.6504, "step": 2984 }, { "epoch": 0.9584202921817306, "grad_norm": 1.2085427045822144, "learning_rate": 0.00017696224801992945, "loss": 0.9662, "step": 2985 }, { "epoch": 0.9587413710065821, "grad_norm": 1.1950358152389526, "learning_rate": 0.0001769399555046895, "loss": 0.8202, "step": 2986 }, { "epoch": 0.9590624498314336, "grad_norm": 1.1132547855377197, "learning_rate": 0.00017691765361454938, "loss": 0.9621, "step": 2987 }, { "epoch": 0.9593835286562851, "grad_norm": 0.9960354566574097, 
"learning_rate": 0.00017689534235222648, "loss": 0.9149, "step": 2988 }, { "epoch": 0.9597046074811366, "grad_norm": 1.1196013689041138, "learning_rate": 0.00017687302172043933, "loss": 0.8249, "step": 2989 }, { "epoch": 0.9600256863059882, "grad_norm": 1.1874531507492065, "learning_rate": 0.00017685069172190766, "loss": 0.8245, "step": 2990 }, { "epoch": 0.9603467651308396, "grad_norm": 1.0298069715499878, "learning_rate": 0.00017682835235935236, "loss": 0.7936, "step": 2991 }, { "epoch": 0.9606678439556912, "grad_norm": 1.3679097890853882, "learning_rate": 0.00017680600363549533, "loss": 0.8939, "step": 2992 }, { "epoch": 0.9609889227805426, "grad_norm": 0.9797439575195312, "learning_rate": 0.00017678364555305978, "loss": 0.6787, "step": 2993 }, { "epoch": 0.9613100016053941, "grad_norm": 1.0256136655807495, "learning_rate": 0.00017676127811476987, "loss": 0.6678, "step": 2994 }, { "epoch": 0.9616310804302456, "grad_norm": 1.0464802980422974, "learning_rate": 0.0001767389013233511, "loss": 0.6863, "step": 2995 }, { "epoch": 0.9619521592550971, "grad_norm": 1.0149427652359009, "learning_rate": 0.00017671651518153, "loss": 0.6783, "step": 2996 }, { "epoch": 0.9622732380799486, "grad_norm": 0.8050268292427063, "learning_rate": 0.00017669411969203417, "loss": 0.7203, "step": 2997 }, { "epoch": 0.9625943169048001, "grad_norm": 0.896780252456665, "learning_rate": 0.00017667171485759252, "loss": 0.575, "step": 2998 }, { "epoch": 0.9629153957296517, "grad_norm": 0.813828706741333, "learning_rate": 0.00017664930068093498, "loss": 0.7544, "step": 2999 }, { "epoch": 0.9632364745545031, "grad_norm": 0.922319769859314, "learning_rate": 0.00017662687716479266, "loss": 0.5257, "step": 3000 }, { "epoch": 0.9635575533793547, "grad_norm": 1.0877915620803833, "learning_rate": 0.0001766044443118978, "loss": 1.0061, "step": 3001 }, { "epoch": 0.9638786322042061, "grad_norm": 1.045781135559082, "learning_rate": 0.00017658200212498378, "loss": 0.9707, "step": 3002 }, { "epoch": 
0.9641997110290577, "grad_norm": 1.023611307144165, "learning_rate": 0.00017655955060678506, "loss": 0.7057, "step": 3003 }, { "epoch": 0.9645207898539091, "grad_norm": 1.043655514717102, "learning_rate": 0.00017653708976003737, "loss": 0.5738, "step": 3004 }, { "epoch": 0.9648418686787607, "grad_norm": 0.8784222602844238, "learning_rate": 0.00017651461958747745, "loss": 0.5103, "step": 3005 }, { "epoch": 0.9651629475036121, "grad_norm": 1.0596736669540405, "learning_rate": 0.0001764921400918432, "loss": 0.7845, "step": 3006 }, { "epoch": 0.9654840263284636, "grad_norm": 1.0180422067642212, "learning_rate": 0.0001764696512758737, "loss": 0.8355, "step": 3007 }, { "epoch": 0.9658051051533152, "grad_norm": 0.8986467719078064, "learning_rate": 0.00017644715314230918, "loss": 0.8457, "step": 3008 }, { "epoch": 0.9661261839781666, "grad_norm": 0.9878740906715393, "learning_rate": 0.0001764246456938909, "loss": 0.7791, "step": 3009 }, { "epoch": 0.9664472628030182, "grad_norm": 0.8736189603805542, "learning_rate": 0.00017640212893336142, "loss": 0.8261, "step": 3010 }, { "epoch": 0.9667683416278696, "grad_norm": 0.9508904218673706, "learning_rate": 0.00017637960286346425, "loss": 0.8671, "step": 3011 }, { "epoch": 0.9670894204527212, "grad_norm": 1.029610514640808, "learning_rate": 0.00017635706748694413, "loss": 0.8247, "step": 3012 }, { "epoch": 0.9674104992775726, "grad_norm": 1.0193253755569458, "learning_rate": 0.000176334522806547, "loss": 0.8908, "step": 3013 }, { "epoch": 0.9677315781024242, "grad_norm": 1.048719882965088, "learning_rate": 0.00017631196882501973, "loss": 0.985, "step": 3014 }, { "epoch": 0.9680526569272756, "grad_norm": 0.8610148429870605, "learning_rate": 0.00017628940554511061, "loss": 0.7015, "step": 3015 }, { "epoch": 0.9683737357521272, "grad_norm": 0.8026429414749146, "learning_rate": 0.00017626683296956882, "loss": 0.7906, "step": 3016 }, { "epoch": 0.9686948145769787, "grad_norm": 1.1094107627868652, "learning_rate": 
0.0001762442511011448, "loss": 1.057, "step": 3017 }, { "epoch": 0.9690158934018301, "grad_norm": 1.203913688659668, "learning_rate": 0.00017622165994259, "loss": 0.7759, "step": 3018 }, { "epoch": 0.9693369722266817, "grad_norm": 0.8827437162399292, "learning_rate": 0.0001761990594966572, "loss": 0.948, "step": 3019 }, { "epoch": 0.9696580510515331, "grad_norm": 1.1221593618392944, "learning_rate": 0.0001761764497661001, "loss": 0.9794, "step": 3020 }, { "epoch": 0.9699791298763847, "grad_norm": 0.9867079854011536, "learning_rate": 0.0001761538307536737, "loss": 1.082, "step": 3021 }, { "epoch": 0.9703002087012361, "grad_norm": 5.697073936462402, "learning_rate": 0.000176131202462134, "loss": 1.1723, "step": 3022 }, { "epoch": 0.9706212875260877, "grad_norm": 0.8379837870597839, "learning_rate": 0.0001761085648942382, "loss": 0.6992, "step": 3023 }, { "epoch": 0.9709423663509391, "grad_norm": 1.10336172580719, "learning_rate": 0.00017608591805274464, "loss": 0.9453, "step": 3024 }, { "epoch": 0.9712634451757907, "grad_norm": 1.2008827924728394, "learning_rate": 0.00017606326194041273, "loss": 0.8227, "step": 3025 }, { "epoch": 0.9715845240006421, "grad_norm": 0.9085438251495361, "learning_rate": 0.0001760405965600031, "loss": 0.8305, "step": 3026 }, { "epoch": 0.9719056028254937, "grad_norm": 1.2480196952819824, "learning_rate": 0.00017601792191427741, "loss": 1.0148, "step": 3027 }, { "epoch": 0.9722266816503452, "grad_norm": 0.7692457437515259, "learning_rate": 0.0001759952380059986, "loss": 0.7948, "step": 3028 }, { "epoch": 0.9725477604751966, "grad_norm": 1.0976073741912842, "learning_rate": 0.00017597254483793048, "loss": 0.9424, "step": 3029 }, { "epoch": 0.9728688393000482, "grad_norm": 0.9526044130325317, "learning_rate": 0.00017594984241283825, "loss": 0.8496, "step": 3030 }, { "epoch": 0.9731899181248996, "grad_norm": 0.7856842279434204, "learning_rate": 0.00017592713073348807, "loss": 0.6682, "step": 3031 }, { "epoch": 0.9735109969497512, "grad_norm": 
1.1540440320968628, "learning_rate": 0.00017590440980264738, "loss": 0.8165, "step": 3032 }, { "epoch": 0.9738320757746026, "grad_norm": 1.6081081628799438, "learning_rate": 0.00017588167962308458, "loss": 1.0609, "step": 3033 }, { "epoch": 0.9741531545994542, "grad_norm": 0.7435811758041382, "learning_rate": 0.00017585894019756925, "loss": 0.6044, "step": 3034 }, { "epoch": 0.9744742334243056, "grad_norm": 0.9327528476715088, "learning_rate": 0.0001758361915288722, "loss": 0.8865, "step": 3035 }, { "epoch": 0.9747953122491572, "grad_norm": 0.8505873680114746, "learning_rate": 0.00017581343361976524, "loss": 0.7442, "step": 3036 }, { "epoch": 0.9751163910740087, "grad_norm": 0.8487008213996887, "learning_rate": 0.00017579066647302133, "loss": 0.7235, "step": 3037 }, { "epoch": 0.9754374698988602, "grad_norm": 1.0709935426712036, "learning_rate": 0.00017576789009141465, "loss": 0.9574, "step": 3038 }, { "epoch": 0.9757585487237117, "grad_norm": 0.9425415992736816, "learning_rate": 0.00017574510447772039, "loss": 0.7343, "step": 3039 }, { "epoch": 0.9760796275485631, "grad_norm": 1.0272222757339478, "learning_rate": 0.00017572230963471488, "loss": 0.9407, "step": 3040 }, { "epoch": 0.9764007063734147, "grad_norm": 1.2393730878829956, "learning_rate": 0.00017569950556517566, "loss": 0.93, "step": 3041 }, { "epoch": 0.9767217851982661, "grad_norm": 1.0345895290374756, "learning_rate": 0.00017567669227188128, "loss": 0.6844, "step": 3042 }, { "epoch": 0.9770428640231177, "grad_norm": 1.0678569078445435, "learning_rate": 0.0001756538697576115, "loss": 0.5873, "step": 3043 }, { "epoch": 0.9773639428479691, "grad_norm": 1.1300995349884033, "learning_rate": 0.0001756310380251472, "loss": 0.8044, "step": 3044 }, { "epoch": 0.9776850216728207, "grad_norm": 1.4100874662399292, "learning_rate": 0.00017560819707727033, "loss": 0.9445, "step": 3045 }, { "epoch": 0.9780061004976722, "grad_norm": 0.9837530255317688, "learning_rate": 0.00017558534691676397, "loss": 0.6677, "step": 
3046 }, { "epoch": 0.9783271793225237, "grad_norm": 1.2359012365341187, "learning_rate": 0.00017556248754641235, "loss": 0.8114, "step": 3047 }, { "epoch": 0.9786482581473752, "grad_norm": 0.6732703447341919, "learning_rate": 0.00017553961896900087, "loss": 0.5295, "step": 3048 }, { "epoch": 0.9789693369722267, "grad_norm": 1.0407007932662964, "learning_rate": 0.00017551674118731591, "loss": 0.714, "step": 3049 }, { "epoch": 0.9792904157970782, "grad_norm": 0.6249756217002869, "learning_rate": 0.00017549385420414514, "loss": 0.4261, "step": 3050 }, { "epoch": 0.9796114946219296, "grad_norm": 0.8719810247421265, "learning_rate": 0.00017547095802227723, "loss": 0.9618, "step": 3051 }, { "epoch": 0.9799325734467812, "grad_norm": 0.9490910768508911, "learning_rate": 0.00017544805264450196, "loss": 0.925, "step": 3052 }, { "epoch": 0.9802536522716326, "grad_norm": 1.0451079607009888, "learning_rate": 0.00017542513807361037, "loss": 0.8166, "step": 3053 }, { "epoch": 0.9805747310964842, "grad_norm": 1.0744620561599731, "learning_rate": 0.00017540221431239453, "loss": 0.5847, "step": 3054 }, { "epoch": 0.9808958099213357, "grad_norm": 1.3825839757919312, "learning_rate": 0.00017537928136364755, "loss": 0.4744, "step": 3055 }, { "epoch": 0.9812168887461872, "grad_norm": 1.0428171157836914, "learning_rate": 0.0001753563392301638, "loss": 0.7581, "step": 3056 }, { "epoch": 0.9815379675710387, "grad_norm": 1.382125973701477, "learning_rate": 0.0001753333879147387, "loss": 0.9721, "step": 3057 }, { "epoch": 0.9818590463958902, "grad_norm": 0.9676405787467957, "learning_rate": 0.00017531042742016876, "loss": 0.9361, "step": 3058 }, { "epoch": 0.9821801252207417, "grad_norm": 0.9411641955375671, "learning_rate": 0.00017528745774925172, "loss": 0.9277, "step": 3059 }, { "epoch": 0.9825012040455932, "grad_norm": 1.0995005369186401, "learning_rate": 0.00017526447890478633, "loss": 0.9208, "step": 3060 }, { "epoch": 0.9828222828704447, "grad_norm": 0.9930598735809326, 
"learning_rate": 0.00017524149088957245, "loss": 0.798, "step": 3061 }, { "epoch": 0.9831433616952961, "grad_norm": 0.6785845756530762, "learning_rate": 0.00017521849370641114, "loss": 0.8495, "step": 3062 }, { "epoch": 0.9834644405201477, "grad_norm": 0.7817926406860352, "learning_rate": 0.00017519548735810456, "loss": 0.787, "step": 3063 }, { "epoch": 0.9837855193449992, "grad_norm": 0.8893129825592041, "learning_rate": 0.00017517247184745593, "loss": 0.9347, "step": 3064 }, { "epoch": 0.9841065981698507, "grad_norm": 0.8218941688537598, "learning_rate": 0.00017514944717726962, "loss": 0.7465, "step": 3065 }, { "epoch": 0.9844276769947022, "grad_norm": 1.1597498655319214, "learning_rate": 0.00017512641335035113, "loss": 0.9377, "step": 3066 }, { "epoch": 0.9847487558195537, "grad_norm": 1.1089355945587158, "learning_rate": 0.00017510337036950703, "loss": 1.0179, "step": 3067 }, { "epoch": 0.9850698346444052, "grad_norm": 0.9803544878959656, "learning_rate": 0.0001750803182375451, "loss": 1.0501, "step": 3068 }, { "epoch": 0.9853909134692567, "grad_norm": 1.3686444759368896, "learning_rate": 0.00017505725695727412, "loss": 1.0379, "step": 3069 }, { "epoch": 0.9857119922941082, "grad_norm": 1.3570899963378906, "learning_rate": 0.00017503418653150405, "loss": 0.8903, "step": 3070 }, { "epoch": 0.9860330711189597, "grad_norm": 1.031009554862976, "learning_rate": 0.00017501110696304596, "loss": 0.8283, "step": 3071 }, { "epoch": 0.9863541499438112, "grad_norm": 1.3445180654525757, "learning_rate": 0.00017498801825471203, "loss": 0.9019, "step": 3072 }, { "epoch": 0.9866752287686628, "grad_norm": 0.8621981143951416, "learning_rate": 0.00017496492040931552, "loss": 0.7137, "step": 3073 }, { "epoch": 0.9869963075935142, "grad_norm": 1.229825735092163, "learning_rate": 0.00017494181342967083, "loss": 1.0208, "step": 3074 }, { "epoch": 0.9873173864183657, "grad_norm": 0.9832011461257935, "learning_rate": 0.00017491869731859353, "loss": 0.7463, "step": 3075 }, { "epoch": 
0.9876384652432172, "grad_norm": 0.7911801934242249, "learning_rate": 0.00017489557207890023, "loss": 0.8144, "step": 3076 }, { "epoch": 0.9879595440680687, "grad_norm": 0.849236011505127, "learning_rate": 0.0001748724377134086, "loss": 0.7658, "step": 3077 }, { "epoch": 0.9882806228929202, "grad_norm": 0.7961871027946472, "learning_rate": 0.0001748492942249376, "loss": 0.8704, "step": 3078 }, { "epoch": 0.9886017017177717, "grad_norm": 1.293888807296753, "learning_rate": 0.00017482614161630714, "loss": 0.9067, "step": 3079 }, { "epoch": 0.9889227805426232, "grad_norm": 1.098433256149292, "learning_rate": 0.00017480297989033825, "loss": 0.9446, "step": 3080 }, { "epoch": 0.9892438593674747, "grad_norm": 1.4394235610961914, "learning_rate": 0.0001747798090498532, "loss": 0.7206, "step": 3081 }, { "epoch": 0.9895649381923263, "grad_norm": 1.047020435333252, "learning_rate": 0.00017475662909767522, "loss": 0.9727, "step": 3082 }, { "epoch": 0.9898860170171777, "grad_norm": 0.9796097278594971, "learning_rate": 0.00017473344003662877, "loss": 0.8787, "step": 3083 }, { "epoch": 0.9902070958420293, "grad_norm": 0.9924256801605225, "learning_rate": 0.00017471024186953936, "loss": 0.7212, "step": 3084 }, { "epoch": 0.9905281746668807, "grad_norm": 1.5609381198883057, "learning_rate": 0.00017468703459923356, "loss": 0.9614, "step": 3085 }, { "epoch": 0.9908492534917323, "grad_norm": 0.9042816162109375, "learning_rate": 0.00017466381822853915, "loss": 0.8655, "step": 3086 }, { "epoch": 0.9911703323165837, "grad_norm": 1.2502557039260864, "learning_rate": 0.00017464059276028497, "loss": 0.8242, "step": 3087 }, { "epoch": 0.9914914111414352, "grad_norm": 1.919909119606018, "learning_rate": 0.00017461735819730096, "loss": 0.8001, "step": 3088 }, { "epoch": 0.9918124899662867, "grad_norm": 1.0362643003463745, "learning_rate": 0.00017459411454241822, "loss": 0.6697, "step": 3089 }, { "epoch": 0.9921335687911382, "grad_norm": 1.6059205532073975, "learning_rate": 
0.00017457086179846888, "loss": 0.898, "step": 3090 }, { "epoch": 0.9924546476159897, "grad_norm": 1.2551214694976807, "learning_rate": 0.00017454759996828623, "loss": 0.8499, "step": 3091 }, { "epoch": 0.9927757264408412, "grad_norm": 0.9736458659172058, "learning_rate": 0.00017452432905470464, "loss": 0.8884, "step": 3092 }, { "epoch": 0.9930968052656928, "grad_norm": 1.4679756164550781, "learning_rate": 0.00017450104906055963, "loss": 0.9162, "step": 3093 }, { "epoch": 0.9934178840905442, "grad_norm": 0.9897777438163757, "learning_rate": 0.00017447775998868776, "loss": 0.7051, "step": 3094 }, { "epoch": 0.9937389629153958, "grad_norm": 0.9767149686813354, "learning_rate": 0.00017445446184192676, "loss": 0.7716, "step": 3095 }, { "epoch": 0.9940600417402472, "grad_norm": 1.2053884267807007, "learning_rate": 0.0001744311546231154, "loss": 0.8858, "step": 3096 }, { "epoch": 0.9943811205650988, "grad_norm": 1.160662293434143, "learning_rate": 0.00017440783833509366, "loss": 0.5346, "step": 3097 }, { "epoch": 0.9947021993899502, "grad_norm": 0.992720365524292, "learning_rate": 0.00017438451298070252, "loss": 0.6864, "step": 3098 }, { "epoch": 0.9950232782148017, "grad_norm": 0.705855667591095, "learning_rate": 0.0001743611785627841, "loss": 0.6184, "step": 3099 }, { "epoch": 0.9953443570396532, "grad_norm": 1.4650824069976807, "learning_rate": 0.00017433783508418162, "loss": 0.4631, "step": 3100 }, { "epoch": 0.9956654358645047, "grad_norm": 1.113871693611145, "learning_rate": 0.00017431448254773944, "loss": 1.1905, "step": 3101 }, { "epoch": 0.9959865146893563, "grad_norm": 1.3432302474975586, "learning_rate": 0.00017429112095630295, "loss": 0.6531, "step": 3102 }, { "epoch": 0.9963075935142077, "grad_norm": 1.2041120529174805, "learning_rate": 0.00017426775031271875, "loss": 0.8234, "step": 3103 }, { "epoch": 0.9966286723390593, "grad_norm": 0.980356752872467, "learning_rate": 0.00017424437061983446, "loss": 1.0048, "step": 3104 }, { "epoch": 0.9969497511639107, 
"grad_norm": 0.9739211201667786, "learning_rate": 0.00017422098188049883, "loss": 0.648, "step": 3105 }, { "epoch": 0.9972708299887623, "grad_norm": 1.0046653747558594, "learning_rate": 0.00017419758409756164, "loss": 0.8891, "step": 3106 }, { "epoch": 0.9975919088136137, "grad_norm": 0.8817589282989502, "learning_rate": 0.00017417417727387394, "loss": 0.8698, "step": 3107 }, { "epoch": 0.9979129876384653, "grad_norm": 0.8024824857711792, "learning_rate": 0.0001741507614122877, "loss": 0.7637, "step": 3108 }, { "epoch": 0.9982340664633167, "grad_norm": 1.0565115213394165, "learning_rate": 0.0001741273365156561, "loss": 0.9549, "step": 3109 }, { "epoch": 0.9985551452881682, "grad_norm": 1.4340211153030396, "learning_rate": 0.00017410390258683345, "loss": 0.9085, "step": 3110 }, { "epoch": 0.9988762241130198, "grad_norm": 0.8469492197036743, "learning_rate": 0.000174080459628675, "loss": 0.7797, "step": 3111 }, { "epoch": 0.9991973029378712, "grad_norm": 2.2130372524261475, "learning_rate": 0.00017405700764403726, "loss": 0.7562, "step": 3112 }, { "epoch": 0.9995183817627228, "grad_norm": 0.984209418296814, "learning_rate": 0.00017403354663577783, "loss": 0.9092, "step": 3113 }, { "epoch": 0.9998394605875742, "grad_norm": 1.7540405988693237, "learning_rate": 0.00017401007660675525, "loss": 0.6898, "step": 3114 }, { "epoch": 1.0001605394124258, "grad_norm": 0.4434870779514313, "learning_rate": 0.00017398659755982936, "loss": 0.5678, "step": 3115 }, { "epoch": 1.0004816182372773, "grad_norm": 0.6553979516029358, "learning_rate": 0.000173963109497861, "loss": 1.0352, "step": 3116 }, { "epoch": 1.0008026970621287, "grad_norm": 0.8394825458526611, "learning_rate": 0.00017393961242371205, "loss": 0.4912, "step": 3117 }, { "epoch": 1.0011237758869802, "grad_norm": 1.0343304872512817, "learning_rate": 0.00017391610634024564, "loss": 0.6326, "step": 3118 }, { "epoch": 1.0014448547118318, "grad_norm": 0.8101858496665955, "learning_rate": 0.0001738925912503259, "loss": 0.5709, 
"step": 3119 }, { "epoch": 1.0017659335366833, "grad_norm": 0.8755748271942139, "learning_rate": 0.000173869067156818, "loss": 0.377, "step": 3120 }, { "epoch": 1.0020870123615349, "grad_norm": 0.9467462301254272, "learning_rate": 0.00017384553406258842, "loss": 0.6883, "step": 3121 }, { "epoch": 1.0024080911863862, "grad_norm": 0.8870365619659424, "learning_rate": 0.0001738219919705044, "loss": 0.7174, "step": 3122 }, { "epoch": 1.0027291700112377, "grad_norm": 1.0948724746704102, "learning_rate": 0.00017379844088343468, "loss": 0.9455, "step": 3123 }, { "epoch": 1.0030502488360893, "grad_norm": 0.8313069939613342, "learning_rate": 0.00017377488080424876, "loss": 0.6838, "step": 3124 }, { "epoch": 1.0033713276609408, "grad_norm": 0.9020499587059021, "learning_rate": 0.0001737513117358174, "loss": 0.7241, "step": 3125 }, { "epoch": 1.0036924064857922, "grad_norm": 1.03434419631958, "learning_rate": 0.0001737277336810124, "loss": 0.5053, "step": 3126 }, { "epoch": 1.0040134853106437, "grad_norm": 0.6919084191322327, "learning_rate": 0.00017370414664270674, "loss": 0.5537, "step": 3127 }, { "epoch": 1.0043345641354953, "grad_norm": 0.7697117924690247, "learning_rate": 0.00017368055062377434, "loss": 0.5852, "step": 3128 }, { "epoch": 1.0046556429603468, "grad_norm": 0.8752188086509705, "learning_rate": 0.00017365694562709034, "loss": 0.6626, "step": 3129 }, { "epoch": 1.0049767217851984, "grad_norm": 1.3550692796707153, "learning_rate": 0.00017363333165553092, "loss": 0.5762, "step": 3130 }, { "epoch": 1.0052978006100497, "grad_norm": 1.0785512924194336, "learning_rate": 0.00017360970871197346, "loss": 0.6023, "step": 3131 }, { "epoch": 1.0056188794349012, "grad_norm": 0.6877568960189819, "learning_rate": 0.0001735860767992962, "loss": 0.5769, "step": 3132 }, { "epoch": 1.0059399582597528, "grad_norm": 1.3127740621566772, "learning_rate": 0.00017356243592037875, "loss": 0.5348, "step": 3133 }, { "epoch": 1.0062610370846043, "grad_norm": 1.041380763053894, 
"learning_rate": 0.0001735387860781016, "loss": 0.8355, "step": 3134 }, { "epoch": 1.0065821159094557, "grad_norm": 1.9624297618865967, "learning_rate": 0.00017351512727534644, "loss": 0.7688, "step": 3135 }, { "epoch": 1.0069031947343072, "grad_norm": 1.034769892692566, "learning_rate": 0.000173491459514996, "loss": 0.6289, "step": 3136 }, { "epoch": 1.0072242735591588, "grad_norm": 0.9164260625839233, "learning_rate": 0.00017346778279993415, "loss": 0.6022, "step": 3137 }, { "epoch": 1.0075453523840103, "grad_norm": 0.9046801328659058, "learning_rate": 0.0001734440971330458, "loss": 0.7448, "step": 3138 }, { "epoch": 1.0078664312088619, "grad_norm": 1.0306909084320068, "learning_rate": 0.00017342040251721702, "loss": 0.738, "step": 3139 }, { "epoch": 1.0081875100337132, "grad_norm": 1.137430191040039, "learning_rate": 0.0001733966989553349, "loss": 0.7579, "step": 3140 }, { "epoch": 1.0085085888585648, "grad_norm": 0.7871647477149963, "learning_rate": 0.00017337298645028764, "loss": 0.5093, "step": 3141 }, { "epoch": 1.0088296676834163, "grad_norm": 0.9174445867538452, "learning_rate": 0.00017334926500496456, "loss": 0.6771, "step": 3142 }, { "epoch": 1.0091507465082679, "grad_norm": 0.8707671761512756, "learning_rate": 0.00017332553462225602, "loss": 0.698, "step": 3143 }, { "epoch": 1.0094718253331192, "grad_norm": 1.3214155435562134, "learning_rate": 0.00017330179530505348, "loss": 0.7583, "step": 3144 }, { "epoch": 1.0097929041579707, "grad_norm": 0.8207698464393616, "learning_rate": 0.00017327804705624957, "loss": 0.6534, "step": 3145 }, { "epoch": 1.0101139829828223, "grad_norm": 0.9219654202461243, "learning_rate": 0.0001732542898787379, "loss": 0.5564, "step": 3146 }, { "epoch": 1.0104350618076738, "grad_norm": 0.9626970887184143, "learning_rate": 0.00017323052377541317, "loss": 0.758, "step": 3147 }, { "epoch": 1.0107561406325254, "grad_norm": 0.8818027377128601, "learning_rate": 0.0001732067487491713, "loss": 0.7285, "step": 3148 }, { "epoch": 
1.0110772194573767, "grad_norm": 1.1400665044784546, "learning_rate": 0.0001731829648029091, "loss": 0.7988, "step": 3149 }, { "epoch": 1.0113982982822283, "grad_norm": 1.296278476715088, "learning_rate": 0.0001731591719395247, "loss": 0.5509, "step": 3150 }, { "epoch": 1.0117193771070798, "grad_norm": 0.886254608631134, "learning_rate": 0.00017313537016191706, "loss": 0.5616, "step": 3151 }, { "epoch": 1.0120404559319314, "grad_norm": 0.9640228748321533, "learning_rate": 0.00017311155947298643, "loss": 0.6448, "step": 3152 }, { "epoch": 1.0123615347567827, "grad_norm": 1.8710490465164185, "learning_rate": 0.00017308773987563406, "loss": 0.5566, "step": 3153 }, { "epoch": 1.0126826135816343, "grad_norm": 1.0365729331970215, "learning_rate": 0.00017306391137276224, "loss": 0.7364, "step": 3154 }, { "epoch": 1.0130036924064858, "grad_norm": 1.0165393352508545, "learning_rate": 0.00017304007396727448, "loss": 0.6858, "step": 3155 }, { "epoch": 1.0133247712313374, "grad_norm": 1.3366596698760986, "learning_rate": 0.00017301622766207527, "loss": 0.6922, "step": 3156 }, { "epoch": 1.013645850056189, "grad_norm": 1.253536581993103, "learning_rate": 0.00017299237246007015, "loss": 0.7062, "step": 3157 }, { "epoch": 1.0139669288810402, "grad_norm": 0.9793636202812195, "learning_rate": 0.00017296850836416588, "loss": 0.6105, "step": 3158 }, { "epoch": 1.0142880077058918, "grad_norm": 1.0851402282714844, "learning_rate": 0.00017294463537727024, "loss": 0.6383, "step": 3159 }, { "epoch": 1.0146090865307433, "grad_norm": 0.8400055766105652, "learning_rate": 0.000172920753502292, "loss": 0.6843, "step": 3160 }, { "epoch": 1.0149301653555949, "grad_norm": 1.1264251470565796, "learning_rate": 0.00017289686274214118, "loss": 0.5577, "step": 3161 }, { "epoch": 1.0152512441804462, "grad_norm": 1.5338242053985596, "learning_rate": 0.0001728729630997287, "loss": 0.4795, "step": 3162 }, { "epoch": 1.0155723230052978, "grad_norm": 0.7459766864776611, "learning_rate": 
0.00017284905457796675, "loss": 0.4932, "step": 3163 }, { "epoch": 1.0158934018301493, "grad_norm": 0.6073582172393799, "learning_rate": 0.00017282513717976848, "loss": 0.3484, "step": 3164 }, { "epoch": 1.0162144806550009, "grad_norm": 0.8098936080932617, "learning_rate": 0.00017280121090804812, "loss": 0.4744, "step": 3165 }, { "epoch": 1.0165355594798524, "grad_norm": 1.1062911748886108, "learning_rate": 0.00017277727576572107, "loss": 1.023, "step": 3166 }, { "epoch": 1.0168566383047037, "grad_norm": 1.3004356622695923, "learning_rate": 0.00017275333175570368, "loss": 0.7562, "step": 3167 }, { "epoch": 1.0171777171295553, "grad_norm": 1.5146620273590088, "learning_rate": 0.00017272937888091353, "loss": 0.6177, "step": 3168 }, { "epoch": 1.0174987959544068, "grad_norm": 1.1251643896102905, "learning_rate": 0.0001727054171442692, "loss": 0.4857, "step": 3169 }, { "epoch": 1.0178198747792584, "grad_norm": 0.8808386921882629, "learning_rate": 0.0001726814465486903, "loss": 0.2652, "step": 3170 }, { "epoch": 1.0181409536041097, "grad_norm": 1.0438605546951294, "learning_rate": 0.0001726574670970976, "loss": 0.5482, "step": 3171 }, { "epoch": 1.0184620324289613, "grad_norm": 3.0426223278045654, "learning_rate": 0.00017263347879241291, "loss": 0.7843, "step": 3172 }, { "epoch": 1.0187831112538128, "grad_norm": 0.8068621754646301, "learning_rate": 0.00017260948163755918, "loss": 0.7201, "step": 3173 }, { "epoch": 1.0191041900786644, "grad_norm": 0.9241558313369751, "learning_rate": 0.00017258547563546038, "loss": 0.719, "step": 3174 }, { "epoch": 1.019425268903516, "grad_norm": 0.9264689087867737, "learning_rate": 0.00017256146078904153, "loss": 0.8149, "step": 3175 }, { "epoch": 1.0197463477283673, "grad_norm": 0.7384179830551147, "learning_rate": 0.00017253743710122875, "loss": 0.5689, "step": 3176 }, { "epoch": 1.0200674265532188, "grad_norm": 1.2575445175170898, "learning_rate": 0.00017251340457494934, "loss": 0.6949, "step": 3177 }, { "epoch": 1.0203885053780704, 
"grad_norm": 0.8369306921958923, "learning_rate": 0.0001724893632131315, "loss": 0.6599, "step": 3178 }, { "epoch": 1.020709584202922, "grad_norm": 1.2006936073303223, "learning_rate": 0.0001724653130187047, "loss": 0.6771, "step": 3179 }, { "epoch": 1.0210306630277732, "grad_norm": 0.7915051579475403, "learning_rate": 0.00017244125399459926, "loss": 0.7149, "step": 3180 }, { "epoch": 1.0213517418526248, "grad_norm": 1.273621916770935, "learning_rate": 0.00017241718614374678, "loss": 0.7439, "step": 3181 }, { "epoch": 1.0216728206774763, "grad_norm": 0.8545572757720947, "learning_rate": 0.0001723931094690798, "loss": 0.72, "step": 3182 }, { "epoch": 1.0219938995023279, "grad_norm": 0.8133922815322876, "learning_rate": 0.00017236902397353205, "loss": 0.5812, "step": 3183 }, { "epoch": 1.0223149783271792, "grad_norm": 1.2312626838684082, "learning_rate": 0.00017234492966003824, "loss": 0.6452, "step": 3184 }, { "epoch": 1.0226360571520308, "grad_norm": 1.0506107807159424, "learning_rate": 0.00017232082653153422, "loss": 0.697, "step": 3185 }, { "epoch": 1.0229571359768823, "grad_norm": 0.9486799240112305, "learning_rate": 0.00017229671459095683, "loss": 0.8227, "step": 3186 }, { "epoch": 1.0232782148017339, "grad_norm": 0.8300420641899109, "learning_rate": 0.0001722725938412441, "loss": 0.7616, "step": 3187 }, { "epoch": 1.0235992936265854, "grad_norm": 1.1895195245742798, "learning_rate": 0.00017224846428533499, "loss": 0.8887, "step": 3188 }, { "epoch": 1.0239203724514367, "grad_norm": 0.9756418466567993, "learning_rate": 0.0001722243259261697, "loss": 0.5706, "step": 3189 }, { "epoch": 1.0242414512762883, "grad_norm": 0.9104772210121155, "learning_rate": 0.00017220017876668934, "loss": 0.6564, "step": 3190 }, { "epoch": 1.0245625301011398, "grad_norm": 0.8529773950576782, "learning_rate": 0.00017217602280983623, "loss": 0.664, "step": 3191 }, { "epoch": 1.0248836089259914, "grad_norm": 1.0890889167785645, "learning_rate": 0.00017215185805855368, "loss": 0.5597, 
"step": 3192 }, { "epoch": 1.0252046877508427, "grad_norm": 0.9177720546722412, "learning_rate": 0.00017212768451578607, "loss": 0.8042, "step": 3193 }, { "epoch": 1.0255257665756943, "grad_norm": 0.8690738677978516, "learning_rate": 0.00017210350218447887, "loss": 0.6965, "step": 3194 }, { "epoch": 1.0258468454005458, "grad_norm": 0.9098244309425354, "learning_rate": 0.00017207931106757868, "loss": 0.5547, "step": 3195 }, { "epoch": 1.0261679242253974, "grad_norm": 0.8864629864692688, "learning_rate": 0.00017205511116803306, "loss": 0.6666, "step": 3196 }, { "epoch": 1.026489003050249, "grad_norm": 0.9058293700218201, "learning_rate": 0.0001720309024887907, "loss": 0.6675, "step": 3197 }, { "epoch": 1.0268100818751003, "grad_norm": 1.372377634048462, "learning_rate": 0.00017200668503280136, "loss": 0.7582, "step": 3198 }, { "epoch": 1.0271311606999518, "grad_norm": 0.7774912118911743, "learning_rate": 0.0001719824588030159, "loss": 0.6099, "step": 3199 }, { "epoch": 1.0274522395248034, "grad_norm": 1.0164647102355957, "learning_rate": 0.00017195822380238615, "loss": 0.7993, "step": 3200 }, { "epoch": 1.027773318349655, "grad_norm": 0.9077091813087463, "learning_rate": 0.0001719339800338651, "loss": 0.7304, "step": 3201 }, { "epoch": 1.0280943971745062, "grad_norm": 1.8159345388412476, "learning_rate": 0.00017190972750040682, "loss": 0.6752, "step": 3202 }, { "epoch": 1.0284154759993578, "grad_norm": 1.1675589084625244, "learning_rate": 0.00017188546620496635, "loss": 0.8066, "step": 3203 }, { "epoch": 1.0287365548242093, "grad_norm": 1.103464126586914, "learning_rate": 0.00017186119615049988, "loss": 0.6484, "step": 3204 }, { "epoch": 1.0290576336490609, "grad_norm": 1.758217692375183, "learning_rate": 0.00017183691733996462, "loss": 0.6094, "step": 3205 }, { "epoch": 1.0293787124739124, "grad_norm": 1.3336718082427979, "learning_rate": 0.00017181262977631888, "loss": 0.6994, "step": 3206 }, { "epoch": 1.0296997912987638, "grad_norm": 1.3006914854049683, 
"learning_rate": 0.00017178833346252206, "loss": 0.5225, "step": 3207 }, { "epoch": 1.0300208701236153, "grad_norm": 1.329729437828064, "learning_rate": 0.00017176402840153455, "loss": 0.6402, "step": 3208 }, { "epoch": 1.0303419489484669, "grad_norm": 1.2530442476272583, "learning_rate": 0.00017173971459631787, "loss": 0.6543, "step": 3209 }, { "epoch": 1.0306630277733184, "grad_norm": 1.1140762567520142, "learning_rate": 0.0001717153920498346, "loss": 0.5683, "step": 3210 }, { "epoch": 1.0309841065981697, "grad_norm": 1.3410987854003906, "learning_rate": 0.0001716910607650483, "loss": 0.5983, "step": 3211 }, { "epoch": 1.0313051854230213, "grad_norm": 1.056822419166565, "learning_rate": 0.0001716667207449237, "loss": 0.5871, "step": 3212 }, { "epoch": 1.0316262642478728, "grad_norm": 1.11257004737854, "learning_rate": 0.0001716423719924266, "loss": 0.4527, "step": 3213 }, { "epoch": 1.0319473430727244, "grad_norm": 1.4045213460922241, "learning_rate": 0.00017161801451052376, "loss": 0.7051, "step": 3214 }, { "epoch": 1.032268421897576, "grad_norm": 0.6990142464637756, "learning_rate": 0.00017159364830218312, "loss": 0.69, "step": 3215 }, { "epoch": 1.0325895007224273, "grad_norm": 0.6937003135681152, "learning_rate": 0.0001715692733703736, "loss": 0.9796, "step": 3216 }, { "epoch": 1.0329105795472788, "grad_norm": 0.7836512923240662, "learning_rate": 0.00017154488971806518, "loss": 0.5325, "step": 3217 }, { "epoch": 1.0332316583721304, "grad_norm": 0.7977432012557983, "learning_rate": 0.00017152049734822902, "loss": 0.5356, "step": 3218 }, { "epoch": 1.033552737196982, "grad_norm": 0.8344630599021912, "learning_rate": 0.00017149609626383717, "loss": 0.529, "step": 3219 }, { "epoch": 1.0338738160218333, "grad_norm": 0.9146289229393005, "learning_rate": 0.00017147168646786286, "loss": 0.4378, "step": 3220 }, { "epoch": 1.0341948948466848, "grad_norm": 0.8820130825042725, "learning_rate": 0.00017144726796328034, "loss": 0.4912, "step": 3221 }, { "epoch": 
1.0345159736715364, "grad_norm": 0.8433278203010559, "learning_rate": 0.00017142284075306497, "loss": 0.5559, "step": 3222 }, { "epoch": 1.034837052496388, "grad_norm": 1.3478479385375977, "learning_rate": 0.0001713984048401931, "loss": 0.8736, "step": 3223 }, { "epoch": 1.0351581313212395, "grad_norm": 1.062957763671875, "learning_rate": 0.00017137396022764214, "loss": 0.7858, "step": 3224 }, { "epoch": 1.0354792101460908, "grad_norm": 1.3038783073425293, "learning_rate": 0.00017134950691839065, "loss": 0.8089, "step": 3225 }, { "epoch": 1.0358002889709423, "grad_norm": 1.0557838678359985, "learning_rate": 0.00017132504491541818, "loss": 0.5426, "step": 3226 }, { "epoch": 1.036121367795794, "grad_norm": 0.7496936321258545, "learning_rate": 0.0001713005742217053, "loss": 0.5222, "step": 3227 }, { "epoch": 1.0364424466206454, "grad_norm": 0.9153050184249878, "learning_rate": 0.00017127609484023377, "loss": 0.6843, "step": 3228 }, { "epoch": 1.0367635254454968, "grad_norm": 1.0914545059204102, "learning_rate": 0.00017125160677398626, "loss": 0.5231, "step": 3229 }, { "epoch": 1.0370846042703483, "grad_norm": 0.7990017533302307, "learning_rate": 0.0001712271100259466, "loss": 0.6105, "step": 3230 }, { "epoch": 1.0374056830951999, "grad_norm": 0.8752117156982422, "learning_rate": 0.00017120260459909967, "loss": 0.6104, "step": 3231 }, { "epoch": 1.0377267619200514, "grad_norm": 1.1408075094223022, "learning_rate": 0.0001711780904964313, "loss": 0.8112, "step": 3232 }, { "epoch": 1.038047840744903, "grad_norm": 0.7575783133506775, "learning_rate": 0.00017115356772092857, "loss": 0.4848, "step": 3233 }, { "epoch": 1.0383689195697543, "grad_norm": 1.2850323915481567, "learning_rate": 0.0001711290362755794, "loss": 0.7803, "step": 3234 }, { "epoch": 1.0386899983946059, "grad_norm": 0.9859423637390137, "learning_rate": 0.00017110449616337289, "loss": 0.6785, "step": 3235 }, { "epoch": 1.0390110772194574, "grad_norm": 1.224280834197998, "learning_rate": 
0.00017107994738729926, "loss": 0.806, "step": 3236 }, { "epoch": 1.039332156044309, "grad_norm": 1.1364513635635376, "learning_rate": 0.00017105538995034963, "loss": 0.8497, "step": 3237 }, { "epoch": 1.0396532348691603, "grad_norm": 0.712236225605011, "learning_rate": 0.00017103082385551627, "loss": 0.4612, "step": 3238 }, { "epoch": 1.0399743136940118, "grad_norm": 0.8277900218963623, "learning_rate": 0.0001710062491057925, "loss": 0.6745, "step": 3239 }, { "epoch": 1.0402953925188634, "grad_norm": 1.1629518270492554, "learning_rate": 0.00017098166570417262, "loss": 0.7966, "step": 3240 }, { "epoch": 1.040616471343715, "grad_norm": 1.143298864364624, "learning_rate": 0.0001709570736536521, "loss": 0.5971, "step": 3241 }, { "epoch": 1.0409375501685665, "grad_norm": 1.7243444919586182, "learning_rate": 0.0001709324729572274, "loss": 0.521, "step": 3242 }, { "epoch": 1.0412586289934178, "grad_norm": 0.7155652046203613, "learning_rate": 0.000170907863617896, "loss": 0.5353, "step": 3243 }, { "epoch": 1.0415797078182694, "grad_norm": 0.8861052393913269, "learning_rate": 0.00017088324563865656, "loss": 0.7525, "step": 3244 }, { "epoch": 1.041900786643121, "grad_norm": 0.786766529083252, "learning_rate": 0.00017085861902250865, "loss": 0.6192, "step": 3245 }, { "epoch": 1.0422218654679725, "grad_norm": 1.0128397941589355, "learning_rate": 0.0001708339837724529, "loss": 0.6272, "step": 3246 }, { "epoch": 1.0425429442928238, "grad_norm": 0.9892904162406921, "learning_rate": 0.0001708093398914911, "loss": 0.5604, "step": 3247 }, { "epoch": 1.0428640231176753, "grad_norm": 1.106776237487793, "learning_rate": 0.00017078468738262602, "loss": 0.7397, "step": 3248 }, { "epoch": 1.043185101942527, "grad_norm": 1.0783487558364868, "learning_rate": 0.00017076002624886154, "loss": 0.6305, "step": 3249 }, { "epoch": 1.0435061807673784, "grad_norm": 1.2297683954238892, "learning_rate": 0.00017073535649320248, "loss": 0.6252, "step": 3250 }, { "epoch": 1.04382725959223, "grad_norm": 
0.9655210375785828, "learning_rate": 0.00017071067811865476, "loss": 0.6704, "step": 3251 }, { "epoch": 1.0441483384170813, "grad_norm": 1.2158546447753906, "learning_rate": 0.00017068599112822543, "loss": 0.6869, "step": 3252 }, { "epoch": 1.0444694172419329, "grad_norm": 1.0447654724121094, "learning_rate": 0.0001706612955249225, "loss": 0.4427, "step": 3253 }, { "epoch": 1.0447904960667844, "grad_norm": 0.8590002059936523, "learning_rate": 0.000170636591311755, "loss": 0.5222, "step": 3254 }, { "epoch": 1.045111574891636, "grad_norm": 1.114013433456421, "learning_rate": 0.00017061187849173317, "loss": 0.5776, "step": 3255 }, { "epoch": 1.0454326537164873, "grad_norm": 0.8874313235282898, "learning_rate": 0.0001705871570678681, "loss": 0.6153, "step": 3256 }, { "epoch": 1.0457537325413389, "grad_norm": 1.2078914642333984, "learning_rate": 0.0001705624270431721, "loss": 0.7155, "step": 3257 }, { "epoch": 1.0460748113661904, "grad_norm": 0.9867888689041138, "learning_rate": 0.00017053768842065833, "loss": 0.5113, "step": 3258 }, { "epoch": 1.046395890191042, "grad_norm": 0.6994266510009766, "learning_rate": 0.00017051294120334125, "loss": 0.5073, "step": 3259 }, { "epoch": 1.0467169690158935, "grad_norm": 1.1606996059417725, "learning_rate": 0.00017048818539423615, "loss": 0.5518, "step": 3260 }, { "epoch": 1.0470380478407448, "grad_norm": 0.9645611643791199, "learning_rate": 0.00017046342099635948, "loss": 0.5303, "step": 3261 }, { "epoch": 1.0473591266655964, "grad_norm": 0.6704840660095215, "learning_rate": 0.00017043864801272868, "loss": 0.4778, "step": 3262 }, { "epoch": 1.047680205490448, "grad_norm": 0.7522701025009155, "learning_rate": 0.0001704138664463623, "loss": 0.5107, "step": 3263 }, { "epoch": 1.0480012843152995, "grad_norm": 1.1835343837738037, "learning_rate": 0.00017038907630027988, "loss": 0.516, "step": 3264 }, { "epoch": 1.0483223631401508, "grad_norm": 0.9107168316841125, "learning_rate": 0.00017036427757750205, "loss": 0.7626, "step": 3265 }, 
{ "epoch": 1.0486434419650024, "grad_norm": 0.9731441736221313, "learning_rate": 0.00017033947028105039, "loss": 0.848, "step": 3266 }, { "epoch": 1.048964520789854, "grad_norm": 1.073034644126892, "learning_rate": 0.00017031465441394767, "loss": 0.875, "step": 3267 }, { "epoch": 1.0492855996147055, "grad_norm": 1.114713430404663, "learning_rate": 0.00017028982997921758, "loss": 0.5253, "step": 3268 }, { "epoch": 1.049606678439557, "grad_norm": 1.0419694185256958, "learning_rate": 0.00017026499697988493, "loss": 0.6738, "step": 3269 }, { "epoch": 1.0499277572644083, "grad_norm": 1.0044914484024048, "learning_rate": 0.0001702401554189755, "loss": 0.4802, "step": 3270 }, { "epoch": 1.05024883608926, "grad_norm": 0.8162968158721924, "learning_rate": 0.00017021530529951625, "loss": 0.3764, "step": 3271 }, { "epoch": 1.0505699149141114, "grad_norm": 0.8983839154243469, "learning_rate": 0.000170190446624535, "loss": 0.5214, "step": 3272 }, { "epoch": 1.050890993738963, "grad_norm": 1.0330827236175537, "learning_rate": 0.00017016557939706075, "loss": 0.7508, "step": 3273 }, { "epoch": 1.0512120725638143, "grad_norm": 0.8711283802986145, "learning_rate": 0.00017014070362012348, "loss": 0.6736, "step": 3274 }, { "epoch": 1.0515331513886659, "grad_norm": 0.8156806230545044, "learning_rate": 0.00017011581929675425, "loss": 0.633, "step": 3275 }, { "epoch": 1.0518542302135174, "grad_norm": 0.9561333060264587, "learning_rate": 0.0001700909264299851, "loss": 0.7371, "step": 3276 }, { "epoch": 1.052175309038369, "grad_norm": 0.7813916802406311, "learning_rate": 0.00017006602502284918, "loss": 0.5109, "step": 3277 }, { "epoch": 1.0524963878632203, "grad_norm": 0.8601284027099609, "learning_rate": 0.00017004111507838064, "loss": 0.5188, "step": 3278 }, { "epoch": 1.0528174666880719, "grad_norm": 1.1225234270095825, "learning_rate": 0.00017001619659961467, "loss": 0.6945, "step": 3279 }, { "epoch": 1.0531385455129234, "grad_norm": 1.0475960969924927, "learning_rate": 
0.00016999126958958756, "loss": 0.6678, "step": 3280 }, { "epoch": 1.053459624337775, "grad_norm": 1.015073537826538, "learning_rate": 0.00016996633405133655, "loss": 0.8029, "step": 3281 }, { "epoch": 1.0537807031626265, "grad_norm": 1.1307599544525146, "learning_rate": 0.00016994138998789997, "loss": 0.6215, "step": 3282 }, { "epoch": 1.0541017819874778, "grad_norm": 1.1362276077270508, "learning_rate": 0.00016991643740231716, "loss": 0.7888, "step": 3283 }, { "epoch": 1.0544228608123294, "grad_norm": 1.2660646438598633, "learning_rate": 0.0001698914762976285, "loss": 0.7849, "step": 3284 }, { "epoch": 1.054743939637181, "grad_norm": 0.9536210298538208, "learning_rate": 0.00016986650667687552, "loss": 0.6164, "step": 3285 }, { "epoch": 1.0550650184620325, "grad_norm": 0.9557814598083496, "learning_rate": 0.0001698415285431006, "loss": 0.6736, "step": 3286 }, { "epoch": 1.0553860972868838, "grad_norm": 1.0140166282653809, "learning_rate": 0.00016981654189934727, "loss": 0.6405, "step": 3287 }, { "epoch": 1.0557071761117354, "grad_norm": 0.9973203539848328, "learning_rate": 0.0001697915467486601, "loss": 0.7855, "step": 3288 }, { "epoch": 1.056028254936587, "grad_norm": 1.143325924873352, "learning_rate": 0.00016976654309408464, "loss": 0.8278, "step": 3289 }, { "epoch": 1.0563493337614385, "grad_norm": 1.1182677745819092, "learning_rate": 0.00016974153093866757, "loss": 0.6487, "step": 3290 }, { "epoch": 1.05667041258629, "grad_norm": 0.8752659559249878, "learning_rate": 0.00016971651028545648, "loss": 0.7054, "step": 3291 }, { "epoch": 1.0569914914111413, "grad_norm": 1.3598425388336182, "learning_rate": 0.00016969148113750007, "loss": 0.8206, "step": 3292 }, { "epoch": 1.057312570235993, "grad_norm": 0.8707671165466309, "learning_rate": 0.00016966644349784808, "loss": 0.6105, "step": 3293 }, { "epoch": 1.0576336490608444, "grad_norm": 0.9644749760627747, "learning_rate": 0.0001696413973695513, "loss": 0.5752, "step": 3294 }, { "epoch": 1.057954727885696, 
"grad_norm": 0.894107460975647, "learning_rate": 0.00016961634275566146, "loss": 0.6371, "step": 3295 }, { "epoch": 1.0582758067105473, "grad_norm": 1.123781442642212, "learning_rate": 0.00016959127965923142, "loss": 0.6209, "step": 3296 }, { "epoch": 1.0585968855353989, "grad_norm": 1.2167694568634033, "learning_rate": 0.0001695662080833151, "loss": 0.576, "step": 3297 }, { "epoch": 1.0589179643602504, "grad_norm": 1.0875262022018433, "learning_rate": 0.00016954112803096728, "loss": 0.6831, "step": 3298 }, { "epoch": 1.059239043185102, "grad_norm": 0.9846530556678772, "learning_rate": 0.000169516039505244, "loss": 0.6525, "step": 3299 }, { "epoch": 1.0595601220099535, "grad_norm": 0.9327017664909363, "learning_rate": 0.00016949094250920217, "loss": 0.5837, "step": 3300 }, { "epoch": 1.0598812008348049, "grad_norm": 0.8693040013313293, "learning_rate": 0.00016946583704589973, "loss": 0.7224, "step": 3301 }, { "epoch": 1.0602022796596564, "grad_norm": 1.8250339031219482, "learning_rate": 0.00016944072311839581, "loss": 0.6624, "step": 3302 }, { "epoch": 1.060523358484508, "grad_norm": 1.6133804321289062, "learning_rate": 0.0001694156007297504, "loss": 0.6358, "step": 3303 }, { "epoch": 1.0608444373093595, "grad_norm": 1.2496116161346436, "learning_rate": 0.00016939046988302458, "loss": 0.788, "step": 3304 }, { "epoch": 1.0611655161342108, "grad_norm": 0.8245418071746826, "learning_rate": 0.0001693653305812805, "loss": 0.6231, "step": 3305 }, { "epoch": 1.0614865949590624, "grad_norm": 0.9475066661834717, "learning_rate": 0.0001693401828275813, "loss": 0.6326, "step": 3306 }, { "epoch": 1.061807673783914, "grad_norm": 0.8447438478469849, "learning_rate": 0.00016931502662499118, "loss": 0.4383, "step": 3307 }, { "epoch": 1.0621287526087655, "grad_norm": 0.9479712247848511, "learning_rate": 0.00016928986197657525, "loss": 0.4397, "step": 3308 }, { "epoch": 1.062449831433617, "grad_norm": 1.2100830078125, "learning_rate": 0.0001692646888853999, "loss": 0.6239, "step": 
3309 }, { "epoch": 1.0627709102584684, "grad_norm": 1.2680039405822754, "learning_rate": 0.00016923950735453226, "loss": 0.5405, "step": 3310 }, { "epoch": 1.06309198908332, "grad_norm": 0.9997744560241699, "learning_rate": 0.0001692143173870407, "loss": 0.5493, "step": 3311 }, { "epoch": 1.0634130679081715, "grad_norm": 0.9026280641555786, "learning_rate": 0.0001691891189859945, "loss": 0.5545, "step": 3312 }, { "epoch": 1.063734146733023, "grad_norm": 1.112392544746399, "learning_rate": 0.000169163912154464, "loss": 0.6452, "step": 3313 }, { "epoch": 1.0640552255578744, "grad_norm": 0.7569609880447388, "learning_rate": 0.00016913869689552064, "loss": 0.4284, "step": 3314 }, { "epoch": 1.064376304382726, "grad_norm": 0.8292409181594849, "learning_rate": 0.0001691134732122368, "loss": 0.5371, "step": 3315 }, { "epoch": 1.0646973832075775, "grad_norm": 0.8496055006980896, "learning_rate": 0.00016908824110768584, "loss": 1.1306, "step": 3316 }, { "epoch": 1.065018462032429, "grad_norm": 0.9334166049957275, "learning_rate": 0.00016906300058494228, "loss": 0.619, "step": 3317 }, { "epoch": 1.0653395408572806, "grad_norm": 0.9362530708312988, "learning_rate": 0.00016903775164708163, "loss": 0.6312, "step": 3318 }, { "epoch": 1.0656606196821319, "grad_norm": 0.7941862344741821, "learning_rate": 0.00016901249429718032, "loss": 0.5325, "step": 3319 }, { "epoch": 1.0659816985069834, "grad_norm": 0.8351782560348511, "learning_rate": 0.00016898722853831593, "loss": 0.4099, "step": 3320 }, { "epoch": 1.066302777331835, "grad_norm": 2.7632505893707275, "learning_rate": 0.000168961954373567, "loss": 0.6414, "step": 3321 }, { "epoch": 1.0666238561566865, "grad_norm": 1.1203904151916504, "learning_rate": 0.00016893667180601312, "loss": 0.7519, "step": 3322 }, { "epoch": 1.0669449349815379, "grad_norm": 1.0219404697418213, "learning_rate": 0.00016891138083873487, "loss": 0.6174, "step": 3323 }, { "epoch": 1.0672660138063894, "grad_norm": 0.9879265427589417, "learning_rate": 
0.00016888608147481388, "loss": 0.636, "step": 3324 }, { "epoch": 1.067587092631241, "grad_norm": 0.7659774422645569, "learning_rate": 0.00016886077371733283, "loss": 0.5238, "step": 3325 }, { "epoch": 1.0679081714560925, "grad_norm": 0.9733467698097229, "learning_rate": 0.0001688354575693754, "loss": 0.7139, "step": 3326 }, { "epoch": 1.068229250280944, "grad_norm": 0.7653968334197998, "learning_rate": 0.0001688101330340263, "loss": 0.4363, "step": 3327 }, { "epoch": 1.0685503291057954, "grad_norm": 1.0636018514633179, "learning_rate": 0.0001687848001143711, "loss": 0.7243, "step": 3328 }, { "epoch": 1.068871407930647, "grad_norm": 1.1847553253173828, "learning_rate": 0.00016875945881349676, "loss": 0.7352, "step": 3329 }, { "epoch": 1.0691924867554985, "grad_norm": 1.2986525297164917, "learning_rate": 0.00016873410913449091, "loss": 0.7651, "step": 3330 }, { "epoch": 1.06951356558035, "grad_norm": 0.921399712562561, "learning_rate": 0.0001687087510804423, "loss": 0.6639, "step": 3331 }, { "epoch": 1.0698346444052014, "grad_norm": 0.900202751159668, "learning_rate": 0.00016868338465444085, "loss": 0.6414, "step": 3332 }, { "epoch": 1.070155723230053, "grad_norm": 0.7372778654098511, "learning_rate": 0.00016865800985957726, "loss": 0.518, "step": 3333 }, { "epoch": 1.0704768020549045, "grad_norm": 1.0439753532409668, "learning_rate": 0.0001686326266989435, "loss": 0.7912, "step": 3334 }, { "epoch": 1.070797880879756, "grad_norm": 0.9654948711395264, "learning_rate": 0.0001686072351756323, "loss": 0.7365, "step": 3335 }, { "epoch": 1.0711189597046076, "grad_norm": 0.9803206324577332, "learning_rate": 0.00016858183529273765, "loss": 0.5448, "step": 3336 }, { "epoch": 1.071440038529459, "grad_norm": 0.9181073307991028, "learning_rate": 0.00016855642705335437, "loss": 0.6209, "step": 3337 }, { "epoch": 1.0717611173543105, "grad_norm": 1.7982044219970703, "learning_rate": 0.0001685310104605784, "loss": 0.6342, "step": 3338 }, { "epoch": 1.072082196179162, "grad_norm": 
1.0803323984146118, "learning_rate": 0.0001685055855175067, "loss": 0.8042, "step": 3339 }, { "epoch": 1.0724032750040136, "grad_norm": 0.936269223690033, "learning_rate": 0.0001684801522272372, "loss": 0.589, "step": 3340 }, { "epoch": 1.0727243538288649, "grad_norm": 0.9068983197212219, "learning_rate": 0.00016845471059286887, "loss": 0.5997, "step": 3341 }, { "epoch": 1.0730454326537164, "grad_norm": 1.2561771869659424, "learning_rate": 0.0001684292606175017, "loss": 0.6738, "step": 3342 }, { "epoch": 1.073366511478568, "grad_norm": 2.2465908527374268, "learning_rate": 0.00016840380230423668, "loss": 0.7389, "step": 3343 }, { "epoch": 1.0736875903034195, "grad_norm": 1.6561983823776245, "learning_rate": 0.0001683783356561759, "loss": 0.8363, "step": 3344 }, { "epoch": 1.074008669128271, "grad_norm": 0.843169629573822, "learning_rate": 0.00016835286067642228, "loss": 0.6381, "step": 3345 }, { "epoch": 1.0743297479531224, "grad_norm": 0.7259941697120667, "learning_rate": 0.00016832737736807994, "loss": 0.5703, "step": 3346 }, { "epoch": 1.074650826777974, "grad_norm": 1.018009901046753, "learning_rate": 0.00016830188573425389, "loss": 0.7034, "step": 3347 }, { "epoch": 1.0749719056028255, "grad_norm": 0.8844952583312988, "learning_rate": 0.00016827638577805026, "loss": 0.6634, "step": 3348 }, { "epoch": 1.075292984427677, "grad_norm": 0.9090389609336853, "learning_rate": 0.0001682508775025762, "loss": 0.5809, "step": 3349 }, { "epoch": 1.0756140632525284, "grad_norm": 0.8599204421043396, "learning_rate": 0.00016822536091093965, "loss": 0.6162, "step": 3350 }, { "epoch": 1.07593514207738, "grad_norm": 1.0319000482559204, "learning_rate": 0.00016819983600624986, "loss": 0.6512, "step": 3351 }, { "epoch": 1.0762562209022315, "grad_norm": 0.8953613042831421, "learning_rate": 0.0001681743027916169, "loss": 0.4827, "step": 3352 }, { "epoch": 1.076577299727083, "grad_norm": 0.7418705821037292, "learning_rate": 0.000168148761270152, "loss": 0.4673, "step": 3353 }, { 
"epoch": 1.0768983785519346, "grad_norm": 1.4244052171707153, "learning_rate": 0.0001681232114449672, "loss": 0.728, "step": 3354 }, { "epoch": 1.077219457376786, "grad_norm": 0.9894974827766418, "learning_rate": 0.00016809765331917575, "loss": 0.6679, "step": 3355 }, { "epoch": 1.0775405362016375, "grad_norm": 0.8581247925758362, "learning_rate": 0.0001680720868958918, "loss": 0.5517, "step": 3356 }, { "epoch": 1.077861615026489, "grad_norm": 0.8083166480064392, "learning_rate": 0.00016804651217823053, "loss": 0.5014, "step": 3357 }, { "epoch": 1.0781826938513406, "grad_norm": 1.293942928314209, "learning_rate": 0.00016802092916930818, "loss": 0.724, "step": 3358 }, { "epoch": 1.078503772676192, "grad_norm": 0.8754002451896667, "learning_rate": 0.00016799533787224192, "loss": 0.5268, "step": 3359 }, { "epoch": 1.0788248515010435, "grad_norm": 0.9752049446105957, "learning_rate": 0.00016796973829015, "loss": 0.5621, "step": 3360 }, { "epoch": 1.079145930325895, "grad_norm": 0.9150457978248596, "learning_rate": 0.00016794413042615168, "loss": 0.4815, "step": 3361 }, { "epoch": 1.0794670091507466, "grad_norm": 0.992889940738678, "learning_rate": 0.00016791851428336711, "loss": 0.5026, "step": 3362 }, { "epoch": 1.079788087975598, "grad_norm": 0.6349382996559143, "learning_rate": 0.00016789288986491762, "loss": 0.3832, "step": 3363 }, { "epoch": 1.0801091668004494, "grad_norm": 0.9240784645080566, "learning_rate": 0.00016786725717392545, "loss": 0.4269, "step": 3364 }, { "epoch": 1.080430245625301, "grad_norm": 0.9478742480278015, "learning_rate": 0.00016784161621351382, "loss": 0.9252, "step": 3365 }, { "epoch": 1.0807513244501525, "grad_norm": 0.8908936381340027, "learning_rate": 0.0001678159669868071, "loss": 0.9994, "step": 3366 }, { "epoch": 1.081072403275004, "grad_norm": 0.7920323610305786, "learning_rate": 0.00016779030949693044, "loss": 0.5493, "step": 3367 }, { "epoch": 1.0813934820998554, "grad_norm": 1.6557127237319946, "learning_rate": 
0.00016776464374701025, "loss": 0.638, "step": 3368 }, { "epoch": 1.081714560924707, "grad_norm": 0.9681761860847473, "learning_rate": 0.00016773896974017373, "loss": 0.5296, "step": 3369 }, { "epoch": 1.0820356397495585, "grad_norm": 0.812778651714325, "learning_rate": 0.00016771328747954925, "loss": 0.3148, "step": 3370 }, { "epoch": 1.08235671857441, "grad_norm": 0.8901128768920898, "learning_rate": 0.00016768759696826608, "loss": 0.6074, "step": 3371 }, { "epoch": 1.0826777973992616, "grad_norm": 1.0377326011657715, "learning_rate": 0.00016766189820945456, "loss": 0.5944, "step": 3372 }, { "epoch": 1.082998876224113, "grad_norm": 0.9749375581741333, "learning_rate": 0.00016763619120624594, "loss": 0.6133, "step": 3373 }, { "epoch": 1.0833199550489645, "grad_norm": 1.1712234020233154, "learning_rate": 0.00016761047596177263, "loss": 0.735, "step": 3374 }, { "epoch": 1.083641033873816, "grad_norm": 0.9546951651573181, "learning_rate": 0.00016758475247916787, "loss": 0.6357, "step": 3375 }, { "epoch": 1.0839621126986676, "grad_norm": 1.6145797967910767, "learning_rate": 0.00016755902076156604, "loss": 0.8267, "step": 3376 }, { "epoch": 1.084283191523519, "grad_norm": 0.9280446767807007, "learning_rate": 0.00016753328081210245, "loss": 0.7201, "step": 3377 }, { "epoch": 1.0846042703483705, "grad_norm": 0.7923296689987183, "learning_rate": 0.00016750753263391346, "loss": 0.6021, "step": 3378 }, { "epoch": 1.084925349173222, "grad_norm": 0.9804905652999878, "learning_rate": 0.00016748177623013638, "loss": 0.544, "step": 3379 }, { "epoch": 1.0852464279980736, "grad_norm": 0.815140962600708, "learning_rate": 0.00016745601160390958, "loss": 0.6108, "step": 3380 }, { "epoch": 1.0855675068229251, "grad_norm": 0.842607855796814, "learning_rate": 0.00016743023875837233, "loss": 0.6777, "step": 3381 }, { "epoch": 1.0858885856477765, "grad_norm": 0.7987306118011475, "learning_rate": 0.00016740445769666509, "loss": 0.672, "step": 3382 }, { "epoch": 1.086209664472628, 
"grad_norm": 0.8180364966392517, "learning_rate": 0.0001673786684219291, "loss": 0.5932, "step": 3383 }, { "epoch": 1.0865307432974796, "grad_norm": 1.0452377796173096, "learning_rate": 0.00016735287093730676, "loss": 0.5845, "step": 3384 }, { "epoch": 1.0868518221223311, "grad_norm": 1.3078027963638306, "learning_rate": 0.00016732706524594137, "loss": 0.632, "step": 3385 }, { "epoch": 1.0871729009471824, "grad_norm": 1.3132222890853882, "learning_rate": 0.00016730125135097735, "loss": 0.5767, "step": 3386 }, { "epoch": 1.087493979772034, "grad_norm": 1.1473840475082397, "learning_rate": 0.00016727542925555996, "loss": 0.7189, "step": 3387 }, { "epoch": 1.0878150585968855, "grad_norm": 1.0360926389694214, "learning_rate": 0.00016724959896283559, "loss": 0.6505, "step": 3388 }, { "epoch": 1.088136137421737, "grad_norm": 0.9349396228790283, "learning_rate": 0.00016722376047595164, "loss": 0.7034, "step": 3389 }, { "epoch": 1.0884572162465886, "grad_norm": 1.2072162628173828, "learning_rate": 0.0001671979137980563, "loss": 0.6266, "step": 3390 }, { "epoch": 1.08877829507144, "grad_norm": 1.2379400730133057, "learning_rate": 0.00016717205893229903, "loss": 0.6909, "step": 3391 }, { "epoch": 1.0890993738962915, "grad_norm": 0.8755012154579163, "learning_rate": 0.00016714619588183014, "loss": 0.7037, "step": 3392 }, { "epoch": 1.089420452721143, "grad_norm": 1.0528205633163452, "learning_rate": 0.00016712032464980095, "loss": 0.6186, "step": 3393 }, { "epoch": 1.0897415315459946, "grad_norm": 0.9427685141563416, "learning_rate": 0.0001670944452393638, "loss": 0.7993, "step": 3394 }, { "epoch": 1.090062610370846, "grad_norm": 0.9150214791297913, "learning_rate": 0.000167068557653672, "loss": 0.739, "step": 3395 }, { "epoch": 1.0903836891956975, "grad_norm": 2.4165000915527344, "learning_rate": 0.0001670426618958799, "loss": 0.6721, "step": 3396 }, { "epoch": 1.090704768020549, "grad_norm": 0.821657121181488, "learning_rate": 0.00016701675796914286, "loss": 0.6333, "step": 
3397 }, { "epoch": 1.0910258468454006, "grad_norm": 0.8281720876693726, "learning_rate": 0.0001669908458766171, "loss": 0.6536, "step": 3398 }, { "epoch": 1.0913469256702522, "grad_norm": 0.7724263668060303, "learning_rate": 0.00016696492562145996, "loss": 0.5401, "step": 3399 }, { "epoch": 1.0916680044951035, "grad_norm": 1.0343469381332397, "learning_rate": 0.00016693899720682977, "loss": 0.6455, "step": 3400 }, { "epoch": 1.091989083319955, "grad_norm": 0.9886199831962585, "learning_rate": 0.00016691306063588583, "loss": 0.4821, "step": 3401 }, { "epoch": 1.0923101621448066, "grad_norm": 0.8337252736091614, "learning_rate": 0.00016688711591178842, "loss": 0.5141, "step": 3402 }, { "epoch": 1.0926312409696581, "grad_norm": 0.8266648054122925, "learning_rate": 0.00016686116303769882, "loss": 0.5598, "step": 3403 }, { "epoch": 1.0929523197945095, "grad_norm": 0.9328741431236267, "learning_rate": 0.0001668352020167793, "loss": 0.7067, "step": 3404 }, { "epoch": 1.093273398619361, "grad_norm": 1.163390040397644, "learning_rate": 0.00016680923285219317, "loss": 0.6493, "step": 3405 }, { "epoch": 1.0935944774442126, "grad_norm": 1.0952308177947998, "learning_rate": 0.00016678325554710468, "loss": 0.6423, "step": 3406 }, { "epoch": 1.0939155562690641, "grad_norm": 1.341279149055481, "learning_rate": 0.00016675727010467906, "loss": 0.751, "step": 3407 }, { "epoch": 1.0942366350939157, "grad_norm": 0.7064843773841858, "learning_rate": 0.0001667312765280826, "loss": 0.4315, "step": 3408 }, { "epoch": 1.094557713918767, "grad_norm": 1.5312671661376953, "learning_rate": 0.00016670527482048246, "loss": 0.5555, "step": 3409 }, { "epoch": 1.0948787927436185, "grad_norm": 0.8741896152496338, "learning_rate": 0.00016667926498504696, "loss": 0.573, "step": 3410 }, { "epoch": 1.09519987156847, "grad_norm": 0.6594826579093933, "learning_rate": 0.00016665324702494524, "loss": 0.4327, "step": 3411 }, { "epoch": 1.0955209503933216, "grad_norm": 0.7997058033943176, "learning_rate": 
0.0001666272209433476, "loss": 0.4166, "step": 3412 }, { "epoch": 1.095842029218173, "grad_norm": 1.526983380317688, "learning_rate": 0.00016660118674342517, "loss": 0.6392, "step": 3413 }, { "epoch": 1.0961631080430245, "grad_norm": 0.7481285929679871, "learning_rate": 0.00016657514442835014, "loss": 0.4221, "step": 3414 }, { "epoch": 1.096484186867876, "grad_norm": 0.7330780625343323, "learning_rate": 0.00016654909400129575, "loss": 0.8458, "step": 3415 }, { "epoch": 1.0968052656927276, "grad_norm": 0.798095703125, "learning_rate": 0.00016652303546543608, "loss": 1.141, "step": 3416 }, { "epoch": 1.0971263445175792, "grad_norm": 1.091076374053955, "learning_rate": 0.00016649696882394633, "loss": 0.6236, "step": 3417 }, { "epoch": 1.0974474233424305, "grad_norm": 1.0143516063690186, "learning_rate": 0.00016647089408000266, "loss": 0.5477, "step": 3418 }, { "epoch": 1.097768502167282, "grad_norm": 0.8810601234436035, "learning_rate": 0.00016644481123678217, "loss": 0.3834, "step": 3419 }, { "epoch": 1.0980895809921336, "grad_norm": 1.0156632661819458, "learning_rate": 0.00016641872029746297, "loss": 0.3836, "step": 3420 }, { "epoch": 1.0984106598169852, "grad_norm": 1.0656507015228271, "learning_rate": 0.00016639262126522418, "loss": 0.5248, "step": 3421 }, { "epoch": 1.0987317386418365, "grad_norm": 0.9195857048034668, "learning_rate": 0.00016636651414324587, "loss": 0.3556, "step": 3422 }, { "epoch": 1.099052817466688, "grad_norm": 1.0913866758346558, "learning_rate": 0.00016634039893470912, "loss": 0.8998, "step": 3423 }, { "epoch": 1.0993738962915396, "grad_norm": 1.01368248462677, "learning_rate": 0.000166314275642796, "loss": 0.8026, "step": 3424 }, { "epoch": 1.0996949751163911, "grad_norm": 0.8792101740837097, "learning_rate": 0.00016628814427068953, "loss": 0.7112, "step": 3425 }, { "epoch": 1.1000160539412427, "grad_norm": 1.1131765842437744, "learning_rate": 0.00016626200482157378, "loss": 0.7527, "step": 3426 }, { "epoch": 1.100337132766094, 
"grad_norm": 0.7965254783630371, "learning_rate": 0.00016623585729863368, "loss": 0.5228, "step": 3427 }, { "epoch": 1.1006582115909456, "grad_norm": 0.7073742747306824, "learning_rate": 0.00016620970170505534, "loss": 0.5549, "step": 3428 }, { "epoch": 1.1009792904157971, "grad_norm": 1.1967341899871826, "learning_rate": 0.00016618353804402568, "loss": 0.8142, "step": 3429 }, { "epoch": 1.1013003692406487, "grad_norm": 1.0835646390914917, "learning_rate": 0.00016615736631873262, "loss": 0.7728, "step": 3430 }, { "epoch": 1.1016214480655, "grad_norm": 0.8416442275047302, "learning_rate": 0.00016613118653236518, "loss": 0.5987, "step": 3431 }, { "epoch": 1.1019425268903515, "grad_norm": 3.7327215671539307, "learning_rate": 0.00016610499868811328, "loss": 0.8418, "step": 3432 }, { "epoch": 1.102263605715203, "grad_norm": 0.9980570077896118, "learning_rate": 0.00016607880278916777, "loss": 0.5245, "step": 3433 }, { "epoch": 1.1025846845400546, "grad_norm": 0.9758525490760803, "learning_rate": 0.0001660525988387206, "loss": 0.6669, "step": 3434 }, { "epoch": 1.1029057633649062, "grad_norm": 1.14530611038208, "learning_rate": 0.00016602638683996463, "loss": 0.7346, "step": 3435 }, { "epoch": 1.1032268421897575, "grad_norm": 1.0736594200134277, "learning_rate": 0.0001660001667960937, "loss": 0.8106, "step": 3436 }, { "epoch": 1.103547921014609, "grad_norm": 1.2492066621780396, "learning_rate": 0.00016597393871030264, "loss": 0.6462, "step": 3437 }, { "epoch": 1.1038689998394606, "grad_norm": 1.3556599617004395, "learning_rate": 0.0001659477025857872, "loss": 0.8159, "step": 3438 }, { "epoch": 1.1041900786643122, "grad_norm": 0.9458972811698914, "learning_rate": 0.00016592145842574433, "loss": 0.6553, "step": 3439 }, { "epoch": 1.1045111574891635, "grad_norm": 1.0294822454452515, "learning_rate": 0.0001658952062333717, "loss": 0.6278, "step": 3440 }, { "epoch": 1.104832236314015, "grad_norm": 1.8948813676834106, "learning_rate": 0.00016586894601186805, "loss": 0.7584, 
"step": 3441 }, { "epoch": 1.1051533151388666, "grad_norm": 1.0939890146255493, "learning_rate": 0.00016584267776443317, "loss": 0.7916, "step": 3442 }, { "epoch": 1.1054743939637182, "grad_norm": 1.3582258224487305, "learning_rate": 0.00016581640149426768, "loss": 0.6503, "step": 3443 }, { "epoch": 1.1057954727885697, "grad_norm": 1.082184910774231, "learning_rate": 0.00016579011720457333, "loss": 0.6415, "step": 3444 }, { "epoch": 1.106116551613421, "grad_norm": 0.8474713563919067, "learning_rate": 0.00016576382489855274, "loss": 0.5673, "step": 3445 }, { "epoch": 1.1064376304382726, "grad_norm": 0.9523148536682129, "learning_rate": 0.0001657375245794096, "loss": 0.6361, "step": 3446 }, { "epoch": 1.1067587092631241, "grad_norm": 1.021826982498169, "learning_rate": 0.00016571121625034847, "loss": 0.7183, "step": 3447 }, { "epoch": 1.1070797880879757, "grad_norm": 1.3967097997665405, "learning_rate": 0.00016568489991457497, "loss": 0.7846, "step": 3448 }, { "epoch": 1.107400866912827, "grad_norm": 0.7168581485748291, "learning_rate": 0.00016565857557529566, "loss": 0.4492, "step": 3449 }, { "epoch": 1.1077219457376786, "grad_norm": 0.7148603200912476, "learning_rate": 0.00016563224323571806, "loss": 0.4595, "step": 3450 }, { "epoch": 1.1080430245625301, "grad_norm": 1.2017083168029785, "learning_rate": 0.00016560590289905073, "loss": 0.5782, "step": 3451 }, { "epoch": 1.1083641033873817, "grad_norm": 0.8505964279174805, "learning_rate": 0.00016557955456850313, "loss": 0.5918, "step": 3452 }, { "epoch": 1.108685182212233, "grad_norm": 1.3165613412857056, "learning_rate": 0.00016555319824728575, "loss": 0.6645, "step": 3453 }, { "epoch": 1.1090062610370846, "grad_norm": 0.9442883729934692, "learning_rate": 0.00016552683393860997, "loss": 0.6143, "step": 3454 }, { "epoch": 1.109327339861936, "grad_norm": 1.0663771629333496, "learning_rate": 0.00016550046164568827, "loss": 0.5655, "step": 3455 }, { "epoch": 1.1096484186867877, "grad_norm": 0.8024634122848511, 
"learning_rate": 0.00016547408137173396, "loss": 0.4356, "step": 3456 }, { "epoch": 1.1099694975116392, "grad_norm": 1.0509380102157593, "learning_rate": 0.00016544769311996148, "loss": 0.5454, "step": 3457 }, { "epoch": 1.1102905763364905, "grad_norm": 1.2511743307113647, "learning_rate": 0.00016542129689358612, "loss": 0.7829, "step": 3458 }, { "epoch": 1.110611655161342, "grad_norm": 0.8298420906066895, "learning_rate": 0.00016539489269582416, "loss": 0.5684, "step": 3459 }, { "epoch": 1.1109327339861936, "grad_norm": 0.8872331380844116, "learning_rate": 0.00016536848052989291, "loss": 0.6568, "step": 3460 }, { "epoch": 1.1112538128110452, "grad_norm": 1.0351537466049194, "learning_rate": 0.00016534206039901057, "loss": 0.5094, "step": 3461 }, { "epoch": 1.1115748916358965, "grad_norm": 0.9417381286621094, "learning_rate": 0.00016531563230639637, "loss": 0.4403, "step": 3462 }, { "epoch": 1.111895970460748, "grad_norm": 1.471907615661621, "learning_rate": 0.0001652891962552705, "loss": 0.5832, "step": 3463 }, { "epoch": 1.1122170492855996, "grad_norm": 0.8464019894599915, "learning_rate": 0.00016526275224885411, "loss": 0.4628, "step": 3464 }, { "epoch": 1.1125381281104512, "grad_norm": 0.7151861190795898, "learning_rate": 0.00016523630029036931, "loss": 0.6354, "step": 3465 }, { "epoch": 1.1128592069353027, "grad_norm": 0.8638876080513, "learning_rate": 0.00016520984038303924, "loss": 0.9545, "step": 3466 }, { "epoch": 1.113180285760154, "grad_norm": 0.7509087920188904, "learning_rate": 0.0001651833725300879, "loss": 0.8626, "step": 3467 }, { "epoch": 1.1135013645850056, "grad_norm": 0.7917301058769226, "learning_rate": 0.00016515689673474033, "loss": 0.5097, "step": 3468 }, { "epoch": 1.1138224434098571, "grad_norm": 1.0008916854858398, "learning_rate": 0.00016513041300022255, "loss": 0.4359, "step": 3469 }, { "epoch": 1.1141435222347087, "grad_norm": 1.335162878036499, "learning_rate": 0.0001651039213297615, "loss": 0.3395, "step": 3470 }, { "epoch": 
1.11446460105956, "grad_norm": 0.8309035897254944, "learning_rate": 0.0001650774217265851, "loss": 0.4172, "step": 3471 }, { "epoch": 1.1147856798844116, "grad_norm": 1.0509357452392578, "learning_rate": 0.00016505091419392228, "loss": 0.9272, "step": 3472 }, { "epoch": 1.1151067587092631, "grad_norm": 1.047833800315857, "learning_rate": 0.00016502439873500289, "loss": 0.8088, "step": 3473 }, { "epoch": 1.1154278375341147, "grad_norm": 0.9323020577430725, "learning_rate": 0.00016499787535305776, "loss": 0.713, "step": 3474 }, { "epoch": 1.1157489163589662, "grad_norm": 0.902013897895813, "learning_rate": 0.00016497134405131866, "loss": 0.644, "step": 3475 }, { "epoch": 1.1160699951838176, "grad_norm": 0.8789449334144592, "learning_rate": 0.00016494480483301836, "loss": 0.6663, "step": 3476 }, { "epoch": 1.116391074008669, "grad_norm": 0.7771266102790833, "learning_rate": 0.00016491825770139062, "loss": 0.7559, "step": 3477 }, { "epoch": 1.1167121528335207, "grad_norm": 0.9561981558799744, "learning_rate": 0.00016489170265967008, "loss": 0.7239, "step": 3478 }, { "epoch": 1.1170332316583722, "grad_norm": 0.9018192887306213, "learning_rate": 0.00016486513971109243, "loss": 0.8175, "step": 3479 }, { "epoch": 1.1173543104832235, "grad_norm": 1.1967219114303589, "learning_rate": 0.0001648385688588942, "loss": 0.7613, "step": 3480 }, { "epoch": 1.117675389308075, "grad_norm": 0.9554547667503357, "learning_rate": 0.0001648119901063131, "loss": 0.4639, "step": 3481 }, { "epoch": 1.1179964681329266, "grad_norm": 1.2637308835983276, "learning_rate": 0.00016478540345658759, "loss": 0.7384, "step": 3482 }, { "epoch": 1.1183175469577782, "grad_norm": 0.9255183935165405, "learning_rate": 0.00016475880891295716, "loss": 0.7321, "step": 3483 }, { "epoch": 1.1186386257826297, "grad_norm": 1.0068658590316772, "learning_rate": 0.0001647322064786623, "loss": 0.6394, "step": 3484 }, { "epoch": 1.118959704607481, "grad_norm": 1.1133503913879395, "learning_rate": 0.00016470559615694446, 
"loss": 0.713, "step": 3485 }, { "epoch": 1.1192807834323326, "grad_norm": 0.8424758911132812, "learning_rate": 0.000164678977951046, "loss": 0.692, "step": 3486 }, { "epoch": 1.1196018622571842, "grad_norm": 1.4674954414367676, "learning_rate": 0.0001646523518642102, "loss": 0.7578, "step": 3487 }, { "epoch": 1.1199229410820357, "grad_norm": 1.0364352464675903, "learning_rate": 0.00016462571789968152, "loss": 0.6651, "step": 3488 }, { "epoch": 1.120244019906887, "grad_norm": 0.8905995488166809, "learning_rate": 0.0001645990760607051, "loss": 0.5398, "step": 3489 }, { "epoch": 1.1205650987317386, "grad_norm": 0.9402886629104614, "learning_rate": 0.00016457242635052724, "loss": 0.6423, "step": 3490 }, { "epoch": 1.1208861775565901, "grad_norm": 1.093013882637024, "learning_rate": 0.00016454576877239507, "loss": 0.629, "step": 3491 }, { "epoch": 1.1212072563814417, "grad_norm": 1.6441494226455688, "learning_rate": 0.00016451910332955679, "loss": 0.8024, "step": 3492 }, { "epoch": 1.1215283352062932, "grad_norm": 1.1009982824325562, "learning_rate": 0.00016449243002526144, "loss": 0.6887, "step": 3493 }, { "epoch": 1.1218494140311446, "grad_norm": 0.9255393743515015, "learning_rate": 0.00016446574886275913, "loss": 0.6147, "step": 3494 }, { "epoch": 1.1221704928559961, "grad_norm": 1.203102469444275, "learning_rate": 0.0001644390598453009, "loss": 0.7035, "step": 3495 }, { "epoch": 1.1224915716808477, "grad_norm": 1.5952414274215698, "learning_rate": 0.00016441236297613866, "loss": 0.753, "step": 3496 }, { "epoch": 1.1228126505056992, "grad_norm": 1.0285307168960571, "learning_rate": 0.0001643856582585254, "loss": 0.6581, "step": 3497 }, { "epoch": 1.1231337293305506, "grad_norm": 1.1580029726028442, "learning_rate": 0.00016435894569571496, "loss": 0.75, "step": 3498 }, { "epoch": 1.123454808155402, "grad_norm": 2.223442792892456, "learning_rate": 0.0001643322252909622, "loss": 0.751, "step": 3499 }, { "epoch": 1.1237758869802537, "grad_norm": 1.224037528038025, 
"learning_rate": 0.00016430549704752294, "loss": 0.6832, "step": 3500 }, { "epoch": 1.1240969658051052, "grad_norm": 0.9943028092384338, "learning_rate": 0.00016427876096865394, "loss": 0.639, "step": 3501 }, { "epoch": 1.1244180446299565, "grad_norm": 1.723598599433899, "learning_rate": 0.00016425201705761288, "loss": 0.7863, "step": 3502 }, { "epoch": 1.124739123454808, "grad_norm": 0.7261873483657837, "learning_rate": 0.00016422526531765846, "loss": 0.53, "step": 3503 }, { "epoch": 1.1250602022796596, "grad_norm": 0.9602160453796387, "learning_rate": 0.00016419850575205024, "loss": 0.4901, "step": 3504 }, { "epoch": 1.1253812811045112, "grad_norm": 0.8210904002189636, "learning_rate": 0.00016417173836404887, "loss": 0.4663, "step": 3505 }, { "epoch": 1.1257023599293627, "grad_norm": 1.026781439781189, "learning_rate": 0.00016414496315691581, "loss": 0.6553, "step": 3506 }, { "epoch": 1.126023438754214, "grad_norm": 0.9842190146446228, "learning_rate": 0.00016411818013391355, "loss": 0.4576, "step": 3507 }, { "epoch": 1.1263445175790656, "grad_norm": 0.8347097635269165, "learning_rate": 0.00016409138929830553, "loss": 0.478, "step": 3508 }, { "epoch": 1.1266655964039172, "grad_norm": 1.0178107023239136, "learning_rate": 0.00016406459065335615, "loss": 0.621, "step": 3509 }, { "epoch": 1.1269866752287687, "grad_norm": 0.8920689225196838, "learning_rate": 0.00016403778420233075, "loss": 0.4913, "step": 3510 }, { "epoch": 1.12730775405362, "grad_norm": 0.9431245923042297, "learning_rate": 0.00016401096994849557, "loss": 0.5671, "step": 3511 }, { "epoch": 1.1276288328784716, "grad_norm": 1.142845869064331, "learning_rate": 0.00016398414789511786, "loss": 0.6351, "step": 3512 }, { "epoch": 1.1279499117033231, "grad_norm": 0.4318004250526428, "learning_rate": 0.0001639573180454658, "loss": 0.3718, "step": 3513 }, { "epoch": 1.1282709905281747, "grad_norm": 0.7192524671554565, "learning_rate": 0.00016393048040280855, "loss": 0.411, "step": 3514 }, { "epoch": 
1.1285920693530263, "grad_norm": 0.7344934344291687, "learning_rate": 0.00016390363497041622, "loss": 0.7704, "step": 3515 }, { "epoch": 1.1289131481778776, "grad_norm": 0.746320366859436, "learning_rate": 0.00016387678175155978, "loss": 1.1028, "step": 3516 }, { "epoch": 1.1292342270027291, "grad_norm": 0.9115157127380371, "learning_rate": 0.00016384992074951123, "loss": 1.1808, "step": 3517 }, { "epoch": 1.1295553058275807, "grad_norm": 1.0850014686584473, "learning_rate": 0.00016382305196754356, "loss": 0.6813, "step": 3518 }, { "epoch": 1.1298763846524322, "grad_norm": 1.0016835927963257, "learning_rate": 0.00016379617540893056, "loss": 0.5984, "step": 3519 }, { "epoch": 1.1301974634772836, "grad_norm": 0.8297609090805054, "learning_rate": 0.0001637692910769471, "loss": 0.3999, "step": 3520 }, { "epoch": 1.130518542302135, "grad_norm": 0.8285101056098938, "learning_rate": 0.000163742398974869, "loss": 0.4791, "step": 3521 }, { "epoch": 1.1308396211269867, "grad_norm": 0.9352186918258667, "learning_rate": 0.00016371549910597287, "loss": 0.5488, "step": 3522 }, { "epoch": 1.1311606999518382, "grad_norm": 1.118675947189331, "learning_rate": 0.0001636885914735365, "loss": 0.7107, "step": 3523 }, { "epoch": 1.1314817787766898, "grad_norm": 0.8197215795516968, "learning_rate": 0.00016366167608083843, "loss": 0.6317, "step": 3524 }, { "epoch": 1.131802857601541, "grad_norm": 0.8849331736564636, "learning_rate": 0.00016363475293115824, "loss": 0.7617, "step": 3525 }, { "epoch": 1.1321239364263926, "grad_norm": 1.0144376754760742, "learning_rate": 0.0001636078220277764, "loss": 0.7364, "step": 3526 }, { "epoch": 1.1324450152512442, "grad_norm": 0.8314408659934998, "learning_rate": 0.00016358088337397442, "loss": 0.759, "step": 3527 }, { "epoch": 1.1327660940760957, "grad_norm": 0.8000319004058838, "learning_rate": 0.00016355393697303465, "loss": 0.6728, "step": 3528 }, { "epoch": 1.133087172900947, "grad_norm": 0.9634177684783936, "learning_rate": 
0.00016352698282824044, "loss": 0.7623, "step": 3529 }, { "epoch": 1.1334082517257986, "grad_norm": 0.7649347186088562, "learning_rate": 0.00016350002094287609, "loss": 0.5196, "step": 3530 }, { "epoch": 1.1337293305506502, "grad_norm": 0.8428080081939697, "learning_rate": 0.00016347305132022677, "loss": 0.6598, "step": 3531 }, { "epoch": 1.1340504093755017, "grad_norm": 0.9351022243499756, "learning_rate": 0.0001634460739635787, "loss": 0.667, "step": 3532 }, { "epoch": 1.1343714882003533, "grad_norm": 0.854694128036499, "learning_rate": 0.00016341908887621895, "loss": 0.7199, "step": 3533 }, { "epoch": 1.1346925670252046, "grad_norm": 0.8774638175964355, "learning_rate": 0.00016339209606143563, "loss": 0.4986, "step": 3534 }, { "epoch": 1.1350136458500562, "grad_norm": 0.8318768739700317, "learning_rate": 0.00016336509552251766, "loss": 0.7203, "step": 3535 }, { "epoch": 1.1353347246749077, "grad_norm": 1.0216394662857056, "learning_rate": 0.000163338087262755, "loss": 0.7023, "step": 3536 }, { "epoch": 1.1356558034997593, "grad_norm": 1.4008989334106445, "learning_rate": 0.00016331107128543857, "loss": 0.8295, "step": 3537 }, { "epoch": 1.1359768823246106, "grad_norm": 0.8423093557357788, "learning_rate": 0.00016328404759386014, "loss": 0.6274, "step": 3538 }, { "epoch": 1.1362979611494621, "grad_norm": 1.1864322423934937, "learning_rate": 0.00016325701619131246, "loss": 0.6257, "step": 3539 }, { "epoch": 1.1366190399743137, "grad_norm": 1.03271484375, "learning_rate": 0.00016322997708108922, "loss": 0.712, "step": 3540 }, { "epoch": 1.1369401187991652, "grad_norm": 0.9949981570243835, "learning_rate": 0.0001632029302664851, "loss": 0.7167, "step": 3541 }, { "epoch": 1.1372611976240168, "grad_norm": 0.9992634057998657, "learning_rate": 0.00016317587575079563, "loss": 0.6267, "step": 3542 }, { "epoch": 1.1375822764488681, "grad_norm": 0.9867181181907654, "learning_rate": 0.00016314881353731732, "loss": 0.6953, "step": 3543 }, { "epoch": 1.1379033552737197, 
"grad_norm": 1.025295615196228, "learning_rate": 0.00016312174362934765, "loss": 0.8544, "step": 3544 }, { "epoch": 1.1382244340985712, "grad_norm": 0.9153071641921997, "learning_rate": 0.00016309466603018496, "loss": 0.6237, "step": 3545 }, { "epoch": 1.1385455129234228, "grad_norm": 0.8474112749099731, "learning_rate": 0.00016306758074312864, "loss": 0.6815, "step": 3546 }, { "epoch": 1.138866591748274, "grad_norm": 0.9534016847610474, "learning_rate": 0.0001630404877714789, "loss": 0.7636, "step": 3547 }, { "epoch": 1.1391876705731256, "grad_norm": 1.3331577777862549, "learning_rate": 0.00016301338711853693, "loss": 0.8063, "step": 3548 }, { "epoch": 1.1395087493979772, "grad_norm": 1.318224549293518, "learning_rate": 0.00016298627878760487, "loss": 0.7616, "step": 3549 }, { "epoch": 1.1398298282228287, "grad_norm": 1.084550142288208, "learning_rate": 0.00016295916278198584, "loss": 0.7024, "step": 3550 }, { "epoch": 1.1401509070476803, "grad_norm": 1.946920394897461, "learning_rate": 0.00016293203910498376, "loss": 0.8216, "step": 3551 }, { "epoch": 1.1404719858725316, "grad_norm": 0.9112229943275452, "learning_rate": 0.0001629049077599036, "loss": 0.59, "step": 3552 }, { "epoch": 1.1407930646973832, "grad_norm": 0.9161878824234009, "learning_rate": 0.0001628777687500513, "loss": 0.4761, "step": 3553 }, { "epoch": 1.1411141435222347, "grad_norm": 0.8824045658111572, "learning_rate": 0.00016285062207873355, "loss": 0.6317, "step": 3554 }, { "epoch": 1.1414352223470863, "grad_norm": 0.897005021572113, "learning_rate": 0.00016282346774925817, "loss": 0.5112, "step": 3555 }, { "epoch": 1.1417563011719376, "grad_norm": 0.7413773536682129, "learning_rate": 0.00016279630576493382, "loss": 0.5975, "step": 3556 }, { "epoch": 1.1420773799967892, "grad_norm": 0.8787861466407776, "learning_rate": 0.00016276913612907007, "loss": 0.508, "step": 3557 }, { "epoch": 1.1423984588216407, "grad_norm": 1.0276175737380981, "learning_rate": 0.0001627419588449775, "loss": 0.6004, 
"step": 3558 }, { "epoch": 1.1427195376464923, "grad_norm": 1.1329381465911865, "learning_rate": 0.00016271477391596753, "loss": 0.5258, "step": 3559 }, { "epoch": 1.1430406164713438, "grad_norm": 0.9113026857376099, "learning_rate": 0.0001626875813453526, "loss": 0.5028, "step": 3560 }, { "epoch": 1.1433616952961951, "grad_norm": 1.3290276527404785, "learning_rate": 0.00016266038113644607, "loss": 0.6505, "step": 3561 }, { "epoch": 1.1436827741210467, "grad_norm": 0.9289746284484863, "learning_rate": 0.00016263317329256213, "loss": 0.449, "step": 3562 }, { "epoch": 1.1440038529458982, "grad_norm": 1.1701115369796753, "learning_rate": 0.00016260595781701604, "loss": 0.6017, "step": 3563 }, { "epoch": 1.1443249317707498, "grad_norm": 1.0579955577850342, "learning_rate": 0.0001625787347131239, "loss": 0.3807, "step": 3564 }, { "epoch": 1.1446460105956011, "grad_norm": 0.7256002426147461, "learning_rate": 0.00016255150398420271, "loss": 0.6831, "step": 3565 }, { "epoch": 1.1449670894204527, "grad_norm": 0.8824403882026672, "learning_rate": 0.00016252426563357055, "loss": 0.8835, "step": 3566 }, { "epoch": 1.1452881682453042, "grad_norm": 1.1135727167129517, "learning_rate": 0.00016249701966454625, "loss": 0.4474, "step": 3567 }, { "epoch": 1.1456092470701558, "grad_norm": 1.1596729755401611, "learning_rate": 0.0001624697660804497, "loss": 0.4549, "step": 3568 }, { "epoch": 1.1459303258950073, "grad_norm": 1.1575850248336792, "learning_rate": 0.00016244250488460158, "loss": 0.3866, "step": 3569 }, { "epoch": 1.1462514047198586, "grad_norm": 1.0404236316680908, "learning_rate": 0.00016241523608032373, "loss": 0.7547, "step": 3570 }, { "epoch": 1.1465724835447102, "grad_norm": 1.020094633102417, "learning_rate": 0.00016238795967093864, "loss": 0.752, "step": 3571 }, { "epoch": 1.1468935623695617, "grad_norm": 1.119242787361145, "learning_rate": 0.00016236067565976992, "loss": 0.755, "step": 3572 }, { "epoch": 1.1472146411944133, "grad_norm": 0.830894947052002, 
"learning_rate": 0.00016233338405014202, "loss": 0.639, "step": 3573 }, { "epoch": 1.1475357200192646, "grad_norm": 0.7884930968284607, "learning_rate": 0.00016230608484538034, "loss": 0.546, "step": 3574 }, { "epoch": 1.1478567988441162, "grad_norm": 1.13563072681427, "learning_rate": 0.00016227877804881127, "loss": 0.7434, "step": 3575 }, { "epoch": 1.1481778776689677, "grad_norm": 0.8976322412490845, "learning_rate": 0.00016225146366376198, "loss": 0.5961, "step": 3576 }, { "epoch": 1.1484989564938193, "grad_norm": 0.8841371536254883, "learning_rate": 0.00016222414169356065, "loss": 0.5675, "step": 3577 }, { "epoch": 1.1488200353186708, "grad_norm": 1.0855551958084106, "learning_rate": 0.00016219681214153643, "loss": 0.8424, "step": 3578 }, { "epoch": 1.1491411141435222, "grad_norm": 1.0877726078033447, "learning_rate": 0.0001621694750110193, "loss": 0.8544, "step": 3579 }, { "epoch": 1.1494621929683737, "grad_norm": 0.8400183320045471, "learning_rate": 0.0001621421303053402, "loss": 0.6311, "step": 3580 }, { "epoch": 1.1497832717932253, "grad_norm": 0.8696182370185852, "learning_rate": 0.00016211477802783103, "loss": 0.5355, "step": 3581 }, { "epoch": 1.1501043506180768, "grad_norm": 1.010580062866211, "learning_rate": 0.0001620874181818246, "loss": 0.7799, "step": 3582 }, { "epoch": 1.1504254294429281, "grad_norm": 0.9656292200088501, "learning_rate": 0.00016206005077065458, "loss": 0.7286, "step": 3583 }, { "epoch": 1.1507465082677797, "grad_norm": 1.5450822114944458, "learning_rate": 0.00016203267579765563, "loss": 0.791, "step": 3584 }, { "epoch": 1.1510675870926312, "grad_norm": 0.836439847946167, "learning_rate": 0.00016200529326616328, "loss": 0.6037, "step": 3585 }, { "epoch": 1.1513886659174828, "grad_norm": 1.2512813806533813, "learning_rate": 0.00016197790317951403, "loss": 0.5621, "step": 3586 }, { "epoch": 1.1517097447423343, "grad_norm": 1.2249475717544556, "learning_rate": 0.00016195050554104528, "loss": 0.7086, "step": 3587 }, { "epoch": 
1.1520308235671857, "grad_norm": 1.0801141262054443, "learning_rate": 0.00016192310035409536, "loss": 0.7006, "step": 3588 }, { "epoch": 1.1523519023920372, "grad_norm": 1.20231294631958, "learning_rate": 0.00016189568762200348, "loss": 0.6001, "step": 3589 }, { "epoch": 1.1526729812168888, "grad_norm": 1.2221317291259766, "learning_rate": 0.00016186826734810979, "loss": 0.8367, "step": 3590 }, { "epoch": 1.1529940600417403, "grad_norm": 0.8664124011993408, "learning_rate": 0.0001618408395357554, "loss": 0.646, "step": 3591 }, { "epoch": 1.1533151388665916, "grad_norm": 1.5447945594787598, "learning_rate": 0.00016181340418828233, "loss": 0.7546, "step": 3592 }, { "epoch": 1.1536362176914432, "grad_norm": 0.9267856478691101, "learning_rate": 0.00016178596130903344, "loss": 0.7593, "step": 3593 }, { "epoch": 1.1539572965162948, "grad_norm": 1.0048587322235107, "learning_rate": 0.0001617585109013526, "loss": 0.6133, "step": 3594 }, { "epoch": 1.1542783753411463, "grad_norm": 1.1687082052230835, "learning_rate": 0.00016173105296858452, "loss": 0.5192, "step": 3595 }, { "epoch": 1.1545994541659979, "grad_norm": 0.8441829085350037, "learning_rate": 0.00016170358751407487, "loss": 0.6674, "step": 3596 }, { "epoch": 1.1549205329908492, "grad_norm": 0.8441125154495239, "learning_rate": 0.00016167611454117025, "loss": 0.5651, "step": 3597 }, { "epoch": 1.1552416118157007, "grad_norm": 0.8785204291343689, "learning_rate": 0.0001616486340532182, "loss": 0.6461, "step": 3598 }, { "epoch": 1.1555626906405523, "grad_norm": 0.8196426033973694, "learning_rate": 0.00016162114605356703, "loss": 0.6182, "step": 3599 }, { "epoch": 1.1558837694654038, "grad_norm": 1.4259721040725708, "learning_rate": 0.0001615936505455662, "loss": 0.7115, "step": 3600 }, { "epoch": 1.1562048482902552, "grad_norm": 1.1216281652450562, "learning_rate": 0.0001615661475325658, "loss": 0.5116, "step": 3601 }, { "epoch": 1.1565259271151067, "grad_norm": 0.9022599458694458, "learning_rate": 
0.00016153863701791717, "loss": 0.5675, "step": 3602 }, { "epoch": 1.1568470059399583, "grad_norm": 1.1778627634048462, "learning_rate": 0.00016151111900497225, "loss": 0.6868, "step": 3603 }, { "epoch": 1.1571680847648098, "grad_norm": 1.2097725868225098, "learning_rate": 0.00016148359349708402, "loss": 0.6071, "step": 3604 }, { "epoch": 1.1574891635896614, "grad_norm": 1.0817495584487915, "learning_rate": 0.00016145606049760644, "loss": 0.871, "step": 3605 }, { "epoch": 1.1578102424145127, "grad_norm": 1.1181918382644653, "learning_rate": 0.00016142852000989433, "loss": 0.6375, "step": 3606 }, { "epoch": 1.1581313212393642, "grad_norm": 1.0407376289367676, "learning_rate": 0.00016140097203730337, "loss": 0.6119, "step": 3607 }, { "epoch": 1.1584524000642158, "grad_norm": 0.9020042419433594, "learning_rate": 0.00016137341658319023, "loss": 0.5714, "step": 3608 }, { "epoch": 1.1587734788890673, "grad_norm": 1.4991035461425781, "learning_rate": 0.00016134585365091243, "loss": 0.8278, "step": 3609 }, { "epoch": 1.1590945577139187, "grad_norm": 1.3282997608184814, "learning_rate": 0.00016131828324382846, "loss": 0.5376, "step": 3610 }, { "epoch": 1.1594156365387702, "grad_norm": 3.6863720417022705, "learning_rate": 0.00016129070536529766, "loss": 0.5749, "step": 3611 }, { "epoch": 1.1597367153636218, "grad_norm": 0.7200700640678406, "learning_rate": 0.00016126312001868033, "loss": 0.4114, "step": 3612 }, { "epoch": 1.1600577941884733, "grad_norm": 0.7452567219734192, "learning_rate": 0.00016123552720733765, "loss": 0.5016, "step": 3613 }, { "epoch": 1.1603788730133249, "grad_norm": 0.543973982334137, "learning_rate": 0.00016120792693463174, "loss": 0.4231, "step": 3614 }, { "epoch": 1.1606999518381762, "grad_norm": 0.718130886554718, "learning_rate": 0.00016118031920392558, "loss": 0.6174, "step": 3615 }, { "epoch": 1.1610210306630278, "grad_norm": 0.9145506024360657, "learning_rate": 0.00016115270401858314, "loss": 0.9531, "step": 3616 }, { "epoch": 
1.1613421094878793, "grad_norm": 0.9525867700576782, "learning_rate": 0.00016112508138196917, "loss": 0.6047, "step": 3617 }, { "epoch": 1.1616631883127309, "grad_norm": 0.9359449744224548, "learning_rate": 0.00016109745129744946, "loss": 0.4499, "step": 3618 }, { "epoch": 1.1619842671375822, "grad_norm": 0.8167291283607483, "learning_rate": 0.00016106981376839066, "loss": 0.4337, "step": 3619 }, { "epoch": 1.1623053459624337, "grad_norm": 1.0462777614593506, "learning_rate": 0.00016104216879816026, "loss": 0.5782, "step": 3620 }, { "epoch": 1.1626264247872853, "grad_norm": 0.8575799465179443, "learning_rate": 0.0001610145163901268, "loss": 0.7717, "step": 3621 }, { "epoch": 1.1629475036121368, "grad_norm": 0.911267876625061, "learning_rate": 0.00016098685654765955, "loss": 0.786, "step": 3622 }, { "epoch": 1.1632685824369884, "grad_norm": 0.5476121306419373, "learning_rate": 0.0001609591892741288, "loss": 0.4105, "step": 3623 }, { "epoch": 1.1635896612618397, "grad_norm": 1.0486372709274292, "learning_rate": 0.0001609315145729058, "loss": 0.7131, "step": 3624 }, { "epoch": 1.1639107400866913, "grad_norm": 0.9732680320739746, "learning_rate": 0.00016090383244736256, "loss": 0.6468, "step": 3625 }, { "epoch": 1.1642318189115428, "grad_norm": 0.9579371213912964, "learning_rate": 0.00016087614290087208, "loss": 0.651, "step": 3626 }, { "epoch": 1.1645528977363944, "grad_norm": 0.8852554559707642, "learning_rate": 0.0001608484459368082, "loss": 0.734, "step": 3627 }, { "epoch": 1.1648739765612457, "grad_norm": 1.0034711360931396, "learning_rate": 0.00016082074155854582, "loss": 0.6897, "step": 3628 }, { "epoch": 1.1651950553860972, "grad_norm": 1.2633333206176758, "learning_rate": 0.00016079302976946055, "loss": 0.5851, "step": 3629 }, { "epoch": 1.1655161342109488, "grad_norm": 0.9554821848869324, "learning_rate": 0.000160765310572929, "loss": 0.6042, "step": 3630 }, { "epoch": 1.1658372130358003, "grad_norm": 1.3010461330413818, "learning_rate": 
0.00016073758397232868, "loss": 0.7993, "step": 3631 }, { "epoch": 1.166158291860652, "grad_norm": 2.045257091522217, "learning_rate": 0.000160709849971038, "loss": 0.8856, "step": 3632 }, { "epoch": 1.1664793706855032, "grad_norm": 1.0188322067260742, "learning_rate": 0.00016068210857243624, "loss": 0.7296, "step": 3633 }, { "epoch": 1.1668004495103548, "grad_norm": 0.7480521202087402, "learning_rate": 0.0001606543597799036, "loss": 0.4887, "step": 3634 }, { "epoch": 1.1671215283352063, "grad_norm": 1.0140467882156372, "learning_rate": 0.00016062660359682124, "loss": 0.8066, "step": 3635 }, { "epoch": 1.1674426071600579, "grad_norm": 1.1531801223754883, "learning_rate": 0.0001605988400265711, "loss": 0.7233, "step": 3636 }, { "epoch": 1.1677636859849092, "grad_norm": 0.8829801678657532, "learning_rate": 0.00016057106907253616, "loss": 0.6, "step": 3637 }, { "epoch": 1.1680847648097608, "grad_norm": 1.2912678718566895, "learning_rate": 0.00016054329073810015, "loss": 0.6598, "step": 3638 }, { "epoch": 1.1684058436346123, "grad_norm": 2.6909449100494385, "learning_rate": 0.0001605155050266478, "loss": 0.8908, "step": 3639 }, { "epoch": 1.1687269224594639, "grad_norm": 1.1661655902862549, "learning_rate": 0.00016048771194156477, "loss": 0.8248, "step": 3640 }, { "epoch": 1.1690480012843154, "grad_norm": 0.8896608948707581, "learning_rate": 0.0001604599114862375, "loss": 0.6519, "step": 3641 }, { "epoch": 1.1693690801091667, "grad_norm": 1.0213063955307007, "learning_rate": 0.0001604321036640534, "loss": 0.598, "step": 3642 }, { "epoch": 1.1696901589340183, "grad_norm": 1.0079537630081177, "learning_rate": 0.00016040428847840079, "loss": 0.5429, "step": 3643 }, { "epoch": 1.1700112377588698, "grad_norm": 1.1655068397521973, "learning_rate": 0.00016037646593266883, "loss": 0.6184, "step": 3644 }, { "epoch": 1.1703323165837214, "grad_norm": 1.1363699436187744, "learning_rate": 0.00016034863603024767, "loss": 0.6852, "step": 3645 }, { "epoch": 1.1706533954085727, 
"grad_norm": 2.0418121814727783, "learning_rate": 0.00016032079877452825, "loss": 0.6902, "step": 3646 }, { "epoch": 1.1709744742334243, "grad_norm": 0.8260238170623779, "learning_rate": 0.00016029295416890248, "loss": 0.5518, "step": 3647 }, { "epoch": 1.1712955530582758, "grad_norm": 0.9947139620780945, "learning_rate": 0.00016026510221676311, "loss": 0.6918, "step": 3648 }, { "epoch": 1.1716166318831274, "grad_norm": 1.0895559787750244, "learning_rate": 0.00016023724292150385, "loss": 0.8454, "step": 3649 }, { "epoch": 1.171937710707979, "grad_norm": 1.3528664112091064, "learning_rate": 0.00016020937628651927, "loss": 0.6834, "step": 3650 }, { "epoch": 1.1722587895328302, "grad_norm": 1.0099211931228638, "learning_rate": 0.00016018150231520486, "loss": 0.5097, "step": 3651 }, { "epoch": 1.1725798683576818, "grad_norm": 1.2328240871429443, "learning_rate": 0.0001601536210109569, "loss": 0.6222, "step": 3652 }, { "epoch": 1.1729009471825333, "grad_norm": 0.7499989867210388, "learning_rate": 0.0001601257323771727, "loss": 0.4985, "step": 3653 }, { "epoch": 1.173222026007385, "grad_norm": 1.5503602027893066, "learning_rate": 0.00016009783641725034, "loss": 0.7534, "step": 3654 }, { "epoch": 1.1735431048322362, "grad_norm": 1.1560394763946533, "learning_rate": 0.00016006993313458896, "loss": 0.5315, "step": 3655 }, { "epoch": 1.1738641836570878, "grad_norm": 1.0473573207855225, "learning_rate": 0.00016004202253258842, "loss": 0.7029, "step": 3656 }, { "epoch": 1.1741852624819393, "grad_norm": 0.7268290519714355, "learning_rate": 0.00016001410461464956, "loss": 0.3893, "step": 3657 }, { "epoch": 1.1745063413067909, "grad_norm": 0.8399918079376221, "learning_rate": 0.0001599861793841741, "loss": 0.6149, "step": 3658 }, { "epoch": 1.1748274201316424, "grad_norm": 0.9458096027374268, "learning_rate": 0.00015995824684456465, "loss": 0.6191, "step": 3659 }, { "epoch": 1.1751484989564938, "grad_norm": 0.9515113234519958, "learning_rate": 0.00015993030699922468, "loss": 
0.6762, "step": 3660 }, { "epoch": 1.1754695777813453, "grad_norm": 0.9861279726028442, "learning_rate": 0.0001599023598515586, "loss": 0.4427, "step": 3661 }, { "epoch": 1.1757906566061969, "grad_norm": 0.9347848892211914, "learning_rate": 0.00015987440540497167, "loss": 0.4939, "step": 3662 }, { "epoch": 1.1761117354310484, "grad_norm": 0.8265044093132019, "learning_rate": 0.00015984644366287006, "loss": 0.525, "step": 3663 }, { "epoch": 1.1764328142558997, "grad_norm": 1.6481767892837524, "learning_rate": 0.00015981847462866083, "loss": 0.4591, "step": 3664 }, { "epoch": 1.1767538930807513, "grad_norm": 0.738932728767395, "learning_rate": 0.0001597904983057519, "loss": 0.5946, "step": 3665 }, { "epoch": 1.1770749719056028, "grad_norm": 0.8453285694122314, "learning_rate": 0.00015976251469755214, "loss": 1.0439, "step": 3666 }, { "epoch": 1.1773960507304544, "grad_norm": 1.1691863536834717, "learning_rate": 0.00015973452380747122, "loss": 0.7068, "step": 3667 }, { "epoch": 1.177717129555306, "grad_norm": 0.9889507293701172, "learning_rate": 0.00015970652563891978, "loss": 0.6272, "step": 3668 }, { "epoch": 1.1780382083801573, "grad_norm": 0.9118053317070007, "learning_rate": 0.00015967852019530926, "loss": 0.5458, "step": 3669 }, { "epoch": 1.1783592872050088, "grad_norm": 0.9585251808166504, "learning_rate": 0.00015965050748005215, "loss": 0.3483, "step": 3670 }, { "epoch": 1.1786803660298604, "grad_norm": 0.7852297425270081, "learning_rate": 0.0001596224874965616, "loss": 0.3958, "step": 3671 }, { "epoch": 1.179001444854712, "grad_norm": 1.148110270500183, "learning_rate": 0.0001595944602482518, "loss": 0.7532, "step": 3672 }, { "epoch": 1.1793225236795633, "grad_norm": 1.0729384422302246, "learning_rate": 0.0001595664257385378, "loss": 0.7822, "step": 3673 }, { "epoch": 1.1796436025044148, "grad_norm": 1.0979061126708984, "learning_rate": 0.00015953838397083552, "loss": 0.8229, "step": 3674 }, { "epoch": 1.1799646813292664, "grad_norm": 0.9034024477005005, 
"learning_rate": 0.00015951033494856175, "loss": 0.6591, "step": 3675 }, { "epoch": 1.180285760154118, "grad_norm": 0.832550048828125, "learning_rate": 0.00015948227867513415, "loss": 0.6285, "step": 3676 }, { "epoch": 1.1806068389789695, "grad_norm": 0.8613409399986267, "learning_rate": 0.00015945421515397133, "loss": 0.5975, "step": 3677 }, { "epoch": 1.1809279178038208, "grad_norm": 0.762203574180603, "learning_rate": 0.00015942614438849275, "loss": 0.5549, "step": 3678 }, { "epoch": 1.1812489966286723, "grad_norm": 0.8697448968887329, "learning_rate": 0.0001593980663821187, "loss": 0.7776, "step": 3679 }, { "epoch": 1.1815700754535239, "grad_norm": 0.8074071407318115, "learning_rate": 0.00015936998113827048, "loss": 0.6768, "step": 3680 }, { "epoch": 1.1818911542783754, "grad_norm": 0.7951146364212036, "learning_rate": 0.00015934188866037016, "loss": 0.6422, "step": 3681 }, { "epoch": 1.1822122331032268, "grad_norm": 0.7514532804489136, "learning_rate": 0.0001593137889518407, "loss": 0.5805, "step": 3682 }, { "epoch": 1.1825333119280783, "grad_norm": 0.7481774687767029, "learning_rate": 0.00015928568201610595, "loss": 0.5634, "step": 3683 }, { "epoch": 1.1828543907529299, "grad_norm": 1.1757893562316895, "learning_rate": 0.00015925756785659069, "loss": 0.8346, "step": 3684 }, { "epoch": 1.1831754695777814, "grad_norm": 1.0502216815948486, "learning_rate": 0.00015922944647672052, "loss": 0.6632, "step": 3685 }, { "epoch": 1.183496548402633, "grad_norm": 0.7584315538406372, "learning_rate": 0.00015920131787992197, "loss": 0.5628, "step": 3686 }, { "epoch": 1.1838176272274843, "grad_norm": 0.8802182674407959, "learning_rate": 0.0001591731820696224, "loss": 0.6358, "step": 3687 }, { "epoch": 1.1841387060523358, "grad_norm": 0.9516767263412476, "learning_rate": 0.0001591450390492501, "loss": 0.6534, "step": 3688 }, { "epoch": 1.1844597848771874, "grad_norm": 0.8391969799995422, "learning_rate": 0.0001591168888222342, "loss": 0.6248, "step": 3689 }, { "epoch": 
1.184780863702039, "grad_norm": 1.1127198934555054, "learning_rate": 0.00015908873139200473, "loss": 0.6813, "step": 3690 }, { "epoch": 1.1851019425268903, "grad_norm": 1.2551233768463135, "learning_rate": 0.00015906056676199255, "loss": 0.577, "step": 3691 }, { "epoch": 1.1854230213517418, "grad_norm": 1.1656824350357056, "learning_rate": 0.00015903239493562948, "loss": 0.8192, "step": 3692 }, { "epoch": 1.1857441001765934, "grad_norm": 0.7559661269187927, "learning_rate": 0.00015900421591634814, "loss": 0.5647, "step": 3693 }, { "epoch": 1.186065179001445, "grad_norm": 0.8689810037612915, "learning_rate": 0.00015897602970758206, "loss": 0.6108, "step": 3694 }, { "epoch": 1.1863862578262965, "grad_norm": 1.167493462562561, "learning_rate": 0.00015894783631276567, "loss": 0.6969, "step": 3695 }, { "epoch": 1.1867073366511478, "grad_norm": 1.027427077293396, "learning_rate": 0.0001589196357353342, "loss": 0.6776, "step": 3696 }, { "epoch": 1.1870284154759994, "grad_norm": 1.0420182943344116, "learning_rate": 0.00015889142797872387, "loss": 0.606, "step": 3697 }, { "epoch": 1.187349494300851, "grad_norm": 0.8472828269004822, "learning_rate": 0.0001588632130463717, "loss": 0.5962, "step": 3698 }, { "epoch": 1.1876705731257025, "grad_norm": 0.9795169830322266, "learning_rate": 0.00015883499094171554, "loss": 0.59, "step": 3699 }, { "epoch": 1.1879916519505538, "grad_norm": 1.1912764310836792, "learning_rate": 0.0001588067616681942, "loss": 0.7445, "step": 3700 }, { "epoch": 1.1883127307754053, "grad_norm": 1.2817906141281128, "learning_rate": 0.00015877852522924732, "loss": 0.447, "step": 3701 }, { "epoch": 1.1886338096002569, "grad_norm": 1.1116948127746582, "learning_rate": 0.00015875028162831546, "loss": 0.7283, "step": 3702 }, { "epoch": 1.1889548884251084, "grad_norm": 1.298180341720581, "learning_rate": 0.00015872203086883996, "loss": 0.6075, "step": 3703 }, { "epoch": 1.18927596724996, "grad_norm": 1.0509965419769287, "learning_rate": 0.00015869377295426316, 
"loss": 0.5656, "step": 3704 }, { "epoch": 1.1895970460748113, "grad_norm": 1.1119554042816162, "learning_rate": 0.00015866550788802813, "loss": 0.5898, "step": 3705 }, { "epoch": 1.1899181248996629, "grad_norm": 1.1349931955337524, "learning_rate": 0.00015863723567357892, "loss": 0.7535, "step": 3706 }, { "epoch": 1.1902392037245144, "grad_norm": 1.1311873197555542, "learning_rate": 0.00015860895631436043, "loss": 0.6378, "step": 3707 }, { "epoch": 1.1905602825493657, "grad_norm": 1.291495680809021, "learning_rate": 0.0001585806698138184, "loss": 0.6692, "step": 3708 }, { "epoch": 1.1908813613742173, "grad_norm": 0.741792619228363, "learning_rate": 0.0001585523761753994, "loss": 0.3739, "step": 3709 }, { "epoch": 1.1912024401990688, "grad_norm": 1.0837442874908447, "learning_rate": 0.00015852407540255104, "loss": 0.5723, "step": 3710 }, { "epoch": 1.1915235190239204, "grad_norm": 0.8327803015708923, "learning_rate": 0.00015849576749872157, "loss": 0.565, "step": 3711 }, { "epoch": 1.191844597848772, "grad_norm": 0.62799072265625, "learning_rate": 0.00015846745246736026, "loss": 0.3977, "step": 3712 }, { "epoch": 1.1921656766736235, "grad_norm": 0.8305913209915161, "learning_rate": 0.00015843913031191723, "loss": 0.4562, "step": 3713 }, { "epoch": 1.1924867554984748, "grad_norm": 0.7770129442214966, "learning_rate": 0.00015841080103584342, "loss": 0.4003, "step": 3714 }, { "epoch": 1.1928078343233264, "grad_norm": 0.9295758605003357, "learning_rate": 0.0001583824646425907, "loss": 0.803, "step": 3715 }, { "epoch": 1.193128913148178, "grad_norm": 0.8279706239700317, "learning_rate": 0.00015835412113561175, "loss": 0.3903, "step": 3716 }, { "epoch": 1.1934499919730293, "grad_norm": 0.8809469938278198, "learning_rate": 0.00015832577051836015, "loss": 0.771, "step": 3717 }, { "epoch": 1.1937710707978808, "grad_norm": 1.3099849224090576, "learning_rate": 0.00015829741279429035, "loss": 0.7353, "step": 3718 }, { "epoch": 1.1940921496227324, "grad_norm": 
0.8764384388923645, "learning_rate": 0.00015826904796685762, "loss": 0.5436, "step": 3719 }, { "epoch": 1.194413228447584, "grad_norm": 0.8666921257972717, "learning_rate": 0.00015824067603951812, "loss": 0.323, "step": 3720 }, { "epoch": 1.1947343072724355, "grad_norm": 0.9187259674072266, "learning_rate": 0.00015821229701572896, "loss": 0.5078, "step": 3721 }, { "epoch": 1.1950553860972868, "grad_norm": 0.9329090118408203, "learning_rate": 0.00015818391089894796, "loss": 0.4697, "step": 3722 }, { "epoch": 1.1953764649221383, "grad_norm": 0.9224849939346313, "learning_rate": 0.00015815551769263387, "loss": 0.6722, "step": 3723 }, { "epoch": 1.1956975437469899, "grad_norm": 0.9811240434646606, "learning_rate": 0.0001581271174002464, "loss": 0.7558, "step": 3724 }, { "epoch": 1.1960186225718414, "grad_norm": 0.8810230493545532, "learning_rate": 0.000158098710025246, "loss": 0.6703, "step": 3725 }, { "epoch": 1.1963397013966928, "grad_norm": 0.852672815322876, "learning_rate": 0.00015807029557109398, "loss": 0.6907, "step": 3726 }, { "epoch": 1.1966607802215443, "grad_norm": 0.9756046533584595, "learning_rate": 0.0001580418740412526, "loss": 0.7458, "step": 3727 }, { "epoch": 1.1969818590463959, "grad_norm": 0.7994744181632996, "learning_rate": 0.00015801344543918495, "loss": 0.572, "step": 3728 }, { "epoch": 1.1973029378712474, "grad_norm": 0.8432713747024536, "learning_rate": 0.00015798500976835493, "loss": 0.567, "step": 3729 }, { "epoch": 1.197624016696099, "grad_norm": 0.8624154925346375, "learning_rate": 0.00015795656703222736, "loss": 0.5077, "step": 3730 }, { "epoch": 1.1979450955209503, "grad_norm": 1.059381127357483, "learning_rate": 0.0001579281172342679, "loss": 0.6805, "step": 3731 }, { "epoch": 1.1982661743458018, "grad_norm": 1.291218638420105, "learning_rate": 0.00015789966037794306, "loss": 0.816, "step": 3732 }, { "epoch": 1.1985872531706534, "grad_norm": 1.0364362001419067, "learning_rate": 0.00015787119646672025, "loss": 0.7035, "step": 3733 }, { 
"epoch": 1.198908331995505, "grad_norm": 0.8102630972862244, "learning_rate": 0.00015784272550406765, "loss": 0.4892, "step": 3734 }, { "epoch": 1.1992294108203563, "grad_norm": 0.8558127284049988, "learning_rate": 0.00015781424749345446, "loss": 0.6971, "step": 3735 }, { "epoch": 1.1995504896452078, "grad_norm": 0.9906354546546936, "learning_rate": 0.00015778576243835054, "loss": 0.5872, "step": 3736 }, { "epoch": 1.1998715684700594, "grad_norm": 1.178027629852295, "learning_rate": 0.00015775727034222675, "loss": 0.7676, "step": 3737 }, { "epoch": 1.200192647294911, "grad_norm": 0.6666367053985596, "learning_rate": 0.0001577287712085548, "loss": 0.4359, "step": 3738 }, { "epoch": 1.2005137261197625, "grad_norm": 0.7421877980232239, "learning_rate": 0.00015770026504080718, "loss": 0.582, "step": 3739 }, { "epoch": 1.2008348049446138, "grad_norm": 1.3498107194900513, "learning_rate": 0.00015767175184245726, "loss": 0.656, "step": 3740 }, { "epoch": 1.2011558837694654, "grad_norm": 0.896021842956543, "learning_rate": 0.00015764323161697935, "loss": 0.4453, "step": 3741 }, { "epoch": 1.201476962594317, "grad_norm": 0.9143558740615845, "learning_rate": 0.00015761470436784846, "loss": 0.4917, "step": 3742 }, { "epoch": 1.2017980414191685, "grad_norm": 1.4460296630859375, "learning_rate": 0.0001575861700985407, "loss": 0.8317, "step": 3743 }, { "epoch": 1.2021191202440198, "grad_norm": 1.7859466075897217, "learning_rate": 0.0001575576288125327, "loss": 0.6328, "step": 3744 }, { "epoch": 1.2024401990688713, "grad_norm": 0.773352324962616, "learning_rate": 0.00015752908051330227, "loss": 0.4811, "step": 3745 }, { "epoch": 1.202761277893723, "grad_norm": 0.9597437381744385, "learning_rate": 0.00015750052520432787, "loss": 0.6322, "step": 3746 }, { "epoch": 1.2030823567185744, "grad_norm": 1.0898431539535522, "learning_rate": 0.00015747196288908887, "loss": 0.6503, "step": 3747 }, { "epoch": 1.203403435543426, "grad_norm": 1.4335774183273315, "learning_rate": 
0.00015744339357106558, "loss": 0.502, "step": 3748 }, { "epoch": 1.2037245143682773, "grad_norm": 1.5681631565093994, "learning_rate": 0.000157414817253739, "loss": 0.7586, "step": 3749 }, { "epoch": 1.2040455931931289, "grad_norm": 1.2081352472305298, "learning_rate": 0.00015738623394059107, "loss": 0.743, "step": 3750 }, { "epoch": 1.2043666720179804, "grad_norm": 0.9064257740974426, "learning_rate": 0.0001573576436351046, "loss": 0.6028, "step": 3751 }, { "epoch": 1.204687750842832, "grad_norm": 0.9491926431655884, "learning_rate": 0.00015732904634076328, "loss": 0.5614, "step": 3752 }, { "epoch": 1.2050088296676833, "grad_norm": 1.0897586345672607, "learning_rate": 0.00015730044206105156, "loss": 0.5848, "step": 3753 }, { "epoch": 1.2053299084925349, "grad_norm": 0.8406479358673096, "learning_rate": 0.00015727183079945476, "loss": 0.4931, "step": 3754 }, { "epoch": 1.2056509873173864, "grad_norm": 0.9714513421058655, "learning_rate": 0.0001572432125594591, "loss": 0.4901, "step": 3755 }, { "epoch": 1.205972066142238, "grad_norm": 1.135326623916626, "learning_rate": 0.00015721458734455163, "loss": 0.6236, "step": 3756 }, { "epoch": 1.2062931449670895, "grad_norm": 2.5608773231506348, "learning_rate": 0.00015718595515822027, "loss": 0.5221, "step": 3757 }, { "epoch": 1.2066142237919408, "grad_norm": 0.9237856864929199, "learning_rate": 0.0001571573160039537, "loss": 0.5451, "step": 3758 }, { "epoch": 1.2069353026167924, "grad_norm": 1.0552841424942017, "learning_rate": 0.00015712866988524155, "loss": 0.5602, "step": 3759 }, { "epoch": 1.207256381441644, "grad_norm": 1.123316764831543, "learning_rate": 0.0001571000168055743, "loss": 0.6103, "step": 3760 }, { "epoch": 1.2075774602664955, "grad_norm": 0.7353083491325378, "learning_rate": 0.0001570713567684432, "loss": 0.4778, "step": 3761 }, { "epoch": 1.2078985390913468, "grad_norm": 1.4208050966262817, "learning_rate": 0.00015704268977734037, "loss": 0.529, "step": 3762 }, { "epoch": 1.2082196179161984, 
"grad_norm": 1.0792194604873657, "learning_rate": 0.00015701401583575884, "loss": 0.5751, "step": 3763 }, { "epoch": 1.20854069674105, "grad_norm": 0.7297836542129517, "learning_rate": 0.00015698533494719238, "loss": 0.4981, "step": 3764 }, { "epoch": 1.2088617755659015, "grad_norm": 0.9594284892082214, "learning_rate": 0.00015695664711513576, "loss": 0.9049, "step": 3765 }, { "epoch": 1.209182854390753, "grad_norm": 0.8230898976325989, "learning_rate": 0.00015692795234308445, "loss": 0.964, "step": 3766 }, { "epoch": 1.2095039332156043, "grad_norm": 0.7228332757949829, "learning_rate": 0.0001568992506345348, "loss": 1.1967, "step": 3767 }, { "epoch": 1.209825012040456, "grad_norm": 0.8351823687553406, "learning_rate": 0.00015687054199298408, "loss": 0.5122, "step": 3768 }, { "epoch": 1.2101460908653074, "grad_norm": 1.0156910419464111, "learning_rate": 0.0001568418264219303, "loss": 0.4224, "step": 3769 }, { "epoch": 1.210467169690159, "grad_norm": 0.9522204995155334, "learning_rate": 0.0001568131039248724, "loss": 0.4744, "step": 3770 }, { "epoch": 1.2107882485150103, "grad_norm": 0.835364818572998, "learning_rate": 0.00015678437450531013, "loss": 0.3386, "step": 3771 }, { "epoch": 1.2111093273398619, "grad_norm": 1.0940889120101929, "learning_rate": 0.00015675563816674407, "loss": 0.4991, "step": 3772 }, { "epoch": 1.2114304061647134, "grad_norm": 0.7849900722503662, "learning_rate": 0.00015672689491267567, "loss": 0.3609, "step": 3773 }, { "epoch": 1.211751484989565, "grad_norm": 1.0691943168640137, "learning_rate": 0.00015669814474660718, "loss": 0.8524, "step": 3774 }, { "epoch": 1.2120725638144165, "grad_norm": 2.4347715377807617, "learning_rate": 0.0001566693876720417, "loss": 0.6352, "step": 3775 }, { "epoch": 1.2123936426392679, "grad_norm": 0.8152544498443604, "learning_rate": 0.00015664062369248328, "loss": 0.5616, "step": 3776 }, { "epoch": 1.2127147214641194, "grad_norm": 0.7303499579429626, "learning_rate": 0.00015661185281143667, "loss": 0.546, 
"step": 3777 }, { "epoch": 1.213035800288971, "grad_norm": 0.9726660251617432, "learning_rate": 0.0001565830750324075, "loss": 0.5368, "step": 3778 }, { "epoch": 1.2133568791138225, "grad_norm": 0.8433571457862854, "learning_rate": 0.0001565542903589023, "loss": 0.5876, "step": 3779 }, { "epoch": 1.2136779579386738, "grad_norm": 0.7719855308532715, "learning_rate": 0.00015652549879442834, "loss": 0.6917, "step": 3780 }, { "epoch": 1.2139990367635254, "grad_norm": 0.931829571723938, "learning_rate": 0.0001564967003424938, "loss": 0.7677, "step": 3781 }, { "epoch": 1.214320115588377, "grad_norm": 1.2775667905807495, "learning_rate": 0.00015646789500660773, "loss": 0.8883, "step": 3782 }, { "epoch": 1.2146411944132285, "grad_norm": 0.9605757594108582, "learning_rate": 0.00015643908279027992, "loss": 0.6806, "step": 3783 }, { "epoch": 1.21496227323808, "grad_norm": 0.9084275960922241, "learning_rate": 0.00015641026369702106, "loss": 0.6906, "step": 3784 }, { "epoch": 1.2152833520629314, "grad_norm": 0.9735769629478455, "learning_rate": 0.00015638143773034267, "loss": 0.6756, "step": 3785 }, { "epoch": 1.215604430887783, "grad_norm": 1.068384051322937, "learning_rate": 0.00015635260489375714, "loss": 0.7305, "step": 3786 }, { "epoch": 1.2159255097126345, "grad_norm": 0.9031590223312378, "learning_rate": 0.00015632376519077767, "loss": 0.6657, "step": 3787 }, { "epoch": 1.216246588537486, "grad_norm": 1.1685518026351929, "learning_rate": 0.0001562949186249182, "loss": 0.8324, "step": 3788 }, { "epoch": 1.2165676673623373, "grad_norm": 0.9389743804931641, "learning_rate": 0.0001562660651996937, "loss": 0.6689, "step": 3789 }, { "epoch": 1.216888746187189, "grad_norm": 0.8513121008872986, "learning_rate": 0.0001562372049186198, "loss": 0.5915, "step": 3790 }, { "epoch": 1.2172098250120404, "grad_norm": 1.3301939964294434, "learning_rate": 0.00015620833778521307, "loss": 0.5882, "step": 3791 }, { "epoch": 1.217530903836892, "grad_norm": 1.1245044469833374, "learning_rate": 
0.00015617946380299088, "loss": 0.6076, "step": 3792 }, { "epoch": 1.2178519826617435, "grad_norm": 0.8256964087486267, "learning_rate": 0.00015615058297547145, "loss": 0.5895, "step": 3793 }, { "epoch": 1.2181730614865949, "grad_norm": 1.246702790260315, "learning_rate": 0.0001561216953061738, "loss": 0.6992, "step": 3794 }, { "epoch": 1.2184941403114464, "grad_norm": 0.8872213959693909, "learning_rate": 0.0001560928007986178, "loss": 0.547, "step": 3795 }, { "epoch": 1.218815219136298, "grad_norm": 1.465101718902588, "learning_rate": 0.0001560638994563242, "loss": 0.879, "step": 3796 }, { "epoch": 1.2191362979611495, "grad_norm": 2.137561321258545, "learning_rate": 0.00015603499128281448, "loss": 0.5223, "step": 3797 }, { "epoch": 1.2194573767860009, "grad_norm": 1.957757830619812, "learning_rate": 0.00015600607628161103, "loss": 0.5569, "step": 3798 }, { "epoch": 1.2197784556108524, "grad_norm": 0.9652926921844482, "learning_rate": 0.00015597715445623712, "loss": 0.6782, "step": 3799 }, { "epoch": 1.220099534435704, "grad_norm": 1.3139662742614746, "learning_rate": 0.0001559482258102167, "loss": 0.5627, "step": 3800 }, { "epoch": 1.2204206132605555, "grad_norm": 1.0370103120803833, "learning_rate": 0.0001559192903470747, "loss": 0.5761, "step": 3801 }, { "epoch": 1.220741692085407, "grad_norm": 1.4218648672103882, "learning_rate": 0.00015589034807033677, "loss": 0.6787, "step": 3802 }, { "epoch": 1.2210627709102584, "grad_norm": 1.046571969985962, "learning_rate": 0.00015586139898352946, "loss": 0.5636, "step": 3803 }, { "epoch": 1.22138384973511, "grad_norm": 0.8225265145301819, "learning_rate": 0.00015583244309018014, "loss": 0.5093, "step": 3804 }, { "epoch": 1.2217049285599615, "grad_norm": 0.8345786929130554, "learning_rate": 0.000155803480393817, "loss": 0.6215, "step": 3805 }, { "epoch": 1.222026007384813, "grad_norm": 1.0300137996673584, "learning_rate": 0.00015577451089796905, "loss": 0.5036, "step": 3806 }, { "epoch": 1.2223470862096644, "grad_norm": 
1.6953532695770264, "learning_rate": 0.00015574553460616608, "loss": 0.5542, "step": 3807 }, { "epoch": 1.222668165034516, "grad_norm": 1.2669715881347656, "learning_rate": 0.00015571655152193885, "loss": 0.5519, "step": 3808 }, { "epoch": 1.2229892438593675, "grad_norm": 0.9114670753479004, "learning_rate": 0.00015568756164881882, "loss": 0.5563, "step": 3809 }, { "epoch": 1.223310322684219, "grad_norm": 1.259053349494934, "learning_rate": 0.00015565856499033832, "loss": 0.5622, "step": 3810 }, { "epoch": 1.2236314015090706, "grad_norm": 1.13980233669281, "learning_rate": 0.0001556295615500305, "loss": 0.528, "step": 3811 }, { "epoch": 1.223952480333922, "grad_norm": 1.1629769802093506, "learning_rate": 0.0001556005513314293, "loss": 0.389, "step": 3812 }, { "epoch": 1.2242735591587735, "grad_norm": 1.1004542112350464, "learning_rate": 0.00015557153433806966, "loss": 0.43, "step": 3813 }, { "epoch": 1.224594637983625, "grad_norm": 1.0557912588119507, "learning_rate": 0.00015554251057348713, "loss": 0.5082, "step": 3814 }, { "epoch": 1.2249157168084766, "grad_norm": 0.8889212012290955, "learning_rate": 0.0001555134800412181, "loss": 0.9576, "step": 3815 }, { "epoch": 1.2252367956333279, "grad_norm": 0.7801089882850647, "learning_rate": 0.00015548444274479995, "loss": 1.078, "step": 3816 }, { "epoch": 1.2255578744581794, "grad_norm": 0.8572964668273926, "learning_rate": 0.00015545539868777074, "loss": 0.5412, "step": 3817 }, { "epoch": 1.225878953283031, "grad_norm": 1.0506659746170044, "learning_rate": 0.00015542634787366942, "loss": 0.5478, "step": 3818 }, { "epoch": 1.2262000321078825, "grad_norm": 0.9632019996643066, "learning_rate": 0.00015539729030603574, "loss": 0.4899, "step": 3819 }, { "epoch": 1.226521110932734, "grad_norm": 0.9268298149108887, "learning_rate": 0.00015536822598841024, "loss": 0.2964, "step": 3820 }, { "epoch": 1.2268421897575854, "grad_norm": 0.7900720834732056, "learning_rate": 0.00015533915492433443, "loss": 0.4777, "step": 3821 }, { 
"epoch": 1.227163268582437, "grad_norm": 0.9437267780303955, "learning_rate": 0.0001553100771173504, "loss": 0.3384, "step": 3822 }, { "epoch": 1.2274843474072885, "grad_norm": 0.995335578918457, "learning_rate": 0.00015528099257100127, "loss": 0.5097, "step": 3823 }, { "epoch": 1.22780542623214, "grad_norm": 1.3662070035934448, "learning_rate": 0.00015525190128883083, "loss": 0.7474, "step": 3824 }, { "epoch": 1.2281265050569914, "grad_norm": 1.1693106889724731, "learning_rate": 0.00015522280327438388, "loss": 0.6907, "step": 3825 }, { "epoch": 1.228447583881843, "grad_norm": 0.9598641395568848, "learning_rate": 0.0001551936985312058, "loss": 0.651, "step": 3826 }, { "epoch": 1.2287686627066945, "grad_norm": 0.9100416898727417, "learning_rate": 0.00015516458706284303, "loss": 0.6725, "step": 3827 }, { "epoch": 1.229089741531546, "grad_norm": 0.8291442394256592, "learning_rate": 0.00015513546887284264, "loss": 0.621, "step": 3828 }, { "epoch": 1.2294108203563976, "grad_norm": 0.8551065325737, "learning_rate": 0.0001551063439647526, "loss": 0.593, "step": 3829 }, { "epoch": 1.229731899181249, "grad_norm": 0.6962476968765259, "learning_rate": 0.00015507721234212172, "loss": 0.6304, "step": 3830 }, { "epoch": 1.2300529780061005, "grad_norm": 0.8959445357322693, "learning_rate": 0.00015504807400849958, "loss": 0.7921, "step": 3831 }, { "epoch": 1.230374056830952, "grad_norm": 0.88429856300354, "learning_rate": 0.0001550189289674366, "loss": 0.6881, "step": 3832 }, { "epoch": 1.2306951356558036, "grad_norm": 1.0589004755020142, "learning_rate": 0.000154989777222484, "loss": 0.6597, "step": 3833 }, { "epoch": 1.231016214480655, "grad_norm": 0.932532548904419, "learning_rate": 0.00015496061877719384, "loss": 0.6299, "step": 3834 }, { "epoch": 1.2313372933055065, "grad_norm": 1.763346552848816, "learning_rate": 0.000154931453635119, "loss": 0.6717, "step": 3835 }, { "epoch": 1.231658372130358, "grad_norm": 2.0481505393981934, "learning_rate": 0.0001549022817998132, "loss": 
0.6489, "step": 3836 }, { "epoch": 1.2319794509552096, "grad_norm": 0.8745352029800415, "learning_rate": 0.00015487310327483086, "loss": 0.6171, "step": 3837 }, { "epoch": 1.232300529780061, "grad_norm": 0.8354154825210571, "learning_rate": 0.00015484391806372733, "loss": 0.669, "step": 3838 }, { "epoch": 1.2326216086049124, "grad_norm": 0.9111486673355103, "learning_rate": 0.00015481472617005876, "loss": 0.7414, "step": 3839 }, { "epoch": 1.232942687429764, "grad_norm": 1.0252684354782104, "learning_rate": 0.00015478552759738207, "loss": 0.724, "step": 3840 }, { "epoch": 1.2332637662546155, "grad_norm": 1.018760323524475, "learning_rate": 0.00015475632234925504, "loss": 0.5888, "step": 3841 }, { "epoch": 1.233584845079467, "grad_norm": 1.5923256874084473, "learning_rate": 0.00015472711042923621, "loss": 0.6708, "step": 3842 }, { "epoch": 1.2339059239043184, "grad_norm": 1.3504046201705933, "learning_rate": 0.00015469789184088497, "loss": 0.6927, "step": 3843 }, { "epoch": 1.23422700272917, "grad_norm": 0.9088150262832642, "learning_rate": 0.00015466866658776155, "loss": 0.6003, "step": 3844 }, { "epoch": 1.2345480815540215, "grad_norm": 0.9223833680152893, "learning_rate": 0.00015463943467342693, "loss": 0.7096, "step": 3845 }, { "epoch": 1.234869160378873, "grad_norm": 0.9848785400390625, "learning_rate": 0.0001546101961014429, "loss": 0.6357, "step": 3846 }, { "epoch": 1.2351902392037246, "grad_norm": 0.9770264029502869, "learning_rate": 0.00015458095087537218, "loss": 0.5513, "step": 3847 }, { "epoch": 1.235511318028576, "grad_norm": 0.8450601696968079, "learning_rate": 0.00015455169899877813, "loss": 0.571, "step": 3848 }, { "epoch": 1.2358323968534275, "grad_norm": 0.8427115678787231, "learning_rate": 0.00015452244047522502, "loss": 0.637, "step": 3849 }, { "epoch": 1.236153475678279, "grad_norm": 0.7641344666481018, "learning_rate": 0.00015449317530827794, "loss": 0.564, "step": 3850 }, { "epoch": 1.2364745545031306, "grad_norm": 1.1168718338012695, 
"learning_rate": 0.00015446390350150273, "loss": 0.8165, "step": 3851 }, { "epoch": 1.236795633327982, "grad_norm": 1.0445823669433594, "learning_rate": 0.0001544346250584661, "loss": 0.5067, "step": 3852 }, { "epoch": 1.2371167121528335, "grad_norm": 0.9036487340927124, "learning_rate": 0.00015440533998273547, "loss": 0.5537, "step": 3853 }, { "epoch": 1.237437790977685, "grad_norm": 0.7309536933898926, "learning_rate": 0.00015437604827787927, "loss": 0.4998, "step": 3854 }, { "epoch": 1.2377588698025366, "grad_norm": 0.7521160840988159, "learning_rate": 0.0001543467499474665, "loss": 0.4568, "step": 3855 }, { "epoch": 1.2380799486273881, "grad_norm": 1.4212712049484253, "learning_rate": 0.00015431744499506706, "loss": 0.6532, "step": 3856 }, { "epoch": 1.2384010274522395, "grad_norm": 0.758643627166748, "learning_rate": 0.00015428813342425177, "loss": 0.436, "step": 3857 }, { "epoch": 1.238722106277091, "grad_norm": 0.8695588707923889, "learning_rate": 0.00015425881523859207, "loss": 0.4969, "step": 3858 }, { "epoch": 1.2390431851019426, "grad_norm": 1.0201971530914307, "learning_rate": 0.0001542294904416603, "loss": 0.5688, "step": 3859 }, { "epoch": 1.239364263926794, "grad_norm": 0.7259702682495117, "learning_rate": 0.00015420015903702962, "loss": 0.4379, "step": 3860 }, { "epoch": 1.2396853427516454, "grad_norm": 1.1150838136672974, "learning_rate": 0.000154170821028274, "loss": 0.6326, "step": 3861 }, { "epoch": 1.240006421576497, "grad_norm": 0.9569064974784851, "learning_rate": 0.00015414147641896813, "loss": 0.4673, "step": 3862 }, { "epoch": 1.2403275004013485, "grad_norm": 0.6631338000297546, "learning_rate": 0.00015411212521268758, "loss": 0.4138, "step": 3863 }, { "epoch": 1.2406485792262, "grad_norm": 1.1545629501342773, "learning_rate": 0.00015408276741300873, "loss": 0.4498, "step": 3864 }, { "epoch": 1.2409696580510516, "grad_norm": 3.294052839279175, "learning_rate": 0.00015405340302350871, "loss": 0.7942, "step": 3865 }, { "epoch": 
1.241290736875903, "grad_norm": 0.7048175930976868, "learning_rate": 0.0001540240320477655, "loss": 0.727, "step": 3866 }, { "epoch": 1.2416118157007545, "grad_norm": 0.6912996172904968, "learning_rate": 0.00015399465448935788, "loss": 0.7886, "step": 3867 }, { "epoch": 1.241932894525606, "grad_norm": 0.8170081377029419, "learning_rate": 0.00015396527035186537, "loss": 0.4395, "step": 3868 }, { "epoch": 1.2422539733504576, "grad_norm": 0.9392108917236328, "learning_rate": 0.00015393587963886835, "loss": 0.3322, "step": 3869 }, { "epoch": 1.242575052175309, "grad_norm": 1.0908229351043701, "learning_rate": 0.00015390648235394803, "loss": 0.3907, "step": 3870 }, { "epoch": 1.2428961310001605, "grad_norm": 1.2291988134384155, "learning_rate": 0.0001538770785006863, "loss": 0.5189, "step": 3871 }, { "epoch": 1.243217209825012, "grad_norm": 0.9118694067001343, "learning_rate": 0.00015384766808266602, "loss": 0.702, "step": 3872 }, { "epoch": 1.2435382886498636, "grad_norm": 1.1265658140182495, "learning_rate": 0.00015381825110347074, "loss": 0.6291, "step": 3873 }, { "epoch": 1.2438593674747151, "grad_norm": 0.9491468667984009, "learning_rate": 0.00015378882756668478, "loss": 0.7169, "step": 3874 }, { "epoch": 1.2441804462995665, "grad_norm": 0.8333759307861328, "learning_rate": 0.00015375939747589335, "loss": 0.5432, "step": 3875 }, { "epoch": 1.244501525124418, "grad_norm": 0.7121776342391968, "learning_rate": 0.0001537299608346824, "loss": 0.4806, "step": 3876 }, { "epoch": 1.2448226039492696, "grad_norm": 0.9570683836936951, "learning_rate": 0.0001537005176466387, "loss": 0.6031, "step": 3877 }, { "epoch": 1.2451436827741211, "grad_norm": 0.8433529734611511, "learning_rate": 0.00015367106791534983, "loss": 0.7327, "step": 3878 }, { "epoch": 1.2454647615989725, "grad_norm": 0.8592220544815063, "learning_rate": 0.0001536416116444041, "loss": 0.5449, "step": 3879 }, { "epoch": 1.245785840423824, "grad_norm": 0.9075251221656799, "learning_rate": 0.00015361214883739076, 
"loss": 0.6982, "step": 3880 }, { "epoch": 1.2461069192486756, "grad_norm": 1.0835230350494385, "learning_rate": 0.00015358267949789966, "loss": 0.6304, "step": 3881 }, { "epoch": 1.246427998073527, "grad_norm": 0.9230034947395325, "learning_rate": 0.00015355320362952162, "loss": 0.631, "step": 3882 }, { "epoch": 1.2467490768983787, "grad_norm": 1.1106213331222534, "learning_rate": 0.00015352372123584814, "loss": 0.7395, "step": 3883 }, { "epoch": 1.24707015572323, "grad_norm": 1.3075162172317505, "learning_rate": 0.00015349423232047162, "loss": 0.6999, "step": 3884 }, { "epoch": 1.2473912345480815, "grad_norm": 1.0641080141067505, "learning_rate": 0.00015346473688698513, "loss": 0.6968, "step": 3885 }, { "epoch": 1.247712313372933, "grad_norm": 0.8962423801422119, "learning_rate": 0.00015343523493898265, "loss": 0.5366, "step": 3886 }, { "epoch": 1.2480333921977846, "grad_norm": 1.0747464895248413, "learning_rate": 0.00015340572648005888, "loss": 0.5723, "step": 3887 }, { "epoch": 1.248354471022636, "grad_norm": 1.0071834325790405, "learning_rate": 0.0001533762115138093, "loss": 0.6652, "step": 3888 }, { "epoch": 1.2486755498474875, "grad_norm": 1.272632360458374, "learning_rate": 0.0001533466900438303, "loss": 0.7782, "step": 3889 }, { "epoch": 1.248996628672339, "grad_norm": 1.1216950416564941, "learning_rate": 0.00015331716207371888, "loss": 0.6813, "step": 3890 }, { "epoch": 1.2493177074971906, "grad_norm": 0.9694088697433472, "learning_rate": 0.000153287627607073, "loss": 0.5781, "step": 3891 }, { "epoch": 1.2496387863220422, "grad_norm": 1.916099190711975, "learning_rate": 0.00015325808664749135, "loss": 0.8175, "step": 3892 }, { "epoch": 1.2499598651468935, "grad_norm": 0.9841503500938416, "learning_rate": 0.0001532285391985734, "loss": 0.6193, "step": 3893 }, { "epoch": 1.250280943971745, "grad_norm": 1.1670957803726196, "learning_rate": 0.0001531989852639194, "loss": 0.7807, "step": 3894 }, { "epoch": 1.2506020227965966, "grad_norm": 1.1303762197494507, 
"learning_rate": 0.0001531694248471304, "loss": 0.7768, "step": 3895 }, { "epoch": 1.250923101621448, "grad_norm": 0.9170710444450378, "learning_rate": 0.00015313985795180828, "loss": 0.6192, "step": 3896 }, { "epoch": 1.2512441804462995, "grad_norm": 1.1706234216690063, "learning_rate": 0.00015311028458155567, "loss": 0.6049, "step": 3897 }, { "epoch": 1.251565259271151, "grad_norm": 1.188920021057129, "learning_rate": 0.00015308070473997598, "loss": 0.6907, "step": 3898 }, { "epoch": 1.2518863380960026, "grad_norm": 0.8956981301307678, "learning_rate": 0.0001530511184306734, "loss": 0.5518, "step": 3899 }, { "epoch": 1.2522074169208541, "grad_norm": 0.978665828704834, "learning_rate": 0.00015302152565725298, "loss": 0.5264, "step": 3900 }, { "epoch": 1.2525284957457057, "grad_norm": 0.822583794593811, "learning_rate": 0.0001529919264233205, "loss": 0.5521, "step": 3901 }, { "epoch": 1.252849574570557, "grad_norm": 0.7783530354499817, "learning_rate": 0.00015296232073248251, "loss": 0.4333, "step": 3902 }, { "epoch": 1.2531706533954086, "grad_norm": 0.7691320776939392, "learning_rate": 0.00015293270858834644, "loss": 0.4828, "step": 3903 }, { "epoch": 1.2534917322202601, "grad_norm": 1.0390291213989258, "learning_rate": 0.00015290308999452032, "loss": 0.7419, "step": 3904 }, { "epoch": 1.2538128110451114, "grad_norm": 0.8240483999252319, "learning_rate": 0.00015287346495461315, "loss": 0.5811, "step": 3905 }, { "epoch": 1.254133889869963, "grad_norm": 1.1478774547576904, "learning_rate": 0.00015284383347223472, "loss": 0.7649, "step": 3906 }, { "epoch": 1.2544549686948145, "grad_norm": 1.2211809158325195, "learning_rate": 0.00015281419555099546, "loss": 0.6535, "step": 3907 }, { "epoch": 1.254776047519666, "grad_norm": 1.166593074798584, "learning_rate": 0.00015278455119450664, "loss": 0.6659, "step": 3908 }, { "epoch": 1.2550971263445176, "grad_norm": 0.8463481068611145, "learning_rate": 0.00015275490040638038, "loss": 0.508, "step": 3909 }, { "epoch": 
1.2554182051693692, "grad_norm": 1.5510456562042236, "learning_rate": 0.00015272524319022955, "loss": 0.3536, "step": 3910 }, { "epoch": 1.2557392839942205, "grad_norm": 0.9114112854003906, "learning_rate": 0.00015269557954966778, "loss": 0.5686, "step": 3911 }, { "epoch": 1.256060362819072, "grad_norm": 0.8683449029922485, "learning_rate": 0.00015266590948830947, "loss": 0.4499, "step": 3912 }, { "epoch": 1.2563814416439236, "grad_norm": 0.9325652122497559, "learning_rate": 0.00015263623300976978, "loss": 0.4684, "step": 3913 }, { "epoch": 1.256702520468775, "grad_norm": 0.4808101952075958, "learning_rate": 0.00015260655011766484, "loss": 0.3881, "step": 3914 }, { "epoch": 1.2570235992936265, "grad_norm": 0.9658359289169312, "learning_rate": 0.00015257686081561134, "loss": 0.9497, "step": 3915 }, { "epoch": 1.257344678118478, "grad_norm": 0.7216698527336121, "learning_rate": 0.00015254716510722677, "loss": 0.7033, "step": 3916 }, { "epoch": 1.2576657569433296, "grad_norm": 0.9119216799736023, "learning_rate": 0.0001525174629961296, "loss": 0.6078, "step": 3917 }, { "epoch": 1.2579868357681812, "grad_norm": 0.9573872685432434, "learning_rate": 0.00015248775448593882, "loss": 0.5487, "step": 3918 }, { "epoch": 1.2583079145930327, "grad_norm": 0.8556592464447021, "learning_rate": 0.00015245803958027434, "loss": 0.3048, "step": 3919 }, { "epoch": 1.258628993417884, "grad_norm": 0.7970437407493591, "learning_rate": 0.00015242831828275692, "loss": 0.5525, "step": 3920 }, { "epoch": 1.2589500722427356, "grad_norm": 1.1400879621505737, "learning_rate": 0.00015239859059700794, "loss": 0.7523, "step": 3921 }, { "epoch": 1.2592711510675871, "grad_norm": 1.1350511312484741, "learning_rate": 0.00015236885652664963, "loss": 0.6163, "step": 3922 }, { "epoch": 1.2595922298924385, "grad_norm": 1.0047470331192017, "learning_rate": 0.00015233911607530497, "loss": 0.5577, "step": 3923 }, { "epoch": 1.25991330871729, "grad_norm": 1.0919958353042603, "learning_rate": 
0.0001523093692465978, "loss": 0.5031, "step": 3924 }, { "epoch": 1.2602343875421416, "grad_norm": 0.7224966883659363, "learning_rate": 0.00015227961604415266, "loss": 0.5719, "step": 3925 }, { "epoch": 1.2605554663669931, "grad_norm": 0.9928240776062012, "learning_rate": 0.0001522498564715949, "loss": 0.6879, "step": 3926 }, { "epoch": 1.2608765451918447, "grad_norm": 0.8768775463104248, "learning_rate": 0.0001522200905325506, "loss": 0.6634, "step": 3927 }, { "epoch": 1.2611976240166962, "grad_norm": 0.8131898641586304, "learning_rate": 0.00015219031823064666, "loss": 0.5647, "step": 3928 }, { "epoch": 1.2615187028415475, "grad_norm": 1.0961014032363892, "learning_rate": 0.0001521605395695108, "loss": 0.6579, "step": 3929 }, { "epoch": 1.261839781666399, "grad_norm": 0.9365900754928589, "learning_rate": 0.0001521307545527714, "loss": 0.6051, "step": 3930 }, { "epoch": 1.2621608604912506, "grad_norm": 1.2082931995391846, "learning_rate": 0.00015210096318405767, "loss": 0.7062, "step": 3931 }, { "epoch": 1.262481939316102, "grad_norm": 0.8848322033882141, "learning_rate": 0.0001520711654669996, "loss": 0.6358, "step": 3932 }, { "epoch": 1.2628030181409535, "grad_norm": 0.8963106274604797, "learning_rate": 0.00015204136140522799, "loss": 0.5787, "step": 3933 }, { "epoch": 1.263124096965805, "grad_norm": 1.1440925598144531, "learning_rate": 0.0001520115510023743, "loss": 0.6726, "step": 3934 }, { "epoch": 1.2634451757906566, "grad_norm": 1.4616795778274536, "learning_rate": 0.00015198173426207094, "loss": 0.8507, "step": 3935 }, { "epoch": 1.2637662546155082, "grad_norm": 1.1262779235839844, "learning_rate": 0.00015195191118795096, "loss": 0.6682, "step": 3936 }, { "epoch": 1.2640873334403597, "grad_norm": 0.878693699836731, "learning_rate": 0.00015192208178364816, "loss": 0.6938, "step": 3937 }, { "epoch": 1.264408412265211, "grad_norm": 1.1921688318252563, "learning_rate": 0.00015189224605279718, "loss": 0.6734, "step": 3938 }, { "epoch": 1.2647294910900626, 
"grad_norm": 1.2111103534698486, "learning_rate": 0.00015186240399903342, "loss": 0.6716, "step": 3939 }, { "epoch": 1.2650505699149142, "grad_norm": 1.1788506507873535, "learning_rate": 0.00015183255562599307, "loss": 0.6973, "step": 3940 }, { "epoch": 1.2653716487397655, "grad_norm": 1.2426005601882935, "learning_rate": 0.00015180270093731303, "loss": 0.5777, "step": 3941 }, { "epoch": 1.265692727564617, "grad_norm": 1.2214686870574951, "learning_rate": 0.000151772839936631, "loss": 0.8486, "step": 3942 }, { "epoch": 1.2660138063894686, "grad_norm": 0.9162314534187317, "learning_rate": 0.0001517429726275855, "loss": 0.4804, "step": 3943 }, { "epoch": 1.2663348852143201, "grad_norm": 1.108119249343872, "learning_rate": 0.00015171309901381572, "loss": 0.7479, "step": 3944 }, { "epoch": 1.2666559640391717, "grad_norm": 0.9174817204475403, "learning_rate": 0.00015168321909896172, "loss": 0.6093, "step": 3945 }, { "epoch": 1.2669770428640232, "grad_norm": 1.142285704612732, "learning_rate": 0.0001516533328866642, "loss": 0.6486, "step": 3946 }, { "epoch": 1.2672981216888746, "grad_norm": 1.0875240564346313, "learning_rate": 0.00015162344038056476, "loss": 0.7344, "step": 3947 }, { "epoch": 1.2676192005137261, "grad_norm": 0.7915586233139038, "learning_rate": 0.00015159354158430572, "loss": 0.5285, "step": 3948 }, { "epoch": 1.2679402793385777, "grad_norm": 1.0847499370574951, "learning_rate": 0.00015156363650153012, "loss": 0.5974, "step": 3949 }, { "epoch": 1.268261358163429, "grad_norm": 0.7492148876190186, "learning_rate": 0.00015153372513588182, "loss": 0.5093, "step": 3950 }, { "epoch": 1.2685824369882805, "grad_norm": 1.150233507156372, "learning_rate": 0.00015150380749100545, "loss": 0.6276, "step": 3951 }, { "epoch": 1.268903515813132, "grad_norm": 0.8965975642204285, "learning_rate": 0.00015147388357054632, "loss": 0.6621, "step": 3952 }, { "epoch": 1.2692245946379836, "grad_norm": 1.1043106317520142, "learning_rate": 0.00015144395337815064, "loss": 0.6958, 
"step": 3953 }, { "epoch": 1.2695456734628352, "grad_norm": 1.1139799356460571, "learning_rate": 0.0001514140169174653, "loss": 0.613, "step": 3954 }, { "epoch": 1.2698667522876868, "grad_norm": 1.0345196723937988, "learning_rate": 0.00015138407419213796, "loss": 0.5999, "step": 3955 }, { "epoch": 1.270187831112538, "grad_norm": 1.0387682914733887, "learning_rate": 0.00015135412520581702, "loss": 0.6301, "step": 3956 }, { "epoch": 1.2705089099373896, "grad_norm": 0.9722599387168884, "learning_rate": 0.0001513241699621517, "loss": 0.6064, "step": 3957 }, { "epoch": 1.2708299887622412, "grad_norm": 1.886247158050537, "learning_rate": 0.00015129420846479196, "loss": 0.7651, "step": 3958 }, { "epoch": 1.2711510675870925, "grad_norm": 1.071523666381836, "learning_rate": 0.0001512642407173885, "loss": 0.5445, "step": 3959 }, { "epoch": 1.271472146411944, "grad_norm": 0.7643309235572815, "learning_rate": 0.00015123426672359285, "loss": 0.3537, "step": 3960 }, { "epoch": 1.2717932252367956, "grad_norm": 1.0264235734939575, "learning_rate": 0.00015120428648705717, "loss": 0.6581, "step": 3961 }, { "epoch": 1.2721143040616472, "grad_norm": 1.0826537609100342, "learning_rate": 0.00015117430001143452, "loss": 0.6621, "step": 3962 }, { "epoch": 1.2724353828864987, "grad_norm": 0.5251657366752625, "learning_rate": 0.0001511443073003786, "loss": 0.3996, "step": 3963 }, { "epoch": 1.2727564617113503, "grad_norm": 0.6575037240982056, "learning_rate": 0.000151114308357544, "loss": 0.3963, "step": 3964 }, { "epoch": 1.2730775405362016, "grad_norm": 0.7141722440719604, "learning_rate": 0.000151084303186586, "loss": 0.6904, "step": 3965 }, { "epoch": 1.2733986193610531, "grad_norm": 0.8512976169586182, "learning_rate": 0.0001510542917911606, "loss": 0.999, "step": 3966 }, { "epoch": 1.2737196981859047, "grad_norm": 0.7313768863677979, "learning_rate": 0.0001510242741749246, "loss": 1.1832, "step": 3967 }, { "epoch": 1.274040777010756, "grad_norm": 0.7449411153793335, "learning_rate": 
0.00015099425034153553, "loss": 0.6666, "step": 3968 }, { "epoch": 1.2743618558356076, "grad_norm": 0.7838384509086609, "learning_rate": 0.00015096422029465178, "loss": 0.4504, "step": 3969 }, { "epoch": 1.2746829346604591, "grad_norm": 0.6540324091911316, "learning_rate": 0.00015093418403793238, "loss": 0.3508, "step": 3970 }, { "epoch": 1.2750040134853107, "grad_norm": 0.8694151043891907, "learning_rate": 0.00015090414157503714, "loss": 0.3151, "step": 3971 }, { "epoch": 1.2753250923101622, "grad_norm": 1.0265659093856812, "learning_rate": 0.00015087409290962667, "loss": 0.3798, "step": 3972 }, { "epoch": 1.2756461711350138, "grad_norm": 0.7983294129371643, "learning_rate": 0.0001508440380453623, "loss": 0.7452, "step": 3973 }, { "epoch": 1.275967249959865, "grad_norm": 1.3019341230392456, "learning_rate": 0.0001508139769859061, "loss": 0.6089, "step": 3974 }, { "epoch": 1.2762883287847167, "grad_norm": 0.756151556968689, "learning_rate": 0.00015078390973492092, "loss": 0.6198, "step": 3975 }, { "epoch": 1.2766094076095682, "grad_norm": 0.797984778881073, "learning_rate": 0.00015075383629607042, "loss": 0.5604, "step": 3976 }, { "epoch": 1.2769304864344195, "grad_norm": 0.7493342161178589, "learning_rate": 0.00015072375667301893, "loss": 0.6128, "step": 3977 }, { "epoch": 1.277251565259271, "grad_norm": 0.8932711482048035, "learning_rate": 0.00015069367086943154, "loss": 0.5593, "step": 3978 }, { "epoch": 1.2775726440841226, "grad_norm": 0.9008491039276123, "learning_rate": 0.0001506635788889741, "loss": 0.4842, "step": 3979 }, { "epoch": 1.2778937229089742, "grad_norm": 0.8058199882507324, "learning_rate": 0.00015063348073531324, "loss": 0.6554, "step": 3980 }, { "epoch": 1.2782148017338257, "grad_norm": 1.0209277868270874, "learning_rate": 0.00015060337641211637, "loss": 0.5969, "step": 3981 }, { "epoch": 1.2785358805586773, "grad_norm": 1.4035515785217285, "learning_rate": 0.0001505732659230516, "loss": 0.7034, "step": 3982 }, { "epoch": 1.2788569593835286, 
"grad_norm": 0.963071882724762, "learning_rate": 0.0001505431492717878, "loss": 0.5722, "step": 3983 }, { "epoch": 1.2791780382083802, "grad_norm": 1.2736893892288208, "learning_rate": 0.0001505130264619945, "loss": 0.6295, "step": 3984 }, { "epoch": 1.2794991170332317, "grad_norm": 0.9955981373786926, "learning_rate": 0.0001504828974973422, "loss": 0.7137, "step": 3985 }, { "epoch": 1.279820195858083, "grad_norm": 1.3226053714752197, "learning_rate": 0.00015045276238150192, "loss": 0.7224, "step": 3986 }, { "epoch": 1.2801412746829346, "grad_norm": 1.3493560552597046, "learning_rate": 0.00015042262111814565, "loss": 0.932, "step": 3987 }, { "epoch": 1.2804623535077861, "grad_norm": 0.8732056617736816, "learning_rate": 0.00015039247371094588, "loss": 0.5749, "step": 3988 }, { "epoch": 1.2807834323326377, "grad_norm": 0.9701531529426575, "learning_rate": 0.0001503623201635761, "loss": 0.6856, "step": 3989 }, { "epoch": 1.2811045111574892, "grad_norm": 1.385581135749817, "learning_rate": 0.00015033216047971031, "loss": 0.7327, "step": 3990 }, { "epoch": 1.2814255899823408, "grad_norm": 0.7699181437492371, "learning_rate": 0.00015030199466302353, "loss": 0.5799, "step": 3991 }, { "epoch": 1.2817466688071921, "grad_norm": 1.5015044212341309, "learning_rate": 0.00015027182271719122, "loss": 0.8638, "step": 3992 }, { "epoch": 1.2820677476320437, "grad_norm": 0.9158933758735657, "learning_rate": 0.00015024164464588982, "loss": 0.7239, "step": 3993 }, { "epoch": 1.2823888264568952, "grad_norm": 1.1398961544036865, "learning_rate": 0.0001502114604527964, "loss": 0.7259, "step": 3994 }, { "epoch": 1.2827099052817466, "grad_norm": 0.9500359892845154, "learning_rate": 0.00015018127014158886, "loss": 0.5439, "step": 3995 }, { "epoch": 1.283030984106598, "grad_norm": 0.9452093243598938, "learning_rate": 0.00015015107371594573, "loss": 0.617, "step": 3996 }, { "epoch": 1.2833520629314497, "grad_norm": 0.881150484085083, "learning_rate": 0.00015012087117954642, "loss": 0.4604, 
"step": 3997 }, { "epoch": 1.2836731417563012, "grad_norm": 1.126312494277954, "learning_rate": 0.000150090662536071, "loss": 0.8014, "step": 3998 }, { "epoch": 1.2839942205811528, "grad_norm": 1.0232257843017578, "learning_rate": 0.0001500604477892003, "loss": 0.7495, "step": 3999 }, { "epoch": 1.2843152994060043, "grad_norm": 0.9484618902206421, "learning_rate": 0.00015003022694261585, "loss": 0.6574, "step": 4000 }, { "epoch": 1.2846363782308556, "grad_norm": 0.8673555850982666, "learning_rate": 0.00015000000000000001, "loss": 0.6955, "step": 4001 }, { "epoch": 1.2849574570557072, "grad_norm": 0.842471718788147, "learning_rate": 0.00014996976696503587, "loss": 0.5209, "step": 4002 }, { "epoch": 1.2852785358805587, "grad_norm": 0.6719858050346375, "learning_rate": 0.00014993952784140717, "loss": 0.4186, "step": 4003 }, { "epoch": 1.28559961470541, "grad_norm": 1.7661453485488892, "learning_rate": 0.00014990928263279848, "loss": 0.5979, "step": 4004 }, { "epoch": 1.2859206935302616, "grad_norm": 1.1179763078689575, "learning_rate": 0.00014987903134289508, "loss": 0.5886, "step": 4005 }, { "epoch": 1.2862417723551132, "grad_norm": 0.9625594615936279, "learning_rate": 0.00014984877397538303, "loss": 0.5743, "step": 4006 }, { "epoch": 1.2865628511799647, "grad_norm": 1.0250779390335083, "learning_rate": 0.0001498185105339491, "loss": 0.5355, "step": 4007 }, { "epoch": 1.2868839300048163, "grad_norm": 1.0908957719802856, "learning_rate": 0.00014978824102228076, "loss": 0.5967, "step": 4008 }, { "epoch": 1.2872050088296678, "grad_norm": 0.8703522086143494, "learning_rate": 0.00014975796544406625, "loss": 0.5883, "step": 4009 }, { "epoch": 1.2875260876545191, "grad_norm": 0.976186990737915, "learning_rate": 0.0001497276838029946, "loss": 0.5918, "step": 4010 }, { "epoch": 1.2878471664793707, "grad_norm": 1.2151916027069092, "learning_rate": 0.00014969739610275556, "loss": 0.4542, "step": 4011 }, { "epoch": 1.2881682453042222, "grad_norm": 0.8926727175712585, 
"learning_rate": 0.0001496671023470395, "loss": 0.5784, "step": 4012 }, { "epoch": 1.2884893241290736, "grad_norm": 1.0263932943344116, "learning_rate": 0.0001496368025395377, "loss": 0.5809, "step": 4013 }, { "epoch": 1.2888104029539251, "grad_norm": 1.3325320482254028, "learning_rate": 0.00014960649668394207, "loss": 0.4552, "step": 4014 }, { "epoch": 1.2891314817787767, "grad_norm": 1.0666171312332153, "learning_rate": 0.00014957618478394529, "loss": 1.0576, "step": 4015 }, { "epoch": 1.2894525606036282, "grad_norm": 0.7908462285995483, "learning_rate": 0.00014954586684324078, "loss": 1.0942, "step": 4016 }, { "epoch": 1.2897736394284798, "grad_norm": 0.7149842381477356, "learning_rate": 0.00014951554286552266, "loss": 0.8738, "step": 4017 }, { "epoch": 1.2900947182533313, "grad_norm": 0.8624144196510315, "learning_rate": 0.00014948521285448586, "loss": 0.9059, "step": 4018 }, { "epoch": 1.2904157970781827, "grad_norm": 0.8445031642913818, "learning_rate": 0.00014945487681382598, "loss": 0.6081, "step": 4019 }, { "epoch": 1.2907368759030342, "grad_norm": 0.8286342620849609, "learning_rate": 0.00014942453474723935, "loss": 0.4396, "step": 4020 }, { "epoch": 1.2910579547278858, "grad_norm": 0.7982895374298096, "learning_rate": 0.0001493941866584231, "loss": 0.3624, "step": 4021 }, { "epoch": 1.291379033552737, "grad_norm": 0.7270027995109558, "learning_rate": 0.00014936383255107505, "loss": 0.3002, "step": 4022 }, { "epoch": 1.2917001123775886, "grad_norm": 0.838603138923645, "learning_rate": 0.0001493334724288937, "loss": 0.7561, "step": 4023 }, { "epoch": 1.2920211912024402, "grad_norm": 1.2956212759017944, "learning_rate": 0.0001493031062955784, "loss": 0.6119, "step": 4024 }, { "epoch": 1.2923422700272917, "grad_norm": 0.8435970544815063, "learning_rate": 0.00014927273415482915, "loss": 0.5659, "step": 4025 }, { "epoch": 1.2926633488521433, "grad_norm": 0.8424569368362427, "learning_rate": 0.00014924235601034672, "loss": 0.5801, "step": 4026 }, { "epoch": 
1.2929844276769948, "grad_norm": 1.0110530853271484, "learning_rate": 0.00014921197186583255, "loss": 0.6738, "step": 4027 }, { "epoch": 1.2933055065018462, "grad_norm": 0.7541561126708984, "learning_rate": 0.0001491815817249889, "loss": 0.5595, "step": 4028 }, { "epoch": 1.2936265853266977, "grad_norm": 0.9687687158584595, "learning_rate": 0.0001491511855915187, "loss": 0.6474, "step": 4029 }, { "epoch": 1.2939476641515493, "grad_norm": 0.9450034499168396, "learning_rate": 0.00014912078346912563, "loss": 0.5455, "step": 4030 }, { "epoch": 1.2942687429764006, "grad_norm": 0.8591203689575195, "learning_rate": 0.00014909037536151409, "loss": 0.5695, "step": 4031 }, { "epoch": 1.2945898218012521, "grad_norm": 1.0376840829849243, "learning_rate": 0.0001490599612723892, "loss": 0.7787, "step": 4032 }, { "epoch": 1.2949109006261037, "grad_norm": 0.9652031064033508, "learning_rate": 0.00014902954120545687, "loss": 0.6709, "step": 4033 }, { "epoch": 1.2952319794509553, "grad_norm": 0.8163144588470459, "learning_rate": 0.00014899911516442365, "loss": 0.6559, "step": 4034 }, { "epoch": 1.2955530582758068, "grad_norm": 1.0613020658493042, "learning_rate": 0.00014896868315299693, "loss": 0.5966, "step": 4035 }, { "epoch": 1.2958741371006581, "grad_norm": 0.9142425656318665, "learning_rate": 0.00014893824517488464, "loss": 0.6798, "step": 4036 }, { "epoch": 1.2961952159255097, "grad_norm": 0.8986964225769043, "learning_rate": 0.00014890780123379564, "loss": 0.7447, "step": 4037 }, { "epoch": 1.2965162947503612, "grad_norm": 1.133459448814392, "learning_rate": 0.0001488773513334394, "loss": 0.567, "step": 4038 }, { "epoch": 1.2968373735752128, "grad_norm": 1.0823416709899902, "learning_rate": 0.0001488468954775262, "loss": 0.7199, "step": 4039 }, { "epoch": 1.297158452400064, "grad_norm": 0.8796799182891846, "learning_rate": 0.00014881643366976692, "loss": 0.5962, "step": 4040 }, { "epoch": 1.2974795312249157, "grad_norm": 1.0232266187667847, "learning_rate": 
0.0001487859659138733, "loss": 0.7782, "step": 4041 }, { "epoch": 1.2978006100497672, "grad_norm": 1.0571743249893188, "learning_rate": 0.00014875549221355768, "loss": 0.6525, "step": 4042 }, { "epoch": 1.2981216888746188, "grad_norm": 1.7555642127990723, "learning_rate": 0.00014872501257253323, "loss": 0.8595, "step": 4043 }, { "epoch": 1.2984427676994703, "grad_norm": 1.0583701133728027, "learning_rate": 0.00014869452699451383, "loss": 0.6666, "step": 4044 }, { "epoch": 1.2987638465243216, "grad_norm": 1.1438575983047485, "learning_rate": 0.000148664035483214, "loss": 0.8125, "step": 4045 }, { "epoch": 1.2990849253491732, "grad_norm": 1.0455180406570435, "learning_rate": 0.00014863353804234905, "loss": 0.5961, "step": 4046 }, { "epoch": 1.2994060041740247, "grad_norm": 1.0730552673339844, "learning_rate": 0.00014860303467563503, "loss": 0.4922, "step": 4047 }, { "epoch": 1.2997270829988763, "grad_norm": 1.0210821628570557, "learning_rate": 0.00014857252538678865, "loss": 0.6711, "step": 4048 }, { "epoch": 1.3000481618237276, "grad_norm": 1.0794029235839844, "learning_rate": 0.0001485420101795274, "loss": 0.6992, "step": 4049 }, { "epoch": 1.3003692406485792, "grad_norm": 1.1778931617736816, "learning_rate": 0.00014851148905756947, "loss": 0.5997, "step": 4050 }, { "epoch": 1.3006903194734307, "grad_norm": 1.5340932607650757, "learning_rate": 0.00014848096202463372, "loss": 0.4667, "step": 4051 }, { "epoch": 1.3010113982982823, "grad_norm": 1.405383825302124, "learning_rate": 0.0001484504290844398, "loss": 0.7285, "step": 4052 }, { "epoch": 1.3013324771231338, "grad_norm": 0.9958581328392029, "learning_rate": 0.00014841989024070809, "loss": 0.651, "step": 4053 }, { "epoch": 1.3016535559479852, "grad_norm": 1.1035085916519165, "learning_rate": 0.00014838934549715963, "loss": 0.6727, "step": 4054 }, { "epoch": 1.3019746347728367, "grad_norm": 0.9837401509284973, "learning_rate": 0.00014835879485751617, "loss": 0.6048, "step": 4055 }, { "epoch": 1.3022957135976883, 
"grad_norm": 1.033929467201233, "learning_rate": 0.00014832823832550024, "loss": 0.709, "step": 4056 }, { "epoch": 1.3026167924225398, "grad_norm": 1.9119707345962524, "learning_rate": 0.00014829767590483506, "loss": 0.7885, "step": 4057 }, { "epoch": 1.3029378712473911, "grad_norm": 1.1973304748535156, "learning_rate": 0.0001482671075992446, "loss": 0.6681, "step": 4058 }, { "epoch": 1.3032589500722427, "grad_norm": 0.8669224977493286, "learning_rate": 0.00014823653341245353, "loss": 0.5783, "step": 4059 }, { "epoch": 1.3035800288970942, "grad_norm": 1.0361067056655884, "learning_rate": 0.00014820595334818712, "loss": 0.7104, "step": 4060 }, { "epoch": 1.3039011077219458, "grad_norm": 0.9638859033584595, "learning_rate": 0.00014817536741017152, "loss": 0.609, "step": 4061 }, { "epoch": 1.3042221865467973, "grad_norm": 0.9824711084365845, "learning_rate": 0.00014814477560213358, "loss": 0.526, "step": 4062 }, { "epoch": 1.3045432653716487, "grad_norm": 0.6022173762321472, "learning_rate": 0.00014811417792780075, "loss": 0.4001, "step": 4063 }, { "epoch": 1.3048643441965002, "grad_norm": 0.7990144491195679, "learning_rate": 0.00014808357439090127, "loss": 0.4216, "step": 4064 }, { "epoch": 1.3051854230213518, "grad_norm": 0.676643431186676, "learning_rate": 0.00014805296499516407, "loss": 0.7422, "step": 4065 }, { "epoch": 1.3055065018462033, "grad_norm": 0.8250537514686584, "learning_rate": 0.0001480223497443189, "loss": 1.0526, "step": 4066 }, { "epoch": 1.3058275806710546, "grad_norm": 1.0050231218338013, "learning_rate": 0.00014799172864209608, "loss": 0.6552, "step": 4067 }, { "epoch": 1.3061486594959062, "grad_norm": 0.8684215545654297, "learning_rate": 0.00014796110169222666, "loss": 0.4027, "step": 4068 }, { "epoch": 1.3064697383207577, "grad_norm": 0.89919513463974, "learning_rate": 0.0001479304688984425, "loss": 0.4916, "step": 4069 }, { "epoch": 1.3067908171456093, "grad_norm": 0.8932663798332214, "learning_rate": 0.00014789983026447612, "loss": 0.2584, 
"step": 4070 }, { "epoch": 1.3071118959704608, "grad_norm": 0.8465712666511536, "learning_rate": 0.0001478691857940607, "loss": 0.3093, "step": 4071 }, { "epoch": 1.3074329747953122, "grad_norm": 0.7077059149742126, "learning_rate": 0.00014783853549093018, "loss": 0.4673, "step": 4072 }, { "epoch": 1.3077540536201637, "grad_norm": 1.0644139051437378, "learning_rate": 0.00014780787935881923, "loss": 0.7199, "step": 4073 }, { "epoch": 1.3080751324450153, "grad_norm": 0.7375410795211792, "learning_rate": 0.0001477772174014632, "loss": 0.5636, "step": 4074 }, { "epoch": 1.3083962112698668, "grad_norm": 0.9191805720329285, "learning_rate": 0.00014774654962259812, "loss": 0.663, "step": 4075 }, { "epoch": 1.3087172900947182, "grad_norm": 0.9933133721351624, "learning_rate": 0.00014771587602596084, "loss": 0.6267, "step": 4076 }, { "epoch": 1.3090383689195697, "grad_norm": 0.7604119777679443, "learning_rate": 0.0001476851966152888, "loss": 0.5479, "step": 4077 }, { "epoch": 1.3093594477444213, "grad_norm": 0.8366871476173401, "learning_rate": 0.0001476545113943202, "loss": 0.643, "step": 4078 }, { "epoch": 1.3096805265692728, "grad_norm": 0.8511562943458557, "learning_rate": 0.0001476238203667939, "loss": 0.5903, "step": 4079 }, { "epoch": 1.3100016053941244, "grad_norm": 0.9097726941108704, "learning_rate": 0.0001475931235364496, "loss": 0.6136, "step": 4080 }, { "epoch": 1.3103226842189757, "grad_norm": 1.0176063776016235, "learning_rate": 0.00014756242090702756, "loss": 0.6633, "step": 4081 }, { "epoch": 1.3106437630438272, "grad_norm": 1.1305146217346191, "learning_rate": 0.00014753171248226875, "loss": 0.7559, "step": 4082 }, { "epoch": 1.3109648418686788, "grad_norm": 0.7558575868606567, "learning_rate": 0.00014750099826591498, "loss": 0.5253, "step": 4083 }, { "epoch": 1.3112859206935303, "grad_norm": 0.850307047367096, "learning_rate": 0.00014747027826170867, "loss": 0.6287, "step": 4084 }, { "epoch": 1.3116069995183817, "grad_norm": 1.0060899257659912, 
"learning_rate": 0.00014743955247339293, "loss": 0.7239, "step": 4085 }, { "epoch": 1.3119280783432332, "grad_norm": 0.9706814885139465, "learning_rate": 0.0001474088209047116, "loss": 0.6527, "step": 4086 }, { "epoch": 1.3122491571680848, "grad_norm": 0.847439706325531, "learning_rate": 0.00014737808355940932, "loss": 0.6468, "step": 4087 }, { "epoch": 1.3125702359929363, "grad_norm": 0.9411894679069519, "learning_rate": 0.0001473473404412312, "loss": 0.6667, "step": 4088 }, { "epoch": 1.3128913148177879, "grad_norm": 1.1132397651672363, "learning_rate": 0.00014731659155392332, "loss": 0.5735, "step": 4089 }, { "epoch": 1.3132123936426392, "grad_norm": 1.0457525253295898, "learning_rate": 0.00014728583690123224, "loss": 0.6922, "step": 4090 }, { "epoch": 1.3135334724674907, "grad_norm": 0.8525384068489075, "learning_rate": 0.00014725507648690543, "loss": 0.634, "step": 4091 }, { "epoch": 1.3138545512923423, "grad_norm": 0.7392247319221497, "learning_rate": 0.00014722431031469083, "loss": 0.491, "step": 4092 }, { "epoch": 1.3141756301171938, "grad_norm": 1.0142507553100586, "learning_rate": 0.0001471935383883373, "loss": 0.7623, "step": 4093 }, { "epoch": 1.3144967089420452, "grad_norm": 1.3004919290542603, "learning_rate": 0.00014716276071159422, "loss": 0.7285, "step": 4094 }, { "epoch": 1.3148177877668967, "grad_norm": 1.129610538482666, "learning_rate": 0.00014713197728821183, "loss": 0.7317, "step": 4095 }, { "epoch": 1.3151388665917483, "grad_norm": 0.9293155670166016, "learning_rate": 0.000147101188121941, "loss": 0.6089, "step": 4096 }, { "epoch": 1.3154599454165998, "grad_norm": 0.9089670777320862, "learning_rate": 0.0001470703932165333, "loss": 0.6274, "step": 4097 }, { "epoch": 1.3157810242414514, "grad_norm": 1.2431045770645142, "learning_rate": 0.0001470395925757409, "loss": 0.6852, "step": 4098 }, { "epoch": 1.3161021030663027, "grad_norm": 0.9798834919929504, "learning_rate": 0.00014700878620331684, "loss": 0.7175, "step": 4099 }, { "epoch": 
1.3164231818911543, "grad_norm": 0.9327998757362366, "learning_rate": 0.0001469779741030148, "loss": 0.6003, "step": 4100 }, { "epoch": 1.3167442607160058, "grad_norm": 1.2201324701309204, "learning_rate": 0.00014694715627858908, "loss": 0.5154, "step": 4101 }, { "epoch": 1.3170653395408574, "grad_norm": 0.912707507610321, "learning_rate": 0.0001469163327337948, "loss": 0.4929, "step": 4102 }, { "epoch": 1.3173864183657087, "grad_norm": 1.2306265830993652, "learning_rate": 0.0001468855034723877, "loss": 0.6194, "step": 4103 }, { "epoch": 1.3177074971905602, "grad_norm": 1.114705204963684, "learning_rate": 0.00014685466849812418, "loss": 0.6703, "step": 4104 }, { "epoch": 1.3180285760154118, "grad_norm": 0.8249489068984985, "learning_rate": 0.00014682382781476146, "loss": 0.533, "step": 4105 }, { "epoch": 1.3183496548402633, "grad_norm": 1.0677366256713867, "learning_rate": 0.00014679298142605734, "loss": 0.6168, "step": 4106 }, { "epoch": 1.318670733665115, "grad_norm": 0.7703533172607422, "learning_rate": 0.0001467621293357704, "loss": 0.4699, "step": 4107 }, { "epoch": 1.3189918124899662, "grad_norm": 1.1350317001342773, "learning_rate": 0.0001467312715476598, "loss": 0.5877, "step": 4108 }, { "epoch": 1.3193128913148178, "grad_norm": 0.9653196930885315, "learning_rate": 0.00014670040806548555, "loss": 0.5712, "step": 4109 }, { "epoch": 1.3196339701396693, "grad_norm": 1.1715449094772339, "learning_rate": 0.0001466695388930082, "loss": 0.5182, "step": 4110 }, { "epoch": 1.3199550489645209, "grad_norm": 0.7922108769416809, "learning_rate": 0.00014663866403398913, "loss": 0.5427, "step": 4111 }, { "epoch": 1.3202761277893722, "grad_norm": 0.9558143615722656, "learning_rate": 0.0001466077834921903, "loss": 0.5072, "step": 4112 }, { "epoch": 1.3205972066142238, "grad_norm": 0.6734508872032166, "learning_rate": 0.00014657689727137443, "loss": 0.4252, "step": 4113 }, { "epoch": 1.3209182854390753, "grad_norm": 0.826819658279419, "learning_rate": 0.0001465460053753049, 
"loss": 0.4521, "step": 4114 }, { "epoch": 1.3212393642639269, "grad_norm": 0.8778970241546631, "learning_rate": 0.00014651510780774583, "loss": 0.5193, "step": 4115 }, { "epoch": 1.3215604430887784, "grad_norm": 0.7205748558044434, "learning_rate": 0.000146484204572462, "loss": 0.8583, "step": 4116 }, { "epoch": 1.3218815219136297, "grad_norm": 0.5651400685310364, "learning_rate": 0.0001464532956732188, "loss": 0.4929, "step": 4117 }, { "epoch": 1.3222026007384813, "grad_norm": 0.8473796248435974, "learning_rate": 0.0001464223811137824, "loss": 0.5065, "step": 4118 }, { "epoch": 1.3225236795633328, "grad_norm": 0.8395222425460815, "learning_rate": 0.0001463914608979197, "loss": 0.63, "step": 4119 }, { "epoch": 1.3228447583881842, "grad_norm": 0.8078770041465759, "learning_rate": 0.00014636053502939823, "loss": 0.3171, "step": 4120 }, { "epoch": 1.3231658372130357, "grad_norm": 0.7320200204849243, "learning_rate": 0.00014632960351198618, "loss": 0.4024, "step": 4121 }, { "epoch": 1.3234869160378873, "grad_norm": 0.9218945503234863, "learning_rate": 0.00014629866634945248, "loss": 0.3652, "step": 4122 }, { "epoch": 1.3238079948627388, "grad_norm": 0.6042816042900085, "learning_rate": 0.0001462677235455667, "loss": 0.2944, "step": 4123 }, { "epoch": 1.3241290736875904, "grad_norm": 1.0619382858276367, "learning_rate": 0.00014623677510409918, "loss": 0.7914, "step": 4124 }, { "epoch": 1.324450152512442, "grad_norm": 1.0162999629974365, "learning_rate": 0.00014620582102882089, "loss": 0.7339, "step": 4125 }, { "epoch": 1.3247712313372932, "grad_norm": 1.0029937028884888, "learning_rate": 0.00014617486132350343, "loss": 0.63, "step": 4126 }, { "epoch": 1.3250923101621448, "grad_norm": 0.9867237210273743, "learning_rate": 0.00014614389599191917, "loss": 0.7598, "step": 4127 }, { "epoch": 1.3254133889869963, "grad_norm": 0.8086850643157959, "learning_rate": 0.00014611292503784117, "loss": 0.5079, "step": 4128 }, { "epoch": 1.3257344678118477, "grad_norm": 
0.8222152590751648, "learning_rate": 0.0001460819484650431, "loss": 0.551, "step": 4129 }, { "epoch": 1.3260555466366992, "grad_norm": 0.84171462059021, "learning_rate": 0.0001460509662772994, "loss": 0.6227, "step": 4130 }, { "epoch": 1.3263766254615508, "grad_norm": 0.8350204825401306, "learning_rate": 0.00014601997847838518, "loss": 0.626, "step": 4131 }, { "epoch": 1.3266977042864023, "grad_norm": 1.4779672622680664, "learning_rate": 0.00014598898507207615, "loss": 0.6658, "step": 4132 }, { "epoch": 1.3270187831112539, "grad_norm": 0.7730790376663208, "learning_rate": 0.00014595798606214882, "loss": 0.5676, "step": 4133 }, { "epoch": 1.3273398619361054, "grad_norm": 1.5839117765426636, "learning_rate": 0.00014592698145238028, "loss": 0.7933, "step": 4134 }, { "epoch": 1.3276609407609568, "grad_norm": 0.9410939812660217, "learning_rate": 0.00014589597124654833, "loss": 0.6305, "step": 4135 }, { "epoch": 1.3279820195858083, "grad_norm": 1.0541006326675415, "learning_rate": 0.00014586495544843152, "loss": 0.7623, "step": 4136 }, { "epoch": 1.3283030984106599, "grad_norm": 0.8780885934829712, "learning_rate": 0.000145833934061809, "loss": 0.6705, "step": 4137 }, { "epoch": 1.3286241772355112, "grad_norm": 0.790573239326477, "learning_rate": 0.00014580290709046066, "loss": 0.6865, "step": 4138 }, { "epoch": 1.3289452560603627, "grad_norm": 0.7406685948371887, "learning_rate": 0.000145771874538167, "loss": 0.4454, "step": 4139 }, { "epoch": 1.3292663348852143, "grad_norm": 1.0333536863327026, "learning_rate": 0.0001457408364087093, "loss": 0.7394, "step": 4140 }, { "epoch": 1.3295874137100658, "grad_norm": 0.9966391324996948, "learning_rate": 0.00014570979270586945, "loss": 0.5846, "step": 4141 }, { "epoch": 1.3299084925349174, "grad_norm": 1.0538800954818726, "learning_rate": 0.00014567874343342997, "loss": 0.7812, "step": 4142 }, { "epoch": 1.330229571359769, "grad_norm": 0.90020352602005, "learning_rate": 0.00014564768859517418, "loss": 0.62, "step": 4143 }, { 
"epoch": 1.3305506501846203, "grad_norm": 1.0670877695083618, "learning_rate": 0.00014561662819488597, "loss": 0.6715, "step": 4144 }, { "epoch": 1.3308717290094718, "grad_norm": 1.155572772026062, "learning_rate": 0.00014558556223635003, "loss": 0.6894, "step": 4145 }, { "epoch": 1.3311928078343234, "grad_norm": 1.7336785793304443, "learning_rate": 0.00014555449072335157, "loss": 0.8215, "step": 4146 }, { "epoch": 1.3315138866591747, "grad_norm": 1.0733205080032349, "learning_rate": 0.00014552341365967658, "loss": 0.7403, "step": 4147 }, { "epoch": 1.3318349654840262, "grad_norm": 0.9724444150924683, "learning_rate": 0.00014549233104911178, "loss": 0.6041, "step": 4148 }, { "epoch": 1.3321560443088778, "grad_norm": 1.006319522857666, "learning_rate": 0.0001454612428954444, "loss": 0.6566, "step": 4149 }, { "epoch": 1.3324771231337293, "grad_norm": 0.736380398273468, "learning_rate": 0.00014543014920246247, "loss": 0.5568, "step": 4150 }, { "epoch": 1.332798201958581, "grad_norm": 1.084572196006775, "learning_rate": 0.00014539904997395468, "loss": 0.5559, "step": 4151 }, { "epoch": 1.3331192807834324, "grad_norm": 0.8436475396156311, "learning_rate": 0.00014536794521371037, "loss": 0.5434, "step": 4152 }, { "epoch": 1.3334403596082838, "grad_norm": 1.179206371307373, "learning_rate": 0.00014533683492551952, "loss": 0.7242, "step": 4153 }, { "epoch": 1.3337614384331353, "grad_norm": 1.3009215593338013, "learning_rate": 0.0001453057191131729, "loss": 0.6785, "step": 4154 }, { "epoch": 1.3340825172579869, "grad_norm": 1.1896605491638184, "learning_rate": 0.0001452745977804618, "loss": 0.6612, "step": 4155 }, { "epoch": 1.3344035960828382, "grad_norm": 1.1616253852844238, "learning_rate": 0.00014524347093117828, "loss": 0.6739, "step": 4156 }, { "epoch": 1.3347246749076898, "grad_norm": 0.7783265113830566, "learning_rate": 0.00014521233856911508, "loss": 0.5076, "step": 4157 }, { "epoch": 1.3350457537325413, "grad_norm": 1.7753092050552368, "learning_rate": 
0.00014518120069806557, "loss": 0.5744, "step": 4158 }, { "epoch": 1.3353668325573929, "grad_norm": 1.8857307434082031, "learning_rate": 0.00014515005732182383, "loss": 0.8165, "step": 4159 }, { "epoch": 1.3356879113822444, "grad_norm": 0.8890649676322937, "learning_rate": 0.00014511890844418453, "loss": 0.3913, "step": 4160 }, { "epoch": 1.336008990207096, "grad_norm": 0.691440999507904, "learning_rate": 0.00014508775406894307, "loss": 0.495, "step": 4161 }, { "epoch": 1.3363300690319473, "grad_norm": 1.066584587097168, "learning_rate": 0.0001450565941998956, "loss": 0.6014, "step": 4162 }, { "epoch": 1.3366511478567988, "grad_norm": 1.0590673685073853, "learning_rate": 0.00014502542884083875, "loss": 0.4447, "step": 4163 }, { "epoch": 1.3369722266816504, "grad_norm": 0.7496399283409119, "learning_rate": 0.00014499425799557, "loss": 0.3962, "step": 4164 }, { "epoch": 1.3372933055065017, "grad_norm": 0.5220407843589783, "learning_rate": 0.0001449630816678874, "loss": 0.4822, "step": 4165 }, { "epoch": 1.3376143843313533, "grad_norm": 0.6849836111068726, "learning_rate": 0.00014493189986158965, "loss": 0.875, "step": 4166 }, { "epoch": 1.3379354631562048, "grad_norm": 0.9034988284111023, "learning_rate": 0.00014490071258047623, "loss": 0.7436, "step": 4167 }, { "epoch": 1.3382565419810564, "grad_norm": 0.7391397356987, "learning_rate": 0.0001448695198283472, "loss": 0.5641, "step": 4168 }, { "epoch": 1.338577620805908, "grad_norm": 0.5824310779571533, "learning_rate": 0.00014483832160900326, "loss": 0.3517, "step": 4169 }, { "epoch": 1.3388986996307595, "grad_norm": 0.7779441475868225, "learning_rate": 0.0001448071179262458, "loss": 0.4648, "step": 4170 }, { "epoch": 1.3392197784556108, "grad_norm": 0.7949094176292419, "learning_rate": 0.00014477590878387696, "loss": 0.3886, "step": 4171 }, { "epoch": 1.3395408572804623, "grad_norm": 0.7168573141098022, "learning_rate": 0.0001447446941856995, "loss": 0.5573, "step": 4172 }, { "epoch": 1.339861936105314, "grad_norm": 
0.9068509340286255, "learning_rate": 0.00014471347413551672, "loss": 0.7493, "step": 4173 }, { "epoch": 1.3401830149301652, "grad_norm": 0.9333740472793579, "learning_rate": 0.00014468224863713278, "loss": 0.8158, "step": 4174 }, { "epoch": 1.3405040937550168, "grad_norm": 0.8705101609230042, "learning_rate": 0.00014465101769435234, "loss": 0.7667, "step": 4175 }, { "epoch": 1.3408251725798683, "grad_norm": 0.9098716974258423, "learning_rate": 0.00014461978131098088, "loss": 0.6398, "step": 4176 }, { "epoch": 1.3411462514047199, "grad_norm": 0.7872979044914246, "learning_rate": 0.00014458853949082443, "loss": 0.5239, "step": 4177 }, { "epoch": 1.3414673302295714, "grad_norm": 0.6730207800865173, "learning_rate": 0.00014455729223768966, "loss": 0.4844, "step": 4178 }, { "epoch": 1.341788409054423, "grad_norm": 0.7260086536407471, "learning_rate": 0.00014452603955538397, "loss": 0.6197, "step": 4179 }, { "epoch": 1.3421094878792743, "grad_norm": 0.8892760276794434, "learning_rate": 0.00014449478144771543, "loss": 0.6995, "step": 4180 }, { "epoch": 1.3424305667041259, "grad_norm": 1.4739090204238892, "learning_rate": 0.00014446351791849276, "loss": 0.7409, "step": 4181 }, { "epoch": 1.3427516455289774, "grad_norm": 0.7913824915885925, "learning_rate": 0.0001444322489715253, "loss": 0.6096, "step": 4182 }, { "epoch": 1.3430727243538287, "grad_norm": 0.9372351169586182, "learning_rate": 0.00014440097461062307, "loss": 0.5211, "step": 4183 }, { "epoch": 1.3433938031786803, "grad_norm": 1.0964716672897339, "learning_rate": 0.00014436969483959676, "loss": 0.7583, "step": 4184 }, { "epoch": 1.3437148820035318, "grad_norm": 1.6157090663909912, "learning_rate": 0.00014433840966225772, "loss": 0.6351, "step": 4185 }, { "epoch": 1.3440359608283834, "grad_norm": 0.8620708584785461, "learning_rate": 0.00014430711908241798, "loss": 0.5351, "step": 4186 }, { "epoch": 1.344357039653235, "grad_norm": 0.814059317111969, "learning_rate": 0.0001442758231038902, "loss": 0.6808, "step": 
4187 }, { "epoch": 1.3446781184780865, "grad_norm": 1.1919108629226685, "learning_rate": 0.0001442445217304876, "loss": 0.8476, "step": 4188 }, { "epoch": 1.3449991973029378, "grad_norm": 1.3358224630355835, "learning_rate": 0.00014421321496602428, "loss": 0.8407, "step": 4189 }, { "epoch": 1.3453202761277894, "grad_norm": 1.180011510848999, "learning_rate": 0.00014418190281431482, "loss": 0.5575, "step": 4190 }, { "epoch": 1.345641354952641, "grad_norm": 1.2531673908233643, "learning_rate": 0.00014415058527917452, "loss": 0.661, "step": 4191 }, { "epoch": 1.3459624337774923, "grad_norm": 0.8697596192359924, "learning_rate": 0.00014411926236441934, "loss": 0.7263, "step": 4192 }, { "epoch": 1.3462835126023438, "grad_norm": 1.6186376810073853, "learning_rate": 0.00014408793407386588, "loss": 0.6973, "step": 4193 }, { "epoch": 1.3466045914271954, "grad_norm": 0.9928228259086609, "learning_rate": 0.00014405660041133132, "loss": 0.5972, "step": 4194 }, { "epoch": 1.346925670252047, "grad_norm": 1.2978379726409912, "learning_rate": 0.00014402526138063373, "loss": 0.5707, "step": 4195 }, { "epoch": 1.3472467490768985, "grad_norm": 1.1947479248046875, "learning_rate": 0.00014399391698559152, "loss": 0.7005, "step": 4196 }, { "epoch": 1.34756782790175, "grad_norm": 1.2066709995269775, "learning_rate": 0.000143962567230024, "loss": 0.6688, "step": 4197 }, { "epoch": 1.3478889067266013, "grad_norm": 0.8478728532791138, "learning_rate": 0.000143931212117751, "loss": 0.5476, "step": 4198 }, { "epoch": 1.3482099855514529, "grad_norm": 1.081978678703308, "learning_rate": 0.00014389985165259308, "loss": 0.684, "step": 4199 }, { "epoch": 1.3485310643763044, "grad_norm": 1.2415368556976318, "learning_rate": 0.0001438684858383714, "loss": 0.7176, "step": 4200 }, { "epoch": 1.3488521432011558, "grad_norm": 1.0181922912597656, "learning_rate": 0.00014383711467890774, "loss": 0.6968, "step": 4201 }, { "epoch": 1.3491732220260073, "grad_norm": 1.017606258392334, "learning_rate": 
0.00014380573817802467, "loss": 0.474, "step": 4202 }, { "epoch": 1.3494943008508589, "grad_norm": 1.2191901206970215, "learning_rate": 0.00014377435633954527, "loss": 0.5747, "step": 4203 }, { "epoch": 1.3498153796757104, "grad_norm": 0.7296000719070435, "learning_rate": 0.00014374296916729336, "loss": 0.4687, "step": 4204 }, { "epoch": 1.350136458500562, "grad_norm": 1.0375962257385254, "learning_rate": 0.0001437115766650933, "loss": 0.5935, "step": 4205 }, { "epoch": 1.3504575373254135, "grad_norm": 0.8629053831100464, "learning_rate": 0.00014368017883677024, "loss": 0.5089, "step": 4206 }, { "epoch": 1.3507786161502648, "grad_norm": 1.310308575630188, "learning_rate": 0.0001436487756861499, "loss": 0.7386, "step": 4207 }, { "epoch": 1.3510996949751164, "grad_norm": 0.7340025901794434, "learning_rate": 0.0001436173672170586, "loss": 0.492, "step": 4208 }, { "epoch": 1.351420773799968, "grad_norm": 0.8930183053016663, "learning_rate": 0.00014358595343332342, "loss": 0.4347, "step": 4209 }, { "epoch": 1.3517418526248193, "grad_norm": 1.247145175933838, "learning_rate": 0.00014355453433877204, "loss": 0.5153, "step": 4210 }, { "epoch": 1.3520629314496708, "grad_norm": 1.237648844718933, "learning_rate": 0.00014352310993723277, "loss": 0.5992, "step": 4211 }, { "epoch": 1.3523840102745224, "grad_norm": 0.776263952255249, "learning_rate": 0.00014349168023253456, "loss": 0.5426, "step": 4212 }, { "epoch": 1.352705089099374, "grad_norm": 1.0824544429779053, "learning_rate": 0.00014346024522850703, "loss": 0.5301, "step": 4213 }, { "epoch": 1.3530261679242255, "grad_norm": 0.7915077209472656, "learning_rate": 0.00014342880492898048, "loss": 0.456, "step": 4214 }, { "epoch": 1.353347246749077, "grad_norm": 0.845167338848114, "learning_rate": 0.00014339735933778576, "loss": 0.8329, "step": 4215 }, { "epoch": 1.3536683255739284, "grad_norm": 0.7723715901374817, "learning_rate": 0.00014336590845875446, "loss": 0.914, "step": 4216 }, { "epoch": 1.35398940439878, "grad_norm": 
0.7588359713554382, "learning_rate": 0.00014333445229571873, "loss": 0.7267, "step": 4217 }, { "epoch": 1.3543104832236315, "grad_norm": 0.733163058757782, "learning_rate": 0.00014330299085251144, "loss": 0.486, "step": 4218 }, { "epoch": 1.3546315620484828, "grad_norm": 0.6390459537506104, "learning_rate": 0.00014327152413296608, "loss": 0.3084, "step": 4219 }, { "epoch": 1.3549526408733343, "grad_norm": 0.8938314318656921, "learning_rate": 0.00014324005214091676, "loss": 0.4455, "step": 4220 }, { "epoch": 1.3552737196981859, "grad_norm": 1.0440847873687744, "learning_rate": 0.00014320857488019824, "loss": 0.4297, "step": 4221 }, { "epoch": 1.3555947985230374, "grad_norm": 0.8476107716560364, "learning_rate": 0.00014317709235464593, "loss": 0.5257, "step": 4222 }, { "epoch": 1.355915877347889, "grad_norm": 0.9037560224533081, "learning_rate": 0.0001431456045680959, "loss": 0.6943, "step": 4223 }, { "epoch": 1.3562369561727405, "grad_norm": 0.875007152557373, "learning_rate": 0.00014311411152438482, "loss": 0.6148, "step": 4224 }, { "epoch": 1.3565580349975919, "grad_norm": 0.829646646976471, "learning_rate": 0.00014308261322735005, "loss": 0.7564, "step": 4225 }, { "epoch": 1.3568791138224434, "grad_norm": 0.7148510217666626, "learning_rate": 0.00014305110968082952, "loss": 0.5596, "step": 4226 }, { "epoch": 1.357200192647295, "grad_norm": 0.7443501949310303, "learning_rate": 0.00014301960088866186, "loss": 0.6532, "step": 4227 }, { "epoch": 1.3575212714721463, "grad_norm": 0.9788792729377747, "learning_rate": 0.00014298808685468635, "loss": 0.5607, "step": 4228 }, { "epoch": 1.3578423502969978, "grad_norm": 0.7856297492980957, "learning_rate": 0.00014295656758274284, "loss": 0.5779, "step": 4229 }, { "epoch": 1.3581634291218494, "grad_norm": 0.7464038133621216, "learning_rate": 0.00014292504307667186, "loss": 0.4625, "step": 4230 }, { "epoch": 1.358484507946701, "grad_norm": 0.958886981010437, "learning_rate": 0.0001428935133403146, "loss": 0.6014, "step": 4231 
}, { "epoch": 1.3588055867715525, "grad_norm": 1.1012500524520874, "learning_rate": 0.00014286197837751286, "loss": 0.9194, "step": 4232 }, { "epoch": 1.359126665596404, "grad_norm": 0.8240073323249817, "learning_rate": 0.00014283043819210905, "loss": 0.5618, "step": 4233 }, { "epoch": 1.3594477444212554, "grad_norm": 0.8680970072746277, "learning_rate": 0.00014279889278794627, "loss": 0.6105, "step": 4234 }, { "epoch": 1.359768823246107, "grad_norm": 1.032063364982605, "learning_rate": 0.00014276734216886821, "loss": 0.7325, "step": 4235 }, { "epoch": 1.3600899020709585, "grad_norm": 0.9061574935913086, "learning_rate": 0.00014273578633871927, "loss": 0.6026, "step": 4236 }, { "epoch": 1.3604109808958098, "grad_norm": 0.7440208196640015, "learning_rate": 0.00014270422530134432, "loss": 0.552, "step": 4237 }, { "epoch": 1.3607320597206614, "grad_norm": 1.1810888051986694, "learning_rate": 0.00014267265906058914, "loss": 0.7499, "step": 4238 }, { "epoch": 1.361053138545513, "grad_norm": 0.903121292591095, "learning_rate": 0.0001426410876202999, "loss": 0.6064, "step": 4239 }, { "epoch": 1.3613742173703645, "grad_norm": 0.9991443157196045, "learning_rate": 0.00014260951098432343, "loss": 0.661, "step": 4240 }, { "epoch": 1.361695296195216, "grad_norm": 0.9029455184936523, "learning_rate": 0.00014257792915650728, "loss": 0.5712, "step": 4241 }, { "epoch": 1.3620163750200676, "grad_norm": 0.899472713470459, "learning_rate": 0.00014254634214069963, "loss": 0.6596, "step": 4242 }, { "epoch": 1.3623374538449189, "grad_norm": 0.7812873125076294, "learning_rate": 0.00014251474994074928, "loss": 0.5881, "step": 4243 }, { "epoch": 1.3626585326697704, "grad_norm": 1.202508807182312, "learning_rate": 0.00014248315256050557, "loss": 0.6451, "step": 4244 }, { "epoch": 1.362979611494622, "grad_norm": 0.7961817979812622, "learning_rate": 0.0001424515500038186, "loss": 0.4156, "step": 4245 }, { "epoch": 1.3633006903194733, "grad_norm": 0.9045254588127136, "learning_rate": 
0.00014241994227453901, "loss": 0.5355, "step": 4246 }, { "epoch": 1.3636217691443249, "grad_norm": 1.0353204011917114, "learning_rate": 0.00014238832937651816, "loss": 0.6559, "step": 4247 }, { "epoch": 1.3639428479691764, "grad_norm": 0.866054117679596, "learning_rate": 0.00014235671131360798, "loss": 0.562, "step": 4248 }, { "epoch": 1.364263926794028, "grad_norm": 1.5949488878250122, "learning_rate": 0.00014232508808966098, "loss": 0.7606, "step": 4249 }, { "epoch": 1.3645850056188795, "grad_norm": 1.053368091583252, "learning_rate": 0.00014229345970853032, "loss": 0.6, "step": 4250 }, { "epoch": 1.364906084443731, "grad_norm": 0.8603706359863281, "learning_rate": 0.00014226182617406996, "loss": 0.5477, "step": 4251 }, { "epoch": 1.3652271632685824, "grad_norm": 2.3181567192077637, "learning_rate": 0.00014223018749013423, "loss": 0.6106, "step": 4252 }, { "epoch": 1.365548242093434, "grad_norm": 0.8432567715644836, "learning_rate": 0.0001421985436605783, "loss": 0.4574, "step": 4253 }, { "epoch": 1.3658693209182855, "grad_norm": 1.323652982711792, "learning_rate": 0.0001421668946892578, "loss": 0.7721, "step": 4254 }, { "epoch": 1.3661903997431368, "grad_norm": 0.9810237288475037, "learning_rate": 0.0001421352405800291, "loss": 0.5758, "step": 4255 }, { "epoch": 1.3665114785679884, "grad_norm": 1.0755305290222168, "learning_rate": 0.00014210358133674912, "loss": 0.5253, "step": 4256 }, { "epoch": 1.36683255739284, "grad_norm": 1.2368508577346802, "learning_rate": 0.00014207191696327548, "loss": 0.5937, "step": 4257 }, { "epoch": 1.3671536362176915, "grad_norm": 0.9989250302314758, "learning_rate": 0.00014204024746346637, "loss": 0.7571, "step": 4258 }, { "epoch": 1.367474715042543, "grad_norm": 0.9901091456413269, "learning_rate": 0.00014200857284118066, "loss": 0.6444, "step": 4259 }, { "epoch": 1.3677957938673946, "grad_norm": 0.8036117553710938, "learning_rate": 0.00014197689310027772, "loss": 0.4396, "step": 4260 }, { "epoch": 1.368116872692246, 
"grad_norm": 1.049436330795288, "learning_rate": 0.00014194520824461771, "loss": 0.5589, "step": 4261 }, { "epoch": 1.3684379515170975, "grad_norm": 0.8437821865081787, "learning_rate": 0.00014191351827806133, "loss": 0.5044, "step": 4262 }, { "epoch": 1.368759030341949, "grad_norm": 1.1721175909042358, "learning_rate": 0.00014188182320446985, "loss": 0.4725, "step": 4263 }, { "epoch": 1.3690801091668003, "grad_norm": 0.7921808958053589, "learning_rate": 0.00014185012302770527, "loss": 0.4147, "step": 4264 }, { "epoch": 1.369401187991652, "grad_norm": 1.2697014808654785, "learning_rate": 0.00014181841775163013, "loss": 0.9084, "step": 4265 }, { "epoch": 1.3697222668165034, "grad_norm": 0.8648690581321716, "learning_rate": 0.0001417867073801077, "loss": 0.9195, "step": 4266 }, { "epoch": 1.370043345641355, "grad_norm": 0.7991724014282227, "learning_rate": 0.00014175499191700167, "loss": 0.7083, "step": 4267 }, { "epoch": 1.3703644244662065, "grad_norm": 0.8012338876724243, "learning_rate": 0.00014172327136617656, "loss": 0.4684, "step": 4268 }, { "epoch": 1.3706855032910579, "grad_norm": 0.9364102482795715, "learning_rate": 0.00014169154573149737, "loss": 0.383, "step": 4269 }, { "epoch": 1.3710065821159094, "grad_norm": 1.1031969785690308, "learning_rate": 0.0001416598150168298, "loss": 0.5984, "step": 4270 }, { "epoch": 1.371327660940761, "grad_norm": 1.1069051027297974, "learning_rate": 0.00014162807922604012, "loss": 0.7745, "step": 4271 }, { "epoch": 1.3716487397656125, "grad_norm": 1.0749973058700562, "learning_rate": 0.00014159633836299527, "loss": 0.7569, "step": 4272 }, { "epoch": 1.3719698185904639, "grad_norm": 0.8237097263336182, "learning_rate": 0.0001415645924315628, "loss": 0.7982, "step": 4273 }, { "epoch": 1.3722908974153154, "grad_norm": 0.9842055439949036, "learning_rate": 0.0001415328414356108, "loss": 0.6246, "step": 4274 }, { "epoch": 1.372611976240167, "grad_norm": 0.9688705205917358, "learning_rate": 0.00014150108537900805, "loss": 0.5753, 
"step": 4275 }, { "epoch": 1.3729330550650185, "grad_norm": 0.6846550703048706, "learning_rate": 0.00014146932426562392, "loss": 0.5259, "step": 4276 }, { "epoch": 1.37325413388987, "grad_norm": 1.078979730606079, "learning_rate": 0.00014143755809932845, "loss": 0.6775, "step": 4277 }, { "epoch": 1.3735752127147214, "grad_norm": 0.8213169574737549, "learning_rate": 0.00014140578688399218, "loss": 0.4362, "step": 4278 }, { "epoch": 1.373896291539573, "grad_norm": 1.00385320186615, "learning_rate": 0.00014137401062348638, "loss": 0.6659, "step": 4279 }, { "epoch": 1.3742173703644245, "grad_norm": 0.8310264945030212, "learning_rate": 0.0001413422293216829, "loss": 0.8067, "step": 4280 }, { "epoch": 1.374538449189276, "grad_norm": 0.888613224029541, "learning_rate": 0.0001413104429824542, "loss": 0.688, "step": 4281 }, { "epoch": 1.3748595280141274, "grad_norm": 0.9263864755630493, "learning_rate": 0.0001412786516096733, "loss": 0.5974, "step": 4282 }, { "epoch": 1.375180606838979, "grad_norm": 1.2327766418457031, "learning_rate": 0.00014124685520721392, "loss": 0.8498, "step": 4283 }, { "epoch": 1.3755016856638305, "grad_norm": 0.7652939558029175, "learning_rate": 0.00014121505377895038, "loss": 0.4643, "step": 4284 }, { "epoch": 1.375822764488682, "grad_norm": 0.8574872016906738, "learning_rate": 0.00014118324732875748, "loss": 0.5679, "step": 4285 }, { "epoch": 1.3761438433135336, "grad_norm": 1.3594255447387695, "learning_rate": 0.00014115143586051088, "loss": 0.7465, "step": 4286 }, { "epoch": 1.376464922138385, "grad_norm": 0.8461220264434814, "learning_rate": 0.00014111961937808665, "loss": 0.4636, "step": 4287 }, { "epoch": 1.3767860009632364, "grad_norm": 0.9702261686325073, "learning_rate": 0.0001410877978853615, "loss": 0.6617, "step": 4288 }, { "epoch": 1.377107079788088, "grad_norm": 0.8296035528182983, "learning_rate": 0.0001410559713862128, "loss": 0.5687, "step": 4289 }, { "epoch": 1.3774281586129395, "grad_norm": 1.2875922918319702, "learning_rate": 
0.00014102413988451856, "loss": 0.8027, "step": 4290 }, { "epoch": 1.3777492374377909, "grad_norm": 0.8944180607795715, "learning_rate": 0.00014099230338415728, "loss": 0.6401, "step": 4291 }, { "epoch": 1.3780703162626424, "grad_norm": 0.9776332378387451, "learning_rate": 0.00014096046188900822, "loss": 0.7287, "step": 4292 }, { "epoch": 1.378391395087494, "grad_norm": 0.7893264293670654, "learning_rate": 0.00014092861540295108, "loss": 0.5128, "step": 4293 }, { "epoch": 1.3787124739123455, "grad_norm": 1.7183669805526733, "learning_rate": 0.0001408967639298663, "loss": 0.5958, "step": 4294 }, { "epoch": 1.379033552737197, "grad_norm": 1.1158559322357178, "learning_rate": 0.00014086490747363493, "loss": 0.8365, "step": 4295 }, { "epoch": 1.3793546315620484, "grad_norm": 1.0233869552612305, "learning_rate": 0.00014083304603813848, "loss": 0.7533, "step": 4296 }, { "epoch": 1.3796757103869, "grad_norm": 1.1017775535583496, "learning_rate": 0.0001408011796272593, "loss": 0.6726, "step": 4297 }, { "epoch": 1.3799967892117515, "grad_norm": 0.8513362407684326, "learning_rate": 0.0001407693082448801, "loss": 0.5752, "step": 4298 }, { "epoch": 1.380317868036603, "grad_norm": 1.0489625930786133, "learning_rate": 0.00014073743189488435, "loss": 0.5832, "step": 4299 }, { "epoch": 1.3806389468614544, "grad_norm": 0.8657923340797424, "learning_rate": 0.00014070555058115614, "loss": 0.6029, "step": 4300 }, { "epoch": 1.380960025686306, "grad_norm": 1.106916904449463, "learning_rate": 0.00014067366430758004, "loss": 0.655, "step": 4301 }, { "epoch": 1.3812811045111575, "grad_norm": 0.9051868915557861, "learning_rate": 0.00014064177307804133, "loss": 0.6185, "step": 4302 }, { "epoch": 1.381602183336009, "grad_norm": 0.950446605682373, "learning_rate": 0.00014060987689642581, "loss": 0.6738, "step": 4303 }, { "epoch": 1.3819232621608606, "grad_norm": 1.1080812215805054, "learning_rate": 0.00014057797576662, "loss": 0.6398, "step": 4304 }, { "epoch": 1.382244340985712, "grad_norm": 
0.6714284420013428, "learning_rate": 0.00014054606969251095, "loss": 0.4397, "step": 4305 }, { "epoch": 1.3825654198105635, "grad_norm": 0.9164280891418457, "learning_rate": 0.00014051415867798628, "loss": 0.5544, "step": 4306 }, { "epoch": 1.382886498635415, "grad_norm": 0.951714277267456, "learning_rate": 0.00014048224272693424, "loss": 0.6772, "step": 4307 }, { "epoch": 1.3832075774602666, "grad_norm": 0.9855425357818604, "learning_rate": 0.00014045032184324373, "loss": 0.6, "step": 4308 }, { "epoch": 1.383528656285118, "grad_norm": 0.9370409250259399, "learning_rate": 0.00014041839603080422, "loss": 0.5603, "step": 4309 }, { "epoch": 1.3838497351099694, "grad_norm": 0.9768514633178711, "learning_rate": 0.00014038646529350579, "loss": 0.5308, "step": 4310 }, { "epoch": 1.384170813934821, "grad_norm": 1.384358525276184, "learning_rate": 0.00014035452963523902, "loss": 0.6596, "step": 4311 }, { "epoch": 1.3844918927596725, "grad_norm": 1.2001529932022095, "learning_rate": 0.00014032258905989522, "loss": 0.4874, "step": 4312 }, { "epoch": 1.384812971584524, "grad_norm": 0.5518187284469604, "learning_rate": 0.00014029064357136628, "loss": 0.3581, "step": 4313 }, { "epoch": 1.3851340504093754, "grad_norm": 0.7382297515869141, "learning_rate": 0.00014025869317354462, "loss": 0.4, "step": 4314 }, { "epoch": 1.385455129234227, "grad_norm": 0.8723937273025513, "learning_rate": 0.00014022673787032332, "loss": 0.6065, "step": 4315 }, { "epoch": 1.3857762080590785, "grad_norm": 0.8456537127494812, "learning_rate": 0.00014019477766559604, "loss": 0.5173, "step": 4316 }, { "epoch": 1.38609728688393, "grad_norm": 0.8314757347106934, "learning_rate": 0.000140162812563257, "loss": 0.5405, "step": 4317 }, { "epoch": 1.3864183657087814, "grad_norm": 0.9915850162506104, "learning_rate": 0.00014013084256720107, "loss": 0.4016, "step": 4318 }, { "epoch": 1.386739444533633, "grad_norm": 0.6893324851989746, "learning_rate": 0.00014009886768132375, "loss": 0.2416, "step": 4319 }, { 
"epoch": 1.3870605233584845, "grad_norm": 0.7434089183807373, "learning_rate": 0.000140066887909521, "loss": 0.2607, "step": 4320 }, { "epoch": 1.387381602183336, "grad_norm": 0.7426462769508362, "learning_rate": 0.00014003490325568954, "loss": 0.6526, "step": 4321 }, { "epoch": 1.3877026810081876, "grad_norm": 1.0482592582702637, "learning_rate": 0.00014000291372372647, "loss": 0.6866, "step": 4322 }, { "epoch": 1.388023759833039, "grad_norm": 0.7865654826164246, "learning_rate": 0.00013997091931752977, "loss": 0.7286, "step": 4323 }, { "epoch": 1.3883448386578905, "grad_norm": 0.7435041666030884, "learning_rate": 0.00013993892004099777, "loss": 0.6065, "step": 4324 }, { "epoch": 1.388665917482742, "grad_norm": 1.0697741508483887, "learning_rate": 0.00013990691589802954, "loss": 0.7525, "step": 4325 }, { "epoch": 1.3889869963075936, "grad_norm": 0.6864974498748779, "learning_rate": 0.00013987490689252463, "loss": 0.5124, "step": 4326 }, { "epoch": 1.389308075132445, "grad_norm": 1.0872437953948975, "learning_rate": 0.00013984289302838328, "loss": 0.7208, "step": 4327 }, { "epoch": 1.3896291539572965, "grad_norm": 0.8134937286376953, "learning_rate": 0.00013981087430950628, "loss": 0.5676, "step": 4328 }, { "epoch": 1.389950232782148, "grad_norm": 0.8556815981864929, "learning_rate": 0.000139778850739795, "loss": 0.6281, "step": 4329 }, { "epoch": 1.3902713116069996, "grad_norm": 0.8196367025375366, "learning_rate": 0.0001397468223231514, "loss": 0.5989, "step": 4330 }, { "epoch": 1.3905923904318511, "grad_norm": 0.7392389178276062, "learning_rate": 0.00013971478906347806, "loss": 0.5696, "step": 4331 }, { "epoch": 1.3909134692567025, "grad_norm": 0.8934568166732788, "learning_rate": 0.0001396827509646782, "loss": 0.649, "step": 4332 }, { "epoch": 1.391234548081554, "grad_norm": 0.861387312412262, "learning_rate": 0.00013965070803065543, "loss": 0.5132, "step": 4333 }, { "epoch": 1.3915556269064056, "grad_norm": 0.8012207746505737, "learning_rate": 
0.00013961866026531417, "loss": 0.4985, "step": 4334 }, { "epoch": 1.391876705731257, "grad_norm": 1.5408804416656494, "learning_rate": 0.00013958660767255938, "loss": 0.8035, "step": 4335 }, { "epoch": 1.3921977845561084, "grad_norm": 1.0151411294937134, "learning_rate": 0.00013955455025629651, "loss": 0.6018, "step": 4336 }, { "epoch": 1.39251886338096, "grad_norm": 0.9835482835769653, "learning_rate": 0.00013952248802043165, "loss": 0.625, "step": 4337 }, { "epoch": 1.3928399422058115, "grad_norm": 1.0157198905944824, "learning_rate": 0.00013949042096887153, "loss": 0.6432, "step": 4338 }, { "epoch": 1.393161021030663, "grad_norm": 1.2168697118759155, "learning_rate": 0.0001394583491055234, "loss": 0.4917, "step": 4339 }, { "epoch": 1.3934820998555146, "grad_norm": 1.4631038904190063, "learning_rate": 0.00013942627243429512, "loss": 0.6929, "step": 4340 }, { "epoch": 1.393803178680366, "grad_norm": 1.6047730445861816, "learning_rate": 0.00013939419095909512, "loss": 0.6978, "step": 4341 }, { "epoch": 1.3941242575052175, "grad_norm": 1.1495589017868042, "learning_rate": 0.00013936210468383246, "loss": 0.8062, "step": 4342 }, { "epoch": 1.394445336330069, "grad_norm": 1.0219823122024536, "learning_rate": 0.00013933001361241673, "loss": 0.6822, "step": 4343 }, { "epoch": 1.3947664151549206, "grad_norm": 0.9300022125244141, "learning_rate": 0.00013929791774875815, "loss": 0.6795, "step": 4344 }, { "epoch": 1.395087493979772, "grad_norm": 0.8773917555809021, "learning_rate": 0.00013926581709676751, "loss": 0.4946, "step": 4345 }, { "epoch": 1.3954085728046235, "grad_norm": 0.8553141951560974, "learning_rate": 0.00013923371166035616, "loss": 0.6083, "step": 4346 }, { "epoch": 1.395729651629475, "grad_norm": 0.9813185930252075, "learning_rate": 0.00013920160144343603, "loss": 0.8011, "step": 4347 }, { "epoch": 1.3960507304543266, "grad_norm": 1.4478272199630737, "learning_rate": 0.0001391694864499197, "loss": 0.5279, "step": 4348 }, { "epoch": 1.3963718092791781, 
"grad_norm": 0.9650620818138123, "learning_rate": 0.00013913736668372026, "loss": 0.6811, "step": 4349 }, { "epoch": 1.3966928881040295, "grad_norm": 1.1189510822296143, "learning_rate": 0.00013910524214875137, "loss": 0.7923, "step": 4350 }, { "epoch": 1.397013966928881, "grad_norm": 1.1408734321594238, "learning_rate": 0.00013907311284892736, "loss": 0.6301, "step": 4351 }, { "epoch": 1.3973350457537326, "grad_norm": 1.3011447191238403, "learning_rate": 0.00013904097878816312, "loss": 0.4263, "step": 4352 }, { "epoch": 1.3976561245785841, "grad_norm": 0.7979041337966919, "learning_rate": 0.00013900883997037397, "loss": 0.534, "step": 4353 }, { "epoch": 1.3979772034034355, "grad_norm": 0.9429032206535339, "learning_rate": 0.00013897669639947606, "loss": 0.7349, "step": 4354 }, { "epoch": 1.398298282228287, "grad_norm": 1.0224515199661255, "learning_rate": 0.00013894454807938586, "loss": 0.5381, "step": 4355 }, { "epoch": 1.3986193610531386, "grad_norm": 0.8756483197212219, "learning_rate": 0.0001389123950140206, "loss": 0.5863, "step": 4356 }, { "epoch": 1.39894043987799, "grad_norm": 1.352267861366272, "learning_rate": 0.0001388802372072981, "loss": 0.6053, "step": 4357 }, { "epoch": 1.3992615187028417, "grad_norm": 0.9977678060531616, "learning_rate": 0.00013884807466313663, "loss": 0.6707, "step": 4358 }, { "epoch": 1.399582597527693, "grad_norm": 0.9317201972007751, "learning_rate": 0.00013881590738545508, "loss": 0.6246, "step": 4359 }, { "epoch": 1.3999036763525445, "grad_norm": 0.8313597440719604, "learning_rate": 0.00013878373537817292, "loss": 0.4458, "step": 4360 }, { "epoch": 1.400224755177396, "grad_norm": 0.9241980314254761, "learning_rate": 0.0001387515586452103, "loss": 0.566, "step": 4361 }, { "epoch": 1.4005458340022474, "grad_norm": 0.5518043041229248, "learning_rate": 0.00013871937719048779, "loss": 0.3723, "step": 4362 }, { "epoch": 1.400866912827099, "grad_norm": 0.598588764667511, "learning_rate": 0.00013868719101792665, "loss": 0.354, 
"step": 4363 }, { "epoch": 1.4011879916519505, "grad_norm": 1.0155926942825317, "learning_rate": 0.00013865500013144857, "loss": 0.614, "step": 4364 }, { "epoch": 1.401509070476802, "grad_norm": 0.5616649389266968, "learning_rate": 0.000138622804534976, "loss": 0.4872, "step": 4365 }, { "epoch": 1.4018301493016536, "grad_norm": 0.6463456749916077, "learning_rate": 0.00013859060423243187, "loss": 1.0319, "step": 4366 }, { "epoch": 1.4021512281265052, "grad_norm": 0.708520770072937, "learning_rate": 0.00013855839922773968, "loss": 1.1378, "step": 4367 }, { "epoch": 1.4024723069513565, "grad_norm": 0.6274012327194214, "learning_rate": 0.00013852618952482347, "loss": 0.4126, "step": 4368 }, { "epoch": 1.402793385776208, "grad_norm": 0.7933291792869568, "learning_rate": 0.00013849397512760795, "loss": 0.4245, "step": 4369 }, { "epoch": 1.4031144646010596, "grad_norm": 0.7001267671585083, "learning_rate": 0.0001384617560400183, "loss": 0.3049, "step": 4370 }, { "epoch": 1.403435543425911, "grad_norm": 0.7495763897895813, "learning_rate": 0.00013842953226598037, "loss": 0.3643, "step": 4371 }, { "epoch": 1.4037566222507625, "grad_norm": 0.7259708046913147, "learning_rate": 0.00013839730380942053, "loss": 0.3979, "step": 4372 }, { "epoch": 1.404077701075614, "grad_norm": 1.0623929500579834, "learning_rate": 0.00013836507067426564, "loss": 0.9446, "step": 4373 }, { "epoch": 1.4043987799004656, "grad_norm": 0.9666998386383057, "learning_rate": 0.00013833283286444328, "loss": 0.7658, "step": 4374 }, { "epoch": 1.4047198587253171, "grad_norm": 0.7665534019470215, "learning_rate": 0.0001383005903838815, "loss": 0.5662, "step": 4375 }, { "epoch": 1.4050409375501687, "grad_norm": 1.0555462837219238, "learning_rate": 0.000138268343236509, "loss": 0.7391, "step": 4376 }, { "epoch": 1.40536201637502, "grad_norm": 0.9639325141906738, "learning_rate": 0.0001382360914262549, "loss": 0.6964, "step": 4377 }, { "epoch": 1.4056830951998716, "grad_norm": 0.8347415924072266, "learning_rate": 
0.0001382038349570491, "loss": 0.5785, "step": 4378 }, { "epoch": 1.406004174024723, "grad_norm": 0.8627128005027771, "learning_rate": 0.00013817157383282184, "loss": 0.6525, "step": 4379 }, { "epoch": 1.4063252528495744, "grad_norm": 1.1546865701675415, "learning_rate": 0.00013813930805750413, "loss": 0.7064, "step": 4380 }, { "epoch": 1.406646331674426, "grad_norm": 0.9595782160758972, "learning_rate": 0.00013810703763502744, "loss": 0.593, "step": 4381 }, { "epoch": 1.4069674104992775, "grad_norm": 0.7686512470245361, "learning_rate": 0.00013807476256932376, "loss": 0.4359, "step": 4382 }, { "epoch": 1.407288489324129, "grad_norm": 1.0802874565124512, "learning_rate": 0.00013804248286432578, "loss": 0.8544, "step": 4383 }, { "epoch": 1.4076095681489806, "grad_norm": 0.8571449518203735, "learning_rate": 0.00013801019852396665, "loss": 0.6631, "step": 4384 }, { "epoch": 1.4079306469738322, "grad_norm": 0.9189439415931702, "learning_rate": 0.00013797790955218014, "loss": 0.6817, "step": 4385 }, { "epoch": 1.4082517257986835, "grad_norm": 0.814200222492218, "learning_rate": 0.00013794561595290052, "loss": 0.5863, "step": 4386 }, { "epoch": 1.408572804623535, "grad_norm": 1.049676537513733, "learning_rate": 0.0001379133177300627, "loss": 0.6476, "step": 4387 }, { "epoch": 1.4088938834483866, "grad_norm": 0.9915246367454529, "learning_rate": 0.00013788101488760215, "loss": 0.6418, "step": 4388 }, { "epoch": 1.409214962273238, "grad_norm": 1.644821047782898, "learning_rate": 0.00013784870742945482, "loss": 0.776, "step": 4389 }, { "epoch": 1.4095360410980895, "grad_norm": 0.856365978717804, "learning_rate": 0.00013781639535955732, "loss": 0.5186, "step": 4390 }, { "epoch": 1.409857119922941, "grad_norm": 0.9367002844810486, "learning_rate": 0.00013778407868184672, "loss": 0.7263, "step": 4391 }, { "epoch": 1.4101781987477926, "grad_norm": 0.8069283962249756, "learning_rate": 0.00013775175740026078, "loss": 0.6471, "step": 4392 }, { "epoch": 1.4104992775726442, 
"grad_norm": 0.9524908065795898, "learning_rate": 0.00013771943151873767, "loss": 0.762, "step": 4393 }, { "epoch": 1.4108203563974957, "grad_norm": 1.1511192321777344, "learning_rate": 0.00013768710104121627, "loss": 0.5543, "step": 4394 }, { "epoch": 1.411141435222347, "grad_norm": 0.8177773952484131, "learning_rate": 0.00013765476597163594, "loss": 0.6148, "step": 4395 }, { "epoch": 1.4114625140471986, "grad_norm": 1.0919883251190186, "learning_rate": 0.00013762242631393655, "loss": 0.7347, "step": 4396 }, { "epoch": 1.4117835928720501, "grad_norm": 0.9575279951095581, "learning_rate": 0.0001375900820720587, "loss": 0.6493, "step": 4397 }, { "epoch": 1.4121046716969015, "grad_norm": 0.8994300365447998, "learning_rate": 0.0001375577332499433, "loss": 0.6778, "step": 4398 }, { "epoch": 1.412425750521753, "grad_norm": 1.2716469764709473, "learning_rate": 0.0001375253798515321, "loss": 0.794, "step": 4399 }, { "epoch": 1.4127468293466046, "grad_norm": 0.9935747385025024, "learning_rate": 0.00013749302188076717, "loss": 0.688, "step": 4400 }, { "epoch": 1.413067908171456, "grad_norm": 1.3132479190826416, "learning_rate": 0.00013746065934159123, "loss": 0.6833, "step": 4401 }, { "epoch": 1.4133889869963077, "grad_norm": 0.9004419445991516, "learning_rate": 0.00013742829223794759, "loss": 0.5584, "step": 4402 }, { "epoch": 1.4137100658211592, "grad_norm": 0.8987523317337036, "learning_rate": 0.00013739592057378003, "loss": 0.5162, "step": 4403 }, { "epoch": 1.4140311446460105, "grad_norm": 1.0713285207748413, "learning_rate": 0.00013736354435303305, "loss": 0.6563, "step": 4404 }, { "epoch": 1.414352223470862, "grad_norm": 0.9994504451751709, "learning_rate": 0.0001373311635796515, "loss": 0.6936, "step": 4405 }, { "epoch": 1.4146733022957136, "grad_norm": 1.2554911375045776, "learning_rate": 0.0001372987782575809, "loss": 0.6968, "step": 4406 }, { "epoch": 1.414994381120565, "grad_norm": 0.9798457622528076, "learning_rate": 0.0001372663883907673, "loss": 0.648, 
"step": 4407 }, { "epoch": 1.4153154599454165, "grad_norm": 1.1374400854110718, "learning_rate": 0.00013723399398315734, "loss": 0.6739, "step": 4408 }, { "epoch": 1.415636538770268, "grad_norm": 0.9649138450622559, "learning_rate": 0.00013720159503869815, "loss": 0.5802, "step": 4409 }, { "epoch": 1.4159576175951196, "grad_norm": 1.5144495964050293, "learning_rate": 0.00013716919156133746, "loss": 0.6543, "step": 4410 }, { "epoch": 1.4162786964199712, "grad_norm": 1.2621392011642456, "learning_rate": 0.00013713678355502351, "loss": 0.6814, "step": 4411 }, { "epoch": 1.4165997752448227, "grad_norm": 0.90981125831604, "learning_rate": 0.0001371043710237051, "loss": 0.5224, "step": 4412 }, { "epoch": 1.416920854069674, "grad_norm": 0.5423502922058105, "learning_rate": 0.00013707195397133165, "loss": 0.369, "step": 4413 }, { "epoch": 1.4172419328945256, "grad_norm": 0.6597849130630493, "learning_rate": 0.0001370395324018531, "loss": 0.4365, "step": 4414 }, { "epoch": 1.4175630117193772, "grad_norm": 0.6346552968025208, "learning_rate": 0.00013700710631921984, "loss": 0.5433, "step": 4415 }, { "epoch": 1.4178840905442285, "grad_norm": 0.8271416425704956, "learning_rate": 0.00013697467572738295, "loss": 0.9643, "step": 4416 }, { "epoch": 1.41820516936908, "grad_norm": 0.77828449010849, "learning_rate": 0.00013694224063029396, "loss": 0.8363, "step": 4417 }, { "epoch": 1.4185262481939316, "grad_norm": 0.9805882573127747, "learning_rate": 0.00013690980103190503, "loss": 0.4089, "step": 4418 }, { "epoch": 1.4188473270187831, "grad_norm": 0.9755864143371582, "learning_rate": 0.00013687735693616876, "loss": 0.3488, "step": 4419 }, { "epoch": 1.4191684058436347, "grad_norm": 0.9460292458534241, "learning_rate": 0.00013684490834703843, "loss": 0.3581, "step": 4420 }, { "epoch": 1.4194894846684862, "grad_norm": 0.8120269179344177, "learning_rate": 0.00013681245526846783, "loss": 0.4829, "step": 4421 }, { "epoch": 1.4198105634933376, "grad_norm": 0.925631046295166, 
"learning_rate": 0.00013677999770441115, "loss": 0.7187, "step": 4422 }, { "epoch": 1.4201316423181891, "grad_norm": 0.8358470797538757, "learning_rate": 0.00013674753565882334, "loss": 0.6196, "step": 4423 }, { "epoch": 1.4204527211430407, "grad_norm": 1.1134681701660156, "learning_rate": 0.0001367150691356598, "loss": 0.7756, "step": 4424 }, { "epoch": 1.420773799967892, "grad_norm": 0.7973681688308716, "learning_rate": 0.00013668259813887643, "loss": 0.5704, "step": 4425 }, { "epoch": 1.4210948787927435, "grad_norm": 0.8833571076393127, "learning_rate": 0.00013665012267242974, "loss": 0.7326, "step": 4426 }, { "epoch": 1.421415957617595, "grad_norm": 0.85068678855896, "learning_rate": 0.00013661764274027678, "loss": 0.5295, "step": 4427 }, { "epoch": 1.4217370364424466, "grad_norm": 0.5901229977607727, "learning_rate": 0.00013658515834637512, "loss": 0.4465, "step": 4428 }, { "epoch": 1.4220581152672982, "grad_norm": 0.7249242067337036, "learning_rate": 0.0001365526694946829, "loss": 0.6021, "step": 4429 }, { "epoch": 1.4223791940921497, "grad_norm": 0.8554542660713196, "learning_rate": 0.0001365201761891588, "loss": 0.5952, "step": 4430 }, { "epoch": 1.422700272917001, "grad_norm": 1.1563721895217896, "learning_rate": 0.00013648767843376196, "loss": 0.6391, "step": 4431 }, { "epoch": 1.4230213517418526, "grad_norm": 0.7757640480995178, "learning_rate": 0.0001364551762324522, "loss": 0.5914, "step": 4432 }, { "epoch": 1.4233424305667042, "grad_norm": 0.9488760828971863, "learning_rate": 0.00013642266958918984, "loss": 0.678, "step": 4433 }, { "epoch": 1.4236635093915555, "grad_norm": 0.8043296337127686, "learning_rate": 0.00013639015850793563, "loss": 0.6684, "step": 4434 }, { "epoch": 1.423984588216407, "grad_norm": 1.0018153190612793, "learning_rate": 0.000136357642992651, "loss": 0.8602, "step": 4435 }, { "epoch": 1.4243056670412586, "grad_norm": 0.8006235361099243, "learning_rate": 0.00013632512304729785, "loss": 0.6058, "step": 4436 }, { "epoch": 
1.4246267458661102, "grad_norm": 0.9268879890441895, "learning_rate": 0.00013629259867583863, "loss": 0.6974, "step": 4437 }, { "epoch": 1.4249478246909617, "grad_norm": 0.8567279577255249, "learning_rate": 0.00013626006988223636, "loss": 0.5062, "step": 4438 }, { "epoch": 1.4252689035158133, "grad_norm": 1.0375492572784424, "learning_rate": 0.00013622753667045457, "loss": 0.7928, "step": 4439 }, { "epoch": 1.4255899823406646, "grad_norm": 0.6748252511024475, "learning_rate": 0.00013619499904445734, "loss": 0.4775, "step": 4440 }, { "epoch": 1.4259110611655161, "grad_norm": 0.8363870978355408, "learning_rate": 0.00013616245700820922, "loss": 0.6386, "step": 4441 }, { "epoch": 1.4262321399903677, "grad_norm": 1.0793986320495605, "learning_rate": 0.00013612991056567543, "loss": 0.5909, "step": 4442 }, { "epoch": 1.426553218815219, "grad_norm": 0.9321547150611877, "learning_rate": 0.00013609735972082166, "loss": 0.7883, "step": 4443 }, { "epoch": 1.4268742976400706, "grad_norm": 1.0707225799560547, "learning_rate": 0.00013606480447761409, "loss": 0.6685, "step": 4444 }, { "epoch": 1.4271953764649221, "grad_norm": 0.9186208248138428, "learning_rate": 0.00013603224484001948, "loss": 0.6659, "step": 4445 }, { "epoch": 1.4275164552897737, "grad_norm": 1.3622729778289795, "learning_rate": 0.00013599968081200514, "loss": 0.6381, "step": 4446 }, { "epoch": 1.4278375341146252, "grad_norm": 0.9809760451316833, "learning_rate": 0.0001359671123975389, "loss": 0.5467, "step": 4447 }, { "epoch": 1.4281586129394768, "grad_norm": 1.0845415592193604, "learning_rate": 0.00013593453960058908, "loss": 0.6937, "step": 4448 }, { "epoch": 1.428479691764328, "grad_norm": 1.316145896911621, "learning_rate": 0.00013590196242512463, "loss": 0.7624, "step": 4449 }, { "epoch": 1.4288007705891796, "grad_norm": 0.8910375833511353, "learning_rate": 0.00013586938087511494, "loss": 0.532, "step": 4450 }, { "epoch": 1.4291218494140312, "grad_norm": 1.1741594076156616, "learning_rate": 
0.00013583679495453, "loss": 0.5843, "step": 4451 }, { "epoch": 1.4294429282388825, "grad_norm": 0.8071622252464294, "learning_rate": 0.00013580420466734037, "loss": 0.5575, "step": 4452 }, { "epoch": 1.429764007063734, "grad_norm": 1.1299529075622559, "learning_rate": 0.00013577161001751694, "loss": 0.4591, "step": 4453 }, { "epoch": 1.4300850858885856, "grad_norm": 1.1855186223983765, "learning_rate": 0.00013573901100903134, "loss": 0.6654, "step": 4454 }, { "epoch": 1.4304061647134372, "grad_norm": 1.0821056365966797, "learning_rate": 0.00013570640764585566, "loss": 0.6234, "step": 4455 }, { "epoch": 1.4307272435382887, "grad_norm": 1.0219788551330566, "learning_rate": 0.00013567379993196252, "loss": 0.5468, "step": 4456 }, { "epoch": 1.4310483223631403, "grad_norm": 1.1291626691818237, "learning_rate": 0.00013564118787132506, "loss": 0.782, "step": 4457 }, { "epoch": 1.4313694011879916, "grad_norm": 0.9877511262893677, "learning_rate": 0.000135608571467917, "loss": 0.5307, "step": 4458 }, { "epoch": 1.4316904800128432, "grad_norm": 0.9503813982009888, "learning_rate": 0.0001355759507257125, "loss": 0.5951, "step": 4459 }, { "epoch": 1.4320115588376947, "grad_norm": 0.9377137422561646, "learning_rate": 0.0001355433256486863, "loss": 0.4201, "step": 4460 }, { "epoch": 1.432332637662546, "grad_norm": 0.9886158108711243, "learning_rate": 0.0001355106962408137, "loss": 0.5017, "step": 4461 }, { "epoch": 1.4326537164873976, "grad_norm": 1.0197458267211914, "learning_rate": 0.0001354780625060705, "loss": 0.4774, "step": 4462 }, { "epoch": 1.4329747953122491, "grad_norm": 0.9758638143539429, "learning_rate": 0.00013544542444843299, "loss": 0.4103, "step": 4463 }, { "epoch": 1.4332958741371007, "grad_norm": 0.7870468497276306, "learning_rate": 0.000135412782071878, "loss": 0.4326, "step": 4464 }, { "epoch": 1.4336169529619522, "grad_norm": 0.738239049911499, "learning_rate": 0.00013538013538038295, "loss": 0.8734, "step": 4465 }, { "epoch": 1.4339380317868038, 
"grad_norm": 0.6055427193641663, "learning_rate": 0.00013534748437792573, "loss": 0.7507, "step": 4466 }, { "epoch": 1.4342591106116551, "grad_norm": 0.7751237154006958, "learning_rate": 0.00013531482906848475, "loss": 0.3607, "step": 4467 }, { "epoch": 1.4345801894365067, "grad_norm": 0.7698683738708496, "learning_rate": 0.000135282169456039, "loss": 0.3953, "step": 4468 }, { "epoch": 1.4349012682613582, "grad_norm": 0.8559454679489136, "learning_rate": 0.00013524950554456784, "loss": 0.4011, "step": 4469 }, { "epoch": 1.4352223470862095, "grad_norm": 1.0439728498458862, "learning_rate": 0.00013521683733805145, "loss": 0.775, "step": 4470 }, { "epoch": 1.435543425911061, "grad_norm": 0.8648383021354675, "learning_rate": 0.00013518416484047018, "loss": 0.6064, "step": 4471 }, { "epoch": 1.4358645047359127, "grad_norm": 1.1003457307815552, "learning_rate": 0.0001351514880558052, "loss": 0.5959, "step": 4472 }, { "epoch": 1.4361855835607642, "grad_norm": 0.8835905194282532, "learning_rate": 0.00013511880698803801, "loss": 0.579, "step": 4473 }, { "epoch": 1.4365066623856158, "grad_norm": 0.654793381690979, "learning_rate": 0.00013508612164115068, "loss": 0.4533, "step": 4474 }, { "epoch": 1.4368277412104673, "grad_norm": 0.832150399684906, "learning_rate": 0.0001350534320191259, "loss": 0.59, "step": 4475 }, { "epoch": 1.4371488200353186, "grad_norm": 0.805191695690155, "learning_rate": 0.00013502073812594675, "loss": 0.5468, "step": 4476 }, { "epoch": 1.4374698988601702, "grad_norm": 1.0974748134613037, "learning_rate": 0.00013498803996559692, "loss": 0.8118, "step": 4477 }, { "epoch": 1.4377909776850217, "grad_norm": 1.0176666975021362, "learning_rate": 0.0001349553375420605, "loss": 0.6775, "step": 4478 }, { "epoch": 1.438112056509873, "grad_norm": 1.417904019355774, "learning_rate": 0.00013492263085932224, "loss": 0.7306, "step": 4479 }, { "epoch": 1.4384331353347246, "grad_norm": 1.5575790405273438, "learning_rate": 0.00013488991992136734, "loss": 0.7895, 
"step": 4480 }, { "epoch": 1.4387542141595762, "grad_norm": 0.9798082113265991, "learning_rate": 0.00013485720473218154, "loss": 0.6748, "step": 4481 }, { "epoch": 1.4390752929844277, "grad_norm": 1.1362134218215942, "learning_rate": 0.00013482448529575106, "loss": 0.7912, "step": 4482 }, { "epoch": 1.4393963718092793, "grad_norm": 0.7490417957305908, "learning_rate": 0.0001347917616160627, "loss": 0.549, "step": 4483 }, { "epoch": 1.4397174506341308, "grad_norm": 1.027808427810669, "learning_rate": 0.0001347590336971037, "loss": 0.7783, "step": 4484 }, { "epoch": 1.4400385294589821, "grad_norm": 0.8055115938186646, "learning_rate": 0.0001347263015428619, "loss": 0.5538, "step": 4485 }, { "epoch": 1.4403596082838337, "grad_norm": 1.0068027973175049, "learning_rate": 0.00013469356515732558, "loss": 0.8646, "step": 4486 }, { "epoch": 1.4406806871086852, "grad_norm": 1.301796555519104, "learning_rate": 0.00013466082454448362, "loss": 0.7354, "step": 4487 }, { "epoch": 1.4410017659335366, "grad_norm": 0.9036690592765808, "learning_rate": 0.0001346280797083253, "loss": 0.5607, "step": 4488 }, { "epoch": 1.4413228447583881, "grad_norm": 0.928037703037262, "learning_rate": 0.0001345953306528405, "loss": 0.7001, "step": 4489 }, { "epoch": 1.4416439235832397, "grad_norm": 0.9121316075325012, "learning_rate": 0.00013456257738201957, "loss": 0.561, "step": 4490 }, { "epoch": 1.4419650024080912, "grad_norm": 1.3250795602798462, "learning_rate": 0.00013452981989985348, "loss": 0.81, "step": 4491 }, { "epoch": 1.4422860812329428, "grad_norm": 0.8794850707054138, "learning_rate": 0.00013449705821033355, "loss": 0.7304, "step": 4492 }, { "epoch": 1.4426071600577943, "grad_norm": 1.1400325298309326, "learning_rate": 0.0001344642923174517, "loss": 0.6693, "step": 4493 }, { "epoch": 1.4429282388826457, "grad_norm": 1.1574450731277466, "learning_rate": 0.00013443152222520038, "loss": 0.5701, "step": 4494 }, { "epoch": 1.4432493177074972, "grad_norm": 1.0430783033370972, 
"learning_rate": 0.00013439874793757254, "loss": 0.7403, "step": 4495 }, { "epoch": 1.4435703965323488, "grad_norm": 0.968147337436676, "learning_rate": 0.00013436596945856164, "loss": 0.7173, "step": 4496 }, { "epoch": 1.4438914753572, "grad_norm": 0.7893198132514954, "learning_rate": 0.00013433318679216153, "loss": 0.5111, "step": 4497 }, { "epoch": 1.4442125541820516, "grad_norm": 0.9785538911819458, "learning_rate": 0.00013430039994236678, "loss": 0.5591, "step": 4498 }, { "epoch": 1.4445336330069032, "grad_norm": 1.5484408140182495, "learning_rate": 0.00013426760891317236, "loss": 0.7088, "step": 4499 }, { "epoch": 1.4448547118317547, "grad_norm": 0.9762724041938782, "learning_rate": 0.00013423481370857375, "loss": 0.5886, "step": 4500 }, { "epoch": 1.4451757906566063, "grad_norm": 1.3003888130187988, "learning_rate": 0.00013420201433256689, "loss": 0.7262, "step": 4501 }, { "epoch": 1.4454968694814578, "grad_norm": 0.7951440811157227, "learning_rate": 0.00013416921078914835, "loss": 0.4705, "step": 4502 }, { "epoch": 1.4458179483063092, "grad_norm": 0.8316115140914917, "learning_rate": 0.0001341364030823151, "loss": 0.5546, "step": 4503 }, { "epoch": 1.4461390271311607, "grad_norm": 1.0351200103759766, "learning_rate": 0.0001341035912160647, "loss": 0.5174, "step": 4504 }, { "epoch": 1.4464601059560123, "grad_norm": 1.0522757768630981, "learning_rate": 0.0001340707751943952, "loss": 0.6695, "step": 4505 }, { "epoch": 1.4467811847808636, "grad_norm": 0.8904996514320374, "learning_rate": 0.00013403795502130503, "loss": 0.5893, "step": 4506 }, { "epoch": 1.4471022636057151, "grad_norm": 0.866426408290863, "learning_rate": 0.0001340051307007933, "loss": 0.6581, "step": 4507 }, { "epoch": 1.4474233424305667, "grad_norm": 0.922524094581604, "learning_rate": 0.00013397230223685956, "loss": 0.5626, "step": 4508 }, { "epoch": 1.4477444212554182, "grad_norm": 1.0396236181259155, "learning_rate": 0.00013393946963350382, "loss": 0.524, "step": 4509 }, { "epoch": 
1.4480655000802698, "grad_norm": 1.3418883085250854, "learning_rate": 0.00013390663289472666, "loss": 0.6468, "step": 4510 }, { "epoch": 1.4483865789051211, "grad_norm": 0.7169436812400818, "learning_rate": 0.00013387379202452917, "loss": 0.4527, "step": 4511 }, { "epoch": 1.4487076577299727, "grad_norm": 1.3441863059997559, "learning_rate": 0.00013384094702691281, "loss": 0.5522, "step": 4512 }, { "epoch": 1.4490287365548242, "grad_norm": 0.7752569913864136, "learning_rate": 0.00013380809790587974, "loss": 0.441, "step": 4513 }, { "epoch": 1.4493498153796758, "grad_norm": 0.8399336934089661, "learning_rate": 0.00013377524466543248, "loss": 0.4828, "step": 4514 }, { "epoch": 1.449670894204527, "grad_norm": 0.9468197822570801, "learning_rate": 0.00013374238730957412, "loss": 0.6818, "step": 4515 }, { "epoch": 1.4499919730293787, "grad_norm": 0.5419335961341858, "learning_rate": 0.00013370952584230823, "loss": 0.7663, "step": 4516 }, { "epoch": 1.4503130518542302, "grad_norm": 0.7655202150344849, "learning_rate": 0.00013367666026763882, "loss": 0.6944, "step": 4517 }, { "epoch": 1.4506341306790818, "grad_norm": 0.7268679141998291, "learning_rate": 0.00013364379058957056, "loss": 0.3399, "step": 4518 }, { "epoch": 1.4509552095039333, "grad_norm": 0.787480354309082, "learning_rate": 0.00013361091681210845, "loss": 0.4652, "step": 4519 }, { "epoch": 1.4512762883287846, "grad_norm": 0.9196494221687317, "learning_rate": 0.00013357803893925807, "loss": 0.5813, "step": 4520 }, { "epoch": 1.4515973671536362, "grad_norm": 0.7516376376152039, "learning_rate": 0.00013354515697502553, "loss": 0.3046, "step": 4521 }, { "epoch": 1.4519184459784877, "grad_norm": 0.7327350974082947, "learning_rate": 0.00013351227092341732, "loss": 0.6606, "step": 4522 }, { "epoch": 1.4522395248033393, "grad_norm": 0.9102757573127747, "learning_rate": 0.0001334793807884406, "loss": 0.7293, "step": 4523 }, { "epoch": 1.4525606036281906, "grad_norm": 0.8285297155380249, "learning_rate": 
0.00013344648657410282, "loss": 0.5445, "step": 4524 }, { "epoch": 1.4528816824530422, "grad_norm": 1.0268747806549072, "learning_rate": 0.00013341358828441218, "loss": 0.7724, "step": 4525 }, { "epoch": 1.4532027612778937, "grad_norm": 0.7675826549530029, "learning_rate": 0.0001333806859233771, "loss": 0.5759, "step": 4526 }, { "epoch": 1.4535238401027453, "grad_norm": 0.8815962672233582, "learning_rate": 0.00013334777949500673, "loss": 0.7797, "step": 4527 }, { "epoch": 1.4538449189275968, "grad_norm": 0.7846258282661438, "learning_rate": 0.00013331486900331057, "loss": 0.5603, "step": 4528 }, { "epoch": 1.4541659977524481, "grad_norm": 1.0539195537567139, "learning_rate": 0.00013328195445229868, "loss": 0.7525, "step": 4529 }, { "epoch": 1.4544870765772997, "grad_norm": 0.9988424777984619, "learning_rate": 0.0001332490358459816, "loss": 0.7323, "step": 4530 }, { "epoch": 1.4548081554021512, "grad_norm": 0.7499678730964661, "learning_rate": 0.00013321611318837032, "loss": 0.6056, "step": 4531 }, { "epoch": 1.4551292342270028, "grad_norm": 0.7443246841430664, "learning_rate": 0.00013318318648347646, "loss": 0.4723, "step": 4532 }, { "epoch": 1.4554503130518541, "grad_norm": 0.7882360219955444, "learning_rate": 0.00013315025573531198, "loss": 0.5804, "step": 4533 }, { "epoch": 1.4557713918767057, "grad_norm": 0.9871779680252075, "learning_rate": 0.00013311732094788937, "loss": 0.715, "step": 4534 }, { "epoch": 1.4560924707015572, "grad_norm": 0.8856052756309509, "learning_rate": 0.00013308438212522163, "loss": 0.6614, "step": 4535 }, { "epoch": 1.4564135495264088, "grad_norm": 0.9039063453674316, "learning_rate": 0.0001330514392713223, "loss": 0.7465, "step": 4536 }, { "epoch": 1.4567346283512603, "grad_norm": 0.7212230563163757, "learning_rate": 0.00013301849239020537, "loss": 0.6114, "step": 4537 }, { "epoch": 1.4570557071761117, "grad_norm": 0.8474130630493164, "learning_rate": 0.00013298554148588528, "loss": 0.5634, "step": 4538 }, { "epoch": 
1.4573767860009632, "grad_norm": 1.1321617364883423, "learning_rate": 0.00013295258656237702, "loss": 0.6322, "step": 4539 }, { "epoch": 1.4576978648258148, "grad_norm": 1.0911418199539185, "learning_rate": 0.00013291962762369608, "loss": 0.6877, "step": 4540 }, { "epoch": 1.4580189436506663, "grad_norm": 0.9263336658477783, "learning_rate": 0.00013288666467385833, "loss": 0.5769, "step": 4541 }, { "epoch": 1.4583400224755176, "grad_norm": 0.677189826965332, "learning_rate": 0.00013285369771688025, "loss": 0.5574, "step": 4542 }, { "epoch": 1.4586611013003692, "grad_norm": 0.9409946203231812, "learning_rate": 0.0001328207267567788, "loss": 0.6771, "step": 4543 }, { "epoch": 1.4589821801252207, "grad_norm": 0.848575234413147, "learning_rate": 0.00013278775179757131, "loss": 0.5214, "step": 4544 }, { "epoch": 1.4593032589500723, "grad_norm": 0.8765393495559692, "learning_rate": 0.0001327547728432757, "loss": 0.5847, "step": 4545 }, { "epoch": 1.4596243377749238, "grad_norm": 0.8433587551116943, "learning_rate": 0.0001327217898979104, "loss": 0.4884, "step": 4546 }, { "epoch": 1.4599454165997752, "grad_norm": 1.2517951726913452, "learning_rate": 0.00013268880296549425, "loss": 0.5431, "step": 4547 }, { "epoch": 1.4602664954246267, "grad_norm": 0.8801559209823608, "learning_rate": 0.00013265581205004661, "loss": 0.5063, "step": 4548 }, { "epoch": 1.4605875742494783, "grad_norm": 1.0915852785110474, "learning_rate": 0.00013262281715558736, "loss": 0.6934, "step": 4549 }, { "epoch": 1.4609086530743298, "grad_norm": 1.139147400856018, "learning_rate": 0.00013258981828613678, "loss": 0.6021, "step": 4550 }, { "epoch": 1.4612297318991812, "grad_norm": 1.0436866283416748, "learning_rate": 0.00013255681544571568, "loss": 0.8493, "step": 4551 }, { "epoch": 1.4615508107240327, "grad_norm": 0.8446107506752014, "learning_rate": 0.0001325238086383454, "loss": 0.4369, "step": 4552 }, { "epoch": 1.4618718895488843, "grad_norm": 2.1884477138519287, "learning_rate": 
0.00013249079786804765, "loss": 0.6191, "step": 4553 }, { "epoch": 1.4621929683737358, "grad_norm": 0.8910212516784668, "learning_rate": 0.00013245778313884478, "loss": 0.4856, "step": 4554 }, { "epoch": 1.4625140471985874, "grad_norm": 0.6236334443092346, "learning_rate": 0.00013242476445475944, "loss": 0.4026, "step": 4555 }, { "epoch": 1.4628351260234387, "grad_norm": 1.155137538909912, "learning_rate": 0.00013239174181981495, "loss": 0.7141, "step": 4556 }, { "epoch": 1.4631562048482902, "grad_norm": 1.5037484169006348, "learning_rate": 0.00013235871523803496, "loss": 0.7128, "step": 4557 }, { "epoch": 1.4634772836731418, "grad_norm": 0.9598552584648132, "learning_rate": 0.0001323256847134437, "loss": 0.6375, "step": 4558 }, { "epoch": 1.4637983624979933, "grad_norm": 0.9860318303108215, "learning_rate": 0.00013229265025006576, "loss": 0.572, "step": 4559 }, { "epoch": 1.4641194413228447, "grad_norm": 0.9279815554618835, "learning_rate": 0.00013225961185192638, "loss": 0.4483, "step": 4560 }, { "epoch": 1.4644405201476962, "grad_norm": 0.8800573945045471, "learning_rate": 0.00013222656952305113, "loss": 0.5297, "step": 4561 }, { "epoch": 1.4647615989725478, "grad_norm": 1.1138170957565308, "learning_rate": 0.00013219352326746613, "loss": 0.5575, "step": 4562 }, { "epoch": 1.4650826777973993, "grad_norm": 0.760184645652771, "learning_rate": 0.00013216047308919798, "loss": 0.4005, "step": 4563 }, { "epoch": 1.4654037566222509, "grad_norm": 0.6601788997650146, "learning_rate": 0.00013212741899227374, "loss": 0.3951, "step": 4564 }, { "epoch": 1.4657248354471022, "grad_norm": 0.825016438961029, "learning_rate": 0.00013209436098072095, "loss": 0.7009, "step": 4565 }, { "epoch": 1.4660459142719537, "grad_norm": 0.652347981929779, "learning_rate": 0.00013206129905856765, "loss": 0.6756, "step": 4566 }, { "epoch": 1.4663669930968053, "grad_norm": 0.5992451906204224, "learning_rate": 0.00013202823322984228, "loss": 0.6338, "step": 4567 }, { "epoch": 1.4666880719216568, 
"grad_norm": 0.8534404039382935, "learning_rate": 0.00013199516349857385, "loss": 0.5359, "step": 4568 }, { "epoch": 1.4670091507465082, "grad_norm": 0.7051871418952942, "learning_rate": 0.0001319620898687918, "loss": 0.3355, "step": 4569 }, { "epoch": 1.4673302295713597, "grad_norm": 0.9312176704406738, "learning_rate": 0.00013192901234452607, "loss": 0.4381, "step": 4570 }, { "epoch": 1.4676513083962113, "grad_norm": 1.0084874629974365, "learning_rate": 0.00013189593092980702, "loss": 0.3896, "step": 4571 }, { "epoch": 1.4679723872210628, "grad_norm": 0.9059427380561829, "learning_rate": 0.00013186284562866554, "loss": 0.3917, "step": 4572 }, { "epoch": 1.4682934660459144, "grad_norm": 0.9761437773704529, "learning_rate": 0.00013182975644513296, "loss": 0.6641, "step": 4573 }, { "epoch": 1.4686145448707657, "grad_norm": 0.8613228797912598, "learning_rate": 0.00013179666338324108, "loss": 0.5554, "step": 4574 }, { "epoch": 1.4689356236956173, "grad_norm": 0.9337467551231384, "learning_rate": 0.00013176356644702225, "loss": 0.82, "step": 4575 }, { "epoch": 1.4692567025204688, "grad_norm": 0.6009548306465149, "learning_rate": 0.00013173046564050924, "loss": 0.4591, "step": 4576 }, { "epoch": 1.4695777813453204, "grad_norm": 0.8322634100914001, "learning_rate": 0.0001316973609677352, "loss": 0.6572, "step": 4577 }, { "epoch": 1.4698988601701717, "grad_norm": 0.9558845162391663, "learning_rate": 0.00013166425243273385, "loss": 0.7128, "step": 4578 }, { "epoch": 1.4702199389950232, "grad_norm": 0.9433383345603943, "learning_rate": 0.0001316311400395394, "loss": 0.6818, "step": 4579 }, { "epoch": 1.4705410178198748, "grad_norm": 0.7871822118759155, "learning_rate": 0.0001315980237921865, "loss": 0.6238, "step": 4580 }, { "epoch": 1.4708620966447263, "grad_norm": 0.8970574140548706, "learning_rate": 0.00013156490369471027, "loss": 0.6878, "step": 4581 }, { "epoch": 1.4711831754695779, "grad_norm": 1.0396785736083984, "learning_rate": 0.00013153177975114625, "loss": 
0.7369, "step": 4582 }, { "epoch": 1.4715042542944292, "grad_norm": 0.8116987943649292, "learning_rate": 0.0001314986519655305, "loss": 0.6017, "step": 4583 }, { "epoch": 1.4718253331192808, "grad_norm": 1.061795711517334, "learning_rate": 0.00013146552034189955, "loss": 0.5786, "step": 4584 }, { "epoch": 1.4721464119441323, "grad_norm": 0.9677996039390564, "learning_rate": 0.0001314323848842904, "loss": 0.4793, "step": 4585 }, { "epoch": 1.4724674907689839, "grad_norm": 0.8710011839866638, "learning_rate": 0.00013139924559674052, "loss": 0.6054, "step": 4586 }, { "epoch": 1.4727885695938352, "grad_norm": 0.9556978344917297, "learning_rate": 0.00013136610248328778, "loss": 0.6016, "step": 4587 }, { "epoch": 1.4731096484186867, "grad_norm": 1.0169155597686768, "learning_rate": 0.0001313329555479706, "loss": 0.5464, "step": 4588 }, { "epoch": 1.4734307272435383, "grad_norm": 0.9183655977249146, "learning_rate": 0.00013129980479482782, "loss": 0.7172, "step": 4589 }, { "epoch": 1.4737518060683898, "grad_norm": 0.7591384053230286, "learning_rate": 0.00013126665022789879, "loss": 0.6191, "step": 4590 }, { "epoch": 1.4740728848932414, "grad_norm": 0.8953664898872375, "learning_rate": 0.00013123349185122327, "loss": 0.532, "step": 4591 }, { "epoch": 1.4743939637180927, "grad_norm": 1.7143136262893677, "learning_rate": 0.0001312003296688415, "loss": 0.581, "step": 4592 }, { "epoch": 1.4747150425429443, "grad_norm": 0.8613825440406799, "learning_rate": 0.0001311671636847942, "loss": 0.5599, "step": 4593 }, { "epoch": 1.4750361213677958, "grad_norm": 0.901800274848938, "learning_rate": 0.00013113399390312256, "loss": 0.5636, "step": 4594 }, { "epoch": 1.4753572001926474, "grad_norm": 0.9919823408126831, "learning_rate": 0.0001311008203278682, "loss": 0.6591, "step": 4595 }, { "epoch": 1.4756782790174987, "grad_norm": 1.3764430284500122, "learning_rate": 0.0001310676429630732, "loss": 0.6994, "step": 4596 }, { "epoch": 1.4759993578423503, "grad_norm": 1.025925874710083, 
"learning_rate": 0.00013103446181278013, "loss": 0.571, "step": 4597 }, { "epoch": 1.4763204366672018, "grad_norm": 0.9428420662879944, "learning_rate": 0.00013100127688103205, "loss": 0.6081, "step": 4598 }, { "epoch": 1.4766415154920534, "grad_norm": 0.8529512882232666, "learning_rate": 0.00013096808817187242, "loss": 0.4775, "step": 4599 }, { "epoch": 1.476962594316905, "grad_norm": 1.0675090551376343, "learning_rate": 0.0001309348956893452, "loss": 0.7771, "step": 4600 }, { "epoch": 1.4772836731417562, "grad_norm": 1.3736308813095093, "learning_rate": 0.00013090169943749476, "loss": 0.6906, "step": 4601 }, { "epoch": 1.4776047519666078, "grad_norm": 1.8220711946487427, "learning_rate": 0.00013086849942036595, "loss": 0.704, "step": 4602 }, { "epoch": 1.4779258307914593, "grad_norm": 0.9620269536972046, "learning_rate": 0.00013083529564200417, "loss": 0.526, "step": 4603 }, { "epoch": 1.4782469096163107, "grad_norm": 0.6493226289749146, "learning_rate": 0.00013080208810645514, "loss": 0.3707, "step": 4604 }, { "epoch": 1.4785679884411622, "grad_norm": 1.0979892015457153, "learning_rate": 0.0001307688768177651, "loss": 0.6589, "step": 4605 }, { "epoch": 1.4788890672660138, "grad_norm": 0.8274226784706116, "learning_rate": 0.00013073566177998074, "loss": 0.5467, "step": 4606 }, { "epoch": 1.4792101460908653, "grad_norm": 1.2158207893371582, "learning_rate": 0.0001307024429971492, "loss": 0.5996, "step": 4607 }, { "epoch": 1.4795312249157169, "grad_norm": 1.105692982673645, "learning_rate": 0.00013066922047331813, "loss": 0.5918, "step": 4608 }, { "epoch": 1.4798523037405684, "grad_norm": 0.8877955675125122, "learning_rate": 0.00013063599421253558, "loss": 0.5614, "step": 4609 }, { "epoch": 1.4801733825654197, "grad_norm": 0.8180930018424988, "learning_rate": 0.0001306027642188501, "loss": 0.5291, "step": 4610 }, { "epoch": 1.4804944613902713, "grad_norm": 1.009455680847168, "learning_rate": 0.00013056953049631057, "loss": 0.4543, "step": 4611 }, { "epoch": 
1.4808155402151228, "grad_norm": 0.779489278793335, "learning_rate": 0.0001305362930489665, "loss": 0.5272, "step": 4612 }, { "epoch": 1.4811366190399742, "grad_norm": 0.9974098801612854, "learning_rate": 0.0001305030518808678, "loss": 0.4729, "step": 4613 }, { "epoch": 1.4814576978648257, "grad_norm": 0.7673447132110596, "learning_rate": 0.00013046980699606467, "loss": 0.345, "step": 4614 }, { "epoch": 1.4817787766896773, "grad_norm": 0.6975226402282715, "learning_rate": 0.00013043655839860803, "loss": 0.6528, "step": 4615 }, { "epoch": 1.4820998555145288, "grad_norm": 0.652308464050293, "learning_rate": 0.00013040330609254903, "loss": 0.5996, "step": 4616 }, { "epoch": 1.4824209343393804, "grad_norm": 0.7014802694320679, "learning_rate": 0.00013037005008193942, "loss": 0.6009, "step": 4617 }, { "epoch": 1.482742013164232, "grad_norm": 0.8486183881759644, "learning_rate": 0.00013033679037083138, "loss": 0.5923, "step": 4618 }, { "epoch": 1.4830630919890833, "grad_norm": 0.7879297137260437, "learning_rate": 0.00013030352696327742, "loss": 0.3314, "step": 4619 }, { "epoch": 1.4833841708139348, "grad_norm": 0.6520053148269653, "learning_rate": 0.0001302702598633306, "loss": 0.3041, "step": 4620 }, { "epoch": 1.4837052496387864, "grad_norm": 0.8596171736717224, "learning_rate": 0.00013023698907504446, "loss": 0.3313, "step": 4621 }, { "epoch": 1.4840263284636377, "grad_norm": 0.5723639130592346, "learning_rate": 0.0001302037146024729, "loss": 0.188, "step": 4622 }, { "epoch": 1.4843474072884892, "grad_norm": 0.7225738167762756, "learning_rate": 0.00013017043644967036, "loss": 0.3561, "step": 4623 }, { "epoch": 1.4846684861133408, "grad_norm": 0.9386597871780396, "learning_rate": 0.00013013715462069166, "loss": 0.7271, "step": 4624 }, { "epoch": 1.4849895649381923, "grad_norm": 1.0117478370666504, "learning_rate": 0.00013010386911959206, "loss": 0.806, "step": 4625 }, { "epoch": 1.485310643763044, "grad_norm": 0.9228258728981018, "learning_rate": 
0.00013007057995042732, "loss": 0.5942, "step": 4626 }, { "epoch": 1.4856317225878954, "grad_norm": 0.9322078227996826, "learning_rate": 0.0001300372871172536, "loss": 0.6389, "step": 4627 }, { "epoch": 1.4859528014127468, "grad_norm": 1.0570144653320312, "learning_rate": 0.00013000399062412763, "loss": 0.6589, "step": 4628 }, { "epoch": 1.4862738802375983, "grad_norm": 0.8913359642028809, "learning_rate": 0.0001299706904751064, "loss": 0.6874, "step": 4629 }, { "epoch": 1.4865949590624499, "grad_norm": 0.8339388966560364, "learning_rate": 0.0001299373866742474, "loss": 0.6398, "step": 4630 }, { "epoch": 1.4869160378873012, "grad_norm": 0.6980022192001343, "learning_rate": 0.00012990407922560868, "loss": 0.4593, "step": 4631 }, { "epoch": 1.4872371167121528, "grad_norm": 0.9570392370223999, "learning_rate": 0.00012987076813324858, "loss": 0.5951, "step": 4632 }, { "epoch": 1.4875581955370043, "grad_norm": 0.8993356227874756, "learning_rate": 0.00012983745340122604, "loss": 0.7096, "step": 4633 }, { "epoch": 1.4878792743618559, "grad_norm": 0.9646190404891968, "learning_rate": 0.0001298041350336003, "loss": 0.662, "step": 4634 }, { "epoch": 1.4882003531867074, "grad_norm": 0.8036277294158936, "learning_rate": 0.00012977081303443107, "loss": 0.5762, "step": 4635 }, { "epoch": 1.488521432011559, "grad_norm": 1.3247047662734985, "learning_rate": 0.0001297374874077786, "loss": 0.6607, "step": 4636 }, { "epoch": 1.4888425108364103, "grad_norm": 0.9229692816734314, "learning_rate": 0.0001297041581577035, "loss": 0.6021, "step": 4637 }, { "epoch": 1.4891635896612618, "grad_norm": 0.8703567981719971, "learning_rate": 0.00012967082528826684, "loss": 0.5731, "step": 4638 }, { "epoch": 1.4894846684861134, "grad_norm": 1.0840383768081665, "learning_rate": 0.00012963748880353011, "loss": 0.6943, "step": 4639 }, { "epoch": 1.4898057473109647, "grad_norm": 0.9261624217033386, "learning_rate": 0.00012960414870755524, "loss": 0.5193, "step": 4640 }, { "epoch": 1.4901268261358163, 
"grad_norm": 1.1405514478683472, "learning_rate": 0.00012957080500440468, "loss": 0.709, "step": 4641 }, { "epoch": 1.4904479049606678, "grad_norm": 1.0057867765426636, "learning_rate": 0.00012953745769814123, "loss": 0.6075, "step": 4642 }, { "epoch": 1.4907689837855194, "grad_norm": 0.86097252368927, "learning_rate": 0.00012950410679282815, "loss": 0.4511, "step": 4643 }, { "epoch": 1.491090062610371, "grad_norm": 1.2975316047668457, "learning_rate": 0.00012947075229252915, "loss": 0.7537, "step": 4644 }, { "epoch": 1.4914111414352225, "grad_norm": 1.0226067304611206, "learning_rate": 0.00012943739420130836, "loss": 0.497, "step": 4645 }, { "epoch": 1.4917322202600738, "grad_norm": 1.0293515920639038, "learning_rate": 0.0001294040325232304, "loss": 0.5741, "step": 4646 }, { "epoch": 1.4920532990849253, "grad_norm": 0.7942186594009399, "learning_rate": 0.0001293706672623603, "loss": 0.5992, "step": 4647 }, { "epoch": 1.492374377909777, "grad_norm": 0.8032208681106567, "learning_rate": 0.00012933729842276343, "loss": 0.4878, "step": 4648 }, { "epoch": 1.4926954567346282, "grad_norm": 0.9675914645195007, "learning_rate": 0.00012930392600850573, "loss": 0.5973, "step": 4649 }, { "epoch": 1.4930165355594798, "grad_norm": 1.0577799081802368, "learning_rate": 0.0001292705500236536, "loss": 0.7173, "step": 4650 }, { "epoch": 1.4933376143843313, "grad_norm": 0.9797313213348389, "learning_rate": 0.00012923717047227368, "loss": 0.5796, "step": 4651 }, { "epoch": 1.4936586932091829, "grad_norm": 1.041603446006775, "learning_rate": 0.00012920378735843327, "loss": 0.4257, "step": 4652 }, { "epoch": 1.4939797720340344, "grad_norm": 0.7669867873191833, "learning_rate": 0.0001291704006861999, "loss": 0.5239, "step": 4653 }, { "epoch": 1.494300850858886, "grad_norm": 0.8458186388015747, "learning_rate": 0.00012913701045964173, "loss": 0.4882, "step": 4654 }, { "epoch": 1.4946219296837373, "grad_norm": 0.9558455944061279, "learning_rate": 0.00012910361668282719, "loss": 0.53, 
"step": 4655 }, { "epoch": 1.4949430085085889, "grad_norm": 0.9773117899894714, "learning_rate": 0.00012907021935982526, "loss": 0.5155, "step": 4656 }, { "epoch": 1.4952640873334404, "grad_norm": 1.0766710042953491, "learning_rate": 0.00012903681849470527, "loss": 0.5791, "step": 4657 }, { "epoch": 1.4955851661582917, "grad_norm": 0.9035495519638062, "learning_rate": 0.00012900341409153703, "loss": 0.5649, "step": 4658 }, { "epoch": 1.4959062449831433, "grad_norm": 1.3426756858825684, "learning_rate": 0.00012897000615439075, "loss": 0.4312, "step": 4659 }, { "epoch": 1.4962273238079948, "grad_norm": 1.0395222902297974, "learning_rate": 0.0001289365946873371, "loss": 0.5155, "step": 4660 }, { "epoch": 1.4965484026328464, "grad_norm": 0.9974014163017273, "learning_rate": 0.00012890317969444716, "loss": 0.6004, "step": 4661 }, { "epoch": 1.496869481457698, "grad_norm": 0.9123116135597229, "learning_rate": 0.00012886976117979247, "loss": 0.6294, "step": 4662 }, { "epoch": 1.4971905602825495, "grad_norm": 0.7099243402481079, "learning_rate": 0.00012883633914744492, "loss": 0.3992, "step": 4663 }, { "epoch": 1.4975116391074008, "grad_norm": 0.47554677724838257, "learning_rate": 0.00012880291360147693, "loss": 0.3285, "step": 4664 }, { "epoch": 1.4978327179322524, "grad_norm": 0.7324989438056946, "learning_rate": 0.00012876948454596128, "loss": 0.6776, "step": 4665 }, { "epoch": 1.498153796757104, "grad_norm": 0.677893877029419, "learning_rate": 0.00012873605198497124, "loss": 0.5352, "step": 4666 }, { "epoch": 1.4984748755819552, "grad_norm": 0.9297817945480347, "learning_rate": 0.00012870261592258037, "loss": 0.451, "step": 4667 }, { "epoch": 1.4987959544068068, "grad_norm": 0.8155097961425781, "learning_rate": 0.00012866917636286286, "loss": 0.5778, "step": 4668 }, { "epoch": 1.4991170332316583, "grad_norm": 0.7590045928955078, "learning_rate": 0.00012863573330989313, "loss": 0.3355, "step": 4669 }, { "epoch": 1.49943811205651, "grad_norm": 0.9300277233123779, 
"learning_rate": 0.0001286022867677462, "loss": 0.3444, "step": 4670 }, { "epoch": 1.4997591908813614, "grad_norm": 0.7806175947189331, "learning_rate": 0.00012856883674049736, "loss": 0.4089, "step": 4671 }, { "epoch": 1.500080269706213, "grad_norm": 0.7708941102027893, "learning_rate": 0.00012853538323222242, "loss": 0.5435, "step": 4672 }, { "epoch": 1.5004013485310643, "grad_norm": 1.0800435543060303, "learning_rate": 0.0001285019262469976, "loss": 0.6181, "step": 4673 }, { "epoch": 1.5007224273559159, "grad_norm": 1.2180079221725464, "learning_rate": 0.00012846846578889955, "loss": 0.705, "step": 4674 }, { "epoch": 1.5010435061807674, "grad_norm": 1.081411600112915, "learning_rate": 0.00012843500186200527, "loss": 0.6284, "step": 4675 }, { "epoch": 1.5013645850056188, "grad_norm": 0.7021897435188293, "learning_rate": 0.00012840153447039228, "loss": 0.5129, "step": 4676 }, { "epoch": 1.5016856638304703, "grad_norm": 0.9453624486923218, "learning_rate": 0.00012836806361813844, "loss": 0.5492, "step": 4677 }, { "epoch": 1.5020067426553219, "grad_norm": 0.9042197465896606, "learning_rate": 0.00012833458930932212, "loss": 0.7457, "step": 4678 }, { "epoch": 1.5023278214801734, "grad_norm": 1.1903046369552612, "learning_rate": 0.00012830111154802203, "loss": 0.5891, "step": 4679 }, { "epoch": 1.502648900305025, "grad_norm": 0.779662013053894, "learning_rate": 0.00012826763033831735, "loss": 0.665, "step": 4680 }, { "epoch": 1.5029699791298765, "grad_norm": 0.9202207326889038, "learning_rate": 0.00012823414568428768, "loss": 0.7502, "step": 4681 }, { "epoch": 1.5032910579547278, "grad_norm": 0.9345004558563232, "learning_rate": 0.00012820065759001293, "loss": 0.6421, "step": 4682 }, { "epoch": 1.5036121367795794, "grad_norm": 0.7800152897834778, "learning_rate": 0.00012816716605957367, "loss": 0.5856, "step": 4683 }, { "epoch": 1.503933215604431, "grad_norm": 0.8760406374931335, "learning_rate": 0.00012813367109705063, "loss": 0.6065, "step": 4684 }, { "epoch": 
1.5042542944292823, "grad_norm": 0.9938950538635254, "learning_rate": 0.00012810017270652513, "loss": 0.7377, "step": 4685 }, { "epoch": 1.5045753732541338, "grad_norm": 0.9110274910926819, "learning_rate": 0.0001280666708920788, "loss": 0.6981, "step": 4686 }, { "epoch": 1.5048964520789854, "grad_norm": 0.902837336063385, "learning_rate": 0.00012803316565779377, "loss": 0.6673, "step": 4687 }, { "epoch": 1.505217530903837, "grad_norm": 0.8513844013214111, "learning_rate": 0.0001279996570077525, "loss": 0.623, "step": 4688 }, { "epoch": 1.5055386097286885, "grad_norm": 0.8973796963691711, "learning_rate": 0.000127966144946038, "loss": 0.5205, "step": 4689 }, { "epoch": 1.50585968855354, "grad_norm": 0.9134697318077087, "learning_rate": 0.00012793262947673355, "loss": 0.6839, "step": 4690 }, { "epoch": 1.5061807673783913, "grad_norm": 1.0323461294174194, "learning_rate": 0.00012789911060392294, "loss": 0.8083, "step": 4691 }, { "epoch": 1.506501846203243, "grad_norm": 0.8523952960968018, "learning_rate": 0.00012786558833169031, "loss": 0.4945, "step": 4692 }, { "epoch": 1.5068229250280945, "grad_norm": 1.019515037536621, "learning_rate": 0.0001278320626641203, "loss": 0.5435, "step": 4693 }, { "epoch": 1.5071440038529458, "grad_norm": 0.8791556358337402, "learning_rate": 0.00012779853360529785, "loss": 0.6037, "step": 4694 }, { "epoch": 1.5074650826777973, "grad_norm": 0.9916840195655823, "learning_rate": 0.00012776500115930842, "loss": 0.581, "step": 4695 }, { "epoch": 1.5077861615026489, "grad_norm": 0.7988699674606323, "learning_rate": 0.00012773146533023782, "loss": 0.5452, "step": 4696 }, { "epoch": 1.5081072403275004, "grad_norm": 0.7620382308959961, "learning_rate": 0.00012769792612217224, "loss": 0.5199, "step": 4697 }, { "epoch": 1.508428319152352, "grad_norm": 1.141939401626587, "learning_rate": 0.0001276643835391984, "loss": 0.7435, "step": 4698 }, { "epoch": 1.5087493979772035, "grad_norm": 0.9198951125144958, "learning_rate": 0.00012763083758540337, 
"loss": 0.6203, "step": 4699 }, { "epoch": 1.5090704768020549, "grad_norm": 0.7245876789093018, "learning_rate": 0.0001275972882648746, "loss": 0.5134, "step": 4700 }, { "epoch": 1.5093915556269064, "grad_norm": 0.7252978086471558, "learning_rate": 0.0001275637355816999, "loss": 0.4891, "step": 4701 }, { "epoch": 1.5097126344517577, "grad_norm": 0.7837473750114441, "learning_rate": 0.0001275301795399677, "loss": 0.4528, "step": 4702 }, { "epoch": 1.5100337132766093, "grad_norm": 0.8873403668403625, "learning_rate": 0.00012749662014376663, "loss": 0.5449, "step": 4703 }, { "epoch": 1.5103547921014608, "grad_norm": 0.9590548872947693, "learning_rate": 0.00012746305739718577, "loss": 0.5991, "step": 4704 }, { "epoch": 1.5106758709263124, "grad_norm": 1.017732858657837, "learning_rate": 0.00012742949130431467, "loss": 0.5145, "step": 4705 }, { "epoch": 1.510996949751164, "grad_norm": 0.720311164855957, "learning_rate": 0.00012739592186924328, "loss": 0.4315, "step": 4706 }, { "epoch": 1.5113180285760155, "grad_norm": 0.7741522192955017, "learning_rate": 0.00012736234909606185, "loss": 0.529, "step": 4707 }, { "epoch": 1.511639107400867, "grad_norm": 1.115466594696045, "learning_rate": 0.00012732877298886124, "loss": 0.6952, "step": 4708 }, { "epoch": 1.5119601862257184, "grad_norm": 0.8059918880462646, "learning_rate": 0.00012729519355173254, "loss": 0.5197, "step": 4709 }, { "epoch": 1.51228126505057, "grad_norm": 1.1203827857971191, "learning_rate": 0.0001272616107887673, "loss": 0.5233, "step": 4710 }, { "epoch": 1.5126023438754213, "grad_norm": 1.13717520236969, "learning_rate": 0.00012722802470405744, "loss": 0.5846, "step": 4711 }, { "epoch": 1.5129234227002728, "grad_norm": 0.5460472106933594, "learning_rate": 0.0001271944353016954, "loss": 0.3628, "step": 4712 }, { "epoch": 1.5132445015251244, "grad_norm": 0.9577854871749878, "learning_rate": 0.00012716084258577388, "loss": 0.5651, "step": 4713 }, { "epoch": 1.513565580349976, "grad_norm": 0.5291711091995239, 
"learning_rate": 0.0001271272465603861, "loss": 0.3436, "step": 4714 }, { "epoch": 1.5138866591748275, "grad_norm": 0.6409555673599243, "learning_rate": 0.0001270936472296256, "loss": 0.7887, "step": 4715 }, { "epoch": 1.514207737999679, "grad_norm": 0.6114294528961182, "learning_rate": 0.00012706004459758636, "loss": 0.7388, "step": 4716 }, { "epoch": 1.5145288168245306, "grad_norm": 0.6325629353523254, "learning_rate": 0.00012702643866836278, "loss": 0.416, "step": 4717 }, { "epoch": 1.5148498956493819, "grad_norm": 0.9128963351249695, "learning_rate": 0.00012699282944604967, "loss": 0.4181, "step": 4718 }, { "epoch": 1.5151709744742334, "grad_norm": 0.7952489256858826, "learning_rate": 0.00012695921693474212, "loss": 0.2609, "step": 4719 }, { "epoch": 1.5154920532990848, "grad_norm": 0.715871274471283, "learning_rate": 0.00012692560113853574, "loss": 0.269, "step": 4720 }, { "epoch": 1.5158131321239363, "grad_norm": 0.9184644222259521, "learning_rate": 0.00012689198206152657, "loss": 0.5186, "step": 4721 }, { "epoch": 1.5161342109487879, "grad_norm": 1.1602627038955688, "learning_rate": 0.00012685835970781097, "loss": 0.736, "step": 4722 }, { "epoch": 1.5164552897736394, "grad_norm": 1.0593267679214478, "learning_rate": 0.0001268247340814857, "loss": 0.6185, "step": 4723 }, { "epoch": 1.516776368598491, "grad_norm": 0.8425109386444092, "learning_rate": 0.00012679110518664795, "loss": 0.555, "step": 4724 }, { "epoch": 1.5170974474233425, "grad_norm": 1.03610360622406, "learning_rate": 0.00012675747302739527, "loss": 0.8006, "step": 4725 }, { "epoch": 1.517418526248194, "grad_norm": 0.6738163828849792, "learning_rate": 0.00012672383760782568, "loss": 0.431, "step": 4726 }, { "epoch": 1.5177396050730454, "grad_norm": 1.9449659585952759, "learning_rate": 0.00012669019893203759, "loss": 0.658, "step": 4727 }, { "epoch": 1.518060683897897, "grad_norm": 0.9231836795806885, "learning_rate": 0.00012665655700412967, "loss": 0.6246, "step": 4728 }, { "epoch": 
1.5183817627227483, "grad_norm": 0.884365975856781, "learning_rate": 0.00012662291182820114, "loss": 0.6044, "step": 4729 }, { "epoch": 1.5187028415475998, "grad_norm": 0.9164744019508362, "learning_rate": 0.00012658926340835156, "loss": 0.6805, "step": 4730 }, { "epoch": 1.5190239203724514, "grad_norm": 1.071161150932312, "learning_rate": 0.00012655561174868088, "loss": 0.6354, "step": 4731 }, { "epoch": 1.519344999197303, "grad_norm": 1.1153299808502197, "learning_rate": 0.00012652195685328946, "loss": 0.8903, "step": 4732 }, { "epoch": 1.5196660780221545, "grad_norm": 1.0313321352005005, "learning_rate": 0.00012648829872627807, "loss": 0.5328, "step": 4733 }, { "epoch": 1.519987156847006, "grad_norm": 1.025863528251648, "learning_rate": 0.00012645463737174782, "loss": 0.6995, "step": 4734 }, { "epoch": 1.5203082356718576, "grad_norm": 0.8044242858886719, "learning_rate": 0.00012642097279380027, "loss": 0.6276, "step": 4735 }, { "epoch": 1.520629314496709, "grad_norm": 1.842185139656067, "learning_rate": 0.0001263873049965373, "loss": 0.6605, "step": 4736 }, { "epoch": 1.5209503933215605, "grad_norm": 1.1269233226776123, "learning_rate": 0.00012635363398406127, "loss": 0.6219, "step": 4737 }, { "epoch": 1.5212714721464118, "grad_norm": 1.2168982028961182, "learning_rate": 0.00012631995976047488, "loss": 0.9384, "step": 4738 }, { "epoch": 1.5215925509712633, "grad_norm": 0.9263548254966736, "learning_rate": 0.0001262862823298812, "loss": 0.6786, "step": 4739 }, { "epoch": 1.5219136297961149, "grad_norm": 0.9486871361732483, "learning_rate": 0.00012625260169638378, "loss": 0.7042, "step": 4740 }, { "epoch": 1.5222347086209664, "grad_norm": 1.2256191968917847, "learning_rate": 0.00012621891786408648, "loss": 0.8557, "step": 4741 }, { "epoch": 1.522555787445818, "grad_norm": 0.9984161853790283, "learning_rate": 0.00012618523083709357, "loss": 0.6621, "step": 4742 }, { "epoch": 1.5228768662706695, "grad_norm": 1.0978655815124512, "learning_rate": 
0.00012615154061950968, "loss": 0.6981, "step": 4743 }, { "epoch": 1.523197945095521, "grad_norm": 0.8551576137542725, "learning_rate": 0.00012611784721543995, "loss": 0.6271, "step": 4744 }, { "epoch": 1.5235190239203724, "grad_norm": 0.907645583152771, "learning_rate": 0.00012608415062898972, "loss": 0.6353, "step": 4745 }, { "epoch": 1.523840102745224, "grad_norm": 1.3650954961776733, "learning_rate": 0.00012605045086426487, "loss": 0.5611, "step": 4746 }, { "epoch": 1.5241611815700753, "grad_norm": 0.9021798372268677, "learning_rate": 0.00012601674792537157, "loss": 0.6619, "step": 4747 }, { "epoch": 1.5244822603949268, "grad_norm": 1.0378648042678833, "learning_rate": 0.00012598304181641646, "loss": 0.6465, "step": 4748 }, { "epoch": 1.5248033392197784, "grad_norm": 1.0701501369476318, "learning_rate": 0.00012594933254150655, "loss": 0.6018, "step": 4749 }, { "epoch": 1.52512441804463, "grad_norm": 0.9513067603111267, "learning_rate": 0.00012591562010474914, "loss": 0.5637, "step": 4750 }, { "epoch": 1.5254454968694815, "grad_norm": 0.8903157114982605, "learning_rate": 0.00012588190451025207, "loss": 0.6278, "step": 4751 }, { "epoch": 1.525766575694333, "grad_norm": 1.711920142173767, "learning_rate": 0.00012584818576212345, "loss": 0.6439, "step": 4752 }, { "epoch": 1.5260876545191846, "grad_norm": 1.3381930589675903, "learning_rate": 0.0001258144638644718, "loss": 0.6182, "step": 4753 }, { "epoch": 1.526408733344036, "grad_norm": 0.8519673347473145, "learning_rate": 0.000125780738821406, "loss": 0.5031, "step": 4754 }, { "epoch": 1.5267298121688875, "grad_norm": 1.123969554901123, "learning_rate": 0.0001257470106370354, "loss": 0.702, "step": 4755 }, { "epoch": 1.5270508909937388, "grad_norm": 0.7065092921257019, "learning_rate": 0.00012571327931546963, "loss": 0.438, "step": 4756 }, { "epoch": 1.5273719698185904, "grad_norm": 1.5530750751495361, "learning_rate": 0.00012567954486081878, "loss": 0.5497, "step": 4757 }, { "epoch": 1.527693048643442, 
"grad_norm": 1.869837760925293, "learning_rate": 0.0001256458072771933, "loss": 0.5909, "step": 4758 }, { "epoch": 1.5280141274682935, "grad_norm": 0.8413248658180237, "learning_rate": 0.00012561206656870396, "loss": 0.5934, "step": 4759 }, { "epoch": 1.528335206293145, "grad_norm": 0.7256144285202026, "learning_rate": 0.00012557832273946205, "loss": 0.4381, "step": 4760 }, { "epoch": 1.5286562851179966, "grad_norm": 1.1708571910858154, "learning_rate": 0.00012554457579357905, "loss": 0.5908, "step": 4761 }, { "epoch": 1.5289773639428481, "grad_norm": 0.7625494003295898, "learning_rate": 0.00012551082573516705, "loss": 0.4367, "step": 4762 }, { "epoch": 1.5292984427676994, "grad_norm": 0.81993567943573, "learning_rate": 0.00012547707256833823, "loss": 0.5166, "step": 4763 }, { "epoch": 1.529619521592551, "grad_norm": 0.6082926392555237, "learning_rate": 0.00012544331629720543, "loss": 0.4116, "step": 4764 }, { "epoch": 1.5299406004174023, "grad_norm": 0.818605899810791, "learning_rate": 0.00012540955692588173, "loss": 0.6703, "step": 4765 }, { "epoch": 1.5302616792422539, "grad_norm": 0.6201203465461731, "learning_rate": 0.00012537579445848058, "loss": 1.0005, "step": 4766 }, { "epoch": 1.5305827580671054, "grad_norm": 0.7237659692764282, "learning_rate": 0.00012534202889911584, "loss": 0.6653, "step": 4767 }, { "epoch": 1.530903836891957, "grad_norm": 0.7227284908294678, "learning_rate": 0.00012530826025190174, "loss": 0.5275, "step": 4768 }, { "epoch": 1.5312249157168085, "grad_norm": 0.7782474160194397, "learning_rate": 0.00012527448852095295, "loss": 0.4055, "step": 4769 }, { "epoch": 1.53154599454166, "grad_norm": 0.8507752418518066, "learning_rate": 0.00012524071371038434, "loss": 0.2621, "step": 4770 }, { "epoch": 1.5318670733665116, "grad_norm": 0.8687464594841003, "learning_rate": 0.0001252069358243114, "loss": 0.3743, "step": 4771 }, { "epoch": 1.532188152191363, "grad_norm": 0.7505998015403748, "learning_rate": 0.00012517315486684972, "loss": 0.4383, 
"step": 4772 }, { "epoch": 1.5325092310162145, "grad_norm": 0.9363805055618286, "learning_rate": 0.0001251393708421155, "loss": 0.7721, "step": 4773 }, { "epoch": 1.5328303098410658, "grad_norm": 0.9391258955001831, "learning_rate": 0.00012510558375422522, "loss": 0.7531, "step": 4774 }, { "epoch": 1.5331513886659174, "grad_norm": 0.9769285917282104, "learning_rate": 0.0001250717936072957, "loss": 0.8322, "step": 4775 }, { "epoch": 1.533472467490769, "grad_norm": 0.670604944229126, "learning_rate": 0.00012503800040544416, "loss": 0.4766, "step": 4776 }, { "epoch": 1.5337935463156205, "grad_norm": 0.7332351803779602, "learning_rate": 0.00012500420415278822, "loss": 0.6418, "step": 4777 }, { "epoch": 1.534114625140472, "grad_norm": 0.7202760577201843, "learning_rate": 0.00012497040485344584, "loss": 0.5964, "step": 4778 }, { "epoch": 1.5344357039653236, "grad_norm": 0.9445421695709229, "learning_rate": 0.0001249366025115354, "loss": 0.6184, "step": 4779 }, { "epoch": 1.5347567827901751, "grad_norm": 0.7218775749206543, "learning_rate": 0.0001249027971311756, "loss": 0.5217, "step": 4780 }, { "epoch": 1.5350778616150265, "grad_norm": 0.7306594252586365, "learning_rate": 0.0001248689887164855, "loss": 0.5849, "step": 4781 }, { "epoch": 1.535398940439878, "grad_norm": 0.942954421043396, "learning_rate": 0.00012483517727158454, "loss": 0.6694, "step": 4782 }, { "epoch": 1.5357200192647293, "grad_norm": 0.7879648208618164, "learning_rate": 0.00012480136280059256, "loss": 0.5698, "step": 4783 }, { "epoch": 1.536041098089581, "grad_norm": 0.835502028465271, "learning_rate": 0.00012476754530762977, "loss": 0.63, "step": 4784 }, { "epoch": 1.5363621769144324, "grad_norm": 0.9831227660179138, "learning_rate": 0.00012473372479681672, "loss": 0.7113, "step": 4785 }, { "epoch": 1.536683255739284, "grad_norm": 0.9741390347480774, "learning_rate": 0.00012469990127227432, "loss": 0.6879, "step": 4786 }, { "epoch": 1.5370043345641355, "grad_norm": 0.7839673757553101, "learning_rate": 
0.00012466607473812387, "loss": 0.5085, "step": 4787 }, { "epoch": 1.537325413388987, "grad_norm": 0.9261371493339539, "learning_rate": 0.000124632245198487, "loss": 0.6465, "step": 4788 }, { "epoch": 1.5376464922138386, "grad_norm": 1.3706035614013672, "learning_rate": 0.0001245984126574858, "loss": 0.8686, "step": 4789 }, { "epoch": 1.53796757103869, "grad_norm": 0.6611718535423279, "learning_rate": 0.00012456457711924266, "loss": 0.3897, "step": 4790 }, { "epoch": 1.5382886498635415, "grad_norm": 0.9132871031761169, "learning_rate": 0.00012453073858788026, "loss": 0.6996, "step": 4791 }, { "epoch": 1.5386097286883929, "grad_norm": 0.9300896525382996, "learning_rate": 0.00012449689706752178, "loss": 0.7055, "step": 4792 }, { "epoch": 1.5389308075132444, "grad_norm": 0.7551592588424683, "learning_rate": 0.00012446305256229073, "loss": 0.6199, "step": 4793 }, { "epoch": 1.539251886338096, "grad_norm": 0.9444193243980408, "learning_rate": 0.00012442920507631092, "loss": 0.6397, "step": 4794 }, { "epoch": 1.5395729651629475, "grad_norm": 0.8001585006713867, "learning_rate": 0.00012439535461370658, "loss": 0.4635, "step": 4795 }, { "epoch": 1.539894043987799, "grad_norm": 0.9087579250335693, "learning_rate": 0.00012436150117860225, "loss": 0.5755, "step": 4796 }, { "epoch": 1.5402151228126506, "grad_norm": 0.9415332674980164, "learning_rate": 0.00012432764477512292, "loss": 0.6303, "step": 4797 }, { "epoch": 1.5405362016375022, "grad_norm": 0.8173394203186035, "learning_rate": 0.00012429378540739386, "loss": 0.5832, "step": 4798 }, { "epoch": 1.5408572804623535, "grad_norm": 1.0621390342712402, "learning_rate": 0.00012425992307954075, "loss": 0.6614, "step": 4799 }, { "epoch": 1.541178359287205, "grad_norm": 1.3637477159500122, "learning_rate": 0.0001242260577956896, "loss": 0.6073, "step": 4800 }, { "epoch": 1.5414994381120564, "grad_norm": 1.0318750143051147, "learning_rate": 0.00012419218955996676, "loss": 0.6623, "step": 4801 }, { "epoch": 1.541820516936908, 
"grad_norm": 1.1521292924880981, "learning_rate": 0.00012415831837649905, "loss": 0.5285, "step": 4802 }, { "epoch": 1.5421415957617595, "grad_norm": 1.148503065109253, "learning_rate": 0.0001241244442494135, "loss": 0.5444, "step": 4803 }, { "epoch": 1.542462674586611, "grad_norm": 0.9957001805305481, "learning_rate": 0.0001240905671828376, "loss": 0.5206, "step": 4804 }, { "epoch": 1.5427837534114626, "grad_norm": 0.6883019804954529, "learning_rate": 0.00012405668718089917, "loss": 0.4488, "step": 4805 }, { "epoch": 1.5431048322363141, "grad_norm": 1.0081177949905396, "learning_rate": 0.00012402280424772639, "loss": 0.6462, "step": 4806 }, { "epoch": 1.5434259110611657, "grad_norm": 0.8516654372215271, "learning_rate": 0.00012398891838744778, "loss": 0.5383, "step": 4807 }, { "epoch": 1.543746989886017, "grad_norm": 0.8348969221115112, "learning_rate": 0.0001239550296041922, "loss": 0.484, "step": 4808 }, { "epoch": 1.5440680687108685, "grad_norm": 1.3261665105819702, "learning_rate": 0.00012392113790208895, "loss": 0.5407, "step": 4809 }, { "epoch": 1.5443891475357199, "grad_norm": 1.114880919456482, "learning_rate": 0.0001238872432852676, "loss": 0.7105, "step": 4810 }, { "epoch": 1.5447102263605714, "grad_norm": 0.8749669790267944, "learning_rate": 0.0001238533457578581, "loss": 0.4329, "step": 4811 }, { "epoch": 1.545031305185423, "grad_norm": 0.7413171529769897, "learning_rate": 0.0001238194453239908, "loss": 0.3563, "step": 4812 }, { "epoch": 1.5453523840102745, "grad_norm": 0.7285036444664001, "learning_rate": 0.0001237855419877963, "loss": 0.4244, "step": 4813 }, { "epoch": 1.545673462835126, "grad_norm": 0.6822255253791809, "learning_rate": 0.0001237516357534057, "loss": 0.3974, "step": 4814 }, { "epoch": 1.5459945416599776, "grad_norm": 0.44284263253211975, "learning_rate": 0.0001237177266249503, "loss": 0.6239, "step": 4815 }, { "epoch": 1.5463156204848292, "grad_norm": 0.6735876798629761, "learning_rate": 0.0001236838146065619, "loss": 1.0601, "step": 
4816 }, { "epoch": 1.5466366993096805, "grad_norm": 0.6792389154434204, "learning_rate": 0.00012364989970237248, "loss": 0.6631, "step": 4817 }, { "epoch": 1.546957778134532, "grad_norm": 0.8029338717460632, "learning_rate": 0.00012361598191651453, "loss": 0.446, "step": 4818 }, { "epoch": 1.5472788569593834, "grad_norm": 0.5691446661949158, "learning_rate": 0.00012358206125312083, "loss": 0.2494, "step": 4819 }, { "epoch": 1.547599935784235, "grad_norm": 0.7446752786636353, "learning_rate": 0.00012354813771632447, "loss": 0.3416, "step": 4820 }, { "epoch": 1.5479210146090865, "grad_norm": 0.5998992323875427, "learning_rate": 0.000123514211310259, "loss": 0.2877, "step": 4821 }, { "epoch": 1.548242093433938, "grad_norm": 0.9401177763938904, "learning_rate": 0.00012348028203905818, "loss": 0.7846, "step": 4822 }, { "epoch": 1.5485631722587896, "grad_norm": 0.9916895627975464, "learning_rate": 0.00012344634990685624, "loss": 0.8004, "step": 4823 }, { "epoch": 1.5488842510836411, "grad_norm": 0.8018080592155457, "learning_rate": 0.0001234124149177877, "loss": 0.6483, "step": 4824 }, { "epoch": 1.5492053299084927, "grad_norm": 0.9374871850013733, "learning_rate": 0.00012337847707598738, "loss": 0.7748, "step": 4825 }, { "epoch": 1.549526408733344, "grad_norm": 0.975395917892456, "learning_rate": 0.00012334453638559057, "loss": 0.6303, "step": 4826 }, { "epoch": 1.5498474875581956, "grad_norm": 1.0059309005737305, "learning_rate": 0.0001233105928507328, "loss": 0.4923, "step": 4827 }, { "epoch": 1.550168566383047, "grad_norm": 0.9571434259414673, "learning_rate": 0.00012327664647554998, "loss": 0.7088, "step": 4828 }, { "epoch": 1.5504896452078984, "grad_norm": 0.9377606511116028, "learning_rate": 0.0001232426972641784, "loss": 0.7066, "step": 4829 }, { "epoch": 1.55081072403275, "grad_norm": 0.748374342918396, "learning_rate": 0.00012320874522075468, "loss": 0.4916, "step": 4830 }, { "epoch": 1.5511318028576015, "grad_norm": 0.8802591562271118, "learning_rate": 
0.00012317479034941573, "loss": 0.5684, "step": 4831 }, { "epoch": 1.551452881682453, "grad_norm": 0.7750247120857239, "learning_rate": 0.0001231408326542989, "loss": 0.5093, "step": 4832 }, { "epoch": 1.5517739605073047, "grad_norm": 0.7303853034973145, "learning_rate": 0.00012310687213954182, "loss": 0.582, "step": 4833 }, { "epoch": 1.5520950393321562, "grad_norm": 0.9113678932189941, "learning_rate": 0.0001230729088092824, "loss": 0.5847, "step": 4834 }, { "epoch": 1.5524161181570075, "grad_norm": 0.9570289254188538, "learning_rate": 0.00012303894266765908, "loss": 0.622, "step": 4835 }, { "epoch": 1.552737196981859, "grad_norm": 0.9025993347167969, "learning_rate": 0.00012300497371881046, "loss": 0.5098, "step": 4836 }, { "epoch": 1.5530582758067104, "grad_norm": 1.2178800106048584, "learning_rate": 0.00012297100196687557, "loss": 0.6402, "step": 4837 }, { "epoch": 1.553379354631562, "grad_norm": 0.9967452883720398, "learning_rate": 0.00012293702741599378, "loss": 0.6723, "step": 4838 }, { "epoch": 1.5537004334564135, "grad_norm": 0.7148991823196411, "learning_rate": 0.00012290305007030478, "loss": 0.5875, "step": 4839 }, { "epoch": 1.554021512281265, "grad_norm": 1.1900482177734375, "learning_rate": 0.00012286906993394856, "loss": 0.6827, "step": 4840 }, { "epoch": 1.5543425911061166, "grad_norm": 1.2599159479141235, "learning_rate": 0.00012283508701106557, "loss": 0.8436, "step": 4841 }, { "epoch": 1.5546636699309682, "grad_norm": 1.0731333494186401, "learning_rate": 0.0001228011013057965, "loss": 0.7042, "step": 4842 }, { "epoch": 1.5549847487558197, "grad_norm": 0.7968559861183167, "learning_rate": 0.0001227671128222824, "loss": 0.5391, "step": 4843 }, { "epoch": 1.555305827580671, "grad_norm": 1.2837064266204834, "learning_rate": 0.00012273312156466464, "loss": 0.8585, "step": 4844 }, { "epoch": 1.5556269064055226, "grad_norm": 0.7288888692855835, "learning_rate": 0.00012269912753708502, "loss": 0.5193, "step": 4845 }, { "epoch": 1.555947985230374, 
"grad_norm": 0.8178963661193848, "learning_rate": 0.0001226651307436855, "loss": 0.5001, "step": 4846 }, { "epoch": 1.5562690640552255, "grad_norm": 1.0722037553787231, "learning_rate": 0.0001226311311886086, "loss": 0.5924, "step": 4847 }, { "epoch": 1.556590142880077, "grad_norm": 0.9165698885917664, "learning_rate": 0.00012259712887599698, "loss": 0.5656, "step": 4848 }, { "epoch": 1.5569112217049286, "grad_norm": 1.0873520374298096, "learning_rate": 0.00012256312380999376, "loss": 0.7788, "step": 4849 }, { "epoch": 1.5572323005297801, "grad_norm": 1.1277052164077759, "learning_rate": 0.00012252911599474237, "loss": 0.6183, "step": 4850 }, { "epoch": 1.5575533793546317, "grad_norm": 0.8544272780418396, "learning_rate": 0.0001224951054343865, "loss": 0.5859, "step": 4851 }, { "epoch": 1.5578744581794832, "grad_norm": 1.0256620645523071, "learning_rate": 0.0001224610921330703, "loss": 0.663, "step": 4852 }, { "epoch": 1.5581955370043346, "grad_norm": 0.9969653487205505, "learning_rate": 0.00012242707609493814, "loss": 0.6084, "step": 4853 }, { "epoch": 1.558516615829186, "grad_norm": 0.8547719717025757, "learning_rate": 0.00012239305732413477, "loss": 0.5886, "step": 4854 }, { "epoch": 1.5588376946540374, "grad_norm": 1.0089030265808105, "learning_rate": 0.0001223590358248053, "loss": 0.6111, "step": 4855 }, { "epoch": 1.559158773478889, "grad_norm": 1.0773385763168335, "learning_rate": 0.00012232501160109514, "loss": 0.5771, "step": 4856 }, { "epoch": 1.5594798523037405, "grad_norm": 0.8894745707511902, "learning_rate": 0.00012229098465715006, "loss": 0.5668, "step": 4857 }, { "epoch": 1.559800931128592, "grad_norm": 0.9482760429382324, "learning_rate": 0.00012225695499711606, "loss": 0.4387, "step": 4858 }, { "epoch": 1.5601220099534436, "grad_norm": 1.192726969718933, "learning_rate": 0.00012222292262513965, "loss": 0.5511, "step": 4859 }, { "epoch": 1.5604430887782952, "grad_norm": 0.9910078644752502, "learning_rate": 0.0001221888875453675, "loss": 0.6603, 
"step": 4860 }, { "epoch": 1.5607641676031467, "grad_norm": 1.0606755018234253, "learning_rate": 0.00012215484976194676, "loss": 0.534, "step": 4861 }, { "epoch": 1.561085246427998, "grad_norm": 1.2194321155548096, "learning_rate": 0.00012212080927902474, "loss": 0.4939, "step": 4862 }, { "epoch": 1.5614063252528496, "grad_norm": 0.4965793490409851, "learning_rate": 0.00012208676610074917, "loss": 0.3877, "step": 4863 }, { "epoch": 1.561727404077701, "grad_norm": 0.7706876993179321, "learning_rate": 0.00012205272023126821, "loss": 0.4616, "step": 4864 }, { "epoch": 1.5620484829025525, "grad_norm": 0.7944642305374146, "learning_rate": 0.00012201867167473015, "loss": 0.9236, "step": 4865 }, { "epoch": 1.562369561727404, "grad_norm": 0.7954860329627991, "learning_rate": 0.00012198462043528376, "loss": 0.9766, "step": 4866 }, { "epoch": 1.5626906405522556, "grad_norm": 0.6089110970497131, "learning_rate": 0.00012195056651707806, "loss": 0.7265, "step": 4867 }, { "epoch": 1.5630117193771071, "grad_norm": 0.6708793640136719, "learning_rate": 0.00012191650992426238, "loss": 0.3664, "step": 4868 }, { "epoch": 1.5633327982019587, "grad_norm": 1.029671549797058, "learning_rate": 0.00012188245066098647, "loss": 0.4111, "step": 4869 }, { "epoch": 1.5636538770268102, "grad_norm": 0.8126581311225891, "learning_rate": 0.00012184838873140032, "loss": 0.3168, "step": 4870 }, { "epoch": 1.5639749558516616, "grad_norm": 0.7029819488525391, "learning_rate": 0.00012181432413965428, "loss": 0.4029, "step": 4871 }, { "epoch": 1.5642960346765131, "grad_norm": 0.7372133135795593, "learning_rate": 0.000121780256889899, "loss": 0.7746, "step": 4872 }, { "epoch": 1.5646171135013645, "grad_norm": 0.8719139099121094, "learning_rate": 0.00012174618698628549, "loss": 0.6874, "step": 4873 }, { "epoch": 1.564938192326216, "grad_norm": 0.8879600167274475, "learning_rate": 0.00012171211443296505, "loss": 0.6931, "step": 4874 }, { "epoch": 1.5652592711510676, "grad_norm": 0.8598337769508362, 
"learning_rate": 0.00012167803923408934, "loss": 0.5866, "step": 4875 }, { "epoch": 1.565580349975919, "grad_norm": 0.6961860656738281, "learning_rate": 0.00012164396139381029, "loss": 0.4646, "step": 4876 }, { "epoch": 1.5659014288007707, "grad_norm": 0.7586543560028076, "learning_rate": 0.00012160988091628022, "loss": 0.5582, "step": 4877 }, { "epoch": 1.5662225076256222, "grad_norm": 0.82035231590271, "learning_rate": 0.00012157579780565169, "loss": 0.5765, "step": 4878 }, { "epoch": 1.5665435864504738, "grad_norm": 0.6588397026062012, "learning_rate": 0.00012154171206607764, "loss": 0.4265, "step": 4879 }, { "epoch": 1.566864665275325, "grad_norm": 0.7135642766952515, "learning_rate": 0.00012150762370171136, "loss": 0.4743, "step": 4880 }, { "epoch": 1.5671857441001766, "grad_norm": 0.7233707308769226, "learning_rate": 0.00012147353271670634, "loss": 0.474, "step": 4881 }, { "epoch": 1.567506822925028, "grad_norm": 0.773969829082489, "learning_rate": 0.00012143943911521647, "loss": 0.5916, "step": 4882 }, { "epoch": 1.5678279017498795, "grad_norm": 0.9881671667098999, "learning_rate": 0.000121405342901396, "loss": 0.6945, "step": 4883 }, { "epoch": 1.568148980574731, "grad_norm": 1.0888925790786743, "learning_rate": 0.00012137124407939943, "loss": 0.5791, "step": 4884 }, { "epoch": 1.5684700593995826, "grad_norm": 1.0212979316711426, "learning_rate": 0.00012133714265338161, "loss": 0.7161, "step": 4885 }, { "epoch": 1.5687911382244342, "grad_norm": 0.8693848848342896, "learning_rate": 0.00012130303862749767, "loss": 0.6095, "step": 4886 }, { "epoch": 1.5691122170492857, "grad_norm": 1.1114261150360107, "learning_rate": 0.00012126893200590308, "loss": 0.6328, "step": 4887 }, { "epoch": 1.569433295874137, "grad_norm": 0.9835912585258484, "learning_rate": 0.00012123482279275365, "loss": 0.6541, "step": 4888 }, { "epoch": 1.5697543746989886, "grad_norm": 0.8264690041542053, "learning_rate": 0.00012120071099220549, "loss": 0.4623, "step": 4889 }, { "epoch": 
1.5700754535238401, "grad_norm": 0.8061067461967468, "learning_rate": 0.00012116659660841499, "loss": 0.524, "step": 4890 }, { "epoch": 1.5703965323486915, "grad_norm": 0.846877932548523, "learning_rate": 0.00012113247964553888, "loss": 0.6132, "step": 4891 }, { "epoch": 1.570717611173543, "grad_norm": 1.043820858001709, "learning_rate": 0.00012109836010773424, "loss": 0.6175, "step": 4892 }, { "epoch": 1.5710386899983946, "grad_norm": 1.267200231552124, "learning_rate": 0.0001210642379991584, "loss": 0.6943, "step": 4893 }, { "epoch": 1.5713597688232461, "grad_norm": 1.0244359970092773, "learning_rate": 0.00012103011332396908, "loss": 0.6472, "step": 4894 }, { "epoch": 1.5716808476480977, "grad_norm": 1.727343201637268, "learning_rate": 0.00012099598608632428, "loss": 0.5938, "step": 4895 }, { "epoch": 1.5720019264729492, "grad_norm": 0.8495163917541504, "learning_rate": 0.0001209618562903822, "loss": 0.5524, "step": 4896 }, { "epoch": 1.5723230052978006, "grad_norm": 1.0026789903640747, "learning_rate": 0.00012092772394030152, "loss": 0.5923, "step": 4897 }, { "epoch": 1.572644084122652, "grad_norm": 1.362273097038269, "learning_rate": 0.00012089358904024117, "loss": 0.7854, "step": 4898 }, { "epoch": 1.5729651629475037, "grad_norm": 0.79168701171875, "learning_rate": 0.00012085945159436038, "loss": 0.5297, "step": 4899 }, { "epoch": 1.573286241772355, "grad_norm": 1.0517579317092896, "learning_rate": 0.00012082531160681869, "loss": 0.5276, "step": 4900 }, { "epoch": 1.5736073205972065, "grad_norm": 1.1384689807891846, "learning_rate": 0.00012079116908177593, "loss": 0.4365, "step": 4901 }, { "epoch": 1.573928399422058, "grad_norm": 0.916374683380127, "learning_rate": 0.00012075702402339231, "loss": 0.7095, "step": 4902 }, { "epoch": 1.5742494782469096, "grad_norm": 0.8095363974571228, "learning_rate": 0.00012072287643582825, "loss": 0.5287, "step": 4903 }, { "epoch": 1.5745705570717612, "grad_norm": 0.7615143060684204, "learning_rate": 0.0001206887263232446, 
"loss": 0.5034, "step": 4904 }, { "epoch": 1.5748916358966127, "grad_norm": 0.9699037671089172, "learning_rate": 0.00012065457368980236, "loss": 0.4912, "step": 4905 }, { "epoch": 1.575212714721464, "grad_norm": 1.3205819129943848, "learning_rate": 0.00012062041853966298, "loss": 0.7739, "step": 4906 }, { "epoch": 1.5755337935463156, "grad_norm": 1.0966612100601196, "learning_rate": 0.00012058626087698814, "loss": 0.5963, "step": 4907 }, { "epoch": 1.5758548723711672, "grad_norm": 1.8393914699554443, "learning_rate": 0.00012055210070593988, "loss": 0.5436, "step": 4908 }, { "epoch": 1.5761759511960185, "grad_norm": 1.010202407836914, "learning_rate": 0.00012051793803068046, "loss": 0.5538, "step": 4909 }, { "epoch": 1.57649703002087, "grad_norm": 0.9576059579849243, "learning_rate": 0.00012048377285537256, "loss": 0.4986, "step": 4910 }, { "epoch": 1.5768181088457216, "grad_norm": 1.2450673580169678, "learning_rate": 0.00012044960518417903, "loss": 0.6183, "step": 4911 }, { "epoch": 1.5771391876705732, "grad_norm": 0.9183635115623474, "learning_rate": 0.00012041543502126318, "loss": 0.5388, "step": 4912 }, { "epoch": 1.5774602664954247, "grad_norm": 0.63742595911026, "learning_rate": 0.0001203812623707885, "loss": 0.3583, "step": 4913 }, { "epoch": 1.5777813453202763, "grad_norm": 0.7811853885650635, "learning_rate": 0.00012034708723691881, "loss": 0.4776, "step": 4914 }, { "epoch": 1.5781024241451276, "grad_norm": 0.6186936497688293, "learning_rate": 0.00012031290962381823, "loss": 0.5344, "step": 4915 }, { "epoch": 1.5784235029699791, "grad_norm": 0.5814709663391113, "learning_rate": 0.00012027872953565125, "loss": 0.766, "step": 4916 }, { "epoch": 1.5787445817948307, "grad_norm": 0.767975926399231, "learning_rate": 0.00012024454697658261, "loss": 0.4243, "step": 4917 }, { "epoch": 1.579065660619682, "grad_norm": 0.8333182334899902, "learning_rate": 0.00012021036195077731, "loss": 0.4496, "step": 4918 }, { "epoch": 1.5793867394445336, "grad_norm": 
0.9080848693847656, "learning_rate": 0.0001201761744624007, "loss": 0.2366, "step": 4919 }, { "epoch": 1.5797078182693851, "grad_norm": 0.7227024435997009, "learning_rate": 0.00012014198451561842, "loss": 0.2954, "step": 4920 }, { "epoch": 1.5800288970942367, "grad_norm": 0.7397527694702148, "learning_rate": 0.00012010779211459648, "loss": 0.2761, "step": 4921 }, { "epoch": 1.5803499759190882, "grad_norm": 0.753508448600769, "learning_rate": 0.00012007359726350105, "loss": 0.4536, "step": 4922 }, { "epoch": 1.5806710547439398, "grad_norm": 1.0282949209213257, "learning_rate": 0.00012003939996649865, "loss": 0.7643, "step": 4923 }, { "epoch": 1.580992133568791, "grad_norm": 0.9250288605690002, "learning_rate": 0.00012000520022775617, "loss": 0.7326, "step": 4924 }, { "epoch": 1.5813132123936426, "grad_norm": 0.898415207862854, "learning_rate": 0.00011997099805144069, "loss": 0.6032, "step": 4925 }, { "epoch": 1.5816342912184942, "grad_norm": 0.9478784799575806, "learning_rate": 0.00011993679344171973, "loss": 0.6384, "step": 4926 }, { "epoch": 1.5819553700433455, "grad_norm": 1.0748721361160278, "learning_rate": 0.00011990258640276094, "loss": 0.3729, "step": 4927 }, { "epoch": 1.582276448868197, "grad_norm": 0.7065726518630981, "learning_rate": 0.00011986837693873237, "loss": 0.5082, "step": 4928 }, { "epoch": 1.5825975276930486, "grad_norm": 0.789629340171814, "learning_rate": 0.00011983416505380234, "loss": 0.5867, "step": 4929 }, { "epoch": 1.5829186065179002, "grad_norm": 0.8611066341400146, "learning_rate": 0.00011979995075213946, "loss": 0.6781, "step": 4930 }, { "epoch": 1.5832396853427517, "grad_norm": 0.8396785259246826, "learning_rate": 0.00011976573403791262, "loss": 0.6383, "step": 4931 }, { "epoch": 1.5835607641676033, "grad_norm": 0.779766321182251, "learning_rate": 0.00011973151491529107, "loss": 0.6317, "step": 4932 }, { "epoch": 1.5838818429924546, "grad_norm": 0.8786318898200989, "learning_rate": 0.00011969729338844429, "loss": 0.683, "step": 4933 
}, { "epoch": 1.5842029218173062, "grad_norm": 1.0004550218582153, "learning_rate": 0.000119663069461542, "loss": 0.6376, "step": 4934 }, { "epoch": 1.5845240006421577, "grad_norm": 0.8501437306404114, "learning_rate": 0.0001196288431387544, "loss": 0.6203, "step": 4935 }, { "epoch": 1.584845079467009, "grad_norm": 1.1006617546081543, "learning_rate": 0.00011959461442425177, "loss": 0.8655, "step": 4936 }, { "epoch": 1.5851661582918606, "grad_norm": 1.571334719657898, "learning_rate": 0.00011956038332220483, "loss": 0.5745, "step": 4937 }, { "epoch": 1.5854872371167121, "grad_norm": 0.8604090213775635, "learning_rate": 0.00011952614983678452, "loss": 0.5597, "step": 4938 }, { "epoch": 1.5858083159415637, "grad_norm": 0.9236046671867371, "learning_rate": 0.00011949191397216206, "loss": 0.5838, "step": 4939 }, { "epoch": 1.5861293947664152, "grad_norm": 0.8020102381706238, "learning_rate": 0.00011945767573250903, "loss": 0.5327, "step": 4940 }, { "epoch": 1.5864504735912668, "grad_norm": 1.1067618131637573, "learning_rate": 0.0001194234351219972, "loss": 0.5908, "step": 4941 }, { "epoch": 1.5867715524161181, "grad_norm": 1.1690189838409424, "learning_rate": 0.00011938919214479876, "loss": 0.6194, "step": 4942 }, { "epoch": 1.5870926312409697, "grad_norm": 0.883562445640564, "learning_rate": 0.00011935494680508606, "loss": 0.487, "step": 4943 }, { "epoch": 1.587413710065821, "grad_norm": 0.8721025586128235, "learning_rate": 0.00011932069910703176, "loss": 0.4942, "step": 4944 }, { "epoch": 1.5877347888906725, "grad_norm": 1.2519181966781616, "learning_rate": 0.0001192864490548089, "loss": 0.6544, "step": 4945 }, { "epoch": 1.588055867715524, "grad_norm": 0.8696319460868835, "learning_rate": 0.00011925219665259075, "loss": 0.5955, "step": 4946 }, { "epoch": 1.5883769465403756, "grad_norm": 0.7267146706581116, "learning_rate": 0.00011921794190455082, "loss": 0.5205, "step": 4947 }, { "epoch": 1.5886980253652272, "grad_norm": 0.9043694138526917, "learning_rate": 
0.00011918368481486297, "loss": 0.6211, "step": 4948 }, { "epoch": 1.5890191041900787, "grad_norm": 0.7768150568008423, "learning_rate": 0.00011914942538770131, "loss": 0.5542, "step": 4949 }, { "epoch": 1.5893401830149303, "grad_norm": 1.070797324180603, "learning_rate": 0.00011911516362724024, "loss": 0.5892, "step": 4950 }, { "epoch": 1.5896612618397816, "grad_norm": 0.8269063234329224, "learning_rate": 0.00011908089953765449, "loss": 0.5293, "step": 4951 }, { "epoch": 1.5899823406646332, "grad_norm": 0.8718098402023315, "learning_rate": 0.00011904663312311901, "loss": 0.5791, "step": 4952 }, { "epoch": 1.5903034194894845, "grad_norm": 0.6878900527954102, "learning_rate": 0.00011901236438780902, "loss": 0.3673, "step": 4953 }, { "epoch": 1.590624498314336, "grad_norm": 0.8858333230018616, "learning_rate": 0.00011897809333590014, "loss": 0.5325, "step": 4954 }, { "epoch": 1.5909455771391876, "grad_norm": 0.894816517829895, "learning_rate": 0.00011894381997156813, "loss": 0.4611, "step": 4955 }, { "epoch": 1.5912666559640392, "grad_norm": 1.1014503240585327, "learning_rate": 0.00011890954429898912, "loss": 0.6538, "step": 4956 }, { "epoch": 1.5915877347888907, "grad_norm": 0.9481959342956543, "learning_rate": 0.00011887526632233954, "loss": 0.5716, "step": 4957 }, { "epoch": 1.5919088136137423, "grad_norm": 1.010277271270752, "learning_rate": 0.00011884098604579597, "loss": 0.7025, "step": 4958 }, { "epoch": 1.5922298924385938, "grad_norm": 0.8406404256820679, "learning_rate": 0.00011880670347353539, "loss": 0.4381, "step": 4959 }, { "epoch": 1.5925509712634451, "grad_norm": 0.948979914188385, "learning_rate": 0.00011877241860973507, "loss": 0.5589, "step": 4960 }, { "epoch": 1.5928720500882967, "grad_norm": 0.9533628821372986, "learning_rate": 0.00011873813145857249, "loss": 0.5339, "step": 4961 }, { "epoch": 1.593193128913148, "grad_norm": 1.116711974143982, "learning_rate": 0.0001187038420242254, "loss": 0.4653, "step": 4962 }, { "epoch": 1.5935142077379996, 
"grad_norm": 0.6900465488433838, "learning_rate": 0.0001186695503108719, "loss": 0.4061, "step": 4963 }, { "epoch": 1.5938352865628511, "grad_norm": 0.548774003982544, "learning_rate": 0.00011863525632269032, "loss": 0.3729, "step": 4964 }, { "epoch": 1.5941563653877027, "grad_norm": 0.519191324710846, "learning_rate": 0.0001186009600638593, "loss": 0.5593, "step": 4965 }, { "epoch": 1.5944774442125542, "grad_norm": 0.5707334280014038, "learning_rate": 0.00011856666153855776, "loss": 0.4427, "step": 4966 }, { "epoch": 1.5947985230374058, "grad_norm": 0.5691962242126465, "learning_rate": 0.00011853236075096474, "loss": 0.264, "step": 4967 }, { "epoch": 1.5951196018622573, "grad_norm": 0.8893996477127075, "learning_rate": 0.00011849805770525983, "loss": 0.4208, "step": 4968 }, { "epoch": 1.5954406806871086, "grad_norm": 1.1631325483322144, "learning_rate": 0.0001184637524056227, "loss": 0.2257, "step": 4969 }, { "epoch": 1.5957617595119602, "grad_norm": 0.7988505363464355, "learning_rate": 0.00011842944485623335, "loss": 0.2805, "step": 4970 }, { "epoch": 1.5960828383368115, "grad_norm": 0.6999688148498535, "learning_rate": 0.00011839513506127203, "loss": 0.3814, "step": 4971 }, { "epoch": 1.596403917161663, "grad_norm": 0.7029821276664734, "learning_rate": 0.0001183608230249193, "loss": 0.5627, "step": 4972 }, { "epoch": 1.5967249959865146, "grad_norm": 0.8678010702133179, "learning_rate": 0.00011832650875135598, "loss": 0.7244, "step": 4973 }, { "epoch": 1.5970460748113662, "grad_norm": 0.8419655561447144, "learning_rate": 0.00011829219224476318, "loss": 0.6425, "step": 4974 }, { "epoch": 1.5973671536362177, "grad_norm": 0.9832744598388672, "learning_rate": 0.00011825787350932222, "loss": 0.7158, "step": 4975 }, { "epoch": 1.5976882324610693, "grad_norm": 0.9112509489059448, "learning_rate": 0.00011822355254921478, "loss": 0.5684, "step": 4976 }, { "epoch": 1.5980093112859208, "grad_norm": 1.4586632251739502, "learning_rate": 0.00011818922936862269, "loss": 0.4982, 
"step": 4977 }, { "epoch": 1.5983303901107722, "grad_norm": 0.8100280165672302, "learning_rate": 0.00011815490397172821, "loss": 0.6925, "step": 4978 }, { "epoch": 1.5986514689356237, "grad_norm": 0.8247403502464294, "learning_rate": 0.00011812057636271374, "loss": 0.6636, "step": 4979 }, { "epoch": 1.598972547760475, "grad_norm": 0.8497698903083801, "learning_rate": 0.00011808624654576202, "loss": 0.5672, "step": 4980 }, { "epoch": 1.5992936265853266, "grad_norm": 0.9448472261428833, "learning_rate": 0.00011805191452505602, "loss": 0.6503, "step": 4981 }, { "epoch": 1.5996147054101781, "grad_norm": 1.0496573448181152, "learning_rate": 0.00011801758030477897, "loss": 0.8055, "step": 4982 }, { "epoch": 1.5999357842350297, "grad_norm": 0.7179785966873169, "learning_rate": 0.00011798324388911444, "loss": 0.4881, "step": 4983 }, { "epoch": 1.6002568630598812, "grad_norm": 0.8595174551010132, "learning_rate": 0.00011794890528224618, "loss": 0.615, "step": 4984 }, { "epoch": 1.6005779418847328, "grad_norm": 0.8961060047149658, "learning_rate": 0.00011791456448835825, "loss": 0.6427, "step": 4985 }, { "epoch": 1.6008990207095843, "grad_norm": 0.963125467300415, "learning_rate": 0.00011788022151163495, "loss": 0.7784, "step": 4986 }, { "epoch": 1.6012200995344357, "grad_norm": 0.7629128694534302, "learning_rate": 0.00011784587635626094, "loss": 0.509, "step": 4987 }, { "epoch": 1.6015411783592872, "grad_norm": 1.0571444034576416, "learning_rate": 0.000117811529026421, "loss": 0.7576, "step": 4988 }, { "epoch": 1.6018622571841385, "grad_norm": 1.0005722045898438, "learning_rate": 0.00011777717952630031, "loss": 0.6151, "step": 4989 }, { "epoch": 1.60218333600899, "grad_norm": 0.7066619396209717, "learning_rate": 0.00011774282786008422, "loss": 0.5324, "step": 4990 }, { "epoch": 1.6025044148338417, "grad_norm": 0.6396430730819702, "learning_rate": 0.00011770847403195834, "loss": 0.5243, "step": 4991 }, { "epoch": 1.6028254936586932, "grad_norm": 1.4114651679992676, 
"learning_rate": 0.00011767411804610864, "loss": 0.4948, "step": 4992 }, { "epoch": 1.6031465724835448, "grad_norm": 0.9310147166252136, "learning_rate": 0.00011763975990672125, "loss": 0.7609, "step": 4993 }, { "epoch": 1.6034676513083963, "grad_norm": 0.8665596842765808, "learning_rate": 0.00011760539961798262, "loss": 0.6365, "step": 4994 }, { "epoch": 1.6037887301332479, "grad_norm": 0.831099808216095, "learning_rate": 0.00011757103718407947, "loss": 0.5575, "step": 4995 }, { "epoch": 1.6041098089580992, "grad_norm": 0.9257915616035461, "learning_rate": 0.00011753667260919872, "loss": 0.5272, "step": 4996 }, { "epoch": 1.6044308877829507, "grad_norm": 1.372697353363037, "learning_rate": 0.00011750230589752762, "loss": 0.5669, "step": 4997 }, { "epoch": 1.604751966607802, "grad_norm": 0.978588342666626, "learning_rate": 0.00011746793705325363, "loss": 0.532, "step": 4998 }, { "epoch": 1.6050730454326536, "grad_norm": 1.0235109329223633, "learning_rate": 0.00011743356608056449, "loss": 0.7292, "step": 4999 }, { "epoch": 1.6053941242575052, "grad_norm": 1.004314661026001, "learning_rate": 0.0001173991929836482, "loss": 0.5686, "step": 5000 }, { "epoch": 1.6057152030823567, "grad_norm": 1.8537969589233398, "learning_rate": 0.00011736481776669306, "loss": 0.4302, "step": 5001 }, { "epoch": 1.6060362819072083, "grad_norm": 0.9163577556610107, "learning_rate": 0.00011733044043388752, "loss": 0.5876, "step": 5002 }, { "epoch": 1.6063573607320598, "grad_norm": 1.2079983949661255, "learning_rate": 0.00011729606098942039, "loss": 0.6811, "step": 5003 }, { "epoch": 1.6066784395569114, "grad_norm": 1.0708050727844238, "learning_rate": 0.00011726167943748067, "loss": 0.5863, "step": 5004 }, { "epoch": 1.6069995183817627, "grad_norm": 0.7910842895507812, "learning_rate": 0.00011722729578225769, "loss": 0.4729, "step": 5005 }, { "epoch": 1.6073205972066142, "grad_norm": 0.8176678419113159, "learning_rate": 0.00011719291002794096, "loss": 0.5986, "step": 5006 }, { "epoch": 
1.6076416760314656, "grad_norm": 1.354212760925293, "learning_rate": 0.0001171585221787203, "loss": 0.5428, "step": 5007 }, { "epoch": 1.6079627548563171, "grad_norm": 1.120445966720581, "learning_rate": 0.00011712413223878578, "loss": 0.5019, "step": 5008 }, { "epoch": 1.6082838336811687, "grad_norm": 0.7986057996749878, "learning_rate": 0.00011708974021232769, "loss": 0.4202, "step": 5009 }, { "epoch": 1.6086049125060202, "grad_norm": 0.9796577095985413, "learning_rate": 0.00011705534610353657, "loss": 0.5712, "step": 5010 }, { "epoch": 1.6089259913308718, "grad_norm": 1.225272297859192, "learning_rate": 0.00011702094991660326, "loss": 0.615, "step": 5011 }, { "epoch": 1.6092470701557233, "grad_norm": 0.8488008975982666, "learning_rate": 0.00011698655165571886, "loss": 0.4476, "step": 5012 }, { "epoch": 1.6095681489805749, "grad_norm": 0.8429365158081055, "learning_rate": 0.00011695215132507464, "loss": 0.5331, "step": 5013 }, { "epoch": 1.6098892278054262, "grad_norm": 0.8646475672721863, "learning_rate": 0.00011691774892886222, "loss": 0.3656, "step": 5014 }, { "epoch": 1.6102103066302778, "grad_norm": 0.603814423084259, "learning_rate": 0.00011688334447127338, "loss": 0.3372, "step": 5015 }, { "epoch": 1.610531385455129, "grad_norm": 0.5882185697555542, "learning_rate": 0.00011684893795650027, "loss": 0.9782, "step": 5016 }, { "epoch": 1.6108524642799806, "grad_norm": 0.5701581239700317, "learning_rate": 0.00011681452938873516, "loss": 0.3712, "step": 5017 }, { "epoch": 1.6111735431048322, "grad_norm": 0.9942889213562012, "learning_rate": 0.00011678011877217065, "loss": 0.4755, "step": 5018 }, { "epoch": 1.6114946219296837, "grad_norm": 0.8010011911392212, "learning_rate": 0.00011674570611099955, "loss": 0.3993, "step": 5019 }, { "epoch": 1.6118157007545353, "grad_norm": 0.7642001509666443, "learning_rate": 0.00011671129140941499, "loss": 0.4508, "step": 5020 }, { "epoch": 1.6121367795793868, "grad_norm": 0.7442882061004639, "learning_rate": 
0.00011667687467161024, "loss": 0.5295, "step": 5021 }, { "epoch": 1.6124578584042384, "grad_norm": 0.8074431419372559, "learning_rate": 0.00011664245590177892, "loss": 0.6415, "step": 5022 }, { "epoch": 1.6127789372290897, "grad_norm": 0.8954799175262451, "learning_rate": 0.0001166080351041148, "loss": 0.6567, "step": 5023 }, { "epoch": 1.6131000160539413, "grad_norm": 0.8098952174186707, "learning_rate": 0.00011657361228281199, "loss": 0.6164, "step": 5024 }, { "epoch": 1.6134210948787926, "grad_norm": 1.059260368347168, "learning_rate": 0.00011653918744206478, "loss": 0.4956, "step": 5025 }, { "epoch": 1.6137421737036441, "grad_norm": 0.6929076910018921, "learning_rate": 0.00011650476058606777, "loss": 0.5855, "step": 5026 }, { "epoch": 1.6140632525284957, "grad_norm": 0.9725719094276428, "learning_rate": 0.00011647033171901573, "loss": 0.6258, "step": 5027 }, { "epoch": 1.6143843313533472, "grad_norm": 0.8998417258262634, "learning_rate": 0.00011643590084510379, "loss": 0.6263, "step": 5028 }, { "epoch": 1.6147054101781988, "grad_norm": 1.7190628051757812, "learning_rate": 0.00011640146796852711, "loss": 0.6147, "step": 5029 }, { "epoch": 1.6150264890030503, "grad_norm": 0.8948490023612976, "learning_rate": 0.00011636703309348133, "loss": 0.6867, "step": 5030 }, { "epoch": 1.615347567827902, "grad_norm": 0.9756568670272827, "learning_rate": 0.00011633259622416224, "loss": 0.7751, "step": 5031 }, { "epoch": 1.6156686466527532, "grad_norm": 0.9897345304489136, "learning_rate": 0.00011629815736476581, "loss": 0.6944, "step": 5032 }, { "epoch": 1.6159897254776048, "grad_norm": 1.4569859504699707, "learning_rate": 0.00011626371651948838, "loss": 0.5682, "step": 5033 }, { "epoch": 1.616310804302456, "grad_norm": 1.191692590713501, "learning_rate": 0.00011622927369252638, "loss": 0.7242, "step": 5034 }, { "epoch": 1.6166318831273077, "grad_norm": 0.7807984948158264, "learning_rate": 0.00011619482888807662, "loss": 0.5721, "step": 5035 }, { "epoch": 1.6169529619521592, 
"grad_norm": 0.7991989254951477, "learning_rate": 0.00011616038211033613, "loss": 0.5288, "step": 5036 }, { "epoch": 1.6172740407770108, "grad_norm": 0.8879060745239258, "learning_rate": 0.00011612593336350208, "loss": 0.4782, "step": 5037 }, { "epoch": 1.6175951196018623, "grad_norm": 0.8001835942268372, "learning_rate": 0.00011609148265177193, "loss": 0.504, "step": 5038 }, { "epoch": 1.6179161984267139, "grad_norm": 0.9068782925605774, "learning_rate": 0.00011605702997934345, "loss": 0.5692, "step": 5039 }, { "epoch": 1.6182372772515654, "grad_norm": 0.9409818649291992, "learning_rate": 0.00011602257535041459, "loss": 0.5669, "step": 5040 }, { "epoch": 1.6185583560764167, "grad_norm": 0.9030987620353699, "learning_rate": 0.0001159881187691835, "loss": 0.4359, "step": 5041 }, { "epoch": 1.6188794349012683, "grad_norm": 1.0598105192184448, "learning_rate": 0.00011595366023984864, "loss": 0.693, "step": 5042 }, { "epoch": 1.6192005137261196, "grad_norm": 1.0515077114105225, "learning_rate": 0.00011591919976660868, "loss": 0.6296, "step": 5043 }, { "epoch": 1.6195215925509712, "grad_norm": 0.8490827083587646, "learning_rate": 0.00011588473735366249, "loss": 0.5571, "step": 5044 }, { "epoch": 1.6198426713758227, "grad_norm": 0.8692120909690857, "learning_rate": 0.0001158502730052093, "loss": 0.5099, "step": 5045 }, { "epoch": 1.6201637502006743, "grad_norm": 1.1289715766906738, "learning_rate": 0.00011581580672544838, "loss": 0.7012, "step": 5046 }, { "epoch": 1.6204848290255258, "grad_norm": 1.281795859336853, "learning_rate": 0.0001157813385185794, "loss": 0.7737, "step": 5047 }, { "epoch": 1.6208059078503774, "grad_norm": 1.1194076538085938, "learning_rate": 0.00011574686838880215, "loss": 0.633, "step": 5048 }, { "epoch": 1.621126986675229, "grad_norm": 0.8962092399597168, "learning_rate": 0.00011571239634031679, "loss": 0.6357, "step": 5049 }, { "epoch": 1.6214480655000802, "grad_norm": 0.9650201797485352, "learning_rate": 0.00011567792237732358, "loss": 0.5478, 
"step": 5050 }, { "epoch": 1.6217691443249318, "grad_norm": 0.8813601732254028, "learning_rate": 0.0001156434465040231, "loss": 0.4708, "step": 5051 }, { "epoch": 1.6220902231497831, "grad_norm": 0.8140305876731873, "learning_rate": 0.0001156089687246161, "loss": 0.485, "step": 5052 }, { "epoch": 1.6224113019746347, "grad_norm": 1.1410284042358398, "learning_rate": 0.00011557448904330362, "loss": 0.6938, "step": 5053 }, { "epoch": 1.6227323807994862, "grad_norm": 1.0146127939224243, "learning_rate": 0.0001155400074642869, "loss": 0.6013, "step": 5054 }, { "epoch": 1.6230534596243378, "grad_norm": 1.0160109996795654, "learning_rate": 0.00011550552399176739, "loss": 0.4768, "step": 5055 }, { "epoch": 1.6233745384491893, "grad_norm": 1.1787000894546509, "learning_rate": 0.00011547103862994684, "loss": 0.5916, "step": 5056 }, { "epoch": 1.6236956172740409, "grad_norm": 0.8274866342544556, "learning_rate": 0.00011543655138302714, "loss": 0.5369, "step": 5057 }, { "epoch": 1.6240166960988924, "grad_norm": 0.7210972905158997, "learning_rate": 0.00011540206225521046, "loss": 0.3732, "step": 5058 }, { "epoch": 1.6243377749237438, "grad_norm": 1.1187121868133545, "learning_rate": 0.00011536757125069923, "loss": 0.658, "step": 5059 }, { "epoch": 1.6246588537485953, "grad_norm": 0.6988892555236816, "learning_rate": 0.00011533307837369607, "loss": 0.3963, "step": 5060 }, { "epoch": 1.6249799325734466, "grad_norm": 0.8323413133621216, "learning_rate": 0.00011529858362840382, "loss": 0.3751, "step": 5061 }, { "epoch": 1.6253010113982982, "grad_norm": 1.572111964225769, "learning_rate": 0.00011526408701902556, "loss": 0.5879, "step": 5062 }, { "epoch": 1.6256220902231497, "grad_norm": 0.6193574666976929, "learning_rate": 0.00011522958854976458, "loss": 0.3986, "step": 5063 }, { "epoch": 1.6259431690480013, "grad_norm": 0.6282438635826111, "learning_rate": 0.00011519508822482446, "loss": 0.389, "step": 5064 }, { "epoch": 1.6262642478728528, "grad_norm": 0.6472731828689575, 
"learning_rate": 0.00011516058604840891, "loss": 0.7557, "step": 5065 }, { "epoch": 1.6265853266977044, "grad_norm": 0.5297544598579407, "learning_rate": 0.00011512608202472194, "loss": 0.7176, "step": 5066 }, { "epoch": 1.626906405522556, "grad_norm": 0.6865007281303406, "learning_rate": 0.00011509157615796776, "loss": 0.3564, "step": 5067 }, { "epoch": 1.6272274843474073, "grad_norm": 0.6907351613044739, "learning_rate": 0.00011505706845235078, "loss": 0.2377, "step": 5068 }, { "epoch": 1.6275485631722588, "grad_norm": 0.8242547512054443, "learning_rate": 0.00011502255891207572, "loss": 0.2439, "step": 5069 }, { "epoch": 1.6278696419971102, "grad_norm": 0.5880115032196045, "learning_rate": 0.0001149880475413474, "loss": 0.2065, "step": 5070 }, { "epoch": 1.6281907208219617, "grad_norm": 0.6865981817245483, "learning_rate": 0.00011495353434437098, "loss": 0.1763, "step": 5071 }, { "epoch": 1.6285117996468133, "grad_norm": 0.7891075015068054, "learning_rate": 0.00011491901932535172, "loss": 0.5724, "step": 5072 }, { "epoch": 1.6288328784716648, "grad_norm": 1.0175186395645142, "learning_rate": 0.00011488450248849522, "loss": 0.884, "step": 5073 }, { "epoch": 1.6291539572965164, "grad_norm": 0.808527946472168, "learning_rate": 0.00011484998383800726, "loss": 0.5626, "step": 5074 }, { "epoch": 1.629475036121368, "grad_norm": 0.8275448679924011, "learning_rate": 0.00011481546337809381, "loss": 0.5928, "step": 5075 }, { "epoch": 1.6297961149462195, "grad_norm": 0.7020288109779358, "learning_rate": 0.00011478094111296109, "loss": 0.4619, "step": 5076 }, { "epoch": 1.6301171937710708, "grad_norm": 0.7639602422714233, "learning_rate": 0.0001147464170468155, "loss": 0.5046, "step": 5077 }, { "epoch": 1.6304382725959223, "grad_norm": 0.7162854671478271, "learning_rate": 0.00011471189118386375, "loss": 0.5293, "step": 5078 }, { "epoch": 1.6307593514207737, "grad_norm": 0.826367199420929, "learning_rate": 0.00011467736352831266, "loss": 0.4409, "step": 5079 }, { "epoch": 
1.6310804302456252, "grad_norm": 0.9901473522186279, "learning_rate": 0.00011464283408436938, "loss": 0.7239, "step": 5080 }, { "epoch": 1.6314015090704768, "grad_norm": 1.07185697555542, "learning_rate": 0.00011460830285624118, "loss": 0.6114, "step": 5081 }, { "epoch": 1.6317225878953283, "grad_norm": 1.0970301628112793, "learning_rate": 0.00011457376984813557, "loss": 0.6966, "step": 5082 }, { "epoch": 1.6320436667201799, "grad_norm": 0.672584056854248, "learning_rate": 0.00011453923506426032, "loss": 0.4641, "step": 5083 }, { "epoch": 1.6323647455450314, "grad_norm": 0.9183497428894043, "learning_rate": 0.00011450469850882337, "loss": 0.7209, "step": 5084 }, { "epoch": 1.632685824369883, "grad_norm": 1.120105266571045, "learning_rate": 0.00011447016018603292, "loss": 0.7857, "step": 5085 }, { "epoch": 1.6330069031947343, "grad_norm": 1.3121768236160278, "learning_rate": 0.00011443562010009731, "loss": 0.6645, "step": 5086 }, { "epoch": 1.6333279820195858, "grad_norm": 0.8911232352256775, "learning_rate": 0.00011440107825522521, "loss": 0.5584, "step": 5087 }, { "epoch": 1.6336490608444372, "grad_norm": 0.9025933146476746, "learning_rate": 0.00011436653465562542, "loss": 0.5403, "step": 5088 }, { "epoch": 1.6339701396692887, "grad_norm": 0.9944098591804504, "learning_rate": 0.00011433198930550695, "loss": 0.5797, "step": 5089 }, { "epoch": 1.6342912184941403, "grad_norm": 0.8134103417396545, "learning_rate": 0.00011429744220907903, "loss": 0.6454, "step": 5090 }, { "epoch": 1.6346122973189918, "grad_norm": 0.9299569129943848, "learning_rate": 0.00011426289337055119, "loss": 0.6705, "step": 5091 }, { "epoch": 1.6349333761438434, "grad_norm": 1.1119047403335571, "learning_rate": 0.00011422834279413301, "loss": 0.5778, "step": 5092 }, { "epoch": 1.635254454968695, "grad_norm": 0.860736072063446, "learning_rate": 0.00011419379048403444, "loss": 0.687, "step": 5093 }, { "epoch": 1.6355755337935465, "grad_norm": 1.328277349472046, "learning_rate": 
0.00011415923644446557, "loss": 0.8227, "step": 5094 }, { "epoch": 1.6358966126183978, "grad_norm": 1.0155832767486572, "learning_rate": 0.00011412468067963669, "loss": 0.61, "step": 5095 }, { "epoch": 1.6362176914432494, "grad_norm": 0.8249773383140564, "learning_rate": 0.00011409012319375827, "loss": 0.454, "step": 5096 }, { "epoch": 1.6365387702681007, "grad_norm": 0.8771111369132996, "learning_rate": 0.00011405556399104109, "loss": 0.6191, "step": 5097 }, { "epoch": 1.6368598490929522, "grad_norm": 1.0984089374542236, "learning_rate": 0.00011402100307569612, "loss": 0.8062, "step": 5098 }, { "epoch": 1.6371809279178038, "grad_norm": 0.7686770558357239, "learning_rate": 0.00011398644045193444, "loss": 0.4661, "step": 5099 }, { "epoch": 1.6375020067426553, "grad_norm": 0.8252708911895752, "learning_rate": 0.00011395187612396738, "loss": 0.5061, "step": 5100 }, { "epoch": 1.6378230855675069, "grad_norm": 1.2015011310577393, "learning_rate": 0.00011391731009600654, "loss": 0.6285, "step": 5101 }, { "epoch": 1.6381441643923584, "grad_norm": 1.1227675676345825, "learning_rate": 0.00011388274237226371, "loss": 0.8034, "step": 5102 }, { "epoch": 1.63846524321721, "grad_norm": 0.8477095365524292, "learning_rate": 0.00011384817295695083, "loss": 0.5733, "step": 5103 }, { "epoch": 1.6387863220420613, "grad_norm": 0.7261363863945007, "learning_rate": 0.00011381360185428007, "loss": 0.3891, "step": 5104 }, { "epoch": 1.6391074008669129, "grad_norm": 0.9272900223731995, "learning_rate": 0.0001137790290684638, "loss": 0.3385, "step": 5105 }, { "epoch": 1.6394284796917642, "grad_norm": 1.001617431640625, "learning_rate": 0.00011374445460371466, "loss": 0.5374, "step": 5106 }, { "epoch": 1.6397495585166157, "grad_norm": 0.8543923497200012, "learning_rate": 0.00011370987846424546, "loss": 0.5697, "step": 5107 }, { "epoch": 1.6400706373414673, "grad_norm": 0.9451769590377808, "learning_rate": 0.0001136753006542691, "loss": 0.4807, "step": 5108 }, { "epoch": 1.6403917161663188, 
"grad_norm": 0.6929908394813538, "learning_rate": 0.00011364072117799885, "loss": 0.4472, "step": 5109 }, { "epoch": 1.6407127949911704, "grad_norm": 1.1182005405426025, "learning_rate": 0.00011360614003964809, "loss": 0.5891, "step": 5110 }, { "epoch": 1.641033873816022, "grad_norm": 1.0447207689285278, "learning_rate": 0.00011357155724343045, "loss": 0.5113, "step": 5111 }, { "epoch": 1.6413549526408735, "grad_norm": 0.7325683832168579, "learning_rate": 0.00011353697279355973, "loss": 0.3998, "step": 5112 }, { "epoch": 1.6416760314657248, "grad_norm": 0.6994525790214539, "learning_rate": 0.00011350238669424993, "loss": 0.441, "step": 5113 }, { "epoch": 1.6419971102905764, "grad_norm": 1.6808966398239136, "learning_rate": 0.00011346779894971527, "loss": 0.5446, "step": 5114 }, { "epoch": 1.6423181891154277, "grad_norm": 0.5908676981925964, "learning_rate": 0.00011343320956417014, "loss": 0.8066, "step": 5115 }, { "epoch": 1.6426392679402793, "grad_norm": 0.6339381337165833, "learning_rate": 0.00011339861854182922, "loss": 0.7703, "step": 5116 }, { "epoch": 1.6429603467651308, "grad_norm": 0.7568504810333252, "learning_rate": 0.00011336402588690726, "loss": 0.5826, "step": 5117 }, { "epoch": 1.6432814255899824, "grad_norm": 0.6824870705604553, "learning_rate": 0.00011332943160361926, "loss": 0.2832, "step": 5118 }, { "epoch": 1.643602504414834, "grad_norm": 0.7612968683242798, "learning_rate": 0.00011329483569618045, "loss": 0.291, "step": 5119 }, { "epoch": 1.6439235832396855, "grad_norm": 0.7312465906143188, "learning_rate": 0.00011326023816880625, "loss": 0.2184, "step": 5120 }, { "epoch": 1.6442446620645368, "grad_norm": 0.7438070178031921, "learning_rate": 0.00011322563902571226, "loss": 0.3823, "step": 5121 }, { "epoch": 1.6445657408893883, "grad_norm": 1.085114598274231, "learning_rate": 0.00011319103827111426, "loss": 0.6073, "step": 5122 }, { "epoch": 1.64488681971424, "grad_norm": 0.8549251556396484, "learning_rate": 0.00011315643590922827, "loss": 
0.6365, "step": 5123 }, { "epoch": 1.6452078985390912, "grad_norm": 0.8248443603515625, "learning_rate": 0.00011312183194427046, "loss": 0.5795, "step": 5124 }, { "epoch": 1.6455289773639428, "grad_norm": 0.9894840121269226, "learning_rate": 0.00011308722638045724, "loss": 0.5455, "step": 5125 }, { "epoch": 1.6458500561887943, "grad_norm": 0.7669897079467773, "learning_rate": 0.00011305261922200519, "loss": 0.537, "step": 5126 }, { "epoch": 1.6461711350136459, "grad_norm": 0.9028288722038269, "learning_rate": 0.00011301801047313105, "loss": 0.6561, "step": 5127 }, { "epoch": 1.6464922138384974, "grad_norm": 0.8793859481811523, "learning_rate": 0.00011298340013805184, "loss": 0.6243, "step": 5128 }, { "epoch": 1.646813292663349, "grad_norm": 0.7349303364753723, "learning_rate": 0.00011294878822098469, "loss": 0.5293, "step": 5129 }, { "epoch": 1.6471343714882003, "grad_norm": 0.8124568462371826, "learning_rate": 0.000112914174726147, "loss": 0.5847, "step": 5130 }, { "epoch": 1.6474554503130519, "grad_norm": 1.0718876123428345, "learning_rate": 0.0001128795596577563, "loss": 0.6405, "step": 5131 }, { "epoch": 1.6477765291379034, "grad_norm": 0.7873112559318542, "learning_rate": 0.0001128449430200303, "loss": 0.5677, "step": 5132 }, { "epoch": 1.6480976079627547, "grad_norm": 0.8473823666572571, "learning_rate": 0.00011281032481718697, "loss": 0.6249, "step": 5133 }, { "epoch": 1.6484186867876063, "grad_norm": 0.8866380453109741, "learning_rate": 0.0001127757050534444, "loss": 0.7474, "step": 5134 }, { "epoch": 1.6487397656124578, "grad_norm": 0.9700244069099426, "learning_rate": 0.00011274108373302095, "loss": 0.7294, "step": 5135 }, { "epoch": 1.6490608444373094, "grad_norm": 0.9701023697853088, "learning_rate": 0.00011270646086013505, "loss": 0.8279, "step": 5136 }, { "epoch": 1.649381923262161, "grad_norm": 0.9571521282196045, "learning_rate": 0.00011267183643900548, "loss": 0.4458, "step": 5137 }, { "epoch": 1.6497030020870125, "grad_norm": 0.8921281099319458, 
"learning_rate": 0.00011263721047385105, "loss": 0.588, "step": 5138 }, { "epoch": 1.6500240809118638, "grad_norm": 1.0599466562271118, "learning_rate": 0.00011260258296889086, "loss": 0.7079, "step": 5139 }, { "epoch": 1.6503451597367154, "grad_norm": 0.9576237201690674, "learning_rate": 0.00011256795392834419, "loss": 0.6869, "step": 5140 }, { "epoch": 1.650666238561567, "grad_norm": 1.035122036933899, "learning_rate": 0.00011253332335643043, "loss": 0.6376, "step": 5141 }, { "epoch": 1.6509873173864182, "grad_norm": 1.0403311252593994, "learning_rate": 0.00011249869125736925, "loss": 0.5694, "step": 5142 }, { "epoch": 1.6513083962112698, "grad_norm": 0.9405300617218018, "learning_rate": 0.00011246405763538046, "loss": 0.5892, "step": 5143 }, { "epoch": 1.6516294750361213, "grad_norm": 1.0725064277648926, "learning_rate": 0.00011242942249468402, "loss": 0.7886, "step": 5144 }, { "epoch": 1.651950553860973, "grad_norm": 1.1589728593826294, "learning_rate": 0.00011239478583950018, "loss": 0.6928, "step": 5145 }, { "epoch": 1.6522716326858244, "grad_norm": 0.9433485865592957, "learning_rate": 0.00011236014767404927, "loss": 0.673, "step": 5146 }, { "epoch": 1.652592711510676, "grad_norm": 0.7959662079811096, "learning_rate": 0.00011232550800255188, "loss": 0.555, "step": 5147 }, { "epoch": 1.6529137903355273, "grad_norm": 0.8811464905738831, "learning_rate": 0.00011229086682922869, "loss": 0.5339, "step": 5148 }, { "epoch": 1.6532348691603789, "grad_norm": 1.0386700630187988, "learning_rate": 0.00011225622415830068, "loss": 0.5246, "step": 5149 }, { "epoch": 1.6535559479852304, "grad_norm": 0.907658040523529, "learning_rate": 0.00011222157999398895, "loss": 0.6589, "step": 5150 }, { "epoch": 1.6538770268100818, "grad_norm": 0.6662940979003906, "learning_rate": 0.00011218693434051475, "loss": 0.5202, "step": 5151 }, { "epoch": 1.6541981056349333, "grad_norm": 0.8499528765678406, "learning_rate": 0.00011215228720209958, "loss": 0.5316, "step": 5152 }, { "epoch": 
1.6545191844597849, "grad_norm": 0.8833222389221191, "learning_rate": 0.00011211763858296507, "loss": 0.534, "step": 5153 }, { "epoch": 1.6548402632846364, "grad_norm": 0.8427216410636902, "learning_rate": 0.00011208298848733305, "loss": 0.4688, "step": 5154 }, { "epoch": 1.655161342109488, "grad_norm": 0.808330774307251, "learning_rate": 0.00011204833691942553, "loss": 0.5204, "step": 5155 }, { "epoch": 1.6554824209343395, "grad_norm": 0.6491779685020447, "learning_rate": 0.00011201368388346471, "loss": 0.383, "step": 5156 }, { "epoch": 1.6558034997591908, "grad_norm": 1.3244563341140747, "learning_rate": 0.00011197902938367298, "loss": 0.5214, "step": 5157 }, { "epoch": 1.6561245785840424, "grad_norm": 1.1561415195465088, "learning_rate": 0.0001119443734242728, "loss": 0.5834, "step": 5158 }, { "epoch": 1.656445657408894, "grad_norm": 1.0299034118652344, "learning_rate": 0.00011190971600948699, "loss": 0.5443, "step": 5159 }, { "epoch": 1.6567667362337453, "grad_norm": 0.8660802841186523, "learning_rate": 0.0001118750571435384, "loss": 0.4689, "step": 5160 }, { "epoch": 1.6570878150585968, "grad_norm": 0.6859344840049744, "learning_rate": 0.00011184039683065013, "loss": 0.4929, "step": 5161 }, { "epoch": 1.6574088938834484, "grad_norm": 1.2435836791992188, "learning_rate": 0.00011180573507504537, "loss": 0.5144, "step": 5162 }, { "epoch": 1.6577299727083, "grad_norm": 0.9810152649879456, "learning_rate": 0.00011177107188094764, "loss": 0.4428, "step": 5163 }, { "epoch": 1.6580510515331515, "grad_norm": 2.1121833324432373, "learning_rate": 0.00011173640725258052, "loss": 0.5309, "step": 5164 }, { "epoch": 1.658372130358003, "grad_norm": 0.8293464183807373, "learning_rate": 0.00011170174119416776, "loss": 0.7781, "step": 5165 }, { "epoch": 1.6586932091828543, "grad_norm": 0.5578963160514832, "learning_rate": 0.00011166707370993333, "loss": 0.5859, "step": 5166 }, { "epoch": 1.659014288007706, "grad_norm": 0.5009366869926453, "learning_rate": 0.00011163240480410135, 
"loss": 0.6605, "step": 5167 }, { "epoch": 1.6593353668325574, "grad_norm": 0.7301324605941772, "learning_rate": 0.00011159773448089614, "loss": 0.4758, "step": 5168 }, { "epoch": 1.6596564456574088, "grad_norm": 0.6743203401565552, "learning_rate": 0.00011156306274454218, "loss": 0.3494, "step": 5169 }, { "epoch": 1.6599775244822603, "grad_norm": 0.7202017903327942, "learning_rate": 0.00011152838959926408, "loss": 0.3711, "step": 5170 }, { "epoch": 1.6602986033071119, "grad_norm": 0.6269795298576355, "learning_rate": 0.00011149371504928668, "loss": 0.2464, "step": 5171 }, { "epoch": 1.6606196821319634, "grad_norm": 0.5528831481933594, "learning_rate": 0.00011145903909883495, "loss": 0.2153, "step": 5172 }, { "epoch": 1.660940760956815, "grad_norm": 0.695706307888031, "learning_rate": 0.00011142436175213409, "loss": 0.5348, "step": 5173 }, { "epoch": 1.6612618397816665, "grad_norm": 0.8045232892036438, "learning_rate": 0.0001113896830134094, "loss": 0.6501, "step": 5174 }, { "epoch": 1.6615829186065179, "grad_norm": 1.1666730642318726, "learning_rate": 0.00011135500288688636, "loss": 0.6264, "step": 5175 }, { "epoch": 1.6619039974313694, "grad_norm": 0.9477275013923645, "learning_rate": 0.0001113203213767907, "loss": 0.5997, "step": 5176 }, { "epoch": 1.662225076256221, "grad_norm": 0.7591928839683533, "learning_rate": 0.00011128563848734816, "loss": 0.4988, "step": 5177 }, { "epoch": 1.6625461550810723, "grad_norm": 1.0086168050765991, "learning_rate": 0.00011125095422278486, "loss": 0.5753, "step": 5178 }, { "epoch": 1.6628672339059238, "grad_norm": 0.78431636095047, "learning_rate": 0.0001112162685873269, "loss": 0.6154, "step": 5179 }, { "epoch": 1.6631883127307754, "grad_norm": 0.7329674959182739, "learning_rate": 0.00011118158158520064, "loss": 0.4773, "step": 5180 }, { "epoch": 1.663509391555627, "grad_norm": 0.6751540303230286, "learning_rate": 0.00011114689322063255, "loss": 0.4195, "step": 5181 }, { "epoch": 1.6638304703804785, "grad_norm": 
0.8054059147834778, "learning_rate": 0.00011111220349784937, "loss": 0.5682, "step": 5182 }, { "epoch": 1.66415154920533, "grad_norm": 1.0262163877487183, "learning_rate": 0.00011107751242107787, "loss": 0.8966, "step": 5183 }, { "epoch": 1.6644726280301814, "grad_norm": 1.2697820663452148, "learning_rate": 0.00011104281999454511, "loss": 0.6868, "step": 5184 }, { "epoch": 1.664793706855033, "grad_norm": 1.0350483655929565, "learning_rate": 0.00011100812622247822, "loss": 0.5431, "step": 5185 }, { "epoch": 1.6651147856798842, "grad_norm": 0.7967930436134338, "learning_rate": 0.00011097343110910452, "loss": 0.3997, "step": 5186 }, { "epoch": 1.6654358645047358, "grad_norm": 1.0638127326965332, "learning_rate": 0.00011093873465865157, "loss": 0.8347, "step": 5187 }, { "epoch": 1.6657569433295873, "grad_norm": 0.8634060621261597, "learning_rate": 0.00011090403687534697, "loss": 0.5585, "step": 5188 }, { "epoch": 1.666078022154439, "grad_norm": 1.0660980939865112, "learning_rate": 0.00011086933776341852, "loss": 0.722, "step": 5189 }, { "epoch": 1.6663991009792904, "grad_norm": 1.0140385627746582, "learning_rate": 0.00011083463732709425, "loss": 0.5254, "step": 5190 }, { "epoch": 1.666720179804142, "grad_norm": 1.2487151622772217, "learning_rate": 0.0001107999355706023, "loss": 0.6787, "step": 5191 }, { "epoch": 1.6670412586289935, "grad_norm": 1.3040145635604858, "learning_rate": 0.00011076523249817094, "loss": 0.8144, "step": 5192 }, { "epoch": 1.6673623374538449, "grad_norm": 0.9779611229896545, "learning_rate": 0.00011073052811402867, "loss": 0.6663, "step": 5193 }, { "epoch": 1.6676834162786964, "grad_norm": 0.7410480976104736, "learning_rate": 0.0001106958224224041, "loss": 0.49, "step": 5194 }, { "epoch": 1.6680044951035478, "grad_norm": 1.0676357746124268, "learning_rate": 0.000110661115427526, "loss": 0.6552, "step": 5195 }, { "epoch": 1.6683255739283993, "grad_norm": 0.8795901536941528, "learning_rate": 0.00011062640713362333, "loss": 0.6453, "step": 5196 }, 
{ "epoch": 1.6686466527532509, "grad_norm": 0.8016363382339478, "learning_rate": 0.0001105916975449252, "loss": 0.5491, "step": 5197 }, { "epoch": 1.6689677315781024, "grad_norm": 0.7814456820487976, "learning_rate": 0.00011055698666566084, "loss": 0.5246, "step": 5198 }, { "epoch": 1.669288810402954, "grad_norm": 1.1860097646713257, "learning_rate": 0.00011052227450005967, "loss": 0.5291, "step": 5199 }, { "epoch": 1.6696098892278055, "grad_norm": 0.7847801446914673, "learning_rate": 0.00011048756105235125, "loss": 0.5715, "step": 5200 }, { "epoch": 1.669930968052657, "grad_norm": 0.6763100028038025, "learning_rate": 0.00011045284632676536, "loss": 0.4152, "step": 5201 }, { "epoch": 1.6702520468775084, "grad_norm": 1.2399777173995972, "learning_rate": 0.00011041813032753183, "loss": 0.504, "step": 5202 }, { "epoch": 1.67057312570236, "grad_norm": 0.9793505668640137, "learning_rate": 0.00011038341305888074, "loss": 0.7147, "step": 5203 }, { "epoch": 1.6708942045272113, "grad_norm": 0.6837410926818848, "learning_rate": 0.00011034869452504226, "loss": 0.381, "step": 5204 }, { "epoch": 1.6712152833520628, "grad_norm": 1.278186321258545, "learning_rate": 0.00011031397473024674, "loss": 0.5128, "step": 5205 }, { "epoch": 1.6715363621769144, "grad_norm": 0.8159211277961731, "learning_rate": 0.00011027925367872469, "loss": 0.5699, "step": 5206 }, { "epoch": 1.671857441001766, "grad_norm": 0.7557732462882996, "learning_rate": 0.00011024453137470677, "loss": 0.5079, "step": 5207 }, { "epoch": 1.6721785198266175, "grad_norm": 0.9018636345863342, "learning_rate": 0.00011020980782242376, "loss": 0.533, "step": 5208 }, { "epoch": 1.672499598651469, "grad_norm": 0.8341492414474487, "learning_rate": 0.00011017508302610664, "loss": 0.5423, "step": 5209 }, { "epoch": 1.6728206774763206, "grad_norm": 1.1660737991333008, "learning_rate": 0.00011014035698998651, "loss": 0.4989, "step": 5210 }, { "epoch": 1.673141756301172, "grad_norm": 0.9293453097343445, "learning_rate": 
0.00011010562971829463, "loss": 0.4339, "step": 5211 }, { "epoch": 1.6734628351260235, "grad_norm": 0.6236379146575928, "learning_rate": 0.00011007090121526245, "loss": 0.4249, "step": 5212 }, { "epoch": 1.6737839139508748, "grad_norm": 0.8735912442207336, "learning_rate": 0.00011003617148512149, "loss": 0.5046, "step": 5213 }, { "epoch": 1.6741049927757263, "grad_norm": 1.3508676290512085, "learning_rate": 0.00011000144053210348, "loss": 0.3542, "step": 5214 }, { "epoch": 1.6744260716005779, "grad_norm": 1.2363876104354858, "learning_rate": 0.0001099667083604403, "loss": 0.4835, "step": 5215 }, { "epoch": 1.6747471504254294, "grad_norm": 0.650037944316864, "learning_rate": 0.00010993197497436391, "loss": 0.7668, "step": 5216 }, { "epoch": 1.675068229250281, "grad_norm": 0.6067812442779541, "learning_rate": 0.00010989724037810652, "loss": 0.5965, "step": 5217 }, { "epoch": 1.6753893080751325, "grad_norm": 0.8295800685882568, "learning_rate": 0.00010986250457590039, "loss": 0.4706, "step": 5218 }, { "epoch": 1.675710386899984, "grad_norm": 0.9519297480583191, "learning_rate": 0.00010982776757197799, "loss": 0.3641, "step": 5219 }, { "epoch": 1.6760314657248354, "grad_norm": 0.8043132424354553, "learning_rate": 0.00010979302937057192, "loss": 0.3151, "step": 5220 }, { "epoch": 1.676352544549687, "grad_norm": 0.8412678837776184, "learning_rate": 0.00010975828997591495, "loss": 0.4017, "step": 5221 }, { "epoch": 1.6766736233745383, "grad_norm": 0.5943979620933533, "learning_rate": 0.00010972354939223996, "loss": 0.1791, "step": 5222 }, { "epoch": 1.6769947021993898, "grad_norm": 0.7364015579223633, "learning_rate": 0.00010968880762377993, "loss": 0.2705, "step": 5223 }, { "epoch": 1.6773157810242414, "grad_norm": 0.9227198362350464, "learning_rate": 0.00010965406467476808, "loss": 0.6049, "step": 5224 }, { "epoch": 1.677636859849093, "grad_norm": 0.7940647602081299, "learning_rate": 0.00010961932054943778, "loss": 0.5055, "step": 5225 }, { "epoch": 1.6779579386739445, 
"grad_norm": 0.8924132585525513, "learning_rate": 0.00010958457525202241, "loss": 0.6137, "step": 5226 }, { "epoch": 1.678279017498796, "grad_norm": 0.7673733830451965, "learning_rate": 0.00010954982878675563, "loss": 0.5312, "step": 5227 }, { "epoch": 1.6786000963236476, "grad_norm": 0.7759941816329956, "learning_rate": 0.00010951508115787119, "loss": 0.4362, "step": 5228 }, { "epoch": 1.678921175148499, "grad_norm": 0.9239017963409424, "learning_rate": 0.00010948033236960294, "loss": 0.6432, "step": 5229 }, { "epoch": 1.6792422539733505, "grad_norm": 0.9103022813796997, "learning_rate": 0.00010944558242618496, "loss": 0.6079, "step": 5230 }, { "epoch": 1.6795633327982018, "grad_norm": 1.2982368469238281, "learning_rate": 0.00010941083133185146, "loss": 0.7558, "step": 5231 }, { "epoch": 1.6798844116230534, "grad_norm": 0.9765358567237854, "learning_rate": 0.00010937607909083667, "loss": 0.5457, "step": 5232 }, { "epoch": 1.680205490447905, "grad_norm": 0.7929934859275818, "learning_rate": 0.00010934132570737507, "loss": 0.5489, "step": 5233 }, { "epoch": 1.6805265692727565, "grad_norm": 0.8707826733589172, "learning_rate": 0.00010930657118570126, "loss": 0.5285, "step": 5234 }, { "epoch": 1.680847648097608, "grad_norm": 0.8279105424880981, "learning_rate": 0.00010927181553005002, "loss": 0.6373, "step": 5235 }, { "epoch": 1.6811687269224596, "grad_norm": 1.1090543270111084, "learning_rate": 0.00010923705874465618, "loss": 0.5366, "step": 5236 }, { "epoch": 1.681489805747311, "grad_norm": 0.6475349068641663, "learning_rate": 0.00010920230083375473, "loss": 0.4386, "step": 5237 }, { "epoch": 1.6818108845721624, "grad_norm": 1.031524419784546, "learning_rate": 0.00010916754180158082, "loss": 0.6921, "step": 5238 }, { "epoch": 1.682131963397014, "grad_norm": 1.1546660661697388, "learning_rate": 0.00010913278165236978, "loss": 0.6612, "step": 5239 }, { "epoch": 1.6824530422218653, "grad_norm": 1.4143015146255493, "learning_rate": 0.00010909802039035701, "loss": 
0.7917, "step": 5240 }, { "epoch": 1.6827741210467169, "grad_norm": 1.350466012954712, "learning_rate": 0.00010906325801977804, "loss": 0.7994, "step": 5241 }, { "epoch": 1.6830951998715684, "grad_norm": 0.7425410747528076, "learning_rate": 0.00010902849454486856, "loss": 0.5108, "step": 5242 }, { "epoch": 1.68341627869642, "grad_norm": 1.0292056798934937, "learning_rate": 0.00010899372996986439, "loss": 0.5466, "step": 5243 }, { "epoch": 1.6837373575212715, "grad_norm": 0.9850762486457825, "learning_rate": 0.00010895896429900154, "loss": 0.5561, "step": 5244 }, { "epoch": 1.684058436346123, "grad_norm": 1.2769955396652222, "learning_rate": 0.00010892419753651606, "loss": 0.614, "step": 5245 }, { "epoch": 1.6843795151709746, "grad_norm": 0.877348005771637, "learning_rate": 0.00010888942968664417, "loss": 0.5857, "step": 5246 }, { "epoch": 1.684700593995826, "grad_norm": 1.039420485496521, "learning_rate": 0.00010885466075362223, "loss": 0.5347, "step": 5247 }, { "epoch": 1.6850216728206775, "grad_norm": 1.2962031364440918, "learning_rate": 0.00010881989074168673, "loss": 0.86, "step": 5248 }, { "epoch": 1.6853427516455288, "grad_norm": 1.009063720703125, "learning_rate": 0.00010878511965507434, "loss": 0.6275, "step": 5249 }, { "epoch": 1.6856638304703804, "grad_norm": 0.9977805614471436, "learning_rate": 0.00010875034749802173, "loss": 0.6225, "step": 5250 }, { "epoch": 1.685984909295232, "grad_norm": 0.8478687405586243, "learning_rate": 0.00010871557427476583, "loss": 0.5791, "step": 5251 }, { "epoch": 1.6863059881200835, "grad_norm": 1.0061284303665161, "learning_rate": 0.00010868079998954364, "loss": 0.5899, "step": 5252 }, { "epoch": 1.686627066944935, "grad_norm": 0.7541671395301819, "learning_rate": 0.0001086460246465923, "loss": 0.5005, "step": 5253 }, { "epoch": 1.6869481457697866, "grad_norm": 0.8530805706977844, "learning_rate": 0.00010861124825014908, "loss": 0.5079, "step": 5254 }, { "epoch": 1.6872692245946381, "grad_norm": 0.8451884984970093, 
"learning_rate": 0.00010857647080445139, "loss": 0.5617, "step": 5255 }, { "epoch": 1.6875903034194895, "grad_norm": 1.1053026914596558, "learning_rate": 0.00010854169231373676, "loss": 0.6804, "step": 5256 }, { "epoch": 1.687911382244341, "grad_norm": 0.7350571751594543, "learning_rate": 0.00010850691278224281, "loss": 0.5118, "step": 5257 }, { "epoch": 1.6882324610691923, "grad_norm": 0.7682427167892456, "learning_rate": 0.00010847213221420736, "loss": 0.4746, "step": 5258 }, { "epoch": 1.6885535398940439, "grad_norm": 1.612838625907898, "learning_rate": 0.00010843735061386828, "loss": 0.6223, "step": 5259 }, { "epoch": 1.6888746187188954, "grad_norm": 0.8637731671333313, "learning_rate": 0.00010840256798546364, "loss": 0.4417, "step": 5260 }, { "epoch": 1.689195697543747, "grad_norm": 1.2856574058532715, "learning_rate": 0.00010836778433323158, "loss": 0.6902, "step": 5261 }, { "epoch": 1.6895167763685985, "grad_norm": 1.089842438697815, "learning_rate": 0.00010833299966141035, "loss": 0.5839, "step": 5262 }, { "epoch": 1.68983785519345, "grad_norm": 0.7094756364822388, "learning_rate": 0.0001082982139742384, "loss": 0.418, "step": 5263 }, { "epoch": 1.6901589340183016, "grad_norm": 0.7684889435768127, "learning_rate": 0.00010826342727595426, "loss": 0.3724, "step": 5264 }, { "epoch": 1.690480012843153, "grad_norm": 1.0270161628723145, "learning_rate": 0.00010822863957079656, "loss": 1.0122, "step": 5265 }, { "epoch": 1.6908010916680045, "grad_norm": 0.5250154137611389, "learning_rate": 0.0001081938508630041, "loss": 0.8635, "step": 5266 }, { "epoch": 1.6911221704928558, "grad_norm": 0.612006664276123, "learning_rate": 0.00010815906115681578, "loss": 0.8014, "step": 5267 }, { "epoch": 1.6914432493177074, "grad_norm": 0.5928871631622314, "learning_rate": 0.00010812427045647058, "loss": 0.4363, "step": 5268 }, { "epoch": 1.691764328142559, "grad_norm": 0.7500852346420288, "learning_rate": 0.00010808947876620767, "loss": 0.4729, "step": 5269 }, { "epoch": 
1.6920854069674105, "grad_norm": 0.7043389081954956, "learning_rate": 0.00010805468609026632, "loss": 0.225, "step": 5270 }, { "epoch": 1.692406485792262, "grad_norm": 1.1725765466690063, "learning_rate": 0.00010801989243288589, "loss": 0.2793, "step": 5271 }, { "epoch": 1.6927275646171136, "grad_norm": 0.7991273403167725, "learning_rate": 0.0001079850977983059, "loss": 0.4612, "step": 5272 }, { "epoch": 1.6930486434419652, "grad_norm": 0.8243421316146851, "learning_rate": 0.00010795030219076599, "loss": 0.5367, "step": 5273 }, { "epoch": 1.6933697222668165, "grad_norm": 0.7552120685577393, "learning_rate": 0.00010791550561450586, "loss": 0.5766, "step": 5274 }, { "epoch": 1.693690801091668, "grad_norm": 0.7614461779594421, "learning_rate": 0.00010788070807376536, "loss": 0.5462, "step": 5275 }, { "epoch": 1.6940118799165194, "grad_norm": 1.0038031339645386, "learning_rate": 0.0001078459095727845, "loss": 0.6166, "step": 5276 }, { "epoch": 1.694332958741371, "grad_norm": 0.7951825857162476, "learning_rate": 0.00010781111011580336, "loss": 0.5819, "step": 5277 }, { "epoch": 1.6946540375662225, "grad_norm": 0.9689728021621704, "learning_rate": 0.00010777630970706217, "loss": 0.5629, "step": 5278 }, { "epoch": 1.694975116391074, "grad_norm": 1.189405918121338, "learning_rate": 0.00010774150835080119, "loss": 0.4832, "step": 5279 }, { "epoch": 1.6952961952159256, "grad_norm": 0.8709222674369812, "learning_rate": 0.00010770670605126092, "loss": 0.5692, "step": 5280 }, { "epoch": 1.6956172740407771, "grad_norm": 0.9132195115089417, "learning_rate": 0.00010767190281268187, "loss": 0.4311, "step": 5281 }, { "epoch": 1.6959383528656287, "grad_norm": 0.7161700129508972, "learning_rate": 0.00010763709863930476, "loss": 0.4397, "step": 5282 }, { "epoch": 1.69625943169048, "grad_norm": 0.9493613839149475, "learning_rate": 0.00010760229353537033, "loss": 0.6239, "step": 5283 }, { "epoch": 1.6965805105153315, "grad_norm": 1.3455016613006592, "learning_rate": 
0.00010756748750511953, "loss": 0.7217, "step": 5284 }, { "epoch": 1.6969015893401829, "grad_norm": 0.9821035265922546, "learning_rate": 0.00010753268055279329, "loss": 0.5342, "step": 5285 }, { "epoch": 1.6972226681650344, "grad_norm": 0.8994725346565247, "learning_rate": 0.00010749787268263279, "loss": 0.6187, "step": 5286 }, { "epoch": 1.697543746989886, "grad_norm": 1.6420204639434814, "learning_rate": 0.00010746306389887924, "loss": 0.7116, "step": 5287 }, { "epoch": 1.6978648258147375, "grad_norm": 1.0258111953735352, "learning_rate": 0.000107428254205774, "loss": 0.6124, "step": 5288 }, { "epoch": 1.698185904639589, "grad_norm": 1.5440269708633423, "learning_rate": 0.00010739344360755852, "loss": 0.6184, "step": 5289 }, { "epoch": 1.6985069834644406, "grad_norm": 0.8540660738945007, "learning_rate": 0.00010735863210847433, "loss": 0.496, "step": 5290 }, { "epoch": 1.6988280622892922, "grad_norm": 0.9297248721122742, "learning_rate": 0.00010732381971276318, "loss": 0.5762, "step": 5291 }, { "epoch": 1.6991491411141435, "grad_norm": 0.9052714705467224, "learning_rate": 0.0001072890064246668, "loss": 0.5208, "step": 5292 }, { "epoch": 1.699470219938995, "grad_norm": 0.836807370185852, "learning_rate": 0.0001072541922484271, "loss": 0.4136, "step": 5293 }, { "epoch": 1.6997912987638464, "grad_norm": 1.4985709190368652, "learning_rate": 0.0001072193771882861, "loss": 0.9237, "step": 5294 }, { "epoch": 1.700112377588698, "grad_norm": 1.0867552757263184, "learning_rate": 0.00010718456124848583, "loss": 0.5195, "step": 5295 }, { "epoch": 1.7004334564135495, "grad_norm": 1.109985589981079, "learning_rate": 0.0001071497444332686, "loss": 0.7572, "step": 5296 }, { "epoch": 1.700754535238401, "grad_norm": 1.2597237825393677, "learning_rate": 0.00010711492674687671, "loss": 0.603, "step": 5297 }, { "epoch": 1.7010756140632526, "grad_norm": 1.0145076513290405, "learning_rate": 0.00010708010819355256, "loss": 0.5953, "step": 5298 }, { "epoch": 1.7013966928881041, 
"grad_norm": 1.2275587320327759, "learning_rate": 0.0001070452887775387, "loss": 0.6326, "step": 5299 }, { "epoch": 1.7017177717129557, "grad_norm": 0.6137974858283997, "learning_rate": 0.00010701046850307777, "loss": 0.3397, "step": 5300 }, { "epoch": 1.702038850537807, "grad_norm": 0.7091952562332153, "learning_rate": 0.00010697564737441252, "loss": 0.3673, "step": 5301 }, { "epoch": 1.7023599293626586, "grad_norm": 0.9978876113891602, "learning_rate": 0.00010694082539578585, "loss": 0.5951, "step": 5302 }, { "epoch": 1.70268100818751, "grad_norm": 1.3031730651855469, "learning_rate": 0.00010690600257144061, "loss": 0.72, "step": 5303 }, { "epoch": 1.7030020870123614, "grad_norm": 1.1480228900909424, "learning_rate": 0.00010687117890561988, "loss": 0.6365, "step": 5304 }, { "epoch": 1.703323165837213, "grad_norm": 0.8998190760612488, "learning_rate": 0.00010683635440256687, "loss": 0.4303, "step": 5305 }, { "epoch": 1.7036442446620645, "grad_norm": 0.8919349312782288, "learning_rate": 0.00010680152906652483, "loss": 0.5207, "step": 5306 }, { "epoch": 1.703965323486916, "grad_norm": 1.5924605131149292, "learning_rate": 0.00010676670290173709, "loss": 0.96, "step": 5307 }, { "epoch": 1.7042864023117676, "grad_norm": 1.0313186645507812, "learning_rate": 0.00010673187591244714, "loss": 0.5008, "step": 5308 }, { "epoch": 1.7046074811366192, "grad_norm": 0.8978110551834106, "learning_rate": 0.00010669704810289851, "loss": 0.5782, "step": 5309 }, { "epoch": 1.7049285599614705, "grad_norm": 0.9183505177497864, "learning_rate": 0.00010666221947733486, "loss": 0.4536, "step": 5310 }, { "epoch": 1.705249638786322, "grad_norm": 1.1860748529434204, "learning_rate": 0.00010662739004000005, "loss": 0.5646, "step": 5311 }, { "epoch": 1.7055707176111734, "grad_norm": 0.6028177738189697, "learning_rate": 0.0001065925597951378, "loss": 0.3459, "step": 5312 }, { "epoch": 1.705891796436025, "grad_norm": 0.9374269843101501, "learning_rate": 0.00010655772874699217, "loss": 0.4143, 
"step": 5313 }, { "epoch": 1.7062128752608765, "grad_norm": 0.8540414571762085, "learning_rate": 0.00010652289689980714, "loss": 0.4658, "step": 5314 }, { "epoch": 1.706533954085728, "grad_norm": 0.7660170197486877, "learning_rate": 0.00010648806425782695, "loss": 0.687, "step": 5315 }, { "epoch": 1.7068550329105796, "grad_norm": 0.5973314642906189, "learning_rate": 0.00010645323082529581, "loss": 0.7844, "step": 5316 }, { "epoch": 1.7071761117354312, "grad_norm": 0.6430346965789795, "learning_rate": 0.00010641839660645805, "loss": 0.6883, "step": 5317 }, { "epoch": 1.7074971905602827, "grad_norm": 0.7452960014343262, "learning_rate": 0.00010638356160555816, "loss": 0.5505, "step": 5318 }, { "epoch": 1.707818269385134, "grad_norm": 0.7918518781661987, "learning_rate": 0.00010634872582684061, "loss": 0.5305, "step": 5319 }, { "epoch": 1.7081393482099856, "grad_norm": 0.7591331005096436, "learning_rate": 0.00010631388927455013, "loss": 0.2967, "step": 5320 }, { "epoch": 1.708460427034837, "grad_norm": 0.6316062211990356, "learning_rate": 0.00010627905195293135, "loss": 0.3833, "step": 5321 }, { "epoch": 1.7087815058596885, "grad_norm": 0.77117919921875, "learning_rate": 0.00010624421386622916, "loss": 0.5453, "step": 5322 }, { "epoch": 1.70910258468454, "grad_norm": 0.8424499034881592, "learning_rate": 0.00010620937501868841, "loss": 0.684, "step": 5323 }, { "epoch": 1.7094236635093916, "grad_norm": 0.6682990789413452, "learning_rate": 0.0001061745354145542, "loss": 0.4461, "step": 5324 }, { "epoch": 1.7097447423342431, "grad_norm": 0.8375151753425598, "learning_rate": 0.00010613969505807156, "loss": 0.773, "step": 5325 }, { "epoch": 1.7100658211590947, "grad_norm": 0.800901472568512, "learning_rate": 0.00010610485395348571, "loss": 0.6591, "step": 5326 }, { "epoch": 1.7103868999839462, "grad_norm": 0.9238450527191162, "learning_rate": 0.00010607001210504191, "loss": 0.5129, "step": 5327 }, { "epoch": 1.7107079788087975, "grad_norm": 0.8702148199081421, 
"learning_rate": 0.00010603516951698556, "loss": 0.5476, "step": 5328 }, { "epoch": 1.711029057633649, "grad_norm": 0.7507149577140808, "learning_rate": 0.00010600032619356209, "loss": 0.6374, "step": 5329 }, { "epoch": 1.7113501364585004, "grad_norm": 0.9397144317626953, "learning_rate": 0.00010596548213901708, "loss": 0.579, "step": 5330 }, { "epoch": 1.711671215283352, "grad_norm": 0.8215104937553406, "learning_rate": 0.00010593063735759618, "loss": 0.726, "step": 5331 }, { "epoch": 1.7119922941082035, "grad_norm": 1.175023078918457, "learning_rate": 0.0001058957918535451, "loss": 0.6882, "step": 5332 }, { "epoch": 1.712313372933055, "grad_norm": 0.8529446125030518, "learning_rate": 0.00010586094563110964, "loss": 0.6363, "step": 5333 }, { "epoch": 1.7126344517579066, "grad_norm": 1.1046276092529297, "learning_rate": 0.00010582609869453577, "loss": 0.6866, "step": 5334 }, { "epoch": 1.7129555305827582, "grad_norm": 0.8943831920623779, "learning_rate": 0.00010579125104806944, "loss": 0.6993, "step": 5335 }, { "epoch": 1.7132766094076097, "grad_norm": 0.9862803220748901, "learning_rate": 0.00010575640269595675, "loss": 0.6898, "step": 5336 }, { "epoch": 1.713597688232461, "grad_norm": 0.8223783373832703, "learning_rate": 0.00010572155364244382, "loss": 0.6043, "step": 5337 }, { "epoch": 1.7139187670573126, "grad_norm": 0.6628785729408264, "learning_rate": 0.00010568670389177696, "loss": 0.3829, "step": 5338 }, { "epoch": 1.714239845882164, "grad_norm": 1.1625560522079468, "learning_rate": 0.00010565185344820247, "loss": 0.6914, "step": 5339 }, { "epoch": 1.7145609247070155, "grad_norm": 1.0547271966934204, "learning_rate": 0.00010561700231596678, "loss": 0.7001, "step": 5340 }, { "epoch": 1.714882003531867, "grad_norm": 0.8884708881378174, "learning_rate": 0.00010558215049931638, "loss": 0.6464, "step": 5341 }, { "epoch": 1.7152030823567186, "grad_norm": 0.7549629807472229, "learning_rate": 0.00010554729800249792, "loss": 0.4945, "step": 5342 }, { "epoch": 
1.7155241611815701, "grad_norm": 0.7881496548652649, "learning_rate": 0.00010551244482975798, "loss": 0.6036, "step": 5343 }, { "epoch": 1.7158452400064217, "grad_norm": 0.919924259185791, "learning_rate": 0.00010547759098534335, "loss": 0.5523, "step": 5344 }, { "epoch": 1.7161663188312732, "grad_norm": 1.0688406229019165, "learning_rate": 0.00010544273647350092, "loss": 0.4713, "step": 5345 }, { "epoch": 1.7164873976561246, "grad_norm": 1.295059323310852, "learning_rate": 0.00010540788129847756, "loss": 0.6204, "step": 5346 }, { "epoch": 1.7168084764809761, "grad_norm": 1.1029874086380005, "learning_rate": 0.00010537302546452022, "loss": 0.7223, "step": 5347 }, { "epoch": 1.7171295553058274, "grad_norm": 0.7325824499130249, "learning_rate": 0.00010533816897587606, "loss": 0.3234, "step": 5348 }, { "epoch": 1.717450634130679, "grad_norm": 0.9354049563407898, "learning_rate": 0.00010530331183679218, "loss": 0.5902, "step": 5349 }, { "epoch": 1.7177717129555305, "grad_norm": 0.8697565793991089, "learning_rate": 0.00010526845405151586, "loss": 0.4462, "step": 5350 }, { "epoch": 1.718092791780382, "grad_norm": 0.930233895778656, "learning_rate": 0.0001052335956242944, "loss": 0.6314, "step": 5351 }, { "epoch": 1.7184138706052337, "grad_norm": 0.7648993730545044, "learning_rate": 0.00010519873655937516, "loss": 0.471, "step": 5352 }, { "epoch": 1.7187349494300852, "grad_norm": 0.9538902640342712, "learning_rate": 0.00010516387686100566, "loss": 0.5847, "step": 5353 }, { "epoch": 1.7190560282549368, "grad_norm": 0.749451220035553, "learning_rate": 0.00010512901653343344, "loss": 0.4562, "step": 5354 }, { "epoch": 1.719377107079788, "grad_norm": 0.8436141610145569, "learning_rate": 0.00010509415558090609, "loss": 0.5758, "step": 5355 }, { "epoch": 1.7196981859046396, "grad_norm": 1.4007991552352905, "learning_rate": 0.00010505929400767134, "loss": 0.6263, "step": 5356 }, { "epoch": 1.720019264729491, "grad_norm": 0.9559081196784973, "learning_rate": 
0.00010502443181797697, "loss": 0.5488, "step": 5357 }, { "epoch": 1.7203403435543425, "grad_norm": 0.9755504131317139, "learning_rate": 0.00010498956901607083, "loss": 0.6127, "step": 5358 }, { "epoch": 1.720661422379194, "grad_norm": 0.9046206474304199, "learning_rate": 0.00010495470560620083, "loss": 0.4139, "step": 5359 }, { "epoch": 1.7209825012040456, "grad_norm": 0.9097113609313965, "learning_rate": 0.00010491984159261496, "loss": 0.5977, "step": 5360 }, { "epoch": 1.7213035800288972, "grad_norm": 0.5500661730766296, "learning_rate": 0.00010488497697956135, "loss": 0.3321, "step": 5361 }, { "epoch": 1.7216246588537487, "grad_norm": 0.9392353296279907, "learning_rate": 0.00010485011177128807, "loss": 0.4554, "step": 5362 }, { "epoch": 1.7219457376786, "grad_norm": 0.7440015077590942, "learning_rate": 0.00010481524597204342, "loss": 0.419, "step": 5363 }, { "epoch": 1.7222668165034516, "grad_norm": 0.6105871796607971, "learning_rate": 0.00010478037958607568, "loss": 0.4671, "step": 5364 }, { "epoch": 1.7225878953283031, "grad_norm": 0.8352953195571899, "learning_rate": 0.00010474551261763314, "loss": 0.6749, "step": 5365 }, { "epoch": 1.7229089741531545, "grad_norm": 0.5296282768249512, "learning_rate": 0.00010471064507096426, "loss": 0.7636, "step": 5366 }, { "epoch": 1.723230052978006, "grad_norm": 0.717930793762207, "learning_rate": 0.00010467577695031762, "loss": 0.3502, "step": 5367 }, { "epoch": 1.7235511318028576, "grad_norm": 0.6417704820632935, "learning_rate": 0.00010464090825994173, "loss": 0.2754, "step": 5368 }, { "epoch": 1.7238722106277091, "grad_norm": 0.6707653999328613, "learning_rate": 0.00010460603900408523, "loss": 0.1641, "step": 5369 }, { "epoch": 1.7241932894525607, "grad_norm": 2.9203500747680664, "learning_rate": 0.00010457116918699688, "loss": 0.4299, "step": 5370 }, { "epoch": 1.7245143682774122, "grad_norm": 0.8975229859352112, "learning_rate": 0.00010453629881292538, "loss": 0.689, "step": 5371 }, { "epoch": 1.7248354471022636, 
"grad_norm": 0.8682465553283691, "learning_rate": 0.00010450142788611965, "loss": 0.6525, "step": 5372 }, { "epoch": 1.725156525927115, "grad_norm": 1.0204635858535767, "learning_rate": 0.00010446655641082862, "loss": 0.6479, "step": 5373 }, { "epoch": 1.7254776047519667, "grad_norm": 0.8298991918563843, "learning_rate": 0.00010443168439130122, "loss": 0.4763, "step": 5374 }, { "epoch": 1.725798683576818, "grad_norm": 0.9152095913887024, "learning_rate": 0.0001043968118317865, "loss": 0.6022, "step": 5375 }, { "epoch": 1.7261197624016695, "grad_norm": 0.5959125757217407, "learning_rate": 0.00010436193873653361, "loss": 0.414, "step": 5376 }, { "epoch": 1.726440841226521, "grad_norm": 1.1426783800125122, "learning_rate": 0.00010432706510979171, "loss": 0.6795, "step": 5377 }, { "epoch": 1.7267619200513726, "grad_norm": 0.8990039825439453, "learning_rate": 0.00010429219095581007, "loss": 0.6957, "step": 5378 }, { "epoch": 1.7270829988762242, "grad_norm": 0.9482988119125366, "learning_rate": 0.00010425731627883797, "loss": 0.575, "step": 5379 }, { "epoch": 1.7274040777010757, "grad_norm": 0.8645418286323547, "learning_rate": 0.0001042224410831248, "loss": 0.5164, "step": 5380 }, { "epoch": 1.727725156525927, "grad_norm": 0.8730982542037964, "learning_rate": 0.00010418756537291996, "loss": 0.5756, "step": 5381 }, { "epoch": 1.7280462353507786, "grad_norm": 0.8636887669563293, "learning_rate": 0.00010415268915247303, "loss": 0.6095, "step": 5382 }, { "epoch": 1.7283673141756302, "grad_norm": 1.0389565229415894, "learning_rate": 0.00010411781242603352, "loss": 0.6065, "step": 5383 }, { "epoch": 1.7286883930004815, "grad_norm": 1.2318190336227417, "learning_rate": 0.00010408293519785101, "loss": 0.6049, "step": 5384 }, { "epoch": 1.729009471825333, "grad_norm": 1.0228158235549927, "learning_rate": 0.00010404805747217526, "loss": 0.7854, "step": 5385 }, { "epoch": 1.7293305506501846, "grad_norm": 0.8208790421485901, "learning_rate": 0.00010401317925325598, "loss": 0.5619, 
"step": 5386 }, { "epoch": 1.7296516294750361, "grad_norm": 0.8615589737892151, "learning_rate": 0.000103978300545343, "loss": 0.5876, "step": 5387 }, { "epoch": 1.7299727082998877, "grad_norm": 0.7015345096588135, "learning_rate": 0.00010394342135268613, "loss": 0.4422, "step": 5388 }, { "epoch": 1.7302937871247392, "grad_norm": 0.7104688286781311, "learning_rate": 0.00010390854167953537, "loss": 0.5317, "step": 5389 }, { "epoch": 1.7306148659495906, "grad_norm": 1.4996254444122314, "learning_rate": 0.00010387366153014062, "loss": 0.6955, "step": 5390 }, { "epoch": 1.7309359447744421, "grad_norm": 1.07838773727417, "learning_rate": 0.00010383878090875201, "loss": 0.4943, "step": 5391 }, { "epoch": 1.7312570235992937, "grad_norm": 1.10397207736969, "learning_rate": 0.00010380389981961958, "loss": 0.8043, "step": 5392 }, { "epoch": 1.731578102424145, "grad_norm": 1.2021815776824951, "learning_rate": 0.00010376901826699348, "loss": 0.5963, "step": 5393 }, { "epoch": 1.7318991812489966, "grad_norm": 0.8675143122673035, "learning_rate": 0.00010373413625512394, "loss": 0.5774, "step": 5394 }, { "epoch": 1.732220260073848, "grad_norm": 1.3191437721252441, "learning_rate": 0.0001036992537882612, "loss": 0.7901, "step": 5395 }, { "epoch": 1.7325413388986997, "grad_norm": 0.9630816578865051, "learning_rate": 0.00010366437087065564, "loss": 0.6245, "step": 5396 }, { "epoch": 1.7328624177235512, "grad_norm": 0.8633376359939575, "learning_rate": 0.00010362948750655759, "loss": 0.6033, "step": 5397 }, { "epoch": 1.7331834965484028, "grad_norm": 1.0866016149520874, "learning_rate": 0.0001035946037002175, "loss": 0.6458, "step": 5398 }, { "epoch": 1.733504575373254, "grad_norm": 1.0186740159988403, "learning_rate": 0.00010355971945588585, "loss": 0.6645, "step": 5399 }, { "epoch": 1.7338256541981056, "grad_norm": 1.092057228088379, "learning_rate": 0.0001035248347778132, "loss": 0.5861, "step": 5400 }, { "epoch": 1.7341467330229572, "grad_norm": 1.0183131694793701, 
"learning_rate": 0.00010348994967025012, "loss": 0.518, "step": 5401 }, { "epoch": 1.7344678118478085, "grad_norm": 0.9326373338699341, "learning_rate": 0.00010345506413744726, "loss": 0.5674, "step": 5402 }, { "epoch": 1.73478889067266, "grad_norm": 0.947853684425354, "learning_rate": 0.0001034201781836553, "loss": 0.5808, "step": 5403 }, { "epoch": 1.7351099694975116, "grad_norm": 0.948047935962677, "learning_rate": 0.00010338529181312497, "loss": 0.4687, "step": 5404 }, { "epoch": 1.7354310483223632, "grad_norm": 1.0557464361190796, "learning_rate": 0.00010335040503010716, "loss": 0.5766, "step": 5405 }, { "epoch": 1.7357521271472147, "grad_norm": 1.128117322921753, "learning_rate": 0.00010331551783885263, "loss": 0.5139, "step": 5406 }, { "epoch": 1.7360732059720663, "grad_norm": 0.9319320917129517, "learning_rate": 0.00010328063024361232, "loss": 0.5615, "step": 5407 }, { "epoch": 1.7363942847969176, "grad_norm": 1.243923544883728, "learning_rate": 0.00010324574224863717, "loss": 0.6024, "step": 5408 }, { "epoch": 1.7367153636217691, "grad_norm": 1.1786973476409912, "learning_rate": 0.00010321085385817817, "loss": 0.5292, "step": 5409 }, { "epoch": 1.7370364424466207, "grad_norm": 1.0594767332077026, "learning_rate": 0.00010317596507648636, "loss": 0.5848, "step": 5410 }, { "epoch": 1.737357521271472, "grad_norm": 0.8402320742607117, "learning_rate": 0.00010314107590781284, "loss": 0.4564, "step": 5411 }, { "epoch": 1.7376786000963236, "grad_norm": 1.0813262462615967, "learning_rate": 0.00010310618635640876, "loss": 0.465, "step": 5412 }, { "epoch": 1.7379996789211751, "grad_norm": 0.6268415451049805, "learning_rate": 0.00010307129642652528, "loss": 0.4441, "step": 5413 }, { "epoch": 1.7383207577460267, "grad_norm": 0.6250267028808594, "learning_rate": 0.00010303640612241363, "loss": 0.3773, "step": 5414 }, { "epoch": 1.7386418365708782, "grad_norm": 0.6857768893241882, "learning_rate": 0.00010300151544832512, "loss": 0.6693, "step": 5415 }, { "epoch": 
1.7389629153957298, "grad_norm": 0.5000283122062683, "learning_rate": 0.00010296662440851108, "loss": 0.6734, "step": 5416 }, { "epoch": 1.739283994220581, "grad_norm": 0.5204218626022339, "learning_rate": 0.00010293173300722285, "loss": 0.6129, "step": 5417 }, { "epoch": 1.7396050730454327, "grad_norm": 0.6385371685028076, "learning_rate": 0.00010289684124871182, "loss": 0.6554, "step": 5418 }, { "epoch": 1.7399261518702842, "grad_norm": 0.5886783599853516, "learning_rate": 0.00010286194913722948, "loss": 0.1828, "step": 5419 }, { "epoch": 1.7402472306951355, "grad_norm": 0.9899042844772339, "learning_rate": 0.00010282705667702734, "loss": 0.3291, "step": 5420 }, { "epoch": 1.740568309519987, "grad_norm": 0.702520489692688, "learning_rate": 0.0001027921638723569, "loss": 0.3839, "step": 5421 }, { "epoch": 1.7408893883448386, "grad_norm": 0.5995388627052307, "learning_rate": 0.00010275727072746977, "loss": 0.2942, "step": 5422 }, { "epoch": 1.7412104671696902, "grad_norm": 1.077636957168579, "learning_rate": 0.00010272237724661753, "loss": 0.7508, "step": 5423 }, { "epoch": 1.7415315459945417, "grad_norm": 0.8297303915023804, "learning_rate": 0.00010268748343405192, "loss": 0.6525, "step": 5424 }, { "epoch": 1.7418526248193933, "grad_norm": 0.8193151950836182, "learning_rate": 0.0001026525892940246, "loss": 0.6898, "step": 5425 }, { "epoch": 1.7421737036442446, "grad_norm": 1.1228710412979126, "learning_rate": 0.00010261769483078733, "loss": 0.7282, "step": 5426 }, { "epoch": 1.7424947824690962, "grad_norm": 0.6626234650611877, "learning_rate": 0.00010258280004859188, "loss": 0.4688, "step": 5427 }, { "epoch": 1.7428158612939475, "grad_norm": 1.0781296491622925, "learning_rate": 0.00010254790495169006, "loss": 0.5331, "step": 5428 }, { "epoch": 1.743136940118799, "grad_norm": 0.7310062646865845, "learning_rate": 0.00010251300954433376, "loss": 0.5715, "step": 5429 }, { "epoch": 1.7434580189436506, "grad_norm": 0.6906360387802124, "learning_rate": 
0.00010247811383077487, "loss": 0.474, "step": 5430 }, { "epoch": 1.7437790977685022, "grad_norm": 0.8053506016731262, "learning_rate": 0.00010244321781526533, "loss": 0.5084, "step": 5431 }, { "epoch": 1.7441001765933537, "grad_norm": 0.930061399936676, "learning_rate": 0.0001024083215020571, "loss": 0.6584, "step": 5432 }, { "epoch": 1.7444212554182053, "grad_norm": 1.089030146598816, "learning_rate": 0.0001023734248954022, "loss": 0.6163, "step": 5433 }, { "epoch": 1.7447423342430568, "grad_norm": 0.9254440665245056, "learning_rate": 0.00010233852799955268, "loss": 0.6622, "step": 5434 }, { "epoch": 1.7450634130679081, "grad_norm": 0.9075636267662048, "learning_rate": 0.00010230363081876064, "loss": 0.6482, "step": 5435 }, { "epoch": 1.7453844918927597, "grad_norm": 1.084458589553833, "learning_rate": 0.00010226873335727816, "loss": 0.6901, "step": 5436 }, { "epoch": 1.745705570717611, "grad_norm": 1.6565884351730347, "learning_rate": 0.00010223383561935738, "loss": 0.597, "step": 5437 }, { "epoch": 1.7460266495424626, "grad_norm": 0.997448742389679, "learning_rate": 0.00010219893760925052, "loss": 0.6115, "step": 5438 }, { "epoch": 1.7463477283673141, "grad_norm": 0.9751862287521362, "learning_rate": 0.00010216403933120979, "loss": 0.5718, "step": 5439 }, { "epoch": 1.7466688071921657, "grad_norm": 0.9030267596244812, "learning_rate": 0.0001021291407894874, "loss": 0.6273, "step": 5440 }, { "epoch": 1.7469898860170172, "grad_norm": 1.0032966136932373, "learning_rate": 0.0001020942419883357, "loss": 0.6734, "step": 5441 }, { "epoch": 1.7473109648418688, "grad_norm": 0.7883462309837341, "learning_rate": 0.00010205934293200696, "loss": 0.4985, "step": 5442 }, { "epoch": 1.7476320436667203, "grad_norm": 1.4333808422088623, "learning_rate": 0.00010202444362475352, "loss": 0.5427, "step": 5443 }, { "epoch": 1.7479531224915716, "grad_norm": 0.9737802147865295, "learning_rate": 0.0001019895440708278, "loss": 0.6445, "step": 5444 }, { "epoch": 1.7482742013164232, 
"grad_norm": 0.8467430472373962, "learning_rate": 0.00010195464427448213, "loss": 0.5475, "step": 5445 }, { "epoch": 1.7485952801412745, "grad_norm": 0.855104923248291, "learning_rate": 0.000101919744239969, "loss": 0.5532, "step": 5446 }, { "epoch": 1.748916358966126, "grad_norm": 1.96385657787323, "learning_rate": 0.00010188484397154084, "loss": 0.6679, "step": 5447 }, { "epoch": 1.7492374377909776, "grad_norm": 0.9854167699813843, "learning_rate": 0.00010184994347345016, "loss": 0.5125, "step": 5448 }, { "epoch": 1.7495585166158292, "grad_norm": 1.1345828771591187, "learning_rate": 0.00010181504274994948, "loss": 0.577, "step": 5449 }, { "epoch": 1.7498795954406807, "grad_norm": 0.941738486289978, "learning_rate": 0.00010178014180529136, "loss": 0.4594, "step": 5450 }, { "epoch": 1.7502006742655323, "grad_norm": 0.9602155685424805, "learning_rate": 0.00010174524064372837, "loss": 0.7691, "step": 5451 }, { "epoch": 1.7505217530903838, "grad_norm": 1.012502908706665, "learning_rate": 0.00010171033926951305, "loss": 0.6743, "step": 5452 }, { "epoch": 1.7508428319152352, "grad_norm": 1.0123480558395386, "learning_rate": 0.00010167543768689815, "loss": 0.5418, "step": 5453 }, { "epoch": 1.7511639107400867, "grad_norm": 0.9171655774116516, "learning_rate": 0.00010164053590013622, "loss": 0.4985, "step": 5454 }, { "epoch": 1.751484989564938, "grad_norm": 0.9921781420707703, "learning_rate": 0.00010160563391347996, "loss": 0.6584, "step": 5455 }, { "epoch": 1.7518060683897896, "grad_norm": 1.2568939924240112, "learning_rate": 0.00010157073173118208, "loss": 0.7265, "step": 5456 }, { "epoch": 1.7521271472146411, "grad_norm": 1.0861806869506836, "learning_rate": 0.0001015358293574953, "loss": 0.6727, "step": 5457 }, { "epoch": 1.7524482260394927, "grad_norm": 0.9609785676002502, "learning_rate": 0.00010150092679667238, "loss": 0.52, "step": 5458 }, { "epoch": 1.7527693048643442, "grad_norm": 1.046840786933899, "learning_rate": 0.00010146602405296607, "loss": 0.6536, 
"step": 5459 }, { "epoch": 1.7530903836891958, "grad_norm": 1.095132827758789, "learning_rate": 0.00010143112113062918, "loss": 0.467, "step": 5460 }, { "epoch": 1.7534114625140473, "grad_norm": 0.7272664904594421, "learning_rate": 0.00010139621803391455, "loss": 0.4159, "step": 5461 }, { "epoch": 1.7537325413388987, "grad_norm": 0.7782832980155945, "learning_rate": 0.00010136131476707496, "loss": 0.5137, "step": 5462 }, { "epoch": 1.7540536201637502, "grad_norm": 1.0231236219406128, "learning_rate": 0.0001013264113343633, "loss": 0.5287, "step": 5463 }, { "epoch": 1.7543746989886015, "grad_norm": 0.7744680643081665, "learning_rate": 0.00010129150774003245, "loss": 0.4127, "step": 5464 }, { "epoch": 1.754695777813453, "grad_norm": 0.7057910561561584, "learning_rate": 0.00010125660398833528, "loss": 0.6325, "step": 5465 }, { "epoch": 1.7550168566383046, "grad_norm": 0.5882181525230408, "learning_rate": 0.00010122170008352472, "loss": 1.0208, "step": 5466 }, { "epoch": 1.7553379354631562, "grad_norm": 0.5369687080383301, "learning_rate": 0.00010118679602985373, "loss": 0.6156, "step": 5467 }, { "epoch": 1.7556590142880077, "grad_norm": 0.8291602730751038, "learning_rate": 0.00010115189183157523, "loss": 0.4136, "step": 5468 }, { "epoch": 1.7559800931128593, "grad_norm": 0.6269962787628174, "learning_rate": 0.00010111698749294223, "loss": 0.2929, "step": 5469 }, { "epoch": 1.7563011719377108, "grad_norm": 0.68831467628479, "learning_rate": 0.00010108208301820767, "loss": 0.2129, "step": 5470 }, { "epoch": 1.7566222507625622, "grad_norm": 0.7378066778182983, "learning_rate": 0.00010104717841162458, "loss": 0.1843, "step": 5471 }, { "epoch": 1.7569433295874137, "grad_norm": 0.9951292276382446, "learning_rate": 0.000101012273677446, "loss": 0.5378, "step": 5472 }, { "epoch": 1.757264408412265, "grad_norm": 0.9426655173301697, "learning_rate": 0.00010097736881992492, "loss": 0.6999, "step": 5473 }, { "epoch": 1.7575854872371166, "grad_norm": 0.7657575607299805, 
"learning_rate": 0.00010094246384331442, "loss": 0.5443, "step": 5474 }, { "epoch": 1.7579065660619682, "grad_norm": 1.0222439765930176, "learning_rate": 0.00010090755875186753, "loss": 0.8449, "step": 5475 }, { "epoch": 1.7582276448868197, "grad_norm": 0.7700451016426086, "learning_rate": 0.0001008726535498374, "loss": 0.729, "step": 5476 }, { "epoch": 1.7585487237116713, "grad_norm": 0.9244915246963501, "learning_rate": 0.00010083774824147708, "loss": 0.595, "step": 5477 }, { "epoch": 1.7588698025365228, "grad_norm": 0.7973812222480774, "learning_rate": 0.00010080284283103965, "loss": 0.6733, "step": 5478 }, { "epoch": 1.7591908813613744, "grad_norm": 0.7147784233093262, "learning_rate": 0.00010076793732277829, "loss": 0.5151, "step": 5479 }, { "epoch": 1.7595119601862257, "grad_norm": 0.7087793946266174, "learning_rate": 0.00010073303172094606, "loss": 0.4801, "step": 5480 }, { "epoch": 1.7598330390110772, "grad_norm": 0.9641126394271851, "learning_rate": 0.00010069812602979615, "loss": 0.5686, "step": 5481 }, { "epoch": 1.7601541178359286, "grad_norm": 0.755591630935669, "learning_rate": 0.00010066322025358173, "loss": 0.4766, "step": 5482 }, { "epoch": 1.7604751966607801, "grad_norm": 0.9040724635124207, "learning_rate": 0.00010062831439655591, "loss": 0.578, "step": 5483 }, { "epoch": 1.7607962754856317, "grad_norm": 0.9179210066795349, "learning_rate": 0.00010059340846297189, "loss": 0.7127, "step": 5484 }, { "epoch": 1.7611173543104832, "grad_norm": 0.9386594891548157, "learning_rate": 0.00010055850245708283, "loss": 0.6266, "step": 5485 }, { "epoch": 1.7614384331353348, "grad_norm": 0.9094860553741455, "learning_rate": 0.00010052359638314195, "loss": 0.5722, "step": 5486 }, { "epoch": 1.7617595119601863, "grad_norm": 1.1115118265151978, "learning_rate": 0.00010048869024540247, "loss": 0.6238, "step": 5487 }, { "epoch": 1.7620805907850379, "grad_norm": 0.83982914686203, "learning_rate": 0.00010045378404811756, "loss": 0.6233, "step": 5488 }, { "epoch": 
1.7624016696098892, "grad_norm": 1.0576977729797363, "learning_rate": 0.0001004188777955404, "loss": 0.7304, "step": 5489 }, { "epoch": 1.7627227484347407, "grad_norm": 0.9903758764266968, "learning_rate": 0.00010038397149192426, "loss": 0.5852, "step": 5490 }, { "epoch": 1.763043827259592, "grad_norm": 1.2004951238632202, "learning_rate": 0.00010034906514152238, "loss": 0.6996, "step": 5491 }, { "epoch": 1.7633649060844436, "grad_norm": 1.251308798789978, "learning_rate": 0.00010031415874858797, "loss": 0.7741, "step": 5492 }, { "epoch": 1.7636859849092952, "grad_norm": 1.0517253875732422, "learning_rate": 0.00010027925231737428, "loss": 0.4877, "step": 5493 }, { "epoch": 1.7640070637341467, "grad_norm": 1.2016655206680298, "learning_rate": 0.00010024434585213451, "loss": 0.6398, "step": 5494 }, { "epoch": 1.7643281425589983, "grad_norm": 1.0655850172042847, "learning_rate": 0.00010020943935712192, "loss": 0.5722, "step": 5495 }, { "epoch": 1.7646492213838498, "grad_norm": 1.114829182624817, "learning_rate": 0.00010017453283658984, "loss": 0.5166, "step": 5496 }, { "epoch": 1.7649703002087014, "grad_norm": 0.8498656153678894, "learning_rate": 0.00010013962629479146, "loss": 0.5585, "step": 5497 }, { "epoch": 1.7652913790335527, "grad_norm": 0.9619441628456116, "learning_rate": 0.00010010471973598002, "loss": 0.6372, "step": 5498 }, { "epoch": 1.7656124578584043, "grad_norm": 0.7192568778991699, "learning_rate": 0.00010006981316440875, "loss": 0.4638, "step": 5499 }, { "epoch": 1.7659335366832556, "grad_norm": 0.8864771127700806, "learning_rate": 0.00010003490658433101, "loss": 0.6032, "step": 5500 }, { "epoch": 1.7662546155081071, "grad_norm": 1.0561635494232178, "learning_rate": 0.0001, "loss": 0.5758, "step": 5501 }, { "epoch": 1.7665756943329587, "grad_norm": 0.9847275018692017, "learning_rate": 9.996509341566904e-05, "loss": 0.5311, "step": 5502 }, { "epoch": 1.7668967731578102, "grad_norm": 0.7542006373405457, "learning_rate": 9.993018683559126e-05, "loss": 
0.4759, "step": 5503 }, { "epoch": 1.7672178519826618, "grad_norm": 1.13614821434021, "learning_rate": 9.989528026402003e-05, "loss": 0.4959, "step": 5504 }, { "epoch": 1.7675389308075133, "grad_norm": 0.8526762127876282, "learning_rate": 9.986037370520857e-05, "loss": 0.5481, "step": 5505 }, { "epoch": 1.767860009632365, "grad_norm": 0.771710216999054, "learning_rate": 9.98254671634102e-05, "loss": 0.5153, "step": 5506 }, { "epoch": 1.7681810884572162, "grad_norm": 0.8657694458961487, "learning_rate": 9.979056064287806e-05, "loss": 0.5242, "step": 5507 }, { "epoch": 1.7685021672820678, "grad_norm": 0.9876973628997803, "learning_rate": 9.975565414786551e-05, "loss": 0.5711, "step": 5508 }, { "epoch": 1.768823246106919, "grad_norm": 0.7581015229225159, "learning_rate": 9.972074768262576e-05, "loss": 0.5151, "step": 5509 }, { "epoch": 1.7691443249317707, "grad_norm": 0.7137047052383423, "learning_rate": 9.968584125141204e-05, "loss": 0.3444, "step": 5510 }, { "epoch": 1.7694654037566222, "grad_norm": 0.4818606376647949, "learning_rate": 9.965093485847767e-05, "loss": 0.3025, "step": 5511 }, { "epoch": 1.7697864825814738, "grad_norm": 0.567258894443512, "learning_rate": 9.961602850807576e-05, "loss": 0.3447, "step": 5512 }, { "epoch": 1.7701075614063253, "grad_norm": 0.9824522137641907, "learning_rate": 9.958112220445963e-05, "loss": 0.3899, "step": 5513 }, { "epoch": 1.7704286402311769, "grad_norm": 0.9963672161102295, "learning_rate": 9.954621595188247e-05, "loss": 0.4292, "step": 5514 }, { "epoch": 1.7707497190560284, "grad_norm": 0.6725934147834778, "learning_rate": 9.951130975459757e-05, "loss": 0.8659, "step": 5515 }, { "epoch": 1.7710707978808797, "grad_norm": 0.6370208263397217, "learning_rate": 9.947640361685804e-05, "loss": 1.0013, "step": 5516 }, { "epoch": 1.7713918767057313, "grad_norm": 0.7263522744178772, "learning_rate": 9.94414975429172e-05, "loss": 0.1642, "step": 5517 }, { "epoch": 1.7717129555305826, "grad_norm": 0.8269543647766113, 
"learning_rate": 9.940659153702813e-05, "loss": 0.3893, "step": 5518 }, { "epoch": 1.7720340343554342, "grad_norm": 0.6357668042182922, "learning_rate": 9.937168560344412e-05, "loss": 0.1586, "step": 5519 }, { "epoch": 1.7723551131802857, "grad_norm": 0.5424615144729614, "learning_rate": 9.933677974641831e-05, "loss": 0.2211, "step": 5520 }, { "epoch": 1.7726761920051373, "grad_norm": 0.8307551741600037, "learning_rate": 9.930187397020386e-05, "loss": 0.5533, "step": 5521 }, { "epoch": 1.7729972708299888, "grad_norm": 0.8896917104721069, "learning_rate": 9.926696827905395e-05, "loss": 0.5725, "step": 5522 }, { "epoch": 1.7733183496548404, "grad_norm": 1.096503496170044, "learning_rate": 9.923206267722173e-05, "loss": 0.8626, "step": 5523 }, { "epoch": 1.773639428479692, "grad_norm": 0.8159449696540833, "learning_rate": 9.919715716896036e-05, "loss": 0.6556, "step": 5524 }, { "epoch": 1.7739605073045432, "grad_norm": 0.8842390775680542, "learning_rate": 9.916225175852293e-05, "loss": 0.6443, "step": 5525 }, { "epoch": 1.7742815861293948, "grad_norm": 0.7416340708732605, "learning_rate": 9.912734645016263e-05, "loss": 0.4824, "step": 5526 }, { "epoch": 1.7746026649542461, "grad_norm": 0.804015040397644, "learning_rate": 9.909244124813247e-05, "loss": 0.4857, "step": 5527 }, { "epoch": 1.7749237437790977, "grad_norm": 0.7028852105140686, "learning_rate": 9.90575361566856e-05, "loss": 0.4962, "step": 5528 }, { "epoch": 1.7752448226039492, "grad_norm": 0.8853591084480286, "learning_rate": 9.902263118007513e-05, "loss": 0.6105, "step": 5529 }, { "epoch": 1.7755659014288008, "grad_norm": 0.9714551568031311, "learning_rate": 9.898772632255403e-05, "loss": 0.6712, "step": 5530 }, { "epoch": 1.7758869802536523, "grad_norm": 0.8881363272666931, "learning_rate": 9.895282158837545e-05, "loss": 0.5934, "step": 5531 }, { "epoch": 1.7762080590785039, "grad_norm": 1.1298437118530273, "learning_rate": 9.891791698179235e-05, "loss": 0.6977, "step": 5532 }, { "epoch": 
1.7765291379033554, "grad_norm": 0.7341294288635254, "learning_rate": 9.888301250705779e-05, "loss": 0.522, "step": 5533 }, { "epoch": 1.7768502167282068, "grad_norm": 0.9081159234046936, "learning_rate": 9.884810816842475e-05, "loss": 0.6074, "step": 5534 }, { "epoch": 1.7771712955530583, "grad_norm": 1.22751784324646, "learning_rate": 9.881320397014629e-05, "loss": 0.7584, "step": 5535 }, { "epoch": 1.7774923743779096, "grad_norm": 0.7882629632949829, "learning_rate": 9.877829991647528e-05, "loss": 0.4237, "step": 5536 }, { "epoch": 1.7778134532027612, "grad_norm": 0.7696181535720825, "learning_rate": 9.874339601166473e-05, "loss": 0.3827, "step": 5537 }, { "epoch": 1.7781345320276127, "grad_norm": 1.0486514568328857, "learning_rate": 9.87084922599676e-05, "loss": 0.4434, "step": 5538 }, { "epoch": 1.7784556108524643, "grad_norm": 0.6344658732414246, "learning_rate": 9.867358866563673e-05, "loss": 0.3858, "step": 5539 }, { "epoch": 1.7787766896773158, "grad_norm": 0.9014244079589844, "learning_rate": 9.863868523292509e-05, "loss": 0.6374, "step": 5540 }, { "epoch": 1.7790977685021674, "grad_norm": 0.9677129983901978, "learning_rate": 9.860378196608549e-05, "loss": 0.652, "step": 5541 }, { "epoch": 1.779418847327019, "grad_norm": 1.439113736152649, "learning_rate": 9.856887886937083e-05, "loss": 0.7972, "step": 5542 }, { "epoch": 1.7797399261518703, "grad_norm": 1.0146809816360474, "learning_rate": 9.853397594703394e-05, "loss": 0.5427, "step": 5543 }, { "epoch": 1.7800610049767218, "grad_norm": 1.0371732711791992, "learning_rate": 9.849907320332766e-05, "loss": 0.526, "step": 5544 }, { "epoch": 1.7803820838015731, "grad_norm": 0.9912870526313782, "learning_rate": 9.846417064250471e-05, "loss": 0.6929, "step": 5545 }, { "epoch": 1.7807031626264247, "grad_norm": 0.9019957780838013, "learning_rate": 9.842926826881796e-05, "loss": 0.5505, "step": 5546 }, { "epoch": 1.7810242414512762, "grad_norm": 0.9842864274978638, "learning_rate": 9.839436608652007e-05, "loss": 
0.6031, "step": 5547 }, { "epoch": 1.7813453202761278, "grad_norm": 1.212700605392456, "learning_rate": 9.83594640998638e-05, "loss": 0.5913, "step": 5548 }, { "epoch": 1.7816663991009793, "grad_norm": 1.029821753501892, "learning_rate": 9.832456231310189e-05, "loss": 0.546, "step": 5549 }, { "epoch": 1.781987477925831, "grad_norm": 0.77622389793396, "learning_rate": 9.828966073048693e-05, "loss": 0.4759, "step": 5550 }, { "epoch": 1.7823085567506824, "grad_norm": 0.9894078373908997, "learning_rate": 9.825475935627165e-05, "loss": 0.6129, "step": 5551 }, { "epoch": 1.7826296355755338, "grad_norm": 0.7435919642448425, "learning_rate": 9.821985819470863e-05, "loss": 0.5363, "step": 5552 }, { "epoch": 1.7829507144003853, "grad_norm": 0.7949634790420532, "learning_rate": 9.818495725005054e-05, "loss": 0.492, "step": 5553 }, { "epoch": 1.7832717932252367, "grad_norm": 0.7472631931304932, "learning_rate": 9.815005652654985e-05, "loss": 0.4438, "step": 5554 }, { "epoch": 1.7835928720500882, "grad_norm": 1.0186270475387573, "learning_rate": 9.81151560284592e-05, "loss": 0.602, "step": 5555 }, { "epoch": 1.7839139508749398, "grad_norm": 0.807303786277771, "learning_rate": 9.808025576003104e-05, "loss": 0.5087, "step": 5556 }, { "epoch": 1.7842350296997913, "grad_norm": 0.872272253036499, "learning_rate": 9.804535572551789e-05, "loss": 0.5801, "step": 5557 }, { "epoch": 1.7845561085246429, "grad_norm": 0.9321494102478027, "learning_rate": 9.801045592917226e-05, "loss": 0.4829, "step": 5558 }, { "epoch": 1.7848771873494944, "grad_norm": 1.4904452562332153, "learning_rate": 9.797555637524649e-05, "loss": 0.5219, "step": 5559 }, { "epoch": 1.785198266174346, "grad_norm": 1.060113787651062, "learning_rate": 9.794065706799306e-05, "loss": 0.641, "step": 5560 }, { "epoch": 1.7855193449991973, "grad_norm": 0.7635378241539001, "learning_rate": 9.790575801166432e-05, "loss": 0.4252, "step": 5561 }, { "epoch": 1.7858404238240488, "grad_norm": 0.670326292514801, "learning_rate": 
9.78708592105126e-05, "loss": 0.3945, "step": 5562 }, { "epoch": 1.7861615026489002, "grad_norm": 0.8592783212661743, "learning_rate": 9.783596066879022e-05, "loss": 0.3788, "step": 5563 }, { "epoch": 1.7864825814737517, "grad_norm": 0.7472242712974548, "learning_rate": 9.78010623907495e-05, "loss": 0.353, "step": 5564 }, { "epoch": 1.7868036602986033, "grad_norm": 0.5249154567718506, "learning_rate": 9.776616438064264e-05, "loss": 0.6485, "step": 5565 }, { "epoch": 1.7871247391234548, "grad_norm": 0.6192828416824341, "learning_rate": 9.773126664272186e-05, "loss": 0.9866, "step": 5566 }, { "epoch": 1.7874458179483064, "grad_norm": 0.5821790099143982, "learning_rate": 9.76963691812394e-05, "loss": 0.3801, "step": 5567 }, { "epoch": 1.787766896773158, "grad_norm": 0.7041050791740417, "learning_rate": 9.766147200044732e-05, "loss": 0.2845, "step": 5568 }, { "epoch": 1.7880879755980095, "grad_norm": 0.7531072497367859, "learning_rate": 9.762657510459783e-05, "loss": 0.2994, "step": 5569 }, { "epoch": 1.7884090544228608, "grad_norm": 0.6308495998382568, "learning_rate": 9.759167849794292e-05, "loss": 0.1686, "step": 5570 }, { "epoch": 1.7887301332477124, "grad_norm": 0.5816466808319092, "learning_rate": 9.755678218473469e-05, "loss": 0.2938, "step": 5571 }, { "epoch": 1.7890512120725637, "grad_norm": 0.8111523389816284, "learning_rate": 9.752188616922518e-05, "loss": 0.7485, "step": 5572 }, { "epoch": 1.7893722908974152, "grad_norm": 0.9488875865936279, "learning_rate": 9.748699045566626e-05, "loss": 0.6345, "step": 5573 }, { "epoch": 1.7896933697222668, "grad_norm": 0.9654393196105957, "learning_rate": 9.745209504830996e-05, "loss": 0.669, "step": 5574 }, { "epoch": 1.7900144485471183, "grad_norm": 0.9864271879196167, "learning_rate": 9.741719995140814e-05, "loss": 0.6693, "step": 5575 }, { "epoch": 1.7903355273719699, "grad_norm": 0.7702199220657349, "learning_rate": 9.73823051692127e-05, "loss": 0.6806, "step": 5576 }, { "epoch": 1.7906566061968214, "grad_norm": 
0.7644592523574829, "learning_rate": 9.734741070597539e-05, "loss": 0.6401, "step": 5577 }, { "epoch": 1.790977685021673, "grad_norm": 0.8069849610328674, "learning_rate": 9.73125165659481e-05, "loss": 0.5884, "step": 5578 }, { "epoch": 1.7912987638465243, "grad_norm": 1.0152876377105713, "learning_rate": 9.727762275338246e-05, "loss": 0.5491, "step": 5579 }, { "epoch": 1.7916198426713759, "grad_norm": 0.7692520618438721, "learning_rate": 9.724272927253025e-05, "loss": 0.5617, "step": 5580 }, { "epoch": 1.7919409214962272, "grad_norm": 0.7524055242538452, "learning_rate": 9.720783612764314e-05, "loss": 0.535, "step": 5581 }, { "epoch": 1.7922620003210787, "grad_norm": 0.7909640669822693, "learning_rate": 9.717294332297268e-05, "loss": 0.4679, "step": 5582 }, { "epoch": 1.7925830791459303, "grad_norm": 1.540922999382019, "learning_rate": 9.713805086277054e-05, "loss": 0.6385, "step": 5583 }, { "epoch": 1.7929041579707818, "grad_norm": 1.7500687837600708, "learning_rate": 9.710315875128819e-05, "loss": 0.6036, "step": 5584 }, { "epoch": 1.7932252367956334, "grad_norm": 0.8995754718780518, "learning_rate": 9.706826699277718e-05, "loss": 0.7047, "step": 5585 }, { "epoch": 1.793546315620485, "grad_norm": 0.9600510001182556, "learning_rate": 9.703337559148892e-05, "loss": 0.4654, "step": 5586 }, { "epoch": 1.7938673944453365, "grad_norm": 3.3275082111358643, "learning_rate": 9.699848455167489e-05, "loss": 0.7396, "step": 5587 }, { "epoch": 1.7941884732701878, "grad_norm": 1.0256354808807373, "learning_rate": 9.696359387758636e-05, "loss": 0.7023, "step": 5588 }, { "epoch": 1.7945095520950394, "grad_norm": 0.8502017855644226, "learning_rate": 9.692870357347473e-05, "loss": 0.4102, "step": 5589 }, { "epoch": 1.7948306309198907, "grad_norm": 0.9669037461280823, "learning_rate": 9.689381364359129e-05, "loss": 0.668, "step": 5590 }, { "epoch": 1.7951517097447423, "grad_norm": 1.028659462928772, "learning_rate": 9.685892409218717e-05, "loss": 0.493, "step": 5591 }, { "epoch": 
1.7954727885695938, "grad_norm": 0.9129486680030823, "learning_rate": 9.682403492351369e-05, "loss": 0.6085, "step": 5592 }, { "epoch": 1.7957938673944454, "grad_norm": 1.1581437587738037, "learning_rate": 9.678914614182185e-05, "loss": 0.6057, "step": 5593 }, { "epoch": 1.796114946219297, "grad_norm": 1.030382513999939, "learning_rate": 9.675425775136286e-05, "loss": 0.6529, "step": 5594 }, { "epoch": 1.7964360250441485, "grad_norm": 0.7557877898216248, "learning_rate": 9.671936975638768e-05, "loss": 0.417, "step": 5595 }, { "epoch": 1.796757103869, "grad_norm": 0.8263580799102783, "learning_rate": 9.668448216114739e-05, "loss": 0.4406, "step": 5596 }, { "epoch": 1.7970781826938513, "grad_norm": 1.1736435890197754, "learning_rate": 9.664959496989285e-05, "loss": 0.7772, "step": 5597 }, { "epoch": 1.7973992615187029, "grad_norm": 0.8506044149398804, "learning_rate": 9.661470818687503e-05, "loss": 0.541, "step": 5598 }, { "epoch": 1.7977203403435542, "grad_norm": 1.1008727550506592, "learning_rate": 9.657982181634475e-05, "loss": 0.6642, "step": 5599 }, { "epoch": 1.7980414191684058, "grad_norm": 1.1743552684783936, "learning_rate": 9.654493586255278e-05, "loss": 0.6096, "step": 5600 }, { "epoch": 1.7983624979932573, "grad_norm": 0.5854097604751587, "learning_rate": 9.651005032974994e-05, "loss": 0.3086, "step": 5601 }, { "epoch": 1.7986835768181089, "grad_norm": 1.1236282587051392, "learning_rate": 9.647516522218683e-05, "loss": 0.3826, "step": 5602 }, { "epoch": 1.7990046556429604, "grad_norm": 1.1320160627365112, "learning_rate": 9.644028054411416e-05, "loss": 0.6647, "step": 5603 }, { "epoch": 1.799325734467812, "grad_norm": 0.9424121975898743, "learning_rate": 9.64053962997825e-05, "loss": 0.4697, "step": 5604 }, { "epoch": 1.7996468132926633, "grad_norm": 0.765177845954895, "learning_rate": 9.637051249344243e-05, "loss": 0.401, "step": 5605 }, { "epoch": 1.7999678921175148, "grad_norm": 0.8307339549064636, "learning_rate": 9.633562912934436e-05, "loss": 
0.3938, "step": 5606 }, { "epoch": 1.8002889709423664, "grad_norm": 1.0224002599716187, "learning_rate": 9.630074621173883e-05, "loss": 0.6176, "step": 5607 }, { "epoch": 1.8006100497672177, "grad_norm": 1.1550755500793457, "learning_rate": 9.62658637448761e-05, "loss": 0.4616, "step": 5608 }, { "epoch": 1.8009311285920693, "grad_norm": 0.8593536615371704, "learning_rate": 9.623098173300654e-05, "loss": 0.443, "step": 5609 }, { "epoch": 1.8012522074169208, "grad_norm": 1.4475479125976562, "learning_rate": 9.619610018038048e-05, "loss": 0.6054, "step": 5610 }, { "epoch": 1.8015732862417724, "grad_norm": 1.1326322555541992, "learning_rate": 9.616121909124801e-05, "loss": 0.3655, "step": 5611 }, { "epoch": 1.801894365066624, "grad_norm": 0.7792235016822815, "learning_rate": 9.612633846985941e-05, "loss": 0.4951, "step": 5612 }, { "epoch": 1.8022154438914755, "grad_norm": 0.576023280620575, "learning_rate": 9.609145832046465e-05, "loss": 0.3493, "step": 5613 }, { "epoch": 1.8025365227163268, "grad_norm": 0.7932665944099426, "learning_rate": 9.605657864731388e-05, "loss": 0.3994, "step": 5614 }, { "epoch": 1.8028576015411784, "grad_norm": 0.6708626747131348, "learning_rate": 9.602169945465702e-05, "loss": 0.5825, "step": 5615 }, { "epoch": 1.80317868036603, "grad_norm": 0.534807026386261, "learning_rate": 9.598682074674405e-05, "loss": 0.8931, "step": 5616 }, { "epoch": 1.8034997591908812, "grad_norm": 0.5789874196052551, "learning_rate": 9.595194252782477e-05, "loss": 0.3229, "step": 5617 }, { "epoch": 1.8038208380157328, "grad_norm": 0.7923842668533325, "learning_rate": 9.591706480214901e-05, "loss": 0.4642, "step": 5618 }, { "epoch": 1.8041419168405843, "grad_norm": 0.6035703420639038, "learning_rate": 9.588218757396655e-05, "loss": 0.36, "step": 5619 }, { "epoch": 1.8044629956654359, "grad_norm": 0.6047983765602112, "learning_rate": 9.584731084752699e-05, "loss": 0.2533, "step": 5620 }, { "epoch": 1.8047840744902874, "grad_norm": 0.8523005843162537, "learning_rate": 
9.581243462708006e-05, "loss": 0.2647, "step": 5621 }, { "epoch": 1.805105153315139, "grad_norm": 0.5535145998001099, "learning_rate": 9.577755891687523e-05, "loss": 0.137, "step": 5622 }, { "epoch": 1.8054262321399903, "grad_norm": 0.7448154091835022, "learning_rate": 9.574268372116205e-05, "loss": 0.4489, "step": 5623 }, { "epoch": 1.8057473109648419, "grad_norm": 1.0387903451919556, "learning_rate": 9.570780904418993e-05, "loss": 0.7846, "step": 5624 }, { "epoch": 1.8060683897896934, "grad_norm": 0.9373582005500793, "learning_rate": 9.567293489020831e-05, "loss": 0.7087, "step": 5625 }, { "epoch": 1.8063894686145447, "grad_norm": 0.9567878842353821, "learning_rate": 9.563806126346642e-05, "loss": 0.5429, "step": 5626 }, { "epoch": 1.8067105474393963, "grad_norm": 1.1631771326065063, "learning_rate": 9.560318816821353e-05, "loss": 0.778, "step": 5627 }, { "epoch": 1.8070316262642478, "grad_norm": 0.7982397675514221, "learning_rate": 9.556831560869882e-05, "loss": 0.6363, "step": 5628 }, { "epoch": 1.8073527050890994, "grad_norm": 0.7375602126121521, "learning_rate": 9.55334435891714e-05, "loss": 0.5203, "step": 5629 }, { "epoch": 1.807673783913951, "grad_norm": 0.8686415553092957, "learning_rate": 9.549857211388037e-05, "loss": 0.6616, "step": 5630 }, { "epoch": 1.8079948627388025, "grad_norm": 0.9008349776268005, "learning_rate": 9.546370118707463e-05, "loss": 0.7094, "step": 5631 }, { "epoch": 1.8083159415636538, "grad_norm": 0.7942934036254883, "learning_rate": 9.542883081300316e-05, "loss": 0.5465, "step": 5632 }, { "epoch": 1.8086370203885054, "grad_norm": 0.939593493938446, "learning_rate": 9.539396099591476e-05, "loss": 0.6448, "step": 5633 }, { "epoch": 1.808958099213357, "grad_norm": 0.8011131286621094, "learning_rate": 9.53590917400583e-05, "loss": 0.5164, "step": 5634 }, { "epoch": 1.8092791780382083, "grad_norm": 0.973583459854126, "learning_rate": 9.532422304968243e-05, "loss": 0.6263, "step": 5635 }, { "epoch": 1.8096002568630598, "grad_norm": 
0.788236677646637, "learning_rate": 9.528935492903575e-05, "loss": 0.4855, "step": 5636 }, { "epoch": 1.8099213356879114, "grad_norm": 0.9553484916687012, "learning_rate": 9.525448738236691e-05, "loss": 0.7004, "step": 5637 }, { "epoch": 1.810242414512763, "grad_norm": 0.9981603622436523, "learning_rate": 9.521962041392436e-05, "loss": 0.64, "step": 5638 }, { "epoch": 1.8105634933376145, "grad_norm": 0.7514563798904419, "learning_rate": 9.518475402795661e-05, "loss": 0.5227, "step": 5639 }, { "epoch": 1.810884572162466, "grad_norm": 0.8019845485687256, "learning_rate": 9.514988822871193e-05, "loss": 0.4965, "step": 5640 }, { "epoch": 1.8112056509873173, "grad_norm": 0.9751089811325073, "learning_rate": 9.511502302043868e-05, "loss": 0.4515, "step": 5641 }, { "epoch": 1.811526729812169, "grad_norm": 1.0139288902282715, "learning_rate": 9.508015840738503e-05, "loss": 0.6349, "step": 5642 }, { "epoch": 1.8118478086370204, "grad_norm": 0.7157825231552124, "learning_rate": 9.504529439379921e-05, "loss": 0.4664, "step": 5643 }, { "epoch": 1.8121688874618718, "grad_norm": 0.942258358001709, "learning_rate": 9.501043098392924e-05, "loss": 0.5903, "step": 5644 }, { "epoch": 1.8124899662867233, "grad_norm": 0.9379916787147522, "learning_rate": 9.497556818202306e-05, "loss": 0.432, "step": 5645 }, { "epoch": 1.8128110451115749, "grad_norm": 0.9485198259353638, "learning_rate": 9.494070599232868e-05, "loss": 0.5559, "step": 5646 }, { "epoch": 1.8131321239364264, "grad_norm": 1.1162331104278564, "learning_rate": 9.490584441909392e-05, "loss": 0.613, "step": 5647 }, { "epoch": 1.813453202761278, "grad_norm": 1.1678434610366821, "learning_rate": 9.48709834665666e-05, "loss": 0.5808, "step": 5648 }, { "epoch": 1.8137742815861295, "grad_norm": 0.8702452182769775, "learning_rate": 9.483612313899435e-05, "loss": 0.583, "step": 5649 }, { "epoch": 1.8140953604109809, "grad_norm": 1.071449637413025, "learning_rate": 9.480126344062487e-05, "loss": 0.5968, "step": 5650 }, { "epoch": 
1.8144164392358324, "grad_norm": 1.1288082599639893, "learning_rate": 9.476640437570562e-05, "loss": 0.5764, "step": 5651 }, { "epoch": 1.814737518060684, "grad_norm": 1.0096368789672852, "learning_rate": 9.473154594848415e-05, "loss": 0.5777, "step": 5652 }, { "epoch": 1.8150585968855353, "grad_norm": 0.9093414545059204, "learning_rate": 9.469668816320784e-05, "loss": 0.5467, "step": 5653 }, { "epoch": 1.8153796757103868, "grad_norm": 0.9198897480964661, "learning_rate": 9.466183102412395e-05, "loss": 0.5423, "step": 5654 }, { "epoch": 1.8157007545352384, "grad_norm": 0.9140397310256958, "learning_rate": 9.462697453547979e-05, "loss": 0.5262, "step": 5655 }, { "epoch": 1.81602183336009, "grad_norm": 1.1737873554229736, "learning_rate": 9.459211870152245e-05, "loss": 0.572, "step": 5656 }, { "epoch": 1.8163429121849415, "grad_norm": 0.6453090906143188, "learning_rate": 9.455726352649911e-05, "loss": 0.3562, "step": 5657 }, { "epoch": 1.816663991009793, "grad_norm": 0.9809330105781555, "learning_rate": 9.452240901465663e-05, "loss": 0.4382, "step": 5658 }, { "epoch": 1.8169850698346444, "grad_norm": 1.092984914779663, "learning_rate": 9.448755517024206e-05, "loss": 0.5466, "step": 5659 }, { "epoch": 1.817306148659496, "grad_norm": 0.7490816712379456, "learning_rate": 9.445270199750212e-05, "loss": 0.4304, "step": 5660 }, { "epoch": 1.8176272274843472, "grad_norm": 0.6709809303283691, "learning_rate": 9.441784950068362e-05, "loss": 0.3968, "step": 5661 }, { "epoch": 1.8179483063091988, "grad_norm": 0.8209671378135681, "learning_rate": 9.438299768403327e-05, "loss": 0.4619, "step": 5662 }, { "epoch": 1.8182693851340503, "grad_norm": 0.5862700939178467, "learning_rate": 9.434814655179755e-05, "loss": 0.3755, "step": 5663 }, { "epoch": 1.818590463958902, "grad_norm": 0.5392091870307922, "learning_rate": 9.43132961082231e-05, "loss": 0.3564, "step": 5664 }, { "epoch": 1.8189115427837534, "grad_norm": 0.6820465326309204, "learning_rate": 9.427844635755619e-05, "loss": 
0.6728, "step": 5665 }, { "epoch": 1.819232621608605, "grad_norm": 0.4257480800151825, "learning_rate": 9.424359730404329e-05, "loss": 0.4742, "step": 5666 }, { "epoch": 1.8195537004334565, "grad_norm": 0.7783001065254211, "learning_rate": 9.420874895193056e-05, "loss": 0.3928, "step": 5667 }, { "epoch": 1.8198747792583079, "grad_norm": 0.7277984619140625, "learning_rate": 9.417390130546426e-05, "loss": 0.4529, "step": 5668 }, { "epoch": 1.8201958580831594, "grad_norm": 0.4899481236934662, "learning_rate": 9.413905436889035e-05, "loss": 0.2177, "step": 5669 }, { "epoch": 1.8205169369080108, "grad_norm": 0.7720094323158264, "learning_rate": 9.410420814645493e-05, "loss": 0.2418, "step": 5670 }, { "epoch": 1.8208380157328623, "grad_norm": 0.7921152114868164, "learning_rate": 9.406936264240386e-05, "loss": 0.3818, "step": 5671 }, { "epoch": 1.8211590945577139, "grad_norm": 0.8348581194877625, "learning_rate": 9.403451786098294e-05, "loss": 0.5024, "step": 5672 }, { "epoch": 1.8214801733825654, "grad_norm": 0.9092313051223755, "learning_rate": 9.399967380643796e-05, "loss": 0.6206, "step": 5673 }, { "epoch": 1.821801252207417, "grad_norm": 0.9595973491668701, "learning_rate": 9.396483048301448e-05, "loss": 0.5598, "step": 5674 }, { "epoch": 1.8221223310322685, "grad_norm": 0.7221679091453552, "learning_rate": 9.392998789495811e-05, "loss": 0.463, "step": 5675 }, { "epoch": 1.82244340985712, "grad_norm": 0.6640863418579102, "learning_rate": 9.38951460465143e-05, "loss": 0.4028, "step": 5676 }, { "epoch": 1.8227644886819714, "grad_norm": 0.8685281872749329, "learning_rate": 9.386030494192846e-05, "loss": 0.4908, "step": 5677 }, { "epoch": 1.823085567506823, "grad_norm": 0.8399313688278198, "learning_rate": 9.382546458544582e-05, "loss": 0.6339, "step": 5678 }, { "epoch": 1.8234066463316743, "grad_norm": 0.8233359456062317, "learning_rate": 9.37906249813116e-05, "loss": 0.5707, "step": 5679 }, { "epoch": 1.8237277251565258, "grad_norm": 0.9246627688407898, 
"learning_rate": 9.375578613377089e-05, "loss": 0.5633, "step": 5680 }, { "epoch": 1.8240488039813774, "grad_norm": 0.9575353860855103, "learning_rate": 9.372094804706867e-05, "loss": 0.622, "step": 5681 }, { "epoch": 1.824369882806229, "grad_norm": 0.6907000541687012, "learning_rate": 9.368611072544992e-05, "loss": 0.4413, "step": 5682 }, { "epoch": 1.8246909616310805, "grad_norm": 0.8728267550468445, "learning_rate": 9.36512741731594e-05, "loss": 0.4828, "step": 5683 }, { "epoch": 1.825012040455932, "grad_norm": 0.8942257165908813, "learning_rate": 9.361643839444188e-05, "loss": 0.6054, "step": 5684 }, { "epoch": 1.8253331192807836, "grad_norm": 1.4165688753128052, "learning_rate": 9.358160339354194e-05, "loss": 0.6781, "step": 5685 }, { "epoch": 1.825654198105635, "grad_norm": 1.0946552753448486, "learning_rate": 9.354676917470422e-05, "loss": 0.6444, "step": 5686 }, { "epoch": 1.8259752769304864, "grad_norm": 0.9636126756668091, "learning_rate": 9.351193574217306e-05, "loss": 0.5896, "step": 5687 }, { "epoch": 1.8262963557553378, "grad_norm": 0.9439002871513367, "learning_rate": 9.347710310019288e-05, "loss": 0.5716, "step": 5688 }, { "epoch": 1.8266174345801893, "grad_norm": 1.3570852279663086, "learning_rate": 9.344227125300788e-05, "loss": 0.5469, "step": 5689 }, { "epoch": 1.8269385134050409, "grad_norm": 1.0195239782333374, "learning_rate": 9.340744020486222e-05, "loss": 0.6181, "step": 5690 }, { "epoch": 1.8272595922298924, "grad_norm": 0.9529807567596436, "learning_rate": 9.337260996000002e-05, "loss": 0.5538, "step": 5691 }, { "epoch": 1.827580671054744, "grad_norm": 1.627726674079895, "learning_rate": 9.333778052266513e-05, "loss": 0.6368, "step": 5692 }, { "epoch": 1.8279017498795955, "grad_norm": 1.326042652130127, "learning_rate": 9.330295189710152e-05, "loss": 0.6902, "step": 5693 }, { "epoch": 1.828222828704447, "grad_norm": 0.8660145998001099, "learning_rate": 9.32681240875529e-05, "loss": 0.5452, "step": 5694 }, { "epoch": 1.8285439075292984, 
"grad_norm": 0.9196065664291382, "learning_rate": 9.323329709826294e-05, "loss": 0.5653, "step": 5695 }, { "epoch": 1.82886498635415, "grad_norm": 0.7746536135673523, "learning_rate": 9.319847093347522e-05, "loss": 0.4199, "step": 5696 }, { "epoch": 1.8291860651790013, "grad_norm": 0.7608351707458496, "learning_rate": 9.316364559743314e-05, "loss": 0.4165, "step": 5697 }, { "epoch": 1.8295071440038528, "grad_norm": 0.7243719696998596, "learning_rate": 9.312882109438013e-05, "loss": 0.4451, "step": 5698 }, { "epoch": 1.8298282228287044, "grad_norm": 1.2507250308990479, "learning_rate": 9.309399742855942e-05, "loss": 0.7069, "step": 5699 }, { "epoch": 1.830149301653556, "grad_norm": 0.8393792510032654, "learning_rate": 9.30591746042142e-05, "loss": 0.4762, "step": 5700 }, { "epoch": 1.8304703804784075, "grad_norm": 0.8377349376678467, "learning_rate": 9.302435262558747e-05, "loss": 0.4423, "step": 5701 }, { "epoch": 1.830791459303259, "grad_norm": 0.9344905614852905, "learning_rate": 9.298953149692225e-05, "loss": 0.5336, "step": 5702 }, { "epoch": 1.8311125381281106, "grad_norm": 0.8581468462944031, "learning_rate": 9.295471122246131e-05, "loss": 0.5264, "step": 5703 }, { "epoch": 1.831433616952962, "grad_norm": 0.9583359956741333, "learning_rate": 9.291989180644747e-05, "loss": 0.5861, "step": 5704 }, { "epoch": 1.8317546957778135, "grad_norm": 1.2595868110656738, "learning_rate": 9.288507325312335e-05, "loss": 0.5907, "step": 5705 }, { "epoch": 1.8320757746026648, "grad_norm": 0.8175548315048218, "learning_rate": 9.285025556673141e-05, "loss": 0.5518, "step": 5706 }, { "epoch": 1.8323968534275163, "grad_norm": 1.2646697759628296, "learning_rate": 9.281543875151419e-05, "loss": 0.5245, "step": 5707 }, { "epoch": 1.832717932252368, "grad_norm": 0.8525310754776001, "learning_rate": 9.278062281171393e-05, "loss": 0.5834, "step": 5708 }, { "epoch": 1.8330390110772194, "grad_norm": 1.1546502113342285, "learning_rate": 9.274580775157294e-05, "loss": 0.6688, "step": 5709 
}, { "epoch": 1.833360089902071, "grad_norm": 0.5779723525047302, "learning_rate": 9.271099357533321e-05, "loss": 0.3218, "step": 5710 }, { "epoch": 1.8336811687269226, "grad_norm": 0.8706488013267517, "learning_rate": 9.267618028723686e-05, "loss": 0.4382, "step": 5711 }, { "epoch": 1.834002247551774, "grad_norm": 0.9869682788848877, "learning_rate": 9.264136789152567e-05, "loss": 0.3801, "step": 5712 }, { "epoch": 1.8343233263766254, "grad_norm": 0.890150785446167, "learning_rate": 9.26065563924415e-05, "loss": 0.5917, "step": 5713 }, { "epoch": 1.834644405201477, "grad_norm": 0.7428485155105591, "learning_rate": 9.257174579422605e-05, "loss": 0.4025, "step": 5714 }, { "epoch": 1.8349654840263283, "grad_norm": 0.6964389085769653, "learning_rate": 9.253693610112078e-05, "loss": 0.5247, "step": 5715 }, { "epoch": 1.8352865628511799, "grad_norm": 0.4811036288738251, "learning_rate": 9.250212731736726e-05, "loss": 0.7248, "step": 5716 }, { "epoch": 1.8356076416760314, "grad_norm": 0.4891749322414398, "learning_rate": 9.246731944720675e-05, "loss": 0.5256, "step": 5717 }, { "epoch": 1.835928720500883, "grad_norm": 0.6794456839561462, "learning_rate": 9.243251249488052e-05, "loss": 0.3973, "step": 5718 }, { "epoch": 1.8362497993257345, "grad_norm": 0.5766389966011047, "learning_rate": 9.239770646462968e-05, "loss": 0.2345, "step": 5719 }, { "epoch": 1.836570878150586, "grad_norm": 0.8062403202056885, "learning_rate": 9.236290136069528e-05, "loss": 0.5047, "step": 5720 }, { "epoch": 1.8368919569754376, "grad_norm": 0.6303856372833252, "learning_rate": 9.232809718731814e-05, "loss": 0.2518, "step": 5721 }, { "epoch": 1.837213035800289, "grad_norm": 1.0692548751831055, "learning_rate": 9.229329394873911e-05, "loss": 0.8745, "step": 5722 }, { "epoch": 1.8375341146251405, "grad_norm": 0.8411319255828857, "learning_rate": 9.225849164919885e-05, "loss": 0.5388, "step": 5723 }, { "epoch": 1.8378551934499918, "grad_norm": 0.8031153678894043, "learning_rate": 
9.222369029293787e-05, "loss": 0.6806, "step": 5724 }, { "epoch": 1.8381762722748434, "grad_norm": 0.6173913478851318, "learning_rate": 9.218888988419668e-05, "loss": 0.4643, "step": 5725 }, { "epoch": 1.838497351099695, "grad_norm": 0.7925007939338684, "learning_rate": 9.215409042721552e-05, "loss": 0.5676, "step": 5726 }, { "epoch": 1.8388184299245465, "grad_norm": 0.9955210089683533, "learning_rate": 9.211929192623467e-05, "loss": 0.7278, "step": 5727 }, { "epoch": 1.839139508749398, "grad_norm": 0.7641330361366272, "learning_rate": 9.208449438549415e-05, "loss": 0.4462, "step": 5728 }, { "epoch": 1.8394605875742496, "grad_norm": 0.7429258227348328, "learning_rate": 9.204969780923403e-05, "loss": 0.6244, "step": 5729 }, { "epoch": 1.8397816663991011, "grad_norm": 0.8412753939628601, "learning_rate": 9.201490220169408e-05, "loss": 0.7808, "step": 5730 }, { "epoch": 1.8401027452239525, "grad_norm": 0.8708707094192505, "learning_rate": 9.198010756711412e-05, "loss": 0.6767, "step": 5731 }, { "epoch": 1.840423824048804, "grad_norm": 0.7798470258712769, "learning_rate": 9.194531390973371e-05, "loss": 0.5248, "step": 5732 }, { "epoch": 1.8407449028736553, "grad_norm": 0.8160192966461182, "learning_rate": 9.191052123379234e-05, "loss": 0.5404, "step": 5733 }, { "epoch": 1.8410659816985069, "grad_norm": 1.1252083778381348, "learning_rate": 9.187572954352947e-05, "loss": 0.7795, "step": 5734 }, { "epoch": 1.8413870605233584, "grad_norm": 0.8956120610237122, "learning_rate": 9.184093884318425e-05, "loss": 0.6233, "step": 5735 }, { "epoch": 1.84170813934821, "grad_norm": 0.9571072459220886, "learning_rate": 9.180614913699592e-05, "loss": 0.6217, "step": 5736 }, { "epoch": 1.8420292181730615, "grad_norm": 0.7272530794143677, "learning_rate": 9.177136042920344e-05, "loss": 0.4461, "step": 5737 }, { "epoch": 1.842350296997913, "grad_norm": 1.0026044845581055, "learning_rate": 9.173657272404576e-05, "loss": 0.7131, "step": 5738 }, { "epoch": 1.8426713758227646, "grad_norm": 
0.8981397747993469, "learning_rate": 9.17017860257616e-05, "loss": 0.5629, "step": 5739 }, { "epoch": 1.842992454647616, "grad_norm": 1.2423765659332275, "learning_rate": 9.166700033858969e-05, "loss": 0.7977, "step": 5740 }, { "epoch": 1.8433135334724675, "grad_norm": 0.7401012778282166, "learning_rate": 9.163221566676847e-05, "loss": 0.4719, "step": 5741 }, { "epoch": 1.8436346122973188, "grad_norm": 0.9083794951438904, "learning_rate": 9.159743201453638e-05, "loss": 0.5664, "step": 5742 }, { "epoch": 1.8439556911221704, "grad_norm": 0.7999395728111267, "learning_rate": 9.156264938613174e-05, "loss": 0.5195, "step": 5743 }, { "epoch": 1.844276769947022, "grad_norm": 0.8803184628486633, "learning_rate": 9.152786778579267e-05, "loss": 0.4888, "step": 5744 }, { "epoch": 1.8445978487718735, "grad_norm": 0.923516571521759, "learning_rate": 9.14930872177572e-05, "loss": 0.4521, "step": 5745 }, { "epoch": 1.844918927596725, "grad_norm": 0.7799448370933533, "learning_rate": 9.145830768626327e-05, "loss": 0.5848, "step": 5746 }, { "epoch": 1.8452400064215766, "grad_norm": 0.9645082354545593, "learning_rate": 9.142352919554862e-05, "loss": 0.5881, "step": 5747 }, { "epoch": 1.8455610852464281, "grad_norm": 0.8418881297111511, "learning_rate": 9.138875174985091e-05, "loss": 0.4551, "step": 5748 }, { "epoch": 1.8458821640712795, "grad_norm": 0.8868610858917236, "learning_rate": 9.135397535340773e-05, "loss": 0.3873, "step": 5749 }, { "epoch": 1.846203242896131, "grad_norm": 1.052437424659729, "learning_rate": 9.131920001045638e-05, "loss": 0.5356, "step": 5750 }, { "epoch": 1.8465243217209824, "grad_norm": 0.9833017587661743, "learning_rate": 9.128442572523417e-05, "loss": 0.5061, "step": 5751 }, { "epoch": 1.846845400545834, "grad_norm": 1.3260668516159058, "learning_rate": 9.12496525019783e-05, "loss": 0.5013, "step": 5752 }, { "epoch": 1.8471664793706855, "grad_norm": 1.056301474571228, "learning_rate": 9.121488034492569e-05, "loss": 0.6949, "step": 5753 }, { "epoch": 
1.847487558195537, "grad_norm": 0.8964768648147583, "learning_rate": 9.11801092583133e-05, "loss": 0.5426, "step": 5754 }, { "epoch": 1.8478086370203886, "grad_norm": 0.8216196894645691, "learning_rate": 9.114533924637778e-05, "loss": 0.4087, "step": 5755 }, { "epoch": 1.84812971584524, "grad_norm": 0.7172057032585144, "learning_rate": 9.111057031335585e-05, "loss": 0.417, "step": 5756 }, { "epoch": 1.8484507946700917, "grad_norm": 1.1012330055236816, "learning_rate": 9.107580246348395e-05, "loss": 0.3882, "step": 5757 }, { "epoch": 1.848771873494943, "grad_norm": 0.856521725654602, "learning_rate": 9.104103570099848e-05, "loss": 0.4772, "step": 5758 }, { "epoch": 1.8490929523197945, "grad_norm": 1.0229532718658447, "learning_rate": 9.100627003013562e-05, "loss": 0.4683, "step": 5759 }, { "epoch": 1.8494140311446459, "grad_norm": 1.44857919216156, "learning_rate": 9.097150545513145e-05, "loss": 0.7295, "step": 5760 }, { "epoch": 1.8497351099694974, "grad_norm": 0.8216368556022644, "learning_rate": 9.093674198022201e-05, "loss": 0.4156, "step": 5761 }, { "epoch": 1.850056188794349, "grad_norm": 0.5136415362358093, "learning_rate": 9.090197960964301e-05, "loss": 0.3143, "step": 5762 }, { "epoch": 1.8503772676192005, "grad_norm": 0.7286933660507202, "learning_rate": 9.086721834763024e-05, "loss": 0.3688, "step": 5763 }, { "epoch": 1.850698346444052, "grad_norm": 0.9348329305648804, "learning_rate": 9.083245819841918e-05, "loss": 0.5614, "step": 5764 }, { "epoch": 1.8510194252689036, "grad_norm": 1.0801033973693848, "learning_rate": 9.07976991662453e-05, "loss": 0.7294, "step": 5765 }, { "epoch": 1.8513405040937552, "grad_norm": 0.467505544424057, "learning_rate": 9.076294125534383e-05, "loss": 0.9349, "step": 5766 }, { "epoch": 1.8516615829186065, "grad_norm": 0.6000930070877075, "learning_rate": 9.072818446994999e-05, "loss": 0.5034, "step": 5767 }, { "epoch": 1.851982661743458, "grad_norm": 0.7102051973342896, "learning_rate": 9.069342881429876e-05, "loss": 0.2786, 
"step": 5768 }, { "epoch": 1.8523037405683094, "grad_norm": 0.6671398878097534, "learning_rate": 9.065867429262496e-05, "loss": 0.1777, "step": 5769 }, { "epoch": 1.852624819393161, "grad_norm": 0.6379199624061584, "learning_rate": 9.062392090916337e-05, "loss": 0.3455, "step": 5770 }, { "epoch": 1.8529458982180125, "grad_norm": 0.7193529009819031, "learning_rate": 9.058916866814858e-05, "loss": 0.2457, "step": 5771 }, { "epoch": 1.853266977042864, "grad_norm": 1.0792187452316284, "learning_rate": 9.055441757381506e-05, "loss": 0.7677, "step": 5772 }, { "epoch": 1.8535880558677156, "grad_norm": 0.8163480758666992, "learning_rate": 9.051966763039707e-05, "loss": 0.5803, "step": 5773 }, { "epoch": 1.8539091346925671, "grad_norm": 0.7993916869163513, "learning_rate": 9.048491884212884e-05, "loss": 0.5471, "step": 5774 }, { "epoch": 1.8542302135174187, "grad_norm": 0.9378834962844849, "learning_rate": 9.045017121324438e-05, "loss": 0.5756, "step": 5775 }, { "epoch": 1.85455129234227, "grad_norm": 0.7326672077178955, "learning_rate": 9.04154247479776e-05, "loss": 0.3347, "step": 5776 }, { "epoch": 1.8548723711671216, "grad_norm": 0.8225162625312805, "learning_rate": 9.038067945056227e-05, "loss": 0.6454, "step": 5777 }, { "epoch": 1.8551934499919729, "grad_norm": 1.0159610509872437, "learning_rate": 9.034593532523193e-05, "loss": 0.5996, "step": 5778 }, { "epoch": 1.8555145288168244, "grad_norm": 0.8151166439056396, "learning_rate": 9.03111923762201e-05, "loss": 0.6339, "step": 5779 }, { "epoch": 1.855835607641676, "grad_norm": 0.956063449382782, "learning_rate": 9.027645060776006e-05, "loss": 0.5437, "step": 5780 }, { "epoch": 1.8561566864665275, "grad_norm": 0.8743849396705627, "learning_rate": 9.024171002408506e-05, "loss": 0.5587, "step": 5781 }, { "epoch": 1.856477765291379, "grad_norm": 0.8803306818008423, "learning_rate": 9.020697062942807e-05, "loss": 0.5749, "step": 5782 }, { "epoch": 1.8567988441162306, "grad_norm": 1.1139497756958008, "learning_rate": 
9.017223242802204e-05, "loss": 0.5244, "step": 5783 }, { "epoch": 1.8571199229410822, "grad_norm": 0.8290317058563232, "learning_rate": 9.013749542409963e-05, "loss": 0.5412, "step": 5784 }, { "epoch": 1.8574410017659335, "grad_norm": 0.9109243154525757, "learning_rate": 9.01027596218935e-05, "loss": 0.7158, "step": 5785 }, { "epoch": 1.857762080590785, "grad_norm": 0.8047156929969788, "learning_rate": 9.006802502563612e-05, "loss": 0.4727, "step": 5786 }, { "epoch": 1.8580831594156364, "grad_norm": 0.9587835073471069, "learning_rate": 9.003329163955972e-05, "loss": 0.6326, "step": 5787 }, { "epoch": 1.858404238240488, "grad_norm": 0.8253732919692993, "learning_rate": 8.999855946789653e-05, "loss": 0.5527, "step": 5788 }, { "epoch": 1.8587253170653395, "grad_norm": 1.1531040668487549, "learning_rate": 8.99638285148785e-05, "loss": 0.4517, "step": 5789 }, { "epoch": 1.859046395890191, "grad_norm": 0.7936963438987732, "learning_rate": 8.992909878473758e-05, "loss": 0.492, "step": 5790 }, { "epoch": 1.8593674747150426, "grad_norm": 1.0635939836502075, "learning_rate": 8.989437028170537e-05, "loss": 0.5887, "step": 5791 }, { "epoch": 1.8596885535398942, "grad_norm": 4.376264572143555, "learning_rate": 8.985964301001353e-05, "loss": 0.5299, "step": 5792 }, { "epoch": 1.8600096323647457, "grad_norm": 0.9289653897285461, "learning_rate": 8.982491697389338e-05, "loss": 0.6141, "step": 5793 }, { "epoch": 1.860330711189597, "grad_norm": 1.19081449508667, "learning_rate": 8.979019217757625e-05, "loss": 0.9054, "step": 5794 }, { "epoch": 1.8606517900144486, "grad_norm": 0.8353030681610107, "learning_rate": 8.975546862529328e-05, "loss": 0.5393, "step": 5795 }, { "epoch": 1.8609728688393, "grad_norm": 0.9625294804573059, "learning_rate": 8.972074632127533e-05, "loss": 0.5229, "step": 5796 }, { "epoch": 1.8612939476641515, "grad_norm": 0.9256024956703186, "learning_rate": 8.96860252697533e-05, "loss": 0.3256, "step": 5797 }, { "epoch": 1.861615026489003, "grad_norm": 
0.9383046627044678, "learning_rate": 8.965130547495776e-05, "loss": 0.7085, "step": 5798 }, { "epoch": 1.8619361053138546, "grad_norm": 0.8655479550361633, "learning_rate": 8.961658694111929e-05, "loss": 0.5807, "step": 5799 }, { "epoch": 1.8622571841387061, "grad_norm": 1.0678274631500244, "learning_rate": 8.958186967246816e-05, "loss": 0.6035, "step": 5800 }, { "epoch": 1.8625782629635577, "grad_norm": 0.6425728797912598, "learning_rate": 8.954715367323468e-05, "loss": 0.3561, "step": 5801 }, { "epoch": 1.8628993417884092, "grad_norm": 0.835169792175293, "learning_rate": 8.951243894764876e-05, "loss": 0.5272, "step": 5802 }, { "epoch": 1.8632204206132605, "grad_norm": 0.9119851589202881, "learning_rate": 8.947772549994035e-05, "loss": 0.5669, "step": 5803 }, { "epoch": 1.863541499438112, "grad_norm": 0.8746310472488403, "learning_rate": 8.944301333433922e-05, "loss": 0.5494, "step": 5804 }, { "epoch": 1.8638625782629634, "grad_norm": 1.2345632314682007, "learning_rate": 8.940830245507483e-05, "loss": 0.751, "step": 5805 }, { "epoch": 1.864183657087815, "grad_norm": 1.1940757036209106, "learning_rate": 8.93735928663767e-05, "loss": 0.6379, "step": 5806 }, { "epoch": 1.8645047359126665, "grad_norm": 1.1962798833847046, "learning_rate": 8.933888457247402e-05, "loss": 0.5358, "step": 5807 }, { "epoch": 1.864825814737518, "grad_norm": 1.165390968322754, "learning_rate": 8.930417757759592e-05, "loss": 0.5781, "step": 5808 }, { "epoch": 1.8651468935623696, "grad_norm": 0.9763917922973633, "learning_rate": 8.926947188597134e-05, "loss": 0.4743, "step": 5809 }, { "epoch": 1.8654679723872212, "grad_norm": 1.0017492771148682, "learning_rate": 8.923476750182908e-05, "loss": 0.6824, "step": 5810 }, { "epoch": 1.8657890512120727, "grad_norm": 1.386108160018921, "learning_rate": 8.920006442939772e-05, "loss": 0.595, "step": 5811 }, { "epoch": 1.866110130036924, "grad_norm": 0.5562759041786194, "learning_rate": 8.916536267290578e-05, "loss": 0.3123, "step": 5812 }, { "epoch": 
1.8664312088617756, "grad_norm": 0.5364285111427307, "learning_rate": 8.913066223658151e-05, "loss": 0.3485, "step": 5813 }, { "epoch": 1.866752287686627, "grad_norm": 0.5442401170730591, "learning_rate": 8.909596312465306e-05, "loss": 0.3492, "step": 5814 }, { "epoch": 1.8670733665114785, "grad_norm": 0.5919818878173828, "learning_rate": 8.906126534134848e-05, "loss": 0.8261, "step": 5815 }, { "epoch": 1.86739444533633, "grad_norm": 0.5136668682098389, "learning_rate": 8.902656889089548e-05, "loss": 0.7693, "step": 5816 }, { "epoch": 1.8677155241611816, "grad_norm": 0.7814684510231018, "learning_rate": 8.89918737775218e-05, "loss": 0.456, "step": 5817 }, { "epoch": 1.8680366029860331, "grad_norm": 0.5206146240234375, "learning_rate": 8.895718000545489e-05, "loss": 0.2985, "step": 5818 }, { "epoch": 1.8683576818108847, "grad_norm": 0.5971502065658569, "learning_rate": 8.892248757892214e-05, "loss": 0.2403, "step": 5819 }, { "epoch": 1.8686787606357362, "grad_norm": 0.8546911478042603, "learning_rate": 8.888779650215068e-05, "loss": 0.2383, "step": 5820 }, { "epoch": 1.8689998394605876, "grad_norm": 0.9832369089126587, "learning_rate": 8.885310677936746e-05, "loss": 0.7438, "step": 5821 }, { "epoch": 1.8693209182854391, "grad_norm": 1.0020122528076172, "learning_rate": 8.88184184147994e-05, "loss": 0.8816, "step": 5822 }, { "epoch": 1.8696419971102904, "grad_norm": 0.7644429206848145, "learning_rate": 8.878373141267311e-05, "loss": 0.6192, "step": 5823 }, { "epoch": 1.869963075935142, "grad_norm": 0.6598174571990967, "learning_rate": 8.874904577721518e-05, "loss": 0.4108, "step": 5824 }, { "epoch": 1.8702841547599935, "grad_norm": 0.7471329569816589, "learning_rate": 8.871436151265184e-05, "loss": 0.5137, "step": 5825 }, { "epoch": 1.870605233584845, "grad_norm": 0.910190999507904, "learning_rate": 8.867967862320934e-05, "loss": 0.5835, "step": 5826 }, { "epoch": 1.8709263124096966, "grad_norm": 0.7468095421791077, "learning_rate": 8.864499711311362e-05, "loss": 
0.417, "step": 5827 }, { "epoch": 1.8712473912345482, "grad_norm": 0.8930068016052246, "learning_rate": 8.861031698659063e-05, "loss": 0.6094, "step": 5828 }, { "epoch": 1.8715684700593997, "grad_norm": 0.793305516242981, "learning_rate": 8.857563824786596e-05, "loss": 0.5365, "step": 5829 }, { "epoch": 1.871889548884251, "grad_norm": 0.6843454837799072, "learning_rate": 8.854096090116508e-05, "loss": 0.4672, "step": 5830 }, { "epoch": 1.8722106277091026, "grad_norm": 0.9117445349693298, "learning_rate": 8.850628495071336e-05, "loss": 0.5671, "step": 5831 }, { "epoch": 1.872531706533954, "grad_norm": 0.6631526350975037, "learning_rate": 8.847161040073594e-05, "loss": 0.3177, "step": 5832 }, { "epoch": 1.8728527853588055, "grad_norm": 0.716529130935669, "learning_rate": 8.843693725545786e-05, "loss": 0.4629, "step": 5833 }, { "epoch": 1.873173864183657, "grad_norm": 1.5731027126312256, "learning_rate": 8.840226551910387e-05, "loss": 0.8211, "step": 5834 }, { "epoch": 1.8734949430085086, "grad_norm": 1.7772483825683594, "learning_rate": 8.836759519589867e-05, "loss": 0.7299, "step": 5835 }, { "epoch": 1.8738160218333602, "grad_norm": 1.1696412563323975, "learning_rate": 8.833292629006668e-05, "loss": 0.5431, "step": 5836 }, { "epoch": 1.8741371006582117, "grad_norm": 1.2018239498138428, "learning_rate": 8.829825880583226e-05, "loss": 0.5802, "step": 5837 }, { "epoch": 1.8744581794830633, "grad_norm": 1.0494779348373413, "learning_rate": 8.826359274741953e-05, "loss": 0.7327, "step": 5838 }, { "epoch": 1.8747792583079146, "grad_norm": 1.106572151184082, "learning_rate": 8.822892811905237e-05, "loss": 0.6034, "step": 5839 }, { "epoch": 1.8751003371327661, "grad_norm": 0.8521847724914551, "learning_rate": 8.819426492495464e-05, "loss": 0.504, "step": 5840 }, { "epoch": 1.8754214159576175, "grad_norm": 0.9584136605262756, "learning_rate": 8.81596031693499e-05, "loss": 0.546, "step": 5841 }, { "epoch": 1.875742494782469, "grad_norm": 1.2079963684082031, "learning_rate": 
8.812494285646163e-05, "loss": 0.491, "step": 5842 }, { "epoch": 1.8760635736073206, "grad_norm": 0.8575901389122009, "learning_rate": 8.809028399051302e-05, "loss": 0.6095, "step": 5843 }, { "epoch": 1.8763846524321721, "grad_norm": 0.8896117806434631, "learning_rate": 8.805562657572723e-05, "loss": 0.5603, "step": 5844 }, { "epoch": 1.8767057312570237, "grad_norm": 0.928081750869751, "learning_rate": 8.802097061632705e-05, "loss": 0.5653, "step": 5845 }, { "epoch": 1.8770268100818752, "grad_norm": 1.1048555374145508, "learning_rate": 8.79863161165353e-05, "loss": 0.5764, "step": 5846 }, { "epoch": 1.8773478889067265, "grad_norm": 0.8389851450920105, "learning_rate": 8.79516630805745e-05, "loss": 0.6391, "step": 5847 }, { "epoch": 1.877668967731578, "grad_norm": 0.9550890922546387, "learning_rate": 8.791701151266696e-05, "loss": 0.6019, "step": 5848 }, { "epoch": 1.8779900465564296, "grad_norm": 0.8240407109260559, "learning_rate": 8.788236141703498e-05, "loss": 0.4589, "step": 5849 }, { "epoch": 1.878311125381281, "grad_norm": 0.9926547408103943, "learning_rate": 8.784771279790044e-05, "loss": 0.5949, "step": 5850 }, { "epoch": 1.8786322042061325, "grad_norm": 1.015940546989441, "learning_rate": 8.781306565948528e-05, "loss": 0.611, "step": 5851 }, { "epoch": 1.878953283030984, "grad_norm": 0.912274181842804, "learning_rate": 8.777842000601105e-05, "loss": 0.488, "step": 5852 }, { "epoch": 1.8792743618558356, "grad_norm": 1.0201482772827148, "learning_rate": 8.774377584169933e-05, "loss": 0.6077, "step": 5853 }, { "epoch": 1.8795954406806872, "grad_norm": 1.027444839477539, "learning_rate": 8.77091331707713e-05, "loss": 0.6085, "step": 5854 }, { "epoch": 1.8799165195055387, "grad_norm": 0.821168065071106, "learning_rate": 8.767449199744814e-05, "loss": 0.5152, "step": 5855 }, { "epoch": 1.88023759833039, "grad_norm": 1.338505506515503, "learning_rate": 8.763985232595075e-05, "loss": 0.5792, "step": 5856 }, { "epoch": 1.8805586771552416, "grad_norm": 
0.9771504402160645, "learning_rate": 8.760521416049983e-05, "loss": 0.6102, "step": 5857 }, { "epoch": 1.8808797559800932, "grad_norm": 0.9123767018318176, "learning_rate": 8.7570577505316e-05, "loss": 0.5544, "step": 5858 }, { "epoch": 1.8812008348049445, "grad_norm": 0.6781209707260132, "learning_rate": 8.753594236461957e-05, "loss": 0.4612, "step": 5859 }, { "epoch": 1.881521913629796, "grad_norm": 0.8213598132133484, "learning_rate": 8.750130874263077e-05, "loss": 0.338, "step": 5860 }, { "epoch": 1.8818429924546476, "grad_norm": 1.2444860935211182, "learning_rate": 8.746667664356956e-05, "loss": 0.6963, "step": 5861 }, { "epoch": 1.8821640712794991, "grad_norm": 0.9669928550720215, "learning_rate": 8.743204607165583e-05, "loss": 0.3941, "step": 5862 }, { "epoch": 1.8824851501043507, "grad_norm": 0.7179502248764038, "learning_rate": 8.739741703110913e-05, "loss": 0.3945, "step": 5863 }, { "epoch": 1.8828062289292022, "grad_norm": 0.9342414736747742, "learning_rate": 8.736278952614898e-05, "loss": 0.4656, "step": 5864 }, { "epoch": 1.8831273077540536, "grad_norm": 0.5712354779243469, "learning_rate": 8.732816356099456e-05, "loss": 0.8414, "step": 5865 }, { "epoch": 1.8834483865789051, "grad_norm": 0.5650877952575684, "learning_rate": 8.729353913986496e-05, "loss": 0.77, "step": 5866 }, { "epoch": 1.8837694654037567, "grad_norm": 0.6072726845741272, "learning_rate": 8.72589162669791e-05, "loss": 0.3555, "step": 5867 }, { "epoch": 1.884090544228608, "grad_norm": 0.5450513362884521, "learning_rate": 8.722429494655561e-05, "loss": 0.2534, "step": 5868 }, { "epoch": 1.8844116230534596, "grad_norm": 0.7012900114059448, "learning_rate": 8.718967518281307e-05, "loss": 0.4425, "step": 5869 }, { "epoch": 1.884732701878311, "grad_norm": 0.7500693798065186, "learning_rate": 8.715505697996971e-05, "loss": 0.221, "step": 5870 }, { "epoch": 1.8850537807031627, "grad_norm": 0.9719879627227783, "learning_rate": 8.712044034224374e-05, "loss": 0.8356, "step": 5871 }, { "epoch": 
1.8853748595280142, "grad_norm": 0.8122969269752502, "learning_rate": 8.708582527385301e-05, "loss": 0.623, "step": 5872 }, { "epoch": 1.8856959383528658, "grad_norm": 1.0290989875793457, "learning_rate": 8.705121177901532e-05, "loss": 0.7414, "step": 5873 }, { "epoch": 1.886017017177717, "grad_norm": 1.0142052173614502, "learning_rate": 8.70165998619482e-05, "loss": 0.6093, "step": 5874 }, { "epoch": 1.8863380960025686, "grad_norm": 0.9030125141143799, "learning_rate": 8.698198952686896e-05, "loss": 0.6493, "step": 5875 }, { "epoch": 1.8866591748274202, "grad_norm": 0.8314472436904907, "learning_rate": 8.694738077799488e-05, "loss": 0.6468, "step": 5876 }, { "epoch": 1.8869802536522715, "grad_norm": 0.7704334855079651, "learning_rate": 8.691277361954279e-05, "loss": 0.4876, "step": 5877 }, { "epoch": 1.887301332477123, "grad_norm": 0.8338825702667236, "learning_rate": 8.687816805572956e-05, "loss": 0.6363, "step": 5878 }, { "epoch": 1.8876224113019746, "grad_norm": 0.8600116968154907, "learning_rate": 8.684356409077176e-05, "loss": 0.5554, "step": 5879 }, { "epoch": 1.8879434901268262, "grad_norm": 0.8333651423454285, "learning_rate": 8.680896172888576e-05, "loss": 0.6546, "step": 5880 }, { "epoch": 1.8882645689516777, "grad_norm": 0.9315165877342224, "learning_rate": 8.677436097428775e-05, "loss": 0.5263, "step": 5881 }, { "epoch": 1.8885856477765293, "grad_norm": 0.9922060370445251, "learning_rate": 8.673976183119376e-05, "loss": 0.6409, "step": 5882 }, { "epoch": 1.8889067266013806, "grad_norm": 0.9471839070320129, "learning_rate": 8.670516430381958e-05, "loss": 0.7208, "step": 5883 }, { "epoch": 1.8892278054262321, "grad_norm": 1.207506537437439, "learning_rate": 8.667056839638075e-05, "loss": 0.8187, "step": 5884 }, { "epoch": 1.8895488842510837, "grad_norm": 1.2479336261749268, "learning_rate": 8.663597411309279e-05, "loss": 0.4916, "step": 5885 }, { "epoch": 1.889869963075935, "grad_norm": 0.7998619675636292, "learning_rate": 8.660138145817079e-05, "loss": 
0.5774, "step": 5886 }, { "epoch": 1.8901910419007866, "grad_norm": 0.6762514710426331, "learning_rate": 8.656679043582986e-05, "loss": 0.4435, "step": 5887 }, { "epoch": 1.8905121207256381, "grad_norm": 2.044832944869995, "learning_rate": 8.653220105028474e-05, "loss": 0.6974, "step": 5888 }, { "epoch": 1.8908331995504897, "grad_norm": 0.8140920400619507, "learning_rate": 8.64976133057501e-05, "loss": 0.6755, "step": 5889 }, { "epoch": 1.8911542783753412, "grad_norm": 0.837049663066864, "learning_rate": 8.646302720644027e-05, "loss": 0.4787, "step": 5890 }, { "epoch": 1.8914753572001928, "grad_norm": 0.9118859767913818, "learning_rate": 8.642844275656957e-05, "loss": 0.6674, "step": 5891 }, { "epoch": 1.891796436025044, "grad_norm": 0.8164528012275696, "learning_rate": 8.639385996035194e-05, "loss": 0.5175, "step": 5892 }, { "epoch": 1.8921175148498957, "grad_norm": 1.1621068716049194, "learning_rate": 8.635927882200116e-05, "loss": 0.5832, "step": 5893 }, { "epoch": 1.8924385936747472, "grad_norm": 0.7055744528770447, "learning_rate": 8.632469934573094e-05, "loss": 0.5454, "step": 5894 }, { "epoch": 1.8927596724995985, "grad_norm": 1.0053250789642334, "learning_rate": 8.629012153575458e-05, "loss": 0.7157, "step": 5895 }, { "epoch": 1.89308075132445, "grad_norm": 0.8789286613464355, "learning_rate": 8.625554539628535e-05, "loss": 0.5651, "step": 5896 }, { "epoch": 1.8934018301493016, "grad_norm": 0.8847569227218628, "learning_rate": 8.62209709315362e-05, "loss": 0.648, "step": 5897 }, { "epoch": 1.8937229089741532, "grad_norm": 0.8232566714286804, "learning_rate": 8.618639814571996e-05, "loss": 0.473, "step": 5898 }, { "epoch": 1.8940439877990047, "grad_norm": 0.8590362668037415, "learning_rate": 8.615182704304918e-05, "loss": 0.6142, "step": 5899 }, { "epoch": 1.8943650666238563, "grad_norm": 1.0845537185668945, "learning_rate": 8.611725762773631e-05, "loss": 0.5017, "step": 5900 }, { "epoch": 1.8946861454487076, "grad_norm": 0.7190443277359009, "learning_rate": 
8.608268990399349e-05, "loss": 0.4462, "step": 5901 }, { "epoch": 1.8950072242735592, "grad_norm": 0.7313873171806335, "learning_rate": 8.604812387603265e-05, "loss": 0.4465, "step": 5902 }, { "epoch": 1.8953283030984105, "grad_norm": 0.816182553768158, "learning_rate": 8.601355954806561e-05, "loss": 0.4299, "step": 5903 }, { "epoch": 1.895649381923262, "grad_norm": 1.0076704025268555, "learning_rate": 8.597899692430389e-05, "loss": 0.5754, "step": 5904 }, { "epoch": 1.8959704607481136, "grad_norm": 0.9564604759216309, "learning_rate": 8.594443600895892e-05, "loss": 0.4443, "step": 5905 }, { "epoch": 1.8962915395729651, "grad_norm": 0.6225574612617493, "learning_rate": 8.590987680624174e-05, "loss": 0.3169, "step": 5906 }, { "epoch": 1.8966126183978167, "grad_norm": 1.0120649337768555, "learning_rate": 8.587531932036335e-05, "loss": 0.6126, "step": 5907 }, { "epoch": 1.8969336972226682, "grad_norm": 1.029392957687378, "learning_rate": 8.584076355553444e-05, "loss": 0.5688, "step": 5908 }, { "epoch": 1.8972547760475198, "grad_norm": 1.1245523691177368, "learning_rate": 8.580620951596557e-05, "loss": 0.5589, "step": 5909 }, { "epoch": 1.8975758548723711, "grad_norm": 0.8369296789169312, "learning_rate": 8.577165720586703e-05, "loss": 0.3727, "step": 5910 }, { "epoch": 1.8978969336972227, "grad_norm": 0.8176161646842957, "learning_rate": 8.573710662944885e-05, "loss": 0.3624, "step": 5911 }, { "epoch": 1.898218012522074, "grad_norm": 0.9345711469650269, "learning_rate": 8.570255779092098e-05, "loss": 0.4462, "step": 5912 }, { "epoch": 1.8985390913469256, "grad_norm": 1.294392466545105, "learning_rate": 8.566801069449305e-05, "loss": 0.3714, "step": 5913 }, { "epoch": 1.898860170171777, "grad_norm": 0.5765014886856079, "learning_rate": 8.56334653443746e-05, "loss": 0.3126, "step": 5914 }, { "epoch": 1.8991812489966287, "grad_norm": 0.6456901431083679, "learning_rate": 8.559892174477479e-05, "loss": 0.3416, "step": 5915 }, { "epoch": 1.8995023278214802, "grad_norm": 
0.4262344539165497, "learning_rate": 8.55643798999027e-05, "loss": 0.4155, "step": 5916 }, { "epoch": 1.8998234066463318, "grad_norm": 0.4280303120613098, "learning_rate": 8.55298398139671e-05, "loss": 0.4822, "step": 5917 }, { "epoch": 1.9001444854711833, "grad_norm": 0.6191977858543396, "learning_rate": 8.549530149117664e-05, "loss": 0.5129, "step": 5918 }, { "epoch": 1.9004655642960346, "grad_norm": 0.5976957678794861, "learning_rate": 8.546076493573972e-05, "loss": 0.2694, "step": 5919 }, { "epoch": 1.9007866431208862, "grad_norm": 0.6138757467269897, "learning_rate": 8.542623015186445e-05, "loss": 0.1946, "step": 5920 }, { "epoch": 1.9011077219457375, "grad_norm": 0.624046802520752, "learning_rate": 8.539169714375885e-05, "loss": 0.131, "step": 5921 }, { "epoch": 1.901428800770589, "grad_norm": 0.7090097069740295, "learning_rate": 8.535716591563062e-05, "loss": 0.3913, "step": 5922 }, { "epoch": 1.9017498795954406, "grad_norm": 1.0457631349563599, "learning_rate": 8.532263647168735e-05, "loss": 0.6625, "step": 5923 }, { "epoch": 1.9020709584202922, "grad_norm": 0.8407219648361206, "learning_rate": 8.528810881613626e-05, "loss": 0.6366, "step": 5924 }, { "epoch": 1.9023920372451437, "grad_norm": 0.9645373225212097, "learning_rate": 8.525358295318454e-05, "loss": 0.524, "step": 5925 }, { "epoch": 1.9027131160699953, "grad_norm": 0.8919804096221924, "learning_rate": 8.521905888703893e-05, "loss": 0.5851, "step": 5926 }, { "epoch": 1.9030341948948468, "grad_norm": 0.740583598613739, "learning_rate": 8.51845366219062e-05, "loss": 0.5141, "step": 5927 }, { "epoch": 1.9033552737196981, "grad_norm": 0.7482326030731201, "learning_rate": 8.515001616199279e-05, "loss": 0.4804, "step": 5928 }, { "epoch": 1.9036763525445497, "grad_norm": 0.9648141860961914, "learning_rate": 8.511549751150479e-05, "loss": 0.6938, "step": 5929 }, { "epoch": 1.903997431369401, "grad_norm": 0.8778332471847534, "learning_rate": 8.508098067464832e-05, "loss": 0.6264, "step": 5930 }, { "epoch": 
1.9043185101942526, "grad_norm": 0.9466199278831482, "learning_rate": 8.504646565562906e-05, "loss": 0.6328, "step": 5931 }, { "epoch": 1.9046395890191041, "grad_norm": 1.0974069833755493, "learning_rate": 8.501195245865263e-05, "loss": 0.8376, "step": 5932 }, { "epoch": 1.9049606678439557, "grad_norm": 1.1989012956619263, "learning_rate": 8.497744108792429e-05, "loss": 0.7932, "step": 5933 }, { "epoch": 1.9052817466688072, "grad_norm": 1.0475575923919678, "learning_rate": 8.494293154764924e-05, "loss": 0.6127, "step": 5934 }, { "epoch": 1.9056028254936588, "grad_norm": 0.8935070633888245, "learning_rate": 8.490842384203225e-05, "loss": 0.4574, "step": 5935 }, { "epoch": 1.9059239043185103, "grad_norm": 0.9734428524971008, "learning_rate": 8.487391797527808e-05, "loss": 0.6899, "step": 5936 }, { "epoch": 1.9062449831433617, "grad_norm": 1.1841446161270142, "learning_rate": 8.483941395159114e-05, "loss": 0.5652, "step": 5937 }, { "epoch": 1.9065660619682132, "grad_norm": 0.7886440753936768, "learning_rate": 8.480491177517557e-05, "loss": 0.5065, "step": 5938 }, { "epoch": 1.9068871407930645, "grad_norm": 1.349183440208435, "learning_rate": 8.477041145023546e-05, "loss": 0.6979, "step": 5939 }, { "epoch": 1.907208219617916, "grad_norm": 0.7376853227615356, "learning_rate": 8.473591298097448e-05, "loss": 0.485, "step": 5940 }, { "epoch": 1.9075292984427676, "grad_norm": 0.7252330183982849, "learning_rate": 8.47014163715962e-05, "loss": 0.4728, "step": 5941 }, { "epoch": 1.9078503772676192, "grad_norm": 1.054677128791809, "learning_rate": 8.466692162630392e-05, "loss": 0.7022, "step": 5942 }, { "epoch": 1.9081714560924707, "grad_norm": 0.9820946455001831, "learning_rate": 8.463242874930079e-05, "loss": 0.4489, "step": 5943 }, { "epoch": 1.9084925349173223, "grad_norm": 0.9880334734916687, "learning_rate": 8.459793774478957e-05, "loss": 0.5668, "step": 5944 }, { "epoch": 1.9088136137421738, "grad_norm": 0.8617029190063477, "learning_rate": 8.456344861697289e-05, "loss": 
0.5731, "step": 5945 }, { "epoch": 1.9091346925670252, "grad_norm": 0.9127218127250671, "learning_rate": 8.452896137005321e-05, "loss": 0.5747, "step": 5946 }, { "epoch": 1.9094557713918767, "grad_norm": 0.8752137422561646, "learning_rate": 8.449447600823262e-05, "loss": 0.533, "step": 5947 }, { "epoch": 1.909776850216728, "grad_norm": 0.9676142334938049, "learning_rate": 8.445999253571315e-05, "loss": 0.6118, "step": 5948 }, { "epoch": 1.9100979290415796, "grad_norm": 1.485176920890808, "learning_rate": 8.442551095669639e-05, "loss": 0.491, "step": 5949 }, { "epoch": 1.9104190078664312, "grad_norm": 0.8751528263092041, "learning_rate": 8.439103127538392e-05, "loss": 0.5723, "step": 5950 }, { "epoch": 1.9107400866912827, "grad_norm": 0.8707427382469177, "learning_rate": 8.435655349597689e-05, "loss": 0.5169, "step": 5951 }, { "epoch": 1.9110611655161343, "grad_norm": 1.3300323486328125, "learning_rate": 8.432207762267644e-05, "loss": 0.7869, "step": 5952 }, { "epoch": 1.9113822443409858, "grad_norm": 0.8493139147758484, "learning_rate": 8.428760365968326e-05, "loss": 0.4855, "step": 5953 }, { "epoch": 1.9117033231658374, "grad_norm": 0.7276837825775146, "learning_rate": 8.425313161119787e-05, "loss": 0.449, "step": 5954 }, { "epoch": 1.9120244019906887, "grad_norm": 0.7487722635269165, "learning_rate": 8.421866148142066e-05, "loss": 0.5161, "step": 5955 }, { "epoch": 1.9123454808155402, "grad_norm": 0.7644703984260559, "learning_rate": 8.418419327455164e-05, "loss": 0.4887, "step": 5956 }, { "epoch": 1.9126665596403916, "grad_norm": 1.1047660112380981, "learning_rate": 8.414972699479075e-05, "loss": 0.6635, "step": 5957 }, { "epoch": 1.9129876384652431, "grad_norm": 1.2923800945281982, "learning_rate": 8.41152626463375e-05, "loss": 0.8645, "step": 5958 }, { "epoch": 1.9133087172900947, "grad_norm": 0.7669944167137146, "learning_rate": 8.408080023339133e-05, "loss": 0.5723, "step": 5959 }, { "epoch": 1.9136297961149462, "grad_norm": 0.7324369549751282, 
"learning_rate": 8.404633976015134e-05, "loss": 0.4862, "step": 5960 }, { "epoch": 1.9139508749397978, "grad_norm": 0.8924105763435364, "learning_rate": 8.401188123081653e-05, "loss": 0.4537, "step": 5961 }, { "epoch": 1.9142719537646493, "grad_norm": 0.8175060153007507, "learning_rate": 8.397742464958547e-05, "loss": 0.5257, "step": 5962 }, { "epoch": 1.9145930325895009, "grad_norm": 0.9657145738601685, "learning_rate": 8.394297002065658e-05, "loss": 0.4518, "step": 5963 }, { "epoch": 1.9149141114143522, "grad_norm": 0.5297245383262634, "learning_rate": 8.390851734822808e-05, "loss": 0.366, "step": 5964 }, { "epoch": 1.9152351902392037, "grad_norm": 0.5978273153305054, "learning_rate": 8.387406663649795e-05, "loss": 0.5728, "step": 5965 }, { "epoch": 1.915556269064055, "grad_norm": 0.3594565987586975, "learning_rate": 8.383961788966391e-05, "loss": 0.4309, "step": 5966 }, { "epoch": 1.9158773478889066, "grad_norm": 0.5129295587539673, "learning_rate": 8.380517111192337e-05, "loss": 0.7305, "step": 5967 }, { "epoch": 1.9161984267137582, "grad_norm": 0.6634910702705383, "learning_rate": 8.377072630747364e-05, "loss": 0.5793, "step": 5968 }, { "epoch": 1.9165195055386097, "grad_norm": 0.5111119747161865, "learning_rate": 8.373628348051165e-05, "loss": 0.2366, "step": 5969 }, { "epoch": 1.9168405843634613, "grad_norm": 0.6472846269607544, "learning_rate": 8.37018426352342e-05, "loss": 0.1341, "step": 5970 }, { "epoch": 1.9171616631883128, "grad_norm": 0.5201217532157898, "learning_rate": 8.366740377583781e-05, "loss": 0.2317, "step": 5971 }, { "epoch": 1.9174827420131644, "grad_norm": 0.4701801836490631, "learning_rate": 8.363296690651868e-05, "loss": 0.3096, "step": 5972 }, { "epoch": 1.9178038208380157, "grad_norm": 0.9514907598495483, "learning_rate": 8.359853203147291e-05, "loss": 0.7662, "step": 5973 }, { "epoch": 1.9181248996628673, "grad_norm": 0.7471129298210144, "learning_rate": 8.356409915489625e-05, "loss": 0.4633, "step": 5974 }, { "epoch": 
1.9184459784877186, "grad_norm": 0.7893948554992676, "learning_rate": 8.352966828098428e-05, "loss": 0.5721, "step": 5975 }, { "epoch": 1.9187670573125701, "grad_norm": 0.6973623633384705, "learning_rate": 8.349523941393224e-05, "loss": 0.5921, "step": 5976 }, { "epoch": 1.9190881361374217, "grad_norm": 0.6330721378326416, "learning_rate": 8.346081255793525e-05, "loss": 0.4046, "step": 5977 }, { "epoch": 1.9194092149622732, "grad_norm": 0.844613254070282, "learning_rate": 8.342638771718802e-05, "loss": 0.6565, "step": 5978 }, { "epoch": 1.9197302937871248, "grad_norm": 0.8681631088256836, "learning_rate": 8.339196489588523e-05, "loss": 0.5315, "step": 5979 }, { "epoch": 1.9200513726119763, "grad_norm": 0.8076524138450623, "learning_rate": 8.335754409822114e-05, "loss": 0.621, "step": 5980 }, { "epoch": 1.9203724514368279, "grad_norm": 0.8629617094993591, "learning_rate": 8.332312532838978e-05, "loss": 0.5126, "step": 5981 }, { "epoch": 1.9206935302616792, "grad_norm": 0.8805716633796692, "learning_rate": 8.328870859058506e-05, "loss": 0.6354, "step": 5982 }, { "epoch": 1.9210146090865308, "grad_norm": 0.9122462272644043, "learning_rate": 8.325429388900046e-05, "loss": 0.6502, "step": 5983 }, { "epoch": 1.921335687911382, "grad_norm": 0.7924925684928894, "learning_rate": 8.321988122782937e-05, "loss": 0.6105, "step": 5984 }, { "epoch": 1.9216567667362336, "grad_norm": 0.9744467735290527, "learning_rate": 8.318547061126485e-05, "loss": 0.6674, "step": 5985 }, { "epoch": 1.9219778455610852, "grad_norm": 0.8236846327781677, "learning_rate": 8.315106204349976e-05, "loss": 0.5076, "step": 5986 }, { "epoch": 1.9222989243859367, "grad_norm": 0.8699111342430115, "learning_rate": 8.311665552872662e-05, "loss": 0.6371, "step": 5987 }, { "epoch": 1.9226200032107883, "grad_norm": 0.9178052544593811, "learning_rate": 8.30822510711378e-05, "loss": 0.7132, "step": 5988 }, { "epoch": 1.9229410820356398, "grad_norm": 1.2815251350402832, "learning_rate": 8.30478486749254e-05, "loss": 
0.6409, "step": 5989 }, { "epoch": 1.9232621608604914, "grad_norm": 0.7497451901435852, "learning_rate": 8.301344834428116e-05, "loss": 0.4567, "step": 5990 }, { "epoch": 1.9235832396853427, "grad_norm": 0.8337712287902832, "learning_rate": 8.297905008339677e-05, "loss": 0.603, "step": 5991 }, { "epoch": 1.9239043185101943, "grad_norm": 1.0814270973205566, "learning_rate": 8.294465389646345e-05, "loss": 0.7705, "step": 5992 }, { "epoch": 1.9242253973350456, "grad_norm": 0.9741120934486389, "learning_rate": 8.291025978767235e-05, "loss": 0.7132, "step": 5993 }, { "epoch": 1.9245464761598972, "grad_norm": 0.6710167527198792, "learning_rate": 8.287586776121423e-05, "loss": 0.416, "step": 5994 }, { "epoch": 1.9248675549847487, "grad_norm": 0.6828485131263733, "learning_rate": 8.284147782127971e-05, "loss": 0.4111, "step": 5995 }, { "epoch": 1.9251886338096003, "grad_norm": 1.174616813659668, "learning_rate": 8.280708997205904e-05, "loss": 0.6958, "step": 5996 }, { "epoch": 1.9255097126344518, "grad_norm": 1.070745825767517, "learning_rate": 8.277270421774235e-05, "loss": 0.7379, "step": 5997 }, { "epoch": 1.9258307914593034, "grad_norm": 0.9961045384407043, "learning_rate": 8.273832056251937e-05, "loss": 0.467, "step": 5998 }, { "epoch": 1.926151870284155, "grad_norm": 0.8946678638458252, "learning_rate": 8.270393901057964e-05, "loss": 0.5288, "step": 5999 }, { "epoch": 1.9264729491090062, "grad_norm": 0.7546229362487793, "learning_rate": 8.266955956611253e-05, "loss": 0.4404, "step": 6000 }, { "epoch": 1.9267940279338578, "grad_norm": 0.8234146237373352, "learning_rate": 8.263518223330697e-05, "loss": 0.5327, "step": 6001 }, { "epoch": 1.9271151067587091, "grad_norm": 0.760515570640564, "learning_rate": 8.26008070163518e-05, "loss": 0.4604, "step": 6002 }, { "epoch": 1.9274361855835607, "grad_norm": 1.1221221685409546, "learning_rate": 8.256643391943551e-05, "loss": 0.5529, "step": 6003 }, { "epoch": 1.9277572644084122, "grad_norm": 0.8846840858459473, 
"learning_rate": 8.25320629467464e-05, "loss": 0.4192, "step": 6004 }, { "epoch": 1.9280783432332638, "grad_norm": 0.8447346091270447, "learning_rate": 8.249769410247239e-05, "loss": 0.4966, "step": 6005 }, { "epoch": 1.9283994220581153, "grad_norm": 1.1361325979232788, "learning_rate": 8.24633273908013e-05, "loss": 0.555, "step": 6006 }, { "epoch": 1.9287205008829669, "grad_norm": 1.2147115468978882, "learning_rate": 8.242896281592057e-05, "loss": 0.699, "step": 6007 }, { "epoch": 1.9290415797078184, "grad_norm": 0.7966561317443848, "learning_rate": 8.239460038201739e-05, "loss": 0.4552, "step": 6008 }, { "epoch": 1.9293626585326697, "grad_norm": 0.9599836468696594, "learning_rate": 8.236024009327879e-05, "loss": 0.6289, "step": 6009 }, { "epoch": 1.9296837373575213, "grad_norm": 1.475778341293335, "learning_rate": 8.23258819538914e-05, "loss": 0.4814, "step": 6010 }, { "epoch": 1.9300048161823726, "grad_norm": 1.2509891986846924, "learning_rate": 8.229152596804168e-05, "loss": 0.4725, "step": 6011 }, { "epoch": 1.9303258950072242, "grad_norm": 0.7716514468193054, "learning_rate": 8.225717213991579e-05, "loss": 0.4775, "step": 6012 }, { "epoch": 1.9306469738320757, "grad_norm": 0.9235790967941284, "learning_rate": 8.222282047369971e-05, "loss": 0.6091, "step": 6013 }, { "epoch": 1.9309680526569273, "grad_norm": 0.5926783084869385, "learning_rate": 8.218847097357898e-05, "loss": 0.3537, "step": 6014 }, { "epoch": 1.9312891314817788, "grad_norm": 0.5469971299171448, "learning_rate": 8.215412364373907e-05, "loss": 0.5765, "step": 6015 }, { "epoch": 1.9316102103066304, "grad_norm": 0.5453217029571533, "learning_rate": 8.211977848836506e-05, "loss": 0.8021, "step": 6016 }, { "epoch": 1.931931289131482, "grad_norm": 0.5584077835083008, "learning_rate": 8.208543551164178e-05, "loss": 0.2779, "step": 6017 }, { "epoch": 1.9322523679563333, "grad_norm": 1.3637664318084717, "learning_rate": 8.205109471775387e-05, "loss": 0.5096, "step": 6018 }, { "epoch": 1.9325734467811848, 
"grad_norm": 0.6770474910736084, "learning_rate": 8.201675611088558e-05, "loss": 0.3166, "step": 6019 }, { "epoch": 1.9328945256060361, "grad_norm": 0.7160546183586121, "learning_rate": 8.198241969522107e-05, "loss": 0.1541, "step": 6020 }, { "epoch": 1.9332156044308877, "grad_norm": 0.6660668253898621, "learning_rate": 8.194808547494401e-05, "loss": 0.5054, "step": 6021 }, { "epoch": 1.9335366832557392, "grad_norm": 0.7240803241729736, "learning_rate": 8.191375345423799e-05, "loss": 0.5679, "step": 6022 }, { "epoch": 1.9338577620805908, "grad_norm": 0.9651069045066833, "learning_rate": 8.187942363728625e-05, "loss": 0.5552, "step": 6023 }, { "epoch": 1.9341788409054423, "grad_norm": 0.6748064160346985, "learning_rate": 8.184509602827181e-05, "loss": 0.5365, "step": 6024 }, { "epoch": 1.934499919730294, "grad_norm": 0.7690969705581665, "learning_rate": 8.181077063137733e-05, "loss": 0.5248, "step": 6025 }, { "epoch": 1.9348209985551454, "grad_norm": 0.7835337519645691, "learning_rate": 8.177644745078526e-05, "loss": 0.5647, "step": 6026 }, { "epoch": 1.9351420773799968, "grad_norm": 0.7683063745498657, "learning_rate": 8.174212649067781e-05, "loss": 0.485, "step": 6027 }, { "epoch": 1.9354631562048483, "grad_norm": 0.9228124618530273, "learning_rate": 8.170780775523684e-05, "loss": 0.6106, "step": 6028 }, { "epoch": 1.9357842350296997, "grad_norm": 0.8772586584091187, "learning_rate": 8.167349124864405e-05, "loss": 0.5138, "step": 6029 }, { "epoch": 1.9361053138545512, "grad_norm": 0.8027740716934204, "learning_rate": 8.163917697508072e-05, "loss": 0.583, "step": 6030 }, { "epoch": 1.9364263926794028, "grad_norm": 0.9190700054168701, "learning_rate": 8.160486493872798e-05, "loss": 0.7091, "step": 6031 }, { "epoch": 1.9367474715042543, "grad_norm": 0.9426003098487854, "learning_rate": 8.157055514376666e-05, "loss": 0.6263, "step": 6032 }, { "epoch": 1.9370685503291059, "grad_norm": 0.918766975402832, "learning_rate": 8.153624759437732e-05, "loss": 0.669, "step": 
6033 }, { "epoch": 1.9373896291539574, "grad_norm": 0.9351176023483276, "learning_rate": 8.15019422947402e-05, "loss": 0.6859, "step": 6034 }, { "epoch": 1.937710707978809, "grad_norm": 0.8434003591537476, "learning_rate": 8.146763924903527e-05, "loss": 0.5469, "step": 6035 }, { "epoch": 1.9380317868036603, "grad_norm": 0.8188439607620239, "learning_rate": 8.14333384614423e-05, "loss": 0.5615, "step": 6036 }, { "epoch": 1.9383528656285118, "grad_norm": 0.782618522644043, "learning_rate": 8.139903993614068e-05, "loss": 0.5117, "step": 6037 }, { "epoch": 1.9386739444533632, "grad_norm": 0.882308840751648, "learning_rate": 8.136474367730969e-05, "loss": 0.5433, "step": 6038 }, { "epoch": 1.9389950232782147, "grad_norm": 1.1491224765777588, "learning_rate": 8.133044968912811e-05, "loss": 0.5036, "step": 6039 }, { "epoch": 1.9393161021030663, "grad_norm": 1.1491897106170654, "learning_rate": 8.129615797577461e-05, "loss": 0.63, "step": 6040 }, { "epoch": 1.9396371809279178, "grad_norm": 0.8906580805778503, "learning_rate": 8.126186854142752e-05, "loss": 0.4457, "step": 6041 }, { "epoch": 1.9399582597527694, "grad_norm": 0.7406003475189209, "learning_rate": 8.122758139026495e-05, "loss": 0.4659, "step": 6042 }, { "epoch": 1.940279338577621, "grad_norm": 0.8870121240615845, "learning_rate": 8.119329652646463e-05, "loss": 0.6007, "step": 6043 }, { "epoch": 1.9406004174024725, "grad_norm": 1.128095269203186, "learning_rate": 8.115901395420407e-05, "loss": 0.6736, "step": 6044 }, { "epoch": 1.9409214962273238, "grad_norm": 1.0389516353607178, "learning_rate": 8.11247336776605e-05, "loss": 0.6387, "step": 6045 }, { "epoch": 1.9412425750521753, "grad_norm": 1.045128583908081, "learning_rate": 8.109045570101086e-05, "loss": 0.6013, "step": 6046 }, { "epoch": 1.9415636538770267, "grad_norm": 0.6581218838691711, "learning_rate": 8.105618002843189e-05, "loss": 0.4185, "step": 6047 }, { "epoch": 1.9418847327018782, "grad_norm": 0.7015986442565918, "learning_rate": 
8.102190666409987e-05, "loss": 0.3757, "step": 6048 }, { "epoch": 1.9422058115267298, "grad_norm": 1.0713157653808594, "learning_rate": 8.0987635612191e-05, "loss": 0.5619, "step": 6049 }, { "epoch": 1.9425268903515813, "grad_norm": 0.5126787424087524, "learning_rate": 8.095336687688102e-05, "loss": 0.2985, "step": 6050 }, { "epoch": 1.9428479691764329, "grad_norm": 0.9810903668403625, "learning_rate": 8.091910046234552e-05, "loss": 0.5227, "step": 6051 }, { "epoch": 1.9431690480012844, "grad_norm": 1.0264313220977783, "learning_rate": 8.088483637275979e-05, "loss": 0.591, "step": 6052 }, { "epoch": 1.943490126826136, "grad_norm": 0.7621950507164001, "learning_rate": 8.085057461229872e-05, "loss": 0.4689, "step": 6053 }, { "epoch": 1.9438112056509873, "grad_norm": 0.6205912828445435, "learning_rate": 8.081631518513704e-05, "loss": 0.339, "step": 6054 }, { "epoch": 1.9441322844758389, "grad_norm": 0.9565242528915405, "learning_rate": 8.078205809544917e-05, "loss": 0.5811, "step": 6055 }, { "epoch": 1.9444533633006902, "grad_norm": 1.0997697114944458, "learning_rate": 8.074780334740928e-05, "loss": 0.6262, "step": 6056 }, { "epoch": 1.9447744421255417, "grad_norm": 0.8352198004722595, "learning_rate": 8.071355094519109e-05, "loss": 0.455, "step": 6057 }, { "epoch": 1.9450955209503933, "grad_norm": 1.3071821928024292, "learning_rate": 8.067930089296827e-05, "loss": 0.6426, "step": 6058 }, { "epoch": 1.9454165997752448, "grad_norm": 0.8504396677017212, "learning_rate": 8.064505319491398e-05, "loss": 0.4168, "step": 6059 }, { "epoch": 1.9457376786000964, "grad_norm": 1.1026358604431152, "learning_rate": 8.061080785520126e-05, "loss": 0.5795, "step": 6060 }, { "epoch": 1.946058757424948, "grad_norm": 0.9876211285591125, "learning_rate": 8.057656487800282e-05, "loss": 0.4293, "step": 6061 }, { "epoch": 1.9463798362497995, "grad_norm": 0.8381431102752686, "learning_rate": 8.0542324267491e-05, "loss": 0.4613, "step": 6062 }, { "epoch": 1.9467009150746508, "grad_norm": 
0.9041491150856018, "learning_rate": 8.050808602783795e-05, "loss": 0.4021, "step": 6063 }, { "epoch": 1.9470219938995024, "grad_norm": 0.8426972031593323, "learning_rate": 8.047385016321552e-05, "loss": 0.5205, "step": 6064 }, { "epoch": 1.9473430727243537, "grad_norm": 0.5573900938034058, "learning_rate": 8.04396166777952e-05, "loss": 0.6376, "step": 6065 }, { "epoch": 1.9476641515492052, "grad_norm": 0.5124462246894836, "learning_rate": 8.040538557574822e-05, "loss": 0.7874, "step": 6066 }, { "epoch": 1.9479852303740568, "grad_norm": 0.7582365870475769, "learning_rate": 8.037115686124564e-05, "loss": 0.3713, "step": 6067 }, { "epoch": 1.9483063091989083, "grad_norm": 1.0878230333328247, "learning_rate": 8.033693053845801e-05, "loss": 0.5994, "step": 6068 }, { "epoch": 1.94862738802376, "grad_norm": 0.8553785681724548, "learning_rate": 8.030270661155574e-05, "loss": 0.2631, "step": 6069 }, { "epoch": 1.9489484668486114, "grad_norm": 0.5939182043075562, "learning_rate": 8.026848508470897e-05, "loss": 0.2166, "step": 6070 }, { "epoch": 1.949269545673463, "grad_norm": 0.5438021421432495, "learning_rate": 8.023426596208739e-05, "loss": 0.1312, "step": 6071 }, { "epoch": 1.9495906244983143, "grad_norm": 0.6619899272918701, "learning_rate": 8.020004924786059e-05, "loss": 0.2189, "step": 6072 }, { "epoch": 1.9499117033231659, "grad_norm": 0.8623759150505066, "learning_rate": 8.016583494619769e-05, "loss": 0.7344, "step": 6073 }, { "epoch": 1.9502327821480172, "grad_norm": 0.8690182566642761, "learning_rate": 8.013162306126765e-05, "loss": 0.6706, "step": 6074 }, { "epoch": 1.9505538609728688, "grad_norm": 0.8359203338623047, "learning_rate": 8.009741359723906e-05, "loss": 0.7241, "step": 6075 }, { "epoch": 1.9508749397977203, "grad_norm": 0.9023215770721436, "learning_rate": 8.00632065582803e-05, "loss": 0.662, "step": 6076 }, { "epoch": 1.9511960186225719, "grad_norm": 0.8437222242355347, "learning_rate": 8.002900194855932e-05, "loss": 0.4732, "step": 6077 }, { 
"epoch": 1.9515170974474234, "grad_norm": 1.18110990524292, "learning_rate": 7.999479977224384e-05, "loss": 0.7354, "step": 6078 }, { "epoch": 1.951838176272275, "grad_norm": 0.7391937971115112, "learning_rate": 7.996060003350139e-05, "loss": 0.5063, "step": 6079 }, { "epoch": 1.9521592550971263, "grad_norm": 0.937690794467926, "learning_rate": 7.992640273649898e-05, "loss": 0.5532, "step": 6080 }, { "epoch": 1.9524803339219778, "grad_norm": 0.7518649101257324, "learning_rate": 7.989220788540355e-05, "loss": 0.5798, "step": 6081 }, { "epoch": 1.9528014127468294, "grad_norm": 1.227014422416687, "learning_rate": 7.985801548438157e-05, "loss": 0.5997, "step": 6082 }, { "epoch": 1.9531224915716807, "grad_norm": 0.9930256009101868, "learning_rate": 7.982382553759931e-05, "loss": 0.7134, "step": 6083 }, { "epoch": 1.9534435703965323, "grad_norm": 0.8083244562149048, "learning_rate": 7.97896380492227e-05, "loss": 0.5235, "step": 6084 }, { "epoch": 1.9537646492213838, "grad_norm": 0.8794909119606018, "learning_rate": 7.975545302341743e-05, "loss": 0.6455, "step": 6085 }, { "epoch": 1.9540857280462354, "grad_norm": 0.7878838181495667, "learning_rate": 7.972127046434878e-05, "loss": 0.5327, "step": 6086 }, { "epoch": 1.954406806871087, "grad_norm": 0.861558198928833, "learning_rate": 7.96870903761818e-05, "loss": 0.4608, "step": 6087 }, { "epoch": 1.9547278856959385, "grad_norm": 0.7832056283950806, "learning_rate": 7.965291276308124e-05, "loss": 0.5346, "step": 6088 }, { "epoch": 1.9550489645207898, "grad_norm": 0.9764854907989502, "learning_rate": 7.961873762921153e-05, "loss": 0.5555, "step": 6089 }, { "epoch": 1.9553700433456414, "grad_norm": 1.0838629007339478, "learning_rate": 7.958456497873685e-05, "loss": 0.6255, "step": 6090 }, { "epoch": 1.955691122170493, "grad_norm": 1.2348774671554565, "learning_rate": 7.955039481582097e-05, "loss": 0.4626, "step": 6091 }, { "epoch": 1.9560122009953442, "grad_norm": 0.8627070784568787, "learning_rate": 7.951622714462746e-05, 
"loss": 0.5652, "step": 6092 }, { "epoch": 1.9563332798201958, "grad_norm": 1.0902684926986694, "learning_rate": 7.948206196931954e-05, "loss": 0.3824, "step": 6093 }, { "epoch": 1.9566543586450473, "grad_norm": 0.9122470617294312, "learning_rate": 7.944789929406016e-05, "loss": 0.4996, "step": 6094 }, { "epoch": 1.9569754374698989, "grad_norm": 0.9444713592529297, "learning_rate": 7.941373912301189e-05, "loss": 0.782, "step": 6095 }, { "epoch": 1.9572965162947504, "grad_norm": 0.7925707697868347, "learning_rate": 7.937958146033705e-05, "loss": 0.5442, "step": 6096 }, { "epoch": 1.957617595119602, "grad_norm": 0.7286838889122009, "learning_rate": 7.934542631019768e-05, "loss": 0.4491, "step": 6097 }, { "epoch": 1.9579386739444533, "grad_norm": 0.904128909111023, "learning_rate": 7.931127367675543e-05, "loss": 0.6565, "step": 6098 }, { "epoch": 1.9582597527693049, "grad_norm": 0.8943635821342468, "learning_rate": 7.927712356417176e-05, "loss": 0.5327, "step": 6099 }, { "epoch": 1.9585808315941564, "grad_norm": 0.8871841430664062, "learning_rate": 7.92429759766077e-05, "loss": 0.5109, "step": 6100 }, { "epoch": 1.9589019104190077, "grad_norm": 0.6855352520942688, "learning_rate": 7.920883091822408e-05, "loss": 0.4418, "step": 6101 }, { "epoch": 1.9592229892438593, "grad_norm": 0.8009575009346008, "learning_rate": 7.917468839318132e-05, "loss": 0.4312, "step": 6102 }, { "epoch": 1.9595440680687108, "grad_norm": 0.8051626682281494, "learning_rate": 7.914054840563963e-05, "loss": 0.4664, "step": 6103 }, { "epoch": 1.9598651468935624, "grad_norm": 0.9816034436225891, "learning_rate": 7.910641095975886e-05, "loss": 0.5121, "step": 6104 }, { "epoch": 1.960186225718414, "grad_norm": 1.6955353021621704, "learning_rate": 7.907227605969849e-05, "loss": 0.6544, "step": 6105 }, { "epoch": 1.9605073045432655, "grad_norm": 0.8258131146430969, "learning_rate": 7.903814370961784e-05, "loss": 0.4505, "step": 6106 }, { "epoch": 1.9608283833681168, "grad_norm": 1.0741841793060303, 
"learning_rate": 7.900401391367576e-05, "loss": 0.636, "step": 6107 }, { "epoch": 1.9611494621929684, "grad_norm": 0.7029876112937927, "learning_rate": 7.896988667603093e-05, "loss": 0.4387, "step": 6108 }, { "epoch": 1.96147054101782, "grad_norm": 0.8194742798805237, "learning_rate": 7.893576200084159e-05, "loss": 0.5288, "step": 6109 }, { "epoch": 1.9617916198426713, "grad_norm": 0.9146851301193237, "learning_rate": 7.89016398922658e-05, "loss": 0.504, "step": 6110 }, { "epoch": 1.9621126986675228, "grad_norm": 0.760689914226532, "learning_rate": 7.886752035446114e-05, "loss": 0.4793, "step": 6111 }, { "epoch": 1.9624337774923744, "grad_norm": 0.5260710120201111, "learning_rate": 7.883340339158505e-05, "loss": 0.3192, "step": 6112 }, { "epoch": 1.962754856317226, "grad_norm": 0.7600740194320679, "learning_rate": 7.879928900779456e-05, "loss": 0.3748, "step": 6113 }, { "epoch": 1.9630759351420775, "grad_norm": 0.8149203658103943, "learning_rate": 7.876517720724636e-05, "loss": 0.5003, "step": 6114 }, { "epoch": 1.963397013966929, "grad_norm": 0.630042314529419, "learning_rate": 7.873106799409695e-05, "loss": 0.6224, "step": 6115 }, { "epoch": 1.9637180927917803, "grad_norm": 0.6109962463378906, "learning_rate": 7.869696137250235e-05, "loss": 0.6135, "step": 6116 }, { "epoch": 1.9640391716166319, "grad_norm": 0.4882936179637909, "learning_rate": 7.866285734661841e-05, "loss": 0.3282, "step": 6117 }, { "epoch": 1.9643602504414834, "grad_norm": 0.5604743957519531, "learning_rate": 7.862875592060056e-05, "loss": 0.2431, "step": 6118 }, { "epoch": 1.9646813292663348, "grad_norm": 0.4672786593437195, "learning_rate": 7.8594657098604e-05, "loss": 0.183, "step": 6119 }, { "epoch": 1.9650024080911863, "grad_norm": 0.4110962450504303, "learning_rate": 7.856056088478352e-05, "loss": 0.1327, "step": 6120 }, { "epoch": 1.9653234869160379, "grad_norm": 0.5428380370140076, "learning_rate": 7.852646728329368e-05, "loss": 0.2487, "step": 6121 }, { "epoch": 1.9656445657408894, 
"grad_norm": 0.7151157855987549, "learning_rate": 7.849237629828869e-05, "loss": 0.2885, "step": 6122 }, { "epoch": 1.965965644565741, "grad_norm": 0.8936823606491089, "learning_rate": 7.845828793392236e-05, "loss": 0.7795, "step": 6123 }, { "epoch": 1.9662867233905925, "grad_norm": 0.8891395330429077, "learning_rate": 7.842420219434833e-05, "loss": 0.4893, "step": 6124 }, { "epoch": 1.9666078022154438, "grad_norm": 0.8358086943626404, "learning_rate": 7.83901190837198e-05, "loss": 0.7111, "step": 6125 }, { "epoch": 1.9669288810402954, "grad_norm": 0.7215904593467712, "learning_rate": 7.835603860618972e-05, "loss": 0.402, "step": 6126 }, { "epoch": 1.967249959865147, "grad_norm": 0.7132273316383362, "learning_rate": 7.832196076591067e-05, "loss": 0.4719, "step": 6127 }, { "epoch": 1.9675710386899983, "grad_norm": 0.9818544387817383, "learning_rate": 7.828788556703498e-05, "loss": 0.8142, "step": 6128 }, { "epoch": 1.9678921175148498, "grad_norm": 1.1569818258285522, "learning_rate": 7.825381301371452e-05, "loss": 0.5118, "step": 6129 }, { "epoch": 1.9682131963397014, "grad_norm": 0.9389197826385498, "learning_rate": 7.821974311010102e-05, "loss": 0.6396, "step": 6130 }, { "epoch": 1.968534275164553, "grad_norm": 1.039969563484192, "learning_rate": 7.818567586034577e-05, "loss": 0.7603, "step": 6131 }, { "epoch": 1.9688553539894045, "grad_norm": 1.0185346603393555, "learning_rate": 7.81516112685997e-05, "loss": 0.6472, "step": 6132 }, { "epoch": 1.969176432814256, "grad_norm": 0.9028285145759583, "learning_rate": 7.811754933901358e-05, "loss": 0.7387, "step": 6133 }, { "epoch": 1.9694975116391074, "grad_norm": 1.1110644340515137, "learning_rate": 7.808349007573763e-05, "loss": 0.5489, "step": 6134 }, { "epoch": 1.969818590463959, "grad_norm": 1.0398848056793213, "learning_rate": 7.804943348292197e-05, "loss": 0.7001, "step": 6135 }, { "epoch": 1.9701396692888105, "grad_norm": 0.7271793484687805, "learning_rate": 7.801537956471624e-05, "loss": 0.5753, "step": 6136 }, 
{ "epoch": 1.9704607481136618, "grad_norm": 1.480493426322937, "learning_rate": 7.798132832526986e-05, "loss": 0.657, "step": 6137 }, { "epoch": 1.9707818269385133, "grad_norm": 0.8976219892501831, "learning_rate": 7.79472797687318e-05, "loss": 0.5007, "step": 6138 }, { "epoch": 1.9711029057633649, "grad_norm": 0.8589239716529846, "learning_rate": 7.791323389925084e-05, "loss": 0.603, "step": 6139 }, { "epoch": 1.9714239845882164, "grad_norm": 0.8826812505722046, "learning_rate": 7.787919072097531e-05, "loss": 0.6224, "step": 6140 }, { "epoch": 1.971745063413068, "grad_norm": 0.7189533114433289, "learning_rate": 7.784515023805328e-05, "loss": 0.4416, "step": 6141 }, { "epoch": 1.9720661422379195, "grad_norm": 1.1884037256240845, "learning_rate": 7.781111245463252e-05, "loss": 0.8795, "step": 6142 }, { "epoch": 1.9723872210627709, "grad_norm": 1.107159972190857, "learning_rate": 7.777707737486037e-05, "loss": 0.6781, "step": 6143 }, { "epoch": 1.9727082998876224, "grad_norm": 0.7705017924308777, "learning_rate": 7.774304500288394e-05, "loss": 0.5458, "step": 6144 }, { "epoch": 1.9730293787124737, "grad_norm": 0.8601257801055908, "learning_rate": 7.770901534284995e-05, "loss": 0.4366, "step": 6145 }, { "epoch": 1.9733504575373253, "grad_norm": 0.6789402961730957, "learning_rate": 7.767498839890488e-05, "loss": 0.4362, "step": 6146 }, { "epoch": 1.9736715363621768, "grad_norm": 0.7042021155357361, "learning_rate": 7.76409641751947e-05, "loss": 0.4543, "step": 6147 }, { "epoch": 1.9739926151870284, "grad_norm": 0.9554926753044128, "learning_rate": 7.760694267586525e-05, "loss": 0.7007, "step": 6148 }, { "epoch": 1.97431369401188, "grad_norm": 0.9321420192718506, "learning_rate": 7.75729239050619e-05, "loss": 0.6791, "step": 6149 }, { "epoch": 1.9746347728367315, "grad_norm": 0.8884396553039551, "learning_rate": 7.753890786692972e-05, "loss": 0.4627, "step": 6150 }, { "epoch": 1.974955851661583, "grad_norm": 0.805990993976593, "learning_rate": 7.750489456561352e-05, 
"loss": 0.4353, "step": 6151 }, { "epoch": 1.9752769304864344, "grad_norm": 0.9722745418548584, "learning_rate": 7.747088400525766e-05, "loss": 0.5465, "step": 6152 }, { "epoch": 1.975598009311286, "grad_norm": 0.662632405757904, "learning_rate": 7.743687619000626e-05, "loss": 0.4702, "step": 6153 }, { "epoch": 1.9759190881361373, "grad_norm": 0.8062264919281006, "learning_rate": 7.740287112400303e-05, "loss": 0.5142, "step": 6154 }, { "epoch": 1.9762401669609888, "grad_norm": 0.8860646486282349, "learning_rate": 7.736886881139142e-05, "loss": 0.4718, "step": 6155 }, { "epoch": 1.9765612457858404, "grad_norm": 0.928923487663269, "learning_rate": 7.733486925631447e-05, "loss": 0.5866, "step": 6156 }, { "epoch": 1.976882324610692, "grad_norm": 0.9381113648414612, "learning_rate": 7.730087246291502e-05, "loss": 0.4805, "step": 6157 }, { "epoch": 1.9772034034355435, "grad_norm": 0.8858740329742432, "learning_rate": 7.726687843533538e-05, "loss": 0.5572, "step": 6158 }, { "epoch": 1.977524482260395, "grad_norm": 0.9326648712158203, "learning_rate": 7.723288717771761e-05, "loss": 0.5227, "step": 6159 }, { "epoch": 1.9778455610852466, "grad_norm": 0.9480475187301636, "learning_rate": 7.719889869420353e-05, "loss": 0.4524, "step": 6160 }, { "epoch": 1.978166639910098, "grad_norm": 0.7123903632164001, "learning_rate": 7.716491298893442e-05, "loss": 0.4944, "step": 6161 }, { "epoch": 1.9784877187349494, "grad_norm": 0.5559878349304199, "learning_rate": 7.713093006605145e-05, "loss": 0.3082, "step": 6162 }, { "epoch": 1.9788087975598008, "grad_norm": 0.6076563000679016, "learning_rate": 7.709694992969526e-05, "loss": 0.4055, "step": 6163 }, { "epoch": 1.9791298763846523, "grad_norm": 0.5390298366546631, "learning_rate": 7.706297258400624e-05, "loss": 0.3068, "step": 6164 }, { "epoch": 1.9794509552095039, "grad_norm": 0.533420205116272, "learning_rate": 7.702899803312443e-05, "loss": 0.5381, "step": 6165 }, { "epoch": 1.9797720340343554, "grad_norm": 0.5672399997711182, 
"learning_rate": 7.699502628118958e-05, "loss": 0.9303, "step": 6166 }, { "epoch": 1.980093112859207, "grad_norm": 0.7802035212516785, "learning_rate": 7.696105733234098e-05, "loss": 0.5104, "step": 6167 }, { "epoch": 1.9804141916840585, "grad_norm": 0.6914035081863403, "learning_rate": 7.692709119071762e-05, "loss": 0.5653, "step": 6168 }, { "epoch": 1.98073527050891, "grad_norm": 0.6555715203285217, "learning_rate": 7.689312786045823e-05, "loss": 0.3267, "step": 6169 }, { "epoch": 1.9810563493337614, "grad_norm": 0.8108289241790771, "learning_rate": 7.685916734570112e-05, "loss": 0.2342, "step": 6170 }, { "epoch": 1.981377428158613, "grad_norm": 0.6541457772254944, "learning_rate": 7.682520965058428e-05, "loss": 0.4736, "step": 6171 }, { "epoch": 1.9816985069834643, "grad_norm": 0.9171504378318787, "learning_rate": 7.679125477924534e-05, "loss": 0.6021, "step": 6172 }, { "epoch": 1.9820195858083158, "grad_norm": 0.798708975315094, "learning_rate": 7.67573027358216e-05, "loss": 0.537, "step": 6173 }, { "epoch": 1.9823406646331674, "grad_norm": 0.8044041991233826, "learning_rate": 7.672335352445002e-05, "loss": 0.5214, "step": 6174 }, { "epoch": 1.982661743458019, "grad_norm": 0.9100499153137207, "learning_rate": 7.668940714926725e-05, "loss": 0.6087, "step": 6175 }, { "epoch": 1.9829828222828705, "grad_norm": 0.7136359810829163, "learning_rate": 7.66554636144095e-05, "loss": 0.4973, "step": 6176 }, { "epoch": 1.983303901107722, "grad_norm": 0.6627728343009949, "learning_rate": 7.662152292401264e-05, "loss": 0.4051, "step": 6177 }, { "epoch": 1.9836249799325736, "grad_norm": 0.6324448585510254, "learning_rate": 7.658758508221234e-05, "loss": 0.4427, "step": 6178 }, { "epoch": 1.983946058757425, "grad_norm": 0.8954194188117981, "learning_rate": 7.655365009314374e-05, "loss": 0.6454, "step": 6179 }, { "epoch": 1.9842671375822765, "grad_norm": 0.952394425868988, "learning_rate": 7.651971796094183e-05, "loss": 0.5537, "step": 6180 }, { "epoch": 1.9845882164071278, 
"grad_norm": 0.9424409866333008, "learning_rate": 7.6485788689741e-05, "loss": 0.5597, "step": 6181 }, { "epoch": 1.9849092952319793, "grad_norm": 1.0070812702178955, "learning_rate": 7.645186228367554e-05, "loss": 0.7455, "step": 6182 }, { "epoch": 1.985230374056831, "grad_norm": 0.8608502149581909, "learning_rate": 7.641793874687918e-05, "loss": 0.678, "step": 6183 }, { "epoch": 1.9855514528816824, "grad_norm": 1.0382908582687378, "learning_rate": 7.638401808348548e-05, "loss": 0.6999, "step": 6184 }, { "epoch": 1.985872531706534, "grad_norm": 1.0702378749847412, "learning_rate": 7.635010029762756e-05, "loss": 0.8023, "step": 6185 }, { "epoch": 1.9861936105313855, "grad_norm": 0.9120118618011475, "learning_rate": 7.631618539343814e-05, "loss": 0.6575, "step": 6186 }, { "epoch": 1.986514689356237, "grad_norm": 0.8130465149879456, "learning_rate": 7.628227337504972e-05, "loss": 0.5327, "step": 6187 }, { "epoch": 1.9868357681810884, "grad_norm": 1.0626096725463867, "learning_rate": 7.62483642465943e-05, "loss": 0.6589, "step": 6188 }, { "epoch": 1.98715684700594, "grad_norm": 1.0040377378463745, "learning_rate": 7.621445801220371e-05, "loss": 0.6529, "step": 6189 }, { "epoch": 1.9874779258307913, "grad_norm": 0.877011775970459, "learning_rate": 7.618055467600922e-05, "loss": 0.5563, "step": 6190 }, { "epoch": 1.9877990046556429, "grad_norm": 0.8776607513427734, "learning_rate": 7.614665424214193e-05, "loss": 0.6501, "step": 6191 }, { "epoch": 1.9881200834804944, "grad_norm": 1.0523321628570557, "learning_rate": 7.611275671473245e-05, "loss": 0.6605, "step": 6192 }, { "epoch": 1.988441162305346, "grad_norm": 0.7821586728096008, "learning_rate": 7.607886209791107e-05, "loss": 0.5578, "step": 6193 }, { "epoch": 1.9887622411301975, "grad_norm": 0.8066548109054565, "learning_rate": 7.604497039580785e-05, "loss": 0.5154, "step": 6194 }, { "epoch": 1.989083319955049, "grad_norm": 1.202035665512085, "learning_rate": 7.601108161255226e-05, "loss": 0.5195, "step": 6195 }, { 
"epoch": 1.9894043987799006, "grad_norm": 0.838572084903717, "learning_rate": 7.597719575227364e-05, "loss": 0.5041, "step": 6196 }, { "epoch": 1.989725477604752, "grad_norm": 0.9432242512702942, "learning_rate": 7.594331281910082e-05, "loss": 0.6335, "step": 6197 }, { "epoch": 1.9900465564296035, "grad_norm": 0.9394798278808594, "learning_rate": 7.590943281716241e-05, "loss": 0.4824, "step": 6198 }, { "epoch": 1.9903676352544548, "grad_norm": 0.6274194717407227, "learning_rate": 7.587555575058649e-05, "loss": 0.377, "step": 6199 }, { "epoch": 1.9906887140793064, "grad_norm": 1.0759146213531494, "learning_rate": 7.584168162350098e-05, "loss": 0.5105, "step": 6200 }, { "epoch": 1.991009792904158, "grad_norm": 0.6160650253295898, "learning_rate": 7.580781044003324e-05, "loss": 0.4023, "step": 6201 }, { "epoch": 1.9913308717290095, "grad_norm": 0.6983049511909485, "learning_rate": 7.577394220431042e-05, "loss": 0.3685, "step": 6202 }, { "epoch": 1.991651950553861, "grad_norm": 0.6337112784385681, "learning_rate": 7.574007692045928e-05, "loss": 0.41, "step": 6203 }, { "epoch": 1.9919730293787126, "grad_norm": 0.9221318960189819, "learning_rate": 7.570621459260615e-05, "loss": 0.5264, "step": 6204 }, { "epoch": 1.9922941082035641, "grad_norm": 0.8658796548843384, "learning_rate": 7.567235522487712e-05, "loss": 0.5192, "step": 6205 }, { "epoch": 1.9926151870284154, "grad_norm": 0.8173865079879761, "learning_rate": 7.563849882139776e-05, "loss": 0.464, "step": 6206 }, { "epoch": 1.992936265853267, "grad_norm": 0.7882019877433777, "learning_rate": 7.560464538629344e-05, "loss": 0.4191, "step": 6207 }, { "epoch": 1.9932573446781183, "grad_norm": 1.4528001546859741, "learning_rate": 7.557079492368909e-05, "loss": 0.3734, "step": 6208 }, { "epoch": 1.9935784235029699, "grad_norm": 1.0732624530792236, "learning_rate": 7.553694743770928e-05, "loss": 0.5081, "step": 6209 }, { "epoch": 1.9938995023278214, "grad_norm": 0.9734475612640381, "learning_rate": 7.550310293247823e-05, 
"loss": 0.4522, "step": 6210 }, { "epoch": 1.994220581152673, "grad_norm": 1.118553638458252, "learning_rate": 7.546926141211974e-05, "loss": 0.5817, "step": 6211 }, { "epoch": 1.9945416599775245, "grad_norm": 0.7407261729240417, "learning_rate": 7.543542288075739e-05, "loss": 0.3776, "step": 6212 }, { "epoch": 1.994862738802376, "grad_norm": 0.7426897287368774, "learning_rate": 7.54015873425142e-05, "loss": 0.3893, "step": 6213 }, { "epoch": 1.9951838176272276, "grad_norm": 0.4387853443622589, "learning_rate": 7.536775480151303e-05, "loss": 0.2971, "step": 6214 }, { "epoch": 1.995504896452079, "grad_norm": 0.4856942594051361, "learning_rate": 7.533392526187617e-05, "loss": 0.659, "step": 6215 }, { "epoch": 1.9958259752769305, "grad_norm": 0.49128448963165283, "learning_rate": 7.530009872772572e-05, "loss": 0.6117, "step": 6216 }, { "epoch": 1.9961470541017818, "grad_norm": 0.819917619228363, "learning_rate": 7.526627520318329e-05, "loss": 0.6772, "step": 6217 }, { "epoch": 1.9964681329266334, "grad_norm": 0.7481715083122253, "learning_rate": 7.523245469237026e-05, "loss": 0.4364, "step": 6218 }, { "epoch": 1.996789211751485, "grad_norm": 0.9296541810035706, "learning_rate": 7.519863719940748e-05, "loss": 0.5882, "step": 6219 }, { "epoch": 1.9971102905763365, "grad_norm": 0.8651626706123352, "learning_rate": 7.516482272841549e-05, "loss": 0.6071, "step": 6220 }, { "epoch": 1.997431369401188, "grad_norm": 0.8119394183158875, "learning_rate": 7.513101128351454e-05, "loss": 0.4912, "step": 6221 }, { "epoch": 1.9977524482260396, "grad_norm": 1.2624839544296265, "learning_rate": 7.50972028688244e-05, "loss": 0.5303, "step": 6222 }, { "epoch": 1.9980735270508911, "grad_norm": 1.1252477169036865, "learning_rate": 7.506339748846461e-05, "loss": 0.7267, "step": 6223 }, { "epoch": 1.9983946058757425, "grad_norm": 0.8501682877540588, "learning_rate": 7.502959514655414e-05, "loss": 0.6461, "step": 6224 }, { "epoch": 1.998715684700594, "grad_norm": 0.9397971034049988, 
"learning_rate": 7.499579584721179e-05, "loss": 0.491, "step": 6225 }, { "epoch": 1.9990367635254453, "grad_norm": 0.8632334470748901, "learning_rate": 7.496199959455584e-05, "loss": 0.5389, "step": 6226 }, { "epoch": 1.999357842350297, "grad_norm": 1.034378170967102, "learning_rate": 7.492820639270434e-05, "loss": 0.4877, "step": 6227 }, { "epoch": 1.9996789211751484, "grad_norm": 0.5917540192604065, "learning_rate": 7.489441624577485e-05, "loss": 0.3548, "step": 6228 }, { "epoch": 2.0, "grad_norm": 1.1308932304382324, "learning_rate": 7.486062915788452e-05, "loss": 0.6238, "step": 6229 }, { "epoch": 2.0003210788248516, "grad_norm": 0.3727356791496277, "learning_rate": 7.48268451331503e-05, "loss": 0.5456, "step": 6230 }, { "epoch": 2.000642157649703, "grad_norm": 0.40441709756851196, "learning_rate": 7.479306417568864e-05, "loss": 0.1526, "step": 6231 }, { "epoch": 2.0009632364745547, "grad_norm": 0.27756381034851074, "learning_rate": 7.475928628961566e-05, "loss": 0.0981, "step": 6232 }, { "epoch": 2.001284315299406, "grad_norm": 0.480465292930603, "learning_rate": 7.472551147904708e-05, "loss": 0.126, "step": 6233 }, { "epoch": 2.0016053941242573, "grad_norm": 0.26273319125175476, "learning_rate": 7.469173974809826e-05, "loss": 0.0916, "step": 6234 }, { "epoch": 2.001926472949109, "grad_norm": 0.4176071286201477, "learning_rate": 7.465797110088417e-05, "loss": 0.2166, "step": 6235 }, { "epoch": 2.0022475517739604, "grad_norm": 0.5277458429336548, "learning_rate": 7.462420554151944e-05, "loss": 0.3709, "step": 6236 }, { "epoch": 2.002568630598812, "grad_norm": 0.7284386157989502, "learning_rate": 7.459044307411832e-05, "loss": 0.4687, "step": 6237 }, { "epoch": 2.0028897094236635, "grad_norm": 0.6903724074363708, "learning_rate": 7.45566837027946e-05, "loss": 0.4989, "step": 6238 }, { "epoch": 2.003210788248515, "grad_norm": 0.7660374641418457, "learning_rate": 7.45229274316618e-05, "loss": 0.4508, "step": 6239 }, { "epoch": 2.0035318670733666, "grad_norm": 
0.5654639005661011, "learning_rate": 7.448917426483299e-05, "loss": 0.39, "step": 6240 }, { "epoch": 2.003852945898218, "grad_norm": 0.6629282236099243, "learning_rate": 7.445542420642097e-05, "loss": 0.3377, "step": 6241 }, { "epoch": 2.0041740247230697, "grad_norm": 0.7881885170936584, "learning_rate": 7.442167726053797e-05, "loss": 0.3575, "step": 6242 }, { "epoch": 2.004495103547921, "grad_norm": 0.6721541881561279, "learning_rate": 7.438793343129605e-05, "loss": 0.3635, "step": 6243 }, { "epoch": 2.0048161823727724, "grad_norm": 0.8870039582252502, "learning_rate": 7.435419272280672e-05, "loss": 0.4121, "step": 6244 }, { "epoch": 2.005137261197624, "grad_norm": 0.9872992038726807, "learning_rate": 7.432045513918122e-05, "loss": 0.4399, "step": 6245 }, { "epoch": 2.0054583400224755, "grad_norm": 0.9258590340614319, "learning_rate": 7.42867206845304e-05, "loss": 0.3429, "step": 6246 }, { "epoch": 2.005779418847327, "grad_norm": 0.7288257479667664, "learning_rate": 7.425298936296463e-05, "loss": 0.3793, "step": 6247 }, { "epoch": 2.0061004976721786, "grad_norm": 0.6890774965286255, "learning_rate": 7.421926117859403e-05, "loss": 0.3466, "step": 6248 }, { "epoch": 2.00642157649703, "grad_norm": 0.7121484875679016, "learning_rate": 7.418553613552825e-05, "loss": 0.3155, "step": 6249 }, { "epoch": 2.0067426553218817, "grad_norm": 0.5670762658119202, "learning_rate": 7.415181423787659e-05, "loss": 0.2762, "step": 6250 }, { "epoch": 2.0070637341467332, "grad_norm": 0.684097945690155, "learning_rate": 7.411809548974792e-05, "loss": 0.3319, "step": 6251 }, { "epoch": 2.0073848129715843, "grad_norm": 0.8124287724494934, "learning_rate": 7.408437989525085e-05, "loss": 0.3208, "step": 6252 }, { "epoch": 2.007705891796436, "grad_norm": 0.9157565236091614, "learning_rate": 7.405066745849346e-05, "loss": 0.4214, "step": 6253 }, { "epoch": 2.0080269706212874, "grad_norm": 0.7512825131416321, "learning_rate": 7.401695818358353e-05, "loss": 0.2616, "step": 6254 }, { "epoch": 
2.008348049446139, "grad_norm": 0.834835410118103, "learning_rate": 7.398325207462846e-05, "loss": 0.3358, "step": 6255 }, { "epoch": 2.0086691282709905, "grad_norm": 1.0159118175506592, "learning_rate": 7.394954913573517e-05, "loss": 0.4667, "step": 6256 }, { "epoch": 2.008990207095842, "grad_norm": 0.9639686942100525, "learning_rate": 7.391584937101033e-05, "loss": 0.2684, "step": 6257 }, { "epoch": 2.0093112859206936, "grad_norm": 0.9616678357124329, "learning_rate": 7.38821527845601e-05, "loss": 0.4674, "step": 6258 }, { "epoch": 2.009632364745545, "grad_norm": 1.6692441701889038, "learning_rate": 7.384845938049031e-05, "loss": 0.4352, "step": 6259 }, { "epoch": 2.0099534435703967, "grad_norm": 0.8217843770980835, "learning_rate": 7.381476916290644e-05, "loss": 0.3344, "step": 6260 }, { "epoch": 2.010274522395248, "grad_norm": 1.3268312215805054, "learning_rate": 7.378108213591355e-05, "loss": 0.5378, "step": 6261 }, { "epoch": 2.0105956012200994, "grad_norm": 0.9014345407485962, "learning_rate": 7.374739830361621e-05, "loss": 0.3766, "step": 6262 }, { "epoch": 2.010916680044951, "grad_norm": 0.8533119559288025, "learning_rate": 7.37137176701188e-05, "loss": 0.2654, "step": 6263 }, { "epoch": 2.0112377588698025, "grad_norm": 0.9980586171150208, "learning_rate": 7.368004023952517e-05, "loss": 0.3431, "step": 6264 }, { "epoch": 2.011558837694654, "grad_norm": 0.7079142332077026, "learning_rate": 7.364636601593875e-05, "loss": 0.2977, "step": 6265 }, { "epoch": 2.0118799165195056, "grad_norm": 0.8518555164337158, "learning_rate": 7.361269500346274e-05, "loss": 0.3331, "step": 6266 }, { "epoch": 2.012200995344357, "grad_norm": 0.9751218557357788, "learning_rate": 7.357902720619976e-05, "loss": 0.324, "step": 6267 }, { "epoch": 2.0125220741692087, "grad_norm": 1.0797030925750732, "learning_rate": 7.354536262825219e-05, "loss": 0.399, "step": 6268 }, { "epoch": 2.0128431529940602, "grad_norm": 1.0779436826705933, "learning_rate": 7.351170127372191e-05, "loss": 
0.3863, "step": 6269 }, { "epoch": 2.0131642318189114, "grad_norm": 0.9085304737091064, "learning_rate": 7.347804314671055e-05, "loss": 0.3424, "step": 6270 }, { "epoch": 2.013485310643763, "grad_norm": 0.8026139140129089, "learning_rate": 7.344438825131911e-05, "loss": 0.3144, "step": 6271 }, { "epoch": 2.0138063894686145, "grad_norm": 0.9833243489265442, "learning_rate": 7.341073659164848e-05, "loss": 0.3074, "step": 6272 }, { "epoch": 2.014127468293466, "grad_norm": 1.829190969467163, "learning_rate": 7.33770881717989e-05, "loss": 0.3371, "step": 6273 }, { "epoch": 2.0144485471183176, "grad_norm": 0.8061204552650452, "learning_rate": 7.334344299587035e-05, "loss": 0.3001, "step": 6274 }, { "epoch": 2.014769625943169, "grad_norm": 0.5113372206687927, "learning_rate": 7.330980106796246e-05, "loss": 0.2632, "step": 6275 }, { "epoch": 2.0150907047680207, "grad_norm": 0.5458338260650635, "learning_rate": 7.327616239217431e-05, "loss": 0.279, "step": 6276 }, { "epoch": 2.015411783592872, "grad_norm": 0.7398447394371033, "learning_rate": 7.324252697260474e-05, "loss": 0.2997, "step": 6277 }, { "epoch": 2.0157328624177238, "grad_norm": 0.7369372248649597, "learning_rate": 7.320889481335207e-05, "loss": 0.2572, "step": 6278 }, { "epoch": 2.016053941242575, "grad_norm": 0.4629496932029724, "learning_rate": 7.317526591851433e-05, "loss": 0.2779, "step": 6279 }, { "epoch": 2.0163750200674264, "grad_norm": 0.5169734954833984, "learning_rate": 7.314164029218904e-05, "loss": 0.3046, "step": 6280 }, { "epoch": 2.016696098892278, "grad_norm": 0.6138988733291626, "learning_rate": 7.310801793847344e-05, "loss": 0.6675, "step": 6281 }, { "epoch": 2.0170171777171295, "grad_norm": 0.6308935284614563, "learning_rate": 7.307439886146428e-05, "loss": 0.1614, "step": 6282 }, { "epoch": 2.017338256541981, "grad_norm": 0.7147278785705566, "learning_rate": 7.30407830652579e-05, "loss": 0.2836, "step": 6283 }, { "epoch": 2.0176593353668326, "grad_norm": 0.5563004612922668, "learning_rate": 
7.300717055395039e-05, "loss": 0.1, "step": 6284 }, { "epoch": 2.017980414191684, "grad_norm": 0.6495770812034607, "learning_rate": 7.297356133163721e-05, "loss": 0.1254, "step": 6285 }, { "epoch": 2.0183014930165357, "grad_norm": 0.6263350248336792, "learning_rate": 7.293995540241366e-05, "loss": 0.2167, "step": 6286 }, { "epoch": 2.0186225718413873, "grad_norm": 0.7048402428627014, "learning_rate": 7.290635277037442e-05, "loss": 0.3179, "step": 6287 }, { "epoch": 2.0189436506662384, "grad_norm": 0.934867799282074, "learning_rate": 7.287275343961392e-05, "loss": 0.4398, "step": 6288 }, { "epoch": 2.01926472949109, "grad_norm": 0.650744616985321, "learning_rate": 7.283915741422612e-05, "loss": 0.4162, "step": 6289 }, { "epoch": 2.0195858083159415, "grad_norm": 0.9540171027183533, "learning_rate": 7.280556469830464e-05, "loss": 0.4748, "step": 6290 }, { "epoch": 2.019906887140793, "grad_norm": 0.6486244201660156, "learning_rate": 7.277197529594257e-05, "loss": 0.3104, "step": 6291 }, { "epoch": 2.0202279659656446, "grad_norm": 0.7935119867324829, "learning_rate": 7.273838921123272e-05, "loss": 0.3595, "step": 6292 }, { "epoch": 2.020549044790496, "grad_norm": 0.6961763501167297, "learning_rate": 7.270480644826749e-05, "loss": 0.2754, "step": 6293 }, { "epoch": 2.0208701236153477, "grad_norm": 0.8212137818336487, "learning_rate": 7.267122701113876e-05, "loss": 0.3855, "step": 6294 }, { "epoch": 2.0211912024401992, "grad_norm": 0.6237925887107849, "learning_rate": 7.263765090393817e-05, "loss": 0.2879, "step": 6295 }, { "epoch": 2.021512281265051, "grad_norm": 0.5799258947372437, "learning_rate": 7.260407813075676e-05, "loss": 0.29, "step": 6296 }, { "epoch": 2.021833360089902, "grad_norm": 0.8160949349403381, "learning_rate": 7.257050869568535e-05, "loss": 0.4371, "step": 6297 }, { "epoch": 2.0221544389147534, "grad_norm": 0.9621134400367737, "learning_rate": 7.253694260281425e-05, "loss": 0.492, "step": 6298 }, { "epoch": 2.022475517739605, "grad_norm": 
0.7058377861976624, "learning_rate": 7.250337985623342e-05, "loss": 0.299, "step": 6299 }, { "epoch": 2.0227965965644565, "grad_norm": 0.7675186991691589, "learning_rate": 7.246982046003234e-05, "loss": 0.2944, "step": 6300 }, { "epoch": 2.023117675389308, "grad_norm": 0.910317063331604, "learning_rate": 7.243626441830009e-05, "loss": 0.3878, "step": 6301 }, { "epoch": 2.0234387542141596, "grad_norm": 0.9493252038955688, "learning_rate": 7.240271173512546e-05, "loss": 0.3264, "step": 6302 }, { "epoch": 2.023759833039011, "grad_norm": 1.0064270496368408, "learning_rate": 7.236916241459663e-05, "loss": 0.3125, "step": 6303 }, { "epoch": 2.0240809118638627, "grad_norm": 0.7101002335548401, "learning_rate": 7.233561646080161e-05, "loss": 0.2849, "step": 6304 }, { "epoch": 2.0244019906887143, "grad_norm": 0.728187084197998, "learning_rate": 7.230207387782776e-05, "loss": 0.2976, "step": 6305 }, { "epoch": 2.0247230695135654, "grad_norm": 0.8924553394317627, "learning_rate": 7.226853466976222e-05, "loss": 0.4461, "step": 6306 }, { "epoch": 2.025044148338417, "grad_norm": 0.7692164182662964, "learning_rate": 7.22349988406916e-05, "loss": 0.3043, "step": 6307 }, { "epoch": 2.0253652271632685, "grad_norm": 0.7273018956184387, "learning_rate": 7.220146639470218e-05, "loss": 0.321, "step": 6308 }, { "epoch": 2.02568630598812, "grad_norm": 0.9611377716064453, "learning_rate": 7.216793733587976e-05, "loss": 0.2974, "step": 6309 }, { "epoch": 2.0260073848129716, "grad_norm": 0.8985442519187927, "learning_rate": 7.21344116683097e-05, "loss": 0.3249, "step": 6310 }, { "epoch": 2.026328463637823, "grad_norm": 0.7551534175872803, "learning_rate": 7.210088939607708e-05, "loss": 0.3713, "step": 6311 }, { "epoch": 2.0266495424626747, "grad_norm": 1.1302769184112549, "learning_rate": 7.206737052326645e-05, "loss": 0.3553, "step": 6312 }, { "epoch": 2.0269706212875263, "grad_norm": 0.7023568153381348, "learning_rate": 7.203385505396203e-05, "loss": 0.345, "step": 6313 }, { "epoch": 
2.027291700112378, "grad_norm": 1.19681715965271, "learning_rate": 7.20003429922475e-05, "loss": 0.4365, "step": 6314 }, { "epoch": 2.027612778937229, "grad_norm": 1.0040020942687988, "learning_rate": 7.196683434220625e-05, "loss": 0.3879, "step": 6315 }, { "epoch": 2.0279338577620805, "grad_norm": 0.9291061758995056, "learning_rate": 7.193332910792124e-05, "loss": 0.4166, "step": 6316 }, { "epoch": 2.028254936586932, "grad_norm": 0.5969425439834595, "learning_rate": 7.18998272934749e-05, "loss": 0.2834, "step": 6317 }, { "epoch": 2.0285760154117836, "grad_norm": 0.7998864650726318, "learning_rate": 7.186632890294941e-05, "loss": 0.3245, "step": 6318 }, { "epoch": 2.028897094236635, "grad_norm": 0.6707684993743896, "learning_rate": 7.183283394042634e-05, "loss": 0.2533, "step": 6319 }, { "epoch": 2.0292181730614867, "grad_norm": 0.6057593822479248, "learning_rate": 7.179934240998706e-05, "loss": 0.2811, "step": 6320 }, { "epoch": 2.029539251886338, "grad_norm": 0.9882358908653259, "learning_rate": 7.176585431571235e-05, "loss": 0.4067, "step": 6321 }, { "epoch": 2.0298603307111898, "grad_norm": 0.688306450843811, "learning_rate": 7.173236966168268e-05, "loss": 0.2973, "step": 6322 }, { "epoch": 2.0301814095360413, "grad_norm": 0.886023223400116, "learning_rate": 7.169888845197798e-05, "loss": 0.3354, "step": 6323 }, { "epoch": 2.0305024883608924, "grad_norm": 0.6586227416992188, "learning_rate": 7.166541069067792e-05, "loss": 0.2762, "step": 6324 }, { "epoch": 2.030823567185744, "grad_norm": 1.0189096927642822, "learning_rate": 7.163193638186158e-05, "loss": 0.2873, "step": 6325 }, { "epoch": 2.0311446460105955, "grad_norm": 0.48503080010414124, "learning_rate": 7.159846552960774e-05, "loss": 0.2642, "step": 6326 }, { "epoch": 2.031465724835447, "grad_norm": 0.4218836724758148, "learning_rate": 7.156499813799476e-05, "loss": 0.247, "step": 6327 }, { "epoch": 2.0317868036602986, "grad_norm": 0.5482301712036133, "learning_rate": 7.153153421110048e-05, "loss": 0.268, 
"step": 6328 }, { "epoch": 2.03210788248515, "grad_norm": 0.47911396622657776, "learning_rate": 7.149807375300239e-05, "loss": 0.2903, "step": 6329 }, { "epoch": 2.0324289613100017, "grad_norm": 0.5212134718894958, "learning_rate": 7.146461676777756e-05, "loss": 0.8643, "step": 6330 }, { "epoch": 2.0327500401348533, "grad_norm": 0.43788644671440125, "learning_rate": 7.143116325950265e-05, "loss": 0.3721, "step": 6331 }, { "epoch": 2.033071118959705, "grad_norm": 0.5127784609794617, "learning_rate": 7.139771323225381e-05, "loss": 0.2185, "step": 6332 }, { "epoch": 2.033392197784556, "grad_norm": 0.5072081685066223, "learning_rate": 7.136426669010689e-05, "loss": 0.2602, "step": 6333 }, { "epoch": 2.0337132766094075, "grad_norm": 0.7099177241325378, "learning_rate": 7.13308236371372e-05, "loss": 0.413, "step": 6334 }, { "epoch": 2.034034355434259, "grad_norm": 0.9787275791168213, "learning_rate": 7.129738407741964e-05, "loss": 0.5135, "step": 6335 }, { "epoch": 2.0343554342591106, "grad_norm": 0.8141676783561707, "learning_rate": 7.126394801502882e-05, "loss": 0.3424, "step": 6336 }, { "epoch": 2.034676513083962, "grad_norm": 0.7322252988815308, "learning_rate": 7.123051545403874e-05, "loss": 0.3168, "step": 6337 }, { "epoch": 2.0349975919088137, "grad_norm": 0.8565484285354614, "learning_rate": 7.119708639852312e-05, "loss": 0.2875, "step": 6338 }, { "epoch": 2.0353186707336652, "grad_norm": 0.731372594833374, "learning_rate": 7.11636608525551e-05, "loss": 0.3009, "step": 6339 }, { "epoch": 2.035639749558517, "grad_norm": 0.9566566348075867, "learning_rate": 7.113023882020757e-05, "loss": 0.3571, "step": 6340 }, { "epoch": 2.0359608283833683, "grad_norm": 1.051526665687561, "learning_rate": 7.109682030555283e-05, "loss": 0.3774, "step": 6341 }, { "epoch": 2.0362819072082194, "grad_norm": 0.8493756651878357, "learning_rate": 7.106340531266292e-05, "loss": 0.2954, "step": 6342 }, { "epoch": 2.036602986033071, "grad_norm": 1.491635799407959, "learning_rate": 
7.102999384560927e-05, "loss": 0.4563, "step": 6343 }, { "epoch": 2.0369240648579225, "grad_norm": 0.7433634400367737, "learning_rate": 7.099658590846299e-05, "loss": 0.3044, "step": 6344 }, { "epoch": 2.037245143682774, "grad_norm": 0.8622017502784729, "learning_rate": 7.096318150529477e-05, "loss": 0.2676, "step": 6345 }, { "epoch": 2.0375662225076256, "grad_norm": 0.8949119448661804, "learning_rate": 7.092978064017475e-05, "loss": 0.2809, "step": 6346 }, { "epoch": 2.037887301332477, "grad_norm": 1.0402297973632812, "learning_rate": 7.089638331717284e-05, "loss": 0.4512, "step": 6347 }, { "epoch": 2.0382083801573287, "grad_norm": 0.9292569756507874, "learning_rate": 7.08629895403583e-05, "loss": 0.4013, "step": 6348 }, { "epoch": 2.0385294589821803, "grad_norm": 1.625335454940796, "learning_rate": 7.082959931380011e-05, "loss": 0.3156, "step": 6349 }, { "epoch": 2.038850537807032, "grad_norm": 0.6953380703926086, "learning_rate": 7.079621264156675e-05, "loss": 0.3309, "step": 6350 }, { "epoch": 2.039171616631883, "grad_norm": 0.7905119061470032, "learning_rate": 7.076282952772633e-05, "loss": 0.3588, "step": 6351 }, { "epoch": 2.0394926954567345, "grad_norm": 0.7801741361618042, "learning_rate": 7.072944997634646e-05, "loss": 0.2859, "step": 6352 }, { "epoch": 2.039813774281586, "grad_norm": 1.3757237195968628, "learning_rate": 7.069607399149428e-05, "loss": 0.4971, "step": 6353 }, { "epoch": 2.0401348531064376, "grad_norm": 0.847542405128479, "learning_rate": 7.06627015772366e-05, "loss": 0.3273, "step": 6354 }, { "epoch": 2.040455931931289, "grad_norm": 0.6177198886871338, "learning_rate": 7.062933273763975e-05, "loss": 0.2351, "step": 6355 }, { "epoch": 2.0407770107561407, "grad_norm": 0.9541710019111633, "learning_rate": 7.059596747676962e-05, "loss": 0.3357, "step": 6356 }, { "epoch": 2.0410980895809923, "grad_norm": 0.8325964212417603, "learning_rate": 7.056260579869165e-05, "loss": 0.2587, "step": 6357 }, { "epoch": 2.041419168405844, "grad_norm": 
0.8677617907524109, "learning_rate": 7.052924770747087e-05, "loss": 0.2691, "step": 6358 }, { "epoch": 2.0417402472306954, "grad_norm": 0.9467921257019043, "learning_rate": 7.049589320717186e-05, "loss": 0.3171, "step": 6359 }, { "epoch": 2.0420613260555465, "grad_norm": 0.9890326261520386, "learning_rate": 7.04625423018588e-05, "loss": 0.3976, "step": 6360 }, { "epoch": 2.042382404880398, "grad_norm": 0.6989203095436096, "learning_rate": 7.042919499559537e-05, "loss": 0.2859, "step": 6361 }, { "epoch": 2.0427034837052496, "grad_norm": 0.8812395930290222, "learning_rate": 7.039585129244477e-05, "loss": 0.3466, "step": 6362 }, { "epoch": 2.043024562530101, "grad_norm": 0.8233697414398193, "learning_rate": 7.036251119646992e-05, "loss": 0.2523, "step": 6363 }, { "epoch": 2.0433456413549527, "grad_norm": 0.9117799401283264, "learning_rate": 7.032917471173318e-05, "loss": 0.3386, "step": 6364 }, { "epoch": 2.043666720179804, "grad_norm": 0.9386948347091675, "learning_rate": 7.029584184229653e-05, "loss": 0.2908, "step": 6365 }, { "epoch": 2.0439877990046558, "grad_norm": 0.7460491061210632, "learning_rate": 7.026251259222141e-05, "loss": 0.2804, "step": 6366 }, { "epoch": 2.0443088778295073, "grad_norm": 0.7433897256851196, "learning_rate": 7.022918696556896e-05, "loss": 0.2923, "step": 6367 }, { "epoch": 2.0446299566543584, "grad_norm": 0.6513417959213257, "learning_rate": 7.019586496639974e-05, "loss": 0.2907, "step": 6368 }, { "epoch": 2.04495103547921, "grad_norm": 0.9250189661979675, "learning_rate": 7.016254659877398e-05, "loss": 0.3394, "step": 6369 }, { "epoch": 2.0452721143040615, "grad_norm": 0.848927915096283, "learning_rate": 7.012923186675144e-05, "loss": 0.3345, "step": 6370 }, { "epoch": 2.045593193128913, "grad_norm": 0.718906044960022, "learning_rate": 7.009592077439134e-05, "loss": 0.3041, "step": 6371 }, { "epoch": 2.0459142719537646, "grad_norm": 0.887864351272583, "learning_rate": 7.00626133257526e-05, "loss": 0.3669, "step": 6372 }, { "epoch": 
2.046235350778616, "grad_norm": 0.7558139562606812, "learning_rate": 7.002930952489362e-05, "loss": 0.2784, "step": 6373 }, { "epoch": 2.0465564296034677, "grad_norm": 0.6548720002174377, "learning_rate": 6.999600937587239e-05, "loss": 0.2803, "step": 6374 }, { "epoch": 2.0468775084283193, "grad_norm": 0.7359789609909058, "learning_rate": 6.996271288274636e-05, "loss": 0.2794, "step": 6375 }, { "epoch": 2.047198587253171, "grad_norm": 0.7246059775352478, "learning_rate": 6.992942004957271e-05, "loss": 0.2569, "step": 6376 }, { "epoch": 2.047519666078022, "grad_norm": 0.4649535119533539, "learning_rate": 6.989613088040796e-05, "loss": 0.2646, "step": 6377 }, { "epoch": 2.0478407449028735, "grad_norm": 1.727747917175293, "learning_rate": 6.986284537930838e-05, "loss": 0.2704, "step": 6378 }, { "epoch": 2.048161823727725, "grad_norm": 0.7226576805114746, "learning_rate": 6.982956355032968e-05, "loss": 0.2848, "step": 6379 }, { "epoch": 2.0484829025525766, "grad_norm": 0.4427557587623596, "learning_rate": 6.979628539752711e-05, "loss": 0.5042, "step": 6380 }, { "epoch": 2.048803981377428, "grad_norm": 0.6116136312484741, "learning_rate": 6.976301092495556e-05, "loss": 0.7693, "step": 6381 }, { "epoch": 2.0491250602022797, "grad_norm": 0.5171028971672058, "learning_rate": 6.972974013666942e-05, "loss": 0.3138, "step": 6382 }, { "epoch": 2.0494461390271312, "grad_norm": 0.5506100058555603, "learning_rate": 6.969647303672262e-05, "loss": 0.1486, "step": 6383 }, { "epoch": 2.049767217851983, "grad_norm": 0.48883306980133057, "learning_rate": 6.966320962916864e-05, "loss": 0.1554, "step": 6384 }, { "epoch": 2.0500882966768343, "grad_norm": 0.720169186592102, "learning_rate": 6.962994991806059e-05, "loss": 0.3594, "step": 6385 }, { "epoch": 2.0504093755016854, "grad_norm": 0.9285702705383301, "learning_rate": 6.959669390745097e-05, "loss": 0.5368, "step": 6386 }, { "epoch": 2.050730454326537, "grad_norm": 0.9640303254127502, "learning_rate": 6.956344160139201e-05, "loss": 
0.3814, "step": 6387 }, { "epoch": 2.0510515331513886, "grad_norm": 0.9004988670349121, "learning_rate": 6.953019300393538e-05, "loss": 0.4218, "step": 6388 }, { "epoch": 2.05137261197624, "grad_norm": 0.4966624975204468, "learning_rate": 6.949694811913225e-05, "loss": 0.206, "step": 6389 }, { "epoch": 2.0516936908010917, "grad_norm": 0.7329800724983215, "learning_rate": 6.946370695103353e-05, "loss": 0.3075, "step": 6390 }, { "epoch": 2.052014769625943, "grad_norm": 0.8683103322982788, "learning_rate": 6.943046950368944e-05, "loss": 0.4042, "step": 6391 }, { "epoch": 2.0523358484507948, "grad_norm": 0.7453745603561401, "learning_rate": 6.939723578114993e-05, "loss": 0.325, "step": 6392 }, { "epoch": 2.0526569272756463, "grad_norm": 0.8619517683982849, "learning_rate": 6.93640057874644e-05, "loss": 0.3574, "step": 6393 }, { "epoch": 2.052978006100498, "grad_norm": 0.7723063826560974, "learning_rate": 6.93307795266819e-05, "loss": 0.3337, "step": 6394 }, { "epoch": 2.053299084925349, "grad_norm": 0.7700170278549194, "learning_rate": 6.929755700285081e-05, "loss": 0.4027, "step": 6395 }, { "epoch": 2.0536201637502005, "grad_norm": 0.7963249087333679, "learning_rate": 6.92643382200193e-05, "loss": 0.3438, "step": 6396 }, { "epoch": 2.053941242575052, "grad_norm": 0.8687177896499634, "learning_rate": 6.923112318223496e-05, "loss": 0.4145, "step": 6397 }, { "epoch": 2.0542623213999036, "grad_norm": 0.988239049911499, "learning_rate": 6.91979118935449e-05, "loss": 0.3573, "step": 6398 }, { "epoch": 2.054583400224755, "grad_norm": 1.586459755897522, "learning_rate": 6.916470435799587e-05, "loss": 0.3187, "step": 6399 }, { "epoch": 2.0549044790496067, "grad_norm": 0.8232465386390686, "learning_rate": 6.913150057963404e-05, "loss": 0.4458, "step": 6400 }, { "epoch": 2.0552255578744583, "grad_norm": 0.6517797708511353, "learning_rate": 6.909830056250527e-05, "loss": 0.2838, "step": 6401 }, { "epoch": 2.05554663669931, "grad_norm": 0.7707570195198059, "learning_rate": 
6.90651043106548e-05, "loss": 0.3037, "step": 6402 }, { "epoch": 2.0558677155241614, "grad_norm": 1.065109133720398, "learning_rate": 6.90319118281276e-05, "loss": 0.5664, "step": 6403 }, { "epoch": 2.0561887943490125, "grad_norm": 0.7648012042045593, "learning_rate": 6.899872311896795e-05, "loss": 0.392, "step": 6404 }, { "epoch": 2.056509873173864, "grad_norm": 0.7669097185134888, "learning_rate": 6.896553818721989e-05, "loss": 0.2889, "step": 6405 }, { "epoch": 2.0568309519987156, "grad_norm": 1.1542384624481201, "learning_rate": 6.893235703692685e-05, "loss": 0.3426, "step": 6406 }, { "epoch": 2.057152030823567, "grad_norm": 0.5195868015289307, "learning_rate": 6.889917967213185e-05, "loss": 0.2784, "step": 6407 }, { "epoch": 2.0574731096484187, "grad_norm": 0.7854254841804504, "learning_rate": 6.88660060968775e-05, "loss": 0.3684, "step": 6408 }, { "epoch": 2.0577941884732702, "grad_norm": 1.0029950141906738, "learning_rate": 6.883283631520582e-05, "loss": 0.4543, "step": 6409 }, { "epoch": 2.0581152672981218, "grad_norm": 1.1565372943878174, "learning_rate": 6.879967033115853e-05, "loss": 0.4042, "step": 6410 }, { "epoch": 2.0584363461229733, "grad_norm": 1.5860395431518555, "learning_rate": 6.876650814877674e-05, "loss": 0.3917, "step": 6411 }, { "epoch": 2.058757424947825, "grad_norm": 1.0455715656280518, "learning_rate": 6.873334977210122e-05, "loss": 0.3976, "step": 6412 }, { "epoch": 2.059078503772676, "grad_norm": 0.671190619468689, "learning_rate": 6.870019520517217e-05, "loss": 0.31, "step": 6413 }, { "epoch": 2.0593995825975275, "grad_norm": 0.5373357534408569, "learning_rate": 6.866704445202943e-05, "loss": 0.2763, "step": 6414 }, { "epoch": 2.059720661422379, "grad_norm": 1.1658103466033936, "learning_rate": 6.863389751671225e-05, "loss": 0.3591, "step": 6415 }, { "epoch": 2.0600417402472306, "grad_norm": 0.7469364404678345, "learning_rate": 6.860075440325951e-05, "loss": 0.3128, "step": 6416 }, { "epoch": 2.060362819072082, "grad_norm": 
1.2972486019134521, "learning_rate": 6.856761511570963e-05, "loss": 0.3338, "step": 6417 }, { "epoch": 2.0606838978969337, "grad_norm": 0.8493379950523376, "learning_rate": 6.853447965810046e-05, "loss": 0.277, "step": 6418 }, { "epoch": 2.0610049767217853, "grad_norm": 0.7463499307632446, "learning_rate": 6.850134803446954e-05, "loss": 0.3535, "step": 6419 }, { "epoch": 2.061326055546637, "grad_norm": 0.6443728804588318, "learning_rate": 6.846822024885379e-05, "loss": 0.2776, "step": 6420 }, { "epoch": 2.0616471343714884, "grad_norm": 1.100448489189148, "learning_rate": 6.843509630528977e-05, "loss": 0.3022, "step": 6421 }, { "epoch": 2.0619682131963395, "grad_norm": 0.7827779054641724, "learning_rate": 6.840197620781349e-05, "loss": 0.3248, "step": 6422 }, { "epoch": 2.062289292021191, "grad_norm": 0.8405880331993103, "learning_rate": 6.836885996046061e-05, "loss": 0.3416, "step": 6423 }, { "epoch": 2.0626103708460426, "grad_norm": 0.832291841506958, "learning_rate": 6.833574756726618e-05, "loss": 0.3128, "step": 6424 }, { "epoch": 2.062931449670894, "grad_norm": 0.8597456812858582, "learning_rate": 6.830263903226483e-05, "loss": 0.3529, "step": 6425 }, { "epoch": 2.0632525284957457, "grad_norm": 0.6993900537490845, "learning_rate": 6.826953435949081e-05, "loss": 0.3465, "step": 6426 }, { "epoch": 2.0635736073205972, "grad_norm": 0.8503331542015076, "learning_rate": 6.823643355297773e-05, "loss": 0.3257, "step": 6427 }, { "epoch": 2.063894686145449, "grad_norm": 0.460232138633728, "learning_rate": 6.820333661675893e-05, "loss": 0.2558, "step": 6428 }, { "epoch": 2.0642157649703003, "grad_norm": 0.6968713402748108, "learning_rate": 6.817024355486706e-05, "loss": 0.3211, "step": 6429 }, { "epoch": 2.064536843795152, "grad_norm": 0.532193660736084, "learning_rate": 6.81371543713345e-05, "loss": 0.8584, "step": 6430 }, { "epoch": 2.064857922620003, "grad_norm": 0.474894255399704, "learning_rate": 6.8104069070193e-05, "loss": 0.6014, "step": 6431 }, { "epoch": 
2.0651790014448546, "grad_norm": 0.5201006531715393, "learning_rate": 6.807098765547397e-05, "loss": 0.2353, "step": 6432 }, { "epoch": 2.065500080269706, "grad_norm": 0.5294368267059326, "learning_rate": 6.803791013120822e-05, "loss": 0.1455, "step": 6433 }, { "epoch": 2.0658211590945577, "grad_norm": 0.3815780580043793, "learning_rate": 6.800483650142617e-05, "loss": 0.0917, "step": 6434 }, { "epoch": 2.066142237919409, "grad_norm": 0.5428910851478577, "learning_rate": 6.797176677015775e-05, "loss": 0.1991, "step": 6435 }, { "epoch": 2.0664633167442608, "grad_norm": 0.46137359738349915, "learning_rate": 6.793870094143238e-05, "loss": 0.1742, "step": 6436 }, { "epoch": 2.0667843955691123, "grad_norm": 0.6830509901046753, "learning_rate": 6.790563901927907e-05, "loss": 0.4138, "step": 6437 }, { "epoch": 2.067105474393964, "grad_norm": 0.5673910975456238, "learning_rate": 6.787258100772627e-05, "loss": 0.2538, "step": 6438 }, { "epoch": 2.0674265532188154, "grad_norm": 0.769719123840332, "learning_rate": 6.783952691080203e-05, "loss": 0.257, "step": 6439 }, { "epoch": 2.0677476320436665, "grad_norm": 0.6150414347648621, "learning_rate": 6.780647673253391e-05, "loss": 0.2459, "step": 6440 }, { "epoch": 2.068068710868518, "grad_norm": 0.5758988857269287, "learning_rate": 6.77734304769489e-05, "loss": 0.2108, "step": 6441 }, { "epoch": 2.0683897896933696, "grad_norm": 0.979168713092804, "learning_rate": 6.774038814807369e-05, "loss": 0.4103, "step": 6442 }, { "epoch": 2.068710868518221, "grad_norm": 0.8934927582740784, "learning_rate": 6.770734974993426e-05, "loss": 0.3478, "step": 6443 }, { "epoch": 2.0690319473430727, "grad_norm": 1.4555315971374512, "learning_rate": 6.767431528655635e-05, "loss": 0.3517, "step": 6444 }, { "epoch": 2.0693530261679243, "grad_norm": 0.8350145220756531, "learning_rate": 6.764128476196505e-05, "loss": 0.3284, "step": 6445 }, { "epoch": 2.069674104992776, "grad_norm": 0.7142661809921265, "learning_rate": 6.760825818018508e-05, "loss": 
0.2966, "step": 6446 }, { "epoch": 2.0699951838176274, "grad_norm": 1.0782002210617065, "learning_rate": 6.757523554524056e-05, "loss": 0.4881, "step": 6447 }, { "epoch": 2.070316262642479, "grad_norm": 0.9906001687049866, "learning_rate": 6.754221686115525e-05, "loss": 0.4128, "step": 6448 }, { "epoch": 2.07063734146733, "grad_norm": 0.8098526000976562, "learning_rate": 6.750920213195238e-05, "loss": 0.3199, "step": 6449 }, { "epoch": 2.0709584202921816, "grad_norm": 1.1390670537948608, "learning_rate": 6.747619136165463e-05, "loss": 0.41, "step": 6450 }, { "epoch": 2.071279499117033, "grad_norm": 0.8910248875617981, "learning_rate": 6.744318455428436e-05, "loss": 0.3355, "step": 6451 }, { "epoch": 2.0716005779418847, "grad_norm": 0.9463450312614441, "learning_rate": 6.741018171386326e-05, "loss": 0.4561, "step": 6452 }, { "epoch": 2.0719216567667362, "grad_norm": 0.7821532487869263, "learning_rate": 6.737718284441267e-05, "loss": 0.3068, "step": 6453 }, { "epoch": 2.072242735591588, "grad_norm": 1.0844594240188599, "learning_rate": 6.734418794995337e-05, "loss": 0.4195, "step": 6454 }, { "epoch": 2.0725638144164393, "grad_norm": 0.7623462677001953, "learning_rate": 6.731119703450577e-05, "loss": 0.3326, "step": 6455 }, { "epoch": 2.072884893241291, "grad_norm": 0.7975969314575195, "learning_rate": 6.727821010208962e-05, "loss": 0.3355, "step": 6456 }, { "epoch": 2.0732059720661424, "grad_norm": 1.0122053623199463, "learning_rate": 6.724522715672432e-05, "loss": 0.4545, "step": 6457 }, { "epoch": 2.0735270508909935, "grad_norm": 0.9668064117431641, "learning_rate": 6.721224820242875e-05, "loss": 0.381, "step": 6458 }, { "epoch": 2.073848129715845, "grad_norm": 0.8006826043128967, "learning_rate": 6.717927324322124e-05, "loss": 0.3322, "step": 6459 }, { "epoch": 2.0741692085406966, "grad_norm": 0.6765208840370178, "learning_rate": 6.714630228311978e-05, "loss": 0.2891, "step": 6460 }, { "epoch": 2.074490287365548, "grad_norm": 0.8815526366233826, "learning_rate": 
6.711333532614168e-05, "loss": 0.3118, "step": 6461 }, { "epoch": 2.0748113661903997, "grad_norm": 0.7715243101119995, "learning_rate": 6.708037237630395e-05, "loss": 0.3149, "step": 6462 }, { "epoch": 2.0751324450152513, "grad_norm": 0.7587166428565979, "learning_rate": 6.704741343762295e-05, "loss": 0.3618, "step": 6463 }, { "epoch": 2.075453523840103, "grad_norm": 0.6574379205703735, "learning_rate": 6.701445851411472e-05, "loss": 0.3009, "step": 6464 }, { "epoch": 2.0757746026649544, "grad_norm": 1.0423983335494995, "learning_rate": 6.698150760979463e-05, "loss": 0.3738, "step": 6465 }, { "epoch": 2.076095681489806, "grad_norm": 0.5287320613861084, "learning_rate": 6.69485607286777e-05, "loss": 0.2526, "step": 6466 }, { "epoch": 2.076416760314657, "grad_norm": 0.7871254086494446, "learning_rate": 6.69156178747784e-05, "loss": 0.2799, "step": 6467 }, { "epoch": 2.0767378391395086, "grad_norm": 1.0920846462249756, "learning_rate": 6.688267905211067e-05, "loss": 0.3249, "step": 6468 }, { "epoch": 2.07705891796436, "grad_norm": 1.0607701539993286, "learning_rate": 6.684974426468808e-05, "loss": 0.363, "step": 6469 }, { "epoch": 2.0773799967892117, "grad_norm": 0.6999905705451965, "learning_rate": 6.681681351652356e-05, "loss": 0.3041, "step": 6470 }, { "epoch": 2.0777010756140633, "grad_norm": 1.2206069231033325, "learning_rate": 6.67838868116297e-05, "loss": 0.3103, "step": 6471 }, { "epoch": 2.078022154438915, "grad_norm": 0.7365018725395203, "learning_rate": 6.675096415401842e-05, "loss": 0.3293, "step": 6472 }, { "epoch": 2.0783432332637664, "grad_norm": 0.6262559294700623, "learning_rate": 6.671804554770135e-05, "loss": 0.2744, "step": 6473 }, { "epoch": 2.078664312088618, "grad_norm": 0.5494680404663086, "learning_rate": 6.668513099668944e-05, "loss": 0.2866, "step": 6474 }, { "epoch": 2.0789853909134695, "grad_norm": 0.6878925561904907, "learning_rate": 6.66522205049933e-05, "loss": 0.3257, "step": 6475 }, { "epoch": 2.0793064697383206, "grad_norm": 
0.7526207566261292, "learning_rate": 6.661931407662292e-05, "loss": 0.3167, "step": 6476 }, { "epoch": 2.079627548563172, "grad_norm": 1.070246934890747, "learning_rate": 6.658641171558785e-05, "loss": 0.3273, "step": 6477 }, { "epoch": 2.0799486273880237, "grad_norm": 0.7761482000350952, "learning_rate": 6.65535134258972e-05, "loss": 0.304, "step": 6478 }, { "epoch": 2.080269706212875, "grad_norm": 0.6462170481681824, "learning_rate": 6.652061921155943e-05, "loss": 0.2834, "step": 6479 }, { "epoch": 2.0805907850377268, "grad_norm": 0.5869211554527283, "learning_rate": 6.648772907658272e-05, "loss": 0.5811, "step": 6480 }, { "epoch": 2.0809118638625783, "grad_norm": 0.397668719291687, "learning_rate": 6.64548430249745e-05, "loss": 0.2837, "step": 6481 }, { "epoch": 2.08123294268743, "grad_norm": 0.5283700227737427, "learning_rate": 6.642196106074194e-05, "loss": 0.2056, "step": 6482 }, { "epoch": 2.0815540215122814, "grad_norm": 0.5720309615135193, "learning_rate": 6.638908318789156e-05, "loss": 0.1464, "step": 6483 }, { "epoch": 2.081875100337133, "grad_norm": 0.4584559202194214, "learning_rate": 6.635620941042945e-05, "loss": 0.2292, "step": 6484 }, { "epoch": 2.082196179161984, "grad_norm": 0.5467604994773865, "learning_rate": 6.63233397323612e-05, "loss": 0.2558, "step": 6485 }, { "epoch": 2.0825172579868356, "grad_norm": 0.6699813008308411, "learning_rate": 6.62904741576918e-05, "loss": 0.3378, "step": 6486 }, { "epoch": 2.082838336811687, "grad_norm": 0.9696993231773376, "learning_rate": 6.62576126904259e-05, "loss": 0.3986, "step": 6487 }, { "epoch": 2.0831594156365387, "grad_norm": 0.788311243057251, "learning_rate": 6.622475533456751e-05, "loss": 0.361, "step": 6488 }, { "epoch": 2.0834804944613903, "grad_norm": 1.0402991771697998, "learning_rate": 6.61919020941203e-05, "loss": 0.3738, "step": 6489 }, { "epoch": 2.083801573286242, "grad_norm": 0.8160973191261292, "learning_rate": 6.61590529730872e-05, "loss": 0.3736, "step": 6490 }, { "epoch": 
2.0841226521110934, "grad_norm": 0.6715680360794067, "learning_rate": 6.612620797547087e-05, "loss": 0.3151, "step": 6491 }, { "epoch": 2.084443730935945, "grad_norm": 0.8539772629737854, "learning_rate": 6.609336710527332e-05, "loss": 0.4157, "step": 6492 }, { "epoch": 2.0847648097607965, "grad_norm": 0.8843332529067993, "learning_rate": 6.606053036649619e-05, "loss": 0.3158, "step": 6493 }, { "epoch": 2.0850858885856476, "grad_norm": 0.9574533700942993, "learning_rate": 6.602769776314049e-05, "loss": 0.3908, "step": 6494 }, { "epoch": 2.085406967410499, "grad_norm": 0.8649710416793823, "learning_rate": 6.599486929920673e-05, "loss": 0.3199, "step": 6495 }, { "epoch": 2.0857280462353507, "grad_norm": 1.0361381769180298, "learning_rate": 6.5962044978695e-05, "loss": 0.4014, "step": 6496 }, { "epoch": 2.0860491250602022, "grad_norm": 1.0464524030685425, "learning_rate": 6.592922480560483e-05, "loss": 0.2594, "step": 6497 }, { "epoch": 2.086370203885054, "grad_norm": 1.0081483125686646, "learning_rate": 6.589640878393531e-05, "loss": 0.3912, "step": 6498 }, { "epoch": 2.0866912827099053, "grad_norm": 0.9563593864440918, "learning_rate": 6.58635969176849e-05, "loss": 0.453, "step": 6499 }, { "epoch": 2.087012361534757, "grad_norm": 0.8743892908096313, "learning_rate": 6.583078921085167e-05, "loss": 0.3783, "step": 6500 }, { "epoch": 2.0873334403596084, "grad_norm": 0.964160680770874, "learning_rate": 6.579798566743314e-05, "loss": 0.3375, "step": 6501 }, { "epoch": 2.08765451918446, "grad_norm": 1.0127390623092651, "learning_rate": 6.57651862914263e-05, "loss": 0.4236, "step": 6502 }, { "epoch": 2.087975598009311, "grad_norm": 0.7326671481132507, "learning_rate": 6.573239108682768e-05, "loss": 0.3081, "step": 6503 }, { "epoch": 2.0882966768341626, "grad_norm": 0.9221829771995544, "learning_rate": 6.569960005763323e-05, "loss": 0.284, "step": 6504 }, { "epoch": 2.088617755659014, "grad_norm": 0.6714749336242676, "learning_rate": 6.566681320783849e-05, "loss": 0.2871, 
"step": 6505 }, { "epoch": 2.0889388344838657, "grad_norm": 0.9831109642982483, "learning_rate": 6.56340305414384e-05, "loss": 0.3815, "step": 6506 }, { "epoch": 2.0892599133087173, "grad_norm": 0.8641473650932312, "learning_rate": 6.560125206242746e-05, "loss": 0.3888, "step": 6507 }, { "epoch": 2.089580992133569, "grad_norm": 0.9115124344825745, "learning_rate": 6.55684777747996e-05, "loss": 0.3683, "step": 6508 }, { "epoch": 2.0899020709584204, "grad_norm": 0.6893644332885742, "learning_rate": 6.55357076825483e-05, "loss": 0.2572, "step": 6509 }, { "epoch": 2.090223149783272, "grad_norm": 1.011763572692871, "learning_rate": 6.550294178966647e-05, "loss": 0.3746, "step": 6510 }, { "epoch": 2.0905442286081235, "grad_norm": 0.739936888217926, "learning_rate": 6.547018010014654e-05, "loss": 0.338, "step": 6511 }, { "epoch": 2.0908653074329746, "grad_norm": 0.9537821412086487, "learning_rate": 6.543742261798045e-05, "loss": 0.2584, "step": 6512 }, { "epoch": 2.091186386257826, "grad_norm": 0.6458030343055725, "learning_rate": 6.540466934715953e-05, "loss": 0.3046, "step": 6513 }, { "epoch": 2.0915074650826777, "grad_norm": 0.7992889881134033, "learning_rate": 6.537192029167474e-05, "loss": 0.3166, "step": 6514 }, { "epoch": 2.0918285439075293, "grad_norm": 0.651301383972168, "learning_rate": 6.53391754555164e-05, "loss": 0.2818, "step": 6515 }, { "epoch": 2.092149622732381, "grad_norm": 0.6748250722885132, "learning_rate": 6.530643484267443e-05, "loss": 0.3025, "step": 6516 }, { "epoch": 2.0924707015572324, "grad_norm": 0.7246717810630798, "learning_rate": 6.52736984571381e-05, "loss": 0.258, "step": 6517 }, { "epoch": 2.092791780382084, "grad_norm": 0.7231757640838623, "learning_rate": 6.52409663028963e-05, "loss": 0.2886, "step": 6518 }, { "epoch": 2.0931128592069355, "grad_norm": 0.8351329565048218, "learning_rate": 6.520823838393731e-05, "loss": 0.3284, "step": 6519 }, { "epoch": 2.093433938031787, "grad_norm": 0.9581044316291809, "learning_rate": 
6.517551470424895e-05, "loss": 0.3134, "step": 6520 }, { "epoch": 2.093755016856638, "grad_norm": 0.9175045490264893, "learning_rate": 6.51427952678185e-05, "loss": 0.3739, "step": 6521 }, { "epoch": 2.0940760956814897, "grad_norm": 0.9837835431098938, "learning_rate": 6.511008007863268e-05, "loss": 0.4271, "step": 6522 }, { "epoch": 2.094397174506341, "grad_norm": 0.467220664024353, "learning_rate": 6.507736914067781e-05, "loss": 0.2381, "step": 6523 }, { "epoch": 2.0947182533311928, "grad_norm": 1.0905086994171143, "learning_rate": 6.504466245793955e-05, "loss": 0.4082, "step": 6524 }, { "epoch": 2.0950393321560443, "grad_norm": 0.6342186331748962, "learning_rate": 6.501196003440314e-05, "loss": 0.268, "step": 6525 }, { "epoch": 2.095360410980896, "grad_norm": 0.6534708142280579, "learning_rate": 6.497926187405326e-05, "loss": 0.261, "step": 6526 }, { "epoch": 2.0956814898057474, "grad_norm": 0.5003716349601746, "learning_rate": 6.494656798087412e-05, "loss": 0.2658, "step": 6527 }, { "epoch": 2.096002568630599, "grad_norm": 0.6496462225914001, "learning_rate": 6.49138783588493e-05, "loss": 0.3374, "step": 6528 }, { "epoch": 2.0963236474554505, "grad_norm": 0.8161367177963257, "learning_rate": 6.488119301196201e-05, "loss": 0.3874, "step": 6529 }, { "epoch": 2.0966447262803016, "grad_norm": 0.5122060775756836, "learning_rate": 6.484851194419484e-05, "loss": 0.6031, "step": 6530 }, { "epoch": 2.096965805105153, "grad_norm": 0.43576663732528687, "learning_rate": 6.481583515952983e-05, "loss": 0.6629, "step": 6531 }, { "epoch": 2.0972868839300047, "grad_norm": 0.484679639339447, "learning_rate": 6.478316266194861e-05, "loss": 0.2203, "step": 6532 }, { "epoch": 2.0976079627548563, "grad_norm": 0.5761163830757141, "learning_rate": 6.475049445543215e-05, "loss": 0.2443, "step": 6533 }, { "epoch": 2.097929041579708, "grad_norm": 0.5513499975204468, "learning_rate": 6.471783054396105e-05, "loss": 0.1622, "step": 6534 }, { "epoch": 2.0982501204045594, "grad_norm": 
0.522097110748291, "learning_rate": 6.468517093151525e-05, "loss": 0.1611, "step": 6535 }, { "epoch": 2.098571199229411, "grad_norm": 0.7834147214889526, "learning_rate": 6.465251562207431e-05, "loss": 0.3935, "step": 6536 }, { "epoch": 2.0988922780542625, "grad_norm": 1.0069727897644043, "learning_rate": 6.461986461961706e-05, "loss": 0.4389, "step": 6537 }, { "epoch": 2.099213356879114, "grad_norm": 1.162589192390442, "learning_rate": 6.458721792812204e-05, "loss": 0.4037, "step": 6538 }, { "epoch": 2.099534435703965, "grad_norm": 0.7293219566345215, "learning_rate": 6.455457555156705e-05, "loss": 0.341, "step": 6539 }, { "epoch": 2.0998555145288167, "grad_norm": 0.6971396803855896, "learning_rate": 6.452193749392952e-05, "loss": 0.2258, "step": 6540 }, { "epoch": 2.1001765933536682, "grad_norm": 0.7499396800994873, "learning_rate": 6.448930375918631e-05, "loss": 0.2789, "step": 6541 }, { "epoch": 2.10049767217852, "grad_norm": 0.637330174446106, "learning_rate": 6.44566743513137e-05, "loss": 0.301, "step": 6542 }, { "epoch": 2.1008187510033713, "grad_norm": 0.9882725477218628, "learning_rate": 6.442404927428751e-05, "loss": 0.499, "step": 6543 }, { "epoch": 2.101139829828223, "grad_norm": 0.7943243980407715, "learning_rate": 6.4391428532083e-05, "loss": 0.324, "step": 6544 }, { "epoch": 2.1014609086530744, "grad_norm": 0.8991156816482544, "learning_rate": 6.435881212867493e-05, "loss": 0.3999, "step": 6545 }, { "epoch": 2.101781987477926, "grad_norm": 0.8025935292243958, "learning_rate": 6.432620006803746e-05, "loss": 0.3236, "step": 6546 }, { "epoch": 2.1021030663027775, "grad_norm": 0.795685350894928, "learning_rate": 6.429359235414436e-05, "loss": 0.3178, "step": 6547 }, { "epoch": 2.1024241451276287, "grad_norm": 0.8055205345153809, "learning_rate": 6.426098899096868e-05, "loss": 0.3545, "step": 6548 }, { "epoch": 2.10274522395248, "grad_norm": 0.9374400973320007, "learning_rate": 6.422838998248307e-05, "loss": 0.4094, "step": 6549 }, { "epoch": 
2.1030663027773318, "grad_norm": 1.0976170301437378, "learning_rate": 6.419579533265968e-05, "loss": 0.3392, "step": 6550 }, { "epoch": 2.1033873816021833, "grad_norm": 0.8357941508293152, "learning_rate": 6.416320504546997e-05, "loss": 0.3858, "step": 6551 }, { "epoch": 2.103708460427035, "grad_norm": 1.0488425493240356, "learning_rate": 6.413061912488508e-05, "loss": 0.4397, "step": 6552 }, { "epoch": 2.1040295392518864, "grad_norm": 0.8378785848617554, "learning_rate": 6.409803757487538e-05, "loss": 0.3655, "step": 6553 }, { "epoch": 2.104350618076738, "grad_norm": 1.1138861179351807, "learning_rate": 6.406546039941094e-05, "loss": 0.3253, "step": 6554 }, { "epoch": 2.1046716969015895, "grad_norm": 1.0650768280029297, "learning_rate": 6.403288760246112e-05, "loss": 0.3725, "step": 6555 }, { "epoch": 2.1049927757264406, "grad_norm": 1.272623896598816, "learning_rate": 6.40003191879949e-05, "loss": 0.3764, "step": 6556 }, { "epoch": 2.105313854551292, "grad_norm": 1.197476863861084, "learning_rate": 6.396775515998055e-05, "loss": 0.3797, "step": 6557 }, { "epoch": 2.1056349333761437, "grad_norm": 0.9312015771865845, "learning_rate": 6.393519552238591e-05, "loss": 0.3614, "step": 6558 }, { "epoch": 2.1059560122009953, "grad_norm": 1.1965104341506958, "learning_rate": 6.390264027917836e-05, "loss": 0.4493, "step": 6559 }, { "epoch": 2.106277091025847, "grad_norm": 0.7930313348770142, "learning_rate": 6.387008943432455e-05, "loss": 0.3172, "step": 6560 }, { "epoch": 2.1065981698506984, "grad_norm": 0.645929753780365, "learning_rate": 6.383754299179079e-05, "loss": 0.3191, "step": 6561 }, { "epoch": 2.10691924867555, "grad_norm": 0.7615616917610168, "learning_rate": 6.380500095554268e-05, "loss": 0.2763, "step": 6562 }, { "epoch": 2.1072403275004015, "grad_norm": 1.1042746305465698, "learning_rate": 6.377246332954544e-05, "loss": 0.3695, "step": 6563 }, { "epoch": 2.107561406325253, "grad_norm": 0.7434027791023254, "learning_rate": 6.373993011776367e-05, "loss": 
0.3348, "step": 6564 }, { "epoch": 2.107882485150104, "grad_norm": 0.8379579782485962, "learning_rate": 6.370740132416138e-05, "loss": 0.3715, "step": 6565 }, { "epoch": 2.1082035639749557, "grad_norm": 0.7400662899017334, "learning_rate": 6.367487695270217e-05, "loss": 0.3288, "step": 6566 }, { "epoch": 2.1085246427998072, "grad_norm": 0.8709506392478943, "learning_rate": 6.364235700734903e-05, "loss": 0.3382, "step": 6567 }, { "epoch": 2.1088457216246588, "grad_norm": 1.0580693483352661, "learning_rate": 6.360984149206439e-05, "loss": 0.3497, "step": 6568 }, { "epoch": 2.1091668004495103, "grad_norm": 1.0599806308746338, "learning_rate": 6.357733041081018e-05, "loss": 0.3606, "step": 6569 }, { "epoch": 2.109487879274362, "grad_norm": 0.7007160782814026, "learning_rate": 6.35448237675478e-05, "loss": 0.2877, "step": 6570 }, { "epoch": 2.1098089580992134, "grad_norm": 0.9604334831237793, "learning_rate": 6.351232156623803e-05, "loss": 0.3173, "step": 6571 }, { "epoch": 2.110130036924065, "grad_norm": 0.7274121642112732, "learning_rate": 6.347982381084123e-05, "loss": 0.2848, "step": 6572 }, { "epoch": 2.1104511157489165, "grad_norm": 0.8316487669944763, "learning_rate": 6.344733050531713e-05, "loss": 0.3307, "step": 6573 }, { "epoch": 2.1107721945737676, "grad_norm": 0.6824556589126587, "learning_rate": 6.341484165362487e-05, "loss": 0.2692, "step": 6574 }, { "epoch": 2.111093273398619, "grad_norm": 0.9442518949508667, "learning_rate": 6.338235725972325e-05, "loss": 0.326, "step": 6575 }, { "epoch": 2.1114143522234707, "grad_norm": 0.46426621079444885, "learning_rate": 6.334987732757029e-05, "loss": 0.2598, "step": 6576 }, { "epoch": 2.1117354310483223, "grad_norm": 0.6108339428901672, "learning_rate": 6.33174018611236e-05, "loss": 0.3211, "step": 6577 }, { "epoch": 2.112056509873174, "grad_norm": 0.8628884553909302, "learning_rate": 6.328493086434023e-05, "loss": 0.3061, "step": 6578 }, { "epoch": 2.1123775886980254, "grad_norm": 0.45920509099960327, 
"learning_rate": 6.325246434117668e-05, "loss": 0.2858, "step": 6579 }, { "epoch": 2.112698667522877, "grad_norm": 0.36384066939353943, "learning_rate": 6.322000229558887e-05, "loss": 0.3652, "step": 6580 }, { "epoch": 2.1130197463477285, "grad_norm": 0.6161776781082153, "learning_rate": 6.318754473153221e-05, "loss": 0.4469, "step": 6581 }, { "epoch": 2.11334082517258, "grad_norm": 0.5256691575050354, "learning_rate": 6.315509165296159e-05, "loss": 0.2341, "step": 6582 }, { "epoch": 2.113661903997431, "grad_norm": 0.5201000571250916, "learning_rate": 6.312264306383124e-05, "loss": 0.2947, "step": 6583 }, { "epoch": 2.1139829828222827, "grad_norm": 0.5829021334648132, "learning_rate": 6.309019896809503e-05, "loss": 0.2341, "step": 6584 }, { "epoch": 2.1143040616471342, "grad_norm": 0.5891327261924744, "learning_rate": 6.305775936970606e-05, "loss": 0.1354, "step": 6585 }, { "epoch": 2.114625140471986, "grad_norm": 0.4353516697883606, "learning_rate": 6.302532427261709e-05, "loss": 0.1752, "step": 6586 }, { "epoch": 2.1149462192968373, "grad_norm": 0.6292280554771423, "learning_rate": 6.299289368078016e-05, "loss": 0.327, "step": 6587 }, { "epoch": 2.115267298121689, "grad_norm": 0.7321916818618774, "learning_rate": 6.296046759814694e-05, "loss": 0.3184, "step": 6588 }, { "epoch": 2.1155883769465404, "grad_norm": 1.0000382661819458, "learning_rate": 6.292804602866834e-05, "loss": 0.3432, "step": 6589 }, { "epoch": 2.115909455771392, "grad_norm": 0.9078216552734375, "learning_rate": 6.289562897629492e-05, "loss": 0.4655, "step": 6590 }, { "epoch": 2.1162305345962436, "grad_norm": 0.827180802822113, "learning_rate": 6.286321644497655e-05, "loss": 0.369, "step": 6591 }, { "epoch": 2.1165516134210947, "grad_norm": 0.5973712205886841, "learning_rate": 6.283080843866256e-05, "loss": 0.2657, "step": 6592 }, { "epoch": 2.116872692245946, "grad_norm": 0.7066929936408997, "learning_rate": 6.27984049613019e-05, "loss": 0.3177, "step": 6593 }, { "epoch": 2.1171937710707978, 
"grad_norm": 0.8273329734802246, "learning_rate": 6.276600601684267e-05, "loss": 0.3679, "step": 6594 }, { "epoch": 2.1175148498956493, "grad_norm": 0.9276707768440247, "learning_rate": 6.27336116092327e-05, "loss": 0.4267, "step": 6595 }, { "epoch": 2.117835928720501, "grad_norm": 0.9455394744873047, "learning_rate": 6.27012217424191e-05, "loss": 0.3272, "step": 6596 }, { "epoch": 2.1181570075453524, "grad_norm": 1.1003650426864624, "learning_rate": 6.266883642034853e-05, "loss": 0.4239, "step": 6597 }, { "epoch": 2.118478086370204, "grad_norm": 1.107857346534729, "learning_rate": 6.263645564696696e-05, "loss": 0.436, "step": 6598 }, { "epoch": 2.1187991651950555, "grad_norm": 0.7510982155799866, "learning_rate": 6.260407942621998e-05, "loss": 0.3712, "step": 6599 }, { "epoch": 2.119120244019907, "grad_norm": 0.7718844413757324, "learning_rate": 6.257170776205245e-05, "loss": 0.3182, "step": 6600 }, { "epoch": 2.119441322844758, "grad_norm": 0.8712736964225769, "learning_rate": 6.25393406584088e-05, "loss": 0.3044, "step": 6601 }, { "epoch": 2.1197624016696097, "grad_norm": 1.5617177486419678, "learning_rate": 6.25069781192329e-05, "loss": 0.42, "step": 6602 }, { "epoch": 2.1200834804944613, "grad_norm": 0.9841622710227966, "learning_rate": 6.247462014846792e-05, "loss": 0.399, "step": 6603 }, { "epoch": 2.120404559319313, "grad_norm": 0.9455597400665283, "learning_rate": 6.24422667500567e-05, "loss": 0.4426, "step": 6604 }, { "epoch": 2.1207256381441644, "grad_norm": 0.6564199924468994, "learning_rate": 6.240991792794133e-05, "loss": 0.2695, "step": 6605 }, { "epoch": 2.121046716969016, "grad_norm": 0.9546607732772827, "learning_rate": 6.237757368606345e-05, "loss": 0.4005, "step": 6606 }, { "epoch": 2.1213677957938675, "grad_norm": 0.9721749424934387, "learning_rate": 6.234523402836407e-05, "loss": 0.4454, "step": 6607 }, { "epoch": 2.121688874618719, "grad_norm": 1.2680310010910034, "learning_rate": 6.231289895878375e-05, "loss": 0.3899, "step": 6608 }, { 
"epoch": 2.1220099534435706, "grad_norm": 0.8545310497283936, "learning_rate": 6.228056848126236e-05, "loss": 0.3709, "step": 6609 }, { "epoch": 2.1223310322684217, "grad_norm": 0.8629288077354431, "learning_rate": 6.224824259973925e-05, "loss": 0.2811, "step": 6610 }, { "epoch": 2.1226521110932732, "grad_norm": 1.1735135316848755, "learning_rate": 6.22159213181533e-05, "loss": 0.3109, "step": 6611 }, { "epoch": 2.122973189918125, "grad_norm": 0.7792378067970276, "learning_rate": 6.21836046404427e-05, "loss": 0.3302, "step": 6612 }, { "epoch": 2.1232942687429763, "grad_norm": 0.7332779169082642, "learning_rate": 6.215129257054522e-05, "loss": 0.3134, "step": 6613 }, { "epoch": 2.123615347567828, "grad_norm": 0.6187347769737244, "learning_rate": 6.211898511239787e-05, "loss": 0.2562, "step": 6614 }, { "epoch": 2.1239364263926794, "grad_norm": 0.8484505414962769, "learning_rate": 6.20866822699373e-05, "loss": 0.2516, "step": 6615 }, { "epoch": 2.124257505217531, "grad_norm": 0.8732132911682129, "learning_rate": 6.205438404709947e-05, "loss": 0.3172, "step": 6616 }, { "epoch": 2.1245785840423825, "grad_norm": 0.8905081152915955, "learning_rate": 6.20220904478199e-05, "loss": 0.3439, "step": 6617 }, { "epoch": 2.124899662867234, "grad_norm": 0.7029334306716919, "learning_rate": 6.198980147603339e-05, "loss": 0.2728, "step": 6618 }, { "epoch": 2.125220741692085, "grad_norm": 0.8085559010505676, "learning_rate": 6.195751713567427e-05, "loss": 0.2663, "step": 6619 }, { "epoch": 2.1255418205169367, "grad_norm": 0.8118886351585388, "learning_rate": 6.192523743067626e-05, "loss": 0.3628, "step": 6620 }, { "epoch": 2.1258628993417883, "grad_norm": 0.5868399143218994, "learning_rate": 6.18929623649726e-05, "loss": 0.2687, "step": 6621 }, { "epoch": 2.12618397816664, "grad_norm": 0.5492334961891174, "learning_rate": 6.18606919424959e-05, "loss": 0.2749, "step": 6622 }, { "epoch": 2.1265050569914914, "grad_norm": 1.074462652206421, "learning_rate": 6.182842616717817e-05, "loss": 
0.3686, "step": 6623 }, { "epoch": 2.126826135816343, "grad_norm": 0.8293436765670776, "learning_rate": 6.179616504295092e-05, "loss": 0.3272, "step": 6624 }, { "epoch": 2.1271472146411945, "grad_norm": 1.137435793876648, "learning_rate": 6.176390857374507e-05, "loss": 0.3109, "step": 6625 }, { "epoch": 2.127468293466046, "grad_norm": 0.7483497858047485, "learning_rate": 6.173165676349103e-05, "loss": 0.343, "step": 6626 }, { "epoch": 2.1277893722908976, "grad_norm": 0.40780845284461975, "learning_rate": 6.169940961611854e-05, "loss": 0.2489, "step": 6627 }, { "epoch": 2.1281104511157487, "grad_norm": 0.6803835034370422, "learning_rate": 6.166716713555674e-05, "loss": 0.3458, "step": 6628 }, { "epoch": 2.1284315299406003, "grad_norm": 0.6361788511276245, "learning_rate": 6.163492932573438e-05, "loss": 0.3, "step": 6629 }, { "epoch": 2.128752608765452, "grad_norm": 0.5756948590278625, "learning_rate": 6.160269619057951e-05, "loss": 0.6565, "step": 6630 }, { "epoch": 2.1290736875903034, "grad_norm": 0.5994390845298767, "learning_rate": 6.157046773401964e-05, "loss": 0.4455, "step": 6631 }, { "epoch": 2.129394766415155, "grad_norm": 0.661205530166626, "learning_rate": 6.153824395998168e-05, "loss": 0.1752, "step": 6632 }, { "epoch": 2.1297158452400065, "grad_norm": 0.5093392729759216, "learning_rate": 6.150602487239206e-05, "loss": 0.0911, "step": 6633 }, { "epoch": 2.130036924064858, "grad_norm": 0.4452694356441498, "learning_rate": 6.147381047517655e-05, "loss": 0.0949, "step": 6634 }, { "epoch": 2.1303580028897096, "grad_norm": 0.4948364496231079, "learning_rate": 6.144160077226036e-05, "loss": 0.1567, "step": 6635 }, { "epoch": 2.130679081714561, "grad_norm": 0.5426846742630005, "learning_rate": 6.140939576756817e-05, "loss": 0.2952, "step": 6636 }, { "epoch": 2.131000160539412, "grad_norm": 0.6515873074531555, "learning_rate": 6.1377195465024e-05, "loss": 0.3118, "step": 6637 }, { "epoch": 2.1313212393642638, "grad_norm": 0.7269318103790283, "learning_rate": 
6.134499986855144e-05, "loss": 0.3818, "step": 6638 }, { "epoch": 2.1316423181891153, "grad_norm": 0.7911025285720825, "learning_rate": 6.131280898207339e-05, "loss": 0.3881, "step": 6639 }, { "epoch": 2.131963397013967, "grad_norm": 0.8551725149154663, "learning_rate": 6.128062280951224e-05, "loss": 0.3669, "step": 6640 }, { "epoch": 2.1322844758388184, "grad_norm": 0.8696125745773315, "learning_rate": 6.12484413547897e-05, "loss": 0.3246, "step": 6641 }, { "epoch": 2.13260555466367, "grad_norm": 0.9644296765327454, "learning_rate": 6.121626462182707e-05, "loss": 0.3639, "step": 6642 }, { "epoch": 2.1329266334885215, "grad_norm": 0.9500702619552612, "learning_rate": 6.118409261454494e-05, "loss": 0.2879, "step": 6643 }, { "epoch": 2.133247712313373, "grad_norm": 0.942509114742279, "learning_rate": 6.11519253368634e-05, "loss": 0.3482, "step": 6644 }, { "epoch": 2.1335687911382246, "grad_norm": 0.7259782552719116, "learning_rate": 6.111976279270192e-05, "loss": 0.3214, "step": 6645 }, { "epoch": 2.1338898699630757, "grad_norm": 0.7801980972290039, "learning_rate": 6.108760498597938e-05, "loss": 0.3301, "step": 6646 }, { "epoch": 2.1342109487879273, "grad_norm": 0.8624287247657776, "learning_rate": 6.105545192061416e-05, "loss": 0.3717, "step": 6647 }, { "epoch": 2.134532027612779, "grad_norm": 0.8877105712890625, "learning_rate": 6.1023303600523975e-05, "loss": 0.4454, "step": 6648 }, { "epoch": 2.1348531064376304, "grad_norm": 0.7793693542480469, "learning_rate": 6.099116002962604e-05, "loss": 0.3266, "step": 6649 }, { "epoch": 2.135174185262482, "grad_norm": 1.096024990081787, "learning_rate": 6.09590212118369e-05, "loss": 0.4317, "step": 6650 }, { "epoch": 2.1354952640873335, "grad_norm": 0.889217734336853, "learning_rate": 6.092688715107264e-05, "loss": 0.4424, "step": 6651 }, { "epoch": 2.135816342912185, "grad_norm": 0.8628430962562561, "learning_rate": 6.089475785124863e-05, "loss": 0.3435, "step": 6652 }, { "epoch": 2.1361374217370366, "grad_norm": 
0.8696472644805908, "learning_rate": 6.086263331627976e-05, "loss": 0.3443, "step": 6653 }, { "epoch": 2.136458500561888, "grad_norm": 0.9807567596435547, "learning_rate": 6.0830513550080335e-05, "loss": 0.3259, "step": 6654 }, { "epoch": 2.1367795793867392, "grad_norm": 0.9957831501960754, "learning_rate": 6.0798398556563976e-05, "loss": 0.3485, "step": 6655 }, { "epoch": 2.137100658211591, "grad_norm": 1.0580285787582397, "learning_rate": 6.076628833964388e-05, "loss": 0.4838, "step": 6656 }, { "epoch": 2.1374217370364423, "grad_norm": 0.7610054016113281, "learning_rate": 6.073418290323251e-05, "loss": 0.3736, "step": 6657 }, { "epoch": 2.137742815861294, "grad_norm": 0.8939658999443054, "learning_rate": 6.070208225124185e-05, "loss": 0.3186, "step": 6658 }, { "epoch": 2.1380638946861454, "grad_norm": 0.9913893342018127, "learning_rate": 6.066998638758326e-05, "loss": 0.3687, "step": 6659 }, { "epoch": 2.138384973510997, "grad_norm": 0.6824525594711304, "learning_rate": 6.063789531616757e-05, "loss": 0.277, "step": 6660 }, { "epoch": 2.1387060523358485, "grad_norm": 0.935042679309845, "learning_rate": 6.0605809040904894e-05, "loss": 0.3631, "step": 6661 }, { "epoch": 2.1390271311607, "grad_norm": 0.7435742020606995, "learning_rate": 6.05737275657049e-05, "loss": 0.3319, "step": 6662 }, { "epoch": 2.1393482099855516, "grad_norm": 0.6883519291877747, "learning_rate": 6.054165089447663e-05, "loss": 0.3006, "step": 6663 }, { "epoch": 2.1396692888104027, "grad_norm": 0.7741847038269043, "learning_rate": 6.0509579031128485e-05, "loss": 0.3047, "step": 6664 }, { "epoch": 2.1399903676352543, "grad_norm": 0.8180326223373413, "learning_rate": 6.047751197956838e-05, "loss": 0.3593, "step": 6665 }, { "epoch": 2.140311446460106, "grad_norm": 1.200945496559143, "learning_rate": 6.0445449743703516e-05, "loss": 0.3234, "step": 6666 }, { "epoch": 2.1406325252849574, "grad_norm": 0.9025703072547913, "learning_rate": 6.0413392327440635e-05, "loss": 0.3599, "step": 6667 }, { 
"epoch": 2.140953604109809, "grad_norm": 0.9718348979949951, "learning_rate": 6.03813397346858e-05, "loss": 0.3025, "step": 6668 }, { "epoch": 2.1412746829346605, "grad_norm": 0.9838557243347168, "learning_rate": 6.034929196934459e-05, "loss": 0.3157, "step": 6669 }, { "epoch": 2.141595761759512, "grad_norm": 0.940606951713562, "learning_rate": 6.031724903532183e-05, "loss": 0.3326, "step": 6670 }, { "epoch": 2.1419168405843636, "grad_norm": 0.6957113742828369, "learning_rate": 6.0285210936521955e-05, "loss": 0.2739, "step": 6671 }, { "epoch": 2.142237919409215, "grad_norm": 0.977173388004303, "learning_rate": 6.025317767684864e-05, "loss": 0.3673, "step": 6672 }, { "epoch": 2.1425589982340663, "grad_norm": 0.6859129071235657, "learning_rate": 6.022114926020504e-05, "loss": 0.3025, "step": 6673 }, { "epoch": 2.142880077058918, "grad_norm": 0.8257904052734375, "learning_rate": 6.018912569049376e-05, "loss": 0.3357, "step": 6674 }, { "epoch": 2.1432011558837694, "grad_norm": 0.5588051080703735, "learning_rate": 6.015710697161674e-05, "loss": 0.2849, "step": 6675 }, { "epoch": 2.143522234708621, "grad_norm": 0.5934134125709534, "learning_rate": 6.012509310747538e-05, "loss": 0.2849, "step": 6676 }, { "epoch": 2.1438433135334725, "grad_norm": 0.800035834312439, "learning_rate": 6.009308410197047e-05, "loss": 0.2704, "step": 6677 }, { "epoch": 2.144164392358324, "grad_norm": 0.42928749322891235, "learning_rate": 6.006107995900224e-05, "loss": 0.2584, "step": 6678 }, { "epoch": 2.1444854711831756, "grad_norm": 0.4132596552371979, "learning_rate": 6.002908068247024e-05, "loss": 0.2802, "step": 6679 }, { "epoch": 2.144806550008027, "grad_norm": 0.4437423348426819, "learning_rate": 5.999708627627354e-05, "loss": 0.4253, "step": 6680 }, { "epoch": 2.1451276288328787, "grad_norm": 0.4113101065158844, "learning_rate": 5.9965096744310526e-05, "loss": 0.2371, "step": 6681 }, { "epoch": 2.1454487076577298, "grad_norm": 0.513230562210083, "learning_rate": 5.9933112090479006e-05, 
"loss": 0.1586, "step": 6682 }, { "epoch": 2.1457697864825813, "grad_norm": 0.5119988918304443, "learning_rate": 5.990113231867629e-05, "loss": 0.1865, "step": 6683 }, { "epoch": 2.146090865307433, "grad_norm": 0.6047758460044861, "learning_rate": 5.9869157432798926e-05, "loss": 0.2309, "step": 6684 }, { "epoch": 2.1464119441322844, "grad_norm": 0.5919579267501831, "learning_rate": 5.9837187436743016e-05, "loss": 0.1818, "step": 6685 }, { "epoch": 2.146733022957136, "grad_norm": 0.44815778732299805, "learning_rate": 5.9805222334404e-05, "loss": 0.1748, "step": 6686 }, { "epoch": 2.1470541017819875, "grad_norm": 0.4934236705303192, "learning_rate": 5.977326212967671e-05, "loss": 0.1963, "step": 6687 }, { "epoch": 2.147375180606839, "grad_norm": 0.8252405524253845, "learning_rate": 5.974130682645538e-05, "loss": 0.3972, "step": 6688 }, { "epoch": 2.1476962594316906, "grad_norm": 1.336822509765625, "learning_rate": 5.9709356428633746e-05, "loss": 0.3795, "step": 6689 }, { "epoch": 2.148017338256542, "grad_norm": 0.7905248999595642, "learning_rate": 5.967741094010479e-05, "loss": 0.4087, "step": 6690 }, { "epoch": 2.1483384170813933, "grad_norm": 0.7174348831176758, "learning_rate": 5.964547036476099e-05, "loss": 0.3429, "step": 6691 }, { "epoch": 2.148659495906245, "grad_norm": 0.7305684685707092, "learning_rate": 5.961353470649426e-05, "loss": 0.2866, "step": 6692 }, { "epoch": 2.1489805747310964, "grad_norm": 0.6904447078704834, "learning_rate": 5.9581603969195766e-05, "loss": 0.3, "step": 6693 }, { "epoch": 2.149301653555948, "grad_norm": 0.8633493185043335, "learning_rate": 5.954967815675627e-05, "loss": 0.3736, "step": 6694 }, { "epoch": 2.1496227323807995, "grad_norm": 0.8246678113937378, "learning_rate": 5.951775727306577e-05, "loss": 0.359, "step": 6695 }, { "epoch": 2.149943811205651, "grad_norm": 1.400453805923462, "learning_rate": 5.9485841322013755e-05, "loss": 0.454, "step": 6696 }, { "epoch": 2.1502648900305026, "grad_norm": 2.0813472270965576, 
"learning_rate": 5.94539303074891e-05, "loss": 0.3139, "step": 6697 }, { "epoch": 2.150585968855354, "grad_norm": 0.9798773527145386, "learning_rate": 5.9422024233380013e-05, "loss": 0.4055, "step": 6698 }, { "epoch": 2.1509070476802057, "grad_norm": 0.9480912685394287, "learning_rate": 5.9390123103574215e-05, "loss": 0.3727, "step": 6699 }, { "epoch": 2.151228126505057, "grad_norm": 0.9623056650161743, "learning_rate": 5.935822692195869e-05, "loss": 0.2921, "step": 6700 }, { "epoch": 2.1515492053299083, "grad_norm": 0.8746601343154907, "learning_rate": 5.9326335692419995e-05, "loss": 0.3345, "step": 6701 }, { "epoch": 2.15187028415476, "grad_norm": 0.725217878818512, "learning_rate": 5.929444941884388e-05, "loss": 0.346, "step": 6702 }, { "epoch": 2.1521913629796114, "grad_norm": 0.8172053098678589, "learning_rate": 5.9262568105115654e-05, "loss": 0.3468, "step": 6703 }, { "epoch": 2.152512441804463, "grad_norm": 0.648247241973877, "learning_rate": 5.92306917551199e-05, "loss": 0.2908, "step": 6704 }, { "epoch": 2.1528335206293145, "grad_norm": 0.7141852378845215, "learning_rate": 5.9198820372740726e-05, "loss": 0.2882, "step": 6705 }, { "epoch": 2.153154599454166, "grad_norm": 0.8479703068733215, "learning_rate": 5.916695396186154e-05, "loss": 0.3407, "step": 6706 }, { "epoch": 2.1534756782790176, "grad_norm": 0.8146336674690247, "learning_rate": 5.913509252636511e-05, "loss": 0.2869, "step": 6707 }, { "epoch": 2.153796757103869, "grad_norm": 0.6545037627220154, "learning_rate": 5.910323607013373e-05, "loss": 0.2742, "step": 6708 }, { "epoch": 2.1541178359287203, "grad_norm": 0.7448673248291016, "learning_rate": 5.907138459704895e-05, "loss": 0.3192, "step": 6709 }, { "epoch": 2.154438914753572, "grad_norm": 0.7796539664268494, "learning_rate": 5.903953811099183e-05, "loss": 0.2947, "step": 6710 }, { "epoch": 2.1547599935784234, "grad_norm": 1.0737991333007812, "learning_rate": 5.900769661584272e-05, "loss": 0.4188, "step": 6711 }, { "epoch": 2.155081072403275, 
"grad_norm": 0.6433249711990356, "learning_rate": 5.8975860115481487e-05, "loss": 0.2853, "step": 6712 }, { "epoch": 2.1554021512281265, "grad_norm": 0.8667881488800049, "learning_rate": 5.8944028613787206e-05, "loss": 0.3175, "step": 6713 }, { "epoch": 2.155723230052978, "grad_norm": 1.061166524887085, "learning_rate": 5.8912202114638524e-05, "loss": 0.4216, "step": 6714 }, { "epoch": 2.1560443088778296, "grad_norm": 0.9208434224128723, "learning_rate": 5.88803806219134e-05, "loss": 0.3846, "step": 6715 }, { "epoch": 2.156365387702681, "grad_norm": 0.6994331479072571, "learning_rate": 5.884856413948913e-05, "loss": 0.2805, "step": 6716 }, { "epoch": 2.1566864665275327, "grad_norm": 0.9039559960365295, "learning_rate": 5.881675267124254e-05, "loss": 0.3893, "step": 6717 }, { "epoch": 2.157007545352384, "grad_norm": 0.8214314579963684, "learning_rate": 5.8784946221049666e-05, "loss": 0.3455, "step": 6718 }, { "epoch": 2.1573286241772354, "grad_norm": 0.5890683531761169, "learning_rate": 5.8753144792786096e-05, "loss": 0.2904, "step": 6719 }, { "epoch": 2.157649703002087, "grad_norm": 0.8071637749671936, "learning_rate": 5.87213483903267e-05, "loss": 0.3424, "step": 6720 }, { "epoch": 2.1579707818269385, "grad_norm": 0.5049499869346619, "learning_rate": 5.868955701754584e-05, "loss": 0.2401, "step": 6721 }, { "epoch": 2.15829186065179, "grad_norm": 0.7438219785690308, "learning_rate": 5.86577706783171e-05, "loss": 0.2888, "step": 6722 }, { "epoch": 2.1586129394766416, "grad_norm": 0.5607714056968689, "learning_rate": 5.862598937651365e-05, "loss": 0.28, "step": 6723 }, { "epoch": 2.158934018301493, "grad_norm": 0.6200205087661743, "learning_rate": 5.8594213116007855e-05, "loss": 0.3038, "step": 6724 }, { "epoch": 2.1592550971263447, "grad_norm": 0.9494879841804504, "learning_rate": 5.856244190067159e-05, "loss": 0.3483, "step": 6725 }, { "epoch": 2.159576175951196, "grad_norm": 0.9687539339065552, "learning_rate": 5.853067573437612e-05, "loss": 0.3126, "step": 6726 
}, { "epoch": 2.1598972547760473, "grad_norm": 0.35860729217529297, "learning_rate": 5.849891462099198e-05, "loss": 0.2379, "step": 6727 }, { "epoch": 2.160218333600899, "grad_norm": 0.4721701443195343, "learning_rate": 5.8467158564389234e-05, "loss": 0.2996, "step": 6728 }, { "epoch": 2.1605394124257504, "grad_norm": 2.956984758377075, "learning_rate": 5.8435407568437216e-05, "loss": 0.2978, "step": 6729 }, { "epoch": 2.160860491250602, "grad_norm": 0.5004953742027283, "learning_rate": 5.8403661637004736e-05, "loss": 0.6088, "step": 6730 }, { "epoch": 2.1611815700754535, "grad_norm": 0.43807387351989746, "learning_rate": 5.83719207739599e-05, "loss": 0.6434, "step": 6731 }, { "epoch": 2.161502648900305, "grad_norm": 0.5199769139289856, "learning_rate": 5.834018498317024e-05, "loss": 0.5589, "step": 6732 }, { "epoch": 2.1618237277251566, "grad_norm": 0.46276336908340454, "learning_rate": 5.8308454268502675e-05, "loss": 0.2407, "step": 6733 }, { "epoch": 2.162144806550008, "grad_norm": 0.47607436776161194, "learning_rate": 5.82767286338235e-05, "loss": 0.2192, "step": 6734 }, { "epoch": 2.1624658853748597, "grad_norm": 0.36103343963623047, "learning_rate": 5.8245008082998364e-05, "loss": 0.0913, "step": 6735 }, { "epoch": 2.162786964199711, "grad_norm": 0.6850721836090088, "learning_rate": 5.82132926198923e-05, "loss": 0.353, "step": 6736 }, { "epoch": 2.1631080430245624, "grad_norm": 0.7700496315956116, "learning_rate": 5.818158224836987e-05, "loss": 0.4083, "step": 6737 }, { "epoch": 2.163429121849414, "grad_norm": 0.7721123695373535, "learning_rate": 5.814987697229471e-05, "loss": 0.382, "step": 6738 }, { "epoch": 2.1637502006742655, "grad_norm": 0.7945840954780579, "learning_rate": 5.8118176795530176e-05, "loss": 0.3534, "step": 6739 }, { "epoch": 2.164071279499117, "grad_norm": 0.5969064831733704, "learning_rate": 5.808648172193868e-05, "loss": 0.2527, "step": 6740 }, { "epoch": 2.1643923583239686, "grad_norm": 0.9726594686508179, "learning_rate": 
5.805479175538229e-05, "loss": 0.3638, "step": 6741 }, { "epoch": 2.16471343714882, "grad_norm": 0.7358959913253784, "learning_rate": 5.802310689972233e-05, "loss": 0.3867, "step": 6742 }, { "epoch": 2.1650345159736717, "grad_norm": 0.7400618195533752, "learning_rate": 5.799142715881938e-05, "loss": 0.2788, "step": 6743 }, { "epoch": 2.1653555947985232, "grad_norm": 0.8120781779289246, "learning_rate": 5.795975253653364e-05, "loss": 0.3746, "step": 6744 }, { "epoch": 2.1656766736233743, "grad_norm": 0.7224815487861633, "learning_rate": 5.792808303672454e-05, "loss": 0.3336, "step": 6745 }, { "epoch": 2.165997752448226, "grad_norm": 0.7844706773757935, "learning_rate": 5.789641866325091e-05, "loss": 0.3321, "step": 6746 }, { "epoch": 2.1663188312730774, "grad_norm": 1.100580096244812, "learning_rate": 5.786475941997094e-05, "loss": 0.3908, "step": 6747 }, { "epoch": 2.166639910097929, "grad_norm": 0.7362777590751648, "learning_rate": 5.783310531074223e-05, "loss": 0.3151, "step": 6748 }, { "epoch": 2.1669609889227806, "grad_norm": 0.9936489462852478, "learning_rate": 5.780145633942173e-05, "loss": 0.401, "step": 6749 }, { "epoch": 2.167282067747632, "grad_norm": 0.7832114696502686, "learning_rate": 5.7769812509865773e-05, "loss": 0.3097, "step": 6750 }, { "epoch": 2.1676031465724837, "grad_norm": 0.7143785357475281, "learning_rate": 5.773817382593008e-05, "loss": 0.2509, "step": 6751 }, { "epoch": 2.167924225397335, "grad_norm": 0.9188450574874878, "learning_rate": 5.770654029146969e-05, "loss": 0.4234, "step": 6752 }, { "epoch": 2.1682453042221868, "grad_norm": 0.7946859002113342, "learning_rate": 5.7674911910339094e-05, "loss": 0.28, "step": 6753 }, { "epoch": 2.168566383047038, "grad_norm": 1.0123013257980347, "learning_rate": 5.764328868639208e-05, "loss": 0.3555, "step": 6754 }, { "epoch": 2.1688874618718894, "grad_norm": 1.0456223487854004, "learning_rate": 5.7611670623481864e-05, "loss": 0.28, "step": 6755 }, { "epoch": 2.169208540696741, "grad_norm": 
0.765620231628418, "learning_rate": 5.758005772546097e-05, "loss": 0.3154, "step": 6756 }, { "epoch": 2.1695296195215925, "grad_norm": 0.8351200222969055, "learning_rate": 5.754844999618144e-05, "loss": 0.3056, "step": 6757 }, { "epoch": 2.169850698346444, "grad_norm": 0.7080745697021484, "learning_rate": 5.7516847439494435e-05, "loss": 0.2799, "step": 6758 }, { "epoch": 2.1701717771712956, "grad_norm": 0.7718917727470398, "learning_rate": 5.748525005925074e-05, "loss": 0.3316, "step": 6759 }, { "epoch": 2.170492855996147, "grad_norm": 0.6026411652565002, "learning_rate": 5.7453657859300415e-05, "loss": 0.2607, "step": 6760 }, { "epoch": 2.1708139348209987, "grad_norm": 1.0333638191223145, "learning_rate": 5.7422070843492734e-05, "loss": 0.2856, "step": 6761 }, { "epoch": 2.1711350136458503, "grad_norm": 0.8613486289978027, "learning_rate": 5.739048901567665e-05, "loss": 0.3556, "step": 6762 }, { "epoch": 2.1714560924707014, "grad_norm": 0.7801258563995361, "learning_rate": 5.735891237970015e-05, "loss": 0.3481, "step": 6763 }, { "epoch": 2.171777171295553, "grad_norm": 0.8479055762290955, "learning_rate": 5.732734093941087e-05, "loss": 0.3521, "step": 6764 }, { "epoch": 2.1720982501204045, "grad_norm": 0.8801478743553162, "learning_rate": 5.7295774698655655e-05, "loss": 0.3246, "step": 6765 }, { "epoch": 2.172419328945256, "grad_norm": 0.860466718673706, "learning_rate": 5.7264213661280765e-05, "loss": 0.3524, "step": 6766 }, { "epoch": 2.1727404077701076, "grad_norm": 0.7384558320045471, "learning_rate": 5.723265783113181e-05, "loss": 0.2897, "step": 6767 }, { "epoch": 2.173061486594959, "grad_norm": 0.9084320068359375, "learning_rate": 5.720110721205376e-05, "loss": 0.3288, "step": 6768 }, { "epoch": 2.1733825654198107, "grad_norm": 0.7406836152076721, "learning_rate": 5.716956180789098e-05, "loss": 0.3548, "step": 6769 }, { "epoch": 2.1737036442446622, "grad_norm": 0.8314934372901917, "learning_rate": 5.713802162248718e-05, "loss": 0.3294, "step": 6770 }, { 
"epoch": 2.1740247230695138, "grad_norm": 1.2621833086013794, "learning_rate": 5.710648665968543e-05, "loss": 0.4228, "step": 6771 }, { "epoch": 2.174345801894365, "grad_norm": 0.7521808743476868, "learning_rate": 5.707495692332816e-05, "loss": 0.2837, "step": 6772 }, { "epoch": 2.1746668807192164, "grad_norm": 1.019662857055664, "learning_rate": 5.704343241725719e-05, "loss": 0.3995, "step": 6773 }, { "epoch": 2.174987959544068, "grad_norm": 0.661185085773468, "learning_rate": 5.701191314531364e-05, "loss": 0.2683, "step": 6774 }, { "epoch": 2.1753090383689195, "grad_norm": 0.9004967212677002, "learning_rate": 5.6980399111338156e-05, "loss": 0.3288, "step": 6775 }, { "epoch": 2.175630117193771, "grad_norm": 0.5190272927284241, "learning_rate": 5.694889031917047e-05, "loss": 0.2803, "step": 6776 }, { "epoch": 2.1759511960186226, "grad_norm": 0.6035006642341614, "learning_rate": 5.691738677265e-05, "loss": 0.2716, "step": 6777 }, { "epoch": 2.176272274843474, "grad_norm": 0.888232409954071, "learning_rate": 5.6885888475615204e-05, "loss": 0.3035, "step": 6778 }, { "epoch": 2.1765933536683257, "grad_norm": 0.47930851578712463, "learning_rate": 5.6854395431904094e-05, "loss": 0.255, "step": 6779 }, { "epoch": 2.1769144324931773, "grad_norm": 0.3152773678302765, "learning_rate": 5.68229076453541e-05, "loss": 0.3738, "step": 6780 }, { "epoch": 2.1772355113180284, "grad_norm": 0.5532098412513733, "learning_rate": 5.679142511980175e-05, "loss": 0.7954, "step": 6781 }, { "epoch": 2.17755659014288, "grad_norm": 0.48920655250549316, "learning_rate": 5.675994785908329e-05, "loss": 0.4281, "step": 6782 }, { "epoch": 2.1778776689677315, "grad_norm": 0.5712006688117981, "learning_rate": 5.6728475867033925e-05, "loss": 0.255, "step": 6783 }, { "epoch": 2.178198747792583, "grad_norm": 0.6813241839408875, "learning_rate": 5.669700914748857e-05, "loss": 0.1684, "step": 6784 }, { "epoch": 2.1785198266174346, "grad_norm": 0.3276176452636719, "learning_rate": 5.666554770428129e-05, 
"loss": 0.0962, "step": 6785 }, { "epoch": 2.178840905442286, "grad_norm": 0.8344230651855469, "learning_rate": 5.663409154124557e-05, "loss": 0.3953, "step": 6786 }, { "epoch": 2.1791619842671377, "grad_norm": 0.9453107714653015, "learning_rate": 5.6602640662214256e-05, "loss": 0.4333, "step": 6787 }, { "epoch": 2.1794830630919892, "grad_norm": 0.737591564655304, "learning_rate": 5.657119507101954e-05, "loss": 0.3033, "step": 6788 }, { "epoch": 2.179804141916841, "grad_norm": 0.7458065748214722, "learning_rate": 5.653975477149298e-05, "loss": 0.2871, "step": 6789 }, { "epoch": 2.180125220741692, "grad_norm": 0.9435691833496094, "learning_rate": 5.6508319767465465e-05, "loss": 0.4635, "step": 6790 }, { "epoch": 2.1804462995665435, "grad_norm": 0.611143946647644, "learning_rate": 5.647689006276726e-05, "loss": 0.2361, "step": 6791 }, { "epoch": 2.180767378391395, "grad_norm": 0.6122483611106873, "learning_rate": 5.6445465661227994e-05, "loss": 0.3083, "step": 6792 }, { "epoch": 2.1810884572162466, "grad_norm": 0.9717581272125244, "learning_rate": 5.641404656667661e-05, "loss": 0.4157, "step": 6793 }, { "epoch": 2.181409536041098, "grad_norm": 0.8596290349960327, "learning_rate": 5.6382632782941405e-05, "loss": 0.3547, "step": 6794 }, { "epoch": 2.1817306148659497, "grad_norm": 0.9642032384872437, "learning_rate": 5.635122431385016e-05, "loss": 0.3883, "step": 6795 }, { "epoch": 2.182051693690801, "grad_norm": 0.8566706776618958, "learning_rate": 5.63198211632298e-05, "loss": 0.395, "step": 6796 }, { "epoch": 2.1823727725156528, "grad_norm": 0.8098425269126892, "learning_rate": 5.6288423334906735e-05, "loss": 0.3301, "step": 6797 }, { "epoch": 2.1826938513405043, "grad_norm": 0.8384745121002197, "learning_rate": 5.6257030832706695e-05, "loss": 0.3198, "step": 6798 }, { "epoch": 2.1830149301653554, "grad_norm": 0.8153161406517029, "learning_rate": 5.622564366045472e-05, "loss": 0.3101, "step": 6799 }, { "epoch": 2.183336008990207, "grad_norm": 0.8854767084121704, 
"learning_rate": 5.619426182197536e-05, "loss": 0.3009, "step": 6800 }, { "epoch": 2.1836570878150585, "grad_norm": 0.9710304737091064, "learning_rate": 5.616288532109225e-05, "loss": 0.3954, "step": 6801 }, { "epoch": 2.18397816663991, "grad_norm": 0.9379624128341675, "learning_rate": 5.6131514161628626e-05, "loss": 0.3856, "step": 6802 }, { "epoch": 2.1842992454647616, "grad_norm": 1.2100353240966797, "learning_rate": 5.610014834740693e-05, "loss": 0.5041, "step": 6803 }, { "epoch": 2.184620324289613, "grad_norm": 0.8026917576789856, "learning_rate": 5.6068787882249005e-05, "loss": 0.28, "step": 6804 }, { "epoch": 2.1849414031144647, "grad_norm": 0.776182234287262, "learning_rate": 5.6037432769976064e-05, "loss": 0.2885, "step": 6805 }, { "epoch": 2.1852624819393163, "grad_norm": 0.9188643097877502, "learning_rate": 5.6006083014408484e-05, "loss": 0.4413, "step": 6806 }, { "epoch": 2.185583560764168, "grad_norm": 0.9212349057197571, "learning_rate": 5.5974738619366295e-05, "loss": 0.4209, "step": 6807 }, { "epoch": 2.185904639589019, "grad_norm": 1.0901522636413574, "learning_rate": 5.594339958866867e-05, "loss": 0.3755, "step": 6808 }, { "epoch": 2.1862257184138705, "grad_norm": 1.0959854125976562, "learning_rate": 5.591206592613416e-05, "loss": 0.3334, "step": 6809 }, { "epoch": 2.186546797238722, "grad_norm": 0.8223209381103516, "learning_rate": 5.588073763558068e-05, "loss": 0.3331, "step": 6810 }, { "epoch": 2.1868678760635736, "grad_norm": 0.9825518131256104, "learning_rate": 5.584941472082549e-05, "loss": 0.3669, "step": 6811 }, { "epoch": 2.187188954888425, "grad_norm": 0.7833166122436523, "learning_rate": 5.5818097185685206e-05, "loss": 0.3309, "step": 6812 }, { "epoch": 2.1875100337132767, "grad_norm": 0.5879184603691101, "learning_rate": 5.578678503397574e-05, "loss": 0.2564, "step": 6813 }, { "epoch": 2.1878311125381282, "grad_norm": 0.9265559315681458, "learning_rate": 5.575547826951242e-05, "loss": 0.3454, "step": 6814 }, { "epoch": 
2.18815219136298, "grad_norm": 0.9046943187713623, "learning_rate": 5.572417689610987e-05, "loss": 0.3133, "step": 6815 }, { "epoch": 2.1884732701878313, "grad_norm": 1.2076610326766968, "learning_rate": 5.5692880917582046e-05, "loss": 0.4036, "step": 6816 }, { "epoch": 2.1887943490126824, "grad_norm": 0.8415603637695312, "learning_rate": 5.566159033774225e-05, "loss": 0.3282, "step": 6817 }, { "epoch": 2.189115427837534, "grad_norm": 0.725054144859314, "learning_rate": 5.5630305160403275e-05, "loss": 0.2602, "step": 6818 }, { "epoch": 2.1894365066623855, "grad_norm": 0.7742078900337219, "learning_rate": 5.5599025389376935e-05, "loss": 0.3131, "step": 6819 }, { "epoch": 2.189757585487237, "grad_norm": 0.7798224091529846, "learning_rate": 5.556775102847475e-05, "loss": 0.3724, "step": 6820 }, { "epoch": 2.1900786643120886, "grad_norm": 0.964368462562561, "learning_rate": 5.553648208150728e-05, "loss": 0.3715, "step": 6821 }, { "epoch": 2.19039974313694, "grad_norm": 1.1923866271972656, "learning_rate": 5.5505218552284565e-05, "loss": 0.3996, "step": 6822 }, { "epoch": 2.1907208219617917, "grad_norm": 0.852208137512207, "learning_rate": 5.547396044461608e-05, "loss": 0.3929, "step": 6823 }, { "epoch": 2.1910419007866433, "grad_norm": 0.5591673254966736, "learning_rate": 5.544270776231038e-05, "loss": 0.2536, "step": 6824 }, { "epoch": 2.191362979611495, "grad_norm": 0.6292114853858948, "learning_rate": 5.541146050917561e-05, "loss": 0.3284, "step": 6825 }, { "epoch": 2.191684058436346, "grad_norm": 0.8230541944503784, "learning_rate": 5.5380218689019125e-05, "loss": 0.3711, "step": 6826 }, { "epoch": 2.1920051372611975, "grad_norm": 0.500881016254425, "learning_rate": 5.5348982305647643e-05, "loss": 0.2666, "step": 6827 }, { "epoch": 2.192326216086049, "grad_norm": 0.8684194087982178, "learning_rate": 5.5317751362867234e-05, "loss": 0.3415, "step": 6828 }, { "epoch": 2.1926472949109006, "grad_norm": 1.012944221496582, "learning_rate": 5.5286525864483285e-05, "loss": 
0.3696, "step": 6829 }, { "epoch": 2.192968373735752, "grad_norm": 0.5349951386451721, "learning_rate": 5.525530581430054e-05, "loss": 0.6524, "step": 6830 }, { "epoch": 2.1932894525606037, "grad_norm": 0.5182074308395386, "learning_rate": 5.522409121612304e-05, "loss": 0.5625, "step": 6831 }, { "epoch": 2.1936105313854553, "grad_norm": 0.450907438993454, "learning_rate": 5.519288207375422e-05, "loss": 0.2453, "step": 6832 }, { "epoch": 2.193931610210307, "grad_norm": 0.4998016655445099, "learning_rate": 5.5161678390996796e-05, "loss": 0.1582, "step": 6833 }, { "epoch": 2.1942526890351584, "grad_norm": 0.7120688557624817, "learning_rate": 5.513048017165284e-05, "loss": 0.2156, "step": 6834 }, { "epoch": 2.1945737678600095, "grad_norm": 0.4577905237674713, "learning_rate": 5.509928741952379e-05, "loss": 0.203, "step": 6835 }, { "epoch": 2.194894846684861, "grad_norm": 0.744748055934906, "learning_rate": 5.506810013841036e-05, "loss": 0.377, "step": 6836 }, { "epoch": 2.1952159255097126, "grad_norm": 1.0785094499588013, "learning_rate": 5.50369183321126e-05, "loss": 0.4875, "step": 6837 }, { "epoch": 2.195537004334564, "grad_norm": 0.8972413539886475, "learning_rate": 5.5005742004430025e-05, "loss": 0.398, "step": 6838 }, { "epoch": 2.1958580831594157, "grad_norm": 0.7619357705116272, "learning_rate": 5.497457115916127e-05, "loss": 0.3321, "step": 6839 }, { "epoch": 2.196179161984267, "grad_norm": 0.711033046245575, "learning_rate": 5.494340580010441e-05, "loss": 0.3118, "step": 6840 }, { "epoch": 2.1965002408091188, "grad_norm": 0.715796172618866, "learning_rate": 5.491224593105695e-05, "loss": 0.3553, "step": 6841 }, { "epoch": 2.1968213196339703, "grad_norm": 0.6636841297149658, "learning_rate": 5.488109155581549e-05, "loss": 0.2737, "step": 6842 }, { "epoch": 2.197142398458822, "grad_norm": 0.918535590171814, "learning_rate": 5.484994267817624e-05, "loss": 0.284, "step": 6843 }, { "epoch": 2.197463477283673, "grad_norm": 0.7877380847930908, "learning_rate": 
5.481879930193443e-05, "loss": 0.3617, "step": 6844 }, { "epoch": 2.1977845561085245, "grad_norm": 0.9507259130477905, "learning_rate": 5.478766143088492e-05, "loss": 0.3557, "step": 6845 }, { "epoch": 2.198105634933376, "grad_norm": 0.8569693565368652, "learning_rate": 5.475652906882173e-05, "loss": 0.3243, "step": 6846 }, { "epoch": 2.1984267137582276, "grad_norm": 0.8883730173110962, "learning_rate": 5.4725402219538236e-05, "loss": 0.3438, "step": 6847 }, { "epoch": 2.198747792583079, "grad_norm": 0.8322269320487976, "learning_rate": 5.469428088682717e-05, "loss": 0.3581, "step": 6848 }, { "epoch": 2.1990688714079307, "grad_norm": 0.8799639344215393, "learning_rate": 5.466316507448048e-05, "loss": 0.3936, "step": 6849 }, { "epoch": 2.1993899502327823, "grad_norm": 1.0511608123779297, "learning_rate": 5.4632054786289656e-05, "loss": 0.4143, "step": 6850 }, { "epoch": 2.199711029057634, "grad_norm": 0.8037779927253723, "learning_rate": 5.4600950026045326e-05, "loss": 0.3097, "step": 6851 }, { "epoch": 2.2000321078824854, "grad_norm": 0.8404053449630737, "learning_rate": 5.4569850797537536e-05, "loss": 0.3181, "step": 6852 }, { "epoch": 2.2003531867073365, "grad_norm": 1.0227787494659424, "learning_rate": 5.4538757104555615e-05, "loss": 0.3921, "step": 6853 }, { "epoch": 2.200674265532188, "grad_norm": 0.5781304240226746, "learning_rate": 5.4507668950888245e-05, "loss": 0.2198, "step": 6854 }, { "epoch": 2.2009953443570396, "grad_norm": 0.811543881893158, "learning_rate": 5.447658634032338e-05, "loss": 0.3287, "step": 6855 }, { "epoch": 2.201316423181891, "grad_norm": 0.7216514945030212, "learning_rate": 5.4445509276648466e-05, "loss": 0.3109, "step": 6856 }, { "epoch": 2.2016375020067427, "grad_norm": 0.7720044851303101, "learning_rate": 5.441443776365003e-05, "loss": 0.3122, "step": 6857 }, { "epoch": 2.2019585808315942, "grad_norm": 0.8112689852714539, "learning_rate": 5.438337180511406e-05, "loss": 0.3681, "step": 6858 }, { "epoch": 2.202279659656446, 
"grad_norm": 0.9748257398605347, "learning_rate": 5.435231140482587e-05, "loss": 0.4623, "step": 6859 }, { "epoch": 2.2026007384812973, "grad_norm": 0.9538961052894592, "learning_rate": 5.4321256566570036e-05, "loss": 0.4044, "step": 6860 }, { "epoch": 2.202921817306149, "grad_norm": 0.7297887206077576, "learning_rate": 5.4290207294130615e-05, "loss": 0.2882, "step": 6861 }, { "epoch": 2.203242896131, "grad_norm": 0.8942319750785828, "learning_rate": 5.42591635912907e-05, "loss": 0.3243, "step": 6862 }, { "epoch": 2.2035639749558515, "grad_norm": 0.6959256529808044, "learning_rate": 5.4228125461833024e-05, "loss": 0.2407, "step": 6863 }, { "epoch": 2.203885053780703, "grad_norm": 0.8571872115135193, "learning_rate": 5.419709290953936e-05, "loss": 0.3134, "step": 6864 }, { "epoch": 2.2042061326055546, "grad_norm": 0.8868092894554138, "learning_rate": 5.416606593819101e-05, "loss": 0.3696, "step": 6865 }, { "epoch": 2.204527211430406, "grad_norm": 1.1420269012451172, "learning_rate": 5.4135044551568546e-05, "loss": 0.338, "step": 6866 }, { "epoch": 2.2048482902552577, "grad_norm": 0.7926510572433472, "learning_rate": 5.4104028753451696e-05, "loss": 0.3127, "step": 6867 }, { "epoch": 2.2051693690801093, "grad_norm": 0.7856837511062622, "learning_rate": 5.407301854761977e-05, "loss": 0.2912, "step": 6868 }, { "epoch": 2.205490447904961, "grad_norm": 0.9291871786117554, "learning_rate": 5.404201393785122e-05, "loss": 0.3568, "step": 6869 }, { "epoch": 2.2058115267298124, "grad_norm": 0.7121933102607727, "learning_rate": 5.401101492792386e-05, "loss": 0.3451, "step": 6870 }, { "epoch": 2.2061326055546635, "grad_norm": 0.6835899949073792, "learning_rate": 5.398002152161484e-05, "loss": 0.284, "step": 6871 }, { "epoch": 2.206453684379515, "grad_norm": 0.9216342568397522, "learning_rate": 5.394903372270062e-05, "loss": 0.357, "step": 6872 }, { "epoch": 2.2067747632043666, "grad_norm": 0.6000509262084961, "learning_rate": 5.3918051534956926e-05, "loss": 0.2943, "step": 6873 
}, { "epoch": 2.207095842029218, "grad_norm": 1.0322999954223633, "learning_rate": 5.388707496215888e-05, "loss": 0.3784, "step": 6874 }, { "epoch": 2.2074169208540697, "grad_norm": 0.9377049803733826, "learning_rate": 5.3856104008080876e-05, "loss": 0.3036, "step": 6875 }, { "epoch": 2.2077379996789213, "grad_norm": 0.6841163039207458, "learning_rate": 5.382513867649663e-05, "loss": 0.2938, "step": 6876 }, { "epoch": 2.208059078503773, "grad_norm": 0.9732836484909058, "learning_rate": 5.3794178971179165e-05, "loss": 0.377, "step": 6877 }, { "epoch": 2.2083801573286244, "grad_norm": 0.5027754306793213, "learning_rate": 5.3763224895900846e-05, "loss": 0.2797, "step": 6878 }, { "epoch": 2.208701236153476, "grad_norm": 0.867052435874939, "learning_rate": 5.373227645443332e-05, "loss": 0.3347, "step": 6879 }, { "epoch": 2.209022314978327, "grad_norm": 0.4138392210006714, "learning_rate": 5.3701333650547525e-05, "loss": 0.4415, "step": 6880 }, { "epoch": 2.2093433938031786, "grad_norm": 0.4743240475654602, "learning_rate": 5.3670396488013854e-05, "loss": 0.4268, "step": 6881 }, { "epoch": 2.20966447262803, "grad_norm": 0.6370947957038879, "learning_rate": 5.3639464970601775e-05, "loss": 0.5011, "step": 6882 }, { "epoch": 2.2099855514528817, "grad_norm": 0.6120859980583191, "learning_rate": 5.360853910208028e-05, "loss": 0.4556, "step": 6883 }, { "epoch": 2.210306630277733, "grad_norm": 0.5497114062309265, "learning_rate": 5.357761888621764e-05, "loss": 0.1802, "step": 6884 }, { "epoch": 2.2106277091025848, "grad_norm": 0.7469709515571594, "learning_rate": 5.3546704326781236e-05, "loss": 0.1502, "step": 6885 }, { "epoch": 2.2109487879274363, "grad_norm": 0.6369648575782776, "learning_rate": 5.351579542753807e-05, "loss": 0.2719, "step": 6886 }, { "epoch": 2.211269866752288, "grad_norm": 0.8421298861503601, "learning_rate": 5.348489219225416e-05, "loss": 0.4426, "step": 6887 }, { "epoch": 2.2115909455771394, "grad_norm": 0.9989409446716309, "learning_rate": 
5.345399462469509e-05, "loss": 0.4848, "step": 6888 }, { "epoch": 2.2119120244019905, "grad_norm": 0.9956933259963989, "learning_rate": 5.3423102728625574e-05, "loss": 0.4123, "step": 6889 }, { "epoch": 2.212233103226842, "grad_norm": 0.6040937900543213, "learning_rate": 5.3392216507809714e-05, "loss": 0.2475, "step": 6890 }, { "epoch": 2.2125541820516936, "grad_norm": 0.8480896353721619, "learning_rate": 5.33613359660109e-05, "loss": 0.3257, "step": 6891 }, { "epoch": 2.212875260876545, "grad_norm": 0.824813961982727, "learning_rate": 5.33304611069918e-05, "loss": 0.3541, "step": 6892 }, { "epoch": 2.2131963397013967, "grad_norm": 1.2338895797729492, "learning_rate": 5.329959193451448e-05, "loss": 0.347, "step": 6893 }, { "epoch": 2.2135174185262483, "grad_norm": 0.9513993263244629, "learning_rate": 5.326872845234021e-05, "loss": 0.3895, "step": 6894 }, { "epoch": 2.2138384973511, "grad_norm": 0.9307944774627686, "learning_rate": 5.3237870664229636e-05, "loss": 0.4291, "step": 6895 }, { "epoch": 2.2141595761759514, "grad_norm": 0.8363532423973083, "learning_rate": 5.320701857394268e-05, "loss": 0.3269, "step": 6896 }, { "epoch": 2.2144806550008025, "grad_norm": 0.873292863368988, "learning_rate": 5.317617218523856e-05, "loss": 0.4286, "step": 6897 }, { "epoch": 2.214801733825654, "grad_norm": 0.7528856992721558, "learning_rate": 5.3145331501875796e-05, "loss": 0.3506, "step": 6898 }, { "epoch": 2.2151228126505056, "grad_norm": 0.7959845662117004, "learning_rate": 5.311449652761235e-05, "loss": 0.3377, "step": 6899 }, { "epoch": 2.215443891475357, "grad_norm": 0.954590916633606, "learning_rate": 5.3083667266205194e-05, "loss": 0.3659, "step": 6900 }, { "epoch": 2.2157649703002087, "grad_norm": 0.9657596945762634, "learning_rate": 5.305284372141095e-05, "loss": 0.437, "step": 6901 }, { "epoch": 2.2160860491250602, "grad_norm": 0.9250769019126892, "learning_rate": 5.302202589698525e-05, "loss": 0.3796, "step": 6902 }, { "epoch": 2.216407127949912, "grad_norm": 
0.8130130171775818, "learning_rate": 5.299121379668316e-05, "loss": 0.286, "step": 6903 }, { "epoch": 2.2167282067747633, "grad_norm": 0.8332054018974304, "learning_rate": 5.296040742425916e-05, "loss": 0.3795, "step": 6904 }, { "epoch": 2.217049285599615, "grad_norm": 0.786918580532074, "learning_rate": 5.292960678346675e-05, "loss": 0.3355, "step": 6905 }, { "epoch": 2.217370364424466, "grad_norm": 1.060431718826294, "learning_rate": 5.2898811878059e-05, "loss": 0.4117, "step": 6906 }, { "epoch": 2.2176914432493176, "grad_norm": 0.7190358638763428, "learning_rate": 5.286802271178815e-05, "loss": 0.2718, "step": 6907 }, { "epoch": 2.218012522074169, "grad_norm": 1.1578835248947144, "learning_rate": 5.2837239288405784e-05, "loss": 0.4461, "step": 6908 }, { "epoch": 2.2183336008990207, "grad_norm": 0.8902946710586548, "learning_rate": 5.2806461611662735e-05, "loss": 0.3296, "step": 6909 }, { "epoch": 2.218654679723872, "grad_norm": 0.8117751479148865, "learning_rate": 5.277568968530919e-05, "loss": 0.3455, "step": 6910 }, { "epoch": 2.2189757585487238, "grad_norm": 0.6965182423591614, "learning_rate": 5.274492351309461e-05, "loss": 0.2972, "step": 6911 }, { "epoch": 2.2192968373735753, "grad_norm": 0.7272025942802429, "learning_rate": 5.271416309876776e-05, "loss": 0.2932, "step": 6912 }, { "epoch": 2.219617916198427, "grad_norm": 0.7494564056396484, "learning_rate": 5.26834084460767e-05, "loss": 0.2976, "step": 6913 }, { "epoch": 2.2199389950232784, "grad_norm": 0.7828695774078369, "learning_rate": 5.265265955876879e-05, "loss": 0.2948, "step": 6914 }, { "epoch": 2.2202600738481295, "grad_norm": 0.8334441781044006, "learning_rate": 5.2621916440590715e-05, "loss": 0.3339, "step": 6915 }, { "epoch": 2.220581152672981, "grad_norm": 0.6018253564834595, "learning_rate": 5.259117909528839e-05, "loss": 0.265, "step": 6916 }, { "epoch": 2.2209022314978326, "grad_norm": 1.2306244373321533, "learning_rate": 5.256044752660709e-05, "loss": 0.2778, "step": 6917 }, { "epoch": 
2.221223310322684, "grad_norm": 1.4875001907348633, "learning_rate": 5.2529721738291315e-05, "loss": 0.3668, "step": 6918 }, { "epoch": 2.2215443891475357, "grad_norm": 1.1924291849136353, "learning_rate": 5.2499001734085044e-05, "loss": 0.4159, "step": 6919 }, { "epoch": 2.2218654679723873, "grad_norm": 0.8212143182754517, "learning_rate": 5.2468287517731276e-05, "loss": 0.3293, "step": 6920 }, { "epoch": 2.222186546797239, "grad_norm": 0.7851700782775879, "learning_rate": 5.243757909297247e-05, "loss": 0.3297, "step": 6921 }, { "epoch": 2.2225076256220904, "grad_norm": 0.7045104503631592, "learning_rate": 5.2406876463550445e-05, "loss": 0.2812, "step": 6922 }, { "epoch": 2.222828704446942, "grad_norm": 1.1284433603286743, "learning_rate": 5.237617963320608e-05, "loss": 0.3043, "step": 6923 }, { "epoch": 2.223149783271793, "grad_norm": 0.7409510016441345, "learning_rate": 5.234548860567985e-05, "loss": 0.3132, "step": 6924 }, { "epoch": 2.2234708620966446, "grad_norm": 1.1001427173614502, "learning_rate": 5.2314803384711195e-05, "loss": 0.3493, "step": 6925 }, { "epoch": 2.223791940921496, "grad_norm": 0.7392867803573608, "learning_rate": 5.2284123974039154e-05, "loss": 0.3222, "step": 6926 }, { "epoch": 2.2241130197463477, "grad_norm": 0.556638777256012, "learning_rate": 5.225345037740186e-05, "loss": 0.2963, "step": 6927 }, { "epoch": 2.2244340985711992, "grad_norm": 0.4246133863925934, "learning_rate": 5.222278259853681e-05, "loss": 0.2487, "step": 6928 }, { "epoch": 2.2247551773960508, "grad_norm": 1.3218295574188232, "learning_rate": 5.2192120641180786e-05, "loss": 0.2931, "step": 6929 }, { "epoch": 2.2250762562209023, "grad_norm": 0.4780152440071106, "learning_rate": 5.216146450906984e-05, "loss": 0.4964, "step": 6930 }, { "epoch": 2.225397335045754, "grad_norm": 0.41451188921928406, "learning_rate": 5.213081420593933e-05, "loss": 0.4104, "step": 6931 }, { "epoch": 2.2257184138706054, "grad_norm": 0.5102071762084961, "learning_rate": 5.210016973552391e-05, 
"loss": 0.2379, "step": 6932 }, { "epoch": 2.2260394926954565, "grad_norm": 0.32184290885925293, "learning_rate": 5.20695311015575e-05, "loss": 0.0819, "step": 6933 }, { "epoch": 2.226360571520308, "grad_norm": 0.5118618011474609, "learning_rate": 5.2038898307773354e-05, "loss": 0.1675, "step": 6934 }, { "epoch": 2.2266816503451596, "grad_norm": 0.561114490032196, "learning_rate": 5.200827135790396e-05, "loss": 0.2358, "step": 6935 }, { "epoch": 2.227002729170011, "grad_norm": 0.8075849413871765, "learning_rate": 5.197765025568109e-05, "loss": 0.4558, "step": 6936 }, { "epoch": 2.2273238079948627, "grad_norm": 0.7261122465133667, "learning_rate": 5.194703500483593e-05, "loss": 0.3476, "step": 6937 }, { "epoch": 2.2276448868197143, "grad_norm": 0.8632923364639282, "learning_rate": 5.1916425609098775e-05, "loss": 0.4246, "step": 6938 }, { "epoch": 2.227965965644566, "grad_norm": 0.8326036334037781, "learning_rate": 5.188582207219931e-05, "loss": 0.3473, "step": 6939 }, { "epoch": 2.2282870444694174, "grad_norm": 0.9028269648551941, "learning_rate": 5.1855224397866476e-05, "loss": 0.3969, "step": 6940 }, { "epoch": 2.228608123294269, "grad_norm": 0.8235176205635071, "learning_rate": 5.182463258982846e-05, "loss": 0.2969, "step": 6941 }, { "epoch": 2.22892920211912, "grad_norm": 0.8503168821334839, "learning_rate": 5.179404665181291e-05, "loss": 0.3176, "step": 6942 }, { "epoch": 2.2292502809439716, "grad_norm": 0.8072776794433594, "learning_rate": 5.1763466587546485e-05, "loss": 0.3188, "step": 6943 }, { "epoch": 2.229571359768823, "grad_norm": 0.9978686571121216, "learning_rate": 5.1732892400755376e-05, "loss": 0.3749, "step": 6944 }, { "epoch": 2.2298924385936747, "grad_norm": 0.9184989929199219, "learning_rate": 5.170232409516496e-05, "loss": 0.4544, "step": 6945 }, { "epoch": 2.2302135174185262, "grad_norm": 0.7581484913825989, "learning_rate": 5.1671761674499765e-05, "loss": 0.2865, "step": 6946 }, { "epoch": 2.230534596243378, "grad_norm": 0.6982542276382446, 
"learning_rate": 5.1641205142483894e-05, "loss": 0.285, "step": 6947 }, { "epoch": 2.2308556750682293, "grad_norm": 1.0385242700576782, "learning_rate": 5.16106545028404e-05, "loss": 0.3419, "step": 6948 }, { "epoch": 2.231176753893081, "grad_norm": 0.9583960175514221, "learning_rate": 5.158010975929193e-05, "loss": 0.3959, "step": 6949 }, { "epoch": 2.2314978327179325, "grad_norm": 1.119672417640686, "learning_rate": 5.1549570915560206e-05, "loss": 0.4012, "step": 6950 }, { "epoch": 2.2318189115427836, "grad_norm": 0.9875466823577881, "learning_rate": 5.15190379753663e-05, "loss": 0.4324, "step": 6951 }, { "epoch": 2.232139990367635, "grad_norm": 0.9298665523529053, "learning_rate": 5.148851094243057e-05, "loss": 0.4168, "step": 6952 }, { "epoch": 2.2324610691924867, "grad_norm": 0.7571067810058594, "learning_rate": 5.145798982047261e-05, "loss": 0.3063, "step": 6953 }, { "epoch": 2.232782148017338, "grad_norm": 0.9960950613021851, "learning_rate": 5.1427474613211356e-05, "loss": 0.4125, "step": 6954 }, { "epoch": 2.2331032268421898, "grad_norm": 0.9957188367843628, "learning_rate": 5.1396965324364986e-05, "loss": 0.3128, "step": 6955 }, { "epoch": 2.2334243056670413, "grad_norm": 0.9226759672164917, "learning_rate": 5.1366461957650954e-05, "loss": 0.3247, "step": 6956 }, { "epoch": 2.233745384491893, "grad_norm": 0.7779395580291748, "learning_rate": 5.133596451678603e-05, "loss": 0.297, "step": 6957 }, { "epoch": 2.2340664633167444, "grad_norm": 1.165313482284546, "learning_rate": 5.13054730054862e-05, "loss": 0.4306, "step": 6958 }, { "epoch": 2.234387542141596, "grad_norm": 0.7935650944709778, "learning_rate": 5.127498742746675e-05, "loss": 0.3009, "step": 6959 }, { "epoch": 2.234708620966447, "grad_norm": 1.200869083404541, "learning_rate": 5.1244507786442356e-05, "loss": 0.4304, "step": 6960 }, { "epoch": 2.2350296997912986, "grad_norm": 1.077374815940857, "learning_rate": 5.121403408612672e-05, "loss": 0.498, "step": 6961 }, { "epoch": 2.23535077861615, 
"grad_norm": 0.8770790100097656, "learning_rate": 5.1183566330233124e-05, "loss": 0.3659, "step": 6962 }, { "epoch": 2.2356718574410017, "grad_norm": 0.8858685493469238, "learning_rate": 5.115310452247386e-05, "loss": 0.3483, "step": 6963 }, { "epoch": 2.2359929362658533, "grad_norm": 0.9457120895385742, "learning_rate": 5.112264866656059e-05, "loss": 0.4015, "step": 6964 }, { "epoch": 2.236314015090705, "grad_norm": 0.7275859713554382, "learning_rate": 5.1092198766204415e-05, "loss": 0.3293, "step": 6965 }, { "epoch": 2.2366350939155564, "grad_norm": 0.8628625869750977, "learning_rate": 5.1061754825115374e-05, "loss": 0.3513, "step": 6966 }, { "epoch": 2.236956172740408, "grad_norm": 1.0785568952560425, "learning_rate": 5.103131684700314e-05, "loss": 0.3859, "step": 6967 }, { "epoch": 2.2372772515652595, "grad_norm": 1.0810168981552124, "learning_rate": 5.100088483557634e-05, "loss": 0.3165, "step": 6968 }, { "epoch": 2.2375983303901106, "grad_norm": 0.8149095177650452, "learning_rate": 5.097045879454313e-05, "loss": 0.3059, "step": 6969 }, { "epoch": 2.237919409214962, "grad_norm": 0.921907901763916, "learning_rate": 5.0940038727610796e-05, "loss": 0.279, "step": 6970 }, { "epoch": 2.2382404880398137, "grad_norm": 0.6431214213371277, "learning_rate": 5.090962463848592e-05, "loss": 0.3469, "step": 6971 }, { "epoch": 2.2385615668646652, "grad_norm": 0.8860642313957214, "learning_rate": 5.087921653087437e-05, "loss": 0.4101, "step": 6972 }, { "epoch": 2.238882645689517, "grad_norm": 0.930432140827179, "learning_rate": 5.0848814408481305e-05, "loss": 0.3629, "step": 6973 }, { "epoch": 2.2392037245143683, "grad_norm": 1.0363874435424805, "learning_rate": 5.0818418275011104e-05, "loss": 0.2949, "step": 6974 }, { "epoch": 2.23952480333922, "grad_norm": 1.0017728805541992, "learning_rate": 5.0788028134167456e-05, "loss": 0.37, "step": 6975 }, { "epoch": 2.2398458821640714, "grad_norm": 0.7278321385383606, "learning_rate": 5.07576439896533e-05, "loss": 0.2832, "step": 
6976 }, { "epoch": 2.240166960988923, "grad_norm": 1.1001341342926025, "learning_rate": 5.072726584517086e-05, "loss": 0.3137, "step": 6977 }, { "epoch": 2.240488039813774, "grad_norm": 0.9127258062362671, "learning_rate": 5.069689370442161e-05, "loss": 0.2938, "step": 6978 }, { "epoch": 2.2408091186386256, "grad_norm": 0.6705423593521118, "learning_rate": 5.066652757110628e-05, "loss": 0.2919, "step": 6979 }, { "epoch": 2.241130197463477, "grad_norm": 0.6100792288780212, "learning_rate": 5.0636167448924987e-05, "loss": 0.8073, "step": 6980 }, { "epoch": 2.2414512762883287, "grad_norm": 0.6084210872650146, "learning_rate": 5.0605813341576924e-05, "loss": 0.8022, "step": 6981 }, { "epoch": 2.2417723551131803, "grad_norm": 0.42793241143226624, "learning_rate": 5.057546525276068e-05, "loss": 0.2486, "step": 6982 }, { "epoch": 2.242093433938032, "grad_norm": 0.3993290662765503, "learning_rate": 5.054512318617406e-05, "loss": 0.1494, "step": 6983 }, { "epoch": 2.2424145127628834, "grad_norm": 0.5702289938926697, "learning_rate": 5.051478714551414e-05, "loss": 0.3078, "step": 6984 }, { "epoch": 2.242735591587735, "grad_norm": 0.3860454559326172, "learning_rate": 5.048445713447738e-05, "loss": 0.0808, "step": 6985 }, { "epoch": 2.2430566704125865, "grad_norm": 0.7141852974891663, "learning_rate": 5.045413315675924e-05, "loss": 0.3344, "step": 6986 }, { "epoch": 2.2433777492374376, "grad_norm": 0.7017838954925537, "learning_rate": 5.0423815216054724e-05, "loss": 0.348, "step": 6987 }, { "epoch": 2.243698828062289, "grad_norm": 1.1939187049865723, "learning_rate": 5.039350331605794e-05, "loss": 0.4238, "step": 6988 }, { "epoch": 2.2440199068871407, "grad_norm": 0.8673609495162964, "learning_rate": 5.036319746046232e-05, "loss": 0.4174, "step": 6989 }, { "epoch": 2.2443409857119923, "grad_norm": 0.7690039873123169, "learning_rate": 5.033289765296054e-05, "loss": 0.3594, "step": 6990 }, { "epoch": 2.244662064536844, "grad_norm": 0.624330997467041, "learning_rate": 
5.0302603897244474e-05, "loss": 0.2757, "step": 6991 }, { "epoch": 2.2449831433616954, "grad_norm": 0.8041682839393616, "learning_rate": 5.0272316197005396e-05, "loss": 0.347, "step": 6992 }, { "epoch": 2.245304222186547, "grad_norm": 0.6567179560661316, "learning_rate": 5.024203455593375e-05, "loss": 0.2858, "step": 6993 }, { "epoch": 2.2456253010113985, "grad_norm": 0.9437269568443298, "learning_rate": 5.021175897771927e-05, "loss": 0.3507, "step": 6994 }, { "epoch": 2.24594637983625, "grad_norm": 1.0030709505081177, "learning_rate": 5.018148946605092e-05, "loss": 0.4534, "step": 6995 }, { "epoch": 2.246267458661101, "grad_norm": 0.8728824853897095, "learning_rate": 5.015122602461698e-05, "loss": 0.3801, "step": 6996 }, { "epoch": 2.2465885374859527, "grad_norm": 0.9060454964637756, "learning_rate": 5.012096865710494e-05, "loss": 0.3869, "step": 6997 }, { "epoch": 2.246909616310804, "grad_norm": 0.8747976422309875, "learning_rate": 5.0090717367201554e-05, "loss": 0.3823, "step": 6998 }, { "epoch": 2.2472306951356558, "grad_norm": 0.9838657975196838, "learning_rate": 5.006047215859289e-05, "loss": 0.2901, "step": 6999 }, { "epoch": 2.2475517739605073, "grad_norm": 0.7466594576835632, "learning_rate": 5.003023303496419e-05, "loss": 0.3505, "step": 7000 }, { "epoch": 2.247872852785359, "grad_norm": 0.8229215145111084, "learning_rate": 5.000000000000002e-05, "loss": 0.3138, "step": 7001 }, { "epoch": 2.2481939316102104, "grad_norm": 0.7786136865615845, "learning_rate": 4.996977305738415e-05, "loss": 0.3044, "step": 7002 }, { "epoch": 2.248515010435062, "grad_norm": 0.6571848392486572, "learning_rate": 4.9939552210799755e-05, "loss": 0.2649, "step": 7003 }, { "epoch": 2.248836089259913, "grad_norm": 0.6646254062652588, "learning_rate": 4.990933746392899e-05, "loss": 0.3347, "step": 7004 }, { "epoch": 2.2491571680847646, "grad_norm": 0.8542558550834656, "learning_rate": 4.98791288204536e-05, "loss": 0.3518, "step": 7005 }, { "epoch": 2.249478246909616, "grad_norm": 
0.726729154586792, "learning_rate": 4.9848926284054255e-05, "loss": 0.2939, "step": 7006 }, { "epoch": 2.2497993257344677, "grad_norm": 0.9049625396728516, "learning_rate": 4.981872985841115e-05, "loss": 0.3657, "step": 7007 }, { "epoch": 2.2501204045593193, "grad_norm": 0.9269988536834717, "learning_rate": 4.978853954720364e-05, "loss": 0.2894, "step": 7008 }, { "epoch": 2.250441483384171, "grad_norm": 1.2654590606689453, "learning_rate": 4.97583553541102e-05, "loss": 0.4061, "step": 7009 }, { "epoch": 2.2507625622090224, "grad_norm": 0.8163503408432007, "learning_rate": 4.97281772828088e-05, "loss": 0.336, "step": 7010 }, { "epoch": 2.251083641033874, "grad_norm": 1.1745022535324097, "learning_rate": 4.969800533697649e-05, "loss": 0.3993, "step": 7011 }, { "epoch": 2.2514047198587255, "grad_norm": 0.7915329933166504, "learning_rate": 4.966783952028967e-05, "loss": 0.3401, "step": 7012 }, { "epoch": 2.2517257986835766, "grad_norm": 0.9306576251983643, "learning_rate": 4.9637679836423924e-05, "loss": 0.3605, "step": 7013 }, { "epoch": 2.252046877508428, "grad_norm": 0.5942294597625732, "learning_rate": 4.960752628905412e-05, "loss": 0.2566, "step": 7014 }, { "epoch": 2.2523679563332797, "grad_norm": 0.743722140789032, "learning_rate": 4.957737888185439e-05, "loss": 0.2979, "step": 7015 }, { "epoch": 2.2526890351581312, "grad_norm": 0.7660803198814392, "learning_rate": 4.9547237618498085e-05, "loss": 0.2865, "step": 7016 }, { "epoch": 2.253010113982983, "grad_norm": 0.5819270014762878, "learning_rate": 4.9517102502657845e-05, "loss": 0.2663, "step": 7017 }, { "epoch": 2.2533311928078343, "grad_norm": 0.7460042834281921, "learning_rate": 4.9486973538005535e-05, "loss": 0.2898, "step": 7018 }, { "epoch": 2.253652271632686, "grad_norm": 0.8672288060188293, "learning_rate": 4.945685072821227e-05, "loss": 0.3627, "step": 7019 }, { "epoch": 2.2539733504575374, "grad_norm": 1.0746335983276367, "learning_rate": 4.9426734076948436e-05, "loss": 0.3425, "step": 7020 }, { 
"epoch": 2.254294429282389, "grad_norm": 0.6159694790840149, "learning_rate": 4.939662358788364e-05, "loss": 0.2663, "step": 7021 }, { "epoch": 2.25461550810724, "grad_norm": 0.8164811730384827, "learning_rate": 4.9366519264686725e-05, "loss": 0.251, "step": 7022 }, { "epoch": 2.2549365869320916, "grad_norm": 0.7953729033470154, "learning_rate": 4.933642111102594e-05, "loss": 0.3071, "step": 7023 }, { "epoch": 2.255257665756943, "grad_norm": 0.7565558552742004, "learning_rate": 4.9306329130568474e-05, "loss": 0.3028, "step": 7024 }, { "epoch": 2.2555787445817947, "grad_norm": 1.2023780345916748, "learning_rate": 4.927624332698109e-05, "loss": 0.4058, "step": 7025 }, { "epoch": 2.2558998234066463, "grad_norm": 0.4921356439590454, "learning_rate": 4.924616370392961e-05, "loss": 0.2675, "step": 7026 }, { "epoch": 2.256220902231498, "grad_norm": 0.8888304233551025, "learning_rate": 4.921609026507907e-05, "loss": 0.3284, "step": 7027 }, { "epoch": 2.2565419810563494, "grad_norm": 0.6142435669898987, "learning_rate": 4.918602301409395e-05, "loss": 0.2956, "step": 7028 }, { "epoch": 2.256863059881201, "grad_norm": 0.3907153308391571, "learning_rate": 4.915596195463773e-05, "loss": 0.2773, "step": 7029 }, { "epoch": 2.2571841387060525, "grad_norm": 0.5676328539848328, "learning_rate": 4.912590709037335e-05, "loss": 0.6601, "step": 7030 }, { "epoch": 2.2575052175309036, "grad_norm": 0.5613508224487305, "learning_rate": 4.909585842496287e-05, "loss": 0.4196, "step": 7031 }, { "epoch": 2.257826296355755, "grad_norm": 0.6825602054595947, "learning_rate": 4.906581596206764e-05, "loss": 0.4297, "step": 7032 }, { "epoch": 2.2581473751806067, "grad_norm": 0.6425372362136841, "learning_rate": 4.9035779705348226e-05, "loss": 0.1434, "step": 7033 }, { "epoch": 2.2584684540054583, "grad_norm": 0.3993998169898987, "learning_rate": 4.900574965846447e-05, "loss": 0.1526, "step": 7034 }, { "epoch": 2.25878953283031, "grad_norm": 0.9571073651313782, "learning_rate": 4.8975725825075435e-05, 
"loss": 0.4906, "step": 7035 }, { "epoch": 2.2591106116551614, "grad_norm": 0.9649335741996765, "learning_rate": 4.894570820883944e-05, "loss": 0.4216, "step": 7036 }, { "epoch": 2.259431690480013, "grad_norm": 0.8993098735809326, "learning_rate": 4.8915696813414026e-05, "loss": 0.4621, "step": 7037 }, { "epoch": 2.2597527693048645, "grad_norm": 0.8691626191139221, "learning_rate": 4.888569164245601e-05, "loss": 0.3543, "step": 7038 }, { "epoch": 2.260073848129716, "grad_norm": 1.0925235748291016, "learning_rate": 4.885569269962142e-05, "loss": 0.3965, "step": 7039 }, { "epoch": 2.260394926954567, "grad_norm": 0.9121451377868652, "learning_rate": 4.8825699988565485e-05, "loss": 0.3612, "step": 7040 }, { "epoch": 2.2607160057794187, "grad_norm": 0.8157550096511841, "learning_rate": 4.8795713512942865e-05, "loss": 0.3485, "step": 7041 }, { "epoch": 2.26103708460427, "grad_norm": 0.691774845123291, "learning_rate": 4.8765733276407156e-05, "loss": 0.285, "step": 7042 }, { "epoch": 2.2613581634291218, "grad_norm": 0.9489686489105225, "learning_rate": 4.8735759282611516e-05, "loss": 0.3201, "step": 7043 }, { "epoch": 2.2616792422539733, "grad_norm": 0.9636274576187134, "learning_rate": 4.870579153520807e-05, "loss": 0.3511, "step": 7044 }, { "epoch": 2.262000321078825, "grad_norm": 0.8838746547698975, "learning_rate": 4.867583003784829e-05, "loss": 0.3283, "step": 7045 }, { "epoch": 2.2623213999036764, "grad_norm": 0.8975582718849182, "learning_rate": 4.864587479418302e-05, "loss": 0.3509, "step": 7046 }, { "epoch": 2.262642478728528, "grad_norm": 0.8758841156959534, "learning_rate": 4.861592580786205e-05, "loss": 0.3312, "step": 7047 }, { "epoch": 2.2629635575533795, "grad_norm": 0.8865678310394287, "learning_rate": 4.858598308253473e-05, "loss": 0.4163, "step": 7048 }, { "epoch": 2.2632846363782306, "grad_norm": 0.7309367656707764, "learning_rate": 4.8556046621849346e-05, "loss": 0.3007, "step": 7049 }, { "epoch": 2.263605715203082, "grad_norm": 1.0383028984069824, 
"learning_rate": 4.852611642945368e-05, "loss": 0.4668, "step": 7050 }, { "epoch": 2.2639267940279337, "grad_norm": 0.8784850239753723, "learning_rate": 4.8496192508994576e-05, "loss": 0.2978, "step": 7051 }, { "epoch": 2.2642478728527853, "grad_norm": 0.8731421232223511, "learning_rate": 4.84662748641182e-05, "loss": 0.3559, "step": 7052 }, { "epoch": 2.264568951677637, "grad_norm": 0.8055264353752136, "learning_rate": 4.8436363498469906e-05, "loss": 0.333, "step": 7053 }, { "epoch": 2.2648900305024884, "grad_norm": 0.7497748732566833, "learning_rate": 4.840645841569431e-05, "loss": 0.2937, "step": 7054 }, { "epoch": 2.26521110932734, "grad_norm": 0.9812029600143433, "learning_rate": 4.837655961943526e-05, "loss": 0.4044, "step": 7055 }, { "epoch": 2.2655321881521915, "grad_norm": 0.9688860177993774, "learning_rate": 4.834666711333582e-05, "loss": 0.3708, "step": 7056 }, { "epoch": 2.265853266977043, "grad_norm": 0.8438839912414551, "learning_rate": 4.8316780901038314e-05, "loss": 0.2835, "step": 7057 }, { "epoch": 2.266174345801894, "grad_norm": 1.0307844877243042, "learning_rate": 4.828690098618429e-05, "loss": 0.3232, "step": 7058 }, { "epoch": 2.2664954246267457, "grad_norm": 0.6895515322685242, "learning_rate": 4.825702737241452e-05, "loss": 0.2816, "step": 7059 }, { "epoch": 2.2668165034515972, "grad_norm": 0.8338602185249329, "learning_rate": 4.822716006336897e-05, "loss": 0.3282, "step": 7060 }, { "epoch": 2.267137582276449, "grad_norm": 0.7110063433647156, "learning_rate": 4.8197299062686995e-05, "loss": 0.2868, "step": 7061 }, { "epoch": 2.2674586611013003, "grad_norm": 0.8078935146331787, "learning_rate": 4.816744437400697e-05, "loss": 0.3183, "step": 7062 }, { "epoch": 2.267779739926152, "grad_norm": 0.830915093421936, "learning_rate": 4.8137596000966614e-05, "loss": 0.3452, "step": 7063 }, { "epoch": 2.2681008187510034, "grad_norm": 1.4586313962936401, "learning_rate": 4.810775394720286e-05, "loss": 0.4637, "step": 7064 }, { "epoch": 
2.268421897575855, "grad_norm": 0.9930241703987122, "learning_rate": 4.807791821635186e-05, "loss": 0.3232, "step": 7065 }, { "epoch": 2.2687429764007065, "grad_norm": 0.7299748063087463, "learning_rate": 4.8048088812049096e-05, "loss": 0.313, "step": 7066 }, { "epoch": 2.2690640552255577, "grad_norm": 0.6193024516105652, "learning_rate": 4.8018265737929044e-05, "loss": 0.285, "step": 7067 }, { "epoch": 2.269385134050409, "grad_norm": 0.773335874080658, "learning_rate": 4.798844899762568e-05, "loss": 0.3412, "step": 7068 }, { "epoch": 2.2697062128752608, "grad_norm": 0.9206930994987488, "learning_rate": 4.7958638594772064e-05, "loss": 0.3609, "step": 7069 }, { "epoch": 2.2700272917001123, "grad_norm": 1.3238471746444702, "learning_rate": 4.792883453300042e-05, "loss": 0.3777, "step": 7070 }, { "epoch": 2.270348370524964, "grad_norm": 0.8293914794921875, "learning_rate": 4.78990368159424e-05, "loss": 0.3592, "step": 7071 }, { "epoch": 2.2706694493498154, "grad_norm": 0.9413627982139587, "learning_rate": 4.786924544722864e-05, "loss": 0.377, "step": 7072 }, { "epoch": 2.270990528174667, "grad_norm": 0.9093666672706604, "learning_rate": 4.783946043048923e-05, "loss": 0.3278, "step": 7073 }, { "epoch": 2.2713116069995185, "grad_norm": 0.8311013579368591, "learning_rate": 4.780968176935333e-05, "loss": 0.2915, "step": 7074 }, { "epoch": 2.27163268582437, "grad_norm": 0.9607753157615662, "learning_rate": 4.7779909467449414e-05, "loss": 0.2904, "step": 7075 }, { "epoch": 2.271953764649221, "grad_norm": 0.5113980174064636, "learning_rate": 4.7750143528405126e-05, "loss": 0.2676, "step": 7076 }, { "epoch": 2.2722748434740727, "grad_norm": 0.7223218679428101, "learning_rate": 4.7720383955847345e-05, "loss": 0.291, "step": 7077 }, { "epoch": 2.2725959222989243, "grad_norm": 0.48104968667030334, "learning_rate": 4.769063075340222e-05, "loss": 0.2631, "step": 7078 }, { "epoch": 2.272917001123776, "grad_norm": 1.0848753452301025, "learning_rate": 4.766088392469506e-05, "loss": 
0.3135, "step": 7079 }, { "epoch": 2.2732380799486274, "grad_norm": 0.40874695777893066, "learning_rate": 4.763114347335043e-05, "loss": 0.4864, "step": 7080 }, { "epoch": 2.273559158773479, "grad_norm": 0.5546656250953674, "learning_rate": 4.7601409402992106e-05, "loss": 0.3189, "step": 7081 }, { "epoch": 2.2738802375983305, "grad_norm": 0.678066611289978, "learning_rate": 4.757168171724311e-05, "loss": 0.3145, "step": 7082 }, { "epoch": 2.274201316423182, "grad_norm": 0.4192000925540924, "learning_rate": 4.7541960419725626e-05, "loss": 0.1832, "step": 7083 }, { "epoch": 2.2745223952480336, "grad_norm": 0.24414893984794617, "learning_rate": 4.7512245514061225e-05, "loss": 0.0799, "step": 7084 }, { "epoch": 2.2748434740728847, "grad_norm": 0.634087085723877, "learning_rate": 4.748253700387042e-05, "loss": 0.139, "step": 7085 }, { "epoch": 2.2751645528977362, "grad_norm": 0.9760504364967346, "learning_rate": 4.745283489277325e-05, "loss": 0.522, "step": 7086 }, { "epoch": 2.2754856317225878, "grad_norm": 0.9048032164573669, "learning_rate": 4.742313918438872e-05, "loss": 0.4195, "step": 7087 }, { "epoch": 2.2758067105474393, "grad_norm": 0.9374763369560242, "learning_rate": 4.739344988233516e-05, "loss": 0.3933, "step": 7088 }, { "epoch": 2.276127789372291, "grad_norm": 1.0608501434326172, "learning_rate": 4.736376699023023e-05, "loss": 0.4224, "step": 7089 }, { "epoch": 2.2764488681971424, "grad_norm": 0.7070604562759399, "learning_rate": 4.7334090511690554e-05, "loss": 0.3017, "step": 7090 }, { "epoch": 2.276769947021994, "grad_norm": 0.7293145060539246, "learning_rate": 4.7304420450332244e-05, "loss": 0.297, "step": 7091 }, { "epoch": 2.2770910258468455, "grad_norm": 0.7650532722473145, "learning_rate": 4.7274756809770446e-05, "loss": 0.3605, "step": 7092 }, { "epoch": 2.277412104671697, "grad_norm": 0.9350664019584656, "learning_rate": 4.724509959361961e-05, "loss": 0.3853, "step": 7093 }, { "epoch": 2.277733183496548, "grad_norm": 0.8287744522094727, 
"learning_rate": 4.721544880549337e-05, "loss": 0.3868, "step": 7094 }, { "epoch": 2.2780542623213997, "grad_norm": 0.7390232086181641, "learning_rate": 4.7185804449004565e-05, "loss": 0.3578, "step": 7095 }, { "epoch": 2.2783753411462513, "grad_norm": 0.8684871196746826, "learning_rate": 4.71561665277653e-05, "loss": 0.3543, "step": 7096 }, { "epoch": 2.278696419971103, "grad_norm": 0.6932856440544128, "learning_rate": 4.712653504538683e-05, "loss": 0.296, "step": 7097 }, { "epoch": 2.2790174987959544, "grad_norm": 1.051287055015564, "learning_rate": 4.70969100054797e-05, "loss": 0.5495, "step": 7098 }, { "epoch": 2.279338577620806, "grad_norm": 0.8018210530281067, "learning_rate": 4.706729141165361e-05, "loss": 0.2714, "step": 7099 }, { "epoch": 2.2796596564456575, "grad_norm": 1.3825582265853882, "learning_rate": 4.7037679267517495e-05, "loss": 0.3997, "step": 7100 }, { "epoch": 2.279980735270509, "grad_norm": 0.8683844804763794, "learning_rate": 4.700807357667952e-05, "loss": 0.3403, "step": 7101 }, { "epoch": 2.2803018140953606, "grad_norm": 0.9737560749053955, "learning_rate": 4.697847434274704e-05, "loss": 0.3464, "step": 7102 }, { "epoch": 2.2806228929202117, "grad_norm": 0.7259578704833984, "learning_rate": 4.694888156932658e-05, "loss": 0.3272, "step": 7103 }, { "epoch": 2.2809439717450632, "grad_norm": 0.7659843564033508, "learning_rate": 4.6919295260024054e-05, "loss": 0.32, "step": 7104 }, { "epoch": 2.281265050569915, "grad_norm": 1.1575708389282227, "learning_rate": 4.688971541844436e-05, "loss": 0.4335, "step": 7105 }, { "epoch": 2.2815861293947663, "grad_norm": 0.7869051098823547, "learning_rate": 4.68601420481917e-05, "loss": 0.3432, "step": 7106 }, { "epoch": 2.281907208219618, "grad_norm": 1.0924334526062012, "learning_rate": 4.6830575152869616e-05, "loss": 0.3652, "step": 7107 }, { "epoch": 2.2822282870444695, "grad_norm": 0.8119152188301086, "learning_rate": 4.6801014736080596e-05, "loss": 0.2975, "step": 7108 }, { "epoch": 2.282549365869321, 
"grad_norm": 0.9879711270332336, "learning_rate": 4.6771460801426635e-05, "loss": 0.3559, "step": 7109 }, { "epoch": 2.2828704446941726, "grad_norm": 0.7146173119544983, "learning_rate": 4.674191335250865e-05, "loss": 0.2943, "step": 7110 }, { "epoch": 2.283191523519024, "grad_norm": 0.7338638305664062, "learning_rate": 4.6712372392927e-05, "loss": 0.2994, "step": 7111 }, { "epoch": 2.283512602343875, "grad_norm": 1.170583963394165, "learning_rate": 4.668283792628114e-05, "loss": 0.3887, "step": 7112 }, { "epoch": 2.2838336811687268, "grad_norm": 0.9542192816734314, "learning_rate": 4.665330995616974e-05, "loss": 0.3925, "step": 7113 }, { "epoch": 2.2841547599935783, "grad_norm": 0.7801535129547119, "learning_rate": 4.6623788486190725e-05, "loss": 0.2912, "step": 7114 }, { "epoch": 2.28447583881843, "grad_norm": 1.0590436458587646, "learning_rate": 4.6594273519941154e-05, "loss": 0.3585, "step": 7115 }, { "epoch": 2.2847969176432814, "grad_norm": 0.7104535102844238, "learning_rate": 4.656476506101737e-05, "loss": 0.2778, "step": 7116 }, { "epoch": 2.285117996468133, "grad_norm": 0.7049524784088135, "learning_rate": 4.653526311301488e-05, "loss": 0.3042, "step": 7117 }, { "epoch": 2.2854390752929845, "grad_norm": 1.7429343461990356, "learning_rate": 4.65057676795284e-05, "loss": 0.3351, "step": 7118 }, { "epoch": 2.285760154117836, "grad_norm": 0.6940548419952393, "learning_rate": 4.647627876415186e-05, "loss": 0.2715, "step": 7119 }, { "epoch": 2.2860812329426876, "grad_norm": 0.7398602366447449, "learning_rate": 4.6446796370478394e-05, "loss": 0.3182, "step": 7120 }, { "epoch": 2.2864023117675387, "grad_norm": 1.2548072338104248, "learning_rate": 4.6417320502100316e-05, "loss": 0.3538, "step": 7121 }, { "epoch": 2.2867233905923903, "grad_norm": 0.6731551289558411, "learning_rate": 4.6387851162609275e-05, "loss": 0.3286, "step": 7122 }, { "epoch": 2.287044469417242, "grad_norm": 0.6933743357658386, "learning_rate": 4.6358388355595904e-05, "loss": 0.2885, "step": 
7123 }, { "epoch": 2.2873655482420934, "grad_norm": 1.1013816595077515, "learning_rate": 4.632893208465021e-05, "loss": 0.4317, "step": 7124 }, { "epoch": 2.287686627066945, "grad_norm": 0.7992168664932251, "learning_rate": 4.629948235336133e-05, "loss": 0.317, "step": 7125 }, { "epoch": 2.2880077058917965, "grad_norm": 0.8445520401000977, "learning_rate": 4.6270039165317605e-05, "loss": 0.3382, "step": 7126 }, { "epoch": 2.288328784716648, "grad_norm": 0.7868288159370422, "learning_rate": 4.62406025241067e-05, "loss": 0.2714, "step": 7127 }, { "epoch": 2.2886498635414996, "grad_norm": 0.7132185697555542, "learning_rate": 4.621117243331523e-05, "loss": 0.3171, "step": 7128 }, { "epoch": 2.288970942366351, "grad_norm": 0.7570092082023621, "learning_rate": 4.6181748896529273e-05, "loss": 0.3292, "step": 7129 }, { "epoch": 2.2892920211912022, "grad_norm": 0.5305631160736084, "learning_rate": 4.615233191733398e-05, "loss": 0.6934, "step": 7130 }, { "epoch": 2.289613100016054, "grad_norm": 0.5653072595596313, "learning_rate": 4.612292149931369e-05, "loss": 0.7784, "step": 7131 }, { "epoch": 2.2899341788409053, "grad_norm": 0.5794984102249146, "learning_rate": 4.6093517646052034e-05, "loss": 0.4634, "step": 7132 }, { "epoch": 2.290255257665757, "grad_norm": 0.5841457843780518, "learning_rate": 4.6064120361131656e-05, "loss": 0.4593, "step": 7133 }, { "epoch": 2.2905763364906084, "grad_norm": 0.6066222786903381, "learning_rate": 4.603472964813466e-05, "loss": 0.1791, "step": 7134 }, { "epoch": 2.29089741531546, "grad_norm": 0.4143172800540924, "learning_rate": 4.600534551064215e-05, "loss": 0.0885, "step": 7135 }, { "epoch": 2.2912184941403115, "grad_norm": 0.424333393573761, "learning_rate": 4.59759679522345e-05, "loss": 0.1697, "step": 7136 }, { "epoch": 2.291539572965163, "grad_norm": 0.6681946516036987, "learning_rate": 4.5946596976491295e-05, "loss": 0.2762, "step": 7137 }, { "epoch": 2.2918606517900146, "grad_norm": 0.9873928427696228, "learning_rate": 
4.591723258699127e-05, "loss": 0.3777, "step": 7138 }, { "epoch": 2.2921817306148657, "grad_norm": 0.8079444766044617, "learning_rate": 4.588787478731242e-05, "loss": 0.4295, "step": 7139 }, { "epoch": 2.2925028094397173, "grad_norm": 0.8375623822212219, "learning_rate": 4.5858523581031884e-05, "loss": 0.2995, "step": 7140 }, { "epoch": 2.292823888264569, "grad_norm": 1.0319302082061768, "learning_rate": 4.582917897172603e-05, "loss": 0.4127, "step": 7141 }, { "epoch": 2.2931449670894204, "grad_norm": 0.7338537573814392, "learning_rate": 4.579984096297038e-05, "loss": 0.3225, "step": 7142 }, { "epoch": 2.293466045914272, "grad_norm": 0.9782087206840515, "learning_rate": 4.577050955833973e-05, "loss": 0.3765, "step": 7143 }, { "epoch": 2.2937871247391235, "grad_norm": 0.8286393284797668, "learning_rate": 4.574118476140794e-05, "loss": 0.3714, "step": 7144 }, { "epoch": 2.294108203563975, "grad_norm": 0.7754277586936951, "learning_rate": 4.5711866575748276e-05, "loss": 0.3201, "step": 7145 }, { "epoch": 2.2944292823888266, "grad_norm": 0.8050145506858826, "learning_rate": 4.568255500493292e-05, "loss": 0.3388, "step": 7146 }, { "epoch": 2.294750361213678, "grad_norm": 0.9979050755500793, "learning_rate": 4.565325005253356e-05, "loss": 0.3467, "step": 7147 }, { "epoch": 2.2950714400385293, "grad_norm": 0.6032088398933411, "learning_rate": 4.5623951722120736e-05, "loss": 0.2315, "step": 7148 }, { "epoch": 2.295392518863381, "grad_norm": 0.8849102258682251, "learning_rate": 4.559466001726451e-05, "loss": 0.3303, "step": 7149 }, { "epoch": 2.2957135976882324, "grad_norm": 0.9479863047599792, "learning_rate": 4.5565374941533965e-05, "loss": 0.3925, "step": 7150 }, { "epoch": 2.296034676513084, "grad_norm": 0.8778762817382812, "learning_rate": 4.5536096498497295e-05, "loss": 0.3579, "step": 7151 }, { "epoch": 2.2963557553379355, "grad_norm": 1.0045291185379028, "learning_rate": 4.5506824691722126e-05, "loss": 0.3575, "step": 7152 }, { "epoch": 2.296676834162787, 
"grad_norm": 1.0932892560958862, "learning_rate": 4.5477559524774994e-05, "loss": 0.4418, "step": 7153 }, { "epoch": 2.2969979129876386, "grad_norm": 1.10843026638031, "learning_rate": 4.5448301001221895e-05, "loss": 0.5555, "step": 7154 }, { "epoch": 2.29731899181249, "grad_norm": 0.7526745200157166, "learning_rate": 4.541904912462784e-05, "loss": 0.3106, "step": 7155 }, { "epoch": 2.2976400706373417, "grad_norm": 0.8937078714370728, "learning_rate": 4.5389803898557106e-05, "loss": 0.3546, "step": 7156 }, { "epoch": 2.2979611494621928, "grad_norm": 0.660315752029419, "learning_rate": 4.5360565326573104e-05, "loss": 0.2998, "step": 7157 }, { "epoch": 2.2982822282870443, "grad_norm": 1.0045713186264038, "learning_rate": 4.5331333412238475e-05, "loss": 0.497, "step": 7158 }, { "epoch": 2.298603307111896, "grad_norm": 1.044492244720459, "learning_rate": 4.530210815911504e-05, "loss": 0.4079, "step": 7159 }, { "epoch": 2.2989243859367474, "grad_norm": 0.894120991230011, "learning_rate": 4.527288957076382e-05, "loss": 0.364, "step": 7160 }, { "epoch": 2.299245464761599, "grad_norm": 0.7585509419441223, "learning_rate": 4.524367765074499e-05, "loss": 0.2742, "step": 7161 }, { "epoch": 2.2995665435864505, "grad_norm": 1.1006792783737183, "learning_rate": 4.5214472402617944e-05, "loss": 0.4268, "step": 7162 }, { "epoch": 2.299887622411302, "grad_norm": 0.7505506873130798, "learning_rate": 4.518527382994127e-05, "loss": 0.3131, "step": 7163 }, { "epoch": 2.3002087012361536, "grad_norm": 0.645842432975769, "learning_rate": 4.515608193627265e-05, "loss": 0.2745, "step": 7164 }, { "epoch": 2.300529780061005, "grad_norm": 0.8791124820709229, "learning_rate": 4.512689672516918e-05, "loss": 0.4013, "step": 7165 }, { "epoch": 2.3008508588858563, "grad_norm": 0.6512730121612549, "learning_rate": 4.5097718200186814e-05, "loss": 0.2472, "step": 7166 }, { "epoch": 2.301171937710708, "grad_norm": 0.7656790018081665, "learning_rate": 4.506854636488103e-05, "loss": 0.3403, "step": 7167 
}, { "epoch": 2.3014930165355594, "grad_norm": 0.6794445514678955, "learning_rate": 4.50393812228062e-05, "loss": 0.3161, "step": 7168 }, { "epoch": 2.301814095360411, "grad_norm": 0.6210803389549255, "learning_rate": 4.501022277751602e-05, "loss": 0.2855, "step": 7169 }, { "epoch": 2.3021351741852625, "grad_norm": 0.748984694480896, "learning_rate": 4.498107103256346e-05, "loss": 0.2983, "step": 7170 }, { "epoch": 2.302456253010114, "grad_norm": 0.7318181395530701, "learning_rate": 4.495192599150044e-05, "loss": 0.2941, "step": 7171 }, { "epoch": 2.3027773318349656, "grad_norm": 0.8461513519287109, "learning_rate": 4.4922787657878294e-05, "loss": 0.3321, "step": 7172 }, { "epoch": 2.303098410659817, "grad_norm": 0.4881146252155304, "learning_rate": 4.48936560352474e-05, "loss": 0.2739, "step": 7173 }, { "epoch": 2.3034194894846687, "grad_norm": 1.058592677116394, "learning_rate": 4.4864531127157374e-05, "loss": 0.381, "step": 7174 }, { "epoch": 2.30374056830952, "grad_norm": 0.6712564826011658, "learning_rate": 4.483541293715698e-05, "loss": 0.3067, "step": 7175 }, { "epoch": 2.3040616471343713, "grad_norm": 0.9052884578704834, "learning_rate": 4.480630146879419e-05, "loss": 0.2888, "step": 7176 }, { "epoch": 2.304382725959223, "grad_norm": 0.6388865113258362, "learning_rate": 4.4777196725616146e-05, "loss": 0.3019, "step": 7177 }, { "epoch": 2.3047038047840744, "grad_norm": 1.7161591053009033, "learning_rate": 4.474809871116916e-05, "loss": 0.3845, "step": 7178 }, { "epoch": 2.305024883608926, "grad_norm": 0.959423840045929, "learning_rate": 4.471900742899876e-05, "loss": 0.3427, "step": 7179 }, { "epoch": 2.3053459624337775, "grad_norm": 0.47565457224845886, "learning_rate": 4.4689922882649626e-05, "loss": 0.6239, "step": 7180 }, { "epoch": 2.305667041258629, "grad_norm": 0.5845574140548706, "learning_rate": 4.46608450756656e-05, "loss": 0.6796, "step": 7181 }, { "epoch": 2.3059881200834806, "grad_norm": 0.6896982192993164, "learning_rate": 
4.463177401158975e-05, "loss": 0.3241, "step": 7182 }, { "epoch": 2.306309198908332, "grad_norm": 0.45037001371383667, "learning_rate": 4.460270969396429e-05, "loss": 0.1748, "step": 7183 }, { "epoch": 2.3066302777331833, "grad_norm": 0.4934408366680145, "learning_rate": 4.457365212633058e-05, "loss": 0.2459, "step": 7184 }, { "epoch": 2.306951356558035, "grad_norm": 0.6035980582237244, "learning_rate": 4.45446013122293e-05, "loss": 0.2141, "step": 7185 }, { "epoch": 2.3072724353828864, "grad_norm": 0.8812675476074219, "learning_rate": 4.451555725520009e-05, "loss": 0.4984, "step": 7186 }, { "epoch": 2.307593514207738, "grad_norm": 0.7593944072723389, "learning_rate": 4.44865199587819e-05, "loss": 0.3683, "step": 7187 }, { "epoch": 2.3079145930325895, "grad_norm": 0.6698897480964661, "learning_rate": 4.4457489426512947e-05, "loss": 0.2978, "step": 7188 }, { "epoch": 2.308235671857441, "grad_norm": 0.8218667507171631, "learning_rate": 4.4428465661930343e-05, "loss": 0.3963, "step": 7189 }, { "epoch": 2.3085567506822926, "grad_norm": 0.8323404788970947, "learning_rate": 4.43994486685707e-05, "loss": 0.3453, "step": 7190 }, { "epoch": 2.308877829507144, "grad_norm": 0.8024272322654724, "learning_rate": 4.437043844996952e-05, "loss": 0.2979, "step": 7191 }, { "epoch": 2.3091989083319957, "grad_norm": 1.1842060089111328, "learning_rate": 4.43414350096617e-05, "loss": 0.4108, "step": 7192 }, { "epoch": 2.309519987156847, "grad_norm": 0.9761362671852112, "learning_rate": 4.431243835118124e-05, "loss": 0.4397, "step": 7193 }, { "epoch": 2.3098410659816984, "grad_norm": 0.8918820023536682, "learning_rate": 4.428344847806116e-05, "loss": 0.3433, "step": 7194 }, { "epoch": 2.31016214480655, "grad_norm": 0.9746502041816711, "learning_rate": 4.425446539383393e-05, "loss": 0.404, "step": 7195 }, { "epoch": 2.3104832236314015, "grad_norm": 0.7089119553565979, "learning_rate": 4.4225489102030995e-05, "loss": 0.241, "step": 7196 }, { "epoch": 2.310804302456253, "grad_norm": 
0.9124135971069336, "learning_rate": 4.419651960618302e-05, "loss": 0.3361, "step": 7197 }, { "epoch": 2.3111253812811046, "grad_norm": 0.95118647813797, "learning_rate": 4.4167556909819874e-05, "loss": 0.3814, "step": 7198 }, { "epoch": 2.311446460105956, "grad_norm": 1.066606044769287, "learning_rate": 4.413860101647055e-05, "loss": 0.3397, "step": 7199 }, { "epoch": 2.3117675389308077, "grad_norm": 0.8261109590530396, "learning_rate": 4.4109651929663256e-05, "loss": 0.2883, "step": 7200 }, { "epoch": 2.312088617755659, "grad_norm": 0.9831299185752869, "learning_rate": 4.4080709652925336e-05, "loss": 0.387, "step": 7201 }, { "epoch": 2.3124096965805103, "grad_norm": 1.2849937677383423, "learning_rate": 4.4051774189783315e-05, "loss": 0.4081, "step": 7202 }, { "epoch": 2.312730775405362, "grad_norm": 1.1613420248031616, "learning_rate": 4.4022845543762915e-05, "loss": 0.3917, "step": 7203 }, { "epoch": 2.3130518542302134, "grad_norm": 0.9455772638320923, "learning_rate": 4.399392371838897e-05, "loss": 0.334, "step": 7204 }, { "epoch": 2.313372933055065, "grad_norm": 0.8039372563362122, "learning_rate": 4.396500871718555e-05, "loss": 0.338, "step": 7205 }, { "epoch": 2.3136940118799165, "grad_norm": 0.7344679832458496, "learning_rate": 4.393610054367585e-05, "loss": 0.3172, "step": 7206 }, { "epoch": 2.314015090704768, "grad_norm": 0.6306678056716919, "learning_rate": 4.39071992013822e-05, "loss": 0.2823, "step": 7207 }, { "epoch": 2.3143361695296196, "grad_norm": 0.9511836171150208, "learning_rate": 4.387830469382624e-05, "loss": 0.4142, "step": 7208 }, { "epoch": 2.314657248354471, "grad_norm": 0.9060647487640381, "learning_rate": 4.3849417024528564e-05, "loss": 0.3342, "step": 7209 }, { "epoch": 2.3149783271793227, "grad_norm": 0.821698784828186, "learning_rate": 4.382053619700912e-05, "loss": 0.3357, "step": 7210 }, { "epoch": 2.315299406004174, "grad_norm": 0.7825961709022522, "learning_rate": 4.379166221478697e-05, "loss": 0.3329, "step": 7211 }, { "epoch": 
2.3156204848290254, "grad_norm": 0.9795680046081543, "learning_rate": 4.3762795081380215e-05, "loss": 0.3623, "step": 7212 }, { "epoch": 2.315941563653877, "grad_norm": 0.7996529936790466, "learning_rate": 4.3733934800306366e-05, "loss": 0.2732, "step": 7213 }, { "epoch": 2.3162626424787285, "grad_norm": 0.6189697980880737, "learning_rate": 4.37050813750818e-05, "loss": 0.2654, "step": 7214 }, { "epoch": 2.31658372130358, "grad_norm": 0.7410408854484558, "learning_rate": 4.367623480922236e-05, "loss": 0.3059, "step": 7215 }, { "epoch": 2.3169048001284316, "grad_norm": 0.8010258078575134, "learning_rate": 4.364739510624286e-05, "loss": 0.3012, "step": 7216 }, { "epoch": 2.317225878953283, "grad_norm": 0.9137861728668213, "learning_rate": 4.361856226965733e-05, "loss": 0.391, "step": 7217 }, { "epoch": 2.3175469577781347, "grad_norm": 0.7650619745254517, "learning_rate": 4.3589736302978954e-05, "loss": 0.3483, "step": 7218 }, { "epoch": 2.3178680366029862, "grad_norm": 0.6533841490745544, "learning_rate": 4.356091720972011e-05, "loss": 0.2859, "step": 7219 }, { "epoch": 2.3181891154278373, "grad_norm": 0.5599982738494873, "learning_rate": 4.3532104993392306e-05, "loss": 0.2805, "step": 7220 }, { "epoch": 2.318510194252689, "grad_norm": 1.0042898654937744, "learning_rate": 4.350329965750621e-05, "loss": 0.4129, "step": 7221 }, { "epoch": 2.3188312730775404, "grad_norm": 0.5694493055343628, "learning_rate": 4.347450120557169e-05, "loss": 0.269, "step": 7222 }, { "epoch": 2.319152351902392, "grad_norm": 0.872643232345581, "learning_rate": 4.3445709641097745e-05, "loss": 0.3538, "step": 7223 }, { "epoch": 2.3194734307272435, "grad_norm": 0.4571438133716583, "learning_rate": 4.341692496759252e-05, "loss": 0.2497, "step": 7224 }, { "epoch": 2.319794509552095, "grad_norm": 0.6154420375823975, "learning_rate": 4.3388147188563325e-05, "loss": 0.2781, "step": 7225 }, { "epoch": 2.3201155883769466, "grad_norm": 0.6062678098678589, "learning_rate": 4.335937630751674e-05, "loss": 
0.2748, "step": 7226 }, { "epoch": 2.320436667201798, "grad_norm": 0.8544726371765137, "learning_rate": 4.333061232795826e-05, "loss": 0.3392, "step": 7227 }, { "epoch": 2.3207577460266497, "grad_norm": 0.8965178728103638, "learning_rate": 4.3301855253392864e-05, "loss": 0.3342, "step": 7228 }, { "epoch": 2.321078824851501, "grad_norm": 0.7547941207885742, "learning_rate": 4.327310508732437e-05, "loss": 0.3099, "step": 7229 }, { "epoch": 2.3213999036763524, "grad_norm": 0.43631595373153687, "learning_rate": 4.324436183325593e-05, "loss": 0.5266, "step": 7230 }, { "epoch": 2.321720982501204, "grad_norm": 0.44915318489074707, "learning_rate": 4.32156254946899e-05, "loss": 0.586, "step": 7231 }, { "epoch": 2.3220420613260555, "grad_norm": 0.4304579198360443, "learning_rate": 4.3186896075127595e-05, "loss": 0.3149, "step": 7232 }, { "epoch": 2.322363140150907, "grad_norm": 0.5537540316581726, "learning_rate": 4.315817357806974e-05, "loss": 0.4514, "step": 7233 }, { "epoch": 2.3226842189757586, "grad_norm": 0.4857487380504608, "learning_rate": 4.3129458007015946e-05, "loss": 0.3249, "step": 7234 }, { "epoch": 2.32300529780061, "grad_norm": 0.44234228134155273, "learning_rate": 4.310074936546521e-05, "loss": 0.2186, "step": 7235 }, { "epoch": 2.3233263766254617, "grad_norm": 0.29130613803863525, "learning_rate": 4.307204765691558e-05, "loss": 0.0781, "step": 7236 }, { "epoch": 2.3236474554503133, "grad_norm": 0.6856487393379211, "learning_rate": 4.304335288486426e-05, "loss": 0.3749, "step": 7237 }, { "epoch": 2.3239685342751644, "grad_norm": 0.7507144212722778, "learning_rate": 4.301466505280762e-05, "loss": 0.3243, "step": 7238 }, { "epoch": 2.324289613100016, "grad_norm": 0.8678911328315735, "learning_rate": 4.29859841642412e-05, "loss": 0.4114, "step": 7239 }, { "epoch": 2.3246106919248675, "grad_norm": 0.8836848139762878, "learning_rate": 4.295731022265966e-05, "loss": 0.3738, "step": 7240 }, { "epoch": 2.324931770749719, "grad_norm": 0.7541016936302185, 
"learning_rate": 4.2928643231556844e-05, "loss": 0.3071, "step": 7241 }, { "epoch": 2.3252528495745706, "grad_norm": 0.8909046053886414, "learning_rate": 4.289998319442573e-05, "loss": 0.4172, "step": 7242 }, { "epoch": 2.325573928399422, "grad_norm": 0.733267605304718, "learning_rate": 4.287133011475847e-05, "loss": 0.3147, "step": 7243 }, { "epoch": 2.3258950072242737, "grad_norm": 0.8695718050003052, "learning_rate": 4.2842683996046327e-05, "loss": 0.3655, "step": 7244 }, { "epoch": 2.326216086049125, "grad_norm": 0.867275059223175, "learning_rate": 4.2814044841779745e-05, "loss": 0.4448, "step": 7245 }, { "epoch": 2.3265371648739768, "grad_norm": 0.9108153581619263, "learning_rate": 4.27854126554484e-05, "loss": 0.3652, "step": 7246 }, { "epoch": 2.326858243698828, "grad_norm": 0.5802766680717468, "learning_rate": 4.2756787440540936e-05, "loss": 0.2382, "step": 7247 }, { "epoch": 2.3271793225236794, "grad_norm": 0.9157735109329224, "learning_rate": 4.2728169200545286e-05, "loss": 0.4234, "step": 7248 }, { "epoch": 2.327500401348531, "grad_norm": 0.817433774471283, "learning_rate": 4.26995579389485e-05, "loss": 0.3119, "step": 7249 }, { "epoch": 2.3278214801733825, "grad_norm": 0.793776273727417, "learning_rate": 4.267095365923672e-05, "loss": 0.3468, "step": 7250 }, { "epoch": 2.328142558998234, "grad_norm": 0.7414565086364746, "learning_rate": 4.264235636489542e-05, "loss": 0.3007, "step": 7251 }, { "epoch": 2.3284636378230856, "grad_norm": 0.74152672290802, "learning_rate": 4.261376605940894e-05, "loss": 0.2868, "step": 7252 }, { "epoch": 2.328784716647937, "grad_norm": 0.9075784683227539, "learning_rate": 4.2585182746261035e-05, "loss": 0.3053, "step": 7253 }, { "epoch": 2.3291057954727887, "grad_norm": 0.8360973000526428, "learning_rate": 4.2556606428934443e-05, "loss": 0.3616, "step": 7254 }, { "epoch": 2.3294268742976403, "grad_norm": 1.0343592166900635, "learning_rate": 4.252803711091112e-05, "loss": 0.2661, "step": 7255 }, { "epoch": 2.3297479531224914, 
"grad_norm": 0.8651792407035828, "learning_rate": 4.249947479567218e-05, "loss": 0.398, "step": 7256 }, { "epoch": 2.330069031947343, "grad_norm": 0.8300990462303162, "learning_rate": 4.2470919486697744e-05, "loss": 0.3371, "step": 7257 }, { "epoch": 2.3303901107721945, "grad_norm": 0.7871789932250977, "learning_rate": 4.244237118746731e-05, "loss": 0.2825, "step": 7258 }, { "epoch": 2.330711189597046, "grad_norm": 0.9724371433258057, "learning_rate": 4.2413829901459344e-05, "loss": 0.2868, "step": 7259 }, { "epoch": 2.3310322684218976, "grad_norm": 1.072011113166809, "learning_rate": 4.238529563215153e-05, "loss": 0.3748, "step": 7260 }, { "epoch": 2.331353347246749, "grad_norm": 1.1171770095825195, "learning_rate": 4.235676838302068e-05, "loss": 0.5082, "step": 7261 }, { "epoch": 2.3316744260716007, "grad_norm": 0.8948763012886047, "learning_rate": 4.232824815754276e-05, "loss": 0.3269, "step": 7262 }, { "epoch": 2.3319955048964522, "grad_norm": 0.9017735123634338, "learning_rate": 4.229973495919286e-05, "loss": 0.3732, "step": 7263 }, { "epoch": 2.332316583721304, "grad_norm": 0.8788909316062927, "learning_rate": 4.227122879144523e-05, "loss": 0.3456, "step": 7264 }, { "epoch": 2.332637662546155, "grad_norm": 0.7098720669746399, "learning_rate": 4.224272965777326e-05, "loss": 0.2543, "step": 7265 }, { "epoch": 2.3329587413710065, "grad_norm": 1.32795250415802, "learning_rate": 4.221423756164948e-05, "loss": 0.3675, "step": 7266 }, { "epoch": 2.333279820195858, "grad_norm": 0.7488431930541992, "learning_rate": 4.2185752506545585e-05, "loss": 0.3516, "step": 7267 }, { "epoch": 2.3336008990207096, "grad_norm": 0.5389450788497925, "learning_rate": 4.215727449593233e-05, "loss": 0.2551, "step": 7268 }, { "epoch": 2.333921977845561, "grad_norm": 0.5667807459831238, "learning_rate": 4.212880353327979e-05, "loss": 0.2823, "step": 7269 }, { "epoch": 2.3342430566704127, "grad_norm": 0.6024496555328369, "learning_rate": 4.210033962205694e-05, "loss": 0.2439, "step": 7270 
}, { "epoch": 2.334564135495264, "grad_norm": 0.6424733400344849, "learning_rate": 4.207188276573214e-05, "loss": 0.2782, "step": 7271 }, { "epoch": 2.3348852143201158, "grad_norm": 1.2001317739486694, "learning_rate": 4.204343296777265e-05, "loss": 0.3473, "step": 7272 }, { "epoch": 2.3352062931449673, "grad_norm": 0.7400069236755371, "learning_rate": 4.201499023164508e-05, "loss": 0.3178, "step": 7273 }, { "epoch": 2.3355273719698184, "grad_norm": 0.6566978693008423, "learning_rate": 4.1986554560815096e-05, "loss": 0.3215, "step": 7274 }, { "epoch": 2.33584845079467, "grad_norm": 0.6812121272087097, "learning_rate": 4.195812595874739e-05, "loss": 0.2494, "step": 7275 }, { "epoch": 2.3361695296195215, "grad_norm": 0.8875928521156311, "learning_rate": 4.1929704428906026e-05, "loss": 0.3022, "step": 7276 }, { "epoch": 2.336490608444373, "grad_norm": 0.6352400779724121, "learning_rate": 4.190128997475402e-05, "loss": 0.2617, "step": 7277 }, { "epoch": 2.3368116872692246, "grad_norm": 0.5926759243011475, "learning_rate": 4.1872882599753605e-05, "loss": 0.2664, "step": 7278 }, { "epoch": 2.337132766094076, "grad_norm": 0.41694337129592896, "learning_rate": 4.184448230736613e-05, "loss": 0.2739, "step": 7279 }, { "epoch": 2.3374538449189277, "grad_norm": 0.5080072283744812, "learning_rate": 4.181608910105207e-05, "loss": 0.6664, "step": 7280 }, { "epoch": 2.3377749237437793, "grad_norm": 0.468008428812027, "learning_rate": 4.1787702984271074e-05, "loss": 0.4515, "step": 7281 }, { "epoch": 2.338096002568631, "grad_norm": 0.5170953869819641, "learning_rate": 4.175932396048188e-05, "loss": 0.2252, "step": 7282 }, { "epoch": 2.338417081393482, "grad_norm": 0.46261003613471985, "learning_rate": 4.173095203314241e-05, "loss": 0.1327, "step": 7283 }, { "epoch": 2.3387381602183335, "grad_norm": 0.5765483975410461, "learning_rate": 4.170258720570968e-05, "loss": 0.2548, "step": 7284 }, { "epoch": 2.339059239043185, "grad_norm": 0.6683604121208191, "learning_rate": 
4.167422948163986e-05, "loss": 0.2663, "step": 7285 }, { "epoch": 2.3393803178680366, "grad_norm": 0.32332003116607666, "learning_rate": 4.1645878864388266e-05, "loss": 0.1391, "step": 7286 }, { "epoch": 2.339701396692888, "grad_norm": 1.4612623453140259, "learning_rate": 4.161753535740932e-05, "loss": 0.3675, "step": 7287 }, { "epoch": 2.3400224755177397, "grad_norm": 0.8074957728385925, "learning_rate": 4.158919896415656e-05, "loss": 0.3862, "step": 7288 }, { "epoch": 2.3403435543425912, "grad_norm": 0.7950018048286438, "learning_rate": 4.15608696880828e-05, "loss": 0.3471, "step": 7289 }, { "epoch": 2.3406646331674428, "grad_norm": 0.8618911504745483, "learning_rate": 4.153254753263974e-05, "loss": 0.3643, "step": 7290 }, { "epoch": 2.3409857119922943, "grad_norm": 0.9194211959838867, "learning_rate": 4.150423250127845e-05, "loss": 0.4025, "step": 7291 }, { "epoch": 2.3413067908171454, "grad_norm": 0.7834305167198181, "learning_rate": 4.1475924597449024e-05, "loss": 0.3126, "step": 7292 }, { "epoch": 2.341627869641997, "grad_norm": 0.6242783665657043, "learning_rate": 4.144762382460059e-05, "loss": 0.2834, "step": 7293 }, { "epoch": 2.3419489484668485, "grad_norm": 0.8568835854530334, "learning_rate": 4.141933018618165e-05, "loss": 0.3792, "step": 7294 }, { "epoch": 2.3422700272917, "grad_norm": 1.036650538444519, "learning_rate": 4.1391043685639576e-05, "loss": 0.4148, "step": 7295 }, { "epoch": 2.3425911061165516, "grad_norm": 0.9023113250732422, "learning_rate": 4.1362764326421064e-05, "loss": 0.3962, "step": 7296 }, { "epoch": 2.342912184941403, "grad_norm": 0.9023537635803223, "learning_rate": 4.133449211197188e-05, "loss": 0.3818, "step": 7297 }, { "epoch": 2.3432332637662547, "grad_norm": 1.3327304124832153, "learning_rate": 4.130622704573685e-05, "loss": 0.2894, "step": 7298 }, { "epoch": 2.3435543425911063, "grad_norm": 0.8340547680854797, "learning_rate": 4.1277969131160045e-05, "loss": 0.3798, "step": 7299 }, { "epoch": 2.343875421415958, "grad_norm": 
0.6969391107559204, "learning_rate": 4.1249718371684564e-05, "loss": 0.3505, "step": 7300 }, { "epoch": 2.344196500240809, "grad_norm": 0.7478737235069275, "learning_rate": 4.12214747707527e-05, "loss": 0.3221, "step": 7301 }, { "epoch": 2.3445175790656605, "grad_norm": 1.0204554796218872, "learning_rate": 4.1193238331805826e-05, "loss": 0.4387, "step": 7302 }, { "epoch": 2.344838657890512, "grad_norm": 1.032119870185852, "learning_rate": 4.11650090582845e-05, "loss": 0.3519, "step": 7303 }, { "epoch": 2.3451597367153636, "grad_norm": 0.8070648908615112, "learning_rate": 4.1136786953628334e-05, "loss": 0.3181, "step": 7304 }, { "epoch": 2.345480815540215, "grad_norm": 0.9976387023925781, "learning_rate": 4.110857202127615e-05, "loss": 0.2975, "step": 7305 }, { "epoch": 2.3458018943650667, "grad_norm": 0.7166529297828674, "learning_rate": 4.1080364264665774e-05, "loss": 0.2918, "step": 7306 }, { "epoch": 2.3461229731899182, "grad_norm": 1.119057297706604, "learning_rate": 4.1052163687234366e-05, "loss": 0.3691, "step": 7307 }, { "epoch": 2.34644405201477, "grad_norm": 0.8771776556968689, "learning_rate": 4.1023970292417935e-05, "loss": 0.3273, "step": 7308 }, { "epoch": 2.3467651308396213, "grad_norm": 1.007441759109497, "learning_rate": 4.099578408365191e-05, "loss": 0.39, "step": 7309 }, { "epoch": 2.3470862096644725, "grad_norm": 0.7695625424385071, "learning_rate": 4.096760506437057e-05, "loss": 0.3232, "step": 7310 }, { "epoch": 2.347407288489324, "grad_norm": 0.8058918714523315, "learning_rate": 4.093943323800745e-05, "loss": 0.279, "step": 7311 }, { "epoch": 2.3477283673141756, "grad_norm": 1.1080282926559448, "learning_rate": 4.0911268607995325e-05, "loss": 0.4143, "step": 7312 }, { "epoch": 2.348049446139027, "grad_norm": 0.9627980589866638, "learning_rate": 4.08831111777658e-05, "loss": 0.3703, "step": 7313 }, { "epoch": 2.3483705249638787, "grad_norm": 0.8395978808403015, "learning_rate": 4.08549609507499e-05, "loss": 0.3836, "step": 7314 }, { "epoch": 
2.34869160378873, "grad_norm": 0.8003190755844116, "learning_rate": 4.08268179303776e-05, "loss": 0.3474, "step": 7315 }, { "epoch": 2.3490126826135818, "grad_norm": 0.987792432308197, "learning_rate": 4.0798682120078044e-05, "loss": 0.3746, "step": 7316 }, { "epoch": 2.3493337614384333, "grad_norm": 0.8142846822738647, "learning_rate": 4.077055352327953e-05, "loss": 0.3273, "step": 7317 }, { "epoch": 2.349654840263285, "grad_norm": 0.8976936340332031, "learning_rate": 4.074243214340934e-05, "loss": 0.3165, "step": 7318 }, { "epoch": 2.349975919088136, "grad_norm": 0.8904353380203247, "learning_rate": 4.071431798389408e-05, "loss": 0.296, "step": 7319 }, { "epoch": 2.3502969979129875, "grad_norm": 1.0113767385482788, "learning_rate": 4.068621104815934e-05, "loss": 0.3628, "step": 7320 }, { "epoch": 2.350618076737839, "grad_norm": 0.5627405643463135, "learning_rate": 4.065811133962987e-05, "loss": 0.2582, "step": 7321 }, { "epoch": 2.3509391555626906, "grad_norm": 0.9106473326683044, "learning_rate": 4.063001886172952e-05, "loss": 0.3047, "step": 7322 }, { "epoch": 2.351260234387542, "grad_norm": 1.275215744972229, "learning_rate": 4.0601933617881294e-05, "loss": 0.3613, "step": 7323 }, { "epoch": 2.3515813132123937, "grad_norm": 0.7992632985115051, "learning_rate": 4.057385561150727e-05, "loss": 0.3085, "step": 7324 }, { "epoch": 2.3519023920372453, "grad_norm": 0.7819589972496033, "learning_rate": 4.05457848460287e-05, "loss": 0.3735, "step": 7325 }, { "epoch": 2.352223470862097, "grad_norm": 1.0428626537322998, "learning_rate": 4.0517721324865884e-05, "loss": 0.3611, "step": 7326 }, { "epoch": 2.3525445496869484, "grad_norm": 0.7494415640830994, "learning_rate": 4.048966505143831e-05, "loss": 0.3112, "step": 7327 }, { "epoch": 2.3528656285117995, "grad_norm": 0.7105958461761475, "learning_rate": 4.0461616029164526e-05, "loss": 0.3408, "step": 7328 }, { "epoch": 2.353186707336651, "grad_norm": 0.5049455165863037, "learning_rate": 4.0433574261462206e-05, "loss": 
0.3125, "step": 7329 }, { "epoch": 2.3535077861615026, "grad_norm": 0.4792654514312744, "learning_rate": 4.040553975174823e-05, "loss": 0.6885, "step": 7330 }, { "epoch": 2.353828864986354, "grad_norm": 0.450774222612381, "learning_rate": 4.037751250343841e-05, "loss": 0.4495, "step": 7331 }, { "epoch": 2.3541499438112057, "grad_norm": 0.5843284130096436, "learning_rate": 4.0349492519947904e-05, "loss": 0.5532, "step": 7332 }, { "epoch": 2.3544710226360572, "grad_norm": 0.535630464553833, "learning_rate": 4.032147980469072e-05, "loss": 0.316, "step": 7333 }, { "epoch": 2.354792101460909, "grad_norm": 0.4793131649494171, "learning_rate": 4.0293474361080244e-05, "loss": 0.1362, "step": 7334 }, { "epoch": 2.3551131802857603, "grad_norm": 0.4620800316333771, "learning_rate": 4.026547619252883e-05, "loss": 0.1559, "step": 7335 }, { "epoch": 2.355434259110612, "grad_norm": 0.44343894720077515, "learning_rate": 4.023748530244789e-05, "loss": 0.162, "step": 7336 }, { "epoch": 2.355755337935463, "grad_norm": 0.5997874140739441, "learning_rate": 4.020950169424815e-05, "loss": 0.2213, "step": 7337 }, { "epoch": 2.3560764167603145, "grad_norm": 0.9611859917640686, "learning_rate": 4.018152537133919e-05, "loss": 0.5503, "step": 7338 }, { "epoch": 2.356397495585166, "grad_norm": 0.9722205996513367, "learning_rate": 4.015355633712996e-05, "loss": 0.4235, "step": 7339 }, { "epoch": 2.3567185744100176, "grad_norm": 0.6975258588790894, "learning_rate": 4.012559459502835e-05, "loss": 0.3188, "step": 7340 }, { "epoch": 2.357039653234869, "grad_norm": 0.7221542000770569, "learning_rate": 4.009764014844143e-05, "loss": 0.3202, "step": 7341 }, { "epoch": 2.3573607320597207, "grad_norm": 0.7738749980926514, "learning_rate": 4.006969300077534e-05, "loss": 0.2977, "step": 7342 }, { "epoch": 2.3576818108845723, "grad_norm": 0.7268139123916626, "learning_rate": 4.004175315543538e-05, "loss": 0.2917, "step": 7343 }, { "epoch": 2.358002889709424, "grad_norm": 0.9446465373039246, 
"learning_rate": 4.001382061582593e-05, "loss": 0.3598, "step": 7344 }, { "epoch": 2.3583239685342754, "grad_norm": 1.0309242010116577, "learning_rate": 3.9985895385350456e-05, "loss": 0.349, "step": 7345 }, { "epoch": 2.3586450473591265, "grad_norm": 0.9522349834442139, "learning_rate": 3.9957977467411615e-05, "loss": 0.3701, "step": 7346 }, { "epoch": 2.358966126183978, "grad_norm": 0.9686524271965027, "learning_rate": 3.9930066865411075e-05, "loss": 0.4144, "step": 7347 }, { "epoch": 2.3592872050088296, "grad_norm": 0.9719577431678772, "learning_rate": 3.990216358274969e-05, "loss": 0.3456, "step": 7348 }, { "epoch": 2.359608283833681, "grad_norm": 0.8357756733894348, "learning_rate": 3.987426762282733e-05, "loss": 0.3546, "step": 7349 }, { "epoch": 2.3599293626585327, "grad_norm": 0.7465749979019165, "learning_rate": 3.9846378989043156e-05, "loss": 0.3632, "step": 7350 }, { "epoch": 2.3602504414833843, "grad_norm": 1.1760458946228027, "learning_rate": 3.981849768479517e-05, "loss": 0.3777, "step": 7351 }, { "epoch": 2.360571520308236, "grad_norm": 0.82599937915802, "learning_rate": 3.979062371348075e-05, "loss": 0.3254, "step": 7352 }, { "epoch": 2.3608925991330874, "grad_norm": 0.9712439179420471, "learning_rate": 3.976275707849616e-05, "loss": 0.4092, "step": 7353 }, { "epoch": 2.361213677957939, "grad_norm": 1.3784904479980469, "learning_rate": 3.973489778323688e-05, "loss": 0.433, "step": 7354 }, { "epoch": 2.36153475678279, "grad_norm": 0.9245650172233582, "learning_rate": 3.9707045831097555e-05, "loss": 0.343, "step": 7355 }, { "epoch": 2.3618558356076416, "grad_norm": 0.994295060634613, "learning_rate": 3.967920122547175e-05, "loss": 0.3945, "step": 7356 }, { "epoch": 2.362176914432493, "grad_norm": 0.8298449516296387, "learning_rate": 3.9651363969752344e-05, "loss": 0.3194, "step": 7357 }, { "epoch": 2.3624979932573447, "grad_norm": 1.0529513359069824, "learning_rate": 3.962353406733117e-05, "loss": 0.3887, "step": 7358 }, { "epoch": 2.362819072082196, 
"grad_norm": 1.0410268306732178, "learning_rate": 3.9595711521599224e-05, "loss": 0.4083, "step": 7359 }, { "epoch": 2.3631401509070478, "grad_norm": 0.7109187245368958, "learning_rate": 3.956789633594661e-05, "loss": 0.2836, "step": 7360 }, { "epoch": 2.3634612297318993, "grad_norm": 1.154443383216858, "learning_rate": 3.954008851376252e-05, "loss": 0.3007, "step": 7361 }, { "epoch": 2.363782308556751, "grad_norm": 0.7189384698867798, "learning_rate": 3.9512288058435256e-05, "loss": 0.2745, "step": 7362 }, { "epoch": 2.3641033873816024, "grad_norm": 0.5539619326591492, "learning_rate": 3.948449497335219e-05, "loss": 0.2625, "step": 7363 }, { "epoch": 2.3644244662064535, "grad_norm": 0.5823628306388855, "learning_rate": 3.945670926189987e-05, "loss": 0.2553, "step": 7364 }, { "epoch": 2.364745545031305, "grad_norm": 0.793720006942749, "learning_rate": 3.942893092746387e-05, "loss": 0.315, "step": 7365 }, { "epoch": 2.3650666238561566, "grad_norm": 0.7939404845237732, "learning_rate": 3.940115997342891e-05, "loss": 0.3487, "step": 7366 }, { "epoch": 2.365387702681008, "grad_norm": 0.80466628074646, "learning_rate": 3.9373396403178786e-05, "loss": 0.3018, "step": 7367 }, { "epoch": 2.3657087815058597, "grad_norm": 1.0654207468032837, "learning_rate": 3.9345640220096417e-05, "loss": 0.4041, "step": 7368 }, { "epoch": 2.3660298603307113, "grad_norm": 1.0553652048110962, "learning_rate": 3.931789142756377e-05, "loss": 0.3632, "step": 7369 }, { "epoch": 2.366350939155563, "grad_norm": 0.7207555770874023, "learning_rate": 3.9290150028962044e-05, "loss": 0.3082, "step": 7370 }, { "epoch": 2.3666720179804144, "grad_norm": 1.4064568281173706, "learning_rate": 3.9262416027671356e-05, "loss": 0.4244, "step": 7371 }, { "epoch": 2.366993096805266, "grad_norm": 0.7677832841873169, "learning_rate": 3.9234689427071006e-05, "loss": 0.2716, "step": 7372 }, { "epoch": 2.367314175630117, "grad_norm": 0.6761558651924133, "learning_rate": 3.920697023053949e-05, "loss": 0.2968, "step": 
7373 }, { "epoch": 2.3676352544549686, "grad_norm": 0.7641769051551819, "learning_rate": 3.917925844145418e-05, "loss": 0.3176, "step": 7374 }, { "epoch": 2.36795633327982, "grad_norm": 0.5635340213775635, "learning_rate": 3.915155406319181e-05, "loss": 0.2882, "step": 7375 }, { "epoch": 2.3682774121046717, "grad_norm": 0.689179539680481, "learning_rate": 3.9123857099127936e-05, "loss": 0.2785, "step": 7376 }, { "epoch": 2.3685984909295232, "grad_norm": 0.6026139259338379, "learning_rate": 3.9096167552637454e-05, "loss": 0.2739, "step": 7377 }, { "epoch": 2.368919569754375, "grad_norm": 0.6535028219223022, "learning_rate": 3.9068485427094205e-05, "loss": 0.2989, "step": 7378 }, { "epoch": 2.3692406485792263, "grad_norm": 1.0952959060668945, "learning_rate": 3.904081072587119e-05, "loss": 0.3505, "step": 7379 }, { "epoch": 2.369561727404078, "grad_norm": 0.41570642590522766, "learning_rate": 3.9013143452340475e-05, "loss": 0.3985, "step": 7380 }, { "epoch": 2.3698828062289294, "grad_norm": 0.5073022246360779, "learning_rate": 3.8985483609873244e-05, "loss": 0.4265, "step": 7381 }, { "epoch": 2.3702038850537805, "grad_norm": 0.5192015171051025, "learning_rate": 3.895783120183976e-05, "loss": 0.1128, "step": 7382 }, { "epoch": 2.370524963878632, "grad_norm": 0.46728768944740295, "learning_rate": 3.893018623160938e-05, "loss": 0.1821, "step": 7383 }, { "epoch": 2.3708460427034836, "grad_norm": 0.4381241202354431, "learning_rate": 3.890254870255055e-05, "loss": 0.1916, "step": 7384 }, { "epoch": 2.371167121528335, "grad_norm": 0.6580232977867126, "learning_rate": 3.887491861803085e-05, "loss": 0.3314, "step": 7385 }, { "epoch": 2.3714882003531867, "grad_norm": 1.0685169696807861, "learning_rate": 3.8847295981416896e-05, "loss": 0.5294, "step": 7386 }, { "epoch": 2.3718092791780383, "grad_norm": 1.036335825920105, "learning_rate": 3.88196807960744e-05, "loss": 0.4178, "step": 7387 }, { "epoch": 2.37213035800289, "grad_norm": 0.7811726331710815, "learning_rate": 
3.879207306536829e-05, "loss": 0.3555, "step": 7388 }, { "epoch": 2.3724514368277414, "grad_norm": 0.7611583471298218, "learning_rate": 3.876447279266238e-05, "loss": 0.2854, "step": 7389 }, { "epoch": 2.372772515652593, "grad_norm": 0.8770664930343628, "learning_rate": 3.8736879981319695e-05, "loss": 0.398, "step": 7390 }, { "epoch": 2.373093594477444, "grad_norm": 1.128092885017395, "learning_rate": 3.8709294634702376e-05, "loss": 0.3486, "step": 7391 }, { "epoch": 2.3734146733022956, "grad_norm": 1.1329503059387207, "learning_rate": 3.868171675617155e-05, "loss": 0.2897, "step": 7392 }, { "epoch": 2.373735752127147, "grad_norm": 0.9214721322059631, "learning_rate": 3.8654146349087606e-05, "loss": 0.3414, "step": 7393 }, { "epoch": 2.3740568309519987, "grad_norm": 0.9622142910957336, "learning_rate": 3.862658341680977e-05, "loss": 0.2992, "step": 7394 }, { "epoch": 2.3743779097768503, "grad_norm": 0.94224613904953, "learning_rate": 3.859902796269663e-05, "loss": 0.2816, "step": 7395 }, { "epoch": 2.374698988601702, "grad_norm": 0.7693343758583069, "learning_rate": 3.857147999010567e-05, "loss": 0.2936, "step": 7396 }, { "epoch": 2.3750200674265534, "grad_norm": 0.846794068813324, "learning_rate": 3.854393950239355e-05, "loss": 0.2686, "step": 7397 }, { "epoch": 2.375341146251405, "grad_norm": 0.8755759000778198, "learning_rate": 3.851640650291603e-05, "loss": 0.3316, "step": 7398 }, { "epoch": 2.3756622250762565, "grad_norm": 0.867134153842926, "learning_rate": 3.8488880995027786e-05, "loss": 0.2939, "step": 7399 }, { "epoch": 2.3759833039011076, "grad_norm": 0.9087311029434204, "learning_rate": 3.846136298208285e-05, "loss": 0.4108, "step": 7400 }, { "epoch": 2.376304382725959, "grad_norm": 1.0739927291870117, "learning_rate": 3.843385246743417e-05, "loss": 0.5237, "step": 7401 }, { "epoch": 2.3766254615508107, "grad_norm": 0.8434580564498901, "learning_rate": 3.840634945443382e-05, "loss": 0.3213, "step": 7402 }, { "epoch": 2.376946540375662, "grad_norm": 
0.887002170085907, "learning_rate": 3.837885394643296e-05, "loss": 0.3309, "step": 7403 }, { "epoch": 2.3772676192005138, "grad_norm": 0.9817523956298828, "learning_rate": 3.835136594678183e-05, "loss": 0.3857, "step": 7404 }, { "epoch": 2.3775886980253653, "grad_norm": 0.9014565944671631, "learning_rate": 3.832388545882976e-05, "loss": 0.3774, "step": 7405 }, { "epoch": 2.377909776850217, "grad_norm": 0.9643536806106567, "learning_rate": 3.829641248592515e-05, "loss": 0.3548, "step": 7406 }, { "epoch": 2.3782308556750684, "grad_norm": 0.7037240266799927, "learning_rate": 3.8268947031415514e-05, "loss": 0.2919, "step": 7407 }, { "epoch": 2.37855193449992, "grad_norm": 0.9688251614570618, "learning_rate": 3.824148909864744e-05, "loss": 0.3215, "step": 7408 }, { "epoch": 2.378873013324771, "grad_norm": 0.6717649698257446, "learning_rate": 3.821403869096658e-05, "loss": 0.3176, "step": 7409 }, { "epoch": 2.3791940921496226, "grad_norm": 0.7562946081161499, "learning_rate": 3.818659581171766e-05, "loss": 0.3263, "step": 7410 }, { "epoch": 2.379515170974474, "grad_norm": 0.9467628598213196, "learning_rate": 3.8159160464244606e-05, "loss": 0.3804, "step": 7411 }, { "epoch": 2.3798362497993257, "grad_norm": 0.824565052986145, "learning_rate": 3.81317326518902e-05, "loss": 0.3667, "step": 7412 }, { "epoch": 2.3801573286241773, "grad_norm": 0.9573568105697632, "learning_rate": 3.8104312377996564e-05, "loss": 0.4441, "step": 7413 }, { "epoch": 2.380478407449029, "grad_norm": 0.6948115229606628, "learning_rate": 3.807689964590466e-05, "loss": 0.2781, "step": 7414 }, { "epoch": 2.3807994862738804, "grad_norm": 0.6470552682876587, "learning_rate": 3.8049494458954725e-05, "loss": 0.3053, "step": 7415 }, { "epoch": 2.3811205650987315, "grad_norm": 0.6895164847373962, "learning_rate": 3.802209682048602e-05, "loss": 0.3258, "step": 7416 }, { "epoch": 2.3814416439235835, "grad_norm": 1.2019386291503906, "learning_rate": 3.799470673383674e-05, "loss": 0.3939, "step": 7417 }, { 
"epoch": 2.3817627227484346, "grad_norm": 0.884876012802124, "learning_rate": 3.796732420234443e-05, "loss": 0.3349, "step": 7418 }, { "epoch": 2.382083801573286, "grad_norm": 0.8389701247215271, "learning_rate": 3.793994922934544e-05, "loss": 0.3409, "step": 7419 }, { "epoch": 2.3824048803981377, "grad_norm": 0.9509716629981995, "learning_rate": 3.791258181817542e-05, "loss": 0.3532, "step": 7420 }, { "epoch": 2.3827259592229892, "grad_norm": 0.8582257628440857, "learning_rate": 3.788522197216897e-05, "loss": 0.3035, "step": 7421 }, { "epoch": 2.383047038047841, "grad_norm": 0.9152477979660034, "learning_rate": 3.785786969465981e-05, "loss": 0.3324, "step": 7422 }, { "epoch": 2.3833681168726923, "grad_norm": 0.8005543947219849, "learning_rate": 3.783052498898073e-05, "loss": 0.3398, "step": 7423 }, { "epoch": 2.383689195697544, "grad_norm": 0.5987676382064819, "learning_rate": 3.7803187858463605e-05, "loss": 0.287, "step": 7424 }, { "epoch": 2.384010274522395, "grad_norm": 0.4976361393928528, "learning_rate": 3.777585830643937e-05, "loss": 0.2578, "step": 7425 }, { "epoch": 2.384331353347247, "grad_norm": 0.7815603017807007, "learning_rate": 3.774853633623806e-05, "loss": 0.3316, "step": 7426 }, { "epoch": 2.384652432172098, "grad_norm": 0.36369022727012634, "learning_rate": 3.7721221951188765e-05, "loss": 0.2671, "step": 7427 }, { "epoch": 2.3849735109969497, "grad_norm": 0.5498353242874146, "learning_rate": 3.769391515461966e-05, "loss": 0.2742, "step": 7428 }, { "epoch": 2.385294589821801, "grad_norm": 0.7687121629714966, "learning_rate": 3.766661594985801e-05, "loss": 0.2926, "step": 7429 }, { "epoch": 2.3856156686466528, "grad_norm": 0.5641812086105347, "learning_rate": 3.7639324340230085e-05, "loss": 0.5458, "step": 7430 }, { "epoch": 2.3859367474715043, "grad_norm": 0.49390679597854614, "learning_rate": 3.7612040329061405e-05, "loss": 0.5351, "step": 7431 }, { "epoch": 2.386257826296356, "grad_norm": 0.48149552941322327, "learning_rate": 
3.7584763919676294e-05, "loss": 0.2797, "step": 7432 }, { "epoch": 2.3865789051212074, "grad_norm": 0.45125648379325867, "learning_rate": 3.755749511539845e-05, "loss": 0.2214, "step": 7433 }, { "epoch": 2.3868999839460585, "grad_norm": 0.28079113364219666, "learning_rate": 3.753023391955037e-05, "loss": 0.0733, "step": 7434 }, { "epoch": 2.3872210627709105, "grad_norm": 0.6530325412750244, "learning_rate": 3.7502980335453774e-05, "loss": 0.3432, "step": 7435 }, { "epoch": 2.3875421415957616, "grad_norm": 0.9349000453948975, "learning_rate": 3.747573436642951e-05, "loss": 0.4311, "step": 7436 }, { "epoch": 2.387863220420613, "grad_norm": 0.8858532905578613, "learning_rate": 3.7448496015797295e-05, "loss": 0.4246, "step": 7437 }, { "epoch": 2.3881842992454647, "grad_norm": 1.0124034881591797, "learning_rate": 3.742126528687614e-05, "loss": 0.3771, "step": 7438 }, { "epoch": 2.3885053780703163, "grad_norm": 0.7207339406013489, "learning_rate": 3.739404218298398e-05, "loss": 0.2938, "step": 7439 }, { "epoch": 2.388826456895168, "grad_norm": 0.8159089684486389, "learning_rate": 3.736682670743787e-05, "loss": 0.3231, "step": 7440 }, { "epoch": 2.3891475357200194, "grad_norm": 0.8773292303085327, "learning_rate": 3.733961886355398e-05, "loss": 0.3656, "step": 7441 }, { "epoch": 2.389468614544871, "grad_norm": 0.7147423624992371, "learning_rate": 3.7312418654647406e-05, "loss": 0.311, "step": 7442 }, { "epoch": 2.389789693369722, "grad_norm": 0.8349779844284058, "learning_rate": 3.728522608403249e-05, "loss": 0.3847, "step": 7443 }, { "epoch": 2.3901107721945736, "grad_norm": 0.7083280682563782, "learning_rate": 3.725804115502254e-05, "loss": 0.265, "step": 7444 }, { "epoch": 2.390431851019425, "grad_norm": 0.8586387634277344, "learning_rate": 3.7230863870929964e-05, "loss": 0.3667, "step": 7445 }, { "epoch": 2.3907529298442767, "grad_norm": 0.7868812680244446, "learning_rate": 3.720369423506622e-05, "loss": 0.335, "step": 7446 }, { "epoch": 2.3910740086691282, 
"grad_norm": 0.919206976890564, "learning_rate": 3.717653225074186e-05, "loss": 0.4075, "step": 7447 }, { "epoch": 2.3913950874939798, "grad_norm": 0.9069111347198486, "learning_rate": 3.714937792126647e-05, "loss": 0.3928, "step": 7448 }, { "epoch": 2.3917161663188313, "grad_norm": 0.7450005412101746, "learning_rate": 3.712223124994875e-05, "loss": 0.3179, "step": 7449 }, { "epoch": 2.392037245143683, "grad_norm": 0.90874844789505, "learning_rate": 3.709509224009641e-05, "loss": 0.3934, "step": 7450 }, { "epoch": 2.3923583239685344, "grad_norm": 0.797484815120697, "learning_rate": 3.7067960895016275e-05, "loss": 0.3171, "step": 7451 }, { "epoch": 2.3926794027933855, "grad_norm": 1.188254475593567, "learning_rate": 3.704083721801421e-05, "loss": 0.4681, "step": 7452 }, { "epoch": 2.393000481618237, "grad_norm": 0.8540223836898804, "learning_rate": 3.701372121239512e-05, "loss": 0.2897, "step": 7453 }, { "epoch": 2.3933215604430886, "grad_norm": 0.6741155385971069, "learning_rate": 3.698661288146311e-05, "loss": 0.265, "step": 7454 }, { "epoch": 2.39364263926794, "grad_norm": 0.8153589367866516, "learning_rate": 3.695951222852112e-05, "loss": 0.3477, "step": 7455 }, { "epoch": 2.3939637180927917, "grad_norm": 0.7138598561286926, "learning_rate": 3.6932419256871406e-05, "loss": 0.3001, "step": 7456 }, { "epoch": 2.3942847969176433, "grad_norm": 1.1851181983947754, "learning_rate": 3.690533396981504e-05, "loss": 0.4506, "step": 7457 }, { "epoch": 2.394605875742495, "grad_norm": 0.9050045013427734, "learning_rate": 3.687825637065236e-05, "loss": 0.3637, "step": 7458 }, { "epoch": 2.3949269545673464, "grad_norm": 0.7797844409942627, "learning_rate": 3.685118646268272e-05, "loss": 0.336, "step": 7459 }, { "epoch": 2.395248033392198, "grad_norm": 1.0224764347076416, "learning_rate": 3.682412424920438e-05, "loss": 0.3581, "step": 7460 }, { "epoch": 2.395569112217049, "grad_norm": 0.8182712197303772, "learning_rate": 3.679706973351491e-05, "loss": 0.307, "step": 7461 }, { 
"epoch": 2.3958901910419006, "grad_norm": 0.8494866490364075, "learning_rate": 3.677002291891078e-05, "loss": 0.3239, "step": 7462 }, { "epoch": 2.396211269866752, "grad_norm": 0.9537984728813171, "learning_rate": 3.674298380868756e-05, "loss": 0.3285, "step": 7463 }, { "epoch": 2.3965323486916037, "grad_norm": 0.9284290075302124, "learning_rate": 3.6715952406139885e-05, "loss": 0.3925, "step": 7464 }, { "epoch": 2.3968534275164552, "grad_norm": 1.1413391828536987, "learning_rate": 3.668892871456144e-05, "loss": 0.3875, "step": 7465 }, { "epoch": 2.397174506341307, "grad_norm": 0.987825870513916, "learning_rate": 3.6661912737245e-05, "loss": 0.3362, "step": 7466 }, { "epoch": 2.3974955851661583, "grad_norm": 0.8978787660598755, "learning_rate": 3.6634904477482354e-05, "loss": 0.3855, "step": 7467 }, { "epoch": 2.39781666399101, "grad_norm": 0.7444339990615845, "learning_rate": 3.6607903938564405e-05, "loss": 0.2704, "step": 7468 }, { "epoch": 2.3981377428158615, "grad_norm": 1.1076825857162476, "learning_rate": 3.6580911123781056e-05, "loss": 0.4605, "step": 7469 }, { "epoch": 2.3984588216407126, "grad_norm": 0.8118841052055359, "learning_rate": 3.6553926036421326e-05, "loss": 0.2976, "step": 7470 }, { "epoch": 2.398779900465564, "grad_norm": 0.8877241611480713, "learning_rate": 3.6526948679773257e-05, "loss": 0.3332, "step": 7471 }, { "epoch": 2.3991009792904157, "grad_norm": 0.7601701617240906, "learning_rate": 3.649997905712396e-05, "loss": 0.3221, "step": 7472 }, { "epoch": 2.399422058115267, "grad_norm": 1.4962290525436401, "learning_rate": 3.647301717175956e-05, "loss": 0.4457, "step": 7473 }, { "epoch": 2.3997431369401188, "grad_norm": 0.8516422510147095, "learning_rate": 3.6446063026965384e-05, "loss": 0.3285, "step": 7474 }, { "epoch": 2.4000642157649703, "grad_norm": 0.5712471008300781, "learning_rate": 3.641911662602559e-05, "loss": 0.2645, "step": 7475 }, { "epoch": 2.400385294589822, "grad_norm": 0.7448149919509888, "learning_rate": 
3.6392177972223594e-05, "loss": 0.3071, "step": 7476 }, { "epoch": 2.4007063734146734, "grad_norm": 0.5864403247833252, "learning_rate": 3.6365247068841814e-05, "loss": 0.2586, "step": 7477 }, { "epoch": 2.401027452239525, "grad_norm": 0.6279107928276062, "learning_rate": 3.633832391916159e-05, "loss": 0.2896, "step": 7478 }, { "epoch": 2.401348531064376, "grad_norm": 0.7097012996673584, "learning_rate": 3.631140852646355e-05, "loss": 0.3143, "step": 7479 }, { "epoch": 2.4016696098892276, "grad_norm": 0.42605191469192505, "learning_rate": 3.628450089402713e-05, "loss": 0.4756, "step": 7480 }, { "epoch": 2.401990688714079, "grad_norm": 0.4604340195655823, "learning_rate": 3.6257601025131026e-05, "loss": 0.7031, "step": 7481 }, { "epoch": 2.4023117675389307, "grad_norm": 0.3688209652900696, "learning_rate": 3.62307089230529e-05, "loss": 0.247, "step": 7482 }, { "epoch": 2.4026328463637823, "grad_norm": 0.48397693037986755, "learning_rate": 3.6203824591069456e-05, "loss": 0.2488, "step": 7483 }, { "epoch": 2.402953925188634, "grad_norm": 0.6211444735527039, "learning_rate": 3.6176948032456473e-05, "loss": 0.3253, "step": 7484 }, { "epoch": 2.4032750040134854, "grad_norm": 0.5152127146720886, "learning_rate": 3.615007925048878e-05, "loss": 0.209, "step": 7485 }, { "epoch": 2.403596082838337, "grad_norm": 0.44418853521347046, "learning_rate": 3.612321824844024e-05, "loss": 0.0993, "step": 7486 }, { "epoch": 2.4039171616631885, "grad_norm": 0.5577342510223389, "learning_rate": 3.60963650295838e-05, "loss": 0.2749, "step": 7487 }, { "epoch": 2.4042382404880396, "grad_norm": 0.7818149328231812, "learning_rate": 3.606951959719145e-05, "loss": 0.4772, "step": 7488 }, { "epoch": 2.404559319312891, "grad_norm": 0.7911422252655029, "learning_rate": 3.604268195453421e-05, "loss": 0.3716, "step": 7489 }, { "epoch": 2.4048803981377427, "grad_norm": 0.8051799535751343, "learning_rate": 3.601585210488218e-05, "loss": 0.4383, "step": 7490 }, { "epoch": 2.4052014769625942, 
"grad_norm": 0.8039785623550415, "learning_rate": 3.5989030051504434e-05, "loss": 0.3691, "step": 7491 }, { "epoch": 2.405522555787446, "grad_norm": 0.6632509827613831, "learning_rate": 3.59622157976693e-05, "loss": 0.2926, "step": 7492 }, { "epoch": 2.4058436346122973, "grad_norm": 0.8523510694503784, "learning_rate": 3.5935409346643835e-05, "loss": 0.3532, "step": 7493 }, { "epoch": 2.406164713437149, "grad_norm": 0.9791433811187744, "learning_rate": 3.590861070169449e-05, "loss": 0.3427, "step": 7494 }, { "epoch": 2.4064857922620004, "grad_norm": 0.6575258374214172, "learning_rate": 3.5881819866086484e-05, "loss": 0.3172, "step": 7495 }, { "epoch": 2.406806871086852, "grad_norm": 0.833772599697113, "learning_rate": 3.585503684308421e-05, "loss": 0.414, "step": 7496 }, { "epoch": 2.407127949911703, "grad_norm": 0.9059905409812927, "learning_rate": 3.582826163595119e-05, "loss": 0.3809, "step": 7497 }, { "epoch": 2.4074490287365546, "grad_norm": 1.0018295049667358, "learning_rate": 3.580149424794976e-05, "loss": 0.4041, "step": 7498 }, { "epoch": 2.407770107561406, "grad_norm": 0.6466946005821228, "learning_rate": 3.577473468234156e-05, "loss": 0.2834, "step": 7499 }, { "epoch": 2.4080911863862577, "grad_norm": 0.7386956214904785, "learning_rate": 3.574798294238713e-05, "loss": 0.3142, "step": 7500 }, { "epoch": 2.4084122652111093, "grad_norm": 0.9483809471130371, "learning_rate": 3.5721239031346066e-05, "loss": 0.3326, "step": 7501 }, { "epoch": 2.408733344035961, "grad_norm": 0.9816690683364868, "learning_rate": 3.569450295247706e-05, "loss": 0.3653, "step": 7502 }, { "epoch": 2.4090544228608124, "grad_norm": 0.8358184099197388, "learning_rate": 3.56677747090378e-05, "loss": 0.3264, "step": 7503 }, { "epoch": 2.409375501685664, "grad_norm": 0.8334686160087585, "learning_rate": 3.564105430428506e-05, "loss": 0.3277, "step": 7504 }, { "epoch": 2.4096965805105155, "grad_norm": 0.8004488945007324, "learning_rate": 3.561434174147463e-05, "loss": 0.2976, "step": 7505 
}, { "epoch": 2.4100176593353666, "grad_norm": 0.6906147599220276, "learning_rate": 3.558763702386135e-05, "loss": 0.2825, "step": 7506 }, { "epoch": 2.410338738160218, "grad_norm": 0.8044391870498657, "learning_rate": 3.556094015469913e-05, "loss": 0.3592, "step": 7507 }, { "epoch": 2.4106598169850697, "grad_norm": 0.8112503290176392, "learning_rate": 3.553425113724088e-05, "loss": 0.36, "step": 7508 }, { "epoch": 2.4109808958099213, "grad_norm": 0.9423220753669739, "learning_rate": 3.5507569974738574e-05, "loss": 0.3293, "step": 7509 }, { "epoch": 2.411301974634773, "grad_norm": 0.973623514175415, "learning_rate": 3.548089667044325e-05, "loss": 0.4033, "step": 7510 }, { "epoch": 2.4116230534596244, "grad_norm": 0.937961995601654, "learning_rate": 3.545423122760493e-05, "loss": 0.3519, "step": 7511 }, { "epoch": 2.411944132284476, "grad_norm": 0.8615179061889648, "learning_rate": 3.542757364947281e-05, "loss": 0.4388, "step": 7512 }, { "epoch": 2.4122652111093275, "grad_norm": 0.753441572189331, "learning_rate": 3.540092393929494e-05, "loss": 0.3198, "step": 7513 }, { "epoch": 2.412586289934179, "grad_norm": 0.9324014186859131, "learning_rate": 3.537428210031849e-05, "loss": 0.3705, "step": 7514 }, { "epoch": 2.41290736875903, "grad_norm": 0.7881529927253723, "learning_rate": 3.534764813578982e-05, "loss": 0.2409, "step": 7515 }, { "epoch": 2.4132284475838817, "grad_norm": 0.6942733526229858, "learning_rate": 3.5321022048954035e-05, "loss": 0.2954, "step": 7516 }, { "epoch": 2.413549526408733, "grad_norm": 0.7293675541877747, "learning_rate": 3.52944038430556e-05, "loss": 0.3183, "step": 7517 }, { "epoch": 2.4138706052335848, "grad_norm": 1.0365976095199585, "learning_rate": 3.52677935213377e-05, "loss": 0.3541, "step": 7518 }, { "epoch": 2.4141916840584363, "grad_norm": 0.6401132345199585, "learning_rate": 3.524119108704286e-05, "loss": 0.2692, "step": 7519 }, { "epoch": 2.414512762883288, "grad_norm": 0.7052464485168457, "learning_rate": 3.521459654341244e-05, 
"loss": 0.3227, "step": 7520 }, { "epoch": 2.4148338417081394, "grad_norm": 0.4697404205799103, "learning_rate": 3.518800989368691e-05, "loss": 0.2497, "step": 7521 }, { "epoch": 2.415154920532991, "grad_norm": 0.8850452303886414, "learning_rate": 3.516143114110582e-05, "loss": 0.3252, "step": 7522 }, { "epoch": 2.4154759993578425, "grad_norm": 0.8042798638343811, "learning_rate": 3.51348602889076e-05, "loss": 0.3285, "step": 7523 }, { "epoch": 2.4157970781826936, "grad_norm": 0.9007611274719238, "learning_rate": 3.510829734032993e-05, "loss": 0.3187, "step": 7524 }, { "epoch": 2.416118157007545, "grad_norm": 1.6435385942459106, "learning_rate": 3.50817422986094e-05, "loss": 0.2828, "step": 7525 }, { "epoch": 2.4164392358323967, "grad_norm": 0.31267601251602173, "learning_rate": 3.5055195166981645e-05, "loss": 0.2312, "step": 7526 }, { "epoch": 2.4167603146572483, "grad_norm": 0.6988046169281006, "learning_rate": 3.5028655948681355e-05, "loss": 0.314, "step": 7527 }, { "epoch": 2.4170813934821, "grad_norm": 0.6505711078643799, "learning_rate": 3.500212464694227e-05, "loss": 0.289, "step": 7528 }, { "epoch": 2.4174024723069514, "grad_norm": 0.6511385440826416, "learning_rate": 3.497560126499709e-05, "loss": 0.298, "step": 7529 }, { "epoch": 2.417723551131803, "grad_norm": 0.5870752930641174, "learning_rate": 3.494908580607774e-05, "loss": 0.7781, "step": 7530 }, { "epoch": 2.4180446299566545, "grad_norm": 0.5827964544296265, "learning_rate": 3.492257827341492e-05, "loss": 0.5468, "step": 7531 }, { "epoch": 2.418365708781506, "grad_norm": 0.3877847194671631, "learning_rate": 3.489607867023854e-05, "loss": 0.2175, "step": 7532 }, { "epoch": 2.418686787606357, "grad_norm": 0.42042702436447144, "learning_rate": 3.4869586999777495e-05, "loss": 0.1292, "step": 7533 }, { "epoch": 2.4190078664312087, "grad_norm": 0.42274191975593567, "learning_rate": 3.484310326525967e-05, "loss": 0.0746, "step": 7534 }, { "epoch": 2.4193289452560602, "grad_norm": 0.24631276726722717, 
"learning_rate": 3.481662746991214e-05, "loss": 0.0749, "step": 7535 }, { "epoch": 2.419650024080912, "grad_norm": 0.48242461681365967, "learning_rate": 3.479015961696077e-05, "loss": 0.1554, "step": 7536 }, { "epoch": 2.4199711029057633, "grad_norm": 0.8864598870277405, "learning_rate": 3.4763699709630716e-05, "loss": 0.4802, "step": 7537 }, { "epoch": 2.420292181730615, "grad_norm": 1.016950249671936, "learning_rate": 3.4737247751145896e-05, "loss": 0.5052, "step": 7538 }, { "epoch": 2.4206132605554664, "grad_norm": 0.9051849842071533, "learning_rate": 3.4710803744729515e-05, "loss": 0.381, "step": 7539 }, { "epoch": 2.420934339380318, "grad_norm": 0.9121940732002258, "learning_rate": 3.468436769360368e-05, "loss": 0.3998, "step": 7540 }, { "epoch": 2.4212554182051695, "grad_norm": 0.698441743850708, "learning_rate": 3.465793960098945e-05, "loss": 0.2916, "step": 7541 }, { "epoch": 2.4215764970300206, "grad_norm": 0.7434770464897156, "learning_rate": 3.463151947010712e-05, "loss": 0.3105, "step": 7542 }, { "epoch": 2.421897575854872, "grad_norm": 0.6803128123283386, "learning_rate": 3.460510730417585e-05, "loss": 0.332, "step": 7543 }, { "epoch": 2.4222186546797237, "grad_norm": 0.7351831197738647, "learning_rate": 3.4578703106413904e-05, "loss": 0.2964, "step": 7544 }, { "epoch": 2.4225397335045753, "grad_norm": 0.8372403383255005, "learning_rate": 3.455230688003852e-05, "loss": 0.3135, "step": 7545 }, { "epoch": 2.422860812329427, "grad_norm": 0.7963913679122925, "learning_rate": 3.452591862826603e-05, "loss": 0.3311, "step": 7546 }, { "epoch": 2.4231818911542784, "grad_norm": 0.9390442967414856, "learning_rate": 3.4499538354311755e-05, "loss": 0.4368, "step": 7547 }, { "epoch": 2.42350296997913, "grad_norm": 0.8431958556175232, "learning_rate": 3.447316606139004e-05, "loss": 0.3649, "step": 7548 }, { "epoch": 2.4238240488039815, "grad_norm": 1.0568405389785767, "learning_rate": 3.444680175271428e-05, "loss": 0.369, "step": 7549 }, { "epoch": 2.424145127628833, 
"grad_norm": 0.8188615441322327, "learning_rate": 3.442044543149688e-05, "loss": 0.348, "step": 7550 }, { "epoch": 2.424466206453684, "grad_norm": 1.063718557357788, "learning_rate": 3.439409710094929e-05, "loss": 0.4778, "step": 7551 }, { "epoch": 2.4247872852785357, "grad_norm": 0.9398375153541565, "learning_rate": 3.4367756764281955e-05, "loss": 0.3605, "step": 7552 }, { "epoch": 2.4251083641033873, "grad_norm": 0.9994428157806396, "learning_rate": 3.4341424424704375e-05, "loss": 0.3958, "step": 7553 }, { "epoch": 2.425429442928239, "grad_norm": 0.8988476395606995, "learning_rate": 3.4315100085425034e-05, "loss": 0.3561, "step": 7554 }, { "epoch": 2.4257505217530904, "grad_norm": 1.0344244241714478, "learning_rate": 3.4288783749651564e-05, "loss": 0.402, "step": 7555 }, { "epoch": 2.426071600577942, "grad_norm": 1.1983534097671509, "learning_rate": 3.426247542059041e-05, "loss": 0.403, "step": 7556 }, { "epoch": 2.4263926794027935, "grad_norm": 0.7842648029327393, "learning_rate": 3.423617510144727e-05, "loss": 0.3219, "step": 7557 }, { "epoch": 2.426713758227645, "grad_norm": 0.8381021022796631, "learning_rate": 3.4209882795426716e-05, "loss": 0.3593, "step": 7558 }, { "epoch": 2.4270348370524966, "grad_norm": 1.0330874919891357, "learning_rate": 3.418359850573234e-05, "loss": 0.3792, "step": 7559 }, { "epoch": 2.4273559158773477, "grad_norm": 0.9594546556472778, "learning_rate": 3.415732223556689e-05, "loss": 0.3862, "step": 7560 }, { "epoch": 2.427676994702199, "grad_norm": 0.9516655206680298, "learning_rate": 3.413105398813195e-05, "loss": 0.297, "step": 7561 }, { "epoch": 2.4279980735270508, "grad_norm": 0.8917472958564758, "learning_rate": 3.4104793766628304e-05, "loss": 0.2836, "step": 7562 }, { "epoch": 2.4283191523519023, "grad_norm": 0.5682129263877869, "learning_rate": 3.4078541574255664e-05, "loss": 0.2329, "step": 7563 }, { "epoch": 2.428640231176754, "grad_norm": 0.8117547631263733, "learning_rate": 3.4052297414212777e-05, "loss": 0.3157, "step": 
7564 }, { "epoch": 2.4289613100016054, "grad_norm": 0.8948675394058228, "learning_rate": 3.40260612896974e-05, "loss": 0.2925, "step": 7565 }, { "epoch": 2.429282388826457, "grad_norm": 0.8941418528556824, "learning_rate": 3.3999833203906326e-05, "loss": 0.2747, "step": 7566 }, { "epoch": 2.4296034676513085, "grad_norm": 0.6833550333976746, "learning_rate": 3.397361316003539e-05, "loss": 0.2783, "step": 7567 }, { "epoch": 2.42992454647616, "grad_norm": 0.571975588798523, "learning_rate": 3.394740116127941e-05, "loss": 0.2373, "step": 7568 }, { "epoch": 2.430245625301011, "grad_norm": 0.6630203723907471, "learning_rate": 3.3921197210832235e-05, "loss": 0.2824, "step": 7569 }, { "epoch": 2.4305667041258627, "grad_norm": 0.7997381091117859, "learning_rate": 3.389500131188674e-05, "loss": 0.3572, "step": 7570 }, { "epoch": 2.4308877829507143, "grad_norm": 0.9815787076950073, "learning_rate": 3.386881346763483e-05, "loss": 0.4214, "step": 7571 }, { "epoch": 2.431208861775566, "grad_norm": 0.8200189471244812, "learning_rate": 3.3842633681267356e-05, "loss": 0.3118, "step": 7572 }, { "epoch": 2.4315299406004174, "grad_norm": 0.8650208115577698, "learning_rate": 3.3816461955974365e-05, "loss": 0.3753, "step": 7573 }, { "epoch": 2.431851019425269, "grad_norm": 0.7072727084159851, "learning_rate": 3.379029829494469e-05, "loss": 0.2879, "step": 7574 }, { "epoch": 2.4321720982501205, "grad_norm": 0.8465923070907593, "learning_rate": 3.376414270136633e-05, "loss": 0.3194, "step": 7575 }, { "epoch": 2.432493177074972, "grad_norm": 0.7302094101905823, "learning_rate": 3.373799517842627e-05, "loss": 0.307, "step": 7576 }, { "epoch": 2.4328142558998236, "grad_norm": 0.8451035022735596, "learning_rate": 3.371185572931048e-05, "loss": 0.3004, "step": 7577 }, { "epoch": 2.4331353347246747, "grad_norm": 0.870238184928894, "learning_rate": 3.3685724357204054e-05, "loss": 0.2926, "step": 7578 }, { "epoch": 2.4334564135495262, "grad_norm": 0.525023877620697, "learning_rate": 
3.3659601065290893e-05, "loss": 0.282, "step": 7579 }, { "epoch": 2.433777492374378, "grad_norm": 0.5372437834739685, "learning_rate": 3.363348585675414e-05, "loss": 0.5397, "step": 7580 }, { "epoch": 2.4340985711992293, "grad_norm": 0.5941777229309082, "learning_rate": 3.360737873477584e-05, "loss": 0.6689, "step": 7581 }, { "epoch": 2.434419650024081, "grad_norm": 0.35295045375823975, "learning_rate": 3.358127970253704e-05, "loss": 0.2129, "step": 7582 }, { "epoch": 2.4347407288489324, "grad_norm": 0.43210405111312866, "learning_rate": 3.355518876321787e-05, "loss": 0.1644, "step": 7583 }, { "epoch": 2.435061807673784, "grad_norm": 0.26534727215766907, "learning_rate": 3.352910591999734e-05, "loss": 0.076, "step": 7584 }, { "epoch": 2.4353828864986355, "grad_norm": 0.1900699883699417, "learning_rate": 3.3503031176053656e-05, "loss": 0.072, "step": 7585 }, { "epoch": 2.435703965323487, "grad_norm": 0.45870304107666016, "learning_rate": 3.347696453456393e-05, "loss": 0.1865, "step": 7586 }, { "epoch": 2.436025044148338, "grad_norm": 0.7371842861175537, "learning_rate": 3.3450905998704275e-05, "loss": 0.341, "step": 7587 }, { "epoch": 2.4363461229731898, "grad_norm": 0.7692381143569946, "learning_rate": 3.342485557164986e-05, "loss": 0.4095, "step": 7588 }, { "epoch": 2.4366672017980413, "grad_norm": 0.7908117175102234, "learning_rate": 3.339881325657484e-05, "loss": 0.3606, "step": 7589 }, { "epoch": 2.436988280622893, "grad_norm": 0.7690086364746094, "learning_rate": 3.3372779056652426e-05, "loss": 0.3458, "step": 7590 }, { "epoch": 2.4373093594477444, "grad_norm": 0.8492259383201599, "learning_rate": 3.334675297505476e-05, "loss": 0.3107, "step": 7591 }, { "epoch": 2.437630438272596, "grad_norm": 0.6811227202415466, "learning_rate": 3.3320735014953076e-05, "loss": 0.2832, "step": 7592 }, { "epoch": 2.4379515170974475, "grad_norm": 0.8364322781562805, "learning_rate": 3.3294725179517574e-05, "loss": 0.322, "step": 7593 }, { "epoch": 2.438272595922299, "grad_norm": 
0.9228085279464722, "learning_rate": 3.326872347191746e-05, "loss": 0.4433, "step": 7594 }, { "epoch": 2.4385936747471506, "grad_norm": 0.5413333773612976, "learning_rate": 3.3242729895320946e-05, "loss": 0.2079, "step": 7595 }, { "epoch": 2.4389147535720017, "grad_norm": 0.7873682975769043, "learning_rate": 3.3216744452895354e-05, "loss": 0.3379, "step": 7596 }, { "epoch": 2.4392358323968533, "grad_norm": 0.885543167591095, "learning_rate": 3.319076714780682e-05, "loss": 0.3296, "step": 7597 }, { "epoch": 2.439556911221705, "grad_norm": 0.9024078845977783, "learning_rate": 3.316479798322072e-05, "loss": 0.3597, "step": 7598 }, { "epoch": 2.4398779900465564, "grad_norm": 0.9780003428459167, "learning_rate": 3.313883696230119e-05, "loss": 0.4125, "step": 7599 }, { "epoch": 2.440199068871408, "grad_norm": 2.2751457691192627, "learning_rate": 3.311288408821159e-05, "loss": 0.402, "step": 7600 }, { "epoch": 2.4405201476962595, "grad_norm": 1.0487151145935059, "learning_rate": 3.308693936411421e-05, "loss": 0.4024, "step": 7601 }, { "epoch": 2.440841226521111, "grad_norm": 1.0639290809631348, "learning_rate": 3.306100279317024e-05, "loss": 0.4246, "step": 7602 }, { "epoch": 2.4411623053459626, "grad_norm": 0.9267032146453857, "learning_rate": 3.303507437854009e-05, "loss": 0.3029, "step": 7603 }, { "epoch": 2.441483384170814, "grad_norm": 0.8686235547065735, "learning_rate": 3.3009154123382936e-05, "loss": 0.3351, "step": 7604 }, { "epoch": 2.4418044629956652, "grad_norm": 0.9154815077781677, "learning_rate": 3.2983242030857174e-05, "loss": 0.2886, "step": 7605 }, { "epoch": 2.4421255418205168, "grad_norm": 0.6667101383209229, "learning_rate": 3.2957338104120096e-05, "loss": 0.312, "step": 7606 }, { "epoch": 2.4424466206453683, "grad_norm": 0.8355054259300232, "learning_rate": 3.2931442346328004e-05, "loss": 0.384, "step": 7607 }, { "epoch": 2.44276769947022, "grad_norm": 0.8191713094711304, "learning_rate": 3.290555476063622e-05, "loss": 0.3421, "step": 7608 }, { 
"epoch": 2.4430887782950714, "grad_norm": 0.9118049740791321, "learning_rate": 3.287967535019908e-05, "loss": 0.3099, "step": 7609 }, { "epoch": 2.443409857119923, "grad_norm": 1.0478988885879517, "learning_rate": 3.285380411816988e-05, "loss": 0.4101, "step": 7610 }, { "epoch": 2.4437309359447745, "grad_norm": 1.4113534688949585, "learning_rate": 3.2827941067700996e-05, "loss": 0.3687, "step": 7611 }, { "epoch": 2.444052014769626, "grad_norm": 0.7927390336990356, "learning_rate": 3.2802086201943724e-05, "loss": 0.2911, "step": 7612 }, { "epoch": 2.4443730935944776, "grad_norm": 0.8057661056518555, "learning_rate": 3.277623952404842e-05, "loss": 0.3085, "step": 7613 }, { "epoch": 2.4446941724193287, "grad_norm": 0.8335930705070496, "learning_rate": 3.275040103716441e-05, "loss": 0.3725, "step": 7614 }, { "epoch": 2.4450152512441803, "grad_norm": 0.7692309617996216, "learning_rate": 3.272457074444003e-05, "loss": 0.3191, "step": 7615 }, { "epoch": 2.445336330069032, "grad_norm": 0.9450210928916931, "learning_rate": 3.269874864902269e-05, "loss": 0.3662, "step": 7616 }, { "epoch": 2.4456574088938834, "grad_norm": 0.6600220203399658, "learning_rate": 3.2672934754058616e-05, "loss": 0.3059, "step": 7617 }, { "epoch": 2.445978487718735, "grad_norm": 0.8459724187850952, "learning_rate": 3.264712906269328e-05, "loss": 0.3184, "step": 7618 }, { "epoch": 2.4462995665435865, "grad_norm": 1.1846095323562622, "learning_rate": 3.2621331578070934e-05, "loss": 0.2196, "step": 7619 }, { "epoch": 2.446620645368438, "grad_norm": 0.5867680311203003, "learning_rate": 3.2595542303334924e-05, "loss": 0.2611, "step": 7620 }, { "epoch": 2.4469417241932896, "grad_norm": 0.8343913555145264, "learning_rate": 3.2569761241627696e-05, "loss": 0.3502, "step": 7621 }, { "epoch": 2.447262803018141, "grad_norm": 0.803591251373291, "learning_rate": 3.254398839609044e-05, "loss": 0.2839, "step": 7622 }, { "epoch": 2.4475838818429922, "grad_norm": 0.7186933755874634, "learning_rate": 
3.251822376986363e-05, "loss": 0.3149, "step": 7623 }, { "epoch": 2.447904960667844, "grad_norm": 0.8497990369796753, "learning_rate": 3.249246736608655e-05, "loss": 0.3579, "step": 7624 }, { "epoch": 2.4482260394926953, "grad_norm": 0.5227159857749939, "learning_rate": 3.246671918789755e-05, "loss": 0.2571, "step": 7625 }, { "epoch": 2.448547118317547, "grad_norm": 0.8302345275878906, "learning_rate": 3.244097923843398e-05, "loss": 0.3503, "step": 7626 }, { "epoch": 2.4488681971423985, "grad_norm": 0.5355742573738098, "learning_rate": 3.2415247520832146e-05, "loss": 0.2695, "step": 7627 }, { "epoch": 2.44918927596725, "grad_norm": 0.7447862029075623, "learning_rate": 3.23895240382274e-05, "loss": 0.2746, "step": 7628 }, { "epoch": 2.4495103547921016, "grad_norm": 0.46801793575286865, "learning_rate": 3.236380879375408e-05, "loss": 0.3045, "step": 7629 }, { "epoch": 2.449831433616953, "grad_norm": 0.5388785004615784, "learning_rate": 3.233810179054548e-05, "loss": 0.9426, "step": 7630 }, { "epoch": 2.4501525124418047, "grad_norm": 0.4183870553970337, "learning_rate": 3.231240303173394e-05, "loss": 0.399, "step": 7631 }, { "epoch": 2.4504735912666558, "grad_norm": 0.9477866888046265, "learning_rate": 3.2286712520450765e-05, "loss": 0.2928, "step": 7632 }, { "epoch": 2.4507946700915073, "grad_norm": 0.34258604049682617, "learning_rate": 3.226103025982628e-05, "loss": 0.0871, "step": 7633 }, { "epoch": 2.451115748916359, "grad_norm": 0.6076464056968689, "learning_rate": 3.223535625298979e-05, "loss": 0.2203, "step": 7634 }, { "epoch": 2.4514368277412104, "grad_norm": 0.3136209547519684, "learning_rate": 3.220969050306955e-05, "loss": 0.1264, "step": 7635 }, { "epoch": 2.451757906566062, "grad_norm": 0.8506674766540527, "learning_rate": 3.218403301319296e-05, "loss": 0.4329, "step": 7636 }, { "epoch": 2.4520789853909135, "grad_norm": 0.8831260800361633, "learning_rate": 3.21583837864862e-05, "loss": 0.4926, "step": 7637 }, { "epoch": 2.452400064215765, "grad_norm": 
0.8367322683334351, "learning_rate": 3.213274282607457e-05, "loss": 0.3901, "step": 7638 }, { "epoch": 2.4527211430406166, "grad_norm": 0.824145495891571, "learning_rate": 3.210711013508242e-05, "loss": 0.3007, "step": 7639 }, { "epoch": 2.453042221865468, "grad_norm": 0.8876867890357971, "learning_rate": 3.208148571663289e-05, "loss": 0.3398, "step": 7640 }, { "epoch": 2.4533633006903193, "grad_norm": 0.6797581315040588, "learning_rate": 3.205586957384838e-05, "loss": 0.2937, "step": 7641 }, { "epoch": 2.453684379515171, "grad_norm": 0.7675800919532776, "learning_rate": 3.2030261709849996e-05, "loss": 0.3564, "step": 7642 }, { "epoch": 2.4540054583400224, "grad_norm": 0.7745330929756165, "learning_rate": 3.200466212775808e-05, "loss": 0.3317, "step": 7643 }, { "epoch": 2.454326537164874, "grad_norm": 0.7452265024185181, "learning_rate": 3.197907083069184e-05, "loss": 0.3462, "step": 7644 }, { "epoch": 2.4546476159897255, "grad_norm": 0.8958961963653564, "learning_rate": 3.195348782176948e-05, "loss": 0.3256, "step": 7645 }, { "epoch": 2.454968694814577, "grad_norm": 0.9540597200393677, "learning_rate": 3.192791310410822e-05, "loss": 0.377, "step": 7646 }, { "epoch": 2.4552897736394286, "grad_norm": 1.0042481422424316, "learning_rate": 3.190234668082427e-05, "loss": 0.3533, "step": 7647 }, { "epoch": 2.45561085246428, "grad_norm": 1.0041635036468506, "learning_rate": 3.187678855503282e-05, "loss": 0.3521, "step": 7648 }, { "epoch": 2.4559319312891317, "grad_norm": 0.8743497133255005, "learning_rate": 3.1851238729848034e-05, "loss": 0.333, "step": 7649 }, { "epoch": 2.456253010113983, "grad_norm": 0.8867911696434021, "learning_rate": 3.1825697208383096e-05, "loss": 0.3217, "step": 7650 }, { "epoch": 2.4565740889388343, "grad_norm": 0.6222565770149231, "learning_rate": 3.1800163993750166e-05, "loss": 0.2614, "step": 7651 }, { "epoch": 2.456895167763686, "grad_norm": 1.0342620611190796, "learning_rate": 3.1774639089060363e-05, "loss": 0.3954, "step": 7652 }, { 
"epoch": 2.4572162465885374, "grad_norm": 0.7965120077133179, "learning_rate": 3.174912249742382e-05, "loss": 0.2804, "step": 7653 }, { "epoch": 2.457537325413389, "grad_norm": 0.7983464002609253, "learning_rate": 3.172361422194974e-05, "loss": 0.2956, "step": 7654 }, { "epoch": 2.4578584042382405, "grad_norm": 0.8190217018127441, "learning_rate": 3.1698114265746124e-05, "loss": 0.3554, "step": 7655 }, { "epoch": 2.458179483063092, "grad_norm": 0.7282566428184509, "learning_rate": 3.16726226319201e-05, "loss": 0.2899, "step": 7656 }, { "epoch": 2.4585005618879436, "grad_norm": 0.8753344416618347, "learning_rate": 3.164713932357776e-05, "loss": 0.347, "step": 7657 }, { "epoch": 2.458821640712795, "grad_norm": 0.8597754836082458, "learning_rate": 3.162166434382412e-05, "loss": 0.2803, "step": 7658 }, { "epoch": 2.4591427195376463, "grad_norm": 0.7598649263381958, "learning_rate": 3.159619769576333e-05, "loss": 0.2793, "step": 7659 }, { "epoch": 2.459463798362498, "grad_norm": 1.1359360218048096, "learning_rate": 3.157073938249829e-05, "loss": 0.3773, "step": 7660 }, { "epoch": 2.4597848771873494, "grad_norm": 1.1523734331130981, "learning_rate": 3.154528940713113e-05, "loss": 0.3819, "step": 7661 }, { "epoch": 2.460105956012201, "grad_norm": 0.9038000106811523, "learning_rate": 3.15198477727628e-05, "loss": 0.3285, "step": 7662 }, { "epoch": 2.4604270348370525, "grad_norm": 0.7829211354255676, "learning_rate": 3.1494414482493304e-05, "loss": 0.3033, "step": 7663 }, { "epoch": 2.460748113661904, "grad_norm": 0.8830636739730835, "learning_rate": 3.146898953942163e-05, "loss": 0.3084, "step": 7664 }, { "epoch": 2.4610691924867556, "grad_norm": 0.527309775352478, "learning_rate": 3.144357294664565e-05, "loss": 0.2242, "step": 7665 }, { "epoch": 2.461390271311607, "grad_norm": 1.0569745302200317, "learning_rate": 3.141816470726238e-05, "loss": 0.3431, "step": 7666 }, { "epoch": 2.4617113501364587, "grad_norm": 0.7063748240470886, "learning_rate": 3.1392764824367704e-05, 
"loss": 0.2735, "step": 7667 }, { "epoch": 2.46203242896131, "grad_norm": 0.5551251769065857, "learning_rate": 3.1367373301056536e-05, "loss": 0.2571, "step": 7668 }, { "epoch": 2.4623535077861614, "grad_norm": 0.8918856978416443, "learning_rate": 3.134199014042274e-05, "loss": 0.305, "step": 7669 }, { "epoch": 2.462674586611013, "grad_norm": 0.7437170743942261, "learning_rate": 3.1316615345559185e-05, "loss": 0.2788, "step": 7670 }, { "epoch": 2.4629956654358645, "grad_norm": 1.070360779762268, "learning_rate": 3.129124891955771e-05, "loss": 0.4669, "step": 7671 }, { "epoch": 2.463316744260716, "grad_norm": 0.8715602159500122, "learning_rate": 3.126589086550914e-05, "loss": 0.3444, "step": 7672 }, { "epoch": 2.4636378230855676, "grad_norm": 0.7617392539978027, "learning_rate": 3.124054118650327e-05, "loss": 0.2923, "step": 7673 }, { "epoch": 2.463958901910419, "grad_norm": 0.5233041644096375, "learning_rate": 3.12151998856289e-05, "loss": 0.2442, "step": 7674 }, { "epoch": 2.4642799807352707, "grad_norm": 0.776607871055603, "learning_rate": 3.1189866965973766e-05, "loss": 0.3125, "step": 7675 }, { "epoch": 2.464601059560122, "grad_norm": 0.5157756209373474, "learning_rate": 3.116454243062459e-05, "loss": 0.2563, "step": 7676 }, { "epoch": 2.4649221383849733, "grad_norm": 0.7321842312812805, "learning_rate": 3.113922628266718e-05, "loss": 0.3024, "step": 7677 }, { "epoch": 2.465243217209825, "grad_norm": 0.6504801511764526, "learning_rate": 3.111391852518611e-05, "loss": 0.3101, "step": 7678 }, { "epoch": 2.4655642960346764, "grad_norm": 0.39098212122917175, "learning_rate": 3.108861916126518e-05, "loss": 0.2716, "step": 7679 }, { "epoch": 2.465885374859528, "grad_norm": 0.5527970194816589, "learning_rate": 3.1063328193986904e-05, "loss": 0.8426, "step": 7680 }, { "epoch": 2.4662064536843795, "grad_norm": 0.3802039623260498, "learning_rate": 3.103804562643302e-05, "loss": 0.4868, "step": 7681 }, { "epoch": 2.466527532509231, "grad_norm": 0.40410691499710083, 
"learning_rate": 3.101277146168412e-05, "loss": 0.2456, "step": 7682 }, { "epoch": 2.4668486113340826, "grad_norm": 0.2699540853500366, "learning_rate": 3.098750570281969e-05, "loss": 0.0755, "step": 7683 }, { "epoch": 2.467169690158934, "grad_norm": 0.47554266452789307, "learning_rate": 3.096224835291839e-05, "loss": 0.1443, "step": 7684 }, { "epoch": 2.4674907689837857, "grad_norm": 0.6371931433677673, "learning_rate": 3.093699941505771e-05, "loss": 0.2333, "step": 7685 }, { "epoch": 2.467811847808637, "grad_norm": 0.8317315578460693, "learning_rate": 3.0911758892314166e-05, "loss": 0.4051, "step": 7686 }, { "epoch": 2.4681329266334884, "grad_norm": 0.8877337574958801, "learning_rate": 3.0886526787763234e-05, "loss": 0.4092, "step": 7687 }, { "epoch": 2.46845400545834, "grad_norm": 0.8633880019187927, "learning_rate": 3.086130310447937e-05, "loss": 0.4231, "step": 7688 }, { "epoch": 2.4687750842831915, "grad_norm": 0.7211251854896545, "learning_rate": 3.0836087845536e-05, "loss": 0.2923, "step": 7689 }, { "epoch": 2.469096163108043, "grad_norm": 0.9071685671806335, "learning_rate": 3.081088101400552e-05, "loss": 0.3378, "step": 7690 }, { "epoch": 2.4694172419328946, "grad_norm": 0.9691846370697021, "learning_rate": 3.078568261295933e-05, "loss": 0.3977, "step": 7691 }, { "epoch": 2.469738320757746, "grad_norm": 0.9273397326469421, "learning_rate": 3.0760492645467765e-05, "loss": 0.3731, "step": 7692 }, { "epoch": 2.4700593995825977, "grad_norm": 0.8951248526573181, "learning_rate": 3.073531111460013e-05, "loss": 0.4538, "step": 7693 }, { "epoch": 2.4703804784074492, "grad_norm": 0.868762731552124, "learning_rate": 3.071013802342475e-05, "loss": 0.374, "step": 7694 }, { "epoch": 2.4707015572323003, "grad_norm": 0.614288330078125, "learning_rate": 3.068497337500886e-05, "loss": 0.2634, "step": 7695 }, { "epoch": 2.471022636057152, "grad_norm": 1.016533374786377, "learning_rate": 3.0659817172418693e-05, "loss": 0.3648, "step": 7696 }, { "epoch": 2.4713437148820034, 
"grad_norm": 1.118348240852356, "learning_rate": 3.063466941871952e-05, "loss": 0.4014, "step": 7697 }, { "epoch": 2.471664793706855, "grad_norm": 0.9089675545692444, "learning_rate": 3.060953011697545e-05, "loss": 0.3507, "step": 7698 }, { "epoch": 2.4719858725317065, "grad_norm": 0.9801891446113586, "learning_rate": 3.058439927024962e-05, "loss": 0.388, "step": 7699 }, { "epoch": 2.472306951356558, "grad_norm": 1.0153359174728394, "learning_rate": 3.0559276881604236e-05, "loss": 0.4152, "step": 7700 }, { "epoch": 2.4726280301814096, "grad_norm": 0.6754663586616516, "learning_rate": 3.053416295410026e-05, "loss": 0.279, "step": 7701 }, { "epoch": 2.472949109006261, "grad_norm": 0.8051523566246033, "learning_rate": 3.0509057490797888e-05, "loss": 0.3163, "step": 7702 }, { "epoch": 2.4732701878311127, "grad_norm": 0.8289381861686707, "learning_rate": 3.0483960494756016e-05, "loss": 0.281, "step": 7703 }, { "epoch": 2.473591266655964, "grad_norm": 0.7152822613716125, "learning_rate": 3.045887196903271e-05, "loss": 0.3059, "step": 7704 }, { "epoch": 2.4739123454808154, "grad_norm": 1.1248416900634766, "learning_rate": 3.0433791916684916e-05, "loss": 0.4753, "step": 7705 }, { "epoch": 2.474233424305667, "grad_norm": 1.4356248378753662, "learning_rate": 3.0408720340768572e-05, "loss": 0.5532, "step": 7706 }, { "epoch": 2.4745545031305185, "grad_norm": 0.6347241401672363, "learning_rate": 3.038365724433858e-05, "loss": 0.2673, "step": 7707 }, { "epoch": 2.47487558195537, "grad_norm": 0.8377917408943176, "learning_rate": 3.035860263044873e-05, "loss": 0.3614, "step": 7708 }, { "epoch": 2.4751966607802216, "grad_norm": 0.866231381893158, "learning_rate": 3.0333556502151926e-05, "loss": 0.318, "step": 7709 }, { "epoch": 2.475517739605073, "grad_norm": 0.899551272392273, "learning_rate": 3.0308518862499957e-05, "loss": 0.4259, "step": 7710 }, { "epoch": 2.4758388184299247, "grad_norm": 0.8847672343254089, "learning_rate": 3.0283489714543556e-05, "loss": 0.3208, "step": 7711 
}, { "epoch": 2.4761598972547763, "grad_norm": 1.1146432161331177, "learning_rate": 3.0258469061332463e-05, "loss": 0.4248, "step": 7712 }, { "epoch": 2.4764809760796274, "grad_norm": 0.6833528876304626, "learning_rate": 3.023345690591537e-05, "loss": 0.2858, "step": 7713 }, { "epoch": 2.476802054904479, "grad_norm": 0.6902426481246948, "learning_rate": 3.0208453251339885e-05, "loss": 0.3027, "step": 7714 }, { "epoch": 2.4771231337293305, "grad_norm": 0.7399451732635498, "learning_rate": 3.018345810065275e-05, "loss": 0.2832, "step": 7715 }, { "epoch": 2.477444212554182, "grad_norm": 0.6635550856590271, "learning_rate": 3.0158471456899428e-05, "loss": 0.2934, "step": 7716 }, { "epoch": 2.4777652913790336, "grad_norm": 0.7805300354957581, "learning_rate": 3.0133493323124505e-05, "loss": 0.3349, "step": 7717 }, { "epoch": 2.478086370203885, "grad_norm": 1.5365228652954102, "learning_rate": 3.0108523702371505e-05, "loss": 0.3909, "step": 7718 }, { "epoch": 2.4784074490287367, "grad_norm": 0.8297216892242432, "learning_rate": 3.0083562597682847e-05, "loss": 0.2861, "step": 7719 }, { "epoch": 2.478728527853588, "grad_norm": 0.5565605163574219, "learning_rate": 3.0058610012100074e-05, "loss": 0.2976, "step": 7720 }, { "epoch": 2.4790496066784398, "grad_norm": 1.5360130071640015, "learning_rate": 3.0033665948663448e-05, "loss": 0.3868, "step": 7721 }, { "epoch": 2.479370685503291, "grad_norm": 0.8052739500999451, "learning_rate": 3.0008730410412466e-05, "loss": 0.3711, "step": 7722 }, { "epoch": 2.4796917643281424, "grad_norm": 0.7085245847702026, "learning_rate": 2.9983803400385312e-05, "loss": 0.3142, "step": 7723 }, { "epoch": 2.480012843152994, "grad_norm": 0.939500629901886, "learning_rate": 2.9958884921619367e-05, "loss": 0.3342, "step": 7724 }, { "epoch": 2.4803339219778455, "grad_norm": 0.8791356682777405, "learning_rate": 2.993397497715086e-05, "loss": 0.2739, "step": 7725 }, { "epoch": 2.480655000802697, "grad_norm": 0.8108116984367371, "learning_rate": 
2.9909073570014912e-05, "loss": 0.301, "step": 7726 }, { "epoch": 2.4809760796275486, "grad_norm": 0.6944962739944458, "learning_rate": 2.9884180703245767e-05, "loss": 0.3222, "step": 7727 }, { "epoch": 2.4812971584524, "grad_norm": 0.3254513144493103, "learning_rate": 2.9859296379876523e-05, "loss": 0.2407, "step": 7728 }, { "epoch": 2.4816182372772517, "grad_norm": 0.7786895632743835, "learning_rate": 2.9834420602939263e-05, "loss": 0.3052, "step": 7729 }, { "epoch": 2.4819393161021033, "grad_norm": 0.7794302701950073, "learning_rate": 2.9809553375465004e-05, "loss": 0.7187, "step": 7730 }, { "epoch": 2.4822603949269544, "grad_norm": 0.33989983797073364, "learning_rate": 2.9784694700483762e-05, "loss": 0.2142, "step": 7731 }, { "epoch": 2.482581473751806, "grad_norm": 0.5084500908851624, "learning_rate": 2.9759844581024486e-05, "loss": 0.2271, "step": 7732 }, { "epoch": 2.4829025525766575, "grad_norm": 0.4874570667743683, "learning_rate": 2.9735003020115092e-05, "loss": 0.0856, "step": 7733 }, { "epoch": 2.483223631401509, "grad_norm": 0.3979865610599518, "learning_rate": 2.9710170020782435e-05, "loss": 0.188, "step": 7734 }, { "epoch": 2.4835447102263606, "grad_norm": 0.5099960565567017, "learning_rate": 2.968534558605236e-05, "loss": 0.3362, "step": 7735 }, { "epoch": 2.483865789051212, "grad_norm": 0.5169116854667664, "learning_rate": 2.9660529718949627e-05, "loss": 0.3097, "step": 7736 }, { "epoch": 2.4841868678760637, "grad_norm": 0.9039494395256042, "learning_rate": 2.9635722422497993e-05, "loss": 0.4115, "step": 7737 }, { "epoch": 2.4845079467009152, "grad_norm": 0.8627937436103821, "learning_rate": 2.961092369972014e-05, "loss": 0.4387, "step": 7738 }, { "epoch": 2.484829025525767, "grad_norm": 0.8400534391403198, "learning_rate": 2.9586133553637683e-05, "loss": 0.4016, "step": 7739 }, { "epoch": 2.485150104350618, "grad_norm": 0.7134156823158264, "learning_rate": 2.9561351987271334e-05, "loss": 0.3185, "step": 7740 }, { "epoch": 2.4854711831754694, 
"grad_norm": 0.8354820609092712, "learning_rate": 2.953657900364053e-05, "loss": 0.4111, "step": 7741 }, { "epoch": 2.485792262000321, "grad_norm": 0.66590815782547, "learning_rate": 2.9511814605763855e-05, "loss": 0.2975, "step": 7742 }, { "epoch": 2.4861133408251725, "grad_norm": 0.7249523997306824, "learning_rate": 2.9487058796658783e-05, "loss": 0.3311, "step": 7743 }, { "epoch": 2.486434419650024, "grad_norm": 0.6020844578742981, "learning_rate": 2.9462311579341663e-05, "loss": 0.252, "step": 7744 }, { "epoch": 2.4867554984748756, "grad_norm": 0.8182882070541382, "learning_rate": 2.9437572956827964e-05, "loss": 0.3021, "step": 7745 }, { "epoch": 2.487076577299727, "grad_norm": 0.6254271268844604, "learning_rate": 2.94128429321319e-05, "loss": 0.2792, "step": 7746 }, { "epoch": 2.4873976561245787, "grad_norm": 0.83849036693573, "learning_rate": 2.938812150826684e-05, "loss": 0.3111, "step": 7747 }, { "epoch": 2.4877187349494303, "grad_norm": 1.0279197692871094, "learning_rate": 2.9363408688245e-05, "loss": 0.3787, "step": 7748 }, { "epoch": 2.4880398137742814, "grad_norm": 0.6685528755187988, "learning_rate": 2.933870447507753e-05, "loss": 0.3001, "step": 7749 }, { "epoch": 2.488360892599133, "grad_norm": 0.9342240691184998, "learning_rate": 2.931400887177459e-05, "loss": 0.3955, "step": 7750 }, { "epoch": 2.4886819714239845, "grad_norm": 0.6949548125267029, "learning_rate": 2.9289321881345254e-05, "loss": 0.3251, "step": 7751 }, { "epoch": 2.489003050248836, "grad_norm": 1.0305014848709106, "learning_rate": 2.926464350679756e-05, "loss": 0.3924, "step": 7752 }, { "epoch": 2.4893241290736876, "grad_norm": 0.8009200692176819, "learning_rate": 2.9239973751138495e-05, "loss": 0.3076, "step": 7753 }, { "epoch": 2.489645207898539, "grad_norm": 0.944476306438446, "learning_rate": 2.921531261737398e-05, "loss": 0.3573, "step": 7754 }, { "epoch": 2.4899662867233907, "grad_norm": 1.1194026470184326, "learning_rate": 2.9190660108508917e-05, "loss": 0.419, "step": 7755 }, 
{ "epoch": 2.4902873655482423, "grad_norm": 0.9375110864639282, "learning_rate": 2.9166016227547133e-05, "loss": 0.3467, "step": 7756 }, { "epoch": 2.490608444373094, "grad_norm": 0.7370654344558716, "learning_rate": 2.9141380977491373e-05, "loss": 0.3359, "step": 7757 }, { "epoch": 2.490929523197945, "grad_norm": 0.9551532864570618, "learning_rate": 2.911675436134347e-05, "loss": 0.3477, "step": 7758 }, { "epoch": 2.4912506020227965, "grad_norm": 0.8018618226051331, "learning_rate": 2.9092136382103973e-05, "loss": 0.2881, "step": 7759 }, { "epoch": 2.491571680847648, "grad_norm": 0.8496661186218262, "learning_rate": 2.9067527042772636e-05, "loss": 0.3263, "step": 7760 }, { "epoch": 2.4918927596724996, "grad_norm": 0.5800387859344482, "learning_rate": 2.904292634634793e-05, "loss": 0.2534, "step": 7761 }, { "epoch": 2.492213838497351, "grad_norm": 0.9090917706489563, "learning_rate": 2.9018334295827388e-05, "loss": 0.3376, "step": 7762 }, { "epoch": 2.4925349173222027, "grad_norm": 0.84961998462677, "learning_rate": 2.899375089420756e-05, "loss": 0.306, "step": 7763 }, { "epoch": 2.492855996147054, "grad_norm": 0.7612555623054504, "learning_rate": 2.8969176144483744e-05, "loss": 0.2584, "step": 7764 }, { "epoch": 2.4931770749719058, "grad_norm": 0.7672126293182373, "learning_rate": 2.894461004965038e-05, "loss": 0.2945, "step": 7765 }, { "epoch": 2.4934981537967573, "grad_norm": 1.0515177249908447, "learning_rate": 2.8920052612700754e-05, "loss": 0.3602, "step": 7766 }, { "epoch": 2.4938192326216084, "grad_norm": 0.710355818271637, "learning_rate": 2.8895503836627103e-05, "loss": 0.2777, "step": 7767 }, { "epoch": 2.49414031144646, "grad_norm": 0.9773350954055786, "learning_rate": 2.887096372442063e-05, "loss": 0.3712, "step": 7768 }, { "epoch": 2.4944613902713115, "grad_norm": 0.9016016125679016, "learning_rate": 2.8846432279071467e-05, "loss": 0.3312, "step": 7769 }, { "epoch": 2.494782469096163, "grad_norm": 0.8454534411430359, "learning_rate": 
2.88219095035687e-05, "loss": 0.3164, "step": 7770 }, { "epoch": 2.4951035479210146, "grad_norm": 0.6082969307899475, "learning_rate": 2.879739540090036e-05, "loss": 0.2435, "step": 7771 }, { "epoch": 2.495424626745866, "grad_norm": 1.3337501287460327, "learning_rate": 2.877288997405341e-05, "loss": 0.315, "step": 7772 }, { "epoch": 2.4957457055707177, "grad_norm": 0.6798154711723328, "learning_rate": 2.874839322601375e-05, "loss": 0.2806, "step": 7773 }, { "epoch": 2.4960667843955693, "grad_norm": 1.1664059162139893, "learning_rate": 2.872390515976625e-05, "loss": 0.3415, "step": 7774 }, { "epoch": 2.496387863220421, "grad_norm": 0.40442001819610596, "learning_rate": 2.869942577829471e-05, "loss": 0.2215, "step": 7775 }, { "epoch": 2.496708942045272, "grad_norm": 0.6790067553520203, "learning_rate": 2.8674955084581857e-05, "loss": 0.2721, "step": 7776 }, { "epoch": 2.4970300208701235, "grad_norm": 0.6047253608703613, "learning_rate": 2.865049308160934e-05, "loss": 0.2712, "step": 7777 }, { "epoch": 2.497351099694975, "grad_norm": 0.2929036617279053, "learning_rate": 2.8626039772357882e-05, "loss": 0.2281, "step": 7778 }, { "epoch": 2.4976721785198266, "grad_norm": 0.517440140247345, "learning_rate": 2.860159515980695e-05, "loss": 0.2949, "step": 7779 }, { "epoch": 2.497993257344678, "grad_norm": 0.5559537410736084, "learning_rate": 2.8577159246935037e-05, "loss": 0.6561, "step": 7780 }, { "epoch": 2.4983143361695297, "grad_norm": 0.4277403652667999, "learning_rate": 2.8552732036719687e-05, "loss": 0.5281, "step": 7781 }, { "epoch": 2.4986354149943812, "grad_norm": 0.5072173476219177, "learning_rate": 2.852831353213715e-05, "loss": 0.5995, "step": 7782 }, { "epoch": 2.498956493819233, "grad_norm": 0.4317438006401062, "learning_rate": 2.8503903736162875e-05, "loss": 0.1693, "step": 7783 }, { "epoch": 2.4992775726440843, "grad_norm": 0.39404723048210144, "learning_rate": 2.8479502651770995e-05, "loss": 0.1665, "step": 7784 }, { "epoch": 2.4995986514689355, 
"grad_norm": 0.3772273659706116, "learning_rate": 2.8455110281934803e-05, "loss": 0.1733, "step": 7785 }, { "epoch": 2.499919730293787, "grad_norm": 0.2818797528743744, "learning_rate": 2.8430726629626413e-05, "loss": 0.0803, "step": 7786 }, { "epoch": 2.5002408091186386, "grad_norm": 0.6350283026695251, "learning_rate": 2.840635169781688e-05, "loss": 0.3251, "step": 7787 }, { "epoch": 2.50056188794349, "grad_norm": 0.8361577987670898, "learning_rate": 2.838198548947627e-05, "loss": 0.4189, "step": 7788 }, { "epoch": 2.5008829667683417, "grad_norm": 0.7367003560066223, "learning_rate": 2.835762800757341e-05, "loss": 0.2988, "step": 7789 }, { "epoch": 2.501204045593193, "grad_norm": 0.8890562057495117, "learning_rate": 2.8333279255076306e-05, "loss": 0.4109, "step": 7790 }, { "epoch": 2.5015251244180448, "grad_norm": 0.8579180836677551, "learning_rate": 2.8308939234951726e-05, "loss": 0.3457, "step": 7791 }, { "epoch": 2.501846203242896, "grad_norm": 0.8420111536979675, "learning_rate": 2.8284607950165442e-05, "loss": 0.3082, "step": 7792 }, { "epoch": 2.502167282067748, "grad_norm": 0.78440922498703, "learning_rate": 2.826028540368215e-05, "loss": 0.2953, "step": 7793 }, { "epoch": 2.502488360892599, "grad_norm": 0.5694653391838074, "learning_rate": 2.823597159846547e-05, "loss": 0.2637, "step": 7794 }, { "epoch": 2.5028094397174505, "grad_norm": 0.77173912525177, "learning_rate": 2.8211666537477933e-05, "loss": 0.3009, "step": 7795 }, { "epoch": 2.503130518542302, "grad_norm": 0.9021281599998474, "learning_rate": 2.8187370223681132e-05, "loss": 0.3543, "step": 7796 }, { "epoch": 2.5034515973671536, "grad_norm": 0.8287353515625, "learning_rate": 2.816308266003541e-05, "loss": 0.307, "step": 7797 }, { "epoch": 2.503772676192005, "grad_norm": 1.0291224718093872, "learning_rate": 2.813880384950016e-05, "loss": 0.419, "step": 7798 }, { "epoch": 2.5040937550168567, "grad_norm": 0.7518448233604431, "learning_rate": 2.8114533795033683e-05, "loss": 0.3259, "step": 7799 }, 
{ "epoch": 2.5044148338417083, "grad_norm": 0.9459495544433594, "learning_rate": 2.8090272499593173e-05, "loss": 0.3573, "step": 7800 }, { "epoch": 2.5047359126665594, "grad_norm": 0.5885202884674072, "learning_rate": 2.8066019966134904e-05, "loss": 0.2741, "step": 7801 }, { "epoch": 2.5050569914914114, "grad_norm": 0.9306980967521667, "learning_rate": 2.8041776197613844e-05, "loss": 0.3992, "step": 7802 }, { "epoch": 2.5053780703162625, "grad_norm": 0.9558607339859009, "learning_rate": 2.8017541196984142e-05, "loss": 0.3941, "step": 7803 }, { "epoch": 2.505699149141114, "grad_norm": 0.9242830276489258, "learning_rate": 2.7993314967198635e-05, "loss": 0.3423, "step": 7804 }, { "epoch": 2.5060202279659656, "grad_norm": 0.6770075559616089, "learning_rate": 2.7969097511209308e-05, "loss": 0.273, "step": 7805 }, { "epoch": 2.506341306790817, "grad_norm": 0.7561593055725098, "learning_rate": 2.7944888831966987e-05, "loss": 0.2808, "step": 7806 }, { "epoch": 2.5066623856156687, "grad_norm": 0.8181188702583313, "learning_rate": 2.7920688932421335e-05, "loss": 0.3705, "step": 7807 }, { "epoch": 2.5069834644405202, "grad_norm": 0.762374758720398, "learning_rate": 2.7896497815521128e-05, "loss": 0.3188, "step": 7808 }, { "epoch": 2.5073045432653718, "grad_norm": 1.0830848217010498, "learning_rate": 2.7872315484213952e-05, "loss": 0.4268, "step": 7809 }, { "epoch": 2.507625622090223, "grad_norm": 1.1398093700408936, "learning_rate": 2.7848141941446347e-05, "loss": 0.4128, "step": 7810 }, { "epoch": 2.507946700915075, "grad_norm": 0.8498218059539795, "learning_rate": 2.7823977190163786e-05, "loss": 0.2717, "step": 7811 }, { "epoch": 2.508267779739926, "grad_norm": 1.4102894067764282, "learning_rate": 2.7799821233310674e-05, "loss": 0.4987, "step": 7812 }, { "epoch": 2.5085888585647775, "grad_norm": 1.8042616844177246, "learning_rate": 2.7775674073830337e-05, "loss": 0.3982, "step": 7813 }, { "epoch": 2.508909937389629, "grad_norm": 0.7573620080947876, "learning_rate": 
2.775153571466502e-05, "loss": 0.3681, "step": 7814 }, { "epoch": 2.5092310162144806, "grad_norm": 1.1396507024765015, "learning_rate": 2.772740615875594e-05, "loss": 0.2719, "step": 7815 }, { "epoch": 2.509552095039332, "grad_norm": 0.5033369064331055, "learning_rate": 2.770328540904319e-05, "loss": 0.2497, "step": 7816 }, { "epoch": 2.5098731738641837, "grad_norm": 0.7801833152770996, "learning_rate": 2.7679173468465812e-05, "loss": 0.2883, "step": 7817 }, { "epoch": 2.5101942526890353, "grad_norm": 0.8423357009887695, "learning_rate": 2.7655070339961776e-05, "loss": 0.3138, "step": 7818 }, { "epoch": 2.5105153315138864, "grad_norm": 0.672028124332428, "learning_rate": 2.7630976026467968e-05, "loss": 0.2912, "step": 7819 }, { "epoch": 2.5108364103387384, "grad_norm": 0.8141403198242188, "learning_rate": 2.7606890530920195e-05, "loss": 0.3421, "step": 7820 }, { "epoch": 2.5111574891635895, "grad_norm": 0.9063519239425659, "learning_rate": 2.7582813856253275e-05, "loss": 0.3336, "step": 7821 }, { "epoch": 2.511478567988441, "grad_norm": 0.767846941947937, "learning_rate": 2.755874600540078e-05, "loss": 0.3033, "step": 7822 }, { "epoch": 2.5117996468132926, "grad_norm": 2.2708170413970947, "learning_rate": 2.753468698129533e-05, "loss": 0.4001, "step": 7823 }, { "epoch": 2.512120725638144, "grad_norm": 1.0831410884857178, "learning_rate": 2.7510636786868514e-05, "loss": 0.3136, "step": 7824 }, { "epoch": 2.5124418044629957, "grad_norm": 0.8670080900192261, "learning_rate": 2.7486595425050665e-05, "loss": 0.3511, "step": 7825 }, { "epoch": 2.5127628832878472, "grad_norm": 0.7800338268280029, "learning_rate": 2.746256289877126e-05, "loss": 0.3466, "step": 7826 }, { "epoch": 2.513083962112699, "grad_norm": 0.871461033821106, "learning_rate": 2.743853921095848e-05, "loss": 0.2794, "step": 7827 }, { "epoch": 2.51340504093755, "grad_norm": 0.7801231741905212, "learning_rate": 2.741452436453963e-05, "loss": 0.3329, "step": 7828 }, { "epoch": 2.513726119762402, "grad_norm": 
0.9049973487854004, "learning_rate": 2.7390518362440808e-05, "loss": 0.3623, "step": 7829 }, { "epoch": 2.514047198587253, "grad_norm": 0.6395286321640015, "learning_rate": 2.736652120758708e-05, "loss": 0.7761, "step": 7830 }, { "epoch": 2.5143682774121046, "grad_norm": 0.4424824118614197, "learning_rate": 2.734253290290242e-05, "loss": 0.6206, "step": 7831 }, { "epoch": 2.514689356236956, "grad_norm": 0.5469640493392944, "learning_rate": 2.7318553451309726e-05, "loss": 0.3887, "step": 7832 }, { "epoch": 2.5150104350618077, "grad_norm": 0.3367752134799957, "learning_rate": 2.7294582855730832e-05, "loss": 0.0776, "step": 7833 }, { "epoch": 2.515331513886659, "grad_norm": 0.5025734305381775, "learning_rate": 2.727062111908647e-05, "loss": 0.2786, "step": 7834 }, { "epoch": 2.5156525927115108, "grad_norm": 0.4243975281715393, "learning_rate": 2.7246668244296323e-05, "loss": 0.2066, "step": 7835 }, { "epoch": 2.5159736715363623, "grad_norm": 0.6841888427734375, "learning_rate": 2.722272423427896e-05, "loss": 0.3834, "step": 7836 }, { "epoch": 2.5162947503612134, "grad_norm": 0.6961179375648499, "learning_rate": 2.7198789091951902e-05, "loss": 0.4068, "step": 7837 }, { "epoch": 2.5166158291860654, "grad_norm": 0.9804593920707703, "learning_rate": 2.717486282023153e-05, "loss": 0.3961, "step": 7838 }, { "epoch": 2.5169369080109165, "grad_norm": 0.7120915651321411, "learning_rate": 2.715094542203327e-05, "loss": 0.3416, "step": 7839 }, { "epoch": 2.517257986835768, "grad_norm": 0.7814658284187317, "learning_rate": 2.7127036900271317e-05, "loss": 0.3016, "step": 7840 }, { "epoch": 2.5175790656606196, "grad_norm": 0.5942254662513733, "learning_rate": 2.7103137257858868e-05, "loss": 0.281, "step": 7841 }, { "epoch": 2.517900144485471, "grad_norm": 0.8657404780387878, "learning_rate": 2.707924649770802e-05, "loss": 0.3926, "step": 7842 }, { "epoch": 2.5182212233103227, "grad_norm": 0.7496175765991211, "learning_rate": 2.7055364622729773e-05, "loss": 0.3238, "step": 7843 }, { 
"epoch": 2.5185423021351743, "grad_norm": 0.9622612595558167, "learning_rate": 2.7031491635834137e-05, "loss": 0.3779, "step": 7844 }, { "epoch": 2.518863380960026, "grad_norm": 1.0655999183654785, "learning_rate": 2.700762753992985e-05, "loss": 0.2991, "step": 7845 }, { "epoch": 2.519184459784877, "grad_norm": 0.988420844078064, "learning_rate": 2.698377233792476e-05, "loss": 0.4061, "step": 7846 }, { "epoch": 2.519505538609729, "grad_norm": 0.6470503211021423, "learning_rate": 2.6959926032725535e-05, "loss": 0.2876, "step": 7847 }, { "epoch": 2.51982661743458, "grad_norm": 0.7641369104385376, "learning_rate": 2.6936088627237765e-05, "loss": 0.2941, "step": 7848 }, { "epoch": 2.5201476962594316, "grad_norm": 0.9086601138114929, "learning_rate": 2.6912260124366006e-05, "loss": 0.3384, "step": 7849 }, { "epoch": 2.520468775084283, "grad_norm": 0.7186222672462463, "learning_rate": 2.688844052701359e-05, "loss": 0.2905, "step": 7850 }, { "epoch": 2.5207898539091347, "grad_norm": 0.9355291724205017, "learning_rate": 2.6864629838082956e-05, "loss": 0.4228, "step": 7851 }, { "epoch": 2.5211109327339862, "grad_norm": 0.6973735690116882, "learning_rate": 2.6840828060475332e-05, "loss": 0.2988, "step": 7852 }, { "epoch": 2.521432011558838, "grad_norm": 0.8942835330963135, "learning_rate": 2.681703519709089e-05, "loss": 0.3534, "step": 7853 }, { "epoch": 2.5217530903836893, "grad_norm": 0.9520301222801208, "learning_rate": 2.679325125082872e-05, "loss": 0.3538, "step": 7854 }, { "epoch": 2.5220741692085404, "grad_norm": 0.8248050212860107, "learning_rate": 2.676947622458683e-05, "loss": 0.332, "step": 7855 }, { "epoch": 2.5223952480333924, "grad_norm": 1.0398519039154053, "learning_rate": 2.6745710121262136e-05, "loss": 0.4481, "step": 7856 }, { "epoch": 2.5227163268582435, "grad_norm": 0.9661288857460022, "learning_rate": 2.672195294375045e-05, "loss": 0.3479, "step": 7857 }, { "epoch": 2.523037405683095, "grad_norm": 0.7361083030700684, "learning_rate": 
2.6698204694946527e-05, "loss": 0.2805, "step": 7858 }, { "epoch": 2.5233584845079466, "grad_norm": 0.7523530125617981, "learning_rate": 2.6674465377744017e-05, "loss": 0.2939, "step": 7859 }, { "epoch": 2.523679563332798, "grad_norm": 1.0147556066513062, "learning_rate": 2.6650734995035477e-05, "loss": 0.3913, "step": 7860 }, { "epoch": 2.5240006421576497, "grad_norm": 0.7158978581428528, "learning_rate": 2.6627013549712355e-05, "loss": 0.2834, "step": 7861 }, { "epoch": 2.5243217209825013, "grad_norm": 0.8579987287521362, "learning_rate": 2.660330104466513e-05, "loss": 0.3325, "step": 7862 }, { "epoch": 2.524642799807353, "grad_norm": 0.8115719556808472, "learning_rate": 2.657959748278297e-05, "loss": 0.3175, "step": 7863 }, { "epoch": 2.524963878632204, "grad_norm": 0.7678377032279968, "learning_rate": 2.655590286695422e-05, "loss": 0.3265, "step": 7864 }, { "epoch": 2.525284957457056, "grad_norm": 0.8553791642189026, "learning_rate": 2.6532217200065858e-05, "loss": 0.3043, "step": 7865 }, { "epoch": 2.525606036281907, "grad_norm": 0.8253560662269592, "learning_rate": 2.6508540485004006e-05, "loss": 0.2808, "step": 7866 }, { "epoch": 2.5259271151067586, "grad_norm": 0.8303504586219788, "learning_rate": 2.6484872724653608e-05, "loss": 0.3263, "step": 7867 }, { "epoch": 2.52624819393161, "grad_norm": 0.8876441717147827, "learning_rate": 2.646121392189841e-05, "loss": 0.3131, "step": 7868 }, { "epoch": 2.5265692727564617, "grad_norm": 0.9586053490638733, "learning_rate": 2.6437564079621267e-05, "loss": 0.3133, "step": 7869 }, { "epoch": 2.5268903515813133, "grad_norm": 1.3426408767700195, "learning_rate": 2.6413923200703794e-05, "loss": 0.3845, "step": 7870 }, { "epoch": 2.527211430406165, "grad_norm": 0.7227518558502197, "learning_rate": 2.639029128802657e-05, "loss": 0.2797, "step": 7871 }, { "epoch": 2.5275325092310164, "grad_norm": 0.6822044253349304, "learning_rate": 2.636666834446907e-05, "loss": 0.2739, "step": 7872 }, { "epoch": 2.5278535880558675, 
"grad_norm": 0.6981026530265808, "learning_rate": 2.634305437290968e-05, "loss": 0.2966, "step": 7873 }, { "epoch": 2.5281746668807195, "grad_norm": 0.6758238673210144, "learning_rate": 2.631944937622569e-05, "loss": 0.2845, "step": 7874 }, { "epoch": 2.5284957457055706, "grad_norm": 0.8002867698669434, "learning_rate": 2.6295853357293298e-05, "loss": 0.3453, "step": 7875 }, { "epoch": 2.528816824530422, "grad_norm": 0.7498476505279541, "learning_rate": 2.6272266318987603e-05, "loss": 0.3061, "step": 7876 }, { "epoch": 2.5291379033552737, "grad_norm": 0.633752703666687, "learning_rate": 2.624868826418262e-05, "loss": 0.2688, "step": 7877 }, { "epoch": 2.529458982180125, "grad_norm": 0.6049948334693909, "learning_rate": 2.6225119195751258e-05, "loss": 0.2902, "step": 7878 }, { "epoch": 2.5297800610049768, "grad_norm": 0.6882827877998352, "learning_rate": 2.6201559116565345e-05, "loss": 0.3093, "step": 7879 }, { "epoch": 2.5301011398298283, "grad_norm": 0.4292110800743103, "learning_rate": 2.6178008029495592e-05, "loss": 0.7076, "step": 7880 }, { "epoch": 2.53042221865468, "grad_norm": 0.42428478598594666, "learning_rate": 2.615446593741161e-05, "loss": 0.3113, "step": 7881 }, { "epoch": 2.530743297479531, "grad_norm": 0.4815612733364105, "learning_rate": 2.613093284318201e-05, "loss": 0.3371, "step": 7882 }, { "epoch": 2.531064376304383, "grad_norm": 0.5496947765350342, "learning_rate": 2.6107408749674122e-05, "loss": 0.2977, "step": 7883 }, { "epoch": 2.531385455129234, "grad_norm": 0.33660855889320374, "learning_rate": 2.6083893659754356e-05, "loss": 0.0772, "step": 7884 }, { "epoch": 2.5317065339540856, "grad_norm": 0.40665629506111145, "learning_rate": 2.606038757628798e-05, "loss": 0.1418, "step": 7885 }, { "epoch": 2.532027612778937, "grad_norm": 0.2342071682214737, "learning_rate": 2.603689050213902e-05, "loss": 0.0695, "step": 7886 }, { "epoch": 2.5323486916037887, "grad_norm": 0.29153773188591003, "learning_rate": 2.6013402440170676e-05, "loss": 0.116, 
"step": 7887 }, { "epoch": 2.5326697704286403, "grad_norm": 1.029974341392517, "learning_rate": 2.5989923393244742e-05, "loss": 0.4608, "step": 7888 }, { "epoch": 2.532990849253492, "grad_norm": 0.7505784034729004, "learning_rate": 2.5966453364222186e-05, "loss": 0.3698, "step": 7889 }, { "epoch": 2.5333119280783434, "grad_norm": 0.8639885783195496, "learning_rate": 2.5942992355962727e-05, "loss": 0.313, "step": 7890 }, { "epoch": 2.5336330069031945, "grad_norm": 0.9737856388092041, "learning_rate": 2.5919540371325e-05, "loss": 0.431, "step": 7891 }, { "epoch": 2.5339540857280465, "grad_norm": 0.6815305352210999, "learning_rate": 2.5896097413166564e-05, "loss": 0.2643, "step": 7892 }, { "epoch": 2.5342751645528976, "grad_norm": 0.9808143377304077, "learning_rate": 2.5872663484343884e-05, "loss": 0.4601, "step": 7893 }, { "epoch": 2.534596243377749, "grad_norm": 0.9797862768173218, "learning_rate": 2.584923858771231e-05, "loss": 0.3746, "step": 7894 }, { "epoch": 2.5349173222026007, "grad_norm": 0.7613912224769592, "learning_rate": 2.582582272612609e-05, "loss": 0.3253, "step": 7895 }, { "epoch": 2.5352384010274522, "grad_norm": 0.8609127402305603, "learning_rate": 2.580241590243837e-05, "loss": 0.3091, "step": 7896 }, { "epoch": 2.535559479852304, "grad_norm": 0.8038222789764404, "learning_rate": 2.5779018119501208e-05, "loss": 0.3423, "step": 7897 }, { "epoch": 2.5358805586771553, "grad_norm": 0.8532456159591675, "learning_rate": 2.575562938016556e-05, "loss": 0.3167, "step": 7898 }, { "epoch": 2.536201637502007, "grad_norm": 0.8715474009513855, "learning_rate": 2.573224968728123e-05, "loss": 0.3521, "step": 7899 }, { "epoch": 2.536522716326858, "grad_norm": 0.95112144947052, "learning_rate": 2.5708879043697054e-05, "loss": 0.3424, "step": 7900 }, { "epoch": 2.53684379515171, "grad_norm": 0.8659119009971619, "learning_rate": 2.5685517452260567e-05, "loss": 0.3874, "step": 7901 }, { "epoch": 2.537164873976561, "grad_norm": 0.9276009798049927, "learning_rate": 
2.566216491581841e-05, "loss": 0.3019, "step": 7902 }, { "epoch": 2.5374859528014126, "grad_norm": 0.8854154944419861, "learning_rate": 2.5638821437215944e-05, "loss": 0.3359, "step": 7903 }, { "epoch": 2.537807031626264, "grad_norm": 0.9696468114852905, "learning_rate": 2.561548701929749e-05, "loss": 0.4029, "step": 7904 }, { "epoch": 2.5381281104511157, "grad_norm": 0.8522638082504272, "learning_rate": 2.5592161664906368e-05, "loss": 0.3034, "step": 7905 }, { "epoch": 2.5384491892759673, "grad_norm": 0.8860786557197571, "learning_rate": 2.5568845376884587e-05, "loss": 0.2844, "step": 7906 }, { "epoch": 2.538770268100819, "grad_norm": 0.919465184211731, "learning_rate": 2.554553815807328e-05, "loss": 0.3504, "step": 7907 }, { "epoch": 2.5390913469256704, "grad_norm": 0.8986783623695374, "learning_rate": 2.5522240011312247e-05, "loss": 0.3556, "step": 7908 }, { "epoch": 2.5394124257505215, "grad_norm": 0.5819917321205139, "learning_rate": 2.549895093944039e-05, "loss": 0.2474, "step": 7909 }, { "epoch": 2.5397335045753735, "grad_norm": 0.701481282711029, "learning_rate": 2.547567094529537e-05, "loss": 0.2874, "step": 7910 }, { "epoch": 2.5400545834002246, "grad_norm": 0.9206550121307373, "learning_rate": 2.5452400031713785e-05, "loss": 0.327, "step": 7911 }, { "epoch": 2.540375662225076, "grad_norm": 0.8680040836334229, "learning_rate": 2.542913820153113e-05, "loss": 0.3246, "step": 7912 }, { "epoch": 2.5406967410499277, "grad_norm": 0.9277855157852173, "learning_rate": 2.540588545758179e-05, "loss": 0.2907, "step": 7913 }, { "epoch": 2.5410178198747793, "grad_norm": 0.8211881518363953, "learning_rate": 2.5382641802699035e-05, "loss": 0.2917, "step": 7914 }, { "epoch": 2.541338898699631, "grad_norm": 1.1107295751571655, "learning_rate": 2.535940723971505e-05, "loss": 0.4215, "step": 7915 }, { "epoch": 2.5416599775244824, "grad_norm": 0.9217866659164429, "learning_rate": 2.5336181771460876e-05, "loss": 0.3113, "step": 7916 }, { "epoch": 2.541981056349334, 
"grad_norm": 0.8937177062034607, "learning_rate": 2.5312965400766474e-05, "loss": 0.2755, "step": 7917 }, { "epoch": 2.542302135174185, "grad_norm": 0.5329973697662354, "learning_rate": 2.5289758130460683e-05, "loss": 0.2237, "step": 7918 }, { "epoch": 2.542623213999037, "grad_norm": 0.9037802815437317, "learning_rate": 2.5266559963371216e-05, "loss": 0.3482, "step": 7919 }, { "epoch": 2.542944292823888, "grad_norm": 1.0273380279541016, "learning_rate": 2.5243370902324792e-05, "loss": 0.3545, "step": 7920 }, { "epoch": 2.5432653716487397, "grad_norm": 0.8402252197265625, "learning_rate": 2.5220190950146827e-05, "loss": 0.3273, "step": 7921 }, { "epoch": 2.543586450473591, "grad_norm": 1.0652894973754883, "learning_rate": 2.5197020109661772e-05, "loss": 0.348, "step": 7922 }, { "epoch": 2.5439075292984428, "grad_norm": 1.2122594118118286, "learning_rate": 2.5173858383692906e-05, "loss": 0.4131, "step": 7923 }, { "epoch": 2.5442286081232943, "grad_norm": 1.0098888874053955, "learning_rate": 2.51507057750624e-05, "loss": 0.3763, "step": 7924 }, { "epoch": 2.544549686948146, "grad_norm": 0.7478073835372925, "learning_rate": 2.512756228659141e-05, "loss": 0.2882, "step": 7925 }, { "epoch": 2.5448707657729974, "grad_norm": 0.486265629529953, "learning_rate": 2.5104427921099782e-05, "loss": 0.2455, "step": 7926 }, { "epoch": 2.5451918445978485, "grad_norm": 0.5730571746826172, "learning_rate": 2.508130268140646e-05, "loss": 0.2751, "step": 7927 }, { "epoch": 2.5455129234227005, "grad_norm": 0.6584112048149109, "learning_rate": 2.5058186570329156e-05, "loss": 0.2707, "step": 7928 }, { "epoch": 2.5458340022475516, "grad_norm": 0.6145785450935364, "learning_rate": 2.5035079590684497e-05, "loss": 0.323, "step": 7929 }, { "epoch": 2.546155081072403, "grad_norm": 0.4932810366153717, "learning_rate": 2.5011981745288015e-05, "loss": 0.5853, "step": 7930 }, { "epoch": 2.5464761598972547, "grad_norm": 0.5000682473182678, "learning_rate": 2.4988893036954043e-05, "loss": 0.3042, 
"step": 7931 }, { "epoch": 2.5467972387221063, "grad_norm": 0.4661361873149872, "learning_rate": 2.496581346849596e-05, "loss": 0.2213, "step": 7932 }, { "epoch": 2.547118317546958, "grad_norm": 0.6509041786193848, "learning_rate": 2.4942743042725892e-05, "loss": 0.3622, "step": 7933 }, { "epoch": 2.5474393963718094, "grad_norm": 0.3958999216556549, "learning_rate": 2.4919681762454918e-05, "loss": 0.2316, "step": 7934 }, { "epoch": 2.547760475196661, "grad_norm": 0.45946624875068665, "learning_rate": 2.4896629630492973e-05, "loss": 0.2446, "step": 7935 }, { "epoch": 2.548081554021512, "grad_norm": 0.3704213500022888, "learning_rate": 2.4873586649648894e-05, "loss": 0.0943, "step": 7936 }, { "epoch": 2.548402632846364, "grad_norm": 0.6541774272918701, "learning_rate": 2.48505528227304e-05, "loss": 0.2996, "step": 7937 }, { "epoch": 2.548723711671215, "grad_norm": 0.9613190293312073, "learning_rate": 2.48275281525441e-05, "loss": 0.564, "step": 7938 }, { "epoch": 2.5490447904960667, "grad_norm": 0.8291346430778503, "learning_rate": 2.480451264189546e-05, "loss": 0.4429, "step": 7939 }, { "epoch": 2.5493658693209182, "grad_norm": 0.6980516910552979, "learning_rate": 2.4781506293588873e-05, "loss": 0.3154, "step": 7940 }, { "epoch": 2.54968694814577, "grad_norm": 0.7115036249160767, "learning_rate": 2.4758509110427575e-05, "loss": 0.2834, "step": 7941 }, { "epoch": 2.5500080269706213, "grad_norm": 0.7645947933197021, "learning_rate": 2.4735521095213687e-05, "loss": 0.3213, "step": 7942 }, { "epoch": 2.550329105795473, "grad_norm": 0.8161126971244812, "learning_rate": 2.4712542250748304e-05, "loss": 0.3376, "step": 7943 }, { "epoch": 2.5506501846203244, "grad_norm": 0.9567040801048279, "learning_rate": 2.4689572579831222e-05, "loss": 0.379, "step": 7944 }, { "epoch": 2.5509712634451756, "grad_norm": 0.878835916519165, "learning_rate": 2.4666612085261342e-05, "loss": 0.3387, "step": 7945 }, { "epoch": 2.5512923422700275, "grad_norm": 0.9040188193321228, "learning_rate": 
2.464366076983623e-05, "loss": 0.3488, "step": 7946 }, { "epoch": 2.5516134210948787, "grad_norm": 0.9474675059318542, "learning_rate": 2.4620718636352457e-05, "loss": 0.3582, "step": 7947 }, { "epoch": 2.55193449991973, "grad_norm": 0.6305925250053406, "learning_rate": 2.4597785687605513e-05, "loss": 0.256, "step": 7948 }, { "epoch": 2.5522555787445818, "grad_norm": 0.7935699820518494, "learning_rate": 2.4574861926389615e-05, "loss": 0.3362, "step": 7949 }, { "epoch": 2.5525766575694333, "grad_norm": 0.9189661741256714, "learning_rate": 2.4551947355498027e-05, "loss": 0.3227, "step": 7950 }, { "epoch": 2.552897736394285, "grad_norm": 1.0050448179244995, "learning_rate": 2.45290419777228e-05, "loss": 0.4139, "step": 7951 }, { "epoch": 2.5532188152191364, "grad_norm": 1.0046634674072266, "learning_rate": 2.4506145795854873e-05, "loss": 0.4224, "step": 7952 }, { "epoch": 2.553539894043988, "grad_norm": 0.7242197394371033, "learning_rate": 2.4483258812684096e-05, "loss": 0.3028, "step": 7953 }, { "epoch": 2.553860972868839, "grad_norm": 0.6570572257041931, "learning_rate": 2.4460381030999158e-05, "loss": 0.2526, "step": 7954 }, { "epoch": 2.554182051693691, "grad_norm": 0.7426683902740479, "learning_rate": 2.443751245358765e-05, "loss": 0.2802, "step": 7955 }, { "epoch": 2.554503130518542, "grad_norm": 0.7006813883781433, "learning_rate": 2.441465308323605e-05, "loss": 0.3058, "step": 7956 }, { "epoch": 2.5548242093433937, "grad_norm": 0.7258092164993286, "learning_rate": 2.43918029227297e-05, "loss": 0.2643, "step": 7957 }, { "epoch": 2.5551452881682453, "grad_norm": 1.4597491025924683, "learning_rate": 2.436896197485282e-05, "loss": 0.3756, "step": 7958 }, { "epoch": 2.555466366993097, "grad_norm": 0.7586961984634399, "learning_rate": 2.43461302423885e-05, "loss": 0.3493, "step": 7959 }, { "epoch": 2.5557874458179484, "grad_norm": 1.1637358665466309, "learning_rate": 2.4323307728118738e-05, "loss": 0.4117, "step": 7960 }, { "epoch": 2.5561085246428, "grad_norm": 
0.764044463634491, "learning_rate": 2.4300494434824373e-05, "loss": 0.2827, "step": 7961 }, { "epoch": 2.5564296034676515, "grad_norm": 0.7120959162712097, "learning_rate": 2.4277690365285112e-05, "loss": 0.2924, "step": 7962 }, { "epoch": 2.5567506822925026, "grad_norm": 0.6114968061447144, "learning_rate": 2.425489552227964e-05, "loss": 0.2665, "step": 7963 }, { "epoch": 2.5570717611173546, "grad_norm": 0.710640013217926, "learning_rate": 2.4232109908585377e-05, "loss": 0.3022, "step": 7964 }, { "epoch": 2.5573928399422057, "grad_norm": 0.6761636137962341, "learning_rate": 2.420933352697865e-05, "loss": 0.2818, "step": 7965 }, { "epoch": 2.5577139187670572, "grad_norm": 0.927751898765564, "learning_rate": 2.4186566380234798e-05, "loss": 0.363, "step": 7966 }, { "epoch": 2.5580349975919088, "grad_norm": 0.8839023113250732, "learning_rate": 2.4163808471127812e-05, "loss": 0.3743, "step": 7967 }, { "epoch": 2.5583560764167603, "grad_norm": 0.7871977090835571, "learning_rate": 2.4141059802430777e-05, "loss": 0.3433, "step": 7968 }, { "epoch": 2.558677155241612, "grad_norm": 0.7037361860275269, "learning_rate": 2.411832037691545e-05, "loss": 0.2797, "step": 7969 }, { "epoch": 2.5589982340664634, "grad_norm": 0.6723461151123047, "learning_rate": 2.4095590197352635e-05, "loss": 0.2862, "step": 7970 }, { "epoch": 2.559319312891315, "grad_norm": 1.6565648317337036, "learning_rate": 2.407286926651192e-05, "loss": 0.3784, "step": 7971 }, { "epoch": 2.559640391716166, "grad_norm": 0.8423799276351929, "learning_rate": 2.405015758716177e-05, "loss": 0.3592, "step": 7972 }, { "epoch": 2.559961470541018, "grad_norm": 1.355826735496521, "learning_rate": 2.4027455162069567e-05, "loss": 0.3569, "step": 7973 }, { "epoch": 2.560282549365869, "grad_norm": 0.8471829891204834, "learning_rate": 2.4004761994001435e-05, "loss": 0.2898, "step": 7974 }, { "epoch": 2.5606036281907207, "grad_norm": 0.7122255563735962, "learning_rate": 2.3982078085722582e-05, "loss": 0.3251, "step": 7975 }, { 
"epoch": 2.5609247070155723, "grad_norm": 0.792010486125946, "learning_rate": 2.3959403439996907e-05, "loss": 0.2844, "step": 7976 }, { "epoch": 2.561245785840424, "grad_norm": 0.43959325551986694, "learning_rate": 2.3936738059587282e-05, "loss": 0.2479, "step": 7977 }, { "epoch": 2.5615668646652754, "grad_norm": 0.33454373478889465, "learning_rate": 2.3914081947255397e-05, "loss": 0.2251, "step": 7978 }, { "epoch": 2.561887943490127, "grad_norm": 0.6790142059326172, "learning_rate": 2.389143510576184e-05, "loss": 0.2833, "step": 7979 }, { "epoch": 2.5622090223149785, "grad_norm": 0.5404451489448547, "learning_rate": 2.3868797537866016e-05, "loss": 0.6905, "step": 7980 }, { "epoch": 2.5625301011398296, "grad_norm": 0.36907678842544556, "learning_rate": 2.3846169246326343e-05, "loss": 0.3373, "step": 7981 }, { "epoch": 2.5628511799646816, "grad_norm": 0.5592865347862244, "learning_rate": 2.3823550233899915e-05, "loss": 0.4475, "step": 7982 }, { "epoch": 2.5631722587895327, "grad_norm": 0.4797590374946594, "learning_rate": 2.3800940503342828e-05, "loss": 0.1677, "step": 7983 }, { "epoch": 2.5634933376143842, "grad_norm": 0.6905731558799744, "learning_rate": 2.3778340057409998e-05, "loss": 0.2087, "step": 7984 }, { "epoch": 2.563814416439236, "grad_norm": 0.22037728130817413, "learning_rate": 2.37557488988552e-05, "loss": 0.0671, "step": 7985 }, { "epoch": 2.5641354952640873, "grad_norm": 0.41251930594444275, "learning_rate": 2.373316703043119e-05, "loss": 0.1551, "step": 7986 }, { "epoch": 2.564456574088939, "grad_norm": 0.3948359191417694, "learning_rate": 2.3710594454889378e-05, "loss": 0.1627, "step": 7987 }, { "epoch": 2.5647776529137905, "grad_norm": 0.5292665362358093, "learning_rate": 2.3688031174980275e-05, "loss": 0.2721, "step": 7988 }, { "epoch": 2.565098731738642, "grad_norm": 0.8082164525985718, "learning_rate": 2.3665477193453034e-05, "loss": 0.3934, "step": 7989 }, { "epoch": 2.565419810563493, "grad_norm": 0.9454381465911865, "learning_rate": 
2.3642932513055884e-05, "loss": 0.4163, "step": 7990 }, { "epoch": 2.565740889388345, "grad_norm": 0.889689564704895, "learning_rate": 2.362039713653581e-05, "loss": 0.4446, "step": 7991 }, { "epoch": 2.566061968213196, "grad_norm": 0.7542763948440552, "learning_rate": 2.359787106663861e-05, "loss": 0.3018, "step": 7992 }, { "epoch": 2.5663830470380478, "grad_norm": 0.7268791198730469, "learning_rate": 2.35753543061091e-05, "loss": 0.3345, "step": 7993 }, { "epoch": 2.5667041258628993, "grad_norm": 0.6492055654525757, "learning_rate": 2.3552846857690846e-05, "loss": 0.3103, "step": 7994 }, { "epoch": 2.567025204687751, "grad_norm": 0.7166942358016968, "learning_rate": 2.3530348724126307e-05, "loss": 0.2754, "step": 7995 }, { "epoch": 2.5673462835126024, "grad_norm": 0.9371656179428101, "learning_rate": 2.3507859908156827e-05, "loss": 0.5017, "step": 7996 }, { "epoch": 2.567667362337454, "grad_norm": 1.0456211566925049, "learning_rate": 2.3485380412522585e-05, "loss": 0.3592, "step": 7997 }, { "epoch": 2.5679884411623055, "grad_norm": 0.6862274408340454, "learning_rate": 2.3462910239962654e-05, "loss": 0.2818, "step": 7998 }, { "epoch": 2.5683095199871566, "grad_norm": 1.1700456142425537, "learning_rate": 2.3440449393214948e-05, "loss": 0.3444, "step": 7999 }, { "epoch": 2.5686305988120086, "grad_norm": 0.9991983771324158, "learning_rate": 2.341799787501625e-05, "loss": 0.4639, "step": 8000 }, { "epoch": 2.5689516776368597, "grad_norm": 0.9430015683174133, "learning_rate": 2.339555568810221e-05, "loss": 0.4327, "step": 8001 }, { "epoch": 2.5692727564617113, "grad_norm": 0.7065159678459167, "learning_rate": 2.337312283520735e-05, "loss": 0.2867, "step": 8002 }, { "epoch": 2.569593835286563, "grad_norm": 0.9828401803970337, "learning_rate": 2.3350699319065026e-05, "loss": 0.3369, "step": 8003 }, { "epoch": 2.5699149141114144, "grad_norm": 2.36687970161438, "learning_rate": 2.3328285142407503e-05, "loss": 0.3983, "step": 8004 }, { "epoch": 2.570235992936266, 
"grad_norm": 0.9053291082382202, "learning_rate": 2.3305880307965833e-05, "loss": 0.3354, "step": 8005 }, { "epoch": 2.5705570717611175, "grad_norm": 1.2159992456436157, "learning_rate": 2.328348481847006e-05, "loss": 0.5254, "step": 8006 }, { "epoch": 2.570878150585969, "grad_norm": 0.8332436680793762, "learning_rate": 2.3261098676648907e-05, "loss": 0.324, "step": 8007 }, { "epoch": 2.57119922941082, "grad_norm": 0.9708757400512695, "learning_rate": 2.323872188523013e-05, "loss": 0.3851, "step": 8008 }, { "epoch": 2.571520308235672, "grad_norm": 1.2236950397491455, "learning_rate": 2.321635444694028e-05, "loss": 0.3511, "step": 8009 }, { "epoch": 2.5718413870605232, "grad_norm": 0.8926087617874146, "learning_rate": 2.319399636450468e-05, "loss": 0.3669, "step": 8010 }, { "epoch": 2.572162465885375, "grad_norm": 0.9284133315086365, "learning_rate": 2.3171647640647687e-05, "loss": 0.348, "step": 8011 }, { "epoch": 2.5724835447102263, "grad_norm": 0.856762170791626, "learning_rate": 2.3149308278092342e-05, "loss": 0.3501, "step": 8012 }, { "epoch": 2.572804623535078, "grad_norm": 0.7136096358299255, "learning_rate": 2.3126978279560684e-05, "loss": 0.3205, "step": 8013 }, { "epoch": 2.5731257023599294, "grad_norm": 0.570107102394104, "learning_rate": 2.3104657647773554e-05, "loss": 0.2764, "step": 8014 }, { "epoch": 2.573446781184781, "grad_norm": 0.7570350766181946, "learning_rate": 2.3082346385450637e-05, "loss": 0.3118, "step": 8015 }, { "epoch": 2.5737678600096325, "grad_norm": 0.9262259006500244, "learning_rate": 2.3060044495310505e-05, "loss": 0.4234, "step": 8016 }, { "epoch": 2.5740889388344836, "grad_norm": 0.3663389980792999, "learning_rate": 2.3037751980070555e-05, "loss": 0.2028, "step": 8017 }, { "epoch": 2.5744100176593356, "grad_norm": 0.8820987343788147, "learning_rate": 2.3015468842447086e-05, "loss": 0.3276, "step": 8018 }, { "epoch": 2.5747310964841867, "grad_norm": 0.7441263198852539, "learning_rate": 2.2993195085155205e-05, "loss": 0.2819, 
"step": 8019 }, { "epoch": 2.5750521753090383, "grad_norm": 0.5265788435935974, "learning_rate": 2.2970930710908935e-05, "loss": 0.2229, "step": 8020 }, { "epoch": 2.57537325413389, "grad_norm": 0.9287590980529785, "learning_rate": 2.2948675722421086e-05, "loss": 0.3445, "step": 8021 }, { "epoch": 2.5756943329587414, "grad_norm": 0.8301019072532654, "learning_rate": 2.2926430122403386e-05, "loss": 0.3109, "step": 8022 }, { "epoch": 2.576015411783593, "grad_norm": 0.7834892868995667, "learning_rate": 2.2904193913566363e-05, "loss": 0.35, "step": 8023 }, { "epoch": 2.5763364906084445, "grad_norm": 0.5827646851539612, "learning_rate": 2.2881967098619506e-05, "loss": 0.256, "step": 8024 }, { "epoch": 2.576657569433296, "grad_norm": 0.6390456557273865, "learning_rate": 2.2859749680270982e-05, "loss": 0.272, "step": 8025 }, { "epoch": 2.576978648258147, "grad_norm": 0.9293005466461182, "learning_rate": 2.2837541661228025e-05, "loss": 0.3369, "step": 8026 }, { "epoch": 2.577299727082999, "grad_norm": 0.8350579738616943, "learning_rate": 2.2815343044196525e-05, "loss": 0.3416, "step": 8027 }, { "epoch": 2.5776208059078503, "grad_norm": 0.4396490156650543, "learning_rate": 2.279315383188132e-05, "loss": 0.2917, "step": 8028 }, { "epoch": 2.577941884732702, "grad_norm": 0.3990170359611511, "learning_rate": 2.277097402698619e-05, "loss": 0.2969, "step": 8029 }, { "epoch": 2.5782629635575534, "grad_norm": 0.5404382348060608, "learning_rate": 2.2748803632213557e-05, "loss": 0.8456, "step": 8030 }, { "epoch": 2.578584042382405, "grad_norm": 0.5150883793830872, "learning_rate": 2.2726642650264895e-05, "loss": 0.8014, "step": 8031 }, { "epoch": 2.5789051212072565, "grad_norm": 0.4833203852176666, "learning_rate": 2.270449108384044e-05, "loss": 0.5567, "step": 8032 }, { "epoch": 2.579226200032108, "grad_norm": 0.5803343057632446, "learning_rate": 2.2682348935639274e-05, "loss": 0.1951, "step": 8033 }, { "epoch": 2.5795472788569596, "grad_norm": 0.43384525179862976, "learning_rate": 
2.2660216208359365e-05, "loss": 0.2521, "step": 8034 }, { "epoch": 2.5798683576818107, "grad_norm": 0.3641275465488434, "learning_rate": 2.2638092904697518e-05, "loss": 0.1943, "step": 8035 }, { "epoch": 2.5801894365066627, "grad_norm": 0.3974734842777252, "learning_rate": 2.2615979027349387e-05, "loss": 0.1705, "step": 8036 }, { "epoch": 2.5805105153315138, "grad_norm": 0.3340223431587219, "learning_rate": 2.259387457900948e-05, "loss": 0.1161, "step": 8037 }, { "epoch": 2.5808315941563653, "grad_norm": 0.750033974647522, "learning_rate": 2.2571779562371153e-05, "loss": 0.3098, "step": 8038 }, { "epoch": 2.581152672981217, "grad_norm": 0.7684246301651001, "learning_rate": 2.254969398012663e-05, "loss": 0.3886, "step": 8039 }, { "epoch": 2.5814737518060684, "grad_norm": 0.8656273484230042, "learning_rate": 2.2527617834966954e-05, "loss": 0.3571, "step": 8040 }, { "epoch": 2.58179483063092, "grad_norm": 0.7932442426681519, "learning_rate": 2.2505551129582047e-05, "loss": 0.3677, "step": 8041 }, { "epoch": 2.5821159094557715, "grad_norm": 0.7484654188156128, "learning_rate": 2.2483493866660676e-05, "loss": 0.3368, "step": 8042 }, { "epoch": 2.582436988280623, "grad_norm": 0.9018601179122925, "learning_rate": 2.246144604889042e-05, "loss": 0.4161, "step": 8043 }, { "epoch": 2.582758067105474, "grad_norm": 0.7390075325965881, "learning_rate": 2.2439407678957812e-05, "loss": 0.2773, "step": 8044 }, { "epoch": 2.583079145930326, "grad_norm": 0.7586901783943176, "learning_rate": 2.241737875954808e-05, "loss": 0.2567, "step": 8045 }, { "epoch": 2.5834002247551773, "grad_norm": 1.0482172966003418, "learning_rate": 2.2395359293345396e-05, "loss": 0.3444, "step": 8046 }, { "epoch": 2.583721303580029, "grad_norm": 0.7774665951728821, "learning_rate": 2.237334928303283e-05, "loss": 0.2983, "step": 8047 }, { "epoch": 2.5840423824048804, "grad_norm": 0.8551390767097473, "learning_rate": 2.235134873129213e-05, "loss": 0.325, "step": 8048 }, { "epoch": 2.584363461229732, 
"grad_norm": 0.8655200600624084, "learning_rate": 2.2329357640804117e-05, "loss": 0.3297, "step": 8049 }, { "epoch": 2.5846845400545835, "grad_norm": 0.7637379765510559, "learning_rate": 2.2307376014248216e-05, "loss": 0.2911, "step": 8050 }, { "epoch": 2.585005618879435, "grad_norm": 0.9776454567909241, "learning_rate": 2.2285403854302912e-05, "loss": 0.3879, "step": 8051 }, { "epoch": 2.5853266977042866, "grad_norm": 1.0411674976348877, "learning_rate": 2.2263441163645403e-05, "loss": 0.4912, "step": 8052 }, { "epoch": 2.5856477765291377, "grad_norm": 1.0440319776535034, "learning_rate": 2.22414879449518e-05, "loss": 0.4109, "step": 8053 }, { "epoch": 2.5859688553539897, "grad_norm": 0.7270945310592651, "learning_rate": 2.2219544200897025e-05, "loss": 0.3516, "step": 8054 }, { "epoch": 2.586289934178841, "grad_norm": 0.867685079574585, "learning_rate": 2.219760993415485e-05, "loss": 0.3819, "step": 8055 }, { "epoch": 2.5866110130036923, "grad_norm": 0.6216245293617249, "learning_rate": 2.2175685147397906e-05, "loss": 0.2304, "step": 8056 }, { "epoch": 2.586932091828544, "grad_norm": 0.9045018553733826, "learning_rate": 2.2153769843297667e-05, "loss": 0.3466, "step": 8057 }, { "epoch": 2.5872531706533954, "grad_norm": 1.0591224431991577, "learning_rate": 2.213186402452443e-05, "loss": 0.4561, "step": 8058 }, { "epoch": 2.587574249478247, "grad_norm": 0.8752357959747314, "learning_rate": 2.210996769374737e-05, "loss": 0.3874, "step": 8059 }, { "epoch": 2.5878953283030985, "grad_norm": 0.8251414895057678, "learning_rate": 2.2088080853634473e-05, "loss": 0.3303, "step": 8060 }, { "epoch": 2.58821640712795, "grad_norm": 1.0302598476409912, "learning_rate": 2.2066203506852566e-05, "loss": 0.3676, "step": 8061 }, { "epoch": 2.588537485952801, "grad_norm": 0.9010094404220581, "learning_rate": 2.204433565606743e-05, "loss": 0.3819, "step": 8062 }, { "epoch": 2.5888585647776527, "grad_norm": 0.7407838702201843, "learning_rate": 2.202247730394349e-05, "loss": 0.2689, 
"step": 8063 }, { "epoch": 2.5891796436025043, "grad_norm": 1.1048128604888916, "learning_rate": 2.200062845314417e-05, "loss": 0.3919, "step": 8064 }, { "epoch": 2.589500722427356, "grad_norm": 0.9192834496498108, "learning_rate": 2.1978789106331665e-05, "loss": 0.2973, "step": 8065 }, { "epoch": 2.5898218012522074, "grad_norm": 0.882653534412384, "learning_rate": 2.195695926616702e-05, "loss": 0.279, "step": 8066 }, { "epoch": 2.590142880077059, "grad_norm": 0.9665889143943787, "learning_rate": 2.1935138935310206e-05, "loss": 0.3333, "step": 8067 }, { "epoch": 2.5904639589019105, "grad_norm": 0.8618683218955994, "learning_rate": 2.1913328116419873e-05, "loss": 0.3014, "step": 8068 }, { "epoch": 2.590785037726762, "grad_norm": 0.8841969966888428, "learning_rate": 2.1891526812153672e-05, "loss": 0.3502, "step": 8069 }, { "epoch": 2.5911061165516136, "grad_norm": 1.0003149509429932, "learning_rate": 2.1869735025168026e-05, "loss": 0.333, "step": 8070 }, { "epoch": 2.5914271953764647, "grad_norm": 0.9237295985221863, "learning_rate": 2.1847952758118117e-05, "loss": 0.3748, "step": 8071 }, { "epoch": 2.5917482742013163, "grad_norm": 0.9068896770477295, "learning_rate": 2.1826180013658172e-05, "loss": 0.2793, "step": 8072 }, { "epoch": 2.592069353026168, "grad_norm": 1.0544371604919434, "learning_rate": 2.1804416794440995e-05, "loss": 0.3318, "step": 8073 }, { "epoch": 2.5923904318510194, "grad_norm": 1.1038298606872559, "learning_rate": 2.178266310311847e-05, "loss": 0.4288, "step": 8074 }, { "epoch": 2.592711510675871, "grad_norm": 0.530205488204956, "learning_rate": 2.1760918942341192e-05, "loss": 0.2411, "step": 8075 }, { "epoch": 2.5930325895007225, "grad_norm": 0.5433615446090698, "learning_rate": 2.173918431475861e-05, "loss": 0.2607, "step": 8076 }, { "epoch": 2.593353668325574, "grad_norm": 0.8718168139457703, "learning_rate": 2.1717459223019033e-05, "loss": 0.3014, "step": 8077 }, { "epoch": 2.5936747471504256, "grad_norm": 0.506506085395813, "learning_rate": 
2.1695743669769596e-05, "loss": 0.268, "step": 8078 }, { "epoch": 2.593995825975277, "grad_norm": 0.8781798481941223, "learning_rate": 2.1674037657656266e-05, "loss": 0.3089, "step": 8079 }, { "epoch": 2.594316904800128, "grad_norm": 0.5046657919883728, "learning_rate": 2.1652341189323866e-05, "loss": 0.7222, "step": 8080 }, { "epoch": 2.5946379836249798, "grad_norm": 0.40744638442993164, "learning_rate": 2.163065426741603e-05, "loss": 0.4146, "step": 8081 }, { "epoch": 2.5949590624498313, "grad_norm": 0.49407830834388733, "learning_rate": 2.160897689457526e-05, "loss": 0.3484, "step": 8082 }, { "epoch": 2.595280141274683, "grad_norm": 0.34928953647613525, "learning_rate": 2.1587309073442863e-05, "loss": 0.0671, "step": 8083 }, { "epoch": 2.5956012200995344, "grad_norm": 0.22757402062416077, "learning_rate": 2.1565650806658975e-05, "loss": 0.0641, "step": 8084 }, { "epoch": 2.595922298924386, "grad_norm": 0.3532765805721283, "learning_rate": 2.154400209686268e-05, "loss": 0.1308, "step": 8085 }, { "epoch": 2.5962433777492375, "grad_norm": 0.7194594144821167, "learning_rate": 2.1522362946691698e-05, "loss": 0.347, "step": 8086 }, { "epoch": 2.596564456574089, "grad_norm": 0.8896145224571228, "learning_rate": 2.1500733358782786e-05, "loss": 0.4766, "step": 8087 }, { "epoch": 2.5968855353989406, "grad_norm": 1.069779872894287, "learning_rate": 2.1479113335771383e-05, "loss": 0.4661, "step": 8088 }, { "epoch": 2.5972066142237917, "grad_norm": 0.7583945989608765, "learning_rate": 2.1457502880291812e-05, "loss": 0.3911, "step": 8089 }, { "epoch": 2.5975276930486433, "grad_norm": 0.6904090046882629, "learning_rate": 2.1435901994977326e-05, "loss": 0.3297, "step": 8090 }, { "epoch": 2.597848771873495, "grad_norm": 0.7934258580207825, "learning_rate": 2.1414310682459802e-05, "loss": 0.3471, "step": 8091 }, { "epoch": 2.5981698506983464, "grad_norm": 1.1645605564117432, "learning_rate": 2.1392728945370222e-05, "loss": 0.365, "step": 8092 }, { "epoch": 2.598490929523198, 
"grad_norm": 0.57047039270401, "learning_rate": 2.137115678633811e-05, "loss": 0.2229, "step": 8093 }, { "epoch": 2.5988120083480495, "grad_norm": 0.8101687431335449, "learning_rate": 2.1349594207992064e-05, "loss": 0.3051, "step": 8094 }, { "epoch": 2.599133087172901, "grad_norm": 0.785053551197052, "learning_rate": 2.1328041212959403e-05, "loss": 0.3114, "step": 8095 }, { "epoch": 2.5994541659977526, "grad_norm": 0.7656204104423523, "learning_rate": 2.1306497803866277e-05, "loss": 0.3462, "step": 8096 }, { "epoch": 2.599775244822604, "grad_norm": 0.861936092376709, "learning_rate": 2.128496398333768e-05, "loss": 0.356, "step": 8097 }, { "epoch": 2.6000963236474552, "grad_norm": 0.8918510675430298, "learning_rate": 2.126343975399747e-05, "loss": 0.3629, "step": 8098 }, { "epoch": 2.600417402472307, "grad_norm": 0.8870481848716736, "learning_rate": 2.1241925118468287e-05, "loss": 0.3867, "step": 8099 }, { "epoch": 2.6007384812971583, "grad_norm": 1.0247498750686646, "learning_rate": 2.1220420079371628e-05, "loss": 0.3675, "step": 8100 }, { "epoch": 2.60105956012201, "grad_norm": 0.9953125715255737, "learning_rate": 2.119892463932781e-05, "loss": 0.3568, "step": 8101 }, { "epoch": 2.6013806389468614, "grad_norm": 0.9158168435096741, "learning_rate": 2.1177438800956007e-05, "loss": 0.3369, "step": 8102 }, { "epoch": 2.601701717771713, "grad_norm": 0.8674771785736084, "learning_rate": 2.115596256687419e-05, "loss": 0.3141, "step": 8103 }, { "epoch": 2.6020227965965645, "grad_norm": 0.6369979381561279, "learning_rate": 2.113449593969915e-05, "loss": 0.2762, "step": 8104 }, { "epoch": 2.602343875421416, "grad_norm": 0.8605027794837952, "learning_rate": 2.1113038922046602e-05, "loss": 0.3994, "step": 8105 }, { "epoch": 2.6026649542462676, "grad_norm": 1.133817434310913, "learning_rate": 2.1091591516530952e-05, "loss": 0.3732, "step": 8106 }, { "epoch": 2.6029860330711188, "grad_norm": 0.7040675282478333, "learning_rate": 2.107015372576552e-05, "loss": 0.3186, "step": 
8107 }, { "epoch": 2.6033071118959703, "grad_norm": 0.7996187210083008, "learning_rate": 2.1048725552362435e-05, "loss": 0.3545, "step": 8108 }, { "epoch": 2.603628190720822, "grad_norm": 1.0517889261245728, "learning_rate": 2.1027306998932627e-05, "loss": 0.3864, "step": 8109 }, { "epoch": 2.6039492695456734, "grad_norm": 0.5958194136619568, "learning_rate": 2.100589806808597e-05, "loss": 0.2543, "step": 8110 }, { "epoch": 2.604270348370525, "grad_norm": 0.7594183683395386, "learning_rate": 2.098449876243096e-05, "loss": 0.328, "step": 8111 }, { "epoch": 2.6045914271953765, "grad_norm": 1.2153469324111938, "learning_rate": 2.096310908457513e-05, "loss": 0.3756, "step": 8112 }, { "epoch": 2.604912506020228, "grad_norm": 0.8676589727401733, "learning_rate": 2.09417290371247e-05, "loss": 0.3697, "step": 8113 }, { "epoch": 2.6052335848450796, "grad_norm": 0.792874813079834, "learning_rate": 2.0920358622684788e-05, "loss": 0.379, "step": 8114 }, { "epoch": 2.605554663669931, "grad_norm": 0.6348553895950317, "learning_rate": 2.0898997843859334e-05, "loss": 0.2493, "step": 8115 }, { "epoch": 2.6058757424947823, "grad_norm": 1.1245969533920288, "learning_rate": 2.0877646703250996e-05, "loss": 0.3336, "step": 8116 }, { "epoch": 2.606196821319634, "grad_norm": 0.794979453086853, "learning_rate": 2.0856305203461436e-05, "loss": 0.3597, "step": 8117 }, { "epoch": 2.6065179001444854, "grad_norm": 0.9265166521072388, "learning_rate": 2.0834973347091014e-05, "loss": 0.336, "step": 8118 }, { "epoch": 2.606838978969337, "grad_norm": 0.7240932583808899, "learning_rate": 2.0813651136738954e-05, "loss": 0.284, "step": 8119 }, { "epoch": 2.6071600577941885, "grad_norm": 0.5318633913993835, "learning_rate": 2.07923385750033e-05, "loss": 0.2458, "step": 8120 }, { "epoch": 2.60748113661904, "grad_norm": 0.6563091278076172, "learning_rate": 2.0771035664480942e-05, "loss": 0.2628, "step": 8121 }, { "epoch": 2.6078022154438916, "grad_norm": 0.9432152509689331, "learning_rate": 
2.0749742407767546e-05, "loss": 0.3554, "step": 8122 }, { "epoch": 2.608123294268743, "grad_norm": 1.121485948562622, "learning_rate": 2.0728458807457662e-05, "loss": 0.3306, "step": 8123 }, { "epoch": 2.6084443730935947, "grad_norm": 0.8368777632713318, "learning_rate": 2.0707184866144603e-05, "loss": 0.2796, "step": 8124 }, { "epoch": 2.6087654519184458, "grad_norm": 0.9011041522026062, "learning_rate": 2.068592058642056e-05, "loss": 0.3304, "step": 8125 }, { "epoch": 2.6090865307432973, "grad_norm": 1.134626865386963, "learning_rate": 2.0664665970876496e-05, "loss": 0.2825, "step": 8126 }, { "epoch": 2.609407609568149, "grad_norm": 0.7329688668251038, "learning_rate": 2.0643421022102217e-05, "loss": 0.3027, "step": 8127 }, { "epoch": 2.6097286883930004, "grad_norm": 0.5429527759552002, "learning_rate": 2.0622185742686416e-05, "loss": 0.3091, "step": 8128 }, { "epoch": 2.610049767217852, "grad_norm": 0.6699220538139343, "learning_rate": 2.0600960135216462e-05, "loss": 0.296, "step": 8129 }, { "epoch": 2.6103708460427035, "grad_norm": 0.5311281681060791, "learning_rate": 2.0579744202278718e-05, "loss": 0.7533, "step": 8130 }, { "epoch": 2.610691924867555, "grad_norm": 0.5676338076591492, "learning_rate": 2.0558537946458177e-05, "loss": 0.8681, "step": 8131 }, { "epoch": 2.6110130036924066, "grad_norm": 0.38879385590553284, "learning_rate": 2.0537341370338857e-05, "loss": 0.1332, "step": 8132 }, { "epoch": 2.611334082517258, "grad_norm": 0.28156155347824097, "learning_rate": 2.051615447650347e-05, "loss": 0.1392, "step": 8133 }, { "epoch": 2.6116551613421093, "grad_norm": 0.38800543546676636, "learning_rate": 2.049497726753351e-05, "loss": 0.156, "step": 8134 }, { "epoch": 2.611976240166961, "grad_norm": 0.2520560026168823, "learning_rate": 2.0473809746009444e-05, "loss": 0.0688, "step": 8135 }, { "epoch": 2.6122973189918124, "grad_norm": 0.7137389183044434, "learning_rate": 2.045265191451041e-05, "loss": 0.3956, "step": 8136 }, { "epoch": 2.612618397816664, 
"grad_norm": 0.8318041563034058, "learning_rate": 2.0431503775614457e-05, "loss": 0.2772, "step": 8137 }, { "epoch": 2.6129394766415155, "grad_norm": 0.7499494552612305, "learning_rate": 2.0410365331898416e-05, "loss": 0.3192, "step": 8138 }, { "epoch": 2.613260555466367, "grad_norm": 0.8148148059844971, "learning_rate": 2.0389236585937945e-05, "loss": 0.3618, "step": 8139 }, { "epoch": 2.6135816342912186, "grad_norm": 1.1199220418930054, "learning_rate": 2.0368117540307496e-05, "loss": 0.3034, "step": 8140 }, { "epoch": 2.61390271311607, "grad_norm": 0.5880412459373474, "learning_rate": 2.0347008197580374e-05, "loss": 0.2263, "step": 8141 }, { "epoch": 2.6142237919409217, "grad_norm": 0.8377451300621033, "learning_rate": 2.03259085603287e-05, "loss": 0.4124, "step": 8142 }, { "epoch": 2.614544870765773, "grad_norm": 0.7712952494621277, "learning_rate": 2.030481863112339e-05, "loss": 0.2779, "step": 8143 }, { "epoch": 2.6148659495906243, "grad_norm": 0.8210815787315369, "learning_rate": 2.028373841253419e-05, "loss": 0.3206, "step": 8144 }, { "epoch": 2.615187028415476, "grad_norm": 1.114625334739685, "learning_rate": 2.026266790712965e-05, "loss": 0.4125, "step": 8145 }, { "epoch": 2.6155081072403275, "grad_norm": 1.0668013095855713, "learning_rate": 2.024160711747717e-05, "loss": 0.4166, "step": 8146 }, { "epoch": 2.615829186065179, "grad_norm": 1.219161868095398, "learning_rate": 2.0220556046142893e-05, "loss": 0.4567, "step": 8147 }, { "epoch": 2.6161502648900306, "grad_norm": 0.993944525718689, "learning_rate": 2.019951469569191e-05, "loss": 0.3649, "step": 8148 }, { "epoch": 2.616471343714882, "grad_norm": 0.8392919898033142, "learning_rate": 2.017848306868797e-05, "loss": 0.2713, "step": 8149 }, { "epoch": 2.6167924225397337, "grad_norm": 0.9957613945007324, "learning_rate": 2.0157461167693758e-05, "loss": 0.3948, "step": 8150 }, { "epoch": 2.617113501364585, "grad_norm": 0.8635070323944092, "learning_rate": 2.013644899527074e-05, "loss": 0.3869, "step": 
8151 }, { "epoch": 2.6174345801894363, "grad_norm": 1.053145170211792, "learning_rate": 2.01154465539791e-05, "loss": 0.3391, "step": 8152 }, { "epoch": 2.617755659014288, "grad_norm": 0.6855107545852661, "learning_rate": 2.009445384637805e-05, "loss": 0.2861, "step": 8153 }, { "epoch": 2.6180767378391394, "grad_norm": 1.1673146486282349, "learning_rate": 2.0073470875025358e-05, "loss": 0.3544, "step": 8154 }, { "epoch": 2.618397816663991, "grad_norm": 0.7385457158088684, "learning_rate": 2.005249764247783e-05, "loss": 0.2765, "step": 8155 }, { "epoch": 2.6187188954888425, "grad_norm": 0.721240222454071, "learning_rate": 2.0031534151290943e-05, "loss": 0.2583, "step": 8156 }, { "epoch": 2.619039974313694, "grad_norm": 0.8315588235855103, "learning_rate": 2.0010580404019062e-05, "loss": 0.3844, "step": 8157 }, { "epoch": 2.6193610531385456, "grad_norm": 0.9637236595153809, "learning_rate": 1.9989636403215328e-05, "loss": 0.3219, "step": 8158 }, { "epoch": 2.619682131963397, "grad_norm": 0.7627900242805481, "learning_rate": 1.9968702151431696e-05, "loss": 0.2788, "step": 8159 }, { "epoch": 2.6200032107882487, "grad_norm": 0.6353088617324829, "learning_rate": 1.9947777651218946e-05, "loss": 0.256, "step": 8160 }, { "epoch": 2.6203242896131, "grad_norm": 1.0363852977752686, "learning_rate": 1.9926862905126665e-05, "loss": 0.451, "step": 8161 }, { "epoch": 2.6206453684379514, "grad_norm": 0.863524854183197, "learning_rate": 1.9905957915703244e-05, "loss": 0.3374, "step": 8162 }, { "epoch": 2.620966447262803, "grad_norm": 0.8532664775848389, "learning_rate": 1.9885062685495904e-05, "loss": 0.3011, "step": 8163 }, { "epoch": 2.6212875260876545, "grad_norm": 0.8648016452789307, "learning_rate": 1.9864177217050674e-05, "loss": 0.3211, "step": 8164 }, { "epoch": 2.621608604912506, "grad_norm": 0.7424303889274597, "learning_rate": 1.9843301512912327e-05, "loss": 0.3291, "step": 8165 }, { "epoch": 2.6219296837373576, "grad_norm": 0.7464106678962708, "learning_rate": 
1.9822435575624608e-05, "loss": 0.3156, "step": 8166 }, { "epoch": 2.622250762562209, "grad_norm": 1.0930415391921997, "learning_rate": 1.9801579407729866e-05, "loss": 0.5009, "step": 8167 }, { "epoch": 2.6225718413870607, "grad_norm": 0.78467857837677, "learning_rate": 1.9780733011769447e-05, "loss": 0.2866, "step": 8168 }, { "epoch": 2.6228929202119122, "grad_norm": 0.7698622345924377, "learning_rate": 1.9759896390283362e-05, "loss": 0.3075, "step": 8169 }, { "epoch": 2.6232139990367633, "grad_norm": 0.5596573352813721, "learning_rate": 1.9739069545810485e-05, "loss": 0.2454, "step": 8170 }, { "epoch": 2.623535077861615, "grad_norm": 0.7981199622154236, "learning_rate": 1.9718252480888566e-05, "loss": 0.2827, "step": 8171 }, { "epoch": 2.6238561566864664, "grad_norm": 0.49611830711364746, "learning_rate": 1.969744519805402e-05, "loss": 0.2539, "step": 8172 }, { "epoch": 2.624177235511318, "grad_norm": 0.9452134966850281, "learning_rate": 1.9676647699842242e-05, "loss": 0.3056, "step": 8173 }, { "epoch": 2.6244983143361695, "grad_norm": 0.8190011382102966, "learning_rate": 1.965585998878724e-05, "loss": 0.304, "step": 8174 }, { "epoch": 2.624819393161021, "grad_norm": 0.6110972762107849, "learning_rate": 1.963508206742202e-05, "loss": 0.2405, "step": 8175 }, { "epoch": 2.6251404719858726, "grad_norm": 0.8863208293914795, "learning_rate": 1.9614313938278272e-05, "loss": 0.3248, "step": 8176 }, { "epoch": 2.625461550810724, "grad_norm": 0.5325760841369629, "learning_rate": 1.9593555603886538e-05, "loss": 0.2599, "step": 8177 }, { "epoch": 2.6257826296355757, "grad_norm": 0.9411792755126953, "learning_rate": 1.9572807066776143e-05, "loss": 0.3166, "step": 8178 }, { "epoch": 2.626103708460427, "grad_norm": 0.5804592370986938, "learning_rate": 1.955206832947526e-05, "loss": 0.2814, "step": 8179 }, { "epoch": 2.6264247872852784, "grad_norm": 0.5420529842376709, "learning_rate": 1.9531339394510827e-05, "loss": 0.7411, "step": 8180 }, { "epoch": 2.62674586611013, 
"grad_norm": 0.46422919631004333, "learning_rate": 1.9510620264408596e-05, "loss": 0.6111, "step": 8181 }, { "epoch": 2.6270669449349815, "grad_norm": 0.3627856969833374, "learning_rate": 1.9489910941693133e-05, "loss": 0.2278, "step": 8182 }, { "epoch": 2.627388023759833, "grad_norm": 0.4854031503200531, "learning_rate": 1.946921142888781e-05, "loss": 0.1976, "step": 8183 }, { "epoch": 2.6277091025846846, "grad_norm": 0.4990745186805725, "learning_rate": 1.9448521728514802e-05, "loss": 0.2803, "step": 8184 }, { "epoch": 2.628030181409536, "grad_norm": 0.2989073395729065, "learning_rate": 1.9427841843095063e-05, "loss": 0.0711, "step": 8185 }, { "epoch": 2.6283512602343877, "grad_norm": 0.5125943422317505, "learning_rate": 1.9407171775148436e-05, "loss": 0.2092, "step": 8186 }, { "epoch": 2.6286723390592392, "grad_norm": 0.8719359040260315, "learning_rate": 1.938651152719344e-05, "loss": 0.4531, "step": 8187 }, { "epoch": 2.6289934178840904, "grad_norm": 0.7034810781478882, "learning_rate": 1.9365861101747485e-05, "loss": 0.3558, "step": 8188 }, { "epoch": 2.629314496708942, "grad_norm": 0.9010298848152161, "learning_rate": 1.9345220501326777e-05, "loss": 0.4044, "step": 8189 }, { "epoch": 2.6296355755337935, "grad_norm": 0.8850868344306946, "learning_rate": 1.9324589728446262e-05, "loss": 0.3323, "step": 8190 }, { "epoch": 2.629956654358645, "grad_norm": 0.8857529163360596, "learning_rate": 1.930396878561983e-05, "loss": 0.3986, "step": 8191 }, { "epoch": 2.6302777331834966, "grad_norm": 0.7102099061012268, "learning_rate": 1.928335767535997e-05, "loss": 0.254, "step": 8192 }, { "epoch": 2.630598812008348, "grad_norm": 0.7003388404846191, "learning_rate": 1.9262756400178162e-05, "loss": 0.314, "step": 8193 }, { "epoch": 2.6309198908331997, "grad_norm": 0.9088608026504517, "learning_rate": 1.9242164962584618e-05, "loss": 0.3336, "step": 8194 }, { "epoch": 2.631240969658051, "grad_norm": 0.8129791617393494, "learning_rate": 1.922158336508825e-05, "loss": 0.3411, 
"step": 8195 }, { "epoch": 2.6315620484829028, "grad_norm": 0.9310645461082458, "learning_rate": 1.9201011610196973e-05, "loss": 0.4063, "step": 8196 }, { "epoch": 2.631883127307754, "grad_norm": 1.2737817764282227, "learning_rate": 1.918044970041729e-05, "loss": 0.4643, "step": 8197 }, { "epoch": 2.6322042061326054, "grad_norm": 0.9532968997955322, "learning_rate": 1.91598976382547e-05, "loss": 0.4322, "step": 8198 }, { "epoch": 2.632525284957457, "grad_norm": 0.7169177532196045, "learning_rate": 1.9139355426213347e-05, "loss": 0.2881, "step": 8199 }, { "epoch": 2.6328463637823085, "grad_norm": 1.0440733432769775, "learning_rate": 1.9118823066796276e-05, "loss": 0.4381, "step": 8200 }, { "epoch": 2.63316744260716, "grad_norm": 0.9224132299423218, "learning_rate": 1.9098300562505266e-05, "loss": 0.3855, "step": 8201 }, { "epoch": 2.6334885214320116, "grad_norm": 0.7816334962844849, "learning_rate": 1.9077787915840927e-05, "loss": 0.3378, "step": 8202 }, { "epoch": 2.633809600256863, "grad_norm": 1.13314688205719, "learning_rate": 1.9057285129302683e-05, "loss": 0.4, "step": 8203 }, { "epoch": 2.6341306790817147, "grad_norm": 0.848976731300354, "learning_rate": 1.903679220538871e-05, "loss": 0.3194, "step": 8204 }, { "epoch": 2.6344517579065663, "grad_norm": 1.028289556503296, "learning_rate": 1.9016309146596023e-05, "loss": 0.4086, "step": 8205 }, { "epoch": 2.6347728367314174, "grad_norm": 1.0954947471618652, "learning_rate": 1.8995835955420416e-05, "loss": 0.4058, "step": 8206 }, { "epoch": 2.635093915556269, "grad_norm": 1.0540190935134888, "learning_rate": 1.897537263435648e-05, "loss": 0.4228, "step": 8207 }, { "epoch": 2.6354149943811205, "grad_norm": 1.1869220733642578, "learning_rate": 1.895491918589759e-05, "loss": 0.3598, "step": 8208 }, { "epoch": 2.635736073205972, "grad_norm": 1.1555882692337036, "learning_rate": 1.8934475612536017e-05, "loss": 0.4349, "step": 8209 }, { "epoch": 2.6360571520308236, "grad_norm": 0.7538331151008606, "learning_rate": 
1.891404191676265e-05, "loss": 0.3082, "step": 8210 }, { "epoch": 2.636378230855675, "grad_norm": 1.011115312576294, "learning_rate": 1.8893618101067355e-05, "loss": 0.2848, "step": 8211 }, { "epoch": 2.6366993096805267, "grad_norm": 0.9371142983436584, "learning_rate": 1.8873204167938653e-05, "loss": 0.3994, "step": 8212 }, { "epoch": 2.6370203885053782, "grad_norm": 0.7361756563186646, "learning_rate": 1.885280011986391e-05, "loss": 0.3152, "step": 8213 }, { "epoch": 2.63734146733023, "grad_norm": 1.0889180898666382, "learning_rate": 1.883240595932938e-05, "loss": 0.3764, "step": 8214 }, { "epoch": 2.637662546155081, "grad_norm": 0.6203745603561401, "learning_rate": 1.8812021688819915e-05, "loss": 0.2491, "step": 8215 }, { "epoch": 2.6379836249799324, "grad_norm": 0.7448567748069763, "learning_rate": 1.879164731081937e-05, "loss": 0.282, "step": 8216 }, { "epoch": 2.638304703804784, "grad_norm": 0.9264616966247559, "learning_rate": 1.877128282781028e-05, "loss": 0.2982, "step": 8217 }, { "epoch": 2.6386257826296355, "grad_norm": 0.6099106073379517, "learning_rate": 1.8750928242273968e-05, "loss": 0.2363, "step": 8218 }, { "epoch": 2.638946861454487, "grad_norm": 0.859263002872467, "learning_rate": 1.8730583556690605e-05, "loss": 0.3094, "step": 8219 }, { "epoch": 2.6392679402793386, "grad_norm": 0.5748609304428101, "learning_rate": 1.8710248773539118e-05, "loss": 0.2404, "step": 8220 }, { "epoch": 2.63958901910419, "grad_norm": 0.8924229741096497, "learning_rate": 1.8689923895297245e-05, "loss": 0.3191, "step": 8221 }, { "epoch": 2.6399100979290417, "grad_norm": 0.8334121108055115, "learning_rate": 1.8669608924441496e-05, "loss": 0.3053, "step": 8222 }, { "epoch": 2.6402311767538933, "grad_norm": 0.9076601266860962, "learning_rate": 1.8649303863447198e-05, "loss": 0.3409, "step": 8223 }, { "epoch": 2.6405522555787444, "grad_norm": 0.6396355032920837, "learning_rate": 1.8629008714788464e-05, "loss": 0.3092, "step": 8224 }, { "epoch": 2.640873334403596, 
"grad_norm": 0.7007104754447937, "learning_rate": 1.8608723480938206e-05, "loss": 0.3039, "step": 8225 }, { "epoch": 2.6411944132284475, "grad_norm": 0.4519537389278412, "learning_rate": 1.858844816436809e-05, "loss": 0.2598, "step": 8226 }, { "epoch": 2.641515492053299, "grad_norm": 0.609611988067627, "learning_rate": 1.8568182767548626e-05, "loss": 0.2982, "step": 8227 }, { "epoch": 2.6418365708781506, "grad_norm": 0.35319435596466064, "learning_rate": 1.854792729294905e-05, "loss": 0.2584, "step": 8228 }, { "epoch": 2.642157649703002, "grad_norm": 0.5530852675437927, "learning_rate": 1.852768174303752e-05, "loss": 0.3132, "step": 8229 }, { "epoch": 2.6424787285278537, "grad_norm": 0.6832270622253418, "learning_rate": 1.8507446120280814e-05, "loss": 0.9194, "step": 8230 }, { "epoch": 2.6427998073527053, "grad_norm": 0.36571961641311646, "learning_rate": 1.848722042714457e-05, "loss": 0.2325, "step": 8231 }, { "epoch": 2.643120886177557, "grad_norm": 0.3756738305091858, "learning_rate": 1.8467004666093325e-05, "loss": 0.1744, "step": 8232 }, { "epoch": 2.643441965002408, "grad_norm": 0.3277255594730377, "learning_rate": 1.8446798839590186e-05, "loss": 0.1241, "step": 8233 }, { "epoch": 2.6437630438272595, "grad_norm": 0.264514297246933, "learning_rate": 1.8426602950097283e-05, "loss": 0.1122, "step": 8234 }, { "epoch": 2.644084122652111, "grad_norm": 0.20338915288448334, "learning_rate": 1.8406417000075325e-05, "loss": 0.0738, "step": 8235 }, { "epoch": 2.6444052014769626, "grad_norm": 0.8854188323020935, "learning_rate": 1.838624099198397e-05, "loss": 0.4599, "step": 8236 }, { "epoch": 2.644726280301814, "grad_norm": 0.7100738883018494, "learning_rate": 1.8366074928281607e-05, "loss": 0.2707, "step": 8237 }, { "epoch": 2.6450473591266657, "grad_norm": 0.9927451014518738, "learning_rate": 1.834591881142538e-05, "loss": 0.3854, "step": 8238 }, { "epoch": 2.645368437951517, "grad_norm": 0.8780126571655273, "learning_rate": 1.8325772643871265e-05, "loss": 0.3619, 
"step": 8239 }, { "epoch": 2.6456895167763683, "grad_norm": 0.7575050592422485, "learning_rate": 1.8305636428074014e-05, "loss": 0.3066, "step": 8240 }, { "epoch": 2.6460105956012203, "grad_norm": 0.599746584892273, "learning_rate": 1.8285510166487152e-05, "loss": 0.2652, "step": 8241 }, { "epoch": 2.6463316744260714, "grad_norm": 0.9272753596305847, "learning_rate": 1.826539386156302e-05, "loss": 0.4376, "step": 8242 }, { "epoch": 2.646652753250923, "grad_norm": 0.7078785300254822, "learning_rate": 1.8245287515752708e-05, "loss": 0.2626, "step": 8243 }, { "epoch": 2.6469738320757745, "grad_norm": 0.8110345602035522, "learning_rate": 1.8225191131506126e-05, "loss": 0.2777, "step": 8244 }, { "epoch": 2.647294910900626, "grad_norm": 0.7809237241744995, "learning_rate": 1.8205104711271957e-05, "loss": 0.3272, "step": 8245 }, { "epoch": 2.6476159897254776, "grad_norm": 0.7376668453216553, "learning_rate": 1.818502825749764e-05, "loss": 0.3092, "step": 8246 }, { "epoch": 2.647937068550329, "grad_norm": 0.645916223526001, "learning_rate": 1.816496177262952e-05, "loss": 0.2277, "step": 8247 }, { "epoch": 2.6482581473751807, "grad_norm": 1.4311972856521606, "learning_rate": 1.8144905259112542e-05, "loss": 0.4121, "step": 8248 }, { "epoch": 2.648579226200032, "grad_norm": 0.8135798573493958, "learning_rate": 1.812485871939056e-05, "loss": 0.3105, "step": 8249 }, { "epoch": 2.648900305024884, "grad_norm": 0.9382086992263794, "learning_rate": 1.81048221559062e-05, "loss": 0.3712, "step": 8250 }, { "epoch": 2.649221383849735, "grad_norm": 0.9404576420783997, "learning_rate": 1.808479557110081e-05, "loss": 0.3391, "step": 8251 }, { "epoch": 2.6495424626745865, "grad_norm": 1.0706158876419067, "learning_rate": 1.8064778967414662e-05, "loss": 0.433, "step": 8252 }, { "epoch": 2.649863541499438, "grad_norm": 0.7485371828079224, "learning_rate": 1.804477234728661e-05, "loss": 0.2812, "step": 8253 }, { "epoch": 2.6501846203242896, "grad_norm": 1.0219621658325195, "learning_rate": 
1.8024775713154473e-05, "loss": 0.4084, "step": 8254 }, { "epoch": 2.650505699149141, "grad_norm": 0.861611545085907, "learning_rate": 1.8004789067454764e-05, "loss": 0.3148, "step": 8255 }, { "epoch": 2.6508267779739927, "grad_norm": 1.0790586471557617, "learning_rate": 1.7984812412622787e-05, "loss": 0.4146, "step": 8256 }, { "epoch": 2.6511478567988442, "grad_norm": 0.6884385347366333, "learning_rate": 1.7964845751092664e-05, "loss": 0.2809, "step": 8257 }, { "epoch": 2.6514689356236953, "grad_norm": 1.2098215818405151, "learning_rate": 1.794488908529719e-05, "loss": 0.4282, "step": 8258 }, { "epoch": 2.6517900144485473, "grad_norm": 0.5315207242965698, "learning_rate": 1.792494241766811e-05, "loss": 0.2248, "step": 8259 }, { "epoch": 2.6521110932733984, "grad_norm": 1.038537859916687, "learning_rate": 1.790500575063584e-05, "loss": 0.4178, "step": 8260 }, { "epoch": 2.65243217209825, "grad_norm": 0.7643697261810303, "learning_rate": 1.78850790866296e-05, "loss": 0.2673, "step": 8261 }, { "epoch": 2.6527532509231015, "grad_norm": 0.6102786064147949, "learning_rate": 1.7865162428077386e-05, "loss": 0.2725, "step": 8262 }, { "epoch": 2.653074329747953, "grad_norm": 0.9418346285820007, "learning_rate": 1.7845255777406e-05, "loss": 0.3239, "step": 8263 }, { "epoch": 2.6533954085728046, "grad_norm": 0.7261042594909668, "learning_rate": 1.7825359137040988e-05, "loss": 0.2828, "step": 8264 }, { "epoch": 2.653716487397656, "grad_norm": 0.9733819961547852, "learning_rate": 1.7805472509406696e-05, "loss": 0.3408, "step": 8265 }, { "epoch": 2.6540375662225077, "grad_norm": 0.5591874718666077, "learning_rate": 1.7785595896926265e-05, "loss": 0.2455, "step": 8266 }, { "epoch": 2.654358645047359, "grad_norm": 0.7229105234146118, "learning_rate": 1.7765729302021596e-05, "loss": 0.2537, "step": 8267 }, { "epoch": 2.654679723872211, "grad_norm": 0.6605117917060852, "learning_rate": 1.7745872727113356e-05, "loss": 0.2885, "step": 8268 }, { "epoch": 2.655000802697062, "grad_norm": 
0.5864028334617615, "learning_rate": 1.7726026174621003e-05, "loss": 0.2461, "step": 8269 }, { "epoch": 2.6553218815219135, "grad_norm": 1.8025933504104614, "learning_rate": 1.7706189646962847e-05, "loss": 0.3684, "step": 8270 }, { "epoch": 2.655642960346765, "grad_norm": 0.835909903049469, "learning_rate": 1.7686363146555805e-05, "loss": 0.3308, "step": 8271 }, { "epoch": 2.6559640391716166, "grad_norm": 0.9109052419662476, "learning_rate": 1.7666546675815778e-05, "loss": 0.3649, "step": 8272 }, { "epoch": 2.656285117996468, "grad_norm": 0.5964946150779724, "learning_rate": 1.7646740237157256e-05, "loss": 0.2541, "step": 8273 }, { "epoch": 2.6566061968213197, "grad_norm": 0.6849761605262756, "learning_rate": 1.7626943832993647e-05, "loss": 0.29, "step": 8274 }, { "epoch": 2.6569272756461713, "grad_norm": 0.5830851197242737, "learning_rate": 1.760715746573709e-05, "loss": 0.2506, "step": 8275 }, { "epoch": 2.6572483544710224, "grad_norm": 0.5915994644165039, "learning_rate": 1.7587381137798432e-05, "loss": 0.299, "step": 8276 }, { "epoch": 2.6575694332958744, "grad_norm": 0.5111377835273743, "learning_rate": 1.7567614851587443e-05, "loss": 0.2853, "step": 8277 }, { "epoch": 2.6578905121207255, "grad_norm": 0.5485566854476929, "learning_rate": 1.754785860951249e-05, "loss": 0.2842, "step": 8278 }, { "epoch": 2.658211590945577, "grad_norm": 0.38606464862823486, "learning_rate": 1.752811241398089e-05, "loss": 0.2609, "step": 8279 }, { "epoch": 2.6585326697704286, "grad_norm": 0.36825671792030334, "learning_rate": 1.750837626739863e-05, "loss": 0.4696, "step": 8280 }, { "epoch": 2.65885374859528, "grad_norm": 0.49861788749694824, "learning_rate": 1.7488650172170496e-05, "loss": 0.4966, "step": 8281 }, { "epoch": 2.6591748274201317, "grad_norm": 0.3955051004886627, "learning_rate": 1.7468934130700044e-05, "loss": 0.1671, "step": 8282 }, { "epoch": 2.659495906244983, "grad_norm": 0.30318066477775574, "learning_rate": 1.744922814538964e-05, "loss": 0.1232, "step": 8283 }, 
{ "epoch": 2.6598169850698348, "grad_norm": 0.5016526579856873, "learning_rate": 1.7429532218640377e-05, "loss": 0.1385, "step": 8284 }, { "epoch": 2.660138063894686, "grad_norm": 0.33723151683807373, "learning_rate": 1.7409846352852143e-05, "loss": 0.122, "step": 8285 }, { "epoch": 2.660459142719538, "grad_norm": 0.7565352916717529, "learning_rate": 1.7390170550423625e-05, "loss": 0.3913, "step": 8286 }, { "epoch": 2.660780221544389, "grad_norm": 0.857373833656311, "learning_rate": 1.737050481375223e-05, "loss": 0.4855, "step": 8287 }, { "epoch": 2.6611013003692405, "grad_norm": 0.7241206765174866, "learning_rate": 1.7350849145234184e-05, "loss": 0.281, "step": 8288 }, { "epoch": 2.661422379194092, "grad_norm": 0.822955310344696, "learning_rate": 1.7331203547264453e-05, "loss": 0.3799, "step": 8289 }, { "epoch": 2.6617434580189436, "grad_norm": 0.9299210906028748, "learning_rate": 1.7311568022236845e-05, "loss": 0.3482, "step": 8290 }, { "epoch": 2.662064536843795, "grad_norm": 0.68746417760849, "learning_rate": 1.7291942572543807e-05, "loss": 0.293, "step": 8291 }, { "epoch": 2.6623856156686467, "grad_norm": 0.8198730945587158, "learning_rate": 1.7272327200576742e-05, "loss": 0.3485, "step": 8292 }, { "epoch": 2.6627066944934983, "grad_norm": 1.226529598236084, "learning_rate": 1.7252721908725632e-05, "loss": 0.3973, "step": 8293 }, { "epoch": 2.6630277733183494, "grad_norm": 0.7077026963233948, "learning_rate": 1.7233126699379343e-05, "loss": 0.2794, "step": 8294 }, { "epoch": 2.6633488521432014, "grad_norm": 0.8905551433563232, "learning_rate": 1.721354157492555e-05, "loss": 0.3946, "step": 8295 }, { "epoch": 2.6636699309680525, "grad_norm": 0.9082470536231995, "learning_rate": 1.719396653775056e-05, "loss": 0.3648, "step": 8296 }, { "epoch": 2.663991009792904, "grad_norm": 0.6751482486724854, "learning_rate": 1.7174401590239587e-05, "loss": 0.2582, "step": 8297 }, { "epoch": 2.6643120886177556, "grad_norm": 0.916202187538147, "learning_rate": 
1.715484673477654e-05, "loss": 0.3537, "step": 8298 }, { "epoch": 2.664633167442607, "grad_norm": 1.0404547452926636, "learning_rate": 1.7135301973744124e-05, "loss": 0.3866, "step": 8299 }, { "epoch": 2.6649542462674587, "grad_norm": 0.7834314107894897, "learning_rate": 1.7115767309523812e-05, "loss": 0.3115, "step": 8300 }, { "epoch": 2.6652753250923102, "grad_norm": 0.8973174691200256, "learning_rate": 1.7096242744495837e-05, "loss": 0.2688, "step": 8301 }, { "epoch": 2.665596403917162, "grad_norm": 1.4204421043395996, "learning_rate": 1.70767282810392e-05, "loss": 0.5071, "step": 8302 }, { "epoch": 2.665917482742013, "grad_norm": 1.0806547403335571, "learning_rate": 1.7057223921531707e-05, "loss": 0.3664, "step": 8303 }, { "epoch": 2.666238561566865, "grad_norm": 1.0438770055770874, "learning_rate": 1.7037729668349877e-05, "loss": 0.3832, "step": 8304 }, { "epoch": 2.666559640391716, "grad_norm": 0.9235129356384277, "learning_rate": 1.7018245523869036e-05, "loss": 0.345, "step": 8305 }, { "epoch": 2.6668807192165676, "grad_norm": 0.9867783784866333, "learning_rate": 1.6998771490463262e-05, "loss": 0.3577, "step": 8306 }, { "epoch": 2.667201798041419, "grad_norm": 0.9603634476661682, "learning_rate": 1.697930757050542e-05, "loss": 0.3107, "step": 8307 }, { "epoch": 2.6675228768662707, "grad_norm": 0.6079133749008179, "learning_rate": 1.6959853766367118e-05, "loss": 0.2685, "step": 8308 }, { "epoch": 2.667843955691122, "grad_norm": 1.3194692134857178, "learning_rate": 1.6940410080418723e-05, "loss": 0.3876, "step": 8309 }, { "epoch": 2.6681650345159738, "grad_norm": 0.7050005793571472, "learning_rate": 1.6920976515029463e-05, "loss": 0.301, "step": 8310 }, { "epoch": 2.6684861133408253, "grad_norm": 1.0123231410980225, "learning_rate": 1.690155307256719e-05, "loss": 0.4113, "step": 8311 }, { "epoch": 2.6688071921656764, "grad_norm": 0.8244590163230896, "learning_rate": 1.6882139755398574e-05, "loss": 0.3621, "step": 8312 }, { "epoch": 2.6691282709905284, 
"grad_norm": 0.5136908292770386, "learning_rate": 1.686273656588917e-05, "loss": 0.2476, "step": 8313 }, { "epoch": 2.6694493498153795, "grad_norm": 0.7136945128440857, "learning_rate": 1.6843343506403075e-05, "loss": 0.2703, "step": 8314 }, { "epoch": 2.669770428640231, "grad_norm": 1.0239125490188599, "learning_rate": 1.6823960579303377e-05, "loss": 0.314, "step": 8315 }, { "epoch": 2.6700915074650826, "grad_norm": 0.6786704063415527, "learning_rate": 1.680458778695174e-05, "loss": 0.2897, "step": 8316 }, { "epoch": 2.670412586289934, "grad_norm": 0.6435678601264954, "learning_rate": 1.678522513170875e-05, "loss": 0.274, "step": 8317 }, { "epoch": 2.6707336651147857, "grad_norm": 0.6759331822395325, "learning_rate": 1.6765872615933677e-05, "loss": 0.2739, "step": 8318 }, { "epoch": 2.6710547439396373, "grad_norm": 0.7092223167419434, "learning_rate": 1.6746530241984504e-05, "loss": 0.2994, "step": 8319 }, { "epoch": 2.671375822764489, "grad_norm": 0.9770942330360413, "learning_rate": 1.6727198012218114e-05, "loss": 0.3379, "step": 8320 }, { "epoch": 2.67169690158934, "grad_norm": 1.3338515758514404, "learning_rate": 1.6707875928990058e-05, "loss": 0.3295, "step": 8321 }, { "epoch": 2.672017980414192, "grad_norm": 0.6919156312942505, "learning_rate": 1.668856399465466e-05, "loss": 0.2663, "step": 8322 }, { "epoch": 2.672339059239043, "grad_norm": 0.9007259011268616, "learning_rate": 1.6669262211565028e-05, "loss": 0.3146, "step": 8323 }, { "epoch": 2.6726601380638946, "grad_norm": 0.6799442172050476, "learning_rate": 1.6649970582073028e-05, "loss": 0.2957, "step": 8324 }, { "epoch": 2.672981216888746, "grad_norm": 0.8429846167564392, "learning_rate": 1.6630689108529284e-05, "loss": 0.3215, "step": 8325 }, { "epoch": 2.6733022957135977, "grad_norm": 0.9628200531005859, "learning_rate": 1.661141779328319e-05, "loss": 0.3283, "step": 8326 }, { "epoch": 2.6736233745384492, "grad_norm": 0.7817652225494385, "learning_rate": 1.6592156638682886e-05, "loss": 0.2815, 
"step": 8327 }, { "epoch": 2.6739444533633008, "grad_norm": 0.5728936791419983, "learning_rate": 1.6572905647075298e-05, "loss": 0.2687, "step": 8328 }, { "epoch": 2.6742655321881523, "grad_norm": 0.3916088342666626, "learning_rate": 1.6553664820806102e-05, "loss": 0.2574, "step": 8329 }, { "epoch": 2.6745866110130034, "grad_norm": 0.5118910670280457, "learning_rate": 1.6534434162219727e-05, "loss": 0.6336, "step": 8330 }, { "epoch": 2.6749076898378554, "grad_norm": 0.4536105990409851, "learning_rate": 1.6515213673659357e-05, "loss": 0.4168, "step": 8331 }, { "epoch": 2.6752287686627065, "grad_norm": 0.442342072725296, "learning_rate": 1.649600335746695e-05, "loss": 0.2928, "step": 8332 }, { "epoch": 2.675549847487558, "grad_norm": 0.4411477744579315, "learning_rate": 1.6476803215983294e-05, "loss": 0.1311, "step": 8333 }, { "epoch": 2.6758709263124096, "grad_norm": 0.3371138572692871, "learning_rate": 1.6457613251547754e-05, "loss": 0.1534, "step": 8334 }, { "epoch": 2.676192005137261, "grad_norm": 0.6045904755592346, "learning_rate": 1.643843346649866e-05, "loss": 0.3301, "step": 8335 }, { "epoch": 2.6765130839621127, "grad_norm": 0.905532956123352, "learning_rate": 1.6419263863172997e-05, "loss": 0.3937, "step": 8336 }, { "epoch": 2.6768341627869643, "grad_norm": 0.7415409088134766, "learning_rate": 1.640010444390646e-05, "loss": 0.3073, "step": 8337 }, { "epoch": 2.677155241611816, "grad_norm": 0.8520146608352661, "learning_rate": 1.6380955211033656e-05, "loss": 0.4127, "step": 8338 }, { "epoch": 2.677476320436667, "grad_norm": 0.9074903130531311, "learning_rate": 1.6361816166887768e-05, "loss": 0.3889, "step": 8339 }, { "epoch": 2.677797399261519, "grad_norm": 0.7967313528060913, "learning_rate": 1.634268731380091e-05, "loss": 0.2836, "step": 8340 }, { "epoch": 2.67811847808637, "grad_norm": 0.7491098046302795, "learning_rate": 1.632356865410384e-05, "loss": 0.3364, "step": 8341 }, { "epoch": 2.6784395569112216, "grad_norm": 0.7044479250907898, 
"learning_rate": 1.6304460190126103e-05, "loss": 0.3405, "step": 8342 }, { "epoch": 2.678760635736073, "grad_norm": 0.7631996273994446, "learning_rate": 1.628536192419603e-05, "loss": 0.3188, "step": 8343 }, { "epoch": 2.6790817145609247, "grad_norm": 0.9044662714004517, "learning_rate": 1.6266273858640656e-05, "loss": 0.3248, "step": 8344 }, { "epoch": 2.6794027933857762, "grad_norm": 0.9730323553085327, "learning_rate": 1.6247195995785837e-05, "loss": 0.3272, "step": 8345 }, { "epoch": 2.679723872210628, "grad_norm": 0.9909996390342712, "learning_rate": 1.622812833795613e-05, "loss": 0.4031, "step": 8346 }, { "epoch": 2.6800449510354794, "grad_norm": 1.0814063549041748, "learning_rate": 1.6209070887474876e-05, "loss": 0.3565, "step": 8347 }, { "epoch": 2.6803660298603305, "grad_norm": 0.8834957480430603, "learning_rate": 1.6190023646664175e-05, "loss": 0.4426, "step": 8348 }, { "epoch": 2.6806871086851825, "grad_norm": 1.0340230464935303, "learning_rate": 1.6170986617844863e-05, "loss": 0.3924, "step": 8349 }, { "epoch": 2.6810081875100336, "grad_norm": 0.743299663066864, "learning_rate": 1.6151959803336535e-05, "loss": 0.3009, "step": 8350 }, { "epoch": 2.681329266334885, "grad_norm": 0.802480936050415, "learning_rate": 1.6132943205457606e-05, "loss": 0.3529, "step": 8351 }, { "epoch": 2.6816503451597367, "grad_norm": 1.1612759828567505, "learning_rate": 1.611393682652511e-05, "loss": 0.377, "step": 8352 }, { "epoch": 2.681971423984588, "grad_norm": 0.9446073174476624, "learning_rate": 1.6094940668855006e-05, "loss": 0.3516, "step": 8353 }, { "epoch": 2.6822925028094398, "grad_norm": 0.7460670471191406, "learning_rate": 1.6075954734761845e-05, "loss": 0.3126, "step": 8354 }, { "epoch": 2.6826135816342913, "grad_norm": 0.7956533432006836, "learning_rate": 1.6056979026559004e-05, "loss": 0.3152, "step": 8355 }, { "epoch": 2.682934660459143, "grad_norm": 0.5445581674575806, "learning_rate": 1.6038013546558696e-05, "loss": 0.2414, "step": 8356 }, { "epoch": 
2.683255739283994, "grad_norm": 0.8565784692764282, "learning_rate": 1.601905829707171e-05, "loss": 0.2997, "step": 8357 }, { "epoch": 2.683576818108846, "grad_norm": 0.512998104095459, "learning_rate": 1.600011328040777e-05, "loss": 0.2233, "step": 8358 }, { "epoch": 2.683897896933697, "grad_norm": 0.8238282203674316, "learning_rate": 1.598117849887518e-05, "loss": 0.3354, "step": 8359 }, { "epoch": 2.6842189757585486, "grad_norm": 0.8783250451087952, "learning_rate": 1.596225395478116e-05, "loss": 0.3224, "step": 8360 }, { "epoch": 2.6845400545834, "grad_norm": 0.6157239675521851, "learning_rate": 1.5943339650431576e-05, "loss": 0.2397, "step": 8361 }, { "epoch": 2.6848611334082517, "grad_norm": 0.7254630327224731, "learning_rate": 1.592443558813109e-05, "loss": 0.249, "step": 8362 }, { "epoch": 2.6851822122331033, "grad_norm": 1.0518107414245605, "learning_rate": 1.5905541770183096e-05, "loss": 0.3714, "step": 8363 }, { "epoch": 2.685503291057955, "grad_norm": 0.6833174824714661, "learning_rate": 1.588665819888976e-05, "loss": 0.277, "step": 8364 }, { "epoch": 2.6858243698828064, "grad_norm": 0.9686453938484192, "learning_rate": 1.586778487655197e-05, "loss": 0.4063, "step": 8365 }, { "epoch": 2.6861454487076575, "grad_norm": 0.634539008140564, "learning_rate": 1.5848921805469397e-05, "loss": 0.2757, "step": 8366 }, { "epoch": 2.6864665275325095, "grad_norm": 0.6041567325592041, "learning_rate": 1.5830068987940438e-05, "loss": 0.2413, "step": 8367 }, { "epoch": 2.6867876063573606, "grad_norm": 0.7220975160598755, "learning_rate": 1.581122642626226e-05, "loss": 0.2697, "step": 8368 }, { "epoch": 2.687108685182212, "grad_norm": 1.3267475366592407, "learning_rate": 1.579239412273078e-05, "loss": 0.3793, "step": 8369 }, { "epoch": 2.6874297640070637, "grad_norm": 1.0630850791931152, "learning_rate": 1.577357207964062e-05, "loss": 0.3434, "step": 8370 }, { "epoch": 2.6877508428319152, "grad_norm": 0.6697249412536621, "learning_rate": 1.5754760299285252e-05, "loss": 
0.2852, "step": 8371 }, { "epoch": 2.688071921656767, "grad_norm": 0.8393073081970215, "learning_rate": 1.5735958783956794e-05, "loss": 0.3452, "step": 8372 }, { "epoch": 2.6883930004816183, "grad_norm": 0.8739592432975769, "learning_rate": 1.5717167535946144e-05, "loss": 0.3311, "step": 8373 }, { "epoch": 2.68871407930647, "grad_norm": 0.4763057231903076, "learning_rate": 1.569838655754298e-05, "loss": 0.2477, "step": 8374 }, { "epoch": 2.689035158131321, "grad_norm": 0.5161868333816528, "learning_rate": 1.567961585103567e-05, "loss": 0.2638, "step": 8375 }, { "epoch": 2.689356236956173, "grad_norm": 0.5207780003547668, "learning_rate": 1.566085541871145e-05, "loss": 0.2612, "step": 8376 }, { "epoch": 2.689677315781024, "grad_norm": 0.388313353061676, "learning_rate": 1.564210526285612e-05, "loss": 0.25, "step": 8377 }, { "epoch": 2.6899983946058756, "grad_norm": 0.6559248566627502, "learning_rate": 1.5623365385754408e-05, "loss": 0.2934, "step": 8378 }, { "epoch": 2.690319473430727, "grad_norm": 0.6351720094680786, "learning_rate": 1.560463578968967e-05, "loss": 0.2985, "step": 8379 }, { "epoch": 2.6906405522555787, "grad_norm": 0.6100612282752991, "learning_rate": 1.5585916476944073e-05, "loss": 0.7795, "step": 8380 }, { "epoch": 2.6909616310804303, "grad_norm": 0.3323669135570526, "learning_rate": 1.5567207449798515e-05, "loss": 0.246, "step": 8381 }, { "epoch": 2.691282709905282, "grad_norm": 0.4569035768508911, "learning_rate": 1.5548508710532572e-05, "loss": 0.1564, "step": 8382 }, { "epoch": 2.6916037887301334, "grad_norm": 0.3317892551422119, "learning_rate": 1.5529820261424698e-05, "loss": 0.1498, "step": 8383 }, { "epoch": 2.6919248675549845, "grad_norm": 0.3487982451915741, "learning_rate": 1.551114210475201e-05, "loss": 0.1257, "step": 8384 }, { "epoch": 2.6922459463798365, "grad_norm": 0.42391496896743774, "learning_rate": 1.5492474242790366e-05, "loss": 0.1278, "step": 8385 }, { "epoch": 2.6925670252046876, "grad_norm": 0.3310485780239105, 
"learning_rate": 1.547381667781439e-05, "loss": 0.1795, "step": 8386 }, { "epoch": 2.692888104029539, "grad_norm": 0.8500526547431946, "learning_rate": 1.545516941209747e-05, "loss": 0.3601, "step": 8387 }, { "epoch": 2.6932091828543907, "grad_norm": 0.8416752815246582, "learning_rate": 1.54365324479117e-05, "loss": 0.3523, "step": 8388 }, { "epoch": 2.6935302616792423, "grad_norm": 0.7443310022354126, "learning_rate": 1.541790578752794e-05, "loss": 0.3137, "step": 8389 }, { "epoch": 2.693851340504094, "grad_norm": 0.7136545777320862, "learning_rate": 1.539928943321579e-05, "loss": 0.2981, "step": 8390 }, { "epoch": 2.6941724193289454, "grad_norm": 0.6598768830299377, "learning_rate": 1.538068338724361e-05, "loss": 0.2808, "step": 8391 }, { "epoch": 2.694493498153797, "grad_norm": 0.8997542858123779, "learning_rate": 1.5362087651878475e-05, "loss": 0.3301, "step": 8392 }, { "epoch": 2.694814576978648, "grad_norm": 0.9594383835792542, "learning_rate": 1.5343502229386207e-05, "loss": 0.382, "step": 8393 }, { "epoch": 2.6951356558035, "grad_norm": 0.7930410504341125, "learning_rate": 1.532492712203145e-05, "loss": 0.3238, "step": 8394 }, { "epoch": 2.695456734628351, "grad_norm": 0.918022096157074, "learning_rate": 1.530636233207743e-05, "loss": 0.3953, "step": 8395 }, { "epoch": 2.6957778134532027, "grad_norm": 1.3082184791564941, "learning_rate": 1.528780786178631e-05, "loss": 0.4692, "step": 8396 }, { "epoch": 2.696098892278054, "grad_norm": 0.8029715418815613, "learning_rate": 1.526926371341878e-05, "loss": 0.3129, "step": 8397 }, { "epoch": 2.6964199711029058, "grad_norm": 0.8471099734306335, "learning_rate": 1.5250729889234482e-05, "loss": 0.3716, "step": 8398 }, { "epoch": 2.6967410499277573, "grad_norm": 0.8596959710121155, "learning_rate": 1.5232206391491699e-05, "loss": 0.3605, "step": 8399 }, { "epoch": 2.697062128752609, "grad_norm": 0.6960004568099976, "learning_rate": 1.521369322244739e-05, "loss": 0.288, "step": 8400 }, { "epoch": 2.6973832075774604, 
"grad_norm": 0.6006725430488586, "learning_rate": 1.5195190384357404e-05, "loss": 0.2479, "step": 8401 }, { "epoch": 2.6977042864023115, "grad_norm": 0.9754154086112976, "learning_rate": 1.5176697879476232e-05, "loss": 0.2391, "step": 8402 }, { "epoch": 2.6980253652271635, "grad_norm": 0.7562586665153503, "learning_rate": 1.5158215710057123e-05, "loss": 0.2767, "step": 8403 }, { "epoch": 2.6983464440520146, "grad_norm": 0.9271560907363892, "learning_rate": 1.5139743878352075e-05, "loss": 0.3777, "step": 8404 }, { "epoch": 2.698667522876866, "grad_norm": 0.9956105947494507, "learning_rate": 1.5121282386611824e-05, "loss": 0.4351, "step": 8405 }, { "epoch": 2.6989886017017177, "grad_norm": 1.1439400911331177, "learning_rate": 1.5102831237085857e-05, "loss": 0.3981, "step": 8406 }, { "epoch": 2.6993096805265693, "grad_norm": 0.8666257262229919, "learning_rate": 1.5084390432022377e-05, "loss": 0.3045, "step": 8407 }, { "epoch": 2.699630759351421, "grad_norm": 0.7622800469398499, "learning_rate": 1.5065959973668353e-05, "loss": 0.2846, "step": 8408 }, { "epoch": 2.6999518381762724, "grad_norm": 0.9066924452781677, "learning_rate": 1.5047539864269478e-05, "loss": 0.2969, "step": 8409 }, { "epoch": 2.700272917001124, "grad_norm": 0.9989892244338989, "learning_rate": 1.5029130106070167e-05, "loss": 0.3537, "step": 8410 }, { "epoch": 2.700593995825975, "grad_norm": 1.0656063556671143, "learning_rate": 1.5010730701313625e-05, "loss": 0.3365, "step": 8411 }, { "epoch": 2.700915074650827, "grad_norm": 0.9919269680976868, "learning_rate": 1.4992341652241737e-05, "loss": 0.3995, "step": 8412 }, { "epoch": 2.701236153475678, "grad_norm": 0.9192927479743958, "learning_rate": 1.4973962961095134e-05, "loss": 0.3682, "step": 8413 }, { "epoch": 2.7015572323005297, "grad_norm": 0.9061486124992371, "learning_rate": 1.495559463011329e-05, "loss": 0.3291, "step": 8414 }, { "epoch": 2.7018783111253812, "grad_norm": 1.0932555198669434, "learning_rate": 1.4937236661534226e-05, "loss": 0.274, 
"step": 8415 }, { "epoch": 2.702199389950233, "grad_norm": 1.2957507371902466, "learning_rate": 1.4918889057594876e-05, "loss": 0.2687, "step": 8416 }, { "epoch": 2.7025204687750843, "grad_norm": 0.8411356806755066, "learning_rate": 1.4900551820530828e-05, "loss": 0.2988, "step": 8417 }, { "epoch": 2.702841547599936, "grad_norm": 1.028803825378418, "learning_rate": 1.4882224952576373e-05, "loss": 0.393, "step": 8418 }, { "epoch": 2.7031626264247874, "grad_norm": 1.0516469478607178, "learning_rate": 1.486390845596466e-05, "loss": 0.3689, "step": 8419 }, { "epoch": 2.7034837052496385, "grad_norm": 0.5976935029029846, "learning_rate": 1.484560233292741e-05, "loss": 0.2572, "step": 8420 }, { "epoch": 2.7038047840744905, "grad_norm": 1.021976113319397, "learning_rate": 1.4827306585695234e-05, "loss": 0.4232, "step": 8421 }, { "epoch": 2.7041258628993416, "grad_norm": 0.8594872951507568, "learning_rate": 1.4809021216497399e-05, "loss": 0.3019, "step": 8422 }, { "epoch": 2.704446941724193, "grad_norm": 0.8935600519180298, "learning_rate": 1.4790746227561925e-05, "loss": 0.2665, "step": 8423 }, { "epoch": 2.7047680205490447, "grad_norm": 0.5370054244995117, "learning_rate": 1.4772481621115541e-05, "loss": 0.2293, "step": 8424 }, { "epoch": 2.7050890993738963, "grad_norm": 0.42363736033439636, "learning_rate": 1.4754227399383757e-05, "loss": 0.2423, "step": 8425 }, { "epoch": 2.705410178198748, "grad_norm": 0.6202889680862427, "learning_rate": 1.4735983564590783e-05, "loss": 0.3039, "step": 8426 }, { "epoch": 2.7057312570235994, "grad_norm": 0.5840016603469849, "learning_rate": 1.4717750118959584e-05, "loss": 0.3053, "step": 8427 }, { "epoch": 2.706052335848451, "grad_norm": 0.5942694544792175, "learning_rate": 1.4699527064711838e-05, "loss": 0.2565, "step": 8428 }, { "epoch": 2.706373414673302, "grad_norm": 0.7443995475769043, "learning_rate": 1.468131440406798e-05, "loss": 0.3169, "step": 8429 }, { "epoch": 2.706694493498154, "grad_norm": 0.47813400626182556, 
"learning_rate": 1.466311213924716e-05, "loss": 0.6779, "step": 8430 }, { "epoch": 2.707015572323005, "grad_norm": 0.406895250082016, "learning_rate": 1.4644920272467244e-05, "loss": 0.5476, "step": 8431 }, { "epoch": 2.7073366511478567, "grad_norm": 0.3295402228832245, "learning_rate": 1.462673880594494e-05, "loss": 0.155, "step": 8432 }, { "epoch": 2.7076577299727083, "grad_norm": 0.23888343572616577, "learning_rate": 1.4608567741895495e-05, "loss": 0.0741, "step": 8433 }, { "epoch": 2.70797880879756, "grad_norm": 0.20620322227478027, "learning_rate": 1.4590407082533097e-05, "loss": 0.0649, "step": 8434 }, { "epoch": 2.7082998876224114, "grad_norm": 0.3761898875236511, "learning_rate": 1.4572256830070497e-05, "loss": 0.2023, "step": 8435 }, { "epoch": 2.708620966447263, "grad_norm": 0.45816802978515625, "learning_rate": 1.4554116986719257e-05, "loss": 0.2164, "step": 8436 }, { "epoch": 2.7089420452721145, "grad_norm": 0.8823957443237305, "learning_rate": 1.4535987554689712e-05, "loss": 0.5551, "step": 8437 }, { "epoch": 2.7092631240969656, "grad_norm": 0.8065453171730042, "learning_rate": 1.4517868536190803e-05, "loss": 0.396, "step": 8438 }, { "epoch": 2.7095842029218176, "grad_norm": 0.9334639310836792, "learning_rate": 1.4499759933430346e-05, "loss": 0.4288, "step": 8439 }, { "epoch": 2.7099052817466687, "grad_norm": 0.9277933239936829, "learning_rate": 1.4481661748614784e-05, "loss": 0.3647, "step": 8440 }, { "epoch": 2.71022636057152, "grad_norm": 0.7920669317245483, "learning_rate": 1.4463573983949341e-05, "loss": 0.3438, "step": 8441 }, { "epoch": 2.7105474393963718, "grad_norm": 1.0169240236282349, "learning_rate": 1.4445496641637967e-05, "loss": 0.4013, "step": 8442 }, { "epoch": 2.7108685182212233, "grad_norm": 0.83201664686203, "learning_rate": 1.4427429723883257e-05, "loss": 0.3804, "step": 8443 }, { "epoch": 2.711189597046075, "grad_norm": 0.9397286772727966, "learning_rate": 1.4409373232886702e-05, "loss": 0.3005, "step": 8444 }, { "epoch": 
2.7115106758709264, "grad_norm": 0.6609529852867126, "learning_rate": 1.439132717084839e-05, "loss": 0.314, "step": 8445 }, { "epoch": 2.711831754695778, "grad_norm": 0.8575668334960938, "learning_rate": 1.4373291539967182e-05, "loss": 0.3991, "step": 8446 }, { "epoch": 2.712152833520629, "grad_norm": 1.156197428703308, "learning_rate": 1.4355266342440677e-05, "loss": 0.3177, "step": 8447 }, { "epoch": 2.712473912345481, "grad_norm": 0.7570515275001526, "learning_rate": 1.4337251580465172e-05, "loss": 0.3598, "step": 8448 }, { "epoch": 2.712794991170332, "grad_norm": 0.7249849438667297, "learning_rate": 1.4319247256235714e-05, "loss": 0.2816, "step": 8449 }, { "epoch": 2.7131160699951837, "grad_norm": 0.9076480865478516, "learning_rate": 1.4301253371946089e-05, "loss": 0.3801, "step": 8450 }, { "epoch": 2.7134371488200353, "grad_norm": 1.0600674152374268, "learning_rate": 1.4283269929788779e-05, "loss": 0.4087, "step": 8451 }, { "epoch": 2.713758227644887, "grad_norm": 1.2555755376815796, "learning_rate": 1.426529693195503e-05, "loss": 0.5346, "step": 8452 }, { "epoch": 2.7140793064697384, "grad_norm": 0.9253405928611755, "learning_rate": 1.4247334380634792e-05, "loss": 0.3705, "step": 8453 }, { "epoch": 2.71440038529459, "grad_norm": 0.9356733560562134, "learning_rate": 1.4229382278016712e-05, "loss": 0.3621, "step": 8454 }, { "epoch": 2.7147214641194415, "grad_norm": 1.4253745079040527, "learning_rate": 1.4211440626288286e-05, "loss": 0.4511, "step": 8455 }, { "epoch": 2.7150425429442926, "grad_norm": 1.0171775817871094, "learning_rate": 1.4193509427635543e-05, "loss": 0.3103, "step": 8456 }, { "epoch": 2.7153636217691446, "grad_norm": 1.1757081747055054, "learning_rate": 1.4175588684243446e-05, "loss": 0.4486, "step": 8457 }, { "epoch": 2.7156847005939957, "grad_norm": 0.8698387742042542, "learning_rate": 1.4157678398295481e-05, "loss": 0.3556, "step": 8458 }, { "epoch": 2.7160057794188472, "grad_norm": 0.8687936067581177, "learning_rate": 
1.4139778571974049e-05, "loss": 0.3552, "step": 8459 }, { "epoch": 2.716326858243699, "grad_norm": 0.8789449334144592, "learning_rate": 1.412188920746017e-05, "loss": 0.3352, "step": 8460 }, { "epoch": 2.7166479370685503, "grad_norm": 0.8837921023368835, "learning_rate": 1.4104010306933557e-05, "loss": 0.3605, "step": 8461 }, { "epoch": 2.716969015893402, "grad_norm": 0.7887502312660217, "learning_rate": 1.4086141872572789e-05, "loss": 0.2859, "step": 8462 }, { "epoch": 2.7172900947182534, "grad_norm": 0.8587145805358887, "learning_rate": 1.406828390655497e-05, "loss": 0.3172, "step": 8463 }, { "epoch": 2.717611173543105, "grad_norm": 0.6038246750831604, "learning_rate": 1.4050436411056123e-05, "loss": 0.2771, "step": 8464 }, { "epoch": 2.717932252367956, "grad_norm": 0.7046909332275391, "learning_rate": 1.40325993882509e-05, "loss": 0.2853, "step": 8465 }, { "epoch": 2.718253331192808, "grad_norm": 0.6671448945999146, "learning_rate": 1.4014772840312663e-05, "loss": 0.2452, "step": 8466 }, { "epoch": 2.718574410017659, "grad_norm": 0.46538805961608887, "learning_rate": 1.3996956769413538e-05, "loss": 0.2233, "step": 8467 }, { "epoch": 2.7188954888425108, "grad_norm": 0.845503032207489, "learning_rate": 1.3979151177724347e-05, "loss": 0.329, "step": 8468 }, { "epoch": 2.7192165676673623, "grad_norm": 0.6878446936607361, "learning_rate": 1.3961356067414666e-05, "loss": 0.255, "step": 8469 }, { "epoch": 2.719537646492214, "grad_norm": 0.9203073978424072, "learning_rate": 1.3943571440652769e-05, "loss": 0.3479, "step": 8470 }, { "epoch": 2.7198587253170654, "grad_norm": 0.7054129242897034, "learning_rate": 1.3925797299605647e-05, "loss": 0.3034, "step": 8471 }, { "epoch": 2.720179804141917, "grad_norm": 1.0850917100906372, "learning_rate": 1.3908033646439033e-05, "loss": 0.3771, "step": 8472 }, { "epoch": 2.7205008829667685, "grad_norm": 0.6727474927902222, "learning_rate": 1.3890280483317374e-05, "loss": 0.2476, "step": 8473 }, { "epoch": 2.7208219617916196, 
"grad_norm": 0.7375319600105286, "learning_rate": 1.3872537812403829e-05, "loss": 0.2564, "step": 8474 }, { "epoch": 2.7211430406164716, "grad_norm": 0.4288458526134491, "learning_rate": 1.3854805635860336e-05, "loss": 0.2455, "step": 8475 }, { "epoch": 2.7214641194413227, "grad_norm": 0.5464689135551453, "learning_rate": 1.3837083955847418e-05, "loss": 0.2698, "step": 8476 }, { "epoch": 2.7217851982661743, "grad_norm": 0.6613762378692627, "learning_rate": 1.3819372774524508e-05, "loss": 0.2394, "step": 8477 }, { "epoch": 2.722106277091026, "grad_norm": 0.6144346594810486, "learning_rate": 1.38016720940496e-05, "loss": 0.2879, "step": 8478 }, { "epoch": 2.7224273559158774, "grad_norm": 0.4245906174182892, "learning_rate": 1.3783981916579446e-05, "loss": 0.2576, "step": 8479 }, { "epoch": 2.722748434740729, "grad_norm": 0.5054341554641724, "learning_rate": 1.3766302244269624e-05, "loss": 0.6253, "step": 8480 }, { "epoch": 2.7230695135655805, "grad_norm": 0.44886812567710876, "learning_rate": 1.3748633079274253e-05, "loss": 0.3354, "step": 8481 }, { "epoch": 2.723390592390432, "grad_norm": 0.39985498785972595, "learning_rate": 1.3730974423746335e-05, "loss": 0.2347, "step": 8482 }, { "epoch": 2.723711671215283, "grad_norm": 0.5271901488304138, "learning_rate": 1.3713326279837501e-05, "loss": 0.2654, "step": 8483 }, { "epoch": 2.724032750040135, "grad_norm": 0.4039822518825531, "learning_rate": 1.3695688649698124e-05, "loss": 0.1506, "step": 8484 }, { "epoch": 2.7243538288649862, "grad_norm": 0.4156002104282379, "learning_rate": 1.3678061535477304e-05, "loss": 0.1746, "step": 8485 }, { "epoch": 2.7246749076898378, "grad_norm": 0.43996649980545044, "learning_rate": 1.3660444939322836e-05, "loss": 0.1855, "step": 8486 }, { "epoch": 2.7249959865146893, "grad_norm": 1.112030267715454, "learning_rate": 1.3642838863381257e-05, "loss": 0.4318, "step": 8487 }, { "epoch": 2.725317065339541, "grad_norm": 0.7671384215354919, "learning_rate": 1.362524330979782e-05, "loss": 
0.3795, "step": 8488 }, { "epoch": 2.7256381441643924, "grad_norm": 0.8614218831062317, "learning_rate": 1.3607658280716473e-05, "loss": 0.3608, "step": 8489 }, { "epoch": 2.725959222989244, "grad_norm": 0.7562094330787659, "learning_rate": 1.3590083778279916e-05, "loss": 0.4333, "step": 8490 }, { "epoch": 2.7262803018140955, "grad_norm": 0.6444230079650879, "learning_rate": 1.3572519804629536e-05, "loss": 0.2859, "step": 8491 }, { "epoch": 2.7266013806389466, "grad_norm": 0.707561194896698, "learning_rate": 1.3554966361905464e-05, "loss": 0.2902, "step": 8492 }, { "epoch": 2.7269224594637986, "grad_norm": 0.7419070601463318, "learning_rate": 1.3537423452246523e-05, "loss": 0.3034, "step": 8493 }, { "epoch": 2.7272435382886497, "grad_norm": 0.9765730500221252, "learning_rate": 1.3519891077790236e-05, "loss": 0.3747, "step": 8494 }, { "epoch": 2.7275646171135013, "grad_norm": 0.6046754717826843, "learning_rate": 1.3502369240672941e-05, "loss": 0.2038, "step": 8495 }, { "epoch": 2.727885695938353, "grad_norm": 0.7997030019760132, "learning_rate": 1.3484857943029572e-05, "loss": 0.3041, "step": 8496 }, { "epoch": 2.7282067747632044, "grad_norm": 0.879155695438385, "learning_rate": 1.34673571869938e-05, "loss": 0.3071, "step": 8497 }, { "epoch": 2.728527853588056, "grad_norm": 1.1423804759979248, "learning_rate": 1.3449866974698122e-05, "loss": 0.3656, "step": 8498 }, { "epoch": 2.7288489324129075, "grad_norm": 0.8787369132041931, "learning_rate": 1.3432387308273575e-05, "loss": 0.4357, "step": 8499 }, { "epoch": 2.729170011237759, "grad_norm": 1.0522805452346802, "learning_rate": 1.3414918189850089e-05, "loss": 0.4416, "step": 8500 }, { "epoch": 2.72949109006261, "grad_norm": 0.7044956684112549, "learning_rate": 1.339745962155613e-05, "loss": 0.2899, "step": 8501 }, { "epoch": 2.729812168887462, "grad_norm": 0.7710807919502258, "learning_rate": 1.338001160551906e-05, "loss": 0.2822, "step": 8502 }, { "epoch": 2.7301332477123132, "grad_norm": 1.0037212371826172, 
"learning_rate": 1.3362574143864814e-05, "loss": 0.3884, "step": 8503 }, { "epoch": 2.730454326537165, "grad_norm": 0.7747420072555542, "learning_rate": 1.3345147238718126e-05, "loss": 0.3585, "step": 8504 }, { "epoch": 2.7307754053620164, "grad_norm": 0.5802621841430664, "learning_rate": 1.3327730892202383e-05, "loss": 0.2472, "step": 8505 }, { "epoch": 2.731096484186868, "grad_norm": 0.7853018641471863, "learning_rate": 1.3310325106439726e-05, "loss": 0.2751, "step": 8506 }, { "epoch": 2.7314175630117195, "grad_norm": 0.5948209166526794, "learning_rate": 1.3292929883550998e-05, "loss": 0.2399, "step": 8507 }, { "epoch": 2.731738641836571, "grad_norm": 1.0438854694366455, "learning_rate": 1.327554522565576e-05, "loss": 0.3403, "step": 8508 }, { "epoch": 2.7320597206614226, "grad_norm": 0.662353515625, "learning_rate": 1.3258171134872265e-05, "loss": 0.2787, "step": 8509 }, { "epoch": 2.7323807994862737, "grad_norm": 0.8576146960258484, "learning_rate": 1.3240807613317507e-05, "loss": 0.3339, "step": 8510 }, { "epoch": 2.7327018783111257, "grad_norm": 0.8656917214393616, "learning_rate": 1.3223454663107172e-05, "loss": 0.3105, "step": 8511 }, { "epoch": 2.7330229571359768, "grad_norm": 0.6916074752807617, "learning_rate": 1.3206112286355632e-05, "loss": 0.2808, "step": 8512 }, { "epoch": 2.7333440359608283, "grad_norm": 0.5651912689208984, "learning_rate": 1.3188780485176088e-05, "loss": 0.2671, "step": 8513 }, { "epoch": 2.73366511478568, "grad_norm": 0.7664126753807068, "learning_rate": 1.3171459261680297e-05, "loss": 0.316, "step": 8514 }, { "epoch": 2.7339861936105314, "grad_norm": 0.632695198059082, "learning_rate": 1.3154148617978812e-05, "loss": 0.2613, "step": 8515 }, { "epoch": 2.734307272435383, "grad_norm": 0.7974990606307983, "learning_rate": 1.3136848556180892e-05, "loss": 0.2982, "step": 8516 }, { "epoch": 2.7346283512602345, "grad_norm": 0.7887236475944519, "learning_rate": 1.3119559078394461e-05, "loss": 0.2945, "step": 8517 }, { "epoch": 
2.734949430085086, "grad_norm": 0.8384912014007568, "learning_rate": 1.3102280186726269e-05, "loss": 0.3098, "step": 8518 }, { "epoch": 2.735270508909937, "grad_norm": 0.8313494920730591, "learning_rate": 1.3085011883281606e-05, "loss": 0.3429, "step": 8519 }, { "epoch": 2.735591587734789, "grad_norm": 0.7698463797569275, "learning_rate": 1.3067754170164614e-05, "loss": 0.276, "step": 8520 }, { "epoch": 2.7359126665596403, "grad_norm": 0.7834967374801636, "learning_rate": 1.30505070494781e-05, "loss": 0.2935, "step": 8521 }, { "epoch": 2.736233745384492, "grad_norm": 1.1544512510299683, "learning_rate": 1.3033270523323549e-05, "loss": 0.3743, "step": 8522 }, { "epoch": 2.7365548242093434, "grad_norm": 1.0003769397735596, "learning_rate": 1.3016044593801202e-05, "loss": 0.3563, "step": 8523 }, { "epoch": 2.736875903034195, "grad_norm": 1.167098879814148, "learning_rate": 1.2998829263009938e-05, "loss": 0.4428, "step": 8524 }, { "epoch": 2.7371969818590465, "grad_norm": 0.758287250995636, "learning_rate": 1.2981624533047432e-05, "loss": 0.3105, "step": 8525 }, { "epoch": 2.737518060683898, "grad_norm": 0.7774193286895752, "learning_rate": 1.296443040601003e-05, "loss": 0.3055, "step": 8526 }, { "epoch": 2.7378391395087496, "grad_norm": 0.4565434455871582, "learning_rate": 1.294724688399278e-05, "loss": 0.2601, "step": 8527 }, { "epoch": 2.7381602183336007, "grad_norm": 0.8566820025444031, "learning_rate": 1.293007396908944e-05, "loss": 0.2995, "step": 8528 }, { "epoch": 2.7384812971584527, "grad_norm": 0.6120618581771851, "learning_rate": 1.2912911663392469e-05, "loss": 0.3114, "step": 8529 }, { "epoch": 2.738802375983304, "grad_norm": 0.46445560455322266, "learning_rate": 1.2895759968993048e-05, "loss": 0.5424, "step": 8530 }, { "epoch": 2.7391234548081553, "grad_norm": 0.5213553309440613, "learning_rate": 1.2878618887981064e-05, "loss": 0.6759, "step": 8531 }, { "epoch": 2.739444533633007, "grad_norm": 0.38170596957206726, "learning_rate": 1.28614884224451e-05, 
"loss": 0.2189, "step": 8532 }, { "epoch": 2.7397656124578584, "grad_norm": 0.4311400353908539, "learning_rate": 1.2844368574472454e-05, "loss": 0.2501, "step": 8533 }, { "epoch": 2.74008669128271, "grad_norm": 0.23123976588249207, "learning_rate": 1.2827259346149122e-05, "loss": 0.0694, "step": 8534 }, { "epoch": 2.7404077701075615, "grad_norm": 0.32238295674324036, "learning_rate": 1.2810160739559796e-05, "loss": 0.1146, "step": 8535 }, { "epoch": 2.740728848932413, "grad_norm": 0.1789664924144745, "learning_rate": 1.279307275678795e-05, "loss": 0.063, "step": 8536 }, { "epoch": 2.741049927757264, "grad_norm": 0.7732048630714417, "learning_rate": 1.2775995399915631e-05, "loss": 0.4496, "step": 8537 }, { "epoch": 2.7413710065821157, "grad_norm": 1.2547789812088013, "learning_rate": 1.2758928671023718e-05, "loss": 0.4711, "step": 8538 }, { "epoch": 2.7416920854069673, "grad_norm": 0.7271549105644226, "learning_rate": 1.2741872572191682e-05, "loss": 0.3086, "step": 8539 }, { "epoch": 2.742013164231819, "grad_norm": 0.7055025696754456, "learning_rate": 1.2724827105497816e-05, "loss": 0.3016, "step": 8540 }, { "epoch": 2.7423342430566704, "grad_norm": 0.7456016540527344, "learning_rate": 1.2707792273019048e-05, "loss": 0.3477, "step": 8541 }, { "epoch": 2.742655321881522, "grad_norm": 0.9478852152824402, "learning_rate": 1.2690768076830972e-05, "loss": 0.302, "step": 8542 }, { "epoch": 2.7429764007063735, "grad_norm": 0.7163116335868835, "learning_rate": 1.2673754519008008e-05, "loss": 0.3128, "step": 8543 }, { "epoch": 2.743297479531225, "grad_norm": 0.6127589344978333, "learning_rate": 1.2656751601623118e-05, "loss": 0.2145, "step": 8544 }, { "epoch": 2.7436185583560766, "grad_norm": 0.8434281349182129, "learning_rate": 1.2639759326748135e-05, "loss": 0.2788, "step": 8545 }, { "epoch": 2.7439396371809277, "grad_norm": 0.6575958728790283, "learning_rate": 1.262277769645348e-05, "loss": 0.2678, "step": 8546 }, { "epoch": 2.7442607160057793, "grad_norm": 
0.9952372312545776, "learning_rate": 1.260580671280832e-05, "loss": 0.3722, "step": 8547 }, { "epoch": 2.744581794830631, "grad_norm": 1.0888564586639404, "learning_rate": 1.2588846377880525e-05, "loss": 0.4193, "step": 8548 }, { "epoch": 2.7449028736554824, "grad_norm": 0.8872905373573303, "learning_rate": 1.257189669373664e-05, "loss": 0.3686, "step": 8549 }, { "epoch": 2.745223952480334, "grad_norm": 0.8427690267562866, "learning_rate": 1.2554957662441957e-05, "loss": 0.3544, "step": 8550 }, { "epoch": 2.7455450313051855, "grad_norm": 0.8962048888206482, "learning_rate": 1.2538029286060426e-05, "loss": 0.3592, "step": 8551 }, { "epoch": 2.745866110130037, "grad_norm": 0.9221064448356628, "learning_rate": 1.2521111566654731e-05, "loss": 0.3621, "step": 8552 }, { "epoch": 2.7461871889548886, "grad_norm": 1.0115526914596558, "learning_rate": 1.2504204506286243e-05, "loss": 0.4147, "step": 8553 }, { "epoch": 2.74650826777974, "grad_norm": 0.8102017045021057, "learning_rate": 1.2487308107015027e-05, "loss": 0.3009, "step": 8554 }, { "epoch": 2.746829346604591, "grad_norm": 1.0300239324569702, "learning_rate": 1.2470422370899838e-05, "loss": 0.3574, "step": 8555 }, { "epoch": 2.7471504254294428, "grad_norm": 1.0161161422729492, "learning_rate": 1.2453547299998225e-05, "loss": 0.3415, "step": 8556 }, { "epoch": 2.7474715042542943, "grad_norm": 1.0009255409240723, "learning_rate": 1.243668289636628e-05, "loss": 0.3357, "step": 8557 }, { "epoch": 2.747792583079146, "grad_norm": 0.9013484120368958, "learning_rate": 1.241982916205895e-05, "loss": 0.354, "step": 8558 }, { "epoch": 2.7481136619039974, "grad_norm": 0.9857357740402222, "learning_rate": 1.2402986099129765e-05, "loss": 0.3988, "step": 8559 }, { "epoch": 2.748434740728849, "grad_norm": 0.8265845775604248, "learning_rate": 1.2386153709630988e-05, "loss": 0.3291, "step": 8560 }, { "epoch": 2.7487558195537005, "grad_norm": 1.0555880069732666, "learning_rate": 1.2369331995613665e-05, "loss": 0.3589, "step": 8561 }, { 
"epoch": 2.749076898378552, "grad_norm": 0.9744288921356201, "learning_rate": 1.2352520959127379e-05, "loss": 0.3236, "step": 8562 }, { "epoch": 2.7493979772034036, "grad_norm": 0.7760259509086609, "learning_rate": 1.2335720602220569e-05, "loss": 0.2423, "step": 8563 }, { "epoch": 2.7497190560282547, "grad_norm": 1.1198222637176514, "learning_rate": 1.2318930926940298e-05, "loss": 0.3958, "step": 8564 }, { "epoch": 2.7500401348531063, "grad_norm": 1.1928644180297852, "learning_rate": 1.2302151935332329e-05, "loss": 0.4002, "step": 8565 }, { "epoch": 2.750361213677958, "grad_norm": 0.7599969506263733, "learning_rate": 1.228538362944115e-05, "loss": 0.2612, "step": 8566 }, { "epoch": 2.7506822925028094, "grad_norm": 1.1186960935592651, "learning_rate": 1.2268626011309858e-05, "loss": 0.3827, "step": 8567 }, { "epoch": 2.751003371327661, "grad_norm": 0.6915915608406067, "learning_rate": 1.22518790829804e-05, "loss": 0.2565, "step": 8568 }, { "epoch": 2.7513244501525125, "grad_norm": 0.7428755164146423, "learning_rate": 1.2235142846493308e-05, "loss": 0.2987, "step": 8569 }, { "epoch": 2.751645528977364, "grad_norm": 0.7877930998802185, "learning_rate": 1.2218417303887842e-05, "loss": 0.3379, "step": 8570 }, { "epoch": 2.7519666078022156, "grad_norm": 0.9466036558151245, "learning_rate": 1.2201702457201947e-05, "loss": 0.3894, "step": 8571 }, { "epoch": 2.752287686627067, "grad_norm": 0.8674002289772034, "learning_rate": 1.2184998308472295e-05, "loss": 0.3414, "step": 8572 }, { "epoch": 2.7526087654519182, "grad_norm": 0.8166511654853821, "learning_rate": 1.2168304859734226e-05, "loss": 0.3156, "step": 8573 }, { "epoch": 2.75292984427677, "grad_norm": 0.7151662707328796, "learning_rate": 1.2151622113021787e-05, "loss": 0.3031, "step": 8574 }, { "epoch": 2.7532509231016213, "grad_norm": 0.7391020655632019, "learning_rate": 1.2134950070367723e-05, "loss": 0.2881, "step": 8575 }, { "epoch": 2.753572001926473, "grad_norm": 0.8440263867378235, "learning_rate": 
1.2118288733803473e-05, "loss": 0.3104, "step": 8576 }, { "epoch": 2.7538930807513244, "grad_norm": 0.8688570261001587, "learning_rate": 1.2101638105359169e-05, "loss": 0.3327, "step": 8577 }, { "epoch": 2.754214159576176, "grad_norm": 0.35422104597091675, "learning_rate": 1.2084998187063613e-05, "loss": 0.2317, "step": 8578 }, { "epoch": 2.7545352384010275, "grad_norm": 0.4497572183609009, "learning_rate": 1.206836898094439e-05, "loss": 0.2845, "step": 8579 }, { "epoch": 2.754856317225879, "grad_norm": 0.48799851536750793, "learning_rate": 1.2051750489027647e-05, "loss": 0.6289, "step": 8580 }, { "epoch": 2.7551773960507306, "grad_norm": 0.438763827085495, "learning_rate": 1.2035142713338366e-05, "loss": 0.5615, "step": 8581 }, { "epoch": 2.7554984748755817, "grad_norm": 0.4711540639400482, "learning_rate": 1.2018545655900081e-05, "loss": 0.3515, "step": 8582 }, { "epoch": 2.7558195537004333, "grad_norm": 0.43280723690986633, "learning_rate": 1.2001959318735156e-05, "loss": 0.16, "step": 8583 }, { "epoch": 2.756140632525285, "grad_norm": 0.3058044910430908, "learning_rate": 1.1985383703864583e-05, "loss": 0.1416, "step": 8584 }, { "epoch": 2.7564617113501364, "grad_norm": 0.15330089628696442, "learning_rate": 1.196881881330798e-05, "loss": 0.06, "step": 8585 }, { "epoch": 2.756782790174988, "grad_norm": 0.45798158645629883, "learning_rate": 1.19522646490838e-05, "loss": 0.1952, "step": 8586 }, { "epoch": 2.7571038689998395, "grad_norm": 0.8995585441589355, "learning_rate": 1.1935721213209105e-05, "loss": 0.6017, "step": 8587 }, { "epoch": 2.757424947824691, "grad_norm": 0.8658804297447205, "learning_rate": 1.191918850769964e-05, "loss": 0.4147, "step": 8588 }, { "epoch": 2.7577460266495426, "grad_norm": 0.7916271090507507, "learning_rate": 1.1902666534569883e-05, "loss": 0.3732, "step": 8589 }, { "epoch": 2.758067105474394, "grad_norm": 1.541236162185669, "learning_rate": 1.188615529583299e-05, "loss": 0.4061, "step": 8590 }, { "epoch": 2.7583881842992453, 
"grad_norm": 0.7521135210990906, "learning_rate": 1.1869654793500784e-05, "loss": 0.3043, "step": 8591 }, { "epoch": 2.758709263124097, "grad_norm": 0.7983594536781311, "learning_rate": 1.1853165029583823e-05, "loss": 0.3089, "step": 8592 }, { "epoch": 2.7590303419489484, "grad_norm": 0.6637342572212219, "learning_rate": 1.1836686006091313e-05, "loss": 0.2742, "step": 8593 }, { "epoch": 2.7593514207738, "grad_norm": 0.8131726384162903, "learning_rate": 1.1820217725031191e-05, "loss": 0.3027, "step": 8594 }, { "epoch": 2.7596724995986515, "grad_norm": 0.6360102891921997, "learning_rate": 1.1803760188410073e-05, "loss": 0.2646, "step": 8595 }, { "epoch": 2.759993578423503, "grad_norm": 0.7749152183532715, "learning_rate": 1.1787313398233235e-05, "loss": 0.3449, "step": 8596 }, { "epoch": 2.7603146572483546, "grad_norm": 0.9877920746803284, "learning_rate": 1.1770877356504683e-05, "loss": 0.3666, "step": 8597 }, { "epoch": 2.760635736073206, "grad_norm": 0.9534462094306946, "learning_rate": 1.1754452065227084e-05, "loss": 0.3528, "step": 8598 }, { "epoch": 2.7609568148980577, "grad_norm": 1.1205729246139526, "learning_rate": 1.1738037526401857e-05, "loss": 0.4691, "step": 8599 }, { "epoch": 2.7612778937229088, "grad_norm": 0.7297707200050354, "learning_rate": 1.1721633742028992e-05, "loss": 0.2858, "step": 8600 }, { "epoch": 2.7615989725477603, "grad_norm": 0.8408207297325134, "learning_rate": 1.1705240714107302e-05, "loss": 0.3679, "step": 8601 }, { "epoch": 2.761920051372612, "grad_norm": 1.0112425088882446, "learning_rate": 1.168885844463422e-05, "loss": 0.3732, "step": 8602 }, { "epoch": 2.7622411301974634, "grad_norm": 1.013698697090149, "learning_rate": 1.1672486935605831e-05, "loss": 0.3725, "step": 8603 }, { "epoch": 2.762562209022315, "grad_norm": 1.6599117517471313, "learning_rate": 1.1656126189017014e-05, "loss": 0.3795, "step": 8604 }, { "epoch": 2.7628832878471665, "grad_norm": 0.8416186571121216, "learning_rate": 1.1639776206861196e-05, "loss": 0.2958, 
"step": 8605 }, { "epoch": 2.763204366672018, "grad_norm": 0.892895519733429, "learning_rate": 1.1623436991130654e-05, "loss": 0.3505, "step": 8606 }, { "epoch": 2.7635254454968696, "grad_norm": 0.7338150143623352, "learning_rate": 1.1607108543816248e-05, "loss": 0.3204, "step": 8607 }, { "epoch": 2.763846524321721, "grad_norm": 0.9701830744743347, "learning_rate": 1.159079086690753e-05, "loss": 0.3527, "step": 8608 }, { "epoch": 2.7641676031465723, "grad_norm": 1.0651260614395142, "learning_rate": 1.1574483962392767e-05, "loss": 0.3822, "step": 8609 }, { "epoch": 2.764488681971424, "grad_norm": 0.7772603034973145, "learning_rate": 1.1558187832258926e-05, "loss": 0.3063, "step": 8610 }, { "epoch": 2.7648097607962754, "grad_norm": 0.5828967690467834, "learning_rate": 1.1541902478491606e-05, "loss": 0.2822, "step": 8611 }, { "epoch": 2.765130839621127, "grad_norm": 0.8578951954841614, "learning_rate": 1.1525627903075165e-05, "loss": 0.2906, "step": 8612 }, { "epoch": 2.7654519184459785, "grad_norm": 0.7797196507453918, "learning_rate": 1.1509364107992583e-05, "loss": 0.2938, "step": 8613 }, { "epoch": 2.76577299727083, "grad_norm": 0.892012894153595, "learning_rate": 1.1493111095225562e-05, "loss": 0.3103, "step": 8614 }, { "epoch": 2.7660940760956816, "grad_norm": 0.747068464756012, "learning_rate": 1.1476868866754486e-05, "loss": 0.2878, "step": 8615 }, { "epoch": 2.766415154920533, "grad_norm": 0.9010385274887085, "learning_rate": 1.1460637424558407e-05, "loss": 0.3162, "step": 8616 }, { "epoch": 2.7667362337453847, "grad_norm": 0.9162553548812866, "learning_rate": 1.1444416770615118e-05, "loss": 0.3087, "step": 8617 }, { "epoch": 2.767057312570236, "grad_norm": 0.7222566604614258, "learning_rate": 1.1428206906900995e-05, "loss": 0.2889, "step": 8618 }, { "epoch": 2.7673783913950873, "grad_norm": 0.5064111948013306, "learning_rate": 1.1412007835391236e-05, "loss": 0.2399, "step": 8619 }, { "epoch": 2.767699470219939, "grad_norm": 0.6242873072624207, 
"learning_rate": 1.1395819558059572e-05, "loss": 0.2545, "step": 8620 }, { "epoch": 2.7680205490447904, "grad_norm": 1.012289047241211, "learning_rate": 1.1379642076878527e-05, "loss": 0.3428, "step": 8621 }, { "epoch": 2.768341627869642, "grad_norm": 0.7770035862922668, "learning_rate": 1.1363475393819311e-05, "loss": 0.3126, "step": 8622 }, { "epoch": 2.7686627066944935, "grad_norm": 0.8441600799560547, "learning_rate": 1.1347319510851717e-05, "loss": 0.2974, "step": 8623 }, { "epoch": 2.768983785519345, "grad_norm": 1.1390506029129028, "learning_rate": 1.1331174429944347e-05, "loss": 0.3347, "step": 8624 }, { "epoch": 2.7693048643441966, "grad_norm": 0.9870839715003967, "learning_rate": 1.1315040153064416e-05, "loss": 0.4109, "step": 8625 }, { "epoch": 2.769625943169048, "grad_norm": 0.9025225043296814, "learning_rate": 1.129891668217783e-05, "loss": 0.3347, "step": 8626 }, { "epoch": 2.7699470219938993, "grad_norm": 0.6269119381904602, "learning_rate": 1.1282804019249182e-05, "loss": 0.2896, "step": 8627 }, { "epoch": 2.770268100818751, "grad_norm": 0.30881941318511963, "learning_rate": 1.1266702166241772e-05, "loss": 0.2314, "step": 8628 }, { "epoch": 2.7705891796436024, "grad_norm": 0.4127409756183624, "learning_rate": 1.1250611125117527e-05, "loss": 0.2654, "step": 8629 }, { "epoch": 2.770910258468454, "grad_norm": 0.49982887506484985, "learning_rate": 1.1234530897837126e-05, "loss": 0.7763, "step": 8630 }, { "epoch": 2.7712313372933055, "grad_norm": 0.3858592212200165, "learning_rate": 1.1218461486359877e-05, "loss": 0.3977, "step": 8631 }, { "epoch": 2.771552416118157, "grad_norm": 0.5631637573242188, "learning_rate": 1.1202402892643781e-05, "loss": 0.3626, "step": 8632 }, { "epoch": 2.7718734949430086, "grad_norm": 0.4585270881652832, "learning_rate": 1.1186355118645554e-05, "loss": 0.2111, "step": 8633 }, { "epoch": 2.77219457376786, "grad_norm": 0.4266338646411896, "learning_rate": 1.1170318166320548e-05, "loss": 0.1454, "step": 8634 }, { "epoch": 
2.7725156525927117, "grad_norm": 0.3712063729763031, "learning_rate": 1.1154292037622838e-05, "loss": 0.1819, "step": 8635 }, { "epoch": 2.772836731417563, "grad_norm": 0.7830849289894104, "learning_rate": 1.1138276734505104e-05, "loss": 0.4185, "step": 8636 }, { "epoch": 2.7731578102424144, "grad_norm": 0.7869873046875, "learning_rate": 1.1122272258918865e-05, "loss": 0.3898, "step": 8637 }, { "epoch": 2.773478889067266, "grad_norm": 0.9816368222236633, "learning_rate": 1.1106278612814125e-05, "loss": 0.3876, "step": 8638 }, { "epoch": 2.7737999678921175, "grad_norm": 0.8416398167610168, "learning_rate": 1.1090295798139672e-05, "loss": 0.3797, "step": 8639 }, { "epoch": 2.774121046716969, "grad_norm": 0.7030868530273438, "learning_rate": 1.1074323816843024e-05, "loss": 0.3055, "step": 8640 }, { "epoch": 2.7744421255418206, "grad_norm": 0.6922499537467957, "learning_rate": 1.1058362670870249e-05, "loss": 0.2845, "step": 8641 }, { "epoch": 2.774763204366672, "grad_norm": 0.6932337284088135, "learning_rate": 1.1042412362166222e-05, "loss": 0.3024, "step": 8642 }, { "epoch": 2.7750842831915237, "grad_norm": 0.6935373544692993, "learning_rate": 1.1026472892674378e-05, "loss": 0.2872, "step": 8643 }, { "epoch": 2.775405362016375, "grad_norm": 0.7587444186210632, "learning_rate": 1.1010544264336942e-05, "loss": 0.3455, "step": 8644 }, { "epoch": 2.7757264408412263, "grad_norm": 0.6532779335975647, "learning_rate": 1.099462647909475e-05, "loss": 0.2364, "step": 8645 }, { "epoch": 2.776047519666078, "grad_norm": 0.923891007900238, "learning_rate": 1.0978719538887349e-05, "loss": 0.3352, "step": 8646 }, { "epoch": 2.7763685984909294, "grad_norm": 0.6212542057037354, "learning_rate": 1.0962823445652959e-05, "loss": 0.2283, "step": 8647 }, { "epoch": 2.776689677315781, "grad_norm": 1.0696156024932861, "learning_rate": 1.0946938201328416e-05, "loss": 0.4078, "step": 8648 }, { "epoch": 2.7770107561406325, "grad_norm": 0.7603042125701904, "learning_rate": 1.093106380784934e-05, 
"loss": 0.324, "step": 8649 }, { "epoch": 2.777331834965484, "grad_norm": 0.9371538162231445, "learning_rate": 1.0915200267149972e-05, "loss": 0.392, "step": 8650 }, { "epoch": 2.7776529137903356, "grad_norm": 0.7637982368469238, "learning_rate": 1.0899347581163221e-05, "loss": 0.3077, "step": 8651 }, { "epoch": 2.777973992615187, "grad_norm": 1.0060869455337524, "learning_rate": 1.08835057518207e-05, "loss": 0.3866, "step": 8652 }, { "epoch": 2.7782950714400387, "grad_norm": 0.8908117413520813, "learning_rate": 1.0867674781052684e-05, "loss": 0.3209, "step": 8653 }, { "epoch": 2.77861615026489, "grad_norm": 1.2514095306396484, "learning_rate": 1.0851854670788108e-05, "loss": 0.422, "step": 8654 }, { "epoch": 2.7789372290897414, "grad_norm": 0.9697837233543396, "learning_rate": 1.0836045422954666e-05, "loss": 0.3297, "step": 8655 }, { "epoch": 2.779258307914593, "grad_norm": 0.8684007525444031, "learning_rate": 1.0820247039478604e-05, "loss": 0.3689, "step": 8656 }, { "epoch": 2.7795793867394445, "grad_norm": 1.1245943307876587, "learning_rate": 1.0804459522284926e-05, "loss": 0.3227, "step": 8657 }, { "epoch": 2.779900465564296, "grad_norm": 0.9986475706100464, "learning_rate": 1.0788682873297307e-05, "loss": 0.3325, "step": 8658 }, { "epoch": 2.7802215443891476, "grad_norm": 0.8092026114463806, "learning_rate": 1.0772917094438051e-05, "loss": 0.3339, "step": 8659 }, { "epoch": 2.780542623213999, "grad_norm": 0.6285796165466309, "learning_rate": 1.0757162187628222e-05, "loss": 0.2289, "step": 8660 }, { "epoch": 2.7808637020388507, "grad_norm": 0.81215900182724, "learning_rate": 1.0741418154787442e-05, "loss": 0.3342, "step": 8661 }, { "epoch": 2.7811847808637022, "grad_norm": 0.7589025497436523, "learning_rate": 1.0725684997834162e-05, "loss": 0.3424, "step": 8662 }, { "epoch": 2.7815058596885534, "grad_norm": 0.7941707968711853, "learning_rate": 1.0709962718685318e-05, "loss": 0.342, "step": 8663 }, { "epoch": 2.781826938513405, "grad_norm": 1.3057998418807983, 
"learning_rate": 1.0694251319256687e-05, "loss": 0.3437, "step": 8664 }, { "epoch": 2.7821480173382565, "grad_norm": 1.0559587478637695, "learning_rate": 1.067855080146266e-05, "loss": 0.3157, "step": 8665 }, { "epoch": 2.782469096163108, "grad_norm": 1.1683728694915771, "learning_rate": 1.0662861167216243e-05, "loss": 0.3433, "step": 8666 }, { "epoch": 2.7827901749879596, "grad_norm": 0.8493248820304871, "learning_rate": 1.0647182418429225e-05, "loss": 0.3193, "step": 8667 }, { "epoch": 2.783111253812811, "grad_norm": 1.0568920373916626, "learning_rate": 1.063151455701199e-05, "loss": 0.3286, "step": 8668 }, { "epoch": 2.7834323326376627, "grad_norm": null, "learning_rate": 1.063151455701199e-05, "loss": 0.3763, "step": 8669 }, { "epoch": 2.783753411462514, "grad_norm": 0.562329888343811, "learning_rate": 1.0615857584873623e-05, "loss": 0.2469, "step": 8670 }, { "epoch": 2.7840744902873658, "grad_norm": 1.2342668771743774, "learning_rate": 1.0600211503921887e-05, "loss": 0.4084, "step": 8671 }, { "epoch": 2.784395569112217, "grad_norm": 0.6708762645721436, "learning_rate": 1.0584576316063188e-05, "loss": 0.2886, "step": 8672 }, { "epoch": 2.7847166479370684, "grad_norm": 0.7978615164756775, "learning_rate": 1.0568952023202638e-05, "loss": 0.3324, "step": 8673 }, { "epoch": 2.78503772676192, "grad_norm": 0.4819890260696411, "learning_rate": 1.0553338627244025e-05, "loss": 0.2395, "step": 8674 }, { "epoch": 2.7853588055867715, "grad_norm": 0.7550230026245117, "learning_rate": 1.053773613008977e-05, "loss": 0.2846, "step": 8675 }, { "epoch": 2.785679884411623, "grad_norm": 0.5349231958389282, "learning_rate": 1.0522144533640998e-05, "loss": 0.2853, "step": 8676 }, { "epoch": 2.7860009632364746, "grad_norm": 1.028280258178711, "learning_rate": 1.0506563839797501e-05, "loss": 0.3143, "step": 8677 }, { "epoch": 2.786322042061326, "grad_norm": 0.6815648674964905, "learning_rate": 1.0490994050457748e-05, "loss": 0.2822, "step": 8678 }, { "epoch": 2.7866431208861777,
"grad_norm": 0.6263010501861572, "learning_rate": 1.0475435167518843e-05, "loss": 0.3049, "step": 8679 }, { "epoch": 2.7869641997110293, "grad_norm": 0.5349334478378296, "learning_rate": 1.0459887192876594e-05, "loss": 0.6824, "step": 8680 }, { "epoch": 2.7872852785358804, "grad_norm": 0.4282764494419098, "learning_rate": 1.0444350128425529e-05, "loss": 0.4408, "step": 8681 }, { "epoch": 2.787606357360732, "grad_norm": 0.4315713346004486, "learning_rate": 1.042882397605871e-05, "loss": 0.4115, "step": 8682 }, { "epoch": 2.7879274361855835, "grad_norm": 0.4827350974082947, "learning_rate": 1.0413308737668005e-05, "loss": 0.1461, "step": 8683 }, { "epoch": 2.788248515010435, "grad_norm": 0.28072378039360046, "learning_rate": 1.0397804415143909e-05, "loss": 0.1271, "step": 8684 }, { "epoch": 2.7885695938352866, "grad_norm": 0.3307070732116699, "learning_rate": 1.0382311010375512e-05, "loss": 0.1209, "step": 8685 }, { "epoch": 2.788890672660138, "grad_norm": 0.18453341722488403, "learning_rate": 1.0366828525250726e-05, "loss": 0.0659, "step": 8686 }, { "epoch": 2.7892117514849897, "grad_norm": 0.5584306716918945, "learning_rate": 1.0351356961655945e-05, "loss": 0.339, "step": 8687 }, { "epoch": 2.7895328303098412, "grad_norm": 0.9177886247634888, "learning_rate": 1.0335896321476413e-05, "loss": 0.4866, "step": 8688 }, { "epoch": 2.7898539091346928, "grad_norm": 1.1755162477493286, "learning_rate": 1.0320446606595934e-05, "loss": 0.5634, "step": 8689 }, { "epoch": 2.790174987959544, "grad_norm": 0.9318304657936096, "learning_rate": 1.0305007818897006e-05, "loss": 0.3891, "step": 8690 }, { "epoch": 2.7904960667843954, "grad_norm": 0.7948139309883118, "learning_rate": 1.028957996026081e-05, "loss": 0.3518, "step": 8691 }, { "epoch": 2.790817145609247, "grad_norm": 0.9322606921195984, "learning_rate": 1.0274163032567163e-05, "loss": 0.3725, "step": 8692 }, { "epoch": 2.7911382244340985, "grad_norm": 0.8909361958503723, "learning_rate": 1.0258757037694589e-05, "loss": 
0.2977, "step": 8693 }, { "epoch": 2.79145930325895, "grad_norm": 0.964710533618927, "learning_rate": 1.0243361977520249e-05, "loss": 0.4172, "step": 8694 }, { "epoch": 2.7917803820838016, "grad_norm": 0.9714322686195374, "learning_rate": 1.022797785392e-05, "loss": 0.3906, "step": 8695 }, { "epoch": 2.792101460908653, "grad_norm": 0.7900899648666382, "learning_rate": 1.0212604668768343e-05, "loss": 0.2676, "step": 8696 }, { "epoch": 2.7924225397335047, "grad_norm": 0.7134426236152649, "learning_rate": 1.0197242423938446e-05, "loss": 0.2615, "step": 8697 }, { "epoch": 2.7927436185583563, "grad_norm": 0.9049974679946899, "learning_rate": 1.0181891121302145e-05, "loss": 0.2837, "step": 8698 }, { "epoch": 2.7930646973832074, "grad_norm": 0.9333779215812683, "learning_rate": 1.0166550762729998e-05, "loss": 0.2789, "step": 8699 }, { "epoch": 2.793385776208059, "grad_norm": 0.8934162855148315, "learning_rate": 1.0151221350091134e-05, "loss": 0.3497, "step": 8700 }, { "epoch": 2.7937068550329105, "grad_norm": 0.6106538772583008, "learning_rate": 1.0135902885253401e-05, "loss": 0.2565, "step": 8701 }, { "epoch": 2.794027933857762, "grad_norm": 0.990350067615509, "learning_rate": 1.0120595370083318e-05, "loss": 0.4381, "step": 8702 }, { "epoch": 2.7943490126826136, "grad_norm": 0.8355160355567932, "learning_rate": 1.0105298806446028e-05, "loss": 0.3453, "step": 8703 }, { "epoch": 2.794670091507465, "grad_norm": 0.881846010684967, "learning_rate": 1.009001319620545e-05, "loss": 0.2857, "step": 8704 }, { "epoch": 2.7949911703323167, "grad_norm": 0.8830981254577637, "learning_rate": 1.0074738541223993e-05, "loss": 0.327, "step": 8705 }, { "epoch": 2.7953122491571682, "grad_norm": 0.9528623819351196, "learning_rate": 1.0059474843362892e-05, "loss": 0.3359, "step": 8706 }, { "epoch": 2.79563332798202, "grad_norm": 0.9913058280944824, "learning_rate": 1.0044222104481971e-05, "loss": 0.3238, "step": 8707 }, { "epoch": 2.795954406806871, "grad_norm": 1.4466516971588135, 
"learning_rate": 1.0028980326439707e-05, "loss": 0.5732, "step": 8708 }, { "epoch": 2.7962754856317225, "grad_norm": 0.9511708617210388, "learning_rate": 1.0013749511093307e-05, "loss": 0.3061, "step": 8709 }, { "epoch": 2.796596564456574, "grad_norm": 0.5760300755500793, "learning_rate": 9.998529660298539e-06, "loss": 0.2568, "step": 8710 }, { "epoch": 2.7969176432814256, "grad_norm": 0.8406627774238586, "learning_rate": 9.983320775909933e-06, "loss": 0.3046, "step": 8711 }, { "epoch": 2.797238722106277, "grad_norm": 0.9190598726272583, "learning_rate": 9.968122859780648e-06, "loss": 0.3231, "step": 8712 }, { "epoch": 2.7975598009311287, "grad_norm": 0.6795236468315125, "learning_rate": 9.952935913762506e-06, "loss": 0.247, "step": 8713 }, { "epoch": 2.79788087975598, "grad_norm": 0.6721789240837097, "learning_rate": 9.937759939705971e-06, "loss": 0.2513, "step": 8714 }, { "epoch": 2.7982019585808313, "grad_norm": 1.041229009628296, "learning_rate": 9.922594939460194e-06, "loss": 0.3641, "step": 8715 }, { "epoch": 2.7985230374056833, "grad_norm": 1.105242371559143, "learning_rate": 9.907440914873e-06, "loss": 0.3677, "step": 8716 }, { "epoch": 2.7988441162305344, "grad_norm": 0.7640735507011414, "learning_rate": 9.892297867790845e-06, "loss": 0.2991, "step": 8717 }, { "epoch": 2.799165195055386, "grad_norm": 0.6073140501976013, "learning_rate": 9.877165800058874e-06, "loss": 0.2511, "step": 8718 }, { "epoch": 2.7994862738802375, "grad_norm": 0.9953632950782776, "learning_rate": 9.86204471352088e-06, "loss": 0.365, "step": 8719 }, { "epoch": 2.799807352705089, "grad_norm": 0.6479620337486267, "learning_rate": 9.84693461001932e-06, "loss": 0.2704, "step": 8720 }, { "epoch": 2.8001284315299406, "grad_norm": 0.9347096681594849, "learning_rate": 9.831835491395292e-06, "loss": 0.3145, "step": 8721 }, { "epoch": 2.800449510354792, "grad_norm": 0.5598117113113403, "learning_rate": 9.816747359488632e-06, "loss": 0.2696, "step": 8722 }, { "epoch": 2.8007705891796437, 
"grad_norm": 0.6550912857055664, "learning_rate": 9.801670216137727e-06, "loss": 0.2139, "step": 8723 }, { "epoch": 2.801091668004495, "grad_norm": 0.6343926787376404, "learning_rate": 9.786604063179728e-06, "loss": 0.2569, "step": 8724 }, { "epoch": 2.801412746829347, "grad_norm": 1.2895246744155884, "learning_rate": 9.771548902450357e-06, "loss": 0.3091, "step": 8725 }, { "epoch": 2.801733825654198, "grad_norm": 0.6956881880760193, "learning_rate": 9.756504735784067e-06, "loss": 0.3224, "step": 8726 }, { "epoch": 2.8020549044790495, "grad_norm": 0.8281194567680359, "learning_rate": 9.74147156501396e-06, "loss": 0.3129, "step": 8727 }, { "epoch": 2.802375983303901, "grad_norm": 0.6391897201538086, "learning_rate": 9.726449391971714e-06, "loss": 0.3007, "step": 8728 }, { "epoch": 2.8026970621287526, "grad_norm": 0.5030215978622437, "learning_rate": 9.711438218487834e-06, "loss": 0.2709, "step": 8729 }, { "epoch": 2.803018140953604, "grad_norm": 0.5472492575645447, "learning_rate": 9.696438046391288e-06, "loss": 0.7123, "step": 8730 }, { "epoch": 2.8033392197784557, "grad_norm": 0.44840529561042786, "learning_rate": 9.681448877509858e-06, "loss": 0.4614, "step": 8731 }, { "epoch": 2.8036602986033072, "grad_norm": 0.39549553394317627, "learning_rate": 9.666470713669918e-06, "loss": 0.3304, "step": 8732 }, { "epoch": 2.8039813774281583, "grad_norm": 0.3779802918434143, "learning_rate": 9.651503556696516e-06, "loss": 0.1788, "step": 8733 }, { "epoch": 2.8043024562530103, "grad_norm": 0.1729205846786499, "learning_rate": 9.636547408413355e-06, "loss": 0.0618, "step": 8734 }, { "epoch": 2.8046235350778614, "grad_norm": 0.44540268182754517, "learning_rate": 9.621602270642781e-06, "loss": 0.1766, "step": 8735 }, { "epoch": 2.804944613902713, "grad_norm": 0.7657910585403442, "learning_rate": 9.606668145205833e-06, "loss": 0.4684, "step": 8736 }, { "epoch": 2.8052656927275645, "grad_norm": 0.8212659358978271, "learning_rate": 9.591745033922173e-06, "loss": 0.384, "step": 
8737 }, { "epoch": 2.805586771552416, "grad_norm": 0.8744103908538818, "learning_rate": 9.576832938610137e-06, "loss": 0.338, "step": 8738 }, { "epoch": 2.8059078503772676, "grad_norm": 0.7278825640678406, "learning_rate": 9.561931861086737e-06, "loss": 0.2896, "step": 8739 }, { "epoch": 2.806228929202119, "grad_norm": 0.5598768591880798, "learning_rate": 9.5470418031676e-06, "loss": 0.2411, "step": 8740 }, { "epoch": 2.8065500080269707, "grad_norm": 0.7483347058296204, "learning_rate": 9.532162766667042e-06, "loss": 0.2827, "step": 8741 }, { "epoch": 2.806871086851822, "grad_norm": 0.7367462515830994, "learning_rate": 9.517294753398064e-06, "loss": 0.3007, "step": 8742 }, { "epoch": 2.807192165676674, "grad_norm": 0.6921778917312622, "learning_rate": 9.502437765172212e-06, "loss": 0.2562, "step": 8743 }, { "epoch": 2.807513244501525, "grad_norm": 0.8599756956100464, "learning_rate": 9.487591803799856e-06, "loss": 0.3628, "step": 8744 }, { "epoch": 2.8078343233263765, "grad_norm": 0.8034688234329224, "learning_rate": 9.47275687108986e-06, "loss": 0.3219, "step": 8745 }, { "epoch": 2.808155402151228, "grad_norm": 1.0342375040054321, "learning_rate": 9.457932968849825e-06, "loss": 0.3947, "step": 8746 }, { "epoch": 2.8084764809760796, "grad_norm": 1.0900487899780273, "learning_rate": 9.443120098886061e-06, "loss": 0.357, "step": 8747 }, { "epoch": 2.808797559800931, "grad_norm": 0.7890565991401672, "learning_rate": 9.428318263003378e-06, "loss": 0.2979, "step": 8748 }, { "epoch": 2.8091186386257827, "grad_norm": 1.0363215208053589, "learning_rate": 9.4135274630054e-06, "loss": 0.4129, "step": 8749 }, { "epoch": 2.8094397174506343, "grad_norm": 0.8779375553131104, "learning_rate": 9.398747700694322e-06, "loss": 0.2943, "step": 8750 }, { "epoch": 2.8097607962754854, "grad_norm": 1.0878396034240723, "learning_rate": 9.383978977871021e-06, "loss": 0.3559, "step": 8751 }, { "epoch": 2.8100818751003374, "grad_norm": 0.8391216397285461, "learning_rate": 
9.369221296335006e-06, "loss": 0.3069, "step": 8752 }, { "epoch": 2.8104029539251885, "grad_norm": 0.6143542528152466, "learning_rate": 9.354474657884472e-06, "loss": 0.225, "step": 8753 }, { "epoch": 2.81072403275004, "grad_norm": 0.9501350522041321, "learning_rate": 9.339739064316233e-06, "loss": 0.3666, "step": 8754 }, { "epoch": 2.8110451115748916, "grad_norm": 1.024901032447815, "learning_rate": 9.32501451742579e-06, "loss": 0.3373, "step": 8755 }, { "epoch": 2.811366190399743, "grad_norm": 1.0849446058273315, "learning_rate": 9.310301019007285e-06, "loss": 0.3409, "step": 8756 }, { "epoch": 2.8116872692245947, "grad_norm": 0.8314627408981323, "learning_rate": 9.295598570853514e-06, "loss": 0.3095, "step": 8757 }, { "epoch": 2.812008348049446, "grad_norm": 0.5649183392524719, "learning_rate": 9.280907174755915e-06, "loss": 0.2587, "step": 8758 }, { "epoch": 2.8123294268742978, "grad_norm": 1.2007514238357544, "learning_rate": 9.266226832504598e-06, "loss": 0.4184, "step": 8759 }, { "epoch": 2.812650505699149, "grad_norm": 0.562286376953125, "learning_rate": 9.251557545888312e-06, "loss": 0.2122, "step": 8760 }, { "epoch": 2.812971584524001, "grad_norm": 1.045385479927063, "learning_rate": 9.236899316694459e-06, "loss": 0.3434, "step": 8761 }, { "epoch": 2.813292663348852, "grad_norm": 1.0073976516723633, "learning_rate": 9.222252146709142e-06, "loss": 0.334, "step": 8762 }, { "epoch": 2.8136137421737035, "grad_norm": 0.5607120990753174, "learning_rate": 9.207616037717025e-06, "loss": 0.2412, "step": 8763 }, { "epoch": 2.813934820998555, "grad_norm": 0.7444592714309692, "learning_rate": 9.192990991501482e-06, "loss": 0.2799, "step": 8764 }, { "epoch": 2.8142558998234066, "grad_norm": 0.9598581194877625, "learning_rate": 9.178377009844563e-06, "loss": 0.3674, "step": 8765 }, { "epoch": 2.814576978648258, "grad_norm": 0.42026737332344055, "learning_rate": 9.163774094526889e-06, "loss": 0.2069, "step": 8766 }, { "epoch": 2.8148980574731097, "grad_norm": 
0.8369678258895874, "learning_rate": 9.149182247327837e-06, "loss": 0.3303, "step": 8767 }, { "epoch": 2.8152191362979613, "grad_norm": 0.8033504486083984, "learning_rate": 9.134601470025306e-06, "loss": 0.2976, "step": 8768 }, { "epoch": 2.8155402151228124, "grad_norm": 0.7782095670700073, "learning_rate": 9.120031764395987e-06, "loss": 0.2766, "step": 8769 }, { "epoch": 2.8158612939476644, "grad_norm": 0.6862573623657227, "learning_rate": 9.105473132215125e-06, "loss": 0.2771, "step": 8770 }, { "epoch": 2.8161823727725155, "grad_norm": 1.1740379333496094, "learning_rate": 9.09092557525666e-06, "loss": 0.3947, "step": 8771 }, { "epoch": 2.816503451597367, "grad_norm": 1.0018607378005981, "learning_rate": 9.076389095293148e-06, "loss": 0.4111, "step": 8772 }, { "epoch": 2.8168245304222186, "grad_norm": 0.7517522573471069, "learning_rate": 9.061863694095828e-06, "loss": 0.2955, "step": 8773 }, { "epoch": 2.81714560924707, "grad_norm": 1.8710321187973022, "learning_rate": 9.047349373434566e-06, "loss": 0.2693, "step": 8774 }, { "epoch": 2.8174666880719217, "grad_norm": 0.8470088839530945, "learning_rate": 9.0328461350779e-06, "loss": 0.3039, "step": 8775 }, { "epoch": 2.8177877668967732, "grad_norm": 0.5964589715003967, "learning_rate": 9.018353980792993e-06, "loss": 0.2547, "step": 8776 }, { "epoch": 2.818108845721625, "grad_norm": 0.8444490432739258, "learning_rate": 9.00387291234569e-06, "loss": 0.3071, "step": 8777 }, { "epoch": 2.818429924546476, "grad_norm": 0.41703271865844727, "learning_rate": 8.989402931500434e-06, "loss": 0.2554, "step": 8778 }, { "epoch": 2.818751003371328, "grad_norm": 0.3994010090827942, "learning_rate": 8.974944040020362e-06, "loss": 0.2379, "step": 8779 }, { "epoch": 2.819072082196179, "grad_norm": 0.443518728017807, "learning_rate": 8.960496239667282e-06, "loss": 0.5404, "step": 8780 }, { "epoch": 2.8193931610210305, "grad_norm": 0.3758715093135834, "learning_rate": 8.946059532201567e-06, "loss": 0.3376, "step": 8781 }, { "epoch": 
2.819714239845882, "grad_norm": 0.3072349727153778, "learning_rate": 8.931633919382298e-06, "loss": 0.1223, "step": 8782 }, { "epoch": 2.8200353186707336, "grad_norm": 0.3410075306892395, "learning_rate": 8.917219402967202e-06, "loss": 0.11, "step": 8783 }, { "epoch": 2.820356397495585, "grad_norm": 0.1781407743692398, "learning_rate": 8.90281598471262e-06, "loss": 0.0609, "step": 8784 }, { "epoch": 2.8206774763204367, "grad_norm": 0.3771248161792755, "learning_rate": 8.888423666373614e-06, "loss": 0.1744, "step": 8785 }, { "epoch": 2.8209985551452883, "grad_norm": 0.8106180429458618, "learning_rate": 8.87404244970378e-06, "loss": 0.4395, "step": 8786 }, { "epoch": 2.8213196339701394, "grad_norm": 0.7213160991668701, "learning_rate": 8.85967233645547e-06, "loss": 0.3335, "step": 8787 }, { "epoch": 2.8216407127949914, "grad_norm": 0.8492469787597656, "learning_rate": 8.845313328379634e-06, "loss": 0.401, "step": 8788 }, { "epoch": 2.8219617916198425, "grad_norm": 1.2384346723556519, "learning_rate": 8.830965427225868e-06, "loss": 0.3545, "step": 8789 }, { "epoch": 2.822282870444694, "grad_norm": 0.8457659482955933, "learning_rate": 8.816628634742441e-06, "loss": 0.3346, "step": 8790 }, { "epoch": 2.8226039492695456, "grad_norm": 0.9293232560157776, "learning_rate": 8.80230295267619e-06, "loss": 0.3992, "step": 8791 }, { "epoch": 2.822925028094397, "grad_norm": 0.7842317819595337, "learning_rate": 8.787988382772705e-06, "loss": 0.355, "step": 8792 }, { "epoch": 2.8232461069192487, "grad_norm": 0.9196096062660217, "learning_rate": 8.77368492677616e-06, "loss": 0.3592, "step": 8793 }, { "epoch": 2.8235671857441003, "grad_norm": 0.7270289659500122, "learning_rate": 8.759392586429393e-06, "loss": 0.3004, "step": 8794 }, { "epoch": 2.823888264568952, "grad_norm": 0.7461056709289551, "learning_rate": 8.745111363473868e-06, "loss": 0.2673, "step": 8795 }, { "epoch": 2.824209343393803, "grad_norm": 0.9872536659240723, "learning_rate": 8.730841259649725e-06, "loss": 0.3072, 
"step": 8796 }, { "epoch": 2.824530422218655, "grad_norm": 0.9411637783050537, "learning_rate": 8.716582276695728e-06, "loss": 0.3584, "step": 8797 }, { "epoch": 2.824851501043506, "grad_norm": 1.1243722438812256, "learning_rate": 8.702334416349278e-06, "loss": 0.4163, "step": 8798 }, { "epoch": 2.8251725798683576, "grad_norm": 0.9346780776977539, "learning_rate": 8.688097680346453e-06, "loss": 0.3681, "step": 8799 }, { "epoch": 2.825493658693209, "grad_norm": 1.0615766048431396, "learning_rate": 8.67387207042194e-06, "loss": 0.4532, "step": 8800 }, { "epoch": 2.8258147375180607, "grad_norm": 0.979707658290863, "learning_rate": 8.6596575883091e-06, "loss": 0.3883, "step": 8801 }, { "epoch": 2.826135816342912, "grad_norm": 0.7535959482192993, "learning_rate": 8.645454235739903e-06, "loss": 0.2761, "step": 8802 }, { "epoch": 2.8264568951677638, "grad_norm": 0.7734266519546509, "learning_rate": 8.63126201444503e-06, "loss": 0.2963, "step": 8803 }, { "epoch": 2.8267779739926153, "grad_norm": 0.7798780202865601, "learning_rate": 8.617080926153698e-06, "loss": 0.3009, "step": 8804 }, { "epoch": 2.8270990528174664, "grad_norm": 0.9741690158843994, "learning_rate": 8.602910972593892e-06, "loss": 0.3527, "step": 8805 }, { "epoch": 2.8274201316423184, "grad_norm": 0.9816387891769409, "learning_rate": 8.588752155492119e-06, "loss": 0.3465, "step": 8806 }, { "epoch": 2.8277412104671695, "grad_norm": 0.7801307439804077, "learning_rate": 8.574604476573621e-06, "loss": 0.292, "step": 8807 }, { "epoch": 2.828062289292021, "grad_norm": 0.5235636234283447, "learning_rate": 8.560467937562277e-06, "loss": 0.2318, "step": 8808 }, { "epoch": 2.8283833681168726, "grad_norm": 0.6542462110519409, "learning_rate": 8.546342540180508e-06, "loss": 0.2817, "step": 8809 }, { "epoch": 2.828704446941724, "grad_norm": 0.6339287161827087, "learning_rate": 8.532228286149501e-06, "loss": 0.2691, "step": 8810 }, { "epoch": 2.8290255257665757, "grad_norm": 0.9842689633369446, "learning_rate": 
8.51812517718904e-06, "loss": 0.3822, "step": 8811 }, { "epoch": 2.8293466045914273, "grad_norm": 0.6883869767189026, "learning_rate": 8.504033215017527e-06, "loss": 0.3071, "step": 8812 }, { "epoch": 2.829667683416279, "grad_norm": 1.0689724683761597, "learning_rate": 8.489952401352019e-06, "loss": 0.4108, "step": 8813 }, { "epoch": 2.82998876224113, "grad_norm": 0.9899790287017822, "learning_rate": 8.475882737908248e-06, "loss": 0.3652, "step": 8814 }, { "epoch": 2.830309841065982, "grad_norm": 0.8097178936004639, "learning_rate": 8.46182422640054e-06, "loss": 0.3144, "step": 8815 }, { "epoch": 2.830630919890833, "grad_norm": 0.8424767255783081, "learning_rate": 8.447776868541879e-06, "loss": 0.2967, "step": 8816 }, { "epoch": 2.8309519987156846, "grad_norm": 0.7293074727058411, "learning_rate": 8.433740666043898e-06, "loss": 0.2959, "step": 8817 }, { "epoch": 2.831273077540536, "grad_norm": 0.6922279000282288, "learning_rate": 8.419715620616874e-06, "loss": 0.2773, "step": 8818 }, { "epoch": 2.8315941563653877, "grad_norm": 0.5553762316703796, "learning_rate": 8.405701733969706e-06, "loss": 0.2382, "step": 8819 }, { "epoch": 2.8319152351902392, "grad_norm": 0.7001710534095764, "learning_rate": 8.39169900780995e-06, "loss": 0.2947, "step": 8820 }, { "epoch": 2.832236314015091, "grad_norm": 0.524242639541626, "learning_rate": 8.377707443843786e-06, "loss": 0.2546, "step": 8821 }, { "epoch": 2.8325573928399423, "grad_norm": 0.7304323315620422, "learning_rate": 8.363727043776038e-06, "loss": 0.311, "step": 8822 }, { "epoch": 2.8328784716647935, "grad_norm": 0.5915831923484802, "learning_rate": 8.34975780931021e-06, "loss": 0.2488, "step": 8823 }, { "epoch": 2.8331995504896454, "grad_norm": 0.8694709539413452, "learning_rate": 8.335799742148387e-06, "loss": 0.368, "step": 8824 }, { "epoch": 2.8335206293144966, "grad_norm": 0.6131924986839294, "learning_rate": 8.321852843991295e-06, "loss": 0.2581, "step": 8825 }, { "epoch": 2.833841708139348, "grad_norm": 
0.42285412549972534, "learning_rate": 8.307917116538378e-06, "loss": 0.2609, "step": 8826 }, { "epoch": 2.8341627869641997, "grad_norm": 0.6987262964248657, "learning_rate": 8.293992561487596e-06, "loss": 0.309, "step": 8827 }, { "epoch": 2.834483865789051, "grad_norm": 0.8132457137107849, "learning_rate": 8.280079180535672e-06, "loss": 0.2866, "step": 8828 }, { "epoch": 2.8348049446139028, "grad_norm": 0.3915092647075653, "learning_rate": 8.26617697537786e-06, "loss": 0.2629, "step": 8829 }, { "epoch": 2.8351260234387543, "grad_norm": 0.4772132337093353, "learning_rate": 8.252285947708139e-06, "loss": 0.5484, "step": 8830 }, { "epoch": 2.835447102263606, "grad_norm": 0.39662283658981323, "learning_rate": 8.238406099219077e-06, "loss": 0.3962, "step": 8831 }, { "epoch": 2.835768181088457, "grad_norm": 0.4367140233516693, "learning_rate": 8.224537431601886e-06, "loss": 0.319, "step": 8832 }, { "epoch": 2.836089259913309, "grad_norm": 0.6027937531471252, "learning_rate": 8.21067994654644e-06, "loss": 0.2027, "step": 8833 }, { "epoch": 2.83641033873816, "grad_norm": 0.3798494040966034, "learning_rate": 8.196833645741186e-06, "loss": 0.155, "step": 8834 }, { "epoch": 2.8367314175630116, "grad_norm": 0.2580123841762543, "learning_rate": 8.182998530873298e-06, "loss": 0.1179, "step": 8835 }, { "epoch": 2.837052496387863, "grad_norm": 0.6310969591140747, "learning_rate": 8.169174603628538e-06, "loss": 0.3001, "step": 8836 }, { "epoch": 2.8373735752127147, "grad_norm": 0.7293077707290649, "learning_rate": 8.15536186569129e-06, "loss": 0.3546, "step": 8837 }, { "epoch": 2.8376946540375663, "grad_norm": 0.7130358219146729, "learning_rate": 8.1415603187446e-06, "loss": 0.2964, "step": 8838 }, { "epoch": 2.838015732862418, "grad_norm": 0.7875835299491882, "learning_rate": 8.127769964470156e-06, "loss": 0.339, "step": 8839 }, { "epoch": 2.8383368116872694, "grad_norm": 0.784997820854187, "learning_rate": 8.113990804548244e-06, "loss": 0.3349, "step": 8840 }, { "epoch": 
2.8386578905121205, "grad_norm": 0.6344702839851379, "learning_rate": 8.100222840657878e-06, "loss": 0.2666, "step": 8841 }, { "epoch": 2.8389789693369725, "grad_norm": 0.984622061252594, "learning_rate": 8.086466074476563e-06, "loss": 0.432, "step": 8842 }, { "epoch": 2.8393000481618236, "grad_norm": 0.8080422282218933, "learning_rate": 8.072720507680565e-06, "loss": 0.3034, "step": 8843 }, { "epoch": 2.839621126986675, "grad_norm": 0.9881112575531006, "learning_rate": 8.058986141944724e-06, "loss": 0.3383, "step": 8844 }, { "epoch": 2.8399422058115267, "grad_norm": 0.5864750742912292, "learning_rate": 8.045262978942513e-06, "loss": 0.2296, "step": 8845 }, { "epoch": 2.8402632846363782, "grad_norm": 0.9349332451820374, "learning_rate": 8.031551020346128e-06, "loss": 0.3404, "step": 8846 }, { "epoch": 2.8405843634612298, "grad_norm": 1.3247158527374268, "learning_rate": 8.017850267826232e-06, "loss": 0.4181, "step": 8847 }, { "epoch": 2.8409054422860813, "grad_norm": 0.8365796804428101, "learning_rate": 8.004160723052312e-06, "loss": 0.2536, "step": 8848 }, { "epoch": 2.841226521110933, "grad_norm": 0.7529785633087158, "learning_rate": 7.990482387692311e-06, "loss": 0.3008, "step": 8849 }, { "epoch": 2.841547599935784, "grad_norm": 1.1152650117874146, "learning_rate": 7.976815263412963e-06, "loss": 0.4836, "step": 8850 }, { "epoch": 2.841868678760636, "grad_norm": 1.6716127395629883, "learning_rate": 7.963159351879556e-06, "loss": 0.4522, "step": 8851 }, { "epoch": 2.842189757585487, "grad_norm": 0.8794559836387634, "learning_rate": 7.949514654755962e-06, "loss": 0.3452, "step": 8852 }, { "epoch": 2.8425108364103386, "grad_norm": 1.2077842950820923, "learning_rate": 7.935881173704819e-06, "loss": 0.3147, "step": 8853 }, { "epoch": 2.84283191523519, "grad_norm": 0.605907678604126, "learning_rate": 7.922258910387282e-06, "loss": 0.2739, "step": 8854 }, { "epoch": 2.8431529940600417, "grad_norm": 0.7989472150802612, "learning_rate": 7.908647866463203e-06, "loss": 
0.2794, "step": 8855 }, { "epoch": 2.8434740728848933, "grad_norm": 1.0483945608139038, "learning_rate": 7.895048043591036e-06, "loss": 0.3598, "step": 8856 }, { "epoch": 2.843795151709745, "grad_norm": 0.4776444137096405, "learning_rate": 7.881459443427886e-06, "loss": 0.2168, "step": 8857 }, { "epoch": 2.8441162305345964, "grad_norm": 0.8608068227767944, "learning_rate": 7.867882067629472e-06, "loss": 0.3373, "step": 8858 }, { "epoch": 2.8444373093594475, "grad_norm": 0.8778037428855896, "learning_rate": 7.854315917850163e-06, "loss": 0.2954, "step": 8859 }, { "epoch": 2.8447583881842995, "grad_norm": 1.0180518627166748, "learning_rate": 7.840760995742946e-06, "loss": 0.3334, "step": 8860 }, { "epoch": 2.8450794670091506, "grad_norm": 0.9998841285705566, "learning_rate": 7.827217302959467e-06, "loss": 0.3415, "step": 8861 }, { "epoch": 2.845400545834002, "grad_norm": 0.7613440155982971, "learning_rate": 7.81368484114996e-06, "loss": 0.2876, "step": 8862 }, { "epoch": 2.8457216246588537, "grad_norm": 0.8815329670906067, "learning_rate": 7.800163611963318e-06, "loss": 0.3486, "step": 8863 }, { "epoch": 2.8460427034837052, "grad_norm": 0.6765179634094238, "learning_rate": 7.786653617047079e-06, "loss": 0.2835, "step": 8864 }, { "epoch": 2.846363782308557, "grad_norm": 0.7167448997497559, "learning_rate": 7.77315485804736e-06, "loss": 0.2964, "step": 8865 }, { "epoch": 2.8466848611334084, "grad_norm": 0.879822850227356, "learning_rate": 7.75966733660901e-06, "loss": 0.3248, "step": 8866 }, { "epoch": 2.84700593995826, "grad_norm": 0.6780831813812256, "learning_rate": 7.746191054375362e-06, "loss": 0.2852, "step": 8867 }, { "epoch": 2.847327018783111, "grad_norm": 0.6202150583267212, "learning_rate": 7.73272601298851e-06, "loss": 0.273, "step": 8868 }, { "epoch": 2.847648097607963, "grad_norm": 0.5386216640472412, "learning_rate": 7.719272214089145e-06, "loss": 0.2531, "step": 8869 }, { "epoch": 2.847969176432814, "grad_norm": 0.7454226016998291, "learning_rate": 
7.7058296593165e-06, "loss": 0.2987, "step": 8870 }, { "epoch": 2.8482902552576657, "grad_norm": 0.7010564208030701, "learning_rate": 7.692398350308594e-06, "loss": 0.2399, "step": 8871 }, { "epoch": 2.848611334082517, "grad_norm": 1.0159744024276733, "learning_rate": 7.67897828870191e-06, "loss": 0.3846, "step": 8872 }, { "epoch": 2.8489324129073688, "grad_norm": 0.6551013588905334, "learning_rate": 7.665569476131706e-06, "loss": 0.2866, "step": 8873 }, { "epoch": 2.8492534917322203, "grad_norm": 0.5894836783409119, "learning_rate": 7.652171914231776e-06, "loss": 0.2376, "step": 8874 }, { "epoch": 2.849574570557072, "grad_norm": 1.2114777565002441, "learning_rate": 7.638785604634579e-06, "loss": 0.3111, "step": 8875 }, { "epoch": 2.8498956493819234, "grad_norm": 0.9398203492164612, "learning_rate": 7.625410548971191e-06, "loss": 0.2726, "step": 8876 }, { "epoch": 2.8502167282067745, "grad_norm": 0.8695142865180969, "learning_rate": 7.612046748871327e-06, "loss": 0.3361, "step": 8877 }, { "epoch": 2.8505378070316265, "grad_norm": 0.6532575488090515, "learning_rate": 7.59869420596333e-06, "loss": 0.3271, "step": 8878 }, { "epoch": 2.8508588858564776, "grad_norm": 0.48674994707107544, "learning_rate": 7.585352921874156e-06, "loss": 0.2893, "step": 8879 }, { "epoch": 2.851179964681329, "grad_norm": 0.5763227343559265, "learning_rate": 7.572022898229403e-06, "loss": 0.8793, "step": 8880 }, { "epoch": 2.8515010435061807, "grad_norm": 0.5322999954223633, "learning_rate": 7.558704136653305e-06, "loss": 0.7263, "step": 8881 }, { "epoch": 2.8518221223310323, "grad_norm": 0.44331344962120056, "learning_rate": 7.545396638768698e-06, "loss": 0.279, "step": 8882 }, { "epoch": 2.852143201155884, "grad_norm": 0.41767483949661255, "learning_rate": 7.5321004061970405e-06, "loss": 0.2259, "step": 8883 }, { "epoch": 2.8524642799807354, "grad_norm": 0.3086419403553009, "learning_rate": 7.518815440558513e-06, "loss": 0.1316, "step": 8884 }, { "epoch": 2.852785358805587, "grad_norm": 
0.29793986678123474, "learning_rate": 7.505541743471756e-06, "loss": 0.1329, "step": 8885 }, { "epoch": 2.853106437630438, "grad_norm": 0.4482042193412781, "learning_rate": 7.492279316554207e-06, "loss": 0.2962, "step": 8886 }, { "epoch": 2.85342751645529, "grad_norm": 0.780487060546875, "learning_rate": 7.479028161421797e-06, "loss": 0.4438, "step": 8887 }, { "epoch": 2.853748595280141, "grad_norm": 1.0584594011306763, "learning_rate": 7.465788279689156e-06, "loss": 0.3568, "step": 8888 }, { "epoch": 2.8540696741049927, "grad_norm": 0.6687178611755371, "learning_rate": 7.45255967296955e-06, "loss": 0.2615, "step": 8889 }, { "epoch": 2.8543907529298442, "grad_norm": 0.9293543696403503, "learning_rate": 7.439342342874789e-06, "loss": 0.3512, "step": 8890 }, { "epoch": 2.854711831754696, "grad_norm": 0.8716065287590027, "learning_rate": 7.426136291015417e-06, "loss": 0.3854, "step": 8891 }, { "epoch": 2.8550329105795473, "grad_norm": 0.711845338344574, "learning_rate": 7.412941519000527e-06, "loss": 0.3156, "step": 8892 }, { "epoch": 2.855353989404399, "grad_norm": 0.7607097625732422, "learning_rate": 7.399758028437864e-06, "loss": 0.3388, "step": 8893 }, { "epoch": 2.8556750682292504, "grad_norm": 0.6983038783073425, "learning_rate": 7.386585820933811e-06, "loss": 0.2692, "step": 8894 }, { "epoch": 2.8559961470541015, "grad_norm": 0.703209638595581, "learning_rate": 7.3734248980933395e-06, "loss": 0.2435, "step": 8895 }, { "epoch": 2.8563172258789535, "grad_norm": 1.0315760374069214, "learning_rate": 7.360275261520078e-06, "loss": 0.3833, "step": 8896 }, { "epoch": 2.8566383047038046, "grad_norm": 0.6447276473045349, "learning_rate": 7.347136912816277e-06, "loss": 0.2404, "step": 8897 }, { "epoch": 2.856959383528656, "grad_norm": 1.043185830116272, "learning_rate": 7.3340098535827905e-06, "loss": 0.4912, "step": 8898 }, { "epoch": 2.8572804623535077, "grad_norm": 1.1728825569152832, "learning_rate": 7.320894085419116e-06, "loss": 0.4808, "step": 8899 }, { "epoch": 
2.8576015411783593, "grad_norm": 0.8470208644866943, "learning_rate": 7.3077896099233765e-06, "loss": 0.38, "step": 8900 }, { "epoch": 2.857922620003211, "grad_norm": 0.6861622333526611, "learning_rate": 7.2946964286923046e-06, "loss": 0.2538, "step": 8901 }, { "epoch": 2.8582436988280624, "grad_norm": 0.7869503498077393, "learning_rate": 7.281614543321269e-06, "loss": 0.2132, "step": 8902 }, { "epoch": 2.858564777652914, "grad_norm": 0.7390521764755249, "learning_rate": 7.268543955404239e-06, "loss": 0.2631, "step": 8903 }, { "epoch": 2.858885856477765, "grad_norm": 0.6110241413116455, "learning_rate": 7.255484666533874e-06, "loss": 0.2363, "step": 8904 }, { "epoch": 2.859206935302617, "grad_norm": 0.7435978651046753, "learning_rate": 7.242436678301367e-06, "loss": 0.3225, "step": 8905 }, { "epoch": 2.859528014127468, "grad_norm": 0.5957700610160828, "learning_rate": 7.2293999922965705e-06, "loss": 0.2636, "step": 8906 }, { "epoch": 2.8598490929523197, "grad_norm": 0.6576164364814758, "learning_rate": 7.216374610108012e-06, "loss": 0.3076, "step": 8907 }, { "epoch": 2.8601701717771713, "grad_norm": 0.7818155288696289, "learning_rate": 7.203360533322734e-06, "loss": 0.3078, "step": 8908 }, { "epoch": 2.860491250602023, "grad_norm": 0.974690854549408, "learning_rate": 7.190357763526523e-06, "loss": 0.4259, "step": 8909 }, { "epoch": 2.8608123294268744, "grad_norm": 0.795142412185669, "learning_rate": 7.177366302303667e-06, "loss": 0.3382, "step": 8910 }, { "epoch": 2.861133408251726, "grad_norm": 1.03029465675354, "learning_rate": 7.164386151237179e-06, "loss": 0.3553, "step": 8911 }, { "epoch": 2.8614544870765775, "grad_norm": 0.9264429211616516, "learning_rate": 7.151417311908648e-06, "loss": 0.3729, "step": 8912 }, { "epoch": 2.8617755659014286, "grad_norm": 0.9430894255638123, "learning_rate": 7.138459785898266e-06, "loss": 0.4435, "step": 8913 }, { "epoch": 2.8620966447262806, "grad_norm": 0.7884198427200317, "learning_rate": 7.125513574784903e-06, "loss": 
0.3564, "step": 8914 }, { "epoch": 2.8624177235511317, "grad_norm": 1.3142826557159424, "learning_rate": 7.112578680145954e-06, "loss": 0.3726, "step": 8915 }, { "epoch": 2.862738802375983, "grad_norm": 0.9246410727500916, "learning_rate": 7.099655103557556e-06, "loss": 0.3109, "step": 8916 }, { "epoch": 2.8630598812008348, "grad_norm": 0.8271921277046204, "learning_rate": 7.086742846594385e-06, "loss": 0.2753, "step": 8917 }, { "epoch": 2.8633809600256863, "grad_norm": 1.1295146942138672, "learning_rate": 7.07384191082977e-06, "loss": 0.3933, "step": 8918 }, { "epoch": 2.863702038850538, "grad_norm": 2.6274220943450928, "learning_rate": 7.060952297835633e-06, "loss": 0.4857, "step": 8919 }, { "epoch": 2.8640231176753894, "grad_norm": 0.9029250741004944, "learning_rate": 7.048074009182548e-06, "loss": 0.3297, "step": 8920 }, { "epoch": 2.864344196500241, "grad_norm": 0.5170316696166992, "learning_rate": 7.035207046439673e-06, "loss": 0.2399, "step": 8921 }, { "epoch": 2.864665275325092, "grad_norm": 0.6248924732208252, "learning_rate": 7.022351411174866e-06, "loss": 0.2731, "step": 8922 }, { "epoch": 2.864986354149944, "grad_norm": 0.6657963991165161, "learning_rate": 7.009507104954493e-06, "loss": 0.2576, "step": 8923 }, { "epoch": 2.865307432974795, "grad_norm": 0.5382866263389587, "learning_rate": 6.996674129343606e-06, "loss": 0.2667, "step": 8924 }, { "epoch": 2.8656285117996467, "grad_norm": 0.9319119453430176, "learning_rate": 6.9838524859058616e-06, "loss": 0.3681, "step": 8925 }, { "epoch": 2.8659495906244983, "grad_norm": 0.8236702084541321, "learning_rate": 6.971042176203535e-06, "loss": 0.3227, "step": 8926 }, { "epoch": 2.86627066944935, "grad_norm": 0.42309141159057617, "learning_rate": 6.958243201797554e-06, "loss": 0.2548, "step": 8927 }, { "epoch": 2.8665917482742014, "grad_norm": 0.6998513340950012, "learning_rate": 6.945455564247394e-06, "loss": 0.3271, "step": 8928 }, { "epoch": 2.866912827099053, "grad_norm": 0.48560941219329834, 
"learning_rate": 6.932679265111231e-06, "loss": 0.2742, "step": 8929 }, { "epoch": 2.8672339059239045, "grad_norm": 0.45436060428619385, "learning_rate": 6.919914305945774e-06, "loss": 0.6581, "step": 8930 }, { "epoch": 2.8675549847487556, "grad_norm": 0.4203808903694153, "learning_rate": 6.907160688306425e-06, "loss": 0.3959, "step": 8931 }, { "epoch": 2.8678760635736076, "grad_norm": 0.45685985684394836, "learning_rate": 6.894418413747183e-06, "loss": 0.3185, "step": 8932 }, { "epoch": 2.8681971423984587, "grad_norm": 0.4417474865913391, "learning_rate": 6.881687483820609e-06, "loss": 0.2633, "step": 8933 }, { "epoch": 2.8685182212233102, "grad_norm": 0.34829646348953247, "learning_rate": 6.868967900077972e-06, "loss": 0.1154, "step": 8934 }, { "epoch": 2.868839300048162, "grad_norm": 0.4470880925655365, "learning_rate": 6.856259664069098e-06, "loss": 0.1363, "step": 8935 }, { "epoch": 2.8691603788730133, "grad_norm": 0.42514151334762573, "learning_rate": 6.8435627773424495e-06, "loss": 0.1393, "step": 8936 }, { "epoch": 2.869481457697865, "grad_norm": 0.8983972668647766, "learning_rate": 6.830877241445111e-06, "loss": 0.5135, "step": 8937 }, { "epoch": 2.8698025365227164, "grad_norm": 1.0805124044418335, "learning_rate": 6.818203057922757e-06, "loss": 0.4579, "step": 8938 }, { "epoch": 2.870123615347568, "grad_norm": 0.8403233289718628, "learning_rate": 6.805540228319718e-06, "loss": 0.3314, "step": 8939 }, { "epoch": 2.870444694172419, "grad_norm": 0.9584758281707764, "learning_rate": 6.7928887541789055e-06, "loss": 0.3441, "step": 8940 }, { "epoch": 2.870765772997271, "grad_norm": 0.9408062100410461, "learning_rate": 6.780248637041875e-06, "loss": 0.3953, "step": 8941 }, { "epoch": 2.871086851822122, "grad_norm": 0.7710734605789185, "learning_rate": 6.767619878448783e-06, "loss": 0.3212, "step": 8942 }, { "epoch": 2.8714079306469737, "grad_norm": 0.6842841506004333, "learning_rate": 6.755002479938411e-06, "loss": 0.2995, "step": 8943 }, { "epoch": 
2.8717290094718253, "grad_norm": 0.8255465030670166, "learning_rate": 6.742396443048138e-06, "loss": 0.3496, "step": 8944 }, { "epoch": 2.872050088296677, "grad_norm": 0.838579535484314, "learning_rate": 6.729801769313981e-06, "loss": 0.3619, "step": 8945 }, { "epoch": 2.8723711671215284, "grad_norm": 0.7680864930152893, "learning_rate": 6.717218460270536e-06, "loss": 0.3327, "step": 8946 }, { "epoch": 2.87269224594638, "grad_norm": 0.6783306002616882, "learning_rate": 6.704646517451107e-06, "loss": 0.3324, "step": 8947 }, { "epoch": 2.8730133247712315, "grad_norm": 0.8792276978492737, "learning_rate": 6.692085942387483e-06, "loss": 0.3289, "step": 8948 }, { "epoch": 2.8733344035960826, "grad_norm": 0.9213592410087585, "learning_rate": 6.679536736610137e-06, "loss": 0.3607, "step": 8949 }, { "epoch": 2.8736554824209346, "grad_norm": 0.8916637897491455, "learning_rate": 6.666998901648203e-06, "loss": 0.3322, "step": 8950 }, { "epoch": 2.8739765612457857, "grad_norm": 0.9735648036003113, "learning_rate": 6.654472439029313e-06, "loss": 0.3434, "step": 8951 }, { "epoch": 2.8742976400706373, "grad_norm": 0.7319313287734985, "learning_rate": 6.6419573502798374e-06, "loss": 0.3157, "step": 8952 }, { "epoch": 2.874618718895489, "grad_norm": 0.9477907419204712, "learning_rate": 6.629453636924643e-06, "loss": 0.3392, "step": 8953 }, { "epoch": 2.8749397977203404, "grad_norm": 0.9957652688026428, "learning_rate": 6.616961300487324e-06, "loss": 0.448, "step": 8954 }, { "epoch": 2.875260876545192, "grad_norm": 1.1732362508773804, "learning_rate": 6.604480342490004e-06, "loss": 0.438, "step": 8955 }, { "epoch": 2.8755819553700435, "grad_norm": 0.710220992565155, "learning_rate": 6.592010764453449e-06, "loss": 0.2393, "step": 8956 }, { "epoch": 2.875903034194895, "grad_norm": 0.8079162836074829, "learning_rate": 6.579552567897051e-06, "loss": 0.3013, "step": 8957 }, { "epoch": 2.876224113019746, "grad_norm": 0.8686780333518982, "learning_rate": 6.5671057543387985e-06, "loss": 
0.3435, "step": 8958 }, { "epoch": 2.876545191844598, "grad_norm": 1.116487979888916, "learning_rate": 6.554670325295298e-06, "loss": 0.4351, "step": 8959 }, { "epoch": 2.876866270669449, "grad_norm": 0.6508135199546814, "learning_rate": 6.542246282281772e-06, "loss": 0.311, "step": 8960 }, { "epoch": 2.8771873494943008, "grad_norm": 1.2992252111434937, "learning_rate": 6.529833626812043e-06, "loss": 0.3634, "step": 8961 }, { "epoch": 2.8775084283191523, "grad_norm": 0.7349622845649719, "learning_rate": 6.517432360398556e-06, "loss": 0.3065, "step": 8962 }, { "epoch": 2.877829507144004, "grad_norm": 0.7028359770774841, "learning_rate": 6.5050424845523815e-06, "loss": 0.2529, "step": 8963 }, { "epoch": 2.8781505859688554, "grad_norm": 0.6974001526832581, "learning_rate": 6.492664000783166e-06, "loss": 0.2822, "step": 8964 }, { "epoch": 2.878471664793707, "grad_norm": 0.9233234524726868, "learning_rate": 6.480296910599237e-06, "loss": 0.3762, "step": 8965 }, { "epoch": 2.8787927436185585, "grad_norm": 1.0546296834945679, "learning_rate": 6.467941215507433e-06, "loss": 0.4223, "step": 8966 }, { "epoch": 2.8791138224434096, "grad_norm": 0.8296759724617004, "learning_rate": 6.455596917013273e-06, "loss": 0.3025, "step": 8967 }, { "epoch": 2.8794349012682616, "grad_norm": 0.6233950257301331, "learning_rate": 6.443264016620887e-06, "loss": 0.2302, "step": 8968 }, { "epoch": 2.8797559800931127, "grad_norm": 0.8595796227455139, "learning_rate": 6.430942515832983e-06, "loss": 0.267, "step": 8969 }, { "epoch": 2.8800770589179643, "grad_norm": 0.9391990900039673, "learning_rate": 6.418632416150927e-06, "loss": 0.3733, "step": 8970 }, { "epoch": 2.880398137742816, "grad_norm": 0.7740428447723389, "learning_rate": 6.406333719074619e-06, "loss": 0.2623, "step": 8971 }, { "epoch": 2.8807192165676674, "grad_norm": 0.760464608669281, "learning_rate": 6.394046426102674e-06, "loss": 0.3015, "step": 8972 }, { "epoch": 2.881040295392519, "grad_norm": 0.8069641590118408, "learning_rate": 
6.381770538732224e-06, "loss": 0.324, "step": 8973 }, { "epoch": 2.8813613742173705, "grad_norm": 0.6506406664848328, "learning_rate": 6.3695060584590625e-06, "loss": 0.2543, "step": 8974 }, { "epoch": 2.881682453042222, "grad_norm": 0.4887990355491638, "learning_rate": 6.357252986777595e-06, "loss": 0.2442, "step": 8975 }, { "epoch": 2.882003531867073, "grad_norm": 1.2128475904464722, "learning_rate": 6.345011325180772e-06, "loss": 0.3487, "step": 8976 }, { "epoch": 2.882324610691925, "grad_norm": 0.8624823689460754, "learning_rate": 6.332781075160243e-06, "loss": 0.301, "step": 8977 }, { "epoch": 2.8826456895167762, "grad_norm": 0.4960239827632904, "learning_rate": 6.320562238206218e-06, "loss": 0.2894, "step": 8978 }, { "epoch": 2.882966768341628, "grad_norm": 0.48501670360565186, "learning_rate": 6.308354815807527e-06, "loss": 0.2678, "step": 8979 }, { "epoch": 2.8832878471664793, "grad_norm": 0.42197084426879883, "learning_rate": 6.296158809451602e-06, "loss": 0.4037, "step": 8980 }, { "epoch": 2.883608925991331, "grad_norm": 0.36780065298080444, "learning_rate": 6.283974220624489e-06, "loss": 0.3432, "step": 8981 }, { "epoch": 2.8839300048161824, "grad_norm": 0.3911552131175995, "learning_rate": 6.2718010508108545e-06, "loss": 0.217, "step": 8982 }, { "epoch": 2.884251083641034, "grad_norm": 0.5778048634529114, "learning_rate": 6.259639301493947e-06, "loss": 0.409, "step": 8983 }, { "epoch": 2.8845721624658855, "grad_norm": 0.13645879924297333, "learning_rate": 6.2474889741556575e-06, "loss": 0.0592, "step": 8984 }, { "epoch": 2.8848932412907367, "grad_norm": 0.3099921941757202, "learning_rate": 6.235350070276447e-06, "loss": 0.1358, "step": 8985 }, { "epoch": 2.8852143201155886, "grad_norm": 0.9487568736076355, "learning_rate": 6.223222591335409e-06, "loss": 0.3908, "step": 8986 }, { "epoch": 2.8855353989404398, "grad_norm": 0.6402967572212219, "learning_rate": 6.21110653881023e-06, "loss": 0.2223, "step": 8987 }, { "epoch": 2.8858564777652913, "grad_norm": 
1.0103484392166138, "learning_rate": 6.1990019141772605e-06, "loss": 0.4384, "step": 8988 }, { "epoch": 2.886177556590143, "grad_norm": 0.8888756632804871, "learning_rate": 6.186908718911344e-06, "loss": 0.3182, "step": 8989 }, { "epoch": 2.8864986354149944, "grad_norm": 0.6990677118301392, "learning_rate": 6.174826954486068e-06, "loss": 0.2554, "step": 8990 }, { "epoch": 2.886819714239846, "grad_norm": 0.9189555048942566, "learning_rate": 6.1627566223735e-06, "loss": 0.3981, "step": 8991 }, { "epoch": 2.8871407930646975, "grad_norm": 0.9261887073516846, "learning_rate": 6.1506977240444074e-06, "loss": 0.3965, "step": 8992 }, { "epoch": 2.887461871889549, "grad_norm": 0.7619607448577881, "learning_rate": 6.138650260968137e-06, "loss": 0.3196, "step": 8993 }, { "epoch": 2.8877829507144, "grad_norm": 0.7999249696731567, "learning_rate": 6.126614234612593e-06, "loss": 0.2928, "step": 8994 }, { "epoch": 2.888104029539252, "grad_norm": 0.8483562469482422, "learning_rate": 6.1145896464443685e-06, "loss": 0.3259, "step": 8995 }, { "epoch": 2.8884251083641033, "grad_norm": 0.9350925087928772, "learning_rate": 6.102576497928614e-06, "loss": 0.3718, "step": 8996 }, { "epoch": 2.888746187188955, "grad_norm": 1.1752997636795044, "learning_rate": 6.090574790529091e-06, "loss": 0.4583, "step": 8997 }, { "epoch": 2.8890672660138064, "grad_norm": 1.18094801902771, "learning_rate": 6.078584525708176e-06, "loss": 0.4047, "step": 8998 }, { "epoch": 2.889388344838658, "grad_norm": 1.1742857694625854, "learning_rate": 6.066605704926831e-06, "loss": 0.4776, "step": 8999 }, { "epoch": 2.8897094236635095, "grad_norm": 0.905214786529541, "learning_rate": 6.054638329644657e-06, "loss": 0.3768, "step": 9000 }, { "epoch": 2.890030502488361, "grad_norm": 1.0812019109725952, "learning_rate": 6.042682401319844e-06, "loss": 0.4689, "step": 9001 }, { "epoch": 2.8903515813132126, "grad_norm": 1.0372034311294556, "learning_rate": 6.030737921409169e-06, "loss": 0.2963, "step": 9002 }, { "epoch": 
2.8906726601380637, "grad_norm": 1.742632508277893, "learning_rate": 6.018804891368035e-06, "loss": 0.3989, "step": 9003 }, { "epoch": 2.8909937389629157, "grad_norm": 0.8828332424163818, "learning_rate": 6.006883312650457e-06, "loss": 0.3274, "step": 9004 }, { "epoch": 2.8913148177877668, "grad_norm": 0.6829835772514343, "learning_rate": 5.994973186709041e-06, "loss": 0.2829, "step": 9005 }, { "epoch": 2.8916358966126183, "grad_norm": 0.8279037475585938, "learning_rate": 5.98307451499498e-06, "loss": 0.3327, "step": 9006 }, { "epoch": 2.89195697543747, "grad_norm": 0.7003656625747681, "learning_rate": 5.971187298958103e-06, "loss": 0.2629, "step": 9007 }, { "epoch": 2.8922780542623214, "grad_norm": 0.7397180795669556, "learning_rate": 5.9593115400468636e-06, "loss": 0.2892, "step": 9008 }, { "epoch": 2.892599133087173, "grad_norm": 0.88568514585495, "learning_rate": 5.947447239708215e-06, "loss": 0.3233, "step": 9009 }, { "epoch": 2.8929202119120245, "grad_norm": 0.6601244807243347, "learning_rate": 5.935594399387856e-06, "loss": 0.27, "step": 9010 }, { "epoch": 2.893241290736876, "grad_norm": 1.0693775415420532, "learning_rate": 5.923753020529999e-06, "loss": 0.4009, "step": 9011 }, { "epoch": 2.893562369561727, "grad_norm": 0.7800514698028564, "learning_rate": 5.911923104577455e-06, "loss": 0.3128, "step": 9012 }, { "epoch": 2.893883448386579, "grad_norm": 0.9529879689216614, "learning_rate": 5.900104652971694e-06, "loss": 0.3787, "step": 9013 }, { "epoch": 2.8942045272114303, "grad_norm": 1.1537600755691528, "learning_rate": 5.888297667152731e-06, "loss": 0.531, "step": 9014 }, { "epoch": 2.894525606036282, "grad_norm": 0.9319456219673157, "learning_rate": 5.8765021485592376e-06, "loss": 0.2995, "step": 9015 }, { "epoch": 2.8948466848611334, "grad_norm": 0.5598946809768677, "learning_rate": 5.864718098628441e-06, "loss": 0.2649, "step": 9016 }, { "epoch": 2.895167763685985, "grad_norm": 0.70030277967453, "learning_rate": 5.852945518796205e-06, "loss": 0.2977, 
"step": 9017 }, { "epoch": 2.8954888425108365, "grad_norm": 0.9624463319778442, "learning_rate": 5.8411844104969916e-06, "loss": 0.3796, "step": 9018 }, { "epoch": 2.895809921335688, "grad_norm": 0.5246722102165222, "learning_rate": 5.829434775163833e-06, "loss": 0.2232, "step": 9019 }, { "epoch": 2.8961310001605396, "grad_norm": 0.5366243720054626, "learning_rate": 5.8176966142283965e-06, "loss": 0.2416, "step": 9020 }, { "epoch": 2.8964520789853907, "grad_norm": 0.7249617576599121, "learning_rate": 5.805969929120947e-06, "loss": 0.2787, "step": 9021 }, { "epoch": 2.8967731578102422, "grad_norm": 0.7474877834320068, "learning_rate": 5.7942547212703315e-06, "loss": 0.2923, "step": 9022 }, { "epoch": 2.897094236635094, "grad_norm": 0.5960254669189453, "learning_rate": 5.78255099210403e-06, "loss": 0.2618, "step": 9023 }, { "epoch": 2.8974153154599454, "grad_norm": 0.7626693844795227, "learning_rate": 5.770858743048091e-06, "loss": 0.2727, "step": 9024 }, { "epoch": 2.897736394284797, "grad_norm": 0.5556107759475708, "learning_rate": 5.759177975527186e-06, "loss": 0.258, "step": 9025 }, { "epoch": 2.8980574731096485, "grad_norm": 0.471110463142395, "learning_rate": 5.747508690964598e-06, "loss": 0.2506, "step": 9026 }, { "epoch": 2.8983785519345, "grad_norm": 0.47644883394241333, "learning_rate": 5.735850890782157e-06, "loss": 0.2521, "step": 9027 }, { "epoch": 2.8986996307593516, "grad_norm": 0.49500352144241333, "learning_rate": 5.724204576400371e-06, "loss": 0.275, "step": 9028 }, { "epoch": 2.899020709584203, "grad_norm": 0.31801772117614746, "learning_rate": 5.7125697492382835e-06, "loss": 0.2572, "step": 9029 }, { "epoch": 2.899341788409054, "grad_norm": 0.7811435461044312, "learning_rate": 5.700946410713548e-06, "loss": 0.8103, "step": 9030 }, { "epoch": 2.8996628672339058, "grad_norm": 0.3786062002182007, "learning_rate": 5.6893345622424874e-06, "loss": 0.3159, "step": 9031 }, { "epoch": 2.8999839460587573, "grad_norm": 0.2506957948207855, "learning_rate": 
5.6777342052399045e-06, "loss": 0.0899, "step": 9032 }, { "epoch": 2.900305024883609, "grad_norm": 0.23247039318084717, "learning_rate": 5.666145341119322e-06, "loss": 0.0615, "step": 9033 }, { "epoch": 2.9006261037084604, "grad_norm": 0.1694241166114807, "learning_rate": 5.654567971292757e-06, "loss": 0.0589, "step": 9034 }, { "epoch": 2.900947182533312, "grad_norm": 0.25635042786598206, "learning_rate": 5.643002097170924e-06, "loss": 0.1044, "step": 9035 }, { "epoch": 2.9012682613581635, "grad_norm": 0.9146557450294495, "learning_rate": 5.6314477201630745e-06, "loss": 0.5639, "step": 9036 }, { "epoch": 2.901589340183015, "grad_norm": 0.8883252739906311, "learning_rate": 5.619904841677059e-06, "loss": 0.4244, "step": 9037 }, { "epoch": 2.9019104190078666, "grad_norm": 0.751973032951355, "learning_rate": 5.608373463119354e-06, "loss": 0.3138, "step": 9038 }, { "epoch": 2.9022314978327177, "grad_norm": 0.7506957054138184, "learning_rate": 5.5968535858950345e-06, "loss": 0.2615, "step": 9039 }, { "epoch": 2.9025525766575693, "grad_norm": 0.7786146402359009, "learning_rate": 5.585345211407733e-06, "loss": 0.3453, "step": 9040 }, { "epoch": 2.902873655482421, "grad_norm": 0.9360180497169495, "learning_rate": 5.573848341059739e-06, "loss": 0.4307, "step": 9041 }, { "epoch": 2.9031947343072724, "grad_norm": 0.9217041730880737, "learning_rate": 5.562362976251901e-06, "loss": 0.3228, "step": 9042 }, { "epoch": 2.903515813132124, "grad_norm": 0.904606819152832, "learning_rate": 5.550889118383673e-06, "loss": 0.3926, "step": 9043 }, { "epoch": 2.9038368919569755, "grad_norm": 0.7405612468719482, "learning_rate": 5.5394267688531066e-06, "loss": 0.2862, "step": 9044 }, { "epoch": 2.904157970781827, "grad_norm": 0.7969362735748291, "learning_rate": 5.52797592905685e-06, "loss": 0.3321, "step": 9045 }, { "epoch": 2.9044790496066786, "grad_norm": 0.8210546970367432, "learning_rate": 5.516536600390188e-06, "loss": 0.3051, "step": 9046 }, { "epoch": 2.90480012843153, "grad_norm": 
0.796033501625061, "learning_rate": 5.505108784246926e-06, "loss": 0.3624, "step": 9047 }, { "epoch": 2.9051212072563812, "grad_norm": 0.7903563380241394, "learning_rate": 5.49369248201953e-06, "loss": 0.318, "step": 9048 }, { "epoch": 2.905442286081233, "grad_norm": 1.0777714252471924, "learning_rate": 5.482287695099031e-06, "loss": 0.4097, "step": 9049 }, { "epoch": 2.9057633649060843, "grad_norm": 1.2699135541915894, "learning_rate": 5.470894424875062e-06, "loss": 0.3572, "step": 9050 }, { "epoch": 2.906084443730936, "grad_norm": 0.935076892375946, "learning_rate": 5.4595126727359e-06, "loss": 0.3222, "step": 9051 }, { "epoch": 2.9064055225557874, "grad_norm": 0.7485237121582031, "learning_rate": 5.448142440068316e-06, "loss": 0.3005, "step": 9052 }, { "epoch": 2.906726601380639, "grad_norm": 0.9007347822189331, "learning_rate": 5.436783728257788e-06, "loss": 0.3643, "step": 9053 }, { "epoch": 2.9070476802054905, "grad_norm": 0.8481326699256897, "learning_rate": 5.425436538688322e-06, "loss": 0.3329, "step": 9054 }, { "epoch": 2.907368759030342, "grad_norm": 0.8909382820129395, "learning_rate": 5.414100872742534e-06, "loss": 0.2989, "step": 9055 }, { "epoch": 2.9076898378551936, "grad_norm": 0.9575411677360535, "learning_rate": 5.402776731801662e-06, "loss": 0.269, "step": 9056 }, { "epoch": 2.9080109166800447, "grad_norm": 0.7255709767341614, "learning_rate": 5.39146411724547e-06, "loss": 0.294, "step": 9057 }, { "epoch": 2.9083319955048963, "grad_norm": 1.5980173349380493, "learning_rate": 5.380163030452412e-06, "loss": 0.2823, "step": 9058 }, { "epoch": 2.908653074329748, "grad_norm": 0.8485715389251709, "learning_rate": 5.368873472799474e-06, "loss": 0.2583, "step": 9059 }, { "epoch": 2.9089741531545994, "grad_norm": 0.9129398465156555, "learning_rate": 5.357595445662267e-06, "loss": 0.3554, "step": 9060 }, { "epoch": 2.909295231979451, "grad_norm": 0.7431888580322266, "learning_rate": 5.346328950414969e-06, "loss": 0.2997, "step": 9061 }, { "epoch": 
2.9096163108043025, "grad_norm": 1.5763301849365234, "learning_rate": 5.335073988430372e-06, "loss": 0.3451, "step": 9062 }, { "epoch": 2.909937389629154, "grad_norm": 0.8064843416213989, "learning_rate": 5.3238305610798565e-06, "loss": 0.3082, "step": 9063 }, { "epoch": 2.9102584684540056, "grad_norm": 1.073921799659729, "learning_rate": 5.312598669733404e-06, "loss": 0.3749, "step": 9064 }, { "epoch": 2.910579547278857, "grad_norm": 0.5479134917259216, "learning_rate": 5.301378315759598e-06, "loss": 0.2452, "step": 9065 }, { "epoch": 2.9109006261037083, "grad_norm": 0.9387060403823853, "learning_rate": 5.290169500525577e-06, "loss": 0.3453, "step": 9066 }, { "epoch": 2.91122170492856, "grad_norm": 0.6247801184654236, "learning_rate": 5.278972225397127e-06, "loss": 0.2472, "step": 9067 }, { "epoch": 2.9115427837534114, "grad_norm": 0.6648100018501282, "learning_rate": 5.267786491738569e-06, "loss": 0.2595, "step": 9068 }, { "epoch": 2.911863862578263, "grad_norm": 0.5038353204727173, "learning_rate": 5.256612300912911e-06, "loss": 0.2275, "step": 9069 }, { "epoch": 2.9121849414031145, "grad_norm": 0.5695350170135498, "learning_rate": 5.245449654281631e-06, "loss": 0.2695, "step": 9070 }, { "epoch": 2.912506020227966, "grad_norm": null, "learning_rate": 5.245449654281631e-06, "loss": 0.4011, "step": 9071 }, { "epoch": 2.9128270990528176, "grad_norm": 1.1442893743515015, "learning_rate": 5.2342985532049084e-06, "loss": 0.3682, "step": 9072 }, { "epoch": 2.913148177877669, "grad_norm": 0.6914578676223755, "learning_rate": 5.223158999041444e-06, "loss": 0.2844, "step": 9073 }, { "epoch": 2.9134692567025207, "grad_norm": 0.8676597476005554, "learning_rate": 5.212030993148553e-06, "loss": 0.3133, "step": 9074 }, { "epoch": 2.9137903355273718, "grad_norm": 0.6819028854370117, "learning_rate": 5.200914536882185e-06, "loss": 0.2754, "step": 9075 }, { "epoch": 2.9141114143522233, "grad_norm": 0.5778715014457703, "learning_rate": 5.189809631596798e-06, "loss": 0.2804, "step": 
9076 }, { "epoch": 2.914432493177075, "grad_norm": 0.4760773479938507, "learning_rate": 5.178716278645535e-06, "loss": 0.245, "step": 9077 }, { "epoch": 2.9147535720019264, "grad_norm": 0.41519516706466675, "learning_rate": 5.167634479380068e-06, "loss": 0.2327, "step": 9078 }, { "epoch": 2.915074650826778, "grad_norm": 0.5341471433639526, "learning_rate": 5.1565642351506845e-06, "loss": 0.2778, "step": 9079 }, { "epoch": 2.9153957296516295, "grad_norm": 0.6417379379272461, "learning_rate": 5.145505547306251e-06, "loss": 0.9807, "step": 9080 }, { "epoch": 2.915716808476481, "grad_norm": 0.5737770199775696, "learning_rate": 5.134458417194254e-06, "loss": 0.5596, "step": 9081 }, { "epoch": 2.9160378873013326, "grad_norm": 0.29216137528419495, "learning_rate": 5.1234228461607304e-06, "loss": 0.1449, "step": 9082 }, { "epoch": 2.916358966126184, "grad_norm": 0.20528793334960938, "learning_rate": 5.1123988355503475e-06, "loss": 0.06, "step": 9083 }, { "epoch": 2.9166800449510353, "grad_norm": 0.1566951721906662, "learning_rate": 5.101386386706342e-06, "loss": 0.0578, "step": 9084 }, { "epoch": 2.917001123775887, "grad_norm": 0.1581335961818695, "learning_rate": 5.0903855009705514e-06, "loss": 0.0592, "step": 9085 }, { "epoch": 2.9173222026007384, "grad_norm": 0.24517378211021423, "learning_rate": 5.079396179683383e-06, "loss": 0.1089, "step": 9086 }, { "epoch": 2.91764328142559, "grad_norm": 0.6500630974769592, "learning_rate": 5.068418424183874e-06, "loss": 0.3078, "step": 9087 }, { "epoch": 2.9179643602504415, "grad_norm": 0.8994437456130981, "learning_rate": 5.057452235809624e-06, "loss": 0.509, "step": 9088 }, { "epoch": 2.918285439075293, "grad_norm": 0.7024485468864441, "learning_rate": 5.046497615896806e-06, "loss": 0.2671, "step": 9089 }, { "epoch": 2.9186065179001446, "grad_norm": 0.7663185000419617, "learning_rate": 5.035554565780265e-06, "loss": 0.3624, "step": 9090 }, { "epoch": 2.918927596724996, "grad_norm": 0.6804545521736145, "learning_rate": 
5.024623086793323e-06, "loss": 0.2751, "step": 9091 }, { "epoch": 2.9192486755498477, "grad_norm": 0.78728187084198, "learning_rate": 5.013703180267959e-06, "loss": 0.3272, "step": 9092 }, { "epoch": 2.919569754374699, "grad_norm": 0.8548991680145264, "learning_rate": 5.002794847534764e-06, "loss": 0.3579, "step": 9093 }, { "epoch": 2.9198908331995503, "grad_norm": 1.0403963327407837, "learning_rate": 4.991898089922819e-06, "loss": 0.4054, "step": 9094 }, { "epoch": 2.920211912024402, "grad_norm": 1.1474127769470215, "learning_rate": 4.981012908759941e-06, "loss": 0.4484, "step": 9095 }, { "epoch": 2.9205329908492534, "grad_norm": 0.6695740818977356, "learning_rate": 4.97013930537239e-06, "loss": 0.2586, "step": 9096 }, { "epoch": 2.920854069674105, "grad_norm": 0.9828323125839233, "learning_rate": 4.959277281085129e-06, "loss": 0.3797, "step": 9097 }, { "epoch": 2.9211751484989565, "grad_norm": 0.9727553725242615, "learning_rate": 4.948426837221631e-06, "loss": 0.3328, "step": 9098 }, { "epoch": 2.921496227323808, "grad_norm": 0.7968175411224365, "learning_rate": 4.937587975103996e-06, "loss": 0.2932, "step": 9099 }, { "epoch": 2.9218173061486596, "grad_norm": 0.786050021648407, "learning_rate": 4.926760696052934e-06, "loss": 0.3092, "step": 9100 }, { "epoch": 2.922138384973511, "grad_norm": 0.9386889338493347, "learning_rate": 4.915945001387667e-06, "loss": 0.3844, "step": 9101 }, { "epoch": 2.9224594637983623, "grad_norm": 0.8530262112617493, "learning_rate": 4.905140892426097e-06, "loss": 0.3504, "step": 9102 }, { "epoch": 2.922780542623214, "grad_norm": 0.7439628839492798, "learning_rate": 4.8943483704846475e-06, "loss": 0.3001, "step": 9103 }, { "epoch": 2.9231016214480654, "grad_norm": 0.8572718501091003, "learning_rate": 4.8835674368783666e-06, "loss": 0.3333, "step": 9104 }, { "epoch": 2.923422700272917, "grad_norm": 0.9241260886192322, "learning_rate": 4.872798092920872e-06, "loss": 0.3437, "step": 9105 }, { "epoch": 2.9237437790977685, "grad_norm": 
0.8410215973854065, "learning_rate": 4.862040339924378e-06, "loss": 0.3783, "step": 9106 }, { "epoch": 2.92406485792262, "grad_norm": 1.0775259733200073, "learning_rate": 4.8512941791996726e-06, "loss": 0.3497, "step": 9107 }, { "epoch": 2.9243859367474716, "grad_norm": 0.4644877016544342, "learning_rate": 4.840559612056183e-06, "loss": 0.2042, "step": 9108 }, { "epoch": 2.924707015572323, "grad_norm": 0.7072948217391968, "learning_rate": 4.829836639801843e-06, "loss": 0.2708, "step": 9109 }, { "epoch": 2.9250280943971747, "grad_norm": 0.8432921767234802, "learning_rate": 4.819125263743229e-06, "loss": 0.313, "step": 9110 }, { "epoch": 2.925349173222026, "grad_norm": 0.8821758031845093, "learning_rate": 4.808425485185486e-06, "loss": 0.4026, "step": 9111 }, { "epoch": 2.9256702520468774, "grad_norm": 1.0225335359573364, "learning_rate": 4.797737305432337e-06, "loss": 0.463, "step": 9112 }, { "epoch": 2.925991330871729, "grad_norm": 0.8588323593139648, "learning_rate": 4.7870607257861415e-06, "loss": 0.3394, "step": 9113 }, { "epoch": 2.9263124096965805, "grad_norm": 0.6533500552177429, "learning_rate": 4.776395747547757e-06, "loss": 0.2785, "step": 9114 }, { "epoch": 2.926633488521432, "grad_norm": 1.2023701667785645, "learning_rate": 4.765742372016735e-06, "loss": 0.2919, "step": 9115 }, { "epoch": 2.9269545673462836, "grad_norm": 1.0757431983947754, "learning_rate": 4.755100600491102e-06, "loss": 0.4093, "step": 9116 }, { "epoch": 2.927275646171135, "grad_norm": 1.0217454433441162, "learning_rate": 4.744470434267567e-06, "loss": 0.3653, "step": 9117 }, { "epoch": 2.9275967249959867, "grad_norm": 0.9355355501174927, "learning_rate": 4.733851874641382e-06, "loss": 0.3976, "step": 9118 }, { "epoch": 2.927917803820838, "grad_norm": 1.0048413276672363, "learning_rate": 4.723244922906356e-06, "loss": 0.3092, "step": 9119 }, { "epoch": 2.9282388826456893, "grad_norm": 0.6854848861694336, "learning_rate": 4.712649580354933e-06, "loss": 0.3157, "step": 9120 }, { "epoch": 
2.928559961470541, "grad_norm": 1.0658174753189087, "learning_rate": 4.702065848278126e-06, "loss": 0.3169, "step": 9121 }, { "epoch": 2.9288810402953924, "grad_norm": 1.368660807609558, "learning_rate": 4.691493727965512e-06, "loss": 0.4592, "step": 9122 }, { "epoch": 2.929202119120244, "grad_norm": 0.9535953402519226, "learning_rate": 4.680933220705308e-06, "loss": 0.3418, "step": 9123 }, { "epoch": 2.9295231979450955, "grad_norm": 0.46225616335868835, "learning_rate": 4.670384327784239e-06, "loss": 0.2457, "step": 9124 }, { "epoch": 2.929844276769947, "grad_norm": 0.45245954394340515, "learning_rate": 4.659847050487687e-06, "loss": 0.248, "step": 9125 }, { "epoch": 2.9301653555947986, "grad_norm": 0.48897916078567505, "learning_rate": 4.64932139009957e-06, "loss": 0.261, "step": 9126 }, { "epoch": 2.93048643441965, "grad_norm": 0.7464796900749207, "learning_rate": 4.638807347902408e-06, "loss": 0.2816, "step": 9127 }, { "epoch": 2.9308075132445017, "grad_norm": 0.8038036823272705, "learning_rate": 4.628304925177318e-06, "loss": 0.3102, "step": 9128 }, { "epoch": 2.931128592069353, "grad_norm": 0.49647238850593567, "learning_rate": 4.617814123203967e-06, "loss": 0.2667, "step": 9129 }, { "epoch": 2.9314496708942044, "grad_norm": 0.3765721917152405, "learning_rate": 4.607334943260655e-06, "loss": 0.3785, "step": 9130 }, { "epoch": 2.931770749719056, "grad_norm": 0.40889376401901245, "learning_rate": 4.596867386624215e-06, "loss": 0.3646, "step": 9131 }, { "epoch": 2.9320918285439075, "grad_norm": 0.600988507270813, "learning_rate": 4.586411454570083e-06, "loss": 0.3995, "step": 9132 }, { "epoch": 2.932412907368759, "grad_norm": 0.4408535659313202, "learning_rate": 4.575967148372317e-06, "loss": 0.1431, "step": 9133 }, { "epoch": 2.9327339861936106, "grad_norm": 0.45499712228775024, "learning_rate": 4.5655344693034895e-06, "loss": 0.2273, "step": 9134 }, { "epoch": 2.933055065018462, "grad_norm": 0.5091567039489746, "learning_rate": 4.555113418634805e-06, "loss": 
0.123, "step": 9135 }, { "epoch": 2.9333761438433137, "grad_norm": 0.4760441780090332, "learning_rate": 4.544703997636046e-06, "loss": 0.2153, "step": 9136 }, { "epoch": 2.9336972226681652, "grad_norm": 0.5263906717300415, "learning_rate": 4.534306207575545e-06, "loss": 0.2508, "step": 9137 }, { "epoch": 2.9340183014930163, "grad_norm": 0.9112762212753296, "learning_rate": 4.523920049720265e-06, "loss": 0.5172, "step": 9138 }, { "epoch": 2.934339380317868, "grad_norm": 0.9089072346687317, "learning_rate": 4.513545525335705e-06, "loss": 0.4721, "step": 9139 }, { "epoch": 2.9346604591427194, "grad_norm": 0.6721816062927246, "learning_rate": 4.5031826356859876e-06, "loss": 0.2626, "step": 9140 }, { "epoch": 2.934981537967571, "grad_norm": 0.9276089072227478, "learning_rate": 4.492831382033791e-06, "loss": 0.3248, "step": 9141 }, { "epoch": 2.9353026167924225, "grad_norm": 0.9039839506149292, "learning_rate": 4.482491765640395e-06, "loss": 0.3331, "step": 9142 }, { "epoch": 2.935623695617274, "grad_norm": 0.983473539352417, "learning_rate": 4.4721637877656375e-06, "loss": 0.3217, "step": 9143 }, { "epoch": 2.9359447744421256, "grad_norm": 0.7742102146148682, "learning_rate": 4.461847449667955e-06, "loss": 0.3411, "step": 9144 }, { "epoch": 2.936265853266977, "grad_norm": 0.8041900396347046, "learning_rate": 4.451542752604365e-06, "loss": 0.3223, "step": 9145 }, { "epoch": 2.9365869320918287, "grad_norm": 0.7607382535934448, "learning_rate": 4.4412496978304515e-06, "loss": 0.2994, "step": 9146 }, { "epoch": 2.93690801091668, "grad_norm": 0.6458768844604492, "learning_rate": 4.4309682866004125e-06, "loss": 0.2517, "step": 9147 }, { "epoch": 2.9372290897415314, "grad_norm": 0.8893207311630249, "learning_rate": 4.420698520166988e-06, "loss": 0.3428, "step": 9148 }, { "epoch": 2.937550168566383, "grad_norm": 1.1111611127853394, "learning_rate": 4.410440399781534e-06, "loss": 0.3199, "step": 9149 }, { "epoch": 2.9378712473912345, "grad_norm": 0.8783401846885681, 
"learning_rate": 4.400193926693952e-06, "loss": 0.3904, "step": 9150 }, { "epoch": 2.938192326216086, "grad_norm": 0.6565034985542297, "learning_rate": 4.389959102152774e-06, "loss": 0.2862, "step": 9151 }, { "epoch": 2.9385134050409376, "grad_norm": 0.8540425300598145, "learning_rate": 4.379735927405038e-06, "loss": 0.2698, "step": 9152 }, { "epoch": 2.938834483865789, "grad_norm": 1.0489143133163452, "learning_rate": 4.369524403696457e-06, "loss": 0.3294, "step": 9153 }, { "epoch": 2.9391555626906407, "grad_norm": 1.0321301221847534, "learning_rate": 4.3593245322712474e-06, "loss": 0.351, "step": 9154 }, { "epoch": 2.9394766415154923, "grad_norm": 0.7734938263893127, "learning_rate": 4.349136314372204e-06, "loss": 0.3194, "step": 9155 }, { "epoch": 2.9397977203403434, "grad_norm": 0.8938072919845581, "learning_rate": 4.338959751240801e-06, "loss": 0.3961, "step": 9156 }, { "epoch": 2.940118799165195, "grad_norm": 0.6044966578483582, "learning_rate": 4.328794844116946e-06, "loss": 0.2416, "step": 9157 }, { "epoch": 2.9404398779900465, "grad_norm": 0.69122314453125, "learning_rate": 4.318641594239259e-06, "loss": 0.2702, "step": 9158 }, { "epoch": 2.940760956814898, "grad_norm": 1.0131958723068237, "learning_rate": 4.308500002844862e-06, "loss": 0.362, "step": 9159 }, { "epoch": 2.9410820356397496, "grad_norm": 0.8207650184631348, "learning_rate": 4.2983700711694665e-06, "loss": 0.3119, "step": 9160 }, { "epoch": 2.941403114464601, "grad_norm": 0.6501420736312866, "learning_rate": 4.288251800447385e-06, "loss": 0.2507, "step": 9161 }, { "epoch": 2.9417241932894527, "grad_norm": 0.638103187084198, "learning_rate": 4.278145191911509e-06, "loss": 0.279, "step": 9162 }, { "epoch": 2.942045272114304, "grad_norm": 0.9142710566520691, "learning_rate": 4.268050246793276e-06, "loss": 0.416, "step": 9163 }, { "epoch": 2.9423663509391558, "grad_norm": 0.9541748762130737, "learning_rate": 4.257966966322735e-06, "loss": 0.314, "step": 9164 }, { "epoch": 2.942687429764007, 
"grad_norm": 0.9409205317497253, "learning_rate": 4.247895351728504e-06, "loss": 0.3532, "step": 9165 }, { "epoch": 2.9430085085888584, "grad_norm": 0.777114748954773, "learning_rate": 4.237835404237778e-06, "loss": 0.3019, "step": 9166 }, { "epoch": 2.94332958741371, "grad_norm": 0.796989917755127, "learning_rate": 4.227787125076332e-06, "loss": 0.251, "step": 9167 }, { "epoch": 2.9436506662385615, "grad_norm": 0.7624577879905701, "learning_rate": 4.217750515468522e-06, "loss": 0.2639, "step": 9168 }, { "epoch": 2.943971745063413, "grad_norm": 0.6462528109550476, "learning_rate": 4.207725576637256e-06, "loss": 0.2605, "step": 9169 }, { "epoch": 2.9442928238882646, "grad_norm": 0.8757241368293762, "learning_rate": 4.197712309804058e-06, "loss": 0.2975, "step": 9170 }, { "epoch": 2.944613902713116, "grad_norm": 0.9458180665969849, "learning_rate": 4.187710716189042e-06, "loss": 0.3727, "step": 9171 }, { "epoch": 2.9449349815379677, "grad_norm": 0.7695515155792236, "learning_rate": 4.177720797010831e-06, "loss": 0.2814, "step": 9172 }, { "epoch": 2.9452560603628193, "grad_norm": 1.1303104162216187, "learning_rate": 4.167742553486675e-06, "loss": 0.3553, "step": 9173 }, { "epoch": 2.9455771391876704, "grad_norm": 0.6950247883796692, "learning_rate": 4.1577759868324125e-06, "loss": 0.2848, "step": 9174 }, { "epoch": 2.945898218012522, "grad_norm": 0.5095248818397522, "learning_rate": 4.147821098262405e-06, "loss": 0.2629, "step": 9175 }, { "epoch": 2.9462192968373735, "grad_norm": 0.6144112944602966, "learning_rate": 4.137877888989672e-06, "loss": 0.2893, "step": 9176 }, { "epoch": 2.946540375662225, "grad_norm": 0.970645546913147, "learning_rate": 4.127946360225721e-06, "loss": 0.3173, "step": 9177 }, { "epoch": 2.9468614544870766, "grad_norm": 0.5283989310264587, "learning_rate": 4.118026513180695e-06, "loss": 0.2829, "step": 9178 }, { "epoch": 2.947182533311928, "grad_norm": 0.4269641935825348, "learning_rate": 4.108118349063306e-06, "loss": 0.2613, "step": 9179 }, 
{ "epoch": 2.9475036121367797, "grad_norm": 0.476938396692276, "learning_rate": 4.09822186908082e-06, "loss": 0.64, "step": 9180 }, { "epoch": 2.9478246909616312, "grad_norm": 0.4373842477798462, "learning_rate": 4.088337074439097e-06, "loss": 0.4771, "step": 9181 }, { "epoch": 2.948145769786483, "grad_norm": 0.4393393397331238, "learning_rate": 4.078463966342571e-06, "loss": 0.3074, "step": 9182 }, { "epoch": 2.948466848611334, "grad_norm": 0.7229725122451782, "learning_rate": 4.068602545994249e-06, "loss": 0.4381, "step": 9183 }, { "epoch": 2.9487879274361855, "grad_norm": 0.48776933550834656, "learning_rate": 4.0587528145957235e-06, "loss": 0.3591, "step": 9184 }, { "epoch": 2.949109006261037, "grad_norm": 0.2947094738483429, "learning_rate": 4.048914773347134e-06, "loss": 0.1124, "step": 9185 }, { "epoch": 2.9494300850858886, "grad_norm": 0.3417683243751526, "learning_rate": 4.039088423447235e-06, "loss": 0.1572, "step": 9186 }, { "epoch": 2.94975116391074, "grad_norm": 0.2823491394519806, "learning_rate": 4.029273766093333e-06, "loss": 0.131, "step": 9187 }, { "epoch": 2.9500722427355917, "grad_norm": 0.8070669770240784, "learning_rate": 4.019470802481307e-06, "loss": 0.437, "step": 9188 }, { "epoch": 2.950393321560443, "grad_norm": 0.7624936699867249, "learning_rate": 4.009679533805633e-06, "loss": 0.3359, "step": 9189 }, { "epoch": 2.9507144003852948, "grad_norm": 0.7309533357620239, "learning_rate": 3.999899961259335e-06, "loss": 0.3002, "step": 9190 }, { "epoch": 2.9510354792101463, "grad_norm": 0.8740092515945435, "learning_rate": 3.990132086034026e-06, "loss": 0.4231, "step": 9191 }, { "epoch": 2.9513565580349974, "grad_norm": 0.9942984580993652, "learning_rate": 3.9803759093199e-06, "loss": 0.4098, "step": 9192 }, { "epoch": 2.951677636859849, "grad_norm": 0.6029365062713623, "learning_rate": 3.970631432305694e-06, "loss": 0.2482, "step": 9193 }, { "epoch": 2.9519987156847005, "grad_norm": 0.9003799557685852, "learning_rate": 3.96089865617878e-06, 
"loss": 0.3181, "step": 9194 }, { "epoch": 2.952319794509552, "grad_norm": 0.7407945990562439, "learning_rate": 3.951177582125021e-06, "loss": 0.2781, "step": 9195 }, { "epoch": 2.9526408733344036, "grad_norm": 0.8002344965934753, "learning_rate": 3.941468211328947e-06, "loss": 0.3166, "step": 9196 }, { "epoch": 2.952961952159255, "grad_norm": 0.7346023321151733, "learning_rate": 3.931770544973601e-06, "loss": 0.2666, "step": 9197 }, { "epoch": 2.9532830309841067, "grad_norm": 0.8236327767372131, "learning_rate": 3.922084584240582e-06, "loss": 0.3224, "step": 9198 }, { "epoch": 2.953604109808958, "grad_norm": 0.9807097315788269, "learning_rate": 3.912410330310156e-06, "loss": 0.4151, "step": 9199 }, { "epoch": 2.95392518863381, "grad_norm": 0.6370247006416321, "learning_rate": 3.902747784361038e-06, "loss": 0.2875, "step": 9200 }, { "epoch": 2.954246267458661, "grad_norm": 0.8275413513183594, "learning_rate": 3.893096947570618e-06, "loss": 0.3529, "step": 9201 }, { "epoch": 2.9545673462835125, "grad_norm": 1.0060149431228638, "learning_rate": 3.883457821114811e-06, "loss": 0.3392, "step": 9202 }, { "epoch": 2.954888425108364, "grad_norm": 0.9771203398704529, "learning_rate": 3.873830406168111e-06, "loss": 0.423, "step": 9203 }, { "epoch": 2.9552095039332156, "grad_norm": 0.7501790523529053, "learning_rate": 3.864214703903601e-06, "loss": 0.2913, "step": 9204 }, { "epoch": 2.955530582758067, "grad_norm": 1.0703070163726807, "learning_rate": 3.8546107154929235e-06, "loss": 0.4205, "step": 9205 }, { "epoch": 2.9558516615829187, "grad_norm": 0.6897678971290588, "learning_rate": 3.845018442106285e-06, "loss": 0.2425, "step": 9206 }, { "epoch": 2.9561727404077702, "grad_norm": 0.917190670967102, "learning_rate": 3.835437884912474e-06, "loss": 0.392, "step": 9207 }, { "epoch": 2.9564938192326213, "grad_norm": 1.1160334348678589, "learning_rate": 3.825869045078867e-06, "loss": 0.4136, "step": 9208 }, { "epoch": 2.9568148980574733, "grad_norm": 1.1579893827438354, 
"learning_rate": 3.816311923771387e-06, "loss": 0.4582, "step": 9209 }, { "epoch": 2.9571359768823244, "grad_norm": 0.7560253739356995, "learning_rate": 3.806766522154548e-06, "loss": 0.3024, "step": 9210 }, { "epoch": 2.957457055707176, "grad_norm": 1.1074497699737549, "learning_rate": 3.797232841391407e-06, "loss": 0.4429, "step": 9211 }, { "epoch": 2.9577781345320275, "grad_norm": 0.941001832485199, "learning_rate": 3.787710882643658e-06, "loss": 0.3496, "step": 9212 }, { "epoch": 2.958099213356879, "grad_norm": 0.8937235474586487, "learning_rate": 3.7782006470714616e-06, "loss": 0.325, "step": 9213 }, { "epoch": 2.9584202921817306, "grad_norm": 1.424865961074829, "learning_rate": 3.7687021358336683e-06, "loss": 0.4345, "step": 9214 }, { "epoch": 2.958741371006582, "grad_norm": 0.7509331703186035, "learning_rate": 3.759215350087619e-06, "loss": 0.2709, "step": 9215 }, { "epoch": 2.9590624498314337, "grad_norm": 0.6785710453987122, "learning_rate": 3.7497402909892342e-06, "loss": 0.237, "step": 9216 }, { "epoch": 2.959383528656285, "grad_norm": 0.7602850794792175, "learning_rate": 3.7402769596930563e-06, "loss": 0.3031, "step": 9217 }, { "epoch": 2.959704607481137, "grad_norm": 1.575135588645935, "learning_rate": 3.730825357352119e-06, "loss": 0.3567, "step": 9218 }, { "epoch": 2.960025686305988, "grad_norm": 0.6166884899139404, "learning_rate": 3.721385485118123e-06, "loss": 0.2792, "step": 9219 }, { "epoch": 2.9603467651308395, "grad_norm": 0.6132175922393799, "learning_rate": 3.711957344141237e-06, "loss": 0.2788, "step": 9220 }, { "epoch": 2.960667843955691, "grad_norm": 0.7397081255912781, "learning_rate": 3.7025409355702976e-06, "loss": 0.3096, "step": 9221 }, { "epoch": 2.9609889227805426, "grad_norm": 0.7033382654190063, "learning_rate": 3.693136260552632e-06, "loss": 0.2897, "step": 9222 }, { "epoch": 2.961310001605394, "grad_norm": 0.62275230884552, "learning_rate": 3.68374332023419e-06, "loss": 0.2907, "step": 9223 }, { "epoch": 2.9616310804302457, 
"grad_norm": 0.716752827167511, "learning_rate": 3.6743621157594555e-06, "loss": 0.2795, "step": 9224 }, { "epoch": 2.9619521592550972, "grad_norm": 1.4299006462097168, "learning_rate": 3.664992648271526e-06, "loss": 0.2909, "step": 9225 }, { "epoch": 2.9622732380799484, "grad_norm": 0.8173677921295166, "learning_rate": 3.6556349189120097e-06, "loss": 0.3142, "step": 9226 }, { "epoch": 2.9625943169048004, "grad_norm": 0.9796795845031738, "learning_rate": 3.6462889288211512e-06, "loss": 0.3166, "step": 9227 }, { "epoch": 2.9629153957296515, "grad_norm": 0.45657387375831604, "learning_rate": 3.6369546791377052e-06, "loss": 0.2369, "step": 9228 }, { "epoch": 2.963236474554503, "grad_norm": 0.5515535473823547, "learning_rate": 3.627632170999029e-06, "loss": 0.2845, "step": 9229 }, { "epoch": 2.9635575533793546, "grad_norm": 0.4276106357574463, "learning_rate": 3.6183214055410586e-06, "loss": 0.3697, "step": 9230 }, { "epoch": 2.963878632204206, "grad_norm": 0.45924896001815796, "learning_rate": 3.6090223838982417e-06, "loss": 0.4946, "step": 9231 }, { "epoch": 2.9641997110290577, "grad_norm": 0.29669415950775146, "learning_rate": 3.5997351072036945e-06, "loss": 0.142, "step": 9232 }, { "epoch": 2.964520789853909, "grad_norm": 0.5130549669265747, "learning_rate": 3.5904595765890005e-06, "loss": 0.2495, "step": 9233 }, { "epoch": 2.9648418686787608, "grad_norm": 0.23706403374671936, "learning_rate": 3.5811957931843554e-06, "loss": 0.0929, "step": 9234 }, { "epoch": 2.965162947503612, "grad_norm": 0.2668483853340149, "learning_rate": 3.5719437581185454e-06, "loss": 0.1031, "step": 9235 }, { "epoch": 2.965484026328464, "grad_norm": 0.812019407749176, "learning_rate": 3.562703472518869e-06, "loss": 0.3787, "step": 9236 }, { "epoch": 2.965805105153315, "grad_norm": 0.8627951741218567, "learning_rate": 3.553474937511281e-06, "loss": 0.372, "step": 9237 }, { "epoch": 2.9661261839781665, "grad_norm": 0.92494797706604, "learning_rate": 3.5442581542201923e-06, "loss": 0.3974, 
"step": 9238 }, { "epoch": 2.966447262803018, "grad_norm": 0.898228108882904, "learning_rate": 3.5350531237686724e-06, "loss": 0.2598, "step": 9239 }, { "epoch": 2.9667683416278696, "grad_norm": 0.7887357473373413, "learning_rate": 3.5258598472783233e-06, "loss": 0.2791, "step": 9240 }, { "epoch": 2.967089420452721, "grad_norm": 0.8121463060379028, "learning_rate": 3.516678325869316e-06, "loss": 0.3809, "step": 9241 }, { "epoch": 2.9674104992775727, "grad_norm": 0.7240415215492249, "learning_rate": 3.5075085606604e-06, "loss": 0.3064, "step": 9242 }, { "epoch": 2.9677315781024243, "grad_norm": 1.0837162733078003, "learning_rate": 3.4983505527688586e-06, "loss": 0.3971, "step": 9243 }, { "epoch": 2.9680526569272754, "grad_norm": 0.7732176780700684, "learning_rate": 3.489204303310578e-06, "loss": 0.2647, "step": 9244 }, { "epoch": 2.9683737357521274, "grad_norm": 0.777204155921936, "learning_rate": 3.480069813400022e-06, "loss": 0.3172, "step": 9245 }, { "epoch": 2.9686948145769785, "grad_norm": 0.9173356294631958, "learning_rate": 3.470947084150167e-06, "loss": 0.3888, "step": 9246 }, { "epoch": 2.96901589340183, "grad_norm": 0.6100045442581177, "learning_rate": 3.461836116672612e-06, "loss": 0.2627, "step": 9247 }, { "epoch": 2.9693369722266816, "grad_norm": 0.8461219668388367, "learning_rate": 3.452736912077503e-06, "loss": 0.342, "step": 9248 }, { "epoch": 2.969658051051533, "grad_norm": 0.9259991645812988, "learning_rate": 3.443649471473531e-06, "loss": 0.4195, "step": 9249 }, { "epoch": 2.9699791298763847, "grad_norm": 1.0365054607391357, "learning_rate": 3.434573795967988e-06, "loss": 0.3616, "step": 9250 }, { "epoch": 2.9703002087012362, "grad_norm": 1.170913815498352, "learning_rate": 3.425509886666711e-06, "loss": 0.4531, "step": 9251 }, { "epoch": 2.970621287526088, "grad_norm": 0.9523593187332153, "learning_rate": 3.4164577446741174e-06, "loss": 0.3797, "step": 9252 }, { "epoch": 2.970942366350939, "grad_norm": 0.5297471284866333, "learning_rate": 
3.40741737109318e-06, "loss": 0.2028, "step": 9253 }, { "epoch": 2.971263445175791, "grad_norm": 0.84348064661026, "learning_rate": 3.3983887670254177e-06, "loss": 0.3622, "step": 9254 }, { "epoch": 2.971584524000642, "grad_norm": 0.9488373398780823, "learning_rate": 3.389371933570995e-06, "loss": 0.3887, "step": 9255 }, { "epoch": 2.9719056028254935, "grad_norm": 0.7091740369796753, "learning_rate": 3.380366871828522e-06, "loss": 0.2702, "step": 9256 }, { "epoch": 2.972226681650345, "grad_norm": 0.7415878176689148, "learning_rate": 3.3713735828952987e-06, "loss": 0.2945, "step": 9257 }, { "epoch": 2.9725477604751966, "grad_norm": 0.7940482497215271, "learning_rate": 3.3623920678670597e-06, "loss": 0.2931, "step": 9258 }, { "epoch": 2.972868839300048, "grad_norm": 0.745911717414856, "learning_rate": 3.3534223278382405e-06, "loss": 0.278, "step": 9259 }, { "epoch": 2.9731899181248997, "grad_norm": 0.6489874124526978, "learning_rate": 3.344464363901756e-06, "loss": 0.2768, "step": 9260 }, { "epoch": 2.9735109969497513, "grad_norm": 0.9306034445762634, "learning_rate": 3.3355181771490772e-06, "loss": 0.3243, "step": 9261 }, { "epoch": 2.9738320757746024, "grad_norm": 0.9093040823936462, "learning_rate": 3.3265837686703106e-06, "loss": 0.3566, "step": 9262 }, { "epoch": 2.9741531545994544, "grad_norm": 0.7288286685943604, "learning_rate": 3.3176611395540626e-06, "loss": 0.2532, "step": 9263 }, { "epoch": 2.9744742334243055, "grad_norm": 1.2597209215164185, "learning_rate": 3.3087502908875413e-06, "loss": 0.4041, "step": 9264 }, { "epoch": 2.974795312249157, "grad_norm": 0.8495983481407166, "learning_rate": 3.2998512237565005e-06, "loss": 0.3545, "step": 9265 }, { "epoch": 2.9751163910740086, "grad_norm": 0.9554901719093323, "learning_rate": 3.290963939245262e-06, "loss": 0.3384, "step": 9266 }, { "epoch": 2.97543746989886, "grad_norm": 0.5555322766304016, "learning_rate": 3.2820884384367146e-06, "loss": 0.2401, "step": 9267 }, { "epoch": 2.9757585487237117, 
"grad_norm": 0.5823652744293213, "learning_rate": 3.273224722412327e-06, "loss": 0.2434, "step": 9268 }, { "epoch": 2.9760796275485633, "grad_norm": 0.7519864439964294, "learning_rate": 3.2643727922520906e-06, "loss": 0.2569, "step": 9269 }, { "epoch": 2.976400706373415, "grad_norm": 0.6338984966278076, "learning_rate": 3.2555326490346095e-06, "loss": 0.2871, "step": 9270 }, { "epoch": 2.976721785198266, "grad_norm": 0.7331449389457703, "learning_rate": 3.246704293837011e-06, "loss": 0.2932, "step": 9271 }, { "epoch": 2.977042864023118, "grad_norm": 0.7755066752433777, "learning_rate": 3.2378877277350116e-06, "loss": 0.3737, "step": 9272 }, { "epoch": 2.977363942847969, "grad_norm": 1.2024750709533691, "learning_rate": 3.2290829518028862e-06, "loss": 0.2638, "step": 9273 }, { "epoch": 2.9776850216728206, "grad_norm": 0.7302016019821167, "learning_rate": 3.2202899671134546e-06, "loss": 0.2853, "step": 9274 }, { "epoch": 2.978006100497672, "grad_norm": 0.6680705547332764, "learning_rate": 3.2115087747381366e-06, "loss": 0.2578, "step": 9275 }, { "epoch": 2.9783271793225237, "grad_norm": 0.6737269163131714, "learning_rate": 3.2027393757468773e-06, "loss": 0.308, "step": 9276 }, { "epoch": 2.978648258147375, "grad_norm": 0.5532433986663818, "learning_rate": 3.19398177120821e-06, "loss": 0.2525, "step": 9277 }, { "epoch": 2.9789693369722268, "grad_norm": 0.49847257137298584, "learning_rate": 3.1852359621892367e-06, "loss": 0.2842, "step": 9278 }, { "epoch": 2.9792904157970783, "grad_norm": 0.4007408022880554, "learning_rate": 3.1765019497555616e-06, "loss": 0.2705, "step": 9279 }, { "epoch": 2.9796114946219294, "grad_norm": 0.4380526542663574, "learning_rate": 3.1677797349714544e-06, "loss": 0.4338, "step": 9280 }, { "epoch": 2.9799325734467814, "grad_norm": 0.45914846658706665, "learning_rate": 3.1590693188996323e-06, "loss": 0.33, "step": 9281 }, { "epoch": 2.9802536522716325, "grad_norm": 0.43808650970458984, "learning_rate": 3.1503707026014906e-06, "loss": 0.3765, 
"step": 9282 }, { "epoch": 2.980574731096484, "grad_norm": 0.36031296849250793, "learning_rate": 3.1416838871368924e-06, "loss": 0.1771, "step": 9283 }, { "epoch": 2.9808958099213356, "grad_norm": 0.28183841705322266, "learning_rate": 3.1330088735643025e-06, "loss": 0.1466, "step": 9284 }, { "epoch": 2.981216888746187, "grad_norm": 0.4093777537345886, "learning_rate": 3.124345662940764e-06, "loss": 0.1417, "step": 9285 }, { "epoch": 2.9815379675710387, "grad_norm": 0.3650287687778473, "learning_rate": 3.115694256321855e-06, "loss": 0.1786, "step": 9286 }, { "epoch": 2.9818590463958903, "grad_norm": 0.7063071131706238, "learning_rate": 3.1070546547617095e-06, "loss": 0.3095, "step": 9287 }, { "epoch": 2.982180125220742, "grad_norm": 0.9610477685928345, "learning_rate": 3.0984268593130528e-06, "loss": 0.4365, "step": 9288 }, { "epoch": 2.982501204045593, "grad_norm": 0.9653912782669067, "learning_rate": 3.0898108710271434e-06, "loss": 0.3772, "step": 9289 }, { "epoch": 2.982822282870445, "grad_norm": 0.7585698962211609, "learning_rate": 3.081206690953831e-06, "loss": 0.314, "step": 9290 }, { "epoch": 2.983143361695296, "grad_norm": 0.867253839969635, "learning_rate": 3.072614320141487e-06, "loss": 0.3468, "step": 9291 }, { "epoch": 2.9834644405201476, "grad_norm": 0.6894168257713318, "learning_rate": 3.064033759637064e-06, "loss": 0.2712, "step": 9292 }, { "epoch": 2.983785519344999, "grad_norm": 0.5496251583099365, "learning_rate": 3.0554650104861136e-06, "loss": 0.2125, "step": 9293 }, { "epoch": 2.9841065981698507, "grad_norm": 0.7446759343147278, "learning_rate": 3.046908073732668e-06, "loss": 0.3081, "step": 9294 }, { "epoch": 2.9844276769947022, "grad_norm": 0.8124382495880127, "learning_rate": 3.0383629504194046e-06, "loss": 0.281, "step": 9295 }, { "epoch": 2.984748755819554, "grad_norm": 1.0698877573013306, "learning_rate": 3.0298296415874894e-06, "loss": 0.3884, "step": 9296 }, { "epoch": 2.9850698346444053, "grad_norm": 0.9363170862197876, "learning_rate": 
3.0213081482766805e-06, "loss": 0.3386, "step": 9297 }, { "epoch": 2.9853909134692564, "grad_norm": 0.909624457359314, "learning_rate": 3.012798471525324e-06, "loss": 0.3767, "step": 9298 }, { "epoch": 2.9857119922941084, "grad_norm": 0.8160519003868103, "learning_rate": 3.0043006123702697e-06, "loss": 0.4078, "step": 9299 }, { "epoch": 2.9860330711189595, "grad_norm": 0.7235801219940186, "learning_rate": 2.9958145718469777e-06, "loss": 0.3223, "step": 9300 }, { "epoch": 2.986354149943811, "grad_norm": 0.9839329719543457, "learning_rate": 2.9873403509894203e-06, "loss": 0.4005, "step": 9301 }, { "epoch": 2.9866752287686626, "grad_norm": 0.7384074926376343, "learning_rate": 2.978877950830172e-06, "loss": 0.2616, "step": 9302 }, { "epoch": 2.986996307593514, "grad_norm": 0.7919383645057678, "learning_rate": 2.970427372400353e-06, "loss": 0.3024, "step": 9303 }, { "epoch": 2.9873173864183657, "grad_norm": 0.8400102257728577, "learning_rate": 2.9619886167296384e-06, "loss": 0.3372, "step": 9304 }, { "epoch": 2.9876384652432173, "grad_norm": 0.610448956489563, "learning_rate": 2.953561684846262e-06, "loss": 0.2516, "step": 9305 }, { "epoch": 2.987959544068069, "grad_norm": 0.9138132333755493, "learning_rate": 2.9451465777770247e-06, "loss": 0.3427, "step": 9306 }, { "epoch": 2.98828062289292, "grad_norm": 1.5538538694381714, "learning_rate": 2.936743296547273e-06, "loss": 0.4386, "step": 9307 }, { "epoch": 2.988601701717772, "grad_norm": 0.7500302195549011, "learning_rate": 2.928351842180921e-06, "loss": 0.2692, "step": 9308 }, { "epoch": 2.988922780542623, "grad_norm": 0.8792135119438171, "learning_rate": 2.9199722157004616e-06, "loss": 0.3637, "step": 9309 }, { "epoch": 2.9892438593674746, "grad_norm": 0.4652367830276489, "learning_rate": 2.9116044181269007e-06, "loss": 0.2135, "step": 9310 }, { "epoch": 2.989564938192326, "grad_norm": 0.9699134826660156, "learning_rate": 2.9032484504798452e-06, "loss": 0.3932, "step": 9311 }, { "epoch": 2.9898860170171777, 
"grad_norm": 0.7483917474746704, "learning_rate": 2.8949043137774355e-06, "loss": 0.3142, "step": 9312 }, { "epoch": 2.9902070958420293, "grad_norm": 0.6890571117401123, "learning_rate": 2.8865720090364034e-06, "loss": 0.3233, "step": 9313 }, { "epoch": 2.990528174666881, "grad_norm": 0.7219890356063843, "learning_rate": 2.878251537271981e-06, "loss": 0.254, "step": 9314 }, { "epoch": 2.9908492534917324, "grad_norm": 0.8167535662651062, "learning_rate": 2.8699428994980017e-06, "loss": 0.2907, "step": 9315 }, { "epoch": 2.9911703323165835, "grad_norm": 0.8420015573501587, "learning_rate": 2.8616460967268667e-06, "loss": 0.3101, "step": 9316 }, { "epoch": 2.9914914111414355, "grad_norm": 0.5992588996887207, "learning_rate": 2.8533611299694783e-06, "loss": 0.2537, "step": 9317 }, { "epoch": 2.9918124899662866, "grad_norm": 0.9275438189506531, "learning_rate": 2.845088000235396e-06, "loss": 0.2914, "step": 9318 }, { "epoch": 2.992133568791138, "grad_norm": 0.6742895245552063, "learning_rate": 2.836826708532603e-06, "loss": 0.2999, "step": 9319 }, { "epoch": 2.9924546476159897, "grad_norm": 0.9248641729354858, "learning_rate": 2.8285772558677705e-06, "loss": 0.3232, "step": 9320 }, { "epoch": 2.992775726440841, "grad_norm": 0.8363873362541199, "learning_rate": 2.8203396432460506e-06, "loss": 0.3208, "step": 9321 }, { "epoch": 2.9930968052656928, "grad_norm": 0.5395601987838745, "learning_rate": 2.8121138716711404e-06, "loss": 0.2489, "step": 9322 }, { "epoch": 2.9934178840905443, "grad_norm": 0.5238479375839233, "learning_rate": 2.8038999421453826e-06, "loss": 0.2793, "step": 9323 }, { "epoch": 2.993738962915396, "grad_norm": 0.9257795214653015, "learning_rate": 2.7956978556695767e-06, "loss": 0.3502, "step": 9324 }, { "epoch": 2.994060041740247, "grad_norm": 0.7510598301887512, "learning_rate": 2.7875076132431344e-06, "loss": 0.3333, "step": 9325 }, { "epoch": 2.994381120565099, "grad_norm": 0.5098982453346252, "learning_rate": 2.7793292158640126e-06, "loss": 0.2552, 
"step": 9326 }, { "epoch": 2.99470219938995, "grad_norm": 0.6713955402374268, "learning_rate": 2.771162664528726e-06, "loss": 0.2947, "step": 9327 }, { "epoch": 2.9950232782148016, "grad_norm": 0.7259739637374878, "learning_rate": 2.7630079602323442e-06, "loss": 0.3072, "step": 9328 }, { "epoch": 2.995344357039653, "grad_norm": 0.29132696986198425, "learning_rate": 2.7548651039684846e-06, "loss": 0.247, "step": 9329 }, { "epoch": 2.9956654358645047, "grad_norm": 0.37000009417533875, "learning_rate": 2.746734096729342e-06, "loss": 0.2994, "step": 9330 }, { "epoch": 2.9959865146893563, "grad_norm": 0.6013002395629883, "learning_rate": 2.738614939505646e-06, "loss": 0.2101, "step": 9331 }, { "epoch": 2.996307593514208, "grad_norm": 0.7807649970054626, "learning_rate": 2.7305076332867054e-06, "loss": 0.3249, "step": 9332 }, { "epoch": 2.9966286723390594, "grad_norm": 0.8677118420600891, "learning_rate": 2.7224121790603517e-06, "loss": 0.3319, "step": 9333 }, { "epoch": 2.9969497511639105, "grad_norm": 0.7280771136283875, "learning_rate": 2.714328577812997e-06, "loss": 0.3257, "step": 9334 }, { "epoch": 2.9972708299887625, "grad_norm": 1.6487222909927368, "learning_rate": 2.7062568305295965e-06, "loss": 0.2733, "step": 9335 }, { "epoch": 2.9975919088136136, "grad_norm": 0.6912175416946411, "learning_rate": 2.6981969381936978e-06, "loss": 0.2817, "step": 9336 }, { "epoch": 2.997912987638465, "grad_norm": 0.8025344610214233, "learning_rate": 2.690148901787337e-06, "loss": 0.327, "step": 9337 }, { "epoch": 2.9982340664633167, "grad_norm": 0.6244665384292603, "learning_rate": 2.6821127222911857e-06, "loss": 0.2552, "step": 9338 }, { "epoch": 2.9985551452881682, "grad_norm": 0.8349512219429016, "learning_rate": 2.6740884006843825e-06, "loss": 0.3398, "step": 9339 }, { "epoch": 2.99887622411302, "grad_norm": 0.9518395066261292, "learning_rate": 2.66607593794469e-06, "loss": 0.3202, "step": 9340 }, { "epoch": 2.9991973029378713, "grad_norm": 0.6690359115600586, 
"learning_rate": 2.6580753350484046e-06, "loss": 0.2847, "step": 9341 }, { "epoch": 2.999518381762723, "grad_norm": 0.6814465522766113, "learning_rate": 2.650086592970358e-06, "loss": 0.28, "step": 9342 }, { "epoch": 2.999839460587574, "grad_norm": 1.4310189485549927, "learning_rate": 2.6421097126839712e-06, "loss": 0.4217, "step": 9343 }, { "epoch": 3.0001605394124256, "grad_norm": 0.43879497051239014, "learning_rate": 2.6341446951612005e-06, "loss": 0.3707, "step": 9344 }, { "epoch": 3.000481618237277, "grad_norm": 0.40804460644721985, "learning_rate": 2.6261915413725578e-06, "loss": 0.4423, "step": 9345 }, { "epoch": 3.0008026970621287, "grad_norm": 0.38268548250198364, "learning_rate": 2.618250252287113e-06, "loss": 0.2687, "step": 9346 }, { "epoch": 3.00112377588698, "grad_norm": 0.2906617224216461, "learning_rate": 2.610320828872481e-06, "loss": 0.1438, "step": 9347 }, { "epoch": 3.0014448547118318, "grad_norm": 0.2549183964729309, "learning_rate": 2.6024032720948443e-06, "loss": 0.1358, "step": 9348 }, { "epoch": 3.0017659335366833, "grad_norm": 0.3139497637748718, "learning_rate": 2.59449758291892e-06, "loss": 0.1061, "step": 9349 }, { "epoch": 3.002087012361535, "grad_norm": 0.11320850253105164, "learning_rate": 2.5866037623080153e-06, "loss": 0.0611, "step": 9350 }, { "epoch": 3.0024080911863864, "grad_norm": 0.5909608602523804, "learning_rate": 2.5787218112239496e-06, "loss": 0.3883, "step": 9351 }, { "epoch": 3.002729170011238, "grad_norm": 0.5687462687492371, "learning_rate": 2.570851730627122e-06, "loss": 0.2654, "step": 9352 }, { "epoch": 3.003050248836089, "grad_norm": 0.6400092840194702, "learning_rate": 2.5629935214764865e-06, "loss": 0.302, "step": 9353 }, { "epoch": 3.0033713276609406, "grad_norm": 0.46738189458847046, "learning_rate": 2.5551471847295228e-06, "loss": 0.1982, "step": 9354 }, { "epoch": 3.003692406485792, "grad_norm": 0.5949174165725708, "learning_rate": 2.5473127213422763e-06, "loss": 0.2598, "step": 9355 }, { "epoch": 
3.0040134853106437, "grad_norm": 1.0250693559646606, "learning_rate": 2.5394901322694067e-06, "loss": 0.2021, "step": 9356 }, { "epoch": 3.0043345641354953, "grad_norm": 0.5591093897819519, "learning_rate": 2.531679418464006e-06, "loss": 0.2286, "step": 9357 }, { "epoch": 3.004655642960347, "grad_norm": 0.5650901198387146, "learning_rate": 2.5238805808778242e-06, "loss": 0.2416, "step": 9358 }, { "epoch": 3.0049767217851984, "grad_norm": 0.5326758027076721, "learning_rate": 2.516093620461124e-06, "loss": 0.2221, "step": 9359 }, { "epoch": 3.00529780061005, "grad_norm": 0.6107519865036011, "learning_rate": 2.508318538162702e-06, "loss": 0.2568, "step": 9360 }, { "epoch": 3.0056188794349015, "grad_norm": 0.5060980319976807, "learning_rate": 2.5005553349299547e-06, "loss": 0.228, "step": 9361 }, { "epoch": 3.0059399582597526, "grad_norm": 0.5858919024467468, "learning_rate": 2.4928040117087827e-06, "loss": 0.2552, "step": 9362 }, { "epoch": 3.006261037084604, "grad_norm": 0.5963205099105835, "learning_rate": 2.4850645694436736e-06, "loss": 0.2336, "step": 9363 }, { "epoch": 3.0065821159094557, "grad_norm": 0.5636211633682251, "learning_rate": 2.4773370090776626e-06, "loss": 0.1921, "step": 9364 }, { "epoch": 3.0069031947343072, "grad_norm": 0.5564295649528503, "learning_rate": 2.4696213315523074e-06, "loss": 0.2321, "step": 9365 }, { "epoch": 3.007224273559159, "grad_norm": 0.5773066282272339, "learning_rate": 2.4619175378077565e-06, "loss": 0.2308, "step": 9366 }, { "epoch": 3.0075453523840103, "grad_norm": 0.6271586418151855, "learning_rate": 2.4542256287826914e-06, "loss": 0.2407, "step": 9367 }, { "epoch": 3.007866431208862, "grad_norm": 0.5038358569145203, "learning_rate": 2.446545605414341e-06, "loss": 0.2377, "step": 9368 }, { "epoch": 3.0081875100337134, "grad_norm": 0.6960586905479431, "learning_rate": 2.4388774686385007e-06, "loss": 0.2911, "step": 9369 }, { "epoch": 3.008508588858565, "grad_norm": 0.5753246545791626, "learning_rate": 2.4312212193895125e-06, 
"loss": 0.2706, "step": 9370 }, { "epoch": 3.008829667683416, "grad_norm": 0.6433031558990479, "learning_rate": 2.4235768586002517e-06, "loss": 0.2616, "step": 9371 }, { "epoch": 3.0091507465082676, "grad_norm": 0.615394115447998, "learning_rate": 2.415944387202174e-06, "loss": 0.2654, "step": 9372 }, { "epoch": 3.009471825333119, "grad_norm": 0.7627741694450378, "learning_rate": 2.4083238061252567e-06, "loss": 0.318, "step": 9373 }, { "epoch": 3.0097929041579707, "grad_norm": 0.6070566773414612, "learning_rate": 2.40071511629808e-06, "loss": 0.2499, "step": 9374 }, { "epoch": 3.0101139829828223, "grad_norm": 0.8189682364463806, "learning_rate": 2.3931183186477026e-06, "loss": 0.2997, "step": 9375 }, { "epoch": 3.010435061807674, "grad_norm": 0.6750493049621582, "learning_rate": 2.385533414099783e-06, "loss": 0.2817, "step": 9376 }, { "epoch": 3.0107561406325254, "grad_norm": 0.7114454507827759, "learning_rate": 2.3779604035785273e-06, "loss": 0.3113, "step": 9377 }, { "epoch": 3.011077219457377, "grad_norm": 0.6532655954360962, "learning_rate": 2.3703992880066638e-06, "loss": 0.2617, "step": 9378 }, { "epoch": 3.0113982982822285, "grad_norm": 0.44464078545570374, "learning_rate": 2.3628500683055222e-06, "loss": 0.2033, "step": 9379 }, { "epoch": 3.0117193771070796, "grad_norm": 0.6115095615386963, "learning_rate": 2.355312745394922e-06, "loss": 0.2694, "step": 9380 }, { "epoch": 3.012040455931931, "grad_norm": 0.3214268982410431, "learning_rate": 2.3477873201932734e-06, "loss": 0.1775, "step": 9381 }, { "epoch": 3.0123615347567827, "grad_norm": 0.6672443151473999, "learning_rate": 2.3402737936175425e-06, "loss": 0.2725, "step": 9382 }, { "epoch": 3.0126826135816343, "grad_norm": 0.5313708782196045, "learning_rate": 2.332772166583208e-06, "loss": 0.2661, "step": 9383 }, { "epoch": 3.013003692406486, "grad_norm": 0.7770663499832153, "learning_rate": 2.325282440004339e-06, "loss": 0.3296, "step": 9384 }, { "epoch": 3.0133247712313374, "grad_norm": 0.4733831584453583, 
"learning_rate": 2.3178046147935175e-06, "loss": 0.2151, "step": 9385 }, { "epoch": 3.013645850056189, "grad_norm": 0.5805613398551941, "learning_rate": 2.3103386918619018e-06, "loss": 0.2395, "step": 9386 }, { "epoch": 3.0139669288810405, "grad_norm": 0.6747123003005981, "learning_rate": 2.3028846721191876e-06, "loss": 0.3138, "step": 9387 }, { "epoch": 3.014288007705892, "grad_norm": 0.4454551637172699, "learning_rate": 2.295442556473637e-06, "loss": 0.231, "step": 9388 }, { "epoch": 3.014609086530743, "grad_norm": 0.5723088979721069, "learning_rate": 2.288012345832047e-06, "loss": 0.2234, "step": 9389 }, { "epoch": 3.0149301653555947, "grad_norm": 0.5944519639015198, "learning_rate": 2.2805940410997484e-06, "loss": 0.2433, "step": 9390 }, { "epoch": 3.015251244180446, "grad_norm": 0.32701706886291504, "learning_rate": 2.273187643180652e-06, "loss": 0.2194, "step": 9391 }, { "epoch": 3.0155723230052978, "grad_norm": 0.37137144804000854, "learning_rate": 2.2657931529772136e-06, "loss": 0.2192, "step": 9392 }, { "epoch": 3.0158934018301493, "grad_norm": 0.4273146092891693, "learning_rate": 2.2584105713904125e-06, "loss": 0.2554, "step": 9393 }, { "epoch": 3.016214480655001, "grad_norm": 0.357513964176178, "learning_rate": 2.2510398993198067e-06, "loss": 0.393, "step": 9394 }, { "epoch": 3.0165355594798524, "grad_norm": 0.5867639183998108, "learning_rate": 2.2436811376634893e-06, "loss": 0.7159, "step": 9395 }, { "epoch": 3.016856638304704, "grad_norm": 0.3277376890182495, "learning_rate": 2.2363342873180757e-06, "loss": 0.2004, "step": 9396 }, { "epoch": 3.0171777171295555, "grad_norm": 0.2729596793651581, "learning_rate": 2.2289993491788064e-06, "loss": 0.1336, "step": 9397 }, { "epoch": 3.0174987959544066, "grad_norm": 0.2100820392370224, "learning_rate": 2.2216763241393767e-06, "loss": 0.092, "step": 9398 }, { "epoch": 3.017819874779258, "grad_norm": 0.24260498583316803, "learning_rate": 2.2143652130921176e-06, "loss": 0.0919, "step": 9399 }, { "epoch": 
3.0181409536041097, "grad_norm": 0.4891859292984009, "learning_rate": 2.2070660169278166e-06, "loss": 0.2427, "step": 9400 }, { "epoch": 3.0184620324289613, "grad_norm": 0.6580247282981873, "learning_rate": 2.1997787365358958e-06, "loss": 0.3484, "step": 9401 }, { "epoch": 3.018783111253813, "grad_norm": 0.6362596154212952, "learning_rate": 2.192503372804278e-06, "loss": 0.2941, "step": 9402 }, { "epoch": 3.0191041900786644, "grad_norm": 0.7072274088859558, "learning_rate": 2.1852399266194314e-06, "loss": 0.3115, "step": 9403 }, { "epoch": 3.019425268903516, "grad_norm": 0.5164889097213745, "learning_rate": 2.177988398866415e-06, "loss": 0.2195, "step": 9404 }, { "epoch": 3.0197463477283675, "grad_norm": 0.5418469905853271, "learning_rate": 2.1707487904287672e-06, "loss": 0.2464, "step": 9405 }, { "epoch": 3.020067426553219, "grad_norm": 0.6452725529670715, "learning_rate": 2.163521102188648e-06, "loss": 0.2784, "step": 9406 }, { "epoch": 3.02038850537807, "grad_norm": 0.5608880519866943, "learning_rate": 2.156305335026698e-06, "loss": 0.2553, "step": 9407 }, { "epoch": 3.0207095842029217, "grad_norm": 0.4648946225643158, "learning_rate": 2.1491014898221582e-06, "loss": 0.1944, "step": 9408 }, { "epoch": 3.0210306630277732, "grad_norm": 0.5879479050636292, "learning_rate": 2.141909567452793e-06, "loss": 0.2373, "step": 9409 }, { "epoch": 3.021351741852625, "grad_norm": 0.6517860889434814, "learning_rate": 2.134729568794902e-06, "loss": 0.2796, "step": 9410 }, { "epoch": 3.0216728206774763, "grad_norm": 1.0598267316818237, "learning_rate": 2.1275614947233624e-06, "loss": 0.249, "step": 9411 }, { "epoch": 3.021993899502328, "grad_norm": 0.6839745044708252, "learning_rate": 2.120405346111576e-06, "loss": 0.269, "step": 9412 }, { "epoch": 3.0223149783271794, "grad_norm": 0.6479743123054504, "learning_rate": 2.1132611238315003e-06, "loss": 0.2993, "step": 9413 }, { "epoch": 3.022636057152031, "grad_norm": 0.5197052955627441, "learning_rate": 2.1061288287536285e-06, 
"loss": 0.2089, "step": 9414 }, { "epoch": 3.0229571359768825, "grad_norm": 0.5962384939193726, "learning_rate": 2.0990084617470206e-06, "loss": 0.2327, "step": 9415 }, { "epoch": 3.0232782148017336, "grad_norm": 1.2533619403839111, "learning_rate": 2.0919000236792607e-06, "loss": 0.3158, "step": 9416 }, { "epoch": 3.023599293626585, "grad_norm": 0.5709370374679565, "learning_rate": 2.084803515416511e-06, "loss": 0.2074, "step": 9417 }, { "epoch": 3.0239203724514367, "grad_norm": 0.8780946731567383, "learning_rate": 2.0777189378234143e-06, "loss": 0.356, "step": 9418 }, { "epoch": 3.0242414512762883, "grad_norm": 0.6024912595748901, "learning_rate": 2.0706462917632673e-06, "loss": 0.2511, "step": 9419 }, { "epoch": 3.02456253010114, "grad_norm": 0.6366291046142578, "learning_rate": 2.0635855780978044e-06, "loss": 0.2302, "step": 9420 }, { "epoch": 3.0248836089259914, "grad_norm": 0.6438530683517456, "learning_rate": 2.0565367976873584e-06, "loss": 0.2584, "step": 9421 }, { "epoch": 3.025204687750843, "grad_norm": 0.5729436874389648, "learning_rate": 2.049499951390832e-06, "loss": 0.237, "step": 9422 }, { "epoch": 3.0255257665756945, "grad_norm": 0.7282575964927673, "learning_rate": 2.0424750400655947e-06, "loss": 0.2925, "step": 9423 }, { "epoch": 3.025846845400546, "grad_norm": 0.5626926422119141, "learning_rate": 2.0354620645676504e-06, "loss": 0.2095, "step": 9424 }, { "epoch": 3.026167924225397, "grad_norm": 0.5786295533180237, "learning_rate": 2.0284610257514937e-06, "loss": 0.2728, "step": 9425 }, { "epoch": 3.0264890030502487, "grad_norm": 0.48927032947540283, "learning_rate": 2.021471924470175e-06, "loss": 0.201, "step": 9426 }, { "epoch": 3.0268100818751003, "grad_norm": 0.5696834921836853, "learning_rate": 2.014494761575314e-06, "loss": 0.2512, "step": 9427 }, { "epoch": 3.027131160699952, "grad_norm": 0.32591769099235535, "learning_rate": 2.0075295379170412e-06, "loss": 0.1816, "step": 9428 }, { "epoch": 3.0274522395248034, "grad_norm": 
0.4733005464076996, "learning_rate": 2.0005762543440445e-06, "loss": 0.2298, "step": 9429 }, { "epoch": 3.027773318349655, "grad_norm": 0.42047736048698425, "learning_rate": 1.993634911703579e-06, "loss": 0.1959, "step": 9430 }, { "epoch": 3.0280943971745065, "grad_norm": 0.5498672127723694, "learning_rate": 1.986705510841402e-06, "loss": 0.2253, "step": 9431 }, { "epoch": 3.028415475999358, "grad_norm": 0.5200008153915405, "learning_rate": 1.979788052601861e-06, "loss": 0.2527, "step": 9432 }, { "epoch": 3.0287365548242096, "grad_norm": 0.5989855527877808, "learning_rate": 1.9728825378278246e-06, "loss": 0.2299, "step": 9433 }, { "epoch": 3.0290576336490607, "grad_norm": 0.48008453845977783, "learning_rate": 1.965988967360688e-06, "loss": 0.2221, "step": 9434 }, { "epoch": 3.029378712473912, "grad_norm": 0.6560676097869873, "learning_rate": 1.9591073420404337e-06, "loss": 0.2343, "step": 9435 }, { "epoch": 3.0296997912987638, "grad_norm": 0.5400110483169556, "learning_rate": 1.9522376627055583e-06, "loss": 0.2449, "step": 9436 }, { "epoch": 3.0300208701236153, "grad_norm": 0.4094994366168976, "learning_rate": 1.945379930193125e-06, "loss": 0.2177, "step": 9437 }, { "epoch": 3.030341948948467, "grad_norm": 0.37665337324142456, "learning_rate": 1.9385341453386995e-06, "loss": 0.2032, "step": 9438 }, { "epoch": 3.0306630277733184, "grad_norm": 0.6781096458435059, "learning_rate": 1.931700308976436e-06, "loss": 0.2793, "step": 9439 }, { "epoch": 3.03098410659817, "grad_norm": 0.43711480498313904, "learning_rate": 1.924878421939036e-06, "loss": 0.2372, "step": 9440 }, { "epoch": 3.0313051854230215, "grad_norm": 0.8434777855873108, "learning_rate": 1.918068485057689e-06, "loss": 0.2299, "step": 9441 }, { "epoch": 3.031626264247873, "grad_norm": 0.38669654726982117, "learning_rate": 1.911270499162199e-06, "loss": 0.2403, "step": 9442 }, { "epoch": 3.031947343072724, "grad_norm": 0.29554906487464905, "learning_rate": 1.904484465080847e-06, "loss": 0.2357, "step": 9443 }, 
{ "epoch": 3.0322684218975757, "grad_norm": 0.5543558597564697, "learning_rate": 1.8977103836405053e-06, "loss": 0.3659, "step": 9444 }, { "epoch": 3.0325895007224273, "grad_norm": 0.2872603237628937, "learning_rate": 1.8909482556666024e-06, "loss": 0.2276, "step": 9445 }, { "epoch": 3.032910579547279, "grad_norm": 0.3142801523208618, "learning_rate": 1.8841980819830351e-06, "loss": 0.1512, "step": 9446 }, { "epoch": 3.0332316583721304, "grad_norm": 0.301505446434021, "learning_rate": 1.8774598634123232e-06, "loss": 0.1045, "step": 9447 }, { "epoch": 3.033552737196982, "grad_norm": 0.5997371077537537, "learning_rate": 1.8707336007754873e-06, "loss": 0.2805, "step": 9448 }, { "epoch": 3.0338738160218335, "grad_norm": 0.7243043184280396, "learning_rate": 1.8640192948921054e-06, "loss": 0.3329, "step": 9449 }, { "epoch": 3.034194894846685, "grad_norm": 0.6885005831718445, "learning_rate": 1.8573169465802898e-06, "loss": 0.3327, "step": 9450 }, { "epoch": 3.034515973671536, "grad_norm": 0.6366089582443237, "learning_rate": 1.8506265566567094e-06, "loss": 0.2606, "step": 9451 }, { "epoch": 3.0348370524963877, "grad_norm": 0.5336918234825134, "learning_rate": 1.8439481259365675e-06, "loss": 0.2271, "step": 9452 }, { "epoch": 3.0351581313212392, "grad_norm": 0.4865361154079437, "learning_rate": 1.8372816552336026e-06, "loss": 0.1939, "step": 9453 }, { "epoch": 3.035479210146091, "grad_norm": 0.6160674095153809, "learning_rate": 1.8306271453601199e-06, "loss": 0.2288, "step": 9454 }, { "epoch": 3.0358002889709423, "grad_norm": 0.49999356269836426, "learning_rate": 1.8239845971269266e-06, "loss": 0.1962, "step": 9455 }, { "epoch": 3.036121367795794, "grad_norm": 0.8584988713264465, "learning_rate": 1.8173540113434194e-06, "loss": 0.3474, "step": 9456 }, { "epoch": 3.0364424466206454, "grad_norm": 0.5525678396224976, "learning_rate": 1.8107353888175083e-06, "loss": 0.2213, "step": 9457 }, { "epoch": 3.036763525445497, "grad_norm": 0.5510037541389465, "learning_rate": 
1.8041287303556364e-06, "loss": 0.2106, "step": 9458 }, { "epoch": 3.0370846042703485, "grad_norm": 0.8154999613761902, "learning_rate": 1.7975340367628268e-06, "loss": 0.2556, "step": 9459 }, { "epoch": 3.0374056830951996, "grad_norm": 0.7554807662963867, "learning_rate": 1.7909513088426255e-06, "loss": 0.3069, "step": 9460 }, { "epoch": 3.037726761920051, "grad_norm": 0.6828257441520691, "learning_rate": 1.7843805473970798e-06, "loss": 0.277, "step": 9461 }, { "epoch": 3.0380478407449028, "grad_norm": 1.2714136838912964, "learning_rate": 1.7778217532268714e-06, "loss": 0.2337, "step": 9462 }, { "epoch": 3.0383689195697543, "grad_norm": 1.116326093673706, "learning_rate": 1.771274927131139e-06, "loss": 0.3795, "step": 9463 }, { "epoch": 3.038689998394606, "grad_norm": 0.7132155299186707, "learning_rate": 1.7647400699075888e-06, "loss": 0.2681, "step": 9464 }, { "epoch": 3.0390110772194574, "grad_norm": 0.750069797039032, "learning_rate": 1.7582171823524951e-06, "loss": 0.2974, "step": 9465 }, { "epoch": 3.039332156044309, "grad_norm": 0.6962805986404419, "learning_rate": 1.751706265260611e-06, "loss": 0.246, "step": 9466 }, { "epoch": 3.0396532348691605, "grad_norm": 0.6339988708496094, "learning_rate": 1.7452073194253238e-06, "loss": 0.2297, "step": 9467 }, { "epoch": 3.039974313694012, "grad_norm": 0.7461841702461243, "learning_rate": 1.7387203456384782e-06, "loss": 0.3137, "step": 9468 }, { "epoch": 3.040295392518863, "grad_norm": 0.6014420986175537, "learning_rate": 1.7322453446905085e-06, "loss": 0.2633, "step": 9469 }, { "epoch": 3.0406164713437147, "grad_norm": 1.0465008020401, "learning_rate": 1.7257823173703503e-06, "loss": 0.352, "step": 9470 }, { "epoch": 3.0409375501685663, "grad_norm": 0.4762720465660095, "learning_rate": 1.719331264465529e-06, "loss": 0.224, "step": 9471 }, { "epoch": 3.041258628993418, "grad_norm": 0.6413834095001221, "learning_rate": 1.712892186762083e-06, "loss": 0.25, "step": 9472 }, { "epoch": 3.0415797078182694, "grad_norm": 
0.46483296155929565, "learning_rate": 1.706465085044584e-06, "loss": 0.2117, "step": 9473 }, { "epoch": 3.041900786643121, "grad_norm": 0.49583905935287476, "learning_rate": 1.7000499600961505e-06, "loss": 0.198, "step": 9474 }, { "epoch": 3.0422218654679725, "grad_norm": 0.6456876993179321, "learning_rate": 1.6936468126984572e-06, "loss": 0.2849, "step": 9475 }, { "epoch": 3.042542944292824, "grad_norm": 0.589216947555542, "learning_rate": 1.6872556436317022e-06, "loss": 0.228, "step": 9476 }, { "epoch": 3.0428640231176756, "grad_norm": 0.5428295135498047, "learning_rate": 1.680876453674629e-06, "loss": 0.2151, "step": 9477 }, { "epoch": 3.0431851019425267, "grad_norm": 0.5795981884002686, "learning_rate": 1.6745092436045494e-06, "loss": 0.2326, "step": 9478 }, { "epoch": 3.043506180767378, "grad_norm": 0.6216808557510376, "learning_rate": 1.6681540141972429e-06, "loss": 0.2701, "step": 9479 }, { "epoch": 3.0438272595922298, "grad_norm": 0.5757028460502625, "learning_rate": 1.661810766227112e-06, "loss": 0.2414, "step": 9480 }, { "epoch": 3.0441483384170813, "grad_norm": 0.731472373008728, "learning_rate": 1.6554795004670388e-06, "loss": 0.259, "step": 9481 }, { "epoch": 3.044469417241933, "grad_norm": 0.6411800384521484, "learning_rate": 1.6491602176884724e-06, "loss": 0.2675, "step": 9482 }, { "epoch": 3.0447904960667844, "grad_norm": 0.40002959966659546, "learning_rate": 1.6428529186614195e-06, "loss": 0.2122, "step": 9483 }, { "epoch": 3.045111574891636, "grad_norm": 0.9253550171852112, "learning_rate": 1.636557604154365e-06, "loss": 0.3639, "step": 9484 }, { "epoch": 3.0454326537164875, "grad_norm": 0.5021322965621948, "learning_rate": 1.6302742749344291e-06, "loss": 0.2332, "step": 9485 }, { "epoch": 3.045753732541339, "grad_norm": 0.3761097192764282, "learning_rate": 1.6240029317671658e-06, "loss": 0.2103, "step": 9486 }, { "epoch": 3.04607481136619, "grad_norm": 0.6944969892501831, "learning_rate": 1.6177435754167415e-06, "loss": 0.2706, "step": 9487 }, { 
"epoch": 3.0463958901910417, "grad_norm": 0.43862295150756836, "learning_rate": 1.611496206645835e-06, "loss": 0.2505, "step": 9488 }, { "epoch": 3.0467169690158933, "grad_norm": 0.36449775099754333, "learning_rate": 1.605260826215682e-06, "loss": 0.2377, "step": 9489 }, { "epoch": 3.047038047840745, "grad_norm": 0.4346637725830078, "learning_rate": 1.5990374348860305e-06, "loss": 0.2281, "step": 9490 }, { "epoch": 3.0473591266655964, "grad_norm": 0.623367190361023, "learning_rate": 1.5928260334151845e-06, "loss": 0.2755, "step": 9491 }, { "epoch": 3.047680205490448, "grad_norm": 0.8898089528083801, "learning_rate": 1.5866266225599834e-06, "loss": 0.2915, "step": 9492 }, { "epoch": 3.0480012843152995, "grad_norm": 0.3457051217556, "learning_rate": 1.580439203075812e-06, "loss": 0.2286, "step": 9493 }, { "epoch": 3.048322363140151, "grad_norm": 0.48671117424964905, "learning_rate": 1.574263775716578e-06, "loss": 0.3948, "step": 9494 }, { "epoch": 3.0486434419650026, "grad_norm": 0.5143343210220337, "learning_rate": 1.5681003412347572e-06, "loss": 0.4827, "step": 9495 }, { "epoch": 3.0489645207898537, "grad_norm": 0.37674856185913086, "learning_rate": 1.561948900381327e-06, "loss": 0.2705, "step": 9496 }, { "epoch": 3.0492855996147052, "grad_norm": 0.3917763829231262, "learning_rate": 1.555809453905821e-06, "loss": 0.2283, "step": 9497 }, { "epoch": 3.049606678439557, "grad_norm": 0.4415786564350128, "learning_rate": 1.5496820025563409e-06, "loss": 0.2775, "step": 9498 }, { "epoch": 3.0499277572644083, "grad_norm": 0.28412747383117676, "learning_rate": 1.543566547079467e-06, "loss": 0.134, "step": 9499 }, { "epoch": 3.05024883608926, "grad_norm": 0.32522836327552795, "learning_rate": 1.5374630882203588e-06, "loss": 0.1553, "step": 9500 }, { "epoch": 3.0505699149141114, "grad_norm": 0.37719276547431946, "learning_rate": 1.5313716267226996e-06, "loss": 0.1436, "step": 9501 }, { "epoch": 3.050890993738963, "grad_norm": 0.7100079655647278, "learning_rate": 
1.5252921633287177e-06, "loss": 0.423, "step": 9502 }, { "epoch": 3.0512120725638145, "grad_norm": 0.6729628443717957, "learning_rate": 1.5192246987791981e-06, "loss": 0.2616, "step": 9503 }, { "epoch": 3.051533151388666, "grad_norm": 0.7652589082717896, "learning_rate": 1.5131692338134052e-06, "loss": 0.2771, "step": 9504 }, { "epoch": 3.051854230213517, "grad_norm": 0.6066986918449402, "learning_rate": 1.5071257691692153e-06, "loss": 0.2357, "step": 9505 }, { "epoch": 3.0521753090383688, "grad_norm": 0.580855131149292, "learning_rate": 1.501094305582984e-06, "loss": 0.2318, "step": 9506 }, { "epoch": 3.0524963878632203, "grad_norm": 0.5671026110649109, "learning_rate": 1.4950748437896234e-06, "loss": 0.211, "step": 9507 }, { "epoch": 3.052817466688072, "grad_norm": 0.6296615600585938, "learning_rate": 1.4890673845226133e-06, "loss": 0.2119, "step": 9508 }, { "epoch": 3.0531385455129234, "grad_norm": 0.6201547980308533, "learning_rate": 1.4830719285139127e-06, "loss": 0.2538, "step": 9509 }, { "epoch": 3.053459624337775, "grad_norm": 0.9474941492080688, "learning_rate": 1.4770884764940706e-06, "loss": 0.242, "step": 9510 }, { "epoch": 3.0537807031626265, "grad_norm": 0.6287795901298523, "learning_rate": 1.4711170291921484e-06, "loss": 0.2053, "step": 9511 }, { "epoch": 3.054101781987478, "grad_norm": 0.6485587358474731, "learning_rate": 1.4651575873357416e-06, "loss": 0.2242, "step": 9512 }, { "epoch": 3.0544228608123296, "grad_norm": 0.6445651054382324, "learning_rate": 1.4592101516509914e-06, "loss": 0.2214, "step": 9513 }, { "epoch": 3.0547439396371807, "grad_norm": 0.6522722244262695, "learning_rate": 1.4532747228625854e-06, "loss": 0.2488, "step": 9514 }, { "epoch": 3.0550650184620323, "grad_norm": 0.6885031461715698, "learning_rate": 1.4473513016937223e-06, "loss": 0.2459, "step": 9515 }, { "epoch": 3.055386097286884, "grad_norm": 0.6756393909454346, "learning_rate": 1.4414398888661695e-06, "loss": 0.2131, "step": 9516 }, { "epoch": 3.0557071761117354, 
"grad_norm": 0.5485737919807434, "learning_rate": 1.4355404851001952e-06, "loss": 0.2285, "step": 9517 }, { "epoch": 3.056028254936587, "grad_norm": 0.3958723247051239, "learning_rate": 1.4296530911146466e-06, "loss": 0.1763, "step": 9518 }, { "epoch": 3.0563493337614385, "grad_norm": 0.5892900824546814, "learning_rate": 1.423777707626872e-06, "loss": 0.1842, "step": 9519 }, { "epoch": 3.05667041258629, "grad_norm": 0.6213316321372986, "learning_rate": 1.4179143353527547e-06, "loss": 0.26, "step": 9520 }, { "epoch": 3.0569914914111416, "grad_norm": 0.6566430330276489, "learning_rate": 1.412062975006767e-06, "loss": 0.2532, "step": 9521 }, { "epoch": 3.057312570235993, "grad_norm": 0.5408963561058044, "learning_rate": 1.4062236273018392e-06, "loss": 0.2406, "step": 9522 }, { "epoch": 3.0576336490608442, "grad_norm": 0.552483856678009, "learning_rate": 1.400396292949513e-06, "loss": 0.2385, "step": 9523 }, { "epoch": 3.057954727885696, "grad_norm": 0.6494653820991516, "learning_rate": 1.394580972659798e-06, "loss": 0.215, "step": 9524 }, { "epoch": 3.0582758067105473, "grad_norm": 0.5467716455459595, "learning_rate": 1.3887776671412943e-06, "loss": 0.2344, "step": 9525 }, { "epoch": 3.058596885535399, "grad_norm": 0.6003792881965637, "learning_rate": 1.3829863771011253e-06, "loss": 0.2197, "step": 9526 }, { "epoch": 3.0589179643602504, "grad_norm": 0.4577672481536865, "learning_rate": 1.377207103244904e-06, "loss": 0.2218, "step": 9527 }, { "epoch": 3.059239043185102, "grad_norm": 0.6770538687705994, "learning_rate": 1.3714398462768563e-06, "loss": 0.239, "step": 9528 }, { "epoch": 3.0595601220099535, "grad_norm": 0.5816624164581299, "learning_rate": 1.3656846068996976e-06, "loss": 0.2556, "step": 9529 }, { "epoch": 3.059881200834805, "grad_norm": 0.7855013012886047, "learning_rate": 1.359941385814667e-06, "loss": 0.2844, "step": 9530 }, { "epoch": 3.0602022796596566, "grad_norm": 0.26091650128364563, "learning_rate": 1.3542101837215826e-06, "loss": 0.1777, "step": 
9531 }, { "epoch": 3.0605233584845077, "grad_norm": 0.46976807713508606, "learning_rate": 1.3484910013187524e-06, "loss": 0.2338, "step": 9532 }, { "epoch": 3.0608444373093593, "grad_norm": 0.46279844641685486, "learning_rate": 1.3427838393030633e-06, "loss": 0.2305, "step": 9533 }, { "epoch": 3.061165516134211, "grad_norm": 0.6863793730735779, "learning_rate": 1.3370886983698927e-06, "loss": 0.2443, "step": 9534 }, { "epoch": 3.0614865949590624, "grad_norm": 0.6161332130432129, "learning_rate": 1.3314055792131964e-06, "loss": 0.2431, "step": 9535 }, { "epoch": 3.061807673783914, "grad_norm": 0.4130920171737671, "learning_rate": 1.3257344825254315e-06, "loss": 0.2002, "step": 9536 }, { "epoch": 3.0621287526087655, "grad_norm": 0.7032944560050964, "learning_rate": 1.320075408997612e-06, "loss": 0.261, "step": 9537 }, { "epoch": 3.062449831433617, "grad_norm": 0.563331127166748, "learning_rate": 1.3144283593192752e-06, "loss": 0.2508, "step": 9538 }, { "epoch": 3.0627709102584686, "grad_norm": 0.2759096026420593, "learning_rate": 1.308793334178493e-06, "loss": 0.2001, "step": 9539 }, { "epoch": 3.06309198908332, "grad_norm": 0.3822631537914276, "learning_rate": 1.303170334261883e-06, "loss": 0.2032, "step": 9540 }, { "epoch": 3.0634130679081713, "grad_norm": 0.397834837436676, "learning_rate": 1.2975593602545965e-06, "loss": 0.2111, "step": 9541 }, { "epoch": 3.063734146733023, "grad_norm": 0.3753139078617096, "learning_rate": 1.2919604128402874e-06, "loss": 0.2241, "step": 9542 }, { "epoch": 3.0640552255578744, "grad_norm": 0.4659748673439026, "learning_rate": 1.2863734927012095e-06, "loss": 0.2573, "step": 9543 }, { "epoch": 3.064376304382726, "grad_norm": 0.35863274335861206, "learning_rate": 1.280798600518085e-06, "loss": 0.33, "step": 9544 }, { "epoch": 3.0646973832075775, "grad_norm": 0.48905041813850403, "learning_rate": 1.275235736970193e-06, "loss": 0.5968, "step": 9545 }, { "epoch": 3.065018462032429, "grad_norm": 0.3606785237789154, "learning_rate": 
1.2696849027353796e-06, "loss": 0.2948, "step": 9546 }, { "epoch": 3.0653395408572806, "grad_norm": 0.32427453994750977, "learning_rate": 1.26414609848996e-06, "loss": 0.1369, "step": 9547 }, { "epoch": 3.065660619682132, "grad_norm": 0.3010817766189575, "learning_rate": 1.2586193249088608e-06, "loss": 0.146, "step": 9548 }, { "epoch": 3.0659816985069837, "grad_norm": 0.3775825500488281, "learning_rate": 1.2531045826654653e-06, "loss": 0.1995, "step": 9549 }, { "epoch": 3.0663027773318348, "grad_norm": 0.1418655514717102, "learning_rate": 1.2476018724317585e-06, "loss": 0.0591, "step": 9550 }, { "epoch": 3.0666238561566863, "grad_norm": 0.36233529448509216, "learning_rate": 1.2421111948782149e-06, "loss": 0.1744, "step": 9551 }, { "epoch": 3.066944934981538, "grad_norm": 0.6624466180801392, "learning_rate": 1.236632550673844e-06, "loss": 0.3614, "step": 9552 }, { "epoch": 3.0672660138063894, "grad_norm": 0.6916109919548035, "learning_rate": 1.231165940486234e-06, "loss": 0.2959, "step": 9553 }, { "epoch": 3.067587092631241, "grad_norm": 0.7930703163146973, "learning_rate": 1.225711364981441e-06, "loss": 0.3368, "step": 9554 }, { "epoch": 3.0679081714560925, "grad_norm": 0.636942982673645, "learning_rate": 1.2202688248241112e-06, "loss": 0.2359, "step": 9555 }, { "epoch": 3.068229250280944, "grad_norm": 0.5777634978294373, "learning_rate": 1.2148383206773916e-06, "loss": 0.2183, "step": 9556 }, { "epoch": 3.0685503291057956, "grad_norm": 0.6497059464454651, "learning_rate": 1.2094198532029755e-06, "loss": 0.2373, "step": 9557 }, { "epoch": 3.068871407930647, "grad_norm": 0.6847310066223145, "learning_rate": 1.20401342306109e-06, "loss": 0.2691, "step": 9558 }, { "epoch": 3.0691924867554983, "grad_norm": 0.6610705256462097, "learning_rate": 1.1986190309104861e-06, "loss": 0.2512, "step": 9559 }, { "epoch": 3.06951356558035, "grad_norm": 0.5450610518455505, "learning_rate": 1.193236677408449e-06, "loss": 0.1912, "step": 9560 }, { "epoch": 3.0698346444052014, 
"grad_norm": 0.5279379487037659, "learning_rate": 1.1878663632108322e-06, "loss": 0.1994, "step": 9561 }, { "epoch": 3.070155723230053, "grad_norm": 0.5520129799842834, "learning_rate": 1.1825080889719563e-06, "loss": 0.2005, "step": 9562 }, { "epoch": 3.0704768020549045, "grad_norm": 0.5607929825782776, "learning_rate": 1.1771618553447216e-06, "loss": 0.1891, "step": 9563 }, { "epoch": 3.070797880879756, "grad_norm": 0.9774504899978638, "learning_rate": 1.1718276629805625e-06, "loss": 0.3626, "step": 9564 }, { "epoch": 3.0711189597046076, "grad_norm": 0.7361037135124207, "learning_rate": 1.1665055125294033e-06, "loss": 0.2507, "step": 9565 }, { "epoch": 3.071440038529459, "grad_norm": 0.8793400526046753, "learning_rate": 1.16119540463977e-06, "loss": 0.2958, "step": 9566 }, { "epoch": 3.0717611173543107, "grad_norm": 0.8101871609687805, "learning_rate": 1.155897339958667e-06, "loss": 0.2885, "step": 9567 }, { "epoch": 3.072082196179162, "grad_norm": 1.022641658782959, "learning_rate": 1.1506113191316447e-06, "loss": 0.4274, "step": 9568 }, { "epoch": 3.0724032750040133, "grad_norm": 0.7252495288848877, "learning_rate": 1.1453373428027992e-06, "loss": 0.2599, "step": 9569 }, { "epoch": 3.072724353828865, "grad_norm": 0.4876370429992676, "learning_rate": 1.1400754116147271e-06, "loss": 0.2188, "step": 9570 }, { "epoch": 3.0730454326537164, "grad_norm": 0.6673651933670044, "learning_rate": 1.134825526208605e-06, "loss": 0.3235, "step": 9571 }, { "epoch": 3.073366511478568, "grad_norm": 0.7081831693649292, "learning_rate": 1.1295876872240873e-06, "loss": 0.2824, "step": 9572 }, { "epoch": 3.0736875903034195, "grad_norm": 0.8749157786369324, "learning_rate": 1.1243618952994195e-06, "loss": 0.3039, "step": 9573 }, { "epoch": 3.074008669128271, "grad_norm": 0.5778833031654358, "learning_rate": 1.1191481510713253e-06, "loss": 0.2355, "step": 9574 }, { "epoch": 3.0743297479531226, "grad_norm": 0.482704758644104, "learning_rate": 1.1139464551750856e-06, "loss": 0.2225, 
"step": 9575 }, { "epoch": 3.074650826777974, "grad_norm": 0.5122305750846863, "learning_rate": 1.1087568082445264e-06, "loss": 0.2251, "step": 9576 }, { "epoch": 3.0749719056028253, "grad_norm": 0.6916018128395081, "learning_rate": 1.103579210911976e-06, "loss": 0.2501, "step": 9577 }, { "epoch": 3.075292984427677, "grad_norm": 0.5738651156425476, "learning_rate": 1.0984136638083177e-06, "loss": 0.2508, "step": 9578 }, { "epoch": 3.0756140632525284, "grad_norm": 0.3557813763618469, "learning_rate": 1.0932601675629595e-06, "loss": 0.1737, "step": 9579 }, { "epoch": 3.07593514207738, "grad_norm": 0.7535400986671448, "learning_rate": 1.0881187228038215e-06, "loss": 0.2763, "step": 9580 }, { "epoch": 3.0762562209022315, "grad_norm": 0.4553007185459137, "learning_rate": 1.0829893301573913e-06, "loss": 0.226, "step": 9581 }, { "epoch": 3.076577299727083, "grad_norm": 0.3509460389614105, "learning_rate": 1.0778719902486689e-06, "loss": 0.1899, "step": 9582 }, { "epoch": 3.0768983785519346, "grad_norm": 0.7953051924705505, "learning_rate": 1.0727667037011668e-06, "loss": 0.3081, "step": 9583 }, { "epoch": 3.077219457376786, "grad_norm": 0.563300609588623, "learning_rate": 1.0676734711369762e-06, "loss": 0.2307, "step": 9584 }, { "epoch": 3.0775405362016377, "grad_norm": 0.5179484486579895, "learning_rate": 1.0625922931766785e-06, "loss": 0.2405, "step": 9585 }, { "epoch": 3.077861615026489, "grad_norm": 0.8609539270401001, "learning_rate": 1.0575231704393895e-06, "loss": 0.2623, "step": 9586 }, { "epoch": 3.0781826938513404, "grad_norm": 0.571172833442688, "learning_rate": 1.052466103542793e-06, "loss": 0.2477, "step": 9587 }, { "epoch": 3.078503772676192, "grad_norm": 0.396445095539093, "learning_rate": 1.0474210931030514e-06, "loss": 0.2204, "step": 9588 }, { "epoch": 3.0788248515010435, "grad_norm": 0.43316546082496643, "learning_rate": 1.0423881397349068e-06, "loss": 0.2267, "step": 9589 }, { "epoch": 3.079145930325895, "grad_norm": 0.4050913453102112, 
"learning_rate": 1.0373672440515902e-06, "loss": 0.221, "step": 9590 }, { "epoch": 3.0794670091507466, "grad_norm": 0.2422318160533905, "learning_rate": 1.0323584066648795e-06, "loss": 0.2087, "step": 9591 }, { "epoch": 3.079788087975598, "grad_norm": 0.34273797273635864, "learning_rate": 1.0273616281851083e-06, "loss": 0.2479, "step": 9592 }, { "epoch": 3.0801091668004497, "grad_norm": 0.35393643379211426, "learning_rate": 1.0223769092211012e-06, "loss": 0.2539, "step": 9593 }, { "epoch": 3.080430245625301, "grad_norm": 0.4257902503013611, "learning_rate": 1.0174042503802493e-06, "loss": 0.4868, "step": 9594 }, { "epoch": 3.0807513244501523, "grad_norm": 0.4722660779953003, "learning_rate": 1.0124436522684243e-06, "loss": 0.4076, "step": 9595 }, { "epoch": 3.081072403275004, "grad_norm": 0.2358543872833252, "learning_rate": 1.007495115490087e-06, "loss": 0.0826, "step": 9596 }, { "epoch": 3.0813934820998554, "grad_norm": 0.2644473612308502, "learning_rate": 1.002558640648199e-06, "loss": 0.102, "step": 9597 }, { "epoch": 3.081714560924707, "grad_norm": 0.3907872438430786, "learning_rate": 9.976342283442463e-07, "loss": 0.1718, "step": 9598 }, { "epoch": 3.0820356397495585, "grad_norm": 0.35824838280677795, "learning_rate": 9.927218791782599e-07, "loss": 0.1511, "step": 9599 }, { "epoch": 3.08235671857441, "grad_norm": 0.6002468466758728, "learning_rate": 9.878215937487834e-07, "loss": 0.3288, "step": 9600 }, { "epoch": 3.0826777973992616, "grad_norm": 0.827461302280426, "learning_rate": 9.829333726529056e-07, "loss": 0.3523, "step": 9601 }, { "epoch": 3.082998876224113, "grad_norm": 0.5873133540153503, "learning_rate": 9.78057216486261e-07, "loss": 0.2328, "step": 9602 }, { "epoch": 3.0833199550489647, "grad_norm": 0.5725942254066467, "learning_rate": 9.731931258429638e-07, "loss": 0.2127, "step": 9603 }, { "epoch": 3.083641033873816, "grad_norm": 0.584507405757904, "learning_rate": 9.683411013157174e-07, "loss": 0.1985, "step": 9604 }, { "epoch": 
3.0839621126986674, "grad_norm": 0.7739067077636719, "learning_rate": 9.635011434957152e-07, "loss": 0.277, "step": 9605 }, { "epoch": 3.084283191523519, "grad_norm": 0.5025599598884583, "learning_rate": 9.58673252972675e-07, "loss": 0.1996, "step": 9606 }, { "epoch": 3.0846042703483705, "grad_norm": 0.6031046509742737, "learning_rate": 9.538574303348813e-07, "loss": 0.2324, "step": 9607 }, { "epoch": 3.084925349173222, "grad_norm": 0.8453519344329834, "learning_rate": 9.490536761691204e-07, "loss": 0.2988, "step": 9608 }, { "epoch": 3.0852464279980736, "grad_norm": 0.5942166447639465, "learning_rate": 9.442619910607131e-07, "loss": 0.2195, "step": 9609 }, { "epoch": 3.085567506822925, "grad_norm": 0.5165850520133972, "learning_rate": 9.394823755935145e-07, "loss": 0.2168, "step": 9610 }, { "epoch": 3.0858885856477767, "grad_norm": 0.4023417532444, "learning_rate": 9.347148303499142e-07, "loss": 0.1666, "step": 9611 }, { "epoch": 3.0862096644726282, "grad_norm": 0.44202151894569397, "learning_rate": 9.299593559108033e-07, "loss": 0.1773, "step": 9612 }, { "epoch": 3.0865307432974793, "grad_norm": 0.6434639096260071, "learning_rate": 9.252159528556403e-07, "loss": 0.2229, "step": 9613 }, { "epoch": 3.086851822122331, "grad_norm": 0.5662235021591187, "learning_rate": 9.204846217623853e-07, "loss": 0.2579, "step": 9614 }, { "epoch": 3.0871729009471824, "grad_norm": 0.7555933594703674, "learning_rate": 9.157653632075436e-07, "loss": 0.2733, "step": 9615 }, { "epoch": 3.087493979772034, "grad_norm": 0.5656288862228394, "learning_rate": 9.110581777661331e-07, "loss": 0.2362, "step": 9616 }, { "epoch": 3.0878150585968855, "grad_norm": 0.6788967251777649, "learning_rate": 9.063630660117173e-07, "loss": 0.2549, "step": 9617 }, { "epoch": 3.088136137421737, "grad_norm": 0.45415976643562317, "learning_rate": 9.016800285163718e-07, "loss": 0.1989, "step": 9618 }, { "epoch": 3.0884572162465886, "grad_norm": 0.4443502128124237, "learning_rate": 8.970090658507291e-07, "loss": 
0.1886, "step": 9619 }, { "epoch": 3.08877829507144, "grad_norm": 0.5720863938331604, "learning_rate": 8.923501785839117e-07, "loss": 0.2065, "step": 9620 }, { "epoch": 3.0890993738962917, "grad_norm": 0.7446249723434448, "learning_rate": 8.877033672835988e-07, "loss": 0.3329, "step": 9621 }, { "epoch": 3.089420452721143, "grad_norm": 0.7151133418083191, "learning_rate": 8.830686325160042e-07, "loss": 0.3169, "step": 9622 }, { "epoch": 3.0897415315459944, "grad_norm": 0.5628482699394226, "learning_rate": 8.784459748458318e-07, "loss": 0.2448, "step": 9623 }, { "epoch": 3.090062610370846, "grad_norm": 0.7645835280418396, "learning_rate": 8.73835394836342e-07, "loss": 0.2118, "step": 9624 }, { "epoch": 3.0903836891956975, "grad_norm": 0.9074884653091431, "learning_rate": 8.692368930493521e-07, "loss": 0.2708, "step": 9625 }, { "epoch": 3.090704768020549, "grad_norm": 0.8181541562080383, "learning_rate": 8.646504700451252e-07, "loss": 0.2557, "step": 9626 }, { "epoch": 3.0910258468454006, "grad_norm": 0.39952725172042847, "learning_rate": 8.600761263825474e-07, "loss": 0.1937, "step": 9627 }, { "epoch": 3.091346925670252, "grad_norm": 0.505699634552002, "learning_rate": 8.555138626189618e-07, "loss": 0.2237, "step": 9628 }, { "epoch": 3.0916680044951037, "grad_norm": 0.8317124843597412, "learning_rate": 8.509636793102682e-07, "loss": 0.316, "step": 9629 }, { "epoch": 3.0919890833199553, "grad_norm": 0.4344399571418762, "learning_rate": 8.46425577010912e-07, "loss": 0.1989, "step": 9630 }, { "epoch": 3.0923101621448064, "grad_norm": 0.7013100385665894, "learning_rate": 8.418995562738285e-07, "loss": 0.2483, "step": 9631 }, { "epoch": 3.092631240969658, "grad_norm": 0.40041857957839966, "learning_rate": 8.373856176505101e-07, "loss": 0.2176, "step": 9632 }, { "epoch": 3.0929523197945095, "grad_norm": 0.4556305408477783, "learning_rate": 8.328837616909613e-07, "loss": 0.2162, "step": 9633 }, { "epoch": 3.093273398619361, "grad_norm": 2.067694664001465, "learning_rate": 
8.283939889437209e-07, "loss": 0.2219, "step": 9634 }, { "epoch": 3.0935944774442126, "grad_norm": 0.48048630356788635, "learning_rate": 8.239162999558403e-07, "loss": 0.2146, "step": 9635 }, { "epoch": 3.093915556269064, "grad_norm": 0.8311915993690491, "learning_rate": 8.194506952729386e-07, "loss": 0.3099, "step": 9636 }, { "epoch": 3.0942366350939157, "grad_norm": 0.5806686282157898, "learning_rate": 8.14997175439125e-07, "loss": 0.2564, "step": 9637 }, { "epoch": 3.094557713918767, "grad_norm": 0.674216628074646, "learning_rate": 8.105557409970432e-07, "loss": 0.254, "step": 9638 }, { "epoch": 3.0948787927436188, "grad_norm": 0.826112687587738, "learning_rate": 8.061263924878604e-07, "loss": 0.2399, "step": 9639 }, { "epoch": 3.09519987156847, "grad_norm": 0.5507524013519287, "learning_rate": 8.017091304513003e-07, "loss": 0.257, "step": 9640 }, { "epoch": 3.0955209503933214, "grad_norm": 0.3682684302330017, "learning_rate": 7.973039554255768e-07, "loss": 0.216, "step": 9641 }, { "epoch": 3.095842029218173, "grad_norm": 0.49590498208999634, "learning_rate": 7.929108679474607e-07, "loss": 0.2547, "step": 9642 }, { "epoch": 3.0961631080430245, "grad_norm": 0.3100004494190216, "learning_rate": 7.885298685522235e-07, "loss": 0.2364, "step": 9643 }, { "epoch": 3.096484186867876, "grad_norm": 0.41891974210739136, "learning_rate": 7.841609577736719e-07, "loss": 0.404, "step": 9644 }, { "epoch": 3.0968052656927276, "grad_norm": 0.5358453989028931, "learning_rate": 7.798041361441688e-07, "loss": 0.6396, "step": 9645 }, { "epoch": 3.097126344517579, "grad_norm": 0.5173894166946411, "learning_rate": 7.754594041945562e-07, "loss": 0.5229, "step": 9646 }, { "epoch": 3.0974474233424307, "grad_norm": 0.4027728736400604, "learning_rate": 7.711267624542329e-07, "loss": 0.2377, "step": 9647 }, { "epoch": 3.0977685021672823, "grad_norm": 0.3841489851474762, "learning_rate": 7.668062114511321e-07, "loss": 0.214, "step": 9648 }, { "epoch": 3.0980895809921334, "grad_norm": 
0.29058584570884705, "learning_rate": 7.624977517116772e-07, "loss": 0.1148, "step": 9649 }, { "epoch": 3.098410659816985, "grad_norm": 0.2613859474658966, "learning_rate": 7.582013837608592e-07, "loss": 0.0923, "step": 9650 }, { "epoch": 3.0987317386418365, "grad_norm": 0.5930027365684509, "learning_rate": 7.539171081221596e-07, "loss": 0.2433, "step": 9651 }, { "epoch": 3.099052817466688, "grad_norm": 0.6712236404418945, "learning_rate": 7.496449253176274e-07, "loss": 0.2822, "step": 9652 }, { "epoch": 3.0993738962915396, "grad_norm": 0.6282926797866821, "learning_rate": 7.453848358678017e-07, "loss": 0.2422, "step": 9653 }, { "epoch": 3.099694975116391, "grad_norm": 0.8251811265945435, "learning_rate": 7.411368402917563e-07, "loss": 0.2839, "step": 9654 }, { "epoch": 3.1000160539412427, "grad_norm": 0.5989024043083191, "learning_rate": 7.369009391070992e-07, "loss": 0.2299, "step": 9655 }, { "epoch": 3.1003371327660942, "grad_norm": 0.42122694849967957, "learning_rate": 7.326771328299731e-07, "loss": 0.1556, "step": 9656 }, { "epoch": 3.100658211590946, "grad_norm": 0.7038974761962891, "learning_rate": 7.284654219750331e-07, "loss": 0.2316, "step": 9657 }, { "epoch": 3.100979290415797, "grad_norm": 0.5096376538276672, "learning_rate": 7.242658070554464e-07, "loss": 0.2069, "step": 9658 }, { "epoch": 3.1013003692406484, "grad_norm": 0.8389344811439514, "learning_rate": 7.200782885829482e-07, "loss": 0.2828, "step": 9659 }, { "epoch": 3.1016214480655, "grad_norm": 0.551834762096405, "learning_rate": 7.159028670677526e-07, "loss": 0.1995, "step": 9660 }, { "epoch": 3.1019425268903515, "grad_norm": 0.7695516347885132, "learning_rate": 7.117395430186414e-07, "loss": 0.2759, "step": 9661 }, { "epoch": 3.102263605715203, "grad_norm": 0.6411928534507751, "learning_rate": 7.075883169428754e-07, "loss": 0.2384, "step": 9662 }, { "epoch": 3.1025846845400546, "grad_norm": 0.8904174566268921, "learning_rate": 7.034491893463058e-07, "loss": 0.286, "step": 9663 }, { "epoch": 
3.102905763364906, "grad_norm": 0.7250691056251526, "learning_rate": 6.9932216073324e-07, "loss": 0.2928, "step": 9664 }, { "epoch": 3.1032268421897578, "grad_norm": 0.5427330136299133, "learning_rate": 6.952072316065761e-07, "loss": 0.2105, "step": 9665 }, { "epoch": 3.1035479210146093, "grad_norm": 0.7631176710128784, "learning_rate": 6.911044024676683e-07, "loss": 0.2677, "step": 9666 }, { "epoch": 3.1038689998394604, "grad_norm": 0.6781275272369385, "learning_rate": 6.870136738164612e-07, "loss": 0.2665, "step": 9667 }, { "epoch": 3.104190078664312, "grad_norm": 0.7830384969711304, "learning_rate": 6.829350461514006e-07, "loss": 0.2469, "step": 9668 }, { "epoch": 3.1045111574891635, "grad_norm": 0.836956799030304, "learning_rate": 6.788685199694222e-07, "loss": 0.2681, "step": 9669 }, { "epoch": 3.104832236314015, "grad_norm": 0.6083877086639404, "learning_rate": 6.748140957660631e-07, "loss": 0.2515, "step": 9670 }, { "epoch": 3.1051533151388666, "grad_norm": 0.5534031987190247, "learning_rate": 6.707717740353059e-07, "loss": 0.2078, "step": 9671 }, { "epoch": 3.105474393963718, "grad_norm": 0.42929285764694214, "learning_rate": 6.66741555269712e-07, "loss": 0.2037, "step": 9672 }, { "epoch": 3.1057954727885697, "grad_norm": 0.559162437915802, "learning_rate": 6.627234399603555e-07, "loss": 0.1936, "step": 9673 }, { "epoch": 3.1061165516134213, "grad_norm": 0.687892735004425, "learning_rate": 6.587174285968223e-07, "loss": 0.2577, "step": 9674 }, { "epoch": 3.106437630438273, "grad_norm": 0.499457448720932, "learning_rate": 6.547235216672443e-07, "loss": 0.211, "step": 9675 }, { "epoch": 3.106758709263124, "grad_norm": 0.4725257456302643, "learning_rate": 6.507417196582544e-07, "loss": 0.2347, "step": 9676 }, { "epoch": 3.1070797880879755, "grad_norm": 0.48947420716285706, "learning_rate": 6.4677202305502e-07, "loss": 0.1987, "step": 9677 }, { "epoch": 3.107400866912827, "grad_norm": 0.6859459280967712, "learning_rate": 6.428144323412544e-07, "loss": 0.2539, 
"step": 9678 }, { "epoch": 3.1077219457376786, "grad_norm": 0.5311869978904724, "learning_rate": 6.388689479991605e-07, "loss": 0.226, "step": 9679 }, { "epoch": 3.10804302456253, "grad_norm": 0.668393611907959, "learning_rate": 6.349355705094984e-07, "loss": 0.2531, "step": 9680 }, { "epoch": 3.1083641033873817, "grad_norm": 0.5925018787384033, "learning_rate": 6.310143003515179e-07, "loss": 0.2251, "step": 9681 }, { "epoch": 3.1086851822122332, "grad_norm": 0.76288902759552, "learning_rate": 6.271051380030368e-07, "loss": 0.2765, "step": 9682 }, { "epoch": 3.1090062610370848, "grad_norm": 0.8070435523986816, "learning_rate": 6.232080839403631e-07, "loss": 0.3033, "step": 9683 }, { "epoch": 3.1093273398619363, "grad_norm": 0.6173917055130005, "learning_rate": 6.193231386383391e-07, "loss": 0.2695, "step": 9684 }, { "epoch": 3.1096484186867874, "grad_norm": 0.8195832967758179, "learning_rate": 6.154503025703417e-07, "loss": 0.2724, "step": 9685 }, { "epoch": 3.109969497511639, "grad_norm": 0.433586061000824, "learning_rate": 6.115895762082602e-07, "loss": 0.2257, "step": 9686 }, { "epoch": 3.1102905763364905, "grad_norm": 0.4982379972934723, "learning_rate": 6.07740960022507e-07, "loss": 0.2394, "step": 9687 }, { "epoch": 3.110611655161342, "grad_norm": 0.7917426824569702, "learning_rate": 6.039044544820404e-07, "loss": 0.2325, "step": 9688 }, { "epoch": 3.1109327339861936, "grad_norm": 0.38256534934043884, "learning_rate": 6.000800600542977e-07, "loss": 0.2142, "step": 9689 }, { "epoch": 3.111253812811045, "grad_norm": 0.7258543372154236, "learning_rate": 5.96267777205295e-07, "loss": 0.246, "step": 9690 }, { "epoch": 3.1115748916358967, "grad_norm": 0.38089191913604736, "learning_rate": 5.924676063995382e-07, "loss": 0.2342, "step": 9691 }, { "epoch": 3.1118959704607483, "grad_norm": 0.4571894407272339, "learning_rate": 5.886795481000795e-07, "loss": 0.2346, "step": 9692 }, { "epoch": 3.1122170492856, "grad_norm": 0.3728485405445099, "learning_rate": 
5.849036027684606e-07, "loss": 0.2318, "step": 9693 }, { "epoch": 3.112538128110451, "grad_norm": 0.40570032596588135, "learning_rate": 5.811397708647803e-07, "loss": 0.3118, "step": 9694 }, { "epoch": 3.1128592069353025, "grad_norm": 0.4947049617767334, "learning_rate": 5.773880528476494e-07, "loss": 0.5981, "step": 9695 }, { "epoch": 3.113180285760154, "grad_norm": 0.4747079014778137, "learning_rate": 5.736484491742134e-07, "loss": 0.5451, "step": 9696 }, { "epoch": 3.1135013645850056, "grad_norm": 0.23416991531848907, "learning_rate": 5.699209603001076e-07, "loss": 0.0842, "step": 9697 }, { "epoch": 3.113822443409857, "grad_norm": 0.40521523356437683, "learning_rate": 5.662055866795357e-07, "loss": 0.2264, "step": 9698 }, { "epoch": 3.1141435222347087, "grad_norm": 0.13257575035095215, "learning_rate": 5.62502328765202e-07, "loss": 0.055, "step": 9699 }, { "epoch": 3.1144646010595602, "grad_norm": 0.25109007954597473, "learning_rate": 5.588111870083346e-07, "loss": 0.1009, "step": 9700 }, { "epoch": 3.114785679884412, "grad_norm": 0.286777526140213, "learning_rate": 5.551321618586736e-07, "loss": 0.1089, "step": 9701 }, { "epoch": 3.1151067587092633, "grad_norm": 0.3400633633136749, "learning_rate": 5.514652537645271e-07, "loss": 0.1317, "step": 9702 }, { "epoch": 3.1154278375341145, "grad_norm": 0.6121630072593689, "learning_rate": 5.478104631726711e-07, "loss": 0.2507, "step": 9703 }, { "epoch": 3.115748916358966, "grad_norm": 0.6167968511581421, "learning_rate": 5.441677905284381e-07, "loss": 0.2289, "step": 9704 }, { "epoch": 3.1160699951838176, "grad_norm": 0.735968828201294, "learning_rate": 5.405372362756734e-07, "loss": 0.2501, "step": 9705 }, { "epoch": 3.116391074008669, "grad_norm": 0.7218773365020752, "learning_rate": 5.369188008567672e-07, "loss": 0.2835, "step": 9706 }, { "epoch": 3.1167121528335207, "grad_norm": 0.6471768617630005, "learning_rate": 5.333124847125892e-07, "loss": 0.1762, "step": 9707 }, { "epoch": 3.117033231658372, "grad_norm": 
0.7177468538284302, "learning_rate": 5.297182882825879e-07, "loss": 0.2868, "step": 9708 }, { "epoch": 3.1173543104832238, "grad_norm": 0.540264904499054, "learning_rate": 5.261362120046686e-07, "loss": 0.2281, "step": 9709 }, { "epoch": 3.1176753893080753, "grad_norm": 0.5251516103744507, "learning_rate": 5.225662563153266e-07, "loss": 0.1886, "step": 9710 }, { "epoch": 3.117996468132927, "grad_norm": 0.5654820799827576, "learning_rate": 5.190084216495361e-07, "loss": 0.2386, "step": 9711 }, { "epoch": 3.118317546957778, "grad_norm": 0.5759437680244446, "learning_rate": 5.154627084408059e-07, "loss": 0.1868, "step": 9712 }, { "epoch": 3.1186386257826295, "grad_norm": 0.6524972319602966, "learning_rate": 5.119291171211793e-07, "loss": 0.2155, "step": 9713 }, { "epoch": 3.118959704607481, "grad_norm": 0.5860231518745422, "learning_rate": 5.084076481212119e-07, "loss": 0.2216, "step": 9714 }, { "epoch": 3.1192807834323326, "grad_norm": 0.7092858552932739, "learning_rate": 5.048983018699827e-07, "loss": 0.2829, "step": 9715 }, { "epoch": 3.119601862257184, "grad_norm": 0.585176944732666, "learning_rate": 5.01401078795094e-07, "loss": 0.196, "step": 9716 }, { "epoch": 3.1199229410820357, "grad_norm": 0.7186732888221741, "learning_rate": 4.979159793226718e-07, "loss": 0.2327, "step": 9717 }, { "epoch": 3.1202440199068873, "grad_norm": 0.9595154523849487, "learning_rate": 4.944430038773762e-07, "loss": 0.2656, "step": 9718 }, { "epoch": 3.120565098731739, "grad_norm": 0.4469728171825409, "learning_rate": 4.909821528823577e-07, "loss": 0.1912, "step": 9719 }, { "epoch": 3.1208861775565904, "grad_norm": 0.7011276483535767, "learning_rate": 4.875334267593234e-07, "loss": 0.2695, "step": 9720 }, { "epoch": 3.1212072563814415, "grad_norm": 0.49139729142189026, "learning_rate": 4.840968259284817e-07, "loss": 0.1871, "step": 9721 }, { "epoch": 3.121528335206293, "grad_norm": 0.7045285105705261, "learning_rate": 4.806723508085864e-07, "loss": 0.241, "step": 9722 }, { "epoch": 
3.1218494140311446, "grad_norm": 0.6836577653884888, "learning_rate": 4.772600018168816e-07, "loss": 0.2571, "step": 9723 }, { "epoch": 3.122170492855996, "grad_norm": 0.9169760346412659, "learning_rate": 4.738597793691679e-07, "loss": 0.3147, "step": 9724 }, { "epoch": 3.1224915716808477, "grad_norm": 0.5703161954879761, "learning_rate": 4.704716838797363e-07, "loss": 0.2181, "step": 9725 }, { "epoch": 3.1228126505056992, "grad_norm": 0.8328591585159302, "learning_rate": 4.670957157614453e-07, "loss": 0.2938, "step": 9726 }, { "epoch": 3.123133729330551, "grad_norm": 0.545899510383606, "learning_rate": 4.6373187542561035e-07, "loss": 0.2104, "step": 9727 }, { "epoch": 3.1234548081554023, "grad_norm": 0.6301508545875549, "learning_rate": 4.6038016328211476e-07, "loss": 0.2477, "step": 9728 }, { "epoch": 3.123775886980254, "grad_norm": 0.7413309216499329, "learning_rate": 4.570405797393762e-07, "loss": 0.2591, "step": 9729 }, { "epoch": 3.124096965805105, "grad_norm": 0.5572231411933899, "learning_rate": 4.5371312520429144e-07, "loss": 0.2159, "step": 9730 }, { "epoch": 3.1244180446299565, "grad_norm": 0.4630489647388458, "learning_rate": 4.503978000823028e-07, "loss": 0.1875, "step": 9731 }, { "epoch": 3.124739123454808, "grad_norm": 0.5202605128288269, "learning_rate": 4.4709460477737607e-07, "loss": 0.2429, "step": 9732 }, { "epoch": 3.1250602022796596, "grad_norm": 0.5044448971748352, "learning_rate": 4.438035396920004e-07, "loss": 0.2206, "step": 9733 }, { "epoch": 3.125381281104511, "grad_norm": 0.48041826486587524, "learning_rate": 4.405246052271772e-07, "loss": 0.2322, "step": 9734 }, { "epoch": 3.1257023599293627, "grad_norm": 0.5572899580001831, "learning_rate": 4.372578017824314e-07, "loss": 0.232, "step": 9735 }, { "epoch": 3.1260234387542143, "grad_norm": 0.6565282344818115, "learning_rate": 4.3400312975581117e-07, "loss": 0.2185, "step": 9736 }, { "epoch": 3.126344517579066, "grad_norm": 0.484170526266098, "learning_rate": 4.307605895439104e-07, 
"loss": 0.2358, "step": 9737 }, { "epoch": 3.1266655964039174, "grad_norm": 0.6327326893806458, "learning_rate": 4.275301815417909e-07, "loss": 0.2278, "step": 9738 }, { "epoch": 3.1269866752287685, "grad_norm": 0.8204041719436646, "learning_rate": 4.2431190614309335e-07, "loss": 0.2654, "step": 9739 }, { "epoch": 3.12730775405362, "grad_norm": 0.3902547359466553, "learning_rate": 4.2110576373993736e-07, "loss": 0.2428, "step": 9740 }, { "epoch": 3.1276288328784716, "grad_norm": 0.44226837158203125, "learning_rate": 4.179117547229883e-07, "loss": 0.2411, "step": 9741 }, { "epoch": 3.127949911703323, "grad_norm": 0.4273797273635864, "learning_rate": 4.1472987948143473e-07, "loss": 0.2463, "step": 9742 }, { "epoch": 3.1282709905281747, "grad_norm": 0.29561299085617065, "learning_rate": 4.115601384029666e-07, "loss": 0.2297, "step": 9743 }, { "epoch": 3.1285920693530263, "grad_norm": 0.5465667247772217, "learning_rate": 4.084025318738083e-07, "loss": 0.6041, "step": 9744 }, { "epoch": 3.128913148177878, "grad_norm": 0.5179415941238403, "learning_rate": 4.0525706027870756e-07, "loss": 0.7336, "step": 9745 }, { "epoch": 3.1292342270027294, "grad_norm": 0.45659175515174866, "learning_rate": 4.021237240009468e-07, "loss": 0.3653, "step": 9746 }, { "epoch": 3.129555305827581, "grad_norm": 0.45041006803512573, "learning_rate": 3.9900252342228717e-07, "loss": 0.3496, "step": 9747 }, { "epoch": 3.129876384652432, "grad_norm": 0.29173073172569275, "learning_rate": 3.958934589230467e-07, "loss": 0.1509, "step": 9748 }, { "epoch": 3.1301974634772836, "grad_norm": 0.362936407327652, "learning_rate": 3.9279653088205584e-07, "loss": 0.1642, "step": 9749 }, { "epoch": 3.130518542302135, "grad_norm": 0.23348329961299896, "learning_rate": 3.8971173967666807e-07, "loss": 0.0935, "step": 9750 }, { "epoch": 3.1308396211269867, "grad_norm": 0.5271691083908081, "learning_rate": 3.866390856827495e-07, "loss": 0.2844, "step": 9751 }, { "epoch": 3.131160699951838, "grad_norm": 
0.5729438662528992, "learning_rate": 3.835785692747118e-07, "loss": 0.233, "step": 9752 }, { "epoch": 3.1314817787766898, "grad_norm": 0.7787706851959229, "learning_rate": 3.805301908254455e-07, "loss": 0.3528, "step": 9753 }, { "epoch": 3.1318028576015413, "grad_norm": 0.6220112442970276, "learning_rate": 3.774939507063979e-07, "loss": 0.2668, "step": 9754 }, { "epoch": 3.132123936426393, "grad_norm": 0.5777502059936523, "learning_rate": 3.744698492875398e-07, "loss": 0.2084, "step": 9755 }, { "epoch": 3.1324450152512444, "grad_norm": 0.6816080808639526, "learning_rate": 3.7145788693732086e-07, "loss": 0.2497, "step": 9756 }, { "epoch": 3.1327660940760955, "grad_norm": 0.6858428120613098, "learning_rate": 3.6845806402275863e-07, "loss": 0.2512, "step": 9757 }, { "epoch": 3.133087172900947, "grad_norm": 0.8424202799797058, "learning_rate": 3.654703809093607e-07, "loss": 0.2316, "step": 9758 }, { "epoch": 3.1334082517257986, "grad_norm": 0.6304720640182495, "learning_rate": 3.6249483796116924e-07, "loss": 0.232, "step": 9759 }, { "epoch": 3.13372933055065, "grad_norm": 0.9778458476066589, "learning_rate": 3.595314355407609e-07, "loss": 0.3145, "step": 9760 }, { "epoch": 3.1340504093755017, "grad_norm": 0.593328058719635, "learning_rate": 3.565801740092023e-07, "loss": 0.2213, "step": 9761 }, { "epoch": 3.1343714882003533, "grad_norm": 0.8587982058525085, "learning_rate": 3.536410537260948e-07, "loss": 0.2974, "step": 9762 }, { "epoch": 3.134692567025205, "grad_norm": 0.6776645183563232, "learning_rate": 3.50714075049563e-07, "loss": 0.2801, "step": 9763 }, { "epoch": 3.1350136458500564, "grad_norm": 0.8762674331665039, "learning_rate": 3.4779923833626606e-07, "loss": 0.3066, "step": 9764 }, { "epoch": 3.135334724674908, "grad_norm": 0.6403717994689941, "learning_rate": 3.4489654394134205e-07, "loss": 0.2094, "step": 9765 }, { "epoch": 3.135655803499759, "grad_norm": 0.4854280352592468, "learning_rate": 3.4200599221848595e-07, "loss": 0.1921, "step": 9766 }, { 
"epoch": 3.1359768823246106, "grad_norm": 0.9062448143959045, "learning_rate": 3.3912758351991593e-07, "loss": 0.3486, "step": 9767 }, { "epoch": 3.136297961149462, "grad_norm": 0.7571354508399963, "learning_rate": 3.362613181963403e-07, "loss": 0.2229, "step": 9768 }, { "epoch": 3.1366190399743137, "grad_norm": 0.585010826587677, "learning_rate": 3.3340719659701313e-07, "loss": 0.2403, "step": 9769 }, { "epoch": 3.1369401187991652, "grad_norm": 0.6055962443351746, "learning_rate": 3.305652190696895e-07, "loss": 0.2285, "step": 9770 }, { "epoch": 3.137261197624017, "grad_norm": 0.49409618973731995, "learning_rate": 3.277353859606813e-07, "loss": 0.1914, "step": 9771 }, { "epoch": 3.1375822764488683, "grad_norm": 0.39980348944664, "learning_rate": 3.249176976147683e-07, "loss": 0.1664, "step": 9772 }, { "epoch": 3.13790335527372, "grad_norm": 0.574813187122345, "learning_rate": 3.2211215437528694e-07, "loss": 0.2273, "step": 9773 }, { "epoch": 3.138224434098571, "grad_norm": 0.5716437101364136, "learning_rate": 3.1931875658408604e-07, "loss": 0.2453, "step": 9774 }, { "epoch": 3.1385455129234225, "grad_norm": 0.40409037470817566, "learning_rate": 3.1653750458152666e-07, "loss": 0.1933, "step": 9775 }, { "epoch": 3.138866591748274, "grad_norm": 0.6817755103111267, "learning_rate": 3.137683987065043e-07, "loss": 0.2179, "step": 9776 }, { "epoch": 3.1391876705731256, "grad_norm": 0.6137420535087585, "learning_rate": 3.1101143929641585e-07, "loss": 0.2484, "step": 9777 }, { "epoch": 3.139508749397977, "grad_norm": 0.7133082747459412, "learning_rate": 3.0826662668720364e-07, "loss": 0.2384, "step": 9778 }, { "epoch": 3.1398298282228287, "grad_norm": 0.6078048348426819, "learning_rate": 3.0553396121330013e-07, "loss": 0.2337, "step": 9779 }, { "epoch": 3.1401509070476803, "grad_norm": 0.325295627117157, "learning_rate": 3.0281344320768347e-07, "loss": 0.1829, "step": 9780 }, { "epoch": 3.140471985872532, "grad_norm": 0.5655336976051331, "learning_rate": 
3.001050730018218e-07, "loss": 0.2414, "step": 9781 }, { "epoch": 3.1407930646973834, "grad_norm": 0.6369791626930237, "learning_rate": 2.974088509257511e-07, "loss": 0.2344, "step": 9782 }, { "epoch": 3.1411141435222345, "grad_norm": 0.42372626066207886, "learning_rate": 2.947247773079753e-07, "loss": 0.2057, "step": 9783 }, { "epoch": 3.141435222347086, "grad_norm": 0.550973653793335, "learning_rate": 2.9205285247555505e-07, "loss": 0.2484, "step": 9784 }, { "epoch": 3.1417563011719376, "grad_norm": 0.5076101422309875, "learning_rate": 2.893930767540298e-07, "loss": 0.2296, "step": 9785 }, { "epoch": 3.142077379996789, "grad_norm": 0.4814938008785248, "learning_rate": 2.867454504675182e-07, "loss": 0.2261, "step": 9786 }, { "epoch": 3.1423984588216407, "grad_norm": 0.3789692521095276, "learning_rate": 2.841099739386066e-07, "loss": 0.2236, "step": 9787 }, { "epoch": 3.1427195376464923, "grad_norm": 0.3235059976577759, "learning_rate": 2.81486647488427e-07, "loss": 0.2032, "step": 9788 }, { "epoch": 3.143040616471344, "grad_norm": 0.4682113826274872, "learning_rate": 2.7887547143662373e-07, "loss": 0.2508, "step": 9789 }, { "epoch": 3.1433616952961954, "grad_norm": 0.48588764667510986, "learning_rate": 2.762764461013423e-07, "loss": 0.2209, "step": 9790 }, { "epoch": 3.143682774121047, "grad_norm": 0.4631730914115906, "learning_rate": 2.73689571799296e-07, "loss": 0.2206, "step": 9791 }, { "epoch": 3.144003852945898, "grad_norm": 0.3348900377750397, "learning_rate": 2.7111484884567717e-07, "loss": 0.2183, "step": 9792 }, { "epoch": 3.1443249317707496, "grad_norm": 0.2803027927875519, "learning_rate": 2.685522775541904e-07, "loss": 0.2277, "step": 9793 }, { "epoch": 3.144646010595601, "grad_norm": 0.18889014422893524, "learning_rate": 2.660018582370971e-07, "loss": 0.1372, "step": 9794 }, { "epoch": 3.1449670894204527, "grad_norm": 0.5592251420021057, "learning_rate": 2.6346359120514863e-07, "loss": 0.627, "step": 9795 }, { "epoch": 3.145288168245304, "grad_norm": 
0.42517322301864624, "learning_rate": 2.609374767676309e-07, "loss": 0.4038, "step": 9796 }, { "epoch": 3.1456092470701558, "grad_norm": 0.48470279574394226, "learning_rate": 2.584235152323422e-07, "loss": 0.3099, "step": 9797 }, { "epoch": 3.1459303258950073, "grad_norm": 0.23615846037864685, "learning_rate": 2.5592170690560414e-07, "loss": 0.0814, "step": 9798 }, { "epoch": 3.146251404719859, "grad_norm": 0.24394987523555756, "learning_rate": 2.534320520922506e-07, "loss": 0.0938, "step": 9799 }, { "epoch": 3.1465724835447104, "grad_norm": 0.1944095492362976, "learning_rate": 2.5095455109562795e-07, "loss": 0.0707, "step": 9800 }, { "epoch": 3.1468935623695615, "grad_norm": 0.470803827047348, "learning_rate": 2.484892042176279e-07, "loss": 0.2102, "step": 9801 }, { "epoch": 3.147214641194413, "grad_norm": 0.5542094111442566, "learning_rate": 2.4603601175864356e-07, "loss": 0.3056, "step": 9802 }, { "epoch": 3.1475357200192646, "grad_norm": 0.6499555110931396, "learning_rate": 2.4359497401758024e-07, "loss": 0.287, "step": 9803 }, { "epoch": 3.147856798844116, "grad_norm": 0.690673291683197, "learning_rate": 2.4116609129187786e-07, "loss": 0.2237, "step": 9804 }, { "epoch": 3.1481778776689677, "grad_norm": 0.6088657975196838, "learning_rate": 2.387493638774774e-07, "loss": 0.2442, "step": 9805 }, { "epoch": 3.1484989564938193, "grad_norm": 0.6405919194221497, "learning_rate": 2.363447920688655e-07, "loss": 0.248, "step": 9806 }, { "epoch": 3.148820035318671, "grad_norm": 0.5600770711898804, "learning_rate": 2.339523761590301e-07, "loss": 0.2073, "step": 9807 }, { "epoch": 3.1491411141435224, "grad_norm": 0.6620354652404785, "learning_rate": 2.315721164394713e-07, "loss": 0.2274, "step": 9808 }, { "epoch": 3.149462192968374, "grad_norm": 0.8232807517051697, "learning_rate": 2.2920401320022378e-07, "loss": 0.2655, "step": 9809 }, { "epoch": 3.149783271793225, "grad_norm": 0.48089903593063354, "learning_rate": 2.2684806672982338e-07, "loss": 0.197, "step": 9810 }, { 
"epoch": 3.1501043506180766, "grad_norm": 0.6705470085144043, "learning_rate": 2.2450427731534053e-07, "loss": 0.2304, "step": 9811 }, { "epoch": 3.150425429442928, "grad_norm": 0.7051324248313904, "learning_rate": 2.2217264524236892e-07, "loss": 0.2445, "step": 9812 }, { "epoch": 3.1507465082677797, "grad_norm": 0.6257637143135071, "learning_rate": 2.1985317079500356e-07, "loss": 0.2012, "step": 9813 }, { "epoch": 3.1510675870926312, "grad_norm": 0.9062849879264832, "learning_rate": 2.175458542558517e-07, "loss": 0.286, "step": 9814 }, { "epoch": 3.151388665917483, "grad_norm": 0.5609592795372009, "learning_rate": 2.1525069590607737e-07, "loss": 0.185, "step": 9815 }, { "epoch": 3.1517097447423343, "grad_norm": 0.5486966371536255, "learning_rate": 2.1296769602532352e-07, "loss": 0.2412, "step": 9816 }, { "epoch": 3.152030823567186, "grad_norm": 0.7732871770858765, "learning_rate": 2.106968548917676e-07, "loss": 0.2621, "step": 9817 }, { "epoch": 3.1523519023920374, "grad_norm": 0.5511267781257629, "learning_rate": 2.0843817278209942e-07, "loss": 0.2065, "step": 9818 }, { "epoch": 3.1526729812168885, "grad_norm": 0.5723970532417297, "learning_rate": 2.0619164997155438e-07, "loss": 0.2028, "step": 9819 }, { "epoch": 3.15299406004174, "grad_norm": 0.7419432997703552, "learning_rate": 2.0395728673383574e-07, "loss": 0.2796, "step": 9820 }, { "epoch": 3.1533151388665916, "grad_norm": 0.7188732624053955, "learning_rate": 2.017350833412146e-07, "loss": 0.1864, "step": 9821 }, { "epoch": 3.153636217691443, "grad_norm": 0.7375403642654419, "learning_rate": 1.9952504006446325e-07, "loss": 0.2474, "step": 9822 }, { "epoch": 3.1539572965162948, "grad_norm": 0.5861619114875793, "learning_rate": 1.973271571728441e-07, "loss": 0.2157, "step": 9823 }, { "epoch": 3.1542783753411463, "grad_norm": 0.42802998423576355, "learning_rate": 1.9514143493417625e-07, "loss": 0.2182, "step": 9824 }, { "epoch": 3.154599454165998, "grad_norm": 0.6591411828994751, "learning_rate": 
1.9296787361480216e-07, "loss": 0.246, "step": 9825 }, { "epoch": 3.1549205329908494, "grad_norm": 0.8120198845863342, "learning_rate": 1.908064734795323e-07, "loss": 0.3286, "step": 9826 }, { "epoch": 3.155241611815701, "grad_norm": 0.4934924840927124, "learning_rate": 1.8865723479173368e-07, "loss": 0.1993, "step": 9827 }, { "epoch": 3.155562690640552, "grad_norm": 0.558791995048523, "learning_rate": 1.86520157813308e-07, "loss": 0.2377, "step": 9828 }, { "epoch": 3.1558837694654036, "grad_norm": 0.8275998830795288, "learning_rate": 1.8439524280462472e-07, "loss": 0.2888, "step": 9829 }, { "epoch": 3.156204848290255, "grad_norm": 0.5782402157783508, "learning_rate": 1.8228249002461006e-07, "loss": 0.2376, "step": 9830 }, { "epoch": 3.1565259271151067, "grad_norm": 0.5709177255630493, "learning_rate": 1.8018189973069143e-07, "loss": 0.2141, "step": 9831 }, { "epoch": 3.1568470059399583, "grad_norm": 0.6545057892799377, "learning_rate": 1.7809347217881966e-07, "loss": 0.2365, "step": 9832 }, { "epoch": 3.15716808476481, "grad_norm": 0.7144107222557068, "learning_rate": 1.7601720762346897e-07, "loss": 0.2655, "step": 9833 }, { "epoch": 3.1574891635896614, "grad_norm": 0.4486263394355774, "learning_rate": 1.7395310631762585e-07, "loss": 0.1869, "step": 9834 }, { "epoch": 3.157810242414513, "grad_norm": 0.5604619979858398, "learning_rate": 1.7190116851280026e-07, "loss": 0.2374, "step": 9835 }, { "epoch": 3.1581313212393645, "grad_norm": 0.5720604658126831, "learning_rate": 1.698613944589922e-07, "loss": 0.2534, "step": 9836 }, { "epoch": 3.1584524000642156, "grad_norm": 0.5002235174179077, "learning_rate": 1.678337844047695e-07, "loss": 0.2297, "step": 9837 }, { "epoch": 3.158773478889067, "grad_norm": 0.519655168056488, "learning_rate": 1.6581833859716788e-07, "loss": 0.249, "step": 9838 }, { "epoch": 3.1590945577139187, "grad_norm": 0.4107975959777832, "learning_rate": 1.6381505728176872e-07, "loss": 0.2233, "step": 9839 }, { "epoch": 3.1594156365387702, 
"grad_norm": 0.30553480982780457, "learning_rate": 1.618239407026767e-07, "loss": 0.2119, "step": 9840 }, { "epoch": 3.1597367153636218, "grad_norm": 0.3645618259906769, "learning_rate": 1.598449891024978e-07, "loss": 0.2261, "step": 9841 }, { "epoch": 3.1600577941884733, "grad_norm": 0.3786523938179016, "learning_rate": 1.578782027223502e-07, "loss": 0.2326, "step": 9842 }, { "epoch": 3.160378873013325, "grad_norm": 0.4503689110279083, "learning_rate": 1.5592358180189782e-07, "loss": 0.2413, "step": 9843 }, { "epoch": 3.1606999518381764, "grad_norm": 0.5784024000167847, "learning_rate": 1.5398112657929453e-07, "loss": 0.5337, "step": 9844 }, { "epoch": 3.161021030663028, "grad_norm": 0.42213118076324463, "learning_rate": 1.520508372912288e-07, "loss": 0.4703, "step": 9845 }, { "epoch": 3.161342109487879, "grad_norm": 0.3703862428665161, "learning_rate": 1.5013271417290143e-07, "loss": 0.2417, "step": 9846 }, { "epoch": 3.1616631883127306, "grad_norm": 0.321639746427536, "learning_rate": 1.4822675745801429e-07, "loss": 0.1601, "step": 9847 }, { "epoch": 3.161984267137582, "grad_norm": 0.1442064493894577, "learning_rate": 1.4633296737882607e-07, "loss": 0.0541, "step": 9848 }, { "epoch": 3.1623053459624337, "grad_norm": 0.10288975387811661, "learning_rate": 1.4445134416607442e-07, "loss": 0.0532, "step": 9849 }, { "epoch": 3.1626264247872853, "grad_norm": 0.21865668892860413, "learning_rate": 1.425818880490315e-07, "loss": 0.0609, "step": 9850 }, { "epoch": 3.162947503612137, "grad_norm": 0.6108369827270508, "learning_rate": 1.4072459925548177e-07, "loss": 0.2515, "step": 9851 }, { "epoch": 3.1632685824369884, "grad_norm": 0.9449090361595154, "learning_rate": 1.3887947801173307e-07, "loss": 0.2917, "step": 9852 }, { "epoch": 3.16358966126184, "grad_norm": 0.6836480498313904, "learning_rate": 1.3704652454261668e-07, "loss": 0.231, "step": 9853 }, { "epoch": 3.1639107400866915, "grad_norm": 0.5700660943984985, "learning_rate": 1.3522573907145398e-07, "loss": 0.2352, 
"step": 9854 }, { "epoch": 3.1642318189115426, "grad_norm": 0.645478367805481, "learning_rate": 1.33417121820123e-07, "loss": 0.2189, "step": 9855 }, { "epoch": 3.164552897736394, "grad_norm": 0.7627379894256592, "learning_rate": 1.3162067300898084e-07, "loss": 0.236, "step": 9856 }, { "epoch": 3.1648739765612457, "grad_norm": 0.6092104911804199, "learning_rate": 1.2983639285693017e-07, "loss": 0.2363, "step": 9857 }, { "epoch": 3.1651950553860972, "grad_norm": 0.7562824487686157, "learning_rate": 1.2806428158138596e-07, "loss": 0.2824, "step": 9858 }, { "epoch": 3.165516134210949, "grad_norm": 0.7711445093154907, "learning_rate": 1.2630433939825327e-07, "loss": 0.2944, "step": 9859 }, { "epoch": 3.1658372130358003, "grad_norm": 0.7503237724304199, "learning_rate": 1.2455656652198277e-07, "loss": 0.2468, "step": 9860 }, { "epoch": 3.166158291860652, "grad_norm": 0.6290993690490723, "learning_rate": 1.2282096316554858e-07, "loss": 0.2132, "step": 9861 }, { "epoch": 3.1664793706855034, "grad_norm": 0.7252655625343323, "learning_rate": 1.2109752954042597e-07, "loss": 0.2505, "step": 9862 }, { "epoch": 3.166800449510355, "grad_norm": 0.5677751898765564, "learning_rate": 1.193862658566025e-07, "loss": 0.2001, "step": 9863 }, { "epoch": 3.167121528335206, "grad_norm": 0.5677169561386108, "learning_rate": 1.1768717232257809e-07, "loss": 0.205, "step": 9864 }, { "epoch": 3.1674426071600577, "grad_norm": 0.8806698322296143, "learning_rate": 1.160002491454093e-07, "loss": 0.3299, "step": 9865 }, { "epoch": 3.167763685984909, "grad_norm": 0.4428505003452301, "learning_rate": 1.1432549653063174e-07, "loss": 0.1925, "step": 9866 }, { "epoch": 3.1680847648097608, "grad_norm": 0.6543468236923218, "learning_rate": 1.1266291468229328e-07, "loss": 0.2174, "step": 9867 }, { "epoch": 3.1684058436346123, "grad_norm": 0.6415979862213135, "learning_rate": 1.1101250380300965e-07, "loss": 0.243, "step": 9868 }, { "epoch": 3.168726922459464, "grad_norm": 0.7214950323104858, "learning_rate": 
1.0937426409384221e-07, "loss": 0.2707, "step": 9869 }, { "epoch": 3.1690480012843154, "grad_norm": 0.7399327754974365, "learning_rate": 1.0774819575442019e-07, "loss": 0.2824, "step": 9870 }, { "epoch": 3.169369080109167, "grad_norm": 0.6261020302772522, "learning_rate": 1.0613429898287398e-07, "loss": 0.2368, "step": 9871 }, { "epoch": 3.1696901589340185, "grad_norm": 0.507150411605835, "learning_rate": 1.0453257397585737e-07, "loss": 0.2091, "step": 9872 }, { "epoch": 3.1700112377588696, "grad_norm": 0.5187735557556152, "learning_rate": 1.0294302092853647e-07, "loss": 0.2175, "step": 9873 }, { "epoch": 3.170332316583721, "grad_norm": 0.9378407001495361, "learning_rate": 1.013656400345786e-07, "loss": 0.2481, "step": 9874 }, { "epoch": 3.1706533954085727, "grad_norm": 0.5098487734794617, "learning_rate": 9.980043148619667e-08, "loss": 0.2186, "step": 9875 }, { "epoch": 3.1709744742334243, "grad_norm": 0.4638996422290802, "learning_rate": 9.824739547410477e-08, "loss": 0.2072, "step": 9876 }, { "epoch": 3.171295553058276, "grad_norm": 0.7582216858863831, "learning_rate": 9.670653218752934e-08, "loss": 0.3402, "step": 9877 }, { "epoch": 3.1716166318831274, "grad_norm": 0.42839518189430237, "learning_rate": 9.517784181422019e-08, "loss": 0.2069, "step": 9878 }, { "epoch": 3.171937710707979, "grad_norm": 0.4199906885623932, "learning_rate": 9.366132454046162e-08, "loss": 0.2111, "step": 9879 }, { "epoch": 3.1722587895328305, "grad_norm": 0.6500821113586426, "learning_rate": 9.215698055100585e-08, "loss": 0.2415, "step": 9880 }, { "epoch": 3.1725798683576816, "grad_norm": 1.0988209247589111, "learning_rate": 9.066481002918403e-08, "loss": 0.2793, "step": 9881 }, { "epoch": 3.172900947182533, "grad_norm": 0.4966912567615509, "learning_rate": 8.918481315678406e-08, "loss": 0.2166, "step": 9882 }, { "epoch": 3.1732220260073847, "grad_norm": 0.4506468176841736, "learning_rate": 8.771699011416168e-08, "loss": 0.2084, "step": 9883 }, { "epoch": 3.1735431048322362, 
"grad_norm": 0.5450963973999023, "learning_rate": 8.626134108016271e-08, "loss": 0.2226, "step": 9884 }, { "epoch": 3.173864183657088, "grad_norm": 0.6653562784194946, "learning_rate": 8.481786623214528e-08, "loss": 0.2526, "step": 9885 }, { "epoch": 3.1741852624819393, "grad_norm": 0.3943636417388916, "learning_rate": 8.33865657459909e-08, "loss": 0.2209, "step": 9886 }, { "epoch": 3.174506341306791, "grad_norm": 0.6992161870002747, "learning_rate": 8.196743979610455e-08, "loss": 0.2377, "step": 9887 }, { "epoch": 3.1748274201316424, "grad_norm": 0.40512463450431824, "learning_rate": 8.056048855540343e-08, "loss": 0.245, "step": 9888 }, { "epoch": 3.175148498956494, "grad_norm": 0.5136418342590332, "learning_rate": 7.91657121953171e-08, "loss": 0.2392, "step": 9889 }, { "epoch": 3.175469577781345, "grad_norm": 0.6193544864654541, "learning_rate": 7.778311088579849e-08, "loss": 0.255, "step": 9890 }, { "epoch": 3.1757906566061966, "grad_norm": 0.4072697162628174, "learning_rate": 7.641268479531283e-08, "loss": 0.2205, "step": 9891 }, { "epoch": 3.176111735431048, "grad_norm": 0.3897491693496704, "learning_rate": 7.505443409083768e-08, "loss": 0.2429, "step": 9892 }, { "epoch": 3.1764328142558997, "grad_norm": 0.46470972895622253, "learning_rate": 7.370835893788508e-08, "loss": 0.2422, "step": 9893 }, { "epoch": 3.1767538930807513, "grad_norm": 0.39090585708618164, "learning_rate": 7.237445950044608e-08, "loss": 0.3282, "step": 9894 }, { "epoch": 3.177074971905603, "grad_norm": 0.5572707056999207, "learning_rate": 7.105273594107953e-08, "loss": 0.6177, "step": 9895 }, { "epoch": 3.1773960507304544, "grad_norm": 0.3608555793762207, "learning_rate": 6.974318842081217e-08, "loss": 0.2933, "step": 9896 }, { "epoch": 3.177717129555306, "grad_norm": 0.28094372153282166, "learning_rate": 6.844581709921638e-08, "loss": 0.16, "step": 9897 }, { "epoch": 3.1780382083801575, "grad_norm": 0.4451110363006592, "learning_rate": 6.71606221343768e-08, "loss": 0.261, "step": 9898 }, { 
"epoch": 3.1783592872050086, "grad_norm": 0.2656843364238739, "learning_rate": 6.588760368287928e-08, "loss": 0.0902, "step": 9899 }, { "epoch": 3.17868036602986, "grad_norm": 0.17016752064228058, "learning_rate": 6.462676189985528e-08, "loss": 0.061, "step": 9900 }, { "epoch": 3.1790014448547117, "grad_norm": 0.3899795413017273, "learning_rate": 6.337809693891527e-08, "loss": 0.2108, "step": 9901 }, { "epoch": 3.1793225236795633, "grad_norm": 0.6329613924026489, "learning_rate": 6.214160895222643e-08, "loss": 0.2528, "step": 9902 }, { "epoch": 3.179643602504415, "grad_norm": 0.6943358778953552, "learning_rate": 6.09172980904238e-08, "loss": 0.2953, "step": 9903 }, { "epoch": 3.1799646813292664, "grad_norm": 0.8991124629974365, "learning_rate": 5.970516450271025e-08, "loss": 0.2841, "step": 9904 }, { "epoch": 3.180285760154118, "grad_norm": 0.6497676968574524, "learning_rate": 5.850520833676765e-08, "loss": 0.2275, "step": 9905 }, { "epoch": 3.1806068389789695, "grad_norm": 0.6367692947387695, "learning_rate": 5.7317429738812376e-08, "loss": 0.2095, "step": 9906 }, { "epoch": 3.180927917803821, "grad_norm": 0.6393100619316101, "learning_rate": 5.6141828853573106e-08, "loss": 0.2308, "step": 9907 }, { "epoch": 3.181248996628672, "grad_norm": 0.8095628023147583, "learning_rate": 5.497840582429081e-08, "loss": 0.3126, "step": 9908 }, { "epoch": 3.1815700754535237, "grad_norm": 0.6111844182014465, "learning_rate": 5.382716079271877e-08, "loss": 0.2296, "step": 9909 }, { "epoch": 3.181891154278375, "grad_norm": 0.4887303113937378, "learning_rate": 5.268809389913365e-08, "loss": 0.1672, "step": 9910 }, { "epoch": 3.1822122331032268, "grad_norm": 0.6326609253883362, "learning_rate": 5.1561205282335547e-08, "loss": 0.2229, "step": 9911 }, { "epoch": 3.1825333119280783, "grad_norm": 0.6799615025520325, "learning_rate": 5.0446495079636836e-08, "loss": 0.2754, "step": 9912 }, { "epoch": 3.18285439075293, "grad_norm": 0.4907290041446686, "learning_rate": 4.934396342684e-08, 
"loss": 0.1849, "step": 9913 }, { "epoch": 3.1831754695777814, "grad_norm": 0.7006059288978577, "learning_rate": 4.825361045831534e-08, "loss": 0.2467, "step": 9914 }, { "epoch": 3.183496548402633, "grad_norm": 0.6643416285514832, "learning_rate": 4.717543630688992e-08, "loss": 0.2323, "step": 9915 }, { "epoch": 3.1838176272274845, "grad_norm": 0.7749451994895935, "learning_rate": 4.610944110394755e-08, "loss": 0.2579, "step": 9916 }, { "epoch": 3.1841387060523356, "grad_norm": 0.8002907633781433, "learning_rate": 4.5055624979384316e-08, "loss": 0.2735, "step": 9917 }, { "epoch": 3.184459784877187, "grad_norm": 0.7704986929893494, "learning_rate": 4.401398806159751e-08, "loss": 0.2775, "step": 9918 }, { "epoch": 3.1847808637020387, "grad_norm": 0.7502204775810242, "learning_rate": 4.298453047749673e-08, "loss": 0.255, "step": 9919 }, { "epoch": 3.1851019425268903, "grad_norm": 0.4624794125556946, "learning_rate": 4.196725235253718e-08, "loss": 0.2155, "step": 9920 }, { "epoch": 3.185423021351742, "grad_norm": 0.8697096705436707, "learning_rate": 4.096215381066415e-08, "loss": 0.3674, "step": 9921 }, { "epoch": 3.1857441001765934, "grad_norm": 0.8463855385780334, "learning_rate": 3.996923497434635e-08, "loss": 0.3001, "step": 9922 }, { "epoch": 3.186065179001445, "grad_norm": 0.7046919465065002, "learning_rate": 3.898849596456478e-08, "loss": 0.2382, "step": 9923 }, { "epoch": 3.1863862578262965, "grad_norm": 0.7259396314620972, "learning_rate": 3.8019936900812735e-08, "loss": 0.2166, "step": 9924 }, { "epoch": 3.186707336651148, "grad_norm": 0.8264093399047852, "learning_rate": 3.7063557901129144e-08, "loss": 0.2728, "step": 9925 }, { "epoch": 3.187028415475999, "grad_norm": 0.5185015201568604, "learning_rate": 3.61193590820208e-08, "loss": 0.2227, "step": 9926 }, { "epoch": 3.1873494943008507, "grad_norm": 0.9947241544723511, "learning_rate": 3.518734055855122e-08, "loss": 0.3701, "step": 9927 }, { "epoch": 3.1876705731257022, "grad_norm": 0.5121382474899292, 
"learning_rate": 3.4267502444274015e-08, "loss": 0.1992, "step": 9928 }, { "epoch": 3.187991651950554, "grad_norm": 0.5972121953964233, "learning_rate": 3.33598448512773e-08, "loss": 0.2321, "step": 9929 }, { "epoch": 3.1883127307754053, "grad_norm": 0.543809175491333, "learning_rate": 3.246436789015039e-08, "loss": 0.2201, "step": 9930 }, { "epoch": 3.188633809600257, "grad_norm": 0.8629385232925415, "learning_rate": 3.1581071670006015e-08, "loss": 0.2955, "step": 9931 }, { "epoch": 3.1889548884251084, "grad_norm": 0.6378942728042603, "learning_rate": 3.070995629846918e-08, "loss": 0.2141, "step": 9932 }, { "epoch": 3.18927596724996, "grad_norm": 0.5101456642150879, "learning_rate": 2.985102188168831e-08, "loss": 0.2327, "step": 9933 }, { "epoch": 3.1895970460748115, "grad_norm": 0.6790507435798645, "learning_rate": 2.900426852431304e-08, "loss": 0.2728, "step": 9934 }, { "epoch": 3.1899181248996626, "grad_norm": 0.4711005985736847, "learning_rate": 2.8169696329527483e-08, "loss": 0.2497, "step": 9935 }, { "epoch": 3.190239203724514, "grad_norm": 1.078393816947937, "learning_rate": 2.7347305399016977e-08, "loss": 0.2378, "step": 9936 }, { "epoch": 3.1905602825493657, "grad_norm": 0.6660891175270081, "learning_rate": 2.6537095832990245e-08, "loss": 0.2562, "step": 9937 }, { "epoch": 3.1908813613742173, "grad_norm": 0.5800371170043945, "learning_rate": 2.573906773016832e-08, "loss": 0.2481, "step": 9938 }, { "epoch": 3.191202440199069, "grad_norm": 0.47498077154159546, "learning_rate": 2.4953221187784537e-08, "loss": 0.2451, "step": 9939 }, { "epoch": 3.1915235190239204, "grad_norm": 0.5962786078453064, "learning_rate": 2.417955630159563e-08, "loss": 0.2374, "step": 9940 }, { "epoch": 3.191844597848772, "grad_norm": 0.3102143108844757, "learning_rate": 2.3418073165870636e-08, "loss": 0.246, "step": 9941 }, { "epoch": 3.1921656766736235, "grad_norm": 0.6328103542327881, "learning_rate": 2.266877187339089e-08, "loss": 0.2762, "step": 9942 }, { "epoch": 
3.192486755498475, "grad_norm": 0.34649330377578735, "learning_rate": 2.193165251545004e-08, "loss": 0.237, "step": 9943 }, { "epoch": 3.192807834323326, "grad_norm": 0.34423136711120605, "learning_rate": 2.1206715181876226e-08, "loss": 0.3348, "step": 9944 }, { "epoch": 3.1931289131481777, "grad_norm": 0.44818225502967834, "learning_rate": 2.0493959960998787e-08, "loss": 0.4411, "step": 9945 }, { "epoch": 3.1934499919730293, "grad_norm": 0.2298501431941986, "learning_rate": 1.9793386939659376e-08, "loss": 0.1117, "step": 9946 }, { "epoch": 3.193771070797881, "grad_norm": 0.3223446011543274, "learning_rate": 1.9104996203223037e-08, "loss": 0.167, "step": 9947 }, { "epoch": 3.1940921496227324, "grad_norm": 0.4785112738609314, "learning_rate": 1.842878783557822e-08, "loss": 0.2142, "step": 9948 }, { "epoch": 3.194413228447584, "grad_norm": 0.2509891092777252, "learning_rate": 1.7764761919103477e-08, "loss": 0.1088, "step": 9949 }, { "epoch": 3.1947343072724355, "grad_norm": 0.5531440377235413, "learning_rate": 1.7112918534711865e-08, "loss": 0.2591, "step": 9950 }, { "epoch": 3.195055386097287, "grad_norm": 0.7335166931152344, "learning_rate": 1.6473257761828732e-08, "loss": 0.3958, "step": 9951 }, { "epoch": 3.1953764649221386, "grad_norm": 0.8385985493659973, "learning_rate": 1.584577967840284e-08, "loss": 0.305, "step": 9952 }, { "epoch": 3.1956975437469897, "grad_norm": 0.6894627809524536, "learning_rate": 1.5230484360873044e-08, "loss": 0.2275, "step": 9953 }, { "epoch": 3.196018622571841, "grad_norm": 0.570214569568634, "learning_rate": 1.4627371884234909e-08, "loss": 0.2196, "step": 9954 }, { "epoch": 3.1963397013966928, "grad_norm": 0.8872095346450806, "learning_rate": 1.4036442321962995e-08, "loss": 0.3162, "step": 9955 }, { "epoch": 3.1966607802215443, "grad_norm": 0.6017862558364868, "learning_rate": 1.3457695746055265e-08, "loss": 0.2273, "step": 9956 }, { "epoch": 3.196981859046396, "grad_norm": 0.6372138857841492, "learning_rate": 
1.2891132227033087e-08, "loss": 0.2273, "step": 9957 }, { "epoch": 3.1973029378712474, "grad_norm": 0.5612054467201233, "learning_rate": 1.2336751833941229e-08, "loss": 0.2014, "step": 9958 }, { "epoch": 3.197624016696099, "grad_norm": 0.604500949382782, "learning_rate": 1.1794554634314558e-08, "loss": 0.2463, "step": 9959 }, { "epoch": 3.1979450955209505, "grad_norm": 0.7940091490745544, "learning_rate": 1.126454069423355e-08, "loss": 0.3033, "step": 9960 }, { "epoch": 3.198266174345802, "grad_norm": 0.7244638204574585, "learning_rate": 1.074671007825767e-08, "loss": 0.2531, "step": 9961 }, { "epoch": 3.198587253170653, "grad_norm": 0.6201990842819214, "learning_rate": 1.0241062849503103e-08, "loss": 0.2504, "step": 9962 }, { "epoch": 3.1989083319955047, "grad_norm": 0.5652727484703064, "learning_rate": 9.747599069576119e-09, "loss": 0.2249, "step": 9963 }, { "epoch": 3.1992294108203563, "grad_norm": 0.810356616973877, "learning_rate": 9.2663187986064e-09, "loss": 0.3258, "step": 9964 }, { "epoch": 3.199550489645208, "grad_norm": 0.8009127378463745, "learning_rate": 8.797222095224823e-09, "loss": 0.279, "step": 9965 }, { "epoch": 3.1998715684700594, "grad_norm": 0.5440275073051453, "learning_rate": 8.340309016585668e-09, "loss": 0.2299, "step": 9966 }, { "epoch": 3.200192647294911, "grad_norm": 0.7199362516403198, "learning_rate": 7.895579618388827e-09, "loss": 0.2572, "step": 9967 }, { "epoch": 3.2005137261197625, "grad_norm": 0.5794646739959717, "learning_rate": 7.463033954802078e-09, "loss": 0.2327, "step": 9968 }, { "epoch": 3.200834804944614, "grad_norm": 0.7608715295791626, "learning_rate": 7.042672078527713e-09, "loss": 0.284, "step": 9969 }, { "epoch": 3.2011558837694656, "grad_norm": 0.6678571701049805, "learning_rate": 6.634494040802519e-09, "loss": 0.2936, "step": 9970 }, { "epoch": 3.2014769625943167, "grad_norm": 0.7207247614860535, "learning_rate": 6.238499891353389e-09, "loss": 0.2576, "step": 9971 }, { "epoch": 3.2017980414191682, "grad_norm": 
0.7166668176651001, "learning_rate": 5.854689678419512e-09, "loss": 0.3074, "step": 9972 }, { "epoch": 3.20211912024402, "grad_norm": 0.7045150399208069, "learning_rate": 5.483063448785686e-09, "loss": 0.2643, "step": 9973 }, { "epoch": 3.2024401990688713, "grad_norm": 0.8350734114646912, "learning_rate": 5.123621247726806e-09, "loss": 0.2232, "step": 9974 }, { "epoch": 3.202761277893723, "grad_norm": 0.7668434381484985, "learning_rate": 4.776363119030069e-09, "loss": 0.2897, "step": 9975 }, { "epoch": 3.2030823567185744, "grad_norm": 0.4828073978424072, "learning_rate": 4.4412891050171765e-09, "loss": 0.2087, "step": 9976 }, { "epoch": 3.203403435543426, "grad_norm": 0.5834342241287231, "learning_rate": 4.1183992465221315e-09, "loss": 0.238, "step": 9977 }, { "epoch": 3.2037245143682775, "grad_norm": 0.6065945625305176, "learning_rate": 3.807693582869032e-09, "loss": 0.2669, "step": 9978 }, { "epoch": 3.204045593193129, "grad_norm": 1.0055813789367676, "learning_rate": 3.509172151938689e-09, "loss": 0.3039, "step": 9979 }, { "epoch": 3.20436667201798, "grad_norm": 0.6315884590148926, "learning_rate": 3.222834990090906e-09, "loss": 0.2177, "step": 9980 }, { "epoch": 3.2046877508428318, "grad_norm": 0.5141793489456177, "learning_rate": 2.948682132208891e-09, "loss": 0.2139, "step": 9981 }, { "epoch": 3.2050088296676833, "grad_norm": 0.8965477347373962, "learning_rate": 2.6867136117214587e-09, "loss": 0.2669, "step": 9982 }, { "epoch": 3.205329908492535, "grad_norm": 0.4767134487628937, "learning_rate": 2.4369294605253166e-09, "loss": 0.2415, "step": 9983 }, { "epoch": 3.2056509873173864, "grad_norm": 0.5410603880882263, "learning_rate": 2.1993297090627806e-09, "loss": 0.2093, "step": 9984 }, { "epoch": 3.205972066142238, "grad_norm": 0.5572628378868103, "learning_rate": 1.973914386288467e-09, "loss": 0.2392, "step": 9985 }, { "epoch": 3.2062931449670895, "grad_norm": 0.5035524964332581, "learning_rate": 1.7606835196692927e-09, "loss": 0.2171, "step": 9986 }, { 
"epoch": 3.206614223791941, "grad_norm": 0.4417857229709625, "learning_rate": 1.5596371351733752e-09, "loss": 0.2107, "step": 9987 }, { "epoch": 3.2069353026167926, "grad_norm": 0.646651029586792, "learning_rate": 1.3707752573255405e-09, "loss": 0.2278, "step": 9988 }, { "epoch": 3.2072563814416437, "grad_norm": 0.7533266544342041, "learning_rate": 1.1940979091074056e-09, "loss": 0.3198, "step": 9989 }, { "epoch": 3.2075774602664953, "grad_norm": 0.35283178091049194, "learning_rate": 1.029605112068399e-09, "loss": 0.2025, "step": 9990 }, { "epoch": 3.207898539091347, "grad_norm": 0.4045920670032501, "learning_rate": 8.772968862369446e-10, "loss": 0.2284, "step": 9991 }, { "epoch": 3.2082196179161984, "grad_norm": 0.4130226969718933, "learning_rate": 7.371732501759709e-10, "loss": 0.2453, "step": 9992 }, { "epoch": 3.20854069674105, "grad_norm": 0.35095956921577454, "learning_rate": 6.092342209607083e-10, "loss": 0.2329, "step": 9993 }, { "epoch": 3.2088617755659015, "grad_norm": 0.28531062602996826, "learning_rate": 4.934798141786879e-10, "loss": 0.1969, "step": 9994 }, { "epoch": 3.209182854390753, "grad_norm": 0.4474446475505829, "learning_rate": 3.899100439408443e-10, "loss": 0.5176, "step": 9995 }, { "epoch": 3.2095039332156046, "grad_norm": 0.36056339740753174, "learning_rate": 2.985249228593112e-10, "loss": 0.213, "step": 9996 }, { "epoch": 3.209825012040456, "grad_norm": 0.27942565083503723, "learning_rate": 2.1932446206962553e-10, "loss": 0.1295, "step": 9997 }, { "epoch": 3.2101460908653072, "grad_norm": 0.34559550881385803, "learning_rate": 1.5230867123072756e-10, "loss": 0.169, "step": 9998 }, { "epoch": 3.2104671696901588, "grad_norm": 0.1161576583981514, "learning_rate": 9.74775584916543e-11, "loss": 0.0541, "step": 9999 }, { "epoch": 3.2107882485150103, "grad_norm": 0.46221232414245605, "learning_rate": 5.483113054705058e-11, "loss": 0.1606, "step": 10000 } ], "logging_steps": 1, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 4, 
"save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.492217387409408e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }