{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9994890883877088, "eval_steps": 500, "global_step": 3424, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005838989854755127, "grad_norm": 0.3390687170317642, "learning_rate": 5.830903790087464e-08, "loss": 0.714, "step": 1 }, { "epoch": 0.0011677979709510255, "grad_norm": 0.3726280622987026, "learning_rate": 1.1661807580174928e-07, "loss": 0.7227, "step": 2 }, { "epoch": 0.0017516969564265383, "grad_norm": 0.308056775893573, "learning_rate": 1.7492711370262392e-07, "loss": 0.6551, "step": 3 }, { "epoch": 0.002335595941902051, "grad_norm": 0.3493627281156298, "learning_rate": 2.3323615160349856e-07, "loss": 0.7156, "step": 4 }, { "epoch": 0.0029194949273775635, "grad_norm": 0.3447101896711025, "learning_rate": 2.915451895043732e-07, "loss": 0.7534, "step": 5 }, { "epoch": 0.0035033939128530766, "grad_norm": 0.3367839005146549, "learning_rate": 3.4985422740524783e-07, "loss": 0.6685, "step": 6 }, { "epoch": 0.004087292898328589, "grad_norm": 0.3445086896525828, "learning_rate": 4.0816326530612243e-07, "loss": 0.7358, "step": 7 }, { "epoch": 0.004671191883804102, "grad_norm": 0.3424433570172469, "learning_rate": 4.6647230320699713e-07, "loss": 0.7051, "step": 8 }, { "epoch": 0.005255090869279615, "grad_norm": 0.37619252069519077, "learning_rate": 5.247813411078718e-07, "loss": 0.7863, "step": 9 }, { "epoch": 0.005838989854755127, "grad_norm": 0.33734000398285563, "learning_rate": 5.830903790087464e-07, "loss": 0.7107, "step": 10 }, { "epoch": 0.00642288884023064, "grad_norm": 0.36528086614959604, "learning_rate": 6.413994169096211e-07, "loss": 0.7198, "step": 11 }, { "epoch": 0.007006787825706153, "grad_norm": 0.3300864452536882, "learning_rate": 6.997084548104957e-07, "loss": 0.6905, "step": 12 }, { "epoch": 0.007590686811181665, "grad_norm": 0.30763664573117133, "learning_rate": 7.580174927113704e-07, "loss": 0.7007, "step": 13 }, { "epoch": 0.008174585796657178, "grad_norm": 0.3141864208998816, "learning_rate": 8.163265306122449e-07, "loss": 0.7274, "step": 14 }, { "epoch": 0.00875848478213269, "grad_norm": 0.2661931039583663, "learning_rate": 8.746355685131196e-07, "loss": 0.6777, "step": 15 }, { "epoch": 0.009342383767608204, "grad_norm": 0.285375076158477, "learning_rate": 9.329446064139943e-07, "loss": 0.6741, "step": 16 }, { "epoch": 0.009926282753083717, "grad_norm": 0.2575491071602376, "learning_rate": 9.91253644314869e-07, "loss": 0.6744, "step": 17 }, { "epoch": 0.01051018173855923, "grad_norm": 0.2574569675605273, "learning_rate": 1.0495626822157436e-06, "loss": 0.6617, "step": 18 }, { "epoch": 0.011094080724034743, "grad_norm": 0.20944983331869532, "learning_rate": 1.1078717201166181e-06, "loss": 0.6726, "step": 19 }, { "epoch": 0.011677979709510254, "grad_norm": 0.18046622826541528, "learning_rate": 1.1661807580174927e-06, "loss": 0.6136, "step": 20 }, { "epoch": 0.012261878694985767, "grad_norm": 0.18487244021099764, "learning_rate": 1.2244897959183673e-06, "loss": 0.6773, "step": 21 }, { "epoch": 0.01284577768046128, "grad_norm": 0.17253589116285345, "learning_rate": 1.2827988338192421e-06, "loss": 0.6687, "step": 22 }, { "epoch": 0.013429676665936793, "grad_norm": 0.176317529078472, "learning_rate": 1.3411078717201167e-06, "loss": 0.6456, "step": 23 }, { "epoch": 0.014013575651412306, "grad_norm": 0.17166278322990755, "learning_rate": 1.3994169096209913e-06, "loss": 0.6404, "step": 24 }, { "epoch": 0.014597474636887818, "grad_norm": 0.16590290431745042, "learning_rate": 1.4577259475218661e-06, "loss": 0.6358, "step": 25 }, { "epoch": 0.01518137362236333, "grad_norm": 0.1627792330607246, "learning_rate": 1.5160349854227407e-06, "loss": 0.6303, "step": 26 }, { "epoch": 0.015765272607838846, "grad_norm": 0.23063585106732154, "learning_rate": 1.5743440233236153e-06, "loss": 0.6781, "step": 27 }, { "epoch": 0.016349171593314355, "grad_norm": 0.25875599411376604, "learning_rate": 1.6326530612244897e-06, "loss": 0.6323, "step": 28 }, { "epoch": 0.016933070578789868, "grad_norm": 0.2445175733204624, "learning_rate": 1.6909620991253645e-06, "loss": 0.614, "step": 29 }, { "epoch": 0.01751696956426538, "grad_norm": 0.25211167764035386, "learning_rate": 1.7492711370262391e-06, "loss": 0.645, "step": 30 }, { "epoch": 0.018100868549740894, "grad_norm": 0.19755348378999926, "learning_rate": 1.8075801749271137e-06, "loss": 0.6288, "step": 31 }, { "epoch": 0.018684767535216407, "grad_norm": 0.20487950220195067, "learning_rate": 1.8658892128279885e-06, "loss": 0.6526, "step": 32 }, { "epoch": 0.01926866652069192, "grad_norm": 0.16366613175734854, "learning_rate": 1.9241982507288633e-06, "loss": 0.59, "step": 33 }, { "epoch": 0.019852565506167433, "grad_norm": 0.15780317691924817, "learning_rate": 1.982507288629738e-06, "loss": 0.6057, "step": 34 }, { "epoch": 0.020436464491642947, "grad_norm": 0.13538774163487852, "learning_rate": 2.0408163265306125e-06, "loss": 0.5556, "step": 35 }, { "epoch": 0.02102036347711846, "grad_norm": 0.1426135006241502, "learning_rate": 2.099125364431487e-06, "loss": 0.581, "step": 36 }, { "epoch": 0.021604262462593973, "grad_norm": 0.15521166963164182, "learning_rate": 2.1574344023323617e-06, "loss": 0.6057, "step": 37 }, { "epoch": 0.022188161448069486, "grad_norm": 0.14348837457159508, "learning_rate": 2.2157434402332363e-06, "loss": 0.5801, "step": 38 }, { "epoch": 0.022772060433544995, "grad_norm": 0.14707552415840563, "learning_rate": 2.274052478134111e-06, "loss": 0.5643, "step": 39 }, { "epoch": 0.02335595941902051, "grad_norm": 0.1566108542279272, "learning_rate": 2.3323615160349855e-06, "loss": 0.5711, "step": 40 }, { "epoch": 0.02393985840449602, "grad_norm": 0.1515284275043642, "learning_rate": 2.39067055393586e-06, "loss": 0.5622, "step": 41 }, { "epoch": 0.024523757389971534, "grad_norm": 0.13886579017110506, "learning_rate": 2.4489795918367347e-06, "loss": 0.5146, "step": 42 }, { "epoch": 0.025107656375447047, "grad_norm": 0.1370383724604386, "learning_rate": 2.5072886297376097e-06, "loss": 0.5625, "step": 43 }, { "epoch": 0.02569155536092256, "grad_norm": 0.15855048958209444, "learning_rate": 2.5655976676384843e-06, "loss": 0.6613, "step": 44 }, { "epoch": 0.026275454346398074, "grad_norm": 0.1362960392350261, "learning_rate": 2.6239067055393585e-06, "loss": 0.5379, "step": 45 }, { "epoch": 0.026859353331873587, "grad_norm": 0.12908968378013574, "learning_rate": 2.6822157434402335e-06, "loss": 0.5433, "step": 46 }, { "epoch": 0.0274432523173491, "grad_norm": 0.13663459914763312, "learning_rate": 2.740524781341108e-06, "loss": 0.6143, "step": 47 }, { "epoch": 0.028027151302824613, "grad_norm": 0.11789674557992422, "learning_rate": 2.7988338192419827e-06, "loss": 0.5244, "step": 48 }, { "epoch": 0.028611050288300126, "grad_norm": 0.14288880812488924, "learning_rate": 2.8571428571428573e-06, "loss": 0.5539, "step": 49 }, { "epoch": 0.029194949273775635, "grad_norm": 0.12249395931139165, "learning_rate": 2.9154518950437323e-06, "loss": 0.5084, "step": 50 }, { "epoch": 0.02977884825925115, "grad_norm": 0.12133647543978174, "learning_rate": 2.9737609329446064e-06, "loss": 0.5478, "step": 51 }, { "epoch": 0.03036274724472666, "grad_norm": 0.1342023883234249, "learning_rate": 3.0320699708454815e-06, "loss": 0.6528, "step": 52 }, { "epoch": 0.030946646230202175, "grad_norm": 0.12550323675634337, "learning_rate": 3.090379008746356e-06, "loss": 0.5011, "step": 53 }, { "epoch": 0.03153054521567769, "grad_norm": 0.11525911853069458, "learning_rate": 3.1486880466472307e-06, "loss": 0.5459, "step": 54 }, { "epoch": 0.0321144442011532, "grad_norm": 0.12167786695046783, "learning_rate": 3.2069970845481052e-06, "loss": 0.5434, "step": 55 }, { "epoch": 0.03269834318662871, "grad_norm": 0.11863761227402488, "learning_rate": 3.2653061224489794e-06, "loss": 0.5112, "step": 56 }, { "epoch": 0.03328224217210422, "grad_norm": 0.1158241464291786, "learning_rate": 3.3236151603498544e-06, "loss": 0.496, "step": 57 }, { "epoch": 0.033866141157579736, "grad_norm": 0.11818433536270222, "learning_rate": 3.381924198250729e-06, "loss": 0.5136, "step": 58 }, { "epoch": 0.03445004014305525, "grad_norm": 0.12251151571717327, "learning_rate": 3.440233236151604e-06, "loss": 0.5191, "step": 59 }, { "epoch": 0.03503393912853076, "grad_norm": 0.1295969800082025, "learning_rate": 3.4985422740524782e-06, "loss": 0.5826, "step": 60 }, { "epoch": 0.035617838114006276, "grad_norm": 0.11974461074759006, "learning_rate": 3.5568513119533532e-06, "loss": 0.5348, "step": 61 }, { "epoch": 0.03620173709948179, "grad_norm": 0.12559435144670525, "learning_rate": 3.6151603498542274e-06, "loss": 0.6036, "step": 62 }, { "epoch": 0.0367856360849573, "grad_norm": 0.1293984124423545, "learning_rate": 3.6734693877551024e-06, "loss": 0.57, "step": 63 }, { "epoch": 0.037369535070432815, "grad_norm": 0.11961581082780358, "learning_rate": 3.731778425655977e-06, "loss": 0.5919, "step": 64 }, { "epoch": 0.03795343405590833, "grad_norm": 0.1298032133513758, "learning_rate": 3.790087463556852e-06, "loss": 0.5202, "step": 65 }, { "epoch": 0.03853733304138384, "grad_norm": 0.12108394196639935, "learning_rate": 3.848396501457727e-06, "loss": 0.4868, "step": 66 }, { "epoch": 0.039121232026859354, "grad_norm": 0.13078679163353937, "learning_rate": 3.906705539358601e-06, "loss": 0.5421, "step": 67 }, { "epoch": 0.03970513101233487, "grad_norm": 0.12629469965461465, "learning_rate": 3.965014577259476e-06, "loss": 0.5435, "step": 68 }, { "epoch": 0.04028902999781038, "grad_norm": 0.11950209724900167, "learning_rate": 4.02332361516035e-06, "loss": 0.5166, "step": 69 }, { "epoch": 0.04087292898328589, "grad_norm": 0.11973821497490046, "learning_rate": 4.081632653061225e-06, "loss": 0.5017, "step": 70 }, { "epoch": 0.041456827968761406, "grad_norm": 0.11575336287075896, "learning_rate": 4.139941690962099e-06, "loss": 0.495, "step": 71 }, { "epoch": 0.04204072695423692, "grad_norm": 0.11322728721519408, "learning_rate": 4.198250728862974e-06, "loss": 0.474, "step": 72 }, { "epoch": 0.04262462593971243, "grad_norm": 0.12112332495807418, "learning_rate": 4.256559766763848e-06, "loss": 0.5067, "step": 73 }, { "epoch": 0.043208524925187945, "grad_norm": 0.12208742555935308, "learning_rate": 4.314868804664723e-06, "loss": 0.5203, "step": 74 }, { "epoch": 0.04379242391066346, "grad_norm": 0.12295972429443812, "learning_rate": 4.3731778425655976e-06, "loss": 0.5442, "step": 75 }, { "epoch": 0.04437632289613897, "grad_norm": 0.11253164808288783, "learning_rate": 4.431486880466473e-06, "loss": 0.4809, "step": 76 }, { "epoch": 0.04496022188161448, "grad_norm": 0.11419453983341185, "learning_rate": 4.489795918367348e-06, "loss": 0.5151, "step": 77 }, { "epoch": 0.04554412086708999, "grad_norm": 0.11956441949047217, "learning_rate": 4.548104956268222e-06, "loss": 0.5054, "step": 78 }, { "epoch": 0.046128019852565504, "grad_norm": 0.11508575067820896, "learning_rate": 4.606413994169097e-06, "loss": 0.4673, "step": 79 }, { "epoch": 0.04671191883804102, "grad_norm": 0.11982209853482434, "learning_rate": 4.664723032069971e-06, "loss": 0.5588, "step": 80 }, { "epoch": 0.04729581782351653, "grad_norm": 0.12479530870671895, "learning_rate": 4.723032069970846e-06, "loss": 0.6101, "step": 81 }, { "epoch": 0.04787971680899204, "grad_norm": 0.11498278285573045, "learning_rate": 4.78134110787172e-06, "loss": 0.5615, "step": 82 }, { "epoch": 0.048463615794467556, "grad_norm": 0.12076457332503471, "learning_rate": 4.839650145772595e-06, "loss": 0.5285, "step": 83 }, { "epoch": 0.04904751477994307, "grad_norm": 0.119074316539934, "learning_rate": 4.897959183673469e-06, "loss": 0.532, "step": 84 }, { "epoch": 0.04963141376541858, "grad_norm": 0.12397965182280622, "learning_rate": 4.956268221574344e-06, "loss": 0.5371, "step": 85 }, { "epoch": 0.050215312750894095, "grad_norm": 0.12536513820651146, "learning_rate": 5.014577259475219e-06, "loss": 0.5451, "step": 86 }, { "epoch": 0.05079921173636961, "grad_norm": 0.11625777310631927, "learning_rate": 5.0728862973760935e-06, "loss": 0.4691, "step": 87 }, { "epoch": 0.05138311072184512, "grad_norm": 0.1207269431731059, "learning_rate": 5.1311953352769686e-06, "loss": 0.5105, "step": 88 }, { "epoch": 0.051967009707320634, "grad_norm": 0.11554973251853357, "learning_rate": 5.189504373177843e-06, "loss": 0.5322, "step": 89 }, { "epoch": 0.05255090869279615, "grad_norm": 0.1296807404979616, "learning_rate": 5.247813411078717e-06, "loss": 0.5459, "step": 90 }, { "epoch": 0.05313480767827166, "grad_norm": 0.12292871905902836, "learning_rate": 5.306122448979593e-06, "loss": 0.4945, "step": 91 }, { "epoch": 0.05371870666374717, "grad_norm": 0.13004063091045007, "learning_rate": 5.364431486880467e-06, "loss": 0.5008, "step": 92 }, { "epoch": 0.054302605649222686, "grad_norm": 0.1179304077310915, "learning_rate": 5.422740524781341e-06, "loss": 0.4609, "step": 93 }, { "epoch": 0.0548865046346982, "grad_norm": 0.129278854511865, "learning_rate": 5.481049562682216e-06, "loss": 0.5234, "step": 94 }, { "epoch": 0.05547040362017371, "grad_norm": 0.11080034511528934, "learning_rate": 5.539358600583091e-06, "loss": 0.4914, "step": 95 }, { "epoch": 0.056054302605649226, "grad_norm": 0.12635983290797684, "learning_rate": 5.597667638483965e-06, "loss": 0.4878, "step": 96 }, { "epoch": 0.05663820159112474, "grad_norm": 0.12138863772673356, "learning_rate": 5.65597667638484e-06, "loss": 0.5497, "step": 97 }, { "epoch": 0.05722210057660025, "grad_norm": 0.12208201294847129, "learning_rate": 5.7142857142857145e-06, "loss": 0.5148, "step": 98 }, { "epoch": 0.05780599956207576, "grad_norm": 0.12653090990527333, "learning_rate": 5.7725947521865895e-06, "loss": 0.5217, "step": 99 }, { "epoch": 0.05838989854755127, "grad_norm": 0.1184760806277104, "learning_rate": 5.8309037900874645e-06, "loss": 0.4714, "step": 100 }, { "epoch": 0.058973797533026784, "grad_norm": 0.11654466972686087, "learning_rate": 5.889212827988339e-06, "loss": 0.5102, "step": 101 }, { "epoch": 0.0595576965185023, "grad_norm": 0.11884911331632855, "learning_rate": 5.947521865889213e-06, "loss": 0.5124, "step": 102 }, { "epoch": 0.06014159550397781, "grad_norm": 0.11862501491844941, "learning_rate": 6.005830903790088e-06, "loss": 0.456, "step": 103 }, { "epoch": 0.06072549448945332, "grad_norm": 0.11694550824443123, "learning_rate": 6.064139941690963e-06, "loss": 0.4825, "step": 104 }, { "epoch": 0.061309393474928836, "grad_norm": 0.11923784758304308, "learning_rate": 6.122448979591837e-06, "loss": 0.5262, "step": 105 }, { "epoch": 0.06189329246040435, "grad_norm": 0.12344966270888437, "learning_rate": 6.180758017492712e-06, "loss": 0.4852, "step": 106 }, { "epoch": 0.06247719144587986, "grad_norm": 0.12114013733602692, "learning_rate": 6.239067055393586e-06, "loss": 0.4723, "step": 107 }, { "epoch": 0.06306109043135538, "grad_norm": 0.12273614882132368, "learning_rate": 6.297376093294461e-06, "loss": 0.4857, "step": 108 }, { "epoch": 0.06364498941683089, "grad_norm": 0.12488618408875408, "learning_rate": 6.355685131195336e-06, "loss": 0.495, "step": 109 }, { "epoch": 0.0642288884023064, "grad_norm": 0.12152245691740408, "learning_rate": 6.4139941690962105e-06, "loss": 0.4643, "step": 110 }, { "epoch": 0.06481278738778191, "grad_norm": 0.1320966260795437, "learning_rate": 6.472303206997085e-06, "loss": 0.5063, "step": 111 }, { "epoch": 0.06539668637325742, "grad_norm": 0.1291047710393746, "learning_rate": 6.530612244897959e-06, "loss": 0.4687, "step": 112 }, { "epoch": 0.06598058535873294, "grad_norm": 0.11819012797893215, "learning_rate": 6.588921282798835e-06, "loss": 0.5016, "step": 113 }, { "epoch": 0.06656448434420845, "grad_norm": 0.12796263071234956, "learning_rate": 6.647230320699709e-06, "loss": 0.4969, "step": 114 }, { "epoch": 0.06714838332968397, "grad_norm": 0.12005574408733562, "learning_rate": 6.705539358600584e-06, "loss": 0.4878, "step": 115 }, { "epoch": 0.06773228231515947, "grad_norm": 0.12878551733880775, "learning_rate": 6.763848396501458e-06, "loss": 0.5249, "step": 116 }, { "epoch": 0.06831618130063499, "grad_norm": 0.12109672267310762, "learning_rate": 6.822157434402333e-06, "loss": 0.5378, "step": 117 }, { "epoch": 0.0689000802861105, "grad_norm": 0.1240405064205781, "learning_rate": 6.880466472303208e-06, "loss": 0.484, "step": 118 }, { "epoch": 0.06948397927158602, "grad_norm": 0.11936579082475554, "learning_rate": 6.938775510204082e-06, "loss": 0.4456, "step": 119 }, { "epoch": 0.07006787825706152, "grad_norm": 0.1324714206696395, "learning_rate": 6.9970845481049564e-06, "loss": 0.4794, "step": 120 }, { "epoch": 0.07065177724253704, "grad_norm": 0.12196202770905257, "learning_rate": 7.055393586005832e-06, "loss": 0.4746, "step": 121 }, { "epoch": 0.07123567622801255, "grad_norm": 0.12696441168274267, "learning_rate": 7.1137026239067065e-06, "loss": 0.5098, "step": 122 }, { "epoch": 0.07181957521348807, "grad_norm": 0.1255190930111032, "learning_rate": 7.172011661807581e-06, "loss": 0.4868, "step": 123 }, { "epoch": 0.07240347419896358, "grad_norm": 0.13646491460270324, "learning_rate": 7.230320699708455e-06, "loss": 0.5198, "step": 124 }, { "epoch": 0.0729873731844391, "grad_norm": 0.13584249941516685, "learning_rate": 7.28862973760933e-06, "loss": 0.5564, "step": 125 }, { "epoch": 0.0735712721699146, "grad_norm": 0.12026217586944773, "learning_rate": 7.346938775510205e-06, "loss": 0.458, "step": 126 }, { "epoch": 0.07415517115539012, "grad_norm": 0.11876502166032897, "learning_rate": 7.40524781341108e-06, "loss": 0.5034, "step": 127 }, { "epoch": 0.07473907014086563, "grad_norm": 0.12301551769890233, "learning_rate": 7.463556851311954e-06, "loss": 0.4906, "step": 128 }, { "epoch": 0.07532296912634115, "grad_norm": 0.1348698481810509, "learning_rate": 7.521865889212828e-06, "loss": 0.5343, "step": 129 }, { "epoch": 0.07590686811181666, "grad_norm": 0.1343186821959265, "learning_rate": 7.580174927113704e-06, "loss": 0.5207, "step": 130 }, { "epoch": 0.07649076709729216, "grad_norm": 0.12274218212246112, "learning_rate": 7.638483965014577e-06, "loss": 0.4692, "step": 131 }, { "epoch": 0.07707466608276768, "grad_norm": 0.12559358118214686, "learning_rate": 7.696793002915453e-06, "loss": 0.5248, "step": 132 }, { "epoch": 0.07765856506824319, "grad_norm": 0.12740483791621776, "learning_rate": 7.755102040816327e-06, "loss": 0.5398, "step": 133 }, { "epoch": 0.07824246405371871, "grad_norm": 0.11841361706845167, "learning_rate": 7.813411078717202e-06, "loss": 0.4607, "step": 134 }, { "epoch": 0.07882636303919421, "grad_norm": 0.12275848211365212, "learning_rate": 7.871720116618077e-06, "loss": 0.491, "step": 135 }, { "epoch": 0.07941026202466973, "grad_norm": 0.1300430132675297, "learning_rate": 7.930029154518952e-06, "loss": 0.483, "step": 136 }, { "epoch": 0.07999416101014524, "grad_norm": 0.13450869922533656, "learning_rate": 7.988338192419826e-06, "loss": 0.5509, "step": 137 }, { "epoch": 0.08057805999562076, "grad_norm": 0.12363811242728294, "learning_rate": 8.0466472303207e-06, "loss": 0.4953, "step": 138 }, { "epoch": 0.08116195898109627, "grad_norm": 0.12776767087179056, "learning_rate": 8.104956268221576e-06, "loss": 0.5421, "step": 139 }, { "epoch": 0.08174585796657179, "grad_norm": 0.12062799229227246, "learning_rate": 8.16326530612245e-06, "loss": 0.4928, "step": 140 }, { "epoch": 0.08232975695204729, "grad_norm": 0.11591284479742185, "learning_rate": 8.221574344023324e-06, "loss": 0.4565, "step": 141 }, { "epoch": 0.08291365593752281, "grad_norm": 0.12253075866775676, "learning_rate": 8.279883381924198e-06, "loss": 0.4565, "step": 142 }, { "epoch": 0.08349755492299832, "grad_norm": 0.12356711120458595, "learning_rate": 8.338192419825074e-06, "loss": 0.5073, "step": 143 }, { "epoch": 0.08408145390847384, "grad_norm": 0.12279945956096004, "learning_rate": 8.396501457725948e-06, "loss": 0.49, "step": 144 }, { "epoch": 0.08466535289394934, "grad_norm": 0.13456201958000596, "learning_rate": 8.454810495626823e-06, "loss": 0.4906, "step": 145 }, { "epoch": 0.08524925187942486, "grad_norm": 0.11754134903383133, "learning_rate": 8.513119533527697e-06, "loss": 0.4523, "step": 146 }, { "epoch": 0.08583315086490037, "grad_norm": 0.118290181561225, "learning_rate": 8.571428571428571e-06, "loss": 0.4604, "step": 147 }, { "epoch": 0.08641704985037589, "grad_norm": 0.12737488238980574, "learning_rate": 8.629737609329447e-06, "loss": 0.4752, "step": 148 }, { "epoch": 0.0870009488358514, "grad_norm": 0.12376039953147717, "learning_rate": 8.688046647230321e-06, "loss": 0.5086, "step": 149 }, { "epoch": 0.08758484782132692, "grad_norm": 0.11346352672036682, "learning_rate": 8.746355685131195e-06, "loss": 0.4368, "step": 150 }, { "epoch": 0.08816874680680242, "grad_norm": 0.11955739896931368, "learning_rate": 8.804664723032071e-06, "loss": 0.4642, "step": 151 }, { "epoch": 0.08875264579227794, "grad_norm": 0.12310637011734082, "learning_rate": 8.862973760932945e-06, "loss": 0.4828, "step": 152 }, { "epoch": 0.08933654477775345, "grad_norm": 0.12490042229172243, "learning_rate": 8.921282798833821e-06, "loss": 0.4826, "step": 153 }, { "epoch": 0.08992044376322895, "grad_norm": 0.12960293150101349, "learning_rate": 8.979591836734695e-06, "loss": 0.4745, "step": 154 }, { "epoch": 0.09050434274870447, "grad_norm": 0.12017674644319651, "learning_rate": 9.03790087463557e-06, "loss": 0.4623, "step": 155 }, { "epoch": 0.09108824173417998, "grad_norm": 0.13276590698712729, "learning_rate": 9.096209912536444e-06, "loss": 0.4989, "step": 156 }, { "epoch": 0.0916721407196555, "grad_norm": 0.12521705641771727, "learning_rate": 9.15451895043732e-06, "loss": 0.5058, "step": 157 }, { "epoch": 0.09225603970513101, "grad_norm": 0.12293962616817236, "learning_rate": 9.212827988338194e-06, "loss": 0.5064, "step": 158 }, { "epoch": 0.09283993869060653, "grad_norm": 0.12526851605944064, "learning_rate": 9.271137026239068e-06, "loss": 0.4945, "step": 159 }, { "epoch": 0.09342383767608203, "grad_norm": 0.12552070875402624, "learning_rate": 9.329446064139942e-06, "loss": 0.5386, "step": 160 }, { "epoch": 0.09400773666155755, "grad_norm": 0.1218113157130629, "learning_rate": 9.387755102040818e-06, "loss": 0.4617, "step": 161 }, { "epoch": 0.09459163564703306, "grad_norm": 0.1178836146453351, "learning_rate": 9.446064139941692e-06, "loss": 0.4384, "step": 162 }, { "epoch": 0.09517553463250858, "grad_norm": 0.119702745135886, "learning_rate": 9.504373177842566e-06, "loss": 0.4867, "step": 163 }, { "epoch": 0.09575943361798409, "grad_norm": 0.12020549094009517, "learning_rate": 9.56268221574344e-06, "loss": 0.4474, "step": 164 }, { "epoch": 0.0963433326034596, "grad_norm": 0.12268230724017845, "learning_rate": 9.620991253644316e-06, "loss": 0.4767, "step": 165 }, { "epoch": 0.09692723158893511, "grad_norm": 0.11172922847217512, "learning_rate": 9.67930029154519e-06, "loss": 0.4559, "step": 166 }, { "epoch": 0.09751113057441063, "grad_norm": 0.13993575481044765, "learning_rate": 9.737609329446065e-06, "loss": 0.5055, "step": 167 }, { "epoch": 0.09809502955988614, "grad_norm": 0.12518964510579816, "learning_rate": 9.795918367346939e-06, "loss": 0.531, "step": 168 }, { "epoch": 0.09867892854536166, "grad_norm": 0.12054295269141461, "learning_rate": 9.854227405247815e-06, "loss": 0.4757, "step": 169 }, { "epoch": 0.09926282753083716, "grad_norm": 0.12399644706558342, "learning_rate": 9.912536443148689e-06, "loss": 0.4898, "step": 170 }, { "epoch": 0.09984672651631268, "grad_norm": 0.118400547184647, "learning_rate": 9.970845481049563e-06, "loss": 0.4148, "step": 171 }, { "epoch": 0.10043062550178819, "grad_norm": 0.13013971903809793, "learning_rate": 1.0029154518950439e-05, "loss": 0.4867, "step": 172 }, { "epoch": 0.10101452448726371, "grad_norm": 0.11897475562175724, "learning_rate": 1.0087463556851313e-05, "loss": 0.4644, "step": 173 }, { "epoch": 0.10159842347273922, "grad_norm": 0.1185794939059071, "learning_rate": 1.0145772594752187e-05, "loss": 0.4889, "step": 174 }, { "epoch": 0.10218232245821472, "grad_norm": 0.1323132258029058, "learning_rate": 1.0204081632653063e-05, "loss": 0.503, "step": 175 }, { "epoch": 0.10276622144369024, "grad_norm": 0.11587676156384519, "learning_rate": 1.0262390670553937e-05, "loss": 0.4927, "step": 176 }, { "epoch": 0.10335012042916575, "grad_norm": 0.12222531599221165, "learning_rate": 1.0320699708454811e-05, "loss": 0.5003, "step": 177 }, { "epoch": 0.10393401941464127, "grad_norm": 0.12411148530586089, "learning_rate": 1.0379008746355685e-05, "loss": 0.4644, "step": 178 }, { "epoch": 0.10451791840011677, "grad_norm": 0.1203039207979106, "learning_rate": 1.043731778425656e-05, "loss": 0.4627, "step": 179 }, { "epoch": 0.1051018173855923, "grad_norm": 0.11593711769961265, "learning_rate": 1.0495626822157434e-05, "loss": 0.4849, "step": 180 }, { "epoch": 0.1056857163710678, "grad_norm": 0.12372317700120196, "learning_rate": 1.0553935860058311e-05, "loss": 0.526, "step": 181 }, { "epoch": 0.10626961535654332, "grad_norm": 0.12404014194985187, "learning_rate": 1.0612244897959186e-05, "loss": 0.5098, "step": 182 }, { "epoch": 0.10685351434201883, "grad_norm": 0.1200099445618, "learning_rate": 1.067055393586006e-05, "loss": 0.4716, "step": 183 }, { "epoch": 0.10743741332749435, "grad_norm": 0.12218359305947715, "learning_rate": 1.0728862973760934e-05, "loss": 0.4605, "step": 184 }, { "epoch": 0.10802131231296985, "grad_norm": 0.12078189981533086, "learning_rate": 1.0787172011661808e-05, "loss": 0.4872, "step": 185 }, { "epoch": 0.10860521129844537, "grad_norm": 0.11374282264956635, "learning_rate": 1.0845481049562682e-05, "loss": 0.5, "step": 186 }, { "epoch": 0.10918911028392088, "grad_norm": 0.12259186585442199, "learning_rate": 1.0903790087463556e-05, "loss": 0.475, "step": 187 }, { "epoch": 0.1097730092693964, "grad_norm": 0.1234565384970318, "learning_rate": 1.0962099125364432e-05, "loss": 0.4614, "step": 188 }, { "epoch": 0.1103569082548719, "grad_norm": 0.11756024120743068, "learning_rate": 1.1020408163265306e-05, "loss": 0.4316, "step": 189 }, { "epoch": 0.11094080724034742, "grad_norm": 0.11810379000586563, "learning_rate": 1.1078717201166182e-05, "loss": 0.4892, "step": 190 }, { "epoch": 0.11152470622582293, "grad_norm": 0.12262140319108787, "learning_rate": 1.1137026239067056e-05, "loss": 0.5062, "step": 191 }, { "epoch": 0.11210860521129845, "grad_norm": 0.11954689792128187, "learning_rate": 1.119533527696793e-05, "loss": 0.4651, "step": 192 }, { "epoch": 0.11269250419677396, "grad_norm": 0.12090152338089157, "learning_rate": 1.1253644314868807e-05, "loss": 0.5301, "step": 193 }, { "epoch": 0.11327640318224948, "grad_norm": 0.11832185126359229, "learning_rate": 1.131195335276968e-05, "loss": 0.4763, "step": 194 }, { "epoch": 0.11386030216772498, "grad_norm": 0.11706295067367173, "learning_rate": 1.1370262390670555e-05, "loss": 0.5078, "step": 195 }, { "epoch": 0.1144442011532005, "grad_norm": 0.11737053753250545, "learning_rate": 1.1428571428571429e-05, "loss": 0.4463, "step": 196 }, { "epoch": 0.11502810013867601, "grad_norm": 0.11705396558705497, "learning_rate": 1.1486880466472303e-05, "loss": 0.4404, "step": 197 }, { "epoch": 0.11561199912415152, "grad_norm": 0.11722340263652692, "learning_rate": 1.1545189504373179e-05, "loss": 0.5247, "step": 198 }, { "epoch": 0.11619589810962704, "grad_norm": 0.11761698668465777, "learning_rate": 1.1603498542274055e-05, "loss": 0.4497, "step": 199 }, { "epoch": 0.11677979709510254, "grad_norm": 0.11900539072170403, "learning_rate": 1.1661807580174929e-05, "loss": 0.4835, "step": 200 }, { "epoch": 0.11736369608057806, "grad_norm": 0.1233465516105401, "learning_rate": 1.1720116618075803e-05, "loss": 0.4957, "step": 201 }, { "epoch": 0.11794759506605357, "grad_norm": 0.12511261049691477, "learning_rate": 1.1778425655976677e-05, "loss": 0.4478, "step": 202 }, { "epoch": 0.11853149405152909, "grad_norm": 0.11321936215742914, "learning_rate": 1.1836734693877552e-05, "loss": 0.4687, "step": 203 }, { "epoch": 0.1191153930370046, "grad_norm": 0.12053064521119089, "learning_rate": 1.1895043731778426e-05, "loss": 0.4581, "step": 204 }, { "epoch": 0.11969929202248011, "grad_norm": 0.11997972732345266, "learning_rate": 1.19533527696793e-05, "loss": 0.4649, "step": 205 }, { "epoch": 0.12028319100795562, "grad_norm": 0.11263799934908944, "learning_rate": 1.2011661807580176e-05, "loss": 0.4269, "step": 206 }, { "epoch": 0.12086708999343114, "grad_norm": 0.11799630315630344, "learning_rate": 1.2069970845481052e-05, "loss": 0.4451, "step": 207 }, { "epoch": 0.12145098897890665, "grad_norm": 0.11526656367535712, "learning_rate": 1.2128279883381926e-05, "loss": 0.4517, "step": 208 }, { "epoch": 0.12203488796438217, "grad_norm": 0.11572999636650731, "learning_rate": 1.21865889212828e-05, "loss": 0.4737, "step": 209 }, { "epoch": 0.12261878694985767, "grad_norm": 0.12632623812513719, "learning_rate": 1.2244897959183674e-05, "loss": 0.469, "step": 210 }, { "epoch": 0.12320268593533319, "grad_norm": 0.12958377011342753, "learning_rate": 1.2303206997084548e-05, "loss": 0.4595, "step": 211 }, { "epoch": 0.1237865849208087, "grad_norm": 0.11466194116909215, "learning_rate": 1.2361516034985424e-05, "loss": 0.4659, "step": 212 }, { "epoch": 0.12437048390628422, "grad_norm": 0.11608545331438934, "learning_rate": 1.2419825072886298e-05, "loss": 0.4448, "step": 213 }, { "epoch": 0.12495438289175972, "grad_norm": 0.12094852540363446, "learning_rate": 1.2478134110787173e-05, "loss": 0.4907, "step": 214 }, { "epoch": 0.12553828187723523, "grad_norm": 0.11446473393375815, "learning_rate": 1.2536443148688047e-05, "loss": 0.4936, "step": 215 }, { "epoch": 0.12612218086271076, "grad_norm": 0.11017316830215937, "learning_rate": 1.2594752186588923e-05, "loss": 0.5023, "step": 216 }, { "epoch": 0.12670607984818627, "grad_norm": 0.1192063388394849, "learning_rate": 1.2653061224489798e-05, "loss": 0.5565, "step": 217 }, { "epoch": 0.12728997883366178, "grad_norm": 0.11363451582936762, "learning_rate": 1.2711370262390673e-05, "loss": 0.4961, "step": 218 }, { "epoch": 0.12787387781913728, "grad_norm": 0.11222740002915803, "learning_rate": 1.2769679300291547e-05, "loss": 0.4658, "step": 219 }, { "epoch": 0.1284577768046128, "grad_norm": 0.12052039707598863, "learning_rate": 1.2827988338192421e-05, "loss": 0.4836, "step": 220 }, { "epoch": 0.12904167579008832, "grad_norm": 0.12982269382821787, "learning_rate": 1.2886297376093295e-05, "loss": 0.5056, "step": 221 }, { "epoch": 0.12962557477556383, "grad_norm": 0.11287880475266633, "learning_rate": 1.294460641399417e-05, "loss": 0.4621, "step": 222 }, { "epoch": 0.13020947376103933, "grad_norm": 0.1248687484152095, "learning_rate": 1.3002915451895044e-05, "loss": 0.4712, "step": 223 }, { "epoch": 0.13079337274651484, "grad_norm": 0.11714455932411486, "learning_rate": 1.3061224489795918e-05, "loss": 0.4484, "step": 224 }, { "epoch": 0.13137727173199037, "grad_norm": 0.10946858656516804, "learning_rate": 1.3119533527696795e-05, "loss": 0.4516, "step": 225 }, { "epoch": 0.13196117071746588, "grad_norm": 0.12202152826585395, "learning_rate": 1.317784256559767e-05, "loss": 0.5305, "step": 226 }, { "epoch": 0.1325450697029414, "grad_norm": 0.11975530880299502, "learning_rate": 1.3236151603498544e-05, "loss": 0.4752, "step": 227 }, { "epoch": 0.1331289686884169, "grad_norm": 0.11593461698821676, "learning_rate": 1.3294460641399418e-05, "loss": 0.4426, "step": 228 }, { "epoch": 0.13371286767389243, "grad_norm": 0.11872019771809456, "learning_rate": 1.3352769679300292e-05, "loss": 0.5095, "step": 229 }, { "epoch": 0.13429676665936793, "grad_norm": 0.12357268353203092, "learning_rate": 1.3411078717201168e-05, "loss": 0.5108, "step": 230 }, { "epoch": 0.13488066564484344, "grad_norm": 0.11630464601308643, "learning_rate": 1.3469387755102042e-05, "loss": 0.4477, "step": 231 }, { "epoch": 0.13546456463031895, "grad_norm": 0.12291726318696333, "learning_rate": 1.3527696793002916e-05, "loss": 0.4719, "step": 232 }, { "epoch": 0.13604846361579448, "grad_norm": 0.1213399024927338, "learning_rate": 1.358600583090379e-05, "loss": 0.4833, "step": 233 }, { "epoch": 0.13663236260126999, "grad_norm": 0.11683012506847132, "learning_rate": 1.3644314868804666e-05, "loss": 0.4841, "step": 234 }, { "epoch": 0.1372162615867455, "grad_norm": 0.12172462775789072, "learning_rate": 1.370262390670554e-05, "loss": 0.5004, "step": 235 }, { "epoch": 0.137800160572221, "grad_norm": 0.11461392058124112, "learning_rate": 1.3760932944606416e-05, "loss": 0.4487, "step": 236 }, { "epoch": 0.13838405955769653, "grad_norm": 0.12996180510567912, "learning_rate": 1.381924198250729e-05, "loss": 0.503, "step": 237 }, { "epoch": 0.13896795854317204, "grad_norm": 0.11513591929876078, "learning_rate": 1.3877551020408165e-05, "loss": 0.4636, "step": 238 }, { "epoch": 0.13955185752864754, "grad_norm": 0.12106025409639329, "learning_rate": 1.3935860058309039e-05, "loss": 0.447, "step": 239 }, { "epoch": 0.14013575651412305, "grad_norm": 0.1209164109547965, "learning_rate": 1.3994169096209913e-05, "loss": 0.4954, "step": 240 }, { "epoch": 0.14071965549959856, "grad_norm": 0.11953164209813787, "learning_rate": 1.4052478134110787e-05, "loss": 0.4757, "step": 241 }, { "epoch": 0.1413035544850741, "grad_norm": 0.11849529642469882, "learning_rate": 1.4110787172011665e-05, "loss": 0.4808, "step": 242 }, { "epoch": 0.1418874534705496, "grad_norm": 0.11875258155273663, "learning_rate": 1.4169096209912539e-05, "loss": 0.4908, "step": 243 }, { "epoch": 0.1424713524560251, "grad_norm": 0.11197694863960142, "learning_rate": 1.4227405247813413e-05, "loss": 0.4568, "step": 244 }, { "epoch": 0.1430552514415006, "grad_norm": 0.11909050279494017, "learning_rate": 1.4285714285714287e-05, "loss": 0.4529, "step": 245 }, { "epoch": 0.14363915042697614, "grad_norm": 0.11758193812189287, "learning_rate": 1.4344023323615161e-05, "loss": 0.4635, "step": 246 }, { "epoch": 0.14422304941245165, "grad_norm": 0.1264053542849704, "learning_rate": 1.4402332361516035e-05, "loss": 0.5341, "step": 247 }, { "epoch": 0.14480694839792715, "grad_norm": 0.11279751078323401, "learning_rate": 1.446064139941691e-05, "loss": 0.456, "step": 248 }, { "epoch": 0.14539084738340266, "grad_norm": 0.11236990136797083, "learning_rate": 1.4518950437317786e-05, "loss": 0.462, "step": 249 }, { "epoch": 0.1459747463688782, "grad_norm": 0.12100570690926585, "learning_rate": 1.457725947521866e-05, "loss": 0.4651, "step": 250 }, { "epoch": 0.1465586453543537, "grad_norm": 0.12441741633161725, "learning_rate": 1.4635568513119536e-05, "loss": 0.4606, "step": 251 }, { "epoch": 0.1471425443398292, "grad_norm": 0.11547262537218297, "learning_rate": 1.469387755102041e-05, "loss": 0.522, "step": 252 }, { "epoch": 0.1477264433253047, "grad_norm": 0.11978752870301916, "learning_rate": 1.4752186588921284e-05, "loss": 0.4973, "step": 253 }, { "epoch": 0.14831034231078025, "grad_norm": 0.12397959385199943, "learning_rate": 1.481049562682216e-05, "loss": 0.4518, "step": 254 }, { "epoch": 0.14889424129625575, "grad_norm": 0.11680063581354053, "learning_rate": 1.4868804664723034e-05, "loss": 0.4854, "step": 255 }, { "epoch": 0.14947814028173126, "grad_norm": 0.1234590944587526, "learning_rate": 1.4927113702623908e-05, "loss": 0.4366, "step": 256 }, { "epoch": 0.15006203926720676, "grad_norm": 0.12380681423095737, "learning_rate": 1.4985422740524782e-05, "loss": 0.4978, "step": 257 }, { "epoch": 0.1506459382526823, "grad_norm": 0.11541254510966559, "learning_rate": 1.5043731778425656e-05, "loss": 0.4473, "step": 258 }, { "epoch": 0.1512298372381578, "grad_norm": 0.11754689710101132, "learning_rate": 1.510204081632653e-05, "loss": 0.4208, "step": 259 }, { "epoch": 0.1518137362236333, "grad_norm": 0.12390984470925882, "learning_rate": 1.5160349854227408e-05, "loss": 0.4841, "step": 260 }, { "epoch": 0.15239763520910882, "grad_norm": 0.11835173736269126, "learning_rate": 1.5218658892128282e-05, "loss": 0.499, "step": 261 }, { "epoch": 0.15298153419458432, "grad_norm": 0.11338430913953951, "learning_rate": 1.5276967930029155e-05, "loss": 0.4405, "step": 262 }, { "epoch": 0.15356543318005986, "grad_norm": 0.11869875320510513, "learning_rate": 1.533527696793003e-05, "loss": 0.4836, "step": 263 }, { "epoch": 0.15414933216553536, "grad_norm": 0.1152692837995628, "learning_rate": 1.5393586005830907e-05, "loss": 0.4412, "step": 264 }, { "epoch": 0.15473323115101087, "grad_norm": 0.11789690071135059, "learning_rate": 1.545189504373178e-05, "loss": 0.5007, "step": 265 }, { "epoch": 0.15531713013648638, "grad_norm": 0.11198362174515086, "learning_rate": 1.5510204081632655e-05, "loss": 0.4448, "step": 266 }, { "epoch": 0.1559010291219619, "grad_norm": 0.1152342776853804, "learning_rate": 1.5568513119533527e-05, "loss": 0.4732, "step": 267 }, { "epoch": 0.15648492810743742, "grad_norm": 0.11627735203148074, "learning_rate": 1.5626822157434403e-05, "loss": 0.4462, "step": 268 }, { "epoch": 0.15706882709291292, "grad_norm": 0.11286425097138769, "learning_rate": 1.568513119533528e-05, "loss": 0.4822, "step": 269 }, { "epoch": 0.15765272607838843, "grad_norm": 0.12389489551777443, "learning_rate": 1.5743440233236155e-05, "loss": 0.4743, "step": 270 }, { "epoch": 0.15823662506386396, "grad_norm": 0.1152154153865144, "learning_rate": 1.5801749271137027e-05, "loss": 0.4828, "step": 271 }, { "epoch": 0.15882052404933947, "grad_norm": 0.10998756875343246, "learning_rate": 1.5860058309037903e-05, "loss": 0.4442, "step": 272 }, { "epoch": 0.15940442303481497, "grad_norm": 0.10978968554961133, "learning_rate": 1.5918367346938776e-05, "loss": 0.5137, "step": 273 }, { "epoch": 0.15998832202029048, "grad_norm": 0.11324882482410067, "learning_rate": 1.597667638483965e-05, "loss": 0.4495, "step": 274 }, { "epoch": 0.160572221005766, "grad_norm": 0.11990429641938329, "learning_rate": 1.6034985422740524e-05, "loss": 0.4838, "step": 275 }, { "epoch": 0.16115611999124152, "grad_norm": 0.12452344273291456, "learning_rate": 1.60932944606414e-05, "loss": 0.473, "step": 276 }, { "epoch": 0.16174001897671703, "grad_norm": 0.11738086481579423, "learning_rate": 1.6151603498542276e-05, "loss": 0.4417, "step": 277 }, { "epoch": 0.16232391796219253, "grad_norm": 0.12249645896597972, "learning_rate": 1.6209912536443152e-05, "loss": 0.4733, "step": 278 }, { "epoch": 0.16290781694766807, "grad_norm": 0.13145053224010636, "learning_rate": 1.6268221574344024e-05, "loss": 0.525, "step": 279 }, { "epoch": 0.16349171593314357, "grad_norm": 0.11369349285860712, "learning_rate": 1.63265306122449e-05, "loss": 0.4585, "step": 280 }, { "epoch": 0.16407561491861908, "grad_norm": 0.12262211724920026, "learning_rate": 1.6384839650145773e-05, "loss": 0.4639, "step": 281 }, { "epoch": 0.16465951390409458, "grad_norm": 0.11451274943954172, "learning_rate": 1.644314868804665e-05, "loss": 0.4471, "step": 282 }, { "epoch": 0.16524341288957012, "grad_norm": 0.11467522191245162, "learning_rate": 1.6501457725947524e-05, "loss": 0.519, "step": 283 }, { "epoch": 0.16582731187504562, "grad_norm": 0.12496049501255566, "learning_rate": 1.6559766763848397e-05, "loss": 0.4536, "step": 284 }, { "epoch": 0.16641121086052113, "grad_norm": 0.11961081955319386, "learning_rate": 1.6618075801749273e-05, "loss": 0.4409, "step": 285 }, { "epoch": 0.16699510984599664, "grad_norm": 0.11535223512014416, "learning_rate": 1.667638483965015e-05, "loss": 0.4846, "step": 286 }, { "epoch": 0.16757900883147214, "grad_norm": 0.11816110977219688, "learning_rate": 1.673469387755102e-05, "loss": 0.4548, "step": 287 }, { "epoch": 0.16816290781694768, "grad_norm": 0.12451491846708722, "learning_rate": 1.6793002915451897e-05, "loss": 0.4925, "step": 288 }, { "epoch": 0.16874680680242318, "grad_norm": 0.12436110912206134, "learning_rate": 1.6851311953352773e-05, "loss": 0.5004, "step": 289 }, { "epoch": 0.1693307057878987, "grad_norm": 0.11586303602712704, "learning_rate": 1.6909620991253645e-05, "loss": 0.4494, "step": 290 }, { "epoch": 0.1699146047733742, "grad_norm": 0.11961653532887162, "learning_rate": 1.696793002915452e-05, "loss": 0.5489, "step": 291 }, { "epoch": 0.17049850375884973, "grad_norm": 0.115865783690524, "learning_rate": 1.7026239067055393e-05, "loss": 0.4888, "step": 292 }, { "epoch": 0.17108240274432523, "grad_norm": 0.12068064592377012, "learning_rate": 1.708454810495627e-05, "loss": 0.4759, "step": 293 }, { "epoch": 0.17166630172980074, "grad_norm": 0.11828824520677259, "learning_rate": 1.7142857142857142e-05, "loss": 0.482, "step": 294 }, { "epoch": 0.17225020071527625, "grad_norm": 0.10766756610976881, "learning_rate": 1.720116618075802e-05, "loss": 0.4666, "step": 295 }, { "epoch": 0.17283409970075178, "grad_norm": 0.11656570625018386, "learning_rate": 1.7259475218658894e-05, "loss": 0.4844, "step": 296 }, { "epoch": 0.1734179986862273, "grad_norm": 0.11290140400692183, "learning_rate": 1.731778425655977e-05, "loss": 0.4367, "step": 297 }, { "epoch": 0.1740018976717028, "grad_norm": 0.10902846393387053, "learning_rate": 1.7376093294460642e-05, "loss": 0.4164, "step": 298 }, { "epoch": 0.1745857966571783, "grad_norm": 0.10604424733393077, "learning_rate": 1.7434402332361518e-05, "loss": 0.4627, "step": 299 }, { "epoch": 0.17516969564265383, "grad_norm": 0.12182853576812966, "learning_rate": 1.749271137026239e-05, "loss": 0.4735, "step": 300 }, { "epoch": 0.17575359462812934, "grad_norm": 0.10760062020124299, "learning_rate": 1.7551020408163266e-05, "loss": 0.4432, "step": 301 }, { "epoch": 0.17633749361360485, "grad_norm": 0.11067377796542106, "learning_rate": 1.7609329446064142e-05, "loss": 0.502, "step": 302 }, { "epoch": 0.17692139259908035, "grad_norm": 0.12215594874809621, "learning_rate": 1.7667638483965014e-05, "loss": 0.5103, "step": 303 }, { "epoch": 0.17750529158455589, "grad_norm": 0.10778694677958567, "learning_rate": 1.772594752186589e-05, "loss": 0.4361, "step": 304 }, { "epoch": 0.1780891905700314, "grad_norm": 0.12137530959399181, "learning_rate": 1.7784256559766766e-05, "loss": 0.4805, "step": 305 }, { "epoch": 0.1786730895555069, "grad_norm": 0.11814909124401246, "learning_rate": 1.7842565597667642e-05, "loss": 0.4858, "step": 306 }, { "epoch": 0.1792569885409824, "grad_norm": 0.12159074211858316, "learning_rate": 1.7900874635568515e-05, "loss": 0.4912, "step": 307 }, { "epoch": 0.1798408875264579, "grad_norm": 0.12154907314358372, "learning_rate": 1.795918367346939e-05, "loss": 0.4533, "step": 308 }, { "epoch": 0.18042478651193344, "grad_norm": 0.10806722335281341, "learning_rate": 1.8017492711370263e-05, "loss": 0.4473, "step": 309 }, { "epoch": 0.18100868549740895, "grad_norm": 0.11713096353510541, "learning_rate": 1.807580174927114e-05, "loss": 0.4062, "step": 310 }, { "epoch": 0.18159258448288446, "grad_norm": 0.11570613536563354, "learning_rate": 1.813411078717201e-05, "loss": 0.4469, "step": 311 }, { "epoch": 0.18217648346835996, "grad_norm": 0.13304589225093824, "learning_rate": 1.8192419825072887e-05, "loss": 0.4612, "step": 312 }, { "epoch": 0.1827603824538355, "grad_norm": 0.12130680446653411, "learning_rate": 1.8250728862973763e-05, "loss": 0.5536, "step": 313 }, { "epoch": 0.183344281439311, "grad_norm": 0.12627692451694655, "learning_rate": 1.830903790087464e-05, "loss": 0.4572, "step": 314 }, { "epoch": 0.1839281804247865, "grad_norm": 0.12024063489224032, "learning_rate": 1.836734693877551e-05, "loss": 0.5018, "step": 315 }, { "epoch": 0.18451207941026201, "grad_norm": 0.11717514721990752, "learning_rate": 1.8425655976676387e-05, "loss": 0.4601, "step": 316 }, { "epoch": 0.18509597839573755, "grad_norm": 0.1333878044628786, "learning_rate": 1.848396501457726e-05, "loss": 0.4921, "step": 317 }, { "epoch": 0.18567987738121305, "grad_norm": 0.12149116037462027, "learning_rate": 1.8542274052478135e-05, "loss": 0.481, "step": 318 }, { "epoch": 0.18626377636668856, "grad_norm": 0.11969394228364705, "learning_rate": 1.8600583090379008e-05, "loss": 0.4827, "step": 319 }, { "epoch": 0.18684767535216407, "grad_norm": 0.11889586690821168, "learning_rate": 1.8658892128279884e-05, "loss": 0.5101, "step": 320 }, { "epoch": 0.1874315743376396, "grad_norm": 0.10796307801395712, "learning_rate": 1.871720116618076e-05, "loss": 0.4538, "step": 321 }, { "epoch": 0.1880154733231151, "grad_norm": 0.11693657304267761, "learning_rate": 1.8775510204081636e-05, "loss": 0.4142, "step": 322 }, { "epoch": 0.1885993723085906, "grad_norm": 0.11782246582053942, "learning_rate": 1.8833819241982508e-05, "loss": 0.4702, "step": 323 }, { "epoch": 0.18918327129406612, "grad_norm": 0.120338495744636, "learning_rate": 1.8892128279883384e-05, "loss": 0.4462, "step": 324 }, { "epoch": 0.18976717027954165, "grad_norm": 0.11786039712773586, "learning_rate": 1.895043731778426e-05, "loss": 0.4772, "step": 325 }, { "epoch": 0.19035106926501716, "grad_norm": 0.13504957443320853, "learning_rate": 1.9008746355685132e-05, "loss": 0.4881, "step": 326 }, { "epoch": 0.19093496825049266, "grad_norm": 0.1135946798242213, "learning_rate": 1.9067055393586008e-05, "loss": 0.4677, "step": 327 }, { "epoch": 0.19151886723596817, "grad_norm": 0.11094104749417454, "learning_rate": 1.912536443148688e-05, "loss": 0.48, "step": 328 }, { "epoch": 0.19210276622144368, "grad_norm": 0.12775052877051457, "learning_rate": 1.9183673469387756e-05, "loss": 0.4779, "step": 329 }, { "epoch": 0.1926866652069192, "grad_norm": 0.11303922866532021, "learning_rate": 1.9241982507288632e-05, "loss": 0.4598, "step": 330 }, { "epoch": 0.19327056419239472, "grad_norm": 0.12084110048597047, "learning_rate": 1.9300291545189508e-05, "loss": 0.517, "step": 331 }, { "epoch": 0.19385446317787022, "grad_norm": 0.11994818785467505, "learning_rate": 1.935860058309038e-05, "loss": 0.4602, "step": 332 }, { "epoch": 0.19443836216334573, "grad_norm": 0.11673480392915012, "learning_rate": 1.9416909620991257e-05, "loss": 0.5048, "step": 333 }, { "epoch": 0.19502226114882126, "grad_norm": 0.11891735238648513, "learning_rate": 1.947521865889213e-05, "loss": 0.4953, "step": 334 }, { "epoch": 0.19560616013429677, "grad_norm": 0.1097352054742793, "learning_rate": 1.9533527696793005e-05, "loss": 0.4793, "step": 335 }, { "epoch": 0.19619005911977228, "grad_norm": 0.11148457609807508, "learning_rate": 1.9591836734693877e-05, "loss": 0.4484, "step": 336 }, { "epoch": 0.19677395810524778, "grad_norm": 0.11895509560626667, "learning_rate": 1.9650145772594753e-05, "loss": 0.5024, "step": 337 }, { "epoch": 0.19735785709072332, "grad_norm": 0.11915751095969412, "learning_rate": 1.970845481049563e-05, "loss": 0.5018, "step": 338 }, { "epoch": 0.19794175607619882, "grad_norm": 0.11532456885878473, "learning_rate": 1.9766763848396505e-05, "loss": 0.4625, "step": 339 }, { "epoch": 0.19852565506167433, "grad_norm": 0.12189908864378027, "learning_rate": 1.9825072886297377e-05, "loss": 0.4736, "step": 340 }, { "epoch": 0.19910955404714983, "grad_norm": 0.1127463295840486, "learning_rate": 1.9883381924198253e-05, "loss": 0.4836, "step": 341 }, { "epoch": 0.19969345303262537, "grad_norm": 0.1260876761389853, "learning_rate": 1.9941690962099126e-05, "loss": 0.5521, "step": 342 }, { "epoch": 0.20027735201810087, "grad_norm": 0.1213461196437235, "learning_rate": 2e-05, "loss": 0.5247, "step": 343 }, { "epoch": 0.20086125100357638, "grad_norm": 0.11656002353246182, "learning_rate": 1.999999480140104e-05, "loss": 0.4812, "step": 344 }, { "epoch": 0.20144514998905189, "grad_norm": 0.1428258500297625, "learning_rate": 1.999997920560957e-05, "loss": 0.476, "step": 345 }, { "epoch": 0.20202904897452742, "grad_norm": 0.12689608371142158, "learning_rate": 1.9999953212641804e-05, "loss": 0.5656, "step": 346 }, { "epoch": 0.20261294796000293, "grad_norm": 0.11586053766713184, "learning_rate": 1.9999916822524766e-05, "loss": 0.4483, "step": 347 }, { "epoch": 0.20319684694547843, "grad_norm": 0.11107862969492441, "learning_rate": 1.999987003529629e-05, "loss": 0.4739, "step": 348 }, { "epoch": 0.20378074593095394, "grad_norm": 0.10838880950062, "learning_rate": 1.9999812851005024e-05, "loss": 0.4544, "step": 349 }, { "epoch": 0.20436464491642944, "grad_norm": 0.11206051221229597, "learning_rate": 1.9999745269710423e-05, "loss": 0.4939, "step": 350 }, { "epoch": 0.20494854390190498, "grad_norm": 0.10875982186135955, "learning_rate": 1.999966729148275e-05, "loss": 0.4603, "step": 351 }, { "epoch": 0.20553244288738048, "grad_norm": 0.1118775117957714, "learning_rate": 1.9999578916403086e-05, "loss": 0.4872, "step": 352 }, { "epoch": 0.206116341872856, "grad_norm": 0.11122509677839064, "learning_rate": 1.9999480144563316e-05, "loss": 0.4479, "step": 353 }, { "epoch": 0.2067002408583315, "grad_norm": 0.10977272254380807, "learning_rate": 1.999937097606613e-05, "loss": 0.4797, "step": 354 }, { "epoch": 0.20728413984380703, "grad_norm": 0.12491553114575843, "learning_rate": 1.9999251411025034e-05, "loss": 0.4637, "step": 355 }, { "epoch": 0.20786803882928254, "grad_norm": 0.1156635215538547, "learning_rate": 1.9999121449564347e-05, "loss": 0.4582, "step": 356 }, { "epoch": 0.20845193781475804, "grad_norm": 0.12295588538154753, "learning_rate": 1.999898109181919e-05, "loss": 0.4972, "step": 357 }, { "epoch": 0.20903583680023355, "grad_norm": 0.10732217964456649, "learning_rate": 1.9998830337935488e-05, "loss": 0.4852, "step": 358 }, { "epoch": 0.20961973578570908, "grad_norm": 0.128288886734554, "learning_rate": 1.9998669188069992e-05, "loss": 0.4697, "step": 359 }, { "epoch": 0.2102036347711846, "grad_norm": 0.11131941608883142, "learning_rate": 1.9998497642390255e-05, "loss": 0.4124, "step": 360 }, { "epoch": 0.2107875337566601, "grad_norm": 0.11242069316400122, "learning_rate": 1.9998315701074624e-05, "loss": 0.4912, "step": 361 }, { "epoch": 0.2113714327421356, "grad_norm": 0.11802433662544858, "learning_rate": 1.999812336431228e-05, "loss": 0.4504, "step": 362 }, { "epoch": 0.21195533172761113, "grad_norm": 0.10991796933364892, "learning_rate": 1.9997920632303192e-05, "loss": 0.4367, "step": 363 }, { "epoch": 0.21253923071308664, "grad_norm": 0.11320472765993722, "learning_rate": 1.9997707505258147e-05, "loss": 0.4509, "step": 364 }, { "epoch": 0.21312312969856215, "grad_norm": 0.11338023784834925, "learning_rate": 1.9997483983398736e-05, "loss": 0.4696, "step": 365 }, { "epoch": 0.21370702868403765, "grad_norm": 0.1155361532924519, "learning_rate": 1.9997250066957357e-05, "loss": 0.4753, "step": 366 }, { "epoch": 0.2142909276695132, "grad_norm": 0.1124998472021198, "learning_rate": 1.9997005756177228e-05, "loss": 0.4937, "step": 367 }, { "epoch": 0.2148748266549887, "grad_norm": 0.10923761396518196, "learning_rate": 1.9996751051312352e-05, "loss": 0.4596, "step": 368 }, { "epoch": 0.2154587256404642, "grad_norm": 0.11077041621312993, "learning_rate": 1.9996485952627554e-05, "loss": 0.5159, "step": 369 }, { "epoch": 0.2160426246259397, "grad_norm": 0.10438147357856653, "learning_rate": 1.9996210460398464e-05, "loss": 0.461, "step": 370 }, { "epoch": 0.2166265236114152, "grad_norm": 0.1091091259199936, "learning_rate": 1.9995924574911516e-05, "loss": 0.4487, "step": 371 }, { "epoch": 0.21721042259689075, "grad_norm": 0.11061223019933858, "learning_rate": 1.9995628296463953e-05, "loss": 0.5056, "step": 372 }, { "epoch": 0.21779432158236625, "grad_norm": 0.10698613243023618, "learning_rate": 1.9995321625363814e-05, "loss": 0.4265, "step": 373 }, { "epoch": 0.21837822056784176, "grad_norm": 0.11697934393864638, "learning_rate": 1.999500456192996e-05, "loss": 0.5011, "step": 374 }, { "epoch": 0.21896211955331726, "grad_norm": 0.12021267426979866, "learning_rate": 1.9994677106492046e-05, "loss": 0.4588, "step": 375 }, { "epoch": 0.2195460185387928, "grad_norm": 0.11607632499234362, "learning_rate": 1.999433925939053e-05, "loss": 0.4603, "step": 376 }, { "epoch": 0.2201299175242683, "grad_norm": 0.10219007779528992, "learning_rate": 1.999399102097668e-05, "loss": 0.448, "step": 377 }, { "epoch": 0.2207138165097438, "grad_norm": 0.11434734043455015, "learning_rate": 1.999363239161257e-05, "loss": 0.5105, "step": 378 }, { "epoch": 0.22129771549521932, "grad_norm": 0.10942121835190848, "learning_rate": 1.9993263371671067e-05, "loss": 0.4484, "step": 379 }, { "epoch": 0.22188161448069485, "grad_norm": 0.11319260919554812, "learning_rate": 1.9992883961535857e-05, "loss": 0.4682, "step": 380 }, { "epoch": 0.22246551346617036, "grad_norm": 0.10917050325430962, "learning_rate": 1.9992494161601414e-05, "loss": 0.4383, "step": 381 }, { "epoch": 0.22304941245164586, "grad_norm": 0.10354946187656626, "learning_rate": 1.999209397227302e-05, "loss": 0.4365, "step": 382 }, { "epoch": 0.22363331143712137, "grad_norm": 0.11509263136023189, "learning_rate": 1.9991683393966764e-05, "loss": 0.4914, "step": 383 }, { "epoch": 0.2242172104225969, "grad_norm": 0.10469796059897106, "learning_rate": 1.9991262427109532e-05, "loss": 0.4763, "step": 384 }, { "epoch": 0.2248011094080724, "grad_norm": 0.1035486116671561, "learning_rate": 1.9990831072139008e-05, "loss": 0.4448, "step": 385 }, { "epoch": 0.22538500839354791, "grad_norm": 0.10510797219352166, "learning_rate": 1.9990389329503685e-05, "loss": 0.4316, "step": 386 }, { "epoch": 0.22596890737902342, "grad_norm": 0.10813119850764312, "learning_rate": 1.9989937199662845e-05, "loss": 0.4361, "step": 387 }, { "epoch": 0.22655280636449895, "grad_norm": 0.10084567441382253, "learning_rate": 1.998947468308658e-05, "loss": 0.4201, "step": 388 }, { "epoch": 0.22713670534997446, "grad_norm": 0.10996567533948841, "learning_rate": 1.9989001780255784e-05, "loss": 0.4238, "step": 389 }, { "epoch": 0.22772060433544997, "grad_norm": 0.12229104045447191, "learning_rate": 1.9988518491662134e-05, "loss": 0.5079, "step": 390 }, { "epoch": 0.22830450332092547, "grad_norm": 0.10553480005131946, "learning_rate": 1.9988024817808116e-05, "loss": 0.4862, "step": 391 }, { "epoch": 0.228888402306401, "grad_norm": 0.10319220917942994, "learning_rate": 1.9987520759207014e-05, "loss": 0.4449, "step": 392 }, { "epoch": 0.2294723012918765, "grad_norm": 0.1096517056665474, "learning_rate": 1.9987006316382913e-05, "loss": 0.4636, "step": 393 }, { "epoch": 0.23005620027735202, "grad_norm": 0.10394154749643401, "learning_rate": 1.9986481489870684e-05, "loss": 0.4609, "step": 394 }, { "epoch": 0.23064009926282752, "grad_norm": 0.10609680328975592, "learning_rate": 1.9985946280215996e-05, "loss": 0.5125, "step": 395 }, { "epoch": 0.23122399824830303, "grad_norm": 0.11741123055864604, "learning_rate": 1.9985400687975325e-05, "loss": 0.4679, "step": 396 }, { "epoch": 0.23180789723377856, "grad_norm": 0.10933128211178324, "learning_rate": 1.998484471371593e-05, "loss": 0.4724, "step": 397 }, { "epoch": 0.23239179621925407, "grad_norm": 0.12405494934932043, "learning_rate": 1.9984278358015867e-05, "loss": 0.4731, "step": 398 }, { "epoch": 0.23297569520472958, "grad_norm": 0.11267358717243843, "learning_rate": 1.998370162146399e-05, "loss": 0.4483, "step": 399 }, { "epoch": 0.23355959419020508, "grad_norm": 0.11639380744676249, "learning_rate": 1.9983114504659943e-05, "loss": 0.4921, "step": 400 }, { "epoch": 0.23414349317568062, "grad_norm": 0.11682509684500603, "learning_rate": 1.998251700821416e-05, "loss": 0.5282, "step": 401 }, { "epoch": 0.23472739216115612, "grad_norm": 0.11097028826192822, "learning_rate": 1.9981909132747875e-05, "loss": 0.4712, "step": 402 }, { "epoch": 0.23531129114663163, "grad_norm": 0.115417228094987, "learning_rate": 1.9981290878893103e-05, "loss": 0.4478, "step": 403 }, { "epoch": 0.23589519013210714, "grad_norm": 0.12161883035798017, "learning_rate": 1.9980662247292657e-05, "loss": 0.5192, "step": 404 }, { "epoch": 0.23647908911758267, "grad_norm": 0.12211455143226724, "learning_rate": 1.998002323860014e-05, "loss": 0.5026, "step": 405 }, { "epoch": 0.23706298810305818, "grad_norm": 0.1062121443778358, "learning_rate": 1.997937385347994e-05, "loss": 0.4618, "step": 406 }, { "epoch": 0.23764688708853368, "grad_norm": 0.11942541003072145, "learning_rate": 1.9978714092607234e-05, "loss": 0.5373, "step": 407 }, { "epoch": 0.2382307860740092, "grad_norm": 0.12340599172187595, "learning_rate": 1.997804395666799e-05, "loss": 0.5614, "step": 408 }, { "epoch": 0.23881468505948472, "grad_norm": 0.10735542853797142, "learning_rate": 1.997736344635896e-05, "loss": 0.4734, "step": 409 }, { "epoch": 0.23939858404496023, "grad_norm": 0.1110059559745934, "learning_rate": 1.997667256238769e-05, "loss": 0.4476, "step": 410 }, { "epoch": 0.23998248303043573, "grad_norm": 0.10479291484352153, "learning_rate": 1.99759713054725e-05, "loss": 0.4801, "step": 411 }, { "epoch": 0.24056638201591124, "grad_norm": 0.12018141756890292, "learning_rate": 1.99752596763425e-05, "loss": 0.492, "step": 412 }, { "epoch": 0.24115028100138677, "grad_norm": 0.11714467638661298, "learning_rate": 1.9974537675737587e-05, "loss": 0.4663, "step": 413 }, { "epoch": 0.24173417998686228, "grad_norm": 0.11144232941773193, "learning_rate": 1.9973805304408437e-05, "loss": 0.4727, "step": 414 }, { "epoch": 0.24231807897233779, "grad_norm": 0.1109162901083814, "learning_rate": 1.9973062563116515e-05, "loss": 0.4701, "step": 415 }, { "epoch": 0.2429019779578133, "grad_norm": 0.10696528676934203, "learning_rate": 1.9972309452634064e-05, "loss": 0.4314, "step": 416 }, { "epoch": 0.2434858769432888, "grad_norm": 0.1058291221432663, "learning_rate": 1.9971545973744102e-05, "loss": 0.4503, "step": 417 }, { "epoch": 0.24406977592876433, "grad_norm": 0.10813304627851558, "learning_rate": 1.997077212724044e-05, "loss": 0.5044, "step": 418 }, { "epoch": 0.24465367491423984, "grad_norm": 0.10651570643927237, "learning_rate": 1.9969987913927657e-05, "loss": 0.4283, "step": 419 }, { "epoch": 0.24523757389971534, "grad_norm": 0.10669366843402281, "learning_rate": 1.9969193334621117e-05, "loss": 0.4417, "step": 420 }, { "epoch": 0.24582147288519085, "grad_norm": 0.1133826730236897, "learning_rate": 1.996838839014696e-05, "loss": 0.502, "step": 421 }, { "epoch": 0.24640537187066638, "grad_norm": 0.10436714779320261, "learning_rate": 1.9967573081342103e-05, "loss": 0.4241, "step": 422 }, { "epoch": 0.2469892708561419, "grad_norm": 0.1085913621740312, "learning_rate": 1.9966747409054235e-05, "loss": 0.4588, "step": 423 }, { "epoch": 0.2475731698416174, "grad_norm": 0.10713201747578016, "learning_rate": 1.996591137414183e-05, "loss": 0.4824, "step": 424 }, { "epoch": 0.2481570688270929, "grad_norm": 0.13252029578878302, "learning_rate": 1.996506497747412e-05, "loss": 0.4394, "step": 425 }, { "epoch": 0.24874096781256844, "grad_norm": 0.10973728237820407, "learning_rate": 1.9964208219931135e-05, "loss": 0.429, "step": 426 }, { "epoch": 0.24932486679804394, "grad_norm": 0.1171719443309564, "learning_rate": 1.9963341102403652e-05, "loss": 0.4488, "step": 427 }, { "epoch": 0.24990876578351945, "grad_norm": 0.12412511539615693, "learning_rate": 1.996246362579323e-05, "loss": 0.5337, "step": 428 }, { "epoch": 0.25049266476899495, "grad_norm": 0.1085405667288723, "learning_rate": 1.99615757910122e-05, "loss": 0.4693, "step": 429 }, { "epoch": 0.25107656375447046, "grad_norm": 0.10911307576621807, "learning_rate": 1.9960677598983672e-05, "loss": 0.4658, "step": 430 }, { "epoch": 0.25166046273994597, "grad_norm": 1.8738780868150282, "learning_rate": 1.9959769050641498e-05, "loss": 0.4367, "step": 431 }, { "epoch": 0.25224436172542153, "grad_norm": 0.10778428216057653, "learning_rate": 1.9958850146930326e-05, "loss": 0.4976, "step": 432 }, { "epoch": 0.25282826071089703, "grad_norm": 0.10713233151118666, "learning_rate": 1.9957920888805548e-05, "loss": 0.4497, "step": 433 }, { "epoch": 0.25341215969637254, "grad_norm": 0.11619893208910013, "learning_rate": 1.9956981277233342e-05, "loss": 0.4564, "step": 434 }, { "epoch": 0.25399605868184805, "grad_norm": 0.11047533345331857, "learning_rate": 1.9956031313190634e-05, "loss": 0.4528, "step": 435 }, { "epoch": 0.25457995766732355, "grad_norm": 0.1059618114468681, "learning_rate": 1.9955070997665122e-05, "loss": 0.4474, "step": 436 }, { "epoch": 0.25516385665279906, "grad_norm": 0.11212241830288644, "learning_rate": 1.9954100331655265e-05, "loss": 0.4483, "step": 437 }, { "epoch": 0.25574775563827457, "grad_norm": 0.10612248965067383, "learning_rate": 1.9953119316170286e-05, "loss": 0.4877, "step": 438 }, { "epoch": 0.25633165462375007, "grad_norm": 0.10694194573291274, "learning_rate": 1.9952127952230166e-05, "loss": 0.4317, "step": 439 }, { "epoch": 0.2569155536092256, "grad_norm": 0.11864227083301106, "learning_rate": 1.995112624086564e-05, "loss": 0.4545, "step": 440 }, { "epoch": 0.25749945259470114, "grad_norm": 0.10438866034774671, "learning_rate": 1.9950114183118215e-05, "loss": 0.4727, "step": 441 }, { "epoch": 0.25808335158017665, "grad_norm": 0.11028283404059863, "learning_rate": 1.9949091780040143e-05, "loss": 0.4636, "step": 442 }, { "epoch": 0.25866725056565215, "grad_norm": 0.11711015887864364, "learning_rate": 1.9948059032694432e-05, "loss": 0.4722, "step": 443 }, { "epoch": 0.25925114955112766, "grad_norm": 0.1050038469106591, "learning_rate": 1.9947015942154864e-05, "loss": 0.5205, "step": 444 }, { "epoch": 0.25983504853660316, "grad_norm": 0.11040242063181632, "learning_rate": 1.994596250950595e-05, "loss": 0.4431, "step": 445 }, { "epoch": 0.26041894752207867, "grad_norm": 0.11161803513596398, "learning_rate": 1.9944898735842963e-05, "loss": 0.4875, "step": 446 }, { "epoch": 0.2610028465075542, "grad_norm": 0.11853658202584973, "learning_rate": 1.9943824622271934e-05, "loss": 0.5009, "step": 447 }, { "epoch": 0.2615867454930297, "grad_norm": 0.12224889593310367, "learning_rate": 1.9942740169909643e-05, "loss": 0.4946, "step": 448 }, { "epoch": 0.26217064447850524, "grad_norm": 0.11616516375952994, "learning_rate": 1.9941645379883613e-05, "loss": 0.4572, "step": 449 }, { "epoch": 0.26275454346398075, "grad_norm": 0.10583270363644827, "learning_rate": 1.9940540253332118e-05, "loss": 0.4623, "step": 450 }, { "epoch": 0.26333844244945626, "grad_norm": 0.12950113366468585, "learning_rate": 1.993942479140418e-05, "loss": 0.5186, "step": 451 }, { "epoch": 0.26392234143493176, "grad_norm": 0.10379866276281738, "learning_rate": 1.993829899525957e-05, "loss": 0.4609, "step": 452 }, { "epoch": 0.26450624042040727, "grad_norm": 0.11203799824876451, "learning_rate": 1.99371628660688e-05, "loss": 0.5055, "step": 453 }, { "epoch": 0.2650901394058828, "grad_norm": 0.10279962252889904, "learning_rate": 1.9936016405013117e-05, "loss": 0.4184, "step": 454 }, { "epoch": 0.2656740383913583, "grad_norm": 0.11286852131830184, "learning_rate": 1.9934859613284535e-05, "loss": 0.4448, "step": 455 }, { "epoch": 0.2662579373768338, "grad_norm": 0.10205253887493762, "learning_rate": 1.993369249208578e-05, "loss": 0.4367, "step": 456 }, { "epoch": 0.26684183636230935, "grad_norm": 0.11786100521772123, "learning_rate": 1.9932515042630335e-05, "loss": 0.5077, "step": 457 }, { "epoch": 0.26742573534778485, "grad_norm": 0.09919034250799569, "learning_rate": 1.9931327266142425e-05, "loss": 0.4456, "step": 458 }, { "epoch": 0.26800963433326036, "grad_norm": 0.1084320862325094, "learning_rate": 1.9930129163856992e-05, "loss": 0.4627, "step": 459 }, { "epoch": 0.26859353331873587, "grad_norm": 0.11524416064690532, "learning_rate": 1.9928920737019735e-05, "loss": 0.4939, "step": 460 }, { "epoch": 0.2691774323042114, "grad_norm": 0.11840793278968491, "learning_rate": 1.9927701986887077e-05, "loss": 0.4632, "step": 461 }, { "epoch": 0.2697613312896869, "grad_norm": 0.11005853274151675, "learning_rate": 1.9926472914726177e-05, "loss": 0.4191, "step": 462 }, { "epoch": 0.2703452302751624, "grad_norm": 0.10219704013723316, "learning_rate": 1.9925233521814926e-05, "loss": 0.4469, "step": 463 }, { "epoch": 0.2709291292606379, "grad_norm": 0.1024755965594187, "learning_rate": 1.9923983809441945e-05, "loss": 0.486, "step": 464 }, { "epoch": 0.2715130282461134, "grad_norm": 0.10409250622292944, "learning_rate": 1.9922723778906583e-05, "loss": 0.4611, "step": 465 }, { "epoch": 0.27209692723158896, "grad_norm": 0.10228853624802904, "learning_rate": 1.9921453431518923e-05, "loss": 0.4942, "step": 466 }, { "epoch": 0.27268082621706446, "grad_norm": 0.11243190553062415, "learning_rate": 1.9920172768599763e-05, "loss": 0.4722, "step": 467 }, { "epoch": 0.27326472520253997, "grad_norm": 0.10073329218602883, "learning_rate": 1.991888179148064e-05, "loss": 0.4623, "step": 468 }, { "epoch": 0.2738486241880155, "grad_norm": 0.11131990378209565, "learning_rate": 1.991758050150381e-05, "loss": 0.4869, "step": 469 }, { "epoch": 0.274432523173491, "grad_norm": 0.10990640624488181, "learning_rate": 1.991626890002224e-05, "loss": 0.4531, "step": 470 }, { "epoch": 0.2750164221589665, "grad_norm": 0.10626074369429182, "learning_rate": 1.9914946988399636e-05, "loss": 0.4874, "step": 471 }, { "epoch": 0.275600321144442, "grad_norm": 0.1087962855530405, "learning_rate": 1.9913614768010418e-05, "loss": 0.4627, "step": 472 }, { "epoch": 0.2761842201299175, "grad_norm": 0.10576239992610106, "learning_rate": 1.9912272240239715e-05, "loss": 0.3997, "step": 473 }, { "epoch": 0.27676811911539306, "grad_norm": 0.11086702063113658, "learning_rate": 1.9910919406483384e-05, "loss": 0.4546, "step": 474 }, { "epoch": 0.27735201810086857, "grad_norm": 0.11173241085903846, "learning_rate": 1.9909556268147995e-05, "loss": 0.4676, "step": 475 }, { "epoch": 0.2779359170863441, "grad_norm": 0.10529999702092804, "learning_rate": 1.990818282665082e-05, "loss": 0.4043, "step": 476 }, { "epoch": 0.2785198160718196, "grad_norm": 0.10377592781166715, "learning_rate": 1.9906799083419865e-05, "loss": 0.4648, "step": 477 }, { "epoch": 0.2791037150572951, "grad_norm": 0.1182955643865831, "learning_rate": 1.9905405039893827e-05, "loss": 0.4864, "step": 478 }, { "epoch": 0.2796876140427706, "grad_norm": 0.11189395406446798, "learning_rate": 1.9904000697522126e-05, "loss": 0.4128, "step": 479 }, { "epoch": 0.2802715130282461, "grad_norm": 0.1090488694258685, "learning_rate": 1.9902586057764882e-05, "loss": 0.4742, "step": 480 }, { "epoch": 0.2808554120137216, "grad_norm": 0.11243214486782586, "learning_rate": 1.9901161122092923e-05, "loss": 0.465, "step": 481 }, { "epoch": 0.2814393109991971, "grad_norm": 0.10119377409263833, "learning_rate": 1.9899725891987788e-05, "loss": 0.4883, "step": 482 }, { "epoch": 0.2820232099846727, "grad_norm": 0.1108291503826155, "learning_rate": 1.9898280368941708e-05, "loss": 0.407, "step": 483 }, { "epoch": 0.2826071089701482, "grad_norm": 0.10990913699438572, "learning_rate": 1.989682455445762e-05, "loss": 0.4113, "step": 484 }, { "epoch": 0.2831910079556237, "grad_norm": 0.10868682878955571, "learning_rate": 1.9895358450049175e-05, "loss": 0.495, "step": 485 }, { "epoch": 0.2837749069410992, "grad_norm": 0.1093718613211243, "learning_rate": 1.9893882057240698e-05, "loss": 0.4794, "step": 486 }, { "epoch": 0.2843588059265747, "grad_norm": 0.10870682167455632, "learning_rate": 1.989239537756723e-05, "loss": 0.4378, "step": 487 }, { "epoch": 0.2849427049120502, "grad_norm": 0.0909582780743866, "learning_rate": 1.98908984125745e-05, "loss": 0.4172, "step": 488 }, { "epoch": 0.2855266038975257, "grad_norm": 0.11761559453843998, "learning_rate": 1.9889391163818935e-05, "loss": 0.479, "step": 489 }, { "epoch": 0.2861105028830012, "grad_norm": 0.11032481187120695, "learning_rate": 1.9887873632867645e-05, "loss": 0.5029, "step": 490 }, { "epoch": 0.2866944018684768, "grad_norm": 0.09734767045483288, "learning_rate": 1.9886345821298442e-05, "loss": 0.4645, "step": 491 }, { "epoch": 0.2872783008539523, "grad_norm": 0.10206970240800319, "learning_rate": 1.988480773069982e-05, "loss": 0.4468, "step": 492 }, { "epoch": 0.2878621998394278, "grad_norm": 0.10876720779357976, "learning_rate": 1.9883259362670967e-05, "loss": 0.4794, "step": 493 }, { "epoch": 0.2884460988249033, "grad_norm": 0.09957518481760824, "learning_rate": 1.9881700718821744e-05, "loss": 0.4387, "step": 494 }, { "epoch": 0.2890299978103788, "grad_norm": 0.10801588094552271, "learning_rate": 1.988013180077271e-05, "loss": 0.4406, "step": 495 }, { "epoch": 0.2896138967958543, "grad_norm": 0.10532391049200143, "learning_rate": 1.9878552610155096e-05, "loss": 0.4578, "step": 496 }, { "epoch": 0.2901977957813298, "grad_norm": 0.1018635035739591, "learning_rate": 1.987696314861082e-05, "loss": 0.452, "step": 497 }, { "epoch": 0.2907816947668053, "grad_norm": 0.10503376884396284, "learning_rate": 1.9875363417792477e-05, "loss": 0.4867, "step": 498 }, { "epoch": 0.2913655937522809, "grad_norm": 0.10593117897926324, "learning_rate": 1.9873753419363336e-05, "loss": 0.4713, "step": 499 }, { "epoch": 0.2919494927377564, "grad_norm": 0.10795441414894086, "learning_rate": 1.9872133154997345e-05, "loss": 0.4998, "step": 500 }, { "epoch": 0.2925333917232319, "grad_norm": 0.10745025362692856, "learning_rate": 1.9870502626379127e-05, "loss": 0.4603, "step": 501 }, { "epoch": 0.2931172907087074, "grad_norm": 0.10338101242002788, "learning_rate": 1.986886183520398e-05, "loss": 0.4676, "step": 502 }, { "epoch": 0.2937011896941829, "grad_norm": 0.10368791964332882, "learning_rate": 1.9867210783177857e-05, "loss": 0.473, "step": 503 }, { "epoch": 0.2942850886796584, "grad_norm": 0.10866188933819464, "learning_rate": 1.986554947201739e-05, "loss": 0.4224, "step": 504 }, { "epoch": 0.2948689876651339, "grad_norm": 0.09789800195462658, "learning_rate": 1.9863877903449883e-05, "loss": 0.4453, "step": 505 }, { "epoch": 0.2954528866506094, "grad_norm": 0.1060949768002253, "learning_rate": 1.9862196079213298e-05, "loss": 0.4579, "step": 506 }, { "epoch": 0.29603678563608493, "grad_norm": 0.10859878585114753, "learning_rate": 1.986050400105626e-05, "loss": 0.4669, "step": 507 }, { "epoch": 0.2966206846215605, "grad_norm": 0.10502936170037692, "learning_rate": 1.9858801670738052e-05, "loss": 0.4578, "step": 508 }, { "epoch": 0.297204583607036, "grad_norm": 0.0985864630617534, "learning_rate": 1.9857089090028628e-05, "loss": 0.5119, "step": 509 }, { "epoch": 0.2977884825925115, "grad_norm": 0.10851546594767544, "learning_rate": 1.9855366260708586e-05, "loss": 0.4572, "step": 510 }, { "epoch": 0.298372381577987, "grad_norm": 0.10011483729909533, "learning_rate": 1.9853633184569187e-05, "loss": 0.4565, "step": 511 }, { "epoch": 0.2989562805634625, "grad_norm": 0.10045191488708473, "learning_rate": 1.9851889863412347e-05, "loss": 0.482, "step": 512 }, { "epoch": 0.299540179548938, "grad_norm": 0.11099591077961109, "learning_rate": 1.985013629905063e-05, "loss": 0.4396, "step": 513 }, { "epoch": 0.30012407853441353, "grad_norm": 0.11617281007963981, "learning_rate": 1.9848372493307253e-05, "loss": 0.4003, "step": 514 }, { "epoch": 0.30070797751988904, "grad_norm": 0.10803420361899901, "learning_rate": 1.9846598448016077e-05, "loss": 0.4657, "step": 515 }, { "epoch": 0.3012918765053646, "grad_norm": 0.10740114324565991, "learning_rate": 1.984481416502161e-05, "loss": 0.4643, "step": 516 }, { "epoch": 0.3018757754908401, "grad_norm": 0.10448276172791615, "learning_rate": 1.9843019646179014e-05, "loss": 0.465, "step": 517 }, { "epoch": 0.3024596744763156, "grad_norm": 0.10038510048687833, "learning_rate": 1.984121489335408e-05, "loss": 0.4541, "step": 518 }, { "epoch": 0.3030435734617911, "grad_norm": 0.10461722749634865, "learning_rate": 1.9839399908423248e-05, "loss": 0.4566, "step": 519 }, { "epoch": 0.3036274724472666, "grad_norm": 0.10657475629046335, "learning_rate": 1.983757469327359e-05, "loss": 0.4543, "step": 520 }, { "epoch": 0.30421137143274213, "grad_norm": 0.1155240548791097, "learning_rate": 1.983573924980282e-05, "loss": 0.4918, "step": 521 }, { "epoch": 0.30479527041821763, "grad_norm": 0.10229802542467423, "learning_rate": 1.983389357991929e-05, "loss": 0.4934, "step": 522 }, { "epoch": 0.30537916940369314, "grad_norm": 0.08946631174403587, "learning_rate": 1.9832037685541973e-05, "loss": 0.3864, "step": 523 }, { "epoch": 0.30596306838916865, "grad_norm": 0.1111394179040024, "learning_rate": 1.983017156860048e-05, "loss": 0.4866, "step": 524 }, { "epoch": 0.3065469673746442, "grad_norm": 0.09861437303566072, "learning_rate": 1.9828295231035054e-05, "loss": 0.4645, "step": 525 }, { "epoch": 0.3071308663601197, "grad_norm": 0.10523272476319889, "learning_rate": 1.9826408674796552e-05, "loss": 0.4723, "step": 526 }, { "epoch": 0.3077147653455952, "grad_norm": 0.11284379698328248, "learning_rate": 1.9824511901846475e-05, "loss": 0.4811, "step": 527 }, { "epoch": 0.3082986643310707, "grad_norm": 0.09895986674087091, "learning_rate": 1.9822604914156927e-05, "loss": 0.4337, "step": 528 }, { "epoch": 0.30888256331654623, "grad_norm": 0.11398226426825173, "learning_rate": 1.982068771371064e-05, "loss": 0.4982, "step": 529 }, { "epoch": 0.30946646230202174, "grad_norm": 0.10378078659754719, "learning_rate": 1.9818760302500976e-05, "loss": 0.4891, "step": 530 }, { "epoch": 0.31005036128749724, "grad_norm": 0.09948832402615183, "learning_rate": 1.9816822682531888e-05, "loss": 0.4428, "step": 531 }, { "epoch": 0.31063426027297275, "grad_norm": 0.10733086027443259, "learning_rate": 1.981487485581797e-05, "loss": 0.4223, "step": 532 }, { "epoch": 0.3112181592584483, "grad_norm": 0.0979828733459516, "learning_rate": 1.9812916824384406e-05, "loss": 0.4082, "step": 533 }, { "epoch": 0.3118020582439238, "grad_norm": 0.10385897612744865, "learning_rate": 1.9810948590267013e-05, "loss": 0.4638, "step": 534 }, { "epoch": 0.3123859572293993, "grad_norm": 0.1039170930940154, "learning_rate": 1.9808970155512187e-05, "loss": 0.455, "step": 535 }, { "epoch": 0.31296985621487483, "grad_norm": 0.10663732531315313, "learning_rate": 1.9806981522176957e-05, "loss": 0.4694, "step": 536 }, { "epoch": 0.31355375520035034, "grad_norm": 0.10082097080297628, "learning_rate": 1.9804982692328944e-05, "loss": 0.4584, "step": 537 }, { "epoch": 0.31413765418582584, "grad_norm": 0.10179554957938358, "learning_rate": 1.9802973668046364e-05, "loss": 0.4488, "step": 538 }, { "epoch": 0.31472155317130135, "grad_norm": 0.10593770944655213, "learning_rate": 1.9800954451418044e-05, "loss": 0.5161, "step": 539 }, { "epoch": 0.31530545215677686, "grad_norm": 0.10638160183880245, "learning_rate": 1.9798925044543402e-05, "loss": 0.4884, "step": 540 }, { "epoch": 0.3158893511422524, "grad_norm": 0.09796903270686329, "learning_rate": 1.9796885449532456e-05, "loss": 0.4889, "step": 541 }, { "epoch": 0.3164732501277279, "grad_norm": 0.10231789006111408, "learning_rate": 1.979483566850581e-05, "loss": 0.4592, "step": 542 }, { "epoch": 0.31705714911320343, "grad_norm": 0.105217543844321, "learning_rate": 1.9792775703594663e-05, "loss": 0.4524, "step": 543 }, { "epoch": 0.31764104809867894, "grad_norm": 0.10130880263174068, "learning_rate": 1.97907055569408e-05, "loss": 0.4282, "step": 544 }, { "epoch": 0.31822494708415444, "grad_norm": 0.09774971230126955, "learning_rate": 1.9788625230696596e-05, "loss": 0.4518, "step": 545 }, { "epoch": 0.31880884606962995, "grad_norm": 0.10968141780722504, "learning_rate": 1.9786534727025005e-05, "loss": 0.485, "step": 546 }, { "epoch": 0.31939274505510545, "grad_norm": 0.10404750193717321, "learning_rate": 1.9784434048099565e-05, "loss": 0.505, "step": 547 }, { "epoch": 0.31997664404058096, "grad_norm": 0.09582219916807343, "learning_rate": 1.9782323196104395e-05, "loss": 0.4281, "step": 548 }, { "epoch": 0.32056054302605647, "grad_norm": 0.09607285944923885, "learning_rate": 1.978020217323419e-05, "loss": 0.456, "step": 549 }, { "epoch": 0.321144442011532, "grad_norm": 0.10320977924294053, "learning_rate": 1.9778070981694216e-05, "loss": 0.481, "step": 550 }, { "epoch": 0.32172834099700753, "grad_norm": 0.10034562190690727, "learning_rate": 1.9775929623700318e-05, "loss": 0.4637, "step": 551 }, { "epoch": 0.32231223998248304, "grad_norm": 0.09656113366743427, "learning_rate": 1.9773778101478908e-05, "loss": 0.4472, "step": 552 }, { "epoch": 0.32289613896795855, "grad_norm": 0.10006754333895095, "learning_rate": 1.9771616417266966e-05, "loss": 0.4041, "step": 553 }, { "epoch": 0.32348003795343405, "grad_norm": 0.12693718311082727, "learning_rate": 1.976944457331204e-05, "loss": 0.5491, "step": 554 }, { "epoch": 0.32406393693890956, "grad_norm": 0.09812077371229931, "learning_rate": 1.976726257187223e-05, "loss": 0.4366, "step": 555 }, { "epoch": 0.32464783592438506, "grad_norm": 0.10426920885816618, "learning_rate": 1.9765070415216218e-05, "loss": 0.4353, "step": 556 }, { "epoch": 0.32523173490986057, "grad_norm": 0.0950033628071587, "learning_rate": 1.976286810562323e-05, "loss": 0.4279, "step": 557 }, { "epoch": 0.32581563389533613, "grad_norm": 0.11114642869801301, "learning_rate": 1.976065564538304e-05, "loss": 0.4562, "step": 558 }, { "epoch": 0.32639953288081164, "grad_norm": 0.11243106271201976, "learning_rate": 1.9758433036796003e-05, "loss": 0.4733, "step": 559 }, { "epoch": 0.32698343186628714, "grad_norm": 0.10281089581894261, "learning_rate": 1.9756200282173e-05, "loss": 0.4182, "step": 560 }, { "epoch": 0.32756733085176265, "grad_norm": 0.10111071863510909, "learning_rate": 1.975395738383547e-05, "loss": 0.4375, "step": 561 }, { "epoch": 0.32815122983723816, "grad_norm": 0.1130276699063671, "learning_rate": 1.9751704344115402e-05, "loss": 0.4549, "step": 562 }, { "epoch": 0.32873512882271366, "grad_norm": 0.09891327338221213, "learning_rate": 1.9749441165355322e-05, "loss": 0.4397, "step": 563 }, { "epoch": 0.32931902780818917, "grad_norm": 0.10046774329372109, "learning_rate": 1.9747167849908305e-05, "loss": 0.4518, "step": 564 }, { "epoch": 0.3299029267936647, "grad_norm": 0.09850693448155942, "learning_rate": 1.974488440013796e-05, "loss": 0.5, "step": 565 }, { "epoch": 0.33048682577914024, "grad_norm": 0.10169347896786042, "learning_rate": 1.9742590818418435e-05, "loss": 0.4386, "step": 566 }, { "epoch": 0.33107072476461574, "grad_norm": 0.09995200338447215, "learning_rate": 1.9740287107134417e-05, "loss": 0.4551, "step": 567 }, { "epoch": 0.33165462375009125, "grad_norm": 0.1021532890724786, "learning_rate": 1.9737973268681117e-05, "loss": 0.4373, "step": 568 }, { "epoch": 0.33223852273556675, "grad_norm": 0.10807841388857137, "learning_rate": 1.9735649305464274e-05, "loss": 0.4734, "step": 569 }, { "epoch": 0.33282242172104226, "grad_norm": 0.09986271582875712, "learning_rate": 1.9733315219900165e-05, "loss": 0.4689, "step": 570 }, { "epoch": 0.33340632070651777, "grad_norm": 0.09957883599216254, "learning_rate": 1.9730971014415585e-05, "loss": 0.4487, "step": 571 }, { "epoch": 0.3339902196919933, "grad_norm": 0.09839494817158226, "learning_rate": 1.9728616691447845e-05, "loss": 0.4419, "step": 572 }, { "epoch": 0.3345741186774688, "grad_norm": 0.11064121527768107, "learning_rate": 1.9726252253444784e-05, "loss": 0.4445, "step": 573 }, { "epoch": 0.3351580176629443, "grad_norm": 0.10482830694113973, "learning_rate": 1.9723877702864758e-05, "loss": 0.4961, "step": 574 }, { "epoch": 0.33574191664841985, "grad_norm": 0.10378126478026392, "learning_rate": 1.9721493042176632e-05, "loss": 0.4469, "step": 575 }, { "epoch": 0.33632581563389535, "grad_norm": 0.10996152031666606, "learning_rate": 1.9719098273859782e-05, "loss": 0.4785, "step": 576 }, { "epoch": 0.33690971461937086, "grad_norm": 0.10597918797813792, "learning_rate": 1.97166934004041e-05, "loss": 0.4686, "step": 577 }, { "epoch": 0.33749361360484637, "grad_norm": 0.09824585305338057, "learning_rate": 1.9714278424309983e-05, "loss": 0.4309, "step": 578 }, { "epoch": 0.33807751259032187, "grad_norm": 0.09589357287061442, "learning_rate": 1.971185334808832e-05, "loss": 0.4132, "step": 579 }, { "epoch": 0.3386614115757974, "grad_norm": 0.10274805762379925, "learning_rate": 1.9709418174260523e-05, "loss": 0.4846, "step": 580 }, { "epoch": 0.3392453105612729, "grad_norm": 0.11107268851282005, "learning_rate": 1.970697290535848e-05, "loss": 0.4584, "step": 581 }, { "epoch": 0.3398292095467484, "grad_norm": 0.11016782333605465, "learning_rate": 1.970451754392459e-05, "loss": 0.4073, "step": 582 }, { "epoch": 0.34041310853222395, "grad_norm": 0.1055014024017826, "learning_rate": 1.970205209251174e-05, "loss": 0.417, "step": 583 }, { "epoch": 0.34099700751769946, "grad_norm": 0.11449110983191173, "learning_rate": 1.9699576553683308e-05, "loss": 0.4653, "step": 584 }, { "epoch": 0.34158090650317496, "grad_norm": 0.12090277360507563, "learning_rate": 1.9697090930013164e-05, "loss": 0.4751, "step": 585 }, { "epoch": 0.34216480548865047, "grad_norm": 0.1123005006235753, "learning_rate": 1.969459522408566e-05, "loss": 0.413, "step": 586 }, { "epoch": 0.342748704474126, "grad_norm": 0.10765554559293461, "learning_rate": 1.9692089438495622e-05, "loss": 0.4095, "step": 587 }, { "epoch": 0.3433326034596015, "grad_norm": 0.1274207568548984, "learning_rate": 1.9689573575848375e-05, "loss": 0.4736, "step": 588 }, { "epoch": 0.343916502445077, "grad_norm": 0.11400872837485329, "learning_rate": 1.9687047638759707e-05, "loss": 0.4422, "step": 589 }, { "epoch": 0.3445004014305525, "grad_norm": 0.09602547311255592, "learning_rate": 1.968451162985589e-05, "loss": 0.4074, "step": 590 }, { "epoch": 0.345084300416028, "grad_norm": 0.11593883622028078, "learning_rate": 1.9681965551773653e-05, "loss": 0.4432, "step": 591 }, { "epoch": 0.34566819940150356, "grad_norm": 0.10224536220580321, "learning_rate": 1.967940940716021e-05, "loss": 0.4299, "step": 592 }, { "epoch": 0.34625209838697907, "grad_norm": 0.11613091322602201, "learning_rate": 1.9676843198673237e-05, "loss": 0.4446, "step": 593 }, { "epoch": 0.3468359973724546, "grad_norm": 0.10312614532667336, "learning_rate": 1.9674266928980863e-05, "loss": 0.4495, "step": 594 }, { "epoch": 0.3474198963579301, "grad_norm": 0.10628008427795624, "learning_rate": 1.9671680600761694e-05, "loss": 0.4093, "step": 595 }, { "epoch": 0.3480037953434056, "grad_norm": 0.10739723029460527, "learning_rate": 1.966908421670479e-05, "loss": 0.4695, "step": 596 }, { "epoch": 0.3485876943288811, "grad_norm": 0.11046922847839659, "learning_rate": 1.9666477779509655e-05, "loss": 0.4323, "step": 597 }, { "epoch": 0.3491715933143566, "grad_norm": 0.10466073474189728, "learning_rate": 1.9663861291886256e-05, "loss": 0.4642, "step": 598 }, { "epoch": 0.3497554922998321, "grad_norm": 0.10036271395453045, "learning_rate": 1.9661234756555006e-05, "loss": 0.4273, "step": 599 }, { "epoch": 0.35033939128530767, "grad_norm": 0.0998913707330275, "learning_rate": 1.965859817624677e-05, "loss": 0.4209, "step": 600 }, { "epoch": 0.3509232902707832, "grad_norm": 0.100122547137597, "learning_rate": 1.9655951553702848e-05, "loss": 0.4498, "step": 601 }, { "epoch": 0.3515071892562587, "grad_norm": 0.11132496667518098, "learning_rate": 1.965329489167499e-05, "loss": 0.4567, "step": 602 }, { "epoch": 0.3520910882417342, "grad_norm": 0.09309062313095293, "learning_rate": 1.9650628192925372e-05, "loss": 0.4383, "step": 603 }, { "epoch": 0.3526749872272097, "grad_norm": 0.09784530754655826, "learning_rate": 1.9647951460226622e-05, "loss": 0.4504, "step": 604 }, { "epoch": 0.3532588862126852, "grad_norm": 0.11262231558786127, "learning_rate": 1.964526469636179e-05, "loss": 0.4357, "step": 605 }, { "epoch": 0.3538427851981607, "grad_norm": 0.10657373945986712, "learning_rate": 1.9642567904124354e-05, "loss": 0.4822, "step": 606 }, { "epoch": 0.3544266841836362, "grad_norm": 0.1060285681296254, "learning_rate": 1.963986108631823e-05, "loss": 0.4938, "step": 607 }, { "epoch": 0.35501058316911177, "grad_norm": 0.11357703632459105, "learning_rate": 1.9637144245757742e-05, "loss": 0.4466, "step": 608 }, { "epoch": 0.3555944821545873, "grad_norm": 0.0988131201930295, "learning_rate": 1.9634417385267643e-05, "loss": 0.416, "step": 609 }, { "epoch": 0.3561783811400628, "grad_norm": 0.10154024956636144, "learning_rate": 1.963168050768311e-05, "loss": 0.4577, "step": 610 }, { "epoch": 0.3567622801255383, "grad_norm": 0.10137856342588691, "learning_rate": 1.9628933615849726e-05, "loss": 0.4704, "step": 611 }, { "epoch": 0.3573461791110138, "grad_norm": 0.11310110463327012, "learning_rate": 1.962617671262349e-05, "loss": 0.4711, "step": 612 }, { "epoch": 0.3579300780964893, "grad_norm": 0.11241283089332317, "learning_rate": 1.9623409800870804e-05, "loss": 0.4452, "step": 613 }, { "epoch": 0.3585139770819648, "grad_norm": 0.1038787466312925, "learning_rate": 1.9620632883468484e-05, "loss": 0.4432, "step": 614 }, { "epoch": 0.3590978760674403, "grad_norm": 0.12076522263509826, "learning_rate": 1.9617845963303744e-05, "loss": 0.4449, "step": 615 }, { "epoch": 0.3596817750529158, "grad_norm": 0.10516582760983299, "learning_rate": 1.9615049043274207e-05, "loss": 0.4477, "step": 616 }, { "epoch": 0.3602656740383914, "grad_norm": 0.10378606941427583, "learning_rate": 1.9612242126287876e-05, "loss": 0.431, "step": 617 }, { "epoch": 0.3608495730238669, "grad_norm": 0.1059945531923125, "learning_rate": 1.960942521526317e-05, "loss": 0.491, "step": 618 }, { "epoch": 0.3614334720093424, "grad_norm": 0.10268522102145074, "learning_rate": 1.9606598313128874e-05, "loss": 0.4096, "step": 619 }, { "epoch": 0.3620173709948179, "grad_norm": 0.10012132069275291, "learning_rate": 1.9603761422824187e-05, "loss": 0.434, "step": 620 }, { "epoch": 0.3626012699802934, "grad_norm": 0.11020485241080363, "learning_rate": 1.9600914547298666e-05, "loss": 0.4477, "step": 621 }, { "epoch": 0.3631851689657689, "grad_norm": 0.11026417466354196, "learning_rate": 1.9598057689512277e-05, "loss": 0.453, "step": 622 }, { "epoch": 0.3637690679512444, "grad_norm": 0.10508252932029731, "learning_rate": 1.9595190852435345e-05, "loss": 0.4354, "step": 623 }, { "epoch": 0.3643529669367199, "grad_norm": 0.09480981796170476, "learning_rate": 1.9592314039048575e-05, "loss": 0.4008, "step": 624 }, { "epoch": 0.3649368659221955, "grad_norm": 0.1009789670492867, "learning_rate": 1.9589427252343054e-05, "loss": 0.4827, "step": 625 }, { "epoch": 0.365520764907671, "grad_norm": 0.11489766651612651, "learning_rate": 1.9586530495320227e-05, "loss": 0.476, "step": 626 }, { "epoch": 0.3661046638931465, "grad_norm": 0.09304454267536716, "learning_rate": 1.958362377099191e-05, "loss": 0.4706, "step": 627 }, { "epoch": 0.366688562878622, "grad_norm": 0.09686768274749832, "learning_rate": 1.958070708238028e-05, "loss": 0.4879, "step": 628 }, { "epoch": 0.3672724618640975, "grad_norm": 0.09805712473164378, "learning_rate": 1.957778043251788e-05, "loss": 0.3966, "step": 629 }, { "epoch": 0.367856360849573, "grad_norm": 0.08747400765420087, "learning_rate": 1.9574843824447602e-05, "loss": 0.4754, "step": 630 }, { "epoch": 0.3684402598350485, "grad_norm": 0.09853352208790386, "learning_rate": 1.9571897261222694e-05, "loss": 0.3982, "step": 631 }, { "epoch": 0.36902415882052403, "grad_norm": 0.10633504358175697, "learning_rate": 1.9568940745906762e-05, "loss": 0.4834, "step": 632 }, { "epoch": 0.36960805780599953, "grad_norm": 0.10926092532839479, "learning_rate": 1.956597428157375e-05, "loss": 0.4729, "step": 633 }, { "epoch": 0.3701919567914751, "grad_norm": 0.09735601236478371, "learning_rate": 1.956299787130795e-05, "loss": 0.4635, "step": 634 }, { "epoch": 0.3707758557769506, "grad_norm": 0.10515315715877156, "learning_rate": 1.9560011518203996e-05, "loss": 0.508, "step": 635 }, { "epoch": 0.3713597547624261, "grad_norm": 0.09621174664101108, "learning_rate": 1.9557015225366855e-05, "loss": 0.4512, "step": 636 }, { "epoch": 0.3719436537479016, "grad_norm": 0.0917649002550007, "learning_rate": 1.9554008995911837e-05, "loss": 0.452, "step": 637 }, { "epoch": 0.3725275527333771, "grad_norm": 0.09558540844769464, "learning_rate": 1.9550992832964575e-05, "loss": 0.429, "step": 638 }, { "epoch": 0.3731114517188526, "grad_norm": 0.08788385365737263, "learning_rate": 1.9547966739661032e-05, "loss": 0.4338, "step": 639 }, { "epoch": 0.37369535070432813, "grad_norm": 0.10085825848226367, "learning_rate": 1.95449307191475e-05, "loss": 0.4458, "step": 640 }, { "epoch": 0.37427924968980364, "grad_norm": 0.0947207564948106, "learning_rate": 1.9541884774580588e-05, "loss": 0.4366, "step": 641 }, { "epoch": 0.3748631486752792, "grad_norm": 0.09586750822023606, "learning_rate": 1.953882890912723e-05, "loss": 0.3881, "step": 642 }, { "epoch": 0.3754470476607547, "grad_norm": 0.0911297753739927, "learning_rate": 1.953576312596466e-05, "loss": 0.4706, "step": 643 }, { "epoch": 0.3760309466462302, "grad_norm": 0.09885120714333712, "learning_rate": 1.9532687428280442e-05, "loss": 0.4181, "step": 644 }, { "epoch": 0.3766148456317057, "grad_norm": 0.0946418615460806, "learning_rate": 1.952960181927243e-05, "loss": 0.4382, "step": 645 }, { "epoch": 0.3771987446171812, "grad_norm": 0.094545307040026, "learning_rate": 1.9526506302148805e-05, "loss": 0.3744, "step": 646 }, { "epoch": 0.37778264360265673, "grad_norm": 0.0989871938086869, "learning_rate": 1.9523400880128032e-05, "loss": 0.4481, "step": 647 }, { "epoch": 0.37836654258813224, "grad_norm": 0.1054758626179792, "learning_rate": 1.952028555643888e-05, "loss": 0.452, "step": 648 }, { "epoch": 0.37895044157360774, "grad_norm": 0.10459425560299217, "learning_rate": 1.9517160334320405e-05, "loss": 0.4534, "step": 649 }, { "epoch": 0.3795343405590833, "grad_norm": 0.10058325081626053, "learning_rate": 1.9514025217021976e-05, "loss": 0.4582, "step": 650 }, { "epoch": 0.3801182395445588, "grad_norm": 0.09791056703589218, "learning_rate": 1.951088020780323e-05, "loss": 0.4556, "step": 651 }, { "epoch": 0.3807021385300343, "grad_norm": 0.10128621355644006, "learning_rate": 1.950772530993409e-05, "loss": 0.4333, "step": 652 }, { "epoch": 0.3812860375155098, "grad_norm": 0.09796039016096297, "learning_rate": 1.9504560526694773e-05, "loss": 0.4434, "step": 653 }, { "epoch": 0.38186993650098533, "grad_norm": 0.0914915084367736, "learning_rate": 1.9501385861375765e-05, "loss": 0.473, "step": 654 }, { "epoch": 0.38245383548646084, "grad_norm": 0.09515488483489858, "learning_rate": 1.949820131727783e-05, "loss": 0.4344, "step": 655 }, { "epoch": 0.38303773447193634, "grad_norm": 0.09624219812787571, "learning_rate": 1.9495006897711994e-05, "loss": 0.4192, "step": 656 }, { "epoch": 0.38362163345741185, "grad_norm": 0.10099508773576288, "learning_rate": 1.949180260599957e-05, "loss": 0.4666, "step": 657 }, { "epoch": 0.38420553244288735, "grad_norm": 0.0944622696122737, "learning_rate": 1.9488588445472115e-05, "loss": 0.4469, "step": 658 }, { "epoch": 0.3847894314283629, "grad_norm": 0.09710387508903776, "learning_rate": 1.9485364419471454e-05, "loss": 0.4139, "step": 659 }, { "epoch": 0.3853733304138384, "grad_norm": 0.10690352628125581, "learning_rate": 1.948213053134968e-05, "loss": 0.4693, "step": 660 }, { "epoch": 0.38595722939931393, "grad_norm": 0.09783063271790708, "learning_rate": 1.9478886784469124e-05, "loss": 0.4816, "step": 661 }, { "epoch": 0.38654112838478943, "grad_norm": 0.1042555864418764, "learning_rate": 1.947563318220237e-05, "loss": 0.44, "step": 662 }, { "epoch": 0.38712502737026494, "grad_norm": 0.105237777233771, "learning_rate": 1.9472369727932263e-05, "loss": 0.4546, "step": 663 }, { "epoch": 0.38770892635574045, "grad_norm": 0.08795275326207314, "learning_rate": 1.9469096425051872e-05, "loss": 0.4113, "step": 664 }, { "epoch": 0.38829282534121595, "grad_norm": 0.09498398800004343, "learning_rate": 1.946581327696452e-05, "loss": 0.4263, "step": 665 }, { "epoch": 0.38887672432669146, "grad_norm": 0.09688426758921763, "learning_rate": 1.9462520287083755e-05, "loss": 0.4818, "step": 666 }, { "epoch": 0.389460623312167, "grad_norm": 0.09445164631358792, "learning_rate": 1.945921745883337e-05, "loss": 0.4489, "step": 667 }, { "epoch": 0.3900445222976425, "grad_norm": 0.09827634629606337, "learning_rate": 1.945590479564738e-05, "loss": 0.4584, "step": 668 }, { "epoch": 0.39062842128311803, "grad_norm": 0.09752213183930311, "learning_rate": 1.9452582300970025e-05, "loss": 0.4957, "step": 669 }, { "epoch": 0.39121232026859354, "grad_norm": 0.09549573143907904, "learning_rate": 1.944924997825577e-05, "loss": 0.4767, "step": 670 }, { "epoch": 0.39179621925406904, "grad_norm": 0.10253036693577464, "learning_rate": 1.944590783096929e-05, "loss": 0.4191, "step": 671 }, { "epoch": 0.39238011823954455, "grad_norm": 0.09520228028606144, "learning_rate": 1.944255586258549e-05, "loss": 0.4317, "step": 672 }, { "epoch": 0.39296401722502006, "grad_norm": 0.09344216896955186, "learning_rate": 1.9439194076589477e-05, "loss": 0.4321, "step": 673 }, { "epoch": 0.39354791621049556, "grad_norm": 0.0958816974134834, "learning_rate": 1.9435822476476566e-05, "loss": 0.4392, "step": 674 }, { "epoch": 0.3941318151959711, "grad_norm": 0.09826252237597442, "learning_rate": 1.943244106575227e-05, "loss": 0.4268, "step": 675 }, { "epoch": 0.39471571418144663, "grad_norm": 0.09684718836320864, "learning_rate": 1.9429049847932317e-05, "loss": 0.4518, "step": 676 }, { "epoch": 0.39529961316692214, "grad_norm": 0.09234941217158513, "learning_rate": 1.9425648826542618e-05, "loss": 0.445, "step": 677 }, { "epoch": 0.39588351215239764, "grad_norm": 0.0928373778715394, "learning_rate": 1.9422238005119287e-05, "loss": 0.4087, "step": 678 }, { "epoch": 0.39646741113787315, "grad_norm": 0.09381692132331554, "learning_rate": 1.9418817387208614e-05, "loss": 0.4478, "step": 679 }, { "epoch": 0.39705131012334866, "grad_norm": 0.09519746749143314, "learning_rate": 1.9415386976367095e-05, "loss": 0.4326, "step": 680 }, { "epoch": 0.39763520910882416, "grad_norm": 0.10349000854901712, "learning_rate": 1.9411946776161388e-05, "loss": 0.4937, "step": 681 }, { "epoch": 0.39821910809429967, "grad_norm": 0.10175955870991861, "learning_rate": 1.9408496790168337e-05, "loss": 0.448, "step": 682 }, { "epoch": 0.3988030070797752, "grad_norm": 0.08944167415558167, "learning_rate": 1.9405037021974965e-05, "loss": 0.4324, "step": 683 }, { "epoch": 0.39938690606525074, "grad_norm": 0.09404737157897304, "learning_rate": 1.9401567475178457e-05, "loss": 0.4705, "step": 684 }, { "epoch": 0.39997080505072624, "grad_norm": 0.0970999933102515, "learning_rate": 1.9398088153386175e-05, "loss": 0.4418, "step": 685 }, { "epoch": 0.40055470403620175, "grad_norm": 0.09601346062380452, "learning_rate": 1.939459906021563e-05, "loss": 0.4397, "step": 686 }, { "epoch": 0.40113860302167725, "grad_norm": 0.09368416861180735, "learning_rate": 1.939110019929451e-05, "loss": 0.4285, "step": 687 }, { "epoch": 0.40172250200715276, "grad_norm": 0.09654021102661041, "learning_rate": 1.938759157426065e-05, "loss": 0.4688, "step": 688 }, { "epoch": 0.40230640099262827, "grad_norm": 0.09901828299846495, "learning_rate": 1.9384073188762027e-05, "loss": 0.4472, "step": 689 }, { "epoch": 0.40289029997810377, "grad_norm": 0.10552492702609927, "learning_rate": 1.9380545046456787e-05, "loss": 0.4741, "step": 690 }, { "epoch": 0.4034741989635793, "grad_norm": 0.09783494823203975, "learning_rate": 1.9377007151013205e-05, "loss": 0.4477, "step": 691 }, { "epoch": 0.40405809794905484, "grad_norm": 0.10057284110598397, "learning_rate": 1.93734595061097e-05, "loss": 0.434, "step": 692 }, { "epoch": 0.40464199693453035, "grad_norm": 0.0984320627486198, "learning_rate": 1.9369902115434827e-05, "loss": 0.4934, "step": 693 }, { "epoch": 0.40522589592000585, "grad_norm": 0.09742799052519063, "learning_rate": 1.936633498268728e-05, "loss": 0.4929, "step": 694 }, { "epoch": 0.40580979490548136, "grad_norm": 0.08501084769539646, "learning_rate": 1.9362758111575878e-05, "loss": 0.4144, "step": 695 }, { "epoch": 0.40639369389095686, "grad_norm": 0.11128274286027096, "learning_rate": 1.9359171505819558e-05, "loss": 0.4962, "step": 696 }, { "epoch": 0.40697759287643237, "grad_norm": 0.09252595409843999, "learning_rate": 1.935557516914739e-05, "loss": 0.4632, "step": 697 }, { "epoch": 0.4075614918619079, "grad_norm": 0.10008917672526214, "learning_rate": 1.9351969105298558e-05, "loss": 0.4136, "step": 698 }, { "epoch": 0.4081453908473834, "grad_norm": 0.10406812361745382, "learning_rate": 1.9348353318022353e-05, "loss": 0.4531, "step": 699 }, { "epoch": 0.4087292898328589, "grad_norm": 0.09582162330524467, "learning_rate": 1.9344727811078183e-05, "loss": 0.4643, "step": 700 }, { "epoch": 0.40931318881833445, "grad_norm": 0.09181189846722476, "learning_rate": 1.934109258823556e-05, "loss": 0.4588, "step": 701 }, { "epoch": 0.40989708780380996, "grad_norm": 0.08789804649299185, "learning_rate": 1.9337447653274097e-05, "loss": 0.4008, "step": 702 }, { "epoch": 0.41048098678928546, "grad_norm": 0.09114396444734435, "learning_rate": 1.9333793009983505e-05, "loss": 0.439, "step": 703 }, { "epoch": 0.41106488577476097, "grad_norm": 0.0927505197545379, "learning_rate": 1.9330128662163588e-05, "loss": 0.4538, "step": 704 }, { "epoch": 0.4116487847602365, "grad_norm": 0.10134316819273813, "learning_rate": 1.9326454613624243e-05, "loss": 0.416, "step": 705 }, { "epoch": 0.412232683745712, "grad_norm": 0.10160668182328521, "learning_rate": 1.932277086818545e-05, "loss": 0.4663, "step": 706 }, { "epoch": 0.4128165827311875, "grad_norm": 0.09539847277398356, "learning_rate": 1.931907742967727e-05, "loss": 0.4127, "step": 707 }, { "epoch": 0.413400481716663, "grad_norm": 0.08848994538216584, "learning_rate": 1.9315374301939843e-05, "loss": 0.4245, "step": 708 }, { "epoch": 0.41398438070213855, "grad_norm": 0.15719852626677044, "learning_rate": 1.9311661488823388e-05, "loss": 0.4666, "step": 709 }, { "epoch": 0.41456827968761406, "grad_norm": 0.09737010928214909, "learning_rate": 1.930793899418819e-05, "loss": 0.4404, "step": 710 }, { "epoch": 0.41515217867308957, "grad_norm": 0.09344852245502214, "learning_rate": 1.93042068219046e-05, "loss": 0.4231, "step": 711 }, { "epoch": 0.4157360776585651, "grad_norm": 0.09712649550653116, "learning_rate": 1.9300464975853032e-05, "loss": 0.4363, "step": 712 }, { "epoch": 0.4163199766440406, "grad_norm": 0.09848903203911874, "learning_rate": 1.9296713459923955e-05, "loss": 0.4222, "step": 713 }, { "epoch": 0.4169038756295161, "grad_norm": 0.0971679882845561, "learning_rate": 1.9292952278017892e-05, "loss": 0.4783, "step": 714 }, { "epoch": 0.4174877746149916, "grad_norm": 0.09858799016444908, "learning_rate": 1.9289181434045428e-05, "loss": 0.4644, "step": 715 }, { "epoch": 0.4180716736004671, "grad_norm": 0.0960283832629449, "learning_rate": 1.9285400931927177e-05, "loss": 0.4142, "step": 716 }, { "epoch": 0.41865557258594266, "grad_norm": 0.10805553286587623, "learning_rate": 1.92816107755938e-05, "loss": 0.4159, "step": 717 }, { "epoch": 0.41923947157141817, "grad_norm": 0.09685093423851593, "learning_rate": 1.9277810968986004e-05, "loss": 0.4666, "step": 718 }, { "epoch": 0.41982337055689367, "grad_norm": 0.08597115356055576, "learning_rate": 1.9274001516054513e-05, "loss": 0.3926, "step": 719 }, { "epoch": 0.4204072695423692, "grad_norm": 0.09616893912783621, "learning_rate": 1.9270182420760104e-05, "loss": 0.4424, "step": 720 }, { "epoch": 0.4209911685278447, "grad_norm": 0.10004473319799759, "learning_rate": 1.9266353687073557e-05, "loss": 0.4564, "step": 721 }, { "epoch": 0.4215750675133202, "grad_norm": 0.10106912475616883, "learning_rate": 1.9262515318975686e-05, "loss": 0.4558, "step": 722 }, { "epoch": 0.4221589664987957, "grad_norm": 0.10555898854521069, "learning_rate": 1.9258667320457313e-05, "loss": 0.4701, "step": 723 }, { "epoch": 0.4227428654842712, "grad_norm": 0.1257087364676739, "learning_rate": 1.9254809695519284e-05, "loss": 0.434, "step": 724 }, { "epoch": 0.4233267644697467, "grad_norm": 0.10417276074207545, "learning_rate": 1.9250942448172444e-05, "loss": 0.4942, "step": 725 }, { "epoch": 0.42391066345522227, "grad_norm": 0.095346841215881, "learning_rate": 1.924706558243765e-05, "loss": 0.4394, "step": 726 }, { "epoch": 0.4244945624406978, "grad_norm": 0.10771963139028508, "learning_rate": 1.9243179102345753e-05, "loss": 0.5136, "step": 727 }, { "epoch": 0.4250784614261733, "grad_norm": 0.10144480687678246, "learning_rate": 1.923928301193761e-05, "loss": 0.4483, "step": 728 }, { "epoch": 0.4256623604116488, "grad_norm": 0.09256400639448982, "learning_rate": 1.923537731526405e-05, "loss": 0.4237, "step": 729 }, { "epoch": 0.4262462593971243, "grad_norm": 0.09515630614164573, "learning_rate": 1.9231462016385917e-05, "loss": 0.4652, "step": 730 }, { "epoch": 0.4268301583825998, "grad_norm": 0.09650171132375637, "learning_rate": 1.9227537119374017e-05, "loss": 0.4186, "step": 731 }, { "epoch": 0.4274140573680753, "grad_norm": 0.10071936229438098, "learning_rate": 1.9223602628309144e-05, "loss": 0.4821, "step": 732 }, { "epoch": 0.4279979563535508, "grad_norm": 0.09890049447587904, "learning_rate": 1.921965854728207e-05, "loss": 0.4279, "step": 733 }, { "epoch": 0.4285818553390264, "grad_norm": 0.09148544940345006, "learning_rate": 1.9215704880393527e-05, "loss": 0.4497, "step": 734 }, { "epoch": 0.4291657543245019, "grad_norm": 0.10167469822692538, "learning_rate": 1.9211741631754228e-05, "loss": 0.4373, "step": 735 }, { "epoch": 0.4297496533099774, "grad_norm": 0.10526003259547594, "learning_rate": 1.9207768805484838e-05, "loss": 0.4542, "step": 736 }, { "epoch": 0.4303335522954529, "grad_norm": 0.09796787834399802, "learning_rate": 1.9203786405715984e-05, "loss": 0.4126, "step": 737 }, { "epoch": 0.4309174512809284, "grad_norm": 0.09827093781727202, "learning_rate": 1.9199794436588244e-05, "loss": 0.4289, "step": 738 }, { "epoch": 0.4315013502664039, "grad_norm": 0.09382891536854061, "learning_rate": 1.9195792902252148e-05, "loss": 0.4876, "step": 739 }, { "epoch": 0.4320852492518794, "grad_norm": 0.09668355571094721, "learning_rate": 1.9191781806868172e-05, "loss": 0.4463, "step": 740 }, { "epoch": 0.4326691482373549, "grad_norm": 0.10718359511037723, "learning_rate": 1.918776115460673e-05, "loss": 0.5046, "step": 741 }, { "epoch": 0.4332530472228304, "grad_norm": 0.10026499620375334, "learning_rate": 1.9183730949648173e-05, "loss": 0.4781, "step": 742 }, { "epoch": 0.433836946208306, "grad_norm": 0.10454186241932836, "learning_rate": 1.9179691196182782e-05, "loss": 0.4268, "step": 743 }, { "epoch": 0.4344208451937815, "grad_norm": 0.09983073121435526, "learning_rate": 1.917564189841078e-05, "loss": 0.4064, "step": 744 }, { "epoch": 0.435004744179257, "grad_norm": 0.09649949343756971, "learning_rate": 1.9171583060542288e-05, "loss": 0.4211, "step": 745 }, { "epoch": 0.4355886431647325, "grad_norm": 0.10306899667192736, "learning_rate": 1.916751468679737e-05, "loss": 0.4006, "step": 746 }, { "epoch": 0.436172542150208, "grad_norm": 0.09706885938119586, "learning_rate": 1.9163436781405992e-05, "loss": 0.4477, "step": 747 }, { "epoch": 0.4367564411356835, "grad_norm": 0.08881349665847803, "learning_rate": 1.915934934860803e-05, "loss": 0.3992, "step": 748 }, { "epoch": 0.437340340121159, "grad_norm": 0.09400753085878928, "learning_rate": 1.915525239265327e-05, "loss": 0.4173, "step": 749 }, { "epoch": 0.4379242391066345, "grad_norm": 0.0948327526084171, "learning_rate": 1.91511459178014e-05, "loss": 0.4133, "step": 750 }, { "epoch": 0.4385081380921101, "grad_norm": 0.11030981969217149, "learning_rate": 1.9147029928322002e-05, "loss": 0.4732, "step": 751 }, { "epoch": 0.4390920370775856, "grad_norm": 0.10097443420611413, "learning_rate": 1.9142904428494554e-05, "loss": 0.4325, "step": 752 }, { "epoch": 0.4396759360630611, "grad_norm": 0.09077515733476503, "learning_rate": 1.9138769422608413e-05, "loss": 0.3989, "step": 753 }, { "epoch": 0.4402598350485366, "grad_norm": 0.10433674098174116, "learning_rate": 1.9134624914962835e-05, "loss": 0.442, "step": 754 }, { "epoch": 0.4408437340340121, "grad_norm": 0.09889257153914731, "learning_rate": 1.9130470909866943e-05, "loss": 0.4375, "step": 755 }, { "epoch": 0.4414276330194876, "grad_norm": 0.1049386264249021, "learning_rate": 1.9126307411639736e-05, "loss": 0.4706, "step": 756 }, { "epoch": 0.4420115320049631, "grad_norm": 0.09743938379112574, "learning_rate": 1.912213442461009e-05, "loss": 0.4412, "step": 757 }, { "epoch": 0.44259543099043863, "grad_norm": 0.0991504945857414, "learning_rate": 1.9117951953116737e-05, "loss": 0.5283, "step": 758 }, { "epoch": 0.4431793299759142, "grad_norm": 0.09496596254396149, "learning_rate": 1.911376000150828e-05, "loss": 0.5005, "step": 759 }, { "epoch": 0.4437632289613897, "grad_norm": 0.09308412859519398, "learning_rate": 1.9109558574143173e-05, "loss": 0.4493, "step": 760 }, { "epoch": 0.4443471279468652, "grad_norm": 0.10897402980237238, "learning_rate": 1.9105347675389723e-05, "loss": 0.4813, "step": 761 }, { "epoch": 0.4449310269323407, "grad_norm": 0.09965459962038398, "learning_rate": 1.9101127309626083e-05, "loss": 0.4572, "step": 762 }, { "epoch": 0.4455149259178162, "grad_norm": 0.0944613435683884, "learning_rate": 1.909689748124025e-05, "loss": 0.4288, "step": 763 }, { "epoch": 0.4460988249032917, "grad_norm": 0.10313962368320903, "learning_rate": 1.9092658194630065e-05, "loss": 0.4474, "step": 764 }, { "epoch": 0.44668272388876723, "grad_norm": 0.10303830399102372, "learning_rate": 1.9088409454203196e-05, "loss": 0.4564, "step": 765 }, { "epoch": 0.44726662287424274, "grad_norm": 0.09383652708338931, "learning_rate": 1.908415126437714e-05, "loss": 0.4668, "step": 766 }, { "epoch": 0.44785052185971824, "grad_norm": 0.109808760334621, "learning_rate": 1.9079883629579224e-05, "loss": 0.4487, "step": 767 }, { "epoch": 0.4484344208451938, "grad_norm": 0.09714597099462728, "learning_rate": 1.9075606554246594e-05, "loss": 0.4815, "step": 768 }, { "epoch": 0.4490183198306693, "grad_norm": 0.08863546709482412, "learning_rate": 1.9071320042826206e-05, "loss": 0.4282, "step": 769 }, { "epoch": 0.4496022188161448, "grad_norm": 0.09882640468872426, "learning_rate": 1.9067024099774828e-05, "loss": 0.4127, "step": 770 }, { "epoch": 0.4501861178016203, "grad_norm": 0.09695440226126845, "learning_rate": 1.9062718729559048e-05, "loss": 0.4425, "step": 771 }, { "epoch": 0.45077001678709583, "grad_norm": 0.09383982570058645, "learning_rate": 1.9058403936655235e-05, "loss": 0.4525, "step": 772 }, { "epoch": 0.45135391577257133, "grad_norm": 0.09919821163955667, "learning_rate": 1.9054079725549565e-05, "loss": 0.4548, "step": 773 }, { "epoch": 0.45193781475804684, "grad_norm": 0.09642039832475975, "learning_rate": 1.9049746100738012e-05, "loss": 0.4463, "step": 774 }, { "epoch": 0.45252171374352235, "grad_norm": 0.10051729459848241, "learning_rate": 1.9045403066726325e-05, "loss": 0.5022, "step": 775 }, { "epoch": 0.4531056127289979, "grad_norm": 0.10169865206232559, "learning_rate": 1.904105062803005e-05, "loss": 0.4674, "step": 776 }, { "epoch": 0.4536895117144734, "grad_norm": 0.11215978874526727, "learning_rate": 1.9036688789174496e-05, "loss": 0.4639, "step": 777 }, { "epoch": 0.4542734106999489, "grad_norm": 0.10662050310533276, "learning_rate": 1.9032317554694756e-05, "loss": 0.4918, "step": 778 }, { "epoch": 0.4548573096854244, "grad_norm": 0.0982628495157606, "learning_rate": 1.9027936929135688e-05, "loss": 0.4772, "step": 779 }, { "epoch": 0.45544120867089993, "grad_norm": 0.09041746404652945, "learning_rate": 1.9023546917051917e-05, "loss": 0.4169, "step": 780 }, { "epoch": 0.45602510765637544, "grad_norm": 0.10021263211743202, "learning_rate": 1.901914752300783e-05, "loss": 0.4605, "step": 781 }, { "epoch": 0.45660900664185095, "grad_norm": 0.09906741586862165, "learning_rate": 1.9014738751577552e-05, "loss": 0.4001, "step": 782 }, { "epoch": 0.45719290562732645, "grad_norm": 0.10076826440003843, "learning_rate": 1.901032060734498e-05, "loss": 0.4199, "step": 783 }, { "epoch": 0.457776804612802, "grad_norm": 0.09471882959077427, "learning_rate": 1.900589309490374e-05, "loss": 0.4116, "step": 784 }, { "epoch": 0.4583607035982775, "grad_norm": 0.10164899413794538, "learning_rate": 1.9001456218857207e-05, "loss": 0.455, "step": 785 }, { "epoch": 0.458944602583753, "grad_norm": 0.11083635845318877, "learning_rate": 1.899700998381849e-05, "loss": 0.4601, "step": 786 }, { "epoch": 0.45952850156922853, "grad_norm": 0.10346733534599752, "learning_rate": 1.899255439441043e-05, "loss": 0.4891, "step": 787 }, { "epoch": 0.46011240055470404, "grad_norm": 0.1079347036108006, "learning_rate": 1.8988089455265585e-05, "loss": 0.4335, "step": 788 }, { "epoch": 0.46069629954017954, "grad_norm": 0.09304750213537169, "learning_rate": 1.898361517102624e-05, "loss": 0.4127, "step": 789 }, { "epoch": 0.46128019852565505, "grad_norm": 0.09771078858602578, "learning_rate": 1.8979131546344404e-05, "loss": 0.4621, "step": 790 }, { "epoch": 0.46186409751113056, "grad_norm": 0.11098075221812748, "learning_rate": 1.8974638585881787e-05, "loss": 0.4369, "step": 791 }, { "epoch": 0.46244799649660606, "grad_norm": 0.09426293559104408, "learning_rate": 1.8970136294309805e-05, "loss": 0.4258, "step": 792 }, { "epoch": 0.4630318954820816, "grad_norm": 0.10178243358975335, "learning_rate": 1.896562467630959e-05, "loss": 0.4643, "step": 793 }, { "epoch": 0.46361579446755713, "grad_norm": 0.09525122529361235, "learning_rate": 1.896110373657195e-05, "loss": 0.4192, "step": 794 }, { "epoch": 0.46419969345303264, "grad_norm": 0.09505441427134616, "learning_rate": 1.89565734797974e-05, "loss": 0.4368, "step": 795 }, { "epoch": 0.46478359243850814, "grad_norm": 0.09854203855506372, "learning_rate": 1.895203391069613e-05, "loss": 0.4916, "step": 796 }, { "epoch": 0.46536749142398365, "grad_norm": 0.10613761531689701, "learning_rate": 1.8947485033988034e-05, "loss": 0.5129, "step": 797 }, { "epoch": 0.46595139040945915, "grad_norm": 0.10079963916460617, "learning_rate": 1.894292685440266e-05, "loss": 0.476, "step": 798 }, { "epoch": 0.46653528939493466, "grad_norm": 0.09428711420430137, "learning_rate": 1.893835937667924e-05, "loss": 0.3994, "step": 799 }, { "epoch": 0.46711918838041017, "grad_norm": 0.10120736852703464, "learning_rate": 1.8933782605566672e-05, "loss": 0.455, "step": 800 }, { "epoch": 0.46770308736588573, "grad_norm": 0.10857525197262392, "learning_rate": 1.8929196545823512e-05, "loss": 0.4722, "step": 801 }, { "epoch": 0.46828698635136123, "grad_norm": 0.09794039904736455, "learning_rate": 1.8924601202217977e-05, "loss": 0.4124, "step": 802 }, { "epoch": 0.46887088533683674, "grad_norm": 0.10254403168379388, "learning_rate": 1.8919996579527943e-05, "loss": 0.4466, "step": 803 }, { "epoch": 0.46945478432231225, "grad_norm": 0.10446295711342768, "learning_rate": 1.891538268254092e-05, "loss": 0.4714, "step": 804 }, { "epoch": 0.47003868330778775, "grad_norm": 0.10008885397758875, "learning_rate": 1.8910759516054074e-05, "loss": 0.42, "step": 805 }, { "epoch": 0.47062258229326326, "grad_norm": 0.09493770279576011, "learning_rate": 1.8906127084874198e-05, "loss": 0.4076, "step": 806 }, { "epoch": 0.47120648127873876, "grad_norm": 0.09670908761154806, "learning_rate": 1.8901485393817724e-05, "loss": 0.3968, "step": 807 }, { "epoch": 0.47179038026421427, "grad_norm": 0.1002928815508898, "learning_rate": 1.889683444771071e-05, "loss": 0.4387, "step": 808 }, { "epoch": 0.4723742792496898, "grad_norm": 0.10746436993064183, "learning_rate": 1.889217425138884e-05, "loss": 0.4539, "step": 809 }, { "epoch": 0.47295817823516534, "grad_norm": 0.09417578259073582, "learning_rate": 1.8887504809697405e-05, "loss": 0.408, "step": 810 }, { "epoch": 0.47354207722064084, "grad_norm": 0.09898636901859856, "learning_rate": 1.888282612749132e-05, "loss": 0.4521, "step": 811 }, { "epoch": 0.47412597620611635, "grad_norm": 0.09671305383218667, "learning_rate": 1.8878138209635107e-05, "loss": 0.4011, "step": 812 }, { "epoch": 0.47470987519159186, "grad_norm": 0.09241044348616835, "learning_rate": 1.887344106100288e-05, "loss": 0.4443, "step": 813 }, { "epoch": 0.47529377417706736, "grad_norm": 0.09893919201120428, "learning_rate": 1.886873468647836e-05, "loss": 0.389, "step": 814 }, { "epoch": 0.47587767316254287, "grad_norm": 0.09496842310432957, "learning_rate": 1.8864019090954865e-05, "loss": 0.43, "step": 815 }, { "epoch": 0.4764615721480184, "grad_norm": 0.09656714888870092, "learning_rate": 1.8859294279335285e-05, "loss": 0.4197, "step": 816 }, { "epoch": 0.4770454711334939, "grad_norm": 0.09752152074857816, "learning_rate": 1.8854560256532098e-05, "loss": 0.4314, "step": 817 }, { "epoch": 0.47762937011896944, "grad_norm": 0.0965562413366046, "learning_rate": 1.884981702746737e-05, "loss": 0.4777, "step": 818 }, { "epoch": 0.47821326910444495, "grad_norm": 0.10282379241494537, "learning_rate": 1.8845064597072723e-05, "loss": 0.4972, "step": 819 }, { "epoch": 0.47879716808992046, "grad_norm": 0.08740749974456374, "learning_rate": 1.884030297028936e-05, "loss": 0.431, "step": 820 }, { "epoch": 0.47938106707539596, "grad_norm": 0.09475306928911946, "learning_rate": 1.8835532152068025e-05, "loss": 0.4236, "step": 821 }, { "epoch": 0.47996496606087147, "grad_norm": 0.09193858537368736, "learning_rate": 1.8830752147369047e-05, "loss": 0.4514, "step": 822 }, { "epoch": 0.480548865046347, "grad_norm": 0.09046147330873569, "learning_rate": 1.8825962961162284e-05, "loss": 0.4168, "step": 823 }, { "epoch": 0.4811327640318225, "grad_norm": 0.10145896121403378, "learning_rate": 1.8821164598427148e-05, "loss": 0.4303, "step": 824 }, { "epoch": 0.481716663017298, "grad_norm": 0.09317621990940313, "learning_rate": 1.8816357064152596e-05, "loss": 0.4444, "step": 825 }, { "epoch": 0.48230056200277355, "grad_norm": 0.09281820721780232, "learning_rate": 1.8811540363337107e-05, "loss": 0.4338, "step": 826 }, { "epoch": 0.48288446098824905, "grad_norm": 0.09553978285692674, "learning_rate": 1.880671450098871e-05, "loss": 0.4747, "step": 827 }, { "epoch": 0.48346835997372456, "grad_norm": 0.09370014014064389, "learning_rate": 1.880187948212495e-05, "loss": 0.4613, "step": 828 }, { "epoch": 0.48405225895920007, "grad_norm": 0.09099255825637885, "learning_rate": 1.8797035311772884e-05, "loss": 0.4279, "step": 829 }, { "epoch": 0.48463615794467557, "grad_norm": 0.09630616195195872, "learning_rate": 1.8792181994969095e-05, "loss": 0.5126, "step": 830 }, { "epoch": 0.4852200569301511, "grad_norm": 0.09194563433137208, "learning_rate": 1.8787319536759677e-05, "loss": 0.401, "step": 831 }, { "epoch": 0.4858039559156266, "grad_norm": 0.10096670716487574, "learning_rate": 1.878244794220022e-05, "loss": 0.473, "step": 832 }, { "epoch": 0.4863878549011021, "grad_norm": 0.09421278445632059, "learning_rate": 1.8777567216355814e-05, "loss": 0.4547, "step": 833 }, { "epoch": 0.4869717538865776, "grad_norm": 0.09261998597597933, "learning_rate": 1.8772677364301052e-05, "loss": 0.4256, "step": 834 }, { "epoch": 0.48755565287205316, "grad_norm": 0.09108488235178394, "learning_rate": 1.8767778391120008e-05, "loss": 0.4694, "step": 835 }, { "epoch": 0.48813955185752866, "grad_norm": 0.0912058129726249, "learning_rate": 1.876287030190624e-05, "loss": 0.4296, "step": 836 }, { "epoch": 0.48872345084300417, "grad_norm": 0.09323248076153715, "learning_rate": 1.8757953101762786e-05, "loss": 0.4735, "step": 837 }, { "epoch": 0.4893073498284797, "grad_norm": 0.08979730330544422, "learning_rate": 1.8753026795802158e-05, "loss": 0.4114, "step": 838 }, { "epoch": 0.4898912488139552, "grad_norm": 0.08994057781091057, "learning_rate": 1.8748091389146336e-05, "loss": 0.4742, "step": 839 }, { "epoch": 0.4904751477994307, "grad_norm": 0.09530343833937939, "learning_rate": 1.8743146886926755e-05, "loss": 0.4711, "step": 840 }, { "epoch": 0.4910590467849062, "grad_norm": 0.08889354488038634, "learning_rate": 1.8738193294284312e-05, "loss": 0.4509, "step": 841 }, { "epoch": 0.4916429457703817, "grad_norm": 0.09745933607809189, "learning_rate": 1.873323061636936e-05, "loss": 0.4498, "step": 842 }, { "epoch": 0.49222684475585726, "grad_norm": 0.09802252433334488, "learning_rate": 1.8728258858341684e-05, "loss": 0.4312, "step": 843 }, { "epoch": 0.49281074374133277, "grad_norm": 0.0943384176273851, "learning_rate": 1.872327802537053e-05, "loss": 0.4265, "step": 844 }, { "epoch": 0.4933946427268083, "grad_norm": 0.10703146977390589, "learning_rate": 1.8718288122634566e-05, "loss": 0.3766, "step": 845 }, { "epoch": 0.4939785417122838, "grad_norm": 0.10070899957687103, "learning_rate": 1.8713289155321888e-05, "loss": 0.4484, "step": 846 }, { "epoch": 0.4945624406977593, "grad_norm": 0.09345506971490411, "learning_rate": 1.8708281128630023e-05, "loss": 0.405, "step": 847 }, { "epoch": 0.4951463396832348, "grad_norm": 0.1108568870271914, "learning_rate": 1.870326404776592e-05, "loss": 0.4816, "step": 848 }, { "epoch": 0.4957302386687103, "grad_norm": 0.1009433073067622, "learning_rate": 1.869823791794593e-05, "loss": 0.4415, "step": 849 }, { "epoch": 0.4963141376541858, "grad_norm": 0.10394890950601937, "learning_rate": 1.869320274439583e-05, "loss": 0.4236, "step": 850 }, { "epoch": 0.4968980366396613, "grad_norm": 0.09479537531402285, "learning_rate": 1.8688158532350775e-05, "loss": 0.4625, "step": 851 }, { "epoch": 0.4974819356251369, "grad_norm": 0.09772476177454122, "learning_rate": 1.8683105287055344e-05, "loss": 0.4773, "step": 852 }, { "epoch": 0.4980658346106124, "grad_norm": 0.09866256708660082, "learning_rate": 1.8678043013763493e-05, "loss": 0.4873, "step": 853 }, { "epoch": 0.4986497335960879, "grad_norm": 0.09641969857242684, "learning_rate": 1.8672971717738565e-05, "loss": 0.4296, "step": 854 }, { "epoch": 0.4992336325815634, "grad_norm": 0.0960120986107987, "learning_rate": 1.866789140425329e-05, "loss": 0.4381, "step": 855 }, { "epoch": 0.4998175315670389, "grad_norm": 0.09809402712699024, "learning_rate": 1.866280207858977e-05, "loss": 0.4748, "step": 856 }, { "epoch": 0.5004014305525144, "grad_norm": 0.08669627162690524, "learning_rate": 1.865770374603948e-05, "loss": 0.4392, "step": 857 }, { "epoch": 0.5009853295379899, "grad_norm": 0.10091926679649366, "learning_rate": 1.865259641190325e-05, "loss": 0.4283, "step": 858 }, { "epoch": 0.5015692285234654, "grad_norm": 0.09506949761258994, "learning_rate": 1.864748008149128e-05, "loss": 0.4457, "step": 859 }, { "epoch": 0.5021531275089409, "grad_norm": 0.09723863144953826, "learning_rate": 1.8642354760123122e-05, "loss": 0.4131, "step": 860 }, { "epoch": 0.5027370264944164, "grad_norm": 0.0956437777816903, "learning_rate": 1.8637220453127675e-05, "loss": 0.3967, "step": 861 }, { "epoch": 0.5033209254798919, "grad_norm": 0.09689492672983661, "learning_rate": 1.8632077165843174e-05, "loss": 0.4351, "step": 862 }, { "epoch": 0.5039048244653674, "grad_norm": 0.09553460542083508, "learning_rate": 1.86269249036172e-05, "loss": 0.3799, "step": 863 }, { "epoch": 0.5044887234508431, "grad_norm": 0.09655123462303665, "learning_rate": 1.8621763671806663e-05, "loss": 0.4273, "step": 864 }, { "epoch": 0.5050726224363186, "grad_norm": 0.09111945138214567, "learning_rate": 1.8616593475777795e-05, "loss": 0.4593, "step": 865 }, { "epoch": 0.5056565214217941, "grad_norm": 0.10941272128336862, "learning_rate": 1.8611414320906155e-05, "loss": 0.4803, "step": 866 }, { "epoch": 0.5062404204072696, "grad_norm": 0.09238641616882362, "learning_rate": 1.8606226212576612e-05, "loss": 0.4497, "step": 867 }, { "epoch": 0.5068243193927451, "grad_norm": 0.10876310359530285, "learning_rate": 1.860102915618334e-05, "loss": 0.4569, "step": 868 }, { "epoch": 0.5074082183782206, "grad_norm": 0.097914377399241, "learning_rate": 1.8595823157129828e-05, "loss": 0.4341, "step": 869 }, { "epoch": 0.5079921173636961, "grad_norm": 0.08623830853015702, "learning_rate": 1.8590608220828855e-05, "loss": 0.3979, "step": 870 }, { "epoch": 0.5085760163491716, "grad_norm": 0.09838675289549814, "learning_rate": 1.8585384352702486e-05, "loss": 0.4597, "step": 871 }, { "epoch": 0.5091599153346471, "grad_norm": 0.09314667294617024, "learning_rate": 1.8580151558182093e-05, "loss": 0.4107, "step": 872 }, { "epoch": 0.5097438143201226, "grad_norm": 0.09206238505760418, "learning_rate": 1.85749098427083e-05, "loss": 0.4602, "step": 873 }, { "epoch": 0.5103277133055981, "grad_norm": 0.08969434837361556, "learning_rate": 1.856965921173104e-05, "loss": 0.4198, "step": 874 }, { "epoch": 0.5109116122910736, "grad_norm": 0.09839900494639148, "learning_rate": 1.8564399670709482e-05, "loss": 0.41, "step": 875 }, { "epoch": 0.5114955112765491, "grad_norm": 0.09063477109147836, "learning_rate": 1.8559131225112085e-05, "loss": 0.4238, "step": 876 }, { "epoch": 0.5120794102620246, "grad_norm": 0.08857252244709886, "learning_rate": 1.8553853880416555e-05, "loss": 0.4229, "step": 877 }, { "epoch": 0.5126633092475001, "grad_norm": 0.09129481777064383, "learning_rate": 1.8548567642109847e-05, "loss": 0.4062, "step": 878 }, { "epoch": 0.5132472082329756, "grad_norm": 0.09793411130991682, "learning_rate": 1.8543272515688172e-05, "loss": 0.4335, "step": 879 }, { "epoch": 0.5138311072184512, "grad_norm": 0.09619015640902587, "learning_rate": 1.8537968506656976e-05, "loss": 0.4271, "step": 880 }, { "epoch": 0.5144150062039268, "grad_norm": 0.09212534224792157, "learning_rate": 1.8532655620530943e-05, "loss": 0.4328, "step": 881 }, { "epoch": 0.5149989051894023, "grad_norm": 0.09355972383526398, "learning_rate": 1.8527333862833986e-05, "loss": 0.4392, "step": 882 }, { "epoch": 0.5155828041748778, "grad_norm": 0.10170345118464737, "learning_rate": 1.852200323909924e-05, "loss": 0.5018, "step": 883 }, { "epoch": 0.5161667031603533, "grad_norm": 0.09850236441576195, "learning_rate": 1.851666375486906e-05, "loss": 0.4363, "step": 884 }, { "epoch": 0.5167506021458288, "grad_norm": 0.1021087721757882, "learning_rate": 1.8511315415695013e-05, "loss": 0.4923, "step": 885 }, { "epoch": 0.5173345011313043, "grad_norm": 0.09182242022228075, "learning_rate": 1.8505958227137875e-05, "loss": 0.4271, "step": 886 }, { "epoch": 0.5179184001167798, "grad_norm": 0.09283121312143344, "learning_rate": 1.8500592194767625e-05, "loss": 0.3795, "step": 887 }, { "epoch": 0.5185022991022553, "grad_norm": 0.09188195697421225, "learning_rate": 1.8495217324163428e-05, "loss": 0.4249, "step": 888 }, { "epoch": 0.5190861980877308, "grad_norm": 0.08973588698526488, "learning_rate": 1.8489833620913644e-05, "loss": 0.3939, "step": 889 }, { "epoch": 0.5196700970732063, "grad_norm": 0.11599921949827832, "learning_rate": 1.848444109061581e-05, "loss": 0.4583, "step": 890 }, { "epoch": 0.5202539960586818, "grad_norm": 0.10470243018854666, "learning_rate": 1.847903973887666e-05, "loss": 0.5459, "step": 891 }, { "epoch": 0.5208378950441573, "grad_norm": 0.08946573948718761, "learning_rate": 1.8473629571312073e-05, "loss": 0.4752, "step": 892 }, { "epoch": 0.5214217940296328, "grad_norm": 0.09647484507395562, "learning_rate": 1.8468210593547114e-05, "loss": 0.5397, "step": 893 }, { "epoch": 0.5220056930151084, "grad_norm": 0.1032946763655856, "learning_rate": 1.8462782811216e-05, "loss": 0.4322, "step": 894 }, { "epoch": 0.5225895920005839, "grad_norm": 0.10729653315101868, "learning_rate": 1.8457346229962106e-05, "loss": 0.4785, "step": 895 }, { "epoch": 0.5231734909860594, "grad_norm": 0.09751203117340453, "learning_rate": 1.845190085543795e-05, "loss": 0.4331, "step": 896 }, { "epoch": 0.5237573899715349, "grad_norm": 0.09493159990725332, "learning_rate": 1.8446446693305194e-05, "loss": 0.4738, "step": 897 }, { "epoch": 0.5243412889570105, "grad_norm": 0.09011932167407403, "learning_rate": 1.8440983749234647e-05, "loss": 0.4222, "step": 898 }, { "epoch": 0.524925187942486, "grad_norm": 0.09301348163658513, "learning_rate": 1.8435512028906232e-05, "loss": 0.4466, "step": 899 }, { "epoch": 0.5255090869279615, "grad_norm": 0.10345303484765092, "learning_rate": 1.8430031538009005e-05, "loss": 0.5217, "step": 900 }, { "epoch": 0.526092985913437, "grad_norm": 0.09906950831860957, "learning_rate": 1.8424542282241144e-05, "loss": 0.461, "step": 901 }, { "epoch": 0.5266768848989125, "grad_norm": 0.09714663866368485, "learning_rate": 1.841904426730994e-05, "loss": 0.4411, "step": 902 }, { "epoch": 0.527260783884388, "grad_norm": 0.09526883981874988, "learning_rate": 1.8413537498931778e-05, "loss": 0.3928, "step": 903 }, { "epoch": 0.5278446828698635, "grad_norm": 0.09573907054071946, "learning_rate": 1.840802198283216e-05, "loss": 0.4508, "step": 904 }, { "epoch": 0.528428581855339, "grad_norm": 0.09376697674656435, "learning_rate": 1.840249772474568e-05, "loss": 0.4381, "step": 905 }, { "epoch": 0.5290124808408145, "grad_norm": 0.08927939091678197, "learning_rate": 1.8396964730416014e-05, "loss": 0.4447, "step": 906 }, { "epoch": 0.52959637982629, "grad_norm": 0.08952838729960534, "learning_rate": 1.8391423005595928e-05, "loss": 0.4306, "step": 907 }, { "epoch": 0.5301802788117655, "grad_norm": 0.0912685809977354, "learning_rate": 1.8385872556047263e-05, "loss": 0.4099, "step": 908 }, { "epoch": 0.530764177797241, "grad_norm": 0.09226695596833577, "learning_rate": 1.8380313387540928e-05, "loss": 0.4426, "step": 909 }, { "epoch": 0.5313480767827166, "grad_norm": 0.09442180671241847, "learning_rate": 1.8374745505856904e-05, "loss": 0.501, "step": 910 }, { "epoch": 0.5319319757681921, "grad_norm": 0.09753386437506581, "learning_rate": 1.836916891678423e-05, "loss": 0.4618, "step": 911 }, { "epoch": 0.5325158747536676, "grad_norm": 0.08667594579886714, "learning_rate": 1.836358362612099e-05, "loss": 0.4416, "step": 912 }, { "epoch": 0.5330997737391431, "grad_norm": 0.08860026802263654, "learning_rate": 1.8357989639674324e-05, "loss": 0.4276, "step": 913 }, { "epoch": 0.5336836727246187, "grad_norm": 0.09752897609103951, "learning_rate": 1.835238696326041e-05, "loss": 0.448, "step": 914 }, { "epoch": 0.5342675717100942, "grad_norm": 0.09477029253341411, "learning_rate": 1.8346775602704464e-05, "loss": 0.4473, "step": 915 }, { "epoch": 0.5348514706955697, "grad_norm": 0.09869381688796759, "learning_rate": 1.8341155563840726e-05, "loss": 0.4852, "step": 916 }, { "epoch": 0.5354353696810452, "grad_norm": 0.10208104428950313, "learning_rate": 1.833552685251246e-05, "loss": 0.4528, "step": 917 }, { "epoch": 0.5360192686665207, "grad_norm": 0.09669058213226167, "learning_rate": 1.8329889474571952e-05, "loss": 0.3862, "step": 918 }, { "epoch": 0.5366031676519962, "grad_norm": 0.08704944272128588, "learning_rate": 1.83242434358805e-05, "loss": 0.4147, "step": 919 }, { "epoch": 0.5371870666374717, "grad_norm": 0.11154168001622605, "learning_rate": 1.8318588742308387e-05, "loss": 0.4803, "step": 920 }, { "epoch": 0.5377709656229472, "grad_norm": 0.09757812659978361, "learning_rate": 1.8312925399734923e-05, "loss": 0.4843, "step": 921 }, { "epoch": 0.5383548646084227, "grad_norm": 0.09861546684874399, "learning_rate": 1.8307253414048395e-05, "loss": 0.4605, "step": 922 }, { "epoch": 0.5389387635938983, "grad_norm": 0.10169484646118591, "learning_rate": 1.8301572791146077e-05, "loss": 0.4358, "step": 923 }, { "epoch": 0.5395226625793738, "grad_norm": 0.09529981052681054, "learning_rate": 1.8295883536934228e-05, "loss": 0.4056, "step": 924 }, { "epoch": 0.5401065615648493, "grad_norm": 0.09259428673160874, "learning_rate": 1.8290185657328073e-05, "loss": 0.4638, "step": 925 }, { "epoch": 0.5406904605503248, "grad_norm": 0.10391357493395059, "learning_rate": 1.8284479158251813e-05, "loss": 0.4271, "step": 926 }, { "epoch": 0.5412743595358003, "grad_norm": 0.0934603856952097, "learning_rate": 1.827876404563861e-05, "loss": 0.3946, "step": 927 }, { "epoch": 0.5418582585212758, "grad_norm": 0.09081258456600295, "learning_rate": 1.8273040325430575e-05, "loss": 0.3865, "step": 928 }, { "epoch": 0.5424421575067513, "grad_norm": 0.08772491521213945, "learning_rate": 1.8267308003578774e-05, "loss": 0.4189, "step": 929 }, { "epoch": 0.5430260564922268, "grad_norm": 0.09883750391527263, "learning_rate": 1.826156708604322e-05, "loss": 0.4419, "step": 930 }, { "epoch": 0.5436099554777024, "grad_norm": 0.09003075608975027, "learning_rate": 1.8255817578792858e-05, "loss": 0.3758, "step": 931 }, { "epoch": 0.5441938544631779, "grad_norm": 0.0927447146723317, "learning_rate": 1.825005948780556e-05, "loss": 0.419, "step": 932 }, { "epoch": 0.5447777534486534, "grad_norm": 0.08922116897239434, "learning_rate": 1.824429281906813e-05, "loss": 0.4677, "step": 933 }, { "epoch": 0.5453616524341289, "grad_norm": 0.0934348097392724, "learning_rate": 1.8238517578576288e-05, "loss": 0.4246, "step": 934 }, { "epoch": 0.5459455514196044, "grad_norm": 0.09175278372530725, "learning_rate": 1.8232733772334663e-05, "loss": 0.4089, "step": 935 }, { "epoch": 0.5465294504050799, "grad_norm": 0.08576015835781982, "learning_rate": 1.8226941406356794e-05, "loss": 0.4242, "step": 936 }, { "epoch": 0.5471133493905554, "grad_norm": 0.10300439723557243, "learning_rate": 1.8221140486665125e-05, "loss": 0.4286, "step": 937 }, { "epoch": 0.547697248376031, "grad_norm": 0.09495298496783833, "learning_rate": 1.8215331019290975e-05, "loss": 0.4289, "step": 938 }, { "epoch": 0.5482811473615065, "grad_norm": 0.09486132025863231, "learning_rate": 1.8209513010274572e-05, "loss": 0.4351, "step": 939 }, { "epoch": 0.548865046346982, "grad_norm": 0.09234505954783746, "learning_rate": 1.820368646566501e-05, "loss": 0.4265, "step": 940 }, { "epoch": 0.5494489453324575, "grad_norm": 0.09474674877324764, "learning_rate": 1.8197851391520265e-05, "loss": 0.448, "step": 941 }, { "epoch": 0.550032844317933, "grad_norm": 0.09077883786254139, "learning_rate": 1.8192007793907177e-05, "loss": 0.4261, "step": 942 }, { "epoch": 0.5506167433034085, "grad_norm": 0.09498863330209138, "learning_rate": 1.8186155678901457e-05, "loss": 0.4627, "step": 943 }, { "epoch": 0.551200642288884, "grad_norm": 0.09241239920815188, "learning_rate": 1.8180295052587653e-05, "loss": 0.424, "step": 944 }, { "epoch": 0.5517845412743595, "grad_norm": 0.08853434385960215, "learning_rate": 1.8174425921059183e-05, "loss": 0.4328, "step": 945 }, { "epoch": 0.552368440259835, "grad_norm": 0.0888125052985895, "learning_rate": 1.81685482904183e-05, "loss": 0.4219, "step": 946 }, { "epoch": 0.5529523392453105, "grad_norm": 0.09544912843105113, "learning_rate": 1.8162662166776085e-05, "loss": 0.5031, "step": 947 }, { "epoch": 0.5535362382307861, "grad_norm": 0.09104265256511548, "learning_rate": 1.8156767556252464e-05, "loss": 0.4123, "step": 948 }, { "epoch": 0.5541201372162616, "grad_norm": 0.09186226257372132, "learning_rate": 1.815086446497618e-05, "loss": 0.3707, "step": 949 }, { "epoch": 0.5547040362017371, "grad_norm": 0.09397485793978502, "learning_rate": 1.8144952899084787e-05, "loss": 0.417, "step": 950 }, { "epoch": 0.5552879351872126, "grad_norm": 0.10765550592161285, "learning_rate": 1.8139032864724665e-05, "loss": 0.4949, "step": 951 }, { "epoch": 0.5558718341726882, "grad_norm": 0.09919450582807057, "learning_rate": 1.813310436805099e-05, "loss": 0.4995, "step": 952 }, { "epoch": 0.5564557331581637, "grad_norm": 0.08713431343894441, "learning_rate": 1.8127167415227736e-05, "loss": 0.4155, "step": 953 }, { "epoch": 0.5570396321436392, "grad_norm": 0.08743146050180022, "learning_rate": 1.8121222012427666e-05, "loss": 0.4407, "step": 954 }, { "epoch": 0.5576235311291147, "grad_norm": 0.09229956558889968, "learning_rate": 1.8115268165832336e-05, "loss": 0.4228, "step": 955 }, { "epoch": 0.5582074301145902, "grad_norm": 0.09332088896613346, "learning_rate": 1.810930588163208e-05, "loss": 0.4521, "step": 956 }, { "epoch": 0.5587913291000657, "grad_norm": 0.09615985940322089, "learning_rate": 1.8103335166026002e-05, "loss": 0.4718, "step": 957 }, { "epoch": 0.5593752280855412, "grad_norm": 0.09091049374051735, "learning_rate": 1.8097356025221975e-05, "loss": 0.3997, "step": 958 }, { "epoch": 0.5599591270710167, "grad_norm": 0.09144280545718589, "learning_rate": 1.8091368465436626e-05, "loss": 0.4821, "step": 959 }, { "epoch": 0.5605430260564922, "grad_norm": 0.09132963469686126, "learning_rate": 1.8085372492895338e-05, "loss": 0.4299, "step": 960 }, { "epoch": 0.5611269250419677, "grad_norm": 0.09322588395765792, "learning_rate": 1.807936811383225e-05, "loss": 0.4411, "step": 961 }, { "epoch": 0.5617108240274432, "grad_norm": 0.08822921735884835, "learning_rate": 1.8073355334490227e-05, "loss": 0.4545, "step": 962 }, { "epoch": 0.5622947230129187, "grad_norm": 0.08698086590275306, "learning_rate": 1.806733416112088e-05, "loss": 0.4388, "step": 963 }, { "epoch": 0.5628786219983942, "grad_norm": 0.09467727154833003, "learning_rate": 1.8061304599984537e-05, "loss": 0.4209, "step": 964 }, { "epoch": 0.5634625209838698, "grad_norm": 0.096344576411388, "learning_rate": 1.8055266657350256e-05, "loss": 0.4406, "step": 965 }, { "epoch": 0.5640464199693453, "grad_norm": 0.09188636069925325, "learning_rate": 1.8049220339495797e-05, "loss": 0.4204, "step": 966 }, { "epoch": 0.5646303189548209, "grad_norm": 0.0880448405997493, "learning_rate": 1.804316565270765e-05, "loss": 0.3915, "step": 967 }, { "epoch": 0.5652142179402964, "grad_norm": 0.09121882792587133, "learning_rate": 1.8037102603280984e-05, "loss": 0.4483, "step": 968 }, { "epoch": 0.5657981169257719, "grad_norm": 0.0926051762724398, "learning_rate": 1.8031031197519673e-05, "loss": 0.4084, "step": 969 }, { "epoch": 0.5663820159112474, "grad_norm": 0.09080259832248035, "learning_rate": 1.8024951441736275e-05, "loss": 0.4283, "step": 970 }, { "epoch": 0.5669659148967229, "grad_norm": 0.09325920934804743, "learning_rate": 1.8018863342252038e-05, "loss": 0.4557, "step": 971 }, { "epoch": 0.5675498138821984, "grad_norm": 0.08915345664840699, "learning_rate": 1.801276690539688e-05, "loss": 0.3874, "step": 972 }, { "epoch": 0.5681337128676739, "grad_norm": 0.09540701108199949, "learning_rate": 1.800666213750938e-05, "loss": 0.469, "step": 973 }, { "epoch": 0.5687176118531494, "grad_norm": 0.08813191692246371, "learning_rate": 1.800054904493679e-05, "loss": 0.4156, "step": 974 }, { "epoch": 0.5693015108386249, "grad_norm": 0.08896602686394334, "learning_rate": 1.7994427634035016e-05, "loss": 0.4162, "step": 975 }, { "epoch": 0.5698854098241004, "grad_norm": 0.09180272331601964, "learning_rate": 1.7988297911168602e-05, "loss": 0.3921, "step": 976 }, { "epoch": 0.5704693088095759, "grad_norm": 0.08519406392334627, "learning_rate": 1.798215988271075e-05, "loss": 0.41, "step": 977 }, { "epoch": 0.5710532077950514, "grad_norm": 0.09746198292138819, "learning_rate": 1.7976013555043286e-05, "loss": 0.4182, "step": 978 }, { "epoch": 0.5716371067805269, "grad_norm": 0.09963856889825948, "learning_rate": 1.7969858934556676e-05, "loss": 0.4414, "step": 979 }, { "epoch": 0.5722210057660024, "grad_norm": 0.09738498715329888, "learning_rate": 1.796369602764999e-05, "loss": 0.4615, "step": 980 }, { "epoch": 0.5728049047514779, "grad_norm": 0.09141688301898196, "learning_rate": 1.7957524840730925e-05, "loss": 0.4065, "step": 981 }, { "epoch": 0.5733888037369536, "grad_norm": 0.09072363132845022, "learning_rate": 1.7951345380215795e-05, "loss": 0.4163, "step": 982 }, { "epoch": 0.5739727027224291, "grad_norm": 0.09640914238440945, "learning_rate": 1.79451576525295e-05, "loss": 0.449, "step": 983 }, { "epoch": 0.5745566017079046, "grad_norm": 0.09992369250604267, "learning_rate": 1.7938961664105546e-05, "loss": 0.4778, "step": 984 }, { "epoch": 0.5751405006933801, "grad_norm": 0.08659636023668378, "learning_rate": 1.793275742138602e-05, "loss": 0.421, "step": 985 }, { "epoch": 0.5757243996788556, "grad_norm": 0.0919487439819441, "learning_rate": 1.7926544930821608e-05, "loss": 0.4077, "step": 986 }, { "epoch": 0.5763082986643311, "grad_norm": 0.09931701298487543, "learning_rate": 1.7920324198871546e-05, "loss": 0.4968, "step": 987 }, { "epoch": 0.5768921976498066, "grad_norm": 0.093971790880393, "learning_rate": 1.791409523200366e-05, "loss": 0.4486, "step": 988 }, { "epoch": 0.5774760966352821, "grad_norm": 0.09575437893044161, "learning_rate": 1.7907858036694325e-05, "loss": 0.4498, "step": 989 }, { "epoch": 0.5780599956207576, "grad_norm": 0.0997140994451522, "learning_rate": 1.790161261942848e-05, "loss": 0.4546, "step": 990 }, { "epoch": 0.5786438946062331, "grad_norm": 0.09431132904190959, "learning_rate": 1.7895358986699607e-05, "loss": 0.4273, "step": 991 }, { "epoch": 0.5792277935917086, "grad_norm": 0.10063986282889123, "learning_rate": 1.7889097145009736e-05, "loss": 0.4565, "step": 992 }, { "epoch": 0.5798116925771841, "grad_norm": 0.09459844725134567, "learning_rate": 1.788282710086942e-05, "loss": 0.4245, "step": 993 }, { "epoch": 0.5803955915626596, "grad_norm": 0.10146171246834455, "learning_rate": 1.7876548860797756e-05, "loss": 0.4392, "step": 994 }, { "epoch": 0.5809794905481351, "grad_norm": 0.10602113616861586, "learning_rate": 1.787026243132235e-05, "loss": 0.428, "step": 995 }, { "epoch": 0.5815633895336106, "grad_norm": 0.09937470692744398, "learning_rate": 1.7863967818979328e-05, "loss": 0.4504, "step": 996 }, { "epoch": 0.5821472885190861, "grad_norm": 0.09204894267032777, "learning_rate": 1.785766503031332e-05, "loss": 0.4246, "step": 997 }, { "epoch": 0.5827311875045618, "grad_norm": 0.10041432598491896, "learning_rate": 1.785135407187747e-05, "loss": 0.4446, "step": 998 }, { "epoch": 0.5833150864900373, "grad_norm": 0.10774002068502538, "learning_rate": 1.7845034950233394e-05, "loss": 0.4816, "step": 999 }, { "epoch": 0.5838989854755128, "grad_norm": 0.09390689659813653, "learning_rate": 1.7838707671951215e-05, "loss": 0.4268, "step": 1000 }, { "epoch": 0.5844828844609883, "grad_norm": 0.09015589039806007, "learning_rate": 1.7832372243609527e-05, "loss": 0.4354, "step": 1001 }, { "epoch": 0.5850667834464638, "grad_norm": 0.10918205703147686, "learning_rate": 1.78260286717954e-05, "loss": 0.4568, "step": 1002 }, { "epoch": 0.5856506824319393, "grad_norm": 0.09725779048671819, "learning_rate": 1.781967696310437e-05, "loss": 0.4336, "step": 1003 }, { "epoch": 0.5862345814174148, "grad_norm": 0.0877636469654997, "learning_rate": 1.7813317124140445e-05, "loss": 0.4264, "step": 1004 }, { "epoch": 0.5868184804028903, "grad_norm": 0.0950024764408864, "learning_rate": 1.7806949161516062e-05, "loss": 0.4535, "step": 1005 }, { "epoch": 0.5874023793883658, "grad_norm": 0.10479333015547466, "learning_rate": 1.7800573081852124e-05, "loss": 0.4312, "step": 1006 }, { "epoch": 0.5879862783738413, "grad_norm": 0.09525143573640177, "learning_rate": 1.7794188891777964e-05, "loss": 0.4125, "step": 1007 }, { "epoch": 0.5885701773593168, "grad_norm": 0.08778869076592848, "learning_rate": 1.7787796597931354e-05, "loss": 0.3771, "step": 1008 }, { "epoch": 0.5891540763447923, "grad_norm": 0.09285000090113495, "learning_rate": 1.7781396206958485e-05, "loss": 0.4385, "step": 1009 }, { "epoch": 0.5897379753302678, "grad_norm": 0.08836217929103189, "learning_rate": 1.7774987725513975e-05, "loss": 0.4219, "step": 1010 }, { "epoch": 0.5903218743157433, "grad_norm": 0.0880216145210006, "learning_rate": 1.7768571160260845e-05, "loss": 0.406, "step": 1011 }, { "epoch": 0.5909057733012189, "grad_norm": 0.09830508245190792, "learning_rate": 1.7762146517870526e-05, "loss": 0.4634, "step": 1012 }, { "epoch": 0.5914896722866944, "grad_norm": 0.09507705683022538, "learning_rate": 1.7755713805022846e-05, "loss": 0.4536, "step": 1013 }, { "epoch": 0.5920735712721699, "grad_norm": 0.09115599624348537, "learning_rate": 1.7749273028406025e-05, "loss": 0.3892, "step": 1014 }, { "epoch": 0.5926574702576455, "grad_norm": 0.09563453849448152, "learning_rate": 1.7742824194716664e-05, "loss": 0.5242, "step": 1015 }, { "epoch": 0.593241369243121, "grad_norm": 0.08342075146689386, "learning_rate": 1.7736367310659743e-05, "loss": 0.4396, "step": 1016 }, { "epoch": 0.5938252682285965, "grad_norm": 0.08712520706886073, "learning_rate": 1.7729902382948617e-05, "loss": 0.4171, "step": 1017 }, { "epoch": 0.594409167214072, "grad_norm": 0.09125507108985245, "learning_rate": 1.772342941830499e-05, "loss": 0.4378, "step": 1018 }, { "epoch": 0.5949930661995475, "grad_norm": 0.08935407816353083, "learning_rate": 1.771694842345894e-05, "loss": 0.414, "step": 1019 }, { "epoch": 0.595576965185023, "grad_norm": 0.08647437434193186, "learning_rate": 1.771045940514888e-05, "loss": 0.4007, "step": 1020 }, { "epoch": 0.5961608641704985, "grad_norm": 0.08824383858146106, "learning_rate": 1.7703962370121575e-05, "loss": 0.4073, "step": 1021 }, { "epoch": 0.596744763155974, "grad_norm": 0.09515883213178127, "learning_rate": 1.769745732513212e-05, "loss": 0.4646, "step": 1022 }, { "epoch": 0.5973286621414495, "grad_norm": 0.08934111583452063, "learning_rate": 1.7690944276943935e-05, "loss": 0.4073, "step": 1023 }, { "epoch": 0.597912561126925, "grad_norm": 0.09085472292803978, "learning_rate": 1.768442323232877e-05, "loss": 0.4269, "step": 1024 }, { "epoch": 0.5984964601124005, "grad_norm": 0.08772833506009374, "learning_rate": 1.767789419806668e-05, "loss": 0.4513, "step": 1025 }, { "epoch": 0.599080359097876, "grad_norm": 0.08722708733365771, "learning_rate": 1.7671357180946035e-05, "loss": 0.4068, "step": 1026 }, { "epoch": 0.5996642580833516, "grad_norm": 0.08594736857204191, "learning_rate": 1.76648121877635e-05, "loss": 0.4101, "step": 1027 }, { "epoch": 0.6002481570688271, "grad_norm": 0.08227972580999744, "learning_rate": 1.7658259225324036e-05, "loss": 0.4051, "step": 1028 }, { "epoch": 0.6008320560543026, "grad_norm": 0.09050478029945756, "learning_rate": 1.765169830044088e-05, "loss": 0.4627, "step": 1029 }, { "epoch": 0.6014159550397781, "grad_norm": 0.10381076890598134, "learning_rate": 1.7645129419935565e-05, "loss": 0.3973, "step": 1030 }, { "epoch": 0.6019998540252536, "grad_norm": 0.0823670935776632, "learning_rate": 1.763855259063788e-05, "loss": 0.3715, "step": 1031 }, { "epoch": 0.6025837530107292, "grad_norm": 0.08748163398087018, "learning_rate": 1.7631967819385883e-05, "loss": 0.4327, "step": 1032 }, { "epoch": 0.6031676519962047, "grad_norm": 0.08187457516982562, "learning_rate": 1.76253751130259e-05, "loss": 0.3767, "step": 1033 }, { "epoch": 0.6037515509816802, "grad_norm": 0.09011996940158751, "learning_rate": 1.761877447841249e-05, "loss": 0.4305, "step": 1034 }, { "epoch": 0.6043354499671557, "grad_norm": 0.09102208770959794, "learning_rate": 1.7612165922408463e-05, "loss": 0.4132, "step": 1035 }, { "epoch": 0.6049193489526312, "grad_norm": 0.09111659028930161, "learning_rate": 1.760554945188487e-05, "loss": 0.4327, "step": 1036 }, { "epoch": 0.6055032479381067, "grad_norm": 0.09624996484074201, "learning_rate": 1.759892507372099e-05, "loss": 0.4269, "step": 1037 }, { "epoch": 0.6060871469235822, "grad_norm": 0.08833857909564485, "learning_rate": 1.759229279480431e-05, "loss": 0.4528, "step": 1038 }, { "epoch": 0.6066710459090577, "grad_norm": 0.08736903345888744, "learning_rate": 1.758565262203055e-05, "loss": 0.4406, "step": 1039 }, { "epoch": 0.6072549448945332, "grad_norm": 0.09285425575316367, "learning_rate": 1.757900456230362e-05, "loss": 0.4216, "step": 1040 }, { "epoch": 0.6078388438800088, "grad_norm": 0.08538325972231291, "learning_rate": 1.757234862253565e-05, "loss": 0.4259, "step": 1041 }, { "epoch": 0.6084227428654843, "grad_norm": 0.08950611107641337, "learning_rate": 1.7565684809646946e-05, "loss": 0.4213, "step": 1042 }, { "epoch": 0.6090066418509598, "grad_norm": 0.09389910521634093, "learning_rate": 1.7559013130566003e-05, "loss": 0.4005, "step": 1043 }, { "epoch": 0.6095905408364353, "grad_norm": 0.0926011537524849, "learning_rate": 1.755233359222951e-05, "loss": 0.4163, "step": 1044 }, { "epoch": 0.6101744398219108, "grad_norm": 0.08817400773262006, "learning_rate": 1.7545646201582304e-05, "loss": 0.4121, "step": 1045 }, { "epoch": 0.6107583388073863, "grad_norm": 0.09477724865817842, "learning_rate": 1.75389509655774e-05, "loss": 0.4221, "step": 1046 }, { "epoch": 0.6113422377928618, "grad_norm": 0.09551606320248886, "learning_rate": 1.7532247891175968e-05, "loss": 0.4326, "step": 1047 }, { "epoch": 0.6119261367783373, "grad_norm": 0.09129362565513724, "learning_rate": 1.7525536985347328e-05, "loss": 0.504, "step": 1048 }, { "epoch": 0.6125100357638129, "grad_norm": 0.10318178153033519, "learning_rate": 1.751881825506894e-05, "loss": 0.5242, "step": 1049 }, { "epoch": 0.6130939347492884, "grad_norm": 0.08996774913061008, "learning_rate": 1.7512091707326403e-05, "loss": 0.4246, "step": 1050 }, { "epoch": 0.6136778337347639, "grad_norm": 0.09002944434614123, "learning_rate": 1.750535734911344e-05, "loss": 0.4199, "step": 1051 }, { "epoch": 0.6142617327202394, "grad_norm": 0.09326707326710015, "learning_rate": 1.7498615187431894e-05, "loss": 0.4156, "step": 1052 }, { "epoch": 0.6148456317057149, "grad_norm": 0.09765497498912173, "learning_rate": 1.7491865229291733e-05, "loss": 0.4366, "step": 1053 }, { "epoch": 0.6154295306911904, "grad_norm": 0.09241587618555039, "learning_rate": 1.7485107481711014e-05, "loss": 0.4998, "step": 1054 }, { "epoch": 0.616013429676666, "grad_norm": 0.09144015637998631, "learning_rate": 1.74783419517159e-05, "loss": 0.3918, "step": 1055 }, { "epoch": 0.6165973286621415, "grad_norm": 0.10981909305108999, "learning_rate": 1.7471568646340653e-05, "loss": 0.3912, "step": 1056 }, { "epoch": 0.617181227647617, "grad_norm": 0.09178406267415336, "learning_rate": 1.746478757262761e-05, "loss": 0.4493, "step": 1057 }, { "epoch": 0.6177651266330925, "grad_norm": 0.08951570126510709, "learning_rate": 1.7457998737627183e-05, "loss": 0.4476, "step": 1058 }, { "epoch": 0.618349025618568, "grad_norm": 0.09156670477316796, "learning_rate": 1.745120214839786e-05, "loss": 0.4484, "step": 1059 }, { "epoch": 0.6189329246040435, "grad_norm": 0.1056082421116698, "learning_rate": 1.7444397812006194e-05, "loss": 0.4555, "step": 1060 }, { "epoch": 0.619516823589519, "grad_norm": 0.09907929073930806, "learning_rate": 1.7437585735526785e-05, "loss": 0.4649, "step": 1061 }, { "epoch": 0.6201007225749945, "grad_norm": 0.11202186713486148, "learning_rate": 1.7430765926042287e-05, "loss": 0.4998, "step": 1062 }, { "epoch": 0.62068462156047, "grad_norm": 0.09084291294760292, "learning_rate": 1.7423938390643384e-05, "loss": 0.4444, "step": 1063 }, { "epoch": 0.6212685205459455, "grad_norm": 0.09723451859050643, "learning_rate": 1.7417103136428806e-05, "loss": 0.4491, "step": 1064 }, { "epoch": 0.6218524195314211, "grad_norm": 0.09157081383773819, "learning_rate": 1.74102601705053e-05, "loss": 0.4393, "step": 1065 }, { "epoch": 0.6224363185168966, "grad_norm": 0.09542177768852611, "learning_rate": 1.7403409499987633e-05, "loss": 0.4256, "step": 1066 }, { "epoch": 0.6230202175023721, "grad_norm": 0.09027261714414724, "learning_rate": 1.739655113199858e-05, "loss": 0.4303, "step": 1067 }, { "epoch": 0.6236041164878476, "grad_norm": 0.09344680148810144, "learning_rate": 1.7389685073668925e-05, "loss": 0.4172, "step": 1068 }, { "epoch": 0.6241880154733231, "grad_norm": 0.09326741447020781, "learning_rate": 1.7382811332137444e-05, "loss": 0.377, "step": 1069 }, { "epoch": 0.6247719144587986, "grad_norm": 0.09514855056865301, "learning_rate": 1.7375929914550906e-05, "loss": 0.4411, "step": 1070 }, { "epoch": 0.6253558134442742, "grad_norm": 0.08502552525385211, "learning_rate": 1.7369040828064046e-05, "loss": 0.3988, "step": 1071 }, { "epoch": 0.6259397124297497, "grad_norm": 0.09755605962881381, "learning_rate": 1.73621440798396e-05, "loss": 0.4471, "step": 1072 }, { "epoch": 0.6265236114152252, "grad_norm": 0.09231014682848958, "learning_rate": 1.7355239677048237e-05, "loss": 0.4404, "step": 1073 }, { "epoch": 0.6271075104007007, "grad_norm": 0.08698532233910933, "learning_rate": 1.734832762686861e-05, "loss": 0.4688, "step": 1074 }, { "epoch": 0.6276914093861762, "grad_norm": 0.08661993202579565, "learning_rate": 1.7341407936487316e-05, "loss": 0.4575, "step": 1075 }, { "epoch": 0.6282753083716517, "grad_norm": 0.09426010308813526, "learning_rate": 1.7334480613098893e-05, "loss": 0.457, "step": 1076 }, { "epoch": 0.6288592073571272, "grad_norm": 0.08851371855666262, "learning_rate": 1.7327545663905813e-05, "loss": 0.3734, "step": 1077 }, { "epoch": 0.6294431063426027, "grad_norm": 0.08928043973060518, "learning_rate": 1.7320603096118476e-05, "loss": 0.4231, "step": 1078 }, { "epoch": 0.6300270053280782, "grad_norm": 0.090084522308169, "learning_rate": 1.731365291695522e-05, "loss": 0.4245, "step": 1079 }, { "epoch": 0.6306109043135537, "grad_norm": 0.09966141862485951, "learning_rate": 1.730669513364227e-05, "loss": 0.434, "step": 1080 }, { "epoch": 0.6311948032990292, "grad_norm": 0.08488838734381386, "learning_rate": 1.7299729753413783e-05, "loss": 0.4295, "step": 1081 }, { "epoch": 0.6317787022845048, "grad_norm": 0.09579305277494848, "learning_rate": 1.7292756783511793e-05, "loss": 0.455, "step": 1082 }, { "epoch": 0.6323626012699803, "grad_norm": 0.09128993428209392, "learning_rate": 1.728577623118624e-05, "loss": 0.42, "step": 1083 }, { "epoch": 0.6329465002554558, "grad_norm": 0.08569182138552853, "learning_rate": 1.7278788103694944e-05, "loss": 0.386, "step": 1084 }, { "epoch": 0.6335303992409314, "grad_norm": 0.09435374194988813, "learning_rate": 1.7271792408303593e-05, "loss": 0.4596, "step": 1085 }, { "epoch": 0.6341142982264069, "grad_norm": 0.08409107422996433, "learning_rate": 1.7264789152285754e-05, "loss": 0.3752, "step": 1086 }, { "epoch": 0.6346981972118824, "grad_norm": 0.09011589648711192, "learning_rate": 1.7257778342922853e-05, "loss": 0.4377, "step": 1087 }, { "epoch": 0.6352820961973579, "grad_norm": 0.09142436904084796, "learning_rate": 1.7250759987504165e-05, "loss": 0.4041, "step": 1088 }, { "epoch": 0.6358659951828334, "grad_norm": 0.09432612815091881, "learning_rate": 1.724373409332681e-05, "loss": 0.4488, "step": 1089 }, { "epoch": 0.6364498941683089, "grad_norm": 0.08805984914997829, "learning_rate": 1.7236700667695754e-05, "loss": 0.4576, "step": 1090 }, { "epoch": 0.6370337931537844, "grad_norm": 0.0954847343597198, "learning_rate": 1.7229659717923784e-05, "loss": 0.4631, "step": 1091 }, { "epoch": 0.6376176921392599, "grad_norm": 0.08842078042093192, "learning_rate": 1.722261125133152e-05, "loss": 0.4146, "step": 1092 }, { "epoch": 0.6382015911247354, "grad_norm": 0.09420129347684829, "learning_rate": 1.721555527524739e-05, "loss": 0.3898, "step": 1093 }, { "epoch": 0.6387854901102109, "grad_norm": 0.086738640582419, "learning_rate": 1.7208491797007634e-05, "loss": 0.4611, "step": 1094 }, { "epoch": 0.6393693890956864, "grad_norm": 0.09541950757165253, "learning_rate": 1.7201420823956286e-05, "loss": 0.4112, "step": 1095 }, { "epoch": 0.6399532880811619, "grad_norm": 0.09937004292240001, "learning_rate": 1.719434236344518e-05, "loss": 0.404, "step": 1096 }, { "epoch": 0.6405371870666374, "grad_norm": 0.08761076911684837, "learning_rate": 1.7187256422833928e-05, "loss": 0.4257, "step": 1097 }, { "epoch": 0.6411210860521129, "grad_norm": 0.1181886551411168, "learning_rate": 1.7180163009489924e-05, "loss": 0.5047, "step": 1098 }, { "epoch": 0.6417049850375885, "grad_norm": 0.08867755349353244, "learning_rate": 1.7173062130788337e-05, "loss": 0.3988, "step": 1099 }, { "epoch": 0.642288884023064, "grad_norm": 0.10128439662606079, "learning_rate": 1.716595379411208e-05, "loss": 0.4657, "step": 1100 }, { "epoch": 0.6428727830085396, "grad_norm": 0.10038482677816951, "learning_rate": 1.715883800685184e-05, "loss": 0.4138, "step": 1101 }, { "epoch": 0.6434566819940151, "grad_norm": 0.08751924018788537, "learning_rate": 1.7151714776406034e-05, "loss": 0.4217, "step": 1102 }, { "epoch": 0.6440405809794906, "grad_norm": 0.08793546822271749, "learning_rate": 1.7144584110180834e-05, "loss": 0.412, "step": 1103 }, { "epoch": 0.6446244799649661, "grad_norm": 0.10318725817455951, "learning_rate": 1.7137446015590128e-05, "loss": 0.4228, "step": 1104 }, { "epoch": 0.6452083789504416, "grad_norm": 0.09445566496269081, "learning_rate": 1.7130300500055537e-05, "loss": 0.4013, "step": 1105 }, { "epoch": 0.6457922779359171, "grad_norm": 0.09411889777475531, "learning_rate": 1.7123147571006398e-05, "loss": 0.4474, "step": 1106 }, { "epoch": 0.6463761769213926, "grad_norm": 0.09815965962590555, "learning_rate": 1.711598723587975e-05, "loss": 0.4682, "step": 1107 }, { "epoch": 0.6469600759068681, "grad_norm": 0.09317048907520638, "learning_rate": 1.710881950212033e-05, "loss": 0.4493, "step": 1108 }, { "epoch": 0.6475439748923436, "grad_norm": 0.09469785619838919, "learning_rate": 1.7101644377180586e-05, "loss": 0.4188, "step": 1109 }, { "epoch": 0.6481278738778191, "grad_norm": 0.09046930736617381, "learning_rate": 1.7094461868520625e-05, "loss": 0.4258, "step": 1110 }, { "epoch": 0.6487117728632946, "grad_norm": 0.1006271596410462, "learning_rate": 1.708727198360825e-05, "loss": 0.4394, "step": 1111 }, { "epoch": 0.6492956718487701, "grad_norm": 0.09430394407538564, "learning_rate": 1.7080074729918918e-05, "loss": 0.4493, "step": 1112 }, { "epoch": 0.6498795708342456, "grad_norm": 0.08828286208425658, "learning_rate": 1.7072870114935766e-05, "loss": 0.4223, "step": 1113 }, { "epoch": 0.6504634698197211, "grad_norm": 0.08894195322112015, "learning_rate": 1.7065658146149572e-05, "loss": 0.3812, "step": 1114 }, { "epoch": 0.6510473688051966, "grad_norm": 0.09716451348424279, "learning_rate": 1.7058438831058763e-05, "loss": 0.4491, "step": 1115 }, { "epoch": 0.6516312677906723, "grad_norm": 0.09656663972125981, "learning_rate": 1.70512121771694e-05, "loss": 0.5103, "step": 1116 }, { "epoch": 0.6522151667761478, "grad_norm": 0.09102844314044885, "learning_rate": 1.7043978191995177e-05, "loss": 0.4094, "step": 1117 }, { "epoch": 0.6527990657616233, "grad_norm": 0.10980527494612091, "learning_rate": 1.7036736883057422e-05, "loss": 0.455, "step": 1118 }, { "epoch": 0.6533829647470988, "grad_norm": 0.09091492195675412, "learning_rate": 1.702948825788506e-05, "loss": 0.439, "step": 1119 }, { "epoch": 0.6539668637325743, "grad_norm": 0.0896153280576147, "learning_rate": 1.7022232324014628e-05, "loss": 0.4079, "step": 1120 }, { "epoch": 0.6545507627180498, "grad_norm": 0.0897685120584939, "learning_rate": 1.7014969088990265e-05, "loss": 0.3696, "step": 1121 }, { "epoch": 0.6551346617035253, "grad_norm": 0.093912915934424, "learning_rate": 1.7007698560363704e-05, "loss": 0.3938, "step": 1122 }, { "epoch": 0.6557185606890008, "grad_norm": 0.0863719413311183, "learning_rate": 1.7000420745694256e-05, "loss": 0.4194, "step": 1123 }, { "epoch": 0.6563024596744763, "grad_norm": 0.08354849503229415, "learning_rate": 1.6993135652548803e-05, "loss": 0.4112, "step": 1124 }, { "epoch": 0.6568863586599518, "grad_norm": 0.08560495102220175, "learning_rate": 1.6985843288501814e-05, "loss": 0.3871, "step": 1125 }, { "epoch": 0.6574702576454273, "grad_norm": 0.08659746278347599, "learning_rate": 1.697854366113529e-05, "loss": 0.4067, "step": 1126 }, { "epoch": 0.6580541566309028, "grad_norm": 0.21547007938107487, "learning_rate": 1.6971236778038806e-05, "loss": 0.4287, "step": 1127 }, { "epoch": 0.6586380556163783, "grad_norm": 0.08098659427595478, "learning_rate": 1.6963922646809475e-05, "loss": 0.387, "step": 1128 }, { "epoch": 0.6592219546018538, "grad_norm": 0.08518477047303744, "learning_rate": 1.6956601275051933e-05, "loss": 0.4099, "step": 1129 }, { "epoch": 0.6598058535873293, "grad_norm": 0.08705916258591465, "learning_rate": 1.694927267037837e-05, "loss": 0.4364, "step": 1130 }, { "epoch": 0.6603897525728049, "grad_norm": 0.09248384316943024, "learning_rate": 1.6941936840408465e-05, "loss": 0.3896, "step": 1131 }, { "epoch": 0.6609736515582805, "grad_norm": 0.08914779121264596, "learning_rate": 1.6934593792769435e-05, "loss": 0.409, "step": 1132 }, { "epoch": 0.661557550543756, "grad_norm": 0.09505923141847356, "learning_rate": 1.6927243535095995e-05, "loss": 0.4965, "step": 1133 }, { "epoch": 0.6621414495292315, "grad_norm": 0.08257288492498693, "learning_rate": 1.691988607503035e-05, "loss": 0.4227, "step": 1134 }, { "epoch": 0.662725348514707, "grad_norm": 0.09073634302172948, "learning_rate": 1.691252142022219e-05, "loss": 0.4439, "step": 1135 }, { "epoch": 0.6633092475001825, "grad_norm": 0.0886987699098452, "learning_rate": 1.6905149578328705e-05, "loss": 0.373, "step": 1136 }, { "epoch": 0.663893146485658, "grad_norm": 0.09647863704016836, "learning_rate": 1.6897770557014535e-05, "loss": 0.4762, "step": 1137 }, { "epoch": 0.6644770454711335, "grad_norm": 0.08726743313795597, "learning_rate": 1.6890384363951802e-05, "loss": 0.4192, "step": 1138 }, { "epoch": 0.665060944456609, "grad_norm": 0.08607710488475626, "learning_rate": 1.688299100682007e-05, "loss": 0.4324, "step": 1139 }, { "epoch": 0.6656448434420845, "grad_norm": 0.08916001610234651, "learning_rate": 1.687559049330636e-05, "loss": 0.415, "step": 1140 }, { "epoch": 0.66622874242756, "grad_norm": 0.09085641654226184, "learning_rate": 1.686818283110514e-05, "loss": 0.4407, "step": 1141 }, { "epoch": 0.6668126414130355, "grad_norm": 0.07901526142569126, "learning_rate": 1.6860768027918293e-05, "loss": 0.4124, "step": 1142 }, { "epoch": 0.667396540398511, "grad_norm": 0.08212503781516006, "learning_rate": 1.6853346091455143e-05, "loss": 0.3862, "step": 1143 }, { "epoch": 0.6679804393839865, "grad_norm": 0.07991489352903496, "learning_rate": 1.684591702943242e-05, "loss": 0.4083, "step": 1144 }, { "epoch": 0.668564338369462, "grad_norm": 0.08327475648014676, "learning_rate": 1.683848084957427e-05, "loss": 0.4018, "step": 1145 }, { "epoch": 0.6691482373549376, "grad_norm": 0.08450146196147934, "learning_rate": 1.6831037559612235e-05, "loss": 0.4202, "step": 1146 }, { "epoch": 0.6697321363404131, "grad_norm": 0.09288874652580006, "learning_rate": 1.682358716728525e-05, "loss": 0.4372, "step": 1147 }, { "epoch": 0.6703160353258886, "grad_norm": 0.08537285119667387, "learning_rate": 1.681612968033964e-05, "loss": 0.4086, "step": 1148 }, { "epoch": 0.6708999343113642, "grad_norm": 0.08187600563023624, "learning_rate": 1.6808665106529096e-05, "loss": 0.4233, "step": 1149 }, { "epoch": 0.6714838332968397, "grad_norm": 0.09143944591550165, "learning_rate": 1.6801193453614683e-05, "loss": 0.4129, "step": 1150 }, { "epoch": 0.6720677322823152, "grad_norm": 0.09136135156606306, "learning_rate": 1.679371472936483e-05, "loss": 0.4582, "step": 1151 }, { "epoch": 0.6726516312677907, "grad_norm": 0.08401348788839008, "learning_rate": 1.6786228941555318e-05, "loss": 0.4155, "step": 1152 }, { "epoch": 0.6732355302532662, "grad_norm": 0.08838016002206428, "learning_rate": 1.6778736097969258e-05, "loss": 0.4555, "step": 1153 }, { "epoch": 0.6738194292387417, "grad_norm": 0.07941430912474112, "learning_rate": 1.6771236206397123e-05, "loss": 0.4399, "step": 1154 }, { "epoch": 0.6744033282242172, "grad_norm": 0.08042800891991729, "learning_rate": 1.676372927463668e-05, "loss": 0.4513, "step": 1155 }, { "epoch": 0.6749872272096927, "grad_norm": 0.09191878736334715, "learning_rate": 1.675621531049305e-05, "loss": 0.4232, "step": 1156 }, { "epoch": 0.6755711261951682, "grad_norm": 0.08593007133453083, "learning_rate": 1.674869432177864e-05, "loss": 0.4597, "step": 1157 }, { "epoch": 0.6761550251806437, "grad_norm": 0.0867315658276952, "learning_rate": 1.674116631631318e-05, "loss": 0.4242, "step": 1158 }, { "epoch": 0.6767389241661192, "grad_norm": 0.08072328924445038, "learning_rate": 1.6733631301923678e-05, "loss": 0.371, "step": 1159 }, { "epoch": 0.6773228231515948, "grad_norm": 0.08399992968076667, "learning_rate": 1.672608928644444e-05, "loss": 0.4557, "step": 1160 }, { "epoch": 0.6779067221370703, "grad_norm": 0.08771219179823021, "learning_rate": 1.6718540277717057e-05, "loss": 0.4343, "step": 1161 }, { "epoch": 0.6784906211225458, "grad_norm": 0.08809977944302315, "learning_rate": 1.671098428359037e-05, "loss": 0.465, "step": 1162 }, { "epoch": 0.6790745201080213, "grad_norm": 0.09341377457487572, "learning_rate": 1.67034213119205e-05, "loss": 0.4259, "step": 1163 }, { "epoch": 0.6796584190934968, "grad_norm": 0.09014146664723646, "learning_rate": 1.6695851370570822e-05, "loss": 0.4486, "step": 1164 }, { "epoch": 0.6802423180789723, "grad_norm": 0.07934969094444563, "learning_rate": 1.6688274467411953e-05, "loss": 0.3795, "step": 1165 }, { "epoch": 0.6808262170644479, "grad_norm": 0.09186264814462194, "learning_rate": 1.6680690610321747e-05, "loss": 0.4303, "step": 1166 }, { "epoch": 0.6814101160499234, "grad_norm": 0.09003529266547788, "learning_rate": 1.667309980718529e-05, "loss": 0.4346, "step": 1167 }, { "epoch": 0.6819940150353989, "grad_norm": 0.08973747950120337, "learning_rate": 1.666550206589489e-05, "loss": 0.4836, "step": 1168 }, { "epoch": 0.6825779140208744, "grad_norm": 0.08320397846795485, "learning_rate": 1.6657897394350073e-05, "loss": 0.4268, "step": 1169 }, { "epoch": 0.6831618130063499, "grad_norm": 0.08441091248684143, "learning_rate": 1.665028580045756e-05, "loss": 0.3985, "step": 1170 }, { "epoch": 0.6837457119918254, "grad_norm": 0.08928015448602286, "learning_rate": 1.664266729213128e-05, "loss": 0.4289, "step": 1171 }, { "epoch": 0.6843296109773009, "grad_norm": 0.09482628974429008, "learning_rate": 1.6635041877292354e-05, "loss": 0.4458, "step": 1172 }, { "epoch": 0.6849135099627764, "grad_norm": 0.09461695194207963, "learning_rate": 1.662740956386906e-05, "loss": 0.4493, "step": 1173 }, { "epoch": 0.685497408948252, "grad_norm": 0.08469855265932653, "learning_rate": 1.661977035979688e-05, "loss": 0.3997, "step": 1174 }, { "epoch": 0.6860813079337275, "grad_norm": 0.08729594346418, "learning_rate": 1.661212427301844e-05, "loss": 0.447, "step": 1175 }, { "epoch": 0.686665206919203, "grad_norm": 0.09393962333023911, "learning_rate": 1.6604471311483526e-05, "loss": 0.4468, "step": 1176 }, { "epoch": 0.6872491059046785, "grad_norm": 0.0984742699461733, "learning_rate": 1.6596811483149077e-05, "loss": 0.4141, "step": 1177 }, { "epoch": 0.687833004890154, "grad_norm": 0.08458368501348092, "learning_rate": 1.6589144795979165e-05, "loss": 0.4065, "step": 1178 }, { "epoch": 0.6884169038756295, "grad_norm": 0.08860267708770832, "learning_rate": 1.6581471257944996e-05, "loss": 0.3983, "step": 1179 }, { "epoch": 0.689000802861105, "grad_norm": 0.09393257979739777, "learning_rate": 1.6573790877024903e-05, "loss": 0.4251, "step": 1180 }, { "epoch": 0.6895847018465805, "grad_norm": 0.08002158892481911, "learning_rate": 1.656610366120433e-05, "loss": 0.3916, "step": 1181 }, { "epoch": 0.690168600832056, "grad_norm": 0.09365309919124495, "learning_rate": 1.6558409618475826e-05, "loss": 0.4322, "step": 1182 }, { "epoch": 0.6907524998175316, "grad_norm": 0.08812607364035864, "learning_rate": 1.655070875683904e-05, "loss": 0.4229, "step": 1183 }, { "epoch": 0.6913363988030071, "grad_norm": 0.09030502840583521, "learning_rate": 1.6543001084300703e-05, "loss": 0.4416, "step": 1184 }, { "epoch": 0.6919202977884826, "grad_norm": 0.09615740369378337, "learning_rate": 1.653528660887465e-05, "loss": 0.4426, "step": 1185 }, { "epoch": 0.6925041967739581, "grad_norm": 0.08280834804866166, "learning_rate": 1.652756533858176e-05, "loss": 0.3851, "step": 1186 }, { "epoch": 0.6930880957594336, "grad_norm": 0.08932665687846941, "learning_rate": 1.651983728145e-05, "loss": 0.4546, "step": 1187 }, { "epoch": 0.6936719947449091, "grad_norm": 0.09854543648329804, "learning_rate": 1.6512102445514376e-05, "loss": 0.4328, "step": 1188 }, { "epoch": 0.6942558937303847, "grad_norm": 0.08935449096841382, "learning_rate": 1.6504360838816956e-05, "loss": 0.4147, "step": 1189 }, { "epoch": 0.6948397927158602, "grad_norm": 0.10417024430140417, "learning_rate": 1.6496612469406835e-05, "loss": 0.4713, "step": 1190 }, { "epoch": 0.6954236917013357, "grad_norm": 0.08730367802928783, "learning_rate": 1.648885734534015e-05, "loss": 0.401, "step": 1191 }, { "epoch": 0.6960075906868112, "grad_norm": 0.08302124428938837, "learning_rate": 1.6481095474680062e-05, "loss": 0.3936, "step": 1192 }, { "epoch": 0.6965914896722867, "grad_norm": 0.10256741865775945, "learning_rate": 1.6473326865496736e-05, "loss": 0.4571, "step": 1193 }, { "epoch": 0.6971753886577622, "grad_norm": 0.09462818557160775, "learning_rate": 1.6465551525867347e-05, "loss": 0.4258, "step": 1194 }, { "epoch": 0.6977592876432377, "grad_norm": 0.08203177045385743, "learning_rate": 1.6457769463876078e-05, "loss": 0.4171, "step": 1195 }, { "epoch": 0.6983431866287132, "grad_norm": 0.09612430520506357, "learning_rate": 1.644998068761408e-05, "loss": 0.4482, "step": 1196 }, { "epoch": 0.6989270856141887, "grad_norm": 0.08257206127203164, "learning_rate": 1.6442185205179507e-05, "loss": 0.3897, "step": 1197 }, { "epoch": 0.6995109845996642, "grad_norm": 0.08196472405086475, "learning_rate": 1.6434383024677475e-05, "loss": 0.3794, "step": 1198 }, { "epoch": 0.7000948835851397, "grad_norm": 0.08579753038672487, "learning_rate": 1.6426574154220066e-05, "loss": 0.4498, "step": 1199 }, { "epoch": 0.7006787825706153, "grad_norm": 0.08670048727551322, "learning_rate": 1.6418758601926313e-05, "loss": 0.4009, "step": 1200 }, { "epoch": 0.7012626815560908, "grad_norm": 0.09060959575237004, "learning_rate": 1.64109363759222e-05, "loss": 0.4136, "step": 1201 }, { "epoch": 0.7018465805415663, "grad_norm": 0.09119045714202108, "learning_rate": 1.640310748434066e-05, "loss": 0.427, "step": 1202 }, { "epoch": 0.7024304795270419, "grad_norm": 0.0851500569234826, "learning_rate": 1.639527193532154e-05, "loss": 0.4514, "step": 1203 }, { "epoch": 0.7030143785125174, "grad_norm": 0.09132340612455887, "learning_rate": 1.6387429737011612e-05, "loss": 0.4652, "step": 1204 }, { "epoch": 0.7035982774979929, "grad_norm": 0.07599116361863376, "learning_rate": 1.6379580897564568e-05, "loss": 0.4187, "step": 1205 }, { "epoch": 0.7041821764834684, "grad_norm": 0.08772839683132387, "learning_rate": 1.6371725425141e-05, "loss": 0.4084, "step": 1206 }, { "epoch": 0.7047660754689439, "grad_norm": 0.09849928611901632, "learning_rate": 1.6363863327908405e-05, "loss": 0.4689, "step": 1207 }, { "epoch": 0.7053499744544194, "grad_norm": 0.08481656518162523, "learning_rate": 1.6355994614041154e-05, "loss": 0.446, "step": 1208 }, { "epoch": 0.7059338734398949, "grad_norm": 0.08370686796643127, "learning_rate": 1.6348119291720504e-05, "loss": 0.4172, "step": 1209 }, { "epoch": 0.7065177724253704, "grad_norm": 0.09555556372980296, "learning_rate": 1.634023736913459e-05, "loss": 0.449, "step": 1210 }, { "epoch": 0.7071016714108459, "grad_norm": 0.09256217509198646, "learning_rate": 1.6332348854478398e-05, "loss": 0.4469, "step": 1211 }, { "epoch": 0.7076855703963214, "grad_norm": 0.09022960584583223, "learning_rate": 1.6324453755953772e-05, "loss": 0.4188, "step": 1212 }, { "epoch": 0.7082694693817969, "grad_norm": 0.08852409036510736, "learning_rate": 1.6316552081769404e-05, "loss": 0.4351, "step": 1213 }, { "epoch": 0.7088533683672724, "grad_norm": 0.09872722876031141, "learning_rate": 1.630864384014083e-05, "loss": 0.4685, "step": 1214 }, { "epoch": 0.7094372673527479, "grad_norm": 0.08978038224694604, "learning_rate": 1.6300729039290386e-05, "loss": 0.4276, "step": 1215 }, { "epoch": 0.7100211663382235, "grad_norm": 0.08698724431770309, "learning_rate": 1.6292807687447258e-05, "loss": 0.3984, "step": 1216 }, { "epoch": 0.710605065323699, "grad_norm": 0.08915609576948105, "learning_rate": 1.6284879792847433e-05, "loss": 0.3857, "step": 1217 }, { "epoch": 0.7111889643091746, "grad_norm": 0.09932484427854986, "learning_rate": 1.62769453637337e-05, "loss": 0.4729, "step": 1218 }, { "epoch": 0.7117728632946501, "grad_norm": 0.08885005456797195, "learning_rate": 1.626900440835564e-05, "loss": 0.427, "step": 1219 }, { "epoch": 0.7123567622801256, "grad_norm": 0.09294733461285795, "learning_rate": 1.6261056934969626e-05, "loss": 0.4451, "step": 1220 }, { "epoch": 0.7129406612656011, "grad_norm": 0.08747387869122987, "learning_rate": 1.6253102951838794e-05, "loss": 0.4052, "step": 1221 }, { "epoch": 0.7135245602510766, "grad_norm": 0.08887234426541622, "learning_rate": 1.6245142467233067e-05, "loss": 0.4441, "step": 1222 }, { "epoch": 0.7141084592365521, "grad_norm": 0.08448350314665391, "learning_rate": 1.6237175489429114e-05, "loss": 0.408, "step": 1223 }, { "epoch": 0.7146923582220276, "grad_norm": 0.08519581906277944, "learning_rate": 1.6229202026710356e-05, "loss": 0.4184, "step": 1224 }, { "epoch": 0.7152762572075031, "grad_norm": 0.08132760788982994, "learning_rate": 1.622122208736697e-05, "loss": 0.4023, "step": 1225 }, { "epoch": 0.7158601561929786, "grad_norm": 0.08850631443436577, "learning_rate": 1.6213235679695847e-05, "loss": 0.4099, "step": 1226 }, { "epoch": 0.7164440551784541, "grad_norm": 0.09475556023537628, "learning_rate": 1.620524281200062e-05, "loss": 0.4008, "step": 1227 }, { "epoch": 0.7170279541639296, "grad_norm": 0.08739456797806017, "learning_rate": 1.6197243492591627e-05, "loss": 0.4274, "step": 1228 }, { "epoch": 0.7176118531494051, "grad_norm": 0.09237798472687199, "learning_rate": 1.618923772978592e-05, "loss": 0.4217, "step": 1229 }, { "epoch": 0.7181957521348806, "grad_norm": 0.07794131847117078, "learning_rate": 1.618122553190725e-05, "loss": 0.3963, "step": 1230 }, { "epoch": 0.7187796511203561, "grad_norm": 0.08728128674554213, "learning_rate": 1.617320690728606e-05, "loss": 0.4249, "step": 1231 }, { "epoch": 0.7193635501058316, "grad_norm": 0.08829126154455143, "learning_rate": 1.6165181864259463e-05, "loss": 0.4335, "step": 1232 }, { "epoch": 0.7199474490913073, "grad_norm": 0.08621243879482034, "learning_rate": 1.6157150411171268e-05, "loss": 0.4295, "step": 1233 }, { "epoch": 0.7205313480767828, "grad_norm": 0.08775106616468552, "learning_rate": 1.614911255637193e-05, "loss": 0.4594, "step": 1234 }, { "epoch": 0.7211152470622583, "grad_norm": 0.08636793819778124, "learning_rate": 1.6141068308218565e-05, "loss": 0.3959, "step": 1235 }, { "epoch": 0.7216991460477338, "grad_norm": 0.09451826913595979, "learning_rate": 1.6133017675074935e-05, "loss": 0.4078, "step": 1236 }, { "epoch": 0.7222830450332093, "grad_norm": 0.08599750808796036, "learning_rate": 1.6124960665311447e-05, "loss": 0.4299, "step": 1237 }, { "epoch": 0.7228669440186848, "grad_norm": 0.0787283777237191, "learning_rate": 1.6116897287305132e-05, "loss": 0.3972, "step": 1238 }, { "epoch": 0.7234508430041603, "grad_norm": 0.10025679382285599, "learning_rate": 1.6108827549439642e-05, "loss": 0.4759, "step": 1239 }, { "epoch": 0.7240347419896358, "grad_norm": 0.09167218396093588, "learning_rate": 1.6100751460105244e-05, "loss": 0.4597, "step": 1240 }, { "epoch": 0.7246186409751113, "grad_norm": 0.09988262064566529, "learning_rate": 1.6092669027698812e-05, "loss": 0.4089, "step": 1241 }, { "epoch": 0.7252025399605868, "grad_norm": 0.08346632831990498, "learning_rate": 1.6084580260623805e-05, "loss": 0.4035, "step": 1242 }, { "epoch": 0.7257864389460623, "grad_norm": 0.08813249686351898, "learning_rate": 1.6076485167290278e-05, "loss": 0.4123, "step": 1243 }, { "epoch": 0.7263703379315378, "grad_norm": 0.08648742350979079, "learning_rate": 1.6068383756114857e-05, "loss": 0.4307, "step": 1244 }, { "epoch": 0.7269542369170133, "grad_norm": 0.0918290777716453, "learning_rate": 1.606027603552074e-05, "loss": 0.4356, "step": 1245 }, { "epoch": 0.7275381359024888, "grad_norm": 0.08824334777297017, "learning_rate": 1.6052162013937688e-05, "loss": 0.4278, "step": 1246 }, { "epoch": 0.7281220348879643, "grad_norm": 0.08778958675251784, "learning_rate": 1.6044041699802005e-05, "loss": 0.3971, "step": 1247 }, { "epoch": 0.7287059338734398, "grad_norm": 0.08672516425714637, "learning_rate": 1.6035915101556544e-05, "loss": 0.4331, "step": 1248 }, { "epoch": 0.7292898328589154, "grad_norm": 0.08363831762540239, "learning_rate": 1.6027782227650696e-05, "loss": 0.4057, "step": 1249 }, { "epoch": 0.729873731844391, "grad_norm": 0.09135793245352399, "learning_rate": 1.601964308654036e-05, "loss": 0.4252, "step": 1250 }, { "epoch": 0.7304576308298665, "grad_norm": 0.08829604897843103, "learning_rate": 1.601149768668797e-05, "loss": 0.4277, "step": 1251 }, { "epoch": 0.731041529815342, "grad_norm": 0.08925848776057331, "learning_rate": 1.6003346036562457e-05, "loss": 0.4837, "step": 1252 }, { "epoch": 0.7316254288008175, "grad_norm": 0.0955907431237989, "learning_rate": 1.599518814463925e-05, "loss": 0.4516, "step": 1253 }, { "epoch": 0.732209327786293, "grad_norm": 0.09902710588751572, "learning_rate": 1.598702401940028e-05, "loss": 0.4402, "step": 1254 }, { "epoch": 0.7327932267717685, "grad_norm": 0.0930607189938524, "learning_rate": 1.5978853669333938e-05, "loss": 0.4372, "step": 1255 }, { "epoch": 0.733377125757244, "grad_norm": 0.10135873187714184, "learning_rate": 1.597067710293511e-05, "loss": 0.4622, "step": 1256 }, { "epoch": 0.7339610247427195, "grad_norm": 0.09720080872524585, "learning_rate": 1.5962494328705123e-05, "loss": 0.3714, "step": 1257 }, { "epoch": 0.734544923728195, "grad_norm": 0.09029811873139457, "learning_rate": 1.5954305355151775e-05, "loss": 0.4671, "step": 1258 }, { "epoch": 0.7351288227136705, "grad_norm": 0.09476815359149436, "learning_rate": 1.5946110190789306e-05, "loss": 0.4461, "step": 1259 }, { "epoch": 0.735712721699146, "grad_norm": 0.0939445859954543, "learning_rate": 1.5937908844138386e-05, "loss": 0.4475, "step": 1260 }, { "epoch": 0.7362966206846215, "grad_norm": 0.08813005659388479, "learning_rate": 1.5929701323726113e-05, "loss": 0.469, "step": 1261 }, { "epoch": 0.736880519670097, "grad_norm": 0.08851048700493197, "learning_rate": 1.5921487638086024e-05, "loss": 0.4199, "step": 1262 }, { "epoch": 0.7374644186555726, "grad_norm": 0.08753742509076408, "learning_rate": 1.5913267795758037e-05, "loss": 0.4422, "step": 1263 }, { "epoch": 0.7380483176410481, "grad_norm": 0.0997587460279902, "learning_rate": 1.590504180528849e-05, "loss": 0.4416, "step": 1264 }, { "epoch": 0.7386322166265236, "grad_norm": 0.08568524321420433, "learning_rate": 1.5896809675230106e-05, "loss": 0.423, "step": 1265 }, { "epoch": 0.7392161156119991, "grad_norm": 0.08586208313871399, "learning_rate": 1.5888571414141997e-05, "loss": 0.392, "step": 1266 }, { "epoch": 0.7398000145974747, "grad_norm": 0.09156438870610967, "learning_rate": 1.588032703058964e-05, "loss": 0.4187, "step": 1267 }, { "epoch": 0.7403839135829502, "grad_norm": 0.09392776935667396, "learning_rate": 1.587207653314489e-05, "loss": 0.4321, "step": 1268 }, { "epoch": 0.7409678125684257, "grad_norm": 0.09832568470029261, "learning_rate": 1.586381993038595e-05, "loss": 0.4275, "step": 1269 }, { "epoch": 0.7415517115539012, "grad_norm": 0.08424999659475145, "learning_rate": 1.5855557230897373e-05, "loss": 0.4207, "step": 1270 }, { "epoch": 0.7421356105393767, "grad_norm": 0.0904527077631291, "learning_rate": 1.584728844327005e-05, "loss": 0.387, "step": 1271 }, { "epoch": 0.7427195095248522, "grad_norm": 0.09190319183337192, "learning_rate": 1.5839013576101206e-05, "loss": 0.4038, "step": 1272 }, { "epoch": 0.7433034085103277, "grad_norm": 0.08870346358113178, "learning_rate": 1.5830732637994382e-05, "loss": 0.4225, "step": 1273 }, { "epoch": 0.7438873074958032, "grad_norm": 0.09349856731248189, "learning_rate": 1.5822445637559435e-05, "loss": 0.4318, "step": 1274 }, { "epoch": 0.7444712064812787, "grad_norm": 0.09304408419902975, "learning_rate": 1.581415258341252e-05, "loss": 0.4215, "step": 1275 }, { "epoch": 0.7450551054667542, "grad_norm": 0.08393960324349085, "learning_rate": 1.5805853484176093e-05, "loss": 0.4265, "step": 1276 }, { "epoch": 0.7456390044522297, "grad_norm": 0.08804047636775532, "learning_rate": 1.5797548348478893e-05, "loss": 0.433, "step": 1277 }, { "epoch": 0.7462229034377053, "grad_norm": 0.0867718962925606, "learning_rate": 1.578923718495593e-05, "loss": 0.4336, "step": 1278 }, { "epoch": 0.7468068024231808, "grad_norm": 0.09192482928690912, "learning_rate": 1.5780920002248484e-05, "loss": 0.4529, "step": 1279 }, { "epoch": 0.7473907014086563, "grad_norm": 0.0809551373315767, "learning_rate": 1.5772596809004103e-05, "loss": 0.3996, "step": 1280 }, { "epoch": 0.7479746003941318, "grad_norm": 0.08259999109361353, "learning_rate": 1.5764267613876565e-05, "loss": 0.384, "step": 1281 }, { "epoch": 0.7485584993796073, "grad_norm": 0.0880165257836814, "learning_rate": 1.5755932425525907e-05, "loss": 0.4298, "step": 1282 }, { "epoch": 0.7491423983650829, "grad_norm": 0.08450577809049592, "learning_rate": 1.574759125261838e-05, "loss": 0.4088, "step": 1283 }, { "epoch": 0.7497262973505584, "grad_norm": 0.08407292941094657, "learning_rate": 1.573924410382648e-05, "loss": 0.4123, "step": 1284 }, { "epoch": 0.7503101963360339, "grad_norm": 0.09873075668911409, "learning_rate": 1.5730890987828893e-05, "loss": 0.4659, "step": 1285 }, { "epoch": 0.7508940953215094, "grad_norm": 0.09059640335636625, "learning_rate": 1.5722531913310523e-05, "loss": 0.4383, "step": 1286 }, { "epoch": 0.7514779943069849, "grad_norm": 0.08583923477135386, "learning_rate": 1.571416688896246e-05, "loss": 0.4002, "step": 1287 }, { "epoch": 0.7520618932924604, "grad_norm": 0.08959942826021465, "learning_rate": 1.5705795923481995e-05, "loss": 0.3505, "step": 1288 }, { "epoch": 0.7526457922779359, "grad_norm": 0.09115734889975004, "learning_rate": 1.5697419025572577e-05, "loss": 0.4532, "step": 1289 }, { "epoch": 0.7532296912634114, "grad_norm": 0.09194478499269917, "learning_rate": 1.5689036203943836e-05, "loss": 0.4414, "step": 1290 }, { "epoch": 0.7538135902488869, "grad_norm": 0.09854116371700462, "learning_rate": 1.568064746731156e-05, "loss": 0.4162, "step": 1291 }, { "epoch": 0.7543974892343625, "grad_norm": 0.10034928569372004, "learning_rate": 1.5672252824397683e-05, "loss": 0.4453, "step": 1292 }, { "epoch": 0.754981388219838, "grad_norm": 0.09536719175770134, "learning_rate": 1.5663852283930275e-05, "loss": 0.4344, "step": 1293 }, { "epoch": 0.7555652872053135, "grad_norm": 0.10384597144686054, "learning_rate": 1.5655445854643554e-05, "loss": 0.5513, "step": 1294 }, { "epoch": 0.756149186190789, "grad_norm": 0.08760945735655072, "learning_rate": 1.5647033545277847e-05, "loss": 0.4239, "step": 1295 }, { "epoch": 0.7567330851762645, "grad_norm": 0.08669681523159688, "learning_rate": 1.56386153645796e-05, "loss": 0.4365, "step": 1296 }, { "epoch": 0.75731698416174, "grad_norm": 0.08937619978831837, "learning_rate": 1.563019132130136e-05, "loss": 0.4021, "step": 1297 }, { "epoch": 0.7579008831472155, "grad_norm": 0.08550419489892074, "learning_rate": 1.562176142420177e-05, "loss": 0.4009, "step": 1298 }, { "epoch": 0.758484782132691, "grad_norm": 0.08294037120877863, "learning_rate": 1.5613325682045563e-05, "loss": 0.4078, "step": 1299 }, { "epoch": 0.7590686811181666, "grad_norm": 0.08665473070393541, "learning_rate": 1.5604884103603547e-05, "loss": 0.4291, "step": 1300 }, { "epoch": 0.7596525801036421, "grad_norm": 0.0803171387700432, "learning_rate": 1.55964366976526e-05, "loss": 0.4145, "step": 1301 }, { "epoch": 0.7602364790891176, "grad_norm": 0.08759857666703846, "learning_rate": 1.5587983472975653e-05, "loss": 0.4091, "step": 1302 }, { "epoch": 0.7608203780745931, "grad_norm": 0.0870566000849975, "learning_rate": 1.5579524438361693e-05, "loss": 0.3753, "step": 1303 }, { "epoch": 0.7614042770600686, "grad_norm": 0.09349436589305658, "learning_rate": 1.5571059602605746e-05, "loss": 0.4585, "step": 1304 }, { "epoch": 0.7619881760455441, "grad_norm": 0.08616502357478434, "learning_rate": 1.556258897450887e-05, "loss": 0.4351, "step": 1305 }, { "epoch": 0.7625720750310196, "grad_norm": 0.0802365133471683, "learning_rate": 1.5554112562878144e-05, "loss": 0.3906, "step": 1306 }, { "epoch": 0.7631559740164952, "grad_norm": 0.08668763504368514, "learning_rate": 1.5545630376526665e-05, "loss": 0.4104, "step": 1307 }, { "epoch": 0.7637398730019707, "grad_norm": 0.09209462879835686, "learning_rate": 1.553714242427352e-05, "loss": 0.4323, "step": 1308 }, { "epoch": 0.7643237719874462, "grad_norm": 0.08619482285796787, "learning_rate": 1.5528648714943807e-05, "loss": 0.4306, "step": 1309 }, { "epoch": 0.7649076709729217, "grad_norm": 0.08805069475457378, "learning_rate": 1.5520149257368608e-05, "loss": 0.4254, "step": 1310 }, { "epoch": 0.7654915699583972, "grad_norm": 0.08286445170869258, "learning_rate": 1.5511644060384968e-05, "loss": 0.4053, "step": 1311 }, { "epoch": 0.7660754689438727, "grad_norm": 0.08545851821729031, "learning_rate": 1.5503133132835916e-05, "loss": 0.4266, "step": 1312 }, { "epoch": 0.7666593679293482, "grad_norm": 0.08886853190115596, "learning_rate": 1.5494616483570428e-05, "loss": 0.4208, "step": 1313 }, { "epoch": 0.7672432669148237, "grad_norm": 0.07984509291539149, "learning_rate": 1.5486094121443434e-05, "loss": 0.3889, "step": 1314 }, { "epoch": 0.7678271659002992, "grad_norm": 0.08535705700702137, "learning_rate": 1.5477566055315808e-05, "loss": 0.4205, "step": 1315 }, { "epoch": 0.7684110648857747, "grad_norm": 0.08457923054787957, "learning_rate": 1.5469032294054336e-05, "loss": 0.433, "step": 1316 }, { "epoch": 0.7689949638712503, "grad_norm": 0.07949166104173624, "learning_rate": 1.5460492846531748e-05, "loss": 0.4098, "step": 1317 }, { "epoch": 0.7695788628567258, "grad_norm": 0.08664521574317473, "learning_rate": 1.5451947721626676e-05, "loss": 0.4417, "step": 1318 }, { "epoch": 0.7701627618422013, "grad_norm": 0.08611472888430773, "learning_rate": 1.5443396928223655e-05, "loss": 0.4157, "step": 1319 }, { "epoch": 0.7707466608276768, "grad_norm": 0.0831183248888501, "learning_rate": 1.5434840475213113e-05, "loss": 0.4183, "step": 1320 }, { "epoch": 0.7713305598131523, "grad_norm": 0.09084182060020743, "learning_rate": 1.5426278371491363e-05, "loss": 0.3867, "step": 1321 }, { "epoch": 0.7719144587986279, "grad_norm": 0.09187640379499179, "learning_rate": 1.5417710625960598e-05, "loss": 0.4152, "step": 1322 }, { "epoch": 0.7724983577841034, "grad_norm": 0.08763457600890916, "learning_rate": 1.5409137247528868e-05, "loss": 0.4118, "step": 1323 }, { "epoch": 0.7730822567695789, "grad_norm": 0.09181207040271494, "learning_rate": 1.5400558245110083e-05, "loss": 0.3981, "step": 1324 }, { "epoch": 0.7736661557550544, "grad_norm": 0.0863497019612279, "learning_rate": 1.5391973627624004e-05, "loss": 0.4346, "step": 1325 }, { "epoch": 0.7742500547405299, "grad_norm": 0.08872109348993765, "learning_rate": 1.538338340399623e-05, "loss": 0.3843, "step": 1326 }, { "epoch": 0.7748339537260054, "grad_norm": 0.09003511913195769, "learning_rate": 1.5374787583158188e-05, "loss": 0.449, "step": 1327 }, { "epoch": 0.7754178527114809, "grad_norm": 0.09114382063145272, "learning_rate": 1.5366186174047114e-05, "loss": 0.453, "step": 1328 }, { "epoch": 0.7760017516969564, "grad_norm": 0.0881202516425848, "learning_rate": 1.535757918560607e-05, "loss": 0.4485, "step": 1329 }, { "epoch": 0.7765856506824319, "grad_norm": 0.11501422178032314, "learning_rate": 1.534896662678391e-05, "loss": 0.4883, "step": 1330 }, { "epoch": 0.7771695496679074, "grad_norm": 0.0940717991749434, "learning_rate": 1.534034850653528e-05, "loss": 0.4341, "step": 1331 }, { "epoch": 0.7777534486533829, "grad_norm": 0.09015345848590105, "learning_rate": 1.533172483382062e-05, "loss": 0.3927, "step": 1332 }, { "epoch": 0.7783373476388584, "grad_norm": 0.0829770224844404, "learning_rate": 1.532309561760612e-05, "loss": 0.4149, "step": 1333 }, { "epoch": 0.778921246624334, "grad_norm": 0.08797410331090069, "learning_rate": 1.5314460866863758e-05, "loss": 0.4415, "step": 1334 }, { "epoch": 0.7795051456098095, "grad_norm": 0.08089348548960931, "learning_rate": 1.530582059057125e-05, "loss": 0.3667, "step": 1335 }, { "epoch": 0.780089044595285, "grad_norm": 0.09152630950746926, "learning_rate": 1.5297174797712057e-05, "loss": 0.4325, "step": 1336 }, { "epoch": 0.7806729435807606, "grad_norm": 0.10324102697627108, "learning_rate": 1.5288523497275392e-05, "loss": 0.4245, "step": 1337 }, { "epoch": 0.7812568425662361, "grad_norm": 0.09428770682214972, "learning_rate": 1.5279866698256177e-05, "loss": 0.4643, "step": 1338 }, { "epoch": 0.7818407415517116, "grad_norm": 0.08377843387116723, "learning_rate": 1.5271204409655055e-05, "loss": 0.3851, "step": 1339 }, { "epoch": 0.7824246405371871, "grad_norm": 0.08573154118615837, "learning_rate": 1.5262536640478386e-05, "loss": 0.3853, "step": 1340 }, { "epoch": 0.7830085395226626, "grad_norm": 0.09575641891982671, "learning_rate": 1.5253863399738218e-05, "loss": 0.4129, "step": 1341 }, { "epoch": 0.7835924385081381, "grad_norm": 0.08279045687933452, "learning_rate": 1.5245184696452286e-05, "loss": 0.397, "step": 1342 }, { "epoch": 0.7841763374936136, "grad_norm": 0.09152224895199507, "learning_rate": 1.5236500539644015e-05, "loss": 0.485, "step": 1343 }, { "epoch": 0.7847602364790891, "grad_norm": 0.08878122712107336, "learning_rate": 1.5227810938342493e-05, "loss": 0.4459, "step": 1344 }, { "epoch": 0.7853441354645646, "grad_norm": 0.08629837452601873, "learning_rate": 1.5219115901582471e-05, "loss": 0.4123, "step": 1345 }, { "epoch": 0.7859280344500401, "grad_norm": 0.08481067692768189, "learning_rate": 1.5210415438404354e-05, "loss": 0.4344, "step": 1346 }, { "epoch": 0.7865119334355156, "grad_norm": 0.09045579003024676, "learning_rate": 1.5201709557854178e-05, "loss": 0.4827, "step": 1347 }, { "epoch": 0.7870958324209911, "grad_norm": 0.09194199412810682, "learning_rate": 1.5192998268983625e-05, "loss": 0.4464, "step": 1348 }, { "epoch": 0.7876797314064666, "grad_norm": 0.08253545702261872, "learning_rate": 1.5184281580849999e-05, "loss": 0.4038, "step": 1349 }, { "epoch": 0.7882636303919422, "grad_norm": 0.08858156612448734, "learning_rate": 1.51755595025162e-05, "loss": 0.4224, "step": 1350 }, { "epoch": 0.7888475293774178, "grad_norm": 0.07679836495712918, "learning_rate": 1.5166832043050757e-05, "loss": 0.3752, "step": 1351 }, { "epoch": 0.7894314283628933, "grad_norm": 0.08411426857425848, "learning_rate": 1.5158099211527776e-05, "loss": 0.4081, "step": 1352 }, { "epoch": 0.7900153273483688, "grad_norm": 0.08140758146514929, "learning_rate": 1.5149361017026957e-05, "loss": 0.4108, "step": 1353 }, { "epoch": 0.7905992263338443, "grad_norm": 0.08956772263698928, "learning_rate": 1.5140617468633579e-05, "loss": 0.4347, "step": 1354 }, { "epoch": 0.7911831253193198, "grad_norm": 0.09300466510593045, "learning_rate": 1.513186857543847e-05, "loss": 0.4624, "step": 1355 }, { "epoch": 0.7917670243047953, "grad_norm": 0.08683991304475512, "learning_rate": 1.5123114346538037e-05, "loss": 0.4071, "step": 1356 }, { "epoch": 0.7923509232902708, "grad_norm": 0.07943291302839554, "learning_rate": 1.5114354791034225e-05, "loss": 0.4185, "step": 1357 }, { "epoch": 0.7929348222757463, "grad_norm": 0.09303702824298649, "learning_rate": 1.5105589918034511e-05, "loss": 0.4189, "step": 1358 }, { "epoch": 0.7935187212612218, "grad_norm": 0.09245565009027991, "learning_rate": 1.5096819736651913e-05, "loss": 0.4388, "step": 1359 }, { "epoch": 0.7941026202466973, "grad_norm": 0.08827007606506819, "learning_rate": 1.5088044256004958e-05, "loss": 0.4358, "step": 1360 }, { "epoch": 0.7946865192321728, "grad_norm": 0.09722443846142835, "learning_rate": 1.5079263485217693e-05, "loss": 0.4502, "step": 1361 }, { "epoch": 0.7952704182176483, "grad_norm": 0.08937265195066713, "learning_rate": 1.507047743341965e-05, "loss": 0.4066, "step": 1362 }, { "epoch": 0.7958543172031238, "grad_norm": 0.08700159964143424, "learning_rate": 1.506168610974587e-05, "loss": 0.4384, "step": 1363 }, { "epoch": 0.7964382161885993, "grad_norm": 0.0842064339769952, "learning_rate": 1.505288952333686e-05, "loss": 0.4122, "step": 1364 }, { "epoch": 0.7970221151740748, "grad_norm": 0.09292534036303671, "learning_rate": 1.5044087683338609e-05, "loss": 0.4486, "step": 1365 }, { "epoch": 0.7976060141595503, "grad_norm": 0.08412915100389641, "learning_rate": 1.5035280598902557e-05, "loss": 0.4005, "step": 1366 }, { "epoch": 0.798189913145026, "grad_norm": 0.0813385888079018, "learning_rate": 1.5026468279185615e-05, "loss": 0.4092, "step": 1367 }, { "epoch": 0.7987738121305015, "grad_norm": 0.08721261666929865, "learning_rate": 1.5017650733350121e-05, "loss": 0.4348, "step": 1368 }, { "epoch": 0.799357711115977, "grad_norm": 0.09925986048772169, "learning_rate": 1.5008827970563848e-05, "loss": 0.5097, "step": 1369 }, { "epoch": 0.7999416101014525, "grad_norm": 0.08286020134197015, "learning_rate": 1.5000000000000002e-05, "loss": 0.4381, "step": 1370 }, { "epoch": 0.800525509086928, "grad_norm": 0.08307566420758404, "learning_rate": 1.4991166830837198e-05, "loss": 0.4069, "step": 1371 }, { "epoch": 0.8011094080724035, "grad_norm": 0.09324156861247193, "learning_rate": 1.4982328472259453e-05, "loss": 0.3881, "step": 1372 }, { "epoch": 0.801693307057879, "grad_norm": 0.09220236149973084, "learning_rate": 1.4973484933456191e-05, "loss": 0.4345, "step": 1373 }, { "epoch": 0.8022772060433545, "grad_norm": 0.0937621584569041, "learning_rate": 1.4964636223622206e-05, "loss": 0.4915, "step": 1374 }, { "epoch": 0.80286110502883, "grad_norm": 0.07830019336326005, "learning_rate": 1.4955782351957681e-05, "loss": 0.3715, "step": 1375 }, { "epoch": 0.8034450040143055, "grad_norm": 0.0905569978059794, "learning_rate": 1.4946923327668164e-05, "loss": 0.4048, "step": 1376 }, { "epoch": 0.804028902999781, "grad_norm": 0.0825139138900073, "learning_rate": 1.4938059159964555e-05, "loss": 0.4084, "step": 1377 }, { "epoch": 0.8046128019852565, "grad_norm": 0.08253598687384921, "learning_rate": 1.4929189858063103e-05, "loss": 0.3757, "step": 1378 }, { "epoch": 0.805196700970732, "grad_norm": 0.09108069478034568, "learning_rate": 1.4920315431185398e-05, "loss": 0.4323, "step": 1379 }, { "epoch": 0.8057805999562075, "grad_norm": 0.08862761207779671, "learning_rate": 1.4911435888558356e-05, "loss": 0.457, "step": 1380 }, { "epoch": 0.806364498941683, "grad_norm": 0.08508241174681722, "learning_rate": 1.4902551239414218e-05, "loss": 0.4159, "step": 1381 }, { "epoch": 0.8069483979271586, "grad_norm": 0.0831606904502807, "learning_rate": 1.4893661492990527e-05, "loss": 0.4387, "step": 1382 }, { "epoch": 0.8075322969126341, "grad_norm": 0.08572651429386642, "learning_rate": 1.4884766658530126e-05, "loss": 0.4299, "step": 1383 }, { "epoch": 0.8081161958981097, "grad_norm": 0.08760937782953557, "learning_rate": 1.487586674528115e-05, "loss": 0.4028, "step": 1384 }, { "epoch": 0.8087000948835852, "grad_norm": 0.090437414318819, "learning_rate": 1.4866961762497018e-05, "loss": 0.4245, "step": 1385 }, { "epoch": 0.8092839938690607, "grad_norm": 0.08994912835171977, "learning_rate": 1.4858051719436418e-05, "loss": 0.3876, "step": 1386 }, { "epoch": 0.8098678928545362, "grad_norm": 0.08816595380581996, "learning_rate": 1.4849136625363297e-05, "loss": 0.3876, "step": 1387 }, { "epoch": 0.8104517918400117, "grad_norm": 0.09242904228616854, "learning_rate": 1.484021648954685e-05, "loss": 0.4233, "step": 1388 }, { "epoch": 0.8110356908254872, "grad_norm": 0.08588688791632759, "learning_rate": 1.4831291321261523e-05, "loss": 0.4611, "step": 1389 }, { "epoch": 0.8116195898109627, "grad_norm": 0.08941913374681694, "learning_rate": 1.4822361129786992e-05, "loss": 0.4526, "step": 1390 }, { "epoch": 0.8122034887964382, "grad_norm": 0.08684324646942568, "learning_rate": 1.4813425924408151e-05, "loss": 0.3956, "step": 1391 }, { "epoch": 0.8127873877819137, "grad_norm": 0.08118924991140372, "learning_rate": 1.480448571441511e-05, "loss": 0.4224, "step": 1392 }, { "epoch": 0.8133712867673892, "grad_norm": 0.08967133703315873, "learning_rate": 1.4795540509103182e-05, "loss": 0.4353, "step": 1393 }, { "epoch": 0.8139551857528647, "grad_norm": 0.08379596799631553, "learning_rate": 1.4786590317772875e-05, "loss": 0.3827, "step": 1394 }, { "epoch": 0.8145390847383402, "grad_norm": 0.08798613825758807, "learning_rate": 1.4777635149729878e-05, "loss": 0.408, "step": 1395 }, { "epoch": 0.8151229837238158, "grad_norm": 0.08435727321506858, "learning_rate": 1.4768675014285063e-05, "loss": 0.3913, "step": 1396 }, { "epoch": 0.8157068827092913, "grad_norm": 0.08247485781890371, "learning_rate": 1.4759709920754453e-05, "loss": 0.4533, "step": 1397 }, { "epoch": 0.8162907816947668, "grad_norm": 0.0814820642043294, "learning_rate": 1.4750739878459233e-05, "loss": 0.402, "step": 1398 }, { "epoch": 0.8168746806802423, "grad_norm": 0.0845887271453071, "learning_rate": 1.4741764896725736e-05, "loss": 0.391, "step": 1399 }, { "epoch": 0.8174585796657178, "grad_norm": 0.08667274883226213, "learning_rate": 1.473278498488543e-05, "loss": 0.4296, "step": 1400 }, { "epoch": 0.8180424786511934, "grad_norm": 0.09124206438622581, "learning_rate": 1.4723800152274905e-05, "loss": 0.4428, "step": 1401 }, { "epoch": 0.8186263776366689, "grad_norm": 0.08055373560697633, "learning_rate": 1.471481040823587e-05, "loss": 0.4179, "step": 1402 }, { "epoch": 0.8192102766221444, "grad_norm": 0.07998737286818845, "learning_rate": 1.4705815762115138e-05, "loss": 0.4054, "step": 1403 }, { "epoch": 0.8197941756076199, "grad_norm": 0.08460139524314694, "learning_rate": 1.4696816223264622e-05, "loss": 0.3638, "step": 1404 }, { "epoch": 0.8203780745930954, "grad_norm": 0.09043672227921956, "learning_rate": 1.4687811801041323e-05, "loss": 0.4271, "step": 1405 }, { "epoch": 0.8209619735785709, "grad_norm": 0.0844023209236629, "learning_rate": 1.4678802504807313e-05, "loss": 0.4025, "step": 1406 }, { "epoch": 0.8215458725640464, "grad_norm": 0.0809111117202899, "learning_rate": 1.4669788343929736e-05, "loss": 0.4287, "step": 1407 }, { "epoch": 0.8221297715495219, "grad_norm": 0.07864548765059197, "learning_rate": 1.4660769327780796e-05, "loss": 0.3956, "step": 1408 }, { "epoch": 0.8227136705349974, "grad_norm": 0.09687927127439569, "learning_rate": 1.465174546573774e-05, "loss": 0.471, "step": 1409 }, { "epoch": 0.823297569520473, "grad_norm": 0.08705934812197344, "learning_rate": 1.4642716767182858e-05, "loss": 0.3952, "step": 1410 }, { "epoch": 0.8238814685059485, "grad_norm": 0.0884742359509923, "learning_rate": 1.4633683241503464e-05, "loss": 0.4413, "step": 1411 }, { "epoch": 0.824465367491424, "grad_norm": 0.08491159644735177, "learning_rate": 1.4624644898091898e-05, "loss": 0.3797, "step": 1412 }, { "epoch": 0.8250492664768995, "grad_norm": 0.08833740426936032, "learning_rate": 1.4615601746345501e-05, "loss": 0.3941, "step": 1413 }, { "epoch": 0.825633165462375, "grad_norm": 0.07877848069981136, "learning_rate": 1.4606553795666616e-05, "loss": 0.3816, "step": 1414 }, { "epoch": 0.8262170644478505, "grad_norm": 0.08647547973414688, "learning_rate": 1.4597501055462577e-05, "loss": 0.3974, "step": 1415 }, { "epoch": 0.826800963433326, "grad_norm": 0.08783121235915899, "learning_rate": 1.45884435351457e-05, "loss": 0.4034, "step": 1416 }, { "epoch": 0.8273848624188015, "grad_norm": 0.08579060992903398, "learning_rate": 1.4579381244133265e-05, "loss": 0.4201, "step": 1417 }, { "epoch": 0.8279687614042771, "grad_norm": 0.08643257478114853, "learning_rate": 1.457031419184752e-05, "loss": 0.4584, "step": 1418 }, { "epoch": 0.8285526603897526, "grad_norm": 0.08514069283083285, "learning_rate": 1.4561242387715652e-05, "loss": 0.4052, "step": 1419 }, { "epoch": 0.8291365593752281, "grad_norm": 0.08744048343146647, "learning_rate": 1.45521658411698e-05, "loss": 0.4317, "step": 1420 }, { "epoch": 0.8297204583607036, "grad_norm": 0.08574941797126834, "learning_rate": 1.4543084561647028e-05, "loss": 0.441, "step": 1421 }, { "epoch": 0.8303043573461791, "grad_norm": 0.08562094029310764, "learning_rate": 1.4533998558589319e-05, "loss": 0.3685, "step": 1422 }, { "epoch": 0.8308882563316546, "grad_norm": 0.08905499050405688, "learning_rate": 1.4524907841443576e-05, "loss": 0.4132, "step": 1423 }, { "epoch": 0.8314721553171301, "grad_norm": 0.08273697932431588, "learning_rate": 1.4515812419661595e-05, "loss": 0.3802, "step": 1424 }, { "epoch": 0.8320560543026057, "grad_norm": 0.08278290658405586, "learning_rate": 1.4506712302700064e-05, "loss": 0.3874, "step": 1425 }, { "epoch": 0.8326399532880812, "grad_norm": 0.09594355666976516, "learning_rate": 1.4497607500020556e-05, "loss": 0.3925, "step": 1426 }, { "epoch": 0.8332238522735567, "grad_norm": 0.09195131351309689, "learning_rate": 1.4488498021089514e-05, "loss": 0.409, "step": 1427 }, { "epoch": 0.8338077512590322, "grad_norm": 0.08266594200871114, "learning_rate": 1.4479383875378245e-05, "loss": 0.4069, "step": 1428 }, { "epoch": 0.8343916502445077, "grad_norm": 0.08716242610704239, "learning_rate": 1.4470265072362906e-05, "loss": 0.414, "step": 1429 }, { "epoch": 0.8349755492299832, "grad_norm": 0.0863252725935302, "learning_rate": 1.4461141621524498e-05, "loss": 0.4254, "step": 1430 }, { "epoch": 0.8355594482154587, "grad_norm": 0.09114147268708127, "learning_rate": 1.4452013532348849e-05, "loss": 0.4607, "step": 1431 }, { "epoch": 0.8361433472009342, "grad_norm": 0.08374017118928433, "learning_rate": 1.444288081432662e-05, "loss": 0.3864, "step": 1432 }, { "epoch": 0.8367272461864097, "grad_norm": 0.08857848116516749, "learning_rate": 1.443374347695328e-05, "loss": 0.4479, "step": 1433 }, { "epoch": 0.8373111451718853, "grad_norm": 0.08463229609049736, "learning_rate": 1.442460152972909e-05, "loss": 0.3854, "step": 1434 }, { "epoch": 0.8378950441573608, "grad_norm": 0.09124701030341152, "learning_rate": 1.4415454982159121e-05, "loss": 0.4155, "step": 1435 }, { "epoch": 0.8384789431428363, "grad_norm": 0.08810020445357164, "learning_rate": 1.4406303843753215e-05, "loss": 0.4556, "step": 1436 }, { "epoch": 0.8390628421283118, "grad_norm": 0.08761731937862337, "learning_rate": 1.4397148124025997e-05, "loss": 0.4499, "step": 1437 }, { "epoch": 0.8396467411137873, "grad_norm": 0.08201678760546088, "learning_rate": 1.4387987832496848e-05, "loss": 0.4155, "step": 1438 }, { "epoch": 0.8402306400992628, "grad_norm": 0.09020973103741615, "learning_rate": 1.4378822978689901e-05, "loss": 0.393, "step": 1439 }, { "epoch": 0.8408145390847384, "grad_norm": 0.08171044241068305, "learning_rate": 1.436965357213404e-05, "loss": 0.4032, "step": 1440 }, { "epoch": 0.8413984380702139, "grad_norm": 0.0852667422652595, "learning_rate": 1.4360479622362877e-05, "loss": 0.4168, "step": 1441 }, { "epoch": 0.8419823370556894, "grad_norm": 0.08719035470414205, "learning_rate": 1.4351301138914749e-05, "loss": 0.4167, "step": 1442 }, { "epoch": 0.8425662360411649, "grad_norm": 0.08425320713960051, "learning_rate": 1.4342118131332704e-05, "loss": 0.3685, "step": 1443 }, { "epoch": 0.8431501350266404, "grad_norm": 0.08493148408030515, "learning_rate": 1.4332930609164503e-05, "loss": 0.3778, "step": 1444 }, { "epoch": 0.8437340340121159, "grad_norm": 0.0820332402132783, "learning_rate": 1.4323738581962593e-05, "loss": 0.4244, "step": 1445 }, { "epoch": 0.8443179329975914, "grad_norm": 0.08302240301586115, "learning_rate": 1.4314542059284102e-05, "loss": 0.4222, "step": 1446 }, { "epoch": 0.8449018319830669, "grad_norm": 0.0845248983908111, "learning_rate": 1.4305341050690845e-05, "loss": 0.4505, "step": 1447 }, { "epoch": 0.8454857309685424, "grad_norm": 0.10068392714166438, "learning_rate": 1.429613556574928e-05, "loss": 0.472, "step": 1448 }, { "epoch": 0.8460696299540179, "grad_norm": 0.08670738955071491, "learning_rate": 1.4286925614030542e-05, "loss": 0.4171, "step": 1449 }, { "epoch": 0.8466535289394934, "grad_norm": 0.0865504715400236, "learning_rate": 1.4277711205110398e-05, "loss": 0.418, "step": 1450 }, { "epoch": 0.847237427924969, "grad_norm": 0.08380994210829945, "learning_rate": 1.4268492348569252e-05, "loss": 0.3982, "step": 1451 }, { "epoch": 0.8478213269104445, "grad_norm": 0.09009846295217178, "learning_rate": 1.425926905399213e-05, "loss": 0.4064, "step": 1452 }, { "epoch": 0.84840522589592, "grad_norm": 0.08918768018840316, "learning_rate": 1.4250041330968674e-05, "loss": 0.4352, "step": 1453 }, { "epoch": 0.8489891248813956, "grad_norm": 0.08787025789406903, "learning_rate": 1.424080918909313e-05, "loss": 0.4625, "step": 1454 }, { "epoch": 0.8495730238668711, "grad_norm": 0.08945864852921918, "learning_rate": 1.4231572637964338e-05, "loss": 0.4278, "step": 1455 }, { "epoch": 0.8501569228523466, "grad_norm": 0.08266966583321876, "learning_rate": 1.4222331687185723e-05, "loss": 0.4109, "step": 1456 }, { "epoch": 0.8507408218378221, "grad_norm": 0.0919710571208377, "learning_rate": 1.421308634636529e-05, "loss": 0.3824, "step": 1457 }, { "epoch": 0.8513247208232976, "grad_norm": 0.09992265571052476, "learning_rate": 1.4203836625115595e-05, "loss": 0.4425, "step": 1458 }, { "epoch": 0.8519086198087731, "grad_norm": 0.08223280035160803, "learning_rate": 1.419458253305376e-05, "loss": 0.4053, "step": 1459 }, { "epoch": 0.8524925187942486, "grad_norm": 0.08233281747736411, "learning_rate": 1.4185324079801447e-05, "loss": 0.4148, "step": 1460 }, { "epoch": 0.8530764177797241, "grad_norm": 0.0856836142941745, "learning_rate": 1.4176061274984858e-05, "loss": 0.4271, "step": 1461 }, { "epoch": 0.8536603167651996, "grad_norm": 0.09454380822914647, "learning_rate": 1.4166794128234705e-05, "loss": 0.3996, "step": 1462 }, { "epoch": 0.8542442157506751, "grad_norm": 0.08027563814205076, "learning_rate": 1.415752264918623e-05, "loss": 0.4033, "step": 1463 }, { "epoch": 0.8548281147361506, "grad_norm": 0.0939381774602264, "learning_rate": 1.4148246847479173e-05, "loss": 0.4088, "step": 1464 }, { "epoch": 0.8554120137216261, "grad_norm": 0.08534120334725011, "learning_rate": 1.4138966732757766e-05, "loss": 0.4016, "step": 1465 }, { "epoch": 0.8559959127071016, "grad_norm": 0.08069367450293832, "learning_rate": 1.4129682314670731e-05, "loss": 0.4102, "step": 1466 }, { "epoch": 0.8565798116925771, "grad_norm": 0.08435007660216932, "learning_rate": 1.412039360287126e-05, "loss": 0.3918, "step": 1467 }, { "epoch": 0.8571637106780527, "grad_norm": 0.08754158004665953, "learning_rate": 1.411110060701701e-05, "loss": 0.4397, "step": 1468 }, { "epoch": 0.8577476096635283, "grad_norm": 0.08895718951421268, "learning_rate": 1.4101803336770092e-05, "loss": 0.3913, "step": 1469 }, { "epoch": 0.8583315086490038, "grad_norm": 0.09098867229628989, "learning_rate": 1.4092501801797063e-05, "loss": 0.3811, "step": 1470 }, { "epoch": 0.8589154076344793, "grad_norm": 0.0885082577360676, "learning_rate": 1.4083196011768913e-05, "loss": 0.4506, "step": 1471 }, { "epoch": 0.8594993066199548, "grad_norm": 0.09250376871000927, "learning_rate": 1.4073885976361056e-05, "loss": 0.4404, "step": 1472 }, { "epoch": 0.8600832056054303, "grad_norm": 0.08799586389348167, "learning_rate": 1.4064571705253323e-05, "loss": 0.3853, "step": 1473 }, { "epoch": 0.8606671045909058, "grad_norm": 0.0839163211345786, "learning_rate": 1.405525320812994e-05, "loss": 0.4342, "step": 1474 }, { "epoch": 0.8612510035763813, "grad_norm": 0.08221404490919879, "learning_rate": 1.4045930494679538e-05, "loss": 0.3865, "step": 1475 }, { "epoch": 0.8618349025618568, "grad_norm": 0.08889740921989796, "learning_rate": 1.4036603574595122e-05, "loss": 0.44, "step": 1476 }, { "epoch": 0.8624188015473323, "grad_norm": 0.07913702428794282, "learning_rate": 1.4027272457574082e-05, "loss": 0.3838, "step": 1477 }, { "epoch": 0.8630027005328078, "grad_norm": 0.08426665565102608, "learning_rate": 1.4017937153318157e-05, "loss": 0.4501, "step": 1478 }, { "epoch": 0.8635865995182833, "grad_norm": 0.07803952047290125, "learning_rate": 1.4008597671533455e-05, "loss": 0.3798, "step": 1479 }, { "epoch": 0.8641704985037588, "grad_norm": 0.08344704945676684, "learning_rate": 1.3999254021930416e-05, "loss": 0.3827, "step": 1480 }, { "epoch": 0.8647543974892343, "grad_norm": 0.08755003407906248, "learning_rate": 1.3989906214223817e-05, "loss": 0.3701, "step": 1481 }, { "epoch": 0.8653382964747098, "grad_norm": 0.09878634254685598, "learning_rate": 1.3980554258132761e-05, "loss": 0.4515, "step": 1482 }, { "epoch": 0.8659221954601853, "grad_norm": 0.08684170471233023, "learning_rate": 1.3971198163380659e-05, "loss": 0.4815, "step": 1483 }, { "epoch": 0.8665060944456608, "grad_norm": 0.08366767064294856, "learning_rate": 1.3961837939695231e-05, "loss": 0.4182, "step": 1484 }, { "epoch": 0.8670899934311365, "grad_norm": 0.08951742840052954, "learning_rate": 1.3952473596808485e-05, "loss": 0.4229, "step": 1485 }, { "epoch": 0.867673892416612, "grad_norm": 0.092555895787642, "learning_rate": 1.3943105144456715e-05, "loss": 0.4043, "step": 1486 }, { "epoch": 0.8682577914020875, "grad_norm": 0.0896920524746703, "learning_rate": 1.3933732592380485e-05, "loss": 0.4112, "step": 1487 }, { "epoch": 0.868841690387563, "grad_norm": 0.08943505172840009, "learning_rate": 1.3924355950324623e-05, "loss": 0.4541, "step": 1488 }, { "epoch": 0.8694255893730385, "grad_norm": 0.08932011651564876, "learning_rate": 1.391497522803821e-05, "loss": 0.4105, "step": 1489 }, { "epoch": 0.870009488358514, "grad_norm": 0.08880618146171475, "learning_rate": 1.390559043527457e-05, "loss": 0.4049, "step": 1490 }, { "epoch": 0.8705933873439895, "grad_norm": 0.08873676398021844, "learning_rate": 1.3896201581791253e-05, "loss": 0.4052, "step": 1491 }, { "epoch": 0.871177286329465, "grad_norm": 0.09448049917789945, "learning_rate": 1.388680867735004e-05, "loss": 0.4567, "step": 1492 }, { "epoch": 0.8717611853149405, "grad_norm": 0.08298415418706033, "learning_rate": 1.3877411731716917e-05, "loss": 0.412, "step": 1493 }, { "epoch": 0.872345084300416, "grad_norm": 0.08753079808797629, "learning_rate": 1.3868010754662077e-05, "loss": 0.4132, "step": 1494 }, { "epoch": 0.8729289832858915, "grad_norm": 0.07942051645005271, "learning_rate": 1.3858605755959902e-05, "loss": 0.4346, "step": 1495 }, { "epoch": 0.873512882271367, "grad_norm": 0.09082947652585188, "learning_rate": 1.3849196745388953e-05, "loss": 0.3922, "step": 1496 }, { "epoch": 0.8740967812568425, "grad_norm": 0.0826368230670149, "learning_rate": 1.3839783732731966e-05, "loss": 0.4381, "step": 1497 }, { "epoch": 0.874680680242318, "grad_norm": 0.07918270101307866, "learning_rate": 1.3830366727775835e-05, "loss": 0.3716, "step": 1498 }, { "epoch": 0.8752645792277935, "grad_norm": 0.08787686697640719, "learning_rate": 1.3820945740311609e-05, "loss": 0.3931, "step": 1499 }, { "epoch": 0.875848478213269, "grad_norm": 0.08196891098530402, "learning_rate": 1.3811520780134471e-05, "loss": 0.4001, "step": 1500 }, { "epoch": 0.8764323771987447, "grad_norm": 0.08946888825367849, "learning_rate": 1.3802091857043745e-05, "loss": 0.4343, "step": 1501 }, { "epoch": 0.8770162761842202, "grad_norm": 0.07541942437547663, "learning_rate": 1.3792658980842861e-05, "loss": 0.3618, "step": 1502 }, { "epoch": 0.8776001751696957, "grad_norm": 0.091900743952326, "learning_rate": 1.3783222161339375e-05, "loss": 0.4438, "step": 1503 }, { "epoch": 0.8781840741551712, "grad_norm": 0.08801812430552657, "learning_rate": 1.3773781408344931e-05, "loss": 0.4062, "step": 1504 }, { "epoch": 0.8787679731406467, "grad_norm": 0.09050688023632555, "learning_rate": 1.3764336731675266e-05, "loss": 0.4264, "step": 1505 }, { "epoch": 0.8793518721261222, "grad_norm": 0.08895925145586761, "learning_rate": 1.3754888141150197e-05, "loss": 0.4384, "step": 1506 }, { "epoch": 0.8799357711115977, "grad_norm": 0.08123489525187662, "learning_rate": 1.3745435646593613e-05, "loss": 0.4199, "step": 1507 }, { "epoch": 0.8805196700970732, "grad_norm": 0.078257227057691, "learning_rate": 1.373597925783346e-05, "loss": 0.3925, "step": 1508 }, { "epoch": 0.8811035690825487, "grad_norm": 0.08636233574985094, "learning_rate": 1.3726518984701731e-05, "loss": 0.435, "step": 1509 }, { "epoch": 0.8816874680680242, "grad_norm": 0.08341435857851036, "learning_rate": 1.3717054837034459e-05, "loss": 0.4346, "step": 1510 }, { "epoch": 0.8822713670534997, "grad_norm": 0.08321319056503639, "learning_rate": 1.3707586824671703e-05, "loss": 0.381, "step": 1511 }, { "epoch": 0.8828552660389752, "grad_norm": 0.08163244360326827, "learning_rate": 1.369811495745755e-05, "loss": 0.3831, "step": 1512 }, { "epoch": 0.8834391650244507, "grad_norm": 0.078818327030809, "learning_rate": 1.3688639245240078e-05, "loss": 0.428, "step": 1513 }, { "epoch": 0.8840230640099263, "grad_norm": 0.07941081960840093, "learning_rate": 1.3679159697871383e-05, "loss": 0.4115, "step": 1514 }, { "epoch": 0.8846069629954018, "grad_norm": 0.09378489440196569, "learning_rate": 1.3669676325207531e-05, "loss": 0.4198, "step": 1515 }, { "epoch": 0.8851908619808773, "grad_norm": 0.08572928837266178, "learning_rate": 1.3660189137108578e-05, "loss": 0.3902, "step": 1516 }, { "epoch": 0.8857747609663528, "grad_norm": 0.07927674237644144, "learning_rate": 1.3650698143438534e-05, "loss": 0.3811, "step": 1517 }, { "epoch": 0.8863586599518284, "grad_norm": 0.0823195105396909, "learning_rate": 1.3641203354065378e-05, "loss": 0.3914, "step": 1518 }, { "epoch": 0.8869425589373039, "grad_norm": 0.09635035990524858, "learning_rate": 1.3631704778861028e-05, "loss": 0.5154, "step": 1519 }, { "epoch": 0.8875264579227794, "grad_norm": 0.0848895028757173, "learning_rate": 1.3622202427701344e-05, "loss": 0.3975, "step": 1520 }, { "epoch": 0.8881103569082549, "grad_norm": 0.07981147647873602, "learning_rate": 1.3612696310466103e-05, "loss": 0.4091, "step": 1521 }, { "epoch": 0.8886942558937304, "grad_norm": 0.08569555744240154, "learning_rate": 1.360318643703901e-05, "loss": 0.4159, "step": 1522 }, { "epoch": 0.8892781548792059, "grad_norm": 0.09082567152338611, "learning_rate": 1.3593672817307661e-05, "loss": 0.398, "step": 1523 }, { "epoch": 0.8898620538646814, "grad_norm": 0.08942880227591786, "learning_rate": 1.3584155461163562e-05, "loss": 0.4508, "step": 1524 }, { "epoch": 0.8904459528501569, "grad_norm": 0.08035566261707049, "learning_rate": 1.3574634378502092e-05, "loss": 0.399, "step": 1525 }, { "epoch": 0.8910298518356324, "grad_norm": 0.0843788134312903, "learning_rate": 1.3565109579222511e-05, "loss": 0.3762, "step": 1526 }, { "epoch": 0.8916137508211079, "grad_norm": 0.08248065418369001, "learning_rate": 1.3555581073227942e-05, "loss": 0.4179, "step": 1527 }, { "epoch": 0.8921976498065834, "grad_norm": 0.082473549201713, "learning_rate": 1.3546048870425356e-05, "loss": 0.4034, "step": 1528 }, { "epoch": 0.892781548792059, "grad_norm": 0.08149781553929147, "learning_rate": 1.353651298072558e-05, "loss": 0.3851, "step": 1529 }, { "epoch": 0.8933654477775345, "grad_norm": 0.08757628501218516, "learning_rate": 1.3526973414043263e-05, "loss": 0.4103, "step": 1530 }, { "epoch": 0.89394934676301, "grad_norm": 0.08827833534308598, "learning_rate": 1.3517430180296886e-05, "loss": 0.4038, "step": 1531 }, { "epoch": 0.8945332457484855, "grad_norm": 0.08041532391052485, "learning_rate": 1.3507883289408732e-05, "loss": 0.4098, "step": 1532 }, { "epoch": 0.895117144733961, "grad_norm": 0.0814526464156628, "learning_rate": 1.3498332751304895e-05, "loss": 0.3851, "step": 1533 }, { "epoch": 0.8957010437194365, "grad_norm": 0.0905736594897718, "learning_rate": 1.3488778575915258e-05, "loss": 0.4023, "step": 1534 }, { "epoch": 0.8962849427049121, "grad_norm": 0.0835758278970036, "learning_rate": 1.3479220773173485e-05, "loss": 0.4245, "step": 1535 }, { "epoch": 0.8968688416903876, "grad_norm": 0.07774020842690202, "learning_rate": 1.3469659353017019e-05, "loss": 0.3657, "step": 1536 }, { "epoch": 0.8974527406758631, "grad_norm": 0.08764953269661732, "learning_rate": 1.346009432538705e-05, "loss": 0.4582, "step": 1537 }, { "epoch": 0.8980366396613386, "grad_norm": 0.07632360717553224, "learning_rate": 1.345052570022853e-05, "loss": 0.3453, "step": 1538 }, { "epoch": 0.8986205386468141, "grad_norm": 0.08195446482822594, "learning_rate": 1.3440953487490145e-05, "loss": 0.3885, "step": 1539 }, { "epoch": 0.8992044376322896, "grad_norm": 0.07951235701612301, "learning_rate": 1.343137769712432e-05, "loss": 0.4136, "step": 1540 }, { "epoch": 0.8997883366177651, "grad_norm": 0.07893203833286719, "learning_rate": 1.342179833908719e-05, "loss": 0.3522, "step": 1541 }, { "epoch": 0.9003722356032406, "grad_norm": 0.07869778885219582, "learning_rate": 1.3412215423338601e-05, "loss": 0.3466, "step": 1542 }, { "epoch": 0.9009561345887162, "grad_norm": 0.08373825880842413, "learning_rate": 1.3402628959842106e-05, "loss": 0.4361, "step": 1543 }, { "epoch": 0.9015400335741917, "grad_norm": 0.08541339173998211, "learning_rate": 1.3393038958564934e-05, "loss": 0.3791, "step": 1544 }, { "epoch": 0.9021239325596672, "grad_norm": 0.0945012729676154, "learning_rate": 1.3383445429478008e-05, "loss": 0.4539, "step": 1545 }, { "epoch": 0.9027078315451427, "grad_norm": 0.08064074789480115, "learning_rate": 1.33738483825559e-05, "loss": 0.4253, "step": 1546 }, { "epoch": 0.9032917305306182, "grad_norm": 0.09192162612954043, "learning_rate": 1.3364247827776854e-05, "loss": 0.5039, "step": 1547 }, { "epoch": 0.9038756295160937, "grad_norm": 0.08567110962227548, "learning_rate": 1.3354643775122762e-05, "loss": 0.4081, "step": 1548 }, { "epoch": 0.9044595285015692, "grad_norm": 0.08300180251435989, "learning_rate": 1.3345036234579138e-05, "loss": 0.4089, "step": 1549 }, { "epoch": 0.9050434274870447, "grad_norm": 0.08871722341853529, "learning_rate": 1.333542521613514e-05, "loss": 0.3888, "step": 1550 }, { "epoch": 0.9056273264725202, "grad_norm": 0.0909470711956883, "learning_rate": 1.332581072978353e-05, "loss": 0.4031, "step": 1551 }, { "epoch": 0.9062112254579958, "grad_norm": 0.07524808252727805, "learning_rate": 1.331619278552068e-05, "loss": 0.3953, "step": 1552 }, { "epoch": 0.9067951244434713, "grad_norm": 0.086964014695451, "learning_rate": 1.3306571393346557e-05, "loss": 0.4473, "step": 1553 }, { "epoch": 0.9073790234289468, "grad_norm": 0.08307387184736842, "learning_rate": 1.3296946563264715e-05, "loss": 0.3923, "step": 1554 }, { "epoch": 0.9079629224144223, "grad_norm": 0.09084190746346833, "learning_rate": 1.3287318305282277e-05, "loss": 0.4433, "step": 1555 }, { "epoch": 0.9085468213998978, "grad_norm": 0.08836021259581855, "learning_rate": 1.3277686629409936e-05, "loss": 0.4413, "step": 1556 }, { "epoch": 0.9091307203853733, "grad_norm": 0.080888468762874, "learning_rate": 1.3268051545661937e-05, "loss": 0.4192, "step": 1557 }, { "epoch": 0.9097146193708489, "grad_norm": 0.09018964738635812, "learning_rate": 1.3258413064056066e-05, "loss": 0.4646, "step": 1558 }, { "epoch": 0.9102985183563244, "grad_norm": 0.08232590238825269, "learning_rate": 1.3248771194613641e-05, "loss": 0.3686, "step": 1559 }, { "epoch": 0.9108824173417999, "grad_norm": 0.08147069565025285, "learning_rate": 1.3239125947359506e-05, "loss": 0.447, "step": 1560 }, { "epoch": 0.9114663163272754, "grad_norm": 0.07967762353566328, "learning_rate": 1.3229477332322016e-05, "loss": 0.388, "step": 1561 }, { "epoch": 0.9120502153127509, "grad_norm": 0.08172562656109467, "learning_rate": 1.3219825359533025e-05, "loss": 0.3824, "step": 1562 }, { "epoch": 0.9126341142982264, "grad_norm": 0.08779275467057321, "learning_rate": 1.3210170039027886e-05, "loss": 0.3933, "step": 1563 }, { "epoch": 0.9132180132837019, "grad_norm": 0.08255504881372427, "learning_rate": 1.320051138084542e-05, "loss": 0.4402, "step": 1564 }, { "epoch": 0.9138019122691774, "grad_norm": 0.08205290521867013, "learning_rate": 1.3190849395027926e-05, "loss": 0.4181, "step": 1565 }, { "epoch": 0.9143858112546529, "grad_norm": 0.0848213137695562, "learning_rate": 1.3181184091621165e-05, "loss": 0.4077, "step": 1566 }, { "epoch": 0.9149697102401284, "grad_norm": 0.08677756569640321, "learning_rate": 1.3171515480674342e-05, "loss": 0.4282, "step": 1567 }, { "epoch": 0.915553609225604, "grad_norm": 0.08275831564749142, "learning_rate": 1.3161843572240107e-05, "loss": 0.3849, "step": 1568 }, { "epoch": 0.9161375082110795, "grad_norm": 0.07987667295057291, "learning_rate": 1.3152168376374528e-05, "loss": 0.3869, "step": 1569 }, { "epoch": 0.916721407196555, "grad_norm": 0.08780120733717522, "learning_rate": 1.3142489903137101e-05, "loss": 0.466, "step": 1570 }, { "epoch": 0.9173053061820305, "grad_norm": 0.09065202528233884, "learning_rate": 1.313280816259073e-05, "loss": 0.427, "step": 1571 }, { "epoch": 0.917889205167506, "grad_norm": 0.07961507592814572, "learning_rate": 1.3123123164801706e-05, "loss": 0.3739, "step": 1572 }, { "epoch": 0.9184731041529816, "grad_norm": 0.08407161780911128, "learning_rate": 1.3113434919839715e-05, "loss": 0.4142, "step": 1573 }, { "epoch": 0.9190570031384571, "grad_norm": 0.0838729635706307, "learning_rate": 1.310374343777782e-05, "loss": 0.4323, "step": 1574 }, { "epoch": 0.9196409021239326, "grad_norm": 0.0831310162390343, "learning_rate": 1.3094048728692443e-05, "loss": 0.3956, "step": 1575 }, { "epoch": 0.9202248011094081, "grad_norm": 0.08014193504443905, "learning_rate": 1.3084350802663365e-05, "loss": 0.4118, "step": 1576 }, { "epoch": 0.9208087000948836, "grad_norm": 0.08555178170669438, "learning_rate": 1.3074649669773716e-05, "loss": 0.4073, "step": 1577 }, { "epoch": 0.9213925990803591, "grad_norm": 0.08752804099128854, "learning_rate": 1.306494534010995e-05, "loss": 0.437, "step": 1578 }, { "epoch": 0.9219764980658346, "grad_norm": 0.0818682813901196, "learning_rate": 1.3055237823761855e-05, "loss": 0.4237, "step": 1579 }, { "epoch": 0.9225603970513101, "grad_norm": 0.0847222895940667, "learning_rate": 1.3045527130822524e-05, "loss": 0.4109, "step": 1580 }, { "epoch": 0.9231442960367856, "grad_norm": 0.08190140143991867, "learning_rate": 1.303581327138836e-05, "loss": 0.4121, "step": 1581 }, { "epoch": 0.9237281950222611, "grad_norm": 0.08325708571105277, "learning_rate": 1.3026096255559055e-05, "loss": 0.4613, "step": 1582 }, { "epoch": 0.9243120940077366, "grad_norm": 0.0807717008155156, "learning_rate": 1.3016376093437577e-05, "loss": 0.39, "step": 1583 }, { "epoch": 0.9248959929932121, "grad_norm": 0.08631408175457018, "learning_rate": 1.3006652795130179e-05, "loss": 0.4364, "step": 1584 }, { "epoch": 0.9254798919786877, "grad_norm": 0.08204864682742313, "learning_rate": 1.299692637074636e-05, "loss": 0.4075, "step": 1585 }, { "epoch": 0.9260637909641632, "grad_norm": 0.07869804963383943, "learning_rate": 1.2987196830398884e-05, "loss": 0.3484, "step": 1586 }, { "epoch": 0.9266476899496388, "grad_norm": 0.08062975393126531, "learning_rate": 1.297746418420374e-05, "loss": 0.3788, "step": 1587 }, { "epoch": 0.9272315889351143, "grad_norm": 0.08233400152944863, "learning_rate": 1.2967728442280154e-05, "loss": 0.3763, "step": 1588 }, { "epoch": 0.9278154879205898, "grad_norm": 0.07536710476502821, "learning_rate": 1.2957989614750569e-05, "loss": 0.3952, "step": 1589 }, { "epoch": 0.9283993869060653, "grad_norm": 0.08789539455384082, "learning_rate": 1.2948247711740638e-05, "loss": 0.4207, "step": 1590 }, { "epoch": 0.9289832858915408, "grad_norm": 0.08755410362186102, "learning_rate": 1.2938502743379212e-05, "loss": 0.4237, "step": 1591 }, { "epoch": 0.9295671848770163, "grad_norm": 0.08140443906523884, "learning_rate": 1.2928754719798324e-05, "loss": 0.4203, "step": 1592 }, { "epoch": 0.9301510838624918, "grad_norm": 0.0834597651172638, "learning_rate": 1.291900365113319e-05, "loss": 0.4388, "step": 1593 }, { "epoch": 0.9307349828479673, "grad_norm": 0.0793500245252117, "learning_rate": 1.2909249547522184e-05, "loss": 0.4107, "step": 1594 }, { "epoch": 0.9313188818334428, "grad_norm": 0.08834566021776112, "learning_rate": 1.2899492419106848e-05, "loss": 0.4267, "step": 1595 }, { "epoch": 0.9319027808189183, "grad_norm": 0.08571705954168894, "learning_rate": 1.2889732276031856e-05, "loss": 0.428, "step": 1596 }, { "epoch": 0.9324866798043938, "grad_norm": 0.08084812516079183, "learning_rate": 1.2879969128445025e-05, "loss": 0.3782, "step": 1597 }, { "epoch": 0.9330705787898693, "grad_norm": 0.07988807260519172, "learning_rate": 1.2870202986497291e-05, "loss": 0.3715, "step": 1598 }, { "epoch": 0.9336544777753448, "grad_norm": 0.0777562032357311, "learning_rate": 1.2860433860342705e-05, "loss": 0.4008, "step": 1599 }, { "epoch": 0.9342383767608203, "grad_norm": 0.08782487703477351, "learning_rate": 1.2850661760138423e-05, "loss": 0.4077, "step": 1600 }, { "epoch": 0.9348222757462958, "grad_norm": 0.08701451905501784, "learning_rate": 1.284088669604469e-05, "loss": 0.4127, "step": 1601 }, { "epoch": 0.9354061747317715, "grad_norm": 0.08360185604887449, "learning_rate": 1.283110867822483e-05, "loss": 0.4207, "step": 1602 }, { "epoch": 0.935990073717247, "grad_norm": 0.08732621319206425, "learning_rate": 1.2821327716845246e-05, "loss": 0.3964, "step": 1603 }, { "epoch": 0.9365739727027225, "grad_norm": 0.09459047216712821, "learning_rate": 1.2811543822075396e-05, "loss": 0.4728, "step": 1604 }, { "epoch": 0.937157871688198, "grad_norm": 0.081675352004217, "learning_rate": 1.2801757004087792e-05, "loss": 0.3674, "step": 1605 }, { "epoch": 0.9377417706736735, "grad_norm": 0.08642078073503627, "learning_rate": 1.2791967273057978e-05, "loss": 0.444, "step": 1606 }, { "epoch": 0.938325669659149, "grad_norm": 0.07949525879044848, "learning_rate": 1.2782174639164528e-05, "loss": 0.3755, "step": 1607 }, { "epoch": 0.9389095686446245, "grad_norm": 0.07940139653116988, "learning_rate": 1.2772379112589043e-05, "loss": 0.4141, "step": 1608 }, { "epoch": 0.9394934676301, "grad_norm": 0.08175326263823182, "learning_rate": 1.2762580703516127e-05, "loss": 0.3848, "step": 1609 }, { "epoch": 0.9400773666155755, "grad_norm": 0.08467464566982191, "learning_rate": 1.2752779422133377e-05, "loss": 0.3848, "step": 1610 }, { "epoch": 0.940661265601051, "grad_norm": 0.07893071294160853, "learning_rate": 1.2742975278631378e-05, "loss": 0.3833, "step": 1611 }, { "epoch": 0.9412451645865265, "grad_norm": 0.07766795407106537, "learning_rate": 1.2733168283203692e-05, "loss": 0.357, "step": 1612 }, { "epoch": 0.941829063572002, "grad_norm": 0.07976719109748519, "learning_rate": 1.272335844604685e-05, "loss": 0.4008, "step": 1613 }, { "epoch": 0.9424129625574775, "grad_norm": 0.08161251906052731, "learning_rate": 1.2713545777360334e-05, "loss": 0.4103, "step": 1614 }, { "epoch": 0.942996861542953, "grad_norm": 0.07825621377333158, "learning_rate": 1.2703730287346565e-05, "loss": 0.3798, "step": 1615 }, { "epoch": 0.9435807605284285, "grad_norm": 0.08080142903828925, "learning_rate": 1.2693911986210905e-05, "loss": 0.3855, "step": 1616 }, { "epoch": 0.944164659513904, "grad_norm": 0.0845388287660371, "learning_rate": 1.2684090884161636e-05, "loss": 0.4325, "step": 1617 }, { "epoch": 0.9447485584993796, "grad_norm": 0.08768994218951089, "learning_rate": 1.2674266991409949e-05, "loss": 0.4591, "step": 1618 }, { "epoch": 0.9453324574848552, "grad_norm": 0.0784801648834077, "learning_rate": 1.2664440318169949e-05, "loss": 0.3897, "step": 1619 }, { "epoch": 0.9459163564703307, "grad_norm": 0.08562326613823361, "learning_rate": 1.265461087465861e-05, "loss": 0.434, "step": 1620 }, { "epoch": 0.9465002554558062, "grad_norm": 0.078310884195406, "learning_rate": 1.2644778671095808e-05, "loss": 0.3978, "step": 1621 }, { "epoch": 0.9470841544412817, "grad_norm": 0.08404343042669983, "learning_rate": 1.2634943717704275e-05, "loss": 0.4027, "step": 1622 }, { "epoch": 0.9476680534267572, "grad_norm": 0.0794785102812003, "learning_rate": 1.262510602470961e-05, "loss": 0.41, "step": 1623 }, { "epoch": 0.9482519524122327, "grad_norm": 0.08368397291658393, "learning_rate": 1.2615265602340259e-05, "loss": 0.3968, "step": 1624 }, { "epoch": 0.9488358513977082, "grad_norm": 0.07627590063670667, "learning_rate": 1.2605422460827494e-05, "loss": 0.3926, "step": 1625 }, { "epoch": 0.9494197503831837, "grad_norm": 0.08297139240805063, "learning_rate": 1.2595576610405436e-05, "loss": 0.4138, "step": 1626 }, { "epoch": 0.9500036493686592, "grad_norm": 0.08758440742517462, "learning_rate": 1.2585728061311003e-05, "loss": 0.3836, "step": 1627 }, { "epoch": 0.9505875483541347, "grad_norm": 0.09064138533634775, "learning_rate": 1.257587682378393e-05, "loss": 0.4385, "step": 1628 }, { "epoch": 0.9511714473396102, "grad_norm": 0.08110857205340231, "learning_rate": 1.256602290806674e-05, "loss": 0.3685, "step": 1629 }, { "epoch": 0.9517553463250857, "grad_norm": 0.08201666393996689, "learning_rate": 1.2556166324404747e-05, "loss": 0.424, "step": 1630 }, { "epoch": 0.9523392453105612, "grad_norm": 0.08928408296318346, "learning_rate": 1.2546307083046037e-05, "loss": 0.4334, "step": 1631 }, { "epoch": 0.9529231442960368, "grad_norm": 0.08358205914050629, "learning_rate": 1.2536445194241455e-05, "loss": 0.3994, "step": 1632 }, { "epoch": 0.9535070432815123, "grad_norm": 0.08607220230700316, "learning_rate": 1.2526580668244607e-05, "loss": 0.4158, "step": 1633 }, { "epoch": 0.9540909422669878, "grad_norm": 0.08689035992889411, "learning_rate": 1.2516713515311832e-05, "loss": 0.4018, "step": 1634 }, { "epoch": 0.9546748412524633, "grad_norm": 0.08181030840025058, "learning_rate": 1.2506843745702204e-05, "loss": 0.3947, "step": 1635 }, { "epoch": 0.9552587402379389, "grad_norm": 0.08023795930318631, "learning_rate": 1.2496971369677518e-05, "loss": 0.4283, "step": 1636 }, { "epoch": 0.9558426392234144, "grad_norm": 0.09071151552200574, "learning_rate": 1.248709639750228e-05, "loss": 0.5014, "step": 1637 }, { "epoch": 0.9564265382088899, "grad_norm": 0.07945531095353682, "learning_rate": 1.2477218839443694e-05, "loss": 0.3882, "step": 1638 }, { "epoch": 0.9570104371943654, "grad_norm": 0.08992183626704243, "learning_rate": 1.246733870577165e-05, "loss": 0.4399, "step": 1639 }, { "epoch": 0.9575943361798409, "grad_norm": 0.07982421878459509, "learning_rate": 1.2457456006758722e-05, "loss": 0.3978, "step": 1640 }, { "epoch": 0.9581782351653164, "grad_norm": 0.08028728762800279, "learning_rate": 1.2447570752680147e-05, "loss": 0.3816, "step": 1641 }, { "epoch": 0.9587621341507919, "grad_norm": 0.08984293214924874, "learning_rate": 1.243768295381382e-05, "loss": 0.5073, "step": 1642 }, { "epoch": 0.9593460331362674, "grad_norm": 0.090266841306563, "learning_rate": 1.242779262044028e-05, "loss": 0.4366, "step": 1643 }, { "epoch": 0.9599299321217429, "grad_norm": 0.07479895192479355, "learning_rate": 1.24178997628427e-05, "loss": 0.3829, "step": 1644 }, { "epoch": 0.9605138311072184, "grad_norm": 0.07633079137073334, "learning_rate": 1.2408004391306883e-05, "loss": 0.3689, "step": 1645 }, { "epoch": 0.961097730092694, "grad_norm": 0.0788213731078543, "learning_rate": 1.2398106516121243e-05, "loss": 0.3943, "step": 1646 }, { "epoch": 0.9616816290781695, "grad_norm": 0.08465925460205626, "learning_rate": 1.2388206147576796e-05, "loss": 0.4043, "step": 1647 }, { "epoch": 0.962265528063645, "grad_norm": 0.08585218980622954, "learning_rate": 1.2378303295967147e-05, "loss": 0.3908, "step": 1648 }, { "epoch": 0.9628494270491205, "grad_norm": 0.08106117778734491, "learning_rate": 1.2368397971588493e-05, "loss": 0.4018, "step": 1649 }, { "epoch": 0.963433326034596, "grad_norm": 0.08151938131256099, "learning_rate": 1.2358490184739593e-05, "loss": 0.4467, "step": 1650 }, { "epoch": 0.9640172250200715, "grad_norm": 0.077083743963094, "learning_rate": 1.2348579945721769e-05, "loss": 0.3871, "step": 1651 }, { "epoch": 0.9646011240055471, "grad_norm": 0.07585380915543372, "learning_rate": 1.2338667264838895e-05, "loss": 0.3935, "step": 1652 }, { "epoch": 0.9651850229910226, "grad_norm": 0.08586313780390398, "learning_rate": 1.2328752152397373e-05, "loss": 0.4244, "step": 1653 }, { "epoch": 0.9657689219764981, "grad_norm": 0.08224262207072089, "learning_rate": 1.2318834618706154e-05, "loss": 0.4318, "step": 1654 }, { "epoch": 0.9663528209619736, "grad_norm": 0.08377306202692739, "learning_rate": 1.2308914674076687e-05, "loss": 0.4435, "step": 1655 }, { "epoch": 0.9669367199474491, "grad_norm": 0.0801022155287669, "learning_rate": 1.2298992328822937e-05, "loss": 0.4287, "step": 1656 }, { "epoch": 0.9675206189329246, "grad_norm": 0.07808058130043961, "learning_rate": 1.2289067593261358e-05, "loss": 0.4139, "step": 1657 }, { "epoch": 0.9681045179184001, "grad_norm": 0.08859410243064396, "learning_rate": 1.2279140477710902e-05, "loss": 0.4057, "step": 1658 }, { "epoch": 0.9686884169038756, "grad_norm": 0.07715302376191274, "learning_rate": 1.2269210992492982e-05, "loss": 0.373, "step": 1659 }, { "epoch": 0.9692723158893511, "grad_norm": 0.0800868188416318, "learning_rate": 1.2259279147931479e-05, "loss": 0.4223, "step": 1660 }, { "epoch": 0.9698562148748266, "grad_norm": 0.08567996737789238, "learning_rate": 1.2249344954352735e-05, "loss": 0.4434, "step": 1661 }, { "epoch": 0.9704401138603022, "grad_norm": 0.08337498863745858, "learning_rate": 1.2239408422085518e-05, "loss": 0.4671, "step": 1662 }, { "epoch": 0.9710240128457777, "grad_norm": 0.08573320953917714, "learning_rate": 1.2229469561461046e-05, "loss": 0.4406, "step": 1663 }, { "epoch": 0.9716079118312532, "grad_norm": 0.08907360820449114, "learning_rate": 1.2219528382812946e-05, "loss": 0.453, "step": 1664 }, { "epoch": 0.9721918108167287, "grad_norm": 0.08236792020760901, "learning_rate": 1.2209584896477258e-05, "loss": 0.4049, "step": 1665 }, { "epoch": 0.9727757098022042, "grad_norm": 0.0837050029317369, "learning_rate": 1.2199639112792423e-05, "loss": 0.3829, "step": 1666 }, { "epoch": 0.9733596087876797, "grad_norm": 0.08711262227377514, "learning_rate": 1.2189691042099265e-05, "loss": 0.4217, "step": 1667 }, { "epoch": 0.9739435077731552, "grad_norm": 0.07992678694198527, "learning_rate": 1.2179740694740993e-05, "loss": 0.392, "step": 1668 }, { "epoch": 0.9745274067586308, "grad_norm": 0.0832590110685475, "learning_rate": 1.2169788081063181e-05, "loss": 0.4369, "step": 1669 }, { "epoch": 0.9751113057441063, "grad_norm": 0.08194973107917482, "learning_rate": 1.2159833211413759e-05, "loss": 0.4037, "step": 1670 }, { "epoch": 0.9756952047295818, "grad_norm": 0.08218495265066278, "learning_rate": 1.2149876096142998e-05, "loss": 0.411, "step": 1671 }, { "epoch": 0.9762791037150573, "grad_norm": 0.08159687377369446, "learning_rate": 1.2139916745603509e-05, "loss": 0.4109, "step": 1672 }, { "epoch": 0.9768630027005328, "grad_norm": 0.08041050484680595, "learning_rate": 1.2129955170150228e-05, "loss": 0.4057, "step": 1673 }, { "epoch": 0.9774469016860083, "grad_norm": 0.0885635385035147, "learning_rate": 1.21199913801404e-05, "loss": 0.4433, "step": 1674 }, { "epoch": 0.9780308006714838, "grad_norm": 0.08063029615193859, "learning_rate": 1.2110025385933582e-05, "loss": 0.4114, "step": 1675 }, { "epoch": 0.9786146996569594, "grad_norm": 0.08018828497310176, "learning_rate": 1.2100057197891601e-05, "loss": 0.3774, "step": 1676 }, { "epoch": 0.9791985986424349, "grad_norm": 0.07977000656939191, "learning_rate": 1.209008682637859e-05, "loss": 0.4008, "step": 1677 }, { "epoch": 0.9797824976279104, "grad_norm": 0.07935772935780212, "learning_rate": 1.2080114281760942e-05, "loss": 0.3991, "step": 1678 }, { "epoch": 0.9803663966133859, "grad_norm": 0.0841992980426373, "learning_rate": 1.2070139574407302e-05, "loss": 0.4259, "step": 1679 }, { "epoch": 0.9809502955988614, "grad_norm": 0.08968986841912678, "learning_rate": 1.2060162714688582e-05, "loss": 0.4253, "step": 1680 }, { "epoch": 0.9815341945843369, "grad_norm": 0.08176379381147157, "learning_rate": 1.2050183712977903e-05, "loss": 0.4133, "step": 1681 }, { "epoch": 0.9821180935698124, "grad_norm": 0.08209902240540211, "learning_rate": 1.2040202579650649e-05, "loss": 0.4323, "step": 1682 }, { "epoch": 0.9827019925552879, "grad_norm": 0.08383958288123852, "learning_rate": 1.2030219325084388e-05, "loss": 0.4535, "step": 1683 }, { "epoch": 0.9832858915407634, "grad_norm": 0.08214083742768441, "learning_rate": 1.2020233959658918e-05, "loss": 0.3775, "step": 1684 }, { "epoch": 0.9838697905262389, "grad_norm": 0.08218166972166731, "learning_rate": 1.2010246493756215e-05, "loss": 0.3777, "step": 1685 }, { "epoch": 0.9844536895117145, "grad_norm": 0.0842243830021239, "learning_rate": 1.2000256937760446e-05, "loss": 0.4213, "step": 1686 }, { "epoch": 0.98503758849719, "grad_norm": 0.08627160485947943, "learning_rate": 1.1990265302057948e-05, "loss": 0.3891, "step": 1687 }, { "epoch": 0.9856214874826655, "grad_norm": 0.08635002534234468, "learning_rate": 1.1980271597037228e-05, "loss": 0.3929, "step": 1688 }, { "epoch": 0.986205386468141, "grad_norm": 0.08304765878953237, "learning_rate": 1.1970275833088936e-05, "loss": 0.4227, "step": 1689 }, { "epoch": 0.9867892854536165, "grad_norm": 0.08218534434582954, "learning_rate": 1.1960278020605861e-05, "loss": 0.3713, "step": 1690 }, { "epoch": 0.987373184439092, "grad_norm": 0.08092629362609888, "learning_rate": 1.1950278169982934e-05, "loss": 0.3911, "step": 1691 }, { "epoch": 0.9879570834245676, "grad_norm": 0.08760956703557046, "learning_rate": 1.1940276291617192e-05, "loss": 0.3744, "step": 1692 }, { "epoch": 0.9885409824100431, "grad_norm": 0.08022445854855312, "learning_rate": 1.1930272395907789e-05, "loss": 0.3812, "step": 1693 }, { "epoch": 0.9891248813955186, "grad_norm": 0.08337803158522968, "learning_rate": 1.1920266493255976e-05, "loss": 0.3787, "step": 1694 }, { "epoch": 0.9897087803809941, "grad_norm": 0.09020030932228895, "learning_rate": 1.1910258594065079e-05, "loss": 0.4174, "step": 1695 }, { "epoch": 0.9902926793664696, "grad_norm": 0.08281386856313948, "learning_rate": 1.1900248708740515e-05, "loss": 0.4136, "step": 1696 }, { "epoch": 0.9908765783519451, "grad_norm": 0.08302390631207926, "learning_rate": 1.1890236847689762e-05, "loss": 0.3907, "step": 1697 }, { "epoch": 0.9914604773374206, "grad_norm": 0.08229080004330029, "learning_rate": 1.1880223021322348e-05, "loss": 0.4225, "step": 1698 }, { "epoch": 0.9920443763228961, "grad_norm": 0.07595627358234384, "learning_rate": 1.1870207240049845e-05, "loss": 0.371, "step": 1699 }, { "epoch": 0.9926282753083716, "grad_norm": 0.07841923340998565, "learning_rate": 1.1860189514285858e-05, "loss": 0.3611, "step": 1700 }, { "epoch": 0.9932121742938471, "grad_norm": 0.08881268321988911, "learning_rate": 1.185016985444602e-05, "loss": 0.3994, "step": 1701 }, { "epoch": 0.9937960732793226, "grad_norm": 0.08686904999352063, "learning_rate": 1.1840148270947962e-05, "loss": 0.3718, "step": 1702 }, { "epoch": 0.9943799722647982, "grad_norm": 0.08429148047729228, "learning_rate": 1.183012477421133e-05, "loss": 0.3894, "step": 1703 }, { "epoch": 0.9949638712502737, "grad_norm": 0.08078252222939454, "learning_rate": 1.1820099374657748e-05, "loss": 0.3663, "step": 1704 }, { "epoch": 0.9955477702357493, "grad_norm": 0.08033490907936931, "learning_rate": 1.1810072082710823e-05, "loss": 0.4016, "step": 1705 }, { "epoch": 0.9961316692212248, "grad_norm": 0.08544685637259906, "learning_rate": 1.180004290879613e-05, "loss": 0.4271, "step": 1706 }, { "epoch": 0.9967155682067003, "grad_norm": 0.07941443165413005, "learning_rate": 1.1790011863341197e-05, "loss": 0.373, "step": 1707 }, { "epoch": 0.9972994671921758, "grad_norm": 0.08368489570158724, "learning_rate": 1.1779978956775507e-05, "loss": 0.4443, "step": 1708 }, { "epoch": 0.9978833661776513, "grad_norm": 0.08493159321391357, "learning_rate": 1.1769944199530458e-05, "loss": 0.3679, "step": 1709 }, { "epoch": 0.9984672651631268, "grad_norm": 0.08094500701133298, "learning_rate": 1.17599076020394e-05, "loss": 0.3849, "step": 1710 }, { "epoch": 0.9990511641486023, "grad_norm": 0.07705320648498597, "learning_rate": 1.1749869174737575e-05, "loss": 0.3927, "step": 1711 }, { "epoch": 0.9996350631340778, "grad_norm": 0.08334746312735039, "learning_rate": 1.173982892806214e-05, "loss": 0.4131, "step": 1712 }, { "epoch": 1.0004379242391066, "grad_norm": 0.1901390643769905, "learning_rate": 1.172978687245213e-05, "loss": 0.6483, "step": 1713 }, { "epoch": 1.0010218232245822, "grad_norm": 0.08483607436704847, "learning_rate": 1.1719743018348477e-05, "loss": 0.3071, "step": 1714 }, { "epoch": 1.0016057222100576, "grad_norm": 0.08240299919845175, "learning_rate": 1.1709697376193967e-05, "loss": 0.3498, "step": 1715 }, { "epoch": 1.0021896211955332, "grad_norm": 0.09003730882147727, "learning_rate": 1.169964995643326e-05, "loss": 0.3264, "step": 1716 }, { "epoch": 1.0027735201810086, "grad_norm": 0.08946283992017498, "learning_rate": 1.1689600769512855e-05, "loss": 0.3016, "step": 1717 }, { "epoch": 1.0033574191664842, "grad_norm": 0.09452064166441314, "learning_rate": 1.1679549825881087e-05, "loss": 0.3196, "step": 1718 }, { "epoch": 1.0039413181519596, "grad_norm": 0.12054100390061326, "learning_rate": 1.1669497135988127e-05, "loss": 0.3724, "step": 1719 }, { "epoch": 1.0045252171374353, "grad_norm": 0.08646844968947143, "learning_rate": 1.1659442710285948e-05, "loss": 0.3253, "step": 1720 }, { "epoch": 1.0051091161229107, "grad_norm": 0.08236820323099996, "learning_rate": 1.1649386559228342e-05, "loss": 0.278, "step": 1721 }, { "epoch": 1.0056930151083863, "grad_norm": 0.11881165262038465, "learning_rate": 1.1639328693270887e-05, "loss": 0.3122, "step": 1722 }, { "epoch": 1.0062769140938617, "grad_norm": 0.0964833111675544, "learning_rate": 1.1629269122870942e-05, "loss": 0.3561, "step": 1723 }, { "epoch": 1.0068608130793373, "grad_norm": 0.08403751975184376, "learning_rate": 1.1619207858487646e-05, "loss": 0.337, "step": 1724 }, { "epoch": 1.007444712064813, "grad_norm": 0.08175706638192293, "learning_rate": 1.1609144910581891e-05, "loss": 0.3146, "step": 1725 }, { "epoch": 1.0080286110502883, "grad_norm": 0.10089757049492597, "learning_rate": 1.1599080289616329e-05, "loss": 0.387, "step": 1726 }, { "epoch": 1.008612510035764, "grad_norm": 0.10360514325051054, "learning_rate": 1.1589014006055337e-05, "loss": 0.3755, "step": 1727 }, { "epoch": 1.0091964090212393, "grad_norm": 0.08644749079222921, "learning_rate": 1.1578946070365035e-05, "loss": 0.3114, "step": 1728 }, { "epoch": 1.009780308006715, "grad_norm": 0.09734714679172922, "learning_rate": 1.1568876493013255e-05, "loss": 0.3828, "step": 1729 }, { "epoch": 1.0103642069921903, "grad_norm": 0.08985938864900093, "learning_rate": 1.1558805284469533e-05, "loss": 0.3391, "step": 1730 }, { "epoch": 1.010948105977666, "grad_norm": 0.0897198869563589, "learning_rate": 1.1548732455205105e-05, "loss": 0.3287, "step": 1731 }, { "epoch": 1.0115320049631413, "grad_norm": 0.08839700447734158, "learning_rate": 1.1538658015692892e-05, "loss": 0.3167, "step": 1732 }, { "epoch": 1.012115903948617, "grad_norm": 0.08928706308447175, "learning_rate": 1.1528581976407485e-05, "loss": 0.3122, "step": 1733 }, { "epoch": 1.0126998029340923, "grad_norm": 0.09240930520861046, "learning_rate": 1.1518504347825146e-05, "loss": 0.3, "step": 1734 }, { "epoch": 1.013283701919568, "grad_norm": 0.0889692071010252, "learning_rate": 1.1508425140423782e-05, "loss": 0.3194, "step": 1735 }, { "epoch": 1.0138676009050434, "grad_norm": 0.08633858310492089, "learning_rate": 1.1498344364682948e-05, "loss": 0.3264, "step": 1736 }, { "epoch": 1.014451499890519, "grad_norm": 0.08113572954387693, "learning_rate": 1.1488262031083816e-05, "loss": 0.2936, "step": 1737 }, { "epoch": 1.0150353988759944, "grad_norm": 0.09994607965892638, "learning_rate": 1.14781781501092e-05, "loss": 0.3418, "step": 1738 }, { "epoch": 1.01561929786147, "grad_norm": 0.09824926021704838, "learning_rate": 1.1468092732243506e-05, "loss": 0.3537, "step": 1739 }, { "epoch": 1.0162031968469454, "grad_norm": 0.09064010792243739, "learning_rate": 1.1458005787972743e-05, "loss": 0.3428, "step": 1740 }, { "epoch": 1.016787095832421, "grad_norm": 0.08788599427228375, "learning_rate": 1.1447917327784504e-05, "loss": 0.3297, "step": 1741 }, { "epoch": 1.0173709948178966, "grad_norm": 0.08991226970624891, "learning_rate": 1.143782736216796e-05, "loss": 0.3534, "step": 1742 }, { "epoch": 1.017954893803372, "grad_norm": 0.08913907161578245, "learning_rate": 1.1427735901613854e-05, "loss": 0.3282, "step": 1743 }, { "epoch": 1.0185387927888476, "grad_norm": 0.08584263336935467, "learning_rate": 1.1417642956614474e-05, "loss": 0.3043, "step": 1744 }, { "epoch": 1.019122691774323, "grad_norm": 0.09326098176874169, "learning_rate": 1.1407548537663655e-05, "loss": 0.3091, "step": 1745 }, { "epoch": 1.0197065907597986, "grad_norm": 0.09208310679883662, "learning_rate": 1.1397452655256762e-05, "loss": 0.3321, "step": 1746 }, { "epoch": 1.020290489745274, "grad_norm": 0.08375949514485032, "learning_rate": 1.1387355319890685e-05, "loss": 0.3435, "step": 1747 }, { "epoch": 1.0208743887307496, "grad_norm": 0.08815065024003516, "learning_rate": 1.1377256542063822e-05, "loss": 0.3729, "step": 1748 }, { "epoch": 1.021458287716225, "grad_norm": 0.08428830001135737, "learning_rate": 1.1367156332276077e-05, "loss": 0.2915, "step": 1749 }, { "epoch": 1.0220421867017007, "grad_norm": 0.09365690614022619, "learning_rate": 1.1357054701028836e-05, "loss": 0.3435, "step": 1750 }, { "epoch": 1.022626085687176, "grad_norm": 0.08587451037104543, "learning_rate": 1.1346951658824958e-05, "loss": 0.3008, "step": 1751 }, { "epoch": 1.0232099846726517, "grad_norm": 0.08040646277828527, "learning_rate": 1.1336847216168785e-05, "loss": 0.3122, "step": 1752 }, { "epoch": 1.023793883658127, "grad_norm": 0.0853246918639202, "learning_rate": 1.1326741383566102e-05, "loss": 0.3098, "step": 1753 }, { "epoch": 1.0243777826436027, "grad_norm": 0.08723388343042, "learning_rate": 1.1316634171524147e-05, "loss": 0.3612, "step": 1754 }, { "epoch": 1.024961681629078, "grad_norm": 0.0929439601741796, "learning_rate": 1.1306525590551585e-05, "loss": 0.3678, "step": 1755 }, { "epoch": 1.0255455806145537, "grad_norm": 0.08804342930507068, "learning_rate": 1.1296415651158506e-05, "loss": 0.326, "step": 1756 }, { "epoch": 1.026129479600029, "grad_norm": 0.08207662229574895, "learning_rate": 1.1286304363856418e-05, "loss": 0.3194, "step": 1757 }, { "epoch": 1.0267133785855047, "grad_norm": 0.0852419177038598, "learning_rate": 1.1276191739158222e-05, "loss": 0.3097, "step": 1758 }, { "epoch": 1.0272972775709803, "grad_norm": 0.16300192090000007, "learning_rate": 1.126607778757822e-05, "loss": 0.3285, "step": 1759 }, { "epoch": 1.0278811765564557, "grad_norm": 0.090982375398459, "learning_rate": 1.1255962519632082e-05, "loss": 0.3189, "step": 1760 }, { "epoch": 1.0284650755419313, "grad_norm": 0.08986669466895116, "learning_rate": 1.1245845945836855e-05, "loss": 0.3286, "step": 1761 }, { "epoch": 1.0290489745274067, "grad_norm": 0.08586281958496918, "learning_rate": 1.123572807671094e-05, "loss": 0.3175, "step": 1762 }, { "epoch": 1.0296328735128824, "grad_norm": 0.08660512218342147, "learning_rate": 1.122560892277409e-05, "loss": 0.33, "step": 1763 }, { "epoch": 1.0302167724983577, "grad_norm": 0.08395917855260457, "learning_rate": 1.1215488494547384e-05, "loss": 0.2979, "step": 1764 }, { "epoch": 1.0308006714838334, "grad_norm": 0.08922008459895485, "learning_rate": 1.1205366802553231e-05, "loss": 0.3216, "step": 1765 }, { "epoch": 1.0313845704693088, "grad_norm": 0.08630876316630932, "learning_rate": 1.1195243857315358e-05, "loss": 0.3108, "step": 1766 }, { "epoch": 1.0319684694547844, "grad_norm": 0.08310681900841638, "learning_rate": 1.1185119669358792e-05, "loss": 0.3327, "step": 1767 }, { "epoch": 1.0325523684402598, "grad_norm": 0.09608222193487019, "learning_rate": 1.1174994249209852e-05, "loss": 0.3749, "step": 1768 }, { "epoch": 1.0331362674257354, "grad_norm": 0.09241803801634678, "learning_rate": 1.1164867607396136e-05, "loss": 0.3277, "step": 1769 }, { "epoch": 1.0337201664112108, "grad_norm": 0.09173144213906566, "learning_rate": 1.115473975444651e-05, "loss": 0.325, "step": 1770 }, { "epoch": 1.0343040653966864, "grad_norm": 0.08616235370720378, "learning_rate": 1.1144610700891108e-05, "loss": 0.292, "step": 1771 }, { "epoch": 1.0348879643821618, "grad_norm": 0.09046743375342507, "learning_rate": 1.1134480457261308e-05, "loss": 0.3294, "step": 1772 }, { "epoch": 1.0354718633676374, "grad_norm": 0.08261064311024412, "learning_rate": 1.1124349034089724e-05, "loss": 0.2871, "step": 1773 }, { "epoch": 1.0360557623531128, "grad_norm": 0.08603055923607057, "learning_rate": 1.1114216441910195e-05, "loss": 0.3018, "step": 1774 }, { "epoch": 1.0366396613385884, "grad_norm": 0.08739804940950728, "learning_rate": 1.1104082691257778e-05, "loss": 0.3149, "step": 1775 }, { "epoch": 1.037223560324064, "grad_norm": 0.08858803659729841, "learning_rate": 1.1093947792668735e-05, "loss": 0.2944, "step": 1776 }, { "epoch": 1.0378074593095394, "grad_norm": 0.08965248754276796, "learning_rate": 1.1083811756680523e-05, "loss": 0.281, "step": 1777 }, { "epoch": 1.038391358295015, "grad_norm": 0.08792566179433424, "learning_rate": 1.1073674593831778e-05, "loss": 0.334, "step": 1778 }, { "epoch": 1.0389752572804904, "grad_norm": 0.09317763665202182, "learning_rate": 1.1063536314662301e-05, "loss": 0.3301, "step": 1779 }, { "epoch": 1.039559156265966, "grad_norm": 0.0966256309144721, "learning_rate": 1.1053396929713076e-05, "loss": 0.2998, "step": 1780 }, { "epoch": 1.0401430552514415, "grad_norm": 0.09504487706863517, "learning_rate": 1.1043256449526214e-05, "loss": 0.3219, "step": 1781 }, { "epoch": 1.040726954236917, "grad_norm": 0.08918459324468635, "learning_rate": 1.103311488464497e-05, "loss": 0.3109, "step": 1782 }, { "epoch": 1.0413108532223925, "grad_norm": 0.0918474215408648, "learning_rate": 1.1022972245613735e-05, "loss": 0.305, "step": 1783 }, { "epoch": 1.041894752207868, "grad_norm": 0.08462107211395671, "learning_rate": 1.101282854297801e-05, "loss": 0.2926, "step": 1784 }, { "epoch": 1.0424786511933435, "grad_norm": 0.09072289346470115, "learning_rate": 1.1002683787284403e-05, "loss": 0.3264, "step": 1785 }, { "epoch": 1.043062550178819, "grad_norm": 0.0935090548862524, "learning_rate": 1.0992537989080618e-05, "loss": 0.3445, "step": 1786 }, { "epoch": 1.0436464491642945, "grad_norm": 0.09065746162136963, "learning_rate": 1.0982391158915441e-05, "loss": 0.3532, "step": 1787 }, { "epoch": 1.0442303481497701, "grad_norm": 0.08277746364147143, "learning_rate": 1.0972243307338733e-05, "loss": 0.3175, "step": 1788 }, { "epoch": 1.0448142471352455, "grad_norm": 0.09102569684454244, "learning_rate": 1.0962094444901416e-05, "loss": 0.3543, "step": 1789 }, { "epoch": 1.0453981461207211, "grad_norm": 0.08654931812315818, "learning_rate": 1.0951944582155463e-05, "loss": 0.27, "step": 1790 }, { "epoch": 1.0459820451061965, "grad_norm": 0.08443920879025263, "learning_rate": 1.094179372965389e-05, "loss": 0.3083, "step": 1791 }, { "epoch": 1.0465659440916721, "grad_norm": 0.09100637531383425, "learning_rate": 1.0931641897950733e-05, "loss": 0.3189, "step": 1792 }, { "epoch": 1.0471498430771478, "grad_norm": 0.08708724976074679, "learning_rate": 1.0921489097601054e-05, "loss": 0.2998, "step": 1793 }, { "epoch": 1.0477337420626232, "grad_norm": 0.09440464444950313, "learning_rate": 1.0911335339160924e-05, "loss": 0.3201, "step": 1794 }, { "epoch": 1.0483176410480988, "grad_norm": 0.07940179542992595, "learning_rate": 1.090118063318741e-05, "loss": 0.2824, "step": 1795 }, { "epoch": 1.0489015400335742, "grad_norm": 0.08696254594646523, "learning_rate": 1.089102499023855e-05, "loss": 0.3343, "step": 1796 }, { "epoch": 1.0494854390190498, "grad_norm": 0.09034099232884503, "learning_rate": 1.0880868420873375e-05, "loss": 0.3052, "step": 1797 }, { "epoch": 1.0500693380045252, "grad_norm": 0.09545312974987578, "learning_rate": 1.0870710935651868e-05, "loss": 0.3483, "step": 1798 }, { "epoch": 1.0506532369900008, "grad_norm": 0.09098169895272872, "learning_rate": 1.086055254513497e-05, "loss": 0.3596, "step": 1799 }, { "epoch": 1.0512371359754762, "grad_norm": 0.08398210758519634, "learning_rate": 1.085039325988456e-05, "loss": 0.2997, "step": 1800 }, { "epoch": 1.0518210349609518, "grad_norm": 0.08325978007469563, "learning_rate": 1.0840233090463443e-05, "loss": 0.3242, "step": 1801 }, { "epoch": 1.0524049339464272, "grad_norm": 0.08205762992957062, "learning_rate": 1.0830072047435354e-05, "loss": 0.28, "step": 1802 }, { "epoch": 1.0529888329319028, "grad_norm": 0.08705762198247462, "learning_rate": 1.0819910141364929e-05, "loss": 0.3174, "step": 1803 }, { "epoch": 1.0535727319173782, "grad_norm": 0.09339054390883132, "learning_rate": 1.0809747382817702e-05, "loss": 0.3468, "step": 1804 }, { "epoch": 1.0541566309028538, "grad_norm": 0.08305773496625493, "learning_rate": 1.0799583782360097e-05, "loss": 0.3315, "step": 1805 }, { "epoch": 1.0547405298883292, "grad_norm": 0.09275474266434246, "learning_rate": 1.0789419350559407e-05, "loss": 0.336, "step": 1806 }, { "epoch": 1.0553244288738048, "grad_norm": 0.0918220752281563, "learning_rate": 1.0779254097983788e-05, "loss": 0.3042, "step": 1807 }, { "epoch": 1.0559083278592802, "grad_norm": 0.08495732270983779, "learning_rate": 1.0769088035202268e-05, "loss": 0.2862, "step": 1808 }, { "epoch": 1.0564922268447559, "grad_norm": 0.0823947096725146, "learning_rate": 1.0758921172784696e-05, "loss": 0.2985, "step": 1809 }, { "epoch": 1.0570761258302315, "grad_norm": 0.08895354390670823, "learning_rate": 1.0748753521301758e-05, "loss": 0.3215, "step": 1810 }, { "epoch": 1.0576600248157069, "grad_norm": 0.09204110722083321, "learning_rate": 1.0738585091324966e-05, "loss": 0.3118, "step": 1811 }, { "epoch": 1.0582439238011825, "grad_norm": 0.09793332391772008, "learning_rate": 1.0728415893426636e-05, "loss": 0.3356, "step": 1812 }, { "epoch": 1.0588278227866579, "grad_norm": 0.08497822766897264, "learning_rate": 1.0718245938179886e-05, "loss": 0.3087, "step": 1813 }, { "epoch": 1.0594117217721335, "grad_norm": 0.08725037107361513, "learning_rate": 1.0708075236158617e-05, "loss": 0.3019, "step": 1814 }, { "epoch": 1.059995620757609, "grad_norm": 0.0986405037846936, "learning_rate": 1.0697903797937513e-05, "loss": 0.3272, "step": 1815 }, { "epoch": 1.0605795197430845, "grad_norm": 0.08701582300692469, "learning_rate": 1.0687731634092016e-05, "loss": 0.2954, "step": 1816 }, { "epoch": 1.06116341872856, "grad_norm": 0.08565742496861144, "learning_rate": 1.0677558755198327e-05, "loss": 0.3072, "step": 1817 }, { "epoch": 1.0617473177140355, "grad_norm": 0.10088033632706317, "learning_rate": 1.0667385171833391e-05, "loss": 0.361, "step": 1818 }, { "epoch": 1.062331216699511, "grad_norm": 0.08555241852954845, "learning_rate": 1.0657210894574885e-05, "loss": 0.2954, "step": 1819 }, { "epoch": 1.0629151156849865, "grad_norm": 0.08589908397275763, "learning_rate": 1.0647035934001202e-05, "loss": 0.3039, "step": 1820 }, { "epoch": 1.063499014670462, "grad_norm": 0.08771724063287331, "learning_rate": 1.0636860300691452e-05, "loss": 0.331, "step": 1821 }, { "epoch": 1.0640829136559375, "grad_norm": 0.09144828037655571, "learning_rate": 1.0626684005225443e-05, "loss": 0.3532, "step": 1822 }, { "epoch": 1.064666812641413, "grad_norm": 0.08238224483012119, "learning_rate": 1.0616507058183674e-05, "loss": 0.3023, "step": 1823 }, { "epoch": 1.0652507116268886, "grad_norm": 0.0864738968887704, "learning_rate": 1.0606329470147313e-05, "loss": 0.3366, "step": 1824 }, { "epoch": 1.065834610612364, "grad_norm": 0.08700116308133421, "learning_rate": 1.05961512516982e-05, "loss": 0.3341, "step": 1825 }, { "epoch": 1.0664185095978396, "grad_norm": 0.0885502154996833, "learning_rate": 1.0585972413418833e-05, "loss": 0.3305, "step": 1826 }, { "epoch": 1.0670024085833152, "grad_norm": 0.09238001477041209, "learning_rate": 1.0575792965892349e-05, "loss": 0.3271, "step": 1827 }, { "epoch": 1.0675863075687906, "grad_norm": 0.08821177442598048, "learning_rate": 1.0565612919702527e-05, "loss": 0.3134, "step": 1828 }, { "epoch": 1.0681702065542662, "grad_norm": 0.08813767409040975, "learning_rate": 1.0555432285433754e-05, "loss": 0.2955, "step": 1829 }, { "epoch": 1.0687541055397416, "grad_norm": 0.09223985929801364, "learning_rate": 1.0545251073671041e-05, "loss": 0.3384, "step": 1830 }, { "epoch": 1.0693380045252172, "grad_norm": 0.09584611165114212, "learning_rate": 1.0535069294999995e-05, "loss": 0.3956, "step": 1831 }, { "epoch": 1.0699219035106926, "grad_norm": 0.08818534377937061, "learning_rate": 1.0524886960006813e-05, "loss": 0.3349, "step": 1832 }, { "epoch": 1.0705058024961682, "grad_norm": 0.08081784073926022, "learning_rate": 1.0514704079278273e-05, "loss": 0.3, "step": 1833 }, { "epoch": 1.0710897014816436, "grad_norm": 0.08440102137389577, "learning_rate": 1.0504520663401714e-05, "loss": 0.3237, "step": 1834 }, { "epoch": 1.0716736004671192, "grad_norm": 0.09460338585865075, "learning_rate": 1.049433672296503e-05, "loss": 0.3504, "step": 1835 }, { "epoch": 1.0722574994525946, "grad_norm": 0.08168308940352122, "learning_rate": 1.0484152268556677e-05, "loss": 0.285, "step": 1836 }, { "epoch": 1.0728413984380702, "grad_norm": 0.09570414267255133, "learning_rate": 1.0473967310765629e-05, "loss": 0.331, "step": 1837 }, { "epoch": 1.0734252974235456, "grad_norm": 0.08603942006520916, "learning_rate": 1.0463781860181385e-05, "loss": 0.3142, "step": 1838 }, { "epoch": 1.0740091964090213, "grad_norm": 0.08744984841012646, "learning_rate": 1.0453595927393962e-05, "loss": 0.34, "step": 1839 }, { "epoch": 1.0745930953944967, "grad_norm": 0.09837531677692957, "learning_rate": 1.0443409522993877e-05, "loss": 0.327, "step": 1840 }, { "epoch": 1.0751769943799723, "grad_norm": 0.08671600986218206, "learning_rate": 1.0433222657572135e-05, "loss": 0.3142, "step": 1841 }, { "epoch": 1.0757608933654477, "grad_norm": 0.08296083004751079, "learning_rate": 1.0423035341720222e-05, "loss": 0.3512, "step": 1842 }, { "epoch": 1.0763447923509233, "grad_norm": 0.08736797207507722, "learning_rate": 1.041284758603009e-05, "loss": 0.3169, "step": 1843 }, { "epoch": 1.076928691336399, "grad_norm": 0.08367169942816383, "learning_rate": 1.0402659401094154e-05, "loss": 0.2983, "step": 1844 }, { "epoch": 1.0775125903218743, "grad_norm": 0.0914572856474525, "learning_rate": 1.0392470797505268e-05, "loss": 0.3343, "step": 1845 }, { "epoch": 1.07809648930735, "grad_norm": 0.087325766441451, "learning_rate": 1.0382281785856725e-05, "loss": 0.3263, "step": 1846 }, { "epoch": 1.0786803882928253, "grad_norm": 0.08884332997802685, "learning_rate": 1.0372092376742247e-05, "loss": 0.3188, "step": 1847 }, { "epoch": 1.079264287278301, "grad_norm": 0.08449959572156981, "learning_rate": 1.0361902580755955e-05, "loss": 0.3126, "step": 1848 }, { "epoch": 1.0798481862637763, "grad_norm": 0.09755365400596099, "learning_rate": 1.035171240849239e-05, "loss": 0.3729, "step": 1849 }, { "epoch": 1.080432085249252, "grad_norm": 0.0798355678383743, "learning_rate": 1.0341521870546472e-05, "loss": 0.2692, "step": 1850 }, { "epoch": 1.0810159842347273, "grad_norm": 0.08474458158906627, "learning_rate": 1.033133097751351e-05, "loss": 0.3091, "step": 1851 }, { "epoch": 1.081599883220203, "grad_norm": 0.09055817031957668, "learning_rate": 1.0321139739989167e-05, "loss": 0.3062, "step": 1852 }, { "epoch": 1.0821837822056783, "grad_norm": 0.08499730785544848, "learning_rate": 1.0310948168569483e-05, "loss": 0.2947, "step": 1853 }, { "epoch": 1.082767681191154, "grad_norm": 0.09419827520378779, "learning_rate": 1.0300756273850837e-05, "loss": 0.3677, "step": 1854 }, { "epoch": 1.0833515801766294, "grad_norm": 0.0855954167821771, "learning_rate": 1.0290564066429935e-05, "loss": 0.3064, "step": 1855 }, { "epoch": 1.083935479162105, "grad_norm": 0.08880043184263535, "learning_rate": 1.0280371556903827e-05, "loss": 0.3333, "step": 1856 }, { "epoch": 1.0845193781475804, "grad_norm": 0.08695159762258424, "learning_rate": 1.0270178755869861e-05, "loss": 0.3261, "step": 1857 }, { "epoch": 1.085103277133056, "grad_norm": 0.08863928195331323, "learning_rate": 1.0259985673925694e-05, "loss": 0.339, "step": 1858 }, { "epoch": 1.0856871761185314, "grad_norm": 0.08698450633784642, "learning_rate": 1.0249792321669276e-05, "loss": 0.2891, "step": 1859 }, { "epoch": 1.086271075104007, "grad_norm": 0.0853393505159533, "learning_rate": 1.0239598709698839e-05, "loss": 0.3222, "step": 1860 }, { "epoch": 1.0868549740894826, "grad_norm": 0.09233970723375678, "learning_rate": 1.0229404848612882e-05, "loss": 0.3325, "step": 1861 }, { "epoch": 1.087438873074958, "grad_norm": 0.08193341800469768, "learning_rate": 1.0219210749010162e-05, "loss": 0.2891, "step": 1862 }, { "epoch": 1.0880227720604336, "grad_norm": 0.09658613581376245, "learning_rate": 1.0209016421489685e-05, "loss": 0.3395, "step": 1863 }, { "epoch": 1.088606671045909, "grad_norm": 0.09293231326065574, "learning_rate": 1.0198821876650702e-05, "loss": 0.3455, "step": 1864 }, { "epoch": 1.0891905700313846, "grad_norm": 0.08363323242126271, "learning_rate": 1.0188627125092678e-05, "loss": 0.3031, "step": 1865 }, { "epoch": 1.08977446901686, "grad_norm": 0.08927798312955061, "learning_rate": 1.0178432177415298e-05, "loss": 0.3009, "step": 1866 }, { "epoch": 1.0903583680023357, "grad_norm": 0.08920022205359651, "learning_rate": 1.0168237044218452e-05, "loss": 0.3167, "step": 1867 }, { "epoch": 1.090942266987811, "grad_norm": 0.09239922479066046, "learning_rate": 1.0158041736102221e-05, "loss": 0.3002, "step": 1868 }, { "epoch": 1.0915261659732867, "grad_norm": 0.09075736459099783, "learning_rate": 1.014784626366687e-05, "loss": 0.3444, "step": 1869 }, { "epoch": 1.092110064958762, "grad_norm": 0.09041994274214117, "learning_rate": 1.0137650637512835e-05, "loss": 0.32, "step": 1870 }, { "epoch": 1.0926939639442377, "grad_norm": 0.09659874803697359, "learning_rate": 1.0127454868240702e-05, "loss": 0.3601, "step": 1871 }, { "epoch": 1.093277862929713, "grad_norm": 0.09584988955584268, "learning_rate": 1.0117258966451224e-05, "loss": 0.3202, "step": 1872 }, { "epoch": 1.0938617619151887, "grad_norm": 0.09671368950591339, "learning_rate": 1.0107062942745276e-05, "loss": 0.3322, "step": 1873 }, { "epoch": 1.094445660900664, "grad_norm": 0.0836913594554645, "learning_rate": 1.0096866807723868e-05, "loss": 0.3187, "step": 1874 }, { "epoch": 1.0950295598861397, "grad_norm": 0.08246711998357648, "learning_rate": 1.0086670571988124e-05, "loss": 0.3018, "step": 1875 }, { "epoch": 1.095613458871615, "grad_norm": 0.08316662828728885, "learning_rate": 1.0076474246139272e-05, "loss": 0.3144, "step": 1876 }, { "epoch": 1.0961973578570907, "grad_norm": 0.08314014070036406, "learning_rate": 1.0066277840778626e-05, "loss": 0.2733, "step": 1877 }, { "epoch": 1.0967812568425663, "grad_norm": 0.0969711619450715, "learning_rate": 1.0056081366507602e-05, "loss": 0.3841, "step": 1878 }, { "epoch": 1.0973651558280417, "grad_norm": 0.08873512772939389, "learning_rate": 1.0045884833927673e-05, "loss": 0.3323, "step": 1879 }, { "epoch": 1.0979490548135173, "grad_norm": 0.08317703331447844, "learning_rate": 1.0035688253640372e-05, "loss": 0.2844, "step": 1880 }, { "epoch": 1.0985329537989927, "grad_norm": 0.09816030731071965, "learning_rate": 1.0025491636247287e-05, "loss": 0.3839, "step": 1881 }, { "epoch": 1.0991168527844684, "grad_norm": 0.0919617639034204, "learning_rate": 1.0015294992350044e-05, "loss": 0.3389, "step": 1882 }, { "epoch": 1.0997007517699438, "grad_norm": 0.1430338178803504, "learning_rate": 1.0005098332550293e-05, "loss": 0.3064, "step": 1883 }, { "epoch": 1.1002846507554194, "grad_norm": 0.08969630666050342, "learning_rate": 9.994901667449708e-06, "loss": 0.3305, "step": 1884 }, { "epoch": 1.1008685497408948, "grad_norm": 0.08887734274438952, "learning_rate": 9.98470500764996e-06, "loss": 0.3914, "step": 1885 }, { "epoch": 1.1014524487263704, "grad_norm": 0.08284574815966764, "learning_rate": 9.974508363752715e-06, "loss": 0.3, "step": 1886 }, { "epoch": 1.1020363477118458, "grad_norm": 0.0894401203035605, "learning_rate": 9.964311746359631e-06, "loss": 0.3285, "step": 1887 }, { "epoch": 1.1026202466973214, "grad_norm": 0.08096966420098195, "learning_rate": 9.95411516607233e-06, "loss": 0.317, "step": 1888 }, { "epoch": 1.1032041456827968, "grad_norm": 0.09045924363599013, "learning_rate": 9.943918633492401e-06, "loss": 0.3198, "step": 1889 }, { "epoch": 1.1037880446682724, "grad_norm": 0.0961728363021819, "learning_rate": 9.933722159221375e-06, "loss": 0.3504, "step": 1890 }, { "epoch": 1.1043719436537478, "grad_norm": 0.09781963319967386, "learning_rate": 9.923525753860735e-06, "loss": 0.3531, "step": 1891 }, { "epoch": 1.1049558426392234, "grad_norm": 0.07700948780130926, "learning_rate": 9.91332942801188e-06, "loss": 0.2632, "step": 1892 }, { "epoch": 1.1055397416246988, "grad_norm": 0.10381585622312958, "learning_rate": 9.903133192276134e-06, "loss": 0.3634, "step": 1893 }, { "epoch": 1.1061236406101744, "grad_norm": 0.08372716288754395, "learning_rate": 9.892937057254729e-06, "loss": 0.3124, "step": 1894 }, { "epoch": 1.10670753959565, "grad_norm": 0.08606131890910844, "learning_rate": 9.882741033548781e-06, "loss": 0.3241, "step": 1895 }, { "epoch": 1.1072914385811254, "grad_norm": 0.08933769886009113, "learning_rate": 9.872545131759301e-06, "loss": 0.3221, "step": 1896 }, { "epoch": 1.107875337566601, "grad_norm": 0.09375714221407187, "learning_rate": 9.862349362487172e-06, "loss": 0.3655, "step": 1897 }, { "epoch": 1.1084592365520765, "grad_norm": 0.0844211548116875, "learning_rate": 9.85215373633313e-06, "loss": 0.2877, "step": 1898 }, { "epoch": 1.109043135537552, "grad_norm": 0.09170920536836549, "learning_rate": 9.841958263897779e-06, "loss": 0.3408, "step": 1899 }, { "epoch": 1.1096270345230275, "grad_norm": 0.08561309229165015, "learning_rate": 9.831762955781548e-06, "loss": 0.3001, "step": 1900 }, { "epoch": 1.110210933508503, "grad_norm": 0.09619562532968921, "learning_rate": 9.8215678225847e-06, "loss": 0.3388, "step": 1901 }, { "epoch": 1.1107948324939785, "grad_norm": 0.08865470366946045, "learning_rate": 9.811372874907323e-06, "loss": 0.325, "step": 1902 }, { "epoch": 1.111378731479454, "grad_norm": 0.09322801354294509, "learning_rate": 9.801178123349298e-06, "loss": 0.3512, "step": 1903 }, { "epoch": 1.1119626304649295, "grad_norm": 0.08849430046272884, "learning_rate": 9.790983578510315e-06, "loss": 0.3579, "step": 1904 }, { "epoch": 1.112546529450405, "grad_norm": 0.0839806986970727, "learning_rate": 9.780789250989841e-06, "loss": 0.2916, "step": 1905 }, { "epoch": 1.1131304284358805, "grad_norm": 0.08739963458107147, "learning_rate": 9.77059515138712e-06, "loss": 0.3144, "step": 1906 }, { "epoch": 1.1137143274213561, "grad_norm": 0.08930541328054799, "learning_rate": 9.760401290301164e-06, "loss": 0.3166, "step": 1907 }, { "epoch": 1.1142982264068315, "grad_norm": 0.08942268759451988, "learning_rate": 9.750207678330726e-06, "loss": 0.3239, "step": 1908 }, { "epoch": 1.1148821253923071, "grad_norm": 0.087287888277298, "learning_rate": 9.740014326074308e-06, "loss": 0.3096, "step": 1909 }, { "epoch": 1.1154660243777825, "grad_norm": 0.09357274249922583, "learning_rate": 9.729821244130142e-06, "loss": 0.3216, "step": 1910 }, { "epoch": 1.1160499233632581, "grad_norm": 0.09014340054250404, "learning_rate": 9.719628443096175e-06, "loss": 0.3388, "step": 1911 }, { "epoch": 1.1166338223487338, "grad_norm": 0.10172751419250832, "learning_rate": 9.709435933570068e-06, "loss": 0.326, "step": 1912 }, { "epoch": 1.1172177213342092, "grad_norm": 0.09527758583818047, "learning_rate": 9.699243726149168e-06, "loss": 0.3219, "step": 1913 }, { "epoch": 1.1178016203196848, "grad_norm": 0.08782673389742597, "learning_rate": 9.689051831430518e-06, "loss": 0.3209, "step": 1914 }, { "epoch": 1.1183855193051602, "grad_norm": 0.0833973102401854, "learning_rate": 9.678860260010834e-06, "loss": 0.3015, "step": 1915 }, { "epoch": 1.1189694182906358, "grad_norm": 0.08312978262038574, "learning_rate": 9.668669022486495e-06, "loss": 0.2986, "step": 1916 }, { "epoch": 1.1195533172761112, "grad_norm": 0.09797778142724853, "learning_rate": 9.658478129453532e-06, "loss": 0.328, "step": 1917 }, { "epoch": 1.1201372162615868, "grad_norm": 0.0900397534880525, "learning_rate": 9.648287591507613e-06, "loss": 0.3307, "step": 1918 }, { "epoch": 1.1207211152470622, "grad_norm": 0.09122510882395748, "learning_rate": 9.638097419244048e-06, "loss": 0.3453, "step": 1919 }, { "epoch": 1.1213050142325378, "grad_norm": 0.09514536094057578, "learning_rate": 9.627907623257758e-06, "loss": 0.3462, "step": 1920 }, { "epoch": 1.1218889132180132, "grad_norm": 0.08580790563547075, "learning_rate": 9.617718214143279e-06, "loss": 0.3246, "step": 1921 }, { "epoch": 1.1224728122034888, "grad_norm": 0.09461438400718661, "learning_rate": 9.607529202494739e-06, "loss": 0.2925, "step": 1922 }, { "epoch": 1.1230567111889642, "grad_norm": 0.10266767080486297, "learning_rate": 9.597340598905851e-06, "loss": 0.366, "step": 1923 }, { "epoch": 1.1236406101744398, "grad_norm": 0.08830258617501557, "learning_rate": 9.587152413969915e-06, "loss": 0.3206, "step": 1924 }, { "epoch": 1.1242245091599152, "grad_norm": 0.08497790846806094, "learning_rate": 9.576964658279783e-06, "loss": 0.312, "step": 1925 }, { "epoch": 1.1248084081453908, "grad_norm": 0.08768361196031786, "learning_rate": 9.566777342427867e-06, "loss": 0.3032, "step": 1926 }, { "epoch": 1.1253923071308662, "grad_norm": 0.09572531659494779, "learning_rate": 9.556590477006123e-06, "loss": 0.3334, "step": 1927 }, { "epoch": 1.1259762061163419, "grad_norm": 0.0833409548899631, "learning_rate": 9.546404072606038e-06, "loss": 0.3241, "step": 1928 }, { "epoch": 1.1265601051018175, "grad_norm": 0.08506249588832869, "learning_rate": 9.536218139818615e-06, "loss": 0.3102, "step": 1929 }, { "epoch": 1.1271440040872929, "grad_norm": 0.08785540837613791, "learning_rate": 9.526032689234374e-06, "loss": 0.3364, "step": 1930 }, { "epoch": 1.1277279030727685, "grad_norm": 0.09528428809206664, "learning_rate": 9.515847731443324e-06, "loss": 0.2891, "step": 1931 }, { "epoch": 1.1283118020582439, "grad_norm": 0.0820207602412144, "learning_rate": 9.50566327703497e-06, "loss": 0.312, "step": 1932 }, { "epoch": 1.1288957010437195, "grad_norm": 0.09513606956356921, "learning_rate": 9.49547933659829e-06, "loss": 0.33, "step": 1933 }, { "epoch": 1.129479600029195, "grad_norm": 0.09053407255165143, "learning_rate": 9.48529592072173e-06, "loss": 0.3263, "step": 1934 }, { "epoch": 1.1300634990146705, "grad_norm": 0.08174814059015802, "learning_rate": 9.475113039993188e-06, "loss": 0.2969, "step": 1935 }, { "epoch": 1.130647398000146, "grad_norm": 0.08968961864684474, "learning_rate": 9.464930705000008e-06, "loss": 0.3228, "step": 1936 }, { "epoch": 1.1312312969856215, "grad_norm": 0.08752396321148621, "learning_rate": 9.454748926328962e-06, "loss": 0.3263, "step": 1937 }, { "epoch": 1.131815195971097, "grad_norm": 0.09375994570464974, "learning_rate": 9.44456771456625e-06, "loss": 0.3204, "step": 1938 }, { "epoch": 1.1323990949565725, "grad_norm": 0.09128758336763895, "learning_rate": 9.434387080297477e-06, "loss": 0.3323, "step": 1939 }, { "epoch": 1.132982993942048, "grad_norm": 0.08891429676296055, "learning_rate": 9.424207034107653e-06, "loss": 0.3263, "step": 1940 }, { "epoch": 1.1335668929275236, "grad_norm": 0.08603902463288904, "learning_rate": 9.41402758658117e-06, "loss": 0.3181, "step": 1941 }, { "epoch": 1.1341507919129992, "grad_norm": 0.08316627561233629, "learning_rate": 9.403848748301802e-06, "loss": 0.2884, "step": 1942 }, { "epoch": 1.1347346908984746, "grad_norm": 0.08859982542969037, "learning_rate": 9.39367052985269e-06, "loss": 0.3306, "step": 1943 }, { "epoch": 1.13531858988395, "grad_norm": 0.08805590955723738, "learning_rate": 9.38349294181633e-06, "loss": 0.2987, "step": 1944 }, { "epoch": 1.1359024888694256, "grad_norm": 0.08735730105646539, "learning_rate": 9.373315994774558e-06, "loss": 0.3065, "step": 1945 }, { "epoch": 1.1364863878549012, "grad_norm": 0.09180776090936667, "learning_rate": 9.363139699308552e-06, "loss": 0.3253, "step": 1946 }, { "epoch": 1.1370702868403766, "grad_norm": 0.09332972521588934, "learning_rate": 9.352964065998801e-06, "loss": 0.3477, "step": 1947 }, { "epoch": 1.1376541858258522, "grad_norm": 0.08968396373900739, "learning_rate": 9.34278910542512e-06, "loss": 0.3336, "step": 1948 }, { "epoch": 1.1382380848113276, "grad_norm": 0.0966767117247458, "learning_rate": 9.332614828166612e-06, "loss": 0.3856, "step": 1949 }, { "epoch": 1.1388219837968032, "grad_norm": 0.0957729876613784, "learning_rate": 9.322441244801678e-06, "loss": 0.3585, "step": 1950 }, { "epoch": 1.1394058827822786, "grad_norm": 0.08632002711949566, "learning_rate": 9.312268365907989e-06, "loss": 0.3185, "step": 1951 }, { "epoch": 1.1399897817677542, "grad_norm": 0.08727042433136854, "learning_rate": 9.302096202062492e-06, "loss": 0.3146, "step": 1952 }, { "epoch": 1.1405736807532296, "grad_norm": 0.08373498983830703, "learning_rate": 9.291924763841387e-06, "loss": 0.3011, "step": 1953 }, { "epoch": 1.1411575797387052, "grad_norm": 0.09235522765070184, "learning_rate": 9.281754061820116e-06, "loss": 0.361, "step": 1954 }, { "epoch": 1.1417414787241806, "grad_norm": 0.08406362317179852, "learning_rate": 9.271584106573364e-06, "loss": 0.2832, "step": 1955 }, { "epoch": 1.1423253777096563, "grad_norm": 0.09640686771451105, "learning_rate": 9.261414908675036e-06, "loss": 0.3405, "step": 1956 }, { "epoch": 1.1429092766951316, "grad_norm": 0.08731359097887899, "learning_rate": 9.251246478698242e-06, "loss": 0.3027, "step": 1957 }, { "epoch": 1.1434931756806073, "grad_norm": 0.09099597240564668, "learning_rate": 9.241078827215305e-06, "loss": 0.3701, "step": 1958 }, { "epoch": 1.1440770746660829, "grad_norm": 0.0934758567949581, "learning_rate": 9.230911964797734e-06, "loss": 0.3394, "step": 1959 }, { "epoch": 1.1446609736515583, "grad_norm": 0.09791467101575654, "learning_rate": 9.22074590201621e-06, "loss": 0.3817, "step": 1960 }, { "epoch": 1.1452448726370337, "grad_norm": 0.0846062991116479, "learning_rate": 9.210580649440598e-06, "loss": 0.297, "step": 1961 }, { "epoch": 1.1458287716225093, "grad_norm": 0.08955422135146658, "learning_rate": 9.200416217639906e-06, "loss": 0.3299, "step": 1962 }, { "epoch": 1.146412670607985, "grad_norm": 0.08437447519937563, "learning_rate": 9.190252617182301e-06, "loss": 0.2956, "step": 1963 }, { "epoch": 1.1469965695934603, "grad_norm": 0.08600961299430737, "learning_rate": 9.180089858635075e-06, "loss": 0.3104, "step": 1964 }, { "epoch": 1.147580468578936, "grad_norm": 0.09135076190312309, "learning_rate": 9.169927952564649e-06, "loss": 0.3235, "step": 1965 }, { "epoch": 1.1481643675644113, "grad_norm": 0.08924205568915256, "learning_rate": 9.159766909536559e-06, "loss": 0.3503, "step": 1966 }, { "epoch": 1.148748266549887, "grad_norm": 0.08630957600350984, "learning_rate": 9.149606740115444e-06, "loss": 0.3378, "step": 1967 }, { "epoch": 1.1493321655353623, "grad_norm": 0.08478959580115475, "learning_rate": 9.139447454865034e-06, "loss": 0.3015, "step": 1968 }, { "epoch": 1.149916064520838, "grad_norm": 0.09209025263326069, "learning_rate": 9.129289064348135e-06, "loss": 0.3241, "step": 1969 }, { "epoch": 1.1504999635063133, "grad_norm": 0.09535605313193664, "learning_rate": 9.119131579126628e-06, "loss": 0.357, "step": 1970 }, { "epoch": 1.151083862491789, "grad_norm": 0.08441604595612168, "learning_rate": 9.108975009761452e-06, "loss": 0.3272, "step": 1971 }, { "epoch": 1.1516677614772644, "grad_norm": 0.08552560626628097, "learning_rate": 9.098819366812594e-06, "loss": 0.309, "step": 1972 }, { "epoch": 1.15225166046274, "grad_norm": 0.09104428709753967, "learning_rate": 9.088664660839078e-06, "loss": 0.3534, "step": 1973 }, { "epoch": 1.1528355594482154, "grad_norm": 0.08578805965977553, "learning_rate": 9.078510902398948e-06, "loss": 0.3063, "step": 1974 }, { "epoch": 1.153419458433691, "grad_norm": 0.08792549249626005, "learning_rate": 9.068358102049272e-06, "loss": 0.3177, "step": 1975 }, { "epoch": 1.1540033574191666, "grad_norm": 0.08903501456377956, "learning_rate": 9.058206270346115e-06, "loss": 0.3139, "step": 1976 }, { "epoch": 1.154587256404642, "grad_norm": 0.08620546090520767, "learning_rate": 9.04805541784454e-06, "loss": 0.3306, "step": 1977 }, { "epoch": 1.1551711553901174, "grad_norm": 0.08364832199458826, "learning_rate": 9.037905555098589e-06, "loss": 0.3213, "step": 1978 }, { "epoch": 1.155755054375593, "grad_norm": 0.08203477810501263, "learning_rate": 9.027756692661272e-06, "loss": 0.2928, "step": 1979 }, { "epoch": 1.1563389533610686, "grad_norm": 0.0943705330725603, "learning_rate": 9.017608841084564e-06, "loss": 0.3417, "step": 1980 }, { "epoch": 1.156922852346544, "grad_norm": 0.0866561097549172, "learning_rate": 9.007462010919387e-06, "loss": 0.3153, "step": 1981 }, { "epoch": 1.1575067513320196, "grad_norm": 0.10315055873689433, "learning_rate": 8.997316212715599e-06, "loss": 0.3257, "step": 1982 }, { "epoch": 1.158090650317495, "grad_norm": 0.10412408795232994, "learning_rate": 8.987171457021992e-06, "loss": 0.3875, "step": 1983 }, { "epoch": 1.1586745493029706, "grad_norm": 0.09600748096188333, "learning_rate": 8.977027754386267e-06, "loss": 0.3222, "step": 1984 }, { "epoch": 1.159258448288446, "grad_norm": 0.09419014812585443, "learning_rate": 8.966885115355033e-06, "loss": 0.344, "step": 1985 }, { "epoch": 1.1598423472739217, "grad_norm": 0.08582729645896782, "learning_rate": 8.95674355047379e-06, "loss": 0.3164, "step": 1986 }, { "epoch": 1.160426246259397, "grad_norm": 0.09315531180713053, "learning_rate": 8.946603070286926e-06, "loss": 0.3348, "step": 1987 }, { "epoch": 1.1610101452448727, "grad_norm": 0.0849087806740082, "learning_rate": 8.936463685337697e-06, "loss": 0.3056, "step": 1988 }, { "epoch": 1.161594044230348, "grad_norm": 0.09571092298918632, "learning_rate": 8.926325406168225e-06, "loss": 0.3277, "step": 1989 }, { "epoch": 1.1621779432158237, "grad_norm": 0.09010140333430028, "learning_rate": 8.91618824331948e-06, "loss": 0.2901, "step": 1990 }, { "epoch": 1.162761842201299, "grad_norm": 0.09215159439698076, "learning_rate": 8.906052207331268e-06, "loss": 0.3438, "step": 1991 }, { "epoch": 1.1633457411867747, "grad_norm": 0.08874987010085279, "learning_rate": 8.895917308742224e-06, "loss": 0.3045, "step": 1992 }, { "epoch": 1.1639296401722503, "grad_norm": 0.09209099382330496, "learning_rate": 8.88578355808981e-06, "loss": 0.3206, "step": 1993 }, { "epoch": 1.1645135391577257, "grad_norm": 0.09191850533586973, "learning_rate": 8.87565096591028e-06, "loss": 0.3459, "step": 1994 }, { "epoch": 1.165097438143201, "grad_norm": 0.08711688936953038, "learning_rate": 8.865519542738696e-06, "loss": 0.3184, "step": 1995 }, { "epoch": 1.1656813371286767, "grad_norm": 0.09089400955444339, "learning_rate": 8.855389299108894e-06, "loss": 0.3755, "step": 1996 }, { "epoch": 1.1662652361141523, "grad_norm": 0.08583881794835398, "learning_rate": 8.845260245553493e-06, "loss": 0.32, "step": 1997 }, { "epoch": 1.1668491350996277, "grad_norm": 0.08700639570830677, "learning_rate": 8.83513239260387e-06, "loss": 0.2833, "step": 1998 }, { "epoch": 1.1674330340851033, "grad_norm": 0.08917896242287013, "learning_rate": 8.82500575079015e-06, "loss": 0.3628, "step": 1999 }, { "epoch": 1.1680169330705787, "grad_norm": 0.09168922042834127, "learning_rate": 8.81488033064121e-06, "loss": 0.2978, "step": 2000 }, { "epoch": 1.1686008320560544, "grad_norm": 0.08921478177348807, "learning_rate": 8.804756142684644e-06, "loss": 0.3172, "step": 2001 }, { "epoch": 1.1691847310415298, "grad_norm": 0.09580893013973872, "learning_rate": 8.79463319744677e-06, "loss": 0.3298, "step": 2002 }, { "epoch": 1.1697686300270054, "grad_norm": 0.09625345608049736, "learning_rate": 8.78451150545262e-06, "loss": 0.3343, "step": 2003 }, { "epoch": 1.1703525290124808, "grad_norm": 0.08163289497967964, "learning_rate": 8.774391077225914e-06, "loss": 0.2668, "step": 2004 }, { "epoch": 1.1709364279979564, "grad_norm": 0.08816432407283772, "learning_rate": 8.764271923289064e-06, "loss": 0.325, "step": 2005 }, { "epoch": 1.1715203269834318, "grad_norm": 0.08382790425962251, "learning_rate": 8.754154054163148e-06, "loss": 0.3126, "step": 2006 }, { "epoch": 1.1721042259689074, "grad_norm": 0.08870541389227733, "learning_rate": 8.744037480367922e-06, "loss": 0.3151, "step": 2007 }, { "epoch": 1.1726881249543828, "grad_norm": 0.09016727144100255, "learning_rate": 8.733922212421785e-06, "loss": 0.3214, "step": 2008 }, { "epoch": 1.1732720239398584, "grad_norm": 0.09283759128709923, "learning_rate": 8.723808260841781e-06, "loss": 0.3605, "step": 2009 }, { "epoch": 1.173855922925334, "grad_norm": 0.09199909057024754, "learning_rate": 8.713695636143584e-06, "loss": 0.3184, "step": 2010 }, { "epoch": 1.1744398219108094, "grad_norm": 0.08242929049335476, "learning_rate": 8.703584348841494e-06, "loss": 0.2979, "step": 2011 }, { "epoch": 1.1750237208962848, "grad_norm": 0.08885816849241786, "learning_rate": 8.693474409448416e-06, "loss": 0.3025, "step": 2012 }, { "epoch": 1.1756076198817604, "grad_norm": 0.09333264452357838, "learning_rate": 8.683365828475855e-06, "loss": 0.3461, "step": 2013 }, { "epoch": 1.176191518867236, "grad_norm": 0.08360197153753089, "learning_rate": 8.673258616433898e-06, "loss": 0.2985, "step": 2014 }, { "epoch": 1.1767754178527114, "grad_norm": 0.09194357575864455, "learning_rate": 8.663152783831215e-06, "loss": 0.3466, "step": 2015 }, { "epoch": 1.177359316838187, "grad_norm": 0.08120014567926066, "learning_rate": 8.653048341175044e-06, "loss": 0.2826, "step": 2016 }, { "epoch": 1.1779432158236625, "grad_norm": 0.08311416906072153, "learning_rate": 8.642945298971168e-06, "loss": 0.3075, "step": 2017 }, { "epoch": 1.178527114809138, "grad_norm": 0.08233729709598747, "learning_rate": 8.632843667723927e-06, "loss": 0.285, "step": 2018 }, { "epoch": 1.1791110137946135, "grad_norm": 0.08390984581262785, "learning_rate": 8.62274345793618e-06, "loss": 0.3399, "step": 2019 }, { "epoch": 1.179694912780089, "grad_norm": 0.0869319790293869, "learning_rate": 8.61264468010932e-06, "loss": 0.3141, "step": 2020 }, { "epoch": 1.1802788117655645, "grad_norm": 0.08851467308498265, "learning_rate": 8.602547344743241e-06, "loss": 0.3592, "step": 2021 }, { "epoch": 1.18086271075104, "grad_norm": 0.08101212694365587, "learning_rate": 8.592451462336348e-06, "loss": 0.3125, "step": 2022 }, { "epoch": 1.1814466097365155, "grad_norm": 0.08472653040240707, "learning_rate": 8.582357043385529e-06, "loss": 0.2957, "step": 2023 }, { "epoch": 1.1820305087219911, "grad_norm": 0.10008002362319894, "learning_rate": 8.572264098386149e-06, "loss": 0.3689, "step": 2024 }, { "epoch": 1.1826144077074665, "grad_norm": 0.09390956261412838, "learning_rate": 8.562172637832041e-06, "loss": 0.3228, "step": 2025 }, { "epoch": 1.1831983066929421, "grad_norm": 0.08257694493375262, "learning_rate": 8.5520826722155e-06, "loss": 0.2991, "step": 2026 }, { "epoch": 1.1837822056784177, "grad_norm": 0.08846060031031942, "learning_rate": 8.54199421202726e-06, "loss": 0.3474, "step": 2027 }, { "epoch": 1.1843661046638931, "grad_norm": 0.08760490040893816, "learning_rate": 8.531907267756498e-06, "loss": 0.2954, "step": 2028 }, { "epoch": 1.1849500036493688, "grad_norm": 0.0881520255609251, "learning_rate": 8.521821849890802e-06, "loss": 0.3229, "step": 2029 }, { "epoch": 1.1855339026348442, "grad_norm": 0.08960567102226359, "learning_rate": 8.511737968916185e-06, "loss": 0.3792, "step": 2030 }, { "epoch": 1.1861178016203198, "grad_norm": 0.09367771322276644, "learning_rate": 8.50165563531706e-06, "loss": 0.3666, "step": 2031 }, { "epoch": 1.1867017006057952, "grad_norm": 0.0837944399014834, "learning_rate": 8.491574859576222e-06, "loss": 0.2892, "step": 2032 }, { "epoch": 1.1872855995912708, "grad_norm": 0.08225297692267265, "learning_rate": 8.481495652174859e-06, "loss": 0.3051, "step": 2033 }, { "epoch": 1.1878694985767462, "grad_norm": 0.08742707413766022, "learning_rate": 8.47141802359252e-06, "loss": 0.3146, "step": 2034 }, { "epoch": 1.1884533975622218, "grad_norm": 0.08994239657199503, "learning_rate": 8.461341984307115e-06, "loss": 0.3299, "step": 2035 }, { "epoch": 1.1890372965476972, "grad_norm": 0.08201275912133438, "learning_rate": 8.4512675447949e-06, "loss": 0.301, "step": 2036 }, { "epoch": 1.1896211955331728, "grad_norm": 0.08544468398956115, "learning_rate": 8.441194715530472e-06, "loss": 0.3248, "step": 2037 }, { "epoch": 1.1902050945186482, "grad_norm": 0.07917384969846024, "learning_rate": 8.431123506986747e-06, "loss": 0.3008, "step": 2038 }, { "epoch": 1.1907889935041238, "grad_norm": 0.08697198376468179, "learning_rate": 8.421053929634966e-06, "loss": 0.3307, "step": 2039 }, { "epoch": 1.1913728924895992, "grad_norm": 0.09034355784874654, "learning_rate": 8.410985993944663e-06, "loss": 0.317, "step": 2040 }, { "epoch": 1.1919567914750748, "grad_norm": 0.08822924896716082, "learning_rate": 8.400919710383673e-06, "loss": 0.3262, "step": 2041 }, { "epoch": 1.1925406904605502, "grad_norm": 0.08160401491471869, "learning_rate": 8.390855089418109e-06, "loss": 0.2959, "step": 2042 }, { "epoch": 1.1931245894460258, "grad_norm": 0.08020482500602538, "learning_rate": 8.380792141512355e-06, "loss": 0.299, "step": 2043 }, { "epoch": 1.1937084884315015, "grad_norm": 0.07972166356272198, "learning_rate": 8.37073087712906e-06, "loss": 0.2866, "step": 2044 }, { "epoch": 1.1942923874169769, "grad_norm": 0.08186396802799997, "learning_rate": 8.360671306729114e-06, "loss": 0.2905, "step": 2045 }, { "epoch": 1.1948762864024525, "grad_norm": 0.08542179804421467, "learning_rate": 8.350613440771661e-06, "loss": 0.286, "step": 2046 }, { "epoch": 1.1954601853879279, "grad_norm": 0.08268229747314315, "learning_rate": 8.340557289714055e-06, "loss": 0.2816, "step": 2047 }, { "epoch": 1.1960440843734035, "grad_norm": 0.08749688287023968, "learning_rate": 8.330502864011878e-06, "loss": 0.3051, "step": 2048 }, { "epoch": 1.1966279833588789, "grad_norm": 0.08857566411672606, "learning_rate": 8.320450174118914e-06, "loss": 0.2941, "step": 2049 }, { "epoch": 1.1972118823443545, "grad_norm": 0.09193512992700782, "learning_rate": 8.310399230487148e-06, "loss": 0.3537, "step": 2050 }, { "epoch": 1.19779578132983, "grad_norm": 0.08335475440323102, "learning_rate": 8.300350043566742e-06, "loss": 0.3284, "step": 2051 }, { "epoch": 1.1983796803153055, "grad_norm": 0.0871628213314298, "learning_rate": 8.290302623806035e-06, "loss": 0.2961, "step": 2052 }, { "epoch": 1.198963579300781, "grad_norm": 0.08262678793966316, "learning_rate": 8.280256981651527e-06, "loss": 0.3304, "step": 2053 }, { "epoch": 1.1995474782862565, "grad_norm": 0.09047889807206126, "learning_rate": 8.270213127547871e-06, "loss": 0.3301, "step": 2054 }, { "epoch": 1.200131377271732, "grad_norm": 0.08604162302837294, "learning_rate": 8.260171071937863e-06, "loss": 0.323, "step": 2055 }, { "epoch": 1.2007152762572075, "grad_norm": 0.08835381189874468, "learning_rate": 8.250130825262426e-06, "loss": 0.2939, "step": 2056 }, { "epoch": 1.201299175242683, "grad_norm": 0.09280176342913644, "learning_rate": 8.240092397960601e-06, "loss": 0.3371, "step": 2057 }, { "epoch": 1.2018830742281585, "grad_norm": 0.08959884361996777, "learning_rate": 8.230055800469543e-06, "loss": 0.3305, "step": 2058 }, { "epoch": 1.202466973213634, "grad_norm": 0.07894014303361871, "learning_rate": 8.2200210432245e-06, "loss": 0.2723, "step": 2059 }, { "epoch": 1.2030508721991096, "grad_norm": 0.08087452601801001, "learning_rate": 8.209988136658805e-06, "loss": 0.2994, "step": 2060 }, { "epoch": 1.2036347711845852, "grad_norm": 0.08658670145464534, "learning_rate": 8.199957091203876e-06, "loss": 0.3024, "step": 2061 }, { "epoch": 1.2042186701700606, "grad_norm": 0.08900777542381434, "learning_rate": 8.189927917289182e-06, "loss": 0.3221, "step": 2062 }, { "epoch": 1.2048025691555362, "grad_norm": 0.09690753996610302, "learning_rate": 8.179900625342256e-06, "loss": 0.3662, "step": 2063 }, { "epoch": 1.2053864681410116, "grad_norm": 0.08799074673115133, "learning_rate": 8.169875225788675e-06, "loss": 0.3587, "step": 2064 }, { "epoch": 1.2059703671264872, "grad_norm": 0.07973315312213212, "learning_rate": 8.159851729052041e-06, "loss": 0.2946, "step": 2065 }, { "epoch": 1.2065542661119626, "grad_norm": 0.08970936077434381, "learning_rate": 8.149830145553982e-06, "loss": 0.3656, "step": 2066 }, { "epoch": 1.2071381650974382, "grad_norm": 0.08776605708697786, "learning_rate": 8.139810485714142e-06, "loss": 0.3188, "step": 2067 }, { "epoch": 1.2077220640829136, "grad_norm": 0.08705943128324074, "learning_rate": 8.129792759950157e-06, "loss": 0.3554, "step": 2068 }, { "epoch": 1.2083059630683892, "grad_norm": 0.08593240539977819, "learning_rate": 8.119776978677655e-06, "loss": 0.3182, "step": 2069 }, { "epoch": 1.2088898620538646, "grad_norm": 0.09224034227713827, "learning_rate": 8.10976315231024e-06, "loss": 0.367, "step": 2070 }, { "epoch": 1.2094737610393402, "grad_norm": 0.08433522982986638, "learning_rate": 8.099751291259485e-06, "loss": 0.3107, "step": 2071 }, { "epoch": 1.2100576600248156, "grad_norm": 0.08114709248152309, "learning_rate": 8.089741405934923e-06, "loss": 0.2869, "step": 2072 }, { "epoch": 1.2106415590102912, "grad_norm": 0.09368303899309445, "learning_rate": 8.079733506744027e-06, "loss": 0.3421, "step": 2073 }, { "epoch": 1.2112254579957666, "grad_norm": 0.09515405664382794, "learning_rate": 8.069727604092213e-06, "loss": 0.3233, "step": 2074 }, { "epoch": 1.2118093569812423, "grad_norm": 0.08986224149352919, "learning_rate": 8.05972370838281e-06, "loss": 0.3008, "step": 2075 }, { "epoch": 1.2123932559667177, "grad_norm": 0.08626084596975624, "learning_rate": 8.04972183001707e-06, "loss": 0.2973, "step": 2076 }, { "epoch": 1.2129771549521933, "grad_norm": 0.09228176103604617, "learning_rate": 8.03972197939414e-06, "loss": 0.3538, "step": 2077 }, { "epoch": 1.2135610539376689, "grad_norm": 0.09126341653164123, "learning_rate": 8.029724166911069e-06, "loss": 0.3731, "step": 2078 }, { "epoch": 1.2141449529231443, "grad_norm": 0.08092734876667305, "learning_rate": 8.019728402962776e-06, "loss": 0.2932, "step": 2079 }, { "epoch": 1.21472885190862, "grad_norm": 0.09416359986062338, "learning_rate": 8.009734697942054e-06, "loss": 0.3572, "step": 2080 }, { "epoch": 1.2153127508940953, "grad_norm": 0.08829388238119985, "learning_rate": 7.999743062239557e-06, "loss": 0.308, "step": 2081 }, { "epoch": 1.215896649879571, "grad_norm": 0.0874394699027345, "learning_rate": 7.989753506243787e-06, "loss": 0.3328, "step": 2082 }, { "epoch": 1.2164805488650463, "grad_norm": 0.08088151893101692, "learning_rate": 7.979766040341084e-06, "loss": 0.3098, "step": 2083 }, { "epoch": 1.217064447850522, "grad_norm": 0.08424497475452325, "learning_rate": 7.969780674915613e-06, "loss": 0.3302, "step": 2084 }, { "epoch": 1.2176483468359973, "grad_norm": 0.08210572707022881, "learning_rate": 7.959797420349356e-06, "loss": 0.2942, "step": 2085 }, { "epoch": 1.218232245821473, "grad_norm": 0.08959913594718236, "learning_rate": 7.949816287022098e-06, "loss": 0.3176, "step": 2086 }, { "epoch": 1.2188161448069483, "grad_norm": 0.09913480023658164, "learning_rate": 7.939837285311425e-06, "loss": 0.3894, "step": 2087 }, { "epoch": 1.219400043792424, "grad_norm": 0.09459672189240328, "learning_rate": 7.9298604255927e-06, "loss": 0.355, "step": 2088 }, { "epoch": 1.2199839427778993, "grad_norm": 0.08606251985966207, "learning_rate": 7.919885718239063e-06, "loss": 0.2999, "step": 2089 }, { "epoch": 1.220567841763375, "grad_norm": 0.09668733476168442, "learning_rate": 7.909913173621413e-06, "loss": 0.3595, "step": 2090 }, { "epoch": 1.2211517407488504, "grad_norm": 0.09120788409989254, "learning_rate": 7.899942802108402e-06, "loss": 0.3332, "step": 2091 }, { "epoch": 1.221735639734326, "grad_norm": 0.09131265856305618, "learning_rate": 7.889974614066425e-06, "loss": 0.3096, "step": 2092 }, { "epoch": 1.2223195387198014, "grad_norm": 0.08892149583190279, "learning_rate": 7.880008619859601e-06, "loss": 0.3438, "step": 2093 }, { "epoch": 1.222903437705277, "grad_norm": 0.0921176701178928, "learning_rate": 7.870044829849772e-06, "loss": 0.3347, "step": 2094 }, { "epoch": 1.2234873366907526, "grad_norm": 0.08997459740368331, "learning_rate": 7.860083254396491e-06, "loss": 0.3158, "step": 2095 }, { "epoch": 1.224071235676228, "grad_norm": 0.09378413918741564, "learning_rate": 7.850123903857004e-06, "loss": 0.3339, "step": 2096 }, { "epoch": 1.2246551346617036, "grad_norm": 0.0870073393352189, "learning_rate": 7.840166788586244e-06, "loss": 0.3019, "step": 2097 }, { "epoch": 1.225239033647179, "grad_norm": 0.08535315633798449, "learning_rate": 7.83021191893682e-06, "loss": 0.3154, "step": 2098 }, { "epoch": 1.2258229326326546, "grad_norm": 0.08166207112980854, "learning_rate": 7.820259305259009e-06, "loss": 0.2975, "step": 2099 }, { "epoch": 1.22640683161813, "grad_norm": 0.08310168715890733, "learning_rate": 7.810308957900736e-06, "loss": 0.2861, "step": 2100 }, { "epoch": 1.2269907306036056, "grad_norm": 0.09896229537620138, "learning_rate": 7.800360887207579e-06, "loss": 0.3498, "step": 2101 }, { "epoch": 1.227574629589081, "grad_norm": 0.09729995405313652, "learning_rate": 7.790415103522744e-06, "loss": 0.3505, "step": 2102 }, { "epoch": 1.2281585285745567, "grad_norm": 0.08613512798558857, "learning_rate": 7.780471617187056e-06, "loss": 0.3351, "step": 2103 }, { "epoch": 1.228742427560032, "grad_norm": 0.08168854151474499, "learning_rate": 7.770530438538955e-06, "loss": 0.2713, "step": 2104 }, { "epoch": 1.2293263265455077, "grad_norm": 0.10041626991117764, "learning_rate": 7.760591577914483e-06, "loss": 0.3448, "step": 2105 }, { "epoch": 1.229910225530983, "grad_norm": 0.09391646945567633, "learning_rate": 7.750655045647268e-06, "loss": 0.331, "step": 2106 }, { "epoch": 1.2304941245164587, "grad_norm": 0.10108979350494209, "learning_rate": 7.740720852068524e-06, "loss": 0.344, "step": 2107 }, { "epoch": 1.231078023501934, "grad_norm": 0.08752047862143388, "learning_rate": 7.730789007507023e-06, "loss": 0.2825, "step": 2108 }, { "epoch": 1.2316619224874097, "grad_norm": 0.09796653324389402, "learning_rate": 7.720859522289101e-06, "loss": 0.3541, "step": 2109 }, { "epoch": 1.232245821472885, "grad_norm": 0.09548129312430939, "learning_rate": 7.710932406738643e-06, "loss": 0.3013, "step": 2110 }, { "epoch": 1.2328297204583607, "grad_norm": 0.09238771878578352, "learning_rate": 7.701007671177066e-06, "loss": 0.3138, "step": 2111 }, { "epoch": 1.2334136194438363, "grad_norm": 0.09700250284677096, "learning_rate": 7.691085325923317e-06, "loss": 0.3354, "step": 2112 }, { "epoch": 1.2339975184293117, "grad_norm": 0.08825328043451053, "learning_rate": 7.68116538129385e-06, "loss": 0.3156, "step": 2113 }, { "epoch": 1.2345814174147873, "grad_norm": 0.08531181477144863, "learning_rate": 7.671247847602628e-06, "loss": 0.319, "step": 2114 }, { "epoch": 1.2351653164002627, "grad_norm": 0.0925239031541929, "learning_rate": 7.661332735161111e-06, "loss": 0.299, "step": 2115 }, { "epoch": 1.2357492153857383, "grad_norm": 0.09547338767161338, "learning_rate": 7.651420054278234e-06, "loss": 0.3425, "step": 2116 }, { "epoch": 1.2363331143712137, "grad_norm": 0.08276657589640057, "learning_rate": 7.641509815260412e-06, "loss": 0.2955, "step": 2117 }, { "epoch": 1.2369170133566894, "grad_norm": 0.08913175848339006, "learning_rate": 7.631602028411512e-06, "loss": 0.3462, "step": 2118 }, { "epoch": 1.2375009123421647, "grad_norm": 0.08660363071980094, "learning_rate": 7.621696704032857e-06, "loss": 0.3166, "step": 2119 }, { "epoch": 1.2380848113276404, "grad_norm": 0.08576788017472171, "learning_rate": 7.6117938524232105e-06, "loss": 0.2698, "step": 2120 }, { "epoch": 1.2386687103131158, "grad_norm": 0.08909533979604949, "learning_rate": 7.601893483878761e-06, "loss": 0.3167, "step": 2121 }, { "epoch": 1.2392526092985914, "grad_norm": 0.09102252315032247, "learning_rate": 7.591995608693118e-06, "loss": 0.3167, "step": 2122 }, { "epoch": 1.2398365082840668, "grad_norm": 0.091859462025965, "learning_rate": 7.5821002371573005e-06, "loss": 0.3224, "step": 2123 }, { "epoch": 1.2404204072695424, "grad_norm": 0.09197060687079364, "learning_rate": 7.572207379559722e-06, "loss": 0.3395, "step": 2124 }, { "epoch": 1.2410043062550178, "grad_norm": 0.08298059473289937, "learning_rate": 7.562317046186182e-06, "loss": 0.2833, "step": 2125 }, { "epoch": 1.2415882052404934, "grad_norm": 0.0848179580350833, "learning_rate": 7.552429247319854e-06, "loss": 0.2856, "step": 2126 }, { "epoch": 1.2421721042259688, "grad_norm": 0.08534376959445797, "learning_rate": 7.542543993241278e-06, "loss": 0.3337, "step": 2127 }, { "epoch": 1.2427560032114444, "grad_norm": 0.08727456476330539, "learning_rate": 7.53266129422835e-06, "loss": 0.2888, "step": 2128 }, { "epoch": 1.24333990219692, "grad_norm": 0.08640377237275836, "learning_rate": 7.522781160556308e-06, "loss": 0.3413, "step": 2129 }, { "epoch": 1.2439238011823954, "grad_norm": 0.08142490166891722, "learning_rate": 7.512903602497723e-06, "loss": 0.2599, "step": 2130 }, { "epoch": 1.244507700167871, "grad_norm": 0.08931412035777715, "learning_rate": 7.503028630322486e-06, "loss": 0.3136, "step": 2131 }, { "epoch": 1.2450915991533464, "grad_norm": 0.08912118714003404, "learning_rate": 7.4931562542977994e-06, "loss": 0.3297, "step": 2132 }, { "epoch": 1.245675498138822, "grad_norm": 0.09073852403090032, "learning_rate": 7.483286484688172e-06, "loss": 0.3409, "step": 2133 }, { "epoch": 1.2462593971242975, "grad_norm": 0.08674854407624938, "learning_rate": 7.473419331755395e-06, "loss": 0.2701, "step": 2134 }, { "epoch": 1.246843296109773, "grad_norm": 0.09358775998941823, "learning_rate": 7.463554805758546e-06, "loss": 0.3472, "step": 2135 }, { "epoch": 1.2474271950952485, "grad_norm": 0.08668573843424446, "learning_rate": 7.453692916953965e-06, "loss": 0.302, "step": 2136 }, { "epoch": 1.248011094080724, "grad_norm": 0.0869808821421798, "learning_rate": 7.443833675595254e-06, "loss": 0.3051, "step": 2137 }, { "epoch": 1.2485949930661995, "grad_norm": 0.08610632485889527, "learning_rate": 7.433977091933262e-06, "loss": 0.3154, "step": 2138 }, { "epoch": 1.249178892051675, "grad_norm": 0.08705991428054184, "learning_rate": 7.424123176216072e-06, "loss": 0.3553, "step": 2139 }, { "epoch": 1.2497627910371505, "grad_norm": 0.08832011556918457, "learning_rate": 7.414271938689e-06, "loss": 0.3115, "step": 2140 }, { "epoch": 1.250346690022626, "grad_norm": 0.0931947841667326, "learning_rate": 7.404423389594569e-06, "loss": 0.354, "step": 2141 }, { "epoch": 1.2509305890081017, "grad_norm": 0.08610499418191037, "learning_rate": 7.394577539172507e-06, "loss": 0.2775, "step": 2142 }, { "epoch": 1.2515144879935771, "grad_norm": 0.09512050829409294, "learning_rate": 7.3847343976597454e-06, "loss": 0.3427, "step": 2143 }, { "epoch": 1.2520983869790525, "grad_norm": 0.09731580529721853, "learning_rate": 7.374893975290391e-06, "loss": 0.3332, "step": 2144 }, { "epoch": 1.2526822859645281, "grad_norm": 0.08717385722519554, "learning_rate": 7.3650562822957285e-06, "loss": 0.2778, "step": 2145 }, { "epoch": 1.2532661849500037, "grad_norm": 0.0914460358278815, "learning_rate": 7.355221328904196e-06, "loss": 0.3237, "step": 2146 }, { "epoch": 1.2538500839354791, "grad_norm": 0.08800382171450388, "learning_rate": 7.3453891253413935e-06, "loss": 0.3138, "step": 2147 }, { "epoch": 1.2544339829209545, "grad_norm": 0.09306601283915962, "learning_rate": 7.335559681830058e-06, "loss": 0.3171, "step": 2148 }, { "epoch": 1.2550178819064302, "grad_norm": 0.08891057440658588, "learning_rate": 7.325733008590053e-06, "loss": 0.3002, "step": 2149 }, { "epoch": 1.2556017808919058, "grad_norm": 0.0872087692028981, "learning_rate": 7.315909115838367e-06, "loss": 0.3312, "step": 2150 }, { "epoch": 1.2561856798773812, "grad_norm": 0.0806611796758934, "learning_rate": 7.306088013789097e-06, "loss": 0.2655, "step": 2151 }, { "epoch": 1.2567695788628568, "grad_norm": 0.09406676835342327, "learning_rate": 7.296269712653436e-06, "loss": 0.3733, "step": 2152 }, { "epoch": 1.2573534778483322, "grad_norm": 0.08593019480015295, "learning_rate": 7.28645422263967e-06, "loss": 0.2683, "step": 2153 }, { "epoch": 1.2579373768338078, "grad_norm": 0.08838696893167697, "learning_rate": 7.27664155395315e-06, "loss": 0.3711, "step": 2154 }, { "epoch": 1.2585212758192832, "grad_norm": 0.08878362235106638, "learning_rate": 7.266831716796307e-06, "loss": 0.323, "step": 2155 }, { "epoch": 1.2591051748047588, "grad_norm": 0.07958286200024244, "learning_rate": 7.257024721368624e-06, "loss": 0.2856, "step": 2156 }, { "epoch": 1.2596890737902342, "grad_norm": 0.08234947616007947, "learning_rate": 7.247220577866625e-06, "loss": 0.2993, "step": 2157 }, { "epoch": 1.2602729727757098, "grad_norm": 0.08481802765730467, "learning_rate": 7.237419296483876e-06, "loss": 0.2949, "step": 2158 }, { "epoch": 1.2608568717611854, "grad_norm": 0.08309959539414018, "learning_rate": 7.227620887410958e-06, "loss": 0.3186, "step": 2159 }, { "epoch": 1.2614407707466608, "grad_norm": 0.09301097993974776, "learning_rate": 7.217825360835475e-06, "loss": 0.3614, "step": 2160 }, { "epoch": 1.2620246697321362, "grad_norm": 0.08752978392856701, "learning_rate": 7.208032726942027e-06, "loss": 0.3164, "step": 2161 }, { "epoch": 1.2626085687176118, "grad_norm": 0.08047829580756564, "learning_rate": 7.198242995912211e-06, "loss": 0.2605, "step": 2162 }, { "epoch": 1.2631924677030875, "grad_norm": 0.09082991392030525, "learning_rate": 7.1884561779246055e-06, "loss": 0.3481, "step": 2163 }, { "epoch": 1.2637763666885629, "grad_norm": 0.08485382539733681, "learning_rate": 7.178672283154756e-06, "loss": 0.3051, "step": 2164 }, { "epoch": 1.2643602656740383, "grad_norm": 0.09030906025108318, "learning_rate": 7.168891321775172e-06, "loss": 0.3773, "step": 2165 }, { "epoch": 1.2649441646595139, "grad_norm": 0.08235614748064492, "learning_rate": 7.159113303955314e-06, "loss": 0.2888, "step": 2166 }, { "epoch": 1.2655280636449895, "grad_norm": 0.08361432482219058, "learning_rate": 7.149338239861579e-06, "loss": 0.2865, "step": 2167 }, { "epoch": 1.2661119626304649, "grad_norm": 0.08788701182958861, "learning_rate": 7.139566139657298e-06, "loss": 0.3203, "step": 2168 }, { "epoch": 1.2666958616159405, "grad_norm": 0.08611834669916071, "learning_rate": 7.129797013502713e-06, "loss": 0.3012, "step": 2169 }, { "epoch": 1.267279760601416, "grad_norm": 0.08687050556063927, "learning_rate": 7.12003087155498e-06, "loss": 0.3004, "step": 2170 }, { "epoch": 1.2678636595868915, "grad_norm": 0.08637198710145119, "learning_rate": 7.110267723968147e-06, "loss": 0.3144, "step": 2171 }, { "epoch": 1.268447558572367, "grad_norm": 0.08528324548480504, "learning_rate": 7.100507580893156e-06, "loss": 0.3177, "step": 2172 }, { "epoch": 1.2690314575578425, "grad_norm": 0.08351602591289879, "learning_rate": 7.09075045247782e-06, "loss": 0.3142, "step": 2173 }, { "epoch": 1.269615356543318, "grad_norm": 0.08290019430587074, "learning_rate": 7.080996348866817e-06, "loss": 0.3119, "step": 2174 }, { "epoch": 1.2701992555287935, "grad_norm": 0.08140834897382238, "learning_rate": 7.071245280201682e-06, "loss": 0.2782, "step": 2175 }, { "epoch": 1.2707831545142692, "grad_norm": 0.09419869043461578, "learning_rate": 7.061497256620793e-06, "loss": 0.3178, "step": 2176 }, { "epoch": 1.2713670534997445, "grad_norm": 0.08997029704466278, "learning_rate": 7.051752288259366e-06, "loss": 0.296, "step": 2177 }, { "epoch": 1.27195095248522, "grad_norm": 0.08569815527817408, "learning_rate": 7.042010385249433e-06, "loss": 0.302, "step": 2178 }, { "epoch": 1.2725348514706956, "grad_norm": 0.08662756224932051, "learning_rate": 7.032271557719847e-06, "loss": 0.3446, "step": 2179 }, { "epoch": 1.2731187504561712, "grad_norm": 0.08760880446038999, "learning_rate": 7.022535815796261e-06, "loss": 0.3192, "step": 2180 }, { "epoch": 1.2737026494416466, "grad_norm": 0.09286258164023833, "learning_rate": 7.012803169601118e-06, "loss": 0.3075, "step": 2181 }, { "epoch": 1.2742865484271222, "grad_norm": 0.0897883552489813, "learning_rate": 7.003073629253638e-06, "loss": 0.2866, "step": 2182 }, { "epoch": 1.2748704474125976, "grad_norm": 0.08485114074243363, "learning_rate": 6.9933472048698225e-06, "loss": 0.291, "step": 2183 }, { "epoch": 1.2754543463980732, "grad_norm": 0.08830831317649213, "learning_rate": 6.983623906562422e-06, "loss": 0.3198, "step": 2184 }, { "epoch": 1.2760382453835486, "grad_norm": 0.08586254624809764, "learning_rate": 6.973903744440949e-06, "loss": 0.2989, "step": 2185 }, { "epoch": 1.2766221443690242, "grad_norm": 0.09636010921038482, "learning_rate": 6.964186728611644e-06, "loss": 0.3256, "step": 2186 }, { "epoch": 1.2772060433544996, "grad_norm": 0.08984117134967187, "learning_rate": 6.954472869177479e-06, "loss": 0.3139, "step": 2187 }, { "epoch": 1.2777899423399752, "grad_norm": 0.08626729101123223, "learning_rate": 6.944762176238149e-06, "loss": 0.3232, "step": 2188 }, { "epoch": 1.2783738413254506, "grad_norm": 0.09082250610716473, "learning_rate": 6.935054659890053e-06, "loss": 0.3365, "step": 2189 }, { "epoch": 1.2789577403109262, "grad_norm": 0.09394983916284141, "learning_rate": 6.9253503302262855e-06, "loss": 0.3248, "step": 2190 }, { "epoch": 1.2795416392964016, "grad_norm": 0.08433757097899675, "learning_rate": 6.915649197336638e-06, "loss": 0.293, "step": 2191 }, { "epoch": 1.2801255382818773, "grad_norm": 0.0860670202033237, "learning_rate": 6.905951271307561e-06, "loss": 0.3191, "step": 2192 }, { "epoch": 1.2807094372673529, "grad_norm": 0.09337063730843541, "learning_rate": 6.896256562222184e-06, "loss": 0.3364, "step": 2193 }, { "epoch": 1.2812933362528283, "grad_norm": 0.09899040650196779, "learning_rate": 6.8865650801602855e-06, "loss": 0.4078, "step": 2194 }, { "epoch": 1.2818772352383037, "grad_norm": 0.0775053112598099, "learning_rate": 6.8768768351982964e-06, "loss": 0.2624, "step": 2195 }, { "epoch": 1.2824611342237793, "grad_norm": 0.08413464858914325, "learning_rate": 6.867191837409275e-06, "loss": 0.2912, "step": 2196 }, { "epoch": 1.283045033209255, "grad_norm": 0.08544252659538373, "learning_rate": 6.857510096862901e-06, "loss": 0.2958, "step": 2197 }, { "epoch": 1.2836289321947303, "grad_norm": 0.09042173964144083, "learning_rate": 6.847831623625476e-06, "loss": 0.3472, "step": 2198 }, { "epoch": 1.284212831180206, "grad_norm": 0.08598613575294824, "learning_rate": 6.8381564277598974e-06, "loss": 0.284, "step": 2199 }, { "epoch": 1.2847967301656813, "grad_norm": 0.08764465347106148, "learning_rate": 6.82848451932566e-06, "loss": 0.3142, "step": 2200 }, { "epoch": 1.285380629151157, "grad_norm": 0.10069541785442301, "learning_rate": 6.81881590837884e-06, "loss": 0.383, "step": 2201 }, { "epoch": 1.2859645281366323, "grad_norm": 0.08011413362344992, "learning_rate": 6.809150604972079e-06, "loss": 0.2683, "step": 2202 }, { "epoch": 1.286548427122108, "grad_norm": 0.08133082857820634, "learning_rate": 6.799488619154586e-06, "loss": 0.2845, "step": 2203 }, { "epoch": 1.2871323261075833, "grad_norm": 0.0912591841521171, "learning_rate": 6.7898299609721186e-06, "loss": 0.3376, "step": 2204 }, { "epoch": 1.287716225093059, "grad_norm": 0.09601578902557209, "learning_rate": 6.780174640466976e-06, "loss": 0.3429, "step": 2205 }, { "epoch": 1.2883001240785343, "grad_norm": 0.10104437265713118, "learning_rate": 6.7705226676779855e-06, "loss": 0.3286, "step": 2206 }, { "epoch": 1.28888402306401, "grad_norm": 0.08973912631567826, "learning_rate": 6.760874052640494e-06, "loss": 0.2977, "step": 2207 }, { "epoch": 1.2894679220494853, "grad_norm": 0.07889689992197096, "learning_rate": 6.751228805386363e-06, "loss": 0.3031, "step": 2208 }, { "epoch": 1.290051821034961, "grad_norm": 0.09126789268604062, "learning_rate": 6.741586935943937e-06, "loss": 0.3244, "step": 2209 }, { "epoch": 1.2906357200204366, "grad_norm": 0.09201732982092668, "learning_rate": 6.731948454338064e-06, "loss": 0.2883, "step": 2210 }, { "epoch": 1.291219619005912, "grad_norm": 0.08945513181330798, "learning_rate": 6.7223133705900635e-06, "loss": 0.3554, "step": 2211 }, { "epoch": 1.2918035179913874, "grad_norm": 0.08655793699245458, "learning_rate": 6.712681694717723e-06, "loss": 0.3236, "step": 2212 }, { "epoch": 1.292387416976863, "grad_norm": 0.09522658467431686, "learning_rate": 6.7030534367352884e-06, "loss": 0.3381, "step": 2213 }, { "epoch": 1.2929713159623386, "grad_norm": 0.0957576503249212, "learning_rate": 6.693428606653445e-06, "loss": 0.3542, "step": 2214 }, { "epoch": 1.293555214947814, "grad_norm": 0.08856479337711057, "learning_rate": 6.683807214479323e-06, "loss": 0.3156, "step": 2215 }, { "epoch": 1.2941391139332896, "grad_norm": 0.09265247098335193, "learning_rate": 6.6741892702164735e-06, "loss": 0.3472, "step": 2216 }, { "epoch": 1.294723012918765, "grad_norm": 0.07967434909494173, "learning_rate": 6.664574783864862e-06, "loss": 0.2998, "step": 2217 }, { "epoch": 1.2953069119042406, "grad_norm": 0.0904109239380212, "learning_rate": 6.654963765420866e-06, "loss": 0.3213, "step": 2218 }, { "epoch": 1.295890810889716, "grad_norm": 0.07809774869844954, "learning_rate": 6.645356224877242e-06, "loss": 0.2744, "step": 2219 }, { "epoch": 1.2964747098751916, "grad_norm": 0.09351681498282806, "learning_rate": 6.635752172223146e-06, "loss": 0.3362, "step": 2220 }, { "epoch": 1.297058608860667, "grad_norm": 0.08002051668713166, "learning_rate": 6.626151617444103e-06, "loss": 0.2748, "step": 2221 }, { "epoch": 1.2976425078461427, "grad_norm": 0.08458411644193373, "learning_rate": 6.6165545705219955e-06, "loss": 0.3095, "step": 2222 }, { "epoch": 1.298226406831618, "grad_norm": 0.08880642007727417, "learning_rate": 6.606961041435068e-06, "loss": 0.3458, "step": 2223 }, { "epoch": 1.2988103058170937, "grad_norm": 0.0883997142137197, "learning_rate": 6.5973710401578985e-06, "loss": 0.3211, "step": 2224 }, { "epoch": 1.299394204802569, "grad_norm": 0.0870004578817393, "learning_rate": 6.587784576661401e-06, "loss": 0.2794, "step": 2225 }, { "epoch": 1.2999781037880447, "grad_norm": 0.08874379965674395, "learning_rate": 6.578201660912814e-06, "loss": 0.2783, "step": 2226 }, { "epoch": 1.3005620027735203, "grad_norm": 0.09533076369774553, "learning_rate": 6.568622302875682e-06, "loss": 0.3442, "step": 2227 }, { "epoch": 1.3011459017589957, "grad_norm": 0.08115335434102496, "learning_rate": 6.559046512509859e-06, "loss": 0.2729, "step": 2228 }, { "epoch": 1.301729800744471, "grad_norm": 0.0850998733317483, "learning_rate": 6.5494742997714765e-06, "loss": 0.3323, "step": 2229 }, { "epoch": 1.3023136997299467, "grad_norm": 0.08632346689736857, "learning_rate": 6.539905674612956e-06, "loss": 0.3351, "step": 2230 }, { "epoch": 1.3028975987154223, "grad_norm": 0.0902462963412857, "learning_rate": 6.530340646982987e-06, "loss": 0.3528, "step": 2231 }, { "epoch": 1.3034814977008977, "grad_norm": 0.09057147771840249, "learning_rate": 6.520779226826517e-06, "loss": 0.2897, "step": 2232 }, { "epoch": 1.3040653966863733, "grad_norm": 0.08830156927705757, "learning_rate": 6.511221424084748e-06, "loss": 0.2989, "step": 2233 }, { "epoch": 1.3046492956718487, "grad_norm": 0.08858461189602258, "learning_rate": 6.501667248695107e-06, "loss": 0.3007, "step": 2234 }, { "epoch": 1.3052331946573243, "grad_norm": 0.08108664710055206, "learning_rate": 6.4921167105912696e-06, "loss": 0.2534, "step": 2235 }, { "epoch": 1.3058170936427997, "grad_norm": 0.087557761420373, "learning_rate": 6.482569819703117e-06, "loss": 0.2833, "step": 2236 }, { "epoch": 1.3064009926282754, "grad_norm": 0.0905524879077499, "learning_rate": 6.473026585956736e-06, "loss": 0.3479, "step": 2237 }, { "epoch": 1.3069848916137508, "grad_norm": 0.09615559416274028, "learning_rate": 6.4634870192744205e-06, "loss": 0.341, "step": 2238 }, { "epoch": 1.3075687905992264, "grad_norm": 0.08624605615599874, "learning_rate": 6.453951129574644e-06, "loss": 0.2862, "step": 2239 }, { "epoch": 1.3081526895847018, "grad_norm": 0.09573700610611838, "learning_rate": 6.44441892677206e-06, "loss": 0.3384, "step": 2240 }, { "epoch": 1.3087365885701774, "grad_norm": 0.09228829259854936, "learning_rate": 6.434890420777491e-06, "loss": 0.3192, "step": 2241 }, { "epoch": 1.3093204875556528, "grad_norm": 0.0878535360879263, "learning_rate": 6.42536562149791e-06, "loss": 0.3072, "step": 2242 }, { "epoch": 1.3099043865411284, "grad_norm": 0.09392982233468405, "learning_rate": 6.41584453883644e-06, "loss": 0.2974, "step": 2243 }, { "epoch": 1.310488285526604, "grad_norm": 0.08715635294239421, "learning_rate": 6.40632718269234e-06, "loss": 0.3077, "step": 2244 }, { "epoch": 1.3110721845120794, "grad_norm": 0.08980154370329933, "learning_rate": 6.396813562960993e-06, "loss": 0.3171, "step": 2245 }, { "epoch": 1.3116560834975548, "grad_norm": 0.09149228739918655, "learning_rate": 6.387303689533899e-06, "loss": 0.2921, "step": 2246 }, { "epoch": 1.3122399824830304, "grad_norm": 0.08453221007335314, "learning_rate": 6.377797572298661e-06, "loss": 0.3493, "step": 2247 }, { "epoch": 1.312823881468506, "grad_norm": 0.08891750340128363, "learning_rate": 6.3682952211389735e-06, "loss": 0.32, "step": 2248 }, { "epoch": 1.3134077804539814, "grad_norm": 0.08264206668853581, "learning_rate": 6.358796645934624e-06, "loss": 0.2951, "step": 2249 }, { "epoch": 1.313991679439457, "grad_norm": 0.08582691628404672, "learning_rate": 6.349301856561468e-06, "loss": 0.3295, "step": 2250 }, { "epoch": 1.3145755784249324, "grad_norm": 0.09047236021623455, "learning_rate": 6.3398108628914264e-06, "loss": 0.3152, "step": 2251 }, { "epoch": 1.315159477410408, "grad_norm": 0.08343115878425511, "learning_rate": 6.330323674792472e-06, "loss": 0.2961, "step": 2252 }, { "epoch": 1.3157433763958835, "grad_norm": 0.07969295284138504, "learning_rate": 6.320840302128619e-06, "loss": 0.2838, "step": 2253 }, { "epoch": 1.316327275381359, "grad_norm": 0.08242357658053004, "learning_rate": 6.311360754759923e-06, "loss": 0.3188, "step": 2254 }, { "epoch": 1.3169111743668345, "grad_norm": 0.08288582932842543, "learning_rate": 6.301885042542455e-06, "loss": 0.3077, "step": 2255 }, { "epoch": 1.31749507335231, "grad_norm": 0.09182535314967293, "learning_rate": 6.292413175328302e-06, "loss": 0.3253, "step": 2256 }, { "epoch": 1.3180789723377855, "grad_norm": 0.08762487649712386, "learning_rate": 6.282945162965548e-06, "loss": 0.3271, "step": 2257 }, { "epoch": 1.318662871323261, "grad_norm": 0.08049025196646402, "learning_rate": 6.273481015298275e-06, "loss": 0.2884, "step": 2258 }, { "epoch": 1.3192467703087365, "grad_norm": 0.08859089407531165, "learning_rate": 6.264020742166543e-06, "loss": 0.3064, "step": 2259 }, { "epoch": 1.319830669294212, "grad_norm": 0.08525929098222725, "learning_rate": 6.2545643534063894e-06, "loss": 0.2978, "step": 2260 }, { "epoch": 1.3204145682796877, "grad_norm": 0.08762277334486207, "learning_rate": 6.245111858849808e-06, "loss": 0.3026, "step": 2261 }, { "epoch": 1.3209984672651631, "grad_norm": 0.08315511829418891, "learning_rate": 6.235663268324735e-06, "loss": 0.293, "step": 2262 }, { "epoch": 1.3215823662506385, "grad_norm": 0.08577330382275845, "learning_rate": 6.226218591655071e-06, "loss": 0.3107, "step": 2263 }, { "epoch": 1.3221662652361141, "grad_norm": 0.09048700169770689, "learning_rate": 6.216777838660627e-06, "loss": 0.3102, "step": 2264 }, { "epoch": 1.3227501642215898, "grad_norm": 0.08484809312631403, "learning_rate": 6.2073410191571395e-06, "loss": 0.3123, "step": 2265 }, { "epoch": 1.3233340632070651, "grad_norm": 0.08521936031963374, "learning_rate": 6.1979081429562575e-06, "loss": 0.3077, "step": 2266 }, { "epoch": 1.3239179621925408, "grad_norm": 0.08193863532635674, "learning_rate": 6.188479219865529e-06, "loss": 0.3165, "step": 2267 }, { "epoch": 1.3245018611780162, "grad_norm": 0.09004735849108048, "learning_rate": 6.179054259688393e-06, "loss": 0.2849, "step": 2268 }, { "epoch": 1.3250857601634918, "grad_norm": 0.08680882329694185, "learning_rate": 6.169633272224167e-06, "loss": 0.2873, "step": 2269 }, { "epoch": 1.3256696591489672, "grad_norm": 0.08127663082955232, "learning_rate": 6.160216267268037e-06, "loss": 0.2832, "step": 2270 }, { "epoch": 1.3262535581344428, "grad_norm": 0.09093699983869281, "learning_rate": 6.1508032546110485e-06, "loss": 0.3466, "step": 2271 }, { "epoch": 1.3268374571199182, "grad_norm": 0.09332989216592569, "learning_rate": 6.1413942440400994e-06, "loss": 0.3275, "step": 2272 }, { "epoch": 1.3274213561053938, "grad_norm": 0.08774262941413152, "learning_rate": 6.1319892453379235e-06, "loss": 0.3233, "step": 2273 }, { "epoch": 1.3280052550908692, "grad_norm": 0.0982297870077348, "learning_rate": 6.122588268283085e-06, "loss": 0.3491, "step": 2274 }, { "epoch": 1.3285891540763448, "grad_norm": 0.07789071154970467, "learning_rate": 6.113191322649964e-06, "loss": 0.2692, "step": 2275 }, { "epoch": 1.3291730530618202, "grad_norm": 0.09265226213877095, "learning_rate": 6.10379841820875e-06, "loss": 0.3373, "step": 2276 }, { "epoch": 1.3297569520472958, "grad_norm": 0.08454414002336644, "learning_rate": 6.094409564725435e-06, "loss": 0.3152, "step": 2277 }, { "epoch": 1.3303408510327714, "grad_norm": 0.08252382044411329, "learning_rate": 6.085024771961792e-06, "loss": 0.2981, "step": 2278 }, { "epoch": 1.3309247500182468, "grad_norm": 0.0884246612297904, "learning_rate": 6.07564404967538e-06, "loss": 0.312, "step": 2279 }, { "epoch": 1.3315086490037222, "grad_norm": 0.09378193462947719, "learning_rate": 6.06626740761952e-06, "loss": 0.3191, "step": 2280 }, { "epoch": 1.3320925479891979, "grad_norm": 0.08803427317096564, "learning_rate": 6.056894855543289e-06, "loss": 0.3111, "step": 2281 }, { "epoch": 1.3326764469746735, "grad_norm": 0.09276767658164181, "learning_rate": 6.047526403191517e-06, "loss": 0.3384, "step": 2282 }, { "epoch": 1.3332603459601489, "grad_norm": 0.09516984676300023, "learning_rate": 6.038162060304771e-06, "loss": 0.3291, "step": 2283 }, { "epoch": 1.3338442449456245, "grad_norm": 0.08117321403691427, "learning_rate": 6.028801836619345e-06, "loss": 0.3016, "step": 2284 }, { "epoch": 1.3344281439310999, "grad_norm": 0.08179274949319197, "learning_rate": 6.019445741867245e-06, "loss": 0.3092, "step": 2285 }, { "epoch": 1.3350120429165755, "grad_norm": 0.09360933806403865, "learning_rate": 6.010093785776188e-06, "loss": 0.3092, "step": 2286 }, { "epoch": 1.3355959419020509, "grad_norm": 0.08373505960694838, "learning_rate": 6.0007459780695885e-06, "loss": 0.2973, "step": 2287 }, { "epoch": 1.3361798408875265, "grad_norm": 0.08533743975939018, "learning_rate": 5.991402328466549e-06, "loss": 0.2858, "step": 2288 }, { "epoch": 1.336763739873002, "grad_norm": 0.08881720636598037, "learning_rate": 5.982062846681848e-06, "loss": 0.3314, "step": 2289 }, { "epoch": 1.3373476388584775, "grad_norm": 0.08793771065990484, "learning_rate": 5.97272754242592e-06, "loss": 0.3116, "step": 2290 }, { "epoch": 1.3379315378439531, "grad_norm": 0.08673433364140412, "learning_rate": 5.963396425404877e-06, "loss": 0.3225, "step": 2291 }, { "epoch": 1.3385154368294285, "grad_norm": 0.0831854895582105, "learning_rate": 5.954069505320466e-06, "loss": 0.2987, "step": 2292 }, { "epoch": 1.339099335814904, "grad_norm": 0.07694976742962513, "learning_rate": 5.944746791870062e-06, "loss": 0.2492, "step": 2293 }, { "epoch": 1.3396832348003795, "grad_norm": 0.09575392543027876, "learning_rate": 5.935428294746679e-06, "loss": 0.335, "step": 2294 }, { "epoch": 1.3402671337858552, "grad_norm": 0.08923925798808227, "learning_rate": 5.926114023638944e-06, "loss": 0.3172, "step": 2295 }, { "epoch": 1.3408510327713306, "grad_norm": 0.08610619058580482, "learning_rate": 5.916803988231087e-06, "loss": 0.2928, "step": 2296 }, { "epoch": 1.341434931756806, "grad_norm": 0.08975358871391753, "learning_rate": 5.907498198202939e-06, "loss": 0.3464, "step": 2297 }, { "epoch": 1.3420188307422816, "grad_norm": 0.08616857525963231, "learning_rate": 5.898196663229912e-06, "loss": 0.3249, "step": 2298 }, { "epoch": 1.3426027297277572, "grad_norm": 0.08759333912100035, "learning_rate": 5.888899392982994e-06, "loss": 0.3195, "step": 2299 }, { "epoch": 1.3431866287132326, "grad_norm": 0.09178099971457435, "learning_rate": 5.879606397128743e-06, "loss": 0.3163, "step": 2300 }, { "epoch": 1.3437705276987082, "grad_norm": 0.09153373176553041, "learning_rate": 5.8703176853292705e-06, "loss": 0.345, "step": 2301 }, { "epoch": 1.3443544266841836, "grad_norm": 0.08937671899472267, "learning_rate": 5.861033267242238e-06, "loss": 0.346, "step": 2302 }, { "epoch": 1.3449383256696592, "grad_norm": 0.08208113012645571, "learning_rate": 5.85175315252083e-06, "loss": 0.2881, "step": 2303 }, { "epoch": 1.3455222246551346, "grad_norm": 0.08796555573088084, "learning_rate": 5.842477350813773e-06, "loss": 0.3064, "step": 2304 }, { "epoch": 1.3461061236406102, "grad_norm": 0.09440330403612762, "learning_rate": 5.833205871765297e-06, "loss": 0.3769, "step": 2305 }, { "epoch": 1.3466900226260856, "grad_norm": 0.08275532492117607, "learning_rate": 5.823938725015148e-06, "loss": 0.263, "step": 2306 }, { "epoch": 1.3472739216115612, "grad_norm": 0.09003397745794528, "learning_rate": 5.8146759201985525e-06, "loss": 0.3383, "step": 2307 }, { "epoch": 1.3478578205970368, "grad_norm": 0.09847448524702053, "learning_rate": 5.8054174669462425e-06, "loss": 0.355, "step": 2308 }, { "epoch": 1.3484417195825122, "grad_norm": 0.09736111738347776, "learning_rate": 5.796163374884406e-06, "loss": 0.352, "step": 2309 }, { "epoch": 1.3490256185679876, "grad_norm": 0.08360253564360193, "learning_rate": 5.786913653634714e-06, "loss": 0.2942, "step": 2310 }, { "epoch": 1.3496095175534633, "grad_norm": 0.09493243116159365, "learning_rate": 5.77766831281428e-06, "loss": 0.3789, "step": 2311 }, { "epoch": 1.3501934165389389, "grad_norm": 0.08861016997188424, "learning_rate": 5.768427362035665e-06, "loss": 0.3477, "step": 2312 }, { "epoch": 1.3507773155244143, "grad_norm": 0.09046113710066689, "learning_rate": 5.759190810906876e-06, "loss": 0.2957, "step": 2313 }, { "epoch": 1.3513612145098897, "grad_norm": 0.0826716718820961, "learning_rate": 5.749958669031329e-06, "loss": 0.2736, "step": 2314 }, { "epoch": 1.3519451134953653, "grad_norm": 0.09432177233545337, "learning_rate": 5.740730946007874e-06, "loss": 0.3183, "step": 2315 }, { "epoch": 1.352529012480841, "grad_norm": 0.09054199419303746, "learning_rate": 5.7315076514307535e-06, "loss": 0.3162, "step": 2316 }, { "epoch": 1.3531129114663163, "grad_norm": 0.08961731067396933, "learning_rate": 5.722288794889603e-06, "loss": 0.3112, "step": 2317 }, { "epoch": 1.353696810451792, "grad_norm": 0.08600877367336744, "learning_rate": 5.713074385969457e-06, "loss": 0.2922, "step": 2318 }, { "epoch": 1.3542807094372673, "grad_norm": 0.0866007004058379, "learning_rate": 5.703864434250721e-06, "loss": 0.2922, "step": 2319 }, { "epoch": 1.354864608422743, "grad_norm": 0.08779476478099894, "learning_rate": 5.694658949309158e-06, "loss": 0.2941, "step": 2320 }, { "epoch": 1.3554485074082183, "grad_norm": 0.09313340072358776, "learning_rate": 5.685457940715898e-06, "loss": 0.3212, "step": 2321 }, { "epoch": 1.356032406393694, "grad_norm": 0.08459255419532735, "learning_rate": 5.67626141803741e-06, "loss": 0.2996, "step": 2322 }, { "epoch": 1.3566163053791693, "grad_norm": 0.08494285944172443, "learning_rate": 5.667069390835496e-06, "loss": 0.3006, "step": 2323 }, { "epoch": 1.357200204364645, "grad_norm": 0.09481270266494324, "learning_rate": 5.657881868667296e-06, "loss": 0.304, "step": 2324 }, { "epoch": 1.3577841033501206, "grad_norm": 0.08143122751494447, "learning_rate": 5.648698861085254e-06, "loss": 0.2713, "step": 2325 }, { "epoch": 1.358368002335596, "grad_norm": 0.08604426519380579, "learning_rate": 5.639520377637127e-06, "loss": 0.3001, "step": 2326 }, { "epoch": 1.3589519013210714, "grad_norm": 0.08898223050644989, "learning_rate": 5.630346427865965e-06, "loss": 0.3207, "step": 2327 }, { "epoch": 1.359535800306547, "grad_norm": 0.09401931367188837, "learning_rate": 5.621177021310101e-06, "loss": 0.3094, "step": 2328 }, { "epoch": 1.3601196992920226, "grad_norm": 0.09441494590387428, "learning_rate": 5.612012167503157e-06, "loss": 0.3665, "step": 2329 }, { "epoch": 1.360703598277498, "grad_norm": 0.09060397866897699, "learning_rate": 5.602851875974005e-06, "loss": 0.313, "step": 2330 }, { "epoch": 1.3612874972629734, "grad_norm": 0.08856070670426806, "learning_rate": 5.593696156246788e-06, "loss": 0.2953, "step": 2331 }, { "epoch": 1.361871396248449, "grad_norm": 0.09284217367482128, "learning_rate": 5.584545017840886e-06, "loss": 0.3706, "step": 2332 }, { "epoch": 1.3624552952339246, "grad_norm": 0.09416090588956384, "learning_rate": 5.575398470270913e-06, "loss": 0.3356, "step": 2333 }, { "epoch": 1.3630391942194, "grad_norm": 0.08489648989219867, "learning_rate": 5.566256523046727e-06, "loss": 0.2867, "step": 2334 }, { "epoch": 1.3636230932048756, "grad_norm": 0.0874206880928685, "learning_rate": 5.5571191856733795e-06, "loss": 0.2954, "step": 2335 }, { "epoch": 1.364206992190351, "grad_norm": 0.09760345279692444, "learning_rate": 5.547986467651152e-06, "loss": 0.3273, "step": 2336 }, { "epoch": 1.3647908911758266, "grad_norm": 0.09338689593694187, "learning_rate": 5.538858378475508e-06, "loss": 0.3513, "step": 2337 }, { "epoch": 1.365374790161302, "grad_norm": 0.09362660151833932, "learning_rate": 5.529734927637096e-06, "loss": 0.3019, "step": 2338 }, { "epoch": 1.3659586891467776, "grad_norm": 0.08847920859117826, "learning_rate": 5.520616124621759e-06, "loss": 0.3114, "step": 2339 }, { "epoch": 1.366542588132253, "grad_norm": 0.08348974381798854, "learning_rate": 5.511501978910488e-06, "loss": 0.3043, "step": 2340 }, { "epoch": 1.3671264871177287, "grad_norm": 0.08306554497601494, "learning_rate": 5.50239249997945e-06, "loss": 0.3052, "step": 2341 }, { "epoch": 1.3677103861032043, "grad_norm": 0.08611069599540168, "learning_rate": 5.493287697299943e-06, "loss": 0.3479, "step": 2342 }, { "epoch": 1.3682942850886797, "grad_norm": 0.08527206391526265, "learning_rate": 5.484187580338409e-06, "loss": 0.2762, "step": 2343 }, { "epoch": 1.368878184074155, "grad_norm": 0.08800813047316418, "learning_rate": 5.475092158556429e-06, "loss": 0.3342, "step": 2344 }, { "epoch": 1.3694620830596307, "grad_norm": 0.09490064570601169, "learning_rate": 5.4660014414106825e-06, "loss": 0.3597, "step": 2345 }, { "epoch": 1.3700459820451063, "grad_norm": 0.07915438046352805, "learning_rate": 5.4569154383529736e-06, "loss": 0.3043, "step": 2346 }, { "epoch": 1.3706298810305817, "grad_norm": 0.08148437212466486, "learning_rate": 5.447834158830202e-06, "loss": 0.2857, "step": 2347 }, { "epoch": 1.371213780016057, "grad_norm": 0.08740286878432764, "learning_rate": 5.438757612284348e-06, "loss": 0.2959, "step": 2348 }, { "epoch": 1.3717976790015327, "grad_norm": 0.09023359451626062, "learning_rate": 5.429685808152483e-06, "loss": 0.3253, "step": 2349 }, { "epoch": 1.3723815779870083, "grad_norm": 0.08546545252481401, "learning_rate": 5.420618755866736e-06, "loss": 0.3217, "step": 2350 }, { "epoch": 1.3729654769724837, "grad_norm": 0.08936108790830029, "learning_rate": 5.411556464854301e-06, "loss": 0.3019, "step": 2351 }, { "epoch": 1.3735493759579593, "grad_norm": 0.09481811991172912, "learning_rate": 5.4024989445374245e-06, "loss": 0.3072, "step": 2352 }, { "epoch": 1.3741332749434347, "grad_norm": 0.09327110273948484, "learning_rate": 5.393446204333386e-06, "loss": 0.3164, "step": 2353 }, { "epoch": 1.3747171739289104, "grad_norm": 0.08363842909881916, "learning_rate": 5.384398253654504e-06, "loss": 0.2811, "step": 2354 }, { "epoch": 1.3753010729143857, "grad_norm": 0.09360254923516381, "learning_rate": 5.3753551019081065e-06, "loss": 0.3504, "step": 2355 }, { "epoch": 1.3758849718998614, "grad_norm": 0.0840485444634229, "learning_rate": 5.366316758496537e-06, "loss": 0.2963, "step": 2356 }, { "epoch": 1.3764688708853368, "grad_norm": 0.08574881319253842, "learning_rate": 5.357283232817147e-06, "loss": 0.3035, "step": 2357 }, { "epoch": 1.3770527698708124, "grad_norm": 0.09249550643210704, "learning_rate": 5.348254534262262e-06, "loss": 0.3262, "step": 2358 }, { "epoch": 1.377636668856288, "grad_norm": 0.09937262423878042, "learning_rate": 5.339230672219209e-06, "loss": 0.3511, "step": 2359 }, { "epoch": 1.3782205678417634, "grad_norm": 0.09205947775733425, "learning_rate": 5.330211656070269e-06, "loss": 0.3197, "step": 2360 }, { "epoch": 1.3788044668272388, "grad_norm": 0.09467642178911786, "learning_rate": 5.3211974951926906e-06, "loss": 0.3653, "step": 2361 }, { "epoch": 1.3793883658127144, "grad_norm": 0.08264747606457908, "learning_rate": 5.312188198958681e-06, "loss": 0.299, "step": 2362 }, { "epoch": 1.37997226479819, "grad_norm": 0.088675769235655, "learning_rate": 5.303183776735379e-06, "loss": 0.2758, "step": 2363 }, { "epoch": 1.3805561637836654, "grad_norm": 0.09193065772442259, "learning_rate": 5.294184237884865e-06, "loss": 0.317, "step": 2364 }, { "epoch": 1.3811400627691408, "grad_norm": 0.08420801319193513, "learning_rate": 5.2851895917641345e-06, "loss": 0.3221, "step": 2365 }, { "epoch": 1.3817239617546164, "grad_norm": 0.08141945212865613, "learning_rate": 5.276199847725098e-06, "loss": 0.2877, "step": 2366 }, { "epoch": 1.382307860740092, "grad_norm": 0.08823894250041983, "learning_rate": 5.267215015114574e-06, "loss": 0.2969, "step": 2367 }, { "epoch": 1.3828917597255674, "grad_norm": 0.08145815299269736, "learning_rate": 5.258235103274265e-06, "loss": 0.2868, "step": 2368 }, { "epoch": 1.383475658711043, "grad_norm": 0.08707439961977498, "learning_rate": 5.249260121540772e-06, "loss": 0.3058, "step": 2369 }, { "epoch": 1.3840595576965184, "grad_norm": 0.08837249401443022, "learning_rate": 5.240290079245555e-06, "loss": 0.2824, "step": 2370 }, { "epoch": 1.384643456681994, "grad_norm": 0.08748505613404073, "learning_rate": 5.231324985714942e-06, "loss": 0.3047, "step": 2371 }, { "epoch": 1.3852273556674695, "grad_norm": 0.09612531396696913, "learning_rate": 5.222364850270125e-06, "loss": 0.3445, "step": 2372 }, { "epoch": 1.385811254652945, "grad_norm": 0.08417569682852692, "learning_rate": 5.213409682227129e-06, "loss": 0.3272, "step": 2373 }, { "epoch": 1.3863951536384205, "grad_norm": 0.09258441524959074, "learning_rate": 5.204459490896818e-06, "loss": 0.3245, "step": 2374 }, { "epoch": 1.386979052623896, "grad_norm": 0.08283856295196736, "learning_rate": 5.195514285584893e-06, "loss": 0.3075, "step": 2375 }, { "epoch": 1.3875629516093717, "grad_norm": 0.08958940112437727, "learning_rate": 5.1865740755918496e-06, "loss": 0.3061, "step": 2376 }, { "epoch": 1.388146850594847, "grad_norm": 0.09418867762321148, "learning_rate": 5.177638870213008e-06, "loss": 0.355, "step": 2377 }, { "epoch": 1.3887307495803225, "grad_norm": 0.08585638187101732, "learning_rate": 5.1687086787384786e-06, "loss": 0.3001, "step": 2378 }, { "epoch": 1.3893146485657981, "grad_norm": 0.08797467893223045, "learning_rate": 5.1597835104531514e-06, "loss": 0.32, "step": 2379 }, { "epoch": 1.3898985475512737, "grad_norm": 0.07530589210078502, "learning_rate": 5.1508633746367075e-06, "loss": 0.2661, "step": 2380 }, { "epoch": 1.3904824465367491, "grad_norm": 0.08314657989345382, "learning_rate": 5.141948280563582e-06, "loss": 0.3088, "step": 2381 }, { "epoch": 1.3910663455222245, "grad_norm": 0.08535685168215891, "learning_rate": 5.133038237502983e-06, "loss": 0.3038, "step": 2382 }, { "epoch": 1.3916502445077001, "grad_norm": 0.0858544206039079, "learning_rate": 5.1241332547188535e-06, "loss": 0.2899, "step": 2383 }, { "epoch": 1.3922341434931758, "grad_norm": 0.08359145474217688, "learning_rate": 5.1152333414698774e-06, "loss": 0.3179, "step": 2384 }, { "epoch": 1.3928180424786512, "grad_norm": 0.08900651910131571, "learning_rate": 5.106338507009478e-06, "loss": 0.2963, "step": 2385 }, { "epoch": 1.3934019414641268, "grad_norm": 0.08826004830242888, "learning_rate": 5.097448760585784e-06, "loss": 0.3162, "step": 2386 }, { "epoch": 1.3939858404496022, "grad_norm": 0.08551953348179987, "learning_rate": 5.088564111441645e-06, "loss": 0.3026, "step": 2387 }, { "epoch": 1.3945697394350778, "grad_norm": 0.08798355433901985, "learning_rate": 5.079684568814607e-06, "loss": 0.2937, "step": 2388 }, { "epoch": 1.3951536384205532, "grad_norm": 0.08251965242144757, "learning_rate": 5.070810141936901e-06, "loss": 0.2716, "step": 2389 }, { "epoch": 1.3957375374060288, "grad_norm": 0.0908530110955334, "learning_rate": 5.06194084003545e-06, "loss": 0.3334, "step": 2390 }, { "epoch": 1.3963214363915042, "grad_norm": 0.08155687151239266, "learning_rate": 5.053076672331837e-06, "loss": 0.2954, "step": 2391 }, { "epoch": 1.3969053353769798, "grad_norm": 0.08496431057171239, "learning_rate": 5.04421764804232e-06, "loss": 0.2962, "step": 2392 }, { "epoch": 1.3974892343624554, "grad_norm": 0.19611468182210692, "learning_rate": 5.035363776377797e-06, "loss": 0.2906, "step": 2393 }, { "epoch": 1.3980731333479308, "grad_norm": 0.095687665438837, "learning_rate": 5.026515066543813e-06, "loss": 0.3329, "step": 2394 }, { "epoch": 1.3986570323334062, "grad_norm": 0.09090191636657588, "learning_rate": 5.017671527740551e-06, "loss": 0.3164, "step": 2395 }, { "epoch": 1.3992409313188818, "grad_norm": 0.08681438627734209, "learning_rate": 5.008833169162805e-06, "loss": 0.2904, "step": 2396 }, { "epoch": 1.3998248303043574, "grad_norm": 0.0886026215131492, "learning_rate": 5.000000000000003e-06, "loss": 0.3078, "step": 2397 }, { "epoch": 1.4004087292898328, "grad_norm": 0.09269793849209079, "learning_rate": 4.991172029436157e-06, "loss": 0.3177, "step": 2398 }, { "epoch": 1.4009926282753082, "grad_norm": 0.08245909475099567, "learning_rate": 4.982349266649884e-06, "loss": 0.2883, "step": 2399 }, { "epoch": 1.4015765272607839, "grad_norm": 0.07925661546153173, "learning_rate": 4.97353172081439e-06, "loss": 0.2545, "step": 2400 }, { "epoch": 1.4021604262462595, "grad_norm": 0.09190476286483397, "learning_rate": 4.964719401097444e-06, "loss": 0.3407, "step": 2401 }, { "epoch": 1.4027443252317349, "grad_norm": 0.07882518859441015, "learning_rate": 4.9559123166613935e-06, "loss": 0.2805, "step": 2402 }, { "epoch": 1.4033282242172105, "grad_norm": 0.0962025740641383, "learning_rate": 4.947110476663143e-06, "loss": 0.345, "step": 2403 }, { "epoch": 1.4039121232026859, "grad_norm": 0.08222011699008967, "learning_rate": 4.93831389025413e-06, "loss": 0.2712, "step": 2404 }, { "epoch": 1.4044960221881615, "grad_norm": 0.09044195338709132, "learning_rate": 4.9295225665803504e-06, "loss": 0.2615, "step": 2405 }, { "epoch": 1.405079921173637, "grad_norm": 0.10380703384461384, "learning_rate": 4.9207365147823114e-06, "loss": 0.3319, "step": 2406 }, { "epoch": 1.4056638201591125, "grad_norm": 0.08802497670252618, "learning_rate": 4.911955743995042e-06, "loss": 0.3575, "step": 2407 }, { "epoch": 1.406247719144588, "grad_norm": 0.07908320799192949, "learning_rate": 4.90318026334809e-06, "loss": 0.2909, "step": 2408 }, { "epoch": 1.4068316181300635, "grad_norm": 0.08880292406186104, "learning_rate": 4.894410081965489e-06, "loss": 0.3147, "step": 2409 }, { "epoch": 1.4074155171155391, "grad_norm": 0.08228389323216383, "learning_rate": 4.885645208965779e-06, "loss": 0.2677, "step": 2410 }, { "epoch": 1.4079994161010145, "grad_norm": 0.09225713052080889, "learning_rate": 4.876885653461967e-06, "loss": 0.3545, "step": 2411 }, { "epoch": 1.40858331508649, "grad_norm": 0.08283192348774582, "learning_rate": 4.868131424561532e-06, "loss": 0.2809, "step": 2412 }, { "epoch": 1.4091672140719655, "grad_norm": 0.08275293695820472, "learning_rate": 4.859382531366428e-06, "loss": 0.3019, "step": 2413 }, { "epoch": 1.4097511130574412, "grad_norm": 0.0876853873291279, "learning_rate": 4.850638982973043e-06, "loss": 0.3043, "step": 2414 }, { "epoch": 1.4103350120429166, "grad_norm": 0.09064864325240873, "learning_rate": 4.841900788472227e-06, "loss": 0.288, "step": 2415 }, { "epoch": 1.410918911028392, "grad_norm": 0.08833463730195347, "learning_rate": 4.833167956949249e-06, "loss": 0.3059, "step": 2416 }, { "epoch": 1.4115028100138676, "grad_norm": 0.08530175368598045, "learning_rate": 4.824440497483802e-06, "loss": 0.2761, "step": 2417 }, { "epoch": 1.4120867089993432, "grad_norm": 0.08944382148226326, "learning_rate": 4.815718419150007e-06, "loss": 0.2858, "step": 2418 }, { "epoch": 1.4126706079848186, "grad_norm": 0.09469842984958769, "learning_rate": 4.807001731016374e-06, "loss": 0.3568, "step": 2419 }, { "epoch": 1.4132545069702942, "grad_norm": 0.0888957240954914, "learning_rate": 4.7982904421458245e-06, "loss": 0.307, "step": 2420 }, { "epoch": 1.4138384059557696, "grad_norm": 0.09069508181877699, "learning_rate": 4.789584561595651e-06, "loss": 0.3133, "step": 2421 }, { "epoch": 1.4144223049412452, "grad_norm": 0.09626795867612213, "learning_rate": 4.780884098417531e-06, "loss": 0.3355, "step": 2422 }, { "epoch": 1.4150062039267206, "grad_norm": 0.08983173254840386, "learning_rate": 4.772189061657511e-06, "loss": 0.299, "step": 2423 }, { "epoch": 1.4155901029121962, "grad_norm": 0.08777180579587864, "learning_rate": 4.763499460355988e-06, "loss": 0.2808, "step": 2424 }, { "epoch": 1.4161740018976716, "grad_norm": 0.09173728497142374, "learning_rate": 4.7548153035477185e-06, "loss": 0.309, "step": 2425 }, { "epoch": 1.4167579008831472, "grad_norm": 0.08551191537584889, "learning_rate": 4.746136600261791e-06, "loss": 0.2892, "step": 2426 }, { "epoch": 1.4173417998686229, "grad_norm": 0.08385868644842706, "learning_rate": 4.737463359521618e-06, "loss": 0.3034, "step": 2427 }, { "epoch": 1.4179256988540982, "grad_norm": 0.08870467714285539, "learning_rate": 4.728795590344948e-06, "loss": 0.3316, "step": 2428 }, { "epoch": 1.4185095978395736, "grad_norm": 0.08978595660458191, "learning_rate": 4.7201333017438266e-06, "loss": 0.3565, "step": 2429 }, { "epoch": 1.4190934968250493, "grad_norm": 0.08914861051012146, "learning_rate": 4.711476502724609e-06, "loss": 0.3129, "step": 2430 }, { "epoch": 1.4196773958105249, "grad_norm": 0.08361298688764628, "learning_rate": 4.702825202287944e-06, "loss": 0.2752, "step": 2431 }, { "epoch": 1.4202612947960003, "grad_norm": 0.09376399615130736, "learning_rate": 4.694179409428752e-06, "loss": 0.3332, "step": 2432 }, { "epoch": 1.4208451937814757, "grad_norm": 0.07890951860333585, "learning_rate": 4.685539133136244e-06, "loss": 0.255, "step": 2433 }, { "epoch": 1.4214290927669513, "grad_norm": 0.09390857814923426, "learning_rate": 4.6769043823938806e-06, "loss": 0.312, "step": 2434 }, { "epoch": 1.422012991752427, "grad_norm": 0.09665241957244797, "learning_rate": 4.668275166179383e-06, "loss": 0.3612, "step": 2435 }, { "epoch": 1.4225968907379023, "grad_norm": 0.08679031467366999, "learning_rate": 4.659651493464721e-06, "loss": 0.2892, "step": 2436 }, { "epoch": 1.423180789723378, "grad_norm": 0.08809944973539569, "learning_rate": 4.6510333732160915e-06, "loss": 0.3154, "step": 2437 }, { "epoch": 1.4237646887088533, "grad_norm": 0.07956349662721895, "learning_rate": 4.642420814393934e-06, "loss": 0.2847, "step": 2438 }, { "epoch": 1.424348587694329, "grad_norm": 0.09107111297386485, "learning_rate": 4.633813825952892e-06, "loss": 0.2961, "step": 2439 }, { "epoch": 1.4249324866798043, "grad_norm": 0.08455438463936661, "learning_rate": 4.625212416841816e-06, "loss": 0.3311, "step": 2440 }, { "epoch": 1.42551638566528, "grad_norm": 0.09124093366309591, "learning_rate": 4.616616596003772e-06, "loss": 0.37, "step": 2441 }, { "epoch": 1.4261002846507553, "grad_norm": 0.09617684882847008, "learning_rate": 4.6080263723759955e-06, "loss": 0.3477, "step": 2442 }, { "epoch": 1.426684183636231, "grad_norm": 0.09937035743586801, "learning_rate": 4.599441754889919e-06, "loss": 0.3686, "step": 2443 }, { "epoch": 1.4272680826217066, "grad_norm": 0.09639926900641384, "learning_rate": 4.590862752471138e-06, "loss": 0.3402, "step": 2444 }, { "epoch": 1.427851981607182, "grad_norm": 0.09075673149061966, "learning_rate": 4.582289374039405e-06, "loss": 0.2908, "step": 2445 }, { "epoch": 1.4284358805926574, "grad_norm": 0.08945575601215268, "learning_rate": 4.573721628508638e-06, "loss": 0.3264, "step": 2446 }, { "epoch": 1.429019779578133, "grad_norm": 0.08594402327289082, "learning_rate": 4.565159524786888e-06, "loss": 0.2966, "step": 2447 }, { "epoch": 1.4296036785636086, "grad_norm": 0.08386567772979121, "learning_rate": 4.556603071776347e-06, "loss": 0.2626, "step": 2448 }, { "epoch": 1.430187577549084, "grad_norm": 0.0855687483967937, "learning_rate": 4.548052278373327e-06, "loss": 0.302, "step": 2449 }, { "epoch": 1.4307714765345594, "grad_norm": 0.08782788535707856, "learning_rate": 4.539507153468254e-06, "loss": 0.309, "step": 2450 }, { "epoch": 1.431355375520035, "grad_norm": 0.09134483481057566, "learning_rate": 4.530967705945668e-06, "loss": 0.3172, "step": 2451 }, { "epoch": 1.4319392745055106, "grad_norm": 0.09492071958216543, "learning_rate": 4.522433944684197e-06, "loss": 0.3378, "step": 2452 }, { "epoch": 1.432523173490986, "grad_norm": 0.07747150922844902, "learning_rate": 4.513905878556568e-06, "loss": 0.2645, "step": 2453 }, { "epoch": 1.4331070724764616, "grad_norm": 0.09046928761921769, "learning_rate": 4.505383516429577e-06, "loss": 0.32, "step": 2454 }, { "epoch": 1.433690971461937, "grad_norm": 0.08360240047077284, "learning_rate": 4.496866867164087e-06, "loss": 0.3102, "step": 2455 }, { "epoch": 1.4342748704474126, "grad_norm": 0.08260333306950611, "learning_rate": 4.488355939615035e-06, "loss": 0.2856, "step": 2456 }, { "epoch": 1.434858769432888, "grad_norm": 0.08666872758262609, "learning_rate": 4.479850742631396e-06, "loss": 0.3235, "step": 2457 }, { "epoch": 1.4354426684183637, "grad_norm": 0.08029891789529729, "learning_rate": 4.471351285056192e-06, "loss": 0.3019, "step": 2458 }, { "epoch": 1.436026567403839, "grad_norm": 0.0846764450216516, "learning_rate": 4.462857575726482e-06, "loss": 0.2817, "step": 2459 }, { "epoch": 1.4366104663893147, "grad_norm": 0.08998043940656068, "learning_rate": 4.454369623473337e-06, "loss": 0.3103, "step": 2460 }, { "epoch": 1.4371943653747903, "grad_norm": 0.08684253448416031, "learning_rate": 4.445887437121855e-06, "loss": 0.2973, "step": 2461 }, { "epoch": 1.4377782643602657, "grad_norm": 0.08390985815531454, "learning_rate": 4.437411025491131e-06, "loss": 0.2901, "step": 2462 }, { "epoch": 1.438362163345741, "grad_norm": 0.09082579239414162, "learning_rate": 4.428940397394253e-06, "loss": 0.3097, "step": 2463 }, { "epoch": 1.4389460623312167, "grad_norm": 0.08932299274697621, "learning_rate": 4.420475561638309e-06, "loss": 0.3002, "step": 2464 }, { "epoch": 1.4395299613166923, "grad_norm": 0.09119277065316575, "learning_rate": 4.412016527024348e-06, "loss": 0.2787, "step": 2465 }, { "epoch": 1.4401138603021677, "grad_norm": 0.08940012216307111, "learning_rate": 4.4035633023474035e-06, "loss": 0.2968, "step": 2466 }, { "epoch": 1.4406977592876433, "grad_norm": 0.09343225280444262, "learning_rate": 4.395115896396457e-06, "loss": 0.338, "step": 2467 }, { "epoch": 1.4412816582731187, "grad_norm": 0.08503853470585143, "learning_rate": 4.386674317954439e-06, "loss": 0.2777, "step": 2468 }, { "epoch": 1.4418655572585943, "grad_norm": 0.09464191551199677, "learning_rate": 4.378238575798233e-06, "loss": 0.3224, "step": 2469 }, { "epoch": 1.4424494562440697, "grad_norm": 0.09044391056882807, "learning_rate": 4.3698086786986425e-06, "loss": 0.3223, "step": 2470 }, { "epoch": 1.4430333552295453, "grad_norm": 0.08994435752528243, "learning_rate": 4.3613846354204025e-06, "loss": 0.3014, "step": 2471 }, { "epoch": 1.4436172542150207, "grad_norm": 0.08773048884953383, "learning_rate": 4.352966454722155e-06, "loss": 0.3197, "step": 2472 }, { "epoch": 1.4442011532004964, "grad_norm": 0.09284819853545422, "learning_rate": 4.344554145356447e-06, "loss": 0.335, "step": 2473 }, { "epoch": 1.4447850521859718, "grad_norm": 0.08477141848334653, "learning_rate": 4.336147716069727e-06, "loss": 0.292, "step": 2474 }, { "epoch": 1.4453689511714474, "grad_norm": 0.09222768594723178, "learning_rate": 4.327747175602321e-06, "loss": 0.3368, "step": 2475 }, { "epoch": 1.4459528501569228, "grad_norm": 0.09272587131339775, "learning_rate": 4.319352532688444e-06, "loss": 0.3149, "step": 2476 }, { "epoch": 1.4465367491423984, "grad_norm": 0.08772198701073568, "learning_rate": 4.310963796056168e-06, "loss": 0.2817, "step": 2477 }, { "epoch": 1.447120648127874, "grad_norm": 0.0865785166874937, "learning_rate": 4.302580974427426e-06, "loss": 0.2899, "step": 2478 }, { "epoch": 1.4477045471133494, "grad_norm": 0.08977098435875924, "learning_rate": 4.29420407651801e-06, "loss": 0.3269, "step": 2479 }, { "epoch": 1.4482884460988248, "grad_norm": 0.09597846802090457, "learning_rate": 4.2858331110375406e-06, "loss": 0.3273, "step": 2480 }, { "epoch": 1.4488723450843004, "grad_norm": 0.09034246583476581, "learning_rate": 4.277468086689481e-06, "loss": 0.3183, "step": 2481 }, { "epoch": 1.449456244069776, "grad_norm": 0.09309452822283062, "learning_rate": 4.269109012171112e-06, "loss": 0.2997, "step": 2482 }, { "epoch": 1.4500401430552514, "grad_norm": 0.10016504775993754, "learning_rate": 4.260755896173523e-06, "loss": 0.3542, "step": 2483 }, { "epoch": 1.450624042040727, "grad_norm": 0.08940041061370212, "learning_rate": 4.252408747381622e-06, "loss": 0.2929, "step": 2484 }, { "epoch": 1.4512079410262024, "grad_norm": 0.08504323889580603, "learning_rate": 4.244067574474098e-06, "loss": 0.294, "step": 2485 }, { "epoch": 1.451791840011678, "grad_norm": 0.09137207477692262, "learning_rate": 4.235732386123437e-06, "loss": 0.3395, "step": 2486 }, { "epoch": 1.4523757389971534, "grad_norm": 0.08391891299681614, "learning_rate": 4.227403190995901e-06, "loss": 0.285, "step": 2487 }, { "epoch": 1.452959637982629, "grad_norm": 0.09288924230607497, "learning_rate": 4.219079997751515e-06, "loss": 0.3353, "step": 2488 }, { "epoch": 1.4535435369681045, "grad_norm": 0.09423771111767573, "learning_rate": 4.210762815044073e-06, "loss": 0.34, "step": 2489 }, { "epoch": 1.45412743595358, "grad_norm": 0.08188334891161637, "learning_rate": 4.20245165152111e-06, "loss": 0.2898, "step": 2490 }, { "epoch": 1.4547113349390555, "grad_norm": 0.08612592544836102, "learning_rate": 4.194146515823906e-06, "loss": 0.3023, "step": 2491 }, { "epoch": 1.455295233924531, "grad_norm": 0.09546324447908233, "learning_rate": 4.185847416587481e-06, "loss": 0.3234, "step": 2492 }, { "epoch": 1.4558791329100065, "grad_norm": 0.08837130004703611, "learning_rate": 4.177554362440565e-06, "loss": 0.3016, "step": 2493 }, { "epoch": 1.456463031895482, "grad_norm": 0.08839245884042338, "learning_rate": 4.169267362005619e-06, "loss": 0.2954, "step": 2494 }, { "epoch": 1.4570469308809577, "grad_norm": 0.08986078449771986, "learning_rate": 4.160986423898798e-06, "loss": 0.3226, "step": 2495 }, { "epoch": 1.457630829866433, "grad_norm": 0.08173885018251742, "learning_rate": 4.15271155672995e-06, "loss": 0.2805, "step": 2496 }, { "epoch": 1.4582147288519085, "grad_norm": 0.08938821408057002, "learning_rate": 4.14444276910263e-06, "loss": 0.3053, "step": 2497 }, { "epoch": 1.4587986278373841, "grad_norm": 0.0855818759882735, "learning_rate": 4.1361800696140505e-06, "loss": 0.2964, "step": 2498 }, { "epoch": 1.4593825268228597, "grad_norm": 0.09074187855969246, "learning_rate": 4.127923466855111e-06, "loss": 0.3014, "step": 2499 }, { "epoch": 1.4599664258083351, "grad_norm": 0.08627549570454388, "learning_rate": 4.119672969410362e-06, "loss": 0.307, "step": 2500 }, { "epoch": 1.4605503247938108, "grad_norm": 0.09103512138632662, "learning_rate": 4.111428585858005e-06, "loss": 0.3446, "step": 2501 }, { "epoch": 1.4611342237792861, "grad_norm": 0.07987527568938645, "learning_rate": 4.103190324769895e-06, "loss": 0.2731, "step": 2502 }, { "epoch": 1.4617181227647618, "grad_norm": 0.08776026365766984, "learning_rate": 4.0949581947115106e-06, "loss": 0.3206, "step": 2503 }, { "epoch": 1.4623020217502372, "grad_norm": 0.08127689612210375, "learning_rate": 4.086732204241964e-06, "loss": 0.2813, "step": 2504 }, { "epoch": 1.4628859207357128, "grad_norm": 0.09558283921303373, "learning_rate": 4.07851236191398e-06, "loss": 0.3659, "step": 2505 }, { "epoch": 1.4634698197211882, "grad_norm": 0.09903267657093881, "learning_rate": 4.070298676273886e-06, "loss": 0.294, "step": 2506 }, { "epoch": 1.4640537187066638, "grad_norm": 0.08371769553862034, "learning_rate": 4.06209115586162e-06, "loss": 0.3044, "step": 2507 }, { "epoch": 1.4646376176921392, "grad_norm": 0.09094319821894639, "learning_rate": 4.053889809210698e-06, "loss": 0.3514, "step": 2508 }, { "epoch": 1.4652215166776148, "grad_norm": 0.08274328243773552, "learning_rate": 4.045694644848228e-06, "loss": 0.2968, "step": 2509 }, { "epoch": 1.4658054156630902, "grad_norm": 0.08839535193181415, "learning_rate": 4.037505671294883e-06, "loss": 0.3527, "step": 2510 }, { "epoch": 1.4663893146485658, "grad_norm": 0.08982283519911273, "learning_rate": 4.0293228970648955e-06, "loss": 0.3453, "step": 2511 }, { "epoch": 1.4669732136340414, "grad_norm": 0.09423959942826715, "learning_rate": 4.021146330666065e-06, "loss": 0.331, "step": 2512 }, { "epoch": 1.4675571126195168, "grad_norm": 0.08561368707261134, "learning_rate": 4.012975980599724e-06, "loss": 0.2874, "step": 2513 }, { "epoch": 1.4681410116049922, "grad_norm": 0.1011586649563937, "learning_rate": 4.0048118553607485e-06, "loss": 0.3387, "step": 2514 }, { "epoch": 1.4687249105904678, "grad_norm": 0.09471540620049917, "learning_rate": 3.996653963437546e-06, "loss": 0.3127, "step": 2515 }, { "epoch": 1.4693088095759435, "grad_norm": 0.08904187859563029, "learning_rate": 3.98850231331203e-06, "loss": 0.329, "step": 2516 }, { "epoch": 1.4698927085614188, "grad_norm": 0.08770976369594169, "learning_rate": 3.980356913459642e-06, "loss": 0.32, "step": 2517 }, { "epoch": 1.4704766075468945, "grad_norm": 0.08447642656753152, "learning_rate": 3.972217772349309e-06, "loss": 0.279, "step": 2518 }, { "epoch": 1.4710605065323699, "grad_norm": 0.08866408144538242, "learning_rate": 3.9640848984434556e-06, "loss": 0.294, "step": 2519 }, { "epoch": 1.4716444055178455, "grad_norm": 0.08781911996903856, "learning_rate": 3.955958300197998e-06, "loss": 0.3259, "step": 2520 }, { "epoch": 1.4722283045033209, "grad_norm": 0.08228239517683485, "learning_rate": 3.947837986062314e-06, "loss": 0.2861, "step": 2521 }, { "epoch": 1.4728122034887965, "grad_norm": 0.08572653614882687, "learning_rate": 3.939723964479262e-06, "loss": 0.2991, "step": 2522 }, { "epoch": 1.4733961024742719, "grad_norm": 0.08464043576699666, "learning_rate": 3.931616243885148e-06, "loss": 0.3087, "step": 2523 }, { "epoch": 1.4739800014597475, "grad_norm": 0.08346673922355509, "learning_rate": 3.923514832709725e-06, "loss": 0.3242, "step": 2524 }, { "epoch": 1.474563900445223, "grad_norm": 0.08437789777975496, "learning_rate": 3.915419739376198e-06, "loss": 0.3252, "step": 2525 }, { "epoch": 1.4751477994306985, "grad_norm": 0.09045875420080554, "learning_rate": 3.90733097230119e-06, "loss": 0.2951, "step": 2526 }, { "epoch": 1.475731698416174, "grad_norm": 0.08945764545238996, "learning_rate": 3.899248539894756e-06, "loss": 0.304, "step": 2527 }, { "epoch": 1.4763155974016495, "grad_norm": 0.08515589620324529, "learning_rate": 3.891172450560362e-06, "loss": 0.2809, "step": 2528 }, { "epoch": 1.4768994963871251, "grad_norm": 0.08929818608105325, "learning_rate": 3.883102712694871e-06, "loss": 0.3189, "step": 2529 }, { "epoch": 1.4774833953726005, "grad_norm": 0.09480210083573734, "learning_rate": 3.875039334688556e-06, "loss": 0.3238, "step": 2530 }, { "epoch": 1.478067294358076, "grad_norm": 0.08487548349000108, "learning_rate": 3.866982324925066e-06, "loss": 0.3015, "step": 2531 }, { "epoch": 1.4786511933435516, "grad_norm": 0.09111796031467179, "learning_rate": 3.858931691781439e-06, "loss": 0.3091, "step": 2532 }, { "epoch": 1.4792350923290272, "grad_norm": 0.08847483753904392, "learning_rate": 3.850887443628075e-06, "loss": 0.3139, "step": 2533 }, { "epoch": 1.4798189913145026, "grad_norm": 0.09243315512957417, "learning_rate": 3.842849588828733e-06, "loss": 0.3263, "step": 2534 }, { "epoch": 1.4804028902999782, "grad_norm": 0.085897299905612, "learning_rate": 3.834818135740539e-06, "loss": 0.2931, "step": 2535 }, { "epoch": 1.4809867892854536, "grad_norm": 0.08584981520488316, "learning_rate": 3.826793092713944e-06, "loss": 0.3193, "step": 2536 }, { "epoch": 1.4815706882709292, "grad_norm": 0.09020973986278204, "learning_rate": 3.818774468092754e-06, "loss": 0.3274, "step": 2537 }, { "epoch": 1.4821545872564046, "grad_norm": 0.08847130714811804, "learning_rate": 3.8107622702140856e-06, "loss": 0.3271, "step": 2538 }, { "epoch": 1.4827384862418802, "grad_norm": 0.08837696276867985, "learning_rate": 3.802756507408377e-06, "loss": 0.3613, "step": 2539 }, { "epoch": 1.4833223852273556, "grad_norm": 0.0820952160876996, "learning_rate": 3.794757187999386e-06, "loss": 0.3146, "step": 2540 }, { "epoch": 1.4839062842128312, "grad_norm": 0.08617968393312178, "learning_rate": 3.7867643203041548e-06, "loss": 0.3286, "step": 2541 }, { "epoch": 1.4844901831983066, "grad_norm": 0.08509166418897073, "learning_rate": 3.7787779126330314e-06, "loss": 0.3038, "step": 2542 }, { "epoch": 1.4850740821837822, "grad_norm": 0.08762817353834146, "learning_rate": 3.770797973289644e-06, "loss": 0.3388, "step": 2543 }, { "epoch": 1.4856579811692576, "grad_norm": 0.07813603690366723, "learning_rate": 3.762824510570887e-06, "loss": 0.2538, "step": 2544 }, { "epoch": 1.4862418801547332, "grad_norm": 0.08772781330096498, "learning_rate": 3.7548575327669345e-06, "loss": 0.3025, "step": 2545 }, { "epoch": 1.4868257791402089, "grad_norm": 0.08439272459408455, "learning_rate": 3.7468970481612077e-06, "loss": 0.2756, "step": 2546 }, { "epoch": 1.4874096781256843, "grad_norm": 0.08415301327448581, "learning_rate": 3.738943065030376e-06, "loss": 0.3037, "step": 2547 }, { "epoch": 1.4879935771111596, "grad_norm": 0.08020955651615735, "learning_rate": 3.7309955916443597e-06, "loss": 0.2566, "step": 2548 }, { "epoch": 1.4885774760966353, "grad_norm": 0.08475681898410016, "learning_rate": 3.723054636266299e-06, "loss": 0.3214, "step": 2549 }, { "epoch": 1.4891613750821109, "grad_norm": 0.08395111864687037, "learning_rate": 3.715120207152567e-06, "loss": 0.3087, "step": 2550 }, { "epoch": 1.4897452740675863, "grad_norm": 0.08828581576765251, "learning_rate": 3.7071923125527444e-06, "loss": 0.3091, "step": 2551 }, { "epoch": 1.490329173053062, "grad_norm": 0.09085619638935687, "learning_rate": 3.6992709607096167e-06, "loss": 0.3211, "step": 2552 }, { "epoch": 1.4909130720385373, "grad_norm": 0.08744393461342108, "learning_rate": 3.6913561598591775e-06, "loss": 0.3005, "step": 2553 }, { "epoch": 1.491496971024013, "grad_norm": 0.08617856546714978, "learning_rate": 3.683447918230594e-06, "loss": 0.3342, "step": 2554 }, { "epoch": 1.4920808700094883, "grad_norm": 0.10452696581342298, "learning_rate": 3.6755462440462288e-06, "loss": 0.3821, "step": 2555 }, { "epoch": 1.492664768994964, "grad_norm": 0.08970303890991523, "learning_rate": 3.6676511455216056e-06, "loss": 0.3261, "step": 2556 }, { "epoch": 1.4932486679804393, "grad_norm": 0.08559211165163409, "learning_rate": 3.659762630865411e-06, "loss": 0.293, "step": 2557 }, { "epoch": 1.493832566965915, "grad_norm": 0.08740800223318528, "learning_rate": 3.651880708279497e-06, "loss": 0.3093, "step": 2558 }, { "epoch": 1.4944164659513903, "grad_norm": 0.09043523642253222, "learning_rate": 3.6440053859588478e-06, "loss": 0.2884, "step": 2559 }, { "epoch": 1.495000364936866, "grad_norm": 0.08789420326852868, "learning_rate": 3.636136672091598e-06, "loss": 0.3407, "step": 2560 }, { "epoch": 1.4955842639223413, "grad_norm": 0.07936392264516332, "learning_rate": 3.628274574859002e-06, "loss": 0.3026, "step": 2561 }, { "epoch": 1.496168162907817, "grad_norm": 0.08738729636957897, "learning_rate": 3.6204191024354352e-06, "loss": 0.3312, "step": 2562 }, { "epoch": 1.4967520618932926, "grad_norm": 0.09158707138343951, "learning_rate": 3.612570262988393e-06, "loss": 0.3265, "step": 2563 }, { "epoch": 1.497335960878768, "grad_norm": 0.0808331990218285, "learning_rate": 3.604728064678464e-06, "loss": 0.2509, "step": 2564 }, { "epoch": 1.4979198598642434, "grad_norm": 0.0967651918626877, "learning_rate": 3.5968925156593426e-06, "loss": 0.3239, "step": 2565 }, { "epoch": 1.498503758849719, "grad_norm": 0.08627203347654368, "learning_rate": 3.589063624077802e-06, "loss": 0.3285, "step": 2566 }, { "epoch": 1.4990876578351946, "grad_norm": 0.09011784621956505, "learning_rate": 3.5812413980736916e-06, "loss": 0.3506, "step": 2567 }, { "epoch": 1.49967155682067, "grad_norm": 0.09355680812080493, "learning_rate": 3.5734258457799407e-06, "loss": 0.3134, "step": 2568 }, { "epoch": 1.5002554558061454, "grad_norm": 0.0956369919306679, "learning_rate": 3.5656169753225278e-06, "loss": 0.3431, "step": 2569 }, { "epoch": 1.500839354791621, "grad_norm": 0.08560258262197865, "learning_rate": 3.5578147948204934e-06, "loss": 0.3056, "step": 2570 }, { "epoch": 1.5014232537770966, "grad_norm": 0.08592770639923118, "learning_rate": 3.5500193123859227e-06, "loss": 0.2954, "step": 2571 }, { "epoch": 1.5020071527625722, "grad_norm": 0.08558040389355198, "learning_rate": 3.542230536123925e-06, "loss": 0.2792, "step": 2572 }, { "epoch": 1.5025910517480476, "grad_norm": 0.08698262657251617, "learning_rate": 3.5344484741326533e-06, "loss": 0.3157, "step": 2573 }, { "epoch": 1.503174950733523, "grad_norm": 0.09011790824386029, "learning_rate": 3.526673134503267e-06, "loss": 0.3072, "step": 2574 }, { "epoch": 1.5037588497189986, "grad_norm": 0.08906979956600251, "learning_rate": 3.5189045253199384e-06, "loss": 0.3406, "step": 2575 }, { "epoch": 1.5043427487044743, "grad_norm": 0.090780296331361, "learning_rate": 3.51114265465985e-06, "loss": 0.3098, "step": 2576 }, { "epoch": 1.5049266476899497, "grad_norm": 0.09291148302705976, "learning_rate": 3.5033875305931662e-06, "loss": 0.3038, "step": 2577 }, { "epoch": 1.505510546675425, "grad_norm": 0.09041323610809082, "learning_rate": 3.4956391611830486e-06, "loss": 0.3244, "step": 2578 }, { "epoch": 1.5060944456609007, "grad_norm": 0.07897756228062049, "learning_rate": 3.4878975544856285e-06, "loss": 0.29, "step": 2579 }, { "epoch": 1.5066783446463763, "grad_norm": 0.08209072863366663, "learning_rate": 3.4801627185500033e-06, "loss": 0.3042, "step": 2580 }, { "epoch": 1.5072622436318517, "grad_norm": 0.0775808347769155, "learning_rate": 3.4724346614182427e-06, "loss": 0.2709, "step": 2581 }, { "epoch": 1.507846142617327, "grad_norm": 0.0872331001298501, "learning_rate": 3.4647133911253516e-06, "loss": 0.2939, "step": 2582 }, { "epoch": 1.5084300416028027, "grad_norm": 0.08502846036544637, "learning_rate": 3.4569989156992965e-06, "loss": 0.3212, "step": 2583 }, { "epoch": 1.5090139405882783, "grad_norm": 0.0891904106809121, "learning_rate": 3.449291243160966e-06, "loss": 0.3032, "step": 2584 }, { "epoch": 1.5095978395737537, "grad_norm": 0.10850831296739284, "learning_rate": 3.4415903815241757e-06, "loss": 0.2992, "step": 2585 }, { "epoch": 1.510181738559229, "grad_norm": 0.08183045876257128, "learning_rate": 3.4338963387956726e-06, "loss": 0.3018, "step": 2586 }, { "epoch": 1.5107656375447047, "grad_norm": 0.08339545442497565, "learning_rate": 3.4262091229750973e-06, "loss": 0.2815, "step": 2587 }, { "epoch": 1.5113495365301803, "grad_norm": 0.08670189088172164, "learning_rate": 3.418528742055006e-06, "loss": 0.3242, "step": 2588 }, { "epoch": 1.511933435515656, "grad_norm": 0.08998406316906789, "learning_rate": 3.4108552040208408e-06, "loss": 0.3372, "step": 2589 }, { "epoch": 1.5125173345011313, "grad_norm": 0.0967109966747691, "learning_rate": 3.403188516850927e-06, "loss": 0.3204, "step": 2590 }, { "epoch": 1.5131012334866067, "grad_norm": 0.08755860111119636, "learning_rate": 3.3955286885164786e-06, "loss": 0.3203, "step": 2591 }, { "epoch": 1.5136851324720824, "grad_norm": 0.0892164128928644, "learning_rate": 3.387875726981563e-06, "loss": 0.3228, "step": 2592 }, { "epoch": 1.514269031457558, "grad_norm": 0.08965050332385861, "learning_rate": 3.3802296402031234e-06, "loss": 0.3135, "step": 2593 }, { "epoch": 1.5148529304430334, "grad_norm": 0.08506851865911927, "learning_rate": 3.3725904361309426e-06, "loss": 0.2764, "step": 2594 }, { "epoch": 1.5154368294285088, "grad_norm": 0.08135979363084735, "learning_rate": 3.36495812270765e-06, "loss": 0.2728, "step": 2595 }, { "epoch": 1.5160207284139844, "grad_norm": 0.0908600442963011, "learning_rate": 3.35733270786872e-06, "loss": 0.3022, "step": 2596 }, { "epoch": 1.51660462739946, "grad_norm": 0.08582860720433647, "learning_rate": 3.3497141995424397e-06, "loss": 0.3266, "step": 2597 }, { "epoch": 1.5171885263849354, "grad_norm": 0.07590055774766839, "learning_rate": 3.3421026056499273e-06, "loss": 0.2622, "step": 2598 }, { "epoch": 1.5177724253704108, "grad_norm": 0.08624629750291889, "learning_rate": 3.3344979341051108e-06, "loss": 0.342, "step": 2599 }, { "epoch": 1.5183563243558864, "grad_norm": 0.0885338251293027, "learning_rate": 3.3269001928147103e-06, "loss": 0.3168, "step": 2600 }, { "epoch": 1.518940223341362, "grad_norm": 0.10036001063195381, "learning_rate": 3.3193093896782546e-06, "loss": 0.3349, "step": 2601 }, { "epoch": 1.5195241223268374, "grad_norm": 0.09401125463258427, "learning_rate": 3.311725532588049e-06, "loss": 0.3189, "step": 2602 }, { "epoch": 1.5201080213123128, "grad_norm": 0.08546120929097303, "learning_rate": 3.3041486294291767e-06, "loss": 0.2776, "step": 2603 }, { "epoch": 1.5206919202977884, "grad_norm": 0.09404909150114718, "learning_rate": 3.2965786880795005e-06, "loss": 0.373, "step": 2604 }, { "epoch": 1.521275819283264, "grad_norm": 0.08965355732369851, "learning_rate": 3.2890157164096315e-06, "loss": 0.3105, "step": 2605 }, { "epoch": 1.5218597182687397, "grad_norm": 0.09270458508019058, "learning_rate": 3.2814597222829468e-06, "loss": 0.3136, "step": 2606 }, { "epoch": 1.522443617254215, "grad_norm": 0.08491935125322983, "learning_rate": 3.2739107135555603e-06, "loss": 0.3066, "step": 2607 }, { "epoch": 1.5230275162396905, "grad_norm": 0.09229957163214667, "learning_rate": 3.266368698076323e-06, "loss": 0.3105, "step": 2608 }, { "epoch": 1.523611415225166, "grad_norm": 0.08325789620104056, "learning_rate": 3.258833683686824e-06, "loss": 0.2797, "step": 2609 }, { "epoch": 1.5241953142106417, "grad_norm": 0.08554165793393224, "learning_rate": 3.251305678221359e-06, "loss": 0.2958, "step": 2610 }, { "epoch": 1.524779213196117, "grad_norm": 0.09017831511427662, "learning_rate": 3.2437846895069535e-06, "loss": 0.3156, "step": 2611 }, { "epoch": 1.5253631121815925, "grad_norm": 0.08314857906794532, "learning_rate": 3.236270725363323e-06, "loss": 0.31, "step": 2612 }, { "epoch": 1.525947011167068, "grad_norm": 0.08401629515284222, "learning_rate": 3.2287637936028814e-06, "loss": 0.2877, "step": 2613 }, { "epoch": 1.5265309101525437, "grad_norm": 0.08410461902519041, "learning_rate": 3.2212639020307423e-06, "loss": 0.3047, "step": 2614 }, { "epoch": 1.5271148091380191, "grad_norm": 0.09582422536519905, "learning_rate": 3.2137710584446837e-06, "loss": 0.3107, "step": 2615 }, { "epoch": 1.5276987081234945, "grad_norm": 0.09311496031680074, "learning_rate": 3.20628527063517e-06, "loss": 0.2954, "step": 2616 }, { "epoch": 1.5282826071089701, "grad_norm": 0.08616322224863718, "learning_rate": 3.1988065463853204e-06, "loss": 0.2922, "step": 2617 }, { "epoch": 1.5288665060944457, "grad_norm": 0.08670678547675068, "learning_rate": 3.1913348934709076e-06, "loss": 0.2969, "step": 2618 }, { "epoch": 1.5294504050799211, "grad_norm": 0.08803550506938573, "learning_rate": 3.183870319660365e-06, "loss": 0.3131, "step": 2619 }, { "epoch": 1.5300343040653965, "grad_norm": 0.08496535222670006, "learning_rate": 3.1764128327147515e-06, "loss": 0.3063, "step": 2620 }, { "epoch": 1.5306182030508722, "grad_norm": 0.09334574352755075, "learning_rate": 3.1689624403877685e-06, "loss": 0.3086, "step": 2621 }, { "epoch": 1.5312021020363478, "grad_norm": 0.08955971972714553, "learning_rate": 3.161519150425735e-06, "loss": 0.289, "step": 2622 }, { "epoch": 1.5317860010218234, "grad_norm": 0.08634076492853125, "learning_rate": 3.1540829705675835e-06, "loss": 0.2984, "step": 2623 }, { "epoch": 1.5323699000072988, "grad_norm": 0.08172975702325094, "learning_rate": 3.1466539085448624e-06, "loss": 0.2809, "step": 2624 }, { "epoch": 1.5329537989927742, "grad_norm": 0.09012121894916074, "learning_rate": 3.139231972081709e-06, "loss": 0.333, "step": 2625 }, { "epoch": 1.5335376979782498, "grad_norm": 0.08348655669989771, "learning_rate": 3.1318171688948618e-06, "loss": 0.2889, "step": 2626 }, { "epoch": 1.5341215969637254, "grad_norm": 0.08811068896988447, "learning_rate": 3.1244095066936396e-06, "loss": 0.2987, "step": 2627 }, { "epoch": 1.5347054959492008, "grad_norm": 0.08506802061427637, "learning_rate": 3.1170089931799296e-06, "loss": 0.2795, "step": 2628 }, { "epoch": 1.5352893949346762, "grad_norm": 0.08910588049406602, "learning_rate": 3.1096156360482e-06, "loss": 0.3049, "step": 2629 }, { "epoch": 1.5358732939201518, "grad_norm": 0.09253992336159036, "learning_rate": 3.102229442985466e-06, "loss": 0.2845, "step": 2630 }, { "epoch": 1.5364571929056274, "grad_norm": 0.08902678128724342, "learning_rate": 3.094850421671295e-06, "loss": 0.3252, "step": 2631 }, { "epoch": 1.5370410918911028, "grad_norm": 0.0825879367482042, "learning_rate": 3.0874785797778096e-06, "loss": 0.2775, "step": 2632 }, { "epoch": 1.5376249908765782, "grad_norm": 0.09089789892343554, "learning_rate": 3.080113924969652e-06, "loss": 0.3377, "step": 2633 }, { "epoch": 1.5382088898620538, "grad_norm": 0.09199138003873894, "learning_rate": 3.0727564649040066e-06, "loss": 0.3349, "step": 2634 }, { "epoch": 1.5387927888475295, "grad_norm": 0.08964667681825708, "learning_rate": 3.0654062072305667e-06, "loss": 0.3093, "step": 2635 }, { "epoch": 1.5393766878330049, "grad_norm": 0.0905273085661763, "learning_rate": 3.0580631595915368e-06, "loss": 0.2992, "step": 2636 }, { "epoch": 1.5399605868184802, "grad_norm": 0.09798939686655929, "learning_rate": 3.050727329621637e-06, "loss": 0.3557, "step": 2637 }, { "epoch": 1.5405444858039559, "grad_norm": 0.08048230593660523, "learning_rate": 3.043398724948068e-06, "loss": 0.2581, "step": 2638 }, { "epoch": 1.5411283847894315, "grad_norm": 0.08946641302063456, "learning_rate": 3.03607735319053e-06, "loss": 0.3347, "step": 2639 }, { "epoch": 1.541712283774907, "grad_norm": 0.09355790063794749, "learning_rate": 3.028763221961196e-06, "loss": 0.3163, "step": 2640 }, { "epoch": 1.5422961827603825, "grad_norm": 0.09120268423862737, "learning_rate": 3.02145633886471e-06, "loss": 0.3109, "step": 2641 }, { "epoch": 1.542880081745858, "grad_norm": 0.08659343643124115, "learning_rate": 3.0141567114981897e-06, "loss": 0.2992, "step": 2642 }, { "epoch": 1.5434639807313335, "grad_norm": 0.08716096902430753, "learning_rate": 3.006864347451195e-06, "loss": 0.2867, "step": 2643 }, { "epoch": 1.5440478797168091, "grad_norm": 0.09037240294640397, "learning_rate": 2.999579254305748e-06, "loss": 0.3119, "step": 2644 }, { "epoch": 1.5446317787022845, "grad_norm": 0.09008473134151497, "learning_rate": 2.992301439636299e-06, "loss": 0.3056, "step": 2645 }, { "epoch": 1.54521567768776, "grad_norm": 0.0868863178859526, "learning_rate": 2.9850309110097364e-06, "loss": 0.3189, "step": 2646 }, { "epoch": 1.5457995766732355, "grad_norm": 0.09073481938667521, "learning_rate": 2.977767675985377e-06, "loss": 0.3364, "step": 2647 }, { "epoch": 1.5463834756587111, "grad_norm": 0.09411468233673285, "learning_rate": 2.970511742114943e-06, "loss": 0.3639, "step": 2648 }, { "epoch": 1.5469673746441865, "grad_norm": 0.07926321513444771, "learning_rate": 2.963263116942581e-06, "loss": 0.2758, "step": 2649 }, { "epoch": 1.547551273629662, "grad_norm": 0.08984022340233255, "learning_rate": 2.9560218080048243e-06, "loss": 0.3127, "step": 2650 }, { "epoch": 1.5481351726151376, "grad_norm": 0.09554832647343932, "learning_rate": 2.9487878228306044e-06, "loss": 0.3121, "step": 2651 }, { "epoch": 1.5487190716006132, "grad_norm": 0.09289222283593039, "learning_rate": 2.9415611689412426e-06, "loss": 0.3107, "step": 2652 }, { "epoch": 1.5493029705860886, "grad_norm": 0.08891007430777738, "learning_rate": 2.9343418538504297e-06, "loss": 0.3238, "step": 2653 }, { "epoch": 1.549886869571564, "grad_norm": 0.08793256776673751, "learning_rate": 2.9271298850642337e-06, "loss": 0.261, "step": 2654 }, { "epoch": 1.5504707685570396, "grad_norm": 0.08014817522100684, "learning_rate": 2.9199252700810833e-06, "loss": 0.2727, "step": 2655 }, { "epoch": 1.5510546675425152, "grad_norm": 0.08755602841547906, "learning_rate": 2.912728016391753e-06, "loss": 0.3331, "step": 2656 }, { "epoch": 1.5516385665279908, "grad_norm": 0.08613419373992146, "learning_rate": 2.905538131479376e-06, "loss": 0.2725, "step": 2657 }, { "epoch": 1.5522224655134662, "grad_norm": 0.09015473663542706, "learning_rate": 2.8983556228194165e-06, "loss": 0.2968, "step": 2658 }, { "epoch": 1.5528063644989416, "grad_norm": 0.0952125542140049, "learning_rate": 2.8911804978796664e-06, "loss": 0.3452, "step": 2659 }, { "epoch": 1.5533902634844172, "grad_norm": 0.08720965132685592, "learning_rate": 2.884012764120252e-06, "loss": 0.3136, "step": 2660 }, { "epoch": 1.5539741624698928, "grad_norm": 0.0884545847151543, "learning_rate": 2.8768524289936007e-06, "loss": 0.299, "step": 2661 }, { "epoch": 1.5545580614553682, "grad_norm": 0.09126654667896779, "learning_rate": 2.8696994999444614e-06, "loss": 0.3419, "step": 2662 }, { "epoch": 1.5551419604408436, "grad_norm": 0.09035372028053747, "learning_rate": 2.8625539844098736e-06, "loss": 0.2838, "step": 2663 }, { "epoch": 1.5557258594263192, "grad_norm": 0.09007901725130046, "learning_rate": 2.8554158898191674e-06, "loss": 0.3064, "step": 2664 }, { "epoch": 1.5563097584117949, "grad_norm": 0.08511536884391872, "learning_rate": 2.8482852235939672e-06, "loss": 0.3151, "step": 2665 }, { "epoch": 1.5568936573972703, "grad_norm": 0.09247356638550583, "learning_rate": 2.8411619931481627e-06, "loss": 0.3052, "step": 2666 }, { "epoch": 1.5574775563827457, "grad_norm": 0.09018846326510671, "learning_rate": 2.8340462058879214e-06, "loss": 0.3243, "step": 2667 }, { "epoch": 1.5580614553682213, "grad_norm": 0.09507298518650188, "learning_rate": 2.8269378692116676e-06, "loss": 0.3203, "step": 2668 }, { "epoch": 1.5586453543536969, "grad_norm": 0.08857003178918711, "learning_rate": 2.8198369905100754e-06, "loss": 0.3102, "step": 2669 }, { "epoch": 1.5592292533391723, "grad_norm": 0.08411786268804498, "learning_rate": 2.812743577166075e-06, "loss": 0.3113, "step": 2670 }, { "epoch": 1.5598131523246477, "grad_norm": 0.08897651418004422, "learning_rate": 2.8056576365548216e-06, "loss": 0.2935, "step": 2671 }, { "epoch": 1.5603970513101233, "grad_norm": 0.0891178760467339, "learning_rate": 2.7985791760437163e-06, "loss": 0.2956, "step": 2672 }, { "epoch": 1.560980950295599, "grad_norm": 0.09516150994544319, "learning_rate": 2.79150820299237e-06, "loss": 0.3092, "step": 2673 }, { "epoch": 1.5615648492810745, "grad_norm": 0.09972767608852026, "learning_rate": 2.784444724752611e-06, "loss": 0.3254, "step": 2674 }, { "epoch": 1.56214874826655, "grad_norm": 0.09201196159614539, "learning_rate": 2.7773887486684815e-06, "loss": 0.3411, "step": 2675 }, { "epoch": 1.5627326472520253, "grad_norm": 0.09094485611489869, "learning_rate": 2.770340282076216e-06, "loss": 0.3239, "step": 2676 }, { "epoch": 1.563316546237501, "grad_norm": 0.08599700938308731, "learning_rate": 2.76329933230425e-06, "loss": 0.2773, "step": 2677 }, { "epoch": 1.5639004452229766, "grad_norm": 0.08590167376455755, "learning_rate": 2.7562659066731947e-06, "loss": 0.2852, "step": 2678 }, { "epoch": 1.564484344208452, "grad_norm": 0.09366495523973, "learning_rate": 2.7492400124958397e-06, "loss": 0.36, "step": 2679 }, { "epoch": 1.5650682431939273, "grad_norm": 0.08469024745037851, "learning_rate": 2.742221657077151e-06, "loss": 0.2926, "step": 2680 }, { "epoch": 1.565652142179403, "grad_norm": 0.08432484573256707, "learning_rate": 2.735210847714247e-06, "loss": 0.3098, "step": 2681 }, { "epoch": 1.5662360411648786, "grad_norm": 0.08569401067516139, "learning_rate": 2.7282075916964077e-06, "loss": 0.3158, "step": 2682 }, { "epoch": 1.566819940150354, "grad_norm": 0.08875574883064716, "learning_rate": 2.721211896305059e-06, "loss": 0.322, "step": 2683 }, { "epoch": 1.5674038391358294, "grad_norm": 0.0819710406247977, "learning_rate": 2.7142237688137594e-06, "loss": 0.2915, "step": 2684 }, { "epoch": 1.567987738121305, "grad_norm": 0.08836471376643541, "learning_rate": 2.707243216488208e-06, "loss": 0.3432, "step": 2685 }, { "epoch": 1.5685716371067806, "grad_norm": 0.08560034026513805, "learning_rate": 2.7002702465862206e-06, "loss": 0.2883, "step": 2686 }, { "epoch": 1.569155536092256, "grad_norm": 0.0922229121981555, "learning_rate": 2.6933048663577297e-06, "loss": 0.3013, "step": 2687 }, { "epoch": 1.5697394350777314, "grad_norm": 0.08689578552709706, "learning_rate": 2.6863470830447837e-06, "loss": 0.3153, "step": 2688 }, { "epoch": 1.570323334063207, "grad_norm": 0.07877541154174991, "learning_rate": 2.6793969038815224e-06, "loss": 0.2387, "step": 2689 }, { "epoch": 1.5709072330486826, "grad_norm": 0.08812595375823319, "learning_rate": 2.672454336094191e-06, "loss": 0.3101, "step": 2690 }, { "epoch": 1.5714911320341582, "grad_norm": 0.09173176870494346, "learning_rate": 2.665519386901111e-06, "loss": 0.3285, "step": 2691 }, { "epoch": 1.5720750310196336, "grad_norm": 0.08533935609848665, "learning_rate": 2.658592063512684e-06, "loss": 0.3579, "step": 2692 }, { "epoch": 1.572658930005109, "grad_norm": 0.09908874996898344, "learning_rate": 2.6516723731313896e-06, "loss": 0.3515, "step": 2693 }, { "epoch": 1.5732428289905847, "grad_norm": 0.08824000077383869, "learning_rate": 2.644760322951764e-06, "loss": 0.2881, "step": 2694 }, { "epoch": 1.5738267279760603, "grad_norm": 0.08604431713317852, "learning_rate": 2.6378559201604047e-06, "loss": 0.3275, "step": 2695 }, { "epoch": 1.5744106269615357, "grad_norm": 0.08752244657383407, "learning_rate": 2.6309591719359563e-06, "loss": 0.3061, "step": 2696 }, { "epoch": 1.574994525947011, "grad_norm": 0.08726766057371044, "learning_rate": 2.6240700854490988e-06, "loss": 0.2811, "step": 2697 }, { "epoch": 1.5755784249324867, "grad_norm": 0.08754841019092427, "learning_rate": 2.6171886678625593e-06, "loss": 0.3003, "step": 2698 }, { "epoch": 1.5761623239179623, "grad_norm": 0.08680994469877383, "learning_rate": 2.6103149263310768e-06, "loss": 0.2948, "step": 2699 }, { "epoch": 1.5767462229034377, "grad_norm": 0.07927159049620183, "learning_rate": 2.6034488680014236e-06, "loss": 0.2713, "step": 2700 }, { "epoch": 1.577330121888913, "grad_norm": 0.08325680176605184, "learning_rate": 2.5965905000123736e-06, "loss": 0.2859, "step": 2701 }, { "epoch": 1.5779140208743887, "grad_norm": 0.09053910862208912, "learning_rate": 2.5897398294947027e-06, "loss": 0.293, "step": 2702 }, { "epoch": 1.5784979198598643, "grad_norm": 0.0904865595703982, "learning_rate": 2.582896863571197e-06, "loss": 0.2986, "step": 2703 }, { "epoch": 1.5790818188453397, "grad_norm": 0.08864564144868421, "learning_rate": 2.576061609356617e-06, "loss": 0.3276, "step": 2704 }, { "epoch": 1.579665717830815, "grad_norm": 0.08120326629439695, "learning_rate": 2.569234073957717e-06, "loss": 0.2869, "step": 2705 }, { "epoch": 1.5802496168162907, "grad_norm": 0.0868693948678393, "learning_rate": 2.5624142644732177e-06, "loss": 0.2974, "step": 2706 }, { "epoch": 1.5808335158017663, "grad_norm": 0.08258400920792473, "learning_rate": 2.5556021879938074e-06, "loss": 0.2861, "step": 2707 }, { "epoch": 1.581417414787242, "grad_norm": 0.08347529989296157, "learning_rate": 2.5487978516021426e-06, "loss": 0.3226, "step": 2708 }, { "epoch": 1.5820013137727174, "grad_norm": 0.09030913923063077, "learning_rate": 2.542001262372821e-06, "loss": 0.3207, "step": 2709 }, { "epoch": 1.5825852127581927, "grad_norm": 0.08086855959022193, "learning_rate": 2.535212427372393e-06, "loss": 0.2754, "step": 2710 }, { "epoch": 1.5831691117436684, "grad_norm": 0.09633809430861122, "learning_rate": 2.52843135365935e-06, "loss": 0.3638, "step": 2711 }, { "epoch": 1.583753010729144, "grad_norm": 0.08543175518811744, "learning_rate": 2.5216580482840993e-06, "loss": 0.2894, "step": 2712 }, { "epoch": 1.5843369097146194, "grad_norm": 0.09084325125773099, "learning_rate": 2.514892518288988e-06, "loss": 0.3045, "step": 2713 }, { "epoch": 1.5849208087000948, "grad_norm": 0.09416612197266123, "learning_rate": 2.50813477070827e-06, "loss": 0.3076, "step": 2714 }, { "epoch": 1.5855047076855704, "grad_norm": 0.09015803417588053, "learning_rate": 2.501384812568104e-06, "loss": 0.3037, "step": 2715 }, { "epoch": 1.586088606671046, "grad_norm": 0.0907057921755048, "learning_rate": 2.494642650886563e-06, "loss": 0.3421, "step": 2716 }, { "epoch": 1.5866725056565214, "grad_norm": 0.08888457010987814, "learning_rate": 2.4879082926735974e-06, "loss": 0.2927, "step": 2717 }, { "epoch": 1.5872564046419968, "grad_norm": 0.08149321879464773, "learning_rate": 2.4811817449310615e-06, "loss": 0.2801, "step": 2718 }, { "epoch": 1.5878403036274724, "grad_norm": 0.09105292142705328, "learning_rate": 2.4744630146526762e-06, "loss": 0.3259, "step": 2719 }, { "epoch": 1.588424202612948, "grad_norm": 0.08551898961440527, "learning_rate": 2.467752108824034e-06, "loss": 0.3001, "step": 2720 }, { "epoch": 1.5890081015984234, "grad_norm": 0.08748173745618505, "learning_rate": 2.4610490344226034e-06, "loss": 0.3225, "step": 2721 }, { "epoch": 1.5895920005838988, "grad_norm": 0.07893996523075889, "learning_rate": 2.454353798417698e-06, "loss": 0.2801, "step": 2722 }, { "epoch": 1.5901758995693744, "grad_norm": 0.09139774514224752, "learning_rate": 2.4476664077704926e-06, "loss": 0.3589, "step": 2723 }, { "epoch": 1.59075979855485, "grad_norm": 0.08780744111113463, "learning_rate": 2.4409868694339965e-06, "loss": 0.2891, "step": 2724 }, { "epoch": 1.5913436975403257, "grad_norm": 0.09233258423981752, "learning_rate": 2.434315190353056e-06, "loss": 0.3464, "step": 2725 }, { "epoch": 1.591927596525801, "grad_norm": 0.09508901605552286, "learning_rate": 2.427651377464353e-06, "loss": 0.3404, "step": 2726 }, { "epoch": 1.5925114955112765, "grad_norm": 0.08603070894246578, "learning_rate": 2.4209954376963797e-06, "loss": 0.2765, "step": 2727 }, { "epoch": 1.593095394496752, "grad_norm": 0.08125921888586506, "learning_rate": 2.4143473779694548e-06, "loss": 0.2929, "step": 2728 }, { "epoch": 1.5936792934822277, "grad_norm": 0.09112893793113959, "learning_rate": 2.407707205195694e-06, "loss": 0.3091, "step": 2729 }, { "epoch": 1.594263192467703, "grad_norm": 0.084827608187166, "learning_rate": 2.4010749262790136e-06, "loss": 0.2936, "step": 2730 }, { "epoch": 1.5948470914531785, "grad_norm": 0.08352210407677428, "learning_rate": 2.3944505481151303e-06, "loss": 0.2958, "step": 2731 }, { "epoch": 1.595430990438654, "grad_norm": 0.08703190607762694, "learning_rate": 2.387834077591538e-06, "loss": 0.3039, "step": 2732 }, { "epoch": 1.5960148894241297, "grad_norm": 0.09391480932994686, "learning_rate": 2.3812255215875147e-06, "loss": 0.3173, "step": 2733 }, { "epoch": 1.5965987884096051, "grad_norm": 0.08826217167348295, "learning_rate": 2.374624886974106e-06, "loss": 0.3016, "step": 2734 }, { "epoch": 1.5971826873950805, "grad_norm": 0.09033873751065774, "learning_rate": 2.3680321806141182e-06, "loss": 0.3321, "step": 2735 }, { "epoch": 1.5977665863805561, "grad_norm": 0.09382879185519377, "learning_rate": 2.3614474093621255e-06, "loss": 0.3019, "step": 2736 }, { "epoch": 1.5983504853660317, "grad_norm": 0.09159729772048257, "learning_rate": 2.354870580064439e-06, "loss": 0.2854, "step": 2737 }, { "epoch": 1.5989343843515071, "grad_norm": 0.09195692003057859, "learning_rate": 2.34830169955912e-06, "loss": 0.2972, "step": 2738 }, { "epoch": 1.5995182833369825, "grad_norm": 0.08614278791732241, "learning_rate": 2.341740774675968e-06, "loss": 0.3076, "step": 2739 }, { "epoch": 1.6001021823224582, "grad_norm": 0.08473939468837376, "learning_rate": 2.335187812236499e-06, "loss": 0.2943, "step": 2740 }, { "epoch": 1.6006860813079338, "grad_norm": 0.09270931898205137, "learning_rate": 2.3286428190539645e-06, "loss": 0.3308, "step": 2741 }, { "epoch": 1.6012699802934094, "grad_norm": 0.08787718601441341, "learning_rate": 2.322105801933321e-06, "loss": 0.2982, "step": 2742 }, { "epoch": 1.6018538792788848, "grad_norm": 0.09066312088483106, "learning_rate": 2.3155767676712317e-06, "loss": 0.3203, "step": 2743 }, { "epoch": 1.6024377782643602, "grad_norm": 0.08427726585561024, "learning_rate": 2.3090557230560673e-06, "loss": 0.3079, "step": 2744 }, { "epoch": 1.6030216772498358, "grad_norm": 0.08823689231833086, "learning_rate": 2.3025426748678814e-06, "loss": 0.3376, "step": 2745 }, { "epoch": 1.6036055762353114, "grad_norm": 0.08619687795821662, "learning_rate": 2.296037629878426e-06, "loss": 0.2792, "step": 2746 }, { "epoch": 1.6041894752207868, "grad_norm": 0.08730328371460341, "learning_rate": 2.289540594851122e-06, "loss": 0.3137, "step": 2747 }, { "epoch": 1.6047733742062622, "grad_norm": 0.08344980123322422, "learning_rate": 2.283051576541062e-06, "loss": 0.2839, "step": 2748 }, { "epoch": 1.6053572731917378, "grad_norm": 0.08549955761455856, "learning_rate": 2.2765705816950124e-06, "loss": 0.3096, "step": 2749 }, { "epoch": 1.6059411721772134, "grad_norm": 0.08683964289576866, "learning_rate": 2.2700976170513855e-06, "loss": 0.3014, "step": 2750 }, { "epoch": 1.6065250711626888, "grad_norm": 0.0773463535455817, "learning_rate": 2.263632689340257e-06, "loss": 0.2508, "step": 2751 }, { "epoch": 1.6071089701481642, "grad_norm": 0.08276510530252898, "learning_rate": 2.257175805283338e-06, "loss": 0.2869, "step": 2752 }, { "epoch": 1.6076928691336398, "grad_norm": 0.0829419059129464, "learning_rate": 2.250726971593976e-06, "loss": 0.265, "step": 2753 }, { "epoch": 1.6082767681191155, "grad_norm": 0.09146044817550521, "learning_rate": 2.2442861949771554e-06, "loss": 0.3581, "step": 2754 }, { "epoch": 1.6088606671045909, "grad_norm": 0.08238846028848792, "learning_rate": 2.237853482129475e-06, "loss": 0.288, "step": 2755 }, { "epoch": 1.6094445660900663, "grad_norm": 0.08099993326455163, "learning_rate": 2.231428839739157e-06, "loss": 0.2821, "step": 2756 }, { "epoch": 1.6100284650755419, "grad_norm": 0.08584625022719065, "learning_rate": 2.225012274486028e-06, "loss": 0.3098, "step": 2757 }, { "epoch": 1.6106123640610175, "grad_norm": 0.0952452236562889, "learning_rate": 2.218603793041516e-06, "loss": 0.3005, "step": 2758 }, { "epoch": 1.611196263046493, "grad_norm": 0.09016922890053188, "learning_rate": 2.21220340206865e-06, "loss": 0.3223, "step": 2759 }, { "epoch": 1.6117801620319685, "grad_norm": 0.08791932232282855, "learning_rate": 2.205811108222038e-06, "loss": 0.3225, "step": 2760 }, { "epoch": 1.612364061017444, "grad_norm": 0.08821734766341188, "learning_rate": 2.19942691814788e-06, "loss": 0.3143, "step": 2761 }, { "epoch": 1.6129479600029195, "grad_norm": 0.0864162785961648, "learning_rate": 2.193050838483942e-06, "loss": 0.2887, "step": 2762 }, { "epoch": 1.6135318589883951, "grad_norm": 0.08790122143929105, "learning_rate": 2.186682875859557e-06, "loss": 0.2942, "step": 2763 }, { "epoch": 1.6141157579738705, "grad_norm": 0.09769830015985884, "learning_rate": 2.1803230368956296e-06, "loss": 0.316, "step": 2764 }, { "epoch": 1.614699656959346, "grad_norm": 0.0885148594194079, "learning_rate": 2.1739713282046017e-06, "loss": 0.3164, "step": 2765 }, { "epoch": 1.6152835559448215, "grad_norm": 0.0884166998038942, "learning_rate": 2.1676277563904747e-06, "loss": 0.2774, "step": 2766 }, { "epoch": 1.6158674549302972, "grad_norm": 0.09256084482924096, "learning_rate": 2.1612923280487883e-06, "loss": 0.3112, "step": 2767 }, { "epoch": 1.6164513539157725, "grad_norm": 0.08552577964143582, "learning_rate": 2.1549650497666096e-06, "loss": 0.2952, "step": 2768 }, { "epoch": 1.617035252901248, "grad_norm": 0.08692038569389646, "learning_rate": 2.1486459281225337e-06, "loss": 0.3255, "step": 2769 }, { "epoch": 1.6176191518867236, "grad_norm": 0.087001297115984, "learning_rate": 2.14233496968668e-06, "loss": 0.3019, "step": 2770 }, { "epoch": 1.6182030508721992, "grad_norm": 0.08911488393144147, "learning_rate": 2.136032181020673e-06, "loss": 0.3008, "step": 2771 }, { "epoch": 1.6187869498576746, "grad_norm": 0.08981873768012062, "learning_rate": 2.1297375686776522e-06, "loss": 0.3287, "step": 2772 }, { "epoch": 1.6193708488431502, "grad_norm": 0.0809664756483585, "learning_rate": 2.1234511392022473e-06, "loss": 0.2735, "step": 2773 }, { "epoch": 1.6199547478286256, "grad_norm": 0.08641940456302503, "learning_rate": 2.1171728991305797e-06, "loss": 0.2748, "step": 2774 }, { "epoch": 1.6205386468141012, "grad_norm": 0.0890750943932867, "learning_rate": 2.110902854990268e-06, "loss": 0.3362, "step": 2775 }, { "epoch": 1.6211225457995768, "grad_norm": 0.08914380361042873, "learning_rate": 2.1046410133003923e-06, "loss": 0.2761, "step": 2776 }, { "epoch": 1.6217064447850522, "grad_norm": 0.08435784976064617, "learning_rate": 2.0983873805715216e-06, "loss": 0.2884, "step": 2777 }, { "epoch": 1.6222903437705276, "grad_norm": 0.09662600479123168, "learning_rate": 2.0921419633056782e-06, "loss": 0.3459, "step": 2778 }, { "epoch": 1.6228742427560032, "grad_norm": 0.07757905508843743, "learning_rate": 2.085904767996343e-06, "loss": 0.2709, "step": 2779 }, { "epoch": 1.6234581417414788, "grad_norm": 0.08462139191175207, "learning_rate": 2.0796758011284567e-06, "loss": 0.2879, "step": 2780 }, { "epoch": 1.6240420407269542, "grad_norm": 0.08790617373124446, "learning_rate": 2.0734550691783937e-06, "loss": 0.305, "step": 2781 }, { "epoch": 1.6246259397124296, "grad_norm": 0.08789561021801137, "learning_rate": 2.0672425786139794e-06, "loss": 0.3021, "step": 2782 }, { "epoch": 1.6252098386979053, "grad_norm": 0.08639059624156115, "learning_rate": 2.0610383358944584e-06, "loss": 0.3195, "step": 2783 }, { "epoch": 1.6257937376833809, "grad_norm": 0.08418604935673894, "learning_rate": 2.0548423474705024e-06, "loss": 0.2973, "step": 2784 }, { "epoch": 1.6263776366688563, "grad_norm": 0.07967208113768394, "learning_rate": 2.0486546197842096e-06, "loss": 0.2767, "step": 2785 }, { "epoch": 1.6269615356543317, "grad_norm": 0.08844318904579422, "learning_rate": 2.0424751592690762e-06, "loss": 0.2931, "step": 2786 }, { "epoch": 1.6275454346398073, "grad_norm": 0.08684156980378255, "learning_rate": 2.0363039723500155e-06, "loss": 0.3191, "step": 2787 }, { "epoch": 1.628129333625283, "grad_norm": 0.08813581646675314, "learning_rate": 2.0301410654433307e-06, "loss": 0.2985, "step": 2788 }, { "epoch": 1.6287132326107583, "grad_norm": 0.0842606809051516, "learning_rate": 2.023986444956715e-06, "loss": 0.2881, "step": 2789 }, { "epoch": 1.629297131596234, "grad_norm": 0.08748691642252449, "learning_rate": 2.017840117289254e-06, "loss": 0.3222, "step": 2790 }, { "epoch": 1.6298810305817093, "grad_norm": 0.08299737791380213, "learning_rate": 2.0117020888313998e-06, "loss": 0.2833, "step": 2791 }, { "epoch": 1.630464929567185, "grad_norm": 0.09551275172564963, "learning_rate": 2.0055723659649907e-06, "loss": 0.3287, "step": 2792 }, { "epoch": 1.6310488285526605, "grad_norm": 0.08883398421140205, "learning_rate": 1.999450955063216e-06, "loss": 0.2925, "step": 2793 }, { "epoch": 1.631632727538136, "grad_norm": 0.09119944362576803, "learning_rate": 1.9933378624906218e-06, "loss": 0.336, "step": 2794 }, { "epoch": 1.6322166265236113, "grad_norm": 0.08452497372586572, "learning_rate": 1.9872330946031237e-06, "loss": 0.2973, "step": 2795 }, { "epoch": 1.632800525509087, "grad_norm": 0.08748338167480758, "learning_rate": 1.981136657747963e-06, "loss": 0.329, "step": 2796 }, { "epoch": 1.6333844244945626, "grad_norm": 0.08204549586684005, "learning_rate": 1.9750485582637245e-06, "loss": 0.2799, "step": 2797 }, { "epoch": 1.633968323480038, "grad_norm": 0.08373688485096518, "learning_rate": 1.9689688024803298e-06, "loss": 0.284, "step": 2798 }, { "epoch": 1.6345522224655133, "grad_norm": 0.08160855362834762, "learning_rate": 1.962897396719018e-06, "loss": 0.2923, "step": 2799 }, { "epoch": 1.635136121450989, "grad_norm": 0.0877292440781213, "learning_rate": 1.9568343472923524e-06, "loss": 0.29, "step": 2800 }, { "epoch": 1.6357200204364646, "grad_norm": 0.09426279962351981, "learning_rate": 1.950779660504204e-06, "loss": 0.3025, "step": 2801 }, { "epoch": 1.63630391942194, "grad_norm": 0.09533846640703032, "learning_rate": 1.944733342649748e-06, "loss": 0.34, "step": 2802 }, { "epoch": 1.6368878184074154, "grad_norm": 0.0862963593181169, "learning_rate": 1.938695400015467e-06, "loss": 0.3053, "step": 2803 }, { "epoch": 1.637471717392891, "grad_norm": 0.08601031136679824, "learning_rate": 1.932665838879123e-06, "loss": 0.3378, "step": 2804 }, { "epoch": 1.6380556163783666, "grad_norm": 0.08852833085665776, "learning_rate": 1.926644665509775e-06, "loss": 0.2856, "step": 2805 }, { "epoch": 1.638639515363842, "grad_norm": 0.0923336605665639, "learning_rate": 1.920631886167754e-06, "loss": 0.3087, "step": 2806 }, { "epoch": 1.6392234143493176, "grad_norm": 0.09039401497800534, "learning_rate": 1.9146275071046626e-06, "loss": 0.3071, "step": 2807 }, { "epoch": 1.639807313334793, "grad_norm": 0.10441194362169896, "learning_rate": 1.9086315345633786e-06, "loss": 0.3091, "step": 2808 }, { "epoch": 1.6403912123202686, "grad_norm": 0.09211807820104405, "learning_rate": 1.9026439747780278e-06, "loss": 0.3384, "step": 2809 }, { "epoch": 1.6409751113057442, "grad_norm": 0.08538865350082887, "learning_rate": 1.8966648339740002e-06, "loss": 0.3042, "step": 2810 }, { "epoch": 1.6415590102912196, "grad_norm": 0.09065215878231554, "learning_rate": 1.8906941183679227e-06, "loss": 0.3271, "step": 2811 }, { "epoch": 1.642142909276695, "grad_norm": 0.0911155008448629, "learning_rate": 1.8847318341676657e-06, "loss": 0.2858, "step": 2812 }, { "epoch": 1.6427268082621707, "grad_norm": 0.09568305093884481, "learning_rate": 1.8787779875723389e-06, "loss": 0.3493, "step": 2813 }, { "epoch": 1.6433107072476463, "grad_norm": 0.08974060867511091, "learning_rate": 1.8728325847722684e-06, "loss": 0.313, "step": 2814 }, { "epoch": 1.6438946062331217, "grad_norm": 0.09000628141096831, "learning_rate": 1.8668956319490128e-06, "loss": 0.3214, "step": 2815 }, { "epoch": 1.644478505218597, "grad_norm": 0.083988102123952, "learning_rate": 1.8609671352753367e-06, "loss": 0.3131, "step": 2816 }, { "epoch": 1.6450624042040727, "grad_norm": 0.08061939727691927, "learning_rate": 1.8550471009152138e-06, "loss": 0.2657, "step": 2817 }, { "epoch": 1.6456463031895483, "grad_norm": 0.09494695566086868, "learning_rate": 1.849135535023825e-06, "loss": 0.3344, "step": 2818 }, { "epoch": 1.6462302021750237, "grad_norm": 0.09101408887690664, "learning_rate": 1.843232443747538e-06, "loss": 0.3152, "step": 2819 }, { "epoch": 1.646814101160499, "grad_norm": 0.08823766650169107, "learning_rate": 1.8373378332239177e-06, "loss": 0.3197, "step": 2820 }, { "epoch": 1.6473980001459747, "grad_norm": 0.0867228269131213, "learning_rate": 1.8314517095817052e-06, "loss": 0.2856, "step": 2821 }, { "epoch": 1.6479818991314503, "grad_norm": 0.0942191422944636, "learning_rate": 1.8255740789408161e-06, "loss": 0.3222, "step": 2822 }, { "epoch": 1.6485657981169257, "grad_norm": 0.0852524830647117, "learning_rate": 1.8197049474123475e-06, "loss": 0.3264, "step": 2823 }, { "epoch": 1.6491496971024013, "grad_norm": 0.09277657374498283, "learning_rate": 1.8138443210985468e-06, "loss": 0.312, "step": 2824 }, { "epoch": 1.6497335960878767, "grad_norm": 0.08634251190111814, "learning_rate": 1.8079922060928223e-06, "loss": 0.2703, "step": 2825 }, { "epoch": 1.6503174950733523, "grad_norm": 0.0835556081410612, "learning_rate": 1.8021486084797368e-06, "loss": 0.2833, "step": 2826 }, { "epoch": 1.650901394058828, "grad_norm": 0.07786242073865464, "learning_rate": 1.7963135343349914e-06, "loss": 0.2693, "step": 2827 }, { "epoch": 1.6514852930443034, "grad_norm": 0.08077726624800871, "learning_rate": 1.7904869897254308e-06, "loss": 0.2669, "step": 2828 }, { "epoch": 1.6520691920297788, "grad_norm": 0.09410733696281158, "learning_rate": 1.7846689807090277e-06, "loss": 0.305, "step": 2829 }, { "epoch": 1.6526530910152544, "grad_norm": 0.0873838147339705, "learning_rate": 1.7788595133348796e-06, "loss": 0.3171, "step": 2830 }, { "epoch": 1.65323699000073, "grad_norm": 0.08501000416355124, "learning_rate": 1.7730585936432077e-06, "loss": 0.3141, "step": 2831 }, { "epoch": 1.6538208889862054, "grad_norm": 0.08448244596418149, "learning_rate": 1.7672662276653384e-06, "loss": 0.3078, "step": 2832 }, { "epoch": 1.6544047879716808, "grad_norm": 0.08236054708424953, "learning_rate": 1.7614824214237158e-06, "loss": 0.2818, "step": 2833 }, { "epoch": 1.6549886869571564, "grad_norm": 0.08961687076373194, "learning_rate": 1.7557071809318737e-06, "loss": 0.3043, "step": 2834 }, { "epoch": 1.655572585942632, "grad_norm": 0.08966125062359698, "learning_rate": 1.7499405121944423e-06, "loss": 0.3229, "step": 2835 }, { "epoch": 1.6561564849281074, "grad_norm": 0.08644704808585049, "learning_rate": 1.7441824212071455e-06, "loss": 0.3008, "step": 2836 }, { "epoch": 1.6567403839135828, "grad_norm": 0.08640366334464832, "learning_rate": 1.73843291395678e-06, "loss": 0.2858, "step": 2837 }, { "epoch": 1.6573242828990584, "grad_norm": 0.08527459936453075, "learning_rate": 1.7326919964212275e-06, "loss": 0.3025, "step": 2838 }, { "epoch": 1.657908181884534, "grad_norm": 0.09137096618953858, "learning_rate": 1.7269596745694295e-06, "loss": 0.3158, "step": 2839 }, { "epoch": 1.6584920808700094, "grad_norm": 0.08784737990698119, "learning_rate": 1.7212359543613943e-06, "loss": 0.3233, "step": 2840 }, { "epoch": 1.659075979855485, "grad_norm": 0.09796280754631886, "learning_rate": 1.7155208417481906e-06, "loss": 0.3288, "step": 2841 }, { "epoch": 1.6596598788409604, "grad_norm": 0.09225563456935665, "learning_rate": 1.7098143426719293e-06, "loss": 0.326, "step": 2842 }, { "epoch": 1.660243777826436, "grad_norm": 0.08495883735599802, "learning_rate": 1.7041164630657757e-06, "loss": 0.3134, "step": 2843 }, { "epoch": 1.6608276768119117, "grad_norm": 0.08962327958971614, "learning_rate": 1.6984272088539256e-06, "loss": 0.3387, "step": 2844 }, { "epoch": 1.661411575797387, "grad_norm": 0.09459777179992607, "learning_rate": 1.6927465859516057e-06, "loss": 0.3422, "step": 2845 }, { "epoch": 1.6619954747828625, "grad_norm": 0.0966135827028723, "learning_rate": 1.6870746002650784e-06, "loss": 0.3437, "step": 2846 }, { "epoch": 1.662579373768338, "grad_norm": 0.08188225500053915, "learning_rate": 1.6814112576916142e-06, "loss": 0.2493, "step": 2847 }, { "epoch": 1.6631632727538137, "grad_norm": 0.09254092871108087, "learning_rate": 1.6757565641195073e-06, "loss": 0.3281, "step": 2848 }, { "epoch": 1.663747171739289, "grad_norm": 0.09875466957438242, "learning_rate": 1.6701105254280513e-06, "loss": 0.3341, "step": 2849 }, { "epoch": 1.6643310707247645, "grad_norm": 0.08710059086267972, "learning_rate": 1.664473147487541e-06, "loss": 0.289, "step": 2850 }, { "epoch": 1.6649149697102401, "grad_norm": 0.08543081680385539, "learning_rate": 1.658844436159277e-06, "loss": 0.3087, "step": 2851 }, { "epoch": 1.6654988686957157, "grad_norm": 0.09055865593332652, "learning_rate": 1.6532243972955397e-06, "loss": 0.2621, "step": 2852 }, { "epoch": 1.6660827676811911, "grad_norm": 0.09099352315862634, "learning_rate": 1.6476130367395914e-06, "loss": 0.2943, "step": 2853 }, { "epoch": 1.6666666666666665, "grad_norm": 0.08771248916074113, "learning_rate": 1.64201036032568e-06, "loss": 0.3062, "step": 2854 }, { "epoch": 1.6672505656521421, "grad_norm": 0.08657371407490197, "learning_rate": 1.6364163738790128e-06, "loss": 0.2855, "step": 2855 }, { "epoch": 1.6678344646376178, "grad_norm": 0.09086800680632882, "learning_rate": 1.6308310832157737e-06, "loss": 0.3573, "step": 2856 }, { "epoch": 1.6684183636230934, "grad_norm": 0.0859248768080611, "learning_rate": 1.6252544941430982e-06, "loss": 0.2942, "step": 2857 }, { "epoch": 1.6690022626085688, "grad_norm": 0.0868921590952517, "learning_rate": 1.6196866124590737e-06, "loss": 0.2642, "step": 2858 }, { "epoch": 1.6695861615940442, "grad_norm": 0.08202683971878419, "learning_rate": 1.614127443952741e-06, "loss": 0.2871, "step": 2859 }, { "epoch": 1.6701700605795198, "grad_norm": 0.0813574407513182, "learning_rate": 1.608576994404074e-06, "loss": 0.2749, "step": 2860 }, { "epoch": 1.6707539595649954, "grad_norm": 0.0826994852748955, "learning_rate": 1.603035269583989e-06, "loss": 0.2551, "step": 2861 }, { "epoch": 1.6713378585504708, "grad_norm": 0.09270594841307919, "learning_rate": 1.5975022752543247e-06, "loss": 0.3568, "step": 2862 }, { "epoch": 1.6719217575359462, "grad_norm": 0.09215704057760184, "learning_rate": 1.5919780171678412e-06, "loss": 0.2922, "step": 2863 }, { "epoch": 1.6725056565214218, "grad_norm": 0.09436261714610418, "learning_rate": 1.5864625010682266e-06, "loss": 0.3007, "step": 2864 }, { "epoch": 1.6730895555068974, "grad_norm": 0.08803415424769305, "learning_rate": 1.580955732690065e-06, "loss": 0.2828, "step": 2865 }, { "epoch": 1.6736734544923728, "grad_norm": 0.08613432074645452, "learning_rate": 1.5754577177588581e-06, "loss": 0.298, "step": 2866 }, { "epoch": 1.6742573534778482, "grad_norm": 0.08708991342389852, "learning_rate": 1.5699684619909983e-06, "loss": 0.32, "step": 2867 }, { "epoch": 1.6748412524633238, "grad_norm": 0.08918958781392515, "learning_rate": 1.5644879710937722e-06, "loss": 0.2811, "step": 2868 }, { "epoch": 1.6754251514487994, "grad_norm": 0.09647483575386433, "learning_rate": 1.5590162507653573e-06, "loss": 0.3234, "step": 2869 }, { "epoch": 1.6760090504342748, "grad_norm": 0.09090357012613853, "learning_rate": 1.5535533066948062e-06, "loss": 0.3009, "step": 2870 }, { "epoch": 1.6765929494197502, "grad_norm": 0.09817790915914208, "learning_rate": 1.5480991445620541e-06, "loss": 0.3179, "step": 2871 }, { "epoch": 1.6771768484052259, "grad_norm": 0.09326880861715198, "learning_rate": 1.5426537700378985e-06, "loss": 0.2969, "step": 2872 }, { "epoch": 1.6777607473907015, "grad_norm": 0.08514563612095435, "learning_rate": 1.5372171887840026e-06, "loss": 0.3245, "step": 2873 }, { "epoch": 1.678344646376177, "grad_norm": 0.09233762069652697, "learning_rate": 1.5317894064528905e-06, "loss": 0.3525, "step": 2874 }, { "epoch": 1.6789285453616525, "grad_norm": 0.08763527459581145, "learning_rate": 1.5263704286879311e-06, "loss": 0.306, "step": 2875 }, { "epoch": 1.6795124443471279, "grad_norm": 0.09491014588886987, "learning_rate": 1.5209602611233465e-06, "loss": 0.3092, "step": 2876 }, { "epoch": 1.6800963433326035, "grad_norm": 0.08725614081517413, "learning_rate": 1.5155589093841939e-06, "loss": 0.2881, "step": 2877 }, { "epoch": 1.680680242318079, "grad_norm": 0.09066333788663707, "learning_rate": 1.5101663790863597e-06, "loss": 0.3024, "step": 2878 }, { "epoch": 1.6812641413035545, "grad_norm": 0.09222853694764259, "learning_rate": 1.5047826758365748e-06, "loss": 0.3706, "step": 2879 }, { "epoch": 1.68184804028903, "grad_norm": 0.0833247419085825, "learning_rate": 1.4994078052323767e-06, "loss": 0.2856, "step": 2880 }, { "epoch": 1.6824319392745055, "grad_norm": 0.08574684559623595, "learning_rate": 1.4940417728621236e-06, "loss": 0.2837, "step": 2881 }, { "epoch": 1.6830158382599811, "grad_norm": 0.08295632028691116, "learning_rate": 1.488684584304988e-06, "loss": 0.2573, "step": 2882 }, { "epoch": 1.6835997372454565, "grad_norm": 0.08774934612779607, "learning_rate": 1.483336245130942e-06, "loss": 0.2992, "step": 2883 }, { "epoch": 1.684183636230932, "grad_norm": 0.09006749452618071, "learning_rate": 1.477996760900764e-06, "loss": 0.2748, "step": 2884 }, { "epoch": 1.6847675352164075, "grad_norm": 0.09093514233734055, "learning_rate": 1.4726661371660189e-06, "loss": 0.32, "step": 2885 }, { "epoch": 1.6853514342018832, "grad_norm": 0.09017808947471698, "learning_rate": 1.467344379469059e-06, "loss": 0.3113, "step": 2886 }, { "epoch": 1.6859353331873586, "grad_norm": 0.08174793719301952, "learning_rate": 1.4620314933430269e-06, "loss": 0.2661, "step": 2887 }, { "epoch": 1.686519232172834, "grad_norm": 0.0926178957170987, "learning_rate": 1.4567274843118296e-06, "loss": 0.3298, "step": 2888 }, { "epoch": 1.6871031311583096, "grad_norm": 0.08850302158132925, "learning_rate": 1.4514323578901545e-06, "loss": 0.3289, "step": 2889 }, { "epoch": 1.6876870301437852, "grad_norm": 0.08840663179358019, "learning_rate": 1.4461461195834491e-06, "loss": 0.2882, "step": 2890 }, { "epoch": 1.6882709291292608, "grad_norm": 0.07931221628370803, "learning_rate": 1.4408687748879157e-06, "loss": 0.2744, "step": 2891 }, { "epoch": 1.6888548281147362, "grad_norm": 0.08975772016815267, "learning_rate": 1.4356003292905197e-06, "loss": 0.3384, "step": 2892 }, { "epoch": 1.6894387271002116, "grad_norm": 0.09068081601005215, "learning_rate": 1.4303407882689635e-06, "loss": 0.321, "step": 2893 }, { "epoch": 1.6900226260856872, "grad_norm": 0.08117890451116522, "learning_rate": 1.4250901572917009e-06, "loss": 0.2679, "step": 2894 }, { "epoch": 1.6906065250711628, "grad_norm": 0.08519588278771616, "learning_rate": 1.4198484418179137e-06, "loss": 0.3079, "step": 2895 }, { "epoch": 1.6911904240566382, "grad_norm": 0.08748026127419362, "learning_rate": 1.4146156472975147e-06, "loss": 0.2849, "step": 2896 }, { "epoch": 1.6917743230421136, "grad_norm": 0.08535133896858342, "learning_rate": 1.4093917791711497e-06, "loss": 0.268, "step": 2897 }, { "epoch": 1.6923582220275892, "grad_norm": 0.08386635934416334, "learning_rate": 1.404176842870173e-06, "loss": 0.2889, "step": 2898 }, { "epoch": 1.6929421210130648, "grad_norm": 0.08958681715647361, "learning_rate": 1.3989708438166605e-06, "loss": 0.3176, "step": 2899 }, { "epoch": 1.6935260199985402, "grad_norm": 0.09242771566947783, "learning_rate": 1.3937737874233913e-06, "loss": 0.2996, "step": 2900 }, { "epoch": 1.6941099189840156, "grad_norm": 0.09236932936044999, "learning_rate": 1.3885856790938457e-06, "loss": 0.3581, "step": 2901 }, { "epoch": 1.6946938179694913, "grad_norm": 0.09010465956174579, "learning_rate": 1.383406524222206e-06, "loss": 0.2986, "step": 2902 }, { "epoch": 1.6952777169549669, "grad_norm": 0.09465353299551747, "learning_rate": 1.3782363281933387e-06, "loss": 0.3156, "step": 2903 }, { "epoch": 1.6958616159404423, "grad_norm": 0.08972017954108379, "learning_rate": 1.3730750963828033e-06, "loss": 0.3185, "step": 2904 }, { "epoch": 1.6964455149259177, "grad_norm": 0.08430528515282139, "learning_rate": 1.3679228341568308e-06, "loss": 0.264, "step": 2905 }, { "epoch": 1.6970294139113933, "grad_norm": 0.09387116164796085, "learning_rate": 1.362779546872327e-06, "loss": 0.3379, "step": 2906 }, { "epoch": 1.697613312896869, "grad_norm": 0.08381750684241812, "learning_rate": 1.357645239876879e-06, "loss": 0.2912, "step": 2907 }, { "epoch": 1.6981972118823445, "grad_norm": 0.09108634851954416, "learning_rate": 1.3525199185087223e-06, "loss": 0.3155, "step": 2908 }, { "epoch": 1.69878111086782, "grad_norm": 0.09294401537106343, "learning_rate": 1.3474035880967529e-06, "loss": 0.3341, "step": 2909 }, { "epoch": 1.6993650098532953, "grad_norm": 0.09117579742601305, "learning_rate": 1.3422962539605245e-06, "loss": 0.3098, "step": 2910 }, { "epoch": 1.699948908838771, "grad_norm": 0.08587039121245708, "learning_rate": 1.3371979214102293e-06, "loss": 0.3075, "step": 2911 }, { "epoch": 1.7005328078242465, "grad_norm": 0.08631671028364843, "learning_rate": 1.3321085957467107e-06, "loss": 0.2869, "step": 2912 }, { "epoch": 1.701116706809722, "grad_norm": 0.08469546362619754, "learning_rate": 1.3270282822614366e-06, "loss": 0.2836, "step": 2913 }, { "epoch": 1.7017006057951973, "grad_norm": 0.09052532413059522, "learning_rate": 1.321956986236509e-06, "loss": 0.3246, "step": 2914 }, { "epoch": 1.702284504780673, "grad_norm": 0.08844140041865178, "learning_rate": 1.3168947129446574e-06, "loss": 0.2991, "step": 2915 }, { "epoch": 1.7028684037661486, "grad_norm": 0.08372610789082963, "learning_rate": 1.3118414676492252e-06, "loss": 0.2768, "step": 2916 }, { "epoch": 1.703452302751624, "grad_norm": 0.09369679158146395, "learning_rate": 1.3067972556041753e-06, "loss": 0.3069, "step": 2917 }, { "epoch": 1.7040362017370994, "grad_norm": 0.0820610695134973, "learning_rate": 1.3017620820540721e-06, "loss": 0.2902, "step": 2918 }, { "epoch": 1.704620100722575, "grad_norm": 0.0962764796264189, "learning_rate": 1.2967359522340828e-06, "loss": 0.3494, "step": 2919 }, { "epoch": 1.7052039997080506, "grad_norm": 0.08294556184883646, "learning_rate": 1.2917188713699791e-06, "loss": 0.2533, "step": 2920 }, { "epoch": 1.705787898693526, "grad_norm": 0.08230205353107493, "learning_rate": 1.286710844678114e-06, "loss": 0.275, "step": 2921 }, { "epoch": 1.7063717976790014, "grad_norm": 0.08585206742994438, "learning_rate": 1.2817118773654381e-06, "loss": 0.3157, "step": 2922 }, { "epoch": 1.706955696664477, "grad_norm": 0.08124687061248298, "learning_rate": 1.2767219746294724e-06, "loss": 0.2946, "step": 2923 }, { "epoch": 1.7075395956499526, "grad_norm": 0.0821202042217555, "learning_rate": 1.271741141658317e-06, "loss": 0.2749, "step": 2924 }, { "epoch": 1.7081234946354282, "grad_norm": 0.0854514952533597, "learning_rate": 1.266769383630646e-06, "loss": 0.302, "step": 2925 }, { "epoch": 1.7087073936209036, "grad_norm": 0.08648104874003487, "learning_rate": 1.2618067057156901e-06, "loss": 0.3201, "step": 2926 }, { "epoch": 1.709291292606379, "grad_norm": 0.09296156257312196, "learning_rate": 1.2568531130732498e-06, "loss": 0.3411, "step": 2927 }, { "epoch": 1.7098751915918546, "grad_norm": 0.08872654045686106, "learning_rate": 1.2519086108536683e-06, "loss": 0.2986, "step": 2928 }, { "epoch": 1.7104590905773303, "grad_norm": 0.0886952509667533, "learning_rate": 1.2469732041978422e-06, "loss": 0.3341, "step": 2929 }, { "epoch": 1.7110429895628056, "grad_norm": 0.08575920301138647, "learning_rate": 1.2420468982372158e-06, "loss": 0.2818, "step": 2930 }, { "epoch": 1.711626888548281, "grad_norm": 0.08967092315476526, "learning_rate": 1.237129698093762e-06, "loss": 0.3059, "step": 2931 }, { "epoch": 1.7122107875337567, "grad_norm": 0.08446287824163413, "learning_rate": 1.2322216088799955e-06, "loss": 0.2875, "step": 2932 }, { "epoch": 1.7127946865192323, "grad_norm": 0.08488557670874923, "learning_rate": 1.227322635698952e-06, "loss": 0.2674, "step": 2933 }, { "epoch": 1.7133785855047077, "grad_norm": 0.09492979803338077, "learning_rate": 1.2224327836441863e-06, "loss": 0.3062, "step": 2934 }, { "epoch": 1.713962484490183, "grad_norm": 0.08453315099302815, "learning_rate": 1.2175520577997834e-06, "loss": 0.2894, "step": 2935 }, { "epoch": 1.7145463834756587, "grad_norm": 0.08374551363128548, "learning_rate": 1.2126804632403255e-06, "loss": 0.3004, "step": 2936 }, { "epoch": 1.7151302824611343, "grad_norm": 0.08961855701848664, "learning_rate": 1.207818005030904e-06, "loss": 0.3289, "step": 2937 }, { "epoch": 1.7157141814466097, "grad_norm": 0.08550803964919398, "learning_rate": 1.2029646882271173e-06, "loss": 0.2819, "step": 2938 }, { "epoch": 1.716298080432085, "grad_norm": 0.09098588673223762, "learning_rate": 1.1981205178750511e-06, "loss": 0.323, "step": 2939 }, { "epoch": 1.7168819794175607, "grad_norm": 0.08386563892505484, "learning_rate": 1.1932854990112896e-06, "loss": 0.2862, "step": 2940 }, { "epoch": 1.7174658784030363, "grad_norm": 0.091025200817462, "learning_rate": 1.1884596366628942e-06, "loss": 0.2993, "step": 2941 }, { "epoch": 1.718049777388512, "grad_norm": 0.08742826160033293, "learning_rate": 1.1836429358474077e-06, "loss": 0.2853, "step": 2942 }, { "epoch": 1.7186336763739873, "grad_norm": 0.08692877730864443, "learning_rate": 1.1788354015728543e-06, "loss": 0.3214, "step": 2943 }, { "epoch": 1.7192175753594627, "grad_norm": 0.09257177387049748, "learning_rate": 1.1740370388377188e-06, "loss": 0.3374, "step": 2944 }, { "epoch": 1.7198014743449384, "grad_norm": 0.08882830952576917, "learning_rate": 1.1692478526309558e-06, "loss": 0.3034, "step": 2945 }, { "epoch": 1.720385373330414, "grad_norm": 0.08901805873884211, "learning_rate": 1.1644678479319772e-06, "loss": 0.306, "step": 2946 }, { "epoch": 1.7209692723158894, "grad_norm": 0.08640763884924312, "learning_rate": 1.1596970297106458e-06, "loss": 0.2878, "step": 2947 }, { "epoch": 1.7215531713013648, "grad_norm": 0.09007519627500071, "learning_rate": 1.1549354029272786e-06, "loss": 0.3139, "step": 2948 }, { "epoch": 1.7221370702868404, "grad_norm": 0.0943400393222084, "learning_rate": 1.1501829725326307e-06, "loss": 0.3339, "step": 2949 }, { "epoch": 1.722720969272316, "grad_norm": 0.09046494587650966, "learning_rate": 1.1454397434679022e-06, "loss": 0.2807, "step": 2950 }, { "epoch": 1.7233048682577914, "grad_norm": 0.0899905642991717, "learning_rate": 1.1407057206647188e-06, "loss": 0.2936, "step": 2951 }, { "epoch": 1.7238887672432668, "grad_norm": 0.08863416799371446, "learning_rate": 1.1359809090451357e-06, "loss": 0.2971, "step": 2952 }, { "epoch": 1.7244726662287424, "grad_norm": 0.09000899012522381, "learning_rate": 1.131265313521639e-06, "loss": 0.3179, "step": 2953 }, { "epoch": 1.725056565214218, "grad_norm": 0.09636168097983253, "learning_rate": 1.126558938997121e-06, "loss": 0.3528, "step": 2954 }, { "epoch": 1.7256404641996934, "grad_norm": 0.08557012727625926, "learning_rate": 1.1218617903648966e-06, "loss": 0.2878, "step": 2955 }, { "epoch": 1.7262243631851688, "grad_norm": 0.09028539102549052, "learning_rate": 1.1171738725086833e-06, "loss": 0.3136, "step": 2956 }, { "epoch": 1.7268082621706444, "grad_norm": 0.07806628468507026, "learning_rate": 1.1124951903025981e-06, "loss": 0.2844, "step": 2957 }, { "epoch": 1.72739216115612, "grad_norm": 0.08254013932464654, "learning_rate": 1.1078257486111654e-06, "loss": 0.2686, "step": 2958 }, { "epoch": 1.7279760601415957, "grad_norm": 0.09639643577663715, "learning_rate": 1.1031655522892915e-06, "loss": 0.3113, "step": 2959 }, { "epoch": 1.728559959127071, "grad_norm": 0.08602150084595918, "learning_rate": 1.0985146061822794e-06, "loss": 0.2806, "step": 2960 }, { "epoch": 1.7291438581125465, "grad_norm": 0.08623921600278228, "learning_rate": 1.0938729151258065e-06, "loss": 0.3273, "step": 2961 }, { "epoch": 1.729727757098022, "grad_norm": 0.0889726656382713, "learning_rate": 1.0892404839459269e-06, "loss": 0.2868, "step": 2962 }, { "epoch": 1.7303116560834977, "grad_norm": 0.08351306512214322, "learning_rate": 1.0846173174590802e-06, "loss": 0.2933, "step": 2963 }, { "epoch": 1.730895555068973, "grad_norm": 0.08729588232491496, "learning_rate": 1.0800034204720588e-06, "loss": 0.2973, "step": 2964 }, { "epoch": 1.7314794540544485, "grad_norm": 0.08934292639051672, "learning_rate": 1.0753987977820214e-06, "loss": 0.2952, "step": 2965 }, { "epoch": 1.732063353039924, "grad_norm": 0.08452559307592387, "learning_rate": 1.07080345417649e-06, "loss": 0.2872, "step": 2966 }, { "epoch": 1.7326472520253997, "grad_norm": 0.0864419768319687, "learning_rate": 1.0662173944333288e-06, "loss": 0.297, "step": 2967 }, { "epoch": 1.733231151010875, "grad_norm": 0.0928105314811366, "learning_rate": 1.06164062332076e-06, "loss": 0.3349, "step": 2968 }, { "epoch": 1.7338150499963505, "grad_norm": 0.08123975907906995, "learning_rate": 1.0570731455973415e-06, "loss": 0.2701, "step": 2969 }, { "epoch": 1.7343989489818261, "grad_norm": 0.08374924368057422, "learning_rate": 1.052514966011966e-06, "loss": 0.3037, "step": 2970 }, { "epoch": 1.7349828479673017, "grad_norm": 0.08172957950708348, "learning_rate": 1.0479660893038702e-06, "loss": 0.2718, "step": 2971 }, { "epoch": 1.7355667469527771, "grad_norm": 0.09166963145542123, "learning_rate": 1.043426520202605e-06, "loss": 0.3478, "step": 2972 }, { "epoch": 1.7361506459382525, "grad_norm": 0.08215131072744351, "learning_rate": 1.0388962634280543e-06, "loss": 0.2784, "step": 2973 }, { "epoch": 1.7367345449237281, "grad_norm": 0.08444532146926187, "learning_rate": 1.0343753236904152e-06, "loss": 0.2749, "step": 2974 }, { "epoch": 1.7373184439092038, "grad_norm": 0.08156208800269321, "learning_rate": 1.029863705690195e-06, "loss": 0.295, "step": 2975 }, { "epoch": 1.7379023428946794, "grad_norm": 0.09241968789558193, "learning_rate": 1.0253614141182167e-06, "loss": 0.3024, "step": 2976 }, { "epoch": 1.7384862418801548, "grad_norm": 0.0891009168504168, "learning_rate": 1.0208684536555968e-06, "loss": 0.2807, "step": 2977 }, { "epoch": 1.7390701408656302, "grad_norm": 0.09109123400377071, "learning_rate": 1.016384828973761e-06, "loss": 0.3012, "step": 2978 }, { "epoch": 1.7396540398511058, "grad_norm": 0.09246721685207267, "learning_rate": 1.0119105447344203e-06, "loss": 0.3409, "step": 2979 }, { "epoch": 1.7402379388365814, "grad_norm": 0.08392403732458921, "learning_rate": 1.007445605589573e-06, "loss": 0.2931, "step": 2980 }, { "epoch": 1.7408218378220568, "grad_norm": 0.09134501782471835, "learning_rate": 1.0029900161815109e-06, "loss": 0.3108, "step": 2981 }, { "epoch": 1.7414057368075322, "grad_norm": 0.09199201788964816, "learning_rate": 9.985437811427934e-07, "loss": 0.337, "step": 2982 }, { "epoch": 1.7419896357930078, "grad_norm": 0.09113768966915169, "learning_rate": 9.941069050962626e-07, "loss": 0.2886, "step": 2983 }, { "epoch": 1.7425735347784834, "grad_norm": 0.09292052082915925, "learning_rate": 9.896793926550252e-07, "loss": 0.2772, "step": 2984 }, { "epoch": 1.7431574337639588, "grad_norm": 0.08639553034714839, "learning_rate": 9.8526124842245e-07, "loss": 0.2905, "step": 2985 }, { "epoch": 1.7437413327494342, "grad_norm": 0.08185908633950559, "learning_rate": 9.808524769921756e-07, "loss": 0.2672, "step": 2986 }, { "epoch": 1.7443252317349098, "grad_norm": 0.09017380988675185, "learning_rate": 9.764530829480822e-07, "loss": 0.3261, "step": 2987 }, { "epoch": 1.7449091307203854, "grad_norm": 0.08275666077333796, "learning_rate": 9.720630708643131e-07, "loss": 0.2727, "step": 2988 }, { "epoch": 1.7454930297058608, "grad_norm": 0.091037773571954, "learning_rate": 9.67682445305248e-07, "loss": 0.3043, "step": 2989 }, { "epoch": 1.7460769286913362, "grad_norm": 0.09273534928984468, "learning_rate": 9.63311210825505e-07, "loss": 0.3309, "step": 2990 }, { "epoch": 1.7466608276768119, "grad_norm": 0.08726802834861616, "learning_rate": 9.589493719699517e-07, "loss": 0.3078, "step": 2991 }, { "epoch": 1.7472447266622875, "grad_norm": 0.08983517146201521, "learning_rate": 9.545969332736748e-07, "loss": 0.3413, "step": 2992 }, { "epoch": 1.747828625647763, "grad_norm": 0.08116121856252766, "learning_rate": 9.502538992619892e-07, "loss": 0.2486, "step": 2993 }, { "epoch": 1.7484125246332385, "grad_norm": 0.0869316715608436, "learning_rate": 9.459202744504359e-07, "loss": 0.2675, "step": 2994 }, { "epoch": 1.7489964236187139, "grad_norm": 0.08010483100712715, "learning_rate": 9.415960633447674e-07, "loss": 0.2829, "step": 2995 }, { "epoch": 1.7495803226041895, "grad_norm": 0.08113421856306109, "learning_rate": 9.372812704409551e-07, "loss": 0.2704, "step": 2996 }, { "epoch": 1.7501642215896651, "grad_norm": 0.09310440632102926, "learning_rate": 9.329759002251726e-07, "loss": 0.3333, "step": 2997 }, { "epoch": 1.7507481205751405, "grad_norm": 0.08262084561836351, "learning_rate": 9.286799571737981e-07, "loss": 0.2892, "step": 2998 }, { "epoch": 1.751332019560616, "grad_norm": 0.08936619163158194, "learning_rate": 9.243934457534098e-07, "loss": 0.3014, "step": 2999 }, { "epoch": 1.7519159185460915, "grad_norm": 0.0890498866282284, "learning_rate": 9.201163704207771e-07, "loss": 0.3043, "step": 3000 }, { "epoch": 1.7524998175315671, "grad_norm": 0.08791197418949087, "learning_rate": 9.158487356228618e-07, "loss": 0.2825, "step": 3001 }, { "epoch": 1.7530837165170425, "grad_norm": 0.08507961298222336, "learning_rate": 9.115905457968077e-07, "loss": 0.2833, "step": 3002 }, { "epoch": 1.753667615502518, "grad_norm": 0.0846924221177898, "learning_rate": 9.073418053699368e-07, "loss": 0.2969, "step": 3003 }, { "epoch": 1.7542515144879935, "grad_norm": 0.0859236813806699, "learning_rate": 9.031025187597519e-07, "loss": 0.2938, "step": 3004 }, { "epoch": 1.7548354134734692, "grad_norm": 0.08240032979556106, "learning_rate": 8.988726903739197e-07, "loss": 0.2664, "step": 3005 }, { "epoch": 1.7554193124589446, "grad_norm": 0.08099313827466989, "learning_rate": 8.946523246102811e-07, "loss": 0.2803, "step": 3006 }, { "epoch": 1.75600321144442, "grad_norm": 0.09337904933476099, "learning_rate": 8.904414258568306e-07, "loss": 0.3051, "step": 3007 }, { "epoch": 1.7565871104298956, "grad_norm": 0.08552912116314822, "learning_rate": 8.862399984917214e-07, "loss": 0.2655, "step": 3008 }, { "epoch": 1.7571710094153712, "grad_norm": 0.0942186119187344, "learning_rate": 8.820480468832649e-07, "loss": 0.3031, "step": 3009 }, { "epoch": 1.7577549084008468, "grad_norm": 0.09393682039932542, "learning_rate": 8.778655753899124e-07, "loss": 0.3104, "step": 3010 }, { "epoch": 1.7583388073863222, "grad_norm": 0.084221298223468, "learning_rate": 8.736925883602665e-07, "loss": 0.2955, "step": 3011 }, { "epoch": 1.7589227063717976, "grad_norm": 0.08520629585464452, "learning_rate": 8.695290901330611e-07, "loss": 0.3108, "step": 3012 }, { "epoch": 1.7595066053572732, "grad_norm": 0.08541261759821231, "learning_rate": 8.653750850371667e-07, "loss": 0.2855, "step": 3013 }, { "epoch": 1.7600905043427488, "grad_norm": 0.08529473247040445, "learning_rate": 8.612305773915886e-07, "loss": 0.2953, "step": 3014 }, { "epoch": 1.7606744033282242, "grad_norm": 0.08761492507206504, "learning_rate": 8.570955715054496e-07, "loss": 0.315, "step": 3015 }, { "epoch": 1.7612583023136996, "grad_norm": 0.0804918084470463, "learning_rate": 8.529700716780009e-07, "loss": 0.3068, "step": 3016 }, { "epoch": 1.7618422012991752, "grad_norm": 0.08777908778032416, "learning_rate": 8.488540821986035e-07, "loss": 0.2951, "step": 3017 }, { "epoch": 1.7624261002846509, "grad_norm": 0.08639173166816312, "learning_rate": 8.447476073467309e-07, "loss": 0.2963, "step": 3018 }, { "epoch": 1.7630099992701262, "grad_norm": 0.09183133098271275, "learning_rate": 8.406506513919721e-07, "loss": 0.303, "step": 3019 }, { "epoch": 1.7635938982556016, "grad_norm": 0.0932024558026017, "learning_rate": 8.365632185940109e-07, "loss": 0.3013, "step": 3020 }, { "epoch": 1.7641777972410773, "grad_norm": 0.08240360124739192, "learning_rate": 8.3248531320263e-07, "loss": 0.3003, "step": 3021 }, { "epoch": 1.7647616962265529, "grad_norm": 0.09087553801247003, "learning_rate": 8.284169394577124e-07, "loss": 0.2818, "step": 3022 }, { "epoch": 1.7653455952120283, "grad_norm": 0.09172986435173691, "learning_rate": 8.243581015892221e-07, "loss": 0.2997, "step": 3023 }, { "epoch": 1.7659294941975037, "grad_norm": 0.08371396599894133, "learning_rate": 8.203088038172169e-07, "loss": 0.2599, "step": 3024 }, { "epoch": 1.7665133931829793, "grad_norm": 0.08009921972977734, "learning_rate": 8.1626905035183e-07, "loss": 0.2597, "step": 3025 }, { "epoch": 1.767097292168455, "grad_norm": 0.08586317921727558, "learning_rate": 8.122388453932728e-07, "loss": 0.3098, "step": 3026 }, { "epoch": 1.7676811911539305, "grad_norm": 0.07811799704291257, "learning_rate": 8.082181931318311e-07, "loss": 0.2596, "step": 3027 }, { "epoch": 1.768265090139406, "grad_norm": 0.09090016460432807, "learning_rate": 8.042070977478533e-07, "loss": 0.2787, "step": 3028 }, { "epoch": 1.7688489891248813, "grad_norm": 0.08647282352399725, "learning_rate": 8.002055634117578e-07, "loss": 0.3046, "step": 3029 }, { "epoch": 1.769432888110357, "grad_norm": 0.08739395863698797, "learning_rate": 7.962135942840188e-07, "loss": 0.3129, "step": 3030 }, { "epoch": 1.7700167870958325, "grad_norm": 0.08712557044696835, "learning_rate": 7.922311945151629e-07, "loss": 0.321, "step": 3031 }, { "epoch": 1.770600686081308, "grad_norm": 0.08862325434085755, "learning_rate": 7.882583682457734e-07, "loss": 0.3155, "step": 3032 }, { "epoch": 1.7711845850667833, "grad_norm": 0.085347305623251, "learning_rate": 7.84295119606473e-07, "loss": 0.2941, "step": 3033 }, { "epoch": 1.771768484052259, "grad_norm": 0.08797636996402192, "learning_rate": 7.803414527179343e-07, "loss": 0.3146, "step": 3034 }, { "epoch": 1.7723523830377346, "grad_norm": 0.0828627866871808, "learning_rate": 7.76397371690859e-07, "loss": 0.252, "step": 3035 }, { "epoch": 1.77293628202321, "grad_norm": 0.08779595066660646, "learning_rate": 7.72462880625986e-07, "loss": 0.3018, "step": 3036 }, { "epoch": 1.7735201810086854, "grad_norm": 0.08656645871220332, "learning_rate": 7.685379836140872e-07, "loss": 0.2993, "step": 3037 }, { "epoch": 1.774104079994161, "grad_norm": 0.08125653245443104, "learning_rate": 7.646226847359506e-07, "loss": 0.2961, "step": 3038 }, { "epoch": 1.7746879789796366, "grad_norm": 0.08887968957868758, "learning_rate": 7.607169880623955e-07, "loss": 0.3179, "step": 3039 }, { "epoch": 1.775271877965112, "grad_norm": 0.08414636763413312, "learning_rate": 7.568208976542491e-07, "loss": 0.2969, "step": 3040 }, { "epoch": 1.7758557769505874, "grad_norm": 0.0867347530000045, "learning_rate": 7.529344175623521e-07, "loss": 0.2988, "step": 3041 }, { "epoch": 1.776439675936063, "grad_norm": 0.0807608560656295, "learning_rate": 7.490575518275589e-07, "loss": 0.2848, "step": 3042 }, { "epoch": 1.7770235749215386, "grad_norm": 0.08463906733364057, "learning_rate": 7.451903044807185e-07, "loss": 0.2863, "step": 3043 }, { "epoch": 1.7776074739070142, "grad_norm": 0.08944513773747939, "learning_rate": 7.4133267954269e-07, "loss": 0.2696, "step": 3044 }, { "epoch": 1.7781913728924896, "grad_norm": 0.085166543897723, "learning_rate": 7.374846810243197e-07, "loss": 0.3105, "step": 3045 }, { "epoch": 1.778775271877965, "grad_norm": 0.08780469497470643, "learning_rate": 7.336463129264437e-07, "loss": 0.3144, "step": 3046 }, { "epoch": 1.7793591708634406, "grad_norm": 0.0852400028484058, "learning_rate": 7.298175792398976e-07, "loss": 0.2948, "step": 3047 }, { "epoch": 1.7799430698489163, "grad_norm": 0.08876687730707877, "learning_rate": 7.25998483945487e-07, "loss": 0.3291, "step": 3048 }, { "epoch": 1.7805269688343917, "grad_norm": 0.0822943977696894, "learning_rate": 7.22189031013999e-07, "loss": 0.2892, "step": 3049 }, { "epoch": 1.781110867819867, "grad_norm": 0.08100102520143981, "learning_rate": 7.183892244062018e-07, "loss": 0.2823, "step": 3050 }, { "epoch": 1.7816947668053427, "grad_norm": 0.08761430484192076, "learning_rate": 7.145990680728243e-07, "loss": 0.3076, "step": 3051 }, { "epoch": 1.7822786657908183, "grad_norm": 0.07635891776755517, "learning_rate": 7.10818565954573e-07, "loss": 0.2603, "step": 3052 }, { "epoch": 1.7828625647762937, "grad_norm": 0.09117143001344066, "learning_rate": 7.07047721982107e-07, "loss": 0.3289, "step": 3053 }, { "epoch": 1.783446463761769, "grad_norm": 0.08634730509111568, "learning_rate": 7.032865400760469e-07, "loss": 0.3185, "step": 3054 }, { "epoch": 1.7840303627472447, "grad_norm": 0.09451559054710895, "learning_rate": 6.995350241469701e-07, "loss": 0.3155, "step": 3055 }, { "epoch": 1.7846142617327203, "grad_norm": 0.0823352823972901, "learning_rate": 6.957931780954008e-07, "loss": 0.2955, "step": 3056 }, { "epoch": 1.7851981607181957, "grad_norm": 0.08495262892658528, "learning_rate": 6.920610058118105e-07, "loss": 0.2995, "step": 3057 }, { "epoch": 1.785782059703671, "grad_norm": 0.09586823908361528, "learning_rate": 6.883385111766139e-07, "loss": 0.3352, "step": 3058 }, { "epoch": 1.7863659586891467, "grad_norm": 0.0874088174126061, "learning_rate": 6.846256980601596e-07, "loss": 0.2785, "step": 3059 }, { "epoch": 1.7869498576746223, "grad_norm": 0.08804134301821614, "learning_rate": 6.809225703227352e-07, "loss": 0.2867, "step": 3060 }, { "epoch": 1.787533756660098, "grad_norm": 0.0822818106212217, "learning_rate": 6.772291318145541e-07, "loss": 0.2963, "step": 3061 }, { "epoch": 1.7881176556455733, "grad_norm": 0.08095725615383709, "learning_rate": 6.735453863757602e-07, "loss": 0.2578, "step": 3062 }, { "epoch": 1.7887015546310487, "grad_norm": 0.08929214937029839, "learning_rate": 6.698713378364142e-07, "loss": 0.309, "step": 3063 }, { "epoch": 1.7892854536165244, "grad_norm": 0.08593046562567491, "learning_rate": 6.662069900164969e-07, "loss": 0.2812, "step": 3064 }, { "epoch": 1.789869352602, "grad_norm": 0.08395241110565031, "learning_rate": 6.625523467259043e-07, "loss": 0.3269, "step": 3065 }, { "epoch": 1.7904532515874754, "grad_norm": 0.08592628785808448, "learning_rate": 6.589074117644411e-07, "loss": 0.2959, "step": 3066 }, { "epoch": 1.7910371505729508, "grad_norm": 0.08525183135503131, "learning_rate": 6.552721889218194e-07, "loss": 0.2744, "step": 3067 }, { "epoch": 1.7916210495584264, "grad_norm": 0.09224937339041725, "learning_rate": 6.516466819776502e-07, "loss": 0.3281, "step": 3068 }, { "epoch": 1.792204948543902, "grad_norm": 0.08672520222786442, "learning_rate": 6.480308947014458e-07, "loss": 0.2959, "step": 3069 }, { "epoch": 1.7927888475293774, "grad_norm": 0.09171614378767146, "learning_rate": 6.444248308526125e-07, "loss": 0.3629, "step": 3070 }, { "epoch": 1.7933727465148528, "grad_norm": 0.08422371103303725, "learning_rate": 6.408284941804444e-07, "loss": 0.28, "step": 3071 }, { "epoch": 1.7939566455003284, "grad_norm": 0.08487931372264909, "learning_rate": 6.372418884241271e-07, "loss": 0.2773, "step": 3072 }, { "epoch": 1.794540544485804, "grad_norm": 0.08953771344785107, "learning_rate": 6.336650173127224e-07, "loss": 0.3132, "step": 3073 }, { "epoch": 1.7951244434712794, "grad_norm": 0.08455338204156364, "learning_rate": 6.300978845651728e-07, "loss": 0.3159, "step": 3074 }, { "epoch": 1.795708342456755, "grad_norm": 0.08569587471604956, "learning_rate": 6.26540493890303e-07, "loss": 0.3238, "step": 3075 }, { "epoch": 1.7962922414422304, "grad_norm": 0.08545555706054969, "learning_rate": 6.229928489867987e-07, "loss": 0.3303, "step": 3076 }, { "epoch": 1.796876140427706, "grad_norm": 0.08413409819251894, "learning_rate": 6.194549535432137e-07, "loss": 0.2833, "step": 3077 }, { "epoch": 1.7974600394131817, "grad_norm": 0.08540152318409865, "learning_rate": 6.159268112379734e-07, "loss": 0.2696, "step": 3078 }, { "epoch": 1.798043938398657, "grad_norm": 0.08376497793606724, "learning_rate": 6.124084257393525e-07, "loss": 0.3015, "step": 3079 }, { "epoch": 1.7986278373841325, "grad_norm": 0.0822330278866036, "learning_rate": 6.088998007054903e-07, "loss": 0.2924, "step": 3080 }, { "epoch": 1.799211736369608, "grad_norm": 0.08769126746247988, "learning_rate": 6.054009397843708e-07, "loss": 0.3018, "step": 3081 }, { "epoch": 1.7997956353550837, "grad_norm": 0.08594975620684452, "learning_rate": 6.019118466138285e-07, "loss": 0.2992, "step": 3082 }, { "epoch": 1.800379534340559, "grad_norm": 0.09035791205045064, "learning_rate": 5.98432524821545e-07, "loss": 0.3069, "step": 3083 }, { "epoch": 1.8009634333260345, "grad_norm": 0.08748843550877322, "learning_rate": 5.949629780250376e-07, "loss": 0.3143, "step": 3084 }, { "epoch": 1.80154733231151, "grad_norm": 0.08647245599536582, "learning_rate": 5.915032098316653e-07, "loss": 0.2845, "step": 3085 }, { "epoch": 1.8021312312969857, "grad_norm": 0.08688822822258051, "learning_rate": 5.880532238386161e-07, "loss": 0.2875, "step": 3086 }, { "epoch": 1.802715130282461, "grad_norm": 0.09050904847548709, "learning_rate": 5.846130236329073e-07, "loss": 0.3384, "step": 3087 }, { "epoch": 1.8032990292679365, "grad_norm": 0.08150156761677134, "learning_rate": 5.811826127913855e-07, "loss": 0.2455, "step": 3088 }, { "epoch": 1.8038829282534121, "grad_norm": 0.08488177011956685, "learning_rate": 5.777619948807156e-07, "loss": 0.2912, "step": 3089 }, { "epoch": 1.8044668272388877, "grad_norm": 0.08102749419731592, "learning_rate": 5.743511734573837e-07, "loss": 0.2816, "step": 3090 }, { "epoch": 1.8050507262243631, "grad_norm": 0.09145922148206728, "learning_rate": 5.709501520676853e-07, "loss": 0.3224, "step": 3091 }, { "epoch": 1.8056346252098388, "grad_norm": 0.0845353171994088, "learning_rate": 5.675589342477305e-07, "loss": 0.2568, "step": 3092 }, { "epoch": 1.8062185241953141, "grad_norm": 0.08254252964675693, "learning_rate": 5.641775235234381e-07, "loss": 0.3154, "step": 3093 }, { "epoch": 1.8068024231807898, "grad_norm": 0.08807306433661434, "learning_rate": 5.608059234105234e-07, "loss": 0.3488, "step": 3094 }, { "epoch": 1.8073863221662654, "grad_norm": 0.09194330435215792, "learning_rate": 5.5744413741451e-07, "loss": 0.2764, "step": 3095 }, { "epoch": 1.8079702211517408, "grad_norm": 0.09286594835323321, "learning_rate": 5.540921690307111e-07, "loss": 0.3152, "step": 3096 }, { "epoch": 1.8085541201372162, "grad_norm": 0.09242194282462925, "learning_rate": 5.507500217442341e-07, "loss": 0.3309, "step": 3097 }, { "epoch": 1.8091380191226918, "grad_norm": 0.0829179739131839, "learning_rate": 5.474176990299773e-07, "loss": 0.2783, "step": 3098 }, { "epoch": 1.8097219181081674, "grad_norm": 0.08728359592797344, "learning_rate": 5.440952043526215e-07, "loss": 0.2771, "step": 3099 }, { "epoch": 1.8103058170936428, "grad_norm": 0.0906163442971701, "learning_rate": 5.407825411666312e-07, "loss": 0.3209, "step": 3100 }, { "epoch": 1.8108897160791182, "grad_norm": 0.08751515029610528, "learning_rate": 5.374797129162468e-07, "loss": 0.2934, "step": 3101 }, { "epoch": 1.8114736150645938, "grad_norm": 0.08855352055453199, "learning_rate": 5.341867230354824e-07, "loss": 0.3119, "step": 3102 }, { "epoch": 1.8120575140500694, "grad_norm": 0.09499747718366744, "learning_rate": 5.309035749481295e-07, "loss": 0.2998, "step": 3103 }, { "epoch": 1.8126414130355448, "grad_norm": 0.09054013420382089, "learning_rate": 5.276302720677395e-07, "loss": 0.3097, "step": 3104 }, { "epoch": 1.8132253120210202, "grad_norm": 0.09049485827887115, "learning_rate": 5.243668177976291e-07, "loss": 0.3025, "step": 3105 }, { "epoch": 1.8138092110064958, "grad_norm": 0.09256500233982058, "learning_rate": 5.211132155308785e-07, "loss": 0.3367, "step": 3106 }, { "epoch": 1.8143931099919715, "grad_norm": 0.09084230641517785, "learning_rate": 5.178694686503205e-07, "loss": 0.3087, "step": 3107 }, { "epoch": 1.8149770089774468, "grad_norm": 0.08295783751607333, "learning_rate": 5.146355805285452e-07, "loss": 0.2545, "step": 3108 }, { "epoch": 1.8155609079629225, "grad_norm": 0.09807532023333741, "learning_rate": 5.114115545278875e-07, "loss": 0.3694, "step": 3109 }, { "epoch": 1.8161448069483979, "grad_norm": 0.08781865783393687, "learning_rate": 5.081973940004315e-07, "loss": 0.2991, "step": 3110 }, { "epoch": 1.8167287059338735, "grad_norm": 0.08867125783437477, "learning_rate": 5.049931022880061e-07, "loss": 0.2811, "step": 3111 }, { "epoch": 1.817312604919349, "grad_norm": 0.09178131080633112, "learning_rate": 5.017986827221733e-07, "loss": 0.3392, "step": 3112 }, { "epoch": 1.8178965039048245, "grad_norm": 0.09278436534145992, "learning_rate": 4.986141386242371e-07, "loss": 0.3189, "step": 3113 }, { "epoch": 1.8184804028902999, "grad_norm": 0.08344402580963332, "learning_rate": 4.954394733052293e-07, "loss": 0.2982, "step": 3114 }, { "epoch": 1.8190643018757755, "grad_norm": 0.08749614524649471, "learning_rate": 4.922746900659125e-07, "loss": 0.3085, "step": 3115 }, { "epoch": 1.8196482008612511, "grad_norm": 0.08871464909207126, "learning_rate": 4.89119792196776e-07, "loss": 0.3171, "step": 3116 }, { "epoch": 1.8202320998467265, "grad_norm": 0.08360816937299884, "learning_rate": 4.85974782978027e-07, "loss": 0.2955, "step": 3117 }, { "epoch": 1.820815998832202, "grad_norm": 0.08434723598211624, "learning_rate": 4.828396656795964e-07, "loss": 0.2833, "step": 3118 }, { "epoch": 1.8213998978176775, "grad_norm": 0.08645312192099495, "learning_rate": 4.797144435611256e-07, "loss": 0.2968, "step": 3119 }, { "epoch": 1.8219837968031531, "grad_norm": 0.07971285761386303, "learning_rate": 4.76599119871971e-07, "loss": 0.2674, "step": 3120 }, { "epoch": 1.8225676957886285, "grad_norm": 0.08551090044035203, "learning_rate": 4.734936978511961e-07, "loss": 0.2899, "step": 3121 }, { "epoch": 1.823151594774104, "grad_norm": 0.0859746648837016, "learning_rate": 4.7039818072756927e-07, "loss": 0.3286, "step": 3122 }, { "epoch": 1.8237354937595796, "grad_norm": 0.08514445556962379, "learning_rate": 4.6731257171956256e-07, "loss": 0.2758, "step": 3123 }, { "epoch": 1.8243193927450552, "grad_norm": 0.10271015957610101, "learning_rate": 4.642368740353431e-07, "loss": 0.3172, "step": 3124 }, { "epoch": 1.8249032917305306, "grad_norm": 0.08934740431920998, "learning_rate": 4.61171090872774e-07, "loss": 0.3243, "step": 3125 }, { "epoch": 1.8254871907160062, "grad_norm": 0.0829502672079169, "learning_rate": 4.581152254194121e-07, "loss": 0.2862, "step": 3126 }, { "epoch": 1.8260710897014816, "grad_norm": 0.08165001705936793, "learning_rate": 4.5506928085250033e-07, "loss": 0.2533, "step": 3127 }, { "epoch": 1.8266549886869572, "grad_norm": 0.08370476443769154, "learning_rate": 4.520332603389699e-07, "loss": 0.2718, "step": 3128 }, { "epoch": 1.8272388876724328, "grad_norm": 0.08953381487292546, "learning_rate": 4.490071670354279e-07, "loss": 0.3151, "step": 3129 }, { "epoch": 1.8278227866579082, "grad_norm": 0.08746542992240394, "learning_rate": 4.459910040881632e-07, "loss": 0.3301, "step": 3130 }, { "epoch": 1.8284066856433836, "grad_norm": 0.08833337579610819, "learning_rate": 4.42984774633145e-07, "loss": 0.2937, "step": 3131 }, { "epoch": 1.8289905846288592, "grad_norm": 0.09104743684788563, "learning_rate": 4.399884817960065e-07, "loss": 0.324, "step": 3132 }, { "epoch": 1.8295744836143348, "grad_norm": 0.0865120173720812, "learning_rate": 4.3700212869205117e-07, "loss": 0.2948, "step": 3133 }, { "epoch": 1.8301583825998102, "grad_norm": 0.08937958753467712, "learning_rate": 4.34025718426252e-07, "loss": 0.3074, "step": 3134 }, { "epoch": 1.8307422815852856, "grad_norm": 0.09283070073442257, "learning_rate": 4.310592540932401e-07, "loss": 0.3191, "step": 3135 }, { "epoch": 1.8313261805707612, "grad_norm": 0.09241751749053892, "learning_rate": 4.2810273877730843e-07, "loss": 0.3134, "step": 3136 }, { "epoch": 1.8319100795562369, "grad_norm": 0.08479948454838837, "learning_rate": 4.251561755524036e-07, "loss": 0.2808, "step": 3137 }, { "epoch": 1.8324939785417123, "grad_norm": 0.08005655693877543, "learning_rate": 4.222195674821239e-07, "loss": 0.2793, "step": 3138 }, { "epoch": 1.8330778775271876, "grad_norm": 0.07531909446312772, "learning_rate": 4.192929176197236e-07, "loss": 0.2769, "step": 3139 }, { "epoch": 1.8336617765126633, "grad_norm": 0.08896449065703195, "learning_rate": 4.1637622900809304e-07, "loss": 0.3061, "step": 3140 }, { "epoch": 1.8342456754981389, "grad_norm": 0.08283165747842987, "learning_rate": 4.1346950467977545e-07, "loss": 0.2648, "step": 3141 }, { "epoch": 1.8348295744836145, "grad_norm": 0.08683492675762382, "learning_rate": 4.1057274765694765e-07, "loss": 0.2911, "step": 3142 }, { "epoch": 1.83541347346909, "grad_norm": 0.08095043790132182, "learning_rate": 4.0768596095142497e-07, "loss": 0.2902, "step": 3143 }, { "epoch": 1.8359973724545653, "grad_norm": 0.08726280887649936, "learning_rate": 4.048091475646576e-07, "loss": 0.3085, "step": 3144 }, { "epoch": 1.836581271440041, "grad_norm": 0.08938714091842369, "learning_rate": 4.0194231048772514e-07, "loss": 0.3414, "step": 3145 }, { "epoch": 1.8371651704255165, "grad_norm": 0.08283539914434328, "learning_rate": 3.9908545270133436e-07, "loss": 0.3067, "step": 3146 }, { "epoch": 1.837749069410992, "grad_norm": 0.09029028808523237, "learning_rate": 3.9623857717581813e-07, "loss": 0.308, "step": 3147 }, { "epoch": 1.8383329683964673, "grad_norm": 0.08568372572495192, "learning_rate": 3.934016868711266e-07, "loss": 0.2951, "step": 3148 }, { "epoch": 1.838916867381943, "grad_norm": 0.08414929897731903, "learning_rate": 3.905747847368335e-07, "loss": 0.2746, "step": 3149 }, { "epoch": 1.8395007663674185, "grad_norm": 0.08729913046721026, "learning_rate": 3.8775787371212346e-07, "loss": 0.2939, "step": 3150 }, { "epoch": 1.840084665352894, "grad_norm": 0.07717404846397707, "learning_rate": 3.8495095672579584e-07, "loss": 0.2647, "step": 3151 }, { "epoch": 1.8406685643383693, "grad_norm": 0.08842542775219478, "learning_rate": 3.8215403669625726e-07, "loss": 0.3049, "step": 3152 }, { "epoch": 1.841252463323845, "grad_norm": 0.09616648768180032, "learning_rate": 3.793671165315194e-07, "loss": 0.3105, "step": 3153 }, { "epoch": 1.8418363623093206, "grad_norm": 0.08362019132163073, "learning_rate": 3.765901991291998e-07, "loss": 0.2856, "step": 3154 }, { "epoch": 1.842420261294796, "grad_norm": 0.09026182211920644, "learning_rate": 3.738232873765146e-07, "loss": 0.3321, "step": 3155 }, { "epoch": 1.8430041602802714, "grad_norm": 0.08825138926307999, "learning_rate": 3.7106638415027594e-07, "loss": 0.3082, "step": 3156 }, { "epoch": 1.843588059265747, "grad_norm": 0.09158843522583991, "learning_rate": 3.6831949231689203e-07, "loss": 0.3301, "step": 3157 }, { "epoch": 1.8441719582512226, "grad_norm": 0.0926223279142062, "learning_rate": 3.65582614732356e-07, "loss": 0.2769, "step": 3158 }, { "epoch": 1.8447558572366982, "grad_norm": 0.08810397139788848, "learning_rate": 3.628557542422606e-07, "loss": 0.2925, "step": 3159 }, { "epoch": 1.8453397562221736, "grad_norm": 0.08772512903556026, "learning_rate": 3.6013891368177345e-07, "loss": 0.2886, "step": 3160 }, { "epoch": 1.845923655207649, "grad_norm": 0.0956377939420753, "learning_rate": 3.574320958756461e-07, "loss": 0.35, "step": 3161 }, { "epoch": 1.8465075541931246, "grad_norm": 0.09166591415145915, "learning_rate": 3.547353036382117e-07, "loss": 0.3168, "step": 3162 }, { "epoch": 1.8470914531786002, "grad_norm": 0.08764560387972857, "learning_rate": 3.520485397733786e-07, "loss": 0.2783, "step": 3163 }, { "epoch": 1.8476753521640756, "grad_norm": 0.07858340384187243, "learning_rate": 3.493718070746299e-07, "loss": 0.2632, "step": 3164 }, { "epoch": 1.848259251149551, "grad_norm": 0.08139886749054236, "learning_rate": 3.467051083250161e-07, "loss": 0.2696, "step": 3165 }, { "epoch": 1.8488431501350266, "grad_norm": 0.08042409365262781, "learning_rate": 3.440484462971549e-07, "loss": 0.269, "step": 3166 }, { "epoch": 1.8494270491205023, "grad_norm": 0.08331518254206628, "learning_rate": 3.414018237532335e-07, "loss": 0.2786, "step": 3167 }, { "epoch": 1.8500109481059777, "grad_norm": 0.08575648045944358, "learning_rate": 3.3876524344499507e-07, "loss": 0.3016, "step": 3168 }, { "epoch": 1.850594847091453, "grad_norm": 0.08940979900533674, "learning_rate": 3.3613870811374574e-07, "loss": 0.3191, "step": 3169 }, { "epoch": 1.8511787460769287, "grad_norm": 0.08363231843162781, "learning_rate": 3.335222204903477e-07, "loss": 0.3182, "step": 3170 }, { "epoch": 1.8517626450624043, "grad_norm": 0.08546732959905708, "learning_rate": 3.3091578329521147e-07, "loss": 0.2886, "step": 3171 }, { "epoch": 1.8523465440478797, "grad_norm": 0.08666274491748625, "learning_rate": 3.283193992383049e-07, "loss": 0.2881, "step": 3172 }, { "epoch": 1.852930443033355, "grad_norm": 0.086107266914269, "learning_rate": 3.2573307101913756e-07, "loss": 0.2831, "step": 3173 }, { "epoch": 1.8535143420188307, "grad_norm": 0.0816698952350077, "learning_rate": 3.231568013267672e-07, "loss": 0.2613, "step": 3174 }, { "epoch": 1.8540982410043063, "grad_norm": 0.10040064874711961, "learning_rate": 3.205905928397923e-07, "loss": 0.3319, "step": 3175 }, { "epoch": 1.854682139989782, "grad_norm": 0.09510112403247538, "learning_rate": 3.180344482263487e-07, "loss": 0.3047, "step": 3176 }, { "epoch": 1.8552660389752573, "grad_norm": 0.08274922027184924, "learning_rate": 3.154883701441136e-07, "loss": 0.2786, "step": 3177 }, { "epoch": 1.8558499379607327, "grad_norm": 0.08428513436984085, "learning_rate": 3.129523612402918e-07, "loss": 0.2646, "step": 3178 }, { "epoch": 1.8564338369462083, "grad_norm": 0.09038491478339093, "learning_rate": 3.1042642415162526e-07, "loss": 0.3116, "step": 3179 }, { "epoch": 1.857017735931684, "grad_norm": 0.09235640555416, "learning_rate": 3.079105615043787e-07, "loss": 0.3392, "step": 3180 }, { "epoch": 1.8576016349171594, "grad_norm": 0.0877471052523126, "learning_rate": 3.0540477591434415e-07, "loss": 0.3108, "step": 3181 }, { "epoch": 1.8581855339026347, "grad_norm": 0.0868130433097828, "learning_rate": 3.0290906998683755e-07, "loss": 0.3125, "step": 3182 }, { "epoch": 1.8587694328881104, "grad_norm": 0.08345751929085199, "learning_rate": 3.0042344631669217e-07, "loss": 0.2897, "step": 3183 }, { "epoch": 1.859353331873586, "grad_norm": 0.08801228670676767, "learning_rate": 2.9794790748826184e-07, "loss": 0.3254, "step": 3184 }, { "epoch": 1.8599372308590614, "grad_norm": 0.08353795741257049, "learning_rate": 2.9548245607541326e-07, "loss": 0.28, "step": 3185 }, { "epoch": 1.8605211298445368, "grad_norm": 0.08235590441183382, "learning_rate": 2.9302709464152144e-07, "loss": 0.2814, "step": 3186 }, { "epoch": 1.8611050288300124, "grad_norm": 0.08805729959222112, "learning_rate": 2.905818257394799e-07, "loss": 0.311, "step": 3187 }, { "epoch": 1.861688927815488, "grad_norm": 0.07681558537527977, "learning_rate": 2.881466519116793e-07, "loss": 0.2772, "step": 3188 }, { "epoch": 1.8622728268009634, "grad_norm": 0.08036641884324645, "learning_rate": 2.857215756900189e-07, "loss": 0.2771, "step": 3189 }, { "epoch": 1.8628567257864388, "grad_norm": 0.08032463723345866, "learning_rate": 2.8330659959589944e-07, "loss": 0.2715, "step": 3190 }, { "epoch": 1.8634406247719144, "grad_norm": 0.0954244524573023, "learning_rate": 2.8090172614021804e-07, "loss": 0.3131, "step": 3191 }, { "epoch": 1.86402452375739, "grad_norm": 0.08559954087308912, "learning_rate": 2.7850695782337124e-07, "loss": 0.254, "step": 3192 }, { "epoch": 1.8646084227428656, "grad_norm": 0.08648066354234599, "learning_rate": 2.761222971352451e-07, "loss": 0.311, "step": 3193 }, { "epoch": 1.865192321728341, "grad_norm": 0.09212008206162198, "learning_rate": 2.737477465552174e-07, "loss": 0.3258, "step": 3194 }, { "epoch": 1.8657762207138164, "grad_norm": 0.09249238678439586, "learning_rate": 2.713833085521589e-07, "loss": 0.3528, "step": 3195 }, { "epoch": 1.866360119699292, "grad_norm": 0.0784969116848241, "learning_rate": 2.690289855844186e-07, "loss": 0.2884, "step": 3196 }, { "epoch": 1.8669440186847677, "grad_norm": 0.08322296783734734, "learning_rate": 2.666847800998362e-07, "loss": 0.273, "step": 3197 }, { "epoch": 1.867527917670243, "grad_norm": 0.08367432518948008, "learning_rate": 2.643506945357277e-07, "loss": 0.2964, "step": 3198 }, { "epoch": 1.8681118166557185, "grad_norm": 0.08821387079572156, "learning_rate": 2.620267313188862e-07, "loss": 0.2876, "step": 3199 }, { "epoch": 1.868695715641194, "grad_norm": 0.09439395646952041, "learning_rate": 2.5971289286558455e-07, "loss": 0.2875, "step": 3200 }, { "epoch": 1.8692796146266697, "grad_norm": 0.08251968266427383, "learning_rate": 2.574091815815649e-07, "loss": 0.2648, "step": 3201 }, { "epoch": 1.869863513612145, "grad_norm": 0.08074562249343893, "learning_rate": 2.5511559986204247e-07, "loss": 0.2764, "step": 3202 }, { "epoch": 1.8704474125976205, "grad_norm": 0.09345572642598844, "learning_rate": 2.528321500916986e-07, "loss": 0.3559, "step": 3203 }, { "epoch": 1.871031311583096, "grad_norm": 0.08316143094514963, "learning_rate": 2.505588346446808e-07, "loss": 0.2762, "step": 3204 }, { "epoch": 1.8716152105685717, "grad_norm": 0.09011478499702885, "learning_rate": 2.482956558846017e-07, "loss": 0.3311, "step": 3205 }, { "epoch": 1.8721991095540471, "grad_norm": 0.08173763284993298, "learning_rate": 2.460426161645324e-07, "loss": 0.2622, "step": 3206 }, { "epoch": 1.8727830085395225, "grad_norm": 0.0903047588989918, "learning_rate": 2.437997178270035e-07, "loss": 0.2931, "step": 3207 }, { "epoch": 1.8733669075249981, "grad_norm": 0.08327732648610187, "learning_rate": 2.4156696320399963e-07, "loss": 0.2481, "step": 3208 }, { "epoch": 1.8739508065104737, "grad_norm": 0.09953401014445015, "learning_rate": 2.3934435461695936e-07, "loss": 0.3377, "step": 3209 }, { "epoch": 1.8745347054959494, "grad_norm": 0.09486914589879174, "learning_rate": 2.371318943767753e-07, "loss": 0.3522, "step": 3210 }, { "epoch": 1.8751186044814248, "grad_norm": 0.08633497225929139, "learning_rate": 2.3492958478378402e-07, "loss": 0.2978, "step": 3211 }, { "epoch": 1.8757025034669002, "grad_norm": 0.08948453554790906, "learning_rate": 2.3273742812777166e-07, "loss": 0.3392, "step": 3212 }, { "epoch": 1.8762864024523758, "grad_norm": 0.09137347142710973, "learning_rate": 2.3055542668796617e-07, "loss": 0.3286, "step": 3213 }, { "epoch": 1.8768703014378514, "grad_norm": 0.08601566343777735, "learning_rate": 2.2838358273303717e-07, "loss": 0.271, "step": 3214 }, { "epoch": 1.8774542004233268, "grad_norm": 0.09196817463757273, "learning_rate": 2.26221898521094e-07, "loss": 0.3043, "step": 3215 }, { "epoch": 1.8780380994088022, "grad_norm": 0.09233887484526732, "learning_rate": 2.2407037629968431e-07, "loss": 0.3101, "step": 3216 }, { "epoch": 1.8786219983942778, "grad_norm": 0.09101848024759458, "learning_rate": 2.219290183057865e-07, "loss": 0.3406, "step": 3217 }, { "epoch": 1.8792058973797534, "grad_norm": 0.08707332392927268, "learning_rate": 2.1979782676581408e-07, "loss": 0.2823, "step": 3218 }, { "epoch": 1.8797897963652288, "grad_norm": 0.08574652341261767, "learning_rate": 2.1767680389560785e-07, "loss": 0.3078, "step": 3219 }, { "epoch": 1.8803736953507042, "grad_norm": 0.08913663581770685, "learning_rate": 2.1556595190043718e-07, "loss": 0.3285, "step": 3220 }, { "epoch": 1.8809575943361798, "grad_norm": 0.08148407354317028, "learning_rate": 2.1346527297499752e-07, "loss": 0.2684, "step": 3221 }, { "epoch": 1.8815414933216554, "grad_norm": 0.0886585664423024, "learning_rate": 2.1137476930340628e-07, "loss": 0.3221, "step": 3222 }, { "epoch": 1.8821253923071308, "grad_norm": 0.08504206100105413, "learning_rate": 2.0929444305920142e-07, "loss": 0.2999, "step": 3223 }, { "epoch": 1.8827092912926062, "grad_norm": 0.08482415835940113, "learning_rate": 2.0722429640533948e-07, "loss": 0.2803, "step": 3224 }, { "epoch": 1.8832931902780818, "grad_norm": 0.09359696465800074, "learning_rate": 2.051643314941909e-07, "loss": 0.2846, "step": 3225 }, { "epoch": 1.8838770892635575, "grad_norm": 0.09090901172024897, "learning_rate": 2.0311455046754581e-07, "loss": 0.3143, "step": 3226 }, { "epoch": 1.884460988249033, "grad_norm": 0.09231620863481034, "learning_rate": 2.0107495545659829e-07, "loss": 0.3085, "step": 3227 }, { "epoch": 1.8850448872345085, "grad_norm": 0.08557691916049623, "learning_rate": 1.990455485819587e-07, "loss": 0.328, "step": 3228 }, { "epoch": 1.8856287862199839, "grad_norm": 0.08967802607356805, "learning_rate": 1.9702633195363918e-07, "loss": 0.2931, "step": 3229 }, { "epoch": 1.8862126852054595, "grad_norm": 0.08984775322351805, "learning_rate": 1.9501730767106043e-07, "loss": 0.2887, "step": 3230 }, { "epoch": 1.886796584190935, "grad_norm": 0.084327084925075, "learning_rate": 1.9301847782304484e-07, "loss": 0.2983, "step": 3231 }, { "epoch": 1.8873804831764105, "grad_norm": 0.087843880604157, "learning_rate": 1.9102984448781337e-07, "loss": 0.3198, "step": 3232 }, { "epoch": 1.887964382161886, "grad_norm": 0.08420403460726374, "learning_rate": 1.8905140973299096e-07, "loss": 0.2801, "step": 3233 }, { "epoch": 1.8885482811473615, "grad_norm": 0.0856446334072406, "learning_rate": 1.870831756155933e-07, "loss": 0.3357, "step": 3234 }, { "epoch": 1.8891321801328371, "grad_norm": 0.08706567364922672, "learning_rate": 1.851251441820323e-07, "loss": 0.2643, "step": 3235 }, { "epoch": 1.8897160791183125, "grad_norm": 0.0840994581670595, "learning_rate": 1.8317731746811285e-07, "loss": 0.3048, "step": 3236 }, { "epoch": 1.890299978103788, "grad_norm": 0.08929142405897295, "learning_rate": 1.8123969749902714e-07, "loss": 0.331, "step": 3237 }, { "epoch": 1.8908838770892635, "grad_norm": 0.09214260950933911, "learning_rate": 1.7931228628935926e-07, "loss": 0.3226, "step": 3238 }, { "epoch": 1.8914677760747391, "grad_norm": 0.08952440201598354, "learning_rate": 1.773950858430762e-07, "loss": 0.3034, "step": 3239 }, { "epoch": 1.8920516750602145, "grad_norm": 0.08575494474170184, "learning_rate": 1.7548809815352785e-07, "loss": 0.2988, "step": 3240 }, { "epoch": 1.89263557404569, "grad_norm": 0.08967776053556868, "learning_rate": 1.7359132520344823e-07, "loss": 0.2944, "step": 3241 }, { "epoch": 1.8932194730311656, "grad_norm": 0.0905217245687232, "learning_rate": 1.717047689649487e-07, "loss": 0.2889, "step": 3242 }, { "epoch": 1.8938033720166412, "grad_norm": 0.08798346112449493, "learning_rate": 1.6982843139952022e-07, "loss": 0.2763, "step": 3243 }, { "epoch": 1.8943872710021168, "grad_norm": 0.08845869514860859, "learning_rate": 1.6796231445802892e-07, "loss": 0.2885, "step": 3244 }, { "epoch": 1.8949711699875922, "grad_norm": 0.0801173337726863, "learning_rate": 1.6610642008071166e-07, "loss": 0.2777, "step": 3245 }, { "epoch": 1.8955550689730676, "grad_norm": 0.08611777162238558, "learning_rate": 1.6426075019717935e-07, "loss": 0.3035, "step": 3246 }, { "epoch": 1.8961389679585432, "grad_norm": 0.08809922624531498, "learning_rate": 1.6242530672641143e-07, "loss": 0.2718, "step": 3247 }, { "epoch": 1.8967228669440188, "grad_norm": 0.08778870554924931, "learning_rate": 1.6060009157675472e-07, "loss": 0.3098, "step": 3248 }, { "epoch": 1.8973067659294942, "grad_norm": 0.08737386696733666, "learning_rate": 1.5878510664592116e-07, "loss": 0.2676, "step": 3249 }, { "epoch": 1.8978906649149696, "grad_norm": 0.08822833363471408, "learning_rate": 1.5698035382098687e-07, "loss": 0.2979, "step": 3250 }, { "epoch": 1.8984745639004452, "grad_norm": 0.08395711236802691, "learning_rate": 1.551858349783908e-07, "loss": 0.2729, "step": 3251 }, { "epoch": 1.8990584628859208, "grad_norm": 0.08473049457002894, "learning_rate": 1.5340155198392716e-07, "loss": 0.3039, "step": 3252 }, { "epoch": 1.8996423618713962, "grad_norm": 0.08374799363179396, "learning_rate": 1.5162750669274973e-07, "loss": 0.3058, "step": 3253 }, { "epoch": 1.9002262608568716, "grad_norm": 0.08593706081433256, "learning_rate": 1.4986370094937197e-07, "loss": 0.2909, "step": 3254 }, { "epoch": 1.9008101598423472, "grad_norm": 0.08041649434731642, "learning_rate": 1.4811013658765471e-07, "loss": 0.2799, "step": 3255 }, { "epoch": 1.9013940588278229, "grad_norm": 0.08125609306647201, "learning_rate": 1.46366815430814e-07, "loss": 0.2767, "step": 3256 }, { "epoch": 1.9019779578132983, "grad_norm": 0.09375353365454038, "learning_rate": 1.4463373929141766e-07, "loss": 0.3451, "step": 3257 }, { "epoch": 1.9025618567987737, "grad_norm": 0.08856147042232591, "learning_rate": 1.4291090997137547e-07, "loss": 0.2688, "step": 3258 }, { "epoch": 1.9031457557842493, "grad_norm": 0.08280965435879103, "learning_rate": 1.411983292619501e-07, "loss": 0.2484, "step": 3259 }, { "epoch": 1.9037296547697249, "grad_norm": 0.08087945643021759, "learning_rate": 1.3949599894374276e-07, "loss": 0.2816, "step": 3260 }, { "epoch": 1.9043135537552005, "grad_norm": 0.08801687802889809, "learning_rate": 1.3780392078670436e-07, "loss": 0.2955, "step": 3261 }, { "epoch": 1.904897452740676, "grad_norm": 0.08651643716513796, "learning_rate": 1.3612209655011866e-07, "loss": 0.2961, "step": 3262 }, { "epoch": 1.9054813517261513, "grad_norm": 0.08692799950530637, "learning_rate": 1.3445052798261137e-07, "loss": 0.3109, "step": 3263 }, { "epoch": 1.906065250711627, "grad_norm": 0.09346576979915062, "learning_rate": 1.3278921682214784e-07, "loss": 0.3268, "step": 3264 }, { "epoch": 1.9066491496971025, "grad_norm": 0.08225994338498523, "learning_rate": 1.3113816479602304e-07, "loss": 0.2879, "step": 3265 }, { "epoch": 1.907233048682578, "grad_norm": 0.0787449503179459, "learning_rate": 1.2949737362087156e-07, "loss": 0.2621, "step": 3266 }, { "epoch": 1.9078169476680533, "grad_norm": 0.08943117462885233, "learning_rate": 1.2786684500265546e-07, "loss": 0.3044, "step": 3267 }, { "epoch": 1.908400846653529, "grad_norm": 0.08684387836671656, "learning_rate": 1.262465806366664e-07, "loss": 0.2778, "step": 3268 }, { "epoch": 1.9089847456390046, "grad_norm": 0.08962391428894087, "learning_rate": 1.2463658220752683e-07, "loss": 0.2978, "step": 3269 }, { "epoch": 1.90956864462448, "grad_norm": 0.08668838859592846, "learning_rate": 1.230368513891822e-07, "loss": 0.3127, "step": 3270 }, { "epoch": 1.9101525436099553, "grad_norm": 0.08802568448704766, "learning_rate": 1.2144738984490533e-07, "loss": 0.2724, "step": 3271 }, { "epoch": 1.910736442595431, "grad_norm": 0.09058583762416003, "learning_rate": 1.1986819922729209e-07, "loss": 0.3037, "step": 3272 }, { "epoch": 1.9113203415809066, "grad_norm": 0.0810954665176793, "learning_rate": 1.1829928117825685e-07, "loss": 0.3, "step": 3273 }, { "epoch": 1.911904240566382, "grad_norm": 0.09066577386704298, "learning_rate": 1.1674063732903473e-07, "loss": 0.3053, "step": 3274 }, { "epoch": 1.9124881395518574, "grad_norm": 0.07614381639164666, "learning_rate": 1.1519226930017946e-07, "loss": 0.2386, "step": 3275 }, { "epoch": 1.913072038537333, "grad_norm": 0.09191819868655002, "learning_rate": 1.1365417870155881e-07, "loss": 0.325, "step": 3276 }, { "epoch": 1.9136559375228086, "grad_norm": 0.08988675625902402, "learning_rate": 1.1212636713235581e-07, "loss": 0.3016, "step": 3277 }, { "epoch": 1.9142398365082842, "grad_norm": 0.08604908631826888, "learning_rate": 1.1060883618106754e-07, "loss": 0.2973, "step": 3278 }, { "epoch": 1.9148237354937596, "grad_norm": 0.08546342209672117, "learning_rate": 1.0910158742550081e-07, "loss": 0.2894, "step": 3279 }, { "epoch": 1.915407634479235, "grad_norm": 0.08839708440543058, "learning_rate": 1.0760462243277204e-07, "loss": 0.3094, "step": 3280 }, { "epoch": 1.9159915334647106, "grad_norm": 0.08364180918908759, "learning_rate": 1.0611794275930398e-07, "loss": 0.2778, "step": 3281 }, { "epoch": 1.9165754324501862, "grad_norm": 0.08484096173957062, "learning_rate": 1.0464154995082909e-07, "loss": 0.2815, "step": 3282 }, { "epoch": 1.9171593314356616, "grad_norm": 0.08720387970814143, "learning_rate": 1.0317544554238058e-07, "loss": 0.2703, "step": 3283 }, { "epoch": 1.917743230421137, "grad_norm": 0.0875088214095832, "learning_rate": 1.0171963105829686e-07, "loss": 0.3293, "step": 3284 }, { "epoch": 1.9183271294066127, "grad_norm": 0.08658879451123655, "learning_rate": 1.0027410801221604e-07, "loss": 0.2928, "step": 3285 }, { "epoch": 1.9189110283920883, "grad_norm": 0.08606385025251614, "learning_rate": 9.883887790707814e-08, "loss": 0.3002, "step": 3286 }, { "epoch": 1.9194949273775637, "grad_norm": 0.09103801436682388, "learning_rate": 9.741394223512057e-08, "loss": 0.3278, "step": 3287 }, { "epoch": 1.920078826363039, "grad_norm": 0.09278706809997461, "learning_rate": 9.599930247787604e-08, "loss": 0.3046, "step": 3288 }, { "epoch": 1.9206627253485147, "grad_norm": 0.08790032190619898, "learning_rate": 9.459496010617464e-08, "loss": 0.321, "step": 3289 }, { "epoch": 1.9212466243339903, "grad_norm": 0.08771037432024043, "learning_rate": 9.320091658013841e-08, "loss": 0.2985, "step": 3290 }, { "epoch": 1.9218305233194657, "grad_norm": 0.0815763801142426, "learning_rate": 9.181717334918127e-08, "loss": 0.2738, "step": 3291 }, { "epoch": 1.922414422304941, "grad_norm": 0.09189762229267717, "learning_rate": 9.044373185200906e-08, "loss": 0.3186, "step": 3292 }, { "epoch": 1.9229983212904167, "grad_norm": 0.08175340394963908, "learning_rate": 8.908059351661725e-08, "loss": 0.2505, "step": 3293 }, { "epoch": 1.9235822202758923, "grad_norm": 0.0892198232076409, "learning_rate": 8.772775976028547e-08, "loss": 0.3007, "step": 3294 }, { "epoch": 1.924166119261368, "grad_norm": 0.08038418668350802, "learning_rate": 8.638523198958415e-08, "loss": 0.3113, "step": 3295 }, { "epoch": 1.9247500182468433, "grad_norm": 0.08457482903640633, "learning_rate": 8.505301160036339e-08, "loss": 0.27, "step": 3296 }, { "epoch": 1.9253339172323187, "grad_norm": 0.09187618281492985, "learning_rate": 8.373109997776185e-08, "loss": 0.2715, "step": 3297 }, { "epoch": 1.9259178162177943, "grad_norm": 0.08606832282058195, "learning_rate": 8.241949849619457e-08, "loss": 0.284, "step": 3298 }, { "epoch": 1.92650171520327, "grad_norm": 0.09634213886961326, "learning_rate": 8.11182085193607e-08, "loss": 0.359, "step": 3299 }, { "epoch": 1.9270856141887454, "grad_norm": 0.0850179988153824, "learning_rate": 7.982723140023906e-08, "loss": 0.262, "step": 3300 }, { "epoch": 1.9276695131742208, "grad_norm": 0.08280547960994977, "learning_rate": 7.854656848108044e-08, "loss": 0.3079, "step": 3301 }, { "epoch": 1.9282534121596964, "grad_norm": 0.08693320473453917, "learning_rate": 7.727622109341859e-08, "loss": 0.3018, "step": 3302 }, { "epoch": 1.928837311145172, "grad_norm": 0.08817154311269668, "learning_rate": 7.601619055805697e-08, "loss": 0.3026, "step": 3303 }, { "epoch": 1.9294212101306474, "grad_norm": 0.0930707679886058, "learning_rate": 7.476647818507542e-08, "loss": 0.3268, "step": 3304 }, { "epoch": 1.9300051091161228, "grad_norm": 0.07716650572211788, "learning_rate": 7.352708527382346e-08, "loss": 0.249, "step": 3305 }, { "epoch": 1.9305890081015984, "grad_norm": 0.08663374366585758, "learning_rate": 7.229801311292361e-08, "loss": 0.3117, "step": 3306 }, { "epoch": 1.931172907087074, "grad_norm": 0.08446434701252649, "learning_rate": 7.10792629802659e-08, "loss": 0.2686, "step": 3307 }, { "epoch": 1.9317568060725494, "grad_norm": 0.09667728570377489, "learning_rate": 6.987083614300893e-08, "loss": 0.3428, "step": 3308 }, { "epoch": 1.9323407050580248, "grad_norm": 0.08662023711036862, "learning_rate": 6.867273385757767e-08, "loss": 0.2953, "step": 3309 }, { "epoch": 1.9329246040435004, "grad_norm": 0.09403191520378865, "learning_rate": 6.748495736966454e-08, "loss": 0.3383, "step": 3310 }, { "epoch": 1.933508503028976, "grad_norm": 0.09031175202278693, "learning_rate": 6.630750791422169e-08, "loss": 0.3004, "step": 3311 }, { "epoch": 1.9340924020144517, "grad_norm": 0.09128803234962121, "learning_rate": 6.514038671546874e-08, "loss": 0.2937, "step": 3312 }, { "epoch": 1.934676300999927, "grad_norm": 0.08855321011241368, "learning_rate": 6.398359498688278e-08, "loss": 0.3059, "step": 3313 }, { "epoch": 1.9352601999854024, "grad_norm": 0.07319036433295645, "learning_rate": 6.283713393120505e-08, "loss": 0.2508, "step": 3314 }, { "epoch": 1.935844098970878, "grad_norm": 0.08427057819378772, "learning_rate": 6.170100474043206e-08, "loss": 0.2968, "step": 3315 }, { "epoch": 1.9364279979563537, "grad_norm": 0.08748403927170163, "learning_rate": 6.05752085958211e-08, "loss": 0.3103, "step": 3316 }, { "epoch": 1.937011896941829, "grad_norm": 0.07793085951399464, "learning_rate": 5.945974666788479e-08, "loss": 0.2823, "step": 3317 }, { "epoch": 1.9375957959273045, "grad_norm": 0.07856986016663299, "learning_rate": 5.835462011638982e-08, "loss": 0.2744, "step": 3318 }, { "epoch": 1.93817969491278, "grad_norm": 0.0812087495934627, "learning_rate": 5.725983009035818e-08, "loss": 0.2993, "step": 3319 }, { "epoch": 1.9387635938982557, "grad_norm": 0.07987851599431385, "learning_rate": 5.617537772806603e-08, "loss": 0.2768, "step": 3320 }, { "epoch": 1.939347492883731, "grad_norm": 0.08171925190601233, "learning_rate": 5.5101264157039203e-08, "loss": 0.2643, "step": 3321 }, { "epoch": 1.9399313918692065, "grad_norm": 0.08722143581664975, "learning_rate": 5.403749049405438e-08, "loss": 0.3146, "step": 3322 }, { "epoch": 1.940515290854682, "grad_norm": 0.0816839274871359, "learning_rate": 5.298405784513905e-08, "loss": 0.2687, "step": 3323 }, { "epoch": 1.9410991898401577, "grad_norm": 0.08897097616839965, "learning_rate": 5.194096730556708e-08, "loss": 0.332, "step": 3324 }, { "epoch": 1.9416830888256331, "grad_norm": 0.07892603470673419, "learning_rate": 5.090821995986095e-08, "loss": 0.2658, "step": 3325 }, { "epoch": 1.9422669878111085, "grad_norm": 0.08444753478305228, "learning_rate": 4.9885816881787287e-08, "loss": 0.277, "step": 3326 }, { "epoch": 1.9428508867965841, "grad_norm": 0.08284116376833874, "learning_rate": 4.887375913436132e-08, "loss": 0.2912, "step": 3327 }, { "epoch": 1.9434347857820597, "grad_norm": 0.08063810487965671, "learning_rate": 4.787204776983689e-08, "loss": 0.2864, "step": 3328 }, { "epoch": 1.9440186847675354, "grad_norm": 0.08198011752555578, "learning_rate": 4.6880683829715335e-08, "loss": 0.2954, "step": 3329 }, { "epoch": 1.9446025837530108, "grad_norm": 0.0812784767051932, "learning_rate": 4.589966834473547e-08, "loss": 0.2834, "step": 3330 }, { "epoch": 1.9451864827384862, "grad_norm": 0.08859830337645863, "learning_rate": 4.492900233488029e-08, "loss": 0.273, "step": 3331 }, { "epoch": 1.9457703817239618, "grad_norm": 0.09023265182485751, "learning_rate": 4.3968686809369165e-08, "loss": 0.3008, "step": 3332 }, { "epoch": 1.9463542807094374, "grad_norm": 0.08346588924649834, "learning_rate": 4.3018722766661194e-08, "loss": 0.3133, "step": 3333 }, { "epoch": 1.9469381796949128, "grad_norm": 0.0852693665594825, "learning_rate": 4.207911119445296e-08, "loss": 0.283, "step": 3334 }, { "epoch": 1.9475220786803882, "grad_norm": 0.09375244820851326, "learning_rate": 4.114985306967745e-08, "loss": 0.3011, "step": 3335 }, { "epoch": 1.9481059776658638, "grad_norm": 0.08086734346936102, "learning_rate": 4.0230949358502915e-08, "loss": 0.244, "step": 3336 }, { "epoch": 1.9486898766513394, "grad_norm": 0.08966204587251964, "learning_rate": 3.932240101633178e-08, "loss": 0.3035, "step": 3337 }, { "epoch": 1.9492737756368148, "grad_norm": 0.0820947672024592, "learning_rate": 3.8424208987798415e-08, "loss": 0.2506, "step": 3338 }, { "epoch": 1.9498576746222902, "grad_norm": 0.08745029820720411, "learning_rate": 3.7536374206772475e-08, "loss": 0.2957, "step": 3339 }, { "epoch": 1.9504415736077658, "grad_norm": 0.07592197176694622, "learning_rate": 3.665889759635222e-08, "loss": 0.2679, "step": 3340 }, { "epoch": 1.9510254725932414, "grad_norm": 0.08381779846251648, "learning_rate": 3.579178006886896e-08, "loss": 0.2796, "step": 3341 }, { "epoch": 1.9516093715787168, "grad_norm": 0.09144443474024891, "learning_rate": 3.4935022525880434e-08, "loss": 0.3543, "step": 3342 }, { "epoch": 1.9521932705641922, "grad_norm": 0.08370415927490507, "learning_rate": 3.4088625858174075e-08, "loss": 0.2906, "step": 3343 }, { "epoch": 1.9527771695496678, "grad_norm": 0.08251351626203816, "learning_rate": 3.3252590945767047e-08, "loss": 0.269, "step": 3344 }, { "epoch": 1.9533610685351435, "grad_norm": 0.08490558966956412, "learning_rate": 3.242691865790071e-08, "loss": 0.3046, "step": 3345 }, { "epoch": 1.953944967520619, "grad_norm": 0.09522094824516569, "learning_rate": 3.161160985304168e-08, "loss": 0.3157, "step": 3346 }, { "epoch": 1.9545288665060945, "grad_norm": 0.08793191476819211, "learning_rate": 3.0806665378884106e-08, "loss": 0.3137, "step": 3347 }, { "epoch": 1.9551127654915699, "grad_norm": 0.09295999699632594, "learning_rate": 3.001208607234407e-08, "loss": 0.304, "step": 3348 }, { "epoch": 1.9556966644770455, "grad_norm": 0.09018496804781562, "learning_rate": 2.922787275956074e-08, "loss": 0.2865, "step": 3349 }, { "epoch": 1.956280563462521, "grad_norm": 0.08810329642889063, "learning_rate": 2.845402625589855e-08, "loss": 0.3001, "step": 3350 }, { "epoch": 1.9568644624479965, "grad_norm": 0.08281537325100105, "learning_rate": 2.7690547365938348e-08, "loss": 0.2658, "step": 3351 }, { "epoch": 1.957448361433472, "grad_norm": 0.08057399100423632, "learning_rate": 2.693743688348627e-08, "loss": 0.3024, "step": 3352 }, { "epoch": 1.9580322604189475, "grad_norm": 0.08071064326211577, "learning_rate": 2.6194695591563733e-08, "loss": 0.2811, "step": 3353 }, { "epoch": 1.9586161594044231, "grad_norm": 0.08331854143172743, "learning_rate": 2.546232426241635e-08, "loss": 0.2925, "step": 3354 }, { "epoch": 1.9592000583898985, "grad_norm": 0.08260383535945381, "learning_rate": 2.4740323657503895e-08, "loss": 0.299, "step": 3355 }, { "epoch": 1.959783957375374, "grad_norm": 0.09145270369339253, "learning_rate": 2.4028694527503673e-08, "loss": 0.3097, "step": 3356 }, { "epoch": 1.9603678563608495, "grad_norm": 0.08862812637691465, "learning_rate": 2.33274376123116e-08, "loss": 0.3211, "step": 3357 }, { "epoch": 1.9609517553463252, "grad_norm": 0.08110813890142171, "learning_rate": 2.2636553641040003e-08, "loss": 0.2927, "step": 3358 }, { "epoch": 1.9615356543318005, "grad_norm": 0.09288035661326452, "learning_rate": 2.1956043332010957e-08, "loss": 0.3259, "step": 3359 }, { "epoch": 1.9621195533172762, "grad_norm": 0.08016277183569406, "learning_rate": 2.1285907392767367e-08, "loss": 0.2601, "step": 3360 }, { "epoch": 1.9627034523027516, "grad_norm": 0.09564728507442284, "learning_rate": 2.0626146520061897e-08, "loss": 0.3095, "step": 3361 }, { "epoch": 1.9632873512882272, "grad_norm": 0.08759967642518818, "learning_rate": 1.997676139986138e-08, "loss": 0.2892, "step": 3362 }, { "epoch": 1.9638712502737028, "grad_norm": 0.084618402157927, "learning_rate": 1.9337752707343504e-08, "loss": 0.308, "step": 3363 }, { "epoch": 1.9644551492591782, "grad_norm": 0.08480581909486945, "learning_rate": 1.8709121106899043e-08, "loss": 0.2817, "step": 3364 }, { "epoch": 1.9650390482446536, "grad_norm": 0.09491895702308079, "learning_rate": 1.8090867252127387e-08, "loss": 0.3004, "step": 3365 }, { "epoch": 1.9656229472301292, "grad_norm": 0.09045587795379914, "learning_rate": 1.748299178584101e-08, "loss": 0.2738, "step": 3366 }, { "epoch": 1.9662068462156048, "grad_norm": 0.09150045558538782, "learning_rate": 1.68854953400599e-08, "loss": 0.2947, "step": 3367 }, { "epoch": 1.9667907452010802, "grad_norm": 0.09106546059813658, "learning_rate": 1.6298378536012682e-08, "loss": 0.3136, "step": 3368 }, { "epoch": 1.9673746441865556, "grad_norm": 0.08388452592094156, "learning_rate": 1.5721641984135505e-08, "loss": 0.2717, "step": 3369 }, { "epoch": 1.9679585431720312, "grad_norm": 0.08731255016142334, "learning_rate": 1.5155286284073146e-08, "loss": 0.2799, "step": 3370 }, { "epoch": 1.9685424421575068, "grad_norm": 0.09337077850984606, "learning_rate": 1.4599312024676792e-08, "loss": 0.3178, "step": 3371 }, { "epoch": 1.9691263411429822, "grad_norm": 0.08741358843633228, "learning_rate": 1.405371978400516e-08, "loss": 0.3041, "step": 3372 }, { "epoch": 1.9697102401284576, "grad_norm": 0.08885991998196908, "learning_rate": 1.351851012931893e-08, "loss": 0.3168, "step": 3373 }, { "epoch": 1.9702941391139333, "grad_norm": 0.09074075378417183, "learning_rate": 1.2993683617088526e-08, "loss": 0.3016, "step": 3374 }, { "epoch": 1.9708780380994089, "grad_norm": 0.09197551190775351, "learning_rate": 1.2479240792985237e-08, "loss": 0.3122, "step": 3375 }, { "epoch": 1.9714619370848843, "grad_norm": 0.11020973903739906, "learning_rate": 1.1975182191885648e-08, "loss": 0.3137, "step": 3376 }, { "epoch": 1.9720458360703599, "grad_norm": 0.08391582357736549, "learning_rate": 1.1481508337869429e-08, "loss": 0.276, "step": 3377 }, { "epoch": 1.9726297350558353, "grad_norm": 0.09736187311909869, "learning_rate": 1.099821974421933e-08, "loss": 0.3674, "step": 3378 }, { "epoch": 1.973213634041311, "grad_norm": 0.08582512026385335, "learning_rate": 1.0525316913420069e-08, "loss": 0.2978, "step": 3379 }, { "epoch": 1.9737975330267865, "grad_norm": 0.08972426310358984, "learning_rate": 1.006280033715723e-08, "loss": 0.3098, "step": 3380 }, { "epoch": 1.974381432012262, "grad_norm": 0.08278422460083916, "learning_rate": 9.610670496319475e-09, "loss": 0.2655, "step": 3381 }, { "epoch": 1.9749653309977373, "grad_norm": 0.08833809371072114, "learning_rate": 9.168927860994104e-09, "loss": 0.2924, "step": 3382 }, { "epoch": 1.975549229983213, "grad_norm": 0.09481713124600741, "learning_rate": 8.737572890470391e-09, "loss": 0.3355, "step": 3383 }, { "epoch": 1.9761331289686885, "grad_norm": 0.08552422230691882, "learning_rate": 8.316606033237362e-09, "loss": 0.3035, "step": 3384 }, { "epoch": 1.976717027954164, "grad_norm": 0.08799352885038914, "learning_rate": 7.906027726981568e-09, "loss": 0.286, "step": 3385 }, { "epoch": 1.9773009269396393, "grad_norm": 0.09176851355836813, "learning_rate": 7.505838398589316e-09, "loss": 0.328, "step": 3386 }, { "epoch": 1.977884825925115, "grad_norm": 0.09152816340723528, "learning_rate": 7.1160384641455475e-09, "loss": 0.29, "step": 3387 }, { "epoch": 1.9784687249105906, "grad_norm": 0.09309554602241768, "learning_rate": 6.736628328933847e-09, "loss": 0.3213, "step": 3388 }, { "epoch": 1.979052623896066, "grad_norm": 0.08399577481739949, "learning_rate": 6.367608387433111e-09, "loss": 0.3006, "step": 3389 }, { "epoch": 1.9796365228815413, "grad_norm": 0.08863314241315834, "learning_rate": 6.008979023320871e-09, "loss": 0.3171, "step": 3390 }, { "epoch": 1.980220421867017, "grad_norm": 0.0874935576639428, "learning_rate": 5.660740609472193e-09, "loss": 0.2905, "step": 3391 }, { "epoch": 1.9808043208524926, "grad_norm": 0.08056415914929047, "learning_rate": 5.322893507956339e-09, "loss": 0.293, "step": 3392 }, { "epoch": 1.981388219837968, "grad_norm": 0.08322821925670676, "learning_rate": 4.995438070041214e-09, "loss": 0.3481, "step": 3393 }, { "epoch": 1.9819721188234436, "grad_norm": 0.09329024298652523, "learning_rate": 4.6783746361867e-09, "loss": 0.3477, "step": 3394 }, { "epoch": 1.982556017808919, "grad_norm": 0.09270724815784975, "learning_rate": 4.3717035360502094e-09, "loss": 0.3469, "step": 3395 }, { "epoch": 1.9831399167943946, "grad_norm": 0.08804130217429616, "learning_rate": 4.075425088485574e-09, "loss": 0.3123, "step": 3396 }, { "epoch": 1.9837238157798702, "grad_norm": 0.07889121424849459, "learning_rate": 3.7895396015374955e-09, "loss": 0.2619, "step": 3397 }, { "epoch": 1.9843077147653456, "grad_norm": 0.08357270953542875, "learning_rate": 3.514047372448204e-09, "loss": 0.2754, "step": 3398 }, { "epoch": 1.984891613750821, "grad_norm": 0.09267724838351254, "learning_rate": 3.248948687650799e-09, "loss": 0.3422, "step": 3399 }, { "epoch": 1.9854755127362966, "grad_norm": 0.08185615174420331, "learning_rate": 2.9942438227748004e-09, "loss": 0.2684, "step": 3400 }, { "epoch": 1.9860594117217722, "grad_norm": 0.0895496073429692, "learning_rate": 2.749933042641706e-09, "loss": 0.2913, "step": 3401 }, { "epoch": 1.9866433107072476, "grad_norm": 0.08600775070825833, "learning_rate": 2.5160166012661024e-09, "loss": 0.275, "step": 3402 }, { "epoch": 1.987227209692723, "grad_norm": 0.08769498907450922, "learning_rate": 2.2924947418556666e-09, "loss": 0.3131, "step": 3403 }, { "epoch": 1.9878111086781987, "grad_norm": 0.08506318764452973, "learning_rate": 2.079367696810053e-09, "loss": 0.2905, "step": 3404 }, { "epoch": 1.9883950076636743, "grad_norm": 0.09017890716894336, "learning_rate": 1.876635687722006e-09, "loss": 0.2964, "step": 3405 }, { "epoch": 1.9889789066491497, "grad_norm": 0.08328328421553303, "learning_rate": 1.684298925377359e-09, "loss": 0.2948, "step": 3406 }, { "epoch": 1.989562805634625, "grad_norm": 0.08796096503056458, "learning_rate": 1.502357609749483e-09, "loss": 0.3176, "step": 3407 }, { "epoch": 1.9901467046201007, "grad_norm": 0.08471851593960288, "learning_rate": 1.3308119300092793e-09, "loss": 0.3249, "step": 3408 }, { "epoch": 1.9907306036055763, "grad_norm": 0.09241535912135769, "learning_rate": 1.1696620645140765e-09, "loss": 0.3418, "step": 3409 }, { "epoch": 1.9913145025910517, "grad_norm": 0.09338575168942034, "learning_rate": 1.0189081808154033e-09, "loss": 0.3579, "step": 3410 }, { "epoch": 1.9918984015765273, "grad_norm": 0.08742678050235152, "learning_rate": 8.785504356556562e-10, "loss": 0.283, "step": 3411 }, { "epoch": 1.9924823005620027, "grad_norm": 0.08303387306273124, "learning_rate": 7.485889749658803e-10, "loss": 0.2588, "step": 3412 }, { "epoch": 1.9930661995474783, "grad_norm": 0.0899971202095634, "learning_rate": 6.290239338724302e-10, "loss": 0.2986, "step": 3413 }, { "epoch": 1.993650098532954, "grad_norm": 0.079973695924508, "learning_rate": 5.198554366858676e-10, "loss": 0.2814, "step": 3414 }, { "epoch": 1.9942339975184293, "grad_norm": 0.08953449529991303, "learning_rate": 4.210835969142846e-10, "loss": 0.3133, "step": 3415 }, { "epoch": 1.9948178965039047, "grad_norm": 0.08946985626287382, "learning_rate": 3.3270851724998e-10, "loss": 0.3241, "step": 3416 }, { "epoch": 1.9954017954893803, "grad_norm": 0.08904539077695642, "learning_rate": 2.5473028957945234e-10, "loss": 0.3377, "step": 3417 }, { "epoch": 1.995985694474856, "grad_norm": 0.08275310010677074, "learning_rate": 1.8714899497895845e-10, "loss": 0.3058, "step": 3418 }, { "epoch": 1.9965695934603314, "grad_norm": 0.07914809998161648, "learning_rate": 1.2996470371229307e-10, "loss": 0.2663, "step": 3419 }, { "epoch": 1.9971534924458068, "grad_norm": 0.08895854623710063, "learning_rate": 8.317747523745035e-11, "loss": 0.3062, "step": 3420 }, { "epoch": 1.9977373914312824, "grad_norm": 0.08170588458751106, "learning_rate": 4.678735819774183e-11, "loss": 0.2995, "step": 3421 }, { "epoch": 1.998321290416758, "grad_norm": 0.08999303355829043, "learning_rate": 2.0794390429568212e-11, "loss": 0.3157, "step": 3422 }, { "epoch": 1.9989051894022334, "grad_norm": 0.08665206121739326, "learning_rate": 5.198598959088586e-12, "loss": 0.2915, "step": 3423 }, { "epoch": 1.9994890883877088, "grad_norm": 0.08445496136501865, "learning_rate": 0.0, "loss": 0.3303, "step": 3424 }, { "epoch": 1.9994890883877088, "step": 3424, "total_flos": 316426276208640.0, "train_loss": 0.3796345431218359, "train_runtime": 12695.7608, "train_samples_per_second": 17.266, "train_steps_per_second": 0.27 } ], "logging_steps": 1, "max_steps": 3424, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 316426276208640.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }