{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010465724751439037, "grad_norm": 62.41193288917299, "learning_rate": 6.968641114982578e-08, "loss": 2.0951, "step": 1 }, { "epoch": 0.00020931449502878074, "grad_norm": 62.41976397338786, "learning_rate": 1.3937282229965157e-07, "loss": 1.9349, "step": 2 }, { "epoch": 0.0003139717425431711, "grad_norm": 86.35511116310265, "learning_rate": 2.090592334494774e-07, "loss": 2.2482, "step": 3 }, { "epoch": 0.0004186289900575615, "grad_norm": 66.12787272745858, "learning_rate": 2.7874564459930313e-07, "loss": 1.9908, "step": 4 }, { "epoch": 0.0005232862375719519, "grad_norm": 60.71641111176538, "learning_rate": 3.4843205574912896e-07, "loss": 1.9762, "step": 5 }, { "epoch": 0.0006279434850863422, "grad_norm": 56.78291196095233, "learning_rate": 4.181184668989548e-07, "loss": 1.949, "step": 6 }, { "epoch": 0.0007326007326007326, "grad_norm": 38.93880117457696, "learning_rate": 4.878048780487805e-07, "loss": 1.9651, "step": 7 }, { "epoch": 0.000837257980115123, "grad_norm": 52.09622789077548, "learning_rate": 5.574912891986063e-07, "loss": 1.8705, "step": 8 }, { "epoch": 0.0009419152276295133, "grad_norm": 49.92480355404063, "learning_rate": 6.271777003484321e-07, "loss": 1.8799, "step": 9 }, { "epoch": 0.0010465724751439038, "grad_norm": 32.20342595275959, "learning_rate": 6.968641114982579e-07, "loss": 1.6128, "step": 10 }, { "epoch": 0.001151229722658294, "grad_norm": 53.634110936725754, "learning_rate": 7.665505226480836e-07, "loss": 1.7474, "step": 11 }, { "epoch": 0.0012558869701726845, "grad_norm": 36.607148713405984, "learning_rate": 8.362369337979096e-07, "loss": 1.7438, "step": 12 }, { "epoch": 0.0013605442176870747, "grad_norm": 3.9957477431354564, "learning_rate": 9.059233449477353e-07, "loss": 0.7564, "step": 13 }, { "epoch": 0.0014652014652014652, "grad_norm": 20.58298531785394, "learning_rate": 9.75609756097561e-07, "loss": 1.5063, "step": 14 }, { "epoch": 0.0015698587127158557, "grad_norm": 15.444768444817306, "learning_rate": 1.045296167247387e-06, "loss": 1.4041, "step": 15 }, { "epoch": 0.001674515960230246, "grad_norm": 12.107828560978803, "learning_rate": 1.1149825783972125e-06, "loss": 1.3931, "step": 16 }, { "epoch": 0.0017791732077446364, "grad_norm": 10.814057953845309, "learning_rate": 1.1846689895470384e-06, "loss": 1.3134, "step": 17 }, { "epoch": 0.0018838304552590266, "grad_norm": 7.249315729621174, "learning_rate": 1.2543554006968642e-06, "loss": 1.3668, "step": 18 }, { "epoch": 0.001988487702773417, "grad_norm": 7.144445780657985, "learning_rate": 1.32404181184669e-06, "loss": 1.2966, "step": 19 }, { "epoch": 0.0020931449502878076, "grad_norm": 8.323575437600967, "learning_rate": 1.3937282229965158e-06, "loss": 1.2833, "step": 20 }, { "epoch": 0.002197802197802198, "grad_norm": 6.584038254790013, "learning_rate": 1.4634146341463414e-06, "loss": 1.3209, "step": 21 }, { "epoch": 0.002302459445316588, "grad_norm": 7.650578965105668, "learning_rate": 1.5331010452961673e-06, "loss": 1.185, "step": 22 }, { "epoch": 0.0024071166928309787, "grad_norm": 6.43808617250343, "learning_rate": 1.602787456445993e-06, "loss": 1.1619, "step": 23 }, { "epoch": 0.002511773940345369, "grad_norm": 6.450238348514814, "learning_rate": 1.6724738675958191e-06, "loss": 1.1685, "step": 24 }, { "epoch": 0.0026164311878597592, "grad_norm": 6.596585528409829, "learning_rate": 1.742160278745645e-06, "loss": 1.2266, "step": 25 }, { "epoch": 0.0027210884353741495, "grad_norm": 6.701054827351807, "learning_rate": 1.8118466898954705e-06, "loss": 1.2315, "step": 26 }, { "epoch": 0.00282574568288854, "grad_norm": 6.515865205471776, "learning_rate": 1.8815331010452964e-06, "loss": 1.2951, "step": 27 }, { "epoch": 0.0029304029304029304, "grad_norm": 5.495232722232645, "learning_rate": 1.951219512195122e-06, "loss": 1.1448, "step": 28 }, { "epoch": 0.0030350601779173206, "grad_norm": 5.290875061084644, "learning_rate": 2.020905923344948e-06, "loss": 1.1407, "step": 29 }, { "epoch": 0.0031397174254317113, "grad_norm": 5.3025777909139045, "learning_rate": 2.090592334494774e-06, "loss": 1.127, "step": 30 }, { "epoch": 0.0032443746729461016, "grad_norm": 5.245351898269604, "learning_rate": 2.1602787456445995e-06, "loss": 1.1103, "step": 31 }, { "epoch": 0.003349031920460492, "grad_norm": 5.300255475950051, "learning_rate": 2.229965156794425e-06, "loss": 1.1917, "step": 32 }, { "epoch": 0.003453689167974882, "grad_norm": 4.572317456714689, "learning_rate": 2.299651567944251e-06, "loss": 1.0874, "step": 33 }, { "epoch": 0.0035583464154892728, "grad_norm": 4.107821899876536, "learning_rate": 2.3693379790940767e-06, "loss": 1.0825, "step": 34 }, { "epoch": 0.003663003663003663, "grad_norm": 4.21060285666979, "learning_rate": 2.4390243902439027e-06, "loss": 1.2219, "step": 35 }, { "epoch": 0.0037676609105180532, "grad_norm": 4.328566900505888, "learning_rate": 2.5087108013937284e-06, "loss": 1.0502, "step": 36 }, { "epoch": 0.003872318158032444, "grad_norm": 4.291665457341644, "learning_rate": 2.578397212543554e-06, "loss": 1.1426, "step": 37 }, { "epoch": 0.003976975405546834, "grad_norm": 3.8850933278156563, "learning_rate": 2.64808362369338e-06, "loss": 1.0759, "step": 38 }, { "epoch": 0.004081632653061225, "grad_norm": 4.413531892474245, "learning_rate": 2.7177700348432056e-06, "loss": 1.1848, "step": 39 }, { "epoch": 0.004186289900575615, "grad_norm": 4.844856614218897, "learning_rate": 2.7874564459930316e-06, "loss": 1.0984, "step": 40 }, { "epoch": 0.004290947148090005, "grad_norm": 4.748816166519876, "learning_rate": 2.8571428571428573e-06, "loss": 1.0605, "step": 41 }, { "epoch": 0.004395604395604396, "grad_norm": 4.777444061979255, "learning_rate": 2.926829268292683e-06, "loss": 1.2714, "step": 42 }, { "epoch": 0.004500261643118786, "grad_norm": 4.522554555361124, "learning_rate": 2.996515679442509e-06, "loss": 1.0385, "step": 43 }, { "epoch": 0.004604918890633176, "grad_norm": 4.075973193199012, "learning_rate": 3.0662020905923345e-06, "loss": 1.0521, "step": 44 }, { "epoch": 0.004709576138147566, "grad_norm": 4.3231014366459934, "learning_rate": 3.13588850174216e-06, "loss": 1.0546, "step": 45 }, { "epoch": 0.0048142333856619575, "grad_norm": 5.680111667722333, "learning_rate": 3.205574912891986e-06, "loss": 1.0781, "step": 46 }, { "epoch": 0.004918890633176348, "grad_norm": 4.1612873974328854, "learning_rate": 3.2752613240418118e-06, "loss": 1.027, "step": 47 }, { "epoch": 0.005023547880690738, "grad_norm": 4.992758604601489, "learning_rate": 3.3449477351916382e-06, "loss": 1.1227, "step": 48 }, { "epoch": 0.005128205128205128, "grad_norm": 4.226754919676679, "learning_rate": 3.414634146341464e-06, "loss": 1.1298, "step": 49 }, { "epoch": 0.0052328623757195184, "grad_norm": 5.514671205059567, "learning_rate": 3.48432055749129e-06, "loss": 1.1269, "step": 50 }, { "epoch": 0.005337519623233909, "grad_norm": 4.351259557452271, "learning_rate": 3.5540069686411155e-06, "loss": 1.0444, "step": 51 }, { "epoch": 0.005442176870748299, "grad_norm": 5.399366279585442, "learning_rate": 3.623693379790941e-06, "loss": 1.1222, "step": 52 }, { "epoch": 0.00554683411826269, "grad_norm": 4.028877965786529, "learning_rate": 3.693379790940767e-06, "loss": 1.1525, "step": 53 }, { "epoch": 0.00565149136577708, "grad_norm": 4.683154305540764, "learning_rate": 3.7630662020905927e-06, "loss": 0.9988, "step": 54 }, { "epoch": 0.0057561486132914706, "grad_norm": 3.937666148096688, "learning_rate": 3.832752613240418e-06, "loss": 1.0655, "step": 55 }, { "epoch": 0.005860805860805861, "grad_norm": 4.766869445012149, "learning_rate": 3.902439024390244e-06, "loss": 0.7926, "step": 56 }, { "epoch": 0.005965463108320251, "grad_norm": 4.319598323365691, "learning_rate": 3.97212543554007e-06, "loss": 1.0314, "step": 57 }, { "epoch": 0.006070120355834641, "grad_norm": 4.576774250115059, "learning_rate": 4.041811846689896e-06, "loss": 1.1618, "step": 58 }, { "epoch": 0.0061747776033490315, "grad_norm": 5.167500660632476, "learning_rate": 4.111498257839722e-06, "loss": 1.0677, "step": 59 }, { "epoch": 0.006279434850863423, "grad_norm": 4.38060785735574, "learning_rate": 4.181184668989548e-06, "loss": 1.0157, "step": 60 }, { "epoch": 0.006384092098377813, "grad_norm": 4.893113671421494, "learning_rate": 4.250871080139373e-06, "loss": 1.1794, "step": 61 }, { "epoch": 0.006488749345892203, "grad_norm": 4.849041572456957, "learning_rate": 4.320557491289199e-06, "loss": 1.046, "step": 62 }, { "epoch": 0.006593406593406593, "grad_norm": 3.991277540603915, "learning_rate": 4.390243902439025e-06, "loss": 1.0056, "step": 63 }, { "epoch": 0.006698063840920984, "grad_norm": 4.542337282469629, "learning_rate": 4.45993031358885e-06, "loss": 1.0251, "step": 64 }, { "epoch": 0.006802721088435374, "grad_norm": 4.28521583781609, "learning_rate": 4.529616724738676e-06, "loss": 1.0708, "step": 65 }, { "epoch": 0.006907378335949764, "grad_norm": 4.125637623705243, "learning_rate": 4.599303135888502e-06, "loss": 1.0472, "step": 66 }, { "epoch": 0.007012035583464155, "grad_norm": 5.268122050018008, "learning_rate": 4.668989547038328e-06, "loss": 0.99, "step": 67 }, { "epoch": 0.0071166928309785455, "grad_norm": 5.0648320868276375, "learning_rate": 4.738675958188153e-06, "loss": 1.0457, "step": 68 }, { "epoch": 0.007221350078492936, "grad_norm": 3.948886008013687, "learning_rate": 4.8083623693379794e-06, "loss": 1.1263, "step": 69 }, { "epoch": 0.007326007326007326, "grad_norm": 4.195666188025583, "learning_rate": 4.8780487804878055e-06, "loss": 1.0227, "step": 70 }, { "epoch": 0.007430664573521716, "grad_norm": 4.413992487953222, "learning_rate": 4.947735191637631e-06, "loss": 1.0551, "step": 71 }, { "epoch": 0.0075353218210361065, "grad_norm": 5.788427063687076, "learning_rate": 5.017421602787457e-06, "loss": 1.094, "step": 72 }, { "epoch": 0.007639979068550497, "grad_norm": 4.474163687473138, "learning_rate": 5.087108013937283e-06, "loss": 1.0967, "step": 73 }, { "epoch": 0.007744636316064888, "grad_norm": 4.512506707897861, "learning_rate": 5.156794425087108e-06, "loss": 1.1004, "step": 74 }, { "epoch": 0.007849293563579277, "grad_norm": 4.228651613816608, "learning_rate": 5.226480836236935e-06, "loss": 1.0283, "step": 75 }, { "epoch": 0.007953950811093667, "grad_norm": 4.3737582716641175, "learning_rate": 5.29616724738676e-06, "loss": 0.9979, "step": 76 }, { "epoch": 0.00805860805860806, "grad_norm": 3.9566453802387094, "learning_rate": 5.365853658536586e-06, "loss": 1.0923, "step": 77 }, { "epoch": 0.00816326530612245, "grad_norm": 4.071859232406034, "learning_rate": 5.435540069686411e-06, "loss": 1.0203, "step": 78 }, { "epoch": 0.00826792255363684, "grad_norm": 4.0563972426988295, "learning_rate": 5.505226480836237e-06, "loss": 1.127, "step": 79 }, { "epoch": 0.00837257980115123, "grad_norm": 4.657288632168375, "learning_rate": 5.574912891986063e-06, "loss": 1.1196, "step": 80 }, { "epoch": 0.00847723704866562, "grad_norm": 4.090819887573403, "learning_rate": 5.644599303135889e-06, "loss": 1.0944, "step": 81 }, { "epoch": 0.00858189429618001, "grad_norm": 4.482297147920703, "learning_rate": 5.7142857142857145e-06, "loss": 1.0254, "step": 82 }, { "epoch": 0.008686551543694401, "grad_norm": 4.152492838084387, "learning_rate": 5.7839721254355405e-06, "loss": 1.0602, "step": 83 }, { "epoch": 0.008791208791208791, "grad_norm": 4.327953715994116, "learning_rate": 5.853658536585366e-06, "loss": 0.978, "step": 84 }, { "epoch": 0.008895866038723181, "grad_norm": 3.0360168594436097, "learning_rate": 5.923344947735193e-06, "loss": 0.7671, "step": 85 }, { "epoch": 0.009000523286237572, "grad_norm": 4.211239355773984, "learning_rate": 5.993031358885018e-06, "loss": 1.0706, "step": 86 }, { "epoch": 0.009105180533751962, "grad_norm": 4.660606741232546, "learning_rate": 6.062717770034844e-06, "loss": 1.0697, "step": 87 }, { "epoch": 0.009209837781266352, "grad_norm": 3.96480287896892, "learning_rate": 6.132404181184669e-06, "loss": 1.0302, "step": 88 }, { "epoch": 0.009314495028780742, "grad_norm": 4.126939118609246, "learning_rate": 6.202090592334495e-06, "loss": 1.1167, "step": 89 }, { "epoch": 0.009419152276295133, "grad_norm": 4.87024424753776, "learning_rate": 6.27177700348432e-06, "loss": 0.993, "step": 90 }, { "epoch": 0.009523809523809525, "grad_norm": 3.9263349606700695, "learning_rate": 6.341463414634147e-06, "loss": 1.0546, "step": 91 }, { "epoch": 0.009628466771323915, "grad_norm": 3.985792246955264, "learning_rate": 6.411149825783972e-06, "loss": 0.9981, "step": 92 }, { "epoch": 0.009733124018838305, "grad_norm": 3.924993510446252, "learning_rate": 6.480836236933798e-06, "loss": 0.9752, "step": 93 }, { "epoch": 0.009837781266352695, "grad_norm": 4.292200608183251, "learning_rate": 6.5505226480836235e-06, "loss": 1.0762, "step": 94 }, { "epoch": 0.009942438513867086, "grad_norm": 4.485480700913085, "learning_rate": 6.62020905923345e-06, "loss": 1.104, "step": 95 }, { "epoch": 0.010047095761381476, "grad_norm": 4.5984608130045235, "learning_rate": 6.6898954703832765e-06, "loss": 1.051, "step": 96 }, { "epoch": 0.010151753008895866, "grad_norm": 4.449884370761991, "learning_rate": 6.759581881533102e-06, "loss": 0.9836, "step": 97 }, { "epoch": 0.010256410256410256, "grad_norm": 4.1157967239658575, "learning_rate": 6.829268292682928e-06, "loss": 1.0645, "step": 98 }, { "epoch": 0.010361067503924647, "grad_norm": 3.864292249565954, "learning_rate": 6.898954703832753e-06, "loss": 1.0302, "step": 99 }, { "epoch": 0.010465724751439037, "grad_norm": 5.087581226627934, "learning_rate": 6.96864111498258e-06, "loss": 0.9327, "step": 100 }, { "epoch": 0.010570381998953427, "grad_norm": 4.4876214527409655, "learning_rate": 7.038327526132405e-06, "loss": 1.0256, "step": 101 }, { "epoch": 0.010675039246467817, "grad_norm": 4.768444458649774, "learning_rate": 7.108013937282231e-06, "loss": 0.9977, "step": 102 }, { "epoch": 0.010779696493982208, "grad_norm": 4.8508061117938075, "learning_rate": 7.177700348432056e-06, "loss": 1.0055, "step": 103 }, { "epoch": 0.010884353741496598, "grad_norm": 4.971725250363107, "learning_rate": 7.247386759581882e-06, "loss": 0.9967, "step": 104 }, { "epoch": 0.01098901098901099, "grad_norm": 4.397737680675271, "learning_rate": 7.317073170731707e-06, "loss": 1.0356, "step": 105 }, { "epoch": 0.01109366823652538, "grad_norm": 3.973868161061544, "learning_rate": 7.386759581881534e-06, "loss": 1.0277, "step": 106 }, { "epoch": 0.01119832548403977, "grad_norm": 4.846756354395717, "learning_rate": 7.4564459930313594e-06, "loss": 1.113, "step": 107 }, { "epoch": 0.01130298273155416, "grad_norm": 4.364102429082418, "learning_rate": 7.5261324041811855e-06, "loss": 1.0448, "step": 108 }, { "epoch": 0.01140763997906855, "grad_norm": 4.772968488932661, "learning_rate": 7.595818815331011e-06, "loss": 0.9974, "step": 109 }, { "epoch": 0.011512297226582941, "grad_norm": 4.338651147105108, "learning_rate": 7.665505226480837e-06, "loss": 1.0383, "step": 110 }, { "epoch": 0.011616954474097331, "grad_norm": 4.665177086985384, "learning_rate": 7.735191637630662e-06, "loss": 0.9858, "step": 111 }, { "epoch": 0.011721611721611722, "grad_norm": 4.226627817628828, "learning_rate": 7.804878048780489e-06, "loss": 1.1107, "step": 112 }, { "epoch": 0.011826268969126112, "grad_norm": 4.365500077240299, "learning_rate": 7.874564459930314e-06, "loss": 0.9337, "step": 113 }, { "epoch": 0.011930926216640502, "grad_norm": 4.57360703127662, "learning_rate": 7.94425087108014e-06, "loss": 1.0088, "step": 114 }, { "epoch": 0.012035583464154892, "grad_norm": 4.672192335977975, "learning_rate": 8.013937282229966e-06, "loss": 1.0041, "step": 115 }, { "epoch": 0.012140240711669283, "grad_norm": 4.678168288512447, "learning_rate": 8.083623693379791e-06, "loss": 1.0472, "step": 116 }, { "epoch": 0.012244897959183673, "grad_norm": 4.804225775870655, "learning_rate": 8.153310104529616e-06, "loss": 1.0888, "step": 117 }, { "epoch": 0.012349555206698063, "grad_norm": 4.403636503472787, "learning_rate": 8.222996515679443e-06, "loss": 1.13, "step": 118 }, { "epoch": 0.012454212454212455, "grad_norm": 4.208797982507968, "learning_rate": 8.292682926829268e-06, "loss": 1.0292, "step": 119 }, { "epoch": 0.012558869701726845, "grad_norm": 5.510093081064813, "learning_rate": 8.362369337979095e-06, "loss": 1.0361, "step": 120 }, { "epoch": 0.012663526949241236, "grad_norm": 5.492522216479893, "learning_rate": 8.43205574912892e-06, "loss": 1.0391, "step": 121 }, { "epoch": 0.012768184196755626, "grad_norm": 4.227779135360482, "learning_rate": 8.501742160278746e-06, "loss": 1.1114, "step": 122 }, { "epoch": 0.012872841444270016, "grad_norm": 3.8028192975551014, "learning_rate": 8.571428571428571e-06, "loss": 1.0596, "step": 123 }, { "epoch": 0.012977498691784406, "grad_norm": 4.700182372142366, "learning_rate": 8.641114982578398e-06, "loss": 1.1663, "step": 124 }, { "epoch": 0.013082155939298797, "grad_norm": 4.393426240660029, "learning_rate": 8.710801393728223e-06, "loss": 0.9717, "step": 125 }, { "epoch": 0.013186813186813187, "grad_norm": 5.209722101990777, "learning_rate": 8.78048780487805e-06, "loss": 1.0548, "step": 126 }, { "epoch": 0.013291470434327577, "grad_norm": 4.44296200002556, "learning_rate": 8.850174216027875e-06, "loss": 1.0316, "step": 127 }, { "epoch": 0.013396127681841967, "grad_norm": 3.2269766969577525, "learning_rate": 8.9198606271777e-06, "loss": 0.7394, "step": 128 }, { "epoch": 0.013500784929356358, "grad_norm": 4.000784251757387, "learning_rate": 8.989547038327527e-06, "loss": 0.9277, "step": 129 }, { "epoch": 0.013605442176870748, "grad_norm": 3.76241263382153, "learning_rate": 9.059233449477352e-06, "loss": 1.0673, "step": 130 }, { "epoch": 0.013710099424385138, "grad_norm": 4.24231924403224, "learning_rate": 9.12891986062718e-06, "loss": 0.9685, "step": 131 }, { "epoch": 0.013814756671899528, "grad_norm": 2.3758209921949343, "learning_rate": 9.198606271777004e-06, "loss": 0.7123, "step": 132 }, { "epoch": 0.01391941391941392, "grad_norm": 3.3277360996370966, "learning_rate": 9.268292682926831e-06, "loss": 1.0094, "step": 133 }, { "epoch": 0.01402407116692831, "grad_norm": 3.9700536041385353, "learning_rate": 9.337979094076656e-06, "loss": 1.029, "step": 134 }, { "epoch": 0.0141287284144427, "grad_norm": 3.8999757749740973, "learning_rate": 9.407665505226482e-06, "loss": 1.099, "step": 135 }, { "epoch": 0.014233385661957091, "grad_norm": 4.007407594413442, "learning_rate": 9.477351916376307e-06, "loss": 0.9941, "step": 136 }, { "epoch": 0.014338042909471481, "grad_norm": 4.372638054940945, "learning_rate": 9.547038327526134e-06, "loss": 0.998, "step": 137 }, { "epoch": 0.014442700156985872, "grad_norm": 3.781559177153234, "learning_rate": 9.616724738675959e-06, "loss": 1.0021, "step": 138 }, { "epoch": 0.014547357404500262, "grad_norm": 3.8918297025335358, "learning_rate": 9.686411149825786e-06, "loss": 1.1862, "step": 139 }, { "epoch": 0.014652014652014652, "grad_norm": 4.87861639008213, "learning_rate": 9.756097560975611e-06, "loss": 1.0808, "step": 140 }, { "epoch": 0.014756671899529042, "grad_norm": 4.2959641485004365, "learning_rate": 9.825783972125436e-06, "loss": 1.0558, "step": 141 }, { "epoch": 0.014861329147043432, "grad_norm": 3.6891019296746816, "learning_rate": 9.895470383275261e-06, "loss": 0.9425, "step": 142 }, { "epoch": 0.014965986394557823, "grad_norm": 2.421295406110499, "learning_rate": 9.965156794425088e-06, "loss": 0.7397, "step": 143 }, { "epoch": 0.015070643642072213, "grad_norm": 4.319425469746967, "learning_rate": 1.0034843205574913e-05, "loss": 1.0007, "step": 144 }, { "epoch": 0.015175300889586603, "grad_norm": 6.872837574225154, "learning_rate": 1.0104529616724739e-05, "loss": 1.0009, "step": 145 }, { "epoch": 0.015279958137100993, "grad_norm": 4.1481133816472875, "learning_rate": 1.0174216027874565e-05, "loss": 1.1726, "step": 146 }, { "epoch": 0.015384615384615385, "grad_norm": 4.440751447995416, "learning_rate": 1.024390243902439e-05, "loss": 1.1265, "step": 147 }, { "epoch": 0.015489272632129776, "grad_norm": 4.171124786786287, "learning_rate": 1.0313588850174216e-05, "loss": 1.0745, "step": 148 }, { "epoch": 0.015593929879644166, "grad_norm": 4.2679960358102385, "learning_rate": 1.0383275261324041e-05, "loss": 1.0301, "step": 149 }, { "epoch": 0.015698587127158554, "grad_norm": 4.083629703686367, "learning_rate": 1.045296167247387e-05, "loss": 1.0725, "step": 150 }, { "epoch": 0.015803244374672946, "grad_norm": 4.756910822842423, "learning_rate": 1.0522648083623695e-05, "loss": 1.1538, "step": 151 }, { "epoch": 0.015907901622187335, "grad_norm": 4.175459761454693, "learning_rate": 1.059233449477352e-05, "loss": 1.0311, "step": 152 }, { "epoch": 0.016012558869701727, "grad_norm": 4.660499784764975, "learning_rate": 1.0662020905923345e-05, "loss": 1.0348, "step": 153 }, { "epoch": 0.01611721611721612, "grad_norm": 5.477934194364109, "learning_rate": 1.0731707317073172e-05, "loss": 0.947, "step": 154 }, { "epoch": 0.016221873364730507, "grad_norm": 3.8257154240108475, "learning_rate": 1.0801393728222997e-05, "loss": 1.0325, "step": 155 }, { "epoch": 0.0163265306122449, "grad_norm": 4.473013121990363, "learning_rate": 1.0871080139372822e-05, "loss": 1.0694, "step": 156 }, { "epoch": 0.016431187859759288, "grad_norm": 3.7487977727407107, "learning_rate": 1.0940766550522648e-05, "loss": 0.9867, "step": 157 }, { "epoch": 0.01653584510727368, "grad_norm": 3.6501719775560844, "learning_rate": 1.1010452961672475e-05, "loss": 1.0029, "step": 158 }, { "epoch": 0.01664050235478807, "grad_norm": 4.465954207259012, "learning_rate": 1.1080139372822301e-05, "loss": 1.1428, "step": 159 }, { "epoch": 0.01674515960230246, "grad_norm": 3.7784565534446597, "learning_rate": 1.1149825783972127e-05, "loss": 1.0845, "step": 160 }, { "epoch": 0.01684981684981685, "grad_norm": 4.3826834909022825, "learning_rate": 1.1219512195121953e-05, "loss": 1.029, "step": 161 }, { "epoch": 0.01695447409733124, "grad_norm": 5.076983929836477, "learning_rate": 1.1289198606271779e-05, "loss": 0.9905, "step": 162 }, { "epoch": 0.01705913134484563, "grad_norm": 3.87202080289898, "learning_rate": 1.1358885017421604e-05, "loss": 0.9471, "step": 163 }, { "epoch": 0.01716378859236002, "grad_norm": 3.947159057053555, "learning_rate": 1.1428571428571429e-05, "loss": 1.0517, "step": 164 }, { "epoch": 0.01726844583987441, "grad_norm": 3.5664649583899886, "learning_rate": 1.1498257839721256e-05, "loss": 1.0465, "step": 165 }, { "epoch": 0.017373103087388802, "grad_norm": 3.889725720772939, "learning_rate": 1.1567944250871081e-05, "loss": 1.0653, "step": 166 }, { "epoch": 0.01747776033490319, "grad_norm": 3.9170851836947143, "learning_rate": 1.1637630662020906e-05, "loss": 1.034, "step": 167 }, { "epoch": 0.017582417582417582, "grad_norm": 3.72251997612241, "learning_rate": 1.1707317073170731e-05, "loss": 1.0297, "step": 168 }, { "epoch": 0.017687074829931974, "grad_norm": 3.576705734526832, "learning_rate": 1.177700348432056e-05, "loss": 1.0623, "step": 169 }, { "epoch": 0.017791732077446363, "grad_norm": 3.263183539989399, "learning_rate": 1.1846689895470385e-05, "loss": 1.0284, "step": 170 }, { "epoch": 0.017896389324960755, "grad_norm": 4.86530329059982, "learning_rate": 1.191637630662021e-05, "loss": 1.1147, "step": 171 }, { "epoch": 0.018001046572475143, "grad_norm": 3.74319247595939, "learning_rate": 1.1986062717770036e-05, "loss": 1.0216, "step": 172 }, { "epoch": 0.018105703819989535, "grad_norm": 3.4535523228322274, "learning_rate": 1.2055749128919862e-05, "loss": 1.1057, "step": 173 }, { "epoch": 0.018210361067503924, "grad_norm": 3.5433866410709784, "learning_rate": 1.2125435540069688e-05, "loss": 1.1274, "step": 174 }, { "epoch": 0.018315018315018316, "grad_norm": 3.8964338737423567, "learning_rate": 1.2195121951219513e-05, "loss": 1.1093, "step": 175 }, { "epoch": 0.018419675562532704, "grad_norm": 3.7316530734886197, "learning_rate": 1.2264808362369338e-05, "loss": 1.0501, "step": 176 }, { "epoch": 0.018524332810047096, "grad_norm": 3.8491712042277872, "learning_rate": 1.2334494773519165e-05, "loss": 1.1212, "step": 177 }, { "epoch": 0.018628990057561485, "grad_norm": 3.4777164601747863, "learning_rate": 1.240418118466899e-05, "loss": 1.0654, "step": 178 }, { "epoch": 0.018733647305075877, "grad_norm": 2.261327289656553, "learning_rate": 1.2473867595818815e-05, "loss": 0.7282, "step": 179 }, { "epoch": 0.018838304552590265, "grad_norm": 3.552001161873253, "learning_rate": 1.254355400696864e-05, "loss": 0.9128, "step": 180 }, { "epoch": 0.018942961800104657, "grad_norm": 5.287461211801188, "learning_rate": 1.2613240418118469e-05, "loss": 1.051, "step": 181 }, { "epoch": 0.01904761904761905, "grad_norm": 4.4864817842055436, "learning_rate": 1.2682926829268294e-05, "loss": 1.0427, "step": 182 }, { "epoch": 0.019152276295133438, "grad_norm": 4.588279167310623, "learning_rate": 1.275261324041812e-05, "loss": 1.0934, "step": 183 }, { "epoch": 0.01925693354264783, "grad_norm": 5.248743013483407, "learning_rate": 1.2822299651567945e-05, "loss": 1.0224, "step": 184 }, { "epoch": 0.01936159079016222, "grad_norm": 3.599591701450194, "learning_rate": 1.2891986062717772e-05, "loss": 0.942, "step": 185 }, { "epoch": 0.01946624803767661, "grad_norm": 4.22948244161219, "learning_rate": 1.2961672473867597e-05, "loss": 1.096, "step": 186 }, { "epoch": 0.019570905285191, "grad_norm": 3.883052580674137, "learning_rate": 1.3031358885017422e-05, "loss": 1.0867, "step": 187 }, { "epoch": 0.01967556253270539, "grad_norm": 3.9641257982365024, "learning_rate": 1.3101045296167247e-05, "loss": 1.0495, "step": 188 }, { "epoch": 0.01978021978021978, "grad_norm": 3.98653960238745, "learning_rate": 1.3170731707317076e-05, "loss": 1.1492, "step": 189 }, { "epoch": 0.01988487702773417, "grad_norm": 2.3679885080466554, "learning_rate": 1.32404181184669e-05, "loss": 0.7717, "step": 190 }, { "epoch": 0.01998953427524856, "grad_norm": 4.239294493465942, "learning_rate": 1.3310104529616726e-05, "loss": 1.1124, "step": 191 }, { "epoch": 0.020094191522762952, "grad_norm": 4.028830074043196, "learning_rate": 1.3379790940766553e-05, "loss": 1.1247, "step": 192 }, { "epoch": 0.02019884877027734, "grad_norm": 3.522476393811863, "learning_rate": 1.3449477351916378e-05, "loss": 1.0408, "step": 193 }, { "epoch": 0.020303506017791732, "grad_norm": 3.420820328724481, "learning_rate": 1.3519163763066203e-05, "loss": 1.0528, "step": 194 }, { "epoch": 0.02040816326530612, "grad_norm": 3.855766622211791, "learning_rate": 1.3588850174216028e-05, "loss": 1.0484, "step": 195 }, { "epoch": 0.020512820512820513, "grad_norm": 4.09125354171716, "learning_rate": 1.3658536585365855e-05, "loss": 1.1015, "step": 196 }, { "epoch": 0.020617477760334905, "grad_norm": 3.3241462228706653, "learning_rate": 1.372822299651568e-05, "loss": 1.1006, "step": 197 }, { "epoch": 0.020722135007849293, "grad_norm": 4.516135214785916, "learning_rate": 1.3797909407665506e-05, "loss": 1.0546, "step": 198 }, { "epoch": 0.020826792255363685, "grad_norm": 3.3260375654673564, "learning_rate": 1.3867595818815331e-05, "loss": 1.1214, "step": 199 }, { "epoch": 0.020931449502878074, "grad_norm": 4.0907170025291935, "learning_rate": 1.393728222996516e-05, "loss": 1.0947, "step": 200 }, { "epoch": 0.021036106750392466, "grad_norm": 3.9206762011268337, "learning_rate": 1.4006968641114985e-05, "loss": 1.0005, "step": 201 }, { "epoch": 0.021140763997906854, "grad_norm": 3.74058600438095, "learning_rate": 1.407665505226481e-05, "loss": 1.0507, "step": 202 }, { "epoch": 0.021245421245421246, "grad_norm": 4.046674364114655, "learning_rate": 1.4146341463414635e-05, "loss": 1.0381, "step": 203 }, { "epoch": 0.021350078492935635, "grad_norm": 4.070409637629066, "learning_rate": 1.4216027874564462e-05, "loss": 1.0869, "step": 204 }, { "epoch": 0.021454735740450027, "grad_norm": 2.477212456664358, "learning_rate": 1.4285714285714287e-05, "loss": 0.7274, "step": 205 }, { "epoch": 0.021559392987964415, "grad_norm": 4.809989174382412, "learning_rate": 1.4355400696864112e-05, "loss": 0.9624, "step": 206 }, { "epoch": 0.021664050235478807, "grad_norm": 4.855067955207067, "learning_rate": 1.4425087108013938e-05, "loss": 1.056, "step": 207 }, { "epoch": 0.021768707482993196, "grad_norm": 4.666013151279942, "learning_rate": 1.4494773519163764e-05, "loss": 0.9354, "step": 208 }, { "epoch": 0.021873364730507588, "grad_norm": 3.940057891989908, "learning_rate": 1.456445993031359e-05, "loss": 1.014, "step": 209 }, { "epoch": 0.02197802197802198, "grad_norm": 5.015568593900297, "learning_rate": 1.4634146341463415e-05, "loss": 1.0634, "step": 210 }, { "epoch": 0.022082679225536368, "grad_norm": 3.5260271531233927, "learning_rate": 1.470383275261324e-05, "loss": 0.9887, "step": 211 }, { "epoch": 0.02218733647305076, "grad_norm": 4.142631353724366, "learning_rate": 1.4773519163763069e-05, "loss": 1.0409, "step": 212 }, { "epoch": 0.02229199372056515, "grad_norm": 4.18461357230588, "learning_rate": 1.4843205574912894e-05, "loss": 1.0713, "step": 213 }, { "epoch": 0.02239665096807954, "grad_norm": 4.157498562874197, "learning_rate": 1.4912891986062719e-05, "loss": 1.0933, "step": 214 }, { "epoch": 0.02250130821559393, "grad_norm": 4.027272133207346, "learning_rate": 1.4982578397212544e-05, "loss": 0.9743, "step": 215 }, { "epoch": 0.02260596546310832, "grad_norm": 4.379374705129572, "learning_rate": 1.5052264808362371e-05, "loss": 1.0026, "step": 216 }, { "epoch": 0.02271062271062271, "grad_norm": 3.239907491980648, "learning_rate": 1.5121951219512196e-05, "loss": 1.0678, "step": 217 }, { "epoch": 0.0228152799581371, "grad_norm": 4.0127649077633665, "learning_rate": 1.5191637630662021e-05, "loss": 1.1173, "step": 218 }, { "epoch": 0.02291993720565149, "grad_norm": 4.264440457331995, "learning_rate": 1.5261324041811848e-05, "loss": 0.9743, "step": 219 }, { "epoch": 0.023024594453165882, "grad_norm": 3.997508098429076, "learning_rate": 1.5331010452961673e-05, "loss": 1.0646, "step": 220 }, { "epoch": 0.02312925170068027, "grad_norm": 3.47902625924093, "learning_rate": 1.54006968641115e-05, "loss": 0.9376, "step": 221 }, { "epoch": 0.023233908948194663, "grad_norm": 4.175847455405455, "learning_rate": 1.5470383275261324e-05, "loss": 1.16, "step": 222 }, { "epoch": 0.02333856619570905, "grad_norm": 3.8697528880278775, "learning_rate": 1.554006968641115e-05, "loss": 1.1035, "step": 223 }, { "epoch": 0.023443223443223443, "grad_norm": 3.7062219666594522, "learning_rate": 1.5609756097560978e-05, "loss": 1.0437, "step": 224 }, { "epoch": 0.023547880690737835, "grad_norm": 3.604348929857401, "learning_rate": 1.5679442508710803e-05, "loss": 1.1209, "step": 225 }, { "epoch": 0.023652537938252224, "grad_norm": 4.264573745725593, "learning_rate": 1.5749128919860628e-05, "loss": 1.0217, "step": 226 }, { "epoch": 0.023757195185766616, "grad_norm": 5.445543416503218, "learning_rate": 1.5818815331010456e-05, "loss": 1.0786, "step": 227 }, { "epoch": 0.023861852433281004, "grad_norm": 5.26347191325024, "learning_rate": 1.588850174216028e-05, "loss": 1.1497, "step": 228 }, { "epoch": 0.023966509680795396, "grad_norm": 3.5473340256833548, "learning_rate": 1.5958188153310107e-05, "loss": 0.9933, "step": 229 }, { "epoch": 0.024071166928309785, "grad_norm": 3.654247972110555, "learning_rate": 1.6027874564459932e-05, "loss": 1.0457, "step": 230 }, { "epoch": 0.024175824175824177, "grad_norm": 3.6750069709677047, "learning_rate": 1.6097560975609757e-05, "loss": 1.1164, "step": 231 }, { "epoch": 0.024280481423338565, "grad_norm": 3.542624844100214, "learning_rate": 1.6167247386759582e-05, "loss": 1.1167, "step": 232 }, { "epoch": 0.024385138670852957, "grad_norm": 3.9872277812389996, "learning_rate": 1.6236933797909408e-05, "loss": 1.1433, "step": 233 }, { "epoch": 0.024489795918367346, "grad_norm": 3.7156048056910382, "learning_rate": 1.6306620209059233e-05, "loss": 1.0351, "step": 234 }, { "epoch": 0.024594453165881738, "grad_norm": 3.7816100969884348, "learning_rate": 1.637630662020906e-05, "loss": 1.0736, "step": 235 }, { "epoch": 0.024699110413396126, "grad_norm": 2.5076547499457362, "learning_rate": 1.6445993031358887e-05, "loss": 0.7651, "step": 236 }, { "epoch": 0.024803767660910518, "grad_norm": 3.3947366530434184, "learning_rate": 1.6515679442508712e-05, "loss": 1.1833, "step": 237 }, { "epoch": 0.02490842490842491, "grad_norm": 3.9432345380153713, "learning_rate": 1.6585365853658537e-05, "loss": 1.0596, "step": 238 }, { "epoch": 0.0250130821559393, "grad_norm": 4.098078794977877, "learning_rate": 1.6655052264808366e-05, "loss": 1.1229, "step": 239 }, { "epoch": 0.02511773940345369, "grad_norm": 3.9973498865337533, "learning_rate": 1.672473867595819e-05, "loss": 0.9674, "step": 240 }, { "epoch": 0.02522239665096808, "grad_norm": 3.5983933762145037, "learning_rate": 1.6794425087108016e-05, "loss": 1.2461, "step": 241 }, { "epoch": 0.02532705389848247, "grad_norm": 3.8197581407276977, "learning_rate": 1.686411149825784e-05, "loss": 1.033, "step": 242 }, { "epoch": 0.02543171114599686, "grad_norm": 2.3297919147992445, "learning_rate": 1.6933797909407666e-05, "loss": 0.7915, "step": 243 }, { "epoch": 0.02553636839351125, "grad_norm": 3.8568402426046853, "learning_rate": 1.700348432055749e-05, "loss": 1.1112, "step": 244 }, { "epoch": 0.02564102564102564, "grad_norm": 3.3775792696962434, "learning_rate": 1.7073170731707317e-05, "loss": 1.0562, "step": 245 }, { "epoch": 0.025745682888540032, "grad_norm": 3.8185082938369206, "learning_rate": 1.7142857142857142e-05, "loss": 1.2041, "step": 246 }, { "epoch": 0.02585034013605442, "grad_norm": 3.946632969495481, "learning_rate": 1.721254355400697e-05, "loss": 1.1593, "step": 247 }, { "epoch": 0.025954997383568813, "grad_norm": 3.4354194263721243, "learning_rate": 1.7282229965156796e-05, "loss": 1.0919, "step": 248 }, { "epoch": 0.0260596546310832, "grad_norm": 4.090271178228001, "learning_rate": 1.735191637630662e-05, "loss": 1.1536, "step": 249 }, { "epoch": 0.026164311878597593, "grad_norm": 3.7495551186516574, "learning_rate": 1.7421602787456446e-05, "loss": 1.024, "step": 250 }, { "epoch": 0.02626896912611198, "grad_norm": 3.682112510859972, "learning_rate": 1.7491289198606275e-05, "loss": 1.1665, "step": 251 }, { "epoch": 0.026373626373626374, "grad_norm": 3.3296117604440783, "learning_rate": 1.75609756097561e-05, "loss": 1.0428, "step": 252 }, { "epoch": 0.026478283621140766, "grad_norm": 3.200300909223925, "learning_rate": 1.7630662020905925e-05, "loss": 1.0666, "step": 253 }, { "epoch": 0.026582940868655154, "grad_norm": 3.747386619716652, "learning_rate": 1.770034843205575e-05, "loss": 1.154, "step": 254 }, { "epoch": 0.026687598116169546, "grad_norm": 3.9792918121852483, "learning_rate": 1.7770034843205575e-05, "loss": 0.9988, "step": 255 }, { "epoch": 0.026792255363683935, "grad_norm": 3.1282408579177283, "learning_rate": 1.78397212543554e-05, "loss": 0.9897, "step": 256 }, { "epoch": 0.026896912611198327, "grad_norm": 2.056362322374514, "learning_rate": 1.7909407665505226e-05, "loss": 0.7055, "step": 257 }, { "epoch": 0.027001569858712715, "grad_norm": 2.5522966724650624, "learning_rate": 1.7979094076655054e-05, "loss": 0.8675, "step": 258 }, { "epoch": 0.027106227106227107, "grad_norm": 3.5782559255097457, "learning_rate": 1.804878048780488e-05, "loss": 1.0991, "step": 259 }, { "epoch": 0.027210884353741496, "grad_norm": 3.928265536667273, "learning_rate": 1.8118466898954705e-05, "loss": 1.05, "step": 260 }, { "epoch": 0.027315541601255888, "grad_norm": 3.551876974379943, "learning_rate": 1.818815331010453e-05, "loss": 1.0506, "step": 261 }, { "epoch": 0.027420198848770276, "grad_norm": 2.60968997475218, "learning_rate": 1.825783972125436e-05, "loss": 0.7637, "step": 262 }, { "epoch": 0.027524856096284668, "grad_norm": 3.2023640410720504, "learning_rate": 1.8327526132404184e-05, "loss": 1.1014, "step": 263 }, { "epoch": 0.027629513343799057, "grad_norm": 4.254369956741867, "learning_rate": 1.839721254355401e-05, "loss": 1.1119, "step": 264 }, { "epoch": 0.02773417059131345, "grad_norm": 4.456237791959698, "learning_rate": 1.8466898954703834e-05, "loss": 1.0861, "step": 265 }, { "epoch": 0.02783882783882784, "grad_norm": 3.326487692184649, "learning_rate": 1.8536585365853663e-05, "loss": 1.1481, "step": 266 }, { "epoch": 0.02794348508634223, "grad_norm": 3.4486894326387985, "learning_rate": 1.8606271777003488e-05, "loss": 0.9798, "step": 267 }, { "epoch": 0.02804814233385662, "grad_norm": 3.5440191404796875, "learning_rate": 1.8675958188153313e-05, "loss": 1.1801, "step": 268 }, { "epoch": 0.02815279958137101, "grad_norm": 3.978193923401927, "learning_rate": 1.8745644599303138e-05, "loss": 1.0222, "step": 269 }, { "epoch": 0.0282574568288854, "grad_norm": 2.5739712495009224, "learning_rate": 1.8815331010452963e-05, "loss": 0.7676, "step": 270 }, { "epoch": 0.02836211407639979, "grad_norm": 3.7240630754036523, "learning_rate": 1.888501742160279e-05, "loss": 1.0653, "step": 271 }, { "epoch": 0.028466771323914182, "grad_norm": 3.1834334045524337, "learning_rate": 1.8954703832752614e-05, "loss": 1.0548, "step": 272 }, { "epoch": 0.02857142857142857, "grad_norm": 4.050102203011576, "learning_rate": 1.902439024390244e-05, "loss": 1.0478, "step": 273 }, { "epoch": 0.028676085818942963, "grad_norm": 3.1797343765169783, "learning_rate": 1.9094076655052267e-05, "loss": 1.0334, "step": 274 }, { "epoch": 0.02878074306645735, "grad_norm": 3.6752536373704454, "learning_rate": 1.9163763066202093e-05, "loss": 1.1476, "step": 275 }, { "epoch": 0.028885400313971743, "grad_norm": 4.244905455321221, "learning_rate": 1.9233449477351918e-05, "loss": 1.0884, "step": 276 }, { "epoch": 0.02899005756148613, "grad_norm": 2.548313029921991, "learning_rate": 1.9303135888501743e-05, "loss": 0.7592, "step": 277 }, { "epoch": 0.029094714809000524, "grad_norm": 4.015070153207213, "learning_rate": 1.937282229965157e-05, "loss": 1.1131, "step": 278 }, { "epoch": 0.029199372056514912, "grad_norm": 3.5031056198015382, "learning_rate": 1.9442508710801397e-05, "loss": 0.9966, "step": 279 }, { "epoch": 0.029304029304029304, "grad_norm": 3.7962075539310467, "learning_rate": 1.9512195121951222e-05, "loss": 1.1491, "step": 280 }, { "epoch": 0.029408686551543696, "grad_norm": 3.575956042142137, "learning_rate": 1.9581881533101047e-05, "loss": 1.0795, "step": 281 }, { "epoch": 0.029513343799058084, "grad_norm": 3.5922001990846497, "learning_rate": 1.9651567944250872e-05, "loss": 1.0633, "step": 282 }, { "epoch": 0.029618001046572476, "grad_norm": 3.176407389065215, "learning_rate": 1.9721254355400697e-05, "loss": 1.0095, "step": 283 }, { "epoch": 0.029722658294086865, "grad_norm": 3.9198920216298063, "learning_rate": 1.9790940766550523e-05, "loss": 1.1247, "step": 284 }, { "epoch": 0.029827315541601257, "grad_norm": 3.7058792697424425, "learning_rate": 1.9860627177700348e-05, "loss": 1.0748, "step": 285 }, { "epoch": 0.029931972789115645, "grad_norm": 3.1240553549271075, "learning_rate": 1.9930313588850176e-05, "loss": 1.0794, "step": 286 }, { "epoch": 0.030036630036630037, "grad_norm": 4.161059734877122, "learning_rate": 2e-05, "loss": 1.2648, "step": 287 }, { "epoch": 0.030141287284144426, "grad_norm": 3.292538569117115, "learning_rate": 1.9999999425489863e-05, "loss": 1.132, "step": 288 }, { "epoch": 0.030245944531658818, "grad_norm": 3.3817728904500988, "learning_rate": 1.9999997701959524e-05, "loss": 1.119, "step": 289 }, { "epoch": 0.030350601779173206, "grad_norm": 3.698433893111136, "learning_rate": 1.999999482940917e-05, "loss": 1.1232, "step": 290 }, { "epoch": 0.0304552590266876, "grad_norm": 3.867287084222632, "learning_rate": 1.9999990807839145e-05, "loss": 1.0713, "step": 291 }, { "epoch": 0.030559916274201987, "grad_norm": 3.4207322827343276, "learning_rate": 1.99999856372499e-05, "loss": 1.131, "step": 292 }, { "epoch": 0.03066457352171638, "grad_norm": 3.489327239181255, "learning_rate": 1.9999979317642035e-05, "loss": 1.0825, "step": 293 }, { "epoch": 0.03076923076923077, "grad_norm": 4.019683220751094, "learning_rate": 1.9999971849016274e-05, "loss": 1.0214, "step": 294 }, { "epoch": 0.03087388801674516, "grad_norm": 3.58104376114611, "learning_rate": 1.9999963231373474e-05, "loss": 1.0346, "step": 295 }, { "epoch": 0.03097854526425955, "grad_norm": 4.29557092301096, "learning_rate": 1.9999953464714628e-05, "loss": 1.1057, "step": 296 }, { "epoch": 0.03108320251177394, "grad_norm": 4.031376664345659, "learning_rate": 1.9999942549040857e-05, "loss": 1.199, "step": 297 }, { "epoch": 0.031187859759288332, "grad_norm": 3.2778277667278837, "learning_rate": 1.9999930484353412e-05, "loss": 1.1605, "step": 298 }, { "epoch": 0.031292517006802724, "grad_norm": 3.57216344766695, "learning_rate": 1.9999917270653686e-05, "loss": 1.2141, "step": 299 }, { "epoch": 0.03139717425431711, "grad_norm": 3.32290052080157, "learning_rate": 1.9999902907943195e-05, "loss": 1.0385, "step": 300 }, { "epoch": 0.0315018315018315, "grad_norm": 3.3614253151991718, "learning_rate": 1.9999887396223584e-05, "loss": 0.8076, "step": 301 }, { "epoch": 0.03160648874934589, "grad_norm": 3.999164186143186, "learning_rate": 1.999987073549664e-05, "loss": 1.1335, "step": 302 }, { "epoch": 0.031711145996860285, "grad_norm": 3.8434118240060937, "learning_rate": 1.9999852925764277e-05, "loss": 1.0446, "step": 303 }, { "epoch": 0.03181580324437467, "grad_norm": 4.069391175340414, "learning_rate": 1.9999833967028542e-05, "loss": 1.0591, "step": 304 }, { "epoch": 0.03192046049188906, "grad_norm": 3.8316602155773998, "learning_rate": 1.999981385929161e-05, "loss": 1.1707, "step": 305 }, { "epoch": 0.032025117739403454, "grad_norm": 4.737562782836604, "learning_rate": 1.99997926025558e-05, "loss": 1.0614, "step": 306 }, { "epoch": 0.032129774986917846, "grad_norm": 3.8265410957341515, "learning_rate": 1.9999770196823544e-05, "loss": 1.1765, "step": 307 }, { "epoch": 0.03223443223443224, "grad_norm": 3.265062349948494, "learning_rate": 1.999974664209742e-05, "loss": 1.0048, "step": 308 }, { "epoch": 0.03233908948194662, "grad_norm": 3.642030345306258, "learning_rate": 1.9999721938380133e-05, "loss": 1.1304, "step": 309 }, { "epoch": 0.032443746729461015, "grad_norm": 3.4292052143727405, "learning_rate": 1.999969608567453e-05, "loss": 1.1584, "step": 310 }, { "epoch": 0.03254840397697541, "grad_norm": 3.7985067604810565, "learning_rate": 1.999966908398357e-05, "loss": 1.0927, "step": 311 }, { "epoch": 0.0326530612244898, "grad_norm": 4.314018944438203, "learning_rate": 1.999964093331036e-05, "loss": 1.0461, "step": 312 }, { "epoch": 0.032757718472004184, "grad_norm": 3.083704093004486, "learning_rate": 1.9999611633658142e-05, "loss": 1.0489, "step": 313 }, { "epoch": 0.032862375719518576, "grad_norm": 3.261286750192, "learning_rate": 1.999958118503027e-05, "loss": 1.0078, "step": 314 }, { "epoch": 0.03296703296703297, "grad_norm": 3.4697246675242375, "learning_rate": 1.9999549587430252e-05, "loss": 1.1564, "step": 315 }, { "epoch": 0.03307169021454736, "grad_norm": 3.5137061405420478, "learning_rate": 1.9999516840861714e-05, "loss": 0.9702, "step": 316 }, { "epoch": 0.033176347462061745, "grad_norm": 3.7031729797465465, "learning_rate": 1.9999482945328422e-05, "loss": 1.1463, "step": 317 }, { "epoch": 0.03328100470957614, "grad_norm": 3.526121821006833, "learning_rate": 1.9999447900834266e-05, "loss": 0.9697, "step": 318 }, { "epoch": 0.03338566195709053, "grad_norm": 3.582049192765141, "learning_rate": 1.9999411707383273e-05, "loss": 1.1307, "step": 319 }, { "epoch": 0.03349031920460492, "grad_norm": 3.71675942867392, "learning_rate": 1.9999374364979608e-05, "loss": 1.1452, "step": 320 }, { "epoch": 0.033594976452119306, "grad_norm": 3.5193147394190074, "learning_rate": 1.9999335873627555e-05, "loss": 1.1801, "step": 321 }, { "epoch": 0.0336996336996337, "grad_norm": 3.5695487111516617, "learning_rate": 1.999929623333154e-05, "loss": 1.0796, "step": 322 }, { "epoch": 0.03380429094714809, "grad_norm": 3.32206675867571, "learning_rate": 1.999925544409612e-05, "loss": 1.0898, "step": 323 }, { "epoch": 0.03390894819466248, "grad_norm": 2.9565893128467153, "learning_rate": 1.9999213505925975e-05, "loss": 1.1213, "step": 324 }, { "epoch": 0.034013605442176874, "grad_norm": 3.595043850818446, "learning_rate": 1.9999170418825928e-05, "loss": 1.1474, "step": 325 }, { "epoch": 0.03411826268969126, "grad_norm": 3.003534456854358, "learning_rate": 1.9999126182800932e-05, "loss": 1.0788, "step": 326 }, { "epoch": 0.03422291993720565, "grad_norm": 3.9622027216915914, "learning_rate": 1.9999080797856063e-05, "loss": 1.1529, "step": 327 }, { "epoch": 0.03432757718472004, "grad_norm": 3.7438205661201955, "learning_rate": 1.9999034263996543e-05, "loss": 1.1211, "step": 328 }, { "epoch": 0.034432234432234435, "grad_norm": 3.1550464025240004, "learning_rate": 1.9998986581227718e-05, "loss": 1.1448, "step": 329 }, { "epoch": 0.03453689167974882, "grad_norm": 3.0209980125708435, "learning_rate": 1.999893774955506e-05, "loss": 1.053, "step": 330 }, { "epoch": 0.03464154892726321, "grad_norm": 3.214470861959865, "learning_rate": 1.999888776898419e-05, "loss": 1.0786, "step": 331 }, { "epoch": 0.034746206174777604, "grad_norm": 3.747250332756373, "learning_rate": 1.9998836639520843e-05, "loss": 1.1941, "step": 332 }, { "epoch": 0.034850863422291996, "grad_norm": 3.406359341493149, "learning_rate": 1.9998784361170893e-05, "loss": 1.1671, "step": 333 }, { "epoch": 0.03495552066980638, "grad_norm": 2.8938267258829944, "learning_rate": 1.9998730933940355e-05, "loss": 1.0199, "step": 334 }, { "epoch": 0.03506017791732077, "grad_norm": 3.1924364336563045, "learning_rate": 1.999867635783536e-05, "loss": 1.133, "step": 335 }, { "epoch": 0.035164835164835165, "grad_norm": 3.0001944625011707, "learning_rate": 1.9998620632862184e-05, "loss": 1.066, "step": 336 }, { "epoch": 0.03526949241234956, "grad_norm": 3.3739671593672447, "learning_rate": 1.9998563759027228e-05, "loss": 0.993, "step": 337 }, { "epoch": 0.03537414965986395, "grad_norm": 3.0009291832792693, "learning_rate": 1.9998505736337022e-05, "loss": 1.0397, "step": 338 }, { "epoch": 0.035478806907378334, "grad_norm": 3.57452005557107, "learning_rate": 1.9998446564798243e-05, "loss": 1.1239, "step": 339 }, { "epoch": 0.035583464154892726, "grad_norm": 3.4334676273713898, "learning_rate": 1.9998386244417686e-05, "loss": 1.059, "step": 340 }, { "epoch": 0.03568812140240712, "grad_norm": 3.4932921996468846, "learning_rate": 1.9998324775202277e-05, "loss": 1.0886, "step": 341 }, { "epoch": 0.03579277864992151, "grad_norm": 3.293255380782077, "learning_rate": 1.9998262157159087e-05, "loss": 0.8386, "step": 342 }, { "epoch": 0.035897435897435895, "grad_norm": 3.7924349792602188, "learning_rate": 1.99981983902953e-05, "loss": 1.1097, "step": 343 }, { "epoch": 0.03600209314495029, "grad_norm": 3.4574803613996297, "learning_rate": 1.9998133474618254e-05, "loss": 1.1045, "step": 344 }, { "epoch": 0.03610675039246468, "grad_norm": 3.2469937969404272, "learning_rate": 1.9998067410135403e-05, "loss": 1.1738, "step": 345 }, { "epoch": 0.03621140763997907, "grad_norm": 1.9876001648747574, "learning_rate": 1.999800019685434e-05, "loss": 0.7593, "step": 346 }, { "epoch": 0.036316064887493456, "grad_norm": 3.247944294880278, "learning_rate": 1.9997931834782783e-05, "loss": 1.0247, "step": 347 }, { "epoch": 0.03642072213500785, "grad_norm": 2.30114299659168, "learning_rate": 1.999786232392859e-05, "loss": 0.7979, "step": 348 }, { "epoch": 0.03652537938252224, "grad_norm": 3.0984066897700964, "learning_rate": 1.999779166429975e-05, "loss": 1.0613, "step": 349 }, { "epoch": 0.03663003663003663, "grad_norm": 4.166217793012304, "learning_rate": 1.999771985590438e-05, "loss": 1.1616, "step": 350 }, { "epoch": 0.036734693877551024, "grad_norm": 3.8538711686650697, "learning_rate": 1.999764689875073e-05, "loss": 1.034, "step": 351 }, { "epoch": 0.03683935112506541, "grad_norm": 3.2661014681676273, "learning_rate": 1.9997572792847186e-05, "loss": 1.0767, "step": 352 }, { "epoch": 0.0369440083725798, "grad_norm": 3.1154642175823195, "learning_rate": 1.999749753820226e-05, "loss": 1.1105, "step": 353 }, { "epoch": 0.03704866562009419, "grad_norm": 2.72464228837991, "learning_rate": 1.99974211348246e-05, "loss": 1.0197, "step": 354 }, { "epoch": 0.037153322867608585, "grad_norm": 4.151004358135479, "learning_rate": 1.9997343582722984e-05, "loss": 1.0986, "step": 355 }, { "epoch": 0.03725798011512297, "grad_norm": 3.2194916123198096, "learning_rate": 1.9997264881906324e-05, "loss": 0.9507, "step": 356 }, { "epoch": 0.03736263736263736, "grad_norm": 3.094166715267168, "learning_rate": 1.9997185032383663e-05, "loss": 1.0726, "step": 357 }, { "epoch": 0.037467294610151754, "grad_norm": 2.858902354576879, "learning_rate": 1.9997104034164176e-05, "loss": 0.9607, "step": 358 }, { "epoch": 0.037571951857666146, "grad_norm": 3.59796942440944, "learning_rate": 1.9997021887257166e-05, "loss": 1.1792, "step": 359 }, { "epoch": 0.03767660910518053, "grad_norm": 3.4966139920154546, "learning_rate": 1.9996938591672076e-05, "loss": 1.1609, "step": 360 }, { "epoch": 0.03778126635269492, "grad_norm": 3.2932872126047164, "learning_rate": 1.9996854147418477e-05, "loss": 1.1843, "step": 361 }, { "epoch": 0.037885923600209315, "grad_norm": 3.1771915837350897, "learning_rate": 1.999676855450607e-05, "loss": 1.1945, "step": 362 }, { "epoch": 0.03799058084772371, "grad_norm": 3.0394535417384465, "learning_rate": 1.999668181294469e-05, "loss": 1.1143, "step": 363 }, { "epoch": 0.0380952380952381, "grad_norm": 2.8537817079806067, "learning_rate": 1.9996593922744308e-05, "loss": 0.9112, "step": 364 }, { "epoch": 0.038199895342752484, "grad_norm": 3.322133711285726, "learning_rate": 1.9996504883915017e-05, "loss": 1.1074, "step": 365 }, { "epoch": 0.038304552590266876, "grad_norm": 3.0632114432772037, "learning_rate": 1.999641469646705e-05, "loss": 1.0545, "step": 366 }, { "epoch": 0.03840920983778127, "grad_norm": 3.7007048473022235, "learning_rate": 1.999632336041077e-05, "loss": 1.174, "step": 367 }, { "epoch": 0.03851386708529566, "grad_norm": 3.2674017560088946, "learning_rate": 1.999623087575667e-05, "loss": 1.0591, "step": 368 }, { "epoch": 0.038618524332810045, "grad_norm": 3.522836020354182, "learning_rate": 1.9996137242515376e-05, "loss": 1.0767, "step": 369 }, { "epoch": 0.03872318158032444, "grad_norm": 3.827590637468791, "learning_rate": 1.9996042460697654e-05, "loss": 1.046, "step": 370 }, { "epoch": 0.03882783882783883, "grad_norm": 3.301858757724581, "learning_rate": 1.9995946530314384e-05, "loss": 1.0542, "step": 371 }, { "epoch": 0.03893249607535322, "grad_norm": 3.119223795376493, "learning_rate": 1.9995849451376593e-05, "loss": 1.1795, "step": 372 }, { "epoch": 0.039037153322867606, "grad_norm": 3.623763948471002, "learning_rate": 1.999575122389544e-05, "loss": 1.0188, "step": 373 }, { "epoch": 0.039141810570382, "grad_norm": 2.947104901430164, "learning_rate": 1.9995651847882208e-05, "loss": 1.0813, "step": 374 }, { "epoch": 0.03924646781789639, "grad_norm": 3.284552361791963, "learning_rate": 1.9995551323348314e-05, "loss": 1.1506, "step": 375 }, { "epoch": 0.03935112506541078, "grad_norm": 3.4207057905425566, "learning_rate": 1.999544965030531e-05, "loss": 1.0482, "step": 376 }, { "epoch": 0.03945578231292517, "grad_norm": 3.9718583768537226, "learning_rate": 1.999534682876488e-05, "loss": 1.107, "step": 377 }, { "epoch": 0.03956043956043956, "grad_norm": 3.4233946505392274, "learning_rate": 1.9995242858738834e-05, "loss": 1.055, "step": 378 }, { "epoch": 0.03966509680795395, "grad_norm": 3.4939287719910905, "learning_rate": 1.999513774023912e-05, "loss": 1.1309, "step": 379 }, { "epoch": 0.03976975405546834, "grad_norm": 2.846888454584744, "learning_rate": 1.9995031473277822e-05, "loss": 1.0401, "step": 380 }, { "epoch": 0.039874411302982735, "grad_norm": 3.4884066840679826, "learning_rate": 1.9994924057867142e-05, "loss": 1.1144, "step": 381 }, { "epoch": 0.03997906855049712, "grad_norm": 3.2953541736196095, "learning_rate": 1.999481549401943e-05, "loss": 1.0356, "step": 382 }, { "epoch": 0.04008372579801151, "grad_norm": 3.8515521747365353, "learning_rate": 1.999470578174715e-05, "loss": 1.0333, "step": 383 }, { "epoch": 0.040188383045525904, "grad_norm": 3.862495676492481, "learning_rate": 1.999459492106292e-05, "loss": 1.0685, "step": 384 }, { "epoch": 0.040293040293040296, "grad_norm": 3.8607869501733596, "learning_rate": 1.999448291197947e-05, "loss": 1.0893, "step": 385 }, { "epoch": 0.04039769754055468, "grad_norm": 3.108316063988891, "learning_rate": 1.999436975450967e-05, "loss": 1.129, "step": 386 }, { "epoch": 0.04050235478806907, "grad_norm": 3.1460718799511724, "learning_rate": 1.9994255448666528e-05, "loss": 1.0922, "step": 387 }, { "epoch": 0.040607012035583465, "grad_norm": 4.815195078193566, "learning_rate": 1.9994139994463174e-05, "loss": 1.0287, "step": 388 }, { "epoch": 0.04071166928309786, "grad_norm": 3.2133489103871464, "learning_rate": 1.9994023391912873e-05, "loss": 0.9636, "step": 389 }, { "epoch": 0.04081632653061224, "grad_norm": 3.0323586032384466, "learning_rate": 1.9993905641029024e-05, "loss": 1.1492, "step": 390 }, { "epoch": 0.040920983778126634, "grad_norm": 3.408180618813306, "learning_rate": 1.999378674182516e-05, "loss": 1.1212, "step": 391 }, { "epoch": 0.041025641025641026, "grad_norm": 2.832485184741635, "learning_rate": 1.9993666694314938e-05, "loss": 1.1055, "step": 392 }, { "epoch": 0.04113029827315542, "grad_norm": 3.3779814563246826, "learning_rate": 1.999354549851215e-05, "loss": 1.0783, "step": 393 }, { "epoch": 0.04123495552066981, "grad_norm": 3.15338999466506, "learning_rate": 1.9993423154430732e-05, "loss": 1.1143, "step": 394 }, { "epoch": 0.041339612768184195, "grad_norm": 3.651309843882222, "learning_rate": 1.9993299662084734e-05, "loss": 1.1113, "step": 395 }, { "epoch": 0.04144427001569859, "grad_norm": 2.5508921394839152, "learning_rate": 1.9993175021488343e-05, "loss": 0.8452, "step": 396 }, { "epoch": 0.04154892726321298, "grad_norm": 3.3357843841110077, "learning_rate": 1.9993049232655882e-05, "loss": 1.1295, "step": 397 }, { "epoch": 0.04165358451072737, "grad_norm": 3.9695559727733483, "learning_rate": 1.999292229560181e-05, "loss": 0.9869, "step": 398 }, { "epoch": 0.041758241758241756, "grad_norm": 3.7523305234520024, "learning_rate": 1.9992794210340707e-05, "loss": 1.1228, "step": 399 }, { "epoch": 0.04186289900575615, "grad_norm": 3.5321821977005503, "learning_rate": 1.999266497688729e-05, "loss": 1.0244, "step": 400 }, { "epoch": 0.04196755625327054, "grad_norm": 3.433702518298409, "learning_rate": 1.9992534595256414e-05, "loss": 1.1635, "step": 401 }, { "epoch": 0.04207221350078493, "grad_norm": 3.3240435185890482, "learning_rate": 1.9992403065463053e-05, "loss": 1.1308, "step": 402 }, { "epoch": 0.04217687074829932, "grad_norm": 2.418715245595869, "learning_rate": 1.9992270387522327e-05, "loss": 0.7399, "step": 403 }, { "epoch": 0.04228152799581371, "grad_norm": 3.4217501748246497, "learning_rate": 1.999213656144947e-05, "loss": 1.0257, "step": 404 }, { "epoch": 0.0423861852433281, "grad_norm": 3.5574880915366642, "learning_rate": 1.9992001587259872e-05, "loss": 1.1526, "step": 405 }, { "epoch": 0.04249084249084249, "grad_norm": 3.439254247799819, "learning_rate": 1.9991865464969035e-05, "loss": 1.0449, "step": 406 }, { "epoch": 0.042595499738356885, "grad_norm": 3.5204318338726273, "learning_rate": 1.99917281945926e-05, "loss": 1.1471, "step": 407 }, { "epoch": 0.04270015698587127, "grad_norm": 3.858873349887819, "learning_rate": 1.9991589776146335e-05, "loss": 1.0934, "step": 408 }, { "epoch": 0.04280481423338566, "grad_norm": 3.362234619886235, "learning_rate": 1.9991450209646153e-05, "loss": 1.1658, "step": 409 }, { "epoch": 0.042909471480900054, "grad_norm": 3.220742237697856, "learning_rate": 1.999130949510809e-05, "loss": 1.1169, "step": 410 }, { "epoch": 0.043014128728414446, "grad_norm": 2.9342623132519536, "learning_rate": 1.999116763254831e-05, "loss": 0.9709, "step": 411 }, { "epoch": 0.04311878597592883, "grad_norm": 3.0789423142360572, "learning_rate": 1.9991024621983115e-05, "loss": 1.0549, "step": 412 }, { "epoch": 0.04322344322344322, "grad_norm": 3.5769673216250717, "learning_rate": 1.9990880463428938e-05, "loss": 1.0587, "step": 413 }, { "epoch": 0.043328100470957615, "grad_norm": 3.387771571360907, "learning_rate": 1.9990735156902337e-05, "loss": 1.1727, "step": 414 }, { "epoch": 0.043432757718472007, "grad_norm": 3.8004457138302237, "learning_rate": 1.9990588702420017e-05, "loss": 1.0929, "step": 415 }, { "epoch": 0.04353741496598639, "grad_norm": 2.9911775987883606, "learning_rate": 1.9990441099998802e-05, "loss": 1.0605, "step": 416 }, { "epoch": 0.043642072213500784, "grad_norm": 3.3225073464157875, "learning_rate": 1.999029234965565e-05, "loss": 1.101, "step": 417 }, { "epoch": 0.043746729461015176, "grad_norm": 3.142201219435067, "learning_rate": 1.9990142451407658e-05, "loss": 1.1804, "step": 418 }, { "epoch": 0.04385138670852957, "grad_norm": 3.2497520075753905, "learning_rate": 1.9989991405272043e-05, "loss": 1.09, "step": 419 }, { "epoch": 0.04395604395604396, "grad_norm": 2.8504351959056646, "learning_rate": 1.9989839211266164e-05, "loss": 1.089, "step": 420 }, { "epoch": 0.044060701203558345, "grad_norm": 3.425903015712905, "learning_rate": 1.998968586940751e-05, "loss": 1.1222, "step": 421 }, { "epoch": 0.044165358451072737, "grad_norm": 3.12857526376575, "learning_rate": 1.9989531379713697e-05, "loss": 1.1248, "step": 422 }, { "epoch": 0.04427001569858713, "grad_norm": 2.753045621554163, "learning_rate": 1.998937574220248e-05, "loss": 1.076, "step": 423 }, { "epoch": 0.04437467294610152, "grad_norm": 3.9313106712974, "learning_rate": 1.998921895689174e-05, "loss": 0.9975, "step": 424 }, { "epoch": 0.044479330193615906, "grad_norm": 3.1879760675576465, "learning_rate": 1.9989061023799486e-05, "loss": 1.0489, "step": 425 }, { "epoch": 0.0445839874411303, "grad_norm": 3.5604479042648447, "learning_rate": 1.9988901942943874e-05, "loss": 1.0179, "step": 426 }, { "epoch": 0.04468864468864469, "grad_norm": 3.4144953084922376, "learning_rate": 1.998874171434318e-05, "loss": 0.9328, "step": 427 }, { "epoch": 0.04479330193615908, "grad_norm": 2.955908397360233, "learning_rate": 1.998858033801581e-05, "loss": 1.1155, "step": 428 }, { "epoch": 0.044897959183673466, "grad_norm": 3.6880350580445254, "learning_rate": 1.9988417813980315e-05, "loss": 1.1311, "step": 429 }, { "epoch": 0.04500261643118786, "grad_norm": 3.3607014595989178, "learning_rate": 1.9988254142255362e-05, "loss": 1.1893, "step": 430 }, { "epoch": 0.04510727367870225, "grad_norm": 2.8727076333580395, "learning_rate": 1.998808932285976e-05, "loss": 0.9684, "step": 431 }, { "epoch": 0.04521193092621664, "grad_norm": 3.439241913580116, "learning_rate": 1.9987923355812448e-05, "loss": 1.0141, "step": 432 }, { "epoch": 0.04531658817373103, "grad_norm": 2.9285693240861783, "learning_rate": 1.9987756241132494e-05, "loss": 1.0593, "step": 433 }, { "epoch": 0.04542124542124542, "grad_norm": 3.670926244435246, "learning_rate": 1.99875879788391e-05, "loss": 1.0491, "step": 434 }, { "epoch": 0.04552590266875981, "grad_norm": 3.006014723154654, "learning_rate": 1.99874185689516e-05, "loss": 1.1722, "step": 435 }, { "epoch": 0.0456305599162742, "grad_norm": 3.7892151003300305, "learning_rate": 1.9987248011489462e-05, "loss": 1.0608, "step": 436 }, { "epoch": 0.045735217163788595, "grad_norm": 3.2332272253735765, "learning_rate": 1.998707630647228e-05, "loss": 1.1638, "step": 437 }, { "epoch": 0.04583987441130298, "grad_norm": 3.480839997662857, "learning_rate": 1.998690345391978e-05, "loss": 1.0832, "step": 438 }, { "epoch": 0.04594453165881737, "grad_norm": 3.8054324863562385, "learning_rate": 1.9986729453851833e-05, "loss": 1.0328, "step": 439 }, { "epoch": 0.046049188906331764, "grad_norm": 3.8488459066381497, "learning_rate": 1.998655430628843e-05, "loss": 0.9417, "step": 440 }, { "epoch": 0.046153846153846156, "grad_norm": 3.1253018165861786, "learning_rate": 1.9986378011249684e-05, "loss": 1.1043, "step": 441 }, { "epoch": 0.04625850340136054, "grad_norm": 3.5195863235640408, "learning_rate": 1.9986200568755863e-05, "loss": 1.1416, "step": 442 }, { "epoch": 0.04636316064887493, "grad_norm": 3.38872021874277, "learning_rate": 1.9986021978827353e-05, "loss": 1.1378, "step": 443 }, { "epoch": 0.046467817896389325, "grad_norm": 3.2198779898823475, "learning_rate": 1.9985842241484678e-05, "loss": 1.1032, "step": 444 }, { "epoch": 0.04657247514390372, "grad_norm": 3.037988731718675, "learning_rate": 1.998566135674848e-05, "loss": 1.0834, "step": 445 }, { "epoch": 0.0466771323914181, "grad_norm": 3.113016605036802, "learning_rate": 1.998547932463955e-05, "loss": 1.0245, "step": 446 }, { "epoch": 0.046781789638932494, "grad_norm": 2.981225578291425, "learning_rate": 1.9985296145178803e-05, "loss": 1.1454, "step": 447 }, { "epoch": 0.046886446886446886, "grad_norm": 3.2756449737549818, "learning_rate": 1.998511181838729e-05, "loss": 1.0624, "step": 448 }, { "epoch": 0.04699110413396128, "grad_norm": 3.2146017721872417, "learning_rate": 1.9984926344286184e-05, "loss": 1.121, "step": 449 }, { "epoch": 0.04709576138147567, "grad_norm": 3.6515983191470642, "learning_rate": 1.99847397228968e-05, "loss": 1.0705, "step": 450 }, { "epoch": 0.047200418628990055, "grad_norm": 2.7925140240267536, "learning_rate": 1.998455195424058e-05, "loss": 1.0412, "step": 451 }, { "epoch": 0.04730507587650445, "grad_norm": 3.2058786484672184, "learning_rate": 1.99843630383391e-05, "loss": 1.0438, "step": 452 }, { "epoch": 0.04740973312401884, "grad_norm": 3.206199705921322, "learning_rate": 1.9984172975214068e-05, "loss": 1.1241, "step": 453 }, { "epoch": 0.04751439037153323, "grad_norm": 2.96917837023507, "learning_rate": 1.998398176488732e-05, "loss": 1.0307, "step": 454 }, { "epoch": 0.047619047619047616, "grad_norm": 2.2142798706812514, "learning_rate": 1.9983789407380828e-05, "loss": 0.7792, "step": 455 }, { "epoch": 0.04772370486656201, "grad_norm": 2.1393293337536754, "learning_rate": 1.9983595902716693e-05, "loss": 0.8014, "step": 456 }, { "epoch": 0.0478283621140764, "grad_norm": 3.088242788316876, "learning_rate": 1.998340125091715e-05, "loss": 1.0111, "step": 457 }, { "epoch": 0.04793301936159079, "grad_norm": 3.001125565502091, "learning_rate": 1.9983205452004566e-05, "loss": 0.994, "step": 458 }, { "epoch": 0.04803767660910518, "grad_norm": 3.783192428283837, "learning_rate": 1.9983008506001437e-05, "loss": 1.1141, "step": 459 }, { "epoch": 0.04814233385661957, "grad_norm": 3.004204145609777, "learning_rate": 1.9982810412930393e-05, "loss": 1.1247, "step": 460 }, { "epoch": 0.04824699110413396, "grad_norm": 3.1685378042245147, "learning_rate": 1.9982611172814197e-05, "loss": 1.1299, "step": 461 }, { "epoch": 0.04835164835164835, "grad_norm": 3.1587611211309907, "learning_rate": 1.9982410785675735e-05, "loss": 1.112, "step": 462 }, { "epoch": 0.048456305599162745, "grad_norm": 3.458436913532201, "learning_rate": 1.9982209251538043e-05, "loss": 1.0351, "step": 463 }, { "epoch": 0.04856096284667713, "grad_norm": 2.969870331031836, "learning_rate": 1.998200657042427e-05, "loss": 1.1774, "step": 464 }, { "epoch": 0.04866562009419152, "grad_norm": 3.763085180450192, "learning_rate": 1.9981802742357704e-05, "loss": 1.0587, "step": 465 }, { "epoch": 0.048770277341705914, "grad_norm": 3.33040533060541, "learning_rate": 1.998159776736177e-05, "loss": 1.0296, "step": 466 }, { "epoch": 0.048874934589220306, "grad_norm": 3.290492374884369, "learning_rate": 1.998139164546002e-05, "loss": 1.2095, "step": 467 }, { "epoch": 0.04897959183673469, "grad_norm": 2.857263563106438, "learning_rate": 1.998118437667613e-05, "loss": 0.9923, "step": 468 }, { "epoch": 0.04908424908424908, "grad_norm": 3.053876121052991, "learning_rate": 1.9980975961033925e-05, "loss": 0.8369, "step": 469 }, { "epoch": 0.049188906331763475, "grad_norm": 3.177534083313426, "learning_rate": 1.998076639855735e-05, "loss": 1.0123, "step": 470 }, { "epoch": 0.04929356357927787, "grad_norm": 4.140885802398993, "learning_rate": 1.998055568927048e-05, "loss": 0.9744, "step": 471 }, { "epoch": 0.04939822082679225, "grad_norm": 2.9041664415140396, "learning_rate": 1.9980343833197528e-05, "loss": 1.0679, "step": 472 }, { "epoch": 0.049502878074306644, "grad_norm": 2.942481635418397, "learning_rate": 1.998013083036284e-05, "loss": 1.099, "step": 473 }, { "epoch": 0.049607535321821036, "grad_norm": 2.9546879658340486, "learning_rate": 1.9979916680790885e-05, "loss": 1.0112, "step": 474 }, { "epoch": 0.04971219256933543, "grad_norm": 3.0965296597879113, "learning_rate": 1.997970138450627e-05, "loss": 1.0086, "step": 475 }, { "epoch": 0.04981684981684982, "grad_norm": 3.855575845463165, "learning_rate": 1.997948494153374e-05, "loss": 1.1754, "step": 476 }, { "epoch": 0.049921507064364205, "grad_norm": 3.993215917358464, "learning_rate": 1.997926735189816e-05, "loss": 1.0746, "step": 477 }, { "epoch": 0.0500261643118786, "grad_norm": 3.0492290153570094, "learning_rate": 1.9979048615624526e-05, "loss": 1.1768, "step": 478 }, { "epoch": 0.05013082155939299, "grad_norm": 3.6838636144975068, "learning_rate": 1.997882873273798e-05, "loss": 1.0034, "step": 479 }, { "epoch": 0.05023547880690738, "grad_norm": 3.4794216494384926, "learning_rate": 1.9978607703263783e-05, "loss": 1.0218, "step": 480 }, { "epoch": 0.050340136054421766, "grad_norm": 3.241477320849195, "learning_rate": 1.9978385527227334e-05, "loss": 1.0337, "step": 481 }, { "epoch": 0.05044479330193616, "grad_norm": 3.6191060732119857, "learning_rate": 1.9978162204654155e-05, "loss": 1.1394, "step": 482 }, { "epoch": 0.05054945054945055, "grad_norm": 2.8341172658181253, "learning_rate": 1.9977937735569915e-05, "loss": 1.0184, "step": 483 }, { "epoch": 0.05065410779696494, "grad_norm": 3.16341632485204, "learning_rate": 1.99777121200004e-05, "loss": 1.0631, "step": 484 }, { "epoch": 0.05075876504447933, "grad_norm": 3.015431914842525, "learning_rate": 1.9977485357971535e-05, "loss": 1.2249, "step": 485 }, { "epoch": 0.05086342229199372, "grad_norm": 15.10309744378473, "learning_rate": 1.997725744950938e-05, "loss": 1.0752, "step": 486 }, { "epoch": 0.05096807953950811, "grad_norm": 3.0329623555885776, "learning_rate": 1.9977028394640113e-05, "loss": 1.1592, "step": 487 }, { "epoch": 0.0510727367870225, "grad_norm": 2.993215052911289, "learning_rate": 1.997679819339006e-05, "loss": 1.1015, "step": 488 }, { "epoch": 0.05117739403453689, "grad_norm": 3.028558942288711, "learning_rate": 1.997656684578567e-05, "loss": 1.0943, "step": 489 }, { "epoch": 0.05128205128205128, "grad_norm": 2.6575824106168677, "learning_rate": 1.9976334351853522e-05, "loss": 1.1031, "step": 490 }, { "epoch": 0.05138670852956567, "grad_norm": 3.262048948295798, "learning_rate": 1.997610071162033e-05, "loss": 1.0627, "step": 491 }, { "epoch": 0.051491365777080064, "grad_norm": 2.942164825678646, "learning_rate": 1.997586592511295e-05, "loss": 1.1196, "step": 492 }, { "epoch": 0.051596023024594456, "grad_norm": 3.143793552468755, "learning_rate": 1.997562999235835e-05, "loss": 1.1187, "step": 493 }, { "epoch": 0.05170068027210884, "grad_norm": 3.6555595068602846, "learning_rate": 1.997539291338364e-05, "loss": 0.9745, "step": 494 }, { "epoch": 0.05180533751962323, "grad_norm": 3.3509907787982494, "learning_rate": 1.9975154688216064e-05, "loss": 0.9672, "step": 495 }, { "epoch": 0.051909994767137625, "grad_norm": 3.446789550285205, "learning_rate": 1.997491531688299e-05, "loss": 1.0936, "step": 496 }, { "epoch": 0.05201465201465202, "grad_norm": 3.215446069445594, "learning_rate": 1.9974674799411927e-05, "loss": 1.1857, "step": 497 }, { "epoch": 0.0521193092621664, "grad_norm": 3.694397906430922, "learning_rate": 1.9974433135830505e-05, "loss": 1.0549, "step": 498 }, { "epoch": 0.052223966509680794, "grad_norm": 3.5507510319349462, "learning_rate": 1.9974190326166498e-05, "loss": 1.0954, "step": 499 }, { "epoch": 0.052328623757195186, "grad_norm": 2.911759573200471, "learning_rate": 1.9973946370447804e-05, "loss": 1.1315, "step": 500 }, { "epoch": 0.05243328100470958, "grad_norm": 3.830707804003234, "learning_rate": 1.9973701268702454e-05, "loss": 1.1961, "step": 501 }, { "epoch": 0.05253793825222396, "grad_norm": 3.5874003170282953, "learning_rate": 1.9973455020958602e-05, "loss": 1.1831, "step": 502 }, { "epoch": 0.052642595499738355, "grad_norm": 5.067523741996903, "learning_rate": 1.9973207627244556e-05, "loss": 1.06, "step": 503 }, { "epoch": 0.05274725274725275, "grad_norm": 3.126368066369804, "learning_rate": 1.9972959087588734e-05, "loss": 0.7553, "step": 504 }, { "epoch": 0.05285190999476714, "grad_norm": 3.7926866203618923, "learning_rate": 1.9972709402019696e-05, "loss": 1.1616, "step": 505 }, { "epoch": 0.05295656724228153, "grad_norm": 3.098260850078986, "learning_rate": 1.9972458570566134e-05, "loss": 1.0706, "step": 506 }, { "epoch": 0.053061224489795916, "grad_norm": 3.121342346390446, "learning_rate": 1.9972206593256863e-05, "loss": 1.0746, "step": 507 }, { "epoch": 0.05316588173731031, "grad_norm": 2.3263594607795484, "learning_rate": 1.997195347012084e-05, "loss": 0.8619, "step": 508 }, { "epoch": 0.0532705389848247, "grad_norm": 4.313521927576008, "learning_rate": 1.997169920118714e-05, "loss": 0.9864, "step": 509 }, { "epoch": 0.05337519623233909, "grad_norm": 2.8736994783195966, "learning_rate": 1.9971443786485e-05, "loss": 1.1794, "step": 510 }, { "epoch": 0.05347985347985348, "grad_norm": 2.67925787298318, "learning_rate": 1.9971187226043746e-05, "loss": 0.92, "step": 511 }, { "epoch": 0.05358451072736787, "grad_norm": 2.7647374456372544, "learning_rate": 1.997092951989287e-05, "loss": 1.1168, "step": 512 }, { "epoch": 0.05368916797488226, "grad_norm": 3.207257190593513, "learning_rate": 1.9970670668061977e-05, "loss": 1.182, "step": 513 }, { "epoch": 0.05379382522239665, "grad_norm": 2.9763787397307384, "learning_rate": 1.997041067058081e-05, "loss": 1.0417, "step": 514 }, { "epoch": 0.05389848246991104, "grad_norm": 3.3432997678318634, "learning_rate": 1.997014952747925e-05, "loss": 1.051, "step": 515 }, { "epoch": 0.05400313971742543, "grad_norm": 2.789610544658323, "learning_rate": 1.996988723878729e-05, "loss": 1.1173, "step": 516 }, { "epoch": 0.05410779696493982, "grad_norm": 3.7956635314776, "learning_rate": 1.9969623804535084e-05, "loss": 1.0319, "step": 517 }, { "epoch": 0.054212454212454214, "grad_norm": 3.0432435518135597, "learning_rate": 1.9969359224752884e-05, "loss": 1.0832, "step": 518 }, { "epoch": 0.054317111459968606, "grad_norm": 3.060370744438756, "learning_rate": 1.9969093499471106e-05, "loss": 1.0148, "step": 519 }, { "epoch": 0.05442176870748299, "grad_norm": 3.0599503263592265, "learning_rate": 1.996882662872027e-05, "loss": 0.9405, "step": 520 }, { "epoch": 0.05452642595499738, "grad_norm": 2.8659069744524346, "learning_rate": 1.996855861253105e-05, "loss": 1.0298, "step": 521 }, { "epoch": 0.054631083202511775, "grad_norm": 3.698825550046783, "learning_rate": 1.9968289450934235e-05, "loss": 1.163, "step": 522 }, { "epoch": 0.05473574045002617, "grad_norm": 3.0714368145204616, "learning_rate": 1.9968019143960755e-05, "loss": 1.0566, "step": 523 }, { "epoch": 0.05484039769754055, "grad_norm": 2.9908123101769912, "learning_rate": 1.9967747691641667e-05, "loss": 1.1947, "step": 524 }, { "epoch": 0.054945054945054944, "grad_norm": 3.455634138607162, "learning_rate": 1.996747509400816e-05, "loss": 1.1672, "step": 525 }, { "epoch": 0.055049712192569336, "grad_norm": 2.910243752165646, "learning_rate": 1.9967201351091562e-05, "loss": 1.1195, "step": 526 }, { "epoch": 0.05515436944008373, "grad_norm": 2.779316157067133, "learning_rate": 1.9966926462923324e-05, "loss": 0.8195, "step": 527 }, { "epoch": 0.05525902668759811, "grad_norm": 3.1584634955840154, "learning_rate": 1.996665042953503e-05, "loss": 1.1175, "step": 528 }, { "epoch": 0.055363683935112505, "grad_norm": 2.6746711631574938, "learning_rate": 1.9966373250958395e-05, "loss": 0.9986, "step": 529 }, { "epoch": 0.0554683411826269, "grad_norm": 2.956338657336674, "learning_rate": 1.9966094927225272e-05, "loss": 1.0119, "step": 530 }, { "epoch": 0.05557299843014129, "grad_norm": 3.287242958068802, "learning_rate": 1.9965815458367633e-05, "loss": 1.0788, "step": 531 }, { "epoch": 0.05567765567765568, "grad_norm": 2.8717406338940283, "learning_rate": 1.99655348444176e-05, "loss": 1.1331, "step": 532 }, { "epoch": 0.055782312925170066, "grad_norm": 3.1316748048585477, "learning_rate": 1.996525308540741e-05, "loss": 1.0685, "step": 533 }, { "epoch": 0.05588697017268446, "grad_norm": 3.1662562257885165, "learning_rate": 1.996497018136944e-05, "loss": 1.1702, "step": 534 }, { "epoch": 0.05599162742019885, "grad_norm": 2.9159713047319022, "learning_rate": 1.9964686132336193e-05, "loss": 1.1428, "step": 535 }, { "epoch": 0.05609628466771324, "grad_norm": 2.9197107556733863, "learning_rate": 1.9964400938340312e-05, "loss": 1.2099, "step": 536 }, { "epoch": 0.05620094191522763, "grad_norm": 3.103396125023537, "learning_rate": 1.996411459941456e-05, "loss": 1.0532, "step": 537 }, { "epoch": 0.05630559916274202, "grad_norm": 2.9755305851979985, "learning_rate": 1.9963827115591843e-05, "loss": 1.1459, "step": 538 }, { "epoch": 0.05641025641025641, "grad_norm": 3.1958262641616835, "learning_rate": 1.996353848690519e-05, "loss": 1.1823, "step": 539 }, { "epoch": 0.0565149136577708, "grad_norm": 3.044198764425719, "learning_rate": 1.9963248713387767e-05, "loss": 0.9377, "step": 540 }, { "epoch": 0.05661957090528519, "grad_norm": 2.7930578822333825, "learning_rate": 1.9962957795072874e-05, "loss": 0.9952, "step": 541 }, { "epoch": 0.05672422815279958, "grad_norm": 2.9967717314841598, "learning_rate": 1.9962665731993925e-05, "loss": 1.1515, "step": 542 }, { "epoch": 0.05682888540031397, "grad_norm": 3.0834478594381025, "learning_rate": 1.9962372524184493e-05, "loss": 1.1046, "step": 543 }, { "epoch": 0.056933542647828364, "grad_norm": 3.0669052108361456, "learning_rate": 1.996207817167826e-05, "loss": 1.1256, "step": 544 }, { "epoch": 0.05703819989534275, "grad_norm": 2.9144654203085496, "learning_rate": 1.996178267450905e-05, "loss": 1.0783, "step": 545 }, { "epoch": 0.05714285714285714, "grad_norm": 2.9822664939930537, "learning_rate": 1.9961486032710813e-05, "loss": 1.1593, "step": 546 }, { "epoch": 0.05724751439037153, "grad_norm": 3.1486295364529227, "learning_rate": 1.9961188246317644e-05, "loss": 1.0789, "step": 547 }, { "epoch": 0.057352171637885925, "grad_norm": 2.7926732727072054, "learning_rate": 1.9960889315363747e-05, "loss": 1.1259, "step": 548 }, { "epoch": 0.05745682888540032, "grad_norm": 3.057281352342737, "learning_rate": 1.9960589239883474e-05, "loss": 1.1181, "step": 549 }, { "epoch": 0.0575614861329147, "grad_norm": 3.254756100588196, "learning_rate": 1.996028801991131e-05, "loss": 1.047, "step": 550 }, { "epoch": 0.057666143380429094, "grad_norm": 2.770630465752798, "learning_rate": 1.9959985655481855e-05, "loss": 1.053, "step": 551 }, { "epoch": 0.057770800627943486, "grad_norm": 2.816610825508362, "learning_rate": 1.9959682146629862e-05, "loss": 1.1418, "step": 552 }, { "epoch": 0.05787545787545788, "grad_norm": 2.0688323215063646, "learning_rate": 1.9959377493390198e-05, "loss": 0.8396, "step": 553 }, { "epoch": 0.05798011512297226, "grad_norm": 2.1085687907562662, "learning_rate": 1.995907169579787e-05, "loss": 0.793, "step": 554 }, { "epoch": 0.058084772370486655, "grad_norm": 3.06254545731126, "learning_rate": 1.9958764753888015e-05, "loss": 1.1242, "step": 555 }, { "epoch": 0.05818942961800105, "grad_norm": 2.75776805013571, "learning_rate": 1.99584566676959e-05, "loss": 0.9819, "step": 556 }, { "epoch": 0.05829408686551544, "grad_norm": 3.275437113764389, "learning_rate": 1.995814743725693e-05, "loss": 1.1104, "step": 557 }, { "epoch": 0.058398744113029824, "grad_norm": 3.203542046928147, "learning_rate": 1.995783706260663e-05, "loss": 1.0711, "step": 558 }, { "epoch": 0.058503401360544216, "grad_norm": 3.3467386858572934, "learning_rate": 1.9957525543780663e-05, "loss": 1.1925, "step": 559 }, { "epoch": 0.05860805860805861, "grad_norm": 2.855041752035653, "learning_rate": 1.9957212880814826e-05, "loss": 0.8485, "step": 560 }, { "epoch": 0.058712715855573, "grad_norm": 3.4035834251768855, "learning_rate": 1.9956899073745046e-05, "loss": 0.9867, "step": 561 }, { "epoch": 0.05881737310308739, "grad_norm": 2.891412935735807, "learning_rate": 1.9956584122607373e-05, "loss": 1.1266, "step": 562 }, { "epoch": 0.05892203035060178, "grad_norm": 2.741554961840439, "learning_rate": 1.9956268027438006e-05, "loss": 1.1316, "step": 563 }, { "epoch": 0.05902668759811617, "grad_norm": 2.629765377645471, "learning_rate": 1.9955950788273255e-05, "loss": 0.9629, "step": 564 }, { "epoch": 0.05913134484563056, "grad_norm": 3.303903521460224, "learning_rate": 1.9955632405149577e-05, "loss": 1.1504, "step": 565 }, { "epoch": 0.05923600209314495, "grad_norm": 2.4920423274313244, "learning_rate": 1.9955312878103554e-05, "loss": 1.1054, "step": 566 }, { "epoch": 0.05934065934065934, "grad_norm": 2.4458587609353613, "learning_rate": 1.9954992207171898e-05, "loss": 0.8266, "step": 567 }, { "epoch": 0.05944531658817373, "grad_norm": 3.0320812785910274, "learning_rate": 1.995467039239146e-05, "loss": 1.0863, "step": 568 }, { "epoch": 0.05954997383568812, "grad_norm": 3.1180464676532025, "learning_rate": 1.995434743379921e-05, "loss": 1.0776, "step": 569 }, { "epoch": 0.059654631083202514, "grad_norm": 2.0897952402688795, "learning_rate": 1.995402333143226e-05, "loss": 0.8537, "step": 570 }, { "epoch": 0.0597592883307169, "grad_norm": 3.3450036899077102, "learning_rate": 1.9953698085327856e-05, "loss": 1.0668, "step": 571 }, { "epoch": 0.05986394557823129, "grad_norm": 3.719131888220057, "learning_rate": 1.995337169552336e-05, "loss": 1.1326, "step": 572 }, { "epoch": 0.05996860282574568, "grad_norm": 3.887030150352439, "learning_rate": 1.9953044162056275e-05, "loss": 1.0363, "step": 573 }, { "epoch": 0.060073260073260075, "grad_norm": 3.301925941503779, "learning_rate": 1.9952715484964242e-05, "loss": 1.1164, "step": 574 }, { "epoch": 0.06017791732077447, "grad_norm": 3.126271535901025, "learning_rate": 1.9952385664285024e-05, "loss": 0.9704, "step": 575 }, { "epoch": 0.06028257456828885, "grad_norm": 3.0038883887884067, "learning_rate": 1.9952054700056518e-05, "loss": 1.0455, "step": 576 }, { "epoch": 0.060387231815803244, "grad_norm": 3.0353412809857003, "learning_rate": 1.995172259231675e-05, "loss": 1.126, "step": 577 }, { "epoch": 0.060491889063317636, "grad_norm": 3.041115370144133, "learning_rate": 1.9951389341103885e-05, "loss": 1.1008, "step": 578 }, { "epoch": 0.06059654631083203, "grad_norm": 3.577476624319239, "learning_rate": 1.995105494645621e-05, "loss": 1.157, "step": 579 }, { "epoch": 0.06070120355834641, "grad_norm": 3.1702638194747808, "learning_rate": 1.9950719408412145e-05, "loss": 1.0744, "step": 580 }, { "epoch": 0.060805860805860805, "grad_norm": 2.6999043263976747, "learning_rate": 1.9950382727010254e-05, "loss": 1.0654, "step": 581 }, { "epoch": 0.0609105180533752, "grad_norm": 2.5781649995482216, "learning_rate": 1.9950044902289214e-05, "loss": 0.7968, "step": 582 }, { "epoch": 0.06101517530088959, "grad_norm": 2.8874420445573032, "learning_rate": 1.994970593428784e-05, "loss": 1.1324, "step": 583 }, { "epoch": 0.061119832548403974, "grad_norm": 3.5373274248424806, "learning_rate": 1.994936582304509e-05, "loss": 1.0826, "step": 584 }, { "epoch": 0.061224489795918366, "grad_norm": 3.2429438002106203, "learning_rate": 1.994902456860003e-05, "loss": 1.0991, "step": 585 }, { "epoch": 0.06132914704343276, "grad_norm": 2.83907357900198, "learning_rate": 1.9948682170991884e-05, "loss": 1.0165, "step": 586 }, { "epoch": 0.06143380429094715, "grad_norm": 2.756315965811872, "learning_rate": 1.9948338630259988e-05, "loss": 1.1076, "step": 587 }, { "epoch": 0.06153846153846154, "grad_norm": 3.0042678927369333, "learning_rate": 1.9947993946443814e-05, "loss": 1.1311, "step": 588 }, { "epoch": 0.06164311878597593, "grad_norm": 2.6664016639707975, "learning_rate": 1.9947648119582973e-05, "loss": 1.0378, "step": 589 }, { "epoch": 0.06174777603349032, "grad_norm": 3.1499853449871784, "learning_rate": 1.9947301149717194e-05, "loss": 1.0647, "step": 590 }, { "epoch": 0.06185243328100471, "grad_norm": 3.2629382002059293, "learning_rate": 1.9946953036886346e-05, "loss": 1.0228, "step": 591 }, { "epoch": 0.0619570905285191, "grad_norm": 3.129418827715554, "learning_rate": 1.994660378113043e-05, "loss": 1.1271, "step": 592 }, { "epoch": 0.06206174777603349, "grad_norm": 2.9554401950010414, "learning_rate": 1.994625338248958e-05, "loss": 1.1953, "step": 593 }, { "epoch": 0.06216640502354788, "grad_norm": 3.2598690152125314, "learning_rate": 1.994590184100405e-05, "loss": 1.1584, "step": 594 }, { "epoch": 0.06227106227106227, "grad_norm": 2.437511527082557, "learning_rate": 1.9945549156714236e-05, "loss": 0.8374, "step": 595 }, { "epoch": 0.062375719518576664, "grad_norm": 2.6096105594208985, "learning_rate": 1.994519532966066e-05, "loss": 1.1617, "step": 596 }, { "epoch": 0.06248037676609105, "grad_norm": 3.421662115903477, "learning_rate": 1.9944840359883983e-05, "loss": 1.1564, "step": 597 }, { "epoch": 0.06258503401360545, "grad_norm": 1.9096028175870379, "learning_rate": 1.9944484247424986e-05, "loss": 0.7336, "step": 598 }, { "epoch": 0.06268969126111983, "grad_norm": 2.8077084469651816, "learning_rate": 1.9944126992324592e-05, "loss": 1.1911, "step": 599 }, { "epoch": 0.06279434850863422, "grad_norm": 2.8556439666255695, "learning_rate": 1.9943768594623844e-05, "loss": 1.0634, "step": 600 }, { "epoch": 0.06289900575614861, "grad_norm": 3.117743097234504, "learning_rate": 1.994340905436393e-05, "loss": 1.1191, "step": 601 }, { "epoch": 0.063003663003663, "grad_norm": 3.1601015532368866, "learning_rate": 1.9943048371586158e-05, "loss": 0.9725, "step": 602 }, { "epoch": 0.0631083202511774, "grad_norm": 2.208608976213274, "learning_rate": 1.994268654633197e-05, "loss": 0.7586, "step": 603 }, { "epoch": 0.06321297749869179, "grad_norm": 2.1158633727476026, "learning_rate": 1.994232357864294e-05, "loss": 0.7692, "step": 604 }, { "epoch": 0.06331763474620618, "grad_norm": 2.9413326628625205, "learning_rate": 1.9941959468560782e-05, "loss": 1.1525, "step": 605 }, { "epoch": 0.06342229199372057, "grad_norm": 2.8204049381108742, "learning_rate": 1.9941594216127323e-05, "loss": 0.872, "step": 606 }, { "epoch": 0.06352694924123496, "grad_norm": 3.190529808813044, "learning_rate": 1.994122782138453e-05, "loss": 1.1846, "step": 607 }, { "epoch": 0.06363160648874934, "grad_norm": 3.08211718908032, "learning_rate": 1.9940860284374515e-05, "loss": 0.9465, "step": 608 }, { "epoch": 0.06373626373626373, "grad_norm": 3.0616588635006123, "learning_rate": 1.99404916051395e-05, "loss": 0.9674, "step": 609 }, { "epoch": 0.06384092098377812, "grad_norm": 2.9537487271769116, "learning_rate": 1.9940121783721846e-05, "loss": 1.1373, "step": 610 }, { "epoch": 0.06394557823129252, "grad_norm": 2.9668339388811407, "learning_rate": 1.9939750820164044e-05, "loss": 1.0418, "step": 611 }, { "epoch": 0.06405023547880691, "grad_norm": 2.8848768351239658, "learning_rate": 1.9939378714508728e-05, "loss": 0.9834, "step": 612 }, { "epoch": 0.0641548927263213, "grad_norm": 2.759886281009913, "learning_rate": 1.9939005466798648e-05, "loss": 1.0321, "step": 613 }, { "epoch": 0.06425954997383569, "grad_norm": 3.0129757609858245, "learning_rate": 1.9938631077076692e-05, "loss": 1.1952, "step": 614 }, { "epoch": 0.06436420722135008, "grad_norm": 2.573835926627507, "learning_rate": 1.9938255545385875e-05, "loss": 0.9685, "step": 615 }, { "epoch": 0.06446886446886448, "grad_norm": 3.453838047274854, "learning_rate": 1.993787887176935e-05, "loss": 1.129, "step": 616 }, { "epoch": 0.06457352171637885, "grad_norm": 3.1074759117744137, "learning_rate": 1.9937501056270397e-05, "loss": 1.0101, "step": 617 }, { "epoch": 0.06467817896389325, "grad_norm": 2.755027947394353, "learning_rate": 1.9937122098932428e-05, "loss": 0.9722, "step": 618 }, { "epoch": 0.06478283621140764, "grad_norm": 2.9013696059112224, "learning_rate": 1.9936741999798985e-05, "loss": 1.1728, "step": 619 }, { "epoch": 0.06488749345892203, "grad_norm": 3.1607523426322586, "learning_rate": 1.993636075891374e-05, "loss": 0.895, "step": 620 }, { "epoch": 0.06499215070643642, "grad_norm": 3.1382276779001597, "learning_rate": 1.9935978376320503e-05, "loss": 1.1031, "step": 621 }, { "epoch": 0.06509680795395081, "grad_norm": 2.52595770004853, "learning_rate": 1.9935594852063208e-05, "loss": 0.938, "step": 622 }, { "epoch": 0.0652014652014652, "grad_norm": 2.650381943162927, "learning_rate": 1.993521018618592e-05, "loss": 1.0193, "step": 623 }, { "epoch": 0.0653061224489796, "grad_norm": 2.651852123933312, "learning_rate": 1.9934824378732846e-05, "loss": 1.0853, "step": 624 }, { "epoch": 0.06541077969649398, "grad_norm": 3.306163119634711, "learning_rate": 1.9934437429748306e-05, "loss": 1.154, "step": 625 }, { "epoch": 0.06551543694400837, "grad_norm": 2.784487003175816, "learning_rate": 1.993404933927677e-05, "loss": 1.0449, "step": 626 }, { "epoch": 0.06562009419152276, "grad_norm": 2.759785634839251, "learning_rate": 1.9933660107362825e-05, "loss": 1.0753, "step": 627 }, { "epoch": 0.06572475143903715, "grad_norm": 2.731464057889386, "learning_rate": 1.9933269734051196e-05, "loss": 0.9269, "step": 628 }, { "epoch": 0.06582940868655154, "grad_norm": 2.837489067099457, "learning_rate": 1.9932878219386738e-05, "loss": 1.0685, "step": 629 }, { "epoch": 0.06593406593406594, "grad_norm": 3.017220693367057, "learning_rate": 1.9932485563414436e-05, "loss": 1.1035, "step": 630 }, { "epoch": 0.06603872318158033, "grad_norm": 3.3205197744653194, "learning_rate": 1.9932091766179408e-05, "loss": 1.1205, "step": 631 }, { "epoch": 0.06614338042909472, "grad_norm": 2.891684092310922, "learning_rate": 1.99316968277269e-05, "loss": 1.1477, "step": 632 }, { "epoch": 0.06624803767660911, "grad_norm": 2.9884409256785447, "learning_rate": 1.9931300748102294e-05, "loss": 1.1761, "step": 633 }, { "epoch": 0.06635269492412349, "grad_norm": 2.536315479596216, "learning_rate": 1.99309035273511e-05, "loss": 1.0411, "step": 634 }, { "epoch": 0.06645735217163788, "grad_norm": 2.9476335588680365, "learning_rate": 1.9930505165518958e-05, "loss": 1.1562, "step": 635 }, { "epoch": 0.06656200941915227, "grad_norm": 3.0500527548211704, "learning_rate": 1.993010566265164e-05, "loss": 1.1814, "step": 636 }, { "epoch": 0.06666666666666667, "grad_norm": 2.6707063897105905, "learning_rate": 1.9929705018795055e-05, "loss": 1.0591, "step": 637 }, { "epoch": 0.06677132391418106, "grad_norm": 2.618480485695442, "learning_rate": 1.9929303233995227e-05, "loss": 1.0465, "step": 638 }, { "epoch": 0.06687598116169545, "grad_norm": 2.66882534809794, "learning_rate": 1.9928900308298334e-05, "loss": 1.1071, "step": 639 }, { "epoch": 0.06698063840920984, "grad_norm": 2.9292541219026496, "learning_rate": 1.9928496241750667e-05, "loss": 1.1123, "step": 640 }, { "epoch": 0.06708529565672423, "grad_norm": 2.7357462801221546, "learning_rate": 1.992809103439865e-05, "loss": 1.132, "step": 641 }, { "epoch": 0.06718995290423861, "grad_norm": 3.003236810512024, "learning_rate": 1.9927684686288854e-05, "loss": 1.1636, "step": 642 }, { "epoch": 0.067294610151753, "grad_norm": 3.313716591599714, "learning_rate": 1.992727719746796e-05, "loss": 1.0848, "step": 643 }, { "epoch": 0.0673992673992674, "grad_norm": 3.0324704855965163, "learning_rate": 1.9926868567982788e-05, "loss": 1.177, "step": 644 }, { "epoch": 0.06750392464678179, "grad_norm": 3.172131349022228, "learning_rate": 1.9926458797880294e-05, "loss": 0.8632, "step": 645 }, { "epoch": 0.06760858189429618, "grad_norm": 3.0648739329108077, "learning_rate": 1.9926047887207565e-05, "loss": 1.0914, "step": 646 }, { "epoch": 0.06771323914181057, "grad_norm": 2.4156718664411696, "learning_rate": 1.992563583601181e-05, "loss": 0.947, "step": 647 }, { "epoch": 0.06781789638932496, "grad_norm": 2.7382356031716646, "learning_rate": 1.9925222644340372e-05, "loss": 1.0838, "step": 648 }, { "epoch": 0.06792255363683936, "grad_norm": 2.9469770945822047, "learning_rate": 1.9924808312240737e-05, "loss": 1.1064, "step": 649 }, { "epoch": 0.06802721088435375, "grad_norm": 3.1261406037905513, "learning_rate": 1.9924392839760505e-05, "loss": 1.1091, "step": 650 }, { "epoch": 0.06813186813186813, "grad_norm": 3.1877565587473575, "learning_rate": 1.9923976226947417e-05, "loss": 1.1858, "step": 651 }, { "epoch": 0.06823652537938252, "grad_norm": 2.8216280798542885, "learning_rate": 1.992355847384934e-05, "loss": 1.0689, "step": 652 }, { "epoch": 0.06834118262689691, "grad_norm": 2.9795946286363875, "learning_rate": 1.9923139580514284e-05, "loss": 1.0729, "step": 653 }, { "epoch": 0.0684458398744113, "grad_norm": 2.5206528985759324, "learning_rate": 1.9922719546990367e-05, "loss": 0.8577, "step": 654 }, { "epoch": 0.0685504971219257, "grad_norm": 2.9454318451318455, "learning_rate": 1.992229837332586e-05, "loss": 0.8568, "step": 655 }, { "epoch": 0.06865515436944009, "grad_norm": 2.774719223512651, "learning_rate": 1.9921876059569158e-05, "loss": 0.9953, "step": 656 }, { "epoch": 0.06875981161695448, "grad_norm": 3.19787444851232, "learning_rate": 1.9921452605768783e-05, "loss": 1.0818, "step": 657 }, { "epoch": 0.06886446886446887, "grad_norm": 3.4049791357976105, "learning_rate": 1.9921028011973388e-05, "loss": 1.1536, "step": 658 }, { "epoch": 0.06896912611198326, "grad_norm": 2.557786514794364, "learning_rate": 1.9920602278231765e-05, "loss": 0.986, "step": 659 }, { "epoch": 0.06907378335949764, "grad_norm": 3.1241150572729808, "learning_rate": 1.9920175404592827e-05, "loss": 0.9868, "step": 660 }, { "epoch": 0.06917844060701203, "grad_norm": 3.2992236992022788, "learning_rate": 1.9919747391105626e-05, "loss": 1.0701, "step": 661 }, { "epoch": 0.06928309785452642, "grad_norm": 3.4812568274144655, "learning_rate": 1.991931823781934e-05, "loss": 0.9236, "step": 662 }, { "epoch": 0.06938775510204082, "grad_norm": 2.7811990200114276, "learning_rate": 1.991888794478328e-05, "loss": 1.0129, "step": 663 }, { "epoch": 0.06949241234955521, "grad_norm": 3.0685242353855267, "learning_rate": 1.9918456512046886e-05, "loss": 1.1438, "step": 664 }, { "epoch": 0.0695970695970696, "grad_norm": 2.6809973552138824, "learning_rate": 1.9918023939659735e-05, "loss": 1.0722, "step": 665 }, { "epoch": 0.06970172684458399, "grad_norm": 2.5126108916830545, "learning_rate": 1.9917590227671523e-05, "loss": 0.8222, "step": 666 }, { "epoch": 0.06980638409209838, "grad_norm": 3.1637408972668135, "learning_rate": 1.9917155376132092e-05, "loss": 1.0751, "step": 667 }, { "epoch": 0.06991104133961276, "grad_norm": 2.7641342208213695, "learning_rate": 1.9916719385091404e-05, "loss": 0.9868, "step": 668 }, { "epoch": 0.07001569858712715, "grad_norm": 3.369758095168238, "learning_rate": 1.9916282254599556e-05, "loss": 1.073, "step": 669 }, { "epoch": 0.07012035583464155, "grad_norm": 2.9489363996307576, "learning_rate": 1.9915843984706773e-05, "loss": 1.0683, "step": 670 }, { "epoch": 0.07022501308215594, "grad_norm": 3.103782003060308, "learning_rate": 1.9915404575463414e-05, "loss": 1.0495, "step": 671 }, { "epoch": 0.07032967032967033, "grad_norm": 3.442901028729499, "learning_rate": 1.991496402691997e-05, "loss": 1.0754, "step": 672 }, { "epoch": 0.07043432757718472, "grad_norm": 2.861095313005916, "learning_rate": 1.991452233912706e-05, "loss": 1.0522, "step": 673 }, { "epoch": 0.07053898482469911, "grad_norm": 2.9472040327867144, "learning_rate": 1.9914079512135434e-05, "loss": 1.1586, "step": 674 }, { "epoch": 0.0706436420722135, "grad_norm": 3.1812368240216657, "learning_rate": 1.9913635545995972e-05, "loss": 1.1145, "step": 675 }, { "epoch": 0.0707482993197279, "grad_norm": 2.9277804198588666, "learning_rate": 1.9913190440759693e-05, "loss": 1.0576, "step": 676 }, { "epoch": 0.07085295656724228, "grad_norm": 3.271562835349517, "learning_rate": 1.9912744196477732e-05, "loss": 0.9771, "step": 677 }, { "epoch": 0.07095761381475667, "grad_norm": 3.0718494789605604, "learning_rate": 1.9912296813201372e-05, "loss": 1.0435, "step": 678 }, { "epoch": 0.07106227106227106, "grad_norm": 2.1086283397641576, "learning_rate": 1.991184829098201e-05, "loss": 0.8067, "step": 679 }, { "epoch": 0.07116692830978545, "grad_norm": 2.0279982104266643, "learning_rate": 1.9911398629871187e-05, "loss": 0.6897, "step": 680 }, { "epoch": 0.07127158555729984, "grad_norm": 2.7850815226838184, "learning_rate": 1.991094782992057e-05, "loss": 1.0727, "step": 681 }, { "epoch": 0.07137624280481424, "grad_norm": 2.7710574348613313, "learning_rate": 1.9910495891181956e-05, "loss": 1.1439, "step": 682 }, { "epoch": 0.07148090005232863, "grad_norm": 2.970435833694052, "learning_rate": 1.9910042813707272e-05, "loss": 1.1369, "step": 683 }, { "epoch": 0.07158555729984302, "grad_norm": 2.618973989205177, "learning_rate": 1.990958859754858e-05, "loss": 1.0571, "step": 684 }, { "epoch": 0.0716902145473574, "grad_norm": 3.5278999615831323, "learning_rate": 1.9909133242758066e-05, "loss": 1.177, "step": 685 }, { "epoch": 0.07179487179487179, "grad_norm": 3.266029063508325, "learning_rate": 1.990867674938806e-05, "loss": 0.9545, "step": 686 }, { "epoch": 0.07189952904238618, "grad_norm": 3.0659580296418856, "learning_rate": 1.9908219117491004e-05, "loss": 1.0684, "step": 687 }, { "epoch": 0.07200418628990057, "grad_norm": 3.164931997476543, "learning_rate": 1.9907760347119485e-05, "loss": 1.0845, "step": 688 }, { "epoch": 0.07210884353741497, "grad_norm": 2.8716658890938174, "learning_rate": 1.990730043832622e-05, "loss": 1.0957, "step": 689 }, { "epoch": 0.07221350078492936, "grad_norm": 2.545281120610984, "learning_rate": 1.990683939116405e-05, "loss": 1.0906, "step": 690 }, { "epoch": 0.07231815803244375, "grad_norm": 3.317532471187599, "learning_rate": 1.9906377205685953e-05, "loss": 1.1177, "step": 691 }, { "epoch": 0.07242281527995814, "grad_norm": 3.2165985399160575, "learning_rate": 1.9905913881945027e-05, "loss": 1.0378, "step": 692 }, { "epoch": 0.07252747252747253, "grad_norm": 2.8761721393997193, "learning_rate": 1.990544941999452e-05, "loss": 1.1571, "step": 693 }, { "epoch": 0.07263212977498691, "grad_norm": 3.4562167899000076, "learning_rate": 1.9904983819887788e-05, "loss": 0.9759, "step": 694 }, { "epoch": 0.0727367870225013, "grad_norm": 4.0163161813819155, "learning_rate": 1.9904517081678342e-05, "loss": 1.0135, "step": 695 }, { "epoch": 0.0728414442700157, "grad_norm": 2.999273333333365, "learning_rate": 1.99040492054198e-05, "loss": 1.1353, "step": 696 }, { "epoch": 0.07294610151753009, "grad_norm": 2.947720739435947, "learning_rate": 1.990358019116593e-05, "loss": 1.0866, "step": 697 }, { "epoch": 0.07305075876504448, "grad_norm": 2.559467975109432, "learning_rate": 1.9903110038970617e-05, "loss": 1.1007, "step": 698 }, { "epoch": 0.07315541601255887, "grad_norm": 2.6168641864922084, "learning_rate": 1.9902638748887886e-05, "loss": 1.0792, "step": 699 }, { "epoch": 0.07326007326007326, "grad_norm": 3.3899484327660367, "learning_rate": 1.9902166320971888e-05, "loss": 0.9802, "step": 700 }, { "epoch": 0.07336473050758766, "grad_norm": 3.5929670401239706, "learning_rate": 1.9901692755276905e-05, "loss": 1.14, "step": 701 }, { "epoch": 0.07346938775510205, "grad_norm": 3.5869697848385083, "learning_rate": 1.9901218051857354e-05, "loss": 1.0971, "step": 702 }, { "epoch": 0.07357404500261643, "grad_norm": 2.74561495745866, "learning_rate": 1.9900742210767773e-05, "loss": 1.1434, "step": 703 }, { "epoch": 0.07367870225013082, "grad_norm": 3.1605580372961306, "learning_rate": 1.9900265232062844e-05, "loss": 1.0261, "step": 704 }, { "epoch": 0.07378335949764521, "grad_norm": 3.0884788086378463, "learning_rate": 1.9899787115797373e-05, "loss": 1.0976, "step": 705 }, { "epoch": 0.0738880167451596, "grad_norm": 2.904397093628698, "learning_rate": 1.9899307862026288e-05, "loss": 1.0172, "step": 706 }, { "epoch": 0.073992673992674, "grad_norm": 2.550496674073018, "learning_rate": 1.9898827470804663e-05, "loss": 1.1443, "step": 707 }, { "epoch": 0.07409733124018839, "grad_norm": 3.0699639273230828, "learning_rate": 1.9898345942187695e-05, "loss": 1.1312, "step": 708 }, { "epoch": 0.07420198848770278, "grad_norm": 3.151696553595531, "learning_rate": 1.9897863276230712e-05, "loss": 0.9484, "step": 709 }, { "epoch": 0.07430664573521717, "grad_norm": 2.936271363452074, "learning_rate": 1.9897379472989177e-05, "loss": 1.0499, "step": 710 }, { "epoch": 0.07441130298273155, "grad_norm": 3.427173808865134, "learning_rate": 1.989689453251867e-05, "loss": 1.0594, "step": 711 }, { "epoch": 0.07451596023024594, "grad_norm": 2.7577070019256165, "learning_rate": 1.9896408454874924e-05, "loss": 1.0995, "step": 712 }, { "epoch": 0.07462061747776033, "grad_norm": 2.737358494436629, "learning_rate": 1.989592124011378e-05, "loss": 0.9775, "step": 713 }, { "epoch": 0.07472527472527472, "grad_norm": 2.637102118855309, "learning_rate": 1.9895432888291228e-05, "loss": 1.1897, "step": 714 }, { "epoch": 0.07482993197278912, "grad_norm": 2.8640305527444525, "learning_rate": 1.9894943399463373e-05, "loss": 1.068, "step": 715 }, { "epoch": 0.07493458922030351, "grad_norm": 2.506044517062919, "learning_rate": 1.9894452773686463e-05, "loss": 1.1048, "step": 716 }, { "epoch": 0.0750392464678179, "grad_norm": 2.731113694615565, "learning_rate": 1.9893961011016873e-05, "loss": 1.1107, "step": 717 }, { "epoch": 0.07514390371533229, "grad_norm": 2.892316241004994, "learning_rate": 1.9893468111511106e-05, "loss": 1.0969, "step": 718 }, { "epoch": 0.07524856096284668, "grad_norm": 2.55870522176311, "learning_rate": 1.98929740752258e-05, "loss": 1.0443, "step": 719 }, { "epoch": 0.07535321821036106, "grad_norm": 2.9295733001233044, "learning_rate": 1.989247890221771e-05, "loss": 1.0639, "step": 720 }, { "epoch": 0.07545787545787545, "grad_norm": 3.0665452355998073, "learning_rate": 1.9891982592543748e-05, "loss": 1.1135, "step": 721 }, { "epoch": 0.07556253270538985, "grad_norm": 2.8998275546803223, "learning_rate": 1.9891485146260926e-05, "loss": 0.9916, "step": 722 }, { "epoch": 0.07566718995290424, "grad_norm": 2.5710268709337294, "learning_rate": 1.989098656342641e-05, "loss": 1.0469, "step": 723 }, { "epoch": 0.07577184720041863, "grad_norm": 3.0608910480478535, "learning_rate": 1.989048684409749e-05, "loss": 1.1399, "step": 724 }, { "epoch": 0.07587650444793302, "grad_norm": 2.8003344138495887, "learning_rate": 1.988998598833158e-05, "loss": 1.0059, "step": 725 }, { "epoch": 0.07598116169544741, "grad_norm": 2.9559350963453785, "learning_rate": 1.988948399618623e-05, "loss": 1.1011, "step": 726 }, { "epoch": 0.0760858189429618, "grad_norm": 2.6079665939299583, "learning_rate": 1.988898086771912e-05, "loss": 0.9801, "step": 727 }, { "epoch": 0.0761904761904762, "grad_norm": 2.6799211618975143, "learning_rate": 1.9888476602988063e-05, "loss": 1.0884, "step": 728 }, { "epoch": 0.07629513343799058, "grad_norm": 2.6807547704920665, "learning_rate": 1.9887971202050996e-05, "loss": 1.0336, "step": 729 }, { "epoch": 0.07639979068550497, "grad_norm": 2.8884167531514233, "learning_rate": 1.9887464664965997e-05, "loss": 1.108, "step": 730 }, { "epoch": 0.07650444793301936, "grad_norm": 3.1096081530713118, "learning_rate": 1.9886956991791258e-05, "loss": 1.0963, "step": 731 }, { "epoch": 0.07660910518053375, "grad_norm": 2.5776344588421156, "learning_rate": 1.9886448182585122e-05, "loss": 1.071, "step": 732 }, { "epoch": 0.07671376242804814, "grad_norm": 2.7119013146953814, "learning_rate": 1.9885938237406048e-05, "loss": 1.0899, "step": 733 }, { "epoch": 0.07681841967556254, "grad_norm": 2.5283853043631153, "learning_rate": 1.9885427156312627e-05, "loss": 1.1398, "step": 734 }, { "epoch": 0.07692307692307693, "grad_norm": 2.563527440025152, "learning_rate": 1.988491493936359e-05, "loss": 1.0551, "step": 735 }, { "epoch": 0.07702773417059132, "grad_norm": 2.4468041205415805, "learning_rate": 1.9884401586617785e-05, "loss": 1.1602, "step": 736 }, { "epoch": 0.0771323914181057, "grad_norm": 3.40716823965318, "learning_rate": 1.98838870981342e-05, "loss": 1.0084, "step": 737 }, { "epoch": 0.07723704866562009, "grad_norm": 2.9705969422751113, "learning_rate": 1.988337147397195e-05, "loss": 1.2072, "step": 738 }, { "epoch": 0.07734170591313448, "grad_norm": 2.4958733209485864, "learning_rate": 1.9882854714190283e-05, "loss": 0.8871, "step": 739 }, { "epoch": 0.07744636316064887, "grad_norm": 2.890941680142748, "learning_rate": 1.9882336818848576e-05, "loss": 0.9567, "step": 740 }, { "epoch": 0.07755102040816327, "grad_norm": 3.002499996703523, "learning_rate": 1.9881817788006335e-05, "loss": 1.1373, "step": 741 }, { "epoch": 0.07765567765567766, "grad_norm": 2.697898190678546, "learning_rate": 1.9881297621723194e-05, "loss": 0.9728, "step": 742 }, { "epoch": 0.07776033490319205, "grad_norm": 3.317056448747156, "learning_rate": 1.9880776320058926e-05, "loss": 1.0526, "step": 743 }, { "epoch": 0.07786499215070644, "grad_norm": 3.070014562054591, "learning_rate": 1.988025388307343e-05, "loss": 1.1524, "step": 744 }, { "epoch": 0.07796964939822083, "grad_norm": 2.942240381438674, "learning_rate": 1.9879730310826733e-05, "loss": 1.1355, "step": 745 }, { "epoch": 0.07807430664573521, "grad_norm": 2.4967345724379144, "learning_rate": 1.9879205603379e-05, "loss": 1.0319, "step": 746 }, { "epoch": 0.0781789638932496, "grad_norm": 2.5917380077833445, "learning_rate": 1.9878679760790508e-05, "loss": 0.8069, "step": 747 }, { "epoch": 0.078283621140764, "grad_norm": 1.830900388771415, "learning_rate": 1.9878152783121686e-05, "loss": 0.7969, "step": 748 }, { "epoch": 0.07838827838827839, "grad_norm": 3.1335346676454017, "learning_rate": 1.9877624670433086e-05, "loss": 1.1493, "step": 749 }, { "epoch": 0.07849293563579278, "grad_norm": 2.3535864496436356, "learning_rate": 1.987709542278539e-05, "loss": 0.8744, "step": 750 }, { "epoch": 0.07859759288330717, "grad_norm": 2.9044705756712044, "learning_rate": 1.9876565040239404e-05, "loss": 0.9138, "step": 751 }, { "epoch": 0.07870225013082156, "grad_norm": 3.197340259960328, "learning_rate": 1.9876033522856072e-05, "loss": 1.0667, "step": 752 }, { "epoch": 0.07880690737833596, "grad_norm": 2.672736829036044, "learning_rate": 1.987550087069647e-05, "loss": 1.0583, "step": 753 }, { "epoch": 0.07891156462585033, "grad_norm": 2.9518811281187585, "learning_rate": 1.9874967083821796e-05, "loss": 1.0376, "step": 754 }, { "epoch": 0.07901622187336473, "grad_norm": 3.0369965552675917, "learning_rate": 1.9874432162293388e-05, "loss": 1.0123, "step": 755 }, { "epoch": 0.07912087912087912, "grad_norm": 2.527882866802051, "learning_rate": 1.9873896106172705e-05, "loss": 1.0585, "step": 756 }, { "epoch": 0.07922553636839351, "grad_norm": 2.619647602913425, "learning_rate": 1.9873358915521345e-05, "loss": 0.9611, "step": 757 }, { "epoch": 0.0793301936159079, "grad_norm": 3.167480526301533, "learning_rate": 1.9872820590401028e-05, "loss": 1.0156, "step": 758 }, { "epoch": 0.0794348508634223, "grad_norm": 2.573985063306484, "learning_rate": 1.9872281130873608e-05, "loss": 1.0724, "step": 759 }, { "epoch": 0.07953950811093669, "grad_norm": 2.946085000736679, "learning_rate": 1.9871740537001078e-05, "loss": 1.1423, "step": 760 }, { "epoch": 0.07964416535845108, "grad_norm": 2.569944570278952, "learning_rate": 1.9871198808845546e-05, "loss": 1.0442, "step": 761 }, { "epoch": 0.07974882260596547, "grad_norm": 2.1772171170370136, "learning_rate": 1.987065594646926e-05, "loss": 1.0529, "step": 762 }, { "epoch": 0.07985347985347985, "grad_norm": 2.845399072310291, "learning_rate": 1.9870111949934597e-05, "loss": 1.0669, "step": 763 }, { "epoch": 0.07995813710099424, "grad_norm": 2.987591086180299, "learning_rate": 1.9869566819304064e-05, "loss": 1.1109, "step": 764 }, { "epoch": 0.08006279434850863, "grad_norm": 2.9260073141957794, "learning_rate": 1.986902055464029e-05, "loss": 1.1047, "step": 765 }, { "epoch": 0.08016745159602302, "grad_norm": 2.978042723105865, "learning_rate": 1.9868473156006055e-05, "loss": 1.1091, "step": 766 }, { "epoch": 0.08027210884353742, "grad_norm": 3.0411024163342995, "learning_rate": 1.9867924623464244e-05, "loss": 1.072, "step": 767 }, { "epoch": 0.08037676609105181, "grad_norm": 2.804164851030498, "learning_rate": 1.986737495707789e-05, "loss": 1.1031, "step": 768 }, { "epoch": 0.0804814233385662, "grad_norm": 2.531079552784771, "learning_rate": 1.986682415691015e-05, "loss": 1.0565, "step": 769 }, { "epoch": 0.08058608058608059, "grad_norm": 3.015392851420843, "learning_rate": 1.9866272223024314e-05, "loss": 1.0387, "step": 770 }, { "epoch": 0.08069073783359498, "grad_norm": 3.0741672862303258, "learning_rate": 1.9865719155483794e-05, "loss": 1.0325, "step": 771 }, { "epoch": 0.08079539508110936, "grad_norm": 2.6992994649398785, "learning_rate": 1.9865164954352146e-05, "loss": 1.0413, "step": 772 }, { "epoch": 0.08090005232862375, "grad_norm": 2.652137381945092, "learning_rate": 1.986460961969305e-05, "loss": 1.0665, "step": 773 }, { "epoch": 0.08100470957613815, "grad_norm": 2.8296867873205316, "learning_rate": 1.9864053151570305e-05, "loss": 1.1983, "step": 774 }, { "epoch": 0.08110936682365254, "grad_norm": 2.6932652023688908, "learning_rate": 1.986349555004786e-05, "loss": 0.9712, "step": 775 }, { "epoch": 0.08121402407116693, "grad_norm": 2.8908231690550337, "learning_rate": 1.9862936815189778e-05, "loss": 1.0777, "step": 776 }, { "epoch": 0.08131868131868132, "grad_norm": 2.7585434811777327, "learning_rate": 1.9862376947060263e-05, "loss": 1.0458, "step": 777 }, { "epoch": 0.08142333856619571, "grad_norm": 3.4111164784807535, "learning_rate": 1.9861815945723647e-05, "loss": 1.0959, "step": 778 }, { "epoch": 0.0815279958137101, "grad_norm": 2.794422350270696, "learning_rate": 1.9861253811244383e-05, "loss": 1.0158, "step": 779 }, { "epoch": 0.08163265306122448, "grad_norm": 2.669728034578084, "learning_rate": 1.9860690543687064e-05, "loss": 1.0899, "step": 780 }, { "epoch": 0.08173731030873888, "grad_norm": 2.8079261760048513, "learning_rate": 1.9860126143116412e-05, "loss": 1.1234, "step": 781 }, { "epoch": 0.08184196755625327, "grad_norm": 2.926846571046833, "learning_rate": 1.9859560609597282e-05, "loss": 1.0468, "step": 782 }, { "epoch": 0.08194662480376766, "grad_norm": 2.8972050163501213, "learning_rate": 1.9858993943194648e-05, "loss": 1.044, "step": 783 }, { "epoch": 0.08205128205128205, "grad_norm": 2.718732850064825, "learning_rate": 1.9858426143973623e-05, "loss": 1.0915, "step": 784 }, { "epoch": 0.08215593929879644, "grad_norm": 2.509735712880021, "learning_rate": 1.9857857211999452e-05, "loss": 1.1174, "step": 785 }, { "epoch": 0.08226059654631084, "grad_norm": 2.7443000185455464, "learning_rate": 1.98572871473375e-05, "loss": 0.8195, "step": 786 }, { "epoch": 0.08236525379382523, "grad_norm": 3.129320040379283, "learning_rate": 1.9856715950053272e-05, "loss": 1.0873, "step": 787 }, { "epoch": 0.08246991104133962, "grad_norm": 2.734612164978207, "learning_rate": 1.9856143620212403e-05, "loss": 1.1222, "step": 788 }, { "epoch": 0.082574568288854, "grad_norm": 3.008168731676326, "learning_rate": 1.9855570157880648e-05, "loss": 0.9387, "step": 789 }, { "epoch": 0.08267922553636839, "grad_norm": 3.1280456930254923, "learning_rate": 1.9854995563123904e-05, "loss": 1.1312, "step": 790 }, { "epoch": 0.08278388278388278, "grad_norm": 2.8486226186326338, "learning_rate": 1.985441983600819e-05, "loss": 0.9654, "step": 791 }, { "epoch": 0.08288854003139717, "grad_norm": 2.880542844835971, "learning_rate": 1.985384297659966e-05, "loss": 1.0636, "step": 792 }, { "epoch": 0.08299319727891157, "grad_norm": 2.7235931768471437, "learning_rate": 1.98532649849646e-05, "loss": 1.017, "step": 793 }, { "epoch": 0.08309785452642596, "grad_norm": 2.968388296598955, "learning_rate": 1.9852685861169415e-05, "loss": 1.2004, "step": 794 }, { "epoch": 0.08320251177394035, "grad_norm": 2.80143430250923, "learning_rate": 1.9852105605280653e-05, "loss": 0.9673, "step": 795 }, { "epoch": 0.08330716902145474, "grad_norm": 2.579406765144872, "learning_rate": 1.9851524217364982e-05, "loss": 0.9932, "step": 796 }, { "epoch": 0.08341182626896912, "grad_norm": 2.5521410566455285, "learning_rate": 1.985094169748921e-05, "loss": 1.0031, "step": 797 }, { "epoch": 0.08351648351648351, "grad_norm": 2.5987542618084145, "learning_rate": 1.9850358045720268e-05, "loss": 1.066, "step": 798 }, { "epoch": 0.0836211407639979, "grad_norm": 2.6060537256654017, "learning_rate": 1.9849773262125215e-05, "loss": 1.0626, "step": 799 }, { "epoch": 0.0837257980115123, "grad_norm": 2.7508431371390514, "learning_rate": 1.9849187346771247e-05, "loss": 1.0875, "step": 800 }, { "epoch": 0.08383045525902669, "grad_norm": 3.149432837745595, "learning_rate": 1.9848600299725687e-05, "loss": 1.0231, "step": 801 }, { "epoch": 0.08393511250654108, "grad_norm": 3.0521072775807716, "learning_rate": 1.9848012121055987e-05, "loss": 1.0271, "step": 802 }, { "epoch": 0.08403976975405547, "grad_norm": 2.5717755802540085, "learning_rate": 1.9847422810829732e-05, "loss": 1.0108, "step": 803 }, { "epoch": 0.08414442700156986, "grad_norm": 2.574749430789081, "learning_rate": 1.984683236911463e-05, "loss": 1.0094, "step": 804 }, { "epoch": 0.08424908424908426, "grad_norm": 2.761780907407162, "learning_rate": 1.984624079597853e-05, "loss": 0.9979, "step": 805 }, { "epoch": 0.08435374149659863, "grad_norm": 2.2659780949782076, "learning_rate": 1.98456480914894e-05, "loss": 1.0866, "step": 806 }, { "epoch": 0.08445839874411303, "grad_norm": 2.880890790249313, "learning_rate": 1.9845054255715345e-05, "loss": 1.0858, "step": 807 }, { "epoch": 0.08456305599162742, "grad_norm": 3.2637552119352087, "learning_rate": 1.98444592887246e-05, "loss": 1.177, "step": 808 }, { "epoch": 0.08466771323914181, "grad_norm": 2.7805016938861216, "learning_rate": 1.9843863190585527e-05, "loss": 1.1337, "step": 809 }, { "epoch": 0.0847723704866562, "grad_norm": 2.9010068085027023, "learning_rate": 1.9843265961366614e-05, "loss": 1.0259, "step": 810 }, { "epoch": 0.0848770277341706, "grad_norm": 2.252289860307481, "learning_rate": 1.984266760113649e-05, "loss": 0.7456, "step": 811 }, { "epoch": 0.08498168498168499, "grad_norm": 2.80087053004204, "learning_rate": 1.9842068109963904e-05, "loss": 1.1414, "step": 812 }, { "epoch": 0.08508634222919938, "grad_norm": 2.9850122512331247, "learning_rate": 1.9841467487917743e-05, "loss": 1.1755, "step": 813 }, { "epoch": 0.08519099947671377, "grad_norm": 3.2848807536761595, "learning_rate": 1.9840865735067016e-05, "loss": 1.0185, "step": 814 }, { "epoch": 0.08529565672422815, "grad_norm": 2.8281996236712272, "learning_rate": 1.9840262851480866e-05, "loss": 1.1314, "step": 815 }, { "epoch": 0.08540031397174254, "grad_norm": 2.979553704920702, "learning_rate": 1.9839658837228562e-05, "loss": 1.1369, "step": 816 }, { "epoch": 0.08550497121925693, "grad_norm": 2.8697433057340134, "learning_rate": 1.9839053692379516e-05, "loss": 1.0285, "step": 817 }, { "epoch": 0.08560962846677132, "grad_norm": 3.4761142719041906, "learning_rate": 1.9838447417003253e-05, "loss": 1.0726, "step": 818 }, { "epoch": 0.08571428571428572, "grad_norm": 2.6104944016292957, "learning_rate": 1.983784001116944e-05, "loss": 0.9732, "step": 819 }, { "epoch": 0.08581894296180011, "grad_norm": 2.773752386393791, "learning_rate": 1.9837231474947862e-05, "loss": 1.0283, "step": 820 }, { "epoch": 0.0859236002093145, "grad_norm": 2.70939361192841, "learning_rate": 1.9836621808408448e-05, "loss": 1.0672, "step": 821 }, { "epoch": 0.08602825745682889, "grad_norm": 2.73253043675312, "learning_rate": 1.9836011011621246e-05, "loss": 0.9136, "step": 822 }, { "epoch": 0.08613291470434327, "grad_norm": 2.5285447684085773, "learning_rate": 1.983539908465644e-05, "loss": 1.0069, "step": 823 }, { "epoch": 0.08623757195185766, "grad_norm": 3.039707735225743, "learning_rate": 1.983478602758434e-05, "loss": 1.0909, "step": 824 }, { "epoch": 0.08634222919937205, "grad_norm": 3.0060510240991913, "learning_rate": 1.983417184047539e-05, "loss": 1.0854, "step": 825 }, { "epoch": 0.08644688644688645, "grad_norm": 2.575407129448105, "learning_rate": 1.9833556523400156e-05, "loss": 0.9942, "step": 826 }, { "epoch": 0.08655154369440084, "grad_norm": 2.399757949657678, "learning_rate": 1.9832940076429342e-05, "loss": 1.0323, "step": 827 }, { "epoch": 0.08665620094191523, "grad_norm": 2.6351017617726935, "learning_rate": 1.9832322499633785e-05, "loss": 1.0229, "step": 828 }, { "epoch": 0.08676085818942962, "grad_norm": 2.0539887200521685, "learning_rate": 1.9831703793084436e-05, "loss": 0.8446, "step": 829 }, { "epoch": 0.08686551543694401, "grad_norm": 2.655675530823441, "learning_rate": 1.9831083956852396e-05, "loss": 1.0945, "step": 830 }, { "epoch": 0.0869701726844584, "grad_norm": 2.3301289659294224, "learning_rate": 1.9830462991008875e-05, "loss": 0.9982, "step": 831 }, { "epoch": 0.08707482993197278, "grad_norm": 1.9878115279486144, "learning_rate": 1.982984089562523e-05, "loss": 0.7846, "step": 832 }, { "epoch": 0.08717948717948718, "grad_norm": 2.7308227759486043, "learning_rate": 1.9829217670772936e-05, "loss": 1.0973, "step": 833 }, { "epoch": 0.08728414442700157, "grad_norm": 3.21302132263838, "learning_rate": 1.9828593316523608e-05, "loss": 1.1387, "step": 834 }, { "epoch": 0.08738880167451596, "grad_norm": 2.580087307714874, "learning_rate": 1.9827967832948985e-05, "loss": 0.9605, "step": 835 }, { "epoch": 0.08749345892203035, "grad_norm": 3.2690704122614505, "learning_rate": 1.9827341220120933e-05, "loss": 1.1851, "step": 836 }, { "epoch": 0.08759811616954474, "grad_norm": 2.4671010918720144, "learning_rate": 1.982671347811145e-05, "loss": 1.0144, "step": 837 }, { "epoch": 0.08770277341705913, "grad_norm": 2.3586665066642176, "learning_rate": 1.982608460699267e-05, "loss": 0.9452, "step": 838 }, { "epoch": 0.08780743066457353, "grad_norm": 2.827484875847677, "learning_rate": 1.982545460683685e-05, "loss": 1.0465, "step": 839 }, { "epoch": 0.08791208791208792, "grad_norm": 2.8254407799166446, "learning_rate": 1.9824823477716374e-05, "loss": 0.9405, "step": 840 }, { "epoch": 0.0880167451596023, "grad_norm": 2.828527033050868, "learning_rate": 1.982419121970377e-05, "loss": 1.1592, "step": 841 }, { "epoch": 0.08812140240711669, "grad_norm": 2.958013431535381, "learning_rate": 1.9823557832871675e-05, "loss": 1.1503, "step": 842 }, { "epoch": 0.08822605965463108, "grad_norm": 2.5277477619914617, "learning_rate": 1.982292331729287e-05, "loss": 1.1166, "step": 843 }, { "epoch": 0.08833071690214547, "grad_norm": 2.8289824096892353, "learning_rate": 1.982228767304026e-05, "loss": 0.9632, "step": 844 }, { "epoch": 0.08843537414965986, "grad_norm": 2.5616715856813195, "learning_rate": 1.9821650900186887e-05, "loss": 0.963, "step": 845 }, { "epoch": 0.08854003139717426, "grad_norm": 2.5986696555097586, "learning_rate": 1.982101299880592e-05, "loss": 1.1771, "step": 846 }, { "epoch": 0.08864468864468865, "grad_norm": 3.051131850126143, "learning_rate": 1.9820373968970642e-05, "loss": 0.9058, "step": 847 }, { "epoch": 0.08874934589220304, "grad_norm": 2.7872842033896648, "learning_rate": 1.9819733810754492e-05, "loss": 1.0216, "step": 848 }, { "epoch": 0.08885400313971742, "grad_norm": 2.582106404341739, "learning_rate": 1.981909252423102e-05, "loss": 1.0599, "step": 849 }, { "epoch": 0.08895866038723181, "grad_norm": 2.524366926584677, "learning_rate": 1.981845010947391e-05, "loss": 1.0088, "step": 850 }, { "epoch": 0.0890633176347462, "grad_norm": 2.5675683272516445, "learning_rate": 1.9817806566556982e-05, "loss": 0.9454, "step": 851 }, { "epoch": 0.0891679748822606, "grad_norm": 2.522430268689835, "learning_rate": 1.9817161895554173e-05, "loss": 1.0239, "step": 852 }, { "epoch": 0.08927263212977499, "grad_norm": 2.7158212443276897, "learning_rate": 1.9816516096539563e-05, "loss": 0.9493, "step": 853 }, { "epoch": 0.08937728937728938, "grad_norm": 2.6167231825894963, "learning_rate": 1.981586916958735e-05, "loss": 0.9662, "step": 854 }, { "epoch": 0.08948194662480377, "grad_norm": 2.566987568415224, "learning_rate": 1.981522111477187e-05, "loss": 0.8303, "step": 855 }, { "epoch": 0.08958660387231816, "grad_norm": 1.9987336493251993, "learning_rate": 1.981457193216759e-05, "loss": 0.8232, "step": 856 }, { "epoch": 0.08969126111983255, "grad_norm": 3.165150991848205, "learning_rate": 1.98139216218491e-05, "loss": 1.0353, "step": 857 }, { "epoch": 0.08979591836734693, "grad_norm": 2.7844496531584766, "learning_rate": 1.9813270183891117e-05, "loss": 1.1477, "step": 858 }, { "epoch": 0.08990057561486132, "grad_norm": 2.8027853604323747, "learning_rate": 1.98126176183685e-05, "loss": 1.0745, "step": 859 }, { "epoch": 0.09000523286237572, "grad_norm": 2.741551828332874, "learning_rate": 1.9811963925356222e-05, "loss": 1.2044, "step": 860 }, { "epoch": 0.09010989010989011, "grad_norm": 2.2316560473076015, "learning_rate": 1.98113091049294e-05, "loss": 0.8208, "step": 861 }, { "epoch": 0.0902145473574045, "grad_norm": 2.8642542806528026, "learning_rate": 1.9810653157163275e-05, "loss": 1.1049, "step": 862 }, { "epoch": 0.09031920460491889, "grad_norm": 2.974066935720496, "learning_rate": 1.9809996082133208e-05, "loss": 1.1108, "step": 863 }, { "epoch": 0.09042386185243328, "grad_norm": 3.751229650832022, "learning_rate": 1.980933787991471e-05, "loss": 1.0713, "step": 864 }, { "epoch": 0.09052851909994768, "grad_norm": 2.355691840091666, "learning_rate": 1.98086785505834e-05, "loss": 1.0267, "step": 865 }, { "epoch": 0.09063317634746205, "grad_norm": 2.4093700564341782, "learning_rate": 1.9808018094215044e-05, "loss": 1.0784, "step": 866 }, { "epoch": 0.09073783359497645, "grad_norm": 2.9768464412651965, "learning_rate": 1.9807356510885526e-05, "loss": 1.0033, "step": 867 }, { "epoch": 0.09084249084249084, "grad_norm": 2.930398030142532, "learning_rate": 1.980669380067086e-05, "loss": 1.048, "step": 868 }, { "epoch": 0.09094714809000523, "grad_norm": 2.519451550821377, "learning_rate": 1.9806029963647198e-05, "loss": 1.0554, "step": 869 }, { "epoch": 0.09105180533751962, "grad_norm": 2.7989080225534084, "learning_rate": 1.9805364999890813e-05, "loss": 1.0775, "step": 870 }, { "epoch": 0.09115646258503401, "grad_norm": 2.41370990986101, "learning_rate": 1.980469890947811e-05, "loss": 1.0857, "step": 871 }, { "epoch": 0.0912611198325484, "grad_norm": 3.1596287961198146, "learning_rate": 1.9804031692485633e-05, "loss": 1.0765, "step": 872 }, { "epoch": 0.0913657770800628, "grad_norm": 2.584700642828629, "learning_rate": 1.9803363348990035e-05, "loss": 0.941, "step": 873 }, { "epoch": 0.09147043432757719, "grad_norm": 2.8784994569744193, "learning_rate": 1.9802693879068116e-05, "loss": 1.1304, "step": 874 }, { "epoch": 0.09157509157509157, "grad_norm": 2.849171759445592, "learning_rate": 1.9802023282796797e-05, "loss": 1.0429, "step": 875 }, { "epoch": 0.09167974882260596, "grad_norm": 3.3153795943487268, "learning_rate": 1.980135156025313e-05, "loss": 1.1432, "step": 876 }, { "epoch": 0.09178440607012035, "grad_norm": 2.472442664010389, "learning_rate": 1.9800678711514304e-05, "loss": 1.0624, "step": 877 }, { "epoch": 0.09188906331763474, "grad_norm": 2.7245355328223533, "learning_rate": 1.9800004736657623e-05, "loss": 1.0272, "step": 878 }, { "epoch": 0.09199372056514914, "grad_norm": 2.5078380754741847, "learning_rate": 1.9799329635760533e-05, "loss": 1.0269, "step": 879 }, { "epoch": 0.09209837781266353, "grad_norm": 2.9844539652083815, "learning_rate": 1.97986534089006e-05, "loss": 0.9574, "step": 880 }, { "epoch": 0.09220303506017792, "grad_norm": 3.058018108494758, "learning_rate": 1.9797976056155527e-05, "loss": 1.0867, "step": 881 }, { "epoch": 0.09230769230769231, "grad_norm": 2.675532794463805, "learning_rate": 1.9797297577603144e-05, "loss": 1.1262, "step": 882 }, { "epoch": 0.0924123495552067, "grad_norm": 2.626017155020485, "learning_rate": 1.979661797332141e-05, "loss": 1.1043, "step": 883 }, { "epoch": 0.09251700680272108, "grad_norm": 2.876484472128455, "learning_rate": 1.9795937243388405e-05, "loss": 1.1571, "step": 884 }, { "epoch": 0.09262166405023547, "grad_norm": 2.5311189083204146, "learning_rate": 1.9795255387882357e-05, "loss": 1.0845, "step": 885 }, { "epoch": 0.09272632129774987, "grad_norm": 2.510089613368414, "learning_rate": 1.9794572406881606e-05, "loss": 1.0432, "step": 886 }, { "epoch": 0.09283097854526426, "grad_norm": 3.2255430026028247, "learning_rate": 1.979388830046463e-05, "loss": 1.1879, "step": 887 }, { "epoch": 0.09293563579277865, "grad_norm": 2.4203136160149716, "learning_rate": 1.9793203068710036e-05, "loss": 0.8051, "step": 888 }, { "epoch": 0.09304029304029304, "grad_norm": 2.8289265412585736, "learning_rate": 1.9792516711696555e-05, "loss": 0.9694, "step": 889 }, { "epoch": 0.09314495028780743, "grad_norm": 2.582322870935106, "learning_rate": 1.9791829229503054e-05, "loss": 1.0845, "step": 890 }, { "epoch": 0.09324960753532183, "grad_norm": 2.6892222694722427, "learning_rate": 1.9791140622208523e-05, "loss": 1.111, "step": 891 }, { "epoch": 0.0933542647828362, "grad_norm": 3.0266338857528505, "learning_rate": 1.9790450889892082e-05, "loss": 1.0868, "step": 892 }, { "epoch": 0.0934589220303506, "grad_norm": 2.966278924481458, "learning_rate": 1.9789760032632993e-05, "loss": 1.1863, "step": 893 }, { "epoch": 0.09356357927786499, "grad_norm": 2.6158992441922773, "learning_rate": 1.9789068050510627e-05, "loss": 1.1321, "step": 894 }, { "epoch": 0.09366823652537938, "grad_norm": 3.081967659922265, "learning_rate": 1.9788374943604496e-05, "loss": 0.9553, "step": 895 }, { "epoch": 0.09377289377289377, "grad_norm": 2.527503447093225, "learning_rate": 1.9787680711994244e-05, "loss": 1.0302, "step": 896 }, { "epoch": 0.09387755102040816, "grad_norm": 2.7162559652765466, "learning_rate": 1.9786985355759633e-05, "loss": 1.0795, "step": 897 }, { "epoch": 0.09398220826792256, "grad_norm": 2.329247727727232, "learning_rate": 1.9786288874980567e-05, "loss": 0.8319, "step": 898 }, { "epoch": 0.09408686551543695, "grad_norm": 2.775539898357401, "learning_rate": 1.978559126973707e-05, "loss": 1.1444, "step": 899 }, { "epoch": 0.09419152276295134, "grad_norm": 2.9580563525888635, "learning_rate": 1.97848925401093e-05, "loss": 1.0485, "step": 900 }, { "epoch": 0.09429618001046572, "grad_norm": 2.5695675385307553, "learning_rate": 1.978419268617754e-05, "loss": 0.9955, "step": 901 }, { "epoch": 0.09440083725798011, "grad_norm": 2.2552487789593623, "learning_rate": 1.9783491708022203e-05, "loss": 0.8651, "step": 902 }, { "epoch": 0.0945054945054945, "grad_norm": 2.9417126110660554, "learning_rate": 1.978278960572384e-05, "loss": 1.1695, "step": 903 }, { "epoch": 0.0946101517530089, "grad_norm": 2.183916665081056, "learning_rate": 1.9782086379363116e-05, "loss": 0.9073, "step": 904 }, { "epoch": 0.09471480900052329, "grad_norm": 2.843970018173232, "learning_rate": 1.978138202902084e-05, "loss": 1.1337, "step": 905 }, { "epoch": 0.09481946624803768, "grad_norm": 2.5391065676287794, "learning_rate": 1.9780676554777937e-05, "loss": 1.0872, "step": 906 }, { "epoch": 0.09492412349555207, "grad_norm": 2.580130909019054, "learning_rate": 1.9779969956715473e-05, "loss": 0.9439, "step": 907 }, { "epoch": 0.09502878074306646, "grad_norm": 2.8311688424167016, "learning_rate": 1.9779262234914635e-05, "loss": 1.1965, "step": 908 }, { "epoch": 0.09513343799058084, "grad_norm": 3.140721287835419, "learning_rate": 1.977855338945674e-05, "loss": 1.0228, "step": 909 }, { "epoch": 0.09523809523809523, "grad_norm": 2.4322100869774976, "learning_rate": 1.977784342042323e-05, "loss": 1.1553, "step": 910 }, { "epoch": 0.09534275248560962, "grad_norm": 2.5964369564075196, "learning_rate": 1.9777132327895702e-05, "loss": 1.0501, "step": 911 }, { "epoch": 0.09544740973312402, "grad_norm": 2.6073741523793186, "learning_rate": 1.9776420111955842e-05, "loss": 1.0065, "step": 912 }, { "epoch": 0.09555206698063841, "grad_norm": 2.6220467436425863, "learning_rate": 1.9775706772685495e-05, "loss": 1.1565, "step": 913 }, { "epoch": 0.0956567242281528, "grad_norm": 2.923213677461564, "learning_rate": 1.977499231016662e-05, "loss": 1.0593, "step": 914 }, { "epoch": 0.09576138147566719, "grad_norm": 2.6380776247624027, "learning_rate": 1.9774276724481316e-05, "loss": 1.0011, "step": 915 }, { "epoch": 0.09586603872318158, "grad_norm": 3.8624650361902617, "learning_rate": 1.9773560015711798e-05, "loss": 1.0226, "step": 916 }, { "epoch": 0.09597069597069598, "grad_norm": 2.818754439571562, "learning_rate": 1.977284218394042e-05, "loss": 1.1649, "step": 917 }, { "epoch": 0.09607535321821035, "grad_norm": 2.4474931176286128, "learning_rate": 1.977212322924967e-05, "loss": 1.0857, "step": 918 }, { "epoch": 0.09618001046572475, "grad_norm": 2.8206323427096165, "learning_rate": 1.9771403151722147e-05, "loss": 1.0433, "step": 919 }, { "epoch": 0.09628466771323914, "grad_norm": 3.213099908339713, "learning_rate": 1.9770681951440595e-05, "loss": 1.1182, "step": 920 }, { "epoch": 0.09638932496075353, "grad_norm": 2.9662481224716974, "learning_rate": 1.9769959628487876e-05, "loss": 0.9635, "step": 921 }, { "epoch": 0.09649398220826792, "grad_norm": 4.016583647471507, "learning_rate": 1.9769236182946993e-05, "loss": 0.9521, "step": 922 }, { "epoch": 0.09659863945578231, "grad_norm": 3.508939746867643, "learning_rate": 1.9768511614901067e-05, "loss": 1.2703, "step": 923 }, { "epoch": 0.0967032967032967, "grad_norm": 2.953270773462452, "learning_rate": 1.9767785924433354e-05, "loss": 1.124, "step": 924 }, { "epoch": 0.0968079539508111, "grad_norm": 2.537786165701537, "learning_rate": 1.9767059111627237e-05, "loss": 1.0141, "step": 925 }, { "epoch": 0.09691261119832549, "grad_norm": 3.2226759688842415, "learning_rate": 1.9766331176566226e-05, "loss": 1.0785, "step": 926 }, { "epoch": 0.09701726844583987, "grad_norm": 2.8316546956911512, "learning_rate": 1.9765602119333967e-05, "loss": 1.084, "step": 927 }, { "epoch": 0.09712192569335426, "grad_norm": 2.8227008106460985, "learning_rate": 1.9764871940014226e-05, "loss": 1.0238, "step": 928 }, { "epoch": 0.09722658294086865, "grad_norm": 2.6956185802209323, "learning_rate": 1.97641406386909e-05, "loss": 1.1413, "step": 929 }, { "epoch": 0.09733124018838304, "grad_norm": 3.011507745775884, "learning_rate": 1.976340821544803e-05, "loss": 1.0178, "step": 930 }, { "epoch": 0.09743589743589744, "grad_norm": 2.7090414787190547, "learning_rate": 1.9762674670369757e-05, "loss": 1.1175, "step": 931 }, { "epoch": 0.09754055468341183, "grad_norm": 2.9878646220501337, "learning_rate": 1.9761940003540375e-05, "loss": 1.0434, "step": 932 }, { "epoch": 0.09764521193092622, "grad_norm": 2.9207377165620563, "learning_rate": 1.9761204215044295e-05, "loss": 0.9525, "step": 933 }, { "epoch": 0.09774986917844061, "grad_norm": 2.7502875366492985, "learning_rate": 1.9760467304966066e-05, "loss": 0.9973, "step": 934 }, { "epoch": 0.09785452642595499, "grad_norm": 2.866734557712832, "learning_rate": 1.9759729273390356e-05, "loss": 1.0749, "step": 935 }, { "epoch": 0.09795918367346938, "grad_norm": 2.195362172452806, "learning_rate": 1.9758990120401967e-05, "loss": 0.7863, "step": 936 }, { "epoch": 0.09806384092098377, "grad_norm": 2.470173351806748, "learning_rate": 1.975824984608583e-05, "loss": 1.0308, "step": 937 }, { "epoch": 0.09816849816849817, "grad_norm": 1.9811710309835036, "learning_rate": 1.9757508450527006e-05, "loss": 0.8232, "step": 938 }, { "epoch": 0.09827315541601256, "grad_norm": 2.608135387091775, "learning_rate": 1.9756765933810674e-05, "loss": 1.031, "step": 939 }, { "epoch": 0.09837781266352695, "grad_norm": 2.57500776512692, "learning_rate": 1.9756022296022164e-05, "loss": 1.1257, "step": 940 }, { "epoch": 0.09848246991104134, "grad_norm": 2.976812024734902, "learning_rate": 1.9755277537246914e-05, "loss": 1.1147, "step": 941 }, { "epoch": 0.09858712715855573, "grad_norm": 2.6608551364060307, "learning_rate": 1.9754531657570495e-05, "loss": 1.1327, "step": 942 }, { "epoch": 0.09869178440607013, "grad_norm": 2.564080580950772, "learning_rate": 1.9753784657078613e-05, "loss": 1.0696, "step": 943 }, { "epoch": 0.0987964416535845, "grad_norm": 2.3432953283718327, "learning_rate": 1.9753036535857106e-05, "loss": 1.0246, "step": 944 }, { "epoch": 0.0989010989010989, "grad_norm": 2.9307230460788904, "learning_rate": 1.9752287293991927e-05, "loss": 1.0714, "step": 945 }, { "epoch": 0.09900575614861329, "grad_norm": 3.2102158887761867, "learning_rate": 1.9751536931569167e-05, "loss": 1.198, "step": 946 }, { "epoch": 0.09911041339612768, "grad_norm": 2.8495315326753645, "learning_rate": 1.975078544867505e-05, "loss": 1.0824, "step": 947 }, { "epoch": 0.09921507064364207, "grad_norm": 2.682593584386815, "learning_rate": 1.9750032845395914e-05, "loss": 1.0324, "step": 948 }, { "epoch": 0.09931972789115646, "grad_norm": 2.8693006339538116, "learning_rate": 1.9749279121818235e-05, "loss": 1.1512, "step": 949 }, { "epoch": 0.09942438513867086, "grad_norm": 2.7517932371635774, "learning_rate": 1.974852427802863e-05, "loss": 0.9674, "step": 950 }, { "epoch": 0.09952904238618525, "grad_norm": 2.734800109801035, "learning_rate": 1.9747768314113817e-05, "loss": 1.0879, "step": 951 }, { "epoch": 0.09963369963369964, "grad_norm": 2.7033090734169893, "learning_rate": 1.9747011230160664e-05, "loss": 1.1549, "step": 952 }, { "epoch": 0.09973835688121402, "grad_norm": 2.6253103443822683, "learning_rate": 1.9746253026256164e-05, "loss": 1.0787, "step": 953 }, { "epoch": 0.09984301412872841, "grad_norm": 3.125722150105614, "learning_rate": 1.974549370248743e-05, "loss": 1.1259, "step": 954 }, { "epoch": 0.0999476713762428, "grad_norm": 2.7448374047244326, "learning_rate": 1.9744733258941717e-05, "loss": 0.9836, "step": 955 }, { "epoch": 0.1000523286237572, "grad_norm": 2.955943924297256, "learning_rate": 1.9743971695706398e-05, "loss": 1.1255, "step": 956 }, { "epoch": 0.10015698587127159, "grad_norm": 2.7395719342747897, "learning_rate": 1.9743209012868977e-05, "loss": 1.042, "step": 957 }, { "epoch": 0.10026164311878598, "grad_norm": 2.5186535770163103, "learning_rate": 1.974244521051709e-05, "loss": 0.7976, "step": 958 }, { "epoch": 0.10036630036630037, "grad_norm": 2.499288564217357, "learning_rate": 1.9741680288738495e-05, "loss": 1.0014, "step": 959 }, { "epoch": 0.10047095761381476, "grad_norm": 2.8901978870749008, "learning_rate": 1.9740914247621088e-05, "loss": 1.074, "step": 960 }, { "epoch": 0.10057561486132914, "grad_norm": 2.624567677834044, "learning_rate": 1.9740147087252887e-05, "loss": 1.0408, "step": 961 }, { "epoch": 0.10068027210884353, "grad_norm": 2.1866362133414827, "learning_rate": 1.973937880772204e-05, "loss": 1.0002, "step": 962 }, { "epoch": 0.10078492935635792, "grad_norm": 2.3411382143212793, "learning_rate": 1.9738609409116824e-05, "loss": 0.8504, "step": 963 }, { "epoch": 0.10088958660387232, "grad_norm": 2.9089697665795864, "learning_rate": 1.9737838891525647e-05, "loss": 1.0074, "step": 964 }, { "epoch": 0.10099424385138671, "grad_norm": 2.532467662978773, "learning_rate": 1.973706725503704e-05, "loss": 1.0635, "step": 965 }, { "epoch": 0.1010989010989011, "grad_norm": 2.6762533062616125, "learning_rate": 1.9736294499739667e-05, "loss": 0.9866, "step": 966 }, { "epoch": 0.10120355834641549, "grad_norm": 3.0420458033024658, "learning_rate": 1.9735520625722312e-05, "loss": 1.0371, "step": 967 }, { "epoch": 0.10130821559392988, "grad_norm": 2.5023937580187985, "learning_rate": 1.9734745633073908e-05, "loss": 1.1425, "step": 968 }, { "epoch": 0.10141287284144428, "grad_norm": 2.638941018679516, "learning_rate": 1.9733969521883494e-05, "loss": 1.0191, "step": 969 }, { "epoch": 0.10151753008895865, "grad_norm": 3.0955666704793576, "learning_rate": 1.9733192292240252e-05, "loss": 1.013, "step": 970 }, { "epoch": 0.10162218733647305, "grad_norm": 2.3039556868285223, "learning_rate": 1.973241394423348e-05, "loss": 0.8819, "step": 971 }, { "epoch": 0.10172684458398744, "grad_norm": 2.808971945311129, "learning_rate": 1.973163447795262e-05, "loss": 1.2111, "step": 972 }, { "epoch": 0.10183150183150183, "grad_norm": 2.1370050360048927, "learning_rate": 1.973085389348723e-05, "loss": 0.7627, "step": 973 }, { "epoch": 0.10193615907901622, "grad_norm": 2.597731049761627, "learning_rate": 1.9730072190926996e-05, "loss": 1.1669, "step": 974 }, { "epoch": 0.10204081632653061, "grad_norm": 3.395661508370253, "learning_rate": 1.9729289370361745e-05, "loss": 0.9587, "step": 975 }, { "epoch": 0.102145473574045, "grad_norm": 3.0895550900923294, "learning_rate": 1.9728505431881425e-05, "loss": 0.9229, "step": 976 }, { "epoch": 0.1022501308215594, "grad_norm": 2.458084346576055, "learning_rate": 1.972772037557611e-05, "loss": 1.0632, "step": 977 }, { "epoch": 0.10235478806907378, "grad_norm": 3.0516732311665313, "learning_rate": 1.9726934201535998e-05, "loss": 1.1159, "step": 978 }, { "epoch": 0.10245944531658817, "grad_norm": 2.354235000879386, "learning_rate": 1.972614690985143e-05, "loss": 0.9929, "step": 979 }, { "epoch": 0.10256410256410256, "grad_norm": 3.2111492690142995, "learning_rate": 1.9725358500612865e-05, "loss": 0.9502, "step": 980 }, { "epoch": 0.10266875981161695, "grad_norm": 3.0248285962435895, "learning_rate": 1.9724568973910893e-05, "loss": 1.0119, "step": 981 }, { "epoch": 0.10277341705913134, "grad_norm": 2.770623468886491, "learning_rate": 1.9723778329836235e-05, "loss": 1.0712, "step": 982 }, { "epoch": 0.10287807430664574, "grad_norm": 2.5692351103675364, "learning_rate": 1.9722986568479734e-05, "loss": 1.0985, "step": 983 }, { "epoch": 0.10298273155416013, "grad_norm": 2.8212239759961104, "learning_rate": 1.9722193689932364e-05, "loss": 1.0989, "step": 984 }, { "epoch": 0.10308738880167452, "grad_norm": 2.7660547575806187, "learning_rate": 1.9721399694285227e-05, "loss": 1.0853, "step": 985 }, { "epoch": 0.10319204604918891, "grad_norm": 2.7676555235602995, "learning_rate": 1.9720604581629562e-05, "loss": 1.1553, "step": 986 }, { "epoch": 0.10329670329670329, "grad_norm": 2.487671204295441, "learning_rate": 1.9719808352056728e-05, "loss": 0.9583, "step": 987 }, { "epoch": 0.10340136054421768, "grad_norm": 2.637848541738491, "learning_rate": 1.9719011005658203e-05, "loss": 1.1547, "step": 988 }, { "epoch": 0.10350601779173207, "grad_norm": 2.6148860251102524, "learning_rate": 1.9718212542525616e-05, "loss": 1.0602, "step": 989 }, { "epoch": 0.10361067503924647, "grad_norm": 2.512048499705755, "learning_rate": 1.9717412962750704e-05, "loss": 1.1129, "step": 990 }, { "epoch": 0.10371533228676086, "grad_norm": 2.9398186150069945, "learning_rate": 1.9716612266425343e-05, "loss": 1.0064, "step": 991 }, { "epoch": 0.10381998953427525, "grad_norm": 2.5999608156502245, "learning_rate": 1.971581045364154e-05, "loss": 1.0484, "step": 992 }, { "epoch": 0.10392464678178964, "grad_norm": 2.6014924463154743, "learning_rate": 1.9715007524491413e-05, "loss": 0.9897, "step": 993 }, { "epoch": 0.10402930402930403, "grad_norm": 2.361645305711183, "learning_rate": 1.9714203479067232e-05, "loss": 1.1281, "step": 994 }, { "epoch": 0.10413396127681843, "grad_norm": 2.5112783788806685, "learning_rate": 1.9713398317461377e-05, "loss": 0.8673, "step": 995 }, { "epoch": 0.1042386185243328, "grad_norm": 2.7601681785587373, "learning_rate": 1.9712592039766364e-05, "loss": 1.1427, "step": 996 }, { "epoch": 0.1043432757718472, "grad_norm": 2.9435423523320696, "learning_rate": 1.9711784646074837e-05, "loss": 1.0373, "step": 997 }, { "epoch": 0.10444793301936159, "grad_norm": 2.513526513093683, "learning_rate": 1.9710976136479565e-05, "loss": 1.0433, "step": 998 }, { "epoch": 0.10455259026687598, "grad_norm": 2.8188327082080575, "learning_rate": 1.971016651107345e-05, "loss": 1.0745, "step": 999 }, { "epoch": 0.10465724751439037, "grad_norm": 2.713657193992208, "learning_rate": 1.9709355769949515e-05, "loss": 1.1284, "step": 1000 }, { "epoch": 0.10476190476190476, "grad_norm": 3.0415166367388573, "learning_rate": 1.9708543913200925e-05, "loss": 1.0857, "step": 1001 }, { "epoch": 0.10486656200941916, "grad_norm": 3.0280839509415074, "learning_rate": 1.9707730940920955e-05, "loss": 1.1141, "step": 1002 }, { "epoch": 0.10497121925693355, "grad_norm": 2.94662887338583, "learning_rate": 1.970691685320302e-05, "loss": 1.0994, "step": 1003 }, { "epoch": 0.10507587650444793, "grad_norm": 2.610110149296357, "learning_rate": 1.9706101650140657e-05, "loss": 1.1039, "step": 1004 }, { "epoch": 0.10518053375196232, "grad_norm": 3.192863824515813, "learning_rate": 1.9705285331827544e-05, "loss": 1.0594, "step": 1005 }, { "epoch": 0.10528519099947671, "grad_norm": 3.0083558422084393, "learning_rate": 1.9704467898357473e-05, "loss": 0.9451, "step": 1006 }, { "epoch": 0.1053898482469911, "grad_norm": 2.692443979020099, "learning_rate": 1.970364934982436e-05, "loss": 1.0166, "step": 1007 }, { "epoch": 0.1054945054945055, "grad_norm": 2.473816517340895, "learning_rate": 1.9702829686322272e-05, "loss": 1.0046, "step": 1008 }, { "epoch": 0.10559916274201989, "grad_norm": 2.5772774995170136, "learning_rate": 1.970200890794538e-05, "loss": 1.0931, "step": 1009 }, { "epoch": 0.10570381998953428, "grad_norm": 2.4344331305938423, "learning_rate": 1.9701187014787994e-05, "loss": 0.9071, "step": 1010 }, { "epoch": 0.10580847723704867, "grad_norm": 2.49913692348178, "learning_rate": 1.970036400694456e-05, "loss": 1.0385, "step": 1011 }, { "epoch": 0.10591313448456306, "grad_norm": 2.7676304166369197, "learning_rate": 1.9699539884509632e-05, "loss": 0.9947, "step": 1012 }, { "epoch": 0.10601779173207744, "grad_norm": 2.653669677176504, "learning_rate": 1.969871464757791e-05, "loss": 1.0598, "step": 1013 }, { "epoch": 0.10612244897959183, "grad_norm": 3.1238605727049733, "learning_rate": 1.9697888296244214e-05, "loss": 1.1389, "step": 1014 }, { "epoch": 0.10622710622710622, "grad_norm": 2.9772018563723672, "learning_rate": 1.9697060830603495e-05, "loss": 1.0753, "step": 1015 }, { "epoch": 0.10633176347462062, "grad_norm": 2.668071509791539, "learning_rate": 1.9696232250750823e-05, "loss": 1.0608, "step": 1016 }, { "epoch": 0.10643642072213501, "grad_norm": 2.589387863961408, "learning_rate": 1.9695402556781414e-05, "loss": 1.0848, "step": 1017 }, { "epoch": 0.1065410779696494, "grad_norm": 2.575997267076478, "learning_rate": 1.9694571748790593e-05, "loss": 1.0157, "step": 1018 }, { "epoch": 0.10664573521716379, "grad_norm": 2.4804280288072222, "learning_rate": 1.9693739826873828e-05, "loss": 0.9616, "step": 1019 }, { "epoch": 0.10675039246467818, "grad_norm": 2.8490573537756854, "learning_rate": 1.9692906791126708e-05, "loss": 1.0175, "step": 1020 }, { "epoch": 0.10685504971219256, "grad_norm": 2.7446287985468367, "learning_rate": 1.9692072641644945e-05, "loss": 1.0971, "step": 1021 }, { "epoch": 0.10695970695970695, "grad_norm": 3.0448502861545292, "learning_rate": 1.9691237378524387e-05, "loss": 1.014, "step": 1022 }, { "epoch": 0.10706436420722135, "grad_norm": 2.6319835236235622, "learning_rate": 1.9690401001861006e-05, "loss": 1.0825, "step": 1023 }, { "epoch": 0.10716902145473574, "grad_norm": 2.4456143448333547, "learning_rate": 1.9689563511750907e-05, "loss": 1.1329, "step": 1024 }, { "epoch": 0.10727367870225013, "grad_norm": 3.059839362265981, "learning_rate": 1.9688724908290318e-05, "loss": 1.1094, "step": 1025 }, { "epoch": 0.10737833594976452, "grad_norm": 2.4535767216133797, "learning_rate": 1.9687885191575597e-05, "loss": 0.8073, "step": 1026 }, { "epoch": 0.10748299319727891, "grad_norm": 2.2330132410114656, "learning_rate": 1.9687044361703228e-05, "loss": 0.9921, "step": 1027 }, { "epoch": 0.1075876504447933, "grad_norm": 2.6139103762274876, "learning_rate": 1.9686202418769823e-05, "loss": 1.0012, "step": 1028 }, { "epoch": 0.1076923076923077, "grad_norm": 2.7091370591165576, "learning_rate": 1.9685359362872124e-05, "loss": 1.2318, "step": 1029 }, { "epoch": 0.10779696493982208, "grad_norm": 2.414700209712605, "learning_rate": 1.9684515194107004e-05, "loss": 1.0775, "step": 1030 }, { "epoch": 0.10790162218733647, "grad_norm": 2.6192205052365236, "learning_rate": 1.9683669912571452e-05, "loss": 1.0712, "step": 1031 }, { "epoch": 0.10800627943485086, "grad_norm": 2.737341412891626, "learning_rate": 1.96828235183626e-05, "loss": 1.0652, "step": 1032 }, { "epoch": 0.10811093668236525, "grad_norm": 2.465865978038424, "learning_rate": 1.968197601157769e-05, "loss": 1.124, "step": 1033 }, { "epoch": 0.10821559392987964, "grad_norm": 2.4388907774406574, "learning_rate": 1.968112739231411e-05, "loss": 1.0664, "step": 1034 }, { "epoch": 0.10832025117739404, "grad_norm": 2.412351278224597, "learning_rate": 1.968027766066937e-05, "loss": 1.0165, "step": 1035 }, { "epoch": 0.10842490842490843, "grad_norm": 3.061388366092205, "learning_rate": 1.9679426816741102e-05, "loss": 1.2182, "step": 1036 }, { "epoch": 0.10852956567242282, "grad_norm": 2.674591671542277, "learning_rate": 1.967857486062707e-05, "loss": 0.8857, "step": 1037 }, { "epoch": 0.10863422291993721, "grad_norm": 2.6778191321571, "learning_rate": 1.9677721792425167e-05, "loss": 1.1491, "step": 1038 }, { "epoch": 0.10873888016745159, "grad_norm": 2.591446579098128, "learning_rate": 1.967686761223341e-05, "loss": 0.9744, "step": 1039 }, { "epoch": 0.10884353741496598, "grad_norm": 2.7917814478780314, "learning_rate": 1.967601232014995e-05, "loss": 0.9032, "step": 1040 }, { "epoch": 0.10894819466248037, "grad_norm": 2.2711122801371517, "learning_rate": 1.9675155916273057e-05, "loss": 1.0988, "step": 1041 }, { "epoch": 0.10905285190999477, "grad_norm": 2.6881776652627645, "learning_rate": 1.9674298400701137e-05, "loss": 0.997, "step": 1042 }, { "epoch": 0.10915750915750916, "grad_norm": 2.4975909079275502, "learning_rate": 1.9673439773532714e-05, "loss": 0.9299, "step": 1043 }, { "epoch": 0.10926216640502355, "grad_norm": 2.710402657049504, "learning_rate": 1.9672580034866455e-05, "loss": 1.0081, "step": 1044 }, { "epoch": 0.10936682365253794, "grad_norm": 2.643549698589489, "learning_rate": 1.9671719184801144e-05, "loss": 1.0695, "step": 1045 }, { "epoch": 0.10947148090005233, "grad_norm": 2.7508161610052246, "learning_rate": 1.9670857223435688e-05, "loss": 1.112, "step": 1046 }, { "epoch": 0.10957613814756671, "grad_norm": 2.5165912885846113, "learning_rate": 1.9669994150869134e-05, "loss": 0.9325, "step": 1047 }, { "epoch": 0.1096807953950811, "grad_norm": 2.4710904066047616, "learning_rate": 1.9669129967200648e-05, "loss": 1.1537, "step": 1048 }, { "epoch": 0.1097854526425955, "grad_norm": 3.0975055499064514, "learning_rate": 1.9668264672529528e-05, "loss": 1.0, "step": 1049 }, { "epoch": 0.10989010989010989, "grad_norm": 2.253295288672016, "learning_rate": 1.9667398266955195e-05, "loss": 1.0463, "step": 1050 }, { "epoch": 0.10999476713762428, "grad_norm": 2.6293412785828014, "learning_rate": 1.9666530750577207e-05, "loss": 0.9453, "step": 1051 }, { "epoch": 0.11009942438513867, "grad_norm": 2.1033760444602727, "learning_rate": 1.966566212349524e-05, "loss": 0.9526, "step": 1052 }, { "epoch": 0.11020408163265306, "grad_norm": 2.5610804233496127, "learning_rate": 1.9664792385809094e-05, "loss": 0.9974, "step": 1053 }, { "epoch": 0.11030873888016746, "grad_norm": 2.5769705235076934, "learning_rate": 1.9663921537618717e-05, "loss": 1.0322, "step": 1054 }, { "epoch": 0.11041339612768185, "grad_norm": 2.4860849473694935, "learning_rate": 1.9663049579024162e-05, "loss": 1.0821, "step": 1055 }, { "epoch": 0.11051805337519623, "grad_norm": 2.556645999664551, "learning_rate": 1.966217651012562e-05, "loss": 0.9631, "step": 1056 }, { "epoch": 0.11062271062271062, "grad_norm": 2.69622491307049, "learning_rate": 1.9661302331023413e-05, "loss": 0.963, "step": 1057 }, { "epoch": 0.11072736787022501, "grad_norm": 2.322114587301298, "learning_rate": 1.9660427041817982e-05, "loss": 0.77, "step": 1058 }, { "epoch": 0.1108320251177394, "grad_norm": 2.086861967831109, "learning_rate": 1.9659550642609898e-05, "loss": 0.8174, "step": 1059 }, { "epoch": 0.1109366823652538, "grad_norm": 2.5465767908188, "learning_rate": 1.9658673133499868e-05, "loss": 1.1169, "step": 1060 }, { "epoch": 0.11104133961276819, "grad_norm": 2.3306210424010865, "learning_rate": 1.9657794514588708e-05, "loss": 1.0342, "step": 1061 }, { "epoch": 0.11114599686028258, "grad_norm": 4.169058819176632, "learning_rate": 1.9656914785977386e-05, "loss": 1.0828, "step": 1062 }, { "epoch": 0.11125065410779697, "grad_norm": 2.974406996118035, "learning_rate": 1.9656033947766974e-05, "loss": 1.1862, "step": 1063 }, { "epoch": 0.11135531135531136, "grad_norm": 2.263717528826466, "learning_rate": 1.965515200005869e-05, "loss": 0.8976, "step": 1064 }, { "epoch": 0.11145996860282574, "grad_norm": 2.1583188268439297, "learning_rate": 1.9654268942953867e-05, "loss": 0.9869, "step": 1065 }, { "epoch": 0.11156462585034013, "grad_norm": 2.6538687038608715, "learning_rate": 1.965338477655397e-05, "loss": 1.015, "step": 1066 }, { "epoch": 0.11166928309785452, "grad_norm": 3.088871449306695, "learning_rate": 1.9652499500960594e-05, "loss": 1.0569, "step": 1067 }, { "epoch": 0.11177394034536892, "grad_norm": 2.576832115600818, "learning_rate": 1.9651613116275457e-05, "loss": 0.8432, "step": 1068 }, { "epoch": 0.11187859759288331, "grad_norm": 2.110328498949912, "learning_rate": 1.9650725622600407e-05, "loss": 0.9884, "step": 1069 }, { "epoch": 0.1119832548403977, "grad_norm": 2.790857879613387, "learning_rate": 1.964983702003742e-05, "loss": 1.074, "step": 1070 }, { "epoch": 0.11208791208791209, "grad_norm": 2.9160190675466158, "learning_rate": 1.9648947308688594e-05, "loss": 1.1648, "step": 1071 }, { "epoch": 0.11219256933542648, "grad_norm": 2.122470818651794, "learning_rate": 1.9648056488656166e-05, "loss": 0.8107, "step": 1072 }, { "epoch": 0.11229722658294086, "grad_norm": 2.579496972713126, "learning_rate": 1.9647164560042486e-05, "loss": 1.1268, "step": 1073 }, { "epoch": 0.11240188383045525, "grad_norm": 3.122189191320992, "learning_rate": 1.9646271522950043e-05, "loss": 1.0926, "step": 1074 }, { "epoch": 0.11250654107796965, "grad_norm": 2.596905120190331, "learning_rate": 1.9645377377481446e-05, "loss": 1.0616, "step": 1075 }, { "epoch": 0.11261119832548404, "grad_norm": 2.851456857123207, "learning_rate": 1.9644482123739434e-05, "loss": 1.1331, "step": 1076 }, { "epoch": 0.11271585557299843, "grad_norm": 2.4375447703917223, "learning_rate": 1.964358576182688e-05, "loss": 0.9973, "step": 1077 }, { "epoch": 0.11282051282051282, "grad_norm": 2.388922166429233, "learning_rate": 1.9642688291846762e-05, "loss": 0.911, "step": 1078 }, { "epoch": 0.11292517006802721, "grad_norm": 2.6304965592524328, "learning_rate": 1.9641789713902223e-05, "loss": 1.0654, "step": 1079 }, { "epoch": 0.1130298273155416, "grad_norm": 2.4302290806051277, "learning_rate": 1.9640890028096492e-05, "loss": 1.0569, "step": 1080 }, { "epoch": 0.113134484563056, "grad_norm": 2.2285317412672745, "learning_rate": 1.9639989234532955e-05, "loss": 1.1056, "step": 1081 }, { "epoch": 0.11323914181057038, "grad_norm": 2.5935496575040693, "learning_rate": 1.9639087333315113e-05, "loss": 1.0176, "step": 1082 }, { "epoch": 0.11334379905808477, "grad_norm": 2.699922891159306, "learning_rate": 1.9638184324546597e-05, "loss": 1.1053, "step": 1083 }, { "epoch": 0.11344845630559916, "grad_norm": 3.2198916620549354, "learning_rate": 1.963728020833116e-05, "loss": 1.1036, "step": 1084 }, { "epoch": 0.11355311355311355, "grad_norm": 2.6375927221507145, "learning_rate": 1.963637498477269e-05, "loss": 0.9988, "step": 1085 }, { "epoch": 0.11365777080062794, "grad_norm": 2.938305615487851, "learning_rate": 1.9635468653975203e-05, "loss": 1.166, "step": 1086 }, { "epoch": 0.11376242804814234, "grad_norm": 2.2156017547233295, "learning_rate": 1.9634561216042834e-05, "loss": 1.0057, "step": 1087 }, { "epoch": 0.11386708529565673, "grad_norm": 2.4105372096002755, "learning_rate": 1.9633652671079854e-05, "loss": 0.9225, "step": 1088 }, { "epoch": 0.11397174254317112, "grad_norm": 2.412529782609449, "learning_rate": 1.9632743019190646e-05, "loss": 0.9356, "step": 1089 }, { "epoch": 0.1140763997906855, "grad_norm": 2.738234258928923, "learning_rate": 1.9631832260479736e-05, "loss": 1.1259, "step": 1090 }, { "epoch": 0.11418105703819989, "grad_norm": 2.295890829748179, "learning_rate": 1.963092039505178e-05, "loss": 1.0476, "step": 1091 }, { "epoch": 0.11428571428571428, "grad_norm": 2.4190921434998574, "learning_rate": 1.9630007423011543e-05, "loss": 1.0351, "step": 1092 }, { "epoch": 0.11439037153322867, "grad_norm": 2.5227441453952033, "learning_rate": 1.962909334446393e-05, "loss": 1.0712, "step": 1093 }, { "epoch": 0.11449502878074307, "grad_norm": 2.5782201716213775, "learning_rate": 1.9628178159513976e-05, "loss": 0.8372, "step": 1094 }, { "epoch": 0.11459968602825746, "grad_norm": 2.4279239527241367, "learning_rate": 1.962726186826683e-05, "loss": 1.0871, "step": 1095 }, { "epoch": 0.11470434327577185, "grad_norm": 2.2289981767762734, "learning_rate": 1.962634447082778e-05, "loss": 1.0551, "step": 1096 }, { "epoch": 0.11480900052328624, "grad_norm": 2.7919927641730387, "learning_rate": 1.9625425967302232e-05, "loss": 1.0695, "step": 1097 }, { "epoch": 0.11491365777080063, "grad_norm": 2.567716154128025, "learning_rate": 1.962450635779573e-05, "loss": 1.1617, "step": 1098 }, { "epoch": 0.11501831501831501, "grad_norm": 2.449051302816616, "learning_rate": 1.962358564241394e-05, "loss": 1.0829, "step": 1099 }, { "epoch": 0.1151229722658294, "grad_norm": 2.4898532560578728, "learning_rate": 1.9622663821262647e-05, "loss": 1.1857, "step": 1100 }, { "epoch": 0.1152276295133438, "grad_norm": 2.508714185849526, "learning_rate": 1.9621740894447776e-05, "loss": 0.9801, "step": 1101 }, { "epoch": 0.11533228676085819, "grad_norm": 2.864088004419296, "learning_rate": 1.9620816862075365e-05, "loss": 0.9741, "step": 1102 }, { "epoch": 0.11543694400837258, "grad_norm": 2.2451938017369457, "learning_rate": 1.9619891724251602e-05, "loss": 0.9874, "step": 1103 }, { "epoch": 0.11554160125588697, "grad_norm": 2.43522131047157, "learning_rate": 1.961896548108277e-05, "loss": 1.0198, "step": 1104 }, { "epoch": 0.11564625850340136, "grad_norm": 2.6482111396981196, "learning_rate": 1.961803813267531e-05, "loss": 1.0065, "step": 1105 }, { "epoch": 0.11575091575091576, "grad_norm": 2.663006273262908, "learning_rate": 1.961710967913577e-05, "loss": 1.0731, "step": 1106 }, { "epoch": 0.11585557299843015, "grad_norm": 2.2592261853498745, "learning_rate": 1.9616180120570833e-05, "loss": 1.0667, "step": 1107 }, { "epoch": 0.11596023024594453, "grad_norm": 2.5579103444641587, "learning_rate": 1.9615249457087302e-05, "loss": 1.0649, "step": 1108 }, { "epoch": 0.11606488749345892, "grad_norm": 2.5681709092759846, "learning_rate": 1.9614317688792118e-05, "loss": 1.0629, "step": 1109 }, { "epoch": 0.11616954474097331, "grad_norm": 2.7062909391638468, "learning_rate": 1.9613384815792343e-05, "loss": 1.0538, "step": 1110 }, { "epoch": 0.1162742019884877, "grad_norm": 3.1506469607183423, "learning_rate": 1.9612450838195164e-05, "loss": 0.9414, "step": 1111 }, { "epoch": 0.1163788592360021, "grad_norm": 2.771093053485509, "learning_rate": 1.9611515756107895e-05, "loss": 1.0721, "step": 1112 }, { "epoch": 0.11648351648351649, "grad_norm": 2.6299240759159033, "learning_rate": 1.9610579569637982e-05, "loss": 1.0506, "step": 1113 }, { "epoch": 0.11658817373103088, "grad_norm": 2.718685548324005, "learning_rate": 1.9609642278892992e-05, "loss": 1.0423, "step": 1114 }, { "epoch": 0.11669283097854527, "grad_norm": 2.617103354933396, "learning_rate": 1.9608703883980625e-05, "loss": 1.135, "step": 1115 }, { "epoch": 0.11679748822605965, "grad_norm": 2.8284262061059775, "learning_rate": 1.9607764385008706e-05, "loss": 0.9975, "step": 1116 }, { "epoch": 0.11690214547357404, "grad_norm": 2.422530563865842, "learning_rate": 1.9606823782085178e-05, "loss": 0.9965, "step": 1117 }, { "epoch": 0.11700680272108843, "grad_norm": 2.7309355633540022, "learning_rate": 1.9605882075318124e-05, "loss": 1.0416, "step": 1118 }, { "epoch": 0.11711145996860282, "grad_norm": 2.41740256806887, "learning_rate": 1.9604939264815746e-05, "loss": 1.091, "step": 1119 }, { "epoch": 0.11721611721611722, "grad_norm": 2.6049047174023565, "learning_rate": 1.960399535068638e-05, "loss": 0.9203, "step": 1120 }, { "epoch": 0.11732077446363161, "grad_norm": 2.352711031496301, "learning_rate": 1.9603050333038473e-05, "loss": 1.0542, "step": 1121 }, { "epoch": 0.117425431711146, "grad_norm": 3.3295305172916727, "learning_rate": 1.9602104211980614e-05, "loss": 0.9534, "step": 1122 }, { "epoch": 0.11753008895866039, "grad_norm": 3.1546428345111197, "learning_rate": 1.960115698762152e-05, "loss": 1.0338, "step": 1123 }, { "epoch": 0.11763474620617478, "grad_norm": 2.728833024559098, "learning_rate": 1.9600208660070022e-05, "loss": 1.131, "step": 1124 }, { "epoch": 0.11773940345368916, "grad_norm": 2.7185354106465236, "learning_rate": 1.9599259229435088e-05, "loss": 0.9256, "step": 1125 }, { "epoch": 0.11784406070120355, "grad_norm": 2.498618364520084, "learning_rate": 1.959830869582581e-05, "loss": 1.0662, "step": 1126 }, { "epoch": 0.11794871794871795, "grad_norm": 2.495040333087842, "learning_rate": 1.9597357059351404e-05, "loss": 1.0717, "step": 1127 }, { "epoch": 0.11805337519623234, "grad_norm": 2.623375696161012, "learning_rate": 1.9596404320121217e-05, "loss": 1.0103, "step": 1128 }, { "epoch": 0.11815803244374673, "grad_norm": 2.4391065346519496, "learning_rate": 1.9595450478244718e-05, "loss": 0.9587, "step": 1129 }, { "epoch": 0.11826268969126112, "grad_norm": 2.663286662491619, "learning_rate": 1.9594495533831507e-05, "loss": 1.1976, "step": 1130 }, { "epoch": 0.11836734693877551, "grad_norm": 2.5842407873088766, "learning_rate": 1.959353948699131e-05, "loss": 1.1649, "step": 1131 }, { "epoch": 0.1184720041862899, "grad_norm": 2.5723829154298534, "learning_rate": 1.959258233783398e-05, "loss": 1.1408, "step": 1132 }, { "epoch": 0.11857666143380428, "grad_norm": 2.378313766984001, "learning_rate": 1.959162408646949e-05, "loss": 1.0323, "step": 1133 }, { "epoch": 0.11868131868131868, "grad_norm": 2.561592915095796, "learning_rate": 1.9590664733007947e-05, "loss": 1.0473, "step": 1134 }, { "epoch": 0.11878597592883307, "grad_norm": 2.6784137838787765, "learning_rate": 1.9589704277559586e-05, "loss": 0.9144, "step": 1135 }, { "epoch": 0.11889063317634746, "grad_norm": 2.4050436698772057, "learning_rate": 1.958874272023476e-05, "loss": 0.9927, "step": 1136 }, { "epoch": 0.11899529042386185, "grad_norm": 2.787218387154808, "learning_rate": 1.9587780061143965e-05, "loss": 1.112, "step": 1137 }, { "epoch": 0.11909994767137624, "grad_norm": 2.467244551948357, "learning_rate": 1.9586816300397796e-05, "loss": 1.0967, "step": 1138 }, { "epoch": 0.11920460491889064, "grad_norm": 2.974817620992224, "learning_rate": 1.9585851438107007e-05, "loss": 1.0886, "step": 1139 }, { "epoch": 0.11930926216640503, "grad_norm": 2.7737239161164906, "learning_rate": 1.958488547438245e-05, "loss": 0.9261, "step": 1140 }, { "epoch": 0.11941391941391942, "grad_norm": 3.0863679881725132, "learning_rate": 1.958391840933512e-05, "loss": 0.8894, "step": 1141 }, { "epoch": 0.1195185766614338, "grad_norm": 2.2653693939706403, "learning_rate": 1.958295024307614e-05, "loss": 1.0348, "step": 1142 }, { "epoch": 0.11962323390894819, "grad_norm": 2.754842275125979, "learning_rate": 1.958198097571675e-05, "loss": 1.075, "step": 1143 }, { "epoch": 0.11972789115646258, "grad_norm": 2.832206040807847, "learning_rate": 1.9581010607368324e-05, "loss": 1.104, "step": 1144 }, { "epoch": 0.11983254840397697, "grad_norm": 2.6220443857258044, "learning_rate": 1.958003913814235e-05, "loss": 1.0461, "step": 1145 }, { "epoch": 0.11993720565149137, "grad_norm": 3.167797177471765, "learning_rate": 1.957906656815046e-05, "loss": 1.2079, "step": 1146 }, { "epoch": 0.12004186289900576, "grad_norm": 2.5145650658616647, "learning_rate": 1.9578092897504404e-05, "loss": 1.1371, "step": 1147 }, { "epoch": 0.12014652014652015, "grad_norm": 2.532990354557582, "learning_rate": 1.957711812631606e-05, "loss": 0.847, "step": 1148 }, { "epoch": 0.12025117739403454, "grad_norm": 2.498723144014304, "learning_rate": 1.9576142254697422e-05, "loss": 1.0768, "step": 1149 }, { "epoch": 0.12035583464154893, "grad_norm": 2.8848332638507608, "learning_rate": 1.957516528276063e-05, "loss": 1.0935, "step": 1150 }, { "epoch": 0.12046049188906331, "grad_norm": 2.407079189345069, "learning_rate": 1.9574187210617935e-05, "loss": 0.9884, "step": 1151 }, { "epoch": 0.1205651491365777, "grad_norm": 2.4789862581848325, "learning_rate": 1.9573208038381722e-05, "loss": 1.0612, "step": 1152 }, { "epoch": 0.1206698063840921, "grad_norm": 2.634050308370591, "learning_rate": 1.95722277661645e-05, "loss": 1.055, "step": 1153 }, { "epoch": 0.12077446363160649, "grad_norm": 2.646401341815591, "learning_rate": 1.95712463940789e-05, "loss": 1.1474, "step": 1154 }, { "epoch": 0.12087912087912088, "grad_norm": 2.8047167361076957, "learning_rate": 1.9570263922237686e-05, "loss": 1.0503, "step": 1155 }, { "epoch": 0.12098377812663527, "grad_norm": 2.5741451106686317, "learning_rate": 1.956928035075375e-05, "loss": 0.9393, "step": 1156 }, { "epoch": 0.12108843537414966, "grad_norm": 2.5973811680350045, "learning_rate": 1.9568295679740098e-05, "loss": 1.1236, "step": 1157 }, { "epoch": 0.12119309262166406, "grad_norm": 2.6113678867025616, "learning_rate": 1.956730990930988e-05, "loss": 1.0036, "step": 1158 }, { "epoch": 0.12129774986917843, "grad_norm": 2.763270013817754, "learning_rate": 1.9566323039576353e-05, "loss": 1.1911, "step": 1159 }, { "epoch": 0.12140240711669283, "grad_norm": 2.705626562419259, "learning_rate": 1.956533507065292e-05, "loss": 1.2495, "step": 1160 }, { "epoch": 0.12150706436420722, "grad_norm": 2.5695027901999175, "learning_rate": 1.9564346002653094e-05, "loss": 1.0895, "step": 1161 }, { "epoch": 0.12161172161172161, "grad_norm": 2.688239436865601, "learning_rate": 1.9563355835690526e-05, "loss": 1.066, "step": 1162 }, { "epoch": 0.121716378859236, "grad_norm": 2.4721295753539847, "learning_rate": 1.9562364569878985e-05, "loss": 0.9474, "step": 1163 }, { "epoch": 0.1218210361067504, "grad_norm": 2.9695025162758064, "learning_rate": 1.9561372205332366e-05, "loss": 1.1938, "step": 1164 }, { "epoch": 0.12192569335426479, "grad_norm": 2.3922931991769927, "learning_rate": 1.95603787421647e-05, "loss": 0.931, "step": 1165 }, { "epoch": 0.12203035060177918, "grad_norm": 2.533155414113076, "learning_rate": 1.9559384180490138e-05, "loss": 1.0848, "step": 1166 }, { "epoch": 0.12213500784929357, "grad_norm": 2.7804617711740462, "learning_rate": 1.9558388520422955e-05, "loss": 1.1158, "step": 1167 }, { "epoch": 0.12223966509680795, "grad_norm": 3.07185941955718, "learning_rate": 1.955739176207755e-05, "loss": 1.1685, "step": 1168 }, { "epoch": 0.12234432234432234, "grad_norm": 2.632035250919555, "learning_rate": 1.955639390556846e-05, "loss": 1.0409, "step": 1169 }, { "epoch": 0.12244897959183673, "grad_norm": 2.573941413049372, "learning_rate": 1.9555394951010337e-05, "loss": 1.2533, "step": 1170 }, { "epoch": 0.12255363683935112, "grad_norm": 2.5319580043800687, "learning_rate": 1.9554394898517962e-05, "loss": 1.1681, "step": 1171 }, { "epoch": 0.12265829408686552, "grad_norm": 2.983393734826706, "learning_rate": 1.9553393748206245e-05, "loss": 1.0353, "step": 1172 }, { "epoch": 0.12276295133437991, "grad_norm": 2.5654991130101776, "learning_rate": 1.955239150019022e-05, "loss": 1.1485, "step": 1173 }, { "epoch": 0.1228676085818943, "grad_norm": 2.577167856594292, "learning_rate": 1.9551388154585045e-05, "loss": 1.0843, "step": 1174 }, { "epoch": 0.12297226582940869, "grad_norm": 2.672800031307482, "learning_rate": 1.955038371150601e-05, "loss": 1.0571, "step": 1175 }, { "epoch": 0.12307692307692308, "grad_norm": 2.2255667081208133, "learning_rate": 1.954937817106853e-05, "loss": 0.9892, "step": 1176 }, { "epoch": 0.12318158032443746, "grad_norm": 2.5034867735183304, "learning_rate": 1.9548371533388134e-05, "loss": 1.1986, "step": 1177 }, { "epoch": 0.12328623757195185, "grad_norm": 2.8720211636190043, "learning_rate": 1.954736379858049e-05, "loss": 1.2097, "step": 1178 }, { "epoch": 0.12339089481946625, "grad_norm": 2.8330847231206047, "learning_rate": 1.9546354966761397e-05, "loss": 1.115, "step": 1179 }, { "epoch": 0.12349555206698064, "grad_norm": 2.4195461792808306, "learning_rate": 1.9545345038046762e-05, "loss": 1.0036, "step": 1180 }, { "epoch": 0.12360020931449503, "grad_norm": 2.906126283855808, "learning_rate": 1.9544334012552633e-05, "loss": 1.0183, "step": 1181 }, { "epoch": 0.12370486656200942, "grad_norm": 2.5436235281065698, "learning_rate": 1.954332189039518e-05, "loss": 0.9942, "step": 1182 }, { "epoch": 0.12380952380952381, "grad_norm": 2.826315997445539, "learning_rate": 1.954230867169069e-05, "loss": 1.2136, "step": 1183 }, { "epoch": 0.1239141810570382, "grad_norm": 2.7260954242001514, "learning_rate": 1.9541294356555594e-05, "loss": 0.9941, "step": 1184 }, { "epoch": 0.12401883830455258, "grad_norm": 3.1932984486388234, "learning_rate": 1.9540278945106433e-05, "loss": 1.065, "step": 1185 }, { "epoch": 0.12412349555206698, "grad_norm": 2.3449880675934884, "learning_rate": 1.953926243745988e-05, "loss": 0.965, "step": 1186 }, { "epoch": 0.12422815279958137, "grad_norm": 2.475504471283617, "learning_rate": 1.9538244833732737e-05, "loss": 1.0389, "step": 1187 }, { "epoch": 0.12433281004709576, "grad_norm": 2.648680996485242, "learning_rate": 1.9537226134041925e-05, "loss": 1.1165, "step": 1188 }, { "epoch": 0.12443746729461015, "grad_norm": 2.658874814055567, "learning_rate": 1.9536206338504494e-05, "loss": 1.0662, "step": 1189 }, { "epoch": 0.12454212454212454, "grad_norm": 2.486917672270425, "learning_rate": 1.9535185447237626e-05, "loss": 1.0065, "step": 1190 }, { "epoch": 0.12464678178963894, "grad_norm": 2.522352267870716, "learning_rate": 1.953416346035862e-05, "loss": 1.0536, "step": 1191 }, { "epoch": 0.12475143903715333, "grad_norm": 2.595719707912744, "learning_rate": 1.9533140377984906e-05, "loss": 1.0279, "step": 1192 }, { "epoch": 0.12485609628466772, "grad_norm": 2.7586476277587315, "learning_rate": 1.9532116200234034e-05, "loss": 0.997, "step": 1193 }, { "epoch": 0.1249607535321821, "grad_norm": 2.323954362886985, "learning_rate": 1.9531090927223687e-05, "loss": 1.0201, "step": 1194 }, { "epoch": 0.1250654107796965, "grad_norm": 2.3865025964537656, "learning_rate": 1.9530064559071672e-05, "loss": 1.0528, "step": 1195 }, { "epoch": 0.1251700680272109, "grad_norm": 2.4307960677955838, "learning_rate": 1.952903709589592e-05, "loss": 1.0709, "step": 1196 }, { "epoch": 0.12527472527472527, "grad_norm": 2.51314339549336, "learning_rate": 1.9528008537814488e-05, "loss": 1.1309, "step": 1197 }, { "epoch": 0.12537938252223965, "grad_norm": 2.4769743325065594, "learning_rate": 1.952697888494556e-05, "loss": 1.144, "step": 1198 }, { "epoch": 0.12548403976975406, "grad_norm": 2.358721280245034, "learning_rate": 1.9525948137407443e-05, "loss": 0.9865, "step": 1199 }, { "epoch": 0.12558869701726844, "grad_norm": 2.3206179689553723, "learning_rate": 1.9524916295318576e-05, "loss": 1.0968, "step": 1200 }, { "epoch": 0.12569335426478284, "grad_norm": 2.316497811222147, "learning_rate": 1.9523883358797517e-05, "loss": 0.9474, "step": 1201 }, { "epoch": 0.12579801151229722, "grad_norm": 2.706485454764553, "learning_rate": 1.9522849327962954e-05, "loss": 1.0489, "step": 1202 }, { "epoch": 0.12590266875981163, "grad_norm": 2.838226256388987, "learning_rate": 1.9521814202933697e-05, "loss": 1.1056, "step": 1203 }, { "epoch": 0.126007326007326, "grad_norm": 2.6389275404903327, "learning_rate": 1.952077798382869e-05, "loss": 1.0294, "step": 1204 }, { "epoch": 0.1261119832548404, "grad_norm": 2.24218821930294, "learning_rate": 1.9519740670766985e-05, "loss": 1.059, "step": 1205 }, { "epoch": 0.1262166405023548, "grad_norm": 2.3594941049558176, "learning_rate": 1.951870226386778e-05, "loss": 1.1141, "step": 1206 }, { "epoch": 0.12632129774986917, "grad_norm": 2.867410423081424, "learning_rate": 1.951766276325039e-05, "loss": 1.1167, "step": 1207 }, { "epoch": 0.12642595499738357, "grad_norm": 2.360366489000132, "learning_rate": 1.9516622169034253e-05, "loss": 1.0121, "step": 1208 }, { "epoch": 0.12653061224489795, "grad_norm": 2.8918870260240634, "learning_rate": 1.951558048133894e-05, "loss": 0.9879, "step": 1209 }, { "epoch": 0.12663526949241236, "grad_norm": 2.470183042311598, "learning_rate": 1.951453770028414e-05, "loss": 1.0716, "step": 1210 }, { "epoch": 0.12673992673992673, "grad_norm": 2.5896489389181374, "learning_rate": 1.9513493825989664e-05, "loss": 1.078, "step": 1211 }, { "epoch": 0.12684458398744114, "grad_norm": 2.476294549537629, "learning_rate": 1.9512448858575466e-05, "loss": 0.9433, "step": 1212 }, { "epoch": 0.12694924123495552, "grad_norm": 2.678105912674675, "learning_rate": 1.951140279816161e-05, "loss": 1.0967, "step": 1213 }, { "epoch": 0.12705389848246992, "grad_norm": 2.837977652142853, "learning_rate": 1.951035564486829e-05, "loss": 1.0051, "step": 1214 }, { "epoch": 0.1271585557299843, "grad_norm": 2.836479859442291, "learning_rate": 1.950930739881583e-05, "loss": 1.0707, "step": 1215 }, { "epoch": 0.12726321297749868, "grad_norm": 2.2675840070291633, "learning_rate": 1.9508258060124668e-05, "loss": 1.0395, "step": 1216 }, { "epoch": 0.12736787022501309, "grad_norm": 3.2573289605017974, "learning_rate": 1.950720762891538e-05, "loss": 1.1698, "step": 1217 }, { "epoch": 0.12747252747252746, "grad_norm": 2.252961963832606, "learning_rate": 1.9506156105308665e-05, "loss": 0.8967, "step": 1218 }, { "epoch": 0.12757718472004187, "grad_norm": 1.8906880106435673, "learning_rate": 1.950510348942534e-05, "loss": 0.8001, "step": 1219 }, { "epoch": 0.12768184196755625, "grad_norm": 2.5679268387367373, "learning_rate": 1.9504049781386358e-05, "loss": 1.0456, "step": 1220 }, { "epoch": 0.12778649921507065, "grad_norm": 2.742942369429627, "learning_rate": 1.9502994981312785e-05, "loss": 1.0444, "step": 1221 }, { "epoch": 0.12789115646258503, "grad_norm": 2.5837062790653302, "learning_rate": 1.950193908932583e-05, "loss": 1.0361, "step": 1222 }, { "epoch": 0.12799581371009944, "grad_norm": 2.601717403532944, "learning_rate": 1.9500882105546806e-05, "loss": 0.9211, "step": 1223 }, { "epoch": 0.12810047095761382, "grad_norm": 2.3596656749420317, "learning_rate": 1.9499824030097173e-05, "loss": 0.9949, "step": 1224 }, { "epoch": 0.1282051282051282, "grad_norm": 2.431051750501933, "learning_rate": 1.9498764863098494e-05, "loss": 1.111, "step": 1225 }, { "epoch": 0.1283097854526426, "grad_norm": 2.465871960006348, "learning_rate": 1.949770460467248e-05, "loss": 1.0887, "step": 1226 }, { "epoch": 0.12841444270015698, "grad_norm": 2.3804735150810883, "learning_rate": 1.9496643254940952e-05, "loss": 1.0723, "step": 1227 }, { "epoch": 0.12851909994767138, "grad_norm": 2.5375697941496265, "learning_rate": 1.9495580814025864e-05, "loss": 0.9679, "step": 1228 }, { "epoch": 0.12862375719518576, "grad_norm": 2.3461082467161565, "learning_rate": 1.949451728204929e-05, "loss": 0.9353, "step": 1229 }, { "epoch": 0.12872841444270017, "grad_norm": 2.770975522225647, "learning_rate": 1.9493452659133437e-05, "loss": 1.0304, "step": 1230 }, { "epoch": 0.12883307169021455, "grad_norm": 2.3083829511711396, "learning_rate": 1.9492386945400623e-05, "loss": 0.9771, "step": 1231 }, { "epoch": 0.12893772893772895, "grad_norm": 2.697784666924789, "learning_rate": 1.9491320140973305e-05, "loss": 1.1808, "step": 1232 }, { "epoch": 0.12904238618524333, "grad_norm": 2.8520274235768532, "learning_rate": 1.9490252245974062e-05, "loss": 1.0209, "step": 1233 }, { "epoch": 0.1291470434327577, "grad_norm": 2.2734770595974276, "learning_rate": 1.9489183260525603e-05, "loss": 1.0474, "step": 1234 }, { "epoch": 0.1292517006802721, "grad_norm": 2.645718364092188, "learning_rate": 1.9488113184750747e-05, "loss": 1.064, "step": 1235 }, { "epoch": 0.1293563579277865, "grad_norm": 2.41791085736045, "learning_rate": 1.9487042018772452e-05, "loss": 1.0731, "step": 1236 }, { "epoch": 0.1294610151753009, "grad_norm": 2.527589108825978, "learning_rate": 1.9485969762713797e-05, "loss": 1.082, "step": 1237 }, { "epoch": 0.12956567242281528, "grad_norm": 2.779752862819606, "learning_rate": 1.9484896416697983e-05, "loss": 0.9216, "step": 1238 }, { "epoch": 0.12967032967032968, "grad_norm": 2.732412932188138, "learning_rate": 1.9483821980848346e-05, "loss": 1.1465, "step": 1239 }, { "epoch": 0.12977498691784406, "grad_norm": 2.4729282241151935, "learning_rate": 1.948274645528834e-05, "loss": 0.9567, "step": 1240 }, { "epoch": 0.12987964416535844, "grad_norm": 2.6626696165070056, "learning_rate": 1.948166984014154e-05, "loss": 0.9809, "step": 1241 }, { "epoch": 0.12998430141287284, "grad_norm": 2.6256425374078676, "learning_rate": 1.9480592135531654e-05, "loss": 1.0356, "step": 1242 }, { "epoch": 0.13008895866038722, "grad_norm": 2.692113072642092, "learning_rate": 1.9479513341582513e-05, "loss": 1.028, "step": 1243 }, { "epoch": 0.13019361590790163, "grad_norm": 2.834736069467216, "learning_rate": 1.947843345841807e-05, "loss": 0.9586, "step": 1244 }, { "epoch": 0.130298273155416, "grad_norm": 2.7981103330661568, "learning_rate": 1.947735248616241e-05, "loss": 1.0169, "step": 1245 }, { "epoch": 0.1304029304029304, "grad_norm": 2.6541080453909545, "learning_rate": 1.9476270424939736e-05, "loss": 1.0977, "step": 1246 }, { "epoch": 0.1305075876504448, "grad_norm": 2.6432248199471005, "learning_rate": 1.9475187274874382e-05, "loss": 1.1162, "step": 1247 }, { "epoch": 0.1306122448979592, "grad_norm": 2.655070618581444, "learning_rate": 1.9474103036090795e-05, "loss": 1.0413, "step": 1248 }, { "epoch": 0.13071690214547357, "grad_norm": 3.197415853957612, "learning_rate": 1.947301770871357e-05, "loss": 1.0412, "step": 1249 }, { "epoch": 0.13082155939298795, "grad_norm": 2.592069054029112, "learning_rate": 1.9471931292867405e-05, "loss": 1.0165, "step": 1250 }, { "epoch": 0.13092621664050236, "grad_norm": 3.655152675264706, "learning_rate": 1.9470843788677132e-05, "loss": 0.9054, "step": 1251 }, { "epoch": 0.13103087388801674, "grad_norm": 2.882892532973182, "learning_rate": 1.9469755196267706e-05, "loss": 1.1792, "step": 1252 }, { "epoch": 0.13113553113553114, "grad_norm": 2.6595064843196954, "learning_rate": 1.9468665515764216e-05, "loss": 1.0301, "step": 1253 }, { "epoch": 0.13124018838304552, "grad_norm": 2.3865070700069158, "learning_rate": 1.9467574747291862e-05, "loss": 0.9721, "step": 1254 }, { "epoch": 0.13134484563055993, "grad_norm": 2.723979611241111, "learning_rate": 1.9466482890975975e-05, "loss": 1.1416, "step": 1255 }, { "epoch": 0.1314495028780743, "grad_norm": 2.605292005226839, "learning_rate": 1.9465389946942013e-05, "loss": 1.1899, "step": 1256 }, { "epoch": 0.1315541601255887, "grad_norm": 2.5605584808263675, "learning_rate": 1.9464295915315555e-05, "loss": 1.139, "step": 1257 }, { "epoch": 0.1316588173731031, "grad_norm": 2.6247600589750046, "learning_rate": 1.9463200796222318e-05, "loss": 1.1418, "step": 1258 }, { "epoch": 0.13176347462061747, "grad_norm": 2.0595672766120896, "learning_rate": 1.946210458978812e-05, "loss": 1.0889, "step": 1259 }, { "epoch": 0.13186813186813187, "grad_norm": 2.3474913936763313, "learning_rate": 1.9461007296138925e-05, "loss": 1.0599, "step": 1260 }, { "epoch": 0.13197278911564625, "grad_norm": 2.9581647619226685, "learning_rate": 1.945990891540081e-05, "loss": 0.9018, "step": 1261 }, { "epoch": 0.13207744636316066, "grad_norm": 2.984957639865946, "learning_rate": 1.9458809447699985e-05, "loss": 1.1799, "step": 1262 }, { "epoch": 0.13218210361067503, "grad_norm": 2.772705342436505, "learning_rate": 1.945770889316278e-05, "loss": 1.0051, "step": 1263 }, { "epoch": 0.13228676085818944, "grad_norm": 2.6058433960187894, "learning_rate": 1.945660725191565e-05, "loss": 1.0885, "step": 1264 }, { "epoch": 0.13239141810570382, "grad_norm": 2.7920782381998035, "learning_rate": 1.9455504524085174e-05, "loss": 0.9356, "step": 1265 }, { "epoch": 0.13249607535321822, "grad_norm": 2.6926709248661966, "learning_rate": 1.9454400709798063e-05, "loss": 1.0235, "step": 1266 }, { "epoch": 0.1326007326007326, "grad_norm": 2.306933420813203, "learning_rate": 1.9453295809181144e-05, "loss": 1.0709, "step": 1267 }, { "epoch": 0.13270538984824698, "grad_norm": 2.400935059767989, "learning_rate": 1.9452189822361368e-05, "loss": 1.0141, "step": 1268 }, { "epoch": 0.13281004709576139, "grad_norm": 2.706220647073549, "learning_rate": 1.9451082749465822e-05, "loss": 0.9398, "step": 1269 }, { "epoch": 0.13291470434327576, "grad_norm": 2.5158658128425015, "learning_rate": 1.9449974590621713e-05, "loss": 1.1766, "step": 1270 }, { "epoch": 0.13301936159079017, "grad_norm": 2.506227273861366, "learning_rate": 1.9448865345956364e-05, "loss": 1.0084, "step": 1271 }, { "epoch": 0.13312401883830455, "grad_norm": 2.2919348376307402, "learning_rate": 1.944775501559723e-05, "loss": 0.9608, "step": 1272 }, { "epoch": 0.13322867608581895, "grad_norm": 2.9193041380716545, "learning_rate": 1.9446643599671895e-05, "loss": 0.8943, "step": 1273 }, { "epoch": 0.13333333333333333, "grad_norm": 2.615394555513494, "learning_rate": 1.944553109830806e-05, "loss": 1.2087, "step": 1274 }, { "epoch": 0.13343799058084774, "grad_norm": 2.489254352916976, "learning_rate": 1.944441751163355e-05, "loss": 0.9713, "step": 1275 }, { "epoch": 0.13354264782836212, "grad_norm": 2.841255305838923, "learning_rate": 1.9443302839776327e-05, "loss": 1.0022, "step": 1276 }, { "epoch": 0.1336473050758765, "grad_norm": 2.5946261317323507, "learning_rate": 1.944218708286446e-05, "loss": 1.0554, "step": 1277 }, { "epoch": 0.1337519623233909, "grad_norm": 2.284752575634108, "learning_rate": 1.944107024102616e-05, "loss": 1.0401, "step": 1278 }, { "epoch": 0.13385661957090528, "grad_norm": 2.9886804389402735, "learning_rate": 1.943995231438975e-05, "loss": 1.0555, "step": 1279 }, { "epoch": 0.13396127681841968, "grad_norm": 2.602163456273472, "learning_rate": 1.9438833303083677e-05, "loss": 1.0488, "step": 1280 }, { "epoch": 0.13406593406593406, "grad_norm": 2.485141727539193, "learning_rate": 1.9437713207236525e-05, "loss": 1.04, "step": 1281 }, { "epoch": 0.13417059131344847, "grad_norm": 2.727354509959165, "learning_rate": 1.9436592026976994e-05, "loss": 1.0357, "step": 1282 }, { "epoch": 0.13427524856096285, "grad_norm": 2.3638486724054513, "learning_rate": 1.943546976243391e-05, "loss": 1.1289, "step": 1283 }, { "epoch": 0.13437990580847722, "grad_norm": 2.717669536906904, "learning_rate": 1.943434641373622e-05, "loss": 1.0574, "step": 1284 }, { "epoch": 0.13448456305599163, "grad_norm": 2.2734518432998914, "learning_rate": 1.9433221981013007e-05, "loss": 1.0728, "step": 1285 }, { "epoch": 0.134589220303506, "grad_norm": 2.6630965958749533, "learning_rate": 1.9432096464393463e-05, "loss": 1.061, "step": 1286 }, { "epoch": 0.1346938775510204, "grad_norm": 3.1093231373052923, "learning_rate": 1.943096986400691e-05, "loss": 1.0387, "step": 1287 }, { "epoch": 0.1347985347985348, "grad_norm": 2.6785202972118682, "learning_rate": 1.942984217998281e-05, "loss": 1.021, "step": 1288 }, { "epoch": 0.1349031920460492, "grad_norm": 2.5540937962722543, "learning_rate": 1.9428713412450718e-05, "loss": 1.0165, "step": 1289 }, { "epoch": 0.13500784929356358, "grad_norm": 2.3166372688832726, "learning_rate": 1.9427583561540344e-05, "loss": 1.0847, "step": 1290 }, { "epoch": 0.13511250654107798, "grad_norm": 2.301232179651965, "learning_rate": 1.942645262738151e-05, "loss": 1.1781, "step": 1291 }, { "epoch": 0.13521716378859236, "grad_norm": 2.3252095241169077, "learning_rate": 1.9425320610104157e-05, "loss": 1.0947, "step": 1292 }, { "epoch": 0.13532182103610674, "grad_norm": 2.493316820257742, "learning_rate": 1.942418750983836e-05, "loss": 1.0104, "step": 1293 }, { "epoch": 0.13542647828362114, "grad_norm": 2.3545726405840153, "learning_rate": 1.942305332671431e-05, "loss": 0.9365, "step": 1294 }, { "epoch": 0.13553113553113552, "grad_norm": 2.3550828896798324, "learning_rate": 1.9421918060862333e-05, "loss": 1.1441, "step": 1295 }, { "epoch": 0.13563579277864993, "grad_norm": 2.6556094154616146, "learning_rate": 1.942078171241287e-05, "loss": 1.0687, "step": 1296 }, { "epoch": 0.1357404500261643, "grad_norm": 2.5889079401056017, "learning_rate": 1.9419644281496492e-05, "loss": 1.036, "step": 1297 }, { "epoch": 0.1358451072736787, "grad_norm": 2.4413416204425222, "learning_rate": 1.9418505768243888e-05, "loss": 1.1142, "step": 1298 }, { "epoch": 0.1359497645211931, "grad_norm": 2.2226669500183753, "learning_rate": 1.941736617278588e-05, "loss": 1.0729, "step": 1299 }, { "epoch": 0.1360544217687075, "grad_norm": 2.3250136278743234, "learning_rate": 1.9416225495253406e-05, "loss": 1.0012, "step": 1300 }, { "epoch": 0.13615907901622187, "grad_norm": 2.6455616428189743, "learning_rate": 1.9415083735777535e-05, "loss": 0.9987, "step": 1301 }, { "epoch": 0.13626373626373625, "grad_norm": 2.515570541189039, "learning_rate": 1.9413940894489456e-05, "loss": 0.9546, "step": 1302 }, { "epoch": 0.13636839351125066, "grad_norm": 2.3610892292812014, "learning_rate": 1.9412796971520486e-05, "loss": 1.002, "step": 1303 }, { "epoch": 0.13647305075876504, "grad_norm": 2.351486484199385, "learning_rate": 1.9411651967002057e-05, "loss": 0.952, "step": 1304 }, { "epoch": 0.13657770800627944, "grad_norm": 2.2806059320082377, "learning_rate": 1.9410505881065745e-05, "loss": 1.0238, "step": 1305 }, { "epoch": 0.13668236525379382, "grad_norm": 2.8037668411236947, "learning_rate": 1.9409358713843226e-05, "loss": 0.8635, "step": 1306 }, { "epoch": 0.13678702250130823, "grad_norm": 2.354476426007749, "learning_rate": 1.9408210465466315e-05, "loss": 1.0029, "step": 1307 }, { "epoch": 0.1368916797488226, "grad_norm": 2.58570996843689, "learning_rate": 1.9407061136066952e-05, "loss": 1.1598, "step": 1308 }, { "epoch": 0.136996336996337, "grad_norm": 2.2297849493454174, "learning_rate": 1.940591072577719e-05, "loss": 1.0518, "step": 1309 }, { "epoch": 0.1371009942438514, "grad_norm": 2.4744470445442968, "learning_rate": 1.9404759234729224e-05, "loss": 1.0939, "step": 1310 }, { "epoch": 0.13720565149136577, "grad_norm": 2.597406306328376, "learning_rate": 1.940360666305535e-05, "loss": 1.122, "step": 1311 }, { "epoch": 0.13731030873888017, "grad_norm": 2.24297119060127, "learning_rate": 1.9402453010888013e-05, "loss": 0.9809, "step": 1312 }, { "epoch": 0.13741496598639455, "grad_norm": 2.3878513385258633, "learning_rate": 1.940129827835976e-05, "loss": 1.1173, "step": 1313 }, { "epoch": 0.13751962323390896, "grad_norm": 2.195222848508457, "learning_rate": 1.940014246560328e-05, "loss": 1.0293, "step": 1314 }, { "epoch": 0.13762428048142333, "grad_norm": 2.266044481844879, "learning_rate": 1.939898557275137e-05, "loss": 0.9389, "step": 1315 }, { "epoch": 0.13772893772893774, "grad_norm": 2.227326516279883, "learning_rate": 1.9397827599936967e-05, "loss": 1.0587, "step": 1316 }, { "epoch": 0.13783359497645212, "grad_norm": 2.3264930899908216, "learning_rate": 1.9396668547293123e-05, "loss": 1.0047, "step": 1317 }, { "epoch": 0.13793825222396652, "grad_norm": 2.7272328130888543, "learning_rate": 1.9395508414953014e-05, "loss": 1.0649, "step": 1318 }, { "epoch": 0.1380429094714809, "grad_norm": 2.317310994073378, "learning_rate": 1.9394347203049935e-05, "loss": 0.8952, "step": 1319 }, { "epoch": 0.13814756671899528, "grad_norm": 2.7425995244638863, "learning_rate": 1.9393184911717325e-05, "loss": 1.0672, "step": 1320 }, { "epoch": 0.13825222396650969, "grad_norm": 3.3758460756291897, "learning_rate": 1.9392021541088723e-05, "loss": 1.133, "step": 1321 }, { "epoch": 0.13835688121402406, "grad_norm": 2.316620489795767, "learning_rate": 1.939085709129781e-05, "loss": 0.9964, "step": 1322 }, { "epoch": 0.13846153846153847, "grad_norm": 2.3989333216450355, "learning_rate": 1.9389691562478375e-05, "loss": 1.111, "step": 1323 }, { "epoch": 0.13856619570905285, "grad_norm": 2.562438640327413, "learning_rate": 1.9388524954764345e-05, "loss": 0.9959, "step": 1324 }, { "epoch": 0.13867085295656725, "grad_norm": 2.4165917099961782, "learning_rate": 1.938735726828977e-05, "loss": 1.1408, "step": 1325 }, { "epoch": 0.13877551020408163, "grad_norm": 2.5972911347326297, "learning_rate": 1.938618850318881e-05, "loss": 1.0121, "step": 1326 }, { "epoch": 0.138880167451596, "grad_norm": 2.178384853061503, "learning_rate": 1.9385018659595768e-05, "loss": 0.9381, "step": 1327 }, { "epoch": 0.13898482469911042, "grad_norm": 2.341869108186974, "learning_rate": 1.9383847737645053e-05, "loss": 1.0985, "step": 1328 }, { "epoch": 0.1390894819466248, "grad_norm": 2.211733194769639, "learning_rate": 1.938267573747121e-05, "loss": 0.9999, "step": 1329 }, { "epoch": 0.1391941391941392, "grad_norm": 2.4888824976978463, "learning_rate": 1.9381502659208903e-05, "loss": 1.1316, "step": 1330 }, { "epoch": 0.13929879644165358, "grad_norm": 2.5487250145244755, "learning_rate": 1.9380328502992926e-05, "loss": 1.1004, "step": 1331 }, { "epoch": 0.13940345368916798, "grad_norm": 2.8605454756354027, "learning_rate": 1.9379153268958183e-05, "loss": 0.86, "step": 1332 }, { "epoch": 0.13950811093668236, "grad_norm": 2.275799199333925, "learning_rate": 1.937797695723972e-05, "loss": 1.0387, "step": 1333 }, { "epoch": 0.13961276818419677, "grad_norm": 2.256873395213135, "learning_rate": 1.9376799567972692e-05, "loss": 0.9848, "step": 1334 }, { "epoch": 0.13971742543171115, "grad_norm": 2.664507783387787, "learning_rate": 1.9375621101292386e-05, "loss": 1.0539, "step": 1335 }, { "epoch": 0.13982208267922552, "grad_norm": 2.571479041795175, "learning_rate": 1.9374441557334206e-05, "loss": 1.0578, "step": 1336 }, { "epoch": 0.13992673992673993, "grad_norm": 2.6718984503316623, "learning_rate": 1.9373260936233692e-05, "loss": 1.0356, "step": 1337 }, { "epoch": 0.1400313971742543, "grad_norm": 3.01919139078155, "learning_rate": 1.937207923812649e-05, "loss": 1.0371, "step": 1338 }, { "epoch": 0.1401360544217687, "grad_norm": 2.267242964618876, "learning_rate": 1.9370896463148387e-05, "loss": 0.9883, "step": 1339 }, { "epoch": 0.1402407116692831, "grad_norm": 2.513036526104354, "learning_rate": 1.9369712611435285e-05, "loss": 1.073, "step": 1340 }, { "epoch": 0.1403453689167975, "grad_norm": 2.3727052030885663, "learning_rate": 1.9368527683123205e-05, "loss": 0.8568, "step": 1341 }, { "epoch": 0.14045002616431188, "grad_norm": 2.3202546866710585, "learning_rate": 1.9367341678348307e-05, "loss": 1.0071, "step": 1342 }, { "epoch": 0.14055468341182628, "grad_norm": 2.689452076940995, "learning_rate": 1.9366154597246856e-05, "loss": 1.1804, "step": 1343 }, { "epoch": 0.14065934065934066, "grad_norm": 2.742100535424126, "learning_rate": 1.936496643995526e-05, "loss": 0.9589, "step": 1344 }, { "epoch": 0.14076399790685504, "grad_norm": 2.481796517485657, "learning_rate": 1.936377720661003e-05, "loss": 1.0471, "step": 1345 }, { "epoch": 0.14086865515436944, "grad_norm": 2.5501058876317995, "learning_rate": 1.936258689734782e-05, "loss": 0.9166, "step": 1346 }, { "epoch": 0.14097331240188382, "grad_norm": 2.336496862440958, "learning_rate": 1.9361395512305395e-05, "loss": 1.088, "step": 1347 }, { "epoch": 0.14107796964939823, "grad_norm": 2.8026304750796487, "learning_rate": 1.9360203051619648e-05, "loss": 1.084, "step": 1348 }, { "epoch": 0.1411826268969126, "grad_norm": 3.299010180332248, "learning_rate": 1.9359009515427595e-05, "loss": 1.1847, "step": 1349 }, { "epoch": 0.141287284144427, "grad_norm": 2.7958593548788655, "learning_rate": 1.9357814903866376e-05, "loss": 1.0987, "step": 1350 }, { "epoch": 0.1413919413919414, "grad_norm": 2.3205373831985274, "learning_rate": 1.9356619217073252e-05, "loss": 0.9035, "step": 1351 }, { "epoch": 0.1414965986394558, "grad_norm": 2.761571592559715, "learning_rate": 1.9355422455185614e-05, "loss": 1.1353, "step": 1352 }, { "epoch": 0.14160125588697017, "grad_norm": 2.7278975042129443, "learning_rate": 1.9354224618340974e-05, "loss": 1.0695, "step": 1353 }, { "epoch": 0.14170591313448455, "grad_norm": 2.261842313891641, "learning_rate": 1.9353025706676956e-05, "loss": 1.0403, "step": 1354 }, { "epoch": 0.14181057038199896, "grad_norm": 2.375471536045439, "learning_rate": 1.9351825720331328e-05, "loss": 1.0732, "step": 1355 }, { "epoch": 0.14191522762951334, "grad_norm": 2.27300544182062, "learning_rate": 1.9350624659441966e-05, "loss": 1.0484, "step": 1356 }, { "epoch": 0.14201988487702774, "grad_norm": 2.316150199671341, "learning_rate": 1.9349422524146876e-05, "loss": 1.0243, "step": 1357 }, { "epoch": 0.14212454212454212, "grad_norm": 2.1366142849380494, "learning_rate": 1.9348219314584182e-05, "loss": 0.9372, "step": 1358 }, { "epoch": 0.14222919937205653, "grad_norm": 2.4024552323411545, "learning_rate": 1.934701503089214e-05, "loss": 0.9742, "step": 1359 }, { "epoch": 0.1423338566195709, "grad_norm": 2.7893632727077557, "learning_rate": 1.934580967320912e-05, "loss": 0.9427, "step": 1360 }, { "epoch": 0.1424385138670853, "grad_norm": 2.7934280403563143, "learning_rate": 1.9344603241673624e-05, "loss": 1.0359, "step": 1361 }, { "epoch": 0.1425431711145997, "grad_norm": 2.4453534181554044, "learning_rate": 1.9343395736424273e-05, "loss": 0.915, "step": 1362 }, { "epoch": 0.14264782836211407, "grad_norm": 2.643045406854803, "learning_rate": 1.934218715759981e-05, "loss": 1.0316, "step": 1363 }, { "epoch": 0.14275248560962847, "grad_norm": 2.6295938809870023, "learning_rate": 1.9340977505339105e-05, "loss": 1.0912, "step": 1364 }, { "epoch": 0.14285714285714285, "grad_norm": 2.34279514669924, "learning_rate": 1.9339766779781145e-05, "loss": 0.9535, "step": 1365 }, { "epoch": 0.14296180010465726, "grad_norm": 2.9096664201728535, "learning_rate": 1.9338554981065055e-05, "loss": 1.0789, "step": 1366 }, { "epoch": 0.14306645735217163, "grad_norm": 1.7847384845546328, "learning_rate": 1.9337342109330063e-05, "loss": 0.8184, "step": 1367 }, { "epoch": 0.14317111459968604, "grad_norm": 4.650586633766713, "learning_rate": 1.933612816471553e-05, "loss": 0.9754, "step": 1368 }, { "epoch": 0.14327577184720042, "grad_norm": 2.4222691647243484, "learning_rate": 1.9334913147360947e-05, "loss": 1.1472, "step": 1369 }, { "epoch": 0.1433804290947148, "grad_norm": 2.8646659408254034, "learning_rate": 1.9333697057405923e-05, "loss": 1.0426, "step": 1370 }, { "epoch": 0.1434850863422292, "grad_norm": 2.755651171543178, "learning_rate": 1.933247989499018e-05, "loss": 0.9983, "step": 1371 }, { "epoch": 0.14358974358974358, "grad_norm": 2.823997144325609, "learning_rate": 1.9331261660253582e-05, "loss": 1.0014, "step": 1372 }, { "epoch": 0.14369440083725799, "grad_norm": 1.7965865119756081, "learning_rate": 1.9330042353336105e-05, "loss": 0.8996, "step": 1373 }, { "epoch": 0.14379905808477236, "grad_norm": 2.530518145697574, "learning_rate": 1.9328821974377843e-05, "loss": 1.0131, "step": 1374 }, { "epoch": 0.14390371533228677, "grad_norm": 2.5598798754427587, "learning_rate": 1.9327600523519024e-05, "loss": 0.8846, "step": 1375 }, { "epoch": 0.14400837257980115, "grad_norm": 2.404146251680647, "learning_rate": 1.93263780009e-05, "loss": 0.9166, "step": 1376 }, { "epoch": 0.14411302982731555, "grad_norm": 2.5090164360878915, "learning_rate": 1.9325154406661236e-05, "loss": 1.0343, "step": 1377 }, { "epoch": 0.14421768707482993, "grad_norm": 2.613140816630469, "learning_rate": 1.9323929740943327e-05, "loss": 1.0372, "step": 1378 }, { "epoch": 0.1443223443223443, "grad_norm": 2.7666096244065805, "learning_rate": 1.9322704003886988e-05, "loss": 1.0378, "step": 1379 }, { "epoch": 0.14442700156985872, "grad_norm": 3.1803876980173023, "learning_rate": 1.932147719563306e-05, "loss": 1.0994, "step": 1380 }, { "epoch": 0.1445316588173731, "grad_norm": 2.0896284201518567, "learning_rate": 1.9320249316322505e-05, "loss": 1.0265, "step": 1381 }, { "epoch": 0.1446363160648875, "grad_norm": 2.322244889283912, "learning_rate": 1.931902036609641e-05, "loss": 0.9989, "step": 1382 }, { "epoch": 0.14474097331240188, "grad_norm": 2.752401702892132, "learning_rate": 1.9317790345095984e-05, "loss": 1.0343, "step": 1383 }, { "epoch": 0.14484563055991628, "grad_norm": 2.6752134593317454, "learning_rate": 1.9316559253462556e-05, "loss": 1.0981, "step": 1384 }, { "epoch": 0.14495028780743066, "grad_norm": 2.234026157936921, "learning_rate": 1.9315327091337587e-05, "loss": 0.9987, "step": 1385 }, { "epoch": 0.14505494505494507, "grad_norm": 2.825030948671621, "learning_rate": 1.931409385886265e-05, "loss": 1.1929, "step": 1386 }, { "epoch": 0.14515960230245945, "grad_norm": 2.4650646815837676, "learning_rate": 1.9312859556179445e-05, "loss": 1.0682, "step": 1387 }, { "epoch": 0.14526425954997382, "grad_norm": 2.7590671185555267, "learning_rate": 1.9311624183429795e-05, "loss": 1.0708, "step": 1388 }, { "epoch": 0.14536891679748823, "grad_norm": 2.5815894772069954, "learning_rate": 1.9310387740755657e-05, "loss": 0.9889, "step": 1389 }, { "epoch": 0.1454735740450026, "grad_norm": 2.6236133860227637, "learning_rate": 1.930915022829909e-05, "loss": 1.071, "step": 1390 }, { "epoch": 0.145578231292517, "grad_norm": 2.7247337786787638, "learning_rate": 1.9307911646202288e-05, "loss": 1.1056, "step": 1391 }, { "epoch": 0.1456828885400314, "grad_norm": 2.862234904110278, "learning_rate": 1.930667199460757e-05, "loss": 1.0049, "step": 1392 }, { "epoch": 0.1457875457875458, "grad_norm": 2.4645303878971156, "learning_rate": 1.9305431273657373e-05, "loss": 1.005, "step": 1393 }, { "epoch": 0.14589220303506018, "grad_norm": 2.213086769666513, "learning_rate": 1.9304189483494264e-05, "loss": 0.8771, "step": 1394 }, { "epoch": 0.14599686028257458, "grad_norm": 2.743298220409371, "learning_rate": 1.9302946624260914e-05, "loss": 1.2425, "step": 1395 }, { "epoch": 0.14610151753008896, "grad_norm": 2.6622529241671757, "learning_rate": 1.930170269610014e-05, "loss": 1.0216, "step": 1396 }, { "epoch": 0.14620617477760334, "grad_norm": 2.7147812441457515, "learning_rate": 1.9300457699154874e-05, "loss": 0.9964, "step": 1397 }, { "epoch": 0.14631083202511774, "grad_norm": 2.393611225263065, "learning_rate": 1.929921163356816e-05, "loss": 1.0048, "step": 1398 }, { "epoch": 0.14641548927263212, "grad_norm": 2.7084391545813573, "learning_rate": 1.9297964499483178e-05, "loss": 1.069, "step": 1399 }, { "epoch": 0.14652014652014653, "grad_norm": 4.288573871222411, "learning_rate": 1.929671629704323e-05, "loss": 1.0899, "step": 1400 }, { "epoch": 0.1466248037676609, "grad_norm": 2.317166017560044, "learning_rate": 1.929546702639173e-05, "loss": 0.9723, "step": 1401 }, { "epoch": 0.1467294610151753, "grad_norm": 2.0977663597117373, "learning_rate": 1.9294216687672222e-05, "loss": 1.0926, "step": 1402 }, { "epoch": 0.1468341182626897, "grad_norm": 2.494438503271147, "learning_rate": 1.929296528102838e-05, "loss": 0.8911, "step": 1403 }, { "epoch": 0.1469387755102041, "grad_norm": 2.718143112765956, "learning_rate": 1.9291712806603987e-05, "loss": 1.0866, "step": 1404 }, { "epoch": 0.14704343275771847, "grad_norm": 2.2131296320523797, "learning_rate": 1.9290459264542957e-05, "loss": 1.0103, "step": 1405 }, { "epoch": 0.14714809000523285, "grad_norm": 2.9091331858933347, "learning_rate": 1.9289204654989324e-05, "loss": 1.1178, "step": 1406 }, { "epoch": 0.14725274725274726, "grad_norm": 2.6753705952770708, "learning_rate": 1.928794897808724e-05, "loss": 1.0762, "step": 1407 }, { "epoch": 0.14735740450026164, "grad_norm": 2.000163233169189, "learning_rate": 1.9286692233980994e-05, "loss": 1.0423, "step": 1408 }, { "epoch": 0.14746206174777604, "grad_norm": 2.293295761305028, "learning_rate": 1.9285434422814984e-05, "loss": 1.0555, "step": 1409 }, { "epoch": 0.14756671899529042, "grad_norm": 2.2949310514737262, "learning_rate": 1.9284175544733735e-05, "loss": 1.0947, "step": 1410 }, { "epoch": 0.14767137624280482, "grad_norm": 2.5294931864353485, "learning_rate": 1.9282915599881895e-05, "loss": 1.1798, "step": 1411 }, { "epoch": 0.1477760334903192, "grad_norm": 2.3551404864975822, "learning_rate": 1.9281654588404233e-05, "loss": 1.0175, "step": 1412 }, { "epoch": 0.1478806907378336, "grad_norm": 1.9955894483022132, "learning_rate": 1.9280392510445646e-05, "loss": 1.0019, "step": 1413 }, { "epoch": 0.147985347985348, "grad_norm": 2.1950167870687385, "learning_rate": 1.9279129366151143e-05, "loss": 0.8811, "step": 1414 }, { "epoch": 0.14809000523286237, "grad_norm": 2.21385771131273, "learning_rate": 1.9277865155665867e-05, "loss": 0.9541, "step": 1415 }, { "epoch": 0.14819466248037677, "grad_norm": 3.1210671096804017, "learning_rate": 1.9276599879135074e-05, "loss": 1.3079, "step": 1416 }, { "epoch": 0.14829931972789115, "grad_norm": 2.629699340784082, "learning_rate": 1.927533353670415e-05, "loss": 0.9163, "step": 1417 }, { "epoch": 0.14840397697540555, "grad_norm": 2.77960958263951, "learning_rate": 1.92740661285186e-05, "loss": 1.0696, "step": 1418 }, { "epoch": 0.14850863422291993, "grad_norm": 2.5708441812829177, "learning_rate": 1.9272797654724052e-05, "loss": 1.1057, "step": 1419 }, { "epoch": 0.14861329147043434, "grad_norm": 2.1036647647909303, "learning_rate": 1.9271528115466257e-05, "loss": 0.9253, "step": 1420 }, { "epoch": 0.14871794871794872, "grad_norm": 2.2534281596027483, "learning_rate": 1.9270257510891083e-05, "loss": 0.9099, "step": 1421 }, { "epoch": 0.1488226059654631, "grad_norm": 2.1743546862854366, "learning_rate": 1.926898584114453e-05, "loss": 1.047, "step": 1422 }, { "epoch": 0.1489272632129775, "grad_norm": 2.810151030114405, "learning_rate": 1.9267713106372716e-05, "loss": 0.9191, "step": 1423 }, { "epoch": 0.14903192046049188, "grad_norm": 2.2964956304821578, "learning_rate": 1.9266439306721874e-05, "loss": 1.0545, "step": 1424 }, { "epoch": 0.14913657770800628, "grad_norm": 2.3401825418391407, "learning_rate": 1.9265164442338374e-05, "loss": 1.0503, "step": 1425 }, { "epoch": 0.14924123495552066, "grad_norm": 2.2508646328782818, "learning_rate": 1.9263888513368693e-05, "loss": 1.0283, "step": 1426 }, { "epoch": 0.14934589220303507, "grad_norm": 2.6552965793248364, "learning_rate": 1.9262611519959446e-05, "loss": 1.0631, "step": 1427 }, { "epoch": 0.14945054945054945, "grad_norm": 2.532689406015064, "learning_rate": 1.9261333462257357e-05, "loss": 1.1762, "step": 1428 }, { "epoch": 0.14955520669806385, "grad_norm": 2.5552203568036758, "learning_rate": 1.9260054340409277e-05, "loss": 1.0354, "step": 1429 }, { "epoch": 0.14965986394557823, "grad_norm": 2.3797872928219026, "learning_rate": 1.9258774154562183e-05, "loss": 1.044, "step": 1430 }, { "epoch": 0.1497645211930926, "grad_norm": 2.5188480453800097, "learning_rate": 1.9257492904863168e-05, "loss": 1.0178, "step": 1431 }, { "epoch": 0.14986917844060701, "grad_norm": 1.9882868472731003, "learning_rate": 1.9256210591459452e-05, "loss": 0.7584, "step": 1432 }, { "epoch": 0.1499738356881214, "grad_norm": 2.4171327636272837, "learning_rate": 1.9254927214498376e-05, "loss": 1.0275, "step": 1433 }, { "epoch": 0.1500784929356358, "grad_norm": 2.6335229959993693, "learning_rate": 1.92536427741274e-05, "loss": 1.157, "step": 1434 }, { "epoch": 0.15018315018315018, "grad_norm": 2.418758003326527, "learning_rate": 1.9252357270494108e-05, "loss": 1.107, "step": 1435 }, { "epoch": 0.15028780743066458, "grad_norm": 2.4515624975379073, "learning_rate": 1.9251070703746212e-05, "loss": 1.0629, "step": 1436 }, { "epoch": 0.15039246467817896, "grad_norm": 2.5078647866604364, "learning_rate": 1.9249783074031537e-05, "loss": 0.9575, "step": 1437 }, { "epoch": 0.15049712192569337, "grad_norm": 2.05009431610471, "learning_rate": 1.9248494381498036e-05, "loss": 0.9304, "step": 1438 }, { "epoch": 0.15060177917320774, "grad_norm": 2.2150107036134288, "learning_rate": 1.924720462629378e-05, "loss": 1.0904, "step": 1439 }, { "epoch": 0.15070643642072212, "grad_norm": 2.147637671766143, "learning_rate": 1.9245913808566972e-05, "loss": 0.8344, "step": 1440 }, { "epoch": 0.15081109366823653, "grad_norm": 2.883011617888164, "learning_rate": 1.9244621928465922e-05, "loss": 1.1529, "step": 1441 }, { "epoch": 0.1509157509157509, "grad_norm": 2.5500903724133543, "learning_rate": 1.9243328986139067e-05, "loss": 1.0428, "step": 1442 }, { "epoch": 0.1510204081632653, "grad_norm": 2.4159647382548393, "learning_rate": 1.9242034981734977e-05, "loss": 1.0373, "step": 1443 }, { "epoch": 0.1511250654107797, "grad_norm": 2.454453579212534, "learning_rate": 1.924073991540233e-05, "loss": 1.0861, "step": 1444 }, { "epoch": 0.1512297226582941, "grad_norm": 1.8614495838101486, "learning_rate": 1.9239443787289937e-05, "loss": 0.821, "step": 1445 }, { "epoch": 0.15133437990580847, "grad_norm": 2.450919159468918, "learning_rate": 1.923814659754672e-05, "loss": 0.9241, "step": 1446 }, { "epoch": 0.15143903715332288, "grad_norm": 2.3140383964591784, "learning_rate": 1.9236848346321733e-05, "loss": 1.09, "step": 1447 }, { "epoch": 0.15154369440083726, "grad_norm": 2.4237734839061624, "learning_rate": 1.9235549033764146e-05, "loss": 0.944, "step": 1448 }, { "epoch": 0.15164835164835164, "grad_norm": 2.748321943938643, "learning_rate": 1.923424866002325e-05, "loss": 1.1062, "step": 1449 }, { "epoch": 0.15175300889586604, "grad_norm": 3.0209245770619453, "learning_rate": 1.9232947225248465e-05, "loss": 1.0515, "step": 1450 }, { "epoch": 0.15185766614338042, "grad_norm": 2.5606861904785476, "learning_rate": 1.9231644729589326e-05, "loss": 1.1291, "step": 1451 }, { "epoch": 0.15196232339089483, "grad_norm": 2.581511623994528, "learning_rate": 1.9230341173195493e-05, "loss": 1.0573, "step": 1452 }, { "epoch": 0.1520669806384092, "grad_norm": 2.158586432143417, "learning_rate": 1.9229036556216748e-05, "loss": 0.9755, "step": 1453 }, { "epoch": 0.1521716378859236, "grad_norm": 2.4855105857033926, "learning_rate": 1.9227730878802996e-05, "loss": 1.023, "step": 1454 }, { "epoch": 0.152276295133438, "grad_norm": 2.5999432367590316, "learning_rate": 1.9226424141104252e-05, "loss": 1.1008, "step": 1455 }, { "epoch": 0.1523809523809524, "grad_norm": 2.3917057103613075, "learning_rate": 1.9225116343270677e-05, "loss": 1.0145, "step": 1456 }, { "epoch": 0.15248560962846677, "grad_norm": 2.2293516130088795, "learning_rate": 1.9223807485452532e-05, "loss": 1.019, "step": 1457 }, { "epoch": 0.15259026687598115, "grad_norm": 2.42242544414817, "learning_rate": 1.9222497567800208e-05, "loss": 0.9934, "step": 1458 }, { "epoch": 0.15269492412349556, "grad_norm": 2.209915153802859, "learning_rate": 1.9221186590464214e-05, "loss": 1.0644, "step": 1459 }, { "epoch": 0.15279958137100993, "grad_norm": 2.2671395202419773, "learning_rate": 1.9219874553595192e-05, "loss": 0.98, "step": 1460 }, { "epoch": 0.15290423861852434, "grad_norm": 2.6320601579269725, "learning_rate": 1.9218561457343892e-05, "loss": 0.9559, "step": 1461 }, { "epoch": 0.15300889586603872, "grad_norm": 3.128326326853589, "learning_rate": 1.9217247301861192e-05, "loss": 1.057, "step": 1462 }, { "epoch": 0.15311355311355312, "grad_norm": 2.6110874383167224, "learning_rate": 1.9215932087298093e-05, "loss": 1.1651, "step": 1463 }, { "epoch": 0.1532182103610675, "grad_norm": 2.899961166471011, "learning_rate": 1.9214615813805713e-05, "loss": 1.0419, "step": 1464 }, { "epoch": 0.15332286760858188, "grad_norm": 2.484808931850369, "learning_rate": 1.9213298481535295e-05, "loss": 0.9486, "step": 1465 }, { "epoch": 0.1534275248560963, "grad_norm": 2.4399470897359095, "learning_rate": 1.9211980090638204e-05, "loss": 0.9143, "step": 1466 }, { "epoch": 0.15353218210361066, "grad_norm": 2.6027448143774756, "learning_rate": 1.9210660641265926e-05, "loss": 1.0988, "step": 1467 }, { "epoch": 0.15363683935112507, "grad_norm": 2.806968895627165, "learning_rate": 1.920934013357007e-05, "loss": 1.1467, "step": 1468 }, { "epoch": 0.15374149659863945, "grad_norm": 2.5531294035484446, "learning_rate": 1.9208018567702365e-05, "loss": 1.1388, "step": 1469 }, { "epoch": 0.15384615384615385, "grad_norm": 2.811782089564992, "learning_rate": 1.9206695943814657e-05, "loss": 0.9528, "step": 1470 }, { "epoch": 0.15395081109366823, "grad_norm": 2.3041792729089683, "learning_rate": 1.9205372262058923e-05, "loss": 0.9661, "step": 1471 }, { "epoch": 0.15405546834118264, "grad_norm": 2.106029830838967, "learning_rate": 1.9204047522587256e-05, "loss": 0.8363, "step": 1472 }, { "epoch": 0.15416012558869702, "grad_norm": 2.417394330181495, "learning_rate": 1.9202721725551864e-05, "loss": 1.1144, "step": 1473 }, { "epoch": 0.1542647828362114, "grad_norm": 2.046523358752946, "learning_rate": 1.9201394871105098e-05, "loss": 0.9455, "step": 1474 }, { "epoch": 0.1543694400837258, "grad_norm": 2.6640103245094946, "learning_rate": 1.9200066959399403e-05, "loss": 1.007, "step": 1475 }, { "epoch": 0.15447409733124018, "grad_norm": 2.583568326692352, "learning_rate": 1.9198737990587367e-05, "loss": 1.0767, "step": 1476 }, { "epoch": 0.15457875457875458, "grad_norm": 2.5142524512605644, "learning_rate": 1.9197407964821684e-05, "loss": 1.1079, "step": 1477 }, { "epoch": 0.15468341182626896, "grad_norm": 2.608398316355045, "learning_rate": 1.919607688225518e-05, "loss": 1.0722, "step": 1478 }, { "epoch": 0.15478806907378337, "grad_norm": 2.575108862444703, "learning_rate": 1.9194744743040806e-05, "loss": 1.1298, "step": 1479 }, { "epoch": 0.15489272632129775, "grad_norm": 2.548511459425752, "learning_rate": 1.9193411547331618e-05, "loss": 1.0149, "step": 1480 }, { "epoch": 0.15499738356881215, "grad_norm": 2.8655016663149366, "learning_rate": 1.9192077295280804e-05, "loss": 0.9912, "step": 1481 }, { "epoch": 0.15510204081632653, "grad_norm": 2.302307429570692, "learning_rate": 1.9190741987041677e-05, "loss": 1.1227, "step": 1482 }, { "epoch": 0.1552066980638409, "grad_norm": 2.709474213319208, "learning_rate": 1.9189405622767666e-05, "loss": 1.1448, "step": 1483 }, { "epoch": 0.15531135531135531, "grad_norm": 1.9449774082886633, "learning_rate": 1.9188068202612317e-05, "loss": 0.8605, "step": 1484 }, { "epoch": 0.1554160125588697, "grad_norm": 2.217490324767267, "learning_rate": 1.9186729726729308e-05, "loss": 1.0131, "step": 1485 }, { "epoch": 0.1555206698063841, "grad_norm": 2.3193422710271605, "learning_rate": 1.9185390195272428e-05, "loss": 1.0585, "step": 1486 }, { "epoch": 0.15562532705389848, "grad_norm": 2.4865611357772432, "learning_rate": 1.9184049608395596e-05, "loss": 1.0997, "step": 1487 }, { "epoch": 0.15572998430141288, "grad_norm": 2.783780276185395, "learning_rate": 1.9182707966252842e-05, "loss": 0.9589, "step": 1488 }, { "epoch": 0.15583464154892726, "grad_norm": 1.8816997343630404, "learning_rate": 1.9181365268998328e-05, "loss": 0.8778, "step": 1489 }, { "epoch": 0.15593929879644167, "grad_norm": 2.341369035073116, "learning_rate": 1.9180021516786334e-05, "loss": 1.0037, "step": 1490 }, { "epoch": 0.15604395604395604, "grad_norm": 2.7516857057542397, "learning_rate": 1.917867670977126e-05, "loss": 1.1602, "step": 1491 }, { "epoch": 0.15614861329147042, "grad_norm": 2.5272703478492127, "learning_rate": 1.917733084810762e-05, "loss": 1.0647, "step": 1492 }, { "epoch": 0.15625327053898483, "grad_norm": 2.4404363392464723, "learning_rate": 1.9175983931950064e-05, "loss": 0.9985, "step": 1493 }, { "epoch": 0.1563579277864992, "grad_norm": 2.9891735698022908, "learning_rate": 1.9174635961453353e-05, "loss": 1.1115, "step": 1494 }, { "epoch": 0.1564625850340136, "grad_norm": 2.67141152526144, "learning_rate": 1.9173286936772368e-05, "loss": 1.1347, "step": 1495 }, { "epoch": 0.156567242281528, "grad_norm": 2.4091308182799223, "learning_rate": 1.917193685806212e-05, "loss": 1.1213, "step": 1496 }, { "epoch": 0.1566718995290424, "grad_norm": 2.126176198100282, "learning_rate": 1.9170585725477734e-05, "loss": 1.0569, "step": 1497 }, { "epoch": 0.15677655677655677, "grad_norm": 2.729603104944734, "learning_rate": 1.9169233539174458e-05, "loss": 1.0782, "step": 1498 }, { "epoch": 0.15688121402407118, "grad_norm": 2.513953681189516, "learning_rate": 1.916788029930766e-05, "loss": 1.0887, "step": 1499 }, { "epoch": 0.15698587127158556, "grad_norm": 2.6538867773454715, "learning_rate": 1.9166526006032828e-05, "loss": 1.0529, "step": 1500 }, { "epoch": 0.15709052851909994, "grad_norm": 2.7220788186480744, "learning_rate": 1.9165170659505577e-05, "loss": 1.108, "step": 1501 }, { "epoch": 0.15719518576661434, "grad_norm": 2.4347603808435676, "learning_rate": 1.916381425988164e-05, "loss": 1.0695, "step": 1502 }, { "epoch": 0.15729984301412872, "grad_norm": 2.8818520587577425, "learning_rate": 1.916245680731686e-05, "loss": 1.1366, "step": 1503 }, { "epoch": 0.15740450026164313, "grad_norm": 2.3660768203543325, "learning_rate": 1.916109830196723e-05, "loss": 1.1054, "step": 1504 }, { "epoch": 0.1575091575091575, "grad_norm": 2.0199292578050114, "learning_rate": 1.9159738743988824e-05, "loss": 0.9812, "step": 1505 }, { "epoch": 0.1576138147566719, "grad_norm": 2.846624791338478, "learning_rate": 1.915837813353787e-05, "loss": 1.0785, "step": 1506 }, { "epoch": 0.1577184720041863, "grad_norm": 2.84528056057021, "learning_rate": 1.9157016470770704e-05, "loss": 1.0292, "step": 1507 }, { "epoch": 0.15782312925170067, "grad_norm": 2.9007238219313765, "learning_rate": 1.915565375584378e-05, "loss": 1.0423, "step": 1508 }, { "epoch": 0.15792778649921507, "grad_norm": 2.2980335605739857, "learning_rate": 1.9154289988913684e-05, "loss": 1.0505, "step": 1509 }, { "epoch": 0.15803244374672945, "grad_norm": 2.5527713053393626, "learning_rate": 1.9152925170137107e-05, "loss": 0.9359, "step": 1510 }, { "epoch": 0.15813710099424386, "grad_norm": 2.8078774178279953, "learning_rate": 1.915155929967087e-05, "loss": 0.941, "step": 1511 }, { "epoch": 0.15824175824175823, "grad_norm": 2.5590185945658077, "learning_rate": 1.9150192377671923e-05, "loss": 1.0774, "step": 1512 }, { "epoch": 0.15834641548927264, "grad_norm": 2.6027080227490953, "learning_rate": 1.914882440429732e-05, "loss": 0.9899, "step": 1513 }, { "epoch": 0.15845107273678702, "grad_norm": 2.6337545570436927, "learning_rate": 1.9147455379704246e-05, "loss": 1.0319, "step": 1514 }, { "epoch": 0.15855572998430142, "grad_norm": 2.502047847027918, "learning_rate": 1.9146085304050007e-05, "loss": 0.8999, "step": 1515 }, { "epoch": 0.1586603872318158, "grad_norm": 2.327612116726649, "learning_rate": 1.9144714177492022e-05, "loss": 1.0622, "step": 1516 }, { "epoch": 0.15876504447933018, "grad_norm": 2.5795756527864837, "learning_rate": 1.9143342000187843e-05, "loss": 1.0127, "step": 1517 }, { "epoch": 0.1588697017268446, "grad_norm": 2.440086755209261, "learning_rate": 1.914196877229513e-05, "loss": 0.9672, "step": 1518 }, { "epoch": 0.15897435897435896, "grad_norm": 2.6625915906187654, "learning_rate": 1.9140594493971676e-05, "loss": 1.0476, "step": 1519 }, { "epoch": 0.15907901622187337, "grad_norm": 2.2315198666216443, "learning_rate": 1.9139219165375383e-05, "loss": 1.0311, "step": 1520 }, { "epoch": 0.15918367346938775, "grad_norm": 2.4701713494311592, "learning_rate": 1.913784278666428e-05, "loss": 1.0938, "step": 1521 }, { "epoch": 0.15928833071690215, "grad_norm": 2.377821429542283, "learning_rate": 1.9136465357996517e-05, "loss": 1.0208, "step": 1522 }, { "epoch": 0.15939298796441653, "grad_norm": 2.4194712197927974, "learning_rate": 1.9135086879530363e-05, "loss": 1.0568, "step": 1523 }, { "epoch": 0.15949764521193094, "grad_norm": 2.3390951758786005, "learning_rate": 1.9133707351424204e-05, "loss": 1.0695, "step": 1524 }, { "epoch": 0.15960230245944532, "grad_norm": 2.530783648332205, "learning_rate": 1.913232677383656e-05, "loss": 0.9741, "step": 1525 }, { "epoch": 0.1597069597069597, "grad_norm": 2.5392759403405254, "learning_rate": 1.9130945146926054e-05, "loss": 1.0942, "step": 1526 }, { "epoch": 0.1598116169544741, "grad_norm": 2.3718557105745073, "learning_rate": 1.912956247085144e-05, "loss": 0.9684, "step": 1527 }, { "epoch": 0.15991627420198848, "grad_norm": 2.260719250710198, "learning_rate": 1.9128178745771592e-05, "loss": 0.9989, "step": 1528 }, { "epoch": 0.16002093144950288, "grad_norm": 2.739167566121222, "learning_rate": 1.91267939718455e-05, "loss": 1.0678, "step": 1529 }, { "epoch": 0.16012558869701726, "grad_norm": 2.6646863118785613, "learning_rate": 1.912540814923228e-05, "loss": 1.0639, "step": 1530 }, { "epoch": 0.16023024594453167, "grad_norm": 2.194868214474638, "learning_rate": 1.912402127809116e-05, "loss": 0.874, "step": 1531 }, { "epoch": 0.16033490319204605, "grad_norm": 2.3676540984204992, "learning_rate": 1.9122633358581507e-05, "loss": 1.1603, "step": 1532 }, { "epoch": 0.16043956043956045, "grad_norm": 2.6218767772877634, "learning_rate": 1.912124439086278e-05, "loss": 1.0975, "step": 1533 }, { "epoch": 0.16054421768707483, "grad_norm": 2.5083643616431206, "learning_rate": 1.9119854375094586e-05, "loss": 0.9346, "step": 1534 }, { "epoch": 0.1606488749345892, "grad_norm": 2.721016148141621, "learning_rate": 1.9118463311436636e-05, "loss": 1.0989, "step": 1535 }, { "epoch": 0.16075353218210361, "grad_norm": 2.473905042239637, "learning_rate": 1.9117071200048766e-05, "loss": 1.1074, "step": 1536 }, { "epoch": 0.160858189429618, "grad_norm": 2.265192355726035, "learning_rate": 1.9115678041090934e-05, "loss": 0.9356, "step": 1537 }, { "epoch": 0.1609628466771324, "grad_norm": 2.8019320178423563, "learning_rate": 1.911428383472321e-05, "loss": 1.1416, "step": 1538 }, { "epoch": 0.16106750392464678, "grad_norm": 2.339428603113728, "learning_rate": 1.9112888581105803e-05, "loss": 1.0687, "step": 1539 }, { "epoch": 0.16117216117216118, "grad_norm": 2.832854657617948, "learning_rate": 1.9111492280399022e-05, "loss": 1.056, "step": 1540 }, { "epoch": 0.16127681841967556, "grad_norm": 2.230222759725198, "learning_rate": 1.911009493276331e-05, "loss": 1.0508, "step": 1541 }, { "epoch": 0.16138147566718997, "grad_norm": 2.3090795673914215, "learning_rate": 1.9108696538359218e-05, "loss": 1.1137, "step": 1542 }, { "epoch": 0.16148613291470434, "grad_norm": 2.613787477378895, "learning_rate": 1.910729709734743e-05, "loss": 1.0693, "step": 1543 }, { "epoch": 0.16159079016221872, "grad_norm": 2.7873744759292243, "learning_rate": 1.9105896609888742e-05, "loss": 1.0308, "step": 1544 }, { "epoch": 0.16169544740973313, "grad_norm": 2.2750511261647026, "learning_rate": 1.9104495076144077e-05, "loss": 0.9986, "step": 1545 }, { "epoch": 0.1618001046572475, "grad_norm": 2.2610184840565863, "learning_rate": 1.9103092496274467e-05, "loss": 0.9975, "step": 1546 }, { "epoch": 0.1619047619047619, "grad_norm": 2.6441259800557275, "learning_rate": 1.910168887044108e-05, "loss": 1.0092, "step": 1547 }, { "epoch": 0.1620094191522763, "grad_norm": 2.4288506046325096, "learning_rate": 1.910028419880519e-05, "loss": 1.1243, "step": 1548 }, { "epoch": 0.1621140763997907, "grad_norm": 2.4032519124937473, "learning_rate": 1.9098878481528192e-05, "loss": 0.9376, "step": 1549 }, { "epoch": 0.16221873364730507, "grad_norm": 2.6930599410682805, "learning_rate": 1.9097471718771615e-05, "loss": 1.0011, "step": 1550 }, { "epoch": 0.16232339089481945, "grad_norm": 2.944692543573583, "learning_rate": 1.9096063910697096e-05, "loss": 1.1354, "step": 1551 }, { "epoch": 0.16242804814233386, "grad_norm": 2.1076650082395787, "learning_rate": 1.9094655057466394e-05, "loss": 0.8077, "step": 1552 }, { "epoch": 0.16253270538984824, "grad_norm": 2.633417159588095, "learning_rate": 1.9093245159241386e-05, "loss": 0.9511, "step": 1553 }, { "epoch": 0.16263736263736264, "grad_norm": 2.4640899726417986, "learning_rate": 1.9091834216184078e-05, "loss": 1.1126, "step": 1554 }, { "epoch": 0.16274201988487702, "grad_norm": 2.309637404087591, "learning_rate": 1.9090422228456586e-05, "loss": 1.0576, "step": 1555 }, { "epoch": 0.16284667713239143, "grad_norm": 2.4383894345913655, "learning_rate": 1.9089009196221153e-05, "loss": 0.7495, "step": 1556 }, { "epoch": 0.1629513343799058, "grad_norm": 2.445885248005966, "learning_rate": 1.9087595119640137e-05, "loss": 1.0271, "step": 1557 }, { "epoch": 0.1630559916274202, "grad_norm": 2.5458426627911757, "learning_rate": 1.908617999887602e-05, "loss": 1.0411, "step": 1558 }, { "epoch": 0.1631606488749346, "grad_norm": 2.1937796229963213, "learning_rate": 1.90847638340914e-05, "loss": 1.0347, "step": 1559 }, { "epoch": 0.16326530612244897, "grad_norm": 2.537434579639156, "learning_rate": 1.9083346625449e-05, "loss": 1.0541, "step": 1560 }, { "epoch": 0.16336996336996337, "grad_norm": 2.144339908895929, "learning_rate": 1.908192837311166e-05, "loss": 0.9906, "step": 1561 }, { "epoch": 0.16347462061747775, "grad_norm": 2.7950297429331843, "learning_rate": 1.9080509077242336e-05, "loss": 1.0121, "step": 1562 }, { "epoch": 0.16357927786499216, "grad_norm": 2.146536389927275, "learning_rate": 1.9079088738004115e-05, "loss": 0.8715, "step": 1563 }, { "epoch": 0.16368393511250653, "grad_norm": 2.342813449285308, "learning_rate": 1.907766735556019e-05, "loss": 0.9457, "step": 1564 }, { "epoch": 0.16378859236002094, "grad_norm": 2.423858021588017, "learning_rate": 1.9076244930073885e-05, "loss": 0.9405, "step": 1565 }, { "epoch": 0.16389324960753532, "grad_norm": 2.2113311521403514, "learning_rate": 1.9074821461708633e-05, "loss": 1.032, "step": 1566 }, { "epoch": 0.16399790685504972, "grad_norm": 2.5538309174750347, "learning_rate": 1.9073396950628006e-05, "loss": 1.078, "step": 1567 }, { "epoch": 0.1641025641025641, "grad_norm": 2.116186550938168, "learning_rate": 1.9071971396995673e-05, "loss": 0.9762, "step": 1568 }, { "epoch": 0.16420722135007848, "grad_norm": 2.787785331892462, "learning_rate": 1.9070544800975433e-05, "loss": 1.1774, "step": 1569 }, { "epoch": 0.1643118785975929, "grad_norm": 2.4908693717866295, "learning_rate": 1.906911716273121e-05, "loss": 1.0658, "step": 1570 }, { "epoch": 0.16441653584510726, "grad_norm": 2.267571098697772, "learning_rate": 1.9067688482427037e-05, "loss": 0.9325, "step": 1571 }, { "epoch": 0.16452119309262167, "grad_norm": 2.3281432184570665, "learning_rate": 1.906625876022708e-05, "loss": 1.0401, "step": 1572 }, { "epoch": 0.16462585034013605, "grad_norm": 2.2909902279482703, "learning_rate": 1.906482799629561e-05, "loss": 1.0412, "step": 1573 }, { "epoch": 0.16473050758765045, "grad_norm": 2.2388140959121916, "learning_rate": 1.9063396190797024e-05, "loss": 1.0802, "step": 1574 }, { "epoch": 0.16483516483516483, "grad_norm": 2.4751194312927973, "learning_rate": 1.9061963343895848e-05, "loss": 0.9623, "step": 1575 }, { "epoch": 0.16493982208267924, "grad_norm": 2.2766727680557963, "learning_rate": 1.9060529455756707e-05, "loss": 1.078, "step": 1576 }, { "epoch": 0.16504447933019362, "grad_norm": 2.2759067164759785, "learning_rate": 1.905909452654437e-05, "loss": 1.0286, "step": 1577 }, { "epoch": 0.165149136577708, "grad_norm": 2.366060725452454, "learning_rate": 1.9057658556423702e-05, "loss": 0.9825, "step": 1578 }, { "epoch": 0.1652537938252224, "grad_norm": 2.3640547803376646, "learning_rate": 1.9056221545559704e-05, "loss": 0.9975, "step": 1579 }, { "epoch": 0.16535845107273678, "grad_norm": 2.4952266534643512, "learning_rate": 1.9054783494117496e-05, "loss": 0.9696, "step": 1580 }, { "epoch": 0.16546310832025118, "grad_norm": 2.566903238353252, "learning_rate": 1.9053344402262306e-05, "loss": 1.1521, "step": 1581 }, { "epoch": 0.16556776556776556, "grad_norm": 2.39022571551939, "learning_rate": 1.9051904270159492e-05, "loss": 0.8817, "step": 1582 }, { "epoch": 0.16567242281527997, "grad_norm": 2.461629879445559, "learning_rate": 1.9050463097974528e-05, "loss": 0.9734, "step": 1583 }, { "epoch": 0.16577708006279435, "grad_norm": 2.398269719344242, "learning_rate": 1.9049020885873008e-05, "loss": 1.0474, "step": 1584 }, { "epoch": 0.16588173731030875, "grad_norm": 1.9737021667497825, "learning_rate": 1.904757763402064e-05, "loss": 0.9153, "step": 1585 }, { "epoch": 0.16598639455782313, "grad_norm": 2.4557990474197258, "learning_rate": 1.904613334258326e-05, "loss": 1.0526, "step": 1586 }, { "epoch": 0.1660910518053375, "grad_norm": 2.296988769284568, "learning_rate": 1.9044688011726823e-05, "loss": 0.9095, "step": 1587 }, { "epoch": 0.16619570905285191, "grad_norm": 2.261789866599913, "learning_rate": 1.9043241641617398e-05, "loss": 1.0134, "step": 1588 }, { "epoch": 0.1663003663003663, "grad_norm": 2.457617607787954, "learning_rate": 1.9041794232421177e-05, "loss": 1.0589, "step": 1589 }, { "epoch": 0.1664050235478807, "grad_norm": 2.5050727103817696, "learning_rate": 1.9040345784304467e-05, "loss": 1.0968, "step": 1590 }, { "epoch": 0.16650968079539508, "grad_norm": 2.6614296904360106, "learning_rate": 1.90388962974337e-05, "loss": 0.9425, "step": 1591 }, { "epoch": 0.16661433804290948, "grad_norm": 2.388506412574016, "learning_rate": 1.9037445771975425e-05, "loss": 1.0221, "step": 1592 }, { "epoch": 0.16671899529042386, "grad_norm": 2.410859511215317, "learning_rate": 1.903599420809631e-05, "loss": 0.9942, "step": 1593 }, { "epoch": 0.16682365253793824, "grad_norm": 2.263106423709346, "learning_rate": 1.9034541605963143e-05, "loss": 1.0328, "step": 1594 }, { "epoch": 0.16692830978545264, "grad_norm": 2.322273159353024, "learning_rate": 1.9033087965742826e-05, "loss": 1.0696, "step": 1595 }, { "epoch": 0.16703296703296702, "grad_norm": 2.5405277930274197, "learning_rate": 1.9031633287602396e-05, "loss": 1.0605, "step": 1596 }, { "epoch": 0.16713762428048143, "grad_norm": 2.2074468419651474, "learning_rate": 1.903017757170899e-05, "loss": 1.0402, "step": 1597 }, { "epoch": 0.1672422815279958, "grad_norm": 2.754455496878057, "learning_rate": 1.9028720818229874e-05, "loss": 1.0416, "step": 1598 }, { "epoch": 0.1673469387755102, "grad_norm": 2.3580569683993198, "learning_rate": 1.9027263027332434e-05, "loss": 1.1088, "step": 1599 }, { "epoch": 0.1674515960230246, "grad_norm": 2.3717864565978757, "learning_rate": 1.9025804199184173e-05, "loss": 1.0257, "step": 1600 }, { "epoch": 0.167556253270539, "grad_norm": 2.314198126343136, "learning_rate": 1.9024344333952708e-05, "loss": 1.1035, "step": 1601 }, { "epoch": 0.16766091051805337, "grad_norm": 2.442992823002754, "learning_rate": 1.902288343180579e-05, "loss": 0.9067, "step": 1602 }, { "epoch": 0.16776556776556775, "grad_norm": 2.3303308497808963, "learning_rate": 1.9021421492911272e-05, "loss": 1.0525, "step": 1603 }, { "epoch": 0.16787022501308216, "grad_norm": 2.3600681521712255, "learning_rate": 1.901995851743714e-05, "loss": 1.0627, "step": 1604 }, { "epoch": 0.16797488226059654, "grad_norm": 2.5485749167168077, "learning_rate": 1.9018494505551484e-05, "loss": 1.0949, "step": 1605 }, { "epoch": 0.16807953950811094, "grad_norm": 2.403018203528686, "learning_rate": 1.9017029457422525e-05, "loss": 0.9727, "step": 1606 }, { "epoch": 0.16818419675562532, "grad_norm": 2.186348329353992, "learning_rate": 1.9015563373218607e-05, "loss": 1.0965, "step": 1607 }, { "epoch": 0.16828885400313973, "grad_norm": 2.5369378525468216, "learning_rate": 1.901409625310818e-05, "loss": 0.9336, "step": 1608 }, { "epoch": 0.1683935112506541, "grad_norm": 2.406260744207432, "learning_rate": 1.901262809725982e-05, "loss": 0.9296, "step": 1609 }, { "epoch": 0.1684981684981685, "grad_norm": 2.229664380822881, "learning_rate": 1.9011158905842218e-05, "loss": 1.0721, "step": 1610 }, { "epoch": 0.1686028257456829, "grad_norm": 2.588043339747656, "learning_rate": 1.900968867902419e-05, "loss": 0.9903, "step": 1611 }, { "epoch": 0.16870748299319727, "grad_norm": 2.4100976190366903, "learning_rate": 1.9008217416974674e-05, "loss": 0.9882, "step": 1612 }, { "epoch": 0.16881214024071167, "grad_norm": 2.2572340824619013, "learning_rate": 1.9006745119862713e-05, "loss": 1.0155, "step": 1613 }, { "epoch": 0.16891679748822605, "grad_norm": 2.5087512390175117, "learning_rate": 1.9005271787857477e-05, "loss": 1.0246, "step": 1614 }, { "epoch": 0.16902145473574046, "grad_norm": 2.644963375872955, "learning_rate": 1.900379742112826e-05, "loss": 1.0464, "step": 1615 }, { "epoch": 0.16912611198325483, "grad_norm": 2.4254460348690987, "learning_rate": 1.900232201984446e-05, "loss": 1.0792, "step": 1616 }, { "epoch": 0.16923076923076924, "grad_norm": 2.3127114426824975, "learning_rate": 1.9000845584175617e-05, "loss": 0.9903, "step": 1617 }, { "epoch": 0.16933542647828362, "grad_norm": 2.412862064453713, "learning_rate": 1.8999368114291367e-05, "loss": 0.9951, "step": 1618 }, { "epoch": 0.16944008372579802, "grad_norm": 2.469305422395179, "learning_rate": 1.899788961036148e-05, "loss": 0.918, "step": 1619 }, { "epoch": 0.1695447409733124, "grad_norm": 2.1595304334372347, "learning_rate": 1.8996410072555833e-05, "loss": 1.0967, "step": 1620 }, { "epoch": 0.16964939822082678, "grad_norm": 2.7358337011342106, "learning_rate": 1.8994929501044433e-05, "loss": 1.0458, "step": 1621 }, { "epoch": 0.1697540554683412, "grad_norm": 2.5015188075028747, "learning_rate": 1.8993447895997396e-05, "loss": 1.0962, "step": 1622 }, { "epoch": 0.16985871271585556, "grad_norm": 2.746712031614558, "learning_rate": 1.8991965257584967e-05, "loss": 0.9008, "step": 1623 }, { "epoch": 0.16996336996336997, "grad_norm": 2.308046183330425, "learning_rate": 1.8990481585977503e-05, "loss": 0.8214, "step": 1624 }, { "epoch": 0.17006802721088435, "grad_norm": 2.241008498260066, "learning_rate": 1.8988996881345472e-05, "loss": 0.9915, "step": 1625 }, { "epoch": 0.17017268445839875, "grad_norm": 2.336157225073833, "learning_rate": 1.8987511143859484e-05, "loss": 1.0093, "step": 1626 }, { "epoch": 0.17027734170591313, "grad_norm": 2.456597311443299, "learning_rate": 1.898602437369024e-05, "loss": 1.0753, "step": 1627 }, { "epoch": 0.17038199895342754, "grad_norm": 2.1653784471734583, "learning_rate": 1.8984536571008585e-05, "loss": 0.9081, "step": 1628 }, { "epoch": 0.17048665620094192, "grad_norm": 2.1955935295728515, "learning_rate": 1.8983047735985464e-05, "loss": 0.9734, "step": 1629 }, { "epoch": 0.1705913134484563, "grad_norm": 3.0667450842861457, "learning_rate": 1.8981557868791943e-05, "loss": 1.1295, "step": 1630 }, { "epoch": 0.1706959706959707, "grad_norm": 2.457644941062628, "learning_rate": 1.8980066969599216e-05, "loss": 1.1109, "step": 1631 }, { "epoch": 0.17080062794348508, "grad_norm": 2.5249379238888627, "learning_rate": 1.8978575038578593e-05, "loss": 1.0207, "step": 1632 }, { "epoch": 0.17090528519099948, "grad_norm": 2.3825489629259966, "learning_rate": 1.8977082075901493e-05, "loss": 1.0369, "step": 1633 }, { "epoch": 0.17100994243851386, "grad_norm": 2.2994283024432063, "learning_rate": 1.8975588081739468e-05, "loss": 0.9943, "step": 1634 }, { "epoch": 0.17111459968602827, "grad_norm": 2.7924613134451888, "learning_rate": 1.8974093056264173e-05, "loss": 1.1996, "step": 1635 }, { "epoch": 0.17121925693354265, "grad_norm": 2.44507996655918, "learning_rate": 1.8972596999647398e-05, "loss": 1.1193, "step": 1636 }, { "epoch": 0.17132391418105705, "grad_norm": 2.16180218326001, "learning_rate": 1.897109991206103e-05, "loss": 0.8592, "step": 1637 }, { "epoch": 0.17142857142857143, "grad_norm": 2.699376290036355, "learning_rate": 1.8969601793677107e-05, "loss": 1.1523, "step": 1638 }, { "epoch": 0.1715332286760858, "grad_norm": 2.735100934448089, "learning_rate": 1.8968102644667748e-05, "loss": 1.0564, "step": 1639 }, { "epoch": 0.17163788592360021, "grad_norm": 2.573807990283748, "learning_rate": 1.8966602465205214e-05, "loss": 1.0115, "step": 1640 }, { "epoch": 0.1717425431711146, "grad_norm": 2.7249740908747784, "learning_rate": 1.8965101255461883e-05, "loss": 0.9658, "step": 1641 }, { "epoch": 0.171847200418629, "grad_norm": 2.9345488789227785, "learning_rate": 1.896359901561024e-05, "loss": 1.0128, "step": 1642 }, { "epoch": 0.17195185766614338, "grad_norm": 2.889098265415476, "learning_rate": 1.89620957458229e-05, "loss": 1.0816, "step": 1643 }, { "epoch": 0.17205651491365778, "grad_norm": 2.627888084723054, "learning_rate": 1.8960591446272595e-05, "loss": 1.0394, "step": 1644 }, { "epoch": 0.17216117216117216, "grad_norm": 2.2397484008487663, "learning_rate": 1.8959086117132162e-05, "loss": 0.9653, "step": 1645 }, { "epoch": 0.17226582940868654, "grad_norm": 1.9966793466082513, "learning_rate": 1.8957579758574575e-05, "loss": 0.8132, "step": 1646 }, { "epoch": 0.17237048665620094, "grad_norm": 3.0317885783655307, "learning_rate": 1.8956072370772914e-05, "loss": 1.0076, "step": 1647 }, { "epoch": 0.17247514390371532, "grad_norm": 2.6374048369635483, "learning_rate": 1.8954563953900386e-05, "loss": 1.0983, "step": 1648 }, { "epoch": 0.17257980115122973, "grad_norm": 2.96669464371244, "learning_rate": 1.8953054508130302e-05, "loss": 0.9889, "step": 1649 }, { "epoch": 0.1726844583987441, "grad_norm": 2.4723284751273322, "learning_rate": 1.8951544033636105e-05, "loss": 1.1264, "step": 1650 }, { "epoch": 0.1727891156462585, "grad_norm": 2.622144198286044, "learning_rate": 1.8950032530591356e-05, "loss": 1.0118, "step": 1651 }, { "epoch": 0.1728937728937729, "grad_norm": 2.891941422685323, "learning_rate": 1.8948519999169723e-05, "loss": 0.9545, "step": 1652 }, { "epoch": 0.1729984301412873, "grad_norm": 2.91494413227998, "learning_rate": 1.8947006439545e-05, "loss": 1.0054, "step": 1653 }, { "epoch": 0.17310308738880167, "grad_norm": 2.3618205093150344, "learning_rate": 1.89454918518911e-05, "loss": 1.0833, "step": 1654 }, { "epoch": 0.17320774463631605, "grad_norm": 2.381058104001439, "learning_rate": 1.8943976236382055e-05, "loss": 0.7995, "step": 1655 }, { "epoch": 0.17331240188383046, "grad_norm": 2.4725029468294357, "learning_rate": 1.8942459593192008e-05, "loss": 1.1094, "step": 1656 }, { "epoch": 0.17341705913134484, "grad_norm": 2.5977724802112045, "learning_rate": 1.8940941922495223e-05, "loss": 0.9014, "step": 1657 }, { "epoch": 0.17352171637885924, "grad_norm": 3.566756798991803, "learning_rate": 1.8939423224466086e-05, "loss": 0.9807, "step": 1658 }, { "epoch": 0.17362637362637362, "grad_norm": 2.5787026088981637, "learning_rate": 1.8937903499279104e-05, "loss": 1.0571, "step": 1659 }, { "epoch": 0.17373103087388803, "grad_norm": 2.5611787827337182, "learning_rate": 1.8936382747108885e-05, "loss": 1.0353, "step": 1660 }, { "epoch": 0.1738356881214024, "grad_norm": 2.5012911002756417, "learning_rate": 1.8934860968130176e-05, "loss": 1.1265, "step": 1661 }, { "epoch": 0.1739403453689168, "grad_norm": 2.2836015231005033, "learning_rate": 1.8933338162517824e-05, "loss": 1.026, "step": 1662 }, { "epoch": 0.1740450026164312, "grad_norm": 2.8317634648824486, "learning_rate": 1.8931814330446813e-05, "loss": 1.1476, "step": 1663 }, { "epoch": 0.17414965986394557, "grad_norm": 2.5276867878076907, "learning_rate": 1.8930289472092225e-05, "loss": 1.0794, "step": 1664 }, { "epoch": 0.17425431711145997, "grad_norm": 2.6172530432146957, "learning_rate": 1.8928763587629273e-05, "loss": 1.1507, "step": 1665 }, { "epoch": 0.17435897435897435, "grad_norm": 3.298918918566836, "learning_rate": 1.8927236677233286e-05, "loss": 1.0261, "step": 1666 }, { "epoch": 0.17446363160648876, "grad_norm": 2.0906785380095956, "learning_rate": 1.8925708741079706e-05, "loss": 0.9586, "step": 1667 }, { "epoch": 0.17456828885400313, "grad_norm": 2.3161353864865806, "learning_rate": 1.89241797793441e-05, "loss": 1.0169, "step": 1668 }, { "epoch": 0.17467294610151754, "grad_norm": 2.7515446593384065, "learning_rate": 1.8922649792202145e-05, "loss": 1.1237, "step": 1669 }, { "epoch": 0.17477760334903192, "grad_norm": 2.5581343380268833, "learning_rate": 1.892111877982964e-05, "loss": 0.9509, "step": 1670 }, { "epoch": 0.17488226059654632, "grad_norm": 2.215250689710175, "learning_rate": 1.8919586742402504e-05, "loss": 1.0509, "step": 1671 }, { "epoch": 0.1749869178440607, "grad_norm": 2.2557081770607454, "learning_rate": 1.891805368009677e-05, "loss": 0.9262, "step": 1672 }, { "epoch": 0.17509157509157508, "grad_norm": 2.6566875549500626, "learning_rate": 1.8916519593088586e-05, "loss": 1.0298, "step": 1673 }, { "epoch": 0.17519623233908949, "grad_norm": 2.5841170671199993, "learning_rate": 1.8914984481554226e-05, "loss": 1.1961, "step": 1674 }, { "epoch": 0.17530088958660386, "grad_norm": 2.401181500742945, "learning_rate": 1.8913448345670075e-05, "loss": 0.8559, "step": 1675 }, { "epoch": 0.17540554683411827, "grad_norm": 2.7236964577084275, "learning_rate": 1.891191118561264e-05, "loss": 0.9921, "step": 1676 }, { "epoch": 0.17551020408163265, "grad_norm": 2.279719174576562, "learning_rate": 1.8910373001558548e-05, "loss": 1.1413, "step": 1677 }, { "epoch": 0.17561486132914705, "grad_norm": 2.4343550584438725, "learning_rate": 1.890883379368453e-05, "loss": 1.0764, "step": 1678 }, { "epoch": 0.17571951857666143, "grad_norm": 2.6430739908289986, "learning_rate": 1.890729356216745e-05, "loss": 0.9843, "step": 1679 }, { "epoch": 0.17582417582417584, "grad_norm": 2.2188927123894544, "learning_rate": 1.8905752307184287e-05, "loss": 1.0551, "step": 1680 }, { "epoch": 0.17592883307169022, "grad_norm": 2.674634884089362, "learning_rate": 1.8904210028912126e-05, "loss": 1.0451, "step": 1681 }, { "epoch": 0.1760334903192046, "grad_norm": 2.604704932941921, "learning_rate": 1.8902666727528184e-05, "loss": 1.0089, "step": 1682 }, { "epoch": 0.176138147566719, "grad_norm": 2.542591420241676, "learning_rate": 1.8901122403209785e-05, "loss": 1.0962, "step": 1683 }, { "epoch": 0.17624280481423338, "grad_norm": 2.6574054799605755, "learning_rate": 1.889957705613438e-05, "loss": 1.1428, "step": 1684 }, { "epoch": 0.17634746206174778, "grad_norm": 2.486565966246706, "learning_rate": 1.8898030686479526e-05, "loss": 1.0292, "step": 1685 }, { "epoch": 0.17645211930926216, "grad_norm": 2.4219588038934443, "learning_rate": 1.889648329442291e-05, "loss": 1.0308, "step": 1686 }, { "epoch": 0.17655677655677657, "grad_norm": 2.503380792899425, "learning_rate": 1.889493488014233e-05, "loss": 1.0214, "step": 1687 }, { "epoch": 0.17666143380429095, "grad_norm": 2.346582662942175, "learning_rate": 1.88933854438157e-05, "loss": 1.0546, "step": 1688 }, { "epoch": 0.17676609105180532, "grad_norm": 2.460045482771163, "learning_rate": 1.889183498562105e-05, "loss": 1.1341, "step": 1689 }, { "epoch": 0.17687074829931973, "grad_norm": 2.5881691646400653, "learning_rate": 1.889028350573654e-05, "loss": 1.1007, "step": 1690 }, { "epoch": 0.1769754055468341, "grad_norm": 2.622909655326263, "learning_rate": 1.888873100434043e-05, "loss": 1.069, "step": 1691 }, { "epoch": 0.17708006279434851, "grad_norm": 2.5204132573206692, "learning_rate": 1.8887177481611107e-05, "loss": 0.885, "step": 1692 }, { "epoch": 0.1771847200418629, "grad_norm": 2.510954873616835, "learning_rate": 1.888562293772708e-05, "loss": 1.0541, "step": 1693 }, { "epoch": 0.1772893772893773, "grad_norm": 2.7873358491588167, "learning_rate": 1.888406737286696e-05, "loss": 0.9605, "step": 1694 }, { "epoch": 0.17739403453689168, "grad_norm": 2.326692971407233, "learning_rate": 1.888251078720949e-05, "loss": 0.967, "step": 1695 }, { "epoch": 0.17749869178440608, "grad_norm": 2.106615127470322, "learning_rate": 1.8880953180933524e-05, "loss": 0.8576, "step": 1696 }, { "epoch": 0.17760334903192046, "grad_norm": 2.4795735591289487, "learning_rate": 1.8879394554218033e-05, "loss": 1.1236, "step": 1697 }, { "epoch": 0.17770800627943484, "grad_norm": 3.0042166107472825, "learning_rate": 1.887783490724211e-05, "loss": 1.0146, "step": 1698 }, { "epoch": 0.17781266352694924, "grad_norm": 2.492060495608788, "learning_rate": 1.8876274240184954e-05, "loss": 0.9161, "step": 1699 }, { "epoch": 0.17791732077446362, "grad_norm": 2.777606937433633, "learning_rate": 1.88747125532259e-05, "loss": 1.0153, "step": 1700 }, { "epoch": 0.17802197802197803, "grad_norm": 2.460229108054716, "learning_rate": 1.8873149846544377e-05, "loss": 0.9356, "step": 1701 }, { "epoch": 0.1781266352694924, "grad_norm": 2.5144312422866197, "learning_rate": 1.8871586120319952e-05, "loss": 1.0638, "step": 1702 }, { "epoch": 0.1782312925170068, "grad_norm": 2.1719985008734466, "learning_rate": 1.8870021374732297e-05, "loss": 1.009, "step": 1703 }, { "epoch": 0.1783359497645212, "grad_norm": 2.464059885032523, "learning_rate": 1.8868455609961206e-05, "loss": 1.1706, "step": 1704 }, { "epoch": 0.1784406070120356, "grad_norm": 2.7220400341035917, "learning_rate": 1.8866888826186583e-05, "loss": 1.0106, "step": 1705 }, { "epoch": 0.17854526425954997, "grad_norm": 2.3596603917693, "learning_rate": 1.886532102358846e-05, "loss": 1.0622, "step": 1706 }, { "epoch": 0.17864992150706435, "grad_norm": 2.4967392208163317, "learning_rate": 1.8863752202346982e-05, "loss": 1.0043, "step": 1707 }, { "epoch": 0.17875457875457876, "grad_norm": 1.937686380270267, "learning_rate": 1.8862182362642406e-05, "loss": 0.8468, "step": 1708 }, { "epoch": 0.17885923600209314, "grad_norm": 2.4387935220753048, "learning_rate": 1.8860611504655112e-05, "loss": 1.0241, "step": 1709 }, { "epoch": 0.17896389324960754, "grad_norm": 2.3758956185372755, "learning_rate": 1.885903962856559e-05, "loss": 1.1111, "step": 1710 }, { "epoch": 0.17906855049712192, "grad_norm": 2.21312437965851, "learning_rate": 1.885746673455446e-05, "loss": 1.0037, "step": 1711 }, { "epoch": 0.17917320774463633, "grad_norm": 2.5602801001146673, "learning_rate": 1.8855892822802443e-05, "loss": 1.1503, "step": 1712 }, { "epoch": 0.1792778649921507, "grad_norm": 2.558534262130665, "learning_rate": 1.885431789349039e-05, "loss": 1.1362, "step": 1713 }, { "epoch": 0.1793825222396651, "grad_norm": 2.074103983805813, "learning_rate": 1.885274194679926e-05, "loss": 1.017, "step": 1714 }, { "epoch": 0.1794871794871795, "grad_norm": 3.144604908715539, "learning_rate": 1.8851164982910135e-05, "loss": 1.1212, "step": 1715 }, { "epoch": 0.17959183673469387, "grad_norm": 2.142185971803945, "learning_rate": 1.884958700200421e-05, "loss": 1.1587, "step": 1716 }, { "epoch": 0.17969649398220827, "grad_norm": 2.0371324766241394, "learning_rate": 1.88480080042628e-05, "loss": 1.0488, "step": 1717 }, { "epoch": 0.17980115122972265, "grad_norm": 2.5493299229194855, "learning_rate": 1.8846427989867334e-05, "loss": 1.1183, "step": 1718 }, { "epoch": 0.17990580847723706, "grad_norm": 2.1735484655699477, "learning_rate": 1.8844846958999355e-05, "loss": 1.0369, "step": 1719 }, { "epoch": 0.18001046572475143, "grad_norm": 2.084854109587084, "learning_rate": 1.884326491184053e-05, "loss": 0.9337, "step": 1720 }, { "epoch": 0.18011512297226584, "grad_norm": 2.6326212200262202, "learning_rate": 1.884168184857264e-05, "loss": 1.0429, "step": 1721 }, { "epoch": 0.18021978021978022, "grad_norm": 2.4562822207892756, "learning_rate": 1.8840097769377585e-05, "loss": 1.0598, "step": 1722 }, { "epoch": 0.18032443746729462, "grad_norm": 2.4277508413270272, "learning_rate": 1.8838512674437372e-05, "loss": 0.9412, "step": 1723 }, { "epoch": 0.180429094714809, "grad_norm": 2.8821956418407804, "learning_rate": 1.8836926563934137e-05, "loss": 0.8627, "step": 1724 }, { "epoch": 0.18053375196232338, "grad_norm": 2.709669048226828, "learning_rate": 1.8835339438050125e-05, "loss": 0.9982, "step": 1725 }, { "epoch": 0.18063840920983779, "grad_norm": 2.111424162303935, "learning_rate": 1.88337512969677e-05, "loss": 1.0113, "step": 1726 }, { "epoch": 0.18074306645735216, "grad_norm": 2.234604237746023, "learning_rate": 1.8832162140869343e-05, "loss": 1.0126, "step": 1727 }, { "epoch": 0.18084772370486657, "grad_norm": 2.524183949936875, "learning_rate": 1.883057196993765e-05, "loss": 1.2232, "step": 1728 }, { "epoch": 0.18095238095238095, "grad_norm": 2.130924634431564, "learning_rate": 1.882898078435534e-05, "loss": 0.9923, "step": 1729 }, { "epoch": 0.18105703819989535, "grad_norm": 2.4549286849941385, "learning_rate": 1.8827388584305237e-05, "loss": 1.1306, "step": 1730 }, { "epoch": 0.18116169544740973, "grad_norm": 2.698247562016706, "learning_rate": 1.882579536997029e-05, "loss": 1.0832, "step": 1731 }, { "epoch": 0.1812663526949241, "grad_norm": 2.4883956970188463, "learning_rate": 1.8824201141533565e-05, "loss": 1.0421, "step": 1732 }, { "epoch": 0.18137100994243852, "grad_norm": 2.205789516439095, "learning_rate": 1.8822605899178242e-05, "loss": 0.8382, "step": 1733 }, { "epoch": 0.1814756671899529, "grad_norm": 2.043592458587824, "learning_rate": 1.8821009643087613e-05, "loss": 1.0106, "step": 1734 }, { "epoch": 0.1815803244374673, "grad_norm": 2.678564167470479, "learning_rate": 1.8819412373445094e-05, "loss": 1.0847, "step": 1735 }, { "epoch": 0.18168498168498168, "grad_norm": 2.29945001233077, "learning_rate": 1.8817814090434218e-05, "loss": 0.9098, "step": 1736 }, { "epoch": 0.18178963893249608, "grad_norm": 2.294178577646669, "learning_rate": 1.8816214794238623e-05, "loss": 1.0782, "step": 1737 }, { "epoch": 0.18189429618001046, "grad_norm": 2.420407699481665, "learning_rate": 1.8814614485042077e-05, "loss": 1.1683, "step": 1738 }, { "epoch": 0.18199895342752487, "grad_norm": 2.389361847046125, "learning_rate": 1.881301316302846e-05, "loss": 0.9132, "step": 1739 }, { "epoch": 0.18210361067503925, "grad_norm": 2.3666157461593653, "learning_rate": 1.881141082838176e-05, "loss": 1.0545, "step": 1740 }, { "epoch": 0.18220826792255362, "grad_norm": 2.3334015197345024, "learning_rate": 1.8809807481286096e-05, "loss": 1.0485, "step": 1741 }, { "epoch": 0.18231292517006803, "grad_norm": 2.0751756735643223, "learning_rate": 1.8808203121925692e-05, "loss": 0.8661, "step": 1742 }, { "epoch": 0.1824175824175824, "grad_norm": 2.138459888794792, "learning_rate": 1.8806597750484895e-05, "loss": 0.9413, "step": 1743 }, { "epoch": 0.1825222396650968, "grad_norm": 2.3884065014316707, "learning_rate": 1.8804991367148165e-05, "loss": 1.0748, "step": 1744 }, { "epoch": 0.1826268969126112, "grad_norm": 2.3903789475716826, "learning_rate": 1.8803383972100073e-05, "loss": 1.2001, "step": 1745 }, { "epoch": 0.1827315541601256, "grad_norm": 2.431230215643124, "learning_rate": 1.880177556552532e-05, "loss": 1.097, "step": 1746 }, { "epoch": 0.18283621140763998, "grad_norm": 2.6229790482474016, "learning_rate": 1.880016614760871e-05, "loss": 1.1281, "step": 1747 }, { "epoch": 0.18294086865515438, "grad_norm": 2.3850699291301, "learning_rate": 1.879855571853517e-05, "loss": 1.0819, "step": 1748 }, { "epoch": 0.18304552590266876, "grad_norm": 2.876444507419557, "learning_rate": 1.879694427848974e-05, "loss": 1.0471, "step": 1749 }, { "epoch": 0.18315018315018314, "grad_norm": 2.316504236046675, "learning_rate": 1.879533182765758e-05, "loss": 1.0519, "step": 1750 }, { "epoch": 0.18325484039769754, "grad_norm": 2.2030334955552413, "learning_rate": 1.8793718366223963e-05, "loss": 0.928, "step": 1751 }, { "epoch": 0.18335949764521192, "grad_norm": 2.3511232102886495, "learning_rate": 1.879210389437428e-05, "loss": 0.9923, "step": 1752 }, { "epoch": 0.18346415489272633, "grad_norm": 2.4416831019547907, "learning_rate": 1.8790488412294035e-05, "loss": 1.0361, "step": 1753 }, { "epoch": 0.1835688121402407, "grad_norm": 2.4333832677997034, "learning_rate": 1.8788871920168855e-05, "loss": 1.0897, "step": 1754 }, { "epoch": 0.1836734693877551, "grad_norm": 2.2799904335051915, "learning_rate": 1.878725441818447e-05, "loss": 1.0096, "step": 1755 }, { "epoch": 0.1837781266352695, "grad_norm": 2.462858451122516, "learning_rate": 1.8785635906526737e-05, "loss": 1.0372, "step": 1756 }, { "epoch": 0.1838827838827839, "grad_norm": 2.3533243746357146, "learning_rate": 1.8784016385381633e-05, "loss": 1.0798, "step": 1757 }, { "epoch": 0.18398744113029827, "grad_norm": 2.718496586207958, "learning_rate": 1.8782395854935237e-05, "loss": 1.0633, "step": 1758 }, { "epoch": 0.18409209837781265, "grad_norm": 2.652705423511989, "learning_rate": 1.878077431537375e-05, "loss": 1.0664, "step": 1759 }, { "epoch": 0.18419675562532706, "grad_norm": 2.586313483256547, "learning_rate": 1.8779151766883502e-05, "loss": 1.0038, "step": 1760 }, { "epoch": 0.18430141287284144, "grad_norm": 2.5041990988530514, "learning_rate": 1.877752820965091e-05, "loss": 1.0798, "step": 1761 }, { "epoch": 0.18440607012035584, "grad_norm": 2.53966882445818, "learning_rate": 1.877590364386254e-05, "loss": 1.1485, "step": 1762 }, { "epoch": 0.18451072736787022, "grad_norm": 2.5828686624066695, "learning_rate": 1.877427806970505e-05, "loss": 1.0969, "step": 1763 }, { "epoch": 0.18461538461538463, "grad_norm": 2.3393212197615343, "learning_rate": 1.8772651487365217e-05, "loss": 1.1038, "step": 1764 }, { "epoch": 0.184720041862899, "grad_norm": 2.7443600527578393, "learning_rate": 1.8771023897029948e-05, "loss": 0.8974, "step": 1765 }, { "epoch": 0.1848246991104134, "grad_norm": 2.5532153964529893, "learning_rate": 1.876939529888625e-05, "loss": 0.9866, "step": 1766 }, { "epoch": 0.1849293563579278, "grad_norm": 2.4029344349509194, "learning_rate": 1.8767765693121258e-05, "loss": 1.0403, "step": 1767 }, { "epoch": 0.18503401360544217, "grad_norm": 2.176455654469618, "learning_rate": 1.8766135079922213e-05, "loss": 0.9654, "step": 1768 }, { "epoch": 0.18513867085295657, "grad_norm": 2.396055407588553, "learning_rate": 1.8764503459476476e-05, "loss": 0.8856, "step": 1769 }, { "epoch": 0.18524332810047095, "grad_norm": 2.8406525852271165, "learning_rate": 1.8762870831971523e-05, "loss": 1.1081, "step": 1770 }, { "epoch": 0.18534798534798536, "grad_norm": 2.9851011238261362, "learning_rate": 1.8761237197594945e-05, "loss": 1.0719, "step": 1771 }, { "epoch": 0.18545264259549973, "grad_norm": 1.982828765335705, "learning_rate": 1.8759602556534458e-05, "loss": 0.9932, "step": 1772 }, { "epoch": 0.18555729984301414, "grad_norm": 2.7241455065505287, "learning_rate": 1.8757966908977875e-05, "loss": 1.2454, "step": 1773 }, { "epoch": 0.18566195709052852, "grad_norm": 2.252662593453829, "learning_rate": 1.875633025511314e-05, "loss": 1.0011, "step": 1774 }, { "epoch": 0.1857666143380429, "grad_norm": 2.5528950049004204, "learning_rate": 1.8754692595128313e-05, "loss": 0.9809, "step": 1775 }, { "epoch": 0.1858712715855573, "grad_norm": 2.0122663818965134, "learning_rate": 1.8753053929211555e-05, "loss": 0.9965, "step": 1776 }, { "epoch": 0.18597592883307168, "grad_norm": 2.1764615737427997, "learning_rate": 1.875141425755116e-05, "loss": 1.0969, "step": 1777 }, { "epoch": 0.18608058608058609, "grad_norm": 1.8948836535370301, "learning_rate": 1.874977358033552e-05, "loss": 0.8658, "step": 1778 }, { "epoch": 0.18618524332810046, "grad_norm": 2.361535893832336, "learning_rate": 1.874813189775316e-05, "loss": 0.964, "step": 1779 }, { "epoch": 0.18628990057561487, "grad_norm": 2.3456335496105707, "learning_rate": 1.8746489209992713e-05, "loss": 0.924, "step": 1780 }, { "epoch": 0.18639455782312925, "grad_norm": 2.423670459664142, "learning_rate": 1.874484551724292e-05, "loss": 0.9615, "step": 1781 }, { "epoch": 0.18649921507064365, "grad_norm": 2.2890242877296125, "learning_rate": 1.8743200819692652e-05, "loss": 1.0306, "step": 1782 }, { "epoch": 0.18660387231815803, "grad_norm": 2.5596300069718505, "learning_rate": 1.8741555117530887e-05, "loss": 0.9138, "step": 1783 }, { "epoch": 0.1867085295656724, "grad_norm": 2.129042464863284, "learning_rate": 1.873990841094672e-05, "loss": 1.1452, "step": 1784 }, { "epoch": 0.18681318681318682, "grad_norm": 3.064823960289263, "learning_rate": 1.8738260700129354e-05, "loss": 0.9969, "step": 1785 }, { "epoch": 0.1869178440607012, "grad_norm": 2.1130779511781688, "learning_rate": 1.8736611985268124e-05, "loss": 0.9711, "step": 1786 }, { "epoch": 0.1870225013082156, "grad_norm": 2.0389255541139994, "learning_rate": 1.873496226655246e-05, "loss": 0.8438, "step": 1787 }, { "epoch": 0.18712715855572998, "grad_norm": 2.2033567685248947, "learning_rate": 1.873331154417193e-05, "loss": 1.0108, "step": 1788 }, { "epoch": 0.18723181580324438, "grad_norm": 2.15080379319512, "learning_rate": 1.8731659818316196e-05, "loss": 0.8633, "step": 1789 }, { "epoch": 0.18733647305075876, "grad_norm": 2.2191227916720004, "learning_rate": 1.8730007089175047e-05, "loss": 0.9503, "step": 1790 }, { "epoch": 0.18744113029827317, "grad_norm": 2.3128407690400814, "learning_rate": 1.8728353356938386e-05, "loss": 0.9191, "step": 1791 }, { "epoch": 0.18754578754578755, "grad_norm": 2.3372349993067782, "learning_rate": 1.872669862179623e-05, "loss": 1.0454, "step": 1792 }, { "epoch": 0.18765044479330192, "grad_norm": 2.4025198105450607, "learning_rate": 1.8725042883938714e-05, "loss": 1.0697, "step": 1793 }, { "epoch": 0.18775510204081633, "grad_norm": 2.2588303051011196, "learning_rate": 1.8723386143556085e-05, "loss": 1.21, "step": 1794 }, { "epoch": 0.1878597592883307, "grad_norm": 2.1758809138656834, "learning_rate": 1.87217284008387e-05, "loss": 1.017, "step": 1795 }, { "epoch": 0.1879644165358451, "grad_norm": 2.505013897983491, "learning_rate": 1.872006965597704e-05, "loss": 0.96, "step": 1796 }, { "epoch": 0.1880690737833595, "grad_norm": 2.5795773446705303, "learning_rate": 1.8718409909161704e-05, "loss": 1.0457, "step": 1797 }, { "epoch": 0.1881737310308739, "grad_norm": 2.5053247104374945, "learning_rate": 1.8716749160583393e-05, "loss": 1.0533, "step": 1798 }, { "epoch": 0.18827838827838828, "grad_norm": 2.5122626396928633, "learning_rate": 1.871508741043293e-05, "loss": 0.9278, "step": 1799 }, { "epoch": 0.18838304552590268, "grad_norm": 2.443610775616308, "learning_rate": 1.871342465890126e-05, "loss": 0.85, "step": 1800 }, { "epoch": 0.18848770277341706, "grad_norm": 2.515901355227164, "learning_rate": 1.8711760906179428e-05, "loss": 1.0415, "step": 1801 }, { "epoch": 0.18859236002093144, "grad_norm": 2.6696223601207714, "learning_rate": 1.871009615245861e-05, "loss": 1.0236, "step": 1802 }, { "epoch": 0.18869701726844584, "grad_norm": 2.4784833366393553, "learning_rate": 1.870843039793009e-05, "loss": 0.9525, "step": 1803 }, { "epoch": 0.18880167451596022, "grad_norm": 2.4542775584185264, "learning_rate": 1.870676364278526e-05, "loss": 1.1093, "step": 1804 }, { "epoch": 0.18890633176347463, "grad_norm": 2.1766923572797774, "learning_rate": 1.8705095887215636e-05, "loss": 0.9177, "step": 1805 }, { "epoch": 0.189010989010989, "grad_norm": 2.486937565003481, "learning_rate": 1.870342713141285e-05, "loss": 1.0531, "step": 1806 }, { "epoch": 0.1891156462585034, "grad_norm": 2.5489906996516605, "learning_rate": 1.8701757375568642e-05, "loss": 1.1089, "step": 1807 }, { "epoch": 0.1892203035060178, "grad_norm": 1.940507425147104, "learning_rate": 1.870008661987487e-05, "loss": 0.8917, "step": 1808 }, { "epoch": 0.1893249607535322, "grad_norm": 2.0972924062895464, "learning_rate": 1.8698414864523512e-05, "loss": 1.0538, "step": 1809 }, { "epoch": 0.18942961800104657, "grad_norm": 2.2496046336500166, "learning_rate": 1.869674210970665e-05, "loss": 1.0835, "step": 1810 }, { "epoch": 0.18953427524856095, "grad_norm": 2.5621615108753035, "learning_rate": 1.869506835561649e-05, "loss": 1.0739, "step": 1811 }, { "epoch": 0.18963893249607536, "grad_norm": 2.440505472169808, "learning_rate": 1.869339360244535e-05, "loss": 1.0871, "step": 1812 }, { "epoch": 0.18974358974358974, "grad_norm": 2.403241452428988, "learning_rate": 1.869171785038566e-05, "loss": 0.9661, "step": 1813 }, { "epoch": 0.18984824699110414, "grad_norm": 2.469688713891638, "learning_rate": 1.869004109962997e-05, "loss": 1.0122, "step": 1814 }, { "epoch": 0.18995290423861852, "grad_norm": 2.333884875941877, "learning_rate": 1.868836335037094e-05, "loss": 1.1133, "step": 1815 }, { "epoch": 0.19005756148613293, "grad_norm": 2.3621677518785376, "learning_rate": 1.868668460280135e-05, "loss": 0.8132, "step": 1816 }, { "epoch": 0.1901622187336473, "grad_norm": 2.7072055929293235, "learning_rate": 1.8685004857114085e-05, "loss": 1.0276, "step": 1817 }, { "epoch": 0.19026687598116168, "grad_norm": 2.645013242609602, "learning_rate": 1.8683324113502158e-05, "loss": 1.0181, "step": 1818 }, { "epoch": 0.1903715332286761, "grad_norm": 2.5368104479142692, "learning_rate": 1.868164237215869e-05, "loss": 0.919, "step": 1819 }, { "epoch": 0.19047619047619047, "grad_norm": 1.9021982130849477, "learning_rate": 1.867995963327691e-05, "loss": 0.8938, "step": 1820 }, { "epoch": 0.19058084772370487, "grad_norm": 2.33319178257821, "learning_rate": 1.8678275897050177e-05, "loss": 1.0466, "step": 1821 }, { "epoch": 0.19068550497121925, "grad_norm": 2.7008377094227676, "learning_rate": 1.8676591163671948e-05, "loss": 1.1263, "step": 1822 }, { "epoch": 0.19079016221873366, "grad_norm": 2.1650716424421073, "learning_rate": 1.8674905433335805e-05, "loss": 0.9968, "step": 1823 }, { "epoch": 0.19089481946624803, "grad_norm": 2.398261119939507, "learning_rate": 1.867321870623544e-05, "loss": 1.1226, "step": 1824 }, { "epoch": 0.19099947671376244, "grad_norm": 2.312460276704168, "learning_rate": 1.8671530982564664e-05, "loss": 0.9971, "step": 1825 }, { "epoch": 0.19110413396127682, "grad_norm": 2.647523183773923, "learning_rate": 1.86698422625174e-05, "loss": 1.1726, "step": 1826 }, { "epoch": 0.1912087912087912, "grad_norm": 2.6076487342169603, "learning_rate": 1.8668152546287686e-05, "loss": 1.0306, "step": 1827 }, { "epoch": 0.1913134484563056, "grad_norm": 2.3115074361083883, "learning_rate": 1.8666461834069672e-05, "loss": 0.8432, "step": 1828 }, { "epoch": 0.19141810570381998, "grad_norm": 2.1836521898238197, "learning_rate": 1.866477012605762e-05, "loss": 1.0452, "step": 1829 }, { "epoch": 0.19152276295133439, "grad_norm": 2.330317681639118, "learning_rate": 1.866307742244592e-05, "loss": 1.0984, "step": 1830 }, { "epoch": 0.19162742019884876, "grad_norm": 2.457940283244116, "learning_rate": 1.8661383723429062e-05, "loss": 1.108, "step": 1831 }, { "epoch": 0.19173207744636317, "grad_norm": 2.4031558334493757, "learning_rate": 1.8659689029201654e-05, "loss": 1.0354, "step": 1832 }, { "epoch": 0.19183673469387755, "grad_norm": 1.9518351072841722, "learning_rate": 1.8657993339958422e-05, "loss": 0.8381, "step": 1833 }, { "epoch": 0.19194139194139195, "grad_norm": 2.140948339516903, "learning_rate": 1.8656296655894205e-05, "loss": 0.9547, "step": 1834 }, { "epoch": 0.19204604918890633, "grad_norm": 2.248185211385205, "learning_rate": 1.865459897720395e-05, "loss": 0.8359, "step": 1835 }, { "epoch": 0.1921507064364207, "grad_norm": 1.9412091916767238, "learning_rate": 1.865290030408273e-05, "loss": 0.7927, "step": 1836 }, { "epoch": 0.19225536368393512, "grad_norm": 2.7448554100886717, "learning_rate": 1.8651200636725727e-05, "loss": 1.1132, "step": 1837 }, { "epoch": 0.1923600209314495, "grad_norm": 2.2079122058545533, "learning_rate": 1.864949997532823e-05, "loss": 0.9166, "step": 1838 }, { "epoch": 0.1924646781789639, "grad_norm": 2.512086470901001, "learning_rate": 1.864779832008565e-05, "loss": 1.0589, "step": 1839 }, { "epoch": 0.19256933542647828, "grad_norm": 2.232551449072093, "learning_rate": 1.8646095671193512e-05, "loss": 1.0556, "step": 1840 }, { "epoch": 0.19267399267399268, "grad_norm": 2.5120233437446577, "learning_rate": 1.8644392028847457e-05, "loss": 1.0832, "step": 1841 }, { "epoch": 0.19277864992150706, "grad_norm": 2.4799212636866454, "learning_rate": 1.8642687393243233e-05, "loss": 0.9932, "step": 1842 }, { "epoch": 0.19288330716902147, "grad_norm": 2.0756667968361073, "learning_rate": 1.8640981764576707e-05, "loss": 1.0258, "step": 1843 }, { "epoch": 0.19298796441653585, "grad_norm": 2.570142389257925, "learning_rate": 1.863927514304386e-05, "loss": 1.179, "step": 1844 }, { "epoch": 0.19309262166405022, "grad_norm": 2.3453819079115314, "learning_rate": 1.8637567528840784e-05, "loss": 0.9876, "step": 1845 }, { "epoch": 0.19319727891156463, "grad_norm": 2.1997745972557015, "learning_rate": 1.863585892216369e-05, "loss": 0.9367, "step": 1846 }, { "epoch": 0.193301936159079, "grad_norm": 2.2836486587979543, "learning_rate": 1.8634149323208896e-05, "loss": 1.0386, "step": 1847 }, { "epoch": 0.1934065934065934, "grad_norm": 2.21044201206823, "learning_rate": 1.8632438732172846e-05, "loss": 1.0342, "step": 1848 }, { "epoch": 0.1935112506541078, "grad_norm": 2.9097569831352352, "learning_rate": 1.8630727149252082e-05, "loss": 0.8409, "step": 1849 }, { "epoch": 0.1936159079016222, "grad_norm": 2.498032621838584, "learning_rate": 1.8629014574643275e-05, "loss": 1.032, "step": 1850 }, { "epoch": 0.19372056514913658, "grad_norm": 2.3432769230009196, "learning_rate": 1.8627301008543198e-05, "loss": 1.0969, "step": 1851 }, { "epoch": 0.19382522239665098, "grad_norm": 2.241916204274371, "learning_rate": 1.8625586451148747e-05, "loss": 0.8316, "step": 1852 }, { "epoch": 0.19392987964416536, "grad_norm": 2.5512406560898055, "learning_rate": 1.8623870902656928e-05, "loss": 1.0418, "step": 1853 }, { "epoch": 0.19403453689167974, "grad_norm": 2.3680570078020233, "learning_rate": 1.862215436326486e-05, "loss": 0.8752, "step": 1854 }, { "epoch": 0.19413919413919414, "grad_norm": 2.416117696667423, "learning_rate": 1.8620436833169773e-05, "loss": 0.972, "step": 1855 }, { "epoch": 0.19424385138670852, "grad_norm": 2.2006979425801783, "learning_rate": 1.861871831256902e-05, "loss": 1.0598, "step": 1856 }, { "epoch": 0.19434850863422293, "grad_norm": 2.1767343281841884, "learning_rate": 1.861699880166006e-05, "loss": 0.9034, "step": 1857 }, { "epoch": 0.1944531658817373, "grad_norm": 2.4534072009756973, "learning_rate": 1.8615278300640475e-05, "loss": 0.9428, "step": 1858 }, { "epoch": 0.1945578231292517, "grad_norm": 2.1602024149446355, "learning_rate": 1.8613556809707943e-05, "loss": 1.0906, "step": 1859 }, { "epoch": 0.1946624803767661, "grad_norm": 2.169341746326727, "learning_rate": 1.8611834329060272e-05, "loss": 0.9131, "step": 1860 }, { "epoch": 0.1947671376242805, "grad_norm": 2.9643059788895476, "learning_rate": 1.8610110858895383e-05, "loss": 0.9651, "step": 1861 }, { "epoch": 0.19487179487179487, "grad_norm": 2.213393165596908, "learning_rate": 1.8608386399411296e-05, "loss": 1.1012, "step": 1862 }, { "epoch": 0.19497645211930925, "grad_norm": 2.530920660576334, "learning_rate": 1.8606660950806165e-05, "loss": 1.0807, "step": 1863 }, { "epoch": 0.19508110936682366, "grad_norm": 2.5971872408801744, "learning_rate": 1.860493451327824e-05, "loss": 1.0592, "step": 1864 }, { "epoch": 0.19518576661433804, "grad_norm": 2.9737379735677725, "learning_rate": 1.8603207087025897e-05, "loss": 1.0376, "step": 1865 }, { "epoch": 0.19529042386185244, "grad_norm": 2.3943885247243006, "learning_rate": 1.8601478672247622e-05, "loss": 0.8863, "step": 1866 }, { "epoch": 0.19539508110936682, "grad_norm": 2.4125949201995787, "learning_rate": 1.8599749269142007e-05, "loss": 1.0162, "step": 1867 }, { "epoch": 0.19549973835688123, "grad_norm": 2.3700879805758883, "learning_rate": 1.859801887790777e-05, "loss": 0.9566, "step": 1868 }, { "epoch": 0.1956043956043956, "grad_norm": 2.17186126800691, "learning_rate": 1.859628749874373e-05, "loss": 1.0475, "step": 1869 }, { "epoch": 0.19570905285190998, "grad_norm": 2.0282801776801715, "learning_rate": 1.8594555131848834e-05, "loss": 0.9321, "step": 1870 }, { "epoch": 0.1958137100994244, "grad_norm": 2.4375974168550423, "learning_rate": 1.8592821777422126e-05, "loss": 0.9614, "step": 1871 }, { "epoch": 0.19591836734693877, "grad_norm": 2.2740457885998624, "learning_rate": 1.859108743566278e-05, "loss": 1.0135, "step": 1872 }, { "epoch": 0.19602302459445317, "grad_norm": 2.417294338783456, "learning_rate": 1.8589352106770072e-05, "loss": 0.9173, "step": 1873 }, { "epoch": 0.19612768184196755, "grad_norm": 2.2618143493988363, "learning_rate": 1.858761579094339e-05, "loss": 0.8704, "step": 1874 }, { "epoch": 0.19623233908948196, "grad_norm": 2.3808284708358363, "learning_rate": 1.858587848838225e-05, "loss": 1.0471, "step": 1875 }, { "epoch": 0.19633699633699633, "grad_norm": 2.210022086562613, "learning_rate": 1.8584140199286263e-05, "loss": 0.9631, "step": 1876 }, { "epoch": 0.19644165358451074, "grad_norm": 2.3840130902341916, "learning_rate": 1.8582400923855166e-05, "loss": 1.045, "step": 1877 }, { "epoch": 0.19654631083202512, "grad_norm": 2.606315391047431, "learning_rate": 1.8580660662288807e-05, "loss": 1.0785, "step": 1878 }, { "epoch": 0.1966509680795395, "grad_norm": 2.6364699756222194, "learning_rate": 1.857891941478714e-05, "loss": 0.9784, "step": 1879 }, { "epoch": 0.1967556253270539, "grad_norm": 2.072482657011719, "learning_rate": 1.8577177181550243e-05, "loss": 0.8879, "step": 1880 }, { "epoch": 0.19686028257456828, "grad_norm": 2.2034184725027237, "learning_rate": 1.8575433962778297e-05, "loss": 1.0744, "step": 1881 }, { "epoch": 0.19696493982208269, "grad_norm": 2.5086871489753526, "learning_rate": 1.8573689758671607e-05, "loss": 1.0613, "step": 1882 }, { "epoch": 0.19706959706959706, "grad_norm": 2.1761373108687416, "learning_rate": 1.8571944569430582e-05, "loss": 1.0279, "step": 1883 }, { "epoch": 0.19717425431711147, "grad_norm": 2.3377334894353488, "learning_rate": 1.857019839525575e-05, "loss": 1.0278, "step": 1884 }, { "epoch": 0.19727891156462585, "grad_norm": 2.4580887107257685, "learning_rate": 1.856845123634775e-05, "loss": 1.019, "step": 1885 }, { "epoch": 0.19738356881214025, "grad_norm": 2.5860343029123913, "learning_rate": 1.856670309290733e-05, "loss": 1.0096, "step": 1886 }, { "epoch": 0.19748822605965463, "grad_norm": 2.960738910419292, "learning_rate": 1.8564953965135358e-05, "loss": 0.9591, "step": 1887 }, { "epoch": 0.197592883307169, "grad_norm": 2.3795107266092264, "learning_rate": 1.8563203853232812e-05, "loss": 0.8907, "step": 1888 }, { "epoch": 0.19769754055468342, "grad_norm": 2.2435840544300336, "learning_rate": 1.8561452757400785e-05, "loss": 0.9413, "step": 1889 }, { "epoch": 0.1978021978021978, "grad_norm": 2.068348693567875, "learning_rate": 1.8559700677840482e-05, "loss": 1.1047, "step": 1890 }, { "epoch": 0.1979068550497122, "grad_norm": 2.114604247450884, "learning_rate": 1.855794761475322e-05, "loss": 1.0478, "step": 1891 }, { "epoch": 0.19801151229722658, "grad_norm": 2.3715850812930466, "learning_rate": 1.8556193568340423e-05, "loss": 1.0699, "step": 1892 }, { "epoch": 0.19811616954474098, "grad_norm": 2.037660362224576, "learning_rate": 1.855443853880364e-05, "loss": 0.9552, "step": 1893 }, { "epoch": 0.19822082679225536, "grad_norm": 2.472478303294875, "learning_rate": 1.8552682526344532e-05, "loss": 1.0812, "step": 1894 }, { "epoch": 0.19832548403976977, "grad_norm": 2.174274765095585, "learning_rate": 1.855092553116486e-05, "loss": 1.0514, "step": 1895 }, { "epoch": 0.19843014128728415, "grad_norm": 2.418758882311152, "learning_rate": 1.8549167553466515e-05, "loss": 1.1054, "step": 1896 }, { "epoch": 0.19853479853479852, "grad_norm": 2.6391811792480926, "learning_rate": 1.8547408593451483e-05, "loss": 0.8834, "step": 1897 }, { "epoch": 0.19863945578231293, "grad_norm": 2.362589627641238, "learning_rate": 1.8545648651321874e-05, "loss": 1.049, "step": 1898 }, { "epoch": 0.1987441130298273, "grad_norm": 3.6366633137355113, "learning_rate": 1.8543887727279915e-05, "loss": 0.8768, "step": 1899 }, { "epoch": 0.1988487702773417, "grad_norm": 2.5316763474698423, "learning_rate": 1.8542125821527933e-05, "loss": 0.95, "step": 1900 }, { "epoch": 0.1989534275248561, "grad_norm": 2.4002232910837322, "learning_rate": 1.8540362934268378e-05, "loss": 1.1223, "step": 1901 }, { "epoch": 0.1990580847723705, "grad_norm": 2.4136377650468477, "learning_rate": 1.853859906570381e-05, "loss": 1.0736, "step": 1902 }, { "epoch": 0.19916274201988488, "grad_norm": 2.5540511046453362, "learning_rate": 1.85368342160369e-05, "loss": 1.1074, "step": 1903 }, { "epoch": 0.19926739926739928, "grad_norm": 2.2183989268155706, "learning_rate": 1.8535068385470434e-05, "loss": 0.9569, "step": 1904 }, { "epoch": 0.19937205651491366, "grad_norm": 2.2251618210625463, "learning_rate": 1.8533301574207305e-05, "loss": 1.024, "step": 1905 }, { "epoch": 0.19947671376242804, "grad_norm": 2.4725959487205005, "learning_rate": 1.853153378245053e-05, "loss": 0.9864, "step": 1906 }, { "epoch": 0.19958137100994244, "grad_norm": 2.426105671626237, "learning_rate": 1.8529765010403226e-05, "loss": 1.1186, "step": 1907 }, { "epoch": 0.19968602825745682, "grad_norm": 2.2610224754205457, "learning_rate": 1.852799525826863e-05, "loss": 1.0378, "step": 1908 }, { "epoch": 0.19979068550497123, "grad_norm": 2.1525597269244683, "learning_rate": 1.852622452625009e-05, "loss": 0.9537, "step": 1909 }, { "epoch": 0.1998953427524856, "grad_norm": 2.251421695737976, "learning_rate": 1.8524452814551067e-05, "loss": 1.0182, "step": 1910 }, { "epoch": 0.2, "grad_norm": 2.4742262214471458, "learning_rate": 1.852268012337514e-05, "loss": 0.9118, "step": 1911 }, { "epoch": 0.2001046572475144, "grad_norm": 2.351956722228832, "learning_rate": 1.8520906452925983e-05, "loss": 0.9906, "step": 1912 }, { "epoch": 0.20020931449502877, "grad_norm": 2.5457326056054304, "learning_rate": 1.8519131803407405e-05, "loss": 0.9223, "step": 1913 }, { "epoch": 0.20031397174254317, "grad_norm": 2.3125295104481087, "learning_rate": 1.8517356175023312e-05, "loss": 1.0705, "step": 1914 }, { "epoch": 0.20041862899005755, "grad_norm": 2.4672166374713655, "learning_rate": 1.8515579567977728e-05, "loss": 1.003, "step": 1915 }, { "epoch": 0.20052328623757196, "grad_norm": 2.1521918308626193, "learning_rate": 1.8513801982474788e-05, "loss": 0.9784, "step": 1916 }, { "epoch": 0.20062794348508634, "grad_norm": 2.4414899299046295, "learning_rate": 1.8512023418718745e-05, "loss": 1.0942, "step": 1917 }, { "epoch": 0.20073260073260074, "grad_norm": 2.3650808987908105, "learning_rate": 1.851024387691395e-05, "loss": 1.047, "step": 1918 }, { "epoch": 0.20083725798011512, "grad_norm": 2.5314876863007703, "learning_rate": 1.8508463357264883e-05, "loss": 1.0066, "step": 1919 }, { "epoch": 0.20094191522762953, "grad_norm": 2.0756056456744156, "learning_rate": 1.8506681859976127e-05, "loss": 0.8131, "step": 1920 }, { "epoch": 0.2010465724751439, "grad_norm": 2.4985838715578894, "learning_rate": 1.850489938525238e-05, "loss": 0.9835, "step": 1921 }, { "epoch": 0.20115122972265828, "grad_norm": 2.272801695052019, "learning_rate": 1.8503115933298455e-05, "loss": 1.0649, "step": 1922 }, { "epoch": 0.2012558869701727, "grad_norm": 2.0961137864615265, "learning_rate": 1.850133150431927e-05, "loss": 1.0169, "step": 1923 }, { "epoch": 0.20136054421768707, "grad_norm": 2.2133469820080234, "learning_rate": 1.8499546098519863e-05, "loss": 0.7855, "step": 1924 }, { "epoch": 0.20146520146520147, "grad_norm": 2.272863601676403, "learning_rate": 1.8497759716105376e-05, "loss": 1.1192, "step": 1925 }, { "epoch": 0.20156985871271585, "grad_norm": 2.3222789724527915, "learning_rate": 1.8495972357281073e-05, "loss": 1.1352, "step": 1926 }, { "epoch": 0.20167451596023026, "grad_norm": 2.3537538388981933, "learning_rate": 1.8494184022252324e-05, "loss": 1.0501, "step": 1927 }, { "epoch": 0.20177917320774463, "grad_norm": 2.7018467868355485, "learning_rate": 1.849239471122461e-05, "loss": 1.1058, "step": 1928 }, { "epoch": 0.20188383045525904, "grad_norm": 2.004307635291416, "learning_rate": 1.8490604424403527e-05, "loss": 0.874, "step": 1929 }, { "epoch": 0.20198848770277342, "grad_norm": 2.096808325868463, "learning_rate": 1.8488813161994784e-05, "loss": 1.0896, "step": 1930 }, { "epoch": 0.2020931449502878, "grad_norm": 2.0352763262829705, "learning_rate": 1.84870209242042e-05, "loss": 0.8975, "step": 1931 }, { "epoch": 0.2021978021978022, "grad_norm": 2.6788994218915705, "learning_rate": 1.8485227711237707e-05, "loss": 1.0147, "step": 1932 }, { "epoch": 0.20230245944531658, "grad_norm": 2.462010742069801, "learning_rate": 1.848343352330135e-05, "loss": 1.1053, "step": 1933 }, { "epoch": 0.20240711669283099, "grad_norm": 2.50095298478946, "learning_rate": 1.848163836060128e-05, "loss": 1.0792, "step": 1934 }, { "epoch": 0.20251177394034536, "grad_norm": 2.49507265553553, "learning_rate": 1.847984222334377e-05, "loss": 0.9941, "step": 1935 }, { "epoch": 0.20261643118785977, "grad_norm": 2.669346708093879, "learning_rate": 1.84780451117352e-05, "loss": 1.1946, "step": 1936 }, { "epoch": 0.20272108843537415, "grad_norm": 3.0351707992452344, "learning_rate": 1.8476247025982058e-05, "loss": 1.0224, "step": 1937 }, { "epoch": 0.20282574568288855, "grad_norm": 2.1301304657947058, "learning_rate": 1.847444796629095e-05, "loss": 0.9222, "step": 1938 }, { "epoch": 0.20293040293040293, "grad_norm": 2.1876447064810467, "learning_rate": 1.847264793286859e-05, "loss": 0.9753, "step": 1939 }, { "epoch": 0.2030350601779173, "grad_norm": 2.2563332049617006, "learning_rate": 1.8470846925921807e-05, "loss": 1.0181, "step": 1940 }, { "epoch": 0.20313971742543172, "grad_norm": 2.3267222100748826, "learning_rate": 1.8469044945657543e-05, "loss": 1.015, "step": 1941 }, { "epoch": 0.2032443746729461, "grad_norm": 2.441159690044786, "learning_rate": 1.8467241992282842e-05, "loss": 1.1735, "step": 1942 }, { "epoch": 0.2033490319204605, "grad_norm": 2.1163326570343806, "learning_rate": 1.8465438066004875e-05, "loss": 1.0618, "step": 1943 }, { "epoch": 0.20345368916797488, "grad_norm": 2.117403160909713, "learning_rate": 1.846363316703091e-05, "loss": 1.0112, "step": 1944 }, { "epoch": 0.20355834641548928, "grad_norm": 2.597051152568622, "learning_rate": 1.846182729556834e-05, "loss": 0.9412, "step": 1945 }, { "epoch": 0.20366300366300366, "grad_norm": 2.3342553808450024, "learning_rate": 1.8460020451824658e-05, "loss": 0.8846, "step": 1946 }, { "epoch": 0.20376766091051807, "grad_norm": 2.493805789843552, "learning_rate": 1.8458212636007474e-05, "loss": 1.0156, "step": 1947 }, { "epoch": 0.20387231815803245, "grad_norm": 2.597283250350886, "learning_rate": 1.8456403848324513e-05, "loss": 1.0745, "step": 1948 }, { "epoch": 0.20397697540554682, "grad_norm": 2.178100429523685, "learning_rate": 1.8454594088983608e-05, "loss": 1.0338, "step": 1949 }, { "epoch": 0.20408163265306123, "grad_norm": 2.627673207192234, "learning_rate": 1.8452783358192697e-05, "loss": 1.1234, "step": 1950 }, { "epoch": 0.2041862899005756, "grad_norm": 2.6660377019985315, "learning_rate": 1.845097165615985e-05, "loss": 1.0812, "step": 1951 }, { "epoch": 0.20429094714809, "grad_norm": 2.4399501701622492, "learning_rate": 1.8449158983093226e-05, "loss": 1.1253, "step": 1952 }, { "epoch": 0.2043956043956044, "grad_norm": 2.078040777525875, "learning_rate": 1.8447345339201105e-05, "loss": 0.9986, "step": 1953 }, { "epoch": 0.2045002616431188, "grad_norm": 2.344622782021923, "learning_rate": 1.844553072469188e-05, "loss": 1.0179, "step": 1954 }, { "epoch": 0.20460491889063318, "grad_norm": 2.3007348240880807, "learning_rate": 1.8443715139774055e-05, "loss": 1.0646, "step": 1955 }, { "epoch": 0.20470957613814755, "grad_norm": 2.281518769125784, "learning_rate": 1.844189858465624e-05, "loss": 1.0593, "step": 1956 }, { "epoch": 0.20481423338566196, "grad_norm": 2.4164773491310765, "learning_rate": 1.844008105954717e-05, "loss": 1.078, "step": 1957 }, { "epoch": 0.20491889063317634, "grad_norm": 2.3330463609538232, "learning_rate": 1.843826256465567e-05, "loss": 1.0492, "step": 1958 }, { "epoch": 0.20502354788069074, "grad_norm": 2.2742931414383167, "learning_rate": 1.8436443100190702e-05, "loss": 0.9409, "step": 1959 }, { "epoch": 0.20512820512820512, "grad_norm": 4.341203039506652, "learning_rate": 1.8434622666361316e-05, "loss": 1.0529, "step": 1960 }, { "epoch": 0.20523286237571953, "grad_norm": 2.293281560740709, "learning_rate": 1.8432801263376686e-05, "loss": 1.1591, "step": 1961 }, { "epoch": 0.2053375196232339, "grad_norm": 2.0799628461136614, "learning_rate": 1.8430978891446097e-05, "loss": 1.009, "step": 1962 }, { "epoch": 0.2054421768707483, "grad_norm": 2.34597923911034, "learning_rate": 1.8429155550778942e-05, "loss": 1.0635, "step": 1963 }, { "epoch": 0.2055468341182627, "grad_norm": 2.525872889513285, "learning_rate": 1.8427331241584724e-05, "loss": 0.9838, "step": 1964 }, { "epoch": 0.20565149136577707, "grad_norm": 2.178144919085616, "learning_rate": 1.8425505964073065e-05, "loss": 1.0336, "step": 1965 }, { "epoch": 0.20575614861329147, "grad_norm": 2.8651481979144036, "learning_rate": 1.8423679718453692e-05, "loss": 1.1158, "step": 1966 }, { "epoch": 0.20586080586080585, "grad_norm": 2.555904916520759, "learning_rate": 1.842185250493644e-05, "loss": 1.0651, "step": 1967 }, { "epoch": 0.20596546310832026, "grad_norm": 2.140596234465768, "learning_rate": 1.842002432373126e-05, "loss": 0.8645, "step": 1968 }, { "epoch": 0.20607012035583464, "grad_norm": 2.089196272187266, "learning_rate": 1.8418195175048217e-05, "loss": 1.0134, "step": 1969 }, { "epoch": 0.20617477760334904, "grad_norm": 2.6661677515756597, "learning_rate": 1.841636505909749e-05, "loss": 1.0963, "step": 1970 }, { "epoch": 0.20627943485086342, "grad_norm": 2.023006671314738, "learning_rate": 1.841453397608935e-05, "loss": 0.9889, "step": 1971 }, { "epoch": 0.20638409209837782, "grad_norm": 2.4607506296207267, "learning_rate": 1.8412701926234197e-05, "loss": 0.9246, "step": 1972 }, { "epoch": 0.2064887493458922, "grad_norm": 2.0748117406848228, "learning_rate": 1.841086890974254e-05, "loss": 1.0495, "step": 1973 }, { "epoch": 0.20659340659340658, "grad_norm": 2.293516807467432, "learning_rate": 1.8409034926824995e-05, "loss": 1.0593, "step": 1974 }, { "epoch": 0.206698063840921, "grad_norm": 2.2095392100107003, "learning_rate": 1.8407199977692292e-05, "loss": 0.9036, "step": 1975 }, { "epoch": 0.20680272108843537, "grad_norm": 2.0148849689379587, "learning_rate": 1.8405364062555263e-05, "loss": 0.9078, "step": 1976 }, { "epoch": 0.20690737833594977, "grad_norm": 2.3688402106868907, "learning_rate": 1.840352718162487e-05, "loss": 1.0614, "step": 1977 }, { "epoch": 0.20701203558346415, "grad_norm": 2.316317970644639, "learning_rate": 1.8401689335112164e-05, "loss": 0.9362, "step": 1978 }, { "epoch": 0.20711669283097855, "grad_norm": 2.16166387910284, "learning_rate": 1.8399850523228325e-05, "loss": 0.9942, "step": 1979 }, { "epoch": 0.20722135007849293, "grad_norm": 2.6063947890703902, "learning_rate": 1.8398010746184628e-05, "loss": 1.1124, "step": 1980 }, { "epoch": 0.20732600732600734, "grad_norm": 2.47106442178898, "learning_rate": 1.8396170004192474e-05, "loss": 1.1101, "step": 1981 }, { "epoch": 0.20743066457352172, "grad_norm": 2.2549560321044875, "learning_rate": 1.8394328297463366e-05, "loss": 1.0547, "step": 1982 }, { "epoch": 0.2075353218210361, "grad_norm": 2.0122298157744796, "learning_rate": 1.839248562620892e-05, "loss": 0.8095, "step": 1983 }, { "epoch": 0.2076399790685505, "grad_norm": 2.1855154297200494, "learning_rate": 1.839064199064086e-05, "loss": 0.9771, "step": 1984 }, { "epoch": 0.20774463631606488, "grad_norm": 1.9322206736673364, "learning_rate": 1.8388797390971026e-05, "loss": 0.8952, "step": 1985 }, { "epoch": 0.20784929356357928, "grad_norm": 2.414532007703879, "learning_rate": 1.838695182741137e-05, "loss": 0.9121, "step": 1986 }, { "epoch": 0.20795395081109366, "grad_norm": 2.186308171958817, "learning_rate": 1.8385105300173943e-05, "loss": 1.0025, "step": 1987 }, { "epoch": 0.20805860805860807, "grad_norm": 2.2280850457274455, "learning_rate": 1.838325780947092e-05, "loss": 1.0759, "step": 1988 }, { "epoch": 0.20816326530612245, "grad_norm": 2.2909334704230195, "learning_rate": 1.838140935551458e-05, "loss": 0.9594, "step": 1989 }, { "epoch": 0.20826792255363685, "grad_norm": 2.104015005202415, "learning_rate": 1.8379559938517314e-05, "loss": 0.8966, "step": 1990 }, { "epoch": 0.20837257980115123, "grad_norm": 3.017180350499111, "learning_rate": 1.8377709558691622e-05, "loss": 1.0479, "step": 1991 }, { "epoch": 0.2084772370486656, "grad_norm": 2.1719617305672356, "learning_rate": 1.837585821625012e-05, "loss": 1.131, "step": 1992 }, { "epoch": 0.20858189429618001, "grad_norm": 2.8996497541322994, "learning_rate": 1.8374005911405528e-05, "loss": 0.9922, "step": 1993 }, { "epoch": 0.2086865515436944, "grad_norm": 2.2079598409798527, "learning_rate": 1.8372152644370684e-05, "loss": 1.007, "step": 1994 }, { "epoch": 0.2087912087912088, "grad_norm": 2.1831162058992684, "learning_rate": 1.8370298415358527e-05, "loss": 1.0665, "step": 1995 }, { "epoch": 0.20889586603872318, "grad_norm": 2.164569022998782, "learning_rate": 1.8368443224582115e-05, "loss": 1.1039, "step": 1996 }, { "epoch": 0.20900052328623758, "grad_norm": 3.8082638015537533, "learning_rate": 1.836658707225461e-05, "loss": 1.1108, "step": 1997 }, { "epoch": 0.20910518053375196, "grad_norm": 2.252635657133077, "learning_rate": 1.836472995858929e-05, "loss": 1.0929, "step": 1998 }, { "epoch": 0.20920983778126634, "grad_norm": 2.159315517759604, "learning_rate": 1.836287188379954e-05, "loss": 1.0949, "step": 1999 }, { "epoch": 0.20931449502878074, "grad_norm": 2.3732269117605327, "learning_rate": 1.836101284809886e-05, "loss": 1.2197, "step": 2000 }, { "epoch": 0.20941915227629512, "grad_norm": 2.062458615626669, "learning_rate": 1.835915285170085e-05, "loss": 0.8862, "step": 2001 }, { "epoch": 0.20952380952380953, "grad_norm": 2.3583708531763636, "learning_rate": 1.8357291894819236e-05, "loss": 1.0029, "step": 2002 }, { "epoch": 0.2096284667713239, "grad_norm": 2.284742186222721, "learning_rate": 1.8355429977667837e-05, "loss": 0.9637, "step": 2003 }, { "epoch": 0.2097331240188383, "grad_norm": 2.2878915237206865, "learning_rate": 1.83535671004606e-05, "loss": 1.0314, "step": 2004 }, { "epoch": 0.2098377812663527, "grad_norm": 2.380158906701526, "learning_rate": 1.8351703263411567e-05, "loss": 1.0208, "step": 2005 }, { "epoch": 0.2099424385138671, "grad_norm": 2.3836744458989756, "learning_rate": 1.83498384667349e-05, "loss": 1.0668, "step": 2006 }, { "epoch": 0.21004709576138147, "grad_norm": 2.1286045732612147, "learning_rate": 1.8347972710644862e-05, "loss": 0.9946, "step": 2007 }, { "epoch": 0.21015175300889585, "grad_norm": 2.400087509008151, "learning_rate": 1.8346105995355837e-05, "loss": 0.9493, "step": 2008 }, { "epoch": 0.21025641025641026, "grad_norm": 2.128726329762142, "learning_rate": 1.8344238321082316e-05, "loss": 0.9336, "step": 2009 }, { "epoch": 0.21036106750392464, "grad_norm": 2.1830248786492956, "learning_rate": 1.8342369688038894e-05, "loss": 1.0607, "step": 2010 }, { "epoch": 0.21046572475143904, "grad_norm": 2.2689021885521576, "learning_rate": 1.8340500096440286e-05, "loss": 1.069, "step": 2011 }, { "epoch": 0.21057038199895342, "grad_norm": 2.5029531826193057, "learning_rate": 1.833862954650131e-05, "loss": 1.1992, "step": 2012 }, { "epoch": 0.21067503924646783, "grad_norm": 2.69872427128542, "learning_rate": 1.833675803843689e-05, "loss": 1.048, "step": 2013 }, { "epoch": 0.2107796964939822, "grad_norm": 2.6551070968845094, "learning_rate": 1.8334885572462076e-05, "loss": 1.0161, "step": 2014 }, { "epoch": 0.2108843537414966, "grad_norm": 2.5294187266588293, "learning_rate": 1.8333012148792008e-05, "loss": 1.0735, "step": 2015 }, { "epoch": 0.210989010989011, "grad_norm": 2.1783913112001754, "learning_rate": 1.8331137767641958e-05, "loss": 1.0417, "step": 2016 }, { "epoch": 0.21109366823652537, "grad_norm": 2.369045148316055, "learning_rate": 1.8329262429227285e-05, "loss": 1.1235, "step": 2017 }, { "epoch": 0.21119832548403977, "grad_norm": 2.117286187563206, "learning_rate": 1.8327386133763475e-05, "loss": 0.8765, "step": 2018 }, { "epoch": 0.21130298273155415, "grad_norm": 2.481083285401919, "learning_rate": 1.8325508881466115e-05, "loss": 1.0116, "step": 2019 }, { "epoch": 0.21140763997906856, "grad_norm": 2.2535978341456753, "learning_rate": 1.832363067255091e-05, "loss": 1.0478, "step": 2020 }, { "epoch": 0.21151229722658293, "grad_norm": 2.1891174065268824, "learning_rate": 1.8321751507233663e-05, "loss": 0.9707, "step": 2021 }, { "epoch": 0.21161695447409734, "grad_norm": 2.29735895919819, "learning_rate": 1.8319871385730298e-05, "loss": 1.0669, "step": 2022 }, { "epoch": 0.21172161172161172, "grad_norm": 2.147861663463576, "learning_rate": 1.831799030825685e-05, "loss": 1.0486, "step": 2023 }, { "epoch": 0.21182626896912612, "grad_norm": 2.0405385220270973, "learning_rate": 1.831610827502945e-05, "loss": 0.9038, "step": 2024 }, { "epoch": 0.2119309262166405, "grad_norm": 2.5478447806636657, "learning_rate": 1.8314225286264353e-05, "loss": 1.0453, "step": 2025 }, { "epoch": 0.21203558346415488, "grad_norm": 2.3690660543652475, "learning_rate": 1.8312341342177912e-05, "loss": 1.0493, "step": 2026 }, { "epoch": 0.2121402407116693, "grad_norm": 2.2032721351975995, "learning_rate": 1.83104564429866e-05, "loss": 0.9498, "step": 2027 }, { "epoch": 0.21224489795918366, "grad_norm": 2.609109823726276, "learning_rate": 1.8308570588906996e-05, "loss": 1.1299, "step": 2028 }, { "epoch": 0.21234955520669807, "grad_norm": 1.8610292340166064, "learning_rate": 1.8306683780155792e-05, "loss": 0.9029, "step": 2029 }, { "epoch": 0.21245421245421245, "grad_norm": 2.4304964979220025, "learning_rate": 1.830479601694978e-05, "loss": 1.087, "step": 2030 }, { "epoch": 0.21255886970172685, "grad_norm": 2.381417687058148, "learning_rate": 1.830290729950587e-05, "loss": 0.9628, "step": 2031 }, { "epoch": 0.21266352694924123, "grad_norm": 2.017480163965436, "learning_rate": 1.8301017628041076e-05, "loss": 0.799, "step": 2032 }, { "epoch": 0.21276818419675564, "grad_norm": 2.935623238435619, "learning_rate": 1.829912700277253e-05, "loss": 0.9128, "step": 2033 }, { "epoch": 0.21287284144427002, "grad_norm": 2.2265140475206384, "learning_rate": 1.8297235423917473e-05, "loss": 0.9985, "step": 2034 }, { "epoch": 0.2129774986917844, "grad_norm": 2.188420784579257, "learning_rate": 1.829534289169324e-05, "loss": 1.0615, "step": 2035 }, { "epoch": 0.2130821559392988, "grad_norm": 1.9589933092767846, "learning_rate": 1.8293449406317294e-05, "loss": 0.9486, "step": 2036 }, { "epoch": 0.21318681318681318, "grad_norm": 2.7569189446285796, "learning_rate": 1.82915549680072e-05, "loss": 1.0995, "step": 2037 }, { "epoch": 0.21329147043432758, "grad_norm": 1.9836569561149964, "learning_rate": 1.828965957698063e-05, "loss": 1.0582, "step": 2038 }, { "epoch": 0.21339612768184196, "grad_norm": 2.6141067722134266, "learning_rate": 1.8287763233455365e-05, "loss": 0.9942, "step": 2039 }, { "epoch": 0.21350078492935637, "grad_norm": 2.6804666482513726, "learning_rate": 1.8285865937649313e-05, "loss": 1.0738, "step": 2040 }, { "epoch": 0.21360544217687075, "grad_norm": 2.129743160129942, "learning_rate": 1.8283967689780464e-05, "loss": 0.81, "step": 2041 }, { "epoch": 0.21371009942438512, "grad_norm": 2.363273343477441, "learning_rate": 1.8282068490066932e-05, "loss": 0.8765, "step": 2042 }, { "epoch": 0.21381475667189953, "grad_norm": 2.476072286035289, "learning_rate": 1.828016833872694e-05, "loss": 1.0747, "step": 2043 }, { "epoch": 0.2139194139194139, "grad_norm": 2.2832920524718343, "learning_rate": 1.8278267235978823e-05, "loss": 1.0481, "step": 2044 }, { "epoch": 0.21402407116692831, "grad_norm": 2.0256536513257473, "learning_rate": 1.8276365182041015e-05, "loss": 0.9437, "step": 2045 }, { "epoch": 0.2141287284144427, "grad_norm": 2.0349450996429237, "learning_rate": 1.8274462177132074e-05, "loss": 0.9745, "step": 2046 }, { "epoch": 0.2142333856619571, "grad_norm": 2.3558277924424154, "learning_rate": 1.8272558221470652e-05, "loss": 0.9551, "step": 2047 }, { "epoch": 0.21433804290947148, "grad_norm": 2.2290513676067962, "learning_rate": 1.8270653315275525e-05, "loss": 1.0465, "step": 2048 }, { "epoch": 0.21444270015698588, "grad_norm": 2.258045963609993, "learning_rate": 1.826874745876556e-05, "loss": 1.0306, "step": 2049 }, { "epoch": 0.21454735740450026, "grad_norm": 3.527078541148376, "learning_rate": 1.8266840652159752e-05, "loss": 0.8868, "step": 2050 }, { "epoch": 0.21465201465201464, "grad_norm": 2.2609749036853226, "learning_rate": 1.8264932895677195e-05, "loss": 0.9106, "step": 2051 }, { "epoch": 0.21475667189952904, "grad_norm": 2.5062029257028215, "learning_rate": 1.826302418953709e-05, "loss": 1.1274, "step": 2052 }, { "epoch": 0.21486132914704342, "grad_norm": 2.353992741301644, "learning_rate": 1.8261114533958757e-05, "loss": 1.0853, "step": 2053 }, { "epoch": 0.21496598639455783, "grad_norm": 2.2428620293641144, "learning_rate": 1.825920392916162e-05, "loss": 0.9662, "step": 2054 }, { "epoch": 0.2150706436420722, "grad_norm": 2.305344851187627, "learning_rate": 1.8257292375365202e-05, "loss": 1.1007, "step": 2055 }, { "epoch": 0.2151753008895866, "grad_norm": 2.2656413224426033, "learning_rate": 1.825537987278916e-05, "loss": 0.9697, "step": 2056 }, { "epoch": 0.215279958137101, "grad_norm": 2.148064446805488, "learning_rate": 1.8253466421653226e-05, "loss": 0.9534, "step": 2057 }, { "epoch": 0.2153846153846154, "grad_norm": 2.1646722395898714, "learning_rate": 1.8251552022177273e-05, "loss": 0.9849, "step": 2058 }, { "epoch": 0.21548927263212977, "grad_norm": 2.2230725886492007, "learning_rate": 1.8249636674581265e-05, "loss": 1.0095, "step": 2059 }, { "epoch": 0.21559392987964415, "grad_norm": 2.284428221986189, "learning_rate": 1.824772037908528e-05, "loss": 0.987, "step": 2060 }, { "epoch": 0.21569858712715856, "grad_norm": 2.5819764848015256, "learning_rate": 1.8245803135909498e-05, "loss": 1.0183, "step": 2061 }, { "epoch": 0.21580324437467294, "grad_norm": 2.3210577007465423, "learning_rate": 1.8243884945274227e-05, "loss": 0.9843, "step": 2062 }, { "epoch": 0.21590790162218734, "grad_norm": 2.5362607817744625, "learning_rate": 1.824196580739986e-05, "loss": 1.1811, "step": 2063 }, { "epoch": 0.21601255886970172, "grad_norm": 2.3962630982553397, "learning_rate": 1.8240045722506915e-05, "loss": 1.0299, "step": 2064 }, { "epoch": 0.21611721611721613, "grad_norm": 1.988482806508578, "learning_rate": 1.823812469081601e-05, "loss": 1.0407, "step": 2065 }, { "epoch": 0.2162218733647305, "grad_norm": 2.727138846671659, "learning_rate": 1.823620271254788e-05, "loss": 0.8944, "step": 2066 }, { "epoch": 0.2163265306122449, "grad_norm": 2.2182850283211475, "learning_rate": 1.8234279787923358e-05, "loss": 1.0132, "step": 2067 }, { "epoch": 0.2164311878597593, "grad_norm": 2.4303463162856365, "learning_rate": 1.82323559171634e-05, "loss": 1.0625, "step": 2068 }, { "epoch": 0.21653584510727367, "grad_norm": 2.419684174616418, "learning_rate": 1.8230431100489057e-05, "loss": 0.89, "step": 2069 }, { "epoch": 0.21664050235478807, "grad_norm": 2.4048668926385885, "learning_rate": 1.8228505338121496e-05, "loss": 1.0583, "step": 2070 }, { "epoch": 0.21674515960230245, "grad_norm": 2.342058187371194, "learning_rate": 1.822657863028199e-05, "loss": 0.9905, "step": 2071 }, { "epoch": 0.21684981684981686, "grad_norm": 2.4230482675249716, "learning_rate": 1.822465097719192e-05, "loss": 1.0081, "step": 2072 }, { "epoch": 0.21695447409733123, "grad_norm": 2.72537241732261, "learning_rate": 1.8222722379072785e-05, "loss": 1.0245, "step": 2073 }, { "epoch": 0.21705913134484564, "grad_norm": 2.4296676623937556, "learning_rate": 1.8220792836146177e-05, "loss": 1.0386, "step": 2074 }, { "epoch": 0.21716378859236002, "grad_norm": 2.5375888498111343, "learning_rate": 1.8218862348633806e-05, "loss": 1.0651, "step": 2075 }, { "epoch": 0.21726844583987442, "grad_norm": 2.3320537485576804, "learning_rate": 1.821693091675749e-05, "loss": 1.0419, "step": 2076 }, { "epoch": 0.2173731030873888, "grad_norm": 2.2920845320825793, "learning_rate": 1.8214998540739154e-05, "loss": 1.0797, "step": 2077 }, { "epoch": 0.21747776033490318, "grad_norm": 2.156245726685389, "learning_rate": 1.8213065220800833e-05, "loss": 1.0072, "step": 2078 }, { "epoch": 0.2175824175824176, "grad_norm": 2.4898649858067117, "learning_rate": 1.821113095716467e-05, "loss": 1.1149, "step": 2079 }, { "epoch": 0.21768707482993196, "grad_norm": 2.0771189974183586, "learning_rate": 1.820919575005291e-05, "loss": 1.0435, "step": 2080 }, { "epoch": 0.21779173207744637, "grad_norm": 2.277727814797236, "learning_rate": 1.820725959968792e-05, "loss": 1.0899, "step": 2081 }, { "epoch": 0.21789638932496075, "grad_norm": 2.373876155395553, "learning_rate": 1.8205322506292162e-05, "loss": 1.0877, "step": 2082 }, { "epoch": 0.21800104657247515, "grad_norm": 2.1588302308233525, "learning_rate": 1.8203384470088217e-05, "loss": 0.8801, "step": 2083 }, { "epoch": 0.21810570381998953, "grad_norm": 2.28595739064826, "learning_rate": 1.8201445491298765e-05, "loss": 1.051, "step": 2084 }, { "epoch": 0.21821036106750394, "grad_norm": 2.1087266644570923, "learning_rate": 1.81995055701466e-05, "loss": 1.1129, "step": 2085 }, { "epoch": 0.21831501831501832, "grad_norm": 2.425503184353989, "learning_rate": 1.8197564706854624e-05, "loss": 0.855, "step": 2086 }, { "epoch": 0.2184196755625327, "grad_norm": 2.2595365972197294, "learning_rate": 1.8195622901645843e-05, "loss": 0.8961, "step": 2087 }, { "epoch": 0.2185243328100471, "grad_norm": 2.514090138405137, "learning_rate": 1.8193680154743375e-05, "loss": 1.0657, "step": 2088 }, { "epoch": 0.21862899005756148, "grad_norm": 2.4118987132654266, "learning_rate": 1.819173646637045e-05, "loss": 1.0297, "step": 2089 }, { "epoch": 0.21873364730507588, "grad_norm": 2.1575019695204127, "learning_rate": 1.8189791836750396e-05, "loss": 0.9463, "step": 2090 }, { "epoch": 0.21883830455259026, "grad_norm": 2.408277886570118, "learning_rate": 1.818784626610666e-05, "loss": 1.1036, "step": 2091 }, { "epoch": 0.21894296180010467, "grad_norm": 1.795388244350893, "learning_rate": 1.8185899754662787e-05, "loss": 0.8641, "step": 2092 }, { "epoch": 0.21904761904761905, "grad_norm": 2.581772835215814, "learning_rate": 1.818395230264244e-05, "loss": 1.061, "step": 2093 }, { "epoch": 0.21915227629513342, "grad_norm": 2.259983033421856, "learning_rate": 1.8182003910269382e-05, "loss": 1.0749, "step": 2094 }, { "epoch": 0.21925693354264783, "grad_norm": 2.3449988057297206, "learning_rate": 1.818005457776749e-05, "loss": 1.1159, "step": 2095 }, { "epoch": 0.2193615907901622, "grad_norm": 2.2960373512554746, "learning_rate": 1.8178104305360743e-05, "loss": 1.155, "step": 2096 }, { "epoch": 0.21946624803767661, "grad_norm": 2.2977900952956687, "learning_rate": 1.817615309327323e-05, "loss": 1.0825, "step": 2097 }, { "epoch": 0.219570905285191, "grad_norm": 1.9351630635888202, "learning_rate": 1.8174200941729153e-05, "loss": 0.818, "step": 2098 }, { "epoch": 0.2196755625327054, "grad_norm": 1.9567019706927198, "learning_rate": 1.8172247850952816e-05, "loss": 0.9481, "step": 2099 }, { "epoch": 0.21978021978021978, "grad_norm": 2.431540987817807, "learning_rate": 1.817029382116864e-05, "loss": 1.1475, "step": 2100 }, { "epoch": 0.21988487702773418, "grad_norm": 2.6689599398800925, "learning_rate": 1.8168338852601136e-05, "loss": 0.9702, "step": 2101 }, { "epoch": 0.21998953427524856, "grad_norm": 2.1586431953787004, "learning_rate": 1.8166382945474937e-05, "loss": 0.9888, "step": 2102 }, { "epoch": 0.22009419152276294, "grad_norm": 2.0960254129246865, "learning_rate": 1.8164426100014787e-05, "loss": 0.8704, "step": 2103 }, { "epoch": 0.22019884877027734, "grad_norm": 2.717246868399569, "learning_rate": 1.8162468316445526e-05, "loss": 0.7871, "step": 2104 }, { "epoch": 0.22030350601779172, "grad_norm": 2.2870173127883255, "learning_rate": 1.8160509594992105e-05, "loss": 1.0826, "step": 2105 }, { "epoch": 0.22040816326530613, "grad_norm": 2.383105180002082, "learning_rate": 1.815854993587959e-05, "loss": 1.017, "step": 2106 }, { "epoch": 0.2205128205128205, "grad_norm": 2.1939394100255596, "learning_rate": 1.8156589339333154e-05, "loss": 0.992, "step": 2107 }, { "epoch": 0.2206174777603349, "grad_norm": 1.995289142653823, "learning_rate": 1.815462780557806e-05, "loss": 0.833, "step": 2108 }, { "epoch": 0.2207221350078493, "grad_norm": 2.528094256454689, "learning_rate": 1.8152665334839704e-05, "loss": 1.1359, "step": 2109 }, { "epoch": 0.2208267922553637, "grad_norm": 2.8889436520004237, "learning_rate": 1.815070192734357e-05, "loss": 0.9671, "step": 2110 }, { "epoch": 0.22093144950287807, "grad_norm": 2.566640937673796, "learning_rate": 1.8148737583315266e-05, "loss": 1.0503, "step": 2111 }, { "epoch": 0.22103610675039245, "grad_norm": 2.1263834504381656, "learning_rate": 1.8146772302980494e-05, "loss": 0.7854, "step": 2112 }, { "epoch": 0.22114076399790686, "grad_norm": 2.121646140657449, "learning_rate": 1.814480608656507e-05, "loss": 0.8763, "step": 2113 }, { "epoch": 0.22124542124542124, "grad_norm": 2.4127305165775956, "learning_rate": 1.814283893429491e-05, "loss": 0.906, "step": 2114 }, { "epoch": 0.22135007849293564, "grad_norm": 2.4552352187699493, "learning_rate": 1.8140870846396055e-05, "loss": 1.1067, "step": 2115 }, { "epoch": 0.22145473574045002, "grad_norm": 2.116477172570462, "learning_rate": 1.8138901823094634e-05, "loss": 0.9615, "step": 2116 }, { "epoch": 0.22155939298796443, "grad_norm": 2.1641002061702035, "learning_rate": 1.8136931864616894e-05, "loss": 1.0059, "step": 2117 }, { "epoch": 0.2216640502354788, "grad_norm": 2.418244872883079, "learning_rate": 1.8134960971189186e-05, "loss": 1.1253, "step": 2118 }, { "epoch": 0.2217687074829932, "grad_norm": 2.550192195359462, "learning_rate": 1.8132989143037973e-05, "loss": 0.9129, "step": 2119 }, { "epoch": 0.2218733647305076, "grad_norm": 2.1808748350442664, "learning_rate": 1.8131016380389822e-05, "loss": 0.9744, "step": 2120 }, { "epoch": 0.22197802197802197, "grad_norm": 1.8551489114253599, "learning_rate": 1.8129042683471404e-05, "loss": 0.8682, "step": 2121 }, { "epoch": 0.22208267922553637, "grad_norm": 2.08940663789057, "learning_rate": 1.81270680525095e-05, "loss": 0.9402, "step": 2122 }, { "epoch": 0.22218733647305075, "grad_norm": 2.2964529328748506, "learning_rate": 1.8125092487731005e-05, "loss": 0.9279, "step": 2123 }, { "epoch": 0.22229199372056516, "grad_norm": 2.215371699637496, "learning_rate": 1.812311598936291e-05, "loss": 1.0981, "step": 2124 }, { "epoch": 0.22239665096807953, "grad_norm": 2.652740527835587, "learning_rate": 1.8121138557632322e-05, "loss": 1.083, "step": 2125 }, { "epoch": 0.22250130821559394, "grad_norm": 2.5149433842670503, "learning_rate": 1.811916019276645e-05, "loss": 0.8659, "step": 2126 }, { "epoch": 0.22260596546310832, "grad_norm": 1.9949643722746215, "learning_rate": 1.811718089499261e-05, "loss": 1.0477, "step": 2127 }, { "epoch": 0.22271062271062272, "grad_norm": 2.6367101170753173, "learning_rate": 1.8115200664538234e-05, "loss": 0.9465, "step": 2128 }, { "epoch": 0.2228152799581371, "grad_norm": 2.1662145017836645, "learning_rate": 1.8113219501630848e-05, "loss": 0.9172, "step": 2129 }, { "epoch": 0.22291993720565148, "grad_norm": 2.139400049602698, "learning_rate": 1.8111237406498096e-05, "loss": 0.9777, "step": 2130 }, { "epoch": 0.2230245944531659, "grad_norm": 2.0637795213950327, "learning_rate": 1.810925437936772e-05, "loss": 0.8885, "step": 2131 }, { "epoch": 0.22312925170068026, "grad_norm": 2.4433179860559733, "learning_rate": 1.810727042046758e-05, "loss": 1.1147, "step": 2132 }, { "epoch": 0.22323390894819467, "grad_norm": 2.507296458731943, "learning_rate": 1.810528553002563e-05, "loss": 1.0424, "step": 2133 }, { "epoch": 0.22333856619570905, "grad_norm": 2.476296110697989, "learning_rate": 1.810329970826994e-05, "loss": 1.0969, "step": 2134 }, { "epoch": 0.22344322344322345, "grad_norm": 2.2719919935385966, "learning_rate": 1.810131295542869e-05, "loss": 1.086, "step": 2135 }, { "epoch": 0.22354788069073783, "grad_norm": 2.1254287766207867, "learning_rate": 1.8099325271730158e-05, "loss": 1.0048, "step": 2136 }, { "epoch": 0.2236525379382522, "grad_norm": 2.2899571405767913, "learning_rate": 1.8097336657402733e-05, "loss": 1.025, "step": 2137 }, { "epoch": 0.22375719518576662, "grad_norm": 2.493384145123412, "learning_rate": 1.809534711267491e-05, "loss": 0.9832, "step": 2138 }, { "epoch": 0.223861852433281, "grad_norm": 2.3792722351135867, "learning_rate": 1.8093356637775296e-05, "loss": 1.1609, "step": 2139 }, { "epoch": 0.2239665096807954, "grad_norm": 2.007926261369221, "learning_rate": 1.8091365232932595e-05, "loss": 0.9712, "step": 2140 }, { "epoch": 0.22407116692830978, "grad_norm": 2.483067842936785, "learning_rate": 1.8089372898375622e-05, "loss": 1.0444, "step": 2141 }, { "epoch": 0.22417582417582418, "grad_norm": 2.3009244591558815, "learning_rate": 1.808737963433331e-05, "loss": 1.13, "step": 2142 }, { "epoch": 0.22428048142333856, "grad_norm": 2.3317608975873028, "learning_rate": 1.808538544103468e-05, "loss": 1.0947, "step": 2143 }, { "epoch": 0.22438513867085297, "grad_norm": 2.058037185074867, "learning_rate": 1.8083390318708875e-05, "loss": 1.0688, "step": 2144 }, { "epoch": 0.22448979591836735, "grad_norm": 2.1821994697350813, "learning_rate": 1.8081394267585134e-05, "loss": 1.0108, "step": 2145 }, { "epoch": 0.22459445316588172, "grad_norm": 2.093805533273431, "learning_rate": 1.8079397287892808e-05, "loss": 1.0334, "step": 2146 }, { "epoch": 0.22469911041339613, "grad_norm": 2.230362524509398, "learning_rate": 1.807739937986136e-05, "loss": 0.983, "step": 2147 }, { "epoch": 0.2248037676609105, "grad_norm": 2.2669738777674517, "learning_rate": 1.8075400543720342e-05, "loss": 1.0644, "step": 2148 }, { "epoch": 0.22490842490842491, "grad_norm": 2.4539503469583237, "learning_rate": 1.8073400779699434e-05, "loss": 1.0708, "step": 2149 }, { "epoch": 0.2250130821559393, "grad_norm": 2.366911153866586, "learning_rate": 1.807140008802841e-05, "loss": 1.0431, "step": 2150 }, { "epoch": 0.2251177394034537, "grad_norm": 2.144584808902021, "learning_rate": 1.8069398468937153e-05, "loss": 0.9618, "step": 2151 }, { "epoch": 0.22522239665096808, "grad_norm": 2.540674460417446, "learning_rate": 1.806739592265565e-05, "loss": 1.138, "step": 2152 }, { "epoch": 0.22532705389848248, "grad_norm": 2.206689167590175, "learning_rate": 1.8065392449414004e-05, "loss": 1.0123, "step": 2153 }, { "epoch": 0.22543171114599686, "grad_norm": 2.2813596687579474, "learning_rate": 1.806338804944242e-05, "loss": 1.07, "step": 2154 }, { "epoch": 0.22553636839351124, "grad_norm": 2.4718690247538873, "learning_rate": 1.8061382722971196e-05, "loss": 0.9068, "step": 2155 }, { "epoch": 0.22564102564102564, "grad_norm": 2.3583234330786507, "learning_rate": 1.8059376470230757e-05, "loss": 1.055, "step": 2156 }, { "epoch": 0.22574568288854002, "grad_norm": 2.4215574962079205, "learning_rate": 1.8057369291451626e-05, "loss": 1.0428, "step": 2157 }, { "epoch": 0.22585034013605443, "grad_norm": 1.9930502014891032, "learning_rate": 1.805536118686443e-05, "loss": 0.9008, "step": 2158 }, { "epoch": 0.2259549973835688, "grad_norm": 2.3283037616599027, "learning_rate": 1.80533521566999e-05, "loss": 1.0235, "step": 2159 }, { "epoch": 0.2260596546310832, "grad_norm": 2.3801695391927753, "learning_rate": 1.805134220118888e-05, "loss": 1.022, "step": 2160 }, { "epoch": 0.2261643118785976, "grad_norm": 2.397543403584425, "learning_rate": 1.8049331320562324e-05, "loss": 0.915, "step": 2161 }, { "epoch": 0.226268969126112, "grad_norm": 2.626509392011337, "learning_rate": 1.804731951505128e-05, "loss": 1.0158, "step": 2162 }, { "epoch": 0.22637362637362637, "grad_norm": 2.6074284746843395, "learning_rate": 1.804530678488691e-05, "loss": 0.9578, "step": 2163 }, { "epoch": 0.22647828362114075, "grad_norm": 2.2563607196090665, "learning_rate": 1.8043293130300482e-05, "loss": 0.9736, "step": 2164 }, { "epoch": 0.22658294086865516, "grad_norm": 2.5275115770936285, "learning_rate": 1.8041278551523365e-05, "loss": 1.1927, "step": 2165 }, { "epoch": 0.22668759811616954, "grad_norm": 3.2122832502725074, "learning_rate": 1.8039263048787042e-05, "loss": 1.0338, "step": 2166 }, { "epoch": 0.22679225536368394, "grad_norm": 2.1422445536418078, "learning_rate": 1.80372466223231e-05, "loss": 0.8647, "step": 2167 }, { "epoch": 0.22689691261119832, "grad_norm": 2.30508825920427, "learning_rate": 1.8035229272363226e-05, "loss": 0.9192, "step": 2168 }, { "epoch": 0.22700156985871273, "grad_norm": 2.3600467400757346, "learning_rate": 1.803321099913922e-05, "loss": 1.0098, "step": 2169 }, { "epoch": 0.2271062271062271, "grad_norm": 2.239202924612382, "learning_rate": 1.8031191802882984e-05, "loss": 1.0374, "step": 2170 }, { "epoch": 0.2272108843537415, "grad_norm": 2.007640041272985, "learning_rate": 1.8029171683826528e-05, "loss": 0.9717, "step": 2171 }, { "epoch": 0.2273155416012559, "grad_norm": 2.0849186079241453, "learning_rate": 1.802715064220197e-05, "loss": 0.8889, "step": 2172 }, { "epoch": 0.22742019884877027, "grad_norm": 2.441637760590174, "learning_rate": 1.8025128678241532e-05, "loss": 1.0504, "step": 2173 }, { "epoch": 0.22752485609628467, "grad_norm": 1.9626865084302276, "learning_rate": 1.802310579217754e-05, "loss": 0.9714, "step": 2174 }, { "epoch": 0.22762951334379905, "grad_norm": 2.434914606469338, "learning_rate": 1.8021081984242426e-05, "loss": 1.0148, "step": 2175 }, { "epoch": 0.22773417059131346, "grad_norm": 2.464927468743602, "learning_rate": 1.801905725466873e-05, "loss": 0.9263, "step": 2176 }, { "epoch": 0.22783882783882783, "grad_norm": 2.1365896230444017, "learning_rate": 1.8017031603689105e-05, "loss": 1.0849, "step": 2177 }, { "epoch": 0.22794348508634224, "grad_norm": 2.0108040763164894, "learning_rate": 1.801500503153629e-05, "loss": 0.8429, "step": 2178 }, { "epoch": 0.22804814233385662, "grad_norm": 2.5168079546538507, "learning_rate": 1.8012977538443156e-05, "loss": 1.0145, "step": 2179 }, { "epoch": 0.228152799581371, "grad_norm": 2.2024177758411088, "learning_rate": 1.801094912464265e-05, "loss": 1.1185, "step": 2180 }, { "epoch": 0.2282574568288854, "grad_norm": 1.8831830056093484, "learning_rate": 1.8008919790367854e-05, "loss": 0.9436, "step": 2181 }, { "epoch": 0.22836211407639978, "grad_norm": 2.5755945919781063, "learning_rate": 1.800688953585194e-05, "loss": 1.0213, "step": 2182 }, { "epoch": 0.2284667713239142, "grad_norm": 2.3639675977701535, "learning_rate": 1.8004858361328185e-05, "loss": 1.009, "step": 2183 }, { "epoch": 0.22857142857142856, "grad_norm": 2.4516774443504716, "learning_rate": 1.8002826267029977e-05, "loss": 1.0469, "step": 2184 }, { "epoch": 0.22867608581894297, "grad_norm": 2.2277531735938414, "learning_rate": 1.800079325319081e-05, "loss": 1.1314, "step": 2185 }, { "epoch": 0.22878074306645735, "grad_norm": 2.1946636768725116, "learning_rate": 1.7998759320044276e-05, "loss": 1.1728, "step": 2186 }, { "epoch": 0.22888540031397175, "grad_norm": 1.8628296808775904, "learning_rate": 1.799672446782408e-05, "loss": 0.7964, "step": 2187 }, { "epoch": 0.22899005756148613, "grad_norm": 2.2414870196187984, "learning_rate": 1.7994688696764037e-05, "loss": 1.0465, "step": 2188 }, { "epoch": 0.2290947148090005, "grad_norm": 2.1280884247041056, "learning_rate": 1.799265200709805e-05, "loss": 0.934, "step": 2189 }, { "epoch": 0.22919937205651492, "grad_norm": 2.4695395417950308, "learning_rate": 1.7990614399060144e-05, "loss": 1.078, "step": 2190 }, { "epoch": 0.2293040293040293, "grad_norm": 2.240850930676284, "learning_rate": 1.798857587288445e-05, "loss": 1.0257, "step": 2191 }, { "epoch": 0.2294086865515437, "grad_norm": 2.4333038713844988, "learning_rate": 1.798653642880519e-05, "loss": 0.9771, "step": 2192 }, { "epoch": 0.22951334379905808, "grad_norm": 2.3919820342346827, "learning_rate": 1.7984496067056704e-05, "loss": 1.0281, "step": 2193 }, { "epoch": 0.22961800104657248, "grad_norm": 2.9237043226574633, "learning_rate": 1.7982454787873436e-05, "loss": 1.0971, "step": 2194 }, { "epoch": 0.22972265829408686, "grad_norm": 2.499578413555501, "learning_rate": 1.7980412591489932e-05, "loss": 1.0503, "step": 2195 }, { "epoch": 0.22982731554160127, "grad_norm": 2.1481213076928793, "learning_rate": 1.7978369478140838e-05, "loss": 1.0634, "step": 2196 }, { "epoch": 0.22993197278911565, "grad_norm": 2.0306762217088568, "learning_rate": 1.797632544806092e-05, "loss": 0.9728, "step": 2197 }, { "epoch": 0.23003663003663002, "grad_norm": 2.483176372385859, "learning_rate": 1.7974280501485037e-05, "loss": 0.9275, "step": 2198 }, { "epoch": 0.23014128728414443, "grad_norm": 2.319447402532949, "learning_rate": 1.797223463864816e-05, "loss": 1.146, "step": 2199 }, { "epoch": 0.2302459445316588, "grad_norm": 2.277766050423453, "learning_rate": 1.797018785978536e-05, "loss": 0.8616, "step": 2200 }, { "epoch": 0.23035060177917321, "grad_norm": 2.3369318419186786, "learning_rate": 1.796814016513182e-05, "loss": 1.0268, "step": 2201 }, { "epoch": 0.2304552590266876, "grad_norm": 2.597419543719222, "learning_rate": 1.7966091554922823e-05, "loss": 0.9834, "step": 2202 }, { "epoch": 0.230559916274202, "grad_norm": 2.011269693810685, "learning_rate": 1.7964042029393755e-05, "loss": 0.9039, "step": 2203 }, { "epoch": 0.23066457352171638, "grad_norm": 2.2717414735854837, "learning_rate": 1.7961991588780114e-05, "loss": 1.0691, "step": 2204 }, { "epoch": 0.23076923076923078, "grad_norm": 2.2621982011763326, "learning_rate": 1.79599402333175e-05, "loss": 1.0414, "step": 2205 }, { "epoch": 0.23087388801674516, "grad_norm": 2.1871162127683323, "learning_rate": 1.7957887963241613e-05, "loss": 1.0806, "step": 2206 }, { "epoch": 0.23097854526425954, "grad_norm": 2.2467526764365604, "learning_rate": 1.7955834778788266e-05, "loss": 1.0368, "step": 2207 }, { "epoch": 0.23108320251177394, "grad_norm": 2.49703081611621, "learning_rate": 1.795378068019338e-05, "loss": 1.0251, "step": 2208 }, { "epoch": 0.23118785975928832, "grad_norm": 2.2621831608178393, "learning_rate": 1.7951725667692967e-05, "loss": 0.9758, "step": 2209 }, { "epoch": 0.23129251700680273, "grad_norm": 2.8459265279372192, "learning_rate": 1.7949669741523156e-05, "loss": 1.0373, "step": 2210 }, { "epoch": 0.2313971742543171, "grad_norm": 2.6273563201754957, "learning_rate": 1.7947612901920174e-05, "loss": 0.8752, "step": 2211 }, { "epoch": 0.2315018315018315, "grad_norm": 2.5197273005659264, "learning_rate": 1.794555514912036e-05, "loss": 0.9882, "step": 2212 }, { "epoch": 0.2316064887493459, "grad_norm": 2.274914592268484, "learning_rate": 1.7943496483360152e-05, "loss": 0.9677, "step": 2213 }, { "epoch": 0.2317111459968603, "grad_norm": 2.428626518972456, "learning_rate": 1.7941436904876095e-05, "loss": 1.1441, "step": 2214 }, { "epoch": 0.23181580324437467, "grad_norm": 2.394836969811865, "learning_rate": 1.7939376413904836e-05, "loss": 0.9864, "step": 2215 }, { "epoch": 0.23192046049188905, "grad_norm": 2.6595728488415182, "learning_rate": 1.7937315010683135e-05, "loss": 1.0768, "step": 2216 }, { "epoch": 0.23202511773940346, "grad_norm": 2.1906514810889846, "learning_rate": 1.7935252695447846e-05, "loss": 0.9923, "step": 2217 }, { "epoch": 0.23212977498691784, "grad_norm": 2.3158045248201065, "learning_rate": 1.793318946843594e-05, "loss": 1.0749, "step": 2218 }, { "epoch": 0.23223443223443224, "grad_norm": 2.2698926380173563, "learning_rate": 1.793112532988448e-05, "loss": 1.0514, "step": 2219 }, { "epoch": 0.23233908948194662, "grad_norm": 2.3318940874120564, "learning_rate": 1.7929060280030642e-05, "loss": 1.0105, "step": 2220 }, { "epoch": 0.23244374672946103, "grad_norm": 2.3711702488900857, "learning_rate": 1.7926994319111704e-05, "loss": 1.0331, "step": 2221 }, { "epoch": 0.2325484039769754, "grad_norm": 2.150975144233381, "learning_rate": 1.7924927447365048e-05, "loss": 1.0613, "step": 2222 }, { "epoch": 0.23265306122448978, "grad_norm": 2.536648515717417, "learning_rate": 1.7922859665028164e-05, "loss": 1.0065, "step": 2223 }, { "epoch": 0.2327577184720042, "grad_norm": 1.9397689780705203, "learning_rate": 1.792079097233864e-05, "loss": 0.9451, "step": 2224 }, { "epoch": 0.23286237571951857, "grad_norm": 2.2704190613976114, "learning_rate": 1.791872136953418e-05, "loss": 0.9858, "step": 2225 }, { "epoch": 0.23296703296703297, "grad_norm": 2.8347645497669656, "learning_rate": 1.7916650856852577e-05, "loss": 1.1249, "step": 2226 }, { "epoch": 0.23307169021454735, "grad_norm": 2.408645013203526, "learning_rate": 1.7914579434531746e-05, "loss": 0.9415, "step": 2227 }, { "epoch": 0.23317634746206176, "grad_norm": 2.5151198920475557, "learning_rate": 1.7912507102809692e-05, "loss": 1.0078, "step": 2228 }, { "epoch": 0.23328100470957613, "grad_norm": 2.049430149571946, "learning_rate": 1.7910433861924533e-05, "loss": 0.9601, "step": 2229 }, { "epoch": 0.23338566195709054, "grad_norm": 2.406663254302629, "learning_rate": 1.7908359712114484e-05, "loss": 0.9539, "step": 2230 }, { "epoch": 0.23349031920460492, "grad_norm": 2.32058310508671, "learning_rate": 1.7906284653617874e-05, "loss": 1.0463, "step": 2231 }, { "epoch": 0.2335949764521193, "grad_norm": 2.1243497033328076, "learning_rate": 1.790420868667313e-05, "loss": 0.8934, "step": 2232 }, { "epoch": 0.2336996336996337, "grad_norm": 2.172322288650889, "learning_rate": 1.7902131811518784e-05, "loss": 0.8929, "step": 2233 }, { "epoch": 0.23380429094714808, "grad_norm": 2.626463122324986, "learning_rate": 1.7900054028393475e-05, "loss": 0.9942, "step": 2234 }, { "epoch": 0.23390894819466249, "grad_norm": 2.277059459157924, "learning_rate": 1.789797533753594e-05, "loss": 1.0414, "step": 2235 }, { "epoch": 0.23401360544217686, "grad_norm": 2.676163205276392, "learning_rate": 1.789589573918503e-05, "loss": 0.8041, "step": 2236 }, { "epoch": 0.23411826268969127, "grad_norm": 2.1358940445833188, "learning_rate": 1.7893815233579692e-05, "loss": 1.0874, "step": 2237 }, { "epoch": 0.23422291993720565, "grad_norm": 2.499667365100757, "learning_rate": 1.7891733820958983e-05, "loss": 0.9692, "step": 2238 }, { "epoch": 0.23432757718472005, "grad_norm": 2.8983591306679104, "learning_rate": 1.788965150156206e-05, "loss": 0.8887, "step": 2239 }, { "epoch": 0.23443223443223443, "grad_norm": 2.4745131578348856, "learning_rate": 1.788756827562818e-05, "loss": 0.975, "step": 2240 }, { "epoch": 0.2345368916797488, "grad_norm": 2.3648247665584434, "learning_rate": 1.788548414339672e-05, "loss": 0.924, "step": 2241 }, { "epoch": 0.23464154892726322, "grad_norm": 2.3253167404589643, "learning_rate": 1.7883399105107146e-05, "loss": 1.0481, "step": 2242 }, { "epoch": 0.2347462061747776, "grad_norm": 2.2078932669131306, "learning_rate": 1.7881313160999035e-05, "loss": 1.0526, "step": 2243 }, { "epoch": 0.234850863422292, "grad_norm": 2.260715971363778, "learning_rate": 1.7879226311312063e-05, "loss": 0.9958, "step": 2244 }, { "epoch": 0.23495552066980638, "grad_norm": 2.103577147400585, "learning_rate": 1.7877138556286014e-05, "loss": 1.0592, "step": 2245 }, { "epoch": 0.23506017791732078, "grad_norm": 2.7075505673940774, "learning_rate": 1.787504989616078e-05, "loss": 1.0385, "step": 2246 }, { "epoch": 0.23516483516483516, "grad_norm": 2.6763027546096807, "learning_rate": 1.7872960331176347e-05, "loss": 0.972, "step": 2247 }, { "epoch": 0.23526949241234957, "grad_norm": 2.3328246332033227, "learning_rate": 1.7870869861572807e-05, "loss": 0.8997, "step": 2248 }, { "epoch": 0.23537414965986395, "grad_norm": 2.4266627069351108, "learning_rate": 1.786877848759037e-05, "loss": 0.9698, "step": 2249 }, { "epoch": 0.23547880690737832, "grad_norm": 2.093701504040857, "learning_rate": 1.7866686209469328e-05, "loss": 0.871, "step": 2250 }, { "epoch": 0.23558346415489273, "grad_norm": 2.4270676083889087, "learning_rate": 1.7864593027450097e-05, "loss": 1.156, "step": 2251 }, { "epoch": 0.2356881214024071, "grad_norm": 2.6995988050304716, "learning_rate": 1.7862498941773184e-05, "loss": 1.0907, "step": 2252 }, { "epoch": 0.23579277864992151, "grad_norm": 2.0633497910278256, "learning_rate": 1.78604039526792e-05, "loss": 0.9509, "step": 2253 }, { "epoch": 0.2358974358974359, "grad_norm": 2.337404008068736, "learning_rate": 1.785830806040887e-05, "loss": 1.0717, "step": 2254 }, { "epoch": 0.2360020931449503, "grad_norm": 2.2055920312977295, "learning_rate": 1.785621126520301e-05, "loss": 0.8269, "step": 2255 }, { "epoch": 0.23610675039246468, "grad_norm": 2.5981676845560764, "learning_rate": 1.7854113567302557e-05, "loss": 0.9251, "step": 2256 }, { "epoch": 0.23621140763997908, "grad_norm": 2.183621120398911, "learning_rate": 1.785201496694853e-05, "loss": 1.0284, "step": 2257 }, { "epoch": 0.23631606488749346, "grad_norm": 2.2329023327526, "learning_rate": 1.784991546438206e-05, "loss": 1.0342, "step": 2258 }, { "epoch": 0.23642072213500784, "grad_norm": 2.871738043676333, "learning_rate": 1.7847815059844395e-05, "loss": 1.1025, "step": 2259 }, { "epoch": 0.23652537938252224, "grad_norm": 2.4479413257311378, "learning_rate": 1.784571375357687e-05, "loss": 1.0315, "step": 2260 }, { "epoch": 0.23663003663003662, "grad_norm": 2.315149134540888, "learning_rate": 1.7843611545820926e-05, "loss": 1.1481, "step": 2261 }, { "epoch": 0.23673469387755103, "grad_norm": 2.4131417962503177, "learning_rate": 1.784150843681812e-05, "loss": 0.9819, "step": 2262 }, { "epoch": 0.2368393511250654, "grad_norm": 2.009874656026206, "learning_rate": 1.7839404426810095e-05, "loss": 0.8828, "step": 2263 }, { "epoch": 0.2369440083725798, "grad_norm": 2.30932078574492, "learning_rate": 1.7837299516038608e-05, "loss": 1.0608, "step": 2264 }, { "epoch": 0.2370486656200942, "grad_norm": 2.056529909679889, "learning_rate": 1.7835193704745523e-05, "loss": 0.931, "step": 2265 }, { "epoch": 0.23715332286760857, "grad_norm": 1.9976085370847383, "learning_rate": 1.7833086993172797e-05, "loss": 0.8794, "step": 2266 }, { "epoch": 0.23725798011512297, "grad_norm": 2.3451892003689854, "learning_rate": 1.7830979381562493e-05, "loss": 1.0935, "step": 2267 }, { "epoch": 0.23736263736263735, "grad_norm": 2.3332512825866756, "learning_rate": 1.7828870870156783e-05, "loss": 1.0952, "step": 2268 }, { "epoch": 0.23746729461015176, "grad_norm": 2.4752809555669257, "learning_rate": 1.7826761459197943e-05, "loss": 0.9602, "step": 2269 }, { "epoch": 0.23757195185766614, "grad_norm": 2.2358190924088017, "learning_rate": 1.7824651148928343e-05, "loss": 1.1246, "step": 2270 }, { "epoch": 0.23767660910518054, "grad_norm": 2.6059633471148915, "learning_rate": 1.782253993959046e-05, "loss": 1.0941, "step": 2271 }, { "epoch": 0.23778126635269492, "grad_norm": 2.084559757590681, "learning_rate": 1.7820427831426887e-05, "loss": 0.9926, "step": 2272 }, { "epoch": 0.23788592360020933, "grad_norm": 2.2523944777498968, "learning_rate": 1.78183148246803e-05, "loss": 0.9949, "step": 2273 }, { "epoch": 0.2379905808477237, "grad_norm": 2.8181662408595187, "learning_rate": 1.781620091959349e-05, "loss": 1.012, "step": 2274 }, { "epoch": 0.23809523809523808, "grad_norm": 2.328558412920668, "learning_rate": 1.781408611640935e-05, "loss": 1.0071, "step": 2275 }, { "epoch": 0.2381998953427525, "grad_norm": 2.183389318821081, "learning_rate": 1.781197041537087e-05, "loss": 1.0634, "step": 2276 }, { "epoch": 0.23830455259026687, "grad_norm": 2.2754131017205417, "learning_rate": 1.7809853816721157e-05, "loss": 0.9493, "step": 2277 }, { "epoch": 0.23840920983778127, "grad_norm": 2.180076226224215, "learning_rate": 1.7807736320703405e-05, "loss": 0.98, "step": 2278 }, { "epoch": 0.23851386708529565, "grad_norm": 2.1011853330474772, "learning_rate": 1.7805617927560926e-05, "loss": 1.0353, "step": 2279 }, { "epoch": 0.23861852433281006, "grad_norm": 2.685310139913174, "learning_rate": 1.780349863753712e-05, "loss": 0.8339, "step": 2280 }, { "epoch": 0.23872318158032443, "grad_norm": 2.2643234179394183, "learning_rate": 1.7801378450875504e-05, "loss": 0.9221, "step": 2281 }, { "epoch": 0.23882783882783884, "grad_norm": 2.0845511977759914, "learning_rate": 1.7799257367819687e-05, "loss": 1.0691, "step": 2282 }, { "epoch": 0.23893249607535322, "grad_norm": 2.1996457975610286, "learning_rate": 1.779713538861339e-05, "loss": 1.0552, "step": 2283 }, { "epoch": 0.2390371533228676, "grad_norm": 2.563382788930006, "learning_rate": 1.7795012513500427e-05, "loss": 1.0386, "step": 2284 }, { "epoch": 0.239141810570382, "grad_norm": 2.482883361316951, "learning_rate": 1.7792888742724727e-05, "loss": 1.116, "step": 2285 }, { "epoch": 0.23924646781789638, "grad_norm": 2.173594595079943, "learning_rate": 1.7790764076530312e-05, "loss": 1.0827, "step": 2286 }, { "epoch": 0.23935112506541079, "grad_norm": 1.973340347588087, "learning_rate": 1.778863851516131e-05, "loss": 1.068, "step": 2287 }, { "epoch": 0.23945578231292516, "grad_norm": 2.266375557620404, "learning_rate": 1.7786512058861952e-05, "loss": 0.8809, "step": 2288 }, { "epoch": 0.23956043956043957, "grad_norm": 1.952846674363708, "learning_rate": 1.7784384707876576e-05, "loss": 1.0294, "step": 2289 }, { "epoch": 0.23966509680795395, "grad_norm": 2.2539440156526975, "learning_rate": 1.7782256462449615e-05, "loss": 1.0162, "step": 2290 }, { "epoch": 0.23976975405546835, "grad_norm": 2.278316834840628, "learning_rate": 1.7780127322825615e-05, "loss": 1.0605, "step": 2291 }, { "epoch": 0.23987441130298273, "grad_norm": 2.1166188092910674, "learning_rate": 1.777799728924921e-05, "loss": 0.9187, "step": 2292 }, { "epoch": 0.2399790685504971, "grad_norm": 2.2188937843904473, "learning_rate": 1.7775866361965145e-05, "loss": 0.9865, "step": 2293 }, { "epoch": 0.24008372579801152, "grad_norm": 2.2834201572097936, "learning_rate": 1.7773734541218277e-05, "loss": 1.027, "step": 2294 }, { "epoch": 0.2401883830455259, "grad_norm": 2.3639841943193223, "learning_rate": 1.777160182725355e-05, "loss": 0.998, "step": 2295 }, { "epoch": 0.2402930402930403, "grad_norm": 2.6545219995276783, "learning_rate": 1.7769468220316016e-05, "loss": 1.0317, "step": 2296 }, { "epoch": 0.24039769754055468, "grad_norm": 2.806578222987241, "learning_rate": 1.7767333720650836e-05, "loss": 1.0528, "step": 2297 }, { "epoch": 0.24050235478806908, "grad_norm": 2.450717164421872, "learning_rate": 1.7765198328503262e-05, "loss": 1.0429, "step": 2298 }, { "epoch": 0.24060701203558346, "grad_norm": 2.3826191374301326, "learning_rate": 1.7763062044118662e-05, "loss": 0.9769, "step": 2299 }, { "epoch": 0.24071166928309787, "grad_norm": 2.5379042446021782, "learning_rate": 1.7760924867742493e-05, "loss": 1.0339, "step": 2300 }, { "epoch": 0.24081632653061225, "grad_norm": 2.3669619873635352, "learning_rate": 1.7758786799620327e-05, "loss": 1.0572, "step": 2301 }, { "epoch": 0.24092098377812662, "grad_norm": 2.2289468079215142, "learning_rate": 1.7756647839997824e-05, "loss": 1.0289, "step": 2302 }, { "epoch": 0.24102564102564103, "grad_norm": 2.2529359568142517, "learning_rate": 1.7754507989120762e-05, "loss": 0.9694, "step": 2303 }, { "epoch": 0.2411302982731554, "grad_norm": 2.222490094675877, "learning_rate": 1.7752367247235015e-05, "loss": 0.9996, "step": 2304 }, { "epoch": 0.2412349555206698, "grad_norm": 2.3032261423529254, "learning_rate": 1.7750225614586557e-05, "loss": 1.0049, "step": 2305 }, { "epoch": 0.2413396127681842, "grad_norm": 2.158558091358129, "learning_rate": 1.774808309142146e-05, "loss": 1.013, "step": 2306 }, { "epoch": 0.2414442700156986, "grad_norm": 2.0156168291391134, "learning_rate": 1.7745939677985912e-05, "loss": 0.9208, "step": 2307 }, { "epoch": 0.24154892726321298, "grad_norm": 2.2427438503410353, "learning_rate": 1.7743795374526186e-05, "loss": 1.1672, "step": 2308 }, { "epoch": 0.24165358451072738, "grad_norm": 2.416172594533138, "learning_rate": 1.774165018128868e-05, "loss": 1.0452, "step": 2309 }, { "epoch": 0.24175824175824176, "grad_norm": 2.5723958354089254, "learning_rate": 1.7739504098519872e-05, "loss": 0.973, "step": 2310 }, { "epoch": 0.24186289900575614, "grad_norm": 2.9877649086017666, "learning_rate": 1.773735712646635e-05, "loss": 0.9828, "step": 2311 }, { "epoch": 0.24196755625327054, "grad_norm": 2.3078412731916367, "learning_rate": 1.7735209265374816e-05, "loss": 1.0852, "step": 2312 }, { "epoch": 0.24207221350078492, "grad_norm": 2.1969741515349233, "learning_rate": 1.773306051549205e-05, "loss": 1.0456, "step": 2313 }, { "epoch": 0.24217687074829933, "grad_norm": 2.5003537240671885, "learning_rate": 1.7730910877064955e-05, "loss": 0.9049, "step": 2314 }, { "epoch": 0.2422815279958137, "grad_norm": 2.519922133498177, "learning_rate": 1.772876035034053e-05, "loss": 1.0232, "step": 2315 }, { "epoch": 0.2423861852433281, "grad_norm": 2.477645819821275, "learning_rate": 1.7726608935565874e-05, "loss": 0.9229, "step": 2316 }, { "epoch": 0.2424908424908425, "grad_norm": 2.6081751548110685, "learning_rate": 1.7724456632988188e-05, "loss": 1.0518, "step": 2317 }, { "epoch": 0.24259549973835687, "grad_norm": 2.5904924581138005, "learning_rate": 1.7722303442854774e-05, "loss": 0.8876, "step": 2318 }, { "epoch": 0.24270015698587127, "grad_norm": 2.2369477600349845, "learning_rate": 1.7720149365413036e-05, "loss": 0.9971, "step": 2319 }, { "epoch": 0.24280481423338565, "grad_norm": 2.4995571839955786, "learning_rate": 1.771799440091049e-05, "loss": 0.8509, "step": 2320 }, { "epoch": 0.24290947148090006, "grad_norm": 2.2952550447378774, "learning_rate": 1.7715838549594744e-05, "loss": 1.0066, "step": 2321 }, { "epoch": 0.24301412872841444, "grad_norm": 2.190996661293464, "learning_rate": 1.7713681811713504e-05, "loss": 1.0152, "step": 2322 }, { "epoch": 0.24311878597592884, "grad_norm": 2.259684687855643, "learning_rate": 1.7711524187514586e-05, "loss": 0.8633, "step": 2323 }, { "epoch": 0.24322344322344322, "grad_norm": 2.121917767970034, "learning_rate": 1.7709365677245906e-05, "loss": 0.9001, "step": 2324 }, { "epoch": 0.24332810047095763, "grad_norm": 2.2233654020848475, "learning_rate": 1.7707206281155482e-05, "loss": 1.1153, "step": 2325 }, { "epoch": 0.243432757718472, "grad_norm": 2.444459263315588, "learning_rate": 1.770504599949143e-05, "loss": 1.0226, "step": 2326 }, { "epoch": 0.24353741496598638, "grad_norm": 2.349907556465352, "learning_rate": 1.7702884832501978e-05, "loss": 1.0579, "step": 2327 }, { "epoch": 0.2436420722135008, "grad_norm": 2.484238355922039, "learning_rate": 1.7700722780435437e-05, "loss": 0.9627, "step": 2328 }, { "epoch": 0.24374672946101517, "grad_norm": 2.4085127927382732, "learning_rate": 1.7698559843540242e-05, "loss": 1.0758, "step": 2329 }, { "epoch": 0.24385138670852957, "grad_norm": 2.4015208510522745, "learning_rate": 1.769639602206491e-05, "loss": 1.0412, "step": 2330 }, { "epoch": 0.24395604395604395, "grad_norm": 2.2260639525963675, "learning_rate": 1.769423131625808e-05, "loss": 1.0326, "step": 2331 }, { "epoch": 0.24406070120355836, "grad_norm": 2.2914496037327288, "learning_rate": 1.769206572636847e-05, "loss": 1.1783, "step": 2332 }, { "epoch": 0.24416535845107273, "grad_norm": 2.226935286535785, "learning_rate": 1.7689899252644912e-05, "loss": 0.9281, "step": 2333 }, { "epoch": 0.24427001569858714, "grad_norm": 2.835666597627508, "learning_rate": 1.768773189533634e-05, "loss": 1.016, "step": 2334 }, { "epoch": 0.24437467294610152, "grad_norm": 2.7851995018242417, "learning_rate": 1.7685563654691797e-05, "loss": 1.0492, "step": 2335 }, { "epoch": 0.2444793301936159, "grad_norm": 2.1483770409252445, "learning_rate": 1.7683394530960402e-05, "loss": 1.0236, "step": 2336 }, { "epoch": 0.2445839874411303, "grad_norm": 2.170226481420682, "learning_rate": 1.76812245243914e-05, "loss": 1.0287, "step": 2337 }, { "epoch": 0.24468864468864468, "grad_norm": 2.5889214185058895, "learning_rate": 1.7679053635234134e-05, "loss": 0.9133, "step": 2338 }, { "epoch": 0.24479330193615909, "grad_norm": 2.526081416072903, "learning_rate": 1.7676881863738034e-05, "loss": 1.1081, "step": 2339 }, { "epoch": 0.24489795918367346, "grad_norm": 1.928523666124805, "learning_rate": 1.767470921015265e-05, "loss": 0.7997, "step": 2340 }, { "epoch": 0.24500261643118787, "grad_norm": 2.20948369983928, "learning_rate": 1.767253567472761e-05, "loss": 0.9804, "step": 2341 }, { "epoch": 0.24510727367870225, "grad_norm": 2.0194710314361632, "learning_rate": 1.7670361257712674e-05, "loss": 0.987, "step": 2342 }, { "epoch": 0.24521193092621665, "grad_norm": 2.5584737884538935, "learning_rate": 1.766818595935768e-05, "loss": 1.0407, "step": 2343 }, { "epoch": 0.24531658817373103, "grad_norm": 2.236875174210803, "learning_rate": 1.7666009779912574e-05, "loss": 1.0802, "step": 2344 }, { "epoch": 0.2454212454212454, "grad_norm": 2.0282577109477686, "learning_rate": 1.7663832719627404e-05, "loss": 0.953, "step": 2345 }, { "epoch": 0.24552590266875982, "grad_norm": 2.073084277889144, "learning_rate": 1.7661654778752315e-05, "loss": 1.0081, "step": 2346 }, { "epoch": 0.2456305599162742, "grad_norm": 2.149914350909731, "learning_rate": 1.765947595753756e-05, "loss": 1.0456, "step": 2347 }, { "epoch": 0.2457352171637886, "grad_norm": 2.2340276298746535, "learning_rate": 1.7657296256233494e-05, "loss": 1.0553, "step": 2348 }, { "epoch": 0.24583987441130298, "grad_norm": 2.0128834946003784, "learning_rate": 1.7655115675090566e-05, "loss": 1.0138, "step": 2349 }, { "epoch": 0.24594453165881738, "grad_norm": 2.032422061077809, "learning_rate": 1.7652934214359324e-05, "loss": 1.005, "step": 2350 }, { "epoch": 0.24604918890633176, "grad_norm": 2.2528606162122515, "learning_rate": 1.7650751874290427e-05, "loss": 1.1266, "step": 2351 }, { "epoch": 0.24615384615384617, "grad_norm": 2.1025231064248566, "learning_rate": 1.7648568655134633e-05, "loss": 1.1058, "step": 2352 }, { "epoch": 0.24625850340136055, "grad_norm": 2.1702704933156896, "learning_rate": 1.7646384557142796e-05, "loss": 1.0684, "step": 2353 }, { "epoch": 0.24636316064887492, "grad_norm": 2.343302825772316, "learning_rate": 1.764419958056587e-05, "loss": 1.0974, "step": 2354 }, { "epoch": 0.24646781789638933, "grad_norm": 2.3639946811692045, "learning_rate": 1.7642013725654915e-05, "loss": 0.9979, "step": 2355 }, { "epoch": 0.2465724751439037, "grad_norm": 2.336282852242526, "learning_rate": 1.763982699266109e-05, "loss": 1.0527, "step": 2356 }, { "epoch": 0.2466771323914181, "grad_norm": 2.2842090040435368, "learning_rate": 1.7637639381835658e-05, "loss": 0.959, "step": 2357 }, { "epoch": 0.2467817896389325, "grad_norm": 1.9972596299075889, "learning_rate": 1.7635450893429977e-05, "loss": 0.9649, "step": 2358 }, { "epoch": 0.2468864468864469, "grad_norm": 1.9988043374236892, "learning_rate": 1.763326152769551e-05, "loss": 1.1658, "step": 2359 }, { "epoch": 0.24699110413396128, "grad_norm": 2.0588021433267896, "learning_rate": 1.763107128488382e-05, "loss": 1.0258, "step": 2360 }, { "epoch": 0.24709576138147565, "grad_norm": 1.8656858774757512, "learning_rate": 1.7628880165246572e-05, "loss": 0.9793, "step": 2361 }, { "epoch": 0.24720041862899006, "grad_norm": 2.218035457125112, "learning_rate": 1.7626688169035524e-05, "loss": 0.9543, "step": 2362 }, { "epoch": 0.24730507587650444, "grad_norm": 1.9653508672118056, "learning_rate": 1.7624495296502545e-05, "loss": 0.8786, "step": 2363 }, { "epoch": 0.24740973312401884, "grad_norm": 1.9480601105860273, "learning_rate": 1.7622301547899605e-05, "loss": 0.8262, "step": 2364 }, { "epoch": 0.24751439037153322, "grad_norm": 2.333099449535231, "learning_rate": 1.7620106923478756e-05, "loss": 1.0377, "step": 2365 }, { "epoch": 0.24761904761904763, "grad_norm": 2.0818968205046455, "learning_rate": 1.761791142349218e-05, "loss": 0.8524, "step": 2366 }, { "epoch": 0.247723704866562, "grad_norm": 2.3374344327141303, "learning_rate": 1.761571504819214e-05, "loss": 1.1216, "step": 2367 }, { "epoch": 0.2478283621140764, "grad_norm": 2.4268357445524424, "learning_rate": 1.7613517797830995e-05, "loss": 1.0762, "step": 2368 }, { "epoch": 0.2479330193615908, "grad_norm": 2.590696844481067, "learning_rate": 1.7611319672661227e-05, "loss": 0.9002, "step": 2369 }, { "epoch": 0.24803767660910517, "grad_norm": 2.909652369590741, "learning_rate": 1.7609120672935396e-05, "loss": 1.0978, "step": 2370 }, { "epoch": 0.24814233385661957, "grad_norm": 2.576806230412862, "learning_rate": 1.7606920798906176e-05, "loss": 1.0588, "step": 2371 }, { "epoch": 0.24824699110413395, "grad_norm": 2.0096476869887, "learning_rate": 1.7604720050826333e-05, "loss": 0.9611, "step": 2372 }, { "epoch": 0.24835164835164836, "grad_norm": 2.2977012476580208, "learning_rate": 1.7602518428948742e-05, "loss": 1.0389, "step": 2373 }, { "epoch": 0.24845630559916274, "grad_norm": 2.945269543421569, "learning_rate": 1.760031593352637e-05, "loss": 1.0266, "step": 2374 }, { "epoch": 0.24856096284667714, "grad_norm": 2.1640744849684155, "learning_rate": 1.7598112564812292e-05, "loss": 1.1166, "step": 2375 }, { "epoch": 0.24866562009419152, "grad_norm": 2.1568155494244285, "learning_rate": 1.7595908323059675e-05, "loss": 1.1188, "step": 2376 }, { "epoch": 0.24877027734170593, "grad_norm": 2.584518671074471, "learning_rate": 1.7593703208521794e-05, "loss": 1.0749, "step": 2377 }, { "epoch": 0.2488749345892203, "grad_norm": 2.6817350218871767, "learning_rate": 1.7591497221452022e-05, "loss": 1.0488, "step": 2378 }, { "epoch": 0.24897959183673468, "grad_norm": 2.0586150260625797, "learning_rate": 1.7589290362103827e-05, "loss": 1.0097, "step": 2379 }, { "epoch": 0.2490842490842491, "grad_norm": 2.459396989353071, "learning_rate": 1.7587082630730786e-05, "loss": 1.0839, "step": 2380 }, { "epoch": 0.24918890633176347, "grad_norm": 2.4145810020805314, "learning_rate": 1.758487402758657e-05, "loss": 1.0401, "step": 2381 }, { "epoch": 0.24929356357927787, "grad_norm": 2.3872298010666952, "learning_rate": 1.7582664552924953e-05, "loss": 1.1245, "step": 2382 }, { "epoch": 0.24939822082679225, "grad_norm": 2.466040223475429, "learning_rate": 1.7580454206999805e-05, "loss": 0.8704, "step": 2383 }, { "epoch": 0.24950287807430666, "grad_norm": 2.327011051347902, "learning_rate": 1.75782429900651e-05, "loss": 1.0102, "step": 2384 }, { "epoch": 0.24960753532182103, "grad_norm": 2.222950496140978, "learning_rate": 1.7576030902374917e-05, "loss": 0.8952, "step": 2385 }, { "epoch": 0.24971219256933544, "grad_norm": 2.0002826498689266, "learning_rate": 1.7573817944183426e-05, "loss": 0.977, "step": 2386 }, { "epoch": 0.24981684981684982, "grad_norm": 2.261276443674775, "learning_rate": 1.7571604115744895e-05, "loss": 1.0078, "step": 2387 }, { "epoch": 0.2499215070643642, "grad_norm": 2.31943025867348, "learning_rate": 1.7569389417313704e-05, "loss": 1.0752, "step": 2388 }, { "epoch": 0.2500261643118786, "grad_norm": 2.2233824615996007, "learning_rate": 1.7567173849144322e-05, "loss": 1.0591, "step": 2389 }, { "epoch": 0.250130821559393, "grad_norm": 2.335773242353471, "learning_rate": 1.7564957411491324e-05, "loss": 1.0174, "step": 2390 }, { "epoch": 0.2502354788069074, "grad_norm": 2.274951953356186, "learning_rate": 1.756274010460939e-05, "loss": 1.088, "step": 2391 }, { "epoch": 0.2503401360544218, "grad_norm": 1.928296571150385, "learning_rate": 1.756052192875328e-05, "loss": 0.8277, "step": 2392 }, { "epoch": 0.25044479330193614, "grad_norm": 2.2156454730583555, "learning_rate": 1.7558302884177876e-05, "loss": 1.0403, "step": 2393 }, { "epoch": 0.25054945054945055, "grad_norm": 2.1520512914884153, "learning_rate": 1.755608297113815e-05, "loss": 1.0643, "step": 2394 }, { "epoch": 0.25065410779696495, "grad_norm": 2.2740218868907927, "learning_rate": 1.7553862189889172e-05, "loss": 1.0674, "step": 2395 }, { "epoch": 0.2507587650444793, "grad_norm": 2.1433388658904318, "learning_rate": 1.7551640540686114e-05, "loss": 0.9212, "step": 2396 }, { "epoch": 0.2508634222919937, "grad_norm": 2.5749819609252764, "learning_rate": 1.754941802378425e-05, "loss": 0.9503, "step": 2397 }, { "epoch": 0.2509680795395081, "grad_norm": 2.4391582025577843, "learning_rate": 1.754719463943895e-05, "loss": 1.0504, "step": 2398 }, { "epoch": 0.2510727367870225, "grad_norm": 2.517643489899688, "learning_rate": 1.7544970387905687e-05, "loss": 0.97, "step": 2399 }, { "epoch": 0.25117739403453687, "grad_norm": 2.202807983897776, "learning_rate": 1.754274526944003e-05, "loss": 0.8349, "step": 2400 }, { "epoch": 0.2512820512820513, "grad_norm": 2.9251756781669127, "learning_rate": 1.7540519284297652e-05, "loss": 1.1699, "step": 2401 }, { "epoch": 0.2513867085295657, "grad_norm": 2.356785224256036, "learning_rate": 1.753829243273432e-05, "loss": 0.9605, "step": 2402 }, { "epoch": 0.2514913657770801, "grad_norm": 2.423973980202741, "learning_rate": 1.7536064715005906e-05, "loss": 0.8849, "step": 2403 }, { "epoch": 0.25159602302459444, "grad_norm": 2.446059031088621, "learning_rate": 1.7533836131368383e-05, "loss": 1.093, "step": 2404 }, { "epoch": 0.25170068027210885, "grad_norm": 2.399076855736286, "learning_rate": 1.7531606682077814e-05, "loss": 1.0542, "step": 2405 }, { "epoch": 0.25180533751962325, "grad_norm": 2.2136011597833267, "learning_rate": 1.7529376367390366e-05, "loss": 0.915, "step": 2406 }, { "epoch": 0.2519099947671376, "grad_norm": 2.226461148880702, "learning_rate": 1.7527145187562314e-05, "loss": 0.9114, "step": 2407 }, { "epoch": 0.252014652014652, "grad_norm": 2.41593091900151, "learning_rate": 1.7524913142850017e-05, "loss": 1.0315, "step": 2408 }, { "epoch": 0.2521193092621664, "grad_norm": 2.2319401902707146, "learning_rate": 1.7522680233509947e-05, "loss": 0.987, "step": 2409 }, { "epoch": 0.2522239665096808, "grad_norm": 2.7690125251281366, "learning_rate": 1.752044645979867e-05, "loss": 1.1371, "step": 2410 }, { "epoch": 0.25232862375719517, "grad_norm": 2.286815126513628, "learning_rate": 1.7518211821972845e-05, "loss": 1.1027, "step": 2411 }, { "epoch": 0.2524332810047096, "grad_norm": 2.0820445058724695, "learning_rate": 1.7515976320289247e-05, "loss": 1.0881, "step": 2412 }, { "epoch": 0.252537938252224, "grad_norm": 2.987553814644758, "learning_rate": 1.751373995500473e-05, "loss": 1.107, "step": 2413 }, { "epoch": 0.25264259549973833, "grad_norm": 1.9581027443263326, "learning_rate": 1.7511502726376257e-05, "loss": 0.8612, "step": 2414 }, { "epoch": 0.25274725274725274, "grad_norm": 2.376542007505258, "learning_rate": 1.7509264634660896e-05, "loss": 1.006, "step": 2415 }, { "epoch": 0.25285190999476714, "grad_norm": 2.80707883970879, "learning_rate": 1.7507025680115807e-05, "loss": 1.1002, "step": 2416 }, { "epoch": 0.25295656724228155, "grad_norm": 1.6942186283185672, "learning_rate": 1.7504785862998246e-05, "loss": 0.8788, "step": 2417 }, { "epoch": 0.2530612244897959, "grad_norm": 2.5777349929769433, "learning_rate": 1.750254518356558e-05, "loss": 1.103, "step": 2418 }, { "epoch": 0.2531658817373103, "grad_norm": 2.4495279259341776, "learning_rate": 1.7500303642075257e-05, "loss": 1.0692, "step": 2419 }, { "epoch": 0.2532705389848247, "grad_norm": 1.8822814244979826, "learning_rate": 1.7498061238784843e-05, "loss": 0.8956, "step": 2420 }, { "epoch": 0.2533751962323391, "grad_norm": 2.223853110843607, "learning_rate": 1.7495817973951997e-05, "loss": 0.969, "step": 2421 }, { "epoch": 0.25347985347985347, "grad_norm": 2.238360132707453, "learning_rate": 1.7493573847834465e-05, "loss": 1.0264, "step": 2422 }, { "epoch": 0.2535845107273679, "grad_norm": 2.0197198632761606, "learning_rate": 1.749132886069011e-05, "loss": 0.9527, "step": 2423 }, { "epoch": 0.2536891679748823, "grad_norm": 2.22631274546597, "learning_rate": 1.748908301277688e-05, "loss": 0.9435, "step": 2424 }, { "epoch": 0.25379382522239663, "grad_norm": 2.299475782910513, "learning_rate": 1.748683630435283e-05, "loss": 1.1404, "step": 2425 }, { "epoch": 0.25389848246991104, "grad_norm": 2.5843227105301034, "learning_rate": 1.7484588735676117e-05, "loss": 0.8494, "step": 2426 }, { "epoch": 0.25400313971742544, "grad_norm": 1.9572204780029898, "learning_rate": 1.748234030700498e-05, "loss": 0.9776, "step": 2427 }, { "epoch": 0.25410779696493985, "grad_norm": 3.069066964069534, "learning_rate": 1.7480091018597775e-05, "loss": 0.968, "step": 2428 }, { "epoch": 0.2542124542124542, "grad_norm": 2.5784423660832405, "learning_rate": 1.7477840870712946e-05, "loss": 1.0712, "step": 2429 }, { "epoch": 0.2543171114599686, "grad_norm": 2.400723658989671, "learning_rate": 1.7475589863609042e-05, "loss": 0.9463, "step": 2430 }, { "epoch": 0.254421768707483, "grad_norm": 2.332512218507682, "learning_rate": 1.747333799754471e-05, "loss": 1.0427, "step": 2431 }, { "epoch": 0.25452642595499736, "grad_norm": 2.069397809708061, "learning_rate": 1.747108527277869e-05, "loss": 0.9479, "step": 2432 }, { "epoch": 0.25463108320251177, "grad_norm": 2.284939347387363, "learning_rate": 1.7468831689569827e-05, "loss": 1.0377, "step": 2433 }, { "epoch": 0.25473574045002617, "grad_norm": 2.013249004544656, "learning_rate": 1.7466577248177062e-05, "loss": 1.0894, "step": 2434 }, { "epoch": 0.2548403976975406, "grad_norm": 2.095695638030201, "learning_rate": 1.746432194885944e-05, "loss": 1.0162, "step": 2435 }, { "epoch": 0.2549450549450549, "grad_norm": 2.308895697628336, "learning_rate": 1.7462065791876087e-05, "loss": 0.9949, "step": 2436 }, { "epoch": 0.25504971219256933, "grad_norm": 2.1787458199138183, "learning_rate": 1.745980877748625e-05, "loss": 1.0669, "step": 2437 }, { "epoch": 0.25515436944008374, "grad_norm": 2.27381391199627, "learning_rate": 1.7457550905949263e-05, "loss": 0.9188, "step": 2438 }, { "epoch": 0.2552590266875981, "grad_norm": 2.0547178362816423, "learning_rate": 1.745529217752456e-05, "loss": 1.0003, "step": 2439 }, { "epoch": 0.2553636839351125, "grad_norm": 2.003826247372063, "learning_rate": 1.7453032592471667e-05, "loss": 0.8219, "step": 2440 }, { "epoch": 0.2554683411826269, "grad_norm": 2.2165668382792676, "learning_rate": 1.7450772151050226e-05, "loss": 1.078, "step": 2441 }, { "epoch": 0.2555729984301413, "grad_norm": 2.406127059643738, "learning_rate": 1.744851085351996e-05, "loss": 1.0154, "step": 2442 }, { "epoch": 0.25567765567765566, "grad_norm": 1.9609166344880296, "learning_rate": 1.7446248700140694e-05, "loss": 1.0885, "step": 2443 }, { "epoch": 0.25578231292517006, "grad_norm": 2.0711606782794947, "learning_rate": 1.744398569117236e-05, "loss": 1.0171, "step": 2444 }, { "epoch": 0.25588697017268447, "grad_norm": 2.0871258754382636, "learning_rate": 1.7441721826874976e-05, "loss": 0.9873, "step": 2445 }, { "epoch": 0.2559916274201989, "grad_norm": 2.112116868972381, "learning_rate": 1.743945710750867e-05, "loss": 0.9903, "step": 2446 }, { "epoch": 0.2560962846677132, "grad_norm": 1.8482931898345583, "learning_rate": 1.7437191533333663e-05, "loss": 0.8978, "step": 2447 }, { "epoch": 0.25620094191522763, "grad_norm": 2.361301370582731, "learning_rate": 1.7434925104610273e-05, "loss": 1.0131, "step": 2448 }, { "epoch": 0.25630559916274204, "grad_norm": 2.1470314899465275, "learning_rate": 1.7432657821598912e-05, "loss": 1.1222, "step": 2449 }, { "epoch": 0.2564102564102564, "grad_norm": 2.243154931598059, "learning_rate": 1.74303896845601e-05, "loss": 0.9956, "step": 2450 }, { "epoch": 0.2565149136577708, "grad_norm": 2.090796158960878, "learning_rate": 1.7428120693754453e-05, "loss": 0.9429, "step": 2451 }, { "epoch": 0.2566195709052852, "grad_norm": 2.129012668639158, "learning_rate": 1.7425850849442677e-05, "loss": 0.9842, "step": 2452 }, { "epoch": 0.2567242281527996, "grad_norm": 2.587169226816751, "learning_rate": 1.742358015188559e-05, "loss": 0.9024, "step": 2453 }, { "epoch": 0.25682888540031396, "grad_norm": 2.186311266652209, "learning_rate": 1.742130860134409e-05, "loss": 1.0747, "step": 2454 }, { "epoch": 0.25693354264782836, "grad_norm": 2.2324519069996223, "learning_rate": 1.7419036198079186e-05, "loss": 1.0882, "step": 2455 }, { "epoch": 0.25703819989534277, "grad_norm": 2.044693604835276, "learning_rate": 1.7416762942351986e-05, "loss": 1.0221, "step": 2456 }, { "epoch": 0.2571428571428571, "grad_norm": 2.4466601197617344, "learning_rate": 1.7414488834423687e-05, "loss": 1.1308, "step": 2457 }, { "epoch": 0.2572475143903715, "grad_norm": 2.473780111943485, "learning_rate": 1.7412213874555594e-05, "loss": 1.1312, "step": 2458 }, { "epoch": 0.25735217163788593, "grad_norm": 2.4776047017601948, "learning_rate": 1.7409938063009092e-05, "loss": 0.9445, "step": 2459 }, { "epoch": 0.25745682888540034, "grad_norm": 2.4309185606448636, "learning_rate": 1.740766140004569e-05, "loss": 1.1362, "step": 2460 }, { "epoch": 0.2575614861329147, "grad_norm": 2.3084232636893853, "learning_rate": 1.740538388592698e-05, "loss": 0.9599, "step": 2461 }, { "epoch": 0.2576661433804291, "grad_norm": 2.2338587055904693, "learning_rate": 1.7403105520914644e-05, "loss": 1.0872, "step": 2462 }, { "epoch": 0.2577708006279435, "grad_norm": 2.32586858159479, "learning_rate": 1.7400826305270477e-05, "loss": 1.1381, "step": 2463 }, { "epoch": 0.2578754578754579, "grad_norm": 1.9459202093623904, "learning_rate": 1.7398546239256367e-05, "loss": 0.888, "step": 2464 }, { "epoch": 0.25798011512297225, "grad_norm": 2.1322352083234266, "learning_rate": 1.7396265323134293e-05, "loss": 1.0406, "step": 2465 }, { "epoch": 0.25808477237048666, "grad_norm": 2.1704661597271118, "learning_rate": 1.739398355716634e-05, "loss": 0.9406, "step": 2466 }, { "epoch": 0.25818942961800107, "grad_norm": 2.5070809156506564, "learning_rate": 1.7391700941614687e-05, "loss": 0.9449, "step": 2467 }, { "epoch": 0.2582940868655154, "grad_norm": 2.2868072880319286, "learning_rate": 1.7389417476741615e-05, "loss": 1.1516, "step": 2468 }, { "epoch": 0.2583987441130298, "grad_norm": 2.2741590192268526, "learning_rate": 1.7387133162809492e-05, "loss": 1.0228, "step": 2469 }, { "epoch": 0.2585034013605442, "grad_norm": 2.361486277697732, "learning_rate": 1.738484800008079e-05, "loss": 0.9587, "step": 2470 }, { "epoch": 0.25860805860805863, "grad_norm": 2.1605751628195358, "learning_rate": 1.738256198881809e-05, "loss": 1.087, "step": 2471 }, { "epoch": 0.258712715855573, "grad_norm": 2.0828368567159368, "learning_rate": 1.7380275129284047e-05, "loss": 1.0085, "step": 2472 }, { "epoch": 0.2588173731030874, "grad_norm": 2.180627297007591, "learning_rate": 1.7377987421741428e-05, "loss": 1.0592, "step": 2473 }, { "epoch": 0.2589220303506018, "grad_norm": 2.2690156377111035, "learning_rate": 1.73756988664531e-05, "loss": 1.1374, "step": 2474 }, { "epoch": 0.25902668759811615, "grad_norm": 2.16338779374175, "learning_rate": 1.7373409463682017e-05, "loss": 0.999, "step": 2475 }, { "epoch": 0.25913134484563055, "grad_norm": 2.1101239537203216, "learning_rate": 1.7371119213691243e-05, "loss": 0.9949, "step": 2476 }, { "epoch": 0.25923600209314496, "grad_norm": 2.0134589337565085, "learning_rate": 1.7368828116743927e-05, "loss": 0.9535, "step": 2477 }, { "epoch": 0.25934065934065936, "grad_norm": 2.0592210690399995, "learning_rate": 1.7366536173103325e-05, "loss": 1.0145, "step": 2478 }, { "epoch": 0.2594453165881737, "grad_norm": 2.418699264093393, "learning_rate": 1.736424338303278e-05, "loss": 0.9113, "step": 2479 }, { "epoch": 0.2595499738356881, "grad_norm": 2.6190682853440084, "learning_rate": 1.7361949746795744e-05, "loss": 1.1143, "step": 2480 }, { "epoch": 0.2596546310832025, "grad_norm": 2.2236033474143975, "learning_rate": 1.7359655264655755e-05, "loss": 0.9108, "step": 2481 }, { "epoch": 0.2597592883307169, "grad_norm": 2.1342142709306207, "learning_rate": 1.7357359936876457e-05, "loss": 1.077, "step": 2482 }, { "epoch": 0.2598639455782313, "grad_norm": 2.0438697294062544, "learning_rate": 1.735506376372159e-05, "loss": 1.0832, "step": 2483 }, { "epoch": 0.2599686028257457, "grad_norm": 2.2685453564325933, "learning_rate": 1.7352766745454982e-05, "loss": 1.0577, "step": 2484 }, { "epoch": 0.2600732600732601, "grad_norm": 2.277948996427162, "learning_rate": 1.7350468882340572e-05, "loss": 1.1536, "step": 2485 }, { "epoch": 0.26017791732077444, "grad_norm": 2.2839736714043117, "learning_rate": 1.7348170174642387e-05, "loss": 1.0408, "step": 2486 }, { "epoch": 0.26028257456828885, "grad_norm": 2.1783014726910532, "learning_rate": 1.7345870622624552e-05, "loss": 1.0058, "step": 2487 }, { "epoch": 0.26038723181580326, "grad_norm": 2.316943295044069, "learning_rate": 1.734357022655129e-05, "loss": 1.1339, "step": 2488 }, { "epoch": 0.26049188906331766, "grad_norm": 2.2969718645888144, "learning_rate": 1.7341268986686924e-05, "loss": 1.0786, "step": 2489 }, { "epoch": 0.260596546310832, "grad_norm": 2.1322028884382185, "learning_rate": 1.7338966903295865e-05, "loss": 0.8794, "step": 2490 }, { "epoch": 0.2607012035583464, "grad_norm": 2.3892801496526994, "learning_rate": 1.7336663976642634e-05, "loss": 1.0003, "step": 2491 }, { "epoch": 0.2608058608058608, "grad_norm": 2.3446023789762047, "learning_rate": 1.7334360206991842e-05, "loss": 0.9991, "step": 2492 }, { "epoch": 0.2609105180533752, "grad_norm": 2.222641398320293, "learning_rate": 1.7332055594608188e-05, "loss": 1.0998, "step": 2493 }, { "epoch": 0.2610151753008896, "grad_norm": 2.0463322929767784, "learning_rate": 1.7329750139756484e-05, "loss": 1.0081, "step": 2494 }, { "epoch": 0.261119832548404, "grad_norm": 2.156653372785402, "learning_rate": 1.732744384270163e-05, "loss": 1.0108, "step": 2495 }, { "epoch": 0.2612244897959184, "grad_norm": 2.2798443834114432, "learning_rate": 1.732513670370863e-05, "loss": 0.9697, "step": 2496 }, { "epoch": 0.26132914704343274, "grad_norm": 2.5215817238707827, "learning_rate": 1.7322828723042566e-05, "loss": 1.0863, "step": 2497 }, { "epoch": 0.26143380429094715, "grad_norm": 2.1107145371174347, "learning_rate": 1.732051990096864e-05, "loss": 1.0245, "step": 2498 }, { "epoch": 0.26153846153846155, "grad_norm": 2.3976283399692537, "learning_rate": 1.7318210237752137e-05, "loss": 0.9136, "step": 2499 }, { "epoch": 0.2616431187859759, "grad_norm": 2.182358026003498, "learning_rate": 1.7315899733658443e-05, "loss": 0.9992, "step": 2500 }, { "epoch": 0.2617477760334903, "grad_norm": 2.2560760531967947, "learning_rate": 1.7313588388953037e-05, "loss": 0.9428, "step": 2501 }, { "epoch": 0.2618524332810047, "grad_norm": 2.409534450316176, "learning_rate": 1.73112762039015e-05, "loss": 1.1515, "step": 2502 }, { "epoch": 0.2619570905285191, "grad_norm": 2.0440248637735365, "learning_rate": 1.7308963178769507e-05, "loss": 0.9662, "step": 2503 }, { "epoch": 0.26206174777603347, "grad_norm": 2.2837513962147105, "learning_rate": 1.7306649313822826e-05, "loss": 1.0215, "step": 2504 }, { "epoch": 0.2621664050235479, "grad_norm": 2.0651869724475986, "learning_rate": 1.7304334609327326e-05, "loss": 1.0749, "step": 2505 }, { "epoch": 0.2622710622710623, "grad_norm": 2.2950964938180034, "learning_rate": 1.7302019065548973e-05, "loss": 1.0932, "step": 2506 }, { "epoch": 0.2623757195185767, "grad_norm": 2.6958405572322697, "learning_rate": 1.7299702682753826e-05, "loss": 0.8741, "step": 2507 }, { "epoch": 0.26248037676609104, "grad_norm": 2.3931112849601615, "learning_rate": 1.7297385461208044e-05, "loss": 0.9674, "step": 2508 }, { "epoch": 0.26258503401360545, "grad_norm": 2.0885169426285652, "learning_rate": 1.7295067401177877e-05, "loss": 1.0842, "step": 2509 }, { "epoch": 0.26268969126111985, "grad_norm": 2.3250017262626503, "learning_rate": 1.7292748502929678e-05, "loss": 1.012, "step": 2510 }, { "epoch": 0.2627943485086342, "grad_norm": 2.379541018104846, "learning_rate": 1.7290428766729893e-05, "loss": 1.1306, "step": 2511 }, { "epoch": 0.2628990057561486, "grad_norm": 2.1706941809048153, "learning_rate": 1.728810819284506e-05, "loss": 0.9991, "step": 2512 }, { "epoch": 0.263003663003663, "grad_norm": 2.075305628554083, "learning_rate": 1.7285786781541825e-05, "loss": 1.0348, "step": 2513 }, { "epoch": 0.2631083202511774, "grad_norm": 2.0890810561710933, "learning_rate": 1.7283464533086917e-05, "loss": 0.8281, "step": 2514 }, { "epoch": 0.26321297749869177, "grad_norm": 2.3435013116193035, "learning_rate": 1.728114144774717e-05, "loss": 1.0713, "step": 2515 }, { "epoch": 0.2633176347462062, "grad_norm": 2.1669632646702155, "learning_rate": 1.7278817525789513e-05, "loss": 1.1107, "step": 2516 }, { "epoch": 0.2634222919937206, "grad_norm": 2.171445878531362, "learning_rate": 1.7276492767480962e-05, "loss": 0.9642, "step": 2517 }, { "epoch": 0.26352694924123493, "grad_norm": 2.417950887212835, "learning_rate": 1.7274167173088643e-05, "loss": 1.0002, "step": 2518 }, { "epoch": 0.26363160648874934, "grad_norm": 2.0843255174378092, "learning_rate": 1.727184074287977e-05, "loss": 1.0267, "step": 2519 }, { "epoch": 0.26373626373626374, "grad_norm": 2.0538455916227134, "learning_rate": 1.7269513477121652e-05, "loss": 0.8743, "step": 2520 }, { "epoch": 0.26384092098377815, "grad_norm": 2.1773574455703892, "learning_rate": 1.7267185376081702e-05, "loss": 0.9744, "step": 2521 }, { "epoch": 0.2639455782312925, "grad_norm": 2.5975951297910167, "learning_rate": 1.7264856440027417e-05, "loss": 0.9104, "step": 2522 }, { "epoch": 0.2640502354788069, "grad_norm": 2.03025817791244, "learning_rate": 1.72625266692264e-05, "loss": 0.987, "step": 2523 }, { "epoch": 0.2641548927263213, "grad_norm": 2.549525974397687, "learning_rate": 1.726019606394635e-05, "loss": 1.0157, "step": 2524 }, { "epoch": 0.26425954997383566, "grad_norm": 3.248084272655294, "learning_rate": 1.7257864624455048e-05, "loss": 1.0282, "step": 2525 }, { "epoch": 0.26436420722135007, "grad_norm": 2.6468935665860807, "learning_rate": 1.7255532351020393e-05, "loss": 0.9803, "step": 2526 }, { "epoch": 0.2644688644688645, "grad_norm": 2.088664398222349, "learning_rate": 1.7253199243910357e-05, "loss": 1.1068, "step": 2527 }, { "epoch": 0.2645735217163789, "grad_norm": 2.181269418325987, "learning_rate": 1.725086530339303e-05, "loss": 0.9588, "step": 2528 }, { "epoch": 0.26467817896389323, "grad_norm": 1.9358065140447658, "learning_rate": 1.7248530529736575e-05, "loss": 0.9188, "step": 2529 }, { "epoch": 0.26478283621140764, "grad_norm": 2.182930759674411, "learning_rate": 1.724619492320927e-05, "loss": 1.1289, "step": 2530 }, { "epoch": 0.26488749345892204, "grad_norm": 2.1868917273765227, "learning_rate": 1.724385848407948e-05, "loss": 0.9698, "step": 2531 }, { "epoch": 0.26499215070643645, "grad_norm": 2.0248128570915918, "learning_rate": 1.7241521212615663e-05, "loss": 0.9791, "step": 2532 }, { "epoch": 0.2650968079539508, "grad_norm": 2.4684093554379167, "learning_rate": 1.7239183109086384e-05, "loss": 1.0178, "step": 2533 }, { "epoch": 0.2652014652014652, "grad_norm": 2.3248588536485535, "learning_rate": 1.7236844173760286e-05, "loss": 0.9927, "step": 2534 }, { "epoch": 0.2653061224489796, "grad_norm": 2.3426825101423945, "learning_rate": 1.7234504406906124e-05, "loss": 1.0613, "step": 2535 }, { "epoch": 0.26541077969649396, "grad_norm": 2.1367495439409643, "learning_rate": 1.7232163808792733e-05, "loss": 1.0705, "step": 2536 }, { "epoch": 0.26551543694400837, "grad_norm": 2.3408082854159264, "learning_rate": 1.7229822379689067e-05, "loss": 1.1775, "step": 2537 }, { "epoch": 0.26562009419152277, "grad_norm": 2.2416930097749646, "learning_rate": 1.722748011986415e-05, "loss": 1.0386, "step": 2538 }, { "epoch": 0.2657247514390372, "grad_norm": 2.1645588725679112, "learning_rate": 1.7225137029587115e-05, "loss": 0.9904, "step": 2539 }, { "epoch": 0.2658294086865515, "grad_norm": 2.1423029600647925, "learning_rate": 1.7222793109127193e-05, "loss": 0.987, "step": 2540 }, { "epoch": 0.26593406593406593, "grad_norm": 2.360699966256053, "learning_rate": 1.7220448358753693e-05, "loss": 1.0804, "step": 2541 }, { "epoch": 0.26603872318158034, "grad_norm": 2.484820983087851, "learning_rate": 1.7218102778736046e-05, "loss": 1.1052, "step": 2542 }, { "epoch": 0.2661433804290947, "grad_norm": 2.5559902418339298, "learning_rate": 1.7215756369343755e-05, "loss": 1.166, "step": 2543 }, { "epoch": 0.2662480376766091, "grad_norm": 2.273062565811467, "learning_rate": 1.721340913084643e-05, "loss": 1.0314, "step": 2544 }, { "epoch": 0.2663526949241235, "grad_norm": 2.390606522743288, "learning_rate": 1.721106106351377e-05, "loss": 1.0458, "step": 2545 }, { "epoch": 0.2664573521716379, "grad_norm": 2.3461424836775313, "learning_rate": 1.720871216761558e-05, "loss": 0.9914, "step": 2546 }, { "epoch": 0.26656200941915226, "grad_norm": 2.1093537321214333, "learning_rate": 1.7206362443421742e-05, "loss": 0.9635, "step": 2547 }, { "epoch": 0.26666666666666666, "grad_norm": 2.2076603089511333, "learning_rate": 1.7204011891202256e-05, "loss": 0.8798, "step": 2548 }, { "epoch": 0.26677132391418107, "grad_norm": 2.1198208472221376, "learning_rate": 1.7201660511227195e-05, "loss": 1.0799, "step": 2549 }, { "epoch": 0.2668759811616955, "grad_norm": 2.257648787705932, "learning_rate": 1.7199308303766745e-05, "loss": 1.1216, "step": 2550 }, { "epoch": 0.2669806384092098, "grad_norm": 2.167568660205902, "learning_rate": 1.7196955269091175e-05, "loss": 0.9121, "step": 2551 }, { "epoch": 0.26708529565672423, "grad_norm": 2.070104233631567, "learning_rate": 1.7194601407470857e-05, "loss": 1.1141, "step": 2552 }, { "epoch": 0.26718995290423864, "grad_norm": 2.121285217113809, "learning_rate": 1.719224671917625e-05, "loss": 1.0316, "step": 2553 }, { "epoch": 0.267294610151753, "grad_norm": 2.1816334225398997, "learning_rate": 1.718989120447792e-05, "loss": 0.8018, "step": 2554 }, { "epoch": 0.2673992673992674, "grad_norm": 2.2372260823335557, "learning_rate": 1.718753486364651e-05, "loss": 0.936, "step": 2555 }, { "epoch": 0.2675039246467818, "grad_norm": 2.1411788143946895, "learning_rate": 1.7185177696952773e-05, "loss": 1.0857, "step": 2556 }, { "epoch": 0.2676085818942962, "grad_norm": 2.06941219144131, "learning_rate": 1.7182819704667552e-05, "loss": 0.9053, "step": 2557 }, { "epoch": 0.26771323914181056, "grad_norm": 2.152176539140967, "learning_rate": 1.718046088706179e-05, "loss": 0.9493, "step": 2558 }, { "epoch": 0.26781789638932496, "grad_norm": 2.09104261234392, "learning_rate": 1.7178101244406512e-05, "loss": 1.0374, "step": 2559 }, { "epoch": 0.26792255363683937, "grad_norm": 2.346305270831672, "learning_rate": 1.7175740776972855e-05, "loss": 0.9768, "step": 2560 }, { "epoch": 0.2680272108843537, "grad_norm": 2.2927895598403847, "learning_rate": 1.7173379485032028e-05, "loss": 0.939, "step": 2561 }, { "epoch": 0.2681318681318681, "grad_norm": 2.3628912312336148, "learning_rate": 1.7171017368855363e-05, "loss": 1.0344, "step": 2562 }, { "epoch": 0.26823652537938253, "grad_norm": 2.210950954427031, "learning_rate": 1.7168654428714262e-05, "loss": 1.0587, "step": 2563 }, { "epoch": 0.26834118262689693, "grad_norm": 2.4183417784541517, "learning_rate": 1.7166290664880234e-05, "loss": 0.9456, "step": 2564 }, { "epoch": 0.2684458398744113, "grad_norm": 2.123364599494718, "learning_rate": 1.7163926077624886e-05, "loss": 0.9227, "step": 2565 }, { "epoch": 0.2685504971219257, "grad_norm": 2.1478356534162586, "learning_rate": 1.7161560667219907e-05, "loss": 0.9954, "step": 2566 }, { "epoch": 0.2686551543694401, "grad_norm": 2.2147876235241797, "learning_rate": 1.7159194433937087e-05, "loss": 0.9726, "step": 2567 }, { "epoch": 0.26875981161695445, "grad_norm": 2.1406444243414526, "learning_rate": 1.7156827378048313e-05, "loss": 0.9457, "step": 2568 }, { "epoch": 0.26886446886446885, "grad_norm": 1.979769022328967, "learning_rate": 1.7154459499825564e-05, "loss": 0.9121, "step": 2569 }, { "epoch": 0.26896912611198326, "grad_norm": 2.0747397671929626, "learning_rate": 1.715209079954092e-05, "loss": 1.0564, "step": 2570 }, { "epoch": 0.26907378335949766, "grad_norm": 2.2405940574201164, "learning_rate": 1.7149721277466537e-05, "loss": 1.0748, "step": 2571 }, { "epoch": 0.269178440607012, "grad_norm": 2.38814683134122, "learning_rate": 1.7147350933874693e-05, "loss": 0.9308, "step": 2572 }, { "epoch": 0.2692830978545264, "grad_norm": 2.1247668633842394, "learning_rate": 1.7144979769037732e-05, "loss": 1.0777, "step": 2573 }, { "epoch": 0.2693877551020408, "grad_norm": 2.262069730010127, "learning_rate": 1.714260778322811e-05, "loss": 1.0581, "step": 2574 }, { "epoch": 0.26949241234955523, "grad_norm": 2.184376285021379, "learning_rate": 1.7140234976718376e-05, "loss": 1.0185, "step": 2575 }, { "epoch": 0.2695970695970696, "grad_norm": 2.267123524429682, "learning_rate": 1.7137861349781172e-05, "loss": 0.9554, "step": 2576 }, { "epoch": 0.269701726844584, "grad_norm": 2.36727063416499, "learning_rate": 1.7135486902689226e-05, "loss": 1.0104, "step": 2577 }, { "epoch": 0.2698063840920984, "grad_norm": 2.0589416272264223, "learning_rate": 1.713311163571537e-05, "loss": 1.0443, "step": 2578 }, { "epoch": 0.26991104133961275, "grad_norm": 1.902080690208934, "learning_rate": 1.7130735549132528e-05, "loss": 1.0204, "step": 2579 }, { "epoch": 0.27001569858712715, "grad_norm": 2.4326326349397838, "learning_rate": 1.7128358643213715e-05, "loss": 1.0687, "step": 2580 }, { "epoch": 0.27012035583464156, "grad_norm": 2.068315109987545, "learning_rate": 1.7125980918232043e-05, "loss": 1.0579, "step": 2581 }, { "epoch": 0.27022501308215596, "grad_norm": 2.2810820908724567, "learning_rate": 1.712360237446072e-05, "loss": 1.0034, "step": 2582 }, { "epoch": 0.2703296703296703, "grad_norm": 2.16614232204801, "learning_rate": 1.7121223012173037e-05, "loss": 1.0208, "step": 2583 }, { "epoch": 0.2704343275771847, "grad_norm": 2.3587680319750266, "learning_rate": 1.71188428316424e-05, "loss": 0.9352, "step": 2584 }, { "epoch": 0.2705389848246991, "grad_norm": 2.1472264377221766, "learning_rate": 1.711646183314229e-05, "loss": 1.0184, "step": 2585 }, { "epoch": 0.2706436420722135, "grad_norm": 2.114763044771736, "learning_rate": 1.711408001694628e-05, "loss": 0.9823, "step": 2586 }, { "epoch": 0.2707482993197279, "grad_norm": 2.241676810783072, "learning_rate": 1.7111697383328066e-05, "loss": 1.0956, "step": 2587 }, { "epoch": 0.2708529565672423, "grad_norm": 2.1020077851868635, "learning_rate": 1.71093139325614e-05, "loss": 1.0291, "step": 2588 }, { "epoch": 0.2709576138147567, "grad_norm": 2.022324455238823, "learning_rate": 1.710692966492015e-05, "loss": 0.8429, "step": 2589 }, { "epoch": 0.27106227106227104, "grad_norm": 2.112019727111849, "learning_rate": 1.7104544580678276e-05, "loss": 1.0672, "step": 2590 }, { "epoch": 0.27116692830978545, "grad_norm": 2.2294841470996736, "learning_rate": 1.7102158680109827e-05, "loss": 1.0076, "step": 2591 }, { "epoch": 0.27127158555729985, "grad_norm": 2.0630709446164976, "learning_rate": 1.7099771963488948e-05, "loss": 0.9595, "step": 2592 }, { "epoch": 0.27137624280481426, "grad_norm": 2.17182249171102, "learning_rate": 1.709738443108988e-05, "loss": 0.9664, "step": 2593 }, { "epoch": 0.2714809000523286, "grad_norm": 1.6976481509832229, "learning_rate": 1.7094996083186947e-05, "loss": 0.8269, "step": 2594 }, { "epoch": 0.271585557299843, "grad_norm": 2.297472828435568, "learning_rate": 1.709260692005459e-05, "loss": 1.0357, "step": 2595 }, { "epoch": 0.2716902145473574, "grad_norm": 2.215787382610391, "learning_rate": 1.7090216941967314e-05, "loss": 1.0665, "step": 2596 }, { "epoch": 0.2717948717948718, "grad_norm": 1.9350048535413022, "learning_rate": 1.7087826149199735e-05, "loss": 0.9186, "step": 2597 }, { "epoch": 0.2718995290423862, "grad_norm": 2.449759441348814, "learning_rate": 1.7085434542026568e-05, "loss": 1.0463, "step": 2598 }, { "epoch": 0.2720041862899006, "grad_norm": 2.045258414864514, "learning_rate": 1.7083042120722606e-05, "loss": 0.999, "step": 2599 }, { "epoch": 0.272108843537415, "grad_norm": 2.1280959060956333, "learning_rate": 1.7080648885562746e-05, "loss": 1.0704, "step": 2600 }, { "epoch": 0.27221350078492934, "grad_norm": 2.058863219285702, "learning_rate": 1.7078254836821978e-05, "loss": 0.9272, "step": 2601 }, { "epoch": 0.27231815803244375, "grad_norm": 2.139899360821639, "learning_rate": 1.7075859974775377e-05, "loss": 0.9393, "step": 2602 }, { "epoch": 0.27242281527995815, "grad_norm": 2.4744357953899474, "learning_rate": 1.7073464299698122e-05, "loss": 1.1305, "step": 2603 }, { "epoch": 0.2725274725274725, "grad_norm": 2.070308973029564, "learning_rate": 1.7071067811865477e-05, "loss": 0.9733, "step": 2604 }, { "epoch": 0.2726321297749869, "grad_norm": 2.2526454703490364, "learning_rate": 1.706867051155281e-05, "loss": 0.9585, "step": 2605 }, { "epoch": 0.2727367870225013, "grad_norm": 2.212572397139927, "learning_rate": 1.7066272399035568e-05, "loss": 0.9461, "step": 2606 }, { "epoch": 0.2728414442700157, "grad_norm": 2.263865126136771, "learning_rate": 1.70638734745893e-05, "loss": 1.091, "step": 2607 }, { "epoch": 0.27294610151753007, "grad_norm": 2.1151782209613854, "learning_rate": 1.7061473738489655e-05, "loss": 0.8887, "step": 2608 }, { "epoch": 0.2730507587650445, "grad_norm": 2.246083764897805, "learning_rate": 1.705907319101236e-05, "loss": 1.1668, "step": 2609 }, { "epoch": 0.2731554160125589, "grad_norm": 2.0910504342650262, "learning_rate": 1.7056671832433246e-05, "loss": 0.9767, "step": 2610 }, { "epoch": 0.27326007326007323, "grad_norm": 1.844847039254374, "learning_rate": 1.7054269663028232e-05, "loss": 0.9653, "step": 2611 }, { "epoch": 0.27336473050758764, "grad_norm": 2.324658565103168, "learning_rate": 1.7051866683073337e-05, "loss": 1.0032, "step": 2612 }, { "epoch": 0.27346938775510204, "grad_norm": 2.3338728511235636, "learning_rate": 1.7049462892844663e-05, "loss": 1.0501, "step": 2613 }, { "epoch": 0.27357404500261645, "grad_norm": 2.3092244039265895, "learning_rate": 1.704705829261841e-05, "loss": 1.0663, "step": 2614 }, { "epoch": 0.2736787022501308, "grad_norm": 2.158121468687123, "learning_rate": 1.7044652882670874e-05, "loss": 0.9789, "step": 2615 }, { "epoch": 0.2737833594976452, "grad_norm": 2.4459807471934174, "learning_rate": 1.704224666327844e-05, "loss": 0.9862, "step": 2616 }, { "epoch": 0.2738880167451596, "grad_norm": 1.97502784186903, "learning_rate": 1.703983963471759e-05, "loss": 0.9387, "step": 2617 }, { "epoch": 0.273992673992674, "grad_norm": 2.2367807182106834, "learning_rate": 1.703743179726489e-05, "loss": 0.9994, "step": 2618 }, { "epoch": 0.27409733124018837, "grad_norm": 1.8582838838815607, "learning_rate": 1.703502315119702e-05, "loss": 0.969, "step": 2619 }, { "epoch": 0.2742019884877028, "grad_norm": 2.021019921533219, "learning_rate": 1.7032613696790718e-05, "loss": 1.0694, "step": 2620 }, { "epoch": 0.2743066457352172, "grad_norm": 2.1420561465239993, "learning_rate": 1.7030203434322852e-05, "loss": 1.1157, "step": 2621 }, { "epoch": 0.27441130298273153, "grad_norm": 2.192997753489374, "learning_rate": 1.7027792364070358e-05, "loss": 1.1059, "step": 2622 }, { "epoch": 0.27451596023024594, "grad_norm": 2.363367853081556, "learning_rate": 1.7025380486310275e-05, "loss": 1.024, "step": 2623 }, { "epoch": 0.27462061747776034, "grad_norm": 2.0560880376217803, "learning_rate": 1.702296780131973e-05, "loss": 0.9622, "step": 2624 }, { "epoch": 0.27472527472527475, "grad_norm": 2.0836175068267004, "learning_rate": 1.7020554309375947e-05, "loss": 0.8943, "step": 2625 }, { "epoch": 0.2748299319727891, "grad_norm": 2.1909340338926797, "learning_rate": 1.7018140010756246e-05, "loss": 1.0327, "step": 2626 }, { "epoch": 0.2749345892203035, "grad_norm": 2.5561649979411385, "learning_rate": 1.7015724905738025e-05, "loss": 0.9729, "step": 2627 }, { "epoch": 0.2750392464678179, "grad_norm": 2.0793636772474717, "learning_rate": 1.7013308994598796e-05, "loss": 0.8657, "step": 2628 }, { "epoch": 0.27514390371533226, "grad_norm": 2.092569812266147, "learning_rate": 1.701089227761614e-05, "loss": 0.9508, "step": 2629 }, { "epoch": 0.27524856096284667, "grad_norm": 2.0132533158168804, "learning_rate": 1.7008474755067755e-05, "loss": 0.938, "step": 2630 }, { "epoch": 0.2753532182103611, "grad_norm": 1.926555041783822, "learning_rate": 1.700605642723141e-05, "loss": 1.0228, "step": 2631 }, { "epoch": 0.2754578754578755, "grad_norm": 2.234682199384651, "learning_rate": 1.700363729438498e-05, "loss": 1.091, "step": 2632 }, { "epoch": 0.27556253270538983, "grad_norm": 2.424151503612507, "learning_rate": 1.7001217356806424e-05, "loss": 1.1339, "step": 2633 }, { "epoch": 0.27566718995290423, "grad_norm": 2.3376824460058794, "learning_rate": 1.6998796614773802e-05, "loss": 0.9105, "step": 2634 }, { "epoch": 0.27577184720041864, "grad_norm": 2.057373304612056, "learning_rate": 1.6996375068565264e-05, "loss": 0.9211, "step": 2635 }, { "epoch": 0.27587650444793305, "grad_norm": 2.187999947763851, "learning_rate": 1.6993952718459044e-05, "loss": 1.0002, "step": 2636 }, { "epoch": 0.2759811616954474, "grad_norm": 3.049341917756041, "learning_rate": 1.699152956473348e-05, "loss": 1.0756, "step": 2637 }, { "epoch": 0.2760858189429618, "grad_norm": 2.014925562032507, "learning_rate": 1.6989105607666993e-05, "loss": 0.9493, "step": 2638 }, { "epoch": 0.2761904761904762, "grad_norm": 2.2153507871464293, "learning_rate": 1.6986680847538107e-05, "loss": 0.9276, "step": 2639 }, { "epoch": 0.27629513343799056, "grad_norm": 2.042229591409745, "learning_rate": 1.6984255284625425e-05, "loss": 0.9397, "step": 2640 }, { "epoch": 0.27639979068550496, "grad_norm": 2.317973974796922, "learning_rate": 1.6981828919207656e-05, "loss": 0.9842, "step": 2641 }, { "epoch": 0.27650444793301937, "grad_norm": 2.0908721172521103, "learning_rate": 1.6979401751563584e-05, "loss": 1.0226, "step": 2642 }, { "epoch": 0.2766091051805338, "grad_norm": 2.3306273872258294, "learning_rate": 1.697697378197211e-05, "loss": 0.9604, "step": 2643 }, { "epoch": 0.2767137624280481, "grad_norm": 2.037113746353718, "learning_rate": 1.69745450107122e-05, "loss": 1.0226, "step": 2644 }, { "epoch": 0.27681841967556253, "grad_norm": 2.3954710797112524, "learning_rate": 1.697211543806293e-05, "loss": 0.9924, "step": 2645 }, { "epoch": 0.27692307692307694, "grad_norm": 2.2168247323986083, "learning_rate": 1.6969685064303462e-05, "loss": 1.0382, "step": 2646 }, { "epoch": 0.2770277341705913, "grad_norm": 2.497079920708726, "learning_rate": 1.696725388971305e-05, "loss": 0.9435, "step": 2647 }, { "epoch": 0.2771323914181057, "grad_norm": 2.526133193397156, "learning_rate": 1.6964821914571046e-05, "loss": 1.1236, "step": 2648 }, { "epoch": 0.2772370486656201, "grad_norm": 2.626567114858534, "learning_rate": 1.6962389139156883e-05, "loss": 1.1044, "step": 2649 }, { "epoch": 0.2773417059131345, "grad_norm": 2.2181232399143402, "learning_rate": 1.6959955563750094e-05, "loss": 1.0042, "step": 2650 }, { "epoch": 0.27744636316064886, "grad_norm": 2.2945097994513053, "learning_rate": 1.69575211886303e-05, "loss": 0.8661, "step": 2651 }, { "epoch": 0.27755102040816326, "grad_norm": 2.310307859900674, "learning_rate": 1.6955086014077215e-05, "loss": 1.0534, "step": 2652 }, { "epoch": 0.27765567765567767, "grad_norm": 2.069211757222885, "learning_rate": 1.6952650040370652e-05, "loss": 1.034, "step": 2653 }, { "epoch": 0.277760334903192, "grad_norm": 1.9076079670724744, "learning_rate": 1.6950213267790504e-05, "loss": 0.8245, "step": 2654 }, { "epoch": 0.2778649921507064, "grad_norm": 3.6932632423547944, "learning_rate": 1.694777569661676e-05, "loss": 0.8772, "step": 2655 }, { "epoch": 0.27796964939822083, "grad_norm": 1.9051520913310178, "learning_rate": 1.6945337327129504e-05, "loss": 0.8986, "step": 2656 }, { "epoch": 0.27807430664573524, "grad_norm": 2.305637947733258, "learning_rate": 1.694289815960891e-05, "loss": 1.0223, "step": 2657 }, { "epoch": 0.2781789638932496, "grad_norm": 2.7028733357321615, "learning_rate": 1.6940458194335243e-05, "loss": 0.9702, "step": 2658 }, { "epoch": 0.278283621140764, "grad_norm": 2.390388801259551, "learning_rate": 1.693801743158886e-05, "loss": 0.9889, "step": 2659 }, { "epoch": 0.2783882783882784, "grad_norm": 2.2381841916149066, "learning_rate": 1.693557587165021e-05, "loss": 1.0082, "step": 2660 }, { "epoch": 0.2784929356357928, "grad_norm": 2.071420896938347, "learning_rate": 1.693313351479983e-05, "loss": 1.0009, "step": 2661 }, { "epoch": 0.27859759288330715, "grad_norm": 2.446957221763722, "learning_rate": 1.693069036131836e-05, "loss": 1.0082, "step": 2662 }, { "epoch": 0.27870225013082156, "grad_norm": 2.105944137294003, "learning_rate": 1.692824641148651e-05, "loss": 0.8971, "step": 2663 }, { "epoch": 0.27880690737833597, "grad_norm": 2.3614811300833884, "learning_rate": 1.6925801665585103e-05, "loss": 0.9576, "step": 2664 }, { "epoch": 0.2789115646258503, "grad_norm": 2.058948239976716, "learning_rate": 1.692335612389505e-05, "loss": 0.9072, "step": 2665 }, { "epoch": 0.2790162218733647, "grad_norm": 2.2051843696736153, "learning_rate": 1.692090978669734e-05, "loss": 0.9603, "step": 2666 }, { "epoch": 0.27912087912087913, "grad_norm": 2.2044668414956456, "learning_rate": 1.6918462654273063e-05, "loss": 1.0132, "step": 2667 }, { "epoch": 0.27922553636839353, "grad_norm": 2.388456279940295, "learning_rate": 1.6916014726903408e-05, "loss": 0.9757, "step": 2668 }, { "epoch": 0.2793301936159079, "grad_norm": 2.1759554884870957, "learning_rate": 1.6913566004869637e-05, "loss": 1.0602, "step": 2669 }, { "epoch": 0.2794348508634223, "grad_norm": 2.4236744464592825, "learning_rate": 1.6911116488453118e-05, "loss": 1.096, "step": 2670 }, { "epoch": 0.2795395081109367, "grad_norm": 2.354246032793071, "learning_rate": 1.69086661779353e-05, "loss": 0.8912, "step": 2671 }, { "epoch": 0.27964416535845105, "grad_norm": 1.9463786670522631, "learning_rate": 1.6906215073597736e-05, "loss": 1.0506, "step": 2672 }, { "epoch": 0.27974882260596545, "grad_norm": 2.3854108573613213, "learning_rate": 1.690376317572206e-05, "loss": 1.0731, "step": 2673 }, { "epoch": 0.27985347985347986, "grad_norm": 2.287680313241345, "learning_rate": 1.690131048459e-05, "loss": 1.0453, "step": 2674 }, { "epoch": 0.27995813710099426, "grad_norm": 2.23259788225129, "learning_rate": 1.6898857000483375e-05, "loss": 1.039, "step": 2675 }, { "epoch": 0.2800627943485086, "grad_norm": 2.4640669787362626, "learning_rate": 1.6896402723684095e-05, "loss": 1.035, "step": 2676 }, { "epoch": 0.280167451596023, "grad_norm": 2.226817030735215, "learning_rate": 1.689394765447416e-05, "loss": 1.0474, "step": 2677 }, { "epoch": 0.2802721088435374, "grad_norm": 1.9193868173180237, "learning_rate": 1.6891491793135663e-05, "loss": 1.0085, "step": 2678 }, { "epoch": 0.28037676609105183, "grad_norm": 2.1659873373450838, "learning_rate": 1.6889035139950795e-05, "loss": 0.9146, "step": 2679 }, { "epoch": 0.2804814233385662, "grad_norm": 2.57303138488225, "learning_rate": 1.6886577695201816e-05, "loss": 1.1251, "step": 2680 }, { "epoch": 0.2805860805860806, "grad_norm": 2.042240644425295, "learning_rate": 1.6884119459171104e-05, "loss": 0.9829, "step": 2681 }, { "epoch": 0.280690737833595, "grad_norm": 1.931077914764023, "learning_rate": 1.688166043214111e-05, "loss": 1.0262, "step": 2682 }, { "epoch": 0.28079539508110934, "grad_norm": 2.6511446782137083, "learning_rate": 1.687920061439438e-05, "loss": 1.0604, "step": 2683 }, { "epoch": 0.28090005232862375, "grad_norm": 2.3009554948835698, "learning_rate": 1.6876740006213556e-05, "loss": 0.8862, "step": 2684 }, { "epoch": 0.28100470957613816, "grad_norm": 2.4972612313324274, "learning_rate": 1.6874278607881362e-05, "loss": 0.9708, "step": 2685 }, { "epoch": 0.28110936682365256, "grad_norm": 2.156106891119093, "learning_rate": 1.687181641968062e-05, "loss": 0.9762, "step": 2686 }, { "epoch": 0.2812140240711669, "grad_norm": 2.211635752927475, "learning_rate": 1.6869353441894245e-05, "loss": 1.0014, "step": 2687 }, { "epoch": 0.2813186813186813, "grad_norm": 2.4317542466014284, "learning_rate": 1.6866889674805233e-05, "loss": 0.9357, "step": 2688 }, { "epoch": 0.2814233385661957, "grad_norm": 2.4931495962019987, "learning_rate": 1.686442511869667e-05, "loss": 0.9232, "step": 2689 }, { "epoch": 0.2815279958137101, "grad_norm": 2.1696324074929394, "learning_rate": 1.6861959773851754e-05, "loss": 0.8919, "step": 2690 }, { "epoch": 0.2816326530612245, "grad_norm": 2.168218378578173, "learning_rate": 1.685949364055375e-05, "loss": 1.1053, "step": 2691 }, { "epoch": 0.2817373103087389, "grad_norm": 2.334913438897909, "learning_rate": 1.6857026719086014e-05, "loss": 1.0497, "step": 2692 }, { "epoch": 0.2818419675562533, "grad_norm": 2.307342605366626, "learning_rate": 1.6854559009732006e-05, "loss": 1.0982, "step": 2693 }, { "epoch": 0.28194662480376764, "grad_norm": 2.11751636176187, "learning_rate": 1.685209051277528e-05, "loss": 0.9288, "step": 2694 }, { "epoch": 0.28205128205128205, "grad_norm": 1.702869333805771, "learning_rate": 1.684962122849946e-05, "loss": 0.8285, "step": 2695 }, { "epoch": 0.28215593929879645, "grad_norm": 2.5478588197388983, "learning_rate": 1.6847151157188274e-05, "loss": 1.1483, "step": 2696 }, { "epoch": 0.2822605965463108, "grad_norm": 1.6423189951148351, "learning_rate": 1.6844680299125542e-05, "loss": 0.8226, "step": 2697 }, { "epoch": 0.2823652537938252, "grad_norm": 2.7051398620041796, "learning_rate": 1.6842208654595164e-05, "loss": 1.1181, "step": 2698 }, { "epoch": 0.2824699110413396, "grad_norm": 2.328813950042279, "learning_rate": 1.6839736223881144e-05, "loss": 1.0891, "step": 2699 }, { "epoch": 0.282574568288854, "grad_norm": 2.2355207375652766, "learning_rate": 1.6837263007267567e-05, "loss": 0.8753, "step": 2700 }, { "epoch": 0.2826792255363684, "grad_norm": 2.395946624547171, "learning_rate": 1.683478900503861e-05, "loss": 1.0681, "step": 2701 }, { "epoch": 0.2827838827838828, "grad_norm": 1.8895809027956487, "learning_rate": 1.6832314217478538e-05, "loss": 0.966, "step": 2702 }, { "epoch": 0.2828885400313972, "grad_norm": 2.108842553403623, "learning_rate": 1.6829838644871716e-05, "loss": 0.9917, "step": 2703 }, { "epoch": 0.2829931972789116, "grad_norm": 1.9546264051078577, "learning_rate": 1.6827362287502583e-05, "loss": 0.9654, "step": 2704 }, { "epoch": 0.28309785452642594, "grad_norm": 2.1750684526478095, "learning_rate": 1.6824885145655685e-05, "loss": 0.9892, "step": 2705 }, { "epoch": 0.28320251177394035, "grad_norm": 2.2396694828655472, "learning_rate": 1.6822407219615646e-05, "loss": 0.8008, "step": 2706 }, { "epoch": 0.28330716902145475, "grad_norm": 2.0811467507416777, "learning_rate": 1.6819928509667193e-05, "loss": 1.0043, "step": 2707 }, { "epoch": 0.2834118262689691, "grad_norm": 2.156062381671543, "learning_rate": 1.6817449016095124e-05, "loss": 0.8708, "step": 2708 }, { "epoch": 0.2835164835164835, "grad_norm": 2.2971033102764524, "learning_rate": 1.681496873918434e-05, "loss": 0.9787, "step": 2709 }, { "epoch": 0.2836211407639979, "grad_norm": 1.8028079138511994, "learning_rate": 1.681248767921984e-05, "loss": 0.7918, "step": 2710 }, { "epoch": 0.2837257980115123, "grad_norm": 1.9797359746272927, "learning_rate": 1.6810005836486693e-05, "loss": 0.9433, "step": 2711 }, { "epoch": 0.28383045525902667, "grad_norm": 2.1233134417416246, "learning_rate": 1.6807523211270065e-05, "loss": 1.1024, "step": 2712 }, { "epoch": 0.2839351125065411, "grad_norm": 2.0119377968605217, "learning_rate": 1.6805039803855225e-05, "loss": 1.0896, "step": 2713 }, { "epoch": 0.2840397697540555, "grad_norm": 2.1762408296082083, "learning_rate": 1.6802555614527513e-05, "loss": 1.1555, "step": 2714 }, { "epoch": 0.28414442700156983, "grad_norm": 2.0014014481336195, "learning_rate": 1.6800070643572374e-05, "loss": 1.0324, "step": 2715 }, { "epoch": 0.28424908424908424, "grad_norm": 2.2844238601393676, "learning_rate": 1.679758489127533e-05, "loss": 1.0019, "step": 2716 }, { "epoch": 0.28435374149659864, "grad_norm": 2.107100861748374, "learning_rate": 1.6795098357922004e-05, "loss": 0.8909, "step": 2717 }, { "epoch": 0.28445839874411305, "grad_norm": 2.309820312590304, "learning_rate": 1.67926110437981e-05, "loss": 1.0038, "step": 2718 }, { "epoch": 0.2845630559916274, "grad_norm": 2.9601375842932005, "learning_rate": 1.679012294918942e-05, "loss": 1.0204, "step": 2719 }, { "epoch": 0.2846677132391418, "grad_norm": 2.301335233810261, "learning_rate": 1.6787634074381844e-05, "loss": 1.1004, "step": 2720 }, { "epoch": 0.2847723704866562, "grad_norm": 2.2912609106687367, "learning_rate": 1.6785144419661356e-05, "loss": 0.8342, "step": 2721 }, { "epoch": 0.2848770277341706, "grad_norm": 2.2352473794305903, "learning_rate": 1.678265398531402e-05, "loss": 0.9728, "step": 2722 }, { "epoch": 0.28498168498168497, "grad_norm": 2.378174318133523, "learning_rate": 1.6780162771625987e-05, "loss": 1.0774, "step": 2723 }, { "epoch": 0.2850863422291994, "grad_norm": 2.18094544554846, "learning_rate": 1.6777670778883507e-05, "loss": 1.049, "step": 2724 }, { "epoch": 0.2851909994767138, "grad_norm": 2.4504749993683723, "learning_rate": 1.6775178007372915e-05, "loss": 0.8661, "step": 2725 }, { "epoch": 0.28529565672422813, "grad_norm": 2.282848626968931, "learning_rate": 1.677268445738064e-05, "loss": 1.0587, "step": 2726 }, { "epoch": 0.28540031397174254, "grad_norm": 2.2046921458707653, "learning_rate": 1.677019012919319e-05, "loss": 0.8776, "step": 2727 }, { "epoch": 0.28550497121925694, "grad_norm": 1.9336248225640682, "learning_rate": 1.6767695023097164e-05, "loss": 0.9612, "step": 2728 }, { "epoch": 0.28560962846677135, "grad_norm": 2.1614112374322523, "learning_rate": 1.6765199139379265e-05, "loss": 1.0527, "step": 2729 }, { "epoch": 0.2857142857142857, "grad_norm": 2.026874347225188, "learning_rate": 1.676270247832627e-05, "loss": 0.9901, "step": 2730 }, { "epoch": 0.2858189429618001, "grad_norm": 2.5279801213743056, "learning_rate": 1.6760205040225052e-05, "loss": 1.0751, "step": 2731 }, { "epoch": 0.2859236002093145, "grad_norm": 2.408066662111667, "learning_rate": 1.6757706825362565e-05, "loss": 1.0041, "step": 2732 }, { "epoch": 0.28602825745682886, "grad_norm": 2.2561275528825693, "learning_rate": 1.675520783402587e-05, "loss": 1.0136, "step": 2733 }, { "epoch": 0.28613291470434327, "grad_norm": 2.079863026171039, "learning_rate": 1.6752708066502097e-05, "loss": 1.0614, "step": 2734 }, { "epoch": 0.28623757195185767, "grad_norm": 2.081290751507961, "learning_rate": 1.675020752307848e-05, "loss": 1.0573, "step": 2735 }, { "epoch": 0.2863422291993721, "grad_norm": 2.030844176643617, "learning_rate": 1.6747706204042335e-05, "loss": 1.1131, "step": 2736 }, { "epoch": 0.28644688644688643, "grad_norm": 2.1050959828240936, "learning_rate": 1.6745204109681064e-05, "loss": 0.9127, "step": 2737 }, { "epoch": 0.28655154369440083, "grad_norm": 2.055509983409726, "learning_rate": 1.6742701240282174e-05, "loss": 0.9553, "step": 2738 }, { "epoch": 0.28665620094191524, "grad_norm": 2.137470369141178, "learning_rate": 1.6740197596133238e-05, "loss": 1.0597, "step": 2739 }, { "epoch": 0.2867608581894296, "grad_norm": 2.048361552464316, "learning_rate": 1.6737693177521936e-05, "loss": 0.8446, "step": 2740 }, { "epoch": 0.286865515436944, "grad_norm": 2.12112325524762, "learning_rate": 1.673518798473603e-05, "loss": 1.0618, "step": 2741 }, { "epoch": 0.2869701726844584, "grad_norm": 2.6242506865312363, "learning_rate": 1.6732682018063368e-05, "loss": 0.8252, "step": 2742 }, { "epoch": 0.2870748299319728, "grad_norm": 3.0996650803222674, "learning_rate": 1.6730175277791895e-05, "loss": 1.0003, "step": 2743 }, { "epoch": 0.28717948717948716, "grad_norm": 2.350027250748929, "learning_rate": 1.6727667764209638e-05, "loss": 0.9309, "step": 2744 }, { "epoch": 0.28728414442700156, "grad_norm": 2.0077156617023855, "learning_rate": 1.6725159477604716e-05, "loss": 1.0252, "step": 2745 }, { "epoch": 0.28738880167451597, "grad_norm": 2.635741023964438, "learning_rate": 1.672265041826534e-05, "loss": 0.8513, "step": 2746 }, { "epoch": 0.2874934589220304, "grad_norm": 2.2433336022783172, "learning_rate": 1.67201405864798e-05, "loss": 1.0492, "step": 2747 }, { "epoch": 0.2875981161695447, "grad_norm": 2.0807771153561547, "learning_rate": 1.6717629982536484e-05, "loss": 0.9764, "step": 2748 }, { "epoch": 0.28770277341705913, "grad_norm": 2.3123250176131807, "learning_rate": 1.6715118606723867e-05, "loss": 0.9599, "step": 2749 }, { "epoch": 0.28780743066457354, "grad_norm": 2.0101181547077998, "learning_rate": 1.6712606459330503e-05, "loss": 0.8779, "step": 2750 }, { "epoch": 0.2879120879120879, "grad_norm": 2.0975900598381467, "learning_rate": 1.6710093540645056e-05, "loss": 0.9257, "step": 2751 }, { "epoch": 0.2880167451596023, "grad_norm": 1.958047612574768, "learning_rate": 1.6707579850956256e-05, "loss": 0.8114, "step": 2752 }, { "epoch": 0.2881214024071167, "grad_norm": 2.1295560514183425, "learning_rate": 1.6705065390552934e-05, "loss": 1.0102, "step": 2753 }, { "epoch": 0.2882260596546311, "grad_norm": 2.3322838337100795, "learning_rate": 1.6702550159724005e-05, "loss": 1.0251, "step": 2754 }, { "epoch": 0.28833071690214546, "grad_norm": 2.0269061447136276, "learning_rate": 1.6700034158758476e-05, "loss": 0.9773, "step": 2755 }, { "epoch": 0.28843537414965986, "grad_norm": 2.2135086571849927, "learning_rate": 1.6697517387945437e-05, "loss": 1.0397, "step": 2756 }, { "epoch": 0.28854003139717427, "grad_norm": 3.020623424201838, "learning_rate": 1.669499984757408e-05, "loss": 1.1779, "step": 2757 }, { "epoch": 0.2886446886446886, "grad_norm": 2.3913345388059675, "learning_rate": 1.669248153793366e-05, "loss": 0.9306, "step": 2758 }, { "epoch": 0.288749345892203, "grad_norm": 2.0642701710655196, "learning_rate": 1.6689962459313547e-05, "loss": 1.0335, "step": 2759 }, { "epoch": 0.28885400313971743, "grad_norm": 2.084776755214591, "learning_rate": 1.6687442612003185e-05, "loss": 0.8789, "step": 2760 }, { "epoch": 0.28895866038723184, "grad_norm": 2.1559657822579616, "learning_rate": 1.668492199629211e-05, "loss": 0.873, "step": 2761 }, { "epoch": 0.2890633176347462, "grad_norm": 2.1968258921987043, "learning_rate": 1.668240061246995e-05, "loss": 1.0585, "step": 2762 }, { "epoch": 0.2891679748822606, "grad_norm": 2.037768963343927, "learning_rate": 1.6679878460826407e-05, "loss": 1.0065, "step": 2763 }, { "epoch": 0.289272632129775, "grad_norm": 2.311144324625343, "learning_rate": 1.6677355541651292e-05, "loss": 1.0024, "step": 2764 }, { "epoch": 0.2893772893772894, "grad_norm": 2.5667946832040536, "learning_rate": 1.6674831855234486e-05, "loss": 0.9484, "step": 2765 }, { "epoch": 0.28948194662480375, "grad_norm": 2.434219899597603, "learning_rate": 1.667230740186597e-05, "loss": 1.0179, "step": 2766 }, { "epoch": 0.28958660387231816, "grad_norm": 2.7601269464079676, "learning_rate": 1.6669782181835807e-05, "loss": 1.0228, "step": 2767 }, { "epoch": 0.28969126111983257, "grad_norm": 2.5403438469183697, "learning_rate": 1.666725619543415e-05, "loss": 1.047, "step": 2768 }, { "epoch": 0.2897959183673469, "grad_norm": 2.051984141112977, "learning_rate": 1.666472944295124e-05, "loss": 0.9447, "step": 2769 }, { "epoch": 0.2899005756148613, "grad_norm": 2.512139628224453, "learning_rate": 1.666220192467741e-05, "loss": 1.1367, "step": 2770 }, { "epoch": 0.29000523286237573, "grad_norm": 2.718503682041811, "learning_rate": 1.6659673640903067e-05, "loss": 1.0922, "step": 2771 }, { "epoch": 0.29010989010989013, "grad_norm": 2.5150069168743885, "learning_rate": 1.6657144591918726e-05, "loss": 1.1432, "step": 2772 }, { "epoch": 0.2902145473574045, "grad_norm": 2.3080940442950304, "learning_rate": 1.665461477801497e-05, "loss": 1.1565, "step": 2773 }, { "epoch": 0.2903192046049189, "grad_norm": 3.162428243155795, "learning_rate": 1.6652084199482496e-05, "loss": 1.1392, "step": 2774 }, { "epoch": 0.2904238618524333, "grad_norm": 2.460964843259728, "learning_rate": 1.6649552856612056e-05, "loss": 1.0668, "step": 2775 }, { "epoch": 0.29052851909994765, "grad_norm": 2.2837818852824747, "learning_rate": 1.6647020749694513e-05, "loss": 1.0405, "step": 2776 }, { "epoch": 0.29063317634746205, "grad_norm": 2.3757375317221348, "learning_rate": 1.6644487879020812e-05, "loss": 0.988, "step": 2777 }, { "epoch": 0.29073783359497646, "grad_norm": 2.0992714934923065, "learning_rate": 1.6641954244881984e-05, "loss": 1.0678, "step": 2778 }, { "epoch": 0.29084249084249086, "grad_norm": 2.4400916221420963, "learning_rate": 1.6639419847569147e-05, "loss": 1.0578, "step": 2779 }, { "epoch": 0.2909471480900052, "grad_norm": 2.1469546303983043, "learning_rate": 1.6636884687373508e-05, "loss": 0.9257, "step": 2780 }, { "epoch": 0.2910518053375196, "grad_norm": 2.334369619059665, "learning_rate": 1.663434876458637e-05, "loss": 0.9772, "step": 2781 }, { "epoch": 0.291156462585034, "grad_norm": 2.3345181976583653, "learning_rate": 1.6631812079499106e-05, "loss": 1.0597, "step": 2782 }, { "epoch": 0.2912611198325484, "grad_norm": 2.0700463369958815, "learning_rate": 1.6629274632403193e-05, "loss": 1.0176, "step": 2783 }, { "epoch": 0.2913657770800628, "grad_norm": 2.202087466914214, "learning_rate": 1.6626736423590186e-05, "loss": 1.0216, "step": 2784 }, { "epoch": 0.2914704343275772, "grad_norm": 2.0357122851821883, "learning_rate": 1.6624197453351723e-05, "loss": 1.0036, "step": 2785 }, { "epoch": 0.2915750915750916, "grad_norm": 2.6405418320254457, "learning_rate": 1.662165772197955e-05, "loss": 1.0112, "step": 2786 }, { "epoch": 0.29167974882260594, "grad_norm": 2.1803315886051453, "learning_rate": 1.661911722976548e-05, "loss": 0.8626, "step": 2787 }, { "epoch": 0.29178440607012035, "grad_norm": 1.9949267891582323, "learning_rate": 1.6616575977001423e-05, "loss": 0.8258, "step": 2788 }, { "epoch": 0.29188906331763476, "grad_norm": 2.1296048269861965, "learning_rate": 1.661403396397937e-05, "loss": 0.9722, "step": 2789 }, { "epoch": 0.29199372056514916, "grad_norm": 1.9633811576783402, "learning_rate": 1.661149119099141e-05, "loss": 0.9531, "step": 2790 }, { "epoch": 0.2920983778126635, "grad_norm": 2.4559017455704146, "learning_rate": 1.6608947658329705e-05, "loss": 0.9185, "step": 2791 }, { "epoch": 0.2922030350601779, "grad_norm": 2.6490541995498593, "learning_rate": 1.6606403366286522e-05, "loss": 1.0208, "step": 2792 }, { "epoch": 0.2923076923076923, "grad_norm": 2.230218351877687, "learning_rate": 1.6603858315154194e-05, "loss": 0.9397, "step": 2793 }, { "epoch": 0.2924123495552067, "grad_norm": 2.386055723778366, "learning_rate": 1.660131250522516e-05, "loss": 0.8659, "step": 2794 }, { "epoch": 0.2925170068027211, "grad_norm": 2.223350214103197, "learning_rate": 1.659876593679194e-05, "loss": 1.0085, "step": 2795 }, { "epoch": 0.2926216640502355, "grad_norm": 2.362704726315233, "learning_rate": 1.6596218610147134e-05, "loss": 1.0483, "step": 2796 }, { "epoch": 0.2927263212977499, "grad_norm": 2.554013585062049, "learning_rate": 1.6593670525583437e-05, "loss": 0.9678, "step": 2797 }, { "epoch": 0.29283097854526424, "grad_norm": 2.9600036988498477, "learning_rate": 1.659112168339363e-05, "loss": 0.855, "step": 2798 }, { "epoch": 0.29293563579277865, "grad_norm": 2.1825672871247135, "learning_rate": 1.6588572083870583e-05, "loss": 1.0531, "step": 2799 }, { "epoch": 0.29304029304029305, "grad_norm": 2.0933057115237603, "learning_rate": 1.6586021727307247e-05, "loss": 1.0515, "step": 2800 }, { "epoch": 0.2931449502878074, "grad_norm": 2.400496066375719, "learning_rate": 1.658347061399666e-05, "loss": 1.0419, "step": 2801 }, { "epoch": 0.2932496075353218, "grad_norm": 2.016597627986569, "learning_rate": 1.6580918744231955e-05, "loss": 0.9104, "step": 2802 }, { "epoch": 0.2933542647828362, "grad_norm": 2.3757951069404326, "learning_rate": 1.6578366118306343e-05, "loss": 1.1151, "step": 2803 }, { "epoch": 0.2934589220303506, "grad_norm": 2.0100169684030114, "learning_rate": 1.6575812736513133e-05, "loss": 0.9798, "step": 2804 }, { "epoch": 0.29356357927786497, "grad_norm": 2.1122429080682945, "learning_rate": 1.6573258599145704e-05, "loss": 0.9442, "step": 2805 }, { "epoch": 0.2936682365253794, "grad_norm": 2.154489488864629, "learning_rate": 1.657070370649754e-05, "loss": 1.0333, "step": 2806 }, { "epoch": 0.2937728937728938, "grad_norm": 2.275200742495966, "learning_rate": 1.65681480588622e-05, "loss": 1.083, "step": 2807 }, { "epoch": 0.2938775510204082, "grad_norm": 2.1710490075798514, "learning_rate": 1.656559165653333e-05, "loss": 1.0605, "step": 2808 }, { "epoch": 0.29398220826792254, "grad_norm": 2.598428034733466, "learning_rate": 1.6563034499804672e-05, "loss": 1.0464, "step": 2809 }, { "epoch": 0.29408686551543695, "grad_norm": 2.151540858385915, "learning_rate": 1.6560476588970044e-05, "loss": 1.0399, "step": 2810 }, { "epoch": 0.29419152276295135, "grad_norm": 2.1278728592287868, "learning_rate": 1.6557917924323358e-05, "loss": 1.0988, "step": 2811 }, { "epoch": 0.2942961800104657, "grad_norm": 2.1705417888870944, "learning_rate": 1.6555358506158604e-05, "loss": 0.9765, "step": 2812 }, { "epoch": 0.2944008372579801, "grad_norm": 2.206832470090881, "learning_rate": 1.6552798334769874e-05, "loss": 0.9632, "step": 2813 }, { "epoch": 0.2945054945054945, "grad_norm": 2.1115011840786586, "learning_rate": 1.655023741045133e-05, "loss": 1.1335, "step": 2814 }, { "epoch": 0.2946101517530089, "grad_norm": 1.90252480707429, "learning_rate": 1.6547675733497226e-05, "loss": 1.0063, "step": 2815 }, { "epoch": 0.29471480900052327, "grad_norm": 2.349864203175167, "learning_rate": 1.6545113304201906e-05, "loss": 1.018, "step": 2816 }, { "epoch": 0.2948194662480377, "grad_norm": 1.9420251810407316, "learning_rate": 1.6542550122859804e-05, "loss": 0.88, "step": 2817 }, { "epoch": 0.2949241234955521, "grad_norm": 2.2700830883100767, "learning_rate": 1.6539986189765425e-05, "loss": 1.0005, "step": 2818 }, { "epoch": 0.29502878074306643, "grad_norm": 2.214309833369823, "learning_rate": 1.6537421505213377e-05, "loss": 0.9971, "step": 2819 }, { "epoch": 0.29513343799058084, "grad_norm": 2.0402371350258415, "learning_rate": 1.6534856069498345e-05, "loss": 1.0959, "step": 2820 }, { "epoch": 0.29523809523809524, "grad_norm": 2.0958531308968196, "learning_rate": 1.6532289882915104e-05, "loss": 1.0072, "step": 2821 }, { "epoch": 0.29534275248560965, "grad_norm": 1.858230244811686, "learning_rate": 1.6529722945758512e-05, "loss": 0.8212, "step": 2822 }, { "epoch": 0.295447409733124, "grad_norm": 1.817144737007705, "learning_rate": 1.6527155258323517e-05, "loss": 1.0134, "step": 2823 }, { "epoch": 0.2955520669806384, "grad_norm": 2.4276651505529454, "learning_rate": 1.652458682090515e-05, "loss": 1.0547, "step": 2824 }, { "epoch": 0.2956567242281528, "grad_norm": 2.963051911787662, "learning_rate": 1.6522017633798534e-05, "loss": 1.0579, "step": 2825 }, { "epoch": 0.2957613814756672, "grad_norm": 2.2165478606094355, "learning_rate": 1.6519447697298866e-05, "loss": 0.9951, "step": 2826 }, { "epoch": 0.29586603872318157, "grad_norm": 1.9820489456783845, "learning_rate": 1.6516877011701443e-05, "loss": 0.7579, "step": 2827 }, { "epoch": 0.295970695970696, "grad_norm": 2.211338495085747, "learning_rate": 1.651430557730164e-05, "loss": 1.0503, "step": 2828 }, { "epoch": 0.2960753532182104, "grad_norm": 2.0412669316228964, "learning_rate": 1.6511733394394922e-05, "loss": 0.8973, "step": 2829 }, { "epoch": 0.29618001046572473, "grad_norm": 1.930076486135168, "learning_rate": 1.6509160463276835e-05, "loss": 0.945, "step": 2830 }, { "epoch": 0.29628466771323914, "grad_norm": 2.2334950597171677, "learning_rate": 1.6506586784243015e-05, "loss": 0.9924, "step": 2831 }, { "epoch": 0.29638932496075354, "grad_norm": 2.3824449949926727, "learning_rate": 1.650401235758918e-05, "loss": 0.8262, "step": 2832 }, { "epoch": 0.29649398220826795, "grad_norm": 2.2784589120562795, "learning_rate": 1.6501437183611148e-05, "loss": 1.1117, "step": 2833 }, { "epoch": 0.2965986394557823, "grad_norm": 2.250028215509174, "learning_rate": 1.64988612626048e-05, "loss": 1.0606, "step": 2834 }, { "epoch": 0.2967032967032967, "grad_norm": 2.3794072494274827, "learning_rate": 1.6496284594866115e-05, "loss": 0.8615, "step": 2835 }, { "epoch": 0.2968079539508111, "grad_norm": 2.470955073403848, "learning_rate": 1.6493707180691166e-05, "loss": 0.9446, "step": 2836 }, { "epoch": 0.29691261119832546, "grad_norm": 2.1185633078780004, "learning_rate": 1.6491129020376094e-05, "loss": 1.1212, "step": 2837 }, { "epoch": 0.29701726844583987, "grad_norm": 2.40021678899163, "learning_rate": 1.6488550114217138e-05, "loss": 1.1674, "step": 2838 }, { "epoch": 0.29712192569335427, "grad_norm": 2.8426932715804276, "learning_rate": 1.6485970462510624e-05, "loss": 0.9707, "step": 2839 }, { "epoch": 0.2972265829408687, "grad_norm": 1.9555538468974023, "learning_rate": 1.6483390065552952e-05, "loss": 0.9935, "step": 2840 }, { "epoch": 0.29733124018838303, "grad_norm": 2.001436241338665, "learning_rate": 1.648080892364062e-05, "loss": 0.8675, "step": 2841 }, { "epoch": 0.29743589743589743, "grad_norm": 2.3809732845318456, "learning_rate": 1.6478227037070205e-05, "loss": 1.0029, "step": 2842 }, { "epoch": 0.29754055468341184, "grad_norm": 2.052147061815293, "learning_rate": 1.6475644406138372e-05, "loss": 0.9261, "step": 2843 }, { "epoch": 0.2976452119309262, "grad_norm": 1.8579343591461566, "learning_rate": 1.6473061031141868e-05, "loss": 0.9575, "step": 2844 }, { "epoch": 0.2977498691784406, "grad_norm": 2.0536350820054743, "learning_rate": 1.6470476912377526e-05, "loss": 0.8525, "step": 2845 }, { "epoch": 0.297854526425955, "grad_norm": 2.7957230117068432, "learning_rate": 1.646789205014227e-05, "loss": 1.0007, "step": 2846 }, { "epoch": 0.2979591836734694, "grad_norm": 1.8368027105734126, "learning_rate": 1.646530644473311e-05, "loss": 0.8189, "step": 2847 }, { "epoch": 0.29806384092098376, "grad_norm": 1.9674169831122257, "learning_rate": 1.6462720096447134e-05, "loss": 0.8723, "step": 2848 }, { "epoch": 0.29816849816849816, "grad_norm": 2.1875273033291136, "learning_rate": 1.6460133005581512e-05, "loss": 0.953, "step": 2849 }, { "epoch": 0.29827315541601257, "grad_norm": 1.9643878581413023, "learning_rate": 1.6457545172433515e-05, "loss": 0.8681, "step": 2850 }, { "epoch": 0.298377812663527, "grad_norm": 2.071057314759131, "learning_rate": 1.6454956597300486e-05, "loss": 1.0712, "step": 2851 }, { "epoch": 0.2984824699110413, "grad_norm": 3.0502621658394604, "learning_rate": 1.645236728047986e-05, "loss": 0.9732, "step": 2852 }, { "epoch": 0.29858712715855573, "grad_norm": 2.208086371273379, "learning_rate": 1.6449777222269153e-05, "loss": 1.1401, "step": 2853 }, { "epoch": 0.29869178440607014, "grad_norm": 2.374468906716569, "learning_rate": 1.6447186422965962e-05, "loss": 0.9075, "step": 2854 }, { "epoch": 0.2987964416535845, "grad_norm": 2.075865806365837, "learning_rate": 1.644459488286799e-05, "loss": 0.9824, "step": 2855 }, { "epoch": 0.2989010989010989, "grad_norm": 2.1721807151694814, "learning_rate": 1.6442002602272994e-05, "loss": 1.0686, "step": 2856 }, { "epoch": 0.2990057561486133, "grad_norm": 2.1427069344984546, "learning_rate": 1.6439409581478843e-05, "loss": 1.0767, "step": 2857 }, { "epoch": 0.2991104133961277, "grad_norm": 2.3195474794616504, "learning_rate": 1.6436815820783477e-05, "loss": 1.0688, "step": 2858 }, { "epoch": 0.29921507064364206, "grad_norm": 2.078009990693235, "learning_rate": 1.6434221320484928e-05, "loss": 1.0676, "step": 2859 }, { "epoch": 0.29931972789115646, "grad_norm": 2.652882959489745, "learning_rate": 1.64316260808813e-05, "loss": 1.1277, "step": 2860 }, { "epoch": 0.29942438513867087, "grad_norm": 1.9324618218876926, "learning_rate": 1.6429030102270802e-05, "loss": 0.9057, "step": 2861 }, { "epoch": 0.2995290423861852, "grad_norm": 2.1542787245014763, "learning_rate": 1.6426433384951707e-05, "loss": 1.0036, "step": 2862 }, { "epoch": 0.2996336996336996, "grad_norm": 2.316215732282425, "learning_rate": 1.6423835929222393e-05, "loss": 1.0189, "step": 2863 }, { "epoch": 0.29973835688121403, "grad_norm": 2.0832717770915785, "learning_rate": 1.6421237735381305e-05, "loss": 0.9312, "step": 2864 }, { "epoch": 0.29984301412872844, "grad_norm": 2.0990095053120794, "learning_rate": 1.6418638803726988e-05, "loss": 1.0452, "step": 2865 }, { "epoch": 0.2999476713762428, "grad_norm": 2.3858579539152416, "learning_rate": 1.6416039134558058e-05, "loss": 0.8431, "step": 2866 }, { "epoch": 0.3000523286237572, "grad_norm": 2.0445888077165084, "learning_rate": 1.641343872817322e-05, "loss": 0.921, "step": 2867 }, { "epoch": 0.3001569858712716, "grad_norm": 2.3463310835524225, "learning_rate": 1.6410837584871276e-05, "loss": 1.0862, "step": 2868 }, { "epoch": 0.300261643118786, "grad_norm": 2.5259232781216316, "learning_rate": 1.6408235704951098e-05, "loss": 0.9665, "step": 2869 }, { "epoch": 0.30036630036630035, "grad_norm": 2.4845918292536244, "learning_rate": 1.6405633088711646e-05, "loss": 1.0941, "step": 2870 }, { "epoch": 0.30047095761381476, "grad_norm": 2.250546185914107, "learning_rate": 1.6403029736451964e-05, "loss": 1.0009, "step": 2871 }, { "epoch": 0.30057561486132917, "grad_norm": 2.2255990588605004, "learning_rate": 1.6400425648471187e-05, "loss": 0.9602, "step": 2872 }, { "epoch": 0.3006802721088435, "grad_norm": 2.17264095954443, "learning_rate": 1.6397820825068528e-05, "loss": 0.9746, "step": 2873 }, { "epoch": 0.3007849293563579, "grad_norm": 2.425953579485797, "learning_rate": 1.6395215266543284e-05, "loss": 0.8, "step": 2874 }, { "epoch": 0.3008895866038723, "grad_norm": 2.4373639606890998, "learning_rate": 1.6392608973194847e-05, "loss": 0.9525, "step": 2875 }, { "epoch": 0.30099424385138673, "grad_norm": 2.6105194549403894, "learning_rate": 1.6390001945322674e-05, "loss": 1.0145, "step": 2876 }, { "epoch": 0.3010989010989011, "grad_norm": 2.2916573263775644, "learning_rate": 1.6387394183226327e-05, "loss": 0.9484, "step": 2877 }, { "epoch": 0.3012035583464155, "grad_norm": 2.385274366893819, "learning_rate": 1.6384785687205438e-05, "loss": 1.1407, "step": 2878 }, { "epoch": 0.3013082155939299, "grad_norm": 2.3566579851084395, "learning_rate": 1.6382176457559732e-05, "loss": 1.0001, "step": 2879 }, { "epoch": 0.30141287284144425, "grad_norm": 2.395710605917217, "learning_rate": 1.637956649458901e-05, "loss": 1.0079, "step": 2880 }, { "epoch": 0.30151753008895865, "grad_norm": 2.0298439688834753, "learning_rate": 1.637695579859317e-05, "loss": 1.0266, "step": 2881 }, { "epoch": 0.30162218733647306, "grad_norm": 2.3803391461053116, "learning_rate": 1.637434436987218e-05, "loss": 0.953, "step": 2882 }, { "epoch": 0.30172684458398746, "grad_norm": 2.339978618501876, "learning_rate": 1.6371732208726098e-05, "loss": 1.0875, "step": 2883 }, { "epoch": 0.3018315018315018, "grad_norm": 2.0575949988075752, "learning_rate": 1.6369119315455067e-05, "loss": 1.1127, "step": 2884 }, { "epoch": 0.3019361590790162, "grad_norm": 2.553619190127974, "learning_rate": 1.6366505690359316e-05, "loss": 0.8558, "step": 2885 }, { "epoch": 0.3020408163265306, "grad_norm": 2.2250332246678606, "learning_rate": 1.6363891333739153e-05, "loss": 0.9865, "step": 2886 }, { "epoch": 0.302145473574045, "grad_norm": 2.263025646780511, "learning_rate": 1.6361276245894982e-05, "loss": 0.94, "step": 2887 }, { "epoch": 0.3022501308215594, "grad_norm": 3.2202185282441733, "learning_rate": 1.6358660427127267e-05, "loss": 0.9522, "step": 2888 }, { "epoch": 0.3023547880690738, "grad_norm": 2.6014220655520686, "learning_rate": 1.6356043877736582e-05, "loss": 1.0032, "step": 2889 }, { "epoch": 0.3024594453165882, "grad_norm": 2.5626669505776145, "learning_rate": 1.635342659802357e-05, "loss": 1.0856, "step": 2890 }, { "epoch": 0.30256410256410254, "grad_norm": 2.343833892601788, "learning_rate": 1.6350808588288964e-05, "loss": 1.0161, "step": 2891 }, { "epoch": 0.30266875981161695, "grad_norm": 2.586511158457927, "learning_rate": 1.6348189848833574e-05, "loss": 1.0082, "step": 2892 }, { "epoch": 0.30277341705913136, "grad_norm": 1.7944614432349806, "learning_rate": 1.6345570379958302e-05, "loss": 0.8952, "step": 2893 }, { "epoch": 0.30287807430664576, "grad_norm": 2.2117394132793047, "learning_rate": 1.634295018196413e-05, "loss": 0.9989, "step": 2894 }, { "epoch": 0.3029827315541601, "grad_norm": 2.12160379753247, "learning_rate": 1.6340329255152124e-05, "loss": 1.0493, "step": 2895 }, { "epoch": 0.3030873888016745, "grad_norm": 2.5588546522643707, "learning_rate": 1.633770759982343e-05, "loss": 0.9269, "step": 2896 }, { "epoch": 0.3031920460491889, "grad_norm": 2.4173578432974008, "learning_rate": 1.633508521627929e-05, "loss": 0.915, "step": 2897 }, { "epoch": 0.3032967032967033, "grad_norm": 1.9462185216594179, "learning_rate": 1.633246210482101e-05, "loss": 1.0375, "step": 2898 }, { "epoch": 0.3034013605442177, "grad_norm": 3.1222882590726995, "learning_rate": 1.6329838265750005e-05, "loss": 1.0446, "step": 2899 }, { "epoch": 0.3035060177917321, "grad_norm": 2.2579929742271796, "learning_rate": 1.6327213699367746e-05, "loss": 1.0918, "step": 2900 }, { "epoch": 0.3036106750392465, "grad_norm": 2.130384120094451, "learning_rate": 1.6324588405975807e-05, "loss": 0.8925, "step": 2901 }, { "epoch": 0.30371533228676084, "grad_norm": 2.2025752462669486, "learning_rate": 1.6321962385875837e-05, "loss": 0.9963, "step": 2902 }, { "epoch": 0.30381998953427525, "grad_norm": 2.385277002826419, "learning_rate": 1.6319335639369577e-05, "loss": 1.0002, "step": 2903 }, { "epoch": 0.30392464678178965, "grad_norm": 1.9086149634590928, "learning_rate": 1.631670816675884e-05, "loss": 0.8627, "step": 2904 }, { "epoch": 0.304029304029304, "grad_norm": 2.172309116560836, "learning_rate": 1.6314079968345527e-05, "loss": 0.9886, "step": 2905 }, { "epoch": 0.3041339612768184, "grad_norm": 2.286003474124238, "learning_rate": 1.631145104443163e-05, "loss": 0.8937, "step": 2906 }, { "epoch": 0.3042386185243328, "grad_norm": 2.1054654506329804, "learning_rate": 1.630882139531921e-05, "loss": 0.9913, "step": 2907 }, { "epoch": 0.3043432757718472, "grad_norm": 2.188221004194734, "learning_rate": 1.6306191021310423e-05, "loss": 1.0191, "step": 2908 }, { "epoch": 0.30444793301936157, "grad_norm": 2.1904821257327374, "learning_rate": 1.6303559922707503e-05, "loss": 0.8218, "step": 2909 }, { "epoch": 0.304552590266876, "grad_norm": 2.5842095448369844, "learning_rate": 1.6300928099812772e-05, "loss": 0.9329, "step": 2910 }, { "epoch": 0.3046572475143904, "grad_norm": 2.132246343055798, "learning_rate": 1.6298295552928626e-05, "loss": 0.8609, "step": 2911 }, { "epoch": 0.3047619047619048, "grad_norm": 2.2252491798180514, "learning_rate": 1.6295662282357555e-05, "loss": 1.118, "step": 2912 }, { "epoch": 0.30486656200941914, "grad_norm": 2.098601354897258, "learning_rate": 1.6293028288402123e-05, "loss": 0.9224, "step": 2913 }, { "epoch": 0.30497121925693355, "grad_norm": 2.012587732978616, "learning_rate": 1.6290393571364987e-05, "loss": 1.0842, "step": 2914 }, { "epoch": 0.30507587650444795, "grad_norm": 2.3614383102118555, "learning_rate": 1.628775813154887e-05, "loss": 1.0265, "step": 2915 }, { "epoch": 0.3051805337519623, "grad_norm": 2.7878156230807134, "learning_rate": 1.6285121969256605e-05, "loss": 1.1703, "step": 2916 }, { "epoch": 0.3052851909994767, "grad_norm": 2.1636226722521856, "learning_rate": 1.6282485084791086e-05, "loss": 1.072, "step": 2917 }, { "epoch": 0.3053898482469911, "grad_norm": 1.914547100044751, "learning_rate": 1.627984747845529e-05, "loss": 0.8617, "step": 2918 }, { "epoch": 0.3054945054945055, "grad_norm": 2.4366729944284753, "learning_rate": 1.6277209150552285e-05, "loss": 1.042, "step": 2919 }, { "epoch": 0.30559916274201987, "grad_norm": 1.8211650521764042, "learning_rate": 1.627457010138523e-05, "loss": 1.0024, "step": 2920 }, { "epoch": 0.3057038199895343, "grad_norm": 2.4531522351641417, "learning_rate": 1.6271930331257345e-05, "loss": 0.9587, "step": 2921 }, { "epoch": 0.3058084772370487, "grad_norm": 2.5130079196870208, "learning_rate": 1.6269289840471952e-05, "loss": 1.0224, "step": 2922 }, { "epoch": 0.30591313448456303, "grad_norm": 1.9747172743888348, "learning_rate": 1.6266648629332448e-05, "loss": 0.9663, "step": 2923 }, { "epoch": 0.30601779173207744, "grad_norm": 2.3944082981680705, "learning_rate": 1.6264006698142318e-05, "loss": 0.9934, "step": 2924 }, { "epoch": 0.30612244897959184, "grad_norm": 2.508078569407357, "learning_rate": 1.6261364047205114e-05, "loss": 1.017, "step": 2925 }, { "epoch": 0.30622710622710625, "grad_norm": 2.33208975658998, "learning_rate": 1.6258720676824487e-05, "loss": 0.8922, "step": 2926 }, { "epoch": 0.3063317634746206, "grad_norm": 1.9714230710953324, "learning_rate": 1.625607658730417e-05, "loss": 0.9413, "step": 2927 }, { "epoch": 0.306436420722135, "grad_norm": 2.006875902861234, "learning_rate": 1.625343177894797e-05, "loss": 0.9562, "step": 2928 }, { "epoch": 0.3065410779696494, "grad_norm": 1.9744248707683965, "learning_rate": 1.625078625205978e-05, "loss": 1.0386, "step": 2929 }, { "epoch": 0.30664573521716376, "grad_norm": 2.3082227558275377, "learning_rate": 1.6248140006943577e-05, "loss": 1.1613, "step": 2930 }, { "epoch": 0.30675039246467817, "grad_norm": 2.1702890355942337, "learning_rate": 1.624549304390342e-05, "loss": 0.9356, "step": 2931 }, { "epoch": 0.3068550497121926, "grad_norm": 2.424739431310441, "learning_rate": 1.6242845363243455e-05, "loss": 1.1796, "step": 2932 }, { "epoch": 0.306959706959707, "grad_norm": 2.171158515959578, "learning_rate": 1.62401969652679e-05, "loss": 1.109, "step": 2933 }, { "epoch": 0.30706436420722133, "grad_norm": 2.186889110464127, "learning_rate": 1.6237547850281065e-05, "loss": 0.9602, "step": 2934 }, { "epoch": 0.30716902145473574, "grad_norm": 2.404453450737933, "learning_rate": 1.6234898018587336e-05, "loss": 1.0674, "step": 2935 }, { "epoch": 0.30727367870225014, "grad_norm": 2.2426489372917224, "learning_rate": 1.6232247470491188e-05, "loss": 1.0203, "step": 2936 }, { "epoch": 0.30737833594976455, "grad_norm": 2.1305208739832135, "learning_rate": 1.622959620629717e-05, "loss": 0.8933, "step": 2937 }, { "epoch": 0.3074829931972789, "grad_norm": 2.475213166949153, "learning_rate": 1.6226944226309916e-05, "loss": 0.9715, "step": 2938 }, { "epoch": 0.3075876504447933, "grad_norm": 2.229771800563122, "learning_rate": 1.6224291530834147e-05, "loss": 1.0384, "step": 2939 }, { "epoch": 0.3076923076923077, "grad_norm": 1.9303541858289048, "learning_rate": 1.6221638120174668e-05, "loss": 1.0791, "step": 2940 }, { "epoch": 0.30779696493982206, "grad_norm": 2.3662003014744437, "learning_rate": 1.6218983994636355e-05, "loss": 1.0525, "step": 2941 }, { "epoch": 0.30790162218733647, "grad_norm": 2.1079944888721127, "learning_rate": 1.621632915452417e-05, "loss": 0.9803, "step": 2942 }, { "epoch": 0.30800627943485087, "grad_norm": 2.092870856439816, "learning_rate": 1.6213673600143168e-05, "loss": 0.9583, "step": 2943 }, { "epoch": 0.3081109366823653, "grad_norm": 2.243256700536703, "learning_rate": 1.6211017331798473e-05, "loss": 0.9972, "step": 2944 }, { "epoch": 0.3082155939298796, "grad_norm": 2.205695168816815, "learning_rate": 1.6208360349795293e-05, "loss": 1.0897, "step": 2945 }, { "epoch": 0.30832025117739403, "grad_norm": 2.4776470468527103, "learning_rate": 1.620570265443892e-05, "loss": 1.0388, "step": 2946 }, { "epoch": 0.30842490842490844, "grad_norm": 2.344647528820799, "learning_rate": 1.620304424603474e-05, "loss": 1.0009, "step": 2947 }, { "epoch": 0.3085295656724228, "grad_norm": 2.108670142100681, "learning_rate": 1.6200385124888195e-05, "loss": 0.9776, "step": 2948 }, { "epoch": 0.3086342229199372, "grad_norm": 2.3408563934308897, "learning_rate": 1.6197725291304833e-05, "loss": 0.9592, "step": 2949 }, { "epoch": 0.3087388801674516, "grad_norm": 2.18058930736239, "learning_rate": 1.6195064745590267e-05, "loss": 1.049, "step": 2950 }, { "epoch": 0.308843537414966, "grad_norm": 2.1381863651278854, "learning_rate": 1.619240348805021e-05, "loss": 0.962, "step": 2951 }, { "epoch": 0.30894819466248036, "grad_norm": 1.8703885232830255, "learning_rate": 1.6189741518990433e-05, "loss": 1.0408, "step": 2952 }, { "epoch": 0.30905285190999476, "grad_norm": 2.170717558554135, "learning_rate": 1.618707883871681e-05, "loss": 1.136, "step": 2953 }, { "epoch": 0.30915750915750917, "grad_norm": 2.365760154975676, "learning_rate": 1.6184415447535284e-05, "loss": 0.9957, "step": 2954 }, { "epoch": 0.3092621664050236, "grad_norm": 2.2575554436817606, "learning_rate": 1.6181751345751887e-05, "loss": 1.0823, "step": 2955 }, { "epoch": 0.3093668236525379, "grad_norm": 2.00512183891559, "learning_rate": 1.6179086533672726e-05, "loss": 1.064, "step": 2956 }, { "epoch": 0.30947148090005233, "grad_norm": 2.0900482163912932, "learning_rate": 1.6176421011604e-05, "loss": 1.0277, "step": 2957 }, { "epoch": 0.30957613814756674, "grad_norm": 2.028018177800534, "learning_rate": 1.6173754779851978e-05, "loss": 0.9109, "step": 2958 }, { "epoch": 0.3096807953950811, "grad_norm": 1.911257078717531, "learning_rate": 1.6171087838723015e-05, "loss": 1.0144, "step": 2959 }, { "epoch": 0.3097854526425955, "grad_norm": 2.2217963538032284, "learning_rate": 1.616842018852355e-05, "loss": 0.9425, "step": 2960 }, { "epoch": 0.3098901098901099, "grad_norm": 2.59813827847896, "learning_rate": 1.61657518295601e-05, "loss": 1.0, "step": 2961 }, { "epoch": 0.3099947671376243, "grad_norm": 2.16386391537696, "learning_rate": 1.6163082762139265e-05, "loss": 0.836, "step": 2962 }, { "epoch": 0.31009942438513866, "grad_norm": 2.8465111690795064, "learning_rate": 1.616041298656773e-05, "loss": 1.008, "step": 2963 }, { "epoch": 0.31020408163265306, "grad_norm": 2.58047061290731, "learning_rate": 1.6157742503152253e-05, "loss": 0.9143, "step": 2964 }, { "epoch": 0.31030873888016747, "grad_norm": 1.9097639693984674, "learning_rate": 1.6155071312199676e-05, "loss": 0.8861, "step": 2965 }, { "epoch": 0.3104133961276818, "grad_norm": 2.124026315060564, "learning_rate": 1.615239941401693e-05, "loss": 0.965, "step": 2966 }, { "epoch": 0.3105180533751962, "grad_norm": 2.39401063448098, "learning_rate": 1.614972680891102e-05, "loss": 1.0503, "step": 2967 }, { "epoch": 0.31062271062271063, "grad_norm": 1.8182089689462924, "learning_rate": 1.6147053497189032e-05, "loss": 0.8512, "step": 2968 }, { "epoch": 0.31072736787022504, "grad_norm": 2.0677360921080115, "learning_rate": 1.6144379479158136e-05, "loss": 0.9903, "step": 2969 }, { "epoch": 0.3108320251177394, "grad_norm": 2.428228198980805, "learning_rate": 1.614170475512558e-05, "loss": 0.8599, "step": 2970 }, { "epoch": 0.3109366823652538, "grad_norm": 2.0105272211059027, "learning_rate": 1.61390293253987e-05, "loss": 0.9592, "step": 2971 }, { "epoch": 0.3110413396127682, "grad_norm": 2.323659569472944, "learning_rate": 1.6136353190284903e-05, "loss": 0.9728, "step": 2972 }, { "epoch": 0.31114599686028255, "grad_norm": 1.9635717769932388, "learning_rate": 1.6133676350091686e-05, "loss": 0.9582, "step": 2973 }, { "epoch": 0.31125065410779695, "grad_norm": 1.9068151782035103, "learning_rate": 1.6130998805126622e-05, "loss": 0.8325, "step": 2974 }, { "epoch": 0.31135531135531136, "grad_norm": 2.0598995448173314, "learning_rate": 1.6128320555697365e-05, "loss": 1.0608, "step": 2975 }, { "epoch": 0.31145996860282577, "grad_norm": 2.2373806491125463, "learning_rate": 1.6125641602111655e-05, "loss": 0.9931, "step": 2976 }, { "epoch": 0.3115646258503401, "grad_norm": 2.0920136465295918, "learning_rate": 1.6122961944677308e-05, "loss": 0.9315, "step": 2977 }, { "epoch": 0.3116692830978545, "grad_norm": 2.082366198297534, "learning_rate": 1.612028158370222e-05, "loss": 0.9886, "step": 2978 }, { "epoch": 0.3117739403453689, "grad_norm": 2.2060623386259635, "learning_rate": 1.6117600519494373e-05, "loss": 0.9608, "step": 2979 }, { "epoch": 0.31187859759288333, "grad_norm": 2.552233556749464, "learning_rate": 1.611491875236182e-05, "loss": 1.018, "step": 2980 }, { "epoch": 0.3119832548403977, "grad_norm": 2.2999204774617157, "learning_rate": 1.6112236282612705e-05, "loss": 0.9881, "step": 2981 }, { "epoch": 0.3120879120879121, "grad_norm": 2.084664368943567, "learning_rate": 1.6109553110555256e-05, "loss": 0.9935, "step": 2982 }, { "epoch": 0.3121925693354265, "grad_norm": 2.077448020127175, "learning_rate": 1.6106869236497767e-05, "loss": 0.9272, "step": 2983 }, { "epoch": 0.31229722658294085, "grad_norm": 1.9328287512609774, "learning_rate": 1.610418466074862e-05, "loss": 1.037, "step": 2984 }, { "epoch": 0.31240188383045525, "grad_norm": 2.3308588787662674, "learning_rate": 1.6101499383616284e-05, "loss": 1.0365, "step": 2985 }, { "epoch": 0.31250654107796966, "grad_norm": 2.1473404366037023, "learning_rate": 1.60988134054093e-05, "loss": 0.9637, "step": 2986 }, { "epoch": 0.31261119832548406, "grad_norm": 2.0439604173751627, "learning_rate": 1.609612672643629e-05, "loss": 0.8799, "step": 2987 }, { "epoch": 0.3127158555729984, "grad_norm": 2.285023732772858, "learning_rate": 1.609343934700596e-05, "loss": 1.0245, "step": 2988 }, { "epoch": 0.3128205128205128, "grad_norm": 2.47915241747156, "learning_rate": 1.60907512674271e-05, "loss": 1.05, "step": 2989 }, { "epoch": 0.3129251700680272, "grad_norm": 2.0809584106263057, "learning_rate": 1.6088062488008575e-05, "loss": 1.0695, "step": 2990 }, { "epoch": 0.3130298273155416, "grad_norm": 2.1591951982467927, "learning_rate": 1.6085373009059322e-05, "loss": 0.9742, "step": 2991 }, { "epoch": 0.313134484563056, "grad_norm": 1.8386586833176444, "learning_rate": 1.6082682830888376e-05, "loss": 0.8092, "step": 2992 }, { "epoch": 0.3132391418105704, "grad_norm": 2.598989002728532, "learning_rate": 1.607999195380484e-05, "loss": 0.9899, "step": 2993 }, { "epoch": 0.3133437990580848, "grad_norm": 2.7275001106943297, "learning_rate": 1.6077300378117906e-05, "loss": 0.9487, "step": 2994 }, { "epoch": 0.31344845630559914, "grad_norm": 2.2062421090046342, "learning_rate": 1.6074608104136838e-05, "loss": 0.9532, "step": 2995 }, { "epoch": 0.31355311355311355, "grad_norm": 2.0978845722449995, "learning_rate": 1.6071915132170986e-05, "loss": 0.8883, "step": 2996 }, { "epoch": 0.31365777080062796, "grad_norm": 2.1704288073650653, "learning_rate": 1.606922146252977e-05, "loss": 1.0379, "step": 2997 }, { "epoch": 0.31376242804814236, "grad_norm": 2.0169657476089755, "learning_rate": 1.6066527095522707e-05, "loss": 1.0009, "step": 2998 }, { "epoch": 0.3138670852956567, "grad_norm": 2.256441777616629, "learning_rate": 1.6063832031459384e-05, "loss": 0.972, "step": 2999 }, { "epoch": 0.3139717425431711, "grad_norm": 1.8938052813988708, "learning_rate": 1.6061136270649473e-05, "loss": 0.9554, "step": 3000 }, { "epoch": 0.3140763997906855, "grad_norm": 2.5447202789705554, "learning_rate": 1.6058439813402712e-05, "loss": 1.0768, "step": 3001 }, { "epoch": 0.3141810570381999, "grad_norm": 2.0871529984817965, "learning_rate": 1.605574266002893e-05, "loss": 0.9473, "step": 3002 }, { "epoch": 0.3142857142857143, "grad_norm": 2.1668427542881097, "learning_rate": 1.6053044810838048e-05, "loss": 0.9402, "step": 3003 }, { "epoch": 0.3143903715332287, "grad_norm": 2.3159990985111936, "learning_rate": 1.6050346266140046e-05, "loss": 0.9726, "step": 3004 }, { "epoch": 0.3144950287807431, "grad_norm": 1.9154842831401457, "learning_rate": 1.6047647026244988e-05, "loss": 0.8467, "step": 3005 }, { "epoch": 0.31459968602825744, "grad_norm": 2.25244360448727, "learning_rate": 1.604494709146303e-05, "loss": 1.1072, "step": 3006 }, { "epoch": 0.31470434327577185, "grad_norm": 2.207816514269716, "learning_rate": 1.6042246462104394e-05, "loss": 1.011, "step": 3007 }, { "epoch": 0.31480900052328625, "grad_norm": 2.5547164500222044, "learning_rate": 1.6039545138479395e-05, "loss": 0.968, "step": 3008 }, { "epoch": 0.3149136577708006, "grad_norm": 2.677040611649757, "learning_rate": 1.603684312089841e-05, "loss": 1.1204, "step": 3009 }, { "epoch": 0.315018315018315, "grad_norm": 1.9755495487026258, "learning_rate": 1.6034140409671916e-05, "loss": 0.97, "step": 3010 }, { "epoch": 0.3151229722658294, "grad_norm": 2.1587533164996566, "learning_rate": 1.6031437005110456e-05, "loss": 1.0208, "step": 3011 }, { "epoch": 0.3152276295133438, "grad_norm": 1.9806892809879053, "learning_rate": 1.602873290752466e-05, "loss": 1.1013, "step": 3012 }, { "epoch": 0.31533228676085817, "grad_norm": 2.4483979214827745, "learning_rate": 1.602602811722523e-05, "loss": 1.1004, "step": 3013 }, { "epoch": 0.3154369440083726, "grad_norm": 2.5101769689868436, "learning_rate": 1.602332263452295e-05, "loss": 0.9379, "step": 3014 }, { "epoch": 0.315541601255887, "grad_norm": 2.538507265258334, "learning_rate": 1.602061645972869e-05, "loss": 1.0772, "step": 3015 }, { "epoch": 0.31564625850340133, "grad_norm": 2.1327347713309415, "learning_rate": 1.6017909593153394e-05, "loss": 0.9862, "step": 3016 }, { "epoch": 0.31575091575091574, "grad_norm": 1.8079317995730482, "learning_rate": 1.601520203510809e-05, "loss": 0.9613, "step": 3017 }, { "epoch": 0.31585557299843015, "grad_norm": 2.3477974076051424, "learning_rate": 1.6012493785903874e-05, "loss": 1.0938, "step": 3018 }, { "epoch": 0.31596023024594455, "grad_norm": 2.3995917756166953, "learning_rate": 1.6009784845851936e-05, "loss": 0.9591, "step": 3019 }, { "epoch": 0.3160648874934589, "grad_norm": 2.171025021213507, "learning_rate": 1.6007075215263533e-05, "loss": 1.0401, "step": 3020 }, { "epoch": 0.3161695447409733, "grad_norm": 2.453631613652852, "learning_rate": 1.6004364894450015e-05, "loss": 1.0872, "step": 3021 }, { "epoch": 0.3162742019884877, "grad_norm": 2.0649905270363003, "learning_rate": 1.6001653883722794e-05, "loss": 0.947, "step": 3022 }, { "epoch": 0.3163788592360021, "grad_norm": 1.9845232246558884, "learning_rate": 1.5998942183393372e-05, "loss": 1.0143, "step": 3023 }, { "epoch": 0.31648351648351647, "grad_norm": 2.027999737482337, "learning_rate": 1.599622979377334e-05, "loss": 1.058, "step": 3024 }, { "epoch": 0.3165881737310309, "grad_norm": 2.386479104325292, "learning_rate": 1.5993516715174345e-05, "loss": 1.0363, "step": 3025 }, { "epoch": 0.3166928309785453, "grad_norm": 2.026243723400283, "learning_rate": 1.5990802947908132e-05, "loss": 0.8436, "step": 3026 }, { "epoch": 0.31679748822605963, "grad_norm": 2.4461769884069486, "learning_rate": 1.598808849228651e-05, "loss": 1.0391, "step": 3027 }, { "epoch": 0.31690214547357404, "grad_norm": 2.735072516013726, "learning_rate": 1.5985373348621384e-05, "loss": 1.1361, "step": 3028 }, { "epoch": 0.31700680272108844, "grad_norm": 2.0367817571925677, "learning_rate": 1.598265751722473e-05, "loss": 1.1262, "step": 3029 }, { "epoch": 0.31711145996860285, "grad_norm": 2.389592094981779, "learning_rate": 1.5979940998408593e-05, "loss": 1.0193, "step": 3030 }, { "epoch": 0.3172161172161172, "grad_norm": 2.051597409307426, "learning_rate": 1.597722379248512e-05, "loss": 0.8997, "step": 3031 }, { "epoch": 0.3173207744636316, "grad_norm": 1.8589251291767148, "learning_rate": 1.597450589976651e-05, "loss": 0.957, "step": 3032 }, { "epoch": 0.317425431711146, "grad_norm": 2.2975676929699937, "learning_rate": 1.5971787320565063e-05, "loss": 0.9536, "step": 3033 }, { "epoch": 0.31753008895866036, "grad_norm": 2.2772719111714297, "learning_rate": 1.5969068055193146e-05, "loss": 1.008, "step": 3034 }, { "epoch": 0.31763474620617477, "grad_norm": 2.348844355294729, "learning_rate": 1.596634810396321e-05, "loss": 0.9407, "step": 3035 }, { "epoch": 0.3177394034536892, "grad_norm": 2.1144033212390494, "learning_rate": 1.5963627467187783e-05, "loss": 0.913, "step": 3036 }, { "epoch": 0.3178440607012036, "grad_norm": 2.078038440024001, "learning_rate": 1.5960906145179466e-05, "loss": 0.9828, "step": 3037 }, { "epoch": 0.31794871794871793, "grad_norm": 2.1282457724139667, "learning_rate": 1.595818413825095e-05, "loss": 0.9836, "step": 3038 }, { "epoch": 0.31805337519623234, "grad_norm": 1.9799340852762877, "learning_rate": 1.5955461446715002e-05, "loss": 1.044, "step": 3039 }, { "epoch": 0.31815803244374674, "grad_norm": 2.1983306415646693, "learning_rate": 1.5952738070884458e-05, "loss": 0.9124, "step": 3040 }, { "epoch": 0.31826268969126115, "grad_norm": 1.974260011569216, "learning_rate": 1.5950014011072238e-05, "loss": 0.9891, "step": 3041 }, { "epoch": 0.3183673469387755, "grad_norm": 2.310426349353939, "learning_rate": 1.5947289267591354e-05, "loss": 0.9872, "step": 3042 }, { "epoch": 0.3184720041862899, "grad_norm": 1.9450362823631278, "learning_rate": 1.5944563840754873e-05, "loss": 1.0055, "step": 3043 }, { "epoch": 0.3185766614338043, "grad_norm": 2.036067128143145, "learning_rate": 1.5941837730875956e-05, "loss": 0.9706, "step": 3044 }, { "epoch": 0.31868131868131866, "grad_norm": 2.05039801626101, "learning_rate": 1.593911093826784e-05, "loss": 0.9528, "step": 3045 }, { "epoch": 0.31878597592883307, "grad_norm": 2.2876059678714857, "learning_rate": 1.5936383463243836e-05, "loss": 0.9563, "step": 3046 }, { "epoch": 0.31889063317634747, "grad_norm": 2.697037738761109, "learning_rate": 1.5933655306117338e-05, "loss": 1.0216, "step": 3047 }, { "epoch": 0.3189952904238619, "grad_norm": 2.5022369955164385, "learning_rate": 1.5930926467201816e-05, "loss": 1.033, "step": 3048 }, { "epoch": 0.3190999476713762, "grad_norm": 2.113886043265922, "learning_rate": 1.592819694681082e-05, "loss": 0.9792, "step": 3049 }, { "epoch": 0.31920460491889063, "grad_norm": 2.237260875974751, "learning_rate": 1.5925466745257977e-05, "loss": 1.0271, "step": 3050 }, { "epoch": 0.31930926216640504, "grad_norm": 2.1237623188008037, "learning_rate": 1.592273586285699e-05, "loss": 0.9983, "step": 3051 }, { "epoch": 0.3194139194139194, "grad_norm": 2.2392068397320344, "learning_rate": 1.5920004299921652e-05, "loss": 1.0828, "step": 3052 }, { "epoch": 0.3195185766614338, "grad_norm": 3.338921058059234, "learning_rate": 1.5917272056765815e-05, "loss": 1.0108, "step": 3053 }, { "epoch": 0.3196232339089482, "grad_norm": 2.1076301049943647, "learning_rate": 1.591453913370342e-05, "loss": 1.0223, "step": 3054 }, { "epoch": 0.3197278911564626, "grad_norm": 2.0503145112616825, "learning_rate": 1.5911805531048495e-05, "loss": 1.0396, "step": 3055 }, { "epoch": 0.31983254840397696, "grad_norm": 2.4375226451961827, "learning_rate": 1.5909071249115128e-05, "loss": 1.07, "step": 3056 }, { "epoch": 0.31993720565149136, "grad_norm": 1.9288776837663408, "learning_rate": 1.5906336288217494e-05, "loss": 0.9863, "step": 3057 }, { "epoch": 0.32004186289900577, "grad_norm": 2.0943331121912605, "learning_rate": 1.5903600648669847e-05, "loss": 0.8811, "step": 3058 }, { "epoch": 0.3201465201465201, "grad_norm": 2.0975483698938917, "learning_rate": 1.590086433078652e-05, "loss": 1.1083, "step": 3059 }, { "epoch": 0.3202511773940345, "grad_norm": 1.991398727953297, "learning_rate": 1.5898127334881916e-05, "loss": 0.8839, "step": 3060 }, { "epoch": 0.32035583464154893, "grad_norm": 2.2279462939438655, "learning_rate": 1.5895389661270528e-05, "loss": 0.9735, "step": 3061 }, { "epoch": 0.32046049188906334, "grad_norm": 1.8857564494547183, "learning_rate": 1.5892651310266912e-05, "loss": 1.0815, "step": 3062 }, { "epoch": 0.3205651491365777, "grad_norm": 2.1252881559677492, "learning_rate": 1.5889912282185717e-05, "loss": 0.956, "step": 3063 }, { "epoch": 0.3206698063840921, "grad_norm": 2.2558822549332236, "learning_rate": 1.588717257734166e-05, "loss": 1.0757, "step": 3064 }, { "epoch": 0.3207744636316065, "grad_norm": 2.3702017920147567, "learning_rate": 1.588443219604954e-05, "loss": 0.9591, "step": 3065 }, { "epoch": 0.3208791208791209, "grad_norm": 1.9235192765898275, "learning_rate": 1.5881691138624236e-05, "loss": 1.0212, "step": 3066 }, { "epoch": 0.32098377812663526, "grad_norm": 2.664761728680135, "learning_rate": 1.5878949405380694e-05, "loss": 1.1368, "step": 3067 }, { "epoch": 0.32108843537414966, "grad_norm": 2.3349986003807683, "learning_rate": 1.5876206996633943e-05, "loss": 1.0861, "step": 3068 }, { "epoch": 0.32119309262166407, "grad_norm": 2.0350639615647435, "learning_rate": 1.58734639126991e-05, "loss": 1.0298, "step": 3069 }, { "epoch": 0.3212977498691784, "grad_norm": 2.1379630016372473, "learning_rate": 1.5870720153891346e-05, "loss": 1.0008, "step": 3070 }, { "epoch": 0.3214024071166928, "grad_norm": 2.423558596633754, "learning_rate": 1.5867975720525947e-05, "loss": 0.9308, "step": 3071 }, { "epoch": 0.32150706436420723, "grad_norm": 2.0816034578512728, "learning_rate": 1.586523061291824e-05, "loss": 0.9646, "step": 3072 }, { "epoch": 0.32161172161172163, "grad_norm": 2.560250524049416, "learning_rate": 1.5862484831383643e-05, "loss": 1.0258, "step": 3073 }, { "epoch": 0.321716378859236, "grad_norm": 2.239124234673666, "learning_rate": 1.5859738376237657e-05, "loss": 1.0865, "step": 3074 }, { "epoch": 0.3218210361067504, "grad_norm": 2.3699409843476666, "learning_rate": 1.5856991247795853e-05, "loss": 0.9975, "step": 3075 }, { "epoch": 0.3219256933542648, "grad_norm": 1.9916759495681915, "learning_rate": 1.5854243446373877e-05, "loss": 0.9162, "step": 3076 }, { "epoch": 0.32203035060177915, "grad_norm": 1.9898290475403624, "learning_rate": 1.5851494972287466e-05, "loss": 1.0024, "step": 3077 }, { "epoch": 0.32213500784929355, "grad_norm": 2.0511214319935456, "learning_rate": 1.584874582585242e-05, "loss": 1.0025, "step": 3078 }, { "epoch": 0.32223966509680796, "grad_norm": 2.180082289189908, "learning_rate": 1.5845996007384626e-05, "loss": 0.7952, "step": 3079 }, { "epoch": 0.32234432234432236, "grad_norm": 2.0373190957074314, "learning_rate": 1.5843245517200033e-05, "loss": 0.9029, "step": 3080 }, { "epoch": 0.3224489795918367, "grad_norm": 2.2294041825628117, "learning_rate": 1.5840494355614686e-05, "loss": 1.1581, "step": 3081 }, { "epoch": 0.3225536368393511, "grad_norm": 2.3966321362004512, "learning_rate": 1.5837742522944698e-05, "loss": 0.9631, "step": 3082 }, { "epoch": 0.3226582940868655, "grad_norm": 1.924447534905052, "learning_rate": 1.5834990019506263e-05, "loss": 1.0266, "step": 3083 }, { "epoch": 0.32276295133437993, "grad_norm": 2.3585671923673908, "learning_rate": 1.583223684561564e-05, "loss": 1.0525, "step": 3084 }, { "epoch": 0.3228676085818943, "grad_norm": 2.3735070616891334, "learning_rate": 1.5829483001589185e-05, "loss": 1.0239, "step": 3085 }, { "epoch": 0.3229722658294087, "grad_norm": 2.107789442162236, "learning_rate": 1.582672848774331e-05, "loss": 0.9487, "step": 3086 }, { "epoch": 0.3230769230769231, "grad_norm": 1.9939952977167719, "learning_rate": 1.5823973304394526e-05, "loss": 0.9027, "step": 3087 }, { "epoch": 0.32318158032443745, "grad_norm": 1.9853892321221516, "learning_rate": 1.5821217451859402e-05, "loss": 0.9402, "step": 3088 }, { "epoch": 0.32328623757195185, "grad_norm": 2.294801256285806, "learning_rate": 1.5818460930454588e-05, "loss": 1.082, "step": 3089 }, { "epoch": 0.32339089481946626, "grad_norm": 2.0584151362636267, "learning_rate": 1.5815703740496823e-05, "loss": 0.9276, "step": 3090 }, { "epoch": 0.32349555206698066, "grad_norm": 1.9598606086672974, "learning_rate": 1.5812945882302907e-05, "loss": 0.9895, "step": 3091 }, { "epoch": 0.323600209314495, "grad_norm": 2.0072911970323135, "learning_rate": 1.5810187356189722e-05, "loss": 1.0237, "step": 3092 }, { "epoch": 0.3237048665620094, "grad_norm": 1.8808583035296527, "learning_rate": 1.580742816247423e-05, "loss": 0.9557, "step": 3093 }, { "epoch": 0.3238095238095238, "grad_norm": 2.241856615594079, "learning_rate": 1.5804668301473473e-05, "loss": 1.0337, "step": 3094 }, { "epoch": 0.3239141810570382, "grad_norm": 2.238604082081988, "learning_rate": 1.5801907773504563e-05, "loss": 1.0139, "step": 3095 }, { "epoch": 0.3240188383045526, "grad_norm": 2.086041350119793, "learning_rate": 1.5799146578884686e-05, "loss": 1.0195, "step": 3096 }, { "epoch": 0.324123495552067, "grad_norm": 2.177520822966867, "learning_rate": 1.579638471793111e-05, "loss": 0.9967, "step": 3097 }, { "epoch": 0.3242281527995814, "grad_norm": 2.590324178614816, "learning_rate": 1.579362219096118e-05, "loss": 0.9962, "step": 3098 }, { "epoch": 0.32433281004709574, "grad_norm": 2.0699609992470407, "learning_rate": 1.5790858998292313e-05, "loss": 1.0322, "step": 3099 }, { "epoch": 0.32443746729461015, "grad_norm": 2.206119437570607, "learning_rate": 1.5788095140242014e-05, "loss": 1.0231, "step": 3100 }, { "epoch": 0.32454212454212455, "grad_norm": 1.8212021688643432, "learning_rate": 1.5785330617127844e-05, "loss": 0.8309, "step": 3101 }, { "epoch": 0.3246467817896389, "grad_norm": 2.2727201676845583, "learning_rate": 1.578256542926746e-05, "loss": 0.986, "step": 3102 }, { "epoch": 0.3247514390371533, "grad_norm": 2.1449608431036684, "learning_rate": 1.5779799576978584e-05, "loss": 1.099, "step": 3103 }, { "epoch": 0.3248560962846677, "grad_norm": 2.480440020807763, "learning_rate": 1.5777033060579022e-05, "loss": 0.9017, "step": 3104 }, { "epoch": 0.3249607535321821, "grad_norm": 2.0367956477342166, "learning_rate": 1.577426588038665e-05, "loss": 0.8987, "step": 3105 }, { "epoch": 0.3250654107796965, "grad_norm": 1.9815075457418991, "learning_rate": 1.5771498036719418e-05, "loss": 0.8948, "step": 3106 }, { "epoch": 0.3251700680272109, "grad_norm": 1.9726747058162075, "learning_rate": 1.5768729529895365e-05, "loss": 0.9563, "step": 3107 }, { "epoch": 0.3252747252747253, "grad_norm": 2.1307459003781295, "learning_rate": 1.57659603602326e-05, "loss": 0.9732, "step": 3108 }, { "epoch": 0.3253793825222397, "grad_norm": 4.287887282310838, "learning_rate": 1.5763190528049295e-05, "loss": 1.1042, "step": 3109 }, { "epoch": 0.32548403976975404, "grad_norm": 2.1285826245261483, "learning_rate": 1.5760420033663716e-05, "loss": 1.0633, "step": 3110 }, { "epoch": 0.32558869701726845, "grad_norm": 1.8011470361015922, "learning_rate": 1.5757648877394197e-05, "loss": 0.8628, "step": 3111 }, { "epoch": 0.32569335426478285, "grad_norm": 2.151338719221326, "learning_rate": 1.575487705955915e-05, "loss": 1.0653, "step": 3112 }, { "epoch": 0.3257980115122972, "grad_norm": 2.1328861874541563, "learning_rate": 1.5752104580477065e-05, "loss": 1.11, "step": 3113 }, { "epoch": 0.3259026687598116, "grad_norm": 2.088911019800362, "learning_rate": 1.5749331440466497e-05, "loss": 1.0583, "step": 3114 }, { "epoch": 0.326007326007326, "grad_norm": 1.7497905672760596, "learning_rate": 1.5746557639846095e-05, "loss": 0.8168, "step": 3115 }, { "epoch": 0.3261119832548404, "grad_norm": 2.0703455005124765, "learning_rate": 1.5743783178934572e-05, "loss": 0.9295, "step": 3116 }, { "epoch": 0.32621664050235477, "grad_norm": 2.051050023116992, "learning_rate": 1.5741008058050718e-05, "loss": 0.9884, "step": 3117 }, { "epoch": 0.3263212977498692, "grad_norm": 1.8426576087061717, "learning_rate": 1.57382322775134e-05, "loss": 0.9219, "step": 3118 }, { "epoch": 0.3264259549973836, "grad_norm": 2.1973030476954123, "learning_rate": 1.5735455837641556e-05, "loss": 0.9935, "step": 3119 }, { "epoch": 0.32653061224489793, "grad_norm": 2.390653054556527, "learning_rate": 1.5732678738754214e-05, "loss": 1.1316, "step": 3120 }, { "epoch": 0.32663526949241234, "grad_norm": 2.099033446066326, "learning_rate": 1.5729900981170464e-05, "loss": 1.036, "step": 3121 }, { "epoch": 0.32673992673992674, "grad_norm": 1.7457495629091635, "learning_rate": 1.5727122565209474e-05, "loss": 0.8337, "step": 3122 }, { "epoch": 0.32684458398744115, "grad_norm": 2.5405638057373365, "learning_rate": 1.5724343491190488e-05, "loss": 1.019, "step": 3123 }, { "epoch": 0.3269492412349555, "grad_norm": 2.2469815540311826, "learning_rate": 1.5721563759432835e-05, "loss": 1.0026, "step": 3124 }, { "epoch": 0.3270538984824699, "grad_norm": 2.397277989964288, "learning_rate": 1.5718783370255905e-05, "loss": 0.9063, "step": 3125 }, { "epoch": 0.3271585557299843, "grad_norm": 2.353318814929306, "learning_rate": 1.5716002323979174e-05, "loss": 1.194, "step": 3126 }, { "epoch": 0.3272632129774987, "grad_norm": 2.028797944646975, "learning_rate": 1.571322062092219e-05, "loss": 0.7781, "step": 3127 }, { "epoch": 0.32736787022501307, "grad_norm": 2.1763538986167346, "learning_rate": 1.571043826140457e-05, "loss": 1.0173, "step": 3128 }, { "epoch": 0.3274725274725275, "grad_norm": 2.2559481805621866, "learning_rate": 1.5707655245746022e-05, "loss": 1.0209, "step": 3129 }, { "epoch": 0.3275771847200419, "grad_norm": 2.266294866644528, "learning_rate": 1.5704871574266315e-05, "loss": 1.1683, "step": 3130 }, { "epoch": 0.32768184196755623, "grad_norm": 2.1728502972455774, "learning_rate": 1.5702087247285297e-05, "loss": 1.0714, "step": 3131 }, { "epoch": 0.32778649921507064, "grad_norm": 2.412631577860173, "learning_rate": 1.5699302265122894e-05, "loss": 1.0777, "step": 3132 }, { "epoch": 0.32789115646258504, "grad_norm": 2.4781137882095403, "learning_rate": 1.569651662809911e-05, "loss": 1.086, "step": 3133 }, { "epoch": 0.32799581371009945, "grad_norm": 2.2300712669478036, "learning_rate": 1.5693730336534017e-05, "loss": 1.0021, "step": 3134 }, { "epoch": 0.3281004709576138, "grad_norm": 2.2052539997942655, "learning_rate": 1.5690943390747764e-05, "loss": 1.0284, "step": 3135 }, { "epoch": 0.3282051282051282, "grad_norm": 2.4413624909490497, "learning_rate": 1.5688155791060577e-05, "loss": 1.0287, "step": 3136 }, { "epoch": 0.3283097854526426, "grad_norm": 2.556415622005147, "learning_rate": 1.5685367537792765e-05, "loss": 1.075, "step": 3137 }, { "epoch": 0.32841444270015696, "grad_norm": 2.1283762756864677, "learning_rate": 1.568257863126469e-05, "loss": 0.8205, "step": 3138 }, { "epoch": 0.32851909994767137, "grad_norm": 2.232790862732562, "learning_rate": 1.567978907179682e-05, "loss": 1.0162, "step": 3139 }, { "epoch": 0.3286237571951858, "grad_norm": 2.3136474249151537, "learning_rate": 1.5676998859709663e-05, "loss": 1.0353, "step": 3140 }, { "epoch": 0.3287284144427002, "grad_norm": 2.638925386745023, "learning_rate": 1.5674207995323828e-05, "loss": 0.9493, "step": 3141 }, { "epoch": 0.32883307169021453, "grad_norm": 2.506618787870084, "learning_rate": 1.5671416478959995e-05, "loss": 1.0422, "step": 3142 }, { "epoch": 0.32893772893772893, "grad_norm": 2.0975015700370125, "learning_rate": 1.5668624310938914e-05, "loss": 0.9647, "step": 3143 }, { "epoch": 0.32904238618524334, "grad_norm": 2.1397256829041456, "learning_rate": 1.5665831491581405e-05, "loss": 0.8599, "step": 3144 }, { "epoch": 0.3291470434327577, "grad_norm": 2.63504903803654, "learning_rate": 1.566303802120837e-05, "loss": 1.0202, "step": 3145 }, { "epoch": 0.3292517006802721, "grad_norm": 2.026871373608857, "learning_rate": 1.566024390014079e-05, "loss": 1.0174, "step": 3146 }, { "epoch": 0.3293563579277865, "grad_norm": 2.222517598112396, "learning_rate": 1.5657449128699708e-05, "loss": 0.8657, "step": 3147 }, { "epoch": 0.3294610151753009, "grad_norm": 1.8704721739634205, "learning_rate": 1.5654653707206255e-05, "loss": 1.0354, "step": 3148 }, { "epoch": 0.32956567242281526, "grad_norm": 2.215919635887023, "learning_rate": 1.5651857635981628e-05, "loss": 1.0849, "step": 3149 }, { "epoch": 0.32967032967032966, "grad_norm": 2.564246658121425, "learning_rate": 1.56490609153471e-05, "loss": 0.9606, "step": 3150 }, { "epoch": 0.32977498691784407, "grad_norm": 2.1280666629677696, "learning_rate": 1.564626354562402e-05, "loss": 1.0398, "step": 3151 }, { "epoch": 0.3298796441653585, "grad_norm": 2.007176609705347, "learning_rate": 1.5643465527133815e-05, "loss": 0.9364, "step": 3152 }, { "epoch": 0.3299843014128728, "grad_norm": 2.374920213374514, "learning_rate": 1.5640666860197975e-05, "loss": 1.0571, "step": 3153 }, { "epoch": 0.33008895866038723, "grad_norm": 1.9142213744766876, "learning_rate": 1.5637867545138083e-05, "loss": 0.96, "step": 3154 }, { "epoch": 0.33019361590790164, "grad_norm": 2.2782769847496223, "learning_rate": 1.563506758227578e-05, "loss": 1.0653, "step": 3155 }, { "epoch": 0.330298273155416, "grad_norm": 1.884020880571543, "learning_rate": 1.563226697193279e-05, "loss": 1.0129, "step": 3156 }, { "epoch": 0.3304029304029304, "grad_norm": 2.2062503790267862, "learning_rate": 1.5629465714430906e-05, "loss": 0.9958, "step": 3157 }, { "epoch": 0.3305075876504448, "grad_norm": 2.3997612816521086, "learning_rate": 1.5626663810091997e-05, "loss": 1.0093, "step": 3158 }, { "epoch": 0.3306122448979592, "grad_norm": 2.103715083415718, "learning_rate": 1.562386125923801e-05, "loss": 0.9297, "step": 3159 }, { "epoch": 0.33071690214547356, "grad_norm": 2.215995087737775, "learning_rate": 1.5621058062190965e-05, "loss": 0.9648, "step": 3160 }, { "epoch": 0.33082155939298796, "grad_norm": 1.8692320023257354, "learning_rate": 1.5618254219272954e-05, "loss": 1.0103, "step": 3161 }, { "epoch": 0.33092621664050237, "grad_norm": 2.0395101583342305, "learning_rate": 1.561544973080614e-05, "loss": 0.936, "step": 3162 }, { "epoch": 0.3310308738880167, "grad_norm": 1.9092428105317705, "learning_rate": 1.5612644597112773e-05, "loss": 0.8791, "step": 3163 }, { "epoch": 0.3311355311355311, "grad_norm": 2.340830105583403, "learning_rate": 1.5609838818515162e-05, "loss": 1.0334, "step": 3164 }, { "epoch": 0.33124018838304553, "grad_norm": 2.500046118391191, "learning_rate": 1.56070323953357e-05, "loss": 0.8887, "step": 3165 }, { "epoch": 0.33134484563055994, "grad_norm": 2.3218591928110794, "learning_rate": 1.5604225327896846e-05, "loss": 1.0837, "step": 3166 }, { "epoch": 0.3314495028780743, "grad_norm": 2.119253835435931, "learning_rate": 1.5601417616521143e-05, "loss": 0.9716, "step": 3167 }, { "epoch": 0.3315541601255887, "grad_norm": 2.038070808087544, "learning_rate": 1.5598609261531202e-05, "loss": 1.0377, "step": 3168 }, { "epoch": 0.3316588173731031, "grad_norm": 2.4134269571581215, "learning_rate": 1.5595800263249702e-05, "loss": 0.8931, "step": 3169 }, { "epoch": 0.3317634746206175, "grad_norm": 1.7317120142862439, "learning_rate": 1.5592990621999414e-05, "loss": 0.8002, "step": 3170 }, { "epoch": 0.33186813186813185, "grad_norm": 2.150415859427035, "learning_rate": 1.559018033810316e-05, "loss": 0.8975, "step": 3171 }, { "epoch": 0.33197278911564626, "grad_norm": 2.90864630369287, "learning_rate": 1.5587369411883856e-05, "loss": 0.998, "step": 3172 }, { "epoch": 0.33207744636316067, "grad_norm": 2.271286673783965, "learning_rate": 1.558455784366448e-05, "loss": 1.021, "step": 3173 }, { "epoch": 0.332182103610675, "grad_norm": 2.3150664350547885, "learning_rate": 1.5581745633768086e-05, "loss": 1.0001, "step": 3174 }, { "epoch": 0.3322867608581894, "grad_norm": 3.363470350927157, "learning_rate": 1.5578932782517804e-05, "loss": 0.9165, "step": 3175 }, { "epoch": 0.33239141810570383, "grad_norm": 2.08188408813949, "learning_rate": 1.5576119290236836e-05, "loss": 0.9887, "step": 3176 }, { "epoch": 0.33249607535321823, "grad_norm": 1.930989000386139, "learning_rate": 1.5573305157248454e-05, "loss": 0.9224, "step": 3177 }, { "epoch": 0.3326007326007326, "grad_norm": 1.9621642580428715, "learning_rate": 1.5570490383876015e-05, "loss": 0.9062, "step": 3178 }, { "epoch": 0.332705389848247, "grad_norm": 1.9661202771421844, "learning_rate": 1.5567674970442936e-05, "loss": 0.8901, "step": 3179 }, { "epoch": 0.3328100470957614, "grad_norm": 2.1572459606930625, "learning_rate": 1.5564858917272716e-05, "loss": 0.9869, "step": 3180 }, { "epoch": 0.33291470434327575, "grad_norm": 2.474124091599375, "learning_rate": 1.5562042224688926e-05, "loss": 1.1371, "step": 3181 }, { "epoch": 0.33301936159079015, "grad_norm": 1.8653639765364336, "learning_rate": 1.555922489301521e-05, "loss": 0.8485, "step": 3182 }, { "epoch": 0.33312401883830456, "grad_norm": 2.1078524268043783, "learning_rate": 1.555640692257528e-05, "loss": 0.9803, "step": 3183 }, { "epoch": 0.33322867608581896, "grad_norm": 2.139476652742253, "learning_rate": 1.5553588313692936e-05, "loss": 0.9596, "step": 3184 }, { "epoch": 0.3333333333333333, "grad_norm": 2.141936726484649, "learning_rate": 1.5550769066692036e-05, "loss": 1.0432, "step": 3185 }, { "epoch": 0.3334379905808477, "grad_norm": 2.12152468260362, "learning_rate": 1.5547949181896515e-05, "loss": 1.01, "step": 3186 }, { "epoch": 0.3335426478283621, "grad_norm": 2.264842415629573, "learning_rate": 1.5545128659630385e-05, "loss": 1.0728, "step": 3187 }, { "epoch": 0.3336473050758765, "grad_norm": 2.1304146804426924, "learning_rate": 1.5542307500217734e-05, "loss": 0.967, "step": 3188 }, { "epoch": 0.3337519623233909, "grad_norm": 1.9276499028871452, "learning_rate": 1.5539485703982714e-05, "loss": 0.9121, "step": 3189 }, { "epoch": 0.3338566195709053, "grad_norm": 1.9765479283756435, "learning_rate": 1.553666327124956e-05, "loss": 1.0808, "step": 3190 }, { "epoch": 0.3339612768184197, "grad_norm": 1.7361909209283715, "learning_rate": 1.553384020234257e-05, "loss": 0.8643, "step": 3191 }, { "epoch": 0.33406593406593404, "grad_norm": 2.1761684193729174, "learning_rate": 1.553101649758612e-05, "loss": 1.0361, "step": 3192 }, { "epoch": 0.33417059131344845, "grad_norm": 2.523913526660812, "learning_rate": 1.5528192157304663e-05, "loss": 1.0898, "step": 3193 }, { "epoch": 0.33427524856096286, "grad_norm": 2.118661622161171, "learning_rate": 1.5525367181822722e-05, "loss": 0.956, "step": 3194 }, { "epoch": 0.33437990580847726, "grad_norm": 2.1545428502001123, "learning_rate": 1.5522541571464894e-05, "loss": 0.9939, "step": 3195 }, { "epoch": 0.3344845630559916, "grad_norm": 2.2676647398119107, "learning_rate": 1.5519715326555837e-05, "loss": 1.0386, "step": 3196 }, { "epoch": 0.334589220303506, "grad_norm": 2.06105181377489, "learning_rate": 1.5516888447420303e-05, "loss": 1.0757, "step": 3197 }, { "epoch": 0.3346938775510204, "grad_norm": 2.3715768654167775, "learning_rate": 1.5514060934383103e-05, "loss": 0.997, "step": 3198 }, { "epoch": 0.3347985347985348, "grad_norm": 2.027803037575869, "learning_rate": 1.5511232787769124e-05, "loss": 0.8679, "step": 3199 }, { "epoch": 0.3349031920460492, "grad_norm": 2.1181232730913964, "learning_rate": 1.5508404007903322e-05, "loss": 0.9015, "step": 3200 }, { "epoch": 0.3350078492935636, "grad_norm": 2.3226842222737365, "learning_rate": 1.5505574595110734e-05, "loss": 0.9824, "step": 3201 }, { "epoch": 0.335112506541078, "grad_norm": 2.47655647815724, "learning_rate": 1.5502744549716465e-05, "loss": 0.9697, "step": 3202 }, { "epoch": 0.33521716378859234, "grad_norm": 2.0566095932796986, "learning_rate": 1.5499913872045694e-05, "loss": 0.8571, "step": 3203 }, { "epoch": 0.33532182103610675, "grad_norm": 2.28076401752358, "learning_rate": 1.5497082562423665e-05, "loss": 1.0462, "step": 3204 }, { "epoch": 0.33542647828362115, "grad_norm": 2.3423053756381784, "learning_rate": 1.5494250621175706e-05, "loss": 1.01, "step": 3205 }, { "epoch": 0.3355311355311355, "grad_norm": 2.5072831733976653, "learning_rate": 1.5491418048627214e-05, "loss": 1.0932, "step": 3206 }, { "epoch": 0.3356357927786499, "grad_norm": 2.1432608031534723, "learning_rate": 1.548858484510366e-05, "loss": 0.8312, "step": 3207 }, { "epoch": 0.3357404500261643, "grad_norm": 2.437167122394929, "learning_rate": 1.5485751010930577e-05, "loss": 0.927, "step": 3208 }, { "epoch": 0.3358451072736787, "grad_norm": 2.0576201295605925, "learning_rate": 1.5482916546433583e-05, "loss": 1.0367, "step": 3209 }, { "epoch": 0.3359497645211931, "grad_norm": 2.1847007438604362, "learning_rate": 1.5480081451938362e-05, "loss": 1.0442, "step": 3210 }, { "epoch": 0.3360544217687075, "grad_norm": 2.068195651770208, "learning_rate": 1.5477245727770672e-05, "loss": 0.9245, "step": 3211 }, { "epoch": 0.3361590790162219, "grad_norm": 2.1072128887111967, "learning_rate": 1.5474409374256344e-05, "loss": 0.9496, "step": 3212 }, { "epoch": 0.3362637362637363, "grad_norm": 2.3804536046173914, "learning_rate": 1.5471572391721282e-05, "loss": 0.977, "step": 3213 }, { "epoch": 0.33636839351125064, "grad_norm": 2.0895509232276406, "learning_rate": 1.5468734780491465e-05, "loss": 1.0336, "step": 3214 }, { "epoch": 0.33647305075876505, "grad_norm": 2.168752460124443, "learning_rate": 1.5465896540892932e-05, "loss": 0.9361, "step": 3215 }, { "epoch": 0.33657770800627945, "grad_norm": 2.1004970170203197, "learning_rate": 1.5463057673251802e-05, "loss": 0.9727, "step": 3216 }, { "epoch": 0.3366823652537938, "grad_norm": 2.050190223227202, "learning_rate": 1.5460218177894274e-05, "loss": 0.9999, "step": 3217 }, { "epoch": 0.3367870225013082, "grad_norm": 2.038917201160083, "learning_rate": 1.5457378055146607e-05, "loss": 0.962, "step": 3218 }, { "epoch": 0.3368916797488226, "grad_norm": 2.0493286691810395, "learning_rate": 1.545453730533514e-05, "loss": 0.918, "step": 3219 }, { "epoch": 0.336996336996337, "grad_norm": 2.009804376163448, "learning_rate": 1.545169592878628e-05, "loss": 0.7884, "step": 3220 }, { "epoch": 0.33710099424385137, "grad_norm": 1.9418895146815451, "learning_rate": 1.5448853925826505e-05, "loss": 0.9334, "step": 3221 }, { "epoch": 0.3372056514913658, "grad_norm": 2.0920719859447363, "learning_rate": 1.5446011296782368e-05, "loss": 0.9992, "step": 3222 }, { "epoch": 0.3373103087388802, "grad_norm": 2.7317450031003876, "learning_rate": 1.5443168041980488e-05, "loss": 0.9853, "step": 3223 }, { "epoch": 0.33741496598639453, "grad_norm": 2.5941883234023564, "learning_rate": 1.544032416174757e-05, "loss": 1.0523, "step": 3224 }, { "epoch": 0.33751962323390894, "grad_norm": 2.2629384674782442, "learning_rate": 1.5437479656410376e-05, "loss": 1.1233, "step": 3225 }, { "epoch": 0.33762428048142334, "grad_norm": 2.2335346692765126, "learning_rate": 1.543463452629575e-05, "loss": 0.9641, "step": 3226 }, { "epoch": 0.33772893772893775, "grad_norm": 2.1221463470407427, "learning_rate": 1.5431788771730597e-05, "loss": 1.0066, "step": 3227 }, { "epoch": 0.3378335949764521, "grad_norm": 2.2293540541218504, "learning_rate": 1.5428942393041904e-05, "loss": 1.0154, "step": 3228 }, { "epoch": 0.3379382522239665, "grad_norm": 2.3900893182152845, "learning_rate": 1.5426095390556725e-05, "loss": 1.053, "step": 3229 }, { "epoch": 0.3380429094714809, "grad_norm": 2.048307102090152, "learning_rate": 1.542324776460218e-05, "loss": 1.0966, "step": 3230 }, { "epoch": 0.33814756671899526, "grad_norm": 2.141298333153114, "learning_rate": 1.5420399515505478e-05, "loss": 0.9943, "step": 3231 }, { "epoch": 0.33825222396650967, "grad_norm": 2.1539784614556936, "learning_rate": 1.5417550643593885e-05, "loss": 1.0074, "step": 3232 }, { "epoch": 0.3383568812140241, "grad_norm": 2.244060531894042, "learning_rate": 1.5414701149194736e-05, "loss": 1.0491, "step": 3233 }, { "epoch": 0.3384615384615385, "grad_norm": 1.995913218973165, "learning_rate": 1.541185103263545e-05, "loss": 0.9373, "step": 3234 }, { "epoch": 0.33856619570905283, "grad_norm": 2.172151133964098, "learning_rate": 1.540900029424351e-05, "loss": 0.9444, "step": 3235 }, { "epoch": 0.33867085295656724, "grad_norm": 1.89676124574755, "learning_rate": 1.5406148934346468e-05, "loss": 0.8628, "step": 3236 }, { "epoch": 0.33877551020408164, "grad_norm": 1.8828681953222985, "learning_rate": 1.5403296953271958e-05, "loss": 0.9545, "step": 3237 }, { "epoch": 0.33888016745159605, "grad_norm": 1.8741424977674062, "learning_rate": 1.5400444351347674e-05, "loss": 0.966, "step": 3238 }, { "epoch": 0.3389848246991104, "grad_norm": 2.0333171496050304, "learning_rate": 1.5397591128901384e-05, "loss": 0.9455, "step": 3239 }, { "epoch": 0.3390894819466248, "grad_norm": 1.9816357166577598, "learning_rate": 1.539473728626093e-05, "loss": 1.0213, "step": 3240 }, { "epoch": 0.3391941391941392, "grad_norm": 2.363598537983322, "learning_rate": 1.539188282375423e-05, "loss": 1.0232, "step": 3241 }, { "epoch": 0.33929879644165356, "grad_norm": 2.0397959543670314, "learning_rate": 1.538902774170926e-05, "loss": 1.0472, "step": 3242 }, { "epoch": 0.33940345368916797, "grad_norm": 2.070254879911, "learning_rate": 1.538617204045408e-05, "loss": 1.0323, "step": 3243 }, { "epoch": 0.3395081109366824, "grad_norm": 2.0695439666681814, "learning_rate": 1.538331572031681e-05, "loss": 1.063, "step": 3244 }, { "epoch": 0.3396127681841968, "grad_norm": 2.1305043728996567, "learning_rate": 1.5380458781625654e-05, "loss": 1.0096, "step": 3245 }, { "epoch": 0.33971742543171113, "grad_norm": 1.967788896520279, "learning_rate": 1.5377601224708875e-05, "loss": 1.002, "step": 3246 }, { "epoch": 0.33982208267922553, "grad_norm": 2.1322066005520592, "learning_rate": 1.5374743049894813e-05, "loss": 1.0679, "step": 3247 }, { "epoch": 0.33992673992673994, "grad_norm": 1.8579885877502837, "learning_rate": 1.537188425751188e-05, "loss": 0.8455, "step": 3248 }, { "epoch": 0.3400313971742543, "grad_norm": 2.15408723090576, "learning_rate": 1.536902484788856e-05, "loss": 1.0029, "step": 3249 }, { "epoch": 0.3401360544217687, "grad_norm": 2.4599439352791537, "learning_rate": 1.5366164821353392e-05, "loss": 1.0669, "step": 3250 }, { "epoch": 0.3402407116692831, "grad_norm": 2.2542203659980142, "learning_rate": 1.5363304178235017e-05, "loss": 1.0087, "step": 3251 }, { "epoch": 0.3403453689167975, "grad_norm": 2.0949948404828187, "learning_rate": 1.536044291886211e-05, "loss": 0.9923, "step": 3252 }, { "epoch": 0.34045002616431186, "grad_norm": 1.9315131178475988, "learning_rate": 1.535758104356345e-05, "loss": 1.0347, "step": 3253 }, { "epoch": 0.34055468341182626, "grad_norm": 2.172260985606811, "learning_rate": 1.535471855266787e-05, "loss": 1.0161, "step": 3254 }, { "epoch": 0.34065934065934067, "grad_norm": 2.03347702907374, "learning_rate": 1.5351855446504268e-05, "loss": 0.9756, "step": 3255 }, { "epoch": 0.3407639979068551, "grad_norm": 2.1511761068591344, "learning_rate": 1.534899172540163e-05, "loss": 0.9812, "step": 3256 }, { "epoch": 0.3408686551543694, "grad_norm": 2.014625870073615, "learning_rate": 1.5346127389688996e-05, "loss": 1.0583, "step": 3257 }, { "epoch": 0.34097331240188383, "grad_norm": 2.424338507190857, "learning_rate": 1.534326243969549e-05, "loss": 1.1823, "step": 3258 }, { "epoch": 0.34107796964939824, "grad_norm": 2.381135018727616, "learning_rate": 1.5340396875750294e-05, "loss": 0.9861, "step": 3259 }, { "epoch": 0.3411826268969126, "grad_norm": 2.4725877348005585, "learning_rate": 1.5337530698182674e-05, "loss": 1.0594, "step": 3260 }, { "epoch": 0.341287284144427, "grad_norm": 1.9709311757797843, "learning_rate": 1.5334663907321957e-05, "loss": 1.0043, "step": 3261 }, { "epoch": 0.3413919413919414, "grad_norm": 2.0576277902991693, "learning_rate": 1.533179650349754e-05, "loss": 0.9401, "step": 3262 }, { "epoch": 0.3414965986394558, "grad_norm": 1.9172132204679706, "learning_rate": 1.5328928487038898e-05, "loss": 1.0187, "step": 3263 }, { "epoch": 0.34160125588697016, "grad_norm": 2.2358901429303026, "learning_rate": 1.5326059858275568e-05, "loss": 1.0068, "step": 3264 }, { "epoch": 0.34170591313448456, "grad_norm": 2.130154051130339, "learning_rate": 1.5323190617537165e-05, "loss": 0.9645, "step": 3265 }, { "epoch": 0.34181057038199897, "grad_norm": 1.9736640857121206, "learning_rate": 1.5320320765153367e-05, "loss": 0.9449, "step": 3266 }, { "epoch": 0.3419152276295133, "grad_norm": 2.2610379556501226, "learning_rate": 1.531745030145393e-05, "loss": 0.9335, "step": 3267 }, { "epoch": 0.3420198848770277, "grad_norm": 1.9304145684884284, "learning_rate": 1.531457922676867e-05, "loss": 0.9527, "step": 3268 }, { "epoch": 0.34212454212454213, "grad_norm": 2.23937829886418, "learning_rate": 1.531170754142749e-05, "loss": 1.0275, "step": 3269 }, { "epoch": 0.34222919937205654, "grad_norm": 1.952354365893699, "learning_rate": 1.5308835245760342e-05, "loss": 0.9735, "step": 3270 }, { "epoch": 0.3423338566195709, "grad_norm": 1.7116086172760445, "learning_rate": 1.530596234009726e-05, "loss": 0.9834, "step": 3271 }, { "epoch": 0.3424385138670853, "grad_norm": 2.209172716848948, "learning_rate": 1.5303088824768347e-05, "loss": 0.9872, "step": 3272 }, { "epoch": 0.3425431711145997, "grad_norm": 2.2644113537174504, "learning_rate": 1.5300214700103784e-05, "loss": 1.0005, "step": 3273 }, { "epoch": 0.3426478283621141, "grad_norm": 2.2147137441366063, "learning_rate": 1.52973399664338e-05, "loss": 1.0471, "step": 3274 }, { "epoch": 0.34275248560962845, "grad_norm": 2.202945184782102, "learning_rate": 1.5294464624088718e-05, "loss": 1.0856, "step": 3275 }, { "epoch": 0.34285714285714286, "grad_norm": 2.036998214551154, "learning_rate": 1.529158867339892e-05, "loss": 1.0183, "step": 3276 }, { "epoch": 0.34296180010465727, "grad_norm": 2.3880333648717778, "learning_rate": 1.5288712114694855e-05, "loss": 0.9732, "step": 3277 }, { "epoch": 0.3430664573521716, "grad_norm": 2.293141355535519, "learning_rate": 1.5285834948307047e-05, "loss": 1.0911, "step": 3278 }, { "epoch": 0.343171114599686, "grad_norm": 2.3667237873101103, "learning_rate": 1.5282957174566086e-05, "loss": 1.0271, "step": 3279 }, { "epoch": 0.34327577184720043, "grad_norm": 2.5802091802539118, "learning_rate": 1.528007879380264e-05, "loss": 1.0383, "step": 3280 }, { "epoch": 0.34338042909471483, "grad_norm": 2.2984418484959037, "learning_rate": 1.527719980634743e-05, "loss": 1.0983, "step": 3281 }, { "epoch": 0.3434850863422292, "grad_norm": 1.9988451780658998, "learning_rate": 1.527432021253127e-05, "loss": 0.9391, "step": 3282 }, { "epoch": 0.3435897435897436, "grad_norm": 1.9979482563308306, "learning_rate": 1.5271440012685027e-05, "loss": 0.9803, "step": 3283 }, { "epoch": 0.343694400837258, "grad_norm": 2.0462333311095144, "learning_rate": 1.5268559207139637e-05, "loss": 0.953, "step": 3284 }, { "epoch": 0.34379905808477235, "grad_norm": 2.018679553660942, "learning_rate": 1.5265677796226114e-05, "loss": 1.0009, "step": 3285 }, { "epoch": 0.34390371533228675, "grad_norm": 1.8547261935286408, "learning_rate": 1.5262795780275538e-05, "loss": 1.0313, "step": 3286 }, { "epoch": 0.34400837257980116, "grad_norm": 2.0846595576576843, "learning_rate": 1.5259913159619054e-05, "loss": 0.9638, "step": 3287 }, { "epoch": 0.34411302982731556, "grad_norm": 2.2092617207892458, "learning_rate": 1.5257029934587888e-05, "loss": 1.048, "step": 3288 }, { "epoch": 0.3442176870748299, "grad_norm": 2.115010279212093, "learning_rate": 1.5254146105513328e-05, "loss": 1.139, "step": 3289 }, { "epoch": 0.3443223443223443, "grad_norm": 2.0072225659373903, "learning_rate": 1.5251261672726727e-05, "loss": 1.0207, "step": 3290 }, { "epoch": 0.3444270015698587, "grad_norm": 1.9343608998324062, "learning_rate": 1.5248376636559514e-05, "loss": 0.8169, "step": 3291 }, { "epoch": 0.3445316588173731, "grad_norm": 2.2114081845502795, "learning_rate": 1.5245490997343187e-05, "loss": 1.0064, "step": 3292 }, { "epoch": 0.3446363160648875, "grad_norm": 2.19450830303441, "learning_rate": 1.5242604755409307e-05, "loss": 1.0446, "step": 3293 }, { "epoch": 0.3447409733124019, "grad_norm": 2.9828919291241727, "learning_rate": 1.5239717911089518e-05, "loss": 0.9943, "step": 3294 }, { "epoch": 0.3448456305599163, "grad_norm": 1.8836484748965538, "learning_rate": 1.5236830464715517e-05, "loss": 0.8834, "step": 3295 }, { "epoch": 0.34495028780743064, "grad_norm": 2.484034519489334, "learning_rate": 1.523394241661908e-05, "loss": 0.8373, "step": 3296 }, { "epoch": 0.34505494505494505, "grad_norm": 2.3959300576508262, "learning_rate": 1.5231053767132047e-05, "loss": 0.9524, "step": 3297 }, { "epoch": 0.34515960230245946, "grad_norm": 2.0058874784680447, "learning_rate": 1.5228164516586332e-05, "loss": 0.9646, "step": 3298 }, { "epoch": 0.34526425954997386, "grad_norm": 2.1806341190742944, "learning_rate": 1.5225274665313916e-05, "loss": 0.9534, "step": 3299 }, { "epoch": 0.3453689167974882, "grad_norm": 2.3809715607129274, "learning_rate": 1.5222384213646847e-05, "loss": 1.0433, "step": 3300 }, { "epoch": 0.3454735740450026, "grad_norm": 2.243507765788451, "learning_rate": 1.5219493161917248e-05, "loss": 0.9947, "step": 3301 }, { "epoch": 0.345578231292517, "grad_norm": 1.9486940004775435, "learning_rate": 1.5216601510457305e-05, "loss": 1.0537, "step": 3302 }, { "epoch": 0.3456828885400314, "grad_norm": 2.1151878794379058, "learning_rate": 1.5213709259599267e-05, "loss": 0.9945, "step": 3303 }, { "epoch": 0.3457875457875458, "grad_norm": 2.4085393603562073, "learning_rate": 1.5210816409675473e-05, "loss": 1.0041, "step": 3304 }, { "epoch": 0.3458922030350602, "grad_norm": 2.0934987255320405, "learning_rate": 1.5207922961018304e-05, "loss": 1.0212, "step": 3305 }, { "epoch": 0.3459968602825746, "grad_norm": 2.040087227874492, "learning_rate": 1.5205028913960232e-05, "loss": 1.045, "step": 3306 }, { "epoch": 0.34610151753008894, "grad_norm": 2.0986542779184183, "learning_rate": 1.5202134268833786e-05, "loss": 0.9866, "step": 3307 }, { "epoch": 0.34620617477760335, "grad_norm": 2.2898782266773425, "learning_rate": 1.5199239025971567e-05, "loss": 0.8739, "step": 3308 }, { "epoch": 0.34631083202511775, "grad_norm": 2.7507453481175292, "learning_rate": 1.5196343185706244e-05, "loss": 0.9127, "step": 3309 }, { "epoch": 0.3464154892726321, "grad_norm": 2.1750979583796366, "learning_rate": 1.5193446748370551e-05, "loss": 1.0279, "step": 3310 }, { "epoch": 0.3465201465201465, "grad_norm": 2.0867343208501365, "learning_rate": 1.5190549714297303e-05, "loss": 1.0373, "step": 3311 }, { "epoch": 0.3466248037676609, "grad_norm": 2.5327508046734626, "learning_rate": 1.5187652083819369e-05, "loss": 1.0135, "step": 3312 }, { "epoch": 0.3467294610151753, "grad_norm": 2.1859927718890555, "learning_rate": 1.5184753857269697e-05, "loss": 0.9699, "step": 3313 }, { "epoch": 0.3468341182626897, "grad_norm": 2.009327763025113, "learning_rate": 1.518185503498129e-05, "loss": 0.907, "step": 3314 }, { "epoch": 0.3469387755102041, "grad_norm": 2.2224539969275643, "learning_rate": 1.5178955617287239e-05, "loss": 1.083, "step": 3315 }, { "epoch": 0.3470434327577185, "grad_norm": 2.4096293598821177, "learning_rate": 1.517605560452069e-05, "loss": 0.8995, "step": 3316 }, { "epoch": 0.3471480900052329, "grad_norm": 2.146816047792693, "learning_rate": 1.5173154997014857e-05, "loss": 0.8222, "step": 3317 }, { "epoch": 0.34725274725274724, "grad_norm": 2.3069897827667623, "learning_rate": 1.5170253795103025e-05, "loss": 0.9993, "step": 3318 }, { "epoch": 0.34735740450026165, "grad_norm": 1.74531938947694, "learning_rate": 1.5167351999118554e-05, "loss": 0.9175, "step": 3319 }, { "epoch": 0.34746206174777605, "grad_norm": 2.1288546754174993, "learning_rate": 1.5164449609394863e-05, "loss": 1.0347, "step": 3320 }, { "epoch": 0.3475667189952904, "grad_norm": 1.8501462739239363, "learning_rate": 1.5161546626265442e-05, "loss": 0.9374, "step": 3321 }, { "epoch": 0.3476713762428048, "grad_norm": 2.416666658774087, "learning_rate": 1.515864305006385e-05, "loss": 1.1008, "step": 3322 }, { "epoch": 0.3477760334903192, "grad_norm": 2.1683860693468913, "learning_rate": 1.5155738881123711e-05, "loss": 0.8261, "step": 3323 }, { "epoch": 0.3478806907378336, "grad_norm": 1.9981929383306056, "learning_rate": 1.5152834119778727e-05, "loss": 0.8493, "step": 3324 }, { "epoch": 0.34798534798534797, "grad_norm": 2.149137266048455, "learning_rate": 1.5149928766362658e-05, "loss": 1.0977, "step": 3325 }, { "epoch": 0.3480900052328624, "grad_norm": 2.078026164203414, "learning_rate": 1.5147022821209331e-05, "loss": 1.107, "step": 3326 }, { "epoch": 0.3481946624803768, "grad_norm": 2.3425230247962987, "learning_rate": 1.5144116284652645e-05, "loss": 0.9754, "step": 3327 }, { "epoch": 0.34829931972789113, "grad_norm": 1.9091246891467695, "learning_rate": 1.5141209157026576e-05, "loss": 0.8426, "step": 3328 }, { "epoch": 0.34840397697540554, "grad_norm": 1.9479723011505157, "learning_rate": 1.5138301438665148e-05, "loss": 1.0919, "step": 3329 }, { "epoch": 0.34850863422291994, "grad_norm": 2.2310776562980412, "learning_rate": 1.5135393129902469e-05, "loss": 1.1552, "step": 3330 }, { "epoch": 0.34861329147043435, "grad_norm": 2.0947428083855786, "learning_rate": 1.513248423107271e-05, "loss": 1.0404, "step": 3331 }, { "epoch": 0.3487179487179487, "grad_norm": 2.066377865083883, "learning_rate": 1.5129574742510107e-05, "loss": 1.0278, "step": 3332 }, { "epoch": 0.3488226059654631, "grad_norm": 2.099307559790499, "learning_rate": 1.5126664664548969e-05, "loss": 1.1413, "step": 3333 }, { "epoch": 0.3489272632129775, "grad_norm": 2.154455920018568, "learning_rate": 1.5123753997523665e-05, "loss": 1.0198, "step": 3334 }, { "epoch": 0.34903192046049186, "grad_norm": 2.2352534977710152, "learning_rate": 1.5120842741768644e-05, "loss": 1.0821, "step": 3335 }, { "epoch": 0.34913657770800627, "grad_norm": 1.883735141874073, "learning_rate": 1.5117930897618407e-05, "loss": 1.0526, "step": 3336 }, { "epoch": 0.3492412349555207, "grad_norm": 2.085232249057728, "learning_rate": 1.5115018465407536e-05, "loss": 0.9961, "step": 3337 }, { "epoch": 0.3493458922030351, "grad_norm": 2.2405865672055567, "learning_rate": 1.5112105445470677e-05, "loss": 0.9606, "step": 3338 }, { "epoch": 0.34945054945054943, "grad_norm": 2.210569750343716, "learning_rate": 1.5109191838142537e-05, "loss": 0.9826, "step": 3339 }, { "epoch": 0.34955520669806384, "grad_norm": 1.8900569961244176, "learning_rate": 1.5106277643757895e-05, "loss": 0.9309, "step": 3340 }, { "epoch": 0.34965986394557824, "grad_norm": 2.119499479604053, "learning_rate": 1.5103362862651602e-05, "loss": 0.8946, "step": 3341 }, { "epoch": 0.34976452119309265, "grad_norm": 2.14866784367062, "learning_rate": 1.5100447495158572e-05, "loss": 1.0256, "step": 3342 }, { "epoch": 0.349869178440607, "grad_norm": 2.21964636178506, "learning_rate": 1.5097531541613784e-05, "loss": 0.8199, "step": 3343 }, { "epoch": 0.3499738356881214, "grad_norm": 2.0824336161678474, "learning_rate": 1.5094615002352284e-05, "loss": 1.0465, "step": 3344 }, { "epoch": 0.3500784929356358, "grad_norm": 2.2906700002330354, "learning_rate": 1.50916978777092e-05, "loss": 1.0289, "step": 3345 }, { "epoch": 0.35018315018315016, "grad_norm": 1.7870706054978838, "learning_rate": 1.5088780168019704e-05, "loss": 0.8907, "step": 3346 }, { "epoch": 0.35028780743066457, "grad_norm": 2.350201821325944, "learning_rate": 1.508586187361905e-05, "loss": 0.9207, "step": 3347 }, { "epoch": 0.35039246467817897, "grad_norm": 1.9237037149190483, "learning_rate": 1.5082942994842557e-05, "loss": 0.9421, "step": 3348 }, { "epoch": 0.3504971219256934, "grad_norm": 2.6474938275921, "learning_rate": 1.508002353202561e-05, "loss": 1.0688, "step": 3349 }, { "epoch": 0.35060177917320773, "grad_norm": 2.266954434535588, "learning_rate": 1.5077103485503664e-05, "loss": 0.938, "step": 3350 }, { "epoch": 0.35070643642072213, "grad_norm": 1.838992898597147, "learning_rate": 1.507418285561223e-05, "loss": 1.0003, "step": 3351 }, { "epoch": 0.35081109366823654, "grad_norm": 2.1043607397062702, "learning_rate": 1.5071261642686903e-05, "loss": 0.8642, "step": 3352 }, { "epoch": 0.3509157509157509, "grad_norm": 2.092736361302439, "learning_rate": 1.506833984706333e-05, "loss": 1.0074, "step": 3353 }, { "epoch": 0.3510204081632653, "grad_norm": 2.540716324430332, "learning_rate": 1.5065417469077235e-05, "loss": 1.112, "step": 3354 }, { "epoch": 0.3511250654107797, "grad_norm": 2.2462673291428548, "learning_rate": 1.5062494509064406e-05, "loss": 1.053, "step": 3355 }, { "epoch": 0.3512297226582941, "grad_norm": 1.9698347906672438, "learning_rate": 1.5059570967360693e-05, "loss": 0.9501, "step": 3356 }, { "epoch": 0.35133437990580846, "grad_norm": 2.8732667059564707, "learning_rate": 1.5056646844302018e-05, "loss": 0.9699, "step": 3357 }, { "epoch": 0.35143903715332286, "grad_norm": 2.300905830511158, "learning_rate": 1.5053722140224371e-05, "loss": 1.0558, "step": 3358 }, { "epoch": 0.35154369440083727, "grad_norm": 2.1008549188317573, "learning_rate": 1.5050796855463807e-05, "loss": 1.0461, "step": 3359 }, { "epoch": 0.3516483516483517, "grad_norm": 2.193971701711426, "learning_rate": 1.5047870990356443e-05, "loss": 0.8999, "step": 3360 }, { "epoch": 0.351753008895866, "grad_norm": 1.9237206891325724, "learning_rate": 1.5044944545238472e-05, "loss": 0.919, "step": 3361 }, { "epoch": 0.35185766614338043, "grad_norm": 2.3398493395367828, "learning_rate": 1.5042017520446144e-05, "loss": 0.9966, "step": 3362 }, { "epoch": 0.35196232339089484, "grad_norm": 2.0245486796099414, "learning_rate": 1.5039089916315778e-05, "loss": 0.9552, "step": 3363 }, { "epoch": 0.3520669806384092, "grad_norm": 2.146653040509672, "learning_rate": 1.5036161733183772e-05, "loss": 0.9682, "step": 3364 }, { "epoch": 0.3521716378859236, "grad_norm": 2.3003979859770167, "learning_rate": 1.5033232971386569e-05, "loss": 0.9486, "step": 3365 }, { "epoch": 0.352276295133438, "grad_norm": 2.080353917018897, "learning_rate": 1.5030303631260691e-05, "loss": 1.008, "step": 3366 }, { "epoch": 0.3523809523809524, "grad_norm": 2.145495285206271, "learning_rate": 1.5027373713142735e-05, "loss": 0.8679, "step": 3367 }, { "epoch": 0.35248560962846676, "grad_norm": 2.219416644891993, "learning_rate": 1.5024443217369346e-05, "loss": 0.8524, "step": 3368 }, { "epoch": 0.35259026687598116, "grad_norm": 2.5932343785171432, "learning_rate": 1.5021512144277246e-05, "loss": 0.8825, "step": 3369 }, { "epoch": 0.35269492412349557, "grad_norm": 2.2140426590236575, "learning_rate": 1.5018580494203217e-05, "loss": 1.0054, "step": 3370 }, { "epoch": 0.3527995813710099, "grad_norm": 2.640222446891625, "learning_rate": 1.501564826748412e-05, "loss": 1.0553, "step": 3371 }, { "epoch": 0.3529042386185243, "grad_norm": 2.6088669036618453, "learning_rate": 1.5012715464456866e-05, "loss": 0.9632, "step": 3372 }, { "epoch": 0.35300889586603873, "grad_norm": 2.3393856800568513, "learning_rate": 1.5009782085458447e-05, "loss": 0.9854, "step": 3373 }, { "epoch": 0.35311355311355314, "grad_norm": 2.13358684905417, "learning_rate": 1.5006848130825909e-05, "loss": 0.861, "step": 3374 }, { "epoch": 0.3532182103610675, "grad_norm": 1.948963946557993, "learning_rate": 1.500391360089637e-05, "loss": 0.93, "step": 3375 }, { "epoch": 0.3533228676085819, "grad_norm": 2.2621209126353374, "learning_rate": 1.5000978496007013e-05, "loss": 1.0111, "step": 3376 }, { "epoch": 0.3534275248560963, "grad_norm": 2.175682145812267, "learning_rate": 1.4998042816495091e-05, "loss": 1.0821, "step": 3377 }, { "epoch": 0.35353218210361065, "grad_norm": 2.118572813572351, "learning_rate": 1.4995106562697917e-05, "loss": 1.0052, "step": 3378 }, { "epoch": 0.35363683935112505, "grad_norm": 2.2925720756150514, "learning_rate": 1.4992169734952872e-05, "loss": 0.8211, "step": 3379 }, { "epoch": 0.35374149659863946, "grad_norm": 1.9505334994916146, "learning_rate": 1.4989232333597404e-05, "loss": 0.9554, "step": 3380 }, { "epoch": 0.35384615384615387, "grad_norm": 2.0479847216050135, "learning_rate": 1.4986294358969029e-05, "loss": 1.0622, "step": 3381 }, { "epoch": 0.3539508110936682, "grad_norm": 1.940644043048965, "learning_rate": 1.498335581140532e-05, "loss": 0.9721, "step": 3382 }, { "epoch": 0.3540554683411826, "grad_norm": 2.357644955495196, "learning_rate": 1.4980416691243929e-05, "loss": 0.9845, "step": 3383 }, { "epoch": 0.35416012558869703, "grad_norm": 1.9728875562735373, "learning_rate": 1.4977476998822564e-05, "loss": 0.9456, "step": 3384 }, { "epoch": 0.35426478283621143, "grad_norm": 2.5759061498541773, "learning_rate": 1.4974536734479e-05, "loss": 1.0961, "step": 3385 }, { "epoch": 0.3543694400837258, "grad_norm": 2.0650501070386436, "learning_rate": 1.497159589855108e-05, "loss": 0.9497, "step": 3386 }, { "epoch": 0.3544740973312402, "grad_norm": 2.049511205390927, "learning_rate": 1.4968654491376715e-05, "loss": 0.9196, "step": 3387 }, { "epoch": 0.3545787545787546, "grad_norm": 2.119804481767199, "learning_rate": 1.4965712513293874e-05, "loss": 1.1051, "step": 3388 }, { "epoch": 0.35468341182626895, "grad_norm": 2.0198789773840153, "learning_rate": 1.49627699646406e-05, "loss": 0.9479, "step": 3389 }, { "epoch": 0.35478806907378335, "grad_norm": 2.2514166159676376, "learning_rate": 1.4959826845754996e-05, "loss": 1.1349, "step": 3390 }, { "epoch": 0.35489272632129776, "grad_norm": 1.9991992740675673, "learning_rate": 1.4956883156975235e-05, "loss": 0.9907, "step": 3391 }, { "epoch": 0.35499738356881216, "grad_norm": 2.165858304353837, "learning_rate": 1.495393889863955e-05, "loss": 1.0128, "step": 3392 }, { "epoch": 0.3551020408163265, "grad_norm": 2.046874393542093, "learning_rate": 1.495099407108624e-05, "loss": 0.9728, "step": 3393 }, { "epoch": 0.3552066980638409, "grad_norm": 1.841861405528776, "learning_rate": 1.4948048674653677e-05, "loss": 0.9024, "step": 3394 }, { "epoch": 0.3553113553113553, "grad_norm": 1.963788808953176, "learning_rate": 1.4945102709680291e-05, "loss": 1.047, "step": 3395 }, { "epoch": 0.3554160125588697, "grad_norm": 1.8854311633911396, "learning_rate": 1.4942156176504577e-05, "loss": 0.9661, "step": 3396 }, { "epoch": 0.3555206698063841, "grad_norm": 2.0001437303254312, "learning_rate": 1.49392090754651e-05, "loss": 1.0731, "step": 3397 }, { "epoch": 0.3556253270538985, "grad_norm": 2.2429282145699645, "learning_rate": 1.493626140690049e-05, "loss": 1.0166, "step": 3398 }, { "epoch": 0.3557299843014129, "grad_norm": 2.230032061722695, "learning_rate": 1.4933313171149433e-05, "loss": 0.9602, "step": 3399 }, { "epoch": 0.35583464154892724, "grad_norm": 2.3611671076976517, "learning_rate": 1.4930364368550697e-05, "loss": 1.0514, "step": 3400 }, { "epoch": 0.35593929879644165, "grad_norm": 1.9997981736421566, "learning_rate": 1.4927414999443093e-05, "loss": 0.8934, "step": 3401 }, { "epoch": 0.35604395604395606, "grad_norm": 2.2260425699224378, "learning_rate": 1.492446506416552e-05, "loss": 1.105, "step": 3402 }, { "epoch": 0.35614861329147046, "grad_norm": 2.6007886380070064, "learning_rate": 1.4921514563056931e-05, "loss": 1.0282, "step": 3403 }, { "epoch": 0.3562532705389848, "grad_norm": 2.2911564149754664, "learning_rate": 1.491856349645634e-05, "loss": 0.9593, "step": 3404 }, { "epoch": 0.3563579277864992, "grad_norm": 2.144042496081372, "learning_rate": 1.4915611864702832e-05, "loss": 1.0542, "step": 3405 }, { "epoch": 0.3564625850340136, "grad_norm": 2.314520779479154, "learning_rate": 1.4912659668135553e-05, "loss": 1.0739, "step": 3406 }, { "epoch": 0.356567242281528, "grad_norm": 2.0119651946546755, "learning_rate": 1.4909706907093725e-05, "loss": 1.0338, "step": 3407 }, { "epoch": 0.3566718995290424, "grad_norm": 2.0594144199775517, "learning_rate": 1.4906753581916618e-05, "loss": 1.0535, "step": 3408 }, { "epoch": 0.3567765567765568, "grad_norm": 2.628859441235232, "learning_rate": 1.4903799692943575e-05, "loss": 1.1152, "step": 3409 }, { "epoch": 0.3568812140240712, "grad_norm": 2.3483961540859224, "learning_rate": 1.4900845240514009e-05, "loss": 1.0825, "step": 3410 }, { "epoch": 0.35698587127158554, "grad_norm": 2.199102694351282, "learning_rate": 1.4897890224967388e-05, "loss": 1.0981, "step": 3411 }, { "epoch": 0.35709052851909995, "grad_norm": 2.437840230574091, "learning_rate": 1.4894934646643253e-05, "loss": 1.0649, "step": 3412 }, { "epoch": 0.35719518576661435, "grad_norm": 2.35758514512899, "learning_rate": 1.4891978505881202e-05, "loss": 1.0239, "step": 3413 }, { "epoch": 0.3572998430141287, "grad_norm": 2.2981006350213304, "learning_rate": 1.4889021803020904e-05, "loss": 1.031, "step": 3414 }, { "epoch": 0.3574045002616431, "grad_norm": 2.0708809030772484, "learning_rate": 1.4886064538402091e-05, "loss": 0.9587, "step": 3415 }, { "epoch": 0.3575091575091575, "grad_norm": 2.454255588394301, "learning_rate": 1.4883106712364558e-05, "loss": 0.9636, "step": 3416 }, { "epoch": 0.3576138147566719, "grad_norm": 1.870007612357461, "learning_rate": 1.4880148325248163e-05, "loss": 1.0022, "step": 3417 }, { "epoch": 0.35771847200418627, "grad_norm": 2.1025070655550904, "learning_rate": 1.4877189377392831e-05, "loss": 1.0294, "step": 3418 }, { "epoch": 0.3578231292517007, "grad_norm": 2.192703285083379, "learning_rate": 1.4874229869138554e-05, "loss": 1.1935, "step": 3419 }, { "epoch": 0.3579277864992151, "grad_norm": 2.1921332205785555, "learning_rate": 1.4871269800825384e-05, "loss": 0.9556, "step": 3420 }, { "epoch": 0.35803244374672943, "grad_norm": 2.3389520267003547, "learning_rate": 1.486830917279344e-05, "loss": 0.959, "step": 3421 }, { "epoch": 0.35813710099424384, "grad_norm": 1.887005791860528, "learning_rate": 1.4865347985382901e-05, "loss": 0.9283, "step": 3422 }, { "epoch": 0.35824175824175825, "grad_norm": 3.738025461337002, "learning_rate": 1.4862386238934017e-05, "loss": 1.0093, "step": 3423 }, { "epoch": 0.35834641548927265, "grad_norm": 2.0931366465972268, "learning_rate": 1.4859423933787097e-05, "loss": 0.9058, "step": 3424 }, { "epoch": 0.358451072736787, "grad_norm": 2.068043177767639, "learning_rate": 1.4856461070282512e-05, "loss": 1.044, "step": 3425 }, { "epoch": 0.3585557299843014, "grad_norm": 2.2134904451583206, "learning_rate": 1.4853497648760711e-05, "loss": 0.8614, "step": 3426 }, { "epoch": 0.3586603872318158, "grad_norm": 2.1848502762270368, "learning_rate": 1.4850533669562189e-05, "loss": 1.0403, "step": 3427 }, { "epoch": 0.3587650444793302, "grad_norm": 2.190313212576901, "learning_rate": 1.4847569133027512e-05, "loss": 1.0629, "step": 3428 }, { "epoch": 0.35886970172684457, "grad_norm": 2.2018800818289765, "learning_rate": 1.4844604039497319e-05, "loss": 1.0115, "step": 3429 }, { "epoch": 0.358974358974359, "grad_norm": 1.8297165257801538, "learning_rate": 1.4841638389312298e-05, "loss": 0.9768, "step": 3430 }, { "epoch": 0.3590790162218734, "grad_norm": 2.0502371253182177, "learning_rate": 1.4838672182813213e-05, "loss": 0.8625, "step": 3431 }, { "epoch": 0.35918367346938773, "grad_norm": 2.0678658262449994, "learning_rate": 1.4835705420340884e-05, "loss": 0.9285, "step": 3432 }, { "epoch": 0.35928833071690214, "grad_norm": 2.097477714048872, "learning_rate": 1.48327381022362e-05, "loss": 0.9737, "step": 3433 }, { "epoch": 0.35939298796441654, "grad_norm": 2.216587571402107, "learning_rate": 1.4829770228840111e-05, "loss": 0.9205, "step": 3434 }, { "epoch": 0.35949764521193095, "grad_norm": 2.242990585095391, "learning_rate": 1.482680180049363e-05, "loss": 1.0677, "step": 3435 }, { "epoch": 0.3596023024594453, "grad_norm": 2.1956250689462595, "learning_rate": 1.4823832817537838e-05, "loss": 0.9873, "step": 3436 }, { "epoch": 0.3597069597069597, "grad_norm": 2.250186078331522, "learning_rate": 1.4820863280313874e-05, "loss": 0.9965, "step": 3437 }, { "epoch": 0.3598116169544741, "grad_norm": 2.080456079650869, "learning_rate": 1.4817893189162954e-05, "loss": 1.0024, "step": 3438 }, { "epoch": 0.35991627420198846, "grad_norm": 2.0093127130290576, "learning_rate": 1.4814922544426332e-05, "loss": 1.0008, "step": 3439 }, { "epoch": 0.36002093144950287, "grad_norm": 2.2251021302294083, "learning_rate": 1.481195134644535e-05, "loss": 1.0177, "step": 3440 }, { "epoch": 0.3601255886970173, "grad_norm": 1.9221853447634059, "learning_rate": 1.4808979595561406e-05, "loss": 0.8362, "step": 3441 }, { "epoch": 0.3602302459445317, "grad_norm": 1.9001182786985462, "learning_rate": 1.4806007292115955e-05, "loss": 1.077, "step": 3442 }, { "epoch": 0.36033490319204603, "grad_norm": 2.24243203183523, "learning_rate": 1.4803034436450528e-05, "loss": 0.9866, "step": 3443 }, { "epoch": 0.36043956043956044, "grad_norm": 2.1248905323421217, "learning_rate": 1.4800061028906703e-05, "loss": 0.9718, "step": 3444 }, { "epoch": 0.36054421768707484, "grad_norm": 2.1426511270144966, "learning_rate": 1.4797087069826136e-05, "loss": 1.0464, "step": 3445 }, { "epoch": 0.36064887493458925, "grad_norm": 1.7983654364896233, "learning_rate": 1.4794112559550542e-05, "loss": 0.8639, "step": 3446 }, { "epoch": 0.3607535321821036, "grad_norm": 2.053627748221558, "learning_rate": 1.4791137498421692e-05, "loss": 0.903, "step": 3447 }, { "epoch": 0.360858189429618, "grad_norm": 1.8683934406849398, "learning_rate": 1.4788161886781434e-05, "loss": 0.867, "step": 3448 }, { "epoch": 0.3609628466771324, "grad_norm": 2.101452076878475, "learning_rate": 1.4785185724971667e-05, "loss": 0.9869, "step": 3449 }, { "epoch": 0.36106750392464676, "grad_norm": 2.496970829177601, "learning_rate": 1.4782209013334364e-05, "loss": 0.8874, "step": 3450 }, { "epoch": 0.36117216117216117, "grad_norm": 1.9906643960387973, "learning_rate": 1.4779231752211546e-05, "loss": 1.0344, "step": 3451 }, { "epoch": 0.36127681841967557, "grad_norm": 2.134773764897517, "learning_rate": 1.477625394194531e-05, "loss": 1.0026, "step": 3452 }, { "epoch": 0.36138147566719, "grad_norm": 1.9360626015222449, "learning_rate": 1.4773275582877818e-05, "loss": 1.0467, "step": 3453 }, { "epoch": 0.36148613291470433, "grad_norm": 2.3545030642754585, "learning_rate": 1.477029667535128e-05, "loss": 1.0638, "step": 3454 }, { "epoch": 0.36159079016221873, "grad_norm": 2.178802837894888, "learning_rate": 1.476731721970799e-05, "loss": 0.9444, "step": 3455 }, { "epoch": 0.36169544740973314, "grad_norm": 2.4728900427384155, "learning_rate": 1.4764337216290283e-05, "loss": 1.0585, "step": 3456 }, { "epoch": 0.3618001046572475, "grad_norm": 2.203666741614066, "learning_rate": 1.4761356665440571e-05, "loss": 1.0357, "step": 3457 }, { "epoch": 0.3619047619047619, "grad_norm": 2.1800575119431986, "learning_rate": 1.475837556750133e-05, "loss": 0.9192, "step": 3458 }, { "epoch": 0.3620094191522763, "grad_norm": 2.237067611748822, "learning_rate": 1.4755393922815086e-05, "loss": 0.9418, "step": 3459 }, { "epoch": 0.3621140763997907, "grad_norm": 2.0662472125409352, "learning_rate": 1.4752411731724443e-05, "loss": 1.0651, "step": 3460 }, { "epoch": 0.36221873364730506, "grad_norm": 1.9800019356904621, "learning_rate": 1.4749428994572058e-05, "loss": 1.0229, "step": 3461 }, { "epoch": 0.36232339089481946, "grad_norm": 2.193928271349309, "learning_rate": 1.4746445711700648e-05, "loss": 0.9627, "step": 3462 }, { "epoch": 0.36242804814233387, "grad_norm": 2.1284831634544017, "learning_rate": 1.474346188345301e-05, "loss": 1.0074, "step": 3463 }, { "epoch": 0.3625327053898482, "grad_norm": 1.9794414505430156, "learning_rate": 1.4740477510171982e-05, "loss": 0.9674, "step": 3464 }, { "epoch": 0.3626373626373626, "grad_norm": 1.9781425949045437, "learning_rate": 1.473749259220048e-05, "loss": 0.9827, "step": 3465 }, { "epoch": 0.36274201988487703, "grad_norm": 2.1680974375346174, "learning_rate": 1.4734507129881473e-05, "loss": 0.9404, "step": 3466 }, { "epoch": 0.36284667713239144, "grad_norm": 2.104960303718753, "learning_rate": 1.4731521123558e-05, "loss": 0.9687, "step": 3467 }, { "epoch": 0.3629513343799058, "grad_norm": 1.950557453989959, "learning_rate": 1.4728534573573159e-05, "loss": 0.766, "step": 3468 }, { "epoch": 0.3630559916274202, "grad_norm": 2.2931497243072374, "learning_rate": 1.472554748027011e-05, "loss": 0.9558, "step": 3469 }, { "epoch": 0.3631606488749346, "grad_norm": 2.114730292398457, "learning_rate": 1.4722559843992075e-05, "loss": 1.0599, "step": 3470 }, { "epoch": 0.363265306122449, "grad_norm": 1.9387223713176414, "learning_rate": 1.471957166508234e-05, "loss": 0.9173, "step": 3471 }, { "epoch": 0.36336996336996336, "grad_norm": 2.0196303025643934, "learning_rate": 1.4716582943884254e-05, "loss": 0.8972, "step": 3472 }, { "epoch": 0.36347462061747776, "grad_norm": 2.3779507104223176, "learning_rate": 1.4713593680741225e-05, "loss": 1.0191, "step": 3473 }, { "epoch": 0.36357927786499217, "grad_norm": 2.171151514960678, "learning_rate": 1.4710603875996725e-05, "loss": 0.9061, "step": 3474 }, { "epoch": 0.3636839351125065, "grad_norm": 2.3376877060855974, "learning_rate": 1.4707613529994294e-05, "loss": 1.0131, "step": 3475 }, { "epoch": 0.3637885923600209, "grad_norm": 2.240363414874548, "learning_rate": 1.4704622643077524e-05, "loss": 1.0021, "step": 3476 }, { "epoch": 0.36389324960753533, "grad_norm": 2.2172322785021126, "learning_rate": 1.4701631215590075e-05, "loss": 1.0355, "step": 3477 }, { "epoch": 0.36399790685504974, "grad_norm": 2.072175096832046, "learning_rate": 1.4698639247875668e-05, "loss": 1.0331, "step": 3478 }, { "epoch": 0.3641025641025641, "grad_norm": 2.0743508713728844, "learning_rate": 1.4695646740278084e-05, "loss": 0.9647, "step": 3479 }, { "epoch": 0.3642072213500785, "grad_norm": 1.8532767415518483, "learning_rate": 1.4692653693141175e-05, "loss": 0.8776, "step": 3480 }, { "epoch": 0.3643118785975929, "grad_norm": 2.302001815845674, "learning_rate": 1.468966010680884e-05, "loss": 0.9075, "step": 3481 }, { "epoch": 0.36441653584510725, "grad_norm": 2.394789643374174, "learning_rate": 1.4686665981625055e-05, "loss": 1.0686, "step": 3482 }, { "epoch": 0.36452119309262165, "grad_norm": 2.2837151238361697, "learning_rate": 1.4683671317933843e-05, "loss": 1.0853, "step": 3483 }, { "epoch": 0.36462585034013606, "grad_norm": 2.025193817081867, "learning_rate": 1.4680676116079307e-05, "loss": 0.9073, "step": 3484 }, { "epoch": 0.36473050758765047, "grad_norm": 2.4029403299228274, "learning_rate": 1.4677680376405592e-05, "loss": 0.9786, "step": 3485 }, { "epoch": 0.3648351648351648, "grad_norm": 2.275681739422216, "learning_rate": 1.4674684099256923e-05, "loss": 1.1143, "step": 3486 }, { "epoch": 0.3649398220826792, "grad_norm": 2.2365915095598576, "learning_rate": 1.4671687284977572e-05, "loss": 0.9557, "step": 3487 }, { "epoch": 0.3650444793301936, "grad_norm": 2.2324428792489495, "learning_rate": 1.4668689933911882e-05, "loss": 0.961, "step": 3488 }, { "epoch": 0.36514913657770803, "grad_norm": 2.187984861710933, "learning_rate": 1.4665692046404253e-05, "loss": 1.063, "step": 3489 }, { "epoch": 0.3652537938252224, "grad_norm": 2.2899556426594243, "learning_rate": 1.4662693622799152e-05, "loss": 1.0662, "step": 3490 }, { "epoch": 0.3653584510727368, "grad_norm": 2.4136159700742716, "learning_rate": 1.4659694663441102e-05, "loss": 0.9803, "step": 3491 }, { "epoch": 0.3654631083202512, "grad_norm": 1.7662751611483918, "learning_rate": 1.4656695168674686e-05, "loss": 0.8149, "step": 3492 }, { "epoch": 0.36556776556776555, "grad_norm": 2.2806309857676226, "learning_rate": 1.4653695138844556e-05, "loss": 1.0818, "step": 3493 }, { "epoch": 0.36567242281527995, "grad_norm": 1.7491999471330408, "learning_rate": 1.4650694574295423e-05, "loss": 0.8499, "step": 3494 }, { "epoch": 0.36577708006279436, "grad_norm": 2.1237260413546526, "learning_rate": 1.4647693475372054e-05, "loss": 0.9252, "step": 3495 }, { "epoch": 0.36588173731030876, "grad_norm": 2.0122406066004506, "learning_rate": 1.4644691842419278e-05, "loss": 0.9047, "step": 3496 }, { "epoch": 0.3659863945578231, "grad_norm": 1.7997357542002177, "learning_rate": 1.4641689675782e-05, "loss": 0.9075, "step": 3497 }, { "epoch": 0.3660910518053375, "grad_norm": 2.0095662635854126, "learning_rate": 1.4638686975805168e-05, "loss": 0.99, "step": 3498 }, { "epoch": 0.3661957090528519, "grad_norm": 2.305964189521301, "learning_rate": 1.4635683742833798e-05, "loss": 0.9384, "step": 3499 }, { "epoch": 0.3663003663003663, "grad_norm": 1.9289338620569885, "learning_rate": 1.4632679977212965e-05, "loss": 0.9879, "step": 3500 }, { "epoch": 0.3664050235478807, "grad_norm": 2.134260629081426, "learning_rate": 1.4629675679287814e-05, "loss": 0.9906, "step": 3501 }, { "epoch": 0.3665096807953951, "grad_norm": 2.125024277582387, "learning_rate": 1.4626670849403541e-05, "loss": 0.8551, "step": 3502 }, { "epoch": 0.3666143380429095, "grad_norm": 2.014078167441046, "learning_rate": 1.4623665487905409e-05, "loss": 0.9331, "step": 3503 }, { "epoch": 0.36671899529042384, "grad_norm": 2.3595548560106674, "learning_rate": 1.462065959513874e-05, "loss": 1.0379, "step": 3504 }, { "epoch": 0.36682365253793825, "grad_norm": 2.9757237588936847, "learning_rate": 1.4617653171448915e-05, "loss": 1.1066, "step": 3505 }, { "epoch": 0.36692830978545266, "grad_norm": 2.246060097728677, "learning_rate": 1.4614646217181384e-05, "loss": 1.0206, "step": 3506 }, { "epoch": 0.367032967032967, "grad_norm": 2.1279065491723577, "learning_rate": 1.461163873268164e-05, "loss": 0.9343, "step": 3507 }, { "epoch": 0.3671376242804814, "grad_norm": 2.0617313415147582, "learning_rate": 1.4608630718295262e-05, "loss": 1.0097, "step": 3508 }, { "epoch": 0.3672422815279958, "grad_norm": 1.8166947694326565, "learning_rate": 1.4605622174367873e-05, "loss": 1.0153, "step": 3509 }, { "epoch": 0.3673469387755102, "grad_norm": 2.1056303565410115, "learning_rate": 1.4602613101245156e-05, "loss": 1.0769, "step": 3510 }, { "epoch": 0.3674515960230246, "grad_norm": 2.229739943150304, "learning_rate": 1.4599603499272866e-05, "loss": 1.0299, "step": 3511 }, { "epoch": 0.367556253270539, "grad_norm": 2.1505713190979674, "learning_rate": 1.4596593368796808e-05, "loss": 1.0079, "step": 3512 }, { "epoch": 0.3676609105180534, "grad_norm": 2.0137320566247445, "learning_rate": 1.4593582710162854e-05, "loss": 0.9401, "step": 3513 }, { "epoch": 0.3677655677655678, "grad_norm": 1.97738685057115, "learning_rate": 1.4590571523716932e-05, "loss": 0.9479, "step": 3514 }, { "epoch": 0.36787022501308214, "grad_norm": 2.1775568508800305, "learning_rate": 1.4587559809805041e-05, "loss": 0.8715, "step": 3515 }, { "epoch": 0.36797488226059655, "grad_norm": 2.4652646105763663, "learning_rate": 1.4584547568773225e-05, "loss": 1.0586, "step": 3516 }, { "epoch": 0.36807953950811095, "grad_norm": 2.2880185364374355, "learning_rate": 1.4581534800967598e-05, "loss": 0.9568, "step": 3517 }, { "epoch": 0.3681841967556253, "grad_norm": 2.0626244211775986, "learning_rate": 1.4578521506734337e-05, "loss": 0.8189, "step": 3518 }, { "epoch": 0.3682888540031397, "grad_norm": 2.0228867528318966, "learning_rate": 1.4575507686419672e-05, "loss": 0.9727, "step": 3519 }, { "epoch": 0.3683935112506541, "grad_norm": 1.9220254744007452, "learning_rate": 1.4572493340369899e-05, "loss": 1.0084, "step": 3520 }, { "epoch": 0.3684981684981685, "grad_norm": 2.2566198672292392, "learning_rate": 1.4569478468931371e-05, "loss": 1.0256, "step": 3521 }, { "epoch": 0.36860282574568287, "grad_norm": 2.180361824104086, "learning_rate": 1.4566463072450503e-05, "loss": 1.044, "step": 3522 }, { "epoch": 0.3687074829931973, "grad_norm": 2.623792129539169, "learning_rate": 1.4563447151273774e-05, "loss": 0.9307, "step": 3523 }, { "epoch": 0.3688121402407117, "grad_norm": 2.4668276543972047, "learning_rate": 1.4560430705747712e-05, "loss": 0.9877, "step": 3524 }, { "epoch": 0.36891679748822603, "grad_norm": 2.454886424734209, "learning_rate": 1.4557413736218921e-05, "loss": 0.8058, "step": 3525 }, { "epoch": 0.36902145473574044, "grad_norm": 2.1383002692744557, "learning_rate": 1.4554396243034052e-05, "loss": 0.9637, "step": 3526 }, { "epoch": 0.36912611198325485, "grad_norm": 2.187789113145796, "learning_rate": 1.4551378226539821e-05, "loss": 1.011, "step": 3527 }, { "epoch": 0.36923076923076925, "grad_norm": 1.7592035312864471, "learning_rate": 1.4548359687083005e-05, "loss": 0.8919, "step": 3528 }, { "epoch": 0.3693354264782836, "grad_norm": 2.1794650803355373, "learning_rate": 1.454534062501044e-05, "loss": 0.9867, "step": 3529 }, { "epoch": 0.369440083725798, "grad_norm": 1.9320597793347067, "learning_rate": 1.4542321040669025e-05, "loss": 0.9989, "step": 3530 }, { "epoch": 0.3695447409733124, "grad_norm": 1.9447012593805288, "learning_rate": 1.4539300934405712e-05, "loss": 0.9174, "step": 3531 }, { "epoch": 0.3696493982208268, "grad_norm": 2.161309649226642, "learning_rate": 1.4536280306567521e-05, "loss": 1.0729, "step": 3532 }, { "epoch": 0.36975405546834117, "grad_norm": 2.199055254223592, "learning_rate": 1.4533259157501523e-05, "loss": 0.9474, "step": 3533 }, { "epoch": 0.3698587127158556, "grad_norm": 1.9653608066864128, "learning_rate": 1.4530237487554863e-05, "loss": 0.8146, "step": 3534 }, { "epoch": 0.36996336996337, "grad_norm": 2.0938517524245803, "learning_rate": 1.4527215297074729e-05, "loss": 1.0321, "step": 3535 }, { "epoch": 0.37006802721088433, "grad_norm": 2.2264094228758307, "learning_rate": 1.452419258640838e-05, "loss": 1.068, "step": 3536 }, { "epoch": 0.37017268445839874, "grad_norm": 2.003519287512863, "learning_rate": 1.4521169355903134e-05, "loss": 1.0125, "step": 3537 }, { "epoch": 0.37027734170591314, "grad_norm": 2.06288913711494, "learning_rate": 1.451814560590636e-05, "loss": 1.0217, "step": 3538 }, { "epoch": 0.37038199895342755, "grad_norm": 2.32869949729458, "learning_rate": 1.4515121336765499e-05, "loss": 0.9788, "step": 3539 }, { "epoch": 0.3704866562009419, "grad_norm": 2.2629636033796503, "learning_rate": 1.4512096548828041e-05, "loss": 1.0602, "step": 3540 }, { "epoch": 0.3705913134484563, "grad_norm": 1.9185846266270503, "learning_rate": 1.4509071242441543e-05, "loss": 0.9762, "step": 3541 }, { "epoch": 0.3706959706959707, "grad_norm": 2.2625877749763172, "learning_rate": 1.4506045417953623e-05, "loss": 1.0291, "step": 3542 }, { "epoch": 0.37080062794348506, "grad_norm": 1.809617016801832, "learning_rate": 1.4503019075711944e-05, "loss": 0.9528, "step": 3543 }, { "epoch": 0.37090528519099947, "grad_norm": 2.002346966236511, "learning_rate": 1.4499992216064247e-05, "loss": 0.8509, "step": 3544 }, { "epoch": 0.3710099424385139, "grad_norm": 2.0335315203538697, "learning_rate": 1.4496964839358322e-05, "loss": 0.976, "step": 3545 }, { "epoch": 0.3711145996860283, "grad_norm": 2.438768221681832, "learning_rate": 1.449393694594202e-05, "loss": 0.9592, "step": 3546 }, { "epoch": 0.37121925693354263, "grad_norm": 2.188091743315195, "learning_rate": 1.4490908536163256e-05, "loss": 1.0567, "step": 3547 }, { "epoch": 0.37132391418105704, "grad_norm": 2.0144776071078168, "learning_rate": 1.448787961036999e-05, "loss": 1.0472, "step": 3548 }, { "epoch": 0.37142857142857144, "grad_norm": 1.9614612256190855, "learning_rate": 1.4484850168910264e-05, "loss": 0.8886, "step": 3549 }, { "epoch": 0.3715332286760858, "grad_norm": 1.8138847376496081, "learning_rate": 1.4481820212132163e-05, "loss": 0.9632, "step": 3550 }, { "epoch": 0.3716378859236002, "grad_norm": 2.2730125431428863, "learning_rate": 1.4478789740383831e-05, "loss": 0.9063, "step": 3551 }, { "epoch": 0.3717425431711146, "grad_norm": 2.4175931682424947, "learning_rate": 1.4475758754013481e-05, "loss": 1.0576, "step": 3552 }, { "epoch": 0.371847200418629, "grad_norm": 1.9765613042579417, "learning_rate": 1.4472727253369374e-05, "loss": 0.9173, "step": 3553 }, { "epoch": 0.37195185766614336, "grad_norm": 2.105898251066037, "learning_rate": 1.4469695238799841e-05, "loss": 1.0151, "step": 3554 }, { "epoch": 0.37205651491365777, "grad_norm": 2.063365637058352, "learning_rate": 1.4466662710653261e-05, "loss": 0.9934, "step": 3555 }, { "epoch": 0.37216117216117217, "grad_norm": 2.16238034321692, "learning_rate": 1.4463629669278083e-05, "loss": 1.095, "step": 3556 }, { "epoch": 0.3722658294086866, "grad_norm": 2.064593186035993, "learning_rate": 1.4460596115022808e-05, "loss": 0.93, "step": 3557 }, { "epoch": 0.3723704866562009, "grad_norm": 2.1031716067377406, "learning_rate": 1.4457562048235993e-05, "loss": 1.1012, "step": 3558 }, { "epoch": 0.37247514390371533, "grad_norm": 2.040768861352751, "learning_rate": 1.4454527469266269e-05, "loss": 1.0625, "step": 3559 }, { "epoch": 0.37257980115122974, "grad_norm": 2.1485703086705508, "learning_rate": 1.4451492378462302e-05, "loss": 1.119, "step": 3560 }, { "epoch": 0.3726844583987441, "grad_norm": 1.8403984678229408, "learning_rate": 1.444845677617284e-05, "loss": 0.9352, "step": 3561 }, { "epoch": 0.3727891156462585, "grad_norm": 1.922757309168295, "learning_rate": 1.4445420662746674e-05, "loss": 1.0356, "step": 3562 }, { "epoch": 0.3728937728937729, "grad_norm": 2.1213514343363564, "learning_rate": 1.4442384038532664e-05, "loss": 1.0686, "step": 3563 }, { "epoch": 0.3729984301412873, "grad_norm": 2.112512593103979, "learning_rate": 1.4439346903879724e-05, "loss": 0.9844, "step": 3564 }, { "epoch": 0.37310308738880166, "grad_norm": 2.1935515618029577, "learning_rate": 1.443630925913682e-05, "loss": 0.9552, "step": 3565 }, { "epoch": 0.37320774463631606, "grad_norm": 1.963564429493465, "learning_rate": 1.4433271104652993e-05, "loss": 1.0212, "step": 3566 }, { "epoch": 0.37331240188383047, "grad_norm": 2.1958400025455855, "learning_rate": 1.4430232440777325e-05, "loss": 1.0752, "step": 3567 }, { "epoch": 0.3734170591313448, "grad_norm": 2.321242717364229, "learning_rate": 1.4427193267858974e-05, "loss": 0.9584, "step": 3568 }, { "epoch": 0.3735217163788592, "grad_norm": 2.4134787950833174, "learning_rate": 1.4424153586247139e-05, "loss": 1.075, "step": 3569 }, { "epoch": 0.37362637362637363, "grad_norm": 2.204301556211592, "learning_rate": 1.4421113396291087e-05, "loss": 1.0884, "step": 3570 }, { "epoch": 0.37373103087388804, "grad_norm": 2.4179458922764874, "learning_rate": 1.4418072698340147e-05, "loss": 0.9831, "step": 3571 }, { "epoch": 0.3738356881214024, "grad_norm": 2.07945845423551, "learning_rate": 1.4415031492743692e-05, "loss": 1.1162, "step": 3572 }, { "epoch": 0.3739403453689168, "grad_norm": 2.383328131104694, "learning_rate": 1.4411989779851175e-05, "loss": 0.9214, "step": 3573 }, { "epoch": 0.3740450026164312, "grad_norm": 2.811893175065896, "learning_rate": 1.4408947560012081e-05, "loss": 0.9125, "step": 3574 }, { "epoch": 0.3741496598639456, "grad_norm": 2.177298784002885, "learning_rate": 1.4405904833575979e-05, "loss": 0.99, "step": 3575 }, { "epoch": 0.37425431711145996, "grad_norm": 2.1489316661311437, "learning_rate": 1.440286160089248e-05, "loss": 1.0198, "step": 3576 }, { "epoch": 0.37435897435897436, "grad_norm": 2.2742667087048436, "learning_rate": 1.4399817862311255e-05, "loss": 1.0415, "step": 3577 }, { "epoch": 0.37446363160648877, "grad_norm": 2.2904104266630103, "learning_rate": 1.439677361818204e-05, "loss": 0.9554, "step": 3578 }, { "epoch": 0.3745682888540031, "grad_norm": 2.0120333801365846, "learning_rate": 1.4393728868854622e-05, "loss": 0.9557, "step": 3579 }, { "epoch": 0.3746729461015175, "grad_norm": 1.97838166233458, "learning_rate": 1.4390683614678851e-05, "loss": 0.9937, "step": 3580 }, { "epoch": 0.37477760334903193, "grad_norm": 2.1059022035617803, "learning_rate": 1.4387637856004633e-05, "loss": 1.0074, "step": 3581 }, { "epoch": 0.37488226059654634, "grad_norm": 2.0354065733173403, "learning_rate": 1.438459159318193e-05, "loss": 0.9762, "step": 3582 }, { "epoch": 0.3749869178440607, "grad_norm": 2.2000318232269924, "learning_rate": 1.4381544826560762e-05, "loss": 0.9475, "step": 3583 }, { "epoch": 0.3750915750915751, "grad_norm": 1.928430649502382, "learning_rate": 1.4378497556491212e-05, "loss": 0.8959, "step": 3584 }, { "epoch": 0.3751962323390895, "grad_norm": 2.0181021559315897, "learning_rate": 1.4375449783323419e-05, "loss": 0.8834, "step": 3585 }, { "epoch": 0.37530088958660385, "grad_norm": 2.179626276000838, "learning_rate": 1.4372401507407575e-05, "loss": 0.8578, "step": 3586 }, { "epoch": 0.37540554683411825, "grad_norm": 1.9333891804031262, "learning_rate": 1.436935272909393e-05, "loss": 1.0699, "step": 3587 }, { "epoch": 0.37551020408163266, "grad_norm": 1.9551604770253188, "learning_rate": 1.4366303448732804e-05, "loss": 0.9671, "step": 3588 }, { "epoch": 0.37561486132914707, "grad_norm": 1.9683480948127803, "learning_rate": 1.4363253666674559e-05, "loss": 0.8561, "step": 3589 }, { "epoch": 0.3757195185766614, "grad_norm": 2.2367501994279526, "learning_rate": 1.4360203383269622e-05, "loss": 0.8811, "step": 3590 }, { "epoch": 0.3758241758241758, "grad_norm": 2.108577595208127, "learning_rate": 1.4357152598868478e-05, "loss": 1.0762, "step": 3591 }, { "epoch": 0.3759288330716902, "grad_norm": 2.0801774600295357, "learning_rate": 1.4354101313821666e-05, "loss": 1.0351, "step": 3592 }, { "epoch": 0.3760334903192046, "grad_norm": 1.9951115073973749, "learning_rate": 1.4351049528479788e-05, "loss": 0.9729, "step": 3593 }, { "epoch": 0.376138147566719, "grad_norm": 2.254132484905743, "learning_rate": 1.4347997243193497e-05, "loss": 0.9553, "step": 3594 }, { "epoch": 0.3762428048142334, "grad_norm": 2.2575494533039846, "learning_rate": 1.434494445831351e-05, "loss": 0.9477, "step": 3595 }, { "epoch": 0.3763474620617478, "grad_norm": 2.411830867087975, "learning_rate": 1.4341891174190594e-05, "loss": 0.8515, "step": 3596 }, { "epoch": 0.37645211930926215, "grad_norm": 2.0691126406810563, "learning_rate": 1.4338837391175582e-05, "loss": 0.9937, "step": 3597 }, { "epoch": 0.37655677655677655, "grad_norm": 1.8560655060436033, "learning_rate": 1.4335783109619356e-05, "loss": 0.9589, "step": 3598 }, { "epoch": 0.37666143380429096, "grad_norm": 1.9064645496752881, "learning_rate": 1.4332728329872867e-05, "loss": 1.0487, "step": 3599 }, { "epoch": 0.37676609105180536, "grad_norm": 2.4222992304653923, "learning_rate": 1.4329673052287105e-05, "loss": 0.9782, "step": 3600 }, { "epoch": 0.3768707482993197, "grad_norm": 2.0131408426702126, "learning_rate": 1.432661727721313e-05, "loss": 1.0806, "step": 3601 }, { "epoch": 0.3769754055468341, "grad_norm": 2.0235190034405948, "learning_rate": 1.4323561005002064e-05, "loss": 0.8947, "step": 3602 }, { "epoch": 0.3770800627943485, "grad_norm": 2.1458509322659545, "learning_rate": 1.432050423600507e-05, "loss": 1.0682, "step": 3603 }, { "epoch": 0.3771847200418629, "grad_norm": 2.266595492288038, "learning_rate": 1.4317446970573386e-05, "loss": 0.9339, "step": 3604 }, { "epoch": 0.3772893772893773, "grad_norm": 1.7601596273055828, "learning_rate": 1.4314389209058287e-05, "loss": 0.7858, "step": 3605 }, { "epoch": 0.3773940345368917, "grad_norm": 1.9387994291788024, "learning_rate": 1.4311330951811125e-05, "loss": 0.9226, "step": 3606 }, { "epoch": 0.3774986917844061, "grad_norm": 1.8920940023031685, "learning_rate": 1.4308272199183296e-05, "loss": 0.9885, "step": 3607 }, { "epoch": 0.37760334903192044, "grad_norm": 2.058831017637809, "learning_rate": 1.4305212951526255e-05, "loss": 0.8887, "step": 3608 }, { "epoch": 0.37770800627943485, "grad_norm": 2.025314142101822, "learning_rate": 1.4302153209191518e-05, "loss": 0.9362, "step": 3609 }, { "epoch": 0.37781266352694926, "grad_norm": 2.202887178692797, "learning_rate": 1.4299092972530656e-05, "loss": 0.9994, "step": 3610 }, { "epoch": 0.3779173207744636, "grad_norm": 2.1307137717760694, "learning_rate": 1.42960322418953e-05, "loss": 1.0086, "step": 3611 }, { "epoch": 0.378021978021978, "grad_norm": 2.23112028651418, "learning_rate": 1.4292971017637131e-05, "loss": 1.0339, "step": 3612 }, { "epoch": 0.3781266352694924, "grad_norm": 1.826253404688636, "learning_rate": 1.4289909300107885e-05, "loss": 0.8473, "step": 3613 }, { "epoch": 0.3782312925170068, "grad_norm": 2.428885841856068, "learning_rate": 1.4286847089659368e-05, "loss": 1.001, "step": 3614 }, { "epoch": 0.3783359497645212, "grad_norm": 1.8607496884224413, "learning_rate": 1.4283784386643427e-05, "loss": 1.0102, "step": 3615 }, { "epoch": 0.3784406070120356, "grad_norm": 2.101434244434794, "learning_rate": 1.4280721191411976e-05, "loss": 1.0508, "step": 3616 }, { "epoch": 0.37854526425955, "grad_norm": 1.9702696695086308, "learning_rate": 1.4277657504316984e-05, "loss": 0.9761, "step": 3617 }, { "epoch": 0.3786499215070644, "grad_norm": 2.189847545110545, "learning_rate": 1.427459332571047e-05, "loss": 0.9961, "step": 3618 }, { "epoch": 0.37875457875457874, "grad_norm": 2.30294474388609, "learning_rate": 1.4271528655944522e-05, "loss": 0.9599, "step": 3619 }, { "epoch": 0.37885923600209315, "grad_norm": 1.8696799148064482, "learning_rate": 1.4268463495371267e-05, "loss": 1.0181, "step": 3620 }, { "epoch": 0.37896389324960755, "grad_norm": 2.09128819824349, "learning_rate": 1.426539784434291e-05, "loss": 1.0629, "step": 3621 }, { "epoch": 0.3790685504971219, "grad_norm": 2.0599948614085815, "learning_rate": 1.4262331703211686e-05, "loss": 0.9998, "step": 3622 }, { "epoch": 0.3791732077446363, "grad_norm": 2.05462296115126, "learning_rate": 1.4259265072329914e-05, "loss": 0.8353, "step": 3623 }, { "epoch": 0.3792778649921507, "grad_norm": 2.4818761789564814, "learning_rate": 1.425619795204995e-05, "loss": 0.885, "step": 3624 }, { "epoch": 0.3793825222396651, "grad_norm": 1.9669887610252532, "learning_rate": 1.425313034272421e-05, "loss": 1.0076, "step": 3625 }, { "epoch": 0.37948717948717947, "grad_norm": 2.4144006548804295, "learning_rate": 1.4250062244705174e-05, "loss": 1.0021, "step": 3626 }, { "epoch": 0.3795918367346939, "grad_norm": 2.298079044624755, "learning_rate": 1.4246993658345369e-05, "loss": 0.8731, "step": 3627 }, { "epoch": 0.3796964939822083, "grad_norm": 1.8240722916104797, "learning_rate": 1.4243924583997386e-05, "loss": 0.9051, "step": 3628 }, { "epoch": 0.37980115122972263, "grad_norm": 2.5476300949512387, "learning_rate": 1.4240855022013863e-05, "loss": 1.1031, "step": 3629 }, { "epoch": 0.37990580847723704, "grad_norm": 1.7527472576385803, "learning_rate": 1.4237784972747501e-05, "loss": 0.8347, "step": 3630 }, { "epoch": 0.38001046572475145, "grad_norm": 2.0657909815224618, "learning_rate": 1.4234714436551053e-05, "loss": 0.853, "step": 3631 }, { "epoch": 0.38011512297226585, "grad_norm": 2.0203292973651283, "learning_rate": 1.4231643413777333e-05, "loss": 0.9679, "step": 3632 }, { "epoch": 0.3802197802197802, "grad_norm": 2.0882649368291553, "learning_rate": 1.4228571904779209e-05, "loss": 1.0813, "step": 3633 }, { "epoch": 0.3803244374672946, "grad_norm": 2.1962421392589837, "learning_rate": 1.4225499909909597e-05, "loss": 1.0255, "step": 3634 }, { "epoch": 0.380429094714809, "grad_norm": 2.0096595971812405, "learning_rate": 1.4222427429521482e-05, "loss": 0.9536, "step": 3635 }, { "epoch": 0.38053375196232336, "grad_norm": 2.349899372814253, "learning_rate": 1.4219354463967893e-05, "loss": 0.9828, "step": 3636 }, { "epoch": 0.38063840920983777, "grad_norm": 2.15612266191841, "learning_rate": 1.4216281013601926e-05, "loss": 0.8592, "step": 3637 }, { "epoch": 0.3807430664573522, "grad_norm": 2.1616922957234164, "learning_rate": 1.4213207078776723e-05, "loss": 1.0161, "step": 3638 }, { "epoch": 0.3808477237048666, "grad_norm": 2.0271980981355813, "learning_rate": 1.4210132659845482e-05, "loss": 0.8895, "step": 3639 }, { "epoch": 0.38095238095238093, "grad_norm": 1.8793729122503329, "learning_rate": 1.4207057757161465e-05, "loss": 0.9433, "step": 3640 }, { "epoch": 0.38105703819989534, "grad_norm": 2.2584716002700422, "learning_rate": 1.4203982371077984e-05, "loss": 0.9715, "step": 3641 }, { "epoch": 0.38116169544740974, "grad_norm": 1.9923565504390046, "learning_rate": 1.4200906501948405e-05, "loss": 0.8118, "step": 3642 }, { "epoch": 0.38126635269492415, "grad_norm": 2.3266543948061065, "learning_rate": 1.4197830150126155e-05, "loss": 1.0415, "step": 3643 }, { "epoch": 0.3813710099424385, "grad_norm": 1.9354684638961517, "learning_rate": 1.4194753315964707e-05, "loss": 0.9977, "step": 3644 }, { "epoch": 0.3814756671899529, "grad_norm": 2.0521405009628513, "learning_rate": 1.4191675999817603e-05, "loss": 0.9696, "step": 3645 }, { "epoch": 0.3815803244374673, "grad_norm": 2.2085561140812775, "learning_rate": 1.4188598202038428e-05, "loss": 1.0361, "step": 3646 }, { "epoch": 0.38168498168498166, "grad_norm": 2.447907112557791, "learning_rate": 1.418551992298083e-05, "loss": 1.0262, "step": 3647 }, { "epoch": 0.38178963893249607, "grad_norm": 2.083006037799446, "learning_rate": 1.4182441162998506e-05, "loss": 0.9471, "step": 3648 }, { "epoch": 0.3818942961800105, "grad_norm": 2.2012013601209466, "learning_rate": 1.4179361922445214e-05, "loss": 1.0669, "step": 3649 }, { "epoch": 0.3819989534275249, "grad_norm": 2.356165760065059, "learning_rate": 1.4176282201674764e-05, "loss": 1.0236, "step": 3650 }, { "epoch": 0.38210361067503923, "grad_norm": 2.1910213266631553, "learning_rate": 1.4173202001041024e-05, "loss": 0.9729, "step": 3651 }, { "epoch": 0.38220826792255364, "grad_norm": 1.9619249365232363, "learning_rate": 1.4170121320897911e-05, "loss": 0.9726, "step": 3652 }, { "epoch": 0.38231292517006804, "grad_norm": 2.7128459699808483, "learning_rate": 1.416704016159941e-05, "loss": 1.0244, "step": 3653 }, { "epoch": 0.3824175824175824, "grad_norm": 1.9243530614458784, "learning_rate": 1.4163958523499543e-05, "loss": 0.8521, "step": 3654 }, { "epoch": 0.3825222396650968, "grad_norm": 2.139320327183821, "learning_rate": 1.4160876406952402e-05, "loss": 0.9358, "step": 3655 }, { "epoch": 0.3826268969126112, "grad_norm": 1.8395056038544089, "learning_rate": 1.4157793812312126e-05, "loss": 0.9061, "step": 3656 }, { "epoch": 0.3827315541601256, "grad_norm": 2.094018606545468, "learning_rate": 1.4154710739932913e-05, "loss": 0.9147, "step": 3657 }, { "epoch": 0.38283621140763996, "grad_norm": 2.0930445989438553, "learning_rate": 1.4151627190169013e-05, "loss": 0.9932, "step": 3658 }, { "epoch": 0.38294086865515437, "grad_norm": 2.143172865705711, "learning_rate": 1.4148543163374733e-05, "loss": 0.9458, "step": 3659 }, { "epoch": 0.38304552590266877, "grad_norm": 2.2091982738545033, "learning_rate": 1.4145458659904433e-05, "loss": 0.9759, "step": 3660 }, { "epoch": 0.3831501831501832, "grad_norm": 1.9567813439206125, "learning_rate": 1.4142373680112528e-05, "loss": 0.9427, "step": 3661 }, { "epoch": 0.3832548403976975, "grad_norm": 2.041638652930433, "learning_rate": 1.4139288224353494e-05, "loss": 0.9669, "step": 3662 }, { "epoch": 0.38335949764521193, "grad_norm": 2.194450474916531, "learning_rate": 1.4136202292981848e-05, "loss": 0.8689, "step": 3663 }, { "epoch": 0.38346415489272634, "grad_norm": 2.048372715974301, "learning_rate": 1.4133115886352176e-05, "loss": 0.8711, "step": 3664 }, { "epoch": 0.3835688121402407, "grad_norm": 2.081252667466177, "learning_rate": 1.4130029004819109e-05, "loss": 0.9163, "step": 3665 }, { "epoch": 0.3836734693877551, "grad_norm": 2.063327599869014, "learning_rate": 1.4126941648737334e-05, "loss": 0.9385, "step": 3666 }, { "epoch": 0.3837781266352695, "grad_norm": 2.2359367310037133, "learning_rate": 1.4123853818461601e-05, "loss": 1.1035, "step": 3667 }, { "epoch": 0.3838827838827839, "grad_norm": 1.7634923657138273, "learning_rate": 1.4120765514346703e-05, "loss": 0.8968, "step": 3668 }, { "epoch": 0.38398744113029826, "grad_norm": 2.141693015056442, "learning_rate": 1.4117676736747494e-05, "loss": 0.921, "step": 3669 }, { "epoch": 0.38409209837781266, "grad_norm": 2.1476056016940035, "learning_rate": 1.411458748601888e-05, "loss": 1.0029, "step": 3670 }, { "epoch": 0.38419675562532707, "grad_norm": 1.9647392959113228, "learning_rate": 1.4111497762515822e-05, "loss": 0.9642, "step": 3671 }, { "epoch": 0.3843014128728414, "grad_norm": 2.5121210210965006, "learning_rate": 1.410840756659334e-05, "loss": 1.1309, "step": 3672 }, { "epoch": 0.3844060701203558, "grad_norm": 1.9954842133931197, "learning_rate": 1.4105316898606499e-05, "loss": 0.9308, "step": 3673 }, { "epoch": 0.38451072736787023, "grad_norm": 2.1044462151613135, "learning_rate": 1.410222575891042e-05, "loss": 0.8752, "step": 3674 }, { "epoch": 0.38461538461538464, "grad_norm": 2.0953131458808962, "learning_rate": 1.4099134147860287e-05, "loss": 0.6864, "step": 3675 }, { "epoch": 0.384720041862899, "grad_norm": 2.0201216369124335, "learning_rate": 1.4096042065811333e-05, "loss": 1.0058, "step": 3676 }, { "epoch": 0.3848246991104134, "grad_norm": 2.1104902948779034, "learning_rate": 1.409294951311884e-05, "loss": 0.9358, "step": 3677 }, { "epoch": 0.3849293563579278, "grad_norm": 2.0782997552273783, "learning_rate": 1.408985649013815e-05, "loss": 1.0495, "step": 3678 }, { "epoch": 0.38503401360544215, "grad_norm": 2.2354543394222226, "learning_rate": 1.4086762997224658e-05, "loss": 1.028, "step": 3679 }, { "epoch": 0.38513867085295656, "grad_norm": 2.344608781601967, "learning_rate": 1.4083669034733816e-05, "loss": 1.1165, "step": 3680 }, { "epoch": 0.38524332810047096, "grad_norm": 2.093693999681073, "learning_rate": 1.4080574603021121e-05, "loss": 0.9505, "step": 3681 }, { "epoch": 0.38534798534798537, "grad_norm": 1.9846470070251416, "learning_rate": 1.4077479702442132e-05, "loss": 0.9975, "step": 3682 }, { "epoch": 0.3854526425954997, "grad_norm": 1.9333444382455947, "learning_rate": 1.407438433335246e-05, "loss": 0.8526, "step": 3683 }, { "epoch": 0.3855572998430141, "grad_norm": 2.020798413551147, "learning_rate": 1.4071288496107769e-05, "loss": 0.8515, "step": 3684 }, { "epoch": 0.38566195709052853, "grad_norm": 2.008663061169858, "learning_rate": 1.4068192191063777e-05, "loss": 0.9006, "step": 3685 }, { "epoch": 0.38576661433804293, "grad_norm": 2.117370458308192, "learning_rate": 1.4065095418576254e-05, "loss": 1.0621, "step": 3686 }, { "epoch": 0.3858712715855573, "grad_norm": 2.4013394801880836, "learning_rate": 1.4061998179001025e-05, "loss": 1.0932, "step": 3687 }, { "epoch": 0.3859759288330717, "grad_norm": 2.1143080149970603, "learning_rate": 1.4058900472693973e-05, "loss": 0.8957, "step": 3688 }, { "epoch": 0.3860805860805861, "grad_norm": 2.075156804250162, "learning_rate": 1.4055802300011027e-05, "loss": 1.0239, "step": 3689 }, { "epoch": 0.38618524332810045, "grad_norm": 2.8275247517173216, "learning_rate": 1.4052703661308175e-05, "loss": 1.0158, "step": 3690 }, { "epoch": 0.38628990057561485, "grad_norm": 1.7962929732068007, "learning_rate": 1.4049604556941457e-05, "loss": 0.924, "step": 3691 }, { "epoch": 0.38639455782312926, "grad_norm": 2.01783824353475, "learning_rate": 1.4046504987266964e-05, "loss": 0.9401, "step": 3692 }, { "epoch": 0.38649921507064366, "grad_norm": 2.2187966441047435, "learning_rate": 1.4043404952640848e-05, "loss": 1.0663, "step": 3693 }, { "epoch": 0.386603872318158, "grad_norm": 1.999279925714374, "learning_rate": 1.4040304453419306e-05, "loss": 0.9407, "step": 3694 }, { "epoch": 0.3867085295656724, "grad_norm": 2.130348742018532, "learning_rate": 1.4037203489958593e-05, "loss": 0.9873, "step": 3695 }, { "epoch": 0.3868131868131868, "grad_norm": 2.325652550191666, "learning_rate": 1.4034102062615009e-05, "loss": 0.8994, "step": 3696 }, { "epoch": 0.3869178440607012, "grad_norm": 2.3197905613136345, "learning_rate": 1.4031000171744924e-05, "loss": 0.9815, "step": 3697 }, { "epoch": 0.3870225013082156, "grad_norm": 2.2641945333562385, "learning_rate": 1.402789781770475e-05, "loss": 1.0339, "step": 3698 }, { "epoch": 0.38712715855573, "grad_norm": 2.3574842229562614, "learning_rate": 1.4024795000850946e-05, "loss": 0.9534, "step": 3699 }, { "epoch": 0.3872318158032444, "grad_norm": 2.0918637615880566, "learning_rate": 1.4021691721540038e-05, "loss": 0.9808, "step": 3700 }, { "epoch": 0.38733647305075875, "grad_norm": 2.075992424556234, "learning_rate": 1.4018587980128602e-05, "loss": 0.8702, "step": 3701 }, { "epoch": 0.38744113029827315, "grad_norm": 1.8782927333898265, "learning_rate": 1.401548377697326e-05, "loss": 1.0075, "step": 3702 }, { "epoch": 0.38754578754578756, "grad_norm": 2.083887164652835, "learning_rate": 1.4012379112430692e-05, "loss": 1.0557, "step": 3703 }, { "epoch": 0.38765044479330196, "grad_norm": 2.389475853756546, "learning_rate": 1.4009273986857625e-05, "loss": 0.9441, "step": 3704 }, { "epoch": 0.3877551020408163, "grad_norm": 2.1864008731163795, "learning_rate": 1.4006168400610853e-05, "loss": 0.9095, "step": 3705 }, { "epoch": 0.3878597592883307, "grad_norm": 2.1399945231613624, "learning_rate": 1.4003062354047211e-05, "loss": 1.0264, "step": 3706 }, { "epoch": 0.3879644165358451, "grad_norm": 2.039186746148691, "learning_rate": 1.3999955847523588e-05, "loss": 1.0078, "step": 3707 }, { "epoch": 0.3880690737833595, "grad_norm": 1.7528376320399341, "learning_rate": 1.3996848881396932e-05, "loss": 0.7449, "step": 3708 }, { "epoch": 0.3881737310308739, "grad_norm": 2.4937767704582265, "learning_rate": 1.3993741456024233e-05, "loss": 0.9109, "step": 3709 }, { "epoch": 0.3882783882783883, "grad_norm": 2.129798206261498, "learning_rate": 1.3990633571762547e-05, "loss": 1.1029, "step": 3710 }, { "epoch": 0.3883830455259027, "grad_norm": 1.9957488140578195, "learning_rate": 1.3987525228968972e-05, "loss": 1.018, "step": 3711 }, { "epoch": 0.38848770277341704, "grad_norm": 2.1354640527218947, "learning_rate": 1.398441642800067e-05, "loss": 0.8997, "step": 3712 }, { "epoch": 0.38859236002093145, "grad_norm": 1.964424076142884, "learning_rate": 1.3981307169214837e-05, "loss": 0.9455, "step": 3713 }, { "epoch": 0.38869701726844585, "grad_norm": 2.221101115356642, "learning_rate": 1.397819745296874e-05, "loss": 1.0268, "step": 3714 }, { "epoch": 0.3888016745159602, "grad_norm": 2.3833418030736553, "learning_rate": 1.3975087279619695e-05, "loss": 1.04, "step": 3715 }, { "epoch": 0.3889063317634746, "grad_norm": 1.9463675227198336, "learning_rate": 1.397197664952506e-05, "loss": 0.9108, "step": 3716 }, { "epoch": 0.389010989010989, "grad_norm": 2.104522166089339, "learning_rate": 1.3968865563042255e-05, "loss": 1.0029, "step": 3717 }, { "epoch": 0.3891156462585034, "grad_norm": 2.2293325863509623, "learning_rate": 1.3965754020528755e-05, "loss": 1.0006, "step": 3718 }, { "epoch": 0.3892203035060178, "grad_norm": 2.225977481709003, "learning_rate": 1.3962642022342075e-05, "loss": 0.9976, "step": 3719 }, { "epoch": 0.3893249607535322, "grad_norm": 2.1371606373643983, "learning_rate": 1.3959529568839796e-05, "loss": 0.9584, "step": 3720 }, { "epoch": 0.3894296180010466, "grad_norm": 2.1984571676095217, "learning_rate": 1.3956416660379542e-05, "loss": 0.9966, "step": 3721 }, { "epoch": 0.389534275248561, "grad_norm": 2.3612095316369994, "learning_rate": 1.3953303297318992e-05, "loss": 1.0549, "step": 3722 }, { "epoch": 0.38963893249607534, "grad_norm": 2.002770628808304, "learning_rate": 1.3950189480015879e-05, "loss": 0.9726, "step": 3723 }, { "epoch": 0.38974358974358975, "grad_norm": 2.2409687498155817, "learning_rate": 1.3947075208827986e-05, "loss": 1.051, "step": 3724 }, { "epoch": 0.38984824699110415, "grad_norm": 2.4369275610874417, "learning_rate": 1.3943960484113154e-05, "loss": 1.0222, "step": 3725 }, { "epoch": 0.3899529042386185, "grad_norm": 2.001381883429874, "learning_rate": 1.3940845306229263e-05, "loss": 0.8523, "step": 3726 }, { "epoch": 0.3900575614861329, "grad_norm": 2.379959624189031, "learning_rate": 1.3937729675534259e-05, "loss": 1.0068, "step": 3727 }, { "epoch": 0.3901622187336473, "grad_norm": 2.065993568806048, "learning_rate": 1.3934613592386133e-05, "loss": 0.9928, "step": 3728 }, { "epoch": 0.3902668759811617, "grad_norm": 2.14687057918789, "learning_rate": 1.393149705714293e-05, "loss": 0.9085, "step": 3729 }, { "epoch": 0.39037153322867607, "grad_norm": 2.056457087459079, "learning_rate": 1.3928380070162743e-05, "loss": 1.031, "step": 3730 }, { "epoch": 0.3904761904761905, "grad_norm": 2.3376645905376447, "learning_rate": 1.3925262631803722e-05, "loss": 1.0316, "step": 3731 }, { "epoch": 0.3905808477237049, "grad_norm": 2.345112786800701, "learning_rate": 1.392214474242407e-05, "loss": 1.0802, "step": 3732 }, { "epoch": 0.39068550497121923, "grad_norm": 2.0352455265227443, "learning_rate": 1.3919026402382034e-05, "loss": 0.9909, "step": 3733 }, { "epoch": 0.39079016221873364, "grad_norm": 2.0459097619442863, "learning_rate": 1.3915907612035922e-05, "loss": 0.9397, "step": 3734 }, { "epoch": 0.39089481946624804, "grad_norm": 1.954972542902261, "learning_rate": 1.3912788371744084e-05, "loss": 0.825, "step": 3735 }, { "epoch": 0.39099947671376245, "grad_norm": 2.1145543797342983, "learning_rate": 1.3909668681864932e-05, "loss": 1.0745, "step": 3736 }, { "epoch": 0.3911041339612768, "grad_norm": 1.9636359750174326, "learning_rate": 1.3906548542756921e-05, "loss": 0.9003, "step": 3737 }, { "epoch": 0.3912087912087912, "grad_norm": 2.405810011459009, "learning_rate": 1.3903427954778566e-05, "loss": 1.0238, "step": 3738 }, { "epoch": 0.3913134484563056, "grad_norm": 2.0934967739240813, "learning_rate": 1.3900306918288423e-05, "loss": 1.004, "step": 3739 }, { "epoch": 0.39141810570381996, "grad_norm": 2.017817533341966, "learning_rate": 1.3897185433645106e-05, "loss": 0.9562, "step": 3740 }, { "epoch": 0.39152276295133437, "grad_norm": 2.0048073269684865, "learning_rate": 1.389406350120729e-05, "loss": 0.9614, "step": 3741 }, { "epoch": 0.3916274201988488, "grad_norm": 2.1093352540533985, "learning_rate": 1.3890941121333677e-05, "loss": 0.9876, "step": 3742 }, { "epoch": 0.3917320774463632, "grad_norm": 2.221682557807781, "learning_rate": 1.3887818294383042e-05, "loss": 0.9904, "step": 3743 }, { "epoch": 0.39183673469387753, "grad_norm": 1.9422817754452815, "learning_rate": 1.3884695020714207e-05, "loss": 0.8561, "step": 3744 }, { "epoch": 0.39194139194139194, "grad_norm": 2.080159971701426, "learning_rate": 1.3881571300686037e-05, "loss": 0.9193, "step": 3745 }, { "epoch": 0.39204604918890634, "grad_norm": 2.1895432862916078, "learning_rate": 1.3878447134657455e-05, "loss": 0.9276, "step": 3746 }, { "epoch": 0.39215070643642075, "grad_norm": 2.154912897139779, "learning_rate": 1.3875322522987435e-05, "loss": 0.9417, "step": 3747 }, { "epoch": 0.3922553636839351, "grad_norm": 2.375072450587214, "learning_rate": 1.3872197466035003e-05, "loss": 1.0635, "step": 3748 }, { "epoch": 0.3923600209314495, "grad_norm": 2.029749571414169, "learning_rate": 1.3869071964159231e-05, "loss": 0.9101, "step": 3749 }, { "epoch": 0.3924646781789639, "grad_norm": 2.298693925354746, "learning_rate": 1.3865946017719248e-05, "loss": 1.0388, "step": 3750 }, { "epoch": 0.39256933542647826, "grad_norm": 2.32567539400059, "learning_rate": 1.3862819627074231e-05, "loss": 0.9207, "step": 3751 }, { "epoch": 0.39267399267399267, "grad_norm": 1.8738585362137903, "learning_rate": 1.3859692792583403e-05, "loss": 0.9748, "step": 3752 }, { "epoch": 0.3927786499215071, "grad_norm": 1.903263299759873, "learning_rate": 1.3856565514606053e-05, "loss": 1.0709, "step": 3753 }, { "epoch": 0.3928833071690215, "grad_norm": 2.1106708790964377, "learning_rate": 1.3853437793501507e-05, "loss": 1.0031, "step": 3754 }, { "epoch": 0.39298796441653583, "grad_norm": 2.0628055794677493, "learning_rate": 1.3850309629629146e-05, "loss": 0.998, "step": 3755 }, { "epoch": 0.39309262166405023, "grad_norm": 1.8840425280715498, "learning_rate": 1.3847181023348404e-05, "loss": 0.9089, "step": 3756 }, { "epoch": 0.39319727891156464, "grad_norm": 2.2547660697868332, "learning_rate": 1.3844051975018761e-05, "loss": 0.9776, "step": 3757 }, { "epoch": 0.393301936159079, "grad_norm": 1.9372101963139208, "learning_rate": 1.3840922484999758e-05, "loss": 0.9781, "step": 3758 }, { "epoch": 0.3934065934065934, "grad_norm": 2.3442712849451657, "learning_rate": 1.383779255365097e-05, "loss": 0.9706, "step": 3759 }, { "epoch": 0.3935112506541078, "grad_norm": 1.9728314083799356, "learning_rate": 1.3834662181332043e-05, "loss": 0.8493, "step": 3760 }, { "epoch": 0.3936159079016222, "grad_norm": 1.8671773883426177, "learning_rate": 1.3831531368402653e-05, "loss": 0.8621, "step": 3761 }, { "epoch": 0.39372056514913656, "grad_norm": 2.0851610975093, "learning_rate": 1.3828400115222542e-05, "loss": 0.9945, "step": 3762 }, { "epoch": 0.39382522239665096, "grad_norm": 2.0950765767067296, "learning_rate": 1.38252684221515e-05, "loss": 0.9923, "step": 3763 }, { "epoch": 0.39392987964416537, "grad_norm": 2.0426883514777616, "learning_rate": 1.382213628954936e-05, "loss": 1.0127, "step": 3764 }, { "epoch": 0.3940345368916798, "grad_norm": 2.1796168252887713, "learning_rate": 1.381900371777601e-05, "loss": 1.0253, "step": 3765 }, { "epoch": 0.3941391941391941, "grad_norm": 2.056669859459506, "learning_rate": 1.3815870707191393e-05, "loss": 1.0314, "step": 3766 }, { "epoch": 0.39424385138670853, "grad_norm": 2.058977648719983, "learning_rate": 1.3812737258155495e-05, "loss": 0.8932, "step": 3767 }, { "epoch": 0.39434850863422294, "grad_norm": 2.3166421400138177, "learning_rate": 1.380960337102836e-05, "loss": 0.964, "step": 3768 }, { "epoch": 0.3944531658817373, "grad_norm": 1.8919186197649047, "learning_rate": 1.380646904617007e-05, "loss": 0.7797, "step": 3769 }, { "epoch": 0.3945578231292517, "grad_norm": 2.0859503146939256, "learning_rate": 1.3803334283940772e-05, "loss": 0.9242, "step": 3770 }, { "epoch": 0.3946624803767661, "grad_norm": 1.9003991213190858, "learning_rate": 1.3800199084700655e-05, "loss": 0.9279, "step": 3771 }, { "epoch": 0.3947671376242805, "grad_norm": 1.9199813086679065, "learning_rate": 1.3797063448809959e-05, "loss": 0.8576, "step": 3772 }, { "epoch": 0.39487179487179486, "grad_norm": 1.984278812039716, "learning_rate": 1.3793927376628977e-05, "loss": 0.9722, "step": 3773 }, { "epoch": 0.39497645211930926, "grad_norm": 2.1654990774470426, "learning_rate": 1.3790790868518044e-05, "loss": 0.8915, "step": 3774 }, { "epoch": 0.39508110936682367, "grad_norm": 2.0693477910040152, "learning_rate": 1.378765392483756e-05, "loss": 1.0609, "step": 3775 }, { "epoch": 0.395185766614338, "grad_norm": 2.5214519932464263, "learning_rate": 1.378451654594796e-05, "loss": 0.8346, "step": 3776 }, { "epoch": 0.3952904238618524, "grad_norm": 2.273499618027654, "learning_rate": 1.3781378732209738e-05, "loss": 0.971, "step": 3777 }, { "epoch": 0.39539508110936683, "grad_norm": 2.0346748171851594, "learning_rate": 1.3778240483983432e-05, "loss": 0.921, "step": 3778 }, { "epoch": 0.39549973835688124, "grad_norm": 2.4181856266842705, "learning_rate": 1.3775101801629636e-05, "loss": 0.9929, "step": 3779 }, { "epoch": 0.3956043956043956, "grad_norm": 1.8889123693724994, "learning_rate": 1.3771962685508991e-05, "loss": 0.9226, "step": 3780 }, { "epoch": 0.39570905285191, "grad_norm": 1.8892857747949212, "learning_rate": 1.3768823135982185e-05, "loss": 0.9675, "step": 3781 }, { "epoch": 0.3958137100994244, "grad_norm": 2.421740842709229, "learning_rate": 1.3765683153409963e-05, "loss": 0.9961, "step": 3782 }, { "epoch": 0.39591836734693875, "grad_norm": 2.19282905248247, "learning_rate": 1.376254273815311e-05, "loss": 1.045, "step": 3783 }, { "epoch": 0.39602302459445315, "grad_norm": 2.6404415016056695, "learning_rate": 1.375940189057247e-05, "loss": 1.0346, "step": 3784 }, { "epoch": 0.39612768184196756, "grad_norm": 2.087172492105292, "learning_rate": 1.3756260611028932e-05, "loss": 1.0005, "step": 3785 }, { "epoch": 0.39623233908948197, "grad_norm": 1.8576562979717508, "learning_rate": 1.3753118899883436e-05, "loss": 0.8769, "step": 3786 }, { "epoch": 0.3963369963369963, "grad_norm": 1.7251243282419155, "learning_rate": 1.3749976757496969e-05, "loss": 0.9917, "step": 3787 }, { "epoch": 0.3964416535845107, "grad_norm": 2.3431283462971244, "learning_rate": 1.3746834184230569e-05, "loss": 0.9063, "step": 3788 }, { "epoch": 0.39654631083202513, "grad_norm": 2.100375176753719, "learning_rate": 1.3743691180445328e-05, "loss": 1.056, "step": 3789 }, { "epoch": 0.39665096807953953, "grad_norm": 1.883781822130877, "learning_rate": 1.374054774650238e-05, "loss": 0.9573, "step": 3790 }, { "epoch": 0.3967556253270539, "grad_norm": 1.9710349136351437, "learning_rate": 1.3737403882762914e-05, "loss": 0.9161, "step": 3791 }, { "epoch": 0.3968602825745683, "grad_norm": 2.0619488055452067, "learning_rate": 1.3734259589588165e-05, "loss": 0.8067, "step": 3792 }, { "epoch": 0.3969649398220827, "grad_norm": 2.1265304547515607, "learning_rate": 1.3731114867339418e-05, "loss": 1.1066, "step": 3793 }, { "epoch": 0.39706959706959705, "grad_norm": 2.1845388692035135, "learning_rate": 1.3727969716378012e-05, "loss": 1.0085, "step": 3794 }, { "epoch": 0.39717425431711145, "grad_norm": 2.025263339801749, "learning_rate": 1.3724824137065325e-05, "loss": 0.9922, "step": 3795 }, { "epoch": 0.39727891156462586, "grad_norm": 1.9103665780889987, "learning_rate": 1.3721678129762792e-05, "loss": 1.0307, "step": 3796 }, { "epoch": 0.39738356881214026, "grad_norm": 2.420546254970621, "learning_rate": 1.3718531694831903e-05, "loss": 0.9924, "step": 3797 }, { "epoch": 0.3974882260596546, "grad_norm": 1.9578789743491487, "learning_rate": 1.371538483263418e-05, "loss": 0.9418, "step": 3798 }, { "epoch": 0.397592883307169, "grad_norm": 2.2999551242197107, "learning_rate": 1.3712237543531208e-05, "loss": 1.0449, "step": 3799 }, { "epoch": 0.3976975405546834, "grad_norm": 1.9021993595587816, "learning_rate": 1.3709089827884616e-05, "loss": 0.8744, "step": 3800 }, { "epoch": 0.3978021978021978, "grad_norm": 2.0695970711361444, "learning_rate": 1.3705941686056086e-05, "loss": 1.0607, "step": 3801 }, { "epoch": 0.3979068550497122, "grad_norm": 1.8591241528827835, "learning_rate": 1.3702793118407345e-05, "loss": 0.9284, "step": 3802 }, { "epoch": 0.3980115122972266, "grad_norm": 1.87633082654073, "learning_rate": 1.3699644125300165e-05, "loss": 0.9277, "step": 3803 }, { "epoch": 0.398116169544741, "grad_norm": 2.1413866117330675, "learning_rate": 1.3696494707096374e-05, "loss": 0.9777, "step": 3804 }, { "epoch": 0.39822082679225534, "grad_norm": 2.029453889878876, "learning_rate": 1.369334486415785e-05, "loss": 0.9102, "step": 3805 }, { "epoch": 0.39832548403976975, "grad_norm": 1.914840282215036, "learning_rate": 1.3690194596846516e-05, "loss": 0.8982, "step": 3806 }, { "epoch": 0.39843014128728416, "grad_norm": 2.2156107459198267, "learning_rate": 1.3687043905524337e-05, "loss": 0.9374, "step": 3807 }, { "epoch": 0.39853479853479856, "grad_norm": 2.353831603694141, "learning_rate": 1.3683892790553344e-05, "loss": 1.0299, "step": 3808 }, { "epoch": 0.3986394557823129, "grad_norm": 2.2201207812404333, "learning_rate": 1.3680741252295597e-05, "loss": 0.9126, "step": 3809 }, { "epoch": 0.3987441130298273, "grad_norm": 2.0962471812380006, "learning_rate": 1.367758929111322e-05, "loss": 0.9077, "step": 3810 }, { "epoch": 0.3988487702773417, "grad_norm": 2.21964726775872, "learning_rate": 1.3674436907368377e-05, "loss": 1.1087, "step": 3811 }, { "epoch": 0.3989534275248561, "grad_norm": 2.5819871421432286, "learning_rate": 1.3671284101423288e-05, "loss": 0.9415, "step": 3812 }, { "epoch": 0.3990580847723705, "grad_norm": 1.7198628116479615, "learning_rate": 1.3668130873640211e-05, "loss": 0.8898, "step": 3813 }, { "epoch": 0.3991627420198849, "grad_norm": 1.8597498193101791, "learning_rate": 1.3664977224381461e-05, "loss": 0.8606, "step": 3814 }, { "epoch": 0.3992673992673993, "grad_norm": 2.6740292842251234, "learning_rate": 1.3661823154009397e-05, "loss": 1.0387, "step": 3815 }, { "epoch": 0.39937205651491364, "grad_norm": 2.3003617051512735, "learning_rate": 1.3658668662886432e-05, "loss": 1.1021, "step": 3816 }, { "epoch": 0.39947671376242805, "grad_norm": 2.3043271725583963, "learning_rate": 1.3655513751375017e-05, "loss": 0.9864, "step": 3817 }, { "epoch": 0.39958137100994245, "grad_norm": 1.741403290306905, "learning_rate": 1.3652358419837664e-05, "loss": 0.9179, "step": 3818 }, { "epoch": 0.3996860282574568, "grad_norm": 2.143710219932102, "learning_rate": 1.3649202668636923e-05, "loss": 0.8958, "step": 3819 }, { "epoch": 0.3997906855049712, "grad_norm": 2.029828508661703, "learning_rate": 1.36460464981354e-05, "loss": 0.9657, "step": 3820 }, { "epoch": 0.3998953427524856, "grad_norm": 1.9833644803425685, "learning_rate": 1.3642889908695742e-05, "loss": 0.9026, "step": 3821 }, { "epoch": 0.4, "grad_norm": 2.210053513135352, "learning_rate": 1.3639732900680646e-05, "loss": 1.0516, "step": 3822 }, { "epoch": 0.4001046572475144, "grad_norm": 2.3239837565552985, "learning_rate": 1.3636575474452865e-05, "loss": 0.9058, "step": 3823 }, { "epoch": 0.4002093144950288, "grad_norm": 2.0848417519232454, "learning_rate": 1.3633417630375188e-05, "loss": 0.9769, "step": 3824 }, { "epoch": 0.4003139717425432, "grad_norm": 1.953543514114223, "learning_rate": 1.3630259368810461e-05, "loss": 0.9432, "step": 3825 }, { "epoch": 0.40041862899005753, "grad_norm": 2.1198900948291928, "learning_rate": 1.3627100690121571e-05, "loss": 0.9108, "step": 3826 }, { "epoch": 0.40052328623757194, "grad_norm": 2.1614969325672377, "learning_rate": 1.362394159467146e-05, "loss": 0.9297, "step": 3827 }, { "epoch": 0.40062794348508635, "grad_norm": 2.2955139319444418, "learning_rate": 1.3620782082823115e-05, "loss": 0.9899, "step": 3828 }, { "epoch": 0.40073260073260075, "grad_norm": 2.2734494900620947, "learning_rate": 1.3617622154939565e-05, "loss": 0.8636, "step": 3829 }, { "epoch": 0.4008372579801151, "grad_norm": 2.3826089612311607, "learning_rate": 1.3614461811383897e-05, "loss": 0.9787, "step": 3830 }, { "epoch": 0.4009419152276295, "grad_norm": 2.233808465500224, "learning_rate": 1.3611301052519242e-05, "loss": 1.1001, "step": 3831 }, { "epoch": 0.4010465724751439, "grad_norm": 2.4947957206362976, "learning_rate": 1.360813987870877e-05, "loss": 1.0562, "step": 3832 }, { "epoch": 0.4011512297226583, "grad_norm": 2.1160921112573807, "learning_rate": 1.3604978290315717e-05, "loss": 0.9986, "step": 3833 }, { "epoch": 0.40125588697017267, "grad_norm": 2.310532165642057, "learning_rate": 1.360181628770335e-05, "loss": 0.9502, "step": 3834 }, { "epoch": 0.4013605442176871, "grad_norm": 2.3190655097380106, "learning_rate": 1.3598653871234986e-05, "loss": 1.0915, "step": 3835 }, { "epoch": 0.4014652014652015, "grad_norm": 2.108855272952508, "learning_rate": 1.3595491041273999e-05, "loss": 0.8137, "step": 3836 }, { "epoch": 0.40156985871271583, "grad_norm": 2.0659350062129263, "learning_rate": 1.3592327798183802e-05, "loss": 1.0533, "step": 3837 }, { "epoch": 0.40167451596023024, "grad_norm": 1.9843597273847278, "learning_rate": 1.3589164142327863e-05, "loss": 0.9426, "step": 3838 }, { "epoch": 0.40177917320774464, "grad_norm": 2.074638091286358, "learning_rate": 1.3586000074069679e-05, "loss": 1.0032, "step": 3839 }, { "epoch": 0.40188383045525905, "grad_norm": 1.8783659326755497, "learning_rate": 1.3582835593772822e-05, "loss": 0.9629, "step": 3840 }, { "epoch": 0.4019884877027734, "grad_norm": 2.1806512008747188, "learning_rate": 1.3579670701800893e-05, "loss": 1.0359, "step": 3841 }, { "epoch": 0.4020931449502878, "grad_norm": 1.8780201218056713, "learning_rate": 1.3576505398517546e-05, "loss": 0.8616, "step": 3842 }, { "epoch": 0.4021978021978022, "grad_norm": 2.16324024690094, "learning_rate": 1.3573339684286472e-05, "loss": 0.9952, "step": 3843 }, { "epoch": 0.40230245944531656, "grad_norm": 2.07630913117498, "learning_rate": 1.3570173559471427e-05, "loss": 1.0358, "step": 3844 }, { "epoch": 0.40240711669283097, "grad_norm": 2.062316520805037, "learning_rate": 1.3567007024436206e-05, "loss": 0.9196, "step": 3845 }, { "epoch": 0.4025117739403454, "grad_norm": 2.252816836018333, "learning_rate": 1.3563840079544642e-05, "loss": 0.9085, "step": 3846 }, { "epoch": 0.4026164311878598, "grad_norm": 2.0407067151488087, "learning_rate": 1.3560672725160631e-05, "loss": 1.0633, "step": 3847 }, { "epoch": 0.40272108843537413, "grad_norm": 2.1656996102091126, "learning_rate": 1.3557504961648102e-05, "loss": 1.046, "step": 3848 }, { "epoch": 0.40282574568288854, "grad_norm": 1.904183686696131, "learning_rate": 1.3554336789371046e-05, "loss": 0.9816, "step": 3849 }, { "epoch": 0.40293040293040294, "grad_norm": 2.0311197336923623, "learning_rate": 1.3551168208693486e-05, "loss": 0.9372, "step": 3850 }, { "epoch": 0.40303506017791735, "grad_norm": 2.1048762647281403, "learning_rate": 1.35479992199795e-05, "loss": 1.0747, "step": 3851 }, { "epoch": 0.4031397174254317, "grad_norm": 2.219151744165179, "learning_rate": 1.3544829823593208e-05, "loss": 1.0558, "step": 3852 }, { "epoch": 0.4032443746729461, "grad_norm": 1.8701877669813198, "learning_rate": 1.3541660019898784e-05, "loss": 1.0087, "step": 3853 }, { "epoch": 0.4033490319204605, "grad_norm": 1.8767505975265026, "learning_rate": 1.3538489809260447e-05, "loss": 0.8751, "step": 3854 }, { "epoch": 0.40345368916797486, "grad_norm": 1.949016676761772, "learning_rate": 1.3535319192042455e-05, "loss": 1.0158, "step": 3855 }, { "epoch": 0.40355834641548927, "grad_norm": 1.9762439941084444, "learning_rate": 1.3532148168609123e-05, "loss": 1.0097, "step": 3856 }, { "epoch": 0.40366300366300367, "grad_norm": 2.490558941314484, "learning_rate": 1.3528976739324807e-05, "loss": 0.9381, "step": 3857 }, { "epoch": 0.4037676609105181, "grad_norm": 2.3815368861074737, "learning_rate": 1.3525804904553906e-05, "loss": 0.994, "step": 3858 }, { "epoch": 0.40387231815803243, "grad_norm": 2.224663411985367, "learning_rate": 1.3522632664660878e-05, "loss": 1.0024, "step": 3859 }, { "epoch": 0.40397697540554683, "grad_norm": 2.070837677342153, "learning_rate": 1.351946002001021e-05, "loss": 0.9638, "step": 3860 }, { "epoch": 0.40408163265306124, "grad_norm": 2.0394436878689084, "learning_rate": 1.3516286970966454e-05, "loss": 1.0819, "step": 3861 }, { "epoch": 0.4041862899005756, "grad_norm": 2.104205603022299, "learning_rate": 1.35131135178942e-05, "loss": 0.9877, "step": 3862 }, { "epoch": 0.40429094714809, "grad_norm": 2.2820283958841325, "learning_rate": 1.3509939661158078e-05, "loss": 1.0813, "step": 3863 }, { "epoch": 0.4043956043956044, "grad_norm": 1.8165693573504802, "learning_rate": 1.3506765401122774e-05, "loss": 0.9544, "step": 3864 }, { "epoch": 0.4045002616431188, "grad_norm": 1.8593322923606608, "learning_rate": 1.3503590738153015e-05, "loss": 0.8905, "step": 3865 }, { "epoch": 0.40460491889063316, "grad_norm": 1.9003454201874181, "learning_rate": 1.3500415672613578e-05, "loss": 0.9831, "step": 3866 }, { "epoch": 0.40470957613814756, "grad_norm": 2.0723713810408553, "learning_rate": 1.3497240204869287e-05, "loss": 0.9065, "step": 3867 }, { "epoch": 0.40481423338566197, "grad_norm": 2.0143809176887264, "learning_rate": 1.3494064335285004e-05, "loss": 0.9393, "step": 3868 }, { "epoch": 0.4049188906331763, "grad_norm": 2.297759622527477, "learning_rate": 1.3490888064225645e-05, "loss": 0.9157, "step": 3869 }, { "epoch": 0.4050235478806907, "grad_norm": 2.047595647603498, "learning_rate": 1.348771139205617e-05, "loss": 1.0448, "step": 3870 }, { "epoch": 0.40512820512820513, "grad_norm": 2.304679069365117, "learning_rate": 1.3484534319141592e-05, "loss": 1.0223, "step": 3871 }, { "epoch": 0.40523286237571954, "grad_norm": 2.2989950063633264, "learning_rate": 1.3481356845846951e-05, "loss": 0.9272, "step": 3872 }, { "epoch": 0.4053375196232339, "grad_norm": 2.202029868599979, "learning_rate": 1.3478178972537354e-05, "loss": 0.9782, "step": 3873 }, { "epoch": 0.4054421768707483, "grad_norm": 2.271682115831657, "learning_rate": 1.347500069957794e-05, "loss": 0.9876, "step": 3874 }, { "epoch": 0.4055468341182627, "grad_norm": 2.1087888782860222, "learning_rate": 1.3471822027333901e-05, "loss": 1.1044, "step": 3875 }, { "epoch": 0.4056514913657771, "grad_norm": 1.9199653211400192, "learning_rate": 1.3468642956170474e-05, "loss": 0.9796, "step": 3876 }, { "epoch": 0.40575614861329146, "grad_norm": 2.2590968517959737, "learning_rate": 1.3465463486452942e-05, "loss": 0.8456, "step": 3877 }, { "epoch": 0.40586080586080586, "grad_norm": 2.4486677027922283, "learning_rate": 1.3462283618546623e-05, "loss": 1.0471, "step": 3878 }, { "epoch": 0.40596546310832027, "grad_norm": 2.1883091875894745, "learning_rate": 1.3459103352816905e-05, "loss": 1.0535, "step": 3879 }, { "epoch": 0.4060701203558346, "grad_norm": 1.9403740977737154, "learning_rate": 1.3455922689629195e-05, "loss": 0.9947, "step": 3880 }, { "epoch": 0.406174777603349, "grad_norm": 2.1633648926775164, "learning_rate": 1.3452741629348966e-05, "loss": 0.9966, "step": 3881 }, { "epoch": 0.40627943485086343, "grad_norm": 2.1785103292518944, "learning_rate": 1.3449560172341718e-05, "loss": 0.9293, "step": 3882 }, { "epoch": 0.40638409209837784, "grad_norm": 2.0241202507761953, "learning_rate": 1.344637831897302e-05, "loss": 0.9471, "step": 3883 }, { "epoch": 0.4064887493458922, "grad_norm": 2.0299069179358016, "learning_rate": 1.3443196069608462e-05, "loss": 0.9633, "step": 3884 }, { "epoch": 0.4065934065934066, "grad_norm": 1.8230670234656603, "learning_rate": 1.3440013424613699e-05, "loss": 0.8566, "step": 3885 }, { "epoch": 0.406698063840921, "grad_norm": 2.265586535508352, "learning_rate": 1.3436830384354417e-05, "loss": 1.1007, "step": 3886 }, { "epoch": 0.40680272108843535, "grad_norm": 2.0941195839844484, "learning_rate": 1.3433646949196354e-05, "loss": 0.9932, "step": 3887 }, { "epoch": 0.40690737833594975, "grad_norm": 2.0164409933200034, "learning_rate": 1.3430463119505304e-05, "loss": 0.9493, "step": 3888 }, { "epoch": 0.40701203558346416, "grad_norm": 2.1172725827353522, "learning_rate": 1.3427278895647083e-05, "loss": 1.0876, "step": 3889 }, { "epoch": 0.40711669283097857, "grad_norm": 2.1818693573193984, "learning_rate": 1.342409427798757e-05, "loss": 0.9947, "step": 3890 }, { "epoch": 0.4072213500784929, "grad_norm": 1.8963779475096132, "learning_rate": 1.3420909266892679e-05, "loss": 1.092, "step": 3891 }, { "epoch": 0.4073260073260073, "grad_norm": 2.388464219116994, "learning_rate": 1.341772386272838e-05, "loss": 1.0153, "step": 3892 }, { "epoch": 0.40743066457352173, "grad_norm": 2.0886208182575445, "learning_rate": 1.3414538065860685e-05, "loss": 1.0086, "step": 3893 }, { "epoch": 0.40753532182103613, "grad_norm": 1.8683253041052692, "learning_rate": 1.341135187665564e-05, "loss": 1.0031, "step": 3894 }, { "epoch": 0.4076399790685505, "grad_norm": 1.9302119953502512, "learning_rate": 1.340816529547935e-05, "loss": 0.9738, "step": 3895 }, { "epoch": 0.4077446363160649, "grad_norm": 2.076097227173372, "learning_rate": 1.340497832269796e-05, "loss": 0.9809, "step": 3896 }, { "epoch": 0.4078492935635793, "grad_norm": 1.9832695165206116, "learning_rate": 1.3401790958677653e-05, "loss": 0.875, "step": 3897 }, { "epoch": 0.40795395081109365, "grad_norm": 1.9007778657336216, "learning_rate": 1.3398603203784678e-05, "loss": 0.9117, "step": 3898 }, { "epoch": 0.40805860805860805, "grad_norm": 2.1000112600332077, "learning_rate": 1.3395415058385297e-05, "loss": 0.9539, "step": 3899 }, { "epoch": 0.40816326530612246, "grad_norm": 2.128028337883832, "learning_rate": 1.3392226522845843e-05, "loss": 0.9829, "step": 3900 }, { "epoch": 0.40826792255363686, "grad_norm": 2.3062569400722808, "learning_rate": 1.3389037597532689e-05, "loss": 0.9517, "step": 3901 }, { "epoch": 0.4083725798011512, "grad_norm": 2.2026909714583254, "learning_rate": 1.3385848282812242e-05, "loss": 0.9477, "step": 3902 }, { "epoch": 0.4084772370486656, "grad_norm": 1.8784837438489583, "learning_rate": 1.3382658579050964e-05, "loss": 0.8672, "step": 3903 }, { "epoch": 0.40858189429618, "grad_norm": 1.9667452879169667, "learning_rate": 1.3379468486615357e-05, "loss": 0.9565, "step": 3904 }, { "epoch": 0.4086865515436944, "grad_norm": 1.9420479807212934, "learning_rate": 1.3376278005871972e-05, "loss": 0.8869, "step": 3905 }, { "epoch": 0.4087912087912088, "grad_norm": 2.1557813800708057, "learning_rate": 1.33730871371874e-05, "loss": 0.9541, "step": 3906 }, { "epoch": 0.4088958660387232, "grad_norm": 2.0260654319412574, "learning_rate": 1.3369895880928277e-05, "loss": 1.0403, "step": 3907 }, { "epoch": 0.4090005232862376, "grad_norm": 2.1520685254880885, "learning_rate": 1.3366704237461283e-05, "loss": 0.9977, "step": 3908 }, { "epoch": 0.40910518053375194, "grad_norm": 2.0017224417903794, "learning_rate": 1.336351220715315e-05, "loss": 1.0277, "step": 3909 }, { "epoch": 0.40920983778126635, "grad_norm": 2.085668966136735, "learning_rate": 1.3360319790370646e-05, "loss": 0.9904, "step": 3910 }, { "epoch": 0.40931449502878076, "grad_norm": 2.233810172732676, "learning_rate": 1.3357126987480587e-05, "loss": 1.0078, "step": 3911 }, { "epoch": 0.4094191522762951, "grad_norm": 2.456133446509698, "learning_rate": 1.335393379884983e-05, "loss": 1.1043, "step": 3912 }, { "epoch": 0.4095238095238095, "grad_norm": 1.9446442501126147, "learning_rate": 1.3350740224845277e-05, "loss": 0.9661, "step": 3913 }, { "epoch": 0.4096284667713239, "grad_norm": 2.139239033329667, "learning_rate": 1.3347546265833885e-05, "loss": 1.0164, "step": 3914 }, { "epoch": 0.4097331240188383, "grad_norm": 2.414435331126368, "learning_rate": 1.3344351922182642e-05, "loss": 0.9962, "step": 3915 }, { "epoch": 0.4098377812663527, "grad_norm": 2.2612369566120236, "learning_rate": 1.3341157194258578e-05, "loss": 0.8626, "step": 3916 }, { "epoch": 0.4099424385138671, "grad_norm": 2.007696031752954, "learning_rate": 1.3337962082428782e-05, "loss": 0.9339, "step": 3917 }, { "epoch": 0.4100470957613815, "grad_norm": 2.1776335671383453, "learning_rate": 1.3334766587060372e-05, "loss": 0.9919, "step": 3918 }, { "epoch": 0.4101517530088959, "grad_norm": 2.4159392572041267, "learning_rate": 1.3331570708520527e-05, "loss": 0.8825, "step": 3919 }, { "epoch": 0.41025641025641024, "grad_norm": 2.2382392402884173, "learning_rate": 1.3328374447176448e-05, "loss": 0.9198, "step": 3920 }, { "epoch": 0.41036106750392465, "grad_norm": 2.011649128934293, "learning_rate": 1.3325177803395402e-05, "loss": 0.958, "step": 3921 }, { "epoch": 0.41046572475143905, "grad_norm": 2.1286275554802456, "learning_rate": 1.3321980777544686e-05, "loss": 0.9464, "step": 3922 }, { "epoch": 0.4105703819989534, "grad_norm": 1.892321406734822, "learning_rate": 1.331878336999164e-05, "loss": 0.9145, "step": 3923 }, { "epoch": 0.4106750392464678, "grad_norm": 1.8424101188930837, "learning_rate": 1.3315585581103663e-05, "loss": 0.8959, "step": 3924 }, { "epoch": 0.4107796964939822, "grad_norm": 2.126780664206234, "learning_rate": 1.3312387411248175e-05, "loss": 1.0817, "step": 3925 }, { "epoch": 0.4108843537414966, "grad_norm": 2.0459834656964384, "learning_rate": 1.330918886079266e-05, "loss": 1.0016, "step": 3926 }, { "epoch": 0.41098901098901097, "grad_norm": 1.9966709058022425, "learning_rate": 1.3305989930104639e-05, "loss": 0.9498, "step": 3927 }, { "epoch": 0.4110936682365254, "grad_norm": 1.9943061429132058, "learning_rate": 1.3302790619551673e-05, "loss": 1.0189, "step": 3928 }, { "epoch": 0.4111983254840398, "grad_norm": 2.2183362311043466, "learning_rate": 1.3299590929501369e-05, "loss": 0.9745, "step": 3929 }, { "epoch": 0.41130298273155413, "grad_norm": 2.1746772650891826, "learning_rate": 1.3296390860321376e-05, "loss": 0.9183, "step": 3930 }, { "epoch": 0.41140763997906854, "grad_norm": 1.9839787897777303, "learning_rate": 1.3293190412379392e-05, "loss": 0.9328, "step": 3931 }, { "epoch": 0.41151229722658295, "grad_norm": 2.106457836929408, "learning_rate": 1.3289989586043153e-05, "loss": 0.9307, "step": 3932 }, { "epoch": 0.41161695447409735, "grad_norm": 2.00060665012128, "learning_rate": 1.3286788381680444e-05, "loss": 1.0349, "step": 3933 }, { "epoch": 0.4117216117216117, "grad_norm": 2.1356259208594497, "learning_rate": 1.3283586799659083e-05, "loss": 1.0561, "step": 3934 }, { "epoch": 0.4118262689691261, "grad_norm": 2.0728612956807138, "learning_rate": 1.3280384840346942e-05, "loss": 0.9573, "step": 3935 }, { "epoch": 0.4119309262166405, "grad_norm": 2.114394535148644, "learning_rate": 1.3277182504111936e-05, "loss": 0.9407, "step": 3936 }, { "epoch": 0.4120355834641549, "grad_norm": 2.3060951675811867, "learning_rate": 1.3273979791322016e-05, "loss": 1.0736, "step": 3937 }, { "epoch": 0.41214024071166927, "grad_norm": 2.2146993223832774, "learning_rate": 1.3270776702345182e-05, "loss": 0.8368, "step": 3938 }, { "epoch": 0.4122448979591837, "grad_norm": 1.9659217726233473, "learning_rate": 1.3267573237549471e-05, "loss": 0.9114, "step": 3939 }, { "epoch": 0.4123495552066981, "grad_norm": 2.1795203888166546, "learning_rate": 1.3264369397302974e-05, "loss": 1.0584, "step": 3940 }, { "epoch": 0.41245421245421243, "grad_norm": 2.2274974384458184, "learning_rate": 1.3261165181973814e-05, "loss": 0.941, "step": 3941 }, { "epoch": 0.41255886970172684, "grad_norm": 2.0938075682129784, "learning_rate": 1.3257960591930164e-05, "loss": 0.9865, "step": 3942 }, { "epoch": 0.41266352694924124, "grad_norm": 2.252476991607518, "learning_rate": 1.3254755627540237e-05, "loss": 0.8637, "step": 3943 }, { "epoch": 0.41276818419675565, "grad_norm": 1.9902821855928159, "learning_rate": 1.3251550289172293e-05, "loss": 0.8776, "step": 3944 }, { "epoch": 0.41287284144427, "grad_norm": 2.4524909184883628, "learning_rate": 1.3248344577194625e-05, "loss": 0.9399, "step": 3945 }, { "epoch": 0.4129774986917844, "grad_norm": 2.113793753822339, "learning_rate": 1.3245138491975582e-05, "loss": 0.9082, "step": 3946 }, { "epoch": 0.4130821559392988, "grad_norm": 2.122883266073302, "learning_rate": 1.3241932033883544e-05, "loss": 0.9503, "step": 3947 }, { "epoch": 0.41318681318681316, "grad_norm": 1.750988968337933, "learning_rate": 1.3238725203286951e-05, "loss": 0.8322, "step": 3948 }, { "epoch": 0.41329147043432757, "grad_norm": 2.1864026140069694, "learning_rate": 1.323551800055426e-05, "loss": 1.0516, "step": 3949 }, { "epoch": 0.413396127681842, "grad_norm": 2.1920452942734316, "learning_rate": 1.3232310426053996e-05, "loss": 0.974, "step": 3950 }, { "epoch": 0.4135007849293564, "grad_norm": 2.531072565644497, "learning_rate": 1.3229102480154708e-05, "loss": 0.9761, "step": 3951 }, { "epoch": 0.41360544217687073, "grad_norm": 2.1000618795832526, "learning_rate": 1.3225894163224999e-05, "loss": 1.0461, "step": 3952 }, { "epoch": 0.41371009942438514, "grad_norm": 2.4031213533226694, "learning_rate": 1.3222685475633515e-05, "loss": 0.945, "step": 3953 }, { "epoch": 0.41381475667189954, "grad_norm": 2.1941396313682606, "learning_rate": 1.3219476417748934e-05, "loss": 1.0891, "step": 3954 }, { "epoch": 0.4139194139194139, "grad_norm": 1.8725259557278697, "learning_rate": 1.3216266989939987e-05, "loss": 0.9485, "step": 3955 }, { "epoch": 0.4140240711669283, "grad_norm": 2.0302624935221814, "learning_rate": 1.321305719257544e-05, "loss": 0.9908, "step": 3956 }, { "epoch": 0.4141287284144427, "grad_norm": 2.1705816692660274, "learning_rate": 1.3209847026024112e-05, "loss": 0.9744, "step": 3957 }, { "epoch": 0.4142333856619571, "grad_norm": 1.958836116018982, "learning_rate": 1.3206636490654851e-05, "loss": 0.8859, "step": 3958 }, { "epoch": 0.41433804290947146, "grad_norm": 1.9407740225892292, "learning_rate": 1.3203425586836558e-05, "loss": 0.9871, "step": 3959 }, { "epoch": 0.41444270015698587, "grad_norm": 2.356610680228509, "learning_rate": 1.3200214314938169e-05, "loss": 0.8781, "step": 3960 }, { "epoch": 0.41454735740450027, "grad_norm": 1.9211681674761067, "learning_rate": 1.3197002675328669e-05, "loss": 1.0168, "step": 3961 }, { "epoch": 0.4146520146520147, "grad_norm": 1.9962879034585508, "learning_rate": 1.3193790668377082e-05, "loss": 0.9542, "step": 3962 }, { "epoch": 0.41475667189952903, "grad_norm": 2.236000041036604, "learning_rate": 1.3190578294452471e-05, "loss": 1.0742, "step": 3963 }, { "epoch": 0.41486132914704343, "grad_norm": 1.9426088190833695, "learning_rate": 1.3187365553923948e-05, "loss": 1.0262, "step": 3964 }, { "epoch": 0.41496598639455784, "grad_norm": 2.35541736370535, "learning_rate": 1.3184152447160658e-05, "loss": 1.0172, "step": 3965 }, { "epoch": 0.4150706436420722, "grad_norm": 2.088936709244117, "learning_rate": 1.3180938974531799e-05, "loss": 1.0208, "step": 3966 }, { "epoch": 0.4151753008895866, "grad_norm": 1.8738282005240101, "learning_rate": 1.3177725136406605e-05, "loss": 0.9879, "step": 3967 }, { "epoch": 0.415279958137101, "grad_norm": 1.9962816803985017, "learning_rate": 1.3174510933154348e-05, "loss": 1.0465, "step": 3968 }, { "epoch": 0.4153846153846154, "grad_norm": 2.0688274484112705, "learning_rate": 1.3171296365144351e-05, "loss": 0.9194, "step": 3969 }, { "epoch": 0.41548927263212976, "grad_norm": 1.778517909912603, "learning_rate": 1.3168081432745971e-05, "loss": 0.9127, "step": 3970 }, { "epoch": 0.41559392987964416, "grad_norm": 2.0828264061542083, "learning_rate": 1.3164866136328613e-05, "loss": 0.8817, "step": 3971 }, { "epoch": 0.41569858712715857, "grad_norm": 2.229721638477741, "learning_rate": 1.3161650476261723e-05, "loss": 1.0506, "step": 3972 }, { "epoch": 0.4158032443746729, "grad_norm": 1.9680337726426218, "learning_rate": 1.315843445291478e-05, "loss": 0.9459, "step": 3973 }, { "epoch": 0.4159079016221873, "grad_norm": 2.14107133654801, "learning_rate": 1.3155218066657315e-05, "loss": 1.0548, "step": 3974 }, { "epoch": 0.41601255886970173, "grad_norm": 2.013522258681747, "learning_rate": 1.3152001317858902e-05, "loss": 1.0813, "step": 3975 }, { "epoch": 0.41611721611721614, "grad_norm": 2.1608038286842595, "learning_rate": 1.3148784206889144e-05, "loss": 1.0775, "step": 3976 }, { "epoch": 0.4162218733647305, "grad_norm": 2.4659678787298094, "learning_rate": 1.3145566734117701e-05, "loss": 1.079, "step": 3977 }, { "epoch": 0.4163265306122449, "grad_norm": 2.2519431817057, "learning_rate": 1.3142348899914259e-05, "loss": 1.0753, "step": 3978 }, { "epoch": 0.4164311878597593, "grad_norm": 1.9039732617022176, "learning_rate": 1.3139130704648562e-05, "loss": 0.9651, "step": 3979 }, { "epoch": 0.4165358451072737, "grad_norm": 2.0825234022954486, "learning_rate": 1.313591214869038e-05, "loss": 0.9306, "step": 3980 }, { "epoch": 0.41664050235478806, "grad_norm": 1.8388913462765786, "learning_rate": 1.3132693232409538e-05, "loss": 0.7844, "step": 3981 }, { "epoch": 0.41674515960230246, "grad_norm": 1.9839223196264366, "learning_rate": 1.3129473956175893e-05, "loss": 1.0239, "step": 3982 }, { "epoch": 0.41684981684981687, "grad_norm": 2.1149890042363855, "learning_rate": 1.3126254320359344e-05, "loss": 1.1038, "step": 3983 }, { "epoch": 0.4169544740973312, "grad_norm": 2.3388045331623553, "learning_rate": 1.3123034325329838e-05, "loss": 1.0914, "step": 3984 }, { "epoch": 0.4170591313448456, "grad_norm": 2.3672832146022524, "learning_rate": 1.3119813971457356e-05, "loss": 1.0583, "step": 3985 }, { "epoch": 0.41716378859236003, "grad_norm": 2.198594788955325, "learning_rate": 1.3116593259111922e-05, "loss": 1.0107, "step": 3986 }, { "epoch": 0.41726844583987444, "grad_norm": 2.066530857997824, "learning_rate": 1.311337218866361e-05, "loss": 1.0408, "step": 3987 }, { "epoch": 0.4173731030873888, "grad_norm": 2.1047196181740557, "learning_rate": 1.3110150760482518e-05, "loss": 1.0169, "step": 3988 }, { "epoch": 0.4174777603349032, "grad_norm": 1.9723714808567003, "learning_rate": 1.3106928974938801e-05, "loss": 0.9556, "step": 3989 }, { "epoch": 0.4175824175824176, "grad_norm": 2.264669592855649, "learning_rate": 1.3103706832402643e-05, "loss": 0.9994, "step": 3990 }, { "epoch": 0.41768707482993195, "grad_norm": 1.9073420765404958, "learning_rate": 1.3100484333244282e-05, "loss": 0.8359, "step": 3991 }, { "epoch": 0.41779173207744635, "grad_norm": 1.8596817557441248, "learning_rate": 1.3097261477833985e-05, "loss": 0.9506, "step": 3992 }, { "epoch": 0.41789638932496076, "grad_norm": 2.363734651762851, "learning_rate": 1.3094038266542062e-05, "loss": 0.8798, "step": 3993 }, { "epoch": 0.41800104657247517, "grad_norm": 2.0676118196497733, "learning_rate": 1.3090814699738875e-05, "loss": 0.8891, "step": 3994 }, { "epoch": 0.4181057038199895, "grad_norm": 2.515973859229479, "learning_rate": 1.3087590777794805e-05, "loss": 0.9715, "step": 3995 }, { "epoch": 0.4182103610675039, "grad_norm": 2.5102079979788647, "learning_rate": 1.3084366501080304e-05, "loss": 0.9184, "step": 3996 }, { "epoch": 0.4183150183150183, "grad_norm": 2.0704341062792397, "learning_rate": 1.3081141869965834e-05, "loss": 0.9982, "step": 3997 }, { "epoch": 0.4184196755625327, "grad_norm": 2.255147279683263, "learning_rate": 1.3077916884821922e-05, "loss": 0.9624, "step": 3998 }, { "epoch": 0.4185243328100471, "grad_norm": 2.1492963447091644, "learning_rate": 1.3074691546019116e-05, "loss": 0.8961, "step": 3999 }, { "epoch": 0.4186289900575615, "grad_norm": 2.0388505483770194, "learning_rate": 1.3071465853928018e-05, "loss": 0.9276, "step": 4000 }, { "epoch": 0.4187336473050759, "grad_norm": 1.866837078619738, "learning_rate": 1.3068239808919271e-05, "loss": 0.8709, "step": 4001 }, { "epoch": 0.41883830455259025, "grad_norm": 1.986139574512811, "learning_rate": 1.3065013411363548e-05, "loss": 0.9092, "step": 4002 }, { "epoch": 0.41894296180010465, "grad_norm": 1.8886956223922813, "learning_rate": 1.306178666163157e-05, "loss": 0.9102, "step": 4003 }, { "epoch": 0.41904761904761906, "grad_norm": 1.9906538968606682, "learning_rate": 1.3058559560094097e-05, "loss": 0.9435, "step": 4004 }, { "epoch": 0.41915227629513346, "grad_norm": 2.1441939858155425, "learning_rate": 1.3055332107121932e-05, "loss": 1.0081, "step": 4005 }, { "epoch": 0.4192569335426478, "grad_norm": 2.3353197187128836, "learning_rate": 1.3052104303085915e-05, "loss": 1.064, "step": 4006 }, { "epoch": 0.4193615907901622, "grad_norm": 1.83565573131337, "learning_rate": 1.3048876148356924e-05, "loss": 0.8715, "step": 4007 }, { "epoch": 0.4194662480376766, "grad_norm": 2.3218482775446474, "learning_rate": 1.3045647643305883e-05, "loss": 1.0566, "step": 4008 }, { "epoch": 0.419570905285191, "grad_norm": 1.9284607667631406, "learning_rate": 1.3042418788303752e-05, "loss": 0.9192, "step": 4009 }, { "epoch": 0.4196755625327054, "grad_norm": 2.061609946734348, "learning_rate": 1.303918958372154e-05, "loss": 0.9721, "step": 4010 }, { "epoch": 0.4197802197802198, "grad_norm": 1.8955350228878054, "learning_rate": 1.303596002993028e-05, "loss": 0.9905, "step": 4011 }, { "epoch": 0.4198848770277342, "grad_norm": 2.3133480934371526, "learning_rate": 1.3032730127301056e-05, "loss": 0.8862, "step": 4012 }, { "epoch": 0.41998953427524854, "grad_norm": 2.0623630507668596, "learning_rate": 1.3029499876204996e-05, "loss": 0.9311, "step": 4013 }, { "epoch": 0.42009419152276295, "grad_norm": 1.9829665019642981, "learning_rate": 1.3026269277013256e-05, "loss": 1.0163, "step": 4014 }, { "epoch": 0.42019884877027736, "grad_norm": 2.626803548305666, "learning_rate": 1.3023038330097047e-05, "loss": 1.0789, "step": 4015 }, { "epoch": 0.4203035060177917, "grad_norm": 2.229456363139348, "learning_rate": 1.3019807035827599e-05, "loss": 1.0455, "step": 4016 }, { "epoch": 0.4204081632653061, "grad_norm": 1.945363260039406, "learning_rate": 1.3016575394576204e-05, "loss": 1.0051, "step": 4017 }, { "epoch": 0.4205128205128205, "grad_norm": 1.8657699677861808, "learning_rate": 1.3013343406714181e-05, "loss": 0.9354, "step": 4018 }, { "epoch": 0.4206174777603349, "grad_norm": 2.13301131218169, "learning_rate": 1.3010111072612888e-05, "loss": 0.9128, "step": 4019 }, { "epoch": 0.4207221350078493, "grad_norm": 2.0809458736583637, "learning_rate": 1.3006878392643736e-05, "loss": 0.9963, "step": 4020 }, { "epoch": 0.4208267922553637, "grad_norm": 2.2025198937923784, "learning_rate": 1.3003645367178158e-05, "loss": 1.0341, "step": 4021 }, { "epoch": 0.4209314495028781, "grad_norm": 1.8303178475154966, "learning_rate": 1.300041199658764e-05, "loss": 0.9209, "step": 4022 }, { "epoch": 0.4210361067503925, "grad_norm": 2.0995406687477423, "learning_rate": 1.2997178281243698e-05, "loss": 1.0328, "step": 4023 }, { "epoch": 0.42114076399790684, "grad_norm": 2.133177530342129, "learning_rate": 1.2993944221517898e-05, "loss": 0.918, "step": 4024 }, { "epoch": 0.42124542124542125, "grad_norm": 1.913400092526822, "learning_rate": 1.2990709817781839e-05, "loss": 0.8177, "step": 4025 }, { "epoch": 0.42135007849293565, "grad_norm": 1.8746443011882772, "learning_rate": 1.2987475070407154e-05, "loss": 0.9147, "step": 4026 }, { "epoch": 0.42145473574045, "grad_norm": 2.01816221130607, "learning_rate": 1.2984239979765531e-05, "loss": 1.0686, "step": 4027 }, { "epoch": 0.4215593929879644, "grad_norm": 2.1127015875668365, "learning_rate": 1.2981004546228685e-05, "loss": 1.0448, "step": 4028 }, { "epoch": 0.4216640502354788, "grad_norm": 2.1501657966221526, "learning_rate": 1.2977768770168374e-05, "loss": 1.0125, "step": 4029 }, { "epoch": 0.4217687074829932, "grad_norm": 1.8600363609360118, "learning_rate": 1.2974532651956393e-05, "loss": 1.0479, "step": 4030 }, { "epoch": 0.42187336473050757, "grad_norm": 2.4569790644754996, "learning_rate": 1.297129619196458e-05, "loss": 0.9219, "step": 4031 }, { "epoch": 0.421978021978022, "grad_norm": 2.47767730012033, "learning_rate": 1.2968059390564813e-05, "loss": 0.9192, "step": 4032 }, { "epoch": 0.4220826792255364, "grad_norm": 2.0774703638862415, "learning_rate": 1.2964822248129003e-05, "loss": 0.9324, "step": 4033 }, { "epoch": 0.42218733647305073, "grad_norm": 2.5616604388013524, "learning_rate": 1.2961584765029107e-05, "loss": 0.9997, "step": 4034 }, { "epoch": 0.42229199372056514, "grad_norm": 2.3036391018677227, "learning_rate": 1.2958346941637119e-05, "loss": 0.9766, "step": 4035 }, { "epoch": 0.42239665096807955, "grad_norm": 2.4294354631420885, "learning_rate": 1.295510877832507e-05, "loss": 0.8532, "step": 4036 }, { "epoch": 0.42250130821559395, "grad_norm": 2.262444310074482, "learning_rate": 1.2951870275465033e-05, "loss": 1.0549, "step": 4037 }, { "epoch": 0.4226059654631083, "grad_norm": 2.1283035221559654, "learning_rate": 1.2948631433429114e-05, "loss": 1.0502, "step": 4038 }, { "epoch": 0.4227106227106227, "grad_norm": 2.214881325613008, "learning_rate": 1.2945392252589466e-05, "loss": 0.9077, "step": 4039 }, { "epoch": 0.4228152799581371, "grad_norm": 2.369040409827918, "learning_rate": 1.2942152733318278e-05, "loss": 0.9257, "step": 4040 }, { "epoch": 0.42291993720565146, "grad_norm": 1.6747394664320463, "learning_rate": 1.2938912875987776e-05, "loss": 0.9738, "step": 4041 }, { "epoch": 0.42302459445316587, "grad_norm": 1.8358883748542993, "learning_rate": 1.2935672680970231e-05, "loss": 0.8974, "step": 4042 }, { "epoch": 0.4231292517006803, "grad_norm": 1.9700721618750736, "learning_rate": 1.2932432148637937e-05, "loss": 0.8539, "step": 4043 }, { "epoch": 0.4232339089481947, "grad_norm": 1.7081738341481085, "learning_rate": 1.2929191279363248e-05, "loss": 0.9157, "step": 4044 }, { "epoch": 0.42333856619570903, "grad_norm": 1.9888702561572258, "learning_rate": 1.2925950073518543e-05, "loss": 0.8719, "step": 4045 }, { "epoch": 0.42344322344322344, "grad_norm": 2.1023936971584494, "learning_rate": 1.2922708531476245e-05, "loss": 0.9698, "step": 4046 }, { "epoch": 0.42354788069073784, "grad_norm": 2.065987444351679, "learning_rate": 1.2919466653608807e-05, "loss": 1.0014, "step": 4047 }, { "epoch": 0.42365253793825225, "grad_norm": 2.1081730464926682, "learning_rate": 1.2916224440288736e-05, "loss": 0.8841, "step": 4048 }, { "epoch": 0.4237571951857666, "grad_norm": 1.9254261511446567, "learning_rate": 1.2912981891888566e-05, "loss": 1.0805, "step": 4049 }, { "epoch": 0.423861852433281, "grad_norm": 1.8610671376702372, "learning_rate": 1.2909739008780866e-05, "loss": 1.0011, "step": 4050 }, { "epoch": 0.4239665096807954, "grad_norm": 1.8387402872473773, "learning_rate": 1.290649579133826e-05, "loss": 1.0046, "step": 4051 }, { "epoch": 0.42407116692830976, "grad_norm": 1.9929812428438674, "learning_rate": 1.2903252239933393e-05, "loss": 0.8668, "step": 4052 }, { "epoch": 0.42417582417582417, "grad_norm": 2.140516044935987, "learning_rate": 1.290000835493896e-05, "loss": 1.0137, "step": 4053 }, { "epoch": 0.4242804814233386, "grad_norm": 2.271991022792856, "learning_rate": 1.2896764136727687e-05, "loss": 0.9522, "step": 4054 }, { "epoch": 0.424385138670853, "grad_norm": 1.8813686606836715, "learning_rate": 1.2893519585672342e-05, "loss": 0.986, "step": 4055 }, { "epoch": 0.42448979591836733, "grad_norm": 1.974109588184302, "learning_rate": 1.289027470214573e-05, "loss": 1.0201, "step": 4056 }, { "epoch": 0.42459445316588174, "grad_norm": 2.0120661694359607, "learning_rate": 1.2887029486520697e-05, "loss": 0.9441, "step": 4057 }, { "epoch": 0.42469911041339614, "grad_norm": 2.141950924459728, "learning_rate": 1.2883783939170125e-05, "loss": 0.9926, "step": 4058 }, { "epoch": 0.4248037676609105, "grad_norm": 2.5623343563978915, "learning_rate": 1.2880538060466931e-05, "loss": 0.9227, "step": 4059 }, { "epoch": 0.4249084249084249, "grad_norm": 1.9750731791255807, "learning_rate": 1.287729185078407e-05, "loss": 1.0129, "step": 4060 }, { "epoch": 0.4250130821559393, "grad_norm": 2.0054191639646692, "learning_rate": 1.2874045310494549e-05, "loss": 0.9068, "step": 4061 }, { "epoch": 0.4251177394034537, "grad_norm": 1.9724083424778205, "learning_rate": 1.2870798439971393e-05, "loss": 0.862, "step": 4062 }, { "epoch": 0.42522239665096806, "grad_norm": 2.319494091259867, "learning_rate": 1.2867551239587677e-05, "loss": 1.0978, "step": 4063 }, { "epoch": 0.42532705389848247, "grad_norm": 2.319487442343795, "learning_rate": 1.286430370971651e-05, "loss": 1.0296, "step": 4064 }, { "epoch": 0.42543171114599687, "grad_norm": 2.0836979512763336, "learning_rate": 1.2861055850731039e-05, "loss": 0.972, "step": 4065 }, { "epoch": 0.4255363683935113, "grad_norm": 2.552512993363399, "learning_rate": 1.2857807663004453e-05, "loss": 0.8689, "step": 4066 }, { "epoch": 0.4256410256410256, "grad_norm": 2.1467202430472763, "learning_rate": 1.285455914690997e-05, "loss": 0.9987, "step": 4067 }, { "epoch": 0.42574568288854003, "grad_norm": 2.1204522151177474, "learning_rate": 1.285131030282086e-05, "loss": 1.0174, "step": 4068 }, { "epoch": 0.42585034013605444, "grad_norm": 2.3582725345374027, "learning_rate": 1.284806113111041e-05, "loss": 0.9143, "step": 4069 }, { "epoch": 0.4259549973835688, "grad_norm": 2.0482186802071287, "learning_rate": 1.2844811632151964e-05, "loss": 0.9063, "step": 4070 }, { "epoch": 0.4260596546310832, "grad_norm": 2.0895838757594696, "learning_rate": 1.2841561806318897e-05, "loss": 1.052, "step": 4071 }, { "epoch": 0.4261643118785976, "grad_norm": 2.0282129968162246, "learning_rate": 1.2838311653984615e-05, "loss": 0.9849, "step": 4072 }, { "epoch": 0.426268969126112, "grad_norm": 2.5256239159226745, "learning_rate": 1.2835061175522569e-05, "loss": 1.102, "step": 4073 }, { "epoch": 0.42637362637362636, "grad_norm": 2.0879581133791687, "learning_rate": 1.2831810371306247e-05, "loss": 0.9754, "step": 4074 }, { "epoch": 0.42647828362114076, "grad_norm": 1.8366234669283463, "learning_rate": 1.2828559241709177e-05, "loss": 0.7956, "step": 4075 }, { "epoch": 0.42658294086865517, "grad_norm": 1.8797736027924894, "learning_rate": 1.2825307787104917e-05, "loss": 0.9943, "step": 4076 }, { "epoch": 0.4266875981161695, "grad_norm": 2.092067441756589, "learning_rate": 1.282205600786706e-05, "loss": 1.0322, "step": 4077 }, { "epoch": 0.4267922553636839, "grad_norm": 2.080732968369759, "learning_rate": 1.2818803904369248e-05, "loss": 0.9993, "step": 4078 }, { "epoch": 0.42689691261119833, "grad_norm": 1.8121577795467074, "learning_rate": 1.2815551476985151e-05, "loss": 0.8637, "step": 4079 }, { "epoch": 0.42700156985871274, "grad_norm": 1.8717669220769215, "learning_rate": 1.2812298726088486e-05, "loss": 0.8787, "step": 4080 }, { "epoch": 0.4271062271062271, "grad_norm": 2.179750288411442, "learning_rate": 1.2809045652052992e-05, "loss": 0.8961, "step": 4081 }, { "epoch": 0.4272108843537415, "grad_norm": 2.0838225686688263, "learning_rate": 1.280579225525246e-05, "loss": 0.9929, "step": 4082 }, { "epoch": 0.4273155416012559, "grad_norm": 2.275118316941627, "learning_rate": 1.2802538536060712e-05, "loss": 1.0092, "step": 4083 }, { "epoch": 0.42742019884877025, "grad_norm": 2.130895970762807, "learning_rate": 1.2799284494851602e-05, "loss": 0.8968, "step": 4084 }, { "epoch": 0.42752485609628466, "grad_norm": 1.8836199307900716, "learning_rate": 1.279603013199903e-05, "loss": 0.9125, "step": 4085 }, { "epoch": 0.42762951334379906, "grad_norm": 2.458988223380957, "learning_rate": 1.2792775447876928e-05, "loss": 1.0487, "step": 4086 }, { "epoch": 0.42773417059131347, "grad_norm": 1.9971378709402376, "learning_rate": 1.2789520442859265e-05, "loss": 0.863, "step": 4087 }, { "epoch": 0.4278388278388278, "grad_norm": 2.007011122353183, "learning_rate": 1.2786265117320047e-05, "loss": 1.0189, "step": 4088 }, { "epoch": 0.4279434850863422, "grad_norm": 2.1617166345362673, "learning_rate": 1.2783009471633321e-05, "loss": 0.9055, "step": 4089 }, { "epoch": 0.42804814233385663, "grad_norm": 2.5367597991139115, "learning_rate": 1.2779753506173163e-05, "loss": 1.0275, "step": 4090 }, { "epoch": 0.42815279958137104, "grad_norm": 1.9826469266228839, "learning_rate": 1.277649722131369e-05, "loss": 0.893, "step": 4091 }, { "epoch": 0.4282574568288854, "grad_norm": 2.038739638755624, "learning_rate": 1.277324061742906e-05, "loss": 1.0419, "step": 4092 }, { "epoch": 0.4283621140763998, "grad_norm": 2.5868448257014705, "learning_rate": 1.2769983694893461e-05, "loss": 1.0708, "step": 4093 }, { "epoch": 0.4284667713239142, "grad_norm": 1.952363024664158, "learning_rate": 1.2766726454081124e-05, "loss": 0.8693, "step": 4094 }, { "epoch": 0.42857142857142855, "grad_norm": 2.537669019028795, "learning_rate": 1.2763468895366304e-05, "loss": 0.8952, "step": 4095 }, { "epoch": 0.42867608581894295, "grad_norm": 2.1143917570175166, "learning_rate": 1.2760211019123307e-05, "loss": 0.9822, "step": 4096 }, { "epoch": 0.42878074306645736, "grad_norm": 2.0948895182122915, "learning_rate": 1.2756952825726469e-05, "loss": 0.9116, "step": 4097 }, { "epoch": 0.42888540031397177, "grad_norm": 2.222927984303674, "learning_rate": 1.2753694315550163e-05, "loss": 0.9202, "step": 4098 }, { "epoch": 0.4289900575614861, "grad_norm": 2.127865487750365, "learning_rate": 1.2750435488968793e-05, "loss": 1.0449, "step": 4099 }, { "epoch": 0.4290947148090005, "grad_norm": 1.9788232612921248, "learning_rate": 1.2747176346356815e-05, "loss": 0.9387, "step": 4100 }, { "epoch": 0.4291993720565149, "grad_norm": 1.9016287167665056, "learning_rate": 1.2743916888088706e-05, "loss": 0.8767, "step": 4101 }, { "epoch": 0.4293040293040293, "grad_norm": 2.1605389550086196, "learning_rate": 1.274065711453898e-05, "loss": 1.0258, "step": 4102 }, { "epoch": 0.4294086865515437, "grad_norm": 2.5323063383357463, "learning_rate": 1.2737397026082199e-05, "loss": 0.9743, "step": 4103 }, { "epoch": 0.4295133437990581, "grad_norm": 1.9921710514030042, "learning_rate": 1.2734136623092948e-05, "loss": 1.0633, "step": 4104 }, { "epoch": 0.4296180010465725, "grad_norm": 2.054756726068761, "learning_rate": 1.273087590594586e-05, "loss": 1.0129, "step": 4105 }, { "epoch": 0.42972265829408685, "grad_norm": 2.1517769857523517, "learning_rate": 1.272761487501559e-05, "loss": 0.9996, "step": 4106 }, { "epoch": 0.42982731554160125, "grad_norm": 2.0631947885960598, "learning_rate": 1.2724353530676842e-05, "loss": 1.0288, "step": 4107 }, { "epoch": 0.42993197278911566, "grad_norm": 2.0629095347948625, "learning_rate": 1.2721091873304351e-05, "loss": 0.9055, "step": 4108 }, { "epoch": 0.43003663003663006, "grad_norm": 2.1035215723837544, "learning_rate": 1.2717829903272889e-05, "loss": 0.9523, "step": 4109 }, { "epoch": 0.4301412872841444, "grad_norm": 2.1532839005297766, "learning_rate": 1.2714567620957262e-05, "loss": 0.9819, "step": 4110 }, { "epoch": 0.4302459445316588, "grad_norm": 2.4892633893694778, "learning_rate": 1.271130502673231e-05, "loss": 0.9969, "step": 4111 }, { "epoch": 0.4303506017791732, "grad_norm": 2.0071464961288243, "learning_rate": 1.2708042120972914e-05, "loss": 1.0227, "step": 4112 }, { "epoch": 0.4304552590266876, "grad_norm": 2.155107165578016, "learning_rate": 1.270477890405399e-05, "loss": 0.9929, "step": 4113 }, { "epoch": 0.430559916274202, "grad_norm": 2.169518740126109, "learning_rate": 1.2701515376350489e-05, "loss": 0.9272, "step": 4114 }, { "epoch": 0.4306645735217164, "grad_norm": 2.739527866572885, "learning_rate": 1.2698251538237389e-05, "loss": 0.9999, "step": 4115 }, { "epoch": 0.4307692307692308, "grad_norm": 2.513640769034525, "learning_rate": 1.2694987390089723e-05, "loss": 1.0112, "step": 4116 }, { "epoch": 0.43087388801674514, "grad_norm": 2.0925444858975326, "learning_rate": 1.269172293228254e-05, "loss": 0.9724, "step": 4117 }, { "epoch": 0.43097854526425955, "grad_norm": 2.1067772462429883, "learning_rate": 1.2688458165190933e-05, "loss": 0.9119, "step": 4118 }, { "epoch": 0.43108320251177396, "grad_norm": 2.0030783316902165, "learning_rate": 1.2685193089190038e-05, "loss": 1.0959, "step": 4119 }, { "epoch": 0.4311878597592883, "grad_norm": 2.1073270714006034, "learning_rate": 1.2681927704655008e-05, "loss": 1.0327, "step": 4120 }, { "epoch": 0.4312925170068027, "grad_norm": 2.137450289870023, "learning_rate": 1.2678662011961051e-05, "loss": 0.9802, "step": 4121 }, { "epoch": 0.4313971742543171, "grad_norm": 2.0752705351892673, "learning_rate": 1.2675396011483398e-05, "loss": 0.9755, "step": 4122 }, { "epoch": 0.4315018315018315, "grad_norm": 2.2558252138712582, "learning_rate": 1.2672129703597321e-05, "loss": 1.0148, "step": 4123 }, { "epoch": 0.4316064887493459, "grad_norm": 1.9748773751441262, "learning_rate": 1.2668863088678124e-05, "loss": 0.9922, "step": 4124 }, { "epoch": 0.4317111459968603, "grad_norm": 1.8299642601513824, "learning_rate": 1.2665596167101144e-05, "loss": 0.8775, "step": 4125 }, { "epoch": 0.4318158032443747, "grad_norm": 2.2374692170514527, "learning_rate": 1.2662328939241766e-05, "loss": 1.0831, "step": 4126 }, { "epoch": 0.4319204604918891, "grad_norm": 2.1794273635322376, "learning_rate": 1.2659061405475392e-05, "loss": 0.9673, "step": 4127 }, { "epoch": 0.43202511773940344, "grad_norm": 1.939053173523464, "learning_rate": 1.2655793566177475e-05, "loss": 0.8898, "step": 4128 }, { "epoch": 0.43212977498691785, "grad_norm": 2.0573506268728328, "learning_rate": 1.2652525421723489e-05, "loss": 0.8148, "step": 4129 }, { "epoch": 0.43223443223443225, "grad_norm": 1.8819328528426567, "learning_rate": 1.2649256972488956e-05, "loss": 1.0517, "step": 4130 }, { "epoch": 0.4323390894819466, "grad_norm": 1.9984163786543963, "learning_rate": 1.2645988218849432e-05, "loss": 0.908, "step": 4131 }, { "epoch": 0.432443746729461, "grad_norm": 1.8773422683017276, "learning_rate": 1.264271916118049e-05, "loss": 0.8387, "step": 4132 }, { "epoch": 0.4325484039769754, "grad_norm": 2.0149662866765863, "learning_rate": 1.2639449799857766e-05, "loss": 0.9982, "step": 4133 }, { "epoch": 0.4326530612244898, "grad_norm": 2.174292902817768, "learning_rate": 1.2636180135256904e-05, "loss": 1.1082, "step": 4134 }, { "epoch": 0.43275771847200417, "grad_norm": 2.2208332106910897, "learning_rate": 1.2632910167753601e-05, "loss": 0.8762, "step": 4135 }, { "epoch": 0.4328623757195186, "grad_norm": 2.181100971590527, "learning_rate": 1.2629639897723585e-05, "loss": 0.975, "step": 4136 }, { "epoch": 0.432967032967033, "grad_norm": 1.931613675855517, "learning_rate": 1.262636932554261e-05, "loss": 0.8959, "step": 4137 }, { "epoch": 0.43307169021454733, "grad_norm": 2.2708040082763943, "learning_rate": 1.2623098451586477e-05, "loss": 1.0665, "step": 4138 }, { "epoch": 0.43317634746206174, "grad_norm": 1.9224472683374367, "learning_rate": 1.2619827276231017e-05, "loss": 0.9472, "step": 4139 }, { "epoch": 0.43328100470957615, "grad_norm": 2.2865237719592275, "learning_rate": 1.261655579985209e-05, "loss": 0.8564, "step": 4140 }, { "epoch": 0.43338566195709055, "grad_norm": 1.937006380048429, "learning_rate": 1.2613284022825596e-05, "loss": 1.0623, "step": 4141 }, { "epoch": 0.4334903192046049, "grad_norm": 2.5686460907237096, "learning_rate": 1.2610011945527471e-05, "loss": 1.0288, "step": 4142 }, { "epoch": 0.4335949764521193, "grad_norm": 2.4290445999367405, "learning_rate": 1.2606739568333684e-05, "loss": 1.0339, "step": 4143 }, { "epoch": 0.4336996336996337, "grad_norm": 2.009556122935142, "learning_rate": 1.2603466891620232e-05, "loss": 0.8586, "step": 4144 }, { "epoch": 0.43380429094714806, "grad_norm": 1.7741581344189656, "learning_rate": 1.2600193915763162e-05, "loss": 0.8876, "step": 4145 }, { "epoch": 0.43390894819466247, "grad_norm": 2.4372967065545317, "learning_rate": 1.2596920641138538e-05, "loss": 1.0327, "step": 4146 }, { "epoch": 0.4340136054421769, "grad_norm": 2.0894117465917215, "learning_rate": 1.2593647068122465e-05, "loss": 1.0019, "step": 4147 }, { "epoch": 0.4341182626896913, "grad_norm": 2.123368845322283, "learning_rate": 1.259037319709109e-05, "loss": 0.9752, "step": 4148 }, { "epoch": 0.43422291993720563, "grad_norm": 2.041973404473876, "learning_rate": 1.258709902842058e-05, "loss": 1.0645, "step": 4149 }, { "epoch": 0.43432757718472004, "grad_norm": 2.2360927293499544, "learning_rate": 1.258382456248715e-05, "loss": 0.9802, "step": 4150 }, { "epoch": 0.43443223443223444, "grad_norm": 1.9616686428786763, "learning_rate": 1.2580549799667034e-05, "loss": 0.8553, "step": 4151 }, { "epoch": 0.43453689167974885, "grad_norm": 2.2952336749644804, "learning_rate": 1.2577274740336523e-05, "loss": 1.0347, "step": 4152 }, { "epoch": 0.4346415489272632, "grad_norm": 2.370451934856473, "learning_rate": 1.2573999384871916e-05, "loss": 0.9924, "step": 4153 }, { "epoch": 0.4347462061747776, "grad_norm": 2.0939601713417577, "learning_rate": 1.2570723733649558e-05, "loss": 0.9574, "step": 4154 }, { "epoch": 0.434850863422292, "grad_norm": 2.388950829479333, "learning_rate": 1.256744778704584e-05, "loss": 0.9239, "step": 4155 }, { "epoch": 0.43495552066980636, "grad_norm": 1.978177781520638, "learning_rate": 1.2564171545437156e-05, "loss": 0.9031, "step": 4156 }, { "epoch": 0.43506017791732077, "grad_norm": 2.112737659944072, "learning_rate": 1.2560895009199972e-05, "loss": 0.9537, "step": 4157 }, { "epoch": 0.4351648351648352, "grad_norm": 2.033176365951217, "learning_rate": 1.2557618178710756e-05, "loss": 1.0335, "step": 4158 }, { "epoch": 0.4352694924123496, "grad_norm": 2.1148782403285784, "learning_rate": 1.2554341054346027e-05, "loss": 0.7705, "step": 4159 }, { "epoch": 0.43537414965986393, "grad_norm": 1.6826596407900571, "learning_rate": 1.2551063636482332e-05, "loss": 0.7461, "step": 4160 }, { "epoch": 0.43547880690737834, "grad_norm": 2.0821373935182623, "learning_rate": 1.2547785925496255e-05, "loss": 0.8977, "step": 4161 }, { "epoch": 0.43558346415489274, "grad_norm": 2.7138514710502126, "learning_rate": 1.254450792176441e-05, "loss": 1.0286, "step": 4162 }, { "epoch": 0.4356881214024071, "grad_norm": 1.928568939003756, "learning_rate": 1.2541229625663445e-05, "loss": 0.8945, "step": 4163 }, { "epoch": 0.4357927786499215, "grad_norm": 2.5708479006499085, "learning_rate": 1.2537951037570047e-05, "loss": 0.9281, "step": 4164 }, { "epoch": 0.4358974358974359, "grad_norm": 2.7402121626792773, "learning_rate": 1.2534672157860928e-05, "loss": 0.9111, "step": 4165 }, { "epoch": 0.4360020931449503, "grad_norm": 2.1020926928873744, "learning_rate": 1.2531392986912839e-05, "loss": 1.0641, "step": 4166 }, { "epoch": 0.43610675039246466, "grad_norm": 2.0147104900016153, "learning_rate": 1.2528113525102568e-05, "loss": 0.9251, "step": 4167 }, { "epoch": 0.43621140763997907, "grad_norm": 2.1361681568328463, "learning_rate": 1.2524833772806924e-05, "loss": 1.0695, "step": 4168 }, { "epoch": 0.43631606488749347, "grad_norm": 1.9844534323381833, "learning_rate": 1.2521553730402762e-05, "loss": 1.022, "step": 4169 }, { "epoch": 0.4364207221350079, "grad_norm": 2.1536611659943854, "learning_rate": 1.2518273398266963e-05, "loss": 1.0211, "step": 4170 }, { "epoch": 0.4365253793825222, "grad_norm": 1.9066376292841682, "learning_rate": 1.251499277677645e-05, "loss": 0.9381, "step": 4171 }, { "epoch": 0.43663003663003663, "grad_norm": 1.979655024358628, "learning_rate": 1.2511711866308167e-05, "loss": 0.9354, "step": 4172 }, { "epoch": 0.43673469387755104, "grad_norm": 1.930714983152759, "learning_rate": 1.2508430667239094e-05, "loss": 0.8527, "step": 4173 }, { "epoch": 0.4368393511250654, "grad_norm": 2.1508753326787144, "learning_rate": 1.2505149179946257e-05, "loss": 1.0292, "step": 4174 }, { "epoch": 0.4369440083725798, "grad_norm": 1.799153917872675, "learning_rate": 1.25018674048067e-05, "loss": 0.9594, "step": 4175 }, { "epoch": 0.4370486656200942, "grad_norm": 2.1158310072558493, "learning_rate": 1.2498585342197507e-05, "loss": 1.0612, "step": 4176 }, { "epoch": 0.4371533228676086, "grad_norm": 1.784434212580124, "learning_rate": 1.2495302992495793e-05, "loss": 1.0151, "step": 4177 }, { "epoch": 0.43725798011512296, "grad_norm": 1.9237924946084939, "learning_rate": 1.2492020356078705e-05, "loss": 0.8651, "step": 4178 }, { "epoch": 0.43736263736263736, "grad_norm": 2.5836868280522576, "learning_rate": 1.2488737433323429e-05, "loss": 0.9226, "step": 4179 }, { "epoch": 0.43746729461015177, "grad_norm": 2.019400496982174, "learning_rate": 1.2485454224607171e-05, "loss": 0.8531, "step": 4180 }, { "epoch": 0.4375719518576661, "grad_norm": 1.8357957654319403, "learning_rate": 1.2482170730307191e-05, "loss": 0.9594, "step": 4181 }, { "epoch": 0.4376766091051805, "grad_norm": 2.2211596017891084, "learning_rate": 1.2478886950800757e-05, "loss": 0.9483, "step": 4182 }, { "epoch": 0.43778126635269493, "grad_norm": 2.0328108335435093, "learning_rate": 1.2475602886465187e-05, "loss": 1.0499, "step": 4183 }, { "epoch": 0.43788592360020934, "grad_norm": 1.9848307097983084, "learning_rate": 1.2472318537677833e-05, "loss": 0.868, "step": 4184 }, { "epoch": 0.4379905808477237, "grad_norm": 1.9830682161242188, "learning_rate": 1.2469033904816062e-05, "loss": 0.952, "step": 4185 }, { "epoch": 0.4380952380952381, "grad_norm": 1.8538521533923138, "learning_rate": 1.246574898825729e-05, "loss": 0.8598, "step": 4186 }, { "epoch": 0.4381998953427525, "grad_norm": 2.3076134727295843, "learning_rate": 1.246246378837896e-05, "loss": 0.8155, "step": 4187 }, { "epoch": 0.43830455259026685, "grad_norm": 2.3923552507037185, "learning_rate": 1.2459178305558553e-05, "loss": 1.0912, "step": 4188 }, { "epoch": 0.43840920983778126, "grad_norm": 1.8990741957670583, "learning_rate": 1.2455892540173572e-05, "loss": 0.8969, "step": 4189 }, { "epoch": 0.43851386708529566, "grad_norm": 1.8395674522822645, "learning_rate": 1.2452606492601556e-05, "loss": 0.905, "step": 4190 }, { "epoch": 0.43861852433281007, "grad_norm": 1.859136871339698, "learning_rate": 1.2449320163220087e-05, "loss": 0.9013, "step": 4191 }, { "epoch": 0.4387231815803244, "grad_norm": 2.407371693080437, "learning_rate": 1.2446033552406763e-05, "loss": 0.8304, "step": 4192 }, { "epoch": 0.4388278388278388, "grad_norm": 2.388853746571948, "learning_rate": 1.2442746660539226e-05, "loss": 1.089, "step": 4193 }, { "epoch": 0.43893249607535323, "grad_norm": 1.9365985500657477, "learning_rate": 1.2439459487995145e-05, "loss": 0.9722, "step": 4194 }, { "epoch": 0.43903715332286763, "grad_norm": 2.963598627913234, "learning_rate": 1.2436172035152223e-05, "loss": 0.8233, "step": 4195 }, { "epoch": 0.439141810570382, "grad_norm": 2.2703125367436363, "learning_rate": 1.2432884302388198e-05, "loss": 1.0116, "step": 4196 }, { "epoch": 0.4392464678178964, "grad_norm": 2.129118913151718, "learning_rate": 1.2429596290080832e-05, "loss": 1.0584, "step": 4197 }, { "epoch": 0.4393511250654108, "grad_norm": 2.0256017953605383, "learning_rate": 1.2426307998607929e-05, "loss": 1.008, "step": 4198 }, { "epoch": 0.43945578231292515, "grad_norm": 2.028462889521559, "learning_rate": 1.2423019428347316e-05, "loss": 0.9788, "step": 4199 }, { "epoch": 0.43956043956043955, "grad_norm": 2.1206707709023003, "learning_rate": 1.2419730579676861e-05, "loss": 0.8672, "step": 4200 }, { "epoch": 0.43966509680795396, "grad_norm": 2.0885573335422705, "learning_rate": 1.2416441452974457e-05, "loss": 0.934, "step": 4201 }, { "epoch": 0.43976975405546836, "grad_norm": 1.9950363704570153, "learning_rate": 1.241315204861803e-05, "loss": 0.9081, "step": 4202 }, { "epoch": 0.4398744113029827, "grad_norm": 1.8813239970215452, "learning_rate": 1.2409862366985541e-05, "loss": 0.8674, "step": 4203 }, { "epoch": 0.4399790685504971, "grad_norm": 2.1510971349415757, "learning_rate": 1.240657240845498e-05, "loss": 0.9775, "step": 4204 }, { "epoch": 0.4400837257980115, "grad_norm": 2.455900196950652, "learning_rate": 1.2403282173404373e-05, "loss": 0.8979, "step": 4205 }, { "epoch": 0.4401883830455259, "grad_norm": 2.003615035493078, "learning_rate": 1.239999166221177e-05, "loss": 0.949, "step": 4206 }, { "epoch": 0.4402930402930403, "grad_norm": 2.122829687717139, "learning_rate": 1.2396700875255263e-05, "loss": 1.0648, "step": 4207 }, { "epoch": 0.4403976975405547, "grad_norm": 1.7612998154135466, "learning_rate": 1.2393409812912963e-05, "loss": 0.8243, "step": 4208 }, { "epoch": 0.4405023547880691, "grad_norm": 2.151390451235141, "learning_rate": 1.2390118475563024e-05, "loss": 1.1014, "step": 4209 }, { "epoch": 0.44060701203558345, "grad_norm": 1.769570106535323, "learning_rate": 1.238682686358363e-05, "loss": 0.9075, "step": 4210 }, { "epoch": 0.44071166928309785, "grad_norm": 1.8741126127698302, "learning_rate": 1.2383534977352989e-05, "loss": 0.9628, "step": 4211 }, { "epoch": 0.44081632653061226, "grad_norm": 2.11490519415388, "learning_rate": 1.2380242817249343e-05, "loss": 1.0346, "step": 4212 }, { "epoch": 0.44092098377812666, "grad_norm": 1.9472990388460194, "learning_rate": 1.2376950383650978e-05, "loss": 0.9551, "step": 4213 }, { "epoch": 0.441025641025641, "grad_norm": 2.1912277529516357, "learning_rate": 1.2373657676936194e-05, "loss": 0.9105, "step": 4214 }, { "epoch": 0.4411302982731554, "grad_norm": 2.216215119841429, "learning_rate": 1.237036469748333e-05, "loss": 1.0117, "step": 4215 }, { "epoch": 0.4412349555206698, "grad_norm": 1.9395445115395555, "learning_rate": 1.2367071445670755e-05, "loss": 0.9748, "step": 4216 }, { "epoch": 0.4413396127681842, "grad_norm": 2.4154728984909717, "learning_rate": 1.2363777921876876e-05, "loss": 1.048, "step": 4217 }, { "epoch": 0.4414442700156986, "grad_norm": 2.2295606753839006, "learning_rate": 1.2360484126480121e-05, "loss": 1.1085, "step": 4218 }, { "epoch": 0.441548927263213, "grad_norm": 21.310321961686725, "learning_rate": 1.2357190059858955e-05, "loss": 0.9542, "step": 4219 }, { "epoch": 0.4416535845107274, "grad_norm": 2.275313654143178, "learning_rate": 1.2353895722391873e-05, "loss": 1.0865, "step": 4220 }, { "epoch": 0.44175824175824174, "grad_norm": 2.008270022618239, "learning_rate": 1.2350601114457397e-05, "loss": 0.8425, "step": 4221 }, { "epoch": 0.44186289900575615, "grad_norm": 30.912506956421982, "learning_rate": 1.2347306236434091e-05, "loss": 1.8411, "step": 4222 }, { "epoch": 0.44196755625327055, "grad_norm": 14.423513861175078, "learning_rate": 1.234401108870054e-05, "loss": 1.2736, "step": 4223 }, { "epoch": 0.4420722135007849, "grad_norm": 2.5430152347578816, "learning_rate": 1.2340715671635365e-05, "loss": 0.9757, "step": 4224 }, { "epoch": 0.4421768707482993, "grad_norm": 18.390708578682016, "learning_rate": 1.2337419985617209e-05, "loss": 1.0966, "step": 4225 }, { "epoch": 0.4422815279958137, "grad_norm": 2.1563225849286964, "learning_rate": 1.2334124031024759e-05, "loss": 0.9753, "step": 4226 }, { "epoch": 0.4423861852433281, "grad_norm": 2.209454133083187, "learning_rate": 1.2330827808236729e-05, "loss": 1.0031, "step": 4227 }, { "epoch": 0.4424908424908425, "grad_norm": 2.0533065013534006, "learning_rate": 1.2327531317631858e-05, "loss": 1.0234, "step": 4228 }, { "epoch": 0.4425954997383569, "grad_norm": 2.267657972321477, "learning_rate": 1.232423455958892e-05, "loss": 0.9795, "step": 4229 }, { "epoch": 0.4427001569858713, "grad_norm": 2.3724169669212163, "learning_rate": 1.232093753448672e-05, "loss": 1.0609, "step": 4230 }, { "epoch": 0.44280481423338564, "grad_norm": 2.2647618736763864, "learning_rate": 1.2317640242704089e-05, "loss": 0.9277, "step": 4231 }, { "epoch": 0.44290947148090004, "grad_norm": 2.426127300716815, "learning_rate": 1.23143426846199e-05, "loss": 0.9978, "step": 4232 }, { "epoch": 0.44301412872841445, "grad_norm": 12.748064877007662, "learning_rate": 1.2311044860613044e-05, "loss": 1.059, "step": 4233 }, { "epoch": 0.44311878597592885, "grad_norm": 2.2159616926614953, "learning_rate": 1.2307746771062445e-05, "loss": 0.9901, "step": 4234 }, { "epoch": 0.4432234432234432, "grad_norm": 1.9980360395518977, "learning_rate": 1.2304448416347066e-05, "loss": 0.8674, "step": 4235 }, { "epoch": 0.4433281004709576, "grad_norm": 2.2959534557033416, "learning_rate": 1.2301149796845895e-05, "loss": 1.0436, "step": 4236 }, { "epoch": 0.443432757718472, "grad_norm": 2.449657687751539, "learning_rate": 1.2297850912937946e-05, "loss": 1.1151, "step": 4237 }, { "epoch": 0.4435374149659864, "grad_norm": 1.852361721600591, "learning_rate": 1.2294551765002265e-05, "loss": 0.8302, "step": 4238 }, { "epoch": 0.44364207221350077, "grad_norm": 1.7397853007381971, "learning_rate": 1.229125235341794e-05, "loss": 0.839, "step": 4239 }, { "epoch": 0.4437467294610152, "grad_norm": 2.309355901645421, "learning_rate": 1.228795267856407e-05, "loss": 1.0563, "step": 4240 }, { "epoch": 0.4438513867085296, "grad_norm": 2.1118423440247627, "learning_rate": 1.2284652740819803e-05, "loss": 1.0022, "step": 4241 }, { "epoch": 0.44395604395604393, "grad_norm": 2.214872624726237, "learning_rate": 1.2281352540564302e-05, "loss": 0.9913, "step": 4242 }, { "epoch": 0.44406070120355834, "grad_norm": 2.239625811455294, "learning_rate": 1.227805207817677e-05, "loss": 0.9527, "step": 4243 }, { "epoch": 0.44416535845107274, "grad_norm": 31.368906412723593, "learning_rate": 1.2274751354036439e-05, "loss": 1.574, "step": 4244 }, { "epoch": 0.44427001569858715, "grad_norm": 1.9338765858884037, "learning_rate": 1.2271450368522562e-05, "loss": 0.9677, "step": 4245 }, { "epoch": 0.4443746729461015, "grad_norm": 2.531637449689326, "learning_rate": 1.2268149122014436e-05, "loss": 1.0082, "step": 4246 }, { "epoch": 0.4444793301936159, "grad_norm": 12.934239661888748, "learning_rate": 1.2264847614891373e-05, "loss": 1.0943, "step": 4247 }, { "epoch": 0.4445839874411303, "grad_norm": 2.519052355502292, "learning_rate": 1.2261545847532731e-05, "loss": 0.958, "step": 4248 }, { "epoch": 0.44468864468864466, "grad_norm": 109.81451808318943, "learning_rate": 1.225824382031789e-05, "loss": 2.7126, "step": 4249 }, { "epoch": 0.44479330193615907, "grad_norm": 2.461447085373303, "learning_rate": 1.2254941533626253e-05, "loss": 0.9693, "step": 4250 }, { "epoch": 0.4448979591836735, "grad_norm": 2.9594119092355458, "learning_rate": 1.225163898783726e-05, "loss": 0.8497, "step": 4251 }, { "epoch": 0.4450026164311879, "grad_norm": 5.254203001278294, "learning_rate": 1.2248336183330384e-05, "loss": 1.0249, "step": 4252 }, { "epoch": 0.44510727367870223, "grad_norm": 4.105904824801587, "learning_rate": 1.2245033120485127e-05, "loss": 0.9186, "step": 4253 }, { "epoch": 0.44521193092621664, "grad_norm": 2.945698769993366, "learning_rate": 1.2241729799681008e-05, "loss": 0.8103, "step": 4254 }, { "epoch": 0.44531658817373104, "grad_norm": 5.8775538590842995, "learning_rate": 1.223842622129759e-05, "loss": 1.147, "step": 4255 }, { "epoch": 0.44542124542124545, "grad_norm": 30.161697236880144, "learning_rate": 1.2235122385714467e-05, "loss": 4.1431, "step": 4256 }, { "epoch": 0.4455259026687598, "grad_norm": 8.326858087895113, "learning_rate": 1.2231818293311246e-05, "loss": 1.1568, "step": 4257 }, { "epoch": 0.4456305599162742, "grad_norm": 9.194801521266452, "learning_rate": 1.2228513944467581e-05, "loss": 1.2268, "step": 4258 }, { "epoch": 0.4457352171637886, "grad_norm": 7.57812928636459, "learning_rate": 1.2225209339563144e-05, "loss": 1.1154, "step": 4259 }, { "epoch": 0.44583987441130296, "grad_norm": 6.130800488712524, "learning_rate": 1.2221904478977646e-05, "loss": 0.9692, "step": 4260 }, { "epoch": 0.44594453165881737, "grad_norm": 4.833189877025352, "learning_rate": 1.2218599363090818e-05, "loss": 0.9329, "step": 4261 }, { "epoch": 0.4460491889063318, "grad_norm": 3.840930122922004, "learning_rate": 1.2215293992282424e-05, "loss": 1.103, "step": 4262 }, { "epoch": 0.4461538461538462, "grad_norm": 3.8094608876035267, "learning_rate": 1.2211988366932262e-05, "loss": 1.0614, "step": 4263 }, { "epoch": 0.44625850340136053, "grad_norm": 4.16696149043726, "learning_rate": 1.2208682487420149e-05, "loss": 0.9907, "step": 4264 }, { "epoch": 0.44636316064887493, "grad_norm": 4.495255491410338, "learning_rate": 1.2205376354125943e-05, "loss": 1.1127, "step": 4265 }, { "epoch": 0.44646781789638934, "grad_norm": 3.289846433479056, "learning_rate": 1.2202069967429524e-05, "loss": 1.0025, "step": 4266 }, { "epoch": 0.4465724751439037, "grad_norm": 2.922002497701106, "learning_rate": 1.2198763327710801e-05, "loss": 1.0079, "step": 4267 }, { "epoch": 0.4466771323914181, "grad_norm": 2.33163105345098, "learning_rate": 1.2195456435349714e-05, "loss": 1.0025, "step": 4268 }, { "epoch": 0.4467817896389325, "grad_norm": 2.993882810202546, "learning_rate": 1.219214929072623e-05, "loss": 1.0342, "step": 4269 }, { "epoch": 0.4468864468864469, "grad_norm": 3.138840354515787, "learning_rate": 1.2188841894220351e-05, "loss": 1.0363, "step": 4270 }, { "epoch": 0.44699110413396126, "grad_norm": 2.8709505565686144, "learning_rate": 1.21855342462121e-05, "loss": 0.9547, "step": 4271 }, { "epoch": 0.44709576138147566, "grad_norm": 3.5060252310763858, "learning_rate": 1.2182226347081536e-05, "loss": 1.0352, "step": 4272 }, { "epoch": 0.44720041862899007, "grad_norm": 3.067631908770296, "learning_rate": 1.2178918197208737e-05, "loss": 0.936, "step": 4273 }, { "epoch": 0.4473050758765044, "grad_norm": 3.033451592129497, "learning_rate": 1.2175609796973822e-05, "loss": 0.9364, "step": 4274 }, { "epoch": 0.4474097331240188, "grad_norm": 2.0393393340393087, "learning_rate": 1.2172301146756933e-05, "loss": 0.9712, "step": 4275 }, { "epoch": 0.44751439037153323, "grad_norm": 38.81812474011344, "learning_rate": 1.2168992246938235e-05, "loss": 2.2501, "step": 4276 }, { "epoch": 0.44761904761904764, "grad_norm": 2.253226221283191, "learning_rate": 1.2165683097897931e-05, "loss": 1.1345, "step": 4277 }, { "epoch": 0.447723704866562, "grad_norm": 2.3205323315504813, "learning_rate": 1.2162373700016253e-05, "loss": 1.0651, "step": 4278 }, { "epoch": 0.4478283621140764, "grad_norm": 4.7092329513884295, "learning_rate": 1.2159064053673449e-05, "loss": 0.9147, "step": 4279 }, { "epoch": 0.4479330193615908, "grad_norm": 29.970922275864194, "learning_rate": 1.2155754159249811e-05, "loss": 1.2273, "step": 4280 }, { "epoch": 0.4480376766091052, "grad_norm": 2.187035261760947, "learning_rate": 1.2152444017125647e-05, "loss": 0.9912, "step": 4281 }, { "epoch": 0.44814233385661956, "grad_norm": 3.2809808282344446, "learning_rate": 1.2149133627681303e-05, "loss": 1.1204, "step": 4282 }, { "epoch": 0.44824699110413396, "grad_norm": 2.045684946775204, "learning_rate": 1.2145822991297148e-05, "loss": 1.0637, "step": 4283 }, { "epoch": 0.44835164835164837, "grad_norm": 2.000665560599059, "learning_rate": 1.2142512108353583e-05, "loss": 1.0627, "step": 4284 }, { "epoch": 0.4484563055991627, "grad_norm": 1.879007116893966, "learning_rate": 1.2139200979231033e-05, "loss": 0.9889, "step": 4285 }, { "epoch": 0.4485609628466771, "grad_norm": 2.191861241077447, "learning_rate": 1.2135889604309952e-05, "loss": 0.9967, "step": 4286 }, { "epoch": 0.44866562009419153, "grad_norm": 2.090962524194402, "learning_rate": 1.2132577983970828e-05, "loss": 0.9794, "step": 4287 }, { "epoch": 0.44877027734170594, "grad_norm": 1.88873946046564, "learning_rate": 1.212926611859417e-05, "loss": 0.9027, "step": 4288 }, { "epoch": 0.4488749345892203, "grad_norm": 2.2151145974628763, "learning_rate": 1.212595400856052e-05, "loss": 0.9032, "step": 4289 }, { "epoch": 0.4489795918367347, "grad_norm": 2.130497424667586, "learning_rate": 1.212264165425044e-05, "loss": 0.9292, "step": 4290 }, { "epoch": 0.4490842490842491, "grad_norm": 2.0469637222247554, "learning_rate": 1.2119329056044533e-05, "loss": 1.0018, "step": 4291 }, { "epoch": 0.44918890633176345, "grad_norm": 2.5073666390831466, "learning_rate": 1.2116016214323423e-05, "loss": 1.0831, "step": 4292 }, { "epoch": 0.44929356357927785, "grad_norm": 2.1859064919513522, "learning_rate": 1.2112703129467758e-05, "loss": 1.0436, "step": 4293 }, { "epoch": 0.44939822082679226, "grad_norm": 2.0934320510193682, "learning_rate": 1.2109389801858225e-05, "loss": 0.9668, "step": 4294 }, { "epoch": 0.44950287807430667, "grad_norm": 2.1756826044133004, "learning_rate": 1.2106076231875524e-05, "loss": 0.9583, "step": 4295 }, { "epoch": 0.449607535321821, "grad_norm": 2.353709737315531, "learning_rate": 1.2102762419900391e-05, "loss": 1.0629, "step": 4296 }, { "epoch": 0.4497121925693354, "grad_norm": 1.952613682916308, "learning_rate": 1.20994483663136e-05, "loss": 0.9243, "step": 4297 }, { "epoch": 0.44981684981684983, "grad_norm": 2.2467039184701445, "learning_rate": 1.209613407149593e-05, "loss": 1.0368, "step": 4298 }, { "epoch": 0.44992150706436423, "grad_norm": 2.0181287771348284, "learning_rate": 1.2092819535828208e-05, "loss": 0.8519, "step": 4299 }, { "epoch": 0.4500261643118786, "grad_norm": 1.9314596098382928, "learning_rate": 1.208950475969128e-05, "loss": 0.8568, "step": 4300 }, { "epoch": 0.450130821559393, "grad_norm": 1.832701400236754, "learning_rate": 1.208618974346602e-05, "loss": 0.927, "step": 4301 }, { "epoch": 0.4502354788069074, "grad_norm": 1.9459296613855224, "learning_rate": 1.2082874487533324e-05, "loss": 1.0155, "step": 4302 }, { "epoch": 0.45034013605442175, "grad_norm": 2.3792299560839667, "learning_rate": 1.2079558992274131e-05, "loss": 1.0069, "step": 4303 }, { "epoch": 0.45044479330193615, "grad_norm": 2.170868389797402, "learning_rate": 1.2076243258069396e-05, "loss": 1.0151, "step": 4304 }, { "epoch": 0.45054945054945056, "grad_norm": 2.2013372328892458, "learning_rate": 1.2072927285300099e-05, "loss": 0.9911, "step": 4305 }, { "epoch": 0.45065410779696496, "grad_norm": 2.2705803608142863, "learning_rate": 1.2069611074347255e-05, "loss": 1.0045, "step": 4306 }, { "epoch": 0.4507587650444793, "grad_norm": 1.9277985272803437, "learning_rate": 1.2066294625591903e-05, "loss": 0.8673, "step": 4307 }, { "epoch": 0.4508634222919937, "grad_norm": 2.0440176202060156, "learning_rate": 1.206297793941511e-05, "loss": 1.0108, "step": 4308 }, { "epoch": 0.4509680795395081, "grad_norm": 2.5462646813427097, "learning_rate": 1.205966101619797e-05, "loss": 0.859, "step": 4309 }, { "epoch": 0.4510727367870225, "grad_norm": 2.248355643793843, "learning_rate": 1.2056343856321605e-05, "loss": 1.1021, "step": 4310 }, { "epoch": 0.4511773940345369, "grad_norm": 4.908250414144032, "learning_rate": 1.2053026460167158e-05, "loss": 1.0568, "step": 4311 }, { "epoch": 0.4512820512820513, "grad_norm": 2.248952995961383, "learning_rate": 1.2049708828115811e-05, "loss": 0.9196, "step": 4312 }, { "epoch": 0.4513867085295657, "grad_norm": 1.9290283733393867, "learning_rate": 1.2046390960548765e-05, "loss": 1.0563, "step": 4313 }, { "epoch": 0.45149136577708004, "grad_norm": 2.0660674453272336, "learning_rate": 1.204307285784725e-05, "loss": 0.9519, "step": 4314 }, { "epoch": 0.45159602302459445, "grad_norm": 1.996879072685833, "learning_rate": 1.2039754520392522e-05, "loss": 1.0722, "step": 4315 }, { "epoch": 0.45170068027210886, "grad_norm": 2.5022938436613624, "learning_rate": 1.2036435948565863e-05, "loss": 1.0778, "step": 4316 }, { "epoch": 0.4518053375196232, "grad_norm": 2.2109024944276885, "learning_rate": 1.2033117142748585e-05, "loss": 1.0058, "step": 4317 }, { "epoch": 0.4519099947671376, "grad_norm": 2.2693762543652674, "learning_rate": 1.2029798103322028e-05, "loss": 0.9833, "step": 4318 }, { "epoch": 0.452014652014652, "grad_norm": 2.116453756308576, "learning_rate": 1.2026478830667551e-05, "loss": 0.9363, "step": 4319 }, { "epoch": 0.4521193092621664, "grad_norm": 3.342959773212483, "learning_rate": 1.2023159325166551e-05, "loss": 0.8688, "step": 4320 }, { "epoch": 0.4522239665096808, "grad_norm": 2.3163640304872812, "learning_rate": 1.2019839587200442e-05, "loss": 0.8287, "step": 4321 }, { "epoch": 0.4523286237571952, "grad_norm": 1.9681851990856516, "learning_rate": 1.2016519617150668e-05, "loss": 0.9896, "step": 4322 }, { "epoch": 0.4524332810047096, "grad_norm": 2.143371304104883, "learning_rate": 1.2013199415398706e-05, "loss": 0.8437, "step": 4323 }, { "epoch": 0.452537938252224, "grad_norm": 2.4121771940664587, "learning_rate": 1.2009878982326047e-05, "loss": 0.898, "step": 4324 }, { "epoch": 0.45264259549973834, "grad_norm": 2.2077705572979847, "learning_rate": 1.2006558318314219e-05, "loss": 0.981, "step": 4325 }, { "epoch": 0.45274725274725275, "grad_norm": 1.8137959654643108, "learning_rate": 1.2003237423744773e-05, "loss": 0.9494, "step": 4326 }, { "epoch": 0.45285190999476715, "grad_norm": 2.043816728834952, "learning_rate": 1.1999916298999285e-05, "loss": 1.0507, "step": 4327 }, { "epoch": 0.4529565672422815, "grad_norm": 1.8399500676215097, "learning_rate": 1.1996594944459363e-05, "loss": 0.9921, "step": 4328 }, { "epoch": 0.4530612244897959, "grad_norm": 1.8888260838202213, "learning_rate": 1.199327336050663e-05, "loss": 1.0088, "step": 4329 }, { "epoch": 0.4531658817373103, "grad_norm": 2.2320849845692474, "learning_rate": 1.198995154752275e-05, "loss": 1.0454, "step": 4330 }, { "epoch": 0.4532705389848247, "grad_norm": 2.315466026238162, "learning_rate": 1.1986629505889402e-05, "loss": 0.8162, "step": 4331 }, { "epoch": 0.4533751962323391, "grad_norm": 2.0448363790200745, "learning_rate": 1.1983307235988297e-05, "loss": 0.92, "step": 4332 }, { "epoch": 0.4534798534798535, "grad_norm": 1.9783944834680685, "learning_rate": 1.1979984738201172e-05, "loss": 0.9508, "step": 4333 }, { "epoch": 0.4535845107273679, "grad_norm": 2.1566268663323167, "learning_rate": 1.1976662012909781e-05, "loss": 0.9213, "step": 4334 }, { "epoch": 0.45368916797488223, "grad_norm": 2.088854843791031, "learning_rate": 1.1973339060495923e-05, "loss": 0.9617, "step": 4335 }, { "epoch": 0.45379382522239664, "grad_norm": 2.0416167816146977, "learning_rate": 1.1970015881341405e-05, "loss": 0.9292, "step": 4336 }, { "epoch": 0.45389848246991105, "grad_norm": 2.0230522167032206, "learning_rate": 1.196669247582807e-05, "loss": 0.8476, "step": 4337 }, { "epoch": 0.45400313971742545, "grad_norm": 2.1730001331296704, "learning_rate": 1.1963368844337779e-05, "loss": 0.9025, "step": 4338 }, { "epoch": 0.4541077969649398, "grad_norm": 1.8508787301092124, "learning_rate": 1.196004498725243e-05, "loss": 0.8897, "step": 4339 }, { "epoch": 0.4542124542124542, "grad_norm": 2.2654361716293336, "learning_rate": 1.195672090495394e-05, "loss": 1.0252, "step": 4340 }, { "epoch": 0.4543171114599686, "grad_norm": 1.8928691469715988, "learning_rate": 1.1953396597824249e-05, "loss": 0.9031, "step": 4341 }, { "epoch": 0.454421768707483, "grad_norm": 2.3645611004316702, "learning_rate": 1.1950072066245328e-05, "loss": 1.0823, "step": 4342 }, { "epoch": 0.45452642595499737, "grad_norm": 1.7075263528138125, "learning_rate": 1.1946747310599176e-05, "loss": 0.9554, "step": 4343 }, { "epoch": 0.4546310832025118, "grad_norm": 2.3091455726092884, "learning_rate": 1.1943422331267811e-05, "loss": 1.0067, "step": 4344 }, { "epoch": 0.4547357404500262, "grad_norm": 2.098765765313036, "learning_rate": 1.194009712863328e-05, "loss": 0.9867, "step": 4345 }, { "epoch": 0.45484039769754053, "grad_norm": 1.7727533702277667, "learning_rate": 1.1936771703077656e-05, "loss": 0.9671, "step": 4346 }, { "epoch": 0.45494505494505494, "grad_norm": 2.074696485047073, "learning_rate": 1.1933446054983035e-05, "loss": 0.9257, "step": 4347 }, { "epoch": 0.45504971219256934, "grad_norm": 2.039328732869746, "learning_rate": 1.1930120184731544e-05, "loss": 0.9301, "step": 4348 }, { "epoch": 0.45515436944008375, "grad_norm": 2.1160277945116586, "learning_rate": 1.1926794092705335e-05, "loss": 0.9874, "step": 4349 }, { "epoch": 0.4552590266875981, "grad_norm": 1.8772607321330006, "learning_rate": 1.1923467779286573e-05, "loss": 0.941, "step": 4350 }, { "epoch": 0.4553636839351125, "grad_norm": 1.9022941147099999, "learning_rate": 1.1920141244857466e-05, "loss": 0.908, "step": 4351 }, { "epoch": 0.4554683411826269, "grad_norm": 2.3523711222096404, "learning_rate": 1.191681448980024e-05, "loss": 0.8785, "step": 4352 }, { "epoch": 0.45557299843014126, "grad_norm": 2.002200825932997, "learning_rate": 1.191348751449714e-05, "loss": 1.0048, "step": 4353 }, { "epoch": 0.45567765567765567, "grad_norm": 2.0270694451502282, "learning_rate": 1.1910160319330446e-05, "loss": 1.0233, "step": 4354 }, { "epoch": 0.4557823129251701, "grad_norm": 2.0541524762488814, "learning_rate": 1.1906832904682459e-05, "loss": 0.7993, "step": 4355 }, { "epoch": 0.4558869701726845, "grad_norm": 1.8638621351295752, "learning_rate": 1.1903505270935507e-05, "loss": 0.8905, "step": 4356 }, { "epoch": 0.45599162742019883, "grad_norm": 1.9736021287043854, "learning_rate": 1.190017741847194e-05, "loss": 1.0373, "step": 4357 }, { "epoch": 0.45609628466771324, "grad_norm": 1.8797795224789213, "learning_rate": 1.1896849347674135e-05, "loss": 0.9821, "step": 4358 }, { "epoch": 0.45620094191522764, "grad_norm": 1.9368205053092564, "learning_rate": 1.1893521058924497e-05, "loss": 1.0668, "step": 4359 }, { "epoch": 0.456305599162742, "grad_norm": 2.132381992165538, "learning_rate": 1.1890192552605447e-05, "loss": 0.9218, "step": 4360 }, { "epoch": 0.4564102564102564, "grad_norm": 2.0456266510770056, "learning_rate": 1.1886863829099442e-05, "loss": 0.8965, "step": 4361 }, { "epoch": 0.4565149136577708, "grad_norm": 2.2461716024926823, "learning_rate": 1.1883534888788961e-05, "loss": 1.072, "step": 4362 }, { "epoch": 0.4566195709052852, "grad_norm": 1.8114815853829138, "learning_rate": 1.18802057320565e-05, "loss": 0.9784, "step": 4363 }, { "epoch": 0.45672422815279956, "grad_norm": 2.074839338100702, "learning_rate": 1.1876876359284589e-05, "loss": 0.9458, "step": 4364 }, { "epoch": 0.45682888540031397, "grad_norm": 3.272621021673472, "learning_rate": 1.187354677085578e-05, "loss": 1.0344, "step": 4365 }, { "epoch": 0.4569335426478284, "grad_norm": 1.8423824960811983, "learning_rate": 1.1870216967152649e-05, "loss": 0.9991, "step": 4366 }, { "epoch": 0.4570381998953428, "grad_norm": 2.0436626042627735, "learning_rate": 1.1866886948557796e-05, "loss": 0.9367, "step": 4367 }, { "epoch": 0.45714285714285713, "grad_norm": 2.243427552622139, "learning_rate": 1.1863556715453847e-05, "loss": 1.081, "step": 4368 }, { "epoch": 0.45724751439037153, "grad_norm": 2.1396337485328965, "learning_rate": 1.1860226268223459e-05, "loss": 1.0269, "step": 4369 }, { "epoch": 0.45735217163788594, "grad_norm": 2.0912618560301715, "learning_rate": 1.1856895607249297e-05, "loss": 0.9635, "step": 4370 }, { "epoch": 0.4574568288854003, "grad_norm": 1.8134320104293313, "learning_rate": 1.1853564732914066e-05, "loss": 0.8776, "step": 4371 }, { "epoch": 0.4575614861329147, "grad_norm": 1.873748145270781, "learning_rate": 1.185023364560049e-05, "loss": 0.8853, "step": 4372 }, { "epoch": 0.4576661433804291, "grad_norm": 1.8136741058042303, "learning_rate": 1.1846902345691317e-05, "loss": 0.98, "step": 4373 }, { "epoch": 0.4577708006279435, "grad_norm": 1.7292941149458805, "learning_rate": 1.1843570833569324e-05, "loss": 0.9699, "step": 4374 }, { "epoch": 0.45787545787545786, "grad_norm": 2.6314289006744587, "learning_rate": 1.1840239109617302e-05, "loss": 1.1551, "step": 4375 }, { "epoch": 0.45798011512297226, "grad_norm": 22.007184596120965, "learning_rate": 1.1836907174218078e-05, "loss": 1.8465, "step": 4376 }, { "epoch": 0.45808477237048667, "grad_norm": 2.295041122834174, "learning_rate": 1.183357502775449e-05, "loss": 1.0055, "step": 4377 }, { "epoch": 0.458189429618001, "grad_norm": 31.559871912501077, "learning_rate": 1.1830242670609417e-05, "loss": 1.7263, "step": 4378 }, { "epoch": 0.4582940868655154, "grad_norm": 2.0871333437628055, "learning_rate": 1.1826910103165753e-05, "loss": 0.8551, "step": 4379 }, { "epoch": 0.45839874411302983, "grad_norm": 2.726282817146809, "learning_rate": 1.1823577325806414e-05, "loss": 1.0138, "step": 4380 }, { "epoch": 0.45850340136054424, "grad_norm": 2.1086177426453374, "learning_rate": 1.1820244338914341e-05, "loss": 0.9771, "step": 4381 }, { "epoch": 0.4586080586080586, "grad_norm": 2.3764884198932, "learning_rate": 1.1816911142872507e-05, "loss": 1.0197, "step": 4382 }, { "epoch": 0.458712715855573, "grad_norm": 2.045556899175085, "learning_rate": 1.1813577738063897e-05, "loss": 0.9103, "step": 4383 }, { "epoch": 0.4588173731030874, "grad_norm": 2.5972265367678564, "learning_rate": 1.1810244124871528e-05, "loss": 1.0018, "step": 4384 }, { "epoch": 0.4589220303506018, "grad_norm": 2.551709456154248, "learning_rate": 1.1806910303678443e-05, "loss": 1.0322, "step": 4385 }, { "epoch": 0.45902668759811616, "grad_norm": 2.1748517522412407, "learning_rate": 1.1803576274867698e-05, "loss": 0.9927, "step": 4386 }, { "epoch": 0.45913134484563056, "grad_norm": 3.812812110949521, "learning_rate": 1.1800242038822382e-05, "loss": 0.9246, "step": 4387 }, { "epoch": 0.45923600209314497, "grad_norm": 2.376394222425811, "learning_rate": 1.1796907595925608e-05, "loss": 0.9445, "step": 4388 }, { "epoch": 0.4593406593406593, "grad_norm": 2.5161863830002735, "learning_rate": 1.179357294656051e-05, "loss": 1.0452, "step": 4389 }, { "epoch": 0.4594453165881737, "grad_norm": 2.6069106789560488, "learning_rate": 1.179023809111024e-05, "loss": 1.1245, "step": 4390 }, { "epoch": 0.45954997383568813, "grad_norm": 2.4849062318250397, "learning_rate": 1.1786903029957987e-05, "loss": 1.0237, "step": 4391 }, { "epoch": 0.45965463108320254, "grad_norm": 2.1071131463822357, "learning_rate": 1.1783567763486954e-05, "loss": 0.9831, "step": 4392 }, { "epoch": 0.4597592883307169, "grad_norm": 2.7912058793141115, "learning_rate": 1.1780232292080371e-05, "loss": 0.9761, "step": 4393 }, { "epoch": 0.4598639455782313, "grad_norm": 2.033218484100733, "learning_rate": 1.1776896616121483e-05, "loss": 0.8881, "step": 4394 }, { "epoch": 0.4599686028257457, "grad_norm": 2.044503825084946, "learning_rate": 1.1773560735993576e-05, "loss": 1.0179, "step": 4395 }, { "epoch": 0.46007326007326005, "grad_norm": 2.2543377501620308, "learning_rate": 1.1770224652079943e-05, "loss": 0.823, "step": 4396 }, { "epoch": 0.46017791732077445, "grad_norm": 2.3807439687952217, "learning_rate": 1.1766888364763912e-05, "loss": 0.8314, "step": 4397 }, { "epoch": 0.46028257456828886, "grad_norm": 1.970046805954839, "learning_rate": 1.1763551874428823e-05, "loss": 0.879, "step": 4398 }, { "epoch": 0.46038723181580327, "grad_norm": 2.1330010409516116, "learning_rate": 1.1760215181458048e-05, "loss": 1.1079, "step": 4399 }, { "epoch": 0.4604918890633176, "grad_norm": 1.9239002886217489, "learning_rate": 1.1756878286234983e-05, "loss": 0.9412, "step": 4400 }, { "epoch": 0.460596546310832, "grad_norm": 2.1793334815489693, "learning_rate": 1.1753541189143039e-05, "loss": 0.8828, "step": 4401 }, { "epoch": 0.46070120355834643, "grad_norm": 2.254420506227355, "learning_rate": 1.1750203890565661e-05, "loss": 0.8499, "step": 4402 }, { "epoch": 0.4608058608058608, "grad_norm": 2.1754811115427843, "learning_rate": 1.1746866390886304e-05, "loss": 0.924, "step": 4403 }, { "epoch": 0.4609105180533752, "grad_norm": 2.396703022879703, "learning_rate": 1.1743528690488457e-05, "loss": 0.936, "step": 4404 }, { "epoch": 0.4610151753008896, "grad_norm": 1.8475247870093567, "learning_rate": 1.1740190789755635e-05, "loss": 0.9359, "step": 4405 }, { "epoch": 0.461119832548404, "grad_norm": 1.9948668165368353, "learning_rate": 1.1736852689071357e-05, "loss": 0.9159, "step": 4406 }, { "epoch": 0.46122448979591835, "grad_norm": 2.3311014731095474, "learning_rate": 1.1733514388819186e-05, "loss": 1.0174, "step": 4407 }, { "epoch": 0.46132914704343275, "grad_norm": 2.657571922556327, "learning_rate": 1.1730175889382697e-05, "loss": 0.9448, "step": 4408 }, { "epoch": 0.46143380429094716, "grad_norm": 1.9668833389759839, "learning_rate": 1.1726837191145492e-05, "loss": 0.9506, "step": 4409 }, { "epoch": 0.46153846153846156, "grad_norm": 161.64348480668815, "learning_rate": 1.1723498294491196e-05, "loss": 3.7708, "step": 4410 }, { "epoch": 0.4616431187859759, "grad_norm": 2.1254582340869836, "learning_rate": 1.1720159199803447e-05, "loss": 1.0292, "step": 4411 }, { "epoch": 0.4617477760334903, "grad_norm": 3.149115475190535, "learning_rate": 1.1716819907465922e-05, "loss": 1.0485, "step": 4412 }, { "epoch": 0.4618524332810047, "grad_norm": 2.7285678675780627, "learning_rate": 1.1713480417862306e-05, "loss": 1.0172, "step": 4413 }, { "epoch": 0.4619570905285191, "grad_norm": 2.2641817986994957, "learning_rate": 1.171014073137632e-05, "loss": 0.829, "step": 4414 }, { "epoch": 0.4620617477760335, "grad_norm": 21.38524678867409, "learning_rate": 1.1706800848391694e-05, "loss": 2.726, "step": 4415 }, { "epoch": 0.4621664050235479, "grad_norm": 2.442948735347796, "learning_rate": 1.1703460769292192e-05, "loss": 0.9794, "step": 4416 }, { "epoch": 0.4622710622710623, "grad_norm": 2.762838250862807, "learning_rate": 1.1700120494461595e-05, "loss": 1.0578, "step": 4417 }, { "epoch": 0.46237571951857664, "grad_norm": 3.0564755310218885, "learning_rate": 1.1696780024283708e-05, "loss": 0.912, "step": 4418 }, { "epoch": 0.46248037676609105, "grad_norm": 4.2059114992513935, "learning_rate": 1.1693439359142356e-05, "loss": 1.211, "step": 4419 }, { "epoch": 0.46258503401360546, "grad_norm": 3.764016376227401, "learning_rate": 1.1690098499421384e-05, "loss": 1.0091, "step": 4420 }, { "epoch": 0.4626896912611198, "grad_norm": 3.1796219459492705, "learning_rate": 1.1686757445504673e-05, "loss": 1.0657, "step": 4421 }, { "epoch": 0.4627943485086342, "grad_norm": 2.579766465159248, "learning_rate": 1.1683416197776113e-05, "loss": 0.9986, "step": 4422 }, { "epoch": 0.4628990057561486, "grad_norm": 2.122008428264213, "learning_rate": 1.1680074756619614e-05, "loss": 1.0272, "step": 4423 }, { "epoch": 0.463003663003663, "grad_norm": 2.26997985317698, "learning_rate": 1.1676733122419124e-05, "loss": 0.9752, "step": 4424 }, { "epoch": 0.4631083202511774, "grad_norm": 2.2969054913130313, "learning_rate": 1.1673391295558596e-05, "loss": 0.9876, "step": 4425 }, { "epoch": 0.4632129774986918, "grad_norm": 2.7300799446270467, "learning_rate": 1.1670049276422017e-05, "loss": 1.006, "step": 4426 }, { "epoch": 0.4633176347462062, "grad_norm": 2.843913255345786, "learning_rate": 1.166670706539339e-05, "loss": 1.0773, "step": 4427 }, { "epoch": 0.4634222919937206, "grad_norm": 2.661110599574868, "learning_rate": 1.1663364662856747e-05, "loss": 0.9482, "step": 4428 }, { "epoch": 0.46352694924123494, "grad_norm": 2.2380660123868115, "learning_rate": 1.1660022069196126e-05, "loss": 0.8998, "step": 4429 }, { "epoch": 0.46363160648874935, "grad_norm": 2.178013787617105, "learning_rate": 1.1656679284795607e-05, "loss": 0.9949, "step": 4430 }, { "epoch": 0.46373626373626375, "grad_norm": 2.1088259317784295, "learning_rate": 1.165333631003928e-05, "loss": 0.9476, "step": 4431 }, { "epoch": 0.4638409209837781, "grad_norm": 2.185800456163915, "learning_rate": 1.1649993145311258e-05, "loss": 1.0237, "step": 4432 }, { "epoch": 0.4639455782312925, "grad_norm": 2.148724627286947, "learning_rate": 1.1646649790995679e-05, "loss": 0.9447, "step": 4433 }, { "epoch": 0.4640502354788069, "grad_norm": 2.4213799049050566, "learning_rate": 1.1643306247476702e-05, "loss": 0.9744, "step": 4434 }, { "epoch": 0.4641548927263213, "grad_norm": 2.088289187776693, "learning_rate": 1.1639962515138506e-05, "loss": 0.9488, "step": 4435 }, { "epoch": 0.4642595499738357, "grad_norm": 2.1310001246837653, "learning_rate": 1.1636618594365291e-05, "loss": 0.9596, "step": 4436 }, { "epoch": 0.4643642072213501, "grad_norm": 2.4081476306831053, "learning_rate": 1.1633274485541284e-05, "loss": 0.9266, "step": 4437 }, { "epoch": 0.4644688644688645, "grad_norm": 2.247732950837275, "learning_rate": 1.1629930189050724e-05, "loss": 1.141, "step": 4438 }, { "epoch": 0.46457352171637883, "grad_norm": 2.7553884410693534, "learning_rate": 1.1626585705277886e-05, "loss": 0.977, "step": 4439 }, { "epoch": 0.46467817896389324, "grad_norm": 2.093553580818768, "learning_rate": 1.1623241034607049e-05, "loss": 0.92, "step": 4440 }, { "epoch": 0.46478283621140765, "grad_norm": 2.102413811719979, "learning_rate": 1.161989617742253e-05, "loss": 0.9748, "step": 4441 }, { "epoch": 0.46488749345892205, "grad_norm": 2.1474470371663523, "learning_rate": 1.1616551134108649e-05, "loss": 0.8978, "step": 4442 }, { "epoch": 0.4649921507064364, "grad_norm": 1.8755006879257996, "learning_rate": 1.1613205905049774e-05, "loss": 1.0037, "step": 4443 }, { "epoch": 0.4650968079539508, "grad_norm": 12.367373504928828, "learning_rate": 1.1609860490630265e-05, "loss": 1.7202, "step": 4444 }, { "epoch": 0.4652014652014652, "grad_norm": 2.434919186952808, "learning_rate": 1.1606514891234526e-05, "loss": 1.0571, "step": 4445 }, { "epoch": 0.46530612244897956, "grad_norm": 8.610048201919636, "learning_rate": 1.1603169107246966e-05, "loss": 1.6151, "step": 4446 }, { "epoch": 0.46541077969649397, "grad_norm": 1.9525960112801914, "learning_rate": 1.1599823139052027e-05, "loss": 0.875, "step": 4447 }, { "epoch": 0.4655154369440084, "grad_norm": 2.2632342098228775, "learning_rate": 1.1596476987034167e-05, "loss": 1.0215, "step": 4448 }, { "epoch": 0.4656200941915228, "grad_norm": 2.2366838939003335, "learning_rate": 1.1593130651577861e-05, "loss": 1.0237, "step": 4449 }, { "epoch": 0.46572475143903713, "grad_norm": 38.16329198450561, "learning_rate": 1.1589784133067619e-05, "loss": 1.9354, "step": 4450 }, { "epoch": 0.46582940868655154, "grad_norm": 2.3716405493770485, "learning_rate": 1.1586437431887954e-05, "loss": 0.8736, "step": 4451 }, { "epoch": 0.46593406593406594, "grad_norm": 2.125681534199429, "learning_rate": 1.158309054842341e-05, "loss": 1.0289, "step": 4452 }, { "epoch": 0.46603872318158035, "grad_norm": 2.3919411314017536, "learning_rate": 1.1579743483058556e-05, "loss": 0.9878, "step": 4453 }, { "epoch": 0.4661433804290947, "grad_norm": 2.39638097963942, "learning_rate": 1.1576396236177974e-05, "loss": 1.0409, "step": 4454 }, { "epoch": 0.4662480376766091, "grad_norm": 2.5103806573334215, "learning_rate": 1.1573048808166265e-05, "loss": 0.9669, "step": 4455 }, { "epoch": 0.4663526949241235, "grad_norm": 19.54312815228122, "learning_rate": 1.1569701199408063e-05, "loss": 1.6532, "step": 4456 }, { "epoch": 0.46645735217163786, "grad_norm": 2.118501244763855, "learning_rate": 1.1566353410288009e-05, "loss": 1.0849, "step": 4457 }, { "epoch": 0.46656200941915227, "grad_norm": 1.7858040697407882, "learning_rate": 1.1563005441190776e-05, "loss": 0.8408, "step": 4458 }, { "epoch": 0.4666666666666667, "grad_norm": 2.2969793541608166, "learning_rate": 1.1559657292501044e-05, "loss": 0.9529, "step": 4459 }, { "epoch": 0.4667713239141811, "grad_norm": 2.5581364794755355, "learning_rate": 1.1556308964603531e-05, "loss": 0.8612, "step": 4460 }, { "epoch": 0.46687598116169543, "grad_norm": 2.300009884961941, "learning_rate": 1.155296045788296e-05, "loss": 1.1076, "step": 4461 }, { "epoch": 0.46698063840920984, "grad_norm": 1.9569146697798587, "learning_rate": 1.1549611772724087e-05, "loss": 0.9219, "step": 4462 }, { "epoch": 0.46708529565672424, "grad_norm": 2.0103709930682876, "learning_rate": 1.1546262909511679e-05, "loss": 1.0447, "step": 4463 }, { "epoch": 0.4671899529042386, "grad_norm": 2.2197999718443238, "learning_rate": 1.1542913868630527e-05, "loss": 1.0264, "step": 4464 }, { "epoch": 0.467294610151753, "grad_norm": 1.8177368735546657, "learning_rate": 1.1539564650465447e-05, "loss": 0.9716, "step": 4465 }, { "epoch": 0.4673992673992674, "grad_norm": 2.1059310144937617, "learning_rate": 1.1536215255401265e-05, "loss": 1.0372, "step": 4466 }, { "epoch": 0.4675039246467818, "grad_norm": 2.197191188748699, "learning_rate": 1.1532865683822836e-05, "loss": 1.0703, "step": 4467 }, { "epoch": 0.46760858189429616, "grad_norm": 2.6122799139573423, "learning_rate": 1.1529515936115033e-05, "loss": 1.0208, "step": 4468 }, { "epoch": 0.46771323914181057, "grad_norm": 2.093417244729284, "learning_rate": 1.1526166012662748e-05, "loss": 0.8367, "step": 4469 }, { "epoch": 0.46781789638932497, "grad_norm": 2.135644048890782, "learning_rate": 1.1522815913850896e-05, "loss": 0.905, "step": 4470 }, { "epoch": 0.4679225536368394, "grad_norm": 2.045207253531856, "learning_rate": 1.1519465640064405e-05, "loss": 0.8331, "step": 4471 }, { "epoch": 0.46802721088435373, "grad_norm": 7.896758500820517, "learning_rate": 1.1516115191688232e-05, "loss": 1.3637, "step": 4472 }, { "epoch": 0.46813186813186813, "grad_norm": 2.143393184203902, "learning_rate": 1.151276456910735e-05, "loss": 1.0432, "step": 4473 }, { "epoch": 0.46823652537938254, "grad_norm": 2.075941729013746, "learning_rate": 1.1509413772706757e-05, "loss": 1.1271, "step": 4474 }, { "epoch": 0.4683411826268969, "grad_norm": 1.863301564352949, "learning_rate": 1.150606280287146e-05, "loss": 0.9209, "step": 4475 }, { "epoch": 0.4684458398744113, "grad_norm": 2.298175682660464, "learning_rate": 1.1502711659986491e-05, "loss": 0.9713, "step": 4476 }, { "epoch": 0.4685504971219257, "grad_norm": 2.4343036503123994, "learning_rate": 1.1499360344436908e-05, "loss": 0.9463, "step": 4477 }, { "epoch": 0.4686551543694401, "grad_norm": 2.1765146376199946, "learning_rate": 1.1496008856607779e-05, "loss": 0.9651, "step": 4478 }, { "epoch": 0.46875981161695446, "grad_norm": 1.8298464336339146, "learning_rate": 1.1492657196884203e-05, "loss": 0.9422, "step": 4479 }, { "epoch": 0.46886446886446886, "grad_norm": 1.945013493191885, "learning_rate": 1.1489305365651286e-05, "loss": 1.0195, "step": 4480 }, { "epoch": 0.46896912611198327, "grad_norm": 1.9336766437798703, "learning_rate": 1.1485953363294167e-05, "loss": 1.0002, "step": 4481 }, { "epoch": 0.4690737833594976, "grad_norm": 1.9030733453182003, "learning_rate": 1.1482601190197993e-05, "loss": 0.8946, "step": 4482 }, { "epoch": 0.469178440607012, "grad_norm": 4.129348859610016, "learning_rate": 1.1479248846747934e-05, "loss": 1.1738, "step": 4483 }, { "epoch": 0.46928309785452643, "grad_norm": 2.041585816161266, "learning_rate": 1.1475896333329188e-05, "loss": 0.9353, "step": 4484 }, { "epoch": 0.46938775510204084, "grad_norm": 1.8593945186994896, "learning_rate": 1.1472543650326959e-05, "loss": 0.9923, "step": 4485 }, { "epoch": 0.4694924123495552, "grad_norm": 5.031841167564904, "learning_rate": 1.1469190798126479e-05, "loss": 1.179, "step": 4486 }, { "epoch": 0.4695970695970696, "grad_norm": 2.1889564063975135, "learning_rate": 1.1465837777113e-05, "loss": 0.9345, "step": 4487 }, { "epoch": 0.469701726844584, "grad_norm": 2.0960505576447868, "learning_rate": 1.1462484587671788e-05, "loss": 1.057, "step": 4488 }, { "epoch": 0.46980638409209835, "grad_norm": 2.122413193941446, "learning_rate": 1.1459131230188132e-05, "loss": 0.9431, "step": 4489 }, { "epoch": 0.46991104133961276, "grad_norm": 1.8665692849116569, "learning_rate": 1.1455777705047337e-05, "loss": 0.8859, "step": 4490 }, { "epoch": 0.47001569858712716, "grad_norm": 2.2256281211232967, "learning_rate": 1.1452424012634734e-05, "loss": 0.9491, "step": 4491 }, { "epoch": 0.47012035583464157, "grad_norm": 2.3097076298000068, "learning_rate": 1.1449070153335669e-05, "loss": 1.0756, "step": 4492 }, { "epoch": 0.4702250130821559, "grad_norm": 5.195580495117896, "learning_rate": 1.1445716127535506e-05, "loss": 1.1469, "step": 4493 }, { "epoch": 0.4703296703296703, "grad_norm": 1.8105322884131707, "learning_rate": 1.1442361935619628e-05, "loss": 0.9045, "step": 4494 }, { "epoch": 0.47043432757718473, "grad_norm": 2.01942155404332, "learning_rate": 1.143900757797344e-05, "loss": 0.9083, "step": 4495 }, { "epoch": 0.47053898482469914, "grad_norm": 1.932387279006279, "learning_rate": 1.1435653054982365e-05, "loss": 0.9502, "step": 4496 }, { "epoch": 0.4706436420722135, "grad_norm": 4.1256458431646355, "learning_rate": 1.1432298367031843e-05, "loss": 1.0059, "step": 4497 }, { "epoch": 0.4707482993197279, "grad_norm": 1.942103396468479, "learning_rate": 1.1428943514507334e-05, "loss": 1.0407, "step": 4498 }, { "epoch": 0.4708529565672423, "grad_norm": 2.273840379567588, "learning_rate": 1.1425588497794319e-05, "loss": 1.0614, "step": 4499 }, { "epoch": 0.47095761381475665, "grad_norm": 2.1200667200778907, "learning_rate": 1.1422233317278295e-05, "loss": 0.9771, "step": 4500 }, { "epoch": 0.47106227106227105, "grad_norm": 2.083148734600215, "learning_rate": 1.1418877973344781e-05, "loss": 0.8671, "step": 4501 }, { "epoch": 0.47116692830978546, "grad_norm": 2.2953006328435674, "learning_rate": 1.1415522466379312e-05, "loss": 0.9418, "step": 4502 }, { "epoch": 0.47127158555729987, "grad_norm": 2.189812095653051, "learning_rate": 1.1412166796767442e-05, "loss": 0.8781, "step": 4503 }, { "epoch": 0.4713762428048142, "grad_norm": 2.2314434200482407, "learning_rate": 1.1408810964894743e-05, "loss": 0.9772, "step": 4504 }, { "epoch": 0.4714809000523286, "grad_norm": 2.0090730558505205, "learning_rate": 1.1405454971146808e-05, "loss": 0.9608, "step": 4505 }, { "epoch": 0.47158555729984303, "grad_norm": 2.2175393347464314, "learning_rate": 1.140209881590925e-05, "loss": 1.0648, "step": 4506 }, { "epoch": 0.4716902145473574, "grad_norm": 1.8968894676133286, "learning_rate": 1.1398742499567689e-05, "loss": 0.9704, "step": 4507 }, { "epoch": 0.4717948717948718, "grad_norm": 2.2753935373797916, "learning_rate": 1.1395386022507786e-05, "loss": 0.9457, "step": 4508 }, { "epoch": 0.4718995290423862, "grad_norm": 2.2158138128200533, "learning_rate": 1.13920293851152e-05, "loss": 0.939, "step": 4509 }, { "epoch": 0.4720041862899006, "grad_norm": 2.149361055179521, "learning_rate": 1.1388672587775615e-05, "loss": 0.8684, "step": 4510 }, { "epoch": 0.47210884353741495, "grad_norm": 2.242585827443349, "learning_rate": 1.1385315630874732e-05, "loss": 0.9222, "step": 4511 }, { "epoch": 0.47221350078492935, "grad_norm": 2.0858170359584878, "learning_rate": 1.1381958514798277e-05, "loss": 1.0521, "step": 4512 }, { "epoch": 0.47231815803244376, "grad_norm": 2.3578219833534635, "learning_rate": 1.1378601239931988e-05, "loss": 0.9778, "step": 4513 }, { "epoch": 0.47242281527995816, "grad_norm": 2.318116192208368, "learning_rate": 1.1375243806661617e-05, "loss": 0.9584, "step": 4514 }, { "epoch": 0.4725274725274725, "grad_norm": 1.8752032576547455, "learning_rate": 1.1371886215372952e-05, "loss": 1.0631, "step": 4515 }, { "epoch": 0.4726321297749869, "grad_norm": 2.4461026659114067, "learning_rate": 1.1368528466451774e-05, "loss": 1.0267, "step": 4516 }, { "epoch": 0.4727367870225013, "grad_norm": 2.0213212076244638, "learning_rate": 1.1365170560283903e-05, "loss": 0.9101, "step": 4517 }, { "epoch": 0.4728414442700157, "grad_norm": 1.8825133489006614, "learning_rate": 1.1361812497255167e-05, "loss": 1.0026, "step": 4518 }, { "epoch": 0.4729461015175301, "grad_norm": 2.2245743255363286, "learning_rate": 1.1358454277751415e-05, "loss": 0.9904, "step": 4519 }, { "epoch": 0.4730507587650445, "grad_norm": 1.9671246859720815, "learning_rate": 1.1355095902158507e-05, "loss": 0.8788, "step": 4520 }, { "epoch": 0.4731554160125589, "grad_norm": 2.045528524105347, "learning_rate": 1.1351737370862339e-05, "loss": 1.0552, "step": 4521 }, { "epoch": 0.47326007326007324, "grad_norm": 2.076578499573767, "learning_rate": 1.1348378684248806e-05, "loss": 0.9716, "step": 4522 }, { "epoch": 0.47336473050758765, "grad_norm": 2.1579134355425764, "learning_rate": 1.134501984270383e-05, "loss": 1.1023, "step": 4523 }, { "epoch": 0.47346938775510206, "grad_norm": 2.340911759330156, "learning_rate": 1.1341660846613343e-05, "loss": 0.9794, "step": 4524 }, { "epoch": 0.4735740450026164, "grad_norm": 1.9924334227820981, "learning_rate": 1.133830169636331e-05, "loss": 1.0848, "step": 4525 }, { "epoch": 0.4736787022501308, "grad_norm": 1.962966847625367, "learning_rate": 1.1334942392339693e-05, "loss": 1.0938, "step": 4526 }, { "epoch": 0.4737833594976452, "grad_norm": 2.178454110615139, "learning_rate": 1.1331582934928495e-05, "loss": 1.0561, "step": 4527 }, { "epoch": 0.4738880167451596, "grad_norm": 2.368913775696146, "learning_rate": 1.1328223324515714e-05, "loss": 0.8537, "step": 4528 }, { "epoch": 0.473992673992674, "grad_norm": 1.9280380818904281, "learning_rate": 1.1324863561487383e-05, "loss": 0.8553, "step": 4529 }, { "epoch": 0.4740973312401884, "grad_norm": 1.962816395052731, "learning_rate": 1.1321503646229543e-05, "loss": 0.8091, "step": 4530 }, { "epoch": 0.4742019884877028, "grad_norm": 2.068969926585598, "learning_rate": 1.1318143579128254e-05, "loss": 0.9076, "step": 4531 }, { "epoch": 0.47430664573521714, "grad_norm": 1.9731106713105366, "learning_rate": 1.1314783360569597e-05, "loss": 0.9354, "step": 4532 }, { "epoch": 0.47441130298273154, "grad_norm": 1.9249243484641931, "learning_rate": 1.1311422990939664e-05, "loss": 0.8459, "step": 4533 }, { "epoch": 0.47451596023024595, "grad_norm": 2.014143494128217, "learning_rate": 1.1308062470624572e-05, "loss": 1.0709, "step": 4534 }, { "epoch": 0.47462061747776035, "grad_norm": 1.790897621437224, "learning_rate": 1.1304701800010453e-05, "loss": 0.9237, "step": 4535 }, { "epoch": 0.4747252747252747, "grad_norm": 1.8946726238557325, "learning_rate": 1.1301340979483448e-05, "loss": 0.9411, "step": 4536 }, { "epoch": 0.4748299319727891, "grad_norm": 1.8356376485135328, "learning_rate": 1.129798000942973e-05, "loss": 0.958, "step": 4537 }, { "epoch": 0.4749345892203035, "grad_norm": 2.0838881862402765, "learning_rate": 1.1294618890235474e-05, "loss": 1.0522, "step": 4538 }, { "epoch": 0.4750392464678179, "grad_norm": 2.1995593212183264, "learning_rate": 1.1291257622286887e-05, "loss": 0.9551, "step": 4539 }, { "epoch": 0.47514390371533227, "grad_norm": 1.8441762381994096, "learning_rate": 1.1287896205970178e-05, "loss": 0.8266, "step": 4540 }, { "epoch": 0.4752485609628467, "grad_norm": 4.422227979112271, "learning_rate": 1.1284534641671588e-05, "loss": 1.1611, "step": 4541 }, { "epoch": 0.4753532182103611, "grad_norm": 2.3144636336989945, "learning_rate": 1.128117292977736e-05, "loss": 0.9875, "step": 4542 }, { "epoch": 0.47545787545787543, "grad_norm": 2.10412297009563, "learning_rate": 1.1277811070673765e-05, "loss": 0.9113, "step": 4543 }, { "epoch": 0.47556253270538984, "grad_norm": 2.215518349186847, "learning_rate": 1.127444906474709e-05, "loss": 0.8954, "step": 4544 }, { "epoch": 0.47566718995290425, "grad_norm": 2.30718848055899, "learning_rate": 1.1271086912383633e-05, "loss": 0.9245, "step": 4545 }, { "epoch": 0.47577184720041865, "grad_norm": 2.028770055513572, "learning_rate": 1.1267724613969712e-05, "loss": 0.9973, "step": 4546 }, { "epoch": 0.475876504447933, "grad_norm": 2.2846525131478437, "learning_rate": 1.1264362169891664e-05, "loss": 1.0271, "step": 4547 }, { "epoch": 0.4759811616954474, "grad_norm": 2.0978658851996257, "learning_rate": 1.1260999580535842e-05, "loss": 0.9312, "step": 4548 }, { "epoch": 0.4760858189429618, "grad_norm": 3.541394648972959, "learning_rate": 1.125763684628861e-05, "loss": 1.0477, "step": 4549 }, { "epoch": 0.47619047619047616, "grad_norm": 2.317340528280266, "learning_rate": 1.1254273967536352e-05, "loss": 0.9305, "step": 4550 }, { "epoch": 0.47629513343799057, "grad_norm": 2.5592216831346093, "learning_rate": 1.1250910944665475e-05, "loss": 0.9933, "step": 4551 }, { "epoch": 0.476399790685505, "grad_norm": 2.3033433240600942, "learning_rate": 1.1247547778062394e-05, "loss": 1.0646, "step": 4552 }, { "epoch": 0.4765044479330194, "grad_norm": 1.782189325078589, "learning_rate": 1.1244184468113544e-05, "loss": 0.8961, "step": 4553 }, { "epoch": 0.47660910518053373, "grad_norm": 2.3437218275063914, "learning_rate": 1.1240821015205378e-05, "loss": 0.9471, "step": 4554 }, { "epoch": 0.47671376242804814, "grad_norm": 2.0231024397759563, "learning_rate": 1.1237457419724357e-05, "loss": 0.9574, "step": 4555 }, { "epoch": 0.47681841967556254, "grad_norm": 1.9992919395657165, "learning_rate": 1.1234093682056976e-05, "loss": 0.8381, "step": 4556 }, { "epoch": 0.47692307692307695, "grad_norm": 2.2448074144884433, "learning_rate": 1.1230729802589727e-05, "loss": 1.005, "step": 4557 }, { "epoch": 0.4770277341705913, "grad_norm": 2.094998090831663, "learning_rate": 1.1227365781709129e-05, "loss": 0.8991, "step": 4558 }, { "epoch": 0.4771323914181057, "grad_norm": 2.1848516156132227, "learning_rate": 1.1224001619801711e-05, "loss": 0.9532, "step": 4559 }, { "epoch": 0.4772370486656201, "grad_norm": 1.9856619963316688, "learning_rate": 1.122063731725403e-05, "loss": 0.9081, "step": 4560 }, { "epoch": 0.47734170591313446, "grad_norm": 2.5402648997557367, "learning_rate": 1.1217272874452647e-05, "loss": 1.0494, "step": 4561 }, { "epoch": 0.47744636316064887, "grad_norm": 2.129703098208944, "learning_rate": 1.121390829178414e-05, "loss": 0.8839, "step": 4562 }, { "epoch": 0.4775510204081633, "grad_norm": 2.207630890065239, "learning_rate": 1.1210543569635113e-05, "loss": 1.0222, "step": 4563 }, { "epoch": 0.4776556776556777, "grad_norm": 2.3323434569261425, "learning_rate": 1.1207178708392173e-05, "loss": 0.8753, "step": 4564 }, { "epoch": 0.47776033490319203, "grad_norm": 2.0722087950689616, "learning_rate": 1.1203813708441953e-05, "loss": 1.0534, "step": 4565 }, { "epoch": 0.47786499215070644, "grad_norm": 1.9543831145256765, "learning_rate": 1.1200448570171099e-05, "loss": 1.0708, "step": 4566 }, { "epoch": 0.47796964939822084, "grad_norm": 2.2669487242998487, "learning_rate": 1.1197083293966267e-05, "loss": 0.901, "step": 4567 }, { "epoch": 0.4780743066457352, "grad_norm": 1.950939820948088, "learning_rate": 1.1193717880214139e-05, "loss": 0.8952, "step": 4568 }, { "epoch": 0.4781789638932496, "grad_norm": 2.1867834960215737, "learning_rate": 1.1190352329301408e-05, "loss": 0.9256, "step": 4569 }, { "epoch": 0.478283621140764, "grad_norm": 1.9100913001648718, "learning_rate": 1.118698664161478e-05, "loss": 0.9184, "step": 4570 }, { "epoch": 0.4783882783882784, "grad_norm": 2.1641906169210765, "learning_rate": 1.1183620817540985e-05, "loss": 0.7917, "step": 4571 }, { "epoch": 0.47849293563579276, "grad_norm": 1.964578596649523, "learning_rate": 1.1180254857466753e-05, "loss": 0.9407, "step": 4572 }, { "epoch": 0.47859759288330717, "grad_norm": 1.9010529569383956, "learning_rate": 1.1176888761778848e-05, "loss": 0.9953, "step": 4573 }, { "epoch": 0.47870225013082157, "grad_norm": 1.933991820551362, "learning_rate": 1.1173522530864036e-05, "loss": 1.007, "step": 4574 }, { "epoch": 0.478806907378336, "grad_norm": 1.972812990270169, "learning_rate": 1.117015616510911e-05, "loss": 0.9306, "step": 4575 }, { "epoch": 0.47891156462585033, "grad_norm": 2.0473114243949815, "learning_rate": 1.1166789664900866e-05, "loss": 1.005, "step": 4576 }, { "epoch": 0.47901622187336473, "grad_norm": 2.1372399776159168, "learning_rate": 1.1163423030626123e-05, "loss": 0.9524, "step": 4577 }, { "epoch": 0.47912087912087914, "grad_norm": 2.1890057903200826, "learning_rate": 1.1160056262671718e-05, "loss": 0.8805, "step": 4578 }, { "epoch": 0.4792255363683935, "grad_norm": 2.2821866808196196, "learning_rate": 1.1156689361424496e-05, "loss": 0.931, "step": 4579 }, { "epoch": 0.4793301936159079, "grad_norm": 1.92030262663086, "learning_rate": 1.1153322327271324e-05, "loss": 1.0229, "step": 4580 }, { "epoch": 0.4794348508634223, "grad_norm": 2.264619342723187, "learning_rate": 1.1149955160599073e-05, "loss": 1.0943, "step": 4581 }, { "epoch": 0.4795395081109367, "grad_norm": 2.4491546035308986, "learning_rate": 1.1146587861794645e-05, "loss": 0.8622, "step": 4582 }, { "epoch": 0.47964416535845106, "grad_norm": 2.700636952916195, "learning_rate": 1.114322043124495e-05, "loss": 0.9035, "step": 4583 }, { "epoch": 0.47974882260596546, "grad_norm": 5.883040265797574, "learning_rate": 1.1139852869336907e-05, "loss": 1.0753, "step": 4584 }, { "epoch": 0.47985347985347987, "grad_norm": 2.2890986655042234, "learning_rate": 1.113648517645746e-05, "loss": 1.0844, "step": 4585 }, { "epoch": 0.4799581371009942, "grad_norm": 1.8524051771038759, "learning_rate": 1.1133117352993559e-05, "loss": 0.9628, "step": 4586 }, { "epoch": 0.4800627943485086, "grad_norm": 2.2350258071349263, "learning_rate": 1.1129749399332182e-05, "loss": 0.9315, "step": 4587 }, { "epoch": 0.48016745159602303, "grad_norm": 3.5795609315488726, "learning_rate": 1.1126381315860306e-05, "loss": 1.1486, "step": 4588 }, { "epoch": 0.48027210884353744, "grad_norm": 2.0026623409331696, "learning_rate": 1.1123013102964934e-05, "loss": 1.0134, "step": 4589 }, { "epoch": 0.4803767660910518, "grad_norm": 2.44461585559246, "learning_rate": 1.1119644761033079e-05, "loss": 1.0314, "step": 4590 }, { "epoch": 0.4804814233385662, "grad_norm": 1.9386712926730827, "learning_rate": 1.1116276290451773e-05, "loss": 1.0203, "step": 4591 }, { "epoch": 0.4805860805860806, "grad_norm": 1.9273580062176585, "learning_rate": 1.1112907691608059e-05, "loss": 0.9667, "step": 4592 }, { "epoch": 0.48069073783359495, "grad_norm": 2.2393335832841483, "learning_rate": 1.1109538964888993e-05, "loss": 0.9332, "step": 4593 }, { "epoch": 0.48079539508110936, "grad_norm": 2.2101404361932397, "learning_rate": 1.110617011068165e-05, "loss": 0.8757, "step": 4594 }, { "epoch": 0.48090005232862376, "grad_norm": 2.07916151536985, "learning_rate": 1.110280112937312e-05, "loss": 0.8394, "step": 4595 }, { "epoch": 0.48100470957613817, "grad_norm": 2.4008188696017077, "learning_rate": 1.1099432021350504e-05, "loss": 0.9937, "step": 4596 }, { "epoch": 0.4811093668236525, "grad_norm": 1.98281407430736, "learning_rate": 1.1096062787000923e-05, "loss": 0.9293, "step": 4597 }, { "epoch": 0.4812140240711669, "grad_norm": 1.6750650744945574, "learning_rate": 1.1092693426711501e-05, "loss": 0.8145, "step": 4598 }, { "epoch": 0.48131868131868133, "grad_norm": 2.1258343895313137, "learning_rate": 1.1089323940869392e-05, "loss": 1.0777, "step": 4599 }, { "epoch": 0.48142333856619574, "grad_norm": 2.3850630786161227, "learning_rate": 1.1085954329861754e-05, "loss": 1.1615, "step": 4600 }, { "epoch": 0.4815279958137101, "grad_norm": 2.041493035918398, "learning_rate": 1.1082584594075762e-05, "loss": 0.9125, "step": 4601 }, { "epoch": 0.4816326530612245, "grad_norm": 2.102783748957608, "learning_rate": 1.1079214733898606e-05, "loss": 0.9909, "step": 4602 }, { "epoch": 0.4817373103087389, "grad_norm": 1.6810209540981638, "learning_rate": 1.1075844749717486e-05, "loss": 0.9937, "step": 4603 }, { "epoch": 0.48184196755625325, "grad_norm": 1.9499292480253156, "learning_rate": 1.1072474641919626e-05, "loss": 1.0339, "step": 4604 }, { "epoch": 0.48194662480376765, "grad_norm": 1.9692957698439062, "learning_rate": 1.1069104410892255e-05, "loss": 0.9911, "step": 4605 }, { "epoch": 0.48205128205128206, "grad_norm": 1.9832587584087509, "learning_rate": 1.1065734057022622e-05, "loss": 0.9872, "step": 4606 }, { "epoch": 0.48215593929879647, "grad_norm": 1.9753953053703226, "learning_rate": 1.1062363580697982e-05, "loss": 0.8596, "step": 4607 }, { "epoch": 0.4822605965463108, "grad_norm": 2.2053778552706387, "learning_rate": 1.1058992982305613e-05, "loss": 0.9707, "step": 4608 }, { "epoch": 0.4823652537938252, "grad_norm": 4.451868601106817, "learning_rate": 1.1055622262232808e-05, "loss": 1.0707, "step": 4609 }, { "epoch": 0.4824699110413396, "grad_norm": 2.427513648919258, "learning_rate": 1.105225142086686e-05, "loss": 1.093, "step": 4610 }, { "epoch": 0.482574568288854, "grad_norm": 1.8975018847651661, "learning_rate": 1.1048880458595093e-05, "loss": 0.9038, "step": 4611 }, { "epoch": 0.4826792255363684, "grad_norm": 2.101137234743737, "learning_rate": 1.1045509375804835e-05, "loss": 0.9936, "step": 4612 }, { "epoch": 0.4827838827838828, "grad_norm": 2.116146425294967, "learning_rate": 1.104213817288343e-05, "loss": 0.9991, "step": 4613 }, { "epoch": 0.4828885400313972, "grad_norm": 7.379023528902324, "learning_rate": 1.1038766850218237e-05, "loss": 1.0964, "step": 4614 }, { "epoch": 0.48299319727891155, "grad_norm": 2.303483496597832, "learning_rate": 1.1035395408196625e-05, "loss": 1.0693, "step": 4615 }, { "epoch": 0.48309785452642595, "grad_norm": 1.8368414922882212, "learning_rate": 1.103202384720598e-05, "loss": 0.9938, "step": 4616 }, { "epoch": 0.48320251177394036, "grad_norm": 1.939124879567201, "learning_rate": 1.1028652167633706e-05, "loss": 0.9361, "step": 4617 }, { "epoch": 0.48330716902145476, "grad_norm": 1.931462825025141, "learning_rate": 1.1025280369867216e-05, "loss": 0.996, "step": 4618 }, { "epoch": 0.4834118262689691, "grad_norm": 1.8660312624366957, "learning_rate": 1.102190845429393e-05, "loss": 0.8925, "step": 4619 }, { "epoch": 0.4835164835164835, "grad_norm": 1.9759725551334741, "learning_rate": 1.1018536421301287e-05, "loss": 0.8974, "step": 4620 }, { "epoch": 0.4836211407639979, "grad_norm": 2.042722969414731, "learning_rate": 1.1015164271276747e-05, "loss": 1.0188, "step": 4621 }, { "epoch": 0.4837257980115123, "grad_norm": 2.038746548550091, "learning_rate": 1.1011792004607778e-05, "loss": 0.875, "step": 4622 }, { "epoch": 0.4838304552590267, "grad_norm": 2.104636214257139, "learning_rate": 1.1008419621681854e-05, "loss": 0.9584, "step": 4623 }, { "epoch": 0.4839351125065411, "grad_norm": 1.8747597757848282, "learning_rate": 1.100504712288647e-05, "loss": 0.9251, "step": 4624 }, { "epoch": 0.4840397697540555, "grad_norm": 2.2211808317732418, "learning_rate": 1.1001674508609135e-05, "loss": 1.0446, "step": 4625 }, { "epoch": 0.48414442700156984, "grad_norm": 2.3946698315504005, "learning_rate": 1.0998301779237367e-05, "loss": 1.0903, "step": 4626 }, { "epoch": 0.48424908424908425, "grad_norm": 2.2432670399188472, "learning_rate": 1.0994928935158703e-05, "loss": 0.9233, "step": 4627 }, { "epoch": 0.48435374149659866, "grad_norm": 1.8197431587694193, "learning_rate": 1.0991555976760688e-05, "loss": 0.9245, "step": 4628 }, { "epoch": 0.484458398744113, "grad_norm": 2.025709348707062, "learning_rate": 1.098818290443088e-05, "loss": 0.9994, "step": 4629 }, { "epoch": 0.4845630559916274, "grad_norm": 2.065981035535976, "learning_rate": 1.098480971855685e-05, "loss": 1.0224, "step": 4630 }, { "epoch": 0.4846677132391418, "grad_norm": 5.77757048663166, "learning_rate": 1.0981436419526193e-05, "loss": 1.1061, "step": 4631 }, { "epoch": 0.4847723704866562, "grad_norm": 1.870408334687293, "learning_rate": 1.0978063007726495e-05, "loss": 0.7735, "step": 4632 }, { "epoch": 0.4848770277341706, "grad_norm": 3.3267436752556065, "learning_rate": 1.0974689483545378e-05, "loss": 1.0417, "step": 4633 }, { "epoch": 0.484981684981685, "grad_norm": 1.9245609462036217, "learning_rate": 1.0971315847370463e-05, "loss": 0.8995, "step": 4634 }, { "epoch": 0.4850863422291994, "grad_norm": 1.9011101446154672, "learning_rate": 1.0967942099589391e-05, "loss": 0.8339, "step": 4635 }, { "epoch": 0.48519099947671374, "grad_norm": 1.9115078692573162, "learning_rate": 1.0964568240589808e-05, "loss": 0.7742, "step": 4636 }, { "epoch": 0.48529565672422814, "grad_norm": 1.9022548969400057, "learning_rate": 1.0961194270759375e-05, "loss": 0.9253, "step": 4637 }, { "epoch": 0.48540031397174255, "grad_norm": 2.0318224541882657, "learning_rate": 1.0957820190485775e-05, "loss": 0.9972, "step": 4638 }, { "epoch": 0.48550497121925695, "grad_norm": 2.0727823257764544, "learning_rate": 1.0954446000156693e-05, "loss": 1.058, "step": 4639 }, { "epoch": 0.4856096284667713, "grad_norm": 1.9956018186674882, "learning_rate": 1.0951071700159833e-05, "loss": 0.8057, "step": 4640 }, { "epoch": 0.4857142857142857, "grad_norm": 2.20621162649101, "learning_rate": 1.0947697290882903e-05, "loss": 0.834, "step": 4641 }, { "epoch": 0.4858189429618001, "grad_norm": 2.051794455421428, "learning_rate": 1.0944322772713634e-05, "loss": 0.9757, "step": 4642 }, { "epoch": 0.4859236002093145, "grad_norm": 2.1409550137945974, "learning_rate": 1.0940948146039768e-05, "loss": 0.974, "step": 4643 }, { "epoch": 0.48602825745682887, "grad_norm": 1.948251250014736, "learning_rate": 1.0937573411249047e-05, "loss": 0.9849, "step": 4644 }, { "epoch": 0.4861329147043433, "grad_norm": 1.9818999450949544, "learning_rate": 1.0934198568729245e-05, "loss": 0.9915, "step": 4645 }, { "epoch": 0.4862375719518577, "grad_norm": 2.056814682650398, "learning_rate": 1.0930823618868128e-05, "loss": 0.9572, "step": 4646 }, { "epoch": 0.48634222919937203, "grad_norm": 1.93186679850152, "learning_rate": 1.0927448562053494e-05, "loss": 0.9447, "step": 4647 }, { "epoch": 0.48644688644688644, "grad_norm": 1.9807004018502983, "learning_rate": 1.092407339867314e-05, "loss": 0.9509, "step": 4648 }, { "epoch": 0.48655154369440085, "grad_norm": 2.2561421939508324, "learning_rate": 1.0920698129114877e-05, "loss": 0.9115, "step": 4649 }, { "epoch": 0.48665620094191525, "grad_norm": 1.7925812958550895, "learning_rate": 1.0917322753766536e-05, "loss": 0.8921, "step": 4650 }, { "epoch": 0.4867608581894296, "grad_norm": 1.980186761047103, "learning_rate": 1.0913947273015948e-05, "loss": 0.9832, "step": 4651 }, { "epoch": 0.486865515436944, "grad_norm": 2.3562237026686343, "learning_rate": 1.0910571687250965e-05, "loss": 0.9792, "step": 4652 }, { "epoch": 0.4869701726844584, "grad_norm": 5.0353868336406915, "learning_rate": 1.0907195996859453e-05, "loss": 0.9708, "step": 4653 }, { "epoch": 0.48707482993197276, "grad_norm": 1.9131908425142234, "learning_rate": 1.090382020222928e-05, "loss": 0.9326, "step": 4654 }, { "epoch": 0.48717948717948717, "grad_norm": 1.790537959582867, "learning_rate": 1.0900444303748333e-05, "loss": 0.9168, "step": 4655 }, { "epoch": 0.4872841444270016, "grad_norm": 2.140607384118724, "learning_rate": 1.089706830180451e-05, "loss": 1.1137, "step": 4656 }, { "epoch": 0.487388801674516, "grad_norm": 1.891634591818433, "learning_rate": 1.0893692196785722e-05, "loss": 0.8324, "step": 4657 }, { "epoch": 0.48749345892203033, "grad_norm": 2.09720854094086, "learning_rate": 1.0890315989079887e-05, "loss": 0.9571, "step": 4658 }, { "epoch": 0.48759811616954474, "grad_norm": 2.332233200389157, "learning_rate": 1.0886939679074939e-05, "loss": 1.0647, "step": 4659 }, { "epoch": 0.48770277341705914, "grad_norm": 2.3675130191516014, "learning_rate": 1.0883563267158827e-05, "loss": 0.8783, "step": 4660 }, { "epoch": 0.48780743066457355, "grad_norm": 2.067847253311974, "learning_rate": 1.0880186753719505e-05, "loss": 0.9496, "step": 4661 }, { "epoch": 0.4879120879120879, "grad_norm": 2.059957357047224, "learning_rate": 1.087681013914494e-05, "loss": 1.0097, "step": 4662 }, { "epoch": 0.4880167451596023, "grad_norm": 1.7780085625194908, "learning_rate": 1.0873433423823108e-05, "loss": 0.9731, "step": 4663 }, { "epoch": 0.4881214024071167, "grad_norm": 2.2693994526383183, "learning_rate": 1.087005660814201e-05, "loss": 0.9633, "step": 4664 }, { "epoch": 0.48822605965463106, "grad_norm": 2.2058172273341343, "learning_rate": 1.0866679692489643e-05, "loss": 0.8353, "step": 4665 }, { "epoch": 0.48833071690214547, "grad_norm": 2.1061269956596846, "learning_rate": 1.0863302677254021e-05, "loss": 0.9219, "step": 4666 }, { "epoch": 0.4884353741496599, "grad_norm": 2.000729147710908, "learning_rate": 1.0859925562823172e-05, "loss": 0.9055, "step": 4667 }, { "epoch": 0.4885400313971743, "grad_norm": 1.9868769740264263, "learning_rate": 1.0856548349585132e-05, "loss": 0.9625, "step": 4668 }, { "epoch": 0.48864468864468863, "grad_norm": 1.9715388535082388, "learning_rate": 1.0853171037927952e-05, "loss": 0.9459, "step": 4669 }, { "epoch": 0.48874934589220304, "grad_norm": 2.1247424417156147, "learning_rate": 1.0849793628239687e-05, "loss": 1.0457, "step": 4670 }, { "epoch": 0.48885400313971744, "grad_norm": 2.0181718772211426, "learning_rate": 1.0846416120908416e-05, "loss": 0.8496, "step": 4671 }, { "epoch": 0.4889586603872318, "grad_norm": 1.858935680154186, "learning_rate": 1.0843038516322213e-05, "loss": 1.0369, "step": 4672 }, { "epoch": 0.4890633176347462, "grad_norm": 1.9353831132266497, "learning_rate": 1.0839660814869174e-05, "loss": 1.0288, "step": 4673 }, { "epoch": 0.4891679748822606, "grad_norm": 1.92877995038633, "learning_rate": 1.083628301693741e-05, "loss": 0.9495, "step": 4674 }, { "epoch": 0.489272632129775, "grad_norm": 1.8743301066850448, "learning_rate": 1.083290512291503e-05, "loss": 0.9853, "step": 4675 }, { "epoch": 0.48937728937728936, "grad_norm": 2.166368633461282, "learning_rate": 1.0829527133190163e-05, "loss": 0.9273, "step": 4676 }, { "epoch": 0.48948194662480377, "grad_norm": 2.626516377134673, "learning_rate": 1.0826149048150947e-05, "loss": 0.9658, "step": 4677 }, { "epoch": 0.48958660387231817, "grad_norm": 2.007119331685366, "learning_rate": 1.082277086818553e-05, "loss": 0.9525, "step": 4678 }, { "epoch": 0.4896912611198325, "grad_norm": 2.3214333736204043, "learning_rate": 1.0819392593682074e-05, "loss": 0.9228, "step": 4679 }, { "epoch": 0.4897959183673469, "grad_norm": 2.1792774915549464, "learning_rate": 1.0816014225028745e-05, "loss": 1.001, "step": 4680 }, { "epoch": 0.48990057561486133, "grad_norm": 1.871180713970224, "learning_rate": 1.0812635762613728e-05, "loss": 0.9649, "step": 4681 }, { "epoch": 0.49000523286237574, "grad_norm": 2.415124070494655, "learning_rate": 1.0809257206825214e-05, "loss": 0.8755, "step": 4682 }, { "epoch": 0.4901098901098901, "grad_norm": 1.972768747408386, "learning_rate": 1.080587855805141e-05, "loss": 0.9227, "step": 4683 }, { "epoch": 0.4902145473574045, "grad_norm": 2.0814935675576196, "learning_rate": 1.0802499816680525e-05, "loss": 0.9524, "step": 4684 }, { "epoch": 0.4903192046049189, "grad_norm": 2.1273011146323424, "learning_rate": 1.079912098310078e-05, "loss": 0.9118, "step": 4685 }, { "epoch": 0.4904238618524333, "grad_norm": 2.3374604970516932, "learning_rate": 1.0795742057700419e-05, "loss": 1.0986, "step": 4686 }, { "epoch": 0.49052851909994766, "grad_norm": 2.362107494802518, "learning_rate": 1.0792363040867677e-05, "loss": 0.941, "step": 4687 }, { "epoch": 0.49063317634746206, "grad_norm": 1.7863431015815339, "learning_rate": 1.0788983932990821e-05, "loss": 0.8811, "step": 4688 }, { "epoch": 0.49073783359497647, "grad_norm": 1.9476171669722804, "learning_rate": 1.0785604734458108e-05, "loss": 0.9976, "step": 4689 }, { "epoch": 0.4908424908424908, "grad_norm": 1.9535812880238408, "learning_rate": 1.0782225445657818e-05, "loss": 0.9657, "step": 4690 }, { "epoch": 0.4909471480900052, "grad_norm": 2.09502949679378, "learning_rate": 1.0778846066978244e-05, "loss": 1.0246, "step": 4691 }, { "epoch": 0.49105180533751963, "grad_norm": 1.9911290871631744, "learning_rate": 1.0775466598807673e-05, "loss": 0.9342, "step": 4692 }, { "epoch": 0.49115646258503404, "grad_norm": 2.0405251799273074, "learning_rate": 1.077208704153442e-05, "loss": 0.8705, "step": 4693 }, { "epoch": 0.4912611198325484, "grad_norm": 2.230283723602765, "learning_rate": 1.07687073955468e-05, "loss": 0.9413, "step": 4694 }, { "epoch": 0.4913657770800628, "grad_norm": 2.0963860345437997, "learning_rate": 1.076532766123314e-05, "loss": 1.0367, "step": 4695 }, { "epoch": 0.4914704343275772, "grad_norm": 2.0670944200322845, "learning_rate": 1.0761947838981784e-05, "loss": 0.9836, "step": 4696 }, { "epoch": 0.49157509157509155, "grad_norm": 1.9165456806981649, "learning_rate": 1.0758567929181074e-05, "loss": 0.959, "step": 4697 }, { "epoch": 0.49167974882260596, "grad_norm": 2.228963424636731, "learning_rate": 1.075518793221937e-05, "loss": 0.9847, "step": 4698 }, { "epoch": 0.49178440607012036, "grad_norm": 2.3859878281796747, "learning_rate": 1.0751807848485043e-05, "loss": 0.9669, "step": 4699 }, { "epoch": 0.49188906331763477, "grad_norm": 1.8576336024035527, "learning_rate": 1.0748427678366474e-05, "loss": 0.9819, "step": 4700 }, { "epoch": 0.4919937205651491, "grad_norm": 1.9665979871967703, "learning_rate": 1.0745047422252041e-05, "loss": 0.9886, "step": 4701 }, { "epoch": 0.4920983778126635, "grad_norm": 1.9561562986500205, "learning_rate": 1.0741667080530152e-05, "loss": 0.9116, "step": 4702 }, { "epoch": 0.49220303506017793, "grad_norm": 1.84642427868714, "learning_rate": 1.0738286653589215e-05, "loss": 0.9101, "step": 4703 }, { "epoch": 0.49230769230769234, "grad_norm": 2.2932509082000823, "learning_rate": 1.0734906141817638e-05, "loss": 1.0601, "step": 4704 }, { "epoch": 0.4924123495552067, "grad_norm": 2.1010715133795403, "learning_rate": 1.0731525545603863e-05, "loss": 0.9556, "step": 4705 }, { "epoch": 0.4925170068027211, "grad_norm": 2.0849794034429117, "learning_rate": 1.0728144865336315e-05, "loss": 0.8108, "step": 4706 }, { "epoch": 0.4926216640502355, "grad_norm": 6.181013708885361, "learning_rate": 1.0724764101403445e-05, "loss": 0.9935, "step": 4707 }, { "epoch": 0.49272632129774985, "grad_norm": 2.045937670470183, "learning_rate": 1.0721383254193714e-05, "loss": 0.9471, "step": 4708 }, { "epoch": 0.49283097854526425, "grad_norm": 2.7772247895749613, "learning_rate": 1.0718002324095582e-05, "loss": 0.9565, "step": 4709 }, { "epoch": 0.49293563579277866, "grad_norm": 1.890604125310268, "learning_rate": 1.071462131149753e-05, "loss": 0.9257, "step": 4710 }, { "epoch": 0.49304029304029307, "grad_norm": 2.489027702426938, "learning_rate": 1.0711240216788036e-05, "loss": 0.9638, "step": 4711 }, { "epoch": 0.4931449502878074, "grad_norm": 2.455683040276122, "learning_rate": 1.0707859040355603e-05, "loss": 1.0213, "step": 4712 }, { "epoch": 0.4932496075353218, "grad_norm": 2.143398110677024, "learning_rate": 1.0704477782588733e-05, "loss": 0.9781, "step": 4713 }, { "epoch": 0.4933542647828362, "grad_norm": 1.9663410678077706, "learning_rate": 1.0701096443875936e-05, "loss": 0.9224, "step": 4714 }, { "epoch": 0.4934589220303506, "grad_norm": 2.1480345691790705, "learning_rate": 1.0697715024605735e-05, "loss": 0.9175, "step": 4715 }, { "epoch": 0.493563579277865, "grad_norm": 9.477655375017251, "learning_rate": 1.0694333525166662e-05, "loss": 0.978, "step": 4716 }, { "epoch": 0.4936682365253794, "grad_norm": 1.80863597665584, "learning_rate": 1.0690951945947263e-05, "loss": 0.8808, "step": 4717 }, { "epoch": 0.4937728937728938, "grad_norm": 1.886985809641338, "learning_rate": 1.0687570287336082e-05, "loss": 0.8756, "step": 4718 }, { "epoch": 0.49387755102040815, "grad_norm": 2.0556937103470143, "learning_rate": 1.0684188549721683e-05, "loss": 0.9244, "step": 4719 }, { "epoch": 0.49398220826792255, "grad_norm": 2.0110374519014154, "learning_rate": 1.0680806733492632e-05, "loss": 1.0597, "step": 4720 }, { "epoch": 0.49408686551543696, "grad_norm": 1.7973197164685586, "learning_rate": 1.0677424839037506e-05, "loss": 0.9803, "step": 4721 }, { "epoch": 0.4941915227629513, "grad_norm": 2.186835840861067, "learning_rate": 1.0674042866744896e-05, "loss": 0.9843, "step": 4722 }, { "epoch": 0.4942961800104657, "grad_norm": 1.9713736175942005, "learning_rate": 1.0670660817003391e-05, "loss": 1.0191, "step": 4723 }, { "epoch": 0.4944008372579801, "grad_norm": 2.1417694138266845, "learning_rate": 1.0667278690201597e-05, "loss": 0.9995, "step": 4724 }, { "epoch": 0.4945054945054945, "grad_norm": 2.8859513823150724, "learning_rate": 1.0663896486728134e-05, "loss": 1.1489, "step": 4725 }, { "epoch": 0.4946101517530089, "grad_norm": 1.9628740590998055, "learning_rate": 1.0660514206971615e-05, "loss": 0.9062, "step": 4726 }, { "epoch": 0.4947148090005233, "grad_norm": 1.8512096693230278, "learning_rate": 1.0657131851320677e-05, "loss": 0.8803, "step": 4727 }, { "epoch": 0.4948194662480377, "grad_norm": 2.433646998663187, "learning_rate": 1.0653749420163953e-05, "loss": 0.9568, "step": 4728 }, { "epoch": 0.4949241234955521, "grad_norm": 2.0345077036147656, "learning_rate": 1.06503669138901e-05, "loss": 0.7353, "step": 4729 }, { "epoch": 0.49502878074306644, "grad_norm": 1.9424603161161924, "learning_rate": 1.0646984332887766e-05, "loss": 0.968, "step": 4730 }, { "epoch": 0.49513343799058085, "grad_norm": 2.3399922101360553, "learning_rate": 1.0643601677545626e-05, "loss": 0.9581, "step": 4731 }, { "epoch": 0.49523809523809526, "grad_norm": 1.8649126698331577, "learning_rate": 1.0640218948252345e-05, "loss": 0.7876, "step": 4732 }, { "epoch": 0.4953427524856096, "grad_norm": 2.331216885557431, "learning_rate": 1.0636836145396607e-05, "loss": 0.9769, "step": 4733 }, { "epoch": 0.495447409733124, "grad_norm": 2.163582657761894, "learning_rate": 1.063345326936711e-05, "loss": 0.9926, "step": 4734 }, { "epoch": 0.4955520669806384, "grad_norm": 2.081349677166352, "learning_rate": 1.0630070320552544e-05, "loss": 1.0198, "step": 4735 }, { "epoch": 0.4956567242281528, "grad_norm": 1.7921126006767771, "learning_rate": 1.0626687299341622e-05, "loss": 0.8941, "step": 4736 }, { "epoch": 0.4957613814756672, "grad_norm": 1.7566094714296194, "learning_rate": 1.062330420612306e-05, "loss": 0.9175, "step": 4737 }, { "epoch": 0.4958660387231816, "grad_norm": 2.3106445373652496, "learning_rate": 1.0619921041285579e-05, "loss": 0.9188, "step": 4738 }, { "epoch": 0.495970695970696, "grad_norm": 2.8264044010883413, "learning_rate": 1.0616537805217918e-05, "loss": 0.8804, "step": 4739 }, { "epoch": 0.49607535321821034, "grad_norm": 1.9520944530357538, "learning_rate": 1.0613154498308808e-05, "loss": 0.957, "step": 4740 }, { "epoch": 0.49618001046572474, "grad_norm": 2.3861280899553905, "learning_rate": 1.0609771120947005e-05, "loss": 0.9467, "step": 4741 }, { "epoch": 0.49628466771323915, "grad_norm": 2.0740843031163045, "learning_rate": 1.0606387673521265e-05, "loss": 0.9223, "step": 4742 }, { "epoch": 0.49638932496075355, "grad_norm": 9.399003361754902, "learning_rate": 1.0603004156420354e-05, "loss": 1.2298, "step": 4743 }, { "epoch": 0.4964939822082679, "grad_norm": 2.0945292190249747, "learning_rate": 1.059962057003304e-05, "loss": 1.0156, "step": 4744 }, { "epoch": 0.4965986394557823, "grad_norm": 2.1545476746682453, "learning_rate": 1.0596236914748107e-05, "loss": 1.0291, "step": 4745 }, { "epoch": 0.4967032967032967, "grad_norm": 1.8857323776138402, "learning_rate": 1.0592853190954345e-05, "loss": 0.9664, "step": 4746 }, { "epoch": 0.4968079539508111, "grad_norm": 2.095800214096105, "learning_rate": 1.0589469399040546e-05, "loss": 0.9522, "step": 4747 }, { "epoch": 0.49691261119832547, "grad_norm": 2.0247693141512055, "learning_rate": 1.0586085539395523e-05, "loss": 1.0192, "step": 4748 }, { "epoch": 0.4970172684458399, "grad_norm": 1.8584866253076624, "learning_rate": 1.0582701612408083e-05, "loss": 0.9195, "step": 4749 }, { "epoch": 0.4971219256933543, "grad_norm": 1.931335358563096, "learning_rate": 1.0579317618467043e-05, "loss": 0.9267, "step": 4750 }, { "epoch": 0.49722658294086863, "grad_norm": 2.097579996549894, "learning_rate": 1.0575933557961238e-05, "loss": 0.9581, "step": 4751 }, { "epoch": 0.49733124018838304, "grad_norm": 2.033270771923552, "learning_rate": 1.0572549431279495e-05, "loss": 0.9175, "step": 4752 }, { "epoch": 0.49743589743589745, "grad_norm": 2.2607341333747772, "learning_rate": 1.0569165238810666e-05, "loss": 1.0193, "step": 4753 }, { "epoch": 0.49754055468341185, "grad_norm": 1.8647489455827506, "learning_rate": 1.0565780980943596e-05, "loss": 0.9772, "step": 4754 }, { "epoch": 0.4976452119309262, "grad_norm": 1.9057129930055388, "learning_rate": 1.0562396658067142e-05, "loss": 0.9925, "step": 4755 }, { "epoch": 0.4977498691784406, "grad_norm": 2.8885566484907823, "learning_rate": 1.0559012270570177e-05, "loss": 0.9656, "step": 4756 }, { "epoch": 0.497854526425955, "grad_norm": 1.895968677495429, "learning_rate": 1.0555627818841563e-05, "loss": 0.8475, "step": 4757 }, { "epoch": 0.49795918367346936, "grad_norm": 2.019680946799338, "learning_rate": 1.055224330327019e-05, "loss": 0.983, "step": 4758 }, { "epoch": 0.49806384092098377, "grad_norm": 1.9749773215931818, "learning_rate": 1.0548858724244939e-05, "loss": 0.948, "step": 4759 }, { "epoch": 0.4981684981684982, "grad_norm": 1.9535266279189702, "learning_rate": 1.0545474082154711e-05, "loss": 0.9621, "step": 4760 }, { "epoch": 0.4982731554160126, "grad_norm": 3.9985738676646103, "learning_rate": 1.0542089377388406e-05, "loss": 0.9936, "step": 4761 }, { "epoch": 0.49837781266352693, "grad_norm": 2.2584714009921356, "learning_rate": 1.0538704610334929e-05, "loss": 0.871, "step": 4762 }, { "epoch": 0.49848246991104134, "grad_norm": 2.2312381629656404, "learning_rate": 1.0535319781383202e-05, "loss": 1.001, "step": 4763 }, { "epoch": 0.49858712715855574, "grad_norm": 2.0424072639034816, "learning_rate": 1.0531934890922147e-05, "loss": 0.9762, "step": 4764 }, { "epoch": 0.4986917844060701, "grad_norm": 2.1580470907610887, "learning_rate": 1.0528549939340695e-05, "loss": 0.9045, "step": 4765 }, { "epoch": 0.4987964416535845, "grad_norm": 2.2467255664294528, "learning_rate": 1.0525164927027783e-05, "loss": 1.0969, "step": 4766 }, { "epoch": 0.4989010989010989, "grad_norm": 1.982846113442221, "learning_rate": 1.0521779854372353e-05, "loss": 1.038, "step": 4767 }, { "epoch": 0.4990057561486133, "grad_norm": 2.3854040119508233, "learning_rate": 1.0518394721763365e-05, "loss": 1.0746, "step": 4768 }, { "epoch": 0.49911041339612766, "grad_norm": 1.9707658343258043, "learning_rate": 1.051500952958977e-05, "loss": 0.8012, "step": 4769 }, { "epoch": 0.49921507064364207, "grad_norm": 2.1046625201814746, "learning_rate": 1.051162427824054e-05, "loss": 0.9312, "step": 4770 }, { "epoch": 0.4993197278911565, "grad_norm": 1.6661194083882027, "learning_rate": 1.0508238968104638e-05, "loss": 0.7945, "step": 4771 }, { "epoch": 0.4994243851386709, "grad_norm": 2.083849894430902, "learning_rate": 1.050485359957105e-05, "loss": 0.8743, "step": 4772 }, { "epoch": 0.49952904238618523, "grad_norm": 2.2924292467485974, "learning_rate": 1.0501468173028762e-05, "loss": 1.0575, "step": 4773 }, { "epoch": 0.49963369963369964, "grad_norm": 1.9733099739758075, "learning_rate": 1.0498082688866762e-05, "loss": 0.9393, "step": 4774 }, { "epoch": 0.49973835688121404, "grad_norm": 1.9074190492709282, "learning_rate": 1.0494697147474053e-05, "loss": 0.9925, "step": 4775 }, { "epoch": 0.4998430141287284, "grad_norm": 1.859529806411289, "learning_rate": 1.0491311549239635e-05, "loss": 1.0015, "step": 4776 }, { "epoch": 0.4999476713762428, "grad_norm": 1.8791033216965147, "learning_rate": 1.0487925894552529e-05, "loss": 1.0242, "step": 4777 }, { "epoch": 0.5000523286237571, "grad_norm": 1.9342110638713974, "learning_rate": 1.0484540183801744e-05, "loss": 0.9966, "step": 4778 }, { "epoch": 0.5001569858712716, "grad_norm": 1.7724120947349102, "learning_rate": 1.0481154417376313e-05, "loss": 0.9939, "step": 4779 }, { "epoch": 0.500261643118786, "grad_norm": 2.154486064088719, "learning_rate": 1.0477768595665263e-05, "loss": 1.0931, "step": 4780 }, { "epoch": 0.5003663003663004, "grad_norm": 2.1518890500826506, "learning_rate": 1.0474382719057633e-05, "loss": 0.9757, "step": 4781 }, { "epoch": 0.5004709576138148, "grad_norm": 1.8763970416206928, "learning_rate": 1.0470996787942466e-05, "loss": 0.9491, "step": 4782 }, { "epoch": 0.5005756148613292, "grad_norm": 2.3823417687483732, "learning_rate": 1.0467610802708814e-05, "loss": 1.045, "step": 4783 }, { "epoch": 0.5006802721088436, "grad_norm": 2.0877142215744087, "learning_rate": 1.0464224763745737e-05, "loss": 0.8905, "step": 4784 }, { "epoch": 0.5007849293563579, "grad_norm": 1.8525479505640599, "learning_rate": 1.0460838671442286e-05, "loss": 0.9826, "step": 4785 }, { "epoch": 0.5008895866038723, "grad_norm": 2.029476451987434, "learning_rate": 1.0457452526187541e-05, "loss": 1.035, "step": 4786 }, { "epoch": 0.5009942438513867, "grad_norm": 2.0510409778147842, "learning_rate": 1.0454066328370573e-05, "loss": 1.0014, "step": 4787 }, { "epoch": 0.5010989010989011, "grad_norm": 2.050804457353104, "learning_rate": 1.0450680078380465e-05, "loss": 1.0106, "step": 4788 }, { "epoch": 0.5012035583464155, "grad_norm": 1.9228482758753471, "learning_rate": 1.04472937766063e-05, "loss": 1.0036, "step": 4789 }, { "epoch": 0.5013082155939299, "grad_norm": 1.8304036460035518, "learning_rate": 1.0443907423437177e-05, "loss": 0.9524, "step": 4790 }, { "epoch": 0.5014128728414443, "grad_norm": 2.2850976774419918, "learning_rate": 1.0440521019262187e-05, "loss": 0.8331, "step": 4791 }, { "epoch": 0.5015175300889586, "grad_norm": 2.038030446976533, "learning_rate": 1.0437134564470442e-05, "loss": 0.9628, "step": 4792 }, { "epoch": 0.501622187336473, "grad_norm": 2.113415421458547, "learning_rate": 1.0433748059451045e-05, "loss": 1.0095, "step": 4793 }, { "epoch": 0.5017268445839874, "grad_norm": 2.1782622269854675, "learning_rate": 1.0430361504593118e-05, "loss": 0.9503, "step": 4794 }, { "epoch": 0.5018315018315018, "grad_norm": 2.238496790294893, "learning_rate": 1.0426974900285784e-05, "loss": 1.0506, "step": 4795 }, { "epoch": 0.5019361590790162, "grad_norm": 2.1891901037629746, "learning_rate": 1.0423588246918168e-05, "loss": 1.0063, "step": 4796 }, { "epoch": 0.5020408163265306, "grad_norm": 2.092133045742852, "learning_rate": 1.0420201544879405e-05, "loss": 1.0078, "step": 4797 }, { "epoch": 0.502145473574045, "grad_norm": 1.9013198519529835, "learning_rate": 1.0416814794558628e-05, "loss": 0.9786, "step": 4798 }, { "epoch": 0.5022501308215594, "grad_norm": 2.1099077143715914, "learning_rate": 1.0413427996344991e-05, "loss": 0.9702, "step": 4799 }, { "epoch": 0.5023547880690737, "grad_norm": 1.7239541182847857, "learning_rate": 1.0410041150627636e-05, "loss": 0.9089, "step": 4800 }, { "epoch": 0.5024594453165881, "grad_norm": 2.3381648619636137, "learning_rate": 1.0406654257795723e-05, "loss": 0.7951, "step": 4801 }, { "epoch": 0.5025641025641026, "grad_norm": 2.282729233569848, "learning_rate": 1.040326731823841e-05, "loss": 1.0216, "step": 4802 }, { "epoch": 0.502668759811617, "grad_norm": 2.2977040163903264, "learning_rate": 1.0399880332344865e-05, "loss": 1.0084, "step": 4803 }, { "epoch": 0.5027734170591314, "grad_norm": 2.006735467260644, "learning_rate": 1.0396493300504263e-05, "loss": 0.9048, "step": 4804 }, { "epoch": 0.5028780743066458, "grad_norm": 1.8040980766553913, "learning_rate": 1.0393106223105771e-05, "loss": 0.9794, "step": 4805 }, { "epoch": 0.5029827315541602, "grad_norm": 2.131327716426053, "learning_rate": 1.0389719100538583e-05, "loss": 0.9558, "step": 4806 }, { "epoch": 0.5030873888016745, "grad_norm": 2.0367496240204246, "learning_rate": 1.0386331933191874e-05, "loss": 0.8524, "step": 4807 }, { "epoch": 0.5031920460491889, "grad_norm": 1.927303561475201, "learning_rate": 1.0382944721454847e-05, "loss": 0.8965, "step": 4808 }, { "epoch": 0.5032967032967033, "grad_norm": 2.4684209425092485, "learning_rate": 1.0379557465716696e-05, "loss": 0.8173, "step": 4809 }, { "epoch": 0.5034013605442177, "grad_norm": 1.9985491017896084, "learning_rate": 1.0376170166366619e-05, "loss": 1.0035, "step": 4810 }, { "epoch": 0.5035060177917321, "grad_norm": 1.8490510380262115, "learning_rate": 1.037278282379383e-05, "loss": 0.889, "step": 4811 }, { "epoch": 0.5036106750392465, "grad_norm": 1.9348394119418788, "learning_rate": 1.0369395438387535e-05, "loss": 0.8902, "step": 4812 }, { "epoch": 0.5037153322867609, "grad_norm": 2.08472258783819, "learning_rate": 1.0366008010536962e-05, "loss": 0.8893, "step": 4813 }, { "epoch": 0.5038199895342752, "grad_norm": 1.8325956711014155, "learning_rate": 1.036262054063132e-05, "loss": 0.9288, "step": 4814 }, { "epoch": 0.5039246467817896, "grad_norm": 1.9933367795637793, "learning_rate": 1.0359233029059845e-05, "loss": 0.886, "step": 4815 }, { "epoch": 0.504029304029304, "grad_norm": 2.1844459941163157, "learning_rate": 1.035584547621177e-05, "loss": 0.8664, "step": 4816 }, { "epoch": 0.5041339612768184, "grad_norm": 2.124796059309372, "learning_rate": 1.0352457882476326e-05, "loss": 0.901, "step": 4817 }, { "epoch": 0.5042386185243328, "grad_norm": 2.0620543862987395, "learning_rate": 1.0349070248242756e-05, "loss": 1.0371, "step": 4818 }, { "epoch": 0.5043432757718472, "grad_norm": 1.9219187469619123, "learning_rate": 1.0345682573900306e-05, "loss": 0.8801, "step": 4819 }, { "epoch": 0.5044479330193616, "grad_norm": 1.9754947289145173, "learning_rate": 1.0342294859838228e-05, "loss": 0.9137, "step": 4820 }, { "epoch": 0.5045525902668759, "grad_norm": 1.9746909929500338, "learning_rate": 1.0338907106445781e-05, "loss": 1.008, "step": 4821 }, { "epoch": 0.5046572475143903, "grad_norm": 1.827473906585448, "learning_rate": 1.0335519314112217e-05, "loss": 0.8131, "step": 4822 }, { "epoch": 0.5047619047619047, "grad_norm": 2.196935612027101, "learning_rate": 1.0332131483226805e-05, "loss": 1.0269, "step": 4823 }, { "epoch": 0.5048665620094192, "grad_norm": 2.0147704818493, "learning_rate": 1.0328743614178807e-05, "loss": 0.8966, "step": 4824 }, { "epoch": 0.5049712192569336, "grad_norm": 1.7839156040063775, "learning_rate": 1.0325355707357507e-05, "loss": 0.9519, "step": 4825 }, { "epoch": 0.505075876504448, "grad_norm": 2.299507673691946, "learning_rate": 1.0321967763152176e-05, "loss": 0.959, "step": 4826 }, { "epoch": 0.5051805337519624, "grad_norm": 2.2267555529088825, "learning_rate": 1.0318579781952095e-05, "loss": 0.8851, "step": 4827 }, { "epoch": 0.5052851909994767, "grad_norm": 2.102441977176161, "learning_rate": 1.0315191764146551e-05, "loss": 0.9512, "step": 4828 }, { "epoch": 0.5053898482469911, "grad_norm": 1.9711747773484447, "learning_rate": 1.0311803710124832e-05, "loss": 1.0464, "step": 4829 }, { "epoch": 0.5054945054945055, "grad_norm": 2.1197570532175285, "learning_rate": 1.030841562027624e-05, "loss": 0.914, "step": 4830 }, { "epoch": 0.5055991627420199, "grad_norm": 2.2152984722921327, "learning_rate": 1.0305027494990065e-05, "loss": 0.9595, "step": 4831 }, { "epoch": 0.5057038199895343, "grad_norm": 2.006104338040643, "learning_rate": 1.0301639334655612e-05, "loss": 0.9543, "step": 4832 }, { "epoch": 0.5058084772370487, "grad_norm": 2.233956630960079, "learning_rate": 1.029825113966219e-05, "loss": 1.0661, "step": 4833 }, { "epoch": 0.5059131344845631, "grad_norm": 1.8056443704209098, "learning_rate": 1.0294862910399106e-05, "loss": 0.778, "step": 4834 }, { "epoch": 0.5060177917320774, "grad_norm": 1.8210897127610195, "learning_rate": 1.0291474647255677e-05, "loss": 1.0009, "step": 4835 }, { "epoch": 0.5061224489795918, "grad_norm": 2.265489705370913, "learning_rate": 1.0288086350621219e-05, "loss": 0.8936, "step": 4836 }, { "epoch": 0.5062271062271062, "grad_norm": 2.2329819145492724, "learning_rate": 1.0284698020885054e-05, "loss": 0.8475, "step": 4837 }, { "epoch": 0.5063317634746206, "grad_norm": 1.9140187907534734, "learning_rate": 1.0281309658436514e-05, "loss": 0.9804, "step": 4838 }, { "epoch": 0.506436420722135, "grad_norm": 2.1495837609117077, "learning_rate": 1.0277921263664918e-05, "loss": 0.9649, "step": 4839 }, { "epoch": 0.5065410779696494, "grad_norm": 2.147434202179756, "learning_rate": 1.027453283695961e-05, "loss": 0.9257, "step": 4840 }, { "epoch": 0.5066457352171638, "grad_norm": 2.1508850173428504, "learning_rate": 1.0271144378709919e-05, "loss": 1.0815, "step": 4841 }, { "epoch": 0.5067503924646782, "grad_norm": 1.890843092683489, "learning_rate": 1.026775588930519e-05, "loss": 0.9078, "step": 4842 }, { "epoch": 0.5068550497121925, "grad_norm": 1.718743135555168, "learning_rate": 1.0264367369134767e-05, "loss": 0.8615, "step": 4843 }, { "epoch": 0.5069597069597069, "grad_norm": 2.1338027250972718, "learning_rate": 1.0260978818588e-05, "loss": 0.9485, "step": 4844 }, { "epoch": 0.5070643642072213, "grad_norm": 2.3030713079734197, "learning_rate": 1.0257590238054238e-05, "loss": 1.0152, "step": 4845 }, { "epoch": 0.5071690214547357, "grad_norm": 2.108066963927156, "learning_rate": 1.0254201627922829e-05, "loss": 0.8987, "step": 4846 }, { "epoch": 0.5072736787022502, "grad_norm": 1.9348248604669798, "learning_rate": 1.0250812988583144e-05, "loss": 0.8945, "step": 4847 }, { "epoch": 0.5073783359497646, "grad_norm": 2.0214829385791426, "learning_rate": 1.0247424320424534e-05, "loss": 0.9324, "step": 4848 }, { "epoch": 0.507482993197279, "grad_norm": 2.103062514985118, "learning_rate": 1.024403562383637e-05, "loss": 0.9869, "step": 4849 }, { "epoch": 0.5075876504447933, "grad_norm": 2.0388689399057633, "learning_rate": 1.0240646899208018e-05, "loss": 1.0085, "step": 4850 }, { "epoch": 0.5076923076923077, "grad_norm": 2.2019460294281243, "learning_rate": 1.0237258146928849e-05, "loss": 0.9457, "step": 4851 }, { "epoch": 0.5077969649398221, "grad_norm": 1.8404221416654807, "learning_rate": 1.023386936738824e-05, "loss": 0.9133, "step": 4852 }, { "epoch": 0.5079016221873365, "grad_norm": 1.9361603963782874, "learning_rate": 1.0230480560975564e-05, "loss": 0.9412, "step": 4853 }, { "epoch": 0.5080062794348509, "grad_norm": 2.0201875001702807, "learning_rate": 1.0227091728080203e-05, "loss": 0.9903, "step": 4854 }, { "epoch": 0.5081109366823653, "grad_norm": 2.174119911436257, "learning_rate": 1.0223702869091548e-05, "loss": 0.9529, "step": 4855 }, { "epoch": 0.5082155939298797, "grad_norm": 2.1378859332077047, "learning_rate": 1.0220313984398972e-05, "loss": 1.0162, "step": 4856 }, { "epoch": 0.508320251177394, "grad_norm": 1.9197826619859129, "learning_rate": 1.0216925074391879e-05, "loss": 0.9307, "step": 4857 }, { "epoch": 0.5084249084249084, "grad_norm": 2.2932973290164096, "learning_rate": 1.0213536139459651e-05, "loss": 1.0056, "step": 4858 }, { "epoch": 0.5085295656724228, "grad_norm": 2.2317819242913695, "learning_rate": 1.021014717999169e-05, "loss": 0.9703, "step": 4859 }, { "epoch": 0.5086342229199372, "grad_norm": 2.1281106553255675, "learning_rate": 1.0206758196377391e-05, "loss": 1.0403, "step": 4860 }, { "epoch": 0.5087388801674516, "grad_norm": 2.171215516994493, "learning_rate": 1.0203369189006157e-05, "loss": 0.8991, "step": 4861 }, { "epoch": 0.508843537414966, "grad_norm": 2.255784354708711, "learning_rate": 1.019998015826739e-05, "loss": 0.8639, "step": 4862 }, { "epoch": 0.5089481946624804, "grad_norm": 2.2311385876757166, "learning_rate": 1.0196591104550497e-05, "loss": 0.9527, "step": 4863 }, { "epoch": 0.5090528519099947, "grad_norm": 2.1216367946605614, "learning_rate": 1.0193202028244891e-05, "loss": 0.7992, "step": 4864 }, { "epoch": 0.5091575091575091, "grad_norm": 2.2266924800513928, "learning_rate": 1.0189812929739976e-05, "loss": 0.9312, "step": 4865 }, { "epoch": 0.5092621664050235, "grad_norm": 2.184861742273941, "learning_rate": 1.0186423809425175e-05, "loss": 0.9421, "step": 4866 }, { "epoch": 0.5093668236525379, "grad_norm": 2.2359774420575045, "learning_rate": 1.0183034667689898e-05, "loss": 0.8726, "step": 4867 }, { "epoch": 0.5094714809000523, "grad_norm": 2.251077097785011, "learning_rate": 1.0179645504923565e-05, "loss": 0.9215, "step": 4868 }, { "epoch": 0.5095761381475667, "grad_norm": 1.962671210761371, "learning_rate": 1.0176256321515601e-05, "loss": 0.9715, "step": 4869 }, { "epoch": 0.5096807953950812, "grad_norm": 2.1786034572062687, "learning_rate": 1.0172867117855428e-05, "loss": 0.9004, "step": 4870 }, { "epoch": 0.5097854526425954, "grad_norm": 1.984923185389256, "learning_rate": 1.0169477894332473e-05, "loss": 1.0999, "step": 4871 }, { "epoch": 0.5098901098901099, "grad_norm": 2.3249280492593107, "learning_rate": 1.016608865133616e-05, "loss": 1.0306, "step": 4872 }, { "epoch": 0.5099947671376243, "grad_norm": 2.631735537137025, "learning_rate": 1.0162699389255933e-05, "loss": 0.7941, "step": 4873 }, { "epoch": 0.5100994243851387, "grad_norm": 2.074101642604738, "learning_rate": 1.0159310108481212e-05, "loss": 0.8317, "step": 4874 }, { "epoch": 0.5102040816326531, "grad_norm": 2.0707425216929094, "learning_rate": 1.0155920809401437e-05, "loss": 0.9869, "step": 4875 }, { "epoch": 0.5103087388801675, "grad_norm": 1.9855437335816901, "learning_rate": 1.015253149240604e-05, "loss": 0.8376, "step": 4876 }, { "epoch": 0.5104133961276819, "grad_norm": 2.125627681296742, "learning_rate": 1.014914215788447e-05, "loss": 0.8265, "step": 4877 }, { "epoch": 0.5105180533751962, "grad_norm": 2.0097025281038183, "learning_rate": 1.0145752806226165e-05, "loss": 1.0651, "step": 4878 }, { "epoch": 0.5106227106227106, "grad_norm": 1.9381824884213676, "learning_rate": 1.0142363437820566e-05, "loss": 0.9377, "step": 4879 }, { "epoch": 0.510727367870225, "grad_norm": 1.8057048082927534, "learning_rate": 1.0138974053057118e-05, "loss": 1.0258, "step": 4880 }, { "epoch": 0.5108320251177394, "grad_norm": 1.9284372353034598, "learning_rate": 1.013558465232527e-05, "loss": 0.9767, "step": 4881 }, { "epoch": 0.5109366823652538, "grad_norm": 2.0912145996565807, "learning_rate": 1.0132195236014471e-05, "loss": 0.9621, "step": 4882 }, { "epoch": 0.5110413396127682, "grad_norm": 2.4074410237986203, "learning_rate": 1.0128805804514173e-05, "loss": 0.9953, "step": 4883 }, { "epoch": 0.5111459968602826, "grad_norm": 2.1350122261578948, "learning_rate": 1.0125416358213825e-05, "loss": 0.9114, "step": 4884 }, { "epoch": 0.511250654107797, "grad_norm": 2.1766627738568824, "learning_rate": 1.0122026897502882e-05, "loss": 0.9303, "step": 4885 }, { "epoch": 0.5113553113553113, "grad_norm": 1.659780196796726, "learning_rate": 1.0118637422770803e-05, "loss": 0.8643, "step": 4886 }, { "epoch": 0.5114599686028257, "grad_norm": 1.8759283670973985, "learning_rate": 1.0115247934407045e-05, "loss": 0.9288, "step": 4887 }, { "epoch": 0.5115646258503401, "grad_norm": 1.981277049724218, "learning_rate": 1.0111858432801063e-05, "loss": 0.807, "step": 4888 }, { "epoch": 0.5116692830978545, "grad_norm": 2.104064781470603, "learning_rate": 1.0108468918342317e-05, "loss": 0.9398, "step": 4889 }, { "epoch": 0.5117739403453689, "grad_norm": 1.862863320953064, "learning_rate": 1.0105079391420281e-05, "loss": 0.8971, "step": 4890 }, { "epoch": 0.5118785975928833, "grad_norm": 2.0632811238069872, "learning_rate": 1.0101689852424404e-05, "loss": 0.9229, "step": 4891 }, { "epoch": 0.5119832548403978, "grad_norm": 2.0328293416753787, "learning_rate": 1.0098300301744159e-05, "loss": 0.8541, "step": 4892 }, { "epoch": 0.512087912087912, "grad_norm": 2.2183959790280094, "learning_rate": 1.0094910739769007e-05, "loss": 0.9588, "step": 4893 }, { "epoch": 0.5121925693354265, "grad_norm": 2.0086969358639597, "learning_rate": 1.0091521166888422e-05, "loss": 0.9431, "step": 4894 }, { "epoch": 0.5122972265829409, "grad_norm": 1.8566480087720016, "learning_rate": 1.0088131583491869e-05, "loss": 0.9237, "step": 4895 }, { "epoch": 0.5124018838304553, "grad_norm": 1.978008384805466, "learning_rate": 1.0084741989968818e-05, "loss": 0.9383, "step": 4896 }, { "epoch": 0.5125065410779697, "grad_norm": 2.2899059061417146, "learning_rate": 1.008135238670874e-05, "loss": 1.0155, "step": 4897 }, { "epoch": 0.5126111983254841, "grad_norm": 2.1435767412931215, "learning_rate": 1.0077962774101108e-05, "loss": 0.7721, "step": 4898 }, { "epoch": 0.5127158555729985, "grad_norm": 1.9651414340794402, "learning_rate": 1.0074573152535397e-05, "loss": 0.8895, "step": 4899 }, { "epoch": 0.5128205128205128, "grad_norm": 1.9700996694269328, "learning_rate": 1.0071183522401078e-05, "loss": 0.8386, "step": 4900 }, { "epoch": 0.5129251700680272, "grad_norm": 2.043250711393673, "learning_rate": 1.0067793884087626e-05, "loss": 1.0259, "step": 4901 }, { "epoch": 0.5130298273155416, "grad_norm": 2.2215963106479064, "learning_rate": 1.0064404237984522e-05, "loss": 1.0239, "step": 4902 }, { "epoch": 0.513134484563056, "grad_norm": 2.043312538764727, "learning_rate": 1.0061014584481242e-05, "loss": 0.9675, "step": 4903 }, { "epoch": 0.5132391418105704, "grad_norm": 2.017720307338605, "learning_rate": 1.0057624923967258e-05, "loss": 1.0402, "step": 4904 }, { "epoch": 0.5133437990580848, "grad_norm": 1.9522219810870778, "learning_rate": 1.0054235256832057e-05, "loss": 0.9143, "step": 4905 }, { "epoch": 0.5134484563055992, "grad_norm": 2.3060614281471423, "learning_rate": 1.0050845583465112e-05, "loss": 0.8615, "step": 4906 }, { "epoch": 0.5135531135531135, "grad_norm": 2.070929318747068, "learning_rate": 1.004745590425591e-05, "loss": 0.8553, "step": 4907 }, { "epoch": 0.5136577708006279, "grad_norm": 2.198912283882256, "learning_rate": 1.0044066219593925e-05, "loss": 0.9677, "step": 4908 }, { "epoch": 0.5137624280481423, "grad_norm": 2.2590338340709204, "learning_rate": 1.0040676529868644e-05, "loss": 1.0453, "step": 4909 }, { "epoch": 0.5138670852956567, "grad_norm": 2.3124159355511766, "learning_rate": 1.0037286835469546e-05, "loss": 1.1255, "step": 4910 }, { "epoch": 0.5139717425431711, "grad_norm": 1.8769919169356388, "learning_rate": 1.0033897136786115e-05, "loss": 0.8565, "step": 4911 }, { "epoch": 0.5140763997906855, "grad_norm": 2.032937691495361, "learning_rate": 1.0030507434207836e-05, "loss": 0.9021, "step": 4912 }, { "epoch": 0.5141810570381999, "grad_norm": 1.948805286567476, "learning_rate": 1.002711772812419e-05, "loss": 0.9255, "step": 4913 }, { "epoch": 0.5142857142857142, "grad_norm": 1.8811051091573516, "learning_rate": 1.0023728018924663e-05, "loss": 0.9634, "step": 4914 }, { "epoch": 0.5143903715332286, "grad_norm": 1.895037622840537, "learning_rate": 1.0020338306998739e-05, "loss": 0.7764, "step": 4915 }, { "epoch": 0.514495028780743, "grad_norm": 2.3622734576508417, "learning_rate": 1.0016948592735898e-05, "loss": 0.9616, "step": 4916 }, { "epoch": 0.5145996860282575, "grad_norm": 2.1068542919039004, "learning_rate": 1.0013558876525635e-05, "loss": 0.9764, "step": 4917 }, { "epoch": 0.5147043432757719, "grad_norm": 2.0518452033894428, "learning_rate": 1.0010169158757425e-05, "loss": 0.9017, "step": 4918 }, { "epoch": 0.5148090005232863, "grad_norm": 2.268883115799465, "learning_rate": 1.000677943982076e-05, "loss": 0.8253, "step": 4919 }, { "epoch": 0.5149136577708007, "grad_norm": 1.9101165871841854, "learning_rate": 1.000338972010512e-05, "loss": 1.0396, "step": 4920 }, { "epoch": 0.515018315018315, "grad_norm": 1.9018696065166185, "learning_rate": 1e-05, "loss": 0.8675, "step": 4921 }, { "epoch": 0.5151229722658294, "grad_norm": 1.9135895846787352, "learning_rate": 9.99661027989488e-06, "loss": 0.953, "step": 4922 }, { "epoch": 0.5152276295133438, "grad_norm": 2.2944292672819744, "learning_rate": 9.993220560179244e-06, "loss": 0.9654, "step": 4923 }, { "epoch": 0.5153322867608582, "grad_norm": 2.3373997913546187, "learning_rate": 9.98983084124258e-06, "loss": 1.0137, "step": 4924 }, { "epoch": 0.5154369440083726, "grad_norm": 2.1760314698321697, "learning_rate": 9.986441123474371e-06, "loss": 1.0149, "step": 4925 }, { "epoch": 0.515541601255887, "grad_norm": 2.0995522000742635, "learning_rate": 9.983051407264102e-06, "loss": 0.994, "step": 4926 }, { "epoch": 0.5156462585034014, "grad_norm": 2.135301385827847, "learning_rate": 9.979661693001267e-06, "loss": 0.9958, "step": 4927 }, { "epoch": 0.5157509157509158, "grad_norm": 2.016177869195349, "learning_rate": 9.976271981075339e-06, "loss": 1.0032, "step": 4928 }, { "epoch": 0.5158555729984301, "grad_norm": 2.198242514789201, "learning_rate": 9.972882271875814e-06, "loss": 0.9751, "step": 4929 }, { "epoch": 0.5159602302459445, "grad_norm": 2.568304071771772, "learning_rate": 9.969492565792166e-06, "loss": 0.8212, "step": 4930 }, { "epoch": 0.5160648874934589, "grad_norm": 2.150285720935691, "learning_rate": 9.966102863213889e-06, "loss": 0.8911, "step": 4931 }, { "epoch": 0.5161695447409733, "grad_norm": 1.8427410847871126, "learning_rate": 9.962713164530457e-06, "loss": 0.9333, "step": 4932 }, { "epoch": 0.5162742019884877, "grad_norm": 1.9777153731928134, "learning_rate": 9.95932347013136e-06, "loss": 0.9572, "step": 4933 }, { "epoch": 0.5163788592360021, "grad_norm": 2.023147402384463, "learning_rate": 9.955933780406079e-06, "loss": 0.9403, "step": 4934 }, { "epoch": 0.5164835164835165, "grad_norm": 2.339289650649997, "learning_rate": 9.952544095744092e-06, "loss": 1.012, "step": 4935 }, { "epoch": 0.5165881737310308, "grad_norm": 2.0703824315481447, "learning_rate": 9.94915441653489e-06, "loss": 0.8705, "step": 4936 }, { "epoch": 0.5166928309785452, "grad_norm": 2.1054511171333252, "learning_rate": 9.945764743167947e-06, "loss": 0.8875, "step": 4937 }, { "epoch": 0.5167974882260596, "grad_norm": 2.0257304303475108, "learning_rate": 9.942375076032745e-06, "loss": 0.792, "step": 4938 }, { "epoch": 0.516902145473574, "grad_norm": 2.1389835239632298, "learning_rate": 9.938985415518763e-06, "loss": 0.8896, "step": 4939 }, { "epoch": 0.5170068027210885, "grad_norm": 2.2239270796318156, "learning_rate": 9.935595762015481e-06, "loss": 0.9462, "step": 4940 }, { "epoch": 0.5171114599686029, "grad_norm": 1.922334466980784, "learning_rate": 9.932206115912379e-06, "loss": 0.9672, "step": 4941 }, { "epoch": 0.5172161172161173, "grad_norm": 2.0497231083957583, "learning_rate": 9.928816477598927e-06, "loss": 0.9285, "step": 4942 }, { "epoch": 0.5173207744636316, "grad_norm": 2.036361821419443, "learning_rate": 9.925426847464605e-06, "loss": 0.9388, "step": 4943 }, { "epoch": 0.517425431711146, "grad_norm": 2.0485533284742767, "learning_rate": 9.922037225898893e-06, "loss": 1.0012, "step": 4944 }, { "epoch": 0.5175300889586604, "grad_norm": 1.781792397254675, "learning_rate": 9.918647613291262e-06, "loss": 0.9396, "step": 4945 }, { "epoch": 0.5176347462061748, "grad_norm": 2.6325182895039045, "learning_rate": 9.915258010031184e-06, "loss": 1.0054, "step": 4946 }, { "epoch": 0.5177394034536892, "grad_norm": 2.24395320097852, "learning_rate": 9.911868416508133e-06, "loss": 0.8993, "step": 4947 }, { "epoch": 0.5178440607012036, "grad_norm": 1.950860927917329, "learning_rate": 9.908478833111581e-06, "loss": 0.7653, "step": 4948 }, { "epoch": 0.517948717948718, "grad_norm": 2.3503369439679136, "learning_rate": 9.905089260230994e-06, "loss": 0.8748, "step": 4949 }, { "epoch": 0.5180533751962323, "grad_norm": 1.8774277642314408, "learning_rate": 9.901699698255846e-06, "loss": 0.989, "step": 4950 }, { "epoch": 0.5181580324437467, "grad_norm": 1.8303975022931225, "learning_rate": 9.8983101475756e-06, "loss": 1.0733, "step": 4951 }, { "epoch": 0.5182626896912611, "grad_norm": 2.317212810335493, "learning_rate": 9.89492060857972e-06, "loss": 1.1076, "step": 4952 }, { "epoch": 0.5183673469387755, "grad_norm": 2.1808943237436993, "learning_rate": 9.891531081657681e-06, "loss": 0.9556, "step": 4953 }, { "epoch": 0.5184720041862899, "grad_norm": 2.029241896808696, "learning_rate": 9.888141567198938e-06, "loss": 1.0964, "step": 4954 }, { "epoch": 0.5185766614338043, "grad_norm": 1.9949608216514931, "learning_rate": 9.88475206559296e-06, "loss": 0.9498, "step": 4955 }, { "epoch": 0.5186813186813187, "grad_norm": 2.2830478335421174, "learning_rate": 9.881362577229199e-06, "loss": 1.0157, "step": 4956 }, { "epoch": 0.518785975928833, "grad_norm": 1.9690660007947534, "learning_rate": 9.87797310249712e-06, "loss": 1.0411, "step": 4957 }, { "epoch": 0.5188906331763474, "grad_norm": 1.977273523937895, "learning_rate": 9.874583641786178e-06, "loss": 0.8597, "step": 4958 }, { "epoch": 0.5189952904238618, "grad_norm": 1.9243362489372648, "learning_rate": 9.871194195485833e-06, "loss": 0.9745, "step": 4959 }, { "epoch": 0.5190999476713762, "grad_norm": 2.1432993449566737, "learning_rate": 9.867804763985534e-06, "loss": 1.0604, "step": 4960 }, { "epoch": 0.5192046049188906, "grad_norm": 2.0876636574915595, "learning_rate": 9.864415347674732e-06, "loss": 0.9654, "step": 4961 }, { "epoch": 0.519309262166405, "grad_norm": 1.9248359573202058, "learning_rate": 9.861025946942883e-06, "loss": 0.8849, "step": 4962 }, { "epoch": 0.5194139194139195, "grad_norm": 2.151905236783193, "learning_rate": 9.857636562179439e-06, "loss": 0.917, "step": 4963 }, { "epoch": 0.5195185766614338, "grad_norm": 2.40363372620985, "learning_rate": 9.854247193773837e-06, "loss": 0.8764, "step": 4964 }, { "epoch": 0.5196232339089482, "grad_norm": 2.2427852684647425, "learning_rate": 9.850857842115533e-06, "loss": 1.0175, "step": 4965 }, { "epoch": 0.5197278911564626, "grad_norm": 1.9138156790964573, "learning_rate": 9.847468507593961e-06, "loss": 0.8898, "step": 4966 }, { "epoch": 0.519832548403977, "grad_norm": 2.0160695177069603, "learning_rate": 9.844079190598568e-06, "loss": 0.9839, "step": 4967 }, { "epoch": 0.5199372056514914, "grad_norm": 1.8516868195463738, "learning_rate": 9.840689891518793e-06, "loss": 1.0036, "step": 4968 }, { "epoch": 0.5200418628990058, "grad_norm": 2.1447596627697254, "learning_rate": 9.837300610744069e-06, "loss": 0.8792, "step": 4969 }, { "epoch": 0.5201465201465202, "grad_norm": 2.4054927998015447, "learning_rate": 9.833911348663838e-06, "loss": 1.0158, "step": 4970 }, { "epoch": 0.5202511773940346, "grad_norm": 2.228363478525115, "learning_rate": 9.83052210566753e-06, "loss": 0.9684, "step": 4971 }, { "epoch": 0.5203558346415489, "grad_norm": 1.8149258250247282, "learning_rate": 9.827132882144576e-06, "loss": 0.9464, "step": 4972 }, { "epoch": 0.5204604918890633, "grad_norm": 2.380722994225285, "learning_rate": 9.8237436784844e-06, "loss": 1.027, "step": 4973 }, { "epoch": 0.5205651491365777, "grad_norm": 1.7873907854560183, "learning_rate": 9.820354495076439e-06, "loss": 0.769, "step": 4974 }, { "epoch": 0.5206698063840921, "grad_norm": 2.2802115646024133, "learning_rate": 9.816965332310106e-06, "loss": 0.9555, "step": 4975 }, { "epoch": 0.5207744636316065, "grad_norm": 2.038426429660569, "learning_rate": 9.81357619057483e-06, "loss": 0.9761, "step": 4976 }, { "epoch": 0.5208791208791209, "grad_norm": 2.264959189290348, "learning_rate": 9.810187070260029e-06, "loss": 0.8613, "step": 4977 }, { "epoch": 0.5209837781266353, "grad_norm": 2.3352359731890537, "learning_rate": 9.80679797175511e-06, "loss": 0.9207, "step": 4978 }, { "epoch": 0.5210884353741496, "grad_norm": 1.813637189730288, "learning_rate": 9.803408895449502e-06, "loss": 0.8949, "step": 4979 }, { "epoch": 0.521193092621664, "grad_norm": 2.0367522181120985, "learning_rate": 9.800019841732613e-06, "loss": 0.9757, "step": 4980 }, { "epoch": 0.5212977498691784, "grad_norm": 2.098582096085878, "learning_rate": 9.796630810993844e-06, "loss": 1.0026, "step": 4981 }, { "epoch": 0.5214024071166928, "grad_norm": 1.9944200217385741, "learning_rate": 9.793241803622612e-06, "loss": 0.8153, "step": 4982 }, { "epoch": 0.5215070643642072, "grad_norm": 2.0471451034556147, "learning_rate": 9.789852820008312e-06, "loss": 0.782, "step": 4983 }, { "epoch": 0.5216117216117216, "grad_norm": 2.199390774507097, "learning_rate": 9.786463860540352e-06, "loss": 0.8614, "step": 4984 }, { "epoch": 0.521716378859236, "grad_norm": 2.071453495859607, "learning_rate": 9.783074925608126e-06, "loss": 0.8793, "step": 4985 }, { "epoch": 0.5218210361067503, "grad_norm": 2.3490583249057893, "learning_rate": 9.779686015601031e-06, "loss": 1.0274, "step": 4986 }, { "epoch": 0.5219256933542648, "grad_norm": 1.959370734056799, "learning_rate": 9.776297130908456e-06, "loss": 0.9316, "step": 4987 }, { "epoch": 0.5220303506017792, "grad_norm": 2.2160442511485825, "learning_rate": 9.772908271919797e-06, "loss": 1.0251, "step": 4988 }, { "epoch": 0.5221350078492936, "grad_norm": 1.968606658582411, "learning_rate": 9.76951943902444e-06, "loss": 0.944, "step": 4989 }, { "epoch": 0.522239665096808, "grad_norm": 2.4107803674809736, "learning_rate": 9.766130632611762e-06, "loss": 0.9429, "step": 4990 }, { "epoch": 0.5223443223443224, "grad_norm": 2.0652044268509693, "learning_rate": 9.762741853071153e-06, "loss": 0.8697, "step": 4991 }, { "epoch": 0.5224489795918368, "grad_norm": 2.062990165066534, "learning_rate": 9.759353100791986e-06, "loss": 1.0389, "step": 4992 }, { "epoch": 0.5225536368393511, "grad_norm": 1.9364323150880431, "learning_rate": 9.755964376163633e-06, "loss": 0.9711, "step": 4993 }, { "epoch": 0.5226582940868655, "grad_norm": 2.4876581630290273, "learning_rate": 9.752575679575469e-06, "loss": 0.8624, "step": 4994 }, { "epoch": 0.5227629513343799, "grad_norm": 1.8708610631837064, "learning_rate": 9.749187011416858e-06, "loss": 0.8952, "step": 4995 }, { "epoch": 0.5228676085818943, "grad_norm": 2.0610166226921467, "learning_rate": 9.745798372077173e-06, "loss": 1.0246, "step": 4996 }, { "epoch": 0.5229722658294087, "grad_norm": 2.091437740906605, "learning_rate": 9.742409761945765e-06, "loss": 0.9257, "step": 4997 }, { "epoch": 0.5230769230769231, "grad_norm": 3.1322596751647236, "learning_rate": 9.739021181412003e-06, "loss": 0.9185, "step": 4998 }, { "epoch": 0.5231815803244375, "grad_norm": 2.983387965543901, "learning_rate": 9.735632630865234e-06, "loss": 0.8107, "step": 4999 }, { "epoch": 0.5232862375719518, "grad_norm": 2.3964467020952425, "learning_rate": 9.732244110694813e-06, "loss": 0.9936, "step": 5000 }, { "epoch": 0.5233908948194662, "grad_norm": 2.1655991778042893, "learning_rate": 9.728855621290086e-06, "loss": 0.881, "step": 5001 }, { "epoch": 0.5234955520669806, "grad_norm": 2.323301195527726, "learning_rate": 9.725467163040395e-06, "loss": 1.0034, "step": 5002 }, { "epoch": 0.523600209314495, "grad_norm": 2.1683566774088194, "learning_rate": 9.722078736335087e-06, "loss": 0.9435, "step": 5003 }, { "epoch": 0.5237048665620094, "grad_norm": 1.9021798201348261, "learning_rate": 9.71869034156349e-06, "loss": 0.9163, "step": 5004 }, { "epoch": 0.5238095238095238, "grad_norm": 2.1575573537521278, "learning_rate": 9.715301979114946e-06, "loss": 0.9994, "step": 5005 }, { "epoch": 0.5239141810570382, "grad_norm": 2.1460735218265174, "learning_rate": 9.711913649378785e-06, "loss": 0.9674, "step": 5006 }, { "epoch": 0.5240188383045525, "grad_norm": 2.0470323099465486, "learning_rate": 9.708525352744325e-06, "loss": 0.9025, "step": 5007 }, { "epoch": 0.5241234955520669, "grad_norm": 2.0043888800821907, "learning_rate": 9.705137089600898e-06, "loss": 0.943, "step": 5008 }, { "epoch": 0.5242281527995813, "grad_norm": 2.011463626160743, "learning_rate": 9.701748860337812e-06, "loss": 0.935, "step": 5009 }, { "epoch": 0.5243328100470958, "grad_norm": 2.050505275508048, "learning_rate": 9.698360665344391e-06, "loss": 0.9326, "step": 5010 }, { "epoch": 0.5244374672946102, "grad_norm": 2.144129516792977, "learning_rate": 9.694972505009938e-06, "loss": 1.0178, "step": 5011 }, { "epoch": 0.5245421245421246, "grad_norm": 1.9003656125321036, "learning_rate": 9.691584379723766e-06, "loss": 0.9608, "step": 5012 }, { "epoch": 0.524646781789639, "grad_norm": 2.2612768774796654, "learning_rate": 9.688196289875168e-06, "loss": 0.9817, "step": 5013 }, { "epoch": 0.5247514390371534, "grad_norm": 2.290640926354991, "learning_rate": 9.68480823585345e-06, "loss": 1.1034, "step": 5014 }, { "epoch": 0.5248560962846677, "grad_norm": 2.4607191178201475, "learning_rate": 9.681420218047909e-06, "loss": 0.9906, "step": 5015 }, { "epoch": 0.5249607535321821, "grad_norm": 1.9026816954278603, "learning_rate": 9.678032236847827e-06, "loss": 0.8915, "step": 5016 }, { "epoch": 0.5250654107796965, "grad_norm": 1.9421672245762756, "learning_rate": 9.674644292642496e-06, "loss": 0.9105, "step": 5017 }, { "epoch": 0.5251700680272109, "grad_norm": 2.4702863347349084, "learning_rate": 9.671256385821196e-06, "loss": 1.0937, "step": 5018 }, { "epoch": 0.5252747252747253, "grad_norm": 1.876747432470777, "learning_rate": 9.6678685167732e-06, "loss": 0.8908, "step": 5019 }, { "epoch": 0.5253793825222397, "grad_norm": 2.9876022744753934, "learning_rate": 9.66448068588779e-06, "loss": 0.9807, "step": 5020 }, { "epoch": 0.5254840397697541, "grad_norm": 1.9265704361668035, "learning_rate": 9.661092893554222e-06, "loss": 0.8965, "step": 5021 }, { "epoch": 0.5255886970172684, "grad_norm": 2.092597658236264, "learning_rate": 9.657705140161772e-06, "loss": 0.9318, "step": 5022 }, { "epoch": 0.5256933542647828, "grad_norm": 1.9546071290696667, "learning_rate": 9.654317426099695e-06, "loss": 0.9737, "step": 5023 }, { "epoch": 0.5257980115122972, "grad_norm": 1.9365552977725047, "learning_rate": 9.650929751757247e-06, "loss": 1.0049, "step": 5024 }, { "epoch": 0.5259026687598116, "grad_norm": 1.967006406063824, "learning_rate": 9.647542117523679e-06, "loss": 0.9506, "step": 5025 }, { "epoch": 0.526007326007326, "grad_norm": 2.3155282840857847, "learning_rate": 9.644154523788233e-06, "loss": 0.7686, "step": 5026 }, { "epoch": 0.5261119832548404, "grad_norm": 2.157265802659998, "learning_rate": 9.640766970940157e-06, "loss": 0.9982, "step": 5027 }, { "epoch": 0.5262166405023548, "grad_norm": 2.1389647147739823, "learning_rate": 9.637379459368682e-06, "loss": 0.8971, "step": 5028 }, { "epoch": 0.5263212977498691, "grad_norm": 2.0363015164707607, "learning_rate": 9.633991989463043e-06, "loss": 1.0196, "step": 5029 }, { "epoch": 0.5264259549973835, "grad_norm": 1.9294652609584093, "learning_rate": 9.630604561612465e-06, "loss": 0.7722, "step": 5030 }, { "epoch": 0.5265306122448979, "grad_norm": 2.342838953335117, "learning_rate": 9.627217176206172e-06, "loss": 0.9184, "step": 5031 }, { "epoch": 0.5266352694924124, "grad_norm": 1.9756381699879217, "learning_rate": 9.623829833633384e-06, "loss": 1.0176, "step": 5032 }, { "epoch": 0.5267399267399268, "grad_norm": 2.03546759419274, "learning_rate": 9.620442534283308e-06, "loss": 0.9261, "step": 5033 }, { "epoch": 0.5268445839874412, "grad_norm": 2.288452566068461, "learning_rate": 9.617055278545155e-06, "loss": 0.9991, "step": 5034 }, { "epoch": 0.5269492412349556, "grad_norm": 2.057195359204059, "learning_rate": 9.61366806680813e-06, "loss": 0.9043, "step": 5035 }, { "epoch": 0.5270538984824699, "grad_norm": 2.0034791907235134, "learning_rate": 9.610280899461422e-06, "loss": 0.8265, "step": 5036 }, { "epoch": 0.5271585557299843, "grad_norm": 2.300980115407969, "learning_rate": 9.606893776894232e-06, "loss": 0.972, "step": 5037 }, { "epoch": 0.5272632129774987, "grad_norm": 2.1374768924728715, "learning_rate": 9.603506699495742e-06, "loss": 0.9861, "step": 5038 }, { "epoch": 0.5273678702250131, "grad_norm": 1.901539200611645, "learning_rate": 9.600119667655135e-06, "loss": 0.9153, "step": 5039 }, { "epoch": 0.5274725274725275, "grad_norm": 1.898313837762518, "learning_rate": 9.596732681761591e-06, "loss": 0.9927, "step": 5040 }, { "epoch": 0.5275771847200419, "grad_norm": 2.203116494419673, "learning_rate": 9.593345742204278e-06, "loss": 0.9173, "step": 5041 }, { "epoch": 0.5276818419675563, "grad_norm": 1.6960490365340934, "learning_rate": 9.589958849372367e-06, "loss": 0.9086, "step": 5042 }, { "epoch": 0.5277864992150706, "grad_norm": 2.2070200124057133, "learning_rate": 9.586572003655012e-06, "loss": 0.8878, "step": 5043 }, { "epoch": 0.527891156462585, "grad_norm": 1.954022666257893, "learning_rate": 9.583185205441376e-06, "loss": 0.9223, "step": 5044 }, { "epoch": 0.5279958137100994, "grad_norm": 2.0745859713137866, "learning_rate": 9.5797984551206e-06, "loss": 1.0438, "step": 5045 }, { "epoch": 0.5281004709576138, "grad_norm": 2.265741127424256, "learning_rate": 9.576411753081837e-06, "loss": 0.9013, "step": 5046 }, { "epoch": 0.5282051282051282, "grad_norm": 1.8371450812640804, "learning_rate": 9.573025099714218e-06, "loss": 1.0232, "step": 5047 }, { "epoch": 0.5283097854526426, "grad_norm": 2.033374031917156, "learning_rate": 9.569638495406882e-06, "loss": 0.8909, "step": 5048 }, { "epoch": 0.528414442700157, "grad_norm": 2.1076096190375697, "learning_rate": 9.566251940548957e-06, "loss": 0.9968, "step": 5049 }, { "epoch": 0.5285190999476713, "grad_norm": 1.941537663106642, "learning_rate": 9.562865435529563e-06, "loss": 0.884, "step": 5050 }, { "epoch": 0.5286237571951857, "grad_norm": 1.993306824385622, "learning_rate": 9.559478980737817e-06, "loss": 0.9804, "step": 5051 }, { "epoch": 0.5287284144427001, "grad_norm": 1.8310381477052629, "learning_rate": 9.556092576562828e-06, "loss": 0.8713, "step": 5052 }, { "epoch": 0.5288330716902145, "grad_norm": 2.1706814859478643, "learning_rate": 9.552706223393704e-06, "loss": 0.953, "step": 5053 }, { "epoch": 0.528937728937729, "grad_norm": 2.0181734603318917, "learning_rate": 9.54931992161954e-06, "loss": 1.0406, "step": 5054 }, { "epoch": 0.5290423861852434, "grad_norm": 2.1729682113864537, "learning_rate": 9.54593367162943e-06, "loss": 0.9522, "step": 5055 }, { "epoch": 0.5291470434327578, "grad_norm": 1.9793023318843381, "learning_rate": 9.54254747381246e-06, "loss": 0.8517, "step": 5056 }, { "epoch": 0.5292517006802722, "grad_norm": 1.956291432464463, "learning_rate": 9.539161328557716e-06, "loss": 0.9517, "step": 5057 }, { "epoch": 0.5293563579277865, "grad_norm": 2.033082190907283, "learning_rate": 9.535775236254268e-06, "loss": 0.8648, "step": 5058 }, { "epoch": 0.5294610151753009, "grad_norm": 2.13270173634752, "learning_rate": 9.532389197291187e-06, "loss": 0.9357, "step": 5059 }, { "epoch": 0.5295656724228153, "grad_norm": 1.91157758973134, "learning_rate": 9.529003212057536e-06, "loss": 0.9013, "step": 5060 }, { "epoch": 0.5296703296703297, "grad_norm": 1.832312512024985, "learning_rate": 9.52561728094237e-06, "loss": 0.9018, "step": 5061 }, { "epoch": 0.5297749869178441, "grad_norm": 2.316257080957267, "learning_rate": 9.52223140433474e-06, "loss": 0.7679, "step": 5062 }, { "epoch": 0.5298796441653585, "grad_norm": 1.8496246062520718, "learning_rate": 9.518845582623692e-06, "loss": 0.9617, "step": 5063 }, { "epoch": 0.5299843014128729, "grad_norm": 2.0807591771920197, "learning_rate": 9.515459816198258e-06, "loss": 0.9035, "step": 5064 }, { "epoch": 0.5300889586603872, "grad_norm": 1.8998070623413483, "learning_rate": 9.512074105447473e-06, "loss": 0.9626, "step": 5065 }, { "epoch": 0.5301936159079016, "grad_norm": 1.9812096820937861, "learning_rate": 9.508688450760367e-06, "loss": 0.9601, "step": 5066 }, { "epoch": 0.530298273155416, "grad_norm": 2.147757889449252, "learning_rate": 9.50530285252595e-06, "loss": 1.0095, "step": 5067 }, { "epoch": 0.5304029304029304, "grad_norm": 1.8908877406835036, "learning_rate": 9.501917311133241e-06, "loss": 0.9554, "step": 5068 }, { "epoch": 0.5305075876504448, "grad_norm": 2.006086304429676, "learning_rate": 9.498531826971242e-06, "loss": 0.9882, "step": 5069 }, { "epoch": 0.5306122448979592, "grad_norm": 1.7842507152981133, "learning_rate": 9.495146400428953e-06, "loss": 0.9366, "step": 5070 }, { "epoch": 0.5307169021454736, "grad_norm": 2.046078498853944, "learning_rate": 9.491761031895364e-06, "loss": 0.876, "step": 5071 }, { "epoch": 0.5308215593929879, "grad_norm": 1.9166303496266188, "learning_rate": 9.488375721759467e-06, "loss": 0.8727, "step": 5072 }, { "epoch": 0.5309262166405023, "grad_norm": 1.8738691308012003, "learning_rate": 9.48499047041023e-06, "loss": 0.9591, "step": 5073 }, { "epoch": 0.5310308738880167, "grad_norm": 2.107065364309017, "learning_rate": 9.481605278236636e-06, "loss": 0.8822, "step": 5074 }, { "epoch": 0.5311355311355311, "grad_norm": 2.327347288791375, "learning_rate": 9.478220145627645e-06, "loss": 1.0899, "step": 5075 }, { "epoch": 0.5312401883830455, "grad_norm": 2.2223497049637713, "learning_rate": 9.47483507297222e-06, "loss": 0.8937, "step": 5076 }, { "epoch": 0.53134484563056, "grad_norm": 2.256580775897136, "learning_rate": 9.471450060659308e-06, "loss": 1.0627, "step": 5077 }, { "epoch": 0.5314495028780744, "grad_norm": 2.1603161263939628, "learning_rate": 9.468065109077858e-06, "loss": 0.8642, "step": 5078 }, { "epoch": 0.5315541601255886, "grad_norm": 2.021675968414649, "learning_rate": 9.464680218616801e-06, "loss": 0.9218, "step": 5079 }, { "epoch": 0.531658817373103, "grad_norm": 2.027162622909609, "learning_rate": 9.461295389665075e-06, "loss": 0.8784, "step": 5080 }, { "epoch": 0.5317634746206175, "grad_norm": 2.1737803034324896, "learning_rate": 9.457910622611599e-06, "loss": 0.9499, "step": 5081 }, { "epoch": 0.5318681318681319, "grad_norm": 2.1361694404225573, "learning_rate": 9.454525917845289e-06, "loss": 0.9557, "step": 5082 }, { "epoch": 0.5319727891156463, "grad_norm": 1.9555190602414432, "learning_rate": 9.451141275755061e-06, "loss": 1.0161, "step": 5083 }, { "epoch": 0.5320774463631607, "grad_norm": 2.0343065372865823, "learning_rate": 9.447756696729811e-06, "loss": 0.9702, "step": 5084 }, { "epoch": 0.5321821036106751, "grad_norm": 1.990208162802974, "learning_rate": 9.444372181158439e-06, "loss": 0.881, "step": 5085 }, { "epoch": 0.5322867608581894, "grad_norm": 1.9953926441133305, "learning_rate": 9.440987729429827e-06, "loss": 0.8863, "step": 5086 }, { "epoch": 0.5323914181057038, "grad_norm": 2.3741453667707257, "learning_rate": 9.437603341932861e-06, "loss": 0.8405, "step": 5087 }, { "epoch": 0.5324960753532182, "grad_norm": 2.0080306933110252, "learning_rate": 9.434219019056407e-06, "loss": 0.8811, "step": 5088 }, { "epoch": 0.5326007326007326, "grad_norm": 1.9052829376729115, "learning_rate": 9.430834761189339e-06, "loss": 0.9062, "step": 5089 }, { "epoch": 0.532705389848247, "grad_norm": 2.232762167686221, "learning_rate": 9.427450568720507e-06, "loss": 1.066, "step": 5090 }, { "epoch": 0.5328100470957614, "grad_norm": 1.7077102794519774, "learning_rate": 9.424066442038764e-06, "loss": 0.998, "step": 5091 }, { "epoch": 0.5329147043432758, "grad_norm": 2.0986156305726316, "learning_rate": 9.420682381532959e-06, "loss": 1.0063, "step": 5092 }, { "epoch": 0.5330193615907901, "grad_norm": 1.985756966883505, "learning_rate": 9.417298387591923e-06, "loss": 0.9973, "step": 5093 }, { "epoch": 0.5331240188383045, "grad_norm": 2.3041795560347555, "learning_rate": 9.41391446060448e-06, "loss": 1.0417, "step": 5094 }, { "epoch": 0.5332286760858189, "grad_norm": 2.3998949708050135, "learning_rate": 9.410530600959455e-06, "loss": 1.0162, "step": 5095 }, { "epoch": 0.5333333333333333, "grad_norm": 1.9102226487477791, "learning_rate": 9.407146809045658e-06, "loss": 0.8784, "step": 5096 }, { "epoch": 0.5334379905808477, "grad_norm": 1.9875471868758454, "learning_rate": 9.403763085251898e-06, "loss": 0.8156, "step": 5097 }, { "epoch": 0.5335426478283621, "grad_norm": 1.953172303607464, "learning_rate": 9.400379429966965e-06, "loss": 0.8733, "step": 5098 }, { "epoch": 0.5336473050758765, "grad_norm": 2.2056345622053537, "learning_rate": 9.396995843579648e-06, "loss": 0.9868, "step": 5099 }, { "epoch": 0.533751962323391, "grad_norm": 2.053690818273544, "learning_rate": 9.393612326478736e-06, "loss": 0.8402, "step": 5100 }, { "epoch": 0.5338566195709052, "grad_norm": 2.3399486682168775, "learning_rate": 9.390228879052995e-06, "loss": 1.043, "step": 5101 }, { "epoch": 0.5339612768184197, "grad_norm": 2.075427595574749, "learning_rate": 9.386845501691194e-06, "loss": 0.9583, "step": 5102 }, { "epoch": 0.5340659340659341, "grad_norm": 2.21323312612293, "learning_rate": 9.383462194782086e-06, "loss": 0.8203, "step": 5103 }, { "epoch": 0.5341705913134485, "grad_norm": 1.7638697451900374, "learning_rate": 9.380078958714424e-06, "loss": 0.8476, "step": 5104 }, { "epoch": 0.5342752485609629, "grad_norm": 2.1757947162837636, "learning_rate": 9.376695793876943e-06, "loss": 1.0289, "step": 5105 }, { "epoch": 0.5343799058084773, "grad_norm": 2.0498873657140675, "learning_rate": 9.373312700658381e-06, "loss": 0.9722, "step": 5106 }, { "epoch": 0.5344845630559917, "grad_norm": 2.146781521567094, "learning_rate": 9.36992967944746e-06, "loss": 0.9346, "step": 5107 }, { "epoch": 0.534589220303506, "grad_norm": 2.4924758465124643, "learning_rate": 9.366546730632892e-06, "loss": 0.9294, "step": 5108 }, { "epoch": 0.5346938775510204, "grad_norm": 2.1871982606239224, "learning_rate": 9.363163854603393e-06, "loss": 0.9268, "step": 5109 }, { "epoch": 0.5347985347985348, "grad_norm": 2.6130844536579145, "learning_rate": 9.359781051747658e-06, "loss": 0.8488, "step": 5110 }, { "epoch": 0.5349031920460492, "grad_norm": 2.3176010643788856, "learning_rate": 9.356398322454378e-06, "loss": 1.0246, "step": 5111 }, { "epoch": 0.5350078492935636, "grad_norm": 1.9976965271715834, "learning_rate": 9.353015667112236e-06, "loss": 0.9324, "step": 5112 }, { "epoch": 0.535112506541078, "grad_norm": 2.1264457121845073, "learning_rate": 9.349633086109904e-06, "loss": 0.957, "step": 5113 }, { "epoch": 0.5352171637885924, "grad_norm": 2.1082662476439897, "learning_rate": 9.34625057983605e-06, "loss": 0.8603, "step": 5114 }, { "epoch": 0.5353218210361067, "grad_norm": 2.081793729672057, "learning_rate": 9.342868148679328e-06, "loss": 0.9384, "step": 5115 }, { "epoch": 0.5354264782836211, "grad_norm": 1.8602117077983622, "learning_rate": 9.33948579302839e-06, "loss": 0.9818, "step": 5116 }, { "epoch": 0.5355311355311355, "grad_norm": 2.068748019729851, "learning_rate": 9.33610351327187e-06, "loss": 0.8313, "step": 5117 }, { "epoch": 0.5356357927786499, "grad_norm": 2.250399416328852, "learning_rate": 9.332721309798403e-06, "loss": 0.9082, "step": 5118 }, { "epoch": 0.5357404500261643, "grad_norm": 2.0089516460747068, "learning_rate": 9.329339182996614e-06, "loss": 1.0031, "step": 5119 }, { "epoch": 0.5358451072736787, "grad_norm": 2.1921142529491062, "learning_rate": 9.325957133255107e-06, "loss": 0.9773, "step": 5120 }, { "epoch": 0.5359497645211931, "grad_norm": 1.7765848948209197, "learning_rate": 9.322575160962497e-06, "loss": 0.8713, "step": 5121 }, { "epoch": 0.5360544217687074, "grad_norm": 2.2822852338608133, "learning_rate": 9.319193266507371e-06, "loss": 1.0108, "step": 5122 }, { "epoch": 0.5361590790162218, "grad_norm": 1.9080359520771442, "learning_rate": 9.315811450278322e-06, "loss": 1.0417, "step": 5123 }, { "epoch": 0.5362637362637362, "grad_norm": 2.126848470721643, "learning_rate": 9.312429712663922e-06, "loss": 1.0819, "step": 5124 }, { "epoch": 0.5363683935112507, "grad_norm": 1.9448417891971947, "learning_rate": 9.309048054052738e-06, "loss": 1.0022, "step": 5125 }, { "epoch": 0.5364730507587651, "grad_norm": 2.0110601683746716, "learning_rate": 9.30566647483334e-06, "loss": 0.9406, "step": 5126 }, { "epoch": 0.5365777080062795, "grad_norm": 2.059272200316169, "learning_rate": 9.302284975394267e-06, "loss": 0.9979, "step": 5127 }, { "epoch": 0.5366823652537939, "grad_norm": 1.9516944950557442, "learning_rate": 9.298903556124069e-06, "loss": 0.964, "step": 5128 }, { "epoch": 0.5367870225013082, "grad_norm": 1.9463999538178405, "learning_rate": 9.29552221741127e-06, "loss": 0.9758, "step": 5129 }, { "epoch": 0.5368916797488226, "grad_norm": 1.98796486302683, "learning_rate": 9.292140959644398e-06, "loss": 0.9545, "step": 5130 }, { "epoch": 0.536996336996337, "grad_norm": 1.9929189015148039, "learning_rate": 9.288759783211967e-06, "loss": 0.9298, "step": 5131 }, { "epoch": 0.5371009942438514, "grad_norm": 2.1502952424043196, "learning_rate": 9.285378688502475e-06, "loss": 0.9023, "step": 5132 }, { "epoch": 0.5372056514913658, "grad_norm": 1.8849112778207193, "learning_rate": 9.281997675904423e-06, "loss": 0.9625, "step": 5133 }, { "epoch": 0.5373103087388802, "grad_norm": 2.2034978552750926, "learning_rate": 9.27861674580629e-06, "loss": 0.8996, "step": 5134 }, { "epoch": 0.5374149659863946, "grad_norm": 2.1228025728157514, "learning_rate": 9.275235898596555e-06, "loss": 0.8977, "step": 5135 }, { "epoch": 0.5375196232339089, "grad_norm": 2.155478741058007, "learning_rate": 9.271855134663689e-06, "loss": 1.0623, "step": 5136 }, { "epoch": 0.5376242804814233, "grad_norm": 2.017078129664075, "learning_rate": 9.26847445439614e-06, "loss": 0.9586, "step": 5137 }, { "epoch": 0.5377289377289377, "grad_norm": 2.2424961468183295, "learning_rate": 9.265093858182364e-06, "loss": 1.0009, "step": 5138 }, { "epoch": 0.5378335949764521, "grad_norm": 1.9391397613594792, "learning_rate": 9.26171334641079e-06, "loss": 0.831, "step": 5139 }, { "epoch": 0.5379382522239665, "grad_norm": 2.041390251270642, "learning_rate": 9.258332919469851e-06, "loss": 0.9798, "step": 5140 }, { "epoch": 0.5380429094714809, "grad_norm": 2.086322474319101, "learning_rate": 9.25495257774796e-06, "loss": 1.0148, "step": 5141 }, { "epoch": 0.5381475667189953, "grad_norm": 1.9006136542065337, "learning_rate": 9.251572321633533e-06, "loss": 0.9227, "step": 5142 }, { "epoch": 0.5382522239665097, "grad_norm": 1.9414787885775822, "learning_rate": 9.248192151514957e-06, "loss": 0.8972, "step": 5143 }, { "epoch": 0.538356881214024, "grad_norm": 1.8618340965139868, "learning_rate": 9.24481206778063e-06, "loss": 1.0364, "step": 5144 }, { "epoch": 0.5384615384615384, "grad_norm": 1.7750231543939474, "learning_rate": 9.241432070818929e-06, "loss": 0.8416, "step": 5145 }, { "epoch": 0.5385661957090528, "grad_norm": 1.7192970430261034, "learning_rate": 9.23805216101822e-06, "loss": 0.8291, "step": 5146 }, { "epoch": 0.5386708529565672, "grad_norm": 2.267675160306062, "learning_rate": 9.234672338766863e-06, "loss": 0.8009, "step": 5147 }, { "epoch": 0.5387755102040817, "grad_norm": 1.9151967169817872, "learning_rate": 9.231292604453203e-06, "loss": 0.983, "step": 5148 }, { "epoch": 0.5388801674515961, "grad_norm": 1.8860340166769067, "learning_rate": 9.227912958465584e-06, "loss": 0.9827, "step": 5149 }, { "epoch": 0.5389848246991105, "grad_norm": 2.377402452195343, "learning_rate": 9.224533401192332e-06, "loss": 0.7757, "step": 5150 }, { "epoch": 0.5390894819466248, "grad_norm": 2.1940029519811444, "learning_rate": 9.221153933021759e-06, "loss": 0.9636, "step": 5151 }, { "epoch": 0.5391941391941392, "grad_norm": 2.0338541104057266, "learning_rate": 9.21777455434218e-06, "loss": 1.0547, "step": 5152 }, { "epoch": 0.5392987964416536, "grad_norm": 2.9722612954759255, "learning_rate": 9.214395265541894e-06, "loss": 0.9978, "step": 5153 }, { "epoch": 0.539403453689168, "grad_norm": 2.1825836270355654, "learning_rate": 9.211016067009182e-06, "loss": 0.8928, "step": 5154 }, { "epoch": 0.5395081109366824, "grad_norm": 2.0138240313959543, "learning_rate": 9.207636959132325e-06, "loss": 0.8572, "step": 5155 }, { "epoch": 0.5396127681841968, "grad_norm": 2.3175892149524966, "learning_rate": 9.204257942299585e-06, "loss": 1.0819, "step": 5156 }, { "epoch": 0.5397174254317112, "grad_norm": 2.235711126597796, "learning_rate": 9.200879016899223e-06, "loss": 0.8762, "step": 5157 }, { "epoch": 0.5398220826792255, "grad_norm": 2.450747499531445, "learning_rate": 9.197500183319481e-06, "loss": 1.0596, "step": 5158 }, { "epoch": 0.5399267399267399, "grad_norm": 2.3432608161807513, "learning_rate": 9.194121441948596e-06, "loss": 0.9195, "step": 5159 }, { "epoch": 0.5400313971742543, "grad_norm": 2.2491282476173775, "learning_rate": 9.190742793174786e-06, "loss": 0.8444, "step": 5160 }, { "epoch": 0.5401360544217687, "grad_norm": 2.3639647455866895, "learning_rate": 9.187364237386273e-06, "loss": 0.9589, "step": 5161 }, { "epoch": 0.5402407116692831, "grad_norm": 2.359812971612293, "learning_rate": 9.183985774971258e-06, "loss": 0.8739, "step": 5162 }, { "epoch": 0.5403453689167975, "grad_norm": 1.7777656507828674, "learning_rate": 9.18060740631793e-06, "loss": 0.8449, "step": 5163 }, { "epoch": 0.5404500261643119, "grad_norm": 2.173125028700542, "learning_rate": 9.177229131814473e-06, "loss": 0.9589, "step": 5164 }, { "epoch": 0.5405546834118262, "grad_norm": 2.1583802338493427, "learning_rate": 9.173850951849057e-06, "loss": 1.0773, "step": 5165 }, { "epoch": 0.5406593406593406, "grad_norm": 1.897622605941461, "learning_rate": 9.170472866809842e-06, "loss": 0.9341, "step": 5166 }, { "epoch": 0.540763997906855, "grad_norm": 1.884134801109637, "learning_rate": 9.167094877084974e-06, "loss": 0.9404, "step": 5167 }, { "epoch": 0.5408686551543694, "grad_norm": 2.1870995357335525, "learning_rate": 9.163716983062594e-06, "loss": 0.8965, "step": 5168 }, { "epoch": 0.5409733124018838, "grad_norm": 2.141040011803557, "learning_rate": 9.160339185130825e-06, "loss": 0.9386, "step": 5169 }, { "epoch": 0.5410779696493982, "grad_norm": 2.042779608504371, "learning_rate": 9.15696148367779e-06, "loss": 0.8375, "step": 5170 }, { "epoch": 0.5411826268969127, "grad_norm": 1.8593064018228953, "learning_rate": 9.153583879091588e-06, "loss": 0.8555, "step": 5171 }, { "epoch": 0.541287284144427, "grad_norm": 2.3293817595274056, "learning_rate": 9.150206371760315e-06, "loss": 0.9834, "step": 5172 }, { "epoch": 0.5413919413919414, "grad_norm": 2.0079430212166627, "learning_rate": 9.146828962072051e-06, "loss": 0.9392, "step": 5173 }, { "epoch": 0.5414965986394558, "grad_norm": 2.1119483487510355, "learning_rate": 9.143451650414871e-06, "loss": 1.0077, "step": 5174 }, { "epoch": 0.5416012558869702, "grad_norm": 1.9355398541105262, "learning_rate": 9.140074437176831e-06, "loss": 0.9627, "step": 5175 }, { "epoch": 0.5417059131344846, "grad_norm": 2.066119618087384, "learning_rate": 9.136697322745984e-06, "loss": 0.9772, "step": 5176 }, { "epoch": 0.541810570381999, "grad_norm": 1.8564381011561766, "learning_rate": 9.133320307510359e-06, "loss": 0.8735, "step": 5177 }, { "epoch": 0.5419152276295134, "grad_norm": 1.9999578854370876, "learning_rate": 9.129943391857992e-06, "loss": 1.0295, "step": 5178 }, { "epoch": 0.5420198848770277, "grad_norm": 2.264031570128424, "learning_rate": 9.126566576176893e-06, "loss": 0.7778, "step": 5179 }, { "epoch": 0.5421245421245421, "grad_norm": 1.8699876284645591, "learning_rate": 9.123189860855064e-06, "loss": 0.9129, "step": 5180 }, { "epoch": 0.5422291993720565, "grad_norm": 1.9231443138469622, "learning_rate": 9.119813246280499e-06, "loss": 0.9216, "step": 5181 }, { "epoch": 0.5423338566195709, "grad_norm": 1.7836314115264373, "learning_rate": 9.116436732841174e-06, "loss": 0.9005, "step": 5182 }, { "epoch": 0.5424385138670853, "grad_norm": 2.2350357880443448, "learning_rate": 9.113060320925063e-06, "loss": 0.9321, "step": 5183 }, { "epoch": 0.5425431711145997, "grad_norm": 2.079863133130194, "learning_rate": 9.109684010920116e-06, "loss": 0.8967, "step": 5184 }, { "epoch": 0.5426478283621141, "grad_norm": 2.2139801096197553, "learning_rate": 9.106307803214283e-06, "loss": 1.0516, "step": 5185 }, { "epoch": 0.5427524856096285, "grad_norm": 2.179734467120083, "learning_rate": 9.102931698195491e-06, "loss": 0.7468, "step": 5186 }, { "epoch": 0.5428571428571428, "grad_norm": 2.157564716376065, "learning_rate": 9.099555696251667e-06, "loss": 0.9357, "step": 5187 }, { "epoch": 0.5429618001046572, "grad_norm": 2.041100938324909, "learning_rate": 9.096179797770723e-06, "loss": 0.9291, "step": 5188 }, { "epoch": 0.5430664573521716, "grad_norm": 1.8303406459070106, "learning_rate": 9.09280400314055e-06, "loss": 0.8139, "step": 5189 }, { "epoch": 0.543171114599686, "grad_norm": 1.8598137235962091, "learning_rate": 9.089428312749036e-06, "loss": 0.8703, "step": 5190 }, { "epoch": 0.5432757718472004, "grad_norm": 2.0757977617675945, "learning_rate": 9.086052726984057e-06, "loss": 0.9464, "step": 5191 }, { "epoch": 0.5433804290947148, "grad_norm": 1.891684900539831, "learning_rate": 9.08267724623347e-06, "loss": 0.9067, "step": 5192 }, { "epoch": 0.5434850863422293, "grad_norm": 1.9766032826136062, "learning_rate": 9.079301870885126e-06, "loss": 0.9417, "step": 5193 }, { "epoch": 0.5435897435897435, "grad_norm": 2.1848428050243824, "learning_rate": 9.075926601326865e-06, "loss": 0.8109, "step": 5194 }, { "epoch": 0.543694400837258, "grad_norm": 1.928000464883851, "learning_rate": 9.072551437946508e-06, "loss": 0.9048, "step": 5195 }, { "epoch": 0.5437990580847724, "grad_norm": 2.2851706994938477, "learning_rate": 9.069176381131874e-06, "loss": 1.0057, "step": 5196 }, { "epoch": 0.5439037153322868, "grad_norm": 2.2316974873884634, "learning_rate": 9.06580143127076e-06, "loss": 0.9539, "step": 5197 }, { "epoch": 0.5440083725798012, "grad_norm": 2.431415213869758, "learning_rate": 9.062426588750956e-06, "loss": 0.9679, "step": 5198 }, { "epoch": 0.5441130298273156, "grad_norm": 1.8303873361505676, "learning_rate": 9.059051853960237e-06, "loss": 1.0003, "step": 5199 }, { "epoch": 0.54421768707483, "grad_norm": 1.9180411674794864, "learning_rate": 9.05567722728637e-06, "loss": 0.941, "step": 5200 }, { "epoch": 0.5443223443223443, "grad_norm": 2.1309566319139055, "learning_rate": 9.0523027091171e-06, "loss": 0.9292, "step": 5201 }, { "epoch": 0.5444270015698587, "grad_norm": 1.9920223205280956, "learning_rate": 9.048928299840172e-06, "loss": 0.9156, "step": 5202 }, { "epoch": 0.5445316588173731, "grad_norm": 1.9755399701908867, "learning_rate": 9.04555399984331e-06, "loss": 1.0254, "step": 5203 }, { "epoch": 0.5446363160648875, "grad_norm": 1.867791911625654, "learning_rate": 9.042179809514225e-06, "loss": 0.9065, "step": 5204 }, { "epoch": 0.5447409733124019, "grad_norm": 2.142420704324338, "learning_rate": 9.038805729240626e-06, "loss": 0.9879, "step": 5205 }, { "epoch": 0.5448456305599163, "grad_norm": 1.8804509672023069, "learning_rate": 9.035431759410194e-06, "loss": 0.8701, "step": 5206 }, { "epoch": 0.5449502878074307, "grad_norm": 1.8907052172885805, "learning_rate": 9.032057900410612e-06, "loss": 0.9644, "step": 5207 }, { "epoch": 0.545054945054945, "grad_norm": 2.3910996543132965, "learning_rate": 9.02868415262954e-06, "loss": 0.9196, "step": 5208 }, { "epoch": 0.5451596023024594, "grad_norm": 3.681066797684883, "learning_rate": 9.025310516454623e-06, "loss": 1.0363, "step": 5209 }, { "epoch": 0.5452642595499738, "grad_norm": 1.6994987522917748, "learning_rate": 9.021936992273508e-06, "loss": 0.8546, "step": 5210 }, { "epoch": 0.5453689167974882, "grad_norm": 2.5337992006092405, "learning_rate": 9.018563580473814e-06, "loss": 0.9382, "step": 5211 }, { "epoch": 0.5454735740450026, "grad_norm": 1.8493509787452933, "learning_rate": 9.01519028144315e-06, "loss": 0.8746, "step": 5212 }, { "epoch": 0.545578231292517, "grad_norm": 1.9579976590736436, "learning_rate": 9.011817095569124e-06, "loss": 0.9685, "step": 5213 }, { "epoch": 0.5456828885400314, "grad_norm": 2.1333444289770087, "learning_rate": 9.008444023239315e-06, "loss": 0.7952, "step": 5214 }, { "epoch": 0.5457875457875457, "grad_norm": 2.3503433202907478, "learning_rate": 9.005071064841299e-06, "loss": 0.88, "step": 5215 }, { "epoch": 0.5458922030350601, "grad_norm": 2.2546724085226844, "learning_rate": 9.001698220762634e-06, "loss": 0.9255, "step": 5216 }, { "epoch": 0.5459968602825745, "grad_norm": 2.356440368535242, "learning_rate": 8.998325491390869e-06, "loss": 1.0099, "step": 5217 }, { "epoch": 0.546101517530089, "grad_norm": 2.2034666717622033, "learning_rate": 8.994952877113533e-06, "loss": 1.0675, "step": 5218 }, { "epoch": 0.5462061747776034, "grad_norm": 1.9688211472663686, "learning_rate": 8.991580378318151e-06, "loss": 0.845, "step": 5219 }, { "epoch": 0.5463108320251178, "grad_norm": 2.1233709050192213, "learning_rate": 8.988207995392227e-06, "loss": 1.1076, "step": 5220 }, { "epoch": 0.5464154892726322, "grad_norm": 2.2100117848067886, "learning_rate": 8.984835728723251e-06, "loss": 0.9292, "step": 5221 }, { "epoch": 0.5465201465201465, "grad_norm": 2.00762864700889, "learning_rate": 8.981463578698714e-06, "loss": 0.9889, "step": 5222 }, { "epoch": 0.5466248037676609, "grad_norm": 2.000506555020569, "learning_rate": 8.978091545706074e-06, "loss": 0.9671, "step": 5223 }, { "epoch": 0.5467294610151753, "grad_norm": 1.9318729631393265, "learning_rate": 8.974719630132788e-06, "loss": 0.9534, "step": 5224 }, { "epoch": 0.5468341182626897, "grad_norm": 2.704458307866667, "learning_rate": 8.971347832366296e-06, "loss": 0.9215, "step": 5225 }, { "epoch": 0.5469387755102041, "grad_norm": 1.8785357601729604, "learning_rate": 8.96797615279402e-06, "loss": 0.8026, "step": 5226 }, { "epoch": 0.5470434327577185, "grad_norm": 1.982632154167765, "learning_rate": 8.96460459180338e-06, "loss": 0.9695, "step": 5227 }, { "epoch": 0.5471480900052329, "grad_norm": 2.143564500630684, "learning_rate": 8.961233149781768e-06, "loss": 0.939, "step": 5228 }, { "epoch": 0.5472527472527473, "grad_norm": 2.203497996122728, "learning_rate": 8.957861827116576e-06, "loss": 0.9991, "step": 5229 }, { "epoch": 0.5473574045002616, "grad_norm": 1.9379036312231492, "learning_rate": 8.954490624195169e-06, "loss": 0.8894, "step": 5230 }, { "epoch": 0.547462061747776, "grad_norm": 2.0508974667099245, "learning_rate": 8.951119541404908e-06, "loss": 1.0449, "step": 5231 }, { "epoch": 0.5475667189952904, "grad_norm": 2.588002315713727, "learning_rate": 8.947748579133143e-06, "loss": 0.9373, "step": 5232 }, { "epoch": 0.5476713762428048, "grad_norm": 2.38449359167579, "learning_rate": 8.944377737767195e-06, "loss": 1.041, "step": 5233 }, { "epoch": 0.5477760334903192, "grad_norm": 1.9288662968847525, "learning_rate": 8.94100701769439e-06, "loss": 0.9473, "step": 5234 }, { "epoch": 0.5478806907378336, "grad_norm": 2.1753526717070155, "learning_rate": 8.93763641930202e-06, "loss": 0.8533, "step": 5235 }, { "epoch": 0.547985347985348, "grad_norm": 2.214682121752073, "learning_rate": 8.934265942977383e-06, "loss": 1.0522, "step": 5236 }, { "epoch": 0.5480900052328623, "grad_norm": 1.9884467197972144, "learning_rate": 8.930895589107748e-06, "loss": 1.0617, "step": 5237 }, { "epoch": 0.5481946624803767, "grad_norm": 1.8273085911630584, "learning_rate": 8.927525358080374e-06, "loss": 0.8846, "step": 5238 }, { "epoch": 0.5482993197278911, "grad_norm": 2.132364155223627, "learning_rate": 8.924155250282515e-06, "loss": 0.8767, "step": 5239 }, { "epoch": 0.5484039769754055, "grad_norm": 2.1653790608930557, "learning_rate": 8.920785266101396e-06, "loss": 1.0162, "step": 5240 }, { "epoch": 0.54850863422292, "grad_norm": 1.9836556420176448, "learning_rate": 8.917415405924241e-06, "loss": 0.9864, "step": 5241 }, { "epoch": 0.5486132914704344, "grad_norm": 2.497343830248175, "learning_rate": 8.914045670138247e-06, "loss": 1.0194, "step": 5242 }, { "epoch": 0.5487179487179488, "grad_norm": 2.2357270888989262, "learning_rate": 8.910676059130611e-06, "loss": 1.0598, "step": 5243 }, { "epoch": 0.5488226059654631, "grad_norm": 2.076847097610531, "learning_rate": 8.907306573288502e-06, "loss": 0.916, "step": 5244 }, { "epoch": 0.5489272632129775, "grad_norm": 1.9802029893372697, "learning_rate": 8.903937212999082e-06, "loss": 0.843, "step": 5245 }, { "epoch": 0.5490319204604919, "grad_norm": 2.0192766915384155, "learning_rate": 8.900567978649501e-06, "loss": 0.9069, "step": 5246 }, { "epoch": 0.5491365777080063, "grad_norm": 2.432583695671836, "learning_rate": 8.897198870626882e-06, "loss": 0.8992, "step": 5247 }, { "epoch": 0.5492412349555207, "grad_norm": 2.213741486304002, "learning_rate": 8.893829889318352e-06, "loss": 0.8917, "step": 5248 }, { "epoch": 0.5493458922030351, "grad_norm": 2.135434999613721, "learning_rate": 8.89046103511101e-06, "loss": 0.9377, "step": 5249 }, { "epoch": 0.5494505494505495, "grad_norm": 2.122303160131647, "learning_rate": 8.887092308391945e-06, "loss": 0.9298, "step": 5250 }, { "epoch": 0.5495552066980638, "grad_norm": 2.2880405163464532, "learning_rate": 8.883723709548232e-06, "loss": 1.0074, "step": 5251 }, { "epoch": 0.5496598639455782, "grad_norm": 1.9274854009826472, "learning_rate": 8.880355238966923e-06, "loss": 0.8281, "step": 5252 }, { "epoch": 0.5497645211930926, "grad_norm": 1.9114757164675305, "learning_rate": 8.87698689703507e-06, "loss": 0.9118, "step": 5253 }, { "epoch": 0.549869178440607, "grad_norm": 2.1358804324224088, "learning_rate": 8.873618684139697e-06, "loss": 0.9843, "step": 5254 }, { "epoch": 0.5499738356881214, "grad_norm": 2.1558357260108205, "learning_rate": 8.870250600667823e-06, "loss": 1.0874, "step": 5255 }, { "epoch": 0.5500784929356358, "grad_norm": 2.4247493566253926, "learning_rate": 8.866882647006441e-06, "loss": 0.9887, "step": 5256 }, { "epoch": 0.5501831501831502, "grad_norm": 2.028235978688342, "learning_rate": 8.863514823542542e-06, "loss": 0.9167, "step": 5257 }, { "epoch": 0.5502878074306645, "grad_norm": 1.9043186883483203, "learning_rate": 8.860147130663096e-06, "loss": 0.8844, "step": 5258 }, { "epoch": 0.5503924646781789, "grad_norm": 2.8270360697499566, "learning_rate": 8.856779568755052e-06, "loss": 0.9991, "step": 5259 }, { "epoch": 0.5504971219256933, "grad_norm": 1.942578281067632, "learning_rate": 8.853412138205357e-06, "loss": 0.9976, "step": 5260 }, { "epoch": 0.5506017791732077, "grad_norm": 2.2072497818987467, "learning_rate": 8.85004483940093e-06, "loss": 0.9079, "step": 5261 }, { "epoch": 0.5507064364207221, "grad_norm": 1.958197617874853, "learning_rate": 8.846677672728683e-06, "loss": 1.0171, "step": 5262 }, { "epoch": 0.5508110936682366, "grad_norm": 1.946933390805341, "learning_rate": 8.84331063857551e-06, "loss": 0.9166, "step": 5263 }, { "epoch": 0.550915750915751, "grad_norm": 2.099808823892071, "learning_rate": 8.839943737328282e-06, "loss": 0.9827, "step": 5264 }, { "epoch": 0.5510204081632653, "grad_norm": 2.018139469461617, "learning_rate": 8.836576969373877e-06, "loss": 0.9721, "step": 5265 }, { "epoch": 0.5511250654107797, "grad_norm": 2.297998216881771, "learning_rate": 8.833210335099137e-06, "loss": 0.9523, "step": 5266 }, { "epoch": 0.5512297226582941, "grad_norm": 2.0134991695196005, "learning_rate": 8.829843834890893e-06, "loss": 0.975, "step": 5267 }, { "epoch": 0.5513343799058085, "grad_norm": 2.150846758924609, "learning_rate": 8.826477469135967e-06, "loss": 0.8611, "step": 5268 }, { "epoch": 0.5514390371533229, "grad_norm": 1.82333106005884, "learning_rate": 8.823111238221156e-06, "loss": 0.9238, "step": 5269 }, { "epoch": 0.5515436944008373, "grad_norm": 2.4050851629812655, "learning_rate": 8.819745142533252e-06, "loss": 1.0834, "step": 5270 }, { "epoch": 0.5516483516483517, "grad_norm": 2.28131876935219, "learning_rate": 8.81637918245902e-06, "loss": 0.9526, "step": 5271 }, { "epoch": 0.5517530088958661, "grad_norm": 2.1174671106347236, "learning_rate": 8.813013358385223e-06, "loss": 0.9436, "step": 5272 }, { "epoch": 0.5518576661433804, "grad_norm": 1.9671009087310183, "learning_rate": 8.809647670698594e-06, "loss": 1.0025, "step": 5273 }, { "epoch": 0.5519623233908948, "grad_norm": 2.052327149623945, "learning_rate": 8.806282119785861e-06, "loss": 0.8248, "step": 5274 }, { "epoch": 0.5520669806384092, "grad_norm": 2.384339181879582, "learning_rate": 8.802916706033734e-06, "loss": 1.008, "step": 5275 }, { "epoch": 0.5521716378859236, "grad_norm": 1.8901455647678653, "learning_rate": 8.799551429828906e-06, "loss": 0.8009, "step": 5276 }, { "epoch": 0.552276295133438, "grad_norm": 1.9504635654515794, "learning_rate": 8.796186291558052e-06, "loss": 0.9597, "step": 5277 }, { "epoch": 0.5523809523809524, "grad_norm": 2.3275789921544017, "learning_rate": 8.79282129160783e-06, "loss": 0.8494, "step": 5278 }, { "epoch": 0.5524856096284668, "grad_norm": 1.835713336563086, "learning_rate": 8.789456430364892e-06, "loss": 0.9284, "step": 5279 }, { "epoch": 0.5525902668759811, "grad_norm": 2.017436152195592, "learning_rate": 8.786091708215863e-06, "loss": 0.8889, "step": 5280 }, { "epoch": 0.5526949241234955, "grad_norm": 1.959115114298543, "learning_rate": 8.782727125547358e-06, "loss": 0.8949, "step": 5281 }, { "epoch": 0.5527995813710099, "grad_norm": 2.0528665958582093, "learning_rate": 8.779362682745971e-06, "loss": 0.9803, "step": 5282 }, { "epoch": 0.5529042386185243, "grad_norm": 1.8604560176356941, "learning_rate": 8.77599838019829e-06, "loss": 0.923, "step": 5283 }, { "epoch": 0.5530088958660387, "grad_norm": 1.800262625922265, "learning_rate": 8.772634218290874e-06, "loss": 0.9148, "step": 5284 }, { "epoch": 0.5531135531135531, "grad_norm": 2.4993048236879525, "learning_rate": 8.769270197410277e-06, "loss": 0.9607, "step": 5285 }, { "epoch": 0.5532182103610676, "grad_norm": 1.8252037891658632, "learning_rate": 8.765906317943027e-06, "loss": 0.8508, "step": 5286 }, { "epoch": 0.5533228676085818, "grad_norm": 2.238304823724985, "learning_rate": 8.762542580275644e-06, "loss": 0.9228, "step": 5287 }, { "epoch": 0.5534275248560963, "grad_norm": 2.1072228350806896, "learning_rate": 8.759178984794627e-06, "loss": 0.8588, "step": 5288 }, { "epoch": 0.5535321821036107, "grad_norm": 1.9622546540770844, "learning_rate": 8.755815531886461e-06, "loss": 0.9111, "step": 5289 }, { "epoch": 0.5536368393511251, "grad_norm": 2.2818234572001144, "learning_rate": 8.752452221937608e-06, "loss": 1.0615, "step": 5290 }, { "epoch": 0.5537414965986395, "grad_norm": 2.1750822391853344, "learning_rate": 8.749089055334527e-06, "loss": 0.8601, "step": 5291 }, { "epoch": 0.5538461538461539, "grad_norm": 2.0616636889051896, "learning_rate": 8.74572603246365e-06, "loss": 1.0359, "step": 5292 }, { "epoch": 0.5539508110936683, "grad_norm": 2.710571314021102, "learning_rate": 8.742363153711396e-06, "loss": 1.0601, "step": 5293 }, { "epoch": 0.5540554683411826, "grad_norm": 2.6145632006844886, "learning_rate": 8.739000419464164e-06, "loss": 1.0233, "step": 5294 }, { "epoch": 0.554160125588697, "grad_norm": 1.9109618531413386, "learning_rate": 8.735637830108337e-06, "loss": 0.9872, "step": 5295 }, { "epoch": 0.5542647828362114, "grad_norm": 2.1467379854230995, "learning_rate": 8.732275386030291e-06, "loss": 0.9363, "step": 5296 }, { "epoch": 0.5543694400837258, "grad_norm": 2.0056169978726786, "learning_rate": 8.72891308761637e-06, "loss": 0.989, "step": 5297 }, { "epoch": 0.5544740973312402, "grad_norm": 2.1780493741262608, "learning_rate": 8.725550935252914e-06, "loss": 1.0403, "step": 5298 }, { "epoch": 0.5545787545787546, "grad_norm": 2.21960346530747, "learning_rate": 8.722188929326237e-06, "loss": 0.9504, "step": 5299 }, { "epoch": 0.554683411826269, "grad_norm": 2.1450710730738116, "learning_rate": 8.718827070222642e-06, "loss": 1.1257, "step": 5300 }, { "epoch": 0.5547880690737833, "grad_norm": 2.228582965255394, "learning_rate": 8.715465358328417e-06, "loss": 0.9697, "step": 5301 }, { "epoch": 0.5548927263212977, "grad_norm": 2.4830808941857554, "learning_rate": 8.712103794029824e-06, "loss": 0.8945, "step": 5302 }, { "epoch": 0.5549973835688121, "grad_norm": 2.2847911242856656, "learning_rate": 8.708742377713118e-06, "loss": 0.9817, "step": 5303 }, { "epoch": 0.5551020408163265, "grad_norm": 2.1604390174895416, "learning_rate": 8.705381109764529e-06, "loss": 0.9853, "step": 5304 }, { "epoch": 0.5552066980638409, "grad_norm": 2.2396160300426082, "learning_rate": 8.702019990570274e-06, "loss": 0.8813, "step": 5305 }, { "epoch": 0.5553113553113553, "grad_norm": 2.130623108079099, "learning_rate": 8.698659020516556e-06, "loss": 0.8336, "step": 5306 }, { "epoch": 0.5554160125588697, "grad_norm": 1.9908826133921063, "learning_rate": 8.695298199989552e-06, "loss": 0.9344, "step": 5307 }, { "epoch": 0.555520669806384, "grad_norm": 1.9711870901565465, "learning_rate": 8.691937529375428e-06, "loss": 0.9166, "step": 5308 }, { "epoch": 0.5556253270538984, "grad_norm": 1.9835386623262095, "learning_rate": 8.688577009060338e-06, "loss": 0.9319, "step": 5309 }, { "epoch": 0.5557299843014128, "grad_norm": 2.3085235533894126, "learning_rate": 8.685216639430405e-06, "loss": 0.8664, "step": 5310 }, { "epoch": 0.5558346415489273, "grad_norm": 3.3332655720477895, "learning_rate": 8.681856420871747e-06, "loss": 0.8843, "step": 5311 }, { "epoch": 0.5559392987964417, "grad_norm": 1.8582951572007596, "learning_rate": 8.678496353770458e-06, "loss": 0.8867, "step": 5312 }, { "epoch": 0.5560439560439561, "grad_norm": 2.173807723457474, "learning_rate": 8.67513643851262e-06, "loss": 0.9826, "step": 5313 }, { "epoch": 0.5561486132914705, "grad_norm": 1.7581129151706683, "learning_rate": 8.671776675484288e-06, "loss": 0.9056, "step": 5314 }, { "epoch": 0.5562532705389849, "grad_norm": 2.1180334877984017, "learning_rate": 8.66841706507151e-06, "loss": 0.9616, "step": 5315 }, { "epoch": 0.5563579277864992, "grad_norm": 2.508246391771904, "learning_rate": 8.665057607660305e-06, "loss": 0.9179, "step": 5316 }, { "epoch": 0.5564625850340136, "grad_norm": 1.9132610573755788, "learning_rate": 8.661698303636692e-06, "loss": 0.8745, "step": 5317 }, { "epoch": 0.556567242281528, "grad_norm": 2.4958112187692767, "learning_rate": 8.658339153386658e-06, "loss": 0.9097, "step": 5318 }, { "epoch": 0.5566718995290424, "grad_norm": 2.249652224152348, "learning_rate": 8.654980157296173e-06, "loss": 1.0163, "step": 5319 }, { "epoch": 0.5567765567765568, "grad_norm": 2.1636612203919787, "learning_rate": 8.651621315751197e-06, "loss": 1.0146, "step": 5320 }, { "epoch": 0.5568812140240712, "grad_norm": 2.4873635948113484, "learning_rate": 8.648262629137663e-06, "loss": 1.0301, "step": 5321 }, { "epoch": 0.5569858712715856, "grad_norm": 2.1298989438781044, "learning_rate": 8.644904097841494e-06, "loss": 0.979, "step": 5322 }, { "epoch": 0.5570905285190999, "grad_norm": 2.296389158373194, "learning_rate": 8.641545722248592e-06, "loss": 0.9278, "step": 5323 }, { "epoch": 0.5571951857666143, "grad_norm": 2.2433733862004055, "learning_rate": 8.638187502744838e-06, "loss": 0.8756, "step": 5324 }, { "epoch": 0.5572998430141287, "grad_norm": 1.9895145461612076, "learning_rate": 8.634829439716099e-06, "loss": 0.9287, "step": 5325 }, { "epoch": 0.5574045002616431, "grad_norm": 1.8763204041932915, "learning_rate": 8.63147153354823e-06, "loss": 0.875, "step": 5326 }, { "epoch": 0.5575091575091575, "grad_norm": 2.2272253472444103, "learning_rate": 8.628113784627053e-06, "loss": 0.9695, "step": 5327 }, { "epoch": 0.5576138147566719, "grad_norm": 1.972305846861178, "learning_rate": 8.624756193338384e-06, "loss": 0.9767, "step": 5328 }, { "epoch": 0.5577184720041863, "grad_norm": 2.3386342244281124, "learning_rate": 8.621398760068017e-06, "loss": 0.9315, "step": 5329 }, { "epoch": 0.5578231292517006, "grad_norm": 2.3263287856510773, "learning_rate": 8.618041485201728e-06, "loss": 0.8489, "step": 5330 }, { "epoch": 0.557927786499215, "grad_norm": 2.1728955209423315, "learning_rate": 8.61468436912527e-06, "loss": 0.8711, "step": 5331 }, { "epoch": 0.5580324437467294, "grad_norm": 1.9615782696853798, "learning_rate": 8.61132741222439e-06, "loss": 0.9714, "step": 5332 }, { "epoch": 0.5581371009942439, "grad_norm": 1.997852261589968, "learning_rate": 8.607970614884804e-06, "loss": 0.7903, "step": 5333 }, { "epoch": 0.5582417582417583, "grad_norm": 2.0318840672120375, "learning_rate": 8.604613977492212e-06, "loss": 0.9649, "step": 5334 }, { "epoch": 0.5583464154892727, "grad_norm": 2.0231096418522756, "learning_rate": 8.60125750043231e-06, "loss": 0.894, "step": 5335 }, { "epoch": 0.5584510727367871, "grad_norm": 2.0941739900946663, "learning_rate": 8.597901184090753e-06, "loss": 1.0217, "step": 5336 }, { "epoch": 0.5585557299843014, "grad_norm": 2.0371217772719383, "learning_rate": 8.594545028853195e-06, "loss": 0.9738, "step": 5337 }, { "epoch": 0.5586603872318158, "grad_norm": 2.4539134994923724, "learning_rate": 8.59118903510526e-06, "loss": 0.8423, "step": 5338 }, { "epoch": 0.5587650444793302, "grad_norm": 2.0675918762574557, "learning_rate": 8.587833203232564e-06, "loss": 0.9802, "step": 5339 }, { "epoch": 0.5588697017268446, "grad_norm": 2.419698934398359, "learning_rate": 8.584477533620693e-06, "loss": 1.0828, "step": 5340 }, { "epoch": 0.558974358974359, "grad_norm": 1.7962022864475375, "learning_rate": 8.581122026655222e-06, "loss": 0.8116, "step": 5341 }, { "epoch": 0.5590790162218734, "grad_norm": 2.2433085787237044, "learning_rate": 8.577766682721705e-06, "loss": 0.913, "step": 5342 }, { "epoch": 0.5591836734693878, "grad_norm": 2.1428509222947656, "learning_rate": 8.574411502205682e-06, "loss": 1.0034, "step": 5343 }, { "epoch": 0.5592883307169021, "grad_norm": 2.0066030593881035, "learning_rate": 8.571056485492668e-06, "loss": 0.9946, "step": 5344 }, { "epoch": 0.5593929879644165, "grad_norm": 2.1542448139874164, "learning_rate": 8.56770163296816e-06, "loss": 0.9432, "step": 5345 }, { "epoch": 0.5594976452119309, "grad_norm": 2.133404649889683, "learning_rate": 8.564346945017637e-06, "loss": 0.8289, "step": 5346 }, { "epoch": 0.5596023024594453, "grad_norm": 1.9683719436497848, "learning_rate": 8.560992422026563e-06, "loss": 0.9729, "step": 5347 }, { "epoch": 0.5597069597069597, "grad_norm": 2.0108641044421374, "learning_rate": 8.557638064380375e-06, "loss": 0.9478, "step": 5348 }, { "epoch": 0.5598116169544741, "grad_norm": 1.9417606569777943, "learning_rate": 8.554283872464499e-06, "loss": 1.1138, "step": 5349 }, { "epoch": 0.5599162742019885, "grad_norm": 2.0004963619150646, "learning_rate": 8.550929846664333e-06, "loss": 0.832, "step": 5350 }, { "epoch": 0.5600209314495028, "grad_norm": 1.9750719959886254, "learning_rate": 8.547575987365266e-06, "loss": 0.8858, "step": 5351 }, { "epoch": 0.5601255886970172, "grad_norm": 1.962371605202089, "learning_rate": 8.544222294952665e-06, "loss": 0.8864, "step": 5352 }, { "epoch": 0.5602302459445316, "grad_norm": 2.2675771268273506, "learning_rate": 8.540868769811871e-06, "loss": 1.0038, "step": 5353 }, { "epoch": 0.560334903192046, "grad_norm": 2.0704526254917055, "learning_rate": 8.537515412328216e-06, "loss": 0.9498, "step": 5354 }, { "epoch": 0.5604395604395604, "grad_norm": 2.073132608363666, "learning_rate": 8.534162222887003e-06, "loss": 0.9982, "step": 5355 }, { "epoch": 0.5605442176870749, "grad_norm": 2.171878885625719, "learning_rate": 8.530809201873523e-06, "loss": 0.8136, "step": 5356 }, { "epoch": 0.5606488749345893, "grad_norm": 2.1045724839104367, "learning_rate": 8.527456349673043e-06, "loss": 1.0294, "step": 5357 }, { "epoch": 0.5607535321821037, "grad_norm": 2.1863543307087987, "learning_rate": 8.524103666670817e-06, "loss": 0.9556, "step": 5358 }, { "epoch": 0.560858189429618, "grad_norm": 2.0478052085618557, "learning_rate": 8.52075115325207e-06, "loss": 0.9486, "step": 5359 }, { "epoch": 0.5609628466771324, "grad_norm": 2.186343411269352, "learning_rate": 8.517398809802009e-06, "loss": 1.1094, "step": 5360 }, { "epoch": 0.5610675039246468, "grad_norm": 2.1314430854613007, "learning_rate": 8.514046636705835e-06, "loss": 0.9564, "step": 5361 }, { "epoch": 0.5611721611721612, "grad_norm": 2.118979548912484, "learning_rate": 8.510694634348715e-06, "loss": 0.9487, "step": 5362 }, { "epoch": 0.5612768184196756, "grad_norm": 1.9296804650577581, "learning_rate": 8.507342803115799e-06, "loss": 1.041, "step": 5363 }, { "epoch": 0.56138147566719, "grad_norm": 1.7942107435770736, "learning_rate": 8.503991143392225e-06, "loss": 0.9425, "step": 5364 }, { "epoch": 0.5614861329147044, "grad_norm": 2.465347277668139, "learning_rate": 8.500639655563097e-06, "loss": 0.9761, "step": 5365 }, { "epoch": 0.5615907901622187, "grad_norm": 2.1891772863766774, "learning_rate": 8.497288340013514e-06, "loss": 0.9151, "step": 5366 }, { "epoch": 0.5616954474097331, "grad_norm": 1.726643594528601, "learning_rate": 8.493937197128546e-06, "loss": 0.959, "step": 5367 }, { "epoch": 0.5618001046572475, "grad_norm": 1.7289012170427154, "learning_rate": 8.490586227293244e-06, "loss": 0.8114, "step": 5368 }, { "epoch": 0.5619047619047619, "grad_norm": 1.8444026585255053, "learning_rate": 8.487235430892648e-06, "loss": 0.8111, "step": 5369 }, { "epoch": 0.5620094191522763, "grad_norm": 2.1272015623519778, "learning_rate": 8.483884808311768e-06, "loss": 0.8135, "step": 5370 }, { "epoch": 0.5621140763997907, "grad_norm": 2.064621150380741, "learning_rate": 8.480534359935596e-06, "loss": 0.7135, "step": 5371 }, { "epoch": 0.5622187336473051, "grad_norm": 2.201762241819266, "learning_rate": 8.477184086149109e-06, "loss": 1.0045, "step": 5372 }, { "epoch": 0.5623233908948194, "grad_norm": 2.331143016583063, "learning_rate": 8.473833987337255e-06, "loss": 0.8975, "step": 5373 }, { "epoch": 0.5624280481423338, "grad_norm": 2.956408422523764, "learning_rate": 8.470484063884969e-06, "loss": 0.9348, "step": 5374 }, { "epoch": 0.5625327053898482, "grad_norm": 2.929476780401184, "learning_rate": 8.467134316177167e-06, "loss": 0.9468, "step": 5375 }, { "epoch": 0.5626373626373626, "grad_norm": 2.022910118977885, "learning_rate": 8.46378474459874e-06, "loss": 0.8546, "step": 5376 }, { "epoch": 0.562742019884877, "grad_norm": 2.2903787311759123, "learning_rate": 8.460435349534555e-06, "loss": 0.9358, "step": 5377 }, { "epoch": 0.5628466771323914, "grad_norm": 2.899288834804206, "learning_rate": 8.457086131369475e-06, "loss": 0.938, "step": 5378 }, { "epoch": 0.5629513343799059, "grad_norm": 1.8808885484596338, "learning_rate": 8.453737090488324e-06, "loss": 0.8926, "step": 5379 }, { "epoch": 0.5630559916274201, "grad_norm": 2.015376498610437, "learning_rate": 8.450388227275914e-06, "loss": 0.9181, "step": 5380 }, { "epoch": 0.5631606488749346, "grad_norm": 2.4062132896620576, "learning_rate": 8.447039542117043e-06, "loss": 0.9622, "step": 5381 }, { "epoch": 0.563265306122449, "grad_norm": 2.163527877551462, "learning_rate": 8.443691035396472e-06, "loss": 0.9314, "step": 5382 }, { "epoch": 0.5633699633699634, "grad_norm": 1.7474043964097528, "learning_rate": 8.440342707498961e-06, "loss": 0.8849, "step": 5383 }, { "epoch": 0.5634746206174778, "grad_norm": 2.0844952386004256, "learning_rate": 8.43699455880923e-06, "loss": 0.9463, "step": 5384 }, { "epoch": 0.5635792778649922, "grad_norm": 2.1603071390316018, "learning_rate": 8.433646589711996e-06, "loss": 0.9383, "step": 5385 }, { "epoch": 0.5636839351125066, "grad_norm": 2.0181491721198626, "learning_rate": 8.430298800591939e-06, "loss": 0.9197, "step": 5386 }, { "epoch": 0.5637885923600209, "grad_norm": 1.827058275655275, "learning_rate": 8.426951191833735e-06, "loss": 0.7931, "step": 5387 }, { "epoch": 0.5638932496075353, "grad_norm": 1.901008131381671, "learning_rate": 8.42360376382203e-06, "loss": 0.9066, "step": 5388 }, { "epoch": 0.5639979068550497, "grad_norm": 2.061629426104861, "learning_rate": 8.420256516941446e-06, "loss": 0.9644, "step": 5389 }, { "epoch": 0.5641025641025641, "grad_norm": 2.0842792186189225, "learning_rate": 8.416909451576592e-06, "loss": 0.8135, "step": 5390 }, { "epoch": 0.5642072213500785, "grad_norm": 2.3449029492925906, "learning_rate": 8.41356256811205e-06, "loss": 0.9087, "step": 5391 }, { "epoch": 0.5643118785975929, "grad_norm": 2.382812471439128, "learning_rate": 8.410215866932386e-06, "loss": 0.9713, "step": 5392 }, { "epoch": 0.5644165358451073, "grad_norm": 2.1076018814211466, "learning_rate": 8.40686934842214e-06, "loss": 1.0884, "step": 5393 }, { "epoch": 0.5645211930926216, "grad_norm": 2.220481614569876, "learning_rate": 8.403523012965835e-06, "loss": 0.9772, "step": 5394 }, { "epoch": 0.564625850340136, "grad_norm": 2.0659611056947687, "learning_rate": 8.400176860947975e-06, "loss": 1.0665, "step": 5395 }, { "epoch": 0.5647305075876504, "grad_norm": 1.8672547133425201, "learning_rate": 8.396830892753036e-06, "loss": 0.9571, "step": 5396 }, { "epoch": 0.5648351648351648, "grad_norm": 2.1394363159336955, "learning_rate": 8.393485108765477e-06, "loss": 0.7481, "step": 5397 }, { "epoch": 0.5649398220826792, "grad_norm": 2.007437446941716, "learning_rate": 8.390139509369739e-06, "loss": 0.8622, "step": 5398 }, { "epoch": 0.5650444793301936, "grad_norm": 2.1993539972983154, "learning_rate": 8.386794094950231e-06, "loss": 1.032, "step": 5399 }, { "epoch": 0.565149136577708, "grad_norm": 1.9010216623009064, "learning_rate": 8.383448865891353e-06, "loss": 0.8278, "step": 5400 }, { "epoch": 0.5652537938252225, "grad_norm": 1.8728602015988864, "learning_rate": 8.380103822577476e-06, "loss": 0.7388, "step": 5401 }, { "epoch": 0.5653584510727367, "grad_norm": 1.9149296409387317, "learning_rate": 8.376758965392956e-06, "loss": 0.9607, "step": 5402 }, { "epoch": 0.5654631083202512, "grad_norm": 1.8429299678951467, "learning_rate": 8.373414294722117e-06, "loss": 0.9577, "step": 5403 }, { "epoch": 0.5655677655677656, "grad_norm": 2.0351716866666587, "learning_rate": 8.370069810949276e-06, "loss": 0.9344, "step": 5404 }, { "epoch": 0.56567242281528, "grad_norm": 2.029063031151813, "learning_rate": 8.366725514458719e-06, "loss": 0.8565, "step": 5405 }, { "epoch": 0.5657770800627944, "grad_norm": 2.25021340401501, "learning_rate": 8.36338140563471e-06, "loss": 0.9405, "step": 5406 }, { "epoch": 0.5658817373103088, "grad_norm": 1.9493532133820115, "learning_rate": 8.360037484861497e-06, "loss": 1.0342, "step": 5407 }, { "epoch": 0.5659863945578232, "grad_norm": 1.9189066069765968, "learning_rate": 8.3566937525233e-06, "loss": 0.9019, "step": 5408 }, { "epoch": 0.5660910518053375, "grad_norm": 2.2068316926644633, "learning_rate": 8.353350209004323e-06, "loss": 1.089, "step": 5409 }, { "epoch": 0.5661957090528519, "grad_norm": 2.0845230459581163, "learning_rate": 8.350006854688744e-06, "loss": 0.8706, "step": 5410 }, { "epoch": 0.5663003663003663, "grad_norm": 1.9447916121641364, "learning_rate": 8.346663689960724e-06, "loss": 0.8611, "step": 5411 }, { "epoch": 0.5664050235478807, "grad_norm": 2.1579606427265885, "learning_rate": 8.343320715204393e-06, "loss": 0.894, "step": 5412 }, { "epoch": 0.5665096807953951, "grad_norm": 2.0662389913637087, "learning_rate": 8.339977930803872e-06, "loss": 1.0039, "step": 5413 }, { "epoch": 0.5666143380429095, "grad_norm": 1.9074458194803643, "learning_rate": 8.336635337143257e-06, "loss": 0.9126, "step": 5414 }, { "epoch": 0.5667189952904239, "grad_norm": 2.260555516177037, "learning_rate": 8.333292934606611e-06, "loss": 0.9502, "step": 5415 }, { "epoch": 0.5668236525379382, "grad_norm": 2.5104319898551886, "learning_rate": 8.329950723577985e-06, "loss": 0.8688, "step": 5416 }, { "epoch": 0.5669283097854526, "grad_norm": 1.938544875865869, "learning_rate": 8.326608704441408e-06, "loss": 0.8114, "step": 5417 }, { "epoch": 0.567032967032967, "grad_norm": 1.873974736150882, "learning_rate": 8.323266877580881e-06, "loss": 0.9325, "step": 5418 }, { "epoch": 0.5671376242804814, "grad_norm": 2.316623632030778, "learning_rate": 8.31992524338039e-06, "loss": 0.9622, "step": 5419 }, { "epoch": 0.5672422815279958, "grad_norm": 1.762820617049393, "learning_rate": 8.316583802223892e-06, "loss": 0.9175, "step": 5420 }, { "epoch": 0.5673469387755102, "grad_norm": 2.0087856448492736, "learning_rate": 8.313242554495328e-06, "loss": 1.0089, "step": 5421 }, { "epoch": 0.5674515960230246, "grad_norm": 2.0981791703798978, "learning_rate": 8.309901500578617e-06, "loss": 0.9378, "step": 5422 }, { "epoch": 0.5675562532705389, "grad_norm": 2.0273142602067216, "learning_rate": 8.306560640857649e-06, "loss": 0.896, "step": 5423 }, { "epoch": 0.5676609105180533, "grad_norm": 2.2220399006402647, "learning_rate": 8.303219975716296e-06, "loss": 0.8499, "step": 5424 }, { "epoch": 0.5677655677655677, "grad_norm": 1.9703508493150457, "learning_rate": 8.299879505538407e-06, "loss": 0.9592, "step": 5425 }, { "epoch": 0.5678702250130822, "grad_norm": 2.105250742608055, "learning_rate": 8.29653923070781e-06, "loss": 0.8972, "step": 5426 }, { "epoch": 0.5679748822605966, "grad_norm": 2.030286562075985, "learning_rate": 8.293199151608307e-06, "loss": 0.9613, "step": 5427 }, { "epoch": 0.568079539508111, "grad_norm": 2.3379239596475685, "learning_rate": 8.289859268623685e-06, "loss": 0.9236, "step": 5428 }, { "epoch": 0.5681841967556254, "grad_norm": 1.9841324284776523, "learning_rate": 8.286519582137694e-06, "loss": 1.055, "step": 5429 }, { "epoch": 0.5682888540031397, "grad_norm": 2.4795485146638647, "learning_rate": 8.28318009253408e-06, "loss": 0.9924, "step": 5430 }, { "epoch": 0.5683935112506541, "grad_norm": 2.320539277472763, "learning_rate": 8.279840800196555e-06, "loss": 1.0494, "step": 5431 }, { "epoch": 0.5684981684981685, "grad_norm": 1.8100197495806924, "learning_rate": 8.276501705508808e-06, "loss": 0.8236, "step": 5432 }, { "epoch": 0.5686028257456829, "grad_norm": 1.9211836188740459, "learning_rate": 8.27316280885451e-06, "loss": 0.9775, "step": 5433 }, { "epoch": 0.5687074829931973, "grad_norm": 1.7835418034139316, "learning_rate": 8.269824110617306e-06, "loss": 0.9137, "step": 5434 }, { "epoch": 0.5688121402407117, "grad_norm": 2.4840168814546533, "learning_rate": 8.266485611180816e-06, "loss": 1.0129, "step": 5435 }, { "epoch": 0.5689167974882261, "grad_norm": 2.124908482971047, "learning_rate": 8.263147310928647e-06, "loss": 1.0432, "step": 5436 }, { "epoch": 0.5690214547357404, "grad_norm": 1.8840247947452105, "learning_rate": 8.259809210244372e-06, "loss": 0.9159, "step": 5437 }, { "epoch": 0.5691261119832548, "grad_norm": 2.0456780135127426, "learning_rate": 8.256471309511541e-06, "loss": 0.9619, "step": 5438 }, { "epoch": 0.5692307692307692, "grad_norm": 1.9569879555280116, "learning_rate": 8.253133609113699e-06, "loss": 0.8922, "step": 5439 }, { "epoch": 0.5693354264782836, "grad_norm": 2.2202855654973868, "learning_rate": 8.249796109434342e-06, "loss": 0.8476, "step": 5440 }, { "epoch": 0.569440083725798, "grad_norm": 1.8957910452182647, "learning_rate": 8.246458810856963e-06, "loss": 0.974, "step": 5441 }, { "epoch": 0.5695447409733124, "grad_norm": 2.0379277438037575, "learning_rate": 8.24312171376502e-06, "loss": 0.9315, "step": 5442 }, { "epoch": 0.5696493982208268, "grad_norm": 2.3174337089710435, "learning_rate": 8.239784818541955e-06, "loss": 0.9172, "step": 5443 }, { "epoch": 0.5697540554683412, "grad_norm": 1.8964131960152406, "learning_rate": 8.23644812557118e-06, "loss": 0.9638, "step": 5444 }, { "epoch": 0.5698587127158555, "grad_norm": 1.8962610994511044, "learning_rate": 8.233111635236093e-06, "loss": 0.8465, "step": 5445 }, { "epoch": 0.5699633699633699, "grad_norm": 2.0617322216840632, "learning_rate": 8.22977534792006e-06, "loss": 0.9347, "step": 5446 }, { "epoch": 0.5700680272108843, "grad_norm": 2.010417513155231, "learning_rate": 8.226439264006425e-06, "loss": 0.9013, "step": 5447 }, { "epoch": 0.5701726844583987, "grad_norm": 1.905616574561522, "learning_rate": 8.223103383878517e-06, "loss": 0.9417, "step": 5448 }, { "epoch": 0.5702773417059132, "grad_norm": 2.0075024867237548, "learning_rate": 8.219767707919632e-06, "loss": 1.0392, "step": 5449 }, { "epoch": 0.5703819989534276, "grad_norm": 1.8721709785411973, "learning_rate": 8.216432236513048e-06, "loss": 0.9228, "step": 5450 }, { "epoch": 0.570486656200942, "grad_norm": 1.9218512333873883, "learning_rate": 8.213096970042015e-06, "loss": 1.0, "step": 5451 }, { "epoch": 0.5705913134484563, "grad_norm": 2.329819893077006, "learning_rate": 8.209761908889762e-06, "loss": 0.9031, "step": 5452 }, { "epoch": 0.5706959706959707, "grad_norm": 1.8170918140020158, "learning_rate": 8.206427053439497e-06, "loss": 0.8399, "step": 5453 }, { "epoch": 0.5708006279434851, "grad_norm": 2.195702490756807, "learning_rate": 8.203092404074395e-06, "loss": 0.902, "step": 5454 }, { "epoch": 0.5709052851909995, "grad_norm": 1.8273594046031365, "learning_rate": 8.199757961177618e-06, "loss": 0.8918, "step": 5455 }, { "epoch": 0.5710099424385139, "grad_norm": 2.146875048935083, "learning_rate": 8.196423725132305e-06, "loss": 0.9573, "step": 5456 }, { "epoch": 0.5711145996860283, "grad_norm": 2.1126148763204022, "learning_rate": 8.19308969632156e-06, "loss": 0.8839, "step": 5457 }, { "epoch": 0.5712192569335427, "grad_norm": 2.390207657529769, "learning_rate": 8.189755875128474e-06, "loss": 1.0406, "step": 5458 }, { "epoch": 0.571323914181057, "grad_norm": 2.1517068860045128, "learning_rate": 8.186422261936105e-06, "loss": 0.8052, "step": 5459 }, { "epoch": 0.5714285714285714, "grad_norm": 2.119466613691594, "learning_rate": 8.183088857127496e-06, "loss": 0.9079, "step": 5460 }, { "epoch": 0.5715332286760858, "grad_norm": 2.231374940946366, "learning_rate": 8.17975566108566e-06, "loss": 0.9486, "step": 5461 }, { "epoch": 0.5716378859236002, "grad_norm": 2.073842713633685, "learning_rate": 8.176422674193591e-06, "loss": 0.9796, "step": 5462 }, { "epoch": 0.5717425431711146, "grad_norm": 1.8850162422585321, "learning_rate": 8.173089896834249e-06, "loss": 0.9233, "step": 5463 }, { "epoch": 0.571847200418629, "grad_norm": 2.2918049824032605, "learning_rate": 8.169757329390581e-06, "loss": 0.9653, "step": 5464 }, { "epoch": 0.5719518576661434, "grad_norm": 1.987836099412543, "learning_rate": 8.16642497224551e-06, "loss": 0.8883, "step": 5465 }, { "epoch": 0.5720565149136577, "grad_norm": 1.9580693885487788, "learning_rate": 8.163092825781927e-06, "loss": 1.0531, "step": 5466 }, { "epoch": 0.5721611721611721, "grad_norm": 2.5063692651334195, "learning_rate": 8.159760890382703e-06, "loss": 0.9574, "step": 5467 }, { "epoch": 0.5722658294086865, "grad_norm": 2.2807498841062275, "learning_rate": 8.15642916643068e-06, "loss": 1.09, "step": 5468 }, { "epoch": 0.5723704866562009, "grad_norm": 1.9133632160896512, "learning_rate": 8.153097654308684e-06, "loss": 0.8269, "step": 5469 }, { "epoch": 0.5724751439037153, "grad_norm": 1.8332089750958485, "learning_rate": 8.149766354399512e-06, "loss": 0.8831, "step": 5470 }, { "epoch": 0.5725798011512298, "grad_norm": 1.9413274135809737, "learning_rate": 8.146435267085937e-06, "loss": 0.9679, "step": 5471 }, { "epoch": 0.5726844583987442, "grad_norm": 1.8297455313311937, "learning_rate": 8.143104392750708e-06, "loss": 0.949, "step": 5472 }, { "epoch": 0.5727891156462585, "grad_norm": 2.0994208439284052, "learning_rate": 8.139773731776545e-06, "loss": 0.9182, "step": 5473 }, { "epoch": 0.5728937728937729, "grad_norm": 2.2224164168293887, "learning_rate": 8.136443284546153e-06, "loss": 0.8501, "step": 5474 }, { "epoch": 0.5729984301412873, "grad_norm": 1.9784406564547241, "learning_rate": 8.133113051442207e-06, "loss": 0.9075, "step": 5475 }, { "epoch": 0.5731030873888017, "grad_norm": 2.1561299853644162, "learning_rate": 8.129783032847353e-06, "loss": 0.8528, "step": 5476 }, { "epoch": 0.5732077446363161, "grad_norm": 1.8007982019446203, "learning_rate": 8.126453229144224e-06, "loss": 0.9403, "step": 5477 }, { "epoch": 0.5733124018838305, "grad_norm": 1.944118541502138, "learning_rate": 8.123123640715415e-06, "loss": 0.9122, "step": 5478 }, { "epoch": 0.5734170591313449, "grad_norm": 2.0045254601484443, "learning_rate": 8.119794267943506e-06, "loss": 0.8893, "step": 5479 }, { "epoch": 0.5735217163788592, "grad_norm": 1.908937792505844, "learning_rate": 8.116465111211045e-06, "loss": 0.8759, "step": 5480 }, { "epoch": 0.5736263736263736, "grad_norm": 1.9982276022368928, "learning_rate": 8.113136170900558e-06, "loss": 0.9882, "step": 5481 }, { "epoch": 0.573731030873888, "grad_norm": 2.0425269282462803, "learning_rate": 8.109807447394555e-06, "loss": 0.9599, "step": 5482 }, { "epoch": 0.5738356881214024, "grad_norm": 1.8900946053549617, "learning_rate": 8.106478941075506e-06, "loss": 0.8938, "step": 5483 }, { "epoch": 0.5739403453689168, "grad_norm": 2.0201948859873022, "learning_rate": 8.103150652325867e-06, "loss": 0.9496, "step": 5484 }, { "epoch": 0.5740450026164312, "grad_norm": 1.914651039584181, "learning_rate": 8.099822581528061e-06, "loss": 0.8073, "step": 5485 }, { "epoch": 0.5741496598639456, "grad_norm": 1.9592779998675791, "learning_rate": 8.096494729064497e-06, "loss": 0.9203, "step": 5486 }, { "epoch": 0.57425431711146, "grad_norm": 1.9404520688455333, "learning_rate": 8.093167095317543e-06, "loss": 0.9277, "step": 5487 }, { "epoch": 0.5743589743589743, "grad_norm": 2.102772910368145, "learning_rate": 8.089839680669557e-06, "loss": 1.0397, "step": 5488 }, { "epoch": 0.5744636316064887, "grad_norm": 2.0883574055987197, "learning_rate": 8.086512485502864e-06, "loss": 0.8756, "step": 5489 }, { "epoch": 0.5745682888540031, "grad_norm": 1.885771295192384, "learning_rate": 8.083185510199762e-06, "loss": 0.9681, "step": 5490 }, { "epoch": 0.5746729461015175, "grad_norm": 2.025131247967418, "learning_rate": 8.079858755142536e-06, "loss": 0.9212, "step": 5491 }, { "epoch": 0.5747776033490319, "grad_norm": 2.055060910903538, "learning_rate": 8.076532220713429e-06, "loss": 0.9848, "step": 5492 }, { "epoch": 0.5748822605965463, "grad_norm": 1.6221987425177355, "learning_rate": 8.073205907294669e-06, "loss": 0.8392, "step": 5493 }, { "epoch": 0.5749869178440608, "grad_norm": 2.112224534296191, "learning_rate": 8.069879815268458e-06, "loss": 0.9113, "step": 5494 }, { "epoch": 0.575091575091575, "grad_norm": 1.9240692006326605, "learning_rate": 8.066553945016968e-06, "loss": 0.9778, "step": 5495 }, { "epoch": 0.5751962323390895, "grad_norm": 1.9511742285030678, "learning_rate": 8.063228296922349e-06, "loss": 0.9533, "step": 5496 }, { "epoch": 0.5753008895866039, "grad_norm": 2.2922538518406, "learning_rate": 8.059902871366724e-06, "loss": 0.8646, "step": 5497 }, { "epoch": 0.5754055468341183, "grad_norm": 2.126390921205575, "learning_rate": 8.056577668732196e-06, "loss": 0.7988, "step": 5498 }, { "epoch": 0.5755102040816327, "grad_norm": 2.0074966511095758, "learning_rate": 8.053252689400826e-06, "loss": 0.9166, "step": 5499 }, { "epoch": 0.5756148613291471, "grad_norm": 2.0738861300544134, "learning_rate": 8.049927933754672e-06, "loss": 1.006, "step": 5500 }, { "epoch": 0.5757195185766615, "grad_norm": 2.415212004526124, "learning_rate": 8.046603402175755e-06, "loss": 0.9133, "step": 5501 }, { "epoch": 0.5758241758241758, "grad_norm": 1.9234109153078869, "learning_rate": 8.043279095046064e-06, "loss": 0.8681, "step": 5502 }, { "epoch": 0.5759288330716902, "grad_norm": 1.889529105841766, "learning_rate": 8.039955012747573e-06, "loss": 0.9011, "step": 5503 }, { "epoch": 0.5760334903192046, "grad_norm": 2.034277892033179, "learning_rate": 8.036631155662223e-06, "loss": 1.0261, "step": 5504 }, { "epoch": 0.576138147566719, "grad_norm": 1.8787654537409386, "learning_rate": 8.033307524171935e-06, "loss": 0.9117, "step": 5505 }, { "epoch": 0.5762428048142334, "grad_norm": 1.9010114330459553, "learning_rate": 8.029984118658598e-06, "loss": 0.9688, "step": 5506 }, { "epoch": 0.5763474620617478, "grad_norm": 1.8350448320881683, "learning_rate": 8.026660939504076e-06, "loss": 0.9382, "step": 5507 }, { "epoch": 0.5764521193092622, "grad_norm": 2.3648628400669183, "learning_rate": 8.023337987090219e-06, "loss": 0.7732, "step": 5508 }, { "epoch": 0.5765567765567765, "grad_norm": 1.9441530508631961, "learning_rate": 8.02001526179883e-06, "loss": 0.9569, "step": 5509 }, { "epoch": 0.5766614338042909, "grad_norm": 2.148620913574854, "learning_rate": 8.016692764011705e-06, "loss": 1.017, "step": 5510 }, { "epoch": 0.5767660910518053, "grad_norm": 2.054332121041, "learning_rate": 8.013370494110601e-06, "loss": 0.9866, "step": 5511 }, { "epoch": 0.5768707482993197, "grad_norm": 1.7926126111942025, "learning_rate": 8.010048452477253e-06, "loss": 0.9684, "step": 5512 }, { "epoch": 0.5769754055468341, "grad_norm": 1.9449655810781312, "learning_rate": 8.006726639493374e-06, "loss": 0.8362, "step": 5513 }, { "epoch": 0.5770800627943485, "grad_norm": 2.1002111681115965, "learning_rate": 8.003405055540642e-06, "loss": 0.8563, "step": 5514 }, { "epoch": 0.5771847200418629, "grad_norm": 1.8369318308457716, "learning_rate": 8.00008370100072e-06, "loss": 0.9018, "step": 5515 }, { "epoch": 0.5772893772893772, "grad_norm": 2.2436953873311256, "learning_rate": 7.996762576255229e-06, "loss": 0.8905, "step": 5516 }, { "epoch": 0.5773940345368916, "grad_norm": 2.1273954997385434, "learning_rate": 7.993441681685783e-06, "loss": 0.9944, "step": 5517 }, { "epoch": 0.577498691784406, "grad_norm": 2.0031275428527837, "learning_rate": 7.990121017673956e-06, "loss": 0.8795, "step": 5518 }, { "epoch": 0.5776033490319205, "grad_norm": 2.2855770714536683, "learning_rate": 7.986800584601298e-06, "loss": 1.0042, "step": 5519 }, { "epoch": 0.5777080062794349, "grad_norm": 1.7868487916899245, "learning_rate": 7.983480382849335e-06, "loss": 0.8817, "step": 5520 }, { "epoch": 0.5778126635269493, "grad_norm": 2.0217427991401986, "learning_rate": 7.980160412799561e-06, "loss": 0.8515, "step": 5521 }, { "epoch": 0.5779173207744637, "grad_norm": 2.0627313405323697, "learning_rate": 7.976840674833454e-06, "loss": 1.0603, "step": 5522 }, { "epoch": 0.578021978021978, "grad_norm": 1.8428954962859756, "learning_rate": 7.973521169332452e-06, "loss": 0.857, "step": 5523 }, { "epoch": 0.5781266352694924, "grad_norm": 2.075289447886697, "learning_rate": 7.970201896677978e-06, "loss": 0.9109, "step": 5524 }, { "epoch": 0.5782312925170068, "grad_norm": 1.885372734744151, "learning_rate": 7.966882857251416e-06, "loss": 0.9159, "step": 5525 }, { "epoch": 0.5783359497645212, "grad_norm": 1.6845318805206528, "learning_rate": 7.963564051434139e-06, "loss": 0.7578, "step": 5526 }, { "epoch": 0.5784406070120356, "grad_norm": 2.0450170748383014, "learning_rate": 7.960245479607482e-06, "loss": 0.9455, "step": 5527 }, { "epoch": 0.57854526425955, "grad_norm": 2.2160296759535885, "learning_rate": 7.956927142152751e-06, "loss": 1.004, "step": 5528 }, { "epoch": 0.5786499215070644, "grad_norm": 1.71231827285406, "learning_rate": 7.953609039451238e-06, "loss": 0.7896, "step": 5529 }, { "epoch": 0.5787545787545788, "grad_norm": 2.0196918037710567, "learning_rate": 7.950291171884192e-06, "loss": 0.8954, "step": 5530 }, { "epoch": 0.5788592360020931, "grad_norm": 1.9380102025151027, "learning_rate": 7.946973539832845e-06, "loss": 0.8539, "step": 5531 }, { "epoch": 0.5789638932496075, "grad_norm": 2.079187841186523, "learning_rate": 7.943656143678402e-06, "loss": 0.9011, "step": 5532 }, { "epoch": 0.5790685504971219, "grad_norm": 2.0873564237094318, "learning_rate": 7.940338983802032e-06, "loss": 0.859, "step": 5533 }, { "epoch": 0.5791732077446363, "grad_norm": 3.551683446968433, "learning_rate": 7.937022060584891e-06, "loss": 0.8587, "step": 5534 }, { "epoch": 0.5792778649921507, "grad_norm": 1.786949141630154, "learning_rate": 7.933705374408099e-06, "loss": 0.8939, "step": 5535 }, { "epoch": 0.5793825222396651, "grad_norm": 2.307443887128846, "learning_rate": 7.930388925652746e-06, "loss": 0.9096, "step": 5536 }, { "epoch": 0.5794871794871795, "grad_norm": 2.429912604129563, "learning_rate": 7.927072714699903e-06, "loss": 0.9904, "step": 5537 }, { "epoch": 0.5795918367346938, "grad_norm": 1.929972910007676, "learning_rate": 7.923756741930607e-06, "loss": 0.9509, "step": 5538 }, { "epoch": 0.5796964939822082, "grad_norm": 2.0723179929400652, "learning_rate": 7.92044100772587e-06, "loss": 0.9514, "step": 5539 }, { "epoch": 0.5798011512297226, "grad_norm": 1.9377894076263404, "learning_rate": 7.917125512466677e-06, "loss": 0.9522, "step": 5540 }, { "epoch": 0.579905808477237, "grad_norm": 1.9466085430494366, "learning_rate": 7.913810256533987e-06, "loss": 1.0248, "step": 5541 }, { "epoch": 0.5800104657247515, "grad_norm": 1.9946195154612312, "learning_rate": 7.910495240308722e-06, "loss": 0.9148, "step": 5542 }, { "epoch": 0.5801151229722659, "grad_norm": 2.066175895381359, "learning_rate": 7.907180464171792e-06, "loss": 0.992, "step": 5543 }, { "epoch": 0.5802197802197803, "grad_norm": 2.091752799122247, "learning_rate": 7.903865928504073e-06, "loss": 0.9818, "step": 5544 }, { "epoch": 0.5803244374672946, "grad_norm": 2.0815270541413438, "learning_rate": 7.900551633686405e-06, "loss": 1.053, "step": 5545 }, { "epoch": 0.580429094714809, "grad_norm": 1.8552562530126415, "learning_rate": 7.89723758009961e-06, "loss": 0.8185, "step": 5546 }, { "epoch": 0.5805337519623234, "grad_norm": 2.3294466334249555, "learning_rate": 7.893923768124481e-06, "loss": 0.9063, "step": 5547 }, { "epoch": 0.5806384092098378, "grad_norm": 2.883079539310358, "learning_rate": 7.890610198141782e-06, "loss": 0.9315, "step": 5548 }, { "epoch": 0.5807430664573522, "grad_norm": 1.7119553078829457, "learning_rate": 7.887296870532246e-06, "loss": 0.9367, "step": 5549 }, { "epoch": 0.5808477237048666, "grad_norm": 2.2200861287766114, "learning_rate": 7.88398378567658e-06, "loss": 0.7905, "step": 5550 }, { "epoch": 0.580952380952381, "grad_norm": 2.039585468725626, "learning_rate": 7.880670943955467e-06, "loss": 1.0008, "step": 5551 }, { "epoch": 0.5810570381998953, "grad_norm": 1.9366320130825088, "learning_rate": 7.877358345749562e-06, "loss": 0.8768, "step": 5552 }, { "epoch": 0.5811616954474097, "grad_norm": 2.056342630692964, "learning_rate": 7.874045991439484e-06, "loss": 0.9288, "step": 5553 }, { "epoch": 0.5812663526949241, "grad_norm": 2.113800312709259, "learning_rate": 7.870733881405835e-06, "loss": 0.9684, "step": 5554 }, { "epoch": 0.5813710099424385, "grad_norm": 2.0422803075735403, "learning_rate": 7.867422016029174e-06, "loss": 0.8649, "step": 5555 }, { "epoch": 0.5814756671899529, "grad_norm": 2.208609946344471, "learning_rate": 7.864110395690051e-06, "loss": 1.0076, "step": 5556 }, { "epoch": 0.5815803244374673, "grad_norm": 2.2154587482971935, "learning_rate": 7.86079902076897e-06, "loss": 0.7496, "step": 5557 }, { "epoch": 0.5816849816849817, "grad_norm": 2.0643605224522616, "learning_rate": 7.857487891646422e-06, "loss": 0.8465, "step": 5558 }, { "epoch": 0.581789638932496, "grad_norm": 1.900572692373446, "learning_rate": 7.854177008702852e-06, "loss": 0.9254, "step": 5559 }, { "epoch": 0.5818942961800104, "grad_norm": 2.070474926104496, "learning_rate": 7.850866372318697e-06, "loss": 1.0204, "step": 5560 }, { "epoch": 0.5819989534275248, "grad_norm": 1.9724412431279685, "learning_rate": 7.847555982874355e-06, "loss": 1.0067, "step": 5561 }, { "epoch": 0.5821036106750392, "grad_norm": 2.0205088862472285, "learning_rate": 7.844245840750194e-06, "loss": 0.9708, "step": 5562 }, { "epoch": 0.5822082679225536, "grad_norm": 2.1780476787506076, "learning_rate": 7.840935946326554e-06, "loss": 0.8583, "step": 5563 }, { "epoch": 0.582312925170068, "grad_norm": 2.113023798763882, "learning_rate": 7.83762629998375e-06, "loss": 0.9245, "step": 5564 }, { "epoch": 0.5824175824175825, "grad_norm": 2.197691507927687, "learning_rate": 7.834316902102072e-06, "loss": 0.8333, "step": 5565 }, { "epoch": 0.5825222396650968, "grad_norm": 2.1409592615846664, "learning_rate": 7.83100775306177e-06, "loss": 0.9258, "step": 5566 }, { "epoch": 0.5826268969126112, "grad_norm": 2.045119732160066, "learning_rate": 7.827698853243073e-06, "loss": 0.957, "step": 5567 }, { "epoch": 0.5827315541601256, "grad_norm": 2.256212640620829, "learning_rate": 7.82439020302618e-06, "loss": 0.9719, "step": 5568 }, { "epoch": 0.58283621140764, "grad_norm": 1.9793551575101223, "learning_rate": 7.821081802791265e-06, "loss": 1.0033, "step": 5569 }, { "epoch": 0.5829408686551544, "grad_norm": 1.9336354212815183, "learning_rate": 7.817773652918467e-06, "loss": 0.9265, "step": 5570 }, { "epoch": 0.5830455259026688, "grad_norm": 2.132258291924315, "learning_rate": 7.814465753787902e-06, "loss": 0.9252, "step": 5571 }, { "epoch": 0.5831501831501832, "grad_norm": 2.2295413408637854, "learning_rate": 7.81115810577965e-06, "loss": 0.9697, "step": 5572 }, { "epoch": 0.5832548403976976, "grad_norm": 1.906463312716307, "learning_rate": 7.807850709273772e-06, "loss": 0.8958, "step": 5573 }, { "epoch": 0.5833594976452119, "grad_norm": 2.1146161586182495, "learning_rate": 7.80454356465029e-06, "loss": 0.9138, "step": 5574 }, { "epoch": 0.5834641548927263, "grad_norm": 2.3811043954927906, "learning_rate": 7.801236672289204e-06, "loss": 1.0349, "step": 5575 }, { "epoch": 0.5835688121402407, "grad_norm": 2.108644748299407, "learning_rate": 7.79793003257048e-06, "loss": 0.839, "step": 5576 }, { "epoch": 0.5836734693877551, "grad_norm": 2.1813751914717234, "learning_rate": 7.794623645874057e-06, "loss": 0.99, "step": 5577 }, { "epoch": 0.5837781266352695, "grad_norm": 1.8978749173293086, "learning_rate": 7.791317512579853e-06, "loss": 0.9008, "step": 5578 }, { "epoch": 0.5838827838827839, "grad_norm": 2.0693475512491664, "learning_rate": 7.78801163306774e-06, "loss": 0.9453, "step": 5579 }, { "epoch": 0.5839874411302983, "grad_norm": 2.3044788296228194, "learning_rate": 7.784706007717578e-06, "loss": 0.948, "step": 5580 }, { "epoch": 0.5840920983778126, "grad_norm": 1.8045170295820876, "learning_rate": 7.781400636909184e-06, "loss": 0.9382, "step": 5581 }, { "epoch": 0.584196755625327, "grad_norm": 2.483078766173217, "learning_rate": 7.778095521022357e-06, "loss": 0.8534, "step": 5582 }, { "epoch": 0.5843014128728414, "grad_norm": 2.1631414208383806, "learning_rate": 7.774790660436857e-06, "loss": 0.8908, "step": 5583 }, { "epoch": 0.5844060701203558, "grad_norm": 2.168390803098506, "learning_rate": 7.771486055532424e-06, "loss": 0.9576, "step": 5584 }, { "epoch": 0.5845107273678702, "grad_norm": 2.123503361696419, "learning_rate": 7.768181706688754e-06, "loss": 0.9218, "step": 5585 }, { "epoch": 0.5846153846153846, "grad_norm": 2.1013306321139487, "learning_rate": 7.764877614285533e-06, "loss": 0.8682, "step": 5586 }, { "epoch": 0.584720041862899, "grad_norm": 2.255480731355252, "learning_rate": 7.76157377870241e-06, "loss": 1.0501, "step": 5587 }, { "epoch": 0.5848246991104133, "grad_norm": 1.945562700084701, "learning_rate": 7.758270200318995e-06, "loss": 0.8875, "step": 5588 }, { "epoch": 0.5849293563579278, "grad_norm": 2.1195133177923235, "learning_rate": 7.754966879514878e-06, "loss": 0.8932, "step": 5589 }, { "epoch": 0.5850340136054422, "grad_norm": 2.1209423994658114, "learning_rate": 7.75166381666962e-06, "loss": 1.0249, "step": 5590 }, { "epoch": 0.5851386708529566, "grad_norm": 2.009199929745769, "learning_rate": 7.748361012162743e-06, "loss": 0.9536, "step": 5591 }, { "epoch": 0.585243328100471, "grad_norm": 1.8131883322530284, "learning_rate": 7.745058466373754e-06, "loss": 0.7916, "step": 5592 }, { "epoch": 0.5853479853479854, "grad_norm": 2.318244040850203, "learning_rate": 7.741756179682116e-06, "loss": 0.8724, "step": 5593 }, { "epoch": 0.5854526425954998, "grad_norm": 2.1235713244627523, "learning_rate": 7.738454152467267e-06, "loss": 0.9192, "step": 5594 }, { "epoch": 0.5855572998430141, "grad_norm": 2.2040042407951987, "learning_rate": 7.735152385108627e-06, "loss": 1.007, "step": 5595 }, { "epoch": 0.5856619570905285, "grad_norm": 2.292748328386475, "learning_rate": 7.731850877985567e-06, "loss": 0.9257, "step": 5596 }, { "epoch": 0.5857666143380429, "grad_norm": 2.077810847731391, "learning_rate": 7.728549631477441e-06, "loss": 0.9772, "step": 5597 }, { "epoch": 0.5858712715855573, "grad_norm": 2.0349358401554727, "learning_rate": 7.725248645963565e-06, "loss": 0.9287, "step": 5598 }, { "epoch": 0.5859759288330717, "grad_norm": 2.4781346688637935, "learning_rate": 7.721947921823232e-06, "loss": 0.9193, "step": 5599 }, { "epoch": 0.5860805860805861, "grad_norm": 1.9931375215068494, "learning_rate": 7.7186474594357e-06, "loss": 0.9548, "step": 5600 }, { "epoch": 0.5861852433281005, "grad_norm": 1.7997207751821085, "learning_rate": 7.715347259180202e-06, "loss": 0.9453, "step": 5601 }, { "epoch": 0.5862899005756148, "grad_norm": 2.010052931739926, "learning_rate": 7.712047321435932e-06, "loss": 1.0162, "step": 5602 }, { "epoch": 0.5863945578231292, "grad_norm": 2.333239467555819, "learning_rate": 7.708747646582061e-06, "loss": 0.9328, "step": 5603 }, { "epoch": 0.5864992150706436, "grad_norm": 2.10594950324619, "learning_rate": 7.705448234997735e-06, "loss": 0.9705, "step": 5604 }, { "epoch": 0.586603872318158, "grad_norm": 2.166799778137154, "learning_rate": 7.702149087062056e-06, "loss": 0.9954, "step": 5605 }, { "epoch": 0.5867085295656724, "grad_norm": 2.0918172155366177, "learning_rate": 7.698850203154108e-06, "loss": 0.839, "step": 5606 }, { "epoch": 0.5868131868131868, "grad_norm": 1.9671646846087965, "learning_rate": 7.695551583652935e-06, "loss": 0.8696, "step": 5607 }, { "epoch": 0.5869178440607012, "grad_norm": 2.2227576188186267, "learning_rate": 7.692253228937558e-06, "loss": 1.0203, "step": 5608 }, { "epoch": 0.5870225013082155, "grad_norm": 2.2068097944504164, "learning_rate": 7.688955139386961e-06, "loss": 0.9753, "step": 5609 }, { "epoch": 0.5871271585557299, "grad_norm": 2.439406039090037, "learning_rate": 7.685657315380105e-06, "loss": 0.9711, "step": 5610 }, { "epoch": 0.5872318158032444, "grad_norm": 2.1525531876859767, "learning_rate": 7.682359757295911e-06, "loss": 1.005, "step": 5611 }, { "epoch": 0.5873364730507588, "grad_norm": 2.036134319104039, "learning_rate": 7.679062465513282e-06, "loss": 0.9106, "step": 5612 }, { "epoch": 0.5874411302982732, "grad_norm": 1.980663244945154, "learning_rate": 7.675765440411082e-06, "loss": 1.0143, "step": 5613 }, { "epoch": 0.5875457875457876, "grad_norm": 1.9155213798006931, "learning_rate": 7.672468682368145e-06, "loss": 0.8642, "step": 5614 }, { "epoch": 0.587650444793302, "grad_norm": 2.0336061775870693, "learning_rate": 7.669172191763273e-06, "loss": 0.9126, "step": 5615 }, { "epoch": 0.5877551020408164, "grad_norm": 1.8849398925253606, "learning_rate": 7.665875968975243e-06, "loss": 0.7622, "step": 5616 }, { "epoch": 0.5878597592883307, "grad_norm": 2.325465135874042, "learning_rate": 7.662580014382794e-06, "loss": 0.9221, "step": 5617 }, { "epoch": 0.5879644165358451, "grad_norm": 2.315387773578512, "learning_rate": 7.659284328364642e-06, "loss": 0.854, "step": 5618 }, { "epoch": 0.5880690737833595, "grad_norm": 2.520796475508092, "learning_rate": 7.655988911299463e-06, "loss": 0.9864, "step": 5619 }, { "epoch": 0.5881737310308739, "grad_norm": 1.9719906781148255, "learning_rate": 7.652693763565909e-06, "loss": 1.0244, "step": 5620 }, { "epoch": 0.5882783882783883, "grad_norm": 2.1962848838041085, "learning_rate": 7.649398885542603e-06, "loss": 0.9411, "step": 5621 }, { "epoch": 0.5883830455259027, "grad_norm": 2.0497329566693945, "learning_rate": 7.64610427760813e-06, "loss": 0.9327, "step": 5622 }, { "epoch": 0.5884877027734171, "grad_norm": 2.3462319809252232, "learning_rate": 7.642809940141047e-06, "loss": 1.0223, "step": 5623 }, { "epoch": 0.5885923600209314, "grad_norm": 1.7385446984243167, "learning_rate": 7.639515873519882e-06, "loss": 0.8747, "step": 5624 }, { "epoch": 0.5886970172684458, "grad_norm": 2.0670189309137323, "learning_rate": 7.636222078123126e-06, "loss": 0.9075, "step": 5625 }, { "epoch": 0.5888016745159602, "grad_norm": 1.9507533341417447, "learning_rate": 7.632928554329246e-06, "loss": 0.9021, "step": 5626 }, { "epoch": 0.5889063317634746, "grad_norm": 2.3873787735902074, "learning_rate": 7.629635302516673e-06, "loss": 1.0425, "step": 5627 }, { "epoch": 0.589010989010989, "grad_norm": 2.252557905765437, "learning_rate": 7.626342323063811e-06, "loss": 0.9155, "step": 5628 }, { "epoch": 0.5891156462585034, "grad_norm": 1.9671306821641084, "learning_rate": 7.623049616349023e-06, "loss": 0.8436, "step": 5629 }, { "epoch": 0.5892203035060178, "grad_norm": 2.228355294848782, "learning_rate": 7.619757182750655e-06, "loss": 0.9464, "step": 5630 }, { "epoch": 0.5893249607535321, "grad_norm": 1.944122763671574, "learning_rate": 7.616465022647015e-06, "loss": 0.917, "step": 5631 }, { "epoch": 0.5894296180010465, "grad_norm": 2.0093474365195956, "learning_rate": 7.613173136416373e-06, "loss": 0.9609, "step": 5632 }, { "epoch": 0.589534275248561, "grad_norm": 2.3171664214346746, "learning_rate": 7.6098815244369785e-06, "loss": 0.9894, "step": 5633 }, { "epoch": 0.5896389324960754, "grad_norm": 2.0398463213152893, "learning_rate": 7.60659018708704e-06, "loss": 0.9755, "step": 5634 }, { "epoch": 0.5897435897435898, "grad_norm": 1.974866296695783, "learning_rate": 7.603299124744743e-06, "loss": 0.9829, "step": 5635 }, { "epoch": 0.5898482469911042, "grad_norm": 1.8436780153736634, "learning_rate": 7.600008337788233e-06, "loss": 0.8847, "step": 5636 }, { "epoch": 0.5899529042386186, "grad_norm": 1.7596168571097486, "learning_rate": 7.596717826595627e-06, "loss": 0.8302, "step": 5637 }, { "epoch": 0.5900575614861329, "grad_norm": 2.286365759016598, "learning_rate": 7.593427591545021e-06, "loss": 0.9302, "step": 5638 }, { "epoch": 0.5901622187336473, "grad_norm": 2.069205329083003, "learning_rate": 7.5901376330144606e-06, "loss": 0.8283, "step": 5639 }, { "epoch": 0.5902668759811617, "grad_norm": 2.241407089031377, "learning_rate": 7.586847951381973e-06, "loss": 0.9715, "step": 5640 }, { "epoch": 0.5903715332286761, "grad_norm": 2.0834114666515267, "learning_rate": 7.583558547025546e-06, "loss": 0.9345, "step": 5641 }, { "epoch": 0.5904761904761905, "grad_norm": 2.1953788167615262, "learning_rate": 7.580269420323143e-06, "loss": 1.0446, "step": 5642 }, { "epoch": 0.5905808477237049, "grad_norm": 2.2480917530067375, "learning_rate": 7.5769805716526876e-06, "loss": 0.8577, "step": 5643 }, { "epoch": 0.5906855049712193, "grad_norm": 2.4677953070932155, "learning_rate": 7.5736920013920756e-06, "loss": 0.9092, "step": 5644 }, { "epoch": 0.5907901622187336, "grad_norm": 2.0662054886088934, "learning_rate": 7.570403709919173e-06, "loss": 0.7921, "step": 5645 }, { "epoch": 0.590894819466248, "grad_norm": 2.1423059288297246, "learning_rate": 7.5671156976118056e-06, "loss": 0.9767, "step": 5646 }, { "epoch": 0.5909994767137624, "grad_norm": 2.7938569718008646, "learning_rate": 7.563827964847779e-06, "loss": 0.9416, "step": 5647 }, { "epoch": 0.5911041339612768, "grad_norm": 2.012361091935804, "learning_rate": 7.560540512004859e-06, "loss": 0.9601, "step": 5648 }, { "epoch": 0.5912087912087912, "grad_norm": 2.025112150721346, "learning_rate": 7.557253339460777e-06, "loss": 1.0345, "step": 5649 }, { "epoch": 0.5913134484563056, "grad_norm": 2.098963973383834, "learning_rate": 7.5539664475932415e-06, "loss": 0.953, "step": 5650 }, { "epoch": 0.59141810570382, "grad_norm": 2.155862643447173, "learning_rate": 7.550679836779917e-06, "loss": 0.9, "step": 5651 }, { "epoch": 0.5915227629513344, "grad_norm": 2.0776024089586214, "learning_rate": 7.547393507398447e-06, "loss": 0.8768, "step": 5652 }, { "epoch": 0.5916274201988487, "grad_norm": 2.040547845106933, "learning_rate": 7.544107459826433e-06, "loss": 0.885, "step": 5653 }, { "epoch": 0.5917320774463631, "grad_norm": 1.877629870992842, "learning_rate": 7.5408216944414515e-06, "loss": 0.7986, "step": 5654 }, { "epoch": 0.5918367346938775, "grad_norm": 2.108404306286894, "learning_rate": 7.537536211621039e-06, "loss": 0.9868, "step": 5655 }, { "epoch": 0.591941391941392, "grad_norm": 2.3003723833156635, "learning_rate": 7.53425101174271e-06, "loss": 0.854, "step": 5656 }, { "epoch": 0.5920460491889064, "grad_norm": 2.14897580363839, "learning_rate": 7.530966095183942e-06, "loss": 0.8716, "step": 5657 }, { "epoch": 0.5921507064364208, "grad_norm": 1.856710380053524, "learning_rate": 7.5276814623221705e-06, "loss": 0.9414, "step": 5658 }, { "epoch": 0.5922553636839352, "grad_norm": 2.125965075529774, "learning_rate": 7.5243971135348136e-06, "loss": 0.8968, "step": 5659 }, { "epoch": 0.5923600209314495, "grad_norm": 2.076781557575531, "learning_rate": 7.521113049199244e-06, "loss": 0.9287, "step": 5660 }, { "epoch": 0.5924646781789639, "grad_norm": 1.9002232671975265, "learning_rate": 7.517829269692815e-06, "loss": 1.0265, "step": 5661 }, { "epoch": 0.5925693354264783, "grad_norm": 2.222162234459858, "learning_rate": 7.514545775392833e-06, "loss": 0.9957, "step": 5662 }, { "epoch": 0.5926739926739927, "grad_norm": 2.124845865430392, "learning_rate": 7.511262566676574e-06, "loss": 1.0127, "step": 5663 }, { "epoch": 0.5927786499215071, "grad_norm": 1.9891156119781586, "learning_rate": 7.507979643921297e-06, "loss": 0.8318, "step": 5664 }, { "epoch": 0.5928833071690215, "grad_norm": 2.0909557900542133, "learning_rate": 7.504697007504212e-06, "loss": 0.8629, "step": 5665 }, { "epoch": 0.5929879644165359, "grad_norm": 1.9403378716887565, "learning_rate": 7.501414657802495e-06, "loss": 0.9464, "step": 5666 }, { "epoch": 0.5930926216640502, "grad_norm": 2.043310271462764, "learning_rate": 7.498132595193304e-06, "loss": 0.8681, "step": 5667 }, { "epoch": 0.5931972789115646, "grad_norm": 1.9687560488818703, "learning_rate": 7.494850820053745e-06, "loss": 1.0069, "step": 5668 }, { "epoch": 0.593301936159079, "grad_norm": 2.053403263724564, "learning_rate": 7.491569332760909e-06, "loss": 0.7221, "step": 5669 }, { "epoch": 0.5934065934065934, "grad_norm": 2.046467258975427, "learning_rate": 7.488288133691839e-06, "loss": 0.9088, "step": 5670 }, { "epoch": 0.5935112506541078, "grad_norm": 1.9238393365949373, "learning_rate": 7.485007223223556e-06, "loss": 0.8819, "step": 5671 }, { "epoch": 0.5936159079016222, "grad_norm": 1.6696389669150944, "learning_rate": 7.481726601733037e-06, "loss": 0.7529, "step": 5672 }, { "epoch": 0.5937205651491366, "grad_norm": 2.59751100553255, "learning_rate": 7.478446269597239e-06, "loss": 0.7676, "step": 5673 }, { "epoch": 0.5938252223966509, "grad_norm": 1.9990709348617675, "learning_rate": 7.475166227193079e-06, "loss": 0.889, "step": 5674 }, { "epoch": 0.5939298796441653, "grad_norm": 1.910729649122657, "learning_rate": 7.471886474897436e-06, "loss": 0.9689, "step": 5675 }, { "epoch": 0.5940345368916797, "grad_norm": 1.9841915683745623, "learning_rate": 7.468607013087164e-06, "loss": 0.9288, "step": 5676 }, { "epoch": 0.5941391941391941, "grad_norm": 2.0782450749530095, "learning_rate": 7.465327842139074e-06, "loss": 0.8997, "step": 5677 }, { "epoch": 0.5942438513867085, "grad_norm": 2.1382950383338013, "learning_rate": 7.4620489624299576e-06, "loss": 0.9772, "step": 5678 }, { "epoch": 0.594348508634223, "grad_norm": 1.9347499610481047, "learning_rate": 7.458770374336557e-06, "loss": 0.8774, "step": 5679 }, { "epoch": 0.5944531658817374, "grad_norm": 1.943431111602753, "learning_rate": 7.455492078235596e-06, "loss": 0.9336, "step": 5680 }, { "epoch": 0.5945578231292517, "grad_norm": 1.8838788238338073, "learning_rate": 7.452214074503748e-06, "loss": 0.8452, "step": 5681 }, { "epoch": 0.5946624803767661, "grad_norm": 1.9609711265415661, "learning_rate": 7.44893636351767e-06, "loss": 0.91, "step": 5682 }, { "epoch": 0.5947671376242805, "grad_norm": 2.2357068851381894, "learning_rate": 7.445658945653975e-06, "loss": 0.8955, "step": 5683 }, { "epoch": 0.5948717948717949, "grad_norm": 2.129172955119575, "learning_rate": 7.442381821289248e-06, "loss": 0.8215, "step": 5684 }, { "epoch": 0.5949764521193093, "grad_norm": 2.0015511967472412, "learning_rate": 7.439104990800032e-06, "loss": 0.9191, "step": 5685 }, { "epoch": 0.5950811093668237, "grad_norm": 2.0354524088249515, "learning_rate": 7.435828454562846e-06, "loss": 0.8986, "step": 5686 }, { "epoch": 0.5951857666143381, "grad_norm": 2.539924639282485, "learning_rate": 7.432552212954167e-06, "loss": 0.9566, "step": 5687 }, { "epoch": 0.5952904238618524, "grad_norm": 2.6018174099109874, "learning_rate": 7.429276266350445e-06, "loss": 0.8285, "step": 5688 }, { "epoch": 0.5953950811093668, "grad_norm": 2.0533166547957884, "learning_rate": 7.426000615128087e-06, "loss": 0.9661, "step": 5689 }, { "epoch": 0.5954997383568812, "grad_norm": 1.7827839006990294, "learning_rate": 7.422725259663479e-06, "loss": 0.8243, "step": 5690 }, { "epoch": 0.5956043956043956, "grad_norm": 2.2481511288263603, "learning_rate": 7.419450200332965e-06, "loss": 0.9338, "step": 5691 }, { "epoch": 0.59570905285191, "grad_norm": 1.9634491736223318, "learning_rate": 7.416175437512854e-06, "loss": 1.0209, "step": 5692 }, { "epoch": 0.5958137100994244, "grad_norm": 1.9664186395780534, "learning_rate": 7.412900971579422e-06, "loss": 0.8375, "step": 5693 }, { "epoch": 0.5959183673469388, "grad_norm": 2.002056184772738, "learning_rate": 7.4096268029089136e-06, "loss": 0.9765, "step": 5694 }, { "epoch": 0.5960230245944532, "grad_norm": 2.0319582433132224, "learning_rate": 7.406352931877538e-06, "loss": 0.885, "step": 5695 }, { "epoch": 0.5961276818419675, "grad_norm": 1.9584262931491643, "learning_rate": 7.403079358861466e-06, "loss": 1.0275, "step": 5696 }, { "epoch": 0.5962323390894819, "grad_norm": 1.9530535654924333, "learning_rate": 7.399806084236843e-06, "loss": 0.7674, "step": 5697 }, { "epoch": 0.5963369963369963, "grad_norm": 2.022147305775228, "learning_rate": 7.396533108379767e-06, "loss": 0.9294, "step": 5698 }, { "epoch": 0.5964416535845107, "grad_norm": 2.0733270021944303, "learning_rate": 7.393260431666317e-06, "loss": 0.9985, "step": 5699 }, { "epoch": 0.5965463108320251, "grad_norm": 2.057304250473014, "learning_rate": 7.38998805447253e-06, "loss": 1.0389, "step": 5700 }, { "epoch": 0.5966509680795395, "grad_norm": 2.465161068029499, "learning_rate": 7.386715977174406e-06, "loss": 0.9919, "step": 5701 }, { "epoch": 0.596755625327054, "grad_norm": 2.3904950100409454, "learning_rate": 7.383444200147914e-06, "loss": 0.9943, "step": 5702 }, { "epoch": 0.5968602825745682, "grad_norm": 2.083028232439757, "learning_rate": 7.380172723768988e-06, "loss": 0.9033, "step": 5703 }, { "epoch": 0.5969649398220827, "grad_norm": 2.3016492387048535, "learning_rate": 7.376901548413525e-06, "loss": 1.0375, "step": 5704 }, { "epoch": 0.5970695970695971, "grad_norm": 1.8832286691796583, "learning_rate": 7.373630674457393e-06, "loss": 0.8139, "step": 5705 }, { "epoch": 0.5971742543171115, "grad_norm": 2.183575586002926, "learning_rate": 7.370360102276419e-06, "loss": 0.9699, "step": 5706 }, { "epoch": 0.5972789115646259, "grad_norm": 2.067091529022829, "learning_rate": 7.367089832246399e-06, "loss": 0.8456, "step": 5707 }, { "epoch": 0.5973835688121403, "grad_norm": 1.9426102438871136, "learning_rate": 7.363819864743098e-06, "loss": 0.9009, "step": 5708 }, { "epoch": 0.5974882260596547, "grad_norm": 1.990935623039761, "learning_rate": 7.360550200142238e-06, "loss": 0.889, "step": 5709 }, { "epoch": 0.597592883307169, "grad_norm": 1.7709079037998374, "learning_rate": 7.357280838819512e-06, "loss": 0.9062, "step": 5710 }, { "epoch": 0.5976975405546834, "grad_norm": 2.2877813592159004, "learning_rate": 7.354011781150572e-06, "loss": 1.0542, "step": 5711 }, { "epoch": 0.5978021978021978, "grad_norm": 2.020894122061592, "learning_rate": 7.350743027511045e-06, "loss": 0.9956, "step": 5712 }, { "epoch": 0.5979068550497122, "grad_norm": 1.9546500888309954, "learning_rate": 7.3474745782765125e-06, "loss": 1.0172, "step": 5713 }, { "epoch": 0.5980115122972266, "grad_norm": 1.9265892498802972, "learning_rate": 7.3442064338225315e-06, "loss": 0.9464, "step": 5714 }, { "epoch": 0.598116169544741, "grad_norm": 2.2740116881308605, "learning_rate": 7.340938594524611e-06, "loss": 0.935, "step": 5715 }, { "epoch": 0.5982208267922554, "grad_norm": 2.3159963017309275, "learning_rate": 7.337671060758236e-06, "loss": 0.8863, "step": 5716 }, { "epoch": 0.5983254840397697, "grad_norm": 2.1779592238981573, "learning_rate": 7.334403832898856e-06, "loss": 0.92, "step": 5717 }, { "epoch": 0.5984301412872841, "grad_norm": 2.3970607095582284, "learning_rate": 7.331136911321879e-06, "loss": 0.9618, "step": 5718 }, { "epoch": 0.5985347985347985, "grad_norm": 1.7681357135220832, "learning_rate": 7.327870296402682e-06, "loss": 0.8879, "step": 5719 }, { "epoch": 0.5986394557823129, "grad_norm": 1.8481004018316487, "learning_rate": 7.324603988516605e-06, "loss": 0.9292, "step": 5720 }, { "epoch": 0.5987441130298273, "grad_norm": 2.0081982515082624, "learning_rate": 7.321337988038952e-06, "loss": 0.831, "step": 5721 }, { "epoch": 0.5988487702773417, "grad_norm": 2.053737396236648, "learning_rate": 7.318072295344996e-06, "loss": 0.9926, "step": 5722 }, { "epoch": 0.5989534275248561, "grad_norm": 1.932122093359507, "learning_rate": 7.314806910809968e-06, "loss": 0.8565, "step": 5723 }, { "epoch": 0.5990580847723704, "grad_norm": 2.112245623049056, "learning_rate": 7.311541834809066e-06, "loss": 0.9997, "step": 5724 }, { "epoch": 0.5991627420198848, "grad_norm": 2.2611995197979664, "learning_rate": 7.308277067717464e-06, "loss": 0.9893, "step": 5725 }, { "epoch": 0.5992673992673992, "grad_norm": 2.1706750569520508, "learning_rate": 7.30501260991028e-06, "loss": 0.902, "step": 5726 }, { "epoch": 0.5993720565149137, "grad_norm": 2.0515848466057873, "learning_rate": 7.301748461762612e-06, "loss": 0.918, "step": 5727 }, { "epoch": 0.5994767137624281, "grad_norm": 2.0173947607217504, "learning_rate": 7.298484623649515e-06, "loss": 0.8884, "step": 5728 }, { "epoch": 0.5995813710099425, "grad_norm": 2.538071049144709, "learning_rate": 7.295221095946012e-06, "loss": 0.9454, "step": 5729 }, { "epoch": 0.5996860282574569, "grad_norm": 1.9533801934375317, "learning_rate": 7.291957879027087e-06, "loss": 0.9301, "step": 5730 }, { "epoch": 0.5997906855049712, "grad_norm": 2.25854110503167, "learning_rate": 7.288694973267693e-06, "loss": 1.059, "step": 5731 }, { "epoch": 0.5998953427524856, "grad_norm": 1.9262169394873385, "learning_rate": 7.2854323790427425e-06, "loss": 0.8526, "step": 5732 }, { "epoch": 0.6, "grad_norm": 2.0501402686145234, "learning_rate": 7.28217009672711e-06, "loss": 0.8932, "step": 5733 }, { "epoch": 0.6001046572475144, "grad_norm": 1.990109963355697, "learning_rate": 7.2789081266956495e-06, "loss": 0.9841, "step": 5734 }, { "epoch": 0.6002093144950288, "grad_norm": 1.9142264065765036, "learning_rate": 7.2756464693231585e-06, "loss": 0.9663, "step": 5735 }, { "epoch": 0.6003139717425432, "grad_norm": 2.1083186271489995, "learning_rate": 7.272385124984414e-06, "loss": 0.9541, "step": 5736 }, { "epoch": 0.6004186289900576, "grad_norm": 2.438641624027379, "learning_rate": 7.269124094054144e-06, "loss": 0.9692, "step": 5737 }, { "epoch": 0.600523286237572, "grad_norm": 1.7085030156404912, "learning_rate": 7.265863376907054e-06, "loss": 0.8373, "step": 5738 }, { "epoch": 0.6006279434850863, "grad_norm": 2.175525308596229, "learning_rate": 7.262602973917807e-06, "loss": 0.8631, "step": 5739 }, { "epoch": 0.6007326007326007, "grad_norm": 2.4383259594069577, "learning_rate": 7.259342885461023e-06, "loss": 0.9384, "step": 5740 }, { "epoch": 0.6008372579801151, "grad_norm": 2.3558493908846363, "learning_rate": 7.256083111911301e-06, "loss": 0.8584, "step": 5741 }, { "epoch": 0.6009419152276295, "grad_norm": 1.8065835519696472, "learning_rate": 7.252823653643186e-06, "loss": 0.938, "step": 5742 }, { "epoch": 0.6010465724751439, "grad_norm": 2.2545538260720392, "learning_rate": 7.249564511031206e-06, "loss": 0.9175, "step": 5743 }, { "epoch": 0.6011512297226583, "grad_norm": 1.8867293237713263, "learning_rate": 7.246305684449842e-06, "loss": 0.8941, "step": 5744 }, { "epoch": 0.6012558869701727, "grad_norm": 2.091930423181412, "learning_rate": 7.243047174273534e-06, "loss": 1.0297, "step": 5745 }, { "epoch": 0.601360544217687, "grad_norm": 1.9561529165386784, "learning_rate": 7.239788980876696e-06, "loss": 0.8983, "step": 5746 }, { "epoch": 0.6014652014652014, "grad_norm": 2.088099544372334, "learning_rate": 7.236531104633699e-06, "loss": 0.9198, "step": 5747 }, { "epoch": 0.6015698587127158, "grad_norm": 2.1188876407664066, "learning_rate": 7.233273545918882e-06, "loss": 0.8943, "step": 5748 }, { "epoch": 0.6016745159602302, "grad_norm": 2.402914355301177, "learning_rate": 7.230016305106539e-06, "loss": 0.9966, "step": 5749 }, { "epoch": 0.6017791732077447, "grad_norm": 2.157789260501364, "learning_rate": 7.226759382570938e-06, "loss": 1.0866, "step": 5750 }, { "epoch": 0.6018838304552591, "grad_norm": 2.4322710098817746, "learning_rate": 7.223502778686311e-06, "loss": 0.9455, "step": 5751 }, { "epoch": 0.6019884877027735, "grad_norm": 2.104737383056769, "learning_rate": 7.220246493826839e-06, "loss": 0.8708, "step": 5752 }, { "epoch": 0.6020931449502878, "grad_norm": 1.9327478109090237, "learning_rate": 7.216990528366683e-06, "loss": 0.8164, "step": 5753 }, { "epoch": 0.6021978021978022, "grad_norm": 1.9784215425645908, "learning_rate": 7.213734882679955e-06, "loss": 0.8658, "step": 5754 }, { "epoch": 0.6023024594453166, "grad_norm": 2.0937291019563755, "learning_rate": 7.21047955714074e-06, "loss": 0.9194, "step": 5755 }, { "epoch": 0.602407116692831, "grad_norm": 2.2283678360274823, "learning_rate": 7.207224552123075e-06, "loss": 1.0105, "step": 5756 }, { "epoch": 0.6025117739403454, "grad_norm": 2.086487895813268, "learning_rate": 7.203969868000975e-06, "loss": 0.9044, "step": 5757 }, { "epoch": 0.6026164311878598, "grad_norm": 2.0822701283652316, "learning_rate": 7.200715505148404e-06, "loss": 0.9292, "step": 5758 }, { "epoch": 0.6027210884353742, "grad_norm": 2.51397980592967, "learning_rate": 7.1974614639392915e-06, "loss": 1.006, "step": 5759 }, { "epoch": 0.6028257456828885, "grad_norm": 2.3703132970824274, "learning_rate": 7.194207744747541e-06, "loss": 0.907, "step": 5760 }, { "epoch": 0.6029304029304029, "grad_norm": 2.063722563737003, "learning_rate": 7.19095434794701e-06, "loss": 0.8732, "step": 5761 }, { "epoch": 0.6030350601779173, "grad_norm": 2.262710366424598, "learning_rate": 7.187701273911518e-06, "loss": 0.9704, "step": 5762 }, { "epoch": 0.6031397174254317, "grad_norm": 2.2761471236819615, "learning_rate": 7.184448523014851e-06, "loss": 0.9213, "step": 5763 }, { "epoch": 0.6032443746729461, "grad_norm": 2.252681155905622, "learning_rate": 7.181196095630756e-06, "loss": 1.0017, "step": 5764 }, { "epoch": 0.6033490319204605, "grad_norm": 2.218841967236834, "learning_rate": 7.177943992132945e-06, "loss": 1.0078, "step": 5765 }, { "epoch": 0.6034536891679749, "grad_norm": 2.371167922078118, "learning_rate": 7.174692212895089e-06, "loss": 1.0293, "step": 5766 }, { "epoch": 0.6035583464154892, "grad_norm": 2.0568882242914177, "learning_rate": 7.1714407582908265e-06, "loss": 0.8454, "step": 5767 }, { "epoch": 0.6036630036630036, "grad_norm": 1.691696703496195, "learning_rate": 7.168189628693752e-06, "loss": 0.8477, "step": 5768 }, { "epoch": 0.603767660910518, "grad_norm": 1.8979982946907608, "learning_rate": 7.16493882447743e-06, "loss": 0.9209, "step": 5769 }, { "epoch": 0.6038723181580324, "grad_norm": 2.5519832703990266, "learning_rate": 7.1616883460153875e-06, "loss": 0.8273, "step": 5770 }, { "epoch": 0.6039769754055468, "grad_norm": 2.0415668920164434, "learning_rate": 7.158438193681107e-06, "loss": 0.861, "step": 5771 }, { "epoch": 0.6040816326530613, "grad_norm": 2.0190515557119846, "learning_rate": 7.155188367848038e-06, "loss": 0.8852, "step": 5772 }, { "epoch": 0.6041862899005757, "grad_norm": 2.018878210032007, "learning_rate": 7.151938868889593e-06, "loss": 0.9771, "step": 5773 }, { "epoch": 0.60429094714809, "grad_norm": 2.3049004074694337, "learning_rate": 7.148689697179147e-06, "loss": 0.9558, "step": 5774 }, { "epoch": 0.6043956043956044, "grad_norm": 1.9496529744525466, "learning_rate": 7.145440853090033e-06, "loss": 0.947, "step": 5775 }, { "epoch": 0.6045002616431188, "grad_norm": 1.7724900507895518, "learning_rate": 7.1421923369955485e-06, "loss": 0.8816, "step": 5776 }, { "epoch": 0.6046049188906332, "grad_norm": 2.3142724481991936, "learning_rate": 7.138944149268963e-06, "loss": 0.9659, "step": 5777 }, { "epoch": 0.6047095761381476, "grad_norm": 2.1236464851755743, "learning_rate": 7.1356962902834935e-06, "loss": 0.9831, "step": 5778 }, { "epoch": 0.604814233385662, "grad_norm": 2.2271103729867083, "learning_rate": 7.132448760412326e-06, "loss": 1.0104, "step": 5779 }, { "epoch": 0.6049188906331764, "grad_norm": 2.0033029015172774, "learning_rate": 7.129201560028611e-06, "loss": 0.9995, "step": 5780 }, { "epoch": 0.6050235478806908, "grad_norm": 2.5095302090496037, "learning_rate": 7.1259546895054545e-06, "loss": 0.979, "step": 5781 }, { "epoch": 0.6051282051282051, "grad_norm": 1.8438079575156054, "learning_rate": 7.122708149215931e-06, "loss": 0.989, "step": 5782 }, { "epoch": 0.6052328623757195, "grad_norm": 1.7506092086261658, "learning_rate": 7.1194619395330746e-06, "loss": 0.9699, "step": 5783 }, { "epoch": 0.6053375196232339, "grad_norm": 1.8710351375548784, "learning_rate": 7.116216060829881e-06, "loss": 0.7888, "step": 5784 }, { "epoch": 0.6054421768707483, "grad_norm": 2.253637141321553, "learning_rate": 7.112970513479303e-06, "loss": 1.0412, "step": 5785 }, { "epoch": 0.6055468341182627, "grad_norm": 2.1113622507792926, "learning_rate": 7.10972529785427e-06, "loss": 0.9001, "step": 5786 }, { "epoch": 0.6056514913657771, "grad_norm": 2.116957635415604, "learning_rate": 7.106480414327661e-06, "loss": 0.9627, "step": 5787 }, { "epoch": 0.6057561486132915, "grad_norm": 1.9655834383321291, "learning_rate": 7.103235863272315e-06, "loss": 0.9217, "step": 5788 }, { "epoch": 0.6058608058608058, "grad_norm": 1.9253182713618326, "learning_rate": 7.099991645061044e-06, "loss": 0.9115, "step": 5789 }, { "epoch": 0.6059654631083202, "grad_norm": 1.9112762675976478, "learning_rate": 7.096747760066609e-06, "loss": 0.9361, "step": 5790 }, { "epoch": 0.6060701203558346, "grad_norm": 1.9544948764185148, "learning_rate": 7.0935042086617444e-06, "loss": 0.8774, "step": 5791 }, { "epoch": 0.606174777603349, "grad_norm": 2.1298758076771387, "learning_rate": 7.090260991219135e-06, "loss": 0.9291, "step": 5792 }, { "epoch": 0.6062794348508634, "grad_norm": 1.8407570735350673, "learning_rate": 7.087018108111441e-06, "loss": 0.7909, "step": 5793 }, { "epoch": 0.6063840920983778, "grad_norm": 2.0086101467681123, "learning_rate": 7.083775559711265e-06, "loss": 0.8417, "step": 5794 }, { "epoch": 0.6064887493458923, "grad_norm": 2.1799980186333197, "learning_rate": 7.080533346391192e-06, "loss": 0.9765, "step": 5795 }, { "epoch": 0.6065934065934065, "grad_norm": 2.045424197962744, "learning_rate": 7.0772914685237585e-06, "loss": 0.9536, "step": 5796 }, { "epoch": 0.606698063840921, "grad_norm": 1.6592809893413245, "learning_rate": 7.074049926481459e-06, "loss": 0.846, "step": 5797 }, { "epoch": 0.6068027210884354, "grad_norm": 2.6554474976351625, "learning_rate": 7.070808720636754e-06, "loss": 0.9715, "step": 5798 }, { "epoch": 0.6069073783359498, "grad_norm": 2.0940703129068563, "learning_rate": 7.067567851362066e-06, "loss": 0.9867, "step": 5799 }, { "epoch": 0.6070120355834642, "grad_norm": 2.1226712056254584, "learning_rate": 7.064327319029775e-06, "loss": 0.9931, "step": 5800 }, { "epoch": 0.6071166928309786, "grad_norm": 1.7565753791576322, "learning_rate": 7.061087124012227e-06, "loss": 0.8574, "step": 5801 }, { "epoch": 0.607221350078493, "grad_norm": 2.5046485756656036, "learning_rate": 7.057847266681723e-06, "loss": 0.9254, "step": 5802 }, { "epoch": 0.6073260073260073, "grad_norm": 2.099170045438125, "learning_rate": 7.054607747410535e-06, "loss": 0.9769, "step": 5803 }, { "epoch": 0.6074306645735217, "grad_norm": 2.422351961670763, "learning_rate": 7.05136856657089e-06, "loss": 0.9626, "step": 5804 }, { "epoch": 0.6075353218210361, "grad_norm": 1.934073799795149, "learning_rate": 7.048129724534971e-06, "loss": 0.9296, "step": 5805 }, { "epoch": 0.6076399790685505, "grad_norm": 1.9490333500220711, "learning_rate": 7.044891221674934e-06, "loss": 0.8619, "step": 5806 }, { "epoch": 0.6077446363160649, "grad_norm": 2.0467303551021256, "learning_rate": 7.041653058362883e-06, "loss": 0.8767, "step": 5807 }, { "epoch": 0.6078492935635793, "grad_norm": 2.1579085714167805, "learning_rate": 7.038415234970896e-06, "loss": 0.8894, "step": 5808 }, { "epoch": 0.6079539508110937, "grad_norm": 1.94349634206974, "learning_rate": 7.035177751870999e-06, "loss": 0.9374, "step": 5809 }, { "epoch": 0.608058608058608, "grad_norm": 2.0657703345239278, "learning_rate": 7.031940609435192e-06, "loss": 0.8923, "step": 5810 }, { "epoch": 0.6081632653061224, "grad_norm": 2.070527663098741, "learning_rate": 7.028703808035421e-06, "loss": 0.8972, "step": 5811 }, { "epoch": 0.6082679225536368, "grad_norm": 1.9175580806828474, "learning_rate": 7.025467348043608e-06, "loss": 0.947, "step": 5812 }, { "epoch": 0.6083725798011512, "grad_norm": 2.106923878326725, "learning_rate": 7.022231229831629e-06, "loss": 0.9325, "step": 5813 }, { "epoch": 0.6084772370486656, "grad_norm": 1.9730559307739923, "learning_rate": 7.018995453771318e-06, "loss": 0.8576, "step": 5814 }, { "epoch": 0.60858189429618, "grad_norm": 2.0508153945924, "learning_rate": 7.01576002023447e-06, "loss": 0.8826, "step": 5815 }, { "epoch": 0.6086865515436944, "grad_norm": 2.001780253253112, "learning_rate": 7.012524929592848e-06, "loss": 0.8283, "step": 5816 }, { "epoch": 0.6087912087912087, "grad_norm": 1.8973036803507934, "learning_rate": 7.009290182218166e-06, "loss": 0.9091, "step": 5817 }, { "epoch": 0.6088958660387231, "grad_norm": 1.9406056643325182, "learning_rate": 7.006055778482106e-06, "loss": 0.8443, "step": 5818 }, { "epoch": 0.6090005232862375, "grad_norm": 2.111807902749534, "learning_rate": 7.002821718756305e-06, "loss": 0.8516, "step": 5819 }, { "epoch": 0.609105180533752, "grad_norm": 2.0277118642001994, "learning_rate": 6.999588003412363e-06, "loss": 0.8915, "step": 5820 }, { "epoch": 0.6092098377812664, "grad_norm": 1.8707711600366936, "learning_rate": 6.996354632821845e-06, "loss": 0.9782, "step": 5821 }, { "epoch": 0.6093144950287808, "grad_norm": 1.826421423539411, "learning_rate": 6.993121607356267e-06, "loss": 0.8156, "step": 5822 }, { "epoch": 0.6094191522762952, "grad_norm": 1.8648721842207836, "learning_rate": 6.989888927387114e-06, "loss": 0.8111, "step": 5823 }, { "epoch": 0.6095238095238096, "grad_norm": 1.9578433557880752, "learning_rate": 6.986656593285824e-06, "loss": 0.7663, "step": 5824 }, { "epoch": 0.6096284667713239, "grad_norm": 2.3788854431050046, "learning_rate": 6.983424605423801e-06, "loss": 0.8084, "step": 5825 }, { "epoch": 0.6097331240188383, "grad_norm": 1.9702807385169419, "learning_rate": 6.980192964172404e-06, "loss": 0.9583, "step": 5826 }, { "epoch": 0.6098377812663527, "grad_norm": 2.1748718305550687, "learning_rate": 6.97696166990296e-06, "loss": 0.9703, "step": 5827 }, { "epoch": 0.6099424385138671, "grad_norm": 2.2580551673376763, "learning_rate": 6.9737307229867435e-06, "loss": 0.804, "step": 5828 }, { "epoch": 0.6100470957613815, "grad_norm": 2.2895933354446316, "learning_rate": 6.970500123795004e-06, "loss": 0.9405, "step": 5829 }, { "epoch": 0.6101517530088959, "grad_norm": 2.0595158690924906, "learning_rate": 6.9672698726989455e-06, "loss": 0.8, "step": 5830 }, { "epoch": 0.6102564102564103, "grad_norm": 2.3271064177398637, "learning_rate": 6.964039970069722e-06, "loss": 0.9542, "step": 5831 }, { "epoch": 0.6103610675039246, "grad_norm": 1.7796600145148627, "learning_rate": 6.9608104162784655e-06, "loss": 0.9259, "step": 5832 }, { "epoch": 0.610465724751439, "grad_norm": 1.973121849768075, "learning_rate": 6.957581211696251e-06, "loss": 0.878, "step": 5833 }, { "epoch": 0.6105703819989534, "grad_norm": 1.9358767168769708, "learning_rate": 6.954352356694121e-06, "loss": 0.9568, "step": 5834 }, { "epoch": 0.6106750392464678, "grad_norm": 2.2637017462136377, "learning_rate": 6.9511238516430814e-06, "loss": 1.0218, "step": 5835 }, { "epoch": 0.6107796964939822, "grad_norm": 1.679462688094085, "learning_rate": 6.947895696914091e-06, "loss": 0.7966, "step": 5836 }, { "epoch": 0.6108843537414966, "grad_norm": 2.1125768150832873, "learning_rate": 6.944667892878069e-06, "loss": 0.7789, "step": 5837 }, { "epoch": 0.610989010989011, "grad_norm": 2.1110418210605633, "learning_rate": 6.941440439905904e-06, "loss": 0.9139, "step": 5838 }, { "epoch": 0.6110936682365253, "grad_norm": 2.4312531473703736, "learning_rate": 6.938213338368431e-06, "loss": 0.9509, "step": 5839 }, { "epoch": 0.6111983254840397, "grad_norm": 2.215490332569269, "learning_rate": 6.934986588636455e-06, "loss": 0.8285, "step": 5840 }, { "epoch": 0.6113029827315541, "grad_norm": 2.028628090171237, "learning_rate": 6.931760191080732e-06, "loss": 0.8616, "step": 5841 }, { "epoch": 0.6114076399790686, "grad_norm": 2.1117389186178945, "learning_rate": 6.928534146071983e-06, "loss": 0.9494, "step": 5842 }, { "epoch": 0.611512297226583, "grad_norm": 1.8477902899995793, "learning_rate": 6.925308453980887e-06, "loss": 0.8444, "step": 5843 }, { "epoch": 0.6116169544740974, "grad_norm": 2.2634245815200615, "learning_rate": 6.9220831151780845e-06, "loss": 0.8982, "step": 5844 }, { "epoch": 0.6117216117216118, "grad_norm": 1.8300503492297004, "learning_rate": 6.918858130034167e-06, "loss": 0.9627, "step": 5845 }, { "epoch": 0.6118262689691261, "grad_norm": 2.0217643390425013, "learning_rate": 6.915633498919698e-06, "loss": 0.8742, "step": 5846 }, { "epoch": 0.6119309262166405, "grad_norm": 2.1967587863707707, "learning_rate": 6.9124092222051945e-06, "loss": 0.9867, "step": 5847 }, { "epoch": 0.6120355834641549, "grad_norm": 1.8375475239699013, "learning_rate": 6.9091853002611296e-06, "loss": 0.897, "step": 5848 }, { "epoch": 0.6121402407116693, "grad_norm": 2.1183156451380487, "learning_rate": 6.905961733457941e-06, "loss": 0.8377, "step": 5849 }, { "epoch": 0.6122448979591837, "grad_norm": 1.9783448138880826, "learning_rate": 6.902738522166019e-06, "loss": 0.9428, "step": 5850 }, { "epoch": 0.6123495552066981, "grad_norm": 2.21998303732816, "learning_rate": 6.899515666755722e-06, "loss": 0.909, "step": 5851 }, { "epoch": 0.6124542124542125, "grad_norm": 2.3831003494991063, "learning_rate": 6.896293167597361e-06, "loss": 0.9709, "step": 5852 }, { "epoch": 0.6125588697017268, "grad_norm": 2.0207894472977608, "learning_rate": 6.8930710250612044e-06, "loss": 0.9789, "step": 5853 }, { "epoch": 0.6126635269492412, "grad_norm": 1.9009358397826535, "learning_rate": 6.889849239517483e-06, "loss": 0.8241, "step": 5854 }, { "epoch": 0.6127681841967556, "grad_norm": 1.8698070433483918, "learning_rate": 6.886627811336393e-06, "loss": 0.8008, "step": 5855 }, { "epoch": 0.61287284144427, "grad_norm": 2.005435434602962, "learning_rate": 6.883406740888077e-06, "loss": 0.9513, "step": 5856 }, { "epoch": 0.6129774986917844, "grad_norm": 1.8480823682591345, "learning_rate": 6.880186028542647e-06, "loss": 0.8963, "step": 5857 }, { "epoch": 0.6130821559392988, "grad_norm": 1.8958475524506964, "learning_rate": 6.876965674670164e-06, "loss": 0.9899, "step": 5858 }, { "epoch": 0.6131868131868132, "grad_norm": 2.2557462857033186, "learning_rate": 6.8737456796406594e-06, "loss": 0.9383, "step": 5859 }, { "epoch": 0.6132914704343275, "grad_norm": 1.8928132503094905, "learning_rate": 6.870526043824111e-06, "loss": 0.8379, "step": 5860 }, { "epoch": 0.6133961276818419, "grad_norm": 1.938497040014973, "learning_rate": 6.867306767590466e-06, "loss": 1.0145, "step": 5861 }, { "epoch": 0.6135007849293563, "grad_norm": 1.8779981359486946, "learning_rate": 6.864087851309622e-06, "loss": 0.9063, "step": 5862 }, { "epoch": 0.6136054421768707, "grad_norm": 1.834130321006479, "learning_rate": 6.860869295351439e-06, "loss": 0.8479, "step": 5863 }, { "epoch": 0.6137100994243851, "grad_norm": 2.225164974831015, "learning_rate": 6.857651100085742e-06, "loss": 1.0329, "step": 5864 }, { "epoch": 0.6138147566718996, "grad_norm": 2.2962326966980067, "learning_rate": 6.854433265882301e-06, "loss": 0.9488, "step": 5865 }, { "epoch": 0.613919413919414, "grad_norm": 1.9342617679397862, "learning_rate": 6.851215793110857e-06, "loss": 0.9494, "step": 5866 }, { "epoch": 0.6140240711669284, "grad_norm": 2.052176188364496, "learning_rate": 6.8479986821411e-06, "loss": 0.9748, "step": 5867 }, { "epoch": 0.6141287284144427, "grad_norm": 2.1836996140380402, "learning_rate": 6.844781933342687e-06, "loss": 0.9203, "step": 5868 }, { "epoch": 0.6142333856619571, "grad_norm": 2.1846424471266843, "learning_rate": 6.841565547085223e-06, "loss": 1.0104, "step": 5869 }, { "epoch": 0.6143380429094715, "grad_norm": 2.063761945658502, "learning_rate": 6.838349523738283e-06, "loss": 1.0052, "step": 5870 }, { "epoch": 0.6144427001569859, "grad_norm": 2.345754619202839, "learning_rate": 6.83513386367139e-06, "loss": 0.9294, "step": 5871 }, { "epoch": 0.6145473574045003, "grad_norm": 1.8945517389181037, "learning_rate": 6.83191856725403e-06, "loss": 0.8727, "step": 5872 }, { "epoch": 0.6146520146520147, "grad_norm": 2.042059497052054, "learning_rate": 6.828703634855651e-06, "loss": 0.8322, "step": 5873 }, { "epoch": 0.6147566718995291, "grad_norm": 2.1770922500505354, "learning_rate": 6.825489066845654e-06, "loss": 0.8193, "step": 5874 }, { "epoch": 0.6148613291470434, "grad_norm": 2.3497422750449126, "learning_rate": 6.822274863593399e-06, "loss": 0.9328, "step": 5875 }, { "epoch": 0.6149659863945578, "grad_norm": 2.176112444967684, "learning_rate": 6.8190610254682034e-06, "loss": 0.9731, "step": 5876 }, { "epoch": 0.6150706436420722, "grad_norm": 1.918343615201989, "learning_rate": 6.815847552839343e-06, "loss": 0.8911, "step": 5877 }, { "epoch": 0.6151753008895866, "grad_norm": 2.01864048793102, "learning_rate": 6.812634446076056e-06, "loss": 1.0051, "step": 5878 }, { "epoch": 0.615279958137101, "grad_norm": 2.1005849771125953, "learning_rate": 6.809421705547532e-06, "loss": 1.0042, "step": 5879 }, { "epoch": 0.6153846153846154, "grad_norm": 2.1391206561805016, "learning_rate": 6.806209331622919e-06, "loss": 0.8773, "step": 5880 }, { "epoch": 0.6154892726321298, "grad_norm": 2.1522010281571227, "learning_rate": 6.802997324671332e-06, "loss": 0.7671, "step": 5881 }, { "epoch": 0.6155939298796441, "grad_norm": 1.888842162645125, "learning_rate": 6.799785685061832e-06, "loss": 0.8971, "step": 5882 }, { "epoch": 0.6156985871271585, "grad_norm": 2.2616843959962356, "learning_rate": 6.796574413163446e-06, "loss": 0.7999, "step": 5883 }, { "epoch": 0.6158032443746729, "grad_norm": 2.2496513510327247, "learning_rate": 6.793363509345151e-06, "loss": 1.0051, "step": 5884 }, { "epoch": 0.6159079016221873, "grad_norm": 2.0525978260362687, "learning_rate": 6.790152973975892e-06, "loss": 0.9044, "step": 5885 }, { "epoch": 0.6160125588697017, "grad_norm": 2.01635783084318, "learning_rate": 6.786942807424561e-06, "loss": 1.0252, "step": 5886 }, { "epoch": 0.6161172161172161, "grad_norm": 2.153628252992972, "learning_rate": 6.783733010060018e-06, "loss": 0.8818, "step": 5887 }, { "epoch": 0.6162218733647306, "grad_norm": 1.9635936104933305, "learning_rate": 6.78052358225107e-06, "loss": 0.9493, "step": 5888 }, { "epoch": 0.6163265306122448, "grad_norm": 2.0898861954151733, "learning_rate": 6.777314524366486e-06, "loss": 0.89, "step": 5889 }, { "epoch": 0.6164311878597593, "grad_norm": 1.9717149097514748, "learning_rate": 6.774105836775e-06, "loss": 0.8447, "step": 5890 }, { "epoch": 0.6165358451072737, "grad_norm": 1.956228723574464, "learning_rate": 6.770897519845294e-06, "loss": 0.8423, "step": 5891 }, { "epoch": 0.6166405023547881, "grad_norm": 1.9423670450685322, "learning_rate": 6.767689573946008e-06, "loss": 0.9756, "step": 5892 }, { "epoch": 0.6167451596023025, "grad_norm": 2.0612156081235313, "learning_rate": 6.7644819994457424e-06, "loss": 0.8537, "step": 5893 }, { "epoch": 0.6168498168498169, "grad_norm": 2.134024125429241, "learning_rate": 6.761274796713053e-06, "loss": 0.8747, "step": 5894 }, { "epoch": 0.6169544740973313, "grad_norm": 2.1094468946194276, "learning_rate": 6.7580679661164575e-06, "loss": 0.9272, "step": 5895 }, { "epoch": 0.6170591313448456, "grad_norm": 1.96616440967844, "learning_rate": 6.754861508024422e-06, "loss": 1.0121, "step": 5896 }, { "epoch": 0.61716378859236, "grad_norm": 2.2360078591987667, "learning_rate": 6.751655422805381e-06, "loss": 0.9208, "step": 5897 }, { "epoch": 0.6172684458398744, "grad_norm": 1.9125102631579798, "learning_rate": 6.748449710827711e-06, "loss": 0.9544, "step": 5898 }, { "epoch": 0.6173731030873888, "grad_norm": 1.9824080317071273, "learning_rate": 6.745244372459765e-06, "loss": 0.9728, "step": 5899 }, { "epoch": 0.6174777603349032, "grad_norm": 2.128988283347369, "learning_rate": 6.7420394080698385e-06, "loss": 0.8864, "step": 5900 }, { "epoch": 0.6175824175824176, "grad_norm": 1.8967381718005982, "learning_rate": 6.738834818026187e-06, "loss": 0.8736, "step": 5901 }, { "epoch": 0.617687074829932, "grad_norm": 2.013465087057385, "learning_rate": 6.7356306026970295e-06, "loss": 1.0341, "step": 5902 }, { "epoch": 0.6177917320774463, "grad_norm": 2.129706926698933, "learning_rate": 6.7324267624505305e-06, "loss": 0.8628, "step": 5903 }, { "epoch": 0.6178963893249607, "grad_norm": 1.8225296515950085, "learning_rate": 6.729223297654823e-06, "loss": 0.8744, "step": 5904 }, { "epoch": 0.6180010465724751, "grad_norm": 2.171135955598889, "learning_rate": 6.726020208677986e-06, "loss": 0.9598, "step": 5905 }, { "epoch": 0.6181057038199895, "grad_norm": 1.9615288782179878, "learning_rate": 6.722817495888063e-06, "loss": 0.8919, "step": 5906 }, { "epoch": 0.6182103610675039, "grad_norm": 2.3348478756088964, "learning_rate": 6.719615159653057e-06, "loss": 1.0335, "step": 5907 }, { "epoch": 0.6183150183150183, "grad_norm": 1.8529500238451388, "learning_rate": 6.716413200340917e-06, "loss": 0.9656, "step": 5908 }, { "epoch": 0.6184196755625327, "grad_norm": 2.101263167248366, "learning_rate": 6.713211618319559e-06, "loss": 0.8609, "step": 5909 }, { "epoch": 0.6185243328100472, "grad_norm": 2.082342140819496, "learning_rate": 6.710010413956849e-06, "loss": 0.9781, "step": 5910 }, { "epoch": 0.6186289900575614, "grad_norm": 2.0995332207635595, "learning_rate": 6.706809587620611e-06, "loss": 0.9303, "step": 5911 }, { "epoch": 0.6187336473050759, "grad_norm": 1.8736139083894665, "learning_rate": 6.703609139678628e-06, "loss": 0.7238, "step": 5912 }, { "epoch": 0.6188383045525903, "grad_norm": 2.02936203552506, "learning_rate": 6.700409070498636e-06, "loss": 0.8549, "step": 5913 }, { "epoch": 0.6189429618001047, "grad_norm": 2.1028471786177434, "learning_rate": 6.697209380448333e-06, "loss": 0.835, "step": 5914 }, { "epoch": 0.6190476190476191, "grad_norm": 2.220094548261755, "learning_rate": 6.694010069895363e-06, "loss": 0.8769, "step": 5915 }, { "epoch": 0.6191522762951335, "grad_norm": 1.7219940474883257, "learning_rate": 6.6908111392073395e-06, "loss": 0.7758, "step": 5916 }, { "epoch": 0.6192569335426479, "grad_norm": 2.0998250638549525, "learning_rate": 6.687612588751827e-06, "loss": 0.9183, "step": 5917 }, { "epoch": 0.6193615907901622, "grad_norm": 2.0505701485922643, "learning_rate": 6.684414418896341e-06, "loss": 0.8306, "step": 5918 }, { "epoch": 0.6194662480376766, "grad_norm": 2.2514903555360717, "learning_rate": 6.681216630008363e-06, "loss": 1.044, "step": 5919 }, { "epoch": 0.619570905285191, "grad_norm": 1.7740449558139142, "learning_rate": 6.678019222455318e-06, "loss": 0.9587, "step": 5920 }, { "epoch": 0.6196755625327054, "grad_norm": 1.9435874998522282, "learning_rate": 6.674822196604601e-06, "loss": 0.8681, "step": 5921 }, { "epoch": 0.6197802197802198, "grad_norm": 2.136025694684406, "learning_rate": 6.671625552823553e-06, "loss": 0.9398, "step": 5922 }, { "epoch": 0.6198848770277342, "grad_norm": 1.923864776761002, "learning_rate": 6.668429291479479e-06, "loss": 0.9224, "step": 5923 }, { "epoch": 0.6199895342752486, "grad_norm": 2.163924861536042, "learning_rate": 6.665233412939628e-06, "loss": 0.8994, "step": 5924 }, { "epoch": 0.6200941915227629, "grad_norm": 2.4204765320256647, "learning_rate": 6.6620379175712205e-06, "loss": 1.0021, "step": 5925 }, { "epoch": 0.6201988487702773, "grad_norm": 2.06754965058645, "learning_rate": 6.6588428057414256e-06, "loss": 0.9148, "step": 5926 }, { "epoch": 0.6203035060177917, "grad_norm": 2.337630297175959, "learning_rate": 6.655648077817362e-06, "loss": 0.9678, "step": 5927 }, { "epoch": 0.6204081632653061, "grad_norm": 2.040465726503583, "learning_rate": 6.652453734166118e-06, "loss": 0.9536, "step": 5928 }, { "epoch": 0.6205128205128205, "grad_norm": 2.092903427609399, "learning_rate": 6.649259775154725e-06, "loss": 0.9262, "step": 5929 }, { "epoch": 0.6206174777603349, "grad_norm": 1.9715070313368872, "learning_rate": 6.646066201150174e-06, "loss": 1.048, "step": 5930 }, { "epoch": 0.6207221350078493, "grad_norm": 1.8718505714062148, "learning_rate": 6.6428730125194196e-06, "loss": 0.9048, "step": 5931 }, { "epoch": 0.6208267922553636, "grad_norm": 1.8924153254665501, "learning_rate": 6.6396802096293555e-06, "loss": 0.951, "step": 5932 }, { "epoch": 0.620931449502878, "grad_norm": 2.242893217335704, "learning_rate": 6.6364877928468505e-06, "loss": 0.9777, "step": 5933 }, { "epoch": 0.6210361067503924, "grad_norm": 2.0854393025410247, "learning_rate": 6.633295762538718e-06, "loss": 0.9969, "step": 5934 }, { "epoch": 0.6211407639979069, "grad_norm": 1.8436423025912454, "learning_rate": 6.6301041190717255e-06, "loss": 0.964, "step": 5935 }, { "epoch": 0.6212454212454213, "grad_norm": 1.8863362444224359, "learning_rate": 6.626912862812605e-06, "loss": 0.8836, "step": 5936 }, { "epoch": 0.6213500784929357, "grad_norm": 2.262054897692573, "learning_rate": 6.62372199412803e-06, "loss": 0.8957, "step": 5937 }, { "epoch": 0.6214547357404501, "grad_norm": 2.201402572775819, "learning_rate": 6.620531513384645e-06, "loss": 0.9112, "step": 5938 }, { "epoch": 0.6215593929879644, "grad_norm": 2.2199717929386664, "learning_rate": 6.6173414209490395e-06, "loss": 1.1, "step": 5939 }, { "epoch": 0.6216640502354788, "grad_norm": 2.3970536946249505, "learning_rate": 6.614151717187762e-06, "loss": 0.935, "step": 5940 }, { "epoch": 0.6217687074829932, "grad_norm": 1.9767721037140733, "learning_rate": 6.610962402467314e-06, "loss": 0.9553, "step": 5941 }, { "epoch": 0.6218733647305076, "grad_norm": 2.205710848436116, "learning_rate": 6.607773477154156e-06, "loss": 0.8938, "step": 5942 }, { "epoch": 0.621978021978022, "grad_norm": 1.9654480260758713, "learning_rate": 6.604584941614706e-06, "loss": 0.9242, "step": 5943 }, { "epoch": 0.6220826792255364, "grad_norm": 1.750180216403602, "learning_rate": 6.601396796215327e-06, "loss": 0.8576, "step": 5944 }, { "epoch": 0.6221873364730508, "grad_norm": 2.236278725054879, "learning_rate": 6.5982090413223476e-06, "loss": 1.0135, "step": 5945 }, { "epoch": 0.6222919937205651, "grad_norm": 2.027612274915688, "learning_rate": 6.595021677302044e-06, "loss": 0.939, "step": 5946 }, { "epoch": 0.6223966509680795, "grad_norm": 2.244750079445713, "learning_rate": 6.591834704520653e-06, "loss": 0.8214, "step": 5947 }, { "epoch": 0.6225013082155939, "grad_norm": 2.4590294267920476, "learning_rate": 6.5886481233443656e-06, "loss": 0.9864, "step": 5948 }, { "epoch": 0.6226059654631083, "grad_norm": 2.030725863909143, "learning_rate": 6.585461934139321e-06, "loss": 0.9592, "step": 5949 }, { "epoch": 0.6227106227106227, "grad_norm": 1.988399104391876, "learning_rate": 6.5822761372716195e-06, "loss": 0.9493, "step": 5950 }, { "epoch": 0.6228152799581371, "grad_norm": 1.954025794060076, "learning_rate": 6.579090733107323e-06, "loss": 0.9747, "step": 5951 }, { "epoch": 0.6229199372056515, "grad_norm": 2.1922622101716223, "learning_rate": 6.575905722012435e-06, "loss": 0.9192, "step": 5952 }, { "epoch": 0.6230245944531659, "grad_norm": 2.1750494542346788, "learning_rate": 6.5727211043529214e-06, "loss": 0.9707, "step": 5953 }, { "epoch": 0.6231292517006802, "grad_norm": 2.2279016358653125, "learning_rate": 6.5695368804947e-06, "loss": 0.8988, "step": 5954 }, { "epoch": 0.6232339089481946, "grad_norm": 2.3809272214435104, "learning_rate": 6.566353050803646e-06, "loss": 1.0132, "step": 5955 }, { "epoch": 0.623338566195709, "grad_norm": 2.3706566423677065, "learning_rate": 6.563169615645586e-06, "loss": 1.0054, "step": 5956 }, { "epoch": 0.6234432234432234, "grad_norm": 1.7752367380253748, "learning_rate": 6.559986575386307e-06, "loss": 0.862, "step": 5957 }, { "epoch": 0.6235478806907379, "grad_norm": 1.8311258992017134, "learning_rate": 6.556803930391542e-06, "loss": 0.7931, "step": 5958 }, { "epoch": 0.6236525379382523, "grad_norm": 2.005969903941888, "learning_rate": 6.553621681026983e-06, "loss": 0.8692, "step": 5959 }, { "epoch": 0.6237571951857667, "grad_norm": 2.4536539179443575, "learning_rate": 6.550439827658282e-06, "loss": 1.0995, "step": 5960 }, { "epoch": 0.623861852433281, "grad_norm": 1.829663168954961, "learning_rate": 6.5472583706510375e-06, "loss": 0.9042, "step": 5961 }, { "epoch": 0.6239665096807954, "grad_norm": 1.8713774810103647, "learning_rate": 6.544077310370806e-06, "loss": 0.8337, "step": 5962 }, { "epoch": 0.6240711669283098, "grad_norm": 1.7943958864399616, "learning_rate": 6.540896647183098e-06, "loss": 0.8306, "step": 5963 }, { "epoch": 0.6241758241758242, "grad_norm": 2.142987115852985, "learning_rate": 6.537716381453379e-06, "loss": 0.9367, "step": 5964 }, { "epoch": 0.6242804814233386, "grad_norm": 2.0012001401459862, "learning_rate": 6.534536513547065e-06, "loss": 0.8729, "step": 5965 }, { "epoch": 0.624385138670853, "grad_norm": 1.976227019935359, "learning_rate": 6.531357043829529e-06, "loss": 0.8953, "step": 5966 }, { "epoch": 0.6244897959183674, "grad_norm": 2.044153940630976, "learning_rate": 6.5281779726661e-06, "loss": 0.9151, "step": 5967 }, { "epoch": 0.6245944531658817, "grad_norm": 2.5746730325720395, "learning_rate": 6.524999300422064e-06, "loss": 0.8603, "step": 5968 }, { "epoch": 0.6246991104133961, "grad_norm": 1.8639820031464824, "learning_rate": 6.5218210274626495e-06, "loss": 0.868, "step": 5969 }, { "epoch": 0.6248037676609105, "grad_norm": 2.294972502505636, "learning_rate": 6.5186431541530514e-06, "loss": 0.9509, "step": 5970 }, { "epoch": 0.6249084249084249, "grad_norm": 2.251140816785971, "learning_rate": 6.515465680858412e-06, "loss": 0.8821, "step": 5971 }, { "epoch": 0.6250130821559393, "grad_norm": 1.8794029352605075, "learning_rate": 6.512288607943831e-06, "loss": 0.7838, "step": 5972 }, { "epoch": 0.6251177394034537, "grad_norm": 1.9880704609132205, "learning_rate": 6.509111935774357e-06, "loss": 0.9442, "step": 5973 }, { "epoch": 0.6252223966509681, "grad_norm": 2.1245881624662153, "learning_rate": 6.5059356647150004e-06, "loss": 0.9828, "step": 5974 }, { "epoch": 0.6253270538984824, "grad_norm": 2.241601235510337, "learning_rate": 6.502759795130717e-06, "loss": 1.0231, "step": 5975 }, { "epoch": 0.6254317111459968, "grad_norm": 2.075450945502386, "learning_rate": 6.499584327386422e-06, "loss": 0.9298, "step": 5976 }, { "epoch": 0.6255363683935112, "grad_norm": 1.9389873315576378, "learning_rate": 6.496409261846988e-06, "loss": 0.7791, "step": 5977 }, { "epoch": 0.6256410256410256, "grad_norm": 2.1343252505964387, "learning_rate": 6.493234598877229e-06, "loss": 1.1021, "step": 5978 }, { "epoch": 0.62574568288854, "grad_norm": 1.7296284100239168, "learning_rate": 6.490060338841926e-06, "loss": 0.8877, "step": 5979 }, { "epoch": 0.6258503401360545, "grad_norm": 2.6208758089241075, "learning_rate": 6.486886482105803e-06, "loss": 1.0258, "step": 5980 }, { "epoch": 0.6259549973835689, "grad_norm": 1.6310737999103246, "learning_rate": 6.483713029033547e-06, "loss": 0.8419, "step": 5981 }, { "epoch": 0.6260596546310832, "grad_norm": 1.9090171828142979, "learning_rate": 6.4805399799897905e-06, "loss": 0.9513, "step": 5982 }, { "epoch": 0.6261643118785976, "grad_norm": 2.0317937367699104, "learning_rate": 6.477367335339129e-06, "loss": 0.8979, "step": 5983 }, { "epoch": 0.626268969126112, "grad_norm": 1.9744350469086347, "learning_rate": 6.474195095446099e-06, "loss": 0.9687, "step": 5984 }, { "epoch": 0.6263736263736264, "grad_norm": 2.0167761714344996, "learning_rate": 6.471023260675196e-06, "loss": 0.9428, "step": 5985 }, { "epoch": 0.6264782836211408, "grad_norm": 1.944143991441491, "learning_rate": 6.467851831390879e-06, "loss": 1.0335, "step": 5986 }, { "epoch": 0.6265829408686552, "grad_norm": 2.037332698815376, "learning_rate": 6.464680807957547e-06, "loss": 0.9471, "step": 5987 }, { "epoch": 0.6266875981161696, "grad_norm": 2.1505226752128794, "learning_rate": 6.461510190739555e-06, "loss": 0.9963, "step": 5988 }, { "epoch": 0.6267922553636839, "grad_norm": 1.970070926677773, "learning_rate": 6.458339980101218e-06, "loss": 0.9675, "step": 5989 }, { "epoch": 0.6268969126111983, "grad_norm": 1.8959906001986315, "learning_rate": 6.455170176406795e-06, "loss": 0.8347, "step": 5990 }, { "epoch": 0.6270015698587127, "grad_norm": 1.8273443555041868, "learning_rate": 6.452000780020507e-06, "loss": 0.9721, "step": 5991 }, { "epoch": 0.6271062271062271, "grad_norm": 2.1115215989855187, "learning_rate": 6.448831791306518e-06, "loss": 0.9706, "step": 5992 }, { "epoch": 0.6272108843537415, "grad_norm": 1.9586483418053429, "learning_rate": 6.445663210628955e-06, "loss": 0.8123, "step": 5993 }, { "epoch": 0.6273155416012559, "grad_norm": 2.2343184089780768, "learning_rate": 6.442495038351898e-06, "loss": 0.9436, "step": 5994 }, { "epoch": 0.6274201988487703, "grad_norm": 2.0306223291943546, "learning_rate": 6.439327274839371e-06, "loss": 0.8866, "step": 5995 }, { "epoch": 0.6275248560962847, "grad_norm": 2.2375207468319482, "learning_rate": 6.4361599204553604e-06, "loss": 0.9457, "step": 5996 }, { "epoch": 0.627629513343799, "grad_norm": 1.8709896809018647, "learning_rate": 6.4329929755637975e-06, "loss": 0.9759, "step": 5997 }, { "epoch": 0.6277341705913134, "grad_norm": 2.0216688575621453, "learning_rate": 6.429826440528575e-06, "loss": 0.888, "step": 5998 }, { "epoch": 0.6278388278388278, "grad_norm": 1.8399467700878034, "learning_rate": 6.4266603157135295e-06, "loss": 0.8394, "step": 5999 }, { "epoch": 0.6279434850863422, "grad_norm": 2.1376344086932373, "learning_rate": 6.4234946014824604e-06, "loss": 0.9686, "step": 6000 }, { "epoch": 0.6280481423338566, "grad_norm": 1.9022841655578102, "learning_rate": 6.42032929819911e-06, "loss": 0.864, "step": 6001 }, { "epoch": 0.628152799581371, "grad_norm": 2.083437132779924, "learning_rate": 6.417164406227177e-06, "loss": 0.9067, "step": 6002 }, { "epoch": 0.6282574568288855, "grad_norm": 2.2018711885514612, "learning_rate": 6.413999925930321e-06, "loss": 0.9072, "step": 6003 }, { "epoch": 0.6283621140763997, "grad_norm": 1.9021036398571836, "learning_rate": 6.410835857672142e-06, "loss": 0.9087, "step": 6004 }, { "epoch": 0.6284667713239142, "grad_norm": 1.935354772806226, "learning_rate": 6.4076722018162e-06, "loss": 0.9119, "step": 6005 }, { "epoch": 0.6285714285714286, "grad_norm": 1.9033643377647045, "learning_rate": 6.4045089587260054e-06, "loss": 0.8665, "step": 6006 }, { "epoch": 0.628676085818943, "grad_norm": 2.3015121040796434, "learning_rate": 6.401346128765018e-06, "loss": 0.9669, "step": 6007 }, { "epoch": 0.6287807430664574, "grad_norm": 2.203066783924028, "learning_rate": 6.398183712296657e-06, "loss": 0.8435, "step": 6008 }, { "epoch": 0.6288854003139718, "grad_norm": 1.833007785001033, "learning_rate": 6.395021709684288e-06, "loss": 0.9143, "step": 6009 }, { "epoch": 0.6289900575614862, "grad_norm": 2.3499962012268814, "learning_rate": 6.391860121291233e-06, "loss": 0.8905, "step": 6010 }, { "epoch": 0.6290947148090005, "grad_norm": 2.1025471869532764, "learning_rate": 6.388698947480762e-06, "loss": 0.8267, "step": 6011 }, { "epoch": 0.6291993720565149, "grad_norm": 1.8540577312675692, "learning_rate": 6.385538188616105e-06, "loss": 0.8396, "step": 6012 }, { "epoch": 0.6293040293040293, "grad_norm": 1.8832622109992163, "learning_rate": 6.3823778450604376e-06, "loss": 0.8341, "step": 6013 }, { "epoch": 0.6294086865515437, "grad_norm": 2.404866254697502, "learning_rate": 6.37921791717689e-06, "loss": 0.9017, "step": 6014 }, { "epoch": 0.6295133437990581, "grad_norm": 1.940080093100944, "learning_rate": 6.376058405328543e-06, "loss": 0.964, "step": 6015 }, { "epoch": 0.6296180010465725, "grad_norm": 2.045127466425197, "learning_rate": 6.372899309878432e-06, "loss": 0.9815, "step": 6016 }, { "epoch": 0.6297226582940869, "grad_norm": 2.1577986035955505, "learning_rate": 6.369740631189545e-06, "loss": 1.0398, "step": 6017 }, { "epoch": 0.6298273155416012, "grad_norm": 1.749672091610911, "learning_rate": 6.366582369624816e-06, "loss": 0.7923, "step": 6018 }, { "epoch": 0.6299319727891156, "grad_norm": 2.109385169681741, "learning_rate": 6.363424525547136e-06, "loss": 1.0015, "step": 6019 }, { "epoch": 0.63003663003663, "grad_norm": 2.4515134604996156, "learning_rate": 6.360267099319355e-06, "loss": 1.0339, "step": 6020 }, { "epoch": 0.6301412872841444, "grad_norm": 1.8551476559676408, "learning_rate": 6.357110091304259e-06, "loss": 0.8783, "step": 6021 }, { "epoch": 0.6302459445316588, "grad_norm": 2.0924036270338786, "learning_rate": 6.353953501864602e-06, "loss": 0.9481, "step": 6022 }, { "epoch": 0.6303506017791732, "grad_norm": 1.895267228752824, "learning_rate": 6.35079733136308e-06, "loss": 0.9792, "step": 6023 }, { "epoch": 0.6304552590266876, "grad_norm": 1.8118380058338375, "learning_rate": 6.3476415801623394e-06, "loss": 0.8829, "step": 6024 }, { "epoch": 0.6305599162742019, "grad_norm": 1.8405134542145882, "learning_rate": 6.344486248624987e-06, "loss": 0.7713, "step": 6025 }, { "epoch": 0.6306645735217163, "grad_norm": 2.218547626688002, "learning_rate": 6.341331337113573e-06, "loss": 0.9786, "step": 6026 }, { "epoch": 0.6307692307692307, "grad_norm": 1.882913448926194, "learning_rate": 6.338176845990608e-06, "loss": 0.9793, "step": 6027 }, { "epoch": 0.6308738880167452, "grad_norm": 1.9751456320726135, "learning_rate": 6.335022775618542e-06, "loss": 0.8445, "step": 6028 }, { "epoch": 0.6309785452642596, "grad_norm": 2.1125744499378496, "learning_rate": 6.331869126359791e-06, "loss": 0.9355, "step": 6029 }, { "epoch": 0.631083202511774, "grad_norm": 1.6815396479096147, "learning_rate": 6.328715898576716e-06, "loss": 0.7911, "step": 6030 }, { "epoch": 0.6311878597592884, "grad_norm": 1.7816434864441257, "learning_rate": 6.3255630926316236e-06, "loss": 0.8058, "step": 6031 }, { "epoch": 0.6312925170068027, "grad_norm": 2.075892498540421, "learning_rate": 6.3224107088867835e-06, "loss": 0.9163, "step": 6032 }, { "epoch": 0.6313971742543171, "grad_norm": 1.7175221205045825, "learning_rate": 6.319258747704406e-06, "loss": 0.7745, "step": 6033 }, { "epoch": 0.6315018315018315, "grad_norm": 1.90865898477721, "learning_rate": 6.316107209446662e-06, "loss": 0.8428, "step": 6034 }, { "epoch": 0.6316064887493459, "grad_norm": 2.011691584165178, "learning_rate": 6.312956094475666e-06, "loss": 0.9125, "step": 6035 }, { "epoch": 0.6317111459968603, "grad_norm": 2.117883184326949, "learning_rate": 6.30980540315349e-06, "loss": 1.0146, "step": 6036 }, { "epoch": 0.6318158032443747, "grad_norm": 1.9763590631964858, "learning_rate": 6.306655135842151e-06, "loss": 0.8664, "step": 6037 }, { "epoch": 0.6319204604918891, "grad_norm": 1.9200563612501522, "learning_rate": 6.303505292903627e-06, "loss": 0.865, "step": 6038 }, { "epoch": 0.6320251177394035, "grad_norm": 1.95702742334413, "learning_rate": 6.300355874699839e-06, "loss": 0.984, "step": 6039 }, { "epoch": 0.6321297749869178, "grad_norm": 1.9835884003747482, "learning_rate": 6.297206881592659e-06, "loss": 0.8374, "step": 6040 }, { "epoch": 0.6322344322344322, "grad_norm": 1.9891775252939559, "learning_rate": 6.294058313943916e-06, "loss": 0.8428, "step": 6041 }, { "epoch": 0.6323390894819466, "grad_norm": 2.2448267920259655, "learning_rate": 6.290910172115387e-06, "loss": 0.8765, "step": 6042 }, { "epoch": 0.632443746729461, "grad_norm": 2.0880819996965405, "learning_rate": 6.287762456468796e-06, "loss": 1.0132, "step": 6043 }, { "epoch": 0.6325484039769754, "grad_norm": 2.321191163987611, "learning_rate": 6.284615167365826e-06, "loss": 0.9126, "step": 6044 }, { "epoch": 0.6326530612244898, "grad_norm": 2.157686318063533, "learning_rate": 6.281468305168102e-06, "loss": 0.9658, "step": 6045 }, { "epoch": 0.6327577184720042, "grad_norm": 1.8521424353333522, "learning_rate": 6.278321870237207e-06, "loss": 0.9874, "step": 6046 }, { "epoch": 0.6328623757195185, "grad_norm": 1.985600041977417, "learning_rate": 6.275175862934679e-06, "loss": 0.8763, "step": 6047 }, { "epoch": 0.6329670329670329, "grad_norm": 2.167359949334803, "learning_rate": 6.2720302836219925e-06, "loss": 0.8538, "step": 6048 }, { "epoch": 0.6330716902145473, "grad_norm": 1.892338682422864, "learning_rate": 6.268885132660585e-06, "loss": 0.857, "step": 6049 }, { "epoch": 0.6331763474620618, "grad_norm": 2.074732815371282, "learning_rate": 6.265740410411838e-06, "loss": 0.9257, "step": 6050 }, { "epoch": 0.6332810047095762, "grad_norm": 1.9538306094870916, "learning_rate": 6.26259611723709e-06, "loss": 0.9022, "step": 6051 }, { "epoch": 0.6333856619570906, "grad_norm": 2.052293995501398, "learning_rate": 6.259452253497622e-06, "loss": 0.9688, "step": 6052 }, { "epoch": 0.633490319204605, "grad_norm": 1.9040390605918567, "learning_rate": 6.256308819554676e-06, "loss": 0.9216, "step": 6053 }, { "epoch": 0.6335949764521193, "grad_norm": 1.887491395994407, "learning_rate": 6.253165815769431e-06, "loss": 0.9326, "step": 6054 }, { "epoch": 0.6336996336996337, "grad_norm": 2.1059814291825054, "learning_rate": 6.250023242503031e-06, "loss": 0.9491, "step": 6055 }, { "epoch": 0.6338042909471481, "grad_norm": 1.949072153051419, "learning_rate": 6.246881100116566e-06, "loss": 0.7935, "step": 6056 }, { "epoch": 0.6339089481946625, "grad_norm": 2.0739299147239607, "learning_rate": 6.243739388971068e-06, "loss": 0.959, "step": 6057 }, { "epoch": 0.6340136054421769, "grad_norm": 2.001653115479742, "learning_rate": 6.240598109427531e-06, "loss": 0.9694, "step": 6058 }, { "epoch": 0.6341182626896913, "grad_norm": 2.2062451919821355, "learning_rate": 6.237457261846891e-06, "loss": 0.9713, "step": 6059 }, { "epoch": 0.6342229199372057, "grad_norm": 2.0518558414861845, "learning_rate": 6.234316846590041e-06, "loss": 0.9721, "step": 6060 }, { "epoch": 0.63432757718472, "grad_norm": 2.0231758494324352, "learning_rate": 6.231176864017819e-06, "loss": 1.0087, "step": 6061 }, { "epoch": 0.6344322344322344, "grad_norm": 2.315599702222333, "learning_rate": 6.228037314491013e-06, "loss": 0.9439, "step": 6062 }, { "epoch": 0.6345368916797488, "grad_norm": 2.0500400828302956, "learning_rate": 6.224898198370363e-06, "loss": 0.9502, "step": 6063 }, { "epoch": 0.6346415489272632, "grad_norm": 1.9582030992702546, "learning_rate": 6.22175951601657e-06, "loss": 0.9388, "step": 6064 }, { "epoch": 0.6347462061747776, "grad_norm": 1.8671881803341661, "learning_rate": 6.218621267790263e-06, "loss": 0.9292, "step": 6065 }, { "epoch": 0.634850863422292, "grad_norm": 1.9018283992660436, "learning_rate": 6.215483454052043e-06, "loss": 0.8481, "step": 6066 }, { "epoch": 0.6349555206698064, "grad_norm": 2.1228197696527915, "learning_rate": 6.2123460751624425e-06, "loss": 0.9088, "step": 6067 }, { "epoch": 0.6350601779173207, "grad_norm": 2.0541407952110817, "learning_rate": 6.209209131481957e-06, "loss": 0.8942, "step": 6068 }, { "epoch": 0.6351648351648351, "grad_norm": 2.4812046684098146, "learning_rate": 6.206072623371027e-06, "loss": 0.9851, "step": 6069 }, { "epoch": 0.6352694924123495, "grad_norm": 2.466479670275315, "learning_rate": 6.202936551190044e-06, "loss": 0.9819, "step": 6070 }, { "epoch": 0.6353741496598639, "grad_norm": 1.928191042129914, "learning_rate": 6.199800915299345e-06, "loss": 0.8792, "step": 6071 }, { "epoch": 0.6354788069073783, "grad_norm": 1.638080208345466, "learning_rate": 6.196665716059229e-06, "loss": 0.7502, "step": 6072 }, { "epoch": 0.6355834641548928, "grad_norm": 1.935024991236978, "learning_rate": 6.193530953829931e-06, "loss": 0.9067, "step": 6073 }, { "epoch": 0.6356881214024072, "grad_norm": 1.9042984524406725, "learning_rate": 6.190396628971644e-06, "loss": 1.0139, "step": 6074 }, { "epoch": 0.6357927786499215, "grad_norm": 2.096217219665164, "learning_rate": 6.187262741844507e-06, "loss": 0.9256, "step": 6075 }, { "epoch": 0.6358974358974359, "grad_norm": 2.1629109423290602, "learning_rate": 6.18412929280861e-06, "loss": 1.1161, "step": 6076 }, { "epoch": 0.6360020931449503, "grad_norm": 2.4819953770244543, "learning_rate": 6.180996282223993e-06, "loss": 1.015, "step": 6077 }, { "epoch": 0.6361067503924647, "grad_norm": 1.973489568225318, "learning_rate": 6.177863710450645e-06, "loss": 0.8687, "step": 6078 }, { "epoch": 0.6362114076399791, "grad_norm": 2.0537245713359455, "learning_rate": 6.174731577848505e-06, "loss": 1.0115, "step": 6079 }, { "epoch": 0.6363160648874935, "grad_norm": 2.1026542763075056, "learning_rate": 6.171599884777458e-06, "loss": 0.8719, "step": 6080 }, { "epoch": 0.6364207221350079, "grad_norm": 1.9810644360085412, "learning_rate": 6.168468631597349e-06, "loss": 0.9621, "step": 6081 }, { "epoch": 0.6365253793825223, "grad_norm": 1.8498313936174973, "learning_rate": 6.16533781866796e-06, "loss": 0.8684, "step": 6082 }, { "epoch": 0.6366300366300366, "grad_norm": 1.882513401890856, "learning_rate": 6.162207446349031e-06, "loss": 0.8804, "step": 6083 }, { "epoch": 0.636734693877551, "grad_norm": 2.1454199707720045, "learning_rate": 6.159077515000245e-06, "loss": 0.8786, "step": 6084 }, { "epoch": 0.6368393511250654, "grad_norm": 2.0348014676633848, "learning_rate": 6.155948024981241e-06, "loss": 0.9031, "step": 6085 }, { "epoch": 0.6369440083725798, "grad_norm": 2.0035495835149786, "learning_rate": 6.1528189766515996e-06, "loss": 0.965, "step": 6086 }, { "epoch": 0.6370486656200942, "grad_norm": 2.180282601428462, "learning_rate": 6.1496903703708575e-06, "loss": 1.0432, "step": 6087 }, { "epoch": 0.6371533228676086, "grad_norm": 2.1368862326905043, "learning_rate": 6.146562206498497e-06, "loss": 0.9117, "step": 6088 }, { "epoch": 0.637257980115123, "grad_norm": 2.043077967035869, "learning_rate": 6.143434485393947e-06, "loss": 0.9375, "step": 6089 }, { "epoch": 0.6373626373626373, "grad_norm": 2.133413981464569, "learning_rate": 6.140307207416598e-06, "loss": 0.9432, "step": 6090 }, { "epoch": 0.6374672946101517, "grad_norm": 1.8874105602733608, "learning_rate": 6.137180372925773e-06, "loss": 0.9215, "step": 6091 }, { "epoch": 0.6375719518576661, "grad_norm": 2.1540406495422624, "learning_rate": 6.134053982280756e-06, "loss": 0.8921, "step": 6092 }, { "epoch": 0.6376766091051805, "grad_norm": 2.267611660094676, "learning_rate": 6.130928035840771e-06, "loss": 0.9158, "step": 6093 }, { "epoch": 0.6377812663526949, "grad_norm": 2.132196533283258, "learning_rate": 6.127802533965001e-06, "loss": 0.9576, "step": 6094 }, { "epoch": 0.6378859236002093, "grad_norm": 1.7952285637216865, "learning_rate": 6.124677477012566e-06, "loss": 0.8652, "step": 6095 }, { "epoch": 0.6379905808477238, "grad_norm": 2.268355822133597, "learning_rate": 6.121552865342548e-06, "loss": 0.9496, "step": 6096 }, { "epoch": 0.638095238095238, "grad_norm": 2.480468965787619, "learning_rate": 6.118428699313965e-06, "loss": 0.9992, "step": 6097 }, { "epoch": 0.6381998953427525, "grad_norm": 2.284899377257853, "learning_rate": 6.115304979285794e-06, "loss": 1.0189, "step": 6098 }, { "epoch": 0.6383045525902669, "grad_norm": 2.2705419138837386, "learning_rate": 6.112181705616958e-06, "loss": 0.7452, "step": 6099 }, { "epoch": 0.6384092098377813, "grad_norm": 1.9550159382736858, "learning_rate": 6.109058878666326e-06, "loss": 0.934, "step": 6100 }, { "epoch": 0.6385138670852957, "grad_norm": 2.380989502303415, "learning_rate": 6.105936498792715e-06, "loss": 0.8759, "step": 6101 }, { "epoch": 0.6386185243328101, "grad_norm": 2.3521973407044827, "learning_rate": 6.102814566354896e-06, "loss": 0.9418, "step": 6102 }, { "epoch": 0.6387231815803245, "grad_norm": 2.1620835017709092, "learning_rate": 6.099693081711582e-06, "loss": 0.9934, "step": 6103 }, { "epoch": 0.6388278388278388, "grad_norm": 2.304478296902805, "learning_rate": 6.09657204522144e-06, "loss": 0.9824, "step": 6104 }, { "epoch": 0.6389324960753532, "grad_norm": 2.124733642865628, "learning_rate": 6.093451457243082e-06, "loss": 0.8826, "step": 6105 }, { "epoch": 0.6390371533228676, "grad_norm": 2.2586347480935216, "learning_rate": 6.09033131813507e-06, "loss": 1.0353, "step": 6106 }, { "epoch": 0.639141810570382, "grad_norm": 2.183731475063431, "learning_rate": 6.087211628255918e-06, "loss": 0.9315, "step": 6107 }, { "epoch": 0.6392464678178964, "grad_norm": 2.1727879672656063, "learning_rate": 6.08409238796408e-06, "loss": 1.017, "step": 6108 }, { "epoch": 0.6393511250654108, "grad_norm": 1.8520184743679367, "learning_rate": 6.080973597617969e-06, "loss": 0.7484, "step": 6109 }, { "epoch": 0.6394557823129252, "grad_norm": 2.1309335686211384, "learning_rate": 6.077855257575932e-06, "loss": 0.9459, "step": 6110 }, { "epoch": 0.6395604395604395, "grad_norm": 2.112146194809444, "learning_rate": 6.074737368196279e-06, "loss": 0.9341, "step": 6111 }, { "epoch": 0.6396650968079539, "grad_norm": 2.2818082480682373, "learning_rate": 6.071619929837259e-06, "loss": 0.9016, "step": 6112 }, { "epoch": 0.6397697540554683, "grad_norm": 1.9751366903614145, "learning_rate": 6.068502942857075e-06, "loss": 0.7711, "step": 6113 }, { "epoch": 0.6398744113029827, "grad_norm": 2.0611570942571813, "learning_rate": 6.065386407613869e-06, "loss": 0.9724, "step": 6114 }, { "epoch": 0.6399790685504971, "grad_norm": 2.0283236031744316, "learning_rate": 6.06227032446574e-06, "loss": 0.8681, "step": 6115 }, { "epoch": 0.6400837257980115, "grad_norm": 1.840467172717708, "learning_rate": 6.0591546937707375e-06, "loss": 0.8617, "step": 6116 }, { "epoch": 0.6401883830455259, "grad_norm": 2.216732313893134, "learning_rate": 6.056039515886848e-06, "loss": 0.9226, "step": 6117 }, { "epoch": 0.6402930402930402, "grad_norm": 2.0576439170006062, "learning_rate": 6.0529247911720145e-06, "loss": 0.9738, "step": 6118 }, { "epoch": 0.6403976975405546, "grad_norm": 2.004555679033776, "learning_rate": 6.049810519984125e-06, "loss": 0.9752, "step": 6119 }, { "epoch": 0.640502354788069, "grad_norm": 2.121801609117083, "learning_rate": 6.046696702681012e-06, "loss": 0.9376, "step": 6120 }, { "epoch": 0.6406070120355835, "grad_norm": 2.0557921754943638, "learning_rate": 6.043583339620465e-06, "loss": 0.8175, "step": 6121 }, { "epoch": 0.6407116692830979, "grad_norm": 2.117196486653035, "learning_rate": 6.0404704311602095e-06, "loss": 0.914, "step": 6122 }, { "epoch": 0.6408163265306123, "grad_norm": 2.454371990920559, "learning_rate": 6.037357977657926e-06, "loss": 0.8974, "step": 6123 }, { "epoch": 0.6409209837781267, "grad_norm": 1.779633370179486, "learning_rate": 6.034245979471249e-06, "loss": 0.8059, "step": 6124 }, { "epoch": 0.6410256410256411, "grad_norm": 2.224227729668064, "learning_rate": 6.031134436957747e-06, "loss": 0.9438, "step": 6125 }, { "epoch": 0.6411302982731554, "grad_norm": 1.8793850100294136, "learning_rate": 6.028023350474943e-06, "loss": 0.8463, "step": 6126 }, { "epoch": 0.6412349555206698, "grad_norm": 2.0192489810899823, "learning_rate": 6.024912720380309e-06, "loss": 0.8065, "step": 6127 }, { "epoch": 0.6413396127681842, "grad_norm": 2.086275873749281, "learning_rate": 6.021802547031263e-06, "loss": 0.9134, "step": 6128 }, { "epoch": 0.6414442700156986, "grad_norm": 2.239582460843957, "learning_rate": 6.018692830785167e-06, "loss": 0.8925, "step": 6129 }, { "epoch": 0.641548927263213, "grad_norm": 1.9036697749177023, "learning_rate": 6.015583571999337e-06, "loss": 0.9513, "step": 6130 }, { "epoch": 0.6416535845107274, "grad_norm": 1.967430877780038, "learning_rate": 6.012474771031029e-06, "loss": 0.9026, "step": 6131 }, { "epoch": 0.6417582417582418, "grad_norm": 1.9634313043742981, "learning_rate": 6.009366428237453e-06, "loss": 0.9432, "step": 6132 }, { "epoch": 0.6418628990057561, "grad_norm": 1.907415430341769, "learning_rate": 6.006258543975769e-06, "loss": 1.0205, "step": 6133 }, { "epoch": 0.6419675562532705, "grad_norm": 1.9958836925353802, "learning_rate": 6.003151118603071e-06, "loss": 0.9038, "step": 6134 }, { "epoch": 0.6420722135007849, "grad_norm": 1.974573234104124, "learning_rate": 6.000044152476414e-06, "loss": 0.8984, "step": 6135 }, { "epoch": 0.6421768707482993, "grad_norm": 1.9662322439234587, "learning_rate": 5.996937645952792e-06, "loss": 0.8528, "step": 6136 }, { "epoch": 0.6422815279958137, "grad_norm": 2.049330183711433, "learning_rate": 5.993831599389149e-06, "loss": 0.8704, "step": 6137 }, { "epoch": 0.6423861852433281, "grad_norm": 1.8386217231992765, "learning_rate": 5.990726013142378e-06, "loss": 0.8267, "step": 6138 }, { "epoch": 0.6424908424908425, "grad_norm": 1.9591896043802715, "learning_rate": 5.987620887569314e-06, "loss": 0.8269, "step": 6139 }, { "epoch": 0.6425954997383568, "grad_norm": 2.0298076454052065, "learning_rate": 5.984516223026746e-06, "loss": 0.9347, "step": 6140 }, { "epoch": 0.6427001569858712, "grad_norm": 2.0195419480669683, "learning_rate": 5.9814120198714e-06, "loss": 0.9478, "step": 6141 }, { "epoch": 0.6428048142333856, "grad_norm": 2.0883419789864335, "learning_rate": 5.9783082784599615e-06, "loss": 0.8287, "step": 6142 }, { "epoch": 0.6429094714809, "grad_norm": 2.1269657774275004, "learning_rate": 5.975204999149056e-06, "loss": 0.9302, "step": 6143 }, { "epoch": 0.6430141287284145, "grad_norm": 2.016828126075074, "learning_rate": 5.972102182295254e-06, "loss": 1.0265, "step": 6144 }, { "epoch": 0.6431187859759289, "grad_norm": 1.6917299157653185, "learning_rate": 5.968999828255079e-06, "loss": 0.7979, "step": 6145 }, { "epoch": 0.6432234432234433, "grad_norm": 1.8616864521421854, "learning_rate": 5.965897937384992e-06, "loss": 0.7963, "step": 6146 }, { "epoch": 0.6433281004709576, "grad_norm": 2.341785220873819, "learning_rate": 5.962796510041413e-06, "loss": 0.9599, "step": 6147 }, { "epoch": 0.643432757718472, "grad_norm": 1.9957063834445605, "learning_rate": 5.9596955465806974e-06, "loss": 1.0314, "step": 6148 }, { "epoch": 0.6435374149659864, "grad_norm": 2.08882364485049, "learning_rate": 5.956595047359151e-06, "loss": 0.8416, "step": 6149 }, { "epoch": 0.6436420722135008, "grad_norm": 1.8725034362012762, "learning_rate": 5.953495012733035e-06, "loss": 0.8405, "step": 6150 }, { "epoch": 0.6437467294610152, "grad_norm": 1.9625445407363562, "learning_rate": 5.950395443058543e-06, "loss": 0.9756, "step": 6151 }, { "epoch": 0.6438513867085296, "grad_norm": 2.0630976658247766, "learning_rate": 5.947296338691827e-06, "loss": 0.9711, "step": 6152 }, { "epoch": 0.643956043956044, "grad_norm": 1.9216746094429082, "learning_rate": 5.944197699988975e-06, "loss": 0.9195, "step": 6153 }, { "epoch": 0.6440607012035583, "grad_norm": 2.2677133214478324, "learning_rate": 5.9410995273060316e-06, "loss": 0.9668, "step": 6154 }, { "epoch": 0.6441653584510727, "grad_norm": 2.026665277815765, "learning_rate": 5.93800182099898e-06, "loss": 1.006, "step": 6155 }, { "epoch": 0.6442700156985871, "grad_norm": 2.309012027737432, "learning_rate": 5.93490458142375e-06, "loss": 0.8949, "step": 6156 }, { "epoch": 0.6443746729461015, "grad_norm": 1.8376028280551246, "learning_rate": 5.93180780893623e-06, "loss": 0.9266, "step": 6157 }, { "epoch": 0.6444793301936159, "grad_norm": 1.8687096935531817, "learning_rate": 5.928711503892233e-06, "loss": 0.7169, "step": 6158 }, { "epoch": 0.6445839874411303, "grad_norm": 1.7565771976435207, "learning_rate": 5.925615666647541e-06, "loss": 0.8147, "step": 6159 }, { "epoch": 0.6446886446886447, "grad_norm": 2.2216686274696182, "learning_rate": 5.92252029755787e-06, "loss": 0.9832, "step": 6160 }, { "epoch": 0.644793301936159, "grad_norm": 1.9664584115913126, "learning_rate": 5.91942539697888e-06, "loss": 0.9132, "step": 6161 }, { "epoch": 0.6448979591836734, "grad_norm": 2.2259482681346325, "learning_rate": 5.9163309652661886e-06, "loss": 1.0205, "step": 6162 }, { "epoch": 0.6450026164311878, "grad_norm": 1.974310097833552, "learning_rate": 5.913237002775342e-06, "loss": 0.8725, "step": 6163 }, { "epoch": 0.6451072736787022, "grad_norm": 1.9361555583674028, "learning_rate": 5.910143509861854e-06, "loss": 0.8765, "step": 6164 }, { "epoch": 0.6452119309262166, "grad_norm": 2.3259766646067805, "learning_rate": 5.907050486881164e-06, "loss": 0.8672, "step": 6165 }, { "epoch": 0.645316588173731, "grad_norm": 2.0520065137688976, "learning_rate": 5.903957934188673e-06, "loss": 0.942, "step": 6166 }, { "epoch": 0.6454212454212455, "grad_norm": 2.308496686545806, "learning_rate": 5.900865852139715e-06, "loss": 1.0235, "step": 6167 }, { "epoch": 0.6455259026687599, "grad_norm": 1.9275377714588016, "learning_rate": 5.897774241089581e-06, "loss": 0.9101, "step": 6168 }, { "epoch": 0.6456305599162742, "grad_norm": 1.7246090565713368, "learning_rate": 5.894683101393505e-06, "loss": 0.813, "step": 6169 }, { "epoch": 0.6457352171637886, "grad_norm": 2.090211706193853, "learning_rate": 5.8915924334066635e-06, "loss": 0.9071, "step": 6170 }, { "epoch": 0.645839874411303, "grad_norm": 2.225562586570551, "learning_rate": 5.888502237484179e-06, "loss": 0.89, "step": 6171 }, { "epoch": 0.6459445316588174, "grad_norm": 2.1959182655755507, "learning_rate": 5.885412513981121e-06, "loss": 0.9924, "step": 6172 }, { "epoch": 0.6460491889063318, "grad_norm": 2.0400315285376918, "learning_rate": 5.88232326325251e-06, "loss": 0.9976, "step": 6173 }, { "epoch": 0.6461538461538462, "grad_norm": 2.2524957844292084, "learning_rate": 5.879234485653302e-06, "loss": 0.9471, "step": 6174 }, { "epoch": 0.6462585034013606, "grad_norm": 1.9865407512804576, "learning_rate": 5.8761461815384005e-06, "loss": 1.007, "step": 6175 }, { "epoch": 0.6463631606488749, "grad_norm": 1.8960367283948207, "learning_rate": 5.873058351262666e-06, "loss": 0.8504, "step": 6176 }, { "epoch": 0.6464678178963893, "grad_norm": 1.7594307610018052, "learning_rate": 5.869970995180896e-06, "loss": 0.9622, "step": 6177 }, { "epoch": 0.6465724751439037, "grad_norm": 2.262308058632982, "learning_rate": 5.866884113647827e-06, "loss": 1.0252, "step": 6178 }, { "epoch": 0.6466771323914181, "grad_norm": 1.9209190231553315, "learning_rate": 5.863797707018155e-06, "loss": 0.8007, "step": 6179 }, { "epoch": 0.6467817896389325, "grad_norm": 1.5447458009603428, "learning_rate": 5.86071177564651e-06, "loss": 0.778, "step": 6180 }, { "epoch": 0.6468864468864469, "grad_norm": 1.8092557053926288, "learning_rate": 5.857626319887475e-06, "loss": 0.9498, "step": 6181 }, { "epoch": 0.6469911041339613, "grad_norm": 2.2187668350942693, "learning_rate": 5.85454134009557e-06, "loss": 0.8878, "step": 6182 }, { "epoch": 0.6470957613814756, "grad_norm": 2.0160589952562384, "learning_rate": 5.851456836625271e-06, "loss": 0.8794, "step": 6183 }, { "epoch": 0.64720041862899, "grad_norm": 1.8692365583128616, "learning_rate": 5.848372809830989e-06, "loss": 0.9669, "step": 6184 }, { "epoch": 0.6473050758765044, "grad_norm": 2.127493895012381, "learning_rate": 5.845289260067089e-06, "loss": 0.92, "step": 6185 }, { "epoch": 0.6474097331240188, "grad_norm": 2.2617699972750125, "learning_rate": 5.842206187687876e-06, "loss": 0.889, "step": 6186 }, { "epoch": 0.6475143903715332, "grad_norm": 2.134441719505655, "learning_rate": 5.8391235930476e-06, "loss": 0.8696, "step": 6187 }, { "epoch": 0.6476190476190476, "grad_norm": 2.6083578482447947, "learning_rate": 5.836041476500458e-06, "loss": 0.9542, "step": 6188 }, { "epoch": 0.6477237048665621, "grad_norm": 2.3257712734015388, "learning_rate": 5.832959838400593e-06, "loss": 0.8224, "step": 6189 }, { "epoch": 0.6478283621140764, "grad_norm": 2.3044088351111256, "learning_rate": 5.82987867910209e-06, "loss": 0.8298, "step": 6190 }, { "epoch": 0.6479330193615908, "grad_norm": 2.026561166745952, "learning_rate": 5.8267979989589815e-06, "loss": 0.82, "step": 6191 }, { "epoch": 0.6480376766091052, "grad_norm": 1.965006660005929, "learning_rate": 5.823717798325238e-06, "loss": 0.8973, "step": 6192 }, { "epoch": 0.6481423338566196, "grad_norm": 1.9771041937813247, "learning_rate": 5.820638077554785e-06, "loss": 0.9496, "step": 6193 }, { "epoch": 0.648246991104134, "grad_norm": 2.1258967665098134, "learning_rate": 5.8175588370014955e-06, "loss": 0.9364, "step": 6194 }, { "epoch": 0.6483516483516484, "grad_norm": 2.1439985227923644, "learning_rate": 5.814480077019173e-06, "loss": 0.988, "step": 6195 }, { "epoch": 0.6484563055991628, "grad_norm": 2.133683906939611, "learning_rate": 5.811401797961576e-06, "loss": 0.7218, "step": 6196 }, { "epoch": 0.6485609628466771, "grad_norm": 2.1684367284425567, "learning_rate": 5.8083240001824015e-06, "loss": 0.9307, "step": 6197 }, { "epoch": 0.6486656200941915, "grad_norm": 2.1509821140259078, "learning_rate": 5.805246684035293e-06, "loss": 0.9419, "step": 6198 }, { "epoch": 0.6487702773417059, "grad_norm": 2.40745342036477, "learning_rate": 5.802169849873849e-06, "loss": 0.971, "step": 6199 }, { "epoch": 0.6488749345892203, "grad_norm": 1.8193600056832606, "learning_rate": 5.799093498051599e-06, "loss": 0.948, "step": 6200 }, { "epoch": 0.6489795918367347, "grad_norm": 1.7020389159483353, "learning_rate": 5.796017628922018e-06, "loss": 0.7398, "step": 6201 }, { "epoch": 0.6490842490842491, "grad_norm": 2.04512342282411, "learning_rate": 5.792942242838537e-06, "loss": 0.8347, "step": 6202 }, { "epoch": 0.6491889063317635, "grad_norm": 2.5262087123815844, "learning_rate": 5.789867340154518e-06, "loss": 0.8673, "step": 6203 }, { "epoch": 0.6492935635792778, "grad_norm": 1.7089620050014493, "learning_rate": 5.786792921223281e-06, "loss": 0.7972, "step": 6204 }, { "epoch": 0.6493982208267922, "grad_norm": 2.1423319228812825, "learning_rate": 5.783718986398077e-06, "loss": 0.84, "step": 6205 }, { "epoch": 0.6495028780743066, "grad_norm": 2.095804353523492, "learning_rate": 5.78064553603211e-06, "loss": 0.8465, "step": 6206 }, { "epoch": 0.649607535321821, "grad_norm": 1.6986750357778488, "learning_rate": 5.77757257047852e-06, "loss": 0.8179, "step": 6207 }, { "epoch": 0.6497121925693354, "grad_norm": 1.9991041419560274, "learning_rate": 5.774500090090404e-06, "loss": 0.8437, "step": 6208 }, { "epoch": 0.6498168498168498, "grad_norm": 2.5476008805446435, "learning_rate": 5.7714280952207955e-06, "loss": 0.894, "step": 6209 }, { "epoch": 0.6499215070643642, "grad_norm": 1.840271331808465, "learning_rate": 5.7683565862226676e-06, "loss": 0.9403, "step": 6210 }, { "epoch": 0.6500261643118787, "grad_norm": 2.5171778195678955, "learning_rate": 5.765285563448948e-06, "loss": 0.8461, "step": 6211 }, { "epoch": 0.650130821559393, "grad_norm": 1.7716194503918323, "learning_rate": 5.7622150272525e-06, "loss": 0.9805, "step": 6212 }, { "epoch": 0.6502354788069074, "grad_norm": 1.8377563447111556, "learning_rate": 5.759144977986138e-06, "loss": 0.8581, "step": 6213 }, { "epoch": 0.6503401360544218, "grad_norm": 1.8723011718069902, "learning_rate": 5.756075416002618e-06, "loss": 0.9373, "step": 6214 }, { "epoch": 0.6504447933019362, "grad_norm": 2.069818431579889, "learning_rate": 5.753006341654634e-06, "loss": 0.9817, "step": 6215 }, { "epoch": 0.6505494505494506, "grad_norm": 2.1304030948159105, "learning_rate": 5.749937755294831e-06, "loss": 0.8727, "step": 6216 }, { "epoch": 0.650654107796965, "grad_norm": 1.8379536286843263, "learning_rate": 5.746869657275792e-06, "loss": 0.8884, "step": 6217 }, { "epoch": 0.6507587650444794, "grad_norm": 2.193473187783517, "learning_rate": 5.743802047950055e-06, "loss": 0.9205, "step": 6218 }, { "epoch": 0.6508634222919937, "grad_norm": 2.122540449060093, "learning_rate": 5.740734927670089e-06, "loss": 0.8567, "step": 6219 }, { "epoch": 0.6509680795395081, "grad_norm": 1.9639756156046886, "learning_rate": 5.737668296788314e-06, "loss": 0.9146, "step": 6220 }, { "epoch": 0.6510727367870225, "grad_norm": 1.952699246508381, "learning_rate": 5.734602155657096e-06, "loss": 0.8395, "step": 6221 }, { "epoch": 0.6511773940345369, "grad_norm": 1.8418042050457508, "learning_rate": 5.731536504628732e-06, "loss": 0.8573, "step": 6222 }, { "epoch": 0.6512820512820513, "grad_norm": 1.8770950809548828, "learning_rate": 5.728471344055482e-06, "loss": 0.9337, "step": 6223 }, { "epoch": 0.6513867085295657, "grad_norm": 1.8578509595752808, "learning_rate": 5.725406674289532e-06, "loss": 0.8205, "step": 6224 }, { "epoch": 0.6514913657770801, "grad_norm": 2.250049618236253, "learning_rate": 5.722342495683021e-06, "loss": 0.9339, "step": 6225 }, { "epoch": 0.6515960230245944, "grad_norm": 2.075268254460256, "learning_rate": 5.7192788085880245e-06, "loss": 0.9066, "step": 6226 }, { "epoch": 0.6517006802721088, "grad_norm": 2.0103126248212058, "learning_rate": 5.716215613356577e-06, "loss": 0.8782, "step": 6227 }, { "epoch": 0.6518053375196232, "grad_norm": 2.170160364779897, "learning_rate": 5.713152910340634e-06, "loss": 0.9634, "step": 6228 }, { "epoch": 0.6519099947671376, "grad_norm": 1.8429017442207223, "learning_rate": 5.7100906998921154e-06, "loss": 0.8229, "step": 6229 }, { "epoch": 0.652014652014652, "grad_norm": 1.9662643778222724, "learning_rate": 5.707028982362873e-06, "loss": 0.9536, "step": 6230 }, { "epoch": 0.6521193092621664, "grad_norm": 2.3974867995173166, "learning_rate": 5.7039677581046984e-06, "loss": 0.9297, "step": 6231 }, { "epoch": 0.6522239665096808, "grad_norm": 2.0737121782229746, "learning_rate": 5.700907027469342e-06, "loss": 0.9138, "step": 6232 }, { "epoch": 0.6523286237571951, "grad_norm": 1.9226135804608533, "learning_rate": 5.697846790808483e-06, "loss": 0.7444, "step": 6233 }, { "epoch": 0.6524332810047095, "grad_norm": 2.0066517801777994, "learning_rate": 5.69478704847375e-06, "loss": 0.82, "step": 6234 }, { "epoch": 0.652537938252224, "grad_norm": 1.891347237478382, "learning_rate": 5.691727800816712e-06, "loss": 0.9539, "step": 6235 }, { "epoch": 0.6526425954997384, "grad_norm": 1.9297427031420433, "learning_rate": 5.68866904818888e-06, "loss": 0.8912, "step": 6236 }, { "epoch": 0.6527472527472528, "grad_norm": 1.9332020266170267, "learning_rate": 5.685610790941713e-06, "loss": 0.8453, "step": 6237 }, { "epoch": 0.6528519099947672, "grad_norm": 2.0549224122607925, "learning_rate": 5.6825530294266185e-06, "loss": 0.9198, "step": 6238 }, { "epoch": 0.6529565672422816, "grad_norm": 1.7906027105209696, "learning_rate": 5.679495763994931e-06, "loss": 0.9266, "step": 6239 }, { "epoch": 0.6530612244897959, "grad_norm": 1.6523611298603982, "learning_rate": 5.67643899499794e-06, "loss": 0.8616, "step": 6240 }, { "epoch": 0.6531658817373103, "grad_norm": 2.1388297304679718, "learning_rate": 5.673382722786869e-06, "loss": 0.8582, "step": 6241 }, { "epoch": 0.6532705389848247, "grad_norm": 1.918250281902123, "learning_rate": 5.670326947712899e-06, "loss": 0.9843, "step": 6242 }, { "epoch": 0.6533751962323391, "grad_norm": 1.990471992424265, "learning_rate": 5.667271670127138e-06, "loss": 0.8863, "step": 6243 }, { "epoch": 0.6534798534798535, "grad_norm": 2.053395723942595, "learning_rate": 5.664216890380647e-06, "loss": 1.0306, "step": 6244 }, { "epoch": 0.6535845107273679, "grad_norm": 1.9787757140031736, "learning_rate": 5.66116260882442e-06, "loss": 0.8954, "step": 6245 }, { "epoch": 0.6536891679748823, "grad_norm": 2.1654076339913457, "learning_rate": 5.6581088258094054e-06, "loss": 0.9597, "step": 6246 }, { "epoch": 0.6537938252223966, "grad_norm": 2.18172363279797, "learning_rate": 5.655055541686491e-06, "loss": 1.008, "step": 6247 }, { "epoch": 0.653898482469911, "grad_norm": 1.9685000331248872, "learning_rate": 5.652002756806506e-06, "loss": 0.8069, "step": 6248 }, { "epoch": 0.6540031397174254, "grad_norm": 2.024974383790366, "learning_rate": 5.648950471520217e-06, "loss": 0.888, "step": 6249 }, { "epoch": 0.6541077969649398, "grad_norm": 1.9484380553012453, "learning_rate": 5.645898686178335e-06, "loss": 0.944, "step": 6250 }, { "epoch": 0.6542124542124542, "grad_norm": 2.231538349975824, "learning_rate": 5.642847401131526e-06, "loss": 0.8687, "step": 6251 }, { "epoch": 0.6543171114599686, "grad_norm": 1.9993737936439624, "learning_rate": 5.639796616730382e-06, "loss": 0.9617, "step": 6252 }, { "epoch": 0.654421768707483, "grad_norm": 2.1435053716762376, "learning_rate": 5.636746333325447e-06, "loss": 1.0731, "step": 6253 }, { "epoch": 0.6545264259549974, "grad_norm": 1.9548403870891942, "learning_rate": 5.633696551267198e-06, "loss": 0.9179, "step": 6254 }, { "epoch": 0.6546310832025117, "grad_norm": 1.910022534905838, "learning_rate": 5.630647270906071e-06, "loss": 0.9102, "step": 6255 }, { "epoch": 0.6547357404500261, "grad_norm": 1.8907963804653292, "learning_rate": 5.627598492592428e-06, "loss": 0.922, "step": 6256 }, { "epoch": 0.6548403976975405, "grad_norm": 1.954572343884513, "learning_rate": 5.624550216676584e-06, "loss": 0.9166, "step": 6257 }, { "epoch": 0.654945054945055, "grad_norm": 1.8353655197423198, "learning_rate": 5.621502443508791e-06, "loss": 0.9632, "step": 6258 }, { "epoch": 0.6550497121925694, "grad_norm": 2.18435647821634, "learning_rate": 5.618455173439244e-06, "loss": 0.8807, "step": 6259 }, { "epoch": 0.6551543694400838, "grad_norm": 2.527165737623044, "learning_rate": 5.615408406818074e-06, "loss": 1.04, "step": 6260 }, { "epoch": 0.6552590266875982, "grad_norm": 2.1620951304463962, "learning_rate": 5.6123621439953715e-06, "loss": 0.8673, "step": 6261 }, { "epoch": 0.6553636839351125, "grad_norm": 1.9064074958893684, "learning_rate": 5.609316385321149e-06, "loss": 0.8773, "step": 6262 }, { "epoch": 0.6554683411826269, "grad_norm": 2.240290365862607, "learning_rate": 5.60627113114538e-06, "loss": 0.8222, "step": 6263 }, { "epoch": 0.6555729984301413, "grad_norm": 2.0718457091136147, "learning_rate": 5.6032263818179635e-06, "loss": 0.8648, "step": 6264 }, { "epoch": 0.6556776556776557, "grad_norm": 2.0971279832921965, "learning_rate": 5.6001821376887454e-06, "loss": 0.9237, "step": 6265 }, { "epoch": 0.6557823129251701, "grad_norm": 1.9785023614732484, "learning_rate": 5.5971383991075234e-06, "loss": 0.7651, "step": 6266 }, { "epoch": 0.6558869701726845, "grad_norm": 1.9909685665336019, "learning_rate": 5.594095166424025e-06, "loss": 0.8833, "step": 6267 }, { "epoch": 0.6559916274201989, "grad_norm": 2.105567126725272, "learning_rate": 5.591052439987923e-06, "loss": 0.8752, "step": 6268 }, { "epoch": 0.6560962846677132, "grad_norm": 1.8400676598198988, "learning_rate": 5.588010220148834e-06, "loss": 0.8795, "step": 6269 }, { "epoch": 0.6562009419152276, "grad_norm": 1.8357939138695751, "learning_rate": 5.58496850725631e-06, "loss": 0.7996, "step": 6270 }, { "epoch": 0.656305599162742, "grad_norm": 2.0900322388751653, "learning_rate": 5.581927301659855e-06, "loss": 1.0018, "step": 6271 }, { "epoch": 0.6564102564102564, "grad_norm": 1.9794563181186657, "learning_rate": 5.578886603708914e-06, "loss": 0.9069, "step": 6272 }, { "epoch": 0.6565149136577708, "grad_norm": 1.9823398345613306, "learning_rate": 5.575846413752865e-06, "loss": 0.8751, "step": 6273 }, { "epoch": 0.6566195709052852, "grad_norm": 2.443483126872738, "learning_rate": 5.572806732141031e-06, "loss": 0.891, "step": 6274 }, { "epoch": 0.6567242281527996, "grad_norm": 1.952400398515589, "learning_rate": 5.569767559222674e-06, "loss": 0.8864, "step": 6275 }, { "epoch": 0.6568288854003139, "grad_norm": 1.8830733377298436, "learning_rate": 5.5667288953470115e-06, "loss": 0.9083, "step": 6276 }, { "epoch": 0.6569335426478283, "grad_norm": 1.9839437867503655, "learning_rate": 5.563690740863184e-06, "loss": 0.8745, "step": 6277 }, { "epoch": 0.6570381998953427, "grad_norm": 1.8975928814696994, "learning_rate": 5.560653096120283e-06, "loss": 0.9024, "step": 6278 }, { "epoch": 0.6571428571428571, "grad_norm": 2.147697717951089, "learning_rate": 5.557615961467338e-06, "loss": 0.8365, "step": 6279 }, { "epoch": 0.6572475143903715, "grad_norm": 1.7757224649611865, "learning_rate": 5.554579337253326e-06, "loss": 0.827, "step": 6280 }, { "epoch": 0.657352171637886, "grad_norm": 2.306267658791178, "learning_rate": 5.551543223827162e-06, "loss": 0.8783, "step": 6281 }, { "epoch": 0.6574568288854004, "grad_norm": 1.938353410885278, "learning_rate": 5.5485076215377e-06, "loss": 0.8631, "step": 6282 }, { "epoch": 0.6575614861329147, "grad_norm": 3.588827654123888, "learning_rate": 5.545472530733738e-06, "loss": 0.922, "step": 6283 }, { "epoch": 0.6576661433804291, "grad_norm": 2.3146863860542197, "learning_rate": 5.542437951764006e-06, "loss": 0.9648, "step": 6284 }, { "epoch": 0.6577708006279435, "grad_norm": 1.8737153115128953, "learning_rate": 5.539403884977195e-06, "loss": 0.8414, "step": 6285 }, { "epoch": 0.6578754578754579, "grad_norm": 1.92712911224643, "learning_rate": 5.536370330721921e-06, "loss": 0.9653, "step": 6286 }, { "epoch": 0.6579801151229723, "grad_norm": 1.7867220203896153, "learning_rate": 5.533337289346743e-06, "loss": 0.9625, "step": 6287 }, { "epoch": 0.6580847723704867, "grad_norm": 2.618706543055954, "learning_rate": 5.530304761200162e-06, "loss": 1.0739, "step": 6288 }, { "epoch": 0.6581894296180011, "grad_norm": 1.9789921876753085, "learning_rate": 5.527272746630625e-06, "loss": 0.8933, "step": 6289 }, { "epoch": 0.6582940868655154, "grad_norm": 1.9939197083721731, "learning_rate": 5.5242412459865215e-06, "loss": 1.0279, "step": 6290 }, { "epoch": 0.6583987441130298, "grad_norm": 1.911672548685458, "learning_rate": 5.521210259616171e-06, "loss": 0.9188, "step": 6291 }, { "epoch": 0.6585034013605442, "grad_norm": 2.081526787157685, "learning_rate": 5.518179787867841e-06, "loss": 0.8945, "step": 6292 }, { "epoch": 0.6586080586080586, "grad_norm": 2.102166057104072, "learning_rate": 5.515149831089739e-06, "loss": 0.901, "step": 6293 }, { "epoch": 0.658712715855573, "grad_norm": 1.7756557962954296, "learning_rate": 5.51212038963001e-06, "loss": 0.933, "step": 6294 }, { "epoch": 0.6588173731030874, "grad_norm": 1.8389100423069065, "learning_rate": 5.5090914638367495e-06, "loss": 0.9373, "step": 6295 }, { "epoch": 0.6589220303506018, "grad_norm": 2.112443963445209, "learning_rate": 5.506063054057984e-06, "loss": 0.9985, "step": 6296 }, { "epoch": 0.6590266875981162, "grad_norm": 1.9306029603381285, "learning_rate": 5.50303516064168e-06, "loss": 0.7859, "step": 6297 }, { "epoch": 0.6591313448456305, "grad_norm": 1.9275367700602777, "learning_rate": 5.500007783935757e-06, "loss": 0.9392, "step": 6298 }, { "epoch": 0.6592360020931449, "grad_norm": 1.9953473847028906, "learning_rate": 5.4969809242880555e-06, "loss": 1.0136, "step": 6299 }, { "epoch": 0.6593406593406593, "grad_norm": 2.0576053965813808, "learning_rate": 5.493954582046381e-06, "loss": 1.006, "step": 6300 }, { "epoch": 0.6594453165881737, "grad_norm": 1.9656517014244708, "learning_rate": 5.490928757558458e-06, "loss": 0.8898, "step": 6301 }, { "epoch": 0.6595499738356881, "grad_norm": 2.328622538340134, "learning_rate": 5.487903451171963e-06, "loss": 0.9392, "step": 6302 }, { "epoch": 0.6596546310832025, "grad_norm": 2.024875566385732, "learning_rate": 5.484878663234503e-06, "loss": 0.9248, "step": 6303 }, { "epoch": 0.659759288330717, "grad_norm": 2.151355182140815, "learning_rate": 5.481854394093643e-06, "loss": 0.879, "step": 6304 }, { "epoch": 0.6598639455782312, "grad_norm": 2.190977467557319, "learning_rate": 5.478830644096872e-06, "loss": 1.0399, "step": 6305 }, { "epoch": 0.6599686028257457, "grad_norm": 1.7250845041049725, "learning_rate": 5.475807413591621e-06, "loss": 0.8283, "step": 6306 }, { "epoch": 0.6600732600732601, "grad_norm": 2.0039747052514776, "learning_rate": 5.4727847029252735e-06, "loss": 0.8717, "step": 6307 }, { "epoch": 0.6601779173207745, "grad_norm": 2.2311307267829874, "learning_rate": 5.469762512445142e-06, "loss": 0.8769, "step": 6308 }, { "epoch": 0.6602825745682889, "grad_norm": 1.992690818121849, "learning_rate": 5.466740842498477e-06, "loss": 0.8671, "step": 6309 }, { "epoch": 0.6603872318158033, "grad_norm": 2.2879738880594176, "learning_rate": 5.463719693432483e-06, "loss": 0.9096, "step": 6310 }, { "epoch": 0.6604918890633177, "grad_norm": 2.0374811856000465, "learning_rate": 5.460699065594292e-06, "loss": 0.9504, "step": 6311 }, { "epoch": 0.660596546310832, "grad_norm": 1.8719692965200658, "learning_rate": 5.4576789593309805e-06, "loss": 0.9048, "step": 6312 }, { "epoch": 0.6607012035583464, "grad_norm": 2.2231971812121794, "learning_rate": 5.454659374989563e-06, "loss": 0.9228, "step": 6313 }, { "epoch": 0.6608058608058608, "grad_norm": 1.965484690948648, "learning_rate": 5.451640312916995e-06, "loss": 0.9135, "step": 6314 }, { "epoch": 0.6609105180533752, "grad_norm": 2.1030519351417505, "learning_rate": 5.448621773460181e-06, "loss": 0.9864, "step": 6315 }, { "epoch": 0.6610151753008896, "grad_norm": 1.9317968866735937, "learning_rate": 5.445603756965952e-06, "loss": 0.9095, "step": 6316 }, { "epoch": 0.661119832548404, "grad_norm": 2.0771595808477943, "learning_rate": 5.442586263781082e-06, "loss": 0.979, "step": 6317 }, { "epoch": 0.6612244897959184, "grad_norm": 2.108827558618817, "learning_rate": 5.439569294252287e-06, "loss": 0.9105, "step": 6318 }, { "epoch": 0.6613291470434327, "grad_norm": 2.0983819392455314, "learning_rate": 5.436552848726229e-06, "loss": 0.9923, "step": 6319 }, { "epoch": 0.6614338042909471, "grad_norm": 2.473425853943777, "learning_rate": 5.433536927549499e-06, "loss": 1.0007, "step": 6320 }, { "epoch": 0.6615384615384615, "grad_norm": 1.9633903506211725, "learning_rate": 5.430521531068634e-06, "loss": 0.8951, "step": 6321 }, { "epoch": 0.6616431187859759, "grad_norm": 2.100683110676726, "learning_rate": 5.427506659630104e-06, "loss": 0.9602, "step": 6322 }, { "epoch": 0.6617477760334903, "grad_norm": 2.266215087701523, "learning_rate": 5.4244923135803275e-06, "loss": 0.9114, "step": 6323 }, { "epoch": 0.6618524332810047, "grad_norm": 1.8670218973616595, "learning_rate": 5.421478493265664e-06, "loss": 0.9076, "step": 6324 }, { "epoch": 0.6619570905285191, "grad_norm": 2.0326379729684545, "learning_rate": 5.418465199032404e-06, "loss": 0.849, "step": 6325 }, { "epoch": 0.6620617477760334, "grad_norm": 1.9942841480440203, "learning_rate": 5.4154524312267795e-06, "loss": 0.886, "step": 6326 }, { "epoch": 0.6621664050235478, "grad_norm": 2.0675772411489386, "learning_rate": 5.412440190194965e-06, "loss": 0.8767, "step": 6327 }, { "epoch": 0.6622710622710622, "grad_norm": 2.216673868421644, "learning_rate": 5.409428476283068e-06, "loss": 0.8531, "step": 6328 }, { "epoch": 0.6623757195185767, "grad_norm": 2.114144277955494, "learning_rate": 5.4064172898371506e-06, "loss": 0.8506, "step": 6329 }, { "epoch": 0.6624803767660911, "grad_norm": 2.11736267037912, "learning_rate": 5.403406631203197e-06, "loss": 0.8308, "step": 6330 }, { "epoch": 0.6625850340136055, "grad_norm": 1.8344929417267373, "learning_rate": 5.400396500727141e-06, "loss": 0.885, "step": 6331 }, { "epoch": 0.6626896912611199, "grad_norm": 2.005302325582287, "learning_rate": 5.397386898754847e-06, "loss": 0.7028, "step": 6332 }, { "epoch": 0.6627943485086342, "grad_norm": 2.007310766798443, "learning_rate": 5.394377825632129e-06, "loss": 1.0009, "step": 6333 }, { "epoch": 0.6628990057561486, "grad_norm": 2.298430147972449, "learning_rate": 5.3913692817047395e-06, "loss": 0.7607, "step": 6334 }, { "epoch": 0.663003663003663, "grad_norm": 2.0426983676977746, "learning_rate": 5.388361267318362e-06, "loss": 1.0074, "step": 6335 }, { "epoch": 0.6631083202511774, "grad_norm": 2.36635698834763, "learning_rate": 5.385353782818623e-06, "loss": 0.8771, "step": 6336 }, { "epoch": 0.6632129774986918, "grad_norm": 1.89057291548456, "learning_rate": 5.382346828551086e-06, "loss": 0.9702, "step": 6337 }, { "epoch": 0.6633176347462062, "grad_norm": 2.1549128599478347, "learning_rate": 5.379340404861263e-06, "loss": 0.9578, "step": 6338 }, { "epoch": 0.6634222919937206, "grad_norm": 2.264597570417053, "learning_rate": 5.3763345120945944e-06, "loss": 0.9258, "step": 6339 }, { "epoch": 0.663526949241235, "grad_norm": 1.9983468153168773, "learning_rate": 5.37332915059646e-06, "loss": 0.8108, "step": 6340 }, { "epoch": 0.6636316064887493, "grad_norm": 2.199629599477845, "learning_rate": 5.370324320712189e-06, "loss": 0.7242, "step": 6341 }, { "epoch": 0.6637362637362637, "grad_norm": 2.037900821563259, "learning_rate": 5.3673200227870345e-06, "loss": 0.856, "step": 6342 }, { "epoch": 0.6638409209837781, "grad_norm": 1.9505192747155962, "learning_rate": 5.364316257166207e-06, "loss": 0.8252, "step": 6343 }, { "epoch": 0.6639455782312925, "grad_norm": 2.092352950973099, "learning_rate": 5.361313024194837e-06, "loss": 0.9537, "step": 6344 }, { "epoch": 0.6640502354788069, "grad_norm": 2.284549338180081, "learning_rate": 5.358310324218003e-06, "loss": 0.7024, "step": 6345 }, { "epoch": 0.6641548927263213, "grad_norm": 2.113760328686243, "learning_rate": 5.355308157580725e-06, "loss": 0.8261, "step": 6346 }, { "epoch": 0.6642595499738357, "grad_norm": 2.256190025263873, "learning_rate": 5.35230652462795e-06, "loss": 0.9598, "step": 6347 }, { "epoch": 0.66436420722135, "grad_norm": 1.9207259907187089, "learning_rate": 5.349305425704583e-06, "loss": 0.9572, "step": 6348 }, { "epoch": 0.6644688644688644, "grad_norm": 2.3515307832470334, "learning_rate": 5.346304861155445e-06, "loss": 0.9013, "step": 6349 }, { "epoch": 0.6645735217163788, "grad_norm": 1.988489105543637, "learning_rate": 5.343304831325315e-06, "loss": 0.8901, "step": 6350 }, { "epoch": 0.6646781789638933, "grad_norm": 2.1734081104477965, "learning_rate": 5.340305336558902e-06, "loss": 1.0388, "step": 6351 }, { "epoch": 0.6647828362114077, "grad_norm": 2.0143701834494263, "learning_rate": 5.337306377200848e-06, "loss": 0.8924, "step": 6352 }, { "epoch": 0.6648874934589221, "grad_norm": 1.9233691798259414, "learning_rate": 5.334307953595747e-06, "loss": 0.9476, "step": 6353 }, { "epoch": 0.6649921507064365, "grad_norm": 2.130187551657956, "learning_rate": 5.331310066088121e-06, "loss": 0.8532, "step": 6354 }, { "epoch": 0.6650968079539508, "grad_norm": 1.9538394918636528, "learning_rate": 5.328312715022432e-06, "loss": 0.9532, "step": 6355 }, { "epoch": 0.6652014652014652, "grad_norm": 2.3831371289530074, "learning_rate": 5.32531590074308e-06, "loss": 0.9532, "step": 6356 }, { "epoch": 0.6653061224489796, "grad_norm": 2.4969974074294585, "learning_rate": 5.32231962359441e-06, "loss": 0.9987, "step": 6357 }, { "epoch": 0.665410779696494, "grad_norm": 2.176103875459444, "learning_rate": 5.319323883920695e-06, "loss": 0.988, "step": 6358 }, { "epoch": 0.6655154369440084, "grad_norm": 1.6925971627256475, "learning_rate": 5.316328682066157e-06, "loss": 0.9428, "step": 6359 }, { "epoch": 0.6656200941915228, "grad_norm": 2.0471516717225784, "learning_rate": 5.313334018374949e-06, "loss": 0.921, "step": 6360 }, { "epoch": 0.6657247514390372, "grad_norm": 1.9398078548541235, "learning_rate": 5.310339893191161e-06, "loss": 0.8818, "step": 6361 }, { "epoch": 0.6658294086865515, "grad_norm": 1.9679163945822484, "learning_rate": 5.307346306858827e-06, "loss": 0.9378, "step": 6362 }, { "epoch": 0.6659340659340659, "grad_norm": 1.9372496636716319, "learning_rate": 5.304353259721917e-06, "loss": 0.8058, "step": 6363 }, { "epoch": 0.6660387231815803, "grad_norm": 1.943843808421226, "learning_rate": 5.301360752124337e-06, "loss": 0.8519, "step": 6364 }, { "epoch": 0.6661433804290947, "grad_norm": 2.0411186947239552, "learning_rate": 5.298368784409931e-06, "loss": 0.8791, "step": 6365 }, { "epoch": 0.6662480376766091, "grad_norm": 2.2814426931295793, "learning_rate": 5.29537735692248e-06, "loss": 1.0396, "step": 6366 }, { "epoch": 0.6663526949241235, "grad_norm": 2.0786777164691888, "learning_rate": 5.292386470005706e-06, "loss": 0.844, "step": 6367 }, { "epoch": 0.6664573521716379, "grad_norm": 2.04748550942362, "learning_rate": 5.289396124003274e-06, "loss": 1.0169, "step": 6368 }, { "epoch": 0.6665620094191522, "grad_norm": 2.123957153054228, "learning_rate": 5.286406319258779e-06, "loss": 0.9936, "step": 6369 }, { "epoch": 0.6666666666666666, "grad_norm": 2.0189091508075583, "learning_rate": 5.2834170561157514e-06, "loss": 0.8023, "step": 6370 }, { "epoch": 0.666771323914181, "grad_norm": 1.9067663948673013, "learning_rate": 5.280428334917662e-06, "loss": 0.8841, "step": 6371 }, { "epoch": 0.6668759811616954, "grad_norm": 2.0191663801552426, "learning_rate": 5.277440156007929e-06, "loss": 0.8464, "step": 6372 }, { "epoch": 0.6669806384092098, "grad_norm": 1.9211457524771827, "learning_rate": 5.274452519729895e-06, "loss": 0.8674, "step": 6373 }, { "epoch": 0.6670852956567243, "grad_norm": 1.9382829851230479, "learning_rate": 5.271465426426847e-06, "loss": 1.0491, "step": 6374 }, { "epoch": 0.6671899529042387, "grad_norm": 1.8888165263402947, "learning_rate": 5.268478876442003e-06, "loss": 0.8538, "step": 6375 }, { "epoch": 0.667294610151753, "grad_norm": 2.091423517591985, "learning_rate": 5.2654928701185274e-06, "loss": 0.8253, "step": 6376 }, { "epoch": 0.6673992673992674, "grad_norm": 2.2768215094704822, "learning_rate": 5.262507407799522e-06, "loss": 0.9538, "step": 6377 }, { "epoch": 0.6675039246467818, "grad_norm": 2.1494565343878533, "learning_rate": 5.259522489828022e-06, "loss": 0.9563, "step": 6378 }, { "epoch": 0.6676085818942962, "grad_norm": 2.189850705311329, "learning_rate": 5.2565381165469954e-06, "loss": 0.8845, "step": 6379 }, { "epoch": 0.6677132391418106, "grad_norm": 2.3476427995274882, "learning_rate": 5.253554288299352e-06, "loss": 0.893, "step": 6380 }, { "epoch": 0.667817896389325, "grad_norm": 1.92825991613659, "learning_rate": 5.250571005427947e-06, "loss": 0.8203, "step": 6381 }, { "epoch": 0.6679225536368394, "grad_norm": 2.031488675623292, "learning_rate": 5.247588268275561e-06, "loss": 0.8845, "step": 6382 }, { "epoch": 0.6680272108843538, "grad_norm": 1.7586373295933992, "learning_rate": 5.244606077184918e-06, "loss": 0.7734, "step": 6383 }, { "epoch": 0.6681318681318681, "grad_norm": 2.394679003738517, "learning_rate": 5.241624432498673e-06, "loss": 0.9904, "step": 6384 }, { "epoch": 0.6682365253793825, "grad_norm": 2.2617940928572047, "learning_rate": 5.23864333455943e-06, "loss": 0.9123, "step": 6385 }, { "epoch": 0.6683411826268969, "grad_norm": 6.141677769409521, "learning_rate": 5.235662783709717e-06, "loss": 0.8455, "step": 6386 }, { "epoch": 0.6684458398744113, "grad_norm": 1.8536950099015304, "learning_rate": 5.232682780292012e-06, "loss": 0.872, "step": 6387 }, { "epoch": 0.6685504971219257, "grad_norm": 1.9152603791182172, "learning_rate": 5.22970332464872e-06, "loss": 0.9247, "step": 6388 }, { "epoch": 0.6686551543694401, "grad_norm": 2.1906359883969166, "learning_rate": 5.2267244171221864e-06, "loss": 0.9432, "step": 6389 }, { "epoch": 0.6687598116169545, "grad_norm": 1.8408546196814852, "learning_rate": 5.223746058054691e-06, "loss": 0.9843, "step": 6390 }, { "epoch": 0.6688644688644688, "grad_norm": 1.977899230941687, "learning_rate": 5.220768247788458e-06, "loss": 0.8972, "step": 6391 }, { "epoch": 0.6689691261119832, "grad_norm": 2.0400496145208225, "learning_rate": 5.217790986665639e-06, "loss": 0.8671, "step": 6392 }, { "epoch": 0.6690737833594976, "grad_norm": 2.7116537319504066, "learning_rate": 5.214814275028334e-06, "loss": 1.0178, "step": 6393 }, { "epoch": 0.669178440607012, "grad_norm": 2.073375860762836, "learning_rate": 5.211838113218568e-06, "loss": 0.9932, "step": 6394 }, { "epoch": 0.6692830978545264, "grad_norm": 2.096782968232844, "learning_rate": 5.208862501578307e-06, "loss": 0.7974, "step": 6395 }, { "epoch": 0.6693877551020408, "grad_norm": 1.8056629099927985, "learning_rate": 5.205887440449462e-06, "loss": 0.8922, "step": 6396 }, { "epoch": 0.6694924123495553, "grad_norm": 2.1562515150078734, "learning_rate": 5.202912930173867e-06, "loss": 0.9763, "step": 6397 }, { "epoch": 0.6695970695970695, "grad_norm": 1.964969717746662, "learning_rate": 5.1999389710933015e-06, "loss": 0.7849, "step": 6398 }, { "epoch": 0.669701726844584, "grad_norm": 2.1575433456324498, "learning_rate": 5.196965563549475e-06, "loss": 0.8537, "step": 6399 }, { "epoch": 0.6698063840920984, "grad_norm": 2.2390395785938106, "learning_rate": 5.193992707884045e-06, "loss": 0.9165, "step": 6400 }, { "epoch": 0.6699110413396128, "grad_norm": 2.0512664839575727, "learning_rate": 5.1910204044385935e-06, "loss": 0.8743, "step": 6401 }, { "epoch": 0.6700156985871272, "grad_norm": 1.832927563252606, "learning_rate": 5.188048653554649e-06, "loss": 0.9634, "step": 6402 }, { "epoch": 0.6701203558346416, "grad_norm": 2.163369405212209, "learning_rate": 5.185077455573671e-06, "loss": 0.8162, "step": 6403 }, { "epoch": 0.670225013082156, "grad_norm": 2.096116535555188, "learning_rate": 5.182106810837053e-06, "loss": 0.8113, "step": 6404 }, { "epoch": 0.6703296703296703, "grad_norm": 2.2187096652736225, "learning_rate": 5.179136719686124e-06, "loss": 0.7653, "step": 6405 }, { "epoch": 0.6704343275771847, "grad_norm": 1.7426313844832375, "learning_rate": 5.176167182462164e-06, "loss": 0.7568, "step": 6406 }, { "epoch": 0.6705389848246991, "grad_norm": 2.610383984694903, "learning_rate": 5.173198199506375e-06, "loss": 1.0115, "step": 6407 }, { "epoch": 0.6706436420722135, "grad_norm": 1.7869109945081258, "learning_rate": 5.170229771159896e-06, "loss": 0.8499, "step": 6408 }, { "epoch": 0.6707482993197279, "grad_norm": 1.8327398454588113, "learning_rate": 5.167261897763804e-06, "loss": 0.9006, "step": 6409 }, { "epoch": 0.6708529565672423, "grad_norm": 1.7464546014687978, "learning_rate": 5.164294579659117e-06, "loss": 0.8632, "step": 6410 }, { "epoch": 0.6709576138147567, "grad_norm": 2.0024420499608797, "learning_rate": 5.161327817186789e-06, "loss": 0.8386, "step": 6411 }, { "epoch": 0.671062271062271, "grad_norm": 3.683976549889003, "learning_rate": 5.158361610687704e-06, "loss": 0.9202, "step": 6412 }, { "epoch": 0.6711669283097854, "grad_norm": 2.418966089950013, "learning_rate": 5.155395960502685e-06, "loss": 0.8548, "step": 6413 }, { "epoch": 0.6712715855572998, "grad_norm": 1.8796425565869193, "learning_rate": 5.152430866972489e-06, "loss": 0.8549, "step": 6414 }, { "epoch": 0.6713762428048142, "grad_norm": 2.208142074213644, "learning_rate": 5.1494663304378144e-06, "loss": 0.9177, "step": 6415 }, { "epoch": 0.6714809000523286, "grad_norm": 2.2446841553324903, "learning_rate": 5.146502351239293e-06, "loss": 0.8943, "step": 6416 }, { "epoch": 0.671585557299843, "grad_norm": 1.8751432705355553, "learning_rate": 5.143538929717491e-06, "loss": 0.7813, "step": 6417 }, { "epoch": 0.6716902145473574, "grad_norm": 1.9930880359584375, "learning_rate": 5.140576066212906e-06, "loss": 0.8551, "step": 6418 }, { "epoch": 0.6717948717948717, "grad_norm": 2.4278638629645815, "learning_rate": 5.137613761065983e-06, "loss": 0.9474, "step": 6419 }, { "epoch": 0.6718995290423861, "grad_norm": 2.14429691352793, "learning_rate": 5.134652014617099e-06, "loss": 0.9384, "step": 6420 }, { "epoch": 0.6720041862899006, "grad_norm": 1.8951332364379743, "learning_rate": 5.131690827206563e-06, "loss": 0.9869, "step": 6421 }, { "epoch": 0.672108843537415, "grad_norm": 1.9516044400568116, "learning_rate": 5.128730199174619e-06, "loss": 0.885, "step": 6422 }, { "epoch": 0.6722135007849294, "grad_norm": 2.1976500792646414, "learning_rate": 5.125770130861449e-06, "loss": 1.0194, "step": 6423 }, { "epoch": 0.6723181580324438, "grad_norm": 2.0018038250643317, "learning_rate": 5.12281062260717e-06, "loss": 0.9788, "step": 6424 }, { "epoch": 0.6724228152799582, "grad_norm": 2.1270551713262744, "learning_rate": 5.119851674751841e-06, "loss": 0.9429, "step": 6425 }, { "epoch": 0.6725274725274726, "grad_norm": 2.025799543591311, "learning_rate": 5.116893287635448e-06, "loss": 0.7762, "step": 6426 }, { "epoch": 0.6726321297749869, "grad_norm": 1.8699078179658104, "learning_rate": 5.1139354615979105e-06, "loss": 0.8726, "step": 6427 }, { "epoch": 0.6727367870225013, "grad_norm": 1.9041060855902985, "learning_rate": 5.110978196979098e-06, "loss": 0.84, "step": 6428 }, { "epoch": 0.6728414442700157, "grad_norm": 2.1433565757666115, "learning_rate": 5.1080214941187975e-06, "loss": 0.954, "step": 6429 }, { "epoch": 0.6729461015175301, "grad_norm": 2.046061335552731, "learning_rate": 5.1050653533567496e-06, "loss": 0.8755, "step": 6430 }, { "epoch": 0.6730507587650445, "grad_norm": 1.8884753453862546, "learning_rate": 5.102109775032615e-06, "loss": 0.7611, "step": 6431 }, { "epoch": 0.6731554160125589, "grad_norm": 2.033544305804191, "learning_rate": 5.0991547594859955e-06, "loss": 0.908, "step": 6432 }, { "epoch": 0.6732600732600733, "grad_norm": 1.8338267562653663, "learning_rate": 5.096200307056426e-06, "loss": 1.005, "step": 6433 }, { "epoch": 0.6733647305075876, "grad_norm": 2.108484710554183, "learning_rate": 5.093246418083386e-06, "loss": 0.9521, "step": 6434 }, { "epoch": 0.673469387755102, "grad_norm": 2.085184894766022, "learning_rate": 5.09029309290628e-06, "loss": 0.8918, "step": 6435 }, { "epoch": 0.6735740450026164, "grad_norm": 2.1541212873096636, "learning_rate": 5.087340331864446e-06, "loss": 0.7858, "step": 6436 }, { "epoch": 0.6736787022501308, "grad_norm": 2.1748331560744893, "learning_rate": 5.084388135297171e-06, "loss": 0.9436, "step": 6437 }, { "epoch": 0.6737833594976452, "grad_norm": 2.213867089819224, "learning_rate": 5.08143650354366e-06, "loss": 0.9051, "step": 6438 }, { "epoch": 0.6738880167451596, "grad_norm": 2.276279627419914, "learning_rate": 5.07848543694307e-06, "loss": 0.8651, "step": 6439 }, { "epoch": 0.673992673992674, "grad_norm": 1.7847556728333231, "learning_rate": 5.075534935834481e-06, "loss": 0.7922, "step": 6440 }, { "epoch": 0.6740973312401883, "grad_norm": 1.9203154340553588, "learning_rate": 5.07258500055691e-06, "loss": 0.8485, "step": 6441 }, { "epoch": 0.6742019884877027, "grad_norm": 2.033181016154668, "learning_rate": 5.069635631449311e-06, "loss": 0.9363, "step": 6442 }, { "epoch": 0.6743066457352171, "grad_norm": 1.953116403946782, "learning_rate": 5.066686828850569e-06, "loss": 0.9573, "step": 6443 }, { "epoch": 0.6744113029827316, "grad_norm": 2.146061199630762, "learning_rate": 5.063738593099512e-06, "loss": 0.883, "step": 6444 }, { "epoch": 0.674515960230246, "grad_norm": 2.1400402168097457, "learning_rate": 5.060790924534902e-06, "loss": 0.8179, "step": 6445 }, { "epoch": 0.6746206174777604, "grad_norm": 1.7980728816836642, "learning_rate": 5.057843823495425e-06, "loss": 0.8783, "step": 6446 }, { "epoch": 0.6747252747252748, "grad_norm": 2.283729906739794, "learning_rate": 5.054897290319713e-06, "loss": 0.8026, "step": 6447 }, { "epoch": 0.6748299319727891, "grad_norm": 2.037589904474863, "learning_rate": 5.051951325346325e-06, "loss": 0.9345, "step": 6448 }, { "epoch": 0.6749345892203035, "grad_norm": 2.1144335870638185, "learning_rate": 5.049005928913762e-06, "loss": 0.9946, "step": 6449 }, { "epoch": 0.6750392464678179, "grad_norm": 1.936401258521119, "learning_rate": 5.046061101360456e-06, "loss": 0.8188, "step": 6450 }, { "epoch": 0.6751439037153323, "grad_norm": 2.2507533396021926, "learning_rate": 5.04311684302477e-06, "loss": 0.7269, "step": 6451 }, { "epoch": 0.6752485609628467, "grad_norm": 1.963538133706413, "learning_rate": 5.0401731542450046e-06, "loss": 0.8515, "step": 6452 }, { "epoch": 0.6753532182103611, "grad_norm": 1.9890165032492324, "learning_rate": 5.037230035359398e-06, "loss": 0.8833, "step": 6453 }, { "epoch": 0.6754578754578755, "grad_norm": 1.9566124199728854, "learning_rate": 5.034287486706126e-06, "loss": 0.8651, "step": 6454 }, { "epoch": 0.6755625327053898, "grad_norm": 1.9567451091759998, "learning_rate": 5.031345508623287e-06, "loss": 0.8554, "step": 6455 }, { "epoch": 0.6756671899529042, "grad_norm": 2.0495146610167954, "learning_rate": 5.028404101448923e-06, "loss": 0.9391, "step": 6456 }, { "epoch": 0.6757718472004186, "grad_norm": 1.9180912820360527, "learning_rate": 5.025463265521001e-06, "loss": 0.8958, "step": 6457 }, { "epoch": 0.675876504447933, "grad_norm": 2.108925051546432, "learning_rate": 5.0225230011774395e-06, "loss": 0.9158, "step": 6458 }, { "epoch": 0.6759811616954474, "grad_norm": 2.0210637600732655, "learning_rate": 5.0195833087560745e-06, "loss": 0.8969, "step": 6459 }, { "epoch": 0.6760858189429618, "grad_norm": 1.978491474863179, "learning_rate": 5.016644188594683e-06, "loss": 0.9462, "step": 6460 }, { "epoch": 0.6761904761904762, "grad_norm": 1.7918602559844456, "learning_rate": 5.013705641030978e-06, "loss": 0.916, "step": 6461 }, { "epoch": 0.6762951334379905, "grad_norm": 2.1981945869138233, "learning_rate": 5.010767666402599e-06, "loss": 0.9247, "step": 6462 }, { "epoch": 0.6763997906855049, "grad_norm": 2.1851426012383866, "learning_rate": 5.007830265047129e-06, "loss": 0.9502, "step": 6463 }, { "epoch": 0.6765044479330193, "grad_norm": 2.2216190305636596, "learning_rate": 5.004893437302085e-06, "loss": 1.0375, "step": 6464 }, { "epoch": 0.6766091051805337, "grad_norm": 1.815266465146527, "learning_rate": 5.001957183504913e-06, "loss": 0.9195, "step": 6465 }, { "epoch": 0.6767137624280481, "grad_norm": 2.0023594949351593, "learning_rate": 4.999021503992992e-06, "loss": 0.7293, "step": 6466 }, { "epoch": 0.6768184196755626, "grad_norm": 2.5183477674119086, "learning_rate": 4.996086399103633e-06, "loss": 0.9346, "step": 6467 }, { "epoch": 0.676923076923077, "grad_norm": 2.98805111119296, "learning_rate": 4.9931518691740954e-06, "loss": 0.8498, "step": 6468 }, { "epoch": 0.6770277341705914, "grad_norm": 1.9793193853571587, "learning_rate": 4.990217914541559e-06, "loss": 0.8217, "step": 6469 }, { "epoch": 0.6771323914181057, "grad_norm": 2.2564143430387302, "learning_rate": 4.987284535543139e-06, "loss": 0.8779, "step": 6470 }, { "epoch": 0.6772370486656201, "grad_norm": 1.671524269897445, "learning_rate": 4.984351732515883e-06, "loss": 0.8147, "step": 6471 }, { "epoch": 0.6773417059131345, "grad_norm": 2.255727997330643, "learning_rate": 4.981419505796782e-06, "loss": 0.9483, "step": 6472 }, { "epoch": 0.6774463631606489, "grad_norm": 2.0870637943921917, "learning_rate": 4.978487855722757e-06, "loss": 0.9186, "step": 6473 }, { "epoch": 0.6775510204081633, "grad_norm": 2.061420193131777, "learning_rate": 4.975556782630657e-06, "loss": 0.8609, "step": 6474 }, { "epoch": 0.6776556776556777, "grad_norm": 2.1025694958037966, "learning_rate": 4.972626286857268e-06, "loss": 0.9063, "step": 6475 }, { "epoch": 0.6777603349031921, "grad_norm": 2.0926436744834533, "learning_rate": 4.969696368739308e-06, "loss": 0.9524, "step": 6476 }, { "epoch": 0.6778649921507064, "grad_norm": 2.4314254518234386, "learning_rate": 4.966767028613435e-06, "loss": 0.965, "step": 6477 }, { "epoch": 0.6779696493982208, "grad_norm": 2.473343295432155, "learning_rate": 4.963838266816234e-06, "loss": 0.7574, "step": 6478 }, { "epoch": 0.6780743066457352, "grad_norm": 2.163717925184423, "learning_rate": 4.960910083684222e-06, "loss": 0.9157, "step": 6479 }, { "epoch": 0.6781789638932496, "grad_norm": 1.9819090705808335, "learning_rate": 4.95798247955386e-06, "loss": 0.7951, "step": 6480 }, { "epoch": 0.678283621140764, "grad_norm": 2.306299973255293, "learning_rate": 4.955055454761532e-06, "loss": 0.8959, "step": 6481 }, { "epoch": 0.6783882783882784, "grad_norm": 1.8508763399022372, "learning_rate": 4.952129009643557e-06, "loss": 0.8809, "step": 6482 }, { "epoch": 0.6784929356357928, "grad_norm": 2.1808393788781903, "learning_rate": 4.949203144536195e-06, "loss": 0.8524, "step": 6483 }, { "epoch": 0.6785975928833071, "grad_norm": 2.6263905356474657, "learning_rate": 4.946277859775631e-06, "loss": 0.9204, "step": 6484 }, { "epoch": 0.6787022501308215, "grad_norm": 1.9970126875729537, "learning_rate": 4.943353155697985e-06, "loss": 0.8717, "step": 6485 }, { "epoch": 0.6788069073783359, "grad_norm": 2.0984787953717556, "learning_rate": 4.94042903263931e-06, "loss": 0.9325, "step": 6486 }, { "epoch": 0.6789115646258503, "grad_norm": 2.00008151496788, "learning_rate": 4.937505490935599e-06, "loss": 0.9307, "step": 6487 }, { "epoch": 0.6790162218733647, "grad_norm": 1.9504354659273084, "learning_rate": 4.934582530922765e-06, "loss": 0.889, "step": 6488 }, { "epoch": 0.6791208791208792, "grad_norm": 1.9738078584785779, "learning_rate": 4.931660152936673e-06, "loss": 0.829, "step": 6489 }, { "epoch": 0.6792255363683936, "grad_norm": 2.258801843221124, "learning_rate": 4.928738357313102e-06, "loss": 0.865, "step": 6490 }, { "epoch": 0.6793301936159079, "grad_norm": 1.7536804465963125, "learning_rate": 4.9258171443877715e-06, "loss": 0.9223, "step": 6491 }, { "epoch": 0.6794348508634223, "grad_norm": 1.8798393029654916, "learning_rate": 4.922896514496341e-06, "loss": 0.8812, "step": 6492 }, { "epoch": 0.6795395081109367, "grad_norm": 1.9041463209466114, "learning_rate": 4.919976467974393e-06, "loss": 0.8996, "step": 6493 }, { "epoch": 0.6796441653584511, "grad_norm": 2.2063895357599925, "learning_rate": 4.917057005157447e-06, "loss": 0.8618, "step": 6494 }, { "epoch": 0.6797488226059655, "grad_norm": 2.068532864168024, "learning_rate": 4.914138126380952e-06, "loss": 0.9522, "step": 6495 }, { "epoch": 0.6798534798534799, "grad_norm": 1.870324584594993, "learning_rate": 4.911219831980299e-06, "loss": 0.9341, "step": 6496 }, { "epoch": 0.6799581371009943, "grad_norm": 2.014330793845203, "learning_rate": 4.908302122290801e-06, "loss": 0.8728, "step": 6497 }, { "epoch": 0.6800627943485086, "grad_norm": 2.038100868952104, "learning_rate": 4.905384997647715e-06, "loss": 0.8701, "step": 6498 }, { "epoch": 0.680167451596023, "grad_norm": 2.1455491337861843, "learning_rate": 4.9024684583862206e-06, "loss": 0.8911, "step": 6499 }, { "epoch": 0.6802721088435374, "grad_norm": 1.903568909831072, "learning_rate": 4.8995525048414325e-06, "loss": 0.7694, "step": 6500 }, { "epoch": 0.6803767660910518, "grad_norm": 1.6985342718111633, "learning_rate": 4.896637137348399e-06, "loss": 0.8418, "step": 6501 }, { "epoch": 0.6804814233385662, "grad_norm": 2.2017283202638773, "learning_rate": 4.893722356242108e-06, "loss": 0.9125, "step": 6502 }, { "epoch": 0.6805860805860806, "grad_norm": 2.2531458138902702, "learning_rate": 4.8908081618574685e-06, "loss": 0.8669, "step": 6503 }, { "epoch": 0.680690737833595, "grad_norm": 1.853444123308966, "learning_rate": 4.88789455452933e-06, "loss": 0.8773, "step": 6504 }, { "epoch": 0.6807953950811093, "grad_norm": 2.3305108211749928, "learning_rate": 4.884981534592466e-06, "loss": 0.9728, "step": 6505 }, { "epoch": 0.6809000523286237, "grad_norm": 2.424972902046859, "learning_rate": 4.882069102381593e-06, "loss": 0.9775, "step": 6506 }, { "epoch": 0.6810047095761381, "grad_norm": 2.5274554940365643, "learning_rate": 4.879157258231358e-06, "loss": 0.8468, "step": 6507 }, { "epoch": 0.6811093668236525, "grad_norm": 2.0276231050948446, "learning_rate": 4.876246002476337e-06, "loss": 0.933, "step": 6508 }, { "epoch": 0.6812140240711669, "grad_norm": 1.9937132552470647, "learning_rate": 4.873335335451036e-06, "loss": 1.0518, "step": 6509 }, { "epoch": 0.6813186813186813, "grad_norm": 1.920197673037133, "learning_rate": 4.870425257489895e-06, "loss": 0.8334, "step": 6510 }, { "epoch": 0.6814233385661957, "grad_norm": 1.7724221748359235, "learning_rate": 4.8675157689272936e-06, "loss": 0.8897, "step": 6511 }, { "epoch": 0.6815279958137102, "grad_norm": 2.30963352647495, "learning_rate": 4.864606870097535e-06, "loss": 0.867, "step": 6512 }, { "epoch": 0.6816326530612244, "grad_norm": 1.9385477533205635, "learning_rate": 4.861698561334858e-06, "loss": 0.842, "step": 6513 }, { "epoch": 0.6817373103087389, "grad_norm": 2.362323418390193, "learning_rate": 4.858790842973428e-06, "loss": 1.0088, "step": 6514 }, { "epoch": 0.6818419675562533, "grad_norm": 1.9262422529795675, "learning_rate": 4.855883715347353e-06, "loss": 0.8423, "step": 6515 }, { "epoch": 0.6819466248037677, "grad_norm": 1.9990004709172928, "learning_rate": 4.852977178790671e-06, "loss": 0.9374, "step": 6516 }, { "epoch": 0.6820512820512821, "grad_norm": 2.3761732334999084, "learning_rate": 4.850071233637345e-06, "loss": 0.9739, "step": 6517 }, { "epoch": 0.6821559392987965, "grad_norm": 1.9130503375804242, "learning_rate": 4.847165880221275e-06, "loss": 0.9095, "step": 6518 }, { "epoch": 0.6822605965463109, "grad_norm": 1.8654272704894526, "learning_rate": 4.844261118876291e-06, "loss": 0.8022, "step": 6519 }, { "epoch": 0.6823652537938252, "grad_norm": 2.2423240527964166, "learning_rate": 4.841356949936152e-06, "loss": 1.0298, "step": 6520 }, { "epoch": 0.6824699110413396, "grad_norm": 2.0407588030898745, "learning_rate": 4.838453373734562e-06, "loss": 0.947, "step": 6521 }, { "epoch": 0.682574568288854, "grad_norm": 2.273219656507149, "learning_rate": 4.835550390605141e-06, "loss": 0.8439, "step": 6522 }, { "epoch": 0.6826792255363684, "grad_norm": 2.3321959685240374, "learning_rate": 4.832648000881448e-06, "loss": 0.9268, "step": 6523 }, { "epoch": 0.6827838827838828, "grad_norm": 1.922263615943614, "learning_rate": 4.829746204896978e-06, "loss": 0.953, "step": 6524 }, { "epoch": 0.6828885400313972, "grad_norm": 1.9490059628377225, "learning_rate": 4.8268450029851456e-06, "loss": 0.8965, "step": 6525 }, { "epoch": 0.6829931972789116, "grad_norm": 2.0239108880099144, "learning_rate": 4.823944395479314e-06, "loss": 0.9316, "step": 6526 }, { "epoch": 0.6830978545264259, "grad_norm": 2.019795192924344, "learning_rate": 4.821044382712764e-06, "loss": 0.9969, "step": 6527 }, { "epoch": 0.6832025117739403, "grad_norm": 2.066141054067395, "learning_rate": 4.818144965018714e-06, "loss": 0.911, "step": 6528 }, { "epoch": 0.6833071690214547, "grad_norm": 2.085608618994838, "learning_rate": 4.8152461427303075e-06, "loss": 0.8309, "step": 6529 }, { "epoch": 0.6834118262689691, "grad_norm": 2.546699608829966, "learning_rate": 4.812347916180634e-06, "loss": 0.9144, "step": 6530 }, { "epoch": 0.6835164835164835, "grad_norm": 2.214180533685667, "learning_rate": 4.809450285702697e-06, "loss": 0.8954, "step": 6531 }, { "epoch": 0.6836211407639979, "grad_norm": 2.308704951945097, "learning_rate": 4.806553251629449e-06, "loss": 0.7669, "step": 6532 }, { "epoch": 0.6837257980115123, "grad_norm": 1.9800008001673324, "learning_rate": 4.803656814293761e-06, "loss": 0.8632, "step": 6533 }, { "epoch": 0.6838304552590266, "grad_norm": 1.6800482696957828, "learning_rate": 4.800760974028435e-06, "loss": 0.816, "step": 6534 }, { "epoch": 0.683935112506541, "grad_norm": 2.1903832073879825, "learning_rate": 4.7978657311662155e-06, "loss": 0.9341, "step": 6535 }, { "epoch": 0.6840397697540554, "grad_norm": 2.1866955766796607, "learning_rate": 4.794971086039771e-06, "loss": 0.9187, "step": 6536 }, { "epoch": 0.6841444270015699, "grad_norm": 2.171235833925488, "learning_rate": 4.7920770389816995e-06, "loss": 0.9389, "step": 6537 }, { "epoch": 0.6842490842490843, "grad_norm": 2.133592554033059, "learning_rate": 4.7891835903245345e-06, "loss": 0.8337, "step": 6538 }, { "epoch": 0.6843537414965987, "grad_norm": 2.164792966032931, "learning_rate": 4.786290740400734e-06, "loss": 0.8745, "step": 6539 }, { "epoch": 0.6844583987441131, "grad_norm": 2.566602076396997, "learning_rate": 4.783398489542696e-06, "loss": 1.0129, "step": 6540 }, { "epoch": 0.6845630559916274, "grad_norm": 2.1672816313961385, "learning_rate": 4.780506838082752e-06, "loss": 0.91, "step": 6541 }, { "epoch": 0.6846677132391418, "grad_norm": 2.5299316051254173, "learning_rate": 4.7776157863531535e-06, "loss": 0.8804, "step": 6542 }, { "epoch": 0.6847723704866562, "grad_norm": 2.050095970695568, "learning_rate": 4.7747253346860865e-06, "loss": 0.9931, "step": 6543 }, { "epoch": 0.6848770277341706, "grad_norm": 1.950038717236096, "learning_rate": 4.771835483413668e-06, "loss": 0.8983, "step": 6544 }, { "epoch": 0.684981684981685, "grad_norm": 1.7294746393168354, "learning_rate": 4.768946232867956e-06, "loss": 0.7711, "step": 6545 }, { "epoch": 0.6850863422291994, "grad_norm": 2.1177999258178346, "learning_rate": 4.766057583380925e-06, "loss": 0.9021, "step": 6546 }, { "epoch": 0.6851909994767138, "grad_norm": 2.1071905114151597, "learning_rate": 4.763169535284488e-06, "loss": 0.923, "step": 6547 }, { "epoch": 0.6852956567242282, "grad_norm": 2.186034990894795, "learning_rate": 4.760282088910485e-06, "loss": 0.9689, "step": 6548 }, { "epoch": 0.6854003139717425, "grad_norm": 2.0749256838860552, "learning_rate": 4.757395244590692e-06, "loss": 0.9672, "step": 6549 }, { "epoch": 0.6855049712192569, "grad_norm": 1.9694250708561203, "learning_rate": 4.754509002656815e-06, "loss": 0.936, "step": 6550 }, { "epoch": 0.6856096284667713, "grad_norm": 1.698078542210978, "learning_rate": 4.751623363440488e-06, "loss": 0.8718, "step": 6551 }, { "epoch": 0.6857142857142857, "grad_norm": 2.0573301693561583, "learning_rate": 4.748738327273277e-06, "loss": 0.93, "step": 6552 }, { "epoch": 0.6858189429618001, "grad_norm": 2.2826375431191774, "learning_rate": 4.745853894486674e-06, "loss": 1.0443, "step": 6553 }, { "epoch": 0.6859236002093145, "grad_norm": 1.8209812613407657, "learning_rate": 4.742970065412112e-06, "loss": 0.8636, "step": 6554 }, { "epoch": 0.6860282574568289, "grad_norm": 2.050736728583209, "learning_rate": 4.740086840380948e-06, "loss": 0.9672, "step": 6555 }, { "epoch": 0.6861329147043432, "grad_norm": 1.9397854484778958, "learning_rate": 4.7372042197244684e-06, "loss": 0.9258, "step": 6556 }, { "epoch": 0.6862375719518576, "grad_norm": 2.086617574002967, "learning_rate": 4.734322203773889e-06, "loss": 0.9393, "step": 6557 }, { "epoch": 0.686342229199372, "grad_norm": 2.012325233074569, "learning_rate": 4.731440792860368e-06, "loss": 0.8641, "step": 6558 }, { "epoch": 0.6864468864468865, "grad_norm": 2.1204083885533227, "learning_rate": 4.728559987314975e-06, "loss": 0.9989, "step": 6559 }, { "epoch": 0.6865515436944009, "grad_norm": 2.148258411085086, "learning_rate": 4.72567978746873e-06, "loss": 0.8573, "step": 6560 }, { "epoch": 0.6866562009419153, "grad_norm": 2.079331381933887, "learning_rate": 4.72280019365257e-06, "loss": 0.8727, "step": 6561 }, { "epoch": 0.6867608581894297, "grad_norm": 1.8462912023639682, "learning_rate": 4.719921206197365e-06, "loss": 0.8153, "step": 6562 }, { "epoch": 0.686865515436944, "grad_norm": 2.1589001836310433, "learning_rate": 4.717042825433914e-06, "loss": 0.9984, "step": 6563 }, { "epoch": 0.6869701726844584, "grad_norm": 2.0590111114309293, "learning_rate": 4.714165051692956e-06, "loss": 0.9904, "step": 6564 }, { "epoch": 0.6870748299319728, "grad_norm": 2.0472445373680217, "learning_rate": 4.711287885305149e-06, "loss": 0.8908, "step": 6565 }, { "epoch": 0.6871794871794872, "grad_norm": 2.1696081831633967, "learning_rate": 4.708411326601081e-06, "loss": 0.9935, "step": 6566 }, { "epoch": 0.6872841444270016, "grad_norm": 2.7641764279513628, "learning_rate": 4.705535375911283e-06, "loss": 0.9877, "step": 6567 }, { "epoch": 0.687388801674516, "grad_norm": 2.131915916399177, "learning_rate": 4.7026600335662e-06, "loss": 0.9278, "step": 6568 }, { "epoch": 0.6874934589220304, "grad_norm": 1.8259729970596064, "learning_rate": 4.69978529989622e-06, "loss": 0.7988, "step": 6569 }, { "epoch": 0.6875981161695447, "grad_norm": 2.1246313055880135, "learning_rate": 4.696911175231655e-06, "loss": 0.9864, "step": 6570 }, { "epoch": 0.6877027734170591, "grad_norm": 2.028298179481332, "learning_rate": 4.694037659902747e-06, "loss": 0.9062, "step": 6571 }, { "epoch": 0.6878074306645735, "grad_norm": 1.6089562184719646, "learning_rate": 4.691164754239663e-06, "loss": 0.7789, "step": 6572 }, { "epoch": 0.6879120879120879, "grad_norm": 1.8248486409728757, "learning_rate": 4.6882924585725155e-06, "loss": 0.8198, "step": 6573 }, { "epoch": 0.6880167451596023, "grad_norm": 2.095291077164802, "learning_rate": 4.685420773231333e-06, "loss": 0.9641, "step": 6574 }, { "epoch": 0.6881214024071167, "grad_norm": 2.2450995973799013, "learning_rate": 4.682549698546073e-06, "loss": 0.9317, "step": 6575 }, { "epoch": 0.6882260596546311, "grad_norm": 2.0526149027810283, "learning_rate": 4.679679234846636e-06, "loss": 0.9398, "step": 6576 }, { "epoch": 0.6883307169021454, "grad_norm": 1.7534041188486345, "learning_rate": 4.67680938246284e-06, "loss": 0.8108, "step": 6577 }, { "epoch": 0.6884353741496598, "grad_norm": 2.0637109461898553, "learning_rate": 4.6739401417244335e-06, "loss": 0.9386, "step": 6578 }, { "epoch": 0.6885400313971742, "grad_norm": 1.954071200949578, "learning_rate": 4.671071512961106e-06, "loss": 0.8725, "step": 6579 }, { "epoch": 0.6886446886446886, "grad_norm": 2.128303103825469, "learning_rate": 4.668203496502464e-06, "loss": 0.9578, "step": 6580 }, { "epoch": 0.688749345892203, "grad_norm": 1.8201644126560876, "learning_rate": 4.665336092678049e-06, "loss": 0.903, "step": 6581 }, { "epoch": 0.6888540031397175, "grad_norm": 2.1543435935120763, "learning_rate": 4.6624693018173285e-06, "loss": 0.9748, "step": 6582 }, { "epoch": 0.6889586603872319, "grad_norm": 2.1608061059358765, "learning_rate": 4.659603124249704e-06, "loss": 0.9338, "step": 6583 }, { "epoch": 0.6890633176347462, "grad_norm": 2.260571417991753, "learning_rate": 4.6567375603045114e-06, "loss": 0.8783, "step": 6584 }, { "epoch": 0.6891679748822606, "grad_norm": 2.0583659884788923, "learning_rate": 4.653872610311005e-06, "loss": 0.922, "step": 6585 }, { "epoch": 0.689272632129775, "grad_norm": 1.7393344357365446, "learning_rate": 4.651008274598373e-06, "loss": 0.8636, "step": 6586 }, { "epoch": 0.6893772893772894, "grad_norm": 2.1302121690545084, "learning_rate": 4.648144553495732e-06, "loss": 0.895, "step": 6587 }, { "epoch": 0.6894819466248038, "grad_norm": 1.870330710588847, "learning_rate": 4.645281447332133e-06, "loss": 0.9744, "step": 6588 }, { "epoch": 0.6895866038723182, "grad_norm": 2.1942089129705358, "learning_rate": 4.642418956436551e-06, "loss": 0.8749, "step": 6589 }, { "epoch": 0.6896912611198326, "grad_norm": 1.7509480014685161, "learning_rate": 4.639557081137891e-06, "loss": 0.8509, "step": 6590 }, { "epoch": 0.689795918367347, "grad_norm": 2.0185777788794015, "learning_rate": 4.636695821764987e-06, "loss": 0.9448, "step": 6591 }, { "epoch": 0.6899005756148613, "grad_norm": 2.0964031784253674, "learning_rate": 4.633835178646605e-06, "loss": 0.9755, "step": 6592 }, { "epoch": 0.6900052328623757, "grad_norm": 2.1491220196897958, "learning_rate": 4.630975152111443e-06, "loss": 0.9519, "step": 6593 }, { "epoch": 0.6901098901098901, "grad_norm": 1.9800787335964507, "learning_rate": 4.628115742488119e-06, "loss": 0.8376, "step": 6594 }, { "epoch": 0.6902145473574045, "grad_norm": 2.2297898584366918, "learning_rate": 4.625256950105188e-06, "loss": 0.9368, "step": 6595 }, { "epoch": 0.6903192046049189, "grad_norm": 1.9376950455128639, "learning_rate": 4.622398775291129e-06, "loss": 0.7967, "step": 6596 }, { "epoch": 0.6904238618524333, "grad_norm": 2.1640506849550776, "learning_rate": 4.6195412183743485e-06, "loss": 0.965, "step": 6597 }, { "epoch": 0.6905285190999477, "grad_norm": 2.1629346113510945, "learning_rate": 4.616684279683193e-06, "loss": 0.7332, "step": 6598 }, { "epoch": 0.690633176347462, "grad_norm": 2.4687954434374264, "learning_rate": 4.6138279595459255e-06, "loss": 0.9789, "step": 6599 }, { "epoch": 0.6907378335949764, "grad_norm": 1.9293003022663573, "learning_rate": 4.610972258290745e-06, "loss": 0.8768, "step": 6600 }, { "epoch": 0.6908424908424908, "grad_norm": 2.000231072203521, "learning_rate": 4.608117176245773e-06, "loss": 0.9758, "step": 6601 }, { "epoch": 0.6909471480900052, "grad_norm": 2.223037956191113, "learning_rate": 4.60526271373907e-06, "loss": 0.9322, "step": 6602 }, { "epoch": 0.6910518053375196, "grad_norm": 1.9628517839572384, "learning_rate": 4.602408871098618e-06, "loss": 0.8642, "step": 6603 }, { "epoch": 0.691156462585034, "grad_norm": 1.985045530723375, "learning_rate": 4.599555648652331e-06, "loss": 0.9152, "step": 6604 }, { "epoch": 0.6912611198325485, "grad_norm": 1.9758634460748625, "learning_rate": 4.5967030467280475e-06, "loss": 0.8055, "step": 6605 }, { "epoch": 0.6913657770800627, "grad_norm": 3.103005744229717, "learning_rate": 4.5938510656535325e-06, "loss": 0.8094, "step": 6606 }, { "epoch": 0.6914704343275772, "grad_norm": 2.1448807382230024, "learning_rate": 4.590999705756495e-06, "loss": 0.9655, "step": 6607 }, { "epoch": 0.6915750915750916, "grad_norm": 1.8041089841225364, "learning_rate": 4.588148967364555e-06, "loss": 0.8029, "step": 6608 }, { "epoch": 0.691679748822606, "grad_norm": 2.0056877275259106, "learning_rate": 4.585298850805266e-06, "loss": 0.8963, "step": 6609 }, { "epoch": 0.6917844060701204, "grad_norm": 1.993991355774141, "learning_rate": 4.58244935640612e-06, "loss": 0.8601, "step": 6610 }, { "epoch": 0.6918890633176348, "grad_norm": 1.8982333609414117, "learning_rate": 4.579600484494522e-06, "loss": 0.9538, "step": 6611 }, { "epoch": 0.6919937205651492, "grad_norm": 2.2109365168453623, "learning_rate": 4.57675223539782e-06, "loss": 0.95, "step": 6612 }, { "epoch": 0.6920983778126635, "grad_norm": 2.077498771030345, "learning_rate": 4.573904609443281e-06, "loss": 0.9701, "step": 6613 }, { "epoch": 0.6922030350601779, "grad_norm": 2.706301406949021, "learning_rate": 4.571057606958102e-06, "loss": 0.9979, "step": 6614 }, { "epoch": 0.6923076923076923, "grad_norm": 1.9123792412988372, "learning_rate": 4.568211228269408e-06, "loss": 0.8161, "step": 6615 }, { "epoch": 0.6924123495552067, "grad_norm": 2.0744022076341393, "learning_rate": 4.565365473704253e-06, "loss": 0.8749, "step": 6616 }, { "epoch": 0.6925170068027211, "grad_norm": 1.9700768269508973, "learning_rate": 4.5625203435896244e-06, "loss": 0.7862, "step": 6617 }, { "epoch": 0.6926216640502355, "grad_norm": 2.1381491046390257, "learning_rate": 4.55967583825243e-06, "loss": 0.8554, "step": 6618 }, { "epoch": 0.6927263212977499, "grad_norm": 1.912057699434843, "learning_rate": 4.556831958019512e-06, "loss": 0.884, "step": 6619 }, { "epoch": 0.6928309785452642, "grad_norm": 1.8093614568345422, "learning_rate": 4.553988703217638e-06, "loss": 0.9215, "step": 6620 }, { "epoch": 0.6929356357927786, "grad_norm": 1.9690519624929577, "learning_rate": 4.551146074173497e-06, "loss": 0.8911, "step": 6621 }, { "epoch": 0.693040293040293, "grad_norm": 2.0811151718511525, "learning_rate": 4.548304071213723e-06, "loss": 1.0378, "step": 6622 }, { "epoch": 0.6931449502878074, "grad_norm": 1.9485593151139378, "learning_rate": 4.545462694664863e-06, "loss": 0.8596, "step": 6623 }, { "epoch": 0.6932496075353218, "grad_norm": 2.1609504606964784, "learning_rate": 4.542621944853396e-06, "loss": 0.8655, "step": 6624 }, { "epoch": 0.6933542647828362, "grad_norm": 1.897884223321221, "learning_rate": 4.539781822105728e-06, "loss": 0.8386, "step": 6625 }, { "epoch": 0.6934589220303506, "grad_norm": 1.72032226811971, "learning_rate": 4.5369423267482004e-06, "loss": 0.8291, "step": 6626 }, { "epoch": 0.6935635792778649, "grad_norm": 2.2020630417755997, "learning_rate": 4.534103459107071e-06, "loss": 0.8895, "step": 6627 }, { "epoch": 0.6936682365253793, "grad_norm": 2.0309417972154664, "learning_rate": 4.5312652195085385e-06, "loss": 0.9303, "step": 6628 }, { "epoch": 0.6937728937728938, "grad_norm": 2.0639435237790953, "learning_rate": 4.528427608278718e-06, "loss": 0.9642, "step": 6629 }, { "epoch": 0.6938775510204082, "grad_norm": 2.1146137247817527, "learning_rate": 4.525590625743654e-06, "loss": 0.8332, "step": 6630 }, { "epoch": 0.6939822082679226, "grad_norm": 2.218674423062428, "learning_rate": 4.522754272229329e-06, "loss": 0.99, "step": 6631 }, { "epoch": 0.694086865515437, "grad_norm": 2.1718805339372045, "learning_rate": 4.519918548061642e-06, "loss": 0.974, "step": 6632 }, { "epoch": 0.6941915227629514, "grad_norm": 1.925204404610261, "learning_rate": 4.517083453566422e-06, "loss": 0.8138, "step": 6633 }, { "epoch": 0.6942961800104658, "grad_norm": 2.5317634962266538, "learning_rate": 4.514248989069428e-06, "loss": 1.0308, "step": 6634 }, { "epoch": 0.6944008372579801, "grad_norm": 1.8774753895683007, "learning_rate": 4.511415154896343e-06, "loss": 0.8771, "step": 6635 }, { "epoch": 0.6945054945054945, "grad_norm": 2.006100990453835, "learning_rate": 4.508581951372783e-06, "loss": 0.9325, "step": 6636 }, { "epoch": 0.6946101517530089, "grad_norm": 1.9301716377433031, "learning_rate": 4.505749378824294e-06, "loss": 0.89, "step": 6637 }, { "epoch": 0.6947148090005233, "grad_norm": 1.851932126585198, "learning_rate": 4.502917437576338e-06, "loss": 0.9756, "step": 6638 }, { "epoch": 0.6948194662480377, "grad_norm": 1.8626235138561205, "learning_rate": 4.500086127954313e-06, "loss": 0.9258, "step": 6639 }, { "epoch": 0.6949241234955521, "grad_norm": 1.8143489151160312, "learning_rate": 4.497255450283537e-06, "loss": 0.8582, "step": 6640 }, { "epoch": 0.6950287807430665, "grad_norm": 1.682072962368349, "learning_rate": 4.4944254048892685e-06, "loss": 0.8266, "step": 6641 }, { "epoch": 0.6951334379905808, "grad_norm": 2.2156198148472828, "learning_rate": 4.491595992096682e-06, "loss": 0.9815, "step": 6642 }, { "epoch": 0.6952380952380952, "grad_norm": 1.9277959774869025, "learning_rate": 4.488767212230884e-06, "loss": 0.9182, "step": 6643 }, { "epoch": 0.6953427524856096, "grad_norm": 2.2062231983360925, "learning_rate": 4.4859390656169e-06, "loss": 0.9683, "step": 6644 }, { "epoch": 0.695447409733124, "grad_norm": 1.9811122926788303, "learning_rate": 4.4831115525796965e-06, "loss": 0.8219, "step": 6645 }, { "epoch": 0.6955520669806384, "grad_norm": 2.0834949431347014, "learning_rate": 4.480284673444163e-06, "loss": 1.0044, "step": 6646 }, { "epoch": 0.6956567242281528, "grad_norm": 1.9488184187877493, "learning_rate": 4.477458428535111e-06, "loss": 0.9673, "step": 6647 }, { "epoch": 0.6957613814756672, "grad_norm": 2.297182952318726, "learning_rate": 4.47463281817728e-06, "loss": 0.9403, "step": 6648 }, { "epoch": 0.6958660387231815, "grad_norm": 1.7266007102300813, "learning_rate": 4.471807842695339e-06, "loss": 0.7784, "step": 6649 }, { "epoch": 0.6959706959706959, "grad_norm": 2.228867198331409, "learning_rate": 4.468983502413882e-06, "loss": 0.9004, "step": 6650 }, { "epoch": 0.6960753532182103, "grad_norm": 2.141327124826885, "learning_rate": 4.466159797657434e-06, "loss": 0.9162, "step": 6651 }, { "epoch": 0.6961800104657248, "grad_norm": 2.0821483089510937, "learning_rate": 4.463336728750446e-06, "loss": 0.8988, "step": 6652 }, { "epoch": 0.6962846677132392, "grad_norm": 2.0393438945172804, "learning_rate": 4.460514296017286e-06, "loss": 0.9121, "step": 6653 }, { "epoch": 0.6963893249607536, "grad_norm": 2.407022437793897, "learning_rate": 4.457692499782269e-06, "loss": 0.9244, "step": 6654 }, { "epoch": 0.696493982208268, "grad_norm": 2.222925129117027, "learning_rate": 4.454871340369614e-06, "loss": 0.9437, "step": 6655 }, { "epoch": 0.6965986394557823, "grad_norm": 2.113703320766968, "learning_rate": 4.452050818103487e-06, "loss": 0.8637, "step": 6656 }, { "epoch": 0.6967032967032967, "grad_norm": 1.79736876167149, "learning_rate": 4.4492309333079685e-06, "loss": 0.7871, "step": 6657 }, { "epoch": 0.6968079539508111, "grad_norm": 2.035698628522382, "learning_rate": 4.446411686307067e-06, "loss": 1.034, "step": 6658 }, { "epoch": 0.6969126111983255, "grad_norm": 1.9079323228404186, "learning_rate": 4.443593077424718e-06, "loss": 0.9502, "step": 6659 }, { "epoch": 0.6970172684458399, "grad_norm": 2.018868747120195, "learning_rate": 4.440775106984793e-06, "loss": 0.8453, "step": 6660 }, { "epoch": 0.6971219256933543, "grad_norm": 1.9769821063615076, "learning_rate": 4.437957775311073e-06, "loss": 1.0526, "step": 6661 }, { "epoch": 0.6972265829408687, "grad_norm": 1.8727187713600468, "learning_rate": 4.435141082727285e-06, "loss": 0.8593, "step": 6662 }, { "epoch": 0.697331240188383, "grad_norm": 2.087270497186403, "learning_rate": 4.432325029557067e-06, "loss": 0.8752, "step": 6663 }, { "epoch": 0.6974358974358974, "grad_norm": 2.1401689080867365, "learning_rate": 4.429509616123987e-06, "loss": 0.9799, "step": 6664 }, { "epoch": 0.6975405546834118, "grad_norm": 1.7455887505421854, "learning_rate": 4.426694842751548e-06, "loss": 0.9056, "step": 6665 }, { "epoch": 0.6976452119309262, "grad_norm": 2.0761648451950054, "learning_rate": 4.4238807097631685e-06, "loss": 0.6883, "step": 6666 }, { "epoch": 0.6977498691784406, "grad_norm": 2.4630286085688673, "learning_rate": 4.421067217482201e-06, "loss": 0.8402, "step": 6667 }, { "epoch": 0.697854526425955, "grad_norm": 2.2700430686821496, "learning_rate": 4.418254366231919e-06, "loss": 0.8151, "step": 6668 }, { "epoch": 0.6979591836734694, "grad_norm": 2.106078902374301, "learning_rate": 4.415442156335522e-06, "loss": 0.974, "step": 6669 }, { "epoch": 0.6980638409209837, "grad_norm": 2.1498422358932547, "learning_rate": 4.412630588116144e-06, "loss": 0.9164, "step": 6670 }, { "epoch": 0.6981684981684981, "grad_norm": 2.1170538037613413, "learning_rate": 4.409819661896839e-06, "loss": 0.9384, "step": 6671 }, { "epoch": 0.6982731554160125, "grad_norm": 2.0390019015189864, "learning_rate": 4.40700937800059e-06, "loss": 0.9529, "step": 6672 }, { "epoch": 0.6983778126635269, "grad_norm": 2.1816342624312215, "learning_rate": 4.4041997367503e-06, "loss": 0.7565, "step": 6673 }, { "epoch": 0.6984824699110413, "grad_norm": 2.0528691851303353, "learning_rate": 4.401390738468801e-06, "loss": 0.9013, "step": 6674 }, { "epoch": 0.6985871271585558, "grad_norm": 2.2025257423746725, "learning_rate": 4.39858238347886e-06, "loss": 0.8969, "step": 6675 }, { "epoch": 0.6986917844060702, "grad_norm": 2.1117193529663916, "learning_rate": 4.395774672103157e-06, "loss": 0.9545, "step": 6676 }, { "epoch": 0.6987964416535846, "grad_norm": 1.8309553111911094, "learning_rate": 4.392967604664306e-06, "loss": 0.8649, "step": 6677 }, { "epoch": 0.6989010989010989, "grad_norm": 1.8989290566978339, "learning_rate": 4.3901611814848396e-06, "loss": 0.9028, "step": 6678 }, { "epoch": 0.6990057561486133, "grad_norm": 2.326356063146579, "learning_rate": 4.387355402887227e-06, "loss": 0.9389, "step": 6679 }, { "epoch": 0.6991104133961277, "grad_norm": 1.9814214365310432, "learning_rate": 4.384550269193859e-06, "loss": 0.947, "step": 6680 }, { "epoch": 0.6992150706436421, "grad_norm": 1.936796902489133, "learning_rate": 4.381745780727049e-06, "loss": 0.7648, "step": 6681 }, { "epoch": 0.6993197278911565, "grad_norm": 2.3068454046130187, "learning_rate": 4.378941937809039e-06, "loss": 0.9382, "step": 6682 }, { "epoch": 0.6994243851386709, "grad_norm": 2.0579861242161295, "learning_rate": 4.3761387407619915e-06, "loss": 0.8562, "step": 6683 }, { "epoch": 0.6995290423861853, "grad_norm": 2.022770051321196, "learning_rate": 4.373336189908007e-06, "loss": 0.8727, "step": 6684 }, { "epoch": 0.6996336996336996, "grad_norm": 1.8295973872025644, "learning_rate": 4.3705342855691e-06, "loss": 0.8223, "step": 6685 }, { "epoch": 0.699738356881214, "grad_norm": 2.415516211385723, "learning_rate": 4.367733028067217e-06, "loss": 0.8492, "step": 6686 }, { "epoch": 0.6998430141287284, "grad_norm": 2.5417469338554994, "learning_rate": 4.364932417724222e-06, "loss": 0.8159, "step": 6687 }, { "epoch": 0.6999476713762428, "grad_norm": 1.7713596531603963, "learning_rate": 4.362132454861916e-06, "loss": 0.8118, "step": 6688 }, { "epoch": 0.7000523286237572, "grad_norm": 1.9958944703716315, "learning_rate": 4.359333139802024e-06, "loss": 0.7404, "step": 6689 }, { "epoch": 0.7001569858712716, "grad_norm": 2.0988662837685887, "learning_rate": 4.356534472866189e-06, "loss": 0.9781, "step": 6690 }, { "epoch": 0.700261643118786, "grad_norm": 2.213783510735197, "learning_rate": 4.353736454375983e-06, "loss": 0.9575, "step": 6691 }, { "epoch": 0.7003663003663003, "grad_norm": 2.17188348926095, "learning_rate": 4.350939084652906e-06, "loss": 0.9206, "step": 6692 }, { "epoch": 0.7004709576138147, "grad_norm": 2.115726513977887, "learning_rate": 4.348142364018375e-06, "loss": 0.9514, "step": 6693 }, { "epoch": 0.7005756148613291, "grad_norm": 1.9850295855218871, "learning_rate": 4.345346292793748e-06, "loss": 0.8236, "step": 6694 }, { "epoch": 0.7006802721088435, "grad_norm": 1.94482693927181, "learning_rate": 4.342550871300295e-06, "loss": 0.8051, "step": 6695 }, { "epoch": 0.7007849293563579, "grad_norm": 2.043191145044091, "learning_rate": 4.339756099859213e-06, "loss": 0.8906, "step": 6696 }, { "epoch": 0.7008895866038723, "grad_norm": 2.128017700100571, "learning_rate": 4.336961978791632e-06, "loss": 0.9098, "step": 6697 }, { "epoch": 0.7009942438513868, "grad_norm": 1.9203516360713857, "learning_rate": 4.3341685084185965e-06, "loss": 0.833, "step": 6698 }, { "epoch": 0.701098901098901, "grad_norm": 1.9846571799966501, "learning_rate": 4.331375689061089e-06, "loss": 0.8093, "step": 6699 }, { "epoch": 0.7012035583464155, "grad_norm": 2.6300245916251925, "learning_rate": 4.328583521040006e-06, "loss": 0.8283, "step": 6700 }, { "epoch": 0.7013082155939299, "grad_norm": 2.227609828599236, "learning_rate": 4.325792004676175e-06, "loss": 0.9819, "step": 6701 }, { "epoch": 0.7014128728414443, "grad_norm": 2.438764625145741, "learning_rate": 4.32300114029034e-06, "loss": 0.8809, "step": 6702 }, { "epoch": 0.7015175300889587, "grad_norm": 2.2905189173309046, "learning_rate": 4.320210928203187e-06, "loss": 0.9054, "step": 6703 }, { "epoch": 0.7016221873364731, "grad_norm": 2.089414522385608, "learning_rate": 4.317421368735313e-06, "loss": 0.9524, "step": 6704 }, { "epoch": 0.7017268445839875, "grad_norm": 1.774274758188664, "learning_rate": 4.314632462207239e-06, "loss": 0.8178, "step": 6705 }, { "epoch": 0.7018315018315018, "grad_norm": 2.147402104777586, "learning_rate": 4.311844208939424e-06, "loss": 0.7808, "step": 6706 }, { "epoch": 0.7019361590790162, "grad_norm": 1.9194606735701536, "learning_rate": 4.309056609252241e-06, "loss": 0.9445, "step": 6707 }, { "epoch": 0.7020408163265306, "grad_norm": 2.464691497275074, "learning_rate": 4.306269663465986e-06, "loss": 0.9314, "step": 6708 }, { "epoch": 0.702145473574045, "grad_norm": 2.258686029857579, "learning_rate": 4.3034833719008935e-06, "loss": 0.9055, "step": 6709 }, { "epoch": 0.7022501308215594, "grad_norm": 1.9254564663234373, "learning_rate": 4.300697734877109e-06, "loss": 0.9699, "step": 6710 }, { "epoch": 0.7023547880690738, "grad_norm": 2.266755065720574, "learning_rate": 4.297912752714709e-06, "loss": 0.8583, "step": 6711 }, { "epoch": 0.7024594453165882, "grad_norm": 1.9181752019643432, "learning_rate": 4.29512842573369e-06, "loss": 0.9748, "step": 6712 }, { "epoch": 0.7025641025641025, "grad_norm": 1.8549903822927598, "learning_rate": 4.2923447542539785e-06, "loss": 0.7526, "step": 6713 }, { "epoch": 0.7026687598116169, "grad_norm": 1.9363352003242609, "learning_rate": 4.289561738595431e-06, "loss": 0.9082, "step": 6714 }, { "epoch": 0.7027734170591313, "grad_norm": 2.2343004147251095, "learning_rate": 4.2867793790778136e-06, "loss": 0.8977, "step": 6715 }, { "epoch": 0.7028780743066457, "grad_norm": 2.0029331901560696, "learning_rate": 4.283997676020829e-06, "loss": 0.8673, "step": 6716 }, { "epoch": 0.7029827315541601, "grad_norm": 1.93825276090313, "learning_rate": 4.2812166297440946e-06, "loss": 0.8511, "step": 6717 }, { "epoch": 0.7030873888016745, "grad_norm": 2.1492481371612215, "learning_rate": 4.278436240567166e-06, "loss": 0.8906, "step": 6718 }, { "epoch": 0.703192046049189, "grad_norm": 1.9356220967112918, "learning_rate": 4.2756565088095125e-06, "loss": 0.8833, "step": 6719 }, { "epoch": 0.7032967032967034, "grad_norm": 1.9085420160045585, "learning_rate": 4.272877434790531e-06, "loss": 0.7844, "step": 6720 }, { "epoch": 0.7034013605442176, "grad_norm": 2.1196564007597307, "learning_rate": 4.270099018829539e-06, "loss": 0.9646, "step": 6721 }, { "epoch": 0.703506017791732, "grad_norm": 2.2766384603785346, "learning_rate": 4.267321261245785e-06, "loss": 0.8636, "step": 6722 }, { "epoch": 0.7036106750392465, "grad_norm": 2.224471343348745, "learning_rate": 4.264544162358443e-06, "loss": 0.9361, "step": 6723 }, { "epoch": 0.7037153322867609, "grad_norm": 1.8561557729274152, "learning_rate": 4.2617677224866035e-06, "loss": 0.7476, "step": 6724 }, { "epoch": 0.7038199895342753, "grad_norm": 2.0486894102396254, "learning_rate": 4.258991941949286e-06, "loss": 0.7964, "step": 6725 }, { "epoch": 0.7039246467817897, "grad_norm": 2.0441810946350936, "learning_rate": 4.256216821065431e-06, "loss": 0.9484, "step": 6726 }, { "epoch": 0.7040293040293041, "grad_norm": 2.1128385722097507, "learning_rate": 4.253442360153905e-06, "loss": 0.9235, "step": 6727 }, { "epoch": 0.7041339612768184, "grad_norm": 2.313931237729976, "learning_rate": 4.250668559533504e-06, "loss": 0.9055, "step": 6728 }, { "epoch": 0.7042386185243328, "grad_norm": 1.8402369003536179, "learning_rate": 4.247895419522941e-06, "loss": 0.7616, "step": 6729 }, { "epoch": 0.7043432757718472, "grad_norm": 1.8143536226822832, "learning_rate": 4.245122940440855e-06, "loss": 0.935, "step": 6730 }, { "epoch": 0.7044479330193616, "grad_norm": 1.9312652171108742, "learning_rate": 4.242351122605807e-06, "loss": 0.8543, "step": 6731 }, { "epoch": 0.704552590266876, "grad_norm": 1.9610927104433202, "learning_rate": 4.239579966336286e-06, "loss": 0.824, "step": 6732 }, { "epoch": 0.7046572475143904, "grad_norm": 2.003342197333868, "learning_rate": 4.236809471950708e-06, "loss": 0.859, "step": 6733 }, { "epoch": 0.7047619047619048, "grad_norm": 1.9433809349811346, "learning_rate": 4.234039639767406e-06, "loss": 0.902, "step": 6734 }, { "epoch": 0.7048665620094191, "grad_norm": 2.178288945579384, "learning_rate": 4.231270470104636e-06, "loss": 0.8413, "step": 6735 }, { "epoch": 0.7049712192569335, "grad_norm": 2.3256221301858866, "learning_rate": 4.228501963280581e-06, "loss": 0.9749, "step": 6736 }, { "epoch": 0.7050758765044479, "grad_norm": 2.1566296507304763, "learning_rate": 4.225734119613354e-06, "loss": 0.9976, "step": 6737 }, { "epoch": 0.7051805337519623, "grad_norm": 2.2955385696891453, "learning_rate": 4.222966939420983e-06, "loss": 0.9847, "step": 6738 }, { "epoch": 0.7052851909994767, "grad_norm": 2.219932302560416, "learning_rate": 4.220200423021421e-06, "loss": 0.9591, "step": 6739 }, { "epoch": 0.7053898482469911, "grad_norm": 2.171556004505835, "learning_rate": 4.217434570732544e-06, "loss": 0.9488, "step": 6740 }, { "epoch": 0.7054945054945055, "grad_norm": 2.003742070991652, "learning_rate": 4.214669382872157e-06, "loss": 1.0053, "step": 6741 }, { "epoch": 0.7055991627420198, "grad_norm": 2.2735271709277938, "learning_rate": 4.2119048597579905e-06, "loss": 0.7012, "step": 6742 }, { "epoch": 0.7057038199895342, "grad_norm": 1.9509244256018325, "learning_rate": 4.2091410017076884e-06, "loss": 0.8871, "step": 6743 }, { "epoch": 0.7058084772370486, "grad_norm": 1.803214751829208, "learning_rate": 4.206377809038825e-06, "loss": 0.8344, "step": 6744 }, { "epoch": 0.705913134484563, "grad_norm": 2.1681624054889075, "learning_rate": 4.203615282068896e-06, "loss": 0.9701, "step": 6745 }, { "epoch": 0.7060177917320775, "grad_norm": 2.003295420905956, "learning_rate": 4.200853421115317e-06, "loss": 0.9148, "step": 6746 }, { "epoch": 0.7061224489795919, "grad_norm": 1.8542405762144707, "learning_rate": 4.1980922264954415e-06, "loss": 0.7981, "step": 6747 }, { "epoch": 0.7062271062271063, "grad_norm": 2.2185412641129605, "learning_rate": 4.195331698526526e-06, "loss": 0.9382, "step": 6748 }, { "epoch": 0.7063317634746206, "grad_norm": 1.9421670362200358, "learning_rate": 4.19257183752577e-06, "loss": 0.9211, "step": 6749 }, { "epoch": 0.706436420722135, "grad_norm": 2.350163108156549, "learning_rate": 4.189812643810282e-06, "loss": 0.9539, "step": 6750 }, { "epoch": 0.7065410779696494, "grad_norm": 2.1933671368986256, "learning_rate": 4.187054117697097e-06, "loss": 0.9205, "step": 6751 }, { "epoch": 0.7066457352171638, "grad_norm": 1.8016529905030467, "learning_rate": 4.184296259503181e-06, "loss": 0.863, "step": 6752 }, { "epoch": 0.7067503924646782, "grad_norm": 1.638442385400924, "learning_rate": 4.181539069545414e-06, "loss": 0.8414, "step": 6753 }, { "epoch": 0.7068550497121926, "grad_norm": 2.2815059203943218, "learning_rate": 4.1787825481406044e-06, "loss": 0.9858, "step": 6754 }, { "epoch": 0.706959706959707, "grad_norm": 2.0723833971887684, "learning_rate": 4.176026695605476e-06, "loss": 0.8515, "step": 6755 }, { "epoch": 0.7070643642072213, "grad_norm": 2.0981663306838585, "learning_rate": 4.17327151225669e-06, "loss": 0.9072, "step": 6756 }, { "epoch": 0.7071690214547357, "grad_norm": 1.885601962808024, "learning_rate": 4.170516998410817e-06, "loss": 0.9031, "step": 6757 }, { "epoch": 0.7072736787022501, "grad_norm": 2.253291731810213, "learning_rate": 4.1677631543843625e-06, "loss": 0.9391, "step": 6758 }, { "epoch": 0.7073783359497645, "grad_norm": 1.956313519509557, "learning_rate": 4.165009980493742e-06, "loss": 0.8985, "step": 6759 }, { "epoch": 0.7074829931972789, "grad_norm": 2.2664708007932743, "learning_rate": 4.1622574770553025e-06, "loss": 0.8731, "step": 6760 }, { "epoch": 0.7075876504447933, "grad_norm": 2.195150032088823, "learning_rate": 4.159505644385316e-06, "loss": 0.8524, "step": 6761 }, { "epoch": 0.7076923076923077, "grad_norm": 1.9346641546170762, "learning_rate": 4.1567544827999705e-06, "loss": 0.8993, "step": 6762 }, { "epoch": 0.7077969649398221, "grad_norm": 2.2504254337448453, "learning_rate": 4.154003992615381e-06, "loss": 0.9644, "step": 6763 }, { "epoch": 0.7079016221873364, "grad_norm": 2.188401815842773, "learning_rate": 4.151254174147584e-06, "loss": 0.8787, "step": 6764 }, { "epoch": 0.7080062794348508, "grad_norm": 2.037348430435397, "learning_rate": 4.148505027712535e-06, "loss": 0.8898, "step": 6765 }, { "epoch": 0.7081109366823652, "grad_norm": 2.121364396425157, "learning_rate": 4.1457565536261204e-06, "loss": 0.9164, "step": 6766 }, { "epoch": 0.7082155939298796, "grad_norm": 2.1227061314556104, "learning_rate": 4.14300875220415e-06, "loss": 0.8375, "step": 6767 }, { "epoch": 0.7083202511773941, "grad_norm": 2.2345470752377112, "learning_rate": 4.140261623762346e-06, "loss": 1.0657, "step": 6768 }, { "epoch": 0.7084249084249085, "grad_norm": 2.115378021426516, "learning_rate": 4.13751516861636e-06, "loss": 0.8428, "step": 6769 }, { "epoch": 0.7085295656724229, "grad_norm": 1.8781409525877706, "learning_rate": 4.134769387081764e-06, "loss": 0.8158, "step": 6770 }, { "epoch": 0.7086342229199372, "grad_norm": 2.870662361008902, "learning_rate": 4.132024279474058e-06, "loss": 0.8034, "step": 6771 }, { "epoch": 0.7087388801674516, "grad_norm": 1.9910392284133023, "learning_rate": 4.129279846108658e-06, "loss": 0.8839, "step": 6772 }, { "epoch": 0.708843537414966, "grad_norm": 2.7345053141599274, "learning_rate": 4.1265360873009054e-06, "loss": 0.8741, "step": 6773 }, { "epoch": 0.7089481946624804, "grad_norm": 1.817693344021189, "learning_rate": 4.123793003366059e-06, "loss": 0.8432, "step": 6774 }, { "epoch": 0.7090528519099948, "grad_norm": 1.9857696095073964, "learning_rate": 4.121050594619308e-06, "loss": 0.9486, "step": 6775 }, { "epoch": 0.7091575091575092, "grad_norm": 2.138607157252604, "learning_rate": 4.118308861375766e-06, "loss": 0.9636, "step": 6776 }, { "epoch": 0.7092621664050236, "grad_norm": 1.801323948809371, "learning_rate": 4.11556780395046e-06, "loss": 0.8476, "step": 6777 }, { "epoch": 0.7093668236525379, "grad_norm": 2.120684116550481, "learning_rate": 4.112827422658341e-06, "loss": 0.9265, "step": 6778 }, { "epoch": 0.7094714809000523, "grad_norm": 1.9405112301994334, "learning_rate": 4.1100877178142825e-06, "loss": 0.8766, "step": 6779 }, { "epoch": 0.7095761381475667, "grad_norm": 2.33308294996142, "learning_rate": 4.10734868973309e-06, "loss": 0.8442, "step": 6780 }, { "epoch": 0.7096807953950811, "grad_norm": 2.0092401960246016, "learning_rate": 4.104610338729478e-06, "loss": 0.9067, "step": 6781 }, { "epoch": 0.7097854526425955, "grad_norm": 2.4295543764879763, "learning_rate": 4.101872665118088e-06, "loss": 0.9433, "step": 6782 }, { "epoch": 0.7098901098901099, "grad_norm": 2.1008017438183924, "learning_rate": 4.099135669213483e-06, "loss": 0.8513, "step": 6783 }, { "epoch": 0.7099947671376243, "grad_norm": 2.445983785008863, "learning_rate": 4.096399351330156e-06, "loss": 0.9137, "step": 6784 }, { "epoch": 0.7100994243851386, "grad_norm": 2.1839137429933606, "learning_rate": 4.093663711782507e-06, "loss": 0.8465, "step": 6785 }, { "epoch": 0.710204081632653, "grad_norm": 1.9874222387615066, "learning_rate": 4.0909287508848745e-06, "loss": 0.8232, "step": 6786 }, { "epoch": 0.7103087388801674, "grad_norm": 1.8844372558093263, "learning_rate": 4.0881944689515085e-06, "loss": 0.8779, "step": 6787 }, { "epoch": 0.7104133961276818, "grad_norm": 1.9799678528512243, "learning_rate": 4.085460866296581e-06, "loss": 0.9447, "step": 6788 }, { "epoch": 0.7105180533751962, "grad_norm": 2.124231521122012, "learning_rate": 4.082727943234187e-06, "loss": 0.8971, "step": 6789 }, { "epoch": 0.7106227106227107, "grad_norm": 1.8530431577227409, "learning_rate": 4.079995700078352e-06, "loss": 0.8753, "step": 6790 }, { "epoch": 0.7107273678702251, "grad_norm": 2.0466031803847162, "learning_rate": 4.077264137143012e-06, "loss": 0.8169, "step": 6791 }, { "epoch": 0.7108320251177394, "grad_norm": 2.183687382955889, "learning_rate": 4.074533254742026e-06, "loss": 0.937, "step": 6792 }, { "epoch": 0.7109366823652538, "grad_norm": 1.8299109318292106, "learning_rate": 4.071803053189184e-06, "loss": 0.8489, "step": 6793 }, { "epoch": 0.7110413396127682, "grad_norm": 2.48666879446886, "learning_rate": 4.069073532798185e-06, "loss": 0.9826, "step": 6794 }, { "epoch": 0.7111459968602826, "grad_norm": 1.9966103200441245, "learning_rate": 4.066344693882665e-06, "loss": 0.9272, "step": 6795 }, { "epoch": 0.711250654107797, "grad_norm": 2.0372668354924715, "learning_rate": 4.063616536756168e-06, "loss": 0.8891, "step": 6796 }, { "epoch": 0.7113553113553114, "grad_norm": 2.0421407323614487, "learning_rate": 4.060889061732165e-06, "loss": 0.8493, "step": 6797 }, { "epoch": 0.7114599686028258, "grad_norm": 2.051281230221165, "learning_rate": 4.058162269124045e-06, "loss": 0.8805, "step": 6798 }, { "epoch": 0.7115646258503401, "grad_norm": 2.0417527238947617, "learning_rate": 4.055436159245129e-06, "loss": 0.8527, "step": 6799 }, { "epoch": 0.7116692830978545, "grad_norm": 2.01692569770081, "learning_rate": 4.0527107324086465e-06, "loss": 0.8964, "step": 6800 }, { "epoch": 0.7117739403453689, "grad_norm": 2.498291537658827, "learning_rate": 4.04998598892776e-06, "loss": 0.8871, "step": 6801 }, { "epoch": 0.7118785975928833, "grad_norm": 1.7272578518417077, "learning_rate": 4.047261929115546e-06, "loss": 0.7592, "step": 6802 }, { "epoch": 0.7119832548403977, "grad_norm": 2.067238416917133, "learning_rate": 4.044538553285003e-06, "loss": 0.8075, "step": 6803 }, { "epoch": 0.7120879120879121, "grad_norm": 2.0306398676192043, "learning_rate": 4.04181586174905e-06, "loss": 0.9844, "step": 6804 }, { "epoch": 0.7121925693354265, "grad_norm": 1.7327165325882197, "learning_rate": 4.039093854820537e-06, "loss": 0.8486, "step": 6805 }, { "epoch": 0.7122972265829409, "grad_norm": 1.954070649531059, "learning_rate": 4.036372532812224e-06, "loss": 0.9154, "step": 6806 }, { "epoch": 0.7124018838304552, "grad_norm": 2.113577347797847, "learning_rate": 4.033651896036797e-06, "loss": 0.8904, "step": 6807 }, { "epoch": 0.7125065410779696, "grad_norm": 2.103592454954578, "learning_rate": 4.030931944806857e-06, "loss": 0.8943, "step": 6808 }, { "epoch": 0.712611198325484, "grad_norm": 2.1336538127277405, "learning_rate": 4.0282126794349385e-06, "loss": 0.8553, "step": 6809 }, { "epoch": 0.7127158555729984, "grad_norm": 1.919816279896638, "learning_rate": 4.0254941002334914e-06, "loss": 0.8675, "step": 6810 }, { "epoch": 0.7128205128205128, "grad_norm": 1.8626947261175397, "learning_rate": 4.022776207514885e-06, "loss": 0.8163, "step": 6811 }, { "epoch": 0.7129251700680272, "grad_norm": 2.2003625119304875, "learning_rate": 4.020059001591409e-06, "loss": 0.9314, "step": 6812 }, { "epoch": 0.7130298273155417, "grad_norm": 2.1580488886290325, "learning_rate": 4.0173424827752725e-06, "loss": 0.9063, "step": 6813 }, { "epoch": 0.713134484563056, "grad_norm": 1.813499024046145, "learning_rate": 4.014626651378617e-06, "loss": 0.8229, "step": 6814 }, { "epoch": 0.7132391418105704, "grad_norm": 2.13054755191047, "learning_rate": 4.011911507713493e-06, "loss": 0.9653, "step": 6815 }, { "epoch": 0.7133437990580848, "grad_norm": 2.3481830820459466, "learning_rate": 4.0091970520918755e-06, "loss": 0.9658, "step": 6816 }, { "epoch": 0.7134484563055992, "grad_norm": 3.110141424492723, "learning_rate": 4.006483284825658e-06, "loss": 0.9323, "step": 6817 }, { "epoch": 0.7135531135531136, "grad_norm": 1.9336469464972577, "learning_rate": 4.00377020622666e-06, "loss": 0.9056, "step": 6818 }, { "epoch": 0.713657770800628, "grad_norm": 1.7890408343625537, "learning_rate": 4.001057816606626e-06, "loss": 0.8723, "step": 6819 }, { "epoch": 0.7137624280481424, "grad_norm": 2.0458672431955427, "learning_rate": 3.99834611627721e-06, "loss": 0.9242, "step": 6820 }, { "epoch": 0.7138670852956567, "grad_norm": 2.169252164372577, "learning_rate": 3.995635105549991e-06, "loss": 0.8732, "step": 6821 }, { "epoch": 0.7139717425431711, "grad_norm": 2.1207025732586677, "learning_rate": 3.99292478473647e-06, "loss": 0.9312, "step": 6822 }, { "epoch": 0.7140763997906855, "grad_norm": 1.8860336974481258, "learning_rate": 3.990215154148067e-06, "loss": 0.8711, "step": 6823 }, { "epoch": 0.7141810570381999, "grad_norm": 1.7535400399995449, "learning_rate": 3.987506214096128e-06, "loss": 0.8432, "step": 6824 }, { "epoch": 0.7142857142857143, "grad_norm": 1.9294483445338717, "learning_rate": 3.984797964891914e-06, "loss": 0.8547, "step": 6825 }, { "epoch": 0.7143903715332287, "grad_norm": 2.1241763749956415, "learning_rate": 3.982090406846605e-06, "loss": 0.9769, "step": 6826 }, { "epoch": 0.7144950287807431, "grad_norm": 1.617410025245596, "learning_rate": 3.979383540271311e-06, "loss": 0.8158, "step": 6827 }, { "epoch": 0.7145996860282574, "grad_norm": 1.9851405262752755, "learning_rate": 3.976677365477049e-06, "loss": 0.8617, "step": 6828 }, { "epoch": 0.7147043432757718, "grad_norm": 2.3520800347490125, "learning_rate": 3.973971882774773e-06, "loss": 0.8526, "step": 6829 }, { "epoch": 0.7148090005232862, "grad_norm": 1.9514505903548016, "learning_rate": 3.971267092475343e-06, "loss": 0.9752, "step": 6830 }, { "epoch": 0.7149136577708006, "grad_norm": 2.5826157191432997, "learning_rate": 3.968562994889545e-06, "loss": 0.9407, "step": 6831 }, { "epoch": 0.715018315018315, "grad_norm": 2.300126908869992, "learning_rate": 3.9658595903280836e-06, "loss": 0.8193, "step": 6832 }, { "epoch": 0.7151229722658294, "grad_norm": 1.8165911897167484, "learning_rate": 3.963156879101591e-06, "loss": 0.8707, "step": 6833 }, { "epoch": 0.7152276295133438, "grad_norm": 2.059647993743263, "learning_rate": 3.96045486152061e-06, "loss": 0.9069, "step": 6834 }, { "epoch": 0.7153322867608581, "grad_norm": 3.670243670467124, "learning_rate": 3.957753537895607e-06, "loss": 0.9077, "step": 6835 }, { "epoch": 0.7154369440083725, "grad_norm": 2.4214679282114204, "learning_rate": 3.9550529085369735e-06, "loss": 0.9734, "step": 6836 }, { "epoch": 0.715541601255887, "grad_norm": 1.908703225014119, "learning_rate": 3.952352973755012e-06, "loss": 0.9761, "step": 6837 }, { "epoch": 0.7156462585034014, "grad_norm": 1.9494300614897873, "learning_rate": 3.949653733859958e-06, "loss": 0.9582, "step": 6838 }, { "epoch": 0.7157509157509158, "grad_norm": 2.3349105601456652, "learning_rate": 3.946955189161954e-06, "loss": 0.9848, "step": 6839 }, { "epoch": 0.7158555729984302, "grad_norm": 1.98806617370881, "learning_rate": 3.944257339971071e-06, "loss": 1.0294, "step": 6840 }, { "epoch": 0.7159602302459446, "grad_norm": 2.032978994521527, "learning_rate": 3.941560186597295e-06, "loss": 0.7697, "step": 6841 }, { "epoch": 0.7160648874934589, "grad_norm": 2.2859142638467196, "learning_rate": 3.938863729350532e-06, "loss": 0.9599, "step": 6842 }, { "epoch": 0.7161695447409733, "grad_norm": 1.95473032037277, "learning_rate": 3.936167968540617e-06, "loss": 0.8378, "step": 6843 }, { "epoch": 0.7162742019884877, "grad_norm": 2.2383483044471206, "learning_rate": 3.933472904477291e-06, "loss": 0.8798, "step": 6844 }, { "epoch": 0.7163788592360021, "grad_norm": 1.6286590487546038, "learning_rate": 3.930778537470231e-06, "loss": 0.9243, "step": 6845 }, { "epoch": 0.7164835164835165, "grad_norm": 1.8458670553744083, "learning_rate": 3.928084867829021e-06, "loss": 0.7944, "step": 6846 }, { "epoch": 0.7165881737310309, "grad_norm": 2.270745347465646, "learning_rate": 3.9253918958631635e-06, "loss": 1.0004, "step": 6847 }, { "epoch": 0.7166928309785453, "grad_norm": 1.8785468739801112, "learning_rate": 3.922699621882097e-06, "loss": 0.8825, "step": 6848 }, { "epoch": 0.7167974882260597, "grad_norm": 2.05640939822826, "learning_rate": 3.920008046195163e-06, "loss": 0.8316, "step": 6849 }, { "epoch": 0.716902145473574, "grad_norm": 2.2722674004261476, "learning_rate": 3.917317169111629e-06, "loss": 1.0579, "step": 6850 }, { "epoch": 0.7170068027210884, "grad_norm": 2.4202451915007512, "learning_rate": 3.9146269909406805e-06, "loss": 0.9921, "step": 6851 }, { "epoch": 0.7171114599686028, "grad_norm": 2.2412583484266295, "learning_rate": 3.911937511991428e-06, "loss": 0.8362, "step": 6852 }, { "epoch": 0.7172161172161172, "grad_norm": 2.0935113321976733, "learning_rate": 3.9092487325729e-06, "loss": 0.7682, "step": 6853 }, { "epoch": 0.7173207744636316, "grad_norm": 2.0222550806048267, "learning_rate": 3.906560652994039e-06, "loss": 0.8426, "step": 6854 }, { "epoch": 0.717425431711146, "grad_norm": 1.9696243728220855, "learning_rate": 3.903873273563713e-06, "loss": 0.8273, "step": 6855 }, { "epoch": 0.7175300889586604, "grad_norm": 1.8907957710602337, "learning_rate": 3.901186594590702e-06, "loss": 0.8415, "step": 6856 }, { "epoch": 0.7176347462061747, "grad_norm": 1.8888147086385425, "learning_rate": 3.898500616383718e-06, "loss": 0.9082, "step": 6857 }, { "epoch": 0.7177394034536891, "grad_norm": 1.9341244750710724, "learning_rate": 3.895815339251382e-06, "loss": 0.8821, "step": 6858 }, { "epoch": 0.7178440607012035, "grad_norm": 2.2280568062526522, "learning_rate": 3.893130763502239e-06, "loss": 0.8867, "step": 6859 }, { "epoch": 0.717948717948718, "grad_norm": 1.9985998533707623, "learning_rate": 3.890446889444751e-06, "loss": 0.8671, "step": 6860 }, { "epoch": 0.7180533751962324, "grad_norm": 1.9789150948560013, "learning_rate": 3.887763717387296e-06, "loss": 0.9274, "step": 6861 }, { "epoch": 0.7181580324437468, "grad_norm": 2.008681323462506, "learning_rate": 3.8850812476381814e-06, "loss": 0.918, "step": 6862 }, { "epoch": 0.7182626896912612, "grad_norm": 1.6970462178438779, "learning_rate": 3.882399480505632e-06, "loss": 0.7673, "step": 6863 }, { "epoch": 0.7183673469387755, "grad_norm": 1.9737100576957622, "learning_rate": 3.879718416297783e-06, "loss": 0.8705, "step": 6864 }, { "epoch": 0.7184720041862899, "grad_norm": 2.116672494256888, "learning_rate": 3.877038055322696e-06, "loss": 0.9756, "step": 6865 }, { "epoch": 0.7185766614338043, "grad_norm": 1.6446313933488226, "learning_rate": 3.874358397888345e-06, "loss": 0.7854, "step": 6866 }, { "epoch": 0.7186813186813187, "grad_norm": 2.2698290566150323, "learning_rate": 3.871679444302635e-06, "loss": 0.8557, "step": 6867 }, { "epoch": 0.7187859759288331, "grad_norm": 2.1256730677441578, "learning_rate": 3.86900119487338e-06, "loss": 0.8339, "step": 6868 }, { "epoch": 0.7188906331763475, "grad_norm": 1.9881658784209284, "learning_rate": 3.866323649908318e-06, "loss": 0.9258, "step": 6869 }, { "epoch": 0.7189952904238619, "grad_norm": 2.124198993385882, "learning_rate": 3.8636468097150995e-06, "loss": 0.883, "step": 6870 }, { "epoch": 0.7190999476713762, "grad_norm": 1.9123797963557438, "learning_rate": 3.860970674601301e-06, "loss": 0.8941, "step": 6871 }, { "epoch": 0.7192046049188906, "grad_norm": 2.0104065734716636, "learning_rate": 3.85829524487442e-06, "loss": 0.8806, "step": 6872 }, { "epoch": 0.719309262166405, "grad_norm": 2.1166454011026707, "learning_rate": 3.855620520841868e-06, "loss": 0.923, "step": 6873 }, { "epoch": 0.7194139194139194, "grad_norm": 1.9204632832965207, "learning_rate": 3.8529465028109725e-06, "loss": 0.8695, "step": 6874 }, { "epoch": 0.7195185766614338, "grad_norm": 1.937070827448725, "learning_rate": 3.8502731910889826e-06, "loss": 0.835, "step": 6875 }, { "epoch": 0.7196232339089482, "grad_norm": 2.423772623864867, "learning_rate": 3.8476005859830725e-06, "loss": 1.0035, "step": 6876 }, { "epoch": 0.7197278911564626, "grad_norm": 2.2319802937639213, "learning_rate": 3.844928687800328e-06, "loss": 0.9172, "step": 6877 }, { "epoch": 0.7198325484039769, "grad_norm": 1.8816969569554647, "learning_rate": 3.842257496847751e-06, "loss": 0.8183, "step": 6878 }, { "epoch": 0.7199372056514913, "grad_norm": 2.328593060017702, "learning_rate": 3.839587013432273e-06, "loss": 0.9581, "step": 6879 }, { "epoch": 0.7200418628990057, "grad_norm": 2.357599652370192, "learning_rate": 3.836917237860738e-06, "loss": 0.8882, "step": 6880 }, { "epoch": 0.7201465201465201, "grad_norm": 2.112443593657366, "learning_rate": 3.834248170439901e-06, "loss": 0.8824, "step": 6881 }, { "epoch": 0.7202511773940345, "grad_norm": 2.5342213753725265, "learning_rate": 3.831579811476452e-06, "loss": 0.8558, "step": 6882 }, { "epoch": 0.720355834641549, "grad_norm": 2.187453453613242, "learning_rate": 3.8289121612769885e-06, "loss": 1.0146, "step": 6883 }, { "epoch": 0.7204604918890634, "grad_norm": 2.0491956184185764, "learning_rate": 3.8262452201480275e-06, "loss": 0.9876, "step": 6884 }, { "epoch": 0.7205651491365777, "grad_norm": 1.9619918810973644, "learning_rate": 3.823578988396003e-06, "loss": 0.9359, "step": 6885 }, { "epoch": 0.7206698063840921, "grad_norm": 2.435486034108263, "learning_rate": 3.820913466327276e-06, "loss": 0.9142, "step": 6886 }, { "epoch": 0.7207744636316065, "grad_norm": 2.128985121841604, "learning_rate": 3.818248654248115e-06, "loss": 0.8178, "step": 6887 }, { "epoch": 0.7208791208791209, "grad_norm": 2.1846007264436063, "learning_rate": 3.81558455246472e-06, "loss": 0.8921, "step": 6888 }, { "epoch": 0.7209837781266353, "grad_norm": 2.134017674581687, "learning_rate": 3.8129211612831953e-06, "loss": 0.9682, "step": 6889 }, { "epoch": 0.7210884353741497, "grad_norm": 2.4046779312825386, "learning_rate": 3.8102584810095687e-06, "loss": 0.801, "step": 6890 }, { "epoch": 0.7211930926216641, "grad_norm": 2.2865554223892066, "learning_rate": 3.8075965119497936e-06, "loss": 0.9072, "step": 6891 }, { "epoch": 0.7212977498691785, "grad_norm": 1.8509159996167859, "learning_rate": 3.804935254409734e-06, "loss": 0.88, "step": 6892 }, { "epoch": 0.7214024071166928, "grad_norm": 2.2394314733138647, "learning_rate": 3.8022747086951715e-06, "loss": 0.904, "step": 6893 }, { "epoch": 0.7215070643642072, "grad_norm": 1.9861651335567048, "learning_rate": 3.7996148751118057e-06, "loss": 0.8216, "step": 6894 }, { "epoch": 0.7216117216117216, "grad_norm": 1.9218843095758162, "learning_rate": 3.7969557539652636e-06, "loss": 0.918, "step": 6895 }, { "epoch": 0.721716378859236, "grad_norm": 1.9157472248957887, "learning_rate": 3.7942973455610766e-06, "loss": 0.8062, "step": 6896 }, { "epoch": 0.7218210361067504, "grad_norm": 1.9189826257514782, "learning_rate": 3.791639650204709e-06, "loss": 0.8006, "step": 6897 }, { "epoch": 0.7219256933542648, "grad_norm": 1.8660285862083692, "learning_rate": 3.7889826682015306e-06, "loss": 0.7262, "step": 6898 }, { "epoch": 0.7220303506017792, "grad_norm": 2.0909598391703046, "learning_rate": 3.7863263998568346e-06, "loss": 0.9164, "step": 6899 }, { "epoch": 0.7221350078492935, "grad_norm": 2.5637117016782507, "learning_rate": 3.7836708454758287e-06, "loss": 0.8562, "step": 6900 }, { "epoch": 0.7222396650968079, "grad_norm": 2.09685317781106, "learning_rate": 3.781016005363648e-06, "loss": 0.9648, "step": 6901 }, { "epoch": 0.7223443223443223, "grad_norm": 1.832463976356295, "learning_rate": 3.7783618798253354e-06, "loss": 0.8759, "step": 6902 }, { "epoch": 0.7224489795918367, "grad_norm": 1.9229291271734896, "learning_rate": 3.7757084691658552e-06, "loss": 0.8913, "step": 6903 }, { "epoch": 0.7225536368393511, "grad_norm": 2.519016468836667, "learning_rate": 3.7730557736900865e-06, "loss": 0.9351, "step": 6904 }, { "epoch": 0.7226582940868655, "grad_norm": 1.9402129919604572, "learning_rate": 3.7704037937028324e-06, "loss": 0.9447, "step": 6905 }, { "epoch": 0.72276295133438, "grad_norm": 1.9677225873579622, "learning_rate": 3.7677525295088146e-06, "loss": 0.8379, "step": 6906 }, { "epoch": 0.7228676085818942, "grad_norm": 2.3777845625897482, "learning_rate": 3.7651019814126656e-06, "loss": 0.8652, "step": 6907 }, { "epoch": 0.7229722658294087, "grad_norm": 1.9315748698397706, "learning_rate": 3.762452149718938e-06, "loss": 0.9468, "step": 6908 }, { "epoch": 0.7230769230769231, "grad_norm": 2.205149403013181, "learning_rate": 3.7598030347321e-06, "loss": 0.8733, "step": 6909 }, { "epoch": 0.7231815803244375, "grad_norm": 1.8693736948006856, "learning_rate": 3.757154636756547e-06, "loss": 0.8561, "step": 6910 }, { "epoch": 0.7232862375719519, "grad_norm": 1.873969507521071, "learning_rate": 3.7545069560965817e-06, "loss": 0.8318, "step": 6911 }, { "epoch": 0.7233908948194663, "grad_norm": 2.42283057627858, "learning_rate": 3.751859993056428e-06, "loss": 0.8737, "step": 6912 }, { "epoch": 0.7234955520669807, "grad_norm": 1.865784635381646, "learning_rate": 3.749213747940225e-06, "loss": 1.0405, "step": 6913 }, { "epoch": 0.723600209314495, "grad_norm": 2.6319699814934623, "learning_rate": 3.7465682210520325e-06, "loss": 0.934, "step": 6914 }, { "epoch": 0.7237048665620094, "grad_norm": 1.9195838969682297, "learning_rate": 3.7439234126958326e-06, "loss": 0.8494, "step": 6915 }, { "epoch": 0.7238095238095238, "grad_norm": 2.1904292611117726, "learning_rate": 3.741279323175515e-06, "loss": 0.9698, "step": 6916 }, { "epoch": 0.7239141810570382, "grad_norm": 2.2002972862059833, "learning_rate": 3.7386359527948914e-06, "loss": 0.8893, "step": 6917 }, { "epoch": 0.7240188383045526, "grad_norm": 2.3913339759973917, "learning_rate": 3.7359933018576887e-06, "loss": 0.8367, "step": 6918 }, { "epoch": 0.724123495552067, "grad_norm": 2.1969314364288484, "learning_rate": 3.733351370667552e-06, "loss": 0.9061, "step": 6919 }, { "epoch": 0.7242281527995814, "grad_norm": 2.024923979347576, "learning_rate": 3.7307101595280495e-06, "loss": 0.9443, "step": 6920 }, { "epoch": 0.7243328100470957, "grad_norm": 2.113986243612249, "learning_rate": 3.7280696687426578e-06, "loss": 0.9265, "step": 6921 }, { "epoch": 0.7244374672946101, "grad_norm": 1.950614982154138, "learning_rate": 3.725429898614773e-06, "loss": 0.9245, "step": 6922 }, { "epoch": 0.7245421245421245, "grad_norm": 2.1324732521905205, "learning_rate": 3.7227908494477174e-06, "loss": 0.9454, "step": 6923 }, { "epoch": 0.7246467817896389, "grad_norm": 2.2386944352919906, "learning_rate": 3.7201525215447133e-06, "loss": 0.9405, "step": 6924 }, { "epoch": 0.7247514390371533, "grad_norm": 2.0332904488764236, "learning_rate": 3.7175149152089185e-06, "loss": 0.982, "step": 6925 }, { "epoch": 0.7248560962846677, "grad_norm": 1.8576178852621916, "learning_rate": 3.7148780307433975e-06, "loss": 0.7685, "step": 6926 }, { "epoch": 0.7249607535321821, "grad_norm": 2.1484299818831945, "learning_rate": 3.7122418684511306e-06, "loss": 0.8236, "step": 6927 }, { "epoch": 0.7250654107796964, "grad_norm": 2.516712152720112, "learning_rate": 3.7096064286350164e-06, "loss": 0.8525, "step": 6928 }, { "epoch": 0.7251700680272108, "grad_norm": 2.1566899339097936, "learning_rate": 3.706971711597879e-06, "loss": 0.926, "step": 6929 }, { "epoch": 0.7252747252747253, "grad_norm": 1.7869536827919366, "learning_rate": 3.7043377176424467e-06, "loss": 0.9757, "step": 6930 }, { "epoch": 0.7253793825222397, "grad_norm": 1.70469612797693, "learning_rate": 3.701704447071376e-06, "loss": 0.8288, "step": 6931 }, { "epoch": 0.7254840397697541, "grad_norm": 1.8514012748832875, "learning_rate": 3.699071900187232e-06, "loss": 0.8338, "step": 6932 }, { "epoch": 0.7255886970172685, "grad_norm": 2.3795752920291773, "learning_rate": 3.696440077292497e-06, "loss": 0.862, "step": 6933 }, { "epoch": 0.7256933542647829, "grad_norm": 2.2429703352322004, "learning_rate": 3.6938089786895783e-06, "loss": 1.0258, "step": 6934 }, { "epoch": 0.7257980115122973, "grad_norm": 1.986089459097724, "learning_rate": 3.691178604680793e-06, "loss": 0.8968, "step": 6935 }, { "epoch": 0.7259026687598116, "grad_norm": 1.8760611340077848, "learning_rate": 3.688548955568375e-06, "loss": 0.8637, "step": 6936 }, { "epoch": 0.726007326007326, "grad_norm": 1.8062260800693908, "learning_rate": 3.685920031654476e-06, "loss": 0.8974, "step": 6937 }, { "epoch": 0.7261119832548404, "grad_norm": 2.0910032713345155, "learning_rate": 3.683291833241163e-06, "loss": 0.9255, "step": 6938 }, { "epoch": 0.7262166405023548, "grad_norm": 1.952084219425161, "learning_rate": 3.6806643606304226e-06, "loss": 0.954, "step": 6939 }, { "epoch": 0.7263212977498692, "grad_norm": 2.2475768165777756, "learning_rate": 3.6780376141241624e-06, "loss": 0.7996, "step": 6940 }, { "epoch": 0.7264259549973836, "grad_norm": 2.039613944161897, "learning_rate": 3.6754115940241954e-06, "loss": 0.8029, "step": 6941 }, { "epoch": 0.726530612244898, "grad_norm": 2.4372491132455054, "learning_rate": 3.6727863006322585e-06, "loss": 0.8573, "step": 6942 }, { "epoch": 0.7266352694924123, "grad_norm": 2.135064158968042, "learning_rate": 3.6701617342499975e-06, "loss": 0.9144, "step": 6943 }, { "epoch": 0.7267399267399267, "grad_norm": 2.141195883147221, "learning_rate": 3.66753789517899e-06, "loss": 0.9424, "step": 6944 }, { "epoch": 0.7268445839874411, "grad_norm": 2.3453010874226496, "learning_rate": 3.6649147837207143e-06, "loss": 0.8654, "step": 6945 }, { "epoch": 0.7269492412349555, "grad_norm": 1.7642203173816415, "learning_rate": 3.6622924001765725e-06, "loss": 0.8269, "step": 6946 }, { "epoch": 0.7270538984824699, "grad_norm": 2.2323698391066835, "learning_rate": 3.6596707448478796e-06, "loss": 0.9424, "step": 6947 }, { "epoch": 0.7271585557299843, "grad_norm": 2.631836450156995, "learning_rate": 3.6570498180358705e-06, "loss": 0.9094, "step": 6948 }, { "epoch": 0.7272632129774987, "grad_norm": 1.9050273254003347, "learning_rate": 3.6544296200416994e-06, "loss": 0.9081, "step": 6949 }, { "epoch": 0.727367870225013, "grad_norm": 2.1745905818103606, "learning_rate": 3.6518101511664284e-06, "loss": 0.9148, "step": 6950 }, { "epoch": 0.7274725274725274, "grad_norm": 2.1487595088513487, "learning_rate": 3.6491914117110405e-06, "loss": 0.9708, "step": 6951 }, { "epoch": 0.7275771847200418, "grad_norm": 2.171265284624992, "learning_rate": 3.646573401976431e-06, "loss": 0.9458, "step": 6952 }, { "epoch": 0.7276818419675563, "grad_norm": 2.762939860481466, "learning_rate": 3.64395612226342e-06, "loss": 0.9506, "step": 6953 }, { "epoch": 0.7277864992150707, "grad_norm": 2.076483034212252, "learning_rate": 3.641339572872735e-06, "loss": 0.8965, "step": 6954 }, { "epoch": 0.7278911564625851, "grad_norm": 1.8982571709599116, "learning_rate": 3.638723754105025e-06, "loss": 0.8545, "step": 6955 }, { "epoch": 0.7279958137100995, "grad_norm": 1.6455039950849177, "learning_rate": 3.636108666260847e-06, "loss": 0.7816, "step": 6956 }, { "epoch": 0.7281004709576138, "grad_norm": 2.1303615491352286, "learning_rate": 3.6334943096406873e-06, "loss": 0.967, "step": 6957 }, { "epoch": 0.7282051282051282, "grad_norm": 2.3205676424264197, "learning_rate": 3.630880684544934e-06, "loss": 0.9537, "step": 6958 }, { "epoch": 0.7283097854526426, "grad_norm": 1.9729345731294923, "learning_rate": 3.628267791273906e-06, "loss": 0.8789, "step": 6959 }, { "epoch": 0.728414442700157, "grad_norm": 2.0365158246102704, "learning_rate": 3.625655630127826e-06, "loss": 0.8889, "step": 6960 }, { "epoch": 0.7285190999476714, "grad_norm": 2.0572596307136086, "learning_rate": 3.6230442014068346e-06, "loss": 0.8673, "step": 6961 }, { "epoch": 0.7286237571951858, "grad_norm": 2.265474450323085, "learning_rate": 3.6204335054109897e-06, "loss": 0.8654, "step": 6962 }, { "epoch": 0.7287284144427002, "grad_norm": 2.036780676342137, "learning_rate": 3.6178235424402707e-06, "loss": 0.9066, "step": 6963 }, { "epoch": 0.7288330716902145, "grad_norm": 2.3072772153174985, "learning_rate": 3.6152143127945647e-06, "loss": 0.8402, "step": 6964 }, { "epoch": 0.7289377289377289, "grad_norm": 1.5267072530528316, "learning_rate": 3.6126058167736742e-06, "loss": 0.7113, "step": 6965 }, { "epoch": 0.7290423861852433, "grad_norm": 2.312199363621423, "learning_rate": 3.609998054677327e-06, "loss": 0.894, "step": 6966 }, { "epoch": 0.7291470434327577, "grad_norm": 2.1365598908923964, "learning_rate": 3.607391026805155e-06, "loss": 0.8484, "step": 6967 }, { "epoch": 0.7292517006802721, "grad_norm": 1.814578926920421, "learning_rate": 3.6047847334567153e-06, "loss": 0.8571, "step": 6968 }, { "epoch": 0.7293563579277865, "grad_norm": 2.5103976495037257, "learning_rate": 3.602179174931475e-06, "loss": 0.874, "step": 6969 }, { "epoch": 0.7294610151753009, "grad_norm": 1.9816767917289961, "learning_rate": 3.5995743515288163e-06, "loss": 0.9487, "step": 6970 }, { "epoch": 0.7295656724228152, "grad_norm": 1.6802672691339464, "learning_rate": 3.5969702635480374e-06, "loss": 0.9187, "step": 6971 }, { "epoch": 0.7296703296703296, "grad_norm": 1.9087881371348294, "learning_rate": 3.594366911288358e-06, "loss": 0.8732, "step": 6972 }, { "epoch": 0.729774986917844, "grad_norm": 2.097973491871284, "learning_rate": 3.5917642950489062e-06, "loss": 1.0095, "step": 6973 }, { "epoch": 0.7298796441653584, "grad_norm": 2.0996759377189043, "learning_rate": 3.5891624151287253e-06, "loss": 0.9249, "step": 6974 }, { "epoch": 0.7299843014128728, "grad_norm": 2.0231942048602405, "learning_rate": 3.5865612718267807e-06, "loss": 0.8537, "step": 6975 }, { "epoch": 0.7300889586603873, "grad_norm": 2.2582186780087894, "learning_rate": 3.583960865441949e-06, "loss": 0.8968, "step": 6976 }, { "epoch": 0.7301936159079017, "grad_norm": 2.1127814779861462, "learning_rate": 3.5813611962730165e-06, "loss": 0.8105, "step": 6977 }, { "epoch": 0.7302982731554161, "grad_norm": 2.1735847999153677, "learning_rate": 3.578762264618697e-06, "loss": 0.9232, "step": 6978 }, { "epoch": 0.7304029304029304, "grad_norm": 2.086077873057493, "learning_rate": 3.5761640707776115e-06, "loss": 0.9935, "step": 6979 }, { "epoch": 0.7305075876504448, "grad_norm": 1.926989359894305, "learning_rate": 3.573566615048297e-06, "loss": 0.9609, "step": 6980 }, { "epoch": 0.7306122448979592, "grad_norm": 1.7955588355729455, "learning_rate": 3.570969897729202e-06, "loss": 0.9249, "step": 6981 }, { "epoch": 0.7307169021454736, "grad_norm": 2.0822537293283134, "learning_rate": 3.5683739191187027e-06, "loss": 0.92, "step": 6982 }, { "epoch": 0.730821559392988, "grad_norm": 1.988495753359855, "learning_rate": 3.565778679515075e-06, "loss": 0.7726, "step": 6983 }, { "epoch": 0.7309262166405024, "grad_norm": 2.1187421281363172, "learning_rate": 3.5631841792165236e-06, "loss": 0.8689, "step": 6984 }, { "epoch": 0.7310308738880168, "grad_norm": 1.8611241031497499, "learning_rate": 3.5605904185211582e-06, "loss": 0.931, "step": 6985 }, { "epoch": 0.7311355311355311, "grad_norm": 1.8366170447158119, "learning_rate": 3.5579973977270057e-06, "loss": 0.864, "step": 6986 }, { "epoch": 0.7312401883830455, "grad_norm": 2.204065983976754, "learning_rate": 3.5554051171320136e-06, "loss": 0.9756, "step": 6987 }, { "epoch": 0.7313448456305599, "grad_norm": 2.133144515554238, "learning_rate": 3.552813577034039e-06, "loss": 0.9533, "step": 6988 }, { "epoch": 0.7314495028780743, "grad_norm": 1.9662548833887095, "learning_rate": 3.550222777730854e-06, "loss": 0.8933, "step": 6989 }, { "epoch": 0.7315541601255887, "grad_norm": 1.8080125380457004, "learning_rate": 3.5476327195201463e-06, "loss": 0.8176, "step": 6990 }, { "epoch": 0.7316588173731031, "grad_norm": 2.097754039027627, "learning_rate": 3.545043402699514e-06, "loss": 0.9289, "step": 6991 }, { "epoch": 0.7317634746206175, "grad_norm": 1.9647814759624955, "learning_rate": 3.542454827566486e-06, "loss": 0.9348, "step": 6992 }, { "epoch": 0.7318681318681318, "grad_norm": 2.183568175809098, "learning_rate": 3.5398669944184894e-06, "loss": 0.8125, "step": 6993 }, { "epoch": 0.7319727891156462, "grad_norm": 2.2546413328737906, "learning_rate": 3.5372799035528716e-06, "loss": 0.8066, "step": 6994 }, { "epoch": 0.7320774463631606, "grad_norm": 2.018409429501914, "learning_rate": 3.5346935552668936e-06, "loss": 0.876, "step": 6995 }, { "epoch": 0.732182103610675, "grad_norm": 1.8056317532440103, "learning_rate": 3.5321079498577292e-06, "loss": 0.7721, "step": 6996 }, { "epoch": 0.7322867608581894, "grad_norm": 2.3671016869226147, "learning_rate": 3.5295230876224763e-06, "loss": 0.9053, "step": 6997 }, { "epoch": 0.7323914181057039, "grad_norm": 2.26537165470211, "learning_rate": 3.526938968858139e-06, "loss": 0.8924, "step": 6998 }, { "epoch": 0.7324960753532183, "grad_norm": 2.261177073983615, "learning_rate": 3.5243555938616348e-06, "loss": 0.7816, "step": 6999 }, { "epoch": 0.7326007326007326, "grad_norm": 2.0780543997404592, "learning_rate": 3.521772962929798e-06, "loss": 0.9149, "step": 7000 }, { "epoch": 0.732705389848247, "grad_norm": 2.0900590029616897, "learning_rate": 3.5191910763593808e-06, "loss": 0.8636, "step": 7001 }, { "epoch": 0.7328100470957614, "grad_norm": 2.0840433266339184, "learning_rate": 3.5166099344470493e-06, "loss": 0.8882, "step": 7002 }, { "epoch": 0.7329147043432758, "grad_norm": 2.1351284570724713, "learning_rate": 3.5140295374893797e-06, "loss": 0.8069, "step": 7003 }, { "epoch": 0.7330193615907902, "grad_norm": 2.104817813524145, "learning_rate": 3.511449885782865e-06, "loss": 0.9998, "step": 7004 }, { "epoch": 0.7331240188383046, "grad_norm": 1.977905622536921, "learning_rate": 3.5088709796239086e-06, "loss": 0.8087, "step": 7005 }, { "epoch": 0.733228676085819, "grad_norm": 1.803351442799279, "learning_rate": 3.5062928193088385e-06, "loss": 0.8974, "step": 7006 }, { "epoch": 0.7333333333333333, "grad_norm": 2.1137702304154575, "learning_rate": 3.5037154051338883e-06, "loss": 0.9726, "step": 7007 }, { "epoch": 0.7334379905808477, "grad_norm": 2.2621279545247024, "learning_rate": 3.501138737395208e-06, "loss": 0.9225, "step": 7008 }, { "epoch": 0.7335426478283621, "grad_norm": 2.042349787240519, "learning_rate": 3.4985628163888564e-06, "loss": 1.043, "step": 7009 }, { "epoch": 0.7336473050758765, "grad_norm": 2.1738234952277793, "learning_rate": 3.4959876424108173e-06, "loss": 0.9549, "step": 7010 }, { "epoch": 0.7337519623233909, "grad_norm": 1.8561044157153646, "learning_rate": 3.4934132157569866e-06, "loss": 0.9149, "step": 7011 }, { "epoch": 0.7338566195709053, "grad_norm": 1.8449197732704996, "learning_rate": 3.4908395367231683e-06, "loss": 0.7587, "step": 7012 }, { "epoch": 0.7339612768184197, "grad_norm": 2.0696609987749945, "learning_rate": 3.488266605605082e-06, "loss": 0.9359, "step": 7013 }, { "epoch": 0.734065934065934, "grad_norm": 2.138497203840195, "learning_rate": 3.4856944226983637e-06, "loss": 0.8327, "step": 7014 }, { "epoch": 0.7341705913134484, "grad_norm": 1.6468331397988603, "learning_rate": 3.4831229882985585e-06, "loss": 0.8576, "step": 7015 }, { "epoch": 0.7342752485609628, "grad_norm": 1.977918872042594, "learning_rate": 3.4805523027011368e-06, "loss": 0.9462, "step": 7016 }, { "epoch": 0.7343799058084772, "grad_norm": 1.8654816094421645, "learning_rate": 3.477982366201468e-06, "loss": 0.8792, "step": 7017 }, { "epoch": 0.7344845630559916, "grad_norm": 2.2589654681645936, "learning_rate": 3.475413179094851e-06, "loss": 0.9523, "step": 7018 }, { "epoch": 0.734589220303506, "grad_norm": 1.9481397990624987, "learning_rate": 3.4728447416764854e-06, "loss": 0.9307, "step": 7019 }, { "epoch": 0.7346938775510204, "grad_norm": 1.8809532054531273, "learning_rate": 3.470277054241488e-06, "loss": 0.8929, "step": 7020 }, { "epoch": 0.7347985347985349, "grad_norm": 1.7678658679826986, "learning_rate": 3.467710117084897e-06, "loss": 0.8018, "step": 7021 }, { "epoch": 0.7349031920460491, "grad_norm": 2.14212227669439, "learning_rate": 3.4651439305016565e-06, "loss": 0.8358, "step": 7022 }, { "epoch": 0.7350078492935636, "grad_norm": 2.0802743972041218, "learning_rate": 3.4625784947866258e-06, "loss": 0.8696, "step": 7023 }, { "epoch": 0.735112506541078, "grad_norm": 1.945003157814486, "learning_rate": 3.460013810234576e-06, "loss": 0.9213, "step": 7024 }, { "epoch": 0.7352171637885924, "grad_norm": 2.0651692861153945, "learning_rate": 3.4574498771401997e-06, "loss": 0.8322, "step": 7025 }, { "epoch": 0.7353218210361068, "grad_norm": 1.8951231983789885, "learning_rate": 3.454886695798093e-06, "loss": 0.9921, "step": 7026 }, { "epoch": 0.7354264782836212, "grad_norm": 2.034145921283138, "learning_rate": 3.452324266502777e-06, "loss": 0.9316, "step": 7027 }, { "epoch": 0.7355311355311356, "grad_norm": 2.632863001346807, "learning_rate": 3.4497625895486755e-06, "loss": 0.9863, "step": 7028 }, { "epoch": 0.7356357927786499, "grad_norm": 1.8786479307461825, "learning_rate": 3.4472016652301276e-06, "loss": 0.8824, "step": 7029 }, { "epoch": 0.7357404500261643, "grad_norm": 2.0135193900407584, "learning_rate": 3.4446414938413964e-06, "loss": 0.9466, "step": 7030 }, { "epoch": 0.7358451072736787, "grad_norm": 2.2920626537087316, "learning_rate": 3.442082075676646e-06, "loss": 0.864, "step": 7031 }, { "epoch": 0.7359497645211931, "grad_norm": 1.9835068207886113, "learning_rate": 3.4395234110299593e-06, "loss": 1.0386, "step": 7032 }, { "epoch": 0.7360544217687075, "grad_norm": 1.8964138450221253, "learning_rate": 3.4369655001953327e-06, "loss": 0.8778, "step": 7033 }, { "epoch": 0.7361590790162219, "grad_norm": 1.9000636515942269, "learning_rate": 3.4344083434666707e-06, "loss": 0.9043, "step": 7034 }, { "epoch": 0.7362637362637363, "grad_norm": 1.885698255986817, "learning_rate": 3.4318519411378006e-06, "loss": 0.8209, "step": 7035 }, { "epoch": 0.7363683935112506, "grad_norm": 1.9675198172784103, "learning_rate": 3.429296293502461e-06, "loss": 0.9245, "step": 7036 }, { "epoch": 0.736473050758765, "grad_norm": 2.3490174091108518, "learning_rate": 3.4267414008542967e-06, "loss": 0.8804, "step": 7037 }, { "epoch": 0.7365777080062794, "grad_norm": 1.981563826198973, "learning_rate": 3.424187263486871e-06, "loss": 0.9156, "step": 7038 }, { "epoch": 0.7366823652537938, "grad_norm": 2.0151298287952373, "learning_rate": 3.421633881693657e-06, "loss": 0.8659, "step": 7039 }, { "epoch": 0.7367870225013082, "grad_norm": 1.7892471082853696, "learning_rate": 3.4190812557680487e-06, "loss": 0.8883, "step": 7040 }, { "epoch": 0.7368916797488226, "grad_norm": 2.6350578287776822, "learning_rate": 3.4165293860033444e-06, "loss": 0.9169, "step": 7041 }, { "epoch": 0.736996336996337, "grad_norm": 1.9233102606504318, "learning_rate": 3.4139782726927597e-06, "loss": 0.8025, "step": 7042 }, { "epoch": 0.7371009942438513, "grad_norm": 1.8506753750571818, "learning_rate": 3.4114279161294195e-06, "loss": 0.9393, "step": 7043 }, { "epoch": 0.7372056514913657, "grad_norm": 1.850056642327913, "learning_rate": 3.4088783166063686e-06, "loss": 0.8943, "step": 7044 }, { "epoch": 0.7373103087388801, "grad_norm": 2.153444187423901, "learning_rate": 3.406329474416563e-06, "loss": 1.0154, "step": 7045 }, { "epoch": 0.7374149659863946, "grad_norm": 2.0167001869694383, "learning_rate": 3.4037813898528682e-06, "loss": 0.8576, "step": 7046 }, { "epoch": 0.737519623233909, "grad_norm": 2.065120817248703, "learning_rate": 3.401234063208064e-06, "loss": 0.839, "step": 7047 }, { "epoch": 0.7376242804814234, "grad_norm": 2.066946242767024, "learning_rate": 3.3986874947748428e-06, "loss": 0.9666, "step": 7048 }, { "epoch": 0.7377289377289378, "grad_norm": 1.8889031726518197, "learning_rate": 3.3961416848458073e-06, "loss": 0.8222, "step": 7049 }, { "epoch": 0.7378335949764521, "grad_norm": 1.9632185729176141, "learning_rate": 3.393596633713483e-06, "loss": 0.8447, "step": 7050 }, { "epoch": 0.7379382522239665, "grad_norm": 2.0510910347919413, "learning_rate": 3.3910523416702977e-06, "loss": 1.0462, "step": 7051 }, { "epoch": 0.7380429094714809, "grad_norm": 2.12388351552657, "learning_rate": 3.388508809008594e-06, "loss": 0.8637, "step": 7052 }, { "epoch": 0.7381475667189953, "grad_norm": 1.8703899844326586, "learning_rate": 3.3859660360206336e-06, "loss": 0.8867, "step": 7053 }, { "epoch": 0.7382522239665097, "grad_norm": 2.022816664688723, "learning_rate": 3.383424022998579e-06, "loss": 0.8605, "step": 7054 }, { "epoch": 0.7383568812140241, "grad_norm": 2.1134498670646216, "learning_rate": 3.3808827702345227e-06, "loss": 0.8655, "step": 7055 }, { "epoch": 0.7384615384615385, "grad_norm": 1.9780213401794018, "learning_rate": 3.3783422780204535e-06, "loss": 0.9235, "step": 7056 }, { "epoch": 0.7385661957090528, "grad_norm": 1.9464458644225906, "learning_rate": 3.37580254664828e-06, "loss": 0.9951, "step": 7057 }, { "epoch": 0.7386708529565672, "grad_norm": 1.8745039327238657, "learning_rate": 3.37326357640982e-06, "loss": 0.7171, "step": 7058 }, { "epoch": 0.7387755102040816, "grad_norm": 2.037899312393001, "learning_rate": 3.370725367596811e-06, "loss": 0.9593, "step": 7059 }, { "epoch": 0.738880167451596, "grad_norm": 2.1099882075886844, "learning_rate": 3.368187920500897e-06, "loss": 0.9113, "step": 7060 }, { "epoch": 0.7389848246991104, "grad_norm": 2.6349773976618667, "learning_rate": 3.3656512354136317e-06, "loss": 0.7943, "step": 7061 }, { "epoch": 0.7390894819466248, "grad_norm": 2.262938094716051, "learning_rate": 3.3631153126264915e-06, "loss": 0.8173, "step": 7062 }, { "epoch": 0.7391941391941392, "grad_norm": 1.9162832453637948, "learning_rate": 3.3605801524308535e-06, "loss": 0.7447, "step": 7063 }, { "epoch": 0.7392987964416536, "grad_norm": 2.4566628443132372, "learning_rate": 3.358045755118019e-06, "loss": 0.7898, "step": 7064 }, { "epoch": 0.7394034536891679, "grad_norm": 2.1132700561524316, "learning_rate": 3.3555121209791906e-06, "loss": 0.8397, "step": 7065 }, { "epoch": 0.7395081109366823, "grad_norm": 2.6164728972304645, "learning_rate": 3.3529792503054903e-06, "loss": 0.9608, "step": 7066 }, { "epoch": 0.7396127681841967, "grad_norm": 2.3074332053564137, "learning_rate": 3.3504471433879493e-06, "loss": 0.9128, "step": 7067 }, { "epoch": 0.7397174254317112, "grad_norm": 2.0330030779609234, "learning_rate": 3.3479158005175082e-06, "loss": 0.9589, "step": 7068 }, { "epoch": 0.7398220826792256, "grad_norm": 2.120921500126816, "learning_rate": 3.345385221985026e-06, "loss": 0.9727, "step": 7069 }, { "epoch": 0.73992673992674, "grad_norm": 2.6676041899583107, "learning_rate": 3.342855408081276e-06, "loss": 0.912, "step": 7070 }, { "epoch": 0.7400313971742544, "grad_norm": 1.9000272361188917, "learning_rate": 3.340326359096935e-06, "loss": 0.7653, "step": 7071 }, { "epoch": 0.7401360544217687, "grad_norm": 2.1277226935445817, "learning_rate": 3.337798075322596e-06, "loss": 0.835, "step": 7072 }, { "epoch": 0.7402407116692831, "grad_norm": 2.1217486595239565, "learning_rate": 3.3352705570487598e-06, "loss": 0.9353, "step": 7073 }, { "epoch": 0.7403453689167975, "grad_norm": 2.099696714940113, "learning_rate": 3.332743804565851e-06, "loss": 0.9437, "step": 7074 }, { "epoch": 0.7404500261643119, "grad_norm": 2.310679915225124, "learning_rate": 3.330217818164195e-06, "loss": 0.8943, "step": 7075 }, { "epoch": 0.7405546834118263, "grad_norm": 1.9472970296459848, "learning_rate": 3.3276925981340336e-06, "loss": 0.8915, "step": 7076 }, { "epoch": 0.7406593406593407, "grad_norm": 2.3805641441348975, "learning_rate": 3.325168144765515e-06, "loss": 0.9253, "step": 7077 }, { "epoch": 0.7407639979068551, "grad_norm": 1.9193799868580086, "learning_rate": 3.3226444583487085e-06, "loss": 1.0081, "step": 7078 }, { "epoch": 0.7408686551543694, "grad_norm": 2.2795516135400287, "learning_rate": 3.320121539173592e-06, "loss": 0.8681, "step": 7079 }, { "epoch": 0.7409733124018838, "grad_norm": 1.9038999366774318, "learning_rate": 3.3175993875300527e-06, "loss": 0.8152, "step": 7080 }, { "epoch": 0.7410779696493982, "grad_norm": 2.209622572681599, "learning_rate": 3.315078003707891e-06, "loss": 1.0209, "step": 7081 }, { "epoch": 0.7411826268969126, "grad_norm": 2.3636374036749004, "learning_rate": 3.3125573879968154e-06, "loss": 0.8659, "step": 7082 }, { "epoch": 0.741287284144427, "grad_norm": 2.125287520788546, "learning_rate": 3.310037540686455e-06, "loss": 1.0528, "step": 7083 }, { "epoch": 0.7413919413919414, "grad_norm": 2.015778138651614, "learning_rate": 3.307518462066344e-06, "loss": 0.8942, "step": 7084 }, { "epoch": 0.7414965986394558, "grad_norm": 2.4686495141026987, "learning_rate": 3.305000152425928e-06, "loss": 0.8532, "step": 7085 }, { "epoch": 0.7416012558869701, "grad_norm": 1.9474568598951196, "learning_rate": 3.3024826120545673e-06, "loss": 0.9053, "step": 7086 }, { "epoch": 0.7417059131344845, "grad_norm": 1.6828735085636821, "learning_rate": 3.299965841241525e-06, "loss": 0.8393, "step": 7087 }, { "epoch": 0.7418105703819989, "grad_norm": 2.081597291218235, "learning_rate": 3.297449840275996e-06, "loss": 0.9671, "step": 7088 }, { "epoch": 0.7419152276295133, "grad_norm": 1.6821107578784833, "learning_rate": 3.294934609447068e-06, "loss": 0.7459, "step": 7089 }, { "epoch": 0.7420198848770277, "grad_norm": 2.2858216269881684, "learning_rate": 3.292420149043747e-06, "loss": 0.9515, "step": 7090 }, { "epoch": 0.7421245421245422, "grad_norm": 2.1396541624839793, "learning_rate": 3.2899064593549477e-06, "loss": 0.9349, "step": 7091 }, { "epoch": 0.7422291993720566, "grad_norm": 2.172610516131483, "learning_rate": 3.2873935406694956e-06, "loss": 0.9582, "step": 7092 }, { "epoch": 0.7423338566195709, "grad_norm": 2.1072743883866374, "learning_rate": 3.284881393276137e-06, "loss": 0.9638, "step": 7093 }, { "epoch": 0.7424385138670853, "grad_norm": 2.30997826595737, "learning_rate": 3.2823700174635185e-06, "loss": 0.9079, "step": 7094 }, { "epoch": 0.7425431711145997, "grad_norm": 2.079680297310184, "learning_rate": 3.2798594135202012e-06, "loss": 0.9564, "step": 7095 }, { "epoch": 0.7426478283621141, "grad_norm": 2.2079477477185763, "learning_rate": 3.2773495817346636e-06, "loss": 0.8085, "step": 7096 }, { "epoch": 0.7427524856096285, "grad_norm": 1.9783748322647048, "learning_rate": 3.274840522395283e-06, "loss": 0.9652, "step": 7097 }, { "epoch": 0.7428571428571429, "grad_norm": 2.013884564094195, "learning_rate": 3.272332235790363e-06, "loss": 0.8247, "step": 7098 }, { "epoch": 0.7429618001046573, "grad_norm": 1.8380568856422557, "learning_rate": 3.269824722208108e-06, "loss": 0.7911, "step": 7099 }, { "epoch": 0.7430664573521716, "grad_norm": 1.9699143563198767, "learning_rate": 3.2673179819366363e-06, "loss": 0.917, "step": 7100 }, { "epoch": 0.743171114599686, "grad_norm": 1.8324074187595984, "learning_rate": 3.264812015263973e-06, "loss": 0.8143, "step": 7101 }, { "epoch": 0.7432757718472004, "grad_norm": 2.0807751148157196, "learning_rate": 3.2623068224780663e-06, "loss": 0.9625, "step": 7102 }, { "epoch": 0.7433804290947148, "grad_norm": 2.0925637950807556, "learning_rate": 3.2598024038667655e-06, "loss": 0.8431, "step": 7103 }, { "epoch": 0.7434850863422292, "grad_norm": 2.3755126784483402, "learning_rate": 3.2572987597178274e-06, "loss": 0.8246, "step": 7104 }, { "epoch": 0.7435897435897436, "grad_norm": 1.9385820118174737, "learning_rate": 3.254795890318935e-06, "loss": 0.8357, "step": 7105 }, { "epoch": 0.743694400837258, "grad_norm": 1.9631332561496653, "learning_rate": 3.2522937959576684e-06, "loss": 0.9545, "step": 7106 }, { "epoch": 0.7437990580847724, "grad_norm": 1.9072432029408546, "learning_rate": 3.2497924769215206e-06, "loss": 0.9965, "step": 7107 }, { "epoch": 0.7439037153322867, "grad_norm": 2.3417750032717493, "learning_rate": 3.2472919334979034e-06, "loss": 1.0051, "step": 7108 }, { "epoch": 0.7440083725798011, "grad_norm": 2.00729117210432, "learning_rate": 3.2447921659741333e-06, "loss": 0.9051, "step": 7109 }, { "epoch": 0.7441130298273155, "grad_norm": 1.8562956071816943, "learning_rate": 3.2422931746374375e-06, "loss": 0.8059, "step": 7110 }, { "epoch": 0.7442176870748299, "grad_norm": 2.5424170068230647, "learning_rate": 3.2397949597749525e-06, "loss": 0.767, "step": 7111 }, { "epoch": 0.7443223443223443, "grad_norm": 2.2671400535027204, "learning_rate": 3.2372975216737335e-06, "loss": 0.9178, "step": 7112 }, { "epoch": 0.7444270015698587, "grad_norm": 2.3361550774786988, "learning_rate": 3.234800860620736e-06, "loss": 0.7264, "step": 7113 }, { "epoch": 0.7445316588173732, "grad_norm": 1.9859089354809334, "learning_rate": 3.232304976902837e-06, "loss": 0.9158, "step": 7114 }, { "epoch": 0.7446363160648874, "grad_norm": 1.7913360410024908, "learning_rate": 3.229809870806815e-06, "loss": 0.8756, "step": 7115 }, { "epoch": 0.7447409733124019, "grad_norm": 2.122253887975913, "learning_rate": 3.2273155426193613e-06, "loss": 0.6974, "step": 7116 }, { "epoch": 0.7448456305599163, "grad_norm": 1.790135145026046, "learning_rate": 3.224821992627084e-06, "loss": 0.8797, "step": 7117 }, { "epoch": 0.7449502878074307, "grad_norm": 2.0103996789911998, "learning_rate": 3.2223292211164946e-06, "loss": 0.7686, "step": 7118 }, { "epoch": 0.7450549450549451, "grad_norm": 2.3239725196592413, "learning_rate": 3.2198372283740176e-06, "loss": 0.8821, "step": 7119 }, { "epoch": 0.7451596023024595, "grad_norm": 2.4439097088179667, "learning_rate": 3.217346014685985e-06, "loss": 0.8314, "step": 7120 }, { "epoch": 0.7452642595499739, "grad_norm": 1.8200085048128307, "learning_rate": 3.214855580338644e-06, "loss": 0.8329, "step": 7121 }, { "epoch": 0.7453689167974882, "grad_norm": 1.8651829102105335, "learning_rate": 3.212365925618156e-06, "loss": 0.8808, "step": 7122 }, { "epoch": 0.7454735740450026, "grad_norm": 1.954259108086037, "learning_rate": 3.2098770508105825e-06, "loss": 0.9463, "step": 7123 }, { "epoch": 0.745578231292517, "grad_norm": 2.026272455843576, "learning_rate": 3.207388956201901e-06, "loss": 0.8874, "step": 7124 }, { "epoch": 0.7456828885400314, "grad_norm": 1.9861545009241273, "learning_rate": 3.204901642077999e-06, "loss": 0.836, "step": 7125 }, { "epoch": 0.7457875457875458, "grad_norm": 2.1476725693471126, "learning_rate": 3.2024151087246704e-06, "loss": 0.9005, "step": 7126 }, { "epoch": 0.7458922030350602, "grad_norm": 2.3373997568981313, "learning_rate": 3.199929356427628e-06, "loss": 0.8782, "step": 7127 }, { "epoch": 0.7459968602825746, "grad_norm": 2.0228477518559522, "learning_rate": 3.197444385472489e-06, "loss": 0.8036, "step": 7128 }, { "epoch": 0.7461015175300889, "grad_norm": 1.728215649932167, "learning_rate": 3.1949601961447795e-06, "loss": 0.7443, "step": 7129 }, { "epoch": 0.7462061747776033, "grad_norm": 2.1796559194931606, "learning_rate": 3.1924767887299357e-06, "loss": 0.8856, "step": 7130 }, { "epoch": 0.7463108320251177, "grad_norm": 2.171160116743099, "learning_rate": 3.1899941635133092e-06, "loss": 0.8125, "step": 7131 }, { "epoch": 0.7464154892726321, "grad_norm": 1.688400395699494, "learning_rate": 3.1875123207801616e-06, "loss": 0.7771, "step": 7132 }, { "epoch": 0.7465201465201465, "grad_norm": 2.6231117402138593, "learning_rate": 3.1850312608156596e-06, "loss": 0.9425, "step": 7133 }, { "epoch": 0.7466248037676609, "grad_norm": 1.8438824122826507, "learning_rate": 3.1825509839048806e-06, "loss": 0.788, "step": 7134 }, { "epoch": 0.7467294610151753, "grad_norm": 2.2251217003891828, "learning_rate": 3.1800714903328102e-06, "loss": 0.9457, "step": 7135 }, { "epoch": 0.7468341182626896, "grad_norm": 2.1306082735607808, "learning_rate": 3.1775927803843543e-06, "loss": 0.9576, "step": 7136 }, { "epoch": 0.746938775510204, "grad_norm": 2.1790105073105757, "learning_rate": 3.1751148543443187e-06, "loss": 0.8739, "step": 7137 }, { "epoch": 0.7470434327577185, "grad_norm": 2.239558304766855, "learning_rate": 3.1726377124974217e-06, "loss": 0.8207, "step": 7138 }, { "epoch": 0.7471480900052329, "grad_norm": 2.008095181346031, "learning_rate": 3.1701613551282893e-06, "loss": 0.867, "step": 7139 }, { "epoch": 0.7472527472527473, "grad_norm": 2.3260833446259777, "learning_rate": 3.1676857825214623e-06, "loss": 0.8897, "step": 7140 }, { "epoch": 0.7473574045002617, "grad_norm": 2.070498373715419, "learning_rate": 3.165210994961393e-06, "loss": 0.8824, "step": 7141 }, { "epoch": 0.7474620617477761, "grad_norm": 2.0573703279887807, "learning_rate": 3.162736992732436e-06, "loss": 0.8889, "step": 7142 }, { "epoch": 0.7475667189952904, "grad_norm": 2.3464698580212304, "learning_rate": 3.160263776118858e-06, "loss": 0.9127, "step": 7143 }, { "epoch": 0.7476713762428048, "grad_norm": 2.314229707278681, "learning_rate": 3.1577913454048393e-06, "loss": 0.9432, "step": 7144 }, { "epoch": 0.7477760334903192, "grad_norm": 2.2903886734227856, "learning_rate": 3.1553197008744607e-06, "loss": 0.9033, "step": 7145 }, { "epoch": 0.7478806907378336, "grad_norm": 2.26707032536701, "learning_rate": 3.1528488428117287e-06, "loss": 0.8826, "step": 7146 }, { "epoch": 0.747985347985348, "grad_norm": 2.201793259018066, "learning_rate": 3.150378771500542e-06, "loss": 0.8848, "step": 7147 }, { "epoch": 0.7480900052328624, "grad_norm": 2.153815935771742, "learning_rate": 3.147909487224723e-06, "loss": 1.0046, "step": 7148 }, { "epoch": 0.7481946624803768, "grad_norm": 2.23656638946847, "learning_rate": 3.145440990267994e-06, "loss": 0.8385, "step": 7149 }, { "epoch": 0.7482993197278912, "grad_norm": 1.9518763135706487, "learning_rate": 3.142973280913988e-06, "loss": 0.8302, "step": 7150 }, { "epoch": 0.7484039769754055, "grad_norm": 1.9560563182373518, "learning_rate": 3.140506359446256e-06, "loss": 0.8629, "step": 7151 }, { "epoch": 0.7485086342229199, "grad_norm": 1.827294647023681, "learning_rate": 3.138040226148249e-06, "loss": 0.8265, "step": 7152 }, { "epoch": 0.7486132914704343, "grad_norm": 2.1222980400949245, "learning_rate": 3.1355748813033305e-06, "loss": 0.8273, "step": 7153 }, { "epoch": 0.7487179487179487, "grad_norm": 2.1538492848488855, "learning_rate": 3.1331103251947703e-06, "loss": 0.8809, "step": 7154 }, { "epoch": 0.7488226059654631, "grad_norm": 1.9153204561792283, "learning_rate": 3.130646558105758e-06, "loss": 0.8623, "step": 7155 }, { "epoch": 0.7489272632129775, "grad_norm": 2.146577046698544, "learning_rate": 3.128183580319378e-06, "loss": 0.9102, "step": 7156 }, { "epoch": 0.7490319204604919, "grad_norm": 2.2253981483892598, "learning_rate": 3.125721392118639e-06, "loss": 0.8395, "step": 7157 }, { "epoch": 0.7491365777080062, "grad_norm": 2.252424457310398, "learning_rate": 3.1232599937864483e-06, "loss": 1.0134, "step": 7158 }, { "epoch": 0.7492412349555206, "grad_norm": 2.160152684893339, "learning_rate": 3.1207993856056205e-06, "loss": 0.9069, "step": 7159 }, { "epoch": 0.749345892203035, "grad_norm": 2.163254492760139, "learning_rate": 3.118339567858892e-06, "loss": 0.8737, "step": 7160 }, { "epoch": 0.7494505494505495, "grad_norm": 1.6978836150995644, "learning_rate": 3.1158805408288995e-06, "loss": 0.8209, "step": 7161 }, { "epoch": 0.7495552066980639, "grad_norm": 1.9948469938453408, "learning_rate": 3.1134223047981872e-06, "loss": 0.947, "step": 7162 }, { "epoch": 0.7496598639455783, "grad_norm": 2.261922765416398, "learning_rate": 3.1109648600492126e-06, "loss": 0.8666, "step": 7163 }, { "epoch": 0.7497645211930927, "grad_norm": 2.004553104391324, "learning_rate": 3.108508206864338e-06, "loss": 0.8072, "step": 7164 }, { "epoch": 0.749869178440607, "grad_norm": 1.9717258623857916, "learning_rate": 3.1060523455258406e-06, "loss": 0.8721, "step": 7165 }, { "epoch": 0.7499738356881214, "grad_norm": 2.1108494565639857, "learning_rate": 3.1035972763159074e-06, "loss": 0.8608, "step": 7166 }, { "epoch": 0.7500784929356358, "grad_norm": 2.0578079658481423, "learning_rate": 3.1011429995166288e-06, "loss": 0.8963, "step": 7167 }, { "epoch": 0.7501831501831502, "grad_norm": 1.983828357087619, "learning_rate": 3.098689515410004e-06, "loss": 0.899, "step": 7168 }, { "epoch": 0.7502878074306646, "grad_norm": 2.2998223358846333, "learning_rate": 3.0962368242779406e-06, "loss": 0.8537, "step": 7169 }, { "epoch": 0.750392464678179, "grad_norm": 1.8820805197314077, "learning_rate": 3.0937849264022658e-06, "loss": 0.8407, "step": 7170 }, { "epoch": 0.7504971219256934, "grad_norm": 2.177356874448031, "learning_rate": 3.0913338220647028e-06, "loss": 0.9178, "step": 7171 }, { "epoch": 0.7506017791732077, "grad_norm": 2.198171422534131, "learning_rate": 3.0888835115468883e-06, "loss": 0.9125, "step": 7172 }, { "epoch": 0.7507064364207221, "grad_norm": 2.031436579163277, "learning_rate": 3.086433995130367e-06, "loss": 0.8067, "step": 7173 }, { "epoch": 0.7508110936682365, "grad_norm": 1.8475737903440372, "learning_rate": 3.0839852730965934e-06, "loss": 0.8454, "step": 7174 }, { "epoch": 0.7509157509157509, "grad_norm": 2.25678437649617, "learning_rate": 3.081537345726936e-06, "loss": 0.839, "step": 7175 }, { "epoch": 0.7510204081632653, "grad_norm": 1.9401602491295404, "learning_rate": 3.0790902133026625e-06, "loss": 0.8353, "step": 7176 }, { "epoch": 0.7511250654107797, "grad_norm": 2.0825066629501694, "learning_rate": 3.0766438761049544e-06, "loss": 0.8053, "step": 7177 }, { "epoch": 0.7512297226582941, "grad_norm": 2.451954510004089, "learning_rate": 3.074198334414896e-06, "loss": 0.851, "step": 7178 }, { "epoch": 0.7513343799058084, "grad_norm": 2.048655055859803, "learning_rate": 3.071753588513493e-06, "loss": 0.8167, "step": 7179 }, { "epoch": 0.7514390371533228, "grad_norm": 2.177508435886103, "learning_rate": 3.069309638681647e-06, "loss": 0.9355, "step": 7180 }, { "epoch": 0.7515436944008372, "grad_norm": 2.144395053127334, "learning_rate": 3.066866485200174e-06, "loss": 1.0297, "step": 7181 }, { "epoch": 0.7516483516483516, "grad_norm": 1.8941507496621728, "learning_rate": 3.0644241283497934e-06, "loss": 0.7487, "step": 7182 }, { "epoch": 0.751753008895866, "grad_norm": 2.03238588774476, "learning_rate": 3.0619825684111425e-06, "loss": 0.9322, "step": 7183 }, { "epoch": 0.7518576661433805, "grad_norm": 2.232382445814665, "learning_rate": 3.0595418056647574e-06, "loss": 1.0348, "step": 7184 }, { "epoch": 0.7519623233908949, "grad_norm": 2.027939873344001, "learning_rate": 3.0571018403910914e-06, "loss": 0.8486, "step": 7185 }, { "epoch": 0.7520669806384092, "grad_norm": 1.7087518006551394, "learning_rate": 3.0546626728704986e-06, "loss": 0.7637, "step": 7186 }, { "epoch": 0.7521716378859236, "grad_norm": 2.090097485492809, "learning_rate": 3.0522243033832455e-06, "loss": 0.8497, "step": 7187 }, { "epoch": 0.752276295133438, "grad_norm": 1.9895769202770441, "learning_rate": 3.0497867322094998e-06, "loss": 0.8822, "step": 7188 }, { "epoch": 0.7523809523809524, "grad_norm": 2.40584150601798, "learning_rate": 3.047349959629352e-06, "loss": 0.9995, "step": 7189 }, { "epoch": 0.7524856096284668, "grad_norm": 2.0368993311848977, "learning_rate": 3.0449139859227883e-06, "loss": 0.8247, "step": 7190 }, { "epoch": 0.7525902668759812, "grad_norm": 2.056257702126359, "learning_rate": 3.0424788113697036e-06, "loss": 0.9778, "step": 7191 }, { "epoch": 0.7526949241234956, "grad_norm": 1.9693791719687486, "learning_rate": 3.0400444362499115e-06, "loss": 0.9017, "step": 7192 }, { "epoch": 0.75279958137101, "grad_norm": 2.0591702890964654, "learning_rate": 3.0376108608431188e-06, "loss": 0.9149, "step": 7193 }, { "epoch": 0.7529042386185243, "grad_norm": 2.0700766255920944, "learning_rate": 3.035178085428957e-06, "loss": 0.8404, "step": 7194 }, { "epoch": 0.7530088958660387, "grad_norm": 1.8951621768222429, "learning_rate": 3.0327461102869514e-06, "loss": 0.8727, "step": 7195 }, { "epoch": 0.7531135531135531, "grad_norm": 1.760732012119775, "learning_rate": 3.0303149356965424e-06, "loss": 0.7707, "step": 7196 }, { "epoch": 0.7532182103610675, "grad_norm": 2.0503813015447903, "learning_rate": 3.027884561937072e-06, "loss": 0.9688, "step": 7197 }, { "epoch": 0.7533228676085819, "grad_norm": 2.0021170411447766, "learning_rate": 3.0254549892878038e-06, "loss": 0.922, "step": 7198 }, { "epoch": 0.7534275248560963, "grad_norm": 1.8860849552256713, "learning_rate": 3.0230262180278925e-06, "loss": 0.9059, "step": 7199 }, { "epoch": 0.7535321821036107, "grad_norm": 2.1818774407352404, "learning_rate": 3.020598248436415e-06, "loss": 0.9216, "step": 7200 }, { "epoch": 0.753636839351125, "grad_norm": 1.8584792324138004, "learning_rate": 3.0181710807923492e-06, "loss": 0.7917, "step": 7201 }, { "epoch": 0.7537414965986394, "grad_norm": 1.819519299711091, "learning_rate": 3.015744715374579e-06, "loss": 0.8911, "step": 7202 }, { "epoch": 0.7538461538461538, "grad_norm": 2.164007510379012, "learning_rate": 3.0133191524618956e-06, "loss": 0.9111, "step": 7203 }, { "epoch": 0.7539508110936682, "grad_norm": 1.8081808504852512, "learning_rate": 3.010894392333009e-06, "loss": 1.0013, "step": 7204 }, { "epoch": 0.7540554683411826, "grad_norm": 1.9661369597242528, "learning_rate": 3.008470435266525e-06, "loss": 0.9094, "step": 7205 }, { "epoch": 0.754160125588697, "grad_norm": 2.1709315190330494, "learning_rate": 3.0060472815409614e-06, "loss": 0.856, "step": 7206 }, { "epoch": 0.7542647828362115, "grad_norm": 2.077801523128462, "learning_rate": 3.00362493143474e-06, "loss": 0.8178, "step": 7207 }, { "epoch": 0.7543694400837258, "grad_norm": 2.0343924494049737, "learning_rate": 3.001203385226198e-06, "loss": 0.9672, "step": 7208 }, { "epoch": 0.7544740973312402, "grad_norm": 2.612125199657769, "learning_rate": 2.9987826431935773e-06, "loss": 0.7652, "step": 7209 }, { "epoch": 0.7545787545787546, "grad_norm": 1.9881607065905555, "learning_rate": 2.9963627056150234e-06, "loss": 0.8869, "step": 7210 }, { "epoch": 0.754683411826269, "grad_norm": 2.2653350892421775, "learning_rate": 2.993943572768594e-06, "loss": 1.0044, "step": 7211 }, { "epoch": 0.7547880690737834, "grad_norm": 1.9442254836980624, "learning_rate": 2.9915252449322463e-06, "loss": 0.8595, "step": 7212 }, { "epoch": 0.7548927263212978, "grad_norm": 1.7828004495021605, "learning_rate": 2.9891077223838594e-06, "loss": 0.7383, "step": 7213 }, { "epoch": 0.7549973835688122, "grad_norm": 2.1375811309701143, "learning_rate": 2.9866910054012078e-06, "loss": 0.8917, "step": 7214 }, { "epoch": 0.7551020408163265, "grad_norm": 1.8434137581998618, "learning_rate": 2.9842750942619767e-06, "loss": 0.8375, "step": 7215 }, { "epoch": 0.7552066980638409, "grad_norm": 1.8544288019845543, "learning_rate": 2.9818599892437572e-06, "loss": 0.8359, "step": 7216 }, { "epoch": 0.7553113553113553, "grad_norm": 2.190544453353631, "learning_rate": 2.979445690624051e-06, "loss": 0.9268, "step": 7217 }, { "epoch": 0.7554160125588697, "grad_norm": 1.6724087178652072, "learning_rate": 2.977032198680272e-06, "loss": 0.788, "step": 7218 }, { "epoch": 0.7555206698063841, "grad_norm": 2.444110327040675, "learning_rate": 2.9746195136897294e-06, "loss": 1.0125, "step": 7219 }, { "epoch": 0.7556253270538985, "grad_norm": 2.395682775915913, "learning_rate": 2.9722076359296457e-06, "loss": 0.9869, "step": 7220 }, { "epoch": 0.7557299843014129, "grad_norm": 2.1788752080998837, "learning_rate": 2.9697965656771534e-06, "loss": 0.8759, "step": 7221 }, { "epoch": 0.7558346415489272, "grad_norm": 2.3785762750866484, "learning_rate": 2.967386303209283e-06, "loss": 0.9297, "step": 7222 }, { "epoch": 0.7559392987964416, "grad_norm": 1.9232465597929709, "learning_rate": 2.9649768488029862e-06, "loss": 0.867, "step": 7223 }, { "epoch": 0.756043956043956, "grad_norm": 1.9326752300709829, "learning_rate": 2.962568202735111e-06, "loss": 0.8029, "step": 7224 }, { "epoch": 0.7561486132914704, "grad_norm": 2.127211853559956, "learning_rate": 2.960160365282413e-06, "loss": 0.855, "step": 7225 }, { "epoch": 0.7562532705389848, "grad_norm": 2.268097028045051, "learning_rate": 2.957753336721563e-06, "loss": 0.9204, "step": 7226 }, { "epoch": 0.7563579277864992, "grad_norm": 2.0703952112572157, "learning_rate": 2.955347117329127e-06, "loss": 0.9523, "step": 7227 }, { "epoch": 0.7564625850340136, "grad_norm": 2.6447867664491347, "learning_rate": 2.9529417073815925e-06, "loss": 0.8857, "step": 7228 }, { "epoch": 0.7565672422815279, "grad_norm": 2.5816773745873927, "learning_rate": 2.950537107155341e-06, "loss": 0.9542, "step": 7229 }, { "epoch": 0.7566718995290423, "grad_norm": 2.16016794368315, "learning_rate": 2.9481333169266667e-06, "loss": 0.9248, "step": 7230 }, { "epoch": 0.7567765567765568, "grad_norm": 2.0925825053657405, "learning_rate": 2.945730336971767e-06, "loss": 0.8558, "step": 7231 }, { "epoch": 0.7568812140240712, "grad_norm": 2.0853760218196413, "learning_rate": 2.9433281675667545e-06, "loss": 0.9312, "step": 7232 }, { "epoch": 0.7569858712715856, "grad_norm": 2.2418754596740755, "learning_rate": 2.940926808987642e-06, "loss": 0.7494, "step": 7233 }, { "epoch": 0.7570905285191, "grad_norm": 1.958769862807278, "learning_rate": 2.938526261510346e-06, "loss": 0.9602, "step": 7234 }, { "epoch": 0.7571951857666144, "grad_norm": 1.9315836818382839, "learning_rate": 2.9361265254106997e-06, "loss": 0.9757, "step": 7235 }, { "epoch": 0.7572998430141288, "grad_norm": 2.2173927927463297, "learning_rate": 2.933727600964433e-06, "loss": 0.8727, "step": 7236 }, { "epoch": 0.7574045002616431, "grad_norm": 1.7683552818088055, "learning_rate": 2.9313294884471945e-06, "loss": 0.7764, "step": 7237 }, { "epoch": 0.7575091575091575, "grad_norm": 2.042604009056469, "learning_rate": 2.9289321881345257e-06, "loss": 0.8552, "step": 7238 }, { "epoch": 0.7576138147566719, "grad_norm": 1.963283487176052, "learning_rate": 2.9265357003018836e-06, "loss": 0.8042, "step": 7239 }, { "epoch": 0.7577184720041863, "grad_norm": 2.181411025825987, "learning_rate": 2.924140025224629e-06, "loss": 0.9476, "step": 7240 }, { "epoch": 0.7578231292517007, "grad_norm": 1.7246146896003263, "learning_rate": 2.921745163178026e-06, "loss": 0.7424, "step": 7241 }, { "epoch": 0.7579277864992151, "grad_norm": 1.679941753555536, "learning_rate": 2.9193511144372565e-06, "loss": 0.8579, "step": 7242 }, { "epoch": 0.7580324437467295, "grad_norm": 2.166472002968409, "learning_rate": 2.9169578792773944e-06, "loss": 0.9046, "step": 7243 }, { "epoch": 0.7581371009942438, "grad_norm": 1.6957884419729579, "learning_rate": 2.9145654579734352e-06, "loss": 0.7482, "step": 7244 }, { "epoch": 0.7582417582417582, "grad_norm": 2.2702186393221044, "learning_rate": 2.9121738508002675e-06, "loss": 0.8589, "step": 7245 }, { "epoch": 0.7583464154892726, "grad_norm": 2.1812945897899727, "learning_rate": 2.90978305803269e-06, "loss": 0.7682, "step": 7246 }, { "epoch": 0.758451072736787, "grad_norm": 1.7927929123690813, "learning_rate": 2.9073930799454153e-06, "loss": 0.7735, "step": 7247 }, { "epoch": 0.7585557299843014, "grad_norm": 2.353877525391729, "learning_rate": 2.9050039168130537e-06, "loss": 0.9638, "step": 7248 }, { "epoch": 0.7586603872318158, "grad_norm": 2.653249641565394, "learning_rate": 2.9026155689101256e-06, "loss": 0.8886, "step": 7249 }, { "epoch": 0.7587650444793302, "grad_norm": 2.343195015332429, "learning_rate": 2.9002280365110534e-06, "loss": 0.8646, "step": 7250 }, { "epoch": 0.7588697017268445, "grad_norm": 7.944090802217359, "learning_rate": 2.8978413198901754e-06, "loss": 0.7561, "step": 7251 }, { "epoch": 0.7589743589743589, "grad_norm": 1.7220024064319135, "learning_rate": 2.8954554193217254e-06, "loss": 0.8006, "step": 7252 }, { "epoch": 0.7590790162218733, "grad_norm": 2.230587821004055, "learning_rate": 2.893070335079852e-06, "loss": 0.8161, "step": 7253 }, { "epoch": 0.7591836734693878, "grad_norm": 2.278411767642871, "learning_rate": 2.890686067438605e-06, "loss": 0.8948, "step": 7254 }, { "epoch": 0.7592883307169022, "grad_norm": 2.3002079292626387, "learning_rate": 2.8883026166719374e-06, "loss": 1.0273, "step": 7255 }, { "epoch": 0.7593929879644166, "grad_norm": 2.3979608311717864, "learning_rate": 2.8859199830537188e-06, "loss": 0.8825, "step": 7256 }, { "epoch": 0.759497645211931, "grad_norm": 1.9704760095316236, "learning_rate": 2.8835381668577167e-06, "loss": 0.9408, "step": 7257 }, { "epoch": 0.7596023024594453, "grad_norm": 1.9634553592169977, "learning_rate": 2.8811571683576047e-06, "loss": 0.8909, "step": 7258 }, { "epoch": 0.7597069597069597, "grad_norm": 2.0676146544534997, "learning_rate": 2.8787769878269667e-06, "loss": 0.8999, "step": 7259 }, { "epoch": 0.7598116169544741, "grad_norm": 1.902506916202938, "learning_rate": 2.8763976255392854e-06, "loss": 0.7752, "step": 7260 }, { "epoch": 0.7599162742019885, "grad_norm": 1.784936461204701, "learning_rate": 2.874019081767958e-06, "loss": 0.9145, "step": 7261 }, { "epoch": 0.7600209314495029, "grad_norm": 1.9199931698650219, "learning_rate": 2.8716413567862865e-06, "loss": 0.8145, "step": 7262 }, { "epoch": 0.7601255886970173, "grad_norm": 2.053832292499548, "learning_rate": 2.869264450867475e-06, "loss": 0.8412, "step": 7263 }, { "epoch": 0.7602302459445317, "grad_norm": 1.8965072652709092, "learning_rate": 2.8668883642846325e-06, "loss": 0.8407, "step": 7264 }, { "epoch": 0.760334903192046, "grad_norm": 2.121023584744659, "learning_rate": 2.864513097310775e-06, "loss": 0.9228, "step": 7265 }, { "epoch": 0.7604395604395604, "grad_norm": 2.2573579326522, "learning_rate": 2.8621386502188296e-06, "loss": 0.8228, "step": 7266 }, { "epoch": 0.7605442176870748, "grad_norm": 2.0668406704888103, "learning_rate": 2.8597650232816245e-06, "loss": 0.8929, "step": 7267 }, { "epoch": 0.7606488749345892, "grad_norm": 2.249728883019577, "learning_rate": 2.8573922167718927e-06, "loss": 0.8714, "step": 7268 }, { "epoch": 0.7607535321821036, "grad_norm": 2.140716219541326, "learning_rate": 2.855020230962271e-06, "loss": 0.9412, "step": 7269 }, { "epoch": 0.760858189429618, "grad_norm": 2.313215833515219, "learning_rate": 2.8526490661253104e-06, "loss": 0.9789, "step": 7270 }, { "epoch": 0.7609628466771324, "grad_norm": 2.1190694336129585, "learning_rate": 2.8502787225334636e-06, "loss": 0.8946, "step": 7271 }, { "epoch": 0.7610675039246467, "grad_norm": 2.4208134953494795, "learning_rate": 2.8479092004590857e-06, "loss": 0.7714, "step": 7272 }, { "epoch": 0.7611721611721611, "grad_norm": 2.3515481614362646, "learning_rate": 2.8455405001744397e-06, "loss": 0.7356, "step": 7273 }, { "epoch": 0.7612768184196755, "grad_norm": 1.8723314249073932, "learning_rate": 2.8431726219516896e-06, "loss": 0.7776, "step": 7274 }, { "epoch": 0.7613814756671899, "grad_norm": 1.7706806040241194, "learning_rate": 2.8408055660629185e-06, "loss": 0.8914, "step": 7275 }, { "epoch": 0.7614861329147044, "grad_norm": 1.923104823220645, "learning_rate": 2.8384393327800997e-06, "loss": 0.8769, "step": 7276 }, { "epoch": 0.7615907901622188, "grad_norm": 2.094266643745062, "learning_rate": 2.83607392237512e-06, "loss": 0.9438, "step": 7277 }, { "epoch": 0.7616954474097332, "grad_norm": 1.7758737658741865, "learning_rate": 2.8337093351197664e-06, "loss": 0.8055, "step": 7278 }, { "epoch": 0.7618001046572476, "grad_norm": 2.2135817220161376, "learning_rate": 2.8313455712857408e-06, "loss": 0.8821, "step": 7279 }, { "epoch": 0.7619047619047619, "grad_norm": 1.933667595189823, "learning_rate": 2.828982631144639e-06, "loss": 0.8911, "step": 7280 }, { "epoch": 0.7620094191522763, "grad_norm": 2.5068411421201553, "learning_rate": 2.8266205149679717e-06, "loss": 1.022, "step": 7281 }, { "epoch": 0.7621140763997907, "grad_norm": 2.109661522511011, "learning_rate": 2.8242592230271506e-06, "loss": 0.9163, "step": 7282 }, { "epoch": 0.7622187336473051, "grad_norm": 2.2212961677842493, "learning_rate": 2.821898755593491e-06, "loss": 0.9055, "step": 7283 }, { "epoch": 0.7623233908948195, "grad_norm": 2.4633904814759053, "learning_rate": 2.819539112938212e-06, "loss": 0.7539, "step": 7284 }, { "epoch": 0.7624280481423339, "grad_norm": 1.8332947615386888, "learning_rate": 2.817180295332449e-06, "loss": 0.8608, "step": 7285 }, { "epoch": 0.7625327053898483, "grad_norm": 2.252313473361386, "learning_rate": 2.8148223030472287e-06, "loss": 0.8452, "step": 7286 }, { "epoch": 0.7626373626373626, "grad_norm": 2.430798136578984, "learning_rate": 2.812465136353494e-06, "loss": 0.9796, "step": 7287 }, { "epoch": 0.762742019884877, "grad_norm": 2.145484648346231, "learning_rate": 2.8101087955220864e-06, "loss": 0.9313, "step": 7288 }, { "epoch": 0.7628466771323914, "grad_norm": 2.30890945546967, "learning_rate": 2.8077532808237497e-06, "loss": 0.9836, "step": 7289 }, { "epoch": 0.7629513343799058, "grad_norm": 2.4395138433728456, "learning_rate": 2.805398592529145e-06, "loss": 0.7632, "step": 7290 }, { "epoch": 0.7630559916274202, "grad_norm": 2.1560151415099833, "learning_rate": 2.803044730908826e-06, "loss": 0.938, "step": 7291 }, { "epoch": 0.7631606488749346, "grad_norm": 1.799807577137677, "learning_rate": 2.8006916962332586e-06, "loss": 0.749, "step": 7292 }, { "epoch": 0.763265306122449, "grad_norm": 2.6437825820162133, "learning_rate": 2.7983394887728054e-06, "loss": 0.9678, "step": 7293 }, { "epoch": 0.7633699633699633, "grad_norm": 2.0001658220701786, "learning_rate": 2.795988108797748e-06, "loss": 0.879, "step": 7294 }, { "epoch": 0.7634746206174777, "grad_norm": 2.4220922582417654, "learning_rate": 2.793637556578258e-06, "loss": 0.8539, "step": 7295 }, { "epoch": 0.7635792778649921, "grad_norm": 2.326513581121549, "learning_rate": 2.791287832384424e-06, "loss": 0.9231, "step": 7296 }, { "epoch": 0.7636839351125065, "grad_norm": 2.0582333962241037, "learning_rate": 2.788938936486232e-06, "loss": 0.817, "step": 7297 }, { "epoch": 0.763788592360021, "grad_norm": 1.964193500525283, "learning_rate": 2.7865908691535746e-06, "loss": 0.7875, "step": 7298 }, { "epoch": 0.7638932496075354, "grad_norm": 1.9120684932559597, "learning_rate": 2.784243630656247e-06, "loss": 0.9067, "step": 7299 }, { "epoch": 0.7639979068550498, "grad_norm": 2.203369880119307, "learning_rate": 2.781897221263956e-06, "loss": 0.9302, "step": 7300 }, { "epoch": 0.764102564102564, "grad_norm": 1.8664131554131014, "learning_rate": 2.7795516412463077e-06, "loss": 0.8078, "step": 7301 }, { "epoch": 0.7642072213500785, "grad_norm": 2.152251063216064, "learning_rate": 2.7772068908728133e-06, "loss": 0.8731, "step": 7302 }, { "epoch": 0.7643118785975929, "grad_norm": 1.9704503123853456, "learning_rate": 2.774862970412886e-06, "loss": 0.9309, "step": 7303 }, { "epoch": 0.7644165358451073, "grad_norm": 2.217027087214899, "learning_rate": 2.77251988013585e-06, "loss": 0.9636, "step": 7304 }, { "epoch": 0.7645211930926217, "grad_norm": 2.4945137079215836, "learning_rate": 2.7701776203109342e-06, "loss": 0.9788, "step": 7305 }, { "epoch": 0.7646258503401361, "grad_norm": 1.979000751747689, "learning_rate": 2.767836191207267e-06, "loss": 0.9692, "step": 7306 }, { "epoch": 0.7647305075876505, "grad_norm": 1.9320377725750433, "learning_rate": 2.765495593093882e-06, "loss": 0.8792, "step": 7307 }, { "epoch": 0.7648351648351648, "grad_norm": 1.661537117127484, "learning_rate": 2.7631558262397164e-06, "loss": 0.7901, "step": 7308 }, { "epoch": 0.7649398220826792, "grad_norm": 2.066054899073813, "learning_rate": 2.7608168909136203e-06, "loss": 0.789, "step": 7309 }, { "epoch": 0.7650444793301936, "grad_norm": 2.309887605904426, "learning_rate": 2.7584787873843376e-06, "loss": 0.9919, "step": 7310 }, { "epoch": 0.765149136577708, "grad_norm": 2.219400510624547, "learning_rate": 2.756141515920524e-06, "loss": 0.8606, "step": 7311 }, { "epoch": 0.7652537938252224, "grad_norm": 2.1735797860738977, "learning_rate": 2.753805076790731e-06, "loss": 0.854, "step": 7312 }, { "epoch": 0.7653584510727368, "grad_norm": 2.132548975039601, "learning_rate": 2.7514694702634237e-06, "loss": 0.8229, "step": 7313 }, { "epoch": 0.7654631083202512, "grad_norm": 2.2324206156113777, "learning_rate": 2.7491346966069734e-06, "loss": 0.8859, "step": 7314 }, { "epoch": 0.7655677655677655, "grad_norm": 2.0237282392424114, "learning_rate": 2.7468007560896435e-06, "loss": 0.817, "step": 7315 }, { "epoch": 0.7656724228152799, "grad_norm": 1.8891116758472233, "learning_rate": 2.744467648979612e-06, "loss": 0.8634, "step": 7316 }, { "epoch": 0.7657770800627943, "grad_norm": 1.9422661279068079, "learning_rate": 2.7421353755449552e-06, "loss": 0.9092, "step": 7317 }, { "epoch": 0.7658817373103087, "grad_norm": 2.2850690583866644, "learning_rate": 2.7398039360536543e-06, "loss": 0.8976, "step": 7318 }, { "epoch": 0.7659863945578231, "grad_norm": 2.0211359219333467, "learning_rate": 2.7374733307736025e-06, "loss": 0.8664, "step": 7319 }, { "epoch": 0.7660910518053375, "grad_norm": 1.8583554118941399, "learning_rate": 2.7351435599725874e-06, "loss": 0.8905, "step": 7320 }, { "epoch": 0.766195709052852, "grad_norm": 1.9734674152504228, "learning_rate": 2.7328146239183007e-06, "loss": 0.8991, "step": 7321 }, { "epoch": 0.7663003663003664, "grad_norm": 1.7218591246248722, "learning_rate": 2.7304865228783507e-06, "loss": 0.8628, "step": 7322 }, { "epoch": 0.7664050235478806, "grad_norm": 1.7601071260258068, "learning_rate": 2.7281592571202307e-06, "loss": 0.8435, "step": 7323 }, { "epoch": 0.766509680795395, "grad_norm": 2.5384380504587583, "learning_rate": 2.725832826911359e-06, "loss": 0.9552, "step": 7324 }, { "epoch": 0.7666143380429095, "grad_norm": 1.9897641468927232, "learning_rate": 2.7235072325190404e-06, "loss": 0.8246, "step": 7325 }, { "epoch": 0.7667189952904239, "grad_norm": 2.18530144040617, "learning_rate": 2.721182474210492e-06, "loss": 1.0168, "step": 7326 }, { "epoch": 0.7668236525379383, "grad_norm": 1.9476014865880453, "learning_rate": 2.71885855225283e-06, "loss": 0.8297, "step": 7327 }, { "epoch": 0.7669283097854527, "grad_norm": 2.0184306914039083, "learning_rate": 2.716535466913084e-06, "loss": 0.8521, "step": 7328 }, { "epoch": 0.7670329670329671, "grad_norm": 2.1006322169829144, "learning_rate": 2.714213218458178e-06, "loss": 0.7977, "step": 7329 }, { "epoch": 0.7671376242804814, "grad_norm": 2.0262143165867337, "learning_rate": 2.7118918071549395e-06, "loss": 0.8349, "step": 7330 }, { "epoch": 0.7672422815279958, "grad_norm": 2.269865382008233, "learning_rate": 2.7095712332701108e-06, "loss": 0.9142, "step": 7331 }, { "epoch": 0.7673469387755102, "grad_norm": 2.3038111471769858, "learning_rate": 2.7072514970703224e-06, "loss": 0.9053, "step": 7332 }, { "epoch": 0.7674515960230246, "grad_norm": 1.936577382811956, "learning_rate": 2.7049325988221255e-06, "loss": 0.8464, "step": 7333 }, { "epoch": 0.767556253270539, "grad_norm": 2.5043378818478494, "learning_rate": 2.70261453879196e-06, "loss": 0.7622, "step": 7334 }, { "epoch": 0.7676609105180534, "grad_norm": 2.0270141293438635, "learning_rate": 2.7002973172461775e-06, "loss": 0.8564, "step": 7335 }, { "epoch": 0.7677655677655678, "grad_norm": 2.238914386289856, "learning_rate": 2.6979809344510323e-06, "loss": 0.949, "step": 7336 }, { "epoch": 0.7678702250130821, "grad_norm": 2.112977680901526, "learning_rate": 2.695665390672677e-06, "loss": 0.8794, "step": 7337 }, { "epoch": 0.7679748822605965, "grad_norm": 1.7578779088455374, "learning_rate": 2.6933506861771764e-06, "loss": 0.9656, "step": 7338 }, { "epoch": 0.7680795395081109, "grad_norm": 2.025671558545498, "learning_rate": 2.691036821230496e-06, "loss": 0.8891, "step": 7339 }, { "epoch": 0.7681841967556253, "grad_norm": 2.2330212723105913, "learning_rate": 2.6887237960985024e-06, "loss": 0.9527, "step": 7340 }, { "epoch": 0.7682888540031397, "grad_norm": 2.312102639631419, "learning_rate": 2.6864116110469664e-06, "loss": 0.9365, "step": 7341 }, { "epoch": 0.7683935112506541, "grad_norm": 2.2905403470328847, "learning_rate": 2.684100266341558e-06, "loss": 0.9652, "step": 7342 }, { "epoch": 0.7684981684981685, "grad_norm": 1.7444191920341985, "learning_rate": 2.681789762247864e-06, "loss": 0.7956, "step": 7343 }, { "epoch": 0.7686028257456828, "grad_norm": 2.125810203110687, "learning_rate": 2.6794800990313617e-06, "loss": 0.8981, "step": 7344 }, { "epoch": 0.7687074829931972, "grad_norm": 2.0640436363199584, "learning_rate": 2.6771712769574364e-06, "loss": 0.9515, "step": 7345 }, { "epoch": 0.7688121402407117, "grad_norm": 2.2540317780667283, "learning_rate": 2.6748632962913732e-06, "loss": 0.9397, "step": 7346 }, { "epoch": 0.7689167974882261, "grad_norm": 2.2939707078082434, "learning_rate": 2.672556157298367e-06, "loss": 0.873, "step": 7347 }, { "epoch": 0.7690214547357405, "grad_norm": 2.0271839169120343, "learning_rate": 2.6702498602435155e-06, "loss": 0.9098, "step": 7348 }, { "epoch": 0.7691261119832549, "grad_norm": 2.194903694850483, "learning_rate": 2.6679444053918137e-06, "loss": 0.7982, "step": 7349 }, { "epoch": 0.7692307692307693, "grad_norm": 1.8945213031067587, "learning_rate": 2.6656397930081635e-06, "loss": 0.9307, "step": 7350 }, { "epoch": 0.7693354264782836, "grad_norm": 1.9063057362434497, "learning_rate": 2.6633360233573656e-06, "loss": 0.9288, "step": 7351 }, { "epoch": 0.769440083725798, "grad_norm": 2.318201908671572, "learning_rate": 2.6610330967041366e-06, "loss": 0.9832, "step": 7352 }, { "epoch": 0.7695447409733124, "grad_norm": 2.087584884442611, "learning_rate": 2.6587310133130805e-06, "loss": 0.8781, "step": 7353 }, { "epoch": 0.7696493982208268, "grad_norm": 1.8841301045621537, "learning_rate": 2.6564297734487144e-06, "loss": 0.7486, "step": 7354 }, { "epoch": 0.7697540554683412, "grad_norm": 2.048554104308666, "learning_rate": 2.654129377375454e-06, "loss": 0.7509, "step": 7355 }, { "epoch": 0.7698587127158556, "grad_norm": 1.9448133038229751, "learning_rate": 2.6518298253576167e-06, "loss": 0.8034, "step": 7356 }, { "epoch": 0.76996336996337, "grad_norm": 2.3334382875645465, "learning_rate": 2.6495311176594286e-06, "loss": 1.0057, "step": 7357 }, { "epoch": 0.7700680272108843, "grad_norm": 2.0609258709754203, "learning_rate": 2.6472332545450195e-06, "loss": 0.9352, "step": 7358 }, { "epoch": 0.7701726844583987, "grad_norm": 2.043836151919123, "learning_rate": 2.6449362362784147e-06, "loss": 0.8086, "step": 7359 }, { "epoch": 0.7702773417059131, "grad_norm": 1.9222038165954007, "learning_rate": 2.642640063123546e-06, "loss": 0.8762, "step": 7360 }, { "epoch": 0.7703819989534275, "grad_norm": 2.272474659914367, "learning_rate": 2.640344735344247e-06, "loss": 0.887, "step": 7361 }, { "epoch": 0.7704866562009419, "grad_norm": 2.0559426832557066, "learning_rate": 2.6380502532042608e-06, "loss": 0.8748, "step": 7362 }, { "epoch": 0.7705913134484563, "grad_norm": 1.9381978766078287, "learning_rate": 2.635756616967223e-06, "loss": 0.8543, "step": 7363 }, { "epoch": 0.7706959706959707, "grad_norm": 1.885340263704655, "learning_rate": 2.6334638268966773e-06, "loss": 0.8073, "step": 7364 }, { "epoch": 0.7708006279434851, "grad_norm": 1.939561768160846, "learning_rate": 2.631171883256074e-06, "loss": 0.8754, "step": 7365 }, { "epoch": 0.7709052851909994, "grad_norm": 1.7916271930244994, "learning_rate": 2.628880786308756e-06, "loss": 0.9365, "step": 7366 }, { "epoch": 0.7710099424385138, "grad_norm": 1.9214226616582117, "learning_rate": 2.626590536317982e-06, "loss": 0.9126, "step": 7367 }, { "epoch": 0.7711145996860282, "grad_norm": 1.6201683681441432, "learning_rate": 2.6243011335469027e-06, "loss": 0.7946, "step": 7368 }, { "epoch": 0.7712192569335427, "grad_norm": 2.0135703962696954, "learning_rate": 2.622012578258576e-06, "loss": 0.9228, "step": 7369 }, { "epoch": 0.7713239141810571, "grad_norm": 2.1109014783062747, "learning_rate": 2.6197248707159575e-06, "loss": 0.8835, "step": 7370 }, { "epoch": 0.7714285714285715, "grad_norm": 2.004035053535731, "learning_rate": 2.6174380111819144e-06, "loss": 0.8671, "step": 7371 }, { "epoch": 0.7715332286760859, "grad_norm": 2.027444223995198, "learning_rate": 2.615151999919211e-06, "loss": 0.9357, "step": 7372 }, { "epoch": 0.7716378859236002, "grad_norm": 1.9959569059145792, "learning_rate": 2.61286683719051e-06, "loss": 0.9009, "step": 7373 }, { "epoch": 0.7717425431711146, "grad_norm": 1.9792281441894708, "learning_rate": 2.610582523258388e-06, "loss": 0.8614, "step": 7374 }, { "epoch": 0.771847200418629, "grad_norm": 1.983946749775523, "learning_rate": 2.608299058385314e-06, "loss": 0.8806, "step": 7375 }, { "epoch": 0.7719518576661434, "grad_norm": 1.9428206558603256, "learning_rate": 2.60601644283366e-06, "loss": 0.8755, "step": 7376 }, { "epoch": 0.7720565149136578, "grad_norm": 2.0749303955604703, "learning_rate": 2.603734676865708e-06, "loss": 0.8801, "step": 7377 }, { "epoch": 0.7721611721611722, "grad_norm": 1.9352525690019882, "learning_rate": 2.6014537607436365e-06, "loss": 0.9563, "step": 7378 }, { "epoch": 0.7722658294086866, "grad_norm": 2.0793047689529844, "learning_rate": 2.5991736947295254e-06, "loss": 0.8347, "step": 7379 }, { "epoch": 0.7723704866562009, "grad_norm": 2.293524394942549, "learning_rate": 2.596894479085357e-06, "loss": 0.8757, "step": 7380 }, { "epoch": 0.7724751439037153, "grad_norm": 1.751731557615987, "learning_rate": 2.5946161140730243e-06, "loss": 0.8043, "step": 7381 }, { "epoch": 0.7725798011512297, "grad_norm": 2.25419896408031, "learning_rate": 2.592338599954308e-06, "loss": 0.7497, "step": 7382 }, { "epoch": 0.7726844583987441, "grad_norm": 2.0024558583214342, "learning_rate": 2.5900619369909074e-06, "loss": 0.8527, "step": 7383 }, { "epoch": 0.7727891156462585, "grad_norm": 2.0969280155721552, "learning_rate": 2.5877861254444115e-06, "loss": 0.7273, "step": 7384 }, { "epoch": 0.7728937728937729, "grad_norm": 2.1227745715824717, "learning_rate": 2.5855111655763134e-06, "loss": 0.86, "step": 7385 }, { "epoch": 0.7729984301412873, "grad_norm": 1.8610864043360293, "learning_rate": 2.583237057648016e-06, "loss": 0.8691, "step": 7386 }, { "epoch": 0.7731030873888016, "grad_norm": 2.1425965300073884, "learning_rate": 2.580963801920816e-06, "loss": 0.8999, "step": 7387 }, { "epoch": 0.773207744636316, "grad_norm": 2.042623189976076, "learning_rate": 2.5786913986559147e-06, "loss": 0.8993, "step": 7388 }, { "epoch": 0.7733124018838304, "grad_norm": 1.9775430060536299, "learning_rate": 2.5764198481144164e-06, "loss": 0.9127, "step": 7389 }, { "epoch": 0.7734170591313448, "grad_norm": 2.000333442979551, "learning_rate": 2.5741491505573213e-06, "loss": 0.9102, "step": 7390 }, { "epoch": 0.7735217163788592, "grad_norm": 1.9614405240639192, "learning_rate": 2.5718793062455472e-06, "loss": 0.8835, "step": 7391 }, { "epoch": 0.7736263736263737, "grad_norm": 2.4448514916772806, "learning_rate": 2.5696103154399e-06, "loss": 0.9974, "step": 7392 }, { "epoch": 0.7737310308738881, "grad_norm": 1.9778536256057575, "learning_rate": 2.567342178401091e-06, "loss": 0.8884, "step": 7393 }, { "epoch": 0.7738356881214024, "grad_norm": 1.965676707722803, "learning_rate": 2.565074895389733e-06, "loss": 0.8692, "step": 7394 }, { "epoch": 0.7739403453689168, "grad_norm": 2.1903706642179195, "learning_rate": 2.562808466666338e-06, "loss": 0.8845, "step": 7395 }, { "epoch": 0.7740450026164312, "grad_norm": 1.9841184192672507, "learning_rate": 2.5605428924913312e-06, "loss": 0.8959, "step": 7396 }, { "epoch": 0.7741496598639456, "grad_norm": 2.169747156193136, "learning_rate": 2.558278173125026e-06, "loss": 0.8318, "step": 7397 }, { "epoch": 0.77425431711146, "grad_norm": 2.742205301494219, "learning_rate": 2.5560143088276456e-06, "loss": 0.9708, "step": 7398 }, { "epoch": 0.7743589743589744, "grad_norm": 2.1736256054455145, "learning_rate": 2.553751299859308e-06, "loss": 0.8634, "step": 7399 }, { "epoch": 0.7744636316064888, "grad_norm": 2.0298243788952792, "learning_rate": 2.551489146480042e-06, "loss": 0.98, "step": 7400 }, { "epoch": 0.7745682888540032, "grad_norm": 1.8171963930524047, "learning_rate": 2.5492278489497745e-06, "loss": 0.8868, "step": 7401 }, { "epoch": 0.7746729461015175, "grad_norm": 1.8750562250635816, "learning_rate": 2.5469674075283325e-06, "loss": 0.8463, "step": 7402 }, { "epoch": 0.7747776033490319, "grad_norm": 2.0170705567329934, "learning_rate": 2.544707822475444e-06, "loss": 0.8602, "step": 7403 }, { "epoch": 0.7748822605965463, "grad_norm": 2.0889797065511657, "learning_rate": 2.5424490940507373e-06, "loss": 0.8067, "step": 7404 }, { "epoch": 0.7749869178440607, "grad_norm": 2.084921965591833, "learning_rate": 2.5401912225137504e-06, "loss": 0.8538, "step": 7405 }, { "epoch": 0.7750915750915751, "grad_norm": 1.999119792459058, "learning_rate": 2.5379342081239157e-06, "loss": 0.954, "step": 7406 }, { "epoch": 0.7751962323390895, "grad_norm": 1.9723776486252687, "learning_rate": 2.5356780511405666e-06, "loss": 0.8641, "step": 7407 }, { "epoch": 0.7753008895866039, "grad_norm": 2.07024245752328, "learning_rate": 2.533422751822938e-06, "loss": 0.8942, "step": 7408 }, { "epoch": 0.7754055468341182, "grad_norm": 1.8764528707975676, "learning_rate": 2.531168310430172e-06, "loss": 0.9478, "step": 7409 }, { "epoch": 0.7755102040816326, "grad_norm": 1.8645837015695488, "learning_rate": 2.528914727221311e-06, "loss": 0.8407, "step": 7410 }, { "epoch": 0.775614861329147, "grad_norm": 2.172900648266963, "learning_rate": 2.5266620024552937e-06, "loss": 0.856, "step": 7411 }, { "epoch": 0.7757195185766614, "grad_norm": 2.0753861210504425, "learning_rate": 2.524410136390961e-06, "loss": 0.9372, "step": 7412 }, { "epoch": 0.7758241758241758, "grad_norm": 2.0279346463791588, "learning_rate": 2.5221591292870595e-06, "loss": 0.8748, "step": 7413 }, { "epoch": 0.7759288330716902, "grad_norm": 1.9053518322080332, "learning_rate": 2.5199089814022293e-06, "loss": 0.846, "step": 7414 }, { "epoch": 0.7760334903192047, "grad_norm": 1.951751195516781, "learning_rate": 2.517659692995025e-06, "loss": 0.7616, "step": 7415 }, { "epoch": 0.776138147566719, "grad_norm": 2.258508279860391, "learning_rate": 2.515411264323887e-06, "loss": 0.9151, "step": 7416 }, { "epoch": 0.7762428048142334, "grad_norm": 2.318867938802657, "learning_rate": 2.5131636956471696e-06, "loss": 0.8769, "step": 7417 }, { "epoch": 0.7763474620617478, "grad_norm": 1.867705727278113, "learning_rate": 2.510916987223122e-06, "loss": 0.9135, "step": 7418 }, { "epoch": 0.7764521193092622, "grad_norm": 2.100130971915618, "learning_rate": 2.5086711393098906e-06, "loss": 0.85, "step": 7419 }, { "epoch": 0.7765567765567766, "grad_norm": 2.0579255004196852, "learning_rate": 2.5064261521655355e-06, "loss": 0.8838, "step": 7420 }, { "epoch": 0.776661433804291, "grad_norm": 2.0359736419827033, "learning_rate": 2.5041820260480063e-06, "loss": 0.891, "step": 7421 }, { "epoch": 0.7767660910518054, "grad_norm": 2.007394652518043, "learning_rate": 2.501938761215158e-06, "loss": 0.8018, "step": 7422 }, { "epoch": 0.7768707482993197, "grad_norm": 2.309114884247499, "learning_rate": 2.4996963579247433e-06, "loss": 0.8789, "step": 7423 }, { "epoch": 0.7769754055468341, "grad_norm": 2.1276331305118017, "learning_rate": 2.497454816434425e-06, "loss": 0.9842, "step": 7424 }, { "epoch": 0.7770800627943485, "grad_norm": 2.0491657489397985, "learning_rate": 2.4952141370017536e-06, "loss": 0.8562, "step": 7425 }, { "epoch": 0.7771847200418629, "grad_norm": 1.780804392760166, "learning_rate": 2.492974319884196e-06, "loss": 0.8353, "step": 7426 }, { "epoch": 0.7772893772893773, "grad_norm": 2.829423926147118, "learning_rate": 2.4907353653391062e-06, "loss": 0.8086, "step": 7427 }, { "epoch": 0.7773940345368917, "grad_norm": 2.1527688872034516, "learning_rate": 2.4884972736237433e-06, "loss": 0.8685, "step": 7428 }, { "epoch": 0.7774986917844061, "grad_norm": 1.7756364808096, "learning_rate": 2.4862600449952744e-06, "loss": 0.8753, "step": 7429 }, { "epoch": 0.7776033490319204, "grad_norm": 2.193742164532645, "learning_rate": 2.4840236797107575e-06, "loss": 0.7831, "step": 7430 }, { "epoch": 0.7777080062794348, "grad_norm": 2.415166469626955, "learning_rate": 2.481788178027157e-06, "loss": 0.8495, "step": 7431 }, { "epoch": 0.7778126635269492, "grad_norm": 2.2290675901663652, "learning_rate": 2.479553540201335e-06, "loss": 0.9439, "step": 7432 }, { "epoch": 0.7779173207744636, "grad_norm": 2.2829886391415255, "learning_rate": 2.4773197664900538e-06, "loss": 1.0281, "step": 7433 }, { "epoch": 0.778021978021978, "grad_norm": 2.142967693368868, "learning_rate": 2.475086857149982e-06, "loss": 0.9277, "step": 7434 }, { "epoch": 0.7781266352694924, "grad_norm": 2.1352013664869403, "learning_rate": 2.4728548124376882e-06, "loss": 1.0059, "step": 7435 }, { "epoch": 0.7782312925170068, "grad_norm": 2.1208486875780537, "learning_rate": 2.4706236326096357e-06, "loss": 0.9999, "step": 7436 }, { "epoch": 0.7783359497645211, "grad_norm": 2.351231832444182, "learning_rate": 2.4683933179221908e-06, "loss": 0.9088, "step": 7437 }, { "epoch": 0.7784406070120355, "grad_norm": 2.612668770820266, "learning_rate": 2.4661638686316193e-06, "loss": 0.8534, "step": 7438 }, { "epoch": 0.77854526425955, "grad_norm": 1.9406158005630711, "learning_rate": 2.4639352849940947e-06, "loss": 0.8145, "step": 7439 }, { "epoch": 0.7786499215070644, "grad_norm": 2.000613163110507, "learning_rate": 2.461707567265682e-06, "loss": 0.8868, "step": 7440 }, { "epoch": 0.7787545787545788, "grad_norm": 2.316740372445306, "learning_rate": 2.4594807157023525e-06, "loss": 0.8466, "step": 7441 }, { "epoch": 0.7788592360020932, "grad_norm": 1.9850899017077899, "learning_rate": 2.4572547305599727e-06, "loss": 0.9479, "step": 7442 }, { "epoch": 0.7789638932496076, "grad_norm": 2.2991940584000825, "learning_rate": 2.455029612094314e-06, "loss": 0.8785, "step": 7443 }, { "epoch": 0.779068550497122, "grad_norm": 2.0602886300262164, "learning_rate": 2.4528053605610513e-06, "loss": 0.9742, "step": 7444 }, { "epoch": 0.7791732077446363, "grad_norm": 1.7206711855095842, "learning_rate": 2.4505819762157534e-06, "loss": 0.7868, "step": 7445 }, { "epoch": 0.7792778649921507, "grad_norm": 2.1321280190827805, "learning_rate": 2.4483594593138894e-06, "loss": 0.9418, "step": 7446 }, { "epoch": 0.7793825222396651, "grad_norm": 2.2378800662787888, "learning_rate": 2.4461378101108333e-06, "loss": 1.0102, "step": 7447 }, { "epoch": 0.7794871794871795, "grad_norm": 1.891205894491629, "learning_rate": 2.443917028861853e-06, "loss": 0.8933, "step": 7448 }, { "epoch": 0.7795918367346939, "grad_norm": 2.21558145359656, "learning_rate": 2.441697115822126e-06, "loss": 0.878, "step": 7449 }, { "epoch": 0.7796964939822083, "grad_norm": 2.072454113701976, "learning_rate": 2.4394780712467236e-06, "loss": 0.7732, "step": 7450 }, { "epoch": 0.7798011512297227, "grad_norm": 2.10593743081749, "learning_rate": 2.4372598953906133e-06, "loss": 0.768, "step": 7451 }, { "epoch": 0.779905808477237, "grad_norm": 1.9053195631944175, "learning_rate": 2.435042588508677e-06, "loss": 0.8531, "step": 7452 }, { "epoch": 0.7800104657247514, "grad_norm": 1.7533274864047492, "learning_rate": 2.4328261508556795e-06, "loss": 0.9297, "step": 7453 }, { "epoch": 0.7801151229722658, "grad_norm": 2.074809239844302, "learning_rate": 2.4306105826862993e-06, "loss": 0.8273, "step": 7454 }, { "epoch": 0.7802197802197802, "grad_norm": 2.125111767929294, "learning_rate": 2.428395884255109e-06, "loss": 0.7983, "step": 7455 }, { "epoch": 0.7803244374672946, "grad_norm": 2.1801053470480323, "learning_rate": 2.4261820558165804e-06, "loss": 0.858, "step": 7456 }, { "epoch": 0.780429094714809, "grad_norm": 1.8010107947795544, "learning_rate": 2.4239690976250854e-06, "loss": 0.7268, "step": 7457 }, { "epoch": 0.7805337519623234, "grad_norm": 2.26916467402209, "learning_rate": 2.421757009934901e-06, "loss": 0.8247, "step": 7458 }, { "epoch": 0.7806384092098377, "grad_norm": 1.9033771203519017, "learning_rate": 2.4195457930001998e-06, "loss": 0.7597, "step": 7459 }, { "epoch": 0.7807430664573521, "grad_norm": 1.9507378053947775, "learning_rate": 2.417335447075051e-06, "loss": 0.8733, "step": 7460 }, { "epoch": 0.7808477237048665, "grad_norm": 2.099170592286283, "learning_rate": 2.4151259724134336e-06, "loss": 0.824, "step": 7461 }, { "epoch": 0.780952380952381, "grad_norm": 2.227444898261774, "learning_rate": 2.4129173692692155e-06, "loss": 0.8599, "step": 7462 }, { "epoch": 0.7810570381998954, "grad_norm": 1.9903419632586217, "learning_rate": 2.4107096378961746e-06, "loss": 0.9733, "step": 7463 }, { "epoch": 0.7811616954474098, "grad_norm": 2.037233493155889, "learning_rate": 2.408502778547982e-06, "loss": 0.7757, "step": 7464 }, { "epoch": 0.7812663526949242, "grad_norm": 2.0708333939156134, "learning_rate": 2.4062967914782086e-06, "loss": 0.8146, "step": 7465 }, { "epoch": 0.7813710099424385, "grad_norm": 2.0344131802341976, "learning_rate": 2.404091676940329e-06, "loss": 0.9128, "step": 7466 }, { "epoch": 0.7814756671899529, "grad_norm": 2.032647250442873, "learning_rate": 2.401887435187712e-06, "loss": 0.9985, "step": 7467 }, { "epoch": 0.7815803244374673, "grad_norm": 2.431120027142936, "learning_rate": 2.39968406647363e-06, "loss": 0.8236, "step": 7468 }, { "epoch": 0.7816849816849817, "grad_norm": 2.1068090556259738, "learning_rate": 2.39748157105126e-06, "loss": 0.9063, "step": 7469 }, { "epoch": 0.7817896389324961, "grad_norm": 2.234104108726407, "learning_rate": 2.39527994917367e-06, "loss": 0.9853, "step": 7470 }, { "epoch": 0.7818942961800105, "grad_norm": 2.115693775219522, "learning_rate": 2.3930792010938286e-06, "loss": 0.9584, "step": 7471 }, { "epoch": 0.7819989534275249, "grad_norm": 2.173122868655898, "learning_rate": 2.3908793270646057e-06, "loss": 0.8623, "step": 7472 }, { "epoch": 0.7821036106750392, "grad_norm": 2.158065559668168, "learning_rate": 2.388680327338777e-06, "loss": 0.9185, "step": 7473 }, { "epoch": 0.7822082679225536, "grad_norm": 2.4956821194494454, "learning_rate": 2.386482202169008e-06, "loss": 1.0272, "step": 7474 }, { "epoch": 0.782312925170068, "grad_norm": 2.0104495991381888, "learning_rate": 2.384284951807868e-06, "loss": 0.8998, "step": 7475 }, { "epoch": 0.7824175824175824, "grad_norm": 1.8416688486042285, "learning_rate": 2.3820885765078226e-06, "loss": 0.8212, "step": 7476 }, { "epoch": 0.7825222396650968, "grad_norm": 2.302585517865405, "learning_rate": 2.3798930765212435e-06, "loss": 0.9272, "step": 7477 }, { "epoch": 0.7826268969126112, "grad_norm": 2.13934576419671, "learning_rate": 2.3776984521003997e-06, "loss": 0.7576, "step": 7478 }, { "epoch": 0.7827315541601256, "grad_norm": 2.136647066906963, "learning_rate": 2.3755047034974567e-06, "loss": 0.9061, "step": 7479 }, { "epoch": 0.7828362114076399, "grad_norm": 1.8057322334613368, "learning_rate": 2.373311830964479e-06, "loss": 0.8824, "step": 7480 }, { "epoch": 0.7829408686551543, "grad_norm": 1.9635915461888118, "learning_rate": 2.3711198347534305e-06, "loss": 0.8677, "step": 7481 }, { "epoch": 0.7830455259026687, "grad_norm": 2.1726928757916673, "learning_rate": 2.36892871511618e-06, "loss": 0.8336, "step": 7482 }, { "epoch": 0.7831501831501831, "grad_norm": 2.002270471261225, "learning_rate": 2.3667384723044918e-06, "loss": 0.923, "step": 7483 }, { "epoch": 0.7832548403976975, "grad_norm": 2.0664098657994883, "learning_rate": 2.3645491065700257e-06, "loss": 0.8348, "step": 7484 }, { "epoch": 0.783359497645212, "grad_norm": 2.2593815059893734, "learning_rate": 2.3623606181643465e-06, "loss": 0.9542, "step": 7485 }, { "epoch": 0.7834641548927264, "grad_norm": 2.025749143560053, "learning_rate": 2.3601730073389096e-06, "loss": 0.9224, "step": 7486 }, { "epoch": 0.7835688121402408, "grad_norm": 2.0959469108319047, "learning_rate": 2.3579862743450877e-06, "loss": 0.8847, "step": 7487 }, { "epoch": 0.7836734693877551, "grad_norm": 2.2588339335795173, "learning_rate": 2.3558004194341343e-06, "loss": 0.8129, "step": 7488 }, { "epoch": 0.7837781266352695, "grad_norm": 1.9031430821916164, "learning_rate": 2.35361544285721e-06, "loss": 0.8896, "step": 7489 }, { "epoch": 0.7838827838827839, "grad_norm": 2.2458748833211506, "learning_rate": 2.3514313448653715e-06, "loss": 0.965, "step": 7490 }, { "epoch": 0.7839874411302983, "grad_norm": 1.9843366337068091, "learning_rate": 2.3492481257095746e-06, "loss": 0.9045, "step": 7491 }, { "epoch": 0.7840920983778127, "grad_norm": 2.3071137715563186, "learning_rate": 2.34706578564068e-06, "loss": 0.8596, "step": 7492 }, { "epoch": 0.7841967556253271, "grad_norm": 2.1993251510987437, "learning_rate": 2.34488432490944e-06, "loss": 1.0281, "step": 7493 }, { "epoch": 0.7843014128728415, "grad_norm": 1.767074435073521, "learning_rate": 2.342703743766508e-06, "loss": 0.9149, "step": 7494 }, { "epoch": 0.7844060701203558, "grad_norm": 1.9658161545730846, "learning_rate": 2.340524042462441e-06, "loss": 0.8962, "step": 7495 }, { "epoch": 0.7845107273678702, "grad_norm": 2.1705849889367888, "learning_rate": 2.3383452212476866e-06, "loss": 0.9249, "step": 7496 }, { "epoch": 0.7846153846153846, "grad_norm": 2.0783706544712217, "learning_rate": 2.3361672803725997e-06, "loss": 0.972, "step": 7497 }, { "epoch": 0.784720041862899, "grad_norm": 1.8899752350082029, "learning_rate": 2.33399022008743e-06, "loss": 0.8817, "step": 7498 }, { "epoch": 0.7848246991104134, "grad_norm": 2.5517986810451783, "learning_rate": 2.331814040642324e-06, "loss": 0.8759, "step": 7499 }, { "epoch": 0.7849293563579278, "grad_norm": 1.8563647336761964, "learning_rate": 2.329638742287327e-06, "loss": 0.8498, "step": 7500 }, { "epoch": 0.7850340136054422, "grad_norm": 1.7691375881118099, "learning_rate": 2.3274643252723904e-06, "loss": 0.7296, "step": 7501 }, { "epoch": 0.7851386708529565, "grad_norm": 2.1783402785198667, "learning_rate": 2.3252907898473564e-06, "loss": 0.8703, "step": 7502 }, { "epoch": 0.7852433281004709, "grad_norm": 2.205810214469163, "learning_rate": 2.323118136261967e-06, "loss": 0.9018, "step": 7503 }, { "epoch": 0.7853479853479853, "grad_norm": 1.8545931677630307, "learning_rate": 2.320946364765869e-06, "loss": 0.8532, "step": 7504 }, { "epoch": 0.7854526425954997, "grad_norm": 2.0691817245860333, "learning_rate": 2.3187754756086e-06, "loss": 0.9241, "step": 7505 }, { "epoch": 0.7855572998430141, "grad_norm": 1.830258047422379, "learning_rate": 2.316605469039599e-06, "loss": 0.777, "step": 7506 }, { "epoch": 0.7856619570905286, "grad_norm": 2.1983345444045077, "learning_rate": 2.3144363453082076e-06, "loss": 0.9011, "step": 7507 }, { "epoch": 0.785766614338043, "grad_norm": 2.1342406030341867, "learning_rate": 2.31226810466366e-06, "loss": 0.9166, "step": 7508 }, { "epoch": 0.7858712715855573, "grad_norm": 2.1953438695436276, "learning_rate": 2.310100747355092e-06, "loss": 0.9392, "step": 7509 }, { "epoch": 0.7859759288330717, "grad_norm": 2.0745384468715766, "learning_rate": 2.3079342736315335e-06, "loss": 0.85, "step": 7510 }, { "epoch": 0.7860805860805861, "grad_norm": 2.0531247089902953, "learning_rate": 2.3057686837419246e-06, "loss": 0.9608, "step": 7511 }, { "epoch": 0.7861852433281005, "grad_norm": 2.0646468979200194, "learning_rate": 2.303603977935088e-06, "loss": 0.8047, "step": 7512 }, { "epoch": 0.7862899005756149, "grad_norm": 2.232605413909429, "learning_rate": 2.30144015645976e-06, "loss": 0.8882, "step": 7513 }, { "epoch": 0.7863945578231293, "grad_norm": 1.9565581752784238, "learning_rate": 2.299277219564565e-06, "loss": 0.8375, "step": 7514 }, { "epoch": 0.7864992150706437, "grad_norm": 2.0031871819667497, "learning_rate": 2.2971151674980254e-06, "loss": 0.8445, "step": 7515 }, { "epoch": 0.786603872318158, "grad_norm": 2.222141146594989, "learning_rate": 2.29495400050857e-06, "loss": 0.9172, "step": 7516 }, { "epoch": 0.7867085295656724, "grad_norm": 1.9729975164234717, "learning_rate": 2.2927937188445217e-06, "loss": 0.9044, "step": 7517 }, { "epoch": 0.7868131868131868, "grad_norm": 1.8762093594300924, "learning_rate": 2.2906343227540973e-06, "loss": 0.8125, "step": 7518 }, { "epoch": 0.7869178440607012, "grad_norm": 2.134221883980738, "learning_rate": 2.2884758124854156e-06, "loss": 0.8042, "step": 7519 }, { "epoch": 0.7870225013082156, "grad_norm": 1.9860025023180254, "learning_rate": 2.2863181882864995e-06, "loss": 0.8895, "step": 7520 }, { "epoch": 0.78712715855573, "grad_norm": 1.879511345324259, "learning_rate": 2.2841614504052576e-06, "loss": 0.8006, "step": 7521 }, { "epoch": 0.7872318158032444, "grad_norm": 1.9777761746411802, "learning_rate": 2.28200559908951e-06, "loss": 0.9588, "step": 7522 }, { "epoch": 0.7873364730507587, "grad_norm": 2.0015943184841034, "learning_rate": 2.279850634586963e-06, "loss": 0.8274, "step": 7523 }, { "epoch": 0.7874411302982731, "grad_norm": 2.126665132820641, "learning_rate": 2.2776965571452305e-06, "loss": 0.8101, "step": 7524 }, { "epoch": 0.7875457875457875, "grad_norm": 1.9386922457161486, "learning_rate": 2.2755433670118156e-06, "loss": 0.9006, "step": 7525 }, { "epoch": 0.7876504447933019, "grad_norm": 2.1069264857135974, "learning_rate": 2.2733910644341284e-06, "loss": 0.8787, "step": 7526 }, { "epoch": 0.7877551020408163, "grad_norm": 2.312845843018434, "learning_rate": 2.2712396496594724e-06, "loss": 0.8824, "step": 7527 }, { "epoch": 0.7878597592883307, "grad_norm": 2.410379692069536, "learning_rate": 2.2690891229350474e-06, "loss": 0.8534, "step": 7528 }, { "epoch": 0.7879644165358451, "grad_norm": 1.6004259225754003, "learning_rate": 2.266939484507953e-06, "loss": 0.7655, "step": 7529 }, { "epoch": 0.7880690737833596, "grad_norm": 1.9401251846318475, "learning_rate": 2.2647907346251874e-06, "loss": 0.9055, "step": 7530 }, { "epoch": 0.7881737310308738, "grad_norm": 1.8259854422426756, "learning_rate": 2.2626428735336504e-06, "loss": 0.8849, "step": 7531 }, { "epoch": 0.7882783882783883, "grad_norm": 2.086066746215832, "learning_rate": 2.260495901480132e-06, "loss": 0.9008, "step": 7532 }, { "epoch": 0.7883830455259027, "grad_norm": 1.920464809197468, "learning_rate": 2.2583498187113253e-06, "loss": 0.9415, "step": 7533 }, { "epoch": 0.7884877027734171, "grad_norm": 2.266413825635565, "learning_rate": 2.2562046254738144e-06, "loss": 0.8273, "step": 7534 }, { "epoch": 0.7885923600209315, "grad_norm": 1.737401277999669, "learning_rate": 2.2540603220140934e-06, "loss": 0.9201, "step": 7535 }, { "epoch": 0.7886970172684459, "grad_norm": 2.1096363498094854, "learning_rate": 2.251916908578544e-06, "loss": 0.9345, "step": 7536 }, { "epoch": 0.7888016745159603, "grad_norm": 2.150831886317924, "learning_rate": 2.2497743854134493e-06, "loss": 0.844, "step": 7537 }, { "epoch": 0.7889063317634746, "grad_norm": 2.0401061997847547, "learning_rate": 2.2476327527649865e-06, "loss": 0.7954, "step": 7538 }, { "epoch": 0.789010989010989, "grad_norm": 2.337995915788079, "learning_rate": 2.2454920108792354e-06, "loss": 0.9412, "step": 7539 }, { "epoch": 0.7891156462585034, "grad_norm": 1.9830974874569107, "learning_rate": 2.2433521600021744e-06, "loss": 0.8127, "step": 7540 }, { "epoch": 0.7892203035060178, "grad_norm": 2.167769265794437, "learning_rate": 2.241213200379676e-06, "loss": 0.7914, "step": 7541 }, { "epoch": 0.7893249607535322, "grad_norm": 2.33273978878609, "learning_rate": 2.239075132257509e-06, "loss": 0.8763, "step": 7542 }, { "epoch": 0.7894296180010466, "grad_norm": 2.030942504421985, "learning_rate": 2.236937955881343e-06, "loss": 0.9236, "step": 7543 }, { "epoch": 0.789534275248561, "grad_norm": 1.7829580923400006, "learning_rate": 2.234801671496739e-06, "loss": 0.7905, "step": 7544 }, { "epoch": 0.7896389324960753, "grad_norm": 1.9662829211533972, "learning_rate": 2.232666279349168e-06, "loss": 0.8887, "step": 7545 }, { "epoch": 0.7897435897435897, "grad_norm": 2.0352979592881444, "learning_rate": 2.2305317796839887e-06, "loss": 0.7428, "step": 7546 }, { "epoch": 0.7898482469911041, "grad_norm": 1.6539156555371572, "learning_rate": 2.2283981727464544e-06, "loss": 0.7944, "step": 7547 }, { "epoch": 0.7899529042386185, "grad_norm": 2.1614558820180783, "learning_rate": 2.226265458781728e-06, "loss": 0.8095, "step": 7548 }, { "epoch": 0.7900575614861329, "grad_norm": 1.908631638174456, "learning_rate": 2.2241336380348557e-06, "loss": 0.8981, "step": 7549 }, { "epoch": 0.7901622187336473, "grad_norm": 2.073463123923285, "learning_rate": 2.222002710750796e-06, "loss": 0.8807, "step": 7550 }, { "epoch": 0.7902668759811617, "grad_norm": 2.0673778416845185, "learning_rate": 2.2198726771743905e-06, "loss": 0.8022, "step": 7551 }, { "epoch": 0.790371533228676, "grad_norm": 1.6946057542306046, "learning_rate": 2.2177435375503874e-06, "loss": 0.7675, "step": 7552 }, { "epoch": 0.7904761904761904, "grad_norm": 2.04567393679294, "learning_rate": 2.2156152921234254e-06, "loss": 0.9319, "step": 7553 }, { "epoch": 0.7905808477237048, "grad_norm": 2.3078311593974514, "learning_rate": 2.2134879411380505e-06, "loss": 0.9253, "step": 7554 }, { "epoch": 0.7906855049712193, "grad_norm": 2.115100849729748, "learning_rate": 2.2113614848386924e-06, "loss": 0.9134, "step": 7555 }, { "epoch": 0.7907901622187337, "grad_norm": 2.20323705537602, "learning_rate": 2.2092359234696916e-06, "loss": 0.977, "step": 7556 }, { "epoch": 0.7908948194662481, "grad_norm": 1.9616275621121293, "learning_rate": 2.207111257275276e-06, "loss": 0.9302, "step": 7557 }, { "epoch": 0.7909994767137625, "grad_norm": 2.059271815772192, "learning_rate": 2.2049874864995735e-06, "loss": 0.9602, "step": 7558 }, { "epoch": 0.7911041339612768, "grad_norm": 1.9637722265176025, "learning_rate": 2.2028646113866127e-06, "loss": 0.8741, "step": 7559 }, { "epoch": 0.7912087912087912, "grad_norm": 2.2232201652151837, "learning_rate": 2.200742632180315e-06, "loss": 0.8846, "step": 7560 }, { "epoch": 0.7913134484563056, "grad_norm": 2.30436743903849, "learning_rate": 2.1986215491245e-06, "loss": 0.9266, "step": 7561 }, { "epoch": 0.79141810570382, "grad_norm": 2.364762077617069, "learning_rate": 2.196501362462883e-06, "loss": 0.9375, "step": 7562 }, { "epoch": 0.7915227629513344, "grad_norm": 2.353159650570667, "learning_rate": 2.1943820724390763e-06, "loss": 0.8549, "step": 7563 }, { "epoch": 0.7916274201988488, "grad_norm": 2.032814738216188, "learning_rate": 2.1922636792965935e-06, "loss": 0.7728, "step": 7564 }, { "epoch": 0.7917320774463632, "grad_norm": 1.8205772867039858, "learning_rate": 2.1901461832788453e-06, "loss": 0.9063, "step": 7565 }, { "epoch": 0.7918367346938775, "grad_norm": 2.1878001602801254, "learning_rate": 2.188029584629131e-06, "loss": 0.8742, "step": 7566 }, { "epoch": 0.7919413919413919, "grad_norm": 2.2177495247640664, "learning_rate": 2.1859138835906557e-06, "loss": 0.8931, "step": 7567 }, { "epoch": 0.7920460491889063, "grad_norm": 2.5843532600798915, "learning_rate": 2.1837990804065125e-06, "loss": 0.93, "step": 7568 }, { "epoch": 0.7921507064364207, "grad_norm": 1.7462838713565032, "learning_rate": 2.1816851753197023e-06, "loss": 0.7721, "step": 7569 }, { "epoch": 0.7922553636839351, "grad_norm": 2.7345603215828964, "learning_rate": 2.1795721685731153e-06, "loss": 0.7922, "step": 7570 }, { "epoch": 0.7923600209314495, "grad_norm": 2.458606643347218, "learning_rate": 2.1774600604095407e-06, "loss": 0.893, "step": 7571 }, { "epoch": 0.7924646781789639, "grad_norm": 2.0422870744164956, "learning_rate": 2.175348851071659e-06, "loss": 0.8463, "step": 7572 }, { "epoch": 0.7925693354264783, "grad_norm": 2.220719234475042, "learning_rate": 2.1732385408020575e-06, "loss": 0.941, "step": 7573 }, { "epoch": 0.7926739926739926, "grad_norm": 1.975429690998908, "learning_rate": 2.1711291298432157e-06, "loss": 0.9173, "step": 7574 }, { "epoch": 0.792778649921507, "grad_norm": 2.036016848312796, "learning_rate": 2.1690206184375086e-06, "loss": 0.8766, "step": 7575 }, { "epoch": 0.7928833071690214, "grad_norm": 1.8743908375351486, "learning_rate": 2.166913006827207e-06, "loss": 0.8244, "step": 7576 }, { "epoch": 0.7929879644165359, "grad_norm": 1.933946634791488, "learning_rate": 2.164806295254478e-06, "loss": 0.8598, "step": 7577 }, { "epoch": 0.7930926216640503, "grad_norm": 1.7813235104033394, "learning_rate": 2.162700483961392e-06, "loss": 0.8794, "step": 7578 }, { "epoch": 0.7931972789115647, "grad_norm": 2.2090460695960026, "learning_rate": 2.1605955731899086e-06, "loss": 0.9021, "step": 7579 }, { "epoch": 0.7933019361590791, "grad_norm": 2.0974899535355616, "learning_rate": 2.158491563181885e-06, "loss": 0.8928, "step": 7580 }, { "epoch": 0.7934065934065934, "grad_norm": 2.0169609622705154, "learning_rate": 2.1563884541790747e-06, "loss": 0.7913, "step": 7581 }, { "epoch": 0.7935112506541078, "grad_norm": 2.199278005040713, "learning_rate": 2.154286246423135e-06, "loss": 0.9365, "step": 7582 }, { "epoch": 0.7936159079016222, "grad_norm": 1.9357028205928937, "learning_rate": 2.152184940155607e-06, "loss": 0.8404, "step": 7583 }, { "epoch": 0.7937205651491366, "grad_norm": 2.299234559003062, "learning_rate": 2.150084535617942e-06, "loss": 0.8855, "step": 7584 }, { "epoch": 0.793825222396651, "grad_norm": 2.1127780268188454, "learning_rate": 2.147985033051476e-06, "loss": 0.8773, "step": 7585 }, { "epoch": 0.7939298796441654, "grad_norm": 1.9778297585940738, "learning_rate": 2.1458864326974495e-06, "loss": 0.7991, "step": 7586 }, { "epoch": 0.7940345368916798, "grad_norm": 2.815180081463802, "learning_rate": 2.14378873479699e-06, "loss": 1.1133, "step": 7587 }, { "epoch": 0.7941391941391941, "grad_norm": 2.2536327406341474, "learning_rate": 2.1416919395911327e-06, "loss": 0.9107, "step": 7588 }, { "epoch": 0.7942438513867085, "grad_norm": 2.0517416713013, "learning_rate": 2.139596047320803e-06, "loss": 0.9105, "step": 7589 }, { "epoch": 0.7943485086342229, "grad_norm": 1.9716793290444405, "learning_rate": 2.1375010582268194e-06, "loss": 0.9514, "step": 7590 }, { "epoch": 0.7944531658817373, "grad_norm": 1.7217851253182228, "learning_rate": 2.135406972549906e-06, "loss": 0.776, "step": 7591 }, { "epoch": 0.7945578231292517, "grad_norm": 1.9071443124007157, "learning_rate": 2.1333137905306723e-06, "loss": 0.953, "step": 7592 }, { "epoch": 0.7946624803767661, "grad_norm": 1.789970511220544, "learning_rate": 2.1312215124096337e-06, "loss": 0.7764, "step": 7593 }, { "epoch": 0.7947671376242805, "grad_norm": 2.728962359247706, "learning_rate": 2.1291301384271955e-06, "loss": 0.8014, "step": 7594 }, { "epoch": 0.7948717948717948, "grad_norm": 1.9322455444724893, "learning_rate": 2.1270396688236595e-06, "loss": 0.7246, "step": 7595 }, { "epoch": 0.7949764521193092, "grad_norm": 2.0774223052480107, "learning_rate": 2.124950103839224e-06, "loss": 1.0114, "step": 7596 }, { "epoch": 0.7950811093668236, "grad_norm": 2.275100035949746, "learning_rate": 2.122861443713987e-06, "loss": 0.8527, "step": 7597 }, { "epoch": 0.795185766614338, "grad_norm": 1.9482436075243985, "learning_rate": 2.1207736886879406e-06, "loss": 0.8765, "step": 7598 }, { "epoch": 0.7952904238618524, "grad_norm": 1.9226926868108565, "learning_rate": 2.1186868390009664e-06, "loss": 0.8391, "step": 7599 }, { "epoch": 0.7953950811093669, "grad_norm": 1.968122748788881, "learning_rate": 2.116600894892855e-06, "loss": 0.8938, "step": 7600 }, { "epoch": 0.7954997383568813, "grad_norm": 2.0362358868182655, "learning_rate": 2.1145158566032808e-06, "loss": 0.92, "step": 7601 }, { "epoch": 0.7956043956043956, "grad_norm": 1.8167616269052242, "learning_rate": 2.1124317243718184e-06, "loss": 0.874, "step": 7602 }, { "epoch": 0.79570905285191, "grad_norm": 2.081440590488677, "learning_rate": 2.1103484984379443e-06, "loss": 0.9316, "step": 7603 }, { "epoch": 0.7958137100994244, "grad_norm": 2.0790409856006864, "learning_rate": 2.1082661790410197e-06, "loss": 0.8893, "step": 7604 }, { "epoch": 0.7959183673469388, "grad_norm": 2.0996779895786135, "learning_rate": 2.1061847664203107e-06, "loss": 0.9361, "step": 7605 }, { "epoch": 0.7960230245944532, "grad_norm": 2.1580716719163386, "learning_rate": 2.1041042608149707e-06, "loss": 0.9383, "step": 7606 }, { "epoch": 0.7961276818419676, "grad_norm": 1.8816414196677191, "learning_rate": 2.1020246624640595e-06, "loss": 0.7567, "step": 7607 }, { "epoch": 0.796232339089482, "grad_norm": 2.052842909348786, "learning_rate": 2.0999459716065273e-06, "loss": 0.8624, "step": 7608 }, { "epoch": 0.7963369963369963, "grad_norm": 2.0109322496007733, "learning_rate": 2.097868188481217e-06, "loss": 0.9112, "step": 7609 }, { "epoch": 0.7964416535845107, "grad_norm": 1.9850219383542753, "learning_rate": 2.0957913133268726e-06, "loss": 0.7805, "step": 7610 }, { "epoch": 0.7965463108320251, "grad_norm": 2.257844754431785, "learning_rate": 2.0937153463821257e-06, "loss": 0.8675, "step": 7611 }, { "epoch": 0.7966509680795395, "grad_norm": 2.002649601742002, "learning_rate": 2.0916402878855168e-06, "loss": 0.8938, "step": 7612 }, { "epoch": 0.7967556253270539, "grad_norm": 1.921356147737922, "learning_rate": 2.0895661380754715e-06, "loss": 0.8056, "step": 7613 }, { "epoch": 0.7968602825745683, "grad_norm": 2.2073850950321185, "learning_rate": 2.087492897190312e-06, "loss": 0.9354, "step": 7614 }, { "epoch": 0.7969649398220827, "grad_norm": 2.3982569594670218, "learning_rate": 2.085420565468256e-06, "loss": 0.9788, "step": 7615 }, { "epoch": 0.7970695970695971, "grad_norm": 2.3499158850699007, "learning_rate": 2.083349143147422e-06, "loss": 0.9463, "step": 7616 }, { "epoch": 0.7971742543171114, "grad_norm": 1.784998934231335, "learning_rate": 2.081278630465823e-06, "loss": 0.8517, "step": 7617 }, { "epoch": 0.7972789115646258, "grad_norm": 2.3333292170699687, "learning_rate": 2.079209027661362e-06, "loss": 0.8067, "step": 7618 }, { "epoch": 0.7973835688121402, "grad_norm": 2.4308857648334627, "learning_rate": 2.077140334971841e-06, "loss": 0.8981, "step": 7619 }, { "epoch": 0.7974882260596546, "grad_norm": 2.2155185275133524, "learning_rate": 2.075072552634957e-06, "loss": 0.8724, "step": 7620 }, { "epoch": 0.797592883307169, "grad_norm": 1.8788752477824127, "learning_rate": 2.0730056808882993e-06, "loss": 0.7483, "step": 7621 }, { "epoch": 0.7976975405546834, "grad_norm": 2.2866944996543146, "learning_rate": 2.0709397199693616e-06, "loss": 1.0201, "step": 7622 }, { "epoch": 0.7978021978021979, "grad_norm": 2.2884876205119853, "learning_rate": 2.068874670115524e-06, "loss": 0.8809, "step": 7623 }, { "epoch": 0.7979068550497121, "grad_norm": 2.2069390747276367, "learning_rate": 2.0668105315640643e-06, "loss": 0.9331, "step": 7624 }, { "epoch": 0.7980115122972266, "grad_norm": 2.2300750543609316, "learning_rate": 2.0647473045521536e-06, "loss": 0.9861, "step": 7625 }, { "epoch": 0.798116169544741, "grad_norm": 1.9473189124396104, "learning_rate": 2.0626849893168655e-06, "loss": 0.8974, "step": 7626 }, { "epoch": 0.7982208267922554, "grad_norm": 2.1241126439462175, "learning_rate": 2.060623586095164e-06, "loss": 0.8733, "step": 7627 }, { "epoch": 0.7983254840397698, "grad_norm": 2.596050491489067, "learning_rate": 2.058563095123908e-06, "loss": 0.8455, "step": 7628 }, { "epoch": 0.7984301412872842, "grad_norm": 1.6496184313046733, "learning_rate": 2.056503516639852e-06, "loss": 0.7479, "step": 7629 }, { "epoch": 0.7985347985347986, "grad_norm": 1.9392866056291445, "learning_rate": 2.054444850879641e-06, "loss": 0.8385, "step": 7630 }, { "epoch": 0.7986394557823129, "grad_norm": 2.2720725714699754, "learning_rate": 2.052387098079828e-06, "loss": 0.9297, "step": 7631 }, { "epoch": 0.7987441130298273, "grad_norm": 2.0705720785860238, "learning_rate": 2.050330258476848e-06, "loss": 0.8512, "step": 7632 }, { "epoch": 0.7988487702773417, "grad_norm": 1.9969354338670204, "learning_rate": 2.048274332307034e-06, "loss": 0.8813, "step": 7633 }, { "epoch": 0.7989534275248561, "grad_norm": 2.6051722309501764, "learning_rate": 2.0462193198066226e-06, "loss": 0.9048, "step": 7634 }, { "epoch": 0.7990580847723705, "grad_norm": 2.164629627302996, "learning_rate": 2.0441652212117326e-06, "loss": 0.9597, "step": 7635 }, { "epoch": 0.7991627420198849, "grad_norm": 1.9912390762348158, "learning_rate": 2.042112036758389e-06, "loss": 0.9058, "step": 7636 }, { "epoch": 0.7992673992673993, "grad_norm": 1.8720345668097196, "learning_rate": 2.040059766682504e-06, "loss": 0.8318, "step": 7637 }, { "epoch": 0.7993720565149136, "grad_norm": 2.0710286245998946, "learning_rate": 2.0380084112198893e-06, "loss": 0.849, "step": 7638 }, { "epoch": 0.799476713762428, "grad_norm": 1.8830717363876028, "learning_rate": 2.0359579706062493e-06, "loss": 0.9041, "step": 7639 }, { "epoch": 0.7995813710099424, "grad_norm": 1.895209812890259, "learning_rate": 2.0339084450771797e-06, "loss": 0.7978, "step": 7640 }, { "epoch": 0.7996860282574568, "grad_norm": 1.8023260782135615, "learning_rate": 2.031859834868182e-06, "loss": 0.8613, "step": 7641 }, { "epoch": 0.7997906855049712, "grad_norm": 2.1045087518839125, "learning_rate": 2.0298121402146386e-06, "loss": 0.8604, "step": 7642 }, { "epoch": 0.7998953427524856, "grad_norm": 1.7435782505197222, "learning_rate": 2.0277653613518413e-06, "loss": 0.7904, "step": 7643 }, { "epoch": 0.8, "grad_norm": 2.393742131042189, "learning_rate": 2.0257194985149653e-06, "loss": 0.8332, "step": 7644 }, { "epoch": 0.8001046572475143, "grad_norm": 2.0074026574381003, "learning_rate": 2.023674551939081e-06, "loss": 1.0339, "step": 7645 }, { "epoch": 0.8002093144950287, "grad_norm": 2.1072035338876045, "learning_rate": 2.0216305218591636e-06, "loss": 0.9326, "step": 7646 }, { "epoch": 0.8003139717425432, "grad_norm": 2.2169694179321766, "learning_rate": 2.019587408510074e-06, "loss": 1.0086, "step": 7647 }, { "epoch": 0.8004186289900576, "grad_norm": 1.745468429945779, "learning_rate": 2.017545212126568e-06, "loss": 0.7837, "step": 7648 }, { "epoch": 0.800523286237572, "grad_norm": 2.307042540661392, "learning_rate": 2.015503932943297e-06, "loss": 0.9432, "step": 7649 }, { "epoch": 0.8006279434850864, "grad_norm": 1.8426257142814524, "learning_rate": 2.0134635711948125e-06, "loss": 0.9432, "step": 7650 }, { "epoch": 0.8007326007326008, "grad_norm": 1.9616990567522987, "learning_rate": 2.011424127115552e-06, "loss": 0.8726, "step": 7651 }, { "epoch": 0.8008372579801151, "grad_norm": 2.0819208074157536, "learning_rate": 2.009385600939856e-06, "loss": 0.8719, "step": 7652 }, { "epoch": 0.8009419152276295, "grad_norm": 2.357873291710539, "learning_rate": 2.0073479929019536e-06, "loss": 1.0302, "step": 7653 }, { "epoch": 0.8010465724751439, "grad_norm": 2.22508676135223, "learning_rate": 2.005311303235966e-06, "loss": 0.9423, "step": 7654 }, { "epoch": 0.8011512297226583, "grad_norm": 2.4557652270731616, "learning_rate": 2.00327553217592e-06, "loss": 0.8045, "step": 7655 }, { "epoch": 0.8012558869701727, "grad_norm": 2.258447659718386, "learning_rate": 2.001240679955727e-06, "loss": 0.8484, "step": 7656 }, { "epoch": 0.8013605442176871, "grad_norm": 2.0218151709850316, "learning_rate": 1.9992067468091947e-06, "loss": 0.9074, "step": 7657 }, { "epoch": 0.8014652014652015, "grad_norm": 2.31088112748935, "learning_rate": 1.9971737329700256e-06, "loss": 0.7952, "step": 7658 }, { "epoch": 0.8015698587127159, "grad_norm": 2.132378499236807, "learning_rate": 1.995141638671816e-06, "loss": 0.8665, "step": 7659 }, { "epoch": 0.8016745159602302, "grad_norm": 2.1426183845478053, "learning_rate": 1.993110464148059e-06, "loss": 0.9102, "step": 7660 }, { "epoch": 0.8017791732077446, "grad_norm": 2.1293042574252867, "learning_rate": 1.9910802096321457e-06, "loss": 0.8794, "step": 7661 }, { "epoch": 0.801883830455259, "grad_norm": 2.1089889655402168, "learning_rate": 1.98905087535735e-06, "loss": 0.9595, "step": 7662 }, { "epoch": 0.8019884877027734, "grad_norm": 2.0969209074252615, "learning_rate": 1.98702246155685e-06, "loss": 0.9647, "step": 7663 }, { "epoch": 0.8020931449502878, "grad_norm": 1.992617104506457, "learning_rate": 1.9849949684637103e-06, "loss": 0.8987, "step": 7664 }, { "epoch": 0.8021978021978022, "grad_norm": 2.633618499233918, "learning_rate": 1.9829683963108992e-06, "loss": 0.9034, "step": 7665 }, { "epoch": 0.8023024594453166, "grad_norm": 2.2056134263285907, "learning_rate": 1.980942745331271e-06, "loss": 0.8807, "step": 7666 }, { "epoch": 0.8024071166928309, "grad_norm": 2.068273680645422, "learning_rate": 1.9789180157575795e-06, "loss": 0.8689, "step": 7667 }, { "epoch": 0.8025117739403453, "grad_norm": 2.019594583927551, "learning_rate": 1.976894207822464e-06, "loss": 0.9953, "step": 7668 }, { "epoch": 0.8026164311878597, "grad_norm": 2.316889621162141, "learning_rate": 1.9748713217584693e-06, "loss": 0.8515, "step": 7669 }, { "epoch": 0.8027210884353742, "grad_norm": 2.111794166943864, "learning_rate": 1.972849357798031e-06, "loss": 0.9745, "step": 7670 }, { "epoch": 0.8028257456828886, "grad_norm": 2.03962760228518, "learning_rate": 1.970828316173474e-06, "loss": 0.8139, "step": 7671 }, { "epoch": 0.802930402930403, "grad_norm": 2.069040546093496, "learning_rate": 1.9688081971170202e-06, "loss": 0.8368, "step": 7672 }, { "epoch": 0.8030350601779174, "grad_norm": 2.1304131521617613, "learning_rate": 1.9667890008607827e-06, "loss": 0.8431, "step": 7673 }, { "epoch": 0.8031397174254317, "grad_norm": 1.95426739187706, "learning_rate": 1.964770727636778e-06, "loss": 0.9034, "step": 7674 }, { "epoch": 0.8032443746729461, "grad_norm": 2.1064759648577773, "learning_rate": 1.962753377676905e-06, "loss": 0.8432, "step": 7675 }, { "epoch": 0.8033490319204605, "grad_norm": 1.8850818133792997, "learning_rate": 1.9607369512129615e-06, "loss": 0.8973, "step": 7676 }, { "epoch": 0.8034536891679749, "grad_norm": 1.5859786709753045, "learning_rate": 1.958721448476637e-06, "loss": 0.7476, "step": 7677 }, { "epoch": 0.8035583464154893, "grad_norm": 2.220270218255743, "learning_rate": 1.9567068696995227e-06, "loss": 0.9196, "step": 7678 }, { "epoch": 0.8036630036630037, "grad_norm": 1.6376859017657255, "learning_rate": 1.9546932151130913e-06, "loss": 0.771, "step": 7679 }, { "epoch": 0.8037676609105181, "grad_norm": 2.1672382842265185, "learning_rate": 1.9526804849487225e-06, "loss": 0.9884, "step": 7680 }, { "epoch": 0.8038723181580324, "grad_norm": 2.16714258180435, "learning_rate": 1.9506686794376782e-06, "loss": 0.9114, "step": 7681 }, { "epoch": 0.8039769754055468, "grad_norm": 2.130041323517797, "learning_rate": 1.948657798811121e-06, "loss": 0.9454, "step": 7682 }, { "epoch": 0.8040816326530612, "grad_norm": 2.0922079549466512, "learning_rate": 1.9466478433001022e-06, "loss": 0.8827, "step": 7683 }, { "epoch": 0.8041862899005756, "grad_norm": 2.0945418746948197, "learning_rate": 1.9446388131355753e-06, "loss": 0.8897, "step": 7684 }, { "epoch": 0.80429094714809, "grad_norm": 2.236535687893619, "learning_rate": 1.9426307085483753e-06, "loss": 0.9248, "step": 7685 }, { "epoch": 0.8043956043956044, "grad_norm": 2.0020235889260647, "learning_rate": 1.9406235297692434e-06, "loss": 0.8902, "step": 7686 }, { "epoch": 0.8045002616431188, "grad_norm": 2.161723959121442, "learning_rate": 1.9386172770288057e-06, "loss": 0.9059, "step": 7687 }, { "epoch": 0.8046049188906331, "grad_norm": 2.1277819801638373, "learning_rate": 1.936611950557583e-06, "loss": 0.7741, "step": 7688 }, { "epoch": 0.8047095761381475, "grad_norm": 1.8553064489672486, "learning_rate": 1.9346075505859953e-06, "loss": 0.8993, "step": 7689 }, { "epoch": 0.8048142333856619, "grad_norm": 1.7911656465689691, "learning_rate": 1.932604077344351e-06, "loss": 0.7607, "step": 7690 }, { "epoch": 0.8049188906331763, "grad_norm": 1.6049725585488301, "learning_rate": 1.9306015310628523e-06, "loss": 0.7695, "step": 7691 }, { "epoch": 0.8050235478806907, "grad_norm": 1.9936774666260564, "learning_rate": 1.9285999119715936e-06, "loss": 0.755, "step": 7692 }, { "epoch": 0.8051282051282052, "grad_norm": 2.164086612979867, "learning_rate": 1.926599220300569e-06, "loss": 0.9036, "step": 7693 }, { "epoch": 0.8052328623757196, "grad_norm": 2.1877719623928247, "learning_rate": 1.924599456279659e-06, "loss": 0.9081, "step": 7694 }, { "epoch": 0.8053375196232339, "grad_norm": 1.9764063147501494, "learning_rate": 1.922600620138645e-06, "loss": 0.7899, "step": 7695 }, { "epoch": 0.8054421768707483, "grad_norm": 2.096661411005218, "learning_rate": 1.9206027121071937e-06, "loss": 0.8407, "step": 7696 }, { "epoch": 0.8055468341182627, "grad_norm": 2.2156877709091742, "learning_rate": 1.918605732414869e-06, "loss": 0.924, "step": 7697 }, { "epoch": 0.8056514913657771, "grad_norm": 1.6524591320676993, "learning_rate": 1.916609681291126e-06, "loss": 0.8249, "step": 7698 }, { "epoch": 0.8057561486132915, "grad_norm": 2.3280205948736694, "learning_rate": 1.914614558965321e-06, "loss": 0.864, "step": 7699 }, { "epoch": 0.8058608058608059, "grad_norm": 1.8899662650771822, "learning_rate": 1.9126203656666918e-06, "loss": 0.7827, "step": 7700 }, { "epoch": 0.8059654631083203, "grad_norm": 1.8294831259412443, "learning_rate": 1.9106271016243792e-06, "loss": 0.7265, "step": 7701 }, { "epoch": 0.8060701203558347, "grad_norm": 1.8127721118364941, "learning_rate": 1.908634767067409e-06, "loss": 0.7789, "step": 7702 }, { "epoch": 0.806174777603349, "grad_norm": 2.0601656865089004, "learning_rate": 1.906643362224706e-06, "loss": 0.7856, "step": 7703 }, { "epoch": 0.8062794348508634, "grad_norm": 2.0340414414029984, "learning_rate": 1.9046528873250902e-06, "loss": 0.8515, "step": 7704 }, { "epoch": 0.8063840920983778, "grad_norm": 1.9432516065364067, "learning_rate": 1.9026633425972696e-06, "loss": 0.8491, "step": 7705 }, { "epoch": 0.8064887493458922, "grad_norm": 2.4510874165764838, "learning_rate": 1.9006747282698445e-06, "loss": 0.7151, "step": 7706 }, { "epoch": 0.8065934065934066, "grad_norm": 2.3967160624954786, "learning_rate": 1.8986870445713112e-06, "loss": 0.8809, "step": 7707 }, { "epoch": 0.806698063840921, "grad_norm": 2.1255469660957766, "learning_rate": 1.8967002917300614e-06, "loss": 0.918, "step": 7708 }, { "epoch": 0.8068027210884354, "grad_norm": 2.178668681213783, "learning_rate": 1.8947144699743747e-06, "loss": 0.9926, "step": 7709 }, { "epoch": 0.8069073783359497, "grad_norm": 2.2070908456686933, "learning_rate": 1.8927295795324275e-06, "loss": 0.8722, "step": 7710 }, { "epoch": 0.8070120355834641, "grad_norm": 1.817792067239064, "learning_rate": 1.890745620632284e-06, "loss": 0.8888, "step": 7711 }, { "epoch": 0.8071166928309785, "grad_norm": 2.5554707262103293, "learning_rate": 1.8887625935019072e-06, "loss": 0.9806, "step": 7712 }, { "epoch": 0.8072213500784929, "grad_norm": 2.304574255962299, "learning_rate": 1.886780498369154e-06, "loss": 0.8317, "step": 7713 }, { "epoch": 0.8073260073260073, "grad_norm": 2.0339279457346375, "learning_rate": 1.8847993354617689e-06, "loss": 0.9634, "step": 7714 }, { "epoch": 0.8074306645735218, "grad_norm": 2.0844568689691867, "learning_rate": 1.8828191050073918e-06, "loss": 0.9645, "step": 7715 }, { "epoch": 0.8075353218210362, "grad_norm": 1.9109270715202566, "learning_rate": 1.8808398072335555e-06, "loss": 0.8214, "step": 7716 }, { "epoch": 0.8076399790685505, "grad_norm": 1.9339620974128462, "learning_rate": 1.878861442367681e-06, "loss": 0.8987, "step": 7717 }, { "epoch": 0.8077446363160649, "grad_norm": 1.9397724352401922, "learning_rate": 1.8768840106370922e-06, "loss": 0.8367, "step": 7718 }, { "epoch": 0.8078492935635793, "grad_norm": 2.1190867449260917, "learning_rate": 1.8749075122689986e-06, "loss": 0.8944, "step": 7719 }, { "epoch": 0.8079539508110937, "grad_norm": 2.670252322303464, "learning_rate": 1.8729319474905005e-06, "loss": 0.8799, "step": 7720 }, { "epoch": 0.8080586080586081, "grad_norm": 2.141457200125952, "learning_rate": 1.8709573165286e-06, "loss": 0.8191, "step": 7721 }, { "epoch": 0.8081632653061225, "grad_norm": 1.7558169293557664, "learning_rate": 1.8689836196101797e-06, "loss": 0.8741, "step": 7722 }, { "epoch": 0.8082679225536369, "grad_norm": 2.06145311599018, "learning_rate": 1.867010856962027e-06, "loss": 0.9091, "step": 7723 }, { "epoch": 0.8083725798011512, "grad_norm": 2.1553715349745937, "learning_rate": 1.8650390288108156e-06, "loss": 0.9571, "step": 7724 }, { "epoch": 0.8084772370486656, "grad_norm": 2.6232290933594307, "learning_rate": 1.8630681353831104e-06, "loss": 0.7693, "step": 7725 }, { "epoch": 0.80858189429618, "grad_norm": 2.3021150057545627, "learning_rate": 1.8610981769053681e-06, "loss": 0.9316, "step": 7726 }, { "epoch": 0.8086865515436944, "grad_norm": 1.9431158516385172, "learning_rate": 1.859129153603949e-06, "loss": 0.9173, "step": 7727 }, { "epoch": 0.8087912087912088, "grad_norm": 2.040241180020073, "learning_rate": 1.8571610657050921e-06, "loss": 0.9225, "step": 7728 }, { "epoch": 0.8088958660387232, "grad_norm": 2.049568434268225, "learning_rate": 1.855193913434934e-06, "loss": 0.8637, "step": 7729 }, { "epoch": 0.8090005232862376, "grad_norm": 2.2567330112829866, "learning_rate": 1.8532276970195085e-06, "loss": 0.8094, "step": 7730 }, { "epoch": 0.8091051805337519, "grad_norm": 2.4898641315793144, "learning_rate": 1.8512624166847338e-06, "loss": 0.9544, "step": 7731 }, { "epoch": 0.8092098377812663, "grad_norm": 2.087408291721611, "learning_rate": 1.8492980726564292e-06, "loss": 0.8928, "step": 7732 }, { "epoch": 0.8093144950287807, "grad_norm": 1.9848372835802772, "learning_rate": 1.8473346651602986e-06, "loss": 0.9422, "step": 7733 }, { "epoch": 0.8094191522762951, "grad_norm": 1.9749206184830705, "learning_rate": 1.8453721944219428e-06, "loss": 0.835, "step": 7734 }, { "epoch": 0.8095238095238095, "grad_norm": 2.298149888799161, "learning_rate": 1.8434106606668522e-06, "loss": 0.9568, "step": 7735 }, { "epoch": 0.8096284667713239, "grad_norm": 2.081259887949481, "learning_rate": 1.8414500641204104e-06, "loss": 0.8567, "step": 7736 }, { "epoch": 0.8097331240188383, "grad_norm": 2.1873810909435294, "learning_rate": 1.8394904050078943e-06, "loss": 0.8909, "step": 7737 }, { "epoch": 0.8098377812663526, "grad_norm": 2.064239458078798, "learning_rate": 1.8375316835544775e-06, "loss": 0.7535, "step": 7738 }, { "epoch": 0.809942438513867, "grad_norm": 1.9681755929693125, "learning_rate": 1.8355738999852157e-06, "loss": 0.8752, "step": 7739 }, { "epoch": 0.8100470957613815, "grad_norm": 1.9419726368546049, "learning_rate": 1.8336170545250653e-06, "loss": 0.9044, "step": 7740 }, { "epoch": 0.8101517530088959, "grad_norm": 2.1087815522447033, "learning_rate": 1.8316611473988678e-06, "loss": 0.8237, "step": 7741 }, { "epoch": 0.8102564102564103, "grad_norm": 2.078852602883871, "learning_rate": 1.8297061788313652e-06, "loss": 0.843, "step": 7742 }, { "epoch": 0.8103610675039247, "grad_norm": 1.951856666967445, "learning_rate": 1.827752149047185e-06, "loss": 0.9578, "step": 7743 }, { "epoch": 0.8104657247514391, "grad_norm": 1.7953444494690902, "learning_rate": 1.8257990582708508e-06, "loss": 0.7784, "step": 7744 }, { "epoch": 0.8105703819989535, "grad_norm": 1.9250641329953944, "learning_rate": 1.8238469067267728e-06, "loss": 0.9653, "step": 7745 }, { "epoch": 0.8106750392464678, "grad_norm": 1.8425811745515865, "learning_rate": 1.8218956946392597e-06, "loss": 0.8461, "step": 7746 }, { "epoch": 0.8107796964939822, "grad_norm": 1.6927128485168828, "learning_rate": 1.819945422232513e-06, "loss": 0.8375, "step": 7747 }, { "epoch": 0.8108843537414966, "grad_norm": 1.96120471071044, "learning_rate": 1.8179960897306192e-06, "loss": 0.6927, "step": 7748 }, { "epoch": 0.810989010989011, "grad_norm": 1.8908478940115092, "learning_rate": 1.8160476973575624e-06, "loss": 0.9127, "step": 7749 }, { "epoch": 0.8110936682365254, "grad_norm": 2.3740896100862194, "learning_rate": 1.8141002453372126e-06, "loss": 0.8577, "step": 7750 }, { "epoch": 0.8111983254840398, "grad_norm": 1.9405102048474503, "learning_rate": 1.8121537338933425e-06, "loss": 0.8621, "step": 7751 }, { "epoch": 0.8113029827315542, "grad_norm": 2.101087698006679, "learning_rate": 1.8102081632496061e-06, "loss": 0.9008, "step": 7752 }, { "epoch": 0.8114076399790685, "grad_norm": 2.109664241764611, "learning_rate": 1.808263533629554e-06, "loss": 0.8728, "step": 7753 }, { "epoch": 0.8115122972265829, "grad_norm": 1.957536899983253, "learning_rate": 1.8063198452566288e-06, "loss": 0.9817, "step": 7754 }, { "epoch": 0.8116169544740973, "grad_norm": 2.29053423643524, "learning_rate": 1.804377098354161e-06, "loss": 0.8997, "step": 7755 }, { "epoch": 0.8117216117216117, "grad_norm": 1.9515520676387297, "learning_rate": 1.8024352931453791e-06, "loss": 0.9398, "step": 7756 }, { "epoch": 0.8118262689691261, "grad_norm": 2.2052154737151164, "learning_rate": 1.8004944298534022e-06, "loss": 0.9776, "step": 7757 }, { "epoch": 0.8119309262166405, "grad_norm": 1.8460361778888326, "learning_rate": 1.798554508701238e-06, "loss": 0.7619, "step": 7758 }, { "epoch": 0.8120355834641549, "grad_norm": 2.0564089124118703, "learning_rate": 1.796615529911786e-06, "loss": 0.9962, "step": 7759 }, { "epoch": 0.8121402407116692, "grad_norm": 1.5242392257544974, "learning_rate": 1.7946774937078383e-06, "loss": 0.7455, "step": 7760 }, { "epoch": 0.8122448979591836, "grad_norm": 1.9373147411960117, "learning_rate": 1.792740400312082e-06, "loss": 0.9294, "step": 7761 }, { "epoch": 0.812349555206698, "grad_norm": 2.253211453976033, "learning_rate": 1.7908042499470924e-06, "loss": 1.0895, "step": 7762 }, { "epoch": 0.8124542124542125, "grad_norm": 2.010874372372902, "learning_rate": 1.788869042835335e-06, "loss": 0.7848, "step": 7763 }, { "epoch": 0.8125588697017269, "grad_norm": 2.1110812558999026, "learning_rate": 1.7869347791991686e-06, "loss": 0.8749, "step": 7764 }, { "epoch": 0.8126635269492413, "grad_norm": 2.0790033933698506, "learning_rate": 1.7850014592608457e-06, "loss": 0.8357, "step": 7765 }, { "epoch": 0.8127681841967557, "grad_norm": 2.2764948530690794, "learning_rate": 1.7830690832425114e-06, "loss": 0.85, "step": 7766 }, { "epoch": 0.81287284144427, "grad_norm": 2.0246844811622617, "learning_rate": 1.781137651366196e-06, "loss": 0.9454, "step": 7767 }, { "epoch": 0.8129774986917844, "grad_norm": 2.062713254460875, "learning_rate": 1.7792071638538267e-06, "loss": 0.914, "step": 7768 }, { "epoch": 0.8130821559392988, "grad_norm": 2.060831788514829, "learning_rate": 1.777277620927217e-06, "loss": 0.8909, "step": 7769 }, { "epoch": 0.8131868131868132, "grad_norm": 2.1960138141055037, "learning_rate": 1.7753490228080795e-06, "loss": 0.8942, "step": 7770 }, { "epoch": 0.8132914704343276, "grad_norm": 2.019766608559704, "learning_rate": 1.7734213697180137e-06, "loss": 0.7633, "step": 7771 }, { "epoch": 0.813396127681842, "grad_norm": 1.9474343159482823, "learning_rate": 1.7714946618785056e-06, "loss": 0.8488, "step": 7772 }, { "epoch": 0.8135007849293564, "grad_norm": 2.1858861212932004, "learning_rate": 1.769568899510945e-06, "loss": 0.8966, "step": 7773 }, { "epoch": 0.8136054421768707, "grad_norm": 1.8619826191371032, "learning_rate": 1.7676440828366026e-06, "loss": 0.849, "step": 7774 }, { "epoch": 0.8137100994243851, "grad_norm": 1.882083312231594, "learning_rate": 1.7657202120766414e-06, "loss": 0.851, "step": 7775 }, { "epoch": 0.8138147566718995, "grad_norm": 2.0412076045520138, "learning_rate": 1.7637972874521237e-06, "loss": 0.711, "step": 7776 }, { "epoch": 0.8139194139194139, "grad_norm": 2.0941706761569145, "learning_rate": 1.7618753091839924e-06, "loss": 0.904, "step": 7777 }, { "epoch": 0.8140240711669283, "grad_norm": 2.070231862068319, "learning_rate": 1.75995427749309e-06, "loss": 0.9615, "step": 7778 }, { "epoch": 0.8141287284144427, "grad_norm": 1.9533497638106823, "learning_rate": 1.758034192600142e-06, "loss": 0.7906, "step": 7779 }, { "epoch": 0.8142333856619571, "grad_norm": 1.8061715691464177, "learning_rate": 1.7561150547257766e-06, "loss": 0.9816, "step": 7780 }, { "epoch": 0.8143380429094714, "grad_norm": 1.9499562486471225, "learning_rate": 1.7541968640905005e-06, "loss": 0.8275, "step": 7781 }, { "epoch": 0.8144427001569858, "grad_norm": 1.9941853271497223, "learning_rate": 1.7522796209147241e-06, "loss": 1.0171, "step": 7782 }, { "epoch": 0.8145473574045002, "grad_norm": 2.016835021467956, "learning_rate": 1.750363325418738e-06, "loss": 0.9194, "step": 7783 }, { "epoch": 0.8146520146520146, "grad_norm": 2.349001003678539, "learning_rate": 1.7484479778227281e-06, "loss": 0.8795, "step": 7784 }, { "epoch": 0.814756671899529, "grad_norm": 2.815561540260484, "learning_rate": 1.7465335783467753e-06, "loss": 0.8893, "step": 7785 }, { "epoch": 0.8148613291470435, "grad_norm": 1.856879993868537, "learning_rate": 1.7446201272108454e-06, "loss": 0.8485, "step": 7786 }, { "epoch": 0.8149659863945579, "grad_norm": 1.6972040689665617, "learning_rate": 1.7427076246347984e-06, "loss": 0.8006, "step": 7787 }, { "epoch": 0.8150706436420723, "grad_norm": 2.4050270886241174, "learning_rate": 1.7407960708383852e-06, "loss": 0.8119, "step": 7788 }, { "epoch": 0.8151753008895866, "grad_norm": 2.0596208441064747, "learning_rate": 1.7388854660412435e-06, "loss": 0.8787, "step": 7789 }, { "epoch": 0.815279958137101, "grad_norm": 2.6144498345375675, "learning_rate": 1.7369758104629098e-06, "loss": 1.0528, "step": 7790 }, { "epoch": 0.8153846153846154, "grad_norm": 1.8301486855272067, "learning_rate": 1.7350671043228072e-06, "loss": 0.8948, "step": 7791 }, { "epoch": 0.8154892726321298, "grad_norm": 2.2092777762924656, "learning_rate": 1.7331593478402508e-06, "loss": 0.9019, "step": 7792 }, { "epoch": 0.8155939298796442, "grad_norm": 1.931054386839299, "learning_rate": 1.7312525412344428e-06, "loss": 0.8937, "step": 7793 }, { "epoch": 0.8156985871271586, "grad_norm": 2.0088421049144327, "learning_rate": 1.7293466847244788e-06, "loss": 0.7165, "step": 7794 }, { "epoch": 0.815803244374673, "grad_norm": 2.064343587455126, "learning_rate": 1.7274417785293485e-06, "loss": 0.7951, "step": 7795 }, { "epoch": 0.8159079016221873, "grad_norm": 1.9979352317040366, "learning_rate": 1.7255378228679286e-06, "loss": 0.8228, "step": 7796 }, { "epoch": 0.8160125588697017, "grad_norm": 2.257708751650851, "learning_rate": 1.7236348179589869e-06, "loss": 0.8863, "step": 7797 }, { "epoch": 0.8161172161172161, "grad_norm": 2.518173481553307, "learning_rate": 1.72173276402118e-06, "loss": 0.8981, "step": 7798 }, { "epoch": 0.8162218733647305, "grad_norm": 1.9462944845738346, "learning_rate": 1.7198316612730603e-06, "loss": 0.9347, "step": 7799 }, { "epoch": 0.8163265306122449, "grad_norm": 2.595705625274348, "learning_rate": 1.7179315099330707e-06, "loss": 0.966, "step": 7800 }, { "epoch": 0.8164311878597593, "grad_norm": 1.977122273036907, "learning_rate": 1.7160323102195408e-06, "loss": 0.8375, "step": 7801 }, { "epoch": 0.8165358451072737, "grad_norm": 2.1978666809470386, "learning_rate": 1.7141340623506908e-06, "loss": 0.8184, "step": 7802 }, { "epoch": 0.816640502354788, "grad_norm": 2.1815414177103523, "learning_rate": 1.7122367665446327e-06, "loss": 0.8825, "step": 7803 }, { "epoch": 0.8167451596023024, "grad_norm": 1.547128969295642, "learning_rate": 1.7103404230193733e-06, "loss": 0.6754, "step": 7804 }, { "epoch": 0.8168498168498168, "grad_norm": 1.6815037025349835, "learning_rate": 1.7084450319928037e-06, "loss": 0.6787, "step": 7805 }, { "epoch": 0.8169544740973312, "grad_norm": 1.959387083414091, "learning_rate": 1.7065505936827097e-06, "loss": 0.972, "step": 7806 }, { "epoch": 0.8170591313448456, "grad_norm": 2.3459581609655227, "learning_rate": 1.704657108306762e-06, "loss": 0.7988, "step": 7807 }, { "epoch": 0.81716378859236, "grad_norm": 2.8102944684349676, "learning_rate": 1.7027645760825284e-06, "loss": 0.926, "step": 7808 }, { "epoch": 0.8172684458398745, "grad_norm": 1.7141239024853219, "learning_rate": 1.7008729972274685e-06, "loss": 0.8889, "step": 7809 }, { "epoch": 0.8173731030873888, "grad_norm": 1.9586989744589782, "learning_rate": 1.698982371958925e-06, "loss": 0.9195, "step": 7810 }, { "epoch": 0.8174777603349032, "grad_norm": 2.13663131703756, "learning_rate": 1.697092700494135e-06, "loss": 1.009, "step": 7811 }, { "epoch": 0.8175824175824176, "grad_norm": 1.9020925116780771, "learning_rate": 1.6952039830502253e-06, "loss": 0.8457, "step": 7812 }, { "epoch": 0.817687074829932, "grad_norm": 1.9607266851857876, "learning_rate": 1.693316219844211e-06, "loss": 0.8744, "step": 7813 }, { "epoch": 0.8177917320774464, "grad_norm": 2.1863791159608077, "learning_rate": 1.6914294110930052e-06, "loss": 0.8709, "step": 7814 }, { "epoch": 0.8178963893249608, "grad_norm": 1.918146066711303, "learning_rate": 1.6895435570134032e-06, "loss": 0.8755, "step": 7815 }, { "epoch": 0.8180010465724752, "grad_norm": 2.3464466253166925, "learning_rate": 1.687658657822091e-06, "loss": 0.8249, "step": 7816 }, { "epoch": 0.8181057038199895, "grad_norm": 2.094038559858304, "learning_rate": 1.6857747137356529e-06, "loss": 0.8999, "step": 7817 }, { "epoch": 0.8182103610675039, "grad_norm": 1.969446534885216, "learning_rate": 1.683891724970551e-06, "loss": 0.7884, "step": 7818 }, { "epoch": 0.8183150183150183, "grad_norm": 1.9254682173238997, "learning_rate": 1.6820096917431527e-06, "loss": 0.8815, "step": 7819 }, { "epoch": 0.8184196755625327, "grad_norm": 1.705412210976725, "learning_rate": 1.6801286142697015e-06, "loss": 0.7476, "step": 7820 }, { "epoch": 0.8185243328100471, "grad_norm": 1.8741055231594557, "learning_rate": 1.67824849276634e-06, "loss": 0.8466, "step": 7821 }, { "epoch": 0.8186289900575615, "grad_norm": 1.9978999965045052, "learning_rate": 1.6763693274490932e-06, "loss": 0.8856, "step": 7822 }, { "epoch": 0.8187336473050759, "grad_norm": 2.1962770410951977, "learning_rate": 1.6744911185338874e-06, "loss": 0.8014, "step": 7823 }, { "epoch": 0.8188383045525902, "grad_norm": 2.5133884425384214, "learning_rate": 1.6726138662365276e-06, "loss": 0.9689, "step": 7824 }, { "epoch": 0.8189429618001046, "grad_norm": 2.0611491066129197, "learning_rate": 1.6707375707727169e-06, "loss": 0.7664, "step": 7825 }, { "epoch": 0.819047619047619, "grad_norm": 1.9924960787409718, "learning_rate": 1.6688622323580461e-06, "loss": 0.7497, "step": 7826 }, { "epoch": 0.8191522762951334, "grad_norm": 2.112698383860386, "learning_rate": 1.6669878512079906e-06, "loss": 0.8948, "step": 7827 }, { "epoch": 0.8192569335426478, "grad_norm": 2.4115852205279804, "learning_rate": 1.6651144275379261e-06, "loss": 0.9912, "step": 7828 }, { "epoch": 0.8193615907901622, "grad_norm": 2.141183062004032, "learning_rate": 1.6632419615631112e-06, "loss": 0.8449, "step": 7829 }, { "epoch": 0.8194662480376766, "grad_norm": 2.3622806603126696, "learning_rate": 1.661370453498694e-06, "loss": 0.9455, "step": 7830 }, { "epoch": 0.819570905285191, "grad_norm": 2.161674209789048, "learning_rate": 1.659499903559717e-06, "loss": 0.8994, "step": 7831 }, { "epoch": 0.8196755625327053, "grad_norm": 2.2133213841412993, "learning_rate": 1.6576303119611058e-06, "loss": 0.9306, "step": 7832 }, { "epoch": 0.8197802197802198, "grad_norm": 2.2348885981045226, "learning_rate": 1.6557616789176844e-06, "loss": 0.7268, "step": 7833 }, { "epoch": 0.8198848770277342, "grad_norm": 1.9426173933907054, "learning_rate": 1.653894004644163e-06, "loss": 0.8147, "step": 7834 }, { "epoch": 0.8199895342752486, "grad_norm": 2.4120961582899416, "learning_rate": 1.6520272893551404e-06, "loss": 0.8665, "step": 7835 }, { "epoch": 0.820094191522763, "grad_norm": 1.7715546995709663, "learning_rate": 1.6501615332651054e-06, "loss": 0.8394, "step": 7836 }, { "epoch": 0.8201988487702774, "grad_norm": 2.1557330830514867, "learning_rate": 1.648296736588435e-06, "loss": 0.82, "step": 7837 }, { "epoch": 0.8203035060177918, "grad_norm": 1.9987706186923369, "learning_rate": 1.6464328995394018e-06, "loss": 0.8204, "step": 7838 }, { "epoch": 0.8204081632653061, "grad_norm": 2.057663150677782, "learning_rate": 1.6445700223321637e-06, "loss": 0.8971, "step": 7839 }, { "epoch": 0.8205128205128205, "grad_norm": 1.7641921997749053, "learning_rate": 1.642708105180768e-06, "loss": 0.8375, "step": 7840 }, { "epoch": 0.8206174777603349, "grad_norm": 2.0728485301068957, "learning_rate": 1.6408471482991506e-06, "loss": 0.8569, "step": 7841 }, { "epoch": 0.8207221350078493, "grad_norm": 2.036559853741599, "learning_rate": 1.6389871519011414e-06, "loss": 0.8177, "step": 7842 }, { "epoch": 0.8208267922553637, "grad_norm": 2.1297430764924368, "learning_rate": 1.6371281162004604e-06, "loss": 0.8486, "step": 7843 }, { "epoch": 0.8209314495028781, "grad_norm": 1.920858753141409, "learning_rate": 1.6352700414107113e-06, "loss": 0.9678, "step": 7844 }, { "epoch": 0.8210361067503925, "grad_norm": 1.977146348689036, "learning_rate": 1.6334129277453935e-06, "loss": 0.8985, "step": 7845 }, { "epoch": 0.8211407639979068, "grad_norm": 2.220183521723425, "learning_rate": 1.6315567754178896e-06, "loss": 0.7621, "step": 7846 }, { "epoch": 0.8212454212454212, "grad_norm": 2.1123357019169626, "learning_rate": 1.6297015846414755e-06, "loss": 0.7536, "step": 7847 }, { "epoch": 0.8213500784929356, "grad_norm": 2.353601092727366, "learning_rate": 1.6278473556293195e-06, "loss": 0.8775, "step": 7848 }, { "epoch": 0.82145473574045, "grad_norm": 2.14734632695945, "learning_rate": 1.6259940885944737e-06, "loss": 0.8968, "step": 7849 }, { "epoch": 0.8215593929879644, "grad_norm": 2.0771918291151557, "learning_rate": 1.6241417837498818e-06, "loss": 0.9364, "step": 7850 }, { "epoch": 0.8216640502354788, "grad_norm": 2.0500743901250504, "learning_rate": 1.6222904413083806e-06, "loss": 0.8666, "step": 7851 }, { "epoch": 0.8217687074829932, "grad_norm": 2.0916815220393183, "learning_rate": 1.6204400614826886e-06, "loss": 0.7629, "step": 7852 }, { "epoch": 0.8218733647305075, "grad_norm": 2.0764823972980597, "learning_rate": 1.6185906444854227e-06, "loss": 1.0247, "step": 7853 }, { "epoch": 0.8219780219780219, "grad_norm": 1.832597003879775, "learning_rate": 1.6167421905290837e-06, "loss": 0.9094, "step": 7854 }, { "epoch": 0.8220826792255364, "grad_norm": 2.0982201560289813, "learning_rate": 1.6148946998260605e-06, "loss": 1.0225, "step": 7855 }, { "epoch": 0.8221873364730508, "grad_norm": 1.8679791707883626, "learning_rate": 1.6130481725886327e-06, "loss": 0.854, "step": 7856 }, { "epoch": 0.8222919937205652, "grad_norm": 2.4377106859096997, "learning_rate": 1.6112026090289744e-06, "loss": 0.9012, "step": 7857 }, { "epoch": 0.8223966509680796, "grad_norm": 1.9716656960681878, "learning_rate": 1.609358009359142e-06, "loss": 0.8272, "step": 7858 }, { "epoch": 0.822501308215594, "grad_norm": 2.039719279072701, "learning_rate": 1.6075143737910826e-06, "loss": 0.7955, "step": 7859 }, { "epoch": 0.8226059654631083, "grad_norm": 2.1817572238574945, "learning_rate": 1.6056717025366364e-06, "loss": 0.8247, "step": 7860 }, { "epoch": 0.8227106227106227, "grad_norm": 1.9613810519827801, "learning_rate": 1.6038299958075266e-06, "loss": 0.922, "step": 7861 }, { "epoch": 0.8228152799581371, "grad_norm": 1.964948398942913, "learning_rate": 1.6019892538153725e-06, "loss": 0.7781, "step": 7862 }, { "epoch": 0.8229199372056515, "grad_norm": 2.13598752292871, "learning_rate": 1.6001494767716786e-06, "loss": 0.7924, "step": 7863 }, { "epoch": 0.8230245944531659, "grad_norm": 2.204916092833003, "learning_rate": 1.598310664887839e-06, "loss": 1.0455, "step": 7864 }, { "epoch": 0.8231292517006803, "grad_norm": 2.2186437702248085, "learning_rate": 1.5964728183751343e-06, "loss": 0.8428, "step": 7865 }, { "epoch": 0.8232339089481947, "grad_norm": 2.478462994063437, "learning_rate": 1.5946359374447374e-06, "loss": 0.8116, "step": 7866 }, { "epoch": 0.823338566195709, "grad_norm": 1.8826304139905394, "learning_rate": 1.5928000223077123e-06, "loss": 0.9264, "step": 7867 }, { "epoch": 0.8234432234432234, "grad_norm": 2.166154757400412, "learning_rate": 1.5909650731750048e-06, "loss": 0.8777, "step": 7868 }, { "epoch": 0.8235478806907378, "grad_norm": 2.6409362779319325, "learning_rate": 1.5891310902574608e-06, "loss": 0.9525, "step": 7869 }, { "epoch": 0.8236525379382522, "grad_norm": 2.0823720652767244, "learning_rate": 1.587298073765805e-06, "loss": 0.9399, "step": 7870 }, { "epoch": 0.8237571951857666, "grad_norm": 2.147673418143053, "learning_rate": 1.5854660239106523e-06, "loss": 0.9762, "step": 7871 }, { "epoch": 0.823861852433281, "grad_norm": 2.000527006452208, "learning_rate": 1.5836349409025143e-06, "loss": 0.7641, "step": 7872 }, { "epoch": 0.8239665096807954, "grad_norm": 2.4818231228477012, "learning_rate": 1.5818048249517825e-06, "loss": 1.0183, "step": 7873 }, { "epoch": 0.8240711669283098, "grad_norm": 2.0501868554721265, "learning_rate": 1.5799756762687424e-06, "loss": 0.8242, "step": 7874 }, { "epoch": 0.8241758241758241, "grad_norm": 2.3433303375111545, "learning_rate": 1.5781474950635633e-06, "loss": 0.8638, "step": 7875 }, { "epoch": 0.8242804814233385, "grad_norm": 2.0395663531419577, "learning_rate": 1.5763202815463109e-06, "loss": 0.8088, "step": 7876 }, { "epoch": 0.824385138670853, "grad_norm": 2.037603786377761, "learning_rate": 1.5744940359269357e-06, "loss": 0.9229, "step": 7877 }, { "epoch": 0.8244897959183674, "grad_norm": 1.8506973012344865, "learning_rate": 1.5726687584152778e-06, "loss": 0.8673, "step": 7878 }, { "epoch": 0.8245944531658818, "grad_norm": 2.288058667834201, "learning_rate": 1.5708444492210617e-06, "loss": 0.7638, "step": 7879 }, { "epoch": 0.8246991104133962, "grad_norm": 2.1522314144552337, "learning_rate": 1.5690211085539054e-06, "loss": 0.8846, "step": 7880 }, { "epoch": 0.8248037676609106, "grad_norm": 1.6233838085258567, "learning_rate": 1.5671987366233166e-06, "loss": 0.7405, "step": 7881 }, { "epoch": 0.8249084249084249, "grad_norm": 2.0740723600378566, "learning_rate": 1.565377333638688e-06, "loss": 0.9315, "step": 7882 }, { "epoch": 0.8250130821559393, "grad_norm": 2.2960235649753047, "learning_rate": 1.563556899809302e-06, "loss": 0.9879, "step": 7883 }, { "epoch": 0.8251177394034537, "grad_norm": 1.7103999518415944, "learning_rate": 1.5617374353443316e-06, "loss": 0.7416, "step": 7884 }, { "epoch": 0.8252223966509681, "grad_norm": 2.5050104670986597, "learning_rate": 1.5599189404528337e-06, "loss": 0.8611, "step": 7885 }, { "epoch": 0.8253270538984825, "grad_norm": 2.612738780164187, "learning_rate": 1.5581014153437578e-06, "loss": 0.9615, "step": 7886 }, { "epoch": 0.8254317111459969, "grad_norm": 1.8339034152233065, "learning_rate": 1.5562848602259473e-06, "loss": 0.9589, "step": 7887 }, { "epoch": 0.8255363683935113, "grad_norm": 1.9861260916511432, "learning_rate": 1.5544692753081226e-06, "loss": 0.8367, "step": 7888 }, { "epoch": 0.8256410256410256, "grad_norm": 2.019172682002892, "learning_rate": 1.552654660798899e-06, "loss": 0.8907, "step": 7889 }, { "epoch": 0.82574568288854, "grad_norm": 2.266069990170753, "learning_rate": 1.5508410169067767e-06, "loss": 0.7255, "step": 7890 }, { "epoch": 0.8258503401360544, "grad_norm": 1.7448652237490383, "learning_rate": 1.5490283438401533e-06, "loss": 0.8038, "step": 7891 }, { "epoch": 0.8259549973835688, "grad_norm": 2.236043533397473, "learning_rate": 1.5472166418073042e-06, "loss": 0.8716, "step": 7892 }, { "epoch": 0.8260596546310832, "grad_norm": 1.7295344167594429, "learning_rate": 1.5454059110163977e-06, "loss": 0.7992, "step": 7893 }, { "epoch": 0.8261643118785976, "grad_norm": 2.114455364406809, "learning_rate": 1.5435961516754904e-06, "loss": 0.8744, "step": 7894 }, { "epoch": 0.826268969126112, "grad_norm": 1.7900260474592373, "learning_rate": 1.5417873639925274e-06, "loss": 0.8994, "step": 7895 }, { "epoch": 0.8263736263736263, "grad_norm": 2.0911021977650135, "learning_rate": 1.5399795481753454e-06, "loss": 0.8086, "step": 7896 }, { "epoch": 0.8264782836211407, "grad_norm": 2.1517967909735547, "learning_rate": 1.5381727044316641e-06, "loss": 0.9165, "step": 7897 }, { "epoch": 0.8265829408686551, "grad_norm": 1.864956322145815, "learning_rate": 1.536366832969093e-06, "loss": 0.9554, "step": 7898 }, { "epoch": 0.8266875981161695, "grad_norm": 2.0999023389103915, "learning_rate": 1.5345619339951267e-06, "loss": 0.884, "step": 7899 }, { "epoch": 0.826792255363684, "grad_norm": 2.3031128045450577, "learning_rate": 1.5327580077171589e-06, "loss": 0.7399, "step": 7900 }, { "epoch": 0.8268969126111984, "grad_norm": 1.8424502804616814, "learning_rate": 1.5309550543424612e-06, "loss": 0.7417, "step": 7901 }, { "epoch": 0.8270015698587128, "grad_norm": 1.9163642835084853, "learning_rate": 1.5291530740781923e-06, "loss": 0.751, "step": 7902 }, { "epoch": 0.827106227106227, "grad_norm": 1.87018617560281, "learning_rate": 1.5273520671314113e-06, "loss": 0.8395, "step": 7903 }, { "epoch": 0.8272108843537415, "grad_norm": 1.9411831340122883, "learning_rate": 1.5255520337090534e-06, "loss": 0.8028, "step": 7904 }, { "epoch": 0.8273155416012559, "grad_norm": 2.4058082707488113, "learning_rate": 1.5237529740179436e-06, "loss": 0.9005, "step": 7905 }, { "epoch": 0.8274201988487703, "grad_norm": 2.1409745262541287, "learning_rate": 1.5219548882648027e-06, "loss": 0.9353, "step": 7906 }, { "epoch": 0.8275248560962847, "grad_norm": 1.9577882525040946, "learning_rate": 1.5201577766562314e-06, "loss": 0.8448, "step": 7907 }, { "epoch": 0.8276295133437991, "grad_norm": 2.0318542534635275, "learning_rate": 1.5183616393987232e-06, "loss": 0.8689, "step": 7908 }, { "epoch": 0.8277341705913135, "grad_norm": 2.301362720034534, "learning_rate": 1.5165664766986533e-06, "loss": 0.9667, "step": 7909 }, { "epoch": 0.8278388278388278, "grad_norm": 1.886895169603913, "learning_rate": 1.5147722887622961e-06, "loss": 0.8658, "step": 7910 }, { "epoch": 0.8279434850863422, "grad_norm": 2.46318780324625, "learning_rate": 1.5129790757958008e-06, "loss": 0.8375, "step": 7911 }, { "epoch": 0.8280481423338566, "grad_norm": 1.8771844183637372, "learning_rate": 1.5111868380052185e-06, "loss": 0.7701, "step": 7912 }, { "epoch": 0.828152799581371, "grad_norm": 2.070606556582261, "learning_rate": 1.5093955755964762e-06, "loss": 1.0035, "step": 7913 }, { "epoch": 0.8282574568288854, "grad_norm": 2.201869626898473, "learning_rate": 1.5076052887753923e-06, "loss": 0.904, "step": 7914 }, { "epoch": 0.8283621140763998, "grad_norm": 1.996397707124482, "learning_rate": 1.5058159777476788e-06, "loss": 0.7816, "step": 7915 }, { "epoch": 0.8284667713239142, "grad_norm": 2.0077562325381133, "learning_rate": 1.5040276427189294e-06, "loss": 0.9121, "step": 7916 }, { "epoch": 0.8285714285714286, "grad_norm": 1.9941521295888824, "learning_rate": 1.502240283894626e-06, "loss": 0.8295, "step": 7917 }, { "epoch": 0.8286760858189429, "grad_norm": 1.9915709085478066, "learning_rate": 1.500453901480139e-06, "loss": 0.8647, "step": 7918 }, { "epoch": 0.8287807430664573, "grad_norm": 2.0000071542196145, "learning_rate": 1.498668495680732e-06, "loss": 0.9201, "step": 7919 }, { "epoch": 0.8288854003139717, "grad_norm": 1.9756711232215005, "learning_rate": 1.4968840667015462e-06, "loss": 0.8459, "step": 7920 }, { "epoch": 0.8289900575614861, "grad_norm": 1.8528329572690898, "learning_rate": 1.4951006147476211e-06, "loss": 0.7497, "step": 7921 }, { "epoch": 0.8290947148090005, "grad_norm": 1.782034124098557, "learning_rate": 1.493318140023876e-06, "loss": 0.8792, "step": 7922 }, { "epoch": 0.829199372056515, "grad_norm": 2.106613209839586, "learning_rate": 1.4915366427351219e-06, "loss": 0.7935, "step": 7923 }, { "epoch": 0.8293040293040294, "grad_norm": 2.1408753179094857, "learning_rate": 1.4897561230860536e-06, "loss": 1.0341, "step": 7924 }, { "epoch": 0.8294086865515437, "grad_norm": 2.1278800514184324, "learning_rate": 1.4879765812812607e-06, "loss": 0.8901, "step": 7925 }, { "epoch": 0.8295133437990581, "grad_norm": 2.1440190302189714, "learning_rate": 1.4861980175252154e-06, "loss": 0.898, "step": 7926 }, { "epoch": 0.8296180010465725, "grad_norm": 2.0503918898516384, "learning_rate": 1.4844204320222767e-06, "loss": 0.8825, "step": 7927 }, { "epoch": 0.8297226582940869, "grad_norm": 2.353937197043025, "learning_rate": 1.4826438249766906e-06, "loss": 0.8895, "step": 7928 }, { "epoch": 0.8298273155416013, "grad_norm": 2.2495600621190586, "learning_rate": 1.4808681965925954e-06, "loss": 0.9815, "step": 7929 }, { "epoch": 0.8299319727891157, "grad_norm": 2.1503657595480843, "learning_rate": 1.4790935470740175e-06, "loss": 0.9053, "step": 7930 }, { "epoch": 0.8300366300366301, "grad_norm": 1.8861867201869633, "learning_rate": 1.4773198766248642e-06, "loss": 0.8636, "step": 7931 }, { "epoch": 0.8301412872841444, "grad_norm": 2.1460191083705906, "learning_rate": 1.4755471854489345e-06, "loss": 1.0363, "step": 7932 }, { "epoch": 0.8302459445316588, "grad_norm": 2.493276713397057, "learning_rate": 1.4737754737499111e-06, "loss": 0.8972, "step": 7933 }, { "epoch": 0.8303506017791732, "grad_norm": 1.6858625797981937, "learning_rate": 1.4720047417313731e-06, "loss": 0.7886, "step": 7934 }, { "epoch": 0.8304552590266876, "grad_norm": 1.8962906371419326, "learning_rate": 1.4702349895967782e-06, "loss": 0.9196, "step": 7935 }, { "epoch": 0.830559916274202, "grad_norm": 1.9184229473202252, "learning_rate": 1.4684662175494747e-06, "loss": 0.8052, "step": 7936 }, { "epoch": 0.8306645735217164, "grad_norm": 2.137568048466092, "learning_rate": 1.4666984257926964e-06, "loss": 0.7906, "step": 7937 }, { "epoch": 0.8307692307692308, "grad_norm": 1.5889031763544768, "learning_rate": 1.4649316145295668e-06, "loss": 0.7409, "step": 7938 }, { "epoch": 0.8308738880167451, "grad_norm": 1.8404031630051605, "learning_rate": 1.4631657839630996e-06, "loss": 0.7866, "step": 7939 }, { "epoch": 0.8309785452642595, "grad_norm": 2.286617894950235, "learning_rate": 1.46140093429619e-06, "loss": 0.8095, "step": 7940 }, { "epoch": 0.8310832025117739, "grad_norm": 2.05496477949122, "learning_rate": 1.4596370657316229e-06, "loss": 0.9575, "step": 7941 }, { "epoch": 0.8311878597592883, "grad_norm": 2.427977961236888, "learning_rate": 1.4578741784720697e-06, "loss": 0.876, "step": 7942 }, { "epoch": 0.8312925170068027, "grad_norm": 2.227236132950701, "learning_rate": 1.4561122727200872e-06, "loss": 0.7827, "step": 7943 }, { "epoch": 0.8313971742543171, "grad_norm": 1.8480415224847573, "learning_rate": 1.454351348678128e-06, "loss": 0.9043, "step": 7944 }, { "epoch": 0.8315018315018315, "grad_norm": 2.152502773769006, "learning_rate": 1.4525914065485225e-06, "loss": 0.824, "step": 7945 }, { "epoch": 0.8316064887493458, "grad_norm": 1.9448080461974455, "learning_rate": 1.4508324465334888e-06, "loss": 0.8931, "step": 7946 }, { "epoch": 0.8317111459968602, "grad_norm": 2.0868231871819733, "learning_rate": 1.449074468835141e-06, "loss": 0.8808, "step": 7947 }, { "epoch": 0.8318158032443747, "grad_norm": 2.0813903646254786, "learning_rate": 1.4473174736554685e-06, "loss": 0.7902, "step": 7948 }, { "epoch": 0.8319204604918891, "grad_norm": 2.123099741849126, "learning_rate": 1.4455614611963599e-06, "loss": 0.8902, "step": 7949 }, { "epoch": 0.8320251177394035, "grad_norm": 1.9932955603081572, "learning_rate": 1.44380643165958e-06, "loss": 0.7715, "step": 7950 }, { "epoch": 0.8321297749869179, "grad_norm": 2.068162191946537, "learning_rate": 1.4420523852467861e-06, "loss": 0.8567, "step": 7951 }, { "epoch": 0.8322344322344323, "grad_norm": 1.9814333811554212, "learning_rate": 1.4402993221595197e-06, "loss": 0.8771, "step": 7952 }, { "epoch": 0.8323390894819466, "grad_norm": 2.2197393374158936, "learning_rate": 1.4385472425992164e-06, "loss": 0.8788, "step": 7953 }, { "epoch": 0.832443746729461, "grad_norm": 2.375333340064998, "learning_rate": 1.4367961467671875e-06, "loss": 0.9018, "step": 7954 }, { "epoch": 0.8325484039769754, "grad_norm": 1.9216141945810121, "learning_rate": 1.4350460348646444e-06, "loss": 0.8605, "step": 7955 }, { "epoch": 0.8326530612244898, "grad_norm": 2.398938782927487, "learning_rate": 1.4332969070926739e-06, "loss": 0.8632, "step": 7956 }, { "epoch": 0.8327577184720042, "grad_norm": 2.0555051893772056, "learning_rate": 1.431548763652253e-06, "loss": 0.9676, "step": 7957 }, { "epoch": 0.8328623757195186, "grad_norm": 2.1581306202378276, "learning_rate": 1.429801604744252e-06, "loss": 0.9032, "step": 7958 }, { "epoch": 0.832967032967033, "grad_norm": 1.897752103573272, "learning_rate": 1.4280554305694205e-06, "loss": 0.8139, "step": 7959 }, { "epoch": 0.8330716902145474, "grad_norm": 1.9164586231571057, "learning_rate": 1.426310241328397e-06, "loss": 0.794, "step": 7960 }, { "epoch": 0.8331763474620617, "grad_norm": 2.1626145846140323, "learning_rate": 1.4245660372217062e-06, "loss": 0.9293, "step": 7961 }, { "epoch": 0.8332810047095761, "grad_norm": 2.1783332161398836, "learning_rate": 1.4228228184497607e-06, "loss": 0.8771, "step": 7962 }, { "epoch": 0.8333856619570905, "grad_norm": 2.0795885143306276, "learning_rate": 1.4210805852128617e-06, "loss": 0.8529, "step": 7963 }, { "epoch": 0.8334903192046049, "grad_norm": 2.5330157272730602, "learning_rate": 1.4193393377111964e-06, "loss": 0.8425, "step": 7964 }, { "epoch": 0.8335949764521193, "grad_norm": 1.8622801123769794, "learning_rate": 1.4175990761448355e-06, "loss": 0.8042, "step": 7965 }, { "epoch": 0.8336996336996337, "grad_norm": 1.8796275243042533, "learning_rate": 1.4158598007137391e-06, "loss": 0.7952, "step": 7966 }, { "epoch": 0.8338042909471481, "grad_norm": 2.0693042166079056, "learning_rate": 1.414121511617752e-06, "loss": 0.8121, "step": 7967 }, { "epoch": 0.8339089481946624, "grad_norm": 2.1022638373991014, "learning_rate": 1.4123842090566108e-06, "loss": 0.915, "step": 7968 }, { "epoch": 0.8340136054421768, "grad_norm": 1.993990099910452, "learning_rate": 1.4106478932299316e-06, "loss": 0.8766, "step": 7969 }, { "epoch": 0.8341182626896912, "grad_norm": 1.7756065930189895, "learning_rate": 1.4089125643372237e-06, "loss": 0.777, "step": 7970 }, { "epoch": 0.8342229199372057, "grad_norm": 2.5496490825925906, "learning_rate": 1.4071782225778751e-06, "loss": 0.8664, "step": 7971 }, { "epoch": 0.8343275771847201, "grad_norm": 2.2433472242170396, "learning_rate": 1.405444868151168e-06, "loss": 0.9331, "step": 7972 }, { "epoch": 0.8344322344322345, "grad_norm": 1.7433595951078031, "learning_rate": 1.4037125012562702e-06, "loss": 0.7727, "step": 7973 }, { "epoch": 0.8345368916797489, "grad_norm": 1.8722606546409677, "learning_rate": 1.401981122092233e-06, "loss": 0.9949, "step": 7974 }, { "epoch": 0.8346415489272632, "grad_norm": 2.019791859336298, "learning_rate": 1.400250730857996e-06, "loss": 0.8875, "step": 7975 }, { "epoch": 0.8347462061747776, "grad_norm": 1.984713578640695, "learning_rate": 1.3985213277523802e-06, "loss": 0.8334, "step": 7976 }, { "epoch": 0.834850863422292, "grad_norm": 1.8860559224768174, "learning_rate": 1.3967929129741032e-06, "loss": 0.8732, "step": 7977 }, { "epoch": 0.8349555206698064, "grad_norm": 2.4921343619909906, "learning_rate": 1.3950654867217605e-06, "loss": 0.8214, "step": 7978 }, { "epoch": 0.8350601779173208, "grad_norm": 2.6451975469839133, "learning_rate": 1.3933390491938381e-06, "loss": 1.0114, "step": 7979 }, { "epoch": 0.8351648351648352, "grad_norm": 2.345016840340204, "learning_rate": 1.3916136005887048e-06, "loss": 0.8941, "step": 7980 }, { "epoch": 0.8352694924123496, "grad_norm": 2.414100792833315, "learning_rate": 1.3898891411046212e-06, "loss": 0.8546, "step": 7981 }, { "epoch": 0.8353741496598639, "grad_norm": 1.9845989400524062, "learning_rate": 1.3881656709397272e-06, "loss": 0.8789, "step": 7982 }, { "epoch": 0.8354788069073783, "grad_norm": 1.7934306576299728, "learning_rate": 1.3864431902920584e-06, "loss": 0.8559, "step": 7983 }, { "epoch": 0.8355834641548927, "grad_norm": 1.9810087701516985, "learning_rate": 1.3847216993595292e-06, "loss": 0.7373, "step": 7984 }, { "epoch": 0.8356881214024071, "grad_norm": 1.84000027892072, "learning_rate": 1.3830011983399404e-06, "loss": 0.8423, "step": 7985 }, { "epoch": 0.8357927786499215, "grad_norm": 1.983947105295788, "learning_rate": 1.381281687430981e-06, "loss": 0.9642, "step": 7986 }, { "epoch": 0.8358974358974359, "grad_norm": 2.240241092713476, "learning_rate": 1.37956316683023e-06, "loss": 0.8095, "step": 7987 }, { "epoch": 0.8360020931449503, "grad_norm": 2.328360525402851, "learning_rate": 1.3778456367351455e-06, "loss": 0.9569, "step": 7988 }, { "epoch": 0.8361067503924646, "grad_norm": 2.290792673993712, "learning_rate": 1.3761290973430753e-06, "loss": 0.8242, "step": 7989 }, { "epoch": 0.836211407639979, "grad_norm": 1.9941656847351477, "learning_rate": 1.3744135488512556e-06, "loss": 0.9385, "step": 7990 }, { "epoch": 0.8363160648874934, "grad_norm": 1.9735468832454361, "learning_rate": 1.3726989914568034e-06, "loss": 0.8731, "step": 7991 }, { "epoch": 0.8364207221350078, "grad_norm": 1.9837606343238872, "learning_rate": 1.3709854253567278e-06, "loss": 0.8264, "step": 7992 }, { "epoch": 0.8365253793825222, "grad_norm": 1.9088882775380245, "learning_rate": 1.3692728507479214e-06, "loss": 0.8842, "step": 7993 }, { "epoch": 0.8366300366300367, "grad_norm": 2.119436328203735, "learning_rate": 1.3675612678271588e-06, "loss": 0.906, "step": 7994 }, { "epoch": 0.8367346938775511, "grad_norm": 2.407719091887164, "learning_rate": 1.365850676791105e-06, "loss": 0.8475, "step": 7995 }, { "epoch": 0.8368393511250654, "grad_norm": 2.103481136150311, "learning_rate": 1.364141077836314e-06, "loss": 0.8192, "step": 7996 }, { "epoch": 0.8369440083725798, "grad_norm": 2.1854323710686114, "learning_rate": 1.3624324711592196e-06, "loss": 0.8484, "step": 7997 }, { "epoch": 0.8370486656200942, "grad_norm": 2.1386964506824895, "learning_rate": 1.3607248569561426e-06, "loss": 0.8292, "step": 7998 }, { "epoch": 0.8371533228676086, "grad_norm": 2.115583413702666, "learning_rate": 1.359018235423295e-06, "loss": 0.8606, "step": 7999 }, { "epoch": 0.837257980115123, "grad_norm": 2.238976220893437, "learning_rate": 1.3573126067567688e-06, "loss": 0.8858, "step": 8000 }, { "epoch": 0.8373626373626374, "grad_norm": 2.049600921572707, "learning_rate": 1.3556079711525439e-06, "loss": 0.8915, "step": 8001 }, { "epoch": 0.8374672946101518, "grad_norm": 1.8171596729987456, "learning_rate": 1.353904328806488e-06, "loss": 0.8115, "step": 8002 }, { "epoch": 0.8375719518576662, "grad_norm": 2.399384125655146, "learning_rate": 1.3522016799143522e-06, "loss": 0.9062, "step": 8003 }, { "epoch": 0.8376766091051805, "grad_norm": 1.7533273781629497, "learning_rate": 1.3505000246717747e-06, "loss": 0.8903, "step": 8004 }, { "epoch": 0.8377812663526949, "grad_norm": 1.9579932469356014, "learning_rate": 1.348799363274277e-06, "loss": 0.7385, "step": 8005 }, { "epoch": 0.8378859236002093, "grad_norm": 1.981901250159665, "learning_rate": 1.34709969591727e-06, "loss": 0.8783, "step": 8006 }, { "epoch": 0.8379905808477237, "grad_norm": 2.167993399995397, "learning_rate": 1.3454010227960513e-06, "loss": 0.9084, "step": 8007 }, { "epoch": 0.8380952380952381, "grad_norm": 1.9178702073749012, "learning_rate": 1.3437033441057989e-06, "loss": 0.8963, "step": 8008 }, { "epoch": 0.8381998953427525, "grad_norm": 1.951517186745851, "learning_rate": 1.3420066600415815e-06, "loss": 0.8355, "step": 8009 }, { "epoch": 0.8383045525902669, "grad_norm": 1.909486949227223, "learning_rate": 1.3403109707983475e-06, "loss": 0.8665, "step": 8010 }, { "epoch": 0.8384092098377812, "grad_norm": 1.9369839312160841, "learning_rate": 1.338616276570941e-06, "loss": 0.8397, "step": 8011 }, { "epoch": 0.8385138670852956, "grad_norm": 1.8773973941438988, "learning_rate": 1.336922577554083e-06, "loss": 0.8907, "step": 8012 }, { "epoch": 0.83861852433281, "grad_norm": 2.116860394094237, "learning_rate": 1.335229873942382e-06, "loss": 0.8874, "step": 8013 }, { "epoch": 0.8387231815803244, "grad_norm": 2.170506935277406, "learning_rate": 1.3335381659303315e-06, "loss": 0.9886, "step": 8014 }, { "epoch": 0.8388278388278388, "grad_norm": 1.8806562248296783, "learning_rate": 1.3318474537123138e-06, "loss": 0.7677, "step": 8015 }, { "epoch": 0.8389324960753533, "grad_norm": 2.407172290628666, "learning_rate": 1.3301577374825992e-06, "loss": 0.7599, "step": 8016 }, { "epoch": 0.8390371533228677, "grad_norm": 3.184412100368414, "learning_rate": 1.3284690174353364e-06, "loss": 0.8256, "step": 8017 }, { "epoch": 0.839141810570382, "grad_norm": 1.7731206492806486, "learning_rate": 1.326781293764562e-06, "loss": 0.7793, "step": 8018 }, { "epoch": 0.8392464678178964, "grad_norm": 1.9962901080318212, "learning_rate": 1.325094566664199e-06, "loss": 0.8381, "step": 8019 }, { "epoch": 0.8393511250654108, "grad_norm": 2.0858347365200736, "learning_rate": 1.323408836328054e-06, "loss": 0.8617, "step": 8020 }, { "epoch": 0.8394557823129252, "grad_norm": 1.941127370534017, "learning_rate": 1.3217241029498263e-06, "loss": 0.8268, "step": 8021 }, { "epoch": 0.8395604395604396, "grad_norm": 1.9438795971577278, "learning_rate": 1.320040366723091e-06, "loss": 0.7854, "step": 8022 }, { "epoch": 0.839665096807954, "grad_norm": 2.0873160477402175, "learning_rate": 1.3183576278413134e-06, "loss": 0.8417, "step": 8023 }, { "epoch": 0.8397697540554684, "grad_norm": 1.7491945690304727, "learning_rate": 1.3166758864978424e-06, "loss": 0.8151, "step": 8024 }, { "epoch": 0.8398744113029827, "grad_norm": 1.9769300150442357, "learning_rate": 1.3149951428859142e-06, "loss": 0.8788, "step": 8025 }, { "epoch": 0.8399790685504971, "grad_norm": 2.140498082088263, "learning_rate": 1.3133153971986534e-06, "loss": 0.7433, "step": 8026 }, { "epoch": 0.8400837257980115, "grad_norm": 2.4165371301116045, "learning_rate": 1.3116366496290633e-06, "loss": 0.8785, "step": 8027 }, { "epoch": 0.8401883830455259, "grad_norm": 1.9222172060976765, "learning_rate": 1.3099589003700341e-06, "loss": 0.9783, "step": 8028 }, { "epoch": 0.8402930402930403, "grad_norm": 2.3781849035951943, "learning_rate": 1.3082821496143428e-06, "loss": 0.7902, "step": 8029 }, { "epoch": 0.8403976975405547, "grad_norm": 2.0993772636604304, "learning_rate": 1.3066063975546538e-06, "loss": 0.9355, "step": 8030 }, { "epoch": 0.8405023547880691, "grad_norm": 2.1721127058441674, "learning_rate": 1.3049316443835137e-06, "loss": 0.7599, "step": 8031 }, { "epoch": 0.8406070120355834, "grad_norm": 2.531892795002363, "learning_rate": 1.3032578902933546e-06, "loss": 0.8561, "step": 8032 }, { "epoch": 0.8407116692830978, "grad_norm": 2.2300704327515004, "learning_rate": 1.301585135476492e-06, "loss": 0.8676, "step": 8033 }, { "epoch": 0.8408163265306122, "grad_norm": 2.1993303961757826, "learning_rate": 1.2999133801251295e-06, "loss": 0.8671, "step": 8034 }, { "epoch": 0.8409209837781266, "grad_norm": 2.154741893990914, "learning_rate": 1.2982426244313595e-06, "loss": 0.8462, "step": 8035 }, { "epoch": 0.841025641025641, "grad_norm": 2.129202801530897, "learning_rate": 1.2965728685871525e-06, "loss": 1.0065, "step": 8036 }, { "epoch": 0.8411302982731554, "grad_norm": 1.8293468950695384, "learning_rate": 1.2949041127843665e-06, "loss": 0.8104, "step": 8037 }, { "epoch": 0.8412349555206698, "grad_norm": 1.907250964658945, "learning_rate": 1.2932363572147445e-06, "loss": 0.8117, "step": 8038 }, { "epoch": 0.8413396127681841, "grad_norm": 2.2639632750436793, "learning_rate": 1.2915696020699142e-06, "loss": 0.8617, "step": 8039 }, { "epoch": 0.8414442700156985, "grad_norm": 2.308249827742698, "learning_rate": 1.2899038475413917e-06, "loss": 0.8622, "step": 8040 }, { "epoch": 0.841548927263213, "grad_norm": 2.227723639811261, "learning_rate": 1.2882390938205725e-06, "loss": 0.9042, "step": 8041 }, { "epoch": 0.8416535845107274, "grad_norm": 2.1043886188970413, "learning_rate": 1.2865753410987447e-06, "loss": 0.9361, "step": 8042 }, { "epoch": 0.8417582417582418, "grad_norm": 1.845500363111109, "learning_rate": 1.2849125895670733e-06, "loss": 0.8998, "step": 8043 }, { "epoch": 0.8418628990057562, "grad_norm": 2.042113662517905, "learning_rate": 1.283250839416611e-06, "loss": 0.8967, "step": 8044 }, { "epoch": 0.8419675562532706, "grad_norm": 2.5260072411582986, "learning_rate": 1.2815900908383004e-06, "loss": 0.8395, "step": 8045 }, { "epoch": 0.842072213500785, "grad_norm": 1.569850831580542, "learning_rate": 1.2799303440229616e-06, "loss": 0.764, "step": 8046 }, { "epoch": 0.8421768707482993, "grad_norm": 2.164867452931564, "learning_rate": 1.2782715991613048e-06, "loss": 0.8424, "step": 8047 }, { "epoch": 0.8422815279958137, "grad_norm": 1.713908988310335, "learning_rate": 1.276613856443919e-06, "loss": 0.8094, "step": 8048 }, { "epoch": 0.8423861852433281, "grad_norm": 1.9683438158526272, "learning_rate": 1.2749571160612872e-06, "loss": 0.8499, "step": 8049 }, { "epoch": 0.8424908424908425, "grad_norm": 2.9203629814106002, "learning_rate": 1.2733013782037695e-06, "loss": 0.8889, "step": 8050 }, { "epoch": 0.8425954997383569, "grad_norm": 1.8540479952789417, "learning_rate": 1.2716466430616148e-06, "loss": 0.8076, "step": 8051 }, { "epoch": 0.8427001569858713, "grad_norm": 1.9814166025936952, "learning_rate": 1.2699929108249565e-06, "loss": 0.8634, "step": 8052 }, { "epoch": 0.8428048142333857, "grad_norm": 1.9793401850173653, "learning_rate": 1.2683401816838071e-06, "loss": 0.8196, "step": 8053 }, { "epoch": 0.8429094714809, "grad_norm": 2.267300532522284, "learning_rate": 1.2666884558280745e-06, "loss": 0.7867, "step": 8054 }, { "epoch": 0.8430141287284144, "grad_norm": 1.789887206030474, "learning_rate": 1.2650377334475416e-06, "loss": 0.7908, "step": 8055 }, { "epoch": 0.8431187859759288, "grad_norm": 1.881345825048694, "learning_rate": 1.2633880147318822e-06, "loss": 0.8574, "step": 8056 }, { "epoch": 0.8432234432234432, "grad_norm": 1.9203037592773267, "learning_rate": 1.2617392998706502e-06, "loss": 0.7534, "step": 8057 }, { "epoch": 0.8433281004709576, "grad_norm": 2.2154089371004573, "learning_rate": 1.260091589053284e-06, "loss": 0.8416, "step": 8058 }, { "epoch": 0.843432757718472, "grad_norm": 2.1555356685174454, "learning_rate": 1.2584448824691132e-06, "loss": 0.8896, "step": 8059 }, { "epoch": 0.8435374149659864, "grad_norm": 2.081590502046218, "learning_rate": 1.2567991803073476e-06, "loss": 0.8501, "step": 8060 }, { "epoch": 0.8436420722135007, "grad_norm": 2.1329378594631203, "learning_rate": 1.2551544827570805e-06, "loss": 0.8398, "step": 8061 }, { "epoch": 0.8437467294610151, "grad_norm": 2.037326064879042, "learning_rate": 1.2535107900072918e-06, "loss": 0.9452, "step": 8062 }, { "epoch": 0.8438513867085295, "grad_norm": 1.8536690182768223, "learning_rate": 1.2518681022468414e-06, "loss": 0.841, "step": 8063 }, { "epoch": 0.843956043956044, "grad_norm": 2.418709398888962, "learning_rate": 1.250226419664483e-06, "loss": 0.8493, "step": 8064 }, { "epoch": 0.8440607012035584, "grad_norm": 1.9982124764464644, "learning_rate": 1.2485857424488457e-06, "loss": 0.9066, "step": 8065 }, { "epoch": 0.8441653584510728, "grad_norm": 1.826129673099164, "learning_rate": 1.2469460707884485e-06, "loss": 0.7961, "step": 8066 }, { "epoch": 0.8442700156985872, "grad_norm": 2.0052685371560126, "learning_rate": 1.2453074048716896e-06, "loss": 0.9495, "step": 8067 }, { "epoch": 0.8443746729461015, "grad_norm": 2.4774578028328977, "learning_rate": 1.2436697448868572e-06, "loss": 0.8768, "step": 8068 }, { "epoch": 0.8444793301936159, "grad_norm": 1.9032740646595938, "learning_rate": 1.2420330910221245e-06, "loss": 0.9143, "step": 8069 }, { "epoch": 0.8445839874411303, "grad_norm": 2.1131379226433524, "learning_rate": 1.240397443465544e-06, "loss": 0.8795, "step": 8070 }, { "epoch": 0.8446886446886447, "grad_norm": 2.037973685230345, "learning_rate": 1.2387628024050557e-06, "loss": 0.8836, "step": 8071 }, { "epoch": 0.8447933019361591, "grad_norm": 1.8857034384869027, "learning_rate": 1.237129168028479e-06, "loss": 0.8511, "step": 8072 }, { "epoch": 0.8448979591836735, "grad_norm": 2.0457364245538807, "learning_rate": 1.2354965405235276e-06, "loss": 0.9149, "step": 8073 }, { "epoch": 0.8450026164311879, "grad_norm": 2.1833956563172103, "learning_rate": 1.2338649200777908e-06, "loss": 0.7978, "step": 8074 }, { "epoch": 0.8451072736787022, "grad_norm": 1.95060154540977, "learning_rate": 1.2322343068787456e-06, "loss": 0.8084, "step": 8075 }, { "epoch": 0.8452119309262166, "grad_norm": 2.051054237471809, "learning_rate": 1.23060470111375e-06, "loss": 0.8513, "step": 8076 }, { "epoch": 0.845316588173731, "grad_norm": 1.9270136243745513, "learning_rate": 1.2289761029700553e-06, "loss": 0.8409, "step": 8077 }, { "epoch": 0.8454212454212454, "grad_norm": 2.2645480935646702, "learning_rate": 1.227348512634784e-06, "loss": 0.9181, "step": 8078 }, { "epoch": 0.8455259026687598, "grad_norm": 2.2076948615617185, "learning_rate": 1.2257219302949541e-06, "loss": 0.9305, "step": 8079 }, { "epoch": 0.8456305599162742, "grad_norm": 2.511590266125373, "learning_rate": 1.224096356137463e-06, "loss": 0.911, "step": 8080 }, { "epoch": 0.8457352171637886, "grad_norm": 1.9894212656748564, "learning_rate": 1.2224717903490901e-06, "loss": 0.8961, "step": 8081 }, { "epoch": 0.8458398744113029, "grad_norm": 2.2954720374673028, "learning_rate": 1.2208482331165007e-06, "loss": 0.8985, "step": 8082 }, { "epoch": 0.8459445316588173, "grad_norm": 1.9884523540654926, "learning_rate": 1.2192256846262484e-06, "loss": 0.8576, "step": 8083 }, { "epoch": 0.8460491889063317, "grad_norm": 1.6017827918804268, "learning_rate": 1.2176041450647657e-06, "loss": 0.7318, "step": 8084 }, { "epoch": 0.8461538461538461, "grad_norm": 2.191831478479857, "learning_rate": 1.215983614618369e-06, "loss": 0.8629, "step": 8085 }, { "epoch": 0.8462585034013606, "grad_norm": 1.9686856066340168, "learning_rate": 1.2143640934732636e-06, "loss": 0.7544, "step": 8086 }, { "epoch": 0.846363160648875, "grad_norm": 2.15328965743791, "learning_rate": 1.2127455818155321e-06, "loss": 0.9058, "step": 8087 }, { "epoch": 0.8464678178963894, "grad_norm": 2.0301113009573046, "learning_rate": 1.2111280798311486e-06, "loss": 0.8852, "step": 8088 }, { "epoch": 0.8465724751439038, "grad_norm": 2.124293788106227, "learning_rate": 1.209511587705966e-06, "loss": 0.825, "step": 8089 }, { "epoch": 0.8466771323914181, "grad_norm": 1.8508848742570292, "learning_rate": 1.2078961056257222e-06, "loss": 0.8428, "step": 8090 }, { "epoch": 0.8467817896389325, "grad_norm": 2.1311794047736337, "learning_rate": 1.2062816337760374e-06, "loss": 0.9301, "step": 8091 }, { "epoch": 0.8468864468864469, "grad_norm": 2.104243985600498, "learning_rate": 1.2046681723424214e-06, "loss": 0.872, "step": 8092 }, { "epoch": 0.8469911041339613, "grad_norm": 2.130616894657293, "learning_rate": 1.20305572151026e-06, "loss": 0.7725, "step": 8093 }, { "epoch": 0.8470957613814757, "grad_norm": 1.9408014352741592, "learning_rate": 1.2014442814648318e-06, "loss": 0.9378, "step": 8094 }, { "epoch": 0.8472004186289901, "grad_norm": 2.430277353641727, "learning_rate": 1.1998338523912923e-06, "loss": 0.8954, "step": 8095 }, { "epoch": 0.8473050758765045, "grad_norm": 2.147447871976377, "learning_rate": 1.198224434474683e-06, "loss": 0.9456, "step": 8096 }, { "epoch": 0.8474097331240188, "grad_norm": 2.150148583677661, "learning_rate": 1.1966160278999273e-06, "loss": 0.845, "step": 8097 }, { "epoch": 0.8475143903715332, "grad_norm": 1.8999522656749204, "learning_rate": 1.1950086328518383e-06, "loss": 0.9598, "step": 8098 }, { "epoch": 0.8476190476190476, "grad_norm": 2.22762433278561, "learning_rate": 1.1934022495151064e-06, "loss": 0.9627, "step": 8099 }, { "epoch": 0.847723704866562, "grad_norm": 1.9018640415303447, "learning_rate": 1.1917968780743094e-06, "loss": 0.9066, "step": 8100 }, { "epoch": 0.8478283621140764, "grad_norm": 2.2146699983704914, "learning_rate": 1.1901925187139052e-06, "loss": 0.8584, "step": 8101 }, { "epoch": 0.8479330193615908, "grad_norm": 2.0599972177345305, "learning_rate": 1.1885891716182395e-06, "loss": 0.8889, "step": 8102 }, { "epoch": 0.8480376766091052, "grad_norm": 2.21986430951051, "learning_rate": 1.1869868369715431e-06, "loss": 0.9083, "step": 8103 }, { "epoch": 0.8481423338566195, "grad_norm": 2.1417810174460854, "learning_rate": 1.1853855149579251e-06, "loss": 0.8543, "step": 8104 }, { "epoch": 0.8482469911041339, "grad_norm": 1.7461793353367485, "learning_rate": 1.1837852057613808e-06, "loss": 0.7139, "step": 8105 }, { "epoch": 0.8483516483516483, "grad_norm": 1.80563643788023, "learning_rate": 1.182185909565785e-06, "loss": 0.8036, "step": 8106 }, { "epoch": 0.8484563055991627, "grad_norm": 2.3503816379233844, "learning_rate": 1.1805876265549076e-06, "loss": 0.9276, "step": 8107 }, { "epoch": 0.8485609628466771, "grad_norm": 2.4489921440588605, "learning_rate": 1.1789903569123896e-06, "loss": 0.9568, "step": 8108 }, { "epoch": 0.8486656200941916, "grad_norm": 1.9034330562968986, "learning_rate": 1.1773941008217627e-06, "loss": 0.8873, "step": 8109 }, { "epoch": 0.848770277341706, "grad_norm": 2.2375424098728205, "learning_rate": 1.175798858466436e-06, "loss": 0.8962, "step": 8110 }, { "epoch": 0.8488749345892203, "grad_norm": 2.0427220355559004, "learning_rate": 1.1742046300297084e-06, "loss": 0.8506, "step": 8111 }, { "epoch": 0.8489795918367347, "grad_norm": 1.9535985727245424, "learning_rate": 1.1726114156947644e-06, "loss": 0.7011, "step": 8112 }, { "epoch": 0.8490842490842491, "grad_norm": 1.9013330814423128, "learning_rate": 1.171019215644662e-06, "loss": 0.8496, "step": 8113 }, { "epoch": 0.8491889063317635, "grad_norm": 1.8885673787395196, "learning_rate": 1.1694280300623505e-06, "loss": 0.9879, "step": 8114 }, { "epoch": 0.8492935635792779, "grad_norm": 1.7156423861186427, "learning_rate": 1.1678378591306604e-06, "loss": 0.812, "step": 8115 }, { "epoch": 0.8493982208267923, "grad_norm": 2.1750963180245293, "learning_rate": 1.1662487030323021e-06, "loss": 0.9194, "step": 8116 }, { "epoch": 0.8495028780743067, "grad_norm": 2.0315617464728604, "learning_rate": 1.1646605619498784e-06, "loss": 0.8036, "step": 8117 }, { "epoch": 0.849607535321821, "grad_norm": 2.1841952937990325, "learning_rate": 1.1630734360658669e-06, "loss": 0.8595, "step": 8118 }, { "epoch": 0.8497121925693354, "grad_norm": 2.1088635653194627, "learning_rate": 1.161487325562629e-06, "loss": 0.7815, "step": 8119 }, { "epoch": 0.8498168498168498, "grad_norm": 2.058500389607447, "learning_rate": 1.1599022306224184e-06, "loss": 0.8606, "step": 8120 }, { "epoch": 0.8499215070643642, "grad_norm": 2.140038653165263, "learning_rate": 1.15831815142736e-06, "loss": 0.9239, "step": 8121 }, { "epoch": 0.8500261643118786, "grad_norm": 2.0778034483126575, "learning_rate": 1.1567350881594708e-06, "loss": 0.955, "step": 8122 }, { "epoch": 0.850130821559393, "grad_norm": 1.998286679619626, "learning_rate": 1.1551530410006484e-06, "loss": 0.9129, "step": 8123 }, { "epoch": 0.8502354788069074, "grad_norm": 1.5817030186016467, "learning_rate": 1.153572010132672e-06, "loss": 0.7925, "step": 8124 }, { "epoch": 0.8503401360544217, "grad_norm": 1.9794772310958975, "learning_rate": 1.1519919957372029e-06, "loss": 0.9053, "step": 8125 }, { "epoch": 0.8504447933019361, "grad_norm": 1.8803338170594943, "learning_rate": 1.1504129979957913e-06, "loss": 0.8949, "step": 8126 }, { "epoch": 0.8505494505494505, "grad_norm": 2.145083879121954, "learning_rate": 1.1488350170898676e-06, "loss": 0.9441, "step": 8127 }, { "epoch": 0.8506541077969649, "grad_norm": 1.864355430498479, "learning_rate": 1.1472580532007405e-06, "loss": 0.9112, "step": 8128 }, { "epoch": 0.8507587650444793, "grad_norm": 1.5284392423180004, "learning_rate": 1.1456821065096125e-06, "loss": 0.7273, "step": 8129 }, { "epoch": 0.8508634222919937, "grad_norm": 1.7736534908958845, "learning_rate": 1.1441071771975576e-06, "loss": 0.746, "step": 8130 }, { "epoch": 0.8509680795395081, "grad_norm": 2.0415742507935613, "learning_rate": 1.1425332654455422e-06, "loss": 0.7637, "step": 8131 }, { "epoch": 0.8510727367870226, "grad_norm": 1.9995682557341876, "learning_rate": 1.140960371434411e-06, "loss": 0.8595, "step": 8132 }, { "epoch": 0.8511773940345368, "grad_norm": 2.134115968088377, "learning_rate": 1.1393884953448931e-06, "loss": 0.9112, "step": 8133 }, { "epoch": 0.8512820512820513, "grad_norm": 2.0560620276562083, "learning_rate": 1.1378176373575977e-06, "loss": 0.8082, "step": 8134 }, { "epoch": 0.8513867085295657, "grad_norm": 1.951430060069111, "learning_rate": 1.1362477976530206e-06, "loss": 0.8975, "step": 8135 }, { "epoch": 0.8514913657770801, "grad_norm": 1.6765488705122318, "learning_rate": 1.1346789764115418e-06, "loss": 0.7827, "step": 8136 }, { "epoch": 0.8515960230245945, "grad_norm": 2.1566144334273014, "learning_rate": 1.1331111738134193e-06, "loss": 0.8666, "step": 8137 }, { "epoch": 0.8517006802721089, "grad_norm": 2.324273210903453, "learning_rate": 1.131544390038799e-06, "loss": 0.8484, "step": 8138 }, { "epoch": 0.8518053375196233, "grad_norm": 2.1363318732510352, "learning_rate": 1.129978625267707e-06, "loss": 0.8032, "step": 8139 }, { "epoch": 0.8519099947671376, "grad_norm": 1.873627689153686, "learning_rate": 1.1284138796800492e-06, "loss": 0.8898, "step": 8140 }, { "epoch": 0.852014652014652, "grad_norm": 2.2677845731395205, "learning_rate": 1.1268501534556242e-06, "loss": 0.8408, "step": 8141 }, { "epoch": 0.8521193092621664, "grad_norm": 2.0835248846493557, "learning_rate": 1.125287446774105e-06, "loss": 0.9684, "step": 8142 }, { "epoch": 0.8522239665096808, "grad_norm": 2.1770740624500657, "learning_rate": 1.1237257598150487e-06, "loss": 0.8597, "step": 8143 }, { "epoch": 0.8523286237571952, "grad_norm": 2.1071307255681284, "learning_rate": 1.1221650927578942e-06, "loss": 0.9521, "step": 8144 }, { "epoch": 0.8524332810047096, "grad_norm": 2.3515348488854944, "learning_rate": 1.1206054457819671e-06, "loss": 0.8748, "step": 8145 }, { "epoch": 0.852537938252224, "grad_norm": 2.228241072663553, "learning_rate": 1.1190468190664782e-06, "loss": 0.8862, "step": 8146 }, { "epoch": 0.8526425954997383, "grad_norm": 2.0454265798917985, "learning_rate": 1.1174892127905123e-06, "loss": 0.7851, "step": 8147 }, { "epoch": 0.8527472527472527, "grad_norm": 2.9906239949974824, "learning_rate": 1.115932627133043e-06, "loss": 0.9877, "step": 8148 }, { "epoch": 0.8528519099947671, "grad_norm": 1.7183578316341401, "learning_rate": 1.1143770622729232e-06, "loss": 0.8169, "step": 8149 }, { "epoch": 0.8529565672422815, "grad_norm": 1.9631303418317718, "learning_rate": 1.112822518388893e-06, "loss": 0.9437, "step": 8150 }, { "epoch": 0.8530612244897959, "grad_norm": 2.2478081957198883, "learning_rate": 1.1112689956595724e-06, "loss": 0.9049, "step": 8151 }, { "epoch": 0.8531658817373103, "grad_norm": 1.9907598676903082, "learning_rate": 1.109716494263463e-06, "loss": 0.7481, "step": 8152 }, { "epoch": 0.8532705389848247, "grad_norm": 2.092413746291313, "learning_rate": 1.1081650143789513e-06, "loss": 0.888, "step": 8153 }, { "epoch": 0.853375196232339, "grad_norm": 2.150281264392465, "learning_rate": 1.1066145561843033e-06, "loss": 0.9098, "step": 8154 }, { "epoch": 0.8534798534798534, "grad_norm": 1.7025890287640117, "learning_rate": 1.1050651198576713e-06, "loss": 0.7393, "step": 8155 }, { "epoch": 0.8535845107273679, "grad_norm": 2.165113850117995, "learning_rate": 1.1035167055770901e-06, "loss": 0.8349, "step": 8156 }, { "epoch": 0.8536891679748823, "grad_norm": 2.5762419911687826, "learning_rate": 1.1019693135204757e-06, "loss": 0.969, "step": 8157 }, { "epoch": 0.8537938252223967, "grad_norm": 1.9241185785842263, "learning_rate": 1.1004229438656245e-06, "loss": 0.7895, "step": 8158 }, { "epoch": 0.8538984824699111, "grad_norm": 2.028496874452248, "learning_rate": 1.0988775967902165e-06, "loss": 0.8903, "step": 8159 }, { "epoch": 0.8540031397174255, "grad_norm": 2.388468483206559, "learning_rate": 1.09733327247182e-06, "loss": 0.773, "step": 8160 }, { "epoch": 0.8541077969649398, "grad_norm": 1.941083291471243, "learning_rate": 1.0957899710878772e-06, "loss": 0.8528, "step": 8161 }, { "epoch": 0.8542124542124542, "grad_norm": 2.2068646875053877, "learning_rate": 1.0942476928157175e-06, "loss": 0.8462, "step": 8162 }, { "epoch": 0.8543171114599686, "grad_norm": 2.071496324715311, "learning_rate": 1.0927064378325503e-06, "loss": 0.9587, "step": 8163 }, { "epoch": 0.854421768707483, "grad_norm": 2.150573325542622, "learning_rate": 1.0911662063154694e-06, "loss": 0.9465, "step": 8164 }, { "epoch": 0.8545264259549974, "grad_norm": 1.7662897309607424, "learning_rate": 1.0896269984414543e-06, "loss": 0.8114, "step": 8165 }, { "epoch": 0.8546310832025118, "grad_norm": 1.7296411837745311, "learning_rate": 1.088088814387359e-06, "loss": 0.7644, "step": 8166 }, { "epoch": 0.8547357404500262, "grad_norm": 2.7077350662708257, "learning_rate": 1.086551654329927e-06, "loss": 0.6619, "step": 8167 }, { "epoch": 0.8548403976975405, "grad_norm": 1.7773722774962573, "learning_rate": 1.0850155184457767e-06, "loss": 0.768, "step": 8168 }, { "epoch": 0.8549450549450549, "grad_norm": 1.913533199941065, "learning_rate": 1.083480406911418e-06, "loss": 0.9215, "step": 8169 }, { "epoch": 0.8550497121925693, "grad_norm": 2.2851663792939894, "learning_rate": 1.0819463199032354e-06, "loss": 0.9138, "step": 8170 }, { "epoch": 0.8551543694400837, "grad_norm": 2.434539639739813, "learning_rate": 1.0804132575974979e-06, "loss": 0.7474, "step": 8171 }, { "epoch": 0.8552590266875981, "grad_norm": 2.220301148902414, "learning_rate": 1.0788812201703614e-06, "loss": 0.9146, "step": 8172 }, { "epoch": 0.8553636839351125, "grad_norm": 2.412245947837503, "learning_rate": 1.0773502077978571e-06, "loss": 0.9518, "step": 8173 }, { "epoch": 0.8554683411826269, "grad_norm": 2.0701141368028915, "learning_rate": 1.075820220655901e-06, "loss": 0.7442, "step": 8174 }, { "epoch": 0.8555729984301413, "grad_norm": 2.0366797846248668, "learning_rate": 1.0742912589202935e-06, "loss": 0.8856, "step": 8175 }, { "epoch": 0.8556776556776556, "grad_norm": 2.2237305192244743, "learning_rate": 1.0727633227667157e-06, "loss": 0.9214, "step": 8176 }, { "epoch": 0.85578231292517, "grad_norm": 1.9347821344648546, "learning_rate": 1.0712364123707287e-06, "loss": 0.9748, "step": 8177 }, { "epoch": 0.8558869701726844, "grad_norm": 2.0653013623994054, "learning_rate": 1.069710527907777e-06, "loss": 0.8951, "step": 8178 }, { "epoch": 0.8559916274201989, "grad_norm": 2.3214860069724947, "learning_rate": 1.068185669553191e-06, "loss": 0.9846, "step": 8179 }, { "epoch": 0.8560962846677133, "grad_norm": 2.1905071628526986, "learning_rate": 1.0666618374821759e-06, "loss": 0.7894, "step": 8180 }, { "epoch": 0.8562009419152277, "grad_norm": 2.1369361625201875, "learning_rate": 1.0651390318698285e-06, "loss": 0.9375, "step": 8181 }, { "epoch": 0.8563055991627421, "grad_norm": 2.1626356161328935, "learning_rate": 1.0636172528911182e-06, "loss": 0.9148, "step": 8182 }, { "epoch": 0.8564102564102564, "grad_norm": 2.085183578524157, "learning_rate": 1.0620965007208993e-06, "loss": 0.928, "step": 8183 }, { "epoch": 0.8565149136577708, "grad_norm": 1.7680046788843726, "learning_rate": 1.0605767755339147e-06, "loss": 0.7463, "step": 8184 }, { "epoch": 0.8566195709052852, "grad_norm": 1.9582799315088195, "learning_rate": 1.0590580775047798e-06, "loss": 0.9068, "step": 8185 }, { "epoch": 0.8567242281527996, "grad_norm": 2.0988281526131845, "learning_rate": 1.0575404068079965e-06, "loss": 0.8693, "step": 8186 }, { "epoch": 0.856828885400314, "grad_norm": 2.192703067041728, "learning_rate": 1.0560237636179493e-06, "loss": 0.8442, "step": 8187 }, { "epoch": 0.8569335426478284, "grad_norm": 1.8377811369460275, "learning_rate": 1.0545081481089015e-06, "loss": 0.853, "step": 8188 }, { "epoch": 0.8570381998953428, "grad_norm": 1.9277235332711045, "learning_rate": 1.0529935604550012e-06, "loss": 0.8852, "step": 8189 }, { "epoch": 0.8571428571428571, "grad_norm": 1.8127579290454636, "learning_rate": 1.0514800008302806e-06, "loss": 0.7326, "step": 8190 }, { "epoch": 0.8572475143903715, "grad_norm": 2.4923220960574306, "learning_rate": 1.049967469408648e-06, "loss": 1.0099, "step": 8191 }, { "epoch": 0.8573521716378859, "grad_norm": 2.291069167052195, "learning_rate": 1.0484559663638971e-06, "loss": 0.8967, "step": 8192 }, { "epoch": 0.8574568288854003, "grad_norm": 2.1378655909099367, "learning_rate": 1.0469454918696998e-06, "loss": 0.9139, "step": 8193 }, { "epoch": 0.8575614861329147, "grad_norm": 2.0113703844240303, "learning_rate": 1.0454360460996181e-06, "loss": 0.8301, "step": 8194 }, { "epoch": 0.8576661433804291, "grad_norm": 2.1271770785333066, "learning_rate": 1.0439276292270872e-06, "loss": 0.812, "step": 8195 }, { "epoch": 0.8577708006279435, "grad_norm": 2.3638152006231903, "learning_rate": 1.042420241425427e-06, "loss": 0.777, "step": 8196 }, { "epoch": 0.8578754578754578, "grad_norm": 1.9732167242741625, "learning_rate": 1.0409138828678389e-06, "loss": 0.6977, "step": 8197 }, { "epoch": 0.8579801151229722, "grad_norm": 2.1089360027170083, "learning_rate": 1.0394085537274069e-06, "loss": 0.7946, "step": 8198 }, { "epoch": 0.8580847723704866, "grad_norm": 1.7844456116204388, "learning_rate": 1.0379042541771e-06, "loss": 0.796, "step": 8199 }, { "epoch": 0.858189429618001, "grad_norm": 2.136236846911897, "learning_rate": 1.0364009843897615e-06, "loss": 0.8329, "step": 8200 }, { "epoch": 0.8582940868655154, "grad_norm": 1.86851966011732, "learning_rate": 1.0348987445381208e-06, "loss": 0.774, "step": 8201 }, { "epoch": 0.8583987441130299, "grad_norm": 2.1299132196721042, "learning_rate": 1.0333975347947866e-06, "loss": 0.9495, "step": 8202 }, { "epoch": 0.8585034013605443, "grad_norm": 1.785497778491649, "learning_rate": 1.0318973553322553e-06, "loss": 0.8073, "step": 8203 }, { "epoch": 0.8586080586080586, "grad_norm": 2.259096733636607, "learning_rate": 1.0303982063228978e-06, "loss": 0.8527, "step": 8204 }, { "epoch": 0.858712715855573, "grad_norm": 2.0636141494036515, "learning_rate": 1.02890008793897e-06, "loss": 0.898, "step": 8205 }, { "epoch": 0.8588173731030874, "grad_norm": 2.349058809970281, "learning_rate": 1.0274030003526069e-06, "loss": 0.8855, "step": 8206 }, { "epoch": 0.8589220303506018, "grad_norm": 2.152390907657251, "learning_rate": 1.0259069437358271e-06, "loss": 0.8474, "step": 8207 }, { "epoch": 0.8590266875981162, "grad_norm": 2.104781303316511, "learning_rate": 1.024411918260535e-06, "loss": 0.8537, "step": 8208 }, { "epoch": 0.8591313448456306, "grad_norm": 2.002176163454363, "learning_rate": 1.0229179240985089e-06, "loss": 0.8734, "step": 8209 }, { "epoch": 0.859236002093145, "grad_norm": 2.148962156095097, "learning_rate": 1.021424961421411e-06, "loss": 0.8443, "step": 8210 }, { "epoch": 0.8593406593406593, "grad_norm": 1.7193849211791452, "learning_rate": 1.0199330304007858e-06, "loss": 0.7636, "step": 8211 }, { "epoch": 0.8594453165881737, "grad_norm": 2.107603846212653, "learning_rate": 1.0184421312080594e-06, "loss": 0.9227, "step": 8212 }, { "epoch": 0.8595499738356881, "grad_norm": 1.9252852291664944, "learning_rate": 1.0169522640145412e-06, "loss": 0.9483, "step": 8213 }, { "epoch": 0.8596546310832025, "grad_norm": 2.226183549251737, "learning_rate": 1.015463428991419e-06, "loss": 1.0013, "step": 8214 }, { "epoch": 0.8597592883307169, "grad_norm": 2.323773979390847, "learning_rate": 1.013975626309759e-06, "loss": 0.8943, "step": 8215 }, { "epoch": 0.8598639455782313, "grad_norm": 2.019532286245795, "learning_rate": 1.0124888561405188e-06, "loss": 0.8684, "step": 8216 }, { "epoch": 0.8599686028257457, "grad_norm": 2.270980325536687, "learning_rate": 1.0110031186545265e-06, "loss": 0.9197, "step": 8217 }, { "epoch": 0.8600732600732601, "grad_norm": 2.00584994006669, "learning_rate": 1.0095184140225011e-06, "loss": 0.804, "step": 8218 }, { "epoch": 0.8601779173207744, "grad_norm": 2.059032541958812, "learning_rate": 1.0080347424150349e-06, "loss": 0.9117, "step": 8219 }, { "epoch": 0.8602825745682888, "grad_norm": 1.8583774944181204, "learning_rate": 1.006552104002605e-06, "loss": 0.8409, "step": 8220 }, { "epoch": 0.8603872318158032, "grad_norm": 1.7469649900300717, "learning_rate": 1.0050704989555693e-06, "loss": 0.7859, "step": 8221 }, { "epoch": 0.8604918890633176, "grad_norm": 1.9209381565368364, "learning_rate": 1.0035899274441684e-06, "loss": 0.8993, "step": 8222 }, { "epoch": 0.860596546310832, "grad_norm": 2.034671367413749, "learning_rate": 1.002110389638521e-06, "loss": 0.8571, "step": 8223 }, { "epoch": 0.8607012035583465, "grad_norm": 1.9206079541192072, "learning_rate": 1.0006318857086328e-06, "loss": 0.8058, "step": 8224 }, { "epoch": 0.8608058608058609, "grad_norm": 1.7267084357758915, "learning_rate": 9.991544158243848e-07, "loss": 0.7573, "step": 8225 }, { "epoch": 0.8609105180533752, "grad_norm": 2.271042408187385, "learning_rate": 9.976779801555404e-07, "loss": 0.9341, "step": 8226 }, { "epoch": 0.8610151753008896, "grad_norm": 2.194966202864827, "learning_rate": 9.962025788717434e-07, "loss": 0.8549, "step": 8227 }, { "epoch": 0.861119832548404, "grad_norm": 2.1563970927227016, "learning_rate": 9.94728212142525e-07, "loss": 0.9266, "step": 8228 }, { "epoch": 0.8612244897959184, "grad_norm": 1.98123025878276, "learning_rate": 9.93254880137291e-07, "loss": 0.8194, "step": 8229 }, { "epoch": 0.8613291470434328, "grad_norm": 2.027077949956647, "learning_rate": 9.917825830253303e-07, "loss": 0.962, "step": 8230 }, { "epoch": 0.8614338042909472, "grad_norm": 2.3467824454655966, "learning_rate": 9.903113209758098e-07, "loss": 0.9625, "step": 8231 }, { "epoch": 0.8615384615384616, "grad_norm": 1.7368296277382502, "learning_rate": 9.888410941577819e-07, "loss": 0.7428, "step": 8232 }, { "epoch": 0.8616431187859759, "grad_norm": 1.9207232431358994, "learning_rate": 9.87371902740183e-07, "loss": 0.9178, "step": 8233 }, { "epoch": 0.8617477760334903, "grad_norm": 1.7639576896606057, "learning_rate": 9.859037468918232e-07, "loss": 0.8275, "step": 8234 }, { "epoch": 0.8618524332810047, "grad_norm": 2.096385523082317, "learning_rate": 9.844366267813965e-07, "loss": 0.8697, "step": 8235 }, { "epoch": 0.8619570905285191, "grad_norm": 2.1971747126218157, "learning_rate": 9.82970542577475e-07, "loss": 0.86, "step": 8236 }, { "epoch": 0.8620617477760335, "grad_norm": 2.0670900252205056, "learning_rate": 9.815054944485203e-07, "loss": 0.85, "step": 8237 }, { "epoch": 0.8621664050235479, "grad_norm": 2.0255958527875766, "learning_rate": 9.800414825628657e-07, "loss": 0.7782, "step": 8238 }, { "epoch": 0.8622710622710623, "grad_norm": 2.5228783731175466, "learning_rate": 9.78578507088731e-07, "loss": 0.8302, "step": 8239 }, { "epoch": 0.8623757195185766, "grad_norm": 2.0907415401597547, "learning_rate": 9.771165681942118e-07, "loss": 0.9448, "step": 8240 }, { "epoch": 0.862480376766091, "grad_norm": 1.795604413521706, "learning_rate": 9.756556660472904e-07, "loss": 0.8402, "step": 8241 }, { "epoch": 0.8625850340136054, "grad_norm": 2.224964285791813, "learning_rate": 9.741958008158292e-07, "loss": 0.8443, "step": 8242 }, { "epoch": 0.8626896912611198, "grad_norm": 2.1230855516357847, "learning_rate": 9.727369726675673e-07, "loss": 0.85, "step": 8243 }, { "epoch": 0.8627943485086342, "grad_norm": 2.047579087438642, "learning_rate": 9.712791817701273e-07, "loss": 0.8912, "step": 8244 }, { "epoch": 0.8628990057561486, "grad_norm": 1.8823706152629818, "learning_rate": 9.698224282910128e-07, "loss": 0.8377, "step": 8245 }, { "epoch": 0.863003663003663, "grad_norm": 2.0560768069448714, "learning_rate": 9.68366712397606e-07, "loss": 0.8321, "step": 8246 }, { "epoch": 0.8631083202511773, "grad_norm": 2.010540058978142, "learning_rate": 9.669120342571747e-07, "loss": 0.8789, "step": 8247 }, { "epoch": 0.8632129774986917, "grad_norm": 1.78965265727034, "learning_rate": 9.654583940368622e-07, "loss": 0.6758, "step": 8248 }, { "epoch": 0.8633176347462062, "grad_norm": 2.0314846628821766, "learning_rate": 9.640057919036926e-07, "loss": 0.8291, "step": 8249 }, { "epoch": 0.8634222919937206, "grad_norm": 2.0004853915166354, "learning_rate": 9.62554228024578e-07, "loss": 0.8815, "step": 8250 }, { "epoch": 0.863526949241235, "grad_norm": 2.1679537430544205, "learning_rate": 9.611037025663017e-07, "loss": 0.955, "step": 8251 }, { "epoch": 0.8636316064887494, "grad_norm": 1.8433670228793322, "learning_rate": 9.596542156955357e-07, "loss": 0.8938, "step": 8252 }, { "epoch": 0.8637362637362638, "grad_norm": 2.1490342455394162, "learning_rate": 9.58205767578827e-07, "loss": 0.8708, "step": 8253 }, { "epoch": 0.8638409209837782, "grad_norm": 2.258196552431531, "learning_rate": 9.567583583826046e-07, "loss": 0.8351, "step": 8254 }, { "epoch": 0.8639455782312925, "grad_norm": 2.076287178014026, "learning_rate": 9.553119882731777e-07, "loss": 0.949, "step": 8255 }, { "epoch": 0.8640502354788069, "grad_norm": 2.247501553759808, "learning_rate": 9.53866657416741e-07, "loss": 0.9345, "step": 8256 }, { "epoch": 0.8641548927263213, "grad_norm": 2.281215175212065, "learning_rate": 9.524223659793641e-07, "loss": 0.8919, "step": 8257 }, { "epoch": 0.8642595499738357, "grad_norm": 1.9878173669322587, "learning_rate": 9.509791141269964e-07, "loss": 0.9017, "step": 8258 }, { "epoch": 0.8643642072213501, "grad_norm": 1.9534688327422565, "learning_rate": 9.495369020254753e-07, "loss": 0.8423, "step": 8259 }, { "epoch": 0.8644688644688645, "grad_norm": 1.9918692395385151, "learning_rate": 9.48095729840508e-07, "loss": 0.9165, "step": 8260 }, { "epoch": 0.8645735217163789, "grad_norm": 1.8489436949868145, "learning_rate": 9.466555977376946e-07, "loss": 0.8444, "step": 8261 }, { "epoch": 0.8646781789638932, "grad_norm": 2.1193645421178093, "learning_rate": 9.452165058825058e-07, "loss": 0.8266, "step": 8262 }, { "epoch": 0.8647828362114076, "grad_norm": 2.0097760670300975, "learning_rate": 9.437784544402961e-07, "loss": 0.8798, "step": 8263 }, { "epoch": 0.864887493458922, "grad_norm": 2.107855735727707, "learning_rate": 9.423414435763012e-07, "loss": 0.8519, "step": 8264 }, { "epoch": 0.8649921507064364, "grad_norm": 1.9743559910054354, "learning_rate": 9.409054734556344e-07, "loss": 0.8584, "step": 8265 }, { "epoch": 0.8650968079539508, "grad_norm": 2.565003258614491, "learning_rate": 9.39470544243295e-07, "loss": 0.9632, "step": 8266 }, { "epoch": 0.8652014652014652, "grad_norm": 2.0088418728464643, "learning_rate": 9.380366561041553e-07, "loss": 0.897, "step": 8267 }, { "epoch": 0.8653061224489796, "grad_norm": 2.102961311337027, "learning_rate": 9.366038092029773e-07, "loss": 0.9862, "step": 8268 }, { "epoch": 0.8654107796964939, "grad_norm": 2.0365497413831424, "learning_rate": 9.351720037043943e-07, "loss": 0.7902, "step": 8269 }, { "epoch": 0.8655154369440083, "grad_norm": 2.2180607315989436, "learning_rate": 9.337412397729229e-07, "loss": 0.8182, "step": 8270 }, { "epoch": 0.8656200941915227, "grad_norm": 2.488823116296402, "learning_rate": 9.323115175729636e-07, "loss": 0.7917, "step": 8271 }, { "epoch": 0.8657247514390372, "grad_norm": 2.2666525932415844, "learning_rate": 9.308828372687939e-07, "loss": 0.8258, "step": 8272 }, { "epoch": 0.8658294086865516, "grad_norm": 1.7807084928441752, "learning_rate": 9.294551990245704e-07, "loss": 0.8042, "step": 8273 }, { "epoch": 0.865934065934066, "grad_norm": 1.886320152024955, "learning_rate": 9.280286030043306e-07, "loss": 0.9426, "step": 8274 }, { "epoch": 0.8660387231815804, "grad_norm": 2.3063773024197705, "learning_rate": 9.266030493719979e-07, "loss": 0.9073, "step": 8275 }, { "epoch": 0.8661433804290947, "grad_norm": 2.2479580097764535, "learning_rate": 9.251785382913658e-07, "loss": 0.9288, "step": 8276 }, { "epoch": 0.8662480376766091, "grad_norm": 2.1411449789130566, "learning_rate": 9.237550699261188e-07, "loss": 0.8874, "step": 8277 }, { "epoch": 0.8663526949241235, "grad_norm": 1.824544022620309, "learning_rate": 9.223326444398128e-07, "loss": 0.8135, "step": 8278 }, { "epoch": 0.8664573521716379, "grad_norm": 1.6429551373780529, "learning_rate": 9.20911261995887e-07, "loss": 0.7566, "step": 8279 }, { "epoch": 0.8665620094191523, "grad_norm": 2.176909472080666, "learning_rate": 9.194909227576642e-07, "loss": 0.9291, "step": 8280 }, { "epoch": 0.8666666666666667, "grad_norm": 2.093261943588003, "learning_rate": 9.180716268883427e-07, "loss": 0.8321, "step": 8281 }, { "epoch": 0.8667713239141811, "grad_norm": 2.1603700977335256, "learning_rate": 9.16653374551002e-07, "loss": 0.8882, "step": 8282 }, { "epoch": 0.8668759811616954, "grad_norm": 1.8687484939968493, "learning_rate": 9.152361659086029e-07, "loss": 0.9273, "step": 8283 }, { "epoch": 0.8669806384092098, "grad_norm": 2.03676255437104, "learning_rate": 9.138200011239828e-07, "loss": 0.9724, "step": 8284 }, { "epoch": 0.8670852956567242, "grad_norm": 2.116389446692822, "learning_rate": 9.124048803598639e-07, "loss": 0.9146, "step": 8285 }, { "epoch": 0.8671899529042386, "grad_norm": 1.919555592655635, "learning_rate": 9.109908037788484e-07, "loss": 0.9482, "step": 8286 }, { "epoch": 0.867294610151753, "grad_norm": 2.1601273225442625, "learning_rate": 9.095777715434162e-07, "loss": 0.9593, "step": 8287 }, { "epoch": 0.8673992673992674, "grad_norm": 1.927506321708376, "learning_rate": 9.081657838159253e-07, "loss": 0.8351, "step": 8288 }, { "epoch": 0.8675039246467818, "grad_norm": 2.0177113831137357, "learning_rate": 9.067548407586146e-07, "loss": 0.889, "step": 8289 }, { "epoch": 0.8676085818942961, "grad_norm": 2.1523136474810163, "learning_rate": 9.053449425336092e-07, "loss": 0.9081, "step": 8290 }, { "epoch": 0.8677132391418105, "grad_norm": 1.9842678328945351, "learning_rate": 9.03936089302907e-07, "loss": 0.9047, "step": 8291 }, { "epoch": 0.8678178963893249, "grad_norm": 1.946904140627659, "learning_rate": 9.025282812283875e-07, "loss": 0.7917, "step": 8292 }, { "epoch": 0.8679225536368393, "grad_norm": 1.7343881101313423, "learning_rate": 9.011215184718081e-07, "loss": 0.7012, "step": 8293 }, { "epoch": 0.8680272108843538, "grad_norm": 1.6121923338112008, "learning_rate": 8.997158011948126e-07, "loss": 0.739, "step": 8294 }, { "epoch": 0.8681318681318682, "grad_norm": 1.7420719829107674, "learning_rate": 8.98311129558922e-07, "loss": 0.6873, "step": 8295 }, { "epoch": 0.8682365253793826, "grad_norm": 2.1273765607618667, "learning_rate": 8.969075037255326e-07, "loss": 0.9182, "step": 8296 }, { "epoch": 0.868341182626897, "grad_norm": 1.9827264389330077, "learning_rate": 8.955049238559254e-07, "loss": 0.9137, "step": 8297 }, { "epoch": 0.8684458398744113, "grad_norm": 1.9931692934159142, "learning_rate": 8.941033901112572e-07, "loss": 0.9219, "step": 8298 }, { "epoch": 0.8685504971219257, "grad_norm": 2.0187325482134026, "learning_rate": 8.927029026525713e-07, "loss": 0.8923, "step": 8299 }, { "epoch": 0.8686551543694401, "grad_norm": 2.121244415757878, "learning_rate": 8.913034616407846e-07, "loss": 0.8702, "step": 8300 }, { "epoch": 0.8687598116169545, "grad_norm": 2.0917927570195145, "learning_rate": 8.899050672366949e-07, "loss": 0.7015, "step": 8301 }, { "epoch": 0.8688644688644689, "grad_norm": 2.3545018308026564, "learning_rate": 8.885077196009783e-07, "loss": 0.8551, "step": 8302 }, { "epoch": 0.8689691261119833, "grad_norm": 2.367894373396556, "learning_rate": 8.871114188941987e-07, "loss": 0.8081, "step": 8303 }, { "epoch": 0.8690737833594977, "grad_norm": 1.9788955523872949, "learning_rate": 8.857161652767887e-07, "loss": 0.83, "step": 8304 }, { "epoch": 0.869178440607012, "grad_norm": 2.27059813164129, "learning_rate": 8.843219589090701e-07, "loss": 0.8601, "step": 8305 }, { "epoch": 0.8692830978545264, "grad_norm": 2.3545604185359497, "learning_rate": 8.829287999512371e-07, "loss": 0.8818, "step": 8306 }, { "epoch": 0.8693877551020408, "grad_norm": 2.122637516854195, "learning_rate": 8.81536688563367e-07, "loss": 0.9421, "step": 8307 }, { "epoch": 0.8694924123495552, "grad_norm": 2.041713532250219, "learning_rate": 8.801456249054152e-07, "loss": 0.8007, "step": 8308 }, { "epoch": 0.8695970695970696, "grad_norm": 2.141284640895024, "learning_rate": 8.787556091372207e-07, "loss": 0.7057, "step": 8309 }, { "epoch": 0.869701726844584, "grad_norm": 2.1933709636305596, "learning_rate": 8.773666414184955e-07, "loss": 0.8779, "step": 8310 }, { "epoch": 0.8698063840920984, "grad_norm": 2.2346155866799533, "learning_rate": 8.759787219088389e-07, "loss": 0.7949, "step": 8311 }, { "epoch": 0.8699110413396127, "grad_norm": 2.3392273884858836, "learning_rate": 8.745918507677232e-07, "loss": 0.9422, "step": 8312 }, { "epoch": 0.8700156985871271, "grad_norm": 1.8380709262710446, "learning_rate": 8.732060281545007e-07, "loss": 0.769, "step": 8313 }, { "epoch": 0.8701203558346415, "grad_norm": 1.7448127426806292, "learning_rate": 8.718212542284099e-07, "loss": 0.8186, "step": 8314 }, { "epoch": 0.8702250130821559, "grad_norm": 2.1138633029838867, "learning_rate": 8.704375291485623e-07, "loss": 0.7661, "step": 8315 }, { "epoch": 0.8703296703296703, "grad_norm": 2.0932322539738784, "learning_rate": 8.690548530739496e-07, "loss": 0.9311, "step": 8316 }, { "epoch": 0.8704343275771848, "grad_norm": 1.7979421071378918, "learning_rate": 8.676732261634424e-07, "loss": 0.784, "step": 8317 }, { "epoch": 0.8705389848246992, "grad_norm": 1.9703330311133151, "learning_rate": 8.662926485757961e-07, "loss": 0.8364, "step": 8318 }, { "epoch": 0.8706436420722135, "grad_norm": 2.3370476834492413, "learning_rate": 8.649131204696392e-07, "loss": 0.9065, "step": 8319 }, { "epoch": 0.8707482993197279, "grad_norm": 2.404568265608144, "learning_rate": 8.635346420034862e-07, "loss": 0.8264, "step": 8320 }, { "epoch": 0.8708529565672423, "grad_norm": 1.8537120114233208, "learning_rate": 8.621572133357236e-07, "loss": 0.8729, "step": 8321 }, { "epoch": 0.8709576138147567, "grad_norm": 2.2930255513118705, "learning_rate": 8.607808346246216e-07, "loss": 0.9316, "step": 8322 }, { "epoch": 0.8710622710622711, "grad_norm": 2.029189543208393, "learning_rate": 8.594055060283268e-07, "loss": 0.7619, "step": 8323 }, { "epoch": 0.8711669283097855, "grad_norm": 2.570973752644621, "learning_rate": 8.580312277048708e-07, "loss": 0.9147, "step": 8324 }, { "epoch": 0.8712715855572999, "grad_norm": 2.0422859206952424, "learning_rate": 8.566579998121593e-07, "loss": 0.9099, "step": 8325 }, { "epoch": 0.8713762428048142, "grad_norm": 2.76800341990982, "learning_rate": 8.552858225079807e-07, "loss": 0.8457, "step": 8326 }, { "epoch": 0.8714809000523286, "grad_norm": 2.1054140471943392, "learning_rate": 8.539146959499956e-07, "loss": 0.7575, "step": 8327 }, { "epoch": 0.871585557299843, "grad_norm": 2.0253201711623565, "learning_rate": 8.525446202957543e-07, "loss": 0.8476, "step": 8328 }, { "epoch": 0.8716902145473574, "grad_norm": 1.644414039598465, "learning_rate": 8.511755957026813e-07, "loss": 0.6856, "step": 8329 }, { "epoch": 0.8717948717948718, "grad_norm": 2.278761036199369, "learning_rate": 8.498076223280794e-07, "loss": 0.8249, "step": 8330 }, { "epoch": 0.8718995290423862, "grad_norm": 2.0636072314286698, "learning_rate": 8.484407003291306e-07, "loss": 0.8947, "step": 8331 }, { "epoch": 0.8720041862899006, "grad_norm": 2.211711428835568, "learning_rate": 8.47074829862895e-07, "loss": 0.9184, "step": 8332 }, { "epoch": 0.8721088435374149, "grad_norm": 2.016664180022378, "learning_rate": 8.457100110863192e-07, "loss": 0.8512, "step": 8333 }, { "epoch": 0.8722135007849293, "grad_norm": 2.0273235658439654, "learning_rate": 8.443462441562211e-07, "loss": 0.9258, "step": 8334 }, { "epoch": 0.8723181580324437, "grad_norm": 2.1797348714707145, "learning_rate": 8.429835292292987e-07, "loss": 0.9337, "step": 8335 }, { "epoch": 0.8724228152799581, "grad_norm": 2.128984970826725, "learning_rate": 8.416218664621312e-07, "loss": 0.903, "step": 8336 }, { "epoch": 0.8725274725274725, "grad_norm": 1.9652208208969064, "learning_rate": 8.402612560111767e-07, "loss": 0.908, "step": 8337 }, { "epoch": 0.8726321297749869, "grad_norm": 2.0961986353412168, "learning_rate": 8.389016980327746e-07, "loss": 0.9465, "step": 8338 }, { "epoch": 0.8727367870225013, "grad_norm": 2.5872968306860313, "learning_rate": 8.37543192683139e-07, "loss": 0.9034, "step": 8339 }, { "epoch": 0.8728414442700158, "grad_norm": 2.443057693441533, "learning_rate": 8.361857401183649e-07, "loss": 0.8729, "step": 8340 }, { "epoch": 0.87294610151753, "grad_norm": 2.1845773081813635, "learning_rate": 8.348293404944263e-07, "loss": 0.8835, "step": 8341 }, { "epoch": 0.8730507587650445, "grad_norm": 1.9310878809345682, "learning_rate": 8.334739939671743e-07, "loss": 0.8102, "step": 8342 }, { "epoch": 0.8731554160125589, "grad_norm": 1.9452518156409584, "learning_rate": 8.321197006923442e-07, "loss": 0.8457, "step": 8343 }, { "epoch": 0.8732600732600733, "grad_norm": 1.8616667743331106, "learning_rate": 8.307664608255461e-07, "loss": 0.9066, "step": 8344 }, { "epoch": 0.8733647305075877, "grad_norm": 1.9175403993591587, "learning_rate": 8.294142745222678e-07, "loss": 0.8309, "step": 8345 }, { "epoch": 0.8734693877551021, "grad_norm": 2.4850953298749996, "learning_rate": 8.280631419378815e-07, "loss": 0.9685, "step": 8346 }, { "epoch": 0.8735740450026165, "grad_norm": 2.044753493702447, "learning_rate": 8.267130632276321e-07, "loss": 0.9937, "step": 8347 }, { "epoch": 0.8736787022501308, "grad_norm": 2.3810577763966876, "learning_rate": 8.253640385466499e-07, "loss": 1.0281, "step": 8348 }, { "epoch": 0.8737833594976452, "grad_norm": 1.963834819571071, "learning_rate": 8.240160680499388e-07, "loss": 0.9345, "step": 8349 }, { "epoch": 0.8738880167451596, "grad_norm": 1.8751548428811395, "learning_rate": 8.226691518923835e-07, "loss": 0.8952, "step": 8350 }, { "epoch": 0.873992673992674, "grad_norm": 2.330321107761336, "learning_rate": 8.213232902287438e-07, "loss": 0.899, "step": 8351 }, { "epoch": 0.8740973312401884, "grad_norm": 2.2633398328522847, "learning_rate": 8.199784832136682e-07, "loss": 0.9012, "step": 8352 }, { "epoch": 0.8742019884877028, "grad_norm": 1.916777768307915, "learning_rate": 8.18634731001674e-07, "loss": 0.8853, "step": 8353 }, { "epoch": 0.8743066457352172, "grad_norm": 2.1050944988737412, "learning_rate": 8.172920337471601e-07, "loss": 0.9568, "step": 8354 }, { "epoch": 0.8744113029827315, "grad_norm": 1.8481452188298315, "learning_rate": 8.159503916044087e-07, "loss": 0.9897, "step": 8355 }, { "epoch": 0.8745159602302459, "grad_norm": 2.250311788077037, "learning_rate": 8.146098047275741e-07, "loss": 0.8498, "step": 8356 }, { "epoch": 0.8746206174777603, "grad_norm": 2.3260125318486335, "learning_rate": 8.132702732706954e-07, "loss": 0.9126, "step": 8357 }, { "epoch": 0.8747252747252747, "grad_norm": 2.2314067384407994, "learning_rate": 8.11931797387685e-07, "loss": 0.8325, "step": 8358 }, { "epoch": 0.8748299319727891, "grad_norm": 1.9457108277580055, "learning_rate": 8.105943772323377e-07, "loss": 0.9133, "step": 8359 }, { "epoch": 0.8749345892203035, "grad_norm": 2.0107629993432607, "learning_rate": 8.09258012958325e-07, "loss": 0.8153, "step": 8360 }, { "epoch": 0.8750392464678179, "grad_norm": 2.2793780471848253, "learning_rate": 8.079227047191962e-07, "loss": 0.9221, "step": 8361 }, { "epoch": 0.8751439037153322, "grad_norm": 2.123456861211417, "learning_rate": 8.065884526683832e-07, "loss": 0.7958, "step": 8362 }, { "epoch": 0.8752485609628466, "grad_norm": 1.926502112741626, "learning_rate": 8.052552569591965e-07, "loss": 0.7358, "step": 8363 }, { "epoch": 0.875353218210361, "grad_norm": 2.0489738249039506, "learning_rate": 8.039231177448192e-07, "loss": 0.8492, "step": 8364 }, { "epoch": 0.8754578754578755, "grad_norm": 1.8975269132659613, "learning_rate": 8.025920351783189e-07, "loss": 0.8582, "step": 8365 }, { "epoch": 0.8755625327053899, "grad_norm": 2.256895879823261, "learning_rate": 8.012620094126367e-07, "loss": 0.8729, "step": 8366 }, { "epoch": 0.8756671899529043, "grad_norm": 2.025845185272999, "learning_rate": 7.999330406005989e-07, "loss": 0.8187, "step": 8367 }, { "epoch": 0.8757718472004187, "grad_norm": 2.0895848407187922, "learning_rate": 7.986051288949059e-07, "loss": 0.9252, "step": 8368 }, { "epoch": 0.875876504447933, "grad_norm": 2.139667630918293, "learning_rate": 7.972782744481367e-07, "loss": 0.8566, "step": 8369 }, { "epoch": 0.8759811616954474, "grad_norm": 2.3232743712934294, "learning_rate": 7.959524774127481e-07, "loss": 0.7616, "step": 8370 }, { "epoch": 0.8760858189429618, "grad_norm": 2.128576816385248, "learning_rate": 7.946277379410771e-07, "loss": 0.7066, "step": 8371 }, { "epoch": 0.8761904761904762, "grad_norm": 1.9889905519313846, "learning_rate": 7.933040561853433e-07, "loss": 0.8717, "step": 8372 }, { "epoch": 0.8762951334379906, "grad_norm": 1.7790368676618327, "learning_rate": 7.919814322976371e-07, "loss": 0.7829, "step": 8373 }, { "epoch": 0.876399790685505, "grad_norm": 2.191730959271324, "learning_rate": 7.906598664299303e-07, "loss": 0.889, "step": 8374 }, { "epoch": 0.8765044479330194, "grad_norm": 2.018457235121592, "learning_rate": 7.893393587340725e-07, "loss": 0.9284, "step": 8375 }, { "epoch": 0.8766091051805337, "grad_norm": 1.844476762441355, "learning_rate": 7.880199093617968e-07, "loss": 0.771, "step": 8376 }, { "epoch": 0.8767137624280481, "grad_norm": 1.9252327144567856, "learning_rate": 7.867015184647075e-07, "loss": 0.8846, "step": 8377 }, { "epoch": 0.8768184196755625, "grad_norm": 2.428489369199683, "learning_rate": 7.853841861942912e-07, "loss": 0.8245, "step": 8378 }, { "epoch": 0.8769230769230769, "grad_norm": 2.1246442895723927, "learning_rate": 7.840679127019124e-07, "loss": 0.9374, "step": 8379 }, { "epoch": 0.8770277341705913, "grad_norm": 2.0282518178011637, "learning_rate": 7.827526981388112e-07, "loss": 0.7992, "step": 8380 }, { "epoch": 0.8771323914181057, "grad_norm": 2.2457001610010634, "learning_rate": 7.814385426561099e-07, "loss": 0.8949, "step": 8381 }, { "epoch": 0.8772370486656201, "grad_norm": 2.358939783286938, "learning_rate": 7.801254464048092e-07, "loss": 0.8137, "step": 8382 }, { "epoch": 0.8773417059131345, "grad_norm": 1.9086185068244006, "learning_rate": 7.788134095357869e-07, "loss": 0.9158, "step": 8383 }, { "epoch": 0.8774463631606488, "grad_norm": 1.400948820647384, "learning_rate": 7.775024321997959e-07, "loss": 0.6796, "step": 8384 }, { "epoch": 0.8775510204081632, "grad_norm": 2.1402975917994134, "learning_rate": 7.761925145474702e-07, "loss": 0.9254, "step": 8385 }, { "epoch": 0.8776556776556776, "grad_norm": 2.28391791015574, "learning_rate": 7.748836567293238e-07, "loss": 0.8438, "step": 8386 }, { "epoch": 0.877760334903192, "grad_norm": 2.0399330609262143, "learning_rate": 7.735758588957476e-07, "loss": 0.8036, "step": 8387 }, { "epoch": 0.8778649921507065, "grad_norm": 1.6399077111346803, "learning_rate": 7.722691211970079e-07, "loss": 0.7449, "step": 8388 }, { "epoch": 0.8779696493982209, "grad_norm": 2.060963302205363, "learning_rate": 7.709634437832537e-07, "loss": 0.8799, "step": 8389 }, { "epoch": 0.8780743066457353, "grad_norm": 2.0689600387801446, "learning_rate": 7.696588268045069e-07, "loss": 0.892, "step": 8390 }, { "epoch": 0.8781789638932496, "grad_norm": 1.8775333765697284, "learning_rate": 7.683552704106756e-07, "loss": 0.7523, "step": 8391 }, { "epoch": 0.878283621140764, "grad_norm": 2.347297278777117, "learning_rate": 7.670527747515366e-07, "loss": 0.8529, "step": 8392 }, { "epoch": 0.8783882783882784, "grad_norm": 1.8280093832995228, "learning_rate": 7.657513399767525e-07, "loss": 0.8632, "step": 8393 }, { "epoch": 0.8784929356357928, "grad_norm": 2.4132747406203694, "learning_rate": 7.644509662358568e-07, "loss": 0.7902, "step": 8394 }, { "epoch": 0.8785975928833072, "grad_norm": 1.7734274400061258, "learning_rate": 7.631516536782691e-07, "loss": 0.8234, "step": 8395 }, { "epoch": 0.8787022501308216, "grad_norm": 1.9302295122926418, "learning_rate": 7.61853402453282e-07, "loss": 0.9466, "step": 8396 }, { "epoch": 0.878806907378336, "grad_norm": 2.017176887079925, "learning_rate": 7.605562127100641e-07, "loss": 0.9422, "step": 8397 }, { "epoch": 0.8789115646258503, "grad_norm": 2.204693966210214, "learning_rate": 7.592600845976694e-07, "loss": 0.9219, "step": 8398 }, { "epoch": 0.8790162218733647, "grad_norm": 1.9260542762683226, "learning_rate": 7.579650182650245e-07, "loss": 0.8415, "step": 8399 }, { "epoch": 0.8791208791208791, "grad_norm": 2.3183132581995025, "learning_rate": 7.566710138609323e-07, "loss": 0.8479, "step": 8400 }, { "epoch": 0.8792255363683935, "grad_norm": 1.9805826657109464, "learning_rate": 7.55378071534082e-07, "loss": 0.8046, "step": 8401 }, { "epoch": 0.8793301936159079, "grad_norm": 2.177056885011439, "learning_rate": 7.540861914330299e-07, "loss": 0.9871, "step": 8402 }, { "epoch": 0.8794348508634223, "grad_norm": 2.287633781357303, "learning_rate": 7.527953737062188e-07, "loss": 0.8662, "step": 8403 }, { "epoch": 0.8795395081109367, "grad_norm": 2.043158350778613, "learning_rate": 7.515056185019642e-07, "loss": 0.8425, "step": 8404 }, { "epoch": 0.879644165358451, "grad_norm": 2.4480997698564857, "learning_rate": 7.502169259684633e-07, "loss": 0.9648, "step": 8405 }, { "epoch": 0.8797488226059654, "grad_norm": 2.4174555351369555, "learning_rate": 7.489292962537887e-07, "loss": 0.8642, "step": 8406 }, { "epoch": 0.8798534798534798, "grad_norm": 2.360136663855378, "learning_rate": 7.476427295058918e-07, "loss": 0.822, "step": 8407 }, { "epoch": 0.8799581371009942, "grad_norm": 2.1342092374115054, "learning_rate": 7.463572258726027e-07, "loss": 0.9561, "step": 8408 }, { "epoch": 0.8800627943485086, "grad_norm": 2.2233381286212315, "learning_rate": 7.45072785501626e-07, "loss": 0.9782, "step": 8409 }, { "epoch": 0.880167451596023, "grad_norm": 2.0969014014371865, "learning_rate": 7.437894085405484e-07, "loss": 0.9458, "step": 8410 }, { "epoch": 0.8802721088435375, "grad_norm": 2.1757758424267464, "learning_rate": 7.42507095136833e-07, "loss": 0.8587, "step": 8411 }, { "epoch": 0.8803767660910518, "grad_norm": 2.306166415311752, "learning_rate": 7.412258454378196e-07, "loss": 0.8882, "step": 8412 }, { "epoch": 0.8804814233385662, "grad_norm": 2.269362231176697, "learning_rate": 7.39945659590724e-07, "loss": 0.8249, "step": 8413 }, { "epoch": 0.8805860805860806, "grad_norm": 1.938643465951603, "learning_rate": 7.386665377426438e-07, "loss": 0.8922, "step": 8414 }, { "epoch": 0.880690737833595, "grad_norm": 2.005191893832761, "learning_rate": 7.37388480040555e-07, "loss": 0.8959, "step": 8415 }, { "epoch": 0.8807953950811094, "grad_norm": 2.1034084182074713, "learning_rate": 7.361114866313069e-07, "loss": 0.8488, "step": 8416 }, { "epoch": 0.8809000523286238, "grad_norm": 2.0833541849870536, "learning_rate": 7.348355576616295e-07, "loss": 0.8547, "step": 8417 }, { "epoch": 0.8810047095761382, "grad_norm": 1.9436409663601768, "learning_rate": 7.33560693278128e-07, "loss": 0.9296, "step": 8418 }, { "epoch": 0.8811093668236525, "grad_norm": 2.6142542944966416, "learning_rate": 7.322868936272876e-07, "loss": 0.9786, "step": 8419 }, { "epoch": 0.8812140240711669, "grad_norm": 1.975543273238409, "learning_rate": 7.310141588554709e-07, "loss": 0.8701, "step": 8420 }, { "epoch": 0.8813186813186813, "grad_norm": 2.175997926219932, "learning_rate": 7.297424891089189e-07, "loss": 0.833, "step": 8421 }, { "epoch": 0.8814233385661957, "grad_norm": 1.779910295096858, "learning_rate": 7.284718845337469e-07, "loss": 0.8065, "step": 8422 }, { "epoch": 0.8815279958137101, "grad_norm": 2.323280192485811, "learning_rate": 7.272023452759491e-07, "loss": 0.9152, "step": 8423 }, { "epoch": 0.8816326530612245, "grad_norm": 1.9712072593281076, "learning_rate": 7.259338714813991e-07, "loss": 0.9558, "step": 8424 }, { "epoch": 0.8817373103087389, "grad_norm": 2.0403158599975058, "learning_rate": 7.2466646329585e-07, "loss": 0.9015, "step": 8425 }, { "epoch": 0.8818419675562533, "grad_norm": 2.1242870940274057, "learning_rate": 7.234001208649277e-07, "loss": 0.9321, "step": 8426 }, { "epoch": 0.8819466248037676, "grad_norm": 2.268858095340938, "learning_rate": 7.221348443341369e-07, "loss": 0.9714, "step": 8427 }, { "epoch": 0.882051282051282, "grad_norm": 1.995883555744217, "learning_rate": 7.208706338488591e-07, "loss": 0.9395, "step": 8428 }, { "epoch": 0.8821559392987964, "grad_norm": 2.1098847144251054, "learning_rate": 7.196074895543581e-07, "loss": 0.8725, "step": 8429 }, { "epoch": 0.8822605965463108, "grad_norm": 2.409353406645249, "learning_rate": 7.183454115957688e-07, "loss": 1.0137, "step": 8430 }, { "epoch": 0.8823652537938252, "grad_norm": 2.1180455101201408, "learning_rate": 7.170844001181087e-07, "loss": 0.9623, "step": 8431 }, { "epoch": 0.8824699110413396, "grad_norm": 2.151280329173097, "learning_rate": 7.158244552662674e-07, "loss": 0.9246, "step": 8432 }, { "epoch": 0.8825745682888541, "grad_norm": 2.0323499167485104, "learning_rate": 7.145655771850179e-07, "loss": 0.9656, "step": 8433 }, { "epoch": 0.8826792255363684, "grad_norm": 2.282886441190445, "learning_rate": 7.13307766019008e-07, "loss": 0.9472, "step": 8434 }, { "epoch": 0.8827838827838828, "grad_norm": 2.06209446070617, "learning_rate": 7.120510219127619e-07, "loss": 0.9185, "step": 8435 }, { "epoch": 0.8828885400313972, "grad_norm": 2.5731722325288855, "learning_rate": 7.107953450106819e-07, "loss": 0.8998, "step": 8436 }, { "epoch": 0.8829931972789116, "grad_norm": 1.8408399665438568, "learning_rate": 7.095407354570483e-07, "loss": 0.8116, "step": 8437 }, { "epoch": 0.883097854526426, "grad_norm": 2.0268812460354453, "learning_rate": 7.082871933960156e-07, "loss": 0.8808, "step": 8438 }, { "epoch": 0.8832025117739404, "grad_norm": 1.6490758747411414, "learning_rate": 7.070347189716231e-07, "loss": 0.7978, "step": 8439 }, { "epoch": 0.8833071690214548, "grad_norm": 1.8039746990089798, "learning_rate": 7.057833123277779e-07, "loss": 0.8668, "step": 8440 }, { "epoch": 0.8834118262689691, "grad_norm": 2.0025313820891246, "learning_rate": 7.04532973608274e-07, "loss": 0.8699, "step": 8441 }, { "epoch": 0.8835164835164835, "grad_norm": 2.013068044107957, "learning_rate": 7.032837029567741e-07, "loss": 0.8394, "step": 8442 }, { "epoch": 0.8836211407639979, "grad_norm": 1.9791349791116444, "learning_rate": 7.020355005168223e-07, "loss": 0.8135, "step": 8443 }, { "epoch": 0.8837257980115123, "grad_norm": 1.9761998674343688, "learning_rate": 7.007883664318416e-07, "loss": 0.9571, "step": 8444 }, { "epoch": 0.8838304552590267, "grad_norm": 2.217869103536607, "learning_rate": 6.995423008451296e-07, "loss": 0.8986, "step": 8445 }, { "epoch": 0.8839351125065411, "grad_norm": 1.7193627330671324, "learning_rate": 6.982973038998608e-07, "loss": 0.7972, "step": 8446 }, { "epoch": 0.8840397697540555, "grad_norm": 1.9318398791675795, "learning_rate": 6.970533757390874e-07, "loss": 0.9375, "step": 8447 }, { "epoch": 0.8841444270015698, "grad_norm": 2.040044194610826, "learning_rate": 6.958105165057405e-07, "loss": 0.9477, "step": 8448 }, { "epoch": 0.8842490842490842, "grad_norm": 2.3099661723992795, "learning_rate": 6.945687263426259e-07, "loss": 0.996, "step": 8449 }, { "epoch": 0.8843537414965986, "grad_norm": 2.3098910608338126, "learning_rate": 6.933280053924307e-07, "loss": 0.8932, "step": 8450 }, { "epoch": 0.884458398744113, "grad_norm": 2.0083350641115847, "learning_rate": 6.920883537977141e-07, "loss": 0.8619, "step": 8451 }, { "epoch": 0.8845630559916274, "grad_norm": 2.17092705026124, "learning_rate": 6.908497717009132e-07, "loss": 0.8771, "step": 8452 }, { "epoch": 0.8846677132391418, "grad_norm": 1.9588841874329892, "learning_rate": 6.896122592443466e-07, "loss": 0.843, "step": 8453 }, { "epoch": 0.8847723704866562, "grad_norm": 2.06887109912915, "learning_rate": 6.883758165702048e-07, "loss": 0.9597, "step": 8454 }, { "epoch": 0.8848770277341705, "grad_norm": 2.0239969794410007, "learning_rate": 6.871404438205598e-07, "loss": 0.8352, "step": 8455 }, { "epoch": 0.884981684981685, "grad_norm": 2.2891549492476795, "learning_rate": 6.859061411373557e-07, "loss": 0.8596, "step": 8456 }, { "epoch": 0.8850863422291994, "grad_norm": 2.134710339429134, "learning_rate": 6.846729086624159e-07, "loss": 0.7921, "step": 8457 }, { "epoch": 0.8851909994767138, "grad_norm": 2.021668403658534, "learning_rate": 6.834407465374438e-07, "loss": 0.9, "step": 8458 }, { "epoch": 0.8852956567242282, "grad_norm": 1.6910079970551906, "learning_rate": 6.82209654904018e-07, "loss": 0.7481, "step": 8459 }, { "epoch": 0.8854003139717426, "grad_norm": 2.2018914810401933, "learning_rate": 6.809796339035923e-07, "loss": 0.915, "step": 8460 }, { "epoch": 0.885504971219257, "grad_norm": 1.6986887147965564, "learning_rate": 6.797506836774981e-07, "loss": 0.7935, "step": 8461 }, { "epoch": 0.8856096284667713, "grad_norm": 1.9522509649243391, "learning_rate": 6.785228043669423e-07, "loss": 0.8457, "step": 8462 }, { "epoch": 0.8857142857142857, "grad_norm": 2.5679630478892643, "learning_rate": 6.772959961130154e-07, "loss": 0.9314, "step": 8463 }, { "epoch": 0.8858189429618001, "grad_norm": 1.8880060673610959, "learning_rate": 6.76070259056677e-07, "loss": 0.8698, "step": 8464 }, { "epoch": 0.8859236002093145, "grad_norm": 2.0499745907035742, "learning_rate": 6.748455933387666e-07, "loss": 0.8072, "step": 8465 }, { "epoch": 0.8860282574568289, "grad_norm": 2.0849975990310283, "learning_rate": 6.736219991000015e-07, "loss": 0.8479, "step": 8466 }, { "epoch": 0.8861329147043433, "grad_norm": 2.285861980076164, "learning_rate": 6.72399476480975e-07, "loss": 0.8301, "step": 8467 }, { "epoch": 0.8862375719518577, "grad_norm": 2.0055314886862146, "learning_rate": 6.711780256221578e-07, "loss": 0.9137, "step": 8468 }, { "epoch": 0.8863422291993721, "grad_norm": 2.3060763965788507, "learning_rate": 6.699576466638991e-07, "loss": 0.9079, "step": 8469 }, { "epoch": 0.8864468864468864, "grad_norm": 2.007322337516734, "learning_rate": 6.687383397464187e-07, "loss": 0.8655, "step": 8470 }, { "epoch": 0.8865515436944008, "grad_norm": 2.0833706920548356, "learning_rate": 6.67520105009819e-07, "loss": 0.783, "step": 8471 }, { "epoch": 0.8866562009419152, "grad_norm": 2.0582965508334468, "learning_rate": 6.663029425940804e-07, "loss": 0.9511, "step": 8472 }, { "epoch": 0.8867608581894296, "grad_norm": 2.2808925451077933, "learning_rate": 6.650868526390541e-07, "loss": 0.8643, "step": 8473 }, { "epoch": 0.886865515436944, "grad_norm": 1.946899059032306, "learning_rate": 6.63871835284472e-07, "loss": 0.9066, "step": 8474 }, { "epoch": 0.8869701726844584, "grad_norm": 2.068214617975085, "learning_rate": 6.626578906699421e-07, "loss": 0.8209, "step": 8475 }, { "epoch": 0.8870748299319728, "grad_norm": 2.093189904582817, "learning_rate": 6.614450189349497e-07, "loss": 0.9761, "step": 8476 }, { "epoch": 0.8871794871794871, "grad_norm": 2.1709216272284424, "learning_rate": 6.602332202188544e-07, "loss": 0.9507, "step": 8477 }, { "epoch": 0.8872841444270015, "grad_norm": 1.9156403618097797, "learning_rate": 6.590224946608981e-07, "loss": 0.7868, "step": 8478 }, { "epoch": 0.887388801674516, "grad_norm": 1.8763348533122814, "learning_rate": 6.578128424001929e-07, "loss": 0.7929, "step": 8479 }, { "epoch": 0.8874934589220304, "grad_norm": 2.1596639833953897, "learning_rate": 6.566042635757297e-07, "loss": 0.9342, "step": 8480 }, { "epoch": 0.8875981161695448, "grad_norm": 2.25994957436867, "learning_rate": 6.553967583263776e-07, "loss": 0.8573, "step": 8481 }, { "epoch": 0.8877027734170592, "grad_norm": 2.171303606502574, "learning_rate": 6.541903267908823e-07, "loss": 0.8721, "step": 8482 }, { "epoch": 0.8878074306645736, "grad_norm": 2.1186511007584454, "learning_rate": 6.529849691078638e-07, "loss": 0.8509, "step": 8483 }, { "epoch": 0.8879120879120879, "grad_norm": 1.9924052169713273, "learning_rate": 6.517806854158204e-07, "loss": 0.8332, "step": 8484 }, { "epoch": 0.8880167451596023, "grad_norm": 1.8798641457276724, "learning_rate": 6.50577475853128e-07, "loss": 0.9086, "step": 8485 }, { "epoch": 0.8881214024071167, "grad_norm": 2.030261354667674, "learning_rate": 6.493753405580349e-07, "loss": 0.8013, "step": 8486 }, { "epoch": 0.8882260596546311, "grad_norm": 1.9482170795471423, "learning_rate": 6.48174279668673e-07, "loss": 0.8105, "step": 8487 }, { "epoch": 0.8883307169021455, "grad_norm": 2.1213839881968064, "learning_rate": 6.46974293323045e-07, "loss": 0.8659, "step": 8488 }, { "epoch": 0.8884353741496599, "grad_norm": 1.7682685942041678, "learning_rate": 6.457753816590307e-07, "loss": 0.8687, "step": 8489 }, { "epoch": 0.8885400313971743, "grad_norm": 2.0809007657182397, "learning_rate": 6.445775448143854e-07, "loss": 0.8524, "step": 8490 }, { "epoch": 0.8886446886446886, "grad_norm": 2.2455986826973278, "learning_rate": 6.433807829267491e-07, "loss": 0.9529, "step": 8491 }, { "epoch": 0.888749345892203, "grad_norm": 1.9497228881367459, "learning_rate": 6.421850961336262e-07, "loss": 0.9028, "step": 8492 }, { "epoch": 0.8888540031397174, "grad_norm": 1.7807386421204503, "learning_rate": 6.409904845724068e-07, "loss": 0.8272, "step": 8493 }, { "epoch": 0.8889586603872318, "grad_norm": 1.9539951561107944, "learning_rate": 6.397969483803546e-07, "loss": 0.8359, "step": 8494 }, { "epoch": 0.8890633176347462, "grad_norm": 1.8823048125200246, "learning_rate": 6.386044876946073e-07, "loss": 0.9503, "step": 8495 }, { "epoch": 0.8891679748822606, "grad_norm": 2.013354508723251, "learning_rate": 6.374131026521813e-07, "loss": 0.8966, "step": 8496 }, { "epoch": 0.889272632129775, "grad_norm": 2.187845858922677, "learning_rate": 6.362227933899701e-07, "loss": 0.8339, "step": 8497 }, { "epoch": 0.8893772893772893, "grad_norm": 2.1954224044761563, "learning_rate": 6.350335600447433e-07, "loss": 0.9413, "step": 8498 }, { "epoch": 0.8894819466248037, "grad_norm": 2.1320897727273165, "learning_rate": 6.338454027531449e-07, "loss": 0.952, "step": 8499 }, { "epoch": 0.8895866038723181, "grad_norm": 2.336668103416007, "learning_rate": 6.326583216516957e-07, "loss": 0.8382, "step": 8500 }, { "epoch": 0.8896912611198325, "grad_norm": 1.8795899973747725, "learning_rate": 6.314723168767945e-07, "loss": 0.8066, "step": 8501 }, { "epoch": 0.889795918367347, "grad_norm": 1.9308760249674062, "learning_rate": 6.302873885647176e-07, "loss": 0.8645, "step": 8502 }, { "epoch": 0.8899005756148614, "grad_norm": 1.9444834126204222, "learning_rate": 6.291035368516141e-07, "loss": 0.7657, "step": 8503 }, { "epoch": 0.8900052328623758, "grad_norm": 2.1736958135133593, "learning_rate": 6.279207618735117e-07, "loss": 1.0026, "step": 8504 }, { "epoch": 0.8901098901098901, "grad_norm": 2.1650434307122093, "learning_rate": 6.267390637663107e-07, "loss": 0.8737, "step": 8505 }, { "epoch": 0.8902145473574045, "grad_norm": 2.5479226003532167, "learning_rate": 6.255584426657946e-07, "loss": 0.8008, "step": 8506 }, { "epoch": 0.8903192046049189, "grad_norm": 1.8553494681317815, "learning_rate": 6.243788987076172e-07, "loss": 0.8209, "step": 8507 }, { "epoch": 0.8904238618524333, "grad_norm": 1.9483177698300278, "learning_rate": 6.232004320273111e-07, "loss": 0.8308, "step": 8508 }, { "epoch": 0.8905285190999477, "grad_norm": 1.6634174156291366, "learning_rate": 6.220230427602814e-07, "loss": 0.827, "step": 8509 }, { "epoch": 0.8906331763474621, "grad_norm": 2.1685853413646323, "learning_rate": 6.208467310418165e-07, "loss": 0.9, "step": 8510 }, { "epoch": 0.8907378335949765, "grad_norm": 1.9637945475462335, "learning_rate": 6.196714970070772e-07, "loss": 0.8718, "step": 8511 }, { "epoch": 0.8908424908424909, "grad_norm": 1.96490255215975, "learning_rate": 6.184973407910977e-07, "loss": 0.8939, "step": 8512 }, { "epoch": 0.8909471480900052, "grad_norm": 2.101746073662847, "learning_rate": 6.173242625287934e-07, "loss": 0.8335, "step": 8513 }, { "epoch": 0.8910518053375196, "grad_norm": 2.106827762594044, "learning_rate": 6.161522623549509e-07, "loss": 0.8706, "step": 8514 }, { "epoch": 0.891156462585034, "grad_norm": 2.1039813494995157, "learning_rate": 6.14981340404236e-07, "loss": 0.8625, "step": 8515 }, { "epoch": 0.8912611198325484, "grad_norm": 2.1108841193616983, "learning_rate": 6.138114968111918e-07, "loss": 0.9328, "step": 8516 }, { "epoch": 0.8913657770800628, "grad_norm": 2.3087652071285567, "learning_rate": 6.126427317102335e-07, "loss": 0.9118, "step": 8517 }, { "epoch": 0.8914704343275772, "grad_norm": 2.2400146974772515, "learning_rate": 6.114750452356555e-07, "loss": 0.9425, "step": 8518 }, { "epoch": 0.8915750915750916, "grad_norm": 2.311746241511402, "learning_rate": 6.103084375216273e-07, "loss": 0.9304, "step": 8519 }, { "epoch": 0.8916797488226059, "grad_norm": 2.1638223427184586, "learning_rate": 6.091429087021938e-07, "loss": 0.9901, "step": 8520 }, { "epoch": 0.8917844060701203, "grad_norm": 1.9528700522695477, "learning_rate": 6.079784589112792e-07, "loss": 0.7986, "step": 8521 }, { "epoch": 0.8918890633176347, "grad_norm": 1.6876998724103345, "learning_rate": 6.068150882826785e-07, "loss": 0.7391, "step": 8522 }, { "epoch": 0.8919937205651491, "grad_norm": 1.8784697035517413, "learning_rate": 6.056527969500659e-07, "loss": 0.7807, "step": 8523 }, { "epoch": 0.8920983778126635, "grad_norm": 1.7297614397650942, "learning_rate": 6.044915850469912e-07, "loss": 0.7549, "step": 8524 }, { "epoch": 0.892203035060178, "grad_norm": 2.148635473847736, "learning_rate": 6.033314527068801e-07, "loss": 0.901, "step": 8525 }, { "epoch": 0.8923076923076924, "grad_norm": 2.409030290338374, "learning_rate": 6.021724000630347e-07, "loss": 0.8012, "step": 8526 }, { "epoch": 0.8924123495552067, "grad_norm": 2.035224893111058, "learning_rate": 6.010144272486307e-07, "loss": 0.8819, "step": 8527 }, { "epoch": 0.8925170068027211, "grad_norm": 1.9861692794496122, "learning_rate": 5.998575343967238e-07, "loss": 0.8553, "step": 8528 }, { "epoch": 0.8926216640502355, "grad_norm": 2.139461496658353, "learning_rate": 5.987017216402402e-07, "loss": 0.8632, "step": 8529 }, { "epoch": 0.8927263212977499, "grad_norm": 2.3325515767132985, "learning_rate": 5.9754698911199e-07, "loss": 0.8565, "step": 8530 }, { "epoch": 0.8928309785452643, "grad_norm": 2.1213813372529686, "learning_rate": 5.963933369446506e-07, "loss": 0.9007, "step": 8531 }, { "epoch": 0.8929356357927787, "grad_norm": 1.9716547476429467, "learning_rate": 5.952407652707803e-07, "loss": 0.801, "step": 8532 }, { "epoch": 0.8930402930402931, "grad_norm": 2.0956419653603167, "learning_rate": 5.940892742228111e-07, "loss": 0.9066, "step": 8533 }, { "epoch": 0.8931449502878074, "grad_norm": 2.1015997554130053, "learning_rate": 5.929388639330514e-07, "loss": 0.8079, "step": 8534 }, { "epoch": 0.8932496075353218, "grad_norm": 2.338930939945522, "learning_rate": 5.917895345336866e-07, "loss": 0.9304, "step": 8535 }, { "epoch": 0.8933542647828362, "grad_norm": 2.370725798752193, "learning_rate": 5.906412861567768e-07, "loss": 0.9179, "step": 8536 }, { "epoch": 0.8934589220303506, "grad_norm": 2.212434822841478, "learning_rate": 5.894941189342585e-07, "loss": 0.9368, "step": 8537 }, { "epoch": 0.893563579277865, "grad_norm": 1.7221383035811781, "learning_rate": 5.88348032997943e-07, "loss": 0.818, "step": 8538 }, { "epoch": 0.8936682365253794, "grad_norm": 2.160032789570768, "learning_rate": 5.872030284795161e-07, "loss": 0.8613, "step": 8539 }, { "epoch": 0.8937728937728938, "grad_norm": 2.0833805352848525, "learning_rate": 5.860591055105446e-07, "loss": 0.8991, "step": 8540 }, { "epoch": 0.8938775510204081, "grad_norm": 2.386495900806836, "learning_rate": 5.849162642224659e-07, "loss": 0.973, "step": 8541 }, { "epoch": 0.8939822082679225, "grad_norm": 2.127775483833274, "learning_rate": 5.837745047465959e-07, "loss": 0.8681, "step": 8542 }, { "epoch": 0.8940868655154369, "grad_norm": 2.1017239869056916, "learning_rate": 5.826338272141219e-07, "loss": 0.8879, "step": 8543 }, { "epoch": 0.8941915227629513, "grad_norm": 2.470116877242277, "learning_rate": 5.814942317561134e-07, "loss": 0.6756, "step": 8544 }, { "epoch": 0.8942961800104657, "grad_norm": 2.4032567630871706, "learning_rate": 5.803557185035092e-07, "loss": 0.9041, "step": 8545 }, { "epoch": 0.8944008372579801, "grad_norm": 1.847332668697746, "learning_rate": 5.792182875871299e-07, "loss": 0.8516, "step": 8546 }, { "epoch": 0.8945054945054945, "grad_norm": 2.2507894430237214, "learning_rate": 5.78081939137668e-07, "loss": 0.862, "step": 8547 }, { "epoch": 0.8946101517530088, "grad_norm": 2.233081933735458, "learning_rate": 5.769466732856899e-07, "loss": 0.955, "step": 8548 }, { "epoch": 0.8947148090005232, "grad_norm": 2.158698856453009, "learning_rate": 5.758124901616424e-07, "loss": 0.8542, "step": 8549 }, { "epoch": 0.8948194662480377, "grad_norm": 2.3430196102926213, "learning_rate": 5.746793898958458e-07, "loss": 0.9469, "step": 8550 }, { "epoch": 0.8949241234955521, "grad_norm": 2.313609302497972, "learning_rate": 5.735473726184937e-07, "loss": 0.859, "step": 8551 }, { "epoch": 0.8950287807430665, "grad_norm": 2.2653809195446235, "learning_rate": 5.724164384596576e-07, "loss": 0.8801, "step": 8552 }, { "epoch": 0.8951334379905809, "grad_norm": 1.7697838567450326, "learning_rate": 5.712865875492835e-07, "loss": 0.7973, "step": 8553 }, { "epoch": 0.8952380952380953, "grad_norm": 2.25827405042216, "learning_rate": 5.701578200171942e-07, "loss": 0.845, "step": 8554 }, { "epoch": 0.8953427524856097, "grad_norm": 2.3169595444377893, "learning_rate": 5.690301359930883e-07, "loss": 0.8851, "step": 8555 }, { "epoch": 0.895447409733124, "grad_norm": 2.1775535449606003, "learning_rate": 5.679035356065399e-07, "loss": 0.7539, "step": 8556 }, { "epoch": 0.8955520669806384, "grad_norm": 1.827925306636399, "learning_rate": 5.667780189869954e-07, "loss": 0.8239, "step": 8557 }, { "epoch": 0.8956567242281528, "grad_norm": 2.042695270687885, "learning_rate": 5.656535862637779e-07, "loss": 0.8298, "step": 8558 }, { "epoch": 0.8957613814756672, "grad_norm": 1.8663790088557182, "learning_rate": 5.64530237566091e-07, "loss": 0.8654, "step": 8559 }, { "epoch": 0.8958660387231816, "grad_norm": 2.3551080593344285, "learning_rate": 5.634079730230069e-07, "loss": 0.8198, "step": 8560 }, { "epoch": 0.895970695970696, "grad_norm": 2.085971922826618, "learning_rate": 5.622867927634768e-07, "loss": 0.9079, "step": 8561 }, { "epoch": 0.8960753532182104, "grad_norm": 2.1241341765259976, "learning_rate": 5.611666969163243e-07, "loss": 0.8598, "step": 8562 }, { "epoch": 0.8961800104657247, "grad_norm": 2.0606089660513596, "learning_rate": 5.600476856102543e-07, "loss": 0.9451, "step": 8563 }, { "epoch": 0.8962846677132391, "grad_norm": 2.0689539964279797, "learning_rate": 5.589297589738418e-07, "loss": 0.9374, "step": 8564 }, { "epoch": 0.8963893249607535, "grad_norm": 1.7677701894726472, "learning_rate": 5.578129171355407e-07, "loss": 0.6829, "step": 8565 }, { "epoch": 0.8964939822082679, "grad_norm": 2.0461327983112727, "learning_rate": 5.56697160223676e-07, "loss": 0.8308, "step": 8566 }, { "epoch": 0.8965986394557823, "grad_norm": 2.2030981092811928, "learning_rate": 5.555824883664496e-07, "loss": 0.8447, "step": 8567 }, { "epoch": 0.8967032967032967, "grad_norm": 2.165264235397933, "learning_rate": 5.544689016919425e-07, "loss": 0.8813, "step": 8568 }, { "epoch": 0.8968079539508111, "grad_norm": 2.040145311512098, "learning_rate": 5.533564003281067e-07, "loss": 0.941, "step": 8569 }, { "epoch": 0.8969126111983254, "grad_norm": 2.139615944092077, "learning_rate": 5.52244984402771e-07, "loss": 0.8565, "step": 8570 }, { "epoch": 0.8970172684458398, "grad_norm": 2.000369105386169, "learning_rate": 5.51134654043638e-07, "loss": 0.9503, "step": 8571 }, { "epoch": 0.8971219256933542, "grad_norm": 2.3047482566850266, "learning_rate": 5.500254093782886e-07, "loss": 0.9249, "step": 8572 }, { "epoch": 0.8972265829408687, "grad_norm": 2.357051052649448, "learning_rate": 5.489172505341756e-07, "loss": 0.8812, "step": 8573 }, { "epoch": 0.8973312401883831, "grad_norm": 1.7124549432994773, "learning_rate": 5.478101776386324e-07, "loss": 0.8168, "step": 8574 }, { "epoch": 0.8974358974358975, "grad_norm": 2.081260844225879, "learning_rate": 5.467041908188608e-07, "loss": 0.9044, "step": 8575 }, { "epoch": 0.8975405546834119, "grad_norm": 2.3845487096865634, "learning_rate": 5.455992902019413e-07, "loss": 0.9173, "step": 8576 }, { "epoch": 0.8976452119309262, "grad_norm": 2.236894173393846, "learning_rate": 5.44495475914828e-07, "loss": 0.902, "step": 8577 }, { "epoch": 0.8977498691784406, "grad_norm": 1.8686621081953942, "learning_rate": 5.433927480843537e-07, "loss": 0.8863, "step": 8578 }, { "epoch": 0.897854526425955, "grad_norm": 2.0220966066928128, "learning_rate": 5.422911068372227e-07, "loss": 0.8258, "step": 8579 }, { "epoch": 0.8979591836734694, "grad_norm": 2.407389734858554, "learning_rate": 5.411905523000172e-07, "loss": 0.8185, "step": 8580 }, { "epoch": 0.8980638409209838, "grad_norm": 2.180556601813161, "learning_rate": 5.400910845991925e-07, "loss": 0.7839, "step": 8581 }, { "epoch": 0.8981684981684982, "grad_norm": 2.2606617621305434, "learning_rate": 5.389927038610776e-07, "loss": 0.8516, "step": 8582 }, { "epoch": 0.8982731554160126, "grad_norm": 1.780676054406007, "learning_rate": 5.378954102118827e-07, "loss": 0.7737, "step": 8583 }, { "epoch": 0.8983778126635269, "grad_norm": 2.0946716817061386, "learning_rate": 5.367992037776859e-07, "loss": 0.9084, "step": 8584 }, { "epoch": 0.8984824699110413, "grad_norm": 1.91970079060269, "learning_rate": 5.357040846844452e-07, "loss": 0.7988, "step": 8585 }, { "epoch": 0.8985871271585557, "grad_norm": 1.9948211908466287, "learning_rate": 5.346100530579911e-07, "loss": 0.8677, "step": 8586 }, { "epoch": 0.8986917844060701, "grad_norm": 2.1808113443571493, "learning_rate": 5.335171090240287e-07, "loss": 0.9124, "step": 8587 }, { "epoch": 0.8987964416535845, "grad_norm": 1.9339613369467432, "learning_rate": 5.324252527081408e-07, "loss": 0.8807, "step": 8588 }, { "epoch": 0.8989010989010989, "grad_norm": 1.8243315084924947, "learning_rate": 5.31334484235786e-07, "loss": 0.7639, "step": 8589 }, { "epoch": 0.8990057561486133, "grad_norm": 1.8725702372061233, "learning_rate": 5.302448037322938e-07, "loss": 0.7613, "step": 8590 }, { "epoch": 0.8991104133961276, "grad_norm": 2.267799187028563, "learning_rate": 5.29156211322871e-07, "loss": 0.8779, "step": 8591 }, { "epoch": 0.899215070643642, "grad_norm": 2.147263551468828, "learning_rate": 5.280687071325974e-07, "loss": 0.7696, "step": 8592 }, { "epoch": 0.8993197278911564, "grad_norm": 2.2925736831888557, "learning_rate": 5.269822912864319e-07, "loss": 0.8893, "step": 8593 }, { "epoch": 0.8994243851386708, "grad_norm": 2.1055062293520415, "learning_rate": 5.258969639092049e-07, "loss": 0.8334, "step": 8594 }, { "epoch": 0.8995290423861853, "grad_norm": 2.361570188646754, "learning_rate": 5.248127251256241e-07, "loss": 0.8185, "step": 8595 }, { "epoch": 0.8996336996336997, "grad_norm": 1.937369636411695, "learning_rate": 5.237295750602667e-07, "loss": 0.8652, "step": 8596 }, { "epoch": 0.8997383568812141, "grad_norm": 2.090179273646795, "learning_rate": 5.22647513837592e-07, "loss": 0.8299, "step": 8597 }, { "epoch": 0.8998430141287285, "grad_norm": 1.984663581897822, "learning_rate": 5.215665415819315e-07, "loss": 0.9113, "step": 8598 }, { "epoch": 0.8999476713762428, "grad_norm": 2.3235181239813163, "learning_rate": 5.204866584174906e-07, "loss": 0.9133, "step": 8599 }, { "epoch": 0.9000523286237572, "grad_norm": 2.0441409129727797, "learning_rate": 5.194078644683498e-07, "loss": 0.943, "step": 8600 }, { "epoch": 0.9001569858712716, "grad_norm": 1.8067996809796987, "learning_rate": 5.183301598584633e-07, "loss": 0.8274, "step": 8601 }, { "epoch": 0.900261643118786, "grad_norm": 2.1626365258620486, "learning_rate": 5.172535447116634e-07, "loss": 0.8934, "step": 8602 }, { "epoch": 0.9003663003663004, "grad_norm": 2.2909402751050814, "learning_rate": 5.161780191516552e-07, "loss": 0.8364, "step": 8603 }, { "epoch": 0.9004709576138148, "grad_norm": 1.9983348573884885, "learning_rate": 5.151035833020179e-07, "loss": 0.862, "step": 8604 }, { "epoch": 0.9005756148613292, "grad_norm": 2.202331715706061, "learning_rate": 5.14030237286206e-07, "loss": 0.8951, "step": 8605 }, { "epoch": 0.9006802721088435, "grad_norm": 1.838553460141115, "learning_rate": 5.129579812275498e-07, "loss": 0.7951, "step": 8606 }, { "epoch": 0.9007849293563579, "grad_norm": 2.2012009750979735, "learning_rate": 5.118868152492551e-07, "loss": 0.6918, "step": 8607 }, { "epoch": 0.9008895866038723, "grad_norm": 1.8524490674209988, "learning_rate": 5.10816739474399e-07, "loss": 0.8473, "step": 8608 }, { "epoch": 0.9009942438513867, "grad_norm": 2.3959923668906518, "learning_rate": 5.097477540259365e-07, "loss": 0.8172, "step": 8609 }, { "epoch": 0.9010989010989011, "grad_norm": 1.9678387832442228, "learning_rate": 5.086798590266961e-07, "loss": 0.9161, "step": 8610 }, { "epoch": 0.9012035583464155, "grad_norm": 1.9199312869676968, "learning_rate": 5.076130545993796e-07, "loss": 0.7752, "step": 8611 }, { "epoch": 0.9013082155939299, "grad_norm": 2.7222834602307215, "learning_rate": 5.065473408665678e-07, "loss": 0.9413, "step": 8612 }, { "epoch": 0.9014128728414442, "grad_norm": 1.9979574681439125, "learning_rate": 5.054827179507115e-07, "loss": 0.7943, "step": 8613 }, { "epoch": 0.9015175300889586, "grad_norm": 2.206392186358271, "learning_rate": 5.044191859741365e-07, "loss": 0.8969, "step": 8614 }, { "epoch": 0.901622187336473, "grad_norm": 2.1430840266190376, "learning_rate": 5.033567450590482e-07, "loss": 0.8642, "step": 8615 }, { "epoch": 0.9017268445839874, "grad_norm": 2.017404088868683, "learning_rate": 5.022953953275201e-07, "loss": 0.9434, "step": 8616 }, { "epoch": 0.9018315018315018, "grad_norm": 2.0133723850020346, "learning_rate": 5.012351369015067e-07, "loss": 0.7414, "step": 8617 }, { "epoch": 0.9019361590790163, "grad_norm": 2.046862624034036, "learning_rate": 5.001759699028319e-07, "loss": 0.9387, "step": 8618 }, { "epoch": 0.9020408163265307, "grad_norm": 2.0907637713335987, "learning_rate": 4.99117894453196e-07, "loss": 0.7797, "step": 8619 }, { "epoch": 0.902145473574045, "grad_norm": 2.013207011952914, "learning_rate": 4.980609106741729e-07, "loss": 0.9043, "step": 8620 }, { "epoch": 0.9022501308215594, "grad_norm": 1.8844202484567696, "learning_rate": 4.970050186872155e-07, "loss": 0.879, "step": 8621 }, { "epoch": 0.9023547880690738, "grad_norm": 2.3362225206127696, "learning_rate": 4.959502186136456e-07, "loss": 0.8738, "step": 8622 }, { "epoch": 0.9024594453165882, "grad_norm": 2.0248898990471367, "learning_rate": 4.948965105746617e-07, "loss": 0.8554, "step": 8623 }, { "epoch": 0.9025641025641026, "grad_norm": 1.8717907789505432, "learning_rate": 4.938438946913382e-07, "loss": 0.8623, "step": 8624 }, { "epoch": 0.902668759811617, "grad_norm": 2.1406021198538188, "learning_rate": 4.927923710846228e-07, "loss": 0.8994, "step": 8625 }, { "epoch": 0.9027734170591314, "grad_norm": 2.105701972491203, "learning_rate": 4.917419398753342e-07, "loss": 0.8342, "step": 8626 }, { "epoch": 0.9028780743066457, "grad_norm": 2.0643422231373707, "learning_rate": 4.906926011841751e-07, "loss": 0.9147, "step": 8627 }, { "epoch": 0.9029827315541601, "grad_norm": 1.9814741563458926, "learning_rate": 4.89644355131712e-07, "loss": 0.8054, "step": 8628 }, { "epoch": 0.9030873888016745, "grad_norm": 2.1051194899963317, "learning_rate": 4.885972018383933e-07, "loss": 0.9514, "step": 8629 }, { "epoch": 0.9031920460491889, "grad_norm": 2.459613012959799, "learning_rate": 4.87551141424536e-07, "loss": 0.9572, "step": 8630 }, { "epoch": 0.9032967032967033, "grad_norm": 2.629282062410733, "learning_rate": 4.865061740103361e-07, "loss": 0.8158, "step": 8631 }, { "epoch": 0.9034013605442177, "grad_norm": 2.0103071109487853, "learning_rate": 4.854622997158643e-07, "loss": 0.8943, "step": 8632 }, { "epoch": 0.9035060177917321, "grad_norm": 1.9239594017345965, "learning_rate": 4.844195186610622e-07, "loss": 0.8074, "step": 8633 }, { "epoch": 0.9036106750392464, "grad_norm": 2.1402627344081875, "learning_rate": 4.833778309657467e-07, "loss": 0.8907, "step": 8634 }, { "epoch": 0.9037153322867608, "grad_norm": 2.137809328262573, "learning_rate": 4.823372367496104e-07, "loss": 0.8235, "step": 8635 }, { "epoch": 0.9038199895342752, "grad_norm": 2.0550886701913593, "learning_rate": 4.812977361322201e-07, "loss": 0.8927, "step": 8636 }, { "epoch": 0.9039246467817896, "grad_norm": 1.9128522746883567, "learning_rate": 4.802593292330171e-07, "loss": 0.7968, "step": 8637 }, { "epoch": 0.904029304029304, "grad_norm": 1.9358701087954804, "learning_rate": 4.792220161713157e-07, "loss": 0.8978, "step": 8638 }, { "epoch": 0.9041339612768184, "grad_norm": 2.291398568641111, "learning_rate": 4.78185797066304e-07, "loss": 0.8892, "step": 8639 }, { "epoch": 0.9042386185243328, "grad_norm": 2.4919196805707418, "learning_rate": 4.771506720370466e-07, "loss": 0.9424, "step": 8640 }, { "epoch": 0.9043432757718473, "grad_norm": 1.6894319599246974, "learning_rate": 4.761166412024831e-07, "loss": 0.7277, "step": 8641 }, { "epoch": 0.9044479330193615, "grad_norm": 1.9107521958807865, "learning_rate": 4.7508370468142496e-07, "loss": 0.885, "step": 8642 }, { "epoch": 0.904552590266876, "grad_norm": 2.0614995865460863, "learning_rate": 4.7405186259255833e-07, "loss": 0.7822, "step": 8643 }, { "epoch": 0.9046572475143904, "grad_norm": 1.7924039712471196, "learning_rate": 4.7302111505444284e-07, "loss": 0.9039, "step": 8644 }, { "epoch": 0.9047619047619048, "grad_norm": 2.249557355888428, "learning_rate": 4.719914621855137e-07, "loss": 0.8715, "step": 8645 }, { "epoch": 0.9048665620094192, "grad_norm": 2.1203352917619345, "learning_rate": 4.7096290410408196e-07, "loss": 0.8193, "step": 8646 }, { "epoch": 0.9049712192569336, "grad_norm": 2.115446838243104, "learning_rate": 4.6993544092832965e-07, "loss": 0.7967, "step": 8647 }, { "epoch": 0.905075876504448, "grad_norm": 1.6513015444716435, "learning_rate": 4.689090727763157e-07, "loss": 0.6944, "step": 8648 }, { "epoch": 0.9051805337519623, "grad_norm": 2.1211276289299272, "learning_rate": 4.6788379976596797e-07, "loss": 0.8097, "step": 8649 }, { "epoch": 0.9052851909994767, "grad_norm": 1.7872427675425049, "learning_rate": 4.668596220150967e-07, "loss": 0.742, "step": 8650 }, { "epoch": 0.9053898482469911, "grad_norm": 1.898405645689105, "learning_rate": 4.658365396413811e-07, "loss": 0.6804, "step": 8651 }, { "epoch": 0.9054945054945055, "grad_norm": 2.2484345299825756, "learning_rate": 4.6481455276237485e-07, "loss": 0.9168, "step": 8652 }, { "epoch": 0.9055991627420199, "grad_norm": 1.8890310215611323, "learning_rate": 4.637936614955063e-07, "loss": 0.8853, "step": 8653 }, { "epoch": 0.9057038199895343, "grad_norm": 2.116084072039392, "learning_rate": 4.6277386595807717e-07, "loss": 0.9191, "step": 8654 }, { "epoch": 0.9058084772370487, "grad_norm": 1.8431218097882023, "learning_rate": 4.617551662672659e-07, "loss": 0.8677, "step": 8655 }, { "epoch": 0.905913134484563, "grad_norm": 2.3481517816362905, "learning_rate": 4.6073756254012224e-07, "loss": 0.8765, "step": 8656 }, { "epoch": 0.9060177917320774, "grad_norm": 2.0817117165028898, "learning_rate": 4.597210548935693e-07, "loss": 0.8923, "step": 8657 }, { "epoch": 0.9061224489795918, "grad_norm": 2.2883970597196366, "learning_rate": 4.5870564344440815e-07, "loss": 0.914, "step": 8658 }, { "epoch": 0.9062271062271062, "grad_norm": 1.83618675401989, "learning_rate": 4.576913283093098e-07, "loss": 0.765, "step": 8659 }, { "epoch": 0.9063317634746206, "grad_norm": 1.989383983995067, "learning_rate": 4.566781096048234e-07, "loss": 0.7973, "step": 8660 }, { "epoch": 0.906436420722135, "grad_norm": 2.3861984180984344, "learning_rate": 4.5566598744736903e-07, "loss": 0.9352, "step": 8661 }, { "epoch": 0.9065410779696494, "grad_norm": 2.0378146649226068, "learning_rate": 4.5465496195324035e-07, "loss": 0.8419, "step": 8662 }, { "epoch": 0.9066457352171637, "grad_norm": 2.392679580789577, "learning_rate": 4.5364503323860666e-07, "loss": 0.849, "step": 8663 }, { "epoch": 0.9067503924646781, "grad_norm": 1.9701786010249667, "learning_rate": 4.526362014195107e-07, "loss": 0.9152, "step": 8664 }, { "epoch": 0.9068550497121926, "grad_norm": 2.2953866617085827, "learning_rate": 4.516284666118709e-07, "loss": 0.9003, "step": 8665 }, { "epoch": 0.906959706959707, "grad_norm": 1.9748169384630196, "learning_rate": 4.506218289314757e-07, "loss": 0.8263, "step": 8666 }, { "epoch": 0.9070643642072214, "grad_norm": 1.9629754135097301, "learning_rate": 4.496162884939914e-07, "loss": 0.934, "step": 8667 }, { "epoch": 0.9071690214547358, "grad_norm": 2.008476459907381, "learning_rate": 4.4861184541495685e-07, "loss": 0.8925, "step": 8668 }, { "epoch": 0.9072736787022502, "grad_norm": 1.7441402259407277, "learning_rate": 4.4760849980978184e-07, "loss": 0.8045, "step": 8669 }, { "epoch": 0.9073783359497645, "grad_norm": 2.085568340205773, "learning_rate": 4.466062517937575e-07, "loss": 0.8026, "step": 8670 }, { "epoch": 0.9074829931972789, "grad_norm": 2.1982232449226977, "learning_rate": 4.456051014820406e-07, "loss": 0.8355, "step": 8671 }, { "epoch": 0.9075876504447933, "grad_norm": 2.1354303926988765, "learning_rate": 4.4460504898966693e-07, "loss": 0.8294, "step": 8672 }, { "epoch": 0.9076923076923077, "grad_norm": 2.034301795378783, "learning_rate": 4.4360609443154233e-07, "loss": 0.9312, "step": 8673 }, { "epoch": 0.9077969649398221, "grad_norm": 2.355315349345066, "learning_rate": 4.426082379224517e-07, "loss": 0.9095, "step": 8674 }, { "epoch": 0.9079016221873365, "grad_norm": 1.9806257600484884, "learning_rate": 4.4161147957704767e-07, "loss": 0.7933, "step": 8675 }, { "epoch": 0.9080062794348509, "grad_norm": 2.2251811085208804, "learning_rate": 4.406158195098642e-07, "loss": 0.9325, "step": 8676 }, { "epoch": 0.9081109366823652, "grad_norm": 2.2630252661426575, "learning_rate": 4.39621257835301e-07, "loss": 0.8204, "step": 8677 }, { "epoch": 0.9082155939298796, "grad_norm": 2.013858317073263, "learning_rate": 4.386277946676343e-07, "loss": 0.8767, "step": 8678 }, { "epoch": 0.908320251177394, "grad_norm": 2.051873546111938, "learning_rate": 4.376354301210184e-07, "loss": 0.9409, "step": 8679 }, { "epoch": 0.9084249084249084, "grad_norm": 2.13004369241599, "learning_rate": 4.3664416430947766e-07, "loss": 0.8222, "step": 8680 }, { "epoch": 0.9085295656724228, "grad_norm": 2.0442840076206705, "learning_rate": 4.356539973469087e-07, "loss": 0.9183, "step": 8681 }, { "epoch": 0.9086342229199372, "grad_norm": 1.7421295502754213, "learning_rate": 4.346649293470839e-07, "loss": 0.8429, "step": 8682 }, { "epoch": 0.9087388801674516, "grad_norm": 2.3968867260487117, "learning_rate": 4.33676960423649e-07, "loss": 0.9008, "step": 8683 }, { "epoch": 0.908843537414966, "grad_norm": 1.9127124116543281, "learning_rate": 4.326900906901232e-07, "loss": 0.7683, "step": 8684 }, { "epoch": 0.9089481946624803, "grad_norm": 1.95961808035582, "learning_rate": 4.317043202599036e-07, "loss": 0.8165, "step": 8685 }, { "epoch": 0.9090528519099947, "grad_norm": 1.9844531465124018, "learning_rate": 4.30719649246254e-07, "loss": 0.8512, "step": 8686 }, { "epoch": 0.9091575091575091, "grad_norm": 1.9622621837590426, "learning_rate": 4.297360777623161e-07, "loss": 0.7713, "step": 8687 }, { "epoch": 0.9092621664050236, "grad_norm": 1.8041201019218724, "learning_rate": 4.287536059211017e-07, "loss": 0.8295, "step": 8688 }, { "epoch": 0.909366823652538, "grad_norm": 2.337575130743714, "learning_rate": 4.277722338355028e-07, "loss": 0.8251, "step": 8689 }, { "epoch": 0.9094714809000524, "grad_norm": 2.0458103819689866, "learning_rate": 4.267919616182792e-07, "loss": 0.8311, "step": 8690 }, { "epoch": 0.9095761381475668, "grad_norm": 1.8361080971407697, "learning_rate": 4.2581278938206626e-07, "loss": 0.7123, "step": 8691 }, { "epoch": 0.9096807953950811, "grad_norm": 2.1827141207863607, "learning_rate": 4.2483471723937075e-07, "loss": 0.9557, "step": 8692 }, { "epoch": 0.9097854526425955, "grad_norm": 1.7316495003042163, "learning_rate": 4.2385774530257717e-07, "loss": 0.7874, "step": 8693 }, { "epoch": 0.9098901098901099, "grad_norm": 2.1643266776521997, "learning_rate": 4.2288187368394353e-07, "loss": 0.7739, "step": 8694 }, { "epoch": 0.9099947671376243, "grad_norm": 1.6984430830527935, "learning_rate": 4.219071024955967e-07, "loss": 0.781, "step": 8695 }, { "epoch": 0.9100994243851387, "grad_norm": 1.9916952789284774, "learning_rate": 4.209334318495406e-07, "loss": 0.8571, "step": 8696 }, { "epoch": 0.9102040816326531, "grad_norm": 2.02650344774727, "learning_rate": 4.1996086185765115e-07, "loss": 0.8146, "step": 8697 }, { "epoch": 0.9103087388801675, "grad_norm": 2.0774078712351667, "learning_rate": 4.1898939263168127e-07, "loss": 0.8396, "step": 8698 }, { "epoch": 0.9104133961276818, "grad_norm": 2.171351968437315, "learning_rate": 4.1801902428325156e-07, "loss": 0.9199, "step": 8699 }, { "epoch": 0.9105180533751962, "grad_norm": 2.0626628160103855, "learning_rate": 4.1704975692386186e-07, "loss": 0.8262, "step": 8700 }, { "epoch": 0.9106227106227106, "grad_norm": 2.2474029662554256, "learning_rate": 4.160815906648796e-07, "loss": 0.8491, "step": 8701 }, { "epoch": 0.910727367870225, "grad_norm": 1.8343930692503205, "learning_rate": 4.1511452561755263e-07, "loss": 0.9191, "step": 8702 }, { "epoch": 0.9108320251177394, "grad_norm": 1.919914539345838, "learning_rate": 4.1414856189299635e-07, "loss": 0.8064, "step": 8703 }, { "epoch": 0.9109366823652538, "grad_norm": 2.2697378486199185, "learning_rate": 4.131836996022043e-07, "loss": 0.8934, "step": 8704 }, { "epoch": 0.9110413396127682, "grad_norm": 2.1565994352606648, "learning_rate": 4.122199388560388e-07, "loss": 0.8283, "step": 8705 }, { "epoch": 0.9111459968602825, "grad_norm": 2.0593897209824243, "learning_rate": 4.1125727976523923e-07, "loss": 0.8751, "step": 8706 }, { "epoch": 0.9112506541077969, "grad_norm": 2.633616179719094, "learning_rate": 4.1029572244041585e-07, "loss": 0.8806, "step": 8707 }, { "epoch": 0.9113553113553113, "grad_norm": 2.217152686708234, "learning_rate": 4.0933526699205475e-07, "loss": 0.911, "step": 8708 }, { "epoch": 0.9114599686028257, "grad_norm": 2.166895554817742, "learning_rate": 4.083759135305132e-07, "loss": 0.7358, "step": 8709 }, { "epoch": 0.9115646258503401, "grad_norm": 2.0236710752805713, "learning_rate": 4.074176621660242e-07, "loss": 0.8996, "step": 8710 }, { "epoch": 0.9116692830978546, "grad_norm": 2.2962325093594607, "learning_rate": 4.0646051300869295e-07, "loss": 0.9279, "step": 8711 }, { "epoch": 0.911773940345369, "grad_norm": 2.042939432062761, "learning_rate": 4.0550446616849484e-07, "loss": 0.8324, "step": 8712 }, { "epoch": 0.9118785975928833, "grad_norm": 2.428545782174381, "learning_rate": 4.0454952175528527e-07, "loss": 0.8856, "step": 8713 }, { "epoch": 0.9119832548403977, "grad_norm": 2.2988669806643784, "learning_rate": 4.0359567987878655e-07, "loss": 0.8653, "step": 8714 }, { "epoch": 0.9120879120879121, "grad_norm": 1.9821777641406835, "learning_rate": 4.026429406485988e-07, "loss": 0.9642, "step": 8715 }, { "epoch": 0.9121925693354265, "grad_norm": 2.301086960995479, "learning_rate": 4.016913041741921e-07, "loss": 0.8863, "step": 8716 }, { "epoch": 0.9122972265829409, "grad_norm": 2.5204244955883515, "learning_rate": 4.0074077056491355e-07, "loss": 0.8635, "step": 8717 }, { "epoch": 0.9124018838304553, "grad_norm": 1.8160765406116912, "learning_rate": 3.997913399299791e-07, "loss": 0.7991, "step": 8718 }, { "epoch": 0.9125065410779697, "grad_norm": 1.934088855695629, "learning_rate": 3.9884301237848255e-07, "loss": 0.9356, "step": 8719 }, { "epoch": 0.912611198325484, "grad_norm": 2.2496984791324177, "learning_rate": 3.978957880193868e-07, "loss": 0.8782, "step": 8720 }, { "epoch": 0.9127158555729984, "grad_norm": 2.3632313366350703, "learning_rate": 3.969496669615314e-07, "loss": 0.8665, "step": 8721 }, { "epoch": 0.9128205128205128, "grad_norm": 2.272870516424909, "learning_rate": 3.9600464931362494e-07, "loss": 0.973, "step": 8722 }, { "epoch": 0.9129251700680272, "grad_norm": 2.146882820843765, "learning_rate": 3.9506073518425504e-07, "loss": 0.9286, "step": 8723 }, { "epoch": 0.9130298273155416, "grad_norm": 2.0647435155269576, "learning_rate": 3.9411792468187716e-07, "loss": 0.8374, "step": 8724 }, { "epoch": 0.913134484563056, "grad_norm": 2.059170164055368, "learning_rate": 3.931762179148235e-07, "loss": 0.8575, "step": 8725 }, { "epoch": 0.9132391418105704, "grad_norm": 1.9573837147170599, "learning_rate": 3.9223561499129647e-07, "loss": 0.8199, "step": 8726 }, { "epoch": 0.9133437990580848, "grad_norm": 2.1437953288885017, "learning_rate": 3.91296116019374e-07, "loss": 0.7705, "step": 8727 }, { "epoch": 0.9134484563055991, "grad_norm": 1.858952500811545, "learning_rate": 3.903577211070075e-07, "loss": 0.825, "step": 8728 }, { "epoch": 0.9135531135531135, "grad_norm": 2.160108943394208, "learning_rate": 3.894204303620197e-07, "loss": 0.8654, "step": 8729 }, { "epoch": 0.9136577708006279, "grad_norm": 2.1084513039063117, "learning_rate": 3.884842438921077e-07, "loss": 1.033, "step": 8730 }, { "epoch": 0.9137624280481423, "grad_norm": 2.7878592333589607, "learning_rate": 3.875491618048388e-07, "loss": 0.9281, "step": 8731 }, { "epoch": 0.9138670852956567, "grad_norm": 1.991323149670702, "learning_rate": 3.866151842076593e-07, "loss": 0.9283, "step": 8732 }, { "epoch": 0.9139717425431712, "grad_norm": 2.2833156721267507, "learning_rate": 3.8568231120788334e-07, "loss": 0.9989, "step": 8733 }, { "epoch": 0.9140763997906856, "grad_norm": 1.9572296751214142, "learning_rate": 3.8475054291270077e-07, "loss": 0.7219, "step": 8734 }, { "epoch": 0.9141810570381999, "grad_norm": 2.2701248411948085, "learning_rate": 3.838198794291714e-07, "loss": 0.911, "step": 8735 }, { "epoch": 0.9142857142857143, "grad_norm": 1.924604288549798, "learning_rate": 3.8289032086423095e-07, "loss": 0.9511, "step": 8736 }, { "epoch": 0.9143903715332287, "grad_norm": 2.141850749754829, "learning_rate": 3.8196186732469056e-07, "loss": 0.8538, "step": 8737 }, { "epoch": 0.9144950287807431, "grad_norm": 1.8871726093707848, "learning_rate": 3.810345189172293e-07, "loss": 0.8743, "step": 8738 }, { "epoch": 0.9145996860282575, "grad_norm": 2.345605190515875, "learning_rate": 3.80108275748402e-07, "loss": 0.9693, "step": 8739 }, { "epoch": 0.9147043432757719, "grad_norm": 1.8872898192305327, "learning_rate": 3.791831379246347e-07, "loss": 0.8189, "step": 8740 }, { "epoch": 0.9148090005232863, "grad_norm": 2.146002787179079, "learning_rate": 3.7825910555222666e-07, "loss": 0.8324, "step": 8741 }, { "epoch": 0.9149136577708006, "grad_norm": 2.085783356155515, "learning_rate": 3.773361787373553e-07, "loss": 0.8937, "step": 8742 }, { "epoch": 0.915018315018315, "grad_norm": 2.383660628094254, "learning_rate": 3.7641435758606347e-07, "loss": 0.9274, "step": 8743 }, { "epoch": 0.9151229722658294, "grad_norm": 1.887353522221794, "learning_rate": 3.754936422042699e-07, "loss": 0.8397, "step": 8744 }, { "epoch": 0.9152276295133438, "grad_norm": 2.1935449423146656, "learning_rate": 3.745740326977687e-07, "loss": 0.8892, "step": 8745 }, { "epoch": 0.9153322867608582, "grad_norm": 1.9262362500423253, "learning_rate": 3.7365552917222213e-07, "loss": 0.8224, "step": 8746 }, { "epoch": 0.9154369440083726, "grad_norm": 2.0505509798208417, "learning_rate": 3.7273813173317243e-07, "loss": 0.8285, "step": 8747 }, { "epoch": 0.915541601255887, "grad_norm": 2.3324333915301216, "learning_rate": 3.718218404860263e-07, "loss": 0.8936, "step": 8748 }, { "epoch": 0.9156462585034013, "grad_norm": 2.031276939093479, "learning_rate": 3.7090665553607076e-07, "loss": 0.7884, "step": 8749 }, { "epoch": 0.9157509157509157, "grad_norm": 2.2510972837867875, "learning_rate": 3.6999257698845825e-07, "loss": 0.9864, "step": 8750 }, { "epoch": 0.9158555729984301, "grad_norm": 2.3120088138033257, "learning_rate": 3.690796049482226e-07, "loss": 0.8674, "step": 8751 }, { "epoch": 0.9159602302459445, "grad_norm": 2.5331940368689367, "learning_rate": 3.681677395202632e-07, "loss": 0.8305, "step": 8752 }, { "epoch": 0.9160648874934589, "grad_norm": 2.134818511890312, "learning_rate": 3.6725698080935626e-07, "loss": 0.8018, "step": 8753 }, { "epoch": 0.9161695447409733, "grad_norm": 2.2432740685775148, "learning_rate": 3.6634732892015025e-07, "loss": 0.9043, "step": 8754 }, { "epoch": 0.9162742019884877, "grad_norm": 1.9549596052281204, "learning_rate": 3.654387839571649e-07, "loss": 0.9349, "step": 8755 }, { "epoch": 0.916378859236002, "grad_norm": 2.0607140029707605, "learning_rate": 3.645313460247957e-07, "loss": 0.8228, "step": 8756 }, { "epoch": 0.9164835164835164, "grad_norm": 1.9888703343791325, "learning_rate": 3.6362501522730797e-07, "loss": 0.7932, "step": 8757 }, { "epoch": 0.9165881737310309, "grad_norm": 2.1547284667729314, "learning_rate": 3.6271979166884076e-07, "loss": 0.8284, "step": 8758 }, { "epoch": 0.9166928309785453, "grad_norm": 2.3830343538126315, "learning_rate": 3.6181567545340634e-07, "loss": 0.8717, "step": 8759 }, { "epoch": 0.9167974882260597, "grad_norm": 2.7710627812990074, "learning_rate": 3.6091266668488835e-07, "loss": 0.7445, "step": 8760 }, { "epoch": 0.9169021454735741, "grad_norm": 1.8300014783449674, "learning_rate": 3.6001076546704595e-07, "loss": 0.92, "step": 8761 }, { "epoch": 0.9170068027210885, "grad_norm": 2.035387613216914, "learning_rate": 3.591099719035096e-07, "loss": 0.7801, "step": 8762 }, { "epoch": 0.9171114599686028, "grad_norm": 2.104498222120233, "learning_rate": 3.582102860977821e-07, "loss": 0.8691, "step": 8763 }, { "epoch": 0.9172161172161172, "grad_norm": 2.083491104776923, "learning_rate": 3.5731170815323733e-07, "loss": 0.918, "step": 8764 }, { "epoch": 0.9173207744636316, "grad_norm": 2.0988470418475984, "learning_rate": 3.5641423817312503e-07, "loss": 0.8197, "step": 8765 }, { "epoch": 0.917425431711146, "grad_norm": 2.3323620083693677, "learning_rate": 3.555178762605671e-07, "loss": 0.9047, "step": 8766 }, { "epoch": 0.9175300889586604, "grad_norm": 2.235958503717904, "learning_rate": 3.546226225185567e-07, "loss": 0.8173, "step": 8767 }, { "epoch": 0.9176347462061748, "grad_norm": 2.3113114359456826, "learning_rate": 3.537284770499605e-07, "loss": 0.9114, "step": 8768 }, { "epoch": 0.9177394034536892, "grad_norm": 2.095066016594399, "learning_rate": 3.528354399575151e-07, "loss": 1.0252, "step": 8769 }, { "epoch": 0.9178440607012036, "grad_norm": 1.9933094078470959, "learning_rate": 3.519435113438352e-07, "loss": 0.8784, "step": 8770 }, { "epoch": 0.9179487179487179, "grad_norm": 2.0786146110365746, "learning_rate": 3.510526913114065e-07, "loss": 0.8294, "step": 8771 }, { "epoch": 0.9180533751962323, "grad_norm": 2.195402968074223, "learning_rate": 3.5016297996258276e-07, "loss": 0.9349, "step": 8772 }, { "epoch": 0.9181580324437467, "grad_norm": 2.05314297727512, "learning_rate": 3.4927437739959547e-07, "loss": 0.7679, "step": 8773 }, { "epoch": 0.9182626896912611, "grad_norm": 2.244479903766985, "learning_rate": 3.483868837245452e-07, "loss": 0.929, "step": 8774 }, { "epoch": 0.9183673469387755, "grad_norm": 2.015140547965409, "learning_rate": 3.4750049903940817e-07, "loss": 0.8448, "step": 8775 }, { "epoch": 0.9184720041862899, "grad_norm": 1.8382679008968412, "learning_rate": 3.466152234460318e-07, "loss": 0.751, "step": 8776 }, { "epoch": 0.9185766614338043, "grad_norm": 2.205165672351134, "learning_rate": 3.4573105704613586e-07, "loss": 0.9405, "step": 8777 }, { "epoch": 0.9186813186813186, "grad_norm": 1.821503436915656, "learning_rate": 3.4484799994131345e-07, "loss": 0.808, "step": 8778 }, { "epoch": 0.918785975928833, "grad_norm": 2.259936480939421, "learning_rate": 3.439660522330268e-07, "loss": 0.8702, "step": 8779 }, { "epoch": 0.9188906331763474, "grad_norm": 2.0808618852041847, "learning_rate": 3.43085214022616e-07, "loss": 0.7654, "step": 8780 }, { "epoch": 0.9189952904238619, "grad_norm": 1.970573498767891, "learning_rate": 3.4220548541129217e-07, "loss": 0.8929, "step": 8781 }, { "epoch": 0.9190999476713763, "grad_norm": 1.9944791324781226, "learning_rate": 3.413268665001357e-07, "loss": 0.8615, "step": 8782 }, { "epoch": 0.9192046049188907, "grad_norm": 1.9279258269514712, "learning_rate": 3.404493573901024e-07, "loss": 0.7665, "step": 8783 }, { "epoch": 0.9193092621664051, "grad_norm": 2.180216746439435, "learning_rate": 3.3957295818201954e-07, "loss": 0.9315, "step": 8784 }, { "epoch": 0.9194139194139194, "grad_norm": 1.9619360610575096, "learning_rate": 3.3869766897658753e-07, "loss": 0.878, "step": 8785 }, { "epoch": 0.9195185766614338, "grad_norm": 2.030802148720462, "learning_rate": 3.378234898743804e-07, "loss": 0.73, "step": 8786 }, { "epoch": 0.9196232339089482, "grad_norm": 2.1100710529585625, "learning_rate": 3.369504209758401e-07, "loss": 0.8821, "step": 8787 }, { "epoch": 0.9197278911564626, "grad_norm": 2.2642436944668822, "learning_rate": 3.3607846238128517e-07, "loss": 0.8657, "step": 8788 }, { "epoch": 0.919832548403977, "grad_norm": 1.8980867961682124, "learning_rate": 3.352076141909055e-07, "loss": 0.7698, "step": 8789 }, { "epoch": 0.9199372056514914, "grad_norm": 2.116331603985098, "learning_rate": 3.3433787650476444e-07, "loss": 0.7825, "step": 8790 }, { "epoch": 0.9200418628990058, "grad_norm": 2.016895335801583, "learning_rate": 3.3346924942279645e-07, "loss": 0.8306, "step": 8791 }, { "epoch": 0.9201465201465201, "grad_norm": 2.1969878425950435, "learning_rate": 3.3260173304480724e-07, "loss": 0.9345, "step": 8792 }, { "epoch": 0.9202511773940345, "grad_norm": 2.4238866705980904, "learning_rate": 3.317353274704749e-07, "loss": 0.8095, "step": 8793 }, { "epoch": 0.9203558346415489, "grad_norm": 2.061511830200133, "learning_rate": 3.3087003279935527e-07, "loss": 0.9398, "step": 8794 }, { "epoch": 0.9204604918890633, "grad_norm": 1.9785156370811716, "learning_rate": 3.3000584913087e-07, "loss": 0.8761, "step": 8795 }, { "epoch": 0.9205651491365777, "grad_norm": 2.6239846193899687, "learning_rate": 3.2914277656431405e-07, "loss": 0.9125, "step": 8796 }, { "epoch": 0.9206698063840921, "grad_norm": 2.098352277907113, "learning_rate": 3.282808151988603e-07, "loss": 0.7692, "step": 8797 }, { "epoch": 0.9207744636316065, "grad_norm": 1.9428905633278999, "learning_rate": 3.2741996513354637e-07, "loss": 0.8322, "step": 8798 }, { "epoch": 0.9208791208791208, "grad_norm": 1.783743284332841, "learning_rate": 3.265602264672862e-07, "loss": 0.7968, "step": 8799 }, { "epoch": 0.9209837781266352, "grad_norm": 1.9603968225564672, "learning_rate": 3.257015992988677e-07, "loss": 0.9033, "step": 8800 }, { "epoch": 0.9210884353741496, "grad_norm": 1.9634203307317444, "learning_rate": 3.248440837269462e-07, "loss": 0.9142, "step": 8801 }, { "epoch": 0.921193092621664, "grad_norm": 1.8398499107034954, "learning_rate": 3.239876798500541e-07, "loss": 0.8375, "step": 8802 }, { "epoch": 0.9212977498691785, "grad_norm": 2.113545071381351, "learning_rate": 3.2313238776659037e-07, "loss": 0.842, "step": 8803 }, { "epoch": 0.9214024071166929, "grad_norm": 2.0202953316393164, "learning_rate": 3.222782075748354e-07, "loss": 0.7569, "step": 8804 }, { "epoch": 0.9215070643642073, "grad_norm": 2.1413122563788227, "learning_rate": 3.2142513937293064e-07, "loss": 0.8861, "step": 8805 }, { "epoch": 0.9216117216117216, "grad_norm": 2.235823853768752, "learning_rate": 3.2057318325889877e-07, "loss": 0.9496, "step": 8806 }, { "epoch": 0.921716378859236, "grad_norm": 2.16956827769149, "learning_rate": 3.1972233933063144e-07, "loss": 0.8859, "step": 8807 }, { "epoch": 0.9218210361067504, "grad_norm": 2.1026176075046874, "learning_rate": 3.1887260768588946e-07, "loss": 0.8599, "step": 8808 }, { "epoch": 0.9219256933542648, "grad_norm": 1.8287944523325435, "learning_rate": 3.180239884223124e-07, "loss": 0.8797, "step": 8809 }, { "epoch": 0.9220303506017792, "grad_norm": 2.0117907483069764, "learning_rate": 3.1717648163740567e-07, "loss": 0.9159, "step": 8810 }, { "epoch": 0.9221350078492936, "grad_norm": 2.1179998673763234, "learning_rate": 3.163300874285513e-07, "loss": 0.7744, "step": 8811 }, { "epoch": 0.922239665096808, "grad_norm": 2.2470761723344452, "learning_rate": 3.154848058929982e-07, "loss": 0.9273, "step": 8812 }, { "epoch": 0.9223443223443224, "grad_norm": 1.881792495195148, "learning_rate": 3.146406371278754e-07, "loss": 0.8834, "step": 8813 }, { "epoch": 0.9224489795918367, "grad_norm": 1.979370856312482, "learning_rate": 3.1379758123017636e-07, "loss": 0.8525, "step": 8814 }, { "epoch": 0.9225536368393511, "grad_norm": 2.139478294351095, "learning_rate": 3.1295563829677247e-07, "loss": 0.8343, "step": 8815 }, { "epoch": 0.9226582940868655, "grad_norm": 2.1238442720838013, "learning_rate": 3.121148084244041e-07, "loss": 1.0092, "step": 8816 }, { "epoch": 0.9227629513343799, "grad_norm": 1.7314699596986751, "learning_rate": 3.1127509170968295e-07, "loss": 0.8082, "step": 8817 }, { "epoch": 0.9228676085818943, "grad_norm": 2.012711651464119, "learning_rate": 3.10436488249094e-07, "loss": 0.8154, "step": 8818 }, { "epoch": 0.9229722658294087, "grad_norm": 2.611942421259733, "learning_rate": 3.0959899813899576e-07, "loss": 0.6697, "step": 8819 }, { "epoch": 0.9230769230769231, "grad_norm": 2.2358990969751193, "learning_rate": 3.0876262147561784e-07, "loss": 0.9779, "step": 8820 }, { "epoch": 0.9231815803244374, "grad_norm": 2.303016822984439, "learning_rate": 3.0792735835506015e-07, "loss": 0.7672, "step": 8821 }, { "epoch": 0.9232862375719518, "grad_norm": 2.125149271133452, "learning_rate": 3.0709320887329697e-07, "loss": 0.8014, "step": 8822 }, { "epoch": 0.9233908948194662, "grad_norm": 1.825121847185592, "learning_rate": 3.0626017312617274e-07, "loss": 0.841, "step": 8823 }, { "epoch": 0.9234955520669806, "grad_norm": 1.8166108344674616, "learning_rate": 3.0542825120940757e-07, "loss": 0.9354, "step": 8824 }, { "epoch": 0.923600209314495, "grad_norm": 2.1961545359673376, "learning_rate": 3.045974432185883e-07, "loss": 0.9738, "step": 8825 }, { "epoch": 0.9237048665620095, "grad_norm": 2.3971264132457257, "learning_rate": 3.0376774924917863e-07, "loss": 1.0354, "step": 8826 }, { "epoch": 0.9238095238095239, "grad_norm": 2.021091464574574, "learning_rate": 3.029391693965089e-07, "loss": 0.7179, "step": 8827 }, { "epoch": 0.9239141810570382, "grad_norm": 2.02476604473955, "learning_rate": 3.0211170375578747e-07, "loss": 0.9331, "step": 8828 }, { "epoch": 0.9240188383045526, "grad_norm": 2.2717440474645407, "learning_rate": 3.0128535242209155e-07, "loss": 0.8373, "step": 8829 }, { "epoch": 0.924123495552067, "grad_norm": 2.419214715389792, "learning_rate": 3.004601154903697e-07, "loss": 0.9407, "step": 8830 }, { "epoch": 0.9242281527995814, "grad_norm": 2.0851387695815573, "learning_rate": 2.9963599305544267e-07, "loss": 0.9082, "step": 8831 }, { "epoch": 0.9243328100470958, "grad_norm": 2.2246862727059207, "learning_rate": 2.9881298521200477e-07, "loss": 0.8642, "step": 8832 }, { "epoch": 0.9244374672946102, "grad_norm": 2.3566781906040704, "learning_rate": 2.9799109205462254e-07, "loss": 0.9774, "step": 8833 }, { "epoch": 0.9245421245421246, "grad_norm": 2.2258715574619967, "learning_rate": 2.971703136777315e-07, "loss": 0.9839, "step": 8834 }, { "epoch": 0.9246467817896389, "grad_norm": 2.37543036241895, "learning_rate": 2.963506501756419e-07, "loss": 0.8264, "step": 8835 }, { "epoch": 0.9247514390371533, "grad_norm": 2.1881079550525584, "learning_rate": 2.955321016425328e-07, "loss": 0.8516, "step": 8836 }, { "epoch": 0.9248560962846677, "grad_norm": 2.0859708402910884, "learning_rate": 2.947146681724578e-07, "loss": 0.8664, "step": 8837 }, { "epoch": 0.9249607535321821, "grad_norm": 2.1999782584393506, "learning_rate": 2.938983498593428e-07, "loss": 0.8514, "step": 8838 }, { "epoch": 0.9250654107796965, "grad_norm": 2.1661333998629333, "learning_rate": 2.930831467969841e-07, "loss": 0.8986, "step": 8839 }, { "epoch": 0.9251700680272109, "grad_norm": 2.4097416377658254, "learning_rate": 2.922690590790478e-07, "loss": 0.9982, "step": 8840 }, { "epoch": 0.9252747252747253, "grad_norm": 1.7688846225655979, "learning_rate": 2.91456086799079e-07, "loss": 0.7996, "step": 8841 }, { "epoch": 0.9253793825222396, "grad_norm": 2.3499275256087273, "learning_rate": 2.906442300504841e-07, "loss": 0.9031, "step": 8842 }, { "epoch": 0.925484039769754, "grad_norm": 2.0100308955005257, "learning_rate": 2.8983348892655303e-07, "loss": 0.9804, "step": 8843 }, { "epoch": 0.9255886970172684, "grad_norm": 2.6234232462180898, "learning_rate": 2.8902386352043675e-07, "loss": 0.8138, "step": 8844 }, { "epoch": 0.9256933542647828, "grad_norm": 2.3067667180043174, "learning_rate": 2.8821535392516644e-07, "loss": 0.8897, "step": 8845 }, { "epoch": 0.9257980115122972, "grad_norm": 2.275440873262541, "learning_rate": 2.874079602336377e-07, "loss": 0.8794, "step": 8846 }, { "epoch": 0.9259026687598116, "grad_norm": 2.5876415121542107, "learning_rate": 2.866016825386253e-07, "loss": 0.8667, "step": 8847 }, { "epoch": 0.926007326007326, "grad_norm": 1.8078289776282361, "learning_rate": 2.857965209327695e-07, "loss": 0.8499, "step": 8848 }, { "epoch": 0.9261119832548403, "grad_norm": 1.8051645575388477, "learning_rate": 2.8499247550858753e-07, "loss": 0.8481, "step": 8849 }, { "epoch": 0.9262166405023547, "grad_norm": 2.1114047838811127, "learning_rate": 2.841895463584643e-07, "loss": 0.8574, "step": 8850 }, { "epoch": 0.9263212977498692, "grad_norm": 2.221883898704795, "learning_rate": 2.833877335746571e-07, "loss": 0.9177, "step": 8851 }, { "epoch": 0.9264259549973836, "grad_norm": 2.049867364566891, "learning_rate": 2.8258703724929783e-07, "loss": 0.8104, "step": 8852 }, { "epoch": 0.926530612244898, "grad_norm": 2.0227819344438682, "learning_rate": 2.817874574743873e-07, "loss": 0.9001, "step": 8853 }, { "epoch": 0.9266352694924124, "grad_norm": 2.154423868042412, "learning_rate": 2.8098899434179874e-07, "loss": 0.8108, "step": 8854 }, { "epoch": 0.9267399267399268, "grad_norm": 2.2633954827114424, "learning_rate": 2.8019164794327756e-07, "loss": 0.8926, "step": 8855 }, { "epoch": 0.9268445839874412, "grad_norm": 2.122598963752719, "learning_rate": 2.793954183704384e-07, "loss": 0.8493, "step": 8856 }, { "epoch": 0.9269492412349555, "grad_norm": 2.2463946823059646, "learning_rate": 2.7860030571477237e-07, "loss": 0.9276, "step": 8857 }, { "epoch": 0.9270538984824699, "grad_norm": 1.7833875668973516, "learning_rate": 2.778063100676387e-07, "loss": 0.7611, "step": 8858 }, { "epoch": 0.9271585557299843, "grad_norm": 2.1337740367922007, "learning_rate": 2.7701343152026883e-07, "loss": 0.9961, "step": 8859 }, { "epoch": 0.9272632129774987, "grad_norm": 1.6322981589903927, "learning_rate": 2.7622167016376767e-07, "loss": 0.7977, "step": 8860 }, { "epoch": 0.9273678702250131, "grad_norm": 1.9024737473287638, "learning_rate": 2.754310260891058e-07, "loss": 0.8432, "step": 8861 }, { "epoch": 0.9274725274725275, "grad_norm": 1.8617079620794417, "learning_rate": 2.746414993871349e-07, "loss": 0.7266, "step": 8862 }, { "epoch": 0.9275771847200419, "grad_norm": 2.230787630710201, "learning_rate": 2.738530901485714e-07, "loss": 0.8972, "step": 8863 }, { "epoch": 0.9276818419675562, "grad_norm": 1.9515071420546217, "learning_rate": 2.7306579846400396e-07, "loss": 0.7411, "step": 8864 }, { "epoch": 0.9277864992150706, "grad_norm": 2.0010638403952616, "learning_rate": 2.7227962442389453e-07, "loss": 0.7764, "step": 8865 }, { "epoch": 0.927891156462585, "grad_norm": 2.269353148166856, "learning_rate": 2.7149456811857546e-07, "loss": 0.8705, "step": 8866 }, { "epoch": 0.9279958137100994, "grad_norm": 2.1031934409220963, "learning_rate": 2.7071062963825444e-07, "loss": 0.8417, "step": 8867 }, { "epoch": 0.9281004709576138, "grad_norm": 2.015166756657374, "learning_rate": 2.699278090730051e-07, "loss": 0.7953, "step": 8868 }, { "epoch": 0.9282051282051282, "grad_norm": 2.3657122687519143, "learning_rate": 2.6914610651277427e-07, "loss": 0.8704, "step": 8869 }, { "epoch": 0.9283097854526426, "grad_norm": 1.7952034748061156, "learning_rate": 2.683655220473824e-07, "loss": 0.8351, "step": 8870 }, { "epoch": 0.9284144427001569, "grad_norm": 2.261515967889273, "learning_rate": 2.675860557665211e-07, "loss": 0.8985, "step": 8871 }, { "epoch": 0.9285190999476713, "grad_norm": 1.8641043302959848, "learning_rate": 2.6680770775975196e-07, "loss": 0.8101, "step": 8872 }, { "epoch": 0.9286237571951858, "grad_norm": 1.9479323389296166, "learning_rate": 2.6603047811650795e-07, "loss": 0.8856, "step": 8873 }, { "epoch": 0.9287284144427002, "grad_norm": 2.3595217516180487, "learning_rate": 2.652543669260932e-07, "loss": 0.8375, "step": 8874 }, { "epoch": 0.9288330716902146, "grad_norm": 2.2492688188141723, "learning_rate": 2.644793742776874e-07, "loss": 0.7974, "step": 8875 }, { "epoch": 0.928937728937729, "grad_norm": 1.5664487634193003, "learning_rate": 2.637055002603373e-07, "loss": 0.7147, "step": 8876 }, { "epoch": 0.9290423861852434, "grad_norm": 1.6411930077631471, "learning_rate": 2.6293274496296263e-07, "loss": 0.7573, "step": 8877 }, { "epoch": 0.9291470434327577, "grad_norm": 2.2388975244958655, "learning_rate": 2.621611084743558e-07, "loss": 0.8901, "step": 8878 }, { "epoch": 0.9292517006802721, "grad_norm": 1.8373181670762098, "learning_rate": 2.6139059088317707e-07, "loss": 0.7095, "step": 8879 }, { "epoch": 0.9293563579277865, "grad_norm": 2.0228887964257214, "learning_rate": 2.606211922779611e-07, "loss": 0.7826, "step": 8880 }, { "epoch": 0.9294610151753009, "grad_norm": 2.582925357687833, "learning_rate": 2.59852912747115e-07, "loss": 0.9785, "step": 8881 }, { "epoch": 0.9295656724228153, "grad_norm": 2.186974123773659, "learning_rate": 2.5908575237891476e-07, "loss": 0.8608, "step": 8882 }, { "epoch": 0.9296703296703297, "grad_norm": 1.5660897431908913, "learning_rate": 2.5831971126150767e-07, "loss": 0.6913, "step": 8883 }, { "epoch": 0.9297749869178441, "grad_norm": 2.4094802073679755, "learning_rate": 2.575547894829145e-07, "loss": 0.9072, "step": 8884 }, { "epoch": 0.9298796441653584, "grad_norm": 2.2742005617782755, "learning_rate": 2.567909871310259e-07, "loss": 0.9929, "step": 8885 }, { "epoch": 0.9299843014128728, "grad_norm": 2.4008864387506677, "learning_rate": 2.5602830429360516e-07, "loss": 0.8501, "step": 8886 }, { "epoch": 0.9300889586603872, "grad_norm": 1.9539433575898302, "learning_rate": 2.552667410582843e-07, "loss": 0.8582, "step": 8887 }, { "epoch": 0.9301936159079016, "grad_norm": 2.0416965429628635, "learning_rate": 2.5450629751257117e-07, "loss": 0.8709, "step": 8888 }, { "epoch": 0.930298273155416, "grad_norm": 2.506235715702831, "learning_rate": 2.5374697374383806e-07, "loss": 1.0232, "step": 8889 }, { "epoch": 0.9304029304029304, "grad_norm": 1.99806566122585, "learning_rate": 2.529887698393374e-07, "loss": 0.8371, "step": 8890 }, { "epoch": 0.9305075876504448, "grad_norm": 1.9042092264811785, "learning_rate": 2.522316858861862e-07, "loss": 0.855, "step": 8891 }, { "epoch": 0.9306122448979591, "grad_norm": 2.038381656157126, "learning_rate": 2.514757219713737e-07, "loss": 0.8411, "step": 8892 }, { "epoch": 0.9307169021454735, "grad_norm": 2.340813532948446, "learning_rate": 2.507208781817638e-07, "loss": 0.923, "step": 8893 }, { "epoch": 0.9308215593929879, "grad_norm": 2.0448666923378265, "learning_rate": 2.499671546040894e-07, "loss": 0.8978, "step": 8894 }, { "epoch": 0.9309262166405023, "grad_norm": 2.199976902362381, "learning_rate": 2.492145513249533e-07, "loss": 0.961, "step": 8895 }, { "epoch": 0.9310308738880168, "grad_norm": 2.0776211466361274, "learning_rate": 2.4846306843083315e-07, "loss": 0.7628, "step": 8896 }, { "epoch": 0.9311355311355312, "grad_norm": 1.957550937998734, "learning_rate": 2.477127060080753e-07, "loss": 0.8212, "step": 8897 }, { "epoch": 0.9312401883830456, "grad_norm": 1.925782850036767, "learning_rate": 2.469634641428964e-07, "loss": 0.8907, "step": 8898 }, { "epoch": 0.93134484563056, "grad_norm": 2.1162098484306533, "learning_rate": 2.462153429213865e-07, "loss": 0.8915, "step": 8899 }, { "epoch": 0.9314495028780743, "grad_norm": 1.9771946100363844, "learning_rate": 2.454683424295068e-07, "loss": 0.742, "step": 8900 }, { "epoch": 0.9315541601255887, "grad_norm": 1.9810077999238465, "learning_rate": 2.4472246275308976e-07, "loss": 0.9926, "step": 8901 }, { "epoch": 0.9316588173731031, "grad_norm": 1.9014151842209916, "learning_rate": 2.4397770397783794e-07, "loss": 0.8555, "step": 8902 }, { "epoch": 0.9317634746206175, "grad_norm": 1.9053437153678374, "learning_rate": 2.4323406618932623e-07, "loss": 0.8252, "step": 8903 }, { "epoch": 0.9318681318681319, "grad_norm": 2.1866635447476193, "learning_rate": 2.4249154947299734e-07, "loss": 0.8793, "step": 8904 }, { "epoch": 0.9319727891156463, "grad_norm": 2.162217503848748, "learning_rate": 2.4175015391417087e-07, "loss": 0.8766, "step": 8905 }, { "epoch": 0.9320774463631607, "grad_norm": 2.3164940465618886, "learning_rate": 2.410098795980342e-07, "loss": 0.9261, "step": 8906 }, { "epoch": 0.932182103610675, "grad_norm": 1.968520269068906, "learning_rate": 2.40270726609646e-07, "loss": 0.879, "step": 8907 }, { "epoch": 0.9322867608581894, "grad_norm": 2.3833618329222865, "learning_rate": 2.3953269503393496e-07, "loss": 0.9269, "step": 8908 }, { "epoch": 0.9323914181057038, "grad_norm": 2.403516866391237, "learning_rate": 2.387957849557032e-07, "loss": 0.8012, "step": 8909 }, { "epoch": 0.9324960753532182, "grad_norm": 2.0016102230175528, "learning_rate": 2.380599964596264e-07, "loss": 0.8501, "step": 8910 }, { "epoch": 0.9326007326007326, "grad_norm": 2.0012188366238703, "learning_rate": 2.3732532963024468e-07, "loss": 0.9091, "step": 8911 }, { "epoch": 0.932705389848247, "grad_norm": 2.332974869714162, "learning_rate": 2.3659178455197274e-07, "loss": 0.8408, "step": 8912 }, { "epoch": 0.9328100470957614, "grad_norm": 2.2463160819031587, "learning_rate": 2.358593613090987e-07, "loss": 0.8569, "step": 8913 }, { "epoch": 0.9329147043432757, "grad_norm": 1.8053639500260543, "learning_rate": 2.3512805998577638e-07, "loss": 0.7705, "step": 8914 }, { "epoch": 0.9330193615907901, "grad_norm": 2.2853468515179345, "learning_rate": 2.343978806660363e-07, "loss": 0.8963, "step": 8915 }, { "epoch": 0.9331240188383045, "grad_norm": 2.0353226627808616, "learning_rate": 2.3366882343377695e-07, "loss": 0.8253, "step": 8916 }, { "epoch": 0.9332286760858189, "grad_norm": 2.312638823803725, "learning_rate": 2.3294088837276796e-07, "loss": 0.8579, "step": 8917 }, { "epoch": 0.9333333333333333, "grad_norm": 1.9526099594306408, "learning_rate": 2.322140755666491e-07, "loss": 0.812, "step": 8918 }, { "epoch": 0.9334379905808478, "grad_norm": 2.0469902618255724, "learning_rate": 2.3148838509893579e-07, "loss": 0.8683, "step": 8919 }, { "epoch": 0.9335426478283622, "grad_norm": 1.880159180549677, "learning_rate": 2.3076381705301022e-07, "loss": 0.9037, "step": 8920 }, { "epoch": 0.9336473050758765, "grad_norm": 1.5969199873906468, "learning_rate": 2.3004037151212576e-07, "loss": 0.7873, "step": 8921 }, { "epoch": 0.9337519623233909, "grad_norm": 1.9933441261897502, "learning_rate": 2.293180485594093e-07, "loss": 0.9562, "step": 8922 }, { "epoch": 0.9338566195709053, "grad_norm": 1.9912633475021717, "learning_rate": 2.2859684827785444e-07, "loss": 0.8938, "step": 8923 }, { "epoch": 0.9339612768184197, "grad_norm": 1.7915205291719956, "learning_rate": 2.2787677075033266e-07, "loss": 0.797, "step": 8924 }, { "epoch": 0.9340659340659341, "grad_norm": 1.9232604600710044, "learning_rate": 2.2715781605957886e-07, "loss": 0.8286, "step": 8925 }, { "epoch": 0.9341705913134485, "grad_norm": 2.106021331701821, "learning_rate": 2.2643998428820368e-07, "loss": 0.7703, "step": 8926 }, { "epoch": 0.9342752485609629, "grad_norm": 1.880171483367633, "learning_rate": 2.2572327551868778e-07, "loss": 0.8119, "step": 8927 }, { "epoch": 0.9343799058084772, "grad_norm": 2.318935123745428, "learning_rate": 2.2500768983338085e-07, "loss": 0.9226, "step": 8928 }, { "epoch": 0.9344845630559916, "grad_norm": 1.6832835230122096, "learning_rate": 2.2429322731450708e-07, "loss": 0.7947, "step": 8929 }, { "epoch": 0.934589220303506, "grad_norm": 2.1120628902234615, "learning_rate": 2.2357988804415976e-07, "loss": 0.9599, "step": 8930 }, { "epoch": 0.9346938775510204, "grad_norm": 2.0233896081392504, "learning_rate": 2.2286767210430105e-07, "loss": 0.8827, "step": 8931 }, { "epoch": 0.9347985347985348, "grad_norm": 2.040817721869765, "learning_rate": 2.2215657957676774e-07, "loss": 0.9144, "step": 8932 }, { "epoch": 0.9349031920460492, "grad_norm": 1.922624595035762, "learning_rate": 2.2144661054326443e-07, "loss": 0.8159, "step": 8933 }, { "epoch": 0.9350078492935636, "grad_norm": 1.6714847110688713, "learning_rate": 2.2073776508537038e-07, "loss": 0.7365, "step": 8934 }, { "epoch": 0.9351125065410779, "grad_norm": 2.1817225705896393, "learning_rate": 2.2003004328452926e-07, "loss": 0.7756, "step": 8935 }, { "epoch": 0.9352171637885923, "grad_norm": 2.173883426438636, "learning_rate": 2.1932344522206494e-07, "loss": 0.8909, "step": 8936 }, { "epoch": 0.9353218210361067, "grad_norm": 2.029134530081249, "learning_rate": 2.186179709791625e-07, "loss": 0.9164, "step": 8937 }, { "epoch": 0.9354264782836211, "grad_norm": 2.020431951662609, "learning_rate": 2.1791362063688481e-07, "loss": 0.8419, "step": 8938 }, { "epoch": 0.9355311355311355, "grad_norm": 2.1073518757776664, "learning_rate": 2.1721039427616164e-07, "loss": 0.8759, "step": 8939 }, { "epoch": 0.9356357927786499, "grad_norm": 1.976379251020562, "learning_rate": 2.1650829197779722e-07, "loss": 0.9092, "step": 8940 }, { "epoch": 0.9357404500261643, "grad_norm": 2.2148390374272737, "learning_rate": 2.1580731382246255e-07, "loss": 0.8463, "step": 8941 }, { "epoch": 0.9358451072736788, "grad_norm": 2.3658272163282734, "learning_rate": 2.1510745989070215e-07, "loss": 0.8825, "step": 8942 }, { "epoch": 0.935949764521193, "grad_norm": 2.1121430732937596, "learning_rate": 2.144087302629305e-07, "loss": 0.7826, "step": 8943 }, { "epoch": 0.9360544217687075, "grad_norm": 1.725035272587334, "learning_rate": 2.1371112501943348e-07, "loss": 0.7282, "step": 8944 }, { "epoch": 0.9361590790162219, "grad_norm": 1.8745605871155686, "learning_rate": 2.130146442403669e-07, "loss": 0.864, "step": 8945 }, { "epoch": 0.9362637362637363, "grad_norm": 2.4987700519915994, "learning_rate": 2.1231928800575897e-07, "loss": 0.9905, "step": 8946 }, { "epoch": 0.9363683935112507, "grad_norm": 1.7532505918821066, "learning_rate": 2.1162505639550468e-07, "loss": 0.7843, "step": 8947 }, { "epoch": 0.9364730507587651, "grad_norm": 2.1811207939897783, "learning_rate": 2.109319494893758e-07, "loss": 0.9079, "step": 8948 }, { "epoch": 0.9365777080062795, "grad_norm": 2.0571665531615944, "learning_rate": 2.1023996736700968e-07, "loss": 0.7592, "step": 8949 }, { "epoch": 0.9366823652537938, "grad_norm": 1.9908145189339748, "learning_rate": 2.0954911010791834e-07, "loss": 0.8919, "step": 8950 }, { "epoch": 0.9367870225013082, "grad_norm": 1.879635801498437, "learning_rate": 2.0885937779148158e-07, "loss": 0.756, "step": 8951 }, { "epoch": 0.9368916797488226, "grad_norm": 2.0278954913132843, "learning_rate": 2.0817077049694934e-07, "loss": 0.8411, "step": 8952 }, { "epoch": 0.936996336996337, "grad_norm": 2.1053845951642396, "learning_rate": 2.074832883034461e-07, "loss": 0.8745, "step": 8953 }, { "epoch": 0.9371009942438514, "grad_norm": 1.9499721033329678, "learning_rate": 2.0679693128996535e-07, "loss": 0.8599, "step": 8954 }, { "epoch": 0.9372056514913658, "grad_norm": 2.2937105767755996, "learning_rate": 2.061116995353707e-07, "loss": 0.9106, "step": 8955 }, { "epoch": 0.9373103087388802, "grad_norm": 2.0231281748039085, "learning_rate": 2.0542759311839468e-07, "loss": 0.8773, "step": 8956 }, { "epoch": 0.9374149659863945, "grad_norm": 2.0532732484680594, "learning_rate": 2.047446121176444e-07, "loss": 0.939, "step": 8957 }, { "epoch": 0.9375196232339089, "grad_norm": 1.8426241069013005, "learning_rate": 2.040627566115949e-07, "loss": 0.8707, "step": 8958 }, { "epoch": 0.9376242804814233, "grad_norm": 1.9657462776989114, "learning_rate": 2.0338202667859462e-07, "loss": 0.9519, "step": 8959 }, { "epoch": 0.9377289377289377, "grad_norm": 2.2777404864137556, "learning_rate": 2.0270242239685768e-07, "loss": 0.8969, "step": 8960 }, { "epoch": 0.9378335949764521, "grad_norm": 2.080680228339346, "learning_rate": 2.0202394384447376e-07, "loss": 0.9103, "step": 8961 }, { "epoch": 0.9379382522239665, "grad_norm": 2.301306982738778, "learning_rate": 2.0134659109940057e-07, "loss": 0.9517, "step": 8962 }, { "epoch": 0.938042909471481, "grad_norm": 2.1571228068534767, "learning_rate": 2.0067036423946916e-07, "loss": 0.9849, "step": 8963 }, { "epoch": 0.9381475667189952, "grad_norm": 1.9252681696412213, "learning_rate": 1.999952633423785e-07, "loss": 0.7812, "step": 8964 }, { "epoch": 0.9382522239665096, "grad_norm": 1.9760309458462875, "learning_rate": 1.9932128848569875e-07, "loss": 0.8616, "step": 8965 }, { "epoch": 0.938356881214024, "grad_norm": 2.4624548411289324, "learning_rate": 1.9864843974687133e-07, "loss": 0.9182, "step": 8966 }, { "epoch": 0.9384615384615385, "grad_norm": 2.0682795924841653, "learning_rate": 1.9797671720320543e-07, "loss": 0.9754, "step": 8967 }, { "epoch": 0.9385661957090529, "grad_norm": 2.014284349621675, "learning_rate": 1.9730612093188716e-07, "loss": 0.8685, "step": 8968 }, { "epoch": 0.9386708529565673, "grad_norm": 2.4120062967864513, "learning_rate": 1.966366510099682e-07, "loss": 0.9244, "step": 8969 }, { "epoch": 0.9387755102040817, "grad_norm": 2.3280174180689035, "learning_rate": 1.9596830751437034e-07, "loss": 0.9299, "step": 8970 }, { "epoch": 0.938880167451596, "grad_norm": 2.074408047932079, "learning_rate": 1.9530109052188883e-07, "loss": 0.9401, "step": 8971 }, { "epoch": 0.9389848246991104, "grad_norm": 1.7420567087126304, "learning_rate": 1.946350001091879e-07, "loss": 0.669, "step": 8972 }, { "epoch": 0.9390894819466248, "grad_norm": 2.2410970937411743, "learning_rate": 1.9397003635280408e-07, "loss": 0.8553, "step": 8973 }, { "epoch": 0.9391941391941392, "grad_norm": 1.9769963769064203, "learning_rate": 1.9330619932914184e-07, "loss": 0.7936, "step": 8974 }, { "epoch": 0.9392987964416536, "grad_norm": 2.37273596450195, "learning_rate": 1.9264348911447795e-07, "loss": 0.7941, "step": 8975 }, { "epoch": 0.939403453689168, "grad_norm": 2.0813158115777854, "learning_rate": 1.91981905784957e-07, "loss": 0.9177, "step": 8976 }, { "epoch": 0.9395081109366824, "grad_norm": 2.290897372588027, "learning_rate": 1.9132144941659935e-07, "loss": 0.8598, "step": 8977 }, { "epoch": 0.9396127681841967, "grad_norm": 2.2230141215809534, "learning_rate": 1.9066212008529095e-07, "loss": 0.9614, "step": 8978 }, { "epoch": 0.9397174254317111, "grad_norm": 1.9411269671666274, "learning_rate": 1.9000391786679118e-07, "loss": 0.8767, "step": 8979 }, { "epoch": 0.9398220826792255, "grad_norm": 1.9684623940624517, "learning_rate": 1.8934684283672844e-07, "loss": 0.7992, "step": 8980 }, { "epoch": 0.9399267399267399, "grad_norm": 2.2097045916457687, "learning_rate": 1.886908950706001e-07, "loss": 0.7911, "step": 8981 }, { "epoch": 0.9400313971742543, "grad_norm": 2.2051242891982823, "learning_rate": 1.8803607464377816e-07, "loss": 0.8268, "step": 8982 }, { "epoch": 0.9401360544217687, "grad_norm": 2.0731015805325947, "learning_rate": 1.8738238163150346e-07, "loss": 0.9092, "step": 8983 }, { "epoch": 0.9402407116692831, "grad_norm": 2.0982860002338235, "learning_rate": 1.8672981610888484e-07, "loss": 0.9503, "step": 8984 }, { "epoch": 0.9403453689167975, "grad_norm": 2.0377487369557055, "learning_rate": 1.8607837815090345e-07, "loss": 0.9304, "step": 8985 }, { "epoch": 0.9404500261643118, "grad_norm": 2.2351989370813543, "learning_rate": 1.8542806783241053e-07, "loss": 0.8992, "step": 8986 }, { "epoch": 0.9405546834118262, "grad_norm": 2.0623835431044477, "learning_rate": 1.8477888522812847e-07, "loss": 0.9759, "step": 8987 }, { "epoch": 0.9406593406593406, "grad_norm": 2.2769860510032776, "learning_rate": 1.8413083041265213e-07, "loss": 0.8573, "step": 8988 }, { "epoch": 0.940763997906855, "grad_norm": 1.9523228068902658, "learning_rate": 1.8348390346044187e-07, "loss": 0.7817, "step": 8989 }, { "epoch": 0.9408686551543695, "grad_norm": 2.18666471676111, "learning_rate": 1.828381044458305e-07, "loss": 0.9009, "step": 8990 }, { "epoch": 0.9409733124018839, "grad_norm": 1.9241205019311667, "learning_rate": 1.8219343344302198e-07, "loss": 0.8631, "step": 8991 }, { "epoch": 0.9410779696493983, "grad_norm": 2.04039821239051, "learning_rate": 1.815498905260915e-07, "loss": 0.8371, "step": 8992 }, { "epoch": 0.9411826268969126, "grad_norm": 2.3419236200201516, "learning_rate": 1.8090747576898217e-07, "loss": 0.8494, "step": 8993 }, { "epoch": 0.941287284144427, "grad_norm": 2.1610439047997145, "learning_rate": 1.8026618924551041e-07, "loss": 0.8086, "step": 8994 }, { "epoch": 0.9413919413919414, "grad_norm": 1.9220224743991272, "learning_rate": 1.7962603102935848e-07, "loss": 0.8518, "step": 8995 }, { "epoch": 0.9414965986394558, "grad_norm": 2.0058482503008763, "learning_rate": 1.789870011940842e-07, "loss": 0.7359, "step": 8996 }, { "epoch": 0.9416012558869702, "grad_norm": 2.1650612466125145, "learning_rate": 1.7834909981311321e-07, "loss": 0.9187, "step": 8997 }, { "epoch": 0.9417059131344846, "grad_norm": 2.1222322501737008, "learning_rate": 1.7771232695974028e-07, "loss": 0.9376, "step": 8998 }, { "epoch": 0.941810570381999, "grad_norm": 1.8756795331701073, "learning_rate": 1.7707668270713463e-07, "loss": 0.8314, "step": 8999 }, { "epoch": 0.9419152276295133, "grad_norm": 2.0250266930245373, "learning_rate": 1.7644216712832897e-07, "loss": 0.8112, "step": 9000 }, { "epoch": 0.9420198848770277, "grad_norm": 1.8965489062179972, "learning_rate": 1.7580878029623382e-07, "loss": 0.8542, "step": 9001 }, { "epoch": 0.9421245421245421, "grad_norm": 2.0182139408122657, "learning_rate": 1.751765222836266e-07, "loss": 0.9074, "step": 9002 }, { "epoch": 0.9422291993720565, "grad_norm": 1.915756504936258, "learning_rate": 1.7454539316315356e-07, "loss": 0.9071, "step": 9003 }, { "epoch": 0.9423338566195709, "grad_norm": 1.8890927536983408, "learning_rate": 1.7391539300733118e-07, "loss": 0.7825, "step": 9004 }, { "epoch": 0.9424385138670853, "grad_norm": 2.0792217263743016, "learning_rate": 1.7328652188855044e-07, "loss": 0.8445, "step": 9005 }, { "epoch": 0.9425431711145997, "grad_norm": 2.2281920177680767, "learning_rate": 1.726587798790702e-07, "loss": 0.7547, "step": 9006 }, { "epoch": 0.942647828362114, "grad_norm": 2.3393383954988853, "learning_rate": 1.7203216705101834e-07, "loss": 0.8758, "step": 9007 }, { "epoch": 0.9427524856096284, "grad_norm": 2.025231383373048, "learning_rate": 1.714066834763939e-07, "loss": 0.8952, "step": 9008 }, { "epoch": 0.9428571428571428, "grad_norm": 1.9231761232116151, "learning_rate": 1.7078232922706495e-07, "loss": 0.8794, "step": 9009 }, { "epoch": 0.9429618001046572, "grad_norm": 2.55540070865077, "learning_rate": 1.70159104374773e-07, "loss": 0.8213, "step": 9010 }, { "epoch": 0.9430664573521716, "grad_norm": 2.2864462638812113, "learning_rate": 1.6953700899112746e-07, "loss": 0.9661, "step": 9011 }, { "epoch": 0.9431711145996861, "grad_norm": 2.022644471342003, "learning_rate": 1.689160431476078e-07, "loss": 0.7653, "step": 9012 }, { "epoch": 0.9432757718472005, "grad_norm": 1.9668619711190107, "learning_rate": 1.682962069155636e-07, "loss": 0.8767, "step": 9013 }, { "epoch": 0.9433804290947148, "grad_norm": 2.125692202668173, "learning_rate": 1.6767750036621677e-07, "loss": 0.8827, "step": 9014 }, { "epoch": 0.9434850863422292, "grad_norm": 1.9970347338306567, "learning_rate": 1.6705992357065713e-07, "loss": 0.8181, "step": 9015 }, { "epoch": 0.9435897435897436, "grad_norm": 2.1417706192896504, "learning_rate": 1.6644347659984571e-07, "loss": 0.8472, "step": 9016 }, { "epoch": 0.943694400837258, "grad_norm": 2.4107829527631517, "learning_rate": 1.6582815952461474e-07, "loss": 0.9227, "step": 9017 }, { "epoch": 0.9437990580847724, "grad_norm": 2.272587316891678, "learning_rate": 1.6521397241566317e-07, "loss": 0.7621, "step": 9018 }, { "epoch": 0.9439037153322868, "grad_norm": 1.9616247365575001, "learning_rate": 1.6460091534356238e-07, "loss": 0.8642, "step": 9019 }, { "epoch": 0.9440083725798012, "grad_norm": 2.213024695754038, "learning_rate": 1.6398898837875598e-07, "loss": 0.8659, "step": 9020 }, { "epoch": 0.9441130298273155, "grad_norm": 2.18849504081884, "learning_rate": 1.6337819159155556e-07, "loss": 0.8914, "step": 9021 }, { "epoch": 0.9442176870748299, "grad_norm": 1.9962396515971108, "learning_rate": 1.6276852505213937e-07, "loss": 0.8487, "step": 9022 }, { "epoch": 0.9443223443223443, "grad_norm": 1.60529112595556, "learning_rate": 1.621599888305636e-07, "loss": 0.8027, "step": 9023 }, { "epoch": 0.9444270015698587, "grad_norm": 2.3647898369898535, "learning_rate": 1.615525829967479e-07, "loss": 0.8755, "step": 9024 }, { "epoch": 0.9445316588173731, "grad_norm": 2.199820408495756, "learning_rate": 1.6094630762048535e-07, "loss": 0.9419, "step": 9025 }, { "epoch": 0.9446363160648875, "grad_norm": 2.1623987139154295, "learning_rate": 1.6034116277143795e-07, "loss": 0.9243, "step": 9026 }, { "epoch": 0.9447409733124019, "grad_norm": 2.223255861105165, "learning_rate": 1.5973714851913792e-07, "loss": 0.9347, "step": 9027 }, { "epoch": 0.9448456305599163, "grad_norm": 2.227214544210135, "learning_rate": 1.5913426493298745e-07, "loss": 0.8761, "step": 9028 }, { "epoch": 0.9449502878074306, "grad_norm": 2.0771281382507465, "learning_rate": 1.5853251208225895e-07, "loss": 0.8977, "step": 9029 }, { "epoch": 0.945054945054945, "grad_norm": 2.1250598064265716, "learning_rate": 1.57931890036096e-07, "loss": 0.8215, "step": 9030 }, { "epoch": 0.9451596023024594, "grad_norm": 1.906429846009798, "learning_rate": 1.5733239886351114e-07, "loss": 0.8799, "step": 9031 }, { "epoch": 0.9452642595499738, "grad_norm": 2.0717540052678385, "learning_rate": 1.567340386333871e-07, "loss": 0.8241, "step": 9032 }, { "epoch": 0.9453689167974882, "grad_norm": 2.2358749882717532, "learning_rate": 1.5613680941447663e-07, "loss": 0.878, "step": 9033 }, { "epoch": 0.9454735740450027, "grad_norm": 2.3438037978057498, "learning_rate": 1.555407112754015e-07, "loss": 0.9401, "step": 9034 }, { "epoch": 0.9455782312925171, "grad_norm": 1.8342854640981783, "learning_rate": 1.549457442846558e-07, "loss": 0.9056, "step": 9035 }, { "epoch": 0.9456828885400314, "grad_norm": 1.9116944068383945, "learning_rate": 1.5435190851060156e-07, "loss": 0.7609, "step": 9036 }, { "epoch": 0.9457875457875458, "grad_norm": 1.7292150071570478, "learning_rate": 1.5375920402147305e-07, "loss": 0.7398, "step": 9037 }, { "epoch": 0.9458922030350602, "grad_norm": 2.181589596999383, "learning_rate": 1.5316763088537135e-07, "loss": 0.8476, "step": 9038 }, { "epoch": 0.9459968602825746, "grad_norm": 2.1307840848771447, "learning_rate": 1.52577189170271e-07, "loss": 0.8192, "step": 9039 }, { "epoch": 0.946101517530089, "grad_norm": 2.360122477075766, "learning_rate": 1.5198787894401433e-07, "loss": 0.9385, "step": 9040 }, { "epoch": 0.9462061747776034, "grad_norm": 2.456600162959172, "learning_rate": 1.5139970027431505e-07, "loss": 0.87, "step": 9041 }, { "epoch": 0.9463108320251178, "grad_norm": 1.9169412412415379, "learning_rate": 1.508126532287557e-07, "loss": 0.8699, "step": 9042 }, { "epoch": 0.9464154892726321, "grad_norm": 2.2877674182580297, "learning_rate": 1.5022673787478793e-07, "loss": 0.8462, "step": 9043 }, { "epoch": 0.9465201465201465, "grad_norm": 2.039640660135877, "learning_rate": 1.496419542797356e-07, "loss": 0.7836, "step": 9044 }, { "epoch": 0.9466248037676609, "grad_norm": 1.9739212006187399, "learning_rate": 1.4905830251079167e-07, "loss": 0.9003, "step": 9045 }, { "epoch": 0.9467294610151753, "grad_norm": 1.9770702491808176, "learning_rate": 1.4847578263501916e-07, "loss": 0.8087, "step": 9046 }, { "epoch": 0.9468341182626897, "grad_norm": 2.25033314641664, "learning_rate": 1.4789439471935008e-07, "loss": 0.9281, "step": 9047 }, { "epoch": 0.9469387755102041, "grad_norm": 2.2873109923661126, "learning_rate": 1.4731413883058655e-07, "loss": 0.9335, "step": 9048 }, { "epoch": 0.9470434327577185, "grad_norm": 2.151440014464125, "learning_rate": 1.4673501503540187e-07, "loss": 0.8403, "step": 9049 }, { "epoch": 0.9471480900052328, "grad_norm": 1.8386378234588971, "learning_rate": 1.4615702340033954e-07, "loss": 0.8304, "step": 9050 }, { "epoch": 0.9472527472527472, "grad_norm": 2.11039769161247, "learning_rate": 1.4558016399181086e-07, "loss": 0.8667, "step": 9051 }, { "epoch": 0.9473574045002616, "grad_norm": 2.2668021084417527, "learning_rate": 1.4500443687609945e-07, "loss": 0.9028, "step": 9052 }, { "epoch": 0.947462061747776, "grad_norm": 2.5925480089352844, "learning_rate": 1.4442984211935463e-07, "loss": 0.9285, "step": 9053 }, { "epoch": 0.9475667189952904, "grad_norm": 1.9257446949630599, "learning_rate": 1.4385637978760026e-07, "loss": 0.8561, "step": 9054 }, { "epoch": 0.9476713762428048, "grad_norm": 2.2283800756599264, "learning_rate": 1.4328404994672917e-07, "loss": 0.8976, "step": 9055 }, { "epoch": 0.9477760334903192, "grad_norm": 1.8811361955772714, "learning_rate": 1.4271285266250323e-07, "loss": 0.8489, "step": 9056 }, { "epoch": 0.9478806907378335, "grad_norm": 2.2540466745301493, "learning_rate": 1.4214278800055102e-07, "loss": 0.7986, "step": 9057 }, { "epoch": 0.947985347985348, "grad_norm": 1.7605020970876353, "learning_rate": 1.4157385602637685e-07, "loss": 0.7758, "step": 9058 }, { "epoch": 0.9480900052328624, "grad_norm": 2.0483519247325197, "learning_rate": 1.4100605680535284e-07, "loss": 0.8848, "step": 9059 }, { "epoch": 0.9481946624803768, "grad_norm": 1.814643560597211, "learning_rate": 1.4043939040272014e-07, "loss": 0.8009, "step": 9060 }, { "epoch": 0.9482993197278912, "grad_norm": 2.1844789581596333, "learning_rate": 1.398738568835878e-07, "loss": 0.848, "step": 9061 }, { "epoch": 0.9484039769754056, "grad_norm": 2.08957785838643, "learning_rate": 1.3930945631293712e-07, "loss": 0.9124, "step": 9062 }, { "epoch": 0.94850863422292, "grad_norm": 2.1549858224284826, "learning_rate": 1.3874618875561963e-07, "loss": 0.9237, "step": 9063 }, { "epoch": 0.9486132914704343, "grad_norm": 2.029197205102344, "learning_rate": 1.3818405427635683e-07, "loss": 0.8428, "step": 9064 }, { "epoch": 0.9487179487179487, "grad_norm": 2.162465083314952, "learning_rate": 1.376230529397371e-07, "loss": 0.953, "step": 9065 }, { "epoch": 0.9488226059654631, "grad_norm": 2.2719870379626967, "learning_rate": 1.370631848102233e-07, "loss": 0.9192, "step": 9066 }, { "epoch": 0.9489272632129775, "grad_norm": 2.3224845795701117, "learning_rate": 1.3650444995214286e-07, "loss": 0.8813, "step": 9067 }, { "epoch": 0.9490319204604919, "grad_norm": 2.1462838164243014, "learning_rate": 1.3594684842969664e-07, "loss": 0.8676, "step": 9068 }, { "epoch": 0.9491365777080063, "grad_norm": 1.815945744671672, "learning_rate": 1.3539038030695338e-07, "loss": 0.8986, "step": 9069 }, { "epoch": 0.9492412349555207, "grad_norm": 1.906488906851326, "learning_rate": 1.3483504564785422e-07, "loss": 0.8174, "step": 9070 }, { "epoch": 0.9493458922030351, "grad_norm": 2.236795411684707, "learning_rate": 1.3428084451620694e-07, "loss": 0.8814, "step": 9071 }, { "epoch": 0.9494505494505494, "grad_norm": 2.0557267418605902, "learning_rate": 1.337277769756895e-07, "loss": 0.8568, "step": 9072 }, { "epoch": 0.9495552066980638, "grad_norm": 2.6957377041769504, "learning_rate": 1.331758430898522e-07, "loss": 0.9719, "step": 9073 }, { "epoch": 0.9496598639455782, "grad_norm": 1.8970879310242872, "learning_rate": 1.326250429221121e-07, "loss": 0.8987, "step": 9074 }, { "epoch": 0.9497645211930926, "grad_norm": 1.934705112665449, "learning_rate": 1.3207537653575851e-07, "loss": 0.8891, "step": 9075 }, { "epoch": 0.949869178440607, "grad_norm": 2.2063646405916124, "learning_rate": 1.3152684399394877e-07, "loss": 0.8031, "step": 9076 }, { "epoch": 0.9499738356881214, "grad_norm": 1.979196270712241, "learning_rate": 1.3097944535970908e-07, "loss": 0.8487, "step": 9077 }, { "epoch": 0.9500784929356358, "grad_norm": 2.3747448704287173, "learning_rate": 1.3043318069593914e-07, "loss": 0.9484, "step": 9078 }, { "epoch": 0.9501831501831501, "grad_norm": 1.9720183796039061, "learning_rate": 1.2988805006540317e-07, "loss": 0.7593, "step": 9079 }, { "epoch": 0.9502878074306645, "grad_norm": 1.66623523158947, "learning_rate": 1.2934405353073997e-07, "loss": 0.7237, "step": 9080 }, { "epoch": 0.950392464678179, "grad_norm": 2.010121946011193, "learning_rate": 1.288011911544551e-07, "loss": 0.781, "step": 9081 }, { "epoch": 0.9504971219256934, "grad_norm": 2.338519331926691, "learning_rate": 1.282594629989231e-07, "loss": 0.8435, "step": 9082 }, { "epoch": 0.9506017791732078, "grad_norm": 2.1718237588653144, "learning_rate": 1.2771886912639088e-07, "loss": 0.8509, "step": 9083 }, { "epoch": 0.9507064364207222, "grad_norm": 2.2698280951956, "learning_rate": 1.2717940959897424e-07, "loss": 0.857, "step": 9084 }, { "epoch": 0.9508110936682366, "grad_norm": 2.1107172447457065, "learning_rate": 1.2664108447865918e-07, "loss": 0.7931, "step": 9085 }, { "epoch": 0.9509157509157509, "grad_norm": 1.9797524667575515, "learning_rate": 1.2610389382729738e-07, "loss": 0.7899, "step": 9086 }, { "epoch": 0.9510204081632653, "grad_norm": 2.057673027423047, "learning_rate": 1.2556783770661497e-07, "loss": 0.9024, "step": 9087 }, { "epoch": 0.9511250654107797, "grad_norm": 2.1671668650036535, "learning_rate": 1.250329161782049e-07, "loss": 0.8494, "step": 9088 }, { "epoch": 0.9512297226582941, "grad_norm": 2.0877723433934086, "learning_rate": 1.2449912930353248e-07, "loss": 0.885, "step": 9089 }, { "epoch": 0.9513343799058085, "grad_norm": 2.177290420704625, "learning_rate": 1.2396647714392974e-07, "loss": 0.8511, "step": 9090 }, { "epoch": 0.9514390371533229, "grad_norm": 2.051463049714876, "learning_rate": 1.2343495976059882e-07, "loss": 0.9175, "step": 9091 }, { "epoch": 0.9515436944008373, "grad_norm": 1.9910691736016368, "learning_rate": 1.2290457721461314e-07, "loss": 0.8602, "step": 9092 }, { "epoch": 0.9516483516483516, "grad_norm": 1.9342141459380122, "learning_rate": 1.22375329566915e-07, "loss": 0.8365, "step": 9093 }, { "epoch": 0.951753008895866, "grad_norm": 1.800982384325224, "learning_rate": 1.2184721687831468e-07, "loss": 0.847, "step": 9094 }, { "epoch": 0.9518576661433804, "grad_norm": 2.176163511782998, "learning_rate": 1.2132023920949586e-07, "loss": 0.8672, "step": 9095 }, { "epoch": 0.9519623233908948, "grad_norm": 2.0098442785162343, "learning_rate": 1.2079439662100567e-07, "loss": 0.7643, "step": 9096 }, { "epoch": 0.9520669806384092, "grad_norm": 2.1686254394715867, "learning_rate": 1.20269689173268e-07, "loss": 0.8923, "step": 9097 }, { "epoch": 0.9521716378859236, "grad_norm": 2.4479350828010182, "learning_rate": 1.1974611692657124e-07, "loss": 0.9458, "step": 9098 }, { "epoch": 0.952276295133438, "grad_norm": 1.7758983963507677, "learning_rate": 1.1922367994107397e-07, "loss": 0.798, "step": 9099 }, { "epoch": 0.9523809523809523, "grad_norm": 1.9312505481701443, "learning_rate": 1.1870237827680708e-07, "loss": 0.8766, "step": 9100 }, { "epoch": 0.9524856096284667, "grad_norm": 2.1489020046415384, "learning_rate": 1.181822119936682e-07, "loss": 0.8693, "step": 9101 }, { "epoch": 0.9525902668759811, "grad_norm": 1.9604487123502172, "learning_rate": 1.1766318115142505e-07, "loss": 0.9279, "step": 9102 }, { "epoch": 0.9526949241234955, "grad_norm": 1.979028252970426, "learning_rate": 1.1714528580971774e-07, "loss": 0.9074, "step": 9103 }, { "epoch": 0.95279958137101, "grad_norm": 2.0147250746415244, "learning_rate": 1.1662852602805087e-07, "loss": 0.9098, "step": 9104 }, { "epoch": 0.9529042386185244, "grad_norm": 1.941672354841768, "learning_rate": 1.1611290186580137e-07, "loss": 0.8705, "step": 9105 }, { "epoch": 0.9530088958660388, "grad_norm": 2.081202528742428, "learning_rate": 1.1559841338221633e-07, "loss": 0.8618, "step": 9106 }, { "epoch": 0.9531135531135531, "grad_norm": 1.900573593944078, "learning_rate": 1.1508506063641178e-07, "loss": 0.7274, "step": 9107 }, { "epoch": 0.9532182103610675, "grad_norm": 2.2879689467033897, "learning_rate": 1.1457284368737276e-07, "loss": 0.8774, "step": 9108 }, { "epoch": 0.9533228676085819, "grad_norm": 2.0021870264860553, "learning_rate": 1.140617625939533e-07, "loss": 0.8242, "step": 9109 }, { "epoch": 0.9534275248560963, "grad_norm": 2.189380584746929, "learning_rate": 1.1355181741487864e-07, "loss": 0.9973, "step": 9110 }, { "epoch": 0.9535321821036107, "grad_norm": 2.046145341571205, "learning_rate": 1.1304300820874192e-07, "loss": 0.9143, "step": 9111 }, { "epoch": 0.9536368393511251, "grad_norm": 1.9336419044364952, "learning_rate": 1.1253533503400638e-07, "loss": 0.8793, "step": 9112 }, { "epoch": 0.9537414965986395, "grad_norm": 2.1511547350474616, "learning_rate": 1.1202879794900535e-07, "loss": 0.8096, "step": 9113 }, { "epoch": 0.9538461538461539, "grad_norm": 1.9734945556556622, "learning_rate": 1.1152339701194004e-07, "loss": 0.808, "step": 9114 }, { "epoch": 0.9539508110936682, "grad_norm": 2.0081235379107203, "learning_rate": 1.1101913228088179e-07, "loss": 0.8715, "step": 9115 }, { "epoch": 0.9540554683411826, "grad_norm": 2.117755926561859, "learning_rate": 1.1051600381377315e-07, "loss": 0.8589, "step": 9116 }, { "epoch": 0.954160125588697, "grad_norm": 2.0526210136326695, "learning_rate": 1.100140116684234e-07, "loss": 0.8793, "step": 9117 }, { "epoch": 0.9542647828362114, "grad_norm": 2.112405339433251, "learning_rate": 1.095131559025131e-07, "loss": 0.8667, "step": 9118 }, { "epoch": 0.9543694400837258, "grad_norm": 2.533576036279591, "learning_rate": 1.0901343657359175e-07, "loss": 0.7516, "step": 9119 }, { "epoch": 0.9544740973312402, "grad_norm": 2.154561014085007, "learning_rate": 1.0851485373907677e-07, "loss": 0.9504, "step": 9120 }, { "epoch": 0.9545787545787546, "grad_norm": 2.4408447371630464, "learning_rate": 1.0801740745625677e-07, "loss": 0.9668, "step": 9121 }, { "epoch": 0.9546834118262689, "grad_norm": 1.9109247782495005, "learning_rate": 1.0752109778229158e-07, "loss": 0.8883, "step": 9122 }, { "epoch": 0.9547880690737833, "grad_norm": 2.013558136876636, "learning_rate": 1.070259247742056e-07, "loss": 0.885, "step": 9123 }, { "epoch": 0.9548927263212977, "grad_norm": 2.136904562312622, "learning_rate": 1.0653188848889551e-07, "loss": 0.8507, "step": 9124 }, { "epoch": 0.9549973835688121, "grad_norm": 1.978016541353706, "learning_rate": 1.0603898898312704e-07, "loss": 0.8988, "step": 9125 }, { "epoch": 0.9551020408163265, "grad_norm": 2.292835501413161, "learning_rate": 1.0554722631353709e-07, "loss": 0.9214, "step": 9126 }, { "epoch": 0.955206698063841, "grad_norm": 1.9578723859363283, "learning_rate": 1.0505660053662825e-07, "loss": 0.807, "step": 9127 }, { "epoch": 0.9553113553113554, "grad_norm": 2.074313232154308, "learning_rate": 1.0456711170877543e-07, "loss": 0.865, "step": 9128 }, { "epoch": 0.9554160125588697, "grad_norm": 2.4188323428193166, "learning_rate": 1.0407875988622252e-07, "loss": 0.8573, "step": 9129 }, { "epoch": 0.9555206698063841, "grad_norm": 1.9965673098227608, "learning_rate": 1.0359154512508019e-07, "loss": 0.8046, "step": 9130 }, { "epoch": 0.9556253270538985, "grad_norm": 2.424445738477294, "learning_rate": 1.0310546748133143e-07, "loss": 1.0085, "step": 9131 }, { "epoch": 0.9557299843014129, "grad_norm": 1.9689504274155718, "learning_rate": 1.0262052701082825e-07, "loss": 0.8351, "step": 9132 }, { "epoch": 0.9558346415489273, "grad_norm": 2.108713019030409, "learning_rate": 1.021367237692894e-07, "loss": 0.9932, "step": 9133 }, { "epoch": 0.9559392987964417, "grad_norm": 1.7814011137810855, "learning_rate": 1.0165405781230708e-07, "loss": 0.8543, "step": 9134 }, { "epoch": 0.9560439560439561, "grad_norm": 2.458888218864724, "learning_rate": 1.0117252919533804e-07, "loss": 0.9106, "step": 9135 }, { "epoch": 0.9561486132914704, "grad_norm": 2.454880605782708, "learning_rate": 1.0069213797371357e-07, "loss": 0.8216, "step": 9136 }, { "epoch": 0.9562532705389848, "grad_norm": 2.3292738121246934, "learning_rate": 1.0021288420263065e-07, "loss": 0.8631, "step": 9137 }, { "epoch": 0.9563579277864992, "grad_norm": 1.9940769879892457, "learning_rate": 9.97347679371563e-08, "loss": 0.9346, "step": 9138 }, { "epoch": 0.9564625850340136, "grad_norm": 2.168224739156848, "learning_rate": 9.925778923222773e-08, "loss": 0.9215, "step": 9139 }, { "epoch": 0.956567242281528, "grad_norm": 2.0395622501364605, "learning_rate": 9.878194814264886e-08, "loss": 0.9338, "step": 9140 }, { "epoch": 0.9566718995290424, "grad_norm": 2.2995306333544696, "learning_rate": 9.830724472309594e-08, "loss": 0.9621, "step": 9141 }, { "epoch": 0.9567765567765568, "grad_norm": 1.9195393542003314, "learning_rate": 9.783367902811424e-08, "loss": 0.9511, "step": 9142 }, { "epoch": 0.9568812140240711, "grad_norm": 2.001176501644658, "learning_rate": 9.73612511121158e-08, "loss": 0.9361, "step": 9143 }, { "epoch": 0.9569858712715855, "grad_norm": 2.1602312695732224, "learning_rate": 9.688996102938497e-08, "loss": 0.7655, "step": 9144 }, { "epoch": 0.9570905285190999, "grad_norm": 2.3661293111777515, "learning_rate": 9.641980883407287e-08, "loss": 0.9697, "step": 9145 }, { "epoch": 0.9571951857666143, "grad_norm": 1.9875384007327113, "learning_rate": 9.595079458020185e-08, "loss": 0.7232, "step": 9146 }, { "epoch": 0.9572998430141287, "grad_norm": 1.5923915548495342, "learning_rate": 9.548291832166213e-08, "loss": 0.8366, "step": 9147 }, { "epoch": 0.9574045002616431, "grad_norm": 2.2856279399112225, "learning_rate": 9.501618011221291e-08, "loss": 0.9121, "step": 9148 }, { "epoch": 0.9575091575091575, "grad_norm": 2.1702328491811045, "learning_rate": 9.455058000548467e-08, "loss": 0.8221, "step": 9149 }, { "epoch": 0.957613814756672, "grad_norm": 2.169354118160728, "learning_rate": 9.408611805497459e-08, "loss": 0.8867, "step": 9150 }, { "epoch": 0.9577184720041862, "grad_norm": 1.9949827280347652, "learning_rate": 9.36227943140522e-08, "loss": 0.8586, "step": 9151 }, { "epoch": 0.9578231292517007, "grad_norm": 2.6143300269638288, "learning_rate": 9.316060883595158e-08, "loss": 0.8846, "step": 9152 }, { "epoch": 0.9579277864992151, "grad_norm": 2.0238769325890096, "learning_rate": 9.269956167378136e-08, "loss": 0.863, "step": 9153 }, { "epoch": 0.9580324437467295, "grad_norm": 1.7312936279188003, "learning_rate": 9.223965288051474e-08, "loss": 0.775, "step": 9154 }, { "epoch": 0.9581371009942439, "grad_norm": 1.679219951168678, "learning_rate": 9.17808825089983e-08, "loss": 0.7436, "step": 9155 }, { "epoch": 0.9582417582417583, "grad_norm": 2.0696046017408114, "learning_rate": 9.132325061194325e-08, "loss": 0.8891, "step": 9156 }, { "epoch": 0.9583464154892727, "grad_norm": 1.9650852136731827, "learning_rate": 9.086675724193528e-08, "loss": 0.8477, "step": 9157 }, { "epoch": 0.958451072736787, "grad_norm": 1.9933766746277393, "learning_rate": 9.041140245142355e-08, "loss": 0.9044, "step": 9158 }, { "epoch": 0.9585557299843014, "grad_norm": 2.43537039286737, "learning_rate": 8.995718629273065e-08, "loss": 0.9137, "step": 9159 }, { "epoch": 0.9586603872318158, "grad_norm": 2.0921310588419493, "learning_rate": 8.950410881804706e-08, "loss": 0.9069, "step": 9160 }, { "epoch": 0.9587650444793302, "grad_norm": 2.1188665014531147, "learning_rate": 8.905217007943223e-08, "loss": 0.8711, "step": 9161 }, { "epoch": 0.9588697017268446, "grad_norm": 1.8922155563080787, "learning_rate": 8.860137012881465e-08, "loss": 0.9361, "step": 9162 }, { "epoch": 0.958974358974359, "grad_norm": 1.711472138483851, "learning_rate": 8.815170901799174e-08, "loss": 0.7832, "step": 9163 }, { "epoch": 0.9590790162218734, "grad_norm": 2.05901764490939, "learning_rate": 8.770318679862999e-08, "loss": 0.791, "step": 9164 }, { "epoch": 0.9591836734693877, "grad_norm": 2.0760777814710285, "learning_rate": 8.725580352226815e-08, "loss": 0.8347, "step": 9165 }, { "epoch": 0.9592883307169021, "grad_norm": 1.904314569738717, "learning_rate": 8.680955924030954e-08, "loss": 0.8394, "step": 9166 }, { "epoch": 0.9593929879644165, "grad_norm": 1.5764956424130905, "learning_rate": 8.63644540040276e-08, "loss": 0.6921, "step": 9167 }, { "epoch": 0.9594976452119309, "grad_norm": 1.9033111511642289, "learning_rate": 8.592048786456698e-08, "loss": 0.7482, "step": 9168 }, { "epoch": 0.9596023024594453, "grad_norm": 2.07972888411746, "learning_rate": 8.547766087294018e-08, "loss": 0.8112, "step": 9169 }, { "epoch": 0.9597069597069597, "grad_norm": 2.1173563174976393, "learning_rate": 8.503597308002986e-08, "loss": 0.7684, "step": 9170 }, { "epoch": 0.9598116169544741, "grad_norm": 2.3978904207022067, "learning_rate": 8.459542453658653e-08, "loss": 0.9662, "step": 9171 }, { "epoch": 0.9599162742019884, "grad_norm": 2.015486093729567, "learning_rate": 8.415601529322858e-08, "loss": 0.9097, "step": 9172 }, { "epoch": 0.9600209314495028, "grad_norm": 2.100667884440473, "learning_rate": 8.371774540044564e-08, "loss": 0.8485, "step": 9173 }, { "epoch": 0.9601255886970173, "grad_norm": 2.2677052547515455, "learning_rate": 8.328061490859629e-08, "loss": 0.805, "step": 9174 }, { "epoch": 0.9602302459445317, "grad_norm": 2.502950900699366, "learning_rate": 8.284462386790815e-08, "loss": 0.8131, "step": 9175 }, { "epoch": 0.9603349031920461, "grad_norm": 1.845406579180182, "learning_rate": 8.240977232847669e-08, "loss": 0.8898, "step": 9176 }, { "epoch": 0.9604395604395605, "grad_norm": 2.644639245831181, "learning_rate": 8.19760603402675e-08, "loss": 0.9884, "step": 9177 }, { "epoch": 0.9605442176870749, "grad_norm": 2.0136239922533767, "learning_rate": 8.154348795311518e-08, "loss": 0.9439, "step": 9178 }, { "epoch": 0.9606488749345892, "grad_norm": 2.508687484109057, "learning_rate": 8.111205521672105e-08, "loss": 0.842, "step": 9179 }, { "epoch": 0.9607535321821036, "grad_norm": 2.247230140286789, "learning_rate": 8.068176218066104e-08, "loss": 0.7672, "step": 9180 }, { "epoch": 0.960858189429618, "grad_norm": 2.1501454719365465, "learning_rate": 8.025260889437558e-08, "loss": 0.906, "step": 9181 }, { "epoch": 0.9609628466771324, "grad_norm": 2.2857097799522395, "learning_rate": 7.982459540717413e-08, "loss": 0.8782, "step": 9182 }, { "epoch": 0.9610675039246468, "grad_norm": 2.418986186789819, "learning_rate": 7.939772176823735e-08, "loss": 0.8045, "step": 9183 }, { "epoch": 0.9611721611721612, "grad_norm": 2.1459549533496594, "learning_rate": 7.897198802661266e-08, "loss": 0.9515, "step": 9184 }, { "epoch": 0.9612768184196756, "grad_norm": 2.109672695130541, "learning_rate": 7.854739423121982e-08, "loss": 0.8162, "step": 9185 }, { "epoch": 0.9613814756671899, "grad_norm": 2.0736403186020205, "learning_rate": 7.812394043084315e-08, "loss": 0.9333, "step": 9186 }, { "epoch": 0.9614861329147043, "grad_norm": 1.9672228441543578, "learning_rate": 7.770162667413928e-08, "loss": 0.878, "step": 9187 }, { "epoch": 0.9615907901622187, "grad_norm": 2.152750014034463, "learning_rate": 7.728045300963383e-08, "loss": 0.8695, "step": 9188 }, { "epoch": 0.9616954474097331, "grad_norm": 2.1946243167100854, "learning_rate": 7.686041948571921e-08, "loss": 0.8581, "step": 9189 }, { "epoch": 0.9618001046572475, "grad_norm": 1.7958639695003695, "learning_rate": 7.644152615065903e-08, "loss": 0.9317, "step": 9190 }, { "epoch": 0.9619047619047619, "grad_norm": 2.103440015443088, "learning_rate": 7.602377305258479e-08, "loss": 0.8852, "step": 9191 }, { "epoch": 0.9620094191522763, "grad_norm": 1.9632826033869528, "learning_rate": 7.560716023949699e-08, "loss": 0.9123, "step": 9192 }, { "epoch": 0.9621140763997907, "grad_norm": 2.582600940053974, "learning_rate": 7.51916877592651e-08, "loss": 0.7138, "step": 9193 }, { "epoch": 0.962218733647305, "grad_norm": 2.426593772598077, "learning_rate": 7.477735565962873e-08, "loss": 0.9125, "step": 9194 }, { "epoch": 0.9623233908948194, "grad_norm": 2.0534735800973913, "learning_rate": 7.436416398819313e-08, "loss": 0.9127, "step": 9195 }, { "epoch": 0.9624280481423338, "grad_norm": 1.8146955476652327, "learning_rate": 7.395211279243808e-08, "loss": 0.7993, "step": 9196 }, { "epoch": 0.9625327053898483, "grad_norm": 2.2323319981904874, "learning_rate": 7.354120211970684e-08, "loss": 0.9055, "step": 9197 }, { "epoch": 0.9626373626373627, "grad_norm": 1.846938976318557, "learning_rate": 7.313143201721384e-08, "loss": 0.7602, "step": 9198 }, { "epoch": 0.9627420198848771, "grad_norm": 1.9136272699219579, "learning_rate": 7.272280253204367e-08, "loss": 0.7391, "step": 9199 }, { "epoch": 0.9628466771323915, "grad_norm": 2.430133835674404, "learning_rate": 7.231531371114875e-08, "loss": 0.8995, "step": 9200 }, { "epoch": 0.9629513343799058, "grad_norm": 2.131458703559915, "learning_rate": 7.190896560134942e-08, "loss": 0.8002, "step": 9201 }, { "epoch": 0.9630559916274202, "grad_norm": 1.9827611709558692, "learning_rate": 7.150375824933608e-08, "loss": 0.8915, "step": 9202 }, { "epoch": 0.9631606488749346, "grad_norm": 2.1954279263735317, "learning_rate": 7.10996917016682e-08, "loss": 0.9135, "step": 9203 }, { "epoch": 0.963265306122449, "grad_norm": 2.150004140942097, "learning_rate": 7.069676600477304e-08, "loss": 0.8268, "step": 9204 }, { "epoch": 0.9633699633699634, "grad_norm": 2.2008699779019256, "learning_rate": 7.029498120494916e-08, "loss": 0.8064, "step": 9205 }, { "epoch": 0.9634746206174778, "grad_norm": 1.9973249721182413, "learning_rate": 6.989433734836182e-08, "loss": 0.8953, "step": 9206 }, { "epoch": 0.9635792778649922, "grad_norm": 2.1012015027082023, "learning_rate": 6.949483448104422e-08, "loss": 0.8212, "step": 9207 }, { "epoch": 0.9636839351125065, "grad_norm": 2.252680211089396, "learning_rate": 6.909647264890296e-08, "loss": 0.9004, "step": 9208 }, { "epoch": 0.9637885923600209, "grad_norm": 2.0623106938542444, "learning_rate": 6.869925189770809e-08, "loss": 0.8154, "step": 9209 }, { "epoch": 0.9638932496075353, "grad_norm": 2.4261216634526113, "learning_rate": 6.830317227310202e-08, "loss": 0.9477, "step": 9210 }, { "epoch": 0.9639979068550497, "grad_norm": 2.2540675121561056, "learning_rate": 6.790823382059497e-08, "loss": 0.947, "step": 9211 }, { "epoch": 0.9641025641025641, "grad_norm": 2.1048027694750844, "learning_rate": 6.751443658556733e-08, "loss": 0.9247, "step": 9212 }, { "epoch": 0.9642072213500785, "grad_norm": 2.0158913138500045, "learning_rate": 6.712178061326513e-08, "loss": 0.9507, "step": 9213 }, { "epoch": 0.9643118785975929, "grad_norm": 2.003911985362856, "learning_rate": 6.673026594880672e-08, "loss": 0.8659, "step": 9214 }, { "epoch": 0.9644165358451072, "grad_norm": 1.9811498476133749, "learning_rate": 6.633989263717722e-08, "loss": 0.8903, "step": 9215 }, { "epoch": 0.9645211930926216, "grad_norm": 2.232106088676546, "learning_rate": 6.595066072323297e-08, "loss": 0.8571, "step": 9216 }, { "epoch": 0.964625850340136, "grad_norm": 2.5558019802236105, "learning_rate": 6.556257025169488e-08, "loss": 0.787, "step": 9217 }, { "epoch": 0.9647305075876504, "grad_norm": 1.8740286415803826, "learning_rate": 6.517562126715726e-08, "loss": 0.7737, "step": 9218 }, { "epoch": 0.9648351648351648, "grad_norm": 2.2998119940125403, "learning_rate": 6.478981381408012e-08, "loss": 0.8484, "step": 9219 }, { "epoch": 0.9649398220826793, "grad_norm": 2.1309929398704592, "learning_rate": 6.440514793679465e-08, "loss": 0.9934, "step": 9220 }, { "epoch": 0.9650444793301937, "grad_norm": 1.6771005831362835, "learning_rate": 6.402162367949883e-08, "loss": 0.7815, "step": 9221 }, { "epoch": 0.965149136577708, "grad_norm": 2.1014499143649603, "learning_rate": 6.363924108626074e-08, "loss": 0.8083, "step": 9222 }, { "epoch": 0.9652537938252224, "grad_norm": 1.896185187952413, "learning_rate": 6.325800020101747e-08, "loss": 0.7202, "step": 9223 }, { "epoch": 0.9653584510727368, "grad_norm": 1.9377624373854216, "learning_rate": 6.287790106757396e-08, "loss": 0.8109, "step": 9224 }, { "epoch": 0.9654631083202512, "grad_norm": 2.079814185022681, "learning_rate": 6.249894372960419e-08, "loss": 0.8617, "step": 9225 }, { "epoch": 0.9655677655677656, "grad_norm": 1.9888426701736415, "learning_rate": 6.212112823065109e-08, "loss": 0.7941, "step": 9226 }, { "epoch": 0.96567242281528, "grad_norm": 2.014484823659193, "learning_rate": 6.174445461412659e-08, "loss": 0.9727, "step": 9227 }, { "epoch": 0.9657770800627944, "grad_norm": 2.114946519857328, "learning_rate": 6.136892292331165e-08, "loss": 0.9009, "step": 9228 }, { "epoch": 0.9658817373103087, "grad_norm": 2.464678652778644, "learning_rate": 6.099453320135396e-08, "loss": 0.8523, "step": 9229 }, { "epoch": 0.9659863945578231, "grad_norm": 2.064257270218177, "learning_rate": 6.062128549127355e-08, "loss": 0.7611, "step": 9230 }, { "epoch": 0.9660910518053375, "grad_norm": 2.434856201533525, "learning_rate": 6.024917983595613e-08, "loss": 0.8406, "step": 9231 }, { "epoch": 0.9661957090528519, "grad_norm": 2.3887496596256113, "learning_rate": 5.98782162781586e-08, "loss": 0.8096, "step": 9232 }, { "epoch": 0.9663003663003663, "grad_norm": 2.365481278236734, "learning_rate": 5.9508394860504635e-08, "loss": 0.8159, "step": 9233 }, { "epoch": 0.9664050235478807, "grad_norm": 2.3102168301713832, "learning_rate": 5.9139715625488036e-08, "loss": 0.9929, "step": 9234 }, { "epoch": 0.9665096807953951, "grad_norm": 2.0516025566151264, "learning_rate": 5.8772178615469355e-08, "loss": 0.8985, "step": 9235 }, { "epoch": 0.9666143380429095, "grad_norm": 1.9481670124245183, "learning_rate": 5.8405783872680364e-08, "loss": 0.9224, "step": 9236 }, { "epoch": 0.9667189952904238, "grad_norm": 1.936915127599676, "learning_rate": 5.804053143922184e-08, "loss": 0.9193, "step": 9237 }, { "epoch": 0.9668236525379382, "grad_norm": 2.159472821731071, "learning_rate": 5.76764213570602e-08, "loss": 0.933, "step": 9238 }, { "epoch": 0.9669283097854526, "grad_norm": 2.1432057819082044, "learning_rate": 5.731345366803198e-08, "loss": 0.7927, "step": 9239 }, { "epoch": 0.967032967032967, "grad_norm": 2.1392093036071356, "learning_rate": 5.695162841384383e-08, "loss": 0.8698, "step": 9240 }, { "epoch": 0.9671376242804814, "grad_norm": 1.6287695667451592, "learning_rate": 5.659094563607137e-08, "loss": 0.6806, "step": 9241 }, { "epoch": 0.9672422815279959, "grad_norm": 1.9649345198964663, "learning_rate": 5.62314053761559e-08, "loss": 1.017, "step": 9242 }, { "epoch": 0.9673469387755103, "grad_norm": 1.9674398387069323, "learning_rate": 5.587300767540993e-08, "loss": 0.8725, "step": 9243 }, { "epoch": 0.9674515960230246, "grad_norm": 2.1414946410927294, "learning_rate": 5.551575257501496e-08, "loss": 1.0452, "step": 9244 }, { "epoch": 0.967556253270539, "grad_norm": 1.9468821502685336, "learning_rate": 5.5159640116019264e-08, "loss": 0.8087, "step": 9245 }, { "epoch": 0.9676609105180534, "grad_norm": 1.6540577154311573, "learning_rate": 5.4804670339341225e-08, "loss": 0.7482, "step": 9246 }, { "epoch": 0.9677655677655678, "grad_norm": 1.7009899889159676, "learning_rate": 5.445084328576711e-08, "loss": 0.7608, "step": 9247 }, { "epoch": 0.9678702250130822, "grad_norm": 2.2598483825041096, "learning_rate": 5.4098158995953274e-08, "loss": 0.9915, "step": 9248 }, { "epoch": 0.9679748822605966, "grad_norm": 2.3568793617507335, "learning_rate": 5.374661751042287e-08, "loss": 0.8288, "step": 9249 }, { "epoch": 0.968079539508111, "grad_norm": 2.1736820945639828, "learning_rate": 5.339621886957025e-08, "loss": 0.9018, "step": 9250 }, { "epoch": 0.9681841967556253, "grad_norm": 2.101174708101391, "learning_rate": 5.304696311365543e-08, "loss": 0.8313, "step": 9251 }, { "epoch": 0.9682888540031397, "grad_norm": 2.3344750804334122, "learning_rate": 5.269885028280963e-08, "loss": 0.8387, "step": 9252 }, { "epoch": 0.9683935112506541, "grad_norm": 2.2550389618502407, "learning_rate": 5.2351880417030874e-08, "loss": 0.8327, "step": 9253 }, { "epoch": 0.9684981684981685, "grad_norm": 1.8950935631200951, "learning_rate": 5.2006053556186155e-08, "loss": 0.9351, "step": 9254 }, { "epoch": 0.9686028257456829, "grad_norm": 2.3653393297563, "learning_rate": 5.166136974001368e-08, "loss": 0.8625, "step": 9255 }, { "epoch": 0.9687074829931973, "grad_norm": 2.098650984633713, "learning_rate": 5.1317829008116215e-08, "loss": 0.9453, "step": 9256 }, { "epoch": 0.9688121402407117, "grad_norm": 2.0350999152809006, "learning_rate": 5.097543139996886e-08, "loss": 0.8874, "step": 9257 }, { "epoch": 0.968916797488226, "grad_norm": 1.7287186179945104, "learning_rate": 5.0634176954913464e-08, "loss": 0.8007, "step": 9258 }, { "epoch": 0.9690214547357404, "grad_norm": 2.243175615621909, "learning_rate": 5.029406571216089e-08, "loss": 0.8416, "step": 9259 }, { "epoch": 0.9691261119832548, "grad_norm": 2.089301015163357, "learning_rate": 4.995509771078877e-08, "loss": 0.8799, "step": 9260 }, { "epoch": 0.9692307692307692, "grad_norm": 1.8265334250247782, "learning_rate": 4.9617272989748166e-08, "loss": 0.7664, "step": 9261 }, { "epoch": 0.9693354264782836, "grad_norm": 2.1422139711001673, "learning_rate": 4.92805915878547e-08, "loss": 0.8051, "step": 9262 }, { "epoch": 0.969440083725798, "grad_norm": 1.7236231384803589, "learning_rate": 4.8945053543792975e-08, "loss": 0.7868, "step": 9263 }, { "epoch": 0.9695447409733124, "grad_norm": 1.9977730114261136, "learning_rate": 4.86106588961166e-08, "loss": 0.8113, "step": 9264 }, { "epoch": 0.9696493982208267, "grad_norm": 2.196435687000056, "learning_rate": 4.827740768324929e-08, "loss": 0.88, "step": 9265 }, { "epoch": 0.9697540554683411, "grad_norm": 2.154480773213278, "learning_rate": 4.794529994348374e-08, "loss": 0.8232, "step": 9266 }, { "epoch": 0.9698587127158556, "grad_norm": 2.054282523683849, "learning_rate": 4.76143357149772e-08, "loss": 0.9648, "step": 9267 }, { "epoch": 0.96996336996337, "grad_norm": 2.1057006673654297, "learning_rate": 4.728451503575815e-08, "loss": 0.8939, "step": 9268 }, { "epoch": 0.9700680272108844, "grad_norm": 2.1885036662303445, "learning_rate": 4.695583794372516e-08, "loss": 0.9587, "step": 9269 }, { "epoch": 0.9701726844583988, "grad_norm": 2.15267308775205, "learning_rate": 4.662830447664357e-08, "loss": 0.9066, "step": 9270 }, { "epoch": 0.9702773417059132, "grad_norm": 2.4891197615275202, "learning_rate": 4.630191467214773e-08, "loss": 0.9047, "step": 9271 }, { "epoch": 0.9703819989534275, "grad_norm": 2.152695134921288, "learning_rate": 4.5976668567739856e-08, "loss": 0.8575, "step": 9272 }, { "epoch": 0.9704866562009419, "grad_norm": 2.086583458860564, "learning_rate": 4.565256620079117e-08, "loss": 0.9318, "step": 9273 }, { "epoch": 0.9705913134484563, "grad_norm": 1.9990374714123995, "learning_rate": 4.532960760854299e-08, "loss": 0.8295, "step": 9274 }, { "epoch": 0.9706959706959707, "grad_norm": 2.045660681387157, "learning_rate": 4.5007792828102305e-08, "loss": 0.7581, "step": 9275 }, { "epoch": 0.9708006279434851, "grad_norm": 2.1535992375534088, "learning_rate": 4.4687121896447305e-08, "loss": 0.8037, "step": 9276 }, { "epoch": 0.9709052851909995, "grad_norm": 1.933861715496548, "learning_rate": 4.436759485042408e-08, "loss": 0.9047, "step": 9277 }, { "epoch": 0.9710099424385139, "grad_norm": 2.460250636187858, "learning_rate": 4.404921172674659e-08, "loss": 0.8763, "step": 9278 }, { "epoch": 0.9711145996860283, "grad_norm": 2.05066277066522, "learning_rate": 4.373197256199668e-08, "loss": 0.8936, "step": 9279 }, { "epoch": 0.9712192569335426, "grad_norm": 2.0608607605365914, "learning_rate": 4.3415877392626314e-08, "loss": 0.8599, "step": 9280 }, { "epoch": 0.971323914181057, "grad_norm": 1.9005786956775899, "learning_rate": 4.3100926254956434e-08, "loss": 0.9429, "step": 9281 }, { "epoch": 0.9714285714285714, "grad_norm": 2.3320969105885028, "learning_rate": 4.2787119185174756e-08, "loss": 0.9442, "step": 9282 }, { "epoch": 0.9715332286760858, "grad_norm": 1.9467790462210524, "learning_rate": 4.247445621933799e-08, "loss": 0.9022, "step": 9283 }, { "epoch": 0.9716378859236002, "grad_norm": 2.1353572789569237, "learning_rate": 4.216293739337296e-08, "loss": 0.8758, "step": 9284 }, { "epoch": 0.9717425431711146, "grad_norm": 2.0519015338547826, "learning_rate": 4.1852562743072144e-08, "loss": 0.8413, "step": 9285 }, { "epoch": 0.971847200418629, "grad_norm": 2.07726966403895, "learning_rate": 4.154333230410035e-08, "loss": 0.9582, "step": 9286 }, { "epoch": 0.9719518576661433, "grad_norm": 1.734959100833163, "learning_rate": 4.1235246111986925e-08, "loss": 0.715, "step": 9287 }, { "epoch": 0.9720565149136577, "grad_norm": 2.3376164950199807, "learning_rate": 4.092830420213134e-08, "loss": 0.7634, "step": 9288 }, { "epoch": 0.9721611721611721, "grad_norm": 2.0354642090531447, "learning_rate": 4.062250660980427e-08, "loss": 0.9849, "step": 9289 }, { "epoch": 0.9722658294086866, "grad_norm": 2.4203466382212264, "learning_rate": 4.0317853370139826e-08, "loss": 0.9165, "step": 9290 }, { "epoch": 0.972370486656201, "grad_norm": 2.2074429056634024, "learning_rate": 4.0014344518145566e-08, "loss": 0.8997, "step": 9291 }, { "epoch": 0.9724751439037154, "grad_norm": 1.9811031986210714, "learning_rate": 3.971198008869359e-08, "loss": 0.8448, "step": 9292 }, { "epoch": 0.9725798011512298, "grad_norm": 2.140986208627522, "learning_rate": 3.9410760116526115e-08, "loss": 0.9095, "step": 9293 }, { "epoch": 0.9726844583987441, "grad_norm": 2.1998429755193047, "learning_rate": 3.9110684636254334e-08, "loss": 0.8572, "step": 9294 }, { "epoch": 0.9727891156462585, "grad_norm": 1.791680128166834, "learning_rate": 3.8811753682358454e-08, "loss": 0.8926, "step": 9295 }, { "epoch": 0.9728937728937729, "grad_norm": 2.0214903816242873, "learning_rate": 3.851396728918544e-08, "loss": 0.9469, "step": 9296 }, { "epoch": 0.9729984301412873, "grad_norm": 2.086981122937348, "learning_rate": 3.8217325490952365e-08, "loss": 0.8404, "step": 9297 }, { "epoch": 0.9731030873888017, "grad_norm": 2.175421733396647, "learning_rate": 3.792182832174196e-08, "loss": 0.9427, "step": 9298 }, { "epoch": 0.9732077446363161, "grad_norm": 1.9291300718410933, "learning_rate": 3.76274758155093e-08, "loss": 0.8368, "step": 9299 }, { "epoch": 0.9733124018838305, "grad_norm": 2.117352303798171, "learning_rate": 3.7334268006075094e-08, "loss": 0.9744, "step": 9300 }, { "epoch": 0.9734170591313448, "grad_norm": 1.675142403551649, "learning_rate": 3.7042204927130177e-08, "loss": 0.7633, "step": 9301 }, { "epoch": 0.9735217163788592, "grad_norm": 2.036570056386732, "learning_rate": 3.675128661223326e-08, "loss": 0.6977, "step": 9302 }, { "epoch": 0.9736263736263736, "grad_norm": 1.8889553909422538, "learning_rate": 3.646151309481094e-08, "loss": 0.792, "step": 9303 }, { "epoch": 0.973731030873888, "grad_norm": 2.209590888127619, "learning_rate": 3.6172884408158805e-08, "loss": 0.874, "step": 9304 }, { "epoch": 0.9738356881214024, "grad_norm": 1.9542284512171744, "learning_rate": 3.5885400585441435e-08, "loss": 0.8408, "step": 9305 }, { "epoch": 0.9739403453689168, "grad_norm": 2.079751071735509, "learning_rate": 3.559906165969018e-08, "loss": 0.8197, "step": 9306 }, { "epoch": 0.9740450026164312, "grad_norm": 1.7986235225721088, "learning_rate": 3.531386766380762e-08, "loss": 0.8663, "step": 9307 }, { "epoch": 0.9741496598639455, "grad_norm": 2.120431133086565, "learning_rate": 3.502981863056088e-08, "loss": 1.016, "step": 9308 }, { "epoch": 0.9742543171114599, "grad_norm": 2.27161995008741, "learning_rate": 3.4746914592590496e-08, "loss": 0.8601, "step": 9309 }, { "epoch": 0.9743589743589743, "grad_norm": 1.8602129108469572, "learning_rate": 3.446515558240049e-08, "loss": 0.8764, "step": 9310 }, { "epoch": 0.9744636316064887, "grad_norm": 2.5728016050016245, "learning_rate": 3.4184541632366066e-08, "loss": 1.0023, "step": 9311 }, { "epoch": 0.9745682888540032, "grad_norm": 2.3187035734822334, "learning_rate": 3.390507277473143e-08, "loss": 0.9637, "step": 9312 }, { "epoch": 0.9746729461015176, "grad_norm": 1.8523908673682685, "learning_rate": 3.3626749041606454e-08, "loss": 0.9465, "step": 9313 }, { "epoch": 0.974777603349032, "grad_norm": 1.9384599924677752, "learning_rate": 3.3349570464973336e-08, "loss": 0.7849, "step": 9314 }, { "epoch": 0.9748822605965463, "grad_norm": 1.7474977395994695, "learning_rate": 3.307353707667771e-08, "loss": 0.7028, "step": 9315 }, { "epoch": 0.9749869178440607, "grad_norm": 1.7458067005238376, "learning_rate": 3.279864890843865e-08, "loss": 0.9057, "step": 9316 }, { "epoch": 0.9750915750915751, "grad_norm": 1.93802753381986, "learning_rate": 3.2524905991839775e-08, "loss": 0.7735, "step": 9317 }, { "epoch": 0.9751962323390895, "grad_norm": 2.0070696055706616, "learning_rate": 3.225230835833593e-08, "loss": 0.8985, "step": 9318 }, { "epoch": 0.9753008895866039, "grad_norm": 2.290676066932009, "learning_rate": 3.198085603924761e-08, "loss": 0.8573, "step": 9319 }, { "epoch": 0.9754055468341183, "grad_norm": 1.9242114892590325, "learning_rate": 3.1710549065767645e-08, "loss": 0.8236, "step": 9320 }, { "epoch": 0.9755102040816327, "grad_norm": 2.0179022005643312, "learning_rate": 3.144138746895231e-08, "loss": 0.9594, "step": 9321 }, { "epoch": 0.9756148613291471, "grad_norm": 1.688029558610551, "learning_rate": 3.117337127973019e-08, "loss": 0.6891, "step": 9322 }, { "epoch": 0.9757195185766614, "grad_norm": 2.123029102476217, "learning_rate": 3.0906500528896656e-08, "loss": 0.9667, "step": 9323 }, { "epoch": 0.9758241758241758, "grad_norm": 2.114035431835684, "learning_rate": 3.064077524711606e-08, "loss": 0.8246, "step": 9324 }, { "epoch": 0.9759288330716902, "grad_norm": 1.9867461075805437, "learning_rate": 3.0376195464920655e-08, "loss": 0.9916, "step": 9325 }, { "epoch": 0.9760334903192046, "grad_norm": 2.2706805982649962, "learning_rate": 3.0112761212709454e-08, "loss": 0.716, "step": 9326 }, { "epoch": 0.976138147566719, "grad_norm": 2.2268234648526413, "learning_rate": 2.9850472520753795e-08, "loss": 0.84, "step": 9327 }, { "epoch": 0.9762428048142334, "grad_norm": 2.217677339428372, "learning_rate": 2.9589329419190683e-08, "loss": 0.8897, "step": 9328 }, { "epoch": 0.9763474620617478, "grad_norm": 2.093519971186103, "learning_rate": 2.9329331938026117e-08, "loss": 0.8808, "step": 9329 }, { "epoch": 0.9764521193092621, "grad_norm": 1.9406821046190856, "learning_rate": 2.907048010713398e-08, "loss": 0.8893, "step": 9330 }, { "epoch": 0.9765567765567765, "grad_norm": 1.9257719073652975, "learning_rate": 2.8812773956256034e-08, "loss": 0.8714, "step": 9331 }, { "epoch": 0.9766614338042909, "grad_norm": 2.330309710581296, "learning_rate": 2.855621351500415e-08, "loss": 0.9156, "step": 9332 }, { "epoch": 0.9767660910518053, "grad_norm": 1.742461766113821, "learning_rate": 2.8300798812858078e-08, "loss": 0.7883, "step": 9333 }, { "epoch": 0.9768707482993197, "grad_norm": 2.150066157396379, "learning_rate": 2.8046529879164343e-08, "loss": 0.7517, "step": 9334 }, { "epoch": 0.9769754055468342, "grad_norm": 2.162913910208753, "learning_rate": 2.7793406743139572e-08, "loss": 0.8699, "step": 9335 }, { "epoch": 0.9770800627943486, "grad_norm": 3.0655887965284343, "learning_rate": 2.7541429433869393e-08, "loss": 0.8514, "step": 9336 }, { "epoch": 0.9771847200418629, "grad_norm": 1.9873130806621349, "learning_rate": 2.729059798030398e-08, "loss": 0.7885, "step": 9337 }, { "epoch": 0.9772893772893773, "grad_norm": 2.2769678998356295, "learning_rate": 2.7040912411265828e-08, "loss": 0.9203, "step": 9338 }, { "epoch": 0.9773940345368917, "grad_norm": 1.9774891685632308, "learning_rate": 2.679237275544422e-08, "loss": 0.8983, "step": 9339 }, { "epoch": 0.9774986917844061, "grad_norm": 2.2420458226870985, "learning_rate": 2.6544979041397412e-08, "loss": 0.7687, "step": 9340 }, { "epoch": 0.9776033490319205, "grad_norm": 2.119669776522172, "learning_rate": 2.6298731297550452e-08, "loss": 0.8316, "step": 9341 }, { "epoch": 0.9777080062794349, "grad_norm": 2.4118682693637385, "learning_rate": 2.605362955219737e-08, "loss": 0.7018, "step": 9342 }, { "epoch": 0.9778126635269493, "grad_norm": 2.0233107477477787, "learning_rate": 2.5809673833502303e-08, "loss": 0.8836, "step": 9343 }, { "epoch": 0.9779173207744636, "grad_norm": 1.9871161511437192, "learning_rate": 2.5566864169495054e-08, "loss": 0.735, "step": 9344 }, { "epoch": 0.978021978021978, "grad_norm": 2.292335147286737, "learning_rate": 2.5325200588076636e-08, "loss": 0.839, "step": 9345 }, { "epoch": 0.9781266352694924, "grad_norm": 2.2868212579297644, "learning_rate": 2.508468311701151e-08, "loss": 0.8197, "step": 9346 }, { "epoch": 0.9782312925170068, "grad_norm": 2.1010720301980004, "learning_rate": 2.4845311783938676e-08, "loss": 0.9124, "step": 9347 }, { "epoch": 0.9783359497645212, "grad_norm": 1.8598221177836944, "learning_rate": 2.4607086616361687e-08, "loss": 0.8497, "step": 9348 }, { "epoch": 0.9784406070120356, "grad_norm": 2.237038039840748, "learning_rate": 2.4370007641651983e-08, "loss": 0.8217, "step": 9349 }, { "epoch": 0.97854526425955, "grad_norm": 2.085673606140328, "learning_rate": 2.4134074887051108e-08, "loss": 0.8642, "step": 9350 }, { "epoch": 0.9786499215070643, "grad_norm": 1.9367343428587138, "learning_rate": 2.3899288379668483e-08, "loss": 0.9598, "step": 9351 }, { "epoch": 0.9787545787545787, "grad_norm": 1.968532478877287, "learning_rate": 2.3665648146480315e-08, "loss": 0.9346, "step": 9352 }, { "epoch": 0.9788592360020931, "grad_norm": 2.2698739958097547, "learning_rate": 2.343315421433401e-08, "loss": 0.9245, "step": 9353 }, { "epoch": 0.9789638932496075, "grad_norm": 2.1111746202775517, "learning_rate": 2.3201806609943756e-08, "loss": 0.8771, "step": 9354 }, { "epoch": 0.9790685504971219, "grad_norm": 2.395697271895756, "learning_rate": 2.2971605359890514e-08, "loss": 0.7715, "step": 9355 }, { "epoch": 0.9791732077446363, "grad_norm": 2.048806092369536, "learning_rate": 2.2742550490624237e-08, "loss": 0.8791, "step": 9356 }, { "epoch": 0.9792778649921507, "grad_norm": 2.0576345151800197, "learning_rate": 2.2514642028466092e-08, "loss": 0.9454, "step": 9357 }, { "epoch": 0.979382522239665, "grad_norm": 2.1060942188116294, "learning_rate": 2.2287879999601804e-08, "loss": 0.8494, "step": 9358 }, { "epoch": 0.9794871794871794, "grad_norm": 2.0030711225076745, "learning_rate": 2.2062264430087187e-08, "loss": 0.7593, "step": 9359 }, { "epoch": 0.9795918367346939, "grad_norm": 2.18284415979648, "learning_rate": 2.1837795345844847e-08, "loss": 0.8958, "step": 9360 }, { "epoch": 0.9796964939822083, "grad_norm": 1.9127644215367114, "learning_rate": 2.1614472772668592e-08, "loss": 0.9716, "step": 9361 }, { "epoch": 0.9798011512297227, "grad_norm": 1.8495943717092973, "learning_rate": 2.13922967362179e-08, "loss": 0.8342, "step": 9362 }, { "epoch": 0.9799058084772371, "grad_norm": 2.1175372567636703, "learning_rate": 2.1171267262020124e-08, "loss": 0.9731, "step": 9363 }, { "epoch": 0.9800104657247515, "grad_norm": 1.961813987025682, "learning_rate": 2.0951384375473837e-08, "loss": 0.8674, "step": 9364 }, { "epoch": 0.9801151229722659, "grad_norm": 2.2096628727490555, "learning_rate": 2.073264810184328e-08, "loss": 0.8222, "step": 9365 }, { "epoch": 0.9802197802197802, "grad_norm": 3.0174110994465653, "learning_rate": 2.0515058466260563e-08, "loss": 0.7058, "step": 9366 }, { "epoch": 0.9803244374672946, "grad_norm": 1.9638967173681348, "learning_rate": 2.0298615493729025e-08, "loss": 0.8708, "step": 9367 }, { "epoch": 0.980429094714809, "grad_norm": 1.8984784000934047, "learning_rate": 2.0083319209116547e-08, "loss": 0.8403, "step": 9368 }, { "epoch": 0.9805337519623234, "grad_norm": 1.9998734455651994, "learning_rate": 1.9869169637162232e-08, "loss": 0.8549, "step": 9369 }, { "epoch": 0.9806384092098378, "grad_norm": 2.7056047403555916, "learning_rate": 1.9656166802473066e-08, "loss": 0.7828, "step": 9370 }, { "epoch": 0.9807430664573522, "grad_norm": 1.885856551772748, "learning_rate": 1.9444310729521686e-08, "loss": 0.8384, "step": 9371 }, { "epoch": 0.9808477237048666, "grad_norm": 2.3439297429643835, "learning_rate": 1.9233601442653073e-08, "loss": 0.8945, "step": 9372 }, { "epoch": 0.9809523809523809, "grad_norm": 2.273645673597301, "learning_rate": 1.902403896607563e-08, "loss": 0.8892, "step": 9373 }, { "epoch": 0.9810570381998953, "grad_norm": 2.101477849817953, "learning_rate": 1.8815623323870102e-08, "loss": 0.7195, "step": 9374 }, { "epoch": 0.9811616954474097, "grad_norm": 2.067407200022368, "learning_rate": 1.8608354539982886e-08, "loss": 0.931, "step": 9375 }, { "epoch": 0.9812663526949241, "grad_norm": 2.0656438344157837, "learning_rate": 1.8402232638230488e-08, "loss": 0.9159, "step": 9376 }, { "epoch": 0.9813710099424385, "grad_norm": 1.989366318840071, "learning_rate": 1.8197257642296183e-08, "loss": 0.9061, "step": 9377 }, { "epoch": 0.9814756671899529, "grad_norm": 2.1104290095055007, "learning_rate": 1.7993429575732247e-08, "loss": 0.7356, "step": 9378 }, { "epoch": 0.9815803244374673, "grad_norm": 2.3574441233228804, "learning_rate": 1.779074846195883e-08, "loss": 0.7541, "step": 9379 }, { "epoch": 0.9816849816849816, "grad_norm": 2.053364465624434, "learning_rate": 1.758921432426397e-08, "loss": 0.929, "step": 9380 }, { "epoch": 0.981789638932496, "grad_norm": 1.9733850210433772, "learning_rate": 1.738882718580581e-08, "loss": 0.8051, "step": 9381 }, { "epoch": 0.9818942961800105, "grad_norm": 2.010472060114112, "learning_rate": 1.718958706960816e-08, "loss": 0.8714, "step": 9382 }, { "epoch": 0.9819989534275249, "grad_norm": 1.9418541630139494, "learning_rate": 1.6991493998563814e-08, "loss": 0.9111, "step": 9383 }, { "epoch": 0.9821036106750393, "grad_norm": 2.0017027127013303, "learning_rate": 1.6794547995434563e-08, "loss": 0.8167, "step": 9384 }, { "epoch": 0.9822082679225537, "grad_norm": 2.0504485758602895, "learning_rate": 1.6598749082850084e-08, "loss": 0.8904, "step": 9385 }, { "epoch": 0.9823129251700681, "grad_norm": 2.360758373402244, "learning_rate": 1.640409728330794e-08, "loss": 0.9911, "step": 9386 }, { "epoch": 0.9824175824175824, "grad_norm": 1.99459443178208, "learning_rate": 1.621059261917357e-08, "loss": 0.844, "step": 9387 }, { "epoch": 0.9825222396650968, "grad_norm": 1.8418136023305347, "learning_rate": 1.6018235112681412e-08, "loss": 0.8789, "step": 9388 }, { "epoch": 0.9826268969126112, "grad_norm": 1.6553513965603035, "learning_rate": 1.5827024785933785e-08, "loss": 0.7062, "step": 9389 }, { "epoch": 0.9827315541601256, "grad_norm": 1.9968661967922576, "learning_rate": 1.5636961660899787e-08, "loss": 0.7489, "step": 9390 }, { "epoch": 0.98283621140764, "grad_norm": 2.3529232712311807, "learning_rate": 1.5448045759420827e-08, "loss": 0.9545, "step": 9391 }, { "epoch": 0.9829408686551544, "grad_norm": 1.8819806519766973, "learning_rate": 1.526027710320177e-08, "loss": 0.8448, "step": 9392 }, { "epoch": 0.9830455259026688, "grad_norm": 2.138462511740512, "learning_rate": 1.5073655713818692e-08, "loss": 0.9485, "step": 9393 }, { "epoch": 0.9831501831501831, "grad_norm": 2.3151261223417703, "learning_rate": 1.4888181612713327e-08, "loss": 0.9372, "step": 9394 }, { "epoch": 0.9832548403976975, "grad_norm": 2.1342293657223292, "learning_rate": 1.4703854821197516e-08, "loss": 0.7339, "step": 9395 }, { "epoch": 0.9833594976452119, "grad_norm": 1.9715185392028405, "learning_rate": 1.4520675360450987e-08, "loss": 0.9157, "step": 9396 }, { "epoch": 0.9834641548927263, "grad_norm": 2.6450142775406964, "learning_rate": 1.4338643251522456e-08, "loss": 0.9199, "step": 9397 }, { "epoch": 0.9835688121402407, "grad_norm": 1.8046838157595604, "learning_rate": 1.4157758515326303e-08, "loss": 0.749, "step": 9398 }, { "epoch": 0.9836734693877551, "grad_norm": 1.8899814023719925, "learning_rate": 1.3978021172648126e-08, "loss": 0.93, "step": 9399 }, { "epoch": 0.9837781266352695, "grad_norm": 1.6809007606749307, "learning_rate": 1.3799431244138073e-08, "loss": 0.7007, "step": 9400 }, { "epoch": 0.9838827838827838, "grad_norm": 1.6089297962761584, "learning_rate": 1.3621988750317505e-08, "loss": 0.7509, "step": 9401 }, { "epoch": 0.9839874411302982, "grad_norm": 2.032535731745511, "learning_rate": 1.344569371157567e-08, "loss": 0.8048, "step": 9402 }, { "epoch": 0.9840920983778126, "grad_norm": 1.9666083217252117, "learning_rate": 1.3270546148167474e-08, "loss": 0.8324, "step": 9403 }, { "epoch": 0.984196755625327, "grad_norm": 2.0185643963937054, "learning_rate": 1.3096546080219042e-08, "loss": 0.8746, "step": 9404 }, { "epoch": 0.9843014128728415, "grad_norm": 1.9656375458562914, "learning_rate": 1.292369352772327e-08, "loss": 0.8422, "step": 9405 }, { "epoch": 0.9844060701203559, "grad_norm": 2.331480394760753, "learning_rate": 1.2751988510540937e-08, "loss": 0.9247, "step": 9406 }, { "epoch": 0.9845107273678703, "grad_norm": 1.9029882738516415, "learning_rate": 1.2581431048401815e-08, "loss": 0.8051, "step": 9407 }, { "epoch": 0.9846153846153847, "grad_norm": 1.9192845614133922, "learning_rate": 1.2412021160902454e-08, "loss": 0.851, "step": 9408 }, { "epoch": 0.984720041862899, "grad_norm": 1.8685299347220463, "learning_rate": 1.2243758867508393e-08, "loss": 0.798, "step": 9409 }, { "epoch": 0.9848246991104134, "grad_norm": 2.171112776891488, "learning_rate": 1.2076644187554165e-08, "loss": 0.9017, "step": 9410 }, { "epoch": 0.9849293563579278, "grad_norm": 2.3532297197914427, "learning_rate": 1.1910677140241077e-08, "loss": 0.902, "step": 9411 }, { "epoch": 0.9850340136054422, "grad_norm": 2.1352803267811944, "learning_rate": 1.1745857744639433e-08, "loss": 0.9434, "step": 9412 }, { "epoch": 0.9851386708529566, "grad_norm": 2.0466470004257444, "learning_rate": 1.1582186019686303e-08, "loss": 0.854, "step": 9413 }, { "epoch": 0.985243328100471, "grad_norm": 1.8753970699615763, "learning_rate": 1.1419661984189978e-08, "loss": 0.885, "step": 9414 }, { "epoch": 0.9853479853479854, "grad_norm": 1.9062133085606772, "learning_rate": 1.1258285656822187e-08, "loss": 0.8093, "step": 9415 }, { "epoch": 0.9854526425954997, "grad_norm": 2.2126700579173644, "learning_rate": 1.1098057056126987e-08, "loss": 0.9876, "step": 9416 }, { "epoch": 0.9855572998430141, "grad_norm": 2.0255896960925255, "learning_rate": 1.0938976200515206e-08, "loss": 0.742, "step": 9417 }, { "epoch": 0.9856619570905285, "grad_norm": 2.1729727012223328, "learning_rate": 1.0781043108264444e-08, "loss": 0.8281, "step": 9418 }, { "epoch": 0.9857666143380429, "grad_norm": 2.5225886899280794, "learning_rate": 1.0624257797522408e-08, "loss": 0.8728, "step": 9419 }, { "epoch": 0.9858712715855573, "grad_norm": 2.0350945295517993, "learning_rate": 1.0468620286303577e-08, "loss": 0.8491, "step": 9420 }, { "epoch": 0.9859759288330717, "grad_norm": 2.2700948009138253, "learning_rate": 1.0314130592490313e-08, "loss": 0.9252, "step": 9421 }, { "epoch": 0.9860805860805861, "grad_norm": 1.9846202839221025, "learning_rate": 1.0160788733836191e-08, "loss": 0.8137, "step": 9422 }, { "epoch": 0.9861852433281004, "grad_norm": 2.3145335457286462, "learning_rate": 1.0008594727958232e-08, "loss": 0.6612, "step": 9423 }, { "epoch": 0.9862899005756148, "grad_norm": 1.6193909422722288, "learning_rate": 9.857548592343558e-09, "loss": 0.7132, "step": 9424 }, { "epoch": 0.9863945578231292, "grad_norm": 2.3517435794568353, "learning_rate": 9.707650344350505e-09, "loss": 0.8793, "step": 9425 }, { "epoch": 0.9864992150706436, "grad_norm": 1.9920534058323922, "learning_rate": 9.558900001199745e-09, "loss": 0.8218, "step": 9426 }, { "epoch": 0.986603872318158, "grad_norm": 2.427971815968549, "learning_rate": 9.411297579984269e-09, "loss": 0.9036, "step": 9427 }, { "epoch": 0.9867085295656725, "grad_norm": 1.8261977196224324, "learning_rate": 9.264843097663845e-09, "loss": 0.776, "step": 9428 }, { "epoch": 0.9868131868131869, "grad_norm": 2.197882541506156, "learning_rate": 9.119536571066123e-09, "loss": 0.8071, "step": 9429 }, { "epoch": 0.9869178440607012, "grad_norm": 1.9625634256614233, "learning_rate": 8.975378016887748e-09, "loss": 0.7967, "step": 9430 }, { "epoch": 0.9870225013082156, "grad_norm": 1.6165607498949757, "learning_rate": 8.832367451692137e-09, "loss": 0.7942, "step": 9431 }, { "epoch": 0.98712715855573, "grad_norm": 2.0636190975616793, "learning_rate": 8.6905048919117e-09, "loss": 0.9357, "step": 9432 }, { "epoch": 0.9872318158032444, "grad_norm": 2.0496132764727255, "learning_rate": 8.549790353846731e-09, "loss": 0.9125, "step": 9433 }, { "epoch": 0.9873364730507588, "grad_norm": 1.98179465824096, "learning_rate": 8.410223853665411e-09, "loss": 0.7822, "step": 9434 }, { "epoch": 0.9874411302982732, "grad_norm": 2.321799912705264, "learning_rate": 8.27180540740491e-09, "loss": 0.8323, "step": 9435 }, { "epoch": 0.9875457875457876, "grad_norm": 1.8047308460509242, "learning_rate": 8.134535030968061e-09, "loss": 0.839, "step": 9436 }, { "epoch": 0.9876504447933019, "grad_norm": 2.7150759739338293, "learning_rate": 7.998412740130023e-09, "loss": 0.9566, "step": 9437 }, { "epoch": 0.9877551020408163, "grad_norm": 1.8975694108774446, "learning_rate": 7.863438550529401e-09, "loss": 0.8174, "step": 9438 }, { "epoch": 0.9878597592883307, "grad_norm": 2.239706974304119, "learning_rate": 7.729612477677118e-09, "loss": 0.9733, "step": 9439 }, { "epoch": 0.9879644165358451, "grad_norm": 1.7630296843202333, "learning_rate": 7.596934536947542e-09, "loss": 0.8704, "step": 9440 }, { "epoch": 0.9880690737833595, "grad_norm": 2.1363895002879407, "learning_rate": 7.465404743587368e-09, "loss": 0.7886, "step": 9441 }, { "epoch": 0.9881737310308739, "grad_norm": 2.2348274430335837, "learning_rate": 7.33502311271006e-09, "loss": 0.9167, "step": 9442 }, { "epoch": 0.9882783882783883, "grad_norm": 2.9112946474193615, "learning_rate": 7.205789659294748e-09, "loss": 0.9517, "step": 9443 }, { "epoch": 0.9883830455259026, "grad_norm": 1.9351507967777781, "learning_rate": 7.0777043981917756e-09, "loss": 0.9959, "step": 9444 }, { "epoch": 0.988487702773417, "grad_norm": 2.205838160688878, "learning_rate": 6.950767344118259e-09, "loss": 1.0637, "step": 9445 }, { "epoch": 0.9885923600209314, "grad_norm": 1.7747166910206333, "learning_rate": 6.824978511660307e-09, "loss": 0.7722, "step": 9446 }, { "epoch": 0.9886970172684458, "grad_norm": 1.7660762190615527, "learning_rate": 6.700337915269694e-09, "loss": 0.8423, "step": 9447 }, { "epoch": 0.9888016745159602, "grad_norm": 2.1500153429277287, "learning_rate": 6.576845569269408e-09, "loss": 0.9119, "step": 9448 }, { "epoch": 0.9889063317634746, "grad_norm": 2.1119803885084334, "learning_rate": 6.454501487848097e-09, "loss": 0.8946, "step": 9449 }, { "epoch": 0.989010989010989, "grad_norm": 2.045363078081982, "learning_rate": 6.333305685064517e-09, "loss": 0.814, "step": 9450 }, { "epoch": 0.9891156462585035, "grad_norm": 2.143018279644762, "learning_rate": 6.213258174841974e-09, "loss": 0.8763, "step": 9451 }, { "epoch": 0.9892203035060178, "grad_norm": 1.8414553993367668, "learning_rate": 6.094358970976099e-09, "loss": 0.8197, "step": 9452 }, { "epoch": 0.9893249607535322, "grad_norm": 2.2429501404967374, "learning_rate": 5.976608087128188e-09, "loss": 0.9149, "step": 9453 }, { "epoch": 0.9894296180010466, "grad_norm": 1.7802095129658109, "learning_rate": 5.860005536828528e-09, "loss": 0.829, "step": 9454 }, { "epoch": 0.989534275248561, "grad_norm": 1.958887466082894, "learning_rate": 5.74455133347418e-09, "loss": 0.9254, "step": 9455 }, { "epoch": 0.9896389324960754, "grad_norm": 2.0406723361448926, "learning_rate": 5.630245490331199e-09, "loss": 0.8935, "step": 9456 }, { "epoch": 0.9897435897435898, "grad_norm": 1.985359908345336, "learning_rate": 5.517088020533523e-09, "loss": 0.7691, "step": 9457 }, { "epoch": 0.9898482469911042, "grad_norm": 1.9928529617111481, "learning_rate": 5.405078937082975e-09, "loss": 0.8635, "step": 9458 }, { "epoch": 0.9899529042386185, "grad_norm": 2.0172391384978616, "learning_rate": 5.2942182528503695e-09, "loss": 0.8513, "step": 9459 }, { "epoch": 0.9900575614861329, "grad_norm": 2.3808186332110246, "learning_rate": 5.184505980574406e-09, "loss": 0.9489, "step": 9460 }, { "epoch": 0.9901622187336473, "grad_norm": 2.318056374421155, "learning_rate": 5.075942132859446e-09, "loss": 0.9651, "step": 9461 }, { "epoch": 0.9902668759811617, "grad_norm": 2.342430733838004, "learning_rate": 4.968526722179956e-09, "loss": 0.9706, "step": 9462 }, { "epoch": 0.9903715332286761, "grad_norm": 2.210156685115261, "learning_rate": 4.8622597608793954e-09, "loss": 0.8883, "step": 9463 }, { "epoch": 0.9904761904761905, "grad_norm": 2.100418573269556, "learning_rate": 4.757141261167997e-09, "loss": 0.9061, "step": 9464 }, { "epoch": 0.9905808477237049, "grad_norm": 2.092025490922726, "learning_rate": 4.653171235122767e-09, "loss": 0.9194, "step": 9465 }, { "epoch": 0.9906855049712192, "grad_norm": 2.2321294289010294, "learning_rate": 4.550349694690814e-09, "loss": 0.8867, "step": 9466 }, { "epoch": 0.9907901622187336, "grad_norm": 2.032473032864792, "learning_rate": 4.4486766516871335e-09, "loss": 0.8543, "step": 9467 }, { "epoch": 0.990894819466248, "grad_norm": 2.117448656836186, "learning_rate": 4.348152117793491e-09, "loss": 0.8434, "step": 9468 }, { "epoch": 0.9909994767137624, "grad_norm": 2.2695523367962305, "learning_rate": 4.248776104560648e-09, "loss": 0.9899, "step": 9469 }, { "epoch": 0.9911041339612768, "grad_norm": 2.053174387036949, "learning_rate": 4.150548623406136e-09, "loss": 0.8451, "step": 9470 }, { "epoch": 0.9912087912087912, "grad_norm": 2.1987039378958233, "learning_rate": 4.053469685617595e-09, "loss": 0.7486, "step": 9471 }, { "epoch": 0.9913134484563056, "grad_norm": 2.0752962011666423, "learning_rate": 3.957539302349434e-09, "loss": 0.9223, "step": 9472 }, { "epoch": 0.9914181057038199, "grad_norm": 2.318495473294372, "learning_rate": 3.862757484623947e-09, "loss": 0.8358, "step": 9473 }, { "epoch": 0.9915227629513343, "grad_norm": 2.0490582959624812, "learning_rate": 3.769124243332423e-09, "loss": 0.9201, "step": 9474 }, { "epoch": 0.9916274201988488, "grad_norm": 2.1895429135365436, "learning_rate": 3.676639589232922e-09, "loss": 0.8669, "step": 9475 }, { "epoch": 0.9917320774463632, "grad_norm": 2.081764463908597, "learning_rate": 3.5853035329513898e-09, "loss": 0.7138, "step": 9476 }, { "epoch": 0.9918367346938776, "grad_norm": 1.9405197361065825, "learning_rate": 3.4951160849838738e-09, "loss": 0.9535, "step": 9477 }, { "epoch": 0.991941391941392, "grad_norm": 1.962384250829884, "learning_rate": 3.4060772556931965e-09, "loss": 0.8824, "step": 9478 }, { "epoch": 0.9920460491889064, "grad_norm": 1.9074856383254064, "learning_rate": 3.3181870553089523e-09, "loss": 0.889, "step": 9479 }, { "epoch": 0.9921507064364207, "grad_norm": 2.044982555109191, "learning_rate": 3.2314454939297304e-09, "loss": 0.8188, "step": 9480 }, { "epoch": 0.9922553636839351, "grad_norm": 1.9272346116373464, "learning_rate": 3.1458525815231122e-09, "loss": 0.7799, "step": 9481 }, { "epoch": 0.9923600209314495, "grad_norm": 1.8056929489369389, "learning_rate": 3.0614083279245644e-09, "loss": 0.7488, "step": 9482 }, { "epoch": 0.9924646781789639, "grad_norm": 2.111478786571531, "learning_rate": 2.9781127428352153e-09, "loss": 0.9358, "step": 9483 }, { "epoch": 0.9925693354264783, "grad_norm": 2.2187962376027532, "learning_rate": 2.8959658358274077e-09, "loss": 0.8856, "step": 9484 }, { "epoch": 0.9926739926739927, "grad_norm": 2.250021426198808, "learning_rate": 2.814967616339148e-09, "loss": 0.9267, "step": 9485 }, { "epoch": 0.9927786499215071, "grad_norm": 1.9471155417501587, "learning_rate": 2.735118093678546e-09, "loss": 0.8243, "step": 9486 }, { "epoch": 0.9928833071690214, "grad_norm": 2.2292413033155447, "learning_rate": 2.656417277018264e-09, "loss": 0.9037, "step": 9487 }, { "epoch": 0.9929879644165358, "grad_norm": 2.1438692135997863, "learning_rate": 2.578865175402179e-09, "loss": 0.92, "step": 9488 }, { "epoch": 0.9930926216640502, "grad_norm": 2.0987549084951564, "learning_rate": 2.5024617977420507e-09, "loss": 0.866, "step": 9489 }, { "epoch": 0.9931972789115646, "grad_norm": 2.1710980962999833, "learning_rate": 2.427207152815303e-09, "loss": 0.8711, "step": 9490 }, { "epoch": 0.993301936159079, "grad_norm": 1.9976864946771278, "learning_rate": 2.3531012492705728e-09, "loss": 0.8194, "step": 9491 }, { "epoch": 0.9934065934065934, "grad_norm": 2.0151148991749315, "learning_rate": 2.2801440956210506e-09, "loss": 0.8259, "step": 9492 }, { "epoch": 0.9935112506541078, "grad_norm": 1.7788086796827887, "learning_rate": 2.208335700251141e-09, "loss": 0.6933, "step": 9493 }, { "epoch": 0.9936159079016222, "grad_norm": 2.243955438948143, "learning_rate": 2.13767607141091e-09, "loss": 1.0067, "step": 9494 }, { "epoch": 0.9937205651491365, "grad_norm": 2.138700100591113, "learning_rate": 2.06816521721942e-09, "loss": 0.9177, "step": 9495 }, { "epoch": 0.9938252223966509, "grad_norm": 2.0913847556873537, "learning_rate": 1.9998031456636147e-09, "loss": 0.9114, "step": 9496 }, { "epoch": 0.9939298796441653, "grad_norm": 1.658736570996112, "learning_rate": 1.9325898645983223e-09, "loss": 0.8015, "step": 9497 }, { "epoch": 0.9940345368916798, "grad_norm": 2.1338090873686615, "learning_rate": 1.866525381747364e-09, "loss": 0.9249, "step": 9498 }, { "epoch": 0.9941391941391942, "grad_norm": 2.469627726192965, "learning_rate": 1.8016097047002246e-09, "loss": 0.8704, "step": 9499 }, { "epoch": 0.9942438513867086, "grad_norm": 2.1414411950046026, "learning_rate": 1.7378428409164927e-09, "loss": 0.8624, "step": 9500 }, { "epoch": 0.994348508634223, "grad_norm": 2.042150186442989, "learning_rate": 1.6752247977236401e-09, "loss": 0.8937, "step": 9501 }, { "epoch": 0.9944531658817373, "grad_norm": 1.9900829634578683, "learning_rate": 1.6137555823159123e-09, "loss": 0.9209, "step": 9502 }, { "epoch": 0.9945578231292517, "grad_norm": 2.410591588659901, "learning_rate": 1.5534352017565479e-09, "loss": 0.8172, "step": 9503 }, { "epoch": 0.9946624803767661, "grad_norm": 2.236434312692957, "learning_rate": 1.4942636629766693e-09, "loss": 0.8395, "step": 9504 }, { "epoch": 0.9947671376242805, "grad_norm": 2.1401814250288265, "learning_rate": 1.4362409727752823e-09, "loss": 0.8297, "step": 9505 }, { "epoch": 0.9948717948717949, "grad_norm": 1.648097635917843, "learning_rate": 1.3793671378181662e-09, "loss": 0.7285, "step": 9506 }, { "epoch": 0.9949764521193093, "grad_norm": 2.3796168135285676, "learning_rate": 1.3236421646412034e-09, "loss": 0.8813, "step": 9507 }, { "epoch": 0.9950811093668237, "grad_norm": 1.9707127190807336, "learning_rate": 1.2690660596481609e-09, "loss": 0.7346, "step": 9508 }, { "epoch": 0.995185766614338, "grad_norm": 2.161094149096642, "learning_rate": 1.2156388291084675e-09, "loss": 0.8541, "step": 9509 }, { "epoch": 0.9952904238618524, "grad_norm": 2.010479082385231, "learning_rate": 1.1633604791605468e-09, "loss": 0.8944, "step": 9510 }, { "epoch": 0.9953950811093668, "grad_norm": 2.4630434927929925, "learning_rate": 1.1122310158129257e-09, "loss": 1.0294, "step": 9511 }, { "epoch": 0.9954997383568812, "grad_norm": 2.254975083731318, "learning_rate": 1.0622504449409043e-09, "loss": 0.9957, "step": 9512 }, { "epoch": 0.9956043956043956, "grad_norm": 2.4241701416533905, "learning_rate": 1.013418772285446e-09, "loss": 0.8659, "step": 9513 }, { "epoch": 0.99570905285191, "grad_norm": 2.3939669658872362, "learning_rate": 9.65736003457618e-10, "loss": 0.7979, "step": 9514 }, { "epoch": 0.9958137100994244, "grad_norm": 2.2441183666873323, "learning_rate": 9.19202143937481e-10, "loss": 0.7062, "step": 9515 }, { "epoch": 0.9959183673469387, "grad_norm": 1.9769218010919176, "learning_rate": 8.738171990707589e-10, "loss": 0.8455, "step": 9516 }, { "epoch": 0.9960230245944531, "grad_norm": 1.9141948632707264, "learning_rate": 8.295811740732796e-10, "loss": 0.7915, "step": 9517 }, { "epoch": 0.9961276818419675, "grad_norm": 2.3415485768220994, "learning_rate": 7.864940740276439e-10, "loss": 0.9133, "step": 9518 }, { "epoch": 0.9962323390894819, "grad_norm": 2.1401706902733837, "learning_rate": 7.445559038832262e-10, "loss": 0.8676, "step": 9519 }, { "epoch": 0.9963369963369964, "grad_norm": 1.8282468400440854, "learning_rate": 7.037666684606148e-10, "loss": 0.9874, "step": 9520 }, { "epoch": 0.9964416535845108, "grad_norm": 2.2683960849665543, "learning_rate": 6.641263724460612e-10, "loss": 0.7478, "step": 9521 }, { "epoch": 0.9965463108320252, "grad_norm": 2.1668339291877157, "learning_rate": 6.256350203948103e-10, "loss": 0.8291, "step": 9522 }, { "epoch": 0.9966509680795395, "grad_norm": 2.4253965330798546, "learning_rate": 5.882926167277703e-10, "loss": 0.8643, "step": 9523 }, { "epoch": 0.9967556253270539, "grad_norm": 2.1583972769715265, "learning_rate": 5.520991657370633e-10, "loss": 0.8868, "step": 9524 }, { "epoch": 0.9968602825745683, "grad_norm": 2.3240808973920046, "learning_rate": 5.17054671581585e-10, "loss": 0.8017, "step": 9525 }, { "epoch": 0.9969649398220827, "grad_norm": 2.194269062168786, "learning_rate": 4.831591382870038e-10, "loss": 0.9831, "step": 9526 }, { "epoch": 0.9970695970695971, "grad_norm": 2.0354807231276872, "learning_rate": 4.504125697490924e-10, "loss": 0.9727, "step": 9527 }, { "epoch": 0.9971742543171115, "grad_norm": 2.063899720456543, "learning_rate": 4.188149697303967e-10, "loss": 0.9148, "step": 9528 }, { "epoch": 0.9972789115646259, "grad_norm": 2.2768683287427196, "learning_rate": 3.883663418602357e-10, "loss": 0.8469, "step": 9529 }, { "epoch": 0.9973835688121402, "grad_norm": 2.0468757802028033, "learning_rate": 3.5906668963803235e-10, "loss": 0.8325, "step": 9530 }, { "epoch": 0.9974882260596546, "grad_norm": 2.051294044317142, "learning_rate": 3.3091601643109314e-10, "loss": 0.8025, "step": 9531 }, { "epoch": 0.997592883307169, "grad_norm": 2.105439857515838, "learning_rate": 3.039143254723875e-10, "loss": 0.9892, "step": 9532 }, { "epoch": 0.9976975405546834, "grad_norm": 2.220984101222805, "learning_rate": 2.7806161986609905e-10, "loss": 0.7738, "step": 9533 }, { "epoch": 0.9978021978021978, "grad_norm": 2.115650631099441, "learning_rate": 2.5335790258207426e-10, "loss": 0.8181, "step": 9534 }, { "epoch": 0.9979068550497122, "grad_norm": 2.1913243479989855, "learning_rate": 2.298031764591535e-10, "loss": 0.8192, "step": 9535 }, { "epoch": 0.9980115122972266, "grad_norm": 1.9652396497864315, "learning_rate": 2.0739744420295026e-10, "loss": 0.8074, "step": 9536 }, { "epoch": 0.998116169544741, "grad_norm": 2.4031821017278814, "learning_rate": 1.8614070838918198e-10, "loss": 0.9575, "step": 9537 }, { "epoch": 0.9982208267922553, "grad_norm": 2.037609844489397, "learning_rate": 1.6603297145922902e-10, "loss": 0.8618, "step": 9538 }, { "epoch": 0.9983254840397697, "grad_norm": 1.6984142714399961, "learning_rate": 1.4707423572346556e-10, "loss": 0.754, "step": 9539 }, { "epoch": 0.9984301412872841, "grad_norm": 1.627184498513617, "learning_rate": 1.292645033612594e-10, "loss": 0.7935, "step": 9540 }, { "epoch": 0.9985347985347985, "grad_norm": 1.6578736068172026, "learning_rate": 1.1260377641764131e-10, "loss": 0.7769, "step": 9541 }, { "epoch": 0.998639455782313, "grad_norm": 2.0902509066149784, "learning_rate": 9.709205680885624e-11, "loss": 0.7798, "step": 9542 }, { "epoch": 0.9987441130298274, "grad_norm": 2.1775821639631023, "learning_rate": 8.272934631459173e-11, "loss": 0.8777, "step": 9543 }, { "epoch": 0.9988487702773418, "grad_norm": 2.2443525221661678, "learning_rate": 6.951564658796983e-11, "loss": 0.85, "step": 9544 }, { "epoch": 0.998953427524856, "grad_norm": 1.8463156292838934, "learning_rate": 5.745095914555521e-11, "loss": 0.8024, "step": 9545 }, { "epoch": 0.9990580847723705, "grad_norm": 2.2214610429811676, "learning_rate": 4.6535285374016416e-11, "loss": 0.843, "step": 9546 }, { "epoch": 0.9991627420198849, "grad_norm": 2.1287397785084075, "learning_rate": 3.676862652790547e-11, "loss": 0.8161, "step": 9547 }, { "epoch": 0.9992673992673993, "grad_norm": 2.1670560388230014, "learning_rate": 2.8150983728547632e-11, "loss": 0.9613, "step": 9548 }, { "epoch": 0.9993720565149137, "grad_norm": 1.8271825583030723, "learning_rate": 2.0682357966261834e-11, "loss": 0.8962, "step": 9549 }, { "epoch": 0.9994767137624281, "grad_norm": 1.847917867704652, "learning_rate": 1.4362750100360701e-11, "loss": 0.8912, "step": 9550 }, { "epoch": 0.9995813710099425, "grad_norm": 1.540095392303899, "learning_rate": 9.192160856930088e-12, "loss": 0.6913, "step": 9551 }, { "epoch": 0.9996860282574568, "grad_norm": 1.8041355621927853, "learning_rate": 5.170590828829092e-12, "loss": 0.7122, "step": 9552 }, { "epoch": 0.9997906855049712, "grad_norm": 2.0773412293224243, "learning_rate": 2.2980404779104904e-12, "loss": 0.8123, "step": 9553 }, { "epoch": 0.9998953427524856, "grad_norm": 2.2586840266436203, "learning_rate": 5.745101361309679e-13, "loss": 0.8988, "step": 9554 }, { "epoch": 1.0, "grad_norm": 1.738276172232474, "learning_rate": 0.0, "loss": 0.7992, "step": 9555 }, { "epoch": 1.0, "step": 9555, "total_flos": 1276904626028544.0, "train_loss": 0.9531744299042231, "train_runtime": 26831.9909, "train_samples_per_second": 22.79, "train_steps_per_second": 0.356 } ], "logging_steps": 1.0, "max_steps": 9555, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1276904626028544.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }